diff --git a/.gitignore b/.gitignore index d5c7f763cf..d7fcb019ae 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ mac/ .mypy_cache *.tmp *.swp +*.swo *.orig src/connector/nodejs/node_modules/ src/connector/nodejs/out/ diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 9e7aea03ea..e1aadd4486 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -153,11 +153,10 @@ typedef struct SQueryTableDataCond { int32_t order; // desc|asc order to iterate the data block int32_t numOfCols; SColumnInfo* colList; - int32_t type; // data block load type: - // int32_t numOfTWindows; - STimeWindow twindows; - int64_t startVersion; - int64_t endVersion; + int32_t type; // data block load type: + STimeWindow twindows; + int64_t startVersion; + int64_t endVersion; } SQueryTableDataCond; int32_t tEncodeDataBlock(void** buf, const SSDataBlock* pBlock); diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index e382fa4efd..ba16acf7b0 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -104,6 +104,7 @@ typedef struct SJoinLogicNode { SNode* pMergeCondition; SNode* pOnConditions; bool isSingleTableJoin; + EOrder inputTsOrder; } SJoinLogicNode; typedef struct SAggLogicNode { @@ -201,6 +202,7 @@ typedef struct SWindowLogicNode { int64_t watermark; int8_t igExpired; EWindowAlgorithm windowAlgo; + EOrder inputTsOrder; } SWindowLogicNode; typedef struct SFillLogicNode { @@ -356,15 +358,14 @@ typedef struct SInterpFuncPhysiNode { SNode* pTimeSeries; // SColumnNode } SInterpFuncPhysiNode; -typedef struct SJoinPhysiNode { +typedef struct SSortMergeJoinPhysiNode { SPhysiNode node; EJoinType joinType; SNode* pMergeCondition; SNode* pOnConditions; SNodeList* pTargets; -} SJoinPhysiNode; - -typedef SJoinPhysiNode SSortMergeJoinPhysiNode; + EOrder inputTsOrder; +} SSortMergeJoinPhysiNode; typedef struct SAggPhysiNode { SPhysiNode node; diff --git a/include/libs/nodes/querynodes.h 
b/include/libs/nodes/querynodes.h index f8c7024591..81ed5b5ecd 100644 --- a/include/libs/nodes/querynodes.h +++ b/include/libs/nodes/querynodes.h @@ -255,6 +255,7 @@ typedef struct SSelectStmt { int32_t selectFuncNum; bool isEmptyResult; bool isTimeLineResult; + bool isSubquery; bool hasAggFuncs; bool hasRepeatScanFuncs; bool hasIndefiniteRowsFunc; diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index 907b3be560..220c4f73e0 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -114,21 +114,30 @@ typedef struct SWal { int64_t refId; TdThreadMutex mutex; // ref - SHashObj *pRefHash; // ref -> SWalRef + SHashObj *pRefHash; // refId -> SWalRef // path char path[WAL_PATH_LEN]; // reusable write head SWalCkHead writeHead; -} SWal; // WAL HANDLE +} SWal; + +typedef struct { + int64_t refId; + int64_t refVer; + int64_t refFile; + SWal *pWal; +} SWalRef; typedef struct { int8_t scanUncommited; + int8_t scanNotApplied; int8_t scanMeta; int8_t enableRef; } SWalFilterCond; typedef struct { SWal *pWal; + int64_t readerId; TdFilePtr pLogFile; TdFilePtr pIdxFile; int64_t curFileFirstVer; @@ -138,7 +147,8 @@ typedef struct { int8_t curStopped; TdThreadMutex mutex; SWalFilterCond cond; - SWalCkHead *pHead; + // TODO remove it + SWalCkHead *pHead; } SWalReader; // module initialization @@ -157,11 +167,7 @@ int32_t walWrite(SWal *, int64_t index, tmsg_t msgType, const void *body, int32_ int32_t walWriteWithSyncInfo(SWal *, int64_t index, tmsg_t msgType, SWalSyncInfo syncMeta, const void *body, int32_t bodyLen); -// This interface assign version automatically and return to caller. -// When using this interface with concurrent writes, -// wal will write all logs atomically, -// but not sure which one will be actually write first, -// and then the unique index of successful writen is returned. 
+// Assign version automatically and return to caller, // -1 will be returned for failed writes int64_t walAppendLog(SWal *, tmsg_t msgType, SWalSyncInfo syncMeta, const void *body, int32_t bodyLen); @@ -191,17 +197,15 @@ void walSetReaderCapacity(SWalReader *pRead, int32_t capacity); int32_t walFetchHead(SWalReader *pRead, int64_t ver, SWalCkHead *pHead); int32_t walFetchBody(SWalReader *pRead, SWalCkHead **ppHead); int32_t walSkipFetchBody(SWalReader *pRead, const SWalCkHead *pHead); -typedef struct { - int64_t refId; - int64_t ver; -} SWalRef; + +SWalRef *walRefCommittedVer(SWal *); SWalRef *walOpenRef(SWal *); -void walCloseRef(SWalRef *); +void walCloseRef(SWal *pWal, int64_t refId); int32_t walRefVer(SWalRef *, int64_t ver); -int32_t walUnrefVer(SWal *); +void walUnrefVer(SWalRef *); -// help function for raft +// helper function for raft bool walLogExist(SWal *, int64_t ver); bool walIsEmpty(SWal *); diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index a73808f2ed..36900e3dfa 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -213,10 +213,6 @@ static int32_t taosSetTfsCfg(SConfig *pCfg) { memcpy(&tsDiskCfg[index], pCfg, sizeof(SDiskCfg)); if (pCfg->level == 0 && pCfg->primary == 1) { tstrncpy(tsDataDir, pCfg->dir, PATH_MAX); - if (taosMulMkDir(tsDataDir) != 0) { - uError("failed to create dataDir:%s since %s", tsDataDir, terrstr()); - return -1; - } } if (taosMulMkDir(pCfg->dir) != 0) { uError("failed to create tfsDir:%s since %s", tsDataDir, terrstr()); @@ -227,12 +223,13 @@ static int32_t taosSetTfsCfg(SConfig *pCfg) { if (tsDataDir[0] == 0) { if (pItem->str != NULL) { - taosAddDataDir(0, pItem->str, 0, 1); + taosAddDataDir(tsDiskCfgNum, pItem->str, 0, 1); tstrncpy(tsDataDir, pItem->str, PATH_MAX); if (taosMulMkDir(tsDataDir) != 0) { - uError("failed to create dataDir:%s since %s", tsDataDir, terrstr()); + uError("failed to create tfsDir:%s since %s", tsDataDir, terrstr()); return -1; } + tsDiskCfgNum++; } 
else { uError("datadir not set"); return -1; diff --git a/source/dnode/mnode/impl/src/mndProfile.c b/source/dnode/mnode/impl/src/mndProfile.c index 5871d56a8f..f6ecd4493d 100644 --- a/source/dnode/mnode/impl/src/mndProfile.c +++ b/source/dnode/mnode/impl/src/mndProfile.c @@ -312,7 +312,7 @@ static int32_t mndSaveQueryList(SConnObj *pConn, SQueryHbReqBasic *pBasic) { pConn->numOfQueries = pBasic->queryDesc ? taosArrayGetSize(pBasic->queryDesc) : 0; pBasic->queryDesc = NULL; - mDebug("queries updated in conn %d, num:%d", pConn->id, pConn->numOfQueries); + mDebug("queries updated in conn %u, num:%d", pConn->id, pConn->numOfQueries); taosWUnLockLatch(&pConn->queryLock); diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 6d976083d2..4d95a9d7a5 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -118,9 +118,8 @@ int32_t metaTbCursorNext(SMTbCursor *pTbCur); // typedef struct STsdb STsdb; typedef struct STsdbReader STsdbReader; -#define BLOCK_LOAD_OFFSET_ORDER 1 -#define BLOCK_LOAD_TABLESEQ_ORDER 2 -#define BLOCK_LOAD_EXTERN_ORDER 3 +#define TIMEWINDOW_RANGE_CONTAINED 1 +#define TIMEWINDOW_RANGE_EXTERNAL 2 #define LASTROW_RETRIEVE_TYPE_ALL 0x1 #define LASTROW_RETRIEVE_TYPE_SINGLE 0x2 diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index b063e552f6..262300a3e7 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -104,6 +104,8 @@ typedef struct { // TODO remove SWalReader* pWalReader; + SWalRef* pRef; + // push STqPushHandle pushHandle; diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 8ea4b8d98b..118e3a5d43 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -212,6 +212,15 @@ int32_t tqProcessOffsetCommitReq(STQ* pTq, char* msg, int32_t msgLen) { ASSERT(0); return -1; } + + if (offset.val.type == TMQ_OFFSET__LOG) { + STqHandle* pHandle = taosHashGet(pTq->handles, offset.subKey, 
strlen(offset.subKey)); + if (walRefVer(pHandle->pRef, offset.val.version) < 0) { + ASSERT(0); + return -1; + } + } + /*}*/ /*}*/ @@ -376,8 +385,8 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { } if (pHandle->execHandle.subType != TOPIC_SUB_TYPE__COLUMN) { - int64_t fetchVer = fetchOffsetNew.version + 1; - SWalCkHead* pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048); + int64_t fetchVer = fetchOffsetNew.version + 1; + pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048); if (pCkHead == NULL) { code = -1; goto OVER; @@ -534,11 +543,14 @@ int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen) { pHandle->execHandle.subType = req.subType; pHandle->fetchMeta = req.withMeta; + // TODO version should be assigned and refed during preprocess + SWalRef* pRef = walRefCommittedVer(pTq->pVnode->pWal); + if (pRef == NULL) { + ASSERT(0); + } + int64_t ver = pRef->refVer; + pHandle->pRef = pRef; - pHandle->pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); - - // TODO version should be assigned in preprocess - int64_t ver = walGetCommittedVer(pTq->pVnode->pWal); if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { pHandle->execHandle.execCol.qmsg = req.qmsg; pHandle->snapshotVer = ver; @@ -560,10 +572,14 @@ int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen) { pHandle->execHandle.pExecReader = qExtractReaderFromStreamScanner(scanner); ASSERT(pHandle->execHandle.pExecReader); } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) { + pHandle->pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); + pHandle->execHandle.pExecReader = tqOpenReader(pTq->pVnode); pHandle->execHandle.execDb.pFilterOutTbUid = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__TABLE) { + pHandle->pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); + pHandle->execHandle.execTb.suid = req.suid; SArray* tbUidList = taosArrayInit(0, sizeof(int64_t)); 
vnodeGetCtbIdList(pTq->pVnode, req.suid, tbUidList); diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index 620417016f..290ffe5c8d 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -52,7 +52,7 @@ int32_t tqMetaOpen(STQ* pTq) { ASSERT(0); } - TXN txn; + TXN txn = {0}; if (tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { ASSERT(0); @@ -75,7 +75,13 @@ int32_t tqMetaOpen(STQ* pTq) { STqHandle handle; tDecoderInit(&decoder, (uint8_t*)pVal, vLen); tDecodeSTqHandle(&decoder, &handle); - handle.pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); + + handle.pRef = walOpenRef(pTq->pVnode->pWal); + if (handle.pRef == NULL) { + ASSERT(0); + } + walRefVer(handle.pRef, handle.snapshotVer); + if (handle.execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { SReadHandle reader = { .meta = pTq->pVnode->pMeta, @@ -94,6 +100,7 @@ int32_t tqMetaOpen(STQ* pTq) { handle.execHandle.pExecReader = qExtractReaderFromStreamScanner(scanner); ASSERT(handle.execHandle.pExecReader); } else { + handle.pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); handle.execHandle.execDb.pFilterOutTbUid = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index cec714e0ee..ea8ac09429 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -16,6 +16,12 @@ #include "tsdb.h" #define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) +typedef enum { + EXTERNAL_ROWS_PREV = 0x1, + EXTERNAL_ROWS_MAIN = 0x2, + EXTERNAL_ROWS_NEXT = 0x3, +} EContentData; + typedef struct { STbDataIter* iter; int32_t index; @@ -70,9 +76,9 @@ typedef struct SFilesetIter { } SFilesetIter; typedef struct SFileDataBlockInfo { - int32_t - tbBlockIdx; // index position in STableBlockScanInfo in order to check whether neighbor block overlaps with it + // index position in 
STableBlockScanInfo in order to check whether neighbor block overlaps with it uint64_t uid; + int32_t tbBlockIdx; } SFileDataBlockInfo; typedef struct SDataBlockIter { @@ -99,12 +105,11 @@ typedef struct SReaderStatus { SHashObj* pTableMap; // SHash STableBlockScanInfo* pTableIter; // table iterator used in building in-memory buffer data blocks. SFileBlockDumpInfo fBlockDumpInfo; - - SDFileSet* pCurrentFileset; // current opened file set - SBlockData fileBlockData; - SFilesetIter fileIter; - SDataBlockIter blockIter; - bool composedDataBlock; // the returned data block is a composed block or not + SDFileSet* pCurrentFileset; // current opened file set + SBlockData fileBlockData; + SFilesetIter fileIter; + SDataBlockIter blockIter; + bool composedDataBlock; // the returned data block is a composed block or not } SReaderStatus; struct STsdbReader { @@ -115,15 +120,17 @@ struct STsdbReader { SSDataBlock* pResBlock; int32_t capacity; SReaderStatus status; - char* idStr; // query info handle, for debug purpose - int32_t type; // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows + char* idStr; // query info handle, for debug purpose + int32_t type; // query type: 1. retrieve all data blocks, 2. 
retrieve direct prev|next rows SBlockLoadSuppInfo suppInfo; STsdbReadSnap* pReadSnap; + SIOCostSummary cost; + STSchema* pSchema; + SDataFReader* pFileReader; + SVersionRange verRange; - SIOCostSummary cost; - STSchema* pSchema; - SDataFReader* pFileReader; - SVersionRange verRange; + int32_t step; + STsdbReader* innerReader[2]; }; static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter); @@ -200,6 +207,9 @@ static SHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, const STableK pTsdbReader->idStr); } + tsdbDebug("%p create %d tables scan-info, size:%.2f Kb, %s", pTsdbReader, numOfTables, (sizeof(STableBlockScanInfo)*numOfTables)/1024.0, + pTsdbReader->idStr); + return pTableMap; } @@ -328,7 +338,7 @@ static bool filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader) { continue; } - tsdbDebug("%p file found fid:%d for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader, fid, pReader->window.skey, + tsdbDebug("%p file found fid:%d for qrange:%" PRId64 "-%" PRId64 ", %s", pReader, fid, pReader->window.skey, pReader->window.ekey, pReader->idStr); return true; } @@ -378,7 +388,7 @@ static SSDataBlock* createResBlock(SQueryTableDataCond* pCond, int32_t capacity) return pResBlock; } -static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsdbReader** ppReader, const char* idstr) { +static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsdbReader** ppReader, int32_t capacity, const char* idstr) { int32_t code = 0; int8_t level = 0; STsdbReader* pReader = (STsdbReader*)taosMemoryCalloc(1, sizeof(*pReader)); @@ -392,7 +402,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level); pReader->suid = pCond->suid; pReader->order = pCond->order; - pReader->capacity = 4096; + pReader->capacity = capacity; pReader->idStr = (idstr != NULL) ? 
strdup(idstr) : NULL; pReader->verRange = getQueryVerRange(pVnode, pCond, level); pReader->type = pCond->type; @@ -483,95 +493,6 @@ _end: // return res; // } -// static TSKEY extractFirstTraverseKey(STableBlockScanInfo* pCheckInfo, int32_t order, int32_t update, TDRowVerT -// maxVer) { -// TSDBROW row = {0}; -// STSRow *rmem = NULL, *rimem = NULL; - -// if (pCheckInfo->iter) { -// if (tsdbTbDataIterGet(pCheckInfo->iter, &row)) { -// rmem = row.pTSRow; -// } -// } - -// if (pCheckInfo->iiter) { -// if (tsdbTbDataIterGet(pCheckInfo->iiter, &row)) { -// rimem = row.pTSRow; -// } -// } - -// if (rmem == NULL && rimem == NULL) { -// return TSKEY_INITIAL_VAL; -// } - -// if (rmem != NULL && rimem == NULL) { -// pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM; -// return TD_ROW_KEY(rmem); -// } - -// if (rmem == NULL && rimem != NULL) { -// pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM; -// return TD_ROW_KEY(rimem); -// } - -// TSKEY r1 = TD_ROW_KEY(rmem); -// TSKEY r2 = TD_ROW_KEY(rimem); - -// if (r1 == r2) { -// if (TD_SUPPORT_UPDATE(update)) { -// pCheckInfo->chosen = CHECKINFO_CHOSEN_BOTH; -// } else { -// pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM; -// tsdbTbDataIterNext(pCheckInfo->iter); -// } -// return r1; -// } else if (r1 < r2 && ASCENDING_TRAVERSE(order)) { -// pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM; -// return r1; -// } else { -// pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM; -// return r2; -// } -// } - -// static bool moveToNextRowInMem(STableBlockScanInfo* pCheckInfo) { -// bool hasNext = false; -// if (pCheckInfo->chosen == CHECKINFO_CHOSEN_MEM) { -// if (pCheckInfo->iter != NULL) { -// hasNext = tsdbTbDataIterNext(pCheckInfo->iter); -// } - -// if (hasNext) { -// return hasNext; -// } - -// if (pCheckInfo->iiter != NULL) { -// return tsdbTbDataIterGet(pCheckInfo->iiter, NULL); -// } -// } else if (pCheckInfo->chosen == CHECKINFO_CHOSEN_IMEM) { -// if (pCheckInfo->iiter != NULL) { -// hasNext = tsdbTbDataIterNext(pCheckInfo->iiter); -// } - -// if (hasNext) { 
-// return hasNext; -// } - -// if (pCheckInfo->iter != NULL) { -// return tsdbTbDataIterGet(pCheckInfo->iter, NULL); -// } -// } else { -// if (pCheckInfo->iter != NULL) { -// hasNext = tsdbTbDataIterNext(pCheckInfo->iter); -// } -// if (pCheckInfo->iiter != NULL) { -// hasNext = tsdbTbDataIterNext(pCheckInfo->iiter) || hasNext; -// } -// } - -// return hasNext; -// } - // static int32_t binarySearchForBlock(SBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) { // int32_t firstSlot = 0; // int32_t lastSlot = numOfBlocks - 1; @@ -602,18 +523,22 @@ _end: static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFReader* pFileReader, SArray* pIndexList) { SArray* aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); + int64_t st = taosGetTimestampUs(); int32_t code = tsdbReadBlockIdx(pFileReader, aBlockIdx, NULL); if (code != TSDB_CODE_SUCCESS) { goto _end; } - if (taosArrayGetSize(aBlockIdx) == 0) { + size_t num = taosArrayGetSize(aBlockIdx); + if (num == 0) { taosArrayClear(aBlockIdx); return TSDB_CODE_SUCCESS; } - SBlockIdx* pBlockIdx; - for (int32_t i = 0; i < taosArrayGetSize(aBlockIdx); ++i) { + int64_t et1 = taosGetTimestampUs(); + + SBlockIdx* pBlockIdx = NULL; + for (int32_t i = 0; i < num; ++i) { pBlockIdx = (SBlockIdx*)taosArrayGet(aBlockIdx, i); // uid check @@ -627,17 +552,6 @@ static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFReader* pFileReader, continue; } - // todo: not valid info in bockIndex - // time range check - // if (pBlockIdx->minKey > pReader->window.ekey || pBlockIdx->maxKey < pReader->window.skey) { - // continue; - // } - - // version check - // if (pBlockIdx->minVersion > pReader->verRange.maxVer || pBlockIdx->maxVersion < pReader->verRange.minVer) { - // continue; - // } - STableBlockScanInfo* pScanInfo = p; if (pScanInfo->pBlockList == NULL) { pScanInfo->pBlockList = taosArrayInit(16, sizeof(SBlock)); @@ -647,6 +561,9 @@ static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFReader* pFileReader, 
taosArrayPush(pIndexList, pBlockIdx); } + int64_t et2 = taosGetTimestampUs(); + tsdbDebug("load block index for %d tables completed, elapsed time:%.2f ms, set blockIdx:%.2f ms, size:%d bytes %s", + (int32_t)num, (et1 - st)/1000.0, (et2-et1)/1000.0, num * sizeof(SBlockIdx), pReader->idStr); _end: taosArrayDestroy(aBlockIdx); return code; @@ -655,9 +572,11 @@ _end: static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, uint32_t* numOfValidTables, int32_t* numOfBlocks) { size_t numOfTables = taosArrayGetSize(pIndexList); - *numOfValidTables = 0; + int64_t st = taosGetTimestampUs(); + size_t size = 0; + STableBlockScanInfo* px = NULL; while (1) { px = taosHashIterate(pReader->status.pTableMap, px); @@ -675,6 +594,8 @@ static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, uint32_ tMapDataReset(&mapData); tsdbReadBlock(pReader->pFileReader, pBlockIdx, &mapData, NULL); + size += mapData.nData; + STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(int64_t)); for (int32_t j = 0; j < mapData.nItem; ++j) { SBlock block = {0}; @@ -706,6 +627,10 @@ static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, uint32_ } } + int64_t et = taosGetTimestampUs(); + tsdbDebug("load block of %d tables completed, blocks:%d in %d tables, size:%.2f Kb, elapsed time:%.2f ms %s", + numOfTables, *numOfBlocks, *numOfValidTables, size/1000.0, (et-st)/1000.0, pReader->idStr); + return TSDB_CODE_SUCCESS; } @@ -816,7 +741,6 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn return TSDB_CODE_SUCCESS; } -// todo consider the output buffer size static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockIter, STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) { int64_t st = taosGetTimestampUs(); @@ -853,346 +777,6 @@ _error: return code; } -// static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) { -// int firstPos, lastPos, 
midPos = -1; -// int numOfRows; -// TSKEY* keyList; - -// assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC); - -// if (num <= 0) return -1; - -// keyList = (TSKEY*)pValue; -// firstPos = 0; -// lastPos = num - 1; - -// if (order == TSDB_ORDER_DESC) { -// // find the first position which is smaller than the key -// while (1) { -// if (key >= keyList[lastPos]) return lastPos; -// if (key == keyList[firstPos]) return firstPos; -// if (key < keyList[firstPos]) return firstPos - 1; - -// numOfRows = lastPos - firstPos + 1; -// midPos = (numOfRows >> 1) + firstPos; - -// if (key < keyList[midPos]) { -// lastPos = midPos - 1; -// } else if (key > keyList[midPos]) { -// firstPos = midPos + 1; -// } else { -// break; -// } -// } - -// } else { -// // find the first position which is bigger than the key -// while (1) { -// if (key <= keyList[firstPos]) return firstPos; -// if (key == keyList[lastPos]) return lastPos; - -// if (key > keyList[lastPos]) { -// lastPos = lastPos + 1; -// if (lastPos >= num) -// return -1; -// else -// return lastPos; -// } - -// numOfRows = lastPos - firstPos + 1; -// midPos = (numOfRows >> 1) + firstPos; - -// if (key < keyList[midPos]) { -// lastPos = midPos - 1; -// } else if (key > keyList[midPos]) { -// firstPos = midPos + 1; -// } else { -// break; -// } -// } -// } - -// return midPos; -// } - -// static void doCheckGeneratedBlockRange(STsdbReader* pTsdbReadHandle) { -// SQueryFilePos* cur = &pTsdbReadHandle->cur; - -// if (cur->rows > 0) { -// if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) { -// assert(cur->win.skey >= pTsdbReadHandle->window.skey && cur->win.ekey <= pTsdbReadHandle->window.ekey); -// } else { -// assert(cur->win.skey >= pTsdbReadHandle->window.ekey && cur->win.ekey <= pTsdbReadHandle->window.skey); -// } - -// SColumnInfoData* pColInfoData = taosArrayGet(pTsdbReadHandle->pColumns, 0); -// assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] && -// cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows - 
1]); -// } else { -// cur->win = pTsdbReadHandle->window; - -// int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order) ? 1 : -1; -// cur->lastKey = pTsdbReadHandle->window.ekey + step; -// } -// } - -// static void copyAllRemainRowsFromFileBlock(STsdbReader* pTsdbReadHandle, STableBlockScanInfo* pCheckInfo, -// SDataBlockInfo* pBlockInfo, int32_t endPos) { -// SQueryFilePos* cur = &pTsdbReadHandle->cur; - -// SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0]; -// TSKEY* tsArray = pCols->cols[0].pData; - -// bool ascScan = ASCENDING_TRAVERSE(pTsdbReadHandle->order); - -// int32_t step = ascScan ? 1 : -1; - -// int32_t start = cur->pos; -// int32_t end = endPos; - -// if (!ascScan) { -// TSWAP(start, end); -// } - -// assert(pTsdbReadHandle->outputCapacity >= (end - start + 1)); -// int32_t numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, 0, start, end); - -// // the time window should always be ascending order: skey <= ekey -// cur->win = (STimeWindow){.skey = tsArray[start], .ekey = tsArray[end]}; -// cur->mixBlock = (numOfRows != pBlockInfo->rows); -// cur->lastKey = tsArray[endPos] + step; -// cur->blockCompleted = (ascScan ? (endPos == pBlockInfo->rows - 1) : (endPos == 0)); - -// // The value of pos may be -1 or pBlockInfo->rows, and it is invalid in both cases. 
-// int32_t pos = endPos + step; -// updateInfoAfterMerge(pTsdbReadHandle, pCheckInfo, numOfRows, pos); -// doCheckGeneratedBlockRange(pTsdbReadHandle); - -// tsdbDebug("%p uid:%" PRIu64 ", data block created, mixblock:%d, brange:%" PRIu64 "-%" PRIu64 " rows:%d, %s", -// pTsdbReadHandle, pCheckInfo->tableId, cur->mixBlock, cur->win.skey, cur->win.ekey, cur->rows, -// pTsdbReadHandle->idStr); -// } - -// // only return the qualified data to client in terms of query time window, data rows in the same block but do not -// // be included in the query time window will be discarded -// static void doMergeTwoLevelData(STsdbReader* pTsdbReadHandle, STableBlockScanInfo* pCheckInfo, SBlock* pBlock) { -// SQueryFilePos* cur = &pTsdbReadHandle->cur; -// SDataBlockInfo blockInfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock); -// STsdbCfg* pCfg = REPO_CFG(pTsdbReadHandle->pTsdb); - -// initTableMemIterator(pTsdbReadHandle, pCheckInfo); - -// SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0]; -// assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_ID && -// cur->pos >= 0 && cur->pos < pBlock->numOfRows); -// // Even Multi-Version supported, the records with duplicated TSKEY would be merged inside of tsdbLoadData -// interface. TSKEY* tsArray = pCols->cols[0].pData; assert(pCols->numOfRows == pBlock->numOfRows && tsArray[0] == -// pBlock->minKey.ts && -// tsArray[pBlock->numOfRows - 1] == pBlock->maxKey.ts); - -// bool ascScan = ASCENDING_TRAVERSE(pTsdbReadHandle->order); -// int32_t step = ascScan ? 1 : -1; - -// // for search the endPos, so the order needs to reverse -// int32_t order = ascScan ? 
TSDB_ORDER_DESC : TSDB_ORDER_ASC; - -// int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pTsdbReadHandle)); -// int32_t endPos = getEndPosInDataBlock(pTsdbReadHandle, &blockInfo); - -// STimeWindow* pWin = &blockInfo.window; -// tsdbDebug("%p uid:%" PRIu64 " start merge data block, file block range:%" PRIu64 "-%" PRIu64 -// " rows:%d, start:%d, end:%d, %s", -// pTsdbReadHandle, pCheckInfo->tableId, pWin->skey, pWin->ekey, blockInfo.rows, cur->pos, endPos, -// pTsdbReadHandle->idStr); - -// // compared with the data from in-memory buffer, to generate the correct timestamp array list -// int32_t numOfRows = 0; -// int32_t curRow = 0; - -// int16_t rv1 = -1; -// int16_t rv2 = -1; -// STSchema* pSchema1 = NULL; -// STSchema* pSchema2 = NULL; - -// int32_t pos = cur->pos; -// cur->win = TSWINDOW_INITIALIZER; -// bool adjustPos = false; - -// // no data in buffer, load data from file directly -// if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) { -// copyAllRemainRowsFromFileBlock(pTsdbReadHandle, pCheckInfo, &blockInfo, endPos); -// return; -// } else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) { -// SSkipListNode* node = NULL; -// TSKEY lastKeyAppend = TSKEY_INITIAL_VAL; - -// do { -// STSRow* row2 = NULL; -// STSRow* row1 = getSRowInTableMem(pCheckInfo, pTsdbReadHandle->order, pCfg->update, &row2, TD_VER_MAX); -// if (row1 == NULL) { -// break; -// } - -// TSKEY key = TD_ROW_KEY(row1); -// if ((key > pTsdbReadHandle->window.ekey && ascScan) || (key < pTsdbReadHandle->window.ekey && !ascScan)) { -// break; -// } - -// if (adjustPos) { -// if (key == lastKeyAppend) { -// pos -= step; -// } -// adjustPos = false; -// } - -// if (((pos > endPos || tsArray[pos] > pTsdbReadHandle->window.ekey) && ascScan) || -// ((pos < endPos || tsArray[pos] < pTsdbReadHandle->window.ekey) && !ascScan)) { -// break; -// } - -// if ((key < tsArray[pos] && ascScan) || (key > tsArray[pos] && !ascScan)) { -// if (rv1 != TD_ROW_SVER(row1)) { -// // pSchema1 = 
tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1)); -// rv1 = TD_ROW_SVER(row1); -// } -// if (row2 && rv2 != TD_ROW_SVER(row2)) { -// // pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2)); -// rv2 = TD_ROW_SVER(row2); -// } - -// numOfRows += -// mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, &curRow, row1, row2, numOfCols, -// pCheckInfo->tableId, pSchema1, pSchema2, pCfg->update, &lastKeyAppend); -// if (cur->win.skey == TSKEY_INITIAL_VAL) { -// cur->win.skey = key; -// } - -// cur->win.ekey = key; -// cur->lastKey = key + step; -// cur->mixBlock = true; -// moveToNextRowInMem(pCheckInfo); -// } else if (key == tsArray[pos]) { // data in buffer has the same timestamp of data in file block, ignore it -// if (TD_SUPPORT_UPDATE(pCfg->update)) { -// if (lastKeyAppend != key) { -// if (lastKeyAppend != TSKEY_INITIAL_VAL) { -// ++curRow; -// } -// lastKeyAppend = key; -// } -// // load data from file firstly -// numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, curRow, pos, pos); - -// if (rv1 != TD_ROW_SVER(row1)) { -// rv1 = TD_ROW_SVER(row1); -// } -// if (row2 && rv2 != TD_ROW_SVER(row2)) { -// rv2 = TD_ROW_SVER(row2); -// } - -// // still assign data into current row -// numOfRows += -// mergeTwoRowFromMem(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, &curRow, row1, row2, numOfCols, -// pCheckInfo->tableId, pSchema1, pSchema2, pCfg->update, &lastKeyAppend); - -// if (cur->win.skey == TSKEY_INITIAL_VAL) { -// cur->win.skey = key; -// } - -// cur->win.ekey = key; -// cur->lastKey = key + step; -// cur->mixBlock = true; - -// moveToNextRowInMem(pCheckInfo); - -// pos += step; -// adjustPos = true; -// } else { -// // discard the memory record -// moveToNextRowInMem(pCheckInfo); -// } -// } else if ((key > tsArray[pos] && ascScan) || (key < tsArray[pos] && !ascScan)) { -// if (cur->win.skey == TSKEY_INITIAL_VAL) { -// cur->win.skey = tsArray[pos]; -// } - -// int32_t end = 
doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, key, order); -// assert(end != -1); - -// if (tsArray[end] == key) { // the value of key in cache equals to the end timestamp value, ignore it -// #if 0 -// if (pCfg->update == TD_ROW_DISCARD_UPDATE) { -// moveToNextRowInMem(pCheckInfo); -// } else { -// end -= step; -// } -// #endif -// if (!TD_SUPPORT_UPDATE(pCfg->update)) { -// moveToNextRowInMem(pCheckInfo); -// } else { -// end -= step; -// } -// } - -// int32_t qstart = 0, qend = 0; -// getQualifiedRowsPos(pTsdbReadHandle, pos, end, numOfRows, &qstart, &qend); - -// if ((lastKeyAppend != TSKEY_INITIAL_VAL) && (lastKeyAppend != (ascScan ? tsArray[qstart] : tsArray[qend]))) { -// ++curRow; -// } - -// numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, curRow, qstart, qend); -// pos += (qend - qstart + 1) * step; -// if (numOfRows > 0) { -// curRow = numOfRows - 1; -// } - -// cur->win.ekey = ascScan ? tsArray[qend] : tsArray[qstart]; -// cur->lastKey = cur->win.ekey + step; -// lastKeyAppend = cur->win.ekey; -// } -// } while (numOfRows < pTsdbReadHandle->outputCapacity); - -// if (numOfRows < pTsdbReadHandle->outputCapacity) { -// /** -// * if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT -// * copy them all to result buffer, since it may be overlapped with file data block. 
-// */ -// if (node == NULL || ((TD_ROW_KEY((STSRow*)SL_GET_NODE_DATA(node)) > pTsdbReadHandle->window.ekey) && ascScan) -// || -// ((TD_ROW_KEY((STSRow*)SL_GET_NODE_DATA(node)) < pTsdbReadHandle->window.ekey) && !ascScan)) { -// // no data in cache or data in cache is greater than the ekey of time window, load data from file block -// if (cur->win.skey == TSKEY_INITIAL_VAL) { -// cur->win.skey = tsArray[pos]; -// } - -// int32_t start = -1, end = -1; -// getQualifiedRowsPos(pTsdbReadHandle, pos, endPos, numOfRows, &start, &end); - -// numOfRows = doCopyRowsFromFileBlock(pTsdbReadHandle, pTsdbReadHandle->outputCapacity, numOfRows, start, end); -// pos += (end - start + 1) * step; - -// cur->win.ekey = ascScan ? tsArray[end] : tsArray[start]; -// cur->lastKey = cur->win.ekey + step; -// cur->mixBlock = true; -// } -// } -// } - -// cur->blockCompleted = (((pos > endPos || cur->lastKey > pTsdbReadHandle->window.ekey) && ascScan) || -// ((pos < endPos || cur->lastKey < pTsdbReadHandle->window.ekey) && !ascScan)); - -// if (!ascScan) { -// TSWAP(cur->win.skey, cur->win.ekey); -// } - -// updateInfoAfterMerge(pTsdbReadHandle, pCheckInfo, numOfRows, pos); -// doCheckGeneratedBlockRange(pTsdbReadHandle); - -// tsdbDebug("%p uid:%" PRIu64 ", data block created, mixblock:%d, brange:%" PRIu64 "-%" PRIu64 " rows:%d, %s", -// pTsdbReadHandle, pCheckInfo->tableId, cur->mixBlock, cur->win.skey, cur->win.ekey, cur->rows, -// pTsdbReadHandle->idStr); -// } - static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) { taosMemoryFreeClear(pSup->numOfBlocksPerTable); taosMemoryFreeClear(pSup->indexPerTable); @@ -1252,8 +836,9 @@ static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIte // access data blocks according to the offset of each block in asc/desc order. 
int32_t numOfTables = (int32_t)taosHashGetSize(pReader->status.pTableMap); - SBlockOrderSupporter sup = {0}; + int64_t st = taosGetTimestampUs(); + SBlockOrderSupporter sup = {0}; int32_t code = initBlockOrderSupporter(&sup, numOfTables); if (code != TSDB_CODE_SUCCESS) { return code; @@ -1302,11 +887,12 @@ static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIte SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[0][i].uid, .tbBlockIdx = i}; taosArrayPush(pBlockIter->blockList, &blockInfo); } - tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted %s", pReader, cnt, - pReader->idStr); + + int64_t et = taosGetTimestampUs(); + tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted, elapsed time:%.2f ms %s", pReader, cnt, + (et - st)/1000.0, pReader->idStr); pBlockIter->index = asc ? 0 : (numOfBlocks - 1); - cleanupBlockOrderSupporter(&sup); return TSDB_CODE_SUCCESS; } @@ -1340,7 +926,8 @@ static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIte tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree)); } - tsdbDebug("%p %d data blocks sort completed, %s", pReader, cnt, pReader->idStr); + int64_t et = taosGetTimestampUs(); + tsdbDebug("%p %d data blocks access order completed, elapsed time:%.2f ms %s", pReader, cnt, (et-st)/1000.0, pReader->idStr); cleanupBlockOrderSupporter(&sup); taosMemoryFree(pTree); @@ -1813,6 +1400,8 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader, STableBlockScanInfo* SBlockData* pBlockData = &pReader->status.fileBlockData; int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 
1 : -1; + int64_t st = taosGetTimestampUs(); + while (1) { // todo check the validate of row in file block { @@ -1851,10 +1440,11 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader, STableBlockScanInfo* blockDataUpdateTsWindow(pResBlock, 0); setComposedBlockFlag(pReader, true); + int64_t et = taosGetTimestampUs(); - tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%d, %s", pReader, + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%d, elapsed time:%.2f ms %s", pReader, pBlockScanInfo->uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, pResBlock->info.rows, - pReader->idStr); + (et - st)/1000.0, pReader->idStr); return TSDB_CODE_SUCCESS; } @@ -2031,7 +1621,9 @@ static TSDBKEY getCurrentKeyInBuf(SDataBlockIter* pBlockIter, STsdbReader* pRead static int32_t moveToNextFile(STsdbReader* pReader, int32_t* numOfBlocks) { SReaderStatus* pStatus = &pReader->status; - SArray* pIndexList = taosArrayInit(4, sizeof(SBlockIdx)); + + size_t numOfTables = taosHashGetSize(pReader->status.pTableMap); + SArray* pIndexList = taosArrayInit(numOfTables, sizeof(SBlockIdx)); while (1) { bool hasNext = filesetIteratorNext(&pStatus->fileIter, pReader); @@ -2799,24 +2391,57 @@ int32_t tsdbGetStbIdList(SMeta* pMeta, int64_t suid, SArray* list) { // ====================================== EXPOSED APIs ====================================== int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTableList, STsdbReader** ppReader, const char* idstr) { - int32_t code = tsdbReaderCreate(pVnode, pCond, ppReader, idstr); - if (code) { + int32_t code = tsdbReaderCreate(pVnode, pCond, ppReader, 4096, idstr); + if (code != TSDB_CODE_SUCCESS) { goto _err; } - if (pCond->suid != 0) { - (*ppReader)->pSchema = metaGetTbTSchema((*ppReader)->pTsdb->pVnode->pMeta, (*ppReader)->suid, -1); - } else if (taosArrayGetSize(pTableList) > 0) { - STableKeyInfo* pKey = 
taosArrayGet(pTableList, 0); - (*ppReader)->pSchema = metaGetTbTSchema((*ppReader)->pTsdb->pVnode->pMeta, pKey->uid, -1); - } - + // check for query time window STsdbReader* pReader = *ppReader; if (isEmptyQueryTimeWindow(&pReader->window)) { tsdbDebug("%p query window not overlaps with the data set, no result returned, %s", pReader, pReader->idStr); return TSDB_CODE_SUCCESS; } + if (pCond->type == TIMEWINDOW_RANGE_EXTERNAL) { + // update the SQueryTableDataCond to create inner reader + STimeWindow w = pCond->twindows; + int32_t order = pCond->order; + if (order == TSDB_ORDER_ASC) { + pCond->twindows.ekey = pCond->twindows.skey; + pCond->twindows.skey = INT64_MIN; + pCond->order = TSDB_ORDER_DESC; + } else { + pCond->twindows.skey = pCond->twindows.ekey; + pCond->twindows.ekey = INT64_MAX; + pCond->order = TSDB_ORDER_ASC; + } + + code = tsdbReaderCreate(pVnode, pCond, &pReader->innerReader[0], 1, idstr); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + if (order == TSDB_ORDER_ASC) { + pCond->twindows.skey = w.ekey; + pCond->twindows.ekey = INT64_MAX; + } else { + pCond->twindows.skey = INT64_MIN; + pCond->twindows.ekey = w.ekey; + } + code = tsdbReaderCreate(pVnode, pCond, &pReader->innerReader[1], 1, idstr); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + } + + if (pCond->suid != 0) { + pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pReader->suid, -1); + } else if (taosArrayGetSize(pTableList) > 0) { + STableKeyInfo* pKey = taosArrayGet(pTableList, 0); + pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pKey->uid, -1); + } + int32_t numOfTables = taosArrayGetSize(pTableList); pReader->status.pTableMap = createDataBlockScanInfo(pReader, pTableList->pData, numOfTables); if (pReader->status.pTableMap == NULL) { @@ -2827,21 +2452,41 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl goto _err; } - SDataBlockIter* pBlockIter = &pReader->status.blockIter; - code = 
tsdbTakeReadSnap(pReader->pTsdb, &pReader->pReadSnap); - if (code) goto _err; + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } - initFilesetIterator(&pReader->status.fileIter, (*ppReader)->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); - resetDataBlockIterator(&pReader->status.blockIter, pReader->order); + if (pReader->type == TIMEWINDOW_RANGE_CONTAINED) { + SDataBlockIter* pBlockIter = &pReader->status.blockIter; - // no data in files, let's try buffer in memory - if (pReader->status.fileIter.numOfFiles == 0) { - pReader->status.loadFromFile = false; + initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); + resetDataBlockIterator(&pReader->status.blockIter, pReader->order); + + // no data in files, let's try buffer in memory + if (pReader->status.fileIter.numOfFiles == 0) { + pReader->status.loadFromFile = false; + } else { + code = initForFirstBlockInFile(pReader, pBlockIter); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } } else { - code = initForFirstBlockInFile(pReader, pBlockIter); - if (code != TSDB_CODE_SUCCESS) { - return code; + STsdbReader* pPrevReader = pReader->innerReader[0]; + SDataBlockIter* pBlockIter = &pPrevReader->status.blockIter; + + initFilesetIterator(&pPrevReader->status.fileIter, pPrevReader->pReadSnap->fs.aDFileSet, pPrevReader->order, pPrevReader->idStr); + resetDataBlockIterator(&pPrevReader->status.blockIter, pPrevReader->order); + + // no data in files, let's try buffer in memory + if (pPrevReader->status.fileIter.numOfFiles == 0) { + pPrevReader->status.loadFromFile = false; + } else { + code = initForFirstBlockInFile(pPrevReader, pBlockIter); + if (code != TSDB_CODE_SUCCESS) { + return code; + } } } @@ -2881,20 +2526,6 @@ void tsdbReaderClose(STsdbReader* pReader) { tsdbDataFReaderClose(&pReader->pFileReader); } -#if 0 -// if (pReader->status.pTableScanInfo != NULL) { -// pReader->status.pTableScanInfo = 
destroyTableCheckInfo(pReader->status.pTableScanInfo); -// } - -// tsdbDestroyReadH(&pReader->rhelper); - -// tdFreeDataCols(pReader->pDataCols); -// pReader->pDataCols = NULL; -// -// pReader->prev = doFreeColumnInfoData(pReader->prev); -// pReader->next = doFreeColumnInfoData(pReader->next); -#endif - SIOCostSummary* pCost = &pReader->cost; tsdbDebug("%p :io-cost summary: head-file read cnt:%" PRIu64 ", head-file time:%" PRIu64 " us, statis-info:%" PRId64 @@ -2907,55 +2538,100 @@ void tsdbReaderClose(STsdbReader* pReader) { taosMemoryFreeClear(pReader); } -bool tsdbNextDataBlock(STsdbReader* pReader) { - if (isEmptyQueryTimeWindow(&pReader->window)) { - return false; - } - +static bool doTsdbNextDataBlock(STsdbReader* pReader) { // cleanup the data that belongs to the previous data block SSDataBlock* pBlock = pReader->pResBlock; blockDataCleanup(pBlock); int64_t stime = taosGetTimestampUs(); - int64_t elapsedTime = stime; SReaderStatus* pStatus = &pReader->status; - if (pReader->type == BLOCK_LOAD_OFFSET_ORDER) { - if (pStatus->loadFromFile) { - int32_t code = buildBlockFromFiles(pReader); - if (code != TSDB_CODE_SUCCESS) { - return false; - } + if (pStatus->loadFromFile) { + int32_t code = buildBlockFromFiles(pReader); + if (code != TSDB_CODE_SUCCESS) { + return false; + } - if (pBlock->info.rows > 0) { - return true; - } else { - buildBlockFromBufferSequentially(pReader); - return pBlock->info.rows > 0; - } - } else { // no data in files, let's try the buffer + if (pBlock->info.rows > 0) { + return true; + } else { buildBlockFromBufferSequentially(pReader); return pBlock->info.rows > 0; } - } else if (pReader->type == BLOCK_LOAD_TABLESEQ_ORDER) { - } else if (pReader->type == BLOCK_LOAD_EXTERN_ORDER) { - } else { - ASSERT(0); + } else { // no data in files, let's try the buffer + buildBlockFromBufferSequentially(pReader); + return pBlock->info.rows > 0; } + return false; } -void tsdbRetrieveDataBlockInfo(STsdbReader* pReader, SDataBlockInfo* pDataBlockInfo) { 
+bool tsdbNextDataBlock(STsdbReader* pReader) { + if (isEmptyQueryTimeWindow(&pReader->window)) { + return false; + } + + if (pReader->innerReader[0] != NULL) { + bool ret = doTsdbNextDataBlock(pReader->innerReader[0]); + if (ret) { + pReader->step = EXTERNAL_ROWS_PREV; + return ret; + } + + tsdbReaderClose(pReader->innerReader[0]); + pReader->innerReader[0] = NULL; + } + + pReader->step = EXTERNAL_ROWS_MAIN; + bool ret = doTsdbNextDataBlock(pReader); + if (ret) { + return ret; + } + + if (pReader->innerReader[1] != NULL) { + bool ret1 = doTsdbNextDataBlock(pReader->innerReader[1]); + if (ret1) { + pReader->step = EXTERNAL_ROWS_NEXT; + return ret1; + } + + tsdbReaderClose(pReader->innerReader[1]); + pReader->innerReader[1] = NULL; + } + + return false; +} + +static void setBlockInfo(STsdbReader* pReader, SDataBlockInfo* pDataBlockInfo) { ASSERT(pDataBlockInfo != NULL && pReader != NULL); pDataBlockInfo->rows = pReader->pResBlock->info.rows; pDataBlockInfo->uid = pReader->pResBlock->info.uid; pDataBlockInfo->window = pReader->pResBlock->info.window; } +void tsdbRetrieveDataBlockInfo(STsdbReader* pReader, SDataBlockInfo* pDataBlockInfo) { + if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { + if (pReader->step == EXTERNAL_ROWS_MAIN) { + setBlockInfo(pReader, pDataBlockInfo); + } else if (pReader->step == EXTERNAL_ROWS_PREV) { + setBlockInfo(pReader->innerReader[0], pDataBlockInfo); + } else { + setBlockInfo(pReader->innerReader[1], pDataBlockInfo); + } + } else { + setBlockInfo(pReader, pDataBlockInfo); + } +} + int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SColumnDataAgg*** pBlockStatis, bool* allHave) { int32_t code = 0; *allHave = false; + if(pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { + *pBlockStatis = NULL; + return TSDB_CODE_SUCCESS; + } + // there is no statistics data for composed block if (pReader->status.composedDataBlock) { *pBlockStatis = NULL; @@ -3025,7 +2701,7 @@ int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SColumnDataAgg*** pBlockS 
return code; } -SArray* tsdbRetrieveDataBlock(STsdbReader* pReader, SArray* pIdList) { +static SArray* doRetrieveDataBlock(STsdbReader* pReader) { SReaderStatus* pStatus = &pReader->status; if (pStatus->composedDataBlock) { @@ -3054,16 +2730,27 @@ SArray* tsdbRetrieveDataBlock(STsdbReader* pReader, SArray* pIdList) { return pReader->pResBlock->pDataBlock; } +SArray* tsdbRetrieveDataBlock(STsdbReader* pReader, SArray* pIdList) { + if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { + if (pReader->step == EXTERNAL_ROWS_PREV) { + return doRetrieveDataBlock(pReader->innerReader[0]); + } else if (pReader->step == EXTERNAL_ROWS_NEXT) { + return doRetrieveDataBlock(pReader->innerReader[1]); + } + } + + return doRetrieveDataBlock(pReader); +} + int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { if (isEmptyQueryTimeWindow(&pReader->window)) { return TSDB_CODE_SUCCESS; } pReader->order = pCond->order; - pReader->type = BLOCK_LOAD_OFFSET_ORDER; + pReader->type = TIMEWINDOW_RANGE_CONTAINED; pReader->status.loadFromFile = true; pReader->status.pTableIter = NULL; - pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); // allocate buffer in order to load data blocks from file @@ -3073,10 +2760,7 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { pReader->suppInfo.tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; tsdbDataFReaderClose(&pReader->pFileReader); - // todo set the correct numOfTables - int32_t numOfTables = 1; - SDataBlockIter* pBlockIter = &pReader->status.blockIter; - + int32_t numOfTables = taosHashGetSize(pReader->status.pTableMap); tsdbDataFReaderClose(&pReader->pFileReader); initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); @@ -3084,18 +2768,23 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { resetDataBlockScanInfo(pReader->status.pTableMap); int32_t code = 0; + SDataBlockIter* pBlockIter = 
&pReader->status.blockIter; + // no data in files, let's try buffer in memory if (pReader->status.fileIter.numOfFiles == 0) { pReader->status.loadFromFile = false; } else { code = initForFirstBlockInFile(pReader, pBlockIter); if (code != TSDB_CODE_SUCCESS) { + tsdbError("%p reset reader failed, numOfTables:%d, query range:%" PRId64 " - %" PRId64 " in query %s", + pReader, numOfTables, pReader->window.skey, pReader->window.ekey, pReader->idStr); return code; } } tsdbDebug("%p reset reader, suid:%" PRIu64 ", numOfTables:%d, query range:%" PRId64 " - %" PRId64 " in query %s", pReader, pReader->suid, numOfTables, pReader->window.skey, pReader->window.ekey, pReader->idStr); + return code; } @@ -3186,7 +2875,7 @@ int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) { STbData* d = NULL; if (pReader->pTsdb->mem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pTsdb->mem, pReader->suid, pBlockScanInfo->uid, &d); + tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid, &d); if (d != NULL) { rows += tsdbGetNRowsInTbData(d); } @@ -3194,7 +2883,7 @@ int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) { STbData* di = NULL; if (pReader->pTsdb->imem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pTsdb->imem, pReader->suid, pBlockScanInfo->uid, &di); + tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid, &di); if (di != NULL) { rows += tsdbGetNRowsInTbData(di); } diff --git a/source/libs/command/src/explain.c b/source/libs/command/src/explain.c index 9ffdfc2289..266f96b41e 100644 --- a/source/libs/command/src/explain.c +++ b/source/libs/command/src/explain.c @@ -135,7 +135,7 @@ int32_t qExplainGenerateResChildren(SPhysiNode *pNode, SExplainGroup *group, SNo break; } case QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN: { - SJoinPhysiNode *pJoinNode = (SJoinPhysiNode *)pNode; + SSortMergeJoinPhysiNode *pJoinNode = (SSortMergeJoinPhysiNode *)pNode; pPhysiChildren = pJoinNode->node.pChildren; break; } @@ -434,7 +434,8 
@@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i case QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN: { STableScanPhysiNode *pTblScanNode = (STableScanPhysiNode *)pNode; EXPLAIN_ROW_NEW(level, - QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN == pNode->type ? EXPLAIN_TBL_MERGE_SCAN_FORMAT : EXPLAIN_TBL_SCAN_FORMAT, + QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN == pNode->type ? EXPLAIN_TBL_MERGE_SCAN_FORMAT + : EXPLAIN_TBL_SCAN_FORMAT, pTblScanNode->scan.tableName.tname); EXPLAIN_ROW_APPEND(EXPLAIN_LEFT_PARENTHESIS_FORMAT); if (pResNode->pExecInfo) { @@ -551,7 +552,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); if (pSTblScanNode->scan.pScanPseudoCols) { EXPLAIN_ROW_APPEND(EXPLAIN_PSEUDO_COLUMNS_FORMAT, pSTblScanNode->scan.pScanPseudoCols->length); - EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); } EXPLAIN_ROW_APPEND(EXPLAIN_WIDTH_FORMAT, pSTblScanNode->scan.node.pOutputDataBlockDesc->totalRowSize); EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); @@ -613,7 +614,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i break; } case QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN: { - SJoinPhysiNode *pJoinNode = (SJoinPhysiNode *)pNode; + SSortMergeJoinPhysiNode *pJoinNode = (SSortMergeJoinPhysiNode *)pNode; EXPLAIN_ROW_NEW(level, EXPLAIN_JOIN_FORMAT, EXPLAIN_JOIN_STRING(pJoinNode->joinType)); EXPLAIN_ROW_APPEND(EXPLAIN_LEFT_PARENTHESIS_FORMAT); if (pResNode->pExecInfo) { @@ -1180,7 +1181,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); if (pDistScanNode->pScanPseudoCols) { EXPLAIN_ROW_APPEND(EXPLAIN_PSEUDO_COLUMNS_FORMAT, pDistScanNode->pScanPseudoCols->length); - EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); } EXPLAIN_ROW_APPEND(EXPLAIN_WIDTH_FORMAT, pDistScanNode->node.pOutputDataBlockDesc->totalRowSize); 
EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); @@ -1367,7 +1368,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND(EXPLAIN_FUNCTIONS_FORMAT, pInterpNode->pFuncs->length); EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); } - + EXPLAIN_ROW_APPEND(EXPLAIN_MODE_FORMAT, nodesGetFillModeString(pInterpNode->fillMode)); EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); @@ -1419,7 +1420,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i } } break; - } + } default: qError("not supported physical node type %d", pNode->type); return TSDB_CODE_QRY_APP_ERROR; diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index 23732a6f9a..be97b20455 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -82,8 +82,6 @@ size_t getResultRowSize(struct SqlFunctionCtx* pCtx, int32_t numOfOutput); void initResultRowInfo(SResultRowInfo* pResultRowInfo); void cleanupResultRowInfo(SResultRowInfo* pResultRowInfo); -void closeAllResultRows(SResultRowInfo* pResultRowInfo); - void initResultRow(SResultRow* pResultRow); void closeResultRow(SResultRow* pResultRow); bool isResultRowClosed(SResultRow* pResultRow); @@ -96,6 +94,11 @@ static FORCE_INLINE SResultRow* getResultRowByPos(SDiskbasedBuf* pBuf, SResultRo return pRow; } +static FORCE_INLINE void setResultBufPageDirty(SDiskbasedBuf* pBuf, SResultRowPosition* pos) { + void* pPage = getBufPage(pBuf, pos->pageId); + setBufPageDirty(pPage, true); +} + void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SHashObj* pHashmap, int32_t order); void cleanupGroupResInfo(SGroupResInfo* pGroupResInfo); diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 1ad17bbc76..0beb6f1784 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -108,7 +108,6 @@ typedef struct STaskCostInfo { SFileBlockLoadRecorder* pRecoder; uint64_t 
elapsedTime; - uint64_t firstStageMergeTime; uint64_t winInfoSize; uint64_t tableInfoSize; uint64_t hashSize; @@ -321,6 +320,47 @@ typedef struct STableScanInfo { int8_t noTable; } STableScanInfo; +typedef struct STableMergeScanInfo { + STableListInfo* tableListInfo; + int32_t tableStartIndex; + int32_t tableEndIndex; + bool hasGroupId; + uint64_t groupId; + SArray* dataReaders; // array of tsdbReaderT* + SReadHandle readHandle; + int32_t bufPageSize; + uint32_t sortBufSize; // max buffer size for in-memory sort + SArray* pSortInfo; + SSortHandle* pSortHandle; + + SSDataBlock* pSortInputBlock; + int64_t startTs; // sort start time + SArray* sortSourceParams; + + SFileBlockLoadRecorder readRecorder; + int64_t numOfRows; + SScanInfo scanInfo; + int32_t scanTimes; + SNode* pFilterNode; // filter info, which is push down by optimizer + SqlFunctionCtx* pCtx; // which belongs to the direct upstream operator operator query context + SResultRowInfo* pResultRowInfo; + int32_t* rowEntryInfoOffset; + SExprInfo* pExpr; + SSDataBlock* pResBlock; + SArray* pColMatchInfo; + int32_t numOfOutput; + + SExprSupp pseudoSup; + + SQueryTableDataCond cond; + int32_t scanFlag; // table scan flag to denote if it is a repeat/reverse/main scan + int32_t dataBlockLoadFlag; + // if the upstream is an interval operator, the interval info is also kept here to get the time + // window to check if current data block needs to be loaded. 
+ SInterval interval; + SSampleExecInfo sample; // sample execution info +} STableMergeScanInfo; + typedef struct STagScanInfo { SColumnInfo *pCols; SSDataBlock *pRes; @@ -352,6 +392,11 @@ typedef enum EStreamScanMode { STREAM_SCAN_FROM_DATAREADER_RANGE, } EStreamScanMode; +enum { + PROJECT_RETRIEVE_CONTINUE = 0x1, + PROJECT_RETRIEVE_DONE = 0x2, +}; + typedef struct SCatchSupporter { SHashObj* pWindowHashTable; // quick locate the window object for each window SDiskbasedBuf* pDataBuf; // buffer based on blocked-wised disk file @@ -549,6 +594,7 @@ typedef struct SProjectOperatorInfo { SLimitInfo limitInfo; bool mergeDataBlocks; SSDataBlock* pFinalRes; + SNode* pCondition; } SProjectOperatorInfo; typedef struct SIndefOperatorInfo { @@ -881,7 +927,7 @@ SOperatorInfo* createPartitionOperatorInfo(SOperatorInfo* downstream, SPartition SOperatorInfo* createTimeSliceOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pNode, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, SJoinPhysiNode* pJoinNode, +SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, SSortMergeJoinPhysiNode* pJoinNode, SExecTaskInfo* pTaskInfo); SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index a76253ab20..ec8e3c4abb 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -43,10 +43,6 @@ void cleanupResultRowInfo(SResultRowInfo* pResultRowInfo) { } } -void closeAllResultRows(SResultRowInfo* pResultRowInfo) { - // do nothing -} - bool isResultRowClosed(SResultRow* pRow) { return (pRow->closed == true); } void closeResultRow(SResultRow* pResultRow) { pResultRow->closed = true; } @@ -160,11 +156,13 @@ int32_t getNumOfTotalRes(SGroupResInfo* pGroupResInfo) { SArray* createSortInfo(SNodeList* pNodeList) { size_t numOfCols = 0; + if 
(pNodeList != NULL) { numOfCols = LIST_LENGTH(pNodeList); } else { numOfCols = 0; } + SArray* pList = taosArrayInit(numOfCols, sizeof(SBlockOrderInfo)); if (pList == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -196,10 +194,6 @@ SSDataBlock* createResDataBlock(SDataBlockDescNode* pNode) { for (int32_t i = 0; i < numOfCols; ++i) { SSlotDescNode* pDescNode = (SSlotDescNode*)nodesListGetNode(pNode->pSlots, i); - /*if (!pDescNode->output) { // todo disable it temporarily*/ - /*continue;*/ - /*}*/ - SColumnInfoData idata = createColumnInfoData(pDescNode->dataType.type, pDescNode->dataType.bytes, pDescNode->slotId); idata.info.scale = pDescNode->dataType.scale; @@ -701,9 +695,6 @@ static int32_t setSelectValueColumnInfo(SqlFunctionCtx* pCtx, int32_t numOfOutpu } } -#ifdef BUF_PAGE_DEBUG - qDebug("page_setSelect num:%d", num); -#endif if (p != NULL) { p->subsidiaries.pCtx = pValCtx; p->subsidiaries.num = num; @@ -852,7 +843,7 @@ int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysi // TODO: get it from stable scan node pCond->twindows = pTableScanNode->scanRange; pCond->suid = pTableScanNode->scan.suid; - pCond->type = BLOCK_LOAD_OFFSET_ORDER; + pCond->type = TIMEWINDOW_RANGE_CONTAINED; pCond->startVersion = -1; pCond->endVersion = -1; // pCond->type = pTableScanNode->scanFlag; @@ -947,6 +938,7 @@ STimeWindow getFirstQualifiedTimeWindow(int64_t ts, STimeWindow* pWindow, SInter } // get the correct time window according to the handled timestamp +// todo refactor STimeWindow getActiveTimeWindow(SDiskbasedBuf* pBuf, SResultRowInfo* pResultRowInfo, int64_t ts, SInterval* pInterval, int32_t order) { STimeWindow w = {0}; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 7bac828a53..36ae1d19ec 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -42,11 +42,6 @@ #define GET_FORWARD_DIRECTION_FACTOR(ord) (((ord) == TSDB_ORDER_ASC) ? 
QUERY_ASC_FORWARD_STEP : QUERY_DESC_FORWARD_STEP) -enum { - PROJECT_RETRIEVE_CONTINUE = 0x1, - PROJECT_RETRIEVE_DONE = 0x2, -}; - #if 0 static UNUSED_FUNC void *u_malloc (size_t __size) { uint32_t v = taosRand(); @@ -575,6 +570,26 @@ static void setPseudoOutputColInfo(SSDataBlock* pResult, SqlFunctionCtx* pCtx, S int32_t projectApplyFunctions(SExprInfo* pExpr, SSDataBlock* pResult, SSDataBlock* pSrcBlock, SqlFunctionCtx* pCtx, int32_t numOfOutput, SArray* pPseudoList) { setPseudoOutputColInfo(pResult, pCtx, pPseudoList); + + if (pSrcBlock == NULL) { + for (int32_t k = 0; k < numOfOutput; ++k) { + int32_t outputSlotId = pExpr[k].base.resSchema.slotId; + + ASSERT(pExpr[k].pExpr->nodeType == QUERY_NODE_VALUE); + SColumnInfoData* pColInfoData = taosArrayGet(pResult->pDataBlock, outputSlotId); + + int32_t type = pExpr[k].base.pParam[0].param.nType; + if (TSDB_DATA_TYPE_NULL == type) { + colDataAppendNNULL(pColInfoData, 0, 1); + } else { + colDataAppend(pColInfoData, 0, taosVariantGet(&pExpr[k].base.pParam[0].param, type), false); + } + } + + pResult->info.rows = 1; + return TSDB_CODE_SUCCESS; + } + pResult->info.groupId = pSrcBlock->info.groupId; // if the source equals to the destination, it is to create a new column as the result of scalar @@ -1243,52 +1258,6 @@ void initResultRow(SResultRow* pResultRow) { // pResultRow->pEntryInfo = (struct SResultRowEntryInfo*)((char*)pResultRow + sizeof(SResultRow)); } -/* - * The start of each column SResultRowEntryInfo is denote by RowCellInfoOffset. - * Note that in case of top/bottom query, the whole multiple rows of result is treated as only one row of results. 
- * +------------+-----------------result column 1------------+------------------result column 2-----------+ - * | SResultRow | SResultRowEntryInfo | intermediate buffer1 | SResultRowEntryInfo | intermediate buffer 2| - * +------------+--------------------------------------------+--------------------------------------------+ - * offset[0] offset[1] offset[2] - */ -// TODO refactor: some function move away -void setFunctionResultOutput(SOperatorInfo* pOperator, SOptrBasicInfo* pInfo, SAggSupporter* pSup, int32_t stage, - int32_t numOfExprs) { - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SqlFunctionCtx* pCtx = pOperator->exprSupp.pCtx; - int32_t* rowEntryInfoOffset = pOperator->exprSupp.rowEntryInfoOffset; - - SResultRowInfo* pResultRowInfo = &pInfo->resultRowInfo; - initResultRowInfo(pResultRowInfo); - - int64_t tid = 0; - int64_t groupId = 0; - SResultRow* pRow = doSetResultOutBufByKey(pSup->pResultBuf, pResultRowInfo, (char*)&tid, sizeof(tid), true, groupId, - pTaskInfo, false, pSup); - - for (int32_t i = 0; i < numOfExprs; ++i) { - struct SResultRowEntryInfo* pEntry = getResultEntryInfo(pRow, i, rowEntryInfoOffset); - cleanupResultRowEntry(pEntry); - - pCtx[i].resultInfo = pEntry; - pCtx[i].scanFlag = stage; - } - - initCtxOutputBuffer(pCtx, numOfExprs); -} - -void initCtxOutputBuffer(SqlFunctionCtx* pCtx, int32_t size) { - for (int32_t j = 0; j < size; ++j) { - struct SResultRowEntryInfo* pResInfo = GET_RES_INFO(&pCtx[j]); - if (isRowEntryInitialized(pResInfo) || fmIsPseudoColumnFunc(pCtx[j].functionId) || pCtx[j].functionId == -1 || - fmIsScalarFunc(pCtx[j].functionId)) { - continue; - } - - pCtx[j].fpSet.init(&pCtx[j], pCtx[j].resultInfo); - } -} - void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status) { if (status == TASK_NOT_COMPLETED) { pTaskInfo->status = status; @@ -1356,7 +1325,7 @@ void doFilter(const SNode* pFilterNode, SSDataBlock* pBlock, const SArray* pColM extractQualifiedTupleByFilterResult(pBlock, rowRes, keep); if (pColMatchInfo != 
NULL) { - for(int32_t i = 0; i < taosArrayGetSize(pColMatchInfo); ++i) { + for (int32_t i = 0; i < taosArrayGetSize(pColMatchInfo); ++i) { SColMatchInfo* pInfo = taosArrayGet(pColMatchInfo, i); if (pInfo->colId == PRIMARYKEY_TIMESTAMP_COL_ID) { SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, pInfo->targetSlotId); @@ -1665,9 +1634,6 @@ void queryCostStatis(SExecTaskInfo* pTaskInfo) { // hashSize += taosHashGetMemSize(pRuntimeEnv->tableqinfoGroupInfo.map); // pSummary->hashSize = hashSize; - // add the merge time - pSummary->elapsedTime += pSummary->firstStageMergeTime; - // SResultRowPool* p = pTaskInfo->pool; // if (p != NULL) { // pSummary->winInfoSize = getResultRowPoolMemSize(p); @@ -1676,17 +1642,16 @@ void queryCostStatis(SExecTaskInfo* pTaskInfo) { // pSummary->winInfoSize = 0; // pSummary->numOfTimeWindows = 0; // } - // - // calculateOperatorProfResults(pQInfo); SFileBlockLoadRecorder* pRecorder = pSummary->pRecoder; if (pSummary->pRecoder != NULL) { - qDebug("%s :cost summary: elapsed time:%" PRId64 " us, first merge:%" PRId64 - " us, total blocks:%d, " - "load block statis:%d, load data block:%d, total rows:%" PRId64 ", check rows:%" PRId64, - GET_TASKID(pTaskInfo), pSummary->elapsedTime, pSummary->firstStageMergeTime, pRecorder->totalBlocks, - pRecorder->loadBlockStatis, pRecorder->loadBlocks, pRecorder->totalRows, pRecorder->totalCheckedRows); + qDebug( + "%s :cost summary: elapsed time:%.2f ms, total blocks:%d, load block SMA:%d, load data block:%d, total " + "rows:%" PRId64 ", check rows:%" PRId64, + GET_TASKID(pTaskInfo), pSummary->elapsedTime / 1000.0, pRecorder->totalBlocks, pRecorder->loadBlockStatis, + pRecorder->loadBlocks, pRecorder->totalRows, pRecorder->totalCheckedRows); } + // qDebug("QInfo:0x%"PRIx64" :cost summary: winResPool size:%.2f Kb, numOfWin:%"PRId64", tableInfoSize:%.2f Kb, // hashTable:%.2f Kb", pQInfo->qId, pSummary->winInfoSize/1024.0, // pSummary->numOfTimeWindows, pSummary->tableInfoSize/1024.0, 
pSummary->hashSize/1024.0); @@ -2809,73 +2774,6 @@ static int32_t initGroupCol(SExprInfo* pExprInfo, int32_t numOfCols, SArray* pGr return TSDB_CODE_SUCCESS; } -SOperatorInfo* createSortedMergeOperatorInfo(SOperatorInfo** downstream, int32_t numOfDownstream, SExprInfo* pExprInfo, - int32_t num, SArray* pSortInfo, SArray* pGroupInfo, - SExecTaskInfo* pTaskInfo) { - SSortedMergeOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SSortedMergeOperatorInfo)); - SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); - if (pInfo == NULL || pOperator == NULL) { - goto _error; - } - - int32_t code = initExprSupp(&pOperator->exprSupp, pExprInfo, num); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - initResultRowInfo(&pInfo->binfo.resultRowInfo); - - if (pOperator->exprSupp.pCtx == NULL || pInfo->binfo.pRes == NULL) { - goto _error; - } - - size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; - code = doInitAggInfoSup(&pInfo->aggSup, pOperator->exprSupp.pCtx, num, keyBufSize, pTaskInfo->id.str); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - setFunctionResultOutput(pOperator, &pInfo->binfo, &pInfo->aggSup, MAIN_SCAN, num); - code = initGroupCol(pExprInfo, num, pGroupInfo, pInfo); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - // pInfo->resultRowFactor = (int32_t)(getRowNumForMultioutput(pRuntimeEnv->pQueryAttr, - // pRuntimeEnv->pQueryAttr->topBotQuery, false)); - pInfo->sortBufSize = 1024 * 16; // 1MB - pInfo->bufPageSize = 1024; - pInfo->pSortInfo = pSortInfo; - - pOperator->resultInfo.capacity = blockDataGetCapacityInRow(pInfo->binfo.pRes, pInfo->bufPageSize); - - pOperator->name = "SortedMerge"; - // pOperator->operatorType = OP_SortedMerge; - pOperator->blocking = true; - pOperator->status = OP_NOT_OPENED; - pOperator->info = pInfo; - pOperator->pTaskInfo = pTaskInfo; - - pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doSortedMerge, NULL, NULL, destroySortedMergeOperatorInfo, - NULL, NULL, NULL); 
- code = appendDownstream(pOperator, downstream, numOfDownstream); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - return pOperator; - -_error: - if (pInfo != NULL) { - destroySortedMergeOperatorInfo(pInfo, num); - } - - taosMemoryFreeClear(pInfo); - taosMemoryFreeClear(pOperator); - terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; - return NULL; -} - int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t* order, int32_t* scanFlag) { // todo add more information about exchange operation int32_t type = pOperator->operatorType; @@ -2885,11 +2783,16 @@ int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t* order, int32_t* scan *order = TSDB_ORDER_ASC; *scanFlag = MAIN_SCAN; return TSDB_CODE_SUCCESS; - } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN || type == QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN) { + } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { STableScanInfo* pTableScanInfo = pOperator->info; *order = pTableScanInfo->cond.order; *scanFlag = pTableScanInfo->scanFlag; return TSDB_CODE_SUCCESS; + } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN) { + STableMergeScanInfo* pTableScanInfo = pOperator->info; + *order = pTableScanInfo->cond.order; + *scanFlag = pTableScanInfo->scanFlag; + return TSDB_CODE_SUCCESS; } else { if (pOperator->pDownstream == NULL || pOperator->pDownstream[0] == NULL) { return TSDB_CODE_INVALID_PARA; @@ -3031,7 +2934,6 @@ static int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { } } - closeAllResultRows(&pAggInfo->binfo.resultRowInfo); initGroupedResultInfo(&pAggInfo->groupResInfo, pAggInfo->aggSup.pResultRowHashTable, 0); OPTR_SET_OPENED(pOperator); @@ -3279,162 +3181,6 @@ int32_t handleLimitOffset(SOperatorInfo* pOperator, SLimitInfo* pLimitInfo, SSDa } } -static SSDataBlock* doProjectOperation(SOperatorInfo* pOperator) { - SProjectOperatorInfo* pProjectInfo = pOperator->info; - SOptrBasicInfo* pInfo = &pProjectInfo->binfo; - - SExprSupp* pSup = &pOperator->exprSupp; - SSDataBlock* pRes = 
pInfo->pRes; - SSDataBlock* pFinalRes = pProjectInfo->pFinalRes; - - blockDataCleanup(pFinalRes); - - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - if (pOperator->status == OP_EXEC_DONE) { - if (pTaskInfo->execModel == OPTR_EXEC_MODEL_QUEUE) { - pOperator->status = OP_OPENED; - return NULL; - } - return NULL; - } - - int64_t st = 0; - int32_t order = 0; - int32_t scanFlag = 0; - - if (pOperator->cost.openCost == 0) { - st = taosGetTimestampUs(); - } - - SOperatorInfo* downstream = pOperator->pDownstream[0]; - SLimitInfo* pLimitInfo = &pProjectInfo->limitInfo; - - while(1) { - while (1) { - blockDataCleanup(pRes); - - // The downstream exec may change the value of the newgroup, so use a local variable instead. - SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); - if (pBlock == NULL) { - doSetOperatorCompleted(pOperator); - break; - } - - if (pBlock->info.type == STREAM_RETRIEVE) { - // for stream interval - return pBlock; - } - - if (pLimitInfo->remainGroupOffset > 0) { - if (pLimitInfo->currentGroupId == 0 || pLimitInfo->currentGroupId == pBlock->info.groupId) { // it is the first group - pLimitInfo->currentGroupId = pBlock->info.groupId; - continue; - } else if (pLimitInfo->currentGroupId != pBlock->info.groupId) { - // now it is the data from a new group - pLimitInfo->remainGroupOffset -= 1; - pLimitInfo->currentGroupId = pBlock->info.groupId; - - // ignore data block in current group - if (pLimitInfo->remainGroupOffset > 0) { - continue; - } - } - - // set current group id of the project operator - pLimitInfo->currentGroupId = pBlock->info.groupId; - } - - // remainGroupOffset == 0 - // here check for a new group data, we need to handle the data of the previous group. 
- if (pLimitInfo->currentGroupId != 0 && pLimitInfo->currentGroupId != pBlock->info.groupId) { - pLimitInfo->numOfOutputGroups += 1; - if ((pLimitInfo->slimit.limit > 0) && (pLimitInfo->slimit.limit <= pLimitInfo->numOfOutputGroups)) { - doSetOperatorCompleted(pOperator); - break; - } - - // reset the value for a new group data - // existing rows that belongs to previous group. - pLimitInfo->numOfOutputRows = 0; - pLimitInfo->remainOffset = pLimitInfo->limit.offset; - } - - // the pDataBlock are always the same one, no need to call this again - int32_t code = getTableScanInfo(pOperator->pDownstream[0], &order, &scanFlag); - if (code != TSDB_CODE_SUCCESS) { - longjmp(pTaskInfo->env, code); - } - - setInputDataBlock(pOperator, pSup->pCtx, pBlock, order, scanFlag, false); - blockDataEnsureCapacity(pInfo->pRes, pInfo->pRes->info.rows + pBlock->info.rows); - - code = projectApplyFunctions(pSup->pExprInfo, pInfo->pRes, pBlock, pSup->pCtx, pSup->numOfExprs, - pProjectInfo->pPseudoColInfo); - if (code != TSDB_CODE_SUCCESS) { - longjmp(pTaskInfo->env, code); - } - - // set current group id - pLimitInfo->currentGroupId = pBlock->info.groupId; - - if (pLimitInfo->remainOffset >= pInfo->pRes->info.rows) { - pLimitInfo->remainOffset -= pInfo->pRes->info.rows; - blockDataCleanup(pInfo->pRes); - continue; - } else if (pLimitInfo->remainOffset < pInfo->pRes->info.rows && pLimitInfo->remainOffset > 0) { - blockDataTrimFirstNRows(pInfo->pRes, pLimitInfo->remainOffset); - pLimitInfo->remainOffset = 0; - } - - // check for the limitation in each group - if (pLimitInfo->limit.limit >= 0 && - pLimitInfo->numOfOutputRows + pInfo->pRes->info.rows >= pLimitInfo->limit.limit) { - int32_t keepRows = (int32_t)(pLimitInfo->limit.limit - pLimitInfo->numOfOutputRows); - blockDataKeepFirstNRows(pInfo->pRes, keepRows); - if (pLimitInfo->slimit.limit > 0 && pLimitInfo->slimit.limit <= pLimitInfo->numOfOutputGroups) { - pOperator->status = OP_EXEC_DONE; - } - } - - pLimitInfo->numOfOutputRows += 
pInfo->pRes->info.rows; - break; - } - - // no results generated - if (pInfo->pRes->info.rows == 0 || (!pProjectInfo->mergeDataBlocks)) { - break; - } - - if (pProjectInfo->mergeDataBlocks) { - pFinalRes->info.groupId = pInfo->pRes->info.groupId; - pFinalRes->info.version = pInfo->pRes->info.version; - - // continue merge data, ignore the group id - blockDataMerge(pFinalRes, pInfo->pRes); - - if (pFinalRes->info.rows + pInfo->pRes->info.rows <= pOperator->resultInfo.threshold) { - continue; - } - } - - // do apply filter - SSDataBlock* p = pProjectInfo->mergeDataBlocks ? pFinalRes : pRes; - doFilter(pProjectInfo->pFilterNode, p, NULL); - if (p->info.rows > 0) { - break; - } - } - - SSDataBlock* p = pProjectInfo->mergeDataBlocks ? pFinalRes : pRes; - pOperator->resultInfo.totalRows += p->info.rows; - - if (pOperator->cost.openCost == 0) { - pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; - } - - return (p->info.rows > 0) ? p : NULL; -} - static void doHandleRemainBlockForNewGroupImpl(SFillOperatorInfo* pInfo, SResultInfo* pResultInfo, SExecTaskInfo* pTaskInfo) { pInfo->totalInputRows = pInfo->existNewGroupBlock->info.rows; @@ -3815,30 +3561,6 @@ void destroySFillOperatorInfo(void* param, int32_t numOfOutput) { taosMemoryFreeClear(param); } -static void destroyProjectOperatorInfo(void* param, int32_t numOfOutput) { - if (NULL == param) { - return; - } - SProjectOperatorInfo* pInfo = (SProjectOperatorInfo*)param; - cleanupBasicInfo(&pInfo->binfo); - cleanupAggSup(&pInfo->aggSup); - taosArrayDestroy(pInfo->pPseudoColInfo); - - blockDataDestroy(pInfo->pFinalRes); - taosMemoryFreeClear(param); -} - -static void destroyIndefinitOperatorInfo(void* param, int32_t numOfOutput) { - SIndefOperatorInfo* pInfo = (SIndefOperatorInfo*)param; - cleanupBasicInfo(&pInfo->binfo); - - taosArrayDestroy(pInfo->pPseudoColInfo); - cleanupAggSup(&pInfo->aggSup); - cleanupExprSupp(&pInfo->scalarSup); - - taosMemoryFreeClear(param); -} - void destroyExchangeOperatorInfo(void* 
param, int32_t numOfOutput) { SExchangeInfo* pExInfo = (SExchangeInfo*)param; taosRemoveRef(exchangeObjRefPool, pExInfo->self); @@ -3858,260 +3580,6 @@ void doDestroyExchangeOperatorInfo(void* param) { taosMemoryFreeClear(param); } -static SArray* setRowTsColumnOutputInfo(SqlFunctionCtx* pCtx, int32_t numOfCols) { - SArray* pList = taosArrayInit(4, sizeof(int32_t)); - for (int32_t i = 0; i < numOfCols; ++i) { - if (fmIsPseudoColumnFunc(pCtx[i].functionId)) { - taosArrayPush(pList, &i); - } - } - - return pList; -} - -SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SProjectPhysiNode* pProjPhyNode, - SExecTaskInfo* pTaskInfo) { - SProjectOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SProjectOperatorInfo)); - SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); - if (pInfo == NULL || pOperator == NULL) { - goto _error; - } - - int32_t numOfCols = 0; - SExprInfo* pExprInfo = createExprInfo(pProjPhyNode->pProjections, NULL, &numOfCols); - - SSDataBlock* pResBlock = createResDataBlock(pProjPhyNode->node.pOutputDataBlockDesc); - initLimitInfo(pProjPhyNode->node.pLimit, pProjPhyNode->node.pSlimit, &pInfo->limitInfo); - - pInfo->binfo.pRes = pResBlock; - pInfo->pFinalRes = createOneDataBlock(pResBlock, false); - - pInfo->pFilterNode = pProjPhyNode->node.pConditions; - pInfo->mergeDataBlocks = pProjPhyNode->mergeDataBlock; - - int32_t numOfRows = 4096; - size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; - - // Make sure the size of SSDataBlock will never exceed the size of 2MB. 
- int32_t TWOMB = 2 * 1024 * 1024; - if (numOfRows * pResBlock->info.rowSize > TWOMB) { - numOfRows = TWOMB / pResBlock->info.rowSize; - } - initResultSizeInfo(&pOperator->resultInfo, numOfRows); - - initAggInfo(&pOperator->exprSupp, &pInfo->aggSup, pExprInfo, numOfCols, keyBufSize, pTaskInfo->id.str); - initBasicInfo(&pInfo->binfo, pResBlock); - setFunctionResultOutput(pOperator, &pInfo->binfo, &pInfo->aggSup, MAIN_SCAN, numOfCols); - - pInfo->pPseudoColInfo = setRowTsColumnOutputInfo(pOperator->exprSupp.pCtx, numOfCols); - pOperator->name = "ProjectOperator"; - pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_PROJECT; - pOperator->blocking = false; - pOperator->status = OP_NOT_OPENED; - pOperator->info = pInfo; - pOperator->pTaskInfo = pTaskInfo; - - pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doProjectOperation, NULL, NULL, - destroyProjectOperatorInfo, NULL, NULL, NULL); - - int32_t code = appendDownstream(pOperator, &downstream, 1); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - return pOperator; - -_error: - pTaskInfo->code = TSDB_CODE_OUT_OF_MEMORY; - return NULL; -} - -static void doHandleDataBlock(SOperatorInfo* pOperator, SSDataBlock* pBlock, SOperatorInfo* downstream, - SExecTaskInfo* pTaskInfo) { - int32_t order = 0; - int32_t scanFlag = 0; - - SIndefOperatorInfo* pIndefInfo = pOperator->info; - SOptrBasicInfo* pInfo = &pIndefInfo->binfo; - SExprSupp* pSup = &pOperator->exprSupp; - - // the pDataBlock are always the same one, no need to call this again - int32_t code = getTableScanInfo(downstream, &order, &scanFlag); - if (code != TSDB_CODE_SUCCESS) { - longjmp(pTaskInfo->env, code); - } - - // there is an scalar expression that needs to be calculated before apply the group aggregation. 
- SExprSupp* pScalarSup = &pIndefInfo->scalarSup; - if (pScalarSup->pExprInfo != NULL) { - code = projectApplyFunctions(pScalarSup->pExprInfo, pBlock, pBlock, pScalarSup->pCtx, pScalarSup->numOfExprs, - pIndefInfo->pPseudoColInfo); - if (code != TSDB_CODE_SUCCESS) { - longjmp(pTaskInfo->env, code); - } - } - - setInputDataBlock(pOperator, pSup->pCtx, pBlock, order, scanFlag, false); - blockDataEnsureCapacity(pInfo->pRes, pInfo->pRes->info.rows + pBlock->info.rows); - - code = projectApplyFunctions(pSup->pExprInfo, pInfo->pRes, pBlock, pSup->pCtx, pSup->numOfExprs, - pIndefInfo->pPseudoColInfo); - if (code != TSDB_CODE_SUCCESS) { - longjmp(pTaskInfo->env, code); - } -} - -static SSDataBlock* doApplyIndefinitFunction(SOperatorInfo* pOperator) { - SIndefOperatorInfo* pIndefInfo = pOperator->info; - SOptrBasicInfo* pInfo = &pIndefInfo->binfo; - SExprSupp* pSup = &pOperator->exprSupp; - - SSDataBlock* pRes = pInfo->pRes; - blockDataCleanup(pRes); - - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - if (pOperator->status == OP_EXEC_DONE) { - return NULL; - } - - int64_t st = 0; - - if (pOperator->cost.openCost == 0) { - st = taosGetTimestampUs(); - } - - SOperatorInfo* downstream = pOperator->pDownstream[0]; - - while (1) { - // here we need to handle the existsed group results - if (pIndefInfo->pNextGroupRes != NULL) { // todo extract method - for (int32_t k = 0; k < pSup->numOfExprs; ++k) { - SqlFunctionCtx* pCtx = &pSup->pCtx[k]; - - SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx); - pResInfo->initialized = false; - pCtx->pOutput = NULL; - } - - doHandleDataBlock(pOperator, pIndefInfo->pNextGroupRes, downstream, pTaskInfo); - pIndefInfo->pNextGroupRes = NULL; - } - - if (pInfo->pRes->info.rows < pOperator->resultInfo.threshold) { - while (1) { - // The downstream exec may change the value of the newgroup, so use a local variable instead. 
- SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); - if (pBlock == NULL) { - doSetOperatorCompleted(pOperator); - break; - } - - if (pIndefInfo->groupId == 0 && pBlock->info.groupId != 0) { - pIndefInfo->groupId = pBlock->info.groupId; // this is the initial group result - } else { - if (pIndefInfo->groupId != pBlock->info.groupId) { // reset output buffer and computing status - pIndefInfo->groupId = pBlock->info.groupId; - pIndefInfo->pNextGroupRes = pBlock; - break; - } - } - - doHandleDataBlock(pOperator, pBlock, downstream, pTaskInfo); - if (pInfo->pRes->info.rows >= pOperator->resultInfo.threshold) { - break; - } - } - } - - doFilter(pIndefInfo->pCondition, pInfo->pRes, NULL); - size_t rows = pInfo->pRes->info.rows; - if (rows > 0 || pOperator->status == OP_EXEC_DONE) { - break; - } else { - blockDataCleanup(pInfo->pRes); - } - } - - size_t rows = pInfo->pRes->info.rows; - pOperator->resultInfo.totalRows += rows; - - if (pOperator->cost.openCost == 0) { - pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; - } - - return (rows > 0) ? 
pInfo->pRes : NULL; -} - -SOperatorInfo* createIndefinitOutputOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pNode, - SExecTaskInfo* pTaskInfo) { - SIndefOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SIndefOperatorInfo)); - SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); - if (pInfo == NULL || pOperator == NULL) { - goto _error; - } - - SExprSupp* pSup = &pOperator->exprSupp; - - SIndefRowsFuncPhysiNode* pPhyNode = (SIndefRowsFuncPhysiNode*)pNode; - - int32_t numOfExpr = 0; - SExprInfo* pExprInfo = createExprInfo(pPhyNode->pFuncs, NULL, &numOfExpr); - - if (pPhyNode->pExprs != NULL) { - int32_t num = 0; - SExprInfo* pSExpr = createExprInfo(pPhyNode->pExprs, NULL, &num); - int32_t code = initExprSupp(&pInfo->scalarSup, pSExpr, num); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - } - - SSDataBlock* pResBlock = createResDataBlock(pPhyNode->node.pOutputDataBlockDesc); - - int32_t numOfRows = 4096; - size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; - - // Make sure the size of SSDataBlock will never exceed the size of 2MB. 
- int32_t TWOMB = 2 * 1024 * 1024; - if (numOfRows * pResBlock->info.rowSize > TWOMB) { - numOfRows = TWOMB / pResBlock->info.rowSize; - } - - initResultSizeInfo(&pOperator->resultInfo, numOfRows); - - initAggInfo(pSup, &pInfo->aggSup, pExprInfo, numOfExpr, keyBufSize, pTaskInfo->id.str); - initBasicInfo(&pInfo->binfo, pResBlock); - - setFunctionResultOutput(pOperator, &pInfo->binfo, &pInfo->aggSup, MAIN_SCAN, numOfExpr); - - pInfo->binfo.pRes = pResBlock; - pInfo->pCondition = pPhyNode->node.pConditions; - pInfo->pPseudoColInfo = setRowTsColumnOutputInfo(pSup->pCtx, numOfExpr); - - pOperator->name = "IndefinitOperator"; - pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_INDEF_ROWS_FUNC; - pOperator->blocking = false; - pOperator->status = OP_NOT_OPENED; - pOperator->info = pInfo; - pOperator->pTaskInfo = pTaskInfo; - - pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doApplyIndefinitFunction, NULL, NULL, - destroyIndefinitOperatorInfo, NULL, NULL, NULL); - - int32_t code = appendDownstream(pOperator, &downstream, 1); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - return pOperator; - -_error: - taosMemoryFree(pInfo); - taosMemoryFree(pOperator); - pTaskInfo->code = TSDB_CODE_OUT_OF_MEMORY; - return NULL; -} - static int32_t initFillInfo(SFillOperatorInfo* pInfo, SExprInfo* pExpr, int32_t numOfCols, SNodeListNode* pValNode, STimeWindow win, int32_t capacity, const char* id, SInterval* pInterval, int32_t fillType) { SFillColInfo* pColInfo = createFillColInfo(pExpr, numOfCols, pValNode); @@ -4265,7 +3733,7 @@ SSchemaWrapper* extractQueriedColumnSchema(SScanPhysiNode* pScanNode) { } // this the tags and pseudo function columns, we only keep the tag columns - for(int32_t i = 0; i < numOfTags; ++i) { + for (int32_t i = 0; i < numOfTags; ++i) { STargetNode* pNode = (STargetNode*)nodesListGetNode(pScanNode->pScanPseudoCols, i); int32_t type = nodeType(pNode->pExpr); @@ -4381,7 +3849,7 @@ int32_t generateGroupIdMap(STableListInfo* pTableListInfo, 
SReadHandle* pHandle, int32_t groupNum = 0; for (int32_t i = 0; i < taosArrayGetSize(pTableListInfo->pTableList); i++) { STableKeyInfo* info = taosArrayGet(pTableListInfo->pTableList, i); - int32_t code = getGroupIdFromTagsVal(pHandle->meta, info->uid, group, keyBuf, &info->groupId); + int32_t code = getGroupIdFromTagsVal(pHandle->meta, info->uid, group, keyBuf, &info->groupId); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -4416,7 +3884,7 @@ static int32_t initTableblockDistQueryCond(uint64_t uid, SQueryTableDataCond* pC pCond->twindows = (STimeWindow){.skey = INT64_MIN, .ekey = INT64_MAX}; pCond->suid = uid; - pCond->type = BLOCK_LOAD_OFFSET_ORDER; + pCond->type = TIMEWINDOW_RANGE_CONTAINED; pCond->startVersion = -1; pCond->endVersion = -1; @@ -4504,7 +3972,6 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo return createSysTableScanOperatorInfo(pHandle, pSysScanPhyNode, pUser, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN == type) { STagScanPhysiNode* pScanPhyNode = (STagScanPhysiNode*)pPhyNode; - int32_t code = getTableList(pHandle->meta, pHandle->vnode, pScanPhyNode, pTagCond, pTagIndexCond, pTableListInfo); if (code != TSDB_CODE_SUCCESS) { pTaskInfo->code = terrno; @@ -4555,6 +4022,8 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo } return createLastrowScanOperator(pScanNode, pHandle, pTaskInfo); + } else if (QUERY_NODE_PHYSICAL_PLAN_PROJECT == type) { + return createProjectOperatorInfo(NULL, (SProjectPhysiNode*)pPhyNode, pTaskInfo); } else { ASSERT(0); } @@ -4701,7 +4170,7 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE == type) { pOptr = createStreamStateAggOperatorInfo(ops[0], pPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN == type) { - pOptr = createMergeJoinOperatorInfo(ops, size, (SJoinPhysiNode*)pPhyNode, pTaskInfo); + pOptr = createMergeJoinOperatorInfo(ops, 
size, (SSortMergeJoinPhysiNode*)pPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_FILL == type) { pOptr = createFillOperatorInfo(ops[0], (SFillPhysiNode*)pPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_INDEF_ROWS_FUNC == type) { diff --git a/source/libs/executor/src/joinoperator.c b/source/libs/executor/src/joinoperator.c index 2e6c9bd351..f26b2f4f0a 100644 --- a/source/libs/executor/src/joinoperator.c +++ b/source/libs/executor/src/joinoperator.c @@ -28,30 +28,30 @@ static SSDataBlock* doMergeJoin(struct SOperatorInfo* pOperator); static void destroyMergeJoinOperator(void* param, int32_t numOfOutput); static void extractTimeCondition(SJoinOperatorInfo* Info, SLogicConditionNode* pLogicConditionNode); -SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, SJoinPhysiNode* pJoinNode, - SExecTaskInfo* pTaskInfo) { +SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, + SSortMergeJoinPhysiNode* pJoinNode, SExecTaskInfo* pTaskInfo) { SJoinOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SJoinOperatorInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (pOperator == NULL || pInfo == NULL) { goto _error; } - SSDataBlock* pResBlock = createResDataBlock(pJoinNode->node.pOutputDataBlockDesc); + SSDataBlock* pResBlock = createResDataBlock(pJoinNode->node.pOutputDataBlockDesc); - int32_t numOfCols = 0; + int32_t numOfCols = 0; SExprInfo* pExprInfo = createExprInfo(pJoinNode->pTargets, NULL, &numOfCols); initResultSizeInfo(&pOperator->resultInfo, 4096); - pInfo->pRes = pResBlock; - pOperator->name = "MergeJoinOperator"; + pInfo->pRes = pResBlock; + pOperator->name = "MergeJoinOperator"; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN; - pOperator->blocking = false; - pOperator->status = OP_NOT_OPENED; - pOperator->exprSupp.pExprInfo = pExprInfo; - pOperator->exprSupp.numOfExprs = numOfCols; - pOperator->info = pInfo; - 
pOperator->pTaskInfo = pTaskInfo; + pOperator->blocking = false; + pOperator->status = OP_NOT_OPENED; + pOperator->exprSupp.pExprInfo = pExprInfo; + pOperator->exprSupp.numOfExprs = numOfCols; + pOperator->info = pInfo; + pOperator->pTaskInfo = pTaskInfo; SNode* pMergeCondition = pJoinNode->pMergeCondition; if (nodeType(pMergeCondition) == QUERY_NODE_OPERATOR) { @@ -104,7 +104,7 @@ void setJoinColumnInfo(SColumnInfo* pColumn, const SColumnNode* pColumnNode) { void destroyMergeJoinOperator(void* param, int32_t numOfOutput) { SJoinOperatorInfo* pJoinOperator = (SJoinOperatorInfo*)param; nodesDestroyNode(pJoinOperator->pCondAfterMerge); - + taosMemoryFreeClear(param); } diff --git a/source/libs/executor/src/projectoperator.c b/source/libs/executor/src/projectoperator.c new file mode 100644 index 0000000000..34149d7499 --- /dev/null +++ b/source/libs/executor/src/projectoperator.c @@ -0,0 +1,590 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "executorimpl.h" +#include "functionMgt.h" + +static SSDataBlock* doGenerateSourceData(SOperatorInfo* pOperator); +static SSDataBlock* doProjectOperation(SOperatorInfo* pOperator); +static SSDataBlock* doApplyIndefinitFunction(SOperatorInfo* pOperator); +static SArray* setRowTsColumnOutputInfo(SqlFunctionCtx* pCtx, int32_t numOfCols); +static void setFunctionResultOutput(SOperatorInfo* pOperator, SOptrBasicInfo* pInfo, SAggSupporter* pSup, int32_t stage, + int32_t numOfExprs); + +static void destroyProjectOperatorInfo(void* param, int32_t numOfOutput) { + if (NULL == param) { + return; + } + + SProjectOperatorInfo* pInfo = (SProjectOperatorInfo*)param; + cleanupBasicInfo(&pInfo->binfo); + cleanupAggSup(&pInfo->aggSup); + taosArrayDestroy(pInfo->pPseudoColInfo); + + blockDataDestroy(pInfo->pFinalRes); + taosMemoryFreeClear(param); +} + +static void destroyIndefinitOperatorInfo(void* param, int32_t numOfOutput) { + SIndefOperatorInfo* pInfo = (SIndefOperatorInfo*)param; + cleanupBasicInfo(&pInfo->binfo); + + taosArrayDestroy(pInfo->pPseudoColInfo); + cleanupAggSup(&pInfo->aggSup); + cleanupExprSupp(&pInfo->scalarSup); + + taosMemoryFreeClear(param); +} + +SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SProjectPhysiNode* pProjPhyNode, + SExecTaskInfo* pTaskInfo) { + SProjectOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SProjectOperatorInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + if (pInfo == NULL || pOperator == NULL) { + goto _error; + } + + int32_t numOfCols = 0; + SExprInfo* pExprInfo = createExprInfo(pProjPhyNode->pProjections, NULL, &numOfCols); + + SSDataBlock* pResBlock = createResDataBlock(pProjPhyNode->node.pOutputDataBlockDesc); + initLimitInfo(pProjPhyNode->node.pLimit, pProjPhyNode->node.pSlimit, &pInfo->limitInfo); + + pInfo->binfo.pRes = pResBlock; + pInfo->pFinalRes = createOneDataBlock(pResBlock, false); + pInfo->pFilterNode = pProjPhyNode->node.pConditions; + 
pInfo->mergeDataBlocks = pProjPhyNode->mergeDataBlock; + + // todo remove it soon + if (pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM) { + pInfo->mergeDataBlocks = true; + } + + int32_t numOfRows = 4096; + size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; + + // Make sure the size of SSDataBlock will never exceed the size of 2MB. + int32_t TWOMB = 2 * 1024 * 1024; + if (numOfRows * pResBlock->info.rowSize > TWOMB) { + numOfRows = TWOMB / pResBlock->info.rowSize; + } + initResultSizeInfo(&pOperator->resultInfo, numOfRows); + + initAggInfo(&pOperator->exprSupp, &pInfo->aggSup, pExprInfo, numOfCols, keyBufSize, pTaskInfo->id.str); + initBasicInfo(&pInfo->binfo, pResBlock); + setFunctionResultOutput(pOperator, &pInfo->binfo, &pInfo->aggSup, MAIN_SCAN, numOfCols); + + pInfo->pPseudoColInfo = setRowTsColumnOutputInfo(pOperator->exprSupp.pCtx, numOfCols); + pOperator->name = "ProjectOperator"; + pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_PROJECT; + pOperator->blocking = false; + pOperator->status = OP_NOT_OPENED; + pOperator->info = pInfo; + pOperator->pTaskInfo = pTaskInfo; + + pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doProjectOperation, NULL, NULL, + destroyProjectOperatorInfo, NULL, NULL, NULL); + + int32_t code = appendDownstream(pOperator, &downstream, 1); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + return pOperator; + + _error: + pTaskInfo->code = TSDB_CODE_OUT_OF_MEMORY; + return NULL; +} + +static int32_t discardGroupDataBlock(SSDataBlock* pBlock, SLimitInfo* pLimitInfo) { + if (pLimitInfo->remainGroupOffset > 0) { + // it is the first group + if (pLimitInfo->currentGroupId == 0 || pLimitInfo->currentGroupId == pBlock->info.groupId) { + pLimitInfo->currentGroupId = pBlock->info.groupId; + return PROJECT_RETRIEVE_CONTINUE; + } else if (pLimitInfo->currentGroupId != pBlock->info.groupId) { + // now it is the data from a new group + pLimitInfo->remainGroupOffset -= 1; + pLimitInfo->currentGroupId = 
pBlock->info.groupId; + + // ignore data block in current group + if (pLimitInfo->remainGroupOffset > 0) { + return PROJECT_RETRIEVE_CONTINUE; + } + } + + // set current group id of the project operator + pLimitInfo->currentGroupId = pBlock->info.groupId; + } + + return PROJECT_RETRIEVE_DONE; +} + +static int32_t setInfoForNewGroup(SSDataBlock* pBlock, SLimitInfo* pLimitInfo, SOperatorInfo* pOperator) { + // remainGroupOffset == 0 + // here check for a new group data, we need to handle the data of the previous group. + ASSERT(pLimitInfo->remainGroupOffset == 0 || pLimitInfo->remainGroupOffset == -1); + + if (pLimitInfo->currentGroupId != 0 && pLimitInfo->currentGroupId != pBlock->info.groupId) { + pLimitInfo->numOfOutputGroups += 1; + if ((pLimitInfo->slimit.limit > 0) && (pLimitInfo->slimit.limit <= pLimitInfo->numOfOutputGroups)) { + doSetOperatorCompleted(pOperator); + return PROJECT_RETRIEVE_DONE; + } + + // reset the value for a new group data + // existing rows that belongs to previous group. 
+ pLimitInfo->numOfOutputRows = 0; + pLimitInfo->remainOffset = pLimitInfo->limit.offset; + } + + return PROJECT_RETRIEVE_DONE; +} + +static int32_t doIngroupLimitOffset(SLimitInfo* pLimitInfo, uint64_t groupId, SSDataBlock* pBlock, SOperatorInfo* pOperator) { + // set current group id + pLimitInfo->currentGroupId = groupId; + + if (pLimitInfo->remainOffset >= pBlock->info.rows) { + pLimitInfo->remainOffset -= pBlock->info.rows; + blockDataCleanup(pBlock); + return PROJECT_RETRIEVE_CONTINUE; + } else if (pLimitInfo->remainOffset < pBlock->info.rows && pLimitInfo->remainOffset > 0) { + blockDataTrimFirstNRows(pBlock, pLimitInfo->remainOffset); + pLimitInfo->remainOffset = 0; + } + + // check for the limitation in each group + if (pLimitInfo->limit.limit >= 0 && + pLimitInfo->numOfOutputRows + pBlock->info.rows >= pLimitInfo->limit.limit) { + int32_t keepRows = (int32_t)(pLimitInfo->limit.limit - pLimitInfo->numOfOutputRows); + blockDataKeepFirstNRows(pBlock, keepRows); + if (pLimitInfo->slimit.limit > 0 && pLimitInfo->slimit.limit <= pLimitInfo->numOfOutputGroups) { + doSetOperatorCompleted(pOperator); + } + } + + pLimitInfo->numOfOutputRows += pBlock->info.rows; + return PROJECT_RETRIEVE_DONE; +} + +SSDataBlock* doProjectOperation(SOperatorInfo* pOperator) { + SProjectOperatorInfo* pProjectInfo = pOperator->info; + SOptrBasicInfo* pInfo = &pProjectInfo->binfo; + + SExprSupp* pSup = &pOperator->exprSupp; + SSDataBlock* pRes = pInfo->pRes; + SSDataBlock* pFinalRes = pProjectInfo->pFinalRes; + + blockDataCleanup(pFinalRes); + + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + if (pOperator->status == OP_EXEC_DONE) { + if (pTaskInfo->execModel == OPTR_EXEC_MODEL_QUEUE) { + pOperator->status = OP_OPENED; + return NULL; + } + + return NULL; + } + + int64_t st = 0; + int32_t order = 0; + int32_t scanFlag = 0; + + if (pOperator->cost.openCost == 0) { + st = taosGetTimestampUs(); + } + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + SLimitInfo* pLimitInfo = 
&pProjectInfo->limitInfo; + + if (downstream == NULL) { + return doGenerateSourceData(pOperator); + } + + while (1) { + while (1) { + blockDataCleanup(pRes); + + // The downstream exec may change the value of the newgroup, so use a local variable instead. + SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + if (pBlock == NULL) { + doSetOperatorCompleted(pOperator); + break; + } + + // for stream interval + if (pBlock->info.type == STREAM_RETRIEVE) { + return pBlock; + } + + int32_t status = discardGroupDataBlock(pBlock, pLimitInfo); + if (status == PROJECT_RETRIEVE_CONTINUE) { + continue; + } + + setInfoForNewGroup(pBlock, pLimitInfo, pOperator); + if (pOperator->status == OP_EXEC_DONE) { + break; + } + + // the pDataBlock are always the same one, no need to call this again + int32_t code = getTableScanInfo(downstream, &order, &scanFlag); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } + + setInputDataBlock(pOperator, pSup->pCtx, pBlock, order, scanFlag, false); + blockDataEnsureCapacity(pInfo->pRes, pInfo->pRes->info.rows + pBlock->info.rows); + + code = projectApplyFunctions(pSup->pExprInfo, pInfo->pRes, pBlock, pSup->pCtx, pSup->numOfExprs, + pProjectInfo->pPseudoColInfo); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } + + status = doIngroupLimitOffset(pLimitInfo, pBlock->info.groupId, pInfo->pRes, pOperator); + if (status == PROJECT_RETRIEVE_CONTINUE) { + continue; + } + + break; + } + + if (pProjectInfo->mergeDataBlocks) { + if (pRes->info.rows > 0) { + pFinalRes->info.groupId = pRes->info.groupId; + pFinalRes->info.version = pRes->info.version; + + // continue merge data, ignore the group id + blockDataMerge(pFinalRes, pRes); + if (pFinalRes->info.rows + pRes->info.rows <= pOperator->resultInfo.threshold) { + continue; + } + } + + // do apply filter + doFilter(pProjectInfo->pFilterNode, pFinalRes, NULL); + if (pFinalRes->info.rows > 0 || pRes->info.rows == 0) { + break; + } + } else { + // do 
apply filter + if (pRes->info.rows > 0) { + doFilter(pProjectInfo->pFilterNode, pRes, NULL); + if (pRes->info.rows == 0) { + continue; + } + } + + // no results generated + break; + } + } + + SSDataBlock* p = pProjectInfo->mergeDataBlocks ? pFinalRes : pRes; + pOperator->resultInfo.totalRows += p->info.rows; + + if (pOperator->cost.openCost == 0) { + pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; + } + + return (p->info.rows > 0) ? p : NULL; +} + +SOperatorInfo* createIndefinitOutputOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pNode, + SExecTaskInfo* pTaskInfo) { + SIndefOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SIndefOperatorInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + if (pInfo == NULL || pOperator == NULL) { + goto _error; + } + + SExprSupp* pSup = &pOperator->exprSupp; + + SIndefRowsFuncPhysiNode* pPhyNode = (SIndefRowsFuncPhysiNode*)pNode; + + int32_t numOfExpr = 0; + SExprInfo* pExprInfo = createExprInfo(pPhyNode->pFuncs, NULL, &numOfExpr); + + if (pPhyNode->pExprs != NULL) { + int32_t num = 0; + SExprInfo* pSExpr = createExprInfo(pPhyNode->pExprs, NULL, &num); + int32_t code = initExprSupp(&pInfo->scalarSup, pSExpr, num); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + } + + SSDataBlock* pResBlock = createResDataBlock(pPhyNode->node.pOutputDataBlockDesc); + + int32_t numOfRows = 4096; + size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; + + // Make sure the size of SSDataBlock will never exceed the size of 2MB. 
+ int32_t TWOMB = 2 * 1024 * 1024; + if (numOfRows * pResBlock->info.rowSize > TWOMB) { + numOfRows = TWOMB / pResBlock->info.rowSize; + } + + initResultSizeInfo(&pOperator->resultInfo, numOfRows); + + initAggInfo(pSup, &pInfo->aggSup, pExprInfo, numOfExpr, keyBufSize, pTaskInfo->id.str); + initBasicInfo(&pInfo->binfo, pResBlock); + + setFunctionResultOutput(pOperator, &pInfo->binfo, &pInfo->aggSup, MAIN_SCAN, numOfExpr); + + pInfo->binfo.pRes = pResBlock; + pInfo->pCondition = pPhyNode->node.pConditions; + pInfo->pPseudoColInfo = setRowTsColumnOutputInfo(pSup->pCtx, numOfExpr); + + pOperator->name = "IndefinitOperator"; + pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_INDEF_ROWS_FUNC; + pOperator->blocking = false; + pOperator->status = OP_NOT_OPENED; + pOperator->info = pInfo; + pOperator->pTaskInfo = pTaskInfo; + + pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doApplyIndefinitFunction, NULL, NULL, + destroyIndefinitOperatorInfo, NULL, NULL, NULL); + + int32_t code = appendDownstream(pOperator, &downstream, 1); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + return pOperator; + + _error: + taosMemoryFree(pInfo); + taosMemoryFree(pOperator); + pTaskInfo->code = TSDB_CODE_OUT_OF_MEMORY; + return NULL; +} + +static void doHandleDataBlock(SOperatorInfo* pOperator, SSDataBlock* pBlock, SOperatorInfo* downstream, + SExecTaskInfo* pTaskInfo) { + int32_t order = 0; + int32_t scanFlag = 0; + + SIndefOperatorInfo* pIndefInfo = pOperator->info; + SOptrBasicInfo* pInfo = &pIndefInfo->binfo; + SExprSupp* pSup = &pOperator->exprSupp; + + // the pDataBlock are always the same one, no need to call this again + int32_t code = getTableScanInfo(downstream, &order, &scanFlag); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } + + // there is an scalar expression that needs to be calculated before apply the group aggregation. 
+ SExprSupp* pScalarSup = &pIndefInfo->scalarSup; + if (pScalarSup->pExprInfo != NULL) { + code = projectApplyFunctions(pScalarSup->pExprInfo, pBlock, pBlock, pScalarSup->pCtx, pScalarSup->numOfExprs, + pIndefInfo->pPseudoColInfo); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } + } + + setInputDataBlock(pOperator, pSup->pCtx, pBlock, order, scanFlag, false); + blockDataEnsureCapacity(pInfo->pRes, pInfo->pRes->info.rows + pBlock->info.rows); + + code = projectApplyFunctions(pSup->pExprInfo, pInfo->pRes, pBlock, pSup->pCtx, pSup->numOfExprs, + pIndefInfo->pPseudoColInfo); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } +} + +SSDataBlock* doApplyIndefinitFunction(SOperatorInfo* pOperator) { + SIndefOperatorInfo* pIndefInfo = pOperator->info; + SOptrBasicInfo* pInfo = &pIndefInfo->binfo; + SExprSupp* pSup = &pOperator->exprSupp; + + SSDataBlock* pRes = pInfo->pRes; + blockDataCleanup(pRes); + + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + if (pOperator->status == OP_EXEC_DONE) { + return NULL; + } + + int64_t st = 0; + + if (pOperator->cost.openCost == 0) { + st = taosGetTimestampUs(); + } + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + + while (1) { + // here we need to handle the existsed group results + if (pIndefInfo->pNextGroupRes != NULL) { // todo extract method + for (int32_t k = 0; k < pSup->numOfExprs; ++k) { + SqlFunctionCtx* pCtx = &pSup->pCtx[k]; + + SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx); + pResInfo->initialized = false; + pCtx->pOutput = NULL; + } + + doHandleDataBlock(pOperator, pIndefInfo->pNextGroupRes, downstream, pTaskInfo); + pIndefInfo->pNextGroupRes = NULL; + } + + if (pInfo->pRes->info.rows < pOperator->resultInfo.threshold) { + while (1) { + // The downstream exec may change the value of the newgroup, so use a local variable instead. 
+ SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + if (pBlock == NULL) { + doSetOperatorCompleted(pOperator); + break; + } + + if (pIndefInfo->groupId == 0 && pBlock->info.groupId != 0) { + pIndefInfo->groupId = pBlock->info.groupId; // this is the initial group result + } else { + if (pIndefInfo->groupId != pBlock->info.groupId) { // reset output buffer and computing status + pIndefInfo->groupId = pBlock->info.groupId; + pIndefInfo->pNextGroupRes = pBlock; + break; + } + } + + doHandleDataBlock(pOperator, pBlock, downstream, pTaskInfo); + if (pInfo->pRes->info.rows >= pOperator->resultInfo.threshold) { + break; + } + } + } + + doFilter(pIndefInfo->pCondition, pInfo->pRes, NULL); + size_t rows = pInfo->pRes->info.rows; + if (rows > 0 || pOperator->status == OP_EXEC_DONE) { + break; + } else { + blockDataCleanup(pInfo->pRes); + } + } + + size_t rows = pInfo->pRes->info.rows; + pOperator->resultInfo.totalRows += rows; + + if (pOperator->cost.openCost == 0) { + pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; + } + + return (rows > 0) ? pInfo->pRes : NULL; +} + +void initCtxOutputBuffer(SqlFunctionCtx* pCtx, int32_t size) { + for (int32_t j = 0; j < size; ++j) { + struct SResultRowEntryInfo* pResInfo = GET_RES_INFO(&pCtx[j]); + if (isRowEntryInitialized(pResInfo) || fmIsPseudoColumnFunc(pCtx[j].functionId) || pCtx[j].functionId == -1 || + fmIsScalarFunc(pCtx[j].functionId)) { + continue; + } + + pCtx[j].fpSet.init(&pCtx[j], pCtx[j].resultInfo); + } +} + +/* + * The start of each column SResultRowEntryInfo is denote by RowCellInfoOffset. + * Note that in case of top/bottom query, the whole multiple rows of result is treated as only one row of results. 
+ * +------------+-----------------result column 1------------+------------------result column 2-----------+ + * | SResultRow | SResultRowEntryInfo | intermediate buffer1 | SResultRowEntryInfo | intermediate buffer 2| + * +------------+--------------------------------------------+--------------------------------------------+ + * offset[0] offset[1] offset[2] + */ +// TODO refactor: some function move away +void setFunctionResultOutput(SOperatorInfo* pOperator, SOptrBasicInfo* pInfo, SAggSupporter* pSup, int32_t stage, + int32_t numOfExprs) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SqlFunctionCtx* pCtx = pOperator->exprSupp.pCtx; + int32_t* rowEntryInfoOffset = pOperator->exprSupp.rowEntryInfoOffset; + + SResultRowInfo* pResultRowInfo = &pInfo->resultRowInfo; + initResultRowInfo(pResultRowInfo); + + int64_t tid = 0; + int64_t groupId = 0; + SResultRow* pRow = doSetResultOutBufByKey(pSup->pResultBuf, pResultRowInfo, (char*)&tid, sizeof(tid), true, groupId, + pTaskInfo, false, pSup); + + for (int32_t i = 0; i < numOfExprs; ++i) { + struct SResultRowEntryInfo* pEntry = getResultEntryInfo(pRow, i, rowEntryInfoOffset); + cleanupResultRowEntry(pEntry); + + pCtx[i].resultInfo = pEntry; + pCtx[i].scanFlag = stage; + } + + initCtxOutputBuffer(pCtx, numOfExprs); +} + +SArray* setRowTsColumnOutputInfo(SqlFunctionCtx* pCtx, int32_t numOfCols) { + SArray* pList = taosArrayInit(4, sizeof(int32_t)); + for (int32_t i = 0; i < numOfCols; ++i) { + if (fmIsPseudoColumnFunc(pCtx[i].functionId)) { + taosArrayPush(pList, &i); + } + } + + return pList; +} + +SSDataBlock* doGenerateSourceData(SOperatorInfo* pOperator) { + SProjectOperatorInfo* pProjectInfo = pOperator->info; + + SExprSupp* pSup = &pOperator->exprSupp; + SSDataBlock* pRes = pProjectInfo->binfo.pRes; + + blockDataEnsureCapacity(pRes, pOperator->resultInfo.capacity); + SExprInfo* pExpr = pSup->pExprInfo; + + int64_t st = taosGetTimestampUs(); + + for (int32_t k = 0; k < pSup->numOfExprs; ++k) { + int32_t 
outputSlotId = pExpr[k].base.resSchema.slotId; + + ASSERT(pExpr[k].pExpr->nodeType == QUERY_NODE_VALUE); + SColumnInfoData* pColInfoData = taosArrayGet(pRes->pDataBlock, outputSlotId); + + int32_t type = pExpr[k].base.pParam[0].param.nType; + if (TSDB_DATA_TYPE_NULL == type) { + colDataAppendNNULL(pColInfoData, 0, 1); + } else { + colDataAppend(pColInfoData, 0, taosVariantGet(&pExpr[k].base.pParam[0].param, type), false); + } + } + + pRes->info.rows = 1; + doFilter(pProjectInfo->pFilterNode, pRes, NULL); + + /*int32_t status = */doIngroupLimitOffset(&pProjectInfo->limitInfo, 0, pRes, pOperator); + + pOperator->resultInfo.totalRows += pRes->info.rows; + + doSetOperatorCompleted(pOperator); + if (pOperator->cost.openCost == 0) { + pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; + } + + return (pRes->info.rows > 0) ? pRes : NULL; +} \ No newline at end of file diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index a9d03aebbe..2dcb555834 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -274,7 +274,7 @@ static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanInfo* pTableSca qDebug("%s data block filter out, brange:%" PRId64 "-%" PRId64 ", rows:%d", GET_TASKID(pTaskInfo), pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows); } else { - qDebug("%s data block filter out, elapsed time:%"PRId64, GET_TASKID(pTaskInfo), (et - st)); + qDebug("%s data block filter out, elapsed time:%" PRId64, GET_TASKID(pTaskInfo), (et - st)); } return TSDB_CODE_SUCCESS; @@ -1838,11 +1838,14 @@ static SSDataBlock* sysTableScanUserTags(SOperatorInfo* pOperator) { int8_t tagType = smr.me.stbEntry.schemaTag.pSchema[i].type; pColInfoData = taosArrayGet(p->pDataBlock, 4); char tagTypeStr[VARSTR_HEADER_SIZE + 32]; - int tagTypeLen = sprintf(varDataVal(tagTypeStr), "%s", tDataTypes[tagType].name); + int tagTypeLen = sprintf(varDataVal(tagTypeStr), "%s", 
tDataTypes[tagType].name); if (tagType == TSDB_DATA_TYPE_VARCHAR) { - tagTypeLen += sprintf(varDataVal(tagTypeStr) + tagTypeLen, "(%d)", (int32_t)(smr.me.stbEntry.schemaTag.pSchema[i].bytes - VARSTR_HEADER_SIZE)); + tagTypeLen += sprintf(varDataVal(tagTypeStr) + tagTypeLen, "(%d)", + (int32_t)(smr.me.stbEntry.schemaTag.pSchema[i].bytes - VARSTR_HEADER_SIZE)); } else if (tagType == TSDB_DATA_TYPE_NCHAR) { - tagTypeLen += sprintf(varDataVal(tagTypeStr) + tagTypeLen, "(%d)", (int32_t)((smr.me.stbEntry.schemaTag.pSchema[i].bytes - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE)); + tagTypeLen += + sprintf(varDataVal(tagTypeStr) + tagTypeLen, "(%d)", + (int32_t)((smr.me.stbEntry.schemaTag.pSchema[i].bytes - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE)); } varDataSetLen(tagTypeStr, tagTypeLen); colDataAppend(pColInfoData, numOfRows, (char*)tagTypeStr, false); @@ -2527,49 +2530,6 @@ _error: return NULL; } -typedef struct STableMergeScanInfo { - STableListInfo* tableListInfo; - int32_t tableStartIndex; - int32_t tableEndIndex; - bool hasGroupId; - uint64_t groupId; - SArray* dataReaders; // array of tsdbReaderT* - SReadHandle readHandle; - int32_t bufPageSize; - uint32_t sortBufSize; // max buffer size for in-memory sort - SArray* pSortInfo; - SSortHandle* pSortHandle; - - SSDataBlock* pSortInputBlock; - int64_t startTs; // sort start time - SArray* sortSourceParams; - - SFileBlockLoadRecorder readRecorder; - int64_t numOfRows; - SScanInfo scanInfo; - int32_t scanTimes; - SNode* pFilterNode; // filter info, which is push down by optimizer - SqlFunctionCtx* pCtx; // which belongs to the direct upstream operator operator query context - SResultRowInfo* pResultRowInfo; - int32_t* rowEntryInfoOffset; - SExprInfo* pExpr; - SSDataBlock* pResBlock; - SArray* pColMatchInfo; - int32_t numOfOutput; - - SExprInfo* pPseudoExpr; - int32_t numOfPseudoExpr; - SqlFunctionCtx* pPseudoCtx; - - SQueryTableDataCond cond; - int32_t scanFlag; // table scan flag to denote if it is a repeat/reverse/main scan - 
int32_t dataBlockLoadFlag; - // if the upstream is an interval operator, the interval info is also kept here to get the time - // window to check if current data block needs to be loaded. - SInterval interval; - SSampleExecInfo sample; // sample execution info -} STableMergeScanInfo; - int32_t createScanTableListInfo(SScanPhysiNode* pScanNode, SNodeList* pGroupTags, bool groupSort, SReadHandle* pHandle, STableListInfo* pTableListInfo, SNode* pTagCond, SNode* pTagIndexCond, const char* idStr) { @@ -2700,9 +2660,9 @@ static int32_t loadDataBlockFromOneTable(SOperatorInfo* pOperator, STableMergeSc relocateColumnData(pBlock, pTableScanInfo->pColMatchInfo, pCols, true); // currently only the tbname pseudo column - if (pTableScanInfo->numOfPseudoExpr > 0) { - int32_t code = addTagPseudoColumnData(&pTableScanInfo->readHandle, pTableScanInfo->pPseudoExpr, - pTableScanInfo->numOfPseudoExpr, pBlock, GET_TASKID(pTaskInfo)); + if (pTableScanInfo->pseudoSup.numOfExprs > 0) { + int32_t code = addTagPseudoColumnData(&pTableScanInfo->readHandle, pTableScanInfo->pseudoSup.pExprInfo, + pTableScanInfo->pseudoSup.numOfExprs, pBlock, GET_TASKID(pTaskInfo)); if (code != TSDB_CODE_SUCCESS) { longjmp(pTaskInfo->env, code); } @@ -2869,29 +2829,31 @@ int32_t stopGroupTableMergeScan(SOperatorInfo* pOperator) { STableMergeScanInfo* pInfo = pOperator->info; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - tsortDestroySortHandle(pInfo->pSortHandle); + size_t numReaders = taosArrayGetSize(pInfo->dataReaders); + + for (int32_t i = 0; i < numReaders; ++i) { + STableMergeScanSortSourceParam* param = taosArrayGet(pInfo->sortSourceParams, i); + blockDataDestroy(param->inputBlock); + } taosArrayClear(pInfo->sortSourceParams); - for (int32_t i = 0; i < taosArrayGetSize(pInfo->dataReaders); ++i) { + tsortDestroySortHandle(pInfo->pSortHandle); + + for (int32_t i = 0; i < numReaders; ++i) { STsdbReader* reader = taosArrayGetP(pInfo->dataReaders, i); tsdbReaderClose(reader); } - 
taosArrayDestroy(pInfo->dataReaders); pInfo->dataReaders = NULL; return TSDB_CODE_SUCCESS; } -SSDataBlock* getSortedTableMergeScanBlockData(SSortHandle* pHandle, int32_t capacity, SOperatorInfo* pOperator) { +SSDataBlock* getSortedTableMergeScanBlockData(SSortHandle* pHandle, SSDataBlock* pResBlock, int32_t capacity, SOperatorInfo* pOperator) { STableMergeScanInfo* pInfo = pOperator->info; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SSDataBlock* p = tsortGetSortedDataBlock(pHandle); - if (p == NULL) { - return NULL; - } - - blockDataEnsureCapacity(p, capacity); + blockDataCleanup(pResBlock); + blockDataEnsureCapacity(pResBlock, capacity); while (1) { STupleHandle* pTupleHandle = tsortNextTuple(pHandle); @@ -2899,14 +2861,15 @@ SSDataBlock* getSortedTableMergeScanBlockData(SSortHandle* pHandle, int32_t capa break; } - appendOneRowToDataBlock(p, pTupleHandle); - if (p->info.rows >= capacity) { + appendOneRowToDataBlock(pResBlock, pTupleHandle); + if (pResBlock->info.rows >= capacity) { break; } } - qDebug("%s get sorted row blocks, rows:%d", GET_TASKID(pTaskInfo), p->info.rows); - return (p->info.rows > 0) ? p : NULL; + + qDebug("%s get sorted row blocks, rows:%d", GET_TASKID(pTaskInfo), pResBlock->info.rows); + return (pResBlock->info.rows > 0) ? 
pResBlock : NULL; } SSDataBlock* doTableMergeScan(SOperatorInfo* pOperator) { @@ -2935,7 +2898,7 @@ SSDataBlock* doTableMergeScan(SOperatorInfo* pOperator) { } SSDataBlock* pBlock = NULL; while (pInfo->tableStartIndex < tableListSize) { - pBlock = getSortedTableMergeScanBlockData(pInfo->pSortHandle, pOperator->resultInfo.capacity, pOperator); + pBlock = getSortedTableMergeScanBlockData(pInfo->pSortHandle, pInfo->pResBlock, pOperator->resultInfo.capacity, pOperator); if (pBlock != NULL) { pBlock->info.groupId = pInfo->groupId; pOperator->resultInfo.totalRows += pBlock->info.rows; @@ -2959,6 +2922,7 @@ SSDataBlock* doTableMergeScan(SOperatorInfo* pOperator) { void destroyTableMergeScanOperatorInfo(void* param, int32_t numOfOutput) { STableMergeScanInfo* pTableScanInfo = (STableMergeScanInfo*)param; cleanupQueryTableDataCond(&pTableScanInfo->cond); + taosArrayDestroy(pTableScanInfo->sortSourceParams); for (int32_t i = 0; i < taosArrayGetSize(pTableScanInfo->dataReaders); ++i) { STsdbReader* reader = taosArrayGetP(pTableScanInfo->dataReaders, i); @@ -2974,7 +2938,9 @@ void destroyTableMergeScanOperatorInfo(void* param, int32_t numOfOutput) { pTableScanInfo->pSortInputBlock = blockDataDestroy(pTableScanInfo->pSortInputBlock); taosArrayDestroy(pTableScanInfo->pSortInfo); + cleanupExprSupp(&pTableScanInfo->pseudoSup); + taosMemoryFreeClear(pTableScanInfo->rowEntryInfoOffset); taosMemoryFreeClear(param); } @@ -3031,8 +2997,9 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN } if (pTableScanNode->scan.pScanPseudoCols != NULL) { - pInfo->pPseudoExpr = createExprInfo(pTableScanNode->scan.pScanPseudoCols, NULL, &pInfo->numOfPseudoExpr); - pInfo->pPseudoCtx = createSqlFunctionCtx(pInfo->pPseudoExpr, pInfo->numOfPseudoExpr, &pInfo->rowEntryInfoOffset); + SExprSupp* pSup = &pInfo->pseudoSup; + pSup->pExprInfo = createExprInfo(pTableScanNode->scan.pScanPseudoCols, NULL, &pSup->numOfExprs); + pSup->pCtx = createSqlFunctionCtx(pSup->pExprInfo, 
pSup->numOfExprs, &pSup->rowEntryInfoOffset); } pInfo->scanInfo = (SScanInfo){.numOfAsc = pTableScanNode->scanSeq[0], .numOfDesc = pTableScanNode->scanSeq[1]}; diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 0f1272c964..1e001a29a0 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -940,6 +940,7 @@ static void hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul if (pInfo->execModel == OPTR_EXEC_MODEL_STREAM && pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { saveResultRow(pResult, tableGroupId, pUpdated); + setResultBufPageDirty(pInfo->aggSup.pResultBuf, &pResultRowInfo->cur); } } @@ -996,6 +997,7 @@ static void hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul if (pInfo->execModel == OPTR_EXEC_MODEL_STREAM && pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { saveResultRow(pResult, tableGroupId, pUpdated); + setResultBufPageDirty(pInfo->aggSup.pResultBuf, &pResultRowInfo->cur); } ekey = ascScan ? 
nextWin.ekey : nextWin.skey; @@ -1092,7 +1094,6 @@ static int32_t doOpenIntervalAgg(SOperatorInfo* pOperator) { hashIntervalAgg(pOperator, &pInfo->binfo.resultRowInfo, pBlock, scanFlag, NULL); } - closeAllResultRows(&pInfo->binfo.resultRowInfo); initGroupedResultInfo(&pInfo->groupResInfo, pInfo->aggSup.pResultRowHashTable, pInfo->order); OPTR_SET_OPENED(pOperator); @@ -1248,7 +1249,6 @@ static SSDataBlock* doStateWindowAgg(SOperatorInfo* pOperator) { pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; pOperator->status = OP_RES_TO_RETURN; - closeAllResultRows(&pBInfo->resultRowInfo); initGroupedResultInfo(&pInfo->groupResInfo, pInfo->aggSup.pResultRowHashTable, TSDB_ORDER_ASC); blockDataEnsureCapacity(pBInfo->pRes, pOperator->resultInfo.capacity); @@ -2043,7 +2043,6 @@ static SSDataBlock* doSessionWindowAgg(SOperatorInfo* pOperator) { // restore the value pOperator->status = OP_RES_TO_RETURN; - closeAllResultRows(&pBInfo->resultRowInfo); initGroupedResultInfo(&pInfo->groupResInfo, pInfo->aggSup.pResultRowHashTable, TSDB_ORDER_ASC); blockDataEnsureCapacity(pBInfo->pRes, pOperator->resultInfo.capacity); @@ -2207,8 +2206,6 @@ static SSDataBlock* doTimeslice(SOperatorInfo* pOperator) { SSDataBlock* pResBlock = pSliceInfo->pRes; SExprSupp* pSup = &pOperator->exprSupp; - blockDataEnsureCapacity(pResBlock, pOperator->resultInfo.capacity); - // if (pOperator->status == OP_RES_TO_RETURN) { // // doBuildResultDatablock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pIntervalInfo->pRes); // if (pResBlock->info.rows == 0 || !hasDataInGroupInfo(&pSliceInfo->groupResInfo)) { @@ -2348,10 +2345,10 @@ SOperatorInfo* createTimeSliceOperatorInfo(SOperatorInfo* downstream, SPhysiNode initResultSizeInfo(&pOperator->resultInfo, 4096); pInfo->pFillColInfo = createFillColInfo(pExprInfo, numOfExprs, (SNodeListNode*)pInterpPhyNode->pFillValues); - pInfo->pRes = createResDataBlock(pPhyNode->pOutputDataBlockDesc); - pInfo->win = pInterpPhyNode->timeRange; + pInfo->pRes = 
createResDataBlock(pPhyNode->pOutputDataBlockDesc); + pInfo->win = pInterpPhyNode->timeRange; pInfo->interval.interval = pInterpPhyNode->interval; - pInfo->current = pInfo->win.skey; + pInfo->current = pInfo->win.skey; pOperator->name = "TimeSliceOperator"; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_INTERP_FUNC; @@ -2542,6 +2539,7 @@ static void rebuildIntervalWindow(SStreamFinalIntervalOperatorInfo* pInfo, SExpr } if (find && pUpdated) { saveResultRow(pCurResult, pWinRes->groupId, pUpdated); + setResultBufPageDirty(pInfo->aggSup.pResultBuf, &pInfo->binfo.resultRowInfo.cur); } } } @@ -2662,6 +2660,7 @@ static void doHashInterval(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBloc } if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pUpdated) { saveResultRow(pResult, tableGroupId, pUpdated); + setResultBufPageDirty(pInfo->aggSup.pResultBuf, &pResultRowInfo->cur); } updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &nextWin, true); doApplyFunctions(pTaskInfo, pSup->pCtx, &nextWin, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, tsCols, diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index 121e697630..5279d015b4 100644 --- a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -375,6 +375,7 @@ static int32_t logicJoinCopy(const SJoinLogicNode* pSrc, SJoinLogicNode* pDst) { CLONE_NODE_FIELD(pMergeCondition); CLONE_NODE_FIELD(pOnConditions); COPY_SCALAR_FIELD(isSingleTableJoin); + COPY_SCALAR_FIELD(inputTsOrder); return TSDB_CODE_SUCCESS; } @@ -440,6 +441,7 @@ static int32_t logicWindowCopy(const SWindowLogicNode* pSrc, SWindowLogicNode* p COPY_SCALAR_FIELD(watermark); COPY_SCALAR_FIELD(igExpired); COPY_SCALAR_FIELD(windowAlgo); + COPY_SCALAR_FIELD(inputTsOrder); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index eec4780293..186a51f000 100644 --- 
a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -1717,7 +1717,7 @@ static const char* jkJoinPhysiPlanOnConditions = "OnConditions"; static const char* jkJoinPhysiPlanTargets = "Targets"; static int32_t physiJoinNodeToJson(const void* pObj, SJson* pJson) { - const SJoinPhysiNode* pNode = (const SJoinPhysiNode*)pObj; + const SSortMergeJoinPhysiNode* pNode = (const SSortMergeJoinPhysiNode*)pObj; int32_t code = physicPlanNodeToJson(pObj, pJson); if (TSDB_CODE_SUCCESS == code) { @@ -1737,7 +1737,7 @@ static int32_t physiJoinNodeToJson(const void* pObj, SJson* pJson) { } static int32_t jsonToPhysiJoinNode(const SJson* pJson, void* pObj) { - SJoinPhysiNode* pNode = (SJoinPhysiNode*)pObj; + SSortMergeJoinPhysiNode* pNode = (SSortMergeJoinPhysiNode*)pObj; int32_t code = jsonToPhysicPlanNode(pJson, pObj); if (TSDB_CODE_SUCCESS == code) { diff --git a/source/libs/nodes/src/nodesTraverseFuncs.c b/source/libs/nodes/src/nodesTraverseFuncs.c index b12e3b14c7..77681af1bc 100644 --- a/source/libs/nodes/src/nodesTraverseFuncs.c +++ b/source/libs/nodes/src/nodesTraverseFuncs.c @@ -468,7 +468,7 @@ static EDealRes dispatchPhysiPlan(SNode* pNode, ETraversalOrder order, FNodeWalk break; } case QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN: { - SJoinPhysiNode* pJoin = (SJoinPhysiNode*)pNode; + SSortMergeJoinPhysiNode* pJoin = (SSortMergeJoinPhysiNode*)pNode; res = walkPhysiNode((SPhysiNode*)pNode, order, walker, pContext); if (DEAL_RES_ERROR != res && DEAL_RES_END != res) { res = walkPhysiPlan(pJoin->pMergeCondition, order, walker, pContext); diff --git a/source/libs/nodes/src/nodesUtilFuncs.c b/source/libs/nodes/src/nodesUtilFuncs.c index 23f0bb088d..3c6fbe409c 100644 --- a/source/libs/nodes/src/nodesUtilFuncs.c +++ b/source/libs/nodes/src/nodesUtilFuncs.c @@ -287,7 +287,7 @@ SNode* nodesMakeNode(ENodeType type) { case QUERY_NODE_PHYSICAL_PLAN_PROJECT: return makeNode(type, sizeof(SProjectPhysiNode)); case QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN: - return 
makeNode(type, sizeof(SJoinPhysiNode)); + return makeNode(type, sizeof(SSortMergeJoinPhysiNode)); case QUERY_NODE_PHYSICAL_PLAN_HASH_AGG: return makeNode(type, sizeof(SAggPhysiNode)); case QUERY_NODE_PHYSICAL_PLAN_EXCHANGE: @@ -883,7 +883,7 @@ void nodesDestroyNode(SNode* pNode) { break; } case QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN: { - SJoinPhysiNode* pPhyNode = (SJoinPhysiNode*)pNode; + SSortMergeJoinPhysiNode* pPhyNode = (SSortMergeJoinPhysiNode*)pNode; destroyPhysiNode((SPhysiNode*)pPhyNode); nodesDestroyNode(pPhyNode->pMergeCondition); nodesDestroyNode(pPhyNode->pOnConditions); diff --git a/source/libs/parser/inc/parAst.h b/source/libs/parser/inc/parAst.h index f3ea332fe2..12c18733b1 100644 --- a/source/libs/parser/inc/parAst.h +++ b/source/libs/parser/inc/parAst.h @@ -90,7 +90,7 @@ SNode* createValueNode(SAstCreateContext* pCxt, int32_t dataType, const SToken* SNode* createDurationValueNode(SAstCreateContext* pCxt, const SToken* pLiteral); SNode* createDefaultDatabaseCondValue(SAstCreateContext* pCxt); SNode* createPlaceholderValueNode(SAstCreateContext* pCxt, const SToken* pLiteral); -SNode* setProjectionAlias(SAstCreateContext* pCxt, SNode* pNode, const SToken* pAlias); +SNode* setProjectionAlias(SAstCreateContext* pCxt, SNode* pNode, SToken* pAlias); SNode* createLogicConditionNode(SAstCreateContext* pCxt, ELogicConditionType type, SNode* pParam1, SNode* pParam2); SNode* createOperatorNode(SAstCreateContext* pCxt, EOperatorType type, SNode* pLeft, SNode* pRight); SNode* createBetweenAnd(SAstCreateContext* pCxt, SNode* pExpr, SNode* pLeft, SNode* pRight); diff --git a/source/libs/parser/src/parAstCreater.c b/source/libs/parser/src/parAstCreater.c index b237fd2c6e..a54dae1ee9 100644 --- a/source/libs/parser/src/parAstCreater.c +++ b/source/libs/parser/src/parAstCreater.c @@ -527,6 +527,7 @@ SNode* createTempTableNode(SAstCreateContext* pCxt, SNode* pSubquery, const STok } if (QUERY_NODE_SELECT_STMT == nodeType(pSubquery)) { 
strcpy(((SSelectStmt*)pSubquery)->stmtName, tempTable->table.tableAlias); + ((SSelectStmt*)pSubquery)->isSubquery = true; } else if (QUERY_NODE_SET_OPERATOR == nodeType(pSubquery)) { strcpy(((SSetOperator*)pSubquery)->stmtName, tempTable->table.tableAlias); } @@ -637,8 +638,9 @@ SNode* createInterpTimeRange(SAstCreateContext* pCxt, SNode* pStart, SNode* pEnd return createBetweenAnd(pCxt, createPrimaryKeyCol(pCxt), pStart, pEnd); } -SNode* setProjectionAlias(SAstCreateContext* pCxt, SNode* pNode, const SToken* pAlias) { +SNode* setProjectionAlias(SAstCreateContext* pCxt, SNode* pNode, SToken* pAlias) { CHECK_PARSER_STATUS(pCxt); + trimEscape(pAlias); int32_t len = TMIN(sizeof(((SExprNode*)pNode)->aliasName) - 1, pAlias->n); strncpy(((SExprNode*)pNode)->aliasName, pAlias->z, len); ((SExprNode*)pNode)->aliasName[len] = '\0'; diff --git a/source/libs/parser/src/parInsert.c b/source/libs/parser/src/parInsert.c index 702422e022..d564d53633 100644 --- a/source/libs/parser/src/parInsert.c +++ b/source/libs/parser/src/parInsert.c @@ -739,12 +739,13 @@ static int32_t parseBoundColumns(SInsertParseContext* pCxt, SParsedDataColInfo* return TSDB_CODE_SUCCESS; } -static void buildCreateTbReq(SVCreateTbReq* pTbReq, const char* tname, STag* pTag, int64_t suid, const char* sname, SArray* tagName, uint8_t tagNum) { +static void buildCreateTbReq(SVCreateTbReq* pTbReq, const char* tname, STag* pTag, int64_t suid, const char* sname, + SArray* tagName, uint8_t tagNum) { pTbReq->type = TD_CHILD_TABLE; pTbReq->name = strdup(tname); pTbReq->ctb.suid = suid; pTbReq->ctb.tagNum = tagNum; - if(sname) pTbReq->ctb.name = strdup(sname); + if (sname) pTbReq->ctb.name = strdup(sname); pTbReq->ctb.pTag = (uint8_t*)pTag; pTbReq->ctb.tagName = taosArrayDup(tagName); pTbReq->commentLen = -1; @@ -969,7 +970,7 @@ static int32_t parseTagsClause(SInsertParseContext* pCxt, SSchema* pSchema, uint } SSchema* pTagSchema = &pSchema[pCxt->tags.boundColumns[i]]; - char tmpTokenBuf[TSDB_MAX_BYTES_PER_ROW] = {0}; 
// todo this can be optimize with parse column + char tmpTokenBuf[TSDB_MAX_BYTES_PER_ROW] = {0}; // todo this can be optimize with parse column code = checkAndTrimValue(&sToken, tmpTokenBuf, &pCxt->msg); if (code != TSDB_CODE_SUCCESS) { goto end; @@ -1012,7 +1013,8 @@ static int32_t parseTagsClause(SInsertParseContext* pCxt, SSchema* pSchema, uint goto end; } - buildCreateTbReq(&pCxt->createTblReq, tName, pTag, pCxt->pTableMeta->suid, pCxt->sTableName, tagName, pCxt->pTableMeta->tableInfo.numOfTags); + buildCreateTbReq(&pCxt->createTblReq, tName, pTag, pCxt->pTableMeta->suid, pCxt->sTableName, tagName, + pCxt->pTableMeta->tableInfo.numOfTags); end: for (int i = 0; i < taosArrayGetSize(pTagVals); ++i) { @@ -1650,7 +1652,6 @@ static int32_t skipUsingClause(SInsertParseSyntaxCxt* pCxt) { static int32_t collectTableMetaKey(SInsertParseSyntaxCxt* pCxt, SToken* pTbToken) { SName name; CHECK_CODE(createSName(&name, pTbToken, pCxt->pComCxt->acctId, pCxt->pComCxt->db, &pCxt->msg)); - CHECK_CODE(reserveDbCfgInCache(pCxt->pComCxt->acctId, name.dbname, pCxt->pMetaCache)); CHECK_CODE(reserveUserAuthInCacheExt(pCxt->pComCxt->pUser, &name, AUTH_TYPE_WRITE, pCxt->pMetaCache)); CHECK_CODE(reserveTableMetaInCacheExt(&name, pCxt->pMetaCache)); CHECK_CODE(reserveTableVgroupInCacheExt(&name, pCxt->pMetaCache)); @@ -2332,7 +2333,8 @@ int32_t smlBindData(void* handle, SArray* tags, SArray* colsSchema, SArray* cols return ret; } - buildCreateTbReq(&smlHandle->tableExecHandle.createTblReq, tableName, pTag, pTableMeta->suid, NULL, tagName, pTableMeta->tableInfo.numOfTags); + buildCreateTbReq(&smlHandle->tableExecHandle.createTblReq, tableName, pTag, pTableMeta->suid, NULL, tagName, + pTableMeta->tableInfo.numOfTags); taosArrayDestroy(tagName); smlHandle->tableExecHandle.createTblReq.ctb.name = taosMemoryMalloc(sTableNameLen + 1); diff --git a/source/libs/parser/src/parUtil.c b/source/libs/parser/src/parUtil.c index 74d5f03dc1..7c9a8b10dd 100644 --- a/source/libs/parser/src/parUtil.c +++ 
b/source/libs/parser/src/parUtil.c @@ -92,7 +92,7 @@ static char* getSyntaxErrFormat(int32_t errCode) { case TSDB_CODE_PAR_INTER_SLIDING_TOO_BIG: return "sliding value no larger than the interval value"; case TSDB_CODE_PAR_INTER_SLIDING_TOO_SMALL: - return "sliding value can not less than 1% of interval value"; + return "sliding value can not less than 1%% of interval value"; case TSDB_CODE_PAR_ONLY_ONE_JSON_TAG: return "Only one tag if there is a json tag"; case TSDB_CODE_PAR_INCORRECT_NUM_OF_COL: diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 84e712b466..30e3b676df 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -339,6 +339,7 @@ static int32_t createJoinLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect pJoin->joinType = pJoinTable->joinType; pJoin->isSingleTableJoin = pJoinTable->table.singleTable; + pJoin->inputTsOrder = ORDER_ASC; pJoin->node.groupAction = GROUP_ACTION_CLEAR; pJoin->node.requireDataOrder = DATA_ORDER_LEVEL_GLOBAL; pJoin->node.requireDataOrder = DATA_ORDER_LEVEL_GLOBAL; @@ -625,14 +626,14 @@ static int32_t createInterpFuncLogicNode(SLogicPlanContext* pCxt, SSelectStmt* p static int32_t createWindowLogicNodeFinalize(SLogicPlanContext* pCxt, SSelectStmt* pSelect, SWindowLogicNode* pWindow, SLogicNode** pLogicNode) { - int32_t code = nodesCollectFuncs(pSelect, SQL_CLAUSE_WINDOW, fmIsWindowClauseFunc, &pWindow->pFuncs); - if (pCxt->pPlanCxt->streamQuery) { pWindow->triggerType = pCxt->pPlanCxt->triggerType; pWindow->watermark = pCxt->pPlanCxt->watermark; pWindow->igExpired = pCxt->pPlanCxt->igExpired; } + pWindow->inputTsOrder = ORDER_ASC; + int32_t code = nodesCollectFuncs(pSelect, SQL_CLAUSE_WINDOW, fmIsWindowClauseFunc, &pWindow->pFuncs); if (TSDB_CODE_SUCCESS == code) { code = rewriteExprsForSelect(pWindow->pFuncs, pSelect, SQL_CLAUSE_WINDOW); } @@ -861,7 +862,8 @@ static int32_t createProjectLogicNode(SLogicPlanContext* 
pCxt, SSelectStmt* pSel TSWAP(pProject->node.pLimit, pSelect->pLimit); TSWAP(pProject->node.pSlimit, pSelect->pSlimit); - pProject->node.groupAction = GROUP_ACTION_CLEAR; + pProject->node.groupAction = + (!pSelect->isSubquery && pCxt->pPlanCxt->streamQuery) ? GROUP_ACTION_KEEP : GROUP_ACTION_CLEAR; pProject->node.requireDataOrder = DATA_ORDER_LEVEL_NONE; pProject->node.resultDataOrder = DATA_ORDER_LEVEL_NONE; diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 7b60710c7d..fcc395af62 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -993,25 +993,28 @@ static bool sortPriKeyOptMayBeOptimized(SLogicNode* pNode) { } static int32_t sortPriKeyOptGetScanNodesImpl(SLogicNode* pNode, bool* pNotOptimize, SNodeList** pScanNodes) { - int32_t code = TSDB_CODE_SUCCESS; - switch (nodeType(pNode)) { - case QUERY_NODE_LOGIC_PLAN_SCAN: - if (TSDB_SUPER_TABLE != ((SScanLogicNode*)pNode)->tableType) { - return nodesListMakeAppend(pScanNodes, (SNode*)pNode); + case QUERY_NODE_LOGIC_PLAN_SCAN: { + SScanLogicNode* pScan = (SScanLogicNode*)pNode; + if (NULL != pScan->pGroupTags) { + *pNotOptimize = true; + return TSDB_CODE_SUCCESS; } - break; - case QUERY_NODE_LOGIC_PLAN_JOIN: - code = + return nodesListMakeAppend(pScanNodes, (SNode*)pNode); + } + case QUERY_NODE_LOGIC_PLAN_JOIN: { + int32_t code = sortPriKeyOptGetScanNodesImpl((SLogicNode*)nodesListGetNode(pNode->pChildren, 0), pNotOptimize, pScanNodes); if (TSDB_CODE_SUCCESS == code) { code = sortPriKeyOptGetScanNodesImpl((SLogicNode*)nodesListGetNode(pNode->pChildren, 1), pNotOptimize, pScanNodes); } return code; + } case QUERY_NODE_LOGIC_PLAN_AGG: + case QUERY_NODE_LOGIC_PLAN_PARTITION: *pNotOptimize = true; - return code; + return TSDB_CODE_SUCCESS; default: break; } @@ -1037,17 +1040,33 @@ static EOrder sortPriKeyOptGetPriKeyOrder(SSortLogicNode* pSort) { return ((SOrderByExprNode*)nodesListGetNode(pSort->pSortKeys, 0))->order; } 
+static void sortPriKeyOptSetParentOrder(SLogicNode* pNode, EOrder order) { + if (NULL == pNode) { + return; + } + if (QUERY_NODE_LOGIC_PLAN_WINDOW == nodeType(pNode)) { + ((SWindowLogicNode*)pNode)->inputTsOrder = order; + } else if (QUERY_NODE_LOGIC_PLAN_JOIN == nodeType(pNode)) { + ((SJoinLogicNode*)pNode)->inputTsOrder = order; + } + sortPriKeyOptSetParentOrder(pNode->pParent, order); +} + static int32_t sortPriKeyOptApply(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan, SSortLogicNode* pSort, SNodeList* pScanNodes) { EOrder order = sortPriKeyOptGetPriKeyOrder(pSort); - if (ORDER_DESC == order) { - SNode* pScanNode = NULL; - FOREACH(pScanNode, pScanNodes) { - SScanLogicNode* pScan = (SScanLogicNode*)pScanNode; - if (pScan->scanSeq[0] > 0) { - TSWAP(pScan->scanSeq[0], pScan->scanSeq[1]); - } + SNode* pScanNode = NULL; + FOREACH(pScanNode, pScanNodes) { + SScanLogicNode* pScan = (SScanLogicNode*)pScanNode; + if (ORDER_DESC == order && pScan->scanSeq[0] > 0) { + TSWAP(pScan->scanSeq[0], pScan->scanSeq[1]); } + if (TSDB_SUPER_TABLE == pScan->tableType) { + pScan->scanType = SCAN_TYPE_TABLE_MERGE; + pScan->node.resultDataOrder = DATA_ORDER_LEVEL_GLOBAL; + pScan->node.requireDataOrder = DATA_ORDER_LEVEL_GLOBAL; + } + sortPriKeyOptSetParentOrder(pScan->node.pParent, order); } SLogicNode* pChild = (SLogicNode*)nodesListGetNode(pSort->node.pChildren, 0); @@ -1613,10 +1632,10 @@ static void alignProjectionWithTarget(SLogicNode* pNode) { } SProjectLogicNode* pProjectNode = (SProjectLogicNode*)pNode; - SNode* pProjection = NULL; + SNode* pProjection = NULL; FOREACH(pProjection, pProjectNode->pProjections) { SNode* pTarget = NULL; - bool keep = false; + bool keep = false; FOREACH(pTarget, pNode->pTargets) { if (0 == strcmp(((SColumnNode*)pProjection)->node.aliasName, ((SColumnNode*)pTarget)->colName)) { keep = true; @@ -2214,7 +2233,7 @@ static bool tagScanMayBeOptimized(SLogicNode* pNode) { !planOptNodeListHasTbname(pAgg->pGroupKeys)) { return false; } - + SNode* 
pGroupKey = NULL; FOREACH(pGroupKey, pAgg->pGroupKeys) { SNode* pGroup = NULL; diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index 0a1f8bbd0b..587e566939 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -415,7 +415,6 @@ static int32_t createScanPhysiNodeFinalize(SPhysiPlanContext* pCxt, SSubplan* pS SScanPhysiNode* pScanPhysiNode, SPhysiNode** pPhyNode) { int32_t code = createScanCols(pCxt, pScanPhysiNode, pScanLogicNode->pScanCols); if (TSDB_CODE_SUCCESS == code) { - // Data block describe also needs to be set without scanning column, such as SELECT COUNT(*) FROM t code = addDataBlockSlots(pCxt, pScanPhysiNode->pScanCols, pScanPhysiNode->node.pOutputDataBlockDesc); } @@ -622,8 +621,8 @@ static int32_t createScanPhysiNode(SPhysiPlanContext* pCxt, SSubplan* pSubplan, static int32_t createJoinPhysiNode(SPhysiPlanContext* pCxt, SNodeList* pChildren, SJoinLogicNode* pJoinLogicNode, SPhysiNode** pPhyNode) { - SJoinPhysiNode* pJoin = - (SJoinPhysiNode*)makePhysiNode(pCxt, (SLogicNode*)pJoinLogicNode, QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN); + SSortMergeJoinPhysiNode* pJoin = + (SSortMergeJoinPhysiNode*)makePhysiNode(pCxt, (SLogicNode*)pJoinLogicNode, QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN); if (NULL == pJoin) { return TSDB_CODE_OUT_OF_MEMORY; } @@ -975,6 +974,9 @@ static int32_t createInterpFuncPhysiNode(SPhysiPlanContext* pCxt, SNodeList* pCh } static bool projectCanMergeDataBlock(SProjectLogicNode* pProject) { + if (GROUP_ACTION_KEEP == pProject->node.groupAction) { + return false; + } if (DATA_ORDER_LEVEL_NONE == pProject->node.resultDataOrder) { return true; } diff --git a/source/libs/planner/src/planSpliter.c b/source/libs/planner/src/planSpliter.c index 8586234b7e..81e2bff179 100644 --- a/source/libs/planner/src/planSpliter.c +++ b/source/libs/planner/src/planSpliter.c @@ -469,7 +469,7 @@ static int32_t stbSplCreateExchangeNode(SSplitContext* pCxt, 
SLogicNode* pParent return code; } -static int32_t stbSplCreateMergeKeysByPrimaryKey(SNode* pPrimaryKey, SNodeList** pMergeKeys) { +static int32_t stbSplCreateMergeKeysByPrimaryKey(SNode* pPrimaryKey, EOrder order, SNodeList** pMergeKeys) { SOrderByExprNode* pMergeKey = (SOrderByExprNode*)nodesMakeNode(QUERY_NODE_ORDER_BY_EXPR); if (NULL == pMergeKey) { return TSDB_CODE_OUT_OF_MEMORY; @@ -479,7 +479,7 @@ static int32_t stbSplCreateMergeKeysByPrimaryKey(SNode* pPrimaryKey, SNodeList** nodesDestroyNode((SNode*)pMergeKey); return TSDB_CODE_OUT_OF_MEMORY; } - pMergeKey->order = ORDER_ASC; + pMergeKey->order = order; pMergeKey->nullOrder = NULL_ORDER_FIRST; return nodesListMakeStrictAppend(pMergeKeys, (SNode*)pMergeKey); } @@ -491,7 +491,8 @@ static int32_t stbSplSplitIntervalForBatch(SSplitContext* pCxt, SStableSplitInfo ((SWindowLogicNode*)pPartWindow)->windowAlgo = INTERVAL_ALGO_HASH; ((SWindowLogicNode*)pInfo->pSplitNode)->windowAlgo = INTERVAL_ALGO_MERGE; SNodeList* pMergeKeys = NULL; - code = stbSplCreateMergeKeysByPrimaryKey(((SWindowLogicNode*)pInfo->pSplitNode)->pTspk, &pMergeKeys); + code = stbSplCreateMergeKeysByPrimaryKey(((SWindowLogicNode*)pInfo->pSplitNode)->pTspk, + ((SWindowLogicNode*)pInfo->pSplitNode)->inputTsOrder, &pMergeKeys); if (TSDB_CODE_SUCCESS == code) { code = stbSplCreateMergeNode(pCxt, NULL, pInfo->pSplitNode, pMergeKeys, pPartWindow, true); } @@ -579,7 +580,8 @@ static int32_t stbSplSplitSessionOrStateForBatch(SSplitContext* pCxt, SStableSpl SLogicNode* pChild = (SLogicNode*)nodesListGetNode(pWindow->pChildren, 0); SNodeList* pMergeKeys = NULL; - int32_t code = stbSplCreateMergeKeysByPrimaryKey(((SWindowLogicNode*)pWindow)->pTspk, &pMergeKeys); + int32_t code = stbSplCreateMergeKeysByPrimaryKey(((SWindowLogicNode*)pWindow)->pTspk, + ((SWindowLogicNode*)pWindow)->inputTsOrder, &pMergeKeys); if (TSDB_CODE_SUCCESS == code) { code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pChild, pMergeKeys, (SLogicNode*)pChild, true); @@ -913,27 +915,70 
@@ static int32_t stbSplSplitScanNodeWithPartTags(SSplitContext* pCxt, SStableSplit } static SNode* stbSplFindPrimaryKeyFromScan(SScanLogicNode* pScan) { + bool find = false; SNode* pCol = NULL; FOREACH(pCol, pScan->pScanCols) { if (PRIMARYKEY_TIMESTAMP_COL_ID == ((SColumnNode*)pCol)->colId) { + find = true; + break; + } + } + if (!find) { + return NULL; + } + SNode* pTarget = NULL; + FOREACH(pTarget, pScan->node.pTargets) { + if (nodesEqualNode(pTarget, pCol)) { return pCol; } } - return NULL; + nodesListStrictAppend(pScan->node.pTargets, nodesCloneNode(pCol)); + return pCol; +} + +static int32_t stbSplCreateMergeScanNode(SScanLogicNode* pScan, SLogicNode** pOutputMergeScan, + SNodeList** pOutputMergeKeys) { + SNodeList* pChildren = pScan->node.pChildren; + pScan->node.pChildren = NULL; + + int32_t code = TSDB_CODE_SUCCESS; + SScanLogicNode* pMergeScan = (SScanLogicNode*)nodesCloneNode((SNode*)pScan); + if (NULL == pMergeScan) { + code = TSDB_CODE_OUT_OF_MEMORY; + } + + SNodeList* pMergeKeys = NULL; + if (TSDB_CODE_SUCCESS == code) { + pMergeScan->scanType = SCAN_TYPE_TABLE_MERGE; + pMergeScan->node.pChildren = pChildren; + splSetParent((SLogicNode*)pMergeScan); + code = stbSplCreateMergeKeysByPrimaryKey(stbSplFindPrimaryKeyFromScan(pMergeScan), + pMergeScan->scanSeq[0] > 0 ? 
ORDER_ASC : ORDER_DESC, &pMergeKeys); + } + + if (TSDB_CODE_SUCCESS == code) { + *pOutputMergeScan = (SLogicNode*)pMergeScan; + *pOutputMergeKeys = pMergeKeys; + } else { + nodesDestroyNode((SNode*)pMergeScan); + nodesDestroyList(pMergeKeys); + } + + return code; } static int32_t stbSplSplitMergeScanNode(SSplitContext* pCxt, SLogicSubplan* pSubplan, SScanLogicNode* pScan, bool groupSort) { - SNodeList* pMergeKeys = NULL; - int32_t code = stbSplCreateMergeKeysByPrimaryKey(stbSplFindPrimaryKeyFromScan(pScan), &pMergeKeys); + SLogicNode* pMergeScan = NULL; + SNodeList* pMergeKeys = NULL; + int32_t code = stbSplCreateMergeScanNode(pScan, &pMergeScan, &pMergeKeys); if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeNode(pCxt, pSubplan, (SLogicNode*)pScan, pMergeKeys, (SLogicNode*)pScan, groupSort); + code = stbSplCreateMergeNode(pCxt, pSubplan, (SLogicNode*)pScan, pMergeKeys, pMergeScan, groupSort); } if (TSDB_CODE_SUCCESS == code) { code = nodesListMakeStrictAppend(&pSubplan->pChildren, - (SNode*)splCreateScanSubplan(pCxt, (SLogicNode*)pScan, SPLIT_FLAG_STABLE_SPLIT)); + (SNode*)splCreateScanSubplan(pCxt, pMergeScan, SPLIT_FLAG_STABLE_SPLIT)); } - pScan->scanType = SCAN_TYPE_TABLE_MERGE; ++(pCxt->groupId); return code; } @@ -978,14 +1023,14 @@ static int32_t stbSplSplitJoinNode(SSplitContext* pCxt, SStableSplitInfo* pInfo) } static int32_t stbSplCreateMergeKeysForPartitionNode(SLogicNode* pPart, SNodeList** pMergeKeys) { - SNode* pPrimaryKey = - nodesCloneNode(stbSplFindPrimaryKeyFromScan((SScanLogicNode*)nodesListGetNode(pPart->pChildren, 0))); + SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pPart->pChildren, 0); + SNode* pPrimaryKey = nodesCloneNode(stbSplFindPrimaryKeyFromScan(pScan)); if (NULL == pPrimaryKey) { return TSDB_CODE_OUT_OF_MEMORY; } int32_t code = nodesListAppend(pPart->pTargets, pPrimaryKey); if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeKeysByPrimaryKey(pPrimaryKey, pMergeKeys); + code = 
stbSplCreateMergeKeysByPrimaryKey(pPrimaryKey, pScan->scanSeq[0] > 0 ? ORDER_ASC : ORDER_DESC, pMergeKeys); } return code; } diff --git a/source/libs/planner/src/planUtil.c b/source/libs/planner/src/planUtil.c index bfa6079cb1..7aab8a7ca3 100644 --- a/source/libs/planner/src/planUtil.c +++ b/source/libs/planner/src/planUtil.c @@ -124,7 +124,8 @@ int32_t replaceLogicNode(SLogicSubplan* pSubplan, SLogicNode* pOld, SLogicNode* } static int32_t adjustScanDataRequirement(SScanLogicNode* pScan, EDataOrderLevel requirement) { - if (SCAN_TYPE_TABLE != pScan->scanType && SCAN_TYPE_TABLE_MERGE != pScan->scanType) { + if ((SCAN_TYPE_TABLE != pScan->scanType && SCAN_TYPE_TABLE_MERGE != pScan->scanType) || + DATA_ORDER_LEVEL_GLOBAL == pScan->node.requireDataOrder) { return TSDB_CODE_SUCCESS; } // The lowest sort level of scan output data is DATA_ORDER_LEVEL_IN_BLOCK diff --git a/source/libs/planner/test/planBasicTest.cpp b/source/libs/planner/test/planBasicTest.cpp index 8f9cd94c19..9cfae68d34 100644 --- a/source/libs/planner/test/planBasicTest.cpp +++ b/source/libs/planner/test/planBasicTest.cpp @@ -24,9 +24,10 @@ TEST_F(PlanBasicTest, selectClause) { useDb("root", "test"); run("SELECT * FROM t1"); - run("SELECT 1 FROM t1"); - run("SELECT * FROM st1"); - run("SELECT 1 FROM st1"); + + run("SELECT MAX(c1) c2, c2 FROM t1"); + + run("SELECT MAX(c1) c2, c2 FROM st1"); } TEST_F(PlanBasicTest, whereClause) { diff --git a/source/libs/planner/test/planOptimizeTest.cpp b/source/libs/planner/test/planOptimizeTest.cpp index 770ac94e5b..058705403b 100644 --- a/source/libs/planner/test/planOptimizeTest.cpp +++ b/source/libs/planner/test/planOptimizeTest.cpp @@ -53,6 +53,8 @@ TEST_F(PlanOptimizeTest, sortPrimaryKey) { run("SELECT c1 FROM t1 ORDER BY ts"); + run("SELECT c1 FROM st1 ORDER BY ts"); + run("SELECT c1 FROM t1 ORDER BY ts DESC"); run("SELECT COUNT(*) FROM t1 INTERVAL(10S) ORDER BY _WSTART DESC"); diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c 
index ebccb7950c..d77e42388b 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -66,7 +66,7 @@ int32_t qwHandleTaskComplete(QW_FPARAMS_DEF, SQWTaskCtx *ctx) { return TSDB_CODE_SUCCESS; } -int32_t qwExecTask(QW_FPARAMS_DEF, SQWTaskCtx *ctx, bool *queryEnd) { +int32_t qwExecTask(QW_FPARAMS_DEF, SQWTaskCtx *ctx, bool *queryStop) { int32_t code = 0; bool qcontinue = true; SSDataBlock *pRes = NULL; @@ -104,8 +104,8 @@ int32_t qwExecTask(QW_FPARAMS_DEF, SQWTaskCtx *ctx, bool *queryEnd) { QW_ERR_RET(qwHandleTaskComplete(QW_FPARAMS(), ctx)); - if (queryEnd) { - *queryEnd = true; + if (queryStop) { + *queryStop = true; } break; @@ -125,6 +125,10 @@ int32_t qwExecTask(QW_FPARAMS_DEF, SQWTaskCtx *ctx, bool *queryEnd) { QW_TASK_DLOG("data put into sink, rows:%d, continueExecTask:%d", rows, qcontinue); if (!qcontinue) { + if (queryStop) { + *queryStop = true; + } + break; } @@ -566,7 +570,7 @@ int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) { SQWPhaseInput input = {0}; void *rsp = NULL; int32_t dataLen = 0; - bool queryEnd = false; + bool queryStop = false; do { QW_ERR_JRET(qwHandlePrePhaseEvents(QW_FPARAMS(), QW_PHASE_PRE_CQUERY, &input, NULL)); @@ -576,7 +580,7 @@ int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) { atomic_store_8((int8_t *)&ctx->queryInQueue, 0); atomic_store_8((int8_t *)&ctx->queryContinue, 0); - QW_ERR_JRET(qwExecTask(QW_FPARAMS(), ctx, &queryEnd)); + QW_ERR_JRET(qwExecTask(QW_FPARAMS(), ctx, &queryStop)); if (QW_EVENT_RECEIVED(ctx, QW_EVENT_FETCH)) { SOutputData sOutput = {0}; @@ -627,7 +631,7 @@ int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) { } QW_LOCK(QW_WRITE, &ctx->lock); - if (queryEnd || code || 0 == atomic_load_8((int8_t *)&ctx->queryContinue)) { + if (queryStop || code || 0 == atomic_load_8((int8_t *)&ctx->queryContinue)) { // Note: query is not running anymore QW_SET_PHASE(ctx, 0); QW_UNLOCK(QW_WRITE, &ctx->lock); diff --git a/source/libs/wal/src/walMgmt.c b/source/libs/wal/src/walMgmt.c 
index 491b982968..85238e87b9 100644 --- a/source/libs/wal/src/walMgmt.c +++ b/source/libs/wal/src/walMgmt.c @@ -93,7 +93,7 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) { } // init ref - pWal->pRefHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), true, HASH_ENTRY_LOCK); + pWal->pRefHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_ENTRY_LOCK); if (pWal->pRefHash == NULL) { taosMemoryFree(pWal); return NULL; diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index 6d0e844e8e..ac62b7d98d 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -21,107 +21,112 @@ static int32_t walFetchBodyNew(SWalReader *pRead); static int32_t walSkipFetchBodyNew(SWalReader *pRead); SWalReader *walOpenReader(SWal *pWal, SWalFilterCond *cond) { - SWalReader *pRead = taosMemoryCalloc(1, sizeof(SWalReader)); - if (pRead == NULL) { + SWalReader *pReader = taosMemoryCalloc(1, sizeof(SWalReader)); + if (pReader == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } - pRead->pWal = pWal; - pRead->pIdxFile = NULL; - pRead->pLogFile = NULL; - pRead->curVersion = -1; - pRead->curFileFirstVer = -1; - pRead->curInvalid = 1; - pRead->capacity = 0; + pReader->pWal = pWal; + pReader->readerId = tGenIdPI64(); + pReader->pIdxFile = NULL; + pReader->pLogFile = NULL; + pReader->curVersion = -1; + pReader->curFileFirstVer = -1; + pReader->curInvalid = 1; + pReader->capacity = 0; if (cond) { - pRead->cond = *cond; + pReader->cond = *cond; } else { - pRead->cond.scanMeta = 0; - pRead->cond.scanUncommited = 0; - pRead->cond.enableRef = 0; + pReader->cond.scanUncommited = 0; + pReader->cond.scanNotApplied = 0; + pReader->cond.scanMeta = 0; + pReader->cond.enableRef = 0; } - taosThreadMutexInit(&pRead->mutex, NULL); + taosThreadMutexInit(&pReader->mutex, NULL); - /*if (pRead->cond.enableRef) {*/ - /*walOpenRef(pWal);*/ - /*}*/ - - pRead->pHead = taosMemoryMalloc(sizeof(SWalCkHead)); - if 
(pRead->pHead == NULL) { + pReader->pHead = taosMemoryMalloc(sizeof(SWalCkHead)); + if (pReader->pHead == NULL) { terrno = TSDB_CODE_WAL_OUT_OF_MEMORY; - taosMemoryFree(pRead); + taosMemoryFree(pReader); return NULL; } - return pRead; + /*if (pReader->cond.enableRef) {*/ + /* taosHashPut(pWal->pRefHash, &pReader->readerId, sizeof(int64_t), &pReader, sizeof(void *));*/ + /*}*/ + + return pReader; } -void walCloseReader(SWalReader *pRead) { - taosCloseFile(&pRead->pIdxFile); - taosCloseFile(&pRead->pLogFile); - taosMemoryFreeClear(pRead->pHead); - taosMemoryFree(pRead); +void walCloseReader(SWalReader *pReader) { + taosCloseFile(&pReader->pIdxFile); + taosCloseFile(&pReader->pLogFile); + /*if (pReader->cond.enableRef) {*/ + /*taosHashRemove(pReader->pWal->pRefHash, &pReader->readerId, sizeof(int64_t));*/ + /*}*/ + taosMemoryFreeClear(pReader->pHead); + taosMemoryFree(pReader); } -int32_t walNextValidMsg(SWalReader *pRead) { - int64_t fetchVer = pRead->curVersion; - int64_t lastVer = walGetLastVer(pRead->pWal); - int64_t committedVer = walGetCommittedVer(pRead->pWal); - int64_t appliedVer = walGetAppliedVer(pRead->pWal); - int64_t endVer = pRead->cond.scanUncommited ? lastVer : committedVer; +int32_t walNextValidMsg(SWalReader *pReader) { + int64_t fetchVer = pReader->curVersion; + int64_t lastVer = walGetLastVer(pReader->pWal); + int64_t committedVer = walGetCommittedVer(pReader->pWal); + int64_t appliedVer = walGetAppliedVer(pReader->pWal); + int64_t endVer = pReader->cond.scanUncommited ? 
lastVer : committedVer; endVer = TMIN(appliedVer, endVer); wDebug("vgId:%d wal start to fetch, ver %ld, last ver %ld commit ver %ld, applied ver %ld, end ver %ld", - pRead->pWal->cfg.vgId, fetchVer, lastVer, committedVer, appliedVer, endVer); - pRead->curStopped = 0; + pReader->pWal->cfg.vgId, fetchVer, lastVer, committedVer, appliedVer, endVer); + pReader->curStopped = 0; while (fetchVer <= endVer) { - if (walFetchHeadNew(pRead, fetchVer) < 0) { + if (walFetchHeadNew(pReader, fetchVer) < 0) { return -1; } - if (pRead->pHead->head.msgType == TDMT_VND_SUBMIT || - (IS_META_MSG(pRead->pHead->head.msgType) && pRead->cond.scanMeta)) { - if (walFetchBodyNew(pRead) < 0) { + if (pReader->pHead->head.msgType == TDMT_VND_SUBMIT || + (IS_META_MSG(pReader->pHead->head.msgType) && pReader->cond.scanMeta)) { + if (walFetchBodyNew(pReader) < 0) { return -1; } return 0; } else { - if (walSkipFetchBodyNew(pRead) < 0) { + if (walSkipFetchBodyNew(pReader) < 0) { return -1; } fetchVer++; - ASSERT(fetchVer == pRead->curVersion); + ASSERT(fetchVer == pReader->curVersion); } } - pRead->curStopped = 1; + pReader->curStopped = 1; return -1; } -static int64_t walReadSeekFilePos(SWalReader *pRead, int64_t fileFirstVer, int64_t ver) { +static int64_t walReadSeekFilePos(SWalReader *pReader, int64_t fileFirstVer, int64_t ver) { int64_t ret = 0; - TdFilePtr pIdxTFile = pRead->pIdxFile; - TdFilePtr pLogTFile = pRead->pLogFile; + TdFilePtr pIdxTFile = pReader->pIdxFile; + TdFilePtr pLogTFile = pReader->pLogFile; // seek position int64_t offset = (ver - fileFirstVer) * sizeof(SWalIdxEntry); ret = taosLSeekFile(pIdxTFile, offset, SEEK_SET); if (ret < 0) { terrno = TAOS_SYSTEM_ERROR(errno); - wError("vgId:%d, failed to seek idx file, index:%" PRId64 ", pos:%" PRId64 ", since %s", pRead->pWal->cfg.vgId, ver, - offset, terrstr()); + wError("vgId:%d, failed to seek idx file, index:%" PRId64 ", pos:%" PRId64 ", since %s", pReader->pWal->cfg.vgId, + ver, offset, terrstr()); return -1; } SWalIdxEntry entry 
= {0}; if ((ret = taosReadFile(pIdxTFile, &entry, sizeof(SWalIdxEntry))) != sizeof(SWalIdxEntry)) { if (ret < 0) { terrno = TAOS_SYSTEM_ERROR(errno); - wError("vgId:%d, failed to read idx file, since %s", pRead->pWal->cfg.vgId, terrstr()); + wError("vgId:%d, failed to read idx file, since %s", pReader->pWal->cfg.vgId, terrstr()); } else { terrno = TSDB_CODE_WAL_FILE_CORRUPTED; wError("vgId:%d, read idx file incompletely, read bytes %" PRId64 ", bytes should be %" PRIu64, - pRead->pWal->cfg.vgId, ret, sizeof(SWalIdxEntry)); + pReader->pWal->cfg.vgId, ret, sizeof(SWalIdxEntry)); } return -1; } @@ -130,79 +135,79 @@ static int64_t walReadSeekFilePos(SWalReader *pRead, int64_t fileFirstVer, int64 ret = taosLSeekFile(pLogTFile, entry.offset, SEEK_SET); if (ret < 0) { terrno = TAOS_SYSTEM_ERROR(errno); - wError("vgId:%d, failed to seek log file, index:%" PRId64 ", pos:%" PRId64 ", since %s", pRead->pWal->cfg.vgId, ver, - entry.offset, terrstr()); + wError("vgId:%d, failed to seek log file, index:%" PRId64 ", pos:%" PRId64 ", since %s", pReader->pWal->cfg.vgId, + ver, entry.offset, terrstr()); return -1; } return ret; } -static int32_t walReadChangeFile(SWalReader *pRead, int64_t fileFirstVer) { +static int32_t walReadChangeFile(SWalReader *pReader, int64_t fileFirstVer) { char fnameStr[WAL_FILE_LEN]; - taosCloseFile(&pRead->pIdxFile); - taosCloseFile(&pRead->pLogFile); + taosCloseFile(&pReader->pIdxFile); + taosCloseFile(&pReader->pLogFile); - walBuildLogName(pRead->pWal, fileFirstVer, fnameStr); - TdFilePtr pLogTFile = taosOpenFile(fnameStr, TD_FILE_READ); - if (pLogTFile == NULL) { + walBuildLogName(pReader->pWal, fileFirstVer, fnameStr); + TdFilePtr pLogFile = taosOpenFile(fnameStr, TD_FILE_READ); + if (pLogFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); - wError("vgId:%d, cannot open file %s, since %s", pRead->pWal->cfg.vgId, fnameStr, terrstr()); + wError("vgId:%d, cannot open file %s, since %s", pReader->pWal->cfg.vgId, fnameStr, terrstr()); return -1; } - 
pRead->pLogFile = pLogTFile; + pReader->pLogFile = pLogFile; - walBuildIdxName(pRead->pWal, fileFirstVer, fnameStr); - TdFilePtr pIdxTFile = taosOpenFile(fnameStr, TD_FILE_READ); - if (pIdxTFile == NULL) { + walBuildIdxName(pReader->pWal, fileFirstVer, fnameStr); + TdFilePtr pIdxFile = taosOpenFile(fnameStr, TD_FILE_READ); + if (pIdxFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); - wError("vgId:%d, cannot open file %s, since %s", pRead->pWal->cfg.vgId, fnameStr, terrstr()); + wError("vgId:%d, cannot open file %s, since %s", pReader->pWal->cfg.vgId, fnameStr, terrstr()); return -1; } - pRead->pIdxFile = pIdxTFile; + pReader->pIdxFile = pIdxFile; return 0; } -int32_t walReadSeekVerImpl(SWalReader *pRead, int64_t ver) { - SWal *pWal = pRead->pWal; +int32_t walReadSeekVerImpl(SWalReader *pReader, int64_t ver) { + SWal *pWal = pReader->pWal; + // bsearch in fileSet SWalFileInfo tmpInfo; tmpInfo.firstVer = ver; - // bsearch in fileSet SWalFileInfo *pRet = taosArraySearch(pWal->fileInfoSet, &tmpInfo, compareWalFileInfo, TD_LE); ASSERT(pRet != NULL); - if (pRead->curFileFirstVer != pRet->firstVer) { + if (pReader->curFileFirstVer != pRet->firstVer) { // error code was set inner - if (walReadChangeFile(pRead, pRet->firstVer) < 0) { + if (walReadChangeFile(pReader, pRet->firstVer) < 0) { return -1; } } // error code was set inner - if (walReadSeekFilePos(pRead, pRet->firstVer, ver) < 0) { + if (walReadSeekFilePos(pReader, pRet->firstVer, ver) < 0) { return -1; } - wDebug("wal version reset from %ld(invalid: %d) to %ld", pRead->curVersion, pRead->curInvalid, ver); + wDebug("wal version reset from %ld(invalid: %d) to %ld", pReader->curVersion, pReader->curInvalid, ver); - pRead->curVersion = ver; + pReader->curVersion = ver; return 0; } -int32_t walReadSeekVer(SWalReader *pRead, int64_t ver) { - SWal *pWal = pRead->pWal; - if (!pRead->curInvalid && ver == pRead->curVersion) { +int32_t walReadSeekVer(SWalReader *pReader, int64_t ver) { + SWal *pWal = pReader->pWal; + if 
(!pReader->curInvalid && ver == pReader->curVersion) { wDebug("wal version %ld match, no need to reset", ver); return 0; } - pRead->curInvalid = 1; - pRead->curVersion = ver; + pReader->curInvalid = 1; + pReader->curVersion = ver; if (ver > pWal->vers.lastVer || ver < pWal->vers.firstVer) { - wDebug("vgId:%d, invalid index:%" PRId64 ", first index:%" PRId64 ", last index:%" PRId64, pRead->pWal->cfg.vgId, + wDebug("vgId:%d, invalid index:%" PRId64 ", first index:%" PRId64 ", last index:%" PRId64, pReader->pWal->cfg.vgId, ver, pWal->vers.firstVer, pWal->vers.lastVer); terrno = TSDB_CODE_WAL_LOG_NOT_EXIST; return -1; @@ -210,7 +215,7 @@ int32_t walReadSeekVer(SWalReader *pRead, int64_t ver) { if (ver < pWal->vers.snapshotVer) { } - if (walReadSeekVerImpl(pRead, ver) < 0) { + if (walReadSeekVerImpl(pReader, ver) < 0) { return -1; } diff --git a/source/libs/wal/src/walRef.c b/source/libs/wal/src/walRef.c new file mode 100644 index 0000000000..bd0f6fb1a8 --- /dev/null +++ b/source/libs/wal/src/walRef.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "cJSON.h" +#include "os.h" +#include "taoserror.h" +#include "tutil.h" +#include "walInt.h" + +SWalRef *walOpenRef(SWal *pWal) { + SWalRef *pRef = taosMemoryCalloc(1, sizeof(SWalRef)); + if (pRef == NULL) { + return NULL; + } + pRef->refId = tGenIdPI64(); + pRef->refVer = -1; + pRef->refFile = -1; + pRef->pWal = pWal; + taosHashPut(pWal->pRefHash, &pRef->refId, sizeof(int64_t), &pRef, sizeof(void *)); + return pRef; +} + +void walCloseRef(SWal *pWal, int64_t refId) { + SWalRef *pRef = *(SWalRef **)taosHashGet(pWal->pRefHash, &refId, sizeof(int64_t)); + taosHashRemove(pWal->pRefHash, &refId, sizeof(int64_t)); + taosMemoryFree(pRef); +} + +int32_t walRefVer(SWalRef *pRef, int64_t ver) { + SWal *pWal = pRef->pWal; + if (pRef->refVer != ver) { + taosThreadMutexLock(&pWal->mutex); + if (ver < pWal->vers.firstVer || ver > pWal->vers.lastVer) { + taosThreadMutexUnlock(&pWal->mutex); + terrno = TSDB_CODE_WAL_INVALID_VER; + return -1; + } + + pRef->refVer = ver; + // bsearch in fileSet + SWalFileInfo tmpInfo; + tmpInfo.firstVer = ver; + SWalFileInfo *pRet = taosArraySearch(pWal->fileInfoSet, &tmpInfo, compareWalFileInfo, TD_LE); + ASSERT(pRet != NULL); + pRef->refFile = pRet->firstVer; + + taosThreadMutexUnlock(&pWal->mutex); + } + + return 0; +} + +void walUnrefVer(SWalRef *pRef) { + pRef->refId = -1; + pRef->refFile = -1; +} + +SWalRef *walRefCommittedVer(SWal *pWal) { + SWalRef *pRef = walOpenRef(pWal); + if (pRef == NULL) { + return NULL; + } + taosThreadMutexLock(&pWal->mutex); + + int64_t ver = walGetCommittedVer(pWal); + + pRef->refVer = ver; + // bsearch in fileSet + SWalFileInfo tmpInfo; + tmpInfo.firstVer = ver; + SWalFileInfo *pRet = taosArraySearch(pWal->fileInfoSet, &tmpInfo, compareWalFileInfo, TD_LE); + ASSERT(pRet != NULL); + pRef->refFile = pRet->firstVer; + + taosThreadMutexUnlock(&pWal->mutex); + return pRef; +} diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index d6348cc5dd..81500d8088 100644 --- 
a/source/libs/wal/src/walWrite.c
+++ b/source/libs/wal/src/walWrite.c
@@ -26,7 +26,8 @@ int32_t walRestoreFromSnapshot(SWal *pWal, int64_t ver) {
     pIter = taosHashIterate(pWal->pRefHash, pIter);
     if (pIter == NULL) break;
-    SWalRef *pRef = (SWalRef *)pIter;
-    if (pRef->ver != -1) {
+    // pRefHash values are SWalRef pointers, so the iterator addresses a SWalRef * (same as walEndSnapshot)
+    SWalRef *pRef = *(SWalRef **)pIter;
+    if (pRef->refVer != -1) {
       taosHashCancelIterate(pWal->pRefHash, pIter);
       return -1;
     }
@@ -215,22 +216,23 @@ int32_t walRollback(SWal *pWal, int64_t ver) {
 
 static FORCE_INLINE int32_t walCheckAndRoll(SWal *pWal) {
   if (taosArrayGetSize(pWal->fileInfoSet) == 0) {
-    /*pWal->vers.firstVer = index;*/
     if (walRollImpl(pWal) < 0) {
       return -1;
     }
-  } else {
-    int64_t passed = walGetSeq() - pWal->lastRollSeq;
-    if (pWal->cfg.rollPeriod != -1 && pWal->cfg.rollPeriod != 0 && passed > pWal->cfg.rollPeriod) {
-      if (walRollImpl(pWal) < 0) {
-        return -1;
-      }
-    } else if (pWal->cfg.segSize != -1 && pWal->cfg.segSize != 0 && walGetLastFileSize(pWal) > pWal->cfg.segSize) {
-      if (walRollImpl(pWal) < 0) {
-        return -1;
-      }
+    return 0;
+  }
+
+  int64_t passed = walGetSeq() - pWal->lastRollSeq;
+  if (pWal->cfg.rollPeriod != -1 && pWal->cfg.rollPeriod != 0 && passed > pWal->cfg.rollPeriod) {
+    if (walRollImpl(pWal) < 0) {
+      return -1;
+    }
+  } else if (pWal->cfg.segSize != -1 && pWal->cfg.segSize != 0 && walGetLastFileSize(pWal) > pWal->cfg.segSize) {
+    if (walRollImpl(pWal) < 0) {
+      return -1;
     }
   }
+
   return 0;
 }
 
@@ -260,6 +262,16 @@ int32_t walEndSnapshot(SWal *pWal) {
   pWal->vers.snapshotVer = ver;
   int ts = taosGetTimestampSec();
 
+  int64_t minVerToDelete = ver;
+  void *pIter = NULL;
+  while (1) {
+    pIter = taosHashIterate(pWal->pRefHash, pIter);
+    if (pIter == NULL) break;
+    SWalRef *pRef = *(SWalRef **)pIter;
+    if (pRef->refVer == -1) continue;
+    minVerToDelete = TMIN(minVerToDelete, pRef->refVer);
+  }
+
   int deleteCnt = 0;
   int64_t newTotSize = pWal->totSize;
   SWalFileInfo tmp;
diff --git a/source/util/src/terror.c b/source/util/src/terror.c
index 80ad480e43..ad6eff3c12 100644
--- a/source/util/src/terror.c
+++ 
b/source/util/src/terror.c @@ -512,7 +512,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INTER_OFFSET_UNIT, "Cannot use 'year' as TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INTER_OFFSET_TOO_BIG, "Interval offset should be shorter than interval") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INTER_SLIDING_UNIT, "Does not support sliding when interval is natural month/year") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INTER_SLIDING_TOO_BIG, "sliding value no larger than the interval value") -TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INTER_SLIDING_TOO_SMALL, "sliding value can not less than 1% of interval value") +TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INTER_SLIDING_TOO_SMALL, "sliding value can not less than 1%% of interval value") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_ONLY_ONE_JSON_TAG, "Only one tag if there is a json tag") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INCORRECT_NUM_OF_COL, "Query block has incorrect number of result columns") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INCORRECT_TIMESTAMP_VAL, "Incorrect TIMESTAMP value") diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py index 8990c24305..d5ffc1b7c1 100755 --- a/tests/pytest/crash_gen/crash_gen_main.py +++ b/tests/pytest/crash_gen/crash_gen_main.py @@ -1327,6 +1327,8 @@ class Task(): # TDengine 3.0 Error Codes: 0x0333, # Object is creating # TODO: this really is NOT an acceptable error + 0x0369, # Tag already exists + 0x0388, # Database not exist 0x03A0, # STable already exists 0x03A1, # STable [does] not exist 0x03AA, # Tag already exists diff --git a/tests/system-test/7-tmq/TD-17803.py b/tests/system-test/7-tmq/TD-17803.py new file mode 100644 index 0000000000..771ff83a29 --- /dev/null +++ b/tests/system-test/7-tmq/TD-17803.py @@ -0,0 +1,198 @@ +from distutils.log import error +import taos +import sys +import time +import socket +import os +import threading +import subprocess +import platform + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * 
+sys.path.append("./7-tmq") +from tmqCommon import * + + + +class TDTestCase: + def __init__(self): + self.snapshot = 0 + self.replica = 3 + self.vgroups = 3 + self.ctbNum = 2 + self.rowsPerTbl = 2 + + def init(self, conn, logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + #tdSql.init(conn.cursor(), logSql) # output sql.txt file + + def checkFileContent(self, consumerId, queryString): + buildPath = tdCom.getBuildPath() + cfgPath = tdCom.getClientCfgPath() + dstFile = '%s/../log/dstrows_%d.txt'%(cfgPath, consumerId) + cmdStr = '%s/build/bin/taos -c %s -s "%s >> %s"'%(buildPath, cfgPath, queryString, dstFile) + tdLog.info(cmdStr) + os.system(cmdStr) + + consumeRowsFile = '%s/../log/consumerid_%d.txt'%(cfgPath, consumerId) + tdLog.info("rows file: %s, %s"%(consumeRowsFile, dstFile)) + + consumeFile = open(consumeRowsFile, mode='r') + queryFile = open(dstFile, mode='r') + + # skip first line for it is schema + queryFile.readline() + + while True: + dst = queryFile.readline() + src = consumeFile.readline() + + if dst: + if dst != src: + tdLog.exit("consumerId %d consume rows is not match the rows by direct query"%consumerId) + else: + break + return + + def prepareTestEnv(self): + tdLog.printNoPrefix("======== prepare test env include database, stable, ctables, and insert data: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 4, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 2, + 'rowsPerTbl': 1000, + 'batchNum': 10, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 3, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 0} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tmqCom.initConsumerTable() + 
tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=self.replica) + tdLog.info("create stb") + tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) + tdLog.info("create ctb") + tmqCom.create_ctable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict['ctbPrefix'], + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict['ctbStartIdx']) + tdLog.info("insert data") + tmqCom.insert_data_interlaceByMultiTbl(tsql=tdSql,dbName=paraDict["dbName"],ctbPrefix=paraDict["ctbPrefix"], + ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], + startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) + # tmqCom.insert_data_with_autoCreateTbl(tsql=tdSql,dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix="ctbx", + # ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], + # startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) + # tmqCom.asyncInsertDataByInterlace(paraDict) + tdLog.printNoPrefix("11111111111111111111111") + tmqCom.create_ntable(tdSql, dbname=paraDict["dbName"], tbname_prefix="ntb", tbname_index_start_num = 1, column_elm_list=paraDict["colSchema"], colPrefix='c', tblNum=1) + tdLog.printNoPrefix("222222222222222") + tmqCom.insert_rows_into_ntbl(tdSql, dbname=paraDict["dbName"], tbname_prefix="ntb", tbname_index_start_num = 1, column_ele_list=paraDict["colSchema"], startTs=paraDict["startTs"], tblNum=1, rows=2) # tdLog.info("restart taosd to ensure that the data falls into the disk") + + tdLog.printNoPrefix("333333333333333333333") + tdSql.query("drop database %s"%paraDict["dbName"]) + tdLog.printNoPrefix("44444444444444444") + return + + def tmqCase1(self): + tdLog.printNoPrefix("======== test case 1: ") + + # create and start thread + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 4, + 'stbName': 'stb', + 'colPrefix': 'c', + 
'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 100, + 'rowsPerTbl': 1000, + 'batchNum': 100, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 3, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 1} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tdLog.info("create topics from stb1") + topicFromStb1 = 'topic_stb1' + queryString = "select ts, c1, c2 from %s.%s where t4 == 'beijing' or t4 == 'changsha' "%(paraDict['dbName'], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicFromStb1, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + + consumerId = 0 + expectrowcnt = paraDict["rowsPerTbl"] * paraDict["ctbNum"] + topicList = topicFromStb1 + ifcheckdata = 0 + ifManualCommit = 0 + keyList = 'group.id:cgrp1,\ + enable.auto.commit:false,\ + auto.commit.interval.ms:6000,\ + auto.offset.reset:earliest' + tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) + + tdLog.info("start consume processor") + pollDelay = 100 + showMsg = 1 + showRow = 1 + tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) + + tdLog.info("start to check consume result") + expectRows = 1 + resultList = tmqCom.selectConsumeResult(expectRows) + totalConsumeRows = 0 + for i in range(expectRows): + totalConsumeRows += resultList[i] + + tdSql.query(queryString) + totalRowsInserted = 
tdSql.getRows() + + tdLog.info("act consume rows: %d, act insert rows: %d, expect consume rows: %d, "%(totalConsumeRows, totalRowsInserted, expectrowcnt)) + + if totalConsumeRows != expectrowcnt: + tdLog.exit("tmq consume rows error!") + + # tmqCom.checkFileContent(consumerId, queryString) + + tmqCom.waitSubscriptionExit(tdSql, topicFromStb1) + tdSql.query("drop topic %s"%topicFromStb1) + + tdLog.printNoPrefix("======== test case 1 end ...... ") + + def run(self): + self.prepareTestEnv() + # self.tmqCase1() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/7-tmq/dataFromTsdbNWal-multiCtb.py b/tests/system-test/7-tmq/dataFromTsdbNWal-multiCtb.py new file mode 100644 index 0000000000..2216000214 --- /dev/null +++ b/tests/system-test/7-tmq/dataFromTsdbNWal-multiCtb.py @@ -0,0 +1,250 @@ + +import taos +import sys +import time +import socket +import os +import threading +import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +sys.path.append("./7-tmq") +from tmqCommon import * + +class TDTestCase: + def __init__(self): + self.vgroups = 4 + self.ctbNum = 100 + self.rowsPerTbl = 1000 + + def init(self, conn, logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + def prepareTestEnv(self): + tdLog.printNoPrefix("======== prepare test env include database, stable, ctables, and insert data: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 
'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 100, + 'rowsPerTbl': 1000, + 'batchNum': 100, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 10, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 1} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tmqCom.initConsumerTable() + tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) + tdLog.info("create stb") + tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) + tdLog.info("create ctb") + tmqCom.create_ctable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict['ctbPrefix'], + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict['ctbStartIdx']) + + tdLog.info("insert data") + tmqCom.insert_data_interlaceByMultiTbl(tsql=tdSql,dbName=paraDict["dbName"],ctbPrefix=paraDict["ctbPrefix"], + ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], + startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) + + tdLog.info("flush db to let data falls into the disk") + tdSql.query("flush database %s"%(paraDict['dbName'])) + return + + def tmqCase1(self): + tdLog.printNoPrefix("======== test case 1: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 
'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 100, + 'rowsPerTbl': 1000, + 'batchNum': 500, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 5, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 1} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + topicNameList = ['topic1'] + expectRowsList = [] + tmqCom.initConsumerTable() + + tdLog.info("create topics from stb with filter") + queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + # sqlString = "create topic %s as stable %s" %(topicNameList[0], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicNameList[0], queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + # tdSql.query(queryString) + # expectRowsList.append(tdSql.getRows()) + + # init consume info, and start tmq_sim, then check consume result + tdLog.info("insert consume info to consume processor") + consumerId = 0 + expectrowcnt = paraDict["rowsPerTbl"] * paraDict["ctbNum"] * 2 + topicList = topicNameList[0] + ifcheckdata = 1 + ifManualCommit = 1 + keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:1000, auto.offset.reset:earliest' + tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) + + # after start consume, continue insert some data + paraDict['batchNum'] = 100 + paraDict['startTs'] = paraDict['startTs'] + self.rowsPerTbl + pInsertThread = tmqCom.asyncInsertDataByInterlace(paraDict) + + tdLog.info("start consume processor") + tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) + + pInsertThread.join() + + tdSql.query(queryString) + expectRowsList.append(tdSql.getRows()) + + tdLog.info("wait the consume result") + expectRows = 1 + resultList = tmqCom.selectConsumeResult(expectRows) + + tdLog.info("expect 
consume rows: %d, act consume rows: %d"%(expectRowsList[0], resultList[0])) + if expectRowsList[0] != resultList[0]: + tdLog.exit("%d tmq consume rows error!"%consumerId) + + # tmqCom.checkFileContent(consumerId, queryString) + + tdSql.query("flush database %s"%(paraDict['dbName'])) + + for i in range(len(topicNameList)): + tmqCom.waitSubscriptionExit(tdSql,topicNameList[i]) + tdSql.query("drop topic %s"%topicNameList[i]) + + tdLog.printNoPrefix("======== test case 1 end ...... ") + + def tmqCase2(self): + tdLog.printNoPrefix("======== test case 2: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 100, + 'rowsPerTbl': 1000, + 'batchNum': 500, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 3, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 1} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + topicNameList = ['topic1'] + expectRowsList = [] + tmqCom.initConsumerTable() + + tdLog.info("create topics from stb with filter") + queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + # sqlString = "create topic %s as stable %s" %(topicNameList[0], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicNameList[0], queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + tdSql.query(queryString) + expectRowsList.append(tdSql.getRows()) + totalRowsInserted = expectRowsList[0] + + # 
init consume info, and start tmq_sim, then check consume result + tdLog.info("insert consume info to consume processor") + consumerId = 1 + expectrowcnt = math.ceil(paraDict["rowsPerTbl"] * paraDict["ctbNum"] / 3) + topicList = topicNameList[0] + ifcheckdata = 1 + ifManualCommit = 1 + keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:1000, auto.offset.reset:earliest' + tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) + + tdLog.info("start consume processor 0") + tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) + tdLog.info("wait the consume result") + + expectRows = 1 + resultList = tmqCom.selectConsumeResult(expectRows) + actConsumeRows = resultList[0] + + tdLog.info("act consume rows: %d, expect consume rows between %d and %d"%(actConsumeRows, expectrowcnt, totalRowsInserted)) + if not (expectrowcnt <= actConsumeRows and totalRowsInserted >= actConsumeRows): + tdLog.exit("%d tmq consume rows error!"%consumerId) + + # reinit consume info, and start tmq_sim, then check consume result + tmqCom.initConsumerTable() + consumerId = 2 + expectrowcnt = math.ceil(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * 2/3) + tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) + + tdLog.info("start consume processor 1") + tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) + tdLog.info("wait the consume result") + + expectRows = 1 + resultList = tmqCom.selectConsumeResult(expectRows) + + actConsumeRows = resultList[0] + tdLog.info("act consume rows: %d, expect rows: %d, act insert rows: %d"%(actConsumeRows, expectrowcnt, totalRowsInserted)) + if not ((actConsumeRows >= expectrowcnt) and (totalRowsInserted > actConsumeRows)): + 
tdLog.exit("%d tmq consume rows error!"%consumerId) + + for i in range(len(topicNameList)): + tmqCom.waitSubscriptionExit(tdSql,topicNameList[i]) + tdSql.query("drop topic %s"%topicNameList[i]) + + tdLog.printNoPrefix("======== test case 2 end ...... ") + + def run(self): + tdSql.prepare() + self.prepareTestEnv() + self.tmqCase1() + self.tmqCase2() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/7-tmq/dataFromTsdbNWal.py b/tests/system-test/7-tmq/dataFromTsdbNWal.py index 227ce9d5a5..faa70f4820 100644 --- a/tests/system-test/7-tmq/dataFromTsdbNWal.py +++ b/tests/system-test/7-tmq/dataFromTsdbNWal.py @@ -17,8 +17,8 @@ from tmqCommon import * class TDTestCase: def __init__(self): - self.vgroups = 1 - self.ctbNum = 100 + self.vgroups = 4 + self.ctbNum = 1 self.rowsPerTbl = 10000 def init(self, conn, logSql): @@ -38,9 +38,9 @@ class TDTestCase: 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], 'ctbPrefix': 'ctb', 'ctbStartIdx': 0, - 'ctbNum': 100, + 'ctbNum': 1, 'rowsPerTbl': 10000, - 'batchNum': 3000, + 'batchNum': 100, 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 'pollDelay': 10, 'showMsg': 1, @@ -85,7 +85,7 @@ class TDTestCase: 'rowsPerTbl': 10000, 'batchNum': 100, 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 - 'pollDelay': 3, + 'pollDelay': 5, 'showMsg': 1, 'showRow': 1, 'snapshot': 1} @@ -117,17 +117,16 @@ class TDTestCase: keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:1000, auto.offset.reset:earliest' tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) - tdLog.info("start consume processor") - 
tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) - # after start consume, continue insert some data paraDict['batchNum'] = 100 paraDict['startTs'] = paraDict['startTs'] + self.rowsPerTbl - tmqCom.insert_data_interlaceByMultiTbl(tsql=tdSql,dbName=paraDict["dbName"],ctbPrefix=paraDict["ctbPrefix"], - ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], - startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) + pInsertThread = tmqCom.asyncInsertDataByInterlace(paraDict) + + tdLog.info("start consume processor") + tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) - # + pInsertThread.join() + tdSql.query(queryString) expectRowsList.append(tdSql.getRows()) @@ -135,15 +134,16 @@ class TDTestCase: expectRows = 1 resultList = tmqCom.selectConsumeResult(expectRows) - tdLog.info("expect consume rows: %d, act consume rows: %d"%(expectRowsList[0], resultList[0])) if expectRowsList[0] != resultList[0]: tdLog.exit("%d tmq consume rows error!"%consumerId) tmqCom.checkFileContent(consumerId, queryString) - time.sleep(10) + tdSql.query("flush database %s"%(paraDict['dbName'])) + for i in range(len(topicNameList)): + tmqCom.waitSubscriptionExit(tdSql,topicNameList[i]) tdSql.query("drop topic %s"%topicNameList[i]) tdLog.printNoPrefix("======== test case 1 end ...... 
") @@ -204,13 +204,12 @@ class TDTestCase: expectRows = 1 resultList = tmqCom.selectConsumeResult(expectRows) - - if not (expectrowcnt <= resultList[0] and totalRowsInserted >= resultList[0]): - tdLog.info("act consume rows: %d, expect consume rows between %d and %d"%(resultList[0], expectrowcnt, totalRowsInserted)) + actConsumeRows = resultList[0] + + tdLog.info("act consume rows: %d, expect consume rows between %d and %d"%(actConsumeRows, expectrowcnt, totalRowsInserted)) + if not (expectrowcnt <= actConsumeRows and totalRowsInserted >= actConsumeRows): tdLog.exit("%d tmq consume rows error!"%consumerId) - - firstConsumeRows = resultList[0] - + # reinit consume info, and start tmq_sim, then check consume result tmqCom.initConsumerTable() consumerId = 2 @@ -224,15 +223,13 @@ class TDTestCase: expectRows = 1 resultList = tmqCom.selectConsumeResult(expectRows) - actConsumeTotalRows = firstConsumeRows + resultList[0] - - if not (expectrowcnt >= resultList[0] and totalRowsInserted == actConsumeTotalRows): - tdLog.info("act consume rows, first: %d, second: %d "%(firstConsumeRows, resultList[0])) - tdLog.info("and sum of two consume rows: %d should be equal to total inserted rows: %d"%(actConsumeTotalRows, totalRowsInserted)) + actConsumeRows = resultList[0] + tdLog.info("act consume rows: %d, expect rows: %d, act insert rows: %d"%(actConsumeRows, expectrowcnt, totalRowsInserted)) + if not ((actConsumeRows >= expectrowcnt) and (totalRowsInserted > actConsumeRows)): tdLog.exit("%d tmq consume rows error!"%consumerId) - time.sleep(10) for i in range(len(topicNameList)): + tmqCom.waitSubscriptionExit(tdSql,topicNameList[i]) tdSql.query("drop topic %s"%topicNameList[i]) tdLog.printNoPrefix("======== test case 2 end ...... 
") @@ -241,7 +238,7 @@ class TDTestCase: tdSql.prepare() self.prepareTestEnv() self.tmqCase1() - # self.tmqCase2() + self.tmqCase2() def stop(self): tdSql.close() diff --git a/tests/system-test/7-tmq/tmqDnodeRestart.py b/tests/system-test/7-tmq/tmqDnodeRestart.py index cec6985a4e..5117ee3d24 100644 --- a/tests/system-test/7-tmq/tmqDnodeRestart.py +++ b/tests/system-test/7-tmq/tmqDnodeRestart.py @@ -151,41 +151,6 @@ class TDTestCase: if not (totalConsumeRows == totalRowsFromQury): tdLog.exit("tmq consume rows error!") - - - - # tdLog.info("****************************************************************************") - # tmqCom.initConsumerTable() - # consumerId = 1 - # expectrowcnt = paraDict["rowsPerTbl"] * paraDict["ctbNum"] * 2 - # topicList = topicFromStb1 - # ifcheckdata = 0 - # ifManualCommit = 0 - # keyList = 'group.id:cgrp2,\ - # enable.auto.commit:true,\ - # auto.commit.interval.ms:3000,\ - # auto.offset.reset:earliest' - # tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) - - # tdLog.info("start consume processor") - # tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) - - # expectRows = 1 - # resultList = tmqCom.selectConsumeResult(expectRows) - # totalConsumeRows = 0 - # for i in range(expectRows): - # totalConsumeRows += resultList[i] - - # tdSql.query(queryString) - # totalRowsFromQury = tdSql.getRows() - - # tdLog.info("act consume rows: %d, act query rows: %d"%(totalConsumeRows, totalRowsFromQury)) - # if not (totalConsumeRows == totalRowsFromQury): - # tdLog.exit("tmq consume rows error!") - - - # tdLog.info("****************************************************************************") - tmqCom.waitSubscriptionExit(tdSql, topicFromStb1) tdSql.query("drop topic %s"%topicFromStb1) @@ -259,7 +224,7 @@ class TDTestCase: tdLog.info("create some new child table and insert data ") 
paraDict["batchNum"] = 100 paraDict["ctbPrefix"] = 'newCtb' - # tmqCom.insert_data_with_autoCreateTbl(tdSql,paraDict["dbName"],paraDict["stbName"],paraDict["ctbPrefix"],paraDict["ctbNum"],paraDict["rowsPerTbl"],paraDict["batchNum"]) + tmqCom.insert_data_with_autoCreateTbl(tdSql,paraDict["dbName"],paraDict["stbName"],paraDict["ctbPrefix"],paraDict["ctbNum"],paraDict["rowsPerTbl"],paraDict["batchNum"]) tdLog.info("insert process end, and start to check consume result") expectRows = 1 diff --git a/tests/system-test/7-tmq/tmqDropNtb-snapshot0.py b/tests/system-test/7-tmq/tmqDropNtb-snapshot0.py new file mode 100644 index 0000000000..650d918828 --- /dev/null +++ b/tests/system-test/7-tmq/tmqDropNtb-snapshot0.py @@ -0,0 +1,225 @@ + +import taos +import sys +import time +import socket +import os +import threading +from enum import Enum + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +sys.path.append("./7-tmq") +from tmqCommon import * + +class TDTestCase: + def __init__(self): + self.snapshot = 0 + self.vgroups = 4 + self.ctbNum = 1000 + self.rowsPerTbl = 10 + + def init(self, conn, logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + # drop some ntbs + def tmqCase1(self): + tdLog.printNoPrefix("======== test case 1: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 4, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ntb', + 'ctbStartIdx': 0, + 'ctbNum': 1000, + 'rowsPerTbl': 100, + 'batchNum': 100, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'endTs': 0, + 'pollDelay': 5, + 'showMsg': 1, + 
'showRow': 1, + 'snapshot': 0} + paraDict['snapshot'] = self.snapshot + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tmqCom.initConsumerTable() + tdLog.info("start create database....") + tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) + tdLog.info("start create normal tables....") + tmqCom.create_ntable(tsql=tdSql, dbname=paraDict["dbName"], tbname_prefix=paraDict["ctbPrefix"], tbname_index_start_num = 1, column_elm_list=paraDict["colSchema"], colPrefix='c', tblNum=paraDict["ctbNum"]) + tdLog.info("start insert data into normal tables....") + tmqCom.insert_rows_into_ntbl(tsql=tdSql, dbname=paraDict["dbName"], tbname_prefix=paraDict["ctbPrefix"], tbname_index_start_num = 1, column_ele_list=paraDict["colSchema"],startTs=paraDict["startTs"], tblNum=paraDict["ctbNum"], rows=paraDict["rowsPerTbl"]) + + tdLog.info("create topics from database") + topicFromDb = 'topic_dbt' + tdSql.execute("create topic %s as database %s" %(topicFromDb, paraDict['dbName'])) + + if self.snapshot == 0: + consumerId = 0 + elif self.snapshot == 1: + consumerId = 1 + + expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"]) + topicList = topicFromDb + ifcheckdata = 1 + ifManualCommit = 1 + keyList = 'group.id:cgrp1,\ + enable.auto.commit:true,\ + auto.commit.interval.ms:1000,\ + auto.offset.reset:earliest' + tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) + + tdLog.info("start consume processor") + tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) + + tmqCom.getStartConsumeNotifyFromTmqsim() + tdLog.info("drop some ntables") + # drop 1/4 ctbls from half offset + paraDict["ctbStartIdx"] = paraDict["ctbStartIdx"] + int(paraDict["ctbNum"] * 1 / 2) + paraDict["ctbNum"] = 
int(paraDict["ctbNum"] / 4) + tmqCom.drop_ctable(tdSql, dbname=paraDict['dbName'], count=paraDict["ctbNum"], default_ctbname_prefix=paraDict["ctbPrefix"], ctbStartIdx=paraDict["ctbStartIdx"]) + + tdLog.info("start to check consume result") + expectRows = 1 + resultList = tmqCom.selectConsumeResult(expectRows) + totalConsumeRows = 0 + for i in range(expectRows): + totalConsumeRows += resultList[i] + + tdLog.info("act consume rows: %d, expect consume rows: %d"%(totalConsumeRows, expectrowcnt)) + + if not ((totalConsumeRows >= expectrowcnt * 3/4) and (totalConsumeRows < expectrowcnt)): + tdLog.exit("tmq consume rows error with snapshot = 0!") + + tdLog.info("wait subscriptions exit ....") + tmqCom.waitSubscriptionExit(tdSql, topicFromDb) + + tdSql.query("drop topic %s"%topicFromDb) + tdLog.info("success dorp topic: %s"%topicFromDb) + tdLog.printNoPrefix("======== test case 1 end ...... ") + + + + # drop some ntbs and create some new ntbs + def tmqCase2(self): + tdLog.printNoPrefix("======== test case 2: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 4, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ntb', + 'ctbStartIdx': 0, + 'ctbNum': 1000, + 'rowsPerTbl': 100, + 'batchNum': 100, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'endTs': 0, + 'pollDelay': 10, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 0} + paraDict['snapshot'] = self.snapshot + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tmqCom.initConsumerTable() + tdLog.info("start create database....") + tdCom.create_database(tdSql, 
paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) + tdLog.info("start create normal tables....") + tmqCom.create_ntable(tsql=tdSql, dbname=paraDict["dbName"], tbname_prefix=paraDict["ctbPrefix"], tbname_index_start_num = 1, column_elm_list=paraDict["colSchema"], colPrefix='c', tblNum=paraDict["ctbNum"]) + tdLog.info("start insert data into normal tables....") + tmqCom.insert_rows_into_ntbl(tsql=tdSql, dbname=paraDict["dbName"], tbname_prefix=paraDict["ctbPrefix"], tbname_index_start_num = 1, column_ele_list=paraDict["colSchema"],startTs=paraDict["startTs"], tblNum=paraDict["ctbNum"], rows=paraDict["rowsPerTbl"]) + + tdLog.info("create topics from database") + topicFromDb = 'topic_dbt' + tdSql.execute("create topic %s as database %s" %(topicFromDb, paraDict['dbName'])) + + if self.snapshot == 0: + consumerId = 2 + elif self.snapshot == 1: + consumerId = 3 + + expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * 2) + topicList = topicFromDb + ifcheckdata = 1 + ifManualCommit = 1 + keyList = 'group.id:cgrp1,\ + enable.auto.commit:true,\ + auto.commit.interval.ms:1000,\ + auto.offset.reset:earliest' + tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) + + tdLog.info("start consume processor") + tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) + + tmqCom.getStartConsumeNotifyFromTmqsim() + tdLog.info("drop some ntables") + # drop 1/4 ctbls from half offset + paraDict["ctbStartIdx"] = paraDict["ctbStartIdx"] + int(paraDict["ctbNum"] * 1 / 2) + paraDict["ctbNum"] = int(paraDict["ctbNum"] / 4) + tmqCom.drop_ctable(tdSql, dbname=paraDict['dbName'], count=paraDict["ctbNum"], default_ctbname_prefix=paraDict["ctbPrefix"], ctbStartIdx=paraDict["ctbStartIdx"]) + + tdLog.info("start create some new normal tables....") + paraDict["ctbPrefix"] = 'newCtb' + paraDict["ctbNum"] 
= self.ctbNum + tmqCom.create_ntable(tsql=tdSql, dbname=paraDict["dbName"], tbname_prefix=paraDict["ctbPrefix"], tbname_index_start_num = 1, column_elm_list=paraDict["colSchema"], colPrefix='c', tblNum=paraDict["ctbNum"]) + tdLog.info("start insert data into these new normal tables....") + tmqCom.insert_rows_into_ntbl(tsql=tdSql, dbname=paraDict["dbName"], tbname_prefix=paraDict["ctbPrefix"], tbname_index_start_num = 1, column_ele_list=paraDict["colSchema"],startTs=paraDict["startTs"], tblNum=paraDict["ctbNum"], rows=paraDict["rowsPerTbl"]) + + tdLog.info("start to check consume result") + expectRows = 1 + resultList = tmqCom.selectConsumeResult(expectRows) + totalConsumeRows = 0 + for i in range(expectRows): + totalConsumeRows += resultList[i] + + tdLog.info("act consume rows: %d, expect consume rows: %d"%(totalConsumeRows, expectrowcnt)) + + if not ((totalConsumeRows >= expectrowcnt / 2 * (1 + 3/4)) and (totalConsumeRows < expectrowcnt)): + tdLog.exit("tmq consume rows error with snapshot = 0!") + + tdLog.info("wait subscriptions exit ....") + tmqCom.waitSubscriptionExit(tdSql, topicFromDb) + + tdSql.query("drop topic %s"%topicFromDb) + tdLog.info("success dorp topic: %s"%topicFromDb) + tdLog.printNoPrefix("======== test case 2 end ...... 
") + + def run(self): + tdLog.printNoPrefix("=============================================") + tdLog.printNoPrefix("======== snapshot is 0: only consume from wal") + self.snapshot = 0 + self.tmqCase1() + self.tmqCase2() + + # tdLog.printNoPrefix("====================================================================") + # tdLog.printNoPrefix("======== snapshot is 1: firstly consume from tsbs, and then from wal") + # self.snapshot = 1 + # self.tmqCase1() + # self.tmqCase2() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/7-tmq/tmqDropNtb.py b/tests/system-test/7-tmq/tmqDropNtb-snapshot1.py similarity index 93% rename from tests/system-test/7-tmq/tmqDropNtb.py rename to tests/system-test/7-tmq/tmqDropNtb-snapshot1.py index e1f5794ce2..b23f422585 100644 --- a/tests/system-test/7-tmq/tmqDropNtb.py +++ b/tests/system-test/7-tmq/tmqDropNtb-snapshot1.py @@ -18,7 +18,7 @@ class TDTestCase: def __init__(self): self.snapshot = 0 self.vgroups = 4 - self.ctbNum = 100 + self.ctbNum = 1000 self.rowsPerTbl = 10 def init(self, conn, logSql): @@ -39,9 +39,9 @@ class TDTestCase: 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], 'ctbPrefix': 'ntb', 'ctbStartIdx': 0, - 'ctbNum': 100, - 'rowsPerTbl': 1000, - 'batchNum': 1000, + 'ctbNum': 1000, + 'rowsPerTbl': 100, + 'batchNum': 100, 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 'endTs': 0, 'pollDelay': 5, @@ -125,9 +125,9 @@ class TDTestCase: 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], 'ctbPrefix': 'ntb', 'ctbStartIdx': 0, - 'ctbNum': 100, - 'rowsPerTbl': 1000, - 'batchNum': 1000, 
+ 'ctbNum': 1000, + 'rowsPerTbl': 100, + 'batchNum': 100, 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 'endTs': 0, 'pollDelay': 10, @@ -203,16 +203,16 @@ class TDTestCase: tdLog.printNoPrefix("======== test case 2 end ...... ") def run(self): - tdLog.printNoPrefix("=============================================") - tdLog.printNoPrefix("======== snapshot is 0: only consume from wal") - self.snapshot = 0 + # tdLog.printNoPrefix("=============================================") + # tdLog.printNoPrefix("======== snapshot is 0: only consume from wal") + # self.snapshot = 0 # self.tmqCase1() - self.tmqCase2() + # self.tmqCase2() tdLog.printNoPrefix("====================================================================") tdLog.printNoPrefix("======== snapshot is 1: firstly consume from tsbs, and then from wal") self.snapshot = 1 - # self.tmqCase1() + self.tmqCase1() self.tmqCase2() def stop(self): diff --git a/tests/system-test/fulltest.sh b/tests/system-test/fulltest.sh index 5cc7aca675..f074bd8850 100755 --- a/tests/system-test/fulltest.sh +++ b/tests/system-test/fulltest.sh @@ -210,7 +210,7 @@ python3 ./test.py -f 7-tmq/tmqConsFromTsdb1-1ctb-funcNFilter.py python3 ./test.py -f 7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb-funcNFilter.py python3 ./test.py -f 7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb.py python3 ./test.py -f 7-tmq/tmqAutoCreateTbl.py -#python3 ./test.py -f 7-tmq/tmqDnodeRestart.py +python3 ./test.py -f 7-tmq/tmqDnodeRestart.py python3 ./test.py -f 7-tmq/tmqUpdate-1ctb.py python3 ./test.py -f 7-tmq/tmqUpdateWithConsume.py python3 ./test.py -f 7-tmq/tmqUpdate-multiCtb-snapshot0.py @@ -219,12 +219,14 @@ python3 ./test.py -f 7-tmq/tmqDelete-1ctb.py python3 ./test.py -f 7-tmq/tmqDelete-multiCtb.py python3 ./test.py -f 7-tmq/tmqDropStb.py python3 ./test.py -f 7-tmq/tmqDropStbCtb.py -python3 ./test.py -f 7-tmq/tmqDropNtb.py +python3 ./test.py -f 7-tmq/tmqDropNtb-snapshot0.py +python3 ./test.py -f 7-tmq/tmqDropNtb-snapshot1.py python3 ./test.py -f 7-tmq/tmqUdf.py 
python3 ./test.py -f 7-tmq/tmqUdf-multCtb-snapshot0.py python3 ./test.py -f 7-tmq/tmqUdf-multCtb-snapshot1.py python3 ./test.py -f 7-tmq/stbTagFilter-1ctb.py - +python3 ./test.py -f 7-tmq/dataFromTsdbNWal.py +python3 ./test.py -f 7-tmq/dataFromTsdbNWal-multiCtb.py # python3 ./test.py -f 7-tmq/stbTagFilter-multiCtb.py #------------querPolicy 2----------- diff --git a/tools/taos-tools b/tools/taos-tools index 9cfa195713..0b8a3373bb 160000 --- a/tools/taos-tools +++ b/tools/taos-tools @@ -1 +1 @@ -Subproject commit 9cfa195713d1cae9edf417a8d49bde87dd971016 +Subproject commit 0b8a3373bb7548f8106d13e7d3b0a988d3c4d48a