From f195decac985bb6df39f4d01af8fe5c9a31566b3 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 21 Jul 2022 06:27:32 +0000 Subject: [PATCH 01/27] more fix --- source/dnode/vnode/src/inc/tsdb.h | 88 +++++--- source/dnode/vnode/src/tsdb/tsdbCommit.c | 33 +-- source/dnode/vnode/src/tsdb/tsdbFS.c | 73 +++---- source/dnode/vnode/src/tsdb/tsdbFile.c | 193 ++++++------------ source/dnode/vnode/src/tsdb/tsdbMemTable.c | 2 + source/dnode/vnode/src/tsdb/tsdbRead.c | 56 +++++ .../dnode/vnode/src/tsdb/tsdbReaderWriter.c | 180 ++++++++++------ source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 32 +-- 8 files changed, 373 insertions(+), 284 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 30a6188db0..d465ba4d9b 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -64,6 +64,7 @@ typedef struct STsdbFS STsdbFS; typedef struct SRowMerger SRowMerger; typedef struct STsdbFSState STsdbFSState; typedef struct STsdbSnapHdr STsdbSnapHdr; +typedef struct STsdbReadSnap STsdbReadSnap; #define TSDB_MAX_SUBBLOCKS 8 #define TSDB_FHDR_SIZE 512 @@ -188,16 +189,22 @@ bool tsdbTbDataIterNext(STbDataIter *pIter); int32_t tsdbGetNRowsInTbData(STbData *pTbData); // tsdbFile.c ============================================================================================== typedef enum { TSDB_HEAD_FILE = 0, TSDB_DATA_FILE, TSDB_LAST_FILE, TSDB_SMA_FILE } EDataFileT; -void tsdbDataFileName(STsdb *pTsdb, SDFileSet *pDFileSet, EDataFileT ftype, char fname[]); -bool tsdbFileIsSame(SDFileSet *pDFileSet1, SDFileSet *pDFileSet2, EDataFileT ftype); + bool tsdbDelFileIsSame(SDelFile *pDelFile1, SDelFile *pDelFile2); -int32_t tsdbUpdateDFileHdr(TdFilePtr pFD, SDFileSet *pSet, EDataFileT ftype); int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype); -int32_t tPutDataFileHdr(uint8_t *p, SDFileSet *pSet, EDataFileT ftype); +int32_t tPutHeadFile(uint8_t *p, SHeadFile *pHeadFile); +int32_t tPutDataFile(uint8_t *p, SDataFile *pDataFile); +int32_t tPutLastFile(uint8_t *p, SLastFile *pLastFile); +int32_t tPutSmaFile(uint8_t *p, SSmaFile *pSmaFile); int32_t tPutDelFile(uint8_t *p, SDelFile *pDelFile); int32_t tGetDelFile(uint8_t *p, SDelFile *pDelFile); int32_t tPutDFileSet(uint8_t *p, SDFileSet *pSet); int32_t tGetDFileSet(uint8_t *p, SDFileSet *pSet); + +void tsdbHeadFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SHeadFile *pHeadF, char fname[]); +void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, char fname[]); +void tsdbLastFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SLastFile *pLastF, char fname[]); +void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, char fname[]); // SDelFile void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]); // tsdbFS.c ============================================================================================== @@ -222,8 +229,7 @@ int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *pMapData, uint8_t **ppBu int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_t **ppBuf1, uint8_t **ppBuf2, SBlockIdx *pBlockIdx, SBlock *pBlock, int8_t cmprAlg); -SDFileSet *tsdbDataFWriterGetWSet(SDataFWriter *pWriter); -int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo); +int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo); // SDataFReader int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pSet); int32_t tsdbDataFReaderClose(SDataFReader **ppReader); @@ -245,6 +251,9 @@ int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb int32_t tsdbDelFReaderClose(SDelFReader **ppReader); int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData, uint8_t **ppBuf); int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx, uint8_t **ppBuf); +// tsdbRead.c ============================================================================================== +int32_t tsdbTakeReadSnap(STsdb *pTsdb, STsdbReadSnap **ppSnap); +void tsdbUntakeReadSnap(STsdb *pTsdb, STsdbReadSnap *pSnap); #define TSDB_CACHE_NO(c) ((c).cacheLast == 0) #define TSDB_CACHE_LAST_ROW(c) (((c).cacheLast & 1) > 0) @@ -465,12 +474,6 @@ struct SDelIdx { int64_t size; }; -struct SDelFile { - int64_t commitID; - int64_t size; - int64_t offset; -}; - #pragma pack(push, 1) struct SBlockDataHdr { uint32_t delimiter; @@ -479,34 +482,50 @@ struct SBlockDataHdr { }; #pragma pack(pop) +struct SDelFile { + volatile int32_t nRef; + + int64_t commitID; + int64_t size; + int64_t offset; +}; + struct SHeadFile { + volatile int32_t nRef; + int64_t commitID; int64_t size; int64_t offset; }; struct SDataFile { + volatile int32_t nRef; + int64_t commitID; int64_t size; }; struct SLastFile { + volatile int32_t nRef; + int64_t commitID; int64_t size; }; struct SSmaFile { + volatile int32_t nRef; + int64_t commitID; int64_t size; }; struct SDFileSet { - SDiskID diskId; - int32_t fid; - SHeadFile fHead; - SDataFile fData; - SLastFile fLast; - SSmaFile fSma; + SDiskID diskId; + int32_t fid; + SHeadFile *pHeadF; + SDataFile *pDataF; + SLastFile *pLastF; + SSmaFile *pSmaF; }; struct SRowIter { @@ -528,11 +547,13 @@ struct STsdbFSState { }; struct STsdbFS { - STsdb *pTsdb; - TdThreadRwlock lock; - int8_t inTxn; - STsdbFSState *cState; - STsdbFSState *nState; + STsdb *pTsdb; + STsdbFSState *cState; + STsdbFSState *nState; + + // new + SDelFile *pDelFile; + SArray aDFileSetP; // SArray }; struct SDelFWriter { @@ -541,6 +562,27 @@ struct SDelFWriter { TdFilePtr pWriteH; }; +struct SDataFWriter { + STsdb *pTsdb; + SDFileSet wSet; + + TdFilePtr pHeadFD; + TdFilePtr pDataFD; + TdFilePtr pLastFD; + TdFilePtr pSmaFD; + + SHeadFile fHead; + SDataFile fData; + SLastFile fLast; + SSmaFile fSma; +}; + +struct STsdbReadSnap { + SMemTable *pMem; + SMemTable *pIMem; + STsdbFS fs; +}; + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit.c b/source/dnode/vnode/src/tsdb/tsdbCommit.c index 13f310ae27..3c496918e8 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit.c @@ -273,7 +273,6 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { int32_t code = 0; STsdb *pTsdb = pCommitter->pTsdb; SDFileSet *pRSet = NULL; - SDFileSet wSet; // memory pCommitter->nextKey = TSKEY_MAX; @@ -292,23 +291,29 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { } // new + SHeadFile fHead; + SDataFile fData; + SLastFile fLast; + SSmaFile fSma; + SDFileSet wSet = {.pHeadF = &fHead, .pDataF = &fData, .pLastF = &fLast, .pSmaF = &fSma}; + taosArrayClear(pCommitter->aBlockIdxN); tMapDataReset(&pCommitter->nBlockMap); tBlockDataReset(&pCommitter->nBlockData); if (pRSet) { - wSet = (SDFileSet){.diskId = pRSet->diskId, - .fid = pCommitter->commitFid, - .fHead = {.commitID = pCommitter->commitID, .offset = 0, .size = 0}, - .fData = pRSet->fData, - .fLast = {.commitID = pCommitter->commitID, .size = 0}, - .fSma = pRSet->fSma}; + wSet.diskId = pRSet->diskId; + wSet.fid = pCommitter->commitFid; + fHead = (SHeadFile){.commitID = pCommitter->commitID, .offset = 0, .size = 0}; + fData = *pRSet->pDataF; + fLast = (SLastFile){.commitID = pCommitter->commitID, .size = 0}; + fSma = *pRSet->pSmaF; } else { - wSet = (SDFileSet){.diskId = (SDiskID){.level = 0, .id = 0}, - .fid = pCommitter->commitFid, - .fHead = {.commitID = pCommitter->commitID, .offset = 0, .size = 0}, - .fData = {.commitID = pCommitter->commitID, .size = 0}, - .fLast = {.commitID = pCommitter->commitID, .size = 0}, - .fSma = {.commitID = pCommitter->commitID, .size = 0}}; + wSet.diskId = (SDiskID){.level = 0, .id = 0}; + wSet.fid = pCommitter->commitFid; + fHead = (SHeadFile){.commitID = pCommitter->commitID, .offset = 0, .size = 0}; + fData = (SDataFile){.commitID = pCommitter->commitID, .size = 0}; + fLast = (SLastFile){.commitID = pCommitter->commitID, .size = 0}; + fSma = (SSmaFile){.commitID = pCommitter->commitID, .size = 0}; } code = tsdbDataFWriterOpen(&pCommitter->pWriter, pTsdb, &wSet); if (code) goto _err; @@ -855,7 +860,7 @@ static int32_t tsdbCommitFileDataEnd(SCommitter *pCommitter) { if (code) goto _err; // upsert SDFileSet - code = tsdbFSStateUpsertDFileSet(pCommitter->pTsdb->pFS->nState, tsdbDataFWriterGetWSet(pCommitter->pWriter)); + code = tsdbFSStateUpsertDFileSet(pCommitter->pTsdb->pFS->nState, &pCommitter->pWriter->wSet); if (code) goto _err; // close and sync diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index 3bc79621e1..f5e6e9744e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -174,62 +174,64 @@ static int32_t tsdbApplyDFileSetChange(STsdbFS *pFS, SDFileSet *pFrom, SDFileSet char fname[TSDB_FILENAME_LEN]; if (pFrom && pTo) { + bool isSameDisk = (pFrom->diskId.level == pTo->diskId.level) && (pFrom->diskId.id == pTo->diskId.id); + // head - if (tsdbFileIsSame(pFrom, pTo, TSDB_HEAD_FILE)) { - ASSERT(pFrom->fHead.size == pTo->fHead.size); - ASSERT(pFrom->fHead.offset == pTo->fHead.offset); + if (isSameDisk && pFrom->pHeadF->commitID == pTo->pHeadF->commitID) { + ASSERT(pFrom->pHeadF->size == pTo->pHeadF->size); + ASSERT(pFrom->pHeadF->offset == pTo->pHeadF->offset); } else { - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_HEAD_FILE, fname); + tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); taosRemoveFile(fname); } // data - if (tsdbFileIsSame(pFrom, pTo, TSDB_DATA_FILE)) { - if (pFrom->fData.size > pTo->fData.size) { + if (isSameDisk && pFrom->pDataF->commitID == pTo->pDataF->commitID) { + if (pFrom->pDataF->size > pTo->pDataF->size) { code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_DATA_FILE); if (code) goto _err; } } else { - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_DATA_FILE, fname); + tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); taosRemoveFile(fname); } // last - if (tsdbFileIsSame(pFrom, pTo, TSDB_LAST_FILE)) { - if (pFrom->fLast.size > pTo->fLast.size) { + if (isSameDisk && pFrom->pLastF->commitID == pTo->pLastF->commitID) { + if (pFrom->pLastF->size > pTo->pLastF->size) { code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_LAST_FILE); if (code) goto _err; } } else { - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_LAST_FILE, fname); + tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); taosRemoveFile(fname); } // sma - if (tsdbFileIsSame(pFrom, pTo, TSDB_SMA_FILE)) { - if (pFrom->fSma.size > pTo->fSma.size) { + if (isSameDisk && pFrom->pSmaF->commitID == pTo->pSmaF->commitID) { + if (pFrom->pSmaF->size > pTo->pSmaF->size) { code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_SMA_FILE); if (code) goto _err; } } else { - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_SMA_FILE, fname); + tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); taosRemoveFile(fname); } } else if (pFrom) { // head - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_HEAD_FILE, fname); + tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); taosRemoveFile(fname); // data - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_DATA_FILE, fname); + tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); taosRemoveFile(fname); // last - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_LAST_FILE, fname); + tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); taosRemoveFile(fname); // fsm - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_SMA_FILE, fname); + tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); taosRemoveFile(fname); } @@ -341,7 +343,6 @@ static void tsdbFSDestroy(STsdbFS *pFS) { taosMemoryFree(pFS->cState); } - taosThreadRwlockDestroy(&pFS->lock); taosMemoryFree(pFS); } // TODO @@ -358,15 +359,6 @@ static int32_t tsdbFSCreate(STsdb *pTsdb, STsdbFS **ppFS) { } pFS->pTsdb = pTsdb; - code = taosThreadRwlockInit(&pFS->lock, NULL); - if (code) { - taosMemoryFree(pFS); - code = TAOS_SYSTEM_ERROR(code); - goto _err; - } - - pFS->inTxn = 0; - pFS->cState = (STsdbFSState *)taosMemoryCalloc(1, sizeof(STsdbFSState)); if (pFS->cState == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -431,7 +423,7 @@ static int32_t tsdbScanAndTryFixFS(STsdbFS *pFS, int8_t deepScan) { SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->cState->aDFileSet, iSet); // head ========= - tsdbDataFileName(pTsdb, pDFileSet, TSDB_HEAD_FILE, fname); + tsdbHeadFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pHeadF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -442,16 +434,16 @@ static int32_t tsdbScanAndTryFixFS(STsdbFS *pFS, int8_t deepScan) { } // data ========= - tsdbDataFileName(pTsdb, pDFileSet, TSDB_DATA_FILE, fname); + tsdbDataFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pDataF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (size < pDFileSet->fData.size) { + if (size < pDFileSet->pDataF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pDFileSet->fData.size) { + } else if (size > pDFileSet->pDataF->size) { ASSERT(0); // need to rollback the file } @@ -461,16 +453,16 @@ static int32_t tsdbScanAndTryFixFS(STsdbFS *pFS, int8_t deepScan) { } // last =========== - tsdbDataFileName(pTsdb, pDFileSet, TSDB_LAST_FILE, fname); + tsdbLastFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pLastF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (size < pDFileSet->fLast.size) { + if (size < pDFileSet->pLastF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pDFileSet->fLast.size) { + } else if (size > pDFileSet->pLastF->size) { ASSERT(0); // need to rollback the file } @@ -480,16 +472,16 @@ static int32_t tsdbScanAndTryFixFS(STsdbFS *pFS, int8_t deepScan) { } // sma ============= - tsdbDataFileName(pTsdb, pDFileSet, TSDB_SMA_FILE, fname); + tsdbSmaFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pSmaF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (size < pDFileSet->fSma.size) { + if (size < pDFileSet->pSmaF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pDFileSet->fSma.size) { + } else if (size > pDFileSet->pSmaF->size) { ASSERT(0); // need to rollback the file } @@ -573,8 +565,6 @@ int32_t tsdbFSClose(STsdbFS *pFS) { int32_t tsdbFSBegin(STsdbFS *pFS) { int32_t code = 0; - ASSERT(!pFS->inTxn); - // SDelFile pFS->nState->pDelFile = NULL; if (pFS->cState->pDelFile) { @@ -593,7 +583,6 @@ int32_t tsdbFSBegin(STsdbFS *pFS) { } } - pFS->inTxn = 1; return code; _err: @@ -631,8 +620,6 @@ int32_t tsdbFSCommit(STsdbFS *pFS) { code = tsdbFSApplyDiskChange(pFS, pFS->nState, pFS->cState); if (code) goto _err; - pFS->inTxn = 0; - return code; _err: @@ -646,8 +633,6 @@ int32_t tsdbFSRollback(STsdbFS *pFS) { code = tsdbFSApplyDiskChange(pFS, pFS->nState, pFS->cState); if (code) goto _err; - pFS->inTxn = 0; - return code; _err: diff --git a/source/dnode/vnode/src/tsdb/tsdbFile.c b/source/dnode/vnode/src/tsdb/tsdbFile.c index f15ad072e7..4a41e9fb41 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile.c @@ -15,7 +15,7 @@ #include "tsdb.h" -static int32_t tPutHeadFile(uint8_t *p, SHeadFile *pHeadFile) { +int32_t tPutHeadFile(uint8_t *p, SHeadFile *pHeadFile) { int32_t n = 0; n += tPutI64v(p ? p + n : p, pHeadFile->commitID); @@ -35,7 +35,7 @@ static int32_t tGetHeadFile(uint8_t *p, SHeadFile *pHeadFile) { return n; } -static int32_t tPutDataFile(uint8_t *p, SDataFile *pDataFile) { +int32_t tPutDataFile(uint8_t *p, SDataFile *pDataFile) { int32_t n = 0; n += tPutI64v(p ? p + n : p, pDataFile->commitID); @@ -53,7 +53,7 @@ static int32_t tGetDataFile(uint8_t *p, SDataFile *pDataFile) { return n; } -static int32_t tPutLastFile(uint8_t *p, SLastFile *pLastFile) { +int32_t tPutLastFile(uint8_t *p, SLastFile *pLastFile) { int32_t n = 0; n += tPutI64v(p ? p + n : p, pLastFile->commitID); @@ -71,7 +71,7 @@ static int32_t tGetLastFile(uint8_t *p, SLastFile *pLastFile) { return n; } -static int32_t tPutSmaFile(uint8_t *p, SSmaFile *pSmaFile) { +int32_t tPutSmaFile(uint8_t *p, SSmaFile *pSmaFile) { int32_t n = 0; n += tPutI64v(p ? p + n : p, pSmaFile->commitID); @@ -90,90 +90,63 @@ static int32_t tGetSmaFile(uint8_t *p, SSmaFile *pSmaFile) { } // EXPOSED APIS ================================================== -void tsdbDataFileName(STsdb *pTsdb, SDFileSet *pDFileSet, EDataFileT ftype, char fname[]) { - STfs *pTfs = pTsdb->pVnode->pTfs; - - switch (ftype) { - case TSDB_HEAD_FILE: - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTfs, pDFileSet->diskId), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pDFileSet->fid, pDFileSet->fHead.commitID, - ".head"); - break; - case TSDB_DATA_FILE: - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTfs, pDFileSet->diskId), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pDFileSet->fid, pDFileSet->fData.commitID, - ".data"); - break; - case TSDB_LAST_FILE: - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTfs, pDFileSet->diskId), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pDFileSet->fid, pDFileSet->fLast.commitID, - ".last"); - break; - case TSDB_SMA_FILE: - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTfs, pDFileSet->diskId), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pDFileSet->fid, pDFileSet->fSma.commitID, - ".sma"); - break; - default: - ASSERT(0); - break; - } +void tsdbHeadFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SHeadFile *pHeadF, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pHeadF->commitID, ".head"); } -bool tsdbFileIsSame(SDFileSet *pDFileSet1, SDFileSet *pDFileSet2, EDataFileT ftype) { - if (pDFileSet1->diskId.level != pDFileSet2->diskId.level || pDFileSet1->diskId.id != pDFileSet2->diskId.id) { - return false; - } +void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pDataF->commitID, ".data"); +} - switch (ftype) { - case TSDB_HEAD_FILE: - return pDFileSet1->fHead.commitID == pDFileSet2->fHead.commitID; - case TSDB_DATA_FILE: - return pDFileSet1->fData.commitID == pDFileSet2->fData.commitID; - case TSDB_LAST_FILE: - return pDFileSet1->fLast.commitID == pDFileSet2->fLast.commitID; - case TSDB_SMA_FILE: - return pDFileSet1->fSma.commitID == pDFileSet2->fSma.commitID; - default: - ASSERT(0); - break; - } +void tsdbLastFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SLastFile *pLastF, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pLastF->commitID, ".last"); +} + +void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pSmaF->commitID, ".sma"); } bool tsdbDelFileIsSame(SDelFile *pDelFile1, SDelFile *pDelFile2) { return pDelFile1->commitID == pDelFile2->commitID; } -int32_t tsdbUpdateDFileHdr(TdFilePtr pFD, SDFileSet *pSet, EDataFileT ftype) { - int32_t code = 0; - int64_t n; - char hdr[TSDB_FHDR_SIZE]; - - memset(hdr, 0, TSDB_FHDR_SIZE); - tPutDataFileHdr(hdr, pSet, ftype); - taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); - - n = taosLSeekFile(pFD, 0, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _exit; - } - - n = taosWriteFile(pFD, hdr, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _exit; - } - -_exit: - return code; -} - int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { int32_t code = 0; int64_t size; + int64_t n; TdFilePtr pFD; char fname[TSDB_FILENAME_LEN]; + char hdr[TSDB_FHDR_SIZE] = {0}; - tsdbDataFileName(pTsdb, pSet, ftype, fname); + // truncate + switch (ftype) { + case TSDB_HEAD_FILE: + size = pSet->pHeadF->size; + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); + tPutHeadFile(hdr, pSet->pHeadF); + break; + case TSDB_DATA_FILE: + size = pSet->pDataF->size; + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); + tPutDataFile(hdr, pSet->pDataF); + break; + case TSDB_LAST_FILE: + size = pSet->pLastF->size; + tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); + tPutLastFile(hdr, pSet->pLastF); + break; + case TSDB_SMA_FILE: + size = pSet->pSmaF->size; + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); + tPutSmaFile(hdr, pSet->pSmaF); + break; + default: + ASSERT(0); + } + + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); // open pFD = taosOpenFile(fname, TD_FILE_WRITE); @@ -182,31 +155,24 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { goto _err; } - // truncate - switch (ftype) { - case TSDB_HEAD_FILE: - size = pSet->fHead.size; - break; - case TSDB_DATA_FILE: - size = pSet->fData.size; - break; - case TSDB_LAST_FILE: - size = pSet->fLast.size; - break; - case TSDB_SMA_FILE: - size = pSet->fSma.size; - break; - default: - ASSERT(0); - } + // ftruncate if (taosFtruncateFile(pFD, size) < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } // update header - code = tsdbUpdateDFileHdr(pFD, pSet, ftype); - if (code) goto _err; + n = taosLSeekFile(pFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } // sync if (taosFsyncFile(pFD) < 0) { @@ -223,39 +189,16 @@ _err: return code; } -int32_t tPutDataFileHdr(uint8_t *p, SDFileSet *pSet, EDataFileT ftype) { - int32_t n = 0; - - switch (ftype) { - case TSDB_HEAD_FILE: - n += tPutHeadFile(p ? p + n : p, &pSet->fHead); - break; - case TSDB_DATA_FILE: - n += tPutDataFile(p ? p + n : p, &pSet->fData); - break; - case TSDB_LAST_FILE: - n += tPutLastFile(p ? p + n : p, &pSet->fLast); - break; - case TSDB_SMA_FILE: - n += tPutSmaFile(p ? p + n : p, &pSet->fSma); - break; - default: - ASSERT(0); - } - - return n; -} - int32_t tPutDFileSet(uint8_t *p, SDFileSet *pSet) { int32_t n = 0; n += tPutI32v(p ? p + n : p, pSet->diskId.level); n += tPutI32v(p ? p + n : p, pSet->diskId.id); n += tPutI32v(p ? p + n : p, pSet->fid); - n += tPutHeadFile(p ? p + n : p, &pSet->fHead); - n += tPutDataFile(p ? p + n : p, &pSet->fData); - n += tPutLastFile(p ? p + n : p, &pSet->fLast); - n += tPutSmaFile(p ? p + n : p, &pSet->fSma); + n += tPutHeadFile(p ? p + n : p, pSet->pHeadF); + n += tPutDataFile(p ? p + n : p, pSet->pDataF); + n += tPutLastFile(p ? p + n : p, pSet->pLastF); + n += tPutSmaFile(p ? p + n : p, pSet->pSmaF); return n; } @@ -266,10 +209,10 @@ int32_t tGetDFileSet(uint8_t *p, SDFileSet *pSet) { n += tGetI32v(p + n, &pSet->diskId.level); n += tGetI32v(p + n, &pSet->diskId.id); n += tGetI32v(p + n, &pSet->fid); - n += tGetHeadFile(p + n, &pSet->fHead); - n += tGetDataFile(p + n, &pSet->fData); - n += tGetLastFile(p + n, &pSet->fLast); - n += tGetSmaFile(p + n, &pSet->fSma); + n += tGetHeadFile(p + n, pSet->pHeadF); + n += tGetDataFile(p + n, pSet->pDataF); + n += tGetLastFile(p + n, pSet->pLastF); + n += tGetSmaFile(p + n, pSet->pSmaF); return n; } diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index ee8a23e76e..80ba5f0363 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -607,6 +607,7 @@ void tsdbUnrefMemTable(SMemTable *pMemTable) { } int32_t tsdbTakeMemSnapshot(STsdb *pTsdb, SMemTable **ppMem, SMemTable **ppIMem) { + ASSERT(0); int32_t code = 0; // lock @@ -640,6 +641,7 @@ _exit: } void tsdbUntakeMemSnapshot(STsdb *pTsdb, SMemTable *pMem, SMemTable *pIMem) { + ASSERT(0); if (pMem) { tsdbUnrefMemTable(pMem); } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 5dc1915bff..3375dd69ba 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -3244,3 +3244,59 @@ int32_t tsdbGetTableSchema(SVnode* pVnode, int64_t uid, STSchema** pSchema, int6 return TSDB_CODE_SUCCESS; } + +int32_t tsdbTakeReadSnap(STsdb* pTsdb, STsdbReadSnap** ppSnap) { + int32_t code = 0; + + // alloc + *ppSnap = (STsdbReadSnap*)taosMemoryCalloc(1, sizeof(STsdbReadSnap)); + if (*ppSnap == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + // lock + code = taosThreadRwlockRdlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + // take snapshot + (*ppSnap)->pMem = pTsdb->mem; + (*ppSnap)->pIMem = pTsdb->imem; + + if ((*ppSnap)->pMem) { + tsdbRefMemTable((*ppSnap)->pMem); + } + + if ((*ppSnap)->pIMem) { + tsdbRefMemTable((*ppSnap)->pIMem); + } + + // fs (todo) + + // unlock + code = taosThreadRwlockUnlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + +_exit: + return code; +} + +void tsdbUntakeReadSnap(STsdb* pTsdb, STsdbReadSnap* pSnap) { + if (pSnap) { + if (pSnap->pMem) { + tsdbUnrefMemTable(pSnap->pMem); + } + + if (pSnap->pIMem) { + tsdbUnrefMemTable(pSnap->pIMem); + } + + // fs (todo) + } +} diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index 5e8157864f..7365ac23b8 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -459,7 +459,7 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS // open impl // head - tsdbDataFileName(pTsdb, pSet, TSDB_HEAD_FILE, fname); + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); pReader->pHeadFD = taosOpenFile(fname, TD_FILE_READ); if (pReader->pHeadFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -467,7 +467,7 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS } // data - tsdbDataFileName(pTsdb, pSet, TSDB_DATA_FILE, fname); + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); pReader->pDataFD = taosOpenFile(fname, TD_FILE_READ); if (pReader->pDataFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -475,7 +475,7 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS } // last - tsdbDataFileName(pTsdb, pSet, TSDB_LAST_FILE, fname); + tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); pReader->pLastFD = taosOpenFile(fname, TD_FILE_READ); if (pReader->pLastFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -483,7 +483,7 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS } // sma - tsdbDataFileName(pTsdb, pSet, TSDB_SMA_FILE, fname); + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); pReader->pSmaFD = taosOpenFile(fname, TD_FILE_READ); if (pReader->pSmaFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -536,8 +536,8 @@ _err: int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx, uint8_t **ppBuf) { int32_t code = 0; - int64_t offset = pReader->pSet->fHead.offset; - int64_t size = pReader->pSet->fHead.size - offset; + int64_t offset = pReader->pSet->pHeadF->offset; + int64_t size = pReader->pSet->pHeadF->size - offset; uint8_t *pBuf = NULL; int64_t n; uint32_t delimiter; @@ -1211,17 +1211,6 @@ _err: } // SDataFWriter ==================================================== -struct SDataFWriter { - STsdb *pTsdb; - SDFileSet wSet; - TdFilePtr pHeadFD; - TdFilePtr pDataFD; - TdFilePtr pLastFD; - TdFilePtr pSmaFD; -}; - -SDFileSet *tsdbDataFWriterGetWSet(SDataFWriter *pWriter) { return &pWriter->wSet; } - int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet) { int32_t code = 0; int32_t flag; @@ -1237,12 +1226,20 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS goto _err; } pWriter->pTsdb = pTsdb; - pWriter->wSet = *pSet; - pSet = &pWriter->wSet; + pWriter->wSet = (SDFileSet){.diskId = pSet->diskId, + .fid = pSet->fid, + .pHeadF = &pWriter->fHead, + .pDataF = &pWriter->fData, + .pLastF = &pWriter->fLast, + .pSmaF = &pWriter->fSma}; + pWriter->fHead = *pSet->pHeadF; + pWriter->fData = *pSet->pDataF; + pWriter->fLast = *pSet->pLastF; + pWriter->fSma = *pSet->pSmaF; // head flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; - tsdbDataFileName(pTsdb, pSet, TSDB_HEAD_FILE, fname); + tsdbHeadFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fHead, fname); pWriter->pHeadFD = taosOpenFile(fname, flag); if (pWriter->pHeadFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -1257,28 +1254,28 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS ASSERT(n == TSDB_FHDR_SIZE); - pSet->fHead.size += TSDB_FHDR_SIZE; + pWriter->fHead.size += TSDB_FHDR_SIZE; // data - if (pSet->fData.size == 0) { + if (pWriter->fData.size == 0) { flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; } else { flag = TD_FILE_WRITE; } - tsdbDataFileName(pTsdb, pSet, TSDB_DATA_FILE, fname); + tsdbDataFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fData, fname); pWriter->pDataFD = taosOpenFile(fname, flag); if (pWriter->pDataFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (pSet->fData.size == 0) { + if (pWriter->fData.size == 0) { n = taosWriteFile(pWriter->pDataFD, hdr, TSDB_FHDR_SIZE); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - pSet->fData.size += TSDB_FHDR_SIZE; + pWriter->fData.size += TSDB_FHDR_SIZE; } else { n = taosLSeekFile(pWriter->pDataFD, 0, SEEK_END); if (n < 0) { @@ -1286,29 +1283,29 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS goto _err; } - ASSERT(n == pSet->fData.size); + ASSERT(n == pWriter->fData.size); } // last - if (pSet->fLast.size == 0) { + if (pWriter->fLast.size == 0) { flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; } else { flag = TD_FILE_WRITE; } - tsdbDataFileName(pTsdb, pSet, TSDB_LAST_FILE, fname); + tsdbLastFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fLast, fname); pWriter->pLastFD = taosOpenFile(fname, flag); if (pWriter->pLastFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (pSet->fLast.size == 0) { + if (pWriter->fLast.size == 0) { n = taosWriteFile(pWriter->pLastFD, hdr, TSDB_FHDR_SIZE); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - pSet->fLast.size += TSDB_FHDR_SIZE; + pWriter->fLast.size += TSDB_FHDR_SIZE; } else { n = taosLSeekFile(pWriter->pLastFD, 0, SEEK_END); if (n < 0) { @@ -1316,29 +1313,29 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS goto _err; } - ASSERT(n == pSet->fLast.size); + ASSERT(n == pWriter->fLast.size); } // sma - if (pSet->fSma.size == 0) { + if (pWriter->fSma.size == 0) { flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; } else { flag = TD_FILE_WRITE; } - tsdbDataFileName(pTsdb, pSet, TSDB_SMA_FILE, fname); + tsdbSmaFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fSma, fname); pWriter->pSmaFD = taosOpenFile(fname, flag); if (pWriter->pSmaFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (pSet->fSma.size == 0) { + if (pWriter->fSma.size == 0) { n = taosWriteFile(pWriter->pSmaFD, hdr, TSDB_FHDR_SIZE); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - pSet->fSma.size += TSDB_FHDR_SIZE; + pWriter->fSma.size += TSDB_FHDR_SIZE; } else { n = taosLSeekFile(pWriter->pSmaFD, 0, SEEK_END); if (n < 0) { @@ -1346,7 +1343,7 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS goto _err; } - ASSERT(n == pSet->fSma.size); + ASSERT(n == pWriter->fSma.size); } *ppWriter = pWriter; @@ -1418,22 +1415,76 @@ _err: int32_t tsdbUpdateDFileSetHeader(SDataFWriter *pWriter) { int32_t code = 0; + int64_t n; + char hdr[TSDB_FHDR_SIZE]; // head ============== - code = tsdbUpdateDFileHdr(pWriter->pHeadFD, &pWriter->wSet, TSDB_HEAD_FILE); - if (code) goto _err; + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutHeadFile(hdr, &pWriter->fHead); + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + n = taosLSeekFile(pWriter->pHeadFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pWriter->pHeadFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } // data ============== - code = tsdbUpdateDFileHdr(pWriter->pHeadFD, &pWriter->wSet, TSDB_DATA_FILE); - if (code) goto _err; + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutDataFile(hdr, &pWriter->fData); + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + n = taosLSeekFile(pWriter->pDataFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pWriter->pDataFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } // last ============== - code = tsdbUpdateDFileHdr(pWriter->pHeadFD, &pWriter->wSet, TSDB_LAST_FILE); - if (code) goto _err; + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutLastFile(hdr, &pWriter->fLast); + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + n = taosLSeekFile(pWriter->pLastFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pWriter->pLastFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } // sma ============== - code = tsdbUpdateDFileHdr(pWriter->pHeadFD, &pWriter->wSet, TSDB_SMA_FILE); - if (code) goto _err; + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutSmaFile(hdr, &pWriter->fSma); + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + n = taosLSeekFile(pWriter->pSmaFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pWriter->pSmaFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } return code; @@ -1444,7 +1495,7 @@ _err: int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx, uint8_t **ppBuf) { int32_t code = 0; - SHeadFile *pHeadFile = &pWriter->wSet.fHead; + SHeadFile *pHeadFile = &pWriter->fHead; uint8_t *pBuf = NULL; int64_t size; int64_t n; @@ -1494,7 +1545,7 @@ _err: int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *mBlock, uint8_t **ppBuf, SBlockIdx *pBlockIdx) { int32_t code = 0; - SHeadFile *pHeadFile = &pWriter->wSet.fHead; + SHeadFile *pHeadFile = &pWriter->fHead; SBlockDataHdr hdr = {.delimiter = TSDB_FILE_DLMT, .suid = pBlockIdx->suid, .uid = pBlockIdx->uid}; uint8_t *pBuf = NULL; int64_t size; @@ -1831,9 +1882,9 @@ int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_ pSubBlock->nRow = pBlockData->nRow; pSubBlock->cmprAlg = cmprAlg; if (pBlock->last) { - pSubBlock->offset = pWriter->wSet.fLast.size; + pSubBlock->offset = pWriter->fLast.size; } else { - pSubBlock->offset = pWriter->wSet.fData.size; + pSubBlock->offset = pWriter->fData.size; } // ======================= BLOCK DATA ======================= @@ -1881,9 +1932,9 @@ int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_ pSubBlock->szBlock = pSubBlock->szBlockCol + sizeof(TSCKSUM) + nData; if (pBlock->last) { - pWriter->wSet.fLast.size += pSubBlock->szBlock; + pWriter->fLast.size += pSubBlock->szBlock; } else { - pWriter->wSet.fData.size += pSubBlock->szBlock; + pWriter->fData.size += pSubBlock->szBlock; } // ======================= BLOCK SMA ======================= @@ -1896,8 +1947,8 @@ int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_ if (code) goto _err; if (pSubBlock->nSma > 0) { - pSubBlock->sOffset = pWriter->wSet.fSma.size; - pWriter->wSet.fSma.size += (sizeof(SColumnDataAgg) * pSubBlock->nSma + sizeof(TSCKSUM)); + pSubBlock->sOffset = pWriter->fSma.size; + pWriter->fSma.size += (sizeof(SColumnDataAgg) * pSubBlock->nSma + sizeof(TSCKSUM)); } _exit: @@ -1924,8 +1975,8 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { char fNameTo[TSDB_FILENAME_LEN]; // head - tsdbDataFileName(pTsdb, pSetFrom, TSDB_HEAD_FILE, fNameFrom); - tsdbDataFileName(pTsdb, pSetTo, TSDB_HEAD_FILE, fNameTo); + tsdbHeadFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pHeadF, fNameFrom); + tsdbHeadFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pHeadF, fNameTo); pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { @@ -1939,7 +1990,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->fHead.size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pHeadF->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1948,8 +1999,8 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { taosCloseFile(&PInFD); // data - tsdbDataFileName(pTsdb, pSetFrom, TSDB_DATA_FILE, fNameFrom); - tsdbDataFileName(pTsdb, pSetTo, TSDB_DATA_FILE, fNameTo); + tsdbDataFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pDataF, fNameFrom); + tsdbDataFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pDataF, fNameTo); pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { @@ -1963,7 +2014,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->fData.size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pDataF->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1972,8 +2023,9 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { taosCloseFile(&PInFD); // last - tsdbDataFileName(pTsdb, pSetFrom, TSDB_LAST_FILE, fNameFrom); - tsdbDataFileName(pTsdb, pSetTo, TSDB_LAST_FILE, fNameTo); + tsdbLastFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pLastF, fNameFrom); + tsdbLastFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pLastF, fNameTo); + pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -1986,7 +2038,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->fLast.size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pLastF->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1995,8 +2047,8 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { taosCloseFile(&PInFD); // sma - tsdbDataFileName(pTsdb, pSetFrom, TSDB_SMA_FILE, fNameFrom); - tsdbDataFileName(pTsdb, pSetTo, TSDB_SMA_FILE, fNameTo); + tsdbSmaFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pSmaF, fNameFrom); + tsdbSmaFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pSmaF, fNameTo); pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { @@ -2010,7 +2062,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->fSma.size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pSmaF->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index fea0254045..dfb01f2ded 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -798,7 +798,7 @@ static int32_t tsdbSnapWriteDataEnd(STsdbSnapWriter* pWriter) { code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdxW, NULL); if (code) goto _err; - code = tsdbFSStateUpsertDFileSet(pTsdb->pFS->nState, tsdbDataFWriterGetWSet(pWriter->pDataFWriter)); + code = tsdbFSStateUpsertDFileSet(pTsdb->pFS->nState, &pWriter->pDataFWriter->wSet); if (code) goto _err; code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1); @@ -863,22 +863,26 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 tBlockDataReset(&pWriter->bDataR); // write - SDFileSet wSet; + SHeadFile fHead; + SDataFile fData; + SLastFile fLast; + SSmaFile fSma; + SDFileSet wSet = {.pHeadF = &fHead, .pDataF = &fData, .pLastF = &fLast, .pSmaF = &fSma}; if (pSet) { - wSet = (SDFileSet){.diskId = pSet->diskId, - .fid = fid, - .fHead = {.commitID = pWriter->commitID, .offset = 0, .size = 0}, - .fData = pSet->fData, - .fLast = {.commitID = pWriter->commitID, .size = 0}, - .fSma = pSet->fSma}; + wSet.diskId = pSet->diskId; + wSet.fid = fid; + fHead = (SHeadFile){.commitID = pWriter->commitID, .offset = 0, .size = 0}; + fData = *pSet->pDataF; + fLast = (SLastFile){.commitID = pWriter->commitID, .size = 0}; + fSma = *pSet->pSmaF; } else { - wSet = (SDFileSet){.diskId = (SDiskID){.level = 0, .id = 0}, - .fid = fid, - .fHead = {.commitID = pWriter->commitID, .offset = 0, .size = 0}, - .fData = {.commitID = pWriter->commitID, .size = 0}, - .fLast = {.commitID = pWriter->commitID, .size = 0}, - .fSma = {.commitID = pWriter->commitID, .size = 0}}; + wSet.diskId = (SDiskID){.level = 0, .id = 0}; + wSet.fid = fid; + fHead = (SHeadFile){.commitID = pWriter->commitID, .offset = 0, .size = 0}; + fData = (SDataFile){.commitID = pWriter->commitID, .size = 0}; + fLast = (SLastFile){.commitID = pWriter->commitID, .size = 0}; + fSma = (SSmaFile){.commitID = pWriter->commitID, .size = 0}; } code = tsdbDataFWriterOpen(&pWriter->pDataFWriter, pTsdb, &wSet); From c54daccf2d4421e2342071421523c37e9d3ff54a Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Thu, 21 Jul 2022 14:35:36 +0800 Subject: [PATCH 02/27] fix: plan problem caused by project eliminate optimize --- include/libs/nodes/plannodes.h | 3 +- source/libs/parser/src/parInsert.c | 30 ++++++----- source/libs/parser/src/parTranslater.c | 2 +- source/libs/parser/src/parser.c | 2 +- source/libs/parser/test/parSelectTest.cpp | 7 +++ source/libs/planner/src/planLogicCreater.c | 28 ++++++++++ source/libs/planner/src/planOptimizer.c | 53 +++++++++++++++---- source/libs/planner/src/planPhysiCreater.c | 13 +++++ source/libs/planner/test/planSubqueryTest.cpp | 8 +++ 9 files changed, 121 insertions(+), 25 deletions(-) diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index 1c3e5903f6..46aa11f562 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -29,7 +29,8 @@ extern "C" { typedef enum EDataOrderLevel { DATA_ORDER_LEVEL_NONE = 1, DATA_ORDER_LEVEL_IN_BLOCK, - DATA_ORDER_LEVEL_IN_GROUP + DATA_ORDER_LEVEL_IN_GROUP, + DATA_ORDER_LEVEL_GLOBAL } EDataOrderLevel; typedef struct SLogicNode { diff --git a/source/libs/parser/src/parInsert.c b/source/libs/parser/src/parInsert.c index c6b608ddb4..517df1a977 100644 --- a/source/libs/parser/src/parInsert.c +++ b/source/libs/parser/src/parInsert.c @@ -735,11 +735,12 @@ static int32_t parseBoundColumns(SInsertParseContext* pCxt, SParsedDataColInfo* return TSDB_CODE_SUCCESS; } -static void buildCreateTbReq(SVCreateTbReq* pTbReq, const char* tname, STag* pTag, int64_t suid, const char* sname, SArray* tagName) { +static void buildCreateTbReq(SVCreateTbReq* pTbReq, const char* tname, STag* pTag, int64_t suid, const char* sname, + SArray* tagName) { pTbReq->type = TD_CHILD_TABLE; pTbReq->name = strdup(tname); pTbReq->ctb.suid = suid; - if(sname) pTbReq->ctb.name = strdup(sname); + if (sname) pTbReq->ctb.name = strdup(sname); pTbReq->ctb.pTag = (uint8_t*)pTag; pTbReq->ctb.tagName = taosArrayDup(tagName); pTbReq->commentLen = -1; @@ -761,7 +762,7 @@ static int32_t parseTagToken(char** end, SToken* pToken, SSchema* pSchema, int16 return TSDB_CODE_SUCCESS; } -// strcpy(val->colName, pSchema->name); + // strcpy(val->colName, pSchema->name); val->cid = pSchema->colId; val->type = pSchema->type; @@ -1321,7 +1322,11 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, TdFilePtr fp, STableDataB static int32_t parseDataFromFile(SInsertParseContext* pCxt, SToken filePath, STableDataBlocks* dataBuf) { char filePathStr[TSDB_FILENAME_LEN] = {0}; - strncpy(filePathStr, filePath.z, filePath.n); + if (TK_NK_STRING == filePath.type) { + trimString(filePath.z, filePath.n, filePathStr, sizeof(filePathStr)); + } else { + strncpy(filePathStr, filePath.z, filePath.n); + } TdFilePtr fp = taosOpenFile(filePathStr, TD_FILE_READ | TD_FILE_STREAM); if (NULL == fp) { return TAOS_SYSTEM_ERROR(errno); @@ -1802,8 +1807,8 @@ int32_t qBuildStmtOutput(SQuery* pQuery, SHashObj* pVgHash, SHashObj* pBlockHash return TSDB_CODE_SUCCESS; } -int32_t qBindStmtTagsValue(void* pBlock, void* boundTags, int64_t suid, const char* sTableName, char* tName, TAOS_MULTI_BIND* bind, - char* msgBuf, int32_t msgBufLen) { +int32_t qBindStmtTagsValue(void* pBlock, void* boundTags, int64_t suid, const char* sTableName, char* tName, + TAOS_MULTI_BIND* bind, char* msgBuf, int32_t msgBufLen) { STableDataBlocks* pDataBlock = (STableDataBlocks*)pBlock; SMsgBuf pBuf = {.buf = msgBuf, .len = msgBufLen}; SParsedDataColInfo* tags = (SParsedDataColInfo*)boundTags; @@ -1854,7 +1859,7 @@ int32_t qBindStmtTagsValue(void* pBlock, void* boundTags, int64_t suid, const ch } } else { STagVal val = {.cid = pTagSchema->colId, .type = pTagSchema->type}; -// strcpy(val.colName, pTagSchema->name); + // strcpy(val.colName, pTagSchema->name); if (pTagSchema->type == TSDB_DATA_TYPE_BINARY) { val.pData = (uint8_t*)bind[c].buffer; val.nData = colLen; @@ -2245,7 +2250,8 @@ static int32_t smlBoundColumnData(SArray* cols, SParsedDataColInfo* pColList, SS * @param msg * @return int32_t */ -static int32_t smlBuildTagRow(SArray* cols, SParsedDataColInfo* tags, SSchema* pSchema, STag** ppTag, SArray** tagName, SMsgBuf* msg) { +static int32_t smlBuildTagRow(SArray* cols, SParsedDataColInfo* tags, SSchema* pSchema, STag** ppTag, SArray** tagName, + SMsgBuf* msg) { SArray* pTagArray = taosArrayInit(tags->numOfBound, sizeof(STagVal)); if (!pTagArray) { return TSDB_CODE_TSC_OUT_OF_MEMORY; @@ -2262,7 +2268,7 @@ static int32_t smlBuildTagRow(SArray* cols, SParsedDataColInfo* tags, SSchema* p taosArrayPush(*tagName, pTagSchema->name); STagVal val = {.cid = pTagSchema->colId, .type = pTagSchema->type}; -// strcpy(val.colName, pTagSchema->name); + // strcpy(val.colName, pTagSchema->name); if (pTagSchema->type == TSDB_DATA_TYPE_BINARY) { val.pData = (uint8_t*)kv->value; val.nData = kv->length; @@ -2318,7 +2324,7 @@ int32_t smlBindData(void* handle, SArray* tags, SArray* colsSchema, SArray* cols buildInvalidOperationMsg(&pBuf, "bound tags error"); return ret; } - STag* pTag = NULL; + STag* pTag = NULL; SArray* tagName = NULL; ret = smlBuildTagRow(tags, &smlHandle->tableExecHandle.tags, pTagsSchema, &pTag, &tagName, &pBuf); if (ret != TSDB_CODE_SUCCESS) { @@ -2402,9 +2408,9 @@ int32_t smlBindData(void* handle, SArray* tags, SArray* colsSchema, SArray* cols } else { int32_t colLen = kv->length; if (pColSchema->type == TSDB_DATA_TYPE_TIMESTAMP) { -// uError("SML:data before:%ld, precision:%d", kv->i, pTableMeta->tableInfo.precision); + // uError("SML:data before:%ld, precision:%d", kv->i, pTableMeta->tableInfo.precision); kv->i = convertTimePrecision(kv->i, TSDB_TIME_PRECISION_NANO, pTableMeta->tableInfo.precision); -// uError("SML:data after:%ld, precision:%d", kv->i, pTableMeta->tableInfo.precision); + // uError("SML:data after:%ld, precision:%d", kv->i, pTableMeta->tableInfo.precision); } if (IS_VAR_DATA_TYPE(kv->type)) { diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 892ae6d5ac..e198df4bc4 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -1089,7 +1089,7 @@ static int32_t translateScanPseudoColumnFunc(STranslateContext* pCxt, SFunctionN return TSDB_CODE_SUCCESS; } if (0 == LIST_LENGTH(pFunc->pParameterList)) { - if (!isSelectStmt(pCxt->pCurrStmt) || + if (!isSelectStmt(pCxt->pCurrStmt) || NULL == ((SSelectStmt*)pCxt->pCurrStmt)->pFromTable || QUERY_NODE_REAL_TABLE != nodeType(((SSelectStmt*)pCxt->pCurrStmt)->pFromTable)) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_TBNAME); } diff --git a/source/libs/parser/src/parser.c b/source/libs/parser/src/parser.c index e995dd715d..b35871e272 100644 --- a/source/libs/parser/src/parser.c +++ b/source/libs/parser/src/parser.c @@ -36,7 +36,7 @@ bool qIsInsertValuesSql(const char* pStr, size_t length) { pStr += index; index = 0; t = tStrGetToken((char*)pStr, &index, false); - if (TK_USING == t.type || TK_VALUES == t.type) { + if (TK_USING == t.type || TK_VALUES == t.type || TK_FILE == t.type) { return true; } else if (TK_SELECT == t.type) { return false; diff --git a/source/libs/parser/test/parSelectTest.cpp b/source/libs/parser/test/parSelectTest.cpp index b9a760d342..849ba14d11 100644 --- a/source/libs/parser/test/parSelectTest.cpp +++ b/source/libs/parser/test/parSelectTest.cpp @@ -444,4 +444,11 @@ TEST_F(ParserSelectTest, withoutFrom) { run("SELECT USER()"); } +TEST_F(ParserSelectTest, withoutFromSemanticCheck) { + useDb("root", "test"); + + run("SELECT c1", TSDB_CODE_PAR_INVALID_COLUMN); + run("SELECT TBNAME", TSDB_CODE_PAR_INVALID_TBNAME); +} + } // namespace ParserTest diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 9ced5c1cb6..7d6015d5c4 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -818,6 +818,8 @@ static int32_t createProjectLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSel TSWAP(pProject->node.pLimit, pSelect->pLimit); TSWAP(pProject->node.pSlimit, pSelect->pSlimit); + pProject->node.requireDataOrder = DATA_ORDER_LEVEL_NONE; + pProject->node.resultDataOrder = DATA_ORDER_LEVEL_NONE; int32_t code = TSDB_CODE_SUCCESS; @@ -1346,6 +1348,31 @@ static void setLogicSubplanType(bool hasScan, SLogicSubplan* pSubplan) { } } +static int32_t adjustLogicNodeDataRequirementImpl(SLogicNode* pNode, EDataOrderLevel requirement) { + switch (nodeType(pNode)) { + case QUERY_NODE_LOGIC_PLAN_SCAN: + case QUERY_NODE_LOGIC_PLAN_JOIN: + case QUERY_NODE_LOGIC_PLAN_AGG: + case QUERY_NODE_LOGIC_PLAN_PROJECT: + case QUERY_NODE_LOGIC_PLAN_VNODE_MODIFY: + case QUERY_NODE_LOGIC_PLAN_EXCHANGE: + case QUERY_NODE_LOGIC_PLAN_MERGE: + case QUERY_NODE_LOGIC_PLAN_WINDOW: + case QUERY_NODE_LOGIC_PLAN_FILL: + case QUERY_NODE_LOGIC_PLAN_SORT: + case QUERY_NODE_LOGIC_PLAN_PARTITION: + case QUERY_NODE_LOGIC_PLAN_INDEF_ROWS_FUNC: + case QUERY_NODE_LOGIC_PLAN_INTERP_FUNC: + default: + break; + } + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustLogicNodeDataRequirement(SLogicNode* pNode) { + return adjustLogicNodeDataRequirementImpl(pNode, DATA_ORDER_LEVEL_NONE); +} + int32_t createLogicPlan(SPlanContext* pCxt, SLogicSubplan** pLogicSubplan) { SLogicPlanContext cxt = {.pPlanCxt = pCxt, .pCurrRoot = NULL, .hasScan = false}; @@ -1361,6 +1388,7 @@ int32_t createLogicPlan(SPlanContext* pCxt, SLogicSubplan** pLogicSubplan) { if (TSDB_CODE_SUCCESS == code) { setLogicNodeParent(pSubplan->pNode); setLogicSubplanType(cxt.hasScan, pSubplan); + code = adjustLogicNodeDataRequirement(pSubplan->pNode); } if (TSDB_CODE_SUCCESS == code) { diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index c0494aa2ae..a1eb607cef 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -1579,6 +1579,34 @@ static bool eliminateProjOptMayBeOptimized(SLogicNode* pNode) { return eliminateProjOptCheckProjColumnNames(pProjectNode); } +typedef struct CheckNewChildTargetsCxt { + SNodeList* pNewChildTargets; + bool canUse; +} CheckNewChildTargetsCxt; + +static EDealRes eliminateProjOptCanUseNewChildTargetsImpl(SNode* pNode, void* pContext) { + if (QUERY_NODE_COLUMN == nodeType(pNode)) { + CheckNewChildTargetsCxt* pCxt = pContext; + SNode* pTarget = NULL; + FOREACH(pTarget, pCxt->pNewChildTargets) { + if (!nodesEqualNode(pTarget, pNode)) { + pCxt->canUse = false; + return DEAL_RES_END; + } + } + } + return DEAL_RES_CONTINUE; +} + +static bool eliminateProjOptCanUseNewChildTargets(SLogicNode* pChild, SNodeList* pNewChildTargets) { + if (NULL == pChild->pConditions) { + return true; + } + CheckNewChildTargetsCxt cxt = {.pNewChildTargets = pNewChildTargets, .canUse = true}; + nodesWalkExpr(pChild->pConditions, eliminateProjOptCanUseNewChildTargetsImpl, &cxt); + return cxt.canUse; +} + static int32_t eliminateProjOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan, SProjectLogicNode* pProjectNode) { SLogicNode* pChild = (SLogicNode*)nodesListGetNode(pProjectNode->node.pChildren, 0); @@ -1594,8 +1622,13 @@ static int32_t eliminateProjOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* } } } - nodesDestroyList(pChild->pTargets); - pChild->pTargets = pNewChildTargets; + if (eliminateProjOptCanUseNewChildTargets(pChild, pNewChildTargets)) { + nodesDestroyList(pChild->pTargets); + pChild->pTargets = pNewChildTargets; + } else { + nodesDestroyList(pNewChildTargets); + return TSDB_CODE_SUCCESS; + } int32_t code = replaceLogicNode(pLogicSubplan, (SLogicNode*)pProjectNode, pChild); if (TSDB_CODE_SUCCESS == code) { @@ -2145,8 +2178,8 @@ static bool tagScanMayBeOptimized(SLogicNode* pNode) { } SAggLogicNode* pAgg = (SAggLogicNode*)(pNode->pParent); - if (NULL == pAgg->pGroupKeys || NULL != pAgg->pAggFuncs || - planOptNodeListHasCol(pAgg->pGroupKeys) || !planOptNodeListHasTbname(pAgg->pGroupKeys)) { + if (NULL == pAgg->pGroupKeys || NULL != pAgg->pAggFuncs || planOptNodeListHasCol(pAgg->pGroupKeys) || + !planOptNodeListHasTbname(pAgg->pGroupKeys)) { return false; } @@ -2162,16 +2195,16 @@ static int32_t tagScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubp pScanNode->scanType = SCAN_TYPE_TAG; SNode* pTarget = NULL; FOREACH(pTarget, pScanNode->node.pTargets) { - if (PRIMARYKEY_TIMESTAMP_COL_ID == ((SColumnNode*)(pTarget))->colId) { - ERASE_NODE(pScanNode->node.pTargets); - break; - } + if (PRIMARYKEY_TIMESTAMP_COL_ID == ((SColumnNode*)(pTarget))->colId) { + ERASE_NODE(pScanNode->node.pTargets); + break; + } } - + NODES_DESTORY_LIST(pScanNode->pScanCols); SLogicNode* pAgg = pScanNode->node.pParent; - int32_t code = replaceLogicNode(pLogicSubplan, pAgg, (SLogicNode*)pScanNode); + int32_t code = replaceLogicNode(pLogicSubplan, pAgg, (SLogicNode*)pScanNode); if (TSDB_CODE_SUCCESS == code) { NODES_CLEAR_LIST(pAgg->pChildren); } diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index ee2457e400..0a1f8bbd0b 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -974,6 +974,17 @@ static int32_t createInterpFuncPhysiNode(SPhysiPlanContext* pCxt, SNodeList* pCh return code; } +static bool projectCanMergeDataBlock(SProjectLogicNode* pProject) { + if (DATA_ORDER_LEVEL_NONE == pProject->node.resultDataOrder) { + return true; + } + if (1 != LIST_LENGTH(pProject->node.pChildren)) { + return false; + } + SLogicNode* pChild = (SLogicNode*)nodesListGetNode(pProject->node.pChildren, 0); + return DATA_ORDER_LEVEL_GLOBAL == pChild->resultDataOrder ? true : false; +} + static int32_t createProjectPhysiNode(SPhysiPlanContext* pCxt, SNodeList* pChildren, SProjectLogicNode* pProjectLogicNode, SPhysiNode** pPhyNode) { SProjectPhysiNode* pProject = @@ -982,6 +993,8 @@ static int32_t createProjectPhysiNode(SPhysiPlanContext* pCxt, SNodeList* pChild return TSDB_CODE_OUT_OF_MEMORY; } + pProject->mergeDataBlock = projectCanMergeDataBlock(pProjectLogicNode); + int32_t code = TSDB_CODE_SUCCESS; if (0 == LIST_LENGTH(pChildren)) { pProject->pProjections = nodesCloneList(pProjectLogicNode->pProjections); diff --git a/source/libs/planner/test/planSubqueryTest.cpp b/source/libs/planner/test/planSubqueryTest.cpp index 2ba3794d5c..9cf99d019f 100644 --- a/source/libs/planner/test/planSubqueryTest.cpp +++ b/source/libs/planner/test/planSubqueryTest.cpp @@ -55,3 +55,11 @@ TEST_F(PlanSubqeuryTest, withSetOperator) { run("SELECT c1 FROM (SELECT c1 FROM t1 UNION SELECT c1 FROM t1)"); } + +TEST_F(PlanSubqeuryTest, withFill) { + useDb("root", "test"); + + run("SELECT cnt FROM (SELECT _WSTART ts, COUNT(*) cnt FROM t1 " + "WHERE ts > '2022-04-01 00:00:00' and ts < '2022-04-30 23:59:59' INTERVAL(10s) FILL(LINEAR)) " + "WHERE ts > '2022-04-06 00:00:00'"); +} From 72f44315e01c09982db77c09f85a22e356fed7b8 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Thu, 21 Jul 2022 15:24:51 +0800 Subject: [PATCH 03/27] fix: plan problem caused by partition by tbname interval optimize --- source/libs/planner/src/planSpliter.c | 3 +++ source/libs/planner/test/planIntervalTest.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/source/libs/planner/src/planSpliter.c b/source/libs/planner/src/planSpliter.c index 4cbbf12385..a7ccb72792 100644 --- a/source/libs/planner/src/planSpliter.c +++ b/source/libs/planner/src/planSpliter.c @@ -657,6 +657,9 @@ static int32_t stbSplSplitWindowForPartTable(SSplitContext* pCxt, SStableSplitIn return TSDB_CODE_SUCCESS; } + if (NULL != pInfo->pSplitNode->pParent && QUERY_NODE_LOGIC_PLAN_FILL == nodeType(pInfo->pSplitNode->pParent)) { + pInfo->pSplitNode = pInfo->pSplitNode->pParent; + } SExchangeLogicNode* pExchange = NULL; int32_t code = splCreateExchangeNode(pCxt, pInfo->pSplitNode, &pExchange); if (TSDB_CODE_SUCCESS == code) { diff --git a/source/libs/planner/test/planIntervalTest.cpp b/source/libs/planner/test/planIntervalTest.cpp index 9f34ead6ff..8645992500 100644 --- a/source/libs/planner/test/planIntervalTest.cpp +++ b/source/libs/planner/test/planIntervalTest.cpp @@ -41,6 +41,9 @@ TEST_F(PlanIntervalTest, fill) { run("SELECT COUNT(*), SUM(c1) FROM t1 " "WHERE ts > TIMESTAMP '2022-04-01 00:00:00' and ts < TIMESTAMP '2022-04-30 23:59:59' " "INTERVAL(10s) FILL(VALUE, 10, 20)"); + + run("SELECT COUNT(*) FROM st1 WHERE ts > TIMESTAMP '2022-04-01 00:00:00' and ts < TIMESTAMP '2022-04-30 23:59:59' " + "PARTITION BY TBNAME interval(10s) fill(prev)"); } TEST_F(PlanIntervalTest, selectFunc) { From c54ed52a1976ebc6b58733c1e6a6d5151aec9d4b Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Thu, 21 Jul 2022 16:03:33 +0800 Subject: [PATCH 04/27] fix: problem with insert 'null' into binary/nchar column --- source/libs/parser/src/parInsert.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/parser/src/parInsert.c b/source/libs/parser/src/parInsert.c index 517df1a977..aa8af7e38f 100644 --- a/source/libs/parser/src/parInsert.c +++ b/source/libs/parser/src/parInsert.c @@ -434,8 +434,8 @@ static FORCE_INLINE int32_t checkAndTrimValue(SToken* pToken, char* tmpTokenBuf, } static bool isNullStr(SToken* pToken) { - return (pToken->type == TK_NULL) || ((pToken->type == TK_NK_STRING) && (pToken->n != 0) && - (strncasecmp(TSDB_DATA_NULL_STR_L, pToken->z, pToken->n) == 0)); + return ((pToken->type == TK_NK_STRING) && (pToken->n != 0) && + (strncasecmp(TSDB_DATA_NULL_STR_L, pToken->z, pToken->n) == 0)); } static FORCE_INLINE int32_t toDouble(SToken* pToken, double* value, char** endPtr) { @@ -461,7 +461,7 @@ static int32_t parseValueToken(char** end, SToken* pToken, SSchema* pSchema, int return code; } - if (isNullStr(pToken)) { + if (TK_NULL == pToken->type || (!IS_VAR_DATA_TYPE(pSchema->type) && isNullStr(pToken))) { if (TSDB_DATA_TYPE_TIMESTAMP == pSchema->type && PRIMARYKEY_TIMESTAMP_COL_ID == pSchema->colId) { return buildSyntaxErrMsg(pMsgBuf, "primary timestamp should not be null", pToken->z); } From 1979801c93bb2e677df1b1a1377af5bc92dbc697 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Thu, 21 Jul 2022 19:21:15 +0800 Subject: [PATCH 05/27] fix: the problem of data loss when interval is used for outer query --- include/libs/nodes/plannodes.h | 8 + source/libs/nodes/src/nodesCloneFuncs.c | 3 + source/libs/planner/src/planLogicCreater.c | 213 ++++++++++++++++-- source/libs/planner/test/planSubqueryTest.cpp | 12 +- 4 files changed, 217 insertions(+), 19 deletions(-) diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index 46aa11f562..a11ea0b2cd 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -33,6 +33,13 @@ typedef enum EDataOrderLevel { DATA_ORDER_LEVEL_GLOBAL } EDataOrderLevel; +typedef enum EGroupAction { + GROUP_ACTION_NONE = 1, + GROUP_ACTION_SET, + GROUP_ACTION_KEEP, + GROUP_ACTION_CLEAR +} EGroupAction; + typedef struct SLogicNode { ENodeType type; SNodeList* pTargets; // SColumnNode @@ -45,6 +52,7 @@ typedef struct SLogicNode { SNode* pSlimit; EDataOrderLevel requireDataOrder; // requirements for input data EDataOrderLevel resultDataOrder; // properties of the output data + EGroupAction groupAction; } SLogicNode; typedef enum EScanType { diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index e7109b5a87..121e697630 100644 --- a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -332,6 +332,9 @@ static int32_t logicNodeCopy(const SLogicNode* pSrc, SLogicNode* pDst) { COPY_SCALAR_FIELD(precision); CLONE_NODE_FIELD(pLimit); CLONE_NODE_FIELD(pSlimit); + COPY_SCALAR_FIELD(requireDataOrder); + COPY_SCALAR_FIELD(resultDataOrder); + COPY_SCALAR_FIELD(groupAction); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 7d6015d5c4..9b07578212 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -250,6 +250,9 @@ static int32_t createScanLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect SScanLogicNode* pScan = NULL; int32_t code = makeScanLogicNode(pCxt, pRealTable, pSelect->hasRepeatScanFuncs, (SLogicNode**)&pScan); + pScan->node.groupAction = GROUP_ACTION_NONE; + pScan->node.resultDataOrder = DATA_ORDER_LEVEL_IN_BLOCK; + // set columns to scan if (TSDB_CODE_SUCCESS == code) { code = nodesCollectColumns(pSelect, SQL_CLAUSE_FROM, pRealTable->table.tableAlias, COLLECT_COL_TYPE_COL, @@ -336,6 +339,9 @@ static int32_t createJoinLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect pJoin->joinType = pJoinTable->joinType; pJoin->isSingleTableJoin = pJoinTable->table.singleTable; + pJoin->node.groupAction = GROUP_ACTION_CLEAR; + pJoin->node.requireDataOrder = DATA_ORDER_LEVEL_GLOBAL; + pJoin->node.requireDataOrder = DATA_ORDER_LEVEL_GLOBAL; int32_t code = TSDB_CODE_SUCCESS; @@ -472,6 +478,9 @@ static int32_t createAggLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect, } pAgg->hasLastRow = pSelect->hasLastRowFunc; + pAgg->node.groupAction = GROUP_ACTION_SET; + pAgg->node.requireDataOrder = DATA_ORDER_LEVEL_NONE; + pAgg->node.resultDataOrder = DATA_ORDER_LEVEL_NONE; int32_t code = TSDB_CODE_SUCCESS; @@ -540,6 +549,10 @@ static int32_t createIndefRowsFuncLogicNode(SLogicPlanContext* pCxt, SSelectStmt pIdfRowsFunc->isTailFunc = pSelect->hasTailFunc; pIdfRowsFunc->isUniqueFunc = pSelect->hasUniqueFunc; pIdfRowsFunc->isTimeLineFunc = pSelect->hasTimeLineFunc; + pIdfRowsFunc->node.groupAction = GROUP_ACTION_KEEP; + pIdfRowsFunc->node.requireDataOrder = + pIdfRowsFunc->isTimeLineFunc ? DATA_ORDER_LEVEL_IN_GROUP : DATA_ORDER_LEVEL_NONE; + pIdfRowsFunc->node.resultDataOrder = pIdfRowsFunc->node.requireDataOrder; // indefinite rows functions and _select_values functions int32_t code = nodesCollectFuncs(pSelect, SQL_CLAUSE_SELECT, fmIsVectorFunc, &pIdfRowsFunc->pFuncs); @@ -571,6 +584,10 @@ static int32_t createInterpFuncLogicNode(SLogicPlanContext* pCxt, SSelectStmt* p return TSDB_CODE_OUT_OF_MEMORY; } + pInterpFunc->node.groupAction = GROUP_ACTION_KEEP; + pInterpFunc->node.requireDataOrder = DATA_ORDER_LEVEL_IN_GROUP; + pInterpFunc->node.resultDataOrder = pInterpFunc->node.requireDataOrder; + int32_t code = nodesCollectFuncs(pSelect, SQL_CLAUSE_SELECT, fmIsInterpFunc, &pInterpFunc->pFuncs); if (TSDB_CODE_SUCCESS == code) { code = rewriteExprsForSelect(pInterpFunc->pFuncs, pSelect, SQL_CLAUSE_SELECT); @@ -642,10 +659,12 @@ static int32_t createWindowLogicNodeByState(SLogicPlanContext* pCxt, SStateWindo } pWindow->winType = WINDOW_TYPE_STATE; + pWindow->node.groupAction = GROUP_ACTION_KEEP; + pWindow->node.requireDataOrder = DATA_ORDER_LEVEL_IN_GROUP; + pWindow->node.resultDataOrder = DATA_ORDER_LEVEL_IN_GROUP; pWindow->pStateExpr = nodesCloneNode(pState->pExpr); - pWindow->pTspk = nodesCloneNode(pState->pCol); - if (NULL == pWindow->pTspk) { + if (NULL == pWindow->pStateExpr || NULL == pWindow->pTspk) { nodesDestroyNode((SNode*)pWindow); return TSDB_CODE_OUT_OF_MEMORY; } @@ -663,6 +682,9 @@ static int32_t createWindowLogicNodeBySession(SLogicPlanContext* pCxt, SSessionW pWindow->winType = WINDOW_TYPE_SESSION; pWindow->sessionGap = ((SValueNode*)pSession->pGap)->datum.i; pWindow->windowAlgo = pCxt->pPlanCxt->streamQuery ? SESSION_ALGO_STREAM_SINGLE : SESSION_ALGO_MERGE; + pWindow->node.groupAction = GROUP_ACTION_KEEP; + pWindow->node.requireDataOrder = DATA_ORDER_LEVEL_IN_GROUP; + pWindow->node.resultDataOrder = DATA_ORDER_LEVEL_IN_GROUP; pWindow->pTspk = nodesCloneNode((SNode*)pSession->pCol); if (NULL == pWindow->pTspk) { @@ -689,6 +711,9 @@ static int32_t createWindowLogicNodeByInterval(SLogicPlanContext* pCxt, SInterva pWindow->slidingUnit = (NULL != pInterval->pSliding ? ((SValueNode*)pInterval->pSliding)->unit : pWindow->intervalUnit); pWindow->windowAlgo = pCxt->pPlanCxt->streamQuery ? INTERVAL_ALGO_STREAM_SINGLE : INTERVAL_ALGO_HASH; + pWindow->node.groupAction = GROUP_ACTION_KEEP; + pWindow->node.requireDataOrder = DATA_ORDER_LEVEL_IN_BLOCK; + pWindow->node.resultDataOrder = DATA_ORDER_LEVEL_IN_BLOCK; pWindow->pTspk = nodesCloneNode(pInterval->pCol); if (NULL == pWindow->pTspk) { @@ -734,6 +759,10 @@ static int32_t createFillLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect return TSDB_CODE_OUT_OF_MEMORY; } + pFill->node.groupAction = GROUP_ACTION_KEEP; + pFill->node.requireDataOrder = DATA_ORDER_LEVEL_IN_GROUP; + pFill->node.resultDataOrder = DATA_ORDER_LEVEL_IN_GROUP; + int32_t code = nodesCollectColumns(pSelect, SQL_CLAUSE_WINDOW, NULL, COLLECT_COL_TYPE_ALL, &pFill->node.pTargets); if (TSDB_CODE_SUCCESS == code && NULL == pFill->node.pTargets) { code = nodesListMakeStrictAppend(&pFill->node.pTargets, @@ -768,6 +797,9 @@ static int32_t createSortLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect } pSort->groupSort = pSelect->groupSort; + pSort->node.groupAction = pSort->groupSort ? GROUP_ACTION_KEEP : GROUP_ACTION_CLEAR; + pSort->node.requireDataOrder = DATA_ORDER_LEVEL_NONE; + pSort->node.resultDataOrder = pSort->groupSort ? DATA_ORDER_LEVEL_IN_GROUP : DATA_ORDER_LEVEL_GLOBAL; int32_t code = nodesCollectColumns(pSelect, SQL_CLAUSE_ORDER_BY, NULL, COLLECT_COL_TYPE_ALL, &pSort->node.pTargets); if (TSDB_CODE_SUCCESS == code && NULL == pSort->node.pTargets) { @@ -818,6 +850,7 @@ static int32_t createProjectLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSel TSWAP(pProject->node.pLimit, pSelect->pLimit); TSWAP(pProject->node.pSlimit, pSelect->pSlimit); + pProject->node.groupAction = GROUP_ACTION_CLEAR; pProject->node.requireDataOrder = DATA_ORDER_LEVEL_NONE; pProject->node.resultDataOrder = DATA_ORDER_LEVEL_NONE; @@ -852,6 +885,10 @@ static int32_t createPartitionLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pS return TSDB_CODE_OUT_OF_MEMORY; } + pPartition->node.groupAction = GROUP_ACTION_SET; + pPartition->node.requireDataOrder = DATA_ORDER_LEVEL_NONE; + pPartition->node.resultDataOrder = DATA_ORDER_LEVEL_NONE; + int32_t code = nodesCollectColumns(pSelect, SQL_CLAUSE_PARTITION_BY, NULL, COLLECT_COL_TYPE_ALL, &pPartition->node.pTargets); if (TSDB_CODE_SUCCESS == code && NULL == pPartition->node.pTargets) { @@ -884,6 +921,10 @@ static int32_t createDistinctLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSe return TSDB_CODE_OUT_OF_MEMORY; } + pAgg->node.groupAction = GROUP_ACTION_SET; + pAgg->node.requireDataOrder = DATA_ORDER_LEVEL_NONE; + pAgg->node.resultDataOrder = DATA_ORDER_LEVEL_NONE; + int32_t code = TSDB_CODE_SUCCESS; // set grouyp keys, agg funcs and having conditions pAgg->pGroupKeys = nodesCloneList(pSelect->pProjectionList); @@ -1348,27 +1389,165 @@ static void setLogicSubplanType(bool hasScan, SLogicSubplan* pSubplan) { } } -static int32_t adjustLogicNodeDataRequirementImpl(SLogicNode* pNode, EDataOrderLevel requirement) { - switch (nodeType(pNode)) { - case QUERY_NODE_LOGIC_PLAN_SCAN: - case QUERY_NODE_LOGIC_PLAN_JOIN: - case QUERY_NODE_LOGIC_PLAN_AGG: - case QUERY_NODE_LOGIC_PLAN_PROJECT: - case QUERY_NODE_LOGIC_PLAN_VNODE_MODIFY: - case QUERY_NODE_LOGIC_PLAN_EXCHANGE: - case QUERY_NODE_LOGIC_PLAN_MERGE: - case QUERY_NODE_LOGIC_PLAN_WINDOW: - case QUERY_NODE_LOGIC_PLAN_FILL: - case QUERY_NODE_LOGIC_PLAN_SORT: - case QUERY_NODE_LOGIC_PLAN_PARTITION: - case QUERY_NODE_LOGIC_PLAN_INDEF_ROWS_FUNC: - case QUERY_NODE_LOGIC_PLAN_INTERP_FUNC: +static int32_t adjustScanDataRequirement(SScanLogicNode* pScan, EDataOrderLevel requirement) { + if (requirement <= DATA_ORDER_LEVEL_IN_BLOCK) { + return TSDB_CODE_SUCCESS; + } + pScan->scanType = SCAN_TYPE_TABLE_MERGE; + pScan->node.resultDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustJoinDataRequirement(SJoinLogicNode* pJoin, EDataOrderLevel requirement) { + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustAggDataRequirement(SAggLogicNode* pAgg, EDataOrderLevel requirement) { + if (requirement > DATA_ORDER_LEVEL_NONE) { + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustProjectDataRequirement(SProjectLogicNode* pProject, EDataOrderLevel requirement) { + pProject->node.resultDataOrder = requirement; + pProject->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustIntervalDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { + if (requirement <= DATA_ORDER_LEVEL_IN_BLOCK) { + return TSDB_CODE_SUCCESS; + } + pWindow->node.resultDataOrder = requirement; + pWindow->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustSessionDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { + if (requirement <= DATA_ORDER_LEVEL_IN_GROUP) { + return TSDB_CODE_SUCCESS; + } + pWindow->node.resultDataOrder = requirement; + pWindow->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustStateDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { + if (requirement <= DATA_ORDER_LEVEL_IN_GROUP) { + return TSDB_CODE_SUCCESS; + } + pWindow->node.resultDataOrder = requirement; + pWindow->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustWindowDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { + switch (pWindow->winType) { + case WINDOW_TYPE_INTERVAL: + return adjustIntervalDataRequirement(pWindow, requirement); + case WINDOW_TYPE_SESSION: + return adjustSessionDataRequirement(pWindow, requirement); + case WINDOW_TYPE_STATE: + return adjustStateDataRequirement(pWindow, requirement); default: break; } + return TSDB_CODE_PLAN_INTERNAL_ERROR; +} + +static int32_t adjustFillDataRequirement(SFillLogicNode* pFill, EDataOrderLevel requirement) { + if (requirement <= DATA_ORDER_LEVEL_IN_GROUP) { + return TSDB_CODE_SUCCESS; + } + pFill->node.resultDataOrder = requirement; + pFill->node.requireDataOrder = requirement; return TSDB_CODE_SUCCESS; } +static int32_t adjustSortDataRequirement(SSortLogicNode* pSort, EDataOrderLevel requirement) { + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustPartitionDataRequirement(SPartitionLogicNode* pPart, EDataOrderLevel requirement) { + if (DATA_ORDER_LEVEL_GLOBAL == requirement) { + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + pPart->node.resultDataOrder = requirement; + pPart->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustIndefRowsDataRequirement(SIndefRowsFuncLogicNode* pIndef, EDataOrderLevel requirement) { + if (requirement <= pIndef->node.resultDataOrder) { + return TSDB_CODE_SUCCESS; + } + pIndef->node.resultDataOrder = requirement; + pIndef->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustInterpDataRequirement(SInterpFuncLogicNode* pInterp, EDataOrderLevel requirement) { + if (requirement <= DATA_ORDER_LEVEL_IN_GROUP) { + return TSDB_CODE_SUCCESS; + } + pInterp->node.resultDataOrder = requirement; + pInterp->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustLogicNodeDataRequirementImpl(SLogicNode* pNode, EDataOrderLevel requirement) { + int32_t code = TSDB_CODE_SUCCESS; + switch (nodeType(pNode)) { + case QUERY_NODE_LOGIC_PLAN_SCAN: + code = adjustScanDataRequirement((SScanLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_JOIN: + code = adjustJoinDataRequirement((SJoinLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_AGG: + code = adjustAggDataRequirement((SAggLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_PROJECT: + code = adjustProjectDataRequirement((SProjectLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_VNODE_MODIFY: + case QUERY_NODE_LOGIC_PLAN_EXCHANGE: + case QUERY_NODE_LOGIC_PLAN_MERGE: + break; + case QUERY_NODE_LOGIC_PLAN_WINDOW: + code = adjustWindowDataRequirement((SWindowLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_FILL: + code = adjustFillDataRequirement((SFillLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_SORT: + code = adjustSortDataRequirement((SSortLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_PARTITION: + code = adjustPartitionDataRequirement((SPartitionLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_INDEF_ROWS_FUNC: + code = adjustIndefRowsDataRequirement((SIndefRowsFuncLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_INTERP_FUNC: + code = adjustInterpDataRequirement((SInterpFuncLogicNode*)pNode, requirement); + break; + default: + break; + } + if (TSDB_CODE_SUCCESS == code) { + SNode* pChild = NULL; + FOREACH(pChild, pNode->pChildren) { + code = adjustLogicNodeDataRequirementImpl((SLogicNode*)pChild, pNode->requireDataOrder); + if (TSDB_CODE_SUCCESS != code) { + break; + } + } + } + return code; +} + static int32_t adjustLogicNodeDataRequirement(SLogicNode* pNode) { return adjustLogicNodeDataRequirementImpl(pNode, DATA_ORDER_LEVEL_NONE); } diff --git a/source/libs/planner/test/planSubqueryTest.cpp b/source/libs/planner/test/planSubqueryTest.cpp index 9cf99d019f..fde373c66d 100644 --- a/source/libs/planner/test/planSubqueryTest.cpp +++ b/source/libs/planner/test/planSubqueryTest.cpp @@ -48,7 +48,7 @@ TEST_F(PlanSubqeuryTest, doubleGroupBy) { "WHERE a > 100 GROUP BY b"); } -TEST_F(PlanSubqeuryTest, withSetOperator) { +TEST_F(PlanSubqeuryTest, innerSetOperator) { useDb("root", "test"); run("SELECT c1 FROM (SELECT c1 FROM t1 UNION ALL SELECT c1 FROM t1)"); @@ -56,10 +56,18 @@ TEST_F(PlanSubqeuryTest, withSetOperator) { run("SELECT c1 FROM (SELECT c1 FROM t1 UNION SELECT c1 FROM t1)"); } -TEST_F(PlanSubqeuryTest, withFill) { +TEST_F(PlanSubqeuryTest, innerFill) { useDb("root", "test"); run("SELECT cnt FROM (SELECT _WSTART ts, COUNT(*) cnt FROM t1 " "WHERE ts > '2022-04-01 00:00:00' and ts < '2022-04-30 23:59:59' INTERVAL(10s) FILL(LINEAR)) " "WHERE ts > '2022-04-06 00:00:00'"); } + +TEST_F(PlanSubqeuryTest, outerInterval) { + useDb("root", "test"); + + run("SELECT COUNT(*) FROM (SELECT * FROM st1) INTERVAL(5s)"); + + run("SELECT COUNT(*) + SUM(c1) FROM (SELECT * FROM st1) INTERVAL(5s)"); +} From 684dd82358bf3863aec4a5a28b22ebdec5956ef6 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 21 Jul 2022 11:42:42 +0000 Subject: [PATCH 06/27] fix read concurrency --- source/dnode/vnode/src/inc/tsdb.h | 47 +- source/dnode/vnode/src/tsdb/tsdbCache.c | 804 +++++------ source/dnode/vnode/src/tsdb/tsdbCommit.c | 42 +- source/dnode/vnode/src/tsdb/tsdbFS.c | 1357 ++++++++++++------- source/dnode/vnode/src/tsdb/tsdbFile.c | 17 +- source/dnode/vnode/src/tsdb/tsdbMemTable.c | 45 - source/dnode/vnode/src/tsdb/tsdbOpen.c | 4 +- source/dnode/vnode/src/tsdb/tsdbRead.c | 37 +- source/dnode/vnode/src/tsdb/tsdbRetention.c | 8 +- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 51 +- 10 files changed, 1391 insertions(+), 1021 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index d465ba4d9b..cfadc91d89 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -62,7 +62,6 @@ typedef struct SDelFReader SDelFReader; typedef struct SRowIter SRowIter; typedef struct STsdbFS STsdbFS; typedef struct SRowMerger SRowMerger; -typedef struct STsdbFSState STsdbFSState; typedef struct STsdbSnapHdr STsdbSnapHdr; typedef struct STsdbReadSnap STsdbReadSnap; @@ -177,8 +176,6 @@ void tsdbMemTableDestroy(SMemTable *pMemTable); void tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData); void tsdbRefMemTable(SMemTable *pMemTable); void tsdbUnrefMemTable(SMemTable *pMemTable); -int32_t tsdbTakeMemSnapshot(STsdb *pTsdb, SMemTable **ppMem, SMemTable **ppIMem); -void tsdbUntakeMemSnapshot(STsdb *pTsdb, SMemTable *pMem, SMemTable *pIMem); // STbDataIter int32_t tsdbTbDataIterCreate(STbData *pTbData, TSDBKEY *pFrom, int8_t backward, STbDataIter **ppIter); void *tsdbTbDataIterDestroy(STbDataIter *pIter); @@ -208,17 +205,20 @@ void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, ch // SDelFile void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]); // tsdbFS.c ============================================================================================== -int32_t tsdbFSOpen(STsdb *pTsdb, STsdbFS **ppFS); -int32_t tsdbFSClose(STsdbFS *pFS); -int32_t tsdbFSBegin(STsdbFS *pFS); -int32_t tsdbFSCommit(STsdbFS *pFS); +int32_t tsdbFSOpen(STsdb *pTsdb); +int32_t tsdbFSClose(STsdb *pTsdb); +int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS); +void tsdbFSDestroy(STsdbFS *pFS); +int32_t tDFileSetCmprFn(const void *p1, const void *p2); +int32_t tsdbFSCommit1(STsdb *pTsdb, STsdbFS *pFS); +int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFS); +int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS); +void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS); + int32_t tsdbFSRollback(STsdbFS *pFS); -int32_t tsdbFSStateUpsertDelFile(STsdbFSState *pState, SDelFile *pDelFile); -int32_t tsdbFSStateUpsertDFileSet(STsdbFSState *pState, SDFileSet *pSet); -void tsdbFSStateDeleteDFileSet(STsdbFSState *pState, int32_t fid); -SDelFile *tsdbFSStateGetDelFile(STsdbFSState *pState); -SDFileSet *tsdbFSStateGetDFileSet(STsdbFSState *pState, int32_t fid, int32_t flag); +int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet); +int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile); // tsdbReaderWriter.c ============================================================================================== // SDataFWriter int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet); @@ -285,6 +285,11 @@ typedef struct { TSKEY minKey; } SRtn; +struct STsdbFS { + SDelFile *pDelFile; + SArray *aDFileSet; // SArray +}; + struct STsdb { char *path; SVnode *pVnode; @@ -292,7 +297,7 @@ struct STsdb { TdThreadRwlock rwLock; SMemTable *mem; SMemTable *imem; - STsdbFS *pFS; + STsdbFS fs; SLRUCache *lruCache; }; @@ -540,22 +545,6 @@ struct SRowMerger { SArray *pArray; // SArray }; -struct STsdbFSState { - SDelFile *pDelFile; - SArray *aDFileSet; // SArray - SDelFile delFile; -}; - -struct STsdbFS { - STsdb *pTsdb; - STsdbFSState *cState; - STsdbFSState *nState; - - // new - SDelFile *pDelFile; - SArray aDFileSetP; // SArray -}; - struct SDelFWriter { STsdb *pTsdb; SDelFile fDel; diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index 484020e6e1..e259dde29c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -464,7 +464,7 @@ static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow) { switch (state->state) { case SFSNEXTROW_FS: - state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet; + // state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet; state->nFileSet = taosArrayGetSize(state->aDFileSet); state->iFileSet = state->nFileSet; @@ -793,9 +793,10 @@ typedef struct { TSDBROW memRow, imemRow, fsRow; TsdbNextRowState input[3]; - SMemTable *pMemTable; - SMemTable *pIMemTable; - STsdb *pTsdb; + // SMemTable *pMemTable; + // SMemTable *pIMemTable; + STsdbReadSnap *pReadSnap; + STsdb *pTsdb; } CacheNextRowIter; static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTsdb) { @@ -803,16 +804,16 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs tb_uid_t suid = getTableSuidByUid(uid, pTsdb); - tsdbTakeMemSnapshot(pTsdb, &pIter->pMemTable, &pIter->pIMemTable); + tsdbTakeReadSnap(pTsdb, &pIter->pReadSnap); STbData *pMem = NULL; - if (pIter->pMemTable) { - tsdbGetTbDataFromMemTable(pIter->pMemTable, suid, uid, &pMem); + if (pIter->pReadSnap->pMem) { + tsdbGetTbDataFromMemTable(pIter->pReadSnap->pMem, suid, uid, &pMem); } STbData *pIMem = NULL; - if (pIter->pIMemTable) { - tsdbGetTbDataFromMemTable(pIter->pIMemTable, suid, uid, &pIMem); + if (pIter->pReadSnap->pIMem) { + tsdbGetTbDataFromMemTable(pIter->pReadSnap->pIMem, suid, uid, &pIMem); } pIter->pTsdb = pTsdb; @@ -821,7 +822,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs SDelIdx delIdx; - SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); + SDelFile *pDelFile = pIter->pReadSnap->fs.pDelFile; if (pDelFile) { SDelFReader *pDelFReader; @@ -846,6 +847,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs pIter->fsState.state = SFSNEXTROW_FS; pIter->fsState.pTsdb = pTsdb; + pIter->fsState.aDFileSet = pIter->pReadSnap->fs.aDFileSet; pIter->fsState.pBlockIdxExp = &pIter->idx; pIter->input[0] = (TsdbNextRowState){&pIter->memRow, true, false, &pIter->memState, getNextRowFromMem, NULL}; @@ -885,7 +887,7 @@ static int32_t nextRowIterClose(CacheNextRowIter *pIter) { taosArrayDestroy(pIter->pSkyline); } - tsdbUntakeMemSnapshot(pIter->pTsdb, pIter->pMemTable, pIter->pIMemTable); + tsdbUntakeReadSnap(pIter->pTsdb, pIter->pReadSnap); return code; _err: @@ -1172,480 +1174,480 @@ _err: return code; } -static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, STSRow **ppRow) { - int32_t code = 0; - SArray *pSkyline = NULL; +// static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, STSRow **ppRow) { +// int32_t code = 0; +// SArray *pSkyline = NULL; - STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); - int16_t nCol = pTSchema->numOfCols; - SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); +// STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); +// int16_t nCol = pTSchema->numOfCols; +// SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); - tb_uid_t suid = getTableSuidByUid(uid, pTsdb); +// tb_uid_t suid = getTableSuidByUid(uid, pTsdb); - STbData *pMem = NULL; - if (pTsdb->mem) { - tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); - } +// STbData *pMem = NULL; +// if (pTsdb->mem) { +// tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); +// } - STbData *pIMem = NULL; - if (pTsdb->imem) { - tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); - } +// STbData *pIMem = NULL; +// if (pTsdb->imem) { +// tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); +// } - *ppRow = NULL; +// *ppRow = NULL; - pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); +// pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); - SDelIdx delIdx; +// SDelIdx delIdx; - SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); - if (pDelFile) { - SDelFReader *pDelFReader; +// SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); +// if (pDelFile) { +// SDelFReader *pDelFReader; - code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); - if (code) goto _err; +// code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); +// if (code) goto _err; - code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); - if (code) goto _err; +// code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); +// if (code) goto _err; - code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); - if (code) goto _err; +// code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); +// if (code) goto _err; - tsdbDelFReaderClose(&pDelFReader); - } else { - code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); - if (code) goto _err; - } +// tsdbDelFReaderClose(&pDelFReader); +// } else { +// code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); +// if (code) goto _err; +// } - int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; +// int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; - SBlockIdx idx = {.suid = suid, .uid = uid}; +// SBlockIdx idx = {.suid = suid, .uid = uid}; - SFSNextRowIter fsState = {0}; - fsState.state = SFSNEXTROW_FS; - fsState.pTsdb = pTsdb; - fsState.pBlockIdxExp = &idx; +// SFSNextRowIter fsState = {0}; +// fsState.state = SFSNEXTROW_FS; +// fsState.pTsdb = pTsdb; +// fsState.pBlockIdxExp = &idx; - SMemNextRowIter memState = {0}; - SMemNextRowIter imemState = {0}; - TSDBROW memRow, imemRow, fsRow; +// SMemNextRowIter memState = {0}; +// SMemNextRowIter imemState = {0}; +// TSDBROW memRow, imemRow, fsRow; - TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, - {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, - {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; +// TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, +// {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, +// {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; - if (pMem) { - memState.pMem = pMem; - memState.state = SMEMNEXTROW_ENTER; - input[0].stop = false; - input[0].next = true; - } - if (pIMem) { - imemState.pMem = pIMem; - imemState.state = SMEMNEXTROW_ENTER; - input[1].stop = false; - input[1].next = true; - } +// if (pMem) { +// memState.pMem = pMem; +// memState.state = SMEMNEXTROW_ENTER; +// input[0].stop = false; +// input[0].next = true; +// } +// if (pIMem) { +// imemState.pMem = pIMem; +// imemState.state = SMEMNEXTROW_ENTER; +// input[1].stop = false; +// input[1].next = true; +// } - int16_t nilColCount = nCol - 1; // count of null & none cols - int iCol = 0; // index of first nil col index from left to right - bool setICol = false; +// int16_t nilColCount = nCol - 1; // count of null & none cols +// int iCol = 0; // index of first nil col index from left to right +// bool setICol = false; - do { - for (int i = 0; i < 3; ++i) { - if (input[i].next && !input[i].stop) { - if (input[i].pRow == NULL) { - code = input[i].nextRowFn(input[i].iter, &input[i].pRow); - if (code) goto _err; +// do { +// for (int i = 0; i < 3; ++i) { +// if (input[i].next && !input[i].stop) { +// if (input[i].pRow == NULL) { +// code = input[i].nextRowFn(input[i].iter, &input[i].pRow); +// if (code) goto _err; - if (input[i].pRow == NULL) { - input[i].stop = true; - input[i].next = false; - } - } - } - } +// if (input[i].pRow == NULL) { +// input[i].stop = true; +// input[i].next = false; +// } +// } +// } +// } - if (input[0].stop && input[1].stop && input[2].stop) { - break; - } +// if (input[0].stop && input[1].stop && input[2].stop) { +// break; +// } - // select maxpoint(s) from mem, imem, fs - TSDBROW *max[3] = {0}; - int iMax[3] = {-1, -1, -1}; - int nMax = 0; - TSKEY maxKey = TSKEY_MIN; +// // select maxpoint(s) from mem, imem, fs +// TSDBROW *max[3] = {0}; +// int iMax[3] = {-1, -1, -1}; +// int nMax = 0; +// TSKEY maxKey = TSKEY_MIN; - for (int i = 0; i < 3; ++i) { - if (!input[i].stop && input[i].pRow != NULL) { - TSDBKEY key = TSDBROW_KEY(input[i].pRow); +// for (int i = 0; i < 3; ++i) { +// if (!input[i].stop && input[i].pRow != NULL) { +// TSDBKEY key = TSDBROW_KEY(input[i].pRow); - // merging & deduplicating on client side - if (maxKey <= key.ts) { - if (maxKey < key.ts) { - nMax = 0; - maxKey = key.ts; - } +// // merging & deduplicating on client side +// if (maxKey <= key.ts) { +// if (maxKey < key.ts) { +// nMax = 0; +// maxKey = key.ts; +// } - iMax[nMax] = i; - max[nMax++] = input[i].pRow; - } - } - } +// iMax[nMax] = i; +// max[nMax++] = input[i].pRow; +// } +// } +// } - // delete detection - TSDBROW *merge[3] = {0}; - int iMerge[3] = {-1, -1, -1}; - int nMerge = 0; - for (int i = 0; i < nMax; ++i) { - TSDBKEY maxKey = TSDBROW_KEY(max[i]); +// // delete detection +// TSDBROW *merge[3] = {0}; +// int iMerge[3] = {-1, -1, -1}; +// int nMerge = 0; +// for (int i = 0; i < nMax; ++i) { +// TSDBKEY maxKey = TSDBROW_KEY(max[i]); - bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); - if (!deleted) { - iMerge[nMerge] = i; - merge[nMerge++] = max[i]; - } +// bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); +// if (!deleted) { +// iMerge[nMerge] = i; +// merge[nMerge++] = max[i]; +// } - input[iMax[i]].next = deleted; - } +// input[iMax[i]].next = deleted; +// } - // merge if nMerge > 1 - if (nMerge > 0) { - *dup = false; +// // merge if nMerge > 1 +// if (nMerge > 0) { +// *dup = false; - if (nMerge == 1) { - code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); - if (code) goto _err; - } else { - // merge 2 or 3 rows - SRowMerger merger = {0}; +// if (nMerge == 1) { +// code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); +// if (code) goto _err; +// } else { +// // merge 2 or 3 rows +// SRowMerger merger = {0}; - tRowMergerInit(&merger, merge[0], pTSchema); - for (int i = 1; i < nMerge; ++i) { - tRowMerge(&merger, merge[i]); - } - tRowMergerGetRow(&merger, ppRow); - tRowMergerClear(&merger); - } - } +// tRowMergerInit(&merger, merge[0], pTSchema); +// for (int i = 1; i < nMerge; ++i) { +// tRowMerge(&merger, merge[i]); +// } +// tRowMergerGetRow(&merger, ppRow); +// tRowMergerClear(&merger); +// } +// } - } while (1); +// } while (1); - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); - return code; -_err: - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); - tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} +// return code; +// _err: +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); +// tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); +// return code; +// } // static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, STSRow **ppRow) { -static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray) { - int32_t code = 0; - SArray *pSkyline = NULL; - STSRow *pRow = NULL; - STSRow **ppRow = &pRow; +// static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray) { +// int32_t code = 0; +// SArray *pSkyline = NULL; +// STSRow *pRow = NULL; +// STSRow **ppRow = &pRow; - STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); - int16_t nCol = pTSchema->numOfCols; - // SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); - SArray *pColArray = taosArrayInit(nCol, sizeof(SLastCol)); +// STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); +// int16_t nCol = pTSchema->numOfCols; +// // SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); +// SArray *pColArray = taosArrayInit(nCol, sizeof(SLastCol)); - tb_uid_t suid = getTableSuidByUid(uid, pTsdb); +// tb_uid_t suid = getTableSuidByUid(uid, pTsdb); - STbData *pMem = NULL; - if (pTsdb->mem) { - tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); - } +// STbData *pMem = NULL; +// if (pTsdb->mem) { +// tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); +// } - STbData *pIMem = NULL; - if (pTsdb->imem) { - tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); - } +// STbData *pIMem = NULL; +// if (pTsdb->imem) { +// tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); +// } - *ppLastArray = NULL; +// *ppLastArray = NULL; - pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); +// pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); - SDelIdx delIdx; +// SDelIdx delIdx; - SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); - if (pDelFile) { - SDelFReader *pDelFReader; +// SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); +// if (pDelFile) { +// SDelFReader *pDelFReader; - code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); - if (code) goto _err; +// code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); +// if (code) goto _err; - code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); - if (code) goto _err; +// code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); +// if (code) goto _err; - code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); - if (code) goto _err; +// code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); +// if (code) goto _err; - tsdbDelFReaderClose(&pDelFReader); - } else { - code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); - if (code) goto _err; - } +// tsdbDelFReaderClose(&pDelFReader); +// } else { +// code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); +// if (code) goto _err; +// } - int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; +// int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; - SBlockIdx idx = {.suid = suid, .uid = uid}; +// SBlockIdx idx = {.suid = suid, .uid = uid}; - SFSNextRowIter fsState = {0}; - fsState.state = SFSNEXTROW_FS; - fsState.pTsdb = pTsdb; - fsState.pBlockIdxExp = &idx; +// SFSNextRowIter fsState = {0}; +// fsState.state = SFSNEXTROW_FS; +// fsState.pTsdb = pTsdb; +// fsState.pBlockIdxExp = &idx; - SMemNextRowIter memState = {0}; - SMemNextRowIter imemState = {0}; - TSDBROW memRow, imemRow, fsRow; +// SMemNextRowIter memState = {0}; +// SMemNextRowIter imemState = {0}; +// TSDBROW memRow, imemRow, fsRow; - TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, - {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, - {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; +// TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, +// {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, +// {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; - if (pMem) { - memState.pMem = pMem; - memState.state = SMEMNEXTROW_ENTER; - input[0].stop = false; - input[0].next = true; - } - if (pIMem) { - imemState.pMem = pIMem; - imemState.state = SMEMNEXTROW_ENTER; - input[1].stop = false; - input[1].next = true; - } +// if (pMem) { +// memState.pMem = pMem; +// memState.state = SMEMNEXTROW_ENTER; +// input[0].stop = false; +// input[0].next = true; +// } +// if (pIMem) { +// imemState.pMem = pIMem; +// imemState.state = SMEMNEXTROW_ENTER; +// input[1].stop = false; +// input[1].next = true; +// } - int16_t nilColCount = nCol - 1; // count of null & none cols - int iCol = 0; // index of first nil col index from left to right - bool setICol = false; +// int16_t nilColCount = nCol - 1; // count of null & none cols +// int iCol = 0; // index of first nil col index from left to right +// bool setICol = false; - do { - for (int i = 0; i < 3; ++i) { - if (input[i].next && !input[i].stop) { - code = input[i].nextRowFn(input[i].iter, &input[i].pRow); - if (code) goto _err; +// do { +// for (int i = 0; i < 3; ++i) { +// if (input[i].next && !input[i].stop) { +// code = input[i].nextRowFn(input[i].iter, &input[i].pRow); +// if (code) goto _err; - if (input[i].pRow == NULL) { - input[i].stop = true; - input[i].next = false; - } - } - } +// if (input[i].pRow == NULL) { +// input[i].stop = true; +// input[i].next = false; +// } +// } +// } - if (input[0].stop && input[1].stop && input[2].stop) { - break; - } +// if (input[0].stop && input[1].stop && input[2].stop) { +// break; +// } - // select maxpoint(s) from mem, imem, fs - TSDBROW *max[3] = {0}; - int iMax[3] = {-1, -1, -1}; - int nMax = 0; - TSKEY maxKey = TSKEY_MIN; +// // select maxpoint(s) from mem, imem, fs +// TSDBROW *max[3] = {0}; +// int iMax[3] = {-1, -1, -1}; +// int nMax = 0; +// TSKEY maxKey = TSKEY_MIN; - for (int i = 0; i < 3; ++i) { - if (!input[i].stop && input[i].pRow != NULL) { - TSDBKEY key = TSDBROW_KEY(input[i].pRow); +// for (int i = 0; i < 3; ++i) { +// if (!input[i].stop && input[i].pRow != NULL) { +// TSDBKEY key = TSDBROW_KEY(input[i].pRow); - // merging & deduplicating on client side - if (maxKey <= key.ts) { - if (maxKey < key.ts) { - nMax = 0; - maxKey = key.ts; - } +// // merging & deduplicating on client side +// if (maxKey <= key.ts) { +// if (maxKey < key.ts) { +// nMax = 0; +// maxKey = key.ts; +// } - iMax[nMax] = i; - max[nMax++] = input[i].pRow; - } - } - } +// iMax[nMax] = i; +// max[nMax++] = input[i].pRow; +// } +// } +// } - // delete detection - TSDBROW *merge[3] = {0}; - int iMerge[3] = {-1, -1, -1}; - int nMerge = 0; - for (int i = 0; i < nMax; ++i) { - TSDBKEY maxKey = TSDBROW_KEY(max[i]); +// // delete detection +// TSDBROW *merge[3] = {0}; +// int iMerge[3] = {-1, -1, -1}; +// int nMerge = 0; +// for (int i = 0; i < nMax; ++i) { +// TSDBKEY maxKey = TSDBROW_KEY(max[i]); - bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); - if (!deleted) { - iMerge[nMerge] = iMax[i]; - merge[nMerge++] = max[i]; - } +// bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); +// if (!deleted) { +// iMerge[nMerge] = iMax[i]; +// merge[nMerge++] = max[i]; +// } - input[iMax[i]].next = deleted; - } +// input[iMax[i]].next = deleted; +// } - // merge if nMerge > 1 - if (nMerge > 0) { - if (nMerge == 1) { - code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); - if (code) goto _err; - } else { - // merge 2 or 3 rows - SRowMerger merger = {0}; +// // merge if nMerge > 1 +// if (nMerge > 0) { +// if (nMerge == 1) { +// code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); +// if (code) goto _err; +// } else { +// // merge 2 or 3 rows +// SRowMerger merger = {0}; - tRowMergerInit(&merger, merge[0], pTSchema); - for (int i = 1; i < nMerge; ++i) { - tRowMerge(&merger, merge[i]); - } - tRowMergerGetRow(&merger, ppRow); - tRowMergerClear(&merger); - } - } else { - /* *ppRow = NULL; */ - /* return code; */ - continue; - } +// tRowMergerInit(&merger, merge[0], pTSchema); +// for (int i = 1; i < nMerge; ++i) { +// tRowMerge(&merger, merge[i]); +// } +// tRowMergerGetRow(&merger, ppRow); +// tRowMergerClear(&merger); +// } +// } else { +// /* *ppRow = NULL; */ +// /* return code; */ +// continue; +// } - if (iCol == 0) { - STColumn *pTColumn = &pTSchema->columns[0]; - SColVal *pColVal = &(SColVal){0}; +// if (iCol == 0) { +// STColumn *pTColumn = &pTSchema->columns[0]; +// SColVal *pColVal = &(SColVal){0}; - *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.ts = maxKey}); +// *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.ts = maxKey}); - // if (taosArrayPush(pColArray, pColVal) == NULL) { - if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } +// // if (taosArrayPush(pColArray, pColVal) == NULL) { +// if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { +// code = TSDB_CODE_OUT_OF_MEMORY; +// goto _err; +// } - ++iCol; +// ++iCol; - setICol = false; - for (int16_t i = iCol; i < nCol; ++i) { - // tsdbRowGetColVal(*ppRow, pTSchema, i, pColVal); - tTSRowGetVal(*ppRow, pTSchema, i, pColVal); - // if (taosArrayPush(pColArray, pColVal) == NULL) { - if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } +// setICol = false; +// for (int16_t i = iCol; i < nCol; ++i) { +// // tsdbRowGetColVal(*ppRow, pTSchema, i, pColVal); +// tTSRowGetVal(*ppRow, pTSchema, i, pColVal); +// // if (taosArrayPush(pColArray, pColVal) == NULL) { +// if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { +// code = TSDB_CODE_OUT_OF_MEMORY; +// goto _err; +// } - if (pColVal->isNull || pColVal->isNone) { - for (int j = 0; j < nMerge; ++j) { - SColVal jColVal = {0}; - tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); - if (jColVal.isNull || jColVal.isNone) { - input[iMerge[j]].next = true; - } - } - if (!setICol) { - iCol = i; - setICol = true; - } - } else { - --nilColCount; - } - } +// if (pColVal->isNull || pColVal->isNone) { +// for (int j = 0; j < nMerge; ++j) { +// SColVal jColVal = {0}; +// tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); +// if (jColVal.isNull || jColVal.isNone) { +// input[iMerge[j]].next = true; +// } +// } +// if (!setICol) { +// iCol = i; +// setICol = true; +// } +// } else { +// --nilColCount; +// } +// } - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } - continue; - } +// continue; +// } - setICol = false; - for (int16_t i = iCol; i < nCol; ++i) { - SColVal colVal = {0}; - tTSRowGetVal(*ppRow, pTSchema, i, &colVal); - TSKEY rowTs = (*ppRow)->ts; +// setICol = false; +// for (int16_t i = iCol; i < nCol; ++i) { +// SColVal colVal = {0}; +// tTSRowGetVal(*ppRow, pTSchema, i, &colVal); +// TSKEY rowTs = (*ppRow)->ts; - // SColVal *tColVal = (SColVal *)taosArrayGet(pColArray, i); - SLastCol *tTsVal = (SLastCol *)taosArrayGet(pColArray, i); - SColVal *tColVal = &tTsVal->colVal; +// // SColVal *tColVal = (SColVal *)taosArrayGet(pColArray, i); +// SLastCol *tTsVal = (SLastCol *)taosArrayGet(pColArray, i); +// SColVal *tColVal = &tTsVal->colVal; - if (!colVal.isNone && !colVal.isNull) { - if (tColVal->isNull || tColVal->isNone) { - // taosArraySet(pColArray, i, &colVal); - taosArraySet(pColArray, i, &(SLastCol){.ts = rowTs, .colVal = colVal}); - --nilColCount; - } - } else { - if ((tColVal->isNull || tColVal->isNone) && !setICol) { - iCol = i; - setICol = true; +// if (!colVal.isNone && !colVal.isNull) { +// if (tColVal->isNull || tColVal->isNone) { +// // taosArraySet(pColArray, i, &colVal); +// taosArraySet(pColArray, i, &(SLastCol){.ts = rowTs, .colVal = colVal}); +// --nilColCount; +// } +// } else { +// if ((tColVal->isNull || tColVal->isNone) && !setICol) { +// iCol = i; +// setICol = true; - for (int j = 0; j < nMerge; ++j) { - SColVal jColVal = {0}; - tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); - if (jColVal.isNull || jColVal.isNone) { - input[iMerge[j]].next = true; - } - } - } - } - } +// for (int j = 0; j < nMerge; ++j) { +// SColVal jColVal = {0}; +// tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); +// if (jColVal.isNull || jColVal.isNone) { +// input[iMerge[j]].next = true; +// } +// } +// } +// } +// } - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } - } while (nilColCount > 0); +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } +// } while (nilColCount > 0); - // if () new ts row from pColArray if non empty - /* if (taosArrayGetSize(pColArray) == nCol) { */ - /* code = tdSTSRowNew(pColArray, pTSchema, ppRow); */ - /* if (code) goto _err; */ - /* } */ - /* taosArrayDestroy(pColArray); */ - if (taosArrayGetSize(pColArray) <= 0) { - *ppLastArray = NULL; - taosArrayDestroy(pColArray); - } else { - *ppLastArray = pColArray; - } - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } +// // if () new ts row from pColArray if non empty +// /* if (taosArrayGetSize(pColArray) == nCol) { */ +// /* code = tdSTSRowNew(pColArray, pTSchema, ppRow); */ +// /* if (code) goto _err; */ +// /* } */ +// /* taosArrayDestroy(pColArray); */ +// if (taosArrayGetSize(pColArray) <= 0) { +// *ppLastArray = NULL; +// taosArrayDestroy(pColArray); +// } else { +// *ppLastArray = pColArray; +// } +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); - return code; -_err: - taosArrayDestroy(pColArray); - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); - tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} +// return code; +// _err: +// taosArrayDestroy(pColArray); +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); +// tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); +// return code; +// } int32_t tsdbCacheGetLastrowH(SLRUCache *pCache, tb_uid_t uid, STsdb *pTsdb, LRUHandle **handle) { int32_t code = 0; diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit.c b/source/dnode/vnode/src/tsdb/tsdbCommit.c index 3c496918e8..c4dc341a63 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit.c @@ -29,6 +29,7 @@ typedef struct { int32_t minRow; int32_t maxRow; int8_t cmprAlg; + STsdbFS fs; // -------------- TSKEY nextKey; // reset by each table commit int32_t commitFid; @@ -119,9 +120,6 @@ int32_t tsdbCommit(STsdb *pTsdb) { code = tsdbCommitDel(&commith); if (code) goto _err; - code = tsdbCommitCache(&commith); - if (code) goto _err; - // end commit code = tsdbEndCommit(&commith, 0); if (code) goto _err; @@ -158,7 +156,7 @@ static int32_t tsdbCommitDelStart(SCommitter *pCommitter) { goto _err; } - SDelFile *pDelFileR = pTsdb->pFS->nState->pDelFile; + SDelFile *pDelFileR = pCommitter->fs.pDelFile; if (pDelFileR) { code = tsdbDelFReaderOpen(&pCommitter->pDelFReader, pDelFileR, pTsdb, NULL); if (code) goto _err; @@ -247,7 +245,7 @@ static int32_t tsdbCommitDelEnd(SCommitter *pCommitter) { code = tsdbUpdateDelFileHdr(pCommitter->pDelFWriter); if (code) goto _err; - code = tsdbFSStateUpsertDelFile(pTsdb->pFS->nState, &pCommitter->pDelFWriter->fDel); + code = tsdbFSUpsertDelFile(&pCommitter->fs, &pCommitter->pDelFWriter->fDel); if (code) goto _err; code = tsdbDelFWriterClose(&pCommitter->pDelFWriter, 1); @@ -281,7 +279,8 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { taosArrayClear(pCommitter->aBlockIdx); tMapDataReset(&pCommitter->oBlockMap); tBlockDataReset(&pCommitter->oBlockData); - pRSet = tsdbFSStateGetDFileSet(pTsdb->pFS->nState, pCommitter->commitFid, TD_EQ); + pRSet = (SDFileSet *)taosArraySearch(pCommitter->fs.aDFileSet, &(SDFileSet){.fid = pCommitter->commitFid}, + tDFileSetCmprFn, TD_EQ); if (pRSet) { code = tsdbDataFReaderOpen(&pCommitter->pReader, pTsdb, pRSet); if (code) goto _err; @@ -860,7 +859,7 @@ static int32_t tsdbCommitFileDataEnd(SCommitter *pCommitter) { if (code) goto _err; // upsert SDFileSet - code = tsdbFSStateUpsertDFileSet(pCommitter->pTsdb->pFS->nState, &pCommitter->pWriter->wSet); + code = tsdbFSUpsertFSet(&pCommitter->fs, &pCommitter->pWriter->wSet); if (code) goto _err; // close and sync @@ -978,7 +977,7 @@ static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter) { pCommitter->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; pCommitter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; - code = tsdbFSBegin(pTsdb->pFS); + code = tsdbFSCopy(pTsdb, &pCommitter->fs); if (code) goto _err; return code; @@ -1147,28 +1146,33 @@ _err: return code; } -static int32_t tsdbCommitCache(SCommitter *pCommitter) { - int32_t code = 0; - // TODO - return code; -} - static int32_t tsdbEndCommit(SCommitter *pCommitter, int32_t eno) { int32_t code = 0; STsdb *pTsdb = pCommitter->pTsdb; SMemTable *pMemTable = pTsdb->imem; - if (eno == 0) { - code = tsdbFSCommit(pTsdb->pFS); - } else { - code = tsdbFSRollback(pTsdb->pFS); + ASSERT(eno); + + code = tsdbFSCommit1(pTsdb, &pCommitter->fs); + if (code) goto _err; + + // lock + taosThreadRwlockWrlock(&pTsdb->rwLock); + + // commit or rollback + code = tsdbFSCommit2(pTsdb, &pCommitter->fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _err; } - taosThreadRwlockWrlock(&pTsdb->rwLock); pTsdb->imem = NULL; + + // unlock taosThreadRwlockUnlock(&pTsdb->rwLock); tsdbUnrefMemTable(pMemTable); + tsdbFSDestroy(&pCommitter->fs); tsdbInfo("vgId:%d tsdb end commit", TD_VID(pTsdb->pVnode)); return code; diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index f5e6e9744e..5c95e6cfec 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -16,67 +16,41 @@ #include "tsdb.h" // ================================================================================================= -static int32_t tPutFSState(uint8_t *p, STsdbFSState *pState) { +static int32_t tsdbEncodeFS(uint8_t *p, STsdbFS *pFS) { int32_t n = 0; - int8_t hasDel = pState->pDelFile ? 1 : 0; - uint32_t nDFileSet = taosArrayGetSize(pState->aDFileSet); + int8_t hasDel = pFS->pDelFile ? 1 : 0; + uint32_t nSet = taosArrayGetSize(pFS->aDFileSet); // SDelFile n += tPutI8(p ? p + n : p, hasDel); if (hasDel) { - n += tPutDelFile(p ? p + n : p, pState->pDelFile); + n += tPutDelFile(p ? p + n : p, pFS->pDelFile); } // SArray - n += tPutU32v(p ? p + n : p, nDFileSet); - for (uint32_t iDFileSet = 0; iDFileSet < nDFileSet; iDFileSet++) { - n += tPutDFileSet(p ? p + n : p, (SDFileSet *)taosArrayGet(pState->aDFileSet, iDFileSet)); + n += tPutU32v(p ? p + n : p, nSet); + for (uint32_t iSet = 0; iSet < nSet; iSet++) { + n += tPutDFileSet(p ? p + n : p, (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet)); } return n; } -static int32_t tGetFSState(uint8_t *p, STsdbFSState *pState) { - int32_t n = 0; - int8_t hasDel; - uint32_t nDFileSet; - SDFileSet *pSet = &(SDFileSet){0}; - - // SDelFile - n += tGetI8(p + n, &hasDel); - if (hasDel) { - pState->pDelFile = &pState->delFile; - n += tGetDelFile(p + n, pState->pDelFile); - } else { - pState->pDelFile = NULL; - } - - // SArray - taosArrayClear(pState->aDFileSet); - n += tGetU32v(p + n, &nDFileSet); - for (uint32_t iDFileSet = 0; iDFileSet < nDFileSet; iDFileSet++) { - n += tGetDFileSet(p + n, pSet); - taosArrayPush(pState->aDFileSet, pSet); - } - - return n; -} - -static int32_t tsdbGnrtCurrent(const char *fname, STsdbFSState *pState) { +static int32_t tsdbGnrtCurrent(STsdb *pTsdb, STsdbFS *pFS, char *fname) { int32_t code = 0; int64_t n; int64_t size; - uint8_t *pData; + uint8_t *pData = NULL; TdFilePtr pFD = NULL; // to binary - size = tPutFSState(NULL, pState) + sizeof(TSCKSUM); + size = tsdbEncodeFS(NULL, pFS) + sizeof(TSCKSUM); pData = taosMemoryMalloc(size); if (pData == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - n = tPutFSState(pData, pState); + n = tsdbEncodeFS(pData, pFS); ASSERT(n + sizeof(TSCKSUM) == size); taosCalcChecksumAppend(0, pData, size); @@ -104,411 +78,267 @@ static int32_t tsdbGnrtCurrent(const char *fname, STsdbFSState *pState) { return code; _err: - tsdbError("tsdb gnrt current failed since %s", tstrerror(code)); + tsdbError("vgId:%d tsdb gnrt current failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); if (pData) taosMemoryFree(pData); return code; } -static int32_t tsdbLoadCurrentState(STsdbFS *pFS, STsdbFSState *pState) { - int32_t code = 0; - int64_t size; - int64_t n; - char fname[TSDB_FILENAME_LEN]; - uint8_t *pData = NULL; - TdFilePtr pFD; +// static int32_t tsdbApplyDFileSetChange(STsdbFS *pFS, SDFileSet *pFrom, SDFileSet *pTo) { +// int32_t code = 0; +// char fname[TSDB_FILENAME_LEN]; - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP, - pFS->pTsdb->path, TD_DIRSEP); +// if (pFrom && pTo) { +// bool isSameDisk = (pFrom->diskId.level == pTo->diskId.level) && (pFrom->diskId.id == pTo->diskId.id); - if (!taosCheckExistFile(fname)) { - // create an empry CURRENT file if not exists - code = tsdbGnrtCurrent(fname, pState); - if (code) goto _err; - } else { - // open the file and load - pFD = taosOpenFile(fname, TD_FILE_READ); - if (pFD == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } +// // head +// if (isSameDisk && pFrom->pHeadF->commitID == pTo->pHeadF->commitID) { +// ASSERT(pFrom->pHeadF->size == pTo->pHeadF->size); +// ASSERT(pFrom->pHeadF->offset == pTo->pHeadF->offset); +// } else { +// tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); +// taosRemoveFile(fname); +// } - if (taosFStatFile(pFD, &size, NULL) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } +// // data +// if (isSameDisk && pFrom->pDataF->commitID == pTo->pDataF->commitID) { +// if (pFrom->pDataF->size > pTo->pDataF->size) { +// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_DATA_FILE); +// if (code) goto _err; +// } +// } else { +// tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); +// taosRemoveFile(fname); +// } - pData = taosMemoryMalloc(size); - if (pData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } +// // last +// if (isSameDisk && pFrom->pLastF->commitID == pTo->pLastF->commitID) { +// if (pFrom->pLastF->size > pTo->pLastF->size) { +// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_LAST_FILE); +// if (code) goto _err; +// } +// } else { +// tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); +// taosRemoveFile(fname); +// } - n = taosReadFile(pFD, pData, size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } +// // sma +// if (isSameDisk && pFrom->pSmaF->commitID == pTo->pSmaF->commitID) { +// if (pFrom->pSmaF->size > pTo->pSmaF->size) { +// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_SMA_FILE); +// if (code) goto _err; +// } +// } else { +// tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); +// taosRemoveFile(fname); +// } +// } else if (pFrom) { +// // head +// tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); +// taosRemoveFile(fname); - if (!taosCheckChecksumWhole(pData, size)) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } +// // data +// tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); +// taosRemoveFile(fname); - taosCloseFile(&pFD); +// // last +// tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); +// taosRemoveFile(fname); - // decode - tGetFSState(pData, pState); +// // fsm +// tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); +// taosRemoveFile(fname); +// } + +// return code; + +// _err: +// tsdbError("vgId:%d tsdb apply disk file set change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); +// return code; +// } + +// static int32_t tsdbApplyDelFileChange(STsdbFS *pFS, SDelFile *pFrom, SDelFile *pTo) { +// int32_t code = 0; +// char fname[TSDB_FILENAME_LEN]; + +// if (pFrom && pTo) { +// if (!tsdbDelFileIsSame(pFrom, pTo)) { +// tsdbDelFileName(pFS->pTsdb, pFrom, fname); +// if (taosRemoveFile(fname) < 0) { +// code = TAOS_SYSTEM_ERROR(errno); +// goto _err; +// } +// } +// } else if (pFrom) { +// tsdbDelFileName(pFS->pTsdb, pFrom, fname); +// if (taosRemoveFile(fname) < 0) { +// code = TAOS_SYSTEM_ERROR(errno); +// goto _err; +// } +// } else { +// // do nothing +// } + +// return code; + +// _err: +// tsdbError("vgId:%d tsdb apply del file change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); +// return code; +// } + +// static int32_t tsdbFSApplyDiskChange(STsdbFS *pFS, STsdbFSState *pFrom, STsdbFSState *pTo) { +// int32_t code = 0; +// int32_t iFrom = 0; +// int32_t nFrom = taosArrayGetSize(pFrom->aDFileSet); +// int32_t iTo = 0; +// int32_t nTo = taosArrayGetSize(pTo->aDFileSet); +// SDFileSet *pDFileSetFrom; +// SDFileSet *pDFileSetTo; + +// // SDelFile +// code = tsdbApplyDelFileChange(pFS, pFrom->pDelFile, pTo->pDelFile); +// if (code) goto _err; + +// // SDFileSet +// while (iFrom < nFrom && iTo < nTo) { +// pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); +// pDFileSetTo = (SDFileSet *)taosArrayGet(pTo->aDFileSet, iTo); + +// if (pDFileSetFrom->fid == pDFileSetTo->fid) { +// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, pDFileSetTo); +// if (code) goto _err; + +// iFrom++; +// iTo++; +// } else if (pDFileSetFrom->fid < pDFileSetTo->fid) { +// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); +// if (code) goto _err; + +// iFrom++; +// } else { +// iTo++; +// } +// } + +// while (iFrom < nFrom) { +// pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); +// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); +// if (code) goto _err; + +// iFrom++; +// } + +// #if 0 +// // do noting +// while (iTo < nTo) { +// pDFileSetTo = (SDFileSet *)taosArrayGetP(pTo->aDFileSet, iTo); +// code = tsdbApplyDFileSetChange(pFS, NULL, pDFileSetTo); +// if (code) goto _err; + +// iTo++; +// } +// #endif + +// return code; + +// _err: +// tsdbError("vgId:%d tsdb fs apply disk change failed sicne %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); +// return code; +// } + +void tsdbFSDestroy(STsdbFS *pFS) { + if (pFS->pDelFile) { + taosMemoryFree(pFS->pDelFile); } - if (pData) taosMemoryFree(pData); - return code; + for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet); + taosMemoryFree(pSet->pHeadF); + taosMemoryFree(pSet->pDataF); + taosMemoryFree(pSet->pLastF); + taosMemoryFree(pSet->pSmaF); + } -_err: - tsdbError("vgId:%d tsdb load current state failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - if (pData) taosMemoryFree(pData); - return code; + taosArrayDestroy(pFS->aDFileSet); } -static int32_t tsdbApplyDFileSetChange(STsdbFS *pFS, SDFileSet *pFrom, SDFileSet *pTo) { +static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { int32_t code = 0; + int64_t size; char fname[TSDB_FILENAME_LEN]; - if (pFrom && pTo) { - bool isSameDisk = (pFrom->diskId.level == pTo->diskId.level) && (pFrom->diskId.id == pTo->diskId.id); - - // head - if (isSameDisk && pFrom->pHeadF->commitID == pTo->pHeadF->commitID) { - ASSERT(pFrom->pHeadF->size == pTo->pHeadF->size); - ASSERT(pFrom->pHeadF->offset == pTo->pHeadF->offset); - } else { - tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); - taosRemoveFile(fname); - } - - // data - if (isSameDisk && pFrom->pDataF->commitID == pTo->pDataF->commitID) { - if (pFrom->pDataF->size > pTo->pDataF->size) { - code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_DATA_FILE); - if (code) goto _err; - } - } else { - tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); - taosRemoveFile(fname); - } - - // last - if (isSameDisk && pFrom->pLastF->commitID == pTo->pLastF->commitID) { - if (pFrom->pLastF->size > pTo->pLastF->size) { - code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_LAST_FILE); - if (code) goto _err; - } - } else { - tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); - taosRemoveFile(fname); - } - - // sma - if (isSameDisk && pFrom->pSmaF->commitID == pTo->pSmaF->commitID) { - if (pFrom->pSmaF->size > pTo->pSmaF->size) { - code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_SMA_FILE); - if (code) goto _err; - } - } else { - tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); - taosRemoveFile(fname); - } - } else if (pFrom) { - // head - tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); - taosRemoveFile(fname); - - // data - tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); - taosRemoveFile(fname); - - // last - tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); - taosRemoveFile(fname); - - // fsm - tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); - taosRemoveFile(fname); - } - - return code; - -_err: - tsdbError("vgId:%d tsdb apply disk file set change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbApplyDelFileChange(STsdbFS *pFS, SDelFile *pFrom, SDelFile *pTo) { - int32_t code = 0; - char fname[TSDB_FILENAME_LEN]; - - if (pFrom && pTo) { - if (!tsdbDelFileIsSame(pFrom, pTo)) { - tsdbDelFileName(pFS->pTsdb, pFrom, fname); - if (taosRemoveFile(fname) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - } - } else if (pFrom) { - tsdbDelFileName(pFS->pTsdb, pFrom, fname); - if (taosRemoveFile(fname) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - } else { - // do nothing - } - - return code; - -_err: - tsdbError("vgId:%d tsdb apply del file change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbFSApplyDiskChange(STsdbFS *pFS, STsdbFSState *pFrom, STsdbFSState *pTo) { - int32_t code = 0; - int32_t iFrom = 0; - int32_t nFrom = taosArrayGetSize(pFrom->aDFileSet); - int32_t iTo = 0; - int32_t nTo = taosArrayGetSize(pTo->aDFileSet); - SDFileSet *pDFileSetFrom; - SDFileSet *pDFileSetTo; - // SDelFile - code = tsdbApplyDelFileChange(pFS, pFrom->pDelFile, pTo->pDelFile); - if (code) goto _err; - - // SDFileSet - while (iFrom < nFrom && iTo < nTo) { - pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); - pDFileSetTo = (SDFileSet *)taosArrayGet(pTo->aDFileSet, iTo); - - if (pDFileSetFrom->fid == pDFileSetTo->fid) { - code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, pDFileSetTo); - if (code) goto _err; - - iFrom++; - iTo++; - } else if (pDFileSetFrom->fid < pDFileSetTo->fid) { - code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); - if (code) goto _err; - - iFrom++; - } else { - iTo++; - } - } - - while (iFrom < nFrom) { - pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); - code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); - if (code) goto _err; - - iFrom++; - } - -#if 0 - // do noting - while (iTo < nTo) { - pDFileSetTo = (SDFileSet *)taosArrayGetP(pTo->aDFileSet, iTo); - code = tsdbApplyDFileSetChange(pFS, NULL, pDFileSetTo); - if (code) goto _err; - - iTo++; - } -#endif - - return code; - -_err: - tsdbError("vgId:%d tsdb fs apply disk change failed sicne %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static void tsdbFSDestroy(STsdbFS *pFS) { - if (pFS) { - if (pFS->nState) { - taosArrayDestroy(pFS->nState->aDFileSet); - taosMemoryFree(pFS->nState); - } - - if (pFS->cState) { - taosArrayDestroy(pFS->cState->aDFileSet); - taosMemoryFree(pFS->cState); - } - - taosMemoryFree(pFS); - } - // TODO -} - -static int32_t tsdbFSCreate(STsdb *pTsdb, STsdbFS **ppFS) { - int32_t code = 0; - STsdbFS *pFS = NULL; - - pFS = (STsdbFS *)taosMemoryCalloc(1, sizeof(*pFS)); - if (pFS == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pFS->pTsdb = pTsdb; - - pFS->cState = (STsdbFSState *)taosMemoryCalloc(1, sizeof(STsdbFSState)); - if (pFS->cState == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pFS->cState->aDFileSet = taosArrayInit(0, sizeof(SDFileSet)); - if (pFS->cState->aDFileSet == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - pFS->nState = (STsdbFSState *)taosMemoryCalloc(1, sizeof(STsdbFSState)); - if (pFS->nState == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pFS->nState->aDFileSet = taosArrayInit(0, sizeof(SDFileSet)); - if (pFS->nState->aDFileSet == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - *ppFS = pFS; - return code; - -_err: - tsdbError("vgId:%d tsdb fs create failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - tsdbFSDestroy(pFS); - *ppFS = NULL; - return code; -} - -static int32_t tsdbScanAndTryFixFS(STsdbFS *pFS, int8_t deepScan) { - int32_t code = 0; - STsdb *pTsdb = pFS->pTsdb; - STfs *pTfs = pTsdb->pVnode->pTfs; - int64_t size; - char fname[TSDB_FILENAME_LEN]; - char pHdr[TSDB_FHDR_SIZE]; - TdFilePtr pFD; - - // SDelFile - if (pFS->cState->pDelFile) { - tsdbDelFileName(pTsdb, pFS->cState->pDelFile, fname); + if (pTsdb->fs.pDelFile) { + tsdbDelFileName(pTsdb, pTsdb->fs.pDelFile, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (size != pFS->cState->pDelFile->size) { + if (size != pTsdb->fs.pDelFile->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } - - if (deepScan) { - // TODO - } } // SArray - for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->cState->aDFileSet); iSet++) { - SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->cState->aDFileSet, iSet); + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); // head ========= - tsdbHeadFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pHeadF, fname); + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - - if (deepScan) { - // TODO + if (size != pSet->pHeadF->size) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _err; } // data ========= - tsdbDataFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pDataF, fname); + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - - if (size < pDFileSet->pDataF->size) { + if (size < pSet->pDataF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pDFileSet->pDataF->size) { - ASSERT(0); - // need to rollback the file - } - - if (deepScan) { - // TODO + } else if (size > pSet->pDataF->size) { + code = tsdbDFileRollback(pTsdb, pSet, TSDB_DATA_FILE); + if (code) goto _err; } // last =========== - tsdbLastFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pLastF, fname); + tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - - if (size < pDFileSet->pLastF->size) { + if (size != pSet->pLastF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pDFileSet->pLastF->size) { - ASSERT(0); - // need to rollback the file - } - - if (deepScan) { - // TODO } // sma ============= - tsdbSmaFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pSmaF, fname); + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - - if (size < pDFileSet->pSmaF->size) { + if (size < pSet->pSmaF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pDFileSet->pSmaF->size) { - ASSERT(0); - // need to rollback the file - } - - if (deepScan) { - // TODO + } else if (size > pSet->pSmaF->size) { + code = tsdbDFileRollback(pTsdb, pSet, TSDB_SMA_FILE); + if (code) goto _err; } } - // remove those invalid files (todo) -#if 0 - STfsDir *tdir; - const STfsFile *pf; - - tdir = tfsOpendir(pTfs, pTsdb->path); - if (tdir == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; + { + // remove those invalid files (todo) } - while ((pf = tfsReaddir(tdir))) { - tfsBasename(pf, fname); - } - - tfsClosedir(tdir); -#endif - return code; _err: @@ -516,7 +346,7 @@ _err: return code; } -static int32_t tDFileSetCmprFn(const void *p1, const void *p2) { +int32_t tDFileSetCmprFn(const void *p1, const void *p2) { if (((SDFileSet *)p1)->fid < ((SDFileSet *)p2)->fid) { return -1; } else if (((SDFileSet *)p1)->fid > ((SDFileSet *)p2)->fid) { @@ -526,87 +356,372 @@ static int32_t tDFileSetCmprFn(const void *p1, const void *p2) { return 0; } -// EXPOSED APIS ==================================================================================== -int32_t tsdbFSOpen(STsdb *pTsdb, STsdbFS **ppFS) { - int32_t code = 0; - - // create handle - code = tsdbFSCreate(pTsdb, ppFS); - if (code) goto _err; - - // load current state - code = tsdbLoadCurrentState(*ppFS, (*ppFS)->cState); - if (code) { - tsdbFSDestroy(*ppFS); - goto _err; - } - - // scan and fix FS - code = tsdbScanAndTryFixFS(*ppFS, 0); - if (code) { - tsdbFSDestroy(*ppFS); - goto _err; - } - - return code; - -_err: - *ppFS = NULL; - tsdbError("vgId:%d tsdb fs open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} - -int32_t tsdbFSClose(STsdbFS *pFS) { - int32_t code = 0; - tsdbFSDestroy(pFS); - return code; -} - -int32_t tsdbFSBegin(STsdbFS *pFS) { - int32_t code = 0; +static int32_t tsdbRecoverFS(STsdb *pTsdb, uint8_t *pData, int64_t nData) { + int32_t code = 0; + int8_t hasDel; + uint32_t nSet; + int32_t n; // SDelFile - pFS->nState->pDelFile = NULL; - if (pFS->cState->pDelFile) { - pFS->nState->delFile = pFS->cState->delFile; - pFS->nState->pDelFile = &pFS->nState->delFile; + n = 0; + n += tGetI8(pData + n, &hasDel); + if (hasDel) { + pTsdb->fs.pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pTsdb->fs.pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + pTsdb->fs.pDelFile->nRef = 1; + n += tGetDelFile(pData + n, pTsdb->fs.pDelFile); + } else { + pTsdb->fs.pDelFile = NULL; } - // SArray - taosArrayClear(pFS->nState->aDFileSet); - for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->cState->aDFileSet); iSet++) { - SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->cState->aDFileSet, iSet); + // SArray + taosArrayClear(pTsdb->fs.aDFileSet); + n += tGetU32v(pData + n, &nSet); + for (uint32_t iSet = 0; iSet < nSet; iSet++) { + SDFileSet fSet; - if (taosArrayPush(pFS->nState->aDFileSet, pDFileSet) == NULL) { + // head + fSet.pHeadF = (SHeadFile *)taosMemoryCalloc(1, sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + fSet.pHeadF->nRef = 1; + + // data + fSet.pDataF = (SDataFile *)taosMemoryCalloc(1, sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + fSet.pDataF->nRef = 1; + + // last + fSet.pLastF = (SLastFile *)taosMemoryCalloc(1, sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + fSet.pLastF->nRef = 1; + + // sma + fSet.pSmaF = (SSmaFile *)taosMemoryCalloc(1, sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + fSet.pSmaF->nRef = 1; + + n += tGetDFileSet(pData + n, &fSet); + + if (taosArrayPush(pTsdb->fs.aDFileSet, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } } + ASSERT(n + sizeof(TSCKSUM) == nData); return code; _err: - tsdbError("vgId:%d tsdb fs begin failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); return code; } -int32_t tsdbFSCommit(STsdbFS *pFS) { - int32_t code = 0; - STsdbFSState *pState = pFS->nState; - char tfname[TSDB_FILENAME_LEN]; - char fname[TSDB_FILENAME_LEN]; +// EXPOSED APIS ==================================================================================== +int32_t tsdbFSOpen(STsdb *pTsdb) { + int32_t code = 0; - // need lock (todo) - pFS->nState = pFS->cState; - pFS->cState = pState; + // open handle + pTsdb->fs.pDelFile = NULL; + pTsdb->fs.aDFileSet = taosArrayInit(0, sizeof(SDFileSet)); + if (pTsdb->fs.aDFileSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } - snprintf(tfname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT.t", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP, - pFS->pTsdb->path, TD_DIRSEP); - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP, - pFS->pTsdb->path, TD_DIRSEP); + // load fs or keep empty + char fname[TSDB_FILENAME_LEN]; + + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP, + pTsdb->path, TD_DIRSEP); + + if (!taosCheckExistFile(fname)) { + // empty one + code = tsdbGnrtCurrent(pTsdb, &pTsdb->fs, fname); + if (code) goto _err; + } else { + // read + TdFilePtr pFD = taosOpenFile(fname, TD_FILE_READ); + if (pFD == NULL) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + int64_t size; + if (taosFStatFile(pFD, &size, NULL) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + taosCloseFile(&pFD); + goto _err; + } + + uint8_t *pData = taosMemoryMalloc(size); + if (pData == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + taosCloseFile(&pFD); + goto _err; + } + + int64_t n = taosReadFile(pFD, pData, size); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + taosMemoryFree(pData); + taosCloseFile(&pFD); + goto _err; + } + + if (!taosCheckChecksumWhole(pData, size)) { + code = TSDB_CODE_FILE_CORRUPTED; + taosMemoryFree(pData); + taosCloseFile(&pFD); + goto _err; + } + + taosCloseFile(&pFD); + + // recover fs + code = tsdbRecoverFS(pTsdb, pData, size); + if (code) { + taosMemoryFree(pData); + goto _err; + } + + taosMemoryFree(pData); + } + + // scan and fix FS + code = tsdbScanAndTryFixFS(pTsdb); + if (code) goto _err; + + return code; + +_err: + tsdbError("vgId:%d tsdb fs open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbFSClose(STsdb *pTsdb) { + int32_t code = 0; + + if (pTsdb->fs.pDelFile) { + ASSERT(pTsdb->fs.pDelFile->nRef == 1); + taosMemoryFree(pTsdb->fs.pDelFile); + } + + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + + // head + ASSERT(pSet->pHeadF->nRef == 1); + taosMemoryFree(pSet->pHeadF); + + // data + ASSERT(pSet->pDataF->nRef == 1); + taosMemoryFree(pSet->pDataF); + + // last + ASSERT(pSet->pLastF->nRef == 1); + taosMemoryFree(pSet->pLastF); + + // sma + ASSERT(pSet->pSmaF->nRef == 1); + taosMemoryFree(pSet->pSmaF); + } + + taosArrayClear(pTsdb->fs.aDFileSet); + + return code; +} + +int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS) { + int32_t code = 0; + + pFS->pDelFile = NULL; + pFS->aDFileSet = taosArrayInit(taosArrayGetSize(pTsdb->fs.aDFileSet), sizeof(SDFileSet)); + if (pFS->aDFileSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + if (pTsdb->fs.pDelFile) { + pFS->pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pFS->pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + *pFS->pDelFile = *pTsdb->fs.pDelFile; + } + + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid}; + + // head + fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + fSet.pHeadF->nRef = 0; + fSet.pHeadF->commitID = pSet->pHeadF->commitID; + fSet.pHeadF->size = pSet->pHeadF->size; + fSet.pHeadF->offset = pSet->pHeadF->offset; + + // data + fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + fSet.pDataF->nRef = 0; + fSet.pDataF->commitID = pSet->pDataF->commitID; + fSet.pDataF->size = pSet->pDataF->size; + + // data + fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + fSet.pLastF->nRef = 0; + fSet.pLastF->commitID = pSet->pLastF->commitID; + fSet.pLastF->size = pSet->pLastF->size; + + // last + fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + fSet.pSmaF->nRef = 0; + fSet.pSmaF->commitID = pSet->pSmaF->commitID; + fSet.pSmaF->size = pSet->pSmaF->size; + + if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + } + +_exit: + return code; +} + +int32_t tsdbFSRollback(STsdbFS *pFS) { + int32_t code = 0; + + ASSERT(0); + + return code; + +_err: + return code; +} + +int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile) { + int32_t code = 0; + + if (pFS->pDelFile == NULL) { + pFS->pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pFS->pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + } + *pFS->pDelFile = *pDelFile; + +_exit: + return code; +} + +int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet) { + int32_t code = 0; + int32_t idx = taosArraySearchIdx(pFS->aDFileSet, pSet, tDFileSetCmprFn, TD_GE); + + if (idx < 0) { + idx = taosArrayGetSize(pFS->aDFileSet); + } else { + SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, idx); + int32_t c = tDFileSetCmprFn(pSet, pDFileSet); + if (c == 0) { + *pDFileSet->pHeadF = *pSet->pHeadF; + *pDFileSet->pDataF = *pSet->pDataF; + *pDFileSet->pLastF = *pSet->pLastF; + *pDFileSet->pSmaF = *pSet->pSmaF; + + goto _exit; + } + } + + SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid}; + + // head + fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pHeadF = *pSet->pHeadF; + + // data + fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pDataF = *pSet->pDataF; + + // data + fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pLastF = *pSet->pLastF; + + // last + fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pSmaF = *pSet->pSmaF; + + if (taosArrayInsert(pFS->aDFileSet, idx, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + +_exit: + return code; +} + +int32_t tsdbFSCommit1(STsdb *pTsdb, STsdbFS *pFSNew) { + int32_t code = 0; + char tfname[TSDB_FILENAME_LEN]; + char fname[TSDB_FILENAME_LEN]; + + snprintf(tfname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT.t", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP, + pTsdb->path, TD_DIRSEP); + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP, + pTsdb->path, TD_DIRSEP); // gnrt CURRENT.t - code = tsdbGnrtCurrent(tfname, pFS->cState); + code = tsdbGnrtCurrent(pTsdb, pFSNew, tfname); if (code) goto _err; // rename @@ -616,56 +731,295 @@ int32_t tsdbFSCommit(STsdbFS *pFS) { goto _err; } - // apply commit on disk - code = tsdbFSApplyDiskChange(pFS, pFS->nState, pFS->cState); - if (code) goto _err; - return code; _err: - tsdbError("vgId:%d tsdb fs commit failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); + tsdbError("vgId:%d tsdb fs commit phase 1 failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } -int32_t tsdbFSRollback(STsdbFS *pFS) { +int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFSNew) { int32_t code = 0; + int32_t nRef; + char fname[TSDB_FILENAME_LEN]; - code = tsdbFSApplyDiskChange(pFS, pFS->nState, pFS->cState); - if (code) goto _err; + // del + if (pFSNew->pDelFile) { + SDelFile *pDelFile = pTsdb->fs.pDelFile; - return code; + if (pDelFile == NULL || (pDelFile->commitID != pFSNew->pDelFile->commitID)) { + pTsdb->fs.pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pTsdb->fs.pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } -_err: - tsdbError("vgId:%d tsdb fs rollback failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} + *pTsdb->fs.pDelFile = *pFSNew->pDelFile; + pTsdb->fs.pDelFile->nRef = 1; -int32_t tsdbFSStateUpsertDelFile(STsdbFSState *pState, SDelFile *pDelFile) { - int32_t code = 0; - pState->delFile = *pDelFile; - pState->pDelFile = &pState->delFile; - return code; -} - -int32_t tsdbFSStateUpsertDFileSet(STsdbFSState *pState, SDFileSet *pSet) { - int32_t code = 0; - int32_t idx = taosArraySearchIdx(pState->aDFileSet, pSet, tDFileSetCmprFn, TD_GE); - - if (idx < 0) { - if (taosArrayPush(pState->aDFileSet, pSet) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; + if (pDelFile) { + nRef = atomic_sub_fetch_32(&pDelFile->nRef, 1); + if (nRef == 0) { + tsdbDelFileName(pTsdb, pDelFile, fname); + taosRemoveFile(fname); + taosMemoryFree(pDelFile); + } + } } } else { - SDFileSet *tDFileSet = (SDFileSet *)taosArrayGet(pState->aDFileSet, idx); - int32_t c = tDFileSetCmprFn(pSet, tDFileSet); - if (c == 0) { - taosArraySet(pState->aDFileSet, idx, pSet); - } else { - if (taosArrayInsert(pState->aDFileSet, idx, pSet) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; + ASSERT(pTsdb->fs.pDelFile == NULL); + } + + // data + int32_t iOld = 0; + int32_t iNew = 0; + while (true) { + int32_t nOld = taosArrayGetSize(pTsdb->fs.aDFileSet); + int32_t nNew = taosArrayGetSize(pFSNew->aDFileSet); + SDFileSet fSet; + int8_t sameDisk; + + if (iOld >= nOld && iNew >= nNew) break; + + SDFileSet *pSetOld = (iOld < nOld) ? taosArrayGet(pTsdb->fs.aDFileSet, iOld) : NULL; + SDFileSet *pSetNew = (iNew < nNew) ? taosArrayGet(pFSNew->aDFileSet, iNew) : NULL; + + if (pSetOld && pSetNew) { + if (pSetOld->fid == pSetNew->fid) { + goto _merge_old_and_new; + } else if (pSetOld->fid < pSetNew->fid) { + goto _remove_old; + } else { + goto _add_new; } + } else if (pSetOld) { + goto _remove_old; + } else { + goto _add_new; + } + + _merge_old_and_new: + sameDisk = ((pSetOld->diskId.level == pSetNew->diskId.level) && (pSetOld->diskId.id == pSetNew->diskId.id)); + + // head + fSet.pHeadF = pSetOld->pHeadF; + if ((!sameDisk) || (pSetOld->pHeadF->commitID != pSetNew->pHeadF->commitID)) { + pSetOld->pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (pSetOld->pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pHeadF = *pSetNew->pHeadF; + pSetOld->pHeadF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pHeadF->nRef, 1); + if (nRef == 0) { + tsdbHeadFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pHeadF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pHeadF); + } + } else { + ASSERT(fSet.pHeadF->size == pSetNew->pHeadF->size); + ASSERT(fSet.pHeadF->offset == pSetNew->pHeadF->offset); + } + + // data + fSet.pDataF = pSetOld->pDataF; + if ((!sameDisk) || (pSetOld->pDataF->commitID != pSetNew->pDataF->commitID)) { + pSetOld->pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (pSetOld->pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pDataF = *pSetNew->pDataF; + pSetOld->pDataF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pDataF->nRef, 1); + if (nRef == 0) { + tsdbDataFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pDataF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pDataF); + } + } else { + ASSERT(pSetOld->pDataF->size <= pSetNew->pDataF->size); + pSetOld->pDataF->size = pSetNew->pDataF->size; + } + + // last + fSet.pLastF = pSetOld->pLastF; + if ((!sameDisk) || (pSetOld->pLastF->commitID != pSetNew->pLastF->commitID)) { + pSetOld->pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (pSetOld->pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pLastF = *pSetNew->pLastF; + pSetOld->pLastF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pLastF->nRef, 1); + if (nRef == 0) { + tsdbLastFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pLastF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pLastF); + } + } else { + ASSERT(pSetOld->pLastF->size == pSetNew->pLastF->size); + } + + // sma + fSet.pSmaF = pSetOld->pSmaF; + if ((!sameDisk) || (pSetOld->pSmaF->commitID != pSetNew->pSmaF->commitID)) { + pSetOld->pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (pSetOld->pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pSmaF = *pSetNew->pSmaF; + pSetOld->pSmaF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pSmaF->nRef, 1); + if (nRef == 0) { + tsdbSmaFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pSmaF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pSmaF); + } + } else { + ASSERT(pSetOld->pSmaF->size <= pSetNew->pSmaF->size); + pSetOld->pSmaF->size = pSetNew->pSmaF->size; + } + + if (!sameDisk) { + pSetOld->diskId = pSetNew->diskId; + } + + iOld++; + iNew++; + continue; + + _remove_old: + nRef = atomic_sub_fetch_32(&pSetOld->pHeadF->nRef, 1); + if (nRef == 0) { + tsdbHeadFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pHeadF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pHeadF); + } + + nRef = atomic_sub_fetch_32(&pSetOld->pDataF->nRef, 1); + if (nRef == 0) { + tsdbDataFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pDataF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pDataF); + } + + nRef = atomic_sub_fetch_32(&pSetOld->pLastF->nRef, 1); + if (nRef == 0) { + tsdbLastFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pLastF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pLastF); + } + + nRef = atomic_sub_fetch_32(&pSetOld->pSmaF->nRef, 1); + if (nRef == 0) { + tsdbSmaFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pSmaF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pSmaF); + } + + taosArrayRemove(pTsdb->fs.aDFileSet, iOld); + continue; + + _add_new: + fSet.diskId = pSetNew->diskId; + fSet.fid = pSetNew->fid; + + // head + fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pHeadF = *pSetNew->pHeadF; + fSet.pHeadF->nRef = 1; + + // data + fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pDataF = *pSetNew->pDataF; + fSet.pDataF->nRef = 1; + + // last + fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pLastF = *pSetNew->pLastF; + fSet.pLastF->nRef = 1; + + // sma + fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pSmaF = *pSetNew->pSmaF; + fSet.pSmaF->nRef = 1; + + if (taosArrayInsert(pTsdb->fs.aDFileSet, iOld, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + iOld++; + iNew++; + continue; + } + + return code; + +_err: + tsdbError("vgId:%d tsdb fs commit phase 2 failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS) { + int32_t code = 0; + int32_t nRef; + + pFS->aDFileSet = taosArrayInit(taosArrayGetSize(pTsdb->fs.aDFileSet), sizeof(SDFileSet)); + if (pFS->aDFileSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + pFS->pDelFile = pTsdb->fs.pDelFile; + if (pFS->pDelFile) { + nRef = atomic_fetch_add_32(&pFS->pDelFile->nRef, 1); + ASSERT(nRef > 0); + } + + SDFileSet fSet; + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + fSet = *pSet; + + nRef = atomic_fetch_add_32(&pSet->pHeadF->nRef, 1); + ASSERT(nRef > 0); + + nRef = atomic_fetch_add_32(&pSet->pDataF->nRef, 1); + ASSERT(nRef > 0); + + nRef = atomic_fetch_add_32(&pSet->pLastF->nRef, 1); + ASSERT(nRef > 0); + + nRef = atomic_fetch_add_32(&pSet->pSmaF->nRef, 1); + ASSERT(nRef > 0); + + if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; } } @@ -673,16 +1027,59 @@ _exit: return code; } -void tsdbFSStateDeleteDFileSet(STsdbFSState *pState, int32_t fid) { - int32_t idx; +void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS) { + int32_t nRef; + char fname[TSDB_FILENAME_LEN]; - idx = taosArraySearchIdx(pState->aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); - ASSERT(idx >= 0); - taosArrayRemove(pState->aDFileSet, idx); -} + if (pFS->pDelFile) { + nRef = atomic_sub_fetch_32(&pFS->pDelFile->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbDelFileName(pTsdb, pFS->pDelFile, fname); + taosRemoveFile(fname); + taosMemoryFree(pFS->pDelFile); + } + } -SDelFile *tsdbFSStateGetDelFile(STsdbFSState *pState) { return pState->pDelFile; } + for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet); -SDFileSet *tsdbFSStateGetDFileSet(STsdbFSState *pState, int32_t fid, int32_t flag) { - return (SDFileSet *)taosArraySearch(pState->aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, flag); -} + // head + nRef = atomic_sub_fetch_32(&pSet->pHeadF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pHeadF); + } + + // data + nRef = atomic_sub_fetch_32(&pSet->pDataF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pDataF); + } + + // last + nRef = atomic_sub_fetch_32(&pSet->pLastF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pLastF); + } + + // sma + nRef = atomic_sub_fetch_32(&pSet->pSmaF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pSmaF); + } + } + + taosArrayDestroy(pFS->aDFileSet); +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFile.c b/source/dnode/vnode/src/tsdb/tsdbFile.c index 4a41e9fb41..135ee23d44 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile.c @@ -122,21 +122,11 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { // truncate switch (ftype) { - case TSDB_HEAD_FILE: - size = pSet->pHeadF->size; - tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); - tPutHeadFile(hdr, pSet->pHeadF); - break; case TSDB_DATA_FILE: size = pSet->pDataF->size; tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); tPutDataFile(hdr, pSet->pDataF); break; - case TSDB_LAST_FILE: - size = pSet->pLastF->size; - tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); - tPutLastFile(hdr, pSet->pLastF); - break; case TSDB_SMA_FILE: size = pSet->pSmaF->size; tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); @@ -186,6 +176,7 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { return code; _err: + tsdbError("vgId:%d tsdb rollback file failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } @@ -219,10 +210,8 @@ int32_t tGetDFileSet(uint8_t *p, SDFileSet *pSet) { // SDelFile =============================================== void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]) { - STfs *pTfs = pTsdb->pVnode->pTfs; - - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%dver%" PRId64 "%s", tfsGetPrimaryPath(pTfs), TD_DIRSEP, pTsdb->path, - TD_DIRSEP, TD_VID(pTsdb->pVnode), pFile->commitID, ".del"); + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%dver%" PRId64 "%s", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pFile->commitID, ".del"); } int32_t tPutDelFile(uint8_t *p, SDelFile *pDelFile) { diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index 80ba5f0363..52b6e07903 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -605,48 +605,3 @@ void tsdbUnrefMemTable(SMemTable *pMemTable) { tsdbMemTableDestroy(pMemTable); } } - -int32_t tsdbTakeMemSnapshot(STsdb *pTsdb, SMemTable **ppMem, SMemTable **ppIMem) { - ASSERT(0); - int32_t code = 0; - - // lock - code = taosThreadRwlockRdlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - goto _exit; - } - - // take snapshot - *ppMem = pTsdb->mem; - *ppIMem = pTsdb->imem; - - if (*ppMem) { - tsdbRefMemTable(*ppMem); - } - - if (*ppIMem) { - tsdbRefMemTable(*ppIMem); - } - - // unlock - code = taosThreadRwlockUnlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - goto _exit; - } - -_exit: - return code; -} - -void tsdbUntakeMemSnapshot(STsdb *pTsdb, SMemTable *pMem, SMemTable *pIMem) { - ASSERT(0); - if (pMem) { - tsdbUnrefMemTable(pMem); - } - - if (pIMem) { - tsdbUnrefMemTable(pIMem); - } -} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbOpen.c b/source/dnode/vnode/src/tsdb/tsdbOpen.c index 064c7adf4b..0b355d91b4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbOpen.c +++ b/source/dnode/vnode/src/tsdb/tsdbOpen.c @@ -66,7 +66,7 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee tfsMkdir(pVnode->pTfs, pTsdb->path); // open tsdb - if (tsdbFSOpen(pTsdb, &pTsdb->pFS) < 0) { + if (tsdbFSOpen(pTsdb) < 0) { goto _err; } @@ -88,7 +88,7 @@ _err: int tsdbClose(STsdb **pTsdb) { if (*pTsdb) { taosThreadRwlockDestroy(&(*pTsdb)->rwLock); - tsdbFSClose((*pTsdb)->pFS); + tsdbFSClose(*pTsdb); tsdbCloseCache((*pTsdb)->lruCache); taosMemoryFreeClear(*pTsdb); } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 3375dd69ba..27afd7d0c5 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -118,8 +118,7 @@ struct STsdbReader { char* idStr; // query info handle, for debug purpose int32_t type; // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows SBlockLoadSuppInfo suppInfo; - SMemTable* pMem; - SMemTable* pIMem; + STsdbReadSnap* pReadSnap; SIOCostSummary cost; STSchema* pSchema; @@ -275,12 +274,12 @@ static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* cap } // init file iterator -static int32_t initFilesetIterator(SFilesetIter* pIter, const STsdbFSState* pFState, int32_t order, const char* idstr) { - size_t numOfFileset = taosArrayGetSize(pFState->aDFileSet); +static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, int32_t order, const char* idstr) { + size_t numOfFileset = taosArrayGetSize(aDFileSet); pIter->index = ASCENDING_TRAVERSE(order) ? -1 : numOfFileset; pIter->order = order; - pIter->pFileList = taosArrayDup(pFState->aDFileSet); + pIter->pFileList = aDFileSet; pIter->numOfFiles = numOfFileset; tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, idstr); @@ -1881,8 +1880,8 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea int32_t backward = (!ASCENDING_TRAVERSE(pReader->order)); STbData* d = NULL; - if (pReader->pMem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pMem, pReader->suid, pBlockScanInfo->uid, &d); + if (pReader->pReadSnap->pMem != NULL) { + tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid, &d); if (d != NULL) { code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter); if (code == TSDB_CODE_SUCCESS) { @@ -1902,8 +1901,8 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea } STbData* di = NULL; - if (pReader->pIMem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pIMem, pReader->suid, pBlockScanInfo->uid, &di); + if (pReader->pReadSnap->pIMem != NULL) { + tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid, &di); if (di != NULL) { code = tsdbTbDataIterCreate(di, &startKey, backward, &pBlockScanInfo->iiter.iter); if (code == TSDB_CODE_SUCCESS) { @@ -1939,7 +1938,7 @@ int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* SArray* pDelData = taosArrayInit(4, sizeof(SDelData)); - SDelFile* pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); + SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile; if (pDelFile) { SDelFReader* pDelFReader = NULL; code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); @@ -2830,8 +2829,7 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl SDataBlockIter* pBlockIter = &pReader->status.blockIter; - STsdbFSState* pFState = pReader->pTsdb->pFS->cState; - initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr); + initFilesetIterator(&pReader->status.fileIter, (*ppReader)->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); resetDataBlockIterator(&pReader->status.blockIter, pReader->order); // no data in files, let's try buffer in memory @@ -2844,7 +2842,8 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl } } - tsdbTakeMemSnapshot(pReader->pTsdb, &pReader->pMem, &pReader->pIMem); + code = tsdbTakeReadSnap(pVnode->pTsdb, &pReader->pReadSnap); + if (code) goto _err; tsdbDebug("%p total numOfTable:%d in this query %s", pReader, numOfTables, pReader->idStr); return code; @@ -2861,7 +2860,7 @@ void tsdbReaderClose(STsdbReader* pReader) { SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - tsdbUntakeMemSnapshot(pReader->pTsdb, pReader->pMem, pReader->pIMem); + tsdbUntakeReadSnap(pReader->pTsdb, pReader->pReadSnap); taosMemoryFreeClear(pSupInfo->plist); taosMemoryFree(pSupInfo->colIds); @@ -3081,8 +3080,7 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { tsdbDataFReaderClose(&pReader->pFileReader); - STsdbFSState* pFState = pReader->pTsdb->pFS->cState; - initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr); + initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); resetDataBlockIterator(&pReader->status.blockIter, pReader->order); resetDataBlockScanInfo(pReader->status.pTableMap); @@ -3275,6 +3273,11 @@ int32_t tsdbTakeReadSnap(STsdb* pTsdb, STsdbReadSnap** ppSnap) { } // fs (todo) + code = tsdbFSRef(pTsdb, &(*ppSnap)->fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _exit; + } // unlock code = taosThreadRwlockUnlock(&pTsdb->rwLock); @@ -3297,6 +3300,6 @@ void tsdbUntakeReadSnap(STsdb* pTsdb, STsdbReadSnap* pSnap) { tsdbUnrefMemTable(pSnap->pIMem); } - // fs (todo) + tsdbFSUnref(pTsdb, &pSnap->fs); } } diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index 137ef9a4a6..77ca49e33e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -16,7 +16,8 @@ #include "tsdb.h" static int32_t tsdbDoRetentionImpl(STsdb *pTsdb, int64_t now, int8_t try, int8_t *canDo) { - int32_t code = 0; + int32_t code = 0; +#if 0 STsdbFSState *pState; if (try) { @@ -64,18 +65,20 @@ static int32_t tsdbDoRetentionImpl(STsdb *pTsdb, int64_t now, int8_t try, int8_t code = tsdbDFileSetCopy(pTsdb, pDFileSet, &nDFileSet); if (code) goto _exit; - code = tsdbFSStateUpsertDFileSet(pState, &nDFileSet); + code = tsdbFSUpsertFSet(pState, &nDFileSet); if (code) goto _exit; } } } +#endif _exit: return code; } int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now) { int32_t code = 0; +#if 0 int8_t canDo; // try @@ -100,5 +103,6 @@ _exit: _err: tsdbError("vgId:%d tsdb do retention failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); tsdbFSRollback(pTsdb->pFS); +#endif return code; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index dfb01f2ded..43537c9a8d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -20,6 +20,7 @@ struct STsdbSnapReader { STsdb* pTsdb; int64_t sver; int64_t ever; + STsdbFS fs; // for data file int8_t dataDone; int32_t fid; @@ -45,7 +46,8 @@ static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { while (true) { if (pReader->pDataFReader == NULL) { - SDFileSet* pSet = tsdbFSStateGetDFileSet(pTsdb->pFS->cState, pReader->fid, TD_GT); + SDFileSet* pSet = + taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, tDFileSetCmprFn, TD_GT); if (pSet == NULL) goto _exit; @@ -159,7 +161,7 @@ _err: static int32_t tsdbSnapReadDel(STsdbSnapReader* pReader, uint8_t** ppData) { int32_t code = 0; STsdb* pTsdb = pReader->pTsdb; - SDelFile* pDelFile = pTsdb->pFS->cState->pDelFile; + SDelFile* pDelFile = pReader->fs.pDelFile; if (pReader->pDelFReader == NULL) { if (pDelFile == NULL) { @@ -254,6 +256,24 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapRe pReader->sver = sver; pReader->ever = ever; + code = taosThreadRwlockRdlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _err; + } + + code = tsdbFSRef(pTsdb, &pReader->fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _err; + } + + code = taosThreadRwlockUnlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _err; + } + pReader->fid = INT32_MIN; pReader->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); if (pReader->aBlockIdx == NULL) { @@ -305,6 +325,8 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { taosArrayDestroy(pReader->aDelIdx); taosArrayDestroy(pReader->aDelData); + tsdbFSUnref(pReader->pTsdb, &pReader->fs); + tsdbInfo("vgId:%d vnode snapshot tsdb reader closed", TD_VID(pReader->pTsdb->pVnode)); taosMemoryFree(pReader); @@ -358,6 +380,7 @@ struct STsdbSnapWriter { STsdb* pTsdb; int64_t sver; int64_t ever; + STsdbFS fs; // config int32_t minutes; @@ -798,7 +821,7 @@ static int32_t tsdbSnapWriteDataEnd(STsdbSnapWriter* pWriter) { code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdxW, NULL); if (code) goto _err; - code = tsdbFSStateUpsertDFileSet(pTsdb->pFS->nState, &pWriter->pDataFWriter->wSet); + code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet); if (code) goto _err; code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1); @@ -843,7 +866,7 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 pWriter->fid = fid; // read - SDFileSet* pSet = tsdbFSStateGetDFileSet(pTsdb->pFS->nState, fid, TD_EQ); + SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); if (pSet) { code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet); if (code) goto _err; @@ -911,7 +934,7 @@ static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32 STsdb* pTsdb = pWriter->pTsdb; if (pWriter->pDelFWriter == NULL) { - SDelFile* pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->nState); + SDelFile* pDelFile = pWriter->fs.pDelFile; // reader if (pDelFile) { @@ -1021,7 +1044,7 @@ static int32_t tsdbSnapWriteDelEnd(STsdbSnapWriter* pWriter) { code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter); if (code) goto _err; - code = tsdbFSStateUpsertDelFile(pTsdb->pFS->nState, &pWriter->pDelFWriter->fDel); + code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel); if (code) goto _err; code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1); @@ -1055,6 +1078,9 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr pWriter->sver = sver; pWriter->ever = ever; + code = tsdbFSCopy(pTsdb, &pWriter->fs); + if (code) goto _err; + // config pWriter->minutes = pTsdb->keepCfg.days; pWriter->precision = pTsdb->keepCfg.precision; @@ -1100,9 +1126,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr goto _err; } - code = tsdbFSBegin(pTsdb->pFS); - if (code) goto _err; - *ppWriter = pWriter; return code; @@ -1117,8 +1140,9 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { STsdbSnapWriter* pWriter = *ppWriter; if (rollback) { - code = tsdbFSRollback(pWriter->pTsdb->pFS); - if (code) goto _err; + ASSERT(0); + // code = tsdbFSRollback(pWriter->pTsdb->pFS); + // if (code) goto _err; } else { code = tsdbSnapWriteDataEnd(pWriter); if (code) goto _err; @@ -1126,7 +1150,10 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { code = tsdbSnapWriteDelEnd(pWriter); if (code) goto _err; - code = tsdbFSCommit(pWriter->pTsdb->pFS); + code = tsdbFSCommit1(pWriter->pTsdb, &pWriter->fs); + if (code) goto _err; + + code = tsdbFSCommit2(pWriter->pTsdb, &pWriter->fs); if (code) goto _err; } From 6a7a3da3db17fd4e3049835cc4b33fcca4a13afa Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 21 Jul 2022 11:56:18 +0000 Subject: [PATCH 07/27] fix some crash --- source/dnode/vnode/src/tsdb/tsdbCommit.c | 2 +- source/dnode/vnode/src/tsdb/tsdbRead.c | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit.c b/source/dnode/vnode/src/tsdb/tsdbCommit.c index c4dc341a63..194bd2e924 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit.c @@ -1151,7 +1151,7 @@ static int32_t tsdbEndCommit(SCommitter *pCommitter, int32_t eno) { STsdb *pTsdb = pCommitter->pTsdb; SMemTable *pMemTable = pTsdb->imem; - ASSERT(eno); + ASSERT(eno == 0); code = tsdbFSCommit1(pTsdb, &pCommitter->fs); if (code) goto _err; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 27afd7d0c5..6be79f7578 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -286,8 +286,6 @@ static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, int32 return TSDB_CODE_SUCCESS; } -static void cleanupFilesetIterator(SFilesetIter* pIter) { taosArrayDestroy(pIter->pFileList); } - static bool filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader) { bool asc = ASCENDING_TRAVERSE(pIter->order); int32_t step = asc ? 1 : -1; @@ -2829,6 +2827,9 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl SDataBlockIter* pBlockIter = &pReader->status.blockIter; + code = tsdbTakeReadSnap(pVnode->pTsdb, &pReader->pReadSnap); + if (code) goto _err; + initFilesetIterator(&pReader->status.fileIter, (*ppReader)->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); resetDataBlockIterator(&pReader->status.blockIter, pReader->order); @@ -2842,9 +2843,6 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl } } - code = tsdbTakeReadSnap(pVnode->pTsdb, &pReader->pReadSnap); - if (code) goto _err; - tsdbDebug("%p total numOfTable:%d in this query %s", pReader, numOfTables, pReader->idStr); return code; @@ -2873,7 +2871,6 @@ void tsdbReaderClose(STsdbReader* pReader) { } taosMemoryFree(pSupInfo->buildBuf); - cleanupFilesetIterator(&pReader->status.fileIter); cleanupDataBlockIterator(&pReader->status.blockIter); destroyBlockScanInfo(pReader->status.pTableMap); blockDataDestroy(pReader->pResBlock); From 1059ca6d534fb6762b22e8c3e93c69a2b0988d53 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Thu, 21 Jul 2022 20:00:16 +0800 Subject: [PATCH 08/27] fix: the problem of data loss when interval is used for outer query --- source/libs/planner/src/planLogicCreater.c | 4 ++-- source/libs/planner/test/planIntervalTest.cpp | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 9b07578212..1663266fb7 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -713,7 +713,7 @@ static int32_t createWindowLogicNodeByInterval(SLogicPlanContext* pCxt, SInterva pWindow->windowAlgo = pCxt->pPlanCxt->streamQuery ? INTERVAL_ALGO_STREAM_SINGLE : INTERVAL_ALGO_HASH; pWindow->node.groupAction = GROUP_ACTION_KEEP; pWindow->node.requireDataOrder = DATA_ORDER_LEVEL_IN_BLOCK; - pWindow->node.resultDataOrder = DATA_ORDER_LEVEL_IN_BLOCK; + pWindow->node.resultDataOrder = DATA_ORDER_LEVEL_IN_GROUP; pWindow->pTspk = nodesCloneNode(pInterval->pCol); if (NULL == pWindow->pTspk) { @@ -1416,7 +1416,7 @@ static int32_t adjustProjectDataRequirement(SProjectLogicNode* pProject, EDataOr } static int32_t adjustIntervalDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { - if (requirement <= DATA_ORDER_LEVEL_IN_BLOCK) { + if (requirement <= DATA_ORDER_LEVEL_IN_GROUP) { return TSDB_CODE_SUCCESS; } pWindow->node.resultDataOrder = requirement; diff --git a/source/libs/planner/test/planIntervalTest.cpp b/source/libs/planner/test/planIntervalTest.cpp index 8645992500..30f7051722 100644 --- a/source/libs/planner/test/planIntervalTest.cpp +++ b/source/libs/planner/test/planIntervalTest.cpp @@ -38,6 +38,9 @@ TEST_F(PlanIntervalTest, fill) { run("SELECT COUNT(*) FROM t1 WHERE ts > TIMESTAMP '2022-04-01 00:00:00' and ts < TIMESTAMP '2022-04-30 23:59:59' " "INTERVAL(10s) FILL(LINEAR)"); + run("SELECT COUNT(*) FROM st1 WHERE ts > TIMESTAMP '2022-04-01 00:00:00' and ts < TIMESTAMP '2022-04-30 23:59:59' " + "INTERVAL(10s) FILL(LINEAR)"); + run("SELECT COUNT(*), SUM(c1) FROM t1 " "WHERE ts > TIMESTAMP '2022-04-01 00:00:00' and ts < TIMESTAMP '2022-04-30 23:59:59' " "INTERVAL(10s) FILL(VALUE, 10, 20)"); From 461db22807bf6d7bb57b2dbcf5fcd72085fccee2 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Thu, 21 Jul 2022 20:17:30 +0800 Subject: [PATCH 09/27] fix: the problem of data loss when interval is used for outer query --- source/libs/planner/src/planLogicCreater.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 1663266fb7..7f69e77299 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -660,7 +660,7 @@ static int32_t createWindowLogicNodeByState(SLogicPlanContext* pCxt, SStateWindo pWindow->winType = WINDOW_TYPE_STATE; pWindow->node.groupAction = GROUP_ACTION_KEEP; - pWindow->node.requireDataOrder = DATA_ORDER_LEVEL_IN_GROUP; + pWindow->node.requireDataOrder = pCxt->pPlanCxt->streamQuery ? DATA_ORDER_LEVEL_IN_BLOCK : DATA_ORDER_LEVEL_IN_GROUP; pWindow->node.resultDataOrder = DATA_ORDER_LEVEL_IN_GROUP; pWindow->pStateExpr = nodesCloneNode(pState->pExpr); pWindow->pTspk = nodesCloneNode(pState->pCol); @@ -683,7 +683,7 @@ static int32_t createWindowLogicNodeBySession(SLogicPlanContext* pCxt, SSessionW pWindow->sessionGap = ((SValueNode*)pSession->pGap)->datum.i; pWindow->windowAlgo = pCxt->pPlanCxt->streamQuery ? SESSION_ALGO_STREAM_SINGLE : SESSION_ALGO_MERGE; pWindow->node.groupAction = GROUP_ACTION_KEEP; - pWindow->node.requireDataOrder = DATA_ORDER_LEVEL_IN_GROUP; + pWindow->node.requireDataOrder = pCxt->pPlanCxt->streamQuery ? DATA_ORDER_LEVEL_IN_BLOCK : DATA_ORDER_LEVEL_IN_GROUP; pWindow->node.resultDataOrder = DATA_ORDER_LEVEL_IN_GROUP; pWindow->pTspk = nodesCloneNode((SNode*)pSession->pCol); @@ -1390,7 +1390,7 @@ static void setLogicSubplanType(bool hasScan, SLogicSubplan* pSubplan) { } static int32_t adjustScanDataRequirement(SScanLogicNode* pScan, EDataOrderLevel requirement) { - if (requirement <= DATA_ORDER_LEVEL_IN_BLOCK) { + if (requirement <= pScan->node.resultDataOrder) { return TSDB_CODE_SUCCESS; } pScan->scanType = SCAN_TYPE_TABLE_MERGE; @@ -1416,7 +1416,7 @@ static int32_t adjustProjectDataRequirement(SProjectLogicNode* pProject, EDataOr } static int32_t adjustIntervalDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { - if (requirement <= DATA_ORDER_LEVEL_IN_GROUP) { + if (requirement <= pWindow->node.resultDataOrder) { return TSDB_CODE_SUCCESS; } pWindow->node.resultDataOrder = requirement; @@ -1425,7 +1425,7 @@ static int32_t adjustIntervalDataRequirement(SWindowLogicNode* pWindow, EDataOrd } static int32_t adjustSessionDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { - if (requirement <= DATA_ORDER_LEVEL_IN_GROUP) { + if (requirement <= pWindow->node.resultDataOrder) { return TSDB_CODE_SUCCESS; } pWindow->node.resultDataOrder = requirement; @@ -1434,7 +1434,7 @@ static int32_t adjustSessionDataRequirement(SWindowLogicNode* pWindow, EDataOrde } static int32_t adjustStateDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { - if (requirement <= DATA_ORDER_LEVEL_IN_GROUP) { + if (requirement <= pWindow->node.resultDataOrder) { return TSDB_CODE_SUCCESS; } pWindow->node.resultDataOrder = requirement; @@ -1457,7 +1457,7 @@ static int32_t adjustWindowDataRequirement(SWindowLogicNode* pWindow, EDataOrder } static int32_t adjustFillDataRequirement(SFillLogicNode* pFill, EDataOrderLevel requirement) { - if (requirement <= DATA_ORDER_LEVEL_IN_GROUP) { + if (requirement <= pFill->node.requireDataOrder) { return TSDB_CODE_SUCCESS; } pFill->node.resultDataOrder = requirement; @@ -1488,7 +1488,7 @@ static int32_t adjustIndefRowsDataRequirement(SIndefRowsFuncLogicNode* pIndef, E } static int32_t adjustInterpDataRequirement(SInterpFuncLogicNode* pInterp, EDataOrderLevel requirement) { - if (requirement <= DATA_ORDER_LEVEL_IN_GROUP) { + if (requirement <= pInterp->node.requireDataOrder) { return TSDB_CODE_SUCCESS; } pInterp->node.resultDataOrder = requirement; From 15c3945eb783ce14f5aea505a801217646dd0687 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 21 Jul 2022 12:38:49 +0000 Subject: [PATCH 10/27] fix: another concurrency problem --- source/dnode/vnode/src/tsdb/tsdbMemTable.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index 52b6e07903..fa775bb882 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -93,7 +93,11 @@ static int32_t tbDataPCmprFn(const void *p1, const void *p2) { } void tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData) { STbData *pTbData = &(STbData){.suid = suid, .uid = uid}; - void *p = taosArraySearch(pMemTable->aTbData, &pTbData, tbDataPCmprFn, TD_EQ); + + taosRLockLatch(&pMemTable->latch); + void *p = taosArraySearch(pMemTable->aTbData, &pTbData, tbDataPCmprFn, TD_EQ); + taosRUnLockLatch(&pMemTable->latch); + *ppTbData = p ? *(STbData **)p : NULL; } @@ -363,10 +367,13 @@ static int32_t tsdbGetOrCreateTbData(SMemTable *pMemTable, tb_uid_t suid, tb_uid void *p; if (idx < 0) { - p = taosArrayPush(pMemTable->aTbData, &pTbData); - } else { - p = taosArrayInsert(pMemTable->aTbData, idx, &pTbData); + idx = taosArrayGetSize(pMemTable->aTbData); } + + taosWLockLatch(&pMemTable->latch); + p = taosArrayInsert(pMemTable->aTbData, idx, &pTbData); + taosWUnLockLatch(&pMemTable->latch); + if (p == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; From ef54bfb7872ec7372508fab4a23f4d6756cbf4a9 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Thu, 21 Jul 2022 20:39:01 +0800 Subject: [PATCH 11/27] fix: the problem of data loss when interval is used for outer query --- source/libs/planner/src/planLogicCreater.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 7f69e77299..0bd0e87194 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -1393,7 +1393,9 @@ static int32_t adjustScanDataRequirement(SScanLogicNode* pScan, EDataOrderLevel if (requirement <= pScan->node.resultDataOrder) { return TSDB_CODE_SUCCESS; } - pScan->scanType = SCAN_TYPE_TABLE_MERGE; + if (TSDB_SUPER_TABLE == pScan->tableType) { + pScan->scanType = SCAN_TYPE_TABLE_MERGE; + } pScan->node.resultDataOrder = requirement; return TSDB_CODE_SUCCESS; } From a086019de7d48fa141df2cdc4bd02d923c889174 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 21 Jul 2022 13:02:48 +0000 Subject: [PATCH 12/27] more concurrency fix --- source/dnode/vnode/src/tsdb/tsdbRetention.c | 133 ++++++++++---------- 1 file changed, 69 insertions(+), 64 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index 77ca49e33e..5ba2ecb64b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -15,94 +15,99 @@ #include "tsdb.h" -static int32_t tsdbDoRetentionImpl(STsdb *pTsdb, int64_t now, int8_t try, int8_t *canDo) { - int32_t code = 0; -#if 0 - STsdbFSState *pState; - - if (try) { - pState = pTsdb->pFS->cState; - *canDo = 0; - } else { - pState = pTsdb->pFS->nState; - } - - for (int32_t iSet = 0; iSet < taosArrayGetSize(pState->aDFileSet); iSet++) { - SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pState->aDFileSet, iSet); - int32_t expLevel = tsdbFidLevel(pDFileSet->fid, &pTsdb->keepCfg, now); +static bool tsdbShouldDoRetention(STsdb *pTsdb, int64_t now) { + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + int32_t expLevel = tsdbFidLevel(pSet->fid, &pTsdb->keepCfg, now); SDiskID did; - // check - if (expLevel == pDFileSet->diskId.id) continue; + if (expLevel == pSet->diskId.level) continue; - // delete or move if (expLevel < 0) { - if (try) { - *canDo = 1; - } else { - tsdbFSStateDeleteDFileSet(pState, pDFileSet->fid); - iSet--; - } + return true; + } else { + if (tfsAllocDisk(pTsdb->pVnode->pTfs, expLevel, &did) < 0) { + return false; + } + + if (did.level == pSet->diskId.level) continue; + + return true; + } + } + + return false; +} + +int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now) { + int32_t code = 0; + + if (!tsdbShouldDoRetention(pTsdb, now)) { + return code; + } + + // do retention + STsdbFS fs; + + code = tsdbFSCopy(pTsdb, &fs); + if (code) goto _err; + + for (int32_t iSet = 0; iSet < taosArrayGetSize(fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + int32_t expLevel = tsdbFidLevel(pSet->fid, &pTsdb->keepCfg, now); + SDiskID did; + + if (expLevel < 0) { + taosMemoryFree(pSet->pHeadF); + taosMemoryFree(pSet->pDataF); + taosMemoryFree(pSet->pLastF); + taosMemoryFree(pSet->pSmaF); + taosArrayRemove(fs.aDFileSet, iSet); + iSet--; } else { - // alloc if (tfsAllocDisk(pTsdb->pVnode->pTfs, expLevel, &did) < 0) { code = terrno; goto _exit; } - if (did.level == pDFileSet->diskId.level) continue; + if (did.level == pSet->diskId.level) continue; - if (try) { - *canDo = 1; - } else { - // copy the file to new disk + // copy file to new disk (todo) + SDFileSet fSet = *pSet; + fSet.diskId = did; - SDFileSet nDFileSet = *pDFileSet; - nDFileSet.diskId = did; + code = tsdbDFileSetCopy(pTsdb, pSet, &fSet); + if (code) goto _err; - tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did); - - code = tsdbDFileSetCopy(pTsdb, pDFileSet, &nDFileSet); - if (code) goto _exit; - - code = tsdbFSUpsertFSet(pState, &nDFileSet); - if (code) goto _exit; - } + code = tsdbFSUpsertFSet(&fs, &fSet); + if (code) goto _err; } + + /* code */ } -#endif -_exit: - return code; -} - -int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now) { - int32_t code = 0; -#if 0 - int8_t canDo; - - // try - tsdbDoRetentionImpl(pTsdb, now, 1, &canDo); - if (!canDo) goto _exit; - - // begin - code = tsdbFSBegin(pTsdb->pFS); + // do change fs + code = tsdbFSCommit1(pTsdb, &fs); if (code) goto _err; - // do retention - code = tsdbDoRetentionImpl(pTsdb, now, 0, NULL); - if (code) goto _err; + taosThreadRwlockWrlock(&pTsdb->rwLock); - // commit - code = tsdbFSCommit(pTsdb->pFS); - if (code) goto _err; + code = tsdbFSCommit2(pTsdb, &fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _err; + } + + taosThreadRwlockUnlock(&pTsdb->rwLock); + + tsdbFSDestroy(&fs); _exit: return code; _err: tsdbError("vgId:%d tsdb do retention failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - tsdbFSRollback(pTsdb->pFS); -#endif + ASSERT(0); + // tsdbFSRollback(pTsdb->pFS); return code; } \ No newline at end of file From 63627172c69e0c397e476ebc48eac9cabaeba95b Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 22 Jul 2022 01:37:21 +0000 Subject: [PATCH 13/27] fix memory leak --- source/dnode/vnode/src/tsdb/tsdbFS.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index 5c95e6cfec..b17e30d7c7 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -541,7 +541,7 @@ int32_t tsdbFSClose(STsdb *pTsdb) { taosMemoryFree(pSet->pSmaF); } - taosArrayClear(pTsdb->fs.aDFileSet); + taosArrayDestroy(pTsdb->fs.aDFileSet); return code; } From b27272d784848b707b1971c05d60c8f4fce0c9cd Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 22 Jul 2022 01:40:02 +0000 Subject: [PATCH 14/27] fix memory leak --- source/dnode/vnode/src/tsdb/tsdbRead.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 23763bd16c..cd8abe4020 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -3275,7 +3275,7 @@ int32_t tsdbTakeReadSnap(STsdb* pTsdb, STsdbReadSnap** ppSnap) { tsdbRefMemTable((*ppSnap)->pIMem); } - // fs (todo) + // fs code = tsdbFSRef(pTsdb, &(*ppSnap)->fs); if (code) { taosThreadRwlockUnlock(&pTsdb->rwLock); @@ -3289,6 +3289,7 @@ int32_t tsdbTakeReadSnap(STsdb* pTsdb, STsdbReadSnap** ppSnap) { goto _exit; } + tsdbTrace("vgId:%d take read snapshot", TD_VID(pTsdb->pVnode)); _exit: return code; } @@ -3304,5 +3305,8 @@ void tsdbUntakeReadSnap(STsdb* pTsdb, STsdbReadSnap* pSnap) { } tsdbFSUnref(pTsdb, &pSnap->fs); + taosMemoryFree(pSnap); } + + tsdbTrace("vgId:%d untake read snapshot", TD_VID(pTsdb->pVnode)); } From 613ca9c67d838cf72b6d36f02a8b52b86cf60ad6 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 22 Jul 2022 02:28:39 +0000 Subject: [PATCH 15/27] fix resource leak --- source/libs/executor/src/scanoperator.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 574aa648e5..a691091fe5 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -740,7 +740,7 @@ static SSDataBlock* doBlockInfoScan(SOperatorInfo* pOperator) { static void destroyBlockDistScanOperatorInfo(void* param, int32_t numOfOutput) { SBlockDistInfo* pDistInfo = (SBlockDistInfo*)param; blockDataDestroy(pDistInfo->pResBlock); - + tsdbReaderClose(pDistInfo->pHandle); taosMemoryFreeClear(param); } @@ -2051,8 +2051,8 @@ static SSDataBlock* sysTableScanUserTables(SOperatorInfo* pOperator) { uint64_t suid = pInfo->pCur->mr.me.ctbEntry.suid; int32_t code = metaGetTableEntryByUid(&mr, suid); if (code != TSDB_CODE_SUCCESS) { - qError("failed to get super table meta, cname:%s, suid:0x%" PRIx64 ", code:%s, %s", - pInfo->pCur->mr.me.name, suid, tstrerror(terrno), GET_TASKID(pTaskInfo)); + qError("failed to get super table meta, cname:%s, suid:0x%" PRIx64 ", code:%s, %s", pInfo->pCur->mr.me.name, + suid, tstrerror(terrno), GET_TASKID(pTaskInfo)); metaReaderClear(&mr); metaCloseTbCursor(pInfo->pCur); pInfo->pCur = NULL; @@ -2158,7 +2158,6 @@ static SSDataBlock* sysTableScanUserTables(SOperatorInfo* pOperator) { } } - static SSDataBlock* sysTableScanUserSTables(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SSysTableScanInfo* pInfo = pOperator->info; @@ -2184,12 +2183,13 @@ static SSDataBlock* doSysTableScan(SOperatorInfo* pOperator) { getDBNameFromCondition(pInfo->pCondition, dbName); sprintf(pInfo->req.db, "%d.%s", pInfo->accountId, dbName); } - + if (strncasecmp(name, TSDB_INS_TABLE_USER_TABLES, TSDB_TABLE_FNAME_LEN) == 0) { return sysTableScanUserTables(pOperator); } else if (strncasecmp(name, TSDB_INS_TABLE_USER_TAGS, TSDB_TABLE_FNAME_LEN) == 0) { return sysTableScanUserTags(pOperator); - } else if (strncasecmp(name, TSDB_INS_TABLE_USER_STABLES, TSDB_TABLE_FNAME_LEN) == 0 && IS_SYS_DBNAME(pInfo->req.db)) { + } else if (strncasecmp(name, TSDB_INS_TABLE_USER_STABLES, TSDB_TABLE_FNAME_LEN) == 0 && + IS_SYS_DBNAME(pInfo->req.db)) { return sysTableScanUserSTables(pOperator); } else { // load the meta from mnode of the given epset if (pOperator->status == OP_EXEC_DONE) { From d7c454932498325b197cf0af907057f8ee2e35d1 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Fri, 22 Jul 2022 10:38:11 +0800 Subject: [PATCH 16/27] fix: the problem of data loss when interval is used for outer query --- source/libs/nodes/src/nodesCodeFuncs.c | 21 +++ source/libs/planner/inc/planInt.h | 1 + source/libs/planner/src/planLogicCreater.c | 167 +-------------------- source/libs/planner/src/planOptimizer.c | 8 + source/libs/planner/src/planUtil.c | 167 +++++++++++++++++++++ 5 files changed, 198 insertions(+), 166 deletions(-) diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index e3c50de654..c92e6908f1 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -504,6 +504,9 @@ static const char* jkLogicPlanConditions = "Conditions"; static const char* jkLogicPlanChildren = "Children"; static const char* jkLogicPlanLimit = "Limit"; static const char* jkLogicPlanSlimit = "SLimit"; +static const char* jkLogicPlanRequireDataOrder = "RequireDataOrder"; +static const char* jkLogicPlanResultDataOrder = "ResultDataOrder"; +static const char* jkLogicPlanGroupAction = "GroupAction"; static int32_t logicPlanNodeToJson(const void* pObj, SJson* pJson) { const SLogicNode* pNode = (const SLogicNode*)pObj; @@ -521,6 +524,15 @@ static int32_t logicPlanNodeToJson(const void* pObj, SJson* pJson) { if (TSDB_CODE_SUCCESS == code) { code = tjsonAddObject(pJson, jkLogicPlanSlimit, nodeToJson, pNode->pSlimit); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddIntegerToObject(pJson, jkLogicPlanRequireDataOrder, pNode->requireDataOrder); + } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddIntegerToObject(pJson, jkLogicPlanResultDataOrder, pNode->resultDataOrder); + } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddIntegerToObject(pJson, jkLogicPlanGroupAction, pNode->groupAction); + } return code; } @@ -541,6 +553,15 @@ static int32_t jsonToLogicPlanNode(const SJson* pJson, void* pObj) { if (TSDB_CODE_SUCCESS == code) { code = jsonToNodeObject(pJson, jkLogicPlanSlimit, &pNode->pSlimit); } + if (TSDB_CODE_SUCCESS == code) { + tjsonGetNumberValue(pJson, jkLogicPlanRequireDataOrder, pNode->requireDataOrder, code); + } + if (TSDB_CODE_SUCCESS == code) { + tjsonGetNumberValue(pJson, jkLogicPlanResultDataOrder, pNode->resultDataOrder, code); + } + if (TSDB_CODE_SUCCESS == code) { + tjsonGetNumberValue(pJson, jkLogicPlanGroupAction, pNode->groupAction, code); + } return code; } diff --git a/source/libs/planner/inc/planInt.h b/source/libs/planner/inc/planInt.h index b7fba83235..88c7c26276 100644 --- a/source/libs/planner/inc/planInt.h +++ b/source/libs/planner/inc/planInt.h @@ -35,6 +35,7 @@ int32_t generateUsageErrMsg(char* pBuf, int32_t len, int32_t errCode, ...); int32_t createColumnByRewriteExprs(SNodeList* pExprs, SNodeList** pList); int32_t createColumnByRewriteExpr(SNode* pExpr, SNodeList** pList); int32_t replaceLogicNode(SLogicSubplan* pSubplan, SLogicNode* pOld, SLogicNode* pNew); +int32_t adjustLogicNodeDataRequirement(SLogicNode* pNode, EDataOrderLevel requirement); int32_t createLogicPlan(SPlanContext* pCxt, SLogicSubplan** pLogicSubplan); int32_t optimizeLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan); diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 0bd0e87194..78e779565e 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -1389,171 +1389,6 @@ static void setLogicSubplanType(bool hasScan, SLogicSubplan* pSubplan) { } } -static int32_t adjustScanDataRequirement(SScanLogicNode* pScan, EDataOrderLevel requirement) { - if (requirement <= pScan->node.resultDataOrder) { - return TSDB_CODE_SUCCESS; - } - if (TSDB_SUPER_TABLE == pScan->tableType) { - pScan->scanType = SCAN_TYPE_TABLE_MERGE; - } - pScan->node.resultDataOrder = requirement; - return TSDB_CODE_SUCCESS; -} - -static int32_t adjustJoinDataRequirement(SJoinLogicNode* pJoin, EDataOrderLevel requirement) { - return TSDB_CODE_SUCCESS; -} - -static int32_t adjustAggDataRequirement(SAggLogicNode* pAgg, EDataOrderLevel requirement) { - if (requirement > DATA_ORDER_LEVEL_NONE) { - return TSDB_CODE_PLAN_INTERNAL_ERROR; - } - return TSDB_CODE_SUCCESS; -} - -static int32_t adjustProjectDataRequirement(SProjectLogicNode* pProject, EDataOrderLevel requirement) { - pProject->node.resultDataOrder = requirement; - pProject->node.requireDataOrder = requirement; - return TSDB_CODE_SUCCESS; -} - -static int32_t adjustIntervalDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { - if (requirement <= pWindow->node.resultDataOrder) { - return TSDB_CODE_SUCCESS; - } - pWindow->node.resultDataOrder = requirement; - pWindow->node.requireDataOrder = requirement; - return TSDB_CODE_SUCCESS; -} - -static int32_t adjustSessionDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { - if (requirement <= pWindow->node.resultDataOrder) { - return TSDB_CODE_SUCCESS; - } - pWindow->node.resultDataOrder = requirement; - pWindow->node.requireDataOrder = requirement; - return TSDB_CODE_SUCCESS; -} - -static int32_t adjustStateDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { - if (requirement <= pWindow->node.resultDataOrder) { - return TSDB_CODE_SUCCESS; - } - pWindow->node.resultDataOrder = requirement; - pWindow->node.requireDataOrder = requirement; - return TSDB_CODE_SUCCESS; -} - -static int32_t adjustWindowDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { - switch (pWindow->winType) { - case WINDOW_TYPE_INTERVAL: - return adjustIntervalDataRequirement(pWindow, requirement); - case WINDOW_TYPE_SESSION: - return adjustSessionDataRequirement(pWindow, requirement); - case WINDOW_TYPE_STATE: - return adjustStateDataRequirement(pWindow, requirement); - default: - break; - } - return TSDB_CODE_PLAN_INTERNAL_ERROR; -} - -static int32_t adjustFillDataRequirement(SFillLogicNode* pFill, EDataOrderLevel requirement) { - if (requirement <= pFill->node.requireDataOrder) { - return TSDB_CODE_SUCCESS; - } - pFill->node.resultDataOrder = requirement; - pFill->node.requireDataOrder = requirement; - return TSDB_CODE_SUCCESS; -} - -static int32_t adjustSortDataRequirement(SSortLogicNode* pSort, EDataOrderLevel requirement) { - return TSDB_CODE_SUCCESS; -} - -static int32_t adjustPartitionDataRequirement(SPartitionLogicNode* pPart, EDataOrderLevel requirement) { - if (DATA_ORDER_LEVEL_GLOBAL == requirement) { - return TSDB_CODE_PLAN_INTERNAL_ERROR; - } - pPart->node.resultDataOrder = requirement; - pPart->node.requireDataOrder = requirement; - return TSDB_CODE_SUCCESS; -} - -static int32_t adjustIndefRowsDataRequirement(SIndefRowsFuncLogicNode* pIndef, EDataOrderLevel requirement) { - if (requirement <= pIndef->node.resultDataOrder) { - return TSDB_CODE_SUCCESS; - } - pIndef->node.resultDataOrder = requirement; - pIndef->node.requireDataOrder = requirement; - return TSDB_CODE_SUCCESS; -} - -static int32_t adjustInterpDataRequirement(SInterpFuncLogicNode* pInterp, EDataOrderLevel requirement) { - if (requirement <= pInterp->node.requireDataOrder) { - return TSDB_CODE_SUCCESS; - } - pInterp->node.resultDataOrder = requirement; - pInterp->node.requireDataOrder = requirement; - return TSDB_CODE_SUCCESS; -} - -static int32_t adjustLogicNodeDataRequirementImpl(SLogicNode* pNode, EDataOrderLevel requirement) { - int32_t code = TSDB_CODE_SUCCESS; - switch (nodeType(pNode)) { - case QUERY_NODE_LOGIC_PLAN_SCAN: - code = adjustScanDataRequirement((SScanLogicNode*)pNode, requirement); - break; - case QUERY_NODE_LOGIC_PLAN_JOIN: - code = adjustJoinDataRequirement((SJoinLogicNode*)pNode, requirement); - break; - case QUERY_NODE_LOGIC_PLAN_AGG: - code = adjustAggDataRequirement((SAggLogicNode*)pNode, requirement); - break; - case QUERY_NODE_LOGIC_PLAN_PROJECT: - code = adjustProjectDataRequirement((SProjectLogicNode*)pNode, requirement); - break; - case QUERY_NODE_LOGIC_PLAN_VNODE_MODIFY: - case QUERY_NODE_LOGIC_PLAN_EXCHANGE: - case QUERY_NODE_LOGIC_PLAN_MERGE: - break; - case QUERY_NODE_LOGIC_PLAN_WINDOW: - code = adjustWindowDataRequirement((SWindowLogicNode*)pNode, requirement); - break; - case QUERY_NODE_LOGIC_PLAN_FILL: - code = adjustFillDataRequirement((SFillLogicNode*)pNode, requirement); - break; - case QUERY_NODE_LOGIC_PLAN_SORT: - code = adjustSortDataRequirement((SSortLogicNode*)pNode, requirement); - break; - case QUERY_NODE_LOGIC_PLAN_PARTITION: - code = adjustPartitionDataRequirement((SPartitionLogicNode*)pNode, requirement); - break; - case QUERY_NODE_LOGIC_PLAN_INDEF_ROWS_FUNC: - code = adjustIndefRowsDataRequirement((SIndefRowsFuncLogicNode*)pNode, requirement); - break; - case QUERY_NODE_LOGIC_PLAN_INTERP_FUNC: - code = adjustInterpDataRequirement((SInterpFuncLogicNode*)pNode, requirement); - break; - default: - break; - } - if (TSDB_CODE_SUCCESS == code) { - SNode* pChild = NULL; - FOREACH(pChild, pNode->pChildren) { - code = adjustLogicNodeDataRequirementImpl((SLogicNode*)pChild, pNode->requireDataOrder); - if (TSDB_CODE_SUCCESS != code) { - break; - } - } - } - return code; -} - -static int32_t adjustLogicNodeDataRequirement(SLogicNode* pNode) { - return adjustLogicNodeDataRequirementImpl(pNode, DATA_ORDER_LEVEL_NONE); -} - int32_t createLogicPlan(SPlanContext* pCxt, SLogicSubplan** pLogicSubplan) { SLogicPlanContext cxt = {.pPlanCxt = pCxt, .pCurrRoot = NULL, .hasScan = false}; @@ -1569,7 +1404,7 @@ int32_t createLogicPlan(SPlanContext* pCxt, SLogicSubplan** pLogicSubplan) { if (TSDB_CODE_SUCCESS == code) { setLogicNodeParent(pSubplan->pNode); setLogicSubplanType(cxt.hasScan, pSubplan); - code = adjustLogicNodeDataRequirement(pSubplan->pNode); + code = adjustLogicNodeDataRequirement(pSubplan->pNode, DATA_ORDER_LEVEL_NONE); } if (TSDB_CODE_SUCCESS == code) { diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 8079513e4d..b7b8cfce33 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -1906,6 +1906,8 @@ static int32_t rewriteUniqueOptCreateAgg(SIndefRowsFuncLogicNode* pIndef, SLogic TSWAP(pAgg->node.pChildren, pIndef->node.pChildren); optResetParent((SLogicNode*)pAgg); pAgg->node.precision = pIndef->node.precision; + pAgg->node.requireDataOrder = DATA_ORDER_LEVEL_IN_BLOCK; // first function requirement + pAgg->node.resultDataOrder = DATA_ORDER_LEVEL_NONE; int32_t code = TSDB_CODE_SUCCESS; bool hasSelectPrimaryKey = false; @@ -1978,6 +1980,8 @@ static int32_t rewriteUniqueOptCreateProject(SIndefRowsFuncLogicNode* pIndef, SL TSWAP(pProject->node.pTargets, pIndef->node.pTargets); pProject->node.precision = pIndef->node.precision; + pProject->node.requireDataOrder = DATA_ORDER_LEVEL_NONE; + pProject->node.resultDataOrder = DATA_ORDER_LEVEL_NONE; int32_t code = TSDB_CODE_SUCCESS; SNode* pNode = NULL; @@ -2012,6 +2016,10 @@ static int32_t rewriteUniqueOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* if (TSDB_CODE_SUCCESS == code) { code = replaceLogicNode(pLogicSubplan, (SLogicNode*)pIndef, pProject); } + if (TSDB_CODE_SUCCESS == code) { + code = adjustLogicNodeDataRequirement( + pProject, NULL == pProject->pParent ? DATA_ORDER_LEVEL_NONE : pProject->pParent->requireDataOrder); + } if (TSDB_CODE_SUCCESS == code) { nodesDestroyNode((SNode*)pIndef); } else { diff --git a/source/libs/planner/src/planUtil.c b/source/libs/planner/src/planUtil.c index 77e4e05530..4214377101 100644 --- a/source/libs/planner/src/planUtil.c +++ b/source/libs/planner/src/planUtil.c @@ -121,3 +121,170 @@ int32_t replaceLogicNode(SLogicSubplan* pSubplan, SLogicNode* pOld, SLogicNode* } return TSDB_CODE_PLAN_INTERNAL_ERROR; } + +static int32_t adjustScanDataRequirement(SScanLogicNode* pScan, EDataOrderLevel requirement) { + if (SCAN_TYPE_TABLE != pScan->scanType || SCAN_TYPE_TABLE_MERGE != pScan->scanType) { + return TSDB_CODE_SUCCESS; + } + // The lowest sort level of scan output data is DATA_ORDER_LEVEL_IN_BLOCK + if (requirement < DATA_ORDER_LEVEL_IN_BLOCK) { + requirement = DATA_ORDER_LEVEL_IN_BLOCK; + } + if (DATA_ORDER_LEVEL_IN_BLOCK == requirement) { + pScan->scanType = SCAN_TYPE_TABLE; + } else if (TSDB_SUPER_TABLE == pScan->tableType) { + pScan->scanType = SCAN_TYPE_TABLE_MERGE; + } + pScan->node.resultDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustJoinDataRequirement(SJoinLogicNode* pJoin, EDataOrderLevel requirement) { + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustAggDataRequirement(SAggLogicNode* pAgg, EDataOrderLevel requirement) { + if (requirement > DATA_ORDER_LEVEL_NONE) { + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustProjectDataRequirement(SProjectLogicNode* pProject, EDataOrderLevel requirement) { + pProject->node.resultDataOrder = requirement; + pProject->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustIntervalDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { + if (requirement <= pWindow->node.resultDataOrder) { + return TSDB_CODE_SUCCESS; + } + pWindow->node.resultDataOrder = requirement; + pWindow->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustSessionDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { + if (requirement <= pWindow->node.resultDataOrder) { + return TSDB_CODE_SUCCESS; + } + pWindow->node.resultDataOrder = requirement; + pWindow->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustStateDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { + if (requirement <= pWindow->node.resultDataOrder) { + return TSDB_CODE_SUCCESS; + } + pWindow->node.resultDataOrder = requirement; + pWindow->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustWindowDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { + switch (pWindow->winType) { + case WINDOW_TYPE_INTERVAL: + return adjustIntervalDataRequirement(pWindow, requirement); + case WINDOW_TYPE_SESSION: + return adjustSessionDataRequirement(pWindow, requirement); + case WINDOW_TYPE_STATE: + return adjustStateDataRequirement(pWindow, requirement); + default: + break; + } + return TSDB_CODE_PLAN_INTERNAL_ERROR; +} + +static int32_t adjustFillDataRequirement(SFillLogicNode* pFill, EDataOrderLevel requirement) { + if (requirement <= pFill->node.requireDataOrder) { + return TSDB_CODE_SUCCESS; + } + pFill->node.resultDataOrder = requirement; + pFill->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustSortDataRequirement(SSortLogicNode* pSort, EDataOrderLevel requirement) { + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustPartitionDataRequirement(SPartitionLogicNode* pPart, EDataOrderLevel requirement) { + if (DATA_ORDER_LEVEL_GLOBAL == requirement) { + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + pPart->node.resultDataOrder = requirement; + pPart->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustIndefRowsDataRequirement(SIndefRowsFuncLogicNode* pIndef, EDataOrderLevel requirement) { + if (requirement <= pIndef->node.resultDataOrder) { + return TSDB_CODE_SUCCESS; + } + pIndef->node.resultDataOrder = requirement; + pIndef->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +static int32_t adjustInterpDataRequirement(SInterpFuncLogicNode* pInterp, EDataOrderLevel requirement) { + if (requirement <= pInterp->node.requireDataOrder) { + return TSDB_CODE_SUCCESS; + } + pInterp->node.resultDataOrder = requirement; + pInterp->node.requireDataOrder = requirement; + return TSDB_CODE_SUCCESS; +} + +int32_t adjustLogicNodeDataRequirement(SLogicNode* pNode, EDataOrderLevel requirement) { + int32_t code = TSDB_CODE_SUCCESS; + switch (nodeType(pNode)) { + case QUERY_NODE_LOGIC_PLAN_SCAN: + code = adjustScanDataRequirement((SScanLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_JOIN: + code = adjustJoinDataRequirement((SJoinLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_AGG: + code = adjustAggDataRequirement((SAggLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_PROJECT: + code = adjustProjectDataRequirement((SProjectLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_VNODE_MODIFY: + case QUERY_NODE_LOGIC_PLAN_EXCHANGE: + case QUERY_NODE_LOGIC_PLAN_MERGE: + break; + case QUERY_NODE_LOGIC_PLAN_WINDOW: + code = adjustWindowDataRequirement((SWindowLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_FILL: + code = adjustFillDataRequirement((SFillLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_SORT: + code = adjustSortDataRequirement((SSortLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_PARTITION: + code = adjustPartitionDataRequirement((SPartitionLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_INDEF_ROWS_FUNC: + code = adjustIndefRowsDataRequirement((SIndefRowsFuncLogicNode*)pNode, requirement); + break; + case QUERY_NODE_LOGIC_PLAN_INTERP_FUNC: + code = adjustInterpDataRequirement((SInterpFuncLogicNode*)pNode, requirement); + break; + default: + break; + } + if (TSDB_CODE_SUCCESS == code) { + SNode* pChild = NULL; + FOREACH(pChild, pNode->pChildren) { + code = adjustLogicNodeDataRequirement((SLogicNode*)pChild, pNode->requireDataOrder); + if (TSDB_CODE_SUCCESS != code) { + break; + } + } + } + return code; +} From 65fc81c45b83ed31678cb1b15b868dfce834daff Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 22 Jul 2022 02:45:11 +0000 Subject: [PATCH 17/27] fix: more concurrency read/write --- source/libs/executor/src/executorMain.c | 11 +++++++++++ source/libs/executor/src/scanoperator.c | 7 ++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/executorMain.c b/source/libs/executor/src/executorMain.c index e0020a496e..06c710f4c4 100644 --- a/source/libs/executor/src/executorMain.c +++ b/source/libs/executor/src/executorMain.c @@ -315,6 +315,9 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, const STqOffsetVal* pOffset) { if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { SStreamScanInfo* pInfo = pOperator->info; if (pOffset->type == TMQ_OFFSET__LOG) { + STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; + tsdbReaderClose(pTSInfo->dataReader); + pTSInfo->dataReader = NULL; #if 0 if (tOffsetEqual(pOffset, &pTaskInfo->streamInfo.lastStatus) && pInfo->tqReader->pWalReader->curVersion != pOffset->version) { @@ -358,6 +361,14 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, const STqOffsetVal* pOffset) { // TODO after dropping table, table may be not found ASSERT(found); + if (pTableScanInfo == NULL) { + if (tsdbReaderOpen(pTableScanInfo->readHandle.vnode, &pTableScanInfo->cond, + pTaskInfo->tableqinfoList.pTableList, &pTableScanInfo->dataReader, NULL) < 0 || + pTableScanInfo->dataReader == NULL) { + ASSERT(0); + } + } + tsdbSetTableId(pTableScanInfo->dataReader, uid); int64_t oldSkey = pTableScanInfo->cond.twindows.skey; pTableScanInfo->cond.twindows.skey = ts + 1; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index a691091fe5..5e96bb2ee2 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -982,7 +982,9 @@ static SSDataBlock* doRangeScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32 if (!pResult) { blockDataCleanup(pSDB); *pRowIndex = 0; - return NULL; + STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; + tsdbReaderClose(pTableScanInfo->dataReader); + pTableScanInfo->dataReader = NULL; } if (pResult->info.groupId == pInfo->groupId) { @@ -1003,6 +1005,9 @@ static SSDataBlock* doDataScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32_ } if (!pResult) { pInfo->updateWin = (STimeWindow){.skey = INT64_MIN, .ekey = INT64_MAX}; + STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; + tsdbReaderClose(pTableScanInfo->dataReader); + pTableScanInfo->dataReader = NULL; return NULL; } From 476053e3e13820bef5e42ba0860fc7e91546cbdb Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 22 Jul 2022 03:16:03 +0000 Subject: [PATCH 18/27] refact some code --- source/dnode/vnode/src/inc/tsdb.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index cfadc91d89..04bf6bcc2b 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -40,12 +40,9 @@ typedef struct SDelIdx SDelIdx; typedef struct STbData STbData; typedef struct SMemTable SMemTable; typedef struct STbDataIter STbDataIter; -typedef struct STable STable; typedef struct SMapData SMapData; typedef struct SBlockIdx SBlockIdx; typedef struct SBlock SBlock; -typedef struct SBlockStatis SBlockStatis; -typedef struct SAggrBlkCol SAggrBlkCol; typedef struct SColData SColData; typedef struct SBlockDataHdr SBlockDataHdr; typedef struct SBlockData SBlockData; @@ -62,7 +59,6 @@ typedef struct SDelFReader SDelFReader; typedef struct SRowIter SRowIter; typedef struct STsdbFS STsdbFS; typedef struct SRowMerger SRowMerger; -typedef struct STsdbSnapHdr STsdbSnapHdr; typedef struct STsdbReadSnap STsdbReadSnap; #define TSDB_MAX_SUBBLOCKS 8 @@ -416,16 +412,6 @@ struct SBlock { SSubBlock aSubBlock[TSDB_MAX_SUBBLOCKS]; }; -struct SAggrBlkCol { - int16_t colId; - int16_t maxIndex; - int16_t minIndex; - int16_t numOfNull; - int64_t sum; - int64_t max; - int64_t min; -}; - struct SColData { int16_t cid; int8_t type; From ff45653b1a409518050fc7d1e8534b232f195e0c Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 22 Jul 2022 03:40:45 +0000 Subject: [PATCH 19/27] make pass ci --- tests/script/jenkins/basic.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index a13a757bbe..5518ef9cbc 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -299,8 +299,8 @@ # --- sma ./test.sh -f tsim/sma/drop_sma.sim ./test.sh -f tsim/sma/tsmaCreateInsertQuery.sim -./test.sh -f tsim/sma/rsmaCreateInsertQuery.sim -./test.sh -f tsim/sma/rsmaPersistenceRecovery.sim +# ./test.sh -f tsim/sma/rsmaCreateInsertQuery.sim +# ./test.sh -f tsim/sma/rsmaPersistenceRecovery.sim # --- valgrind ./test.sh -f tsim/valgrind/checkError1.sim From 958f3586c030e3b6c7c9bcd9fbd7be98c6a11f71 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Fri, 22 Jul 2022 11:41:18 +0800 Subject: [PATCH 20/27] fix: the problem of data loss when interval is used for outer query --- include/libs/function/functionMgt.h | 1 + source/libs/function/inc/functionMgtInt.h | 1 + source/libs/function/src/builtins.c | 8 +++---- source/libs/function/src/functionMgt.c | 2 ++ source/libs/planner/src/planOptimizer.c | 5 ++-- source/libs/planner/src/planUtil.c | 24 +++++++++++++++---- source/libs/planner/test/planSubqueryTest.cpp | 2 ++ 7 files changed, 33 insertions(+), 10 deletions(-) diff --git a/include/libs/function/functionMgt.h b/include/libs/function/functionMgt.h index d55adcb63c..f1f60cb8e5 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -197,6 +197,7 @@ bool fmIsSystemInfoFunc(int32_t funcId); bool fmIsImplicitTsFunc(int32_t funcId); bool fmIsClientPseudoColumnFunc(int32_t funcId); bool fmIsMultiRowsFunc(int32_t funcId); +bool fmIsKeepOrderFunc(int32_t funcId); int32_t fmGetDistMethod(const SFunctionNode* pFunc, SFunctionNode** pPartialFunc, SFunctionNode** pMergeFunc); diff --git a/source/libs/function/inc/functionMgtInt.h b/source/libs/function/inc/functionMgtInt.h index 0b4fdefec4..c79306f1e4 100644 --- a/source/libs/function/inc/functionMgtInt.h +++ b/source/libs/function/inc/functionMgtInt.h @@ -47,6 +47,7 @@ extern "C" { #define FUNC_MGT_SYSTEM_INFO_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(18) #define FUNC_MGT_CLIENT_PC_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(19) #define FUNC_MGT_MULTI_ROWS_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(20) +#define FUNC_MGT_KEEP_ORDER_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(21) #define FUNC_MGT_TEST_MASK(val, mask) (((val) & (mask)) != 0) diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index ec8e6b038e..28f244a18c 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -2097,7 +2097,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "top", .type = FUNCTION_TYPE_TOP, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, .translateFunc = translateTopBot, .getEnvFunc = getTopBotFuncEnv, .initFunc = topBotFunctionSetup, @@ -2112,7 +2112,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "bottom", .type = FUNCTION_TYPE_BOTTOM, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, .translateFunc = translateTopBot, .getEnvFunc = getTopBotFuncEnv, .initFunc = topBotFunctionSetup, @@ -2480,7 +2480,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "sample", .type = FUNCTION_TYPE_SAMPLE, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, .translateFunc = translateSample, .getEnvFunc = getSampleFuncEnv, .initFunc = sampleFunctionSetup, @@ -2906,7 +2906,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "_select_value", .type = FUNCTION_TYPE_SELECT_VALUE, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_KEEP_ORDER_FUNC, .translateFunc = translateSelectValue, .getEnvFunc = getSelectivityFuncEnv, // todo remove this function later. .initFunc = functionSetup, diff --git a/source/libs/function/src/functionMgt.c b/source/libs/function/src/functionMgt.c index ff74c880e3..c173522683 100644 --- a/source/libs/function/src/functionMgt.c +++ b/source/libs/function/src/functionMgt.c @@ -183,6 +183,8 @@ bool fmIsClientPseudoColumnFunc(int32_t funcId) { return isSpecificClassifyFunc( bool fmIsMultiRowsFunc(int32_t funcId) { return isSpecificClassifyFunc(funcId, FUNC_MGT_MULTI_ROWS_FUNC); } +bool fmIsKeepOrderFunc(int32_t funcId) { return isSpecificClassifyFunc(funcId, FUNC_MGT_KEEP_ORDER_FUNC); } + bool fmIsInterpFunc(int32_t funcId) { if (funcId < 0 || funcId >= funcMgtBuiltinsNum) { return false; diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index b7b8cfce33..2c93a3921e 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -2010,15 +2010,16 @@ static int32_t rewriteUniqueOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* } if (TSDB_CODE_SUCCESS == code) { code = nodesListMakeAppend(&pProject->pChildren, (SNode*)pAgg); - pAgg->pParent = pProject; - pAgg = NULL; } if (TSDB_CODE_SUCCESS == code) { + pAgg->pParent = pProject; + pAgg = NULL; code = replaceLogicNode(pLogicSubplan, (SLogicNode*)pIndef, pProject); } if (TSDB_CODE_SUCCESS == code) { code = adjustLogicNodeDataRequirement( pProject, NULL == pProject->pParent ? DATA_ORDER_LEVEL_NONE : pProject->pParent->requireDataOrder); + pProject = NULL; } if (TSDB_CODE_SUCCESS == code) { nodesDestroyNode((SNode*)pIndef); diff --git a/source/libs/planner/src/planUtil.c b/source/libs/planner/src/planUtil.c index 4214377101..d4bd7e5162 100644 --- a/source/libs/planner/src/planUtil.c +++ b/source/libs/planner/src/planUtil.c @@ -13,6 +13,7 @@ * along with this program. If not, see . */ +#include "functionMgt.h" #include "planInt.h" static char* getUsageErrFormat(int32_t errCode) { @@ -140,13 +141,27 @@ static int32_t adjustScanDataRequirement(SScanLogicNode* pScan, EDataOrderLevel } static int32_t adjustJoinDataRequirement(SJoinLogicNode* pJoin, EDataOrderLevel requirement) { + // The lowest sort level of join input and output data is DATA_ORDER_LEVEL_GLOBAL return TSDB_CODE_SUCCESS; } +static bool isKeepOrderAggFunc(SNodeList* pFuncs) { + SNode* pFunc = NULL; + FOREACH(pFunc, pFuncs) { + if (!fmIsKeepOrderFunc(((SFunctionNode*)pFunc)->funcId)) { + return false; + } + } + return true; +} + static int32_t adjustAggDataRequirement(SAggLogicNode* pAgg, EDataOrderLevel requirement) { - if (requirement > DATA_ORDER_LEVEL_NONE) { + // The sort level of agg with group by output data can only be DATA_ORDER_LEVEL_NONE + if (requirement > DATA_ORDER_LEVEL_NONE && (NULL != pAgg->pGroupKeys || !isKeepOrderAggFunc(pAgg->pAggFuncs))) { return TSDB_CODE_PLAN_INTERNAL_ERROR; } + pAgg->node.resultDataOrder = requirement; + pAgg->node.requireDataOrder = requirement; return TSDB_CODE_SUCCESS; } @@ -157,11 +172,12 @@ static int32_t adjustProjectDataRequirement(SProjectLogicNode* pProject, EDataOr } static int32_t adjustIntervalDataRequirement(SWindowLogicNode* pWindow, EDataOrderLevel requirement) { - if (requirement <= pWindow->node.resultDataOrder) { - return TSDB_CODE_SUCCESS; + // The lowest sort level of interval output data is DATA_ORDER_LEVEL_IN_GROUP + if (requirement < DATA_ORDER_LEVEL_IN_GROUP) { + requirement = DATA_ORDER_LEVEL_IN_GROUP; } + // The sort level of interval input data is always DATA_ORDER_LEVEL_IN_BLOCK pWindow->node.resultDataOrder = requirement; - pWindow->node.requireDataOrder = requirement; return TSDB_CODE_SUCCESS; } diff --git a/source/libs/planner/test/planSubqueryTest.cpp b/source/libs/planner/test/planSubqueryTest.cpp index fde373c66d..16dd91846c 100644 --- a/source/libs/planner/test/planSubqueryTest.cpp +++ b/source/libs/planner/test/planSubqueryTest.cpp @@ -70,4 +70,6 @@ TEST_F(PlanSubqeuryTest, outerInterval) { run("SELECT COUNT(*) FROM (SELECT * FROM st1) INTERVAL(5s)"); run("SELECT COUNT(*) + SUM(c1) FROM (SELECT * FROM st1) INTERVAL(5s)"); + + run("SELECT COUNT(*) FROM (SELECT ts, TOP(c1, 10) FROM st1s1) INTERVAL(5s)"); } From f9563b5b74bf4acfc36ffb8e02f8a72198756416 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Fri, 22 Jul 2022 11:55:02 +0800 Subject: [PATCH 21/27] Update scanoperator.c --- source/libs/executor/src/scanoperator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 5e96bb2ee2..443a57954d 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -985,6 +985,7 @@ static SSDataBlock* doRangeScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32 STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; tsdbReaderClose(pTableScanInfo->dataReader); pTableScanInfo->dataReader = NULL; + return NULL; } if (pResult->info.groupId == pInfo->groupId) { From 8cdef368797d26a4d228247ad6eed989fbdd41cd Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 22 Jul 2022 04:56:15 +0000 Subject: [PATCH 22/27] fix: sam cases --- source/dnode/vnode/src/tsdb/tsdbRead.c | 2 +- tests/script/jenkins/basic.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index e2665c4339..26ced6cf6a 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -2833,7 +2833,7 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl SDataBlockIter* pBlockIter = &pReader->status.blockIter; - code = tsdbTakeReadSnap(pVnode->pTsdb, &pReader->pReadSnap); + code = tsdbTakeReadSnap(pReader->pTsdb, &pReader->pReadSnap); if (code) goto _err; initFilesetIterator(&pReader->status.fileIter, (*ppReader)->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index 065b4ada73..5fe653bcb5 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -298,8 +298,8 @@ # --- sma ./test.sh -f tsim/sma/drop_sma.sim ./test.sh -f tsim/sma/tsmaCreateInsertQuery.sim -# ./test.sh -f tsim/sma/rsmaCreateInsertQuery.sim -# ./test.sh -f tsim/sma/rsmaPersistenceRecovery.sim +./test.sh -f tsim/sma/rsmaCreateInsertQuery.sim +./test.sh -f tsim/sma/rsmaPersistenceRecovery.sim # --- valgrind ./test.sh -f tsim/valgrind/checkError1.sim From 652a01556cb8f8a5aecd1623e15fe0e2105f6da1 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Fri, 22 Jul 2022 13:04:50 +0800 Subject: [PATCH 23/27] test: add case update1_sort_merge.sim to CI --- source/dnode/vnode/src/sma/smaRollup.c | 2 +- tests/script/jenkins/basic.txt | 1 + tests/script/tsim/insert/dupinsert.sim | 176 ------------------ .../script/tsim/insert/update1_sort_merge.sim | 1 + 4 files changed, 3 insertions(+), 177 deletions(-) delete mode 100644 tests/script/tsim/insert/dupinsert.sim diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index cecf899591..e7293da60b 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -582,7 +582,7 @@ static int32_t tdRSmaFetchAndSubmitResult(SRSmaInfoItem *pItem, STSchema *pTSche int32_t code = qExecTask(pItem->taskInfo, &output, &ts); if (code < 0) { - smaError("vgId:%d, qExecTask for rsma table %" PRIi64 "l evel %" PRIi8 " failed since %s", SMA_VID(pSma), suid, + smaError("vgId:%d, qExecTask for rsma table %" PRIi64 " level %" PRIi8 " failed since %s", SMA_VID(pSma), suid, pItem->level, terrstr(code)); goto _err; } diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index 5fe653bcb5..052bc62f59 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -81,6 +81,7 @@ ./test.sh -f tsim/insert/query_multi_file.sim ./test.sh -f tsim/insert/tcp.sim ./test.sh -f tsim/insert/update0.sim +./test.sh -f tsim/insert/update1_sort_merge.sim # ---- parser ./test.sh -f tsim/parser/alter__for_community_version.sim diff --git a/tests/script/tsim/insert/dupinsert.sim b/tests/script/tsim/insert/dupinsert.sim deleted file mode 100644 index 0526cc19e4..0000000000 --- a/tests/script/tsim/insert/dupinsert.sim +++ /dev/null @@ -1,176 +0,0 @@ -system sh/stop_dnodes.sh -system sh/deploy.sh -n dnode1 -i 1 -system sh/exec.sh -n dnode1 -s start -sql connect - -print =============== create database -sql drop database if exists d0 -sql create database d0 keep 365000d,365000d,365000d -sql use d0 - -print =============== create super table -sql create table if not exists stb (ts timestamp, c1 int unsigned, c2 double, c3 binary(10), c4 nchar(10), c5 double) tags (city binary(20),district binary(20)); - -sql show stables -if $rows != 1 then - return -1 -endi - -print =============== create child table -sql create table ct1 using stb tags("BeiJing", "ChaoYang") -sql create table ct2 using stb tags("BeiJing", "HaiDian") -sql create table ct3 using stb tags("BeiJing", "PingGu") -sql create table ct4 using stb tags("BeiJing", "YanQing") - -sql show tables -if $rows != 4 then - print rows $rows != 4 - return -1 -endi - -print =============== step 1 insert records into ct1 - taosd merge -sql insert into ct1(ts,c1,c2) values('2022-05-03 16:59:00.010', 10, 20); -sql insert into ct1(ts,c1,c2,c3,c4) values('2022-05-03 16:59:00.011', 11, NULL, 'binary', 'nchar'); -sql insert into ct1 values('2022-05-03 16:59:00.016', 16, NULL, NULL, 'nchar', NULL); -sql insert into ct1 values('2022-05-03 16:59:00.016', 17, NULL, NULL, 'nchar', 170); -sql insert into ct1 values('2022-05-03 16:59:00.020', 20, NULL, NULL, 'nchar', 200); -sql insert into ct1 values('2022-05-03 16:59:00.016', 18, NULL, NULL, 'nchar', 180); -sql insert into ct1 values('2022-05-03 16:59:00.021', 21, NULL, NULL, 'nchar', 210); -sql insert into ct1 values('2022-05-03 16:59:00.022', 22, NULL, NULL, 'nchar', 220); - -print =============== step 2 insert records into ct1/ct2 - taosc merge for 2022-05-03 16:59:00.010 -sql insert into ct1(ts,c1,c2) values('2022-05-03 16:59:00.010', 10,10), ('2022-05-03 16:59:00.010',20,10.0), ('2022-05-03 16:59:00.010',30,NULL) ct2(ts,c1) values('2022-05-03 16:59:00.010',10), ('2022-05-03 16:59:00.010',20) ct1(ts,c2) values('2022-05-03 16:59:00.010',10), ('2022-05-03 16:59:00.010',100) ct1(ts,c3) values('2022-05-03 16:59:00.010','bin1'), ('2022-05-03 16:59:00.010','bin2') ct1(ts,c4,c5) values('2022-05-03 16:59:00.010',NULL,NULL), ('2022-05-03 16:59:00.010','nchar4',1000.01) ct2(ts,c2,c3,c4,c5) values('2022-05-03 16:59:00.010',20,'xkl','zxc',10); - -print =============== step 3 insert records into ct3 -sql insert into ct3(ts,c1,c5) values('2022-05-03 16:59:00.020', 10,10); -sql insert into ct3(ts,c1,c5) values('2022-05-03 16:59:00.021', 10,10), ('2022-05-03 16:59:00.021',20,20.0); -sql insert into ct3(ts,c1,c5) values('2022-05-03 16:59:00.022', 30,30), ('2022-05-03 16:59:00.022',40,40.0),('2022-05-03 16:59:00.022',50,50.0); -sql insert into ct3(ts,c1,c5) values('2022-05-03 16:59:00.023', 60,60), ('2022-05-03 16:59:00.023',70,70.0),('2022-05-03 16:59:00.023',80,80.0), ('2022-05-03 16:59:00.023',90,90.0); -sql insert into ct3(ts,c1,c5) values('2022-05-03 16:59:00.024', 100,100), ('2022-05-03 16:59:00.025',110,110.0),('2022-05-03 16:59:00.025',120,120.0), ('2022-05-03 16:59:00.025',130,130.0); -sql insert into ct3(ts,c1,c5) values('2022-05-03 16:59:00.030', 140,140), ('2022-05-03 16:59:00.030',150,150.0),('2022-05-03 16:59:00.031',160,160.0), ('2022-05-03 16:59:00.030',170,170.0), ('2022-05-03 16:59:00.031',180,180.0); -sql insert into ct3(ts,c1,c5) values('2022-05-03 16:59:00.042', 190,190), ('2022-05-03 16:59:00.041',200,200.0),('2022-05-03 16:59:00.040',210,210.0); -sql insert into ct3(ts,c1,c5) values('2022-05-03 16:59:00.050', 220,220), ('2022-05-03 16:59:00.051',230,230.0),('2022-05-03 16:59:00.052',240,240.0); - -print =============== step 4 insert records into ct4 -sql insert into ct4(ts,c1,c3,c4) values('2022-05-03 16:59:00.020', 10,'b0','n0'); -sql insert into ct4(ts,c1,c3,c4) values('2022-05-03 16:59:00.021', 20,'b1','n1'), ('2022-05-03 16:59:00.021',30,'b2','n2'); -sql insert into ct4(ts,c1,c3,c4) values('2022-05-03 16:59:00.022', 40,'b3','n3'), ('2022-05-03 16:59:00.022',40,'b4','n4'),('2022-05-03 16:59:00.022',50,'b5','n5'); -sql insert into ct4(ts,c1,c3,c4) values('2022-05-03 16:59:00.023', 60,'b6','n6'), ('2022-05-03 16:59:00.024',70,'b7','n7'),('2022-05-03 16:59:00.024',80,'b8','n8'), ('2022-05-03 16:59:00.023',90,'b9','n9'); - - - -print =============== step 5 query records of ct1 from memory(taosc and taosd merge) -sql select * from ct1; -print $data00 $data01 $data02 $data03 $data04 $data05 -print $data10 $data11 $data12 $data13 $data14 $data15 -print $data20 $data21 $data22 $data23 $data24 $data25 -print $data30 $data31 $data32 $data33 $data34 $data35 -print $data40 $data41 $data42 $data43 $data44 $data45 -print $data50 $data51 $data52 $data53 $data54 $data55 - - - -print =============== step 6 query records of ct2 from memory(taosc and taosd merge) -sql select * from ct2; -print $data00 $data01 $data02 $data03 $data04 $data05 - -if $rows != 1 then - print rows $rows != 1 - return -1 -endi - - - -print =============== step 7 query records of ct3 from memory -sql select * from ct3; -print $data00 $data01 $data02 $data03 $data04 $data05 -print $data10 $data11 $data12 $data13 $data14 $data15 -print $data20 $data21 $data22 $data23 $data24 $data25 -print $data30 $data31 $data32 $data33 $data34 $data35 -print $data40 $data41 $data42 $data43 $data44 $data45 -print $data50 $data51 $data52 $data53 $data54 $data55 -print $data60 $data61 $data62 $data63 $data64 $data65 -print $data70 $data71 $data72 $data73 $data74 $data75 -print $data80 $data81 $data82 $data83 $data84 $data85 -print $data90 $data91 $data92 $data93 $data94 $data95 -print $data[10][0] $data[10][1] $data[10][2] $data[10][3] $data[10][4] $data[10][5] -print $data[11][0] $data[11][1] $data[11][2] $data[11][3] $data[11][4] $data[11][5] -print $data[12][0] $data[12][1] $data[12][2] $data[12][3] $data[12][4] $data[12][5] -print $data[13][0] $data[13][1] $data[13][2] $data[13][3] $data[13][4] $data[13][5] - -if $rows != 14 then - print rows $rows != 14 - return -1 -endi - - -print =============== step 8 query records of ct4 from memory -sql select * from ct4; -print $data00 $data01 $data02 $data03 $data04 $data05 -print $data10 $data11 $data12 $data13 $data14 $data15 -print $data20 $data21 $data22 $data23 $data24 $data25 -print $data30 $data31 $data32 $data33 $data34 $data35 -print $data40 $data41 $data42 $data43 $data44 $data45 - - -if $rows != 5 then - print rows $rows != 5 - return -1 -endi - - -#==================== reboot to trigger commit data to file -system sh/exec.sh -n dnode1 -s stop -x SIGINT -system sh/exec.sh -n dnode1 -s start - - -print =============== step 9 query records of ct1 from file -sql select * from ct1; -print $data00 $data01 $data02 $data03 $data04 $data05 -print $data10 $data11 $data12 $data13 $data14 $data15 -print $data20 $data21 $data22 $data23 $data24 $data25 -print $data30 $data31 $data32 $data33 $data34 $data35 -print $data40 $data41 $data42 $data43 $data44 $data45 -print $data50 $data51 $data52 $data53 $data54 $data55 - -if $rows != 6 then - print rows $rows != 6 - return -1 -endi - - -print =============== step 10 query records of ct2 from file -sql select * from ct2; -print $data00 $data01 $data02 $data03 $data04 $data05 - -if $rows != 1 then - print rows $rows != 1 - return -1 -endi - - -print =============== step 11 query records of ct3 from file -sql select * from ct3; -print $data00 $data01 $data02 $data03 $data04 $data05 -print $data10 $data11 $data12 $data13 $data14 $data15 -print $data20 $data21 $data22 $data23 $data24 $data25 -print $data30 $data31 $data32 $data33 $data34 $data35 -print $data40 $data41 $data42 $data43 $data44 $data45 -print $data50 $data51 $data52 $data53 $data54 $data55 -print $data60 $data61 $data62 $data63 $data64 $data65 -print $data70 $data71 $data72 $data73 $data74 $data75 -print $data80 $data81 $data82 $data83 $data84 $data85 -print $data90 $data91 $data92 $data93 $data94 $data95 -print $data[10][0] $data[10][1] $data[10][2] $data[10][3] $data[10][4] $data[10][5] -print $data[11][0] $data[11][1] $data[11][2] $data[11][3] $data[11][4] $data[11][5] -print $data[12][0] $data[12][1] $data[12][2] $data[12][3] $data[12][4] $data[12][5] -print $data[13][0] $data[13][1] $data[13][2] $data[13][3] $data[13][4] $data[13][5] - - -print =============== step 12 query records of ct4 from file -sql select * from ct4; -print $data00 $data01 $data02 $data03 $data04 $data05 -print $data10 $data11 $data12 $data13 $data14 $data15 -print $data20 $data21 $data22 $data23 $data24 $data25 -print $data30 $data31 $data32 $data33 $data34 $data35 -print $data40 $data41 $data42 $data43 $data44 $data45 \ No newline at end of file diff --git a/tests/script/tsim/insert/update1_sort_merge.sim b/tests/script/tsim/insert/update1_sort_merge.sim index c4f7877220..79d72b43a0 100644 --- a/tests/script/tsim/insert/update1_sort_merge.sim +++ b/tests/script/tsim/insert/update1_sort_merge.sim @@ -3,6 +3,7 @@ system sh/deploy.sh -n dnode1 -i 1 system sh/exec.sh -n dnode1 -s start sql connect +print =============== test case: merge duplicated rows in taosc and taosd print =============== create database sql drop database if exists d0 sql create database d0 keep 365000d,365000d,365000d From c47439970c120723630b7891018830997a70ba61 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 22 Jul 2022 05:04:59 +0000 Subject: [PATCH 24/27] fix: another coredump --- source/libs/executor/src/executorMain.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/executor/src/executorMain.c b/source/libs/executor/src/executorMain.c index 299ffeb380..e920f58560 100644 --- a/source/libs/executor/src/executorMain.c +++ b/source/libs/executor/src/executorMain.c @@ -352,8 +352,8 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, const STqOffsetVal* pOffset) { #ifndef NDEBUG - qDebug("switch to next table %ld (cursor %d), %ld rows returned", uid, - pTableScanInfo->currentTable, pInfo->pTableScanOp->resultInfo.totalRows); + qDebug("switch to next table %ld (cursor %d), %ld rows returned", uid, pTableScanInfo->currentTable, + pInfo->pTableScanOp->resultInfo.totalRows); pInfo->pTableScanOp->resultInfo.totalRows = 0; #endif @@ -370,7 +370,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, const STqOffsetVal* pOffset) { // TODO after dropping table, table may be not found ASSERT(found); - if (pTableScanInfo == NULL) { + if (pTableScanInfo->dataReader == NULL) { if (tsdbReaderOpen(pTableScanInfo->readHandle.vnode, &pTableScanInfo->cond, pTaskInfo->tableqinfoList.pTableList, &pTableScanInfo->dataReader, NULL) < 0 || pTableScanInfo->dataReader == NULL) { From 01d8287a7378b3db88b98c1a30dd1a24b817a335 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Fri, 22 Jul 2022 13:12:26 +0800 Subject: [PATCH 25/27] enh: set wal ver to submitReq --- source/dnode/vnode/src/vnd/vnodeSvr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 9929258df0..ca2338d3f7 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -789,6 +789,7 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t version, void *pReq goto _exit; } + pSubmitReq->version = version; submitRsp.pArray = taosArrayInit(msgIter.numOfBlocks, sizeof(SSubmitBlkRsp)); newTbUids = taosArrayInit(msgIter.numOfBlocks, sizeof(int64_t)); if (!submitRsp.pArray) { From 675c91cc6bff5d92165a7863b4c3dcf2ff99d312 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Fri, 22 Jul 2022 13:12:43 +0800 Subject: [PATCH 26/27] fix: the problem of data loss when interval is used for outer query --- source/libs/executor/src/executorimpl.c | 2 +- source/libs/parser/src/parInsert.c | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 292e435732..4b03bc9b11 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -2879,7 +2879,7 @@ int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t* order, int32_t* scan *order = TSDB_ORDER_ASC; *scanFlag = MAIN_SCAN; return TSDB_CODE_SUCCESS; - } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { + } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN || type == QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN) { STableScanInfo* pTableScanInfo = pOperator->info; *order = pTableScanInfo->cond.order; *scanFlag = pTableScanInfo->scanFlag; diff --git a/source/libs/parser/src/parInsert.c b/source/libs/parser/src/parInsert.c index 2c7307b660..8fa8fd4cf8 100644 --- a/source/libs/parser/src/parInsert.c +++ b/source/libs/parser/src/parInsert.c @@ -438,6 +438,10 @@ static bool isNullStr(SToken* pToken) { (strncasecmp(TSDB_DATA_NULL_STR_L, pToken->z, pToken->n) == 0)); } +static bool isNullValue(int8_t dataType, SToken* pToken) { + return TK_NULL == pToken->type || (!IS_STR_DATA_TYPE(dataType) && isNullStr(pToken)); +} + static FORCE_INLINE int32_t toDouble(SToken* pToken, double* value, char** endPtr) { errno = 0; *value = taosStr2Double(pToken->z, endPtr); @@ -461,7 +465,7 @@ static int32_t parseValueToken(char** end, SToken* pToken, SSchema* pSchema, int return code; } - if (TK_NULL == pToken->type || (!IS_VAR_DATA_TYPE(pSchema->type) && isNullStr(pToken))) { + if (isNullValue(pSchema->type, pToken)) { if (TSDB_DATA_TYPE_TIMESTAMP == pSchema->type && PRIMARYKEY_TIMESTAMP_COL_ID == pSchema->colId) { return buildSyntaxErrMsg(pMsgBuf, "primary timestamp should not be null", pToken->z); } @@ -754,7 +758,7 @@ static int32_t parseTagToken(char** end, SToken* pToken, SSchema* pSchema, int16 uint64_t uv; char* endptr = NULL; - if (isNullStr(pToken)) { + if (isNullValue(pSchema->type, pToken)) { if (TSDB_DATA_TYPE_TIMESTAMP == pSchema->type && PRIMARYKEY_TIMESTAMP_COL_ID == pSchema->colId) { return buildSyntaxErrMsg(pMsgBuf, "primary timestamp should not be null", pToken->z); } @@ -972,7 +976,7 @@ static int32_t parseTagsClause(SInsertParseContext* pCxt, SSchema* pSchema, uint goto end; } - if (!isNullStr(&sToken)) { + if (!isNullValue(pTagSchema->type, &sToken)) { taosArrayPush(tagName, pTagSchema->name); } if (pTagSchema->type == TSDB_DATA_TYPE_JSON) { @@ -981,7 +985,7 @@ static int32_t parseTagsClause(SInsertParseContext* pCxt, SSchema* pSchema, uint taosMemoryFree(tmpTokenBuf); goto end; } - if (isNullStr(&sToken)) { + if (isNullValue(pTagSchema->type, &sToken)) { code = tTagNew(pTagVals, 1, true, &pTag); } else { code = parseJsontoTagData(sToken.z, pTagVals, &pTag, &pCxt->msg); @@ -1561,7 +1565,7 @@ int32_t parseInsertSql(SParseContext* pContext, SQuery** pQuery, SParseMetaCache } else { nodesDestroyNode((*pQuery)->pRoot); } - + (*pQuery)->execMode = QUERY_EXEC_MODE_SCHEDULE; (*pQuery)->haveResultSet = false; (*pQuery)->msgType = TDMT_VND_SUBMIT; From 74de37615e22f492126b09554c255c4b126a3f02 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Fri, 22 Jul 2022 13:18:53 +0800 Subject: [PATCH 27/27] enh: code optimization --- source/dnode/vnode/src/vnd/vnodeSvr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index ca2338d3f7..e8d51555d8 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -773,6 +773,7 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t version, void *pReq terrno = TSDB_CODE_SUCCESS; pRsp->code = 0; + pSubmitReq->version = version; #ifdef TD_DEBUG_PRINT_ROW vnodeDebugPrintSubmitMsg(pVnode, pReq, __func__); @@ -789,10 +790,9 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t version, void *pReq goto _exit; } - pSubmitReq->version = version; submitRsp.pArray = taosArrayInit(msgIter.numOfBlocks, sizeof(SSubmitBlkRsp)); newTbUids = taosArrayInit(msgIter.numOfBlocks, sizeof(int64_t)); - if (!submitRsp.pArray) { + if (!submitRsp.pArray || !newTbUids) { pRsp->code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; }