diff --git a/cmake/cmake.options b/cmake/cmake.options index 5d99b2214a..51d6f53048 100644 --- a/cmake/cmake.options +++ b/cmake/cmake.options @@ -51,6 +51,13 @@ IF(${TD_WINDOWS}) "If build unit tests using googletest" ON ) + + option( + TDENGINE_3 + "TDengine 3.x" + ON + ) + ELSEIF (TD_DARWIN_64) add_definitions(-DCOMPILER_SUPPORTS_CXX13) option( diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 0b647934ff..9e7aea03ea 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -103,6 +103,7 @@ typedef struct SDataBlockInfo { int16_t hasVarCol; uint32_t capacity; // TODO: optimize and remove following + int64_t version; // used for stream, and need serialization int32_t childId; // used for stream, do not serialize EStreamType type; // used for stream, do not serialize STimeWindow calWin; // used for stream, do not serialize diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 3e27bd9268..77b6ca1833 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -438,7 +438,7 @@ static FORCE_INLINE int32_t tDecodeSSchemaWrapperEx(SDecoder* pDecoder, SSchemaW return 0; } -STSchema* tdGetSTSChemaFromSSChema(SSchema** pSchema, int32_t nCols); +STSchema* tdGetSTSChemaFromSSChema(SSchema* pSchema, int32_t nCols, int32_t sver); typedef struct { char name[TSDB_TABLE_FNAME_LEN]; @@ -1359,6 +1359,7 @@ typedef struct { int32_t numOfCols; int64_t skey; int64_t ekey; + int64_t version; // for stream char data[]; } SRetrieveTableRsp; diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 65244ec11a..0c466e5396 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -195,6 +195,8 @@ int32_t qStreamInput(qTaskInfo_t tinfo, void* pItem); int32_t qStreamPrepareRecover(qTaskInfo_t tinfo, int64_t startVer, int64_t endVer); +STimeWindow getAlignQueryTimeWindow(SInterval* pInterval, int32_t precision, int64_t key); + #ifdef __cplusplus } #endif diff --git 
a/include/libs/function/functionMgt.h b/include/libs/function/functionMgt.h index d55adcb63c..f1f60cb8e5 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -197,6 +197,7 @@ bool fmIsSystemInfoFunc(int32_t funcId); bool fmIsImplicitTsFunc(int32_t funcId); bool fmIsClientPseudoColumnFunc(int32_t funcId); bool fmIsMultiRowsFunc(int32_t funcId); +bool fmIsKeepOrderFunc(int32_t funcId); int32_t fmGetDistMethod(const SFunctionNode* pFunc, SFunctionNode** pPartialFunc, SFunctionNode** pMergeFunc); diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index 1c3e5903f6..a11ea0b2cd 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -29,9 +29,17 @@ extern "C" { typedef enum EDataOrderLevel { DATA_ORDER_LEVEL_NONE = 1, DATA_ORDER_LEVEL_IN_BLOCK, - DATA_ORDER_LEVEL_IN_GROUP + DATA_ORDER_LEVEL_IN_GROUP, + DATA_ORDER_LEVEL_GLOBAL } EDataOrderLevel; +typedef enum EGroupAction { + GROUP_ACTION_NONE = 1, + GROUP_ACTION_SET, + GROUP_ACTION_KEEP, + GROUP_ACTION_CLEAR +} EGroupAction; + typedef struct SLogicNode { ENodeType type; SNodeList* pTargets; // SColumnNode @@ -44,6 +52,7 @@ typedef struct SLogicNode { SNode* pSlimit; EDataOrderLevel requireDataOrder; // requirements for input data EDataOrderLevel resultDataOrder; // properties of the output data + EGroupAction groupAction; } SLogicNode; typedef enum EScanType { diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index f6c3b3f5b2..eb83da1803 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -142,6 +142,7 @@ static FORCE_INLINE void* streamQueueNextItem(SStreamQueue* queue) { ASSERT(queue->qItem != NULL); return streamQueueCurItem(queue); } else { + queue->qItem = NULL; taosGetQitem(queue->qall, &queue->qItem); if (queue->qItem == NULL) { taosReadAllQitems(queue->queue, queue->qall); diff --git a/include/os/osSignal.h b/include/os/osSignal.h index e22c43684c..12f4f2ed0f 100644 
--- a/include/os/osSignal.h +++ b/include/os/osSignal.h @@ -44,7 +44,11 @@ extern "C" { #define SIGBREAK 1234 #endif +#ifdef WINDOWS +typedef BOOL (*FSignalHandler)(DWORD fdwCtrlType); +#else typedef void (*FSignalHandler)(int32_t signum, void *sigInfo, void *context); +#endif void taosSetSignal(int32_t signum, FSignalHandler sigfp); void taosIgnSignal(int32_t signum); void taosDflSignal(int32_t signum); diff --git a/include/os/osSystem.h b/include/os/osSystem.h index 581e688ccb..6770be6e46 100644 --- a/include/os/osSystem.h +++ b/include/os/osSystem.h @@ -29,9 +29,6 @@ extern "C" { #define tcgetattr TCGETATTR_FUNC_TAOS_FORBID #endif -#define TAOS_CONSOLE_PROMPT_HEADER "taos> " -#define TAOS_CONSOLE_PROMPT_CONTINUE " -> " - typedef struct TdCmd *TdCmdPtr; TdCmdPtr taosOpenCmd(const char* cmd); diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 8611278550..3e733d291b 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -4941,14 +4941,14 @@ int tDecodeSVCreateStbReq(SDecoder *pCoder, SVCreateStbReq *pReq) { return 0; } -STSchema *tdGetSTSChemaFromSSChema(SSchema **pSchema, int32_t nCols) { +STSchema *tdGetSTSChemaFromSSChema(SSchema *pSchema, int32_t nCols, int32_t sver) { STSchemaBuilder schemaBuilder = {0}; - if (tdInitTSchemaBuilder(&schemaBuilder, 1) < 0) { + if (tdInitTSchemaBuilder(&schemaBuilder, sver) < 0) { return NULL; } for (int i = 0; i < nCols; i++) { - SSchema *schema = *pSchema + i; + SSchema *schema = pSchema + i; if (tdAddColToSchema(&schemaBuilder, schema->type, schema->flags, schema->colId, schema->bytes) < 0) { tdDestroyTSchemaBuilder(&schemaBuilder); return NULL; diff --git a/source/common/src/trow.c b/source/common/src/trow.c index df5bf64acf..754e142437 100644 --- a/source/common/src/trow.c +++ b/source/common/src/trow.c @@ -568,6 +568,7 @@ int32_t tdSTSRowNew(SArray *pArray, STSchema *pTSchema, STSRow **ppRow) { int32_t maxVarDataLen = 0; int32_t iColVal = 0; void *varBuf = NULL; + bool isAlloc = false; 
ASSERT(nColVal > 1); @@ -610,8 +611,11 @@ int32_t tdSTSRowNew(SArray *pArray, STSchema *pTSchema, STSRow **ppRow) { ++iColVal; } - *ppRow = (STSRow *)taosMemoryCalloc( - 1, sizeof(STSRow) + pTSchema->flen + varDataLen + TD_BITMAP_BYTES(pTSchema->numOfCols - 1)); + if (!(*ppRow)) { + *ppRow = (STSRow *)taosMemoryCalloc( + 1, sizeof(STSRow) + pTSchema->flen + varDataLen + TD_BITMAP_BYTES(pTSchema->numOfCols - 1)); + isAlloc = true; + } if (!(*ppRow)) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -621,7 +625,9 @@ int32_t tdSTSRowNew(SArray *pArray, STSchema *pTSchema, STSRow **ppRow) { if (maxVarDataLen > 0) { varBuf = taosMemoryMalloc(maxVarDataLen); if (!varBuf) { - taosMemoryFreeClear(*ppRow); + if(isAlloc) { + taosMemoryFreeClear(*ppRow); + } terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } @@ -1323,12 +1329,11 @@ void tTSRowGetVal(STSRow *pRow, STSchema *pTSchema, int16_t iCol, SColVal *pColV SCellVal cv; SValue value; - ASSERT(iCol > 0); + ASSERT((pTColumn->colId == PRIMARYKEY_TIMESTAMP_COL_ID) || (iCol > 0)); if (TD_IS_TP_ROW(pRow)) { tdSTpRowGetVal(pRow, pTColumn->colId, pTColumn->type, pTSchema->flen, pTColumn->offset, iCol - 1, &cv); } else if (TD_IS_KV_ROW(pRow)) { - ASSERT(iCol > 0); tdSKvRowGetVal(pRow, pTColumn->colId, iCol - 1, &cv); } else { ASSERT(0); diff --git a/source/common/src/ttime.c b/source/common/src/ttime.c index b1e4321053..77b45b6df1 100644 --- a/source/common/src/ttime.c +++ b/source/common/src/ttime.c @@ -815,20 +815,17 @@ int64_t taosTimeTruncate(int64_t t, const SInterval* pInterval, int32_t precisio if (pInterval->offset > 0) { start = taosTimeAdd(start, pInterval->offset, pInterval->offsetUnit, precision); - if (start > t) { - start = taosTimeAdd(start, -pInterval->interval, pInterval->intervalUnit, precision); - } else { - // try to move current window to the left-hande-side, due to the offset effect. 
- int64_t end = taosTimeAdd(start, pInterval->interval, pInterval->intervalUnit, precision) - 1; - int64_t newEnd = end; - while(newEnd >= t) { - end = newEnd; - newEnd = taosTimeAdd(newEnd, -pInterval->sliding, pInterval->slidingUnit, precision); - } + // try to move current window to the left-hande-side, due to the offset effect. + int64_t end = taosTimeAdd(start, pInterval->interval, pInterval->intervalUnit, precision) - 1; - start = taosTimeAdd(end, -pInterval->interval, pInterval->intervalUnit, precision) + 1; + int64_t newEnd = end; + while (newEnd >= t) { + end = newEnd; + newEnd = taosTimeAdd(newEnd, -pInterval->sliding, pInterval->slidingUnit, precision); } + + start = taosTimeAdd(end, -pInterval->interval, pInterval->intervalUnit, precision) + 1; } return start; diff --git a/source/common/test/dataformatTest.cpp b/source/common/test/dataformatTest.cpp index d16e35ff07..65f21bee40 100644 --- a/source/common/test/dataformatTest.cpp +++ b/source/common/test/dataformatTest.cpp @@ -116,7 +116,7 @@ STSchema *genSTSchema(int16_t nCols) { } STSchema *pResult = NULL; - pResult = tdGetSTSChemaFromSSChema(&pSchema, nCols); + pResult = tdGetSTSChemaFromSSChema(pSchema, nCols, 1); taosMemoryFree(pSchema); return pResult; diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 05e197150e..245dc413f1 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -868,7 +868,10 @@ int32_t mndDropSubByTopic(SMnode *pMnode, STrans *pTrans, const char *topicName) } // iter all vnode to delete handle - ASSERT(taosHashGetSize(pSub->consumerHash) == 0); + if (taosHashGetSize(pSub->consumerHash) != 0) { + sdbRelease(pSdb, pSub); + return -1; + } int32_t sz = taosArrayGetSize(pSub->unassignedVgs); for (int32_t i = 0; i < sz; i++) { SMqVgEp *pVgEp = taosArrayGetP(pSub->unassignedVgs, i); diff --git a/source/dnode/mnode/impl/src/mndTopic.c b/source/dnode/mnode/impl/src/mndTopic.c index 
f2a037ab82..268cbaa55c 100644 --- a/source/dnode/mnode/impl/src/mndTopic.c +++ b/source/dnode/mnode/impl/src/mndTopic.c @@ -583,6 +583,7 @@ static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { mndTransSetDbName(pTrans, pTopic->db, NULL); if (pTrans == NULL) { mError("topic:%s, failed to drop since %s", pTopic->name, terrstr()); + mndReleaseTopic(pMnode, pTopic); return -1; } @@ -590,11 +591,17 @@ static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { if (mndDropOffsetByTopic(pMnode, pTrans, dropReq.name) < 0) { ASSERT(0); + mndTransDrop(pTrans); + mndReleaseTopic(pMnode, pTopic); return -1; } + // TODO check if rebalancing if (mndDropSubByTopic(pMnode, pTrans, dropReq.name) < 0) { - ASSERT(0); + /*ASSERT(0);*/ + mError("topic:%s, failed to drop since %s", pTopic->name, terrstr()); + mndTransDrop(pTrans); + mndReleaseTopic(pMnode, pTopic); return -1; } diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index c2006a2535..93c70e1109 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -68,7 +68,7 @@ typedef struct { typedef struct { char* qmsg; - qTaskInfo_t task[5]; + qTaskInfo_t task; } STqExecCol; typedef struct { @@ -82,7 +82,7 @@ typedef struct { typedef struct { int8_t subType; - STqReader* pExecReader[5]; + STqReader* pExecReader; union { STqExecCol execCol; STqExecTb execTb; @@ -139,8 +139,7 @@ int64_t tqScan(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffsetVa int64_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHead** pHeadWithCkSum); // tqExec -int32_t tqLogScanExec(STQ* pTq, STqExecHandle* pExec, SSubmitReq* pReq, SMqDataRsp* pRsp, int32_t workerId); -int32_t tqScanSnapshot(STQ* pTq, const STqExecHandle* pExec, SMqDataRsp* pRsp, STqOffsetVal offset, int32_t workerId); +int32_t tqLogScanExec(STQ* pTq, STqExecHandle* pExec, SSubmitReq* pReq, SMqDataRsp* pRsp); int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp); 
// tqMeta diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 30a6188db0..04bf6bcc2b 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -40,12 +40,9 @@ typedef struct SDelIdx SDelIdx; typedef struct STbData STbData; typedef struct SMemTable SMemTable; typedef struct STbDataIter STbDataIter; -typedef struct STable STable; typedef struct SMapData SMapData; typedef struct SBlockIdx SBlockIdx; typedef struct SBlock SBlock; -typedef struct SBlockStatis SBlockStatis; -typedef struct SAggrBlkCol SAggrBlkCol; typedef struct SColData SColData; typedef struct SBlockDataHdr SBlockDataHdr; typedef struct SBlockData SBlockData; @@ -62,8 +59,7 @@ typedef struct SDelFReader SDelFReader; typedef struct SRowIter SRowIter; typedef struct STsdbFS STsdbFS; typedef struct SRowMerger SRowMerger; -typedef struct STsdbFSState STsdbFSState; -typedef struct STsdbSnapHdr STsdbSnapHdr; +typedef struct STsdbReadSnap STsdbReadSnap; #define TSDB_MAX_SUBBLOCKS 8 #define TSDB_FHDR_SIZE 512 @@ -176,8 +172,6 @@ void tsdbMemTableDestroy(SMemTable *pMemTable); void tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData); void tsdbRefMemTable(SMemTable *pMemTable); void tsdbUnrefMemTable(SMemTable *pMemTable); -int32_t tsdbTakeMemSnapshot(STsdb *pTsdb, SMemTable **ppMem, SMemTable **ppIMem); -void tsdbUntakeMemSnapshot(STsdb *pTsdb, SMemTable *pMem, SMemTable *pIMem); // STbDataIter int32_t tsdbTbDataIterCreate(STbData *pTbData, TSDBKEY *pFrom, int8_t backward, STbDataIter **ppIter); void *tsdbTbDataIterDestroy(STbDataIter *pIter); @@ -188,30 +182,39 @@ bool tsdbTbDataIterNext(STbDataIter *pIter); int32_t tsdbGetNRowsInTbData(STbData *pTbData); // tsdbFile.c ============================================================================================== typedef enum { TSDB_HEAD_FILE = 0, TSDB_DATA_FILE, TSDB_LAST_FILE, TSDB_SMA_FILE } EDataFileT; -void tsdbDataFileName(STsdb *pTsdb, 
SDFileSet *pDFileSet, EDataFileT ftype, char fname[]); -bool tsdbFileIsSame(SDFileSet *pDFileSet1, SDFileSet *pDFileSet2, EDataFileT ftype); + bool tsdbDelFileIsSame(SDelFile *pDelFile1, SDelFile *pDelFile2); -int32_t tsdbUpdateDFileHdr(TdFilePtr pFD, SDFileSet *pSet, EDataFileT ftype); int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype); -int32_t tPutDataFileHdr(uint8_t *p, SDFileSet *pSet, EDataFileT ftype); +int32_t tPutHeadFile(uint8_t *p, SHeadFile *pHeadFile); +int32_t tPutDataFile(uint8_t *p, SDataFile *pDataFile); +int32_t tPutLastFile(uint8_t *p, SLastFile *pLastFile); +int32_t tPutSmaFile(uint8_t *p, SSmaFile *pSmaFile); int32_t tPutDelFile(uint8_t *p, SDelFile *pDelFile); int32_t tGetDelFile(uint8_t *p, SDelFile *pDelFile); int32_t tPutDFileSet(uint8_t *p, SDFileSet *pSet); int32_t tGetDFileSet(uint8_t *p, SDFileSet *pSet); + +void tsdbHeadFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SHeadFile *pHeadF, char fname[]); +void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, char fname[]); +void tsdbLastFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SLastFile *pLastF, char fname[]); +void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, char fname[]); // SDelFile void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]); // tsdbFS.c ============================================================================================== -int32_t tsdbFSOpen(STsdb *pTsdb, STsdbFS **ppFS); -int32_t tsdbFSClose(STsdbFS *pFS); -int32_t tsdbFSBegin(STsdbFS *pFS); -int32_t tsdbFSCommit(STsdbFS *pFS); +int32_t tsdbFSOpen(STsdb *pTsdb); +int32_t tsdbFSClose(STsdb *pTsdb); +int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS); +void tsdbFSDestroy(STsdbFS *pFS); +int32_t tDFileSetCmprFn(const void *p1, const void *p2); +int32_t tsdbFSCommit1(STsdb *pTsdb, STsdbFS *pFS); +int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFS); +int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS); +void tsdbFSUnref(STsdb *pTsdb, 
STsdbFS *pFS); + int32_t tsdbFSRollback(STsdbFS *pFS); -int32_t tsdbFSStateUpsertDelFile(STsdbFSState *pState, SDelFile *pDelFile); -int32_t tsdbFSStateUpsertDFileSet(STsdbFSState *pState, SDFileSet *pSet); -void tsdbFSStateDeleteDFileSet(STsdbFSState *pState, int32_t fid); -SDelFile *tsdbFSStateGetDelFile(STsdbFSState *pState); -SDFileSet *tsdbFSStateGetDFileSet(STsdbFSState *pState, int32_t fid, int32_t flag); +int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet); +int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile); // tsdbReaderWriter.c ============================================================================================== // SDataFWriter int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet); @@ -222,8 +225,7 @@ int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *pMapData, uint8_t **ppBu int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_t **ppBuf1, uint8_t **ppBuf2, SBlockIdx *pBlockIdx, SBlock *pBlock, int8_t cmprAlg); -SDFileSet *tsdbDataFWriterGetWSet(SDataFWriter *pWriter); -int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo); +int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo); // SDataFReader int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pSet); int32_t tsdbDataFReaderClose(SDataFReader **ppReader); @@ -245,6 +247,9 @@ int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb int32_t tsdbDelFReaderClose(SDelFReader **ppReader); int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData, uint8_t **ppBuf); int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx, uint8_t **ppBuf); +// tsdbRead.c ============================================================================================== +int32_t tsdbTakeReadSnap(STsdb *pTsdb, STsdbReadSnap **ppSnap); +void tsdbUntakeReadSnap(STsdb *pTsdb, STsdbReadSnap *pSnap); #define TSDB_CACHE_NO(c) 
((c).cacheLast == 0) #define TSDB_CACHE_LAST_ROW(c) (((c).cacheLast & 1) > 0) @@ -276,6 +281,11 @@ typedef struct { TSKEY minKey; } SRtn; +struct STsdbFS { + SDelFile *pDelFile; + SArray *aDFileSet; // SArray +}; + struct STsdb { char *path; SVnode *pVnode; @@ -283,7 +293,7 @@ struct STsdb { TdThreadRwlock rwLock; SMemTable *mem; SMemTable *imem; - STsdbFS *pFS; + STsdbFS fs; SLRUCache *lruCache; }; @@ -402,16 +412,6 @@ struct SBlock { SSubBlock aSubBlock[TSDB_MAX_SUBBLOCKS]; }; -struct SAggrBlkCol { - int16_t colId; - int16_t maxIndex; - int16_t minIndex; - int16_t numOfNull; - int64_t sum; - int64_t max; - int64_t min; -}; - struct SColData { int16_t cid; int8_t type; @@ -465,12 +465,6 @@ struct SDelIdx { int64_t size; }; -struct SDelFile { - int64_t commitID; - int64_t size; - int64_t offset; -}; - #pragma pack(push, 1) struct SBlockDataHdr { uint32_t delimiter; @@ -479,34 +473,50 @@ struct SBlockDataHdr { }; #pragma pack(pop) +struct SDelFile { + volatile int32_t nRef; + + int64_t commitID; + int64_t size; + int64_t offset; +}; + struct SHeadFile { + volatile int32_t nRef; + int64_t commitID; int64_t size; int64_t offset; }; struct SDataFile { + volatile int32_t nRef; + int64_t commitID; int64_t size; }; struct SLastFile { + volatile int32_t nRef; + int64_t commitID; int64_t size; }; struct SSmaFile { + volatile int32_t nRef; + int64_t commitID; int64_t size; }; struct SDFileSet { - SDiskID diskId; - int32_t fid; - SHeadFile fHead; - SDataFile fData; - SLastFile fLast; - SSmaFile fSma; + SDiskID diskId; + int32_t fid; + SHeadFile *pHeadF; + SDataFile *pDataF; + SLastFile *pLastF; + SSmaFile *pSmaF; }; struct SRowIter { @@ -521,26 +531,33 @@ struct SRowMerger { SArray *pArray; // SArray }; -struct STsdbFSState { - SDelFile *pDelFile; - SArray *aDFileSet; // SArray - SDelFile delFile; -}; - -struct STsdbFS { - STsdb *pTsdb; - TdThreadRwlock lock; - int8_t inTxn; - STsdbFSState *cState; - STsdbFSState *nState; -}; - struct SDelFWriter { STsdb *pTsdb; SDelFile 
fDel; TdFilePtr pWriteH; }; +struct SDataFWriter { + STsdb *pTsdb; + SDFileSet wSet; + + TdFilePtr pHeadFD; + TdFilePtr pDataFD; + TdFilePtr pLastFD; + TdFilePtr pSmaFD; + + SHeadFile fHead; + SDataFile fData; + SLastFile fLast; + SSmaFile fSma; +}; + +struct STsdbReadSnap { + SMemTable *pMem; + SMemTable *pIMem; + STsdbFS fs; +}; + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index d785376925..5e87e35d68 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -146,7 +146,7 @@ int32_t tqCheckColModifiable(STQ* pTq, int32_t colId); int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessVgDeleteReq(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessOffsetCommitReq(STQ* pTq, char* msg, int32_t msgLen); -int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg, int32_t workerId); +int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskDeployReq(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessStreamTrigger(STQ* pTq, SSubmitReq* data); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index cecf899591..e7293da60b 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -582,7 +582,7 @@ static int32_t tdRSmaFetchAndSubmitResult(SRSmaInfoItem *pItem, STSchema *pTSche int32_t code = qExecTask(pItem->taskInfo, &output, &ts); if (code < 0) { - smaError("vgId:%d, qExecTask for rsma table %" PRIi64 "l evel %" PRIi8 " failed since %s", SMA_VID(pSma), suid, + smaError("vgId:%d, qExecTask for rsma table %" PRIi64 " level %" PRIi8 " failed since %s", SMA_VID(pSma), suid, pItem->level, terrstr(code)); goto _err; } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index e35bd1de98..3739897ec0 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ 
b/source/dnode/vnode/src/tq/tq.c @@ -215,10 +215,10 @@ int32_t tqCheckColModifiable(STQ* pTq, int32_t colId) { if (pIter == NULL) break; STqHandle* pExec = (STqHandle*)pIter; if (pExec->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - int32_t sz = taosArrayGetSize(pExec->colIdList); + int32_t sz = pExec->execHandle.pSchemaWrapper->nCols; for (int32_t i = 0; i < sz; i++) { - int32_t forbidColId = *(int32_t*)taosArrayGet(pExec->colIdList, i); - if (forbidColId == colId) { + SSchema* pSchema = &pExec->execHandle.pSchemaWrapper->pSchema[i]; + if (pSchema->colId == colId) { taosHashCancelIterate(pTq->handles, pIter); return -1; } @@ -262,7 +262,7 @@ static int32_t tqInitDataRsp(SMqDataRsp* pRsp, const SMqPollReq* pReq, int8_t su static int32_t tqInitMetaRsp(SMqMetaRsp* pRsp, const SMqPollReq* pReq) { return 0; } -int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg, int32_t workerId) { +int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { SMqPollReq* pReq = pMsg->pCont; int64_t consumerId = pReq->consumerId; int64_t timeout = pReq->timeout; @@ -271,9 +271,6 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg, int32_t workerId) { STqOffsetVal reqOffset = pReq->reqOffset; STqOffsetVal fetchOffsetNew; - // todo - workerId = 0; - // 1.find handle STqHandle* pHandle = taosHashGet(pTq->handles, pReq->subKey, strlen(pReq->subKey)); /*ASSERT(pHandle);*/ @@ -405,7 +402,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg, int32_t workerId) { if (pHead->msgType == TDMT_VND_SUBMIT) { SSubmitReq* pCont = (SSubmitReq*)&pHead->body; - if (tqLogScanExec(pTq, &pHandle->execHandle, pCont, &dataRsp, workerId) < 0) { + if (tqLogScanExec(pTq, &pHandle->execHandle, pCont, &dataRsp) < 0) { /*ASSERT(0);*/ } // TODO batch optimization: @@ -518,27 +515,24 @@ int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen) { pHandle->execHandle.execCol.qmsg = req.qmsg; pHandle->snapshotVer = ver; req.qmsg = NULL; - for (int32_t i = 0; i < 5; i++) { - SReadHandle handle = { - .meta = 
pTq->pVnode->pMeta, - .vnode = pTq->pVnode, - .initTableReader = true, - .initTqReader = true, - .version = ver, - }; - pHandle->execHandle.execCol.task[i] = qCreateQueueExecTaskInfo(pHandle->execHandle.execCol.qmsg, &handle, &pHandle->execHandle.numOfCols, - &pHandle->execHandle.pSchemaWrapper); - ASSERT(pHandle->execHandle.execCol.task[i]); - void* scanner = NULL; - qExtractStreamScanner(pHandle->execHandle.execCol.task[i], &scanner); - ASSERT(scanner); - pHandle->execHandle.pExecReader[i] = qExtractReaderFromStreamScanner(scanner); - ASSERT(pHandle->execHandle.pExecReader[i]); - } + SReadHandle handle = { + .meta = pTq->pVnode->pMeta, + .vnode = pTq->pVnode, + .initTableReader = true, + .initTqReader = true, + .version = ver, + }; + pHandle->execHandle.execCol.task = + qCreateQueueExecTaskInfo(pHandle->execHandle.execCol.qmsg, &handle, &pHandle->execHandle.numOfCols, + &pHandle->execHandle.pSchemaWrapper); + ASSERT(pHandle->execHandle.execCol.task); + void* scanner = NULL; + qExtractStreamScanner(pHandle->execHandle.execCol.task, &scanner); + ASSERT(scanner); + pHandle->execHandle.pExecReader = qExtractReaderFromStreamScanner(scanner); + ASSERT(pHandle->execHandle.pExecReader); } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) { - for (int32_t i = 0; i < 5; i++) { - pHandle->execHandle.pExecReader[i] = tqOpenReader(pTq->pVnode); - } + pHandle->execHandle.pExecReader = tqOpenReader(pTq->pVnode); pHandle->execHandle.execDb.pFilterOutTbUid = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__TABLE) { @@ -550,10 +544,8 @@ int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen) { int64_t tbUid = *(int64_t*)taosArrayGet(tbUidList, i); tqDebug("vgId:%d, idx %d, uid:%" PRId64, TD_VID(pTq->pVnode), i, tbUid); } - for (int32_t i = 0; i < 5; i++) { - pHandle->execHandle.pExecReader[i] = tqOpenReader(pTq->pVnode); - 
tqReaderSetTbUidList(pHandle->execHandle.pExecReader[i], tbUidList); - } + pHandle->execHandle.pExecReader = tqOpenReader(pTq->pVnode); + tqReaderSetTbUidList(pHandle->execHandle.pExecReader, tbUidList); taosArrayDestroy(tbUidList); } taosHashPut(pTq->handles, req.subKey, strlen(req.subKey), pHandle, sizeof(STqHandle)); @@ -634,7 +626,7 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, char* msg, int32_t msgLen) { ASSERT(pTask->tbSink.pSchemaWrapper->pSchema); pTask->tbSink.pTSchema = - tdGetSTSChemaFromSSChema(&pTask->tbSink.pSchemaWrapper->pSchema, pTask->tbSink.pSchemaWrapper->nCols); + tdGetSTSChemaFromSSChema(pTask->tbSink.pSchemaWrapper->pSchema, pTask->tbSink.pSchemaWrapper->nCols, 1); ASSERT(pTask->tbSink.pTSchema); } diff --git a/source/dnode/vnode/src/tq/tqExec.c b/source/dnode/vnode/src/tq/tqExec.c index 5172819d2a..4e2750d9f0 100644 --- a/source/dnode/vnode/src/tq/tqExec.c +++ b/source/dnode/vnode/src/tq/tqExec.c @@ -37,8 +37,8 @@ static int32_t tqAddBlockDataToRsp(const SSDataBlock* pBlock, SMqDataRsp* pRsp, return 0; } -static int32_t tqAddBlockSchemaToRsp(const STqExecHandle* pExec, int32_t workerId, SMqDataRsp* pRsp) { - SSchemaWrapper* pSW = tCloneSSchemaWrapper(pExec->pExecReader[workerId]->pSchemaWrapper); +static int32_t tqAddBlockSchemaToRsp(const STqExecHandle* pExec, SMqDataRsp* pRsp) { + SSchemaWrapper* pSW = tCloneSSchemaWrapper(pExec->pExecReader->pSchemaWrapper); if (pSW == NULL) { return -1; } @@ -51,6 +51,7 @@ static int32_t tqAddTbNameToRsp(const STQ* pTq, int64_t uid, SMqDataRsp* pRsp) { metaReaderInit(&mr, pTq->pVnode->pMeta, 0); // TODO add reference to gurantee success if (metaGetTableEntryByUid(&mr, uid) < 0) { + metaReaderClear(&mr); return -1; } char* tbName = strdup(mr.me.name); @@ -61,7 +62,7 @@ static int32_t tqAddTbNameToRsp(const STQ* pTq, int64_t uid, SMqDataRsp* pRsp) { int64_t tqScan(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffsetVal* pOffset) { const STqExecHandle* pExec = &pHandle->execHandle; - qTaskInfo_t 
task = pExec->execCol.task[0]; + qTaskInfo_t task = pExec->execCol.task; if (qStreamPrepareScan(task, pOffset) < 0) { if (pOffset->type == TMQ_OFFSET__LOG) { @@ -89,7 +90,7 @@ int64_t tqScan(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffsetVa if (pDataBlock != NULL) { if (pRsp->withTbName) { if (pOffset->type == TMQ_OFFSET__LOG) { - int64_t uid = pExec->pExecReader[0]->msgIter.uid; + int64_t uid = pExec->pExecReader->msgIter.uid; if (tqAddTbNameToRsp(pTq, uid, pRsp) < 0) { continue; } @@ -108,6 +109,7 @@ int64_t tqScan(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffsetVa } if (pRsp->blockNum == 0 && pOffset->type == TMQ_OFFSET__SNAPSHOT_DATA) { + tqDebug("vgId: %d, tsdb consume over, switch to wal, ver %ld", TD_VID(pTq->pVnode), pHandle->snapshotVer + 1); tqOffsetResetToLog(pOffset, pHandle->snapshotVer); qStreamPrepareScan(task, pOffset); continue; @@ -184,12 +186,12 @@ int32_t tqScanSnapshot(STQ* pTq, const STqExecHandle* pExec, SMqDataRsp* pRsp, S } #endif -int32_t tqLogScanExec(STQ* pTq, STqExecHandle* pExec, SSubmitReq* pReq, SMqDataRsp* pRsp, int32_t workerId) { +int32_t tqLogScanExec(STQ* pTq, STqExecHandle* pExec, SSubmitReq* pReq, SMqDataRsp* pRsp) { ASSERT(pExec->subType != TOPIC_SUB_TYPE__COLUMN); if (pExec->subType == TOPIC_SUB_TYPE__TABLE) { pRsp->withSchema = 1; - STqReader* pReader = pExec->pExecReader[workerId]; + STqReader* pReader = pExec->pExecReader; tqReaderSetDataMsg(pReader, pReq, 0); while (tqNextDataBlock(pReader)) { SSDataBlock block = {0}; @@ -197,18 +199,18 @@ int32_t tqLogScanExec(STQ* pTq, STqExecHandle* pExec, SSubmitReq* pReq, SMqDataR if (terrno == TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND) continue; } if (pRsp->withTbName) { - int64_t uid = pExec->pExecReader[workerId]->msgIter.uid; + int64_t uid = pExec->pExecReader->msgIter.uid; if (tqAddTbNameToRsp(pTq, uid, pRsp) < 0) { continue; } } tqAddBlockDataToRsp(&block, pRsp, taosArrayGetSize(block.pDataBlock)); - tqAddBlockSchemaToRsp(pExec, workerId, pRsp); + 
tqAddBlockSchemaToRsp(pExec, pRsp); pRsp->blockNum++; } } else if (pExec->subType == TOPIC_SUB_TYPE__DB) { pRsp->withSchema = 1; - STqReader* pReader = pExec->pExecReader[workerId]; + STqReader* pReader = pExec->pExecReader; tqReaderSetDataMsg(pReader, pReq, 0); while (tqNextDataBlockFilterOut(pReader, pExec->execDb.pFilterOutTbUid)) { SSDataBlock block = {0}; @@ -216,13 +218,13 @@ int32_t tqLogScanExec(STQ* pTq, STqExecHandle* pExec, SSubmitReq* pReq, SMqDataR if (terrno == TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND) continue; } if (pRsp->withTbName) { - int64_t uid = pExec->pExecReader[workerId]->msgIter.uid; + int64_t uid = pExec->pExecReader->msgIter.uid; if (tqAddTbNameToRsp(pTq, uid, pRsp) < 0) { continue; } } tqAddBlockDataToRsp(&block, pRsp, taosArrayGetSize(block.pDataBlock)); - tqAddBlockSchemaToRsp(pExec, workerId, pRsp); + tqAddBlockSchemaToRsp(pExec, pRsp); pRsp->blockNum++; } } diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index 83e852c79e..835ffb02fd 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -80,28 +80,23 @@ int32_t tqMetaOpen(STQ* pTq) { tDecoderInit(&decoder, (uint8_t*)pVal, vLen); tDecodeSTqHandle(&decoder, &handle); handle.pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); - /*for (int32_t i = 0; i < 5; i++) {*/ - /*handle.execHandle.pExecReader[i] = tqOpenReader(pTq->pVnode);*/ - /*}*/ if (handle.execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - for (int32_t i = 0; i < 5; i++) { - SReadHandle reader = { - .meta = pTq->pVnode->pMeta, - .vnode = pTq->pVnode, - .initTableReader = true, - .initTqReader = true, - .version = handle.snapshotVer, - }; + SReadHandle reader = { + .meta = pTq->pVnode->pMeta, + .vnode = pTq->pVnode, + .initTableReader = true, + .initTqReader = true, + .version = handle.snapshotVer, + }; - handle.execHandle.execCol.task[i] = qCreateQueueExecTaskInfo(handle.execHandle.execCol.qmsg, &reader, &handle.execHandle.numOfCols, - 
&handle.execHandle.pSchemaWrapper); - ASSERT(handle.execHandle.execCol.task[i]); - void* scanner = NULL; - qExtractStreamScanner(handle.execHandle.execCol.task[i], &scanner); - ASSERT(scanner); - handle.execHandle.pExecReader[i] = qExtractReaderFromStreamScanner(scanner); - ASSERT(handle.execHandle.pExecReader[i]); - } + handle.execHandle.execCol.task = + qCreateQueueExecTaskInfo(handle.execHandle.execCol.qmsg, &reader, &handle.execHandle.numOfCols, &handle.execHandle.pSchemaWrapper); + ASSERT(handle.execHandle.execCol.task); + void* scanner = NULL; + qExtractStreamScanner(handle.execHandle.execCol.task, &scanner); + ASSERT(scanner); + handle.execHandle.pExecReader = qExtractReaderFromStreamScanner(scanner); + ASSERT(handle.execHandle.pExecReader); } else { handle.execHandle.execDb.pFilterOutTbUid = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 4c0d416ad1..6097ddd49e 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -249,6 +249,8 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) return -1; } memcpy(data, msg, msgLen); + SSubmitReq* pReq = (SSubmitReq*)data; + pReq->version = ver; tqProcessStreamTrigger(pTq, data); } diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index e4c11c4787..17842615c4 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -314,6 +314,7 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader) { pBlock->info.uid = pReader->msgIter.uid; pBlock->info.rows = pReader->msgIter.numOfRows; + pBlock->info.version = pReader->pMsg->version; while ((row = tGetSubmitBlkNext(&pReader->blkIter)) != NULL) { tdSTSRowIterReset(&iter, row); @@ -393,10 +394,8 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { if (pIter == NULL) break; 
STqHandle* pExec = (STqHandle*)pIter; if (pExec->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - for (int32_t i = 0; i < 5; i++) { - int32_t code = qUpdateQualifiedTableId(pExec->execHandle.execCol.task[i], tbUidList, isAdd); - ASSERT(code == 0); - } + int32_t code = qUpdateQualifiedTableId(pExec->execHandle.execCol.task, tbUidList, isAdd); + ASSERT(code == 0); } else if (pExec->execHandle.subType == TOPIC_SUB_TYPE__DB) { if (!isAdd) { int32_t sz = taosArrayGetSize(tbUidList); diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index b0eb7f4a14..464a3a3ee1 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -127,6 +127,8 @@ SSubmitReq* tdBlockToSubmit(const SArray* pBlocks, const STSchema* pTSchema, boo int32_t rows = pDataBlock->info.rows; + tqDebug("tq sink, convert block %d, rows: %d", i, rows); + int32_t dataLen = 0; void* blkSchema = POINTER_SHIFT(blkHead, sizeof(SSubmitBlk)); @@ -178,11 +180,14 @@ void tqTableSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data) { const SArray* pRes = (const SArray*)data; SVnode* pVnode = (SVnode*)vnode; - tqDebug("task write into table, vgId %d, block num: %d", pVnode->config.vgId, (int32_t)pRes->size); + tqDebug("vgId:%d, task %d write into table, block num: %d", TD_VID(pVnode), pTask->taskId, (int32_t)pRes->size); ASSERT(pTask->tbSink.pTSchema); SSubmitReq* pReq = tdBlockToSubmit(pRes, pTask->tbSink.pTSchema, true, pTask->tbSink.stbUid, pTask->tbSink.stbFullName, pVnode->config.vgId); + + tqDebug("vgId:%d, task %d convert blocks over, put into write-queue", TD_VID(pVnode), pTask->taskId); + /*tPrintFixedSchemaSubmitReq(pReq, pTask->tbSink.pTSchema);*/ // build write msg SRpcMsg msg = { diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index 484020e6e1..e259dde29c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -464,7 +464,7 @@ static int32_t 
getNextRowFromFS(void *iter, TSDBROW **ppRow) { switch (state->state) { case SFSNEXTROW_FS: - state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet; + // state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet; state->nFileSet = taosArrayGetSize(state->aDFileSet); state->iFileSet = state->nFileSet; @@ -793,9 +793,10 @@ typedef struct { TSDBROW memRow, imemRow, fsRow; TsdbNextRowState input[3]; - SMemTable *pMemTable; - SMemTable *pIMemTable; - STsdb *pTsdb; + // SMemTable *pMemTable; + // SMemTable *pIMemTable; + STsdbReadSnap *pReadSnap; + STsdb *pTsdb; } CacheNextRowIter; static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTsdb) { @@ -803,16 +804,16 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs tb_uid_t suid = getTableSuidByUid(uid, pTsdb); - tsdbTakeMemSnapshot(pTsdb, &pIter->pMemTable, &pIter->pIMemTable); + tsdbTakeReadSnap(pTsdb, &pIter->pReadSnap); STbData *pMem = NULL; - if (pIter->pMemTable) { - tsdbGetTbDataFromMemTable(pIter->pMemTable, suid, uid, &pMem); + if (pIter->pReadSnap->pMem) { + tsdbGetTbDataFromMemTable(pIter->pReadSnap->pMem, suid, uid, &pMem); } STbData *pIMem = NULL; - if (pIter->pIMemTable) { - tsdbGetTbDataFromMemTable(pIter->pIMemTable, suid, uid, &pIMem); + if (pIter->pReadSnap->pIMem) { + tsdbGetTbDataFromMemTable(pIter->pReadSnap->pIMem, suid, uid, &pIMem); } pIter->pTsdb = pTsdb; @@ -821,7 +822,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs SDelIdx delIdx; - SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); + SDelFile *pDelFile = pIter->pReadSnap->fs.pDelFile; if (pDelFile) { SDelFReader *pDelFReader; @@ -846,6 +847,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs pIter->fsState.state = SFSNEXTROW_FS; pIter->fsState.pTsdb = pTsdb; + pIter->fsState.aDFileSet = pIter->pReadSnap->fs.aDFileSet; pIter->fsState.pBlockIdxExp = &pIter->idx; pIter->input[0] = 
(TsdbNextRowState){&pIter->memRow, true, false, &pIter->memState, getNextRowFromMem, NULL}; @@ -885,7 +887,7 @@ static int32_t nextRowIterClose(CacheNextRowIter *pIter) { taosArrayDestroy(pIter->pSkyline); } - tsdbUntakeMemSnapshot(pIter->pTsdb, pIter->pMemTable, pIter->pIMemTable); + tsdbUntakeReadSnap(pIter->pTsdb, pIter->pReadSnap); return code; _err: @@ -1172,480 +1174,480 @@ _err: return code; } -static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, STSRow **ppRow) { - int32_t code = 0; - SArray *pSkyline = NULL; +// static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, STSRow **ppRow) { +// int32_t code = 0; +// SArray *pSkyline = NULL; - STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); - int16_t nCol = pTSchema->numOfCols; - SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); +// STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); +// int16_t nCol = pTSchema->numOfCols; +// SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); - tb_uid_t suid = getTableSuidByUid(uid, pTsdb); +// tb_uid_t suid = getTableSuidByUid(uid, pTsdb); - STbData *pMem = NULL; - if (pTsdb->mem) { - tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); - } +// STbData *pMem = NULL; +// if (pTsdb->mem) { +// tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); +// } - STbData *pIMem = NULL; - if (pTsdb->imem) { - tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); - } +// STbData *pIMem = NULL; +// if (pTsdb->imem) { +// tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); +// } - *ppRow = NULL; +// *ppRow = NULL; - pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); +// pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); - SDelIdx delIdx; +// SDelIdx delIdx; - SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); - if (pDelFile) { - SDelFReader *pDelFReader; +// SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); +// if (pDelFile) { +// SDelFReader *pDelFReader; - code = 
tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); - if (code) goto _err; +// code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); +// if (code) goto _err; - code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); - if (code) goto _err; +// code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); +// if (code) goto _err; - code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); - if (code) goto _err; +// code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); +// if (code) goto _err; - tsdbDelFReaderClose(&pDelFReader); - } else { - code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); - if (code) goto _err; - } +// tsdbDelFReaderClose(&pDelFReader); +// } else { +// code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); +// if (code) goto _err; +// } - int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; +// int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; - SBlockIdx idx = {.suid = suid, .uid = uid}; +// SBlockIdx idx = {.suid = suid, .uid = uid}; - SFSNextRowIter fsState = {0}; - fsState.state = SFSNEXTROW_FS; - fsState.pTsdb = pTsdb; - fsState.pBlockIdxExp = &idx; +// SFSNextRowIter fsState = {0}; +// fsState.state = SFSNEXTROW_FS; +// fsState.pTsdb = pTsdb; +// fsState.pBlockIdxExp = &idx; - SMemNextRowIter memState = {0}; - SMemNextRowIter imemState = {0}; - TSDBROW memRow, imemRow, fsRow; +// SMemNextRowIter memState = {0}; +// SMemNextRowIter imemState = {0}; +// TSDBROW memRow, imemRow, fsRow; - TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, - {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, - {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; +// TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, +// {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, +// {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; - if (pMem) { - memState.pMem = pMem; - 
memState.state = SMEMNEXTROW_ENTER; - input[0].stop = false; - input[0].next = true; - } - if (pIMem) { - imemState.pMem = pIMem; - imemState.state = SMEMNEXTROW_ENTER; - input[1].stop = false; - input[1].next = true; - } +// if (pMem) { +// memState.pMem = pMem; +// memState.state = SMEMNEXTROW_ENTER; +// input[0].stop = false; +// input[0].next = true; +// } +// if (pIMem) { +// imemState.pMem = pIMem; +// imemState.state = SMEMNEXTROW_ENTER; +// input[1].stop = false; +// input[1].next = true; +// } - int16_t nilColCount = nCol - 1; // count of null & none cols - int iCol = 0; // index of first nil col index from left to right - bool setICol = false; +// int16_t nilColCount = nCol - 1; // count of null & none cols +// int iCol = 0; // index of first nil col index from left to right +// bool setICol = false; - do { - for (int i = 0; i < 3; ++i) { - if (input[i].next && !input[i].stop) { - if (input[i].pRow == NULL) { - code = input[i].nextRowFn(input[i].iter, &input[i].pRow); - if (code) goto _err; +// do { +// for (int i = 0; i < 3; ++i) { +// if (input[i].next && !input[i].stop) { +// if (input[i].pRow == NULL) { +// code = input[i].nextRowFn(input[i].iter, &input[i].pRow); +// if (code) goto _err; - if (input[i].pRow == NULL) { - input[i].stop = true; - input[i].next = false; - } - } - } - } +// if (input[i].pRow == NULL) { +// input[i].stop = true; +// input[i].next = false; +// } +// } +// } +// } - if (input[0].stop && input[1].stop && input[2].stop) { - break; - } +// if (input[0].stop && input[1].stop && input[2].stop) { +// break; +// } - // select maxpoint(s) from mem, imem, fs - TSDBROW *max[3] = {0}; - int iMax[3] = {-1, -1, -1}; - int nMax = 0; - TSKEY maxKey = TSKEY_MIN; +// // select maxpoint(s) from mem, imem, fs +// TSDBROW *max[3] = {0}; +// int iMax[3] = {-1, -1, -1}; +// int nMax = 0; +// TSKEY maxKey = TSKEY_MIN; - for (int i = 0; i < 3; ++i) { - if (!input[i].stop && input[i].pRow != NULL) { - TSDBKEY key = TSDBROW_KEY(input[i].pRow); +// 
for (int i = 0; i < 3; ++i) { +// if (!input[i].stop && input[i].pRow != NULL) { +// TSDBKEY key = TSDBROW_KEY(input[i].pRow); - // merging & deduplicating on client side - if (maxKey <= key.ts) { - if (maxKey < key.ts) { - nMax = 0; - maxKey = key.ts; - } +// // merging & deduplicating on client side +// if (maxKey <= key.ts) { +// if (maxKey < key.ts) { +// nMax = 0; +// maxKey = key.ts; +// } - iMax[nMax] = i; - max[nMax++] = input[i].pRow; - } - } - } +// iMax[nMax] = i; +// max[nMax++] = input[i].pRow; +// } +// } +// } - // delete detection - TSDBROW *merge[3] = {0}; - int iMerge[3] = {-1, -1, -1}; - int nMerge = 0; - for (int i = 0; i < nMax; ++i) { - TSDBKEY maxKey = TSDBROW_KEY(max[i]); +// // delete detection +// TSDBROW *merge[3] = {0}; +// int iMerge[3] = {-1, -1, -1}; +// int nMerge = 0; +// for (int i = 0; i < nMax; ++i) { +// TSDBKEY maxKey = TSDBROW_KEY(max[i]); - bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); - if (!deleted) { - iMerge[nMerge] = i; - merge[nMerge++] = max[i]; - } +// bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); +// if (!deleted) { +// iMerge[nMerge] = i; +// merge[nMerge++] = max[i]; +// } - input[iMax[i]].next = deleted; - } +// input[iMax[i]].next = deleted; +// } - // merge if nMerge > 1 - if (nMerge > 0) { - *dup = false; +// // merge if nMerge > 1 +// if (nMerge > 0) { +// *dup = false; - if (nMerge == 1) { - code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); - if (code) goto _err; - } else { - // merge 2 or 3 rows - SRowMerger merger = {0}; +// if (nMerge == 1) { +// code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); +// if (code) goto _err; +// } else { +// // merge 2 or 3 rows +// SRowMerger merger = {0}; - tRowMergerInit(&merger, merge[0], pTSchema); - for (int i = 1; i < nMerge; ++i) { - tRowMerge(&merger, merge[i]); - } - tRowMergerGetRow(&merger, ppRow); - tRowMergerClear(&merger); - } - } +// tRowMergerInit(&merger, merge[0], pTSchema); +// for (int i = 1; i < nMerge; 
++i) { +// tRowMerge(&merger, merge[i]); +// } +// tRowMergerGetRow(&merger, ppRow); +// tRowMergerClear(&merger); +// } +// } - } while (1); +// } while (1); - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); - return code; -_err: - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); - tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} +// return code; +// _err: +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); +// tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); +// return code; +// } // static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, STSRow **ppRow) { -static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray) { - int32_t code = 0; - SArray *pSkyline = NULL; - STSRow *pRow = NULL; - STSRow **ppRow = &pRow; +// static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray) { +// int32_t code = 0; +// SArray *pSkyline = NULL; +// STSRow *pRow = NULL; +// STSRow **ppRow = &pRow; - STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); - int16_t nCol = pTSchema->numOfCols; - // SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); - SArray *pColArray = taosArrayInit(nCol, sizeof(SLastCol)); +// STSchema *pTSchema = 
metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); +// int16_t nCol = pTSchema->numOfCols; +// // SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); +// SArray *pColArray = taosArrayInit(nCol, sizeof(SLastCol)); - tb_uid_t suid = getTableSuidByUid(uid, pTsdb); +// tb_uid_t suid = getTableSuidByUid(uid, pTsdb); - STbData *pMem = NULL; - if (pTsdb->mem) { - tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); - } +// STbData *pMem = NULL; +// if (pTsdb->mem) { +// tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); +// } - STbData *pIMem = NULL; - if (pTsdb->imem) { - tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); - } +// STbData *pIMem = NULL; +// if (pTsdb->imem) { +// tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); +// } - *ppLastArray = NULL; +// *ppLastArray = NULL; - pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); +// pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); - SDelIdx delIdx; +// SDelIdx delIdx; - SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); - if (pDelFile) { - SDelFReader *pDelFReader; +// SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); +// if (pDelFile) { +// SDelFReader *pDelFReader; - code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); - if (code) goto _err; +// code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); +// if (code) goto _err; - code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); - if (code) goto _err; +// code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); +// if (code) goto _err; - code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); - if (code) goto _err; +// code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); +// if (code) goto _err; - tsdbDelFReaderClose(&pDelFReader); - } else { - code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); - if (code) goto _err; - } +// tsdbDelFReaderClose(&pDelFReader); +// } else { +// code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); +// if 
(code) goto _err; +// } - int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; +// int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; - SBlockIdx idx = {.suid = suid, .uid = uid}; +// SBlockIdx idx = {.suid = suid, .uid = uid}; - SFSNextRowIter fsState = {0}; - fsState.state = SFSNEXTROW_FS; - fsState.pTsdb = pTsdb; - fsState.pBlockIdxExp = &idx; +// SFSNextRowIter fsState = {0}; +// fsState.state = SFSNEXTROW_FS; +// fsState.pTsdb = pTsdb; +// fsState.pBlockIdxExp = &idx; - SMemNextRowIter memState = {0}; - SMemNextRowIter imemState = {0}; - TSDBROW memRow, imemRow, fsRow; +// SMemNextRowIter memState = {0}; +// SMemNextRowIter imemState = {0}; +// TSDBROW memRow, imemRow, fsRow; - TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, - {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, - {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; +// TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, +// {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, +// {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; - if (pMem) { - memState.pMem = pMem; - memState.state = SMEMNEXTROW_ENTER; - input[0].stop = false; - input[0].next = true; - } - if (pIMem) { - imemState.pMem = pIMem; - imemState.state = SMEMNEXTROW_ENTER; - input[1].stop = false; - input[1].next = true; - } +// if (pMem) { +// memState.pMem = pMem; +// memState.state = SMEMNEXTROW_ENTER; +// input[0].stop = false; +// input[0].next = true; +// } +// if (pIMem) { +// imemState.pMem = pIMem; +// imemState.state = SMEMNEXTROW_ENTER; +// input[1].stop = false; +// input[1].next = true; +// } - int16_t nilColCount = nCol - 1; // count of null & none cols - int iCol = 0; // index of first nil col index from left to right - bool setICol = false; +// int16_t nilColCount = nCol - 1; // count of null & none cols +// int iCol = 0; // index of first nil col index from left to right +// bool 
setICol = false; - do { - for (int i = 0; i < 3; ++i) { - if (input[i].next && !input[i].stop) { - code = input[i].nextRowFn(input[i].iter, &input[i].pRow); - if (code) goto _err; +// do { +// for (int i = 0; i < 3; ++i) { +// if (input[i].next && !input[i].stop) { +// code = input[i].nextRowFn(input[i].iter, &input[i].pRow); +// if (code) goto _err; - if (input[i].pRow == NULL) { - input[i].stop = true; - input[i].next = false; - } - } - } +// if (input[i].pRow == NULL) { +// input[i].stop = true; +// input[i].next = false; +// } +// } +// } - if (input[0].stop && input[1].stop && input[2].stop) { - break; - } +// if (input[0].stop && input[1].stop && input[2].stop) { +// break; +// } - // select maxpoint(s) from mem, imem, fs - TSDBROW *max[3] = {0}; - int iMax[3] = {-1, -1, -1}; - int nMax = 0; - TSKEY maxKey = TSKEY_MIN; +// // select maxpoint(s) from mem, imem, fs +// TSDBROW *max[3] = {0}; +// int iMax[3] = {-1, -1, -1}; +// int nMax = 0; +// TSKEY maxKey = TSKEY_MIN; - for (int i = 0; i < 3; ++i) { - if (!input[i].stop && input[i].pRow != NULL) { - TSDBKEY key = TSDBROW_KEY(input[i].pRow); +// for (int i = 0; i < 3; ++i) { +// if (!input[i].stop && input[i].pRow != NULL) { +// TSDBKEY key = TSDBROW_KEY(input[i].pRow); - // merging & deduplicating on client side - if (maxKey <= key.ts) { - if (maxKey < key.ts) { - nMax = 0; - maxKey = key.ts; - } +// // merging & deduplicating on client side +// if (maxKey <= key.ts) { +// if (maxKey < key.ts) { +// nMax = 0; +// maxKey = key.ts; +// } - iMax[nMax] = i; - max[nMax++] = input[i].pRow; - } - } - } +// iMax[nMax] = i; +// max[nMax++] = input[i].pRow; +// } +// } +// } - // delete detection - TSDBROW *merge[3] = {0}; - int iMerge[3] = {-1, -1, -1}; - int nMerge = 0; - for (int i = 0; i < nMax; ++i) { - TSDBKEY maxKey = TSDBROW_KEY(max[i]); +// // delete detection +// TSDBROW *merge[3] = {0}; +// int iMerge[3] = {-1, -1, -1}; +// int nMerge = 0; +// for (int i = 0; i < nMax; ++i) { +// TSDBKEY maxKey = 
TSDBROW_KEY(max[i]); - bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); - if (!deleted) { - iMerge[nMerge] = iMax[i]; - merge[nMerge++] = max[i]; - } +// bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); +// if (!deleted) { +// iMerge[nMerge] = iMax[i]; +// merge[nMerge++] = max[i]; +// } - input[iMax[i]].next = deleted; - } +// input[iMax[i]].next = deleted; +// } - // merge if nMerge > 1 - if (nMerge > 0) { - if (nMerge == 1) { - code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); - if (code) goto _err; - } else { - // merge 2 or 3 rows - SRowMerger merger = {0}; +// // merge if nMerge > 1 +// if (nMerge > 0) { +// if (nMerge == 1) { +// code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); +// if (code) goto _err; +// } else { +// // merge 2 or 3 rows +// SRowMerger merger = {0}; - tRowMergerInit(&merger, merge[0], pTSchema); - for (int i = 1; i < nMerge; ++i) { - tRowMerge(&merger, merge[i]); - } - tRowMergerGetRow(&merger, ppRow); - tRowMergerClear(&merger); - } - } else { - /* *ppRow = NULL; */ - /* return code; */ - continue; - } +// tRowMergerInit(&merger, merge[0], pTSchema); +// for (int i = 1; i < nMerge; ++i) { +// tRowMerge(&merger, merge[i]); +// } +// tRowMergerGetRow(&merger, ppRow); +// tRowMergerClear(&merger); +// } +// } else { +// /* *ppRow = NULL; */ +// /* return code; */ +// continue; +// } - if (iCol == 0) { - STColumn *pTColumn = &pTSchema->columns[0]; - SColVal *pColVal = &(SColVal){0}; +// if (iCol == 0) { +// STColumn *pTColumn = &pTSchema->columns[0]; +// SColVal *pColVal = &(SColVal){0}; - *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.ts = maxKey}); +// *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.ts = maxKey}); - // if (taosArrayPush(pColArray, pColVal) == NULL) { - if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } +// // if (taosArrayPush(pColArray, pColVal) == 
NULL) { +// if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { +// code = TSDB_CODE_OUT_OF_MEMORY; +// goto _err; +// } - ++iCol; +// ++iCol; - setICol = false; - for (int16_t i = iCol; i < nCol; ++i) { - // tsdbRowGetColVal(*ppRow, pTSchema, i, pColVal); - tTSRowGetVal(*ppRow, pTSchema, i, pColVal); - // if (taosArrayPush(pColArray, pColVal) == NULL) { - if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } +// setICol = false; +// for (int16_t i = iCol; i < nCol; ++i) { +// // tsdbRowGetColVal(*ppRow, pTSchema, i, pColVal); +// tTSRowGetVal(*ppRow, pTSchema, i, pColVal); +// // if (taosArrayPush(pColArray, pColVal) == NULL) { +// if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { +// code = TSDB_CODE_OUT_OF_MEMORY; +// goto _err; +// } - if (pColVal->isNull || pColVal->isNone) { - for (int j = 0; j < nMerge; ++j) { - SColVal jColVal = {0}; - tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); - if (jColVal.isNull || jColVal.isNone) { - input[iMerge[j]].next = true; - } - } - if (!setICol) { - iCol = i; - setICol = true; - } - } else { - --nilColCount; - } - } +// if (pColVal->isNull || pColVal->isNone) { +// for (int j = 0; j < nMerge; ++j) { +// SColVal jColVal = {0}; +// tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); +// if (jColVal.isNull || jColVal.isNone) { +// input[iMerge[j]].next = true; +// } +// } +// if (!setICol) { +// iCol = i; +// setICol = true; +// } +// } else { +// --nilColCount; +// } +// } - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } - continue; - } +// continue; +// } - setICol = false; - for (int16_t i = iCol; i < nCol; ++i) { - SColVal colVal = {0}; - tTSRowGetVal(*ppRow, pTSchema, i, &colVal); - TSKEY rowTs = (*ppRow)->ts; +// setICol = false; +// for (int16_t i = iCol; i < nCol; ++i) { +// SColVal colVal = {0}; +// 
tTSRowGetVal(*ppRow, pTSchema, i, &colVal); +// TSKEY rowTs = (*ppRow)->ts; - // SColVal *tColVal = (SColVal *)taosArrayGet(pColArray, i); - SLastCol *tTsVal = (SLastCol *)taosArrayGet(pColArray, i); - SColVal *tColVal = &tTsVal->colVal; +// // SColVal *tColVal = (SColVal *)taosArrayGet(pColArray, i); +// SLastCol *tTsVal = (SLastCol *)taosArrayGet(pColArray, i); +// SColVal *tColVal = &tTsVal->colVal; - if (!colVal.isNone && !colVal.isNull) { - if (tColVal->isNull || tColVal->isNone) { - // taosArraySet(pColArray, i, &colVal); - taosArraySet(pColArray, i, &(SLastCol){.ts = rowTs, .colVal = colVal}); - --nilColCount; - } - } else { - if ((tColVal->isNull || tColVal->isNone) && !setICol) { - iCol = i; - setICol = true; +// if (!colVal.isNone && !colVal.isNull) { +// if (tColVal->isNull || tColVal->isNone) { +// // taosArraySet(pColArray, i, &colVal); +// taosArraySet(pColArray, i, &(SLastCol){.ts = rowTs, .colVal = colVal}); +// --nilColCount; +// } +// } else { +// if ((tColVal->isNull || tColVal->isNone) && !setICol) { +// iCol = i; +// setICol = true; - for (int j = 0; j < nMerge; ++j) { - SColVal jColVal = {0}; - tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); - if (jColVal.isNull || jColVal.isNone) { - input[iMerge[j]].next = true; - } - } - } - } - } +// for (int j = 0; j < nMerge; ++j) { +// SColVal jColVal = {0}; +// tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); +// if (jColVal.isNull || jColVal.isNone) { +// input[iMerge[j]].next = true; +// } +// } +// } +// } +// } - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } - } while (nilColCount > 0); +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } +// } while (nilColCount > 0); - // if () new ts row from pColArray if non empty - /* if (taosArrayGetSize(pColArray) == nCol) { */ - /* code = tdSTSRowNew(pColArray, pTSchema, ppRow); */ - /* if (code) goto _err; */ - /* } */ - /* taosArrayDestroy(pColArray); */ - if (taosArrayGetSize(pColArray) <= 0) { - *ppLastArray = NULL; - 
taosArrayDestroy(pColArray); - } else { - *ppLastArray = pColArray; - } - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } +// // if () new ts row from pColArray if non empty +// /* if (taosArrayGetSize(pColArray) == nCol) { */ +// /* code = tdSTSRowNew(pColArray, pTSchema, ppRow); */ +// /* if (code) goto _err; */ +// /* } */ +// /* taosArrayDestroy(pColArray); */ +// if (taosArrayGetSize(pColArray) <= 0) { +// *ppLastArray = NULL; +// taosArrayDestroy(pColArray); +// } else { +// *ppLastArray = pColArray; +// } +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); - return code; -_err: - taosArrayDestroy(pColArray); - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); - tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} +// return code; +// _err: +// taosArrayDestroy(pColArray); +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); +// tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); +// return code; +// } int32_t tsdbCacheGetLastrowH(SLRUCache *pCache, tb_uid_t uid, STsdb *pTsdb, LRUHandle **handle) { int32_t code = 0; diff --git 
a/source/dnode/vnode/src/tsdb/tsdbCommit.c b/source/dnode/vnode/src/tsdb/tsdbCommit.c index 13f310ae27..194bd2e924 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit.c @@ -29,6 +29,7 @@ typedef struct { int32_t minRow; int32_t maxRow; int8_t cmprAlg; + STsdbFS fs; // -------------- TSKEY nextKey; // reset by each table commit int32_t commitFid; @@ -119,9 +120,6 @@ int32_t tsdbCommit(STsdb *pTsdb) { code = tsdbCommitDel(&commith); if (code) goto _err; - code = tsdbCommitCache(&commith); - if (code) goto _err; - // end commit code = tsdbEndCommit(&commith, 0); if (code) goto _err; @@ -158,7 +156,7 @@ static int32_t tsdbCommitDelStart(SCommitter *pCommitter) { goto _err; } - SDelFile *pDelFileR = pTsdb->pFS->nState->pDelFile; + SDelFile *pDelFileR = pCommitter->fs.pDelFile; if (pDelFileR) { code = tsdbDelFReaderOpen(&pCommitter->pDelFReader, pDelFileR, pTsdb, NULL); if (code) goto _err; @@ -247,7 +245,7 @@ static int32_t tsdbCommitDelEnd(SCommitter *pCommitter) { code = tsdbUpdateDelFileHdr(pCommitter->pDelFWriter); if (code) goto _err; - code = tsdbFSStateUpsertDelFile(pTsdb->pFS->nState, &pCommitter->pDelFWriter->fDel); + code = tsdbFSUpsertDelFile(&pCommitter->fs, &pCommitter->pDelFWriter->fDel); if (code) goto _err; code = tsdbDelFWriterClose(&pCommitter->pDelFWriter, 1); @@ -273,7 +271,6 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { int32_t code = 0; STsdb *pTsdb = pCommitter->pTsdb; SDFileSet *pRSet = NULL; - SDFileSet wSet; // memory pCommitter->nextKey = TSKEY_MAX; @@ -282,7 +279,8 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { taosArrayClear(pCommitter->aBlockIdx); tMapDataReset(&pCommitter->oBlockMap); tBlockDataReset(&pCommitter->oBlockData); - pRSet = tsdbFSStateGetDFileSet(pTsdb->pFS->nState, pCommitter->commitFid, TD_EQ); + pRSet = (SDFileSet *)taosArraySearch(pCommitter->fs.aDFileSet, &(SDFileSet){.fid = pCommitter->commitFid}, + tDFileSetCmprFn, TD_EQ); if 
(pRSet) { code = tsdbDataFReaderOpen(&pCommitter->pReader, pTsdb, pRSet); if (code) goto _err; @@ -292,23 +290,29 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { } // new + SHeadFile fHead; + SDataFile fData; + SLastFile fLast; + SSmaFile fSma; + SDFileSet wSet = {.pHeadF = &fHead, .pDataF = &fData, .pLastF = &fLast, .pSmaF = &fSma}; + taosArrayClear(pCommitter->aBlockIdxN); tMapDataReset(&pCommitter->nBlockMap); tBlockDataReset(&pCommitter->nBlockData); if (pRSet) { - wSet = (SDFileSet){.diskId = pRSet->diskId, - .fid = pCommitter->commitFid, - .fHead = {.commitID = pCommitter->commitID, .offset = 0, .size = 0}, - .fData = pRSet->fData, - .fLast = {.commitID = pCommitter->commitID, .size = 0}, - .fSma = pRSet->fSma}; + wSet.diskId = pRSet->diskId; + wSet.fid = pCommitter->commitFid; + fHead = (SHeadFile){.commitID = pCommitter->commitID, .offset = 0, .size = 0}; + fData = *pRSet->pDataF; + fLast = (SLastFile){.commitID = pCommitter->commitID, .size = 0}; + fSma = *pRSet->pSmaF; } else { - wSet = (SDFileSet){.diskId = (SDiskID){.level = 0, .id = 0}, - .fid = pCommitter->commitFid, - .fHead = {.commitID = pCommitter->commitID, .offset = 0, .size = 0}, - .fData = {.commitID = pCommitter->commitID, .size = 0}, - .fLast = {.commitID = pCommitter->commitID, .size = 0}, - .fSma = {.commitID = pCommitter->commitID, .size = 0}}; + wSet.diskId = (SDiskID){.level = 0, .id = 0}; + wSet.fid = pCommitter->commitFid; + fHead = (SHeadFile){.commitID = pCommitter->commitID, .offset = 0, .size = 0}; + fData = (SDataFile){.commitID = pCommitter->commitID, .size = 0}; + fLast = (SLastFile){.commitID = pCommitter->commitID, .size = 0}; + fSma = (SSmaFile){.commitID = pCommitter->commitID, .size = 0}; } code = tsdbDataFWriterOpen(&pCommitter->pWriter, pTsdb, &wSet); if (code) goto _err; @@ -855,7 +859,7 @@ static int32_t tsdbCommitFileDataEnd(SCommitter *pCommitter) { if (code) goto _err; // upsert SDFileSet - code = 
tsdbFSStateUpsertDFileSet(pCommitter->pTsdb->pFS->nState, tsdbDataFWriterGetWSet(pCommitter->pWriter)); + code = tsdbFSUpsertFSet(&pCommitter->fs, &pCommitter->pWriter->wSet); if (code) goto _err; // close and sync @@ -973,7 +977,7 @@ static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter) { pCommitter->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; pCommitter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; - code = tsdbFSBegin(pTsdb->pFS); + code = tsdbFSCopy(pTsdb, &pCommitter->fs); if (code) goto _err; return code; @@ -1142,28 +1146,33 @@ _err: return code; } -static int32_t tsdbCommitCache(SCommitter *pCommitter) { - int32_t code = 0; - // TODO - return code; -} - static int32_t tsdbEndCommit(SCommitter *pCommitter, int32_t eno) { int32_t code = 0; STsdb *pTsdb = pCommitter->pTsdb; SMemTable *pMemTable = pTsdb->imem; - if (eno == 0) { - code = tsdbFSCommit(pTsdb->pFS); - } else { - code = tsdbFSRollback(pTsdb->pFS); + ASSERT(eno == 0); + + code = tsdbFSCommit1(pTsdb, &pCommitter->fs); + if (code) goto _err; + + // lock + taosThreadRwlockWrlock(&pTsdb->rwLock); + + // commit or rollback + code = tsdbFSCommit2(pTsdb, &pCommitter->fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _err; } - taosThreadRwlockWrlock(&pTsdb->rwLock); pTsdb->imem = NULL; + + // unlock taosThreadRwlockUnlock(&pTsdb->rwLock); tsdbUnrefMemTable(pMemTable); + tsdbFSDestroy(&pCommitter->fs); tsdbInfo("vgId:%d tsdb end commit", TD_VID(pTsdb->pVnode)); return code; diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index 3bc79621e1..b17e30d7c7 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -16,67 +16,41 @@ #include "tsdb.h" // ================================================================================================= -static int32_t tPutFSState(uint8_t *p, STsdbFSState *pState) { +static int32_t tsdbEncodeFS(uint8_t *p, STsdbFS *pFS) { int32_t n = 0; - int8_t 
hasDel = pState->pDelFile ? 1 : 0; - uint32_t nDFileSet = taosArrayGetSize(pState->aDFileSet); + int8_t hasDel = pFS->pDelFile ? 1 : 0; + uint32_t nSet = taosArrayGetSize(pFS->aDFileSet); // SDelFile n += tPutI8(p ? p + n : p, hasDel); if (hasDel) { - n += tPutDelFile(p ? p + n : p, pState->pDelFile); + n += tPutDelFile(p ? p + n : p, pFS->pDelFile); } // SArray - n += tPutU32v(p ? p + n : p, nDFileSet); - for (uint32_t iDFileSet = 0; iDFileSet < nDFileSet; iDFileSet++) { - n += tPutDFileSet(p ? p + n : p, (SDFileSet *)taosArrayGet(pState->aDFileSet, iDFileSet)); + n += tPutU32v(p ? p + n : p, nSet); + for (uint32_t iSet = 0; iSet < nSet; iSet++) { + n += tPutDFileSet(p ? p + n : p, (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet)); } return n; } -static int32_t tGetFSState(uint8_t *p, STsdbFSState *pState) { - int32_t n = 0; - int8_t hasDel; - uint32_t nDFileSet; - SDFileSet *pSet = &(SDFileSet){0}; - - // SDelFile - n += tGetI8(p + n, &hasDel); - if (hasDel) { - pState->pDelFile = &pState->delFile; - n += tGetDelFile(p + n, pState->pDelFile); - } else { - pState->pDelFile = NULL; - } - - // SArray - taosArrayClear(pState->aDFileSet); - n += tGetU32v(p + n, &nDFileSet); - for (uint32_t iDFileSet = 0; iDFileSet < nDFileSet; iDFileSet++) { - n += tGetDFileSet(p + n, pSet); - taosArrayPush(pState->aDFileSet, pSet); - } - - return n; -} - -static int32_t tsdbGnrtCurrent(const char *fname, STsdbFSState *pState) { +static int32_t tsdbGnrtCurrent(STsdb *pTsdb, STsdbFS *pFS, char *fname) { int32_t code = 0; int64_t n; int64_t size; - uint8_t *pData; + uint8_t *pData = NULL; TdFilePtr pFD = NULL; // to binary - size = tPutFSState(NULL, pState) + sizeof(TSCKSUM); + size = tsdbEncodeFS(NULL, pFS) + sizeof(TSCKSUM); pData = taosMemoryMalloc(size); if (pData == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - n = tPutFSState(pData, pState); + n = tsdbEncodeFS(pData, pFS); ASSERT(n + sizeof(TSCKSUM) == size); taosCalcChecksumAppend(0, pData, size); @@ -104,419 +78,267 @@ 
static int32_t tsdbGnrtCurrent(const char *fname, STsdbFSState *pState) { return code; _err: - tsdbError("tsdb gnrt current failed since %s", tstrerror(code)); + tsdbError("vgId:%d tsdb gnrt current failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); if (pData) taosMemoryFree(pData); return code; } -static int32_t tsdbLoadCurrentState(STsdbFS *pFS, STsdbFSState *pState) { - int32_t code = 0; - int64_t size; - int64_t n; - char fname[TSDB_FILENAME_LEN]; - uint8_t *pData = NULL; - TdFilePtr pFD; +// static int32_t tsdbApplyDFileSetChange(STsdbFS *pFS, SDFileSet *pFrom, SDFileSet *pTo) { +// int32_t code = 0; +// char fname[TSDB_FILENAME_LEN]; - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP, - pFS->pTsdb->path, TD_DIRSEP); +// if (pFrom && pTo) { +// bool isSameDisk = (pFrom->diskId.level == pTo->diskId.level) && (pFrom->diskId.id == pTo->diskId.id); - if (!taosCheckExistFile(fname)) { - // create an empry CURRENT file if not exists - code = tsdbGnrtCurrent(fname, pState); - if (code) goto _err; - } else { - // open the file and load - pFD = taosOpenFile(fname, TD_FILE_READ); - if (pFD == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } +// // head +// if (isSameDisk && pFrom->pHeadF->commitID == pTo->pHeadF->commitID) { +// ASSERT(pFrom->pHeadF->size == pTo->pHeadF->size); +// ASSERT(pFrom->pHeadF->offset == pTo->pHeadF->offset); +// } else { +// tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); +// taosRemoveFile(fname); +// } - if (taosFStatFile(pFD, &size, NULL) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } +// // data +// if (isSameDisk && pFrom->pDataF->commitID == pTo->pDataF->commitID) { +// if (pFrom->pDataF->size > pTo->pDataF->size) { +// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_DATA_FILE); +// if (code) goto _err; +// } +// } else { +// tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); +// 
taosRemoveFile(fname); +// } - pData = taosMemoryMalloc(size); - if (pData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } +// // last +// if (isSameDisk && pFrom->pLastF->commitID == pTo->pLastF->commitID) { +// if (pFrom->pLastF->size > pTo->pLastF->size) { +// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_LAST_FILE); +// if (code) goto _err; +// } +// } else { +// tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); +// taosRemoveFile(fname); +// } - n = taosReadFile(pFD, pData, size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } +// // sma +// if (isSameDisk && pFrom->pSmaF->commitID == pTo->pSmaF->commitID) { +// if (pFrom->pSmaF->size > pTo->pSmaF->size) { +// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_SMA_FILE); +// if (code) goto _err; +// } +// } else { +// tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); +// taosRemoveFile(fname); +// } +// } else if (pFrom) { +// // head +// tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); +// taosRemoveFile(fname); - if (!taosCheckChecksumWhole(pData, size)) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } +// // data +// tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); +// taosRemoveFile(fname); - taosCloseFile(&pFD); +// // last +// tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); +// taosRemoveFile(fname); - // decode - tGetFSState(pData, pState); +// // fsm +// tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); +// taosRemoveFile(fname); +// } + +// return code; + +// _err: +// tsdbError("vgId:%d tsdb apply disk file set change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); +// return code; +// } + +// static int32_t tsdbApplyDelFileChange(STsdbFS *pFS, SDelFile *pFrom, SDelFile *pTo) { +// int32_t code = 0; +// char fname[TSDB_FILENAME_LEN]; + +// if (pFrom && pTo) { +// if 
(!tsdbDelFileIsSame(pFrom, pTo)) { +// tsdbDelFileName(pFS->pTsdb, pFrom, fname); +// if (taosRemoveFile(fname) < 0) { +// code = TAOS_SYSTEM_ERROR(errno); +// goto _err; +// } +// } +// } else if (pFrom) { +// tsdbDelFileName(pFS->pTsdb, pFrom, fname); +// if (taosRemoveFile(fname) < 0) { +// code = TAOS_SYSTEM_ERROR(errno); +// goto _err; +// } +// } else { +// // do nothing +// } + +// return code; + +// _err: +// tsdbError("vgId:%d tsdb apply del file change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); +// return code; +// } + +// static int32_t tsdbFSApplyDiskChange(STsdbFS *pFS, STsdbFSState *pFrom, STsdbFSState *pTo) { +// int32_t code = 0; +// int32_t iFrom = 0; +// int32_t nFrom = taosArrayGetSize(pFrom->aDFileSet); +// int32_t iTo = 0; +// int32_t nTo = taosArrayGetSize(pTo->aDFileSet); +// SDFileSet *pDFileSetFrom; +// SDFileSet *pDFileSetTo; + +// // SDelFile +// code = tsdbApplyDelFileChange(pFS, pFrom->pDelFile, pTo->pDelFile); +// if (code) goto _err; + +// // SDFileSet +// while (iFrom < nFrom && iTo < nTo) { +// pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); +// pDFileSetTo = (SDFileSet *)taosArrayGet(pTo->aDFileSet, iTo); + +// if (pDFileSetFrom->fid == pDFileSetTo->fid) { +// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, pDFileSetTo); +// if (code) goto _err; + +// iFrom++; +// iTo++; +// } else if (pDFileSetFrom->fid < pDFileSetTo->fid) { +// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); +// if (code) goto _err; + +// iFrom++; +// } else { +// iTo++; +// } +// } + +// while (iFrom < nFrom) { +// pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); +// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); +// if (code) goto _err; + +// iFrom++; +// } + +// #if 0 +// // do noting +// while (iTo < nTo) { +// pDFileSetTo = (SDFileSet *)taosArrayGetP(pTo->aDFileSet, iTo); +// code = tsdbApplyDFileSetChange(pFS, NULL, pDFileSetTo); +// if (code) goto _err; + +// iTo++; +// } 
+// #endif + +// return code; + +// _err: +// tsdbError("vgId:%d tsdb fs apply disk change failed sicne %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); +// return code; +// } + +void tsdbFSDestroy(STsdbFS *pFS) { + if (pFS->pDelFile) { + taosMemoryFree(pFS->pDelFile); } - if (pData) taosMemoryFree(pData); - return code; + for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet); + taosMemoryFree(pSet->pHeadF); + taosMemoryFree(pSet->pDataF); + taosMemoryFree(pSet->pLastF); + taosMemoryFree(pSet->pSmaF); + } -_err: - tsdbError("vgId:%d tsdb load current state failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - if (pData) taosMemoryFree(pData); - return code; + taosArrayDestroy(pFS->aDFileSet); } -static int32_t tsdbApplyDFileSetChange(STsdbFS *pFS, SDFileSet *pFrom, SDFileSet *pTo) { +static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { int32_t code = 0; + int64_t size; char fname[TSDB_FILENAME_LEN]; - if (pFrom && pTo) { - // head - if (tsdbFileIsSame(pFrom, pTo, TSDB_HEAD_FILE)) { - ASSERT(pFrom->fHead.size == pTo->fHead.size); - ASSERT(pFrom->fHead.offset == pTo->fHead.offset); - } else { - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_HEAD_FILE, fname); - taosRemoveFile(fname); - } - - // data - if (tsdbFileIsSame(pFrom, pTo, TSDB_DATA_FILE)) { - if (pFrom->fData.size > pTo->fData.size) { - code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_DATA_FILE); - if (code) goto _err; - } - } else { - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_DATA_FILE, fname); - taosRemoveFile(fname); - } - - // last - if (tsdbFileIsSame(pFrom, pTo, TSDB_LAST_FILE)) { - if (pFrom->fLast.size > pTo->fLast.size) { - code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_LAST_FILE); - if (code) goto _err; - } - } else { - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_LAST_FILE, fname); - taosRemoveFile(fname); - } - - // sma - if (tsdbFileIsSame(pFrom, pTo, TSDB_SMA_FILE)) { - if (pFrom->fSma.size > pTo->fSma.size) 
{ - code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_SMA_FILE); - if (code) goto _err; - } - } else { - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_SMA_FILE, fname); - taosRemoveFile(fname); - } - } else if (pFrom) { - // head - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_HEAD_FILE, fname); - taosRemoveFile(fname); - - // data - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_DATA_FILE, fname); - taosRemoveFile(fname); - - // last - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_LAST_FILE, fname); - taosRemoveFile(fname); - - // fsm - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_SMA_FILE, fname); - taosRemoveFile(fname); - } - - return code; - -_err: - tsdbError("vgId:%d tsdb apply disk file set change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbApplyDelFileChange(STsdbFS *pFS, SDelFile *pFrom, SDelFile *pTo) { - int32_t code = 0; - char fname[TSDB_FILENAME_LEN]; - - if (pFrom && pTo) { - if (!tsdbDelFileIsSame(pFrom, pTo)) { - tsdbDelFileName(pFS->pTsdb, pFrom, fname); - if (taosRemoveFile(fname) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - } - } else if (pFrom) { - tsdbDelFileName(pFS->pTsdb, pFrom, fname); - if (taosRemoveFile(fname) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - } else { - // do nothing - } - - return code; - -_err: - tsdbError("vgId:%d tsdb apply del file change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbFSApplyDiskChange(STsdbFS *pFS, STsdbFSState *pFrom, STsdbFSState *pTo) { - int32_t code = 0; - int32_t iFrom = 0; - int32_t nFrom = taosArrayGetSize(pFrom->aDFileSet); - int32_t iTo = 0; - int32_t nTo = taosArrayGetSize(pTo->aDFileSet); - SDFileSet *pDFileSetFrom; - SDFileSet *pDFileSetTo; - // SDelFile - code = tsdbApplyDelFileChange(pFS, pFrom->pDelFile, pTo->pDelFile); - if (code) goto _err; - - // SDFileSet - while (iFrom < nFrom && iTo < nTo) { - pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); 
- pDFileSetTo = (SDFileSet *)taosArrayGet(pTo->aDFileSet, iTo); - - if (pDFileSetFrom->fid == pDFileSetTo->fid) { - code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, pDFileSetTo); - if (code) goto _err; - - iFrom++; - iTo++; - } else if (pDFileSetFrom->fid < pDFileSetTo->fid) { - code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); - if (code) goto _err; - - iFrom++; - } else { - iTo++; - } - } - - while (iFrom < nFrom) { - pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); - code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); - if (code) goto _err; - - iFrom++; - } - -#if 0 - // do noting - while (iTo < nTo) { - pDFileSetTo = (SDFileSet *)taosArrayGetP(pTo->aDFileSet, iTo); - code = tsdbApplyDFileSetChange(pFS, NULL, pDFileSetTo); - if (code) goto _err; - - iTo++; - } -#endif - - return code; - -_err: - tsdbError("vgId:%d tsdb fs apply disk change failed sicne %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static void tsdbFSDestroy(STsdbFS *pFS) { - if (pFS) { - if (pFS->nState) { - taosArrayDestroy(pFS->nState->aDFileSet); - taosMemoryFree(pFS->nState); - } - - if (pFS->cState) { - taosArrayDestroy(pFS->cState->aDFileSet); - taosMemoryFree(pFS->cState); - } - - taosThreadRwlockDestroy(&pFS->lock); - taosMemoryFree(pFS); - } - // TODO -} - -static int32_t tsdbFSCreate(STsdb *pTsdb, STsdbFS **ppFS) { - int32_t code = 0; - STsdbFS *pFS = NULL; - - pFS = (STsdbFS *)taosMemoryCalloc(1, sizeof(*pFS)); - if (pFS == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pFS->pTsdb = pTsdb; - - code = taosThreadRwlockInit(&pFS->lock, NULL); - if (code) { - taosMemoryFree(pFS); - code = TAOS_SYSTEM_ERROR(code); - goto _err; - } - - pFS->inTxn = 0; - - pFS->cState = (STsdbFSState *)taosMemoryCalloc(1, sizeof(STsdbFSState)); - if (pFS->cState == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pFS->cState->aDFileSet = taosArrayInit(0, sizeof(SDFileSet)); - if (pFS->cState->aDFileSet == NULL) { - code = 
TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - pFS->nState = (STsdbFSState *)taosMemoryCalloc(1, sizeof(STsdbFSState)); - if (pFS->nState == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pFS->nState->aDFileSet = taosArrayInit(0, sizeof(SDFileSet)); - if (pFS->nState->aDFileSet == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - *ppFS = pFS; - return code; - -_err: - tsdbError("vgId:%d tsdb fs create failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - tsdbFSDestroy(pFS); - *ppFS = NULL; - return code; -} - -static int32_t tsdbScanAndTryFixFS(STsdbFS *pFS, int8_t deepScan) { - int32_t code = 0; - STsdb *pTsdb = pFS->pTsdb; - STfs *pTfs = pTsdb->pVnode->pTfs; - int64_t size; - char fname[TSDB_FILENAME_LEN]; - char pHdr[TSDB_FHDR_SIZE]; - TdFilePtr pFD; - - // SDelFile - if (pFS->cState->pDelFile) { - tsdbDelFileName(pTsdb, pFS->cState->pDelFile, fname); + if (pTsdb->fs.pDelFile) { + tsdbDelFileName(pTsdb, pTsdb->fs.pDelFile, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (size != pFS->cState->pDelFile->size) { + if (size != pTsdb->fs.pDelFile->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } - - if (deepScan) { - // TODO - } } // SArray - for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->cState->aDFileSet); iSet++) { - SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->cState->aDFileSet, iSet); + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); // head ========= - tsdbDataFileName(pTsdb, pDFileSet, TSDB_HEAD_FILE, fname); + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - - if (deepScan) { - // TODO + if (size != pSet->pHeadF->size) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _err; } // data ========= - tsdbDataFileName(pTsdb, pDFileSet, TSDB_DATA_FILE, fname); 
+ tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - - if (size < pDFileSet->fData.size) { + if (size < pSet->pDataF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pDFileSet->fData.size) { - ASSERT(0); - // need to rollback the file - } - - if (deepScan) { - // TODO + } else if (size > pSet->pDataF->size) { + code = tsdbDFileRollback(pTsdb, pSet, TSDB_DATA_FILE); + if (code) goto _err; } // last =========== - tsdbDataFileName(pTsdb, pDFileSet, TSDB_LAST_FILE, fname); + tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - - if (size < pDFileSet->fLast.size) { + if (size != pSet->pLastF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pDFileSet->fLast.size) { - ASSERT(0); - // need to rollback the file - } - - if (deepScan) { - // TODO } // sma ============= - tsdbDataFileName(pTsdb, pDFileSet, TSDB_SMA_FILE, fname); + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - - if (size < pDFileSet->fSma.size) { + if (size < pSet->pSmaF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pDFileSet->fSma.size) { - ASSERT(0); - // need to rollback the file - } - - if (deepScan) { - // TODO + } else if (size > pSet->pSmaF->size) { + code = tsdbDFileRollback(pTsdb, pSet, TSDB_SMA_FILE); + if (code) goto _err; } } - // remove those invalid files (todo) -#if 0 - STfsDir *tdir; - const STfsFile *pf; - - tdir = tfsOpendir(pTfs, pTsdb->path); - if (tdir == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; + { + // remove those invalid files (todo) } - while ((pf = tfsReaddir(tdir))) { - tfsBasename(pf, fname); - } - - tfsClosedir(tdir); -#endif - return code; _err: @@ -524,7 +346,7 @@ _err: 
return code; } -static int32_t tDFileSetCmprFn(const void *p1, const void *p2) { +int32_t tDFileSetCmprFn(const void *p1, const void *p2) { if (((SDFileSet *)p1)->fid < ((SDFileSet *)p2)->fid) { return -1; } else if (((SDFileSet *)p1)->fid > ((SDFileSet *)p2)->fid) { @@ -534,90 +356,372 @@ static int32_t tDFileSetCmprFn(const void *p1, const void *p2) { return 0; } -// EXPOSED APIS ==================================================================================== -int32_t tsdbFSOpen(STsdb *pTsdb, STsdbFS **ppFS) { - int32_t code = 0; - - // create handle - code = tsdbFSCreate(pTsdb, ppFS); - if (code) goto _err; - - // load current state - code = tsdbLoadCurrentState(*ppFS, (*ppFS)->cState); - if (code) { - tsdbFSDestroy(*ppFS); - goto _err; - } - - // scan and fix FS - code = tsdbScanAndTryFixFS(*ppFS, 0); - if (code) { - tsdbFSDestroy(*ppFS); - goto _err; - } - - return code; - -_err: - *ppFS = NULL; - tsdbError("vgId:%d tsdb fs open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} - -int32_t tsdbFSClose(STsdbFS *pFS) { - int32_t code = 0; - tsdbFSDestroy(pFS); - return code; -} - -int32_t tsdbFSBegin(STsdbFS *pFS) { - int32_t code = 0; - - ASSERT(!pFS->inTxn); +static int32_t tsdbRecoverFS(STsdb *pTsdb, uint8_t *pData, int64_t nData) { + int32_t code = 0; + int8_t hasDel; + uint32_t nSet; + int32_t n; // SDelFile - pFS->nState->pDelFile = NULL; - if (pFS->cState->pDelFile) { - pFS->nState->delFile = pFS->cState->delFile; - pFS->nState->pDelFile = &pFS->nState->delFile; + n = 0; + n += tGetI8(pData + n, &hasDel); + if (hasDel) { + pTsdb->fs.pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pTsdb->fs.pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + pTsdb->fs.pDelFile->nRef = 1; + n += tGetDelFile(pData + n, pTsdb->fs.pDelFile); + } else { + pTsdb->fs.pDelFile = NULL; } - // SArray - taosArrayClear(pFS->nState->aDFileSet); - for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->cState->aDFileSet); 
iSet++) { - SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->cState->aDFileSet, iSet); + // SArray + taosArrayClear(pTsdb->fs.aDFileSet); + n += tGetU32v(pData + n, &nSet); + for (uint32_t iSet = 0; iSet < nSet; iSet++) { + SDFileSet fSet; - if (taosArrayPush(pFS->nState->aDFileSet, pDFileSet) == NULL) { + // head + fSet.pHeadF = (SHeadFile *)taosMemoryCalloc(1, sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + fSet.pHeadF->nRef = 1; + + // data + fSet.pDataF = (SDataFile *)taosMemoryCalloc(1, sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + fSet.pDataF->nRef = 1; + + // last + fSet.pLastF = (SLastFile *)taosMemoryCalloc(1, sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + fSet.pLastF->nRef = 1; + + // sma + fSet.pSmaF = (SSmaFile *)taosMemoryCalloc(1, sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + fSet.pSmaF->nRef = 1; + + n += tGetDFileSet(pData + n, &fSet); + + if (taosArrayPush(pTsdb->fs.aDFileSet, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } } - pFS->inTxn = 1; + ASSERT(n + sizeof(TSCKSUM) == nData); return code; _err: - tsdbError("vgId:%d tsdb fs begin failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); return code; } -int32_t tsdbFSCommit(STsdbFS *pFS) { - int32_t code = 0; - STsdbFSState *pState = pFS->nState; - char tfname[TSDB_FILENAME_LEN]; - char fname[TSDB_FILENAME_LEN]; +// EXPOSED APIS ==================================================================================== +int32_t tsdbFSOpen(STsdb *pTsdb) { + int32_t code = 0; - // need lock (todo) - pFS->nState = pFS->cState; - pFS->cState = pState; + // open handle + pTsdb->fs.pDelFile = NULL; + pTsdb->fs.aDFileSet = taosArrayInit(0, sizeof(SDFileSet)); + if (pTsdb->fs.aDFileSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } - snprintf(tfname, 
TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT.t", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP, - pFS->pTsdb->path, TD_DIRSEP); - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP, - pFS->pTsdb->path, TD_DIRSEP); + // load fs or keep empty + char fname[TSDB_FILENAME_LEN]; + + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP, + pTsdb->path, TD_DIRSEP); + + if (!taosCheckExistFile(fname)) { + // empty one + code = tsdbGnrtCurrent(pTsdb, &pTsdb->fs, fname); + if (code) goto _err; + } else { + // read + TdFilePtr pFD = taosOpenFile(fname, TD_FILE_READ); + if (pFD == NULL) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + int64_t size; + if (taosFStatFile(pFD, &size, NULL) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + taosCloseFile(&pFD); + goto _err; + } + + uint8_t *pData = taosMemoryMalloc(size); + if (pData == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + taosCloseFile(&pFD); + goto _err; + } + + int64_t n = taosReadFile(pFD, pData, size); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + taosMemoryFree(pData); + taosCloseFile(&pFD); + goto _err; + } + + if (!taosCheckChecksumWhole(pData, size)) { + code = TSDB_CODE_FILE_CORRUPTED; + taosMemoryFree(pData); + taosCloseFile(&pFD); + goto _err; + } + + taosCloseFile(&pFD); + + // recover fs + code = tsdbRecoverFS(pTsdb, pData, size); + if (code) { + taosMemoryFree(pData); + goto _err; + } + + taosMemoryFree(pData); + } + + // scan and fix FS + code = tsdbScanAndTryFixFS(pTsdb); + if (code) goto _err; + + return code; + +_err: + tsdbError("vgId:%d tsdb fs open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbFSClose(STsdb *pTsdb) { + int32_t code = 0; + + if (pTsdb->fs.pDelFile) { + ASSERT(pTsdb->fs.pDelFile->nRef == 1); + taosMemoryFree(pTsdb->fs.pDelFile); + } + + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + 
SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + + // head + ASSERT(pSet->pHeadF->nRef == 1); + taosMemoryFree(pSet->pHeadF); + + // data + ASSERT(pSet->pDataF->nRef == 1); + taosMemoryFree(pSet->pDataF); + + // last + ASSERT(pSet->pLastF->nRef == 1); + taosMemoryFree(pSet->pLastF); + + // sma + ASSERT(pSet->pSmaF->nRef == 1); + taosMemoryFree(pSet->pSmaF); + } + + taosArrayDestroy(pTsdb->fs.aDFileSet); + + return code; +} + +int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS) { + int32_t code = 0; + + pFS->pDelFile = NULL; + pFS->aDFileSet = taosArrayInit(taosArrayGetSize(pTsdb->fs.aDFileSet), sizeof(SDFileSet)); + if (pFS->aDFileSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + if (pTsdb->fs.pDelFile) { + pFS->pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pFS->pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + *pFS->pDelFile = *pTsdb->fs.pDelFile; + } + + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid}; + + // head + fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + fSet.pHeadF->nRef = 0; + fSet.pHeadF->commitID = pSet->pHeadF->commitID; + fSet.pHeadF->size = pSet->pHeadF->size; + fSet.pHeadF->offset = pSet->pHeadF->offset; + + // data + fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + fSet.pDataF->nRef = 0; + fSet.pDataF->commitID = pSet->pDataF->commitID; + fSet.pDataF->size = pSet->pDataF->size; + + // data + fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + fSet.pLastF->nRef = 0; + fSet.pLastF->commitID = 
pSet->pLastF->commitID; + fSet.pLastF->size = pSet->pLastF->size; + + // last + fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + fSet.pSmaF->nRef = 0; + fSet.pSmaF->commitID = pSet->pSmaF->commitID; + fSet.pSmaF->size = pSet->pSmaF->size; + + if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + } + +_exit: + return code; +} + +int32_t tsdbFSRollback(STsdbFS *pFS) { + int32_t code = 0; + + ASSERT(0); + + return code; + +_err: + return code; +} + +int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile) { + int32_t code = 0; + + if (pFS->pDelFile == NULL) { + pFS->pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pFS->pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + } + *pFS->pDelFile = *pDelFile; + +_exit: + return code; +} + +int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet) { + int32_t code = 0; + int32_t idx = taosArraySearchIdx(pFS->aDFileSet, pSet, tDFileSetCmprFn, TD_GE); + + if (idx < 0) { + idx = taosArrayGetSize(pFS->aDFileSet); + } else { + SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, idx); + int32_t c = tDFileSetCmprFn(pSet, pDFileSet); + if (c == 0) { + *pDFileSet->pHeadF = *pSet->pHeadF; + *pDFileSet->pDataF = *pSet->pDataF; + *pDFileSet->pLastF = *pSet->pLastF; + *pDFileSet->pSmaF = *pSet->pSmaF; + + goto _exit; + } + } + + SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid}; + + // head + fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pHeadF = *pSet->pHeadF; + + // data + fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pDataF = *pSet->pDataF; + + // data + fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + 
if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pLastF = *pSet->pLastF; + + // last + fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pSmaF = *pSet->pSmaF; + + if (taosArrayInsert(pFS->aDFileSet, idx, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + +_exit: + return code; +} + +int32_t tsdbFSCommit1(STsdb *pTsdb, STsdbFS *pFSNew) { + int32_t code = 0; + char tfname[TSDB_FILENAME_LEN]; + char fname[TSDB_FILENAME_LEN]; + + snprintf(tfname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT.t", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP, + pTsdb->path, TD_DIRSEP); + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP, + pTsdb->path, TD_DIRSEP); // gnrt CURRENT.t - code = tsdbGnrtCurrent(tfname, pFS->cState); + code = tsdbGnrtCurrent(pTsdb, pFSNew, tfname); if (code) goto _err; // rename @@ -627,60 +731,295 @@ int32_t tsdbFSCommit(STsdbFS *pFS) { goto _err; } - // apply commit on disk - code = tsdbFSApplyDiskChange(pFS, pFS->nState, pFS->cState); - if (code) goto _err; - - pFS->inTxn = 0; - return code; _err: - tsdbError("vgId:%d tsdb fs commit failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); + tsdbError("vgId:%d tsdb fs commit phase 1 failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } -int32_t tsdbFSRollback(STsdbFS *pFS) { +int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFSNew) { int32_t code = 0; + int32_t nRef; + char fname[TSDB_FILENAME_LEN]; - code = tsdbFSApplyDiskChange(pFS, pFS->nState, pFS->cState); - if (code) goto _err; + // del + if (pFSNew->pDelFile) { + SDelFile *pDelFile = pTsdb->fs.pDelFile; - pFS->inTxn = 0; + if (pDelFile == NULL || (pDelFile->commitID != pFSNew->pDelFile->commitID)) { + pTsdb->fs.pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pTsdb->fs.pDelFile == NULL) { + code = 
TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } - return code; + *pTsdb->fs.pDelFile = *pFSNew->pDelFile; + pTsdb->fs.pDelFile->nRef = 1; -_err: - tsdbError("vgId:%d tsdb fs rollback failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} - -int32_t tsdbFSStateUpsertDelFile(STsdbFSState *pState, SDelFile *pDelFile) { - int32_t code = 0; - pState->delFile = *pDelFile; - pState->pDelFile = &pState->delFile; - return code; -} - -int32_t tsdbFSStateUpsertDFileSet(STsdbFSState *pState, SDFileSet *pSet) { - int32_t code = 0; - int32_t idx = taosArraySearchIdx(pState->aDFileSet, pSet, tDFileSetCmprFn, TD_GE); - - if (idx < 0) { - if (taosArrayPush(pState->aDFileSet, pSet) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; + if (pDelFile) { + nRef = atomic_sub_fetch_32(&pDelFile->nRef, 1); + if (nRef == 0) { + tsdbDelFileName(pTsdb, pDelFile, fname); + taosRemoveFile(fname); + taosMemoryFree(pDelFile); + } + } } } else { - SDFileSet *tDFileSet = (SDFileSet *)taosArrayGet(pState->aDFileSet, idx); - int32_t c = tDFileSetCmprFn(pSet, tDFileSet); - if (c == 0) { - taosArraySet(pState->aDFileSet, idx, pSet); - } else { - if (taosArrayInsert(pState->aDFileSet, idx, pSet) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; + ASSERT(pTsdb->fs.pDelFile == NULL); + } + + // data + int32_t iOld = 0; + int32_t iNew = 0; + while (true) { + int32_t nOld = taosArrayGetSize(pTsdb->fs.aDFileSet); + int32_t nNew = taosArrayGetSize(pFSNew->aDFileSet); + SDFileSet fSet; + int8_t sameDisk; + + if (iOld >= nOld && iNew >= nNew) break; + + SDFileSet *pSetOld = (iOld < nOld) ? taosArrayGet(pTsdb->fs.aDFileSet, iOld) : NULL; + SDFileSet *pSetNew = (iNew < nNew) ? 
taosArrayGet(pFSNew->aDFileSet, iNew) : NULL; + + if (pSetOld && pSetNew) { + if (pSetOld->fid == pSetNew->fid) { + goto _merge_old_and_new; + } else if (pSetOld->fid < pSetNew->fid) { + goto _remove_old; + } else { + goto _add_new; } + } else if (pSetOld) { + goto _remove_old; + } else { + goto _add_new; + } + + _merge_old_and_new: + sameDisk = ((pSetOld->diskId.level == pSetNew->diskId.level) && (pSetOld->diskId.id == pSetNew->diskId.id)); + + // head + fSet.pHeadF = pSetOld->pHeadF; + if ((!sameDisk) || (pSetOld->pHeadF->commitID != pSetNew->pHeadF->commitID)) { + pSetOld->pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (pSetOld->pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pHeadF = *pSetNew->pHeadF; + pSetOld->pHeadF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pHeadF->nRef, 1); + if (nRef == 0) { + tsdbHeadFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pHeadF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pHeadF); + } + } else { + ASSERT(fSet.pHeadF->size == pSetNew->pHeadF->size); + ASSERT(fSet.pHeadF->offset == pSetNew->pHeadF->offset); + } + + // data + fSet.pDataF = pSetOld->pDataF; + if ((!sameDisk) || (pSetOld->pDataF->commitID != pSetNew->pDataF->commitID)) { + pSetOld->pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (pSetOld->pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pDataF = *pSetNew->pDataF; + pSetOld->pDataF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pDataF->nRef, 1); + if (nRef == 0) { + tsdbDataFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pDataF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pDataF); + } + } else { + ASSERT(pSetOld->pDataF->size <= pSetNew->pDataF->size); + pSetOld->pDataF->size = pSetNew->pDataF->size; + } + + // last + fSet.pLastF = pSetOld->pLastF; + if ((!sameDisk) || (pSetOld->pLastF->commitID != pSetNew->pLastF->commitID)) { + pSetOld->pLastF = (SLastFile 
*)taosMemoryMalloc(sizeof(SLastFile)); + if (pSetOld->pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pLastF = *pSetNew->pLastF; + pSetOld->pLastF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pLastF->nRef, 1); + if (nRef == 0) { + tsdbLastFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pLastF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pLastF); + } + } else { + ASSERT(pSetOld->pLastF->size == pSetNew->pLastF->size); + } + + // sma + fSet.pSmaF = pSetOld->pSmaF; + if ((!sameDisk) || (pSetOld->pSmaF->commitID != pSetNew->pSmaF->commitID)) { + pSetOld->pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (pSetOld->pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pSmaF = *pSetNew->pSmaF; + pSetOld->pSmaF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pSmaF->nRef, 1); + if (nRef == 0) { + tsdbSmaFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pSmaF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pSmaF); + } + } else { + ASSERT(pSetOld->pSmaF->size <= pSetNew->pSmaF->size); + pSetOld->pSmaF->size = pSetNew->pSmaF->size; + } + + if (!sameDisk) { + pSetOld->diskId = pSetNew->diskId; + } + + iOld++; + iNew++; + continue; + + _remove_old: + nRef = atomic_sub_fetch_32(&pSetOld->pHeadF->nRef, 1); + if (nRef == 0) { + tsdbHeadFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pHeadF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pHeadF); + } + + nRef = atomic_sub_fetch_32(&pSetOld->pDataF->nRef, 1); + if (nRef == 0) { + tsdbDataFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pDataF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pDataF); + } + + nRef = atomic_sub_fetch_32(&pSetOld->pLastF->nRef, 1); + if (nRef == 0) { + tsdbLastFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pLastF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pLastF); + } + + nRef = atomic_sub_fetch_32(&pSetOld->pSmaF->nRef, 1); + if (nRef == 0) { + 
tsdbSmaFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pSmaF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pSmaF); + } + + taosArrayRemove(pTsdb->fs.aDFileSet, iOld); + continue; + + _add_new: + fSet.diskId = pSetNew->diskId; + fSet.fid = pSetNew->fid; + + // head + fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pHeadF = *pSetNew->pHeadF; + fSet.pHeadF->nRef = 1; + + // data + fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pDataF = *pSetNew->pDataF; + fSet.pDataF->nRef = 1; + + // last + fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pLastF = *pSetNew->pLastF; + fSet.pLastF->nRef = 1; + + // sma + fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pSmaF = *pSetNew->pSmaF; + fSet.pSmaF->nRef = 1; + + if (taosArrayInsert(pTsdb->fs.aDFileSet, iOld, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + iOld++; + iNew++; + continue; + } + + return code; + +_err: + tsdbError("vgId:%d tsdb fs commit phase 2 failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS) { + int32_t code = 0; + int32_t nRef; + + pFS->aDFileSet = taosArrayInit(taosArrayGetSize(pTsdb->fs.aDFileSet), sizeof(SDFileSet)); + if (pFS->aDFileSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + pFS->pDelFile = pTsdb->fs.pDelFile; + if (pFS->pDelFile) { + nRef = atomic_fetch_add_32(&pFS->pDelFile->nRef, 1); + ASSERT(nRef > 0); + } + + SDFileSet fSet; + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet 
*)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + fSet = *pSet; + + nRef = atomic_fetch_add_32(&pSet->pHeadF->nRef, 1); + ASSERT(nRef > 0); + + nRef = atomic_fetch_add_32(&pSet->pDataF->nRef, 1); + ASSERT(nRef > 0); + + nRef = atomic_fetch_add_32(&pSet->pLastF->nRef, 1); + ASSERT(nRef > 0); + + nRef = atomic_fetch_add_32(&pSet->pSmaF->nRef, 1); + ASSERT(nRef > 0); + + if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; } } @@ -688,16 +1027,59 @@ _exit: return code; } -void tsdbFSStateDeleteDFileSet(STsdbFSState *pState, int32_t fid) { - int32_t idx; +void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS) { + int32_t nRef; + char fname[TSDB_FILENAME_LEN]; - idx = taosArraySearchIdx(pState->aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); - ASSERT(idx >= 0); - taosArrayRemove(pState->aDFileSet, idx); -} + if (pFS->pDelFile) { + nRef = atomic_sub_fetch_32(&pFS->pDelFile->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbDelFileName(pTsdb, pFS->pDelFile, fname); + taosRemoveFile(fname); + taosMemoryFree(pFS->pDelFile); + } + } -SDelFile *tsdbFSStateGetDelFile(STsdbFSState *pState) { return pState->pDelFile; } + for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet); -SDFileSet *tsdbFSStateGetDFileSet(STsdbFSState *pState, int32_t fid, int32_t flag) { - return (SDFileSet *)taosArraySearch(pState->aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, flag); -} + // head + nRef = atomic_sub_fetch_32(&pSet->pHeadF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pHeadF); + } + + // data + nRef = atomic_sub_fetch_32(&pSet->pDataF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pDataF); + } + + 
// last + nRef = atomic_sub_fetch_32(&pSet->pLastF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pLastF); + } + + // sma + nRef = atomic_sub_fetch_32(&pSet->pSmaF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pSmaF); + } + } + + taosArrayDestroy(pFS->aDFileSet); +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFile.c b/source/dnode/vnode/src/tsdb/tsdbFile.c index f15ad072e7..135ee23d44 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile.c @@ -15,7 +15,7 @@ #include "tsdb.h" -static int32_t tPutHeadFile(uint8_t *p, SHeadFile *pHeadFile) { +int32_t tPutHeadFile(uint8_t *p, SHeadFile *pHeadFile) { int32_t n = 0; n += tPutI64v(p ? p + n : p, pHeadFile->commitID); @@ -35,7 +35,7 @@ static int32_t tGetHeadFile(uint8_t *p, SHeadFile *pHeadFile) { return n; } -static int32_t tPutDataFile(uint8_t *p, SDataFile *pDataFile) { +int32_t tPutDataFile(uint8_t *p, SDataFile *pDataFile) { int32_t n = 0; n += tPutI64v(p ? p + n : p, pDataFile->commitID); @@ -53,7 +53,7 @@ static int32_t tGetDataFile(uint8_t *p, SDataFile *pDataFile) { return n; } -static int32_t tPutLastFile(uint8_t *p, SLastFile *pLastFile) { +int32_t tPutLastFile(uint8_t *p, SLastFile *pLastFile) { int32_t n = 0; n += tPutI64v(p ? p + n : p, pLastFile->commitID); @@ -71,7 +71,7 @@ static int32_t tGetLastFile(uint8_t *p, SLastFile *pLastFile) { return n; } -static int32_t tPutSmaFile(uint8_t *p, SSmaFile *pSmaFile) { +int32_t tPutSmaFile(uint8_t *p, SSmaFile *pSmaFile) { int32_t n = 0; n += tPutI64v(p ? 
p + n : p, pSmaFile->commitID); @@ -90,90 +90,53 @@ static int32_t tGetSmaFile(uint8_t *p, SSmaFile *pSmaFile) { } // EXPOSED APIS ================================================== -void tsdbDataFileName(STsdb *pTsdb, SDFileSet *pDFileSet, EDataFileT ftype, char fname[]) { - STfs *pTfs = pTsdb->pVnode->pTfs; - - switch (ftype) { - case TSDB_HEAD_FILE: - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTfs, pDFileSet->diskId), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pDFileSet->fid, pDFileSet->fHead.commitID, - ".head"); - break; - case TSDB_DATA_FILE: - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTfs, pDFileSet->diskId), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pDFileSet->fid, pDFileSet->fData.commitID, - ".data"); - break; - case TSDB_LAST_FILE: - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTfs, pDFileSet->diskId), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pDFileSet->fid, pDFileSet->fLast.commitID, - ".last"); - break; - case TSDB_SMA_FILE: - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTfs, pDFileSet->diskId), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pDFileSet->fid, pDFileSet->fSma.commitID, - ".sma"); - break; - default: - ASSERT(0); - break; - } +void tsdbHeadFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SHeadFile *pHeadF, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pHeadF->commitID, ".head"); } -bool tsdbFileIsSame(SDFileSet *pDFileSet1, SDFileSet *pDFileSet2, EDataFileT ftype) { - if (pDFileSet1->diskId.level != pDFileSet2->diskId.level || pDFileSet1->diskId.id != pDFileSet2->diskId.id) { - return false; - } +void tsdbDataFileName(STsdb *pTsdb, SDiskID did, 
int32_t fid, SDataFile *pDataF, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pDataF->commitID, ".data"); +} - switch (ftype) { - case TSDB_HEAD_FILE: - return pDFileSet1->fHead.commitID == pDFileSet2->fHead.commitID; - case TSDB_DATA_FILE: - return pDFileSet1->fData.commitID == pDFileSet2->fData.commitID; - case TSDB_LAST_FILE: - return pDFileSet1->fLast.commitID == pDFileSet2->fLast.commitID; - case TSDB_SMA_FILE: - return pDFileSet1->fSma.commitID == pDFileSet2->fSma.commitID; - default: - ASSERT(0); - break; - } +void tsdbLastFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SLastFile *pLastF, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pLastF->commitID, ".last"); +} + +void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pSmaF->commitID, ".sma"); } bool tsdbDelFileIsSame(SDelFile *pDelFile1, SDelFile *pDelFile2) { return pDelFile1->commitID == pDelFile2->commitID; } -int32_t tsdbUpdateDFileHdr(TdFilePtr pFD, SDFileSet *pSet, EDataFileT ftype) { - int32_t code = 0; - int64_t n; - char hdr[TSDB_FHDR_SIZE]; - - memset(hdr, 0, TSDB_FHDR_SIZE); - tPutDataFileHdr(hdr, pSet, ftype); - taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); - - n = taosLSeekFile(pFD, 0, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _exit; - } - - n = taosWriteFile(pFD, hdr, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _exit; - } - -_exit: - return code; -} - int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT 
ftype) { int32_t code = 0; int64_t size; + int64_t n; TdFilePtr pFD; char fname[TSDB_FILENAME_LEN]; + char hdr[TSDB_FHDR_SIZE] = {0}; - tsdbDataFileName(pTsdb, pSet, ftype, fname); + // truncate + switch (ftype) { + case TSDB_DATA_FILE: + size = pSet->pDataF->size; + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); + tPutDataFile(hdr, pSet->pDataF); + break; + case TSDB_SMA_FILE: + size = pSet->pSmaF->size; + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); + tPutSmaFile(hdr, pSet->pSmaF); + break; + default: + ASSERT(0); + } + + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); // open pFD = taosOpenFile(fname, TD_FILE_WRITE); @@ -182,31 +145,24 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { goto _err; } - // truncate - switch (ftype) { - case TSDB_HEAD_FILE: - size = pSet->fHead.size; - break; - case TSDB_DATA_FILE: - size = pSet->fData.size; - break; - case TSDB_LAST_FILE: - size = pSet->fLast.size; - break; - case TSDB_SMA_FILE: - size = pSet->fSma.size; - break; - default: - ASSERT(0); - } + // ftruncate if (taosFtruncateFile(pFD, size) < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } // update header - code = tsdbUpdateDFileHdr(pFD, pSet, ftype); - if (code) goto _err; + n = taosLSeekFile(pFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } // sync if (taosFsyncFile(pFD) < 0) { @@ -220,42 +176,20 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { return code; _err: + tsdbError("vgId:%d tsdb rollback file failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } -int32_t tPutDataFileHdr(uint8_t *p, SDFileSet *pSet, EDataFileT ftype) { - int32_t n = 0; - - switch (ftype) { - case TSDB_HEAD_FILE: - n += tPutHeadFile(p ? 
p + n : p, &pSet->fHead); - break; - case TSDB_DATA_FILE: - n += tPutDataFile(p ? p + n : p, &pSet->fData); - break; - case TSDB_LAST_FILE: - n += tPutLastFile(p ? p + n : p, &pSet->fLast); - break; - case TSDB_SMA_FILE: - n += tPutSmaFile(p ? p + n : p, &pSet->fSma); - break; - default: - ASSERT(0); - } - - return n; -} - int32_t tPutDFileSet(uint8_t *p, SDFileSet *pSet) { int32_t n = 0; n += tPutI32v(p ? p + n : p, pSet->diskId.level); n += tPutI32v(p ? p + n : p, pSet->diskId.id); n += tPutI32v(p ? p + n : p, pSet->fid); - n += tPutHeadFile(p ? p + n : p, &pSet->fHead); - n += tPutDataFile(p ? p + n : p, &pSet->fData); - n += tPutLastFile(p ? p + n : p, &pSet->fLast); - n += tPutSmaFile(p ? p + n : p, &pSet->fSma); + n += tPutHeadFile(p ? p + n : p, pSet->pHeadF); + n += tPutDataFile(p ? p + n : p, pSet->pDataF); + n += tPutLastFile(p ? p + n : p, pSet->pLastF); + n += tPutSmaFile(p ? p + n : p, pSet->pSmaF); return n; } @@ -266,20 +200,18 @@ int32_t tGetDFileSet(uint8_t *p, SDFileSet *pSet) { n += tGetI32v(p + n, &pSet->diskId.level); n += tGetI32v(p + n, &pSet->diskId.id); n += tGetI32v(p + n, &pSet->fid); - n += tGetHeadFile(p + n, &pSet->fHead); - n += tGetDataFile(p + n, &pSet->fData); - n += tGetLastFile(p + n, &pSet->fLast); - n += tGetSmaFile(p + n, &pSet->fSma); + n += tGetHeadFile(p + n, pSet->pHeadF); + n += tGetDataFile(p + n, pSet->pDataF); + n += tGetLastFile(p + n, pSet->pLastF); + n += tGetSmaFile(p + n, pSet->pSmaF); return n; } // SDelFile =============================================== void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]) { - STfs *pTfs = pTsdb->pVnode->pTfs; - - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%dver%" PRId64 "%s", tfsGetPrimaryPath(pTfs), TD_DIRSEP, pTsdb->path, - TD_DIRSEP, TD_VID(pTsdb->pVnode), pFile->commitID, ".del"); + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%dver%" PRId64 "%s", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), 
pFile->commitID, ".del"); } int32_t tPutDelFile(uint8_t *p, SDelFile *pDelFile) { diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index ee8a23e76e..fa775bb882 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -93,7 +93,11 @@ static int32_t tbDataPCmprFn(const void *p1, const void *p2) { } void tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData) { STbData *pTbData = &(STbData){.suid = suid, .uid = uid}; - void *p = taosArraySearch(pMemTable->aTbData, &pTbData, tbDataPCmprFn, TD_EQ); + + taosRLockLatch(&pMemTable->latch); + void *p = taosArraySearch(pMemTable->aTbData, &pTbData, tbDataPCmprFn, TD_EQ); + taosRUnLockLatch(&pMemTable->latch); + *ppTbData = p ? *(STbData **)p : NULL; } @@ -363,10 +367,13 @@ static int32_t tsdbGetOrCreateTbData(SMemTable *pMemTable, tb_uid_t suid, tb_uid void *p; if (idx < 0) { - p = taosArrayPush(pMemTable->aTbData, &pTbData); - } else { - p = taosArrayInsert(pMemTable->aTbData, idx, &pTbData); + idx = taosArrayGetSize(pMemTable->aTbData); } + + taosWLockLatch(&pMemTable->latch); + p = taosArrayInsert(pMemTable->aTbData, idx, &pTbData); + taosWUnLockLatch(&pMemTable->latch); + if (p == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -605,46 +612,3 @@ void tsdbUnrefMemTable(SMemTable *pMemTable) { tsdbMemTableDestroy(pMemTable); } } - -int32_t tsdbTakeMemSnapshot(STsdb *pTsdb, SMemTable **ppMem, SMemTable **ppIMem) { - int32_t code = 0; - - // lock - code = taosThreadRwlockRdlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - goto _exit; - } - - // take snapshot - *ppMem = pTsdb->mem; - *ppIMem = pTsdb->imem; - - if (*ppMem) { - tsdbRefMemTable(*ppMem); - } - - if (*ppIMem) { - tsdbRefMemTable(*ppIMem); - } - - // unlock - code = taosThreadRwlockUnlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - goto _exit; - } - -_exit: - return code; 
-} - -void tsdbUntakeMemSnapshot(STsdb *pTsdb, SMemTable *pMem, SMemTable *pIMem) { - if (pMem) { - tsdbUnrefMemTable(pMem); - } - - if (pIMem) { - tsdbUnrefMemTable(pIMem); - } -} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbOpen.c b/source/dnode/vnode/src/tsdb/tsdbOpen.c index 064c7adf4b..0b355d91b4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbOpen.c +++ b/source/dnode/vnode/src/tsdb/tsdbOpen.c @@ -66,7 +66,7 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee tfsMkdir(pVnode->pTfs, pTsdb->path); // open tsdb - if (tsdbFSOpen(pTsdb, &pTsdb->pFS) < 0) { + if (tsdbFSOpen(pTsdb) < 0) { goto _err; } @@ -88,7 +88,7 @@ _err: int tsdbClose(STsdb **pTsdb) { if (*pTsdb) { taosThreadRwlockDestroy(&(*pTsdb)->rwLock); - tsdbFSClose((*pTsdb)->pFS); + tsdbFSClose(*pTsdb); tsdbCloseCache((*pTsdb)->lruCache); taosMemoryFreeClear(*pTsdb); } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index ccca13e55c..26ced6cf6a 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -118,8 +118,7 @@ struct STsdbReader { char* idStr; // query info handle, for debug purpose int32_t type; // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows SBlockLoadSuppInfo suppInfo; - SMemTable* pMem; - SMemTable* pIMem; + STsdbReadSnap* pReadSnap; SIOCostSummary cost; STSchema* pSchema; @@ -275,20 +274,18 @@ static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* cap } // init file iterator -static int32_t initFilesetIterator(SFilesetIter* pIter, const STsdbFSState* pFState, int32_t order, const char* idstr) { - size_t numOfFileset = taosArrayGetSize(pFState->aDFileSet); +static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, int32_t order, const char* idstr) { + size_t numOfFileset = taosArrayGetSize(aDFileSet); pIter->index = ASCENDING_TRAVERSE(order) ? 
-1 : numOfFileset; pIter->order = order; - pIter->pFileList = taosArrayDup(pFState->aDFileSet); + pIter->pFileList = aDFileSet; pIter->numOfFiles = numOfFileset; tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, idstr); return TSDB_CODE_SUCCESS; } -static void cleanupFilesetIterator(SFilesetIter* pIter) { taosArrayDestroy(pIter->pFileList); } - static bool filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader) { bool asc = ASCENDING_TRAVERSE(pIter->order); int32_t step = asc ? 1 : -1; @@ -1881,8 +1878,8 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea int32_t backward = (!ASCENDING_TRAVERSE(pReader->order)); STbData* d = NULL; - if (pReader->pMem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pMem, pReader->suid, pBlockScanInfo->uid, &d); + if (pReader->pReadSnap->pMem != NULL) { + tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid, &d); if (d != NULL) { code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter); if (code == TSDB_CODE_SUCCESS) { @@ -1902,8 +1899,8 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea } STbData* di = NULL; - if (pReader->pIMem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pIMem, pReader->suid, pBlockScanInfo->uid, &di); + if (pReader->pReadSnap->pIMem != NULL) { + tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid, &di); if (di != NULL) { code = tsdbTbDataIterCreate(di, &startKey, backward, &pBlockScanInfo->iiter.iter); if (code == TSDB_CODE_SUCCESS) { @@ -1939,7 +1936,7 @@ int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* SArray* pDelData = taosArrayInit(4, sizeof(SDelData)); - SDelFile* pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); + SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile; if (pDelFile) { SDelFReader* pDelFReader = NULL; code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); @@ 
-2836,8 +2833,10 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl SDataBlockIter* pBlockIter = &pReader->status.blockIter; - STsdbFSState* pFState = pReader->pTsdb->pFS->cState; - initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr); + code = tsdbTakeReadSnap(pReader->pTsdb, &pReader->pReadSnap); + if (code) goto _err; + + initFilesetIterator(&pReader->status.fileIter, (*ppReader)->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); resetDataBlockIterator(&pReader->status.blockIter, pReader->order); // no data in files, let's try buffer in memory @@ -2850,8 +2849,6 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl } } - tsdbTakeMemSnapshot(pReader->pTsdb, &pReader->pMem, &pReader->pIMem); - tsdbDebug("%p total numOfTable:%d in this query %s", pReader, numOfTables, pReader->idStr); return code; @@ -2867,7 +2864,7 @@ void tsdbReaderClose(STsdbReader* pReader) { SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - tsdbUntakeMemSnapshot(pReader->pTsdb, pReader->pMem, pReader->pIMem); + tsdbUntakeReadSnap(pReader->pTsdb, pReader->pReadSnap); taosMemoryFreeClear(pSupInfo->plist); taosMemoryFree(pSupInfo->colIds); @@ -2880,7 +2877,6 @@ void tsdbReaderClose(STsdbReader* pReader) { } taosMemoryFree(pSupInfo->buildBuf); - cleanupFilesetIterator(&pReader->status.fileIter); cleanupDataBlockIterator(&pReader->status.blockIter); destroyBlockScanInfo(pReader->status.pTableMap); blockDataDestroy(pReader->pResBlock); @@ -3087,8 +3083,7 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { tsdbDataFReaderClose(&pReader->pFileReader); - STsdbFSState* pFState = pReader->pTsdb->pFS->cState; - initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr); + initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); resetDataBlockIterator(&pReader->status.blockIter, 
pReader->order); resetDataBlockScanInfo(pReader->status.pTableMap); @@ -3250,3 +3245,68 @@ int32_t tsdbGetTableSchema(SVnode* pVnode, int64_t uid, STSchema** pSchema, int6 return TSDB_CODE_SUCCESS; } + +int32_t tsdbTakeReadSnap(STsdb* pTsdb, STsdbReadSnap** ppSnap) { + int32_t code = 0; + + // alloc + *ppSnap = (STsdbReadSnap*)taosMemoryCalloc(1, sizeof(STsdbReadSnap)); + if (*ppSnap == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + // lock + code = taosThreadRwlockRdlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + // take snapshot + (*ppSnap)->pMem = pTsdb->mem; + (*ppSnap)->pIMem = pTsdb->imem; + + if ((*ppSnap)->pMem) { + tsdbRefMemTable((*ppSnap)->pMem); + } + + if ((*ppSnap)->pIMem) { + tsdbRefMemTable((*ppSnap)->pIMem); + } + + // fs + code = tsdbFSRef(pTsdb, &(*ppSnap)->fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _exit; + } + + // unlock + code = taosThreadRwlockUnlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + tsdbTrace("vgId:%d take read snapshot", TD_VID(pTsdb->pVnode)); +_exit: + return code; +} + +void tsdbUntakeReadSnap(STsdb* pTsdb, STsdbReadSnap* pSnap) { + if (pSnap) { + if (pSnap->pMem) { + tsdbUnrefMemTable(pSnap->pMem); + } + + if (pSnap->pIMem) { + tsdbUnrefMemTable(pSnap->pIMem); + } + + tsdbFSUnref(pTsdb, &pSnap->fs); + taosMemoryFree(pSnap); + } + + tsdbTrace("vgId:%d untake read snapshot", TD_VID(pTsdb->pVnode)); +} diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index 5e8157864f..7365ac23b8 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -459,7 +459,7 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS // open impl // head - tsdbDataFileName(pTsdb, pSet, TSDB_HEAD_FILE, fname); + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); 
pReader->pHeadFD = taosOpenFile(fname, TD_FILE_READ); if (pReader->pHeadFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -467,7 +467,7 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS } // data - tsdbDataFileName(pTsdb, pSet, TSDB_DATA_FILE, fname); + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); pReader->pDataFD = taosOpenFile(fname, TD_FILE_READ); if (pReader->pDataFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -475,7 +475,7 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS } // last - tsdbDataFileName(pTsdb, pSet, TSDB_LAST_FILE, fname); + tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); pReader->pLastFD = taosOpenFile(fname, TD_FILE_READ); if (pReader->pLastFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -483,7 +483,7 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS } // sma - tsdbDataFileName(pTsdb, pSet, TSDB_SMA_FILE, fname); + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); pReader->pSmaFD = taosOpenFile(fname, TD_FILE_READ); if (pReader->pSmaFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -536,8 +536,8 @@ _err: int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx, uint8_t **ppBuf) { int32_t code = 0; - int64_t offset = pReader->pSet->fHead.offset; - int64_t size = pReader->pSet->fHead.size - offset; + int64_t offset = pReader->pSet->pHeadF->offset; + int64_t size = pReader->pSet->pHeadF->size - offset; uint8_t *pBuf = NULL; int64_t n; uint32_t delimiter; @@ -1211,17 +1211,6 @@ _err: } // SDataFWriter ==================================================== -struct SDataFWriter { - STsdb *pTsdb; - SDFileSet wSet; - TdFilePtr pHeadFD; - TdFilePtr pDataFD; - TdFilePtr pLastFD; - TdFilePtr pSmaFD; -}; - -SDFileSet *tsdbDataFWriterGetWSet(SDataFWriter *pWriter) { return &pWriter->wSet; } - int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet) { 
int32_t code = 0; int32_t flag; @@ -1237,12 +1226,20 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS goto _err; } pWriter->pTsdb = pTsdb; - pWriter->wSet = *pSet; - pSet = &pWriter->wSet; + pWriter->wSet = (SDFileSet){.diskId = pSet->diskId, + .fid = pSet->fid, + .pHeadF = &pWriter->fHead, + .pDataF = &pWriter->fData, + .pLastF = &pWriter->fLast, + .pSmaF = &pWriter->fSma}; + pWriter->fHead = *pSet->pHeadF; + pWriter->fData = *pSet->pDataF; + pWriter->fLast = *pSet->pLastF; + pWriter->fSma = *pSet->pSmaF; // head flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; - tsdbDataFileName(pTsdb, pSet, TSDB_HEAD_FILE, fname); + tsdbHeadFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fHead, fname); pWriter->pHeadFD = taosOpenFile(fname, flag); if (pWriter->pHeadFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -1257,28 +1254,28 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS ASSERT(n == TSDB_FHDR_SIZE); - pSet->fHead.size += TSDB_FHDR_SIZE; + pWriter->fHead.size += TSDB_FHDR_SIZE; // data - if (pSet->fData.size == 0) { + if (pWriter->fData.size == 0) { flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; } else { flag = TD_FILE_WRITE; } - tsdbDataFileName(pTsdb, pSet, TSDB_DATA_FILE, fname); + tsdbDataFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fData, fname); pWriter->pDataFD = taosOpenFile(fname, flag); if (pWriter->pDataFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (pSet->fData.size == 0) { + if (pWriter->fData.size == 0) { n = taosWriteFile(pWriter->pDataFD, hdr, TSDB_FHDR_SIZE); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - pSet->fData.size += TSDB_FHDR_SIZE; + pWriter->fData.size += TSDB_FHDR_SIZE; } else { n = taosLSeekFile(pWriter->pDataFD, 0, SEEK_END); if (n < 0) { @@ -1286,29 +1283,29 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS goto _err; } - ASSERT(n == pSet->fData.size); 
+ ASSERT(n == pWriter->fData.size); } // last - if (pSet->fLast.size == 0) { + if (pWriter->fLast.size == 0) { flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; } else { flag = TD_FILE_WRITE; } - tsdbDataFileName(pTsdb, pSet, TSDB_LAST_FILE, fname); + tsdbLastFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fLast, fname); pWriter->pLastFD = taosOpenFile(fname, flag); if (pWriter->pLastFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (pSet->fLast.size == 0) { + if (pWriter->fLast.size == 0) { n = taosWriteFile(pWriter->pLastFD, hdr, TSDB_FHDR_SIZE); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - pSet->fLast.size += TSDB_FHDR_SIZE; + pWriter->fLast.size += TSDB_FHDR_SIZE; } else { n = taosLSeekFile(pWriter->pLastFD, 0, SEEK_END); if (n < 0) { @@ -1316,29 +1313,29 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS goto _err; } - ASSERT(n == pSet->fLast.size); + ASSERT(n == pWriter->fLast.size); } // sma - if (pSet->fSma.size == 0) { + if (pWriter->fSma.size == 0) { flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; } else { flag = TD_FILE_WRITE; } - tsdbDataFileName(pTsdb, pSet, TSDB_SMA_FILE, fname); + tsdbSmaFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fSma, fname); pWriter->pSmaFD = taosOpenFile(fname, flag); if (pWriter->pSmaFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (pSet->fSma.size == 0) { + if (pWriter->fSma.size == 0) { n = taosWriteFile(pWriter->pSmaFD, hdr, TSDB_FHDR_SIZE); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - pSet->fSma.size += TSDB_FHDR_SIZE; + pWriter->fSma.size += TSDB_FHDR_SIZE; } else { n = taosLSeekFile(pWriter->pSmaFD, 0, SEEK_END); if (n < 0) { @@ -1346,7 +1343,7 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS goto _err; } - ASSERT(n == pSet->fSma.size); + ASSERT(n == pWriter->fSma.size); } *ppWriter = pWriter; @@ -1418,22 +1415,76 @@ _err: int32_t 
tsdbUpdateDFileSetHeader(SDataFWriter *pWriter) { int32_t code = 0; + int64_t n; + char hdr[TSDB_FHDR_SIZE]; // head ============== - code = tsdbUpdateDFileHdr(pWriter->pHeadFD, &pWriter->wSet, TSDB_HEAD_FILE); - if (code) goto _err; + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutHeadFile(hdr, &pWriter->fHead); + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + n = taosLSeekFile(pWriter->pHeadFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pWriter->pHeadFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } // data ============== - code = tsdbUpdateDFileHdr(pWriter->pHeadFD, &pWriter->wSet, TSDB_DATA_FILE); - if (code) goto _err; + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutDataFile(hdr, &pWriter->fData); + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + n = taosLSeekFile(pWriter->pDataFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pWriter->pDataFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } // last ============== - code = tsdbUpdateDFileHdr(pWriter->pHeadFD, &pWriter->wSet, TSDB_LAST_FILE); - if (code) goto _err; + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutLastFile(hdr, &pWriter->fLast); + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + n = taosLSeekFile(pWriter->pLastFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pWriter->pLastFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } // sma ============== - code = tsdbUpdateDFileHdr(pWriter->pHeadFD, &pWriter->wSet, TSDB_SMA_FILE); - if (code) goto _err; + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutSmaFile(hdr, &pWriter->fSma); + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + n = taosLSeekFile(pWriter->pSmaFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = 
taosWriteFile(pWriter->pSmaFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } return code; @@ -1444,7 +1495,7 @@ _err: int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx, uint8_t **ppBuf) { int32_t code = 0; - SHeadFile *pHeadFile = &pWriter->wSet.fHead; + SHeadFile *pHeadFile = &pWriter->fHead; uint8_t *pBuf = NULL; int64_t size; int64_t n; @@ -1494,7 +1545,7 @@ _err: int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *mBlock, uint8_t **ppBuf, SBlockIdx *pBlockIdx) { int32_t code = 0; - SHeadFile *pHeadFile = &pWriter->wSet.fHead; + SHeadFile *pHeadFile = &pWriter->fHead; SBlockDataHdr hdr = {.delimiter = TSDB_FILE_DLMT, .suid = pBlockIdx->suid, .uid = pBlockIdx->uid}; uint8_t *pBuf = NULL; int64_t size; @@ -1831,9 +1882,9 @@ int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_ pSubBlock->nRow = pBlockData->nRow; pSubBlock->cmprAlg = cmprAlg; if (pBlock->last) { - pSubBlock->offset = pWriter->wSet.fLast.size; + pSubBlock->offset = pWriter->fLast.size; } else { - pSubBlock->offset = pWriter->wSet.fData.size; + pSubBlock->offset = pWriter->fData.size; } // ======================= BLOCK DATA ======================= @@ -1881,9 +1932,9 @@ int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_ pSubBlock->szBlock = pSubBlock->szBlockCol + sizeof(TSCKSUM) + nData; if (pBlock->last) { - pWriter->wSet.fLast.size += pSubBlock->szBlock; + pWriter->fLast.size += pSubBlock->szBlock; } else { - pWriter->wSet.fData.size += pSubBlock->szBlock; + pWriter->fData.size += pSubBlock->szBlock; } // ======================= BLOCK SMA ======================= @@ -1896,8 +1947,8 @@ int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_ if (code) goto _err; if (pSubBlock->nSma > 0) { - pSubBlock->sOffset = pWriter->wSet.fSma.size; - pWriter->wSet.fSma.size += (sizeof(SColumnDataAgg) * pSubBlock->nSma + sizeof(TSCKSUM)); + pSubBlock->sOffset = 
pWriter->fSma.size; + pWriter->fSma.size += (sizeof(SColumnDataAgg) * pSubBlock->nSma + sizeof(TSCKSUM)); } _exit: @@ -1924,8 +1975,8 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { char fNameTo[TSDB_FILENAME_LEN]; // head - tsdbDataFileName(pTsdb, pSetFrom, TSDB_HEAD_FILE, fNameFrom); - tsdbDataFileName(pTsdb, pSetTo, TSDB_HEAD_FILE, fNameTo); + tsdbHeadFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pHeadF, fNameFrom); + tsdbHeadFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pHeadF, fNameTo); pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { @@ -1939,7 +1990,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->fHead.size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pHeadF->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1948,8 +1999,8 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { taosCloseFile(&PInFD); // data - tsdbDataFileName(pTsdb, pSetFrom, TSDB_DATA_FILE, fNameFrom); - tsdbDataFileName(pTsdb, pSetTo, TSDB_DATA_FILE, fNameTo); + tsdbDataFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pDataF, fNameFrom); + tsdbDataFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pDataF, fNameTo); pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { @@ -1963,7 +2014,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->fData.size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pDataF->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1972,8 +2023,9 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { taosCloseFile(&PInFD); // last - tsdbDataFileName(pTsdb, pSetFrom, TSDB_LAST_FILE, fNameFrom); - 
tsdbDataFileName(pTsdb, pSetTo, TSDB_LAST_FILE, fNameTo); + tsdbLastFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pLastF, fNameFrom); + tsdbLastFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pLastF, fNameTo); + pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -1986,7 +2038,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->fLast.size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pLastF->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1995,8 +2047,8 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { taosCloseFile(&PInFD); // sma - tsdbDataFileName(pTsdb, pSetFrom, TSDB_SMA_FILE, fNameFrom); - tsdbDataFileName(pTsdb, pSetTo, TSDB_SMA_FILE, fNameTo); + tsdbSmaFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pSmaF, fNameFrom); + tsdbSmaFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pSmaF, fNameTo); pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { @@ -2010,7 +2062,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->fSma.size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pSmaF->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index 137ef9a4a6..5ba2ecb64b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -15,90 +15,99 @@ #include "tsdb.h" -static int32_t tsdbDoRetentionImpl(STsdb *pTsdb, int64_t now, int8_t try, int8_t *canDo) { - int32_t code = 0; - STsdbFSState *pState; - - if (try) { - pState = pTsdb->pFS->cState; - *canDo = 0; - } else { - pState = pTsdb->pFS->nState; - } 
- - for (int32_t iSet = 0; iSet < taosArrayGetSize(pState->aDFileSet); iSet++) { - SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pState->aDFileSet, iSet); - int32_t expLevel = tsdbFidLevel(pDFileSet->fid, &pTsdb->keepCfg, now); +static bool tsdbShouldDoRetention(STsdb *pTsdb, int64_t now) { + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + int32_t expLevel = tsdbFidLevel(pSet->fid, &pTsdb->keepCfg, now); SDiskID did; - // check - if (expLevel == pDFileSet->diskId.id) continue; + if (expLevel == pSet->diskId.level) continue; - // delete or move if (expLevel < 0) { - if (try) { - *canDo = 1; - } else { - tsdbFSStateDeleteDFileSet(pState, pDFileSet->fid); - iSet--; - } + return true; + } else { + if (tfsAllocDisk(pTsdb->pVnode->pTfs, expLevel, &did) < 0) { + return false; + } + + if (did.level == pSet->diskId.level) continue; + + return true; + } + } + + return false; +} + +int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now) { + int32_t code = 0; + + if (!tsdbShouldDoRetention(pTsdb, now)) { + return code; + } + + // do retention + STsdbFS fs; + + code = tsdbFSCopy(pTsdb, &fs); + if (code) goto _err; + + for (int32_t iSet = 0; iSet < taosArrayGetSize(fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + int32_t expLevel = tsdbFidLevel(pSet->fid, &pTsdb->keepCfg, now); + SDiskID did; + + if (expLevel < 0) { + taosMemoryFree(pSet->pHeadF); + taosMemoryFree(pSet->pDataF); + taosMemoryFree(pSet->pLastF); + taosMemoryFree(pSet->pSmaF); + taosArrayRemove(fs.aDFileSet, iSet); + iSet--; } else { - // alloc if (tfsAllocDisk(pTsdb->pVnode->pTfs, expLevel, &did) < 0) { code = terrno; goto _exit; } - if (did.level == pDFileSet->diskId.level) continue; + if (did.level == pSet->diskId.level) continue; - if (try) { - *canDo = 1; - } else { - // copy the file to new disk + // copy file to new disk (todo) + SDFileSet fSet = *pSet; 
+ fSet.diskId = did; - SDFileSet nDFileSet = *pDFileSet; - nDFileSet.diskId = did; + code = tsdbDFileSetCopy(pTsdb, pSet, &fSet); + if (code) goto _err; - tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did); - - code = tsdbDFileSetCopy(pTsdb, pDFileSet, &nDFileSet); - if (code) goto _exit; - - code = tsdbFSStateUpsertDFileSet(pState, &nDFileSet); - if (code) goto _exit; - } + code = tsdbFSUpsertFSet(&fs, &fSet); + if (code) goto _err; } + + /* code */ } -_exit: - return code; -} - -int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now) { - int32_t code = 0; - int8_t canDo; - - // try - tsdbDoRetentionImpl(pTsdb, now, 1, &canDo); - if (!canDo) goto _exit; - - // begin - code = tsdbFSBegin(pTsdb->pFS); + // do change fs + code = tsdbFSCommit1(pTsdb, &fs); if (code) goto _err; - // do retention - code = tsdbDoRetentionImpl(pTsdb, now, 0, NULL); - if (code) goto _err; + taosThreadRwlockWrlock(&pTsdb->rwLock); - // commit - code = tsdbFSCommit(pTsdb->pFS); - if (code) goto _err; + code = tsdbFSCommit2(pTsdb, &fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _err; + } + + taosThreadRwlockUnlock(&pTsdb->rwLock); + + tsdbFSDestroy(&fs); _exit: return code; _err: tsdbError("vgId:%d tsdb do retention failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - tsdbFSRollback(pTsdb->pFS); + ASSERT(0); + // tsdbFSRollback(pTsdb->pFS); return code; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index fea0254045..43537c9a8d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -20,6 +20,7 @@ struct STsdbSnapReader { STsdb* pTsdb; int64_t sver; int64_t ever; + STsdbFS fs; // for data file int8_t dataDone; int32_t fid; @@ -45,7 +46,8 @@ static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { while (true) { if (pReader->pDataFReader == NULL) { - SDFileSet* pSet = tsdbFSStateGetDFileSet(pTsdb->pFS->cState, 
pReader->fid, TD_GT); + SDFileSet* pSet = + taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, tDFileSetCmprFn, TD_GT); if (pSet == NULL) goto _exit; @@ -159,7 +161,7 @@ _err: static int32_t tsdbSnapReadDel(STsdbSnapReader* pReader, uint8_t** ppData) { int32_t code = 0; STsdb* pTsdb = pReader->pTsdb; - SDelFile* pDelFile = pTsdb->pFS->cState->pDelFile; + SDelFile* pDelFile = pReader->fs.pDelFile; if (pReader->pDelFReader == NULL) { if (pDelFile == NULL) { @@ -254,6 +256,24 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapRe pReader->sver = sver; pReader->ever = ever; + code = taosThreadRwlockRdlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _err; + } + + code = tsdbFSRef(pTsdb, &pReader->fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _err; + } + + code = taosThreadRwlockUnlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _err; + } + pReader->fid = INT32_MIN; pReader->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); if (pReader->aBlockIdx == NULL) { @@ -305,6 +325,8 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { taosArrayDestroy(pReader->aDelIdx); taosArrayDestroy(pReader->aDelData); + tsdbFSUnref(pReader->pTsdb, &pReader->fs); + tsdbInfo("vgId:%d vnode snapshot tsdb reader closed", TD_VID(pReader->pTsdb->pVnode)); taosMemoryFree(pReader); @@ -358,6 +380,7 @@ struct STsdbSnapWriter { STsdb* pTsdb; int64_t sver; int64_t ever; + STsdbFS fs; // config int32_t minutes; @@ -798,7 +821,7 @@ static int32_t tsdbSnapWriteDataEnd(STsdbSnapWriter* pWriter) { code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdxW, NULL); if (code) goto _err; - code = tsdbFSStateUpsertDFileSet(pTsdb->pFS->nState, tsdbDataFWriterGetWSet(pWriter->pDataFWriter)); + code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet); if (code) goto _err; code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1); @@ -843,7 +866,7 @@ static int32_t 
tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 pWriter->fid = fid; // read - SDFileSet* pSet = tsdbFSStateGetDFileSet(pTsdb->pFS->nState, fid, TD_EQ); + SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); if (pSet) { code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet); if (code) goto _err; @@ -863,22 +886,26 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 tBlockDataReset(&pWriter->bDataR); // write - SDFileSet wSet; + SHeadFile fHead; + SDataFile fData; + SLastFile fLast; + SSmaFile fSma; + SDFileSet wSet = {.pHeadF = &fHead, .pDataF = &fData, .pLastF = &fLast, .pSmaF = &fSma}; if (pSet) { - wSet = (SDFileSet){.diskId = pSet->diskId, - .fid = fid, - .fHead = {.commitID = pWriter->commitID, .offset = 0, .size = 0}, - .fData = pSet->fData, - .fLast = {.commitID = pWriter->commitID, .size = 0}, - .fSma = pSet->fSma}; + wSet.diskId = pSet->diskId; + wSet.fid = fid; + fHead = (SHeadFile){.commitID = pWriter->commitID, .offset = 0, .size = 0}; + fData = *pSet->pDataF; + fLast = (SLastFile){.commitID = pWriter->commitID, .size = 0}; + fSma = *pSet->pSmaF; } else { - wSet = (SDFileSet){.diskId = (SDiskID){.level = 0, .id = 0}, - .fid = fid, - .fHead = {.commitID = pWriter->commitID, .offset = 0, .size = 0}, - .fData = {.commitID = pWriter->commitID, .size = 0}, - .fLast = {.commitID = pWriter->commitID, .size = 0}, - .fSma = {.commitID = pWriter->commitID, .size = 0}}; + wSet.diskId = (SDiskID){.level = 0, .id = 0}; + wSet.fid = fid; + fHead = (SHeadFile){.commitID = pWriter->commitID, .offset = 0, .size = 0}; + fData = (SDataFile){.commitID = pWriter->commitID, .size = 0}; + fLast = (SLastFile){.commitID = pWriter->commitID, .size = 0}; + fSma = (SSmaFile){.commitID = pWriter->commitID, .size = 0}; } code = tsdbDataFWriterOpen(&pWriter->pDataFWriter, pTsdb, &wSet); @@ -907,7 +934,7 @@ static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* 
pData, uint32 STsdb* pTsdb = pWriter->pTsdb; if (pWriter->pDelFWriter == NULL) { - SDelFile* pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->nState); + SDelFile* pDelFile = pWriter->fs.pDelFile; // reader if (pDelFile) { @@ -1017,7 +1044,7 @@ static int32_t tsdbSnapWriteDelEnd(STsdbSnapWriter* pWriter) { code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter); if (code) goto _err; - code = tsdbFSStateUpsertDelFile(pTsdb->pFS->nState, &pWriter->pDelFWriter->fDel); + code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel); if (code) goto _err; code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1); @@ -1051,6 +1078,9 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr pWriter->sver = sver; pWriter->ever = ever; + code = tsdbFSCopy(pTsdb, &pWriter->fs); + if (code) goto _err; + // config pWriter->minutes = pTsdb->keepCfg.days; pWriter->precision = pTsdb->keepCfg.precision; @@ -1096,9 +1126,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr goto _err; } - code = tsdbFSBegin(pTsdb->pFS); - if (code) goto _err; - *ppWriter = pWriter; return code; @@ -1113,8 +1140,9 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { STsdbSnapWriter* pWriter = *ppWriter; if (rollback) { - code = tsdbFSRollback(pWriter->pTsdb->pFS); - if (code) goto _err; + ASSERT(0); + // code = tsdbFSRollback(pWriter->pTsdb->pFS); + // if (code) goto _err; } else { code = tsdbSnapWriteDataEnd(pWriter); if (code) goto _err; @@ -1122,7 +1150,10 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { code = tsdbSnapWriteDelEnd(pWriter); if (code) goto _err; - code = tsdbFSCommit(pWriter->pTsdb->pFS); + code = tsdbFSCommit1(pWriter->pTsdb, &pWriter->fs); + if (code) goto _err; + + code = tsdbFSCommit2(pWriter->pTsdb, &pWriter->fs); if (code) goto _err; } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index e6d116dfef..e8d51555d8 100644 --- 
a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -316,7 +316,7 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { case TDMT_VND_TABLE_CFG: return vnodeGetTableCfg(pVnode, pMsg); case TDMT_VND_CONSUME: - return tqProcessPollReq(pVnode->pTq, pMsg, pInfo->workerId); + return tqProcessPollReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_RUN: return tqProcessTaskRunReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_DISPATCH: @@ -773,6 +773,7 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t version, void *pReq terrno = TSDB_CODE_SUCCESS; pRsp->code = 0; + pSubmitReq->version = version; #ifdef TD_DEBUG_PRINT_ROW vnodeDebugPrintSubmitMsg(pVnode, pReq, __func__); @@ -791,7 +792,7 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t version, void *pReq submitRsp.pArray = taosArrayInit(msgIter.numOfBlocks, sizeof(SSubmitBlkRsp)); newTbUids = taosArrayInit(msgIter.numOfBlocks, sizeof(int64_t)); - if (!submitRsp.pArray) { + if (!submitRsp.pArray || !newTbUids) { pRsp->code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 3d76a03647..c6ff913837 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -546,9 +546,10 @@ typedef struct SProjectOperatorInfo { SOptrBasicInfo binfo; SAggSupporter aggSup; SNode* pFilterNode; // filter info, which is push down by optimizer - SSDataBlock* existDataBlock; SArray* pPseudoColInfo; SLimitInfo limitInfo; + bool mergeDataBlocks; + SSDataBlock* pFinalRes; } SProjectOperatorInfo; typedef struct SIndefOperatorInfo { diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 0d547c2a14..7d3f16aedf 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -131,7 +131,8 @@ int32_t qSetMultiStreamInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numO 
return code; } -qTaskInfo_t qCreateQueueExecTaskInfo(void* msg, SReadHandle* readers, int32_t* numOfCols, SSchemaWrapper** pSchemaWrapper) { +qTaskInfo_t qCreateQueueExecTaskInfo(void* msg, SReadHandle* readers, int32_t* numOfCols, + SSchemaWrapper** pSchemaWrapper) { if (msg == NULL) { // TODO create raw scan return NULL; @@ -157,7 +158,7 @@ qTaskInfo_t qCreateQueueExecTaskInfo(void* msg, SReadHandle* readers, int32_t* n SDataBlockDescNode* pDescNode = pPlan->pNode->pOutputDataBlockDesc; *numOfCols = 0; - SNode* pNode; + SNode* pNode; FOREACH(pNode, pDescNode->pSlots) { SSlotDescNode* pSlotDesc = (SSlotDescNode*)pNode; if (pSlotDesc->output) { @@ -605,6 +606,9 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, const STqOffsetVal* pOffset) { if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { SStreamScanInfo* pInfo = pOperator->info; if (pOffset->type == TMQ_OFFSET__LOG) { + STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; + tsdbReaderClose(pTSInfo->dataReader); + pTSInfo->dataReader = NULL; #if 0 if (tOffsetEqual(pOffset, &pTaskInfo->streamInfo.lastStatus) && pInfo->tqReader->pWalReader->curVersion != pOffset->version) { @@ -631,11 +635,20 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, const STqOffsetVal* pOffset) { return -1; } } + /*if (pTaskInfo->streamInfo.lastStatus.type != TMQ_OFFSET__SNAPSHOT_DATA ||*/ /*pTaskInfo->streamInfo.lastStatus.uid != uid || pTaskInfo->streamInfo.lastStatus.ts != ts) {*/ STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; int32_t tableSz = taosArrayGetSize(pTaskInfo->tableqinfoList.pTableList); - bool found = false; + +#ifndef NDEBUG + + qDebug("switch to next table %ld (cursor %d), %ld rows returned", uid, pTableScanInfo->currentTable, + pInfo->pTableScanOp->resultInfo.totalRows); + pInfo->pTableScanOp->resultInfo.totalRows = 0; +#endif + + bool found = false; for (int32_t i = 0; i < tableSz; i++) { STableKeyInfo* pTableInfo = taosArrayGet(pTaskInfo->tableqinfoList.pTableList, i); if (pTableInfo->uid == uid) { @@ 
-648,6 +661,14 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, const STqOffsetVal* pOffset) { // TODO after dropping table, table may be not found ASSERT(found); + if (pTableScanInfo->dataReader == NULL) { + if (tsdbReaderOpen(pTableScanInfo->readHandle.vnode, &pTableScanInfo->cond, + pTaskInfo->tableqinfoList.pTableList, &pTableScanInfo->dataReader, NULL) < 0 || + pTableScanInfo->dataReader == NULL) { + ASSERT(0); + } + } + tsdbSetTableId(pTableScanInfo->dataReader, uid); int64_t oldSkey = pTableScanInfo->cond.twindows.skey; pTableScanInfo->cond.twindows.skey = ts + 1; @@ -672,6 +693,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, const STqOffsetVal* pOffset) { return 0; } + #if 0 int32_t qStreamPrepareTsdbScan(qTaskInfo_t tinfo, uint64_t uid, int64_t ts) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index d3bfac82b6..012d25a0c9 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -376,9 +376,7 @@ void initExecTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pQueryWindow colDataAppendInt64(pColData, 4, &pQueryWindow->ekey); } -void cleanupExecTimeWindowInfo(SColumnInfoData* pColData) { - colDataDestroy(pColData); -} +void cleanupExecTimeWindowInfo(SColumnInfoData* pColData) { colDataDestroy(pColData); } void doApplyFunctions(SExecTaskInfo* taskInfo, SqlFunctionCtx* pCtx, STimeWindow* pWin, SColumnInfoData* pTimeWindowData, int32_t offset, int32_t forwardStep, TSKEY* tsCol, @@ -524,8 +522,8 @@ static int32_t doSetInputDataBlock(SOperatorInfo* pOperator, SqlFunctionCtx* pCt // NOTE: the last parameter is the primary timestamp column // todo: refactor this if (fmIsTimelineFunc(pCtx[i].functionId) && (j == pOneExpr->base.numOfParams - 1)) { - pInput->pPTS = pInput->pData[j]; // in case of merge function, this is not always the ts column data. 
-// ASSERT(pInput->pPTS->info.type == TSDB_DATA_TYPE_TIMESTAMP); + pInput->pPTS = pInput->pData[j]; // in case of merge function, this is not always the ts column data. + // ASSERT(pInput->pPTS->info.type == TSDB_DATA_TYPE_TIMESTAMP); } ASSERT(pInput->pData[j] != NULL); } else if (pFuncParam->type == FUNC_PARAM_TYPE_VALUE) { @@ -633,7 +631,7 @@ int32_t projectApplyFunctions(SExprInfo* pExpr, SSDataBlock* pResult, SSDataBloc ASSERT(pResult->info.capacity > 0); colDataMergeCol(pResColData, startOffset, &pResult->info.capacity, &idata, dest.numOfRows); colDataDestroy(&idata); - + numOfRows = dest.numOfRows; taosArrayDestroy(pBlockList); } else if (pExpr[k].pExpr->nodeType == QUERY_NODE_FUNCTION) { @@ -835,7 +833,7 @@ void setTaskKilled(SExecTaskInfo* pTaskInfo) { pTaskInfo->code = TSDB_CODE_TSC_Q ///////////////////////////////////////////////////////////////////////////////////////////// STimeWindow getAlignQueryTimeWindow(SInterval* pInterval, int32_t precision, int64_t key) { - STimeWindow win = {0}; + STimeWindow win = {0}; win.skey = taosTimeTruncate(key, pInterval, precision); /* @@ -2374,7 +2372,7 @@ static SSDataBlock* doLoadRemoteData(SOperatorInfo* pOperator) { return NULL; } - while(1) { + while (1) { SSDataBlock* pBlock = doLoadRemoteDataImpl(pOperator); if (pBlock == NULL) { return NULL; @@ -2870,7 +2868,7 @@ int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t* order, int32_t* scan *order = TSDB_ORDER_ASC; *scanFlag = MAIN_SCAN; return TSDB_CODE_SUCCESS; - } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { + } else if (type == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN || type == QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN) { STableScanInfo* pTableScanInfo = pOperator->info; *order = pTableScanInfo->cond.order; *scanFlag = pTableScanInfo->scanFlag; @@ -3187,8 +3185,7 @@ int32_t aggDecodeResultRow(SOperatorInfo* pOperator, char* result) { return TDB_CODE_SUCCESS; } -int32_t handleLimitOffset(SOperatorInfo* pOperator, SLimitInfo* pLimitInfo, 
SSDataBlock* pBlock, SSDataBlock** pExistedBlock, - bool holdDataInBuf) { +int32_t handleLimitOffset(SOperatorInfo* pOperator, SLimitInfo* pLimitInfo, SSDataBlock* pBlock, bool holdDataInBuf) { if (pLimitInfo->remainGroupOffset > 0) { if (pLimitInfo->currentGroupId == 0) { // it is the first group pLimitInfo->currentGroupId = pBlock->info.groupId; @@ -3223,8 +3220,6 @@ int32_t handleLimitOffset(SOperatorInfo* pOperator, SLimitInfo* pLimitInfo, SSDa pLimitInfo->numOfOutputRows = 0; pLimitInfo->remainOffset = pLimitInfo->limit.offset; - *pExistedBlock = pBlock; - // existing rows that belongs to previous group. if (pBlock->info.rows > 0) { return PROJECT_RETRIEVE_DONE; @@ -3273,7 +3268,9 @@ static SSDataBlock* doProjectOperation(SOperatorInfo* pOperator) { SExprSupp* pSup = &pOperator->exprSupp; SSDataBlock* pRes = pInfo->pRes; - blockDataCleanup(pRes); + SSDataBlock* pFinalRes = pProjectInfo->pFinalRes; + + blockDataCleanup(pFinalRes); SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; if (pOperator->status == OP_EXEC_DONE) { @@ -3284,24 +3281,6 @@ static SSDataBlock* doProjectOperation(SOperatorInfo* pOperator) { return NULL; } -#if 0 - if (pProjectInfo->existDataBlock) { // TODO refactor - SSDataBlock* pBlock = pProjectInfo->existDataBlock; - pProjectInfo->existDataBlock = NULL; - - // the pDataBlock are always the same one, no need to call this again - setInputDataBlock(pOperator, pInfo->pCtx, pBlock, TSDB_ORDER_ASC); - - blockDataEnsureCapacity(pInfo->pRes, pBlock->info.rows); - projectApplyFunctions(pOperator->exprSupp.pExprInfo, pInfo->pRes, pBlock, pInfo->pCtx, pOperator->exprSupp.numOfExprs); - if (pRes->info.rows >= pProjectInfo->binfo.capacity * 0.8) { - copyTsColoum(pRes, pInfo->pCtx, pOperator->exprSupp.numOfExprs); - resetResultRowEntryResult(pInfo->pCtx, pOperator->exprSupp.numOfExprs); - return pRes; - } - } -#endif - int64_t st = 0; int32_t order = 0; int32_t scanFlag = 0; @@ -3311,61 +3290,131 @@ static SSDataBlock* doProjectOperation(SOperatorInfo* 
pOperator) { } SOperatorInfo* downstream = pOperator->pDownstream[0]; + SLimitInfo* pLimitInfo = &pProjectInfo->limitInfo; - while (1) { - // The downstream exec may change the value of the newgroup, so use a local variable instead. - SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); - if (pBlock == NULL) { - doSetOperatorCompleted(pOperator); + while(1) { + while (1) { + blockDataCleanup(pRes); + + // The downstream exec may change the value of the newgroup, so use a local variable instead. + SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + if (pBlock == NULL) { + doSetOperatorCompleted(pOperator); + break; + } + + if (pBlock->info.type == STREAM_RETRIEVE) { + // for stream interval + return pBlock; + } + + if (pLimitInfo->remainGroupOffset > 0) { + if (pLimitInfo->currentGroupId == 0 || pLimitInfo->currentGroupId == pBlock->info.groupId) { // it is the first group + pLimitInfo->currentGroupId = pBlock->info.groupId; + continue; + } else if (pLimitInfo->currentGroupId != pBlock->info.groupId) { + // now it is the data from a new group + pLimitInfo->remainGroupOffset -= 1; + pLimitInfo->currentGroupId = pBlock->info.groupId; + + // ignore data block in current group + if (pLimitInfo->remainGroupOffset > 0) { + continue; + } + } + + // set current group id of the project operator + pLimitInfo->currentGroupId = pBlock->info.groupId; + } + + // remainGroupOffset == 0 + // here check for a new group data, we need to handle the data of the previous group. + if (pLimitInfo->currentGroupId != 0 && pLimitInfo->currentGroupId != pBlock->info.groupId) { + pLimitInfo->numOfOutputGroups += 1; + if ((pLimitInfo->slimit.limit > 0) && (pLimitInfo->slimit.limit <= pLimitInfo->numOfOutputGroups)) { + doSetOperatorCompleted(pOperator); + break; + } + + // reset the value for a new group data + // existing rows that belongs to previous group. 
+ pLimitInfo->numOfOutputRows = 0; + pLimitInfo->remainOffset = pLimitInfo->limit.offset; + } + + // the pDataBlock are always the same one, no need to call this again + int32_t code = getTableScanInfo(pOperator->pDownstream[0], &order, &scanFlag); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } + + setInputDataBlock(pOperator, pSup->pCtx, pBlock, order, scanFlag, false); + blockDataEnsureCapacity(pInfo->pRes, pInfo->pRes->info.rows + pBlock->info.rows); + + code = projectApplyFunctions(pSup->pExprInfo, pInfo->pRes, pBlock, pSup->pCtx, pSup->numOfExprs, + pProjectInfo->pPseudoColInfo); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } + + // set current group id + pLimitInfo->currentGroupId = pBlock->info.groupId; + + if (pLimitInfo->remainOffset >= pInfo->pRes->info.rows) { + pLimitInfo->remainOffset -= pInfo->pRes->info.rows; + blockDataCleanup(pInfo->pRes); + continue; + } else if (pLimitInfo->remainOffset < pInfo->pRes->info.rows && pLimitInfo->remainOffset > 0) { + blockDataTrimFirstNRows(pInfo->pRes, pLimitInfo->remainOffset); + pLimitInfo->remainOffset = 0; + } + + // check for the limitation in each group + if (pLimitInfo->limit.limit >= 0 && + pLimitInfo->numOfOutputRows + pInfo->pRes->info.rows >= pLimitInfo->limit.limit) { + int32_t keepRows = (int32_t)(pLimitInfo->limit.limit - pLimitInfo->numOfOutputRows); + blockDataKeepFirstNRows(pInfo->pRes, keepRows); + if (pLimitInfo->slimit.limit > 0 && pLimitInfo->slimit.limit <= pLimitInfo->numOfOutputGroups) { + pOperator->status = OP_EXEC_DONE; + } + } + + pLimitInfo->numOfOutputRows += pInfo->pRes->info.rows; break; } - if (pBlock->info.type == STREAM_RETRIEVE) { - // for stream interval - return pBlock; - } - - // the pDataBlock are always the same one, no need to call this again - int32_t code = getTableScanInfo(pOperator->pDownstream[0], &order, &scanFlag); - if (code != TSDB_CODE_SUCCESS) { - longjmp(pTaskInfo->env, code); - } - - setInputDataBlock(pOperator, 
pSup->pCtx, pBlock, order, scanFlag, false); - blockDataEnsureCapacity(pInfo->pRes, pInfo->pRes->info.rows + pBlock->info.rows); - - code = projectApplyFunctions(pSup->pExprInfo, pInfo->pRes, pBlock, pSup->pCtx, pSup->numOfExprs, - pProjectInfo->pPseudoColInfo); - if (code != TSDB_CODE_SUCCESS) { - longjmp(pTaskInfo->env, code); - } - - int32_t status = handleLimitOffset(pOperator, &pProjectInfo->limitInfo, pInfo->pRes, true); - - // filter shall be applied after apply functions and limit/offset on the result - doFilter(pProjectInfo->pFilterNode, pInfo->pRes); - - if (pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM) { + // no results generated + if (pInfo->pRes->info.rows == 0 || (!pProjectInfo->mergeDataBlocks)) { break; } - if (status == PROJECT_RETRIEVE_CONTINUE || pInfo->pRes->info.rows == 0) { - continue; - } else if (status == PROJECT_RETRIEVE_DONE) { + if (pProjectInfo->mergeDataBlocks) { + if (pFinalRes->info.rows + pInfo->pRes->info.rows <= pOperator->resultInfo.threshold) { + pFinalRes->info.groupId = pInfo->pRes->info.groupId; + pFinalRes->info.version = pInfo->pRes->info.version; + + // continue merge data, ignore the group id + blockDataMerge(pFinalRes, pInfo->pRes); + continue; + } + } + + // do apply filter + SSDataBlock* p = pProjectInfo->mergeDataBlocks ? pFinalRes : pRes; + doFilter(pProjectInfo->pFilterNode, p); + if (p->info.rows > 0) { break; } } - size_t rows = pInfo->pRes->info.rows; - pProjectInfo->limitInfo.numOfOutputRows += rows; - - pOperator->resultInfo.totalRows += rows; + SSDataBlock* p = pProjectInfo->mergeDataBlocks ? pFinalRes : pRes; + pOperator->resultInfo.totalRows += p->info.rows; if (pOperator->cost.openCost == 0) { pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; } - return (rows > 0) ? pInfo->pRes : NULL; + return (p->info.rows > 0) ? 
p : NULL; } static void doHandleRemainBlockForNewGroupImpl(SFillOperatorInfo* pInfo, SResultInfo* pResultInfo, @@ -3431,13 +3480,13 @@ static SSDataBlock* doFillImpl(SOperatorInfo* pOperator) { blockDataUpdateTsWindow(pBlock, pInfo->primaryTsCol); if (pInfo->curGroupId == 0 || pInfo->curGroupId == pBlock->info.groupId) { - pInfo->curGroupId = pBlock->info.groupId; // the first data block + pInfo->curGroupId = pBlock->info.groupId; // the first data block pInfo->totalInputRows += pBlock->info.rows; taosFillSetStartInfo(pInfo->pFillInfo, pBlock->info.rows, pBlock->info.window.ekey); taosFillSetInputDataBlock(pInfo->pFillInfo, pBlock); - } else if (pInfo->curGroupId != pBlock->info.groupId) { // the new group data block + } else if (pInfo->curGroupId != pBlock->info.groupId) { // the new group data block pInfo->existNewGroupBlock = pBlock; // Fill the previous group data block, before handle the data block of new group. @@ -3510,7 +3559,7 @@ static SSDataBlock* doFill(SOperatorInfo* pOperator) { static void destroyExprInfo(SExprInfo* pExpr, int32_t numOfExprs) { for (int32_t i = 0; i < numOfExprs; ++i) { SExprInfo* pExprInfo = &pExpr[i]; - for(int32_t j = 0; j < pExprInfo->base.numOfParams; ++j) { + for (int32_t j = 0; j < pExprInfo->base.numOfParams; ++j) { if (pExprInfo->base.pParam[j].type == FUNC_PARAM_TYPE_COLUMN) { taosMemoryFreeClear(pExprInfo->base.pParam[j].pCol); } @@ -3603,7 +3652,7 @@ int32_t initAggInfo(SExprSupp* pSup, SAggSupporter* pAggSup, SExprInfo* pExprInf return TSDB_CODE_SUCCESS; } -void initResultSizeInfo(SResultInfo * pResultInfo, int32_t numOfRows) { +void initResultSizeInfo(SResultInfo* pResultInfo, int32_t numOfRows) { ASSERT(numOfRows != 0); pResultInfo->capacity = numOfRows; pResultInfo->threshold = numOfRows * 0.75; @@ -3723,7 +3772,6 @@ void destroyBasicOperatorInfo(void* param, int32_t numOfOutput) { taosMemoryFreeClear(param); } - static void freeItem(void* pItem) { void** p = pItem; if (*p != NULL) { @@ -3758,6 +3806,7 @@ static void 
destroyProjectOperatorInfo(void* param, int32_t numOfOutput) { cleanupAggSup(&pInfo->aggSup); taosArrayDestroy(pInfo->pPseudoColInfo); + blockDataDestroy(pInfo->pFinalRes); taosMemoryFreeClear(param); } @@ -3817,7 +3866,10 @@ SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SProjectPhys initLimitInfo(pProjPhyNode->node.pLimit, pProjPhyNode->node.pSlimit, &pInfo->limitInfo); pInfo->binfo.pRes = pResBlock; + pInfo->pFinalRes = createOneDataBlock(pResBlock, false); + pInfo->pFilterNode = pProjPhyNode->node.pConditions; + pInfo->mergeDataBlocks = pProjPhyNode->mergeDataBlock; int32_t numOfRows = 4096; size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; @@ -4050,8 +4102,8 @@ static int32_t initFillInfo(SFillOperatorInfo* pInfo, SExprInfo* pExpr, int32_t w = getFirstQualifiedTimeWindow(win.skey, &w, pInterval, TSDB_ORDER_ASC); int32_t order = TSDB_ORDER_ASC; - pInfo->pFillInfo = taosCreateFillInfo(order, w.skey, 0, capacity, numOfCols, pInterval, - fillType, pColInfo, pInfo->primaryTsCol, id); + pInfo->pFillInfo = + taosCreateFillInfo(order, w.skey, 0, capacity, numOfCols, pInterval, fillType, pColInfo, pInfo->primaryTsCol, id); pInfo->win = win; pInfo->p = taosMemoryCalloc(numOfCols, POINTER_BYTES); @@ -4065,7 +4117,8 @@ static int32_t initFillInfo(SFillOperatorInfo* pInfo, SExprInfo* pExpr, int32_t } } -SOperatorInfo* createFillOperatorInfo(SOperatorInfo* downstream, SFillPhysiNode* pPhyFillNode, SExecTaskInfo* pTaskInfo) { +SOperatorInfo* createFillOperatorInfo(SOperatorInfo* downstream, SFillPhysiNode* pPhyFillNode, + SExecTaskInfo* pTaskInfo) { SFillOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SFillOperatorInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (pInfo == NULL || pOperator == NULL) { @@ -4145,8 +4198,8 @@ int32_t extractTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, metaReaderInit(&mr, pHandle->meta, 0); int32_t code = metaGetTableEntryByUid(&mr, pScanNode->uid); if 
(code != TSDB_CODE_SUCCESS) { - qError("failed to get the table meta, uid:0x%"PRIx64", suid:0x%"PRIx64 ", %s", pScanNode->uid, pScanNode->suid, - GET_TASKID(pTaskInfo)); + qError("failed to get the table meta, uid:0x%" PRIx64 ", suid:0x%" PRIx64 ", %s", pScanNode->uid, pScanNode->suid, + GET_TASKID(pTaskInfo)); metaReaderClear(&mr); return terrno; @@ -4176,11 +4229,11 @@ int32_t extractTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, } SSchemaWrapper* extractQueriedColumnSchema(SScanPhysiNode* pScanNode) { - int32_t numOfCols = LIST_LENGTH(pScanNode->pScanCols); + int32_t numOfCols = LIST_LENGTH(pScanNode->pScanCols); SSchemaWrapper* pqSw = taosMemoryCalloc(1, sizeof(SSchemaWrapper)); pqSw->pSchema = taosMemoryCalloc(numOfCols, sizeof(SSchema)); - for(int32_t i = 0; i < numOfCols; ++i) { + for (int32_t i = 0; i < numOfCols; ++i) { STargetNode* pNode = (STargetNode*)nodesListGetNode(pScanNode->pScanCols, i); SColumnNode* pColNode = (SColumnNode*)pNode->pExpr; @@ -4329,21 +4382,23 @@ static int32_t initTableblockDistQueryCond(uint64_t uid, SQueryTableDataCond* pC pCond->suid = uid; pCond->type = BLOCK_LOAD_OFFSET_ORDER; pCond->startVersion = -1; - pCond->endVersion = -1; + pCond->endVersion = -1; return TSDB_CODE_SUCCESS; } SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle, - STableListInfo* pTableListInfo, SNode* pTagCond, SNode* pTagIndexCond, const char* pUser) { + STableListInfo* pTableListInfo, SNode* pTagCond, SNode* pTagIndexCond, + const char* pUser) { int32_t type = nodeType(pPhyNode); if (pPhyNode->pChildren == NULL || LIST_LENGTH(pPhyNode->pChildren) == 0) { if (QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN == type) { STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode; - int32_t code = createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, - pTableScanNode->groupSort, pHandle, pTableListInfo, pTagCond, pTagIndexCond, GET_TASKID(pTaskInfo)); + int32_t code = + 
createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, pTableScanNode->groupSort, pHandle, + pTableListInfo, pTagCond, pTagIndexCond, GET_TASKID(pTaskInfo)); if (code) { pTaskInfo->code = code; return NULL; @@ -4362,8 +4417,9 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo } else if (QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN == type) { STableMergeScanPhysiNode* pTableScanNode = (STableMergeScanPhysiNode*)pPhyNode; - int32_t code = createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, - pTableScanNode->groupSort, pHandle, pTableListInfo, pTagCond, pTagIndexCond, GET_TASKID(pTaskInfo)); + int32_t code = + createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, pTableScanNode->groupSort, pHandle, + pTableListInfo, pTagCond, pTagIndexCond, GET_TASKID(pTaskInfo)); if (code) { pTaskInfo->code = code; return NULL; @@ -4375,8 +4431,7 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo return NULL; } - SOperatorInfo* pOperator = - createTableMergeScanOperatorInfo(pTableScanNode, pTableListInfo, pHandle, pTaskInfo); + SOperatorInfo* pOperator = createTableMergeScanOperatorInfo(pTableScanNode, pTableListInfo, pHandle, pTaskInfo); STableScanInfo* pScanInfo = pOperator->info; pTaskInfo->cost.pRecoder = &pScanInfo->readRecorder; @@ -4393,13 +4448,22 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo }; if (pHandle->vnode) { - int32_t code = createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, - pTableScanNode->groupSort, pHandle, pTableListInfo, pTagCond, pTagIndexCond, GET_TASKID(pTaskInfo)); + int32_t code = + createScanTableListInfo(&pTableScanNode->scan, pTableScanNode->pGroupTags, pTableScanNode->groupSort, + pHandle, pTableListInfo, pTagCond, pTagIndexCond, GET_TASKID(pTaskInfo)); if (code) { pTaskInfo->code = code; return NULL; } + +#ifndef NDEBUG + int32_t sz = 
taosArrayGetSize(pTableListInfo->pTableList); + for (int32_t i = 0; i < sz; i++) { + STableKeyInfo* pKeyInfo = taosArrayGet(pTableListInfo->pTableList, i); + qDebug("creating stream task: add table %" PRId64, pKeyInfo->uid); + } +#endif /* NDEBUG: must end before the brace closing `if (pHandle->vnode)` so release builds keep it */ } pTaskInfo->schemaInfo.qsw = extractQueriedColumnSchema(&pTableScanNode->scan); SOperatorInfo* pOperator = createStreamScanOperatorInfo(pHandle, pTableScanNode, pTagCond, &aggSup, pTaskInfo); @@ -4434,7 +4498,7 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo } SQueryTableDataCond cond = {0}; - int32_t code = initTableblockDistQueryCond(pBlockNode->suid, &cond); + int32_t code = initTableblockDistQueryCond(pBlockNode->suid, &cond); if (code != TSDB_CODE_SUCCESS) { return NULL; } @@ -4447,7 +4511,8 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo } else if (QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN == type) { SLastRowScanPhysiNode* pScanNode = (SLastRowScanPhysiNode*)pPhyNode; - int32_t code = createScanTableListInfo(&pScanNode->scan, pScanNode->pGroupTags, true, pHandle, pTableListInfo, pTagCond, pTagIndexCond, GET_TASKID(pTaskInfo)); + int32_t code = createScanTableListInfo(&pScanNode->scan, pScanNode->pGroupTags, true, pHandle, pTableListInfo, + pTagCond, pTagIndexCond, GET_TASKID(pTaskInfo)); if (code != TSDB_CODE_SUCCESS) { pTaskInfo->code = code; return NULL; @@ -4909,7 +4974,8 @@ int32_t createExecTaskInfoImpl(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SRead (*pTaskInfo)->sql = sql; (*pTaskInfo)->pSubplan = pPlan; - (*pTaskInfo)->pRoot = createOperatorTree(pPlan->pNode, *pTaskInfo, pHandle, &(*pTaskInfo)->tableqinfoList, pPlan->pTagCond, pPlan->pTagIndexCond, pPlan->user); + (*pTaskInfo)->pRoot = createOperatorTree(pPlan->pNode, *pTaskInfo, pHandle, &(*pTaskInfo)->tableqinfoList, + pPlan->pTagCond, pPlan->pTagIndexCond, pPlan->user); if (NULL == (*pTaskInfo)->pRoot) { code = (*pTaskInfo)->code; diff --git a/source/libs/executor/src/scanoperator.c 
b/source/libs/executor/src/scanoperator.c index 2c206fbc12..9ecf6b96d7 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -740,7 +740,7 @@ static SSDataBlock* doBlockInfoScan(SOperatorInfo* pOperator) { static void destroyBlockDistScanOperatorInfo(void* param, int32_t numOfOutput) { SBlockDistInfo* pDistInfo = (SBlockDistInfo*)param; blockDataDestroy(pDistInfo->pResBlock); - + tsdbReaderClose(pDistInfo->pHandle); taosMemoryFreeClear(param); } @@ -982,6 +982,9 @@ static SSDataBlock* doRangeScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32 if (!pResult) { blockDataCleanup(pSDB); *pRowIndex = 0; + STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; + tsdbReaderClose(pTableScanInfo->dataReader); + pTableScanInfo->dataReader = NULL; return NULL; } @@ -1003,6 +1006,9 @@ static SSDataBlock* doDataScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32_ } if (!pResult) { pInfo->updateWin = (STimeWindow){.skey = INT64_MIN, .ekey = INT64_MAX}; + STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; + tsdbReaderClose(pTableScanInfo->dataReader); + pTableScanInfo->dataReader = NULL; return NULL; } @@ -2047,8 +2053,8 @@ static SSDataBlock* sysTableScanUserTables(SOperatorInfo* pOperator) { uint64_t suid = pInfo->pCur->mr.me.ctbEntry.suid; int32_t code = metaGetTableEntryByUid(&mr, suid); if (code != TSDB_CODE_SUCCESS) { - qError("failed to get super table meta, cname:%s, suid:0x%" PRIx64 ", code:%s, %s", - pInfo->pCur->mr.me.name, suid, tstrerror(terrno), GET_TASKID(pTaskInfo)); + qError("failed to get super table meta, cname:%s, suid:0x%" PRIx64 ", code:%s, %s", pInfo->pCur->mr.me.name, + suid, tstrerror(terrno), GET_TASKID(pTaskInfo)); metaReaderClear(&mr); metaCloseTbCursor(pInfo->pCur); pInfo->pCur = NULL; @@ -2154,7 +2160,6 @@ static SSDataBlock* sysTableScanUserTables(SOperatorInfo* pOperator) { } } - static SSDataBlock* sysTableScanUserSTables(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = 
pOperator->pTaskInfo; SSysTableScanInfo* pInfo = pOperator->info; @@ -2180,12 +2185,13 @@ static SSDataBlock* doSysTableScan(SOperatorInfo* pOperator) { getDBNameFromCondition(pInfo->pCondition, dbName); sprintf(pInfo->req.db, "%d.%s", pInfo->accountId, dbName); } - + if (strncasecmp(name, TSDB_INS_TABLE_USER_TABLES, TSDB_TABLE_FNAME_LEN) == 0) { return sysTableScanUserTables(pOperator); } else if (strncasecmp(name, TSDB_INS_TABLE_USER_TAGS, TSDB_TABLE_FNAME_LEN) == 0) { return sysTableScanUserTags(pOperator); - } else if (strncasecmp(name, TSDB_INS_TABLE_USER_STABLES, TSDB_TABLE_FNAME_LEN) == 0 && IS_SYS_DBNAME(pInfo->req.db)) { + } else if (strncasecmp(name, TSDB_INS_TABLE_USER_STABLES, TSDB_TABLE_FNAME_LEN) == 0 && + IS_SYS_DBNAME(pInfo->req.db)) { return sysTableScanUserSTables(pOperator); } else { // load the meta from mnode of the given epset if (pOperator->status == OP_EXEC_DONE) { diff --git a/source/libs/function/inc/functionMgtInt.h b/source/libs/function/inc/functionMgtInt.h index 0b4fdefec4..c79306f1e4 100644 --- a/source/libs/function/inc/functionMgtInt.h +++ b/source/libs/function/inc/functionMgtInt.h @@ -47,6 +47,7 @@ extern "C" { #define FUNC_MGT_SYSTEM_INFO_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(18) #define FUNC_MGT_CLIENT_PC_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(19) #define FUNC_MGT_MULTI_ROWS_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(20) +#define FUNC_MGT_KEEP_ORDER_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(21) #define FUNC_MGT_TEST_MASK(val, mask) (((val) & (mask)) != 0) diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 1d95d58a57..567941c157 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -2097,7 +2097,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "top", .type = FUNCTION_TYPE_TOP, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, + .classification = FUNC_MGT_AGG_FUNC | 
FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, .translateFunc = translateTopBot, .getEnvFunc = getTopBotFuncEnv, .initFunc = topBotFunctionSetup, @@ -2112,7 +2112,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "bottom", .type = FUNCTION_TYPE_BOTTOM, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, .translateFunc = translateTopBot, .getEnvFunc = getTopBotFuncEnv, .initFunc = topBotFunctionSetup, @@ -2480,7 +2480,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "sample", .type = FUNCTION_TYPE_SAMPLE, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, .translateFunc = translateSample, .getEnvFunc = getSampleFuncEnv, .initFunc = sampleFunctionSetup, @@ -2906,7 +2906,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "_select_value", .type = FUNCTION_TYPE_SELECT_VALUE, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_KEEP_ORDER_FUNC, .translateFunc = translateSelectValue, .getEnvFunc = getSelectivityFuncEnv, // todo remove this function later. 
.initFunc = functionSetup, diff --git a/source/libs/function/src/functionMgt.c b/source/libs/function/src/functionMgt.c index ff74c880e3..c173522683 100644 --- a/source/libs/function/src/functionMgt.c +++ b/source/libs/function/src/functionMgt.c @@ -183,6 +183,8 @@ bool fmIsClientPseudoColumnFunc(int32_t funcId) { return isSpecificClassifyFunc( bool fmIsMultiRowsFunc(int32_t funcId) { return isSpecificClassifyFunc(funcId, FUNC_MGT_MULTI_ROWS_FUNC); } +bool fmIsKeepOrderFunc(int32_t funcId) { return isSpecificClassifyFunc(funcId, FUNC_MGT_KEEP_ORDER_FUNC); } + bool fmIsInterpFunc(int32_t funcId) { if (funcId < 0 || funcId >= funcMgtBuiltinsNum) { return false; diff --git a/source/libs/parser/src/parInsert.c b/source/libs/parser/src/parInsert.c index 05e8c1094d..8fa8fd4cf8 100644 --- a/source/libs/parser/src/parInsert.c +++ b/source/libs/parser/src/parInsert.c @@ -434,8 +434,12 @@ static FORCE_INLINE int32_t checkAndTrimValue(SToken* pToken, char* tmpTokenBuf, } static bool isNullStr(SToken* pToken) { - return (pToken->type == TK_NULL) || ((pToken->type == TK_NK_STRING) && (pToken->n != 0) && - (strncasecmp(TSDB_DATA_NULL_STR_L, pToken->z, pToken->n) == 0)); + return ((pToken->type == TK_NK_STRING) && (pToken->n != 0) && + (strncasecmp(TSDB_DATA_NULL_STR_L, pToken->z, pToken->n) == 0)); +} + +static bool isNullValue(int8_t dataType, SToken* pToken) { + return TK_NULL == pToken->type || (!IS_STR_DATA_TYPE(dataType) && isNullStr(pToken)); } static FORCE_INLINE int32_t toDouble(SToken* pToken, double* value, char** endPtr) { @@ -461,7 +465,7 @@ static int32_t parseValueToken(char** end, SToken* pToken, SSchema* pSchema, int return code; } - if (isNullStr(pToken)) { + if (isNullValue(pSchema->type, pToken)) { if (TSDB_DATA_TYPE_TIMESTAMP == pSchema->type && PRIMARYKEY_TIMESTAMP_COL_ID == pSchema->colId) { return buildSyntaxErrMsg(pMsgBuf, "primary timestamp should not be null", pToken->z); } @@ -735,11 +739,12 @@ static int32_t parseBoundColumns(SInsertParseContext* 
pCxt, SParsedDataColInfo* return TSDB_CODE_SUCCESS; } -static void buildCreateTbReq(SVCreateTbReq* pTbReq, const char* tname, STag* pTag, int64_t suid, const char* sname, SArray* tagName) { +static void buildCreateTbReq(SVCreateTbReq* pTbReq, const char* tname, STag* pTag, int64_t suid, const char* sname, + SArray* tagName) { pTbReq->type = TD_CHILD_TABLE; pTbReq->name = strdup(tname); pTbReq->ctb.suid = suid; - if(sname) pTbReq->ctb.name = strdup(sname); + if (sname) pTbReq->ctb.name = strdup(sname); pTbReq->ctb.pTag = (uint8_t*)pTag; pTbReq->ctb.tagName = taosArrayDup(tagName); pTbReq->commentLen = -1; @@ -753,7 +758,7 @@ static int32_t parseTagToken(char** end, SToken* pToken, SSchema* pSchema, int16 uint64_t uv; char* endptr = NULL; - if (isNullStr(pToken)) { + if (isNullValue(pSchema->type, pToken)) { if (TSDB_DATA_TYPE_TIMESTAMP == pSchema->type && PRIMARYKEY_TIMESTAMP_COL_ID == pSchema->colId) { return buildSyntaxErrMsg(pMsgBuf, "primary timestamp should not be null", pToken->z); } @@ -761,7 +766,7 @@ static int32_t parseTagToken(char** end, SToken* pToken, SSchema* pSchema, int16 return TSDB_CODE_SUCCESS; } -// strcpy(val->colName, pSchema->name); + // strcpy(val->colName, pSchema->name); val->cid = pSchema->colId; val->type = pSchema->type; @@ -971,7 +976,7 @@ static int32_t parseTagsClause(SInsertParseContext* pCxt, SSchema* pSchema, uint goto end; } - if (!isNullStr(&sToken)) { + if (!isNullValue(pTagSchema->type, &sToken)) { taosArrayPush(tagName, pTagSchema->name); } if (pTagSchema->type == TSDB_DATA_TYPE_JSON) { @@ -980,7 +985,7 @@ static int32_t parseTagsClause(SInsertParseContext* pCxt, SSchema* pSchema, uint taosMemoryFree(tmpTokenBuf); goto end; } - if (isNullStr(&sToken)) { + if (isNullValue(pTagSchema->type, &sToken)) { code = tTagNew(pTagVals, 1, true, &pTag); } else { code = parseJsontoTagData(sToken.z, pTagVals, &pTag, &pCxt->msg); @@ -1200,7 +1205,7 @@ static int parseOneRow(SInsertParseContext* pCxt, STableDataBlocks* pDataBlocks, *gotRow = 
true; #ifdef TD_DEBUG_PRINT_ROW - STSchema* pSTSchema = tdGetSTSChemaFromSSChema(&schema, spd->numOfCols); + STSchema* pSTSchema = tdGetSTSChemaFromSSChema(schema, spd->numOfCols, 1); tdSRowPrint(row, pSTSchema, __func__); taosMemoryFree(pSTSchema); #endif @@ -1321,7 +1326,11 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, TdFilePtr fp, STableDataB static int32_t parseDataFromFile(SInsertParseContext* pCxt, SToken filePath, STableDataBlocks* dataBuf) { char filePathStr[TSDB_FILENAME_LEN] = {0}; - strncpy(filePathStr, filePath.z, filePath.n); + if (TK_NK_STRING == filePath.type) { + trimString(filePath.z, filePath.n, filePathStr, sizeof(filePathStr)); + } else { + strncpy(filePathStr, filePath.z, filePath.n); + } TdFilePtr fp = taosOpenFile(filePathStr, TD_FILE_READ | TD_FILE_STREAM); if (NULL == fp) { return TAOS_SYSTEM_ERROR(errno); @@ -1556,7 +1565,7 @@ int32_t parseInsertSql(SParseContext* pContext, SQuery** pQuery, SParseMetaCache } else { nodesDestroyNode((*pQuery)->pRoot); } - + (*pQuery)->execMode = QUERY_EXEC_MODE_SCHEDULE; (*pQuery)->haveResultSet = false; (*pQuery)->msgType = TDMT_VND_SUBMIT; @@ -1804,8 +1813,8 @@ int32_t qBuildStmtOutput(SQuery* pQuery, SHashObj* pVgHash, SHashObj* pBlockHash return TSDB_CODE_SUCCESS; } -int32_t qBindStmtTagsValue(void* pBlock, void* boundTags, int64_t suid, const char* sTableName, char* tName, TAOS_MULTI_BIND* bind, - char* msgBuf, int32_t msgBufLen) { +int32_t qBindStmtTagsValue(void* pBlock, void* boundTags, int64_t suid, const char* sTableName, char* tName, + TAOS_MULTI_BIND* bind, char* msgBuf, int32_t msgBufLen) { STableDataBlocks* pDataBlock = (STableDataBlocks*)pBlock; SMsgBuf pBuf = {.buf = msgBuf, .len = msgBufLen}; SParsedDataColInfo* tags = (SParsedDataColInfo*)boundTags; @@ -1856,7 +1865,7 @@ int32_t qBindStmtTagsValue(void* pBlock, void* boundTags, int64_t suid, const ch } } else { STagVal val = {.cid = pTagSchema->colId, .type = pTagSchema->type}; -// strcpy(val.colName, pTagSchema->name); + // 
strcpy(val.colName, pTagSchema->name); if (pTagSchema->type == TSDB_DATA_TYPE_BINARY) { val.pData = (uint8_t*)bind[c].buffer; val.nData = colLen; @@ -1972,7 +1981,7 @@ int32_t qBindStmtColsValue(void* pBlock, TAOS_MULTI_BIND* bind, char* msgBuf, in } } #ifdef TD_DEBUG_PRINT_ROW - STSchema* pSTSchema = tdGetSTSChemaFromSSChema(&pSchema, spd->numOfCols); + STSchema* pSTSchema = tdGetSTSChemaFromSSChema(pSchema, spd->numOfCols, 1); tdSRowPrint(row, pSTSchema, __func__); taosMemoryFree(pSTSchema); #endif @@ -2057,7 +2066,7 @@ int32_t qBindStmtSingleColValue(void* pBlock, TAOS_MULTI_BIND* bind, char* msgBu #ifdef TD_DEBUG_PRINT_ROW if (rowEnd) { - STSchema* pSTSchema = tdGetSTSChemaFromSSChema(&pSchema, spd->numOfCols); + STSchema* pSTSchema = tdGetSTSChemaFromSSChema(pSchema, spd->numOfCols, 1); tdSRowPrint(row, pSTSchema, __func__); taosMemoryFree(pSTSchema); } @@ -2247,7 +2256,8 @@ static int32_t smlBoundColumnData(SArray* cols, SParsedDataColInfo* pColList, SS * @param msg * @return int32_t */ -static int32_t smlBuildTagRow(SArray* cols, SParsedDataColInfo* tags, SSchema* pSchema, STag** ppTag, SArray** tagName, SMsgBuf* msg) { +static int32_t smlBuildTagRow(SArray* cols, SParsedDataColInfo* tags, SSchema* pSchema, STag** ppTag, SArray** tagName, + SMsgBuf* msg) { SArray* pTagArray = taosArrayInit(tags->numOfBound, sizeof(STagVal)); if (!pTagArray) { return TSDB_CODE_TSC_OUT_OF_MEMORY; @@ -2264,7 +2274,7 @@ static int32_t smlBuildTagRow(SArray* cols, SParsedDataColInfo* tags, SSchema* p taosArrayPush(*tagName, pTagSchema->name); STagVal val = {.cid = pTagSchema->colId, .type = pTagSchema->type}; -// strcpy(val.colName, pTagSchema->name); + // strcpy(val.colName, pTagSchema->name); if (pTagSchema->type == TSDB_DATA_TYPE_BINARY) { val.pData = (uint8_t*)kv->value; val.nData = kv->length; @@ -2320,7 +2330,7 @@ int32_t smlBindData(void* handle, SArray* tags, SArray* colsSchema, SArray* cols buildInvalidOperationMsg(&pBuf, "bound tags error"); return ret; } - STag* pTag = 
NULL; + STag* pTag = NULL; SArray* tagName = NULL; ret = smlBuildTagRow(tags, &smlHandle->tableExecHandle.tags, pTagsSchema, &pTag, &tagName, &pBuf); if (ret != TSDB_CODE_SUCCESS) { @@ -2404,9 +2414,9 @@ int32_t smlBindData(void* handle, SArray* tags, SArray* colsSchema, SArray* cols } else { int32_t colLen = kv->length; if (pColSchema->type == TSDB_DATA_TYPE_TIMESTAMP) { -// uError("SML:data before:%ld, precision:%d", kv->i, pTableMeta->tableInfo.precision); + // uError("SML:data before:%ld, precision:%d", kv->i, pTableMeta->tableInfo.precision); kv->i = convertTimePrecision(kv->i, TSDB_TIME_PRECISION_NANO, pTableMeta->tableInfo.precision); -// uError("SML:data after:%ld, precision:%d", kv->i, pTableMeta->tableInfo.precision); + // uError("SML:data after:%ld, precision:%d", kv->i, pTableMeta->tableInfo.precision); } if (IS_VAR_DATA_TYPE(kv->type)) { diff --git a/source/libs/parser/src/parInsertData.c b/source/libs/parser/src/parInsertData.c index 9e1d8dba8b..ae123a3563 100644 --- a/source/libs/parser/src/parInsertData.c +++ b/source/libs/parser/src/parInsertData.c @@ -19,6 +19,7 @@ #include "parInt.h" #include "parUtil.h" #include "querynodes.h" +#include "tRealloc.h" #define IS_RAW_PAYLOAD(t) \ (((int)(t)) == PAYLOAD_TYPE_RAW) // 0: K-V payload for non-prepare insert, 1: rawPayload for prepare insert @@ -34,6 +35,32 @@ typedef struct SBlockKeyInfo { SBlockKeyTuple* pKeyTuple; } SBlockKeyInfo; +typedef struct { + int32_t index; + SArray* rowArray; // array of merged rows(mem allocated by tRealloc/free by tFree) + STSchema* pSchema; + int64_t tbUid; // suid for child table, uid for normal table +} SBlockRowMerger; + +static FORCE_INLINE void tdResetSBlockRowMerger(SBlockRowMerger* pMerger) { + if (pMerger) { + pMerger->index = -1; + } +} + +static void tdFreeSBlockRowMerger(SBlockRowMerger* pMerger) { + if (pMerger) { + int32_t size = taosArrayGetSize(pMerger->rowArray); + for (int32_t i = 0; i < size; ++i) { + tFree(*(void**)taosArrayGet(pMerger->rowArray, i)); + 
} + taosArrayDestroy(pMerger->rowArray); + + taosMemoryFreeClear(pMerger->pSchema); + taosMemoryFree(pMerger); + } +} + static int32_t rowDataCompar(const void* lhs, const void* rhs) { TSKEY left = *(TSKEY*)lhs; TSKEY right = *(TSKEY*)rhs; @@ -328,7 +355,7 @@ void sortRemoveDataBlockDupRowsRaw(STableDataBlocks* dataBuf) { } // data block is disordered, sort it in ascending order -int sortRemoveDataBlockDupRows(STableDataBlocks* dataBuf, SBlockKeyInfo* pBlkKeyInfo) { +static int sortRemoveDataBlockDupRows(STableDataBlocks* dataBuf, SBlockKeyInfo* pBlkKeyInfo) { SSubmitBlk* pBlocks = (SSubmitBlk*)dataBuf->pData; int16_t nRows = pBlocks->numOfRows; @@ -396,6 +423,201 @@ int sortRemoveDataBlockDupRows(STableDataBlocks* dataBuf, SBlockKeyInfo* pBlkKey return 0; } +static void* tdGetCurRowFromBlockMerger(SBlockRowMerger* pBlkRowMerger) { + if (pBlkRowMerger && (pBlkRowMerger->index >= 0)) { + ASSERT(pBlkRowMerger->index < taosArrayGetSize(pBlkRowMerger->rowArray)); + return *(void**)taosArrayGet(pBlkRowMerger->rowArray, pBlkRowMerger->index); + } + return NULL; +} + +static int32_t tdBlockRowMerge(STableMeta* pTableMeta, SBlockKeyTuple* pEndKeyTp, int32_t nDupRows, + SBlockRowMerger** pBlkRowMerger, int32_t rowSize) { + ASSERT(nDupRows > 1); + SBlockKeyTuple* pStartKeyTp = pEndKeyTp - (nDupRows - 1); + ASSERT(pStartKeyTp->skey == pEndKeyTp->skey); + + // TODO: optimization if end row is all normal +#if 0 + STSRow* pEndRow = (STSRow*)pEndKeyTp->payloadAddr; + if(isNormal(pEndRow)) { // set the end row if it is normal and return directly + pStartKeyTp->payloadAddr = pEndKeyTp->payloadAddr; + return TSDB_CODE_SUCCESS; + } +#endif + + if (!(*pBlkRowMerger)) { + (*pBlkRowMerger) = taosMemoryCalloc(1, sizeof(**pBlkRowMerger)); + if (!(*pBlkRowMerger)) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return TSDB_CODE_FAILED; + } + (*pBlkRowMerger)->index = -1; + if (!(*pBlkRowMerger)->rowArray) { + (*pBlkRowMerger)->rowArray = taosArrayInit(1, sizeof(void*)); + if 
(!(*pBlkRowMerger)->rowArray) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return TSDB_CODE_FAILED; + } + } + } + + if ((*pBlkRowMerger)->pSchema) { + if ((*pBlkRowMerger)->pSchema->version != pTableMeta->sversion) { + taosMemoryFreeClear((*pBlkRowMerger)->pSchema); + } else { + if ((*pBlkRowMerger)->tbUid != (pTableMeta->suid > 0 ? pTableMeta->suid : pTableMeta->uid)) { + taosMemoryFreeClear((*pBlkRowMerger)->pSchema); + } + } + } + + if (!(*pBlkRowMerger)->pSchema) { + (*pBlkRowMerger)->pSchema = + tdGetSTSChemaFromSSChema(pTableMeta->schema, pTableMeta->tableInfo.numOfColumns, pTableMeta->sversion); + + if (!(*pBlkRowMerger)->pSchema) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return TSDB_CODE_FAILED; + } + (*pBlkRowMerger)->tbUid = pTableMeta->suid > 0 ? pTableMeta->suid : pTableMeta->uid; + } + + void* pDestRow = NULL; + ++((*pBlkRowMerger)->index); + if ((*pBlkRowMerger)->index < taosArrayGetSize((*pBlkRowMerger)->rowArray)) { + void** ppAlloc = (void**)taosArrayGet((*pBlkRowMerger)->rowArray, (*pBlkRowMerger)->index); /* grow the slot in place so rowArray never keeps a stale pointer if the buffer moves */ + if (tRealloc((uint8_t**)ppAlloc, rowSize) != 0) { + return TSDB_CODE_FAILED; + } + pDestRow = *ppAlloc; + } else { + if (tRealloc((uint8_t**)&pDestRow, rowSize) != 0) { + return TSDB_CODE_FAILED; + } + taosArrayPush((*pBlkRowMerger)->rowArray, &pDestRow); + } + + // merge rows to pDestRow + STSchema* pSchema = (*pBlkRowMerger)->pSchema; + SArray* pArray = taosArrayInit(pSchema->numOfCols, sizeof(SColVal)); + for (int32_t i = 0; i < pSchema->numOfCols; ++i) { + SColVal colVal = {0}; + for (int32_t j = 0; j < nDupRows; ++j) { + tTSRowGetVal((pEndKeyTp - j)->payloadAddr, pSchema, i, &colVal); + if (!colVal.isNone) { + break; + } + } + taosArrayPush(pArray, &colVal); + } + if (tdSTSRowNew(pArray, pSchema, (STSRow**)&pDestRow) < 0) { + taosArrayDestroy(pArray); + return TSDB_CODE_FAILED; + } + + taosArrayDestroy(pArray); + return TSDB_CODE_SUCCESS; +} + +// data block is disordered, sort it in ascending order, and merge dup rows if exists +static int 
sortMergeDataBlockDupRows(STableDataBlocks* dataBuf, SBlockKeyInfo* pBlkKeyInfo, + SBlockRowMerger** ppBlkRowMerger) { + SSubmitBlk* pBlocks = (SSubmitBlk*)dataBuf->pData; + STableMeta* pTableMeta = dataBuf->pTableMeta; + int16_t nRows = pBlocks->numOfRows; + + // size is less than the total size, since duplicated rows may be removed. + + // allocate memory + size_t nAlloc = nRows * sizeof(SBlockKeyTuple); + if (pBlkKeyInfo->pKeyTuple == NULL || pBlkKeyInfo->maxBytesAlloc < nAlloc) { + char* tmp = taosMemoryRealloc(pBlkKeyInfo->pKeyTuple, nAlloc); + if (tmp == NULL) { + return TSDB_CODE_TSC_OUT_OF_MEMORY; + } + pBlkKeyInfo->pKeyTuple = (SBlockKeyTuple*)tmp; + pBlkKeyInfo->maxBytesAlloc = (int32_t)nAlloc; + } + memset(pBlkKeyInfo->pKeyTuple, 0, nAlloc); + + tdResetSBlockRowMerger(*ppBlkRowMerger); + + int32_t extendedRowSize = getExtendedRowSize(dataBuf); + SBlockKeyTuple* pBlkKeyTuple = pBlkKeyInfo->pKeyTuple; + char* pBlockData = pBlocks->data + pBlocks->schemaLen; + int n = 0; + while (n < nRows) { + pBlkKeyTuple->skey = TD_ROW_KEY((STSRow*)pBlockData); + pBlkKeyTuple->payloadAddr = pBlockData; + pBlkKeyTuple->index = n; + + // next loop + pBlockData += extendedRowSize; + ++pBlkKeyTuple; + ++n; + } + + if (!dataBuf->ordered) { + pBlkKeyTuple = pBlkKeyInfo->pKeyTuple; + + taosSort(pBlkKeyTuple, nRows, sizeof(SBlockKeyTuple), rowDataComparStable); + + pBlkKeyTuple = pBlkKeyInfo->pKeyTuple; + bool hasDup = false; + int32_t nextPos = 0; + int32_t i = 0; + int32_t j = 1; + + while (j < nRows) { + TSKEY ti = (pBlkKeyTuple + i)->skey; + TSKEY tj = (pBlkKeyTuple + j)->skey; + + if (ti == tj) { + ++j; + continue; + } + + if ((j - i) > 1) { + if (tdBlockRowMerge(pTableMeta, (pBlkKeyTuple + j - 1), j - i, ppBlkRowMerger, extendedRowSize) < 0) { + return TSDB_CODE_FAILED; + } + (pBlkKeyTuple + nextPos)->payloadAddr = tdGetCurRowFromBlockMerger(*ppBlkRowMerger); + if (!hasDup) { + hasDup = true; + } + i = j; + } else { + if (hasDup) { + memmove(pBlkKeyTuple + nextPos, 
pBlkKeyTuple + i, sizeof(SBlockKeyTuple)); + } + ++i; + } + + ++nextPos; + ++j; + } + + if ((j - i) > 1) { + ASSERT((pBlkKeyTuple + i)->skey == (pBlkKeyTuple + j - 1)->skey); + if (tdBlockRowMerge(pTableMeta, (pBlkKeyTuple + j - 1), j - i, ppBlkRowMerger, extendedRowSize) < 0) { + return TSDB_CODE_FAILED; + } + (pBlkKeyTuple + nextPos)->payloadAddr = tdGetCurRowFromBlockMerger(*ppBlkRowMerger); + } else if (hasDup) { + memmove(pBlkKeyTuple + nextPos, pBlkKeyTuple + i, sizeof(SBlockKeyTuple)); + } + + dataBuf->ordered = true; + pBlocks->numOfRows = nextPos + 1; + } + + dataBuf->size = sizeof(SSubmitBlk) + pBlocks->numOfRows * extendedRowSize; + dataBuf->prevTS = INT64_MIN; + + return TSDB_CODE_SUCCESS; +} + // Erase the empty space reserved for binary data static int trimDataBlock(void* pDataBlock, STableDataBlocks* pTableDataBlock, SBlockKeyTuple* blkKeyTuple, bool isRawPayload) { @@ -464,6 +686,8 @@ int32_t mergeTableDataBlocks(SHashObj* pHashObj, uint8_t payloadType, SArray** p STableDataBlocks** p = taosHashIterate(pHashObj, NULL); STableDataBlocks* pOneTableBlock = *p; SBlockKeyInfo blkKeyInfo = {0}; // share by pOneTableBlock + SBlockRowMerger *pBlkRowMerger = NULL; + while (pOneTableBlock) { SSubmitBlk* pBlocks = (SSubmitBlk*)pOneTableBlock->pData; if (pBlocks->numOfRows > 0) { @@ -473,6 +697,7 @@ int32_t mergeTableDataBlocks(SHashObj* pHashObj, uint8_t payloadType, SArray** p getDataBlockFromList(pVnodeDataBlockHashList, &pOneTableBlock->vgId, sizeof(pOneTableBlock->vgId), TSDB_PAYLOAD_SIZE, INSERT_HEAD_SIZE, 0, pOneTableBlock->pTableMeta, &dataBuf, pVnodeDataBlockList, NULL); if (ret != TSDB_CODE_SUCCESS) { + tdFreeSBlockRowMerger(pBlkRowMerger); taosHashCleanup(pVnodeDataBlockHashList); destroyBlockArrayList(pVnodeDataBlockList); taosMemoryFreeClear(blkKeyInfo.pKeyTuple); @@ -490,6 +715,7 @@ int32_t mergeTableDataBlocks(SHashObj* pHashObj, uint8_t payloadType, SArray** p if (tmp != NULL) { dataBuf->pData = tmp; } else { // failed to allocate memory, free 
already allocated memory and return error code + tdFreeSBlockRowMerger(pBlkRowMerger); taosHashCleanup(pVnodeDataBlockHashList); destroyBlockArrayList(pVnodeDataBlockList); taosMemoryFreeClear(dataBuf->pData); @@ -501,7 +727,8 @@ int32_t mergeTableDataBlocks(SHashObj* pHashObj, uint8_t payloadType, SArray** p if (isRawPayload) { sortRemoveDataBlockDupRowsRaw(pOneTableBlock); } else { - if ((code = sortRemoveDataBlockDupRows(pOneTableBlock, &blkKeyInfo)) != 0) { + if ((code = sortMergeDataBlockDupRows(pOneTableBlock, &blkKeyInfo, &pBlkRowMerger)) != 0) { + tdFreeSBlockRowMerger(pBlkRowMerger); taosHashCleanup(pVnodeDataBlockHashList); destroyBlockArrayList(pVnodeDataBlockList); taosMemoryFreeClear(dataBuf->pData); @@ -529,6 +756,7 @@ int32_t mergeTableDataBlocks(SHashObj* pHashObj, uint8_t payloadType, SArray** p } // free the table data blocks; + tdFreeSBlockRowMerger(pBlkRowMerger); taosHashCleanup(pVnodeDataBlockHashList); taosMemoryFreeClear(blkKeyInfo.pKeyTuple); *pVgDataBlocks = pVnodeDataBlockList; diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 892ae6d5ac..e198df4bc4 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -1089,7 +1089,7 @@ static int32_t translateScanPseudoColumnFunc(STranslateContext* pCxt, SFunctionN return TSDB_CODE_SUCCESS; } if (0 == LIST_LENGTH(pFunc->pParameterList)) { - if (!isSelectStmt(pCxt->pCurrStmt) || + if (!isSelectStmt(pCxt->pCurrStmt) || NULL == ((SSelectStmt*)pCxt->pCurrStmt)->pFromTable || QUERY_NODE_REAL_TABLE != nodeType(((SSelectStmt*)pCxt->pCurrStmt)->pFromTable)) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_TBNAME); } diff --git a/source/libs/parser/src/parser.c b/source/libs/parser/src/parser.c index 78d1e83436..53ee717af2 100644 --- a/source/libs/parser/src/parser.c +++ b/source/libs/parser/src/parser.c @@ -36,7 +36,7 @@ bool qIsInsertValuesSql(const char* pStr, size_t length) { pStr += index; index 
= 0; t = tStrGetToken((char*)pStr, &index, false); - if (TK_USING == t.type || TK_VALUES == t.type) { + if (TK_USING == t.type || TK_VALUES == t.type || TK_FILE == t.type) { return true; } else if (TK_SELECT == t.type) { return false; diff --git a/source/libs/parser/test/parSelectTest.cpp b/source/libs/parser/test/parSelectTest.cpp index b9a760d342..849ba14d11 100644 --- a/source/libs/parser/test/parSelectTest.cpp +++ b/source/libs/parser/test/parSelectTest.cpp @@ -444,4 +444,11 @@ TEST_F(ParserSelectTest, withoutFrom) { run("SELECT USER()"); } +TEST_F(ParserSelectTest, withoutFromSemanticCheck) { + useDb("root", "test"); + + run("SELECT c1", TSDB_CODE_PAR_INVALID_COLUMN); + run("SELECT TBNAME", TSDB_CODE_PAR_INVALID_TBNAME); +} + } // namespace ParserTest