diff --git a/Jenkinsfile b/Jenkinsfile index 23dc54d963..fc2b3562c1 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -105,15 +105,17 @@ pipeline { abort_previous() abortPreviousBuilds() } - pre_test() - sh''' - cd ${WKC}/tests - ./test-all.sh b1fq - ''' - sh''' - cd ${WKC}/debug - ctest - ''' + timeout(time: 45, unit: 'MINUTES'){ + pre_test() + sh''' + cd ${WKC}/tests + ./test-all.sh b1fq + ''' + sh''' + cd ${WKC}/debug + ctest + ''' + } } } } diff --git a/include/common/tdataformat.h b/include/common/tdataformat.h index c6ef6c513f..cc30cd78f5 100644 --- a/include/common/tdataformat.h +++ b/include/common/tdataformat.h @@ -31,13 +31,12 @@ extern "C" { memcpy(varDataVal(x), (str), __len); \ } while (0); -#define STR_TO_NET_VARSTR(x, str) \ - do { \ - VarDataLenT __len = (VarDataLenT)strlen(str); \ - *(VarDataLenT *)(x) = htons(__len); \ - memcpy(varDataVal(x), (str), __len); \ - } while (0); - +#define STR_TO_NET_VARSTR(x, str) \ + do { \ + VarDataLenT __len = (VarDataLenT)strlen(str); \ + *(VarDataLenT *)(x) = htons(__len); \ + memcpy(varDataVal(x), (str), __len); \ + } while (0); #define STR_WITH_MAXSIZE_TO_VARSTR(x, str, _maxs) \ do { \ @@ -71,11 +70,12 @@ typedef struct { // ----------------- TSDB SCHEMA DEFINITION typedef struct { - int version; // version - int numOfCols; // Number of columns appended - int tlen; // maximum length of a SDataRow without the header part (sizeof(VarDataOffsetT) + sizeof(VarDataLenT) + (bytes)) - uint16_t flen; // First part length in a SDataRow after the header part - uint16_t vlen; // pure value part length, excluded the overhead (bytes only) + int version; // version + int numOfCols; // Number of columns appended + int tlen; // maximum length of a SDataRow without the header part (sizeof(VarDataOffsetT) + sizeof(VarDataLenT) + + // (bytes)) + uint16_t flen; // First part length in a SDataRow after the header part + uint16_t vlen; // pure value part length, excluded the overhead (bytes only) STColumn columns[]; } STSchema; @@ 
-202,7 +202,6 @@ void tdFreeDataRow(SDataRow row); void tdInitDataRow(SDataRow row, STSchema *pSchema); SDataRow tdDataRowDup(SDataRow row); - // offset here not include dataRow header length static FORCE_INLINE int tdAppendDataColVal(SDataRow row, const void *value, bool isCopyVarData, int8_t type, int32_t offset) { @@ -228,7 +227,6 @@ static FORCE_INLINE int tdAppendDataColVal(SDataRow row, const void *value, bool return 0; } - // offset here not include dataRow header length static FORCE_INLINE int tdAppendColVal(SDataRow row, const void *value, int8_t type, int32_t offset) { return tdAppendDataColVal(row, value, true, type, offset); @@ -249,27 +247,28 @@ static FORCE_INLINE void *tdGetPtrToCol(SDataRow row, STSchema *pSchema, int idx static FORCE_INLINE void *tdGetColOfRowBySchema(SDataRow row, STSchema *pSchema, int idx) { int16_t offset = TD_DATA_ROW_HEAD_SIZE + pSchema->columns[idx].offset; - int8_t type = pSchema->columns[idx].type; + int8_t type = pSchema->columns[idx].type; return tdGetRowDataOfCol(row, type, offset); } static FORCE_INLINE bool tdIsColOfRowNullBySchema(SDataRow row, STSchema *pSchema, int idx) { int16_t offset = TD_DATA_ROW_HEAD_SIZE + pSchema->columns[idx].offset; - int8_t type = pSchema->columns[idx].type; + int8_t type = pSchema->columns[idx].type; return isNull(tdGetRowDataOfCol(row, type, offset), type); } static FORCE_INLINE void tdSetColOfRowNullBySchema(SDataRow row, STSchema *pSchema, int idx) { int16_t offset = TD_DATA_ROW_HEAD_SIZE + pSchema->columns[idx].offset; - int8_t type = pSchema->columns[idx].type; + int8_t type = pSchema->columns[idx].type; int16_t bytes = pSchema->columns[idx].bytes; setNull(tdGetRowDataOfCol(row, type, offset), type, bytes); } -static FORCE_INLINE void tdCopyColOfRowBySchema(SDataRow dst, STSchema *pDstSchema, int dstIdx, SDataRow src, STSchema *pSrcSchema, int srcIdx) { +static FORCE_INLINE void tdCopyColOfRowBySchema(SDataRow dst, STSchema *pDstSchema, int dstIdx, SDataRow src, + STSchema 
*pSrcSchema, int srcIdx) { int8_t type = pDstSchema->columns[dstIdx].type; assert(type == pSrcSchema->columns[srcIdx].type); void *pData = tdGetPtrToCol(dst, pDstSchema, dstIdx); @@ -319,7 +318,6 @@ static FORCE_INLINE void tdCopyColOfRowBySchema(SDataRow dst, STSchema *pDstSche } } - // ----------------- Data column structure typedef struct SDataCol { int8_t type; // column type @@ -339,7 +337,7 @@ static FORCE_INLINE void dataColReset(SDataCol *pDataCol) { pDataCol->len = 0; } int tdAllocMemForCol(SDataCol *pCol, int maxPoints); void dataColInit(SDataCol *pDataCol, STColumn *pCol, int maxPoints); -int dataColAppendVal(SDataCol *pCol, const void *value, int numOfRows, int maxPoints); +int dataColAppendVal(SDataCol *pCol, const void *value, int numOfRows, int maxPoints); void dataColSetOffset(SDataCol *pCol, int nEle); bool isNEleNull(SDataCol *pCol, int nEle); @@ -367,15 +365,15 @@ static FORCE_INLINE int32_t dataColGetNEleLen(SDataCol *pDataCol, int rows) { } typedef struct { - int maxCols; // max number of columns - int maxPoints; // max number of points - int numOfRows; - int numOfCols; // Total number of cols - int sversion; // TODO: set sversion + int maxCols; // max number of columns + int maxPoints; // max number of points + int numOfRows; + int numOfCols; // Total number of cols + int sversion; // TODO: set sversion SDataCol *cols; } SDataCols; -#define keyCol(pCols) (&((pCols)->cols[0])) // Key column +#define keyCol(pCols) (&((pCols)->cols[0])) // Key column #define dataColsTKeyAt(pCols, idx) ((TKEY *)(keyCol(pCols)->pData))[(idx)] // the idx row of column-wised data #define dataColsKeyAt(pCols, idx) tdGetKey(dataColsTKeyAt(pCols, idx)) static FORCE_INLINE TKEY dataColsTKeyFirst(SDataCols *pCols) { @@ -454,6 +452,7 @@ typedef struct { #define kvRowValLen(r) (kvRowLen(r) - TD_KV_ROW_HEAD_SIZE - sizeof(SColIdx) * kvRowNCols(r)) #define kvRowTKey(r) (*(TKEY *)(kvRowValues(r))) #define kvRowKey(r) tdGetKey(kvRowTKey(r)) +#define kvRowKeys(r) POINTER_SHIFT(r, 
*(uint16_t *)POINTER_SHIFT(r, TD_KV_ROW_HEAD_SIZE + sizeof(int16_t))) #define kvRowDeleted(r) TKEY_IS_DELETED(kvRowTKey(r)) SKVRow tdKVRowDup(SKVRow row); @@ -547,7 +546,7 @@ SKVRow tdGetKVRowFromBuilder(SKVRowBuilder *pBuilder); static FORCE_INLINE int tdAddColToKVRow(SKVRowBuilder *pBuilder, int16_t colId, int8_t type, const void *value) { if (pBuilder->nCols >= pBuilder->tCols) { pBuilder->tCols *= 2; - SColIdx* pColIdx = (SColIdx *)realloc((void *)(pBuilder->pColIdx), sizeof(SColIdx) * pBuilder->tCols); + SColIdx *pColIdx = (SColIdx *)realloc((void *)(pBuilder->pColIdx), sizeof(SColIdx) * pBuilder->tCols); if (pColIdx == NULL) return -1; pBuilder->pColIdx = pColIdx; } @@ -562,7 +561,7 @@ static FORCE_INLINE int tdAddColToKVRow(SKVRowBuilder *pBuilder, int16_t colId, while (tlen > pBuilder->alloc - pBuilder->size) { pBuilder->alloc *= 2; } - void* buf = realloc(pBuilder->buf, pBuilder->alloc); + void *buf = realloc(pBuilder->buf, pBuilder->alloc); if (buf == NULL) return -1; pBuilder->buf = buf; } @@ -654,6 +653,7 @@ static FORCE_INLINE char *memRowEnd(SMemRow row) { #define memRowTKey(r) (isDataRow(r) ? dataRowTKey(memRowDataBody(r)) : kvRowTKey(memRowKvBody(r))) #define memRowKey(r) (isDataRow(r) ? dataRowKey(memRowDataBody(r)) : kvRowKey(memRowKvBody(r))) +#define memRowKeys(r) (isDataRow(r) ? 
dataRowTuple(memRowDataBody(r)) : kvRowKeys(memRowKvBody(r))) #define memRowSetTKey(r, k) \ do { \ if (isDataRow(r)) { \ @@ -750,10 +750,10 @@ static FORCE_INLINE void tdGetColAppendDeltaLen(const void *value, int8_t colTyp typedef struct { int16_t colId; uint8_t colType; - char* colVal; + char * colVal; } SColInfo; -static FORCE_INLINE void setSColInfo(SColInfo* colInfo, int16_t colId, uint8_t colType, char* colVal) { +static FORCE_INLINE void setSColInfo(SColInfo *colInfo, int16_t colId, uint8_t colType, char *colVal) { colInfo->colId = colId; colInfo->colType = colType; colInfo->colVal = colVal; @@ -813,4 +813,4 @@ static FORCE_INLINE char *payloadNextCol(char *pCol) { return (char *)POINTER_SH } #endif -#endif /*_TD_COMMON_DATA_FORMAT_H_*/ +#endif /*_TD_COMMON_DATA_FORMAT_H_*/ diff --git a/include/common/tmsg.h b/include/common/tmsg.h index f173662770..632d99878c 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -264,11 +264,29 @@ typedef struct SSubmitBlk { // Submit message for this TSDB typedef struct SSubmitMsg { SMsgHead header; + int64_t version; int32_t length; int32_t numOfBlocks; char blocks[]; } SSubmitMsg; +typedef struct { + int32_t totalLen; + int32_t len; + SMemRow row; +} SSubmitBlkIter; + +typedef struct { + int32_t totalLen; + int32_t len; + void* pMsg; +} SSubmitMsgIter; + +int tInitSubmitMsgIter(SSubmitMsg* pMsg, SSubmitMsgIter* pIter); +int tGetSubmitMsgNext(SSubmitMsgIter* pIter, SSubmitBlk** pPBlock); +int tInitSubmitBlkIter(SSubmitBlk* pBlock, SSubmitBlkIter* pIter); +SMemRow tGetSubmitBlkNext(SSubmitBlkIter* pIter); + typedef struct { int32_t index; // index of failed block in submit blocks int32_t vnode; // vnode index of failed block @@ -390,7 +408,7 @@ typedef struct { char app[TSDB_APP_NAME_LEN]; char db[TSDB_DB_NAME_LEN]; int64_t startTime; -} SConnectMsg; +} SConnectReq; typedef struct SEpSet { int8_t inUse; @@ -738,6 +756,7 @@ typedef struct { int32_t sver; int32_t dnodeId; int64_t clusterId; + int64_t dver; 
int64_t rebootTime; int64_t updateTime; int32_t numOfCores; @@ -745,7 +764,7 @@ typedef struct { char dnodeEp[TSDB_EP_LEN]; SClusterCfg clusterCfg; SVnodeLoads vnodeLoads; -} SStatusMsg; +} SStatusReq; typedef struct { int32_t reserved; @@ -770,6 +789,7 @@ typedef struct { } SDnodeEps; typedef struct { + int64_t dver; SDnodeCfg dnodeCfg; SDnodeEps dnodeEps; } SStatusRsp; @@ -805,19 +825,19 @@ typedef struct { int8_t replica; int8_t selfIndex; SReplica replicas[TSDB_MAX_REPLICA]; -} SCreateVnodeMsg, SAlterVnodeMsg; +} SCreateVnodeReq, SAlterVnodeReq; typedef struct { int32_t vgId; int32_t dnodeId; - char db[TSDB_DB_FNAME_LEN]; uint64_t dbUid; -} SDropVnodeMsg, SSyncVnodeMsg, SCompactVnodeMsg; + char db[TSDB_DB_FNAME_LEN]; +} SDropVnodeReq, SSyncVnodeReq, SCompactVnodeReq; typedef struct { int32_t vgId; int8_t accessState; -} SAuthVnodeMsg; +} SAuthVnodeReq; typedef struct { SMsgHead header; @@ -889,6 +909,7 @@ typedef struct { typedef struct { char db[TSDB_DB_FNAME_LEN]; + int64_t uid; int32_t vgVersion; int32_t vgNum; int8_t hashMethod; @@ -921,26 +942,26 @@ typedef struct SShowRsp { typedef struct { char fqdn[TSDB_FQDN_LEN]; // end point, hostname:port int32_t port; -} SCreateDnodeMsg; +} SCreateDnodeReq; typedef struct { int32_t dnodeId; -} SDropDnodeMsg; +} SDropDnodeReq; typedef struct { int32_t dnodeId; char config[TSDB_DNODE_CONFIG_LEN]; -} SCfgDnodeMsg; +} SMCfgDnodeReq, SDCfgDnodeReq; typedef struct { int32_t dnodeId; -} SMCreateMnodeMsg, SMDropMnodeMsg, SDDropMnodeMsg; +} SMCreateMnodeReq, SMDropMnodeReq, SDDropMnodeReq; typedef struct { int32_t dnodeId; int8_t replica; SReplica replicas[TSDB_MAX_REPLICA]; -} SDCreateMnodeMsg, SDAlterMnodeMsg; +} SDCreateMnodeReq, SDAlterMnodeReq; typedef struct { int32_t dnodeId; @@ -986,7 +1007,7 @@ typedef struct { int32_t numOfStreams; char app[TSDB_APP_NAME_LEN]; char pData[]; -} SHeartBeatMsg; +} SHeartBeatReq; typedef struct { int32_t connId; @@ -999,19 +1020,14 @@ typedef struct { SEpSet epSet; } SHeartBeatRsp; 
-typedef struct { - int32_t connId; - int32_t streamId; -} SKillStreamMsg; - typedef struct { int32_t connId; int32_t queryId; -} SKillQueryMsg; +} SKillQueryReq; typedef struct { int32_t connId; -} SKillConnMsg; +} SKillConnReq; typedef struct { char user[TSDB_USER_LEN]; @@ -1019,7 +1035,7 @@ typedef struct { char encrypt; char secret[TSDB_PASSWORD_LEN]; char ckey[TSDB_PASSWORD_LEN]; -} SAuthMsg, SAuthRsp; +} SAuthReq, SAuthRsp; typedef struct { int8_t finished; diff --git a/include/dnode/mnode/sdb/sdb.h b/include/dnode/mnode/sdb/sdb.h index c7198eee6f..5a4ac6a96f 100644 --- a/include/dnode/mnode/sdb/sdb.h +++ b/include/dnode/mnode/sdb/sdb.h @@ -281,6 +281,15 @@ int32_t sdbGetSize(SSdb *pSdb, ESdbType type); */ int32_t sdbGetMaxId(SSdb *pSdb, ESdbType type); +/** + * @brief Get the version of the table + * + * @param pSdb The sdb object. + * @param type The type of the table. + * @return int64_t The version of the table + */ +int64_t sdbGetTableVer(SSdb *pSdb, ESdbType type); + /** * @brief Update the version of sdb * diff --git a/include/dnode/vnode/tsdb/tsdb.h b/include/dnode/vnode/tsdb/tsdb.h index e5522ddbd3..c19152de44 100644 --- a/include/dnode/vnode/tsdb/tsdb.h +++ b/include/dnode/vnode/tsdb/tsdb.h @@ -22,21 +22,45 @@ extern "C" { #endif +typedef struct SDataStatis { + int16_t colId; + int64_t sum; + int64_t max; + int64_t min; + int16_t maxIndex; + int16_t minIndex; + int16_t numOfNull; +} SDataStatis; + +typedef struct STable { + int32_t tid; + uint64_t uid; + STSchema *pSchema; +} STable; + +#define TABLE_TID(t) (t)->tid +#define TABLE_UID(t) (t)->uid + // TYPES EXPOSED typedef struct STsdb STsdb; typedef struct STsdbCfg { + int8_t precision; uint64_t lruCacheSize; - uint32_t keep0; - uint32_t keep1; - uint32_t keep2; + int32_t daysPerFile; + int32_t minRowsPerFileBlock; + int32_t maxRowsPerFileBlock; + int32_t keep; + int32_t keep1; + int32_t keep2; + int8_t update; } STsdbCfg; // STsdb STsdb *tsdbOpen(const char *path, const STsdbCfg *pTsdbCfg, 
SMemAllocatorFactory *pMAF); void tsdbClose(STsdb *); void tsdbRemove(const char *path); -int tsdbInsertData(STsdb *pTsdb, SSubmitMsg *pMsg); +int tsdbInsertData(STsdb *pTsdb, SSubmitMsg *pMsg, SSubmitRsp *pRsp); int tsdbPrepareCommit(STsdb *pTsdb); int tsdbCommit(STsdb *pTsdb); diff --git a/include/dnode/vnode/tsdb2/tsdb.h b/include/dnode/vnode/tsdb2/tsdb.h new file mode 100644 index 0000000000..49840ae231 --- /dev/null +++ b/include/dnode/vnode/tsdb2/tsdb.h @@ -0,0 +1,433 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#ifndef _TD_TSDB_H_ +#define _TD_TSDB_H_ + +#include +#include +#include + +#include "common.h" +#include "taosdef.h" +#include "tarray.h" +#include "tdataformat.h" +#include "thash.h" +#include "tlist.h" +#include "tlockfree.h" +#include "tmsg.h" +#include "tname.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define TSDB_VERSION_MAJOR 1 +#define TSDB_VERSION_MINOR 0 + +#define TSDB_INVALID_SUPER_TABLE_ID -1 + +#define TSDB_STATUS_COMMIT_START 1 +#define TSDB_STATUS_COMMIT_OVER 2 +#define TSDB_STATUS_COMMIT_NOBLOCK 3 // commit no block, need to be solved + +// TSDB STATE DEFINITION +#define TSDB_STATE_OK 0x0 +#define TSDB_STATE_BAD_META 0x1 +#define TSDB_STATE_BAD_DATA 0x2 + +typedef struct SDataStatis { + int16_t colId; + int64_t sum; + int64_t max; + int64_t min; + int16_t maxIndex; + int16_t minIndex; + int16_t numOfNull; +} SDataStatis; + +// --------- TSDB APPLICATION HANDLE DEFINITION + +// --------- TSDB REPOSITORY CONFIGURATION DEFINITION +typedef struct { + int32_t tsdbId; + int32_t cacheBlockSize; + int32_t totalBlocks; + int32_t daysPerFile; // day per file sharding policy + int32_t keep; // day of data to keep + int32_t keep1; + int32_t keep2; + int32_t lruCacheSize; + int32_t minRowsPerFileBlock; // minimum rows per file block + int32_t maxRowsPerFileBlock; // maximum rows per file block + int8_t precision; + int8_t compression; + int8_t update; + int8_t cacheLastRow; // 0:no cache, 1: cache last row, 2: cache last NULL column 3: 1&2 +} STsdbCfg; + +#define CACHE_NO_LAST(c) ((c)->cacheLastRow == 0) +#define CACHE_LAST_ROW(c) (((c)->cacheLastRow & 1) > 0) +#define CACHE_LAST_NULL_COLUMN(c) (((c)->cacheLastRow & 2) > 0) + +// --------- TSDB REPOSITORY USAGE STATISTICS +typedef struct { + int64_t totalStorage; // total bytes occupie + int64_t compStorage; + int64_t pointsWritten; // total data points written +} STsdbStat; + +typedef struct STsdb STsdb; + +STsdbCfg *tsdbGetCfg(const STsdb *repo); + +// --------- TSDB REPOSITORY DEFINITION +// 
int32_t tsdbCreateRepo(int repoid); +// int32_t tsdbDropRepo(int repoid); +STsdb * tsdbOpen(STsdbCfg *pCfg, STsdbAppH *pAppH); +int tsdbClose(STsdb *repo, int toCommit); +int32_t tsdbConfigRepo(STsdb *repo, STsdbCfg *pCfg); +int tsdbGetState(STsdb *repo); +int8_t tsdbGetCompactState(STsdb *repo); +// --------- TSDB TABLE DEFINITION +typedef struct { + uint64_t uid; // the unique table ID + int32_t tid; // the table ID in the repository. +} STableId; + +// --------- TSDB TABLE configuration +typedef struct { + ETableType type; + char * name; + STableId tableId; + int32_t sversion; + char * sname; // super table name + uint64_t superUid; + STSchema * schema; + STSchema * tagSchema; + SKVRow tagValues; + char * sql; +} STableCfg; + +void tsdbClearTableCfg(STableCfg *config); + +void *tsdbGetTableTagVal(const void *pTable, int32_t colId, int16_t type); +char *tsdbGetTableName(void *pTable); + +#define TSDB_TABLEID(_table) ((STableId *)(_table)) +#define TSDB_PREV_ROW 0x1 +#define TSDB_NEXT_ROW 0x2 + +STableCfg *tsdbCreateTableCfgFromMsg(SMDCreateTableMsg *pMsg); + +int tsdbCreateTable(STsdb *repo, STableCfg *pCfg); +int tsdbDropTable(STsdb *pRepo, STableId tableId); +int tsdbUpdateTableTagValue(STsdb *repo, SUpdateTableTagValMsg *pMsg); + +uint32_t tsdbGetFileInfo(STsdb *repo, char *name, uint32_t *index, uint32_t eindex, int64_t *size); + +// the TSDB repository info +typedef struct STsdbRepoInfo { + STsdbCfg tsdbCfg; + uint64_t version; // version of the repository + int64_t tsdbTotalDataSize; // the original inserted data size + int64_t tsdbTotalDiskSize; // the total disk size taken by this TSDB repository + // TODO: Other informations to add +} STsdbRepoInfo; +STsdbRepoInfo *tsdbGetStatus(STsdb *pRepo); + +// the meter information report structure +typedef struct { + STableCfg tableCfg; + uint64_t version; + int64_t tableTotalDataSize; // In bytes + int64_t tableTotalDiskSize; // In bytes +} STableInfo; + +// -- FOR INSERT DATA +/** + * Insert data to a table in a 
repository + * @param pRepo the TSDB repository handle + * @param pData the data to insert (will give a more specific description) + * + * @return the number of points inserted, -1 for failure and the error number is set + */ +int32_t tsdbInsertData(STsdb *repo, SSubmitMsg *pMsg, SShellSubmitRspMsg *pRsp); + +// -- FOR QUERY TIME SERIES DATA + +typedef void *TsdbQueryHandleT; // Use void to hide implementation details + +#define BLOCK_LOAD_OFFSET_SEQ_ORDER 1 +#define BLOCK_LOAD_TABLE_SEQ_ORDER 2 +#define BLOCK_LOAD_TABLE_RR_ORDER 3 + +// query condition to build multi-table data block iterator +typedef struct STsdbQueryCond { + STimeWindow twindow; + int32_t order; // desc|asc order to iterate the data block + int64_t offset; // skip offset put down to tsdb + int32_t numOfCols; + SColumnInfo *colList; + bool loadExternalRows; // load external rows or not + int32_t type; // data block load type: +} STsdbQueryCond; + +typedef struct STableData STableData; +typedef struct { + T_REF_DECLARE() + SRWLatch latch; + TSKEY keyFirst; + TSKEY keyLast; + int64_t numOfRows; + int32_t maxTables; + STableData **tData; + SList * actList; + SList * extraBuffList; + SList * bufBlockList; + int64_t pointsAdd; // TODO + int64_t storageAdd; // TODO +} SMemTable; + +typedef struct { + SMemTable *mem; + SMemTable *imem; + SMemTable mtable; + SMemTable *omem; +} SMemSnapshot; + +typedef struct SMemRef { + int32_t ref; + SMemSnapshot snapshot; +} SMemRef; + +#if 0 +typedef struct SFileBlockInfo { + int32_t numBlocksOfStep; +} SFileBlockInfo; + +typedef struct { + void *pTable; + TSKEY lastKey; +} STableKeyInfo; + +typedef struct { + uint32_t numOfTables; + SArray * pGroupList; + SHashObj *map; // speedup acquire the tableQueryInfo by table uid +} STableGroupInfo; + +#define TSDB_BLOCK_DIST_STEP_ROWS 16 +typedef struct { + uint16_t rowSize; + uint16_t numOfFiles; + uint32_t numOfTables; + uint64_t totalSize; + uint64_t totalRows; + int32_t maxRows; + int32_t minRows; + int32_t 
firstSeekTimeUs; + uint32_t numOfRowsInMemTable; + uint32_t numOfSmallBlocks; + SArray * dataBlockInfos; +} STableBlockDist; + +/** + * Get the data block iterator, starting from position according to the query condition + * + * @param tsdb tsdb handle + * @param pCond query condition, including time window, result set order, and basic required columns for each block + * @param tableInfoGroup table object list in the form of set, grouped into different sets according to the + * group by condition + * @param qinfo query info handle from query processor + * @return + */ +TsdbQueryHandleT *tsdbQueryTables(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *tableInfoGroup, uint64_t qId, + SMemRef *pRef); + +/** + * Get the last row of the given query time window for all the tables in STableGroupInfo object. + * Note that only one data block with only row will be returned while invoking retrieve data block function for + * all tables in this group. + * + * @param tsdb tsdb handle + * @param pCond query condition, including time window, result set order, and basic required columns for each block + * @param tableInfo table list. 
+ * @return + */ +TsdbQueryHandleT tsdbQueryLastRow(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *tableInfo, uint64_t qId, + SMemRef *pRef); + +TsdbQueryHandleT tsdbQueryCacheLast(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, uint64_t qId, + SMemRef *pMemRef); + +bool isTsdbCacheLastRow(TsdbQueryHandleT *pQueryHandle); + +/** + * get the queried table object list + * @param pHandle + * @return + */ +SArray *tsdbGetQueriedTableList(TsdbQueryHandleT *pHandle); + +/** + * get the group list according to table id from client + * @param tsdb + * @param pCond + * @param groupList + * @param qinfo + * @return + */ +TsdbQueryHandleT tsdbQueryRowsInExternalWindow(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, + uint64_t qId, SMemRef *pRef); + +/** + * get num of rows in mem table + * + * @param pHandle + * @return row size + */ + +int64_t tsdbGetNumOfRowsInMemTable(TsdbQueryHandleT *pHandle); + +/** + * move to next block if exists + * + * @param pQueryHandle + * @return + */ +bool tsdbNextDataBlock(TsdbQueryHandleT pQueryHandle); + +/** + * Get current data block information + * + * @param pQueryHandle + * @param pBlockInfo + * @return + */ +void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT *pQueryHandle, SDataBlockInfo *pBlockInfo); + +/** + * + * Get the pre-calculated information w.r.t. current data block. + * + * In case of data block in cache, the pBlockStatis will always be NULL. + * If a block is not completed loaded from disk, the pBlockStatis will be NULL. + + * @pBlockStatis the pre-calculated value for current data blocks. 
if the block is a cache block, always return 0 + * @return + */ +int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT *pQueryHandle, SDataStatis **pBlockStatis); + +/** + * + * The query condition with primary timestamp is passed to iterator during its constructor function, + * the returned data block must be satisfied with the time window condition in any cases, + * which means the SData data block is not actually the completed disk data blocks. + * + * @param pQueryHandle query handle + * @param pColumnIdList required data columns id list + * @return + */ +SArray *tsdbRetrieveDataBlock(TsdbQueryHandleT *pQueryHandle, SArray *pColumnIdList); + +/** + * Get the qualified table id for a super table according to the tag query expression. + * @param stableid. super table sid + * @param pTagCond. tag query condition + */ +int32_t tsdbQuerySTableByTagCond(STsdb *tsdb, uint64_t uid, TSKEY key, const char *pTagCond, size_t len, + STableGroupInfo *pGroupList, SColIndex *pColIndex, int32_t numOfCols); + +/** + * destroy the created table group list, which is generated by tag query + * @param pGroupList + */ +void tsdbDestroyTableGroup(STableGroupInfo *pGroupList); + +/** + * create the table group result including only one table, used to handle the normal table query + * + * @param tsdb tsdbHandle + * @param uid table uid + * @param pGroupInfo the generated result + * @return + */ +int32_t tsdbGetOneTableGroup(STsdb *tsdb, uint64_t uid, TSKEY startKey, STableGroupInfo *pGroupInfo); + +/** + * + * @param tsdb + * @param pTableIdList + * @param pGroupInfo + * @return + */ +int32_t tsdbGetTableGroupFromIdList(STsdb *tsdb, SArray *pTableIdList, STableGroupInfo *pGroupInfo); + +/** + * clean up the query handle + * @param queryHandle + */ +void tsdbCleanupQueryHandle(TsdbQueryHandleT queryHandle); + +void tsdbResetQueryHandle(TsdbQueryHandleT queryHandle, STsdbQueryCond *pCond); + +void tsdbResetQueryHandleForNewTable(TsdbQueryHandleT queryHandle, STsdbQueryCond *pCond, 
STableGroupInfo *groupList); + +int32_t tsdbGetFileBlocksDistInfo(TsdbQueryHandleT *queryHandle, STableBlockDist *pTableBlockInfo); + +// obtain queryHandle attribute +int64_t tsdbSkipOffset(TsdbQueryHandleT queryHandle); + +/** + * get the statistics of repo usage + * @param repo. point to the tsdbrepo + * @param totalPoints. total data point written + * @param totalStorage. total bytes took by the tsdb + * @param compStorage. total bytes took by the tsdb after compressed + */ +void tsdbReportStat(void *repo, int64_t *totalPoints, int64_t *totalStorage, int64_t *compStorage); + +int tsdbInitCommitQueue(); +void tsdbDestroyCommitQueue(); +int tsdbSyncCommit(STsdb *repo); +void tsdbIncCommitRef(int vgId); +void tsdbDecCommitRef(int vgId); +void tsdbSwitchTable(TsdbQueryHandleT pQueryHandle); + +// For TSDB file sync +int tsdbSyncSend(void *pRepo, SOCKET socketFd); +int tsdbSyncRecv(void *pRepo, SOCKET socketFd); + +// // For TSDB Compact +// int tsdbCompact(STsdb *pRepo); + +// For TSDB Health Monitor + +// // no problem return true +// bool tsdbNoProblem(STsdb *pRepo); +// // unit of walSize: MB +// int tsdbCheckWal(STsdb *pRepo, uint32_t walSize); + +// // for json tag +// void *getJsonTagValueElment(void *data, char *key, int32_t keyLen, char *out, int16_t bytes); +// void getJsonTagValueAll(void *data, void *dst, int16_t bytes); +// char *parseTagDatatoJson(void *p); +#endif + +#ifdef __cplusplus +} +#endif + +#endif // _TD_TSDB_H_ diff --git a/include/dnode/vnode/vnode.h b/include/dnode/vnode/vnode.h index af56d69b11..f3ad0b0176 100644 --- a/include/dnode/vnode/vnode.h +++ b/include/dnode/vnode/vnode.h @@ -32,6 +32,8 @@ extern "C" { /* ------------------------ TYPES EXPOSED ------------------------ */ typedef struct SVnode SVnode; typedef struct SVnodeCfg { + int32_t vgId; + /** vnode buffer pool options */ struct { /** write buffer size */ diff --git a/include/libs/catalog/catalog.h b/include/libs/catalog/catalog.h index 3916898829..70cff7ed1a 100644 --- 
a/include/libs/catalog/catalog.h +++ b/include/libs/catalog/catalog.h @@ -48,8 +48,22 @@ typedef struct SMetaData { typedef struct SCatalogCfg { uint32_t maxTblCacheNum; uint32_t maxDBCacheNum; + uint32_t dbRentSec; + uint32_t stableRentSec; } SCatalogCfg; +typedef struct SSTableMetaVersion { + uint64_t suid; + int16_t sversion; + int16_t tversion; +} SSTableMetaVersion; + +typedef struct SDbVgVersion { + int64_t dbId; + int32_t vgVersion; +} SDbVgVersion; + + int32_t catalogInit(SCatalogCfg *cfg); /** @@ -60,19 +74,27 @@ int32_t catalogInit(SCatalogCfg *cfg); */ int32_t catalogGetHandle(uint64_t clusterId, struct SCatalog** catalogHandle); +/** + * Free all catalog info of a cluster; usually this is only necessary when the application is closing. + * The application must guarantee that the handle has no current or future usage. + * @param pCatalog (input, NO more usage) + * @return none (the function returns void) + */ +void catalogFreeHandle(struct SCatalog* pCatalog); + int32_t catalogGetDBVgroupVersion(struct SCatalog* pCatalog, const char* dbName, int32_t* version); /** * Get a DB's all vgroup info. 
* @param pCatalog (input, got with catalogGetHandle) - * @param pRpc (input, rpc object) + * @param pTransporter (input, rpc object) * @param pMgmtEps (input, mnode EPs) * @param pDBName (input, full db name) * @param forceUpdate (input, force update db vgroup info from mnode) * @param pVgroupList (output, vgroup info list, element is SVgroupInfo, NEED to simply free the array by caller) * @return error code */ -int32_t catalogGetDBVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const char* pDBName, int32_t forceUpdate, SArray** pVgroupList); +int32_t catalogGetDBVgroup(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const char* pDBName, bool forceUpdate, SArray** pVgroupList); int32_t catalogUpdateDBVgroup(struct SCatalog* pCatalog, const char* dbName, SDBVgroupInfo* dbInfo); @@ -87,15 +109,28 @@ int32_t catalogUpdateDBVgroup(struct SCatalog* pCatalog, const char* dbName, SDB */ int32_t catalogGetTableMeta(struct SCatalog* pCatalog, void * pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta); +/** + * Get a super table's meta data. + * @param pCatalog (input, got with catalogGetHandle) + * @param pTransporter (input, rpc object) + * @param pMgmtEps (input, mnode EPs) + * @param pTableName (input, table name, NOT including db name) + * @param pTableMeta (output, table meta data, NEED to be freed by the caller) + * @return error code + */ +int32_t catalogGetSTableMeta(struct SCatalog* pCatalog, void * pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta); + + /** * Force renew a table's local cached meta data. 
* @param pCatalog (input, got with catalogGetHandle) * @param pTransporter (input, rpc object) * @param pMgmtEps (input, mnode EPs) * @param pTableName (input, table name, NOT including db name) + * @param isSTable (input, is super table or not, 1:supposed to be stable, 0: supposed not to be stable, -1:not sure) * @return error code */ -int32_t catalogRenewTableMeta(struct SCatalog* pCatalog, void * pTransporter, const SEpSet* pMgmtEps, const SName* pTableName); + int32_t catalogRenewTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, int32_t isSTable); /** * Force renew a table's local cached meta data and get the new one. @@ -104,21 +139,23 @@ int32_t catalogRenewTableMeta(struct SCatalog* pCatalog, void * pTransporter, co * @param pMgmtEps (input, mnode EPs) * @param pTableName (input, table name, NOT including db name) * @param pTableMeta(output, table meta data, NEED to free it by calller) + * @param isSTable (input, is super table or not, 1:supposed to be stable, 0: supposed not to be stable, -1:not sure) * @return error code */ -int32_t catalogRenewAndGetTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta); + int32_t catalogRenewAndGetTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta, int32_t isSTable); + /** * Get a table's actual vgroup, for stable it's all possible vgroup list. 
* @param pCatalog (input, got with catalogGetHandle) - * @param pRpc (input, rpc object) + * @param pTransporter (input, rpc object) * @param pMgmtEps (input, mnode EPs) * @param pTableName (input, table name, NOT including db name) * @param pVgroupList (output, vgroup info list, element is SVgroupInfo, NEED to simply free the array by caller) * @return error code */ -int32_t catalogGetTableDistVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, SArray** pVgroupList); +int32_t catalogGetTableDistVgroup(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, SArray** pVgroupList); /** * Get a table's vgroup from its name's hash value. @@ -135,17 +172,20 @@ int32_t catalogGetTableHashVgroup(struct SCatalog* pCatalog, void * pTransporter /** * Get all meta data required in pReq. * @param pCatalog (input, got with catalogGetHandle) - * @param pRpc (input, rpc object) + * @param pTransporter (input, rpc object) * @param pMgmtEps (input, mnode EPs) * @param pReq (input, reqest info) * @param pRsp (output, response data) * @return error code */ -int32_t catalogGetAllMeta(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SCatalogReq* pReq, SMetaData* pRsp); +int32_t catalogGetAllMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SCatalogReq* pReq, SMetaData* pRsp); -int32_t catalogGetQnodeList(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, SArray* pQnodeList); +int32_t catalogGetQnodeList(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, SArray* pQnodeList); +int32_t catalogGetExpiredSTables(struct SCatalog* pCatalog, SSTableMetaVersion **stables, uint32_t *num); + +int32_t catalogGetExpiredDBs(struct SCatalog* pCatalog, SDbVgVersion **dbs, uint32_t *num); /** diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index c3c7d740f7..17c11b5d09 100644 --- 
a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -20,16 +20,16 @@ extern "C" { #endif -typedef void* qinfo_t; +typedef void* qTaskInfo_t; /** * create the qinfo object according to QueryTableMsg * @param tsdb * @param pQueryTableMsg - * @param qinfo + * @param pTaskInfo * @return */ -int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableInfo* pQueryTableMsg, qinfo_t* qinfo, uint64_t qId); +int32_t qCreateTask(void* tsdb, int32_t vgId, void* pQueryTableMsg, qTaskInfo_t* pTaskInfo, uint64_t qId); /** * the main query execution function, including query on both table and multiple tables, @@ -38,7 +38,7 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableInfo* pQueryTableM * @param qinfo * @return */ -bool qTableQuery(qinfo_t qinfo, uint64_t *qId); +bool qExecTask(qTaskInfo_t qinfo, uint64_t *qId); /** * Retrieve the produced results information, if current query is not paused or completed, @@ -48,7 +48,7 @@ bool qTableQuery(qinfo_t qinfo, uint64_t *qId); * @param qinfo * @return */ -int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext); +int32_t qRetrieveQueryResultInfo(qTaskInfo_t qinfo, bool* buildRes, void* pRspContext); /** * @@ -60,41 +60,41 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContex * @param contLen payload length * @return */ -int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen, bool* continueExec); +int32_t qDumpRetrieveResult(qTaskInfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen, bool* continueExec); /** * return the transporter context (RPC) * @param qinfo * @return */ -void* qGetResultRetrieveMsg(qinfo_t qinfo); +void* qGetResultRetrieveMsg(qTaskInfo_t qinfo); /** * kill the ongoing query and free the query handle and corresponding resources automatically * @param qinfo qhandle * @return */ -int32_t qKillQuery(qinfo_t qinfo); +int32_t qKillTask(qTaskInfo_t qinfo); /** * return whether query is 
completed or not * @param qinfo * @return */ -int32_t qIsQueryCompleted(qinfo_t qinfo); +int32_t qIsQueryCompleted(qTaskInfo_t qinfo); /** * destroy query info structure * @param qHandle */ -void qDestroyQueryInfo(qinfo_t qHandle); +void qDestroyTask(qTaskInfo_t qHandle); /** * Get the queried table uid * @param qHandle * @return */ -int64_t qGetQueriedTableUid(qinfo_t qHandle); +int64_t qGetQueriedTableUid(qTaskInfo_t qHandle); /** * Extract the qualified table id list, and than pass them to the TSDB driver to load the required table data blocks. @@ -121,7 +121,7 @@ int32_t qCreateTableGroupByGroupExpr(SArray* pTableIdList, TSKEY skey, STableGro * @param type operation type: ADD|DROP * @return */ -int32_t qUpdateQueriedTableIdList(qinfo_t qinfo, int64_t uid, int32_t type); +int32_t qUpdateQueriedTableIdList(qTaskInfo_t qinfo, int64_t uid, int32_t type); //================================================================================================ // query handle management @@ -130,13 +130,13 @@ int32_t qUpdateQueriedTableIdList(qinfo_t qinfo, int64_t uid, int32_t type); * @param vgId * @return */ -void* qOpenQueryMgmt(int32_t vgId); +void* qOpenTaskMgmt(int32_t vgId); /** * broadcast the close information and wait for all query stop. * @param pExecutor */ -void qQueryMgmtNotifyClosed(void* pExecutor); +void qTaskMgmtNotifyClosing(void* pExecutor); /** * Re-open the query handle management module when opening the vnode again. @@ -148,7 +148,7 @@ void qQueryMgmtReOpen(void *pExecutor); * Close query mgmt and clean up resources. 
* @param pExecutor */ -void qCleanupQueryMgmt(void* pExecutor); +void qCleanupTaskMgmt(void* pExecutor); /** * Add the query into the query mgmt object @@ -157,7 +157,7 @@ void qCleanupQueryMgmt(void* pExecutor); * @param qInfo * @return */ -void** qRegisterQInfo(void* pMgmt, uint64_t qId, void *qInfo); +void** qRegisterTask(void* pMgmt, uint64_t qId, void *qInfo); /** * acquire the query handle according to the key from query mgmt object. @@ -165,7 +165,7 @@ void** qRegisterQInfo(void* pMgmt, uint64_t qId, void *qInfo); * @param key * @return */ -void** qAcquireQInfo(void* pMgmt, uint64_t key); +void** qAcquireTask(void* pMgmt, uint64_t key); /** * release the query handle and decrease the reference count in cache @@ -174,7 +174,7 @@ void** qAcquireQInfo(void* pMgmt, uint64_t key); * @param freeHandle * @return */ -void** qReleaseQInfo(void* pMgmt, void* pQInfo); +void** qReleaseTask(void* pMgmt, void* pQInfo, bool freeHandle); /** * De-register the query handle from the management module and free it immediately. 
diff --git a/include/libs/function/function.h b/include/libs/function/function.h index d7360a81bc..bf2937a220 100644 --- a/include/libs/function/function.h +++ b/include/libs/function/function.h @@ -89,7 +89,7 @@ enum { }; enum { - MASTER_SCAN = 0x0u, + MAIN_SCAN = 0x0u, REVERSE_SCAN = 0x1u, REPEAT_SCAN = 0x2u, //repeat scan belongs to the master scan MERGE_STAGE = 0x20u, @@ -183,7 +183,6 @@ typedef struct tExprNode { struct {// function node char functionName[FUNCTIONS_NAME_MAX_LENGTH]; -// int32_t functionId; int32_t num; // Note that the attribute of pChild is not the parameter of function, it is the columns that involved in the diff --git a/include/libs/index/index.h b/include/libs/index/index.h index d2b157542f..47eb97cc3a 100644 --- a/include/libs/index/index.h +++ b/include/libs/index/index.h @@ -76,25 +76,20 @@ void indexOptsDestroy(SIndexOpts* opts); * @param: */ -SIndexTerm* indexTermCreate(int64_t suid, - SIndexOperOnColumn operType, - uint8_t colType, - const char* colName, - int32_t nColName, - const char* colVal, - int32_t nColVal); +SIndexTerm* indexTermCreate(int64_t suid, SIndexOperOnColumn operType, uint8_t colType, const char* colName, + int32_t nColName, const char* colVal, int32_t nColVal); void indexTermDestroy(SIndexTerm* p); /* - * init index - * - */ -int32_t indexInit(); -/* - * destory index + * init index env * */ +void indexInit(); +/* + * destory index env + * + */ void indexCleanUp(); #ifdef __cplusplus diff --git a/include/libs/parser/parsenodes.h b/include/libs/parser/parsenodes.h index 18596a9e18..ac8a10067d 100644 --- a/include/libs/parser/parsenodes.h +++ b/include/libs/parser/parsenodes.h @@ -135,9 +135,8 @@ typedef struct SQueryStmtInfo { SArray *pUdfInfo; struct SQueryStmtInfo *sibling; // sibling - struct SQueryStmtInfo *pDownstream; SMultiFunctionsDesc info; - SArray *pUpstream; // SArray + SArray *pDownstream; // SArray int32_t havingFieldNum; int32_t exprListLevelIndex; } SQueryStmtInfo; diff --git 
a/include/libs/parser/parser.h b/include/libs/parser/parser.h index 5bd18641bf..edf9cf461f 100644 --- a/include/libs/parser/parser.h +++ b/include/libs/parser/parser.h @@ -24,7 +24,6 @@ extern "C" { typedef struct SParseContext { SParseBasicCtx ctx; - int8_t schemaAttached; // denote if submit block is built with table schema or not const char *pSql; // sql string size_t sqlLen; // length of the sql string char *pMsg; // extended error message if exists to help identifying the problem in sql statement. @@ -41,8 +40,17 @@ typedef struct SParseContext { */ int32_t qParseQuerySql(SParseContext* pContext, SQueryNode** pQuery); -bool qIsDdlQuery(const SQueryNode* pQuery); +/** + * Return true if it is a ddl/dcl sql statement + * @param pQuery + * @return + */ +bool qIsDdlQuery(const SQueryNode* pQueryNode); +/** + * Destroy logic query plan + * @param pQueryNode + */ void qDestroyQuery(SQueryNode* pQueryNode); /** @@ -62,8 +70,8 @@ void columnListDestroy(SArray* pColumnList); void dropAllExprInfo(SArray** pExprInfo, int32_t numOfLevel); typedef struct SSourceParam { - SArray *pExprNodeList; //Array - SArray *pColumnList; //Array + SArray *pExprNodeList; //Array + SArray *pColumnList; //Array int32_t num; } SSourceParam; diff --git a/include/libs/planner/planner.h b/include/libs/planner/planner.h index ae76b022f2..c5da68f0a6 100644 --- a/include/libs/planner/planner.h +++ b/include/libs/planner/planner.h @@ -51,8 +51,10 @@ struct SQueryStmtInfo; typedef SSchema SSlotSchema; typedef struct SDataBlockSchema { - SSlotSchema *pSchema; - int32_t numOfCols; // number of columns + SSlotSchema *pSchema; + int32_t numOfCols; // number of columns + int32_t resultRowSize; + int16_t precision; } SDataBlockSchema; typedef struct SQueryNodeBasicInfo { @@ -62,6 +64,7 @@ typedef struct SQueryNodeBasicInfo { typedef struct SDataSink { SQueryNodeBasicInfo info; + SDataBlockSchema schema; } SDataSink; typedef struct SDataDispatcher { @@ -140,16 +143,20 @@ typedef struct SQueryDag { struct 
SQueryNode; -/** - * Create the physical plan for the query, according to the AST. - */ + /** + * Create the physical plan for the query, according to the AST. + * @param pQueryInfo + * @param pDag + * @param requestId + * @return + */ int32_t qCreateQueryDag(const struct SQueryNode* pQueryInfo, struct SQueryDag** pDag, uint64_t requestId); // Set datasource of this subplan, multiple calls may be made to a subplan. // @subplan subplan to be schedule // @templateId templateId of a group of datasource subplans of this @subplan // @ep one execution location of this group of datasource subplans -int32_t qSetSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep); +void qSetSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep); int32_t qExplainQuery(const struct SQueryNode* pQueryInfo, struct SEpSet* pQnode, char** str); diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index 17a06a941a..a3ee59e2e0 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -76,6 +76,7 @@ typedef struct STableMeta { typedef struct SDBVgroupInfo { SRWLatch lock; + int64_t dbId; int32_t vgVersion; int8_t hashMethod; SHashObj *vgInfo; //key:vgId, value:SVgroupInfo @@ -86,8 +87,16 @@ typedef struct SUseDbOutput { SDBVgroupInfo dbVgroup; } SUseDbOutput; +enum { + META_TYPE_NON_TABLE = 1, + META_TYPE_CTABLE, + META_TYPE_TABLE, + META_TYPE_BOTH_TABLE +}; + + typedef struct STableMetaOutput { - int32_t metaNum; + int32_t metaType; char ctbFname[TSDB_TABLE_FNAME_LEN]; char tbFname[TSDB_TABLE_FNAME_LEN]; SCTableMeta ctbMeta; @@ -100,6 +109,11 @@ void initQueryModuleMsgHandle(); extern int32_t (*queryBuildMsg[TDMT_MAX])(void* input, char **msg, int32_t msgSize, int32_t *msgLen); extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char *msg, int32_t msgSize); +#define SET_META_TYPE_NONE(t) (t) = META_TYPE_NON_TABLE +#define SET_META_TYPE_CTABLE(t) (t) = META_TYPE_CTABLE +#define SET_META_TYPE_TABLE(t) (t) = 
META_TYPE_TABLE +#define SET_META_TYPE_BOTH_TABLE(t) (t) = META_TYPE_BOTH_TABLE + #define qFatal(...) do { if (qDebugFlag & DEBUG_FATAL) { taosPrintLog("QRY FATAL ", qDebugFlag, __VA_ARGS__); }} while(0) #define qError(...) do { if (qDebugFlag & DEBUG_ERROR) { taosPrintLog("QRY ERROR ", qDebugFlag, __VA_ARGS__); }} while(0) #define qWarn(...) do { if (qDebugFlag & DEBUG_WARN) { taosPrintLog("QRY WARN ", qDebugFlag, __VA_ARGS__); }} while(0) diff --git a/include/libs/scheduler/scheduler.h b/include/libs/scheduler/scheduler.h index b2ba7acebf..74b7813465 100644 --- a/include/libs/scheduler/scheduler.h +++ b/include/libs/scheduler/scheduler.h @@ -24,7 +24,7 @@ extern "C" { #include "catalog.h" typedef struct SSchedulerCfg { - int32_t maxJobNum; + uint32_t maxJobNum; } SSchedulerCfg; typedef struct SQueryProfileSummary { @@ -75,6 +75,12 @@ int32_t scheduleExecJob(void *transport, SArray *nodeList, SQueryDag* pDag, void */ int32_t scheduleAsyncExecJob(void *transport, SArray *nodeList, SQueryDag* pDag, void** pJob); +/** + * Fetch query result from the remote query executor + * @param pJob + * @param data + * @return + */ int32_t scheduleFetchRows(void *pJob, void **data); @@ -85,6 +91,10 @@ int32_t scheduleFetchRows(void *pJob, void **data); */ int32_t scheduleCancelJob(void *pJob); +/** + * Free the query job + * @param pJob + */ void scheduleFreeJob(void *pJob); void schedulerDestroy(void); diff --git a/include/libs/tfs/tfs.h b/include/libs/tfs/tfs.h new file mode 100644 index 0000000000..6c850d1016 --- /dev/null +++ b/include/libs/tfs/tfs.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TD_TFS_H +#define TD_TFS_H + +#include "tglobal.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + int level; + int id; +} SDiskID; + +#define TFS_UNDECIDED_LEVEL -1 +#define TFS_UNDECIDED_ID -1 +#define TFS_PRIMARY_LEVEL 0 +#define TFS_PRIMARY_ID 0 +#define TFS_MIN_LEVEL 0 +#define TFS_MAX_LEVEL (TSDB_MAX_TIERS - 1) + +// FS APIs ==================================== +typedef struct { + int64_t tsize; + int64_t used; + int64_t avail; +} SFSMeta; + +typedef struct { + int64_t size; + int64_t used; + int64_t free; + int16_t nAvailDisks; // # of Available disks +} STierMeta; + +int tfsInit(SDiskCfg *pDiskCfg, int ndisk); +void tfsDestroy(); +void tfsUpdateInfo(SFSMeta *pFSMeta, STierMeta *tierMetas, int8_t numLevels); +void tfsGetMeta(SFSMeta *pMeta); +void tfsAllocDisk(int expLevel, int *level, int *id); + +const char *TFS_PRIMARY_PATH(); +const char *TFS_DISK_PATH(int level, int id); + +// TFILE APIs ==================================== +typedef struct { + int level; + int id; + char rname[TSDB_FILENAME_LEN]; // REL name + char aname[TSDB_FILENAME_LEN]; // ABS name +} TFILE; + +#define TFILE_LEVEL(pf) ((pf)->level) +#define TFILE_ID(pf) ((pf)->id) +#define TFILE_NAME(pf) ((pf)->aname) +#define TFILE_REL_NAME(pf) ((pf)->rname) + +#define tfsopen(pf, flags) open(TFILE_NAME(pf), flags) +#define tfsclose(fd) close(fd) +#define tfsremove(pf) remove(TFILE_NAME(pf)) +#define tfscopy(sf, df) taosCopyFile(TFILE_NAME(sf), TFILE_NAME(df)) +#define tfsrename(sf, df) taosRename(TFILE_NAME(sf), TFILE_NAME(df)) + +void tfsInitFile(TFILE *pf, int level, int id, const char *bname); +bool tfsIsSameFile(const TFILE *pf1, const TFILE 
*pf2); +int tfsEncodeFile(void **buf, TFILE *pf); +void *tfsDecodeFile(void *buf, TFILE *pf); +void tfsbasename(const TFILE *pf, char *dest); +void tfsdirname(const TFILE *pf, char *dest); + +// DIR APIs ==================================== +int tfsMkdirAt(const char *rname, int level, int id); +int tfsMkdirRecurAt(const char *rname, int level, int id); +int tfsMkdir(const char *rname); +int tfsRmdir(const char *rname); +int tfsRename(char *orname, char *nrname); + +typedef struct TDIR TDIR; + +TDIR * tfsOpendir(const char *rname); +const TFILE *tfsReaddir(TDIR *tdir); +void tfsClosedir(TDIR *tdir); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/os/os.h b/include/os/os.h index 972880da9c..9112b4922f 100644 --- a/include/os/os.h +++ b/include/os/os.h @@ -24,10 +24,13 @@ extern "C" { #include #include #include +#include #include #include +#include #include #include +#include #include #include #include @@ -44,6 +47,8 @@ extern "C" { #include #include #include +#include +#include #include diff --git a/include/os/osDef.h b/include/os/osDef.h index bb5395f548..040c4bc7e7 100644 --- a/include/os/osDef.h +++ b/include/os/osDef.h @@ -73,6 +73,12 @@ extern "C" { #endif +#ifndef WINDOWS + #ifndef O_BINARY + #define O_BINARY 0 + #endif +#endif + #define POINTER_SHIFT(p, b) ((void *)((char *)(p) + (b))) #define POINTER_DISTANCE(p1, p2) ((char *)(p1) - (char *)(p2)) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 4fcdae2496..80241405a6 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -259,31 +259,30 @@ int32_t* taosGetErrno(); #define TSDB_CODE_DND_DNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0411) #define TSDB_CODE_DND_MNODE_ALREADY_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0420) #define TSDB_CODE_DND_MNODE_NOT_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0421) -#define TSDB_CODE_DND_MNODE_ID_INVALID TAOS_DEF_ERROR_CODE(0, 0x0422) -#define TSDB_CODE_DND_MNODE_ID_NOT_FOUND TAOS_DEF_ERROR_CODE(0, 0x0423) -#define 
TSDB_CODE_DND_MNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0424) -#define TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0425) +#define TSDB_CODE_DND_MNODE_INVALID_OPTION TAOS_DEF_ERROR_CODE(0, 0x0422) +#define TSDB_CODE_DND_MNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0423) +#define TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0424) #define TSDB_CODE_DND_QNODE_ALREADY_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0430) #define TSDB_CODE_DND_QNODE_NOT_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0431) -#define TSDB_CODE_DND_QNODE_ID_INVALID TAOS_DEF_ERROR_CODE(0, 0x0432) -#define TSDB_CODE_DND_QNODE_ID_NOT_FOUND TAOS_DEF_ERROR_CODE(0, 0x0433) -#define TSDB_CODE_DND_QNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0434) -#define TSDB_CODE_DND_QNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0435) +#define TSDB_CODE_DND_QNODE_INVALID_OPTION TAOS_DEF_ERROR_CODE(0, 0x0432) +#define TSDB_CODE_DND_QNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0433) +#define TSDB_CODE_DND_QNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0434) #define TSDB_CODE_DND_SNODE_ALREADY_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0440) #define TSDB_CODE_DND_SNODE_NOT_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0441) -#define TSDB_CODE_DND_SNODE_ID_INVALID TAOS_DEF_ERROR_CODE(0, 0x0442) -#define TSDB_CODE_DND_SNODE_ID_NOT_FOUND TAOS_DEF_ERROR_CODE(0, 0x0443) -#define TSDB_CODE_DND_SNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0444) -#define TSDB_CODE_DND_SNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0445) +#define TSDB_CODE_DND_SNODE_INVALID_OPTION TAOS_DEF_ERROR_CODE(0, 0x0442) +#define TSDB_CODE_DND_SNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0443) +#define TSDB_CODE_DND_SNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0444) #define TSDB_CODE_DND_BNODE_ALREADY_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0450) #define TSDB_CODE_DND_BNODE_NOT_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0451) -#define TSDB_CODE_DND_BNODE_ID_INVALID TAOS_DEF_ERROR_CODE(0, 0x0452) -#define TSDB_CODE_DND_BNODE_ID_NOT_FOUND 
TAOS_DEF_ERROR_CODE(0, 0x0453) -#define TSDB_CODE_DND_BNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0454) -#define TSDB_CODE_DND_BNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0455) -#define TSDB_CODE_DND_VNODE_TOO_MANY_VNODES TAOS_DEF_ERROR_CODE(0, 0x0460) -#define TSDB_CODE_DND_VNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0461) -#define TSDB_CODE_DND_VNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0462) +#define TSDB_CODE_DND_BNODE_INVALID_OPTION TAOS_DEF_ERROR_CODE(0, 0x0452) +#define TSDB_CODE_DND_BNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0453) +#define TSDB_CODE_DND_BNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0454) +#define TSDB_CODE_DND_VNODE_ALREADY_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0460) +#define TSDB_CODE_DND_VNODE_NOT_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0461) +#define TSDB_CODE_DND_VNODE_INVALID_OPTION TAOS_DEF_ERROR_CODE(0, 0x0462) +#define TSDB_CODE_DND_VNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0463) +#define TSDB_CODE_DND_VNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0464) +#define TSDB_CODE_DND_VNODE_TOO_MANY_VNODES TAOS_DEF_ERROR_CODE(0, 0x0465) // vnode #define TSDB_CODE_VND_ACTION_IN_PROGRESS TAOS_DEF_ERROR_CODE(0, 0x0500) //"Action in progress") diff --git a/include/util/thash.h b/include/util/thash.h index 9dc6630461..3a614a73a6 100644 --- a/include/util/thash.h +++ b/include/util/thash.h @@ -124,6 +124,9 @@ int32_t taosHashGetSize(const SHashObj *pHashObj); */ int32_t taosHashPut(SHashObj *pHashObj, const void *key, size_t keyLen, void *data, size_t size); +int32_t taosHashPutExt(SHashObj *pHashObj, const void *key, size_t keyLen, void *data, size_t size, bool *newAdded); + + /** * return the payload data with the specified key * diff --git a/include/util/tlog.h b/include/util/tlog.h index a367243a46..26a5417320 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -44,7 +44,6 @@ extern int32_t tsdbDebugFlag; extern int32_t tqDebugFlag; extern int32_t cqDebugFlag; extern int32_t debugFlag; -extern int32_t ctgDebugFlag; 
#define DEBUG_FATAL 1U #define DEBUG_ERROR DEBUG_FATAL diff --git a/include/util/tqueue.h b/include/util/tqueue.h index a57bdb5ce8..63ba460d39 100644 --- a/include/util/tqueue.h +++ b/include/util/tqueue.h @@ -51,6 +51,7 @@ void taosFreeQitem(void *pItem); int32_t taosWriteQitem(STaosQueue *queue, void *pItem); int32_t taosReadQitem(STaosQueue *queue, void **ppItem); bool taosQueueEmpty(STaosQueue *queue); +int32_t taosQueueSize(STaosQueue *queue); STaosQall *taosAllocateQall(); void taosFreeQall(STaosQall *qall); diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h index 705d6ef786..26afe237c9 100644 --- a/source/client/inc/clientInt.h +++ b/source/client/inc/clientInt.h @@ -62,6 +62,7 @@ typedef struct SAppInstInfo { SList *pConnList; // STscObj linked list int64_t clusterId; void *pTransporter; + SHeartBeatInfo hb; } SAppInstInfo; typedef struct SAppInfo { @@ -70,7 +71,7 @@ typedef struct SAppInfo { char *ep; int32_t pid; int32_t numOfThreads; - SHeartBeatInfo hb; + SHashObj *pInstMap; } SAppInfo; diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 26c27a5cae..a6b04624d7 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -140,7 +140,7 @@ int32_t buildRequest(STscObj *pTscObj, const char *sql, int sqlLen, SRequestObj* (*pRequest)->sqlstr[sqlLen] = 0; (*pRequest)->sqlLen = sqlLen; - tscDebugL("0x%"PRIx64" SQL: %s, reqId:0x"PRIx64, (*pRequest)->self, (*pRequest)->sqlstr, (*pRequest)->requestId); + tscDebugL("0x%"PRIx64" SQL: %s, reqId:0x%"PRIx64, (*pRequest)->self, (*pRequest)->sqlstr, (*pRequest)->requestId); return TSDB_CODE_SUCCESS; } @@ -181,7 +181,7 @@ int32_t execDdlQuery(SRequestObj* pRequest, SQueryNode* pQuery) { if (pDcl->msgType == TDMT_VND_SHOW_TABLES) { SShowReqInfo* pShowReqInfo = &pRequest->body.showInfo; if (pShowReqInfo->pArray == NULL) { - pShowReqInfo->currentIndex = 0; + pShowReqInfo->currentIndex = 0; // set the first vnode/ then iterate the next vnode 
pShowReqInfo->pArray = pDcl->pExtension; } } @@ -291,10 +291,10 @@ TAOS_RES *taos_query_l(TAOS *taos, const char *sql, int sqlLen) { nPrintTsc("%s", sql) - SRequestObj* pRequest = NULL; - SQueryNode* pQuery = NULL; - SQueryDag* pDag = NULL; - void* pJob = NULL; + SRequestObj *pRequest = NULL; + SQueryNode *pQuery = NULL; + SQueryDag *pDag = NULL; + void *pJob = NULL; terrno = TSDB_CODE_SUCCESS; CHECK_CODE_GOTO(buildRequest(pTscObj, sql, sqlLen, &pRequest), _return); @@ -395,13 +395,13 @@ static SMsgSendInfo* buildConnectMsg(SRequestObj *pRequest) { } pMsgSendInfo->msgType = TDMT_MND_CONNECT; - pMsgSendInfo->msgInfo.len = sizeof(SConnectMsg); + pMsgSendInfo->msgInfo.len = sizeof(SConnectReq); pMsgSendInfo->requestObjRefId = pRequest->self; pMsgSendInfo->requestId = pRequest->requestId; pMsgSendInfo->fp = handleRequestRspFp[TMSG_INDEX(pMsgSendInfo->msgType)]; pMsgSendInfo->param = pRequest; - SConnectMsg *pConnect = calloc(1, sizeof(SConnectMsg)); + SConnectReq *pConnect = calloc(1, sizeof(SConnectReq)); if (pConnect == NULL) { tfree(pMsgSendInfo); terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c index 2b875b3eb5..1238976b97 100644 --- a/source/client/src/clientMain.c +++ b/source/client/src/clientMain.c @@ -1,11 +1,12 @@ #include "os.h" +#include "tref.h" +#include "trpc.h" #include "clientInt.h" #include "clientLog.h" #include "query.h" #include "tmsg.h" #include "tglobal.h" -#include "tref.h" -#include "trpc.h" +#include "catalog.h" #define TSC_VAR_NOT_RELEASE 1 #define TSC_VAR_RELEASED 0 @@ -46,6 +47,7 @@ void taos_cleanup(void) { taosCloseRef(id); rpcCleanup(); + catalogDestroy(); taosCloseLog(); tscInfo("all local resources released"); diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index aeaa1d8361..73c9fc5e9f 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -101,13 +101,13 @@ TEST(testCase, show_user_Test) { 
assert(pConn != NULL); TAOS_RES* pRes = taos_query(pConn, "show users"); - TAOS_ROW pRow = NULL; + TAOS_ROW pRow = NULL; TAOS_FIELD* pFields = taos_fetch_fields(pRes); - int32_t numOfFields = taos_num_fields(pRes); + int32_t numOfFields = taos_num_fields(pRes); char str[512] = {0}; - while((pRow = taos_fetch_row(pRes)) != NULL) { + while ((pRow = taos_fetch_row(pRes)) != NULL) { int32_t code = taos_print_row(str, pRow, pFields, numOfFields); printf("%s\n", str); } @@ -134,13 +134,13 @@ TEST(testCase, show_db_Test) { assert(pConn != NULL); TAOS_RES* pRes = taos_query(pConn, "show databases"); - TAOS_ROW pRow = NULL; + TAOS_ROW pRow = NULL; TAOS_FIELD* pFields = taos_fetch_fields(pRes); - int32_t numOfFields = taos_num_fields(pRes); + int32_t numOfFields = taos_num_fields(pRes); char str[512] = {0}; - while((pRow = taos_fetch_row(pRes)) != NULL) { + while ((pRow = taos_fetch_row(pRes)) != NULL) { int32_t code = taos_print_row(str, pRow, pFields, numOfFields); printf("%s\n", str); } @@ -228,29 +228,29 @@ TEST(testCase, use_db_test) { taos_close(pConn); } -//TEST(testCase, drop_db_test) { -//// TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); -//// assert(pConn != NULL); -//// -//// showDB(pConn); -//// -//// TAOS_RES* pRes = taos_query(pConn, "drop database abc1"); -//// if (taos_errno(pRes) != 0) { -//// printf("failed to drop db, reason:%s\n", taos_errstr(pRes)); -//// } -//// taos_free_result(pRes); -//// -//// showDB(pConn); -//// -//// pRes = taos_query(pConn, "create database abc1"); -//// if (taos_errno(pRes) != 0) { -//// printf("create to drop db, reason:%s\n", taos_errstr(pRes)); -//// } -//// taos_free_result(pRes); -//// taos_close(pConn); +// TEST(testCase, drop_db_test) { +// TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); +// assert(pConn != NULL); +// +// showDB(pConn); +// +// TAOS_RES* pRes = taos_query(pConn, "drop database abc1"); +// if (taos_errno(pRes) != 0) { +// printf("failed to drop db, 
reason:%s\n", taos_errstr(pRes)); +// } +// taos_free_result(pRes); +// +// showDB(pConn); +// +// pRes = taos_query(pConn, "create database abc1"); +// if (taos_errno(pRes) != 0) { +// printf("create to drop db, reason:%s\n", taos_errstr(pRes)); +// } +// taos_free_result(pRes); +// taos_close(pConn); //} - TEST(testCase, create_stable_Test) { +TEST(testCase, create_stable_Test) { TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); assert(pConn != NULL); @@ -281,128 +281,227 @@ TEST(testCase, use_db_test) { taos_close(pConn); } -//TEST(testCase, create_table_Test) { -// TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); -// assert(pConn != NULL); -// -// TAOS_RES* pRes = taos_query(pConn, "use abc1"); -// taos_free_result(pRes); -// -// pRes = taos_query(pConn, "create table tm0(ts timestamp, k int)"); -// taos_free_result(pRes); -// -// taos_close(pConn); -//} +TEST(testCase, create_table_Test) { + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + assert(pConn != NULL); -//TEST(testCase, create_ctable_Test) { -// TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); -// assert(pConn != NULL); -// -// TAOS_RES* pRes = taos_query(pConn, "use abc1"); -// if (taos_errno(pRes) != 0) { -// printf("failed to use db, reason:%s\n", taos_errstr(pRes)); -// } -// taos_free_result(pRes); -// -// pRes = taos_query(pConn, "create table tm0 using st1 tags(1)"); -// if (taos_errno(pRes) != 0) { -// printf("failed to create child table tm0, reason:%s\n", taos_errstr(pRes)); -// } -// -// taos_free_result(pRes); -// taos_close(pConn); -//} -// -//TEST(testCase, show_stable_Test) { -// TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); -// assert(pConn != NULL); -// -// TAOS_RES* pRes = taos_query(pConn, "use abc1"); -// if (taos_errno(pRes) != 0) { -// printf("failed to use db, reason:%s\n", taos_errstr(pRes)); -// } -// taos_free_result(pRes); -// -// pRes = taos_query(pConn, "show 
stables"); -// if (taos_errno(pRes) != 0) { -// printf("failed to show stables, reason:%s\n", taos_errstr(pRes)); -// taos_free_result(pRes); -// ASSERT_TRUE(false); -// } -// -// TAOS_ROW pRow = NULL; -// TAOS_FIELD* pFields = taos_fetch_fields(pRes); -// int32_t numOfFields = taos_num_fields(pRes); -// -// char str[512] = {0}; -// while((pRow = taos_fetch_row(pRes)) != NULL) { -// int32_t code = taos_print_row(str, pRow, pFields, numOfFields); -// printf("%s\n", str); -// } -// -// taos_free_result(pRes); -// taos_close(pConn); -//} + TAOS_RES* pRes = taos_query(pConn, "use abc1"); + taos_free_result(pRes); -//TEST(testCase, show_vgroup_Test) { -// TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); -// assert(pConn != NULL); -// -// TAOS_RES* pRes = taos_query(pConn, "use abc1"); -// if (taos_errno(pRes) != 0) { -// printf("failed to use db, reason:%s\n", taos_errstr(pRes)); -// } -// taos_free_result(pRes); -// -// pRes = taos_query(pConn, "show vgroups"); -// if (taos_errno(pRes) != 0) { -// printf("failed to show vgroups, reason:%s\n", taos_errstr(pRes)); -// taos_free_result(pRes); -// ASSERT_TRUE(false); -// } -// -// TAOS_ROW pRow = NULL; -// -// TAOS_FIELD* pFields = taos_fetch_fields(pRes); -// int32_t numOfFields = taos_num_fields(pRes); -// -// char str[512] = {0}; -// while((pRow = taos_fetch_row(pRes)) != NULL) { -// int32_t code = taos_print_row(str, pRow, pFields, numOfFields); -// printf("%s\n", str); -// } -// -// taos_free_result(pRes); -// -// taos_close(pConn); -//} + pRes = taos_query(pConn, "create table tm0(ts timestamp, k int)"); + taos_free_result(pRes); -//TEST(testCase, drop_stable_Test) { -// TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); -// assert(pConn != NULL); -// -// TAOS_RES* pRes = taos_query(pConn, "create database abc1"); -// if (taos_errno(pRes) != 0) { -// printf("error in creating db, reason:%s\n", taos_errstr(pRes)); -// } -// taos_free_result(pRes); -// -// pRes = 
taos_query(pConn, "use abc1"); -// if (taos_errno(pRes) != 0) { -// printf("error in using db, reason:%s\n", taos_errstr(pRes)); -// } -// taos_free_result(pRes); -// -// pRes = taos_query(pConn, "drop stable st1"); -// if (taos_errno(pRes) != 0) { -// printf("failed to drop stable, reason:%s\n", taos_errstr(pRes)); -// } -// -// taos_free_result(pRes); -// taos_close(pConn); -//} + taos_close(pConn); +} -//TEST(testCase, create_topic_Test) { +TEST(testCase, create_ctable_Test) { + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + assert(pConn != NULL); + + TAOS_RES* pRes = taos_query(pConn, "use abc1"); + if (taos_errno(pRes) != 0) { + printf("failed to use db, reason:%s\n", taos_errstr(pRes)); + } + taos_free_result(pRes); + + pRes = taos_query(pConn, "create table tm0 using st1 tags(1)"); + if (taos_errno(pRes) != 0) { + printf("failed to create child table tm0, reason:%s\n", taos_errstr(pRes)); + } + + taos_free_result(pRes); + taos_close(pConn); +} + +TEST(testCase, show_stable_Test) { + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + assert(pConn != NULL); + + TAOS_RES* pRes = taos_query(pConn, "use abc1"); + if (taos_errno(pRes) != 0) { + printf("failed to use db, reason:%s\n", taos_errstr(pRes)); + } + taos_free_result(pRes); + + pRes = taos_query(pConn, "show stables"); + if (taos_errno(pRes) != 0) { + printf("failed to show stables, reason:%s\n", taos_errstr(pRes)); + taos_free_result(pRes); + ASSERT_TRUE(false); + } + + TAOS_ROW pRow = NULL; + TAOS_FIELD* pFields = taos_fetch_fields(pRes); + int32_t numOfFields = taos_num_fields(pRes); + + char str[512] = {0}; + while ((pRow = taos_fetch_row(pRes)) != NULL) { + int32_t code = taos_print_row(str, pRow, pFields, numOfFields); + printf("%s\n", str); + } + + taos_free_result(pRes); + taos_close(pConn); +} + +TEST(testCase, show_vgroup_Test) { + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + assert(pConn != NULL); + + TAOS_RES* pRes = 
taos_query(pConn, "use abc1"); + if (taos_errno(pRes) != 0) { + printf("failed to use db, reason:%s\n", taos_errstr(pRes)); + } + taos_free_result(pRes); + + pRes = taos_query(pConn, "show vgroups"); + if (taos_errno(pRes) != 0) { + printf("failed to show vgroups, reason:%s\n", taos_errstr(pRes)); + taos_free_result(pRes); + ASSERT_TRUE(false); + } + + TAOS_ROW pRow = NULL; + + TAOS_FIELD* pFields = taos_fetch_fields(pRes); + int32_t numOfFields = taos_num_fields(pRes); + + char str[512] = {0}; + while ((pRow = taos_fetch_row(pRes)) != NULL) { + int32_t code = taos_print_row(str, pRow, pFields, numOfFields); + printf("%s\n", str); + } + + taos_free_result(pRes); + taos_close(pConn); +} + +TEST(testCase, create_multiple_tables) { + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + ASSERT_NE(pConn, nullptr); + + TAOS_RES* pRes = taos_query(pConn, "use abc1"); + if (taos_errno(pRes) != 0) { + printf("failed to use db, reason:%s", taos_errstr(pRes)); + taos_free_result(pRes); + taos_close(pConn); + return; + } + + taos_free_result(pRes); + + pRes = taos_query(pConn, "create table t_2 using st1 tags(1)"); + if (taos_errno(pRes) != 0) { + printf("failed to create multiple tables, reason:%s\n", taos_errstr(pRes)); + taos_free_result(pRes); + ASSERT_TRUE(false); + } + + taos_free_result(pRes); + pRes = taos_query(pConn, "create table t_3 using st1 tags(2)"); + if (taos_errno(pRes) != 0) { + printf("failed to create multiple tables, reason:%s\n", taos_errstr(pRes)); + taos_free_result(pRes); + ASSERT_TRUE(false); + } + + TAOS_ROW pRow = NULL; + TAOS_FIELD* pFields = taos_fetch_fields(pRes); + int32_t numOfFields = taos_num_fields(pRes); + + char str[512] = {0}; + while ((pRow = taos_fetch_row(pRes)) != NULL) { + int32_t code = taos_print_row(str, pRow, pFields, numOfFields); + printf("%s\n", str); + } + + taos_free_result(pRes); + + for (int32_t i = 0; i < 20; ++i) { + char sql[512] = {0}; + snprintf(sql, tListLen(sql), + "create table t_x_%d using st1 
tags(2) t_x_%d using st1 tags(5) t_x_%d using st1 tags(911)", i, + (i + 1) * 30, (i + 2) * 40); + TAOS_RES* pres = taos_query(pConn, sql); + if (taos_errno(pres) != 0) { + printf("failed to create table %d\n, reason:%s", i, taos_errstr(pres)); + } + taos_free_result(pres); + } + + taos_close(pConn); +} + +TEST(testCase, show_table_Test) { + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + assert(pConn != NULL); + + TAOS_RES* pRes = taos_query(pConn, "use abc1"); + taos_free_result(pRes); + + pRes = taos_query(pConn, "show tables"); + if (taos_errno(pRes) != 0) { + printf("failed to show vgroups, reason:%s\n", taos_errstr(pRes)); + taos_free_result(pRes); + ASSERT_TRUE(false); + } + + TAOS_ROW pRow = NULL; + TAOS_FIELD* pFields = taos_fetch_fields(pRes); + int32_t numOfFields = taos_num_fields(pRes); + + char str[512] = {0}; + while ((pRow = taos_fetch_row(pRes)) != NULL) { + int32_t code = taos_print_row(str, pRow, pFields, numOfFields); + printf("%s\n", str); + } + + taos_free_result(pRes); + taos_close(pConn); +} + +TEST(testCase, drop_stable_Test) { + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + assert(pConn != NULL); + + TAOS_RES* pRes = taos_query(pConn, "create database abc1"); + if (taos_errno(pRes) != 0) { + printf("error in creating db, reason:%s\n", taos_errstr(pRes)); + } + taos_free_result(pRes); + + pRes = taos_query(pConn, "use abc1"); + if (taos_errno(pRes) != 0) { + printf("error in using db, reason:%s\n", taos_errstr(pRes)); + } + taos_free_result(pRes); + + pRes = taos_query(pConn, "drop stable st1"); + if (taos_errno(pRes) != 0) { + printf("failed to drop stable, reason:%s\n", taos_errstr(pRes)); + } + + taos_free_result(pRes); + taos_close(pConn); +} + +TEST(testCase, generated_request_id_test) { + SHashObj* phash = taosHashInit(10000, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); + + for (int32_t i = 0; i < 50000; ++i) { + uint64_t v = generateRequestId(); + void* 
result = taosHashGet(phash, &v, sizeof(v)); + if (result != nullptr) { + printf("0x%lx, index:%d\n", v, i); + } + assert(result == nullptr); + taosHashPut(phash, &v, sizeof(v), NULL, 0); + } + + taosHashCleanup(phash); +} + +// TEST(testCase, create_topic_Test) { // TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); // assert(pConn != NULL); // @@ -435,132 +534,55 @@ TEST(testCase, use_db_test) { // tmq_create_topic(pConn, "test_topic_1", sql, strlen(sql)); // taos_close(pConn); //} - -//TEST(testCase, show_table_Test) { -// TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); -// assert(pConn != NULL); -// -// TAOS_RES* pRes = taos_query(pConn, "use abc1"); -// taos_free_result(pRes); -// -// pRes = taos_query(pConn, "show tables"); -// if (taos_errno(pRes) != 0) { -// printf("failed to show vgroups, reason:%s\n", taos_errstr(pRes)); -// taos_free_result(pRes); -// ASSERT_TRUE(false); -// } -// -// TAOS_ROW pRow = NULL; -// TAOS_FIELD* pFields = taos_fetch_fields(pRes); -// int32_t numOfFields = taos_num_fields(pRes); -// -// char str[512] = {0}; -// while((pRow = taos_fetch_row(pRes)) != NULL) { -// int32_t code = taos_print_row(str, pRow, pFields, numOfFields); -// printf("%s\n", str); -// } -// -// taos_free_result(pRes); -// taos_close(pConn); -//} - -TEST(testCase, create_multiple_tables) { - TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); - ASSERT_NE(pConn, nullptr); - - TAOS_RES* pRes = taos_query(pConn, "use abc1"); - if (taos_errno(pRes) != 0) { - printf("failed to use db, reason:%s", taos_errstr(pRes)); - taos_free_result(pRes); - taos_close(pConn); - return; - } - - taos_free_result(pRes); - - pRes = taos_query(pConn, "create table t_2 using st1 tags(1)"); - if (taos_errno(pRes) != 0) { - printf("failed to create multiple tables, reason:%s\n", taos_errstr(pRes)); - taos_free_result(pRes); - ASSERT_TRUE(false); - } - - taos_free_result(pRes); - pRes = taos_query(pConn, "create table t_3 using st1 
tags(2)"); - if (taos_errno(pRes) != 0) { - printf("failed to create multiple tables, reason:%s\n", taos_errstr(pRes)); - taos_free_result(pRes); - ASSERT_TRUE(false); - } - - TAOS_ROW pRow = NULL; - TAOS_FIELD* pFields = taos_fetch_fields(pRes); - int32_t numOfFields = taos_num_fields(pRes); - - char str[512] = {0}; - while((pRow = taos_fetch_row(pRes)) != NULL) { - int32_t code = taos_print_row(str, pRow, pFields, numOfFields); - printf("%s\n", str); - } - - taos_free_result(pRes); - - for(int32_t i = 0; i < 200000; ++i) { - char sql[512] = {0}; - snprintf(sql, tListLen(sql), "create table t_x_%d using st1 tags(2)", i); - TAOS_RES* pres = taos_query(pConn, sql); - if (taos_errno(pres) != 0) { - printf("failed to create table %d\n, reason:%s", i, taos_errstr(pres)); - } - - printf("%d\n", i); - taos_free_result(pres); - } - - taos_close(pConn); -} - -TEST(testCase, generated_request_id_test) { - SHashObj *phash = taosHashInit(10000, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); - - for(int32_t i = 0; i < 50000; ++i) { - uint64_t v = generateRequestId(); - void* result = taosHashGet(phash, &v, sizeof(v)); - if (result != nullptr) { - printf("0x%lx, index:%d\n", v, i); - } - assert(result == nullptr); - taosHashPut(phash, &v, sizeof(v), NULL, 0); - } - - taosHashCleanup(phash); -} - -//TEST(testCase, projection_query_tables) { +//TEST(testCase, insert_test) { // TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); // ASSERT_EQ(pConn, nullptr); // // TAOS_RES* pRes = taos_query(pConn, "use abc1"); // taos_free_result(pRes); // -// pRes = taos_query(pConn, "select * from t_2"); +// pRes = taos_query(pConn, "insert into t_2 values(now, 1)"); // if (taos_errno(pRes) != 0) { // printf("failed to create multiple tables, reason:%s\n", taos_errstr(pRes)); // taos_free_result(pRes); // ASSERT_TRUE(false); // } // -// TAOS_ROW pRow = NULL; -// TAOS_FIELD* pFields = taos_fetch_fields(pRes); -// int32_t numOfFields = 
taos_num_fields(pRes); -// -// char str[512] = {0}; -// while((pRow = taos_fetch_row(pRes)) != NULL) { -// int32_t code = taos_print_row(str, pRow, pFields, numOfFields); -// printf("%s\n", str); -// } -// // taos_free_result(pRes); // taos_close(pConn); //} +//#endif +TEST(testCase, projection_query_tables) { + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + ASSERT_NE(pConn, nullptr); + + TAOS_RES* pRes = taos_query(pConn, "use test1"); + if (taos_errno(pRes) != 0) { + printf("failed to use db, reason:%s", taos_errstr(pRes)); + taos_free_result(pRes); + return; + } + + taos_free_result(pRes); + + pRes = taos_query(pConn, "select * from tm0"); + if (taos_errno(pRes) != 0) { + printf("failed to create multiple tables, reason:%s\n", taos_errstr(pRes)); + taos_free_result(pRes); + ASSERT_TRUE(false); + } + + TAOS_ROW pRow = NULL; + TAOS_FIELD* pFields = taos_fetch_fields(pRes); + int32_t numOfFields = taos_num_fields(pRes); + + char str[512] = {0}; + while ((pRow = taos_fetch_row(pRes)) != NULL) { + int32_t code = taos_print_row(str, pRow, pFields, numOfFields); + printf("%s\n", str); + } + + taos_free_result(pRes); + taos_close(pConn); +} diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 9ddadc9ba6..9a20fadbfb 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -910,7 +910,7 @@ static void doInitGlobalConfig(void) { cfg.option = "tsdbDebugFlag"; cfg.ptr = &tsdbDebugFlag; cfg.valType = TAOS_CFG_VTYPE_INT32; - cfg.cfgType = TSDB_CFG_CTYPE_B_CONFIG | TSDB_CFG_CTYPE_B_LOG | TSDB_CFG_CTYPE_B_CLIENT; + cfg.cfgType = TSDB_CFG_CTYPE_B_CONFIG | TSDB_CFG_CTYPE_B_LOG; cfg.minValue = 0; cfg.maxValue = 255; cfg.ptrLength = 0; diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index b94bd6f715..53f59c7d57 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -27,6 +27,64 @@ #undef TD_MSG_SEG_CODE_ #include "tmsgdef.h" +int tInitSubmitMsgIter(SSubmitMsg *pMsg, SSubmitMsgIter 
*pIter) { + if (pMsg == NULL) { + terrno = TSDB_CODE_TDB_SUBMIT_MSG_MSSED_UP; + return -1; + } + + pIter->totalLen = pMsg->length; + pIter->len = 0; + pIter->pMsg = pMsg; + if (pMsg->length <= sizeof(SSubmitMsg)) { + terrno = TSDB_CODE_TDB_SUBMIT_MSG_MSSED_UP; + return -1; + } + + return 0; +} + +int tGetSubmitMsgNext(SSubmitMsgIter *pIter, SSubmitBlk **pPBlock) { + if (pIter->len == 0) { + pIter->len += sizeof(SSubmitMsg); + } else { + SSubmitBlk *pSubmitBlk = (SSubmitBlk *)POINTER_SHIFT(pIter->pMsg, pIter->len); + pIter->len += (sizeof(SSubmitBlk) + pSubmitBlk->dataLen + pSubmitBlk->schemaLen); + } + + if (pIter->len > pIter->totalLen) { + terrno = TSDB_CODE_TDB_SUBMIT_MSG_MSSED_UP; + *pPBlock = NULL; + return -1; + } + + *pPBlock = (pIter->len == pIter->totalLen) ? NULL : (SSubmitBlk *)POINTER_SHIFT(pIter->pMsg, pIter->len); + + return 0; +} + +int tInitSubmitBlkIter(SSubmitBlk *pBlock, SSubmitBlkIter *pIter) { + if (pBlock->dataLen <= 0) return -1; + pIter->totalLen = pBlock->dataLen; + pIter->len = 0; + pIter->row = (SMemRow)(pBlock->data + pBlock->schemaLen); + return 0; +} + +SMemRow tGetSubmitBlkNext(SSubmitBlkIter *pIter) { + SMemRow row = pIter->row; + + if (pIter->len >= pIter->totalLen) { + return NULL; + } else { + pIter->len += memRowTLen(row); + if (pIter->len < pIter->totalLen) { + pIter->row = POINTER_SHIFT(row, memRowTLen(row)); + } + return row; + } +} + int tSerializeSClientHbReq(void **buf, const SClientHbReq *pReq) { int tlen = 0; tlen += taosEncodeSClientHbKey(buf, &pReq->connKey); diff --git a/source/dnode/mgmt/impl/inc/dndInt.h b/source/dnode/mgmt/impl/inc/dndInt.h index 07c8ce5d02..d5c9dd57dc 100644 --- a/source/dnode/mgmt/impl/inc/dndInt.h +++ b/source/dnode/mgmt/impl/inc/dndInt.h @@ -80,20 +80,20 @@ typedef struct { } SDnodeDir; typedef struct { - int32_t dnodeId; - int32_t dropped; - int64_t clusterId; - int64_t rebootTime; - int64_t updateTime; - int8_t statusSent; - SEpSet mnodeEpSet; - char *file; - SHashObj *dnodeHash; - SDnodeEps 
*dnodeEps; - pthread_t *threadId; - SRWLatch latch; - STaosQueue *pMgmtQ; - SWorkerPool mgmtPool; + int32_t dnodeId; + int32_t dropped; + int64_t clusterId; + int64_t dver; + int64_t rebootTime; + int64_t updateTime; + int8_t statusSent; + SEpSet mnodeEpSet; + char *file; + SHashObj *dnodeHash; + SDnodeEps *dnodeEps; + pthread_t *threadId; + SRWLatch latch; + SDnodeWorker mgmtWorker; } SDnodeMgmt; typedef struct { diff --git a/source/dnode/mgmt/impl/inc/dndDnode.h b/source/dnode/mgmt/impl/inc/dndMgmt.h similarity index 100% rename from source/dnode/mgmt/impl/inc/dndDnode.h rename to source/dnode/mgmt/impl/inc/dndMgmt.h diff --git a/source/dnode/mgmt/impl/inc/dndVnodes.h b/source/dnode/mgmt/impl/inc/dndVnodes.h index bf5f0122c1..b5fae62959 100644 --- a/source/dnode/mgmt/impl/inc/dndVnodes.h +++ b/source/dnode/mgmt/impl/inc/dndVnodes.h @@ -29,12 +29,12 @@ void dndProcessVnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet); void dndProcessVnodeQueryMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet); void dndProcessVnodeFetchMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet); -int32_t dndProcessCreateVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg); -int32_t dndProcessAlterVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg); -int32_t dndProcessDropVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg); -int32_t dndProcessAuthVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg); -int32_t dndProcessSyncVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg); -int32_t dndProcessCompactVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg); +int32_t dndProcessCreateVnodeReq(SDnode *pDnode, SRpcMsg *pReq); +int32_t dndProcessAlterVnodeReq(SDnode *pDnode, SRpcMsg *pReq); +int32_t dndProcessDropVnodeReq(SDnode *pDnode, SRpcMsg *pReq); +int32_t dndProcessAuthVnodeReq(SDnode *pDnode, SRpcMsg *pReq); +int32_t dndProcessSyncVnodeReq(SDnode *pDnode, SRpcMsg *pReq); +int32_t dndProcessCompactVnodeReq(SDnode *pDnode, SRpcMsg *pReq); #ifdef __cplusplus } diff --git a/source/dnode/mgmt/impl/src/dndBnode.c b/source/dnode/mgmt/impl/src/dndBnode.c 
index 1b2e56edfe..15be59a419 100644 --- a/source/dnode/mgmt/impl/src/dndBnode.c +++ b/source/dnode/mgmt/impl/src/dndBnode.c @@ -15,7 +15,7 @@ #define _DEFAULT_SOURCE #include "dndBnode.h" -#include "dndDnode.h" +#include "dndMgmt.h" #include "dndTransport.h" #include "dndWorker.h" @@ -42,18 +42,13 @@ static SBnode *dndAcquireBnode(SDnode *pDnode) { } static void dndReleaseBnode(SDnode *pDnode, SBnode *pBnode) { + if (pBnode == NULL) return; + SBnodeMgmt *pMgmt = &pDnode->bmgmt; - int32_t refCount = 0; - taosRLockLatch(&pMgmt->latch); - if (pBnode != NULL) { - refCount = atomic_sub_fetch_32(&pMgmt->refCount, 1); - } + int32_t refCount = atomic_sub_fetch_32(&pMgmt->refCount, 1); taosRUnLockLatch(&pMgmt->latch); - - if (pBnode != NULL) { - dTrace("release bnode, refCount:%d", refCount); - } + dTrace("release bnode, refCount:%d", refCount); } static int32_t dndReadBnodeFile(SDnode *pDnode) { @@ -268,7 +263,7 @@ int32_t dndProcessCreateBnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) { pMsg->dnodeId = htonl(pMsg->dnodeId); if (pMsg->dnodeId != dndGetDnodeId(pDnode)) { - terrno = TSDB_CODE_DND_BNODE_ID_INVALID; + terrno = TSDB_CODE_DND_BNODE_INVALID_OPTION; dError("failed to create bnode since %s", terrstr()); return -1; } else { @@ -281,7 +276,7 @@ int32_t dndProcessDropBnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) { pMsg->dnodeId = htonl(pMsg->dnodeId); if (pMsg->dnodeId != dndGetDnodeId(pDnode)) { - terrno = TSDB_CODE_DND_BNODE_ID_INVALID; + terrno = TSDB_CODE_DND_BNODE_INVALID_OPTION; dError("failed to drop bnode since %s", terrstr()); return -1; } else { diff --git a/source/dnode/mgmt/impl/src/dndDnode.c b/source/dnode/mgmt/impl/src/dndMgmt.c similarity index 83% rename from source/dnode/mgmt/impl/src/dndDnode.c rename to source/dnode/mgmt/impl/src/dndMgmt.c index f5f9bbf1b8..7d071be944 100644 --- a/source/dnode/mgmt/impl/src/dndDnode.c +++ b/source/dnode/mgmt/impl/src/dndMgmt.c @@ -14,28 +14,25 @@ */ #define _DEFAULT_SOURCE -#include "dndDnode.h" +#include "dndMgmt.h" 
#include "dndBnode.h" #include "dndMnode.h" #include "dndQnode.h" #include "dndSnode.h" #include "dndTransport.h" #include "dndVnodes.h" +#include "dndWorker.h" -static int32_t dndInitMgmtWorker(SDnode *pDnode); -static void dndCleanupMgmtWorker(SDnode *pDnode); -static int32_t dndAllocMgmtQueue(SDnode *pDnode); -static void dndFreeMgmtQueue(SDnode *pDnode); -static void dndProcessMgmtQueue(SDnode *pDnode, SRpcMsg *pMsg); +static void dndProcessMgmtQueue(SDnode *pDnode, SRpcMsg *pMsg); static int32_t dndReadDnodes(SDnode *pDnode); static int32_t dndWriteDnodes(SDnode *pDnode); static void *dnodeThreadRoutine(void *param); -static int32_t dndProcessConfigDnodeReq(SDnode *pDnode, SRpcMsg *pMsg); -static void dndProcessStatusRsp(SDnode *pDnode, SRpcMsg *pMsg); -static void dndProcessAuthRsp(SDnode *pDnode, SRpcMsg *pMsg); -static void dndProcessGrantRsp(SDnode *pDnode, SRpcMsg *pMsg); +static int32_t dndProcessConfigDnodeReq(SDnode *pDnode, SRpcMsg *pReq); +static void dndProcessStatusRsp(SDnode *pDnode, SRpcMsg *pRsp); +static void dndProcessAuthRsp(SDnode *pDnode, SRpcMsg *pRsp); +static void dndProcessGrantRsp(SDnode *pDnode, SRpcMsg *pRsp); int32_t dndGetDnodeId(SDnode *pDnode) { SDnodeMgmt *pMgmt = &pDnode->dmgmt; @@ -80,13 +77,13 @@ void dndGetMnodeEpSet(SDnode *pDnode, SEpSet *pEpSet) { taosRUnLockLatch(&pMgmt->latch); } -void dndSendRedirectRsp(SDnode *pDnode, SRpcMsg *pMsg) { - tmsg_t msgType = pMsg->msgType; +void dndSendRedirectRsp(SDnode *pDnode, SRpcMsg *pReq) { + tmsg_t msgType = pReq->msgType; SEpSet epSet = {0}; dndGetMnodeEpSet(pDnode, &epSet); - dDebug("RPC %p, msg:%s is redirected, num:%d use:%d", pMsg->handle, TMSG_INFO(msgType), epSet.numOfEps, epSet.inUse); + dDebug("RPC %p, req:%s is redirected, num:%d use:%d", pReq->handle, TMSG_INFO(msgType), epSet.numOfEps, epSet.inUse); for (int32_t i = 0; i < epSet.numOfEps; ++i) { dDebug("mnode index:%d %s:%u", i, epSet.fqdn[i], epSet.port[i]); if (strcmp(epSet.fqdn[i], pDnode->opt.localFqdn) == 0 && 
epSet.port[i] == pDnode->opt.serverPort) { @@ -96,7 +93,7 @@ void dndSendRedirectRsp(SDnode *pDnode, SRpcMsg *pMsg) { epSet.port[i] = htons(epSet.port[i]); } - rpcSendRedirectRsp(pMsg->handle, &epSet); + rpcSendRedirectRsp(pReq->handle, &epSet); } static void dndUpdateMnodeEpSet(SDnode *pDnode, SEpSet *pEpSet) { @@ -350,14 +347,14 @@ static int32_t dndWriteDnodes(SDnode *pDnode) { terrno = 0; pMgmt->updateTime = taosGetTimestampMs(); - dInfo("successed to write %s", pMgmt->file); + dDebug("successed to write %s", pMgmt->file); return 0; } void dndSendStatusReq(SDnode *pDnode) { - int32_t contLen = sizeof(SStatusMsg) + TSDB_MAX_VNODES * sizeof(SVnodeLoad); + int32_t contLen = sizeof(SStatusReq) + TSDB_MAX_VNODES * sizeof(SVnodeLoad); - SStatusMsg *pStatus = rpcMallocCont(contLen); + SStatusReq *pStatus = rpcMallocCont(contLen); if (pStatus == NULL) { dError("failed to malloc status message"); return; @@ -366,6 +363,7 @@ void dndSendStatusReq(SDnode *pDnode) { SDnodeMgmt *pMgmt = &pDnode->dmgmt; taosRLockLatch(&pMgmt->latch); pStatus->sver = htonl(pDnode->opt.sver); + pStatus->dver = htobe64(pMgmt->dver); pStatus->dnodeId = htonl(pMgmt->dnodeId); pStatus->clusterId = htobe64(pMgmt->clusterId); pStatus->rebootTime = htobe64(pMgmt->rebootTime); @@ -385,12 +383,12 @@ void dndSendStatusReq(SDnode *pDnode) { taosRUnLockLatch(&pMgmt->latch); dndGetVnodeLoads(pDnode, &pStatus->vnodeLoads); - contLen = sizeof(SStatusMsg) + pStatus->vnodeLoads.num * sizeof(SVnodeLoad); + contLen = sizeof(SStatusReq) + pStatus->vnodeLoads.num * sizeof(SVnodeLoad); SRpcMsg rpcMsg = {.pCont = pStatus, .contLen = contLen, .msgType = TDMT_MND_STATUS, .ahandle = (void *)9527}; pMgmt->statusSent = 1; - dTrace("pDnode:%p, send status msg to mnode", pDnode); + dTrace("pDnode:%p, send status req to mnode", pDnode); dndSendReqToMnode(pDnode, &rpcMsg); } @@ -426,12 +424,12 @@ static void dndUpdateDnodeEps(SDnode *pDnode, SDnodeEps *pDnodeEps) { taosWUnLockLatch(&pMgmt->latch); } -static void 
dndProcessStatusRsp(SDnode *pDnode, SRpcMsg *pMsg) { +static void dndProcessStatusRsp(SDnode *pDnode, SRpcMsg *pRsp) { SDnodeMgmt *pMgmt = &pDnode->dmgmt; - if (pMsg->code != TSDB_CODE_SUCCESS) { + if (pRsp->code != TSDB_CODE_SUCCESS) { pMgmt->statusSent = 0; - if (pMsg->code == TSDB_CODE_MND_DNODE_NOT_EXIST && !pMgmt->dropped && pMgmt->dnodeId > 0) { + if (pRsp->code == TSDB_CODE_MND_DNODE_NOT_EXIST && !pMgmt->dropped && pMgmt->dnodeId > 0) { dInfo("dnode:%d, set to dropped since not exist in mnode", pMgmt->dnodeId); pMgmt->dropped = 1; dndWriteDnodes(pDnode); @@ -439,14 +437,16 @@ static void dndProcessStatusRsp(SDnode *pDnode, SRpcMsg *pMsg) { return; } - SStatusRsp *pRsp = pMsg->pCont; - if (pMsg->pCont != NULL && pMsg->contLen != 0) { - SDnodeCfg *pCfg = &pRsp->dnodeCfg; + if (pRsp->pCont != NULL && pRsp->contLen != 0) { + SStatusRsp *pStatus = pRsp->pCont; + pMgmt->dver = htobe64(pStatus->dver); + + SDnodeCfg *pCfg = &pStatus->dnodeCfg; pCfg->dnodeId = htonl(pCfg->dnodeId); pCfg->clusterId = htobe64(pCfg->clusterId); dndUpdateDnodeCfg(pDnode, pCfg); - SDnodeEps *pDnodeEps = &pRsp->dnodeEps; + SDnodeEps *pDnodeEps = &pStatus->dnodeEps; pDnodeEps->num = htonl(pDnodeEps->num); for (int32_t i = 0; i < pDnodeEps->num; ++i) { pDnodeEps->eps[i].id = htonl(pDnodeEps->eps[i].id); @@ -458,26 +458,27 @@ static void dndProcessStatusRsp(SDnode *pDnode, SRpcMsg *pMsg) { pMgmt->statusSent = 0; } -static void dndProcessAuthRsp(SDnode *pDnode, SRpcMsg *pMsg) { assert(1); } +static void dndProcessAuthRsp(SDnode *pDnode, SRpcMsg *pReq) { dError("auth rsp is received, but not supported yet"); } -static void dndProcessGrantRsp(SDnode *pDnode, SRpcMsg *pMsg) { assert(1); } - -static int32_t dndProcessConfigDnodeReq(SDnode *pDnode, SRpcMsg *pMsg) { - dError("config msg is received, but not supported yet"); - SCfgDnodeMsg *pCfg = pMsg->pCont; +static void dndProcessGrantRsp(SDnode *pDnode, SRpcMsg *pReq) { + dError("grant rsp is received, but not supported yet"); +} +static int32_t 
dndProcessConfigDnodeReq(SDnode *pDnode, SRpcMsg *pReq) { + dError("config req is received, but not supported yet"); + SDCfgDnodeReq *pCfg = pReq->pCont; return TSDB_CODE_OPS_NOT_SUPPORT; } -void dndProcessStartupReq(SDnode *pDnode, SRpcMsg *pMsg) { - dDebug("startup msg is received"); +void dndProcessStartupReq(SDnode *pDnode, SRpcMsg *pReq) { + dDebug("startup req is received"); SStartupMsg *pStartup = rpcMallocCont(sizeof(SStartupMsg)); dndGetStartup(pDnode, pStartup); - dDebug("startup msg is sent, step:%s desc:%s finished:%d", pStartup->name, pStartup->desc, pStartup->finished); + dDebug("startup req is sent, step:%s desc:%s finished:%d", pStartup->name, pStartup->desc, pStartup->finished); - SRpcMsg rpcRsp = {.handle = pMsg->handle, .pCont = pStartup, .contLen = sizeof(SStartupMsg)}; + SRpcMsg rpcRsp = {.handle = pReq->handle, .pCont = pStartup, .contLen = sizeof(SStartupMsg)}; rpcSendResponse(&rpcRsp); } @@ -530,13 +531,8 @@ int32_t dndInitDnode(SDnode *pDnode) { return -1; } - if (dndInitMgmtWorker(pDnode) != 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - if (dndAllocMgmtQueue(pDnode) != 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + if (dndInitWorker(pDnode, &pMgmt->mgmtWorker, DND_WORKER_SINGLE, "dnode-mgmt", 1, 1, dndProcessMgmtQueue) != 0) { + dError("failed to start dnode mgmt worker since %s", terrstr()); return -1; } @@ -547,15 +543,14 @@ int32_t dndInitDnode(SDnode *pDnode) { return -1; } - dInfo("dnode-dnode is initialized"); + dInfo("dnode-mgmt is initialized"); return 0; } void dndCleanupDnode(SDnode *pDnode) { SDnodeMgmt *pMgmt = &pDnode->dmgmt; - dndCleanupMgmtWorker(pDnode); - dndFreeMgmtQueue(pDnode); + dndCleanupWorker(&pMgmt->mgmtWorker); if (pMgmt->threadId != NULL) { taosDestoryThread(pMgmt->threadId); @@ -580,62 +575,22 @@ void dndCleanupDnode(SDnode *pDnode) { } taosWUnLockLatch(&pMgmt->latch); - dInfo("dnode-dnode is cleaned up"); + dInfo("dnode-mgmt is cleaned up"); } -static int32_t dndInitMgmtWorker(SDnode *pDnode) { - 
SDnodeMgmt *pMgmt = &pDnode->dmgmt; - SWorkerPool *pPool = &pMgmt->mgmtPool; - pPool->name = "dnode-mgmt"; - pPool->min = 1; - pPool->max = 1; - if (tWorkerInit(pPool) != 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - dDebug("dnode mgmt worker is initialized"); - return 0; -} - -static void dndCleanupMgmtWorker(SDnode *pDnode) { - SDnodeMgmt *pMgmt = &pDnode->dmgmt; - tWorkerCleanup(&pMgmt->mgmtPool); - dDebug("dnode mgmt worker is closed"); -} - -static int32_t dndAllocMgmtQueue(SDnode *pDnode) { - SDnodeMgmt *pMgmt = &pDnode->dmgmt; - pMgmt->pMgmtQ = tWorkerAllocQueue(&pMgmt->mgmtPool, pDnode, (FProcessItem)dndProcessMgmtQueue); - if (pMgmt->pMgmtQ == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - return 0; -} - -static void dndFreeMgmtQueue(SDnode *pDnode) { - SDnodeMgmt *pMgmt = &pDnode->dmgmt; - tWorkerFreeQueue(&pMgmt->mgmtPool, pMgmt->pMgmtQ); - pMgmt->pMgmtQ = NULL; -} - -void dndProcessMgmtMsg(SDnode *pDnode, SRpcMsg *pRpcMsg, SEpSet *pEpSet) { +void dndProcessMgmtMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) { SDnodeMgmt *pMgmt = &pDnode->dmgmt; - if (pEpSet && pEpSet->numOfEps > 0 && pRpcMsg->msgType == TDMT_MND_STATUS_RSP) { + if (pEpSet && pEpSet->numOfEps > 0 && pMsg->msgType == TDMT_MND_STATUS_RSP) { dndUpdateMnodeEpSet(pDnode, pEpSet); } - SRpcMsg *pMsg = taosAllocateQitem(sizeof(SRpcMsg)); - if (pMsg != NULL) *pMsg = *pRpcMsg; - - if (pMsg == NULL || taosWriteQitem(pMgmt->pMgmtQ, pMsg) != 0) { - if (pRpcMsg->msgType & 1u) { - SRpcMsg rsp = {.handle = pRpcMsg->handle, .code = TSDB_CODE_OUT_OF_MEMORY}; + if (dndWriteMsgToWorker(&pMgmt->mgmtWorker, pMsg, sizeof(SRpcMsg)) != 0) { + if (pMsg->msgType & 1u) { + SRpcMsg rsp = {.handle = pMsg->handle, .code = TSDB_CODE_OUT_OF_MEMORY}; rpcSendResponse(&rsp); } - rpcFreeCont(pRpcMsg->pCont); + rpcFreeCont(pMsg->pCont); taosFreeQitem(pMsg); } } @@ -704,7 +659,7 @@ static void dndProcessMgmtQueue(SDnode *pDnode, SRpcMsg *pMsg) { default: terrno = TSDB_CODE_MSG_NOT_PROCESSED; 
code = -1; - dError("RPC %p, dnode req:%s not processed", pMsg->handle, TMSG_INFO(pMsg->msgType)); + dError("RPC %p, dnode msg:%s not processed", pMsg->handle, TMSG_INFO(pMsg->msgType)); break; } diff --git a/source/dnode/mgmt/impl/src/dndMnode.c b/source/dnode/mgmt/impl/src/dndMnode.c index a8bf26f133..6c23af7f00 100644 --- a/source/dnode/mgmt/impl/src/dndMnode.c +++ b/source/dnode/mgmt/impl/src/dndMnode.c @@ -15,7 +15,7 @@ #define _DEFAULT_SOURCE #include "dndMnode.h" -#include "dndDnode.h" +#include "dndMgmt.h" #include "dndTransport.h" #include "dndWorker.h" @@ -43,18 +43,13 @@ static SMnode *dndAcquireMnode(SDnode *pDnode) { } static void dndReleaseMnode(SDnode *pDnode, SMnode *pMnode) { + if (pMnode == NULL) return; + SMnodeMgmt *pMgmt = &pDnode->mmgmt; - int32_t refCount = 0; - taosRLockLatch(&pMgmt->latch); - if (pMnode != NULL) { - refCount = atomic_sub_fetch_32(&pMgmt->refCount, 1); - } + int32_t refCount = atomic_sub_fetch_32(&pMgmt->refCount, 1); taosRUnLockLatch(&pMgmt->latch); - - if (pMnode != NULL) { - dTrace("release mnode, refCount:%d", refCount); - } + dTrace("release mnode, refCount:%d", refCount); } static int32_t dndReadMnodeFile(SDnode *pDnode) { @@ -305,25 +300,24 @@ static void dndBuildMnodeOpenOption(SDnode *pDnode, SMnodeOpt *pOption) { memcpy(&pOption->replicas, pMgmt->replicas, sizeof(SReplica) * TSDB_MAX_REPLICA); } -static int32_t dndBuildMnodeOptionFromMsg(SDnode *pDnode, SMnodeOpt *pOption, SDCreateMnodeMsg *pMsg) { +static int32_t dndBuildMnodeOptionFromReq(SDnode *pDnode, SMnodeOpt *pOption, SDCreateMnodeReq *pCreate) { dndInitMnodeOption(pDnode, pOption); pOption->dnodeId = dndGetDnodeId(pDnode); pOption->clusterId = dndGetClusterId(pDnode); - pOption->replica = pMsg->replica; + pOption->replica = pCreate->replica; pOption->selfIndex = -1; - for (int32_t i = 0; i < pMsg->replica; ++i) { + for (int32_t i = 0; i < pCreate->replica; ++i) { SReplica *pReplica = &pOption->replicas[i]; - pReplica->id = pMsg->replicas[i].id; - 
pReplica->port = pMsg->replicas[i].port; - memcpy(pReplica->fqdn, pMsg->replicas[i].fqdn, TSDB_FQDN_LEN); + pReplica->id = pCreate->replicas[i].id; + pReplica->port = pCreate->replicas[i].port; + memcpy(pReplica->fqdn, pCreate->replicas[i].fqdn, TSDB_FQDN_LEN); if (pReplica->id == pOption->dnodeId) { pOption->selfIndex = i; } } if (pOption->selfIndex == -1) { - terrno = TSDB_CODE_DND_MNODE_ID_NOT_FOUND; dError("failed to build mnode options since %s", terrstr()); return -1; } @@ -423,63 +417,97 @@ static int32_t dndDropMnode(SDnode *pDnode) { return 0; } -static SDCreateMnodeMsg *dndParseCreateMnodeMsg(SRpcMsg *pRpcMsg) { - SDCreateMnodeMsg *pMsg = pRpcMsg->pCont; - pMsg->dnodeId = htonl(pMsg->dnodeId); - for (int32_t i = 0; i < pMsg->replica; ++i) { - pMsg->replicas[i].id = htonl(pMsg->replicas[i].id); - pMsg->replicas[i].port = htons(pMsg->replicas[i].port); +static SDCreateMnodeReq *dndParseCreateMnodeReq(SRpcMsg *pReq) { + SDCreateMnodeReq *pCreate = pReq->pCont; + pCreate->dnodeId = htonl(pCreate->dnodeId); + for (int32_t i = 0; i < pCreate->replica; ++i) { + pCreate->replicas[i].id = htonl(pCreate->replicas[i].id); + pCreate->replicas[i].port = htons(pCreate->replicas[i].port); } - return pMsg; + return pCreate; } -int32_t dndProcessCreateMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) { - SDCreateMnodeMsg *pMsg = dndParseCreateMnodeMsg(pRpcMsg); +int32_t dndProcessCreateMnodeReq(SDnode *pDnode, SRpcMsg *pReq) { + SDCreateMnodeReq *pCreate = dndParseCreateMnodeReq(pReq); - if (pMsg->dnodeId != dndGetDnodeId(pDnode)) { - terrno = TSDB_CODE_DND_MNODE_ID_INVALID; - return -1; - } else { - SMnodeOpt option = {0}; - if (dndBuildMnodeOptionFromMsg(pDnode, &option, pMsg) != 0) { - return -1; - } - - return dndOpenMnode(pDnode, &option); - } -} - -int32_t dndProcessAlterMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) { - SDAlterMnodeMsg *pMsg = dndParseCreateMnodeMsg(pRpcMsg); - - if (pMsg->dnodeId != dndGetDnodeId(pDnode)) { - terrno = TSDB_CODE_DND_MNODE_ID_INVALID; + if 
(pCreate->replica <= 1 || pCreate->dnodeId != dndGetDnodeId(pDnode)) { + terrno = TSDB_CODE_DND_MNODE_INVALID_OPTION; + dError("failed to create mnode since %s", terrstr()); return -1; } SMnodeOpt option = {0}; - if (dndBuildMnodeOptionFromMsg(pDnode, &option, pMsg) != 0) { + if (dndBuildMnodeOptionFromReq(pDnode, &option, pCreate) != 0) { + terrno = TSDB_CODE_DND_MNODE_INVALID_OPTION; + dError("failed to create mnode since %s", terrstr()); return -1; } - if (dndAlterMnode(pDnode, &option) != 0) { + SMnode *pMnode = dndAcquireMnode(pDnode); + if (pMnode != NULL) { + dndReleaseMnode(pDnode, pMnode); + terrno = TSDB_CODE_DND_MNODE_ALREADY_DEPLOYED; + dError("failed to create mnode since %s", terrstr()); return -1; } - return dndWriteMnodeFile(pDnode); + dDebug("start to create mnode"); + return dndOpenMnode(pDnode, &option); } -int32_t dndProcessDropMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) { - SDDropMnodeMsg *pMsg = pRpcMsg->pCont; - pMsg->dnodeId = htonl(pMsg->dnodeId); +int32_t dndProcessAlterMnodeReq(SDnode *pDnode, SRpcMsg *pReq) { + SDAlterMnodeReq *pAlter = dndParseCreateMnodeReq(pReq); - if (pMsg->dnodeId != dndGetDnodeId(pDnode)) { - terrno = TSDB_CODE_DND_MNODE_ID_INVALID; + if (pAlter->dnodeId != dndGetDnodeId(pDnode)) { + terrno = TSDB_CODE_DND_MNODE_INVALID_OPTION; + dError("failed to alter mnode since %s", terrstr()); return -1; - } else { - return dndDropMnode(pDnode); } + + SMnodeOpt option = {0}; + if (dndBuildMnodeOptionFromReq(pDnode, &option, pAlter) != 0) { + terrno = TSDB_CODE_DND_MNODE_INVALID_OPTION; + dError("failed to alter mnode since %s", terrstr()); + return -1; + } + + SMnode *pMnode = dndAcquireMnode(pDnode); + if (pMnode == NULL) { + terrno = TSDB_CODE_DND_MNODE_NOT_DEPLOYED; + dError("failed to alter mnode since %s", terrstr()); + return -1; + } + + dDebug("start to alter mnode"); + int32_t code = dndAlterMnode(pDnode, &option); + dndReleaseMnode(pDnode, pMnode); + + return code; +} + +int32_t dndProcessDropMnodeReq(SDnode *pDnode, 
SRpcMsg *pReq) { + SDDropMnodeReq *pDrop = pReq->pCont; + pDrop->dnodeId = htonl(pDrop->dnodeId); + + if (pDrop->dnodeId != dndGetDnodeId(pDnode)) { + terrno = TSDB_CODE_DND_MNODE_INVALID_OPTION; + dError("failed to drop mnode since %s", terrstr()); + return -1; + } + + SMnode *pMnode = dndAcquireMnode(pDnode); + if (pMnode == NULL) { + terrno = TSDB_CODE_DND_MNODE_NOT_DEPLOYED; + dError("failed to drop mnode since %s", terrstr()); + return -1; + } + + dDebug("start to drop mnode"); + int32_t code = dndDropMnode(pDnode); + dndReleaseMnode(pDnode, pMnode); + + return code; } static void dndProcessMnodeQueue(SDnode *pDnode, SMnodeMsg *pMsg) { @@ -506,6 +534,7 @@ static void dndWriteMnodeMsgToWorker(SDnode *pDnode, SDnodeWorker *pWorker, SRpc code = TSDB_CODE_OUT_OF_MEMORY; } else { code = dndWriteMsgToWorker(pWorker, pMsg, 0); + if (code != 0) code = terrno; } if (code != 0) { diff --git a/source/dnode/mgmt/impl/src/dndQnode.c b/source/dnode/mgmt/impl/src/dndQnode.c index 845ea8bf9a..9d2f623c45 100644 --- a/source/dnode/mgmt/impl/src/dndQnode.c +++ b/source/dnode/mgmt/impl/src/dndQnode.c @@ -15,7 +15,7 @@ #define _DEFAULT_SOURCE #include "dndQnode.h" -#include "dndDnode.h" +#include "dndMgmt.h" #include "dndTransport.h" #include "dndWorker.h" @@ -42,18 +42,13 @@ static SQnode *dndAcquireQnode(SDnode *pDnode) { } static void dndReleaseQnode(SDnode *pDnode, SQnode *pQnode) { + if (pQnode == NULL) return; + SQnodeMgmt *pMgmt = &pDnode->qmgmt; - int32_t refCount = 0; - taosRLockLatch(&pMgmt->latch); - if (pQnode != NULL) { - refCount = atomic_sub_fetch_32(&pMgmt->refCount, 1); - } + int32_t refCount = atomic_sub_fetch_32(&pMgmt->refCount, 1); taosRUnLockLatch(&pMgmt->latch); - - if (pQnode != NULL) { - dTrace("release qnode, refCount:%d", refCount); - } + dTrace("release qnode, refCount:%d", refCount); } static int32_t dndReadQnodeFile(SDnode *pDnode) { @@ -274,7 +269,7 @@ int32_t dndProcessCreateQnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) { pMsg->dnodeId = 
htonl(pMsg->dnodeId); if (pMsg->dnodeId != dndGetDnodeId(pDnode)) { - terrno = TSDB_CODE_DND_QNODE_ID_INVALID; + terrno = TSDB_CODE_DND_QNODE_INVALID_OPTION; dError("failed to create qnode since %s", terrstr()); return -1; } else { @@ -287,7 +282,7 @@ int32_t dndProcessDropQnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) { pMsg->dnodeId = htonl(pMsg->dnodeId); if (pMsg->dnodeId != dndGetDnodeId(pDnode)) { - terrno = TSDB_CODE_DND_QNODE_ID_INVALID; + terrno = TSDB_CODE_DND_QNODE_INVALID_OPTION; dError("failed to drop qnode since %s", terrstr()); return -1; } else { diff --git a/source/dnode/mgmt/impl/src/dndSnode.c b/source/dnode/mgmt/impl/src/dndSnode.c index 7f234629cb..00435d4c3e 100644 --- a/source/dnode/mgmt/impl/src/dndSnode.c +++ b/source/dnode/mgmt/impl/src/dndSnode.c @@ -15,7 +15,7 @@ #define _DEFAULT_SOURCE #include "dndSnode.h" -#include "dndDnode.h" +#include "dndMgmt.h" #include "dndTransport.h" #include "dndWorker.h" @@ -42,18 +42,13 @@ static SSnode *dndAcquireSnode(SDnode *pDnode) { } static void dndReleaseSnode(SDnode *pDnode, SSnode *pSnode) { + if (pSnode == NULL) return; + SSnodeMgmt *pMgmt = &pDnode->smgmt; - int32_t refCount = 0; - taosRLockLatch(&pMgmt->latch); - if (pSnode != NULL) { - refCount = atomic_sub_fetch_32(&pMgmt->refCount, 1); - } + int32_t refCount = atomic_sub_fetch_32(&pMgmt->refCount, 1); taosRUnLockLatch(&pMgmt->latch); - - if (pSnode != NULL) { - dTrace("release snode, refCount:%d", refCount); - } + dTrace("release snode, refCount:%d", refCount); } static int32_t dndReadSnodeFile(SDnode *pDnode) { @@ -268,7 +263,7 @@ int32_t dndProcessCreateSnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) { pMsg->dnodeId = htonl(pMsg->dnodeId); if (pMsg->dnodeId != dndGetDnodeId(pDnode)) { - terrno = TSDB_CODE_DND_SNODE_ID_INVALID; + terrno = TSDB_CODE_DND_SNODE_INVALID_OPTION; dError("failed to create snode since %s", terrstr()); return -1; } else { @@ -281,7 +276,7 @@ int32_t dndProcessDropSnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) { pMsg->dnodeId = 
htonl(pMsg->dnodeId); if (pMsg->dnodeId != dndGetDnodeId(pDnode)) { - terrno = TSDB_CODE_DND_SNODE_ID_INVALID; + terrno = TSDB_CODE_DND_SNODE_INVALID_OPTION; dError("failed to drop snode since %s", terrstr()); return -1; } else { diff --git a/source/dnode/mgmt/impl/src/dndTransport.c b/source/dnode/mgmt/impl/src/dndTransport.c index cf0f561609..509e8f4cab 100644 --- a/source/dnode/mgmt/impl/src/dndTransport.c +++ b/source/dnode/mgmt/impl/src/dndTransport.c @@ -21,7 +21,7 @@ #define _DEFAULT_SOURCE #include "dndTransport.h" -#include "dndDnode.h" +#include "dndMgmt.h" #include "dndMnode.h" #include "dndVnodes.h" @@ -143,26 +143,26 @@ static void dndInitMsgFp(STransMgmt *pMgmt) { pMgmt->msgFp[TMSG_INDEX(TDMT_VND_SHOW_TABLES_FETCH)] = dndProcessVnodeFetchMsg; } -static void dndProcessResponse(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet) { +static void dndProcessResponse(void *parent, SRpcMsg *pRsp, SEpSet *pEpSet) { SDnode *pDnode = parent; STransMgmt *pMgmt = &pDnode->tmgmt; - tmsg_t msgType = pMsg->msgType; + tmsg_t msgType = pRsp->msgType; if (dndGetStat(pDnode) == DND_STAT_STOPPED) { - if (pMsg == NULL || pMsg->pCont == NULL) return; - dTrace("RPC %p, rsp:%s is ignored since dnode is stopping", pMsg->handle, TMSG_INFO(msgType)); - rpcFreeCont(pMsg->pCont); + if (pRsp == NULL || pRsp->pCont == NULL) return; + dTrace("RPC %p, rsp:%s is ignored since dnode is stopping", pRsp->handle, TMSG_INFO(msgType)); + rpcFreeCont(pRsp->pCont); return; } DndMsgFp fp = pMgmt->msgFp[TMSG_INDEX(msgType)]; if (fp != NULL) { - dTrace("RPC %p, rsp:%s will be processed, code:0x%x", pMsg->handle, TMSG_INFO(msgType), pMsg->code & 0XFFFF); - (*fp)(pDnode, pMsg, pEpSet); + dTrace("RPC %p, rsp:%s will be processed, code:0x%x", pRsp->handle, TMSG_INFO(msgType), pRsp->code & 0XFFFF); + (*fp)(pDnode, pRsp, pEpSet); } else { - dError("RPC %p, rsp:%s not processed", pMsg->handle, TMSG_INFO(msgType)); - rpcFreeCont(pMsg->pCont); + dError("RPC %p, rsp:%s not processed", pRsp->handle, 
TMSG_INFO(msgType)); + rpcFreeCont(pRsp->pCont); } } @@ -201,48 +201,48 @@ static void dndCleanupClient(SDnode *pDnode) { } } -static void dndProcessRequest(void *param, SRpcMsg *pMsg, SEpSet *pEpSet) { +static void dndProcessRequest(void *param, SRpcMsg *pReq, SEpSet *pEpSet) { SDnode *pDnode = param; STransMgmt *pMgmt = &pDnode->tmgmt; - tmsg_t msgType = pMsg->msgType; + tmsg_t msgType = pReq->msgType; if (msgType == TDMT_DND_NETWORK_TEST) { - dTrace("RPC %p, network test req, app:%p will be processed, code:0x%x", pMsg->handle, pMsg->ahandle, pMsg->code); - dndProcessStartupReq(pDnode, pMsg); + dTrace("RPC %p, network test req, app:%p will be processed, code:0x%x", pReq->handle, pReq->ahandle, pReq->code); + dndProcessStartupReq(pDnode, pReq); return; } if (dndGetStat(pDnode) == DND_STAT_STOPPED) { - dError("RPC %p, req:%s app:%p is ignored since dnode exiting", pMsg->handle, TMSG_INFO(msgType), pMsg->ahandle); - SRpcMsg rspMsg = {.handle = pMsg->handle, .code = TSDB_CODE_DND_OFFLINE}; + dError("RPC %p, req:%s app:%p is ignored since dnode exiting", pReq->handle, TMSG_INFO(msgType), pReq->ahandle); + SRpcMsg rspMsg = {.handle = pReq->handle, .code = TSDB_CODE_DND_OFFLINE}; rpcSendResponse(&rspMsg); - rpcFreeCont(pMsg->pCont); + rpcFreeCont(pReq->pCont); return; } else if (dndGetStat(pDnode) != DND_STAT_RUNNING) { - dError("RPC %p, req:%s app:%p is ignored since dnode not running", pMsg->handle, TMSG_INFO(msgType), pMsg->ahandle); - SRpcMsg rspMsg = {.handle = pMsg->handle, .code = TSDB_CODE_APP_NOT_READY}; + dError("RPC %p, req:%s app:%p is ignored since dnode not running", pReq->handle, TMSG_INFO(msgType), pReq->ahandle); + SRpcMsg rspMsg = {.handle = pReq->handle, .code = TSDB_CODE_APP_NOT_READY}; rpcSendResponse(&rspMsg); - rpcFreeCont(pMsg->pCont); + rpcFreeCont(pReq->pCont); return; } - if (pMsg->pCont == NULL) { - dTrace("RPC %p, req:%s app:%p not processed since content is null", pMsg->handle, TMSG_INFO(msgType), - pMsg->ahandle); - SRpcMsg rspMsg = 
{.handle = pMsg->handle, .code = TSDB_CODE_DND_INVALID_MSG_LEN}; + if (pReq->pCont == NULL) { + dTrace("RPC %p, req:%s app:%p not processed since content is null", pReq->handle, TMSG_INFO(msgType), + pReq->ahandle); + SRpcMsg rspMsg = {.handle = pReq->handle, .code = TSDB_CODE_DND_INVALID_MSG_LEN}; rpcSendResponse(&rspMsg); return; } DndMsgFp fp = pMgmt->msgFp[TMSG_INDEX(msgType)]; if (fp != NULL) { - dTrace("RPC %p, req:%s app:%p will be processed", pMsg->handle, TMSG_INFO(msgType), pMsg->ahandle); - (*fp)(pDnode, pMsg, pEpSet); + dTrace("RPC %p, req:%s app:%p will be processed", pReq->handle, TMSG_INFO(msgType), pReq->ahandle); + (*fp)(pDnode, pReq, pEpSet); } else { - dError("RPC %p, req:%s app:%p is not processed since no handle", pMsg->handle, TMSG_INFO(msgType), pMsg->ahandle); - SRpcMsg rspMsg = {.handle = pMsg->handle, .code = TSDB_CODE_MSG_NOT_PROCESSED}; + dError("RPC %p, req:%s app:%p is not processed since no handle", pReq->handle, TMSG_INFO(msgType), pReq->ahandle); + SRpcMsg rspMsg = {.handle = pReq->handle, .code = TSDB_CODE_MSG_NOT_PROCESSED}; rpcSendResponse(&rspMsg); - rpcFreeCont(pMsg->pCont); + rpcFreeCont(pReq->pCont); } } @@ -254,7 +254,7 @@ static void dndSendMsgToMnodeRecv(SDnode *pDnode, SRpcMsg *pRpcMsg, SRpcMsg *pRp rpcSendRecv(pMgmt->clientRpc, &epSet, pRpcMsg, pRpcRsp); } -static int32_t dndAuthInternalMsg(SDnode *pDnode, char *user, char *spi, char *encrypt, char *secret, char *ckey) { +static int32_t dndAuthInternalReq(SDnode *pDnode, char *user, char *spi, char *encrypt, char *secret, char *ckey) { if (strcmp(user, INTERNAL_USER) == 0) { // A simple temporary implementation char pass[TSDB_PASSWORD_LEN] = {0}; @@ -281,7 +281,7 @@ static int32_t dndAuthInternalMsg(SDnode *pDnode, char *user, char *spi, char *e static int32_t dndRetrieveUserAuthInfo(void *parent, char *user, char *spi, char *encrypt, char *secret, char *ckey) { SDnode *pDnode = parent; - if (dndAuthInternalMsg(parent, user, spi, encrypt, secret, ckey) == 0) { + if 
(dndAuthInternalReq(parent, user, spi, encrypt, secret, ckey) == 0) { // dTrace("get internal auth success"); return 0; } @@ -298,10 +298,10 @@ static int32_t dndRetrieveUserAuthInfo(void *parent, char *user, char *spi, char // dDebug("user:%s, send auth msg to other mnodes", user); - SAuthMsg *pMsg = rpcMallocCont(sizeof(SAuthMsg)); - tstrncpy(pMsg->user, user, TSDB_USER_LEN); + SAuthReq *pReq = rpcMallocCont(sizeof(SAuthReq)); + tstrncpy(pReq->user, user, TSDB_USER_LEN); - SRpcMsg rpcMsg = {.pCont = pMsg, .contLen = sizeof(SAuthMsg), .msgType = TDMT_MND_AUTH}; + SRpcMsg rpcMsg = {.pCont = pReq, .contLen = sizeof(SAuthReq), .msgType = TDMT_MND_AUTH}; SRpcMsg rpcRsp = {0}; dndSendMsgToMnodeRecv(pDnode, &rpcMsg, &rpcRsp); @@ -381,19 +381,19 @@ void dndCleanupTrans(SDnode *pDnode) { dInfo("dnode-transport is cleaned up"); } -int32_t dndSendReqToDnode(SDnode *pDnode, SEpSet *pEpSet, SRpcMsg *pMsg) { +int32_t dndSendReqToDnode(SDnode *pDnode, SEpSet *pEpSet, SRpcMsg *pReq) { STransMgmt *pMgmt = &pDnode->tmgmt; if (pMgmt->clientRpc == NULL) { terrno = TSDB_CODE_DND_OFFLINE; return -1; } - rpcSendRequest(pMgmt->clientRpc, pEpSet, pMsg, NULL); + rpcSendRequest(pMgmt->clientRpc, pEpSet, pReq, NULL); return 0; } -int32_t dndSendReqToMnode(SDnode *pDnode, SRpcMsg *pMsg) { +int32_t dndSendReqToMnode(SDnode *pDnode, SRpcMsg *pReq) { SEpSet epSet = {0}; dndGetMnodeEpSet(pDnode, &epSet); - return dndSendReqToDnode(pDnode, &epSet, pMsg); + return dndSendReqToDnode(pDnode, &epSet, pReq); } diff --git a/source/dnode/mgmt/impl/src/dndVnodes.c b/source/dnode/mgmt/impl/src/dndVnodes.c index 8835e0ba65..5198e351ab 100644 --- a/source/dnode/mgmt/impl/src/dndVnodes.c +++ b/source/dnode/mgmt/impl/src/dndVnodes.c @@ -40,7 +40,7 @@ typedef struct { STaosQueue *pSyncQ; STaosQueue *pApplyQ; STaosQueue *pQueryQ; - STaosQueue* pFetchQ; + STaosQueue *pFetchQ; } SVnodeObj; typedef struct { @@ -53,22 +53,8 @@ typedef struct { SWrapperCfg *pCfgs; } SVnodeThread; -static int32_t 
dndInitVnodeReadWorker(SDnode *pDnode); -static int32_t dndInitVnodeWriteWorker(SDnode *pDnode); -static int32_t dndInitVnodeSyncWorker(SDnode *pDnode); -static void dndCleanupVnodeReadWorker(SDnode *pDnode); -static void dndCleanupVnodeWriteWorker(SDnode *pDnode); -static void dndCleanupVnodeSyncWorker(SDnode *pDnode); -static int32_t dndAllocVnodeQueryQueue(SDnode *pDnode, SVnodeObj *pVnode); -static int32_t dndAllocVnodeFetchQueue(SDnode *pDnode, SVnodeObj *pVnode); -static int32_t dndAllocVnodeWriteQueue(SDnode *pDnode, SVnodeObj *pVnode); -static int32_t dndAllocVnodeApplyQueue(SDnode *pDnode, SVnodeObj *pVnode); -static int32_t dndAllocVnodeSyncQueue(SDnode *pDnode, SVnodeObj *pVnode); -static void dndFreeVnodeQueryQueue(SDnode *pDnode, SVnodeObj *pVnode); -static void dndFreeVnodeFetchQueue(SDnode *pDnode, SVnodeObj *pVnode); -static void dndFreeVnodeWriteQueue(SDnode *pDnode, SVnodeObj *pVnode); -static void dndFreeVnodeApplyQueue(SDnode *pDnode, SVnodeObj *pVnode); -static void dndFreeVnodeSyncQueue(SDnode *pDnode, SVnodeObj *pVnode); +static int32_t dndAllocVnodeQueue(SDnode *pDnode, SVnodeObj *pVnode); +static void dndFreeVnodeQueue(SDnode *pDnode, SVnodeObj *pVnode); static void dndProcessVnodeQueryQueue(SVnodeObj *pVnode, SRpcMsg *pMsg); static void dndProcessVnodeFetchQueue(SVnodeObj *pVnode, SRpcMsg *pMsg); @@ -117,11 +103,9 @@ static void dndReleaseVnode(SDnode *pDnode, SVnodeObj *pVnode) { if (pVnode == NULL) return; SVnodesMgmt *pMgmt = &pDnode->vmgmt; - taosRLockLatch(&pMgmt->latch); int32_t refCount = atomic_sub_fetch_32(&pVnode->refCount, 1); taosRUnLockLatch(&pMgmt->latch); - dTrace("vgId:%d, release vnode, refCount:%d", pVnode->vgId, refCount); } @@ -134,7 +118,7 @@ static int32_t dndOpenVnode(SDnode *pDnode, SWrapperCfg *pCfg, SVnode *pImpl) { } pVnode->vgId = pCfg->vgId; - pVnode->refCount = 1; + pVnode->refCount = 0; pVnode->dropped = 0; pVnode->accessState = TSDB_VN_ALL_ACCCESS; pVnode->pImpl = pImpl; @@ -148,23 +132,8 @@ static int32_t 
dndOpenVnode(SDnode *pDnode, SWrapperCfg *pCfg, SVnode *pImpl) { return -1; } - if (dndAllocVnodeQueryQueue(pDnode, pVnode) != 0) { - return -1; - } - - if (dndAllocVnodeFetchQueue(pDnode, pVnode) != 0) { - return -1; - } - - if (dndAllocVnodeWriteQueue(pDnode, pVnode) != 0) { - return -1; - } - - if (dndAllocVnodeApplyQueue(pDnode, pVnode) != 0) { - return -1; - } - - if (dndAllocVnodeSyncQueue(pDnode, pVnode) != 0) { + if (dndAllocVnodeQueue(pDnode, pVnode) != 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } @@ -192,11 +161,10 @@ static void dndCloseVnode(SDnode *pDnode, SVnodeObj *pVnode) { while (!taosQueueEmpty(pVnode->pQueryQ)) taosMsleep(10); while (!taosQueueEmpty(pVnode->pFetchQ)) taosMsleep(10); - dndFreeVnodeQueryQueue(pDnode, pVnode); - dndFreeVnodeFetchQueue(pDnode, pVnode); - dndFreeVnodeWriteQueue(pDnode, pVnode); - dndFreeVnodeApplyQueue(pDnode, pVnode); - dndFreeVnodeSyncQueue(pDnode, pVnode); + dndFreeVnodeQueue(pDnode, pVnode); + vnodeClose(pVnode->pImpl); + pVnode->pImpl = NULL; + free(pVnode->path); free(pVnode->db); free(pVnode); @@ -388,7 +356,7 @@ static int32_t dndWriteVnodesToFile(SDnode *pDnode) { free(pVnodes); } - dInfo("successed to write %s", file); + dDebug("successed to write %s", realfile); return taosRenameFile(file, realfile); } @@ -408,7 +376,10 @@ static void *dnodeOpenVnodeFunc(void *param) { pMgmt->openVnodes, pMgmt->totalVnodes); dndReportStartup(pDnode, "open-vnodes", stepDesc); - SVnode *pImpl = vnodeOpen(pCfg->path, NULL); + SVnodeCfg vnodeCfg = {0}; + vnodeCfg.vgId = pCfg->vgId; + + SVnode *pImpl = vnodeOpen(pCfg->path, &vnodeCfg); if (pImpl == NULL) { dError("vgId:%d, failed to open vnode by thread:%d", pCfg->vgId, pThread->threadIndex); pThread->failed++; @@ -504,7 +475,6 @@ static void dndCloseVnodes(SDnode *pDnode) { SVnodeObj **pVnodes = dndGetVnodesFromHash(pDnode, &numOfVnodes); for (int32_t i = 0; i < numOfVnodes; ++i) { - dndReleaseVnode(pDnode, pVnodes[i]); dndCloseVnode(pDnode, pVnodes[i]); } @@ -520,8 
+490,8 @@ static void dndCloseVnodes(SDnode *pDnode) { dInfo("total vnodes:%d are all closed", numOfVnodes); } -static SCreateVnodeMsg *dndParseCreateVnodeReq(SRpcMsg *rpcMsg) { - SCreateVnodeMsg *pCreate = rpcMsg->pCont; +static SCreateVnodeReq *dndParseCreateVnodeReq(SRpcMsg *pReq) { + SCreateVnodeReq *pCreate = pReq->pCont; pCreate->vgId = htonl(pCreate->vgId); pCreate->dnodeId = htonl(pCreate->dnodeId); pCreate->dbUid = htobe64(pCreate->dbUid); @@ -545,7 +515,8 @@ static SCreateVnodeMsg *dndParseCreateVnodeReq(SRpcMsg *rpcMsg) { return pCreate; } -static void dndGenerateVnodeCfg(SCreateVnodeMsg *pCreate, SVnodeCfg *pCfg) { +static void dndGenerateVnodeCfg(SCreateVnodeReq *pCreate, SVnodeCfg *pCfg) { + pCfg->vgId = pCreate->vgId; pCfg->wsize = pCreate->cacheBlockSize; pCfg->ssize = pCreate->cacheBlockSize; pCfg->wsize = pCreate->cacheBlockSize; @@ -554,7 +525,7 @@ static void dndGenerateVnodeCfg(SCreateVnodeMsg *pCreate, SVnodeCfg *pCfg) { pCfg->ttl = 4; pCfg->keep = pCreate->daysToKeep0; pCfg->isWeak = true; - pCfg->tsdbCfg.keep0 = pCreate->daysToKeep0; + pCfg->tsdbCfg.keep = pCreate->daysToKeep0; pCfg->tsdbCfg.keep1 = pCreate->daysToKeep2; pCfg->tsdbCfg.keep2 = pCreate->daysToKeep0; pCfg->tsdbCfg.lruCacheSize = pCreate->cacheBlockSize; @@ -568,7 +539,7 @@ static void dndGenerateVnodeCfg(SCreateVnodeMsg *pCreate, SVnodeCfg *pCfg) { pCfg->walCfg.vgId = pCreate->vgId; } -static void dndGenerateWrapperCfg(SDnode *pDnode, SCreateVnodeMsg *pCreate, SWrapperCfg *pCfg) { +static void dndGenerateWrapperCfg(SDnode *pDnode, SCreateVnodeReq *pCreate, SWrapperCfg *pCfg) { memcpy(pCfg->db, pCreate->db, TSDB_DB_FNAME_LEN); pCfg->dbUid = pCreate->dbUid; pCfg->dropped = 0; @@ -577,20 +548,20 @@ static void dndGenerateWrapperCfg(SDnode *pDnode, SCreateVnodeMsg *pCreate, SWra pCfg->vgVersion = pCreate->vgVersion; } -static SDropVnodeMsg *vnodeParseDropVnodeReq(SRpcMsg *rpcMsg) { - SDropVnodeMsg *pDrop = rpcMsg->pCont; +static SDropVnodeReq *vnodeParseDropVnodeReq(SRpcMsg *pReq) 
{ + SDropVnodeReq *pDrop = pReq->pCont; pDrop->vgId = htonl(pDrop->vgId); return pDrop; } -static SAuthVnodeMsg *vnodeParseAuthVnodeReq(SRpcMsg *rpcMsg) { - SAuthVnodeMsg *pAuth = rpcMsg->pCont; +static SAuthVnodeReq *vnodeParseAuthVnodeReq(SRpcMsg *pReq) { + SAuthVnodeReq *pAuth = pReq->pCont; pAuth->vgId = htonl(pAuth->vgId); return pAuth; } -int32_t dndProcessCreateVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { - SCreateVnodeMsg *pCreate = dndParseCreateVnodeReq(rpcMsg); +int32_t dndProcessCreateVnodeReq(SDnode *pDnode, SRpcMsg *pReq) { + SCreateVnodeReq *pCreate = dndParseCreateVnodeReq(pReq); dDebug("vgId:%d, create vnode req is received", pCreate->vgId); SVnodeCfg vnodeCfg = {0}; @@ -603,16 +574,19 @@ int32_t dndProcessCreateVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { if (pVnode != NULL) { dDebug("vgId:%d, already exist, return success", pCreate->vgId); dndReleaseVnode(pDnode, pVnode); + terrno = TSDB_CODE_DND_VNODE_ALREADY_DEPLOYED; return 0; } - SVnode *pImpl = vnodeOpen(wrapperCfg.path, NULL /*pCfg*/); + SVnode *pImpl = vnodeOpen(wrapperCfg.path, &vnodeCfg); if (pImpl == NULL) { + dError("vgId:%d, failed to create vnode since %s", pCreate->vgId, terrstr()); return -1; } int32_t code = dndOpenVnode(pDnode, &wrapperCfg, pImpl); if (code != 0) { + dError("vgId:%d, failed to open vnode since %s", pCreate->vgId, terrstr()); vnodeClose(pImpl); vnodeDestroy(wrapperCfg.path); terrno = code; @@ -630,23 +604,20 @@ int32_t dndProcessCreateVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { return 0; } -int32_t dndProcessAlterVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { - SAlterVnodeMsg *pAlter = (SAlterVnodeMsg *)dndParseCreateVnodeReq(rpcMsg); +int32_t dndProcessAlterVnodeReq(SDnode *pDnode, SRpcMsg *pReq) { + SAlterVnodeReq *pAlter = (SAlterVnodeReq *)dndParseCreateVnodeReq(pReq); dDebug("vgId:%d, alter vnode req is received", pAlter->vgId); SVnodeCfg vnodeCfg = {0}; dndGenerateVnodeCfg(pAlter, &vnodeCfg); - SWrapperCfg wrapperCfg = {0}; - dndGenerateWrapperCfg(pDnode, pAlter, 
&wrapperCfg); - SVnodeObj *pVnode = dndAcquireVnode(pDnode, pAlter->vgId); if (pVnode == NULL) { dDebug("vgId:%d, failed to alter vnode since %s", pAlter->vgId, terrstr()); - return terrno; + return -1; } - if (wrapperCfg.vgVersion == pVnode->vgVersion) { + if (pAlter->vgVersion == pVnode->vgVersion) { dndReleaseVnode(pDnode, pVnode); dDebug("vgId:%d, no need to alter vnode cfg for version unchanged ", pAlter->vgId); return 0; @@ -655,11 +626,11 @@ int32_t dndProcessAlterVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { if (vnodeAlter(pVnode->pImpl, &vnodeCfg) != 0) { dError("vgId:%d, failed to alter vnode since %s", pAlter->vgId, terrstr()); dndReleaseVnode(pDnode, pVnode); - return terrno; + return -1; } int32_t oldVersion = pVnode->vgVersion; - pVnode->vgVersion = wrapperCfg.vgVersion; + pVnode->vgVersion = pAlter->vgVersion; int32_t code = dndWriteVnodesToFile(pDnode); if (code != 0) { pVnode->vgVersion = oldVersion; @@ -669,8 +640,8 @@ int32_t dndProcessAlterVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { return code; } -int32_t dndProcessDropVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { - SDropVnodeMsg *pDrop = vnodeParseDropVnodeReq(rpcMsg); +int32_t dndProcessDropVnodeReq(SDnode *pDnode, SRpcMsg *pReq) { + SDropVnodeReq *pDrop = vnodeParseDropVnodeReq(pReq); int32_t vgId = pDrop->vgId; dDebug("vgId:%d, drop vnode req is received", vgId); @@ -684,10 +655,10 @@ int32_t dndProcessDropVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { pVnode->dropped = 1; if (dndWriteVnodesToFile(pDnode) != 0) { pVnode->dropped = 0; - return terrno; + dndReleaseVnode(pDnode, pVnode); + return -1; } - dndReleaseVnode(pDnode, pVnode); dndCloseVnode(pDnode, pVnode); vnodeClose(pVnode->pImpl); vnodeDestroy(pVnode->path); @@ -696,17 +667,16 @@ int32_t dndProcessDropVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { return 0; } -int32_t dndProcessAuthVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { - SAuthVnodeMsg *pAuth = (SAuthVnodeMsg *)vnodeParseAuthVnodeReq(rpcMsg); +int32_t dndProcessAuthVnodeReq(SDnode 
*pDnode, SRpcMsg *pReq) { + SAuthVnodeReq *pAuth = (SAuthVnodeReq *)vnodeParseAuthVnodeReq(pReq); - int32_t code = 0; int32_t vgId = pAuth->vgId; dDebug("vgId:%d, auth vnode req is received", vgId); SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId); if (pVnode == NULL) { dDebug("vgId:%d, failed to auth since %s", vgId, terrstr()); - return terrno; + return -1; } pVnode->accessState = pAuth->accessState; @@ -714,30 +684,30 @@ int32_t dndProcessAuthVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { return 0; } -int32_t dndProcessSyncVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { - SAuthVnodeMsg *pAuth = (SAuthVnodeMsg *)vnodeParseAuthVnodeReq(rpcMsg); +int32_t dndProcessSyncVnodeReq(SDnode *pDnode, SRpcMsg *pReq) { + SSyncVnodeReq *pSync = (SSyncVnodeReq *)vnodeParseDropVnodeReq(pReq); - int32_t vgId = pAuth->vgId; - dDebug("vgId:%d, auth vnode req is received", vgId); + int32_t vgId = pSync->vgId; + dDebug("vgId:%d, sync vnode req is received", vgId); SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId); if (pVnode == NULL) { - dDebug("vgId:%d, failed to auth since %s", vgId, terrstr()); - return terrno; + dDebug("vgId:%d, failed to sync since %s", vgId, terrstr()); + return -1; } if (vnodeSync(pVnode->pImpl) != 0) { - dError("vgId:%d, failed to auth vnode since %s", vgId, terrstr()); + dError("vgId:%d, failed to sync vnode since %s", vgId, terrstr()); dndReleaseVnode(pDnode, pVnode); - return terrno; + return -1; } dndReleaseVnode(pDnode, pVnode); return 0; } -int32_t dndProcessCompactVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { - SCompactVnodeMsg *pCompact = (SCompactVnodeMsg *)vnodeParseDropVnodeReq(rpcMsg); +int32_t dndProcessCompactVnodeReq(SDnode *pDnode, SRpcMsg *pReq) { + SCompactVnodeReq *pCompact = (SCompactVnodeReq *)vnodeParseDropVnodeReq(pReq); int32_t vgId = pCompact->vgId; dDebug("vgId:%d, compact vnode req is received", vgId); @@ -745,13 +715,13 @@ int32_t dndProcessCompactVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) { SVnodeObj *pVnode = dndAcquireVnode(pDnode, 
vgId); if (pVnode == NULL) { dDebug("vgId:%d, failed to compact since %s", vgId, terrstr()); - return terrno; + return -1; } if (vnodeCompact(pVnode->pImpl) != 0) { dError("vgId:%d, failed to compact vnode since %s", vgId, terrstr()); dndReleaseVnode(pDnode, pVnode); - return terrno; + return -1; } dndReleaseVnode(pDnode, pVnode); @@ -810,6 +780,7 @@ static void dndProcessVnodeApplyQueue(SVnodeObj *pVnode, STaosQall *qall, int32_ for (int32_t i = 0; i < numOfMsgs; ++i) { taosGetQitem(qall, (void **)&pMsg); + // todo SRpcMsg *pRsp = NULL; (void)vnodeApplyWMsg(pVnode->pImpl, pMsg, &pRsp); } @@ -821,6 +792,7 @@ static void dndProcessVnodeSyncQueue(SVnodeObj *pVnode, STaosQall *qall, int32_t for (int32_t i = 0; i < numOfMsgs; ++i) { taosGetQitem(qall, (void **)&pMsg); + // todo SRpcMsg *pRsp = NULL; (void)vnodeProcessSyncReq(pVnode->pImpl, pMsg, &pRsp); } @@ -844,21 +816,25 @@ static int32_t dndWriteRpcMsgToVnodeQueue(STaosQueue *pQueue, SRpcMsg *pRpcMsg) } if (code != TSDB_CODE_SUCCESS) { - SRpcMsg rsp = {.handle = pRpcMsg->handle, .code = code}; - rpcSendResponse(&rsp); + if (pRpcMsg->msgType & 1u) { + SRpcMsg rsp = {.handle = pRpcMsg->handle, .code = code}; + rpcSendResponse(&rsp); + } rpcFreeCont(pRpcMsg->pCont); } } static SVnodeObj *dndAcquireVnodeFromMsg(SDnode *pDnode, SRpcMsg *pMsg) { - SMsgHead *pHead = (SMsgHead *)pMsg->pCont; + SMsgHead *pHead = pMsg->pCont; pHead->contLen = htonl(pHead->contLen); pHead->vgId = htonl(pHead->vgId); SVnodeObj *pVnode = dndAcquireVnode(pDnode, pHead->vgId); if (pVnode == NULL) { - SRpcMsg rsp = {.handle = pMsg->handle, .code = TSDB_CODE_VND_INVALID_VGROUP_ID}; - rpcSendResponse(&rsp); + if (pMsg->msgType & 1u) { + SRpcMsg rsp = {.handle = pMsg->handle, .code = TSDB_CODE_VND_INVALID_VGROUP_ID}; + rpcSendResponse(&rsp); + } rpcFreeCont(pMsg->pCont); } @@ -899,193 +875,96 @@ void dndProcessVnodeFetchMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) { static int32_t dndPutMsgIntoVnodeApplyQueue(SDnode *pDnode, int32_t vgId, 
SRpcMsg *pMsg) { SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId); - if (pVnode == NULL) { - return -1; - } + if (pVnode == NULL) return -1; int32_t code = taosWriteQitem(pVnode->pApplyQ, pMsg); dndReleaseVnode(pDnode, pVnode); return code; } -static int32_t dndAllocVnodeQueryQueue(SDnode *pDnode, SVnodeObj *pVnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; - pVnode->pQueryQ = tWorkerAllocQueue(&pMgmt->queryPool, pVnode, (FProcessItem)dndProcessVnodeQueryQueue); - if (pVnode->pQueryQ == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - return 0; -} - -static void dndFreeVnodeQueryQueue(SDnode *pDnode, SVnodeObj *pVnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; - tWorkerFreeQueue(&pMgmt->queryPool, pVnode->pQueryQ); - pVnode->pQueryQ = NULL; -} - -static int32_t dndAllocVnodeFetchQueue(SDnode *pDnode, SVnodeObj *pVnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; - pVnode->pFetchQ = tWorkerAllocQueue(&pMgmt->fetchPool, pVnode, (FProcessItem)dndProcessVnodeFetchQueue); - if (pVnode->pFetchQ == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - return 0; -} - -static void dndFreeVnodeFetchQueue(SDnode *pDnode, SVnodeObj *pVnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; - tWorkerFreeQueue(&pMgmt->fetchPool, pVnode->pFetchQ); - pVnode->pFetchQ = NULL; -} - -static int32_t dndInitVnodeReadWorker(SDnode *pDnode) { +static int32_t dndInitVnodeWorkers(SDnode *pDnode) { SVnodesMgmt *pMgmt = &pDnode->vmgmt; int32_t maxFetchThreads = 4; - float threadsForQuery = MAX(pDnode->opt.numOfCores * pDnode->opt.ratioOfQueryCores, 1); + int32_t minFetchThreads = MIN(maxFetchThreads, pDnode->opt.numOfCores); + int32_t minQueryThreads = MAX((int32_t)(pDnode->opt.numOfCores * pDnode->opt.ratioOfQueryCores), 1); + int32_t maxQueryThreads = minQueryThreads; + int32_t maxWriteThreads = MAX(pDnode->opt.numOfCores, 1); + int32_t maxSyncThreads = MAX(pDnode->opt.numOfCores / 2, 1); SWorkerPool *pPool = &pMgmt->queryPool; pPool->name = "vnode-query"; - pPool->min = 
(int32_t)threadsForQuery; - pPool->max = pPool->min; - if (tWorkerInit(pPool) != 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } + pPool->min = minQueryThreads; + pPool->max = maxQueryThreads; + if (tWorkerInit(pPool) != 0) return -1; pPool = &pMgmt->fetchPool; pPool->name = "vnode-fetch"; - pPool->min = MIN(maxFetchThreads, pDnode->opt.numOfCores); - pPool->max = pPool->min; - if (tWorkerInit(pPool) != 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } + pPool->min = minFetchThreads; + pPool->max = maxFetchThreads; + if (tWorkerInit(pPool) != 0) return -1; - dDebug("vnode read worker is initialized"); + SMWorkerPool *pMPool = &pMgmt->writePool; + pMPool->name = "vnode-write"; + pMPool->max = maxWriteThreads; + if (tMWorkerInit(pMPool) != 0) return -1; + + pMPool = &pMgmt->syncPool; + pMPool->name = "vnode-sync"; + pMPool->max = maxSyncThreads; + if (tMWorkerInit(pMPool) != 0) return -1; + + dDebug("vnode workers is initialized"); return 0; } -static void dndCleanupVnodeReadWorker(SDnode *pDnode) { +static void dndCleanupVnodeWorkers(SDnode *pDnode) { SVnodesMgmt *pMgmt = &pDnode->vmgmt; tWorkerCleanup(&pMgmt->fetchPool); tWorkerCleanup(&pMgmt->queryPool); - dDebug("vnode close worker is initialized"); -} - -static int32_t dndAllocVnodeWriteQueue(SDnode *pDnode, SVnodeObj *pVnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; - pVnode->pWriteQ = tMWorkerAllocQueue(&pMgmt->writePool, pVnode, (FProcessItems)dndProcessVnodeWriteQueue); - if (pVnode->pWriteQ == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - return 0; -} - -static void dndFreeVnodeWriteQueue(SDnode *pDnode, SVnodeObj *pVnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; - tMWorkerFreeQueue(&pMgmt->writePool, pVnode->pWriteQ); - pVnode->pWriteQ = NULL; -} - -static int32_t dndAllocVnodeApplyQueue(SDnode *pDnode, SVnodeObj *pVnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; - pVnode->pApplyQ = tMWorkerAllocQueue(&pMgmt->writePool, pVnode, (FProcessItems)dndProcessVnodeApplyQueue); 
- if (pVnode->pApplyQ == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - return 0; -} - -static void dndFreeVnodeApplyQueue(SDnode *pDnode, SVnodeObj *pVnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; - tMWorkerFreeQueue(&pMgmt->writePool, pVnode->pApplyQ); - pVnode->pApplyQ = NULL; -} - -static int32_t dndInitVnodeWriteWorker(SDnode *pDnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; - SMWorkerPool *pPool = &pMgmt->writePool; - pPool->name = "vnode-write"; - pPool->max = pDnode->opt.numOfCores; - if (tMWorkerInit(pPool) != 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - dDebug("vnode write worker is initialized"); - return 0; -} - -static void dndCleanupVnodeWriteWorker(SDnode *pDnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; tMWorkerCleanup(&pMgmt->writePool); - dDebug("vnode write worker is closed"); -} - -static int32_t dndAllocVnodeSyncQueue(SDnode *pDnode, SVnodeObj *pVnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; - pVnode->pSyncQ = tMWorkerAllocQueue(&pMgmt->syncPool, pVnode, (FProcessItems)dndProcessVnodeSyncQueue); - if (pVnode->pSyncQ == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - return 0; -} - -static void dndFreeVnodeSyncQueue(SDnode *pDnode, SVnodeObj *pVnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; - tMWorkerFreeQueue(&pMgmt->syncPool, pVnode->pSyncQ); - pVnode->pSyncQ = NULL; -} - -static int32_t dndInitVnodeSyncWorker(SDnode *pDnode) { - int32_t maxThreads = pDnode->opt.numOfCores / 2; - if (maxThreads < 1) maxThreads = 1; - - SVnodesMgmt *pMgmt = &pDnode->vmgmt; - SMWorkerPool *pPool = &pMgmt->syncPool; - pPool->name = "vnode-sync"; - pPool->max = maxThreads; - if (tMWorkerInit(pPool) != 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - dDebug("vnode sync worker is initialized"); - return 0; -} - -static void dndCleanupVnodeSyncWorker(SDnode *pDnode) { - SVnodesMgmt *pMgmt = &pDnode->vmgmt; tMWorkerCleanup(&pMgmt->syncPool); - dDebug("vnode sync worker is closed"); + dDebug("vnode 
workers is closed"); +} + +static int32_t dndAllocVnodeQueue(SDnode *pDnode, SVnodeObj *pVnode) { + SVnodesMgmt *pMgmt = &pDnode->vmgmt; + + pVnode->pWriteQ = tMWorkerAllocQueue(&pMgmt->writePool, pVnode, (FProcessItems)dndProcessVnodeWriteQueue); + pVnode->pApplyQ = tMWorkerAllocQueue(&pMgmt->writePool, pVnode, (FProcessItems)dndProcessVnodeApplyQueue); + pVnode->pSyncQ = tMWorkerAllocQueue(&pMgmt->syncPool, pVnode, (FProcessItems)dndProcessVnodeSyncQueue); + pVnode->pFetchQ = tWorkerAllocQueue(&pMgmt->fetchPool, pVnode, (FProcessItem)dndProcessVnodeFetchQueue); + pVnode->pQueryQ = tWorkerAllocQueue(&pMgmt->queryPool, pVnode, (FProcessItem)dndProcessVnodeQueryQueue); + + if (pVnode->pApplyQ == NULL || pVnode->pWriteQ == NULL || pVnode->pSyncQ == NULL || pVnode->pFetchQ == NULL || + pVnode->pQueryQ == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + return 0; +} + +static void dndFreeVnodeQueue(SDnode *pDnode, SVnodeObj *pVnode) { + SVnodesMgmt *pMgmt = &pDnode->vmgmt; + tWorkerFreeQueue(&pMgmt->queryPool, pVnode->pQueryQ); + tWorkerFreeQueue(&pMgmt->fetchPool, pVnode->pFetchQ); + tMWorkerFreeQueue(&pMgmt->writePool, pVnode->pWriteQ); + tMWorkerFreeQueue(&pMgmt->writePool, pVnode->pApplyQ); + tMWorkerFreeQueue(&pMgmt->syncPool, pVnode->pSyncQ); + pVnode->pWriteQ = NULL; + pVnode->pApplyQ = NULL; + pVnode->pSyncQ = NULL; + pVnode->pFetchQ = NULL; + pVnode->pQueryQ = NULL; } int32_t dndInitVnodes(SDnode *pDnode) { dInfo("dnode-vnodes start to init"); - if (dndInitVnodeReadWorker(pDnode) != 0) { - dError("failed to init vnodes read worker since %s", terrstr()); - return -1; - } - - if (dndInitVnodeWriteWorker(pDnode) != 0) { - dError("failed to init vnodes write worker since %s", terrstr()); - return -1; - } - - if (dndInitVnodeSyncWorker(pDnode) != 0) { - dError("failed to init vnodes sync worker since %s", terrstr()); + if (dndInitVnodeWorkers(pDnode) != 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + dError("failed to init vnode workers since %s", 
terrstr()); return -1; } @@ -1101,9 +980,7 @@ int32_t dndInitVnodes(SDnode *pDnode) { void dndCleanupVnodes(SDnode *pDnode) { dInfo("dnode-vnodes start to clean up"); dndCloseVnodes(pDnode); - dndCleanupVnodeReadWorker(pDnode); - dndCleanupVnodeWriteWorker(pDnode); - dndCleanupVnodeSyncWorker(pDnode); + dndCleanupVnodeWorkers(pDnode); dInfo("dnode-vnodes is cleaned up"); } diff --git a/source/dnode/mgmt/impl/src/dndWorker.c b/source/dnode/mgmt/impl/src/dndWorker.c index b1107fd185..e0db262f89 100644 --- a/source/dnode/mgmt/impl/src/dndWorker.c +++ b/source/dnode/mgmt/impl/src/dndWorker.c @@ -101,7 +101,9 @@ int32_t dndWriteMsgToWorker(SDnodeWorker *pWorker, void *pCont, int32_t contLen) } if (taosWriteQitem(pWorker->queue, pMsg) != 0) { - taosFreeQitem(pMsg); + if (contLen != 0) { + taosFreeQitem(pMsg); + } terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } diff --git a/source/dnode/mgmt/impl/src/dnode.c b/source/dnode/mgmt/impl/src/dnode.c index ef5c15743c..362838eba0 100644 --- a/source/dnode/mgmt/impl/src/dnode.c +++ b/source/dnode/mgmt/impl/src/dnode.c @@ -15,7 +15,7 @@ #define _DEFAULT_SOURCE #include "dndBnode.h" -#include "dndDnode.h" +#include "dndMgmt.h" #include "dndMnode.h" #include "dndQnode.h" #include "dndSnode.h" diff --git a/source/dnode/mgmt/impl/test/CMakeLists.txt b/source/dnode/mgmt/impl/test/CMakeLists.txt index b13edf7d5a..ce93a14d3f 100644 --- a/source/dnode/mgmt/impl/test/CMakeLists.txt +++ b/source/dnode/mgmt/impl/test/CMakeLists.txt @@ -3,18 +3,6 @@ enable_testing() add_subdirectory(qnode) add_subdirectory(bnode) add_subdirectory(snode) - -# add_subdirectory(auth) -# add_subdirectory(balance) -add_subdirectory(db) -add_subdirectory(dnode) -# add_subdirectory(func) add_subdirectory(mnode) -add_subdirectory(profile) -add_subdirectory(stb) -# add_subdirectory(sync) -# add_subdirectory(telem) -# add_subdirectory(trans) -add_subdirectory(vgroup) - +add_subdirectory(vnode) add_subdirectory(sut) diff --git 
a/source/dnode/mgmt/impl/test/bnode/dbnode.cpp b/source/dnode/mgmt/impl/test/bnode/dbnode.cpp index bafe8242a6..398d530648 100644 --- a/source/dnode/mgmt/impl/test/bnode/dbnode.cpp +++ b/source/dnode/mgmt/impl/test/bnode/dbnode.cpp @@ -34,7 +34,7 @@ TEST_F(DndTestBnode, 01_Create_Bnode) { SRpcMsg* pRsp = test.SendReq(TDMT_DND_CREATE_BNODE, pReq, contLen); ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, TSDB_CODE_DND_BNODE_ID_INVALID); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_BNODE_INVALID_OPTION); } { @@ -82,7 +82,7 @@ TEST_F(DndTestBnode, 01_Drop_Bnode) { SRpcMsg* pRsp = test.SendReq(TDMT_DND_DROP_BNODE, pReq, contLen); ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, TSDB_CODE_DND_BNODE_ID_INVALID); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_BNODE_INVALID_OPTION); } { diff --git a/source/dnode/mgmt/impl/test/db/CMakeLists.txt b/source/dnode/mgmt/impl/test/db/CMakeLists.txt deleted file mode 100644 index cb9f1600fc..0000000000 --- a/source/dnode/mgmt/impl/test/db/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -aux_source_directory(. DB_SRC) -add_executable(dnode_test_db ${DB_SRC}) -target_link_libraries( - dnode_test_db - PUBLIC sut -) - -add_test( - NAME dnode_test_db - COMMAND dnode_test_db -) diff --git a/source/dnode/mgmt/impl/test/dnode/CMakeLists.txt b/source/dnode/mgmt/impl/test/dnode/CMakeLists.txt deleted file mode 100644 index 5796590865..0000000000 --- a/source/dnode/mgmt/impl/test/dnode/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -aux_source_directory(. 
DTEST_SRC) -add_executable(dnode_test_dnode ${DTEST_SRC}) -target_link_libraries( - dnode_test_dnode - PUBLIC sut -) - -add_test( - NAME dnode_test_dnode - COMMAND dnode_test_dnode -) diff --git a/source/dnode/mgmt/impl/test/dnode/dnode.cpp b/source/dnode/mgmt/impl/test/dnode/dnode.cpp deleted file mode 100644 index 51af008fc3..0000000000 --- a/source/dnode/mgmt/impl/test/dnode/dnode.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/** - * @file dnode.cpp - * @author slguan (slguan@taosdata.com) - * @brief DNODE module dnode-msg tests - * @version 0.1 - * @date 2021-12-15 - * - * @copyright Copyright (c) 2021 - * - */ - -#include "sut.h" - -class DndTestDnode : public ::testing::Test { - public: - void SetUp() override {} - void TearDown() override {} - - public: - static void SetUpTestSuite() { - test.Init("/tmp/dnode_test_dnode1", 9041); - const char* fqdn = "localhost"; - const char* firstEp = "localhost:9041"; - - server2.Start("/tmp/dnode_test_dnode2", fqdn, 9042, firstEp); - server3.Start("/tmp/dnode_test_dnode3", fqdn, 9043, firstEp); - server4.Start("/tmp/dnode_test_dnode4", fqdn, 9044, firstEp); - server5.Start("/tmp/dnode_test_dnode5", fqdn, 9045, firstEp); - taosMsleep(300); - } - - static void TearDownTestSuite() { - server2.Stop(); - server3.Stop(); - server4.Stop(); - server5.Stop(); - test.Cleanup(); - } - - static Testbase test; - static TestServer server2; - static TestServer server3; - static TestServer server4; - static TestServer server5; -}; - -Testbase DndTestDnode::test; -TestServer DndTestDnode::server2; -TestServer DndTestDnode::server3; -TestServer DndTestDnode::server4; -TestServer DndTestDnode::server5; - -TEST_F(DndTestDnode, 01_ShowDnode) { - test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); - CHECK_META("show dnodes", 7); - - CHECK_SCHEMA(0, TSDB_DATA_TYPE_SMALLINT, 2, "id"); - CHECK_SCHEMA(1, TSDB_DATA_TYPE_BINARY, TSDB_EP_LEN + VARSTR_HEADER_SIZE, "endpoint"); - CHECK_SCHEMA(2, TSDB_DATA_TYPE_SMALLINT, 2, "vnodes"); - CHECK_SCHEMA(3, 
TSDB_DATA_TYPE_SMALLINT, 2, "support_vnodes"); - CHECK_SCHEMA(4, TSDB_DATA_TYPE_BINARY, 10 + VARSTR_HEADER_SIZE, "status"); - CHECK_SCHEMA(5, TSDB_DATA_TYPE_TIMESTAMP, 8, "create_time"); - CHECK_SCHEMA(6, TSDB_DATA_TYPE_BINARY, 24 + VARSTR_HEADER_SIZE, "offline_reason"); - - test.SendShowRetrieveReq(); - EXPECT_EQ(test.GetShowRows(), 1); - - CheckInt16(1); - CheckBinary("localhost:9041", TSDB_EP_LEN); - CheckInt16(0); - CheckInt16(16); - CheckBinary("ready", 10); - CheckTimestamp(); - CheckBinary("", 24); -} - -TEST_F(DndTestDnode, 02_ConfigDnode) { - int32_t contLen = sizeof(SCfgDnodeMsg); - - SCfgDnodeMsg* pReq = (SCfgDnodeMsg*)rpcMallocCont(contLen); - pReq->dnodeId = htonl(1); - strcpy(pReq->config, "ddebugflag 131"); - - SRpcMsg* pRsp = test.SendReq(TDMT_MND_CONFIG_DNODE, pReq, contLen); - ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, 0); -} - -TEST_F(DndTestDnode, 03_Create_Drop_Restart_Dnode) { - { - int32_t contLen = sizeof(SCreateDnodeMsg); - - SCreateDnodeMsg* pReq = (SCreateDnodeMsg*)rpcMallocCont(contLen); - strcpy(pReq->fqdn, "localhost"); - pReq->port = htonl(9042); - - SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); - ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, 0); - } - - taosMsleep(1300); - - test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); - CHECK_META("show dnodes", 7); - test.SendShowRetrieveReq(); - EXPECT_EQ(test.GetShowRows(), 2); - - CheckInt16(1); - CheckInt16(2); - CheckBinary("localhost:9041", TSDB_EP_LEN); - CheckBinary("localhost:9042", TSDB_EP_LEN); - CheckInt16(0); - CheckInt16(0); - CheckInt16(16); - CheckInt16(16); - CheckBinary("ready", 10); - CheckBinary("ready", 10); - CheckTimestamp(); - CheckTimestamp(); - CheckBinary("", 24); - CheckBinary("", 24); - - { - int32_t contLen = sizeof(SDropDnodeMsg); - - SDropDnodeMsg* pReq = (SDropDnodeMsg*)rpcMallocCont(contLen); - pReq->dnodeId = htonl(2); - - SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DNODE, pReq, contLen); - ASSERT_NE(pRsp, nullptr); - 
ASSERT_EQ(pRsp->code, 0); - } - - test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); - CHECK_META("show dnodes", 7); - test.SendShowRetrieveReq(); - EXPECT_EQ(test.GetShowRows(), 1); - - CheckInt16(1); - CheckBinary("localhost:9041", TSDB_EP_LEN); - CheckInt16(0); - CheckInt16(16); - CheckBinary("ready", 10); - CheckTimestamp(); - CheckBinary("", 24); - - { - int32_t contLen = sizeof(SCreateDnodeMsg); - - SCreateDnodeMsg* pReq = (SCreateDnodeMsg*)rpcMallocCont(contLen); - strcpy(pReq->fqdn, "localhost"); - pReq->port = htonl(9043); - - SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); - ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, 0); - } - - { - int32_t contLen = sizeof(SCreateDnodeMsg); - - SCreateDnodeMsg* pReq = (SCreateDnodeMsg*)rpcMallocCont(contLen); - strcpy(pReq->fqdn, "localhost"); - pReq->port = htonl(9044); - - SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); - ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, 0); - } - - { - int32_t contLen = sizeof(SCreateDnodeMsg); - - SCreateDnodeMsg* pReq = (SCreateDnodeMsg*)rpcMallocCont(contLen); - strcpy(pReq->fqdn, "localhost"); - pReq->port = htonl(9045); - - SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); - ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, 0); - } - - taosMsleep(1300); - test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); - CHECK_META("show dnodes", 7); - test.SendShowRetrieveReq(); - EXPECT_EQ(test.GetShowRows(), 4); - - CheckInt16(1); - CheckInt16(3); - CheckInt16(4); - CheckInt16(5); - CheckBinary("localhost:9041", TSDB_EP_LEN); - CheckBinary("localhost:9043", TSDB_EP_LEN); - CheckBinary("localhost:9044", TSDB_EP_LEN); - CheckBinary("localhost:9045", TSDB_EP_LEN); - CheckInt16(0); - CheckInt16(0); - CheckInt16(0); - CheckInt16(0); - CheckInt16(16); - CheckInt16(16); - CheckInt16(16); - CheckInt16(16); - CheckBinary("ready", 10); - CheckBinary("ready", 10); - CheckBinary("ready", 10); - CheckBinary("ready", 10); - CheckTimestamp(); - 
CheckTimestamp(); - CheckTimestamp(); - CheckTimestamp(); - CheckBinary("", 24); - CheckBinary("", 24); - CheckBinary("", 24); - CheckBinary("", 24); - - // restart - uInfo("stop all server"); - test.Restart(); - server2.Restart(); - server3.Restart(); - server4.Restart(); - server5.Restart(); - - taosMsleep(1300); - test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); - CHECK_META("show dnodes", 7); - test.SendShowRetrieveReq(); - EXPECT_EQ(test.GetShowRows(), 4); - - CheckInt16(1); - CheckInt16(3); - CheckInt16(4); - CheckInt16(5); - CheckBinary("localhost:9041", TSDB_EP_LEN); - CheckBinary("localhost:9043", TSDB_EP_LEN); - CheckBinary("localhost:9044", TSDB_EP_LEN); - CheckBinary("localhost:9045", TSDB_EP_LEN); - CheckInt16(0); - CheckInt16(0); - CheckInt16(0); - CheckInt16(0); - CheckInt16(16); - CheckInt16(16); - CheckInt16(16); - CheckInt16(16); - CheckBinary("ready", 10); - CheckBinary("ready", 10); - CheckBinary("ready", 10); - CheckBinary("ready", 10); - CheckTimestamp(); - CheckTimestamp(); - CheckTimestamp(); - CheckTimestamp(); - CheckBinary("", 24); - CheckBinary("", 24); - CheckBinary("", 24); - CheckBinary("", 24); -} diff --git a/source/dnode/mgmt/impl/test/mnode/CMakeLists.txt b/source/dnode/mgmt/impl/test/mnode/CMakeLists.txt index d6b3b16fb6..be29b93b02 100644 --- a/source/dnode/mgmt/impl/test/mnode/CMakeLists.txt +++ b/source/dnode/mgmt/impl/test/mnode/CMakeLists.txt @@ -1,5 +1,5 @@ -aux_source_directory(. MTEST_SRC) -add_executable(dnode_test_mnode ${MTEST_SRC}) +aux_source_directory(. 
DMTEST_SRC) +add_executable(dnode_test_mnode ${DMTEST_SRC}) target_link_libraries( dnode_test_mnode PUBLIC sut diff --git a/source/dnode/mgmt/impl/test/mnode/dmnode.cpp b/source/dnode/mgmt/impl/test/mnode/dmnode.cpp new file mode 100644 index 0000000000..841d00d14d --- /dev/null +++ b/source/dnode/mgmt/impl/test/mnode/dmnode.cpp @@ -0,0 +1,189 @@ +/** + * @file dmnode.cpp + * @author slguan (slguan@taosdata.com) + * @brief DNODE module mnode tests + * @version 1.0 + * @date 2022-01-07 + * + * @copyright Copyright (c) 2022 + * + */ + +#include "sut.h" + +class DndTestMnode : public ::testing::Test { + protected: + static void SetUpTestSuite() { test.Init("/tmp/dnode_test_mnode", 9113); } + static void TearDownTestSuite() { test.Cleanup(); } + + static Testbase test; + + public: + void SetUp() override {} + void TearDown() override {} +}; + +Testbase DndTestMnode::test; + +TEST_F(DndTestMnode, 01_Create_Mnode) { + { + int32_t contLen = sizeof(SDCreateMnodeReq); + + SDCreateMnodeReq* pReq = (SDCreateMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + pReq->replica = 1; + pReq->replicas[0].id = htonl(1); + pReq->replicas[0].port = htonl(9113); + strcpy(pReq->replicas[0].fqdn, "localhost"); + + SRpcMsg* pRsp = test.SendReq(TDMT_DND_CREATE_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_MNODE_INVALID_OPTION); + } + + { + int32_t contLen = sizeof(SDCreateMnodeReq); + + SDCreateMnodeReq* pReq = (SDCreateMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(1); + pReq->replica = 1; + pReq->replicas[0].id = htonl(2); + pReq->replicas[0].port = htonl(9113); + strcpy(pReq->replicas[0].fqdn, "localhost"); + + SRpcMsg* pRsp = test.SendReq(TDMT_DND_CREATE_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_MNODE_INVALID_OPTION); + } + + { + int32_t contLen = sizeof(SDCreateMnodeReq); + + SDCreateMnodeReq* pReq = (SDCreateMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(1); + 
pReq->replica = 2; + pReq->replicas[0].id = htonl(1); + pReq->replicas[0].port = htonl(9113); + strcpy(pReq->replicas[0].fqdn, "localhost"); + pReq->replicas[1].id = htonl(1); + pReq->replicas[1].port = htonl(9114); + strcpy(pReq->replicas[1].fqdn, "localhost"); + + SRpcMsg* pRsp = test.SendReq(TDMT_DND_CREATE_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_MNODE_ALREADY_DEPLOYED); + } +} + +TEST_F(DndTestMnode, 02_Alter_Mnode) { + { + int32_t contLen = sizeof(SDAlterMnodeReq); + + SDAlterMnodeReq* pReq = (SDAlterMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + pReq->replica = 1; + pReq->replicas[0].id = htonl(1); + pReq->replicas[0].port = htonl(9113); + strcpy(pReq->replicas[0].fqdn, "localhost"); + + SRpcMsg* pRsp = test.SendReq(TDMT_DND_ALTER_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_MNODE_INVALID_OPTION); + } + + { + int32_t contLen = sizeof(SDAlterMnodeReq); + + SDAlterMnodeReq* pReq = (SDAlterMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(1); + pReq->replica = 1; + pReq->replicas[0].id = htonl(2); + pReq->replicas[0].port = htonl(9113); + strcpy(pReq->replicas[0].fqdn, "localhost"); + + SRpcMsg* pRsp = test.SendReq(TDMT_DND_ALTER_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_MNODE_INVALID_OPTION); + } + + { + int32_t contLen = sizeof(SDAlterMnodeReq); + + SDAlterMnodeReq* pReq = (SDAlterMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(1); + pReq->replica = 1; + pReq->replicas[0].id = htonl(1); + pReq->replicas[0].port = htonl(9113); + strcpy(pReq->replicas[0].fqdn, "localhost"); + + SRpcMsg* pRsp = test.SendReq(TDMT_DND_ALTER_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } +} + +TEST_F(DndTestMnode, 03_Drop_Mnode) { + { + int32_t contLen = sizeof(SDDropMnodeReq); + + SDDropMnodeReq* pReq = (SDDropMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + 
SRpcMsg* pRsp = test.SendReq(TDMT_DND_DROP_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_MNODE_INVALID_OPTION); + } + + { + int32_t contLen = sizeof(SDDropMnodeReq); + + SDDropMnodeReq* pReq = (SDDropMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(1); + + SRpcMsg* pRsp = test.SendReq(TDMT_DND_DROP_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } + + { + int32_t contLen = sizeof(SDDropMnodeReq); + + SDDropMnodeReq* pReq = (SDDropMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(1); + + SRpcMsg* pRsp = test.SendReq(TDMT_DND_DROP_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_MNODE_NOT_DEPLOYED); + } + + { + int32_t contLen = sizeof(SDAlterMnodeReq); + + SDAlterMnodeReq* pReq = (SDAlterMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(1); + pReq->replica = 1; + pReq->replicas[0].id = htonl(1); + pReq->replicas[0].port = htonl(9113); + strcpy(pReq->replicas[0].fqdn, "localhost"); + + SRpcMsg* pRsp = test.SendReq(TDMT_DND_ALTER_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_MNODE_NOT_DEPLOYED); + } + + + { + int32_t contLen = sizeof(SDCreateMnodeReq); + + SDCreateMnodeReq* pReq = (SDCreateMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(1); + pReq->replica = 2; + pReq->replicas[0].id = htonl(1); + pReq->replicas[0].port = htonl(9113); + strcpy(pReq->replicas[0].fqdn, "localhost"); + + SRpcMsg* pRsp = test.SendReq(TDMT_DND_CREATE_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } +} \ No newline at end of file diff --git a/source/dnode/mgmt/impl/test/mnode/mnode.cpp b/source/dnode/mgmt/impl/test/mnode/mnode.cpp deleted file mode 100644 index 9a725c20d3..0000000000 --- a/source/dnode/mgmt/impl/test/mnode/mnode.cpp +++ /dev/null @@ -1,301 +0,0 @@ -/** - * @file dnode.cpp - * @author slguan (slguan@taosdata.com) - * @brief DNODE module dnode-msg 
tests - * @version 0.1 - * @date 2021-12-15 - * - * @copyright Copyright (c) 2021 - * - */ - -#include "sut.h" - -class DndTestMnode : public ::testing::Test { - public: - void SetUp() override {} - void TearDown() override {} - - public: - static void SetUpTestSuite() { - test.Init("/tmp/dnode_test_mnode1", 9061); - const char* fqdn = "localhost"; - const char* firstEp = "localhost:9061"; - - server2.Start("/tmp/dnode_test_mnode2", fqdn, 9062, firstEp); - server3.Start("/tmp/dnode_test_mnode3", fqdn, 9063, firstEp); - server4.Start("/tmp/dnode_test_mnode4", fqdn, 9064, firstEp); - server5.Start("/tmp/dnode_test_mnode5", fqdn, 9065, firstEp); - taosMsleep(300); - } - - static void TearDownTestSuite() { - server2.Stop(); - server3.Stop(); - server4.Stop(); - server5.Stop(); - test.Cleanup(); - } - - static Testbase test; - static TestServer server2; - static TestServer server3; - static TestServer server4; - static TestServer server5; -}; - -Testbase DndTestMnode::test; -TestServer DndTestMnode::server2; -TestServer DndTestMnode::server3; -TestServer DndTestMnode::server4; -TestServer DndTestMnode::server5; - -TEST_F(DndTestMnode, 01_ShowDnode) { - test.SendShowMetaReq(TSDB_MGMT_TABLE_MNODE, ""); - CHECK_META("show mnodes", 5); - - CHECK_SCHEMA(0, TSDB_DATA_TYPE_SMALLINT, 2, "id"); - CHECK_SCHEMA(1, TSDB_DATA_TYPE_BINARY, TSDB_EP_LEN + VARSTR_HEADER_SIZE, "endpoint"); - CHECK_SCHEMA(2, TSDB_DATA_TYPE_BINARY, 12 + VARSTR_HEADER_SIZE, "role"); - CHECK_SCHEMA(3, TSDB_DATA_TYPE_TIMESTAMP, 8, "role_time"); - CHECK_SCHEMA(4, TSDB_DATA_TYPE_TIMESTAMP, 8, "create_time"); - - test.SendShowRetrieveReq(); - EXPECT_EQ(test.GetShowRows(), 1); - - CheckInt16(1); - CheckBinary("localhost:9061", TSDB_EP_LEN); - CheckBinary("master", 12); - CheckInt64(0); - CheckTimestamp(); -} - -TEST_F(DndTestMnode, 02_Create_Mnode_Invalid_Id) { - { - int32_t contLen = sizeof(SMCreateMnodeMsg); - - SMCreateMnodeMsg* pReq = (SMCreateMnodeMsg*)rpcMallocCont(contLen); - pReq->dnodeId = htonl(1); - - 
SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_MNODE, pReq, contLen); - ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, TSDB_CODE_MND_MNODE_ALREADY_EXIST); - } -} - -TEST_F(DndTestMnode, 03_Create_Mnode_Invalid_Id) { - { - int32_t contLen = sizeof(SMCreateMnodeMsg); - - SMCreateMnodeMsg* pReq = (SMCreateMnodeMsg*)rpcMallocCont(contLen); - pReq->dnodeId = htonl(2); - - SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_MNODE, pReq, contLen); - ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, TSDB_CODE_MND_DNODE_NOT_EXIST); - } -} - -TEST_F(DndTestMnode, 04_Create_Mnode) { - { - // create dnode - int32_t contLen = sizeof(SCreateDnodeMsg); - - SCreateDnodeMsg* pReq = (SCreateDnodeMsg*)rpcMallocCont(contLen); - strcpy(pReq->fqdn, "localhost"); - pReq->port = htonl(9062); - - SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); - ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, 0); - - taosMsleep(1300); - test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); - test.SendShowRetrieveReq(); - EXPECT_EQ(test.GetShowRows(), 2); - } - - { - // create mnode - int32_t contLen = sizeof(SMCreateMnodeMsg); - - SMCreateMnodeMsg* pReq = (SMCreateMnodeMsg*)rpcMallocCont(contLen); - pReq->dnodeId = htonl(2); - - SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_MNODE, pReq, contLen); - ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, 0); - - test.SendShowMetaReq(TSDB_MGMT_TABLE_MNODE, ""); - test.SendShowRetrieveReq(); - EXPECT_EQ(test.GetShowRows(), 2); - - CheckInt16(1); - CheckInt16(2); - CheckBinary("localhost:9061", TSDB_EP_LEN); - CheckBinary("localhost:9062", TSDB_EP_LEN); - CheckBinary("master", 12); - CheckBinary("slave", 12); - CheckInt64(0); - CheckInt64(0); - CheckTimestamp(); - CheckTimestamp(); - } - - { - // drop mnode - int32_t contLen = sizeof(SMDropMnodeMsg); - - SMDropMnodeMsg* pReq = (SMDropMnodeMsg*)rpcMallocCont(contLen); - pReq->dnodeId = htonl(2); - - SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_MNODE, pReq, contLen); - ASSERT_NE(pRsp, nullptr); - 
ASSERT_EQ(pRsp->code, 0); - - test.SendShowMetaReq(TSDB_MGMT_TABLE_MNODE, ""); - test.SendShowRetrieveReq(); - EXPECT_EQ(test.GetShowRows(), 1); - - CheckInt16(1); - CheckBinary("localhost:9061", TSDB_EP_LEN); - CheckBinary("master", 12); - CheckInt64(0); - CheckTimestamp(); - } -} -// { -// int32_t contLen = sizeof(SDropDnodeMsg); - -// SDropDnodeMsg* pReq = (SDropDnodeMsg*)rpcMallocCont(contLen); -// pReq->dnodeId = htonl(2); - -// SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DNODE, pReq, contLen); -// ASSERT_NE(pRsp, nullptr); -// ASSERT_EQ(pRsp->code, 0); -// } - -// test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); -// CHECK_META("show dnodes", 7); -// test.SendShowRetrieveReq(); -// EXPECT_EQ(test.GetShowRows(), 1); - -// CheckInt16(1); -// CheckBinary("localhost:9061", TSDB_EP_LEN); -// CheckInt16(0); -// CheckInt16(1); -// CheckBinary("ready", 10); -// CheckTimestamp(); -// CheckBinary("", 24); - -// { -// int32_t contLen = sizeof(SCreateDnodeMsg); - -// SCreateDnodeMsg* pReq = (SCreateDnodeMsg*)rpcMallocCont(contLen); -// strcpy(pReq->ep, "localhost:9063"); - -// SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); -// ASSERT_NE(pRsp, nullptr); -// ASSERT_EQ(pRsp->code, 0); -// } - -// { -// int32_t contLen = sizeof(SCreateDnodeMsg); - -// SCreateDnodeMsg* pReq = (SCreateDnodeMsg*)rpcMallocCont(contLen); -// strcpy(pReq->ep, "localhost:9064"); - -// SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); -// ASSERT_NE(pRsp, nullptr); -// ASSERT_EQ(pRsp->code, 0); -// } - -// { -// int32_t contLen = sizeof(SCreateDnodeMsg); - -// SCreateDnodeMsg* pReq = (SCreateDnodeMsg*)rpcMallocCont(contLen); -// strcpy(pReq->ep, "localhost:9065"); - -// SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); -// ASSERT_NE(pRsp, nullptr); -// ASSERT_EQ(pRsp->code, 0); -// } - -// taosMsleep(1300); -// test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); -// CHECK_META("show dnodes", 7); -// test.SendShowRetrieveReq(); -// 
EXPECT_EQ(test.GetShowRows(), 4); - -// CheckInt16(1); -// CheckInt16(3); -// CheckInt16(4); -// CheckInt16(5); -// CheckBinary("localhost:9061", TSDB_EP_LEN); -// CheckBinary("localhost:9063", TSDB_EP_LEN); -// CheckBinary("localhost:9064", TSDB_EP_LEN); -// CheckBinary("localhost:9065", TSDB_EP_LEN); -// CheckInt16(0); -// CheckInt16(0); -// CheckInt16(0); -// CheckInt16(0); -// CheckInt16(1); -// CheckInt16(1); -// CheckInt16(1); -// CheckInt16(1); -// CheckBinary("ready", 10); -// CheckBinary("ready", 10); -// CheckBinary("ready", 10); -// CheckBinary("ready", 10); -// CheckTimestamp(); -// CheckTimestamp(); -// CheckTimestamp(); -// CheckTimestamp(); -// CheckBinary("", 24); -// CheckBinary("", 24); -// CheckBinary("", 24); -// CheckBinary("", 24); - -// // restart -// uInfo("stop all server"); -// test.Restart(); -// server2.Restart(); -// server3.Restart(); -// server4.Restart(); -// server5.Restart(); - -// taosMsleep(1300); -// test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); -// CHECK_META("show dnodes", 7); -// test.SendShowRetrieveReq(); -// EXPECT_EQ(test.GetShowRows(), 4); - -// CheckInt16(1); -// CheckInt16(3); -// CheckInt16(4); -// CheckInt16(5); -// CheckBinary("localhost:9061", TSDB_EP_LEN); -// CheckBinary("localhost:9063", TSDB_EP_LEN); -// CheckBinary("localhost:9064", TSDB_EP_LEN); -// CheckBinary("localhost:9065", TSDB_EP_LEN); -// CheckInt16(0); -// CheckInt16(0); -// CheckInt16(0); -// CheckInt16(0); -// CheckInt16(1); -// CheckInt16(1); -// CheckInt16(1); -// CheckInt16(1); -// CheckBinary("ready", 10); -// CheckBinary("ready", 10); -// CheckBinary("ready", 10); -// CheckBinary("ready", 10); -// CheckTimestamp(); -// CheckTimestamp(); -// CheckTimestamp(); -// CheckTimestamp(); -// CheckBinary("", 24); -// CheckBinary("", 24); -// CheckBinary("", 24); -// CheckBinary("", 24); -// } diff --git a/source/dnode/mgmt/impl/test/profile/CMakeLists.txt b/source/dnode/mgmt/impl/test/profile/CMakeLists.txt deleted file mode 100644 index 
0edd631720..0000000000 --- a/source/dnode/mgmt/impl/test/profile/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -aux_source_directory(. PROFILE_SRC) -add_executable(dnode_test_profile ${PROFILE_SRC}) -target_link_libraries( - dnode_test_profile - PUBLIC sut -) - -add_test( - NAME dnode_test_profile - COMMAND dnode_test_profile -) diff --git a/source/dnode/mgmt/impl/test/qnode/dqnode.cpp b/source/dnode/mgmt/impl/test/qnode/dqnode.cpp index e64a0543fc..19fd6b4b12 100644 --- a/source/dnode/mgmt/impl/test/qnode/dqnode.cpp +++ b/source/dnode/mgmt/impl/test/qnode/dqnode.cpp @@ -34,7 +34,7 @@ TEST_F(DndTestQnode, 01_Create_Qnode) { SRpcMsg* pRsp = test.SendReq(TDMT_DND_CREATE_QNODE, pReq, contLen); ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, TSDB_CODE_DND_QNODE_ID_INVALID); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_QNODE_INVALID_OPTION); } { @@ -82,7 +82,7 @@ TEST_F(DndTestQnode, 02_Drop_Qnode) { SRpcMsg* pRsp = test.SendReq(TDMT_DND_DROP_QNODE, pReq, contLen); ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, TSDB_CODE_DND_QNODE_ID_INVALID); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_QNODE_INVALID_OPTION); } { diff --git a/source/dnode/mgmt/impl/test/snode/dsnode.cpp b/source/dnode/mgmt/impl/test/snode/dsnode.cpp index 1e6bcdb3fb..f51f4a0268 100644 --- a/source/dnode/mgmt/impl/test/snode/dsnode.cpp +++ b/source/dnode/mgmt/impl/test/snode/dsnode.cpp @@ -34,7 +34,7 @@ TEST_F(DndTestSnode, 01_Create_Snode) { SRpcMsg* pRsp = test.SendReq(TDMT_DND_CREATE_SNODE, pReq, contLen); ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, TSDB_CODE_DND_SNODE_ID_INVALID); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_SNODE_INVALID_OPTION); } { @@ -82,7 +82,7 @@ TEST_F(DndTestSnode, 01_Drop_Snode) { SRpcMsg* pRsp = test.SendReq(TDMT_DND_DROP_SNODE, pReq, contLen); ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, TSDB_CODE_DND_SNODE_ID_INVALID); + ASSERT_EQ(pRsp->code, TSDB_CODE_DND_SNODE_INVALID_OPTION); } { diff --git a/source/dnode/mgmt/impl/test/sut/src/sut.cpp 
b/source/dnode/mgmt/impl/test/sut/src/sut.cpp index 72a6416e22..65c7d67254 100644 --- a/source/dnode/mgmt/impl/test/sut/src/sut.cpp +++ b/source/dnode/mgmt/impl/test/sut/src/sut.cpp @@ -16,7 +16,7 @@ #include "sut.h" void Testbase::InitLog(const char* path) { - dDebugFlag = 0; + dDebugFlag = 143; vDebugFlag = 0; mDebugFlag = 143; cDebugFlag = 0; diff --git a/source/dnode/mgmt/impl/test/vgroup/CMakeLists.txt b/source/dnode/mgmt/impl/test/vgroup/CMakeLists.txt deleted file mode 100644 index b864b0593c..0000000000 --- a/source/dnode/mgmt/impl/test/vgroup/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -aux_source_directory(. VGROUP_SRC) -add_executable(dnode_test_vgroup ${VGROUP_SRC}) -target_link_libraries( - dnode_test_vgroup - PUBLIC sut -) - -add_test( - NAME dnode_test_vgroup - COMMAND dnode_test_vgroup -) diff --git a/source/dnode/mgmt/impl/test/vnode/CMakeLists.txt b/source/dnode/mgmt/impl/test/vnode/CMakeLists.txt new file mode 100644 index 0000000000..6fb8bb4ba4 --- /dev/null +++ b/source/dnode/mgmt/impl/test/vnode/CMakeLists.txt @@ -0,0 +1,11 @@ +aux_source_directory(. 
VNODE_SRC) +add_executable(dnode_test_vnode ${VNODE_SRC}) +target_link_libraries( + dnode_test_vnode + PUBLIC sut +) + +add_test( + NAME dnode_test_vnode + COMMAND dnode_test_vnode +) diff --git a/source/dnode/mgmt/impl/test/vgroup/vgroup.cpp b/source/dnode/mgmt/impl/test/vnode/vnode.cpp similarity index 83% rename from source/dnode/mgmt/impl/test/vgroup/vgroup.cpp rename to source/dnode/mgmt/impl/test/vnode/vnode.cpp index 7fa3b4ab61..7233137140 100644 --- a/source/dnode/mgmt/impl/test/vgroup/vgroup.cpp +++ b/source/dnode/mgmt/impl/test/vnode/vnode.cpp @@ -11,9 +11,9 @@ #include "sut.h" -class DndTestVgroup : public ::testing::Test { +class DndTestVnode : public ::testing::Test { protected: - static void SetUpTestSuite() { test.Init("/tmp/dnode_test_vgroup", 9150); } + static void SetUpTestSuite() { test.Init("/tmp/dnode_test_vnode", 9150); } static void TearDownTestSuite() { test.Cleanup(); } static Testbase test; @@ -23,14 +23,14 @@ class DndTestVgroup : public ::testing::Test { void TearDown() override {} }; -Testbase DndTestVgroup::test; +Testbase DndTestVnode::test; -TEST_F(DndTestVgroup, 01_Create_Restart_Drop_Vnode) { +TEST_F(DndTestVnode, 01_Create_Restart_Drop_Vnode) { { for (int i = 0; i < 3; ++i) { - int32_t contLen = sizeof(SCreateVnodeMsg); + int32_t contLen = sizeof(SCreateVnodeReq); - SCreateVnodeMsg* pReq = (SCreateVnodeMsg*)rpcMallocCont(contLen); + SCreateVnodeReq* pReq = (SCreateVnodeReq*)rpcMallocCont(contLen); pReq->vgId = htonl(2); pReq->dnodeId = htonl(1); strcpy(pReq->db, "1.d1"); @@ -68,9 +68,9 @@ TEST_F(DndTestVgroup, 01_Create_Restart_Drop_Vnode) { { for (int i = 0; i < 3; ++i) { - int32_t contLen = sizeof(SAlterVnodeMsg); + int32_t contLen = sizeof(SAlterVnodeReq); - SAlterVnodeMsg* pReq = (SAlterVnodeMsg*)rpcMallocCont(contLen); + SAlterVnodeReq* pReq = (SAlterVnodeReq*)rpcMallocCont(contLen); pReq->vgId = htonl(2); pReq->dnodeId = htonl(1); strcpy(pReq->db, "1.d1"); @@ -108,9 +108,9 @@ TEST_F(DndTestVgroup, 
01_Create_Restart_Drop_Vnode) { { for (int i = 0; i < 3; ++i) { - int32_t contLen = sizeof(SDropVnodeMsg); + int32_t contLen = sizeof(SDropVnodeReq); - SDropVnodeMsg* pReq = (SDropVnodeMsg*)rpcMallocCont(contLen); + SDropVnodeReq* pReq = (SDropVnodeReq*)rpcMallocCont(contLen); pReq->vgId = htonl(2); pReq->dnodeId = htonl(1); strcpy(pReq->db, "1.d1"); @@ -118,7 +118,7 @@ TEST_F(DndTestVgroup, 01_Create_Restart_Drop_Vnode) { SRpcMsg rpcMsg = {0}; rpcMsg.pCont = pReq; - rpcMsg.contLen = sizeof(SDropVnodeMsg); + rpcMsg.contLen = sizeof(SDropVnodeReq); rpcMsg.msgType = TDMT_DND_DROP_VNODE; SRpcMsg* pRsp = test.SendReq(TDMT_DND_DROP_VNODE, pReq, contLen); diff --git a/source/dnode/mnode/impl/inc/mndVgroup.h b/source/dnode/mnode/impl/inc/mndVgroup.h index 6d391450b7..9e4656fec8 100644 --- a/source/dnode/mnode/impl/inc/mndVgroup.h +++ b/source/dnode/mnode/impl/inc/mndVgroup.h @@ -31,8 +31,8 @@ int32_t mndAllocVgroup(SMnode *pMnode, SDbObj *pDb, SVgObj **ppVgroups); SEpSet mndGetVgroupEpset(SMnode *pMnode, SVgObj *pVgroup); int32_t mndGetVnodesNum(SMnode *pMnode, int32_t dnodeId); -SCreateVnodeMsg *mndBuildCreateVnodeMsg(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup); -SDropVnodeMsg *mndBuildDropVnodeMsg(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup); +SCreateVnodeReq *mndBuildCreateVnodeMsg(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup); +SDropVnodeReq *mndBuildDropVnodeMsg(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 410368f130..bf5d01d0a2 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -331,11 +331,11 @@ static int32_t mndSetCreateDbRedoActions(SMnode *pMnode, STrans *pTrans, SDbObj action.epSet = mndGetDnodeEpset(pDnode); mndReleaseDnode(pMnode, pDnode); - SCreateVnodeMsg *pMsg = mndBuildCreateVnodeMsg(pMnode, pDnode, pDb, pVgroup); + 
SCreateVnodeReq *pMsg = mndBuildCreateVnodeMsg(pMnode, pDnode, pDb, pVgroup); if (pMsg == NULL) return -1; action.pCont = pMsg; - action.contLen = sizeof(SCreateVnodeMsg); + action.contLen = sizeof(SCreateVnodeReq); action.msgType = TDMT_DND_CREATE_VNODE; if (mndTransAppendRedoAction(pTrans, &action) != 0) { free(pMsg); @@ -360,11 +360,11 @@ static int32_t mndSetCreateDbUndoActions(SMnode *pMnode, STrans *pTrans, SDbObj action.epSet = mndGetDnodeEpset(pDnode); mndReleaseDnode(pMnode, pDnode); - SDropVnodeMsg *pMsg = mndBuildDropVnodeMsg(pMnode, pDnode, pDb, pVgroup); + SDropVnodeReq *pMsg = mndBuildDropVnodeMsg(pMnode, pDnode, pDb, pVgroup); if (pMsg == NULL) return -1; action.pCont = pMsg; - action.contLen = sizeof(SDropVnodeMsg); + action.contLen = sizeof(SDropVnodeReq); action.msgType = TDMT_DND_DROP_VNODE; if (mndTransAppendUndoAction(pTrans, &action) != 0) { free(pMsg); @@ -593,11 +593,11 @@ static int32_t mndBuildUpdateVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj action.epSet = mndGetDnodeEpset(pDnode); mndReleaseDnode(pMnode, pDnode); - SAlterVnodeMsg *pMsg = (SAlterVnodeMsg *)mndBuildCreateVnodeMsg(pMnode, pDnode, pDb, pVgroup); + SAlterVnodeReq *pMsg = (SAlterVnodeReq *)mndBuildCreateVnodeMsg(pMnode, pDnode, pDb, pVgroup); if (pMsg == NULL) return -1; action.pCont = pMsg; - action.contLen = sizeof(SAlterVnodeMsg); + action.contLen = sizeof(SAlterVnodeReq); action.msgType = TDMT_DND_ALTER_VNODE; if (mndTransAppendRedoAction(pTrans, &action) != 0) { free(pMsg); @@ -757,11 +757,11 @@ static int32_t mndBuildDropVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj * action.epSet = mndGetDnodeEpset(pDnode); mndReleaseDnode(pMnode, pDnode); - SDropVnodeMsg *pMsg = mndBuildDropVnodeMsg(pMnode, pDnode, pDb, pVgroup); + SDropVnodeReq *pMsg = mndBuildDropVnodeMsg(pMnode, pDnode, pDb, pVgroup); if (pMsg == NULL) return -1; action.pCont = pMsg; - action.contLen = sizeof(SCreateVnodeMsg); + action.contLen = sizeof(SCreateVnodeReq); action.msgType = 
TDMT_DND_DROP_VNODE; if (mndTransAppendRedoAction(pTrans, &action) != 0) { free(pMsg); @@ -917,6 +917,7 @@ static int32_t mndProcessUseDbMsg(SMnodeMsg *pMsg) { } memcpy(pRsp->db, pDb->name, TSDB_DB_FNAME_LEN); + pRsp->uid = htobe64(pDb->uid); pRsp->vgVersion = htonl(pDb->vgVersion); pRsp->vgNum = htonl(vindex); pRsp->hashMethod = pDb->hashMethod; diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 2cca70b04e..d94078cfe1 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -45,19 +45,19 @@ static SSdbRaw *mndDnodeActionEncode(SDnodeObj *pDnode); static SSdbRow *mndDnodeActionDecode(SSdbRaw *pRaw); static int32_t mndDnodeActionInsert(SSdb *pSdb, SDnodeObj *pDnode); static int32_t mndDnodeActionDelete(SSdb *pSdb, SDnodeObj *pDnode); -static int32_t mndDnodeActionUpdate(SSdb *pSdb, SDnodeObj *pOldDnode, SDnodeObj *pNewDnode); +static int32_t mndDnodeActionUpdate(SSdb *pSdb, SDnodeObj *pOld, SDnodeObj *pNew); -static int32_t mndProcessCreateDnodeMsg(SMnodeMsg *pMsg); -static int32_t mndProcessDropDnodeMsg(SMnodeMsg *pMsg); -static int32_t mndProcessConfigDnodeMsg(SMnodeMsg *pMsg); -static int32_t mndProcessConfigDnodeRsp(SMnodeMsg *pMsg); -static int32_t mndProcessStatusMsg(SMnodeMsg *pMsg); +static int32_t mndProcessCreateDnodeReq(SMnodeMsg *pReq); +static int32_t mndProcessDropDnodeReq(SMnodeMsg *pReq); +static int32_t mndProcessConfigDnodeReq(SMnodeMsg *pReq); +static int32_t mndProcessConfigDnodeRsp(SMnodeMsg *pRsp); +static int32_t mndProcessStatusReq(SMnodeMsg *pReq); -static int32_t mndGetConfigMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg *pMeta); -static int32_t mndRetrieveConfigs(SMnodeMsg *pMsg, SShowObj *pShow, char *data, int32_t rows); +static int32_t mndGetConfigMeta(SMnodeMsg *pReq, SShowObj *pShow, STableMetaMsg *pMeta); +static int32_t mndRetrieveConfigs(SMnodeMsg *pReq, SShowObj *pShow, char *data, int32_t rows); static void 
mndCancelGetNextConfig(SMnode *pMnode, void *pIter); -static int32_t mndGetDnodeMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg *pMeta); -static int32_t mndRetrieveDnodes(SMnodeMsg *pMsg, SShowObj *pShow, char *data, int32_t rows); +static int32_t mndGetDnodeMeta(SMnodeMsg *pReq, SShowObj *pShow, STableMetaMsg *pMeta); +static int32_t mndRetrieveDnodes(SMnodeMsg *pReq, SShowObj *pShow, char *data, int32_t rows); static void mndCancelGetNextDnode(SMnode *pMnode, void *pIter); int32_t mndInitDnode(SMnode *pMnode) { @@ -70,11 +70,11 @@ int32_t mndInitDnode(SMnode *pMnode) { .updateFp = (SdbUpdateFp)mndDnodeActionUpdate, .deleteFp = (SdbDeleteFp)mndDnodeActionDelete}; - mndSetMsgHandle(pMnode, TDMT_MND_CREATE_DNODE, mndProcessCreateDnodeMsg); - mndSetMsgHandle(pMnode, TDMT_MND_DROP_DNODE, mndProcessDropDnodeMsg); - mndSetMsgHandle(pMnode, TDMT_MND_CONFIG_DNODE, mndProcessConfigDnodeMsg); + mndSetMsgHandle(pMnode, TDMT_MND_CREATE_DNODE, mndProcessCreateDnodeReq); + mndSetMsgHandle(pMnode, TDMT_MND_DROP_DNODE, mndProcessDropDnodeReq); + mndSetMsgHandle(pMnode, TDMT_MND_CONFIG_DNODE, mndProcessConfigDnodeReq); mndSetMsgHandle(pMnode, TDMT_DND_CONFIG_DNODE_RSP, mndProcessConfigDnodeRsp); - mndSetMsgHandle(pMnode, TDMT_MND_STATUS, mndProcessStatusMsg); + mndSetMsgHandle(pMnode, TDMT_MND_STATUS, mndProcessStatusReq); mndAddShowMetaHandle(pMnode, TSDB_MGMT_TABLE_VARIABLES, mndGetConfigMeta); mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_VARIABLES, mndRetrieveConfigs); @@ -182,9 +182,9 @@ static int32_t mndDnodeActionDelete(SSdb *pSdb, SDnodeObj *pDnode) { return 0; } -static int32_t mndDnodeActionUpdate(SSdb *pSdb, SDnodeObj *pOldDnode, SDnodeObj *pNewDnode) { - mTrace("dnode:%d, perform update action, old_row:%p new_row:%p", pOldDnode->id, pOldDnode, pNewDnode); - pOldDnode->updateTime = pNewDnode->updateTime; +static int32_t mndDnodeActionUpdate(SSdb *pSdb, SDnodeObj *pOld, SDnodeObj *pNew) { + mTrace("dnode:%d, perform update action, old_row:%p new_row:%p", 
pOld->id, pOld, pNew); + pOld->updateTime = pNew->updateTime; return 0; } @@ -244,22 +244,22 @@ bool mndIsDnodeOnline(SMnode *pMnode, SDnodeObj *pDnode, int64_t curMs) { return true; } -static void mndGetDnodeData(SMnode *pMnode, SDnodeEps *pEps, int32_t numOfEps) { +static void mndGetDnodeData(SMnode *pMnode, SDnodeEps *pEps, int32_t maxEps) { SSdb *pSdb = pMnode->pSdb; - int32_t i = 0; + int32_t numOfEps = 0; void *pIter = NULL; while (1) { SDnodeObj *pDnode = NULL; pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pDnode); if (pIter == NULL) break; - if (i >= numOfEps) { + if (numOfEps >= maxEps) { sdbCancelFetch(pSdb, pIter); sdbRelease(pSdb, pDnode); break; } - SDnodeEp *pEp = &pEps->eps[i]; + SDnodeEp *pEp = &pEps->eps[numOfEps]; pEp->id = htonl(pDnode->id); pEp->port = htons(pDnode->port); memcpy(pEp->fqdn, pDnode->fqdn, TSDB_FQDN_LEN); @@ -267,11 +267,11 @@ static void mndGetDnodeData(SMnode *pMnode, SDnodeEps *pEps, int32_t numOfEps) { if (mndIsMnode(pMnode, pDnode->id)) { pEp->isMnode = 1; } - i++; + numOfEps++; sdbRelease(pSdb, pDnode); } - pEps->num = htonl(i); + pEps->num = htonl(numOfEps); } static int32_t mndCheckClusterCfgPara(SMnode *pMnode, const SClusterCfg *pCfg) { @@ -299,8 +299,9 @@ static int32_t mndCheckClusterCfgPara(SMnode *pMnode, const SClusterCfg *pCfg) { return 0; } -static void mndParseStatusMsg(SStatusMsg *pStatus) { +static void mndParseStatusMsg(SStatusReq *pStatus) { pStatus->sver = htonl(pStatus->sver); + pStatus->dver = htobe64(pStatus->dver); pStatus->dnodeId = htonl(pStatus->dnodeId); pStatus->clusterId = htobe64(pStatus->clusterId); pStatus->rebootTime = htobe64(pStatus->rebootTime); @@ -309,11 +310,19 @@ static void mndParseStatusMsg(SStatusMsg *pStatus) { pStatus->numOfSupportVnodes = htonl(pStatus->numOfSupportVnodes); pStatus->clusterCfg.statusInterval = htonl(pStatus->clusterCfg.statusInterval); pStatus->clusterCfg.checkTime = htobe64(pStatus->clusterCfg.checkTime); + for (int32_t v = 0; v < pStatus->vnodeLoads.num; 
++v) { + SVnodeLoad *pVload = &pStatus->vnodeLoads.data[v]; + pVload->vgId = htonl(pVload->vgId); + pVload->totalStorage = htobe64(pVload->totalStorage); + pVload->compStorage = htobe64(pVload->compStorage); + pVload->pointsWritten = htobe64(pVload->pointsWritten); + pVload->tablesNum = htobe64(pVload->tablesNum); + } } -static int32_t mndProcessStatusMsg(SMnodeMsg *pMsg) { - SMnode *pMnode = pMsg->pMnode; - SStatusMsg *pStatus = pMsg->rpcMsg.pCont; +static int32_t mndProcessStatusReq(SMnodeMsg *pReq) { + SMnode *pMnode = pReq->pMnode; + SStatusReq *pStatus = pReq->rpcMsg.pCont; SDnodeObj *pDnode = NULL; int32_t code = -1; @@ -341,9 +350,11 @@ static int32_t mndProcessStatusMsg(SMnodeMsg *pMsg) { int64_t curMs = taosGetTimestampMs(); bool online = mndIsDnodeOnline(pMnode, pDnode, curMs); - bool needCheckCfg = !(online && pDnode->rebootTime == pStatus->rebootTime); + bool dnodeChanged = (pStatus->dver != sdbGetTableVer(pMnode->pSdb, SDB_DNODE)); + bool reboot = (pDnode->rebootTime != pStatus->rebootTime); + bool needCheck = !online || dnodeChanged || reboot; - if (needCheckCfg) { + if (needCheck) { if (pStatus->sver != pMnode->cfg.sver) { if (pDnode != NULL) { pDnode->offlineReason = DND_REASON_VERSION_NOT_MATCH; @@ -379,7 +390,11 @@ static int32_t mndProcessStatusMsg(SMnodeMsg *pMsg) { goto PROCESS_STATUS_MSG_OVER; } - mInfo("dnode:%d, from offline to online", pDnode->id); + if (!online) { + mInfo("dnode:%d, from offline to online", pDnode->id); + } else { + mDebug("dnode:%d, send dnode eps", pDnode->id); + } pDnode->rebootTime = pStatus->rebootTime; pDnode->numOfCores = pStatus->numOfCores; @@ -393,12 +408,13 @@ static int32_t mndProcessStatusMsg(SMnodeMsg *pMsg) { goto PROCESS_STATUS_MSG_OVER; } + pRsp->dver = htobe64(sdbGetTableVer(pMnode->pSdb, SDB_DNODE)); pRsp->dnodeCfg.dnodeId = htonl(pDnode->id); pRsp->dnodeCfg.clusterId = htobe64(pMnode->clusterId); mndGetDnodeData(pMnode, &pRsp->dnodeEps, numOfEps); - pMsg->contLen = contLen; - pMsg->pCont = pRsp; + 
pReq->contLen = contLen; + pReq->pCont = pRsp; } pDnode->lastAccessTime = curMs; @@ -409,7 +425,7 @@ PROCESS_STATUS_MSG_OVER: return code; } -static int32_t mndCreateDnode(SMnode *pMnode, SMnodeMsg *pMsg, SCreateDnodeMsg *pCreate) { +static int32_t mndCreateDnode(SMnode *pMnode, SMnodeMsg *pReq, SCreateDnodeReq *pCreate) { SDnodeObj dnodeObj = {0}; dnodeObj.id = sdbGetMaxId(pMnode->pSdb, SDB_DNODE); dnodeObj.createdTime = taosGetTimestampMs(); @@ -418,7 +434,7 @@ static int32_t mndCreateDnode(SMnode *pMnode, SMnodeMsg *pMsg, SCreateDnodeMsg * memcpy(dnodeObj.fqdn, pCreate->fqdn, TSDB_FQDN_LEN); snprintf(dnodeObj.ep, TSDB_EP_LEN, "%s:%u", dnodeObj.fqdn, dnodeObj.port); - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, &pMsg->rpcMsg); + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, &pReq->rpcMsg); if (pTrans == NULL) { mError("dnode:%s, failed to create since %s", dnodeObj.ep, terrstr()); return -1; @@ -443,9 +459,9 @@ static int32_t mndCreateDnode(SMnode *pMnode, SMnodeMsg *pMsg, SCreateDnodeMsg * return 0; } -static int32_t mndProcessCreateDnodeMsg(SMnodeMsg *pMsg) { - SMnode *pMnode = pMsg->pMnode; - SCreateDnodeMsg *pCreate = pMsg->rpcMsg.pCont; +static int32_t mndProcessCreateDnodeReq(SMnodeMsg *pReq) { + SMnode *pMnode = pReq->pMnode; + SCreateDnodeReq *pCreate = pReq->rpcMsg.pCont; pCreate->port = htonl(pCreate->port); mDebug("dnode:%s:%d, start to create", pCreate->fqdn, pCreate->port); @@ -465,7 +481,7 @@ static int32_t mndProcessCreateDnodeMsg(SMnodeMsg *pMsg) { return -1; } - int32_t code = mndCreateDnode(pMnode, pMsg, pCreate); + int32_t code = mndCreateDnode(pMnode, pReq, pCreate); if (code != 0) { mError("dnode:%s:%d, failed to create since %s", pCreate->fqdn, pCreate->port, terrstr()); @@ -475,8 +491,8 @@ static int32_t mndProcessCreateDnodeMsg(SMnodeMsg *pMsg) { return TSDB_CODE_MND_ACTION_IN_PROGRESS; } -static int32_t mndDropDnode(SMnode *pMnode, SMnodeMsg *pMsg, SDnodeObj *pDnode) { - STrans *pTrans = 
mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, &pMsg->rpcMsg); +static int32_t mndDropDnode(SMnode *pMnode, SMnodeMsg *pReq, SDnodeObj *pDnode) { + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, &pReq->rpcMsg); if (pTrans == NULL) { mError("dnode:%d, failed to drop since %s", pDnode->id, terrstr()); return -1; @@ -501,9 +517,9 @@ static int32_t mndDropDnode(SMnode *pMnode, SMnodeMsg *pMsg, SDnodeObj *pDnode) return 0; } -static int32_t mndProcessDropDnodeMsg(SMnodeMsg *pMsg) { - SMnode *pMnode = pMsg->pMnode; - SDropDnodeMsg *pDrop = pMsg->rpcMsg.pCont; +static int32_t mndProcessDropDnodeReq(SMnodeMsg *pReq) { + SMnode *pMnode = pReq->pMnode; + SDropDnodeReq *pDrop = pReq->rpcMsg.pCont; pDrop->dnodeId = htonl(pDrop->dnodeId); mDebug("dnode:%d, start to drop", pDrop->dnodeId); @@ -521,7 +537,7 @@ static int32_t mndProcessDropDnodeMsg(SMnodeMsg *pMsg) { return -1; } - int32_t code = mndDropDnode(pMnode, pMsg, pDnode); + int32_t code = mndDropDnode(pMnode, pReq, pDnode); if (code != 0) { mndReleaseDnode(pMnode, pDnode); mError("dnode:%d, failed to drop since %s", pDrop->dnodeId, terrstr()); @@ -532,9 +548,9 @@ static int32_t mndProcessDropDnodeMsg(SMnodeMsg *pMsg) { return TSDB_CODE_MND_ACTION_IN_PROGRESS; } -static int32_t mndProcessConfigDnodeMsg(SMnodeMsg *pMsg) { - SMnode *pMnode = pMsg->pMnode; - SCfgDnodeMsg *pCfg = pMsg->rpcMsg.pCont; +static int32_t mndProcessConfigDnodeReq(SMnodeMsg *pReq) { + SMnode *pMnode = pReq->pMnode; + SMCfgDnodeReq *pCfg = pReq->rpcMsg.pCont; pCfg->dnodeId = htonl(pCfg->dnodeId); SDnodeObj *pDnode = mndAcquireDnode(pMnode, pCfg->dnodeId); @@ -547,14 +563,14 @@ static int32_t mndProcessConfigDnodeMsg(SMnodeMsg *pMsg) { SEpSet epSet = mndGetDnodeEpset(pDnode); mndReleaseDnode(pMnode, pDnode); - SCfgDnodeMsg *pCfgDnode = rpcMallocCont(sizeof(SCfgDnodeMsg)); + SDCfgDnodeReq *pCfgDnode = rpcMallocCont(sizeof(SDCfgDnodeReq)); pCfgDnode->dnodeId = htonl(pCfg->dnodeId); memcpy(pCfgDnode->config, pCfg->config, 
TSDB_DNODE_CONFIG_LEN); SRpcMsg rpcMsg = {.msgType = TDMT_DND_CONFIG_DNODE, .pCont = pCfgDnode, - .contLen = sizeof(SCfgDnodeMsg), - .ahandle = pMsg->rpcMsg.ahandle}; + .contLen = sizeof(SDCfgDnodeReq), + .ahandle = pReq->rpcMsg.ahandle}; mInfo("dnode:%d, app:%p config:%s req send to dnode", pCfg->dnodeId, rpcMsg.ahandle, pCfg->config); mndSendReqToDnode(pMnode, &epSet, &rpcMsg); @@ -562,11 +578,11 @@ static int32_t mndProcessConfigDnodeMsg(SMnodeMsg *pMsg) { return 0; } -static int32_t mndProcessConfigDnodeRsp(SMnodeMsg *pMsg) { - mInfo("app:%p config rsp from dnode", pMsg->rpcMsg.ahandle); +static int32_t mndProcessConfigDnodeRsp(SMnodeMsg *pRsp) { + mInfo("app:%p config rsp from dnode", pRsp->rpcMsg.ahandle); } -static int32_t mndGetConfigMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg *pMeta) { +static int32_t mndGetConfigMeta(SMnodeMsg *pReq, SShowObj *pShow, STableMetaMsg *pMeta) { int32_t cols = 0; SSchema *pSchema = pMeta->pSchema; @@ -597,8 +613,8 @@ static int32_t mndGetConfigMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg return 0; } -static int32_t mndRetrieveConfigs(SMnodeMsg *pMsg, SShowObj *pShow, char *data, int32_t rows) { - SMnode *pMnode = pMsg->pMnode; +static int32_t mndRetrieveConfigs(SMnodeMsg *pReq, SShowObj *pShow, char *data, int32_t rows) { + SMnode *pMnode = pReq->pMnode; int32_t numOfRows = 0; char *cfgOpts[TSDB_CONFIG_NUMBER] = {0}; char cfgVals[TSDB_CONFIG_NUMBER][TSDB_CONIIG_VALUE_LEN + 1] = {0}; @@ -640,8 +656,8 @@ static int32_t mndRetrieveConfigs(SMnodeMsg *pMsg, SShowObj *pShow, char *data, static void mndCancelGetNextConfig(SMnode *pMnode, void *pIter) {} -static int32_t mndGetDnodeMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg *pMeta) { - SMnode *pMnode = pMsg->pMnode; +static int32_t mndGetDnodeMeta(SMnodeMsg *pReq, SShowObj *pShow, STableMetaMsg *pMeta) { + SMnode *pMnode = pReq->pMnode; SSdb *pSdb = pMnode->pSdb; int32_t cols = 0; @@ -704,8 +720,8 @@ static int32_t mndGetDnodeMeta(SMnodeMsg *pMsg, SShowObj 
*pShow, STableMetaMsg * return 0; } -static int32_t mndRetrieveDnodes(SMnodeMsg *pMsg, SShowObj *pShow, char *data, int32_t rows) { - SMnode *pMnode = pMsg->pMnode; +static int32_t mndRetrieveDnodes(SMnodeMsg *pReq, SShowObj *pShow, char *data, int32_t rows) { + SMnode *pMnode = pReq->pMnode; SSdb *pSdb = pMnode->pSdb; int32_t numOfRows = 0; int32_t cols = 0; diff --git a/source/dnode/mnode/impl/src/mndMnode.c b/source/dnode/mnode/impl/src/mndMnode.c index df1848f2f1..c14d1f51f8 100644 --- a/source/dnode/mnode/impl/src/mndMnode.c +++ b/source/dnode/mnode/impl/src/mndMnode.c @@ -27,14 +27,14 @@ static SSdbRaw *mndMnodeActionEncode(SMnodeObj *pObj); static SSdbRow *mndMnodeActionDecode(SSdbRaw *pRaw); static int32_t mndMnodeActionInsert(SSdb *pSdb, SMnodeObj *pObj); static int32_t mndMnodeActionDelete(SSdb *pSdb, SMnodeObj *pObj); -static int32_t mndMnodeActionUpdate(SSdb *pSdb, SMnodeObj *pOldMnode, SMnodeObj *pNewMnode); -static int32_t mndProcessCreateMnodeReq(SMnodeMsg *pMsg); -static int32_t mndProcessDropMnodeReq(SMnodeMsg *pMsg); -static int32_t mndProcessCreateMnodeRsp(SMnodeMsg *pMsg); -static int32_t mndProcessAlterMnodeRsp(SMnodeMsg *pMsg); -static int32_t mndProcessDropMnodeRsp(SMnodeMsg *pMsg); -static int32_t mndGetMnodeMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg *pMeta); -static int32_t mndRetrieveMnodes(SMnodeMsg *pMsg, SShowObj *pShow, char *data, int32_t rows); +static int32_t mndMnodeActionUpdate(SSdb *pSdb, SMnodeObj *pOld, SMnodeObj *pNew); +static int32_t mndProcessCreateMnodeReq(SMnodeMsg *pReq); +static int32_t mndProcessDropMnodeReq(SMnodeMsg *pReq); +static int32_t mndProcessCreateMnodeRsp(SMnodeMsg *pRsp); +static int32_t mndProcessAlterMnodeRsp(SMnodeMsg *pRsp); +static int32_t mndProcessDropMnodeRsp(SMnodeMsg *pRsp); +static int32_t mndGetMnodeMeta(SMnodeMsg *pReq, SShowObj *pShow, STableMetaMsg *pMeta); +static int32_t mndRetrieveMnodes(SMnodeMsg *pReq, SShowObj *pShow, char *data, int32_t rows); static void 
mndCancelGetNextMnode(SMnode *pMnode, void *pIter); int32_t mndInitMnode(SMnode *pMnode) { @@ -65,7 +65,7 @@ void mndCleanupMnode(SMnode *pMnode) {} static SMnodeObj *mndAcquireMnode(SMnode *pMnode, int32_t mnodeId) { SSdb *pSdb = pMnode->pSdb; SMnodeObj *pObj = sdbAcquire(pSdb, SDB_MNODE, &mnodeId); - if (pObj == NULL) { + if (pObj == NULL && terrno == TSDB_CODE_SDB_OBJ_NOT_THERE) { terrno = TSDB_CODE_MND_MNODE_NOT_EXIST; } return pObj; @@ -207,9 +207,9 @@ static int32_t mndMnodeActionDelete(SSdb *pSdb, SMnodeObj *pObj) { return 0; } -static int32_t mndMnodeActionUpdate(SSdb *pSdb, SMnodeObj *pOldMnode, SMnodeObj *pNewMnode) { - mTrace("mnode:%d, perform update action, old_row:%p new_row:%p", pOldMnode->id, pOldMnode, pNewMnode); - pOldMnode->updateTime = pNewMnode->updateTime; +static int32_t mndMnodeActionUpdate(SSdb *pSdb, SMnodeObj *pOld, SMnodeObj *pNew) { + mTrace("mnode:%d, perform update action, old_row:%p new_row:%p", pOld->id, pOld, pNew); + pOld->updateTime = pNew->updateTime; return 0; } @@ -277,13 +277,13 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno void *pIter = NULL; int32_t numOfReplicas = 0; - SDCreateMnodeMsg createMsg = {0}; + SDCreateMnodeReq createReq = {0}; while (1) { SMnodeObj *pMObj = NULL; pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pMObj); if (pIter == NULL) break; - SReplica *pReplica = &createMsg.replicas[numOfReplicas]; + SReplica *pReplica = &createReq.replicas[numOfReplicas]; pReplica->id = htonl(pMObj->id); pReplica->port = htons(pMObj->pDnode->port); memcpy(pReplica->fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN); @@ -292,13 +292,13 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno sdbRelease(pSdb, pMObj); } - SReplica *pReplica = &createMsg.replicas[numOfReplicas]; + SReplica *pReplica = &createReq.replicas[numOfReplicas]; pReplica->id = htonl(pDnode->id); pReplica->port = htons(pDnode->port); memcpy(pReplica->fqdn, pDnode->fqdn, TSDB_FQDN_LEN); 
numOfReplicas++; - createMsg.replica = numOfReplicas; + createReq.replica = numOfReplicas; while (1) { SMnodeObj *pMObj = NULL; @@ -307,22 +307,23 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno STransAction action = {0}; - SDAlterMnodeMsg *pMsg = malloc(sizeof(SDAlterMnodeMsg)); - if (pMsg == NULL) { + SDAlterMnodeReq *pReq = malloc(sizeof(SDAlterMnodeReq)); + if (pReq == NULL) { sdbCancelFetch(pSdb, pIter); sdbRelease(pSdb, pMObj); return -1; } - memcpy(pMsg, &createMsg, sizeof(SDAlterMnodeMsg)); + memcpy(pReq, &createReq, sizeof(SDAlterMnodeReq)); - pMsg->dnodeId = htonl(pMObj->id); + pReq->dnodeId = htonl(pMObj->id); action.epSet = mndGetDnodeEpset(pMObj->pDnode); - action.pCont = pMsg; - action.contLen = sizeof(SDAlterMnodeMsg); + action.pCont = pReq; + action.contLen = sizeof(SDAlterMnodeReq); action.msgType = TDMT_DND_ALTER_MNODE; + action.acceptableCode = TSDB_CODE_DND_MNODE_ALREADY_DEPLOYED; if (mndTransAppendRedoAction(pTrans, &action) != 0) { - free(pMsg); + free(pReq); sdbCancelFetch(pSdb, pIter); sdbRelease(pSdb, pMObj); return -1; @@ -335,17 +336,18 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno STransAction action = {0}; action.epSet = mndGetDnodeEpset(pDnode); - SDCreateMnodeMsg *pMsg = malloc(sizeof(SDCreateMnodeMsg)); - if (pMsg == NULL) return -1; - memcpy(pMsg, &createMsg, sizeof(SDAlterMnodeMsg)); - pMsg->dnodeId = htonl(pObj->id); + SDCreateMnodeReq *pReq = malloc(sizeof(SDCreateMnodeReq)); + if (pReq == NULL) return -1; + memcpy(pReq, &createReq, sizeof(SDAlterMnodeReq)); + pReq->dnodeId = htonl(pObj->id); action.epSet = mndGetDnodeEpset(pDnode); - action.pCont = pMsg; - action.contLen = sizeof(SDCreateMnodeMsg); + action.pCont = pReq; + action.contLen = sizeof(SDCreateMnodeReq); action.msgType = TDMT_DND_CREATE_MNODE; + action.acceptableCode = TSDB_CODE_DND_MNODE_ALREADY_DEPLOYED; if (mndTransAppendRedoAction(pTrans, &action) != 0) { - free(pMsg); + free(pReq); return 
-1; } } @@ -353,39 +355,23 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno return 0; } -static int32_t mndCreateMnode(SMnode *pMnode, SMnodeMsg *pMsg, SDnodeObj *pDnode, SMCreateMnodeMsg *pCreate) { +static int32_t mndCreateMnode(SMnode *pMnode, SMnodeMsg *pReq, SDnodeObj *pDnode, SMCreateMnodeReq *pCreate) { + int32_t code = -1; + SMnodeObj mnodeObj = {0}; mnodeObj.id = pDnode->id; mnodeObj.createdTime = taosGetTimestampMs(); mnodeObj.updateTime = mnodeObj.createdTime; - int32_t code = -1; - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, &pMsg->rpcMsg); - if (pTrans == NULL) { - mError("mnode:%d, failed to create since %s", pCreate->dnodeId, terrstr()); - goto CREATE_MNODE_OVER; - } + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, &pReq->rpcMsg); + if (pTrans == NULL) goto CREATE_MNODE_OVER; + mDebug("trans:%d, used to create mnode:%d", pTrans->id, pCreate->dnodeId); + if (mndSetCreateMnodeRedoLogs(pMnode, pTrans, &mnodeObj) != 0) goto CREATE_MNODE_OVER; + if (mndSetCreateMnodeCommitLogs(pMnode, pTrans, &mnodeObj) != 0) goto CREATE_MNODE_OVER; + if (mndSetCreateMnodeRedoActions(pMnode, pTrans, pDnode, &mnodeObj) != 0) goto CREATE_MNODE_OVER; - if (mndSetCreateMnodeRedoLogs(pMnode, pTrans, &mnodeObj) != 0) { - mError("trans:%d, failed to set redo log since %s", pTrans->id, terrstr()); - goto CREATE_MNODE_OVER; - } - - if (mndSetCreateMnodeCommitLogs(pMnode, pTrans, &mnodeObj) != 0) { - mError("trans:%d, failed to set commit log since %s", pTrans->id, terrstr()); - goto CREATE_MNODE_OVER; - } - - if (mndSetCreateMnodeRedoActions(pMnode, pTrans, pDnode, &mnodeObj) != 0) { - mError("trans:%d, failed to set redo actions since %s", pTrans->id, terrstr()); - goto CREATE_MNODE_OVER; - } - - if (mndTransPrepare(pMnode, pTrans) != 0) { - mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); - goto CREATE_MNODE_OVER; - } + if (mndTransPrepare(pMnode, pTrans) != 0) goto CREATE_MNODE_OVER; code = 0; @@ 
-394,9 +380,9 @@ CREATE_MNODE_OVER: return code; } -static int32_t mndProcessCreateMnodeReq(SMnodeMsg *pMsg) { - SMnode *pMnode = pMsg->pMnode; - SMCreateMnodeMsg *pCreate = pMsg->rpcMsg.pCont; +static int32_t mndProcessCreateMnodeReq(SMnodeMsg *pReq) { + SMnode *pMnode = pReq->pMnode; + SMCreateMnodeReq *pCreate = pReq->rpcMsg.pCont; pCreate->dnodeId = htonl(pCreate->dnodeId); @@ -408,6 +394,9 @@ static int32_t mndProcessCreateMnodeReq(SMnodeMsg *pMsg) { mError("mnode:%d, mnode already exist", pObj->id); terrno = TSDB_CODE_MND_MNODE_ALREADY_EXIST; return -1; + } else if (terrno != TSDB_CODE_MND_MNODE_NOT_EXIST) { + mError("qnode:%d, failed to create mnode since %s", pCreate->dnodeId, terrstr()); + return -1; } SDnodeObj *pDnode = mndAcquireDnode(pMnode, pCreate->dnodeId); @@ -417,7 +406,7 @@ static int32_t mndProcessCreateMnodeReq(SMnodeMsg *pMsg) { return -1; } - int32_t code = mndCreateMnode(pMnode, pMsg, pDnode, pCreate); + int32_t code = mndCreateMnode(pMnode, pReq, pDnode, pCreate); mndReleaseDnode(pMnode, pDnode); if (code != 0) { @@ -449,14 +438,14 @@ static int32_t mndSetDropMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDnode void *pIter = NULL; int32_t numOfReplicas = 0; - SDAlterMnodeMsg alterMsg = {0}; + SDAlterMnodeReq alterReq = {0}; while (1) { SMnodeObj *pMObj = NULL; pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pMObj); if (pIter == NULL) break; if (pMObj->id != pObj->id) { - SReplica *pReplica = &alterMsg.replicas[numOfReplicas]; + SReplica *pReplica = &alterReq.replicas[numOfReplicas]; pReplica->id = htonl(pMObj->id); pReplica->port = htons(pMObj->pDnode->port); memcpy(pReplica->fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN); @@ -466,7 +455,7 @@ static int32_t mndSetDropMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDnode sdbRelease(pSdb, pMObj); } - alterMsg.replica = numOfReplicas; + alterReq.replica = numOfReplicas; while (1) { SMnodeObj *pMObj = NULL; @@ -475,22 +464,23 @@ static int32_t mndSetDropMnodeRedoActions(SMnode *pMnode, 
STrans *pTrans, SDnode if (pMObj->id != pObj->id) { STransAction action = {0}; - SDAlterMnodeMsg *pMsg = malloc(sizeof(SDAlterMnodeMsg)); - if (pMsg == NULL) { + SDAlterMnodeReq *pReq = malloc(sizeof(SDAlterMnodeReq)); + if (pReq == NULL) { sdbCancelFetch(pSdb, pIter); sdbRelease(pSdb, pMObj); return -1; } - memcpy(pMsg, &alterMsg, sizeof(SDAlterMnodeMsg)); + memcpy(pReq, &alterReq, sizeof(SDAlterMnodeReq)); - pMsg->dnodeId = htonl(pMObj->id); + pReq->dnodeId = htonl(pMObj->id); action.epSet = mndGetDnodeEpset(pMObj->pDnode); - action.pCont = pMsg; - action.contLen = sizeof(SDAlterMnodeMsg); + action.pCont = pReq; + action.contLen = sizeof(SDAlterMnodeReq); action.msgType = TDMT_DND_ALTER_MNODE; + action.acceptableCode = TSDB_CODE_DND_MNODE_ALREADY_DEPLOYED; if (mndTransAppendRedoAction(pTrans, &action) != 0) { - free(pMsg); + free(pReq); sdbCancelFetch(pSdb, pIter); sdbRelease(pSdb, pMObj); return -1; @@ -504,19 +494,20 @@ static int32_t mndSetDropMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDnode STransAction action = {0}; action.epSet = mndGetDnodeEpset(pDnode); - SDDropMnodeMsg *pMsg = malloc(sizeof(SDDropMnodeMsg)); - if (pMsg == NULL) { + SDDropMnodeReq *pReq = malloc(sizeof(SDDropMnodeReq)); + if (pReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - pMsg->dnodeId = htonl(pObj->id); + pReq->dnodeId = htonl(pObj->id); action.epSet = mndGetDnodeEpset(pDnode); - action.pCont = pMsg; - action.contLen = sizeof(SDDropMnodeMsg); + action.pCont = pReq; + action.contLen = sizeof(SDDropMnodeReq); action.msgType = TDMT_DND_DROP_MNODE; + action.acceptableCode = TSDB_CODE_DND_MNODE_NOT_DEPLOYED; if (mndTransAppendRedoAction(pTrans, &action) != 0) { - free(pMsg); + free(pReq); return -1; } } @@ -524,35 +515,18 @@ static int32_t mndSetDropMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDnode return 0; } -static int32_t mndDropMnode(SMnode *pMnode, SMnodeMsg *pMsg, SMnodeObj *pObj) { +static int32_t mndDropMnode(SMnode *pMnode, SMnodeMsg *pReq, SMnodeObj 
*pObj) { int32_t code = -1; - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, &pMsg->rpcMsg); - if (pTrans == NULL) { - mError("mnode:%d, failed to drop since %s", pObj->id, terrstr()); - goto DROP_MNODE_OVER; - } + + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, &pReq->rpcMsg); + if (pTrans == NULL) goto DROP_MNODE_OVER; mDebug("trans:%d, used to drop mnode:%d", pTrans->id, pObj->id); - if (mndSetDropMnodeRedoLogs(pMnode, pTrans, pObj) != 0) { - mError("trans:%d, failed to set redo log since %s", pTrans->id, terrstr()); - goto DROP_MNODE_OVER; - } - - if (mndSetDropMnodeCommitLogs(pMnode, pTrans, pObj) != 0) { - mError("trans:%d, failed to set commit log since %s", pTrans->id, terrstr()); - goto DROP_MNODE_OVER; - } - - if (mndSetDropMnodeRedoActions(pMnode, pTrans, pObj->pDnode, pObj) != 0) { - mError("trans:%d, failed to set redo actions since %s", pTrans->id, terrstr()); - goto DROP_MNODE_OVER; - } - - if (mndTransPrepare(pMnode, pTrans) != 0) { - mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); - goto DROP_MNODE_OVER; - } + if (mndSetDropMnodeRedoLogs(pMnode, pTrans, pObj) != 0) goto DROP_MNODE_OVER; + if (mndSetDropMnodeCommitLogs(pMnode, pTrans, pObj) != 0) goto DROP_MNODE_OVER; + if (mndSetDropMnodeRedoActions(pMnode, pTrans, pObj->pDnode, pObj) != 0) goto DROP_MNODE_OVER; + if (mndTransPrepare(pMnode, pTrans) != 0) goto DROP_MNODE_OVER; code = 0; @@ -561,9 +535,9 @@ DROP_MNODE_OVER: return code; } -static int32_t mndProcessDropMnodeReq(SMnodeMsg *pMsg) { - SMnode *pMnode = pMsg->pMnode; - SMDropMnodeMsg *pDrop = pMsg->rpcMsg.pCont; +static int32_t mndProcessDropMnodeReq(SMnodeMsg *pReq) { + SMnode *pMnode = pReq->pMnode; + SMDropMnodeReq *pDrop = pReq->rpcMsg.pCont; pDrop->dnodeId = htonl(pDrop->dnodeId); mDebug("mnode:%d, start to drop", pDrop->dnodeId); @@ -577,12 +551,10 @@ static int32_t mndProcessDropMnodeReq(SMnodeMsg *pMsg) { SMnodeObj *pObj = mndAcquireMnode(pMnode, pDrop->dnodeId); if (pObj == NULL) { 
mError("mnode:%d, not exist", pDrop->dnodeId); - terrno = TSDB_CODE_MND_DNODE_NOT_EXIST; return -1; } - int32_t code = mndDropMnode(pMnode, pMsg, pObj); - + int32_t code = mndDropMnode(pMnode, pReq, pObj); if (code != 0) { mError("mnode:%d, failed to drop since %s", pMnode->dnodeId, terrstr()); return -1; @@ -592,23 +564,23 @@ static int32_t mndProcessDropMnodeReq(SMnodeMsg *pMsg) { return TSDB_CODE_MND_ACTION_IN_PROGRESS; } -static int32_t mndProcessCreateMnodeRsp(SMnodeMsg *pMsg) { - mndTransProcessRsp(pMsg); +static int32_t mndProcessCreateMnodeRsp(SMnodeMsg *pRsp) { + mndTransProcessRsp(pRsp); return 0; } -static int32_t mndProcessAlterMnodeRsp(SMnodeMsg *pMsg) { - mndTransProcessRsp(pMsg); +static int32_t mndProcessAlterMnodeRsp(SMnodeMsg *pRsp) { + mndTransProcessRsp(pRsp); return 0; } -static int32_t mndProcessDropMnodeRsp(SMnodeMsg *pMsg) { - mndTransProcessRsp(pMsg); +static int32_t mndProcessDropMnodeRsp(SMnodeMsg *pRsp) { + mndTransProcessRsp(pRsp); return 0; } -static int32_t mndGetMnodeMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg *pMeta) { - SMnode *pMnode = pMsg->pMnode; +static int32_t mndGetMnodeMeta(SMnodeMsg *pReq, SShowObj *pShow, STableMetaMsg *pMeta) { + SMnode *pMnode = pReq->pMnode; SSdb *pSdb = pMnode->pSdb; int32_t cols = 0; @@ -660,8 +632,8 @@ static int32_t mndGetMnodeMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg * return 0; } -static int32_t mndRetrieveMnodes(SMnodeMsg *pMsg, SShowObj *pShow, char *data, int32_t rows) { - SMnode *pMnode = pMsg->pMnode; +static int32_t mndRetrieveMnodes(SMnodeMsg *pReq, SShowObj *pShow, char *data, int32_t rows) { + SMnode *pMnode = pReq->pMnode; SSdb *pSdb = pMnode->pSdb; int32_t numOfRows = 0; int32_t cols = 0; diff --git a/source/dnode/mnode/impl/src/mndProfile.c b/source/dnode/mnode/impl/src/mndProfile.c index 77efeb8481..fcc2eec028 100644 --- a/source/dnode/mnode/impl/src/mndProfile.c +++ b/source/dnode/mnode/impl/src/mndProfile.c @@ -47,14 +47,14 @@ static SConnObj 
*mndAcquireConn(SMnode *pMnode, int32_t connId); static void mndReleaseConn(SMnode *pMnode, SConnObj *pConn); static void *mndGetNextConn(SMnode *pMnode, void *pIter, SConnObj **pConn); static void mndCancelGetNextConn(SMnode *pMnode, void *pIter); -static int32_t mndProcessHeartBeatMsg(SMnodeMsg *pMsg); -static int32_t mndProcessConnectMsg(SMnodeMsg *pMsg); -static int32_t mndProcessKillQueryMsg(SMnodeMsg *pMsg); -static int32_t mndProcessKillConnectionMsg(SMnodeMsg *pMsg); -static int32_t mndGetConnsMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg *pMeta); -static int32_t mndRetrieveConns(SMnodeMsg *pMsg, SShowObj *pShow, char *data, int32_t rows); -static int32_t mndGetQueryMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg *pMeta); -static int32_t mndRetrieveQueries(SMnodeMsg *pMsg, SShowObj *pShow, char *data, int32_t rows); +static int32_t mndProcessHeartBeatReq(SMnodeMsg *pReq); +static int32_t mndProcessConnectReq(SMnodeMsg *pReq); +static int32_t mndProcessKillQueryReq(SMnodeMsg *pReq); +static int32_t mndProcessKillConnReq(SMnodeMsg *pReq); +static int32_t mndGetConnsMeta(SMnodeMsg *pReq, SShowObj *pShow, STableMetaMsg *pMeta); +static int32_t mndRetrieveConns(SMnodeMsg *pReq, SShowObj *pShow, char *data, int32_t rows); +static int32_t mndGetQueryMeta(SMnodeMsg *pReq, SShowObj *pShow, STableMetaMsg *pMeta); +static int32_t mndRetrieveQueries(SMnodeMsg *pReq, SShowObj *pShow, char *data, int32_t rows); static void mndCancelGetNextQuery(SMnode *pMnode, void *pIter); int32_t mndInitProfile(SMnode *pMnode) { @@ -68,10 +68,10 @@ int32_t mndInitProfile(SMnode *pMnode) { return -1; } - mndSetMsgHandle(pMnode, TDMT_MND_HEARTBEAT, mndProcessHeartBeatMsg); - mndSetMsgHandle(pMnode, TDMT_MND_CONNECT, mndProcessConnectMsg); - mndSetMsgHandle(pMnode, TDMT_MND_KILL_QUERY, mndProcessKillQueryMsg); - mndSetMsgHandle(pMnode, TDMT_MND_KILL_CONN, mndProcessKillConnectionMsg); + mndSetMsgHandle(pMnode, TDMT_MND_HEARTBEAT, mndProcessHeartBeatReq); + 
mndSetMsgHandle(pMnode, TDMT_MND_CONNECT, mndProcessConnectReq); + mndSetMsgHandle(pMnode, TDMT_MND_KILL_QUERY, mndProcessKillQueryReq); + mndSetMsgHandle(pMnode, TDMT_MND_KILL_CONN, mndProcessKillConnReq); mndAddShowMetaHandle(pMnode, TSDB_MGMT_TABLE_CONNS, mndGetConnsMeta); mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_CONNS, mndRetrieveConns); @@ -178,35 +178,35 @@ static void mndCancelGetNextConn(SMnode *pMnode, void *pIter) { taosHashCancelIterate(pMgmt->cache->pHashTable, pIter); } -static int32_t mndProcessConnectMsg(SMnodeMsg *pMsg) { - SMnode *pMnode = pMsg->pMnode; - SConnectMsg *pReq = pMsg->rpcMsg.pCont; - pReq->pid = htonl(pReq->pid); - pReq->startTime = htobe64(pReq->startTime); +static int32_t mndProcessConnectReq(SMnodeMsg *pReq) { + SMnode *pMnode = pReq->pMnode; + SConnectReq *pConnReq = pReq->rpcMsg.pCont; + pConnReq->pid = htonl(pConnReq->pid); + pConnReq->startTime = htobe64(pConnReq->startTime); SRpcConnInfo info = {0}; - if (rpcGetConnInfo(pMsg->rpcMsg.handle, &info) != 0) { - mError("user:%s, failed to login while get connection info since %s", pMsg->user, terrstr()); + if (rpcGetConnInfo(pReq->rpcMsg.handle, &info) != 0) { + mError("user:%s, failed to login while get connection info since %s", pReq->user, terrstr()); return -1; } char ip[30]; taosIp2String(info.clientIp, ip); - if (pReq->db[0]) { - snprintf(pMsg->db, TSDB_DB_FNAME_LEN, "%d%s%s", pMsg->acctId, TS_PATH_DELIMITER, pReq->db); - SDbObj *pDb = mndAcquireDb(pMnode, pMsg->db); + if (pConnReq->db[0]) { + snprintf(pReq->db, TSDB_DB_FNAME_LEN, "%d%s%s", pReq->acctId, TS_PATH_DELIMITER, pConnReq->db); + SDbObj *pDb = mndAcquireDb(pMnode, pReq->db); if (pDb == NULL) { terrno = TSDB_CODE_MND_INVALID_DB; - mError("user:%s, failed to login from %s while use db:%s since %s", pMsg->user, ip, pReq->db, terrstr()); + mError("user:%s, failed to login from %s while use db:%s since %s", pReq->user, ip, pConnReq->db, terrstr()); return -1; } mndReleaseDb(pMnode, pDb); } - SConnObj *pConn = 
mndCreateConn(pMnode, &info, pReq->pid, pReq->app, pReq->startTime); + SConnObj *pConn = mndCreateConn(pMnode, &info, pConnReq->pid, pConnReq->app, pConnReq->startTime); if (pConn == NULL) { - mError("user:%s, failed to login from %s while create connection since %s", pMsg->user, ip, terrstr()); + mError("user:%s, failed to login from %s while create connection since %s", pReq->user, ip, terrstr()); return -1; } @@ -214,11 +214,11 @@ static int32_t mndProcessConnectMsg(SMnodeMsg *pMsg) { if (pRsp == NULL) { mndReleaseConn(pMnode, pConn); terrno = TSDB_CODE_OUT_OF_MEMORY; - mError("user:%s, failed to login from %s while create rsp since %s", pMsg->user, ip, terrstr()); + mError("user:%s, failed to login from %s while create rsp since %s", pReq->user, ip, terrstr()); return -1; } - SUserObj *pUser = mndAcquireUser(pMnode, pMsg->user); + SUserObj *pUser = mndAcquireUser(pMnode, pReq->user); if (pUser != NULL) { pRsp->acctId = htonl(pUser->acctId); pRsp->superUser = pUser->superUser; @@ -230,16 +230,16 @@ static int32_t mndProcessConnectMsg(SMnodeMsg *pMsg) { mndGetMnodeEpSet(pMnode, &pRsp->epSet); mndReleaseConn(pMnode, pConn); - pMsg->contLen = sizeof(SConnectRsp); - pMsg->pCont = pRsp; + pReq->contLen = sizeof(SConnectRsp); + pReq->pCont = pRsp; - mDebug("user:%s, login from %s, conn:%d, app:%s", info.user, ip, pConn->id, pReq->app); + mDebug("user:%s, login from %s, conn:%d, app:%s", info.user, ip, pConn->id, pConnReq->app); return 0; } -static int32_t mndSaveQueryStreamList(SConnObj *pConn, SHeartBeatMsg *pMsg) { +static int32_t mndSaveQueryStreamList(SConnObj *pConn, SHeartBeatReq *pReq) { pConn->numOfQueries = 0; - int32_t numOfQueries = htonl(pMsg->numOfQueries); + int32_t numOfQueries = htonl(pReq->numOfQueries); if (numOfQueries > 0) { if (pConn->pQueries == NULL) { @@ -250,38 +250,38 @@ static int32_t mndSaveQueryStreamList(SConnObj *pConn, SHeartBeatMsg *pMsg) { int32_t saveSize = pConn->numOfQueries * sizeof(SQueryDesc); if (saveSize > 0 && pConn->pQueries 
!= NULL) { - memcpy(pConn->pQueries, pMsg->pData, saveSize); + memcpy(pConn->pQueries, pReq->pData, saveSize); } } return TSDB_CODE_SUCCESS; } -static int32_t mndProcessHeartBeatMsg(SMnodeMsg *pMsg) { - SMnode *pMnode = pMsg->pMnode; +static int32_t mndProcessHeartBeatReq(SMnodeMsg *pReq) { + SMnode *pMnode = pReq->pMnode; SProfileMgmt *pMgmt = &pMnode->profileMgmt; - SHeartBeatMsg *pReq = pMsg->rpcMsg.pCont; - pReq->connId = htonl(pReq->connId); - pReq->pid = htonl(pReq->pid); + SHeartBeatReq *pHeartbeat = pReq->rpcMsg.pCont; + pHeartbeat->connId = htonl(pHeartbeat->connId); + pHeartbeat->pid = htonl(pHeartbeat->pid); SRpcConnInfo info = {0}; - if (rpcGetConnInfo(pMsg->rpcMsg.handle, &info) != 0) { - mError("user:%s, connId:%d failed to process hb since %s", pMsg->user, pReq->connId, terrstr()); + if (rpcGetConnInfo(pReq->rpcMsg.handle, &info) != 0) { + mError("user:%s, connId:%d failed to process hb since %s", pReq->user, pHeartbeat->connId, terrstr()); return -1; } - SConnObj *pConn = mndAcquireConn(pMnode, pReq->connId); + SConnObj *pConn = mndAcquireConn(pMnode, pHeartbeat->connId); if (pConn == NULL) { - pConn = mndCreateConn(pMnode, &info, pReq->pid, pReq->app, 0); + pConn = mndCreateConn(pMnode, &info, pHeartbeat->pid, pHeartbeat->app, 0); if (pConn == NULL) { - mError("user:%s, conn:%d is freed and failed to create new conn since %s", pMsg->user, pReq->connId, terrstr()); + mError("user:%s, conn:%d is freed and failed to create new since %s", pReq->user, pHeartbeat->connId, terrstr()); return -1; } else { - mDebug("user:%s, conn:%d is freed and create a new conn:%d", pMsg->user, pReq->connId, pConn->id); + mDebug("user:%s, conn:%d is freed and create a new conn:%d", pReq->user, pHeartbeat->connId, pConn->id); } } else if (pConn->killed) { - mError("user:%s, conn:%d is already killed", pMsg->user, pConn->id); + mError("user:%s, conn:%d is already killed", pReq->user, pConn->id); terrno = TSDB_CODE_MND_INVALID_CONNECTION; return -1; } else { @@ -304,11 
+304,11 @@ static int32_t mndProcessHeartBeatMsg(SMnodeMsg *pMsg) { if (pRsp == NULL) { mndReleaseConn(pMnode, pConn); terrno = TSDB_CODE_OUT_OF_MEMORY; - mError("user:%s, conn:%d failed to process hb while create rsp since %s", pMsg->user, pReq->connId, terrstr()); + mError("user:%s, conn:%d failed to process hb while since %s", pReq->user, pHeartbeat->connId, terrstr()); return -1; } - mndSaveQueryStreamList(pConn, pReq); + mndSaveQueryStreamList(pConn, pHeartbeat); if (pConn->killed != 0) { pRsp->killConnection = 1; } @@ -324,16 +324,16 @@ static int32_t mndProcessHeartBeatMsg(SMnodeMsg *pMsg) { mndGetMnodeEpSet(pMnode, &pRsp->epSet); mndReleaseConn(pMnode, pConn); - pMsg->contLen = sizeof(SConnectRsp); - pMsg->pCont = pRsp; + pReq->contLen = sizeof(SConnectRsp); + pReq->pCont = pRsp; return 0; } -static int32_t mndProcessKillQueryMsg(SMnodeMsg *pMsg) { - SMnode *pMnode = pMsg->pMnode; +static int32_t mndProcessKillQueryReq(SMnodeMsg *pReq) { + SMnode *pMnode = pReq->pMnode; SProfileMgmt *pMgmt = &pMnode->profileMgmt; - SUserObj *pUser = mndAcquireUser(pMnode, pMsg->user); + SUserObj *pUser = mndAcquireUser(pMnode, pReq->user); if (pUser == NULL) return 0; if (!pUser->superUser) { mndReleaseUser(pMnode, pUser); @@ -342,7 +342,7 @@ static int32_t mndProcessKillQueryMsg(SMnodeMsg *pMsg) { } mndReleaseUser(pMnode, pUser); - SKillQueryMsg *pKill = pMsg->rpcMsg.pCont; + SKillQueryReq *pKill = pReq->rpcMsg.pCont; int32_t connId = htonl(pKill->connId); int32_t queryId = htonl(pKill->queryId); mInfo("kill query msg is received, queryId:%d", pKill->queryId); @@ -353,18 +353,18 @@ static int32_t mndProcessKillQueryMsg(SMnodeMsg *pMsg) { terrno = TSDB_CODE_MND_INVALID_CONN_ID; return -1; } else { - mInfo("connId:%d, queryId:%d is killed by user:%s", connId, queryId, pMsg->user); + mInfo("connId:%d, queryId:%d is killed by user:%s", connId, queryId, pReq->user); pConn->queryId = queryId; taosCacheRelease(pMgmt->cache, (void **)&pConn, false); return 0; } } -static int32_t 
mndProcessKillConnectionMsg(SMnodeMsg *pMsg) { - SMnode *pMnode = pMsg->pMnode; +static int32_t mndProcessKillConnReq(SMnodeMsg *pReq) { + SMnode *pMnode = pReq->pMnode; SProfileMgmt *pMgmt = &pMnode->profileMgmt; - SUserObj *pUser = mndAcquireUser(pMnode, pMsg->user); + SUserObj *pUser = mndAcquireUser(pMnode, pReq->user); if (pUser == NULL) return 0; if (!pUser->superUser) { mndReleaseUser(pMnode, pUser); @@ -373,7 +373,7 @@ static int32_t mndProcessKillConnectionMsg(SMnodeMsg *pMsg) { } mndReleaseUser(pMnode, pUser); - SKillConnMsg *pKill = pMsg->rpcMsg.pCont; + SKillConnReq *pKill = pReq->rpcMsg.pCont; int32_t connId = htonl(pKill->connId); SConnObj *pConn = taosCacheAcquireByKey(pMgmt->cache, &connId, sizeof(int32_t)); @@ -382,18 +382,18 @@ static int32_t mndProcessKillConnectionMsg(SMnodeMsg *pMsg) { terrno = TSDB_CODE_MND_INVALID_CONN_ID; return -1; } else { - mInfo("connId:%d, is killed by user:%s", connId, pMsg->user); + mInfo("connId:%d, is killed by user:%s", connId, pReq->user); pConn->killed = 1; taosCacheRelease(pMgmt->cache, (void **)&pConn, false); return TSDB_CODE_SUCCESS; } } -static int32_t mndGetConnsMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg *pMeta) { - SMnode *pMnode = pMsg->pMnode; +static int32_t mndGetConnsMeta(SMnodeMsg *pReq, SShowObj *pShow, STableMetaMsg *pMeta) { + SMnode *pMnode = pReq->pMnode; SProfileMgmt *pMgmt = &pMnode->profileMgmt; - SUserObj *pUser = mndAcquireUser(pMnode, pMsg->user); + SUserObj *pUser = mndAcquireUser(pMnode, pReq->user); if (pUser == NULL) return 0; if (!pUser->superUser) { mndReleaseUser(pMnode, pUser); @@ -464,8 +464,8 @@ static int32_t mndGetConnsMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg * return 0; } -static int32_t mndRetrieveConns(SMnodeMsg *pMsg, SShowObj *pShow, char *data, int32_t rows) { - SMnode *pMnode = pMsg->pMnode; +static int32_t mndRetrieveConns(SMnodeMsg *pReq, SShowObj *pShow, char *data, int32_t rows) { + SMnode *pMnode = pReq->pMnode; int32_t numOfRows = 0; SConnObj 
*pConn = NULL; int32_t cols = 0; @@ -518,11 +518,11 @@ static int32_t mndRetrieveConns(SMnodeMsg *pMsg, SShowObj *pShow, char *data, in return numOfRows; } -static int32_t mndGetQueryMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg *pMeta) { - SMnode *pMnode = pMsg->pMnode; +static int32_t mndGetQueryMeta(SMnodeMsg *pReq, SShowObj *pShow, STableMetaMsg *pMeta) { + SMnode *pMnode = pReq->pMnode; SProfileMgmt *pMgmt = &pMnode->profileMgmt; - SUserObj *pUser = mndAcquireUser(pMnode, pMsg->user); + SUserObj *pUser = mndAcquireUser(pMnode, pReq->user); if (pUser == NULL) return 0; if (!pUser->superUser) { mndReleaseUser(pMnode, pUser); @@ -633,8 +633,8 @@ static int32_t mndGetQueryMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaMsg * return 0; } -static int32_t mndRetrieveQueries(SMnodeMsg *pMsg, SShowObj *pShow, char *data, int32_t rows) { - SMnode *pMnode = pMsg->pMnode; +static int32_t mndRetrieveQueries(SMnodeMsg *pReq, SShowObj *pShow, char *data, int32_t rows) { + SMnode *pMnode = pReq->pMnode; int32_t numOfRows = 0; SConnObj *pConn = NULL; int32_t cols = 0; diff --git a/source/dnode/mnode/impl/src/mndShow.c b/source/dnode/mnode/impl/src/mndShow.c index 9c145f6445..28fe0551c2 100644 --- a/source/dnode/mnode/impl/src/mndShow.c +++ b/source/dnode/mnode/impl/src/mndShow.c @@ -124,20 +124,20 @@ static int32_t mndProcessShowReq(SMnodeMsg *pReq) { if (type <= TSDB_MGMT_TABLE_START || type >= TSDB_MGMT_TABLE_MAX) { terrno = TSDB_CODE_MND_INVALID_MSG_TYPE; - mError("failed to process show msg since %s", terrstr()); + mError("failed to process show-meta req since %s", terrstr()); return -1; } ShowMetaFp metaFp = pMgmt->metaFps[type]; if (metaFp == NULL) { terrno = TSDB_CODE_MND_INVALID_MSG_TYPE; - mError("failed to process show-meta msg:%s since %s", mndShowStr(type), terrstr()); + mError("failed to process show-meta req:%s since %s", mndShowStr(type), terrstr()); return -1; } SShowObj *pShow = mndCreateShowObj(pMnode, pShowReq); if (pShow == NULL) { - mError("failed 
to process show-meta msg:%s since %s", mndShowStr(type), terrstr()); + mError("failed to process show-meta req:%s since %s", mndShowStr(type), terrstr()); return -1; } @@ -146,7 +146,7 @@ static int32_t mndProcessShowReq(SMnodeMsg *pReq) { if (pRsp == NULL) { mndReleaseShowObj(pShow, true); terrno = TSDB_CODE_OUT_OF_MEMORY; - mError("show:0x%" PRIx64 ", failed to process show-meta msg:%s since malloc rsp error", pShow->id, + mError("show:0x%" PRIx64 ", failed to process show-meta req:%s since malloc rsp error", pShow->id, mndShowStr(type)); return -1; } @@ -181,7 +181,7 @@ static int32_t mndProcessRetrieveReq(SMnodeMsg *pReq) { SShowObj *pShow = mndAcquireShowObj(pMnode, showId); if (pShow == NULL) { terrno = TSDB_CODE_MND_INVALID_SHOWOBJ; - mError("failed to process show-retrieve msg:%p since %s", pShow, terrstr()); + mError("failed to process show-retrieve req:%p since %s", pShow, terrstr()); return -1; } diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 2609faa41a..d6b6a07de0 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -769,7 +769,8 @@ static int32_t mndProcessStbMetaMsg(SMnodeMsg *pMsg) { pMeta->tableType = TSDB_SUPER_TABLE; pMeta->update = pDb->cfg.update; pMeta->sversion = htonl(pStb->version); - pMeta->suid = htonl(pStb->uid); + pMeta->suid = htobe64(pStb->uid); + pMeta->tuid = htobe64(pStb->uid); for (int32_t i = 0; i < totalCols; ++i) { SSchema *pSchema = &pMeta->pSchema[i]; diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index e55da73f62..581b57ea65 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -86,16 +86,16 @@ static int32_t mndRestoreWal(SMnode *pMnode) { mndTransPullup(pMnode); - if (walBeginSnapshot(pWal, sdbVer) < 0) { - goto WAL_RESTORE_OVER; - } - if (sdbVer != lastSdbVer) { mInfo("sdb restored from %" PRId64 " to %" PRId64 ", write file", lastSdbVer, sdbVer); 
if (sdbWriteFile(pSdb) != 0) { goto WAL_RESTORE_OVER; } + if (walBeginSnapshot(pWal, sdbVer) < 0) { + goto WAL_RESTORE_OVER; + } + if (walEndSnapshot(pWal) < 0) { goto WAL_RESTORE_OVER; } diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index bd17c6d150..e9d35a6e4c 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -189,8 +189,8 @@ void mndReleaseVgroup(SMnode *pMnode, SVgObj *pVgroup) { sdbRelease(pSdb, pVgroup); } -SCreateVnodeMsg *mndBuildCreateVnodeMsg(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup) { - SCreateVnodeMsg *pCreate = calloc(1, sizeof(SCreateVnodeMsg)); +SCreateVnodeReq *mndBuildCreateVnodeMsg(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup) { + SCreateVnodeReq *pCreate = calloc(1, sizeof(SCreateVnodeReq)); if (pCreate == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; @@ -248,8 +248,8 @@ SCreateVnodeMsg *mndBuildCreateVnodeMsg(SMnode *pMnode, SDnodeObj *pDnode, SDbOb return pCreate; } -SDropVnodeMsg *mndBuildDropVnodeMsg(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup) { - SDropVnodeMsg *pDrop = calloc(1, sizeof(SDropVnodeMsg)); +SDropVnodeReq *mndBuildDropVnodeMsg(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup) { + SDropVnodeReq *pDrop = calloc(1, sizeof(SDropVnodeReq)); if (pDrop == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; diff --git a/source/dnode/mnode/impl/test/CMakeLists.txt b/source/dnode/mnode/impl/test/CMakeLists.txt index 6cdc43344d..3ca35d58a7 100644 --- a/source/dnode/mnode/impl/test/CMakeLists.txt +++ b/source/dnode/mnode/impl/test/CMakeLists.txt @@ -7,3 +7,8 @@ add_subdirectory(qnode) add_subdirectory(snode) add_subdirectory(bnode) add_subdirectory(show) +add_subdirectory(profile) +add_subdirectory(dnode) +add_subdirectory(mnode) +add_subdirectory(db) +add_subdirectory(stb) diff --git a/source/dnode/mnode/impl/test/bnode/bnode.cpp 
b/source/dnode/mnode/impl/test/bnode/bnode.cpp index c33776fd1b..0b54a9bf4a 100644 --- a/source/dnode/mnode/impl/test/bnode/bnode.cpp +++ b/source/dnode/mnode/impl/test/bnode/bnode.cpp @@ -96,9 +96,9 @@ TEST_F(MndTestBnode, 02_Create_Bnode) { TEST_F(MndTestBnode, 03_Drop_Bnode) { { - int32_t contLen = sizeof(SCreateDnodeMsg); + int32_t contLen = sizeof(SCreateDnodeReq); - SCreateDnodeMsg* pReq = (SCreateDnodeMsg*)rpcMallocCont(contLen); + SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); strcpy(pReq->fqdn, "localhost"); pReq->port = htonl(9019); diff --git a/source/dnode/mnode/impl/test/db/CMakeLists.txt b/source/dnode/mnode/impl/test/db/CMakeLists.txt new file mode 100644 index 0000000000..f0abdf152c --- /dev/null +++ b/source/dnode/mnode/impl/test/db/CMakeLists.txt @@ -0,0 +1,11 @@ +aux_source_directory(. DB_SRC) +add_executable(mnode_test_db ${DB_SRC}) +target_link_libraries( + mnode_test_db + PUBLIC sut +) + +add_test( + NAME mnode_test_db + COMMAND mnode_test_db +) diff --git a/source/dnode/mgmt/impl/test/db/db.cpp b/source/dnode/mnode/impl/test/db/db.cpp similarity index 95% rename from source/dnode/mgmt/impl/test/db/db.cpp rename to source/dnode/mnode/impl/test/db/db.cpp index 3a69ae2305..42cf753c7c 100644 --- a/source/dnode/mgmt/impl/test/db/db.cpp +++ b/source/dnode/mnode/impl/test/db/db.cpp @@ -1,19 +1,19 @@ /** * @file db.cpp * @author slguan (slguan@taosdata.com) - * @brief DNODE module db-msg tests - * @version 0.1 - * @date 2021-12-15 + * @brief MNODE module db tests + * @version 1.0 + * @date 2022-01-11 * - * @copyright Copyright (c) 2021 + * @copyright Copyright (c) 2022 * */ #include "sut.h" -class DndTestDb : public ::testing::Test { +class MndTestDb : public ::testing::Test { protected: - static void SetUpTestSuite() { test.Init("/tmp/dnode_test_db", 9040); } + static void SetUpTestSuite() { test.Init("/tmp/mnode_test_db", 9030); } static void TearDownTestSuite() { test.Cleanup(); } static Testbase test; @@ -23,9 +23,9 @@ class 
DndTestDb : public ::testing::Test { void TearDown() override {} }; -Testbase DndTestDb::test; +Testbase MndTestDb::test; -TEST_F(DndTestDb, 01_ShowDb) { +TEST_F(MndTestDb, 01_ShowDb) { test.SendShowMetaReq(TSDB_MGMT_TABLE_DB, ""); CHECK_META("show databases", 18); CHECK_SCHEMA(0, TSDB_DATA_TYPE_BINARY, TSDB_DB_NAME_LEN - 1 + VARSTR_HEADER_SIZE, "name"); @@ -51,7 +51,7 @@ TEST_F(DndTestDb, 01_ShowDb) { EXPECT_EQ(test.GetShowRows(), 0); } -TEST_F(DndTestDb, 02_Create_Alter_Drop_Db) { +TEST_F(MndTestDb, 02_Create_Alter_Drop_Db) { { int32_t contLen = sizeof(SCreateDbMsg); @@ -211,7 +211,7 @@ TEST_F(DndTestDb, 02_Create_Alter_Drop_Db) { EXPECT_EQ(test.GetShowRows(), 0); } -TEST_F(DndTestDb, 03_Create_Use_Restart_Use_Db) { +TEST_F(MndTestDb, 03_Create_Use_Restart_Use_Db) { { int32_t contLen = sizeof(SCreateDbMsg); @@ -281,7 +281,7 @@ TEST_F(DndTestDb, 03_Create_Use_Restart_Use_Db) { EXPECT_EQ(pInfo->numOfEps, 1); SEpAddrMsg* pAddr = &pInfo->epAddr[0]; pAddr->port = htons(pAddr->port); - EXPECT_EQ(pAddr->port, 9040); + EXPECT_EQ(pAddr->port, 9030); EXPECT_STREQ(pAddr->fqdn, "localhost"); } @@ -297,7 +297,7 @@ TEST_F(DndTestDb, 03_Create_Use_Restart_Use_Db) { EXPECT_EQ(pInfo->numOfEps, 1); SEpAddrMsg* pAddr = &pInfo->epAddr[0]; pAddr->port = htons(pAddr->port); - EXPECT_EQ(pAddr->port, 9040); + EXPECT_EQ(pAddr->port, 9030); EXPECT_STREQ(pAddr->fqdn, "localhost"); } } diff --git a/source/dnode/mnode/impl/test/dnode/CMakeLists.txt b/source/dnode/mnode/impl/test/dnode/CMakeLists.txt new file mode 100644 index 0000000000..e29c5e8f3d --- /dev/null +++ b/source/dnode/mnode/impl/test/dnode/CMakeLists.txt @@ -0,0 +1,11 @@ +aux_source_directory(. 
DTEST_SRC) +add_executable(mnode_test_dnode ${DTEST_SRC}) +target_link_libraries( + mnode_test_dnode + PUBLIC sut +) + +add_test( + NAME mnode_test_dnode + COMMAND mnode_test_dnode +) diff --git a/source/dnode/mnode/impl/test/dnode/dnode.cpp b/source/dnode/mnode/impl/test/dnode/dnode.cpp new file mode 100644 index 0000000000..1c0cfb7bfc --- /dev/null +++ b/source/dnode/mnode/impl/test/dnode/dnode.cpp @@ -0,0 +1,350 @@ +/** + * @file dnode.cpp + * @author slguan (slguan@taosdata.com) + * @brief MNODE module dnode tests + * @version 1.0 + * @date 2022-01-06 + * + * @copyright Copyright (c) 2022 + * + */ + +#include "sut.h" + +class MndTestDnode : public ::testing::Test { + public: + void SetUp() override {} + void TearDown() override {} + + public: + static void SetUpTestSuite() { + test.Init("/tmp/dnode_test_dnode1", 9023); + const char* fqdn = "localhost"; + const char* firstEp = "localhost:9023"; + + server2.Start("/tmp/dnode_test_dnode2", fqdn, 9024, firstEp); + server3.Start("/tmp/dnode_test_dnode3", fqdn, 9025, firstEp); + server4.Start("/tmp/dnode_test_dnode4", fqdn, 9026, firstEp); + server5.Start("/tmp/dnode_test_dnode5", fqdn, 9027, firstEp); + taosMsleep(300); + } + + static void TearDownTestSuite() { + server2.Stop(); + server3.Stop(); + server4.Stop(); + server5.Stop(); + test.Cleanup(); + } + + static Testbase test; + static TestServer server2; + static TestServer server3; + static TestServer server4; + static TestServer server5; +}; + +Testbase MndTestDnode::test; +TestServer MndTestDnode::server2; +TestServer MndTestDnode::server3; +TestServer MndTestDnode::server4; +TestServer MndTestDnode::server5; + +TEST_F(MndTestDnode, 01_ShowDnode) { + test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); + CHECK_META("show dnodes", 7); + + CHECK_SCHEMA(0, TSDB_DATA_TYPE_SMALLINT, 2, "id"); + CHECK_SCHEMA(1, TSDB_DATA_TYPE_BINARY, TSDB_EP_LEN + VARSTR_HEADER_SIZE, "endpoint"); + CHECK_SCHEMA(2, TSDB_DATA_TYPE_SMALLINT, 2, "vnodes"); + CHECK_SCHEMA(3, 
TSDB_DATA_TYPE_SMALLINT, 2, "support_vnodes"); + CHECK_SCHEMA(4, TSDB_DATA_TYPE_BINARY, 10 + VARSTR_HEADER_SIZE, "status"); + CHECK_SCHEMA(5, TSDB_DATA_TYPE_TIMESTAMP, 8, "create_time"); + CHECK_SCHEMA(6, TSDB_DATA_TYPE_BINARY, 24 + VARSTR_HEADER_SIZE, "offline_reason"); + + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 1); + + CheckInt16(1); + CheckBinary("localhost:9023", TSDB_EP_LEN); + CheckInt16(0); + CheckInt16(16); + CheckBinary("ready", 10); + CheckTimestamp(); + CheckBinary("", 24); +} + +TEST_F(MndTestDnode, 02_ConfigDnode) { + int32_t contLen = sizeof(SMCfgDnodeReq); + + SMCfgDnodeReq* pReq = (SMCfgDnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(1); + strcpy(pReq->config, "ddebugflag 131"); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CONFIG_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); +} + +TEST_F(MndTestDnode, 03_Create_Dnode) { + { + int32_t contLen = sizeof(SCreateDnodeReq); + + SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); + strcpy(pReq->fqdn, ""); + pReq->port = htonl(9024); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_DNODE_EP); + } + + { + int32_t contLen = sizeof(SCreateDnodeReq); + + SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); + strcpy(pReq->fqdn, "localhost"); + pReq->port = htonl(-1); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_DNODE_EP); + } + + { + int32_t contLen = sizeof(SCreateDnodeReq); + + SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); + strcpy(pReq->fqdn, "localhost"); + pReq->port = htonl(123456); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_DNODE_EP); + } + + { + int32_t contLen = sizeof(SCreateDnodeReq); + + 
SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); + strcpy(pReq->fqdn, "localhost"); + pReq->port = htonl(9024); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } + + { + int32_t contLen = sizeof(SCreateDnodeReq); + + SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); + strcpy(pReq->fqdn, "localhost"); + pReq->port = htonl(9024); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_DNODE_ALREADY_EXIST); + } + + taosMsleep(1300); + + test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); + CHECK_META("show dnodes", 7); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 2); + + CheckInt16(1); + CheckInt16(2); + CheckBinary("localhost:9023", TSDB_EP_LEN); + CheckBinary("localhost:9024", TSDB_EP_LEN); + CheckInt16(0); + CheckInt16(0); + CheckInt16(16); + CheckInt16(16); + CheckBinary("ready", 10); + CheckBinary("ready", 10); + CheckTimestamp(); + CheckTimestamp(); + CheckBinary("", 24); + CheckBinary("", 24); +} + +TEST_F(MndTestDnode, 04_Drop_Dnode) { + { + int32_t contLen = sizeof(SDropDnodeReq); + + SDropDnodeReq* pReq = (SDropDnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(-3); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_DNODE_ID); + } + + { + int32_t contLen = sizeof(SDropDnodeReq); + + SDropDnodeReq* pReq = (SDropDnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(5); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_DNODE_NOT_EXIST); + } + + { + int32_t contLen = sizeof(SDropDnodeReq); + + SDropDnodeReq* pReq = (SDropDnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DNODE, pReq, contLen); + ASSERT_NE(pRsp, 
nullptr); + ASSERT_EQ(pRsp->code, 0); + } + + { + int32_t contLen = sizeof(SDropDnodeReq); + + SDropDnodeReq* pReq = (SDropDnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_DNODE_NOT_EXIST); + } + + test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); + CHECK_META("show dnodes", 7); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 1); + + CheckInt16(1); + CheckBinary("localhost:9023", TSDB_EP_LEN); + CheckInt16(0); + CheckInt16(16); + CheckBinary("ready", 10); + CheckTimestamp(); + CheckBinary("", 24); + + taosMsleep(2000); + server2.Stop(); + server2.DoStart(); +} + +TEST_F(MndTestDnode, 05_Create_Drop_Restart_Dnode) { + { + int32_t contLen = sizeof(SCreateDnodeReq); + + SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); + strcpy(pReq->fqdn, "localhost"); + pReq->port = htonl(9025); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } + + { + int32_t contLen = sizeof(SCreateDnodeReq); + + SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); + strcpy(pReq->fqdn, "localhost"); + pReq->port = htonl(9026); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } + + { + int32_t contLen = sizeof(SCreateDnodeReq); + + SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); + strcpy(pReq->fqdn, "localhost"); + pReq->port = htonl(9027); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } + + taosMsleep(1300); + test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); + CHECK_META("show dnodes", 7); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 4); + + CheckInt16(1); + CheckInt16(3); + CheckInt16(4); + CheckInt16(5); + 
CheckBinary("localhost:9023", TSDB_EP_LEN); + CheckBinary("localhost:9025", TSDB_EP_LEN); + CheckBinary("localhost:9026", TSDB_EP_LEN); + CheckBinary("localhost:9027", TSDB_EP_LEN); + CheckInt16(0); + CheckInt16(0); + CheckInt16(0); + CheckInt16(0); + CheckInt16(16); + CheckInt16(16); + CheckInt16(16); + CheckInt16(16); + CheckBinary("ready", 10); + CheckBinary("ready", 10); + CheckBinary("ready", 10); + CheckBinary("ready", 10); + CheckTimestamp(); + CheckTimestamp(); + CheckTimestamp(); + CheckTimestamp(); + CheckBinary("", 24); + CheckBinary("", 24); + CheckBinary("", 24); + CheckBinary("", 24); + + // restart + uInfo("stop all server"); + test.Restart(); + server2.Restart(); + server3.Restart(); + server4.Restart(); + server5.Restart(); + + taosMsleep(1300); + test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); + CHECK_META("show dnodes", 7); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 4); + + CheckInt16(1); + CheckInt16(3); + CheckInt16(4); + CheckInt16(5); + CheckBinary("localhost:9023", TSDB_EP_LEN); + CheckBinary("localhost:9025", TSDB_EP_LEN); + CheckBinary("localhost:9026", TSDB_EP_LEN); + CheckBinary("localhost:9027", TSDB_EP_LEN); + CheckInt16(0); + CheckInt16(0); + CheckInt16(0); + CheckInt16(0); + CheckInt16(16); + CheckInt16(16); + CheckInt16(16); + CheckInt16(16); + CheckBinary("ready", 10); + CheckBinary("ready", 10); + CheckBinary("ready", 10); + CheckBinary("ready", 10); + CheckTimestamp(); + CheckTimestamp(); + CheckTimestamp(); + CheckTimestamp(); + CheckBinary("", 24); + CheckBinary("", 24); + CheckBinary("", 24); + CheckBinary("", 24); +} diff --git a/source/dnode/mnode/impl/test/mnode/CMakeLists.txt b/source/dnode/mnode/impl/test/mnode/CMakeLists.txt new file mode 100644 index 0000000000..4d9b473291 --- /dev/null +++ b/source/dnode/mnode/impl/test/mnode/CMakeLists.txt @@ -0,0 +1,11 @@ +aux_source_directory(. 
MTEST_SRC) +add_executable(mnode_test_mnode ${MTEST_SRC}) +target_link_libraries( + mnode_test_mnode + PUBLIC sut +) + +add_test( + NAME mnode_test_mnode + COMMAND mnode_test_mnode +) diff --git a/source/dnode/mnode/impl/test/mnode/mnode.cpp b/source/dnode/mnode/impl/test/mnode/mnode.cpp new file mode 100644 index 0000000000..f56b864cad --- /dev/null +++ b/source/dnode/mnode/impl/test/mnode/mnode.cpp @@ -0,0 +1,290 @@ +/** + * @file mnode.cpp + * @author slguan (slguan@taosdata.com) + * @brief MNODE module mnode tests + * @version 1.0 + * @date 2022-01-07 + * + * @copyright Copyright (c) 2022 + * + */ + +#include "sut.h" + +class MndTestMnode : public ::testing::Test { + public: + void SetUp() override {} + void TearDown() override {} + + public: + static void SetUpTestSuite() { + test.Init("/tmp/mnode_test_mnode1", 9028); + const char* fqdn = "localhost"; + const char* firstEp = "localhost:9028"; + + server2.Start("/tmp/mnode_test_mnode2", fqdn, 9029, firstEp); + taosMsleep(300); + } + + static void TearDownTestSuite() { + server2.Stop(); + test.Cleanup(); + } + + static Testbase test; + static TestServer server2; +}; + +Testbase MndTestMnode::test; +TestServer MndTestMnode::server2; + +TEST_F(MndTestMnode, 01_ShowDnode) { + test.SendShowMetaReq(TSDB_MGMT_TABLE_MNODE, ""); + CHECK_META("show mnodes", 5); + + CHECK_SCHEMA(0, TSDB_DATA_TYPE_SMALLINT, 2, "id"); + CHECK_SCHEMA(1, TSDB_DATA_TYPE_BINARY, TSDB_EP_LEN + VARSTR_HEADER_SIZE, "endpoint"); + CHECK_SCHEMA(2, TSDB_DATA_TYPE_BINARY, 12 + VARSTR_HEADER_SIZE, "role"); + CHECK_SCHEMA(3, TSDB_DATA_TYPE_TIMESTAMP, 8, "role_time"); + CHECK_SCHEMA(4, TSDB_DATA_TYPE_TIMESTAMP, 8, "create_time"); + + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 1); + + CheckInt16(1); + CheckBinary("localhost:9028", TSDB_EP_LEN); + CheckBinary("master", 12); + CheckInt64(0); + CheckTimestamp(); +} + +TEST_F(MndTestMnode, 02_Create_Mnode_Invalid_Id) { + { + int32_t contLen = sizeof(SMCreateMnodeReq); + + SMCreateMnodeReq* 
pReq = (SMCreateMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(1); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_MNODE_ALREADY_EXIST); + } +} + +TEST_F(MndTestMnode, 03_Create_Mnode_Invalid_Id) { + { + int32_t contLen = sizeof(SMCreateMnodeReq); + + SMCreateMnodeReq* pReq = (SMCreateMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_DNODE_NOT_EXIST); + } +} + +TEST_F(MndTestMnode, 04_Create_Mnode) { + { + // create dnode + int32_t contLen = sizeof(SCreateDnodeReq); + + SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); + strcpy(pReq->fqdn, "localhost"); + pReq->port = htonl(9029); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + + taosMsleep(1300); + test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, ""); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 2); + } + + { + // create mnode + int32_t contLen = sizeof(SMCreateMnodeReq); + + SMCreateMnodeReq* pReq = (SMCreateMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + + test.SendShowMetaReq(TSDB_MGMT_TABLE_MNODE, ""); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 2); + + CheckInt16(1); + CheckInt16(2); + CheckBinary("localhost:9028", TSDB_EP_LEN); + CheckBinary("localhost:9029", TSDB_EP_LEN); + CheckBinary("master", 12); + CheckBinary("slave", 12); + CheckInt64(0); + CheckInt64(0); + CheckTimestamp(); + CheckTimestamp(); + } + + { + // drop mnode + int32_t contLen = sizeof(SMDropMnodeReq); + + SMDropMnodeReq* pReq = (SMDropMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + SRpcMsg* pRsp = 
test.SendReq(TDMT_MND_DROP_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + + test.SendShowMetaReq(TSDB_MGMT_TABLE_MNODE, ""); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 1); + + CheckInt16(1); + CheckBinary("localhost:9028", TSDB_EP_LEN); + CheckBinary("master", 12); + CheckInt64(0); + CheckTimestamp(); + } + + { + // drop mnode + int32_t contLen = sizeof(SMDropMnodeReq); + + SMDropMnodeReq* pReq = (SMDropMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_MNODE_NOT_EXIST); + } +} + +TEST_F(MndTestMnode, 03_Create_Mnode_Rollback) { + { + // send message first, then dnode2 crash, result is returned, and rollback is started + int32_t contLen = sizeof(SMCreateMnodeReq); + + SMCreateMnodeReq* pReq = (SMCreateMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + server2.Stop(); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_RPC_NETWORK_UNAVAIL); + } + + { + // continue send message, mnode is creating + int32_t contLen = sizeof(SMCreateMnodeReq); + + SMCreateMnodeReq* pReq = (SMCreateMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_CREATING); + } + + { + // continue send message, mnode is creating + int32_t contLen = sizeof(SMDropMnodeReq); + + SMDropMnodeReq* pReq = (SMDropMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_CREATING); + } + + { + // server start, wait until the rollback finished + server2.DoStart(); + taosMsleep(1000); + + int32_t retry = 0; + int32_t retryMax = 20; 
+ + for (retry = 0; retry < retryMax; retry++) { + int32_t contLen = sizeof(SMCreateMnodeReq); + + SMCreateMnodeReq* pReq = (SMCreateMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + if (pRsp->code == TSDB_CODE_MND_MNODE_ALREADY_EXIST) break; + taosMsleep(1000); + } + + ASSERT_NE(retry, retryMax); + } +} + +TEST_F(MndTestMnode, 04_Drop_Mnode_Rollback) { + { + // send message first, then dnode2 crash, result is returned, and rollback is started + int32_t contLen = sizeof(SMDropMnodeReq); + + SMDropMnodeReq* pReq = (SMDropMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + server2.Stop(); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_RPC_NETWORK_UNAVAIL); + } + + { + // continue send message, mnode is dropping + int32_t contLen = sizeof(SMCreateMnodeReq); + + SMCreateMnodeReq* pReq = (SMCreateMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_DROPPING); + } + + { + // continue send message, mnode is dropping + int32_t contLen = sizeof(SMDropMnodeReq); + + SMDropMnodeReq* pReq = (SMDropMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_MNODE, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_DROPPING); + } + + { + // server start, wait until the rollback finished + server2.DoStart(); + taosMsleep(1000); + + int32_t retry = 0; + int32_t retryMax = 20; + + for (retry = 0; retry < retryMax; retry++) { + int32_t contLen = sizeof(SMCreateMnodeReq); + + SMCreateMnodeReq* pReq = (SMCreateMnodeReq*)rpcMallocCont(contLen); + pReq->dnodeId = htonl(2); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_MNODE, pReq, contLen); + 
ASSERT_NE(pRsp, nullptr); + if (pRsp->code == 0) break; + taosMsleep(1000); + } + + ASSERT_NE(retry, retryMax); + } +} \ No newline at end of file diff --git a/source/dnode/mnode/impl/test/profile/CMakeLists.txt b/source/dnode/mnode/impl/test/profile/CMakeLists.txt new file mode 100644 index 0000000000..88d7366b7d --- /dev/null +++ b/source/dnode/mnode/impl/test/profile/CMakeLists.txt @@ -0,0 +1,11 @@ +aux_source_directory(. PROFILE_SRC) +add_executable(mnode_test_profile ${PROFILE_SRC}) +target_link_libraries( + mnode_test_profile + PUBLIC sut +) + +add_test( + NAME mnode_test_profile + COMMAND mnode_test_profile +) diff --git a/source/dnode/mgmt/impl/test/profile/profile.cpp b/source/dnode/mnode/impl/test/profile/profile.cpp similarity index 75% rename from source/dnode/mgmt/impl/test/profile/profile.cpp rename to source/dnode/mnode/impl/test/profile/profile.cpp index 29b71f1c27..bdffb6c72a 100644 --- a/source/dnode/mgmt/impl/test/profile/profile.cpp +++ b/source/dnode/mnode/impl/test/profile/profile.cpp @@ -1,19 +1,19 @@ /** * @file profile.cpp * @author slguan (slguan@taosdata.com) - * @brief DNODE module profile-msg tests - * @version 0.1 - * @date 2021-12-15 + * @brief MNODE module profile tests + * @version 1.0 + * @date 2022-01-06 * - * @copyright Copyright (c) 2021 + * @copyright Copyright (c) 2022 * */ #include "sut.h" -class DndTestProfile : public ::testing::Test { +class MndTestProfile : public ::testing::Test { protected: - static void SetUpTestSuite() { test.Init("/tmp/dnode_test_profile", 9080); } + static void SetUpTestSuite() { test.Init("/tmp/mnode_test_profile", 9022); } static void TearDownTestSuite() { test.Cleanup(); } static Testbase test; @@ -24,15 +24,15 @@ class DndTestProfile : public ::testing::Test { void TearDown() override {} }; -Testbase DndTestProfile::test; -int32_t DndTestProfile::connId; +Testbase MndTestProfile::test; +int32_t MndTestProfile::connId; -TEST_F(DndTestProfile, 01_ConnectMsg) { - int32_t contLen = 
sizeof(SConnectMsg); +TEST_F(MndTestProfile, 01_ConnectMsg) { + int32_t contLen = sizeof(SConnectReq); - SConnectMsg* pReq = (SConnectMsg*)rpcMallocCont(contLen); + SConnectReq* pReq = (SConnectReq*)rpcMallocCont(contLen); pReq->pid = htonl(1234); - strcpy(pReq->app, "dnode_test_profile"); + strcpy(pReq->app, "mnode_test_profile"); strcpy(pReq->db, ""); SRpcMsg* pMsg = test.SendReq(TDMT_MND_CONNECT, pReq, contLen); @@ -53,18 +53,18 @@ TEST_F(DndTestProfile, 01_ConnectMsg) { EXPECT_EQ(pRsp->epSet.inUse, 0); EXPECT_EQ(pRsp->epSet.numOfEps, 1); - EXPECT_EQ(pRsp->epSet.port[0], 9080); + EXPECT_EQ(pRsp->epSet.port[0], 9022); EXPECT_STREQ(pRsp->epSet.fqdn[0], "localhost"); connId = pRsp->connId; } -TEST_F(DndTestProfile, 02_ConnectMsg_InvalidDB) { - int32_t contLen = sizeof(SConnectMsg); +TEST_F(MndTestProfile, 02_ConnectMsg_InvalidDB) { + int32_t contLen = sizeof(SConnectReq); - SConnectMsg* pReq = (SConnectMsg*)rpcMallocCont(contLen); + SConnectReq* pReq = (SConnectReq*)rpcMallocCont(contLen); pReq->pid = htonl(1234); - strcpy(pReq->app, "dnode_test_profile"); + strcpy(pReq->app, "mnode_test_profile"); strcpy(pReq->db, "invalid_db"); SRpcMsg* pRsp = test.SendReq(TDMT_MND_CONNECT, pReq, contLen); @@ -73,7 +73,7 @@ TEST_F(DndTestProfile, 02_ConnectMsg_InvalidDB) { ASSERT_EQ(pRsp->contLen, 0); } -TEST_F(DndTestProfile, 03_ConnectMsg_Show) { +TEST_F(MndTestProfile, 03_ConnectMsg_Show) { test.SendShowMetaReq(TSDB_MGMT_TABLE_CONNS, ""); CHECK_META("show connections", 7); CHECK_SCHEMA(0, TSDB_DATA_TYPE_INT, 4, "connId"); @@ -88,22 +88,22 @@ TEST_F(DndTestProfile, 03_ConnectMsg_Show) { EXPECT_EQ(test.GetShowRows(), 1); CheckInt32(1); CheckBinary("root", TSDB_USER_LEN); - CheckBinary("dnode_test_profile", TSDB_APP_NAME_LEN); + CheckBinary("mnode_test_profile", TSDB_APP_NAME_LEN); CheckInt32(1234); IgnoreBinary(TSDB_IPv4ADDR_LEN + 6); CheckTimestamp(); CheckTimestamp(); } -TEST_F(DndTestProfile, 04_HeartBeatMsg) { - int32_t contLen = sizeof(SHeartBeatMsg); 
+TEST_F(MndTestProfile, 04_HeartBeatMsg) { + int32_t contLen = sizeof(SHeartBeatReq); - SHeartBeatMsg* pReq = (SHeartBeatMsg*)rpcMallocCont(contLen); + SHeartBeatReq* pReq = (SHeartBeatReq*)rpcMallocCont(contLen); pReq->connId = htonl(connId); pReq->pid = htonl(1234); pReq->numOfQueries = htonl(0); pReq->numOfStreams = htonl(0); - strcpy(pReq->app, "dnode_test_profile"); + strcpy(pReq->app, "mnode_test_profile"); SRpcMsg* pMsg = test.SendReq(TDMT_MND_HEARTBEAT, pReq, contLen); ASSERT_NE(pMsg, nullptr); @@ -127,15 +127,15 @@ TEST_F(DndTestProfile, 04_HeartBeatMsg) { EXPECT_EQ(pRsp->epSet.inUse, 0); EXPECT_EQ(pRsp->epSet.numOfEps, 1); - EXPECT_EQ(pRsp->epSet.port[0], 9080); + EXPECT_EQ(pRsp->epSet.port[0], 9022); EXPECT_STREQ(pRsp->epSet.fqdn[0], "localhost"); } -TEST_F(DndTestProfile, 05_KillConnMsg) { +TEST_F(MndTestProfile, 05_KillConnMsg) { { - int32_t contLen = sizeof(SKillConnMsg); + int32_t contLen = sizeof(SKillConnReq); - SKillConnMsg* pReq = (SKillConnMsg*)rpcMallocCont(contLen); + SKillConnReq* pReq = (SKillConnReq*)rpcMallocCont(contLen); pReq->connId = htonl(connId); SRpcMsg* pRsp = test.SendReq(TDMT_MND_KILL_CONN, pReq, contLen); @@ -144,14 +144,14 @@ TEST_F(DndTestProfile, 05_KillConnMsg) { } { - int32_t contLen = sizeof(SHeartBeatMsg); + int32_t contLen = sizeof(SHeartBeatReq); - SHeartBeatMsg* pReq = (SHeartBeatMsg*)rpcMallocCont(contLen); + SHeartBeatReq* pReq = (SHeartBeatReq*)rpcMallocCont(contLen); pReq->connId = htonl(connId); pReq->pid = htonl(1234); pReq->numOfQueries = htonl(0); pReq->numOfStreams = htonl(0); - strcpy(pReq->app, "dnode_test_profile"); + strcpy(pReq->app, "mnode_test_profile"); SRpcMsg* pRsp = test.SendReq(TDMT_MND_HEARTBEAT, pReq, contLen); ASSERT_NE(pRsp, nullptr); @@ -160,11 +160,11 @@ TEST_F(DndTestProfile, 05_KillConnMsg) { } { - int32_t contLen = sizeof(SConnectMsg); + int32_t contLen = sizeof(SConnectReq); - SConnectMsg* pReq = (SConnectMsg*)rpcMallocCont(contLen); + SConnectReq* pReq = 
(SConnectReq*)rpcMallocCont(contLen); pReq->pid = htonl(1234); - strcpy(pReq->app, "dnode_test_profile"); + strcpy(pReq->app, "mnode_test_profile"); strcpy(pReq->db, ""); SRpcMsg* pMsg = test.SendReq(TDMT_MND_CONNECT, pReq, contLen); @@ -185,17 +185,17 @@ TEST_F(DndTestProfile, 05_KillConnMsg) { EXPECT_EQ(pRsp->epSet.inUse, 0); EXPECT_EQ(pRsp->epSet.numOfEps, 1); - EXPECT_EQ(pRsp->epSet.port[0], 9080); + EXPECT_EQ(pRsp->epSet.port[0], 9022); EXPECT_STREQ(pRsp->epSet.fqdn[0], "localhost"); connId = pRsp->connId; } } -TEST_F(DndTestProfile, 06_KillConnMsg_InvalidConn) { - int32_t contLen = sizeof(SKillConnMsg); +TEST_F(MndTestProfile, 06_KillConnMsg_InvalidConn) { + int32_t contLen = sizeof(SKillConnReq); - SKillConnMsg* pReq = (SKillConnMsg*)rpcMallocCont(contLen); + SKillConnReq* pReq = (SKillConnReq*)rpcMallocCont(contLen); pReq->connId = htonl(2345); SRpcMsg* pRsp = test.SendReq(TDMT_MND_KILL_CONN, pReq, contLen); @@ -203,11 +203,11 @@ TEST_F(DndTestProfile, 06_KillConnMsg_InvalidConn) { ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_CONN_ID); } -TEST_F(DndTestProfile, 07_KillQueryMsg) { +TEST_F(MndTestProfile, 07_KillQueryMsg) { { - int32_t contLen = sizeof(SKillQueryMsg); + int32_t contLen = sizeof(SKillQueryReq); - SKillQueryMsg* pReq = (SKillQueryMsg*)rpcMallocCont(contLen); + SKillQueryReq* pReq = (SKillQueryReq*)rpcMallocCont(contLen); pReq->connId = htonl(connId); pReq->queryId = htonl(1234); @@ -218,14 +218,14 @@ TEST_F(DndTestProfile, 07_KillQueryMsg) { } { - int32_t contLen = sizeof(SHeartBeatMsg); + int32_t contLen = sizeof(SHeartBeatReq); - SHeartBeatMsg* pReq = (SHeartBeatMsg*)rpcMallocCont(contLen); + SHeartBeatReq* pReq = (SHeartBeatReq*)rpcMallocCont(contLen); pReq->connId = htonl(connId); pReq->pid = htonl(1234); pReq->numOfQueries = htonl(0); pReq->numOfStreams = htonl(0); - strcpy(pReq->app, "dnode_test_profile"); + strcpy(pReq->app, "mnode_test_profile"); SRpcMsg* pMsg = test.SendReq(TDMT_MND_HEARTBEAT, pReq, contLen); ASSERT_NE(pMsg, nullptr); 
@@ -249,15 +249,15 @@ TEST_F(DndTestProfile, 07_KillQueryMsg) { EXPECT_EQ(pRsp->epSet.inUse, 0); EXPECT_EQ(pRsp->epSet.numOfEps, 1); - EXPECT_EQ(pRsp->epSet.port[0], 9080); + EXPECT_EQ(pRsp->epSet.port[0], 9022); EXPECT_STREQ(pRsp->epSet.fqdn[0], "localhost"); } } -TEST_F(DndTestProfile, 08_KillQueryMsg_InvalidConn) { - int32_t contLen = sizeof(SKillQueryMsg); +TEST_F(MndTestProfile, 08_KillQueryMsg_InvalidConn) { + int32_t contLen = sizeof(SKillQueryReq); - SKillQueryMsg* pReq = (SKillQueryMsg*)rpcMallocCont(contLen); + SKillQueryReq* pReq = (SKillQueryReq*)rpcMallocCont(contLen); pReq->connId = htonl(2345); pReq->queryId = htonl(1234); @@ -266,7 +266,7 @@ TEST_F(DndTestProfile, 08_KillQueryMsg_InvalidConn) { ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_CONN_ID); } -TEST_F(DndTestProfile, 09_KillQueryMsg) { +TEST_F(MndTestProfile, 09_KillQueryMsg) { test.SendShowMetaReq(TSDB_MGMT_TABLE_QUERIES, ""); CHECK_META("show queries", 14); diff --git a/source/dnode/mnode/impl/test/qnode/qnode.cpp b/source/dnode/mnode/impl/test/qnode/qnode.cpp index 9aa41ca177..8a9e087e7f 100644 --- a/source/dnode/mnode/impl/test/qnode/qnode.cpp +++ b/source/dnode/mnode/impl/test/qnode/qnode.cpp @@ -96,9 +96,9 @@ TEST_F(MndTestQnode, 02_Create_Qnode) { TEST_F(MndTestQnode, 03_Drop_Qnode) { { - int32_t contLen = sizeof(SCreateDnodeMsg); + int32_t contLen = sizeof(SCreateDnodeReq); - SCreateDnodeMsg* pReq = (SCreateDnodeMsg*)rpcMallocCont(contLen); + SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); strcpy(pReq->fqdn, "localhost"); pReq->port = htonl(9015); diff --git a/source/dnode/mnode/impl/test/show/show.cpp b/source/dnode/mnode/impl/test/show/show.cpp index b4a625d8be..eabcbc7eb4 100644 --- a/source/dnode/mnode/impl/test/show/show.cpp +++ b/source/dnode/mnode/impl/test/show/show.cpp @@ -13,7 +13,7 @@ class MndTestShow : public ::testing::Test { protected: - static void SetUpTestSuite() { test.Init("/tmp/mnode_test_show", 9020); } + static void SetUpTestSuite() { 
test.Init("/tmp/mnode_test_show", 9021); } static void TearDownTestSuite() { test.Cleanup(); } static Testbase test; @@ -50,9 +50,9 @@ TEST_F(MndTestShow, 02_ShowMsg_InvalidMsgStart) { } TEST_F(MndTestShow, 03_ShowMsg_Conn) { - int32_t contLen = sizeof(SConnectMsg); + int32_t contLen = sizeof(SConnectReq); - SConnectMsg* pReq = (SConnectMsg*)rpcMallocCont(contLen); + SConnectReq* pReq = (SConnectReq*)rpcMallocCont(contLen); pReq->pid = htonl(1234); strcpy(pReq->app, "mnode_test_show"); strcpy(pReq->db, ""); diff --git a/source/dnode/mnode/impl/test/snode/snode.cpp b/source/dnode/mnode/impl/test/snode/snode.cpp index 2dcfc658a4..3a38b9ede6 100644 --- a/source/dnode/mnode/impl/test/snode/snode.cpp +++ b/source/dnode/mnode/impl/test/snode/snode.cpp @@ -96,9 +96,9 @@ TEST_F(MndTestSnode, 02_Create_Snode) { TEST_F(MndTestSnode, 03_Drop_Snode) { { - int32_t contLen = sizeof(SCreateDnodeMsg); + int32_t contLen = sizeof(SCreateDnodeReq); - SCreateDnodeMsg* pReq = (SCreateDnodeMsg*)rpcMallocCont(contLen); + SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); strcpy(pReq->fqdn, "localhost"); pReq->port = htonl(9017); diff --git a/source/dnode/mgmt/impl/test/stb/CMakeLists.txt b/source/dnode/mnode/impl/test/stb/CMakeLists.txt similarity index 100% rename from source/dnode/mgmt/impl/test/stb/CMakeLists.txt rename to source/dnode/mnode/impl/test/stb/CMakeLists.txt diff --git a/source/dnode/mgmt/impl/test/stb/stb.cpp b/source/dnode/mnode/impl/test/stb/stb.cpp similarity index 92% rename from source/dnode/mgmt/impl/test/stb/stb.cpp rename to source/dnode/mnode/impl/test/stb/stb.cpp index b3d5d29785..55cc030122 100644 --- a/source/dnode/mgmt/impl/test/stb/stb.cpp +++ b/source/dnode/mnode/impl/test/stb/stb.cpp @@ -1,19 +1,19 @@ /** * @file stb.cpp * @author slguan (slguan@taosdata.com) - * @brief DNODE module db-msg tests - * @version 0.1 - * @date 2021-12-17 + * @brief MNODE module stb tests + * @version 1.0 + * @date 2022-01-12 * - * @copyright Copyright (c) 2021 + 
* @copyright Copyright (c) 2022 * */ #include "sut.h" -class DndTestStb : public ::testing::Test { +class MndTestStb : public ::testing::Test { protected: - static void SetUpTestSuite() { test.Init("/tmp/dnode_test_stb", 9101); } + static void SetUpTestSuite() { test.Init("/tmp/mnode_test_stb", 9034); } static void TearDownTestSuite() { test.Cleanup(); } static Testbase test; @@ -23,9 +23,9 @@ class DndTestStb : public ::testing::Test { void TearDown() override {} }; -Testbase DndTestStb::test; +Testbase MndTestStb::test; -TEST_F(DndTestStb, 01_Create_Show_Meta_Drop_Restart_Stb) { +TEST_F(MndTestStb, 01_Create_Show_Meta_Drop_Restart_Stb) { { int32_t contLen = sizeof(SCreateDbMsg); @@ -137,9 +137,9 @@ TEST_F(DndTestStb, 01_Create_Show_Meta_Drop_Restart_Stb) { pRsp->numOfColumns = htonl(pRsp->numOfColumns); pRsp->sversion = htonl(pRsp->sversion); pRsp->tversion = htonl(pRsp->tversion); - pRsp->suid = htobe64(pRsp->suid); - pRsp->tuid = htobe64(pRsp->tuid); - pRsp->vgId = htobe64(pRsp->vgId); + pRsp->suid = be64toh(pRsp->suid); + pRsp->tuid = be64toh(pRsp->tuid); + pRsp->vgId = be64toh(pRsp->vgId); for (int32_t i = 0; i < pRsp->numOfTags + pRsp->numOfColumns; ++i) { SSchema* pSchema = &pRsp->pSchema[i]; pSchema->colId = htonl(pSchema->colId); @@ -156,7 +156,7 @@ TEST_F(DndTestStb, 01_Create_Show_Meta_Drop_Restart_Stb) { EXPECT_EQ(pRsp->sversion, 1); EXPECT_EQ(pRsp->tversion, 0); EXPECT_GT(pRsp->suid, 0); - EXPECT_EQ(pRsp->tuid, 0); + EXPECT_GT(pRsp->tuid, 0); EXPECT_EQ(pRsp->vgId, 0); { diff --git a/source/dnode/mnode/impl/test/trans/trans.cpp b/source/dnode/mnode/impl/test/trans/trans.cpp index e2ed707a99..a0c4c539fb 100644 --- a/source/dnode/mnode/impl/test/trans/trans.cpp +++ b/source/dnode/mnode/impl/test/trans/trans.cpp @@ -133,9 +133,9 @@ TEST_F(MndTestTrans, 02_Create_Qnode1_Crash) { TEST_F(MndTestTrans, 03_Create_Qnode2_Crash) { { - int32_t contLen = sizeof(SCreateDnodeMsg); + int32_t contLen = sizeof(SCreateDnodeReq); - SCreateDnodeMsg* pReq = 
(SCreateDnodeMsg*)rpcMallocCont(contLen); + SCreateDnodeReq* pReq = (SCreateDnodeReq*)rpcMallocCont(contLen); strcpy(pReq->fqdn, "localhost"); pReq->port = htonl(9020); diff --git a/source/dnode/mnode/sdb/src/sdbHash.c b/source/dnode/mnode/sdb/src/sdbHash.c index 13b2c7daa5..8fdb6b1657 100644 --- a/source/dnode/mnode/sdb/src/sdbHash.c +++ b/source/dnode/mnode/sdb/src/sdbHash.c @@ -429,3 +429,12 @@ int32_t sdbGetMaxId(SSdb *pSdb, ESdbType type) { maxId = MAX(maxId, pSdb->maxId[type]); return maxId + 1; } + +int64_t sdbGetTableVer(SSdb *pSdb, ESdbType type) { + if (type >= SDB_MAX || type < 0) { + terrno = TSDB_CODE_SDB_INVALID_TABLE_TYPE; + return -1; + } + + return pSdb->tableVer[type]; +} diff --git a/source/dnode/vnode/impl/inc/vnodeCommit.h b/source/dnode/vnode/impl/inc/vnodeCommit.h index 031089ba14..e6568fbd52 100644 --- a/source/dnode/vnode/impl/inc/vnodeCommit.h +++ b/source/dnode/vnode/impl/inc/vnodeCommit.h @@ -23,8 +23,8 @@ extern "C" { #endif #define vnodeShouldCommit vnodeBufPoolIsFull +int vnodeSyncCommit(SVnode *pVnode); int vnodeAsyncCommit(SVnode *pVnode); -int vnodeCommit(void *arg); #ifdef __cplusplus } diff --git a/source/dnode/vnode/impl/src/vnodeCommit.c b/source/dnode/vnode/impl/src/vnodeCommit.c index f5bf60a7e3..bc0a3c6680 100644 --- a/source/dnode/vnode/impl/src/vnodeCommit.c +++ b/source/dnode/vnode/impl/src/vnodeCommit.c @@ -15,10 +15,14 @@ #include "vnodeDef.h" -static int vnodeStartCommit(SVnode *pVnode); -static int vnodeEndCommit(SVnode *pVnode); +static int vnodeStartCommit(SVnode *pVnode); +static int vnodeEndCommit(SVnode *pVnode); +static int vnodeCommit(void *arg); +static void vnodeWaitCommit(SVnode *pVnode); int vnodeAsyncCommit(SVnode *pVnode) { + vnodeWaitCommit(pVnode); + vnodeBufPoolSwitch(pVnode); SVnodeTask *pTask = (SVnodeTask *)malloc(sizeof(*pTask)); @@ -33,7 +37,13 @@ int vnodeAsyncCommit(SVnode *pVnode) { return 0; } -int vnodeCommit(void *arg) { +int vnodeSyncCommit(SVnode *pVnode) { + vnodeAsyncCommit(pVnode); + 
vnodeWaitCommit(pVnode); + return 0; +} + +static int vnodeCommit(void *arg) { SVnode *pVnode = (SVnode *)arg; metaCommit(pVnode->pMeta); @@ -42,7 +52,6 @@ int vnodeCommit(void *arg) { vnodeBufPoolRecycle(pVnode); tsem_post(&(pVnode->canCommit)); - // TODO return 0; } @@ -54,4 +63,6 @@ static int vnodeStartCommit(SVnode *pVnode) { static int vnodeEndCommit(SVnode *pVnode) { // TODO return 0; -} \ No newline at end of file +} + +static FORCE_INLINE void vnodeWaitCommit(SVnode *pVnode) { tsem_wait(&pVnode->canCommit); } \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeMain.c b/source/dnode/vnode/impl/src/vnodeMain.c index 2b0363c97f..995bed6e0b 100644 --- a/source/dnode/vnode/impl/src/vnodeMain.c +++ b/source/dnode/vnode/impl/src/vnodeMain.c @@ -24,9 +24,9 @@ SVnode *vnodeOpen(const char *path, const SVnodeCfg *pVnodeCfg) { SVnode *pVnode = NULL; // Set default options - if (pVnodeCfg == NULL) { + //if (pVnodeCfg == NULL) { pVnodeCfg = &defaultVnodeOptions; - } + //} // Validate options if (vnodeValidateOptions(pVnodeCfg) < 0) { @@ -137,6 +137,7 @@ static int vnodeOpenImpl(SVnode *pVnode) { } static void vnodeCloseImpl(SVnode *pVnode) { + // vnodeSyncCommit(pVnode); if (pVnode) { vnodeCloseBufPool(pVnode); metaClose(pVnode->pMeta); diff --git a/source/dnode/vnode/impl/src/vnodeQuery.c b/source/dnode/vnode/impl/src/vnodeQuery.c index 308fc9d2e5..a32ee50df5 100644 --- a/source/dnode/vnode/impl/src/vnodeQuery.c +++ b/source/dnode/vnode/impl/src/vnodeQuery.c @@ -105,6 +105,9 @@ static int vnodeGetTableMeta(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) { if (pTbCfg->type == META_CHILD_TABLE) { strcpy(pTbMetaMsg->stbFname, pStbCfg->name); pTbMetaMsg->suid = htobe64(pTbCfg->ctbCfg.suid); + } else if (pTbCfg->type == META_SUPER_TABLE) { + strcpy(pTbMetaMsg->stbFname, pTbCfg->name); + pTbMetaMsg->suid = htobe64(uid); } pTbMetaMsg->numOfTags = htonl(nTagCols); pTbMetaMsg->numOfColumns = htonl(nCols); diff --git 
a/source/dnode/vnode/impl/src/vnodeWrite.c b/source/dnode/vnode/impl/src/vnodeWrite.c index 88a73ca174..ddcb93863a 100644 --- a/source/dnode/vnode/impl/src/vnodeWrite.c +++ b/source/dnode/vnode/impl/src/vnodeWrite.c @@ -92,7 +92,7 @@ int vnodeApplyWMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) { // } break; case TDMT_VND_SUBMIT: - if (tsdbInsertData(pVnode->pTsdb, (SSubmitMsg *)ptr) < 0) { + if (tsdbInsertData(pVnode->pTsdb, (SSubmitMsg *)ptr, NULL) < 0) { // TODO: handle error } break; diff --git a/source/dnode/vnode/meta/CMakeLists.txt b/source/dnode/vnode/meta/CMakeLists.txt index bb48d1acad..7041811617 100644 --- a/source/dnode/vnode/meta/CMakeLists.txt +++ b/source/dnode/vnode/meta/CMakeLists.txt @@ -19,11 +19,13 @@ add_library(meta STATIC ${META_SRC}) target_include_directories( meta PUBLIC "${CMAKE_SOURCE_DIR}/include/dnode/vnode/meta" + PUBLIC "${CMAKE_SOURCE_DIR}/include/libs/index" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc" ) target_link_libraries( meta PUBLIC common + PUBLIC index ) if(${META_DB_IMPL} STREQUAL "BDB") diff --git a/source/dnode/vnode/meta/src/metaIdx.c b/source/dnode/vnode/meta/src/metaIdx.c index fe07f5ced4..828bd12088 100644 --- a/source/dnode/vnode/meta/src/metaIdx.c +++ b/source/dnode/vnode/meta/src/metaIdx.c @@ -13,9 +13,13 @@ * along with this program. If not, see . 
*/ +#include "index.h" #include "metaDef.h" struct SMetaIdx { +#ifdef USE_INVERTED_INDEX + SIndex *pIdx; +#endif /* data */ }; @@ -43,6 +47,13 @@ int metaOpenIdx(SMeta *pMeta) { rocksdb_options_destroy(options); #endif +#ifdef USE_INVERTED_INDEX + SIndexOpts opts; + if (indexOpen(&opts, pMeta->path, &pMeta->pIdx->pIdx) != 0) { + return -1; + } + +#endif return 0; } @@ -53,14 +64,47 @@ void metaCloseIdx(SMeta *pMeta) { /* TODO */ pMeta->pIdx = NULL; } #endif + +#ifdef USE_INVERTED_INDEX + SIndexOpts opts; + if (indexClose(pMeta->pIdx->pIdx) != 0) { + return -1; + } + +#endif } -int metaSaveTableToIdx(SMeta *pMeta, const STbCfg *pTbOptions) { +int metaSaveTableToIdx(SMeta *pMeta, const STbCfg *pTbCfg) { +#ifdef USE_INVERTED_INDEX + if (pTbCfgs - type == META_CHILD_TABLE) { + char buf[8] = {0}; + int16_t colId = (kvRowColIdx(pTbCfg->ctbCfg.pTag))[0].colId; + sprintf(buf, "%d", colId); // colname + + char *pTagVal = (char *)tdGetKVRowValOfCol(pTbCfg->ctbCfg.pTag, (kvRowColIdx(pTbCfg->ctbCfg.pTag))[0].colId); + + tb_uid_t suid = pTbCfg->ctbCfg.suid; // super id + tb_uid_t tuid = 0; // child table uid + SIndexMultiTerm *terms = indexMultiTermCreate(); + SIndexTerm * term = + indexTermCreate(suid, ADD_VALUE, TSDB_DATA_TYPE_BINARY, buf, strlen(buf), pTagVal, strlen(pTagVal), tuid); + indexMultiTermAdd(terms, term); + + int ret = indexPut(pMeta->pIdx->pIdx, terms); + indexMultiTermDestroy(terms); + return ret; + } else { + return DB_DONOTINDEX; + } +#endif // TODO return 0; } int metaRemoveTableFromIdx(SMeta *pMeta, tb_uid_t uid) { +#ifdef USE_INVERTED_INDEX + +#endif // TODO return 0; -} \ No newline at end of file +} diff --git a/source/dnode/vnode/meta/src/metaTbUid.c b/source/dnode/vnode/meta/src/metaTbUid.c index be85b45d95..cad1eba134 100644 --- a/source/dnode/vnode/meta/src/metaTbUid.c +++ b/source/dnode/vnode/meta/src/metaTbUid.c @@ -22,9 +22,10 @@ int metaOpenUidGnrt(SMeta *pMeta) { return 0; } -void metaCloseUidGnrt(SMeta *pMeta) { /* TODO */ } +void 
metaCloseUidGnrt(SMeta *pMeta) { /* TODO */ +} tb_uid_t metaGenerateUid(SMeta *pMeta) { // Generate a new table UID return ++(pMeta->uidGnrt.nextUid); -} \ No newline at end of file +} diff --git a/source/dnode/vnode/tsdb/CMakeLists.txt b/source/dnode/vnode/tsdb/CMakeLists.txt index 30e9d70f12..2d3f6d6e42 100644 --- a/source/dnode/vnode/tsdb/CMakeLists.txt +++ b/source/dnode/vnode/tsdb/CMakeLists.txt @@ -10,6 +10,9 @@ else(0) "src/tsdbMemTable.c" "src/tsdbOptions.c" "src/tsdbWrite.c" + "src/tsdbReadImpl.c" + "src/tsdbFile.c" + "src/tsdbFS.c" ) endif(0) @@ -25,4 +28,5 @@ target_link_libraries( PUBLIC util PUBLIC common PUBLIC tkv + PUBLIC tfs ) \ No newline at end of file diff --git a/source/dnode/vnode/tsdb/inc/tsdbCommit.h b/source/dnode/vnode/tsdb/inc/tsdbCommit.h index 82ba1c9dff..4043f22dd8 100644 --- a/source/dnode/vnode/tsdb/inc/tsdbCommit.h +++ b/source/dnode/vnode/tsdb/inc/tsdbCommit.h @@ -16,7 +16,6 @@ #ifndef _TD_TSDB_COMMIT_H_ #define _TD_TSDB_COMMIT_H_ -#if 0 typedef struct { int minFid; int midFid; @@ -30,9 +29,19 @@ typedef struct { int64_t size; } SKVRecord; +void tsdbGetRtnSnap(STsdb *pRepo, SRtn *pRtn); + +static FORCE_INLINE int TSDB_KEY_FID(TSKEY key, int32_t days, int8_t precision) { + if (key < 0) { + return (int)((key + 1) / tsTickPerDay[precision] / days - 1); + } else { + return (int)((key / tsTickPerDay[precision] / days)); + } +} + +#if 0 #define TSDB_DEFAULT_BLOCK_ROWS(maxRows) ((maxRows)*4 / 5) -void tsdbGetRtnSnap(STsdbRepo *pRepo, SRtn *pRtn); int tsdbEncodeKVRecord(void **buf, SKVRecord *pRecord); void *tsdbDecodeKVRecord(void *buf, SKVRecord *pRecord); void *tsdbCommitData(STsdbRepo *pRepo); diff --git a/source/dnode/vnode/tsdb/inc/tsdbDef.h b/source/dnode/vnode/tsdb/inc/tsdbDef.h index ded28727e4..e81c51441f 100644 --- a/source/dnode/vnode/tsdb/inc/tsdbDef.h +++ b/source/dnode/vnode/tsdb/inc/tsdbDef.h @@ -17,27 +17,43 @@ #define _TD_TSDB_DEF_H_ #include "mallocator.h" -#include "tmsg.h" -#include "tlist.h" +#include "tcompression.h" 
+#include "tglobal.h" #include "thash.h" +#include "tlist.h" +#include "tmsg.h" #include "tskiplist.h" +#include "ttime.h" #include "tsdb.h" +#include "tsdbCommit.h" +#include "tsdbFS.h" +#include "tsdbFile.h" +#include "tsdbLog.h" #include "tsdbMemTable.h" +#include "tsdbMemory.h" #include "tsdbOptions.h" +#include "tsdbReadImpl.h" #ifdef __cplusplus extern "C" { #endif struct STsdb { + int32_t vgId; char * path; - STsdbCfg options; + STsdbCfg config; STsdbMemTable * mem; STsdbMemTable * imem; + SRtn rtn; SMemAllocatorFactory *pmaf; + STsdbFS fs; }; +#define REPO_ID(r) 0 +#define REPO_CFG(r) (&(r)->config) +#define REPO_FS(r) (&(r)->fs) + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/tsdb/inc/tsdbFS.h b/source/dnode/vnode/tsdb/inc/tsdbFS.h index 0320756783..dfd34deb84 100644 --- a/source/dnode/vnode/tsdb/inc/tsdbFS.h +++ b/source/dnode/vnode/tsdb/inc/tsdbFS.h @@ -16,7 +16,7 @@ #ifndef _TD_TSDB_FS_H_ #define _TD_TSDB_FS_H_ -#if 0 +#include "tsdbFile.h" #define TSDB_FS_VERSION 0 @@ -39,19 +39,17 @@ typedef struct { // ================== typedef struct { STsdbFSMeta meta; // FS meta - SMFile* pmf; // meta file pointer - SMFile mf; // meta file - SArray* df; // data file array + SArray * df; // data file array } SFSStatus; typedef struct { pthread_rwlock_t lock; - SFSStatus* cstatus; // current status - SHashObj* metaCache; // meta cache - SHashObj* metaCacheComp; // meta cache for compact + SFSStatus *cstatus; // current status + SHashObj * metaCache; // meta cache + SHashObj * metaCacheComp; // meta cache for compact bool intxn; - SFSStatus* nstatus; // new status + SFSStatus *nstatus; // new status } STsdbFS; #define FS_CURRENT_STATUS(pfs) ((pfs)->cstatus) @@ -63,10 +61,10 @@ typedef struct { typedef struct { int direction; uint64_t version; // current FS version - STsdbFS* pfs; + STsdbFS * pfs; int index; // used to position next fset when version the same int fid; // used to seek when version is changed - SDFileSet* pSet; + SDFileSet *pSet; } 
SFSIter; #define TSDB_FS_ITER_FORWARD TSDB_ORDER_ASC @@ -74,21 +72,21 @@ typedef struct { STsdbFS *tsdbNewFS(STsdbCfg *pCfg); void * tsdbFreeFS(STsdbFS *pfs); -int tsdbOpenFS(STsdbRepo *pRepo); -void tsdbCloseFS(STsdbRepo *pRepo); -void tsdbStartFSTxn(STsdbRepo *pRepo, int64_t pointsAdd, int64_t storageAdd); -int tsdbEndFSTxn(STsdbRepo *pRepo); +int tsdbOpenFS(STsdb *pRepo); +void tsdbCloseFS(STsdb *pRepo); +void tsdbStartFSTxn(STsdb *pRepo, int64_t pointsAdd, int64_t storageAdd); +int tsdbEndFSTxn(STsdb *pRepo); int tsdbEndFSTxnWithError(STsdbFS *pfs); void tsdbUpdateFSTxnMeta(STsdbFS *pfs, STsdbFSMeta *pMeta); -void tsdbUpdateMFile(STsdbFS *pfs, const SMFile *pMFile); +// void tsdbUpdateMFile(STsdbFS *pfs, const SMFile *pMFile); int tsdbUpdateDFileSet(STsdbFS *pfs, const SDFileSet *pSet); void tsdbFSIterInit(SFSIter *pIter, STsdbFS *pfs, int direction); void tsdbFSIterSeek(SFSIter *pIter, int fid); SDFileSet *tsdbFSIterNext(SFSIter *pIter); -int tsdbLoadMetaCache(STsdbRepo *pRepo, bool recoverMeta); +int tsdbLoadMetaCache(STsdb *pRepo, bool recoverMeta); -static FORCE_INLINE int tsdbRLockFS(STsdbFS* pFs) { +static FORCE_INLINE int tsdbRLockFS(STsdbFS *pFs) { int code = pthread_rwlock_rdlock(&(pFs->lock)); if (code != 0) { terrno = TAOS_SYSTEM_ERROR(code); @@ -97,7 +95,7 @@ static FORCE_INLINE int tsdbRLockFS(STsdbFS* pFs) { return 0; } -static FORCE_INLINE int tsdbWLockFS(STsdbFS* pFs) { +static FORCE_INLINE int tsdbWLockFS(STsdbFS *pFs) { int code = pthread_rwlock_wrlock(&(pFs->lock)); if (code != 0) { terrno = TAOS_SYSTEM_ERROR(code); @@ -106,7 +104,7 @@ static FORCE_INLINE int tsdbWLockFS(STsdbFS* pFs) { return 0; } -static FORCE_INLINE int tsdbUnLockFS(STsdbFS* pFs) { +static FORCE_INLINE int tsdbUnLockFS(STsdbFS *pFs) { int code = pthread_rwlock_unlock(&(pFs->lock)); if (code != 0) { terrno = TAOS_SYSTEM_ERROR(code); @@ -115,6 +113,4 @@ static FORCE_INLINE int tsdbUnLockFS(STsdbFS* pFs) { return 0; } -#endif - #endif /* _TD_TSDB_FS_H_ */ diff --git 
a/source/dnode/vnode/tsdb/inc/tsdbFile.h b/source/dnode/vnode/tsdb/inc/tsdbFile.h index 73a7de0249..fb21ef56a7 100644 --- a/source/dnode/vnode/tsdb/inc/tsdbFile.h +++ b/source/dnode/vnode/tsdb/inc/tsdbFile.h @@ -16,7 +16,8 @@ #ifndef _TS_TSDB_FILE_H_ #define _TS_TSDB_FILE_H_ -#if 0 +#include "tchecksum.h" +#include "tfs.h" #define TSDB_FILE_HEAD_SIZE 512 #define TSDB_FILE_DELIMITER 0xF00AFA0F @@ -34,7 +35,7 @@ #define TSDB_FILE_SET_CLOSED(f) (TSDB_FILE_FD(f) = -1) #define TSDB_FILE_LEVEL(tf) TFILE_LEVEL(TSDB_FILE_F(tf)) #define TSDB_FILE_ID(tf) TFILE_ID(TSDB_FILE_F(tf)) -#define TSDB_FILE_FSYNC(tf) taosFsync(TSDB_FILE_FD(tf)) +#define TSDB_FILE_FSYNC(tf) taosFsyncFile(TSDB_FILE_FD(tf)) #define TSDB_FILE_STATE(tf) ((tf)->state) #define TSDB_FILE_SET_STATE(tf, s) ((tf)->state = (s)) #define TSDB_FILE_IS_OK(tf) (TSDB_FILE_STATE(tf) == TSDB_FILE_STATE_OK) @@ -42,6 +43,7 @@ typedef enum { TSDB_FILE_HEAD = 0, TSDB_FILE_DATA, TSDB_FILE_LAST, TSDB_FILE_MAX, TSDB_FILE_META } TSDB_FILE_T; +#if 0 // =============== SMFile typedef struct { int64_t size; @@ -68,7 +70,7 @@ int tsdbApplyMFileChange(SMFile* from, SMFile* to); int tsdbCreateMFile(SMFile* pMFile, bool updateHeader); int tsdbUpdateMFileHeader(SMFile* pMFile); int tsdbLoadMFileHeader(SMFile* pMFile, SMFInfo* pInfo); -int tsdbScanAndTryFixMFile(STsdbRepo* pRepo); +int tsdbScanAndTryFixMFile(STsdb* pRepo); int tsdbEncodeMFInfo(void** buf, SMFInfo* pInfo); void* tsdbDecodeMFInfo(void* buf, SMFInfo* pInfo); @@ -96,7 +98,7 @@ static FORCE_INLINE void tsdbCloseMFile(SMFile* pMFile) { static FORCE_INLINE int64_t tsdbSeekMFile(SMFile* pMFile, int64_t offset, int whence) { ASSERT(TSDB_FILE_OPENED(pMFile)); - int64_t loffset = taosLSeek(TSDB_FILE_FD(pMFile), offset, whence); + int64_t loffset = taosLSeekFile(TSDB_FILE_FD(pMFile), offset, whence); if (loffset < 0) { terrno = TAOS_SYSTEM_ERROR(errno); return -1; @@ -108,7 +110,7 @@ static FORCE_INLINE int64_t tsdbSeekMFile(SMFile* pMFile, int64_t offset, int wh static 
FORCE_INLINE int64_t tsdbWriteMFile(SMFile* pMFile, void* buf, int64_t nbyte) { ASSERT(TSDB_FILE_OPENED(pMFile)); - int64_t nwrite = taosWrite(pMFile->fd, buf, nbyte); + int64_t nwrite = taosWriteFile(pMFile->fd, buf, nbyte); if (nwrite < nbyte) { terrno = TAOS_SYSTEM_ERROR(errno); return -1; @@ -150,7 +152,7 @@ static FORCE_INLINE int tsdbRemoveMFile(SMFile* pMFile) { return tfsremove(TSDB_ static FORCE_INLINE int64_t tsdbReadMFile(SMFile* pMFile, void* buf, int64_t nbyte) { ASSERT(TSDB_FILE_OPENED(pMFile)); - int64_t nread = taosRead(pMFile->fd, buf, nbyte); + int64_t nread = taosReadFile(pMFile->fd, buf, nbyte); if (nread < 0) { terrno = TAOS_SYSTEM_ERROR(errno); return -1; @@ -159,6 +161,8 @@ static FORCE_INLINE int64_t tsdbReadMFile(SMFile* pMFile, void* buf, int64_t nby return nread; } +#endif + // =============== SDFile typedef struct { uint32_t magic; @@ -210,7 +214,7 @@ static FORCE_INLINE void tsdbCloseDFile(SDFile* pDFile) { static FORCE_INLINE int64_t tsdbSeekDFile(SDFile* pDFile, int64_t offset, int whence) { ASSERT(TSDB_FILE_OPENED(pDFile)); - int64_t loffset = taosLSeek(TSDB_FILE_FD(pDFile), offset, whence); + int64_t loffset = taosLSeekFile(TSDB_FILE_FD(pDFile), offset, whence); if (loffset < 0) { terrno = TAOS_SYSTEM_ERROR(errno); return -1; @@ -222,7 +226,7 @@ static FORCE_INLINE int64_t tsdbSeekDFile(SDFile* pDFile, int64_t offset, int wh static FORCE_INLINE int64_t tsdbWriteDFile(SDFile* pDFile, void* buf, int64_t nbyte) { ASSERT(TSDB_FILE_OPENED(pDFile)); - int64_t nwrite = taosWrite(pDFile->fd, buf, nbyte); + int64_t nwrite = taosWriteFile(pDFile->fd, buf, nbyte); if (nwrite < nbyte) { terrno = TAOS_SYSTEM_ERROR(errno); return -1; @@ -264,7 +268,7 @@ static FORCE_INLINE int tsdbRemoveDFile(SDFile* pDFile) { return tfsremove(TSDB_ static FORCE_INLINE int64_t tsdbReadDFile(SDFile* pDFile, void* buf, int64_t nbyte) { ASSERT(TSDB_FILE_OPENED(pDFile)); - int64_t nread = taosRead(pDFile->fd, buf, nbyte); + int64_t nread = taosReadFile(pDFile->fd, 
buf, nbyte); if (nread < 0) { terrno = TAOS_SYSTEM_ERROR(errno); return -1; @@ -316,7 +320,7 @@ void* tsdbDecodeDFileSetEx(void* buf, SDFileSet* pSet); int tsdbApplyDFileSetChange(SDFileSet* from, SDFileSet* to); int tsdbCreateDFileSet(SDFileSet* pSet, bool updateHeader); int tsdbUpdateDFileSetHeader(SDFileSet* pSet); -int tsdbScanAndTryFixDFileSet(STsdbRepo *pRepo, SDFileSet* pSet); +int tsdbScanAndTryFixDFileSet(STsdb* pRepo, SDFileSet* pSet); static FORCE_INLINE void tsdbCloseDFileSet(SDFileSet* pSet) { for (TSDB_FILE_T ftype = 0; ftype < TSDB_FILE_MAX; ftype++) { @@ -366,5 +370,4 @@ static FORCE_INLINE bool tsdbFSetIsOk(SDFileSet* pSet) { return true; } -#endif #endif /* _TS_TSDB_FILE_H_ */ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb/inc/tsdbLog.h b/source/dnode/vnode/tsdb/inc/tsdbLog.h new file mode 100644 index 0000000000..bde9b338a2 --- /dev/null +++ b/source/dnode/vnode/tsdb/inc/tsdbLog.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_TSDB_LOG_H_ +#define _TD_TSDB_LOG_H_ + +#include "tlog.h" + +extern int32_t tsdbDebugFlag; + +#define tsdbFatal(...) do { if (tsdbDebugFlag & DEBUG_FATAL) { taosPrintLog("TDB FATAL ", 255, __VA_ARGS__); }} while(0) +#define tsdbError(...) do { if (tsdbDebugFlag & DEBUG_ERROR) { taosPrintLog("TDB ERROR ", 255, __VA_ARGS__); }} while(0) +#define tsdbWarn(...) 
do { if (tsdbDebugFlag & DEBUG_WARN) { taosPrintLog("TDB WARN ", 255, __VA_ARGS__); }} while(0) +#define tsdbInfo(...) do { if (tsdbDebugFlag & DEBUG_INFO) { taosPrintLog("TDB ", 255, __VA_ARGS__); }} while(0) +#define tsdbDebug(...) do { if (tsdbDebugFlag & DEBUG_DEBUG) { taosPrintLog("TDB ", tsdbDebugFlag, __VA_ARGS__); }} while(0) +#define tsdbTrace(...) do { if (tsdbDebugFlag & DEBUG_TRACE) { taosPrintLog("TDB ", tsdbDebugFlag, __VA_ARGS__); }} while(0) + +#endif /* _TD_TSDB_LOG_H_ */ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb/inc/tsdbMemTable.h b/source/dnode/vnode/tsdb/inc/tsdbMemTable.h index e7787af7cf..0bb9fb75f8 100644 --- a/source/dnode/vnode/tsdb/inc/tsdbMemTable.h +++ b/source/dnode/vnode/tsdb/inc/tsdbMemTable.h @@ -24,9 +24,9 @@ extern "C" { typedef struct STsdbMemTable STsdbMemTable; -STsdbMemTable *tsdbNewMemTable(SMemAllocatorFactory *pMAF); -void tsdbFreeMemTable(SMemAllocatorFactory *pMAF, STsdbMemTable *pMemTable); -int tsdbInsertDataToMemTable(STsdbMemTable *pMemTable, SSubmitMsg *pMsg); +STsdbMemTable *tsdbNewMemTable(STsdb *pTsdb); +void tsdbFreeMemTable(STsdb *pTsdb, STsdbMemTable *pMemTable); +int tsdbMemTableInsert(STsdb *pTsdb, STsdbMemTable *pMemTable, SSubmitMsg *pMsg, SShellSubmitRspMsg *pRsp); #ifdef __cplusplus } diff --git a/source/dnode/vnode/tsdb/inc/tsdbMemory.h b/source/dnode/vnode/tsdb/inc/tsdbMemory.h new file mode 100644 index 0000000000..1fc4cd9e52 --- /dev/null +++ b/source/dnode/vnode/tsdb/inc/tsdbMemory.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_TSDB_MEMORY_H_ +#define _TD_TSDB_MEMORY_H_ + +static void * taosTMalloc(size_t size); +static void * taosTCalloc(size_t nmemb, size_t size); +static void * taosTRealloc(void *ptr, size_t size); +static void * taosTZfree(void *ptr); +static size_t taosTSizeof(void *ptr); +static void taosTMemset(void *ptr, int c); + +static FORCE_INLINE void *taosTMalloc(size_t size) { + if (size <= 0) return NULL; + + void *ret = malloc(size + sizeof(size_t)); + if (ret == NULL) return NULL; + + *(size_t *)ret = size; + + return (void *)((char *)ret + sizeof(size_t)); +} + +static FORCE_INLINE void *taosTCalloc(size_t nmemb, size_t size) { + size_t tsize = nmemb * size; + void * ret = taosTMalloc(tsize); + if (ret == NULL) return NULL; + + taosTMemset(ret, 0); + return ret; +} + +static FORCE_INLINE size_t taosTSizeof(void *ptr) { return (ptr) ? (*(size_t *)((char *)ptr - sizeof(size_t))) : 0; } + +static FORCE_INLINE void taosTMemset(void *ptr, int c) { memset(ptr, c, taosTSizeof(ptr)); } + +static FORCE_INLINE void * taosTRealloc(void *ptr, size_t size) { + if (ptr == NULL) return taosTMalloc(size); + + if (size <= taosTSizeof(ptr)) return ptr; + + void * tptr = (void *)((char *)ptr - sizeof(size_t)); + size_t tsize = size + sizeof(size_t); + void* tptr1 = realloc(tptr, tsize); + if (tptr1 == NULL) return NULL; + tptr = tptr1; + + *(size_t *)tptr = size; + + return (void *)((char *)tptr + sizeof(size_t)); +} + +static FORCE_INLINE void* taosTZfree(void* ptr) { + if (ptr) { + free((void*)((char*)ptr - sizeof(size_t))); + } + return NULL; +} + + +#endif /* _TD_TSDB_MEMORY_H_ */ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb/inc/tsdbReadImpl.h b/source/dnode/vnode/tsdb/inc/tsdbReadImpl.h index a9bd76c2b1..3fb235e7dd 100644 --- a/source/dnode/vnode/tsdb/inc/tsdbReadImpl.h +++ b/source/dnode/vnode/tsdb/inc/tsdbReadImpl.h @@ 
-15,14 +15,14 @@ #ifndef _TD_TSDB_READ_IMPL_H_ #define _TD_TSDB_READ_IMPL_H_ -#if 0 +#include "os.h" #include "tfs.h" #include "tsdb.h" -#include "os.h" #include "tsdbFile.h" #include "tskiplist.h" -#include "tsdbMeta.h" +#include "tsdbMemory.h" +#include "common.h" typedef struct SReadH SReadH; @@ -91,7 +91,7 @@ typedef struct { } SBlockData; struct SReadH { - STsdbRepo * pRepo; + STsdb * pRepo; SDFileSet rSet; // FSET to read SArray * aBlkIdx; // SBlockIdx array STable * pTable; // table to read @@ -116,7 +116,7 @@ struct SReadH { #define TSDB_BLOCK_STATIS_SIZE(ncols) (sizeof(SBlockData) + sizeof(SBlockCol) * (ncols) + sizeof(TSCKSUM)) -int tsdbInitReadH(SReadH *pReadh, STsdbRepo *pRepo); +int tsdbInitReadH(SReadH *pReadh, STsdb *pRepo); void tsdbDestroyReadH(SReadH *pReadh); int tsdbSetAndOpenReadFSet(SReadH *pReadh, SDFileSet *pSet); void tsdbCloseAndUnsetFSet(SReadH *pReadh); @@ -151,6 +151,4 @@ static FORCE_INLINE int tsdbMakeRoom(void **ppBuf, size_t size) { return 0; } -#endif - #endif /*_TD_TSDB_READ_IMPL_H_*/ diff --git a/source/dnode/vnode/tsdb/src/tsdbCommit.c b/source/dnode/vnode/tsdb/src/tsdbCommit.c index 1247dcd728..0080e14181 100644 --- a/source/dnode/vnode/tsdb/src/tsdbCommit.c +++ b/source/dnode/vnode/tsdb/src/tsdbCommit.c @@ -32,6 +32,23 @@ int tsdbCommit(STsdb *pTsdb) { return 0; } +void tsdbGetRtnSnap(STsdb *pRepo, SRtn *pRtn) { + STsdbCfg *pCfg = REPO_CFG(pRepo); + TSKEY minKey, midKey, maxKey, now; + + now = taosGetTimestamp(pCfg->precision); + minKey = now - pCfg->keep * tsTickPerDay[pCfg->precision]; + midKey = now - pCfg->keep2 * tsTickPerDay[pCfg->precision]; + maxKey = now - pCfg->keep1 * tsTickPerDay[pCfg->precision]; + + pRtn->minKey = minKey; + pRtn->minFid = (int)(TSDB_KEY_FID(minKey, pCfg->daysPerFile, pCfg->precision)); + pRtn->midFid = (int)(TSDB_KEY_FID(midKey, pCfg->daysPerFile, pCfg->precision)); + pRtn->maxFid = (int)(TSDB_KEY_FID(maxKey, pCfg->daysPerFile, pCfg->precision)); + tsdbDebug("vgId:%d now:%" PRId64 " minKey:%" 
PRId64 " minFid:%d, midFid:%d, maxFid:%d", REPO_ID(pRepo), now, minKey, + pRtn->minFid, pRtn->midFid, pRtn->maxFid); +} + #if 0 /* * Copyright (c) 2019 TAOS Data, Inc. @@ -420,23 +437,6 @@ void *tsdbDecodeKVRecord(void *buf, SKVRecord *pRecord) { return buf; } -void tsdbGetRtnSnap(STsdbRepo *pRepo, SRtn *pRtn) { - STsdbCfg *pCfg = REPO_CFG(pRepo); - TSKEY minKey, midKey, maxKey, now; - - now = taosGetTimestamp(pCfg->precision); - minKey = now - pCfg->keep * tsTickPerDay[pCfg->precision]; - midKey = now - pCfg->keep2 * tsTickPerDay[pCfg->precision]; - maxKey = now - pCfg->keep1 * tsTickPerDay[pCfg->precision]; - - pRtn->minKey = minKey; - pRtn->minFid = (int)(TSDB_KEY_FID(minKey, pCfg->daysPerFile, pCfg->precision)); - pRtn->midFid = (int)(TSDB_KEY_FID(midKey, pCfg->daysPerFile, pCfg->precision)); - pRtn->maxFid = (int)(TSDB_KEY_FID(maxKey, pCfg->daysPerFile, pCfg->precision)); - tsdbDebug("vgId:%d now:%" PRId64 " minKey:%" PRId64 " minFid:%d, midFid:%d, maxFid:%d", REPO_ID(pRepo), now, minKey, - pRtn->minFid, pRtn->midFid, pRtn->maxFid); -} - static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact) { char buf[64] = "\0"; void * pBuf = buf; diff --git a/source/dnode/vnode/tsdb/src/tsdbFS.c b/source/dnode/vnode/tsdb/src/tsdbFS.c index a40e67ca59..eefc21a109 100644 --- a/source/dnode/vnode/tsdb/src/tsdbFS.c +++ b/source/dnode/vnode/tsdb/src/tsdbFS.c @@ -13,9 +13,9 @@ * along with this program. If not, see . 
*/ -#include "os.h" -#include "tsdbint.h" #include +#include "os.h" +#include "tsdbDef.h" typedef enum { TSDB_TXN_TEMP_FILE = 0, TSDB_TXN_CURR_FILE } TSDB_TXN_FILE_T; static const char *tsdbTxnFname[] = {"current.t", "current"}; @@ -26,16 +26,24 @@ static void tsdbResetFSStatus(SFSStatus *pStatus); static int tsdbSaveFSStatus(SFSStatus *pStatus, int vid); static void tsdbApplyFSTxnOnDisk(SFSStatus *pFrom, SFSStatus *pTo); static void tsdbGetTxnFname(int repoid, TSDB_TXN_FILE_T ftype, char fname[]); -static int tsdbOpenFSFromCurrent(STsdbRepo *pRepo); -static int tsdbScanAndTryFixFS(STsdbRepo *pRepo); -static int tsdbScanRootDir(STsdbRepo *pRepo); -static int tsdbScanDataDir(STsdbRepo *pRepo); +static int tsdbOpenFSFromCurrent(STsdb *pRepo); +static int tsdbScanAndTryFixFS(STsdb *pRepo); +static int tsdbScanRootDir(STsdb *pRepo); +static int tsdbScanDataDir(STsdb *pRepo); static bool tsdbIsTFileInFS(STsdbFS *pfs, const TFILE *pf); -static int tsdbRestoreCurrent(STsdbRepo *pRepo); +static int tsdbRestoreCurrent(STsdb *pRepo); static int tsdbComparTFILE(const void *arg1, const void *arg2); -static void tsdbScanAndTryFixDFilesHeader(STsdbRepo *pRepo, int32_t *nExpired); -static int tsdbProcessExpiredFS(STsdbRepo *pRepo); -static int tsdbCreateMeta(STsdbRepo *pRepo); +static void tsdbScanAndTryFixDFilesHeader(STsdb *pRepo, int32_t *nExpired); +// static int tsdbProcessExpiredFS(STsdb *pRepo); +// static int tsdbCreateMeta(STsdb *pRepo); + +static void tsdbGetRootDir(int repoid, char dirName[]) { + snprintf(dirName, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb", repoid); +} + +static void tsdbGetDataDir(int repoid, char dirName[]) { + snprintf(dirName, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb/data", repoid); +} // For backward compatibility // ================== CURRENT file header info @@ -104,11 +112,11 @@ static void *tsdbDecodeDFileSetArray(void *buf, SArray *pArray) { } static int tsdbEncodeFSStatus(void **buf, SFSStatus *pStatus) { - ASSERT(pStatus->pmf); + // 
ASSERT(pStatus->pmf); int tlen = 0; - tlen += tsdbEncodeSMFile(buf, pStatus->pmf); + // tlen += tsdbEncodeSMFile(buf, pStatus->pmf); tlen += tsdbEncodeDFileSetArray(buf, pStatus->df); return tlen; @@ -117,9 +125,9 @@ static int tsdbEncodeFSStatus(void **buf, SFSStatus *pStatus) { static void *tsdbDecodeFSStatus(void *buf, SFSStatus *pStatus) { tsdbResetFSStatus(pStatus); - pStatus->pmf = &(pStatus->mf); + // pStatus->pmf = &(pStatus->mf); - buf = tsdbDecodeSMFile(buf, pStatus->pmf); + // buf = tsdbDecodeSMFile(buf, pStatus->pmf); buf = tsdbDecodeDFileSetArray(buf, pStatus->df); return buf; @@ -132,7 +140,7 @@ static SFSStatus *tsdbNewFSStatus(int maxFSet) { return NULL; } - TSDB_FILE_SET_CLOSED(&(pStatus->mf)); + // TSDB_FILE_SET_CLOSED(&(pStatus->mf)); pStatus->df = taosArrayInit(maxFSet, sizeof(SDFileSet)); if (pStatus->df == NULL) { @@ -158,18 +166,18 @@ static void tsdbResetFSStatus(SFSStatus *pStatus) { return; } - TSDB_FILE_SET_CLOSED(&(pStatus->mf)); + // TSDB_FILE_SET_CLOSED(&(pStatus->mf)); - pStatus->pmf = NULL; + // pStatus->pmf = NULL; taosArrayClear(pStatus->df); } -static void tsdbSetStatusMFile(SFSStatus *pStatus, const SMFile *pMFile) { - ASSERT(pStatus->pmf == NULL); +// static void tsdbSetStatusMFile(SFSStatus *pStatus, const SMFile *pMFile) { +// ASSERT(pStatus->pmf == NULL); - pStatus->pmf = &(pStatus->mf); - tsdbInitMFileEx(pStatus->pmf, (SMFile *)pMFile); -} +// pStatus->pmf = &(pStatus->mf); +// tsdbInitMFileEx(pStatus->pmf, (SMFile *)pMFile); +// } static int tsdbAddDFileSetToStatus(SFSStatus *pStatus, const SDFileSet *pSet) { if (taosArrayPush(pStatus->df, (void *)pSet) == NULL) { @@ -240,63 +248,63 @@ void *tsdbFreeFS(STsdbFS *pfs) { return NULL; } -static int tsdbProcessExpiredFS(STsdbRepo *pRepo) { - tsdbStartFSTxn(pRepo, 0, 0); - if (tsdbCreateMeta(pRepo) < 0) { - tsdbError("vgId:%d failed to create meta since %s", REPO_ID(pRepo), tstrerror(terrno)); - return -1; - } +// static int tsdbProcessExpiredFS(STsdb *pRepo) { +// 
tsdbStartFSTxn(pRepo, 0, 0); +// // if (tsdbCreateMeta(pRepo) < 0) { +// // tsdbError("vgId:%d failed to create meta since %s", REPO_ID(pRepo), tstrerror(terrno)); +// // return -1; +// // } - if (tsdbApplyRtn(pRepo) < 0) { - tsdbEndFSTxnWithError(REPO_FS(pRepo)); - tsdbError("vgId:%d failed to apply rtn since %s", REPO_ID(pRepo), tstrerror(terrno)); - return -1; - } - if (tsdbEndFSTxn(pRepo) < 0) { - tsdbError("vgId:%d failed to end fs txn since %s", REPO_ID(pRepo), tstrerror(terrno)); - return -1; - } - return 0; -} +// if (tsdbApplyRtn(pRepo) < 0) { +// tsdbEndFSTxnWithError(REPO_FS(pRepo)); +// tsdbError("vgId:%d failed to apply rtn since %s", REPO_ID(pRepo), tstrerror(terrno)); +// return -1; +// } +// if (tsdbEndFSTxn(pRepo) < 0) { +// tsdbError("vgId:%d failed to end fs txn since %s", REPO_ID(pRepo), tstrerror(terrno)); +// return -1; +// } +// return 0; +// } -static int tsdbCreateMeta(STsdbRepo *pRepo) { - STsdbFS *pfs = REPO_FS(pRepo); - SMFile * pOMFile = pfs->cstatus->pmf; - SMFile mf; - SDiskID did; +// static int tsdbCreateMeta(STsdb *pRepo) { +// STsdbFS *pfs = REPO_FS(pRepo); +// SMFile * pOMFile = pfs->cstatus->pmf; +// SMFile mf; +// SDiskID did; - if (pOMFile != NULL) { - // keep the old meta file - tsdbUpdateMFile(pfs, pOMFile); - return 0; - } +// if (pOMFile != NULL) { +// // keep the old meta file +// tsdbUpdateMFile(pfs, pOMFile); +// return 0; +// } - // Create a new meta file - did.level = TFS_PRIMARY_LEVEL; - did.id = TFS_PRIMARY_ID; - tsdbInitMFile(&mf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo))); +// // Create a new meta file +// did.level = TFS_PRIMARY_LEVEL; +// did.id = TFS_PRIMARY_ID; +// tsdbInitMFile(&mf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo))); - if (tsdbCreateMFile(&mf, true) < 0) { - tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno)); - return -1; - } +// if (tsdbCreateMFile(&mf, true) < 0) { +// tsdbError("vgId:%d failed to create META file since %s", 
REPO_ID(pRepo), tstrerror(terrno)); +// return -1; +// } - tsdbInfo("vgId:%d meta file %s is created", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf)); +// tsdbInfo("vgId:%d meta file %s is created", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf)); - if (tsdbUpdateMFileHeader(&mf) < 0) { - tsdbError("vgId:%d failed to update META file header since %s, revert it", REPO_ID(pRepo), tstrerror(terrno)); - tsdbApplyMFileChange(&mf, pOMFile); - return -1; - } +// if (tsdbUpdateMFileHeader(&mf) < 0) { +// tsdbError("vgId:%d failed to update META file header since %s, revert it", REPO_ID(pRepo), tstrerror(terrno)); +// tsdbApplyMFileChange(&mf, pOMFile); +// return -1; +// } - TSDB_FILE_FSYNC(&mf); - tsdbCloseMFile(&mf); - tsdbUpdateMFile(pfs, &mf); +// TSDB_FILE_FSYNC(&mf); +// tsdbCloseMFile(&mf); +// tsdbUpdateMFile(pfs, &mf); - return 0; -} +// return 0; +// } -int tsdbOpenFS(STsdbRepo *pRepo) { +int tsdbOpenFS(STsdb *pRepo) { STsdbFS *pfs = REPO_FS(pRepo); char current[TSDB_FILENAME_LEN] = "\0"; int nExpired = 0; @@ -313,9 +321,9 @@ int tsdbOpenFS(STsdbRepo *pRepo) { } tsdbScanAndTryFixDFilesHeader(pRepo, &nExpired); - if (nExpired > 0) { - tsdbProcessExpiredFS(pRepo); - } + // if (nExpired > 0) { + // tsdbProcessExpiredFS(pRepo); + // } } else { // should skip expired fileset inside of the function if (tsdbRestoreCurrent(pRepo) < 0) { @@ -329,39 +337,39 @@ int tsdbOpenFS(STsdbRepo *pRepo) { return -1; } - // Load meta cache if has meta file - if ((!(pRepo->state & TSDB_STATE_BAD_META)) && tsdbLoadMetaCache(pRepo, true) < 0) { - tsdbError("vgId:%d failed to open FS while loading meta cache since %s", REPO_ID(pRepo), tstrerror(terrno)); - return -1; - } + // // Load meta cache if has meta file + // if ((!(pRepo->state & TSDB_STATE_BAD_META)) && tsdbLoadMetaCache(pRepo, true) < 0) { + // tsdbError("vgId:%d failed to open FS while loading meta cache since %s", REPO_ID(pRepo), tstrerror(terrno)); + // return -1; + // } return 0; } -void tsdbCloseFS(STsdbRepo *pRepo) { +void 
tsdbCloseFS(STsdb *pRepo) { // Do nothing } // Start a new transaction to modify the file system -void tsdbStartFSTxn(STsdbRepo *pRepo, int64_t pointsAdd, int64_t storageAdd) { +void tsdbStartFSTxn(STsdb *pRepo, int64_t pointsAdd, int64_t storageAdd) { STsdbFS *pfs = REPO_FS(pRepo); ASSERT(pfs->intxn == false); pfs->intxn = true; tsdbResetFSStatus(pfs->nstatus); pfs->nstatus->meta = pfs->cstatus->meta; - if (pfs->cstatus->pmf == NULL) { - pfs->nstatus->meta.version = 0; - } else { - pfs->nstatus->meta.version = pfs->cstatus->meta.version + 1; - } + // if (pfs->cstatus->pmf == NULL) { + pfs->nstatus->meta.version = 0; + // } else { + // pfs->nstatus->meta.version = pfs->cstatus->meta.version + 1; + // } pfs->nstatus->meta.totalPoints = pfs->cstatus->meta.totalPoints + pointsAdd; pfs->nstatus->meta.totalStorage = pfs->cstatus->meta.totalStorage += storageAdd; } void tsdbUpdateFSTxnMeta(STsdbFS *pfs, STsdbFSMeta *pMeta) { pfs->nstatus->meta = *pMeta; } -int tsdbEndFSTxn(STsdbRepo *pRepo) { +int tsdbEndFSTxn(STsdb *pRepo) { STsdbFS *pfs = REPO_FS(pRepo); ASSERT(FS_IN_TXN(pfs)); SFSStatus *pStatus; @@ -372,7 +380,7 @@ int tsdbEndFSTxn(STsdbRepo *pRepo) { return -1; } - // Make new + // Make new tsdbWLockFS(pfs); pStatus = pfs->cstatus; pfs->cstatus = pfs->nstatus; @@ -393,7 +401,7 @@ int tsdbEndFSTxnWithError(STsdbFS *pfs) { return 0; } -void tsdbUpdateMFile(STsdbFS *pfs, const SMFile *pMFile) { tsdbSetStatusMFile(pfs->nstatus, pMFile); } +// void tsdbUpdateMFile(STsdbFS *pfs, const SMFile *pMFile) { tsdbSetStatusMFile(pfs->nstatus, pMFile); } int tsdbUpdateDFileSet(STsdbFS *pfs, const SDFileSet *pSet) { return tsdbAddDFileSetToStatus(pfs->nstatus, pSet); } @@ -415,8 +423,7 @@ static int tsdbSaveFSStatus(SFSStatus *pStatus, int vid) { } fsheader.version = TSDB_FS_VERSION; - if (pStatus->pmf == NULL) { - ASSERT(taosArrayGetSize(pStatus->df) == 0); + if (taosArrayGetSize(pStatus->df) == 0) { fsheader.len = 0; } else { fsheader.len = tsdbEncodeFSStatus(NULL, pStatus) + 
sizeof(TSCKSUM); @@ -429,7 +436,7 @@ static int tsdbSaveFSStatus(SFSStatus *pStatus, int vid) { taosCalcChecksumAppend(0, (uint8_t *)hbuf, TSDB_FILE_HEAD_SIZE); - if (taosWrite(fd, hbuf, TSDB_FILE_HEAD_SIZE) < TSDB_FILE_HEAD_SIZE) { + if (taosWriteFile(fd, hbuf, TSDB_FILE_HEAD_SIZE) < TSDB_FILE_HEAD_SIZE) { terrno = TAOS_SYSTEM_ERROR(errno); close(fd); remove(tfname); @@ -448,7 +455,7 @@ static int tsdbSaveFSStatus(SFSStatus *pStatus, int vid) { tsdbEncodeFSStatus(&ptr, pStatus); taosCalcChecksumAppend(0, (uint8_t *)pBuf, fsheader.len); - if (taosWrite(fd, pBuf, fsheader.len) < fsheader.len) { + if (taosWriteFile(fd, pBuf, fsheader.len) < fsheader.len) { terrno = TAOS_SYSTEM_ERROR(errno); close(fd); (void)remove(tfname); @@ -458,7 +465,7 @@ static int tsdbSaveFSStatus(SFSStatus *pStatus, int vid) { } // fsync, close and rename - if (taosFsync(fd) < 0) { + if (taosFsyncFile(fd) < 0) { terrno = TAOS_SYSTEM_ERROR(errno); close(fd); remove(tfname); @@ -467,7 +474,7 @@ static int tsdbSaveFSStatus(SFSStatus *pStatus, int vid) { } (void)close(fd); - (void)taosRename(tfname, cfname); + (void)taosRenameFile(tfname, cfname); taosTZfree(pBuf); return 0; @@ -484,7 +491,7 @@ static void tsdbApplyFSTxnOnDisk(SFSStatus *pFrom, SFSStatus *pTo) { sizeTo = taosArrayGetSize(pTo->df); // Apply meta file change - (void)tsdbApplyMFileChange(pFrom->pmf, pTo->pmf); + // (void)tsdbApplyMFileChange(pFrom->pmf, pTo->pmf); // Apply SDFileSet change if (ifrom >= sizeFrom) { @@ -642,7 +649,7 @@ static void tsdbGetTxnFname(int repoid, TSDB_TXN_FILE_T ftype, char fname[]) { snprintf(fname, TSDB_FILENAME_LEN, "%s/vnode/vnode%d/tsdb/%s", TFS_PRIMARY_PATH(), repoid, tsdbTxnFname[ftype]); } -static int tsdbOpenFSFromCurrent(STsdbRepo *pRepo) { +static int tsdbOpenFSFromCurrent(STsdb *pRepo) { STsdbFS * pfs = REPO_FS(pRepo); int fd = -1; void * buffer = NULL; @@ -664,7 +671,7 @@ static int tsdbOpenFSFromCurrent(STsdbRepo *pRepo) { goto _err; } - int nread = (int)taosRead(fd, buffer, 
TSDB_FILE_HEAD_SIZE); + int nread = (int)taosReadFile(fd, buffer, TSDB_FILE_HEAD_SIZE); if (nread < 0) { tsdbError("vgId:%d failed to read %d bytes from file %s since %s", REPO_ID(pRepo), TSDB_FILENAME_LEN, current, strerror(errno)); @@ -698,7 +705,7 @@ static int tsdbOpenFSFromCurrent(STsdbRepo *pRepo) { goto _err; } - nread = (int)taosRead(fd, buffer, fsheader.len); + nread = (int)taosReadFile(fd, buffer, fsheader.len); if (nread < 0) { tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), current, strerror(errno)); terrno = TAOS_SYSTEM_ERROR(errno); @@ -737,14 +744,14 @@ _err: } // Scan and try to fix incorrect files -static int tsdbScanAndTryFixFS(STsdbRepo *pRepo) { +static int tsdbScanAndTryFixFS(STsdb *pRepo) { STsdbFS * pfs = REPO_FS(pRepo); SFSStatus *pStatus = pfs->cstatus; - if (tsdbScanAndTryFixMFile(pRepo) < 0) { - tsdbError("vgId:%d failed to fix MFile since %s", REPO_ID(pRepo), tstrerror(terrno)); - return -1; - } + // if (tsdbScanAndTryFixMFile(pRepo) < 0) { + // tsdbError("vgId:%d failed to fix MFile since %s", REPO_ID(pRepo), tstrerror(terrno)); + // return -1; + // } size_t size = taosArrayGetSize(pStatus->df); @@ -763,143 +770,143 @@ static int tsdbScanAndTryFixFS(STsdbRepo *pRepo) { return 0; } -int tsdbLoadMetaCache(STsdbRepo *pRepo, bool recoverMeta) { - char tbuf[128]; - STsdbFS * pfs = REPO_FS(pRepo); - SMFile mf; - SMFile * pMFile = &mf; - void * pBuf = NULL; - SKVRecord rInfo; - int64_t maxBufSize = 0; - SMFInfo minfo; +// int tsdbLoadMetaCache(STsdb *pRepo, bool recoverMeta) { +// char tbuf[128]; +// STsdbFS * pfs = REPO_FS(pRepo); +// SMFile mf; +// SMFile * pMFile = &mf; +// void * pBuf = NULL; +// SKVRecord rInfo; +// int64_t maxBufSize = 0; +// SMFInfo minfo; - taosHashClear(pfs->metaCache); +// taosHashClear(pfs->metaCache); - // No meta file, just return - if (pfs->cstatus->pmf == NULL) return 0; +// // No meta file, just return +// if (pfs->cstatus->pmf == NULL) return 0; - mf = pfs->cstatus->mf; - // Load cache 
first - if (tsdbOpenMFile(pMFile, O_RDONLY) < 0) { - return -1; - } +// mf = pfs->cstatus->mf; +// // Load cache first +// if (tsdbOpenMFile(pMFile, O_RDONLY) < 0) { +// return -1; +// } - if (tsdbLoadMFileHeader(pMFile, &minfo) < 0) { - tsdbCloseMFile(pMFile); - return -1; - } +// if (tsdbLoadMFileHeader(pMFile, &minfo) < 0) { +// tsdbCloseMFile(pMFile); +// return -1; +// } - while (true) { - int64_t tsize = tsdbReadMFile(pMFile, tbuf, sizeof(SKVRecord)); - if (tsize == 0) break; +// while (true) { +// int64_t tsize = tsdbReadMFile(pMFile, tbuf, sizeof(SKVRecord)); +// if (tsize == 0) break; - if (tsize < 0) { - tsdbError("vgId:%d failed to read META file since %s", REPO_ID(pRepo), tstrerror(terrno)); - return -1; - } +// if (tsize < 0) { +// tsdbError("vgId:%d failed to read META file since %s", REPO_ID(pRepo), tstrerror(terrno)); +// return -1; +// } - if (tsize < sizeof(SKVRecord)) { - tsdbError("vgId:%d failed to read %" PRIzu " bytes from file %s", REPO_ID(pRepo), sizeof(SKVRecord), - TSDB_FILE_FULL_NAME(pMFile)); - terrno = TSDB_CODE_TDB_FILE_CORRUPTED; - tsdbCloseMFile(pMFile); - return -1; - } +// if (tsize < sizeof(SKVRecord)) { +// tsdbError("vgId:%d failed to read %" PRIzu " bytes from file %s", REPO_ID(pRepo), sizeof(SKVRecord), +// TSDB_FILE_FULL_NAME(pMFile)); +// terrno = TSDB_CODE_TDB_FILE_CORRUPTED; +// tsdbCloseMFile(pMFile); +// return -1; +// } - void *ptr = tsdbDecodeKVRecord(tbuf, &rInfo); - ASSERT(POINTER_DISTANCE(ptr, tbuf) == sizeof(SKVRecord)); - // ASSERT((rInfo.offset > 0) ? (pStore->info.size == rInfo.offset) : true); +// void *ptr = tsdbDecodeKVRecord(tbuf, &rInfo); +// ASSERT(POINTER_DISTANCE(ptr, tbuf) == sizeof(SKVRecord)); +// // ASSERT((rInfo.offset > 0) ? 
(pStore->info.size == rInfo.offset) : true); - if (rInfo.offset < 0) { - taosHashRemove(pfs->metaCache, (void *)(&rInfo.uid), sizeof(rInfo.uid)); -#if 0 - pStore->info.size += sizeof(SKVRecord); - pStore->info.nRecords--; - pStore->info.nDels++; - pStore->info.tombSize += (rInfo.size + sizeof(SKVRecord) * 2); -#endif - } else { - ASSERT(rInfo.offset > 0 && rInfo.size > 0); - if (taosHashPut(pfs->metaCache, (void *)(&rInfo.uid), sizeof(rInfo.uid), &rInfo, sizeof(rInfo)) < 0) { - tsdbError("vgId:%d failed to load meta cache from file %s since OOM", REPO_ID(pRepo), - TSDB_FILE_FULL_NAME(pMFile)); - terrno = TSDB_CODE_COM_OUT_OF_MEMORY; - tsdbCloseMFile(pMFile); - return -1; - } +// if (rInfo.offset < 0) { +// taosHashRemove(pfs->metaCache, (void *)(&rInfo.uid), sizeof(rInfo.uid)); +// #if 0 +// pStore->info.size += sizeof(SKVRecord); +// pStore->info.nRecords--; +// pStore->info.nDels++; +// pStore->info.tombSize += (rInfo.size + sizeof(SKVRecord) * 2); +// #endif +// } else { +// ASSERT(rInfo.offset > 0 && rInfo.size > 0); +// if (taosHashPut(pfs->metaCache, (void *)(&rInfo.uid), sizeof(rInfo.uid), &rInfo, sizeof(rInfo)) < 0) { +// tsdbError("vgId:%d failed to load meta cache from file %s since OOM", REPO_ID(pRepo), +// TSDB_FILE_FULL_NAME(pMFile)); +// terrno = TSDB_CODE_COM_OUT_OF_MEMORY; +// tsdbCloseMFile(pMFile); +// return -1; +// } - maxBufSize = MAX(maxBufSize, rInfo.size); +// maxBufSize = MAX(maxBufSize, rInfo.size); - if (tsdbSeekMFile(pMFile, rInfo.size, SEEK_CUR) < 0) { - tsdbError("vgId:%d failed to lseek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), - tstrerror(terrno)); - tsdbCloseMFile(pMFile); - return -1; - } +// if (tsdbSeekMFile(pMFile, rInfo.size, SEEK_CUR) < 0) { +// tsdbError("vgId:%d failed to lseek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), +// tstrerror(terrno)); +// tsdbCloseMFile(pMFile); +// return -1; +// } -#if 0 - pStore->info.size += (sizeof(SKVRecord) + rInfo.size); - pStore->info.nRecords++; 
-#endif - } - } +// #if 0 +// pStore->info.size += (sizeof(SKVRecord) + rInfo.size); +// pStore->info.nRecords++; +// #endif +// } +// } - if (recoverMeta) { - pBuf = malloc((size_t)maxBufSize); - if (pBuf == NULL) { - terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; - tsdbCloseMFile(pMFile); - return -1; - } +// if (recoverMeta) { +// pBuf = malloc((size_t)maxBufSize); +// if (pBuf == NULL) { +// terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; +// tsdbCloseMFile(pMFile); +// return -1; +// } - SKVRecord *pRecord = taosHashIterate(pfs->metaCache, NULL); - while (pRecord) { - if (tsdbSeekMFile(pMFile, pRecord->offset + sizeof(SKVRecord), SEEK_SET) < 0) { - tsdbError("vgId:%d failed to seek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), - tstrerror(terrno)); - tfree(pBuf); - tsdbCloseMFile(pMFile); - return -1; - } +// SKVRecord *pRecord = taosHashIterate(pfs->metaCache, NULL); +// while (pRecord) { +// if (tsdbSeekMFile(pMFile, pRecord->offset + sizeof(SKVRecord), SEEK_SET) < 0) { +// tsdbError("vgId:%d failed to seek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), +// tstrerror(terrno)); +// tfree(pBuf); +// tsdbCloseMFile(pMFile); +// return -1; +// } - int nread = (int)tsdbReadMFile(pMFile, pBuf, pRecord->size); - if (nread < 0) { - tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), - tstrerror(terrno)); - tfree(pBuf); - tsdbCloseMFile(pMFile); - return -1; - } +// int nread = (int)tsdbReadMFile(pMFile, pBuf, pRecord->size); +// if (nread < 0) { +// tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), +// tstrerror(terrno)); +// tfree(pBuf); +// tsdbCloseMFile(pMFile); +// return -1; +// } - if (nread < pRecord->size) { - tsdbError("vgId:%d failed to read file %s since file corrupted, expected read:%" PRId64 " actual read:%d", - REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), pRecord->size, nread); - terrno = TSDB_CODE_TDB_FILE_CORRUPTED; - tfree(pBuf); - 
tsdbCloseMFile(pMFile); - return -1; - } +// if (nread < pRecord->size) { +// tsdbError("vgId:%d failed to read file %s since file corrupted, expected read:%" PRId64 " actual read:%d", +// REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), pRecord->size, nread); +// terrno = TSDB_CODE_TDB_FILE_CORRUPTED; +// tfree(pBuf); +// tsdbCloseMFile(pMFile); +// return -1; +// } - if (tsdbRestoreTable(pRepo, pBuf, (int)pRecord->size) < 0) { - tsdbError("vgId:%d failed to restore table, uid %" PRId64 ", since %s" PRIu64, REPO_ID(pRepo), pRecord->uid, - tstrerror(terrno)); - tfree(pBuf); - tsdbCloseMFile(pMFile); - return -1; - } +// if (tsdbRestoreTable(pRepo, pBuf, (int)pRecord->size) < 0) { +// tsdbError("vgId:%d failed to restore table, uid %" PRId64 ", since %s" PRIu64, REPO_ID(pRepo), pRecord->uid, +// tstrerror(terrno)); +// tfree(pBuf); +// tsdbCloseMFile(pMFile); +// return -1; +// } - pRecord = taosHashIterate(pfs->metaCache, pRecord); - } +// pRecord = taosHashIterate(pfs->metaCache, pRecord); +// } - tsdbOrgMeta(pRepo); - } +// tsdbOrgMeta(pRepo); +// } - tsdbCloseMFile(pMFile); - tfree(pBuf); - return 0; -} +// tsdbCloseMFile(pMFile); +// tfree(pBuf); +// return 0; +// } -static int tsdbScanRootDir(STsdbRepo *pRepo) { +static int tsdbScanRootDir(STsdb *pRepo) { char rootDir[TSDB_FILENAME_LEN]; char bname[TSDB_FILENAME_LEN]; STsdbFS * pfs = REPO_FS(pRepo); @@ -920,9 +927,9 @@ static int tsdbScanRootDir(STsdbRepo *pRepo) { continue; } - if (pfs->cstatus->pmf && tfsIsSameFile(pf, &(pfs->cstatus->pmf->f))) { - continue; - } + // if (/*pfs->cstatus->pmf && */ tfsIsSameFile(pf, &(pfs->cstatus->pmf->f))) { + // continue; + // } (void)tfsremove(pf); tsdbDebug("vgId:%d invalid file %s is removed", REPO_ID(pRepo), TFILE_NAME(pf)); @@ -933,7 +940,7 @@ static int tsdbScanRootDir(STsdbRepo *pRepo) { return 0; } -static int tsdbScanDataDir(STsdbRepo *pRepo) { +static int tsdbScanDataDir(STsdb *pRepo) { char dataDir[TSDB_FILENAME_LEN]; char bname[TSDB_FILENAME_LEN]; STsdbFS * pfs = 
REPO_FS(pRepo); @@ -977,128 +984,128 @@ static bool tsdbIsTFileInFS(STsdbFS *pfs, const TFILE *pf) { return false; } -static int tsdbRestoreMeta(STsdbRepo *pRepo) { - char rootDir[TSDB_FILENAME_LEN]; - char bname[TSDB_FILENAME_LEN]; - TDIR * tdir = NULL; - const TFILE *pf = NULL; - const char * pattern = "^meta(-ver[0-9]+)?$"; - regex_t regex; - STsdbFS * pfs = REPO_FS(pRepo); +// static int tsdbRestoreMeta(STsdb *pRepo) { +// char rootDir[TSDB_FILENAME_LEN]; +// char bname[TSDB_FILENAME_LEN]; +// TDIR * tdir = NULL; +// const TFILE *pf = NULL; +// const char * pattern = "^meta(-ver[0-9]+)?$"; +// regex_t regex; +// STsdbFS * pfs = REPO_FS(pRepo); - regcomp(®ex, pattern, REG_EXTENDED); +// regcomp(®ex, pattern, REG_EXTENDED); - tsdbInfo("vgId:%d try to restore meta", REPO_ID(pRepo)); +// tsdbInfo("vgId:%d try to restore meta", REPO_ID(pRepo)); - tsdbGetRootDir(REPO_ID(pRepo), rootDir); +// tsdbGetRootDir(REPO_ID(pRepo), rootDir); - tdir = tfsOpendir(rootDir); - if (tdir == NULL) { - tsdbError("vgId:%d failed to open dir %s since %s", REPO_ID(pRepo), rootDir, tstrerror(terrno)); - regfree(®ex); - return -1; - } +// tdir = tfsOpendir(rootDir); +// if (tdir == NULL) { +// tsdbError("vgId:%d failed to open dir %s since %s", REPO_ID(pRepo), rootDir, tstrerror(terrno)); +// regfree(®ex); +// return -1; +// } - while ((pf = tfsReaddir(tdir))) { - tfsbasename(pf, bname); +// while ((pf = tfsReaddir(tdir))) { +// tfsbasename(pf, bname); - if (strcmp(bname, "data") == 0) { - // Skip the data/ directory - continue; - } +// if (strcmp(bname, "data") == 0) { +// // Skip the data/ directory +// continue; +// } - if (strcmp(bname, tsdbTxnFname[TSDB_TXN_TEMP_FILE]) == 0) { - // Skip current.t file - tsdbInfo("vgId:%d file %s exists, remove it", REPO_ID(pRepo), TFILE_NAME(pf)); - (void)tfsremove(pf); - continue; - } +// if (strcmp(bname, tsdbTxnFname[TSDB_TXN_TEMP_FILE]) == 0) { +// // Skip current.t file +// tsdbInfo("vgId:%d file %s exists, remove it", REPO_ID(pRepo), 
TFILE_NAME(pf)); +// (void)tfsremove(pf); +// continue; +// } - int code = regexec(®ex, bname, 0, NULL, 0); - if (code == 0) { - // Match - if (pfs->cstatus->pmf != NULL) { - tsdbError("vgId:%d failed to restore meta since two file exists, file1 %s and file2 %s", REPO_ID(pRepo), - TSDB_FILE_FULL_NAME(pfs->cstatus->pmf), TFILE_NAME(pf)); - terrno = TSDB_CODE_TDB_FILE_CORRUPTED; - tfsClosedir(tdir); - regfree(®ex); - return -1; - } else { - uint32_t _version = 0; - if (strcmp(bname, "meta") != 0) { - sscanf(bname, "meta-ver%" PRIu32, &_version); - pfs->cstatus->meta.version = _version; - } +// int code = regexec(®ex, bname, 0, NULL, 0); +// if (code == 0) { +// // Match +// if (pfs->cstatus->pmf != NULL) { +// tsdbError("vgId:%d failed to restore meta since two file exists, file1 %s and file2 %s", REPO_ID(pRepo), +// TSDB_FILE_FULL_NAME(pfs->cstatus->pmf), TFILE_NAME(pf)); +// terrno = TSDB_CODE_TDB_FILE_CORRUPTED; +// tfsClosedir(tdir); +// regfree(®ex); +// return -1; +// } else { +// uint32_t _version = 0; +// if (strcmp(bname, "meta") != 0) { +// sscanf(bname, "meta-ver%" PRIu32, &_version); +// pfs->cstatus->meta.version = _version; +// } - pfs->cstatus->pmf = &(pfs->cstatus->mf); - pfs->cstatus->pmf->f = *pf; - TSDB_FILE_SET_CLOSED(pfs->cstatus->pmf); +// pfs->cstatus->pmf = &(pfs->cstatus->mf); +// pfs->cstatus->pmf->f = *pf; +// TSDB_FILE_SET_CLOSED(pfs->cstatus->pmf); - if (tsdbOpenMFile(pfs->cstatus->pmf, O_RDONLY) < 0) { - tsdbError("vgId:%d failed to restore meta since %s", REPO_ID(pRepo), tstrerror(terrno)); - tfsClosedir(tdir); - regfree(®ex); - return -1; - } +// if (tsdbOpenMFile(pfs->cstatus->pmf, O_RDONLY) < 0) { +// tsdbError("vgId:%d failed to restore meta since %s", REPO_ID(pRepo), tstrerror(terrno)); +// tfsClosedir(tdir); +// regfree(®ex); +// return -1; +// } - if (tsdbLoadMFileHeader(pfs->cstatus->pmf, &(pfs->cstatus->pmf->info)) < 0) { - tsdbError("vgId:%d failed to restore meta since %s", REPO_ID(pRepo), tstrerror(terrno)); - 
tsdbCloseMFile(pfs->cstatus->pmf); - tfsClosedir(tdir); - regfree(®ex); - return -1; - } +// if (tsdbLoadMFileHeader(pfs->cstatus->pmf, &(pfs->cstatus->pmf->info)) < 0) { +// tsdbError("vgId:%d failed to restore meta since %s", REPO_ID(pRepo), tstrerror(terrno)); +// tsdbCloseMFile(pfs->cstatus->pmf); +// tfsClosedir(tdir); +// regfree(®ex); +// return -1; +// } - if (tsdbForceKeepFile) { - struct stat tfstat; +// if (tsdbForceKeepFile) { +// struct stat tfstat; - // Get real file size - if (fstat(pfs->cstatus->pmf->fd, &tfstat) < 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - tsdbCloseMFile(pfs->cstatus->pmf); - tfsClosedir(tdir); - regfree(®ex); - return -1; - } +// // Get real file size +// if (fstat(pfs->cstatus->pmf->fd, &tfstat) < 0) { +// terrno = TAOS_SYSTEM_ERROR(errno); +// tsdbCloseMFile(pfs->cstatus->pmf); +// tfsClosedir(tdir); +// regfree(®ex); +// return -1; +// } - if (pfs->cstatus->pmf->info.size != tfstat.st_size) { - int64_t tfsize = pfs->cstatus->pmf->info.size; - pfs->cstatus->pmf->info.size = tfstat.st_size; - tsdbInfo("vgId:%d file %s header size is changed from %" PRId64 " to %" PRId64, REPO_ID(pRepo), - TSDB_FILE_FULL_NAME(pfs->cstatus->pmf), tfsize, pfs->cstatus->pmf->info.size); - } - } +// if (pfs->cstatus->pmf->info.size != tfstat.st_size) { +// int64_t tfsize = pfs->cstatus->pmf->info.size; +// pfs->cstatus->pmf->info.size = tfstat.st_size; +// tsdbInfo("vgId:%d file %s header size is changed from %" PRId64 " to %" PRId64, REPO_ID(pRepo), +// TSDB_FILE_FULL_NAME(pfs->cstatus->pmf), tfsize, pfs->cstatus->pmf->info.size); +// } +// } - tsdbCloseMFile(pfs->cstatus->pmf); - } - } else if (code == REG_NOMATCH) { - // Not match - tsdbInfo("vgId:%d invalid file %s exists, remove it", REPO_ID(pRepo), TFILE_NAME(pf)); - tfsremove(pf); - continue; - } else { - // Has other error - tsdbError("vgId:%d failed to restore meta file while run regexec since %s", REPO_ID(pRepo), strerror(code)); - terrno = TAOS_SYSTEM_ERROR(code); - tfsClosedir(tdir); - 
regfree(®ex); - return -1; - } - } +// tsdbCloseMFile(pfs->cstatus->pmf); +// } +// } else if (code == REG_NOMATCH) { +// // Not match +// tsdbInfo("vgId:%d invalid file %s exists, remove it", REPO_ID(pRepo), TFILE_NAME(pf)); +// tfsremove(pf); +// continue; +// } else { +// // Has other error +// tsdbError("vgId:%d failed to restore meta file while run regexec since %s", REPO_ID(pRepo), strerror(code)); +// terrno = TAOS_SYSTEM_ERROR(code); +// tfsClosedir(tdir); +// regfree(®ex); +// return -1; +// } +// } - if (pfs->cstatus->pmf) { - tsdbInfo("vgId:%d meta file %s is restored", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pfs->cstatus->pmf)); - } else { - tsdbInfo("vgId:%d no meta file is restored", REPO_ID(pRepo)); - } +// if (pfs->cstatus->pmf) { +// tsdbInfo("vgId:%d meta file %s is restored", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pfs->cstatus->pmf)); +// } else { +// tsdbInfo("vgId:%d no meta file is restored", REPO_ID(pRepo)); +// } - tfsClosedir(tdir); - regfree(®ex); - return 0; -} +// tfsClosedir(tdir); +// regfree(®ex); +// return 0; +// } -static int tsdbRestoreDFileSet(STsdbRepo *pRepo) { +static int tsdbRestoreDFileSet(STsdb *pRepo) { char dataDir[TSDB_FILENAME_LEN]; char bname[TSDB_FILENAME_LEN]; TDIR * tdir = NULL; @@ -1220,9 +1227,10 @@ static int tsdbRestoreDFileSet(STsdbRepo *pRepo) { } pDFile->f = *pf; - + if (tsdbOpenDFile(pDFile, O_RDONLY) < 0) { - tsdbError("vgId:%d failed to open DFile %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile), tstrerror(terrno)); + tsdbError("vgId:%d failed to open DFile %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile), + tstrerror(terrno)); taosArrayDestroy(fArray); return -1; } @@ -1266,12 +1274,12 @@ static int tsdbRestoreDFileSet(STsdbRepo *pRepo) { return 0; } -static int tsdbRestoreCurrent(STsdbRepo *pRepo) { - // Loop to recover mfile - if (tsdbRestoreMeta(pRepo) < 0) { - tsdbError("vgId:%d failed to restore current since %s", REPO_ID(pRepo), tstrerror(terrno)); - return -1; - } +static int 
tsdbRestoreCurrent(STsdb *pRepo) { + // // Loop to recover mfile + // if (tsdbRestoreMeta(pRepo) < 0) { + // tsdbError("vgId:%d failed to restore current since %s", REPO_ID(pRepo), tstrerror(terrno)); + // return -1; + // } // Loop to recover dfile set if (tsdbRestoreDFileSet(pRepo) < 0) { @@ -1279,7 +1287,7 @@ static int tsdbRestoreCurrent(STsdbRepo *pRepo) { return -1; } - if (tsdbSaveFSStatus(pRepo->fs->cstatus, REPO_ID(pRepo)) < 0) { + if (tsdbSaveFSStatus(pRepo->fs.cstatus, REPO_ID(pRepo)) < 0) { tsdbError("vgId:%d failed to restore corrent since %s", REPO_ID(pRepo), tstrerror(terrno)); return -1; } @@ -1317,7 +1325,7 @@ static int tsdbComparTFILE(const void *arg1, const void *arg2) { } } -static void tsdbScanAndTryFixDFilesHeader(STsdbRepo *pRepo, int32_t *nExpired) { +static void tsdbScanAndTryFixDFilesHeader(STsdb *pRepo, int32_t *nExpired) { STsdbFS * pfs = REPO_FS(pRepo); SFSStatus *pStatus = pfs->cstatus; SDFInfo info; diff --git a/source/dnode/vnode/tsdb/src/tsdbFile.c b/source/dnode/vnode/tsdb/src/tsdbFile.c index 0f13b6108f..a1c1b57b44 100644 --- a/source/dnode/vnode/tsdb/src/tsdbFile.c +++ b/source/dnode/vnode/tsdb/src/tsdbFile.c @@ -13,22 +13,23 @@ * along with this program. If not, see . 
*/ -#include "tsdbint.h" +#include "tsdbDef.h" static const char *TSDB_FNAME_SUFFIX[] = { - "head", // TSDB_FILE_HEAD - "data", // TSDB_FILE_DATA - "last", // TSDB_FILE_LAST - "", // TSDB_FILE_MAX - "meta", // TSDB_FILE_META + "head", // TSDB_FILE_HEAD + "data", // TSDB_FILE_DATA + "last", // TSDB_FILE_LAST + "", // TSDB_FILE_MAX + "meta", // TSDB_FILE_META }; -static void tsdbGetFilename(int vid, int fid, uint32_t ver, TSDB_FILE_T ftype, char *fname); -static int tsdbRollBackMFile(SMFile *pMFile); +static void tsdbGetFilename(int vid, int fid, uint32_t ver, TSDB_FILE_T ftype, char *fname); +// static int tsdbRollBackMFile(SMFile *pMFile); static int tsdbEncodeDFInfo(void **buf, SDFInfo *pInfo); static void *tsdbDecodeDFInfo(void *buf, SDFInfo *pInfo); static int tsdbRollBackDFile(SDFile *pDFile); +#if 0 // ============== SMFile void tsdbInitMFile(SMFile *pMFile, SDiskID did, int vid, uint32_t ver) { char fname[TSDB_FILENAME_LEN]; @@ -185,7 +186,7 @@ int tsdbLoadMFileHeader(SMFile *pMFile, SMFInfo *pInfo) { return 0; } -int tsdbScanAndTryFixMFile(STsdbRepo *pRepo) { +int tsdbScanAndTryFixMFile(STsdb *pRepo) { SMFile * pMFile = pRepo->fs->cstatus->pmf; struct stat mfstat; SMFile mf; @@ -291,6 +292,8 @@ static int tsdbRollBackMFile(SMFile *pMFile) { return 0; } +#endif + // ============== Operations on SDFile void tsdbInitDFile(SDFile *pDFile, SDiskID did, int vid, int fid, uint32_t ver, TSDB_FILE_T ftype) { char fname[TSDB_FILENAME_LEN]; @@ -397,7 +400,7 @@ int tsdbUpdateDFileHeader(SDFile *pDFile) { } void *ptr = buf; - taosEncodeFixedU32(&ptr, TSDB_FS_VERSION); + taosEncodeFixedU32(&ptr, 0); tsdbEncodeDFInfo(&ptr, &(pDFile->info)); taosCalcChecksumAppend(0, (uint8_t *)buf, TSDB_FILE_HEAD_SIZE); @@ -433,7 +436,7 @@ int tsdbLoadDFileHeader(SDFile *pDFile, SDFInfo *pInfo) { return 0; } -static int tsdbScanAndTryFixDFile(STsdbRepo *pRepo, SDFile *pDFile) { +static int tsdbScanAndTryFixDFile(STsdb *pRepo, SDFile *pDFile) { struct stat dfstat; SDFile df; @@ -442,7 
+445,7 @@ static int tsdbScanAndTryFixDFile(STsdbRepo *pRepo, SDFile *pDFile) { if (access(TSDB_FILE_FULL_NAME(pDFile), F_OK) != 0) { tsdbError("vgId:%d data file %s not exit, report to upper layer to fix it", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile)); - pRepo->state |= TSDB_STATE_BAD_DATA; + // pRepo->state |= TSDB_STATE_BAD_DATA; TSDB_FILE_SET_STATE(pDFile, TSDB_FILE_STATE_BAD); return 0; } @@ -457,7 +460,7 @@ static int tsdbScanAndTryFixDFile(STsdbRepo *pRepo, SDFile *pDFile) { return -1; } - if (taosFtruncate(df.fd, df.info.size) < 0) { + if (taosFtruncateFile(df.fd, df.info.size) < 0) { terrno = TAOS_SYSTEM_ERROR(errno); tsdbCloseDFile(&df); return -1; @@ -474,7 +477,7 @@ static int tsdbScanAndTryFixDFile(STsdbRepo *pRepo, SDFile *pDFile) { } else if (pDFile->info.size > dfstat.st_size) { tsdbError("vgId:%d data file %s has wrong size %" PRId64 " expected %" PRId64 ", report to upper layer to fix it", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile), dfstat.st_size, pDFile->info.size); - pRepo->state |= TSDB_STATE_BAD_DATA; + // pRepo->state |= TSDB_STATE_BAD_DATA; TSDB_FILE_SET_STATE(pDFile, TSDB_FILE_STATE_BAD); terrno = TSDB_CODE_TDB_FILE_CORRUPTED; return 0; @@ -538,7 +541,7 @@ static int tsdbRollBackDFile(SDFile *pDFile) { return -1; } - if (taosFtruncate(TSDB_FILE_FD(&df), pDFile->info.size) < 0) { + if (taosFtruncateFile(TSDB_FILE_FD(&df), pDFile->info.size) < 0) { terrno = TAOS_SYSTEM_ERROR(errno); tsdbCloseDFile(&df); return -1; @@ -651,7 +654,7 @@ int tsdbUpdateDFileSetHeader(SDFileSet *pSet) { return 0; } -int tsdbScanAndTryFixDFileSet(STsdbRepo *pRepo, SDFileSet *pSet) { +int tsdbScanAndTryFixDFileSet(STsdb *pRepo, SDFileSet *pSet) { for (TSDB_FILE_T ftype = 0; ftype < TSDB_FILE_MAX; ftype++) { if (tsdbScanAndTryFixDFile(pRepo, TSDB_DFILE_IN_SET(pSet, ftype)) < 0) { return -1; diff --git a/source/dnode/vnode/tsdb/src/tsdbMain.c b/source/dnode/vnode/tsdb/src/tsdbMain.c index c8bcfc6906..4cb2eab644 100644 --- a/source/dnode/vnode/tsdb/src/tsdbMain.c 
+++ b/source/dnode/vnode/tsdb/src/tsdbMain.c @@ -72,7 +72,7 @@ static STsdb *tsdbNew(const char *path, const STsdbCfg *pTsdbCfg, SMemAllocatorF } pTsdb->path = strdup(path); - tsdbOptionsCopy(&(pTsdb->options), pTsdbCfg); + tsdbOptionsCopy(&(pTsdb->config), pTsdbCfg); pTsdb->pmaf = pMAF; return pTsdb; diff --git a/source/dnode/vnode/tsdb/src/tsdbMemTable.c b/source/dnode/vnode/tsdb/src/tsdbMemTable.c index 7b0df18f5a..539d5440d4 100644 --- a/source/dnode/vnode/tsdb/src/tsdbMemTable.c +++ b/source/dnode/vnode/tsdb/src/tsdbMemTable.c @@ -15,22 +15,13 @@ #include "tsdbDef.h" -#if 1 typedef struct STbData { - TD_SLIST_NODE(STbData); - SSubmitMsg *pMsg; -} STbData; -#else -typedef struct STbData { - TD_SLIST_NODE(STbData); - uint64_t uid; // TODO: change here as tb_uid_t + tb_uid_t uid; TSKEY keyMin; TSKEY keyMax; - uint64_t nRows; - SSkipList *pData; // Here need a container, may not use the SL - T_REF_DECLARE() + int64_t nrows; + SSkipList *pData; } STbData; -#endif struct STsdbMemTable { T_REF_DECLARE() @@ -40,45 +31,315 @@ struct STsdbMemTable { uint64_t nRow; SMemAllocator *pMA; // Container +#if 1 + SSkipList *pSlIdx; // SSkiplist + SHashObj * pHashIdx; +#else TD_SLIST(STbData) list; +#endif }; -STsdbMemTable *tsdbNewMemTable(SMemAllocatorFactory *pMAF) { - STsdbMemTable *pMemTable; - SMemAllocator *pMA; +static int tsdbScanAndConvertSubmitMsg(STsdb *pTsdb, SSubmitMsg *pMsg); +static int tsdbMemTableInsertTbData(STsdb *pRepo, SSubmitBlk *pBlock, int32_t *pAffectedRows); +static STbData *tsdbNewTbData(tb_uid_t uid); +static void tsdbFreeTbData(STbData *pTbData); +static char * tsdbGetTsTupleKey(const void *data); +static int tsdbTbDataComp(const void *arg1, const void *arg2); +static char * tsdbTbDataGetUid(const void *arg); - pMA = (*pMAF->create)(pMAF); - ASSERT(pMA != NULL); - - pMemTable = (STsdbMemTable *)TD_MA_MALLOC(pMA, sizeof(*pMemTable)); +STsdbMemTable *tsdbNewMemTable(STsdb *pTsdb) { + STsdbMemTable *pMemTable = (STsdbMemTable *)calloc(1, 
sizeof(*pMemTable)); if (pMemTable == NULL) { - (*pMAF->destroy)(pMAF, pMA); + terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } T_REF_INIT_VAL(pMemTable, 1); taosInitRWLatch(&(pMemTable->latch)); - pMemTable->keyMin = TSKEY_MAX; pMemTable->keyMax = TSKEY_MIN; + pMemTable->keyMin = TSKEY_MAX; pMemTable->nRow = 0; - pMemTable->pMA = pMA; - TD_SLIST_INIT(&(pMemTable->list)); + pMemTable->pMA = pTsdb->pmaf->create(pTsdb->pmaf); + if (pMemTable->pMA == NULL) { + free(pMemTable); + return NULL; + } + + // Initialize the container + pMemTable->pSlIdx = + tSkipListCreate(5, TSDB_DATA_TYPE_BIGINT, sizeof(tb_uid_t), tsdbTbDataComp, SL_DISCARD_DUP_KEY, tsdbTbDataGetUid); + if (pMemTable->pSlIdx == NULL) { + pTsdb->pmaf->destroy(pTsdb->pmaf, pMemTable->pMA); + free(pMemTable); + return NULL; + } + + pMemTable->pHashIdx = taosHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); + if (pMemTable->pHashIdx == NULL) { + pTsdb->pmaf->destroy(pTsdb->pmaf, pMemTable->pMA); + tSkipListDestroy(pMemTable->pSlIdx); + free(pMemTable); + return NULL; + } - // TODO return pMemTable; } -void tsdbFreeMemTable(SMemAllocatorFactory *pMAF, STsdbMemTable *pMemTable) { - SMemAllocator *pMA = pMemTable->pMA; - - if (TD_MA_FREE_FUNC(pMA) != NULL) { - // TODO - ASSERT(0); +void tsdbFreeMemTable(STsdb *pTsdb, STsdbMemTable *pMemTable) { + if (pMemTable) { + taosHashCleanup(pMemTable->pHashIdx); + tSkipListDestroy(pMemTable->pSlIdx); + if (pMemTable->pMA) { + pTsdb->pmaf->destroy(pTsdb->pmaf, pMemTable->pMA); + } + free(pMemTable); } - - (*pMAF->destroy)(pMAF, pMA); } +int tsdbMemTableInsert(STsdb *pTsdb, STsdbMemTable *pMemTable, SSubmitMsg *pMsg, SShellSubmitRspMsg *pRsp) { + SSubmitBlk * pBlock = NULL; + SSubmitMsgIter msgIter = {0}; + int32_t affectedrows = 0, numOfRows = 0; + + if (tsdbScanAndConvertSubmitMsg(pTsdb, pMsg) < 0) { + if (terrno != TSDB_CODE_TDB_TABLE_RECONFIGURE) { + tsdbError("vgId:%d failed to insert data since %s", REPO_ID(pTsdb), 
tstrerror(terrno)); + } + return -1; + } + + tInitSubmitMsgIter(pMsg, &msgIter); + while (true) { + tGetSubmitMsgNext(&msgIter, &pBlock); + if (pBlock == NULL) break; + if (tsdbMemTableInsertTbData(pTsdb, pBlock, &affectedrows) < 0) { + return -1; + } + + numOfRows += pBlock->numOfRows; + } + + if (pRsp != NULL) { + pRsp->affectedRows = htonl(affectedrows); + pRsp->numOfRows = htonl(numOfRows); + } + + return 0; +} + +static int tsdbScanAndConvertSubmitMsg(STsdb *pTsdb, SSubmitMsg *pMsg) { + ASSERT(pMsg != NULL); + // STsdbMeta * pMeta = pTsdb->tsdbMeta; + SSubmitMsgIter msgIter = {0}; + SSubmitBlk * pBlock = NULL; + SSubmitBlkIter blkIter = {0}; + SMemRow row = NULL; + TSKEY now = taosGetTimestamp(pTsdb->config.precision); + TSKEY minKey = now - tsTickPerDay[pTsdb->config.precision] * pTsdb->config.keep; + TSKEY maxKey = now + tsTickPerDay[pTsdb->config.precision] * pTsdb->config.daysPerFile; + + terrno = TSDB_CODE_SUCCESS; + pMsg->length = htonl(pMsg->length); + pMsg->numOfBlocks = htonl(pMsg->numOfBlocks); + + if (tInitSubmitMsgIter(pMsg, &msgIter) < 0) return -1; + while (true) { + if (tGetSubmitMsgNext(&msgIter, &pBlock) < 0) return -1; + if (pBlock == NULL) break; + + pBlock->uid = htobe64(pBlock->uid); + pBlock->tid = htonl(pBlock->tid); + pBlock->sversion = htonl(pBlock->sversion); + pBlock->dataLen = htonl(pBlock->dataLen); + pBlock->schemaLen = htonl(pBlock->schemaLen); + pBlock->numOfRows = htons(pBlock->numOfRows); + +#if 0 + if (pBlock->tid <= 0 || pBlock->tid >= pMeta->maxTables) { + tsdbError("vgId:%d failed to get table to insert data, uid %" PRIu64 " tid %d", REPO_ID(pTsdb), pBlock->uid, + pBlock->tid); + terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; + return -1; + } + + STable *pTable = pMeta->tables[pBlock->tid]; + if (pTable == NULL || TABLE_UID(pTable) != pBlock->uid) { + tsdbError("vgId:%d failed to get table to insert data, uid %" PRIu64 " tid %d", REPO_ID(pTsdb), pBlock->uid, + pBlock->tid); + terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; + return -1; 
+ } + + if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) { + tsdbError("vgId:%d invalid action trying to insert a super table %s", REPO_ID(pTsdb), TABLE_CHAR_NAME(pTable)); + terrno = TSDB_CODE_TDB_INVALID_ACTION; + return -1; + } + + // Check schema version and update schema if needed + if (tsdbCheckTableSchema(pTsdb, pBlock, pTable) < 0) { + if (terrno == TSDB_CODE_TDB_TABLE_RECONFIGURE) { + continue; + } else { + return -1; + } + } + + tsdbInitSubmitBlkIter(pBlock, &blkIter); + while ((row = tsdbGetSubmitBlkNext(&blkIter)) != NULL) { + if (tsdbCheckRowRange(pTsdb, pTable, row, minKey, maxKey, now) < 0) { + return -1; + } + } +#endif + } + + if (terrno != TSDB_CODE_SUCCESS) return -1; + return 0; +} + +static int tsdbMemTableInsertTbData(STsdb *pTsdb, SSubmitBlk *pBlock, int32_t *pAffectedRows) { + // STsdbMeta *pMeta = pRepo->tsdbMeta; + // int32_t points = 0; + // STable *pTable = NULL; + SSubmitBlkIter blkIter = {0}; + STsdbMemTable *pMemTable = pTsdb->mem; + void * tptr; + STbData * pTbData; + SMemRow row; + TSKEY keyMin; + TSKEY keyMax; + + // SMemTable *pMemTable = NULL; + // STableData *pTableData = NULL; + // STsdbCfg *pCfg = &(pRepo->config); + + tptr = taosHashGet(pMemTable->pHashIdx, &(pBlock->uid), sizeof(pBlock->uid)); + if (tptr == NULL) { + pTbData = tsdbNewTbData(pBlock->uid); + if (pTbData == NULL) { + return -1; + } + + // Put into hash + taosHashPut(pMemTable->pHashIdx, &(pBlock->uid), sizeof(pBlock->uid), &(pTbData), sizeof(pTbData)); + + // Put into skiplist + tSkipListPut(pMemTable->pSlIdx, pTbData); + } else { + pTbData = *(STbData **)tptr; + } + + tInitSubmitBlkIter(pBlock, &blkIter); + if (blkIter.row == NULL) return 0; + keyMin = memRowKey(blkIter.row); + + tSkipListPutBatchByIter(pTbData->pData, &blkIter, (iter_next_fn_t)tGetSubmitBlkNext); + + // Set statistics + keyMax = memRowKey(blkIter.row); + + pTbData->nrows += pBlock->numOfRows; + if (pTbData->keyMin > keyMin) pTbData->keyMin = keyMin; + if (pTbData->keyMax < keyMax) pTbData->keyMax 
= keyMax; + + pMemTable->nRow += pBlock->numOfRows; + if (pMemTable->keyMin > keyMin) pMemTable->keyMin = keyMin; + if (pMemTable->keyMax < keyMax) pMemTable->keyMax = keyMax; + + // SMemRow lastRow = NULL; + // int64_t osize = SL_SIZE(pTableData->pData); + // tsdbSetupSkipListHookFns(pTableData->pData, pRepo, pTable, &points, &lastRow); + // tSkipListPutBatchByIter(pTableData->pData, &blkIter, (iter_next_fn_t)tsdbGetSubmitBlkNext); + // int64_t dsize = SL_SIZE(pTableData->pData) - osize; + // (*pAffectedRows) += points; + + // if(lastRow != NULL) { + // TSKEY lastRowKey = memRowKey(lastRow); + // if (pMemTable->keyFirst > firstRowKey) pMemTable->keyFirst = firstRowKey; + // pMemTable->numOfRows += dsize; + + // if (pTableData->keyFirst > firstRowKey) pTableData->keyFirst = firstRowKey; + // pTableData->numOfRows += dsize; + // if (pMemTable->keyLast < lastRowKey) pMemTable->keyLast = lastRowKey; + // if (pTableData->keyLast < lastRowKey) pTableData->keyLast = lastRowKey; + // if (tsdbUpdateTableLatestInfo(pRepo, pTable, lastRow) < 0) { + // return -1; + // } + // } + + // STSchema *pSchema = tsdbGetTableSchemaByVersion(pTable, pBlock->sversion, -1); + // pRepo->stat.pointsWritten += points * schemaNCols(pSchema); + // pRepo->stat.totalStorage += points * schemaVLen(pSchema); + + return 0; +} + +static STbData *tsdbNewTbData(tb_uid_t uid) { + STbData *pTbData = (STbData *)calloc(1, sizeof(*pTbData)); + if (pTbData == NULL) { + return NULL; + } + + pTbData->uid = uid; + pTbData->keyMin = TSKEY_MAX; + pTbData->keyMax = TSKEY_MIN; + pTbData->nrows = 0; + + // uint8_t skipListCreateFlags; + // if (pCfg->update == TD_ROW_DISCARD_UPDATE) + // skipListCreateFlags = SL_DISCARD_DUP_KEY; + // else + // skipListCreateFlags = SL_UPDATE_DUP_KEY; + + // pTableData->pData = + // tSkipListCreate(TSDB_DATA_SKIPLIST_LEVEL, TSDB_DATA_TYPE_TIMESTAMP, TYPE_BYTES[TSDB_DATA_TYPE_TIMESTAMP], + // tkeyComparFn, skipListCreateFlags, tsdbGetTsTupleKey); + // if (pTableData->pData == NULL) { 
+ // terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + // free(pTableData); + // return NULL; + // } + + pTbData->pData = tSkipListCreate(5, TSDB_DATA_TYPE_TIMESTAMP, sizeof(int64_t), tkeyComparFn, SL_DISCARD_DUP_KEY, + tsdbGetTsTupleKey); + if (pTbData->pData == NULL) { + free(pTbData); + return NULL; + } + + return pTbData; +} + +static void tsdbFreeTbData(STbData *pTbData) { + if (pTbData) { + tSkipListDestroy(pTbData->pData); + free(pTbData); + } +} + +static char *tsdbGetTsTupleKey(const void *data) { return memRowKeys((SMemRow)data); } + +static int tsdbTbDataComp(const void *arg1, const void *arg2) { + STbData *pTbData1 = (STbData *)arg1; + STbData *pTbData2 = (STbData *)arg2; + + if (pTbData1->uid > pTbData2->uid) { + return 1; + } else if (pTbData1->uid == pTbData2->uid) { + return 0; + } else { + return -1; + } +} + +static char *tsdbTbDataGetUid(const void *arg) { + STbData *pTbData = (STbData *)arg; + return (char *)(&(pTbData->uid)); +} + +/* ------------------------ REFACTORING ------------------------ */ +#if 0 int tsdbInsertDataToMemTable(STsdbMemTable *pMemTable, SSubmitMsg *pMsg) { SMemAllocator *pMA = pMemTable->pMA; STbData * pTbData = (STbData *)TD_MA_MALLOC(pMA, sizeof(*pTbData)); @@ -91,4 +352,609 @@ int tsdbInsertDataToMemTable(STsdbMemTable *pMemTable, SSubmitMsg *pMsg) { return 0; } -/* ------------------------ STATIC METHODS ------------------------ */ \ No newline at end of file +#include "tdataformat.h" +#include "tfunctional.h" +#include "tsdbRowMergeBuf.h" +#include "tsdbint.h" +#include "tskiplist.h" + +#define TSDB_DATA_SKIPLIST_LEVEL 5 +#define TSDB_MAX_INSERT_BATCH 512 + +typedef struct { + int32_t totalLen; + int32_t len; + SMemRow row; +} SSubmitBlkIter; + +typedef struct { + int32_t totalLen; + int32_t len; + void * pMsg; +} SSubmitMsgIter; + +static SMemTable * tsdbNewMemTable(STsdbRepo *pRepo); +static void tsdbFreeMemTable(SMemTable *pMemTable); +static STableData* tsdbNewTableData(STsdbCfg *pCfg, STable *pTable); +static void 
tsdbFreeTableData(STableData *pTableData); +static int tsdbAdjustMemMaxTables(SMemTable *pMemTable, int maxTables); +static int tsdbAppendTableRowToCols(STable *pTable, SDataCols *pCols, STSchema **ppSchema, SMemRow row); +static int tsdbInitSubmitBlkIter(SSubmitBlk *pBlock, SSubmitBlkIter *pIter); +static SMemRow tsdbGetSubmitBlkNext(SSubmitBlkIter *pIter); +static int tsdbScanAndConvertSubmitMsg(STsdbRepo *pRepo, SSubmitMsg *pMsg); +static int tsdbInsertDataToTable(STsdbRepo *pRepo, SSubmitBlk *pBlock, int32_t *affectedrows); +static int tsdbInitSubmitMsgIter(SSubmitMsg *pMsg, SSubmitMsgIter *pIter); +static int tsdbGetSubmitMsgNext(SSubmitMsgIter *pIter, SSubmitBlk **pPBlock); +static int tsdbCheckTableSchema(STsdbRepo *pRepo, SSubmitBlk *pBlock, STable *pTable); +static int tsdbUpdateTableLatestInfo(STsdbRepo *pRepo, STable *pTable, SMemRow row); + +static FORCE_INLINE int tsdbCheckRowRange(STsdbRepo *pRepo, STable *pTable, SMemRow row, TSKEY minKey, TSKEY maxKey, + TSKEY now); + + +// ---------------- INTERNAL FUNCTIONS ---------------- +int tsdbRefMemTable(STsdbRepo *pRepo, SMemTable *pMemTable) { + if (pMemTable == NULL) return 0; + int ref = T_REF_INC(pMemTable); + tsdbDebug("vgId:%d ref memtable %p ref %d", REPO_ID(pRepo), pMemTable, ref); + return 0; +} + +// Need to lock the repository +int tsdbUnRefMemTable(STsdbRepo *pRepo, SMemTable *pMemTable) { + if (pMemTable == NULL) return 0; + + int ref = T_REF_DEC(pMemTable); + tsdbDebug("vgId:%d unref memtable %p ref %d", REPO_ID(pRepo), pMemTable, ref); + if (ref == 0) { + STsdbBufPool *pBufPool = pRepo->pPool; + + SListNode *pNode = NULL; + bool addNew = false; + if (tsdbLockRepo(pRepo) < 0) return -1; + while ((pNode = tdListPopHead(pMemTable->bufBlockList)) != NULL) { + if (pBufPool->nRecycleBlocks > 0) { + tsdbRecycleBufferBlock(pBufPool, pNode, false); + pBufPool->nRecycleBlocks -= 1; + } else { + if(pBufPool->nElasticBlocks > 0 && listNEles(pBufPool->bufBlockList) > 2) { + 
tsdbRecycleBufferBlock(pBufPool, pNode, true); + } else { + tdListAppendNode(pBufPool->bufBlockList, pNode); + addNew = true; + } + } + } + if (addNew) { + int code = pthread_cond_signal(&pBufPool->poolNotEmpty); + if (code != 0) { + if (tsdbUnlockRepo(pRepo) < 0) return -1; + tsdbError("vgId:%d failed to signal pool not empty since %s", REPO_ID(pRepo), strerror(code)); + terrno = TAOS_SYSTEM_ERROR(code); + return -1; + } + } + + if (tsdbUnlockRepo(pRepo) < 0) return -1; + + for (int i = 0; i < pMemTable->maxTables; i++) { + if (pMemTable->tData[i] != NULL) { + tsdbFreeTableData(pMemTable->tData[i]); + } + } + + tdListDiscard(pMemTable->actList); + tdListDiscard(pMemTable->bufBlockList); + tsdbFreeMemTable(pMemTable); + } + return 0; +} + +int tsdbTakeMemSnapshot(STsdbRepo *pRepo, SMemSnapshot *pSnapshot, SArray *pATable) { + memset(pSnapshot, 0, sizeof(*pSnapshot)); + + if (tsdbLockRepo(pRepo) < 0) return -1; + + pSnapshot->omem = pRepo->mem; + pSnapshot->imem = pRepo->imem; + tsdbRefMemTable(pRepo, pRepo->mem); + tsdbRefMemTable(pRepo, pRepo->imem); + + if (tsdbUnlockRepo(pRepo) < 0) return -1; + + if (pSnapshot->omem) { + taosRLockLatch(&(pSnapshot->omem->latch)); + + pSnapshot->mem = &(pSnapshot->mtable); + + pSnapshot->mem->tData = (STableData **)calloc(pSnapshot->omem->maxTables, sizeof(STableData *)); + if (pSnapshot->mem->tData == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + taosRUnLockLatch(&(pSnapshot->omem->latch)); + tsdbUnRefMemTable(pRepo, pSnapshot->omem); + tsdbUnRefMemTable(pRepo, pSnapshot->imem); + pSnapshot->mem = NULL; + pSnapshot->imem = NULL; + pSnapshot->omem = NULL; + return -1; + } + + pSnapshot->mem->keyFirst = pSnapshot->omem->keyFirst; + pSnapshot->mem->keyLast = pSnapshot->omem->keyLast; + pSnapshot->mem->numOfRows = pSnapshot->omem->numOfRows; + pSnapshot->mem->maxTables = pSnapshot->omem->maxTables; + + for (size_t i = 0; i < taosArrayGetSize(pATable); i++) { + STable * pTable = *(STable **)taosArrayGet(pATable, i); + int32_t 
tid = TABLE_TID(pTable); + STableData *pTableData = (tid < pSnapshot->omem->maxTables) ? pSnapshot->omem->tData[tid] : NULL; + + if ((pTableData == NULL) || (TABLE_UID(pTable) != pTableData->uid)) continue; + + pSnapshot->mem->tData[tid] = pTableData; + T_REF_INC(pTableData); + } + + taosRUnLockLatch(&(pSnapshot->omem->latch)); + } + + tsdbDebug("vgId:%d take memory snapshot, pMem %p pIMem %p", REPO_ID(pRepo), pSnapshot->omem, pSnapshot->imem); + return 0; +} + +void tsdbUnTakeMemSnapShot(STsdbRepo *pRepo, SMemSnapshot *pSnapshot) { + tsdbDebug("vgId:%d untake memory snapshot, pMem %p pIMem %p", REPO_ID(pRepo), pSnapshot->omem, pSnapshot->imem); + + if (pSnapshot->mem) { + ASSERT(pSnapshot->omem != NULL); + + for (size_t i = 0; i < pSnapshot->mem->maxTables; i++) { + STableData *pTableData = pSnapshot->mem->tData[i]; + if (pTableData) { + tsdbFreeTableData(pTableData); + } + } + tfree(pSnapshot->mem->tData); + + tsdbUnRefMemTable(pRepo, pSnapshot->omem); + } + + tsdbUnRefMemTable(pRepo, pSnapshot->imem); + + pSnapshot->mem = NULL; + pSnapshot->imem = NULL; + pSnapshot->omem = NULL; +} + +int tsdbSyncCommitConfig(STsdbRepo* pRepo) { + ASSERT(pRepo->config_changed == true); + tsem_wait(&(pRepo->readyToCommit)); + + if (pRepo->code != TSDB_CODE_SUCCESS) { + tsdbWarn("vgId:%d try to commit config when TSDB not in good state: %s", REPO_ID(pRepo), tstrerror(terrno)); + } + + if (tsdbLockRepo(pRepo) < 0) return -1; + tsdbScheduleCommit(pRepo, COMMIT_CONFIG_REQ); + if (tsdbUnlockRepo(pRepo) < 0) return -1; + + tsem_wait(&(pRepo->readyToCommit)); + tsem_post(&(pRepo->readyToCommit)); + + if (pRepo->code != TSDB_CODE_SUCCESS) { + terrno = pRepo->code; + return -1; + } + + terrno = TSDB_CODE_SUCCESS; + return 0; +} + +/** + * This is an important function to load data or try to load data from memory skiplist iterator. + * + * This function load memory data until: + * 1. iterator ends + * 2. data key exceeds maxKey + * 3. 
rowsIncreased = rowsInserted - rowsDeleteSucceed >= maxRowsToRead + * 4. operations in pCols not exceeds its max capacity if pCols is given + * + * The function tries to procceed AS MUCH AS POSSIBLE. + */ +int tsdbLoadDataFromCache(STable *pTable, SSkipListIterator *pIter, TSKEY maxKey, int maxRowsToRead, SDataCols *pCols, + TKEY *filterKeys, int nFilterKeys, bool keepDup, SMergeInfo *pMergeInfo) { + ASSERT(maxRowsToRead > 0 && nFilterKeys >= 0); + if (pIter == NULL) return 0; + STSchema * pSchema = NULL; + TSKEY rowKey = 0; + TSKEY fKey = 0; + bool isRowDel = false; + int filterIter = 0; + SMemRow row = NULL; + SMergeInfo mInfo; + + if (pMergeInfo == NULL) pMergeInfo = &mInfo; + + memset(pMergeInfo, 0, sizeof(*pMergeInfo)); + pMergeInfo->keyFirst = INT64_MAX; + pMergeInfo->keyLast = INT64_MIN; + if (pCols) tdResetDataCols(pCols); + + row = tsdbNextIterRow(pIter); + if (row == NULL || memRowKey(row) > maxKey) { + rowKey = INT64_MAX; + isRowDel = false; + } else { + rowKey = memRowKey(row); + isRowDel = memRowDeleted(row); + } + + if (filterIter >= nFilterKeys) { + fKey = INT64_MAX; + } else { + fKey = tdGetKey(filterKeys[filterIter]); + } + + while (true) { + if (fKey == INT64_MAX && rowKey == INT64_MAX) break; + + if (fKey < rowKey) { + pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, fKey); + pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, fKey); + + filterIter++; + if (filterIter >= nFilterKeys) { + fKey = INT64_MAX; + } else { + fKey = tdGetKey(filterKeys[filterIter]); + } + } else if (fKey > rowKey) { + if (isRowDel) { + pMergeInfo->rowsDeleteFailed++; + } else { + if (pMergeInfo->rowsInserted - pMergeInfo->rowsDeleteSucceed >= maxRowsToRead) break; + if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break; + pMergeInfo->rowsInserted++; + pMergeInfo->nOperations++; + pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, rowKey); + pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, rowKey); + tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row); + } + + 
tSkipListIterNext(pIter); + row = tsdbNextIterRow(pIter); + if (row == NULL || memRowKey(row) > maxKey) { + rowKey = INT64_MAX; + isRowDel = false; + } else { + rowKey = memRowKey(row); + isRowDel = memRowDeleted(row); + } + } else { + if (isRowDel) { + ASSERT(!keepDup); + if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break; + pMergeInfo->rowsDeleteSucceed++; + pMergeInfo->nOperations++; + tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row); + } else { + if (keepDup) { + if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break; + pMergeInfo->rowsUpdated++; + pMergeInfo->nOperations++; + pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, rowKey); + pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, rowKey); + tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row); + } else { + pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, fKey); + pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, fKey); + } + } + + tSkipListIterNext(pIter); + row = tsdbNextIterRow(pIter); + if (row == NULL || memRowKey(row) > maxKey) { + rowKey = INT64_MAX; + isRowDel = false; + } else { + rowKey = memRowKey(row); + isRowDel = memRowDeleted(row); + } + + filterIter++; + if (filterIter >= nFilterKeys) { + fKey = INT64_MAX; + } else { + fKey = tdGetKey(filterKeys[filterIter]); + } + } + } + + return 0; +} + +// ---------------- LOCAL FUNCTIONS ---------------- +static int tsdbAppendTableRowToCols(STable *pTable, SDataCols *pCols, STSchema **ppSchema, SMemRow row) { + if (pCols) { + if (*ppSchema == NULL || schemaVersion(*ppSchema) != memRowVersion(row)) { + *ppSchema = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row), (int8_t)memRowType(row)); + if (*ppSchema == NULL) { + ASSERT(false); + return -1; + } + } + + tdAppendMemRowToDataCol(row, *ppSchema, pCols, true, 0); + } + + return 0; +} + +static FORCE_INLINE int tsdbCheckRowRange(STsdbRepo *pRepo, STable *pTable, SMemRow row, TSKEY minKey, TSKEY maxKey, + TSKEY now) { + TSKEY rowKey = memRowKey(row); + if (rowKey < 
minKey || rowKey > maxKey) { + tsdbError("vgId:%d table %s tid %d uid %" PRIu64 " timestamp is out of range! now %" PRId64 " minKey %" PRId64 + " maxKey %" PRId64 " row key %" PRId64, + REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), now, minKey, maxKey, + rowKey); + terrno = TSDB_CODE_TDB_TIMESTAMP_OUT_OF_RANGE; + return -1; + } + + return 0; +} + + +//row1 has higher priority +static SMemRow tsdbInsertDupKeyMerge(SMemRow row1, SMemRow row2, STsdbRepo* pRepo, + STSchema **ppSchema1, STSchema **ppSchema2, + STable* pTable, int32_t* pPoints, SMemRow* pLastRow) { + + //for compatiblity, duplicate key inserted when update=0 should be also calculated as affected rows! + if(row1 == NULL && row2 == NULL && pRepo->config.update == TD_ROW_DISCARD_UPDATE) { + (*pPoints)++; + return NULL; + } + + tsdbTrace("vgId:%d a row is %s table %s tid %d uid %" PRIu64 " key %" PRIu64, REPO_ID(pRepo), + "updated in", TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), + memRowKey(row1)); + + if(row2 == NULL || pRepo->config.update != TD_ROW_PARTIAL_UPDATE) { + void* pMem = tsdbAllocBytes(pRepo, memRowTLen(row1)); + if(pMem == NULL) return NULL; + memRowCpy(pMem, row1); + (*pPoints)++; + *pLastRow = pMem; + return pMem; + } + + STSchema *pSchema1 = *ppSchema1; + STSchema *pSchema2 = *ppSchema2; + SMergeBuf * pBuf = &pRepo->mergeBuf; + int dv1 = memRowVersion(row1); + int dv2 = memRowVersion(row2); + if(pSchema1 == NULL || schemaVersion(pSchema1) != dv1) { + if(pSchema2 != NULL && schemaVersion(pSchema2) == dv1) { + *ppSchema1 = pSchema2; + } else { + *ppSchema1 = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row1), (int8_t)memRowType(row1)); + } + pSchema1 = *ppSchema1; + } + + if(pSchema2 == NULL || schemaVersion(pSchema2) != dv2) { + if(schemaVersion(pSchema1) == dv2) { + pSchema2 = pSchema1; + } else { + *ppSchema2 = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row2), (int8_t)memRowType(row2)); + pSchema2 = 
*ppSchema2; + } + } + + SMemRow tmp = tsdbMergeTwoRows(pBuf, row1, row2, pSchema1, pSchema2); + + void* pMem = tsdbAllocBytes(pRepo, memRowTLen(tmp)); + if(pMem == NULL) return NULL; + memRowCpy(pMem, tmp); + + (*pPoints)++; + *pLastRow = pMem; + return pMem; +} + +static void* tsdbInsertDupKeyMergePacked(void** args) { + return tsdbInsertDupKeyMerge(args[0], args[1], args[2], (STSchema**)&args[3], (STSchema**)&args[4], args[5], args[6], args[7]); +} + +static void tsdbSetupSkipListHookFns(SSkipList* pSkipList, STsdbRepo *pRepo, STable *pTable, int32_t* pPoints, SMemRow* pLastRow) { + + if(pSkipList->insertHandleFn == NULL) { + tGenericSavedFunc *dupHandleSavedFunc = genericSavedFuncInit((GenericVaFunc)&tsdbInsertDupKeyMergePacked, 9); + dupHandleSavedFunc->args[2] = pRepo; + dupHandleSavedFunc->args[3] = NULL; + dupHandleSavedFunc->args[4] = NULL; + dupHandleSavedFunc->args[5] = pTable; + pSkipList->insertHandleFn = dupHandleSavedFunc; + } + pSkipList->insertHandleFn->args[6] = pPoints; + pSkipList->insertHandleFn->args[7] = pLastRow; +} + +static int tsdbCheckTableSchema(STsdbRepo *pRepo, SSubmitBlk *pBlock, STable *pTable) { + ASSERT(pTable != NULL); + + STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1, -1); + int sversion = schemaVersion(pSchema); + + if (pBlock->sversion == sversion) { + return 0; + } else { + if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) { // stream table is not allowed to change schema + terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION; + return -1; + } + } + + if (pBlock->sversion > sversion) { // may need to update table schema + if (pBlock->schemaLen > 0) { + tsdbDebug( + "vgId:%d table %s tid %d uid %" PRIu64 " schema version %d is out of data, client version %d, update...", + REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), sversion, pBlock->sversion); + ASSERT(pBlock->schemaLen % sizeof(STColumn) == 0); + int numOfCols = pBlock->schemaLen / sizeof(STColumn); + STColumn *pTCol = (STColumn 
*)pBlock->data; + + STSchemaBuilder schemaBuilder = {0}; + if (tdInitTSchemaBuilder(&schemaBuilder, pBlock->sversion) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbError("vgId:%d failed to update schema of table %s since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), + tstrerror(terrno)); + return -1; + } + + for (int i = 0; i < numOfCols; i++) { + if (tdAddColToSchema(&schemaBuilder, pTCol[i].type, htons(pTCol[i].colId), htons(pTCol[i].bytes)) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbError("vgId:%d failed to update schema of table %s since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), + tstrerror(terrno)); + tdDestroyTSchemaBuilder(&schemaBuilder); + return -1; + } + } + + STSchema *pNSchema = tdGetSchemaFromBuilder(&schemaBuilder); + if (pNSchema == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tdDestroyTSchemaBuilder(&schemaBuilder); + return -1; + } + + tdDestroyTSchemaBuilder(&schemaBuilder); + tsdbUpdateTableSchema(pRepo, pTable, pNSchema, true); + } else { + tsdbDebug( + "vgId:%d table %s tid %d uid %" PRIu64 " schema version %d is out of data, client version %d, reconfigure...", + REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), sversion, pBlock->sversion); + terrno = TSDB_CODE_TDB_TABLE_RECONFIGURE; + return -1; + } + } else { + ASSERT(pBlock->sversion >= 0); + if (tsdbGetTableSchemaImpl(pTable, false, false, pBlock->sversion, -1) == NULL) { + tsdbError("vgId:%d invalid submit schema version %d to table %s tid %d from client", REPO_ID(pRepo), + pBlock->sversion, TABLE_CHAR_NAME(pTable), TABLE_TID(pTable)); + terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION; + return -1; + } + } + + return 0; +} + +static void updateTableLatestColumn(STsdbRepo *pRepo, STable *pTable, SMemRow row) { + tsdbDebug("vgId:%d updateTableLatestColumn, %s row version:%d", REPO_ID(pRepo), pTable->name->data, + memRowVersion(row)); + + STSchema* pSchema = tsdbGetTableLatestSchema(pTable); + if (tsdbUpdateLastColSchema(pTable, pSchema) < 
0) { + return; + } + + pSchema = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row), (int8_t)memRowType(row)); + if (pSchema == NULL) { + return; + } + + SDataCol *pLatestCols = pTable->lastCols; + int32_t kvIdx = 0; + + for (int16_t j = 0; j < schemaNCols(pSchema); j++) { + STColumn *pTCol = schemaColAt(pSchema, j); + // ignore not exist colId + int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pTCol->colId); + if (idx == -1) { + continue; + } + + void *value = NULL; + + value = tdGetMemRowDataOfColEx(row, pTCol->colId, (int8_t)pTCol->type, + TD_DATA_ROW_HEAD_SIZE + pSchema->columns[j].offset, &kvIdx); + + if ((value == NULL) || isNull(value, pTCol->type)) { + continue; + } + // lock + TSDB_WLOCK_TABLE(pTable); + SDataCol *pDataCol = &(pLatestCols[idx]); + if (pDataCol->pData == NULL) { + pDataCol->pData = malloc(pTCol->bytes); + pDataCol->bytes = pTCol->bytes; + } else if (pDataCol->bytes < pTCol->bytes) { + pDataCol->pData = realloc(pDataCol->pData, pTCol->bytes); + pDataCol->bytes = pTCol->bytes; + } + // the actual value size + uint16_t bytes = IS_VAR_DATA_TYPE(pTCol->type) ? 
varDataTLen(value) : pTCol->bytes; + // the actual data size CANNOT larger than column size + assert(pTCol->bytes >= bytes); + memcpy(pDataCol->pData, value, bytes); + //tsdbInfo("updateTableLatestColumn vgId:%d cache column %d for %d,%s", REPO_ID(pRepo), j, pDataCol->bytes, (char*)pDataCol->pData); + pDataCol->ts = memRowKey(row); + // unlock + TSDB_WUNLOCK_TABLE(pTable); + } +} + +static int tsdbUpdateTableLatestInfo(STsdbRepo *pRepo, STable *pTable, SMemRow row) { + STsdbCfg *pCfg = &pRepo->config; + + // if cacheLastRow config has been reset, free the lastRow + if (!pCfg->cacheLastRow && pTable->lastRow != NULL) { + SMemRow cachedLastRow = pTable->lastRow; + TSDB_WLOCK_TABLE(pTable); + pTable->lastRow = NULL; + TSDB_WUNLOCK_TABLE(pTable); + taosTZfree(cachedLastRow); + } + + if (tsdbGetTableLastKeyImpl(pTable) <= memRowKey(row)) { + if (CACHE_LAST_ROW(pCfg) || pTable->lastRow != NULL) { + SMemRow nrow = pTable->lastRow; + if (taosTSizeof(nrow) < memRowTLen(row)) { + SMemRow orow = nrow; + nrow = taosTMalloc(memRowTLen(row)); + if (nrow == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + memRowCpy(nrow, row); + TSDB_WLOCK_TABLE(pTable); + pTable->lastKey = memRowKey(row); + pTable->lastRow = nrow; + TSDB_WUNLOCK_TABLE(pTable); + taosTZfree(orow); + } else { + TSDB_WLOCK_TABLE(pTable); + pTable->lastKey = memRowKey(row); + memRowCpy(nrow, row); + TSDB_WUNLOCK_TABLE(pTable); + } + } else { + pTable->lastKey = memRowKey(row); + } + + if (CACHE_LAST_NULL_COLUMN(pCfg)) { + updateTableLatestColumn(pRepo, pTable, row); + } + } + + pTable->cacheLastConfigVersion = pRepo->cacheLastConfigVersion; + + return 0; +} + +#endif \ No newline at end of file diff --git a/source/dnode/vnode/tsdb/src/tsdbReadImpl.c b/source/dnode/vnode/tsdb/src/tsdbReadImpl.c index 74d41cce19..1a2b213031 100644 --- a/source/dnode/vnode/tsdb/src/tsdbReadImpl.c +++ b/source/dnode/vnode/tsdb/src/tsdbReadImpl.c @@ -13,7 +13,7 @@ * along with this program. If not, see . 
*/ -#include "tsdbint.h" +#include "tsdbDef.h" #define TSDB_KEY_COL_OFFSET 0 @@ -25,8 +25,9 @@ static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int3 static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols, int16_t *colIds, int numOfColIds); static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBlockCol *pBlockCol, SDataCol *pDataCol); +static STSchema *tsdbGetTableSchemaImpl(STable *pTable, bool lock, bool copy, int32_t version) { return NULL; } -int tsdbInitReadH(SReadH *pReadh, STsdbRepo *pRepo) { +int tsdbInitReadH(SReadH *pReadh, STsdb *pRepo) { ASSERT(pReadh != NULL && pRepo != NULL); STsdbCfg *pCfg = REPO_CFG(pRepo); @@ -259,7 +260,9 @@ int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo) { for (int i = 1; i < pBlock->numOfSubBlocks; i++) { iBlock++; if (tsdbLoadBlockDataImpl(pReadh, iBlock, pReadh->pDCols[1]) < 0) return -1; - if (tdMergeDataCols(pReadh->pDCols[0], pReadh->pDCols[1], pReadh->pDCols[1]->numOfRows, NULL, update != TD_ROW_PARTIAL_UPDATE) < 0) return -1; + if (tdMergeDataCols(pReadh->pDCols[0], pReadh->pDCols[1], pReadh->pDCols[1]->numOfRows, NULL, + update != TD_ROW_PARTIAL_UPDATE) < 0) + return -1; } ASSERT(pReadh->pDCols[0]->numOfRows == pBlock->numOfRows); @@ -286,7 +289,9 @@ int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, for (int i = 1; i < pBlock->numOfSubBlocks; i++) { iBlock++; if (tsdbLoadBlockDataColsImpl(pReadh, iBlock, pReadh->pDCols[1], colIds, numOfColsIds) < 0) return -1; - if (tdMergeDataCols(pReadh->pDCols[0], pReadh->pDCols[1], pReadh->pDCols[1]->numOfRows, NULL, update != TD_ROW_PARTIAL_UPDATE) < 0) return -1; + if (tdMergeDataCols(pReadh->pDCols[0], pReadh->pDCols[1], pReadh->pDCols[1]->numOfRows, NULL, + update != TD_ROW_PARTIAL_UPDATE) < 0) + return -1; } ASSERT(pReadh->pDCols[0]->numOfRows == pBlock->numOfRows); @@ -524,7 +529,7 @@ static int tsdbCheckAndDecodeColumnData(SDataCol 
*pDataCol, void *content, int32 if (comp) { // Need to decompress int tlen = (*(tDataTypes[pDataCol->type].decompFunc))(content, len - sizeof(TSCKSUM), numOfRows, pDataCol->pData, - pDataCol->spaceSize, comp, buffer, bufferSize); + pDataCol->spaceSize, comp, buffer, bufferSize); if (tlen <= 0) { tsdbError("Failed to decompress column, file corrupted, len:%d comp:%d numOfRows:%d maxPoints:%d bufferSize:%d", len, comp, numOfRows, maxPoints, bufferSize); @@ -624,9 +629,9 @@ static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols * static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBlockCol *pBlockCol, SDataCol *pDataCol) { ASSERT(pDataCol->colId == pBlockCol->colId); - STsdbRepo *pRepo = TSDB_READ_REPO(pReadh); - STsdbCfg * pCfg = REPO_CFG(pRepo); - int tsize = pDataCol->bytes * pBlock->numOfRows + COMP_OVERFLOW_BYTES; + STsdb * pRepo = TSDB_READ_REPO(pReadh); + STsdbCfg *pCfg = REPO_CFG(pRepo); + int tsize = pDataCol->bytes * pBlock->numOfRows + COMP_OVERFLOW_BYTES; if (tsdbMakeRoom((void **)(&TSDB_READ_BUF(pReadh)), pBlockCol->len) < 0) return -1; if (tsdbMakeRoom((void **)(&TSDB_READ_COMP_BUF(pReadh)), tsize) < 0) return -1; @@ -662,3 +667,4 @@ static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBloc return 0; } + diff --git a/source/dnode/vnode/tsdb/src/tsdbWrite.c b/source/dnode/vnode/tsdb/src/tsdbWrite.c index f9441cbe44..570e821af0 100644 --- a/source/dnode/vnode/tsdb/src/tsdbWrite.c +++ b/source/dnode/vnode/tsdb/src/tsdbWrite.c @@ -15,11 +15,11 @@ #include "tsdbDef.h" -int tsdbInsertData(STsdb *pTsdb, SSubmitMsg *pMsg) { +int tsdbInsertData(STsdb *pTsdb, SSubmitMsg *pMsg, SSubmitRsp *pRsp) { // Check if mem is there. If not, create one. 
- pTsdb->mem = tsdbNewMemTable(pTsdb->pmaf); + pTsdb->mem = tsdbNewMemTable(pTsdb); if (pTsdb->mem == NULL) { return -1; } - return tsdbInsertDataToMemTable(pTsdb->mem, pMsg); + return tsdbMemTableInsert(pTsdb, pTsdb->mem, pMsg, NULL); } \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/CMakeLists.txt b/source/dnode/vnode/tsdb2/CMakeLists.txt new file mode 100644 index 0000000000..23942eeac8 --- /dev/null +++ b/source/dnode/vnode/tsdb2/CMakeLists.txt @@ -0,0 +1,9 @@ +aux_source_directory(src TSDB_SRC) +add_library(tsdb STATIC ${TSDB_SRC}) + +target_include_directories( + tsdb + PUBLIC "${CMAKE_SOURCE_DIR}/include/dnode/vnode/tsdb2" + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc" +) +target_link_libraries(tsdb os util common tfs) \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/inc/tsdbBuffer.h b/source/dnode/vnode/tsdb2/inc/tsdbBuffer.h new file mode 100644 index 0000000000..869ff5c9ca --- /dev/null +++ b/source/dnode/vnode/tsdb2/inc/tsdbBuffer.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +// #ifndef _TD_TSDB_BUFFER_H_ +// #define _TD_TSDB_BUFFER_H_ + +// typedef struct { +// int64_t blockId; +// int offset; +// int remain; +// char data[]; +// } STsdbBufBlock; + +// typedef struct { +// pthread_cond_t poolNotEmpty; +// int bufBlockSize; +// int tBufBlocks; +// int nBufBlocks; +// int nRecycleBlocks; +// int nElasticBlocks; +// int64_t index; +// SList* bufBlockList; +// } STsdbBufPool; + +// #define TSDB_BUFFER_RESERVE 1024 // Reseve 1K as commit threshold + +// STsdbBufPool* tsdbNewBufPool(); +// void tsdbFreeBufPool(STsdbBufPool* pBufPool); +// int tsdbOpenBufPool(STsdb* pRepo); +// void tsdbCloseBufPool(STsdb* pRepo); +// SListNode* tsdbAllocBufBlockFromPool(STsdb* pRepo); +// int tsdbExpandPool(STsdb* pRepo, int32_t oldTotalBlocks); +// void tsdbRecycleBufferBlock(STsdbBufPool* pPool, SListNode *pNode, bool bELastic); + +// // health cite +// STsdbBufBlock *tsdbNewBufBlock(int bufBlockSize); +// void tsdbFreeBufBlock(STsdbBufBlock *pBufBlock); + +// #endif /* _TD_TSDB_BUFFER_H_ */ diff --git a/source/dnode/vnode/tsdb2/inc/tsdbCommit.h b/source/dnode/vnode/tsdb2/inc/tsdbCommit.h new file mode 100644 index 0000000000..6f80ea1d3a --- /dev/null +++ b/source/dnode/vnode/tsdb2/inc/tsdbCommit.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef _TD_TSDB_COMMIT_H_ +#define _TD_TSDB_COMMIT_H_ + +typedef struct { + int minFid; + int midFid; + int maxFid; + TSKEY minKey; +} SRtn; + +typedef struct { + uint64_t uid; + int64_t offset; + int64_t size; +} SKVRecord; + +#define TSDB_DEFAULT_BLOCK_ROWS(maxRows) ((maxRows)*4 / 5) + +void tsdbGetRtnSnap(STsdb *pRepo, SRtn *pRtn); +int tsdbEncodeKVRecord(void **buf, SKVRecord *pRecord); +void *tsdbDecodeKVRecord(void *buf, SKVRecord *pRecord); +void *tsdbCommitData(STsdb *pRepo); +int tsdbApplyRtnOnFSet(STsdb *pRepo, SDFileSet *pSet, SRtn *pRtn); +int tsdbWriteBlockInfoImpl(SDFile *pHeadf, STable *pTable, SArray *pSupA, SArray *pSubA, void **ppBuf, SBlockIdx *pIdx); +int tsdbWriteBlockIdx(SDFile *pHeadf, SArray *pIdxA, void **ppBuf); +int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDFile *pDFileAggr, SDataCols *pDataCols, + SBlock *pBlock, bool isLast, bool isSuper, void **ppBuf, void **ppCBuf, void **ppExBuf); +int tsdbApplyRtn(STsdb *pRepo); + +static FORCE_INLINE int tsdbGetFidLevel(int fid, SRtn *pRtn) { + if (fid >= pRtn->maxFid) { + return 0; + } else if (fid >= pRtn->midFid) { + return 1; + } else if (fid >= pRtn->minFid) { + return 2; + } else { + return -1; + } +} + +#endif /* _TD_TSDB_COMMIT_H_ */ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/inc/tsdbCompact.h b/source/dnode/vnode/tsdb2/inc/tsdbCompact.h new file mode 100644 index 0000000000..704dce0caa --- /dev/null +++ b/source/dnode/vnode/tsdb2/inc/tsdbCompact.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +// #ifndef _TD_TSDB_COMPACT_H_ +// #define _TD_TSDB_COMPACT_H_ + +// #ifdef __cplusplus +// extern "C" { +// #endif + +// void *tsdbCompactImpl(STsdb *pRepo); + +// #ifdef __cplusplus +// } +// #endif + +// #endif /* _TD_TSDB_COMPACT_H_ */ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/inc/tsdbFS.h b/source/dnode/vnode/tsdb2/inc/tsdbFS.h new file mode 100644 index 0000000000..5346342d69 --- /dev/null +++ b/source/dnode/vnode/tsdb2/inc/tsdbFS.h @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_TSDB_FS_H_ +#define _TD_TSDB_FS_H_ + +/** + * 1. The fileset .head/.data/.last use the same fver 0 before 2021.10.10. + * 2. .head fver is 1 when extract aggregate block data from .data/.last file and save to separate .smad/.smal file + * since 2021.10.10 + * // TODO update date and add release version. 
+ */ +typedef enum { + TSDB_FS_VER_0 = 0, + TSDB_FS_VER_1, +} ETsdbFsVer; + +#define TSDB_FVER_TYPE uint32_t +#define TSDB_LATEST_FVER TSDB_FS_VER_1 // latest version for DFile +#define TSDB_LATEST_SFS_VER TSDB_FS_VER_1 // latest version for 'current' file + +static FORCE_INLINE uint32_t tsdbGetDFSVersion(TSDB_FILE_T fType) { // latest version for DFile + switch (fType) { + case TSDB_FILE_HEAD: + return TSDB_FS_VER_1; + default: + return TSDB_FS_VER_0; + } +} + +// ================== TSDB global config +extern bool tsdbForceKeepFile; + +// ================== CURRENT file header info +typedef struct { + uint32_t version; // Current file system version (relating to code) + uint32_t len; // Encode content length (including checksum) +} SFSHeader; + +// ================== TSDB File System Meta +typedef struct { + uint32_t version; // Commit version from 0 to increase + int64_t totalPoints; // total points + int64_t totalStorage; // Uncompressed total storage +} STsdbFSMeta; + +// ================== +typedef struct { + STsdbFSMeta meta; // FS meta + SMFile* pmf; // meta file pointer + SMFile mf; // meta file + SArray* df; // data file array +} SFSStatus; + +typedef struct { + pthread_rwlock_t lock; + + SFSStatus* cstatus; // current status + SHashObj* metaCache; // meta cache + SHashObj* metaCacheComp; // meta cache for compact + bool intxn; + SFSStatus* nstatus; // new status +} STsdbFS; + +#define FS_CURRENT_STATUS(pfs) ((pfs)->cstatus) +#define FS_NEW_STATUS(pfs) ((pfs)->nstatus) +#define FS_IN_TXN(pfs) (pfs)->intxn +#define FS_VERSION(pfs) ((pfs)->cstatus->meta.version) +#define FS_TXN_VERSION(pfs) ((pfs)->nstatus->meta.version) + +typedef struct { + int direction; + uint64_t version; // current FS version + STsdbFS* pfs; + int index; // used to position next fset when version the same + int fid; // used to seek when version is changed + SDFileSet* pSet; +} SFSIter; + +#define TSDB_FS_ITER_FORWARD TSDB_ORDER_ASC +#define TSDB_FS_ITER_BACKWARD TSDB_ORDER_DESC + 
+STsdbFS *tsdbNewFS(STsdbCfg *pCfg); +void * tsdbFreeFS(STsdbFS *pfs); +int tsdbOpenFS(STsdb *pRepo); +void tsdbCloseFS(STsdb *pRepo); +void tsdbStartFSTxn(STsdb *pRepo, int64_t pointsAdd, int64_t storageAdd); +int tsdbEndFSTxn(STsdb *pRepo); +int tsdbEndFSTxnWithError(STsdbFS *pfs); +void tsdbUpdateFSTxnMeta(STsdbFS *pfs, STsdbFSMeta *pMeta); +void tsdbUpdateMFile(STsdbFS *pfs, const SMFile *pMFile); +int tsdbUpdateDFileSet(STsdbFS *pfs, const SDFileSet *pSet); + +void tsdbFSIterInit(SFSIter *pIter, STsdbFS *pfs, int direction); +void tsdbFSIterSeek(SFSIter *pIter, int fid); +SDFileSet *tsdbFSIterNext(SFSIter *pIter); +int tsdbLoadMetaCache(STsdb *pRepo, bool recoverMeta); + +static FORCE_INLINE int tsdbRLockFS(STsdbFS* pFs) { + int code = pthread_rwlock_rdlock(&(pFs->lock)); + if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(code); + return -1; + } + return 0; +} + +static FORCE_INLINE int tsdbWLockFS(STsdbFS* pFs) { + int code = pthread_rwlock_wrlock(&(pFs->lock)); + if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(code); + return -1; + } + return 0; +} + +static FORCE_INLINE int tsdbUnLockFS(STsdbFS* pFs) { + int code = pthread_rwlock_unlock(&(pFs->lock)); + if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(code); + return -1; + } + return 0; +} + +#endif /* _TD_TSDB_FS_H_ */ diff --git a/source/dnode/vnode/tsdb2/inc/tsdbFile.h b/source/dnode/vnode/tsdb2/inc/tsdbFile.h new file mode 100644 index 0000000000..18838edea9 --- /dev/null +++ b/source/dnode/vnode/tsdb2/inc/tsdbFile.h @@ -0,0 +1,406 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TS_TSDB_FILE_H_ +#define _TS_TSDB_FILE_H_ + +#include "os.h" + +#define TSDB_FILE_HEAD_SIZE 512 +#define TSDB_FILE_DELIMITER 0xF00AFA0F +#define TSDB_FILE_INIT_MAGIC 0xFFFFFFFF +#define TSDB_IVLD_FID INT_MIN +#define TSDB_FILE_STATE_OK 0 +#define TSDB_FILE_STATE_BAD 1 + +#define TSDB_FILE_INFO(tf) (&((tf)->info)) +#define TSDB_FILE_F(tf) (&((tf)->f)) +#define TSDB_FILE_FD(tf) ((tf)->fd) +#define TSDB_FILE_FULL_NAME(tf) TFILE_NAME(TSDB_FILE_F(tf)) +#define TSDB_FILE_OPENED(tf) (TSDB_FILE_FD(tf) >= 0) +#define TSDB_FILE_CLOSED(tf) (!TSDB_FILE_OPENED(tf)) +#define TSDB_FILE_SET_CLOSED(f) (TSDB_FILE_FD(f) = -1) +#define TSDB_FILE_LEVEL(tf) TFILE_LEVEL(TSDB_FILE_F(tf)) +#define TSDB_FILE_ID(tf) TFILE_ID(TSDB_FILE_F(tf)) +#define TSDB_FILE_FSYNC(tf) taosFsyncFile(TSDB_FILE_FD(tf)) +#define TSDB_FILE_STATE(tf) ((tf)->state) +#define TSDB_FILE_SET_STATE(tf, s) ((tf)->state = (s)) +#define TSDB_FILE_IS_OK(tf) (TSDB_FILE_STATE(tf) == TSDB_FILE_STATE_OK) +#define TSDB_FILE_IS_BAD(tf) (TSDB_FILE_STATE(tf) == TSDB_FILE_STATE_BAD) +#define ASSERT_TSDB_FSET_NFILES_VALID(s) \ + do { \ + uint8_t nDFiles = tsdbGetNFiles(s); \ + ASSERT((nDFiles >= TSDB_FILE_MIN) && (nDFiles <= TSDB_FILE_MAX)); \ + } while (0) +typedef enum { + TSDB_FILE_HEAD = 0, + TSDB_FILE_DATA, + TSDB_FILE_LAST, + TSDB_FILE_SMAD, // sma for .data + TSDB_FILE_SMAL, // sma for .last + TSDB_FILE_MAX, + TSDB_FILE_META +} TSDB_FILE_T; + +#define TSDB_FILE_MIN 3U // min valid number of files in one DFileSet(.head/.data/.last) + +// =============== SMFile +typedef struct { + int64_t size; + int64_t tombSize; + int64_t nRecords; + int64_t nDels; + uint32_t magic; +} SMFInfo; + +typedef struct { + SMFInfo info; + TFILE f; + int fd; + uint8_t state; +} SMFile; + +void tsdbInitMFile(SMFile* pMFile, SDiskID did, int vid, uint32_t ver); +void tsdbInitMFileEx(SMFile* pMFile, const SMFile* 
pOMFile); +int tsdbEncodeSMFile(void** buf, SMFile* pMFile); +void* tsdbDecodeSMFile(void* buf, SMFile* pMFile); +int tsdbEncodeSMFileEx(void** buf, SMFile* pMFile); +void* tsdbDecodeSMFileEx(void* buf, SMFile* pMFile); +int tsdbApplyMFileChange(SMFile* from, SMFile* to); +int tsdbCreateMFile(SMFile* pMFile, bool updateHeader); +int tsdbUpdateMFileHeader(SMFile* pMFile); +int tsdbLoadMFileHeader(SMFile* pMFile, SMFInfo* pInfo); +int tsdbScanAndTryFixMFile(STsdb* pRepo); +int tsdbEncodeMFInfo(void** buf, SMFInfo* pInfo); +void* tsdbDecodeMFInfo(void* buf, SMFInfo* pInfo); + +static FORCE_INLINE void tsdbSetMFileInfo(SMFile* pMFile, SMFInfo* pInfo) { pMFile->info = *pInfo; } + +static FORCE_INLINE int tsdbOpenMFile(SMFile* pMFile, int flags) { + ASSERT(TSDB_FILE_CLOSED(pMFile)); + + pMFile->fd = open(TSDB_FILE_FULL_NAME(pMFile), flags | O_BINARY); + if (pMFile->fd < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + return 0; +} + +static FORCE_INLINE void tsdbCloseMFile(SMFile* pMFile) { + if (TSDB_FILE_OPENED(pMFile)) { + close(pMFile->fd); + TSDB_FILE_SET_CLOSED(pMFile); + } +} + +static FORCE_INLINE int64_t tsdbSeekMFile(SMFile* pMFile, int64_t offset, int whence) { + ASSERT(TSDB_FILE_OPENED(pMFile)); + + int64_t loffset = taosLSeekFile(TSDB_FILE_FD(pMFile), offset, whence); + if (loffset < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + return loffset; +} + +static FORCE_INLINE int64_t tsdbWriteMFile(SMFile* pMFile, void* buf, int64_t nbyte) { + ASSERT(TSDB_FILE_OPENED(pMFile)); + + int64_t nwrite = taosWriteFile(pMFile->fd, buf, nbyte); + if (nwrite < nbyte) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + return nwrite; +} + +static FORCE_INLINE void tsdbUpdateMFileMagic(SMFile* pMFile, void* pCksum) { + pMFile->info.magic = taosCalcChecksum(pMFile->info.magic, (uint8_t*)(pCksum), sizeof(TSCKSUM)); +} + +static FORCE_INLINE int tsdbAppendMFile(SMFile* pMFile, void* buf, int64_t nbyte, int64_t* offset) { + 
ASSERT(TSDB_FILE_OPENED(pMFile)); + + int64_t toffset; + + if ((toffset = tsdbSeekMFile(pMFile, 0, SEEK_END)) < 0) { + return -1; + } + + ASSERT(pMFile->info.size == toffset); + + if (offset) { + *offset = toffset; + } + + if (tsdbWriteMFile(pMFile, buf, nbyte) < 0) { + return -1; + } + + pMFile->info.size += nbyte; + + return (int)nbyte; +} + +static FORCE_INLINE int tsdbRemoveMFile(SMFile* pMFile) { return tfsremove(TSDB_FILE_F(pMFile)); } + +static FORCE_INLINE int64_t tsdbReadMFile(SMFile* pMFile, void* buf, int64_t nbyte) { + ASSERT(TSDB_FILE_OPENED(pMFile)); + + int64_t nread = taosReadFile(pMFile->fd, buf, nbyte); + if (nread < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + return nread; +} + +// =============== SDFile +typedef struct { + uint32_t magic; + uint32_t len; + uint32_t totalBlocks; + uint32_t totalSubBlocks; + uint32_t offset; + uint64_t size; + uint64_t tombSize; + uint32_t fver; +} SDFInfo; + +typedef struct { + SDFInfo info; + TFILE f; + int fd; + uint8_t state; +} SDFile; + +void tsdbInitDFile(SDFile* pDFile, SDiskID did, int vid, int fid, uint32_t ver, TSDB_FILE_T ftype); +void tsdbInitDFileEx(SDFile* pDFile, SDFile* pODFile); +int tsdbEncodeSDFile(void** buf, SDFile* pDFile); +void* tsdbDecodeSDFile(void* buf, SDFile* pDFile, uint32_t sfver); +int tsdbCreateDFile(SDFile* pDFile, bool updateHeader, TSDB_FILE_T ftype); +int tsdbUpdateDFileHeader(SDFile* pDFile); +int tsdbLoadDFileHeader(SDFile* pDFile, SDFInfo* pInfo); +int tsdbParseDFilename(const char* fname, int* vid, int* fid, TSDB_FILE_T* ftype, uint32_t* version); + +static FORCE_INLINE void tsdbSetDFileInfo(SDFile* pDFile, SDFInfo* pInfo) { pDFile->info = *pInfo; } + +static FORCE_INLINE int tsdbOpenDFile(SDFile* pDFile, int flags) { + ASSERT(!TSDB_FILE_OPENED(pDFile)); + + pDFile->fd = open(TSDB_FILE_FULL_NAME(pDFile), flags | O_BINARY); + if (pDFile->fd < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + return 0; +} + +static FORCE_INLINE void 
tsdbCloseDFile(SDFile* pDFile) { + if (TSDB_FILE_OPENED(pDFile)) { + close(pDFile->fd); + TSDB_FILE_SET_CLOSED(pDFile); + } +} + +static FORCE_INLINE int64_t tsdbSeekDFile(SDFile* pDFile, int64_t offset, int whence) { + ASSERT(TSDB_FILE_OPENED(pDFile)); + + int64_t loffset = taosLSeekFile(TSDB_FILE_FD(pDFile), offset, whence); + if (loffset < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + return loffset; +} + +static FORCE_INLINE int64_t tsdbWriteDFile(SDFile* pDFile, void* buf, int64_t nbyte) { + ASSERT(TSDB_FILE_OPENED(pDFile)); + + int64_t nwrite = taosWriteFile(pDFile->fd, buf, nbyte); + if (nwrite < nbyte) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + return nwrite; +} + +static FORCE_INLINE void tsdbUpdateDFileMagic(SDFile* pDFile, void* pCksm) { + pDFile->info.magic = taosCalcChecksum(pDFile->info.magic, (uint8_t*)(pCksm), sizeof(TSCKSUM)); +} + +static FORCE_INLINE int tsdbAppendDFile(SDFile* pDFile, void* buf, int64_t nbyte, int64_t* offset) { + ASSERT(TSDB_FILE_OPENED(pDFile)); + + int64_t toffset; + + if ((toffset = tsdbSeekDFile(pDFile, 0, SEEK_END)) < 0) { + return -1; + } + + ASSERT(pDFile->info.size == toffset); + + if (offset) { + *offset = toffset; + } + + if (tsdbWriteDFile(pDFile, buf, nbyte) < 0) { + return -1; + } + + pDFile->info.size += nbyte; + + return (int)nbyte; +} + +static FORCE_INLINE int tsdbRemoveDFile(SDFile* pDFile) { return tfsremove(TSDB_FILE_F(pDFile)); } + +static FORCE_INLINE int64_t tsdbReadDFile(SDFile* pDFile, void* buf, int64_t nbyte) { + ASSERT(TSDB_FILE_OPENED(pDFile)); + + int64_t nread = taosReadFile(pDFile->fd, buf, nbyte); + if (nread < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + return nread; +} + +static FORCE_INLINE int tsdbCopyDFile(SDFile* pSrc, SDFile* pDest) { + if (tfscopy(TSDB_FILE_F(pSrc), TSDB_FILE_F(pDest)) < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + tsdbSetDFileInfo(pDest, TSDB_FILE_INFO(pSrc)); + return 0; +} + +// =============== 
SDFileSet +typedef struct { + int fid; + int state; + uint16_t ver; // fset version + SDFile files[TSDB_FILE_MAX]; +} SDFileSet; + +typedef enum { + TSDB_FSET_VER_0 = 0, // .head/.data/.last + TSDB_FSET_VER_1, // .head/.data/.last/.smad/.smal +} ETsdbFSetVer; + +#define TSDB_LATEST_FSET_VER TSDB_FSET_VER_1 + +// get nDFiles in SDFileSet +static FORCE_INLINE uint8_t tsdbGetNFiles(SDFileSet* pSet) { + switch (pSet->ver) { + case TSDB_FSET_VER_0: + return TSDB_FILE_MIN; + case TSDB_FSET_VER_1: + default: + return TSDB_FILE_MAX; + } +} +#define TSDB_FSET_FID(s) ((s)->fid) +#define TSDB_DFILE_IN_SET(s, t) ((s)->files + (t)) +#define TSDB_FSET_LEVEL(s) TSDB_FILE_LEVEL(TSDB_DFILE_IN_SET(s, 0)) +#define TSDB_FSET_ID(s) TSDB_FILE_ID(TSDB_DFILE_IN_SET(s, 0)) +#define TSDB_FSET_SET_CLOSED(s) \ + do { \ + for (TSDB_FILE_T ftype = TSDB_FILE_HEAD; ftype < TSDB_FILE_MAX; ftype++) { \ + TSDB_FILE_SET_CLOSED(TSDB_DFILE_IN_SET(s, ftype)); \ + } \ + } while (0); +#define TSDB_FSET_FSYNC(s) \ + do { \ + for (TSDB_FILE_T ftype = TSDB_FILE_HEAD; ftype < tsdbGetNFiles(s); ftype++) { \ + TSDB_FILE_FSYNC(TSDB_DFILE_IN_SET(s, ftype)); \ + } \ + } while (0); + +void tsdbInitDFileSet(SDFileSet* pSet, SDiskID did, int vid, int fid, uint32_t ver, uint16_t fsetVer); +void tsdbInitDFileSetEx(SDFileSet* pSet, SDFileSet* pOSet); +int tsdbEncodeDFileSet(void** buf, SDFileSet* pSet); +void* tsdbDecodeDFileSet(void* buf, SDFileSet* pSet, uint32_t sfver); +int tsdbEncodeDFileSetEx(void** buf, SDFileSet* pSet); +void* tsdbDecodeDFileSetEx(void* buf, SDFileSet* pSet); +int tsdbApplyDFileSetChange(SDFileSet* from, SDFileSet* to); +int tsdbCreateDFileSet(SDFileSet* pSet, bool updateHeader); +int tsdbUpdateDFileSetHeader(SDFileSet* pSet); +int tsdbScanAndTryFixDFileSet(STsdb* pRepo, SDFileSet* pSet); + +static FORCE_INLINE void tsdbCloseDFileSet(SDFileSet* pSet) { + ASSERT_TSDB_FSET_NFILES_VALID(pSet); + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) { + 
tsdbCloseDFile(TSDB_DFILE_IN_SET(pSet, ftype)); + } +} + +static FORCE_INLINE int tsdbOpenDFileSet(SDFileSet* pSet, int flags) { + ASSERT_TSDB_FSET_NFILES_VALID(pSet); + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) { + if (tsdbOpenDFile(TSDB_DFILE_IN_SET(pSet, ftype), flags) < 0) { + tsdbCloseDFileSet(pSet); + return -1; + } + } + return 0; +} + +static FORCE_INLINE void tsdbRemoveDFileSet(SDFileSet* pSet) { + ASSERT_TSDB_FSET_NFILES_VALID(pSet); + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) { + (void)tsdbRemoveDFile(TSDB_DFILE_IN_SET(pSet, ftype)); + } +} + +static FORCE_INLINE int tsdbCopyDFileSet(SDFileSet* pSrc, SDFileSet* pDest) { + ASSERT_TSDB_FSET_NFILES_VALID(pSrc); + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSrc); ftype++) { + if (tsdbCopyDFile(TSDB_DFILE_IN_SET(pSrc, ftype), TSDB_DFILE_IN_SET(pDest, ftype)) < 0) { + tsdbRemoveDFileSet(pDest); + return -1; + } + } + + return 0; +} + +static FORCE_INLINE void tsdbGetFidKeyRange(int days, int8_t precision, int fid, TSKEY* minKey, TSKEY* maxKey) { + *minKey = fid * days * tsTickPerDay[precision]; + *maxKey = *minKey + days * tsTickPerDay[precision] - 1; +} + +static FORCE_INLINE bool tsdbFSetIsOk(SDFileSet* pSet) { + for (TSDB_FILE_T ftype = 0; ftype < TSDB_FILE_MAX; ftype++) { + if (TSDB_FILE_IS_BAD(TSDB_DFILE_IN_SET(pSet, ftype))) { + return false; + } + } + + return true; +} + +#endif /* _TS_TSDB_FILE_H_ */ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/inc/tsdbLog.h b/source/dnode/vnode/tsdb2/inc/tsdbLog.h new file mode 100644 index 0000000000..fdd04e968a --- /dev/null +++ b/source/dnode/vnode/tsdb2/inc/tsdbLog.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_TSDB_LOG_H_ +#define _TD_TSDB_LOG_H_ + +extern int32_t tsdbDebugFlag; + +#define tsdbFatal(...) do { if (tsdbDebugFlag & DEBUG_FATAL) { taosPrintLog("TDB FATAL ", 255, __VA_ARGS__); }} while(0) +#define tsdbError(...) do { if (tsdbDebugFlag & DEBUG_ERROR) { taosPrintLog("TDB ERROR ", 255, __VA_ARGS__); }} while(0) +#define tsdbWarn(...) do { if (tsdbDebugFlag & DEBUG_WARN) { taosPrintLog("TDB WARN ", 255, __VA_ARGS__); }} while(0) +#define tsdbInfo(...) do { if (tsdbDebugFlag & DEBUG_INFO) { taosPrintLog("TDB ", 255, __VA_ARGS__); }} while(0) +#define tsdbDebug(...) do { if (tsdbDebugFlag & DEBUG_DEBUG) { taosPrintLog("TDB ", tsdbDebugFlag, __VA_ARGS__); }} while(0) +#define tsdbTrace(...) do { if (tsdbDebugFlag & DEBUG_TRACE) { taosPrintLog("TDB ", tsdbDebugFlag, __VA_ARGS__); }} while(0) + +#endif /* _TD_TSDB_LOG_H_ */ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/inc/tsdbMemTable.h b/source/dnode/vnode/tsdb2/inc/tsdbMemTable.h new file mode 100644 index 0000000000..639c27a644 --- /dev/null +++ b/source/dnode/vnode/tsdb2/inc/tsdbMemTable.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_TSDB_MEMTABLE_H_ +#define _TD_TSDB_MEMTABLE_H_ + +typedef struct { + int rowsInserted; + int rowsUpdated; + int rowsDeleteSucceed; + int rowsDeleteFailed; + int nOperations; + TSKEY keyFirst; + TSKEY keyLast; +} SMergeInfo; + +typedef struct { + STable * pTable; + SSkipListIterator *pIter; +} SCommitIter; + +struct STableData { + uint64_t uid; + TSKEY keyFirst; + TSKEY keyLast; + int64_t numOfRows; + SSkipList* pData; + T_REF_DECLARE() +}; + +enum { TSDB_UPDATE_META, TSDB_DROP_META }; + +#ifdef WINDOWS +#pragma pack(push ,1) +typedef struct { +#else +typedef struct __attribute__((packed)){ +#endif + char act; + uint64_t uid; +} SActObj; +#ifdef WINDOWS +#pragma pack(pop) +#endif + +typedef struct { + int len; + char cont[]; +} SActCont; + +int tsdbRefMemTable(STsdb* pRepo, SMemTable* pMemTable); +int tsdbUnRefMemTable(STsdb* pRepo, SMemTable* pMemTable); +int tsdbTakeMemSnapshot(STsdb* pRepo, SMemSnapshot* pSnapshot, SArray* pATable); +void tsdbUnTakeMemSnapShot(STsdb* pRepo, SMemSnapshot* pSnapshot); +void* tsdbAllocBytes(STsdb* pRepo, int bytes); +int tsdbAsyncCommit(STsdb* pRepo); +int tsdbSyncCommitConfig(STsdb* pRepo); +int tsdbLoadDataFromCache(STable* pTable, SSkipListIterator* pIter, TSKEY maxKey, int maxRowsToRead, SDataCols* pCols, + TKEY* filterKeys, int nFilterKeys, bool keepDup, SMergeInfo* pMergeInfo); +void* tsdbCommitData(STsdb* pRepo); + +static FORCE_INLINE SMemRow tsdbNextIterRow(SSkipListIterator* pIter) { + if (pIter == NULL) return NULL; + + SSkipListNode* node = tSkipListIterGet(pIter); + if (node == NULL) return NULL; + + return (SMemRow)SL_GET_NODE_DATA(node); +} + +static FORCE_INLINE TSKEY tsdbNextIterKey(SSkipListIterator* pIter) { + SMemRow row = tsdbNextIterRow(pIter); + if (row == NULL) return TSDB_DATA_TIMESTAMP_NULL; + + return memRowKey(row); +} + +static FORCE_INLINE TKEY 
/*
 * Size-tracking allocator helpers.
 *
 * Every allocation is laid out as [size_t payload_size][payload ...]; callers
 * only ever see the payload pointer. taosTSizeof() reads the header back, so a
 * pointer handed to these helpers must have come from taosTMalloc(),
 * taosTCalloc() or taosTRealloc().
 */
static void * taosTMalloc(size_t size);
static void * taosTCalloc(size_t nmemb, size_t size);
static void * taosTRealloc(void *ptr, size_t size);
static void * taosTZfree(void *ptr);
static size_t taosTSizeof(void *ptr);
static void   taosTMemset(void *ptr, int c);

/**
 * Allocate `size` payload bytes preceded by a size header.
 *
 * @return payload pointer, or NULL when size is 0, when size plus the header
 *         would overflow size_t, or when malloc() fails.
 */
static FORCE_INLINE void *taosTMalloc(size_t size) {
  if (size == 0) return NULL;
  // size + header must not wrap, otherwise a tiny block would be allocated
  if (size > (size_t)-1 - sizeof(size_t)) return NULL;

  void *raw = malloc(size + sizeof(size_t));
  if (raw == NULL) return NULL;

  *(size_t *)raw = size;

  return (void *)((char *)raw + sizeof(size_t));
}

/**
 * Allocate a zero-filled payload of nmemb * size bytes.
 *
 * @return payload pointer, or NULL when the product is 0, when it overflows
 *         size_t, or when the allocation fails.
 */
static FORCE_INLINE void *taosTCalloc(size_t nmemb, size_t size) {
  // reject products that do not fit in size_t instead of silently wrapping
  if (size != 0 && nmemb > (size_t)-1 / size) return NULL;

  void *ret = taosTMalloc(nmemb * size);
  if (ret == NULL) return NULL;

  taosTMemset(ret, 0);
  return ret;
}

/** Payload size recorded in the header of `ptr`; 0 when ptr is NULL. */
static FORCE_INLINE size_t taosTSizeof(void *ptr) { return (ptr) ? (*(size_t *)((char *)ptr - sizeof(size_t))) : 0; }

/** Fill the whole recorded payload of `ptr` with byte `c`; does nothing for NULL. */
static FORCE_INLINE void taosTMemset(void *ptr, int c) {
  if (ptr == NULL) return;
  memset(ptr, c, taosTSizeof(ptr));
}

/**
 * Grow an allocation to at least `size` payload bytes.
 *
 * A NULL `ptr` behaves like taosTMalloc(size). A request that already fits in
 * the recorded payload size returns `ptr` unchanged (allocations never shrink).
 *
 * @return the (possibly moved) payload pointer, or NULL on failure, in which
 *         case the original allocation is left untouched and still owned by
 *         the caller.
 */
static FORCE_INLINE void * taosTRealloc(void *ptr, size_t size) {
  if (ptr == NULL) return taosTMalloc(size);

  if (size <= taosTSizeof(ptr)) return ptr;

  // size + header must not wrap
  if (size > (size_t)-1 - sizeof(size_t)) return NULL;

  void *grown = realloc((void *)((char *)ptr - sizeof(size_t)), size + sizeof(size_t));
  if (grown == NULL) return NULL;

  *(size_t *)grown = size;

  return (void *)((char *)grown + sizeof(size_t));
}

/**
 * Release an allocation made by these helpers.
 *
 * @return always NULL, so callers can write `p = taosTZfree(p);`.
 */
static FORCE_INLINE void* taosTZfree(void* ptr) {
  if (ptr) {
    free((void*)((char*)ptr - sizeof(size_t)));
  }
  return NULL;
}
+} JsonMapValue; + +#pragma pack(pop) + +typedef struct STable { + STableId tableId; + ETableType type; + tstr* name; // NOTE: there a flexible string here + uint64_t suid; + struct STable* pSuper; // super table pointer + SArray* schema; + STSchema* tagSchema; + SKVRow tagVal; + SSkipList* pIndex; // For TSDB_SUPER_TABLE, it is the skiplist index + SHashObj* jsonKeyMap; // For json tag key {"key":[t1, t2, t3]} + void* eventHandler; // TODO + void* streamHandler; // TODO + TSKEY lastKey; + SMemRow lastRow; + char* sql; + void* cqhandle; + SRWLatch latch; // TODO: implementa latch functions + SDataCol* lastCols; + int16_t maxColNum; + int16_t restoreColumnNum; + bool hasRestoreLastColumn; + int lastColSVersion; + int16_t cacheLastConfigVersion; + T_REF_DECLARE() +} STable; + +typedef struct { + pthread_rwlock_t rwLock; + + int32_t nTables; + int32_t maxTables; + STable** tables; + SList* superList; + SHashObj* uidMap; + int maxRowBytes; + int maxCols; +} STsdbMeta; + +#define TSDB_INIT_NTABLES 1024 +#define TABLE_TYPE(t) (t)->type +#define TABLE_NAME(t) (t)->name +#define TABLE_CHAR_NAME(t) TABLE_NAME(t)->data +#define TABLE_UID(t) (t)->tableId.uid +#define TABLE_TID(t) (t)->tableId.tid +#define TABLE_SUID(t) (t)->suid +// #define TSDB_META_FILE_MAGIC(m) KVSTORE_MAGIC((m)->pStore) +#define TSDB_RLOCK_TABLE(t) taosRLockLatch(&((t)->latch)) +#define TSDB_RUNLOCK_TABLE(t) taosRUnLockLatch(&((t)->latch)) +#define TSDB_WLOCK_TABLE(t) taosWLockLatch(&((t)->latch)) +#define TSDB_WUNLOCK_TABLE(t) taosWUnLockLatch(&((t)->latch)) + +STsdbMeta* tsdbNewMeta(STsdbCfg* pCfg); +void tsdbFreeMeta(STsdbMeta* pMeta); +int tsdbOpenMeta(STsdb* pRepo); +int tsdbCloseMeta(STsdb* pRepo); +STable* tsdbGetTableByUid(STsdbMeta* pMeta, uint64_t uid); +STSchema* tsdbGetTableSchemaByVersion(STable* pTable, int16_t _version, int8_t rowType); +int tsdbWLockRepoMeta(STsdb* pRepo); +int tsdbRLockRepoMeta(STsdb* pRepo); +int tsdbUnlockRepoMeta(STsdb* pRepo); +void tsdbRefTable(STable* pTable); +void 
tsdbUnRefTable(STable* pTable); +void tsdbUpdateTableSchema(STsdb* pRepo, STable* pTable, STSchema* pSchema, bool insertAct); +int tsdbRestoreTable(STsdb* pRepo, void* cont, int contLen); +void tsdbOrgMeta(STsdb* pRepo); +int tsdbInitColIdCacheWithSchema(STable* pTable, STSchema* pSchema); +int16_t tsdbGetLastColumnsIndexByColId(STable* pTable, int16_t colId); +int tsdbUpdateLastColSchema(STable* pTable, STSchema* pNewSchema); +STSchema* tsdbGetTableLatestSchema(STable* pTable); +void tsdbFreeLastColumns(STable* pTable); +int tsdbCompareJsonMapValue(const void* a, const void* b); +void* tsdbGetJsonTagValue(STable* pTable, char* key, int32_t keyLen, int16_t* colId); + +static FORCE_INLINE int tsdbCompareSchemaVersion(const void* key1, const void* key2) { + if (*(int16_t*)key1 < schemaVersion(*(STSchema**)key2)) { + return -1; + } else if (*(int16_t*)key1 > schemaVersion(*(STSchema**)key2)) { + return 1; + } else { + return 0; + } +} + +static FORCE_INLINE STSchema* tsdbGetTableSchemaImpl(STable* pTable, bool lock, bool copy, int16_t _version, + int8_t rowType) { + STable* pDTable = (pTable->pSuper != NULL) ? 
pTable->pSuper : pTable; // for performance purpose + STSchema* pSchema = NULL; + STSchema* pTSchema = NULL; + + if (lock) TSDB_RLOCK_TABLE(pDTable); + if (_version < 0) { // get the latest version of schema + pTSchema = *(STSchema**)taosArrayGetLast(pDTable->schema); + } else { // get the schema with version + void* ptr = taosArraySearch(pDTable->schema, &_version, tsdbCompareSchemaVersion, TD_EQ); + if (ptr == NULL) { + if (rowType == SMEM_ROW_KV) { + ptr = taosArrayGetLast(pDTable->schema); + } else { + terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION; + goto _exit; + } + } + pTSchema = *(STSchema**)ptr; + } + + ASSERT(pTSchema != NULL); + + if (copy) { + if ((pSchema = tdDupSchema(pTSchema)) == NULL) terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + } else { + pSchema = pTSchema; + } + +_exit: + if (lock) TSDB_RUNLOCK_TABLE(pDTable); + return pSchema; +} + +static FORCE_INLINE STSchema* tsdbGetTableSchema(STable* pTable) { + return tsdbGetTableSchemaImpl(pTable, false, false, -1, -1); +} + +static FORCE_INLINE STSchema* tsdbGetTableTagSchema(STable* pTable) { + if (pTable->type == TSDB_CHILD_TABLE) { // check child table first + STable* pSuper = pTable->pSuper; + if (pSuper == NULL) return NULL; + return pSuper->tagSchema; + } else if (pTable->type == TSDB_SUPER_TABLE) { + return pTable->tagSchema; + } else { + return NULL; + } +} + +static FORCE_INLINE TSKEY tsdbGetTableLastKeyImpl(STable* pTable) { + ASSERT((pTable->lastRow == NULL) || (pTable->lastKey == memRowKey(pTable->lastRow))); + return pTable->lastKey; +} + +#endif /* _TD_TSDB_META_H_ */ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/inc/tsdbReadImpl.h b/source/dnode/vnode/tsdb2/inc/tsdbReadImpl.h new file mode 100644 index 0000000000..9f3fb8b683 --- /dev/null +++ b/source/dnode/vnode/tsdb2/inc/tsdbReadImpl.h @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_TSDB_READ_IMPL_H_ +#define _TD_TSDB_READ_IMPL_H_ + +#include "os.h" +#include "tfs.h" +#include "tsdb.h" +#include "tsdbFile.h" +#include "tsdbMemory.h" +#include "tsdbMeta.h" +#include "tskiplist.h" + +typedef struct SReadH SReadH; + +typedef struct { + int32_t tid; + uint32_t len; + uint32_t offset; + uint32_t hasLast : 2; + uint32_t numOfBlocks : 30; + uint64_t uid; + TSKEY maxKey; +} SBlockIdx; + +#if 0 +typedef struct { + int64_t last : 1; + int64_t offset : 63; + int32_t algorithm : 8; + int32_t numOfRows : 24; + int32_t len; + int32_t keyLen; // key column length, keyOffset = offset+sizeof(SBlockData)+sizeof(SBlockCol)*numOfCols + int16_t numOfSubBlocks; + int16_t numOfCols; // not including timestamp column + TSKEY keyFirst; + TSKEY keyLast; + } SBlock; +#endif + +/** + * keyLen; // key column length, keyOffset = offset+sizeof(SBlockData)+sizeof(SBlockCol)*numOfCols + * numOfCols; // not including timestamp column + */ +#define SBlockFieldsP0 \ + int64_t last : 1; \ + int64_t offset : 63; \ + int32_t algorithm : 8; \ + int32_t numOfRows : 24; \ + int32_t len; \ + int32_t keyLen; \ + int16_t numOfSubBlocks; \ + int16_t numOfCols; \ + TSKEY keyFirst; \ + TSKEY keyLast + +/** + * aggrStat; // only valid when blkVer > 0. 
0 - no aggr part in .data/.last/.smad/.smal, 1 - has aggr in .smad/.smal + * blkVer; // 0 - original block, 1 - block since importing .smad/.smal + * aggrOffset; // only valid when blkVer > 0 and aggrStat > 0 + */ +#define SBlockFieldsP1 \ + uint64_t aggrStat : 1; \ + uint64_t blkVer : 7; \ + uint64_t aggrOffset : 56 + +typedef struct { + SBlockFieldsP0; +} SBlockV0; + +typedef struct { + SBlockFieldsP0; + SBlockFieldsP1; +} SBlockV1; + +typedef enum { + TSDB_SBLK_VER_0 = 0, + TSDB_SBLK_VER_1, +} ESBlockVer; + +#define SBlockVerLatest TSDB_SBLK_VER_1 + +#define SBlock SBlockV1 // latest SBlock definition + +// lastest SBlockInfo definition +typedef struct { + int32_t delimiter; // For recovery usage + int32_t tid; + uint64_t uid; + SBlock blocks[]; +} SBlockInfo; + +typedef struct { + int16_t colId; + int32_t len; + uint32_t type : 8; + uint32_t offset : 24; + int64_t sum; + int64_t max; + int64_t min; + int16_t maxIndex; + int16_t minIndex; + int16_t numOfNull; + uint8_t offsetH; + char padding[1]; +} SBlockColV0; + +typedef struct { + int16_t colId; + uint8_t offsetH; + uint8_t reserved; // reserved field, not used + int32_t len; + uint32_t type : 8; + uint32_t offset : 24; +} SBlockColV1; + +#define SBlockCol SBlockColV1 // latest SBlockCol definition + +typedef struct { + int16_t colId; + int16_t maxIndex; + int16_t minIndex; + int16_t numOfNull; + int64_t sum; + int64_t max; + int64_t min; +} SAggrBlkColV1; + +#define SAggrBlkCol SAggrBlkColV1 // latest SAggrBlkCol definition + +// Code here just for back-ward compatibility +static FORCE_INLINE void tsdbSetBlockColOffset(SBlockCol *pBlockCol, uint32_t offset) { + pBlockCol->offset = offset & ((((uint32_t)1) << 24) - 1); + pBlockCol->offsetH = (uint8_t)(offset >> 24); +} + +static FORCE_INLINE uint32_t tsdbGetBlockColOffset(SBlockCol *pBlockCol) { + uint32_t offset1 = pBlockCol->offset; + uint32_t offset2 = pBlockCol->offsetH; + return (offset1 | (offset2 << 24)); +} + +typedef struct { + int32_t delimiter; // 
For recovery usage + int32_t numOfCols; // For recovery usage + uint64_t uid; // For recovery usage + SBlockCol cols[]; +} SBlockData; + +typedef void SAggrBlkData; // SBlockCol cols[]; + +struct SReadH { + STsdb * pRepo; + SDFileSet rSet; // FSET to read + SArray * aBlkIdx; // SBlockIdx array + STable * pTable; // table to read + SBlockIdx * pBlkIdx; // current reading table SBlockIdx + int cidx; + SBlockInfo * pBlkInfo; // SBlockInfoV# + SBlockData * pBlkData; // Block info + SAggrBlkData *pAggrBlkData; // Aggregate Block info + SDataCols * pDCols[2]; + void * pBuf; // buffer + void * pCBuf; // compression buffer + void * pExBuf; // extra buffer +}; + +#define TSDB_READ_REPO(rh) ((rh)->pRepo) +#define TSDB_READ_REPO_ID(rh) REPO_ID(TSDB_READ_REPO(rh)) +#define TSDB_READ_FSET(rh) (&((rh)->rSet)) +#define TSDB_READ_TABLE(rh) ((rh)->pTable) +#define TSDB_READ_HEAD_FILE(rh) TSDB_DFILE_IN_SET(TSDB_READ_FSET(rh), TSDB_FILE_HEAD) +#define TSDB_READ_DATA_FILE(rh) TSDB_DFILE_IN_SET(TSDB_READ_FSET(rh), TSDB_FILE_DATA) +#define TSDB_READ_LAST_FILE(rh) TSDB_DFILE_IN_SET(TSDB_READ_FSET(rh), TSDB_FILE_LAST) +#define TSDB_READ_SMAD_FILE(rh) TSDB_DFILE_IN_SET(TSDB_READ_FSET(rh), TSDB_FILE_SMAD) +#define TSDB_READ_SMAL_FILE(rh) TSDB_DFILE_IN_SET(TSDB_READ_FSET(rh), TSDB_FILE_SMAL) +#define TSDB_READ_BUF(rh) ((rh)->pBuf) +#define TSDB_READ_COMP_BUF(rh) ((rh)->pCBuf) +#define TSDB_READ_EXBUF(rh) ((rh)->pExBuf) + +#define TSDB_BLOCK_STATIS_SIZE(ncols, blkVer) \ + (sizeof(SBlockData) + sizeof(SBlockColV##blkVer) * (ncols) + sizeof(TSCKSUM)) + +static FORCE_INLINE size_t tsdbBlockStatisSize(int nCols, uint32_t blkVer) { + switch (blkVer) { + case TSDB_SBLK_VER_0: + return TSDB_BLOCK_STATIS_SIZE(nCols, 0); + case TSDB_SBLK_VER_1: + default: + return TSDB_BLOCK_STATIS_SIZE(nCols, 1); + } +} + +#define TSDB_BLOCK_AGGR_SIZE(ncols, blkVer) (sizeof(SAggrBlkColV##blkVer) * (ncols) + sizeof(TSCKSUM)) + +static FORCE_INLINE size_t tsdbBlockAggrSize(int nCols, uint32_t blkVer) { + switch 
(blkVer) { + case TSDB_SBLK_VER_0: + ASSERT(false); + return 0; + case TSDB_SBLK_VER_1: + default: + return TSDB_BLOCK_AGGR_SIZE(nCols, 1); + } +} + +int tsdbInitReadH(SReadH *pReadh, STsdb *pRepo); +void tsdbDestroyReadH(SReadH *pReadh); +int tsdbSetAndOpenReadFSet(SReadH *pReadh, SDFileSet *pSet); +void tsdbCloseAndUnsetFSet(SReadH *pReadh); +int tsdbLoadBlockIdx(SReadH *pReadh); +int tsdbSetReadTable(SReadH *pReadh, STable *pTable); +int tsdbLoadBlockInfo(SReadH *pReadh, void **pTarget, uint32_t *extendedLen); +int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlockInfo); +int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, int16_t *colIds, int numOfColsIds); +int tsdbLoadBlockStatis(SReadH *pReadh, SBlock *pBlock); +int tsdbLoadBlockOffset(SReadH *pReadh, SBlock *pBlock); +int tsdbEncodeSBlockIdx(void **buf, SBlockIdx *pIdx); +void *tsdbDecodeSBlockIdx(void *buf, SBlockIdx *pIdx); +void tsdbGetBlockStatis(SReadH *pReadh, SDataStatis *pStatis, int numOfCols, SBlock *pBlock); + +static FORCE_INLINE int tsdbMakeRoom(void **ppBuf, size_t size) { + void * pBuf = *ppBuf; + size_t tsize = taosTSizeof(pBuf); + + if (tsize < size) { + if (tsize == 0) tsize = 1024; + + while (tsize < size) { + tsize *= 2; + } + + *ppBuf = taosTRealloc(pBuf, tsize); + if (*ppBuf == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + } + + return 0; +} + +static FORCE_INLINE SBlockCol *tsdbGetSBlockCol(SBlock *pBlock, SBlockCol **pDestBlkCol, SBlockCol *pBlkCols, + int colIdx) { + if (pBlock->blkVer == SBlockVerLatest) { + *pDestBlkCol = pBlkCols + colIdx; + return *pDestBlkCol; + } + if (pBlock->blkVer == TSDB_SBLK_VER_0) { + SBlockColV0 *pBlkCol = (SBlockColV0 *)pBlkCols + colIdx; + (*pDestBlkCol)->colId = pBlkCol->colId; + (*pDestBlkCol)->len = pBlkCol->len; + (*pDestBlkCol)->type = pBlkCol->type; + (*pDestBlkCol)->offset = pBlkCol->offset; + (*pDestBlkCol)->offsetH = pBlkCol->offsetH; + } + return *pDestBlkCol; +} + +#endif 
/*_TD_TSDB_READ_IMPL_H_*/ diff --git a/source/dnode/vnode/tsdb2/inc/tsdbRowMergeBuf.h b/source/dnode/vnode/tsdb2/inc/tsdbRowMergeBuf.h new file mode 100644 index 0000000000..cefa9b27fb --- /dev/null +++ b/source/dnode/vnode/tsdb2/inc/tsdbRowMergeBuf.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TSDB_ROW_MERGE_BUF_H +#define TSDB_ROW_MERGE_BUF_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "tsdb.h" +#include "tchecksum.h" +#include "tsdbReadImpl.h" + +typedef void* SMergeBuf; + +SDataRow tsdbMergeTwoRows(SMergeBuf *pBuf, SMemRow row1, SMemRow row2, STSchema *pSchema1, STSchema *pSchema2); + +static FORCE_INLINE int tsdbMergeBufMakeSureRoom(SMergeBuf *pBuf, STSchema* pSchema1, STSchema* pSchema2) { + size_t len1 = dataRowMaxBytesFromSchema(pSchema1); + size_t len2 = dataRowMaxBytesFromSchema(pSchema2); + return tsdbMakeRoom(pBuf, MAX(len1, len2)); +} + +static FORCE_INLINE void tsdbFreeMergeBuf(SMergeBuf buf) { + taosTZfree(buf); +} + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef TSDB_ROW_MERGE_BUF_H */ diff --git a/source/dnode/vnode/tsdb2/inc/tsdbint.h b/source/dnode/vnode/tsdb2/inc/tsdbint.h new file mode 100644 index 0000000000..0f492d90c3 --- /dev/null +++ b/source/dnode/vnode/tsdb2/inc/tsdbint.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_TSDB_INT_H_ +#define _TD_TSDB_INT_H_ + +#include "os.h" +#include "taosdef.h" +#include "taoserror.h" +#include "tarray.h" +#include "tchecksum.h" +#include "tcoding.h" +#include "tcompression.h" +#include "tdataformat.h" +#include "tfs.h" +#include "thash.h" +#include "tlist.h" +#include "tlockfree.h" +#include "tlog.h" +#include "tsdbMemory.h" +#include "tskiplist.h" + +#include "tsdb.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Log +#include "tsdbLog.h" +// Meta +#include "tsdbMeta.h" +// // Buffer +// #include "tsdbBuffer.h" +// MemTable +#include "tsdbMemTable.h" +// File +#include "tsdbFile.h" +// FS +#include "tsdbFS.h" +// ReadImpl +#include "tsdbReadImpl.h" +// Commit +#include "tsdbCommit.h" +// Compact +#include "tsdbCompact.h" + +#include "tsdbRowMergeBuf.h" +// Main definitions +struct STsdb { + uint8_t state; + STsdbCfg config; + STsdbStat stat; + STsdbMeta* tsdbMeta; + SMemTable* mem; + SMemTable* imem; + STsdbFS* fs; + SRtn rtn; + SMergeBuf mergeBuf; // used when update=2 +}; + +#define REPO_ID(r) (r)->config.tsdbId +#define REPO_CFG(r) (&((r)->config)) +#define REPO_FS(r) ((r)->fs) +#define IS_REPO_LOCKED(r) (r)->repoLocked +#define TSDB_SUBMIT_MSG_HEAD_SIZE sizeof(SSubmitMsg) + +int tsdbLockRepo(STsdb* pRepo); +int tsdbUnlockRepo(STsdb* pRepo); +STsdbMeta* tsdbGetMeta(STsdb* pRepo); +int tsdbCheckCommit(STsdb* pRepo); +int tsdbRestoreInfo(STsdb* pRepo); +UNUSED_FUNC int 
tsdbCacheLastData(STsdb* pRepo, STsdbCfg* oldCfg); +int32_t tsdbLoadLastCache(STsdb* pRepo, STable* pTable); +void tsdbGetRootDir(int repoid, char dirName[]); +void tsdbGetDataDir(int repoid, char dirName[]); + +#ifdef __cplusplus +} +#endif + +#endif /* _TD_TSDB_INT_H_ */ diff --git a/source/dnode/vnode/tsdb2/src/tsdbBuffer.c b/source/dnode/vnode/tsdb2/src/tsdbBuffer.c new file mode 100644 index 0000000000..396ce4181d --- /dev/null +++ b/source/dnode/vnode/tsdb2/src/tsdbBuffer.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#if 0 + +#include "tsdbHealth.h" +#include "tsdbint.h" + +#define POOL_IS_EMPTY(b) (listNEles((b)->bufBlockList) == 0) + +// ---------------- INTERNAL FUNCTIONS ---------------- +STsdbBufPool *tsdbNewBufPool() { + STsdbBufPool *pBufPool = (STsdbBufPool *)calloc(1, sizeof(*pBufPool)); + if (pBufPool == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + int code = pthread_cond_init(&(pBufPool->poolNotEmpty), NULL); + if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(code); + goto _err; + } + + pBufPool->bufBlockList = tdListNew(sizeof(STsdbBufBlock *)); + if (pBufPool->bufBlockList == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + return pBufPool; + +_err: + tsdbFreeBufPool(pBufPool); + return NULL; +} + +void tsdbFreeBufPool(STsdbBufPool *pBufPool) { + if (pBufPool) { + if (pBufPool->bufBlockList) { + ASSERT(listNEles(pBufPool->bufBlockList) == 0); + tdListFree(pBufPool->bufBlockList); + } + + pthread_cond_destroy(&pBufPool->poolNotEmpty); + + free(pBufPool); + } +} + +int tsdbOpenBufPool(STsdb *pRepo) { + STsdbCfg * pCfg = &(pRepo->config); + STsdbBufPool *pPool = pRepo->pPool; + + ASSERT(pPool != NULL); + pPool->bufBlockSize = pCfg->cacheBlockSize * 1024 * 1024; // MB + pPool->tBufBlocks = pCfg->totalBlocks; + pPool->nBufBlocks = 0; + pPool->nElasticBlocks = 0; + pPool->index = 0; + pPool->nRecycleBlocks = 0; + + for (int i = 0; i < pCfg->totalBlocks; i++) { + STsdbBufBlock *pBufBlock = tsdbNewBufBlock(pPool->bufBlockSize); + if (pBufBlock == NULL) goto _err; + + if (tdListAppend(pPool->bufBlockList, (void *)(&pBufBlock)) < 0) { + tsdbFreeBufBlock(pBufBlock); + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + pPool->nBufBlocks++; + } + + tsdbDebug("vgId:%d buffer pool is opened! 
bufBlockSize:%d tBufBlocks:%d nBufBlocks:%d", REPO_ID(pRepo), + pPool->bufBlockSize, pPool->tBufBlocks, pPool->nBufBlocks); + + return 0; + +_err: + tsdbCloseBufPool(pRepo); + return -1; +} + +void tsdbCloseBufPool(STsdb *pRepo) { + if (pRepo == NULL) return; + + STsdbBufPool * pBufPool = pRepo->pPool; + STsdbBufBlock *pBufBlock = NULL; + + if (pBufPool) { + SListNode *pNode = NULL; + while ((pNode = tdListPopHead(pBufPool->bufBlockList)) != NULL) { + tdListNodeGetData(pBufPool->bufBlockList, pNode, (void *)(&pBufBlock)); + tsdbFreeBufBlock(pBufBlock); + free(pNode); + } + } + + tsdbDebug("vgId:%d, buffer pool is closed", REPO_ID(pRepo)); +} + +SListNode *tsdbAllocBufBlockFromPool(STsdb *pRepo) { + ASSERT(pRepo != NULL && pRepo->pPool != NULL); + ASSERT(IS_REPO_LOCKED(pRepo)); + + STsdbBufPool *pBufPool = pRepo->pPool; + + while (POOL_IS_EMPTY(pBufPool)) { + if (tsDeadLockKillQuery) { + // supply new Block + if (tsdbInsertNewBlock(pRepo) > 0) { + tsdbWarn("vgId:%d add new elastic block . 
elasticBlocks=%d cur free Blocks=%d", REPO_ID(pRepo), + pBufPool->nElasticBlocks, TD_DLIST_NELES(pBufPool->bufBlockList)); + break; + } else { + // no newBlock, kill query free + if (!tsdbUrgeQueryFree(pRepo)) tsdbWarn("vgId:%d Urge query free thread start failed.", REPO_ID(pRepo)); + } + } + + pRepo->repoLocked = false; + pthread_cond_wait(&(pBufPool->poolNotEmpty), &(pRepo->mutex)); + pRepo->repoLocked = true; + } + + SListNode *pNode = tdListPopHead(pBufPool->bufBlockList); + ASSERT(pNode != NULL); + STsdbBufBlock *pBufBlock = NULL; + tdListNodeGetData(pBufPool->bufBlockList, pNode, (void *)(&pBufBlock)); + + pBufBlock->blockId = pBufPool->index++; + pBufBlock->offset = 0; + pBufBlock->remain = pBufPool->bufBlockSize; + + tsdbDebug("vgId:%d, buffer block is allocated, blockId:%" PRId64, REPO_ID(pRepo), pBufBlock->blockId); + return pNode; +} + +// ---------------- LOCAL FUNCTIONS ---------------- +STsdbBufBlock *tsdbNewBufBlock(int bufBlockSize) { + STsdbBufBlock *pBufBlock = (STsdbBufBlock *)malloc(sizeof(*pBufBlock) + bufBlockSize); + if (pBufBlock == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return NULL; + } + + pBufBlock->blockId = 0; + pBufBlock->offset = 0; + pBufBlock->remain = bufBlockSize; + + return pBufBlock; +} + +void tsdbFreeBufBlock(STsdbBufBlock *pBufBlock) { tfree(pBufBlock); } + +int tsdbExpandPool(STsdb *pRepo, int32_t oldTotalBlocks) { + if (oldTotalBlocks == pRepo->config.totalBlocks) { + return TSDB_CODE_SUCCESS; + } + + int err = TSDB_CODE_SUCCESS; + + if (tsdbLockRepo(pRepo) < 0) return terrno; + STsdbBufPool *pPool = pRepo->pPool; + + if (pRepo->config.totalBlocks > oldTotalBlocks) { + for (int i = 0; i < pRepo->config.totalBlocks - oldTotalBlocks; i++) { + STsdbBufBlock *pBufBlock = tsdbNewBufBlock(pPool->bufBlockSize); + if (pBufBlock == NULL) goto err; + + if (tdListAppend(pPool->bufBlockList, (void *)(&pBufBlock)) < 0) { + tsdbFreeBufBlock(pBufBlock); + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + err = 
TSDB_CODE_TDB_OUT_OF_MEMORY; + goto err; + } + + pPool->nBufBlocks++; + } + pthread_cond_signal(&pPool->poolNotEmpty); + } else { + pPool->nRecycleBlocks = oldTotalBlocks - pRepo->config.totalBlocks; + } + +err: + tsdbUnlockRepo(pRepo); + return err; +} + +void tsdbRecycleBufferBlock(STsdbBufPool *pPool, SListNode *pNode, bool bELastic) { + STsdbBufBlock *pBufBlock = NULL; + tdListNodeGetData(pPool->bufBlockList, pNode, (void *)(&pBufBlock)); + tsdbFreeBufBlock(pBufBlock); + free(pNode); + if (bELastic) { + pPool->nElasticBlocks--; + tsdbWarn("pPool=%p elastic block reduce one . nElasticBlocks=%d cur free Blocks=%d", pPool, pPool->nElasticBlocks, + TD_DLIST_NELES(pPool->bufBlockList)); + } else + pPool->nBufBlocks--; +} + +#endif \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/src/tsdbCommit.c b/source/dnode/vnode/tsdb2/src/tsdbCommit.c new file mode 100644 index 0000000000..149d4a698a --- /dev/null +++ b/source/dnode/vnode/tsdb2/src/tsdbCommit.c @@ -0,0 +1,1776 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ +#include "tsdbint.h" +#include "ttime.h" + +extern int32_t tsTsdbMetaCompactRatio; + +#define TSDB_MAX_SUBBLOCKS 8 +static FORCE_INLINE int TSDB_KEY_FID(TSKEY key, int32_t days, int8_t precision) { + if (key < 0) { + return (int)((key + 1) / tsTickPerDay[precision] / days - 1); + } else { + return (int)((key / tsTickPerDay[precision] / days)); + } +} + +typedef struct { + SRtn rtn; // retention snapshot + SFSIter fsIter; // tsdb file iterator + int niters; // memory iterators + SCommitIter *iters; + bool isRFileSet; // read and commit FSET + SReadH readh; + SDFileSet wSet; + bool isDFileSame; + bool isLFileSame; + TSKEY minKey; + TSKEY maxKey; + SArray * aBlkIdx; // SBlockIdx array + STable * pTable; + SArray * aSupBlk; // Table super-block array + SArray * aSubBlk; // table sub-block array + SDataCols * pDataCols; +} SCommitH; + +#define TSDB_COMMIT_REPO(ch) TSDB_READ_REPO(&(ch->readh)) +#define TSDB_COMMIT_REPO_ID(ch) REPO_ID(TSDB_READ_REPO(&(ch->readh))) +#define TSDB_COMMIT_WRITE_FSET(ch) (&((ch)->wSet)) +#define TSDB_COMMIT_TABLE(ch) ((ch)->pTable) +#define TSDB_COMMIT_HEAD_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_HEAD) +#define TSDB_COMMIT_DATA_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_DATA) +#define TSDB_COMMIT_LAST_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_LAST) +#define TSDB_COMMIT_SMAD_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_SMAD) +#define TSDB_COMMIT_SMAL_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_SMAL) +#define TSDB_COMMIT_BUF(ch) TSDB_READ_BUF(&((ch)->readh)) +#define TSDB_COMMIT_COMP_BUF(ch) TSDB_READ_COMP_BUF(&((ch)->readh)) +#define TSDB_COMMIT_EXBUF(ch) TSDB_READ_EXBUF(&((ch)->readh)) +#define TSDB_COMMIT_DEFAULT_ROWS(ch) TSDB_DEFAULT_BLOCK_ROWS(TSDB_COMMIT_REPO(ch)->config.maxRowsPerFileBlock) +#define TSDB_COMMIT_TXN_VERSION(ch) FS_TXN_VERSION(REPO_FS(TSDB_COMMIT_REPO(ch))) + +static int tsdbCommitMeta(STsdb *pRepo); +static 
int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact); +static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid); +static int tsdbCompactMetaFile(STsdb *pRepo, STsdbFS *pfs, SMFile *pMFile); +static int tsdbCommitTSData(STsdb *pRepo); +static void tsdbStartCommit(STsdb *pRepo); +static void tsdbEndCommit(STsdb *pRepo, int eno); +static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid); +static int tsdbCreateCommitIters(SCommitH *pCommith); +static void tsdbDestroyCommitIters(SCommitH *pCommith); +static void tsdbSeekCommitIter(SCommitH *pCommith, TSKEY key); +static int tsdbInitCommitH(SCommitH *pCommith, STsdb *pRepo); +static void tsdbDestroyCommitH(SCommitH *pCommith); +static int tsdbGetFidLevel(int fid, SRtn *pRtn); +static int tsdbNextCommitFid(SCommitH *pCommith); +static int tsdbCommitToTable(SCommitH *pCommith, int tid); +static int tsdbSetCommitTable(SCommitH *pCommith, STable *pTable); +static int tsdbComparKeyBlock(const void *arg1, const void *arg2); +static int tsdbWriteBlockInfo(SCommitH *pCommih); +static int tsdbCommitMemData(SCommitH *pCommith, SCommitIter *pIter, TSKEY keyLimit, bool toData); +static int tsdbMergeMemData(SCommitH *pCommith, SCommitIter *pIter, int bidx); +static int tsdbMoveBlock(SCommitH *pCommith, int bidx); +static int tsdbCommitAddBlock(SCommitH *pCommith, const SBlock *pSupBlock, const SBlock *pSubBlocks, int nSubBlocks); +static int tsdbMergeBlockData(SCommitH *pCommith, SCommitIter *pIter, SDataCols *pDataCols, TSKEY keyLimit, + bool isLastOneBlock); +static void tsdbResetCommitFile(SCommitH *pCommith); +static void tsdbResetCommitTable(SCommitH *pCommith); +static int tsdbSetAndOpenCommitFile(SCommitH *pCommith, SDFileSet *pSet, int fid); +static void tsdbCloseCommitFile(SCommitH *pCommith, bool hasError); +static bool tsdbCanAddSubBlock(SCommitH *pCommith, SBlock *pBlock, SMergeInfo *pInfo); +static void 
tsdbLoadAndMergeFromCache(SDataCols *pDataCols, int *iter, SCommitIter *pCommitIter, SDataCols *pTarget, + TSKEY maxKey, int maxRows, int8_t update); + +void *tsdbCommitData(STsdb *pRepo) { + if (pRepo->imem == NULL) { + return NULL; + } + tsdbStartCommit(pRepo); + + // Commit to update meta file + if (tsdbCommitMeta(pRepo) < 0) { + tsdbError("vgId:%d error occurs while committing META data since %s", REPO_ID(pRepo), tstrerror(terrno)); + goto _err; + } + + // Create the iterator to read from cache + if (tsdbCommitTSData(pRepo) < 0) { + tsdbError("vgId:%d error occurs while committing TS data since %s", REPO_ID(pRepo), tstrerror(terrno)); + goto _err; + } + + tsdbEndCommit(pRepo, TSDB_CODE_SUCCESS); + return NULL; + +_err: + ASSERT(terrno != TSDB_CODE_SUCCESS); + pRepo->code = terrno; + + tsdbEndCommit(pRepo, terrno); + return NULL; +} + +int tsdbApplyRtnOnFSet(STsdb *pRepo, SDFileSet *pSet, SRtn *pRtn) { + SDiskID did; + SDFileSet nSet; + STsdbFS * pfs = REPO_FS(pRepo); + int level; + + ASSERT(pSet->fid >= pRtn->minFid); + + level = tsdbGetFidLevel(pSet->fid, pRtn); + + tfsAllocDisk(level, &(did.level), &(did.id)); + if (did.level == TFS_UNDECIDED_LEVEL) { + terrno = TSDB_CODE_TDB_NO_AVAIL_DISK; + return -1; + } + + if (did.level > TSDB_FSET_LEVEL(pSet)) { + // Need to move the FSET to higher level + tsdbInitDFileSet(&nSet, did, REPO_ID(pRepo), pSet->fid, FS_TXN_VERSION(pfs), pSet->ver); + + if (tsdbCopyDFileSet(pSet, &nSet) < 0) { + tsdbError("vgId:%d failed to copy FSET %d from level %d to level %d since %s", REPO_ID(pRepo), pSet->fid, + TSDB_FSET_LEVEL(pSet), did.level, tstrerror(terrno)); + return -1; + } + + if (tsdbUpdateDFileSet(pfs, &nSet) < 0) { + return -1; + } + + tsdbInfo("vgId:%d FSET %d is copied from level %d disk id %d to level %d disk id %d", REPO_ID(pRepo), pSet->fid, + TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet), did.level, did.id); + } else { + // On a correct level + if (tsdbUpdateDFileSet(pfs, pSet) < 0) { + return -1; + } + } + + return 0; +} 
+ +int tsdbWriteBlockInfoImpl(SDFile *pHeadf, STable *pTable, SArray *pSupA, SArray *pSubA, void **ppBuf, + SBlockIdx *pIdx) { + size_t nSupBlocks; + size_t nSubBlocks; + uint32_t tlen; + SBlockInfo *pBlkInfo; + int64_t offset; + SBlock * pBlock; + + memset(pIdx, 0, sizeof(*pIdx)); + + nSupBlocks = taosArrayGetSize(pSupA); + nSubBlocks = (pSubA == NULL) ? 0 : taosArrayGetSize(pSubA); + + if (nSupBlocks <= 0) { + // No data (data all deleted) + return 0; + } + + tlen = (uint32_t)(sizeof(SBlockInfo) + sizeof(SBlock) * (nSupBlocks + nSubBlocks) + sizeof(TSCKSUM)); + if (tsdbMakeRoom(ppBuf, tlen) < 0) return -1; + pBlkInfo = *ppBuf; + + pBlkInfo->delimiter = TSDB_FILE_DELIMITER; + pBlkInfo->tid = TABLE_TID(pTable); + pBlkInfo->uid = TABLE_UID(pTable); + + memcpy((void *)(pBlkInfo->blocks), taosArrayGet(pSupA, 0), nSupBlocks * sizeof(SBlock)); + if (nSubBlocks > 0) { + memcpy((void *)(pBlkInfo->blocks + nSupBlocks), taosArrayGet(pSubA, 0), nSubBlocks * sizeof(SBlock)); + + for (int i = 0; i < nSupBlocks; i++) { + pBlock = pBlkInfo->blocks + i; + + if (pBlock->numOfSubBlocks > 1) { + pBlock->offset += (sizeof(SBlockInfo) + sizeof(SBlock) * nSupBlocks); + } + } + } + + taosCalcChecksumAppend(0, (uint8_t *)pBlkInfo, tlen); + + if (tsdbAppendDFile(pHeadf, (void *)pBlkInfo, tlen, &offset) < 0) { + return -1; + } + + tsdbUpdateDFileMagic(pHeadf, POINTER_SHIFT(pBlkInfo, tlen - sizeof(TSCKSUM))); + + // Set pIdx + pBlock = taosArrayGetLast(pSupA); + + pIdx->tid = TABLE_TID(pTable); + pIdx->uid = TABLE_UID(pTable); + pIdx->hasLast = pBlock->last ? 
1 : 0; + pIdx->maxKey = pBlock->keyLast; + pIdx->numOfBlocks = (uint32_t)nSupBlocks; + pIdx->len = tlen; + pIdx->offset = (uint32_t)offset; + + return 0; +} + +int tsdbWriteBlockIdx(SDFile *pHeadf, SArray *pIdxA, void **ppBuf) { + SBlockIdx *pBlkIdx; + size_t nidx = taosArrayGetSize(pIdxA); + int tlen = 0, size; + int64_t offset = 0; + + if (nidx <= 0) { + // All data are deleted + pHeadf->info.offset = 0; + pHeadf->info.len = 0; + return 0; + } + + for (size_t i = 0; i < nidx; i++) { + pBlkIdx = (SBlockIdx *)taosArrayGet(pIdxA, i); + + size = tsdbEncodeSBlockIdx(NULL, pBlkIdx); + if (tsdbMakeRoom(ppBuf, tlen + size) < 0) return -1; + + void *ptr = POINTER_SHIFT(*ppBuf, tlen); + tsdbEncodeSBlockIdx(&ptr, pBlkIdx); + + tlen += size; + } + + tlen += sizeof(TSCKSUM); + if (tsdbMakeRoom(ppBuf, tlen) < 0) return -1; + taosCalcChecksumAppend(0, (uint8_t *)(*ppBuf), tlen); + + if (tsdbAppendDFile(pHeadf, *ppBuf, tlen, &offset) < tlen) { + return -1; + } + + tsdbUpdateDFileMagic(pHeadf, POINTER_SHIFT(*ppBuf, tlen - sizeof(TSCKSUM))); + pHeadf->info.offset = (uint32_t)offset; + pHeadf->info.len = tlen; + + return 0; +} + +// =================== Commit Meta Data +static int tsdbInitCommitMetaFile(STsdb *pRepo, SMFile *pMf, bool open) { + STsdbFS *pfs = REPO_FS(pRepo); + SMFile * pOMFile = pfs->cstatus->pmf; + SDiskID did; + + // Create/Open a meta file or open the existing file + if (pOMFile == NULL) { + // Create a new meta file + did.level = TFS_PRIMARY_LEVEL; + did.id = TFS_PRIMARY_ID; + tsdbInitMFile(pMf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo))); + + if (open && tsdbCreateMFile(pMf, true) < 0) { + tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + + tsdbInfo("vgId:%d meta file %s is created to commit", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMf)); + } else { + tsdbInitMFileEx(pMf, pOMFile); + if (open && tsdbOpenMFile(pMf, O_WRONLY) < 0) { + tsdbError("vgId:%d failed to open META file since %s", 
REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + } + + return 0; +} + +static int tsdbCommitMeta(STsdb *pRepo) { + STsdbFS * pfs = REPO_FS(pRepo); + SMemTable *pMem = pRepo->imem; + SMFile * pOMFile = pfs->cstatus->pmf; + SMFile mf; + SActObj * pAct = NULL; + SActCont * pCont = NULL; + SListNode *pNode = NULL; + + ASSERT(pOMFile != NULL || listNEles(pMem->actList) > 0); + + if (listNEles(pMem->actList) <= 0) { + // no meta data to commit, just keep the old meta file + tsdbUpdateMFile(pfs, pOMFile); + if (tsTsdbMetaCompactRatio > 0) { + if (tsdbInitCommitMetaFile(pRepo, &mf, false) < 0) { + return -1; + } + int ret = tsdbCompactMetaFile(pRepo, pfs, &mf); + if (ret < 0) tsdbError("compact meta file error"); + + return ret; + } + return 0; + } else { + if (tsdbInitCommitMetaFile(pRepo, &mf, true) < 0) { + return -1; + } + } + + // Loop to write + while ((pNode = tdListPopHead(pMem->actList)) != NULL) { + pAct = (SActObj *)pNode->data; + if (pAct->act == TSDB_UPDATE_META) { + pCont = (SActCont *)POINTER_SHIFT(pAct, sizeof(SActObj)); + if (tsdbUpdateMetaRecord(pfs, &mf, pAct->uid, (void *)(pCont->cont), pCont->len, false) < 0) { + tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid, + tstrerror(terrno)); + tsdbCloseMFile(&mf); + (void)tsdbApplyMFileChange(&mf, pOMFile); + // TODO: need to reload metaCache + return -1; + } + } else if (pAct->act == TSDB_DROP_META) { + if (tsdbDropMetaRecord(pfs, &mf, pAct->uid) < 0) { + tsdbError("vgId:%d failed to drop META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid, + tstrerror(terrno)); + tsdbCloseMFile(&mf); + tsdbApplyMFileChange(&mf, pOMFile); + // TODO: need to reload metaCache + return -1; + } + } else { + ASSERT(false); + } + } + + if (tsdbUpdateMFileHeader(&mf) < 0) { + tsdbError("vgId:%d failed to update META file header since %s, revert it", REPO_ID(pRepo), tstrerror(terrno)); + tsdbApplyMFileChange(&mf, pOMFile); + // TODO: need to reload metaCache + 
return -1; + } + + TSDB_FILE_FSYNC(&mf); + tsdbCloseMFile(&mf); + tsdbUpdateMFile(pfs, &mf); + + if (tsTsdbMetaCompactRatio > 0 && tsdbCompactMetaFile(pRepo, pfs, &mf) < 0) { + tsdbError("compact meta file error"); + } + + return 0; +} + +int tsdbEncodeKVRecord(void **buf, SKVRecord *pRecord) { + int tlen = 0; + tlen += taosEncodeFixedU64(buf, pRecord->uid); + tlen += taosEncodeFixedI64(buf, pRecord->offset); + tlen += taosEncodeFixedI64(buf, pRecord->size); + + return tlen; +} + +void *tsdbDecodeKVRecord(void *buf, SKVRecord *pRecord) { + buf = taosDecodeFixedU64(buf, &(pRecord->uid)); + buf = taosDecodeFixedI64(buf, &(pRecord->offset)); + buf = taosDecodeFixedI64(buf, &(pRecord->size)); + + return buf; +} + +void tsdbGetRtnSnap(STsdb *pRepo, SRtn *pRtn) { + STsdbCfg *pCfg = REPO_CFG(pRepo); + TSKEY minKey, midKey, maxKey, now; + + now = taosGetTimestamp(pCfg->precision); + minKey = now - pCfg->keep * tsTickPerDay[pCfg->precision]; + midKey = now - pCfg->keep2 * tsTickPerDay[pCfg->precision]; + maxKey = now - pCfg->keep1 * tsTickPerDay[pCfg->precision]; + + pRtn->minKey = minKey; + pRtn->minFid = (int)(TSDB_KEY_FID(minKey, pCfg->daysPerFile, pCfg->precision)); + pRtn->midFid = (int)(TSDB_KEY_FID(midKey, pCfg->daysPerFile, pCfg->precision)); + pRtn->maxFid = (int)(TSDB_KEY_FID(maxKey, pCfg->daysPerFile, pCfg->precision)); + tsdbDebug("vgId:%d now:%" PRId64 " minKey:%" PRId64 " minFid:%d, midFid:%d, maxFid:%d", REPO_ID(pRepo), now, minKey, + pRtn->minFid, pRtn->midFid, pRtn->maxFid); +} + +static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact) { + char buf[64] = "\0"; + void * pBuf = buf; + SKVRecord rInfo; + int64_t offset; + + // Seek to end of meta file + offset = tsdbSeekMFile(pMFile, 0, SEEK_END); + if (offset < 0) { + return -1; + } + + rInfo.offset = offset; + rInfo.uid = uid; + rInfo.size = contLen; + + int tlen = tsdbEncodeKVRecord((void **)(&pBuf), &rInfo); + if (tsdbAppendMFile(pMFile, buf, tlen, 
NULL) < tlen) { + return -1; + } + + if (tsdbAppendMFile(pMFile, cont, contLen, NULL) < contLen) { + return -1; + } + + tsdbUpdateMFileMagic(pMFile, POINTER_SHIFT(cont, contLen - sizeof(TSCKSUM))); + + SHashObj *cache = compact ? pfs->metaCacheComp : pfs->metaCache; + + pMFile->info.nRecords++; + + SKVRecord *pRecord = taosHashGet(cache, (void *)&uid, sizeof(uid)); + if (pRecord != NULL) { + pMFile->info.tombSize += (pRecord->size + sizeof(SKVRecord)); + } else { + pMFile->info.nRecords++; + } + taosHashPut(cache, (void *)(&uid), sizeof(uid), (void *)(&rInfo), sizeof(rInfo)); + + return 0; +} + +static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid) { + SKVRecord rInfo = {0}; + char buf[128] = "\0"; + + SKVRecord *pRecord = taosHashGet(pfs->metaCache, (void *)(&uid), sizeof(uid)); + if (pRecord == NULL) { + tsdbError("failed to drop META record with key %" PRIu64 " since not find", uid); + return -1; + } + + rInfo.offset = -pRecord->offset; + rInfo.uid = pRecord->uid; + rInfo.size = pRecord->size; + + void *pBuf = buf; + tsdbEncodeKVRecord(&pBuf, &rInfo); + + if (tsdbAppendMFile(pMFile, buf, sizeof(SKVRecord), NULL) < 0) { + return -1; + } + + pMFile->info.magic = taosCalcChecksum(pMFile->info.magic, (uint8_t *)buf, sizeof(SKVRecord)); + pMFile->info.nDels++; + pMFile->info.nRecords--; + pMFile->info.tombSize += (rInfo.size + sizeof(SKVRecord) * 2); + + taosHashRemove(pfs->metaCache, (void *)(&uid), sizeof(uid)); + return 0; +} + +static int tsdbCompactMetaFile(STsdb *pRepo, STsdbFS *pfs, SMFile *pMFile) { + float delPercent = (float)(pMFile->info.nDels) / (float)(pMFile->info.nRecords); + float tombPercent = (float)(pMFile->info.tombSize) / (float)(pMFile->info.size); + float compactRatio = (float)(tsTsdbMetaCompactRatio) / 100; + + if (delPercent < compactRatio && tombPercent < compactRatio) { + return 0; + } + + if (tsdbOpenMFile(pMFile, O_RDONLY) < 0) { + tsdbError("open meta file %s compact fail", pMFile->f.rname); + return -1; + } + + 
tsdbInfo("begin compact tsdb meta file, ratio:%d, nDels:%" PRId64 ",nRecords:%" PRId64 ",tombSize:%" PRId64 + ",size:%" PRId64, + tsTsdbMetaCompactRatio, pMFile->info.nDels, pMFile->info.nRecords, pMFile->info.tombSize, pMFile->info.size); + + SMFile mf; + SDiskID did; + + // first create tmp meta file + did.level = TFS_PRIMARY_LEVEL; + did.id = TFS_PRIMARY_ID; + tsdbInitMFile(&mf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)) + 1); + + if (tsdbCreateMFile(&mf, true) < 0) { + tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + + tsdbInfo("vgId:%d meta file %s is created to compact meta data", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf)); + + // second iterator metaCache + int code = -1; + int64_t maxBufSize = 1024; + SKVRecord *pRecord; + void * pBuf = NULL; + + pBuf = malloc((size_t)maxBufSize); + if (pBuf == NULL) { + goto _err; + } + + // init Comp + assert(pfs->metaCacheComp == NULL); + pfs->metaCacheComp = taosHashInit(4096, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK); + if (pfs->metaCacheComp == NULL) { + goto _err; + } + + pRecord = taosHashIterate(pfs->metaCache, NULL); + while (pRecord) { + if (tsdbSeekMFile(pMFile, pRecord->offset + sizeof(SKVRecord), SEEK_SET) < 0) { + tsdbError("vgId:%d failed to seek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), + tstrerror(terrno)); + goto _err; + } + if (pRecord->size > maxBufSize) { + maxBufSize = pRecord->size; + void *tmp = realloc(pBuf, (size_t)maxBufSize); + if (tmp == NULL) { + goto _err; + } + pBuf = tmp; + } + int nread = (int)tsdbReadMFile(pMFile, pBuf, pRecord->size); + if (nread < 0) { + tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), + tstrerror(terrno)); + goto _err; + } + + if (nread < pRecord->size) { + tsdbError("vgId:%d failed to read file %s since file corrupted, expected read:%" PRId64 " actual read:%d", + REPO_ID(pRepo), 
TSDB_FILE_FULL_NAME(pMFile), pRecord->size, nread); + goto _err; + } + + if (tsdbUpdateMetaRecord(pfs, &mf, pRecord->uid, pBuf, (int)pRecord->size, true) < 0) { + tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pRecord->uid, + tstrerror(terrno)); + goto _err; + } + + pRecord = taosHashIterate(pfs->metaCache, pRecord); + } + code = 0; + +_err: + if (code == 0) TSDB_FILE_FSYNC(&mf); + tsdbCloseMFile(&mf); + tsdbCloseMFile(pMFile); + + if (code == 0) { + // rename meta.tmp -> meta + tsdbInfo("vgId:%d meta file rename %s -> %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf), + TSDB_FILE_FULL_NAME(pMFile)); + taosRenameFile(mf.f.aname, pMFile->f.aname); + tstrncpy(mf.f.aname, pMFile->f.aname, TSDB_FILENAME_LEN); + tstrncpy(mf.f.rname, pMFile->f.rname, TSDB_FILENAME_LEN); + // update current meta file info + pfs->nstatus->pmf = NULL; + tsdbUpdateMFile(pfs, &mf); + + taosHashCleanup(pfs->metaCache); + pfs->metaCache = pfs->metaCacheComp; + pfs->metaCacheComp = NULL; + } else { + // remove meta.tmp file + remove(mf.f.aname); + taosHashCleanup(pfs->metaCacheComp); + pfs->metaCacheComp = NULL; + } + + tfree(pBuf); + + ASSERT(mf.info.nDels == 0); + ASSERT(mf.info.tombSize == 0); + + tsdbInfo("end compact tsdb meta file,code:%d,nRecords:%" PRId64 ",size:%" PRId64, code, mf.info.nRecords, + mf.info.size); + return code; +} + +// =================== Commit Time-Series Data +static int tsdbCommitTSData(STsdb *pRepo) { + SMemTable *pMem = pRepo->imem; + SCommitH commith; + SDFileSet *pSet = NULL; + int fid; + + memset(&commith, 0, sizeof(commith)); + + if (pMem->numOfRows <= 0) { + // No memory data, just apply retention on each file on disk + if (tsdbApplyRtn(pRepo) < 0) { + return -1; + } + return 0; + } + + // Resource initialization + if (tsdbInitCommitH(&commith, pRepo) < 0) { + return -1; + } + + // Skip expired memory data and expired FSET + tsdbSeekCommitIter(&commith, commith.rtn.minKey); + while ((pSet = 
tsdbFSIterNext(&(commith.fsIter)))) { + if (pSet->fid < commith.rtn.minFid) { + tsdbInfo("vgId:%d FSET %d on level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid, + TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet)); + } else { + break; + } + } + + // Loop to commit to each file + fid = tsdbNextCommitFid(&(commith)); + while (true) { + // Loop over both on disk and memory + if (pSet == NULL && fid == TSDB_IVLD_FID) break; + + if (pSet && (fid == TSDB_IVLD_FID || pSet->fid < fid)) { + // Only has existing FSET but no memory data to commit in this + // existing FSET, only check if file in correct retention + if (tsdbApplyRtnOnFSet(pRepo, pSet, &(commith.rtn)) < 0) { + tsdbDestroyCommitH(&commith); + return -1; + } + + pSet = tsdbFSIterNext(&(commith.fsIter)); + } else { + // Has memory data to commit + SDFileSet *pCSet; + int cfid; + + if (pSet == NULL || pSet->fid > fid) { + // Commit to a new FSET with fid: fid + pCSet = NULL; + cfid = fid; + } else { + // Commit to an existing FSET + pCSet = pSet; + cfid = pSet->fid; + pSet = tsdbFSIterNext(&(commith.fsIter)); + } + + if (tsdbCommitToFile(&commith, pCSet, cfid) < 0) { + tsdbDestroyCommitH(&commith); + return -1; + } + + fid = tsdbNextCommitFid(&commith); + } + } + + tsdbDestroyCommitH(&commith); + return 0; +} + +static void tsdbStartCommit(STsdb *pRepo) { + SMemTable *pMem = pRepo->imem; + + ASSERT(pMem->numOfRows > 0 || listNEles(pMem->actList) > 0); + + tsdbInfo("vgId:%d start to commit! keyFirst %" PRId64 " keyLast %" PRId64 " numOfRows %" PRId64 " meta rows: %d", + REPO_ID(pRepo), pMem->keyFirst, pMem->keyLast, pMem->numOfRows, listNEles(pMem->actList)); + + tsdbStartFSTxn(pRepo, pMem->pointsAdd, pMem->storageAdd); + + pRepo->code = TSDB_CODE_SUCCESS; +} + +static void tsdbEndCommit(STsdb *pRepo, int eno) { + if (eno != TSDB_CODE_SUCCESS) { + tsdbEndFSTxnWithError(REPO_FS(pRepo)); + } else { + tsdbEndFSTxn(pRepo); + } + + tsdbInfo("vgId:%d commit over, %s", REPO_ID(pRepo), (eno == TSDB_CODE_SUCCESS) ? 
"succeed" : "failed"); + + if (pRepo->appH.notifyStatus) pRepo->appH.notifyStatus(pRepo->appH.appH, TSDB_STATUS_COMMIT_OVER, eno); + + SMemTable *pIMem = pRepo->imem; + (void)tsdbLockRepo(pRepo); + pRepo->imem = NULL; + (void)tsdbUnlockRepo(pRepo); + tsdbUnRefMemTable(pRepo, pIMem); + tsem_post(&(pRepo->readyToCommit)); +} + +#if 0 +static bool tsdbHasDataToCommit(SCommitIter *iters, int nIters, TSKEY minKey, TSKEY maxKey) { + for (int i = 0; i < nIters; i++) { + TSKEY nextKey = tsdbNextIterKey((iters + i)->pIter); + if (nextKey != TSDB_DATA_TIMESTAMP_NULL && (nextKey >= minKey && nextKey <= maxKey)) return true; + } + return false; +} +#endif + +static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid) { + STsdb * pRepo = TSDB_COMMIT_REPO(pCommith); + STsdbCfg *pCfg = REPO_CFG(pRepo); + + ASSERT(pSet == NULL || pSet->fid == fid); + + tsdbResetCommitFile(pCommith); + tsdbGetFidKeyRange(pCfg->daysPerFile, pCfg->precision, fid, &(pCommith->minKey), &(pCommith->maxKey)); + + // Set and open files + if (tsdbSetAndOpenCommitFile(pCommith, pSet, fid) < 0) { + return -1; + } + + // Loop to commit each table data + for (int tid = 1; tid < pCommith->niters; tid++) { + SCommitIter *pIter = pCommith->iters + tid; + + if (pIter->pTable == NULL) continue; + + if (tsdbCommitToTable(pCommith, tid) < 0) { + tsdbCloseCommitFile(pCommith, true); + // revert the file change + tsdbApplyDFileSetChange(TSDB_COMMIT_WRITE_FSET(pCommith), pSet); + return -1; + } + } + + if (tsdbWriteBlockIdx(TSDB_COMMIT_HEAD_FILE(pCommith), pCommith->aBlkIdx, (void **)(&(TSDB_COMMIT_BUF(pCommith)))) < + 0) { + tsdbError("vgId:%d failed to write SBlockIdx part to FSET %d since %s", REPO_ID(pRepo), fid, tstrerror(terrno)); + tsdbCloseCommitFile(pCommith, true); + // revert the file change + tsdbApplyDFileSetChange(TSDB_COMMIT_WRITE_FSET(pCommith), pSet); + return -1; + } + + if (tsdbUpdateDFileSetHeader(&(pCommith->wSet)) < 0) { + tsdbError("vgId:%d failed to update FSET %d header since %s", 
REPO_ID(pRepo), fid, tstrerror(terrno)); + tsdbCloseCommitFile(pCommith, true); + // revert the file change + tsdbApplyDFileSetChange(TSDB_COMMIT_WRITE_FSET(pCommith), pSet); + return -1; + } + + // Close commit file + tsdbCloseCommitFile(pCommith, false); + + if (tsdbUpdateDFileSet(REPO_FS(pRepo), &(pCommith->wSet)) < 0) { + return -1; + } + + return 0; +} + +static int tsdbCreateCommitIters(SCommitH *pCommith) { + STsdb * pRepo = TSDB_COMMIT_REPO(pCommith); + SMemTable *pMem = pRepo->imem; + STsdbMeta *pMeta = pRepo->tsdbMeta; + + pCommith->niters = pMem->maxTables; + pCommith->iters = (SCommitIter *)calloc(pMem->maxTables, sizeof(SCommitIter)); + if (pCommith->iters == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + if (tsdbRLockRepoMeta(pRepo) < 0) return -1; + + // reference all tables + for (int i = 0; i < pMem->maxTables; i++) { + if (pMeta->tables[i] != NULL) { + tsdbRefTable(pMeta->tables[i]); + pCommith->iters[i].pTable = pMeta->tables[i]; + } + } + + if (tsdbUnlockRepoMeta(pRepo) < 0) return -1; + + for (int i = 0; i < pMem->maxTables; i++) { + if ((pCommith->iters[i].pTable != NULL) && (pMem->tData[i] != NULL) && + (TABLE_UID(pCommith->iters[i].pTable) == pMem->tData[i]->uid)) { + if ((pCommith->iters[i].pIter = tSkipListCreateIter(pMem->tData[i]->pData)) == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + tSkipListIterNext(pCommith->iters[i].pIter); + } + } + + return 0; +} + +static void tsdbDestroyCommitIters(SCommitH *pCommith) { + if (pCommith->iters == NULL) return; + + for (int i = 1; i < pCommith->niters; i++) { + if (pCommith->iters[i].pTable != NULL) { + tsdbUnRefTable(pCommith->iters[i].pTable); + tSkipListDestroyIter(pCommith->iters[i].pIter); + } + } + + free(pCommith->iters); + pCommith->iters = NULL; + pCommith->niters = 0; +} + +// Skip all keys until key (not included) +static void tsdbSeekCommitIter(SCommitH *pCommith, TSKEY key) { + for (int i = 0; i < pCommith->niters; i++) { + SCommitIter 
*pIter = pCommith->iters + i; + if (pIter->pTable == NULL || pIter->pIter == NULL) continue; + + tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, key - 1, INT32_MAX, NULL, NULL, 0, true, NULL); + } +} + +static int tsdbInitCommitH(SCommitH *pCommith, STsdb *pRepo) { + STsdbCfg *pCfg = REPO_CFG(pRepo); + + memset(pCommith, 0, sizeof(*pCommith)); + tsdbGetRtnSnap(pRepo, &(pCommith->rtn)); + + TSDB_FSET_SET_CLOSED(TSDB_COMMIT_WRITE_FSET(pCommith)); + + // Init read handle + if (tsdbInitReadH(&(pCommith->readh), pRepo) < 0) { + return -1; + } + + // Init file iterator + tsdbFSIterInit(&(pCommith->fsIter), REPO_FS(pRepo), TSDB_FS_ITER_FORWARD); + + if (tsdbCreateCommitIters(pCommith) < 0) { + tsdbDestroyCommitH(pCommith); + return -1; + } + + pCommith->aBlkIdx = taosArrayInit(1024, sizeof(SBlockIdx)); + if (pCommith->aBlkIdx == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbDestroyCommitH(pCommith); + return -1; + } + + pCommith->aSupBlk = taosArrayInit(1024, sizeof(SBlock)); + if (pCommith->aSupBlk == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbDestroyCommitH(pCommith); + return -1; + } + + pCommith->aSubBlk = taosArrayInit(1024, sizeof(SBlock)); + if (pCommith->aSubBlk == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbDestroyCommitH(pCommith); + return -1; + } + + pCommith->pDataCols = tdNewDataCols(0, pCfg->maxRowsPerFileBlock); + if (pCommith->pDataCols == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbDestroyCommitH(pCommith); + return -1; + } + + return 0; +} + +static void tsdbDestroyCommitH(SCommitH *pCommith) { + pCommith->pDataCols = tdFreeDataCols(pCommith->pDataCols); + pCommith->aSubBlk = taosArrayDestroy(pCommith->aSubBlk); + pCommith->aSupBlk = taosArrayDestroy(pCommith->aSupBlk); + pCommith->aBlkIdx = taosArrayDestroy(pCommith->aBlkIdx); + tsdbDestroyCommitIters(pCommith); + tsdbDestroyReadH(&(pCommith->readh)); + tsdbCloseDFileSet(TSDB_COMMIT_WRITE_FSET(pCommith)); +} + +static int tsdbNextCommitFid(SCommitH *pCommith) { 
+ STsdb * pRepo = TSDB_COMMIT_REPO(pCommith); + STsdbCfg *pCfg = REPO_CFG(pRepo); + int fid = TSDB_IVLD_FID; + + for (int i = 0; i < pCommith->niters; i++) { + SCommitIter *pIter = pCommith->iters + i; + if (pIter->pTable == NULL || pIter->pIter == NULL) continue; + + TSKEY nextKey = tsdbNextIterKey(pIter->pIter); + if (nextKey == TSDB_DATA_TIMESTAMP_NULL) { + continue; + } else { + int tfid = (int)(TSDB_KEY_FID(nextKey, pCfg->daysPerFile, pCfg->precision)); + if (fid == TSDB_IVLD_FID || fid > tfid) { + fid = tfid; // find the least fid + } + } + } + + return fid; +} + +static int tsdbCommitToTable(SCommitH *pCommith, int tid) { + SCommitIter *pIter = pCommith->iters + tid; + TSKEY nextKey = tsdbNextIterKey(pIter->pIter); + + tsdbResetCommitTable(pCommith); + + TSDB_RLOCK_TABLE(pIter->pTable); + + // Set commit table + if (tsdbSetCommitTable(pCommith, pIter->pTable) < 0) { + TSDB_RUNLOCK_TABLE(pIter->pTable); + return -1; + } + + // No disk data and no memory data, just return + if (pCommith->readh.pBlkIdx == NULL && (nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey)) { + TSDB_RUNLOCK_TABLE(pIter->pTable); + return 0; + } + + // Must has disk data or has memory data + int nBlocks; + int bidx = 0; + SBlock *pBlock; + + if (pCommith->readh.pBlkIdx) { + if (tsdbLoadBlockInfo(&(pCommith->readh), NULL, NULL) < 0) { + TSDB_RUNLOCK_TABLE(pIter->pTable); + return -1; + } + + nBlocks = pCommith->readh.pBlkIdx->numOfBlocks; + } else { + nBlocks = 0; + } + + if (bidx < nBlocks) { + pBlock = pCommith->readh.pBlkInfo->blocks + bidx; + } else { + pBlock = NULL; + } + + while (true) { + if (pBlock == NULL && (nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey)) break; + + if ((nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey) || + (pBlock && (!pBlock->last) && tsdbComparKeyBlock((void *)(&nextKey), pBlock) > 0)) { + if (tsdbMoveBlock(pCommith, bidx) < 0) { + TSDB_RUNLOCK_TABLE(pIter->pTable); + return -1; + } + + bidx++; + if (bidx 
< nBlocks) { + pBlock = pCommith->readh.pBlkInfo->blocks + bidx; + } else { + pBlock = NULL; + } + } else if (pBlock && (pBlock->last || tsdbComparKeyBlock((void *)(&nextKey), pBlock) == 0)) { + // merge pBlock data and memory data + if (tsdbMergeMemData(pCommith, pIter, bidx) < 0) { + TSDB_RUNLOCK_TABLE(pIter->pTable); + return -1; + } + + bidx++; + if (bidx < nBlocks) { + pBlock = pCommith->readh.pBlkInfo->blocks + bidx; + } else { + pBlock = NULL; + } + nextKey = tsdbNextIterKey(pIter->pIter); + } else { + // Only commit memory data + if (pBlock == NULL) { + if (tsdbCommitMemData(pCommith, pIter, pCommith->maxKey, false) < 0) { + TSDB_RUNLOCK_TABLE(pIter->pTable); + return -1; + } + } else { + if (tsdbCommitMemData(pCommith, pIter, pBlock->keyFirst - 1, true) < 0) { + TSDB_RUNLOCK_TABLE(pIter->pTable); + return -1; + } + } + nextKey = tsdbNextIterKey(pIter->pIter); + } + } + + TSDB_RUNLOCK_TABLE(pIter->pTable); + + if (tsdbWriteBlockInfo(pCommith) < 0) { + tsdbError("vgId:%d failed to write SBlockInfo part into file %s since %s", TSDB_COMMIT_REPO_ID(pCommith), + TSDB_FILE_FULL_NAME(TSDB_COMMIT_HEAD_FILE(pCommith)), tstrerror(terrno)); + return -1; + } + + return 0; +} + +static int tsdbSetCommitTable(SCommitH *pCommith, STable *pTable) { + STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1, -1); + + pCommith->pTable = pTable; + + if (tdInitDataCols(pCommith->pDataCols, pSchema) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + if (pCommith->isRFileSet) { + if (tsdbSetReadTable(&(pCommith->readh), pTable) < 0) { + return -1; + } + } else { + pCommith->readh.pBlkIdx = NULL; + } + return 0; +} + +static int tsdbComparKeyBlock(const void *arg1, const void *arg2) { + TSKEY key = *(TSKEY *)arg1; + SBlock *pBlock = (SBlock *)arg2; + + if (key < pBlock->keyFirst) { + return -1; + } else if (key > pBlock->keyLast) { + return 1; + } else { + return 0; + } +} + +int tsdbWriteBlockImpl(STsdb *pRepo, STable *pTable, SDFile *pDFile, SDFile 
*pDFileAggr, SDataCols *pDataCols, + SBlock *pBlock, bool isLast, bool isSuper, void **ppBuf, void **ppCBuf, void **ppExBuf) { + STsdbCfg * pCfg = REPO_CFG(pRepo); + SBlockData * pBlockData; + SAggrBlkData *pAggrBlkData = NULL; + int64_t offset = 0, offsetAggr = 0; + int rowsToWrite = pDataCols->numOfRows; + + ASSERT(rowsToWrite > 0 && rowsToWrite <= pCfg->maxRowsPerFileBlock); + ASSERT((!isLast) || rowsToWrite < pCfg->minRowsPerFileBlock); + + // Make buffer space + if (tsdbMakeRoom(ppBuf, tsdbBlockStatisSize(pDataCols->numOfCols, SBlockVerLatest)) < 0) { + return -1; + } + pBlockData = (SBlockData *)(*ppBuf); + + if (tsdbMakeRoom(ppExBuf, tsdbBlockAggrSize(pDataCols->numOfCols, SBlockVerLatest)) < 0) { + return -1; + } + pAggrBlkData = (SAggrBlkData *)(*ppExBuf); + + // Get # of cols not all NULL(not including key column) + int nColsNotAllNull = 0; + for (int ncol = 1; ncol < pDataCols->numOfCols; ncol++) { // ncol from 1, we skip the timestamp column + SDataCol * pDataCol = pDataCols->cols + ncol; + SBlockCol * pBlockCol = pBlockData->cols + nColsNotAllNull; + SAggrBlkCol *pAggrBlkCol = (SAggrBlkCol *)pAggrBlkData + nColsNotAllNull; + + if (isAllRowsNull(pDataCol)) { // all data to commit are NULL, just ignore it + continue; + } + + memset(pBlockCol, 0, sizeof(*pBlockCol)); + memset(pAggrBlkCol, 0, sizeof(*pAggrBlkCol)); + + pBlockCol->colId = pDataCol->colId; + pBlockCol->type = pDataCol->type; + pAggrBlkCol->colId = pDataCol->colId; + + if (tDataTypes[pDataCol->type].statisFunc) { +#if 0 + (*tDataTypes[pDataCol->type].statisFunc)(pDataCol->pData, rowsToWrite, &(pBlockCol->min), &(pBlockCol->max), + &(pBlockCol->sum), &(pBlockCol->minIndex), &(pBlockCol->maxIndex), + &(pBlockCol->numOfNull)); +#endif + (*tDataTypes[pDataCol->type].statisFunc)(pDataCol->pData, rowsToWrite, &(pAggrBlkCol->min), &(pAggrBlkCol->max), + &(pAggrBlkCol->sum), &(pAggrBlkCol->minIndex), &(pAggrBlkCol->maxIndex), + &(pAggrBlkCol->numOfNull)); + } + nColsNotAllNull++; + } + + 
ASSERT(nColsNotAllNull >= 0 && nColsNotAllNull <= pDataCols->numOfCols); + + // Compress the data if neccessary + int tcol = 0; // counter of not all NULL and written columns + uint32_t toffset = 0; + int32_t tsize = (int32_t)tsdbBlockStatisSize(nColsNotAllNull, SBlockVerLatest); + int32_t lsize = tsize; + int32_t keyLen = 0; + + uint32_t tsizeAggr = (uint32_t)tsdbBlockAggrSize(nColsNotAllNull, SBlockVerLatest); + + for (int ncol = 0; ncol < pDataCols->numOfCols; ncol++) { + // All not NULL columns finish + if (ncol != 0 && tcol >= nColsNotAllNull) break; + + SDataCol * pDataCol = pDataCols->cols + ncol; + SBlockCol *pBlockCol = pBlockData->cols + tcol; + + if (ncol != 0 && (pDataCol->colId != pBlockCol->colId)) continue; + + int32_t flen; // final length + int32_t tlen = dataColGetNEleLen(pDataCol, rowsToWrite); + void * tptr; + + // Make room + if (tsdbMakeRoom(ppBuf, lsize + tlen + COMP_OVERFLOW_BYTES + sizeof(TSCKSUM)) < 0) { + return -1; + } + pBlockData = (SBlockData *)(*ppBuf); + pBlockCol = pBlockData->cols + tcol; + tptr = POINTER_SHIFT(pBlockData, lsize); + + if (pCfg->compression == TWO_STAGE_COMP && tsdbMakeRoom(ppCBuf, tlen + COMP_OVERFLOW_BYTES) < 0) { + return -1; + } + + // Compress or just copy + if (pCfg->compression) { + flen = (*(tDataTypes[pDataCol->type].compFunc))((char *)pDataCol->pData, tlen, rowsToWrite, tptr, + tlen + COMP_OVERFLOW_BYTES, pCfg->compression, *ppCBuf, + tlen + COMP_OVERFLOW_BYTES); + } else { + flen = tlen; + memcpy(tptr, pDataCol->pData, flen); + } + + // Add checksum + ASSERT(flen > 0); + flen += sizeof(TSCKSUM); + taosCalcChecksumAppend(0, (uint8_t *)tptr, flen); + tsdbUpdateDFileMagic(pDFile, POINTER_SHIFT(tptr, flen - sizeof(TSCKSUM))); + + if (ncol != 0) { + tsdbSetBlockColOffset(pBlockCol, toffset); + pBlockCol->len = flen; + tcol++; + } else { + keyLen = flen; + } + + toffset += flen; + lsize += flen; + } + + pBlockData->delimiter = TSDB_FILE_DELIMITER; + pBlockData->uid = TABLE_UID(pTable); + pBlockData->numOfCols 
= nColsNotAllNull; + + taosCalcChecksumAppend(0, (uint8_t *)pBlockData, tsize); + tsdbUpdateDFileMagic(pDFile, POINTER_SHIFT(pBlockData, tsize - sizeof(TSCKSUM))); + + // Write the whole block to file + if (tsdbAppendDFile(pDFile, (void *)pBlockData, lsize, &offset) < lsize) { + return -1; + } + + uint32_t aggrStatus = nColsNotAllNull > 0 ? 1 : 0; + if (aggrStatus > 0) { + taosCalcChecksumAppend(0, (uint8_t *)pAggrBlkData, tsizeAggr); + tsdbUpdateDFileMagic(pDFileAggr, POINTER_SHIFT(pAggrBlkData, tsizeAggr - sizeof(TSCKSUM))); + + // Write the whole block to file + if (tsdbAppendDFile(pDFileAggr, (void *)pAggrBlkData, tsizeAggr, &offsetAggr) < tsizeAggr) { + return -1; + } + } + + // Update pBlock membership variables + pBlock->last = isLast; + pBlock->offset = offset; + pBlock->algorithm = pCfg->compression; + pBlock->numOfRows = rowsToWrite; + pBlock->len = lsize; + pBlock->keyLen = keyLen; + pBlock->numOfSubBlocks = isSuper ? 1 : 0; + pBlock->numOfCols = nColsNotAllNull; + pBlock->keyFirst = dataColsKeyFirst(pDataCols); + pBlock->keyLast = dataColsKeyLast(pDataCols); + // since blkVer1 + pBlock->aggrStat = aggrStatus; + pBlock->blkVer = SBlockVerLatest; + pBlock->aggrOffset = (uint64_t)offsetAggr; + + tsdbDebug("vgId:%d tid:%d a block of data is written to file %s, offset %" PRId64 + " numOfRows %d len %d numOfCols %" PRId16 " keyFirst %" PRId64 " keyLast %" PRId64, + REPO_ID(pRepo), TABLE_TID(pTable), TSDB_FILE_FULL_NAME(pDFile), offset, rowsToWrite, pBlock->len, + pBlock->numOfCols, pBlock->keyFirst, pBlock->keyLast); + + return 0; +} + +static int tsdbWriteBlock(SCommitH *pCommith, SDFile *pDFile, SDataCols *pDataCols, SBlock *pBlock, bool isLast, + bool isSuper) { + return tsdbWriteBlockImpl(TSDB_COMMIT_REPO(pCommith), TSDB_COMMIT_TABLE(pCommith), pDFile, + isLast ? 
TSDB_COMMIT_SMAL_FILE(pCommith) : TSDB_COMMIT_SMAD_FILE(pCommith), pDataCols, + pBlock, isLast, isSuper, (void **)(&(TSDB_COMMIT_BUF(pCommith))), + (void **)(&(TSDB_COMMIT_COMP_BUF(pCommith))), (void **)(&(TSDB_COMMIT_EXBUF(pCommith)))); +} + +static int tsdbWriteBlockInfo(SCommitH *pCommih) { + SDFile * pHeadf = TSDB_COMMIT_HEAD_FILE(pCommih); + SBlockIdx blkIdx; + STable * pTable = TSDB_COMMIT_TABLE(pCommih); + + if (tsdbWriteBlockInfoImpl(pHeadf, pTable, pCommih->aSupBlk, pCommih->aSubBlk, (void **)(&(TSDB_COMMIT_BUF(pCommih))), + &blkIdx) < 0) { + return -1; + } + + if (blkIdx.numOfBlocks == 0) { + return 0; + } + + if (taosArrayPush(pCommih->aBlkIdx, (void *)(&blkIdx)) == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + return 0; +} + +static int tsdbCommitMemData(SCommitH *pCommith, SCommitIter *pIter, TSKEY keyLimit, bool toData) { + STsdb * pRepo = TSDB_COMMIT_REPO(pCommith); + STsdbCfg * pCfg = REPO_CFG(pRepo); + SMergeInfo mInfo; + int32_t defaultRows = TSDB_COMMIT_DEFAULT_ROWS(pCommith); + SDFile * pDFile; + bool isLast; + SBlock block; + + while (true) { + tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, keyLimit, defaultRows, pCommith->pDataCols, NULL, 0, + pCfg->update, &mInfo); + + if (pCommith->pDataCols->numOfRows <= 0) break; + + if (toData || pCommith->pDataCols->numOfRows >= pCfg->minRowsPerFileBlock) { + pDFile = TSDB_COMMIT_DATA_FILE(pCommith); + isLast = false; + } else { + pDFile = TSDB_COMMIT_LAST_FILE(pCommith); + isLast = true; + } + + if (tsdbWriteBlock(pCommith, pDFile, pCommith->pDataCols, &block, isLast, true) < 0) return -1; + + if (tsdbCommitAddBlock(pCommith, &block, NULL, 0) < 0) { + return -1; + } + } + + return 0; +} + +static int tsdbMergeMemData(SCommitH *pCommith, SCommitIter *pIter, int bidx) { + STsdb * pRepo = TSDB_COMMIT_REPO(pCommith); + STsdbCfg * pCfg = REPO_CFG(pRepo); + int nBlocks = pCommith->readh.pBlkIdx->numOfBlocks; + SBlock * pBlock = pCommith->readh.pBlkInfo->blocks + bidx; + TSKEY 
keyLimit; + int16_t colId = 0; + SMergeInfo mInfo; + SBlock subBlocks[TSDB_MAX_SUBBLOCKS]; + SBlock block, supBlock; + SDFile * pDFile; + + if (bidx == nBlocks - 1) { + keyLimit = pCommith->maxKey; + } else { + keyLimit = pBlock[1].keyFirst - 1; + } + + SSkipListIterator titer = *(pIter->pIter); + if (tsdbLoadBlockDataCols(&(pCommith->readh), pBlock, NULL, &colId, 1) < 0) return -1; + + tsdbLoadDataFromCache(pIter->pTable, &titer, keyLimit, INT32_MAX, NULL, pCommith->readh.pDCols[0]->cols[0].pData, + pCommith->readh.pDCols[0]->numOfRows, pCfg->update, &mInfo); + + if (mInfo.nOperations == 0) { + // no new data to insert (all updates denied) + if (tsdbMoveBlock(pCommith, bidx) < 0) { + return -1; + } + *(pIter->pIter) = titer; + } else if (pBlock->numOfRows + mInfo.rowsInserted - mInfo.rowsDeleteSucceed == 0) { + // Ignore the block + ASSERT(0); + *(pIter->pIter) = titer; + } else if (tsdbCanAddSubBlock(pCommith, pBlock, &mInfo)) { + // Add a sub-block + tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, keyLimit, INT32_MAX, pCommith->pDataCols, + pCommith->readh.pDCols[0]->cols[0].pData, pCommith->readh.pDCols[0]->numOfRows, pCfg->update, + &mInfo); + if (pBlock->last) { + pDFile = TSDB_COMMIT_LAST_FILE(pCommith); + } else { + pDFile = TSDB_COMMIT_DATA_FILE(pCommith); + } + + if (tsdbWriteBlock(pCommith, pDFile, pCommith->pDataCols, &block, pBlock->last, false) < 0) return -1; + + if (pBlock->numOfSubBlocks == 1) { + subBlocks[0] = *pBlock; + subBlocks[0].numOfSubBlocks = 0; + } else { + memcpy(subBlocks, POINTER_SHIFT(pCommith->readh.pBlkInfo, pBlock->offset), + sizeof(SBlock) * pBlock->numOfSubBlocks); + } + subBlocks[pBlock->numOfSubBlocks] = block; + supBlock = *pBlock; + supBlock.keyFirst = mInfo.keyFirst; + supBlock.keyLast = mInfo.keyLast; + supBlock.numOfSubBlocks++; + supBlock.numOfRows = pBlock->numOfRows + mInfo.rowsInserted - mInfo.rowsDeleteSucceed; + supBlock.offset = taosArrayGetSize(pCommith->aSubBlk) * sizeof(SBlock); + + if 
(tsdbCommitAddBlock(pCommith, &supBlock, subBlocks, supBlock.numOfSubBlocks) < 0) return -1; + } else { + if (tsdbLoadBlockData(&(pCommith->readh), pBlock, NULL) < 0) return -1; + if (tsdbMergeBlockData(pCommith, pIter, pCommith->readh.pDCols[0], keyLimit, bidx == (nBlocks - 1)) < 0) return -1; + } + + return 0; +} + +static int tsdbMoveBlock(SCommitH *pCommith, int bidx) { + SBlock *pBlock = pCommith->readh.pBlkInfo->blocks + bidx; + SDFile *pDFile; + SBlock block; + bool isSameFile; + + ASSERT(pBlock->numOfSubBlocks > 0); + + if (pBlock->last) { + pDFile = TSDB_COMMIT_LAST_FILE(pCommith); + isSameFile = pCommith->isLFileSame; + } else { + pDFile = TSDB_COMMIT_DATA_FILE(pCommith); + isSameFile = pCommith->isDFileSame; + } + + if (isSameFile) { + if (pBlock->numOfSubBlocks == 1) { + if (tsdbCommitAddBlock(pCommith, pBlock, NULL, 0) < 0) { + return -1; + } + } else { + block = *pBlock; + block.offset = sizeof(SBlock) * taosArrayGetSize(pCommith->aSubBlk); + + if (tsdbCommitAddBlock(pCommith, &block, POINTER_SHIFT(pCommith->readh.pBlkInfo, pBlock->offset), + pBlock->numOfSubBlocks) < 0) { + return -1; + } + } + } else { + if (tsdbLoadBlockData(&(pCommith->readh), pBlock, NULL) < 0) return -1; + if (tsdbWriteBlock(pCommith, pDFile, pCommith->readh.pDCols[0], &block, pBlock->last, true) < 0) return -1; + if (tsdbCommitAddBlock(pCommith, &block, NULL, 0) < 0) return -1; + } + + return 0; +} + +static int tsdbCommitAddBlock(SCommitH *pCommith, const SBlock *pSupBlock, const SBlock *pSubBlocks, int nSubBlocks) { + if (taosArrayPush(pCommith->aSupBlk, pSupBlock) == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + if (pSubBlocks && taosArrayAddBatch(pCommith->aSubBlk, pSubBlocks, nSubBlocks) == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + return 0; +} + +static int tsdbMergeBlockData(SCommitH *pCommith, SCommitIter *pIter, SDataCols *pDataCols, TSKEY keyLimit, + bool isLastOneBlock) { + STsdb * pRepo = TSDB_COMMIT_REPO(pCommith); 
+ STsdbCfg *pCfg = REPO_CFG(pRepo); + SBlock block; + SDFile * pDFile; + bool isLast; + int32_t defaultRows = TSDB_COMMIT_DEFAULT_ROWS(pCommith); + + int biter = 0; + while (true) { + tsdbLoadAndMergeFromCache(pCommith->readh.pDCols[0], &biter, pIter, pCommith->pDataCols, keyLimit, defaultRows, + pCfg->update); + + if (pCommith->pDataCols->numOfRows == 0) break; + + if (isLastOneBlock) { + if (pCommith->pDataCols->numOfRows < pCfg->minRowsPerFileBlock) { + pDFile = TSDB_COMMIT_LAST_FILE(pCommith); + isLast = true; + } else { + pDFile = TSDB_COMMIT_DATA_FILE(pCommith); + isLast = false; + } + } else { + pDFile = TSDB_COMMIT_DATA_FILE(pCommith); + isLast = false; + } + + if (tsdbWriteBlock(pCommith, pDFile, pCommith->pDataCols, &block, isLast, true) < 0) return -1; + if (tsdbCommitAddBlock(pCommith, &block, NULL, 0) < 0) return -1; + } + + return 0; +} + +static void tsdbLoadAndMergeFromCache(SDataCols *pDataCols, int *iter, SCommitIter *pCommitIter, SDataCols *pTarget, + TSKEY maxKey, int maxRows, int8_t update) { + TSKEY key1 = INT64_MAX; + TSKEY key2 = INT64_MAX; + STSchema *pSchema = NULL; + + ASSERT(maxRows > 0 && dataColsKeyLast(pDataCols) <= maxKey); + tdResetDataCols(pTarget); + + while (true) { + key1 = (*iter >= pDataCols->numOfRows) ? 
INT64_MAX : dataColsKeyAt(pDataCols, *iter); + SMemRow row = tsdbNextIterRow(pCommitIter->pIter); + if (row == NULL || memRowKey(row) > maxKey) { + key2 = INT64_MAX; + } else { + key2 = memRowKey(row); + } + + if (key1 == INT64_MAX && key2 == INT64_MAX) break; + + if (key1 < key2) { + for (int i = 0; i < pDataCols->numOfCols; i++) { + // TODO: dataColAppendVal may fail + dataColAppendVal(pTarget->cols + i, tdGetColDataOfRow(pDataCols->cols + i, *iter), pTarget->numOfRows, + pTarget->maxPoints); + } + + pTarget->numOfRows++; + (*iter)++; + } else if (key1 > key2) { + if (pSchema == NULL || schemaVersion(pSchema) != memRowVersion(row)) { + pSchema = + tsdbGetTableSchemaImpl(pCommitIter->pTable, false, false, memRowVersion(row), (int8_t)memRowType(row)); + ASSERT(pSchema != NULL); + } + + tdAppendMemRowToDataCol(row, pSchema, pTarget, true); + + tSkipListIterNext(pCommitIter->pIter); + } else { + if (update != TD_ROW_OVERWRITE_UPDATE) { + // copy disk data + for (int i = 0; i < pDataCols->numOfCols; i++) { + // TODO: dataColAppendVal may fail + dataColAppendVal(pTarget->cols + i, tdGetColDataOfRow(pDataCols->cols + i, *iter), pTarget->numOfRows, + pTarget->maxPoints); + } + + if (update == TD_ROW_DISCARD_UPDATE) pTarget->numOfRows++; + } + if (update != TD_ROW_DISCARD_UPDATE) { + // copy mem data + if (pSchema == NULL || schemaVersion(pSchema) != memRowVersion(row)) { + pSchema = + tsdbGetTableSchemaImpl(pCommitIter->pTable, false, false, memRowVersion(row), (int8_t)memRowType(row)); + ASSERT(pSchema != NULL); + } + + tdAppendMemRowToDataCol(row, pSchema, pTarget, update == TD_ROW_OVERWRITE_UPDATE); + } + (*iter)++; + tSkipListIterNext(pCommitIter->pIter); + } + + if (pTarget->numOfRows >= maxRows) break; + } +} + +static void tsdbResetCommitFile(SCommitH *pCommith) { + pCommith->isRFileSet = false; + pCommith->isDFileSame = false; + pCommith->isLFileSame = false; + taosArrayClear(pCommith->aBlkIdx); +} + +static void tsdbResetCommitTable(SCommitH *pCommith) { + 
taosArrayClear(pCommith->aSubBlk); + taosArrayClear(pCommith->aSupBlk); + pCommith->pTable = NULL; +} + +static int tsdbSetAndOpenCommitFile(SCommitH *pCommith, SDFileSet *pSet, int fid) { + SDiskID did; + STsdb * pRepo = TSDB_COMMIT_REPO(pCommith); + SDFileSet *pWSet = TSDB_COMMIT_WRITE_FSET(pCommith); + + tfsAllocDisk(tsdbGetFidLevel(fid, &(pCommith->rtn)), &(did.level), &(did.id)); + if (did.level == TFS_UNDECIDED_LEVEL) { + terrno = TSDB_CODE_TDB_NO_AVAIL_DISK; + return -1; + } + + // Open read FSET + if (pSet) { + if (tsdbSetAndOpenReadFSet(&(pCommith->readh), pSet) < 0) { + return -1; + } + + pCommith->isRFileSet = true; + + if (tsdbLoadBlockIdx(&(pCommith->readh)) < 0) { + tsdbCloseAndUnsetFSet(&(pCommith->readh)); + return -1; + } + + tsdbDebug("vgId:%d FSET %d at level %d disk id %d is opened to read to commit", REPO_ID(pRepo), TSDB_FSET_FID(pSet), + TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet)); + } else { + pCommith->isRFileSet = false; + } + + // Set and open commit FSET + if (pSet == NULL || did.level > TSDB_FSET_LEVEL(pSet)) { + // Create a new FSET to write data + tsdbInitDFileSet(pWSet, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_LATEST_FSET_VER); + + if (tsdbCreateDFileSet(pWSet, true) < 0) { + tsdbError("vgId:%d failed to create FSET %d at level %d disk id %d since %s", REPO_ID(pRepo), + TSDB_FSET_FID(pWSet), TSDB_FSET_LEVEL(pWSet), TSDB_FSET_ID(pWSet), tstrerror(terrno)); + if (pCommith->isRFileSet) { + tsdbCloseAndUnsetFSet(&(pCommith->readh)); + } + return -1; + } + + pCommith->isDFileSame = false; + pCommith->isLFileSame = false; + + tsdbDebug("vgId:%d FSET %d at level %d disk id %d is created to commit", REPO_ID(pRepo), TSDB_FSET_FID(pWSet), + TSDB_FSET_LEVEL(pWSet), TSDB_FSET_ID(pWSet)); + } else { + did.level = TSDB_FSET_LEVEL(pSet); + did.id = TSDB_FSET_ID(pSet); + + pCommith->wSet.fid = fid; + pCommith->wSet.state = 0; + pCommith->wSet.ver = TSDB_LATEST_FSET_VER; + + // TSDB_FILE_HEAD + SDFile *pWHeadf = 
TSDB_COMMIT_HEAD_FILE(pCommith); + tsdbInitDFile(pWHeadf, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_HEAD); + if (tsdbCreateDFile(pWHeadf, true, TSDB_FILE_HEAD) < 0) { + tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWHeadf), + tstrerror(terrno)); + + if (pCommith->isRFileSet) { + tsdbCloseAndUnsetFSet(&(pCommith->readh)); + return -1; + } + } + + // TSDB_FILE_DATA + SDFile *pRDataf = TSDB_READ_DATA_FILE(&(pCommith->readh)); + SDFile *pWDataf = TSDB_COMMIT_DATA_FILE(pCommith); + tsdbInitDFileEx(pWDataf, pRDataf); + if (tsdbOpenDFile(pWDataf, O_WRONLY) < 0) { + tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWDataf), + tstrerror(terrno)); + + tsdbCloseDFileSet(pWSet); + tsdbRemoveDFile(pWHeadf); + if (pCommith->isRFileSet) { + tsdbCloseAndUnsetFSet(&(pCommith->readh)); + return -1; + } + } + pCommith->isDFileSame = true; + + // TSDB_FILE_LAST + SDFile *pRLastf = TSDB_READ_LAST_FILE(&(pCommith->readh)); + SDFile *pWLastf = TSDB_COMMIT_LAST_FILE(pCommith); + if (pRLastf->info.size < 32 * 1024) { + tsdbInitDFileEx(pWLastf, pRLastf); + pCommith->isLFileSame = true; + + if (tsdbOpenDFile(pWLastf, O_WRONLY) < 0) { + tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWLastf), + tstrerror(terrno)); + + tsdbCloseDFileSet(pWSet); + tsdbRemoveDFile(pWHeadf); + if (pCommith->isRFileSet) { + tsdbCloseAndUnsetFSet(&(pCommith->readh)); + return -1; + } + } + } else { + tsdbInitDFile(pWLastf, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_LAST); + pCommith->isLFileSame = false; + + if (tsdbCreateDFile(pWLastf, true, TSDB_FILE_LAST) < 0) { + tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWLastf), + tstrerror(terrno)); + + tsdbCloseDFileSet(pWSet); + (void)tsdbRemoveDFile(pWHeadf); + if (pCommith->isRFileSet) { + 
tsdbCloseAndUnsetFSet(&(pCommith->readh)); + return -1; + } + } + } + + // TSDB_FILE_SMAD + SDFile *pRSmadF = TSDB_READ_SMAD_FILE(&(pCommith->readh)); + SDFile *pWSmadF = TSDB_COMMIT_SMAD_FILE(pCommith); + + if (access(TSDB_FILE_FULL_NAME(pRSmadF), F_OK) != 0) { + tsdbDebug("vgId:%d create data file %s as not exist", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pRSmadF)); + tsdbInitDFile(pWSmadF, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_SMAD); + + if (tsdbCreateDFile(pWSmadF, true, TSDB_FILE_SMAD) < 0) { + tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWSmadF), + tstrerror(terrno)); + + tsdbCloseDFileSet(pWSet); + (void)tsdbRemoveDFile(pWHeadf); + if (pCommith->isRFileSet) { + tsdbCloseAndUnsetFSet(&(pCommith->readh)); + return -1; + } + } + } else { + tsdbInitDFileEx(pWSmadF, pRSmadF); + if (tsdbOpenDFile(pWSmadF, O_RDWR) < 0) { + tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWSmadF), + tstrerror(terrno)); + + tsdbCloseDFileSet(pWSet); + tsdbRemoveDFile(pWHeadf); + if (pCommith->isRFileSet) { + tsdbCloseAndUnsetFSet(&(pCommith->readh)); + return -1; + } + } + } + + // TSDB_FILE_SMAL + ASSERT(tsdbGetNFiles(pWSet) >= TSDB_FILE_SMAL); + SDFile *pRSmalF = TSDB_READ_SMAL_FILE(&(pCommith->readh)); + SDFile *pWSmalF = TSDB_COMMIT_SMAL_FILE(pCommith); + + if ((pCommith->isLFileSame) && access(TSDB_FILE_FULL_NAME(pRSmalF), F_OK) == 0) { + tsdbInitDFileEx(pWSmalF, pRSmalF); + if (tsdbOpenDFile(pWSmalF, O_RDWR) < 0) { + tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWSmalF), + tstrerror(terrno)); + + tsdbCloseDFileSet(pWSet); + tsdbRemoveDFile(pWHeadf); + if (pCommith->isRFileSet) { + tsdbCloseAndUnsetFSet(&(pCommith->readh)); + return -1; + } + } + } else { + tsdbDebug("vgId:%d create data file %s as not exist", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pRSmalF)); + tsdbInitDFile(pWSmalF, did, 
REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_SMAL); + + if (tsdbCreateDFile(pWSmalF, true, TSDB_FILE_SMAL) < 0) { + tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWSmalF), + tstrerror(terrno)); + + tsdbCloseDFileSet(pWSet); + (void)tsdbRemoveDFile(pWHeadf); + if (pCommith->isRFileSet) { + tsdbCloseAndUnsetFSet(&(pCommith->readh)); + return -1; + } + } + } + } + + return 0; +} + +static void tsdbCloseCommitFile(SCommitH *pCommith, bool hasError) { + if (pCommith->isRFileSet) { + tsdbCloseAndUnsetFSet(&(pCommith->readh)); + } + + if (!hasError) { + TSDB_FSET_FSYNC(TSDB_COMMIT_WRITE_FSET(pCommith)); + } + tsdbCloseDFileSet(TSDB_COMMIT_WRITE_FSET(pCommith)); +} + +static bool tsdbCanAddSubBlock(SCommitH *pCommith, SBlock *pBlock, SMergeInfo *pInfo) { + STsdb * pRepo = TSDB_COMMIT_REPO(pCommith); + STsdbCfg *pCfg = REPO_CFG(pRepo); + int mergeRows = pBlock->numOfRows + pInfo->rowsInserted - pInfo->rowsDeleteSucceed; + + ASSERT(mergeRows > 0); + + if (pBlock->numOfSubBlocks < TSDB_MAX_SUBBLOCKS && pInfo->nOperations <= pCfg->maxRowsPerFileBlock) { + if (pBlock->last) { + if (pCommith->isLFileSame && mergeRows < pCfg->minRowsPerFileBlock) return true; + } else { + if (pCommith->isDFileSame && mergeRows <= pCfg->maxRowsPerFileBlock) return true; + } + } + + return false; +} + +int tsdbApplyRtn(STsdb *pRepo) { + SRtn rtn; + SFSIter fsiter; + STsdbFS * pfs = REPO_FS(pRepo); + SDFileSet *pSet; + + // Get retention snapshot + tsdbGetRtnSnap(pRepo, &rtn); + + tsdbFSIterInit(&fsiter, pfs, TSDB_FS_ITER_FORWARD); + while ((pSet = tsdbFSIterNext(&fsiter))) { + if (pSet->fid < rtn.minFid) { + tsdbInfo("vgId:%d FSET %d at level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid, + TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet)); + continue; + } + + if (tsdbApplyRtnOnFSet(pRepo, pSet, &rtn) < 0) { + return -1; + } + } + + return 0; +} diff --git a/source/dnode/vnode/tsdb2/src/tsdbCompact.c 
b/source/dnode/vnode/tsdb2/src/tsdbCompact.c new file mode 100644 index 0000000000..57f2742c33 --- /dev/null +++ b/source/dnode/vnode/tsdb2/src/tsdbCompact.c @@ -0,0 +1,543 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#if 0 +#include "tsdbint.h" + +typedef struct { + STable * pTable; + SBlockIdx * pBlkIdx; + SBlockIdx bindex; + SBlockInfo *pInfo; +} STableCompactH; + +typedef struct { + SRtn rtn; + SFSIter fsIter; + SArray * tbArray; // table array to cache table obj and block indexes + SReadH readh; + SDFileSet wSet; + SArray * aBlkIdx; + SArray * aSupBlk; + SDataCols *pDataCols; +} SCompactH; + +#define TSDB_COMPACT_WSET(pComph) (&((pComph)->wSet)) +#define TSDB_COMPACT_REPO(pComph) TSDB_READ_REPO(&((pComph)->readh)) +#define TSDB_COMPACT_HEAD_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_HEAD) +#define TSDB_COMPACT_DATA_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_DATA) +#define TSDB_COMPACT_LAST_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_LAST) +#define TSDB_COMPACT_SMAD_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_SMAD) +#define TSDB_COMPACT_SMAL_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_SMAL) +#define TSDB_COMPACT_BUF(pComph) TSDB_READ_BUF(&((pComph)->readh)) +#define TSDB_COMPACT_COMP_BUF(pComph) TSDB_READ_COMP_BUF(&((pComph)->readh)) +#define TSDB_COMPACT_EXBUF(pComph) 
TSDB_READ_EXBUF(&((pComph)->readh)) + +// static int tsdbAsyncCompact(STsdb *pRepo); +static void tsdbStartCompact(STsdb *pRepo); +static void tsdbEndCompact(STsdb *pRepo, int eno); +static int tsdbCompactMeta(STsdb *pRepo); +static int tsdbCompactTSData(STsdb *pRepo); +static int tsdbCompactFSet(SCompactH *pComph, SDFileSet *pSet); +static bool tsdbShouldCompact(SCompactH *pComph); +static int tsdbInitCompactH(SCompactH *pComph, STsdb *pRepo); +static void tsdbDestroyCompactH(SCompactH *pComph); +static int tsdbInitCompTbArray(SCompactH *pComph); +static void tsdbDestroyCompTbArray(SCompactH *pComph); +static int tsdbCacheFSetIndex(SCompactH *pComph); +static int tsdbCompactFSetInit(SCompactH *pComph, SDFileSet *pSet); +static void tsdbCompactFSetEnd(SCompactH *pComph); +static int tsdbCompactFSetImpl(SCompactH *pComph); +static int tsdbWriteBlockToRightFile(SCompactH *pComph, STable *pTable, SDataCols *pDataCols, void **ppBuf, + void **ppCBuf, void **ppExBuf); + +enum { TSDB_NO_COMPACT, TSDB_IN_COMPACT, TSDB_WAITING_COMPACT}; +// int tsdbCompact(STsdb *pRepo) { return tsdbAsyncCompact(pRepo); } + +void *tsdbCompactImpl(STsdb *pRepo) { + // Check if there are files in TSDB FS to compact + if (REPO_FS(pRepo)->cstatus->pmf == NULL) { + pRepo->compactState = TSDB_NO_COMPACT; + tsem_post(&(pRepo->readyToCommit)); + tsdbInfo("vgId:%d compact over, no file to compact in FS", REPO_ID(pRepo)); + return NULL; + } + + tsdbStartCompact(pRepo); + + if (tsdbCompactMeta(pRepo) < 0) { + tsdbError("vgId:%d failed to compact META data since %s", REPO_ID(pRepo), tstrerror(terrno)); + goto _err; + } + + if (tsdbCompactTSData(pRepo) < 0) { + tsdbError("vgId:%d failed to compact TS data since %s", REPO_ID(pRepo), tstrerror(terrno)); + goto _err; + } + + tsdbEndCompact(pRepo, TSDB_CODE_SUCCESS); + return NULL; + +_err: + pRepo->code = terrno; + tsdbEndCompact(pRepo, terrno); + return NULL; +} + +// static int tsdbAsyncCompact(STsdb *pRepo) { +// if (pRepo->compactState != 
TSDB_NO_COMPACT) { +// tsdbInfo("vgId:%d not compact tsdb again ", REPO_ID(pRepo)); +// return 0; +// } +// pRepo->compactState = TSDB_WAITING_COMPACT; +// tsem_wait(&(pRepo->readyToCommit)); +// return tsdbScheduleCommit(pRepo, COMPACT_REQ); +// } + +static void tsdbStartCompact(STsdb *pRepo) { + assert(pRepo->compactState != TSDB_IN_COMPACT); + tsdbInfo("vgId:%d start to compact!", REPO_ID(pRepo)); + tsdbStartFSTxn(pRepo, 0, 0); + pRepo->code = TSDB_CODE_SUCCESS; + pRepo->compactState = TSDB_IN_COMPACT; +} + +static void tsdbEndCompact(STsdb *pRepo, int eno) { + if (eno != TSDB_CODE_SUCCESS) { + tsdbEndFSTxnWithError(REPO_FS(pRepo)); + } else { + tsdbEndFSTxn(pRepo); + } + pRepo->compactState = TSDB_NO_COMPACT; + tsdbInfo("vgId:%d compact over, %s", REPO_ID(pRepo), (eno == TSDB_CODE_SUCCESS) ? "succeed" : "failed"); + tsem_post(&(pRepo->readyToCommit)); +} + +static int tsdbCompactMeta(STsdb *pRepo) { + STsdbFS *pfs = REPO_FS(pRepo); + tsdbUpdateMFile(pfs, pfs->cstatus->pmf); + return 0; +} + + static int tsdbCompactTSData(STsdb *pRepo) { + SCompactH compactH; + SDFileSet *pSet = NULL; + + tsdbDebug("vgId:%d start to compact TS data", REPO_ID(pRepo)); + + // If no file, just return 0; + if (taosArrayGetSize(REPO_FS(pRepo)->cstatus->df) <= 0) { + tsdbDebug("vgId:%d no TS data file to compact, compact over", REPO_ID(pRepo)); + return 0; + } + + if (tsdbInitCompactH(&compactH, pRepo) < 0) { + return -1; + } + + while ((pSet = tsdbFSIterNext(&(compactH.fsIter)))) { + // Remove those expired files + if (pSet->fid < compactH.rtn.minFid) { + tsdbInfo("vgId:%d FSET %d on level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid, + TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet)); + continue; + } + + if (TSDB_FSET_LEVEL(pSet) == TFS_MAX_LEVEL) { + tsdbDebug("vgId:%d FSET %d on level %d, should not compact", REPO_ID(pRepo), pSet->fid, TFS_MAX_LEVEL); + tsdbUpdateDFileSet(REPO_FS(pRepo), pSet); + continue; + } + + if (tsdbCompactFSet(&compactH, pSet) < 0) { + 
tsdbDestroyCompactH(&compactH); + tsdbError("vgId:%d failed to compact FSET %d since %s", REPO_ID(pRepo), pSet->fid, tstrerror(terrno)); + return -1; + } + } + + tsdbDestroyCompactH(&compactH); + tsdbDebug("vgId:%d compact TS data over", REPO_ID(pRepo)); + return 0; + } + + static int tsdbCompactFSet(SCompactH *pComph, SDFileSet *pSet) { + STsdb *pRepo = TSDB_COMPACT_REPO(pComph); + SDiskID did; + + tsdbDebug("vgId:%d start to compact FSET %d on level %d id %d", REPO_ID(pRepo), pSet->fid, TSDB_FSET_LEVEL(pSet), + TSDB_FSET_ID(pSet)); + + if (tsdbCompactFSetInit(pComph, pSet) < 0) { + return -1; + } + + if (!tsdbShouldCompact(pComph)) { + tsdbDebug("vgId:%d no need to compact FSET %d", REPO_ID(pRepo), pSet->fid); + if (tsdbApplyRtnOnFSet(TSDB_COMPACT_REPO(pComph), pSet, &(pComph->rtn)) < 0) { + tsdbCompactFSetEnd(pComph); + return -1; + } + } else { + // Create new fset as compacted fset + tfsAllocDisk(tsdbGetFidLevel(pSet->fid, &(pComph->rtn)), &(did.level), &(did.id)); + if (did.level == TFS_UNDECIDED_LEVEL) { + terrno = TSDB_CODE_TDB_NO_AVAIL_DISK; + tsdbError("vgId:%d failed to compact FSET %d since %s", REPO_ID(pRepo), pSet->fid, tstrerror(terrno)); + tsdbCompactFSetEnd(pComph); + return -1; + } + + tsdbInitDFileSet(TSDB_COMPACT_WSET(pComph), did, REPO_ID(pRepo), TSDB_FSET_FID(pSet), + FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_LATEST_FSET_VER); + if (tsdbCreateDFileSet(TSDB_COMPACT_WSET(pComph), true) < 0) { + tsdbError("vgId:%d failed to compact FSET %d since %s", REPO_ID(pRepo), pSet->fid, tstrerror(terrno)); + tsdbCompactFSetEnd(pComph); + return -1; + } + + if (tsdbCompactFSetImpl(pComph) < 0) { + tsdbCloseDFileSet(TSDB_COMPACT_WSET(pComph)); + tsdbRemoveDFileSet(TSDB_COMPACT_WSET(pComph)); + tsdbCompactFSetEnd(pComph); + return -1; + } + + tsdbCloseDFileSet(TSDB_COMPACT_WSET(pComph)); + tsdbUpdateDFileSet(REPO_FS(pRepo), TSDB_COMPACT_WSET(pComph)); + tsdbDebug("vgId:%d FSET %d compact over", REPO_ID(pRepo), pSet->fid); + } + + tsdbCompactFSetEnd(pComph); + 
return 0; + } + + static bool tsdbShouldCompact(SCompactH *pComph) { + // if (tsdbForceCompactFile) { + // return true; + // } + STsdb * pRepo = TSDB_COMPACT_REPO(pComph); + STsdbCfg * pCfg = REPO_CFG(pRepo); + SReadH * pReadh = &(pComph->readh); + STableCompactH *pTh; + SBlock * pBlock; + int defaultRows = TSDB_DEFAULT_BLOCK_ROWS(pCfg->maxRowsPerFileBlock); + SDFile * pDataF = TSDB_READ_DATA_FILE(pReadh); + SDFile * pLastF = TSDB_READ_LAST_FILE(pReadh); + + int tblocks = 0; // total blocks + int nSubBlocks = 0; // # of blocks with sub-blocks + int nSmallBlocks = 0; // # of blocks with rows < defaultRows + int64_t tsize = 0; + + for (size_t i = 0; i < taosArrayGetSize(pComph->tbArray); i++) { + pTh = (STableCompactH *)taosArrayGet(pComph->tbArray, i); + + if (pTh->pTable == NULL || pTh->pBlkIdx == NULL) continue; + + for (size_t bidx = 0; bidx < pTh->pBlkIdx->numOfBlocks; bidx++) { + tblocks++; + pBlock = pTh->pInfo->blocks + bidx; + + if (pBlock->numOfRows < defaultRows) { + nSmallBlocks++; + } + + if (pBlock->numOfSubBlocks > 1) { + nSubBlocks++; + for (int k = 0; k < pBlock->numOfSubBlocks; k++) { + SBlock *iBlock = ((SBlock *)POINTER_SHIFT(pTh->pInfo, pBlock->offset)) + k; + tsize = tsize + iBlock->len; + } + } else if (pBlock->numOfSubBlocks == 1) { + tsize += pBlock->len; + } else { + ASSERT(0); + } + } + } + + return (((nSubBlocks * 1.0 / tblocks) > 0.33) || ((nSmallBlocks * 1.0 / tblocks) > 0.33) || + (tsize * 1.0 / (pDataF->info.size + pLastF->info.size - 2 * TSDB_FILE_HEAD_SIZE) < 0.85)); + } + + static int tsdbInitCompactH(SCompactH *pComph, STsdb *pRepo) { + STsdbCfg *pCfg = REPO_CFG(pRepo); + + memset(pComph, 0, sizeof(*pComph)); + + TSDB_FSET_SET_CLOSED(TSDB_COMPACT_WSET(pComph)); + + tsdbGetRtnSnap(pRepo, &(pComph->rtn)); + tsdbFSIterInit(&(pComph->fsIter), REPO_FS(pRepo), TSDB_FS_ITER_FORWARD); + + if (tsdbInitReadH(&(pComph->readh), pRepo) < 0) { + return -1; + } + + if (tsdbInitCompTbArray(pComph) < 0) { + tsdbDestroyCompactH(pComph); + return -1; 
+ } + + pComph->aBlkIdx = taosArrayInit(1024, sizeof(SBlockIdx)); + if (pComph->aBlkIdx == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbDestroyCompactH(pComph); + return -1; + } + + pComph->aSupBlk = taosArrayInit(1024, sizeof(SBlock)); + if (pComph->aSupBlk == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbDestroyCompactH(pComph); + return -1; + } + + pComph->pDataCols = tdNewDataCols(0, pCfg->maxRowsPerFileBlock); + if (pComph->pDataCols == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbDestroyCompactH(pComph); + return -1; + } + + return 0; + } + + static void tsdbDestroyCompactH(SCompactH *pComph) { + pComph->pDataCols = tdFreeDataCols(pComph->pDataCols); + pComph->aSupBlk = taosArrayDestroy(pComph->aSupBlk); + pComph->aBlkIdx = taosArrayDestroy(pComph->aBlkIdx); + tsdbDestroyCompTbArray(pComph); + tsdbDestroyReadH(&(pComph->readh)); + tsdbCloseDFileSet(TSDB_COMPACT_WSET(pComph)); + } + + static int tsdbInitCompTbArray(SCompactH *pComph) { // Init pComp->tbArray + STsdb *pRepo = TSDB_COMPACT_REPO(pComph); + STsdbMeta *pMeta = pRepo->tsdbMeta; + + if (tsdbRLockRepoMeta(pRepo) < 0) return -1; + + pComph->tbArray = taosArrayInit(pMeta->maxTables, sizeof(STableCompactH)); + if (pComph->tbArray == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbUnlockRepoMeta(pRepo); + return -1; + } + + // Note here must start from 0 + for (int i = 0; i < pMeta->maxTables; i++) { + STableCompactH ch = {0}; + if (pMeta->tables[i] != NULL) { + tsdbRefTable(pMeta->tables[i]); + ch.pTable = pMeta->tables[i]; + } + + if (taosArrayPush(pComph->tbArray, &ch) == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbUnlockRepoMeta(pRepo); + return -1; + } + } + + if (tsdbUnlockRepoMeta(pRepo) < 0) return -1; + return 0; + } + + static void tsdbDestroyCompTbArray(SCompactH *pComph) { + STableCompactH *pTh; + + if (pComph->tbArray == NULL) return; + + for (size_t i = 0; i < taosArrayGetSize(pComph->tbArray); i++) { + pTh = (STableCompactH 
*)taosArrayGet(pComph->tbArray, i); + if (pTh->pTable) { + tsdbUnRefTable(pTh->pTable); + } + + // pTh->pInfo = taosTZfree(pTh->pInfo); + tfree(pTh->pInfo); + } + + pComph->tbArray = taosArrayDestroy(pComph->tbArray); + } + + static int tsdbCacheFSetIndex(SCompactH *pComph) { + SReadH *pReadH = &(pComph->readh); + + if (tsdbLoadBlockIdx(pReadH) < 0) { + return -1; + } + + for (int tid = 1; tid < taosArrayGetSize(pComph->tbArray); tid++) { + STableCompactH *pTh = (STableCompactH *)taosArrayGet(pComph->tbArray, tid); + pTh->pBlkIdx = NULL; + + if (pTh->pTable == NULL) continue; + if (tsdbSetReadTable(pReadH, pTh->pTable) < 0) { + return -1; + } + + if (pReadH->pBlkIdx == NULL) continue; + pTh->bindex = *(pReadH->pBlkIdx); + pTh->pBlkIdx = &(pTh->bindex); + + uint32_t originLen = 0; + if (tsdbLoadBlockInfo(pReadH, (void **)(&(pTh->pInfo)), &originLen) < 0) { + return -1; + } + } + + return 0; + } + + static int tsdbCompactFSetInit(SCompactH *pComph, SDFileSet *pSet) { + taosArrayClear(pComph->aBlkIdx); + taosArrayClear(pComph->aSupBlk); + + if (tsdbSetAndOpenReadFSet(&(pComph->readh), pSet) < 0) { + return -1; + } + + if (tsdbCacheFSetIndex(pComph) < 0) { + tsdbCloseAndUnsetFSet(&(pComph->readh)); + return -1; + } + + return 0; + } + + static void tsdbCompactFSetEnd(SCompactH *pComph) { tsdbCloseAndUnsetFSet(&(pComph->readh)); } + + static int tsdbCompactFSetImpl(SCompactH *pComph) { + STsdb *pRepo = TSDB_COMPACT_REPO(pComph); + STsdbCfg * pCfg = REPO_CFG(pRepo); + SReadH * pReadh = &(pComph->readh); + SBlockIdx blkIdx; + void ** ppBuf = &(TSDB_COMPACT_BUF(pComph)); + void ** ppCBuf = &(TSDB_COMPACT_COMP_BUF(pComph)); + void ** ppExBuf = &(TSDB_COMPACT_EXBUF(pComph)); + int defaultRows = TSDB_DEFAULT_BLOCK_ROWS(pCfg->maxRowsPerFileBlock); + + taosArrayClear(pComph->aBlkIdx); + + for (int tid = 1; tid < taosArrayGetSize(pComph->tbArray); tid++) { + STableCompactH *pTh = (STableCompactH *)taosArrayGet(pComph->tbArray, tid); + STSchema * pSchema; + + if (pTh->pTable == 
NULL || pTh->pBlkIdx == NULL) continue; + + pSchema = tsdbGetTableSchemaImpl(pTh->pTable, true, true, -1, -1); + taosArrayClear(pComph->aSupBlk); + if ((tdInitDataCols(pComph->pDataCols, pSchema) < 0) || (tdInitDataCols(pReadh->pDCols[0], pSchema) < 0) || + (tdInitDataCols(pReadh->pDCols[1], pSchema) < 0)) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tdFreeSchema(pSchema); + return -1; + } + tdFreeSchema(pSchema); + + // Loop to compact each block data + for (int i = 0; i < pTh->pBlkIdx->numOfBlocks; i++) { + SBlock *pBlock = pTh->pInfo->blocks + i; + + // Load the block data + if (tsdbLoadBlockData(pReadh, pBlock, pTh->pInfo) < 0) { + return -1; + } + + // Merge pComph->pDataCols and pReadh->pDCols[0] and write data to file + if (pComph->pDataCols->numOfRows == 0 && pBlock->numOfRows >= defaultRows) { + if (tsdbWriteBlockToRightFile(pComph, pTh->pTable, pReadh->pDCols[0], ppBuf, ppCBuf, ppExBuf) < 0) { + return -1; + } + } else { + int ridx = 0; + + while (true) { + if (pReadh->pDCols[0]->numOfRows - ridx == 0) break; + int rowsToMerge = MIN(pReadh->pDCols[0]->numOfRows - ridx, defaultRows - pComph->pDataCols->numOfRows); + + tdMergeDataCols(pComph->pDataCols, pReadh->pDCols[0], rowsToMerge, &ridx, pCfg->update != TD_ROW_PARTIAL_UPDATE); + + if (pComph->pDataCols->numOfRows < defaultRows) { + break; + } + + if (tsdbWriteBlockToRightFile(pComph, pTh->pTable, pComph->pDataCols, ppBuf, ppCBuf, ppExBuf) < 0) { + return -1; + } + tdResetDataCols(pComph->pDataCols); + } + } + } + + if (pComph->pDataCols->numOfRows > 0 && + tsdbWriteBlockToRightFile(pComph, pTh->pTable, pComph->pDataCols, ppBuf, ppCBuf, ppExBuf) < 0) { + return -1; + } + + if (tsdbWriteBlockInfoImpl(TSDB_COMPACT_HEAD_FILE(pComph), pTh->pTable, pComph->aSupBlk, NULL, ppBuf, &blkIdx) < + 0) { + return -1; + } + + if ((blkIdx.numOfBlocks > 0) && (taosArrayPush(pComph->aBlkIdx, (void *)(&blkIdx)) == NULL)) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + } + + if 
(tsdbWriteBlockIdx(TSDB_COMPACT_HEAD_FILE(pComph), pComph->aBlkIdx, ppBuf) < 0) { + return -1; + } + + return 0; + } + + static int tsdbWriteBlockToRightFile(SCompactH *pComph, STable *pTable, SDataCols *pDataCols, void **ppBuf, + void **ppCBuf, void **ppExBuf) { + STsdb *pRepo = TSDB_COMPACT_REPO(pComph); + STsdbCfg * pCfg = REPO_CFG(pRepo); + SDFile * pDFile; + bool isLast; + SBlock block; + + ASSERT(pDataCols->numOfRows > 0); + + if (pDataCols->numOfRows < pCfg->minRowsPerFileBlock) { + pDFile = TSDB_COMPACT_LAST_FILE(pComph); + isLast = true; + } else { + pDFile = TSDB_COMPACT_DATA_FILE(pComph); + isLast = false; + } + + if (tsdbWriteBlockImpl(pRepo, pTable, pDFile, + isLast ? TSDB_COMPACT_SMAL_FILE(pComph) : TSDB_COMPACT_SMAD_FILE(pComph), pDataCols, &block, + isLast, true, ppBuf, ppCBuf, ppExBuf) < 0) { + return -1; + } + + if (taosArrayPush(pComph->aSupBlk, (void *)(&block)) == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + return 0; + } + +#endif \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/src/tsdbFS.c b/source/dnode/vnode/tsdb2/src/tsdbFS.c new file mode 100644 index 0000000000..121e0ccbdf --- /dev/null +++ b/source/dnode/vnode/tsdb2/src/tsdbFS.c @@ -0,0 +1,1448 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "os.h" +#include "tsdbint.h" +#include + +typedef enum { TSDB_TXN_TEMP_FILE = 0, TSDB_TXN_CURR_FILE } TSDB_TXN_FILE_T; +static const char *tsdbTxnFname[] = {"current.t", "current"}; +#define TSDB_MAX_FSETS(keep, days) ((keep) / (days) + 3) + +static int tsdbComparFidFSet(const void *arg1, const void *arg2); +static void tsdbResetFSStatus(SFSStatus *pStatus); +static int tsdbSaveFSStatus(SFSStatus *pStatus, int vid); +static void tsdbApplyFSTxnOnDisk(SFSStatus *pFrom, SFSStatus *pTo); +static void tsdbGetTxnFname(int repoid, TSDB_TXN_FILE_T ftype, char fname[]); +static int tsdbOpenFSFromCurrent(STsdb *pRepo); +static int tsdbScanAndTryFixFS(STsdb *pRepo); +static int tsdbScanRootDir(STsdb *pRepo); +static int tsdbScanDataDir(STsdb *pRepo); +static bool tsdbIsTFileInFS(STsdbFS *pfs, const TFILE *pf); +static int tsdbRestoreCurrent(STsdb *pRepo); +static int tsdbComparTFILE(const void *arg1, const void *arg2); +static void tsdbScanAndTryFixDFilesHeader(STsdb *pRepo, int32_t *nExpired); +static int tsdbProcessExpiredFS(STsdb *pRepo); +static int tsdbCreateMeta(STsdb *pRepo); +static int tsdbFetchTFileSet(STsdb *pRepo, SArray **fArray); + +// For backward compatibility +// ================== CURRENT file header info +static int tsdbEncodeFSHeader(void **buf, SFSHeader *pHeader) { + int tlen = 0; + + tlen += taosEncodeFixedU32(buf, pHeader->version); + tlen += taosEncodeFixedU32(buf, pHeader->len); + + return tlen; +} + +static void *tsdbDecodeFSHeader(void *buf, SFSHeader *pHeader) { + buf = taosDecodeFixedU32(buf, &(pHeader->version)); + buf = taosDecodeFixedU32(buf, &(pHeader->len)); + + return buf; +} + +// ================== STsdbFSMeta +static int tsdbEncodeFSMeta(void **buf, STsdbFSMeta *pMeta) { + int tlen = 0; + + tlen += taosEncodeFixedU32(buf, pMeta->version); + tlen += taosEncodeFixedI64(buf, pMeta->totalPoints); + tlen += taosEncodeFixedI64(buf, pMeta->totalStorage); + + return tlen; +} + +static void *tsdbDecodeFSMeta(void *buf, 
STsdbFSMeta *pMeta) { + buf = taosDecodeFixedU32(buf, &(pMeta->version)); + buf = taosDecodeFixedI64(buf, &(pMeta->totalPoints)); + buf = taosDecodeFixedI64(buf, &(pMeta->totalStorage)); + + return buf; +} + +// ================== SFSStatus +static int tsdbEncodeDFileSetArray(void **buf, SArray *pArray) { + int tlen = 0; + uint64_t nset = taosArrayGetSize(pArray); + + tlen += taosEncodeFixedU64(buf, nset); + for (size_t i = 0; i < nset; i++) { + SDFileSet *pSet = taosArrayGet(pArray, i); + + tlen += tsdbEncodeDFileSet(buf, pSet); + } + + return tlen; +} + +static int tsdbDecodeDFileSetArray(void **originBuf, void *buf, SArray *pArray, SFSHeader *pSFSHeader) { + uint64_t nset; + SDFileSet dset; + dset.ver = TSDB_FSET_VER_0; // default value + + taosArrayClear(pArray); + + buf = taosDecodeFixedU64(buf, &nset); + + if (pSFSHeader->version == TSDB_FS_VER_0) { + // record fver in new version of 'current' file + uint64_t extendedSize = pSFSHeader->len + nset * TSDB_FILE_MAX * sizeof(TSDB_FVER_TYPE); + if (taosTSizeof(*originBuf) < extendedSize) { + size_t ptrDistance = POINTER_DISTANCE(buf, *originBuf); + if (tsdbMakeRoom(originBuf, (size_t)extendedSize) < 0) { + terrno = TSDB_CODE_FS_OUT_OF_MEMORY; + return -1; + } + buf = POINTER_SHIFT(*originBuf, ptrDistance); + } + } + + for (size_t i = 0; i < nset; i++) { + buf = tsdbDecodeDFileSet(buf, &dset, pSFSHeader->version); + taosArrayPush(pArray, (void *)(&dset)); + } + return TSDB_CODE_SUCCESS; +} + +static int tsdbEncodeFSStatus(void **buf, SFSStatus *pStatus) { + ASSERT(pStatus->pmf); + + int tlen = 0; + + tlen += tsdbEncodeSMFile(buf, pStatus->pmf); + tlen += tsdbEncodeDFileSetArray(buf, pStatus->df); + + return tlen; +} + +static int tsdbDecodeFSStatus(void **originBuf, void *buf, SFSStatus *pStatus, SFSHeader *pSFSHeader) { + tsdbResetFSStatus(pStatus); + pStatus->pmf = &(pStatus->mf); + + buf = tsdbDecodeSMFile(buf, pStatus->pmf); + return tsdbDecodeDFileSetArray(originBuf, buf, pStatus->df, pSFSHeader); +} + +static 
SFSStatus *tsdbNewFSStatus(int maxFSet) { + SFSStatus *pStatus = (SFSStatus *)calloc(1, sizeof(*pStatus)); + if (pStatus == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return NULL; + } + + TSDB_FILE_SET_CLOSED(&(pStatus->mf)); + + pStatus->df = taosArrayInit(maxFSet, sizeof(SDFileSet)); + if (pStatus->df == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + free(pStatus); + return NULL; + } + + return pStatus; +} + +static SFSStatus *tsdbFreeFSStatus(SFSStatus *pStatus) { + if (pStatus) { + pStatus->df = taosArrayDestroy(pStatus->df); + free(pStatus); + } + + return NULL; +} + +static void tsdbResetFSStatus(SFSStatus *pStatus) { + if (pStatus == NULL) { + return; + } + + TSDB_FILE_SET_CLOSED(&(pStatus->mf)); + + pStatus->pmf = NULL; + taosArrayClear(pStatus->df); +} + +static void tsdbSetStatusMFile(SFSStatus *pStatus, const SMFile *pMFile) { + ASSERT(pStatus->pmf == NULL); + + pStatus->pmf = &(pStatus->mf); + tsdbInitMFileEx(pStatus->pmf, (SMFile *)pMFile); +} + +static int tsdbAddDFileSetToStatus(SFSStatus *pStatus, const SDFileSet *pSet) { + if (taosArrayPush(pStatus->df, (void *)pSet) == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + TSDB_FSET_SET_CLOSED(((SDFileSet *)taosArrayGetLast(pStatus->df))); + + return 0; +} + +// ================== STsdbFS +STsdbFS *tsdbNewFS(STsdbCfg *pCfg) { + int keep = pCfg->keep; + int days = pCfg->daysPerFile; + int maxFSet = TSDB_MAX_FSETS(keep, days); + STsdbFS *pfs; + + pfs = (STsdbFS *)calloc(1, sizeof(*pfs)); + if (pfs == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return NULL; + } + + int code = pthread_rwlock_init(&(pfs->lock), NULL); + if (code) { + terrno = TAOS_SYSTEM_ERROR(code); + free(pfs); + return NULL; + } + + pfs->cstatus = tsdbNewFSStatus(maxFSet); + if (pfs->cstatus == NULL) { + tsdbFreeFS(pfs); + return NULL; + } + + pfs->metaCache = taosHashInit(4096, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK); + if (pfs->metaCache == NULL) { + terrno = 
TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbFreeFS(pfs); + return NULL; + } + + pfs->intxn = false; + pfs->metaCacheComp = NULL; + + pfs->nstatus = tsdbNewFSStatus(maxFSet); + if (pfs->nstatus == NULL) { + tsdbFreeFS(pfs); + return NULL; + } + + return pfs; +} + +void *tsdbFreeFS(STsdbFS *pfs) { + if (pfs) { + pfs->nstatus = tsdbFreeFSStatus(pfs->nstatus); + taosHashCleanup(pfs->metaCache); + pfs->metaCache = NULL; + pfs->cstatus = tsdbFreeFSStatus(pfs->cstatus); + pthread_rwlock_destroy(&(pfs->lock)); + free(pfs); + } + + return NULL; +} + +static int tsdbProcessExpiredFS(STsdb *pRepo) { + tsdbStartFSTxn(pRepo, 0, 0); + if (tsdbCreateMeta(pRepo) < 0) { + tsdbError("vgId:%d failed to create meta since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + + if (tsdbApplyRtn(pRepo) < 0) { + tsdbEndFSTxnWithError(REPO_FS(pRepo)); + tsdbError("vgId:%d failed to apply rtn since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + if (tsdbEndFSTxn(pRepo) < 0) { + tsdbError("vgId:%d failed to end fs txn since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + return 0; +} + +static int tsdbCreateMeta(STsdb *pRepo) { + STsdbFS *pfs = REPO_FS(pRepo); + SMFile * pOMFile = pfs->cstatus->pmf; + SMFile mf; + SDiskID did; + + if (pOMFile != NULL) { + // keep the old meta file + tsdbUpdateMFile(pfs, pOMFile); + return 0; + } + + // Create a new meta file + did.level = TFS_PRIMARY_LEVEL; + did.id = TFS_PRIMARY_ID; + tsdbInitMFile(&mf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo))); + + if (tsdbCreateMFile(&mf, true) < 0) { + tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + + tsdbInfo("vgId:%d meta file %s is created", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf)); + + if (tsdbUpdateMFileHeader(&mf) < 0) { + tsdbError("vgId:%d failed to update META file header since %s, revert it", REPO_ID(pRepo), tstrerror(terrno)); + tsdbApplyMFileChange(&mf, pOMFile); + return -1; + } + + TSDB_FILE_FSYNC(&mf); + 
tsdbCloseMFile(&mf); + tsdbUpdateMFile(pfs, &mf); + + return 0; +} + +int tsdbOpenFS(STsdb *pRepo) { + STsdbFS *pfs = REPO_FS(pRepo); + char current[TSDB_FILENAME_LEN] = "\0"; + int nExpired = 0; + + ASSERT(pfs != NULL); + + tsdbGetTxnFname(REPO_ID(pRepo), TSDB_TXN_CURR_FILE, current); + + tsdbGetRtnSnap(pRepo, &pRepo->rtn); + if (access(current, F_OK) == 0) { + if (tsdbOpenFSFromCurrent(pRepo) < 0) { + tsdbError("vgId:%d failed to open FS since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + + tsdbScanAndTryFixDFilesHeader(pRepo, &nExpired); + if (nExpired > 0) { + tsdbProcessExpiredFS(pRepo); + } + } else { + // should skip expired fileset inside of the function + if (tsdbRestoreCurrent(pRepo) < 0) { + tsdbError("vgId:%d failed to restore current file since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + } + + if (tsdbScanAndTryFixFS(pRepo) < 0) { + tsdbError("vgId:%d failed to scan and fix FS since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + + // Load meta cache if has meta file + if ((!(pRepo->state & TSDB_STATE_BAD_META)) && tsdbLoadMetaCache(pRepo, true) < 0) { + tsdbError("vgId:%d failed to open FS while loading meta cache since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + + return 0; +} + +void tsdbCloseFS(STsdb *pRepo) { + // Do nothing +} + +// Start a new transaction to modify the file system +void tsdbStartFSTxn(STsdb *pRepo, int64_t pointsAdd, int64_t storageAdd) { + STsdbFS *pfs = REPO_FS(pRepo); + ASSERT(pfs->intxn == false); + + pfs->intxn = true; + tsdbResetFSStatus(pfs->nstatus); + pfs->nstatus->meta = pfs->cstatus->meta; + if (pfs->cstatus->pmf == NULL) { + pfs->nstatus->meta.version = 0; + } else { + pfs->nstatus->meta.version = pfs->cstatus->meta.version + 1; + } + pfs->nstatus->meta.totalPoints = pfs->cstatus->meta.totalPoints + pointsAdd; + pfs->nstatus->meta.totalStorage = pfs->cstatus->meta.totalStorage += storageAdd; +} + +void tsdbUpdateFSTxnMeta(STsdbFS *pfs, STsdbFSMeta *pMeta) { 
pfs->nstatus->meta = *pMeta; } + +int tsdbEndFSTxn(STsdb *pRepo) { + STsdbFS *pfs = REPO_FS(pRepo); + ASSERT(FS_IN_TXN(pfs)); + SFSStatus *pStatus; + + // Write current file system snapshot + if (tsdbSaveFSStatus(pfs->nstatus, REPO_ID(pRepo)) < 0) { + tsdbEndFSTxnWithError(pfs); + return -1; + } + + // Make new + tsdbWLockFS(pfs); + pStatus = pfs->cstatus; + pfs->cstatus = pfs->nstatus; + pfs->nstatus = pStatus; + tsdbUnLockFS(pfs); + + // Apply actual change to each file and SDFileSet + tsdbApplyFSTxnOnDisk(pfs->nstatus, pfs->cstatus); + + pfs->intxn = false; + return 0; +} + +int tsdbEndFSTxnWithError(STsdbFS *pfs) { + tsdbApplyFSTxnOnDisk(pfs->nstatus, pfs->cstatus); + // TODO: if mf change, reload pfs->metaCache + pfs->intxn = false; + return 0; +} + +void tsdbUpdateMFile(STsdbFS *pfs, const SMFile *pMFile) { tsdbSetStatusMFile(pfs->nstatus, pMFile); } + +int tsdbUpdateDFileSet(STsdbFS *pfs, const SDFileSet *pSet) { return tsdbAddDFileSetToStatus(pfs->nstatus, pSet); } + +static int tsdbSaveFSStatus(SFSStatus *pStatus, int vid) { + SFSHeader fsheader; + void * pBuf = NULL; + void * ptr; + char hbuf[TSDB_FILE_HEAD_SIZE] = "\0"; + char tfname[TSDB_FILENAME_LEN] = "\0"; + char cfname[TSDB_FILENAME_LEN] = "\0"; + + tsdbGetTxnFname(vid, TSDB_TXN_TEMP_FILE, tfname); + tsdbGetTxnFname(vid, TSDB_TXN_CURR_FILE, cfname); + + int fd = open(tfname, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0755); + if (fd < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + fsheader.version = TSDB_LATEST_SFS_VER; + if (pStatus->pmf == NULL) { + ASSERT(taosArrayGetSize(pStatus->df) == 0); + fsheader.len = 0; + } else { + fsheader.len = tsdbEncodeFSStatus(NULL, pStatus) + sizeof(TSCKSUM); + } + + // Encode header part and write + ptr = hbuf; + tsdbEncodeFSHeader(&ptr, &fsheader); + tsdbEncodeFSMeta(&ptr, &(pStatus->meta)); + + taosCalcChecksumAppend(0, (uint8_t *)hbuf, TSDB_FILE_HEAD_SIZE); + + if (taosWriteFile(fd, hbuf, TSDB_FILE_HEAD_SIZE) < TSDB_FILE_HEAD_SIZE) { + terrno = 
TAOS_SYSTEM_ERROR(errno); + close(fd); + remove(tfname); + return -1; + } + + // Encode file status and write to file + if (fsheader.len > 0) { + if (tsdbMakeRoom(&(pBuf), fsheader.len) < 0) { + close(fd); + remove(tfname); + return -1; + } + + ptr = pBuf; + tsdbEncodeFSStatus(&ptr, pStatus); + taosCalcChecksumAppend(0, (uint8_t *)pBuf, fsheader.len); + + if (taosWriteFile(fd, pBuf, fsheader.len) < fsheader.len) { + terrno = TAOS_SYSTEM_ERROR(errno); + close(fd); + (void)remove(tfname); + taosTZfree(pBuf); + return -1; + } + } + + // fsync, close and rename + if (taosFsyncFile(fd) < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + close(fd); + remove(tfname); + taosTZfree(pBuf); + return -1; + } + + (void)close(fd); + (void)taosRenameFile(tfname, cfname); + taosTZfree(pBuf); + + return 0; +} + +static void tsdbApplyFSTxnOnDisk(SFSStatus *pFrom, SFSStatus *pTo) { + int ifrom = 0; + int ito = 0; + size_t sizeFrom, sizeTo; + SDFileSet *pSetFrom; + SDFileSet *pSetTo; + + sizeFrom = taosArrayGetSize(pFrom->df); + sizeTo = taosArrayGetSize(pTo->df); + + // Apply meta file change + (void)tsdbApplyMFileChange(pFrom->pmf, pTo->pmf); + + // Apply SDFileSet change + if (ifrom >= sizeFrom) { + pSetFrom = NULL; + } else { + pSetFrom = taosArrayGet(pFrom->df, ifrom); + } + + if (ito >= sizeTo) { + pSetTo = NULL; + } else { + pSetTo = taosArrayGet(pTo->df, ito); + } + + while (true) { + if ((pSetTo == NULL) && (pSetFrom == NULL)) break; + + if (pSetTo == NULL || (pSetFrom && pSetFrom->fid < pSetTo->fid)) { + tsdbApplyDFileSetChange(pSetFrom, NULL); + + ifrom++; + if (ifrom >= sizeFrom) { + pSetFrom = NULL; + } else { + pSetFrom = taosArrayGet(pFrom->df, ifrom); + } + } else if (pSetFrom == NULL || pSetFrom->fid > pSetTo->fid) { + // Do nothing + ito++; + if (ito >= sizeTo) { + pSetTo = NULL; + } else { + pSetTo = taosArrayGet(pTo->df, ito); + } + } else { + tsdbApplyDFileSetChange(pSetFrom, pSetTo); + + ifrom++; + if (ifrom >= sizeFrom) { + pSetFrom = NULL; + } else { + pSetFrom = 
taosArrayGet(pFrom->df, ifrom); + } + + ito++; + if (ito >= sizeTo) { + pSetTo = NULL; + } else { + pSetTo = taosArrayGet(pTo->df, ito); + } + } + } +} + +// ================== SFSIter +// ASSUMPTIONS: the FS Should be read locked when calling these functions +void tsdbFSIterInit(SFSIter *pIter, STsdbFS *pfs, int direction) { + pIter->pfs = pfs; + pIter->direction = direction; + + size_t size = taosArrayGetSize(pfs->cstatus->df); + + pIter->version = pfs->cstatus->meta.version; + + if (size == 0) { + pIter->index = -1; + pIter->fid = TSDB_IVLD_FID; + } else { + if (direction == TSDB_FS_ITER_FORWARD) { + pIter->index = 0; + } else { + pIter->index = (int)(size - 1); + } + + pIter->fid = ((SDFileSet *)taosArrayGet(pfs->cstatus->df, pIter->index))->fid; + } +} + +void tsdbFSIterSeek(SFSIter *pIter, int fid) { + STsdbFS *pfs = pIter->pfs; + size_t size = taosArrayGetSize(pfs->cstatus->df); + + int flags; + if (pIter->direction == TSDB_FS_ITER_FORWARD) { + flags = TD_GE; + } else { + flags = TD_LE; + } + + void *ptr = taosbsearch(&fid, pfs->cstatus->df->pData, size, sizeof(SDFileSet), tsdbComparFidFSet, flags); + if (ptr == NULL) { + pIter->index = -1; + pIter->fid = TSDB_IVLD_FID; + } else { + pIter->index = (int)(TARRAY_ELEM_IDX(pfs->cstatus->df, ptr)); + pIter->fid = ((SDFileSet *)ptr)->fid; + } +} + +SDFileSet *tsdbFSIterNext(SFSIter *pIter) { + STsdbFS * pfs = pIter->pfs; + SDFileSet *pSet; + + if (pIter->index < 0) { + ASSERT(pIter->fid == TSDB_IVLD_FID); + return NULL; + } + + ASSERT(pIter->fid != TSDB_IVLD_FID); + + if (pIter->version != pfs->cstatus->meta.version) { + pIter->version = pfs->cstatus->meta.version; + tsdbFSIterSeek(pIter, pIter->fid); + } + + if (pIter->index < 0) { + return NULL; + } + + pSet = (SDFileSet *)taosArrayGet(pfs->cstatus->df, pIter->index); + ASSERT(pSet->fid == pIter->fid); + + if (pIter->direction == TSDB_FS_ITER_FORWARD) { + pIter->index++; + if (pIter->index >= taosArrayGetSize(pfs->cstatus->df)) { + pIter->index = -1; + } + } 
else { + pIter->index--; + } + + if (pIter->index >= 0) { + pIter->fid = ((SDFileSet *)taosArrayGet(pfs->cstatus->df, pIter->index))->fid; + } else { + pIter->fid = TSDB_IVLD_FID; + } + + return pSet; +} + +static int tsdbComparFidFSet(const void *arg1, const void *arg2) { + int fid = *(int *)arg1; + SDFileSet *pSet = (SDFileSet *)arg2; + + if (fid < pSet->fid) { + return -1; + } else if (fid == pSet->fid) { + return 0; + } else { + return 1; + } +} + +static void tsdbGetTxnFname(int repoid, TSDB_TXN_FILE_T ftype, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN, "%s/vnode/vnode%d/tsdb/%s", TFS_PRIMARY_PATH(), repoid, tsdbTxnFname[ftype]); +} + +static int tsdbOpenFSFromCurrent(STsdb *pRepo) { + STsdbFS * pfs = REPO_FS(pRepo); + int fd = -1; + void * buffer = NULL; + SFSHeader fsheader; + char current[TSDB_FILENAME_LEN] = "\0"; + void * ptr; + + tsdbGetTxnFname(REPO_ID(pRepo), TSDB_TXN_CURR_FILE, current); + + // current file exists, try to recover + fd = open(current, O_RDONLY | O_BINARY); + if (fd < 0) { + tsdbError("vgId:%d failed to open file %s since %s", REPO_ID(pRepo), current, strerror(errno)); + terrno = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + if (tsdbMakeRoom(&buffer, TSDB_FILE_HEAD_SIZE) < 0) { + goto _err; + } + + int nread = (int)taosReadFile(fd, buffer, TSDB_FILE_HEAD_SIZE); + if (nread < 0) { + tsdbError("vgId:%d failed to read %d bytes from file %s since %s", REPO_ID(pRepo), TSDB_FILENAME_LEN, current, + strerror(errno)); + terrno = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + if (nread < TSDB_FILE_HEAD_SIZE) { + tsdbError("vgId:%d failed to read header of file %s, read bytes:%d", REPO_ID(pRepo), current, nread); + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + goto _err; + } + + if (!taosCheckChecksumWhole((uint8_t *)buffer, TSDB_FILE_HEAD_SIZE)) { + tsdbError("vgId:%d header of file %s failed checksum check", REPO_ID(pRepo), current); + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + goto _err; + } + + SFSStatus *pStatus = pfs->cstatus; + ptr = 
buffer; + ptr = tsdbDecodeFSHeader(ptr, &fsheader); + ptr = tsdbDecodeFSMeta(ptr, &(pStatus->meta)); + + if (fsheader.version != TSDB_FS_VER_0) { + // TODO: handle file version change + } + + if (fsheader.len > 0) { + if (tsdbMakeRoom(&buffer, fsheader.len) < 0) { + goto _err; + } + + nread = (int)taosReadFile(fd, buffer, fsheader.len); + if (nread < 0) { + tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), current, strerror(errno)); + terrno = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + if (nread < fsheader.len) { + tsdbError("vgId:%d failed to read %d bytes from file %s", REPO_ID(pRepo), fsheader.len, current); + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + goto _err; + } + + if (!taosCheckChecksumWhole((uint8_t *)buffer, fsheader.len)) { + tsdbError("vgId:%d file %s is corrupted since wrong checksum", REPO_ID(pRepo), current); + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + goto _err; + } + + ptr = buffer; + if (tsdbDecodeFSStatus(&buffer, ptr, pStatus, &fsheader) < 0) { + goto _err; + } + } else { + tsdbResetFSStatus(pStatus); + } + + taosTZfree(buffer); + close(fd); + + return 0; + +_err: + if (fd >= 0) { + close(fd); + } + taosTZfree(buffer); + return -1; +} + +// Scan and try to fix incorrect files +static int tsdbScanAndTryFixFS(STsdb *pRepo) { + STsdbFS * pfs = REPO_FS(pRepo); + SFSStatus *pStatus = pfs->cstatus; + + if (tsdbScanAndTryFixMFile(pRepo) < 0) { + tsdbError("vgId:%d failed to fix MFile since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + + size_t size = taosArrayGetSize(pStatus->df); + + for (size_t i = 0; i < size; i++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pStatus->df, i); + + if (tsdbScanAndTryFixDFileSet(pRepo, pSet) < 0) { + tsdbError("vgId:%d failed to fix DFileSet since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + } + + // remove those unused files + tsdbScanRootDir(pRepo); + tsdbScanDataDir(pRepo); + return 0; +} + +int tsdbLoadMetaCache(STsdb *pRepo, bool recoverMeta) { + char 
tbuf[128]; + STsdbFS * pfs = REPO_FS(pRepo); + SMFile mf; + SMFile * pMFile = &mf; + void * pBuf = NULL; + SKVRecord rInfo; + int64_t maxBufSize = 0; + SMFInfo minfo; + + taosHashClear(pfs->metaCache); + + // No meta file, just return + if (pfs->cstatus->pmf == NULL) return 0; + + mf = pfs->cstatus->mf; + // Load cache first + if (tsdbOpenMFile(pMFile, O_RDONLY) < 0) { + return -1; + } + + if (tsdbLoadMFileHeader(pMFile, &minfo) < 0) { + tsdbCloseMFile(pMFile); + return -1; + } + + while (true) { + int64_t tsize = tsdbReadMFile(pMFile, tbuf, sizeof(SKVRecord)); + if (tsize == 0) break; + + if (tsize < 0) { + tsdbError("vgId:%d failed to read META file since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + + if (tsize < sizeof(SKVRecord)) { + tsdbError("vgId:%d failed to read %" PRIzu " bytes from file %s", REPO_ID(pRepo), sizeof(SKVRecord), + TSDB_FILE_FULL_NAME(pMFile)); + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbCloseMFile(pMFile); + return -1; + } + + void *ptr = tsdbDecodeKVRecord(tbuf, &rInfo); + ASSERT(POINTER_DISTANCE(ptr, tbuf) == sizeof(SKVRecord)); + // ASSERT((rInfo.offset > 0) ? 
(pStore->info.size == rInfo.offset) : true); + + if (rInfo.offset < 0) { + taosHashRemove(pfs->metaCache, (void *)(&rInfo.uid), sizeof(rInfo.uid)); +#if 0 + pStore->info.size += sizeof(SKVRecord); + pStore->info.nRecords--; + pStore->info.nDels++; + pStore->info.tombSize += (rInfo.size + sizeof(SKVRecord) * 2); +#endif + } else { + ASSERT(rInfo.offset > 0 && rInfo.size > 0); + if (taosHashPut(pfs->metaCache, (void *)(&rInfo.uid), sizeof(rInfo.uid), &rInfo, sizeof(rInfo)) < 0) { + tsdbError("vgId:%d failed to load meta cache from file %s since OOM", REPO_ID(pRepo), + TSDB_FILE_FULL_NAME(pMFile)); + terrno = TSDB_CODE_OUT_OF_MEMORY; + tsdbCloseMFile(pMFile); + return -1; + } + + maxBufSize = MAX(maxBufSize, rInfo.size); + + if (tsdbSeekMFile(pMFile, rInfo.size, SEEK_CUR) < 0) { + tsdbError("vgId:%d failed to lseek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), + tstrerror(terrno)); + tsdbCloseMFile(pMFile); + return -1; + } + +#if 0 + pStore->info.size += (sizeof(SKVRecord) + rInfo.size); + pStore->info.nRecords++; +#endif + } + } + + if (recoverMeta) { + pBuf = malloc((size_t)maxBufSize); + if (pBuf == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbCloseMFile(pMFile); + return -1; + } + + SKVRecord *pRecord = taosHashIterate(pfs->metaCache, NULL); + while (pRecord) { + if (tsdbSeekMFile(pMFile, pRecord->offset + sizeof(SKVRecord), SEEK_SET) < 0) { + tsdbError("vgId:%d failed to seek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), + tstrerror(terrno)); + tfree(pBuf); + tsdbCloseMFile(pMFile); + return -1; + } + + int nread = (int)tsdbReadMFile(pMFile, pBuf, pRecord->size); + if (nread < 0) { + tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), + tstrerror(terrno)); + tfree(pBuf); + tsdbCloseMFile(pMFile); + return -1; + } + + if (nread < pRecord->size) { + tsdbError("vgId:%d failed to read file %s since file corrupted, expected read:%" PRId64 " actual read:%d", + REPO_ID(pRepo), 
TSDB_FILE_FULL_NAME(pMFile), pRecord->size, nread); + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tfree(pBuf); + tsdbCloseMFile(pMFile); + return -1; + } + + if (tsdbRestoreTable(pRepo, pBuf, (int)pRecord->size) < 0) { + tsdbError("vgId:%d failed to restore table, uid %" PRId64 ", since %s" PRIu64, REPO_ID(pRepo), pRecord->uid, + tstrerror(terrno)); + tfree(pBuf); + tsdbCloseMFile(pMFile); + return -1; + } + + pRecord = taosHashIterate(pfs->metaCache, pRecord); + } + + tsdbOrgMeta(pRepo); + } + + tsdbCloseMFile(pMFile); + tfree(pBuf); + return 0; +} + +static int tsdbScanRootDir(STsdb *pRepo) { + char rootDir[TSDB_FILENAME_LEN]; + char bname[TSDB_FILENAME_LEN]; + STsdbFS * pfs = REPO_FS(pRepo); + const TFILE *pf; + + tsdbGetRootDir(REPO_ID(pRepo), rootDir); + TDIR *tdir = tfsOpendir(rootDir); + if (tdir == NULL) { + tsdbError("vgId:%d failed to open directory %s since %s", REPO_ID(pRepo), rootDir, tstrerror(terrno)); + return -1; + } + + while ((pf = tfsReaddir(tdir))) { + tfsbasename(pf, bname); + + if (strcmp(bname, tsdbTxnFname[TSDB_TXN_CURR_FILE]) == 0 || strcmp(bname, "data") == 0) { + // Skip current file and data directory + continue; + } + + if (pfs->cstatus->pmf && tfsIsSameFile(pf, &(pfs->cstatus->pmf->f))) { + continue; + } + + (void)tfsremove(pf); + tsdbDebug("vgId:%d invalid file %s is removed", REPO_ID(pRepo), TFILE_NAME(pf)); + } + + tfsClosedir(tdir); + + return 0; +} + +static int tsdbScanDataDir(STsdb *pRepo) { + char dataDir[TSDB_FILENAME_LEN]; + char bname[TSDB_FILENAME_LEN]; + STsdbFS * pfs = REPO_FS(pRepo); + const TFILE *pf; + + tsdbGetDataDir(REPO_ID(pRepo), dataDir); + TDIR *tdir = tfsOpendir(dataDir); + if (tdir == NULL) { + tsdbError("vgId:%d failed to open directory %s since %s", REPO_ID(pRepo), dataDir, tstrerror(terrno)); + return -1; + } + + while ((pf = tfsReaddir(tdir))) { + tfsbasename(pf, bname); + + if (!tsdbIsTFileInFS(pfs, pf)) { + (void)tfsremove(pf); + tsdbDebug("vgId:%d invalid file %s is removed", REPO_ID(pRepo), 
TFILE_NAME(pf)); + } + } + + tfsClosedir(tdir); + + return 0; +} + +static bool tsdbIsTFileInFS(STsdbFS *pfs, const TFILE *pf) { + SFSIter fsiter; + tsdbFSIterInit(&fsiter, pfs, TSDB_FS_ITER_FORWARD); + SDFileSet *pSet; + + while ((pSet = tsdbFSIterNext(&fsiter))) { + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) { + SDFile *pDFile = TSDB_DFILE_IN_SET(pSet, ftype); + if (tfsIsSameFile(pf, TSDB_FILE_F(pDFile))) { + return true; + } + } + } + + return false; +} + +// Scan the repo root dir for the meta file ("meta" or "meta-ver<N>"): the temp txn file and every entry that +// does not match the pattern are removed, the "data" entry is skipped, and the single match is recorded in +// pfs->cstatus with its header loaded. Two matches are treated as corruption. +// Returns 0 on success (also when no meta file exists), -1 on failure. +static int tsdbRestoreMeta(STsdb *pRepo) { + char rootDir[TSDB_FILENAME_LEN]; + char bname[TSDB_FILENAME_LEN]; + TDIR * tdir = NULL; + const TFILE *pf = NULL; + const char * pattern = "^meta(-ver[0-9]+)?$"; + regex_t regex; + STsdbFS * pfs = REPO_FS(pRepo); + + regcomp(&regex, pattern, REG_EXTENDED); + + tsdbInfo("vgId:%d try to restore meta", REPO_ID(pRepo)); + + tsdbGetRootDir(REPO_ID(pRepo), rootDir); + + tdir = tfsOpendir(rootDir); + if (tdir == NULL) { + tsdbError("vgId:%d failed to open dir %s since %s", REPO_ID(pRepo), rootDir, tstrerror(terrno)); + regfree(&regex); + return -1; + } + + while ((pf = tfsReaddir(tdir))) { + tfsbasename(pf, bname); + + if (strcmp(bname, "data") == 0) { + // Skip the data/ directory + continue; + } + + if (strcmp(bname, tsdbTxnFname[TSDB_TXN_TEMP_FILE]) == 0) { + // Skip current.t file + tsdbInfo("vgId:%d file %s exists, remove it", REPO_ID(pRepo), TFILE_NAME(pf)); + (void)tfsremove(pf); + continue; + } + + int code = regexec(&regex, bname, 0, NULL, 0); + if (code == 0) { + // Match + if (pfs->cstatus->pmf != NULL) { + tsdbError("vgId:%d failed to restore meta since two file exists, file1 %s and file2 %s", REPO_ID(pRepo), + TSDB_FILE_FULL_NAME(pfs->cstatus->pmf), TFILE_NAME(pf)); + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tfsClosedir(tdir); + regfree(&regex); + return -1; + } else { + uint32_t _version = 0; + if (strcmp(bname, "meta") != 0) { + sscanf(bname, "meta-ver%" PRIu32, &_version); + pfs->cstatus->meta.version = _version; + } + + pfs->cstatus->pmf =
&(pfs->cstatus->mf); + pfs->cstatus->pmf->f = *pf; + TSDB_FILE_SET_CLOSED(pfs->cstatus->pmf); + + if (tsdbOpenMFile(pfs->cstatus->pmf, O_RDONLY) < 0) { + tsdbError("vgId:%d failed to restore meta since %s", REPO_ID(pRepo), tstrerror(terrno)); + tfsClosedir(tdir); + regfree(&regex); + return -1; + } + + if (tsdbLoadMFileHeader(pfs->cstatus->pmf, &(pfs->cstatus->pmf->info)) < 0) { + tsdbError("vgId:%d failed to restore meta since %s", REPO_ID(pRepo), tstrerror(terrno)); + tsdbCloseMFile(pfs->cstatus->pmf); + tfsClosedir(tdir); + regfree(&regex); + return -1; + } + + if (tsdbForceKeepFile) { + struct stat tfstat; + + // Get real file size + if (fstat(pfs->cstatus->pmf->fd, &tfstat) < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + tsdbCloseMFile(pfs->cstatus->pmf); + tfsClosedir(tdir); + regfree(&regex); + return -1; + } + + if (pfs->cstatus->pmf->info.size != tfstat.st_size) { + int64_t tfsize = pfs->cstatus->pmf->info.size; + pfs->cstatus->pmf->info.size = tfstat.st_size; + tsdbInfo("vgId:%d file %s header size is changed from %" PRId64 " to %" PRId64, REPO_ID(pRepo), + TSDB_FILE_FULL_NAME(pfs->cstatus->pmf), tfsize, pfs->cstatus->pmf->info.size); + } + } + + tsdbCloseMFile(pfs->cstatus->pmf); + } + } else if (code == REG_NOMATCH) { + // Not match + tsdbInfo("vgId:%d invalid file %s exists, remove it", REPO_ID(pRepo), TFILE_NAME(pf)); + tfsremove(pf); + continue; + } else { + // Has other error + // NOTE(review): code is a regexec() status, not an errno value; regerror() is the matching decoder + tsdbError("vgId:%d failed to restore meta file while run regexec since %s", REPO_ID(pRepo), strerror(code)); + terrno = TAOS_SYSTEM_ERROR(code); + tfsClosedir(tdir); + regfree(&regex); + return -1; + } + } + + if (pfs->cstatus->pmf) { + tsdbInfo("vgId:%d meta file %s is restored", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pfs->cstatus->pmf)); + } else { + tsdbInfo("vgId:%d no meta file is restored", REPO_ID(pRepo)); + } + + tfsClosedir(tdir); + regfree(&regex); + return 0; +} + +// Collect every entry of the data dir whose name matches the DFile pattern into *fArray, sorted with +// tsdbComparTFILE; entries that do not match are removed from disk. +// Returns 0 on success; on failure returns -1 and any array already created has been destroyed. +static int tsdbFetchTFileSet(STsdb *pRepo, SArray **fArray) { + char dataDir[TSDB_FILENAME_LEN]; + char
bname[TSDB_FILENAME_LEN]; + TDIR * tdir = NULL; + const TFILE *pf = NULL; + const char * pattern = "^v[0-9]+f[0-9]+\\.(head|data|last|smad|smal)(-ver[0-9]+)?$"; + regex_t regex; + + tsdbGetDataDir(REPO_ID(pRepo), dataDir); + + // Resource allocation and init + regcomp(&regex, pattern, REG_EXTENDED); + + *fArray = taosArrayInit(1024, sizeof(TFILE)); + if (*fArray == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbError("vgId:%d failed to fetch TFileSet while open directory %s since %s", REPO_ID(pRepo), dataDir, + tstrerror(terrno)); + regfree(&regex); + return -1; + } + + tdir = tfsOpendir(dataDir); + if (tdir == NULL) { + tsdbError("vgId:%d failed to fetch TFileSet while open directory %s since %s", REPO_ID(pRepo), dataDir, + tstrerror(terrno)); + taosArrayDestroy(*fArray); + regfree(&regex); + return -1; + } + + while ((pf = tfsReaddir(tdir))) { + tfsbasename(pf, bname); + + int code = regexec(&regex, bname, 0, NULL, 0); + if (code == 0) { + if (taosArrayPush(*fArray, (void *)pf) == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tfsClosedir(tdir); + taosArrayDestroy(*fArray); + regfree(&regex); + return -1; + } + } else if (code == REG_NOMATCH) { + // Not match + tsdbInfo("vgId:%d invalid file %s exists, remove it", REPO_ID(pRepo), TFILE_NAME(pf)); + (void)tfsremove(pf); + continue; + } else { + // Has other error + // NOTE(review): code is a regexec() status, not an errno value; regerror() is the matching decoder + tsdbError("vgId:%d failed to fetch TFileSet Array while run regexec since %s", REPO_ID(pRepo), strerror(code)); + terrno = TAOS_SYSTEM_ERROR(code); + tfsClosedir(tdir); + taosArrayDestroy(*fArray); + regfree(&regex); + return -1; + } + } + + tfsClosedir(tdir); + regfree(&regex); + + // Sort the array according to file name + taosArraySort(*fArray, tsdbComparTFILE); + return 0; +} + +// update the function if the DFileSet definition updates +static bool tsdbIsDFileSetValid(int nFiles) { + switch (nFiles) { + case TSDB_FILE_MIN: + case TSDB_FILE_MAX: + return true; + default: + return false; + } +} + +static int tsdbRestoreDFileSet(STsdb *pRepo) { + const TFILE *pf
= NULL; + SArray * fArray = NULL; + STsdbFS * pfs = REPO_FS(pRepo); + char dataDir[TSDB_FILENAME_LEN] = "\0"; + size_t fArraySize = 0; + + tsdbGetDataDir(REPO_ID(pRepo), dataDir); + + if (tsdbFetchTFileSet(pRepo, &fArray) < 0) { + tsdbError("vgId:%d failed to fetch TFileSet from %s to restore since %s", REPO_ID(pRepo), dataDir, + tstrerror(terrno)); + return -1; + } + + if ((fArraySize = taosArrayGetSize(fArray)) <= 0) { + taosArrayDestroy(fArray); + tsdbInfo("vgId:%d size of DFileSet from %s is %" PRIu32, REPO_ID(pRepo), dataDir, (uint32_t)fArraySize); + return 0; + } + + // Loop to recover each file set + SDFileSet fset = {0}; + uint8_t nDFiles = 0; + bool isOneFSetFinish = true; + int lastFType = -1; + // one fileset ends when (1) the array ends or (2) encounter different fid + for (size_t index = 0; index < fArraySize; ++index) { + int tvid = -1, tfid = -1; + TSDB_FILE_T ttype = TSDB_FILE_MAX; + uint32_t tversion = -1; + char bname[TSDB_FILENAME_LEN] = "\0"; + + pf = taosArrayGet(fArray, index); + tfsbasename(pf, bname); + tsdbParseDFilename(bname, &tvid, &tfid, &ttype, &tversion); + ASSERT(tvid == REPO_ID(pRepo)); + SDFile *pDFile = TSDB_DFILE_IN_SET(&fset, ttype); + if (tfid < pRepo->rtn.minFid) { // skip the file expired + continue; + } + if ((isOneFSetFinish == false) && (lastFType == ttype)) { // only fetch the 1st file with same fid and type. 
+ continue; + } + + lastFType = ttype; + + if (index == 0) { + memset(&fset, 0, sizeof(SDFileSet)); + TSDB_FSET_SET_CLOSED(&fset); + nDFiles = 1; + fset.fid = tfid; + pDFile->f = *pf; + isOneFSetFinish = false; + } else { + if (fset.fid == tfid) { + ++nDFiles; + pDFile->f = *pf; + // (1) the array ends + if (index == fArraySize - 1) { + if (tsdbIsDFileSetValid(nDFiles)) { + tsdbInfo("vgId:%d DFileSet %d is fetched, nDFiles=%" PRIu8, REPO_ID(pRepo), fset.fid, nDFiles); + isOneFSetFinish = true; + } else { + // return error in case of removing uncomplete DFileSets + // terrno = TSDB_CODE_TDB_INCOMPLETE_DFILESET; + tsdbError("vgId:%d incomplete DFileSet, fid:%d, nDFiles=%" PRIu8, REPO_ID(pRepo), fset.fid, nDFiles); + taosArrayDestroy(fArray); + return -1; + } + } + } else { + // (2) encounter different fid + if (tsdbIsDFileSetValid(nDFiles)) { + tsdbInfo("vgId:%d DFileSet %d is fetched, nDFiles=%" PRIu8, REPO_ID(pRepo), fset.fid, nDFiles); + isOneFSetFinish = true; + } else { + // return error in case of removing uncomplete DFileSets + // terrno = TSDB_CODE_TDB_INCOMPLETE_DFILESET; + tsdbError("vgId:%d incomplete DFileSet, fid:%d, nDFiles=%" PRIu8, REPO_ID(pRepo), fset.fid, nDFiles); + taosArrayDestroy(fArray); + return -1; +#if 0 + // next FSet + memset(&fset, 0, sizeof(SDFileSet)); + TSDB_FSET_SET_CLOSED(&fset); + nDFiles = 1; + fset.fid = tfid; + pDFile->f = *pf; + isOneFSetFinish = false; + continue; +#endif + } + } + } + + if (isOneFSetFinish) { + for (TSDB_FILE_T ftype = 0; ftype < nDFiles; ++ftype) { + SDFile * pDFile1 = TSDB_DFILE_IN_SET(&fset, ftype); + if (tsdbOpenDFile(pDFile1, O_RDONLY) < 0) { + tsdbError("vgId:%d failed to open DFile %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile1), + tstrerror(terrno)); + taosArrayDestroy(fArray); + return -1; + } + + if (tsdbLoadDFileHeader(pDFile1, &(pDFile1->info)) < 0) { + tsdbError("vgId:%d failed to load DFile %s header since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile1), + tstrerror(terrno)); + 
tsdbCloseDFile(pDFile1); // header load failed: do not leak the fd opened by tsdbOpenDFile + taosArrayDestroy(fArray); + return -1; + } + + if (tsdbForceKeepFile) { + struct stat tfstat; + + // Get real file size + if (fstat(pDFile1->fd, &tfstat) < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + // close the still-open DFile before bailing out, as tsdbRestoreMeta does for the meta file + tsdbCloseDFile(pDFile1); + taosArrayDestroy(fArray); + return -1; + } + + if (pDFile1->info.size != tfstat.st_size) { + int64_t tfsize = pDFile1->info.size; + pDFile1->info.size = tfstat.st_size; + tsdbInfo("vgId:%d file %s header size is changed from %" PRId64 " to %" PRId64, REPO_ID(pRepo), + TSDB_FILE_FULL_NAME(pDFile1), tfsize, pDFile1->info.size); + } + } + + tsdbCloseDFile(pDFile1); + } + tsdbInfo("vgId:%d FSET %d is restored", REPO_ID(pRepo), fset.fid); + + // TODO: update the logic when TSDB_FSET_VER definition update. + if (nDFiles == TSDB_FILE_MIN) { + fset.ver = TSDB_FSET_VER_0; + } else { + fset.ver = TSDB_LATEST_FSET_VER; + } + + taosArrayPush(pfs->cstatus->df, &fset); + + // next FSet + memset(&fset, 0, sizeof(SDFileSet)); + TSDB_FSET_SET_CLOSED(&fset); + nDFiles = 1; + fset.fid = tfid; + pDFile->f = *pf; + isOneFSetFinish = false; + } + } + + // Resource release + taosArrayDestroy(fArray); + + return 0; +} + +static int tsdbRestoreCurrent(STsdb *pRepo) { + // Loop to recover mfile + if (tsdbRestoreMeta(pRepo) < 0) { + tsdbError("vgId:%d failed to restore current since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + + // Loop to recover dfile set + if (tsdbRestoreDFileSet(pRepo) < 0) { + tsdbError("vgId:%d failed to restore DFileSet since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + + if (tsdbSaveFSStatus(pRepo->fs->cstatus, REPO_ID(pRepo)) < 0) { + tsdbError("vgId:%d failed to restore corrent since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + + return 0; +} + +static int tsdbComparTFILE(const void *arg1, const void *arg2) { + TFILE *pf1 = (TFILE *)arg1; + TFILE *pf2 = (TFILE *)arg2; + + int vid1, fid1, vid2, fid2; + TSDB_FILE_T ftype1, ftype2; + uint32_t version1, version2; + char bname1[TSDB_FILENAME_LEN]; + char
bname2[TSDB_FILENAME_LEN]; + + tfsbasename(pf1, bname1); + tfsbasename(pf2, bname2); + tsdbParseDFilename(bname1, &vid1, &fid1, &ftype1, &version1); + tsdbParseDFilename(bname2, &vid2, &fid2, &ftype2, &version2); + + if (fid1 < fid2) { + return -1; + } else if (fid1 > fid2) { + return 1; + } else { + if (ftype1 < ftype2) { + return -1; + } else if (ftype1 > ftype2) { + return 1; + } else { + if (version1 < version2) { + return -1; + } else if (version1 > version2) { + return 1; + } else { + return 0; + } + } + } +} + +static void tsdbScanAndTryFixDFilesHeader(STsdb *pRepo, int32_t *nExpired) { + STsdbFS * pfs = REPO_FS(pRepo); + SFSStatus *pStatus = pfs->cstatus; + SDFInfo info; + + for (size_t i = 0; i < taosArrayGetSize(pStatus->df); i++) { + SDFileSet fset; + tsdbInitDFileSetEx(&fset, (SDFileSet *)taosArrayGet(pStatus->df, i)); + if (fset.fid < pRepo->rtn.minFid) { + ++*nExpired; + } + tsdbDebug("vgId:%d scan DFileSet %d header", REPO_ID(pRepo), fset.fid); + + if (tsdbOpenDFileSet(&fset, O_RDWR) < 0) { + tsdbError("vgId:%d failed to open DFileSet %d since %s, continue", REPO_ID(pRepo), fset.fid, tstrerror(terrno)); + continue; + } + + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(&fset); ftype++) { + SDFile *pDFile = TSDB_DFILE_IN_SET(&fset, ftype); + + if ((tsdbLoadDFileHeader(pDFile, &info) < 0) || pDFile->info.size != info.size || + pDFile->info.magic != info.magic) { + if (tsdbUpdateDFileHeader(pDFile) < 0) { + tsdbError("vgId:%d failed to update DFile header of %s since %s, continue", REPO_ID(pRepo), + TSDB_FILE_FULL_NAME(pDFile), tstrerror(terrno)); + } else { + tsdbInfo("vgId:%d DFile header of %s is updated", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile)); + TSDB_FILE_FSYNC(pDFile); + } + } else { + tsdbDebug("vgId:%d DFile header of %s is correct", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile)); + } + } + + tsdbCloseDFileSet(&fset); + } +} diff --git a/source/dnode/vnode/tsdb2/src/tsdbFile.c b/source/dnode/vnode/tsdb2/src/tsdbFile.c new file mode 100644 
index 0000000000..f2a0652f03 --- /dev/null +++ b/source/dnode/vnode/tsdb2/src/tsdbFile.c @@ -0,0 +1,719 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "tsdbint.h" + +static const char *TSDB_FNAME_SUFFIX[] = { + "head", // TSDB_FILE_HEAD + "data", // TSDB_FILE_DATA + "last", // TSDB_FILE_LAST + "smad", // TSDB_FILE_SMA_DATA(Small Materialized Aggregate for .data File) + "smal", // TSDB_FILE_SMA_LAST(Small Materialized Aggregate for .last File) + "", // TSDB_FILE_MAX + "meta", // TSDB_FILE_META +}; + +static void tsdbGetFilename(int vid, int fid, uint32_t ver, TSDB_FILE_T ftype, char *fname); +static int tsdbRollBackMFile(SMFile *pMFile); +static int tsdbEncodeDFInfo(void **buf, SDFInfo *pInfo); +static void *tsdbDecodeDFInfo(void *buf, SDFInfo *pInfo, TSDB_FVER_TYPE sfver); +static int tsdbRollBackDFile(SDFile *pDFile); + +// ============== SMFile +void tsdbInitMFile(SMFile *pMFile, SDiskID did, int vid, uint32_t ver) { + char fname[TSDB_FILENAME_LEN]; + + TSDB_FILE_SET_STATE(pMFile, TSDB_FILE_STATE_OK); + + memset(&(pMFile->info), 0, sizeof(pMFile->info)); + pMFile->info.magic = TSDB_FILE_INIT_MAGIC; + + tsdbGetFilename(vid, 0, ver, TSDB_FILE_META, fname); + tfsInitFile(TSDB_FILE_F(pMFile), did.level, did.id, fname); +} + +void tsdbInitMFileEx(SMFile *pMFile, const SMFile *pOMFile) { + *pMFile = *pOMFile; + TSDB_FILE_SET_CLOSED(pMFile); +} + +int tsdbEncodeSMFile(void **buf, SMFile *pMFile) { + int tlen = 0; + +
tlen += tsdbEncodeMFInfo(buf, &(pMFile->info)); + tlen += tfsEncodeFile(buf, &(pMFile->f)); + + return tlen; +} + +void *tsdbDecodeSMFile(void *buf, SMFile *pMFile) { + buf = tsdbDecodeMFInfo(buf, &(pMFile->info)); + buf = tfsDecodeFile(buf, &(pMFile->f)); + TSDB_FILE_SET_CLOSED(pMFile); + + return buf; +} + +int tsdbEncodeSMFileEx(void **buf, SMFile *pMFile) { + int tlen = 0; + + tlen += tsdbEncodeMFInfo(buf, &(pMFile->info)); + tlen += taosEncodeString(buf, TSDB_FILE_FULL_NAME(pMFile)); + + return tlen; +} + +void *tsdbDecodeSMFileEx(void *buf, SMFile *pMFile) { + char *aname; + buf = tsdbDecodeMFInfo(buf, &(pMFile->info)); + buf = taosDecodeString(buf, &aname); + tstrncpy(TSDB_FILE_FULL_NAME(pMFile), aname, TSDB_FILENAME_LEN); + TSDB_FILE_SET_CLOSED(pMFile); + + tfree(aname); + + return buf; +} + +int tsdbApplyMFileChange(SMFile *from, SMFile *to) { + if (from == NULL && to == NULL) return 0; + + if (from != NULL) { + if (to == NULL) { + return tsdbRemoveMFile(from); + } else { + if (tfsIsSameFile(TSDB_FILE_F(from), TSDB_FILE_F(to))) { + if (from->info.size > to->info.size) { + tsdbRollBackMFile(to); + } + } else { + return tsdbRemoveMFile(from); + } + } + } + + return 0; +} + +int tsdbCreateMFile(SMFile *pMFile, bool updateHeader) { + ASSERT(pMFile->info.size == 0 && pMFile->info.magic == TSDB_FILE_INIT_MAGIC); + + pMFile->fd = open(TSDB_FILE_FULL_NAME(pMFile), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0755); + if (pMFile->fd < 0) { + if (errno == ENOENT) { + // Try to create directory recursively + char *s = strdup(TFILE_REL_NAME(&(pMFile->f))); + if (tfsMkdirRecurAt(dirname(s), TSDB_FILE_LEVEL(pMFile), TSDB_FILE_ID(pMFile)) < 0) { + tfree(s); + return -1; + } + tfree(s); + + pMFile->fd = open(TSDB_FILE_FULL_NAME(pMFile), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0755); + if (pMFile->fd < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + } else { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + } + + if (!updateHeader) { + return 0; + } + + 
pMFile->info.size += TSDB_FILE_HEAD_SIZE; + + if (tsdbUpdateMFileHeader(pMFile) < 0) { + tsdbCloseMFile(pMFile); + tsdbRemoveMFile(pMFile); + return -1; + } + + return 0; +} + +int tsdbUpdateMFileHeader(SMFile *pMFile) { + char buf[TSDB_FILE_HEAD_SIZE] = "\0"; + + if (tsdbSeekMFile(pMFile, 0, SEEK_SET) < 0) { + return -1; + } + + void *ptr = buf; + tsdbEncodeMFInfo(&ptr, TSDB_FILE_INFO(pMFile)); + + taosCalcChecksumAppend(0, (uint8_t *)buf, TSDB_FILE_HEAD_SIZE); + if (tsdbWriteMFile(pMFile, buf, TSDB_FILE_HEAD_SIZE) < 0) { + return -1; + } + + return 0; +} + +int tsdbLoadMFileHeader(SMFile *pMFile, SMFInfo *pInfo) { + char buf[TSDB_FILE_HEAD_SIZE] = "\0"; + + ASSERT(TSDB_FILE_OPENED(pMFile)); + + if (tsdbSeekMFile(pMFile, 0, SEEK_SET) < 0) { + return -1; + } + + if (tsdbReadMFile(pMFile, buf, TSDB_FILE_HEAD_SIZE) < 0) { + return -1; + } + + if (!taosCheckChecksumWhole((uint8_t *)buf, TSDB_FILE_HEAD_SIZE)) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + return -1; + } + + tsdbDecodeMFInfo(buf, pInfo); + return 0; +} + +int tsdbScanAndTryFixMFile(STsdb *pRepo) { + SMFile * pMFile = pRepo->fs->cstatus->pmf; + struct stat mfstat; + SMFile mf; + + if (pMFile == NULL) { + // No meta file, no need to scan + return 0; + } + + tsdbInitMFileEx(&mf, pMFile); + + if (access(TSDB_FILE_FULL_NAME(pMFile), F_OK) != 0) { + tsdbError("vgId:%d meta file %s not exist, report to upper layer to fix it", REPO_ID(pRepo), + TSDB_FILE_FULL_NAME(pMFile)); + pRepo->state |= TSDB_STATE_BAD_META; + TSDB_FILE_SET_STATE(pMFile, TSDB_FILE_STATE_BAD); + return 0; + } + + if (stat(TSDB_FILE_FULL_NAME(&mf), &mfstat) < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + if (pMFile->info.size < mfstat.st_size) { + if (tsdbOpenMFile(&mf, O_WRONLY) < 0) { + return -1; + } + + if (taosFtruncateFile(mf.fd, mf.info.size) < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + tsdbCloseMFile(&mf); + return -1; + } + + if (tsdbUpdateMFileHeader(&mf) < 0) { + tsdbCloseMFile(&mf); + return -1; + } + + 
tsdbCloseMFile(&mf); + tsdbInfo("vgId:%d file %s is truncated from %" PRId64 " to %" PRId64, REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), + mfstat.st_size, pMFile->info.size); + } else if (pMFile->info.size > mfstat.st_size) { + tsdbError("vgId:%d meta file %s has wrong size %" PRId64 " expected %" PRId64 ", report to upper layer to fix it", + REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), mfstat.st_size, pMFile->info.size); + pRepo->state |= TSDB_STATE_BAD_META; + TSDB_FILE_SET_STATE(pMFile, TSDB_FILE_STATE_BAD); + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + return 0; + } else { + tsdbDebug("vgId:%d meta file %s passes the scan", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile)); + } + + return 0; +} + +int tsdbEncodeMFInfo(void **buf, SMFInfo *pInfo) { + int tlen = 0; + + tlen += taosEncodeVariantI64(buf, pInfo->size); + tlen += taosEncodeVariantI64(buf, pInfo->tombSize); + tlen += taosEncodeVariantI64(buf, pInfo->nRecords); + tlen += taosEncodeVariantI64(buf, pInfo->nDels); + tlen += taosEncodeFixedU32(buf, pInfo->magic); + + return tlen; +} + +void *tsdbDecodeMFInfo(void *buf, SMFInfo *pInfo) { + buf = taosDecodeVariantI64(buf, &(pInfo->size)); + buf = taosDecodeVariantI64(buf, &(pInfo->tombSize)); + buf = taosDecodeVariantI64(buf, &(pInfo->nRecords)); + buf = taosDecodeVariantI64(buf, &(pInfo->nDels)); + buf = taosDecodeFixedU32(buf, &(pInfo->magic)); + + return buf; +} + +static int tsdbRollBackMFile(SMFile *pMFile) { + SMFile mf; + + tsdbInitMFileEx(&mf, pMFile); + + if (tsdbOpenMFile(&mf, O_WRONLY) < 0) { + return -1; + } + + if (taosFtruncateFile(TSDB_FILE_FD(&mf), pMFile->info.size) < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + tsdbCloseMFile(&mf); + return -1; + } + + if (tsdbUpdateMFileHeader(&mf) < 0) { + tsdbCloseMFile(&mf); + return -1; + } + + TSDB_FILE_FSYNC(&mf); + + tsdbCloseMFile(&mf); + return 0; +} + +// ============== Operations on SDFile +void tsdbInitDFile(SDFile *pDFile, SDiskID did, int vid, int fid, uint32_t ver, TSDB_FILE_T ftype) { + char 
fname[TSDB_FILENAME_LEN]; + + TSDB_FILE_SET_STATE(pDFile, TSDB_FILE_STATE_OK); + + TSDB_FILE_SET_CLOSED(pDFile); + + memset(&(pDFile->info), 0, sizeof(pDFile->info)); + pDFile->info.magic = TSDB_FILE_INIT_MAGIC; + pDFile->info.fver = tsdbGetDFSVersion(ftype); + + tsdbGetFilename(vid, fid, ver, ftype, fname); + tfsInitFile(&(pDFile->f), did.level, did.id, fname); +} + +void tsdbInitDFileEx(SDFile *pDFile, SDFile *pODFile) { + *pDFile = *pODFile; + TSDB_FILE_SET_CLOSED(pDFile); +} + +int tsdbEncodeSDFile(void **buf, SDFile *pDFile) { + int tlen = 0; + + tlen += tsdbEncodeDFInfo(buf, &(pDFile->info)); + tlen += tfsEncodeFile(buf, &(pDFile->f)); + + return tlen; +} + +void *tsdbDecodeSDFile(void *buf, SDFile *pDFile, uint32_t sfver) { + buf = tsdbDecodeDFInfo(buf, &(pDFile->info), sfver); + buf = tfsDecodeFile(buf, &(pDFile->f)); + TSDB_FILE_SET_CLOSED(pDFile); + + return buf; +} + +static int tsdbEncodeSDFileEx(void **buf, SDFile *pDFile) { + int tlen = 0; + + tlen += tsdbEncodeDFInfo(buf, &(pDFile->info)); + tlen += taosEncodeString(buf, TSDB_FILE_FULL_NAME(pDFile)); + + return tlen; +} + +static void *tsdbDecodeSDFileEx(void *buf, SDFile *pDFile) { + char *aname; + // The sync module would send DFileSet with latest verion. 
+ buf = tsdbDecodeDFInfo(buf, &(pDFile->info), TSDB_LATEST_SFS_VER); + buf = taosDecodeString(buf, &aname); + tstrncpy(TSDB_FILE_FULL_NAME(pDFile), aname, TSDB_FILENAME_LEN); + TSDB_FILE_SET_CLOSED(pDFile); + tfree(aname); + + return buf; +} + +int tsdbCreateDFile(SDFile *pDFile, bool updateHeader, TSDB_FILE_T fType) { + ASSERT(pDFile->info.size == 0 && pDFile->info.magic == TSDB_FILE_INIT_MAGIC); + + pDFile->fd = open(TSDB_FILE_FULL_NAME(pDFile), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0755); + if (pDFile->fd < 0) { + if (errno == ENOENT) { + // Try to create directory recursively + char *s = strdup(TFILE_REL_NAME(&(pDFile->f))); + if (tfsMkdirRecurAt(dirname(s), TSDB_FILE_LEVEL(pDFile), TSDB_FILE_ID(pDFile)) < 0) { + tfree(s); + return -1; + } + tfree(s); + + pDFile->fd = open(TSDB_FILE_FULL_NAME(pDFile), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0755); + if (pDFile->fd < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + } else { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + } + + if (!updateHeader) { + return 0; + } + + pDFile->info.size += TSDB_FILE_HEAD_SIZE; + pDFile->info.fver = tsdbGetDFSVersion(fType); + + if (tsdbUpdateDFileHeader(pDFile) < 0) { + tsdbCloseDFile(pDFile); + tsdbRemoveDFile(pDFile); + return -1; + } + + return 0; +} + +int tsdbUpdateDFileHeader(SDFile *pDFile) { + char buf[TSDB_FILE_HEAD_SIZE] = "\0"; + + if (tsdbSeekDFile(pDFile, 0, SEEK_SET) < 0) { + return -1; + } + + void *ptr = buf; + tsdbEncodeDFInfo(&ptr, &(pDFile->info)); + + taosCalcChecksumAppend(0, (uint8_t *)buf, TSDB_FILE_HEAD_SIZE); + if (tsdbWriteDFile(pDFile, buf, TSDB_FILE_HEAD_SIZE) < 0) { + return -1; + } + + return 0; +} + +int tsdbLoadDFileHeader(SDFile *pDFile, SDFInfo *pInfo) { + char buf[TSDB_FILE_HEAD_SIZE] = "\0"; + // uint32_t _version; + + ASSERT(TSDB_FILE_OPENED(pDFile)); + + if (tsdbSeekDFile(pDFile, 0, SEEK_SET) < 0) { + return -1; + } + + if (tsdbReadDFile(pDFile, buf, TSDB_FILE_HEAD_SIZE) < 0) { + return -1; + } + + if 
(!taosCheckChecksumWhole((uint8_t *)buf, TSDB_FILE_HEAD_SIZE)) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + return -1; + } + + void *pBuf = buf; + pBuf = tsdbDecodeDFInfo(pBuf, pInfo, TSDB_LATEST_FVER); // only make sure the parameter sfver > 0 + return 0; +} + +static int tsdbScanAndTryFixDFile(STsdb *pRepo, SDFile *pDFile) { + struct stat dfstat; + SDFile df; + + tsdbInitDFileEx(&df, pDFile); + + if (access(TSDB_FILE_FULL_NAME(pDFile), F_OK) != 0) { + tsdbError("vgId:%d data file %s not exist, report to upper layer to fix it", REPO_ID(pRepo), + TSDB_FILE_FULL_NAME(pDFile)); + pRepo->state |= TSDB_STATE_BAD_DATA; + TSDB_FILE_SET_STATE(pDFile, TSDB_FILE_STATE_BAD); + return 0; + } + + if (stat(TSDB_FILE_FULL_NAME(&df), &dfstat) < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + if (pDFile->info.size < dfstat.st_size) { + if (tsdbOpenDFile(&df, O_WRONLY) < 0) { + return -1; + } + + if (taosFtruncateFile(df.fd, df.info.size) < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + tsdbCloseDFile(&df); + return -1; + } + + if (tsdbUpdateDFileHeader(&df) < 0) { + tsdbCloseDFile(&df); + return -1; + } + + tsdbCloseDFile(&df); + tsdbInfo("vgId:%d file %s is truncated from %" PRId64 " to %" PRId64, REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile), + dfstat.st_size, pDFile->info.size); + } else if (pDFile->info.size > dfstat.st_size) { + tsdbError("vgId:%d data file %s has wrong size %" PRId64 " expected %" PRId64 ", report to upper layer to fix it", + REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile), dfstat.st_size, pDFile->info.size); + pRepo->state |= TSDB_STATE_BAD_DATA; + TSDB_FILE_SET_STATE(pDFile, TSDB_FILE_STATE_BAD); + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + return 0; + } else { + tsdbDebug("vgId:%d file %s passes the scan", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile)); + } + + return 0; +} + +static int tsdbEncodeDFInfo(void **buf, SDFInfo *pInfo) { + int tlen = 0; + tlen += taosEncodeFixedU32(buf, pInfo->fver); + tlen += taosEncodeFixedU32(buf, pInfo->magic); + tlen 
+= taosEncodeFixedU32(buf, pInfo->len); + tlen += taosEncodeFixedU32(buf, pInfo->totalBlocks); + tlen += taosEncodeFixedU32(buf, pInfo->totalSubBlocks); + tlen += taosEncodeFixedU32(buf, pInfo->offset); + tlen += taosEncodeFixedU64(buf, pInfo->size); + tlen += taosEncodeFixedU64(buf, pInfo->tombSize); + + return tlen; +} + +static void *tsdbDecodeDFInfo(void *buf, SDFInfo *pInfo, TSDB_FVER_TYPE sfver) { + if (sfver > TSDB_FS_VER_0) { + buf = taosDecodeFixedU32(buf, &(pInfo->fver)); + } else { + pInfo->fver = TSDB_FS_VER_0; // default value + } + buf = taosDecodeFixedU32(buf, &(pInfo->magic)); + buf = taosDecodeFixedU32(buf, &(pInfo->len)); + buf = taosDecodeFixedU32(buf, &(pInfo->totalBlocks)); + buf = taosDecodeFixedU32(buf, &(pInfo->totalSubBlocks)); + buf = taosDecodeFixedU32(buf, &(pInfo->offset)); + buf = taosDecodeFixedU64(buf, &(pInfo->size)); + buf = taosDecodeFixedU64(buf, &(pInfo->tombSize)); + + return buf; +} + +static int tsdbApplyDFileChange(SDFile *from, SDFile *to) { + ASSERT(from != NULL || to != NULL); + + if (from != NULL) { + if (to == NULL) { + tsdbRemoveDFile(from); + } else { + if (tfsIsSameFile(TSDB_FILE_F(from), TSDB_FILE_F(to))) { + if (from->info.size > to->info.size) { + tsdbRollBackDFile(to); + } + } else { + (void)tsdbRemoveDFile(from); + } + } + } + + return 0; +} + +static int tsdbRollBackDFile(SDFile *pDFile) { + SDFile df = *pDFile; + + if (tsdbOpenDFile(&df, O_WRONLY) < 0) { + return -1; + } + + if (taosFtruncateFile(TSDB_FILE_FD(&df), pDFile->info.size) < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + tsdbCloseDFile(&df); + return -1; + } + + if (tsdbUpdateDFileHeader(&df) < 0) { + tsdbCloseDFile(&df); + return -1; + } + + TSDB_FILE_FSYNC(&df); + + tsdbCloseDFile(&df); + return 0; +} + +// ============== Operations on SDFileSet +void tsdbInitDFileSet(SDFileSet *pSet, SDiskID did, int vid, int fid, uint32_t ver, uint16_t fsetVer) { + pSet->fid = fid; + pSet->state = 0; + pSet->ver = fsetVer; + + for (TSDB_FILE_T ftype = 0; ftype < 
tsdbGetNFiles(pSet); ftype++) { + SDFile *pDFile = TSDB_DFILE_IN_SET(pSet, ftype); + tsdbInitDFile(pDFile, did, vid, fid, ver, ftype); + } +} + +void tsdbInitDFileSetEx(SDFileSet *pSet, SDFileSet *pOSet) { + ASSERT_TSDB_FSET_NFILES_VALID(pOSet); + pSet->fid = pOSet->fid; + pSet->ver = pOSet->ver; + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) { + tsdbInitDFileEx(TSDB_DFILE_IN_SET(pSet, ftype), TSDB_DFILE_IN_SET(pOSet, ftype)); + } +} + +int tsdbEncodeDFileSet(void **buf, SDFileSet *pSet) { + int tlen = 0; + + tlen += taosEncodeFixedI32(buf, pSet->fid); + tlen += taosEncodeFixedU16(buf, pSet->ver); + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) { + tlen += tsdbEncodeSDFile(buf, TSDB_DFILE_IN_SET(pSet, ftype)); + } + + return tlen; +} + +void *tsdbDecodeDFileSet(void *buf, SDFileSet *pSet, uint32_t sfver) { + int32_t fid; + + buf = taosDecodeFixedI32(buf, &(fid)); + pSet->state = 0; + pSet->fid = fid; + + if (sfver > TSDB_FS_VER_0) { + buf = taosDecodeFixedU16(buf, &(pSet->ver)); + } + + ASSERT_TSDB_FSET_NFILES_VALID(pSet); + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) { + buf = tsdbDecodeSDFile(buf, TSDB_DFILE_IN_SET(pSet, ftype), sfver); + } + return buf; +} + +int tsdbEncodeDFileSetEx(void **buf, SDFileSet *pSet) { + int tlen = 0; + + tlen += taosEncodeFixedI32(buf, pSet->fid); + tlen += taosEncodeFixedU16(buf, pSet->ver); + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) { + tlen += tsdbEncodeSDFileEx(buf, TSDB_DFILE_IN_SET(pSet, ftype)); + } + + return tlen; +} + +void *tsdbDecodeDFileSetEx(void *buf, SDFileSet *pSet) { + int32_t fid; + + buf = taosDecodeFixedI32(buf, &(fid)); + buf = taosDecodeFixedU16(buf, &(pSet->ver)); + pSet->fid = fid; + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) { + buf = tsdbDecodeSDFileEx(buf, TSDB_DFILE_IN_SET(pSet, ftype)); + } + return buf; +} + +int tsdbApplyDFileSetChange(SDFileSet *from, SDFileSet *to) { + uint8_t nFilesFrom = 
from ? tsdbGetNFiles(from) : 0; + uint8_t nFilesTo = to ? tsdbGetNFiles(to) : 0; + for (TSDB_FILE_T ftype = 0; ftype < MAX(nFilesFrom, nFilesTo); ftype++) { + SDFile *pDFileFrom = ftype < nFilesFrom ? TSDB_DFILE_IN_SET(from, ftype) : NULL; + SDFile *pDFileTo = ftype < nFilesTo ? TSDB_DFILE_IN_SET(to, ftype) : NULL; + if (tsdbApplyDFileChange(pDFileFrom, pDFileTo) < 0) { + return -1; + } + } + + return 0; +} + +int tsdbCreateDFileSet(SDFileSet *pSet, bool updateHeader) { + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) { + if (tsdbCreateDFile(TSDB_DFILE_IN_SET(pSet, ftype), updateHeader, ftype) < 0) { + tsdbCloseDFileSet(pSet); + tsdbRemoveDFileSet(pSet); + return -1; + } + } + + return 0; +} + +int tsdbUpdateDFileSetHeader(SDFileSet *pSet) { + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) { + if (tsdbUpdateDFileHeader(TSDB_DFILE_IN_SET(pSet, ftype)) < 0) { + return -1; + } + } + return 0; +} + +int tsdbScanAndTryFixDFileSet(STsdb *pRepo, SDFileSet *pSet) { + ASSERT_TSDB_FSET_NFILES_VALID(pSet); + for (TSDB_FILE_T ftype = 0; ftype < tsdbGetNFiles(pSet); ftype++) { + if (tsdbScanAndTryFixDFile(pRepo, TSDB_DFILE_IN_SET(pSet, ftype)) < 0) { + return -1; + } + } + return 0; +} + +// Split a DFile base name of the form v<vid>f<fid>.<suffix>[-ver<N>] into its fields. *_version defaults to 0, +// and *ftype stays TSDB_FILE_MAX when the suffix is not one of the first TSDB_FILE_MAX TSDB_FNAME_SUFFIX entries. +// Returns 0, or -1 when sscanf could not extract a suffix at all (*vid / *fid may then be left unset). +int tsdbParseDFilename(const char *fname, int *vid, int *fid, TSDB_FILE_T *ftype, uint32_t *_version) { + char *p = NULL; + *_version = 0; + *ftype = TSDB_FILE_MAX; + + sscanf(fname, "v%df%d.%m[a-z]-ver%" PRIu32, vid, fid, &p, _version); + if (p == NULL) { + // "%m[a-z]" stores a buffer only when the suffix conversion succeeds; there is nothing to compare or free + return -1; + } + + for (TSDB_FILE_T i = 0; i < TSDB_FILE_MAX; i++) { + if (strcmp(p, TSDB_FNAME_SUFFIX[i]) == 0) { + *ftype = i; + break; + } + } + + tfree(p); + return 0; +} + +static void tsdbGetFilename(int vid, int fid, uint32_t ver, TSDB_FILE_T ftype, char *fname) { + ASSERT(ftype != TSDB_FILE_MAX); + + if (ftype < TSDB_FILE_MAX) { + if (ver == 0) { + snprintf(fname, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb/data/v%df%d.%s", vid, vid, fid, TSDB_FNAME_SUFFIX[ftype]); + } else { + snprintf(fname, TSDB_FILENAME_LEN,
"vnode/vnode%d/tsdb/data/v%df%d.%s-ver%" PRIu32, vid, vid, fid, + TSDB_FNAME_SUFFIX[ftype], ver); + } + } else { + if (ver == 0) { + snprintf(fname, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb/%s", vid, TSDB_FNAME_SUFFIX[ftype]); + } else { + snprintf(fname, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb/%s-ver%" PRIu32, vid, TSDB_FNAME_SUFFIX[ftype], ver); + } + } +} diff --git a/source/dnode/vnode/tsdb2/src/tsdbMain.c b/source/dnode/vnode/tsdb2/src/tsdbMain.c new file mode 100644 index 0000000000..bc5357f6af --- /dev/null +++ b/source/dnode/vnode/tsdb2/src/tsdbMain.c @@ -0,0 +1,1013 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +// no test file errors here +#include "taosdef.h" +#include "tsdbint.h" +#include "tthread.h" +#include "ttimer.h" + +#define IS_VALID_PRECISION(precision) \ + (((precision) >= TSDB_TIME_PRECISION_MILLI) && ((precision) <= TSDB_TIME_PRECISION_NANO)) +#define TSDB_DEFAULT_COMPRESSION TWO_STAGE_COMP +#define IS_VALID_COMPRESSION(compression) (((compression) >= NO_COMPRESSION) && ((compression) <= TWO_STAGE_COMP)) + +static int32_t tsdbCheckAndSetDefaultCfg(STsdbCfg *pCfg); +static STsdb * tsdbNewRepo(STsdbCfg *pCfg, STsdbAppH *pAppH); +static void tsdbFreeRepo(STsdb *pRepo); +static void tsdbStartStream(STsdb *pRepo); +static void tsdbStopStream(STsdb *pRepo); +static int tsdbRestoreLastColumns(STsdb *pRepo, STable *pTable, SReadH *pReadh); +static int tsdbRestoreLastRow(STsdb *pRepo, STable *pTable, SReadH *pReadh, SBlockIdx *pIdx); + +STsdb *tsdbOpen(const char *path, STsdbCfg *pCfg) { + STsdb * pTsdb; + STsdbCfg config = *pCfg; + + terrno = TSDB_CODE_SUCCESS; + + // Check and set default configurations + if (tsdbCheckAndSetDefaultCfg(&config) < 0) { + tsdbError("vgId:%d failed to open TSDB repository since %s", config.tsdbId, tstrerror(terrno)); + return NULL; + } + + // Create new TSDB object + if ((pTsdb = tsdbNewRepo(&config, pAppH)) == NULL) { + tsdbError("vgId:%d failed to open TSDB repository while creating TSDB object since %s", config.tsdbId, + tstrerror(terrno)); + return NULL; + } + + // Open meta + if (tsdbOpenMeta(pTsdb) < 0) { + tsdbError("vgId:%d failed to open TSDB repository while opening Meta since %s", config.tsdbId, tstrerror(terrno)); + tsdbClose(pTsdb, false); + return NULL; + } + + if (tsdbOpenFS(pTsdb) < 0) { + tsdbError("vgId:%d failed to open TSDB repository while opening FS since %s", config.tsdbId, tstrerror(terrno)); + tsdbClose(pTsdb, false); + return NULL; + } + + // TODO: Restore information from data + if ((!(pTsdb->state & TSDB_STATE_BAD_DATA)) && tsdbRestoreInfo(pTsdb) < 0) { + tsdbError("vgId:%d failed to open TSDB 
repository while restore info since %s", config.tsdbId, tstrerror(terrno)); + tsdbClose(pTsdb, false); + return NULL; + } + + pTsdb->mergeBuf = NULL; + + tsdbStartStream(pTsdb); + + tsdbDebug("vgId:%d, TSDB repository opened", REPO_ID(pTsdb)); + + return pTsdb; +} + +// Note: all working thread and query thread must stopped when calling this function +int tsdbClose(STsdb *repo, int toCommit) { + if (repo == NULL) return 0; + + STsdb *pRepo = repo; + int vgId = REPO_ID(pRepo); + + terrno = TSDB_CODE_SUCCESS; + + tsdbStopStream(pRepo); + if (pRepo->pthread) { + taosDestoryThread(pRepo->pthread); + pRepo->pthread = NULL; + } + + if (toCommit) { + tsdbSyncCommit(repo); + } + + tsem_wait(&(pRepo->readyToCommit)); + + tsdbUnRefMemTable(pRepo, pRepo->mem); + tsdbUnRefMemTable(pRepo, pRepo->imem); + pRepo->mem = NULL; + pRepo->imem = NULL; + + tsdbCloseFS(pRepo); + tsdbCloseMeta(pRepo); + tsdbFreeRepo(pRepo); + tsdbDebug("vgId:%d repository is closed", vgId); + + if (terrno != TSDB_CODE_SUCCESS) { + return -1; + } else { + return 0; + } +} + +STsdbCfg *tsdbGetCfg(const STsdb *repo) { + ASSERT(repo != NULL); + return &((STsdb *)repo)->config; +} + +// Acquire pRepo->mutex and flag the repo as locked. +// Returns 0, or -1 with terrno derived from the pthread return code (pthread_mutex_lock reports failure +// through its return value and does not set errno, so the message is built from that code as well). +int tsdbLockRepo(STsdb *pRepo) { + int code = pthread_mutex_lock(&pRepo->mutex); + if (code != 0) { + tsdbError("vgId:%d failed to lock tsdb since %s", REPO_ID(pRepo), strerror(code)); + terrno = TAOS_SYSTEM_ERROR(code); + return -1; + } + pRepo->repoLocked = true; + return 0; +} + +// Clear the locked flag and release pRepo->mutex. +// Returns 0, or -1 with terrno derived from the pthread return code. +int tsdbUnlockRepo(STsdb *pRepo) { + ASSERT(IS_REPO_LOCKED(pRepo)); + pRepo->repoLocked = false; + int code = pthread_mutex_unlock(&pRepo->mutex); + if (code != 0) { + tsdbError("vgId:%d failed to unlock tsdb since %s", REPO_ID(pRepo), strerror(code)); + terrno = TAOS_SYSTEM_ERROR(code); + return -1; + } + return 0; +} + +// int tsdbCheckWal(STsdbRepo *pRepo, uint32_t walSize) { // MB +// STsdbCfg *pCfg = &(pRepo->config); +// if ((walSize > tsdbWalFlushSize) && (walSize > (pCfg->totalBlocks / 2 * pCfg->cacheBlockSize))) { +// if
(tsdbAsyncCommit(pRepo) < 0) return -1; +// } +// return 0; +// } + +// int tsdbCheckCommit(STsdb *pRepo) { +// ASSERT(pRepo->mem != NULL); +// STsdbCfg *pCfg = &(pRepo->config); + +// STsdbBufBlock *pBufBlock = tsdbGetCurrBufBlock(pRepo); +// ASSERT(pBufBlock != NULL); +// if ((pRepo->mem->extraBuffList != NULL) || +// ((listNEles(pRepo->mem->bufBlockList) >= pCfg->totalBlocks / 3) && (pBufBlock->remain < TSDB_BUFFER_RESERVE))) +// { +// // trigger commit +// if (tsdbAsyncCommit(pRepo) < 0) return -1; +// } +// return 0; +// } + +STsdbMeta *tsdbGetMeta(STsdb *pRepo) { return pRepo->tsdbMeta; } + +STsdbRepoInfo *tsdbGetStatus(STsdb *pRepo) { return NULL; } + +int tsdbGetState(STsdb *repo) { return repo->state; } + +int8_t tsdbGetCompactState(STsdb *repo) { return (int8_t)(repo->compactState); } + +void tsdbReportStat(void *repo, int64_t *totalPoints, int64_t *totalStorage, int64_t *compStorage) { + ASSERT(repo != NULL); + STsdb *pRepo = repo; + *totalPoints = pRepo->stat.pointsWritten; + *totalStorage = pRepo->stat.totalStorage; + *compStorage = pRepo->stat.compStorage; +} + +int32_t tsdbConfigRepo(STsdb *repo, STsdbCfg *pCfg) { + // TODO: think about multithread cases + if (tsdbCheckAndSetDefaultCfg(pCfg) < 0) return -1; + + STsdbCfg *pRCfg = &repo->config; + + ASSERT(pRCfg->tsdbId == pCfg->tsdbId); + ASSERT(pRCfg->cacheBlockSize == pCfg->cacheBlockSize); + ASSERT(pRCfg->daysPerFile == pCfg->daysPerFile); + ASSERT(pRCfg->minRowsPerFileBlock == pCfg->minRowsPerFileBlock); + ASSERT(pRCfg->maxRowsPerFileBlock == pCfg->maxRowsPerFileBlock); + ASSERT(pRCfg->precision == pCfg->precision); + + bool configChanged = false; + if (pRCfg->compression != pCfg->compression) { + configChanged = true; + } + if (pRCfg->keep != pCfg->keep) { + configChanged = true; + } + if (pRCfg->keep1 != pCfg->keep1) { + configChanged = true; + } + if (pRCfg->keep2 != pCfg->keep2) { + configChanged = true; + } + if (pRCfg->cacheLastRow != pCfg->cacheLastRow) { + configChanged = true; + } + if 
(pRCfg->totalBlocks != pCfg->totalBlocks) { + configChanged = true; + } + + if (!configChanged) { + tsdbError("vgId:%d no config changed", REPO_ID(repo)); + } + + int code = pthread_mutex_lock(&repo->save_mutex); + if (code != 0) { + tsdbError("vgId:%d failed to lock tsdb save config mutex since %s", REPO_ID(repo), strerror(errno)); + terrno = TAOS_SYSTEM_ERROR(code); + return -1; + } + + STsdbCfg *pSaveCfg = &repo->save_config; + *pSaveCfg = repo->config; + + pSaveCfg->compression = pCfg->compression; + pSaveCfg->keep = pCfg->keep; + pSaveCfg->keep1 = pCfg->keep1; + pSaveCfg->keep2 = pCfg->keep2; + pSaveCfg->cacheLastRow = pCfg->cacheLastRow; + pSaveCfg->totalBlocks = pCfg->totalBlocks; + + tsdbInfo("vgId:%d old config: compression(%d), keep(%d,%d,%d), cacheLastRow(%d),totalBlocks(%d)", REPO_ID(repo), + pRCfg->compression, pRCfg->keep, pRCfg->keep1, pRCfg->keep2, pRCfg->cacheLastRow, pRCfg->totalBlocks); + tsdbInfo("vgId:%d new config: compression(%d), keep(%d,%d,%d), cacheLastRow(%d),totalBlocks(%d)", REPO_ID(repo), + pSaveCfg->compression, pSaveCfg->keep, pSaveCfg->keep1, pSaveCfg->keep2, pSaveCfg->cacheLastRow, + pSaveCfg->totalBlocks); + + repo->config_changed = true; + + pthread_mutex_unlock(&repo->save_mutex); + + // schedule a commit msg and wait for the new config applied + tsdbSyncCommitConfig(repo); + + return 0; +#if 0 + STsdbRepo *pRepo = (STsdbRepo *)repo; + STsdbCfg config = pRepo->config; + STsdbCfg * pRCfg = &pRepo->config; + + if (tsdbCheckAndSetDefaultCfg(pCfg) < 0) return -1; + + ASSERT(pRCfg->tsdbId == pCfg->tsdbId); + ASSERT(pRCfg->cacheBlockSize == pCfg->cacheBlockSize); + ASSERT(pRCfg->daysPerFile == pCfg->daysPerFile); + ASSERT(pRCfg->minRowsPerFileBlock == pCfg->minRowsPerFileBlock); + ASSERT(pRCfg->maxRowsPerFileBlock == pCfg->maxRowsPerFileBlock); + ASSERT(pRCfg->precision == pCfg->precision); + + bool configChanged = false; + if (pRCfg->compression != pCfg->compression) { + tsdbAlterCompression(pRepo, pCfg->compression); + 
config.compression = pCfg->compression; + configChanged = true; + } + if (pRCfg->keep != pCfg->keep) { + if (tsdbAlterKeep(pRepo, pCfg->keep) < 0) { + tsdbError("vgId:%d failed to configure repo when alter keep since %s", REPO_ID(pRepo), tstrerror(terrno)); + config.keep = pCfg->keep; + return -1; + } + configChanged = true; + } + if (pRCfg->totalBlocks != pCfg->totalBlocks) { + tsdbAlterCacheTotalBlocks(pRepo, pCfg->totalBlocks); + config.totalBlocks = pCfg->totalBlocks; + configChanged = true; + } + if (pRCfg->cacheLastRow != pCfg->cacheLastRow) { + config.cacheLastRow = pCfg->cacheLastRow; + configChanged = true; + } + + if (configChanged) { + if (tsdbSaveConfig(pRepo->rootDir, &config) < 0) { + tsdbError("vgId:%d failed to configure repository while save config since %s", REPO_ID(pRepo), tstrerror(terrno)); + return -1; + } + } + + return 0; +#endif +} + +void tsdbGetRootDir(int repoid, char dirName[]) { snprintf(dirName, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb", repoid); } + +void tsdbGetDataDir(int repoid, char dirName[]) { + snprintf(dirName, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb/data", repoid); +} + +// Validate *pCfg in place. Fields set to -1 (precision, compression, daysPerFile, min/maxRowsPerFileBlock) are +// replaced by their TSDB_DEFAULT_* values; out-of-range values are logged and rejected. +// Returns 0 on success, or -1 with terrno = TSDB_CODE_TDB_INVALID_CONFIG on the first invalid field. +static int32_t tsdbCheckAndSetDefaultCfg(STsdbCfg *pCfg) { + // Check tsdbId + if (pCfg->tsdbId < 0) { + tsdbError("vgId:%d invalid vgroup ID", pCfg->tsdbId); + terrno = TSDB_CODE_TDB_INVALID_CONFIG; + return -1; + } + + // Check precision + if (pCfg->precision == -1) { + pCfg->precision = TSDB_DEFAULT_PRECISION; + } else { + if (!IS_VALID_PRECISION(pCfg->precision)) { + tsdbError("vgId:%d invalid precision configuration %d", pCfg->tsdbId, pCfg->precision); + terrno = TSDB_CODE_TDB_INVALID_CONFIG; + return -1; + } + } + + // Check compression + if (pCfg->compression == -1) { + pCfg->compression = TSDB_DEFAULT_COMPRESSION; + } else { + if (!IS_VALID_COMPRESSION(pCfg->compression)) { + // report the rejected compression value itself + tsdbError("vgId:%d invalid compression configuration %d", pCfg->tsdbId, pCfg->compression); + terrno = TSDB_CODE_TDB_INVALID_CONFIG; + return -1; + } + } + + // Check daysPerFile + 
if (pCfg->daysPerFile == -1) { + pCfg->daysPerFile = TSDB_DEFAULT_DAYS_PER_FILE; + } else { + if (pCfg->daysPerFile < TSDB_MIN_DAYS_PER_FILE || pCfg->daysPerFile > TSDB_MAX_DAYS_PER_FILE) { + tsdbError( + "vgId:%d invalid daysPerFile configuration! daysPerFile %d TSDB_MIN_DAYS_PER_FILE %d TSDB_MAX_DAYS_PER_FILE " + "%d", + pCfg->tsdbId, pCfg->daysPerFile, TSDB_MIN_DAYS_PER_FILE, TSDB_MAX_DAYS_PER_FILE); + terrno = TSDB_CODE_TDB_INVALID_CONFIG; + return -1; + } + } + + // Check minRowsPerFileBlock and maxRowsPerFileBlock + if (pCfg->minRowsPerFileBlock == -1) { + pCfg->minRowsPerFileBlock = TSDB_DEFAULT_MIN_ROW_FBLOCK; + } else { + if (pCfg->minRowsPerFileBlock < TSDB_MIN_MIN_ROW_FBLOCK || pCfg->minRowsPerFileBlock > TSDB_MAX_MIN_ROW_FBLOCK) { + tsdbError( + "vgId:%d invalid minRowsPerFileBlock configuration! minRowsPerFileBlock %d TSDB_MIN_MIN_ROW_FBLOCK %d " + "TSDB_MAX_MIN_ROW_FBLOCK %d", + pCfg->tsdbId, pCfg->minRowsPerFileBlock, TSDB_MIN_MIN_ROW_FBLOCK, TSDB_MAX_MIN_ROW_FBLOCK); + terrno = TSDB_CODE_TDB_INVALID_CONFIG; + return -1; + } + } + + if (pCfg->maxRowsPerFileBlock == -1) { + pCfg->maxRowsPerFileBlock = TSDB_DEFAULT_MAX_ROW_FBLOCK; + } else { + if (pCfg->maxRowsPerFileBlock < TSDB_MIN_MAX_ROW_FBLOCK || pCfg->maxRowsPerFileBlock > TSDB_MAX_MAX_ROW_FBLOCK) { + // log the limits that were actually tested, matching the names in the format string + tsdbError( + "vgId:%d invalid maxRowsPerFileBlock configuration! maxRowsPerFileBlock %d TSDB_MIN_MAX_ROW_FBLOCK %d " + "TSDB_MAX_MAX_ROW_FBLOCK %d", + pCfg->tsdbId, pCfg->maxRowsPerFileBlock, TSDB_MIN_MAX_ROW_FBLOCK, TSDB_MAX_MAX_ROW_FBLOCK); + terrno = TSDB_CODE_TDB_INVALID_CONFIG; + return -1; + } + } + + if (pCfg->minRowsPerFileBlock > pCfg->maxRowsPerFileBlock) { + tsdbError("vgId:%d invalid configuration! 
minRowsPerFileBlock %d maxRowsPerFileBlock %d", pCfg->tsdbId, + pCfg->minRowsPerFileBlock, pCfg->maxRowsPerFileBlock); + terrno = TSDB_CODE_TDB_INVALID_CONFIG; + return -1; + } + + // Check keep +#if 0 // already checked and set in mnodeSetDefaultDbCfg + if (pCfg->keep == -1) { + pCfg->keep = TSDB_DEFAULT_KEEP; + } else { + if (pCfg->keep < TSDB_MIN_KEEP || pCfg->keep > TSDB_MAX_KEEP) { + tsdbError( + "vgId:%d invalid keep configuration! keep %d TSDB_MIN_KEEP %d " + "TSDB_MAX_KEEP %d", + pCfg->tsdbId, pCfg->keep, TSDB_MIN_KEEP, TSDB_MAX_KEEP); + terrno = TSDB_CODE_TDB_INVALID_CONFIG; + return -1; + } + } + + if (pCfg->keep1 == 0) { + pCfg->keep1 = pCfg->keep; + } + + if (pCfg->keep2 == 0) { + pCfg->keep2 = pCfg->keep; + } +#endif + + int32_t keepMin = pCfg->keep1; + int32_t keepMid = pCfg->keep2; + int32_t keepMax = pCfg->keep; + + if (keepMin > keepMid) { + SWAP(keepMin, keepMid, int32_t); + } + if (keepMin > keepMax) { + SWAP(keepMin, keepMax, int32_t); + } + if (keepMid > keepMax) { + SWAP(keepMid, keepMax, int32_t); + } + + pCfg->keep = keepMax; + pCfg->keep1 = keepMin; + pCfg->keep2 = keepMid; + // update check + if (pCfg->update < TD_ROW_DISCARD_UPDATE || pCfg->update > TD_ROW_PARTIAL_UPDATE) + pCfg->update = TD_ROW_DISCARD_UPDATE; + + // update cacheLastRow + if (pCfg->cacheLastRow != 0) { + if (pCfg->cacheLastRow > 3) pCfg->cacheLastRow = 1; + } + return 0; +} + +static STsdb *tsdbNewRepo(STsdbCfg *pCfg, STsdbAppH *pAppH) { + STsdb *pRepo = (STsdb *)calloc(1, sizeof(*pRepo)); + if (pRepo == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return NULL; + } + + pRepo->state = TSDB_STATE_OK; + pRepo->code = TSDB_CODE_SUCCESS; + pRepo->compactState = 0; + pRepo->config = *pCfg; + if (pAppH) { + pRepo->appH = *pAppH; + } + pRepo->repoLocked = false; + pRepo->pthread = NULL; + + int code = pthread_mutex_init(&(pRepo->mutex), NULL); + if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(code); + tsdbFreeRepo(pRepo); + return NULL; + } + + code = 
pthread_mutex_init(&(pRepo->save_mutex), NULL); + if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(code); + tsdbFreeRepo(pRepo); + return NULL; + } + pRepo->config_changed = false; + pRepo->cacheLastConfigVersion = 0; + + code = tsem_init(&(pRepo->readyToCommit), 0, 1); + if (code != 0) { + code = errno; + terrno = TAOS_SYSTEM_ERROR(code); + tsdbFreeRepo(pRepo); + return NULL; + } + + pRepo->tsdbMeta = tsdbNewMeta(pCfg); + if (pRepo->tsdbMeta == NULL) { + tsdbError("vgId:%d failed to create meta since %s", REPO_ID(pRepo), tstrerror(terrno)); + tsdbFreeRepo(pRepo); + return NULL; + } + + pRepo->fs = tsdbNewFS(pCfg); + if (pRepo->fs == NULL) { + tsdbError("vgId:%d failed to TSDB file system since %s", REPO_ID(pRepo), tstrerror(terrno)); + tsdbFreeRepo(pRepo); + return NULL; + } + + return pRepo; +} + +// Release what pRepo owns (file system, meta, merge buffer, commit semaphore, both mutexes) and then the object +// itself. A NULL pRepo is a no-op. +static void tsdbFreeRepo(STsdb *pRepo) { + if (pRepo) { + tsdbFreeFS(pRepo->fs); + tsdbFreeMeta(pRepo->tsdbMeta); + tsdbFreeMergeBuf(pRepo->mergeBuf); + // tsdbFreeMemTable(pRepo->mem); + // tsdbFreeMemTable(pRepo->imem); + tsem_destroy(&(pRepo->readyToCommit)); + // save_mutex is initialised next to mutex in tsdbNewRepo, so it is destroyed next to it here + pthread_mutex_destroy(&pRepo->save_mutex); + pthread_mutex_destroy(&pRepo->mutex); + free(pRepo); + } +} + +static void tsdbStartStream(STsdb *pRepo) { + STsdbMeta *pMeta = pRepo->tsdbMeta; + + for (int i = 0; i < pMeta->maxTables; i++) { + STable *pTable = pMeta->tables[i]; + if (pTable && pTable->type == TSDB_STREAM_TABLE) { + pTable->cqhandle = + (*pRepo->appH.cqCreateFunc)(pRepo->appH.cqH, TABLE_UID(pTable), TABLE_TID(pTable), TABLE_NAME(pTable)->data, + pTable->sql, tsdbGetTableSchemaImpl(pTable, false, false, -1, -1), 0); + } + } +} + +static void tsdbStopStream(STsdb *pRepo) { + STsdbMeta *pMeta = pRepo->tsdbMeta; + + for (int i = 0; i < pMeta->maxTables; i++) { + STable *pTable = pMeta->tables[i]; + if (pTable && pTable->type == TSDB_STREAM_TABLE) { + (*pRepo->appH.cqDropFunc)(pTable->cqhandle); + } + } +} + +static int tsdbRestoreLastColumns(STsdb *pRepo, STable *pTable, SReadH *pReadh) { + // tsdbInfo("tsdbRestoreLastColumns of table %s", 
pTable->name->data); + + STSchema *pSchema = tsdbGetTableLatestSchema(pTable); + if (pSchema == NULL) { + tsdbError("tsdbGetTableLatestSchema of table %s fail", pTable->name->data); + return 0; + } + + SBlock * pBlock; + int numColumns; + int32_t blockIdx; + SDataStatis *pBlockStatis = NULL; + // SMemRow row = NULL; + // restore last column data with last schema + + int err = 0; + + numColumns = schemaNCols(pSchema); + if (numColumns <= pTable->restoreColumnNum) { + pTable->hasRestoreLastColumn = true; + return 0; + } + if (pTable->lastColSVersion != schemaVersion(pSchema)) { + if (tsdbInitColIdCacheWithSchema(pTable, pSchema) < 0) { + return -1; + } + } + + // row = taosTMalloc(memRowMaxBytesFromSchema(pSchema)); + // if (row == NULL) { + // terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + // err = -1; + // goto out; + // } + + // memRowSetType(row, SMEM_ROW_DATA); + // tdInitDataRow(memRowDataBody(row), pSchema); + + // first load block index info + if (tsdbLoadBlockInfo(pReadh, NULL, NULL) < 0) { + err = -1; + goto out; + } + + pBlockStatis = calloc(numColumns, sizeof(SDataStatis)); + if (pBlockStatis == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + err = -1; + goto out; + } + memset(pBlockStatis, 0, numColumns * sizeof(SDataStatis)); + for (int32_t i = 0; i < numColumns; ++i) { + STColumn *pCol = schemaColAt(pSchema, i); + pBlockStatis[i].colId = pCol->colId; + } + + // load block from backward + SBlockIdx *pIdx = pReadh->pBlkIdx; + blockIdx = (int32_t)(pIdx->numOfBlocks - 1); + + while (numColumns > pTable->restoreColumnNum && blockIdx >= 0) { + bool loadStatisData = false; + pBlock = pReadh->pBlkInfo->blocks + blockIdx; + blockIdx -= 1; + + // load block data + if (tsdbLoadBlockData(pReadh, pBlock, NULL) < 0) { + err = -1; + goto out; + } + + // file block with sub-blocks has no statistics data + if (pBlock->numOfSubBlocks <= 1) { + if (tsdbLoadBlockStatis(pReadh, pBlock) == 0) { + tsdbGetBlockStatis(pReadh, pBlockStatis, (int)numColumns, pBlock); + loadStatisData 
= true; + } + } + TSDB_WLOCK_TABLE(pTable); // lock when update pTable->lastCols[] + for (int16_t i = 0; i < numColumns && numColumns > pTable->restoreColumnNum; ++i) { + STColumn *pCol = schemaColAt(pSchema, i); + // ignore loaded columns + if (pTable->lastCols[i].bytes != 0) { + continue; + } + + // ignore block which has no not-null colId column + if (loadStatisData && pBlockStatis[i].numOfNull == pBlock->numOfRows) { + continue; + } + + // OK,let's load row from backward to get not-null column + for (int32_t rowId = pBlock->numOfRows - 1; rowId >= 0; rowId--) { + SDataCol * pDataCol = pReadh->pDCols[0]->cols + i; + const void *pColData = tdGetColDataOfRow(pDataCol, rowId); + // tdAppendColVal(memRowDataBody(row), pColData, pCol->type, pCol->offset); + // SDataCol *pDataCol = readh.pDCols[0]->cols + j; + // void *value = tdGetRowDataOfCol(memRowDataBody(row), (int8_t)pCol->type, TD_DATA_ROW_HEAD_SIZE + + // + // pCol->offset); + if (isNull(pColData, pCol->type)) { + continue; + } + + int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pCol->colId); + if (idx == -1) { + tsdbError("tsdbRestoreLastColumns restore vgId:%d,table:%s cache column %d fail", REPO_ID(pRepo), + pTable->name->data, pCol->colId); + continue; + } + // save not-null column + uint16_t bytes = IS_VAR_DATA_TYPE(pCol->type) ? 
varDataTLen(pColData) : pCol->bytes; + SDataCol *pLastCol = &(pTable->lastCols[idx]); + pLastCol->pData = malloc(bytes); + pLastCol->bytes = bytes; + pLastCol->colId = pCol->colId; + memcpy(pLastCol->pData, pColData, bytes); + + // save row ts(in column 0) + pDataCol = pReadh->pDCols[0]->cols + 0; + // pCol = schemaColAt(pSchema, 0); + // tdAppendColVal(memRowDataBody(row), tdGetColDataOfRow(pDataCol, rowId), pCol->type, pCol->offset); + // pLastCol->ts = memRowKey(row); + pLastCol->ts = tdGetKey(*(TKEY *)(tdGetColDataOfRow(pDataCol, rowId))); + + pTable->restoreColumnNum += 1; + + tsdbDebug("tsdbRestoreLastColumns restore vgId:%d,table:%s cache column %d, %" PRId64, REPO_ID(pRepo), + pTable->name->data, pLastCol->colId, pLastCol->ts); + break; + } + } + TSDB_WUNLOCK_TABLE(pTable); + } + +out: + // taosTZfree(row); + tfree(pBlockStatis); + + if (err == 0 && numColumns <= pTable->restoreColumnNum) { + pTable->hasRestoreLastColumn = true; + } + + return err; +} + +static int tsdbRestoreLastRow(STsdb *pRepo, STable *pTable, SReadH *pReadh, SBlockIdx *pIdx) { + ASSERT(pTable->lastRow == NULL); + if (tsdbLoadBlockInfo(pReadh, NULL, NULL) < 0) { + return -1; + } + + SBlock *pBlock = pReadh->pBlkInfo->blocks + pIdx->numOfBlocks - 1; + + if (tsdbLoadBlockData(pReadh, pBlock, NULL) < 0) { + return -1; + } + + // Get the data in row + + STSchema *pSchema = tsdbGetTableSchema(pTable); + SMemRow lastRow = taosTMalloc(memRowMaxBytesFromSchema(pSchema)); + if (lastRow == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + memRowSetType(lastRow, SMEM_ROW_DATA); + tdInitDataRow(memRowDataBody(lastRow), pSchema); + for (int icol = 0; icol < schemaNCols(pSchema); icol++) { + STColumn *pCol = schemaColAt(pSchema, icol); + SDataCol *pDataCol = pReadh->pDCols[0]->cols + icol; + tdAppendColVal(memRowDataBody(lastRow), tdGetColDataOfRow(pDataCol, pBlock->numOfRows - 1), pCol->type, + pCol->offset); + } + + TSKEY lastKey = memRowKey(lastRow); + + // during the load data in 
file, new data would be inserted and last row has been updated + TSDB_WLOCK_TABLE(pTable); + if (pTable->lastRow == NULL) { + pTable->lastKey = lastKey; + pTable->lastRow = lastRow; + TSDB_WUNLOCK_TABLE(pTable); + } else { + TSDB_WUNLOCK_TABLE(pTable); + taosTZfree(lastRow); + } + + return 0; +} + +int tsdbRestoreInfo(STsdb *pRepo) { + SFSIter fsiter; + SReadH readh; + SDFileSet *pSet; + STsdbMeta *pMeta = pRepo->tsdbMeta; + STsdbCfg * pCfg = REPO_CFG(pRepo); + + if (tsdbInitReadH(&readh, pRepo) < 0) { + return -1; + } + + tsdbFSIterInit(&fsiter, REPO_FS(pRepo), TSDB_FS_ITER_BACKWARD); + + if (CACHE_LAST_NULL_COLUMN(pCfg)) { + for (int i = 1; i < pMeta->maxTables; i++) { + STable *pTable = pMeta->tables[i]; + if (pTable == NULL) continue; + pTable->restoreColumnNum = 0; + pTable->hasRestoreLastColumn = false; + } + } + + while ((pSet = tsdbFSIterNext(&fsiter)) != NULL) { + if (tsdbSetAndOpenReadFSet(&readh, pSet) < 0) { + tsdbDestroyReadH(&readh); + return -1; + } + + if (tsdbLoadBlockIdx(&readh) < 0) { + tsdbDestroyReadH(&readh); + return -1; + } + + for (int i = 1; i < pMeta->maxTables; i++) { + STable *pTable = pMeta->tables[i]; + if (pTable == NULL) continue; + + // tsdbInfo("tsdbRestoreInfo restore vgId:%d,table:%s", REPO_ID(pRepo), pTable->name->data); + + if (tsdbSetReadTable(&readh, pTable) < 0) { + tsdbDestroyReadH(&readh); + return -1; + } + + TSKEY lastKey = tsdbGetTableLastKeyImpl(pTable); + SBlockIdx *pIdx = readh.pBlkIdx; + if (pIdx && lastKey < pIdx->maxKey) { + pTable->lastKey = pIdx->maxKey; + + if (CACHE_LAST_ROW(pCfg) && tsdbRestoreLastRow(pRepo, pTable, &readh, pIdx) != 0) { + tsdbDestroyReadH(&readh); + return -1; + } + } + + // restore NULL columns + if (pIdx && CACHE_LAST_NULL_COLUMN(pCfg) && !pTable->hasRestoreLastColumn) { + if (tsdbRestoreLastColumns(pRepo, pTable, &readh) != 0) { + tsdbDestroyReadH(&readh); + return -1; + } + } + } + } + + tsdbDestroyReadH(&readh); + + // if (CACHE_LAST_NULL_COLUMN(pCfg)) { + // 
atomic_store_8(&pRepo->hasCachedLastColumn, 1); + // } + + return 0; +} + +int32_t tsdbLoadLastCache(STsdb *pRepo, STable *pTable) { + SFSIter fsiter; + SReadH readh; + SDFileSet *pSet; + int cacheLastRowTableNum = 0; + int cacheLastColTableNum = 0; + + bool cacheLastRow = CACHE_LAST_ROW(&(pRepo->config)); + bool cacheLastCol = CACHE_LAST_NULL_COLUMN(&(pRepo->config)); + + tsdbDebug("tsdbLoadLastCache for %s, cacheLastRow:%d, cacheLastCol:%d", pTable->name->data, cacheLastRow, + cacheLastCol); + + pTable->cacheLastConfigVersion = pRepo->cacheLastConfigVersion; + + if (!cacheLastRow && pTable->lastRow != NULL) { + taosTZfree(pTable->lastRow); + pTable->lastRow = NULL; + } + if (!cacheLastCol && pTable->lastCols != NULL) { + tsdbFreeLastColumns(pTable); + } + + if (!cacheLastRow && !cacheLastCol) { + return 0; + } + + cacheLastRowTableNum = (cacheLastRow && pTable->lastRow == NULL) ? 1 : 0; + cacheLastColTableNum = (cacheLastCol && pTable->lastCols == NULL) ? 1 : 0; + + if (cacheLastRowTableNum == 0 && cacheLastColTableNum == 0) { + return 0; + } + + if (tsdbInitReadH(&readh, pRepo) < 0) { + return -1; + } + + tsdbRLockFS(REPO_FS(pRepo)); + tsdbFSIterInit(&fsiter, REPO_FS(pRepo), TSDB_FS_ITER_BACKWARD); + + while ((cacheLastRowTableNum > 0 || cacheLastColTableNum > 0) && (pSet = tsdbFSIterNext(&fsiter)) != NULL) { + if (tsdbSetAndOpenReadFSet(&readh, pSet) < 0) { + tsdbUnLockFS(REPO_FS(pRepo)); + tsdbDestroyReadH(&readh); + return -1; + } + + if (tsdbLoadBlockIdx(&readh) < 0) { + tsdbUnLockFS(REPO_FS(pRepo)); + tsdbDestroyReadH(&readh); + return -1; + } + + // tsdbDebug("tsdbRestoreInfo restore vgId:%d,table:%s", REPO_ID(pRepo), pTable->name->data); + + if (tsdbSetReadTable(&readh, pTable) < 0) { + tsdbUnLockFS(REPO_FS(pRepo)); + tsdbDestroyReadH(&readh); + return -1; + } + + SBlockIdx *pIdx = readh.pBlkIdx; + + if (pIdx && (cacheLastRowTableNum > 0) && (pTable->lastRow == NULL)) { + if (tsdbRestoreLastRow(pRepo, pTable, &readh, pIdx) != 0) { + 
tsdbUnLockFS(REPO_FS(pRepo)); + tsdbDestroyReadH(&readh); + return -1; + } + cacheLastRowTableNum -= 1; + } + + // restore NULL columns + if (pIdx && (cacheLastColTableNum > 0) && !pTable->hasRestoreLastColumn) { + if (tsdbRestoreLastColumns(pRepo, pTable, &readh) != 0) { + tsdbUnLockFS(REPO_FS(pRepo)); + tsdbDestroyReadH(&readh); + return -1; + } + if (pTable->hasRestoreLastColumn) { + cacheLastColTableNum -= 1; + } + } + } + + tsdbUnLockFS(REPO_FS(pRepo)); + tsdbDestroyReadH(&readh); + + return 0; +} + +UNUSED_FUNC int tsdbCacheLastData(STsdb *pRepo, STsdbCfg *oldCfg) { + bool cacheLastRow = false, cacheLastCol = false; + SFSIter fsiter; + SReadH readh; + SDFileSet *pSet; + STsdbMeta *pMeta = pRepo->tsdbMeta; + int tableNum = 0; + int maxTableIdx = 0; + int cacheLastRowTableNum = 0; + int cacheLastColTableNum = 0; + + bool need_free_last_row = CACHE_LAST_ROW(oldCfg) && !CACHE_LAST_ROW(&(pRepo->config)); + bool need_free_last_col = CACHE_LAST_NULL_COLUMN(oldCfg) && !CACHE_LAST_NULL_COLUMN(&(pRepo->config)); + + if (CACHE_LAST_ROW(&(pRepo->config)) || CACHE_LAST_NULL_COLUMN(&(pRepo->config))) { + tsdbInfo("tsdbCacheLastData cache last data since cacheLast option changed"); + cacheLastRow = !CACHE_LAST_ROW(oldCfg) && CACHE_LAST_ROW(&(pRepo->config)); + cacheLastCol = !CACHE_LAST_NULL_COLUMN(oldCfg) && CACHE_LAST_NULL_COLUMN(&(pRepo->config)); + } + + // calc max table idx and table num + for (int i = 1; i < pMeta->maxTables; i++) { + STable *pTable = pMeta->tables[i]; + if (pTable == NULL) continue; + tableNum += 1; + maxTableIdx = i; + if (cacheLastCol) { + pTable->restoreColumnNum = 0; + pTable->hasRestoreLastColumn = false; + } + } + + // if close last option,need to free data + if (need_free_last_row || need_free_last_col) { + // if (need_free_last_col) { + // atomic_store_8(&pRepo->hasCachedLastColumn, 0); + // } + tsdbInfo("free cache last data since cacheLast option changed"); + for (int i = 1; i <= maxTableIdx; i++) { + STable *pTable = pMeta->tables[i]; + 
if (pTable == NULL) continue; + if (need_free_last_row) { + taosTZfree(pTable->lastRow); + pTable->lastRow = NULL; + } + if (need_free_last_col) { + tsdbFreeLastColumns(pTable); + pTable->hasRestoreLastColumn = false; + } + } + } + + if (!cacheLastRow && !cacheLastCol) { + return 0; + } + + cacheLastRowTableNum = cacheLastRow ? tableNum : 0; + cacheLastColTableNum = cacheLastCol ? tableNum : 0; + + if (tsdbInitReadH(&readh, pRepo) < 0) { + return -1; + } + + tsdbFSIterInit(&fsiter, REPO_FS(pRepo), TSDB_FS_ITER_BACKWARD); + + while ((pSet = tsdbFSIterNext(&fsiter)) != NULL && (cacheLastRowTableNum > 0 || cacheLastColTableNum > 0)) { + if (tsdbSetAndOpenReadFSet(&readh, pSet) < 0) { + tsdbDestroyReadH(&readh); + return -1; + } + + if (tsdbLoadBlockIdx(&readh) < 0) { + tsdbDestroyReadH(&readh); + return -1; + } + + for (int i = 1; i <= maxTableIdx; i++) { + STable *pTable = pMeta->tables[i]; + if (pTable == NULL) continue; + + // tsdbInfo("tsdbRestoreInfo restore vgId:%d,table:%s", REPO_ID(pRepo), pTable->name->data); + + if (tsdbSetReadTable(&readh, pTable) < 0) { + tsdbDestroyReadH(&readh); + return -1; + } + + SBlockIdx *pIdx = readh.pBlkIdx; + + if (pIdx && cacheLastRowTableNum > 0 && pTable->lastRow == NULL) { + pTable->lastKey = pIdx->maxKey; + + if (tsdbRestoreLastRow(pRepo, pTable, &readh, pIdx) != 0) { + tsdbDestroyReadH(&readh); + return -1; + } + cacheLastRowTableNum -= 1; + } + + // restore NULL columns + if (pIdx && cacheLastColTableNum > 0 && !pTable->hasRestoreLastColumn) { + if (tsdbRestoreLastColumns(pRepo, pTable, &readh) != 0) { + tsdbDestroyReadH(&readh); + return -1; + } + if (pTable->hasRestoreLastColumn) { + cacheLastColTableNum -= 1; + } + } + } + } + + tsdbDestroyReadH(&readh); + + // if (cacheLastCol) { + // atomic_store_8(&pRepo->hasCachedLastColumn, 1); + // } + + return 0; +} diff --git a/source/dnode/vnode/tsdb2/src/tsdbMemTable.c b/source/dnode/vnode/tsdb2/src/tsdbMemTable.c new file mode 100644 index 0000000000..6a868258e1 --- /dev/null 
+++ b/source/dnode/vnode/tsdb2/src/tsdbMemTable.c @@ -0,0 +1,1006 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#if 0 + +#include "tdataformat.h" +#include "tfunctional.h" +#include "tsdbRowMergeBuf.h" +#include "tsdbint.h" +#include "tskiplist.h" +#include "ttime.h" + +#define TSDB_DATA_SKIPLIST_LEVEL 5 +#define TSDB_MAX_INSERT_BATCH 512 + +static SMemTable * tsdbNewMemTable(STsdb *pRepo); +static void tsdbFreeMemTable(SMemTable *pMemTable); +static STableData *tsdbNewTableData(STsdbCfg *pCfg, STable *pTable); +static void tsdbFreeTableData(STableData *pTableData); +static char * tsdbGetTsTupleKey(const void *data); +static int tsdbAdjustMemMaxTables(SMemTable *pMemTable, int maxTables); +static int tsdbAppendTableRowToCols(STable *pTable, SDataCols *pCols, STSchema **ppSchema, SMemRow row); +static int tsdbScanAndConvertSubmitMsg(STsdb *pRepo, SSubmitMsg *pMsg); +static int tsdbInsertDataToTable(STsdb *pRepo, SSubmitBlk *pBlock, int32_t *affectedrows); +static int tsdbCheckTableSchema(STsdb *pRepo, SSubmitBlk *pBlock, STable *pTable); +static int tsdbUpdateTableLatestInfo(STsdb *pRepo, STable *pTable, SMemRow row); + +static FORCE_INLINE int tsdbCheckRowRange(STsdb *pRepo, STable *pTable, SMemRow row, TSKEY minKey, TSKEY maxKey, + TSKEY now); + +int32_t tsdbInsertData(STsdb *repo, SSubmitMsg *pMsg, SShellSubmitRspMsg *pRsp) { + STsdb * pRepo = repo; + SSubmitMsgIter msgIter = {0}; + SSubmitBlk * pBlock = NULL; + int32_t 
affectedrows = 0, numOfRows = 0; + + if (tsdbScanAndConvertSubmitMsg(pRepo, pMsg) < 0) { + if (terrno != TSDB_CODE_TDB_TABLE_RECONFIGURE) { + tsdbError("vgId:%d failed to insert data since %s", REPO_ID(pRepo), tstrerror(terrno)); + } + return -1; + } + + tInitSubmitMsgIter(pMsg, &msgIter); + while (true) { + tGetSubmitMsgNext(&msgIter, &pBlock); + if (pBlock == NULL) break; + if (tsdbInsertDataToTable(pRepo, pBlock, &affectedrows) < 0) { + return -1; + } + numOfRows += pBlock->numOfRows; + } + + if (pRsp != NULL) { + pRsp->affectedRows = htonl(affectedrows); + pRsp->numOfRows = htonl(numOfRows); + } + + if (tsdbCheckCommit(pRepo) < 0) return -1; + return 0; +} + +// ---------------- INTERNAL FUNCTIONS ---------------- +int tsdbRefMemTable(STsdb *pRepo, SMemTable *pMemTable) { + if (pMemTable == NULL) return 0; + int ref = T_REF_INC(pMemTable); + tsdbDebug("vgId:%d ref memtable %p ref %d", REPO_ID(pRepo), pMemTable, ref); + return 0; +} + +// Need to lock the repository +int tsdbUnRefMemTable(STsdb *pRepo, SMemTable *pMemTable) { + if (pMemTable == NULL) return 0; + + int ref = T_REF_DEC(pMemTable); + tsdbDebug("vgId:%d unref memtable %p ref %d", REPO_ID(pRepo), pMemTable, ref); + if (ref == 0) { + STsdbBufPool *pBufPool = pRepo->pPool; + + SListNode *pNode = NULL; + bool addNew = false; + if (tsdbLockRepo(pRepo) < 0) return -1; + while ((pNode = tdListPopHead(pMemTable->bufBlockList)) != NULL) { + if (pBufPool->nRecycleBlocks > 0) { + tsdbRecycleBufferBlock(pBufPool, pNode, false); + pBufPool->nRecycleBlocks -= 1; + } else { + if (pBufPool->nElasticBlocks > 0 && listNEles(pBufPool->bufBlockList) > 2) { + tsdbRecycleBufferBlock(pBufPool, pNode, true); + } else { + tdListAppendNode(pBufPool->bufBlockList, pNode); + addNew = true; + } + } + } + if (addNew) { + int code = pthread_cond_signal(&pBufPool->poolNotEmpty); + if (code != 0) { + if (tsdbUnlockRepo(pRepo) < 0) return -1; + tsdbError("vgId:%d failed to signal pool not empty since %s", REPO_ID(pRepo), 
strerror(code)); + terrno = TAOS_SYSTEM_ERROR(code); + return -1; + } + } + + if (tsdbUnlockRepo(pRepo) < 0) return -1; + + for (int i = 0; i < pMemTable->maxTables; i++) { + if (pMemTable->tData[i] != NULL) { + tsdbFreeTableData(pMemTable->tData[i]); + } + } + + tdListDiscard(pMemTable->actList); + tdListDiscard(pMemTable->bufBlockList); + tsdbFreeMemTable(pMemTable); + } + return 0; +} + +int tsdbTakeMemSnapshot(STsdb *pRepo, SMemSnapshot *pSnapshot, SArray *pATable) { + memset(pSnapshot, 0, sizeof(*pSnapshot)); + + if (tsdbLockRepo(pRepo) < 0) return -1; + + pSnapshot->omem = pRepo->mem; + pSnapshot->imem = pRepo->imem; + tsdbRefMemTable(pRepo, pRepo->mem); + tsdbRefMemTable(pRepo, pRepo->imem); + + if (tsdbUnlockRepo(pRepo) < 0) return -1; + + if (pSnapshot->omem) { + taosRLockLatch(&(pSnapshot->omem->latch)); + + pSnapshot->mem = &(pSnapshot->mtable); + + pSnapshot->mem->tData = (STableData **)calloc(pSnapshot->omem->maxTables, sizeof(STableData *)); + if (pSnapshot->mem->tData == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + taosRUnLockLatch(&(pSnapshot->omem->latch)); + tsdbUnRefMemTable(pRepo, pSnapshot->omem); + tsdbUnRefMemTable(pRepo, pSnapshot->imem); + pSnapshot->mem = NULL; + pSnapshot->imem = NULL; + pSnapshot->omem = NULL; + return -1; + } + + pSnapshot->mem->keyFirst = pSnapshot->omem->keyFirst; + pSnapshot->mem->keyLast = pSnapshot->omem->keyLast; + pSnapshot->mem->numOfRows = pSnapshot->omem->numOfRows; + pSnapshot->mem->maxTables = pSnapshot->omem->maxTables; + + for (size_t i = 0; i < taosArrayGetSize(pATable); i++) { + STable * pTable = *(STable **)taosArrayGet(pATable, i); + int32_t tid = TABLE_TID(pTable); + STableData *pTableData = (tid < pSnapshot->omem->maxTables) ? 
pSnapshot->omem->tData[tid] : NULL; + + if ((pTableData == NULL) || (TABLE_UID(pTable) != pTableData->uid)) continue; + + pSnapshot->mem->tData[tid] = pTableData; + T_REF_INC(pTableData); + } + + taosRUnLockLatch(&(pSnapshot->omem->latch)); + } + + tsdbDebug("vgId:%d take memory snapshot, pMem %p pIMem %p", REPO_ID(pRepo), pSnapshot->omem, pSnapshot->imem); + return 0; +} + +void tsdbUnTakeMemSnapShot(STsdb *pRepo, SMemSnapshot *pSnapshot) { + tsdbDebug("vgId:%d untake memory snapshot, pMem %p pIMem %p", REPO_ID(pRepo), pSnapshot->omem, pSnapshot->imem); + + if (pSnapshot->mem) { + ASSERT(pSnapshot->omem != NULL); + + for (size_t i = 0; i < pSnapshot->mem->maxTables; i++) { + STableData *pTableData = pSnapshot->mem->tData[i]; + if (pTableData) { + tsdbFreeTableData(pTableData); + } + } + tfree(pSnapshot->mem->tData); + + tsdbUnRefMemTable(pRepo, pSnapshot->omem); + } + + tsdbUnRefMemTable(pRepo, pSnapshot->imem); + + pSnapshot->mem = NULL; + pSnapshot->imem = NULL; + pSnapshot->omem = NULL; +} + +void *tsdbAllocBytes(STsdb *pRepo, int bytes) { + STsdbCfg * pCfg = &pRepo->config; + STsdbBufBlock *pBufBlock = NULL; + void * ptr = NULL; + + // Either allocate from buffer blocks or from SYSTEM memory pool + if (pRepo->mem == NULL) { + SMemTable *pMemTable = tsdbNewMemTable(pRepo); + if (pMemTable == NULL) return NULL; + pRepo->mem = pMemTable; + } + + ASSERT(pRepo->mem != NULL); + + pBufBlock = tsdbGetCurrBufBlock(pRepo); + if ((pRepo->mem->extraBuffList != NULL) || + ((listNEles(pRepo->mem->bufBlockList) >= pCfg->totalBlocks / 3) && (pBufBlock->remain < bytes))) { + // allocate from SYSTEM buffer pool + if (pRepo->mem->extraBuffList == NULL) { + pRepo->mem->extraBuffList = tdListNew(0); + if (pRepo->mem->extraBuffList == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return NULL; + } + } + + ASSERT(pRepo->mem->extraBuffList != NULL); + SListNode *pNode = (SListNode *)malloc(sizeof(SListNode) + bytes); + if (pNode == NULL) { + if 
(listNEles(pRepo->mem->extraBuffList) == 0) { + tdListFree(pRepo->mem->extraBuffList); + pRepo->mem->extraBuffList = NULL; + } + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return NULL; + } + + TD_DLIST_NODE_NEXT(pNode) = TD_DLIST_NODE_PREV(pNode) = NULL; + tdListAppendNode(pRepo->mem->extraBuffList, pNode); + ptr = (void *)(pNode->data); + tsdbTrace("vgId:%d allocate %d bytes from SYSTEM buffer block", REPO_ID(pRepo), bytes); + } else { // allocate from TSDB buffer pool + if (pBufBlock == NULL || pBufBlock->remain < bytes) { + ASSERT(listNEles(pRepo->mem->bufBlockList) < pCfg->totalBlocks / 3); + if (tsdbLockRepo(pRepo) < 0) return NULL; + SListNode *pNode = tsdbAllocBufBlockFromPool(pRepo); + tdListAppendNode(pRepo->mem->bufBlockList, pNode); + if (tsdbUnlockRepo(pRepo) < 0) return NULL; + pBufBlock = tsdbGetCurrBufBlock(pRepo); + } + + ASSERT(pBufBlock->remain >= bytes); + ptr = POINTER_SHIFT(pBufBlock->data, pBufBlock->offset); + pBufBlock->offset += bytes; + pBufBlock->remain -= bytes; + tsdbTrace("vgId:%d allocate %d bytes from TSDB buffer block, nBlocks %d offset %d remain %d", REPO_ID(pRepo), bytes, + listNEles(pRepo->mem->bufBlockList), pBufBlock->offset, pBufBlock->remain); + } + + return ptr; +} + +int tsdbSyncCommitConfig(STsdb *pRepo) { + ASSERT(pRepo->config_changed == true); + tsem_wait(&(pRepo->readyToCommit)); + + if (pRepo->code != TSDB_CODE_SUCCESS) { + tsdbWarn("vgId:%d try to commit config when TSDB not in good state: %s", REPO_ID(pRepo), tstrerror(terrno)); + } + + if (tsdbLockRepo(pRepo) < 0) return -1; + tsdbScheduleCommit(pRepo, COMMIT_CONFIG_REQ); + if (tsdbUnlockRepo(pRepo) < 0) return -1; + + tsem_wait(&(pRepo->readyToCommit)); + tsem_post(&(pRepo->readyToCommit)); + + if (pRepo->code != TSDB_CODE_SUCCESS) { + terrno = pRepo->code; + return -1; + } + + terrno = TSDB_CODE_SUCCESS; + return 0; +} + +int tsdbAsyncCommit(STsdb *pRepo) { + tsem_wait(&(pRepo->readyToCommit)); + + ASSERT(pRepo->imem == NULL); + if (pRepo->mem == NULL) { + 
tsem_post(&(pRepo->readyToCommit)); + return 0; + } + + if (pRepo->code != TSDB_CODE_SUCCESS) { + tsdbWarn("vgId:%d try to commit when TSDB not in good state: %s", REPO_ID(pRepo), tstrerror(terrno)); + } + + if (pRepo->appH.notifyStatus) pRepo->appH.notifyStatus(pRepo->appH.appH, TSDB_STATUS_COMMIT_START, TSDB_CODE_SUCCESS); + if (tsdbLockRepo(pRepo) < 0) return -1; + pRepo->imem = pRepo->mem; + pRepo->mem = NULL; + tsdbScheduleCommit(pRepo, COMMIT_REQ); + if (tsdbUnlockRepo(pRepo) < 0) return -1; + + return 0; +} + +int tsdbSyncCommit(STsdb *repo) { + STsdb *pRepo = repo; + + tsdbAsyncCommit(pRepo); + tsem_wait(&(pRepo->readyToCommit)); + tsem_post(&(pRepo->readyToCommit)); + + if (pRepo->code != TSDB_CODE_SUCCESS) { + terrno = pRepo->code; + return -1; + } else { + terrno = TSDB_CODE_SUCCESS; + return 0; + } +} + +/** + * This is an important function to load data or try to load data from memory skiplist iterator. + * + * This function loads memory data until: + * 1. iterator ends + * 2. data key exceeds maxKey + * 3. rowsIncreased = rowsInserted - rowsDeleteSucceed >= maxRowsToRead + * 4. operations in pCols reach its max capacity, if pCols is given + * + * The function tries to proceed AS MUCH AS POSSIBLE. 
+ */ +int tsdbLoadDataFromCache(STable *pTable, SSkipListIterator *pIter, TSKEY maxKey, int maxRowsToRead, SDataCols *pCols, + TKEY *filterKeys, int nFilterKeys, bool keepDup, SMergeInfo *pMergeInfo) { + ASSERT(maxRowsToRead > 0 && nFilterKeys >= 0); + if (pIter == NULL) return 0; + STSchema * pSchema = NULL; + TSKEY rowKey = 0; + TSKEY fKey = 0; + bool isRowDel = false; + int filterIter = 0; + SMemRow row = NULL; + SMergeInfo mInfo; + + if (pMergeInfo == NULL) pMergeInfo = &mInfo; + + memset(pMergeInfo, 0, sizeof(*pMergeInfo)); + pMergeInfo->keyFirst = INT64_MAX; + pMergeInfo->keyLast = INT64_MIN; + if (pCols) tdResetDataCols(pCols); + + row = tsdbNextIterRow(pIter); + if (row == NULL || memRowKey(row) > maxKey) { + rowKey = INT64_MAX; + isRowDel = false; + } else { + rowKey = memRowKey(row); + isRowDel = memRowDeleted(row); + } + + if (filterIter >= nFilterKeys) { + fKey = INT64_MAX; + } else { + fKey = tdGetKey(filterKeys[filterIter]); + } + + while (true) { + if (fKey == INT64_MAX && rowKey == INT64_MAX) break; + + if (fKey < rowKey) { + pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, fKey); + pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, fKey); + + filterIter++; + if (filterIter >= nFilterKeys) { + fKey = INT64_MAX; + } else { + fKey = tdGetKey(filterKeys[filterIter]); + } + } else if (fKey > rowKey) { + if (isRowDel) { + pMergeInfo->rowsDeleteFailed++; + } else { + if (pMergeInfo->rowsInserted - pMergeInfo->rowsDeleteSucceed >= maxRowsToRead) break; + if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break; + pMergeInfo->rowsInserted++; + pMergeInfo->nOperations++; + pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, rowKey); + pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, rowKey); + tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row); + } + + tSkipListIterNext(pIter); + row = tsdbNextIterRow(pIter); + if (row == NULL || memRowKey(row) > maxKey) { + rowKey = INT64_MAX; + isRowDel = false; + } else { + rowKey = memRowKey(row); + isRowDel = 
memRowDeleted(row); + } + } else { + if (isRowDel) { + ASSERT(!keepDup); + if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break; + pMergeInfo->rowsDeleteSucceed++; + pMergeInfo->nOperations++; + tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row); + } else { + if (keepDup) { + if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break; + pMergeInfo->rowsUpdated++; + pMergeInfo->nOperations++; + pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, rowKey); + pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, rowKey); + tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row); + } else { + pMergeInfo->keyFirst = MIN(pMergeInfo->keyFirst, fKey); + pMergeInfo->keyLast = MAX(pMergeInfo->keyLast, fKey); + } + } + + tSkipListIterNext(pIter); + row = tsdbNextIterRow(pIter); + if (row == NULL || memRowKey(row) > maxKey) { + rowKey = INT64_MAX; + isRowDel = false; + } else { + rowKey = memRowKey(row); + isRowDel = memRowDeleted(row); + } + + filterIter++; + if (filterIter >= nFilterKeys) { + fKey = INT64_MAX; + } else { + fKey = tdGetKey(filterKeys[filterIter]); + } + } + } + + return 0; +} + +// ---------------- LOCAL FUNCTIONS ---------------- +static SMemTable *tsdbNewMemTable(STsdb *pRepo) { + STsdbMeta *pMeta = pRepo->tsdbMeta; + + SMemTable *pMemTable = (SMemTable *)calloc(1, sizeof(*pMemTable)); + if (pMemTable == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + pMemTable->keyFirst = INT64_MAX; + pMemTable->keyLast = 0; + pMemTable->numOfRows = 0; + + pMemTable->maxTables = pMeta->maxTables; + pMemTable->tData = (STableData **)calloc(pMemTable->maxTables, sizeof(STableData *)); + if (pMemTable->tData == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + pMemTable->actList = tdListNew(0); + if (pMemTable->actList == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + pMemTable->bufBlockList = tdListNew(sizeof(STsdbBufBlock *)); + if (pMemTable->bufBlockList == NULL) { + terrno = 
TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + T_REF_INC(pMemTable); + + return pMemTable; + +_err: + tsdbFreeMemTable(pMemTable); + return NULL; +} + +static void tsdbFreeMemTable(SMemTable *pMemTable) { + if (pMemTable) { + ASSERT((pMemTable->bufBlockList == NULL) ? true : (listNEles(pMemTable->bufBlockList) == 0)); + ASSERT((pMemTable->actList == NULL) ? true : (listNEles(pMemTable->actList) == 0)); + + tdListFree(pMemTable->extraBuffList); + tdListFree(pMemTable->bufBlockList); + tdListFree(pMemTable->actList); + tfree(pMemTable->tData); + free(pMemTable); + } +} + +static STableData *tsdbNewTableData(STsdbCfg *pCfg, STable *pTable) { + STableData *pTableData = (STableData *)calloc(1, sizeof(*pTableData)); + if (pTableData == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return NULL; + } + + pTableData->uid = TABLE_UID(pTable); + pTableData->keyFirst = INT64_MAX; + pTableData->keyLast = 0; + pTableData->numOfRows = 0; + + uint8_t skipListCreateFlags; + if (pCfg->update == TD_ROW_DISCARD_UPDATE) + skipListCreateFlags = SL_DISCARD_DUP_KEY; + else + skipListCreateFlags = SL_UPDATE_DUP_KEY; + + pTableData->pData = + tSkipListCreate(TSDB_DATA_SKIPLIST_LEVEL, TSDB_DATA_TYPE_TIMESTAMP, TYPE_BYTES[TSDB_DATA_TYPE_TIMESTAMP], + tkeyComparFn, skipListCreateFlags, tsdbGetTsTupleKey); + if (pTableData->pData == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + free(pTableData); + return NULL; + } + + T_REF_INC(pTableData); + + return pTableData; +} + +static void tsdbFreeTableData(STableData *pTableData) { + if (pTableData) { + int32_t ref = T_REF_DEC(pTableData); + if (ref == 0) { + tSkipListDestroy(pTableData->pData); + free(pTableData); + } + } +} + +static char *tsdbGetTsTupleKey(const void *data) { return memRowKeys((SMemRow)data); } + +static int tsdbAdjustMemMaxTables(SMemTable *pMemTable, int maxTables) { + ASSERT(pMemTable->maxTables < maxTables); + + STableData **pTableData = (STableData **)calloc(maxTables, sizeof(STableData *)); + if (pTableData == NULL) 
{ + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + memcpy((void *)pTableData, (void *)pMemTable->tData, sizeof(STableData *) * pMemTable->maxTables); + + STableData **tData = pMemTable->tData; + + taosWLockLatch(&(pMemTable->latch)); + pMemTable->maxTables = maxTables; + pMemTable->tData = pTableData; + taosWUnLockLatch(&(pMemTable->latch)); + + tfree(tData); + + return 0; +} + +static int tsdbAppendTableRowToCols(STable *pTable, SDataCols *pCols, STSchema **ppSchema, SMemRow row) { + if (pCols) { + if (*ppSchema == NULL || schemaVersion(*ppSchema) != memRowVersion(row)) { + *ppSchema = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row), (int8_t)memRowType(row)); + if (*ppSchema == NULL) { + ASSERT(false); + return -1; + } + } + + tdAppendMemRowToDataCol(row, *ppSchema, pCols, true); + } + + return 0; +} + +static FORCE_INLINE int tsdbCheckRowRange(STsdb *pRepo, STable *pTable, SMemRow row, TSKEY minKey, TSKEY maxKey, + TSKEY now) { + TSKEY rowKey = memRowKey(row); + if (rowKey < minKey || rowKey > maxKey) { + tsdbError("vgId:%d table %s tid %d uid %" PRIu64 " timestamp is out of range! 
now %" PRId64 " minKey %" PRId64 + " maxKey %" PRId64 " row key %" PRId64, + REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), now, minKey, maxKey, + rowKey); + terrno = TSDB_CODE_TDB_TIMESTAMP_OUT_OF_RANGE; + return -1; + } + + return 0; +} + +static int tsdbScanAndConvertSubmitMsg(STsdb *pRepo, SSubmitMsg *pMsg) { + ASSERT(pMsg != NULL); + STsdbMeta * pMeta = pRepo->tsdbMeta; + SSubmitMsgIter msgIter = {0}; + SSubmitBlk * pBlock = NULL; + SSubmitBlkIter blkIter = {0}; + SMemRow row = NULL; + TSKEY now = taosGetTimestamp(pRepo->config.precision); + TSKEY minKey = now - tsTickPerDay[pRepo->config.precision] * pRepo->config.keep; + TSKEY maxKey = now + tsTickPerDay[pRepo->config.precision] * pRepo->config.daysPerFile; + + terrno = TSDB_CODE_SUCCESS; + pMsg->length = htonl(pMsg->length); + pMsg->numOfBlocks = htonl(pMsg->numOfBlocks); + + if (tInitSubmitMsgIter(pMsg, &msgIter) < 0) return -1; + while (true) { + if (tGetSubmitMsgNext(&msgIter, &pBlock) < 0) return -1; + if (pBlock == NULL) break; + + pBlock->uid = htobe64(pBlock->uid); + pBlock->tid = htonl(pBlock->tid); + pBlock->sversion = htonl(pBlock->sversion); + pBlock->dataLen = htonl(pBlock->dataLen); + pBlock->schemaLen = htonl(pBlock->schemaLen); + pBlock->numOfRows = htons(pBlock->numOfRows); + + if (pBlock->tid <= 0 || pBlock->tid >= pMeta->maxTables) { + tsdbError("vgId:%d failed to get table to insert data, uid %" PRIu64 " tid %d", REPO_ID(pRepo), pBlock->uid, + pBlock->tid); + terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; + return -1; + } + + STable *pTable = pMeta->tables[pBlock->tid]; + if (pTable == NULL || TABLE_UID(pTable) != pBlock->uid) { + tsdbError("vgId:%d failed to get table to insert data, uid %" PRIu64 " tid %d", REPO_ID(pRepo), pBlock->uid, + pBlock->tid); + terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; + return -1; + } + + if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) { + tsdbError("vgId:%d invalid action trying to insert a super table %s", REPO_ID(pRepo), 
TABLE_CHAR_NAME(pTable)); + terrno = TSDB_CODE_TDB_INVALID_ACTION; + return -1; + } + + // Check schema version and update schema if needed + if (tsdbCheckTableSchema(pRepo, pBlock, pTable) < 0) { + if (terrno == TSDB_CODE_TDB_TABLE_RECONFIGURE) { + continue; + } else { + return -1; + } + } + + tInitSubmitBlkIter(pBlock, &blkIter); + while ((row = tGetSubmitBlkNext(&blkIter)) != NULL) { + if (tsdbCheckRowRange(pRepo, pTable, row, minKey, maxKey, now) < 0) { + return -1; + } + } + } + + if (terrno != TSDB_CODE_SUCCESS) return -1; + return 0; +} + +// row1 has higher priority +static SMemRow tsdbInsertDupKeyMerge(SMemRow row1, SMemRow row2, STsdb *pRepo, STSchema **ppSchema1, + STSchema **ppSchema2, STable *pTable, int32_t *pPoints, SMemRow *pLastRow) { + // for compatibility, a duplicate key inserted when update=0 should also be counted as an affected row! + if (row1 == NULL && row2 == NULL && pRepo->config.update == TD_ROW_DISCARD_UPDATE) { + (*pPoints)++; + return NULL; + } + + tsdbTrace("vgId:%d a row is %s table %s tid %d uid %" PRIu64 " key %" PRIu64, REPO_ID(pRepo), "updated in", + TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), memRowKey(row1)); + + if (row2 == NULL || pRepo->config.update != TD_ROW_PARTIAL_UPDATE) { + void *pMem = tsdbAllocBytes(pRepo, memRowTLen(row1)); + if (pMem == NULL) return NULL; + memRowCpy(pMem, row1); + (*pPoints)++; + *pLastRow = pMem; + return pMem; + } + + STSchema * pSchema1 = *ppSchema1; + STSchema * pSchema2 = *ppSchema2; + SMergeBuf *pBuf = &pRepo->mergeBuf; + int dv1 = memRowVersion(row1); + int dv2 = memRowVersion(row2); + if (pSchema1 == NULL || schemaVersion(pSchema1) != dv1) { + if (pSchema2 != NULL && schemaVersion(pSchema2) == dv1) { + *ppSchema1 = pSchema2; + } else { + *ppSchema1 = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row1), (int8_t)memRowType(row1)); + } + pSchema1 = *ppSchema1; + } + + if (pSchema2 == NULL || schemaVersion(pSchema2) != dv2) { + if (schemaVersion(pSchema1) == dv2) 
{ + pSchema2 = pSchema1; + } else { + *ppSchema2 = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row2), (int8_t)memRowType(row2)); + pSchema2 = *ppSchema2; + } + } + + SMemRow tmp = tsdbMergeTwoRows(pBuf, row1, row2, pSchema1, pSchema2); + + void *pMem = tsdbAllocBytes(pRepo, memRowTLen(tmp)); + if (pMem == NULL) return NULL; + memRowCpy(pMem, tmp); + + (*pPoints)++; + *pLastRow = pMem; + return pMem; +} + +static void *tsdbInsertDupKeyMergePacked(void **args) { + return tsdbInsertDupKeyMerge(args[0], args[1], args[2], (STSchema **)&args[3], (STSchema **)&args[4], args[5], + args[6], args[7]); +} + +static void tsdbSetupSkipListHookFns(SSkipList *pSkipList, STsdb *pRepo, STable *pTable, int32_t *pPoints, + SMemRow *pLastRow) { + if (pSkipList->insertHandleFn == NULL) { + tGenericSavedFunc *dupHandleSavedFunc = genericSavedFuncInit((GenericVaFunc)&tsdbInsertDupKeyMergePacked, 9); + dupHandleSavedFunc->args[2] = pRepo; + dupHandleSavedFunc->args[3] = NULL; + dupHandleSavedFunc->args[4] = NULL; + dupHandleSavedFunc->args[5] = pTable; + pSkipList->insertHandleFn = dupHandleSavedFunc; + } + pSkipList->insertHandleFn->args[6] = pPoints; + pSkipList->insertHandleFn->args[7] = pLastRow; +} + +static int tsdbInsertDataToTable(STsdb *pRepo, SSubmitBlk *pBlock, int32_t *pAffectedRows) { + STsdbMeta * pMeta = pRepo->tsdbMeta; + int32_t points = 0; + STable * pTable = NULL; + SSubmitBlkIter blkIter = {0}; + SMemTable * pMemTable = NULL; + STableData * pTableData = NULL; + STsdbCfg * pCfg = &(pRepo->config); + + tInitSubmitBlkIter(pBlock, &blkIter); + if (blkIter.row == NULL) return 0; + TSKEY firstRowKey = memRowKey(blkIter.row); + + tsdbAllocBytes(pRepo, 0); + pMemTable = pRepo->mem; + + ASSERT(pMemTable != NULL); + ASSERT(pBlock->tid < pMeta->maxTables); + + pTable = pMeta->tables[pBlock->tid]; + + ASSERT(pTable != NULL && TABLE_UID(pTable) == pBlock->uid); + + if (TABLE_TID(pTable) >= pMemTable->maxTables) { + if (tsdbAdjustMemMaxTables(pMemTable, 
pMeta->maxTables) < 0) { + return -1; + } + } + pTableData = pMemTable->tData[TABLE_TID(pTable)]; + + if (pTableData == NULL || pTableData->uid != TABLE_UID(pTable)) { + if (pTableData != NULL) { + taosWLockLatch(&(pMemTable->latch)); + pMemTable->tData[TABLE_TID(pTable)] = NULL; + tsdbFreeTableData(pTableData); + taosWUnLockLatch(&(pMemTable->latch)); + } + + pTableData = tsdbNewTableData(pCfg, pTable); + if (pTableData == NULL) { + tsdbError("vgId:%d failed to insert data to table %s uid %" PRId64 " tid %d since %s", REPO_ID(pRepo), + TABLE_CHAR_NAME(pTable), TABLE_UID(pTable), TABLE_TID(pTable), tstrerror(terrno)); + return -1; + } + + pRepo->mem->tData[TABLE_TID(pTable)] = pTableData; + } + + ASSERT((pTableData != NULL) && pTableData->uid == TABLE_UID(pTable)); + + SMemRow lastRow = NULL; + int64_t osize = SL_SIZE(pTableData->pData); + tsdbSetupSkipListHookFns(pTableData->pData, pRepo, pTable, &points, &lastRow); + tSkipListPutBatchByIter(pTableData->pData, &blkIter, (iter_next_fn_t)tGetSubmitBlkNext); + int64_t dsize = SL_SIZE(pTableData->pData) - osize; + (*pAffectedRows) += points; + + if (lastRow != NULL) { + TSKEY lastRowKey = memRowKey(lastRow); + if (pMemTable->keyFirst > firstRowKey) pMemTable->keyFirst = firstRowKey; + pMemTable->numOfRows += dsize; + + if (pTableData->keyFirst > firstRowKey) pTableData->keyFirst = firstRowKey; + pTableData->numOfRows += dsize; + if (pMemTable->keyLast < lastRowKey) pMemTable->keyLast = lastRowKey; + if (pTableData->keyLast < lastRowKey) pTableData->keyLast = lastRowKey; + if (tsdbUpdateTableLatestInfo(pRepo, pTable, lastRow) < 0) { + return -1; + } + } + + STSchema *pSchema = tsdbGetTableSchemaByVersion(pTable, pBlock->sversion, -1); + pRepo->stat.pointsWritten += points * schemaNCols(pSchema); + pRepo->stat.totalStorage += points * schemaVLen(pSchema); + + return 0; +} + +static int tsdbCheckTableSchema(STsdb *pRepo, SSubmitBlk *pBlock, STable *pTable) { + ASSERT(pTable != NULL); + + STSchema *pSchema = 
tsdbGetTableSchemaImpl(pTable, false, false, -1, -1); + int sversion = schemaVersion(pSchema); + + if (pBlock->sversion == sversion) { + return 0; + } else { + if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) { // stream table is not allowed to change schema + terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION; + return -1; + } + } + + if (pBlock->sversion > sversion) { // may need to update table schema + if (pBlock->schemaLen > 0) { + tsdbDebug( + "vgId:%d table %s tid %d uid %" PRIu64 " schema version %d is out of data, client version %d, update...", + REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), sversion, pBlock->sversion); + ASSERT(pBlock->schemaLen % sizeof(STColumn) == 0); + int numOfCols = pBlock->schemaLen / sizeof(STColumn); + STColumn *pTCol = (STColumn *)pBlock->data; + + STSchemaBuilder schemaBuilder = {0}; + if (tdInitTSchemaBuilder(&schemaBuilder, pBlock->sversion) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbError("vgId:%d failed to update schema of table %s since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), + tstrerror(terrno)); + return -1; + } + + for (int i = 0; i < numOfCols; i++) { + if (tdAddColToSchema(&schemaBuilder, pTCol[i].type, htons(pTCol[i].colId), htons(pTCol[i].bytes)) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbError("vgId:%d failed to update schema of table %s since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), + tstrerror(terrno)); + tdDestroyTSchemaBuilder(&schemaBuilder); + return -1; + } + } + + STSchema *pNSchema = tdGetSchemaFromBuilder(&schemaBuilder); + if (pNSchema == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tdDestroyTSchemaBuilder(&schemaBuilder); + return -1; + } + + tdDestroyTSchemaBuilder(&schemaBuilder); + tsdbUpdateTableSchema(pRepo, pTable, pNSchema, true); + } else { + tsdbDebug( + "vgId:%d table %s tid %d uid %" PRIu64 " schema version %d is out of data, client version %d, reconfigure...", + REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), 
TABLE_UID(pTable), sversion, pBlock->sversion); + terrno = TSDB_CODE_TDB_TABLE_RECONFIGURE; + return -1; + } + } else { + ASSERT(pBlock->sversion >= 0); + if (tsdbGetTableSchemaImpl(pTable, false, false, pBlock->sversion, -1) == NULL) { + tsdbError("vgId:%d invalid submit schema version %d to table %s tid %d from client", REPO_ID(pRepo), + pBlock->sversion, TABLE_CHAR_NAME(pTable), TABLE_TID(pTable)); + terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION; + return -1; + } + } + + return 0; +} + +static void updateTableLatestColumn(STsdb *pRepo, STable *pTable, SMemRow row) { + tsdbDebug("vgId:%d updateTableLatestColumn, %s row version:%d", REPO_ID(pRepo), pTable->name->data, + memRowVersion(row)); + + STSchema *pSchema = tsdbGetTableLatestSchema(pTable); + if (tsdbUpdateLastColSchema(pTable, pSchema) < 0) { + return; + } + + pSchema = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row), (int8_t)memRowType(row)); + if (pSchema == NULL) { + return; + } + + SDataCol *pLatestCols = pTable->lastCols; + int32_t kvIdx = 0; + + for (int16_t j = 0; j < schemaNCols(pSchema); j++) { + STColumn *pTCol = schemaColAt(pSchema, j); + // ignore not exist colId + int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pTCol->colId); + if (idx == -1) { + continue; + } + + void *value = NULL; + + value = tdGetMemRowDataOfColEx(row, pTCol->colId, (int8_t)pTCol->type, + TD_DATA_ROW_HEAD_SIZE + pSchema->columns[j].offset, &kvIdx); + + if ((value == NULL) || isNull(value, pTCol->type)) { + continue; + } + // lock + TSDB_WLOCK_TABLE(pTable); + SDataCol *pDataCol = &(pLatestCols[idx]); + if (pDataCol->pData == NULL) { + pDataCol->pData = malloc(pTCol->bytes); + pDataCol->bytes = pTCol->bytes; + } else if (pDataCol->bytes < pTCol->bytes) { + pDataCol->pData = realloc(pDataCol->pData, pTCol->bytes); + pDataCol->bytes = pTCol->bytes; + } + // the actual value size + uint16_t bytes = IS_VAR_DATA_TYPE(pTCol->type) ? 
varDataTLen(value) : pTCol->bytes; + // the actual data size CANNOT larger than column size + assert(pTCol->bytes >= bytes); + memcpy(pDataCol->pData, value, bytes); + // tsdbInfo("updateTableLatestColumn vgId:%d cache column %d for %d,%s", REPO_ID(pRepo), j, pDataCol->bytes, + // (char*)pDataCol->pData); + pDataCol->ts = memRowKey(row); + // unlock + TSDB_WUNLOCK_TABLE(pTable); + } +} + +static int tsdbUpdateTableLatestInfo(STsdb *pRepo, STable *pTable, SMemRow row) { + STsdbCfg *pCfg = &pRepo->config; + + // if cacheLastRow config has been reset, free the lastRow + if (!pCfg->cacheLastRow && pTable->lastRow != NULL) { + SMemRow cachedLastRow = pTable->lastRow; + TSDB_WLOCK_TABLE(pTable); + pTable->lastRow = NULL; + TSDB_WUNLOCK_TABLE(pTable); + taosTZfree(cachedLastRow); + } + + if (tsdbGetTableLastKeyImpl(pTable) <= memRowKey(row)) { + if (CACHE_LAST_ROW(pCfg) || pTable->lastRow != NULL) { + SMemRow nrow = pTable->lastRow; + if (taosTSizeof(nrow) < memRowTLen(row)) { + SMemRow orow = nrow; + nrow = taosTMalloc(memRowTLen(row)); + if (nrow == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + memRowCpy(nrow, row); + TSDB_WLOCK_TABLE(pTable); + pTable->lastKey = memRowKey(row); + pTable->lastRow = nrow; + TSDB_WUNLOCK_TABLE(pTable); + taosTZfree(orow); + } else { + TSDB_WLOCK_TABLE(pTable); + pTable->lastKey = memRowKey(row); + memRowCpy(nrow, row); + TSDB_WUNLOCK_TABLE(pTable); + } + } else { + pTable->lastKey = memRowKey(row); + } + + if (CACHE_LAST_NULL_COLUMN(pCfg)) { + updateTableLatestColumn(pRepo, pTable, row); + } + } + + pTable->cacheLastConfigVersion = pRepo->cacheLastConfigVersion; + + return 0; +} + +#endif \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/src/tsdbMeta.c b/source/dnode/vnode/tsdb2/src/tsdbMeta.c new file mode 100644 index 0000000000..f8fa4f5d8c --- /dev/null +++ b/source/dnode/vnode/tsdb2/src/tsdbMeta.c @@ -0,0 +1,1695 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#if 0 +#include "tcompare.h" +#include "tsdbint.h" +#include "tutil.h" + +#define TSDB_SUPER_TABLE_SL_LEVEL 5 +#define DEFAULT_TAG_INDEX_COLUMN 0 + +static char * getTagIndexKey(const void *pData); +static STable *tsdbNewTable(); +static STable *tsdbCreateTableFromCfg(STableCfg *pCfg, bool isSuper, STable *pSTable); +static void tsdbFreeTable(STable *pTable); +static int tsdbAddTableToMeta(STsdbRepo *pRepo, STable *pTable, bool addIdx, bool lock); +static void tsdbRemoveTableFromMeta(STsdbRepo *pRepo, STable *pTable, bool rmFromIdx, bool lock); +static int tsdbAddTableIntoIndex(STsdbMeta *pMeta, STable *pTable, bool refSuper); +static int tsdbRemoveTableFromIndex(STsdbMeta *pMeta, STable *pTable); +static int tsdbInitTableCfg(STableCfg *config, ETableType type, uint64_t uid, int32_t tid); +static int tsdbTableSetSchema(STableCfg *config, STSchema *pSchema, bool dup); +static int tsdbTableSetName(STableCfg *config, char *name, bool dup); +static int tsdbTableSetTagSchema(STableCfg *config, STSchema *pSchema, bool dup); +static int tsdbTableSetSName(STableCfg *config, char *sname, bool dup); +static int tsdbTableSetSuperUid(STableCfg *config, uint64_t uid); +static int tsdbTableSetTagValue(STableCfg *config, SKVRow row, bool dup); +static int tsdbTableSetStreamSql(STableCfg *config, char *sql, bool dup); +static int tsdbEncodeTableName(void **buf, tstr *name); +static void * tsdbDecodeTableName(void *buf, tstr **name); 
+static int tsdbEncodeTable(void **buf, STable *pTable); +static void * tsdbDecodeTable(void *buf, STable **pRTable); +static int tsdbGetTableEncodeSize(int8_t act, STable *pTable); +static void * tsdbInsertTableAct(STsdbRepo *pRepo, int8_t act, void *buf, STable *pTable); +static int tsdbRemoveTableFromStore(STsdbRepo *pRepo, STable *pTable); +static int tsdbRmTableFromMeta(STsdbRepo *pRepo, STable *pTable); +static int tsdbAdjustMetaTables(STsdbRepo *pRepo, int tid); +static int tsdbCheckTableTagVal(SKVRow *pKVRow, STSchema *pSchema); +static int tsdbInsertNewTableAction(STsdbRepo *pRepo, STable *pTable); +static int tsdbAddSchema(STable *pTable, STSchema *pSchema); +static void tsdbFreeTableSchema(STable *pTable); + +// ------------------ OUTER FUNCTIONS ------------------ +int tsdbCreateTable(STsdbRepo *repo, STableCfg *pCfg) { + STsdbRepo *pRepo = (STsdbRepo *)repo; + STsdbMeta *pMeta = pRepo->tsdbMeta; + STable * super = NULL; + STable * table = NULL; + bool newSuper = false; + bool superChanged = false; + int tid = pCfg->tableId.tid; + STable * pTable = NULL; + + if (tid < 1 || tid > TSDB_MAX_TABLES) { + tsdbError("vgId:%d failed to create table since invalid tid %d", REPO_ID(pRepo), tid); + terrno = TSDB_CODE_TDB_IVD_CREATE_TABLE_INFO; + goto _err; + } + + if (tid < pMeta->maxTables && pMeta->tables[tid] != NULL) { + if (TABLE_UID(pMeta->tables[tid]) == pCfg->tableId.uid) { + tsdbError("vgId:%d table %s already exists, tid %d uid %" PRId64, REPO_ID(pRepo), + TABLE_CHAR_NAME(pMeta->tables[tid]), TABLE_TID(pMeta->tables[tid]), TABLE_UID(pMeta->tables[tid])); + return 0; + } else { + tsdbInfo("vgId:%d table %s at tid %d uid %" PRIu64 + " exists, replace it with new table, this can be not reasonable", + REPO_ID(pRepo), TABLE_CHAR_NAME(pMeta->tables[tid]), TABLE_TID(pMeta->tables[tid]), + TABLE_UID(pMeta->tables[tid])); + tsdbDropTable(pRepo, pMeta->tables[tid]->tableId); + } + } + + pTable = tsdbGetTableByUid(pMeta, pCfg->tableId.uid); + if (pTable != NULL) { + 
tsdbError("vgId:%d table %s already exists, tid %d uid %" PRId64, REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), + TABLE_TID(pTable), TABLE_UID(pTable)); + terrno = TSDB_CODE_TDB_TABLE_ALREADY_EXIST; + goto _err; + } + + if (pCfg->type == TSDB_CHILD_TABLE) { + super = tsdbGetTableByUid(pMeta, pCfg->superUid); + if (super == NULL) { // super table not exists, try to create it + newSuper = true; + super = tsdbCreateTableFromCfg(pCfg, true, NULL); + if (super == NULL) goto _err; + } else { + if (TABLE_TYPE(super) != TSDB_SUPER_TABLE || TABLE_UID(super) != pCfg->superUid) { + terrno = TSDB_CODE_TDB_IVD_CREATE_TABLE_INFO; + goto _err; + } + + if (schemaVersion(pCfg->tagSchema) > schemaVersion(super->tagSchema)) { + // tag schema out of date, need to update super table tag version + STSchema *pOldSchema = super->tagSchema; + TSDB_WLOCK_TABLE(super); + super->tagSchema = tdDupSchema(pCfg->tagSchema); + TSDB_WUNLOCK_TABLE(super); + tdFreeSchema(pOldSchema); + + superChanged = true; + } + } + } + + table = tsdbCreateTableFromCfg(pCfg, false, super); + if (table == NULL) goto _err; + + // Register to meta + tsdbWLockRepoMeta(pRepo); + if (newSuper) { + if (tsdbAddTableToMeta(pRepo, super, true, false) < 0) { + super = NULL; + tsdbUnlockRepoMeta(pRepo); + goto _err; + } + } + if (tsdbAddTableToMeta(pRepo, table, true, false) < 0) { + table = NULL; + tsdbUnlockRepoMeta(pRepo); + goto _err; + } + tsdbUnlockRepoMeta(pRepo); + + // Write to memtable action + if (newSuper || superChanged) { + // add insert new super table action + if (tsdbInsertNewTableAction(pRepo, super) != 0) { + goto _err; + } + } + // add insert new table action + if (tsdbInsertNewTableAction(pRepo, table) != 0) { + goto _err; + } + + if (tsdbCheckCommit(pRepo) < 0) return -1; + + return 0; + +_err: + if (newSuper) { + tsdbFreeTable(super); + } + tsdbFreeTable(table); + return -1; +} + +int tsdbDropTable(STsdbRepo *repo, STableId tableId) { + STsdbRepo *pRepo = (STsdbRepo *)repo; + STsdbMeta *pMeta = 
pRepo->tsdbMeta; + uint64_t uid = tableId.uid; + int tid = 0; + char * tbname = NULL; + + STable *pTable = tsdbGetTableByUid(pMeta, uid); + if (pTable == NULL) { + tsdbError("vgId:%d failed to drop table since table not exists! tid:%d uid %" PRIu64, REPO_ID(pRepo), tableId.tid, + uid); + terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; + return -1; + } + + tsdbDebug("vgId:%d try to drop table %s type %d", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TYPE(pTable)); + + tid = TABLE_TID(pTable); + tbname = strdup(TABLE_CHAR_NAME(pTable)); + if (tbname == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + // Write to KV store first + if (tsdbRemoveTableFromStore(pRepo, pTable) < 0) { + tsdbError("vgId:%d failed to drop table %s since %s", REPO_ID(pRepo), tbname, tstrerror(terrno)); + goto _err; + } + + // Remove table from Meta + if (tsdbRmTableFromMeta(pRepo, pTable) < 0) { + tsdbError("vgId:%d failed to drop table %s since %s", REPO_ID(pRepo), tbname, tstrerror(terrno)); + goto _err; + } + + tsdbDebug("vgId:%d, table %s is dropped! 
tid:%d, uid:%" PRId64, pRepo->config.tsdbId, tbname, tid, uid); + free(tbname); + + if (tsdbCheckCommit(pRepo) < 0) goto _err; + + return 0; + +_err: + tfree(tbname); + return -1; +} + +void *tsdbGetTableTagVal(const void *pTable, int32_t colId, int16_t type) { + // TODO: this function should be changed also + + STSchema *pSchema = tsdbGetTableTagSchema((STable *)pTable); + STColumn *pCol = tdGetColOfID(pSchema, colId); + if (pCol == NULL) { + return NULL; // No matching tag column + } + + char *val = NULL; + if (pCol->type == TSDB_DATA_TYPE_JSON) { + val = ((STable *)pTable)->tagVal; + } else { + val = tdGetKVRowValOfCol(((STable *)pTable)->tagVal, colId); + assert(type == pCol->type); + } + + return val; +} + +char *tsdbGetTableName(void *pTable) { + // TODO: needs to be made thread-safe + + if (pTable == NULL) { + return NULL; + } else { + return (char *)(((STable *)pTable)->name); + } +} + +STableCfg *tsdbCreateTableCfgFromMsg(SMDCreateTableMsg *pMsg) { + if (pMsg == NULL) return NULL; + + SSchema *pSchema = (SSchema *)pMsg->data; + int16_t numOfCols = htons(pMsg->numOfColumns); + int16_t numOfTags = htons(pMsg->numOfTags); + + STSchemaBuilder schemaBuilder = {0}; + + STableCfg *pCfg = (STableCfg *)calloc(1, sizeof(STableCfg)); + if (pCfg == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return NULL; + } + + if (tsdbInitTableCfg(pCfg, pMsg->tableType, htobe64(pMsg->uid), htonl(pMsg->tid)) < 0) goto _err; + if (tdInitTSchemaBuilder(&schemaBuilder, htonl(pMsg->sversion)) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + for (int i = 0; i < numOfCols; i++) { + if (tdAddColToSchema(&schemaBuilder, pSchema[i].type, htons(pSchema[i].colId), htons(pSchema[i].bytes)) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + } + if (tsdbTableSetSchema(pCfg, tdGetSchemaFromBuilder(&schemaBuilder), false) < 0) goto _err; + if (tsdbTableSetName(pCfg, pMsg->tableFname, true) < 0) goto _err; + + if (numOfTags > 0) { + // Decode tag schema + 
tdResetTSchemaBuilder(&schemaBuilder, htonl(pMsg->tversion)); + for (int i = numOfCols; i < numOfCols + numOfTags; i++) { + if (tdAddColToSchema(&schemaBuilder, pSchema[i].type, htons(pSchema[i].colId), htons(pSchema[i].bytes)) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + } + if (tsdbTableSetTagSchema(pCfg, tdGetSchemaFromBuilder(&schemaBuilder), false) < 0) goto _err; + if (tsdbTableSetSName(pCfg, pMsg->stableFname, true) < 0) goto _err; + if (tsdbTableSetSuperUid(pCfg, htobe64(pMsg->superTableUid)) < 0) goto _err; + + int32_t tagDataLen = htonl(pMsg->tagDataLen); + if (tagDataLen) { + char *pTagData = pMsg->data + (numOfCols + numOfTags) * sizeof(SSchema); + tsdbTableSetTagValue(pCfg, pTagData, true); + } + } + + if (pMsg->tableType == TSDB_STREAM_TABLE) { + char *sql = pMsg->data + (numOfCols + numOfTags) * sizeof(SSchema); + tsdbTableSetStreamSql(pCfg, sql, true); + } + + tdDestroyTSchemaBuilder(&schemaBuilder); + + return pCfg; + +_err: + tdDestroyTSchemaBuilder(&schemaBuilder); + tsdbClearTableCfg(pCfg); + return NULL; +} + +static UNUSED_FUNC int32_t colIdCompar(const void *left, const void *right) { + int16_t colId = *(int16_t *)left; + STColumn *p2 = (STColumn *)right; + + if (colId == p2->colId) { + return 0; + } + + return (colId < p2->colId) ? 
-1 : 1; +} + +int tsdbUpdateTableTagValue(STsdbRepo *repo, SUpdateTableTagValMsg *pMsg) { + STsdbRepo *pRepo = (STsdbRepo *)repo; + STsdbMeta *pMeta = pRepo->tsdbMeta; + STSchema * pNewSchema = NULL; + + pMsg->uid = htobe64(pMsg->uid); + pMsg->tid = htonl(pMsg->tid); + pMsg->tversion = htons(pMsg->tversion); + pMsg->colId = htons(pMsg->colId); + pMsg->bytes = htons(pMsg->bytes); + pMsg->tagValLen = htonl(pMsg->tagValLen); + pMsg->numOfTags = htons(pMsg->numOfTags); + pMsg->schemaLen = htonl(pMsg->schemaLen); + for (int i = 0; i < pMsg->numOfTags; i++) { + STColumn *pTCol = (STColumn *)pMsg->data + i; + pTCol->bytes = htons(pTCol->bytes); + pTCol->colId = htons(pTCol->colId); + } + + STable *pTable = tsdbGetTableByUid(pMeta, pMsg->uid); + if (pTable == NULL || TABLE_TID(pTable) != pMsg->tid) { + tsdbError("vgId:%d failed to update table tag value since invalid table id %d uid %" PRIu64, REPO_ID(pRepo), + pMsg->tid, pMsg->uid); + terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; + return -1; + } + + if (TABLE_TYPE(pTable) != TSDB_CHILD_TABLE) { + tsdbError("vgId:%d try to update tag value of a non-child table, invalid action", REPO_ID(pRepo)); + terrno = TSDB_CODE_TDB_INVALID_ACTION; + return -1; + } + + if (schemaVersion(pTable->pSuper->tagSchema) > pMsg->tversion) { + tsdbError( + "vgId:%d failed to update tag value of table %s since version out of date, client tag version %d server tag " + "version %d", + REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), pMsg->tversion, schemaVersion(pTable->pSuper->tagSchema)); + terrno = TSDB_CODE_TDB_TAG_VER_OUT_OF_DATE; + return -1; + } + + if (schemaVersion(pTable->pSuper->tagSchema) < pMsg->tversion) { // tag schema out of data, + tsdbDebug("vgId:%d need to update tag schema of table %s tid %d uid %" PRIu64 + " since out of date, current version %d new version %d", + REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), + schemaVersion(pTable->pSuper->tagSchema), pMsg->tversion); + + STSchemaBuilder schemaBuilder = 
{0}; + + STColumn *pTCol = (STColumn *)pMsg->data; + ASSERT(pMsg->schemaLen % sizeof(STColumn) == 0 && + pTCol[0].colId == colColId(schemaColAt(pTable->pSuper->tagSchema, 0))); + if (tdInitTSchemaBuilder(&schemaBuilder, pMsg->tversion) < 0) { + tsdbDebug("vgId:%d failed to update tag schema of table %s tid %d uid %" PRIu64 " since out of memory", + REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable)); + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + for (int i = 0; i < (pMsg->schemaLen / sizeof(STColumn)); i++) { + if (tdAddColToSchema(&schemaBuilder, pTCol[i].type, pTCol[i].colId, pTCol[i].bytes) < 0) { + tdDestroyTSchemaBuilder(&schemaBuilder); + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + } + pNewSchema = tdGetSchemaFromBuilder(&schemaBuilder); + if (pNewSchema == NULL) { + tdDestroyTSchemaBuilder(&schemaBuilder); + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + tdDestroyTSchemaBuilder(&schemaBuilder); + } + + // Change in memory + if (pNewSchema != NULL) { // change super table tag schema + TSDB_WLOCK_TABLE(pTable->pSuper); + STSchema *pOldSchema = pTable->pSuper->tagSchema; + pTable->pSuper->tagSchema = pNewSchema; + tdFreeSchema(pOldSchema); + TSDB_WUNLOCK_TABLE(pTable->pSuper); + } + + bool isChangeIndexCol = + (pMsg->colId == colColId(schemaColAt(pTable->pSuper->tagSchema, 0))) || pMsg->type == TSDB_DATA_TYPE_JSON; + // STColumn *pCol = bsearch(&(pMsg->colId), pMsg->data, pMsg->numOfTags, sizeof(STColumn), colIdCompar); + // ASSERT(pCol != NULL); + + if (isChangeIndexCol) { + tsdbWLockRepoMeta(pRepo); + tsdbRemoveTableFromIndex(pMeta, pTable); + } + TSDB_WLOCK_TABLE(pTable); + if (pMsg->type == TSDB_DATA_TYPE_JSON) { + kvRowFree(pTable->tagVal); + pTable->tagVal = tdKVRowDup(POINTER_SHIFT(pMsg->data, pMsg->schemaLen)); + } else { + tdSetKVRowDataOfCol(&(pTable->tagVal), pMsg->colId, pMsg->type, POINTER_SHIFT(pMsg->data, pMsg->schemaLen)); + } + TSDB_WUNLOCK_TABLE(pTable); + if (isChangeIndexCol) { + 
tsdbAddTableIntoIndex(pMeta, pTable, false); + tsdbUnlockRepoMeta(pRepo); + } + + // Update on file + int tlen1 = (pNewSchema) ? tsdbGetTableEncodeSize(TSDB_UPDATE_META, pTable->pSuper) : 0; + int tlen2 = tsdbGetTableEncodeSize(TSDB_UPDATE_META, pTable); + void *buf = tsdbAllocBytes(pRepo, tlen1 + tlen2); + ASSERT(buf != NULL); + if (pNewSchema) { + void *pBuf = tsdbInsertTableAct(pRepo, TSDB_UPDATE_META, buf, pTable->pSuper); + ASSERT(POINTER_DISTANCE(pBuf, buf) == tlen1); + buf = pBuf; + } + tsdbInsertTableAct(pRepo, TSDB_UPDATE_META, buf, pTable); + + if (tsdbCheckCommit(pRepo) < 0) return -1; + + return 0; +} + +// ------------------ INTERNAL FUNCTIONS ------------------ +static int tsdbInsertNewTableAction(STsdbRepo *pRepo, STable *pTable) { + int tlen = 0; + void *pBuf = NULL; + + tlen = tsdbGetTableEncodeSize(TSDB_UPDATE_META, pTable); + pBuf = tsdbAllocBytes(pRepo, tlen); + if (pBuf == NULL) { + return -1; + } + void *tBuf = tsdbInsertTableAct(pRepo, TSDB_UPDATE_META, pBuf, pTable); + ASSERT(POINTER_DISTANCE(tBuf, pBuf) == tlen); + + return 0; +} + +STsdbMeta *tsdbNewMeta(STsdbCfg *pCfg) { + STsdbMeta *pMeta = (STsdbMeta *)calloc(1, sizeof(*pMeta)); + if (pMeta == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + int code = pthread_rwlock_init(&pMeta->rwLock, NULL); + if (code != 0) { + tsdbError("vgId:%d failed to init TSDB meta r/w lock since %s", pCfg->tsdbId, strerror(code)); + terrno = TAOS_SYSTEM_ERROR(code); + goto _err; + } + + pMeta->maxTables = TSDB_INIT_NTABLES + 1; + pMeta->tables = (STable **)calloc(pMeta->maxTables, sizeof(STable *)); + if (pMeta->tables == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + pMeta->superList = tdListNew(sizeof(STable *)); + if (pMeta->superList == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + pMeta->uidMap = + taosHashInit((size_t)(TSDB_INIT_NTABLES * 1.1), taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, false); + if (pMeta->uidMap == NULL) { 
+ terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + return pMeta; + +_err: + tsdbFreeMeta(pMeta); + return NULL; +} + +void tsdbFreeMeta(STsdbMeta *pMeta) { + if (pMeta) { + taosHashCleanup(pMeta->uidMap); + tdListFree(pMeta->superList); + tfree(pMeta->tables); + pthread_rwlock_destroy(&pMeta->rwLock); + free(pMeta); + } +} + +int tsdbOpenMeta(STsdbRepo *pRepo) { + return 0; +#if 0 + char * fname = NULL; + STsdbMeta *pMeta = pRepo->tsdbMeta; + ASSERT(pMeta != NULL); + + fname = tsdbGetMetaFileName(pRepo->rootDir); + if (fname == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + // pMeta->pStore = tdOpenKVStore(fname, tsdbRestoreTable, tsdbOrgMeta, (void *)pRepo); + // if (pMeta->pStore == NULL) { + // tsdbError("vgId:%d failed to open TSDB meta while open the kv store since %s", REPO_ID(pRepo), tstrerror(terrno)); + // goto _err; + // } + + tsdbDebug("vgId:%d open TSDB meta succeed", REPO_ID(pRepo)); + tfree(fname); + return 0; + +_err: + tfree(fname); + return -1; +#endif +} + +int tsdbCloseMeta(STsdbRepo *pRepo) { + STsdbMeta *pMeta = pRepo->tsdbMeta; + SListNode *pNode = NULL; + STable * pTable = NULL; + + if (pMeta == NULL) return 0; + // tdCloseKVStore(pMeta->pStore); + for (int i = 1; i < pMeta->maxTables; i++) { + tsdbFreeTable(pMeta->tables[i]); + } + + while ((pNode = tdListPopHead(pMeta->superList)) != NULL) { + tdListNodeGetData(pMeta->superList, pNode, (void *)(&pTable)); + tsdbFreeTable(pTable); + listNodeFree(pNode); + } + + tsdbDebug("vgId:%d TSDB meta is closed", REPO_ID(pRepo)); + return 0; +} + +STable *tsdbGetTableByUid(STsdbMeta *pMeta, uint64_t uid) { + void *ptr = taosHashGet(pMeta->uidMap, (char *)(&uid), sizeof(uid)); + + if (ptr == NULL) return NULL; + + return *(STable **)ptr; +} + +STSchema *tsdbGetTableSchemaByVersion(STable *pTable, int16_t _version, int8_t rowType) { + return tsdbGetTableSchemaImpl(pTable, true, false, _version, rowType); +} + +int tsdbWLockRepoMeta(STsdbRepo *pRepo) { + int code = 
pthread_rwlock_wrlock(&(pRepo->tsdbMeta->rwLock)); + if (code != 0) { + tsdbError("vgId:%d failed to write lock TSDB meta since %s", REPO_ID(pRepo), strerror(code)); + terrno = TAOS_SYSTEM_ERROR(code); + return -1; + } + + return 0; +} + +int tsdbRLockRepoMeta(STsdbRepo *pRepo) { + int code = pthread_rwlock_rdlock(&(pRepo->tsdbMeta->rwLock)); + if (code != 0) { + tsdbError("vgId:%d failed to read lock TSDB meta since %s", REPO_ID(pRepo), strerror(code)); + terrno = TAOS_SYSTEM_ERROR(code); + return -1; + } + + return 0; +} + +int tsdbUnlockRepoMeta(STsdbRepo *pRepo) { + int code = pthread_rwlock_unlock(&(pRepo->tsdbMeta->rwLock)); + if (code != 0) { + tsdbError("vgId:%d failed to unlock TSDB meta since %s", REPO_ID(pRepo), strerror(code)); + terrno = TAOS_SYSTEM_ERROR(code); + return -1; + } + + return 0; +} + +void tsdbRefTable(STable *pTable) { + int32_t ref = T_REF_INC(pTable); + UNUSED(ref); + tsdbDebug("ref table %s uid %" PRIu64 " tid:%d, refCount:%d", TABLE_CHAR_NAME(pTable), TABLE_UID(pTable), + TABLE_TID(pTable), ref); +} + +void tsdbUnRefTable(STable *pTable) { + uint64_t uid = TABLE_UID(pTable); + int32_t tid = TABLE_TID(pTable); + int32_t ref = T_REF_DEC(pTable); + + tsdbDebug("unref table, uid:%" PRIu64 " tid:%d, refCount:%d", uid, tid, ref); + + if (ref == 0) { + if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE) { + tsdbUnRefTable(pTable->pSuper); + } + tsdbFreeTable(pTable); + } +} + +void tsdbFreeLastColumns(STable *pTable) { + if (pTable->lastCols == NULL) { + return; + } + + for (int i = 0; i < pTable->maxColNum; ++i) { + if (pTable->lastCols[i].bytes == 0) { + continue; + } + tfree(pTable->lastCols[i].pData); + pTable->lastCols[i].bytes = 0; + pTable->lastCols[i].pData = NULL; + } + tfree(pTable->lastCols); + pTable->lastCols = NULL; + pTable->maxColNum = 0; + pTable->lastColSVersion = -1; + pTable->restoreColumnNum = 0; + pTable->hasRestoreLastColumn = false; +} + +int16_t tsdbGetLastColumnsIndexByColId(STable *pTable, int16_t colId) { + if 
(pTable->lastCols == NULL) { + return -1; + } + // TODO: use binary search instead + for (int16_t i = 0; i < pTable->maxColNum; ++i) { + if (pTable->lastCols[i].colId == colId) { + return i; + } + } + + return -1; +} + +int tsdbInitColIdCacheWithSchema(STable *pTable, STSchema *pSchema) { + TSDB_WLOCK_TABLE(pTable); + if (pTable->lastCols == NULL) { + int16_t numOfColumn = pSchema->numOfCols; + + pTable->lastCols = (SDataCol *)malloc(numOfColumn * sizeof(SDataCol)); + if (pTable->lastCols == NULL) { + TSDB_WUNLOCK_TABLE(pTable); + return -1; + } + + for (int16_t i = 0; i < numOfColumn; ++i) { + STColumn *pCol = schemaColAt(pSchema, i); + SDataCol *pDataCol = &(pTable->lastCols[i]); + pDataCol->bytes = 0; + pDataCol->pData = NULL; + pDataCol->colId = pCol->colId; + } + + pTable->lastColSVersion = schemaVersion(pSchema); + pTable->maxColNum = numOfColumn; + pTable->restoreColumnNum = 0; + pTable->hasRestoreLastColumn = false; + } + TSDB_WUNLOCK_TABLE(pTable); + return 0; +} + +STSchema *tsdbGetTableLatestSchema(STable *pTable) { return tsdbGetTableSchemaByVersion(pTable, -1, -1); } + +int tsdbUpdateLastColSchema(STable *pTable, STSchema *pNewSchema) { + if (pTable->lastColSVersion == schemaVersion(pNewSchema)) { + return 0; + } + + tsdbDebug("tsdbUpdateLastColSchema:%s,%d->%d", pTable->name->data, pTable->lastColSVersion, + schemaVersion(pNewSchema)); + + int16_t numOfCols = pNewSchema->numOfCols; + SDataCol *lastCols = (SDataCol *)malloc(numOfCols * sizeof(SDataCol)); + if (lastCols == NULL) { + return -1; + } + + TSDB_WLOCK_TABLE(pTable); + + for (int16_t i = 0; i < numOfCols; ++i) { + STColumn *pCol = schemaColAt(pNewSchema, i); + int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pCol->colId); + + SDataCol *pDataCol = &(lastCols[i]); + if (idx != -1) { + // move col data to new last column array + SDataCol *pOldDataCol = &(pTable->lastCols[idx]); + memcpy(pDataCol, pOldDataCol, sizeof(SDataCol)); + } else { + // init new colid data + pDataCol->colId = 
pCol->colId; + pDataCol->bytes = 0; + pDataCol->pData = NULL; + } + } + + SDataCol *oldLastCols = pTable->lastCols; + int16_t oldLastColNum = pTable->maxColNum; + + pTable->lastColSVersion = schemaVersion(pNewSchema); + pTable->lastCols = lastCols; + pTable->maxColNum = numOfCols; + + if (oldLastCols == NULL) { + TSDB_WUNLOCK_TABLE(pTable); + return 0; + } + + // free old schema last column datas + for (int16_t i = 0; i < oldLastColNum; ++i) { + SDataCol *pDataCol = &(oldLastCols[i]); + if (pDataCol->bytes == 0) { + continue; + } + int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pDataCol->colId); + if (idx != -1) { + continue; + } + + // free not exist column data + tfree(pDataCol->pData); + } + TSDB_WUNLOCK_TABLE(pTable); + tfree(oldLastCols); + + return 0; +} + +void tsdbUpdateTableSchema(STsdbRepo *pRepo, STable *pTable, STSchema *pSchema, bool insertAct) { + ASSERT(TABLE_TYPE(pTable) != TSDB_STREAM_TABLE && TABLE_TYPE(pTable) != TSDB_SUPER_TABLE); + STsdbMeta *pMeta = pRepo->tsdbMeta; + + STable *pCTable = (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE) ? 
pTable->pSuper : pTable; + ASSERT(schemaVersion(pSchema) > schemaVersion(*(STSchema **)taosArrayGetLast(pCTable->schema))); + + TSDB_WLOCK_TABLE(pCTable); + tsdbAddSchema(pCTable, pSchema); + + if (schemaNCols(pSchema) > pMeta->maxCols) pMeta->maxCols = schemaNCols(pSchema); + if (schemaTLen(pSchema) > pMeta->maxRowBytes) pMeta->maxRowBytes = schemaTLen(pSchema); + TSDB_WUNLOCK_TABLE(pCTable); + + if (insertAct) { + if (tsdbInsertNewTableAction(pRepo, pCTable) != 0) { + tsdbError("vgId:%d table %s tid %d uid %" PRIu64 " tsdbInsertNewTableAction fail", REPO_ID(pRepo), + TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable)); + } + } +} + +int tsdbRestoreTable(STsdbRepo *pRepo, void *cont, int contLen) { + STable *pTable = NULL; + + if (!taosCheckChecksumWhole((uint8_t *)cont, contLen)) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + return -1; + } + + tsdbDecodeTable(cont, &pTable); + + if (tsdbAddTableToMeta(pRepo, pTable, false, false) < 0) { + tsdbFreeTable(pTable); + return -1; + } + + tsdbTrace("vgId:%d table %s tid %d uid %" PRIu64 " is restored from file", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), + TABLE_TID(pTable), TABLE_UID(pTable)); + return 0; +} + +void tsdbOrgMeta(STsdbRepo *pRepo) { + STsdbMeta *pMeta = pRepo->tsdbMeta; + + for (int i = 1; i < pMeta->maxTables; i++) { + STable *pTable = pMeta->tables[i]; + if (pTable != NULL && pTable->type == TSDB_CHILD_TABLE) { + tsdbAddTableIntoIndex(pMeta, pTable, true); + } + } +} + +// ------------------ LOCAL FUNCTIONS ------------------ +static char *getTagIndexKey(const void *pData) { + STable *pTable = (STable *)pData; + + STSchema *pSchema = tsdbGetTableTagSchema(pTable); + STColumn *pCol = schemaColAt(pSchema, DEFAULT_TAG_INDEX_COLUMN); + void * res = tdGetKVRowValOfCol(pTable->tagVal, pCol->colId); + if (res == NULL) { + // treat the column as NULL if we cannot find it + res = (char *)getNullValue(pCol->type); + } + return res; +} + +static STable *tsdbNewTable() { + STable *pTable = (STable 
*)calloc(1, sizeof(*pTable)); + if (pTable == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return NULL; + } + + pTable->lastKey = TSKEY_INITIAL_VAL; + + pTable->lastCols = NULL; + pTable->restoreColumnNum = 0; + pTable->cacheLastConfigVersion = 0; + pTable->maxColNum = 0; + pTable->hasRestoreLastColumn = false; + pTable->lastColSVersion = -1; + return pTable; +} + +static STable *tsdbCreateTableFromCfg(STableCfg *pCfg, bool isSuper, STable *pSTable) { + STable *pTable = NULL; + size_t tsize = 0; + + pTable = tsdbNewTable(); + if (pTable == NULL) goto _err; + + if (isSuper) { + pTable->type = TSDB_SUPER_TABLE; + tsize = strnlen(pCfg->sname, TSDB_TABLE_NAME_LEN - 1); + pTable->name = calloc(1, tsize + VARSTR_HEADER_SIZE + 1); + if (pTable->name == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + STR_WITH_SIZE_TO_VARSTR(pTable->name, pCfg->sname, (VarDataLenT)tsize); + TABLE_UID(pTable) = pCfg->superUid; + TABLE_TID(pTable) = -1; + TABLE_SUID(pTable) = -1; + pTable->pSuper = NULL; + if (tsdbAddSchema(pTable, tdDupSchema(pCfg->schema)) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + pTable->tagSchema = tdDupSchema(pCfg->tagSchema); + if (pTable->tagSchema == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + pTable->tagVal = NULL; + STColumn *pCol = schemaColAt(pTable->tagSchema, DEFAULT_TAG_INDEX_COLUMN); + if (pCol->type == TSDB_DATA_TYPE_JSON) { + assert(pTable->tagSchema->numOfCols == 1); + pTable->jsonKeyMap = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + if (pTable->jsonKeyMap == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbFreeTable(pTable); + return NULL; + } + // taosHashSetFreeFp(pTable->jsonKeyMap, taosArrayDestroyForHash); + } else { + pTable->pIndex = tSkipListCreate(TSDB_SUPER_TABLE_SL_LEVEL, colType(pCol), (uint8_t)(colBytes(pCol)), NULL, + SL_ALLOW_DUP_KEY, getTagIndexKey); + if (pTable->pIndex == NULL) { + terrno = 
TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + } + } else { + pTable->type = pCfg->type; + tsize = strnlen(pCfg->name, TSDB_TABLE_NAME_LEN - 1); + pTable->name = calloc(1, tsize + VARSTR_HEADER_SIZE + 1); + if (pTable->name == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + STR_WITH_SIZE_TO_VARSTR(pTable->name, pCfg->name, (VarDataLenT)tsize); + TABLE_UID(pTable) = pCfg->tableId.uid; + TABLE_TID(pTable) = pCfg->tableId.tid; + + if (pCfg->type == TSDB_CHILD_TABLE) { + TABLE_SUID(pTable) = pCfg->superUid; + if (tsdbCheckTableTagVal(pCfg->tagValues, pSTable->tagSchema) < 0) { + goto _err; + } + pTable->tagVal = tdKVRowDup(pCfg->tagValues); + if (pTable->tagVal == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + } else { + TABLE_SUID(pTable) = -1; + if (tsdbAddSchema(pTable, tdDupSchema(pCfg->schema)) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + + if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) { + pTable->sql = strdup(pCfg->sql); + if (pTable->sql == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _err; + } + } + } + } + + T_REF_INC(pTable); + + tsdbDebug("table %s tid %d uid %" PRIu64 " is created", TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), + TABLE_UID(pTable)); + + return pTable; + +_err: + tsdbFreeTable(pTable); + return NULL; +} + +static void tsdbFreeTable(STable *pTable) { + if (pTable) { + if (pTable->name != NULL) + tsdbTrace("table %s tid %d uid %" PRIu64 " is freed", TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), + TABLE_UID(pTable)); + tfree(TABLE_NAME(pTable)); + if (TABLE_TYPE(pTable) != TSDB_CHILD_TABLE) { + tsdbFreeTableSchema(pTable); + + if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) { + tdFreeSchema(pTable->tagSchema); + } + } + + kvRowFree(pTable->tagVal); + + tSkipListDestroy(pTable->pIndex); + taosHashCleanup(pTable->jsonKeyMap); + taosTZfree(pTable->lastRow); + tfree(pTable->sql); + + tsdbFreeLastColumns(pTable); + free(pTable); + } +} + +static int tsdbAddTableToMeta(STsdbRepo *pRepo, 
STable *pTable, bool addIdx, bool lock) { + STsdbMeta *pMeta = pRepo->tsdbMeta; + + if (lock && tsdbWLockRepoMeta(pRepo) < 0) { + tsdbError("vgId:%d failed to add table %s to meta since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), + tstrerror(terrno)); + return -1; + } + + if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) { + if (tdListAppend(pMeta->superList, (void *)(&pTable)) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbError("vgId:%d failed to add table %s to meta since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), + tstrerror(terrno)); + goto _err; + } + } else { + if (TABLE_TID(pTable) >= pMeta->maxTables) { + if (tsdbAdjustMetaTables(pRepo, TABLE_TID(pTable)) < 0) goto _err; + } + if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE && addIdx) { // add STABLE to the index + if (tsdbAddTableIntoIndex(pMeta, pTable, true) < 0) { + tsdbDebug("vgId:%d failed to add table %s to meta while add table to index since %s", REPO_ID(pRepo), + TABLE_CHAR_NAME(pTable), tstrerror(terrno)); + goto _err; + } + } + ASSERT(TABLE_TID(pTable) < pMeta->maxTables); + pMeta->tables[TABLE_TID(pTable)] = pTable; + pMeta->nTables++; + } + + if (taosHashPut(pMeta->uidMap, (char *)(&pTable->tableId.uid), sizeof(pTable->tableId.uid), (void *)(&pTable), + sizeof(pTable)) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbError("vgId:%d failed to add table %s to meta while put into uid map since %s", REPO_ID(pRepo), + TABLE_CHAR_NAME(pTable), tstrerror(terrno)); + goto _err; + } + + if (TABLE_TYPE(pTable) != TSDB_CHILD_TABLE) { + STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1, -1); + if (schemaNCols(pSchema) > pMeta->maxCols) pMeta->maxCols = schemaNCols(pSchema); + if (schemaTLen(pSchema) > pMeta->maxRowBytes) pMeta->maxRowBytes = schemaTLen(pSchema); + } + + if (lock && tsdbUnlockRepoMeta(pRepo) < 0) return -1; + if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE && addIdx) { + pTable->cqhandle = + (*pRepo->appH.cqCreateFunc)(pRepo->appH.cqH, TABLE_UID(pTable), 
TABLE_TID(pTable), TABLE_NAME(pTable)->data, + pTable->sql, tsdbGetTableSchemaImpl(pTable, false, false, -1, -1), 1); + } + + tsdbDebug("vgId:%d table %s tid %d uid %" PRIu64 " is added to meta", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), + TABLE_TID(pTable), TABLE_UID(pTable)); + return 0; + +_err: + tsdbRemoveTableFromMeta(pRepo, pTable, false, false); + if (lock) tsdbUnlockRepoMeta(pRepo); + return -1; +} + +static void tsdbRemoveTableFromMeta(STsdbRepo *pRepo, STable *pTable, bool rmFromIdx, bool lock) { + STsdbMeta *pMeta = pRepo->tsdbMeta; + SListIter lIter = {0}; + SListNode *pNode = NULL; + STable * tTable = NULL; + + STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1, -1); + int maxCols = schemaNCols(pSchema); + int maxRowBytes = schemaTLen(pSchema); + + if (lock) tsdbWLockRepoMeta(pRepo); + + if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) { + tdListInitIter(pMeta->superList, &lIter, TD_LIST_BACKWARD); + + while ((pNode = tdListNext(&lIter)) != NULL) { + tdListNodeGetData(pMeta->superList, pNode, (void *)(&tTable)); + if (pTable == tTable) { + tdListPopNode(pMeta->superList, pNode); + free(pNode); + break; + } + } + } else { + pMeta->tables[pTable->tableId.tid] = NULL; + if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE && rmFromIdx) { + tsdbRemoveTableFromIndex(pMeta, pTable); + } + + pMeta->nTables--; + } + + taosHashRemove(pMeta->uidMap, (char *)(&(TABLE_UID(pTable))), sizeof(TABLE_UID(pTable))); + + if (maxCols == pMeta->maxCols || maxRowBytes == pMeta->maxRowBytes) { + maxCols = 0; + maxRowBytes = 0; + for (int i = 0; i < pMeta->maxTables; i++) { + STable *_pTable = pMeta->tables[i]; + if (_pTable != NULL) { + pSchema = tsdbGetTableSchemaImpl(_pTable, false, false, -1, -1); + maxCols = MAX(maxCols, schemaNCols(pSchema)); + maxRowBytes = MAX(maxRowBytes, schemaTLen(pSchema)); + } + } + } + pMeta->maxCols = maxCols; + pMeta->maxRowBytes = maxRowBytes; + + if (lock) tsdbUnlockRepoMeta(pRepo); + tsdbDebug("vgId:%d table %s uid %" PRIu64 " is removed 
from meta", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), + TABLE_UID(pTable)); + tsdbUnRefTable(pTable); +} + +void *tsdbGetJsonTagValue(STable *pTable, char *key, int32_t keyLen, int16_t *retColId) { + assert(TABLE_TYPE(pTable) == TSDB_CHILD_TABLE); + STable * superTable = pTable->pSuper; + SArray **data = (SArray **)taosHashGet(superTable->jsonKeyMap, key, keyLen); + if (data == NULL) return NULL; + JsonMapValue jmvalue = {pTable, 0}; + JsonMapValue *p = taosArraySearch(*data, &jmvalue, tsdbCompareJsonMapValue, TD_EQ); + if (p == NULL) return NULL; + int16_t colId = p->colId + 1; + if (retColId) *retColId = p->colId; + return tdGetKVRowValOfCol(pTable->tagVal, colId); +} + +int tsdbCompareJsonMapValue(const void *a, const void *b) { + const JsonMapValue *x = (const JsonMapValue *)a; + const JsonMapValue *y = (const JsonMapValue *)b; + if (x->table > y->table) return 1; + if (x->table < y->table) return -1; + return 0; +} + +static int tsdbAddTableIntoIndex(STsdbMeta *pMeta, STable *pTable, bool refSuper) { + ASSERT(pTable->type == TSDB_CHILD_TABLE && pTable != NULL); + STable *pSTable = tsdbGetTableByUid(pMeta, TABLE_SUID(pTable)); + ASSERT(pSTable != NULL); + + pTable->pSuper = pSTable; + if (refSuper) T_REF_INC(pSTable); + + if (pSTable->tagSchema->columns[0].type == TSDB_DATA_TYPE_JSON) { + ASSERT(pSTable->tagSchema->numOfCols == 1); + int16_t nCols = kvRowNCols(pTable->tagVal); + ASSERT(nCols % 2 == 1); + // check first + for (int j = 0; j < nCols; ++j) { + if (j != 0 && j % 2 == 0) continue; // jump value + SColIdx *pColIdx = kvRowColIdxAt(pTable->tagVal, j); + void * val = (kvRowColVal(pTable->tagVal, pColIdx)); + if (j == 0) { // json value is the first + int8_t jsonPlaceHolder = *(int8_t *)val; + ASSERT(jsonPlaceHolder == TSDB_DATA_JSON_PLACEHOLDER); + continue; + } + if (j == 1) { + uint32_t jsonNULL = *(uint32_t *)(varDataVal(val)); + ASSERT(jsonNULL == TSDB_DATA_JSON_NULL); + } + + // then insert + char keyMd5[TSDB_MAX_JSON_KEY_MD5_LEN] = {0}; + 
jsonKeyMd5(varDataVal(val), varDataLen(val), keyMd5); + SArray * tablistNew = NULL; + SArray **tablist = (SArray **)taosHashGet(pSTable->jsonKeyMap, keyMd5, TSDB_MAX_JSON_KEY_MD5_LEN); + if (tablist == NULL) { + tablistNew = taosArrayInit(8, sizeof(JsonMapValue)); + if (tablistNew == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbError("out of memory when alloc json tag array"); + return -1; + } + if (taosHashPut(pSTable->jsonKeyMap, keyMd5, TSDB_MAX_JSON_KEY_MD5_LEN, &tablistNew, sizeof(void *)) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbError("out of memory when put json tag array"); + return -1; + } + } else { + tablistNew = *tablist; + } + + JsonMapValue jmvalue = {pTable, pColIdx->colId}; + void * p = taosArraySearch(tablistNew, &jmvalue, tsdbCompareJsonMapValue, TD_EQ); + if (p == NULL) { + p = taosArraySearch(tablistNew, &jmvalue, tsdbCompareJsonMapValue, TD_GE); + if (p == NULL) { + taosArrayPush(tablistNew, &jmvalue); + } else { + taosArrayInsert(tablistNew, TARRAY_ELEM_IDX(tablistNew, p), &jmvalue); + } + } else { + tsdbError("insert dumplicate"); + } + } + } else { + tSkipListPut(pSTable->pIndex, (void *)pTable); + } + + return 0; +} + +static int tsdbRemoveTableFromIndex(STsdbMeta *pMeta, STable *pTable) { + ASSERT(pTable->type == TSDB_CHILD_TABLE && pTable != NULL); + + STable *pSTable = pTable->pSuper; + ASSERT(pSTable != NULL); + + if (pSTable->tagSchema->columns[0].type == TSDB_DATA_TYPE_JSON) { + ASSERT(pSTable->tagSchema->numOfCols == 1); + int16_t nCols = kvRowNCols(pTable->tagVal); + ASSERT(nCols % 2 == 1); + for (int j = 0; j < nCols; ++j) { + if (j != 0 && j % 2 == 0) continue; // jump value + SColIdx *pColIdx = kvRowColIdxAt(pTable->tagVal, j); + void * val = (kvRowColVal(pTable->tagVal, pColIdx)); + if (j == 0) { // json value is the first + int8_t jsonPlaceHolder = *(int8_t *)val; + ASSERT(jsonPlaceHolder == TSDB_DATA_JSON_PLACEHOLDER); + continue; + } + if (j == 1) { + uint32_t jsonNULL = *(uint32_t *)(varDataVal(val)); + 
ASSERT(jsonNULL == TSDB_DATA_JSON_NULL); + } + + char keyMd5[TSDB_MAX_JSON_KEY_MD5_LEN] = {0}; + jsonKeyMd5(varDataVal(val), varDataLen(val), keyMd5); + SArray **tablist = (SArray **)taosHashGet(pSTable->jsonKeyMap, keyMd5, TSDB_MAX_JSON_KEY_MD5_LEN); + if (tablist == NULL) { + tsdbError("json tag no key error,%d", j); + continue; + } + + JsonMapValue jmvalue = {pTable, pColIdx->colId}; + void * p = taosArraySearch(*tablist, &jmvalue, tsdbCompareJsonMapValue, TD_EQ); + if (p == NULL) { + tsdbError("json tag no tableid error,%d", j); + continue; + } + taosArrayRemove(*tablist, TARRAY_ELEM_IDX(*tablist, p)); + } + } else { + char * key = getTagIndexKey(pTable); + SArray *res = tSkipListGet(pSTable->pIndex, key); + + size_t size = taosArrayGetSize(res); + ASSERT(size > 0); + + for (int32_t i = 0; i < size; ++i) { + SSkipListNode *pNode = taosArrayGetP(res, i); + + // STableIndexElem* pElem = (STableIndexElem*) SL_GET_NODE_DATA(pNode); + if ((STable *)SL_GET_NODE_DATA(pNode) == pTable) { // this is the exact what we need + tSkipListRemoveNode(pSTable->pIndex, pNode); + } + } + + taosArrayDestroy(res); + } + return 0; +} + +static int tsdbInitTableCfg(STableCfg *config, ETableType type, uint64_t uid, int32_t tid) { + if (type != TSDB_CHILD_TABLE && type != TSDB_NORMAL_TABLE && type != TSDB_STREAM_TABLE) { + terrno = TSDB_CODE_TDB_INVALID_TABLE_TYPE; + return -1; + } + + memset((void *)config, 0, sizeof(*config)); + + config->type = type; + config->superUid = TSDB_INVALID_SUPER_TABLE_ID; + config->tableId.uid = uid; + config->tableId.tid = tid; + return 0; +} + +static int tsdbTableSetSchema(STableCfg *config, STSchema *pSchema, bool dup) { + if (dup) { + config->schema = tdDupSchema(pSchema); + if (config->schema == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + } else { + config->schema = pSchema; + } + return 0; +} + +static int tsdbTableSetName(STableCfg *config, char *name, bool dup) { + if (dup) { + config->name = strdup(name); + if 
(config->name == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + } else { + config->name = name; + } + + return 0; +} + +static int tsdbTableSetTagSchema(STableCfg *config, STSchema *pSchema, bool dup) { + if (config->type != TSDB_CHILD_TABLE) { + terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG; + return -1; + } + + if (dup) { + config->tagSchema = tdDupSchema(pSchema); + if (config->tagSchema == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + } else { + config->tagSchema = pSchema; + } + return 0; +} + +static int tsdbTableSetSName(STableCfg *config, char *sname, bool dup) { + if (config->type != TSDB_CHILD_TABLE) { + terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG; + return -1; + } + + if (dup) { + config->sname = strdup(sname); + if (config->sname == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + } else { + config->sname = sname; + } + return 0; +} + +static int tsdbTableSetSuperUid(STableCfg *config, uint64_t uid) { + if (config->type != TSDB_CHILD_TABLE || uid == TSDB_INVALID_SUPER_TABLE_ID) { + terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG; + return -1; + } + + config->superUid = uid; + return 0; +} + +static int tsdbTableSetTagValue(STableCfg *config, SKVRow row, bool dup) { + if (config->type != TSDB_CHILD_TABLE) { + terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG; + return -1; + } + + if (dup) { + config->tagValues = tdKVRowDup(row); + if (config->tagValues == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + } else { + config->tagValues = row; + } + + return 0; +} + +static int tsdbTableSetStreamSql(STableCfg *config, char *sql, bool dup) { + if (config->type != TSDB_STREAM_TABLE) { + terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG; + return -1; + } + + if (dup) { + config->sql = strdup(sql); + if (config->sql == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + } else { + config->sql = sql; + } + + return 0; +} + +void tsdbClearTableCfg(STableCfg *config) { + if (config) { + 
if (config->schema) tdFreeSchema(config->schema); + if (config->tagSchema) tdFreeSchema(config->tagSchema); + if (config->tagValues) kvRowFree(config->tagValues); + tfree(config->name); + tfree(config->sname); + tfree(config->sql); + free(config); + } +} + +static int tsdbEncodeTableName(void **buf, tstr *name) { + int tlen = 0; + + tlen += taosEncodeFixedI16(buf, name->len); + if (buf != NULL) { + memcpy(*buf, name->data, name->len); + *buf = POINTER_SHIFT(*buf, name->len); + } + tlen += name->len; + + return tlen; +} + +static void *tsdbDecodeTableName(void *buf, tstr **name) { + VarDataLenT len = 0; + + buf = taosDecodeFixedI16(buf, &len); + *name = calloc(1, sizeof(tstr) + len + 1); + if (*name == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return NULL; + } + (*name)->len = len; + memcpy((*name)->data, buf, len); + + buf = POINTER_SHIFT(buf, len); + return buf; +} + +static int tsdbEncodeTable(void **buf, STable *pTable) { + ASSERT(pTable != NULL); + int tlen = 0; + + tlen += taosEncodeFixedU8(buf, pTable->type); + tlen += tsdbEncodeTableName(buf, pTable->name); + tlen += taosEncodeFixedU64(buf, TABLE_UID(pTable)); + tlen += taosEncodeFixedI32(buf, TABLE_TID(pTable)); + + if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE) { + tlen += taosEncodeFixedU64(buf, TABLE_SUID(pTable)); + tlen += tdEncodeKVRow(buf, pTable->tagVal); + } else { + uint32_t arraySize = (uint32_t)taosArrayGetSize(pTable->schema); + if (arraySize > UINT8_MAX) { + tlen += taosEncodeFixedU8(buf, 0); + tlen += taosEncodeFixedU32(buf, arraySize); + } else { + tlen += taosEncodeFixedU8(buf, (uint8_t)arraySize); + } + for (uint32_t i = 0; i < arraySize; i++) { + STSchema *pSchema = taosArrayGetP(pTable->schema, i); + tlen += tdEncodeSchema(buf, pSchema); + } + + if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) { + tlen += tdEncodeSchema(buf, pTable->tagSchema); + } + + if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) { + tlen += taosEncodeString(buf, pTable->sql); + } + } + + return tlen; +} + +static void 
*tsdbDecodeTable(void *buf, STable **pRTable) { + STable *pTable = tsdbNewTable(); + if (pTable == NULL) return NULL; + + uint8_t type = 0; + + buf = taosDecodeFixedU8(buf, &type); + pTable->type = type; + buf = tsdbDecodeTableName(buf, &(pTable->name)); + buf = taosDecodeFixedU64(buf, &TABLE_UID(pTable)); + buf = taosDecodeFixedI32(buf, &TABLE_TID(pTable)); + + if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE) { + buf = taosDecodeFixedU64(buf, &TABLE_SUID(pTable)); + buf = tdDecodeKVRow(buf, &(pTable->tagVal)); + } else { + uint32_t nSchemas = 0; + buf = taosDecodeFixedU8(buf, (uint8_t *)&nSchemas); + if (nSchemas == 0) { + buf = taosDecodeFixedU32(buf, &nSchemas); + } + for (int i = 0; i < nSchemas; i++) { + STSchema *pSchema; + buf = tdDecodeSchema(buf, &pSchema); + tsdbAddSchema(pTable, pSchema); + } + + if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) { + buf = tdDecodeSchema(buf, &(pTable->tagSchema)); + STColumn *pCol = schemaColAt(pTable->tagSchema, DEFAULT_TAG_INDEX_COLUMN); + if (pCol->type == TSDB_DATA_TYPE_JSON) { + assert(pTable->tagSchema->numOfCols == 1); + pTable->jsonKeyMap = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + if (pTable->jsonKeyMap == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbFreeTable(pTable); + return NULL; + } + // taosHashSetFreeFp(pTable->jsonKeyMap, taosArrayDestroyForHash); + } else { + pTable->pIndex = tSkipListCreate(TSDB_SUPER_TABLE_SL_LEVEL, colType(pCol), (uint8_t)(colBytes(pCol)), NULL, + SL_ALLOW_DUP_KEY, getTagIndexKey); + if (pTable->pIndex == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbFreeTable(pTable); + return NULL; + } + } + } + + if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) { + buf = taosDecodeString(buf, &(pTable->sql)); + } + } + + T_REF_INC(pTable); + + *pRTable = pTable; + + return buf; +} + +static SArray *getJsonTagTableList(STable *pTable) { + uint32_t key = TSDB_DATA_JSON_NULL; + char keyMd5[TSDB_MAX_JSON_KEY_MD5_LEN] = {0}; + jsonKeyMd5(&key, 
INT_BYTES, keyMd5); + SArray **tablist = (SArray **)taosHashGet(pTable->jsonKeyMap, keyMd5, TSDB_MAX_JSON_KEY_MD5_LEN); + + return *tablist; +} + +static int tsdbGetTableEncodeSize(int8_t act, STable *pTable) { + int tlen = 0; + if (act == TSDB_UPDATE_META) { + tlen = sizeof(SListNode) + sizeof(SActObj) + sizeof(SActCont) + tsdbEncodeTable(NULL, pTable) + sizeof(TSCKSUM); + } else { + if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) { + size_t tableSize = 0; + if (pTable->tagSchema->columns[0].type == TSDB_DATA_TYPE_JSON) { + SArray *tablist = getJsonTagTableList(pTable); + tableSize = taosArrayGetSize(tablist); + } else { + tableSize = SL_SIZE(pTable->pIndex); + } + tlen = (int)((sizeof(SListNode) + sizeof(SActObj)) * (tableSize + 1)); + } else { + tlen = sizeof(SListNode) + sizeof(SActObj); + } + } + + return tlen; +} + +static void *tsdbInsertTableAct(STsdbRepo *pRepo, int8_t act, void *buf, STable *pTable) { + SListNode *pNode = (SListNode *)buf; + SActObj * pAct = (SActObj *)(pNode->data); + SActCont * pCont = (SActCont *)POINTER_SHIFT(pAct, sizeof(*pAct)); + void * pBuf = (void *)pCont; + + TD_DLIST_NODE_PREV(pNode) = TD_DLIST_NODE_NEXT(pNode) = NULL; + pAct->act = act; + pAct->uid = TABLE_UID(pTable); + + if (act == TSDB_UPDATE_META) { + pBuf = (void *)(pCont->cont); + pCont->len = tsdbEncodeTable(&pBuf, pTable) + sizeof(TSCKSUM); + taosCalcChecksumAppend(0, (uint8_t *)pCont->cont, pCont->len); + pBuf = POINTER_SHIFT(pBuf, sizeof(TSCKSUM)); + } + + tdListAppendNode(pRepo->mem->actList, pNode); + + return pBuf; +} + +static int tsdbRemoveTableFromStore(STsdbRepo *pRepo, STable *pTable) { + int tlen = tsdbGetTableEncodeSize(TSDB_DROP_META, pTable); + void *buf = tsdbAllocBytes(pRepo, tlen); + if (buf == NULL) { + return -1; + } + + void *pBuf = buf; + if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) { + if (pTable->tagSchema->columns[0].type == TSDB_DATA_TYPE_JSON) { + SArray *tablist = getJsonTagTableList(pTable); + for (int i = 0; i < taosArrayGetSize(tablist); ++i) { 
+ JsonMapValue *p = taosArrayGet(tablist, i); + ASSERT(TABLE_TYPE((STable *)(p->table)) == TSDB_CHILD_TABLE); + pBuf = tsdbInsertTableAct(pRepo, TSDB_DROP_META, pBuf, p->table); + } + } else { + SSkipListIterator *pIter = tSkipListCreateIter(pTable->pIndex); + if (pIter == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + while (tSkipListIterNext(pIter)) { + STable *tTable = (STable *)SL_GET_NODE_DATA(tSkipListIterGet(pIter)); + ASSERT(TABLE_TYPE(tTable) == TSDB_CHILD_TABLE); + pBuf = tsdbInsertTableAct(pRepo, TSDB_DROP_META, pBuf, tTable); + } + + tSkipListDestroyIter(pIter); + } + } + pBuf = tsdbInsertTableAct(pRepo, TSDB_DROP_META, pBuf, pTable); + + ASSERT(POINTER_DISTANCE(pBuf, buf) == tlen); + + return 0; +} + +static int tsdbRmTableFromMeta(STsdbRepo *pRepo, STable *pTable) { + if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) { + tsdbWLockRepoMeta(pRepo); + if (pTable->tagSchema->columns[0].type == TSDB_DATA_TYPE_JSON) { + SArray *tablist = getJsonTagTableList(pTable); + for (int i = 0; i < taosArrayGetSize(tablist); ++i) { + JsonMapValue *p = taosArrayGet(tablist, i); + tsdbRemoveTableFromMeta(pRepo, p->table, false, false); + } + } else { + SSkipListIterator *pIter = tSkipListCreateIter(pTable->pIndex); + if (pIter == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + while (tSkipListIterNext(pIter)) { + STable *tTable = (STable *)SL_GET_NODE_DATA(tSkipListIterGet(pIter)); + tsdbRemoveTableFromMeta(pRepo, tTable, false, false); + } + tSkipListDestroyIter(pIter); + } + tsdbRemoveTableFromMeta(pRepo, pTable, false, false); + tsdbUnlockRepoMeta(pRepo); + } else { + if ((TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) && pTable->cqhandle) pRepo->appH.cqDropFunc(pTable->cqhandle); + tsdbRemoveTableFromMeta(pRepo, pTable, true, true); + } + + return 0; +} + +static int tsdbAdjustMetaTables(STsdbRepo *pRepo, int tid) { +#if 0 + STsdbMeta *pMeta = pRepo->tsdbMeta; + ASSERT(tid >= pMeta->maxTables); + + int maxTables = 
tsdbGetNextMaxTables(tid); + + STable **tables = (STable **)calloc(maxTables, sizeof(STable *)); + if (tables == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + memcpy((void *)tables, (void *)pMeta->tables, sizeof(STable *) * pMeta->maxTables); + pMeta->maxTables = maxTables; + + STable **tTables = pMeta->tables; + pMeta->tables = tables; + tfree(tTables); + tsdbDebug("vgId:%d tsdb meta maxTables is adjusted as %d", REPO_ID(pRepo), maxTables); + +#endif + return 0; +} + +static int tsdbCheckTableTagVal(SKVRow *pKVRow, STSchema *pSchema) { + for (size_t i = 0; i < kvRowNCols(pKVRow); i++) { + SColIdx * pColIdx = kvRowColIdxAt(pKVRow, i); + STColumn *pCol = tdGetColOfID(pSchema, pColIdx->colId); + + if ((pCol == NULL) || (!IS_VAR_DATA_TYPE(pCol->type))) continue; + + void *pValue = tdGetKVRowValOfCol(pKVRow, pCol->colId); + if (varDataTLen(pValue) > pCol->bytes) { + terrno = TSDB_CODE_TDB_IVLD_TAG_VAL; + return -1; + } + } + + return 0; +} + +static int tsdbAddSchema(STable *pTable, STSchema *pSchema) { + ASSERT(TABLE_TYPE(pTable) != TSDB_CHILD_TABLE); + + if (pTable->schema == NULL) { + pTable->schema = taosArrayInit(TSDB_MAX_TABLE_SCHEMAS, sizeof(SSchema *)); + if (pTable->schema == NULL) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + } + + ASSERT(taosArrayGetSize(pTable->schema) == 0 || + schemaVersion(pSchema) > schemaVersion(*(STSchema **)taosArrayGetLast(pTable->schema))); + + if (taosArrayPush(pTable->schema, &pSchema) == NULL) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + return 0; +} + +static void tsdbFreeTableSchema(STable *pTable) { + ASSERT(pTable != NULL); + + if (pTable->schema) { + for (size_t i = 0; i < taosArrayGetSize(pTable->schema); i++) { + STSchema *pSchema = taosArrayGetP(pTable->schema, i); + tdFreeSchema(pSchema); + } + + taosArrayDestroy(pTable->schema); + } +} +#endif \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/src/tsdbRead.c b/source/dnode/vnode/tsdb2/src/tsdbRead.c new 
file mode 100644 index 0000000000..63d4447456 --- /dev/null +++ b/source/dnode/vnode/tsdb2/src/tsdbRead.c @@ -0,0 +1,4580 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#if 0 + +#include "os.h" +#include "tdataformat.h" +#include "tskiplist.h" +#include "talgo.h" +#include "tcompare.h" +#include "exception.h" + +#include "taosdef.h" +#include "tlosertree.h" +#include "tsdbint.h" +// #include "texpr.h" +// #include "qFilter.h" +#include "cJSON.h" + +#define EXTRA_BYTES 2 +#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) +#define QH_GET_NUM_OF_COLS(handle) ((size_t)(taosArrayGetSize((handle)->pColumns))) + +#define GET_FILE_DATA_BLOCK_INFO(_checkInfo, _block) \ + ((SDataBlockInfo){.window = {.skey = (_block)->keyFirst, .ekey = (_block)->keyLast}, \ + .numOfCols = (_block)->numOfCols, \ + .rows = (_block)->numOfRows, \ + .tid = (_checkInfo)->tableId.tid, \ + .uid = (_checkInfo)->tableId.uid}) + +// limit offset start optimization for rows read over this value +#define OFFSET_SKIP_THRESHOLD 5000 + +enum { + TSDB_QUERY_TYPE_ALL = 1, + TSDB_QUERY_TYPE_LAST = 2, +}; + +enum { + TSDB_CACHED_TYPE_NONE = 0, + TSDB_CACHED_TYPE_LASTROW = 1, + TSDB_CACHED_TYPE_LAST = 2, +}; + +typedef struct SQueryFilePos { + int32_t fid; + int32_t slot; + int32_t pos; + int64_t lastKey; + int32_t rows; + bool mixBlock; + bool blockCompleted; + STimeWindow win; +} SQueryFilePos; + +typedef struct SDataBlockLoadInfo { + SDFileSet* fileGroup; + int32_t 
slot; + int32_t tid; + SArray* pLoadedCols; +} SDataBlockLoadInfo; + +typedef struct SLoadCompBlockInfo { + int32_t tid; /* table tid */ + int32_t fileId; +} SLoadCompBlockInfo; + +enum { + CHECKINFO_CHOSEN_MEM = 0, + CHECKINFO_CHOSEN_IMEM = 1, + CHECKINFO_CHOSEN_BOTH = 2 //for update=2(merge case) +}; + + +typedef struct STableCheckInfo { + STableId tableId; + TSKEY lastKey; + STable* pTableObj; + SBlockInfo* pCompInfo; + int32_t compSize; + int32_t numOfBlocks:29; // number of qualified data blocks not the original blocks + uint8_t chosen:2; // indicate which iterator should move forward + bool initBuf; // whether to initialize the in-memory skip list iterator or not + SSkipListIterator* iter; // mem buffer skip list iterator + SSkipListIterator* iiter; // imem buffer skip list iterator +} STableCheckInfo; + +typedef struct STableBlockInfo { + SBlock *compBlock; + STableCheckInfo *pTableCheckInfo; +} STableBlockInfo; + +typedef struct SBlockOrderSupporter { + int32_t numOfTables; + STableBlockInfo** pDataBlockInfo; + int32_t* blockIndexArray; + int32_t* numOfBlocksPerTable; +} SBlockOrderSupporter; + +typedef struct SIOCostSummary { + int64_t blockLoadTime; + int64_t statisInfoLoadTime; + int64_t checkForNextTime; + int64_t headFileLoad; + int64_t headFileLoadTime; +} SIOCostSummary; + +typedef struct STsdbQueryHandle { + STsdbRepo* pTsdb; + SQueryFilePos cur; // current position + int16_t order; + int64_t offset; // limit offset + int64_t srows; // skip offset rows + int64_t frows; // forbid skip offset rows + STimeWindow window; // the primary query time window that applies to all queries + SDataStatis* statis; // query level statistics, only one table block statistics info exists at any time + int32_t numOfBlocks; + SArray* pColumns; // column list, SColumnInfoData array list + bool locateStart; + int32_t outputCapacity; + int32_t realNumOfRows; + SArray* pTableCheckInfo; // SArray + int32_t activeIndex; + bool checkFiles; // check file stage + int8_t 
cachelastrow; // check if last row cached + bool loadExternalRow; // load time window external data rows + bool currentLoadExternalRows; // current load external rows + int32_t loadType; // block load type + uint64_t qId; // query info handle, for debug purpose + int32_t type; // query type: retrieve all data blocks, 2. retrieve only last row, 3. retrieve direct prev|next rows + SDFileSet* pFileGroup; + SFSIter fileIter; + SReadH rhelper; + STableBlockInfo* pDataBlockInfo; + SDataCols *pDataCols; // in order to hold current file data block + int32_t allocSize; // allocated data block size + SMemRef *pMemRef; + SArray *defaultLoadColumn;// default load column + SDataBlockLoadInfo dataBlockLoadInfo; /* record current block load information */ + SLoadCompBlockInfo compBlockLoadInfo; /* record current compblock information in SQueryAttr */ + + SArray *prev; // previous row which is before than time window + SArray *next; // next row which is after the query time window + SIOCostSummary cost; +} STsdbQueryHandle; + +typedef struct STableGroupSupporter { + int32_t numOfCols; + SColIndex* pCols; + STSchema* pTagSchema; +} STableGroupSupporter; + +typedef struct SRange { + int32_t from; + int32_t to; +} SRange; + +static STimeWindow updateLastrowForEachGroup(STableGroupInfo *groupList); +static int32_t checkForCachedLastRow(STsdbQueryHandle* pQueryHandle, STableGroupInfo *groupList); +static int32_t checkForCachedLast(STsdbQueryHandle* pQueryHandle); +static int32_t lazyLoadCacheLast(STsdbQueryHandle* pQueryHandle); +static int32_t tsdbGetCachedLastRow(STable* pTable, SMemRow* pRes, TSKEY* lastKey); + +static void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle); +static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SBlock* pBlock); +static int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order); +static int32_t tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int maxRowsToRead, STimeWindow* 
win, STsdbQueryHandle* pQueryHandle); +static int32_t tsdbCheckInfoCompar(const void* key1, const void* key2); +static int32_t doGetExternalRow(STsdbQueryHandle* pQueryHandle, int16_t type, SMemRef* pMemRef); +static void* doFreeColumnInfoData(SArray* pColumnInfoData); +static void* destroyTableCheckInfo(SArray* pTableCheckInfo); +static bool tsdbGetExternalRow(TsdbQueryHandleT pHandle); +static int32_t tsdbQueryTableList(STable* pTable, SArray* pRes, void* filterInfo); + +static void tsdbInitDataBlockLoadInfo(SDataBlockLoadInfo* pBlockLoadInfo) { + pBlockLoadInfo->slot = -1; + pBlockLoadInfo->tid = -1; + pBlockLoadInfo->fileGroup = NULL; +} + +static void tsdbInitCompBlockLoadInfo(SLoadCompBlockInfo* pCompBlockLoadInfo) { + pCompBlockLoadInfo->tid = -1; + pCompBlockLoadInfo->fileId = -1; +} + +static SArray* getColumnIdList(STsdbQueryHandle* pQueryHandle) { + size_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle); + assert(numOfCols <= TSDB_MAX_COLUMNS); + + SArray* pIdList = taosArrayInit(numOfCols, sizeof(int16_t)); + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i); + taosArrayPush(pIdList, &pCol->info.colId); + } + + return pIdList; +} + +static SArray* getDefaultLoadColumns(STsdbQueryHandle* pQueryHandle, bool loadTS) { + SArray* pLocalIdList = getColumnIdList(pQueryHandle); + + // check if the primary time stamp column needs to load + int16_t colId = *(int16_t*)taosArrayGet(pLocalIdList, 0); + + // the primary timestamp column does not be included in the the specified load column list, add it + if (loadTS && colId != 0) { + int16_t columnId = 0; + taosArrayInsert(pLocalIdList, 0, &columnId); + } + + return pLocalIdList; +} + +static void tsdbMayTakeMemSnapshot(STsdbQueryHandle* pQueryHandle, SArray* psTable) { + assert(pQueryHandle != NULL && pQueryHandle->pMemRef != NULL); + + SMemRef* pMemRef = pQueryHandle->pMemRef; + if (pQueryHandle->pMemRef->ref++ == 0) { + tsdbTakeMemSnapshot(pQueryHandle->pTsdb, 
&(pMemRef->snapshot), psTable); + } + + taosArrayDestroy(&psTable); +} + +static void tsdbMayUnTakeMemSnapshot(STsdbQueryHandle* pQueryHandle) { + assert(pQueryHandle != NULL); + SMemRef* pMemRef = pQueryHandle->pMemRef; + if (pMemRef == NULL) { // it has been freed + return; + } + + if (--pMemRef->ref == 0) { + tsdbUnTakeMemSnapShot(pQueryHandle->pTsdb, &(pMemRef->snapshot)); + } + + pQueryHandle->pMemRef = NULL; +} + +int64_t tsdbGetNumOfRowsInMemTable(TsdbQueryHandleT* pHandle) { + STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle; + + int64_t rows = 0; + SMemRef* pMemRef = pQueryHandle->pMemRef; + if (pMemRef == NULL) { return rows; } + + STableData* pMem = NULL; + STableData* pIMem = NULL; + + SMemTable* pMemT = pMemRef->snapshot.mem; + SMemTable* pIMemT = pMemRef->snapshot.imem; + + size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo); + for (int32_t i = 0; i < size; ++i) { + STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); + + if (pMemT && pCheckInfo->tableId.tid < pMemT->maxTables) { + pMem = pMemT->tData[pCheckInfo->tableId.tid]; + rows += (pMem && pMem->uid == pCheckInfo->tableId.uid) ? pMem->numOfRows : 0; + } + if (pIMemT && pCheckInfo->tableId.tid < pIMemT->maxTables) { + pIMem = pIMemT->tData[pCheckInfo->tableId.tid]; + rows += (pIMem && pIMem->uid == pCheckInfo->tableId.uid) ? 
pIMem->numOfRows : 0; + } + } + return rows; +} + +static SArray* createCheckInfoFromTableGroup(STsdbQueryHandle* pQueryHandle, STableGroupInfo* pGroupList, STsdbMeta* pMeta, SArray** psTable) { + size_t sizeOfGroup = taosArrayGetSize(pGroupList->pGroupList); + assert(sizeOfGroup >= 1 && pMeta != NULL); + + // allocate buffer in order to load data blocks from file + SArray* pTableCheckInfo = taosArrayInit(pGroupList->numOfTables, sizeof(STableCheckInfo)); + if (pTableCheckInfo == NULL) { + return NULL; + } + + SArray* pTable = taosArrayInit(4, sizeof(STable*)); + if (pTable == NULL) { + taosArrayDestroy(&pTableCheckInfo); + return NULL; + } + + // todo apply the lastkey of table check to avoid to load header file + for (int32_t i = 0; i < sizeOfGroup; ++i) { + SArray* group = *(SArray**) taosArrayGet(pGroupList->pGroupList, i); + + size_t gsize = taosArrayGetSize(group); + assert(gsize > 0); + + for (int32_t j = 0; j < gsize; ++j) { + STableKeyInfo* pKeyInfo = (STableKeyInfo*) taosArrayGet(group, j); + + STableCheckInfo info = { .lastKey = pKeyInfo->lastKey, .pTableObj = pKeyInfo->pTable }; + assert(info.pTableObj != NULL && (info.pTableObj->type == TSDB_NORMAL_TABLE || + info.pTableObj->type == TSDB_CHILD_TABLE || info.pTableObj->type == TSDB_STREAM_TABLE)); + + info.tableId.tid = info.pTableObj->tableId.tid; + info.tableId.uid = info.pTableObj->tableId.uid; + + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + if (info.lastKey == INT64_MIN || info.lastKey < pQueryHandle->window.skey) { + info.lastKey = pQueryHandle->window.skey; + } + + assert(info.lastKey >= pQueryHandle->window.skey && info.lastKey <= pQueryHandle->window.ekey); + } else { + assert(info.lastKey >= pQueryHandle->window.ekey && info.lastKey <= pQueryHandle->window.skey); + } + + taosArrayPush(pTableCheckInfo, &info); + tsdbDebug("%p check table uid:%"PRId64", tid:%d from lastKey:%"PRId64" 0x%"PRIx64, pQueryHandle, info.tableId.uid, + info.tableId.tid, info.lastKey, pQueryHandle->qId); + } + } + + 
taosArraySort(pTableCheckInfo, tsdbCheckInfoCompar); + + size_t gsize = taosArrayGetSize(pTableCheckInfo); + + for (int32_t i = 0; i < gsize; ++i) { + STableCheckInfo* pInfo = (STableCheckInfo*) taosArrayGet(pTableCheckInfo, i); + taosArrayPush(pTable, &pInfo->pTableObj); + } + + *psTable = pTable; + return pTableCheckInfo; +} + +static void resetCheckInfo(STsdbQueryHandle* pQueryHandle) { + size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); + assert(numOfTables >= 1); + + // todo apply the lastkey of table check to avoid to load header file + for (int32_t i = 0; i < numOfTables; ++i) { + STableCheckInfo* pCheckInfo = (STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, i); + pCheckInfo->lastKey = pQueryHandle->window.skey; + pCheckInfo->iter = tSkipListDestroyIter(pCheckInfo->iter); + pCheckInfo->iiter = tSkipListDestroyIter(pCheckInfo->iiter); + pCheckInfo->initBuf = false; + + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + assert(pCheckInfo->lastKey >= pQueryHandle->window.skey); + } else { + assert(pCheckInfo->lastKey <= pQueryHandle->window.skey); + } + } +} + +// only one table, not need to sort again +static SArray* createCheckInfoFromCheckInfo(STableCheckInfo* pCheckInfo, TSKEY skey, SArray** psTable) { + SArray* pNew = taosArrayInit(1, sizeof(STableCheckInfo)); + SArray* pTable = taosArrayInit(1, sizeof(STable*)); + + STableCheckInfo info = { .lastKey = skey, .pTableObj = pCheckInfo->pTableObj}; + + info.tableId = pCheckInfo->tableId; + taosArrayPush(pNew, &info); + taosArrayPush(pTable, &pCheckInfo->pTableObj); + + *psTable = pTable; + return pNew; +} + +static bool emptyQueryTimewindow(STsdbQueryHandle* pQueryHandle) { + assert(pQueryHandle != NULL); + + STimeWindow* w = &pQueryHandle->window; + bool asc = ASCENDING_TRAVERSE(pQueryHandle->order); + + return ((asc && w->skey > w->ekey) || (!asc && w->ekey > w->skey)); +} + +// Update the query time window according to the data time to live(TTL) information, in order to avoid 
to return +// the expired data to client, even it is queried already. +static int64_t getEarliestValidTimestamp(STsdbRepo* pTsdb) { + STsdbCfg* pCfg = &pTsdb->config; + + int64_t now = taosGetTimestamp(pCfg->precision); + return now - (tsTickPerDay[pCfg->precision] * pCfg->keep) + 1; // needs to add one tick +} + +static void setQueryTimewindow(STsdbQueryHandle* pQueryHandle, STsdbQueryCond* pCond) { + pQueryHandle->window = pCond->twindow; + + bool updateTs = false; + int64_t startTs = getEarliestValidTimestamp(pQueryHandle->pTsdb); + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + if (startTs > pQueryHandle->window.skey) { + pQueryHandle->window.skey = startTs; + pCond->twindow.skey = startTs; + updateTs = true; + } + } else { + if (startTs > pQueryHandle->window.ekey) { + pQueryHandle->window.ekey = startTs; + pCond->twindow.ekey = startTs; + updateTs = true; + } + } + + if (updateTs) { + tsdbDebug("%p update the query time window, old:%" PRId64 " - %" PRId64 ", new:%" PRId64 " - %" PRId64 + ", 0x%" PRIx64, pQueryHandle, pCond->twindow.skey, pCond->twindow.ekey, pQueryHandle->window.skey, + pQueryHandle->window.ekey, pQueryHandle->qId); + } +} + +static STsdbQueryHandle* tsdbQueryTablesImpl(STsdbRepo* tsdb, STsdbQueryCond* pCond, uint64_t qId, SMemRef* pMemRef) { + STsdbQueryHandle* pQueryHandle = calloc(1, sizeof(STsdbQueryHandle)); + if (pQueryHandle == NULL) { + goto _end; + } + + pQueryHandle->order = pCond->order; + pQueryHandle->offset = pCond->offset; + pQueryHandle->srows = 0; + pQueryHandle->frows = 0; + pQueryHandle->pTsdb = tsdb; + pQueryHandle->type = TSDB_QUERY_TYPE_ALL; + pQueryHandle->cur.fid = INT32_MIN; + pQueryHandle->cur.win = TSWINDOW_INITIALIZER; + pQueryHandle->checkFiles = true; + pQueryHandle->activeIndex = 0; // current active table index + pQueryHandle->qId = qId; + pQueryHandle->allocSize = 0; + pQueryHandle->locateStart = false; + pQueryHandle->pMemRef = pMemRef; + pQueryHandle->loadType = pCond->type; + + 
pQueryHandle->outputCapacity = ((STsdbRepo*)tsdb)->config.maxRowsPerFileBlock; + pQueryHandle->loadExternalRow = pCond->loadExternalRows; + pQueryHandle->currentLoadExternalRows = pCond->loadExternalRows; + + if (tsdbInitReadH(&pQueryHandle->rhelper, (STsdbRepo*)tsdb) != 0) { + goto _end; + } + + assert(pCond != NULL && pMemRef != NULL); + setQueryTimewindow(pQueryHandle, pCond); + + if (pCond->numOfCols > 0) { + // allocate buffer in order to load data blocks from file + pQueryHandle->statis = calloc(pCond->numOfCols, sizeof(SDataStatis)); + if (pQueryHandle->statis == NULL) { + goto _end; + } + + // todo: use list instead of array? + pQueryHandle->pColumns = taosArrayInit(pCond->numOfCols, sizeof(SColumnInfoData)); + if (pQueryHandle->pColumns == NULL) { + goto _end; + } + + for (int32_t i = 0; i < pCond->numOfCols; ++i) { + SColumnInfoData colInfo = {{0}, 0}; + + colInfo.info = pCond->colList[i]; + colInfo.pData = calloc(1, EXTRA_BYTES + pQueryHandle->outputCapacity * pCond->colList[i].bytes); + if (colInfo.pData == NULL) { + goto _end; + } + + taosArrayPush(pQueryHandle->pColumns, &colInfo); + pQueryHandle->statis[i].colId = colInfo.info.colId; + } + + pQueryHandle->defaultLoadColumn = getDefaultLoadColumns(pQueryHandle, true); + } + + STsdbMeta* pMeta = tsdbGetMeta(tsdb); + assert(pMeta != NULL); + + pQueryHandle->pDataCols = tdNewDataCols(pMeta->maxCols, pQueryHandle->pTsdb->config.maxRowsPerFileBlock); + if (pQueryHandle->pDataCols == NULL) { + tsdbError("%p failed to malloc buf for pDataCols, %"PRIu64, pQueryHandle, pQueryHandle->qId); + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _end; + } + + tsdbInitDataBlockLoadInfo(&pQueryHandle->dataBlockLoadInfo); + tsdbInitCompBlockLoadInfo(&pQueryHandle->compBlockLoadInfo); + + return (TsdbQueryHandleT) pQueryHandle; + + _end: + tsdbCleanupQueryHandle(pQueryHandle); + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return NULL; +} + +TsdbQueryHandleT* tsdbQueryTables(STsdbRepo* tsdb, STsdbQueryCond* pCond, 
STableGroupInfo* groupList, uint64_t qId, SMemRef* pRef) { + STsdbQueryHandle* pQueryHandle = tsdbQueryTablesImpl(tsdb, pCond, qId, pRef); + if (pQueryHandle == NULL) { + return NULL; + } + + if (emptyQueryTimewindow(pQueryHandle)) { + return (TsdbQueryHandleT*) pQueryHandle; + } + + STsdbMeta* pMeta = tsdbGetMeta(tsdb); + assert(pMeta != NULL); + + SArray* psTable = NULL; + + // todo apply the lastkey of table check to avoid to load header file + pQueryHandle->pTableCheckInfo = createCheckInfoFromTableGroup(pQueryHandle, groupList, pMeta, &psTable); + if (pQueryHandle->pTableCheckInfo == NULL) { + tsdbCleanupQueryHandle(pQueryHandle); + taosArrayDestroy(&psTable); + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return NULL; + } + + tsdbMayTakeMemSnapshot(pQueryHandle, psTable); + + tsdbDebug("%p total numOfTable:%" PRIzu " in query, 0x%"PRIx64, pQueryHandle, taosArrayGetSize(pQueryHandle->pTableCheckInfo), pQueryHandle->qId); + return (TsdbQueryHandleT) pQueryHandle; +} + +void tsdbResetQueryHandle(TsdbQueryHandleT queryHandle, STsdbQueryCond *pCond) { + STsdbQueryHandle* pQueryHandle = queryHandle; + + if (emptyQueryTimewindow(pQueryHandle)) { + if (pCond->order != pQueryHandle->order) { + pQueryHandle->order = pCond->order; + SWAP(pQueryHandle->window.skey, pQueryHandle->window.ekey, int64_t); + } + + return; + } + + pQueryHandle->order = pCond->order; + pQueryHandle->offset = pCond->offset; + pQueryHandle->srows = 0; + pQueryHandle->frows = 0; + pQueryHandle->window = pCond->twindow; + pQueryHandle->type = TSDB_QUERY_TYPE_ALL; + pQueryHandle->cur.fid = -1; + pQueryHandle->cur.win = TSWINDOW_INITIALIZER; + pQueryHandle->checkFiles = true; + pQueryHandle->activeIndex = 0; // current active table index + pQueryHandle->locateStart = false; + pQueryHandle->loadExternalRow = pCond->loadExternalRows; + + if (ASCENDING_TRAVERSE(pCond->order)) { + assert(pQueryHandle->window.skey <= pQueryHandle->window.ekey); + } else { + assert(pQueryHandle->window.skey >= 
pQueryHandle->window.ekey); + } + + // allocate buffer in order to load data blocks from file + memset(pQueryHandle->statis, 0, sizeof(SDataStatis)); + + tsdbInitDataBlockLoadInfo(&pQueryHandle->dataBlockLoadInfo); + tsdbInitCompBlockLoadInfo(&pQueryHandle->compBlockLoadInfo); + + resetCheckInfo(pQueryHandle); +} + +void tsdbResetQueryHandleForNewTable(TsdbQueryHandleT queryHandle, STsdbQueryCond *pCond, STableGroupInfo* groupList) { + STsdbQueryHandle* pQueryHandle = queryHandle; + + pQueryHandle->order = pCond->order; + pQueryHandle->window = pCond->twindow; + pQueryHandle->type = TSDB_QUERY_TYPE_ALL; + pQueryHandle->cur.fid = -1; + pQueryHandle->cur.win = TSWINDOW_INITIALIZER; + pQueryHandle->checkFiles = true; + pQueryHandle->activeIndex = 0; // current active table index + pQueryHandle->locateStart = false; + pQueryHandle->loadExternalRow = pCond->loadExternalRows; + + if (ASCENDING_TRAVERSE(pCond->order)) { + assert(pQueryHandle->window.skey <= pQueryHandle->window.ekey); + } else { + assert(pQueryHandle->window.skey >= pQueryHandle->window.ekey); + } + + // allocate buffer in order to load data blocks from file + memset(pQueryHandle->statis, 0, sizeof(SDataStatis)); + + tsdbInitDataBlockLoadInfo(&pQueryHandle->dataBlockLoadInfo); + tsdbInitCompBlockLoadInfo(&pQueryHandle->compBlockLoadInfo); + + SArray* pTable = NULL; + STsdbMeta* pMeta = tsdbGetMeta(pQueryHandle->pTsdb); + + pQueryHandle->pTableCheckInfo = destroyTableCheckInfo(pQueryHandle->pTableCheckInfo); + + pQueryHandle->pTableCheckInfo = createCheckInfoFromTableGroup(pQueryHandle, groupList, pMeta, &pTable); + if (pQueryHandle->pTableCheckInfo == NULL) { + tsdbCleanupQueryHandle(pQueryHandle); + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + } + + pQueryHandle->prev = doFreeColumnInfoData(pQueryHandle->prev); + pQueryHandle->next = doFreeColumnInfoData(pQueryHandle->next); +} + +static int32_t lazyLoadCacheLast(STsdbQueryHandle* pQueryHandle) { + STsdbRepo* pRepo = pQueryHandle->pTsdb; + + if 
(!pQueryHandle->pTableCheckInfo) { + tsdbError("%p table check info is NULL", pQueryHandle); + terrno = TSDB_CODE_QRY_APP_ERROR; + return -1; + } + + size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); + int32_t code = 0; + for (size_t i = 0; i < numOfTables; ++i) { + STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); + STable* pTable = pCheckInfo->pTableObj; + if (pTable->cacheLastConfigVersion == pRepo->cacheLastConfigVersion) { + continue; + } + code = tsdbLoadLastCache(pRepo, pTable); + if (code != 0) { + tsdbError("%p uid:%" PRId64 ", tid:%d, failed to load last cache since %s", pQueryHandle, pTable->tableId.uid, + pTable->tableId.tid, tstrerror(terrno)); + break; + } + } + + return code; +} + +TsdbQueryHandleT tsdbQueryLastRow(STsdbRepo *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, uint64_t qId, SMemRef* pMemRef) { + pCond->twindow = updateLastrowForEachGroup(groupList); + + // no qualified table + if (groupList->numOfTables == 0) { + return NULL; + } + + STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) tsdbQueryTables(tsdb, pCond, groupList, qId, pMemRef); + if (pQueryHandle == NULL) { + return NULL; + } + + if (lazyLoadCacheLast(pQueryHandle) != TSDB_CODE_SUCCESS) { + return NULL; + } + + int32_t code = checkForCachedLastRow(pQueryHandle, groupList); + if (code != TSDB_CODE_SUCCESS) { // set the numOfTables to be 0 + terrno = code; + return NULL; + } + + assert(pCond->order == TSDB_ORDER_ASC && pCond->twindow.skey <= pCond->twindow.ekey); + if (pQueryHandle->cachelastrow) { + pQueryHandle->type = TSDB_QUERY_TYPE_LAST; + } + + return pQueryHandle; +} + +TsdbQueryHandleT tsdbQueryCacheLast(STsdbRepo *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, uint64_t qId, SMemRef* pMemRef) { + STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) tsdbQueryTables(tsdb, pCond, groupList, qId, pMemRef); + if (pQueryHandle == NULL) { + return NULL; + } + + if (lazyLoadCacheLast(pQueryHandle) != 
TSDB_CODE_SUCCESS) { + return NULL; + } + + int32_t code = checkForCachedLast(pQueryHandle); + if (code != TSDB_CODE_SUCCESS) { // set the numOfTables to be 0 + terrno = code; + return NULL; + } + + if (pQueryHandle->cachelastrow) { + pQueryHandle->type = TSDB_QUERY_TYPE_LAST; + } + + return pQueryHandle; +} + + +SArray* tsdbGetQueriedTableList(TsdbQueryHandleT *pHandle) { + assert(pHandle != NULL); + + STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) pHandle; + + size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo); + SArray* res = taosArrayInit(size, POINTER_BYTES); + + for(int32_t i = 0; i < size; ++i) { + STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); + taosArrayPush(res, &pCheckInfo->pTableObj); + } + + return res; +} + +TsdbQueryHandleT tsdbQueryRowsInExternalWindow(STsdbRepo *tsdb, STsdbQueryCond* pCond, STableGroupInfo *groupList, uint64_t qId, SMemRef* pRef) { + STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) tsdbQueryTables(tsdb, pCond, groupList, qId, pRef); + //pQueryHandle->loadExternalRow = true; + //pQueryHandle->currentLoadExternalRows = true; + + return pQueryHandle; +} + +static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCheckInfo) { + STable* pTable = pCheckInfo->pTableObj; + assert(pTable != NULL); + + if (pCheckInfo->initBuf) { + return true; + } + + pCheckInfo->initBuf = true; + int32_t order = pHandle->order; + + // no data in buffer, abort + if (pHandle->pMemRef->snapshot.mem == NULL && pHandle->pMemRef->snapshot.imem == NULL) { + return false; + } + + assert(pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL); + + STableData* pMem = NULL; + STableData* pIMem = NULL; + + SMemTable* pMemT = pHandle->pMemRef->snapshot.mem; + SMemTable* pIMemT = pHandle->pMemRef->snapshot.imem; + + if (pMemT && pCheckInfo->tableId.tid < pMemT->maxTables) { + pMem = pMemT->tData[pCheckInfo->tableId.tid]; + if (pMem != NULL && pMem->uid == pCheckInfo->tableId.uid) { // check uid + 
TKEY tLastKey = keyToTkey(pCheckInfo->lastKey); + pCheckInfo->iter = + tSkipListCreateIterFromVal(pMem->pData, (const char*)&tLastKey, TSDB_DATA_TYPE_TIMESTAMP, order); + } + } + + if (pIMemT && pCheckInfo->tableId.tid < pIMemT->maxTables) { + pIMem = pIMemT->tData[pCheckInfo->tableId.tid]; + if (pIMem != NULL && pIMem->uid == pCheckInfo->tableId.uid) { // check uid + TKEY tLastKey = keyToTkey(pCheckInfo->lastKey); + pCheckInfo->iiter = + tSkipListCreateIterFromVal(pIMem->pData, (const char*)&tLastKey, TSDB_DATA_TYPE_TIMESTAMP, order); + } + } + + // both iterators are NULL, no data in buffer right now + if (pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL) { + return false; + } + + bool memEmpty = (pCheckInfo->iter == NULL) || (pCheckInfo->iter != NULL && !tSkipListIterNext(pCheckInfo->iter)); + bool imemEmpty = (pCheckInfo->iiter == NULL) || (pCheckInfo->iiter != NULL && !tSkipListIterNext(pCheckInfo->iiter)); + if (memEmpty && imemEmpty) { // buffer is empty + return false; + } + + if (!memEmpty) { + SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter); + assert(node != NULL); + + SMemRow row = (SMemRow)SL_GET_NODE_DATA(node); + TSKEY key = memRowKey(row); // first timestamp in buffer + tsdbDebug("%p uid:%" PRId64 ", tid:%d check data in mem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64 + "-%" PRId64 ", lastKey:%" PRId64 ", numOfRows:%"PRId64", 0x%"PRIx64, + pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, key, order, pMem->keyFirst, pMem->keyLast, + pCheckInfo->lastKey, pMem->numOfRows, pHandle->qId); + + if (ASCENDING_TRAVERSE(order)) { + assert(pCheckInfo->lastKey <= key); + } else { + assert(pCheckInfo->lastKey >= key); + } + + } else { + tsdbDebug("%p uid:%"PRId64", tid:%d no data in mem, 0x%"PRIx64, pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, + pHandle->qId); + } + + if (!imemEmpty) { + SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter); + assert(node != NULL); + + SMemRow row = 
(SMemRow)SL_GET_NODE_DATA(node); + TSKEY key = memRowKey(row); // first timestamp in buffer + tsdbDebug("%p uid:%" PRId64 ", tid:%d check data in imem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64 + "-%" PRId64 ", lastKey:%" PRId64 ", numOfRows:%"PRId64", 0x%"PRIx64, + pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, key, order, pIMem->keyFirst, pIMem->keyLast, + pCheckInfo->lastKey, pIMem->numOfRows, pHandle->qId); + + if (ASCENDING_TRAVERSE(order)) { + assert(pCheckInfo->lastKey <= key); + } else { + assert(pCheckInfo->lastKey >= key); + } + } else { + tsdbDebug("%p uid:%"PRId64", tid:%d no data in imem, 0x%"PRIx64, pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, + pHandle->qId); + } + + return true; +} + +static void destroyTableMemIterator(STableCheckInfo* pCheckInfo) { + tSkipListDestroyIter(pCheckInfo->iter); + tSkipListDestroyIter(pCheckInfo->iiter); +} + +static TSKEY extractFirstTraverseKey(STableCheckInfo* pCheckInfo, int32_t order, int32_t update) { + SMemRow rmem = NULL, rimem = NULL; + if (pCheckInfo->iter) { + SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter); + if (node != NULL) { + rmem = (SMemRow)SL_GET_NODE_DATA(node); + } + } + + if (pCheckInfo->iiter) { + SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter); + if (node != NULL) { + rimem = (SMemRow)SL_GET_NODE_DATA(node); + } + } + + if (rmem == NULL && rimem == NULL) { + return TSKEY_INITIAL_VAL; + } + + if (rmem != NULL && rimem == NULL) { + pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM; + return memRowKey(rmem); + } + + if (rmem == NULL && rimem != NULL) { + pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM; + return memRowKey(rimem); + } + + TSKEY r1 = memRowKey(rmem); + TSKEY r2 = memRowKey(rimem); + + if (r1 == r2) { + if(update == TD_ROW_DISCARD_UPDATE){ + pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM; + tSkipListIterNext(pCheckInfo->iter); + return r2; + } + else if(update == TD_ROW_OVERWRITE_UPDATE) { + pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM; + 
tSkipListIterNext(pCheckInfo->iiter); + return r1; + } else { + pCheckInfo->chosen = CHECKINFO_CHOSEN_BOTH; + return r1; + } + } else { + if (ASCENDING_TRAVERSE(order)) { + if (r1 < r2) { + pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM; + return r1; + } else { + pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM; + return r2; + } + } else { + if (r1 < r2) { + pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM; + return r2; + } else { + pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM; + return r1; + } + } + } +} + +static SMemRow getSMemRowInTableMem(STableCheckInfo* pCheckInfo, int32_t order, int32_t update, SMemRow* extraRow) { + SMemRow rmem = NULL, rimem = NULL; + if (pCheckInfo->iter) { + SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter); + if (node != NULL) { + rmem = (SMemRow)SL_GET_NODE_DATA(node); + } + } + + if (pCheckInfo->iiter) { + SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter); + if (node != NULL) { + rimem = (SMemRow)SL_GET_NODE_DATA(node); + } + } + + if (rmem == NULL && rimem == NULL) { + return NULL; + } + + if (rmem != NULL && rimem == NULL) { + pCheckInfo->chosen = 0; + return rmem; + } + + if (rmem == NULL && rimem != NULL) { + pCheckInfo->chosen = 1; + return rimem; + } + + TSKEY r1 = memRowKey(rmem); + TSKEY r2 = memRowKey(rimem); + + if (r1 == r2) { + if (update == TD_ROW_DISCARD_UPDATE) { + tSkipListIterNext(pCheckInfo->iter); + pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM; + return rimem; + } else if(update == TD_ROW_OVERWRITE_UPDATE){ + tSkipListIterNext(pCheckInfo->iiter); + pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM; + return rmem; + } else { + pCheckInfo->chosen = CHECKINFO_CHOSEN_BOTH; + extraRow = rimem; + return rmem; + } + } else { + if (ASCENDING_TRAVERSE(order)) { + if (r1 < r2) { + pCheckInfo->chosen = CHECKINFO_CHOSEN_MEM; + return rmem; + } else { + pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM; + return rimem; + } + } else { + if (r1 < r2) { + pCheckInfo->chosen = CHECKINFO_CHOSEN_IMEM; + return rimem; + } else { + pCheckInfo->chosen = 
CHECKINFO_CHOSEN_MEM; + return rmem; + } + } + } +} + +static bool moveToNextRowInMem(STableCheckInfo* pCheckInfo) { + bool hasNext = false; + if (pCheckInfo->chosen == CHECKINFO_CHOSEN_MEM) { + if (pCheckInfo->iter != NULL) { + hasNext = tSkipListIterNext(pCheckInfo->iter); + } + + if (hasNext) { + return hasNext; + } + + if (pCheckInfo->iiter != NULL) { + return tSkipListIterGet(pCheckInfo->iiter) != NULL; + } + } else if (pCheckInfo->chosen == CHECKINFO_CHOSEN_IMEM){ + if (pCheckInfo->iiter != NULL) { + hasNext = tSkipListIterNext(pCheckInfo->iiter); + } + + if (hasNext) { + return hasNext; + } + + if (pCheckInfo->iter != NULL) { + return tSkipListIterGet(pCheckInfo->iter) != NULL; + } + } else { + if (pCheckInfo->iter != NULL) { + hasNext = tSkipListIterNext(pCheckInfo->iter); + } + if (pCheckInfo->iiter != NULL) { + hasNext = tSkipListIterNext(pCheckInfo->iiter) || hasNext; + } + } + + return hasNext; +} + +static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) { + STsdbCfg *pCfg = &pHandle->pTsdb->config; + size_t size = taosArrayGetSize(pHandle->pTableCheckInfo); + assert(pHandle->activeIndex < size && pHandle->activeIndex >= 0 && size >= 1); + pHandle->cur.fid = INT32_MIN; + + STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex); + + STable* pTable = pCheckInfo->pTableObj; + assert(pTable != NULL); + + if (!pCheckInfo->initBuf) { + initTableMemIterator(pHandle, pCheckInfo); + } + + SMemRow row = getSMemRowInTableMem(pCheckInfo, pHandle->order, pCfg->update, NULL); + if (row == NULL) { + return false; + } + + pCheckInfo->lastKey = memRowKey(row); // first timestamp in buffer + tsdbDebug("%p uid:%" PRId64", tid:%d check data in buffer from skey:%" PRId64 ", order:%d, 0x%"PRIx64, pHandle, + pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, pCheckInfo->lastKey, pHandle->order, pHandle->qId); + + // all data in mem are checked already. 
+ if ((pCheckInfo->lastKey > pHandle->window.ekey && ASCENDING_TRAVERSE(pHandle->order)) || + (pCheckInfo->lastKey < pHandle->window.ekey && !ASCENDING_TRAVERSE(pHandle->order))) { + return false; + } + + int32_t step = ASCENDING_TRAVERSE(pHandle->order)? 1:-1; + STimeWindow* win = &pHandle->cur.win; + pHandle->cur.rows = tsdbReadRowsFromCache(pCheckInfo, pHandle->window.ekey, pHandle->outputCapacity, win, pHandle); + + // update the last key value + pCheckInfo->lastKey = win->ekey + step; + pHandle->cur.lastKey = win->ekey + step; + pHandle->cur.mixBlock = true; + + if (!ASCENDING_TRAVERSE(pHandle->order)) { + SWAP(win->skey, win->ekey, TSKEY); + } + + return true; +} + +static int32_t getFileIdFromKey(TSKEY key, int32_t daysPerFile, int32_t precision) { + assert(precision >= TSDB_TIME_PRECISION_MICRO || precision <= TSDB_TIME_PRECISION_NANO); + if (key == TSKEY_INITIAL_VAL) { + return INT32_MIN; + } + + if (key < 0) { + key -= (daysPerFile * tsTickPerDay[precision]); + } + + int64_t fid = (int64_t)(key / (daysPerFile * tsTickPerDay[precision])); // set the starting fileId + if (fid < 0L && llabs(fid) > INT32_MAX) { // data value overflow for INT32 + fid = INT32_MIN; + } + + if (fid > 0L && fid > INT32_MAX) { + fid = INT32_MAX; + } + + return (int32_t)fid; +} + +static int32_t binarySearchForBlock(SBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) { + int32_t firstSlot = 0; + int32_t lastSlot = numOfBlocks - 1; + + int32_t midSlot = firstSlot; + + while (1) { + numOfBlocks = lastSlot - firstSlot + 1; + midSlot = (firstSlot + (numOfBlocks >> 1)); + + if (numOfBlocks == 1) break; + + if (skey > pBlock[midSlot].keyLast) { + if (numOfBlocks == 2) break; + if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].keyFirst)) break; + firstSlot = midSlot + 1; + } else if (skey < pBlock[midSlot].keyFirst) { + if ((order == TSDB_ORDER_ASC) && (skey > pBlock[midSlot - 1].keyLast)) break; + lastSlot = midSlot - 1; + } else { + break; // got the slot + } + } 
+ + return midSlot; +} + +// array :1 2 3 5 7 -2 (8 9) skip 4 and 6 +int32_t memMoveByArray(SBlock *blocks, SArray *pArray) { + // pArray is NULL or size is zero , no need block to move + if(pArray == NULL) + return 0; + size_t count = taosArrayGetSize(pArray); + if(count == 0) + return 0; + + // memmove + int32_t num = 0; + SRange* ranges = (SRange*)TARRAY_GET_START(pArray); + for(size_t i = 0; i < count; i++) { + int32_t step = ranges[i].to - ranges[i].from + 1; + memmove(blocks + num, blocks + ranges[i].from, sizeof(SBlock) * step); + num += step; + } + + return num; +} + +// if block data in memory return false else true +bool blockNoItemInMem(STsdbQueryHandle* q, SBlock* pBlock) { + if(q->pMemRef == NULL) { + return false; + } + + // mem + if(q->pMemRef->snapshot.mem) { + SMemTable* mem = q->pMemRef->snapshot.mem; + if(timeIntersect(mem->keyFirst, mem->keyLast, pBlock->keyFirst, pBlock->keyLast)) + return false; + } + // imem + if(q->pMemRef->snapshot.imem) { + SMemTable* imem = q->pMemRef->snapshot.imem; + if(timeIntersect(imem->keyFirst, imem->keyLast, pBlock->keyFirst, pBlock->keyLast)) + return false; + } + + return true; +} + +#define MAYBE_IN_MEMORY_ROWS 4000 // approximately the capacity of one block +// skip blocks . 
return value is skip blocks number, skip rows reduce from *pOffset +static int32_t offsetSkipBlock(STsdbQueryHandle* q, SBlockInfo* pBlockInfo, int64_t skey, int64_t ekey, + int32_t sblock, int32_t eblock, SArray** ppArray, bool order) { + int32_t num = 0; + SBlock* blocks = pBlockInfo->blocks; + SArray* pArray = NULL; + SRange range; + range.from = -1; + + // + // ASC + // + if(order) { + for(int32_t i = sblock; i < eblock; i++) { + bool skip = false; + SBlock* pBlock = &blocks[i]; + if(i == sblock && skey > pBlock->keyFirst) { + q->frows += pBlock->numOfRows; // some rows time < s + } else { + // check can skip + if(q->srows + q->frows + pBlock->numOfRows + MAYBE_IN_MEMORY_ROWS < q->offset) { // approximately calculate + if(blockNoItemInMem(q, pBlock)) { + // can skip + q->srows += pBlock->numOfRows; + skip = true; + } else { + q->frows += pBlock->numOfRows; // maybe have some row in memroy + } + } else { + // the remainder be put to pArray + if(pArray == NULL) + pArray = taosArrayInit(1, sizeof(SRange)); + if(range.from == -1) { + range.from = i; + } else { + if(range.to + 1 != i) { + // add the previous + taosArrayPush(pArray, &range); + range.from = i; + } + } + range.to = eblock - 1; + taosArrayPush(pArray, &range); + range.from = -1; + break; + } + } + + if(skip) { + num ++; + } else { + // can't skip, append block index to pArray + if(pArray == NULL) + pArray = taosArrayInit(10, sizeof(SRange)); + if(range.from == -1) { + range.from = i; + } else { + if(range.to + 1 != i) { + // add the previous + taosArrayPush(pArray, &range); + range.from = i; + } + } + range.to = i; + } + } + // end append + if(range.from != -1) { + if(pArray == NULL) + pArray = taosArrayInit(1, sizeof(SRange)); + taosArrayPush(pArray, &range); + } + + // ASC return + *ppArray = pArray; + return num; + } + + // DES + for(int32_t i = eblock - 1; i >= sblock; i--) { + bool skip = false; + SBlock* pBlock = &blocks[i]; + if(i == eblock - 1 && ekey < pBlock->keyLast) { + q->frows += 
pBlock->numOfRows; // some rows time > e + } else { + // check can skip + if(q->srows + q->frows + pBlock->numOfRows + MAYBE_IN_MEMORY_ROWS < q->offset) { // approximately calculate + if(blockNoItemInMem(q, pBlock)) { + // can skip + q->srows += pBlock->numOfRows; + skip = true; + } else { + q->frows += pBlock->numOfRows; // maybe have some row in memroy + } + } else { + // the remainder be put to pArray + if(pArray == NULL) + pArray = taosArrayInit(1, sizeof(SRange)); + if(range.from == -1) { + range.from = i; + } else { + if(range.to - 1 != i) { + // add the previous + taosArrayPush(pArray, &range); + range.from = i; + } + } + range.to = 0; + taosArrayPush(pArray, &range); + range.from = -1; + break; + } + } + + if(skip) { + num ++; + } else { + // can't skip, append block index to pArray + if(pArray == NULL) + pArray = taosArrayInit(10, sizeof(SRange)); + if(range.from == -1) { + range.from = i; + } else { + if(range.to + 1 != i) { + // add the previous + taosArrayPush(pArray, &range); + range.from = i; + } + } + range.to = i; + } + } + + // end append + if(range.from != -1) { + if(pArray == NULL) + pArray = taosArrayInit(1, sizeof(SRange)); + taosArrayPush(pArray, &range); + } + if(pArray == NULL) + return num; + + // reverse array + size_t count = taosArrayGetSize(pArray); + SRange* ranges = TARRAY_GET_START(pArray); + SArray* pArray1 = taosArrayInit(count, sizeof(SRange)); + + size_t i = count - 1; + while(i >= 0) { + range.from = ranges[i].to; + range.to = ranges[i].from; + taosArrayPush(pArray1, &range); + if(i == 0) + break; + i --; + } + + *ppArray = pArray1; + taosArrayDestroy(&pArray); + return num; +} + +// shrink blocks by condition of query +static void shrinkBlocksByQuery(STsdbQueryHandle *pQueryHandle, STableCheckInfo *pCheckInfo) { + SBlockInfo *pCompInfo = pCheckInfo->pCompInfo; + SBlockIdx *compIndex = pQueryHandle->rhelper.pBlkIdx; + bool order = ASCENDING_TRAVERSE(pQueryHandle->order); + + if (order) { + assert(pCheckInfo->lastKey <= 
pQueryHandle->window.ekey && pQueryHandle->window.skey <= pQueryHandle->window.ekey); + } else { + assert(pCheckInfo->lastKey >= pQueryHandle->window.ekey && pQueryHandle->window.skey >= pQueryHandle->window.ekey); + } + + TSKEY s = TSKEY_INITIAL_VAL, e = TSKEY_INITIAL_VAL; + s = MIN(pCheckInfo->lastKey, pQueryHandle->window.ekey); + e = MAX(pCheckInfo->lastKey, pQueryHandle->window.ekey); + + // discard the unqualified data block based on the query time window + int32_t start = binarySearchForBlock(pCompInfo->blocks, compIndex->numOfBlocks, s, TSDB_ORDER_ASC); + if (s > pCompInfo->blocks[start].keyLast) { + return ; + } + + int32_t end = start; + // locate e index of blocks -> end + while (end < (int32_t)compIndex->numOfBlocks && (pCompInfo->blocks[end].keyFirst <= e)) { + end += 1; + } + + // calc offset can skip blocks number + int32_t nSkip = 0; + SArray *pArray = NULL; + if(pQueryHandle->offset > 0) { + nSkip = offsetSkipBlock(pQueryHandle, pCompInfo, s, e, start, end, &pArray, order); + } + + if(nSkip > 0) { // have offset and can skip + pCheckInfo->numOfBlocks = memMoveByArray(pCompInfo->blocks, pArray); + } else { // no offset + pCheckInfo->numOfBlocks = end - start; + if(start > 0) + memmove(pCompInfo->blocks, &pCompInfo->blocks[start], pCheckInfo->numOfBlocks * sizeof(SBlock)); + } + + if(pArray) + taosArrayDestroy(&pArray); +} + +// load one table (tsd_index point to) need load blocks info and put into pCheckInfo->pCompInfo->blocks +static int32_t loadBlockInfo(STsdbQueryHandle * pQueryHandle, int32_t tsd_index, int32_t* numOfBlocks) { + // + // ONE PART. 
Load all blocks info from one table of tsd_index + // + int32_t code = 0; + STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, tsd_index); + pCheckInfo->numOfBlocks = 0; + if (tsdbSetReadTable(&pQueryHandle->rhelper, pCheckInfo->pTableObj) != TSDB_CODE_SUCCESS) { + code = terrno; + return code; + } + + SBlockIdx* compIndex = pQueryHandle->rhelper.pBlkIdx; + // no data block in this file, try next file + if (compIndex == NULL || compIndex->uid != pCheckInfo->tableId.uid) { + return 0; // no data blocks in the file belongs to pCheckInfo->pTable + } + + if (pCheckInfo->compSize < (int32_t)compIndex->len) { + assert(compIndex->len > 0); + char* t = realloc(pCheckInfo->pCompInfo, compIndex->len); + if (t == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + code = TSDB_CODE_TDB_OUT_OF_MEMORY; + return code; + } + + pCheckInfo->pCompInfo = (SBlockInfo*)t; + pCheckInfo->compSize = compIndex->len; + } + + if (tsdbLoadBlockInfo(&(pQueryHandle->rhelper), (void**)(&pCheckInfo->pCompInfo), + (uint32_t*)(&pCheckInfo->compSize)) < 0) { + return terrno; + } + + // + // TWO PART. 
shrink no need blocks from all blocks by condition of query + // + shrinkBlocksByQuery(pQueryHandle, pCheckInfo); + (*numOfBlocks) += pCheckInfo->numOfBlocks; + + return 0; +} + +static int32_t getFileCompInfo(STsdbQueryHandle* pQueryHandle, int32_t* numOfBlocks) { + // load all the comp offset value for all tables in this file + int32_t code = TSDB_CODE_SUCCESS; + *numOfBlocks = 0; + + pQueryHandle->cost.headFileLoad += 1; + int64_t s = taosGetTimestampUs(); + + size_t numOfTables = 0; + if (pQueryHandle->loadType == BLOCK_LOAD_TABLE_SEQ_ORDER) { + code = loadBlockInfo(pQueryHandle, pQueryHandle->activeIndex, numOfBlocks); + } else if (pQueryHandle->loadType == BLOCK_LOAD_OFFSET_SEQ_ORDER) { + numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); + + for (int32_t i = 0; i < numOfTables; ++i) { + code = loadBlockInfo(pQueryHandle, i, numOfBlocks); + if (code != TSDB_CODE_SUCCESS) { + int64_t e = taosGetTimestampUs(); + + pQueryHandle->cost.headFileLoadTime += (e - s); + return code; + } + } + } else { + assert(0); + } + + int64_t e = taosGetTimestampUs(); + pQueryHandle->cost.headFileLoadTime += (e - s); + return code; +} + +static int32_t doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SBlock* pBlock, STableCheckInfo* pCheckInfo, int32_t slotIndex) { + int64_t st = taosGetTimestampUs(); + + STSchema *pSchema = tsdbGetTableSchema(pCheckInfo->pTableObj); + int32_t code = tdInitDataCols(pQueryHandle->pDataCols, pSchema); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("%p failed to malloc buf for pDataCols, 0x%"PRIx64, pQueryHandle, pQueryHandle->qId); + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _error; + } + + code = tdInitDataCols(pQueryHandle->rhelper.pDCols[0], pSchema); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("%p failed to malloc buf for rhelper.pDataCols[0], 0x%"PRIx64, pQueryHandle, pQueryHandle->qId); + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _error; + } + + code = tdInitDataCols(pQueryHandle->rhelper.pDCols[1], pSchema); + if (code 
!= TSDB_CODE_SUCCESS) { + tsdbError("%p failed to malloc buf for rhelper.pDataCols[1], 0x%"PRIx64, pQueryHandle, pQueryHandle->qId); + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + goto _error; + } + + int16_t* colIds = pQueryHandle->defaultLoadColumn->pData; + + int32_t ret = tsdbLoadBlockDataCols(&(pQueryHandle->rhelper), pBlock, pCheckInfo->pCompInfo, colIds, (int)(QH_GET_NUM_OF_COLS(pQueryHandle))); + if (ret != TSDB_CODE_SUCCESS) { + int32_t c = terrno; + assert(c != TSDB_CODE_SUCCESS); + goto _error; + } + + SDataBlockLoadInfo* pBlockLoadInfo = &pQueryHandle->dataBlockLoadInfo; + + pBlockLoadInfo->fileGroup = pQueryHandle->pFileGroup; + pBlockLoadInfo->slot = pQueryHandle->cur.slot; + pBlockLoadInfo->tid = pCheckInfo->pTableObj->tableId.tid; + + SDataCols* pCols = pQueryHandle->rhelper.pDCols[0]; + assert(pCols->numOfRows != 0 && pCols->numOfRows <= pBlock->numOfRows); + + pBlock->numOfRows = pCols->numOfRows; + + // Convert from TKEY to TSKEY for primary timestamp column if current block has timestamp before 1970-01-01T00:00:00Z + if(pBlock->keyFirst < 0 && colIds[0] == PRIMARYKEY_TIMESTAMP_COL_INDEX) { + int64_t* src = pCols->cols[0].pData; + for(int32_t i = 0; i < pBlock->numOfRows; ++i) { + src[i] = tdGetKey(src[i]); + } + } + + int64_t elapsedTime = (taosGetTimestampUs() - st); + pQueryHandle->cost.blockLoadTime += elapsedTime; + + tsdbDebug("%p load file block into buffer, index:%d, brange:%"PRId64"-%"PRId64", rows:%d, elapsed time:%"PRId64 " us, 0x%"PRIx64, + pQueryHandle, slotIndex, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, elapsedTime, pQueryHandle->qId); + return TSDB_CODE_SUCCESS; + +_error: + pBlock->numOfRows = 0; + + tsdbError("%p error occurs in loading file block, index:%d, brange:%"PRId64"-%"PRId64", rows:%d, 0x%"PRIx64, + pQueryHandle, slotIndex, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, pQueryHandle->qId); + return terrno; +} + +static int32_t getEndPosInDataBlock(STsdbQueryHandle* pQueryHandle, SDataBlockInfo* 
pBlockInfo); +static int32_t doCopyRowsFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end); +static void moveDataToFront(STsdbQueryHandle* pQueryHandle, int32_t numOfRows, int32_t numOfCols); +static void doCheckGeneratedBlockRange(STsdbQueryHandle* pQueryHandle); +static void copyAllRemainRowsFromFileBlock(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SDataBlockInfo* pBlockInfo, int32_t endPos); + +static int32_t handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SBlock* pBlock, STableCheckInfo* pCheckInfo){ + SQueryFilePos* cur = &pQueryHandle->cur; + STsdbCfg* pCfg = &pQueryHandle->pTsdb->config; + SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock); + TSKEY key; + int32_t code = TSDB_CODE_SUCCESS; + + /*bool hasData = */ initTableMemIterator(pQueryHandle, pCheckInfo); + assert(cur->pos >= 0 && cur->pos <= binfo.rows); + + key = extractFirstTraverseKey(pCheckInfo, pQueryHandle->order, pCfg->update); + + if (key != TSKEY_INITIAL_VAL) { + tsdbDebug("%p key in mem:%"PRId64", 0x%"PRIx64, pQueryHandle, key, pQueryHandle->qId); + } else { + tsdbDebug("%p no data in mem, 0x%"PRIx64, pQueryHandle, pQueryHandle->qId); + } + + if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key <= binfo.window.ekey)) || + (!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key >= binfo.window.skey))) { + + if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key < binfo.window.skey)) || + (!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key > binfo.window.ekey))) { + + // do not load file block into buffer + int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order) ? 1 : -1; + + TSKEY maxKey = ASCENDING_TRAVERSE(pQueryHandle->order)? 
(binfo.window.skey - step):(binfo.window.ekey - step); + cur->rows = tsdbReadRowsFromCache(pCheckInfo, maxKey, pQueryHandle->outputCapacity, &cur->win, pQueryHandle); + pQueryHandle->realNumOfRows = cur->rows; + + // update the last key value + pCheckInfo->lastKey = cur->win.ekey + step; + if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { + SWAP(cur->win.skey, cur->win.ekey, TSKEY); + } + + cur->mixBlock = true; + cur->blockCompleted = false; + return code; + } + + + // return error, add test cases + if ((code = doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) != TSDB_CODE_SUCCESS) { + return code; + } + + doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); + } else { + /* + * no data in cache, only load data from file + * during the query processing, data in cache will not be checked anymore. + * + * Here the buffer is not enough, so only part of file block can be loaded into memory buffer + */ + assert(pQueryHandle->outputCapacity >= binfo.rows); + int32_t endPos = getEndPosInDataBlock(pQueryHandle, &binfo); + + if ((cur->pos == 0 && endPos == binfo.rows -1 && ASCENDING_TRAVERSE(pQueryHandle->order)) || + (cur->pos == (binfo.rows - 1) && endPos == 0 && (!ASCENDING_TRAVERSE(pQueryHandle->order)))) { + pQueryHandle->realNumOfRows = binfo.rows; + + cur->rows = binfo.rows; + cur->win = binfo.window; + cur->mixBlock = false; + cur->blockCompleted = true; + + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + cur->lastKey = binfo.window.ekey + 1; + cur->pos = binfo.rows; + } else { + cur->lastKey = binfo.window.skey - 1; + cur->pos = -1; + } + } else { // partially copy to dest buffer + copyAllRemainRowsFromFileBlock(pQueryHandle, pCheckInfo, &binfo, endPos); + cur->mixBlock = true; + } + + assert(cur->blockCompleted); + if (cur->rows == binfo.rows) { + tsdbDebug("%p whole file block qualified, brange:%"PRId64"-%"PRId64", rows:%d, lastKey:%"PRId64", tid:%d, %"PRIx64, + pQueryHandle, cur->win.skey, cur->win.ekey, cur->rows, cur->lastKey, binfo.tid, 
pQueryHandle->qId); + } else { + tsdbDebug("%p create data block from remain file block, brange:%"PRId64"-%"PRId64", rows:%d, total:%d, lastKey:%"PRId64", tid:%d, %"PRIx64, + pQueryHandle, cur->win.skey, cur->win.ekey, cur->rows, binfo.rows, cur->lastKey, binfo.tid, pQueryHandle->qId); + } + + } + + return code; +} + +static int32_t loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SBlock* pBlock, STableCheckInfo* pCheckInfo, bool* exists) { + SQueryFilePos* cur = &pQueryHandle->cur; + int32_t code = TSDB_CODE_SUCCESS; + bool asc = ASCENDING_TRAVERSE(pQueryHandle->order); + + if (asc) { + // query ended in/started from current block + if (pQueryHandle->window.ekey < pBlock->keyLast || pCheckInfo->lastKey > pBlock->keyFirst) { + if ((code = doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) != TSDB_CODE_SUCCESS) { + *exists = false; + return code; + } + + SDataCols* pTSCol = pQueryHandle->rhelper.pDCols[0]; + assert(pTSCol->cols->type == TSDB_DATA_TYPE_TIMESTAMP && pTSCol->numOfRows == pBlock->numOfRows); + + if (pCheckInfo->lastKey > pBlock->keyFirst) { + cur->pos = + binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order); + } else { + cur->pos = 0; + } + + assert(pCheckInfo->lastKey <= pBlock->keyLast); + doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); + } else { // the whole block is loaded in to buffer + cur->pos = asc? 
0:(pBlock->numOfRows - 1); + code = handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo); + } + } else { //desc order, query ended in current block + if (pQueryHandle->window.ekey > pBlock->keyFirst || pCheckInfo->lastKey < pBlock->keyLast) { + if ((code = doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) != TSDB_CODE_SUCCESS) { + *exists = false; + return code; + } + + SDataCols* pTsCol = pQueryHandle->rhelper.pDCols[0]; + if (pCheckInfo->lastKey < pBlock->keyLast) { + cur->pos = binarySearchForKey(pTsCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order); + } else { + cur->pos = pBlock->numOfRows - 1; + } + + assert(pCheckInfo->lastKey >= pBlock->keyFirst); + doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); + } else { + cur->pos = asc? 0:(pBlock->numOfRows-1); + code = handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo); + } + } + + *exists = pQueryHandle->realNumOfRows > 0; + return code; +} + +// search last keyList[ret] < key order asc and keyList[ret] > key order desc +static int doBinarySearchKey(TSKEY* keyList, int num, int pos, TSKEY key, int order) { + // start end posistion + int s, e; + s = pos; + + // check + assert(pos >=0 && pos < num); + assert(num > 0); + + if (order == TSDB_ORDER_ASC) { + // find the first position which is smaller than the key + e = num - 1; + if (key < keyList[pos]) + return -1; + while (1) { + // check can return + if (key >= keyList[e]) + return e; + if (key <= keyList[s]) + return s; + if (e - s <= 1) + return s; + + // change start or end position + int mid = s + (e - s + 1)/2; + if (keyList[mid] > key) + e = mid; + else if(keyList[mid] < key) + s = mid; + else + return mid; + } + } else { // DESC + // find the first position which is bigger than the key + e = 0; + if (key > keyList[pos]) + return -1; + while (1) { + // check can return + if (key <= keyList[e]) + return e; + if (key >= keyList[s]) + return s; + if (s - e <= 1) + return s; + + // change start or end 
position + int mid = s - (s - e + 1)/2; + if (keyList[mid] < key) + e = mid; + else if(keyList[mid] > key) + s = mid; + else + return mid; + } + } +} + +static int32_t doCopyRowsFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end) { + char* pData = NULL; + int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1 : -1; + + SDataCols* pCols = pQueryHandle->rhelper.pDCols[0]; + TSKEY* tsArray = pCols->cols[0].pData; + + int32_t num = end - start + 1; + assert(num >= 0); + + if (num == 0) { + return numOfRows; + } + + int32_t requiredNumOfCols = (int32_t)taosArrayGetSize(pQueryHandle->pColumns); + + //data in buffer has greater timestamp, copy data in file block + int32_t i = 0, j = 0; + while(i < requiredNumOfCols && j < pCols->numOfCols) { + SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); + + SDataCol* src = &pCols->cols[j]; + if (src->colId < pColInfo->info.colId) { + j++; + continue; + } + + int32_t bytes = pColInfo->info.bytes; + + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes; + } else { + pData = (char*)pColInfo->pData + (capacity - numOfRows - num) * pColInfo->info.bytes; + } + + if (!isAllRowsNull(src) && pColInfo->info.colId == src->colId) { + if (pColInfo->info.type != TSDB_DATA_TYPE_BINARY && pColInfo->info.type != TSDB_DATA_TYPE_NCHAR) { + memmove(pData, (char*)src->pData + bytes * start, bytes * num); + } else { // handle the var-string + char* dst = pData; + + // todo refactor, only copy one-by-one + for (int32_t k = start; k < num + start; ++k) { + const char* p = tdGetColDataOfRow(src, k); + memcpy(dst, p, varDataTLen(p)); + dst += bytes; + } + } + + j++; + i++; + } else { // pColInfo->info.colId < src->colId, it is a NULL data + if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) { + char* dst = pData; + + for(int32_t k = start; k < num + start; ++k) { + 
setVardataNull(dst, pColInfo->info.type); + dst += bytes; + } + } else { + setNullN(pData, pColInfo->info.type, pColInfo->info.bytes, num); + } + i++; + } + } + + while (i < requiredNumOfCols) { // the remain columns are all null data + SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes; + } else { + pData = (char*)pColInfo->pData + (capacity - numOfRows - num) * pColInfo->info.bytes; + } + + if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) { + char* dst = pData; + + for(int32_t k = start; k < num + start; ++k) { + setVardataNull(dst, pColInfo->info.type); + dst += pColInfo->info.bytes; + } + } else { + setNullN(pData, pColInfo->info.type, pColInfo->info.bytes, num); + } + + i++; + } + + pQueryHandle->cur.win.ekey = tsArray[end]; + pQueryHandle->cur.lastKey = tsArray[end] + step; + + return numOfRows + num; +} + +// Note: row1 always has high priority +static void mergeTwoRowFromMem(STsdbQueryHandle* pQueryHandle, int32_t capacity, int32_t numOfRows, + SMemRow row1, SMemRow row2, int32_t numOfCols, STable* pTable, + STSchema* pSchema1, STSchema* pSchema2, bool forceSetNull) { + char* pData = NULL; + STSchema* pSchema; + SMemRow row; + int16_t colId; + int16_t offset; + + bool isRow1DataRow = isDataRow(row1); + bool isRow2DataRow = false; + bool isChosenRowDataRow; + int32_t chosen_itr; + void *value; + + // the schema version info is embeded in SDataRow + int32_t numOfColsOfRow1 = 0; + + if (pSchema1 == NULL) { + pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1), (int8_t)memRowType(row1)); + } + if(isRow1DataRow) { + numOfColsOfRow1 = schemaNCols(pSchema1); + } else { + numOfColsOfRow1 = kvRowNCols(memRowKvBody(row1)); + } + + int32_t numOfColsOfRow2 = 0; + if(row2) { + isRow2DataRow = isDataRow(row2); + if (pSchema2 == NULL) { + pSchema2 = 
tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2), (int8_t)memRowType(row2)); + } + if(isRow2DataRow) { + numOfColsOfRow2 = schemaNCols(pSchema2); + } else { + numOfColsOfRow2 = kvRowNCols(memRowKvBody(row2)); + } + } + + + int32_t i = 0, j = 0, k = 0; + while(i < numOfCols && (j < numOfColsOfRow1 || k < numOfColsOfRow2)) { + SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); + + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes; + } else { + pData = (char*)pColInfo->pData + (capacity - numOfRows - 1) * pColInfo->info.bytes; + } + + int32_t colIdOfRow1; + if(j >= numOfColsOfRow1) { + colIdOfRow1 = INT32_MAX; + } else if(isRow1DataRow) { + colIdOfRow1 = pSchema1->columns[j].colId; + } else { + void *rowBody = memRowKvBody(row1); + SColIdx *pColIdx = kvRowColIdxAt(rowBody, j); + colIdOfRow1 = pColIdx->colId; + } + + int32_t colIdOfRow2; + if(k >= numOfColsOfRow2) { + colIdOfRow2 = INT32_MAX; + } else if(isRow2DataRow) { + colIdOfRow2 = pSchema2->columns[k].colId; + } else { + void *rowBody = memRowKvBody(row2); + SColIdx *pColIdx = kvRowColIdxAt(rowBody, k); + colIdOfRow2 = pColIdx->colId; + } + + if(colIdOfRow1 == colIdOfRow2) { + if(colIdOfRow1 < pColInfo->info.colId) { + j++; + k++; + continue; + } + row = row1; + pSchema = pSchema1; + isChosenRowDataRow = isRow1DataRow; + chosen_itr = j; + } else if(colIdOfRow1 < colIdOfRow2) { + if(colIdOfRow1 < pColInfo->info.colId) { + j++; + continue; + } + row = row1; + pSchema = pSchema1; + isChosenRowDataRow = isRow1DataRow; + chosen_itr = j; + } else { + if(colIdOfRow2 < pColInfo->info.colId) { + k++; + continue; + } + row = row2; + pSchema = pSchema2; + chosen_itr = k; + isChosenRowDataRow = isRow2DataRow; + } + if(isChosenRowDataRow) { + colId = pSchema->columns[chosen_itr].colId; + offset = pSchema->columns[chosen_itr].offset; + void *rowBody = memRowDataBody(row); + value = tdGetRowDataOfCol(rowBody, (int8_t)pColInfo->info.type, 
TD_DATA_ROW_HEAD_SIZE + offset); + } else { + void *rowBody = memRowKvBody(row); + SColIdx *pColIdx = kvRowColIdxAt(rowBody, chosen_itr); + colId = pColIdx->colId; + offset = pColIdx->offset; + value = tdGetKvRowDataOfCol(rowBody, offset); + } + + + if (colId == pColInfo->info.colId) { + if(forceSetNull || (!isNull(value, (int8_t)pColInfo->info.type))) { + switch (pColInfo->info.type) { + case TSDB_DATA_TYPE_BINARY: + case TSDB_DATA_TYPE_NCHAR: + memcpy(pData, value, varDataTLen(value)); + break; + case TSDB_DATA_TYPE_NULL: + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_UTINYINT: + *(uint8_t *)pData = *(uint8_t *)value; + break; + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_USMALLINT: + *(uint16_t *)pData = *(uint16_t *)value; + break; + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_UINT: + *(uint32_t *)pData = *(uint32_t *)value; + break; + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_UBIGINT: + *(uint64_t *)pData = *(uint64_t *)value; + break; + case TSDB_DATA_TYPE_FLOAT: + SET_FLOAT_PTR(pData, value); + break; + case TSDB_DATA_TYPE_DOUBLE: + SET_DOUBLE_PTR(pData, value); + break; + case TSDB_DATA_TYPE_TIMESTAMP: + if (pColInfo->info.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) { + *(TSKEY *)pData = tdGetKey(*(TKEY *)value); + } else { + *(TSKEY *)pData = *(TSKEY *)value; + } + break; + default: + memcpy(pData, value, pColInfo->info.bytes); + } + } + i++; + + if(row == row1) { + j++; + } else { + k++; + } + } else { + if(forceSetNull) { + if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) { + setVardataNull(pData, pColInfo->info.type); + } else { + setNull(pData, pColInfo->info.type, pColInfo->info.bytes); + } + } + i++; + } + } + + if(forceSetNull) { + while (i < numOfCols) { // the remain columns are all null data + SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + pData = (char*)pColInfo->pData + 
numOfRows * pColInfo->info.bytes; + } else { + pData = (char*)pColInfo->pData + (capacity - numOfRows - 1) * pColInfo->info.bytes; + } + + if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) { + setVardataNull(pData, pColInfo->info.type); + } else { + setNull(pData, pColInfo->info.type, pColInfo->info.bytes); + } + + i++; + } + } +} + +static void moveDataToFront(STsdbQueryHandle* pQueryHandle, int32_t numOfRows, int32_t numOfCols) { + if (numOfRows == 0 || ASCENDING_TRAVERSE(pQueryHandle->order)) { + return; + } + + // if the buffer is not full in case of descending order query, move the data in the front of the buffer + if (numOfRows < pQueryHandle->outputCapacity) { + int32_t emptySize = pQueryHandle->outputCapacity - numOfRows; + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); + memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); + } + } +} + +static void getQualifiedRowsPos(STsdbQueryHandle* pQueryHandle, int32_t startPos, int32_t endPos, int32_t numOfExisted, + int32_t* start, int32_t* end) { + *start = -1; + + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + int32_t remain = endPos - startPos + 1; + if (remain + numOfExisted > pQueryHandle->outputCapacity) { + *end = (pQueryHandle->outputCapacity - numOfExisted) + startPos - 1; + } else { + *end = endPos; + } + + *start = startPos; + } else { + int32_t remain = (startPos - endPos) + 1; + if (remain + numOfExisted > pQueryHandle->outputCapacity) { + *end = startPos + 1 - (pQueryHandle->outputCapacity - numOfExisted); + } else { + *end = endPos; + } + + *start = *end; + *end = startPos; + } +} + +static void updateInfoAfterMerge(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, int32_t numOfRows, int32_t endPos) { + SQueryFilePos* cur = &pQueryHandle->cur; + + pCheckInfo->lastKey = cur->lastKey; + 
pQueryHandle->realNumOfRows = numOfRows; + cur->rows = numOfRows; + cur->pos = endPos; +} + +static void doCheckGeneratedBlockRange(STsdbQueryHandle* pQueryHandle) { + SQueryFilePos* cur = &pQueryHandle->cur; + + if (cur->rows > 0) { + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + assert(cur->win.skey >= pQueryHandle->window.skey && cur->win.ekey <= pQueryHandle->window.ekey); + } else { + assert(cur->win.skey >= pQueryHandle->window.ekey && cur->win.ekey <= pQueryHandle->window.skey); + } + + SColumnInfoData* pColInfoData = taosArrayGet(pQueryHandle->pColumns, 0); + assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] && cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows-1]); + } else { + cur->win = pQueryHandle->window; + + int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1; + cur->lastKey = pQueryHandle->window.ekey + step; + } +} + +static void copyAllRemainRowsFromFileBlock(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SDataBlockInfo* pBlockInfo, int32_t endPos) { + SQueryFilePos* cur = &pQueryHandle->cur; + + SDataCols* pCols = pQueryHandle->rhelper.pDCols[0]; + TSKEY* tsArray = pCols->cols[0].pData; + + int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 
1:-1; + int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pQueryHandle)); + + int32_t pos = cur->pos; + + int32_t start = cur->pos; + int32_t end = endPos; + + if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { + SWAP(start, end, int32_t); + } + + assert(pQueryHandle->outputCapacity >= (end - start + 1)); + int32_t numOfRows = doCopyRowsFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, 0, start, end); + + // the time window should always be ascending order: skey <= ekey + cur->win = (STimeWindow) {.skey = tsArray[start], .ekey = tsArray[end]}; + cur->mixBlock = (numOfRows != pBlockInfo->rows); + cur->lastKey = tsArray[endPos] + step; + cur->blockCompleted = true; + + // if the buffer is not full in case of descending order query, move the data in the front of the buffer + moveDataToFront(pQueryHandle, numOfRows, numOfCols); + + // The value of pos may be -1 or pBlockInfo->rows, and it is invalid in both cases. + pos = endPos + step; + updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos); + doCheckGeneratedBlockRange(pQueryHandle); + + tsdbDebug("%p uid:%" PRIu64",tid:%d data block created, mixblock:%d, brange:%"PRIu64"-%"PRIu64" rows:%d, 0x%"PRIx64, + pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, cur->mixBlock, cur->win.skey, + cur->win.ekey, cur->rows, pQueryHandle->qId); +} + +int32_t getEndPosInDataBlock(STsdbQueryHandle* pQueryHandle, SDataBlockInfo* pBlockInfo) { + // NOTE: reverse the order to find the end position in data block + int32_t endPos = -1; + + SQueryFilePos* cur = &pQueryHandle->cur; + SDataCols* pCols = pQueryHandle->rhelper.pDCols[0]; + + if (ASCENDING_TRAVERSE(pQueryHandle->order) && pQueryHandle->window.ekey >= pBlockInfo->window.ekey) { + endPos = pBlockInfo->rows - 1; + cur->mixBlock = (cur->pos != 0); + } else if (!ASCENDING_TRAVERSE(pQueryHandle->order) && pQueryHandle->window.ekey <= pBlockInfo->window.skey) { + endPos = 0; + cur->mixBlock = (cur->pos != pBlockInfo->rows - 1); + } else { + 
assert(pCols->numOfRows > 0); + int pos = ASCENDING_TRAVERSE(pQueryHandle->order)? 0 : pBlockInfo->rows - 1; + endPos = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, pos, pQueryHandle->window.ekey, pQueryHandle->order); + assert(endPos != -1); + cur->mixBlock = true; + } + + return endPos; +} + +// only return the qualified data to client in terms of query time window, data rows in the same block but do not +// be included in the query time window will be discarded +static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SBlock* pBlock) { + SQueryFilePos* cur = &pQueryHandle->cur; + SDataBlockInfo blockInfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock); + STsdbCfg* pCfg = &pQueryHandle->pTsdb->config; + + initTableMemIterator(pQueryHandle, pCheckInfo); + + SDataCols* pCols = pQueryHandle->rhelper.pDCols[0]; + assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_INDEX && + cur->pos >= 0 && cur->pos < pBlock->numOfRows); + + // key read from file + TSKEY* keyFile = pCols->cols[0].pData; + assert(pCols->numOfRows == pBlock->numOfRows && keyFile[0] == pBlock->keyFirst && keyFile[pBlock->numOfRows-1] == pBlock->keyLast); + + int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 
1:-1; + int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pQueryHandle)); + + STable* pTable = pCheckInfo->pTableObj; + int32_t endPos = getEndPosInDataBlock(pQueryHandle, &blockInfo); + + + tsdbDebug("%p uid:%" PRIu64",tid:%d start merge data block, file block range:%"PRIu64"-%"PRIu64" rows:%d, start:%d," + "end:%d, 0x%"PRIx64, + pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, blockInfo.window.skey, blockInfo.window.ekey, + blockInfo.rows, cur->pos, endPos, pQueryHandle->qId); + + // compared with the data from in-memory buffer, to generate the correct timestamp array list + int32_t numOfRows = 0; + + int16_t rv1 = -1; + int16_t rv2 = -1; + STSchema* pSchema1 = NULL; + STSchema* pSchema2 = NULL; + + // position in file ->fpos + int32_t pos = cur->pos; + cur->win = TSWINDOW_INITIALIZER; + + // no data in buffer, load data from file directly + if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) { + copyAllRemainRowsFromFileBlock(pQueryHandle, pCheckInfo, &blockInfo, endPos); + return; + } else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) { + SSkipListNode* node = NULL; + do { + SMemRow row2 = NULL; + SMemRow row1 = getSMemRowInTableMem(pCheckInfo, pQueryHandle->order, pCfg->update, &row2); + if (row1 == NULL) { + break; + } + + TSKEY keyMem = memRowKey(row1); + if ((keyMem > pQueryHandle->window.ekey && ASCENDING_TRAVERSE(pQueryHandle->order)) || + (keyMem < pQueryHandle->window.ekey && !ASCENDING_TRAVERSE(pQueryHandle->order))) { + break; + } + + // break if pos not in this block endPos range. note old code when pos is -1 can crash. 
+ if(ASCENDING_TRAVERSE(pQueryHandle->order)) { //ASC + if(pos > endPos || keyFile[pos] > pQueryHandle->window.ekey) + break; + } else { //DESC + if(pos < endPos || keyFile[pos] < pQueryHandle->window.ekey) + break; + } + + if ((keyMem < keyFile[pos] && ASCENDING_TRAVERSE(pQueryHandle->order)) || + (keyMem > keyFile[pos] && !ASCENDING_TRAVERSE(pQueryHandle->order))) { + if (rv1 != memRowVersion(row1)) { + pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1), (int8_t)memRowType(row1)); + rv1 = memRowVersion(row1); + } + if(row2 && rv2 != memRowVersion(row2)) { + pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2), (int8_t)memRowType(row2)); + rv2 = memRowVersion(row2); + } + + mergeTwoRowFromMem(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, row1, row2, numOfCols, pTable, pSchema1, pSchema2, true); + numOfRows += 1; + // record start key with memory key if not + if (cur->win.skey == TSKEY_INITIAL_VAL) { + cur->win.skey = keyMem; + } + + cur->win.ekey = keyMem; + cur->lastKey = keyMem + step; + cur->mixBlock = true; + + moveToNextRowInMem(pCheckInfo); + // same select mem key if update is true + } else if (keyMem == keyFile[pos]) { + if (pCfg->update) { + if(pCfg->update == TD_ROW_PARTIAL_UPDATE) { + doCopyRowsFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, pos, pos); + } + if (rv1 != memRowVersion(row1)) { + pSchema1 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row1), (int8_t)memRowType(row1)); + rv1 = memRowVersion(row1); + } + if(row2 && rv2 != memRowVersion(row2)) { + pSchema2 = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row2), (int8_t)memRowType(row2)); + rv2 = memRowVersion(row2); + } + + bool forceSetNull = pCfg->update != TD_ROW_PARTIAL_UPDATE; + mergeTwoRowFromMem(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, row1, row2, numOfCols, pTable, pSchema1, pSchema2, forceSetNull); + numOfRows += 1; + if (cur->win.skey == TSKEY_INITIAL_VAL) { + cur->win.skey = keyMem; + } + + 
cur->win.ekey = keyMem; + cur->lastKey = keyMem + step; + cur->mixBlock = true; + + //mem move next + moveToNextRowInMem(pCheckInfo); + //file move next, discard file row + pos += step; + } else { + // not update, only mem move to next, discard mem row + moveToNextRowInMem(pCheckInfo); + } + // put file row + } else if ((keyMem > keyFile[pos] && ASCENDING_TRAVERSE(pQueryHandle->order)) || + (keyMem < keyFile[pos] && !ASCENDING_TRAVERSE(pQueryHandle->order))) { + if (cur->win.skey == TSKEY_INITIAL_VAL) { + cur->win.skey = keyFile[pos]; + } + + int32_t end = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, pos, keyMem, pQueryHandle->order); + assert(end != -1); + + if (keyFile[end] == keyMem) { // the value of key in cache equals to the end timestamp value, ignore it + if (pCfg->update == TD_ROW_DISCARD_UPDATE) { + moveToNextRowInMem(pCheckInfo); + } else { + // can update, don't copy then deal on next loop with keyMem == keyFile[pos] + end -= step; + } + } + + int32_t qstart = 0, qend = 0; + getQualifiedRowsPos(pQueryHandle, pos, end, numOfRows, &qstart, &qend); + + if(qend >= qstart) { + // copy qend - qstart + 1 rows from file + numOfRows = doCopyRowsFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, qstart, qend); + int32_t num = qend - qstart + 1; + pos += num * step; + } else { + // nothing copy from file + pos += step; + } + + cur->win.ekey = ASCENDING_TRAVERSE(pQueryHandle->order)? keyFile[qend] : keyFile[qstart]; + cur->lastKey = cur->win.ekey + step; + } + } while (numOfRows < pQueryHandle->outputCapacity); + + if (numOfRows < pQueryHandle->outputCapacity) { + /** + * if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT + * copy them all to result buffer, since it may be overlapped with file data block. 
+ */ + if (node == NULL || + ((memRowKey((SMemRow)SL_GET_NODE_DATA(node)) > pQueryHandle->window.ekey) && + ASCENDING_TRAVERSE(pQueryHandle->order)) || + ((memRowKey((SMemRow)SL_GET_NODE_DATA(node)) < pQueryHandle->window.ekey) && + !ASCENDING_TRAVERSE(pQueryHandle->order))) { + // no data in cache or data in cache is greater than the ekey of time window, load data from file block + if (cur->win.skey == TSKEY_INITIAL_VAL) { + cur->win.skey = keyFile[pos]; + } + + int32_t start = -1, end = -1; + getQualifiedRowsPos(pQueryHandle, pos, endPos, numOfRows, &start, &end); + + numOfRows = doCopyRowsFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); + pos += (end - start + 1) * step; + + cur->win.ekey = ASCENDING_TRAVERSE(pQueryHandle->order)? keyFile[end] : keyFile[start]; + cur->lastKey = cur->win.ekey + step; + cur->mixBlock = true; + } + } + } + + cur->blockCompleted = + (((pos > endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) || + ((pos < endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order))); + + if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { + SWAP(cur->win.skey, cur->win.ekey, TSKEY); + } + + moveDataToFront(pQueryHandle, numOfRows, numOfCols); + updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos); + doCheckGeneratedBlockRange(pQueryHandle); + + tsdbDebug("%p uid:%" PRIu64",tid:%d data block created, mixblock:%d, brange:%"PRIu64"-%"PRIu64" rows:%d, 0x%"PRIx64, + pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, cur->mixBlock, cur->win.skey, + cur->win.ekey, cur->rows, pQueryHandle->qId); +} + +int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order) { + int firstPos, lastPos, midPos = -1; + int numOfRows; + TSKEY* keyList; + + if (num <= 0) return -1; + + keyList = (TSKEY*)pValue; + firstPos = 0; + lastPos = num - 1; + + if (order == TSDB_ORDER_DESC) { + // find the first position which is smaller than 
the key + while (1) { + if (key >= keyList[lastPos]) return lastPos; + if (key == keyList[firstPos]) return firstPos; + if (key < keyList[firstPos]) return firstPos - 1; + + numOfRows = lastPos - firstPos + 1; + midPos = (numOfRows >> 1) + firstPos; + + if (key < keyList[midPos]) { + lastPos = midPos - 1; + } else if (key > keyList[midPos]) { + firstPos = midPos + 1; + } else { + break; + } + } + + } else { + // find the first position which is bigger than the key + while (1) { + if (key <= keyList[firstPos]) return firstPos; + if (key == keyList[lastPos]) return lastPos; + + if (key > keyList[lastPos]) { + lastPos = lastPos + 1; + if (lastPos >= num) + return -1; + else + return lastPos; + } + + numOfRows = lastPos - firstPos + 1; + midPos = (numOfRows >> 1) + firstPos; + + if (key < keyList[midPos]) { + lastPos = midPos - 1; + } else if (key > keyList[midPos]) { + firstPos = midPos + 1; + } else { + break; + } + } + } + + return midPos; +} + +static void cleanBlockOrderSupporter(SBlockOrderSupporter* pSupporter, int32_t numOfTables) { + tfree(pSupporter->numOfBlocksPerTable); + tfree(pSupporter->blockIndexArray); + + for (int32_t i = 0; i < numOfTables; ++i) { + STableBlockInfo* pBlockInfo = pSupporter->pDataBlockInfo[i]; + tfree(pBlockInfo); + } + + tfree(pSupporter->pDataBlockInfo); +} + +static int32_t dataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) { + int32_t leftTableIndex = *(int32_t*)pLeft; + int32_t rightTableIndex = *(int32_t*)pRight; + + SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param; + + int32_t leftTableBlockIndex = pSupporter->blockIndexArray[leftTableIndex]; + int32_t rightTableBlockIndex = pSupporter->blockIndexArray[rightTableIndex]; + + if (leftTableBlockIndex > pSupporter->numOfBlocksPerTable[leftTableIndex]) { + /* left block is empty */ + return 1; + } else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightTableIndex]) { + /* right block is empty */ + return -1; + } + + STableBlockInfo* 
pLeftBlockInfoEx = &pSupporter->pDataBlockInfo[leftTableIndex][leftTableBlockIndex]; + STableBlockInfo* pRightBlockInfoEx = &pSupporter->pDataBlockInfo[rightTableIndex][rightTableBlockIndex]; + + // assert(pLeftBlockInfoEx->compBlock->offset != pRightBlockInfoEx->compBlock->offset); +#if 0 // TODO: temporarily comment off requested by Dr. Liao + if (pLeftBlockInfoEx->compBlock->offset == pRightBlockInfoEx->compBlock->offset && + pLeftBlockInfoEx->compBlock->last == pRightBlockInfoEx->compBlock->last) { + tsdbError("error in header file, two block with same offset:%" PRId64, (int64_t)pLeftBlockInfoEx->compBlock->offset); + } +#endif + + return pLeftBlockInfoEx->compBlock->offset > pRightBlockInfoEx->compBlock->offset ? 1 : -1; +} + +static int32_t createDataBlocksInfo(STsdbQueryHandle* pQueryHandle, int32_t numOfBlocks, int32_t* numOfAllocBlocks) { + size_t size = sizeof(STableBlockInfo) * numOfBlocks; + + if (pQueryHandle->allocSize < size) { + pQueryHandle->allocSize = (int32_t)size; + char* tmp = realloc(pQueryHandle->pDataBlockInfo, pQueryHandle->allocSize); + if (tmp == NULL) { + return TSDB_CODE_TDB_OUT_OF_MEMORY; + } + + pQueryHandle->pDataBlockInfo = (STableBlockInfo*) tmp; + } + + memset(pQueryHandle->pDataBlockInfo, 0, size); + *numOfAllocBlocks = numOfBlocks; + + // access data blocks according to the offset of each block in asc/desc order. 
+ int32_t numOfTables = (int32_t)taosArrayGetSize(pQueryHandle->pTableCheckInfo); + + SBlockOrderSupporter sup = {0}; + sup.numOfTables = numOfTables; + sup.numOfBlocksPerTable = calloc(1, sizeof(int32_t) * numOfTables); + sup.blockIndexArray = calloc(1, sizeof(int32_t) * numOfTables); + sup.pDataBlockInfo = calloc(1, POINTER_BYTES * numOfTables); + + if (sup.numOfBlocksPerTable == NULL || sup.blockIndexArray == NULL || sup.pDataBlockInfo == NULL) { + cleanBlockOrderSupporter(&sup, 0); + return TSDB_CODE_TDB_OUT_OF_MEMORY; + } + + int32_t cnt = 0; + int32_t numOfQualTables = 0; + + for (int32_t j = 0; j < numOfTables; ++j) { + STableCheckInfo* pTableCheck = (STableCheckInfo*)taosArrayGet(pQueryHandle->pTableCheckInfo, j); + if (pTableCheck->numOfBlocks <= 0) { + continue; + } + + SBlock* pBlock = pTableCheck->pCompInfo->blocks; + sup.numOfBlocksPerTable[numOfQualTables] = pTableCheck->numOfBlocks; + + char* buf = malloc(sizeof(STableBlockInfo) * pTableCheck->numOfBlocks); + if (buf == NULL) { + cleanBlockOrderSupporter(&sup, numOfQualTables); + return TSDB_CODE_TDB_OUT_OF_MEMORY; + } + + sup.pDataBlockInfo[numOfQualTables] = (STableBlockInfo*)buf; + + for (int32_t k = 0; k < pTableCheck->numOfBlocks; ++k) { + STableBlockInfo* pBlockInfo = &sup.pDataBlockInfo[numOfQualTables][k]; + + pBlockInfo->compBlock = &pBlock[k]; + pBlockInfo->pTableCheckInfo = pTableCheck; + cnt++; + } + + numOfQualTables++; + } + + assert(numOfBlocks == cnt); + + // since there is only one table qualified, blocks are not sorted + if (numOfQualTables == 1) { + memcpy(pQueryHandle->pDataBlockInfo, sup.pDataBlockInfo[0], sizeof(STableBlockInfo) * numOfBlocks); + cleanBlockOrderSupporter(&sup, numOfQualTables); + + tsdbDebug("%p create data blocks info struct completed for 1 table, %d blocks not sorted 0x%"PRIx64, pQueryHandle, cnt, + pQueryHandle->qId); + return TSDB_CODE_SUCCESS; + } + + tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables 0x%"PRIx64, pQueryHandle, 
cnt, + numOfQualTables, pQueryHandle->qId); + + assert(cnt <= numOfBlocks && numOfQualTables <= numOfTables); // the pTableQueryInfo[j]->numOfBlocks may be 0 + sup.numOfTables = numOfQualTables; + + SLoserTreeInfo* pTree = NULL; + uint8_t ret = tLoserTreeCreate(&pTree, sup.numOfTables, &sup, dataBlockOrderCompar); + if (ret != TSDB_CODE_SUCCESS) { + cleanBlockOrderSupporter(&sup, numOfTables); + return TSDB_CODE_TDB_OUT_OF_MEMORY; + } + + int32_t numOfTotal = 0; + + while (numOfTotal < cnt) { + int32_t pos = pTree->pNode[0].index; + int32_t index = sup.blockIndexArray[pos]++; + + STableBlockInfo* pBlocksInfo = sup.pDataBlockInfo[pos]; + pQueryHandle->pDataBlockInfo[numOfTotal++] = pBlocksInfo[index]; + + // set data block index overflow, in order to disable the offset comparator + if (sup.blockIndexArray[pos] >= sup.numOfBlocksPerTable[pos]) { + sup.blockIndexArray[pos] = sup.numOfBlocksPerTable[pos] + 1; + } + + tLoserTreeAdjust(pTree, pos + sup.numOfTables); + } + + /* + * available when no import exists + * for(int32_t i = 0; i < cnt - 1; ++i) { + * assert((*pDataBlockInfo)[i].compBlock->offset < (*pDataBlockInfo)[i+1].compBlock->offset); + * } + */ + + tsdbDebug("%p %d data blocks sort completed, 0x%"PRIx64, pQueryHandle, cnt, pQueryHandle->qId); + cleanBlockOrderSupporter(&sup, numOfTables); + free(pTree); + + return TSDB_CODE_SUCCESS; +} + +static int32_t getFirstFileDataBlock(STsdbQueryHandle* pQueryHandle, bool* exists); + +static int32_t getDataBlockRv(STsdbQueryHandle* pQueryHandle, STableBlockInfo* pNext, bool *exists) { + int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 
1 : -1; + SQueryFilePos* cur = &pQueryHandle->cur; + + while(1) { + int32_t code = loadFileDataBlock(pQueryHandle, pNext->compBlock, pNext->pTableCheckInfo, exists); + if (code != TSDB_CODE_SUCCESS || *exists) { + return code; + } + + if ((cur->slot == pQueryHandle->numOfBlocks - 1 && ASCENDING_TRAVERSE(pQueryHandle->order)) || + (cur->slot == 0 && !ASCENDING_TRAVERSE(pQueryHandle->order))) { + // all data blocks in current file has been checked already, try next file if exists + return getFirstFileDataBlock(pQueryHandle, exists); + } else { // next block of the same file + cur->slot += step; + cur->mixBlock = false; + cur->blockCompleted = false; + pNext = &pQueryHandle->pDataBlockInfo[cur->slot]; + } + } +} + +static int32_t getFirstFileDataBlock(STsdbQueryHandle* pQueryHandle, bool* exists) { + pQueryHandle->numOfBlocks = 0; + SQueryFilePos* cur = &pQueryHandle->cur; + + int32_t code = TSDB_CODE_SUCCESS; + + int32_t numOfBlocks = 0; + int32_t numOfTables = (int32_t)taosArrayGetSize(pQueryHandle->pTableCheckInfo); + + STsdbCfg* pCfg = &pQueryHandle->pTsdb->config; + STimeWindow win = TSWINDOW_INITIALIZER; + + while (true) { + tsdbRLockFS(REPO_FS(pQueryHandle->pTsdb)); + + if ((pQueryHandle->pFileGroup = tsdbFSIterNext(&pQueryHandle->fileIter)) == NULL) { + tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb)); + break; + } + + tsdbGetFidKeyRange(pCfg->daysPerFile, pCfg->precision, pQueryHandle->pFileGroup->fid, &win.skey, &win.ekey); + + // current file are not overlapped with query time window, ignore remain files + if ((ASCENDING_TRAVERSE(pQueryHandle->order) && win.skey > pQueryHandle->window.ekey) || + (!ASCENDING_TRAVERSE(pQueryHandle->order) && win.ekey < pQueryHandle->window.ekey)) { + tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb)); + tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, 0x%"PRIx64, pQueryHandle, + pQueryHandle->window.skey, pQueryHandle->window.ekey, pQueryHandle->qId); + pQueryHandle->pFileGroup = NULL; + 
assert(pQueryHandle->numOfBlocks == 0); + break; + } + + if (tsdbSetAndOpenReadFSet(&pQueryHandle->rhelper, pQueryHandle->pFileGroup) < 0) { + tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb)); + code = terrno; + break; + } + + tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb)); + + if (tsdbLoadBlockIdx(&pQueryHandle->rhelper) < 0) { + code = terrno; + break; + } + + if ((code = getFileCompInfo(pQueryHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) { + break; + } + + tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, 0x%"PRIx64, pQueryHandle, numOfBlocks, numOfTables, + pQueryHandle->pFileGroup->fid, pQueryHandle->qId); + + assert(numOfBlocks >= 0); + if (numOfBlocks == 0) { + continue; + } + + // todo return error code to query engine + if ((code = createDataBlocksInfo(pQueryHandle, numOfBlocks, &pQueryHandle->numOfBlocks)) != TSDB_CODE_SUCCESS) { + break; + } + + assert(numOfBlocks >= pQueryHandle->numOfBlocks); + if (pQueryHandle->numOfBlocks > 0) { + break; + } + } + + // no data in file anymore + if (pQueryHandle->numOfBlocks <= 0 || code != TSDB_CODE_SUCCESS) { + if (code == TSDB_CODE_SUCCESS) { + assert(pQueryHandle->pFileGroup == NULL); + } + + cur->fid = INT32_MIN; // denote that there are no data in file anymore + *exists = false; + return code; + } + + assert(pQueryHandle->pFileGroup != NULL && pQueryHandle->numOfBlocks > 0); + cur->slot = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:pQueryHandle->numOfBlocks-1; + cur->fid = pQueryHandle->pFileGroup->fid; + + STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot]; + return getDataBlockRv(pQueryHandle, pBlockInfo, exists); +} + +static bool isEndFileDataBlock(SQueryFilePos* cur, int32_t numOfBlocks, bool ascTrav) { + assert(cur != NULL && numOfBlocks > 0); + return (cur->slot == numOfBlocks - 1 && ascTrav) || (cur->slot == 0 && !ascTrav); +} + +static void moveToNextDataBlockInCurrentFile(STsdbQueryHandle* pQueryHandle) { + int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 
1 : -1; + + SQueryFilePos* cur = &pQueryHandle->cur; + assert(cur->slot < pQueryHandle->numOfBlocks && cur->slot >= 0); + + cur->slot += step; + cur->mixBlock = false; + cur->blockCompleted = false; +} + +int32_t tsdbGetFileBlocksDistInfo(TsdbQueryHandleT* queryHandle, STableBlockDist* pTableBlockInfo) { + STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) queryHandle; + + pTableBlockInfo->totalSize = 0; + pTableBlockInfo->totalRows = 0; + STsdbFS* pFileHandle = REPO_FS(pQueryHandle->pTsdb); + + // find the start data block in file + pQueryHandle->locateStart = true; + STsdbCfg* pCfg = &pQueryHandle->pTsdb->config; + int32_t fid = getFileIdFromKey(pQueryHandle->window.skey, pCfg->daysPerFile, pCfg->precision); + + tsdbRLockFS(pFileHandle); + tsdbFSIterInit(&pQueryHandle->fileIter, pFileHandle, pQueryHandle->order); + tsdbFSIterSeek(&pQueryHandle->fileIter, fid); + tsdbUnLockFS(pFileHandle); + + pTableBlockInfo->numOfFiles += 1; + + int32_t code = TSDB_CODE_SUCCESS; + int32_t numOfBlocks = 0; + int32_t numOfTables = (int32_t)taosArrayGetSize(pQueryHandle->pTableCheckInfo); + int defaultRows = TSDB_DEFAULT_BLOCK_ROWS(pCfg->maxRowsPerFileBlock); + STimeWindow win = TSWINDOW_INITIALIZER; + + while (true) { + numOfBlocks = 0; + tsdbRLockFS(REPO_FS(pQueryHandle->pTsdb)); + + if ((pQueryHandle->pFileGroup = tsdbFSIterNext(&pQueryHandle->fileIter)) == NULL) { + tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb)); + break; + } + + tsdbGetFidKeyRange(pCfg->daysPerFile, pCfg->precision, pQueryHandle->pFileGroup->fid, &win.skey, &win.ekey); + + // current file are not overlapped with query time window, ignore remain files + if ((ASCENDING_TRAVERSE(pQueryHandle->order) && win.skey > pQueryHandle->window.ekey) || + (!ASCENDING_TRAVERSE(pQueryHandle->order) && win.ekey < pQueryHandle->window.ekey)) { + tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb)); + tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, 0x%"PRIx64, pQueryHandle, + 
pQueryHandle->window.skey, pQueryHandle->window.ekey, pQueryHandle->qId); + pQueryHandle->pFileGroup = NULL; + break; + } + + pTableBlockInfo->numOfFiles += 1; + if (tsdbSetAndOpenReadFSet(&pQueryHandle->rhelper, pQueryHandle->pFileGroup) < 0) { + tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb)); + code = terrno; + break; + } + + tsdbUnLockFS(REPO_FS(pQueryHandle->pTsdb)); + + if (tsdbLoadBlockIdx(&pQueryHandle->rhelper) < 0) { + code = terrno; + break; + } + + if ((code = getFileCompInfo(pQueryHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) { + break; + } + + tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, 0x%"PRIx64, pQueryHandle, numOfBlocks, numOfTables, + pQueryHandle->pFileGroup->fid, pQueryHandle->qId); + + if (numOfBlocks == 0) { + continue; + } + + for (int32_t i = 0; i < numOfTables; ++i) { + STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); + + SBlock* pBlock = pCheckInfo->pCompInfo->blocks; + for (int32_t j = 0; j < pCheckInfo->numOfBlocks; ++j) { + pTableBlockInfo->totalSize += pBlock[j].len; + + int32_t numOfRows = pBlock[j].numOfRows; + pTableBlockInfo->totalRows += numOfRows; + if (numOfRows > pTableBlockInfo->maxRows) pTableBlockInfo->maxRows = numOfRows; + if (numOfRows < pTableBlockInfo->minRows) pTableBlockInfo->minRows = numOfRows; + if (numOfRows < defaultRows) pTableBlockInfo->numOfSmallBlocks+=1; + int32_t stepIndex = (numOfRows-1)/TSDB_BLOCK_DIST_STEP_ROWS; + SFileBlockInfo *blockInfo = (SFileBlockInfo*)taosArrayGet(pTableBlockInfo->dataBlockInfos, stepIndex); + blockInfo->numBlocksOfStep++; + } + } + } + + return code; +} + +static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists) { + STsdbFS* pFileHandle = REPO_FS(pQueryHandle->pTsdb); + SQueryFilePos* cur = &pQueryHandle->cur; + + // find the start data block in file + if (!pQueryHandle->locateStart) { + pQueryHandle->locateStart = true; + STsdbCfg* pCfg = &pQueryHandle->pTsdb->config; + int32_t fid = 
getFileIdFromKey(pQueryHandle->window.skey, pCfg->daysPerFile, pCfg->precision); + + tsdbRLockFS(pFileHandle); + tsdbFSIterInit(&pQueryHandle->fileIter, pFileHandle, pQueryHandle->order); + tsdbFSIterSeek(&pQueryHandle->fileIter, fid); + tsdbUnLockFS(pFileHandle); + + return getFirstFileDataBlock(pQueryHandle, exists); + } else { + // check if current file block is all consumed + STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot]; + STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo; + + // current block is done, try next + if ((!cur->mixBlock) || cur->blockCompleted) { + // all data blocks in current file has been checked already, try next file if exists + } else { + tsdbDebug("%p continue in current data block, index:%d, pos:%d, 0x%"PRIx64, pQueryHandle, cur->slot, cur->pos, + pQueryHandle->qId); + int32_t code = handleDataMergeIfNeeded(pQueryHandle, pBlockInfo->compBlock, pCheckInfo); + *exists = (pQueryHandle->realNumOfRows > 0); + + if (code != TSDB_CODE_SUCCESS || *exists) { + return code; + } + } + + // current block is empty, try next block in file + // all data blocks in current file has been checked already, try next file if exists + if (isEndFileDataBlock(cur, pQueryHandle->numOfBlocks, ASCENDING_TRAVERSE(pQueryHandle->order))) { + return getFirstFileDataBlock(pQueryHandle, exists); + } else { + moveToNextDataBlockInCurrentFile(pQueryHandle); + STableBlockInfo* pNext = &pQueryHandle->pDataBlockInfo[cur->slot]; + return getDataBlockRv(pQueryHandle, pNext, exists); + } + } +} + +static bool doHasDataInBuffer(STsdbQueryHandle* pQueryHandle) { + size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); + + while (pQueryHandle->activeIndex < numOfTables) { + if (hasMoreDataInCache(pQueryHandle)) { + return true; + } + + pQueryHandle->activeIndex += 1; + } + + // no data in memtable or imemtable, decrease the memory reference. + // TODO !! 
+// tsdbMayUnTakeMemSnapshot(pQueryHandle); + return false; +} + +//todo not unref yet, since it is not support multi-group interpolation query +static UNUSED_FUNC void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle) { + // filter the queried time stamp in the first place + STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle; + + // starts from the buffer in case of descending timestamp order check data blocks + size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); + + int32_t i = 0; + while(i < numOfTables) { + STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); + + // the first qualified table for interpolation query + if ((pQueryHandle->window.skey <= pCheckInfo->pTableObj->lastKey) && + (pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL)) { + break; + } + + i++; + } + + // there are no data in all the tables + if (i == numOfTables) { + return; + } + + STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, i); + taosArrayClear(pQueryHandle->pTableCheckInfo); + + info.lastKey = pQueryHandle->window.skey; + taosArrayPush(pQueryHandle->pTableCheckInfo, &info); +} + +static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int maxRowsToRead, STimeWindow* win, + STsdbQueryHandle* pQueryHandle) { + int numOfRows = 0; + int32_t numOfCols = (int32_t)taosArrayGetSize(pQueryHandle->pColumns); + STsdbCfg* pCfg = &pQueryHandle->pTsdb->config; + win->skey = TSKEY_INITIAL_VAL; + + int64_t st = taosGetTimestampUs(); + STable* pTable = pCheckInfo->pTableObj; + int16_t rv = -1; + STSchema* pSchema = NULL; + + do { + SMemRow row = getSMemRowInTableMem(pCheckInfo, pQueryHandle->order, pCfg->update, NULL); + if (row == NULL) { + break; + } + + TSKEY key = memRowKey(row); + if ((key > maxKey && ASCENDING_TRAVERSE(pQueryHandle->order)) || (key < maxKey && !ASCENDING_TRAVERSE(pQueryHandle->order))) { + tsdbDebug("%p key:%"PRIu64" beyond qrange:%"PRId64" - %"PRId64", no 
more data in buffer", pQueryHandle, key, pQueryHandle->window.skey, + pQueryHandle->window.ekey); + + break; + } + + if (win->skey == INT64_MIN) { + win->skey = key; + } + + win->ekey = key; + if (rv != memRowVersion(row)) { + pSchema = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row), (int8_t)memRowType(row)); + rv = memRowVersion(row); + } + mergeTwoRowFromMem(pQueryHandle, maxRowsToRead, numOfRows, row, NULL, numOfCols, pTable, pSchema, NULL, true); + + if (++numOfRows >= maxRowsToRead) { + moveToNextRowInMem(pCheckInfo); + break; + } + + } while(moveToNextRowInMem(pCheckInfo)); + + assert(numOfRows <= maxRowsToRead); + + // if the buffer is not full in case of descending order query, move the data in the front of the buffer + if (!ASCENDING_TRAVERSE(pQueryHandle->order) && numOfRows < maxRowsToRead) { + int32_t emptySize = maxRowsToRead - numOfRows; + + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); + memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); + } + } + + int64_t elapsedTime = taosGetTimestampUs() - st; + tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d, 0x%"PRIx64, pQueryHandle, + elapsedTime, numOfRows, numOfCols, pQueryHandle->qId); + + return numOfRows; +} + +static int32_t getAllTableList(STable* pSuperTable, SArray* list) { + STSchema* pTagSchema = tsdbGetTableTagSchema(pSuperTable); + if(pTagSchema && pTagSchema->numOfCols == 1 && pTagSchema->columns[0].type == TSDB_DATA_TYPE_JSON){ + uint32_t key = TSDB_DATA_JSON_NULL; + char keyMd5[TSDB_MAX_JSON_KEY_MD5_LEN] = {0}; + jsonKeyMd5(&key, INT_BYTES, keyMd5); + SArray** tablist = (SArray**)taosHashGet(pSuperTable->jsonKeyMap, keyMd5, TSDB_MAX_JSON_KEY_MD5_LEN); + + for (int i = 0; i < taosArrayGetSize(*tablist); ++i) { + JsonMapValue* p = taosArrayGet(*tablist, i); + STableKeyInfo info = {.pTable = 
p->table, .lastKey = TSKEY_INITIAL_VAL}; + taosArrayPush(list, &info); + } + }else{ + SSkipListIterator* iter = tSkipListCreateIter(pSuperTable->pIndex); + while (tSkipListIterNext(iter)) { + SSkipListNode* pNode = tSkipListIterGet(iter); + + STable* pTable = (STable*) SL_GET_NODE_DATA((SSkipListNode*) pNode); + + STableKeyInfo info = {.pTable = pTable, .lastKey = TSKEY_INITIAL_VAL}; + taosArrayPush(list, &info); + } + + tSkipListDestroyIter(iter); + } + return TSDB_CODE_SUCCESS; +} + +static bool loadBlockOfActiveTable(STsdbQueryHandle* pQueryHandle) { + if (pQueryHandle->checkFiles) { + // check if the query range overlaps with the file data block + bool exists = true; + + int32_t code = getDataBlocksInFiles(pQueryHandle, &exists); + if (code != TSDB_CODE_SUCCESS) { + pQueryHandle->checkFiles = false; + return false; + } + + if (exists) { + tsdbRetrieveDataBlock((TsdbQueryHandleT*) pQueryHandle, NULL); + if (pQueryHandle->currentLoadExternalRows && pQueryHandle->window.skey == pQueryHandle->window.ekey) { + SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, 0); + assert(*(int64_t*)pColInfo->pData == pQueryHandle->window.skey); + } + + pQueryHandle->currentLoadExternalRows = false; // clear the flag, since the exact matched row is found. 
+ return exists; + } + + pQueryHandle->checkFiles = false; + } + + if (hasMoreDataInCache(pQueryHandle)) { + pQueryHandle->currentLoadExternalRows = false; + return true; + } + + // current result is empty + if (pQueryHandle->currentLoadExternalRows && pQueryHandle->window.skey == pQueryHandle->window.ekey && pQueryHandle->cur.rows == 0) { + SMemRef* pMemRef = pQueryHandle->pMemRef; + + doGetExternalRow(pQueryHandle, TSDB_PREV_ROW, pMemRef); + doGetExternalRow(pQueryHandle, TSDB_NEXT_ROW, pMemRef); + + bool result = tsdbGetExternalRow(pQueryHandle); + + pQueryHandle->prev = doFreeColumnInfoData(pQueryHandle->prev); + pQueryHandle->next = doFreeColumnInfoData(pQueryHandle->next); + pQueryHandle->currentLoadExternalRows = false; + + return result; + } + + return false; +} + +static bool loadCachedLastRow(STsdbQueryHandle* pQueryHandle) { + // the last row is cached in buffer, return it directly. + // here note that the pQueryHandle->window must be the TS_INITIALIZER + int32_t numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pQueryHandle)); + size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); + assert(numOfTables > 0 && numOfCols > 0); + + SQueryFilePos* cur = &pQueryHandle->cur; + + SMemRow pRow = NULL; + TSKEY key = TSKEY_INITIAL_VAL; + int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 
1:-1; + + if (++pQueryHandle->activeIndex < numOfTables) { + STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, pQueryHandle->activeIndex); + int32_t ret = tsdbGetCachedLastRow(pCheckInfo->pTableObj, &pRow, &key); + if (ret != TSDB_CODE_SUCCESS) { + return false; + } + mergeTwoRowFromMem(pQueryHandle, pQueryHandle->outputCapacity, 0, pRow, NULL, numOfCols, pCheckInfo->pTableObj, NULL, NULL, true); + tfree(pRow); + + // update the last key value + pCheckInfo->lastKey = key + step; + + cur->rows = 1; // only one row + cur->lastKey = key + step; + cur->mixBlock = true; + cur->win.skey = key; + cur->win.ekey = key; + + return true; + } + + return false; +} + + + +static bool loadCachedLast(STsdbQueryHandle* pQueryHandle) { + // the last row is cached in buffer, return it directly. + // here note that the pQueryHandle->window must be the TS_INITIALIZER + int32_t tgNumOfCols = (int32_t)QH_GET_NUM_OF_COLS(pQueryHandle); + size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); + int32_t numOfRows = 0; + assert(numOfTables > 0 && tgNumOfCols > 0); + SQueryFilePos* cur = &pQueryHandle->cur; + TSKEY priKey = TSKEY_INITIAL_VAL; + int32_t priIdx = -1; + SColumnInfoData* pColInfo = NULL; + + while (++pQueryHandle->activeIndex < numOfTables) { + STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, pQueryHandle->activeIndex); + STable* pTable = pCheckInfo->pTableObj; + char* pData = NULL; + + int32_t numOfCols = pTable->maxColNum; + + if (pTable->lastCols == NULL || pTable->maxColNum <= 0) { + tsdbWarn("no last cached for table %s, uid:%" PRIu64 ",tid:%d", pTable->name->data, pTable->tableId.uid, pTable->tableId.tid); + continue; + } + + int32_t i = 0, j = 0; + + // lock pTable->lastCols[i] as it would be released when schema update(tsdbUpdateLastColSchema) + TSDB_RLOCK_TABLE(pTable); + while(i < tgNumOfCols && j < numOfCols) { + pColInfo = taosArrayGet(pQueryHandle->pColumns, i); + if (pTable->lastCols[j].colId < 
pColInfo->info.colId) { + j++; + continue; + } else if (pTable->lastCols[j].colId > pColInfo->info.colId) { + i++; + continue; + } + + pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes; + + if (pTable->lastCols[j].bytes > 0) { + void* value = pTable->lastCols[j].pData; + switch (pColInfo->info.type) { + case TSDB_DATA_TYPE_BINARY: + case TSDB_DATA_TYPE_NCHAR: + memcpy(pData, value, varDataTLen(value)); + break; + case TSDB_DATA_TYPE_NULL: + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_UTINYINT: + *(uint8_t *)pData = *(uint8_t *)value; + break; + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_USMALLINT: + *(uint16_t *)pData = *(uint16_t *)value; + break; + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_UINT: + *(uint32_t *)pData = *(uint32_t *)value; + break; + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_UBIGINT: + *(uint64_t *)pData = *(uint64_t *)value; + break; + case TSDB_DATA_TYPE_FLOAT: + SET_FLOAT_PTR(pData, value); + break; + case TSDB_DATA_TYPE_DOUBLE: + SET_DOUBLE_PTR(pData, value); + break; + case TSDB_DATA_TYPE_TIMESTAMP: + if (pColInfo->info.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) { + priKey = tdGetKey(*(TKEY *)value); + priIdx = i; + + i++; + j++; + continue; + } else { + *(TSKEY *)pData = *(TSKEY *)value; + } + break; + default: + memcpy(pData, value, pColInfo->info.bytes); + } + + for (int32_t n = 0; n < tgNumOfCols; ++n) { + if (n == i) { + continue; + } + + pColInfo = taosArrayGet(pQueryHandle->pColumns, n); + pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;; + + if (pColInfo->info.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) { + *(TSKEY *)pData = pTable->lastCols[j].ts; + continue; + } + + if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) { + setVardataNull(pData, pColInfo->info.type); + } else { + setNull(pData, pColInfo->info.type, pColInfo->info.bytes); + } + } + + numOfRows++; + assert(numOfRows < 
pQueryHandle->outputCapacity); + } + + i++; + j++; + } + TSDB_RUNLOCK_TABLE(pTable); + + // leave the real ts column as the last row, because last function only (not stable) use the last row as res + if (priKey != TSKEY_INITIAL_VAL) { + pColInfo = taosArrayGet(pQueryHandle->pColumns, priIdx); + pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes; + + *(TSKEY *)pData = priKey; + + for (int32_t n = 0; n < tgNumOfCols; ++n) { + if (n == priIdx) { + continue; + } + + pColInfo = taosArrayGet(pQueryHandle->pColumns, n); + pData = (char*)pColInfo->pData + numOfRows * pColInfo->info.bytes;; + + assert (pColInfo->info.colId != PRIMARYKEY_TIMESTAMP_COL_INDEX); + + if (pColInfo->info.type == TSDB_DATA_TYPE_BINARY || pColInfo->info.type == TSDB_DATA_TYPE_NCHAR) { + setVardataNull(pData, pColInfo->info.type); + } else { + setNull(pData, pColInfo->info.type, pColInfo->info.bytes); + } + } + + numOfRows++; + } + + if (numOfRows > 0) { + cur->rows = numOfRows; + cur->mixBlock = true; + + return true; + } + } + + return false; +} + +void tsdbSwitchTable(TsdbQueryHandleT queryHandle) { + STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) queryHandle; + + STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, pQueryHandle->activeIndex); + pCheckInfo->numOfBlocks = 0; + + pQueryHandle->locateStart = false; + pQueryHandle->checkFiles = true; + pQueryHandle->cur.rows = 0; + pQueryHandle->currentLoadExternalRows = pQueryHandle->loadExternalRow; + + terrno = TSDB_CODE_SUCCESS; + + ++pQueryHandle->activeIndex; +} + + +static bool loadDataBlockFromTableSeq(STsdbQueryHandle* pQueryHandle) { + size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); + assert(numOfTables > 0); + + int64_t stime = taosGetTimestampUs(); + + while(pQueryHandle->activeIndex < numOfTables) { + if (loadBlockOfActiveTable(pQueryHandle)) { + return true; + } + + STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, pQueryHandle->activeIndex); + 
pCheckInfo->numOfBlocks = 0; + + pQueryHandle->activeIndex += 1; + pQueryHandle->locateStart = false; + pQueryHandle->checkFiles = true; + pQueryHandle->cur.rows = 0; + pQueryHandle->currentLoadExternalRows = pQueryHandle->loadExternalRow; + + terrno = TSDB_CODE_SUCCESS; + + int64_t elapsedTime = taosGetTimestampUs() - stime; + pQueryHandle->cost.checkForNextTime += elapsedTime; + } + + return false; +} + +// handle data in cache situation +bool tsdbNextDataBlock(TsdbQueryHandleT pHandle) { + STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle; + if (pQueryHandle == NULL) { + return false; + } + + if (emptyQueryTimewindow(pQueryHandle)) { + tsdbDebug("%p query window not overlaps with the data set, no result returned, 0x%"PRIx64, pQueryHandle, pQueryHandle->qId); + return false; + } + + int64_t stime = taosGetTimestampUs(); + int64_t elapsedTime = stime; + + // TODO refactor: remove "type" + if (pQueryHandle->type == TSDB_QUERY_TYPE_LAST) { + if (pQueryHandle->cachelastrow == TSDB_CACHED_TYPE_LASTROW) { + return loadCachedLastRow(pQueryHandle); + } else if (pQueryHandle->cachelastrow == TSDB_CACHED_TYPE_LAST) { + return loadCachedLast(pQueryHandle); + } + } + + if (pQueryHandle->loadType == BLOCK_LOAD_TABLE_SEQ_ORDER) { + return loadDataBlockFromTableSeq(pQueryHandle); + } else { // loadType == RR and Offset Order + if (pQueryHandle->checkFiles) { + // check if the query range overlaps with the file data block + bool exists = true; + + int32_t code = getDataBlocksInFiles(pQueryHandle, &exists); + if (code != TSDB_CODE_SUCCESS) { + pQueryHandle->activeIndex = 0; + pQueryHandle->checkFiles = false; + + return false; + } + + if (exists) { + pQueryHandle->cost.checkForNextTime += (taosGetTimestampUs() - stime); + return exists; + } + + pQueryHandle->activeIndex = 0; + pQueryHandle->checkFiles = false; + } + + // TODO: opt by consider the scan order + bool ret = doHasDataInBuffer(pQueryHandle); + terrno = TSDB_CODE_SUCCESS; + + elapsedTime = 
taosGetTimestampUs() - stime; + pQueryHandle->cost.checkForNextTime += elapsedTime; + return ret; + } +} + +static int32_t doGetExternalRow(STsdbQueryHandle* pQueryHandle, int16_t type, SMemRef* pMemRef) { + STsdbQueryHandle* pSecQueryHandle = NULL; + + if (type == TSDB_PREV_ROW && pQueryHandle->prev) { + return TSDB_CODE_SUCCESS; + } + + if (type == TSDB_NEXT_ROW && pQueryHandle->next) { + return TSDB_CODE_SUCCESS; + } + + // prepare the structure + int32_t numOfCols = (int32_t) QH_GET_NUM_OF_COLS(pQueryHandle); + + if (type == TSDB_PREV_ROW) { + pQueryHandle->prev = taosArrayInit(numOfCols, sizeof(SColumnInfoData)); + if (pQueryHandle->prev == NULL) { + terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; + goto out_of_memory; + } + } else { + pQueryHandle->next = taosArrayInit(numOfCols, sizeof(SColumnInfoData)); + if (pQueryHandle->next == NULL) { + terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; + goto out_of_memory; + } + } + + SArray* row = (type == TSDB_PREV_ROW)? pQueryHandle->prev : pQueryHandle->next; + + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i); + + SColumnInfoData colInfo = {{0}, 0}; + colInfo.info = pCol->info; + colInfo.pData = calloc(1, pCol->info.bytes); + if (colInfo.pData == NULL) { + terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; + goto out_of_memory; + } + + taosArrayPush(row, &colInfo); + } + + // load the previous row + STsdbQueryCond cond = {.numOfCols = numOfCols, .loadExternalRows = false, .type = BLOCK_LOAD_OFFSET_SEQ_ORDER}; + if (type == TSDB_PREV_ROW) { + cond.order = TSDB_ORDER_DESC; + cond.twindow = (STimeWindow){pQueryHandle->window.skey, INT64_MIN}; + } else { + cond.order = TSDB_ORDER_ASC; + cond.twindow = (STimeWindow){pQueryHandle->window.skey, INT64_MAX}; + } + + cond.colList = calloc(cond.numOfCols, sizeof(SColumnInfo)); + if (cond.colList == NULL) { + terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; + goto out_of_memory; + } + + for (int32_t i = 0; i < cond.numOfCols; ++i) { + SColumnInfoData* 
pColInfoData = taosArrayGet(pQueryHandle->pColumns, i); + memcpy(&cond.colList[i], &pColInfoData->info, sizeof(SColumnInfo)); + } + + pSecQueryHandle = tsdbQueryTablesImpl(pQueryHandle->pTsdb, &cond, pQueryHandle->qId, pMemRef); + tfree(cond.colList); + if (pSecQueryHandle == NULL) { + goto out_of_memory; + } + + // current table, only one table + STableCheckInfo* pCurrent = taosArrayGet(pQueryHandle->pTableCheckInfo, pQueryHandle->activeIndex); + + SArray* psTable = NULL; + pSecQueryHandle->pTableCheckInfo = createCheckInfoFromCheckInfo(pCurrent, pSecQueryHandle->window.skey, &psTable); + if (pSecQueryHandle->pTableCheckInfo == NULL) { + taosArrayDestroy(&psTable); + terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; + goto out_of_memory; + } + + + tsdbMayTakeMemSnapshot(pSecQueryHandle, psTable); + if (!tsdbNextDataBlock((void*)pSecQueryHandle)) { + // no result in current query, free the corresponding result rows structure + if (type == TSDB_PREV_ROW) { + pQueryHandle->prev = doFreeColumnInfoData(pQueryHandle->prev); + } else { + pQueryHandle->next = doFreeColumnInfoData(pQueryHandle->next); + } + + goto out_of_memory; + } + + SDataBlockInfo blockInfo = {{0}, 0}; + tsdbRetrieveDataBlockInfo((void*)pSecQueryHandle, &blockInfo); + tsdbRetrieveDataBlock((void*)pSecQueryHandle, pSecQueryHandle->defaultLoadColumn); + + row = (type == TSDB_PREV_ROW)? 
pQueryHandle->prev:pQueryHandle->next; + int32_t pos = (type == TSDB_PREV_ROW)?pSecQueryHandle->cur.rows - 1:0; + + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pCol = taosArrayGet(row, i); + SColumnInfoData* s = taosArrayGet(pSecQueryHandle->pColumns, i); + memcpy((char*)pCol->pData, (char*)s->pData + s->info.bytes * pos, pCol->info.bytes); + } + +out_of_memory: + tsdbCleanupQueryHandle(pSecQueryHandle); + return terrno; +} + +bool tsdbGetExternalRow(TsdbQueryHandleT pHandle) { + STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle; + SQueryFilePos* cur = &pQueryHandle->cur; + + cur->fid = INT32_MIN; + cur->mixBlock = true; + if (pQueryHandle->prev == NULL || pQueryHandle->next == NULL) { + cur->rows = 0; + return false; + } + + int32_t numOfCols = (int32_t) QH_GET_NUM_OF_COLS(pQueryHandle); + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pColInfoData = taosArrayGet(pQueryHandle->pColumns, i); + SColumnInfoData* first = taosArrayGet(pQueryHandle->prev, i); + + memcpy(pColInfoData->pData, first->pData, pColInfoData->info.bytes); + + SColumnInfoData* sec = taosArrayGet(pQueryHandle->next, i); + memcpy(((char*)pColInfoData->pData) + pColInfoData->info.bytes, sec->pData, pColInfoData->info.bytes); + + if (i == 0 && pColInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP) { + cur->win.skey = *(TSKEY*)pColInfoData->pData; + cur->win.ekey = *(TSKEY*)(((char*)pColInfoData->pData) + TSDB_KEYSIZE); + } + } + + cur->rows = 2; + return true; +} + +/* + * if lastRow == NULL, return TSDB_CODE_TDB_NO_CACHE_LAST_ROW + * else set pRes and return TSDB_CODE_SUCCESS and save lastKey + */ +int32_t tsdbGetCachedLastRow(STable* pTable, SMemRow* pRes, TSKEY* lastKey) { + int32_t code = TSDB_CODE_SUCCESS; + + TSDB_RLOCK_TABLE(pTable); + + if (!pTable->lastRow) { + code = TSDB_CODE_TDB_NO_CACHE_LAST_ROW; + goto out; + } + + if (pRes) { + *pRes = tdMemRowDup(pTable->lastRow); + if (*pRes == NULL) { + code = TSDB_CODE_TDB_OUT_OF_MEMORY; + } + } + +out: + 
TSDB_RUNLOCK_TABLE(pTable); + return code; +} + +bool isTsdbCacheLastRow(TsdbQueryHandleT* pQueryHandle) { + return ((STsdbQueryHandle *)pQueryHandle)->cachelastrow > TSDB_CACHED_TYPE_NONE; +} + +int32_t checkForCachedLastRow(STsdbQueryHandle* pQueryHandle, STableGroupInfo *groupList) { + assert(pQueryHandle != NULL && groupList != NULL); + + TSKEY key = TSKEY_INITIAL_VAL; + + SArray* group = taosArrayGetP(groupList->pGroupList, 0); + assert(group != NULL); + + STableKeyInfo* pInfo = (STableKeyInfo*)taosArrayGet(group, 0); + + int32_t code = 0; + + if (((STable*)pInfo->pTable)->lastRow) { + code = tsdbGetCachedLastRow(pInfo->pTable, NULL, &key); + if (code != TSDB_CODE_SUCCESS) { + pQueryHandle->cachelastrow = TSDB_CACHED_TYPE_NONE; + } else { + pQueryHandle->cachelastrow = TSDB_CACHED_TYPE_LASTROW; + } + } + + // update the tsdb query time range + if (pQueryHandle->cachelastrow != TSDB_CACHED_TYPE_NONE) { + pQueryHandle->window = TSWINDOW_INITIALIZER; + pQueryHandle->checkFiles = false; + pQueryHandle->activeIndex = -1; // start from -1 + } + + return code; +} + +int32_t checkForCachedLast(STsdbQueryHandle* pQueryHandle) { + assert(pQueryHandle != NULL); + + int32_t code = 0; + + STsdbRepo* pRepo = pQueryHandle->pTsdb; + + if (pRepo && CACHE_LAST_NULL_COLUMN(&(pRepo->config))) { + pQueryHandle->cachelastrow = TSDB_CACHED_TYPE_LAST; + } + + // update the tsdb query time range + if (pQueryHandle->cachelastrow) { + pQueryHandle->checkFiles = false; + pQueryHandle->activeIndex = -1; // start from -1 + } + + return code; +} + + +STimeWindow updateLastrowForEachGroup(STableGroupInfo *groupList) { + STimeWindow window = {INT64_MAX, INT64_MIN}; + + int32_t totalNumOfTable = 0; + SArray* emptyGroup = taosArrayInit(16, sizeof(int32_t)); + + // NOTE: starts from the buffer in case of descending timestamp order check data blocks + size_t numOfGroups = taosArrayGetSize(groupList->pGroupList); + for(int32_t j = 0; j < numOfGroups; ++j) { + SArray* pGroup = 
taosArrayGetP(groupList->pGroupList, j); + TSKEY key = TSKEY_INITIAL_VAL; + + STableKeyInfo keyInfo = {0}; + + size_t numOfTables = taosArrayGetSize(pGroup); + for(int32_t i = 0; i < numOfTables; ++i) { + STableKeyInfo* pInfo = (STableKeyInfo*) taosArrayGet(pGroup, i); + + // if the lastKey equals to INT64_MIN, there is no data in this table + TSKEY lastKey = ((STable*)(pInfo->pTable))->lastKey; + if (key < lastKey) { + key = lastKey; + + keyInfo.pTable = pInfo->pTable; + keyInfo.lastKey = key; + pInfo->lastKey = key; + + if (key < window.skey) { + window.skey = key; + } + + if (key > window.ekey) { + window.ekey = key; + } + } + } + + // clear current group, unref unused table + for (int32_t i = 0; i < numOfTables; ++i) { + STableKeyInfo* pInfo = (STableKeyInfo*)taosArrayGet(pGroup, i); + + // keyInfo.pTable may be NULL here. + if (pInfo->pTable != keyInfo.pTable) { + tsdbUnRefTable(pInfo->pTable); + } + } + + // more than one table in each group, only one table left for each group + if (keyInfo.pTable != NULL) { + totalNumOfTable++; + if (taosArrayGetSize(pGroup) == 1) { + // do nothing + } else { + taosArrayClear(pGroup); + taosArrayPush(pGroup, &keyInfo); + } + } else { // mark all the empty groups, and remove it later + taosArrayDestroy(&pGroup); + taosArrayPush(emptyGroup, &j); + } + } + + // window does not being updated, so set the original + if (window.skey == INT64_MAX && window.ekey == INT64_MIN) { + window = TSWINDOW_INITIALIZER; + assert(totalNumOfTable == 0 && taosArrayGetSize(groupList->pGroupList) == numOfGroups); + } + + taosArrayRemoveBatch(groupList->pGroupList, TARRAY_GET_START(emptyGroup), (int32_t) taosArrayGetSize(emptyGroup)); + taosArrayDestroy(&emptyGroup); + + groupList->numOfTables = totalNumOfTable; + return window; +} + +void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT* pQueryHandle, SDataBlockInfo* pDataBlockInfo) { + STsdbQueryHandle* pHandle = (STsdbQueryHandle*)pQueryHandle; + SQueryFilePos* cur = &pHandle->cur; + STable* pTable = 
NULL; + + // there are data in file + if (pHandle->cur.fid != INT32_MIN) { + STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[cur->slot]; + pTable = pBlockInfo->pTableCheckInfo->pTableObj; + } else { + STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex); + pTable = pCheckInfo->pTableObj; + } + + pDataBlockInfo->uid = pTable->tableId.uid; + pDataBlockInfo->tid = pTable->tableId.tid; + pDataBlockInfo->rows = cur->rows; + pDataBlockInfo->window = cur->win; + pDataBlockInfo->numOfCols = (int32_t)(QH_GET_NUM_OF_COLS(pHandle)); +} + +/* + * return null for mixed data block, if not a complete file data block, the statistics value will always return NULL + */ +int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT* pQueryHandle, SDataStatis** pBlockStatis) { + STsdbQueryHandle* pHandle = (STsdbQueryHandle*) pQueryHandle; + + SQueryFilePos* c = &pHandle->cur; + if (c->mixBlock) { + *pBlockStatis = NULL; + return TSDB_CODE_SUCCESS; + } + + STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[c->slot]; + assert((c->slot >= 0 && c->slot < pHandle->numOfBlocks) || ((c->slot == pHandle->numOfBlocks) && (c->slot == 0))); + + // file block with sub-blocks has no statistics data + if (pBlockInfo->compBlock->numOfSubBlocks > 1) { + *pBlockStatis = NULL; + return TSDB_CODE_SUCCESS; + } + + int64_t stime = taosGetTimestampUs(); + int statisStatus = tsdbLoadBlockStatis(&pHandle->rhelper, pBlockInfo->compBlock); + if (statisStatus < TSDB_STATIS_OK) { + return terrno; + } else if (statisStatus > TSDB_STATIS_OK) { + *pBlockStatis = NULL; + return TSDB_CODE_SUCCESS; + } + + int16_t* colIds = pHandle->defaultLoadColumn->pData; + + size_t numOfCols = QH_GET_NUM_OF_COLS(pHandle); + memset(pHandle->statis, 0, numOfCols * sizeof(SDataStatis)); + for(int32_t i = 0; i < numOfCols; ++i) { + pHandle->statis[i].colId = colIds[i]; + } + + tsdbGetBlockStatis(&pHandle->rhelper, pHandle->statis, (int)numOfCols, pBlockInfo->compBlock); + + // always load 
the first primary timestamp column data + SDataStatis* pPrimaryColStatis = &pHandle->statis[0]; + assert(pPrimaryColStatis->colId == PRIMARYKEY_TIMESTAMP_COL_INDEX); + + pPrimaryColStatis->numOfNull = 0; + pPrimaryColStatis->min = pBlockInfo->compBlock->keyFirst; + pPrimaryColStatis->max = pBlockInfo->compBlock->keyLast; + + //update the number of NULL data rows + for(int32_t i = 1; i < numOfCols; ++i) { + if (pHandle->statis[i].numOfNull == -1) { // set the column data are all NULL + pHandle->statis[i].numOfNull = pBlockInfo->compBlock->numOfRows; + } + } + + int64_t elapsed = taosGetTimestampUs() - stime; + pHandle->cost.statisInfoLoadTime += elapsed; + + *pBlockStatis = pHandle->statis; + return TSDB_CODE_SUCCESS; +} + +SArray* tsdbRetrieveDataBlock(TsdbQueryHandleT* pQueryHandle, SArray* pIdList) { + /** + * In the following two cases, the data has been loaded to SColumnInfoData. + * 1. data is from cache, 2. data block is not completed qualified to query time range + */ + STsdbQueryHandle* pHandle = (STsdbQueryHandle*)pQueryHandle; + + if (pHandle->cur.fid == INT32_MIN) { + return pHandle->pColumns; + } else { + STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[pHandle->cur.slot]; + STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo; + + if (pHandle->cur.mixBlock) { + return pHandle->pColumns; + } else { + SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlockInfo->compBlock); + assert(pHandle->realNumOfRows <= binfo.rows); + + // data block has been loaded, todo extract method + SDataBlockLoadInfo* pBlockLoadInfo = &pHandle->dataBlockLoadInfo; + + if (pBlockLoadInfo->slot == pHandle->cur.slot && pBlockLoadInfo->fileGroup->fid == pHandle->cur.fid && + pBlockLoadInfo->tid == pCheckInfo->pTableObj->tableId.tid) { + return pHandle->pColumns; + } else { // only load the file block + SBlock* pBlock = pBlockInfo->compBlock; + if (doLoadFileDataBlock(pHandle, pBlock, pCheckInfo, pHandle->cur.slot) != TSDB_CODE_SUCCESS) { + return NULL; + } + + 
// todo refactor + int32_t numOfRows = doCopyRowsFromFileBlock(pHandle, pHandle->outputCapacity, 0, 0, pBlock->numOfRows - 1); + + // if the buffer is not full in case of descending order query, move the data in the front of the buffer + if (!ASCENDING_TRAVERSE(pHandle->order) && numOfRows < pHandle->outputCapacity) { + int32_t emptySize = pHandle->outputCapacity - numOfRows; + int32_t reqNumOfCols = (int32_t)taosArrayGetSize(pHandle->pColumns); + + for(int32_t i = 0; i < reqNumOfCols; ++i) { + SColumnInfoData* pColInfo = taosArrayGet(pHandle->pColumns, i); + memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); + } + } + + return pHandle->pColumns; + } + } + } +} + +void filterPrepare(void* expr, void* param) { + tExprNode* pExpr = (tExprNode*)expr; + if (pExpr->_node.info != NULL) { + return; + } + + pExpr->_node.info = calloc(1, sizeof(tQueryInfo)); + + STSchema* pTSSchema = (STSchema*) param; + tQueryInfo* pInfo = pExpr->_node.info; + tVariant* pCond = pExpr->_node.pRight->pVal; + SSchema* pSchema = pExpr->_node.pLeft->pSchema; + + pInfo->sch = *pSchema; + pInfo->optr = pExpr->_node.optr; + pInfo->compare = getComparFunc(pInfo->sch.type, pInfo->optr); + pInfo->indexed = pTSSchema->columns->colId == pInfo->sch.colId; + + if (pInfo->optr == TSDB_RELATION_IN) { + int dummy = -1; + SHashObj *pObj = NULL; + if (pInfo->sch.colId == TSDB_TBNAME_COLUMN_INDEX) { + SArray *arr = (SArray *)(pCond->arr); + + size_t size = taosArrayGetSize(arr); + pObj = taosHashInit(size * 2, taosGetDefaultHashFunction(pInfo->sch.type), true, false); + + for (size_t i = 0; i < size; i++) { + char* p = taosArrayGetP(arr, i); + strntolower_s(varDataVal(p), varDataVal(p), varDataLen(p)); + taosHashPut(pObj, varDataVal(p), varDataLen(p), &dummy, sizeof(dummy)); + } + } else { + buildFilterSetFromBinary((void **)&pObj, pCond->pz, pCond->nLen); + } + + pInfo->q = (char *)pObj; + } else if (pCond != NULL) { + uint32_t size = 
pCond->nLen * TSDB_NCHAR_SIZE; + if (size < (uint32_t)pSchema->bytes) { + size = pSchema->bytes; + } + + // to make sure tonchar does not cause invalid write, since the '\0' needs at least sizeof(wchar_t) space. + pInfo->q = calloc(1, size + TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE); + tVariantDump(pCond, pInfo->q, pSchema->type, true); + } +} + +static int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) { + STableGroupSupporter* pTableGroupSupp = (STableGroupSupporter*) param; + STable* pTable1 = ((STableKeyInfo*) p1)->pTable; + STable* pTable2 = ((STableKeyInfo*) p2)->pTable; + + for (int32_t i = 0; i < pTableGroupSupp->numOfCols; ++i) { + SColIndex* pColIndex = &pTableGroupSupp->pCols[i]; + int32_t colIndex = pColIndex->colIndex; + + assert(colIndex >= TSDB_TBNAME_COLUMN_INDEX); + + char * f1 = NULL; + char * f2 = NULL; + int32_t type = 0; + int32_t bytes = 0; + + if (colIndex == TSDB_TBNAME_COLUMN_INDEX) { + f1 = (char*) TABLE_NAME(pTable1); + f2 = (char*) TABLE_NAME(pTable2); + type = TSDB_DATA_TYPE_BINARY; + bytes = tGetTbnameColumnSchema()->bytes; + } else { + if (pTableGroupSupp->pTagSchema && colIndex < pTableGroupSupp->pTagSchema->numOfCols) { + STColumn* pCol = schemaColAt(pTableGroupSupp->pTagSchema, colIndex); + bytes = pCol->bytes; + type = pCol->type; + if (type == TSDB_DATA_TYPE_JSON){ + f1 = getJsonTagValueElment(pTable1, pColIndex->name, (int32_t)strlen(pColIndex->name), NULL, TSDB_MAX_JSON_TAGS_LEN); + f2 = getJsonTagValueElment(pTable2, pColIndex->name, (int32_t)strlen(pColIndex->name), NULL, TSDB_MAX_JSON_TAGS_LEN); + }else{ + f1 = tdGetKVRowValOfCol(pTable1->tagVal, pCol->colId); + f2 = tdGetKVRowValOfCol(pTable2->tagVal, pCol->colId); + } + } + } + + // this tags value may be NULL + if (f1 == NULL && f2 == NULL) { + continue; + } + + if (f1 == NULL) { + return -1; + } + + if (f2 == NULL) { + return 1; + } + + int32_t ret = doCompare(f1, f2, type, bytes); + if (ret == 0) { + continue; + } else { + return ret; + } + } + + 
return 0; +} + +static int tsdbCheckInfoCompar(const void* key1, const void* key2) { + if (((STableCheckInfo*)key1)->tableId.tid < ((STableCheckInfo*)key2)->tableId.tid) { + return -1; + } else if (((STableCheckInfo*)key1)->tableId.tid > ((STableCheckInfo*)key2)->tableId.tid) { + return 1; + } else { + ASSERT(false); + return 0; + } +} + +void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTables, TSKEY skey, + STableGroupSupporter* pSupp, __ext_compar_fn_t compareFn) { + STable* pTable = taosArrayGetP(pTableList, 0); + + SArray* g = taosArrayInit(16, sizeof(STableKeyInfo)); + + STableKeyInfo info = {.pTable = pTable, .lastKey = skey}; + taosArrayPush(g, &info); + tsdbRefTable(pTable); + + for (int32_t i = 1; i < numOfTables; ++i) { + STable** prev = taosArrayGet(pTableList, i - 1); + STable** p = taosArrayGet(pTableList, i); + + int32_t ret = compareFn(prev, p, pSupp); + assert(ret == 0 || ret == -1); + + tsdbRefTable(*p); + assert((*p)->type == TSDB_CHILD_TABLE); + + if (ret == 0) { + STableKeyInfo info1 = {.pTable = *p, .lastKey = skey}; + taosArrayPush(g, &info1); + } else { + taosArrayPush(pGroups, &g); // current group is ended, start a new group + g = taosArrayInit(16, sizeof(STableKeyInfo)); + + STableKeyInfo info1 = {.pTable = *p, .lastKey = skey}; + taosArrayPush(g, &info1); + } + } + + taosArrayPush(pGroups, &g); +} + +SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pCols, int32_t numOfOrderCols, TSKEY skey) { + assert(pTableList != NULL); + SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES); + + size_t size = taosArrayGetSize(pTableList); + if (size == 0) { + tsdbDebug("no qualified tables"); + return pTableGroup; + } + + if (numOfOrderCols == 0 || size == 1) { // no group by tags clause or only one table + SArray* sa = taosArrayInit(size, sizeof(STableKeyInfo)); + if (sa == NULL) { + taosArrayDestroy(&pTableGroup); + return NULL; + } + + for(int32_t i = 0; i < size; ++i) { + STableKeyInfo 
*pKeyInfo = taosArrayGet(pTableList, i); + tsdbRefTable(pKeyInfo->pTable); + + STableKeyInfo info = {.pTable = pKeyInfo->pTable, .lastKey = skey}; + taosArrayPush(sa, &info); + } + + taosArrayPush(pTableGroup, &sa); + tsdbDebug("all %" PRIzu " tables belong to one group", size); + } else { + STableGroupSupporter sup = {0}; + sup.numOfCols = numOfOrderCols; + sup.pTagSchema = pTagSchema; + sup.pCols = pCols; + + taosqsort(pTableList->pData, size, sizeof(STableKeyInfo), &sup, tableGroupComparFn); + createTableGroupImpl(pTableGroup, pTableList, size, skey, &sup, tableGroupComparFn); + } + + return pTableGroup; +} + +int32_t tsdbQuerySTableByTagCond(STsdbRepo* tsdb, uint64_t uid, TSKEY skey, const char* pTagCond, size_t len, + STableGroupInfo* pGroupInfo, SColIndex* pColIndex, int32_t numOfCols) { + SArray* res = NULL; + if (tsdbRLockRepoMeta(tsdb) < 0) goto _error; + + STable* pTable = tsdbGetTableByUid(tsdbGetMeta(tsdb), uid); + if (pTable == NULL) { + tsdbError("%p failed to get stable, uid:%" PRIu64, tsdb, uid); + terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; + tsdbUnlockRepoMeta(tsdb); + + goto _error; + } + + if (pTable->type != TSDB_SUPER_TABLE) { + tsdbError("%p query normal tag not allowed, uid:%" PRIu64 ", tid:%d, name:%s", tsdb, uid, pTable->tableId.tid, + pTable->name->data); + terrno = TSDB_CODE_COM_OPS_NOT_SUPPORT; //basically, this error is caused by invalid sql issued by client + + tsdbUnlockRepoMeta(tsdb); + goto _error; + } + + //NOTE: not add ref count for super table + res = taosArrayInit(8, sizeof(STableKeyInfo)); + STSchema* pTagSchema = tsdbGetTableTagSchema(pTable); + + // no tags and tbname condition, all child tables of this stable are involved + if (pTagCond == NULL || len == 0) { + int32_t ret = getAllTableList(pTable, res); + if (ret != TSDB_CODE_SUCCESS) { + tsdbUnlockRepoMeta(tsdb); + goto _error; + } + + pGroupInfo->numOfTables = (uint32_t) taosArrayGetSize(res); + pGroupInfo->pGroupList = createTableGroup(res, pTagSchema, pColIndex, 
numOfCols, skey); + + tsdbDebug("%p no table name/tag condition, all tables qualified, numOfTables:%u, group:%zu", tsdb, + pGroupInfo->numOfTables, taosArrayGetSize(pGroupInfo->pGroupList)); + + taosArrayDestroy(&res); + if (tsdbUnlockRepoMeta(tsdb) < 0) goto _error; + return ret; + } + + int32_t ret = TSDB_CODE_SUCCESS; + tExprNode* expr = NULL; + + TRY(TSDB_MAX_TAG_CONDITIONS) { + expr = exprTreeFromBinary(pTagCond, len); + CLEANUP_EXECUTE(); + + } CATCH( code ) { + CLEANUP_EXECUTE(); + terrno = code; + tsdbUnlockRepoMeta(tsdb); // unlock tsdb in any cases + + goto _error; + // TODO: more error handling + } END_TRY + + void *filterInfo = calloc(1, sizeof(SFilterInfo)); + ((SFilterInfo*)filterInfo)->pTable = pTable; + ret = filterInitFromTree(expr, &filterInfo, 0); + tExprTreeDestroy(expr, NULL); + + if (ret != TSDB_CODE_SUCCESS) { + terrno = ret; + tsdbUnlockRepoMeta(tsdb); + filterFreeInfo(filterInfo); + goto _error; + } + + ret = tsdbQueryTableList(pTable, res, filterInfo); + if (ret != TSDB_CODE_SUCCESS) { + terrno = ret; + tsdbUnlockRepoMeta(tsdb); + filterFreeInfo(filterInfo); + goto _error; + } + + filterFreeInfo(filterInfo); + + pGroupInfo->numOfTables = (uint32_t)taosArrayGetSize(res); + pGroupInfo->pGroupList = createTableGroup(res, pTagSchema, pColIndex, numOfCols, skey); + + tsdbDebug("%p stable tid:%d, uid:%"PRIu64" query, numOfTables:%u, belong to %" PRIzu " groups", tsdb, pTable->tableId.tid, + pTable->tableId.uid, pGroupInfo->numOfTables, taosArrayGetSize(pGroupInfo->pGroupList)); + + taosArrayDestroy(&res); + + if (tsdbUnlockRepoMeta(tsdb) < 0) goto _error; + return ret; + + _error: + + taosArrayDestroy(&res); + return terrno; +} + +int32_t tsdbGetOneTableGroup(STsdbRepo* tsdb, uint64_t uid, TSKEY startKey, STableGroupInfo* pGroupInfo) { + if (tsdbRLockRepoMeta(tsdb) < 0) goto _error; + + STable* pTable = tsdbGetTableByUid(tsdbGetMeta(tsdb), uid); + if (pTable == NULL) { + terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; + tsdbUnlockRepoMeta(tsdb); + goto 
_error; + } + + assert(pTable->type == TSDB_CHILD_TABLE || pTable->type == TSDB_NORMAL_TABLE || pTable->type == TSDB_STREAM_TABLE); + tsdbRefTable(pTable); + if (tsdbUnlockRepoMeta(tsdb) < 0) goto _error; + + pGroupInfo->numOfTables = 1; + pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES); + + SArray* group = taosArrayInit(1, sizeof(STableKeyInfo)); + + STableKeyInfo info = {.pTable = pTable, .lastKey = startKey}; + taosArrayPush(group, &info); + + taosArrayPush(pGroupInfo->pGroupList, &group); + return TSDB_CODE_SUCCESS; + + _error: + return terrno; +} + +int32_t tsdbGetTableGroupFromIdList(STsdbRepo* tsdb, SArray* pTableIdList, STableGroupInfo* pGroupInfo) { + if (tsdbRLockRepoMeta(tsdb) < 0) { + return terrno; + } + + assert(pTableIdList != NULL); + size_t size = taosArrayGetSize(pTableIdList); + pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES); + SArray* group = taosArrayInit(1, sizeof(STableKeyInfo)); + + for(int32_t i = 0; i < size; ++i) { + STableIdInfo *id = taosArrayGet(pTableIdList, i); + + STable* pTable = tsdbGetTableByUid(tsdbGetMeta(tsdb), id->uid); + if (pTable == NULL) { + tsdbWarn("table uid:%"PRIu64", tid:%d has been drop already", id->uid, id->tid); + continue; + } + + if (pTable->type == TSDB_SUPER_TABLE) { + tsdbError("direct query on super tale is not allowed, table uid:%"PRIu64", tid:%d", id->uid, id->tid); + terrno = TSDB_CODE_QRY_INVALID_MSG; + tsdbUnlockRepoMeta(tsdb); + taosArrayDestroy(&group); + return terrno; + } + + tsdbRefTable(pTable); + + STableKeyInfo info = {.pTable = pTable, .lastKey = id->key}; + taosArrayPush(group, &info); + } + + if (tsdbUnlockRepoMeta(tsdb) < 0) { + taosArrayDestroy(&group); + return terrno; + } + + pGroupInfo->numOfTables = (uint32_t) taosArrayGetSize(group); + if (pGroupInfo->numOfTables > 0) { + taosArrayPush(pGroupInfo->pGroupList, &group); + } else { + taosArrayDestroy(&group); + } + + return TSDB_CODE_SUCCESS; +} + +static void* doFreeColumnInfoData(SArray* pColumnInfoData) { + if 
(pColumnInfoData == NULL) { + return NULL; + } + + size_t cols = taosArrayGetSize(pColumnInfoData); + for (int32_t i = 0; i < cols; ++i) { + SColumnInfoData* pColInfo = taosArrayGet(pColumnInfoData, i); + tfree(pColInfo->pData); + } + + taosArrayDestroy(&pColumnInfoData); + return NULL; +} + +static void* destroyTableCheckInfo(SArray* pTableCheckInfo) { + size_t size = taosArrayGetSize(pTableCheckInfo); + for (int32_t i = 0; i < size; ++i) { + STableCheckInfo* p = taosArrayGet(pTableCheckInfo, i); + destroyTableMemIterator(p); + + tfree(p->pCompInfo); + } + + taosArrayDestroy(&pTableCheckInfo); + return NULL; +} + +void tsdbCleanupQueryHandle(TsdbQueryHandleT queryHandle) { + STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*)queryHandle; + if (pQueryHandle == NULL) { + return; + } + + pQueryHandle->pColumns = doFreeColumnInfoData(pQueryHandle->pColumns); + + taosArrayDestroy(&pQueryHandle->defaultLoadColumn); + tfree(pQueryHandle->pDataBlockInfo); + tfree(pQueryHandle->statis); + + if (!emptyQueryTimewindow(pQueryHandle)) { + tsdbMayUnTakeMemSnapshot(pQueryHandle); + } else { + assert(pQueryHandle->pTableCheckInfo == NULL); + } + + if (pQueryHandle->pTableCheckInfo != NULL) { + pQueryHandle->pTableCheckInfo = destroyTableCheckInfo(pQueryHandle->pTableCheckInfo); + } + + tsdbDestroyReadH(&pQueryHandle->rhelper); + + tdFreeDataCols(pQueryHandle->pDataCols); + pQueryHandle->pDataCols = NULL; + + pQueryHandle->prev = doFreeColumnInfoData(pQueryHandle->prev); + pQueryHandle->next = doFreeColumnInfoData(pQueryHandle->next); + + SIOCostSummary* pCost = &pQueryHandle->cost; + + tsdbDebug("%p :io-cost summary: head-file read cnt:%"PRIu64", head-file time:%"PRIu64" us, statis-info:%"PRId64" us, datablock:%" PRId64" us, check data:%"PRId64" us, 0x%"PRIx64, + pQueryHandle, pCost->headFileLoad, pCost->headFileLoadTime, pCost->statisInfoLoadTime, pCost->blockLoadTime, pCost->checkForNextTime, pQueryHandle->qId); + + tfree(pQueryHandle); +} + +void 
tsdbDestroyTableGroup(STableGroupInfo *pGroupList) { + assert(pGroupList != NULL); + + size_t numOfGroup = taosArrayGetSize(pGroupList->pGroupList); + + for(int32_t i = 0; i < numOfGroup; ++i) { + SArray* p = taosArrayGetP(pGroupList->pGroupList, i); + + size_t numOfTables = taosArrayGetSize(p); + for(int32_t j = 0; j < numOfTables; ++j) { + STable* pTable = taosArrayGetP(p, j); + if (pTable != NULL) { // in case of handling retrieve data from tsdb + tsdbUnRefTable(pTable); + } + //assert(pTable != NULL); + } + + taosArrayDestroy(&p); + } + + taosHashCleanup(pGroupList->map); + taosArrayDestroy(&pGroupList->pGroupList); + pGroupList->numOfTables = 0; +} + + +static FORCE_INLINE int32_t tsdbGetTagDataFromId(void *param, int32_t id, void **data) { + STable* pTable = (STable*)(SL_GET_NODE_DATA((SSkipListNode *)param)); + + if (id == TSDB_TBNAME_COLUMN_INDEX) { + *data = TABLE_NAME(pTable); + } else { + *data = tdGetKVRowValOfCol(pTable->tagVal, id); + } + + return TSDB_CODE_SUCCESS; +} + + + +static void queryIndexedColumn(SSkipList* pSkipList, void* filterInfo, SArray* res) { + SSkipListIterator* iter = NULL; + char *startVal = NULL; + int32_t order = 0; + int32_t inRange = 0; + int32_t flag = 0; + bool all = false; + int8_t *addToResult = NULL; + + filterGetIndexedColumnInfo(filterInfo, &startVal, &order, &flag); + + tsdbDebug("filter index column start, order:%d, flag:%d", order, flag); + + while (order) { + if (FILTER_GET_FLAG(order, TSDB_ORDER_ASC)) { + iter = tSkipListCreateIterFromVal(pSkipList, startVal, pSkipList->type, TSDB_ORDER_ASC); + FILTER_CLR_FLAG(order, TSDB_ORDER_ASC); + } else { + iter = tSkipListCreateIterFromVal(pSkipList, startVal, pSkipList->type, TSDB_ORDER_DESC); + FILTER_CLR_FLAG(order, TSDB_ORDER_DESC); + } + + while (tSkipListIterNext(iter)) { + SSkipListNode *pNode = tSkipListIterGet(iter); + + if (inRange == 0 || !FILTER_GET_FLAG(flag, FI_ACTION_NO_NEED)) { + tsdbDebug("filter index column, filter it"); + filterSetColFieldData(filterInfo, 
pNode, tsdbGetTagDataFromId); + all = filterExecute(filterInfo, 1, &addToResult, NULL, 0); + } + + char *pData = SL_GET_NODE_DATA(pNode); + + tsdbDebug("filter index column, table:%s, result:%d", ((STable *)pData)->name->data, all); + + if (all || (addToResult && *addToResult)) { + STableKeyInfo info = {.pTable = (void*)pData, .lastKey = TSKEY_INITIAL_VAL}; + taosArrayPush(res, &info); + inRange = 1; + } else if (inRange){ + break; + } + } + + inRange = 0; + + tfree(addToResult); + tSkipListDestroyIter(iter); + } + + tsdbDebug("filter index column end"); +} + +static void queryIndexlessColumn(SSkipList* pSkipList, void* filterInfo, SArray* res) { + SSkipListIterator* iter = tSkipListCreateIter(pSkipList); + int8_t *addToResult = NULL; + + while (tSkipListIterNext(iter)) { + + SSkipListNode *pNode = tSkipListIterGet(iter); + + filterSetColFieldData(filterInfo, pNode, tsdbGetTagDataFromId); + + char *pData = SL_GET_NODE_DATA(pNode); + + bool all = filterExecute(filterInfo, 1, &addToResult, NULL, 0); + + if (all || (addToResult && *addToResult)) { + STableKeyInfo info = {.pTable = (void*)pData, .lastKey = TSKEY_INITIAL_VAL}; + taosArrayPush(res, &info); + } + } + + tfree(addToResult); + + tSkipListDestroyIter(iter); +} + +static FORCE_INLINE int32_t tsdbGetJsonTagDataFromId(void *param, int32_t id, char* name, void **data) { + JsonMapValue* jsonMapV = (JsonMapValue*)(param); + STable* pTable = (STable*)(jsonMapV->table); + + if (id == TSDB_TBNAME_COLUMN_INDEX) { + *data = TABLE_NAME(pTable); + } else { + void* jsonData = tsdbGetJsonTagValue(pTable, name, TSDB_MAX_JSON_KEY_MD5_LEN, NULL); + // jsonData == NULL for ? 
operation + // if(jsonData != NULL) jsonData += CHAR_BYTES; // jump type + *data = jsonData; + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t queryByJsonTag(STable* pTable, void* filterInfo, SArray* res){ + // get all table in fields, and dumplicate it + SArray* tabList = NULL; + bool needQueryAll = false; + SFilterInfo* info = (SFilterInfo*)filterInfo; + for (uint16_t i = 0; i < info->fields[FLD_TYPE_COLUMN].num; ++i) { + SFilterField* fi = &info->fields[FLD_TYPE_COLUMN].fields[i]; + SSchema* sch = fi->desc; + if (sch->colId == TSDB_TBNAME_COLUMN_INDEX) { + tabList = taosArrayInit(32, sizeof(JsonMapValue)); + getAllTableList(pTable, tabList); // query all table + needQueryAll = true; + break; + } + } + for (uint16_t i = 0; i < info->unitNum; ++i) { // is null operation need query all table + SFilterUnit* unit = &info->units[i]; + if (unit->compare.optr == TSDB_RELATION_ISNULL) { + tabList = taosArrayInit(32, sizeof(JsonMapValue)); + getAllTableList(pTable, tabList); // query all table + needQueryAll = true; + break; + } + } + + for (uint16_t i = 0; i < info->fields[FLD_TYPE_COLUMN].num; ++i) { + if (needQueryAll) break; // query all table + SFilterField* fi = &info->fields[FLD_TYPE_COLUMN].fields[i]; + SSchema* sch = fi->desc; + char* key = sch->name; + + SArray** data = (SArray**)taosHashGet(pTable->jsonKeyMap, key, TSDB_MAX_JSON_KEY_MD5_LEN); + if(data == NULL) continue; + if(tabList == NULL) { + tabList = taosArrayDup(*data); + }else{ + for(int j = 0; j < taosArrayGetSize(*data); j++){ + void* element = taosArrayGet(*data, j); + void* p = taosArraySearch(tabList, element, tsdbCompareJsonMapValue, TD_EQ); + if (p == NULL) { + p = taosArraySearch(tabList, element, tsdbCompareJsonMapValue, TD_GE); + if(p == NULL){ + taosArrayPush(tabList, element); + }else{ + taosArrayInsert(tabList, TARRAY_ELEM_IDX(tabList, p), element); + } + } + } + } + } + if(tabList == NULL){ + tsdbError("json key not exist, no candidate table"); + return TSDB_CODE_SUCCESS; + } + size_t 
size = taosArrayGetSize(tabList); + int8_t *addToResult = NULL; + for(int i = 0; i < size; i++){ + JsonMapValue* data = taosArrayGet(tabList, i); + filterSetJsonColFieldData(filterInfo, data, tsdbGetJsonTagDataFromId); + bool all = filterExecute(filterInfo, 1, &addToResult, NULL, 0); + + if (all || (addToResult && *addToResult)) { + STableKeyInfo kInfo = {.pTable = (void*)(data->table), .lastKey = TSKEY_INITIAL_VAL}; + taosArrayPush(res, &kInfo); + } + } + tfree(addToResult); + taosArrayDestroy(&tabList); + return TSDB_CODE_SUCCESS; +} + +static int32_t tsdbQueryTableList(STable* pTable, SArray* pRes, void* filterInfo) { + STSchema* pTSSchema = pTable->tagSchema; + + if(pTSSchema->columns->type == TSDB_DATA_TYPE_JSON){ + return queryByJsonTag(pTable, filterInfo, pRes); + }else{ + bool indexQuery = false; + SSkipList *pSkipList = pTable->pIndex; + + filterIsIndexedColumnQuery(filterInfo, pTSSchema->columns->colId, &indexQuery); + + if (indexQuery) { + queryIndexedColumn(pSkipList, filterInfo, pRes); + } else { + queryIndexlessColumn(pSkipList, filterInfo, pRes); + } + } + + return TSDB_CODE_SUCCESS; +} + +void* getJsonTagValueElment(void* data, char* key, int32_t keyLen, char* dst, int16_t bytes){ + char keyMd5[TSDB_MAX_JSON_KEY_MD5_LEN] = {0}; + jsonKeyMd5(key, keyLen, keyMd5); + + void* result = tsdbGetJsonTagValue(data, keyMd5, TSDB_MAX_JSON_KEY_MD5_LEN, NULL); + if (result == NULL){ // json key no result + if(!dst) return NULL; + *dst = TSDB_DATA_TYPE_JSON; + setNull(dst + CHAR_BYTES, TSDB_DATA_TYPE_JSON, 0); + return dst; + } + + char* realData = POINTER_SHIFT(result, CHAR_BYTES); + if(*(char*)result == TSDB_DATA_TYPE_NCHAR || *(char*)result == TSDB_DATA_TYPE_BINARY) { + assert(varDataTLen(realData) < bytes); + if(!dst) return result; + memcpy(dst, result, CHAR_BYTES + varDataTLen(realData)); + return dst; + }else if (*(char*)result == TSDB_DATA_TYPE_DOUBLE || *(char*)result == TSDB_DATA_TYPE_BIGINT) { + if(!dst) return result; + memcpy(dst, result, CHAR_BYTES 
+ LONG_BYTES); + return dst; + }else if (*(char*)result == TSDB_DATA_TYPE_BOOL) { + if(!dst) return result; + memcpy(dst, result, CHAR_BYTES + CHAR_BYTES); + return dst; + }else { + assert(0); + } + return result; +} + +void getJsonTagValueAll(void* data, void* dst, int16_t bytes) { + char* json = parseTagDatatoJson(data); + char* tagData = POINTER_SHIFT(dst, CHAR_BYTES); + *(char*)dst = TSDB_DATA_TYPE_JSON; + if(json == NULL){ + setNull(tagData, TSDB_DATA_TYPE_JSON, 0); + return; + } + + int32_t length = 0; + if(!taosMbsToUcs4(json, strlen(json), varDataVal(tagData), bytes - VARSTR_HEADER_SIZE - CHAR_BYTES, &length)){ + tsdbError("getJsonTagValueAll mbstoucs4 error! length:%d", length); + } + varDataSetLen(tagData, length); + assert(varDataTLen(tagData) <= bytes); + tfree(json); +} + +char* parseTagDatatoJson(void *p){ + char* string = NULL; + cJSON *json = cJSON_CreateObject(); + if (json == NULL) + { + goto end; + } + + int16_t nCols = kvRowNCols(p); + ASSERT(nCols%2 == 1); + char tagJsonKey[TSDB_MAX_JSON_KEY_LEN + 1] = {0}; + for (int j = 0; j < nCols; ++j) { + SColIdx * pColIdx = kvRowColIdxAt(p, j); + void* val = (kvRowColVal(p, pColIdx)); + if (j == 0){ + int8_t jsonPlaceHolder = *(int8_t*)val; + ASSERT(jsonPlaceHolder == TSDB_DATA_JSON_PLACEHOLDER); + continue; + } + if(j == 1){ + uint32_t jsonNULL = *(uint32_t*)(varDataVal(val)); + ASSERT(jsonNULL == TSDB_DATA_JSON_NULL); + continue; + } + if (j == 2){ + if(*(uint32_t*)(varDataVal(val + CHAR_BYTES)) == TSDB_DATA_JSON_NULL) goto end; + continue; + } + if (j%2 == 1) { // json key encode by binary + ASSERT(varDataLen(val) <= TSDB_MAX_JSON_KEY_LEN); + memset(tagJsonKey, 0, sizeof(tagJsonKey)); + memcpy(tagJsonKey, varDataVal(val), varDataLen(val)); + }else{ // json value + char* realData = POINTER_SHIFT(val, CHAR_BYTES); + char type = *(char*)val; + if(type == TSDB_DATA_TYPE_BINARY) { + assert(*(uint32_t*)varDataVal(realData) == TSDB_DATA_JSON_null); // json null value + assert(varDataLen(realData) == 
INT_BYTES); + cJSON* value = cJSON_CreateNull(); + if (value == NULL) + { + goto end; + } + cJSON_AddItemToObject(json, tagJsonKey, value); + }else if(type == TSDB_DATA_TYPE_NCHAR) { + cJSON* value = NULL; + if (varDataLen(realData) > 0){ + char *tagJsonValue = calloc(varDataLen(realData), 1); + int32_t length = taosUcs4ToMbs(varDataVal(realData), varDataLen(realData), tagJsonValue); + if (length < 0) { + tsdbError("charset:%s to %s. val:%s convert json value failed.", DEFAULT_UNICODE_ENCODEC, tsCharset, + (char*)val); + free(tagJsonValue); + goto end; + } + value = cJSON_CreateString(tagJsonValue); + free(tagJsonValue); + if (value == NULL) + { + goto end; + } + }else if(varDataLen(realData) == 0){ + value = cJSON_CreateString(""); + }else{ + assert(0); + } + + cJSON_AddItemToObject(json, tagJsonKey, value); + }else if(type == TSDB_DATA_TYPE_DOUBLE){ + double jsonVd = *(double*)(realData); + cJSON* value = cJSON_CreateNumber(jsonVd); + if (value == NULL) + { + goto end; + } + cJSON_AddItemToObject(json, tagJsonKey, value); + }else if(type == TSDB_DATA_TYPE_BIGINT){ + int64_t jsonVd = *(int64_t*)(realData); + cJSON* value = cJSON_CreateNumber((double)jsonVd); + if (value == NULL) + { + goto end; + } + cJSON_AddItemToObject(json, tagJsonKey, value); + }else if (type == TSDB_DATA_TYPE_BOOL) { + char jsonVd = *(char*)(realData); + cJSON* value = cJSON_CreateBool(jsonVd); + if (value == NULL) + { + goto end; + } + cJSON_AddItemToObject(json, tagJsonKey, value); + } + else{ + tsdbError("unsupportted json value"); + } + } + } + string = cJSON_PrintUnformatted(json); +end: + cJSON_Delete(json); + return string; +} + +// obtain queryHandle attribute +int64_t tsdbSkipOffset(TsdbQueryHandleT queryHandle) { + STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*)queryHandle; + if (pQueryHandle) { + return pQueryHandle->srows; + } + return 0; +} +#endif \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/src/tsdbReadImpl.c 
b/source/dnode/vnode/tsdb2/src/tsdbReadImpl.c new file mode 100644 index 0000000000..58438c8598 --- /dev/null +++ b/source/dnode/vnode/tsdb2/src/tsdbReadImpl.c @@ -0,0 +1,878 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbint.h" + +#define TSDB_KEY_COL_OFFSET 0 + +static void tsdbResetReadTable(SReadH *pReadh); +static void tsdbResetReadFile(SReadH *pReadh); +static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols); +static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32_t len, int8_t comp, int numOfRows, + int maxPoints, char *buffer, int bufferSize); +static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols, int16_t *colIds, + int numOfColIds); +static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBlockCol *pBlockCol, SDataCol *pDataCol); +static int tsdbLoadBlockStatisFromDFile(SReadH *pReadh, SBlock *pBlock); +static int tsdbLoadBlockStatisFromAggr(SReadH *pReadh, SBlock *pBlock); + +int tsdbInitReadH(SReadH *pReadh, STsdb *pRepo) { + ASSERT(pReadh != NULL && pRepo != NULL); + + STsdbCfg *pCfg = REPO_CFG(pRepo); + + memset((void *)pReadh, 0, sizeof(*pReadh)); + pReadh->pRepo = pRepo; + + TSDB_FSET_SET_CLOSED(TSDB_READ_FSET(pReadh)); + + pReadh->aBlkIdx = taosArrayInit(1024, sizeof(SBlockIdx)); + if (pReadh->aBlkIdx == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + 
pReadh->pDCols[0] = tdNewDataCols(0, pCfg->maxRowsPerFileBlock); + if (pReadh->pDCols[0] == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbDestroyReadH(pReadh); + return -1; + } + + pReadh->pDCols[1] = tdNewDataCols(0, pCfg->maxRowsPerFileBlock); + if (pReadh->pDCols[1] == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + tsdbDestroyReadH(pReadh); + return -1; + } + + return 0; +} + +void tsdbDestroyReadH(SReadH *pReadh) { + if (pReadh == NULL) return; + pReadh->pExBuf = taosTZfree(pReadh->pExBuf); + pReadh->pCBuf = taosTZfree(pReadh->pCBuf); + pReadh->pBuf = taosTZfree(pReadh->pBuf); + pReadh->pDCols[0] = tdFreeDataCols(pReadh->pDCols[0]); + pReadh->pDCols[1] = tdFreeDataCols(pReadh->pDCols[1]); + pReadh->pAggrBlkData = taosTZfree(pReadh->pAggrBlkData); + pReadh->pBlkData = taosTZfree(pReadh->pBlkData); + pReadh->pBlkInfo = taosTZfree(pReadh->pBlkInfo); + pReadh->cidx = 0; + pReadh->pBlkIdx = NULL; + pReadh->pTable = NULL; + pReadh->aBlkIdx = taosArrayDestroy(pReadh->aBlkIdx); + tsdbCloseDFileSet(TSDB_READ_FSET(pReadh)); + pReadh->pRepo = NULL; +} + +int tsdbSetAndOpenReadFSet(SReadH *pReadh, SDFileSet *pSet) { + ASSERT(pSet != NULL); + tsdbResetReadFile(pReadh); + + pReadh->rSet = *pSet; + TSDB_FSET_SET_CLOSED(TSDB_READ_FSET(pReadh)); + if (tsdbOpenDFileSet(TSDB_READ_FSET(pReadh), O_RDONLY) < 0) { + tsdbError("vgId:%d failed to open file set %d since %s", TSDB_READ_REPO_ID(pReadh), TSDB_FSET_FID(pSet), + tstrerror(terrno)); + return -1; + } + + return 0; +} + +void tsdbCloseAndUnsetFSet(SReadH *pReadh) { tsdbResetReadFile(pReadh); } + +int tsdbLoadBlockIdx(SReadH *pReadh) { + SDFile * pHeadf = TSDB_READ_HEAD_FILE(pReadh); + SBlockIdx blkIdx; + + ASSERT(taosArrayGetSize(pReadh->aBlkIdx) == 0); + + // No data at all, just return + if (pHeadf->info.offset <= 0) return 0; + + if (tsdbSeekDFile(pHeadf, pHeadf->info.offset, SEEK_SET) < 0) { + tsdbError("vgId:%d failed to load SBlockIdx part while seek file %s since %s, offset:%u len :%u", + 
TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), tstrerror(terrno), pHeadf->info.offset, + pHeadf->info.len); + return -1; + } + + if (tsdbMakeRoom((void **)(&TSDB_READ_BUF(pReadh)), pHeadf->info.len) < 0) return -1; + + int64_t nread = tsdbReadDFile(pHeadf, TSDB_READ_BUF(pReadh), pHeadf->info.len); + if (nread < 0) { + tsdbError("vgId:%d failed to load SBlockIdx part while read file %s since %s, offset:%u len :%u", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), tstrerror(terrno), pHeadf->info.offset, + pHeadf->info.len); + return -1; + } + + if (nread < pHeadf->info.len) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbError("vgId:%d SBlockIdx part in file %s is corrupted, offset:%u expected bytes:%u read bytes: %" PRId64, + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pHeadf->info.offset, pHeadf->info.len, nread); + return -1; + } + + if (!taosCheckChecksumWhole((uint8_t *)TSDB_READ_BUF(pReadh), pHeadf->info.len)) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbError("vgId:%d SBlockIdx part in file %s is corrupted since wrong checksum, offset:%u len :%u", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pHeadf->info.offset, pHeadf->info.len); + return -1; + } + + void *ptr = TSDB_READ_BUF(pReadh); + int tsize = 0; + while (POINTER_DISTANCE(ptr, TSDB_READ_BUF(pReadh)) < (pHeadf->info.len - sizeof(TSCKSUM))) { + ptr = tsdbDecodeSBlockIdx(ptr, &blkIdx); + ASSERT(ptr != NULL); + + if (taosArrayPush(pReadh->aBlkIdx, (void *)(&blkIdx)) == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + tsize++; + ASSERT(tsize == 1 || ((SBlockIdx *)taosArrayGet(pReadh->aBlkIdx, tsize - 2))->tid < + ((SBlockIdx *)taosArrayGet(pReadh->aBlkIdx, tsize - 1))->tid); + } + + return 0; +} + +int tsdbSetReadTable(SReadH *pReadh, STable *pTable) { + STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1, -1); + + pReadh->pTable = pTable; + + if (tdInitDataCols(pReadh->pDCols[0], pSchema) < 0) { + terrno = 
TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + if (tdInitDataCols(pReadh->pDCols[1], pSchema) < 0) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + + size_t size = taosArrayGetSize(pReadh->aBlkIdx); + if (size > 0) { + while (true) { + if (pReadh->cidx >= size) { + pReadh->pBlkIdx = NULL; + break; + } + + SBlockIdx *pBlkIdx = taosArrayGet(pReadh->aBlkIdx, pReadh->cidx); + if (pBlkIdx->tid == TABLE_TID(pTable)) { + if (pBlkIdx->uid == TABLE_UID(pTable)) { + pReadh->pBlkIdx = pBlkIdx; + } else { + pReadh->pBlkIdx = NULL; + } + pReadh->cidx++; + break; + } else if (pBlkIdx->tid > TABLE_TID(pTable)) { + pReadh->pBlkIdx = NULL; + break; + } else { + pReadh->cidx++; + } + } + } else { + pReadh->pBlkIdx = NULL; + } + + return 0; +} + +#if 0 +int tsdbLoadBlockInfo(SReadH *pReadh, void *pTarget) { + ASSERT(pReadh->pBlkIdx != NULL); + + SDFile * pHeadf = TSDB_READ_HEAD_FILE(pReadh); + SBlockIdx *pBlkIdx = pReadh->pBlkIdx; + + if (tsdbSeekDFile(pHeadf, pBlkIdx->offset, SEEK_SET) < 0) { + tsdbError("vgId:%d failed to load SBlockInfo part while seek file %s since %s, offset:%u len:%u", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), tstrerror(terrno), pBlkIdx->offset, pBlkIdx->len); + return -1; + } + + if (tsdbMakeRoom((void **)(&(pReadh->pBlkInfo)), pBlkIdx->len) < 0) return -1; + + int64_t nread = tsdbReadDFile(pHeadf, (void *)(pReadh->pBlkInfo), pBlkIdx->len); + if (nread < 0) { + tsdbError("vgId:%d failed to load SBlockInfo part while read file %s since %s, offset:%u len :%u", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), tstrerror(terrno), pBlkIdx->offset, pBlkIdx->len); + return -1; + } + + if (nread < pBlkIdx->len) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbError("vgId:%d SBlockInfo part in file %s is corrupted, offset:%u expected bytes:%u read bytes:%" PRId64, + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len, nread); + return -1; + } + + if (!taosCheckChecksumWhole((uint8_t 
*)(pReadh->pBlkInfo), pBlkIdx->len)) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbError("vgId:%d SBlockInfo part in file %s is corrupted since wrong checksum, offset:%u len :%u", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len); + return -1; + } + + ASSERT(pBlkIdx->tid == pReadh->pBlkInfo->tid && pBlkIdx->uid == pReadh->pBlkInfo->uid); + + if (pTarget) { + memcpy(pTarget, (void *)(pReadh->pBlkInfo), pBlkIdx->len); + } + + return 0; +} +#endif + +static FORCE_INLINE int32_t tsdbGetSBlockVer(int32_t fver) { + switch (fver) { + case TSDB_FS_VER_0: + return TSDB_SBLK_VER_0; + case TSDB_FS_VER_1: + return TSDB_SBLK_VER_1; + default: + return SBlockVerLatest; + } +} + +static FORCE_INLINE size_t tsdbSizeOfSBlock(int32_t sBlkVer) { + switch (sBlkVer) { + case TSDB_SBLK_VER_0: + return sizeof(SBlockV0); + case TSDB_SBLK_VER_1: + return sizeof(SBlockV1); + default: + return sizeof(SBlock); + } +} + +static int tsdbSBlkInfoRefactor(SDFile *pHeadf, SBlockInfo **pDstBlkInfo, SBlockIdx *pBlkIdx, uint32_t *dstBlkInfoLen) { + int sBlkVer = tsdbGetSBlockVer(pHeadf->info.fver); + if (sBlkVer > TSDB_SBLK_VER_0) { + *dstBlkInfoLen = pBlkIdx->len; + return TSDB_CODE_SUCCESS; + } + size_t originBlkSize = tsdbSizeOfSBlock(sBlkVer); + size_t nBlks = (pBlkIdx->len - sizeof(SBlockInfo)) / originBlkSize; + + *dstBlkInfoLen = (uint32_t)(sizeof(SBlockInfo) + nBlks * sizeof(SBlock)); + + if (pBlkIdx->len == *dstBlkInfoLen) { + return TSDB_CODE_SUCCESS; + } + + ASSERT(*dstBlkInfoLen >= pBlkIdx->len); + + SBlockInfo *tmpBlkInfo = NULL; + if (tsdbMakeRoom((void **)(&tmpBlkInfo), *dstBlkInfoLen) < 0) return -1; + memset(tmpBlkInfo, 0, *dstBlkInfoLen); // the blkVer is set to 0 + memcpy(tmpBlkInfo, *pDstBlkInfo, sizeof(SBlockInfo)); // copy header + uint32_t nSubBlks = 0; + for (int i = 0; i < nBlks; ++i) { + SBlock *tmpBlk = tmpBlkInfo->blocks + i; + memcpy(tmpBlk, POINTER_SHIFT((*pDstBlkInfo)->blocks, i * originBlkSize), originBlkSize); + if (i < 
pBlkIdx->numOfBlocks) { // super blocks + if (tmpBlk->numOfSubBlocks > 1) { // has sub blocks + tmpBlk->offset = sizeof(SBlockInfo) + (pBlkIdx->numOfBlocks + nSubBlks) * sizeof(SBlock); + nSubBlks += tmpBlk->numOfSubBlocks; + } + } + // TODO: update the fields if the SBlock definition change later + } + + taosTZfree(*pDstBlkInfo); + *pDstBlkInfo = tmpBlkInfo; + + return TSDB_CODE_SUCCESS; +} + +int tsdbLoadBlockInfo(SReadH *pReadh, void **pTarget, uint32_t *extendedLen) { + ASSERT(pReadh->pBlkIdx != NULL); + + SDFile * pHeadf = TSDB_READ_HEAD_FILE(pReadh); + SBlockIdx * pBlkIdx = pReadh->pBlkIdx; + + if (tsdbSeekDFile(pHeadf, pBlkIdx->offset, SEEK_SET) < 0) { + tsdbError("vgId:%d failed to load SBlockInfo part while seek file %s since %s, offset:%u len:%u", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), tstrerror(terrno), pBlkIdx->offset, pBlkIdx->len); + return -1; + } + + if (tsdbMakeRoom((void **)(&pReadh->pBlkInfo), pBlkIdx->len) < 0) return -1; + + int64_t nread = tsdbReadDFile(pHeadf, (void *)(pReadh->pBlkInfo), pBlkIdx->len); + if (nread < 0) { + tsdbError("vgId:%d failed to load SBlockInfo part while read file %s since %s, offset:%u len :%u", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), tstrerror(terrno), pBlkIdx->offset, pBlkIdx->len); + return -1; + } + + if (nread < pBlkIdx->len) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbError("vgId:%d SBlockInfo part in file %s is corrupted, offset:%u expected bytes:%u read bytes:%" PRId64, + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len, nread); + return -1; + } + + if (!taosCheckChecksumWhole((uint8_t *)(pReadh->pBlkInfo), pBlkIdx->len)) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbError("vgId:%d SBlockInfo part in file %s is corrupted since wrong checksum, offset:%u len :%u", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pHeadf), pBlkIdx->offset, pBlkIdx->len); + return -1; + } + + ASSERT(pBlkIdx->tid == pReadh->pBlkInfo->tid && 
pBlkIdx->uid == pReadh->pBlkInfo->uid); + + uint32_t dstBlkInfoLen = 0; + if (tsdbSBlkInfoRefactor(pHeadf, &(pReadh->pBlkInfo), pBlkIdx, &dstBlkInfoLen) < 0) { + return -1; + } + + if (extendedLen != NULL) { + if (pTarget != NULL) { + if (*extendedLen < dstBlkInfoLen) { + char *t = realloc(*pTarget, dstBlkInfoLen); + if (t == NULL) { + terrno = TSDB_CODE_TDB_OUT_OF_MEMORY; + return -1; + } + *pTarget = t; + } + memcpy(*pTarget, (void *)(pReadh->pBlkInfo), dstBlkInfoLen); + } + *extendedLen = dstBlkInfoLen; + } + + return TSDB_CODE_SUCCESS; +} + +int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo) { + ASSERT(pBlock->numOfSubBlocks > 0); + int8_t update = pReadh->pRepo->config.update; + + SBlock *iBlock = pBlock; + if (pBlock->numOfSubBlocks > 1) { + if (pBlkInfo) { + iBlock = (SBlock *)POINTER_SHIFT(pBlkInfo, pBlock->offset); + } else { + iBlock = (SBlock *)POINTER_SHIFT(pReadh->pBlkInfo, pBlock->offset); + } + } + + if (tsdbLoadBlockDataImpl(pReadh, iBlock, pReadh->pDCols[0]) < 0) return -1; + for (int i = 1; i < pBlock->numOfSubBlocks; i++) { + iBlock++; + if (tsdbLoadBlockDataImpl(pReadh, iBlock, pReadh->pDCols[1]) < 0) return -1; + if (tdMergeDataCols(pReadh->pDCols[0], pReadh->pDCols[1], pReadh->pDCols[1]->numOfRows, NULL, update != TD_ROW_PARTIAL_UPDATE) < 0) return -1; + } + + ASSERT(pReadh->pDCols[0]->numOfRows == pBlock->numOfRows); + ASSERT(dataColsKeyFirst(pReadh->pDCols[0]) == pBlock->keyFirst); + ASSERT(dataColsKeyLast(pReadh->pDCols[0]) == pBlock->keyLast); + + return 0; +} + +int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, int16_t *colIds, int numOfColsIds) { + ASSERT(pBlock->numOfSubBlocks > 0); + int8_t update = pReadh->pRepo->config.update; + + SBlock *iBlock = pBlock; + if (pBlock->numOfSubBlocks > 1) { + if (pBlkInfo) { + iBlock = POINTER_SHIFT(pBlkInfo, pBlock->offset); + } else { + iBlock = POINTER_SHIFT(pReadh->pBlkInfo, pBlock->offset); + } + } + + if (tsdbLoadBlockDataColsImpl(pReadh, 
iBlock, pReadh->pDCols[0], colIds, numOfColsIds) < 0) return -1; + for (int i = 1; i < pBlock->numOfSubBlocks; i++) { + iBlock++; + if (tsdbLoadBlockDataColsImpl(pReadh, iBlock, pReadh->pDCols[1], colIds, numOfColsIds) < 0) return -1; + if (tdMergeDataCols(pReadh->pDCols[0], pReadh->pDCols[1], pReadh->pDCols[1]->numOfRows, NULL, update != TD_ROW_PARTIAL_UPDATE) < 0) return -1; + } + + ASSERT(pReadh->pDCols[0]->numOfRows == pBlock->numOfRows); + ASSERT(dataColsKeyFirst(pReadh->pDCols[0]) == pBlock->keyFirst); + ASSERT(dataColsKeyLast(pReadh->pDCols[0]) == pBlock->keyLast); + + return 0; +} + +static int tsdbLoadBlockStatisFromDFile(SReadH *pReadh, SBlock *pBlock) { + SDFile *pDFile = (pBlock->last) ? TSDB_READ_LAST_FILE(pReadh) : TSDB_READ_DATA_FILE(pReadh); + if (tsdbSeekDFile(pDFile, pBlock->offset, SEEK_SET) < 0) { + tsdbError("vgId:%d failed to load block statis part while seek file %s to offset %" PRId64 " since %s", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), (int64_t)pBlock->offset, tstrerror(terrno)); + return -1; + } + + size_t size = tsdbBlockStatisSize(pBlock->numOfCols, (uint32_t)pBlock->blkVer); + if (tsdbMakeRoom((void **)(&(pReadh->pBlkData)), size) < 0) return -1; + + int64_t nread = tsdbReadDFile(pDFile, (void *)(pReadh->pBlkData), size); + if (nread < 0) { + tsdbError("vgId:%d failed to load block statis part while read file %s since %s, offset:%" PRId64 " len :%" PRIzu, + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), tstrerror(terrno), (int64_t)pBlock->offset, size); + return -1; + } + + if (nread < size) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbError("vgId:%d block statis part in file %s is corrupted, offset:%" PRId64 " expected bytes:%" PRIzu + " read bytes: %" PRId64, + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), (int64_t)pBlock->offset, size, nread); + return -1; + } + + if (!taosCheckChecksumWhole((uint8_t *)(pReadh->pBlkData), (uint32_t)size)) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + 
tsdbError("vgId:%d block statis part in file %s is corrupted since wrong checksum, offset:%" PRId64 " len :%" PRIzu, + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), (int64_t)pBlock->offset, size); + return -1; + } + return 0; +} + +static int tsdbLoadBlockStatisFromAggr(SReadH *pReadh, SBlock *pBlock) { + ASSERT((pBlock->blkVer > TSDB_SBLK_VER_0) && (pBlock->aggrStat)); // TODO: remove after pass all the test + SDFile *pDFileAggr = pBlock->last ? TSDB_READ_SMAL_FILE(pReadh) : TSDB_READ_SMAD_FILE(pReadh); + + if (tsdbSeekDFile(pDFileAggr, pBlock->aggrOffset, SEEK_SET) < 0) { + tsdbError("vgId:%d failed to load block aggr part while seek file %s to offset %" PRIu64 " since %s", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFileAggr), (uint64_t)pBlock->aggrOffset, + tstrerror(terrno)); + return -1; + } + + size_t sizeAggr = tsdbBlockAggrSize(pBlock->numOfCols, (uint32_t)pBlock->blkVer); + if (tsdbMakeRoom((void **)(&(pReadh->pAggrBlkData)), sizeAggr) < 0) return -1; + + int64_t nreadAggr = tsdbReadDFile(pDFileAggr, (void *)(pReadh->pAggrBlkData), sizeAggr); + if (nreadAggr < 0) { + tsdbError("vgId:%d failed to load block aggr part while read file %s since %s, offset:%" PRIu64 " len :%" PRIzu, + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFileAggr), tstrerror(terrno), + (uint64_t)pBlock->aggrOffset, sizeAggr); + return -1; + } + + if (nreadAggr < sizeAggr) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbError("vgId:%d block aggr part in file %s is corrupted, offset:%" PRIu64 " expected bytes:%" PRIzu + " read bytes: %" PRId64, + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFileAggr), (uint64_t)pBlock->aggrOffset, sizeAggr, + nreadAggr); + return -1; + } + + if (!taosCheckChecksumWhole((uint8_t *)(pReadh->pAggrBlkData), (uint32_t)sizeAggr)) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbError("vgId:%d block aggr part in file %s is corrupted since wrong checksum, offset:%" PRIu64 " len :%" PRIzu, + TSDB_READ_REPO_ID(pReadh), 
TSDB_FILE_FULL_NAME(pDFileAggr), (uint64_t)pBlock->aggrOffset, sizeAggr); + return -1; + } + return 0; +} + +int tsdbLoadBlockStatis(SReadH *pReadh, SBlock *pBlock) { + ASSERT(pBlock->numOfSubBlocks <= 1); + + if (pBlock->blkVer > TSDB_SBLK_VER_0) { + if (pBlock->aggrStat) { + return tsdbLoadBlockStatisFromAggr(pReadh, pBlock); + } + return 1; + } + return tsdbLoadBlockStatisFromDFile(pReadh, pBlock); +} + +int tsdbLoadBlockOffset(SReadH *pReadh, SBlock *pBlock) { + ASSERT(pBlock->numOfSubBlocks <= 1); + return tsdbLoadBlockStatisFromDFile(pReadh, pBlock); +} + +int tsdbEncodeSBlockIdx(void **buf, SBlockIdx *pIdx) { + int tlen = 0; + + tlen += taosEncodeVariantI32(buf, pIdx->tid); + tlen += taosEncodeVariantU32(buf, pIdx->len); + tlen += taosEncodeVariantU32(buf, pIdx->offset); + tlen += taosEncodeFixedU8(buf, pIdx->hasLast); + tlen += taosEncodeVariantU32(buf, pIdx->numOfBlocks); + tlen += taosEncodeFixedU64(buf, pIdx->uid); + tlen += taosEncodeFixedU64(buf, pIdx->maxKey); + + return tlen; +} + +void *tsdbDecodeSBlockIdx(void *buf, SBlockIdx *pIdx) { + uint8_t hasLast = 0; + uint32_t numOfBlocks = 0; + uint64_t value = 0; + + if ((buf = taosDecodeVariantI32(buf, &(pIdx->tid))) == NULL) return NULL; + if ((buf = taosDecodeVariantU32(buf, &(pIdx->len))) == NULL) return NULL; + if ((buf = taosDecodeVariantU32(buf, &(pIdx->offset))) == NULL) return NULL; + if ((buf = taosDecodeFixedU8(buf, &(hasLast))) == NULL) return NULL; + pIdx->hasLast = hasLast; + if ((buf = taosDecodeVariantU32(buf, &(numOfBlocks))) == NULL) return NULL; + pIdx->numOfBlocks = numOfBlocks; + if ((buf = taosDecodeFixedU64(buf, &value)) == NULL) return NULL; + pIdx->uid = (int64_t)value; + if ((buf = taosDecodeFixedU64(buf, &value)) == NULL) return NULL; + pIdx->maxKey = (TSKEY)value; + + return buf; +} + +void tsdbGetBlockStatis(SReadH *pReadh, SDataStatis *pStatis, int numOfCols, SBlock *pBlock) { + if (pBlock->blkVer == TSDB_SBLK_VER_0) { + SBlockData *pBlockData = pReadh->pBlkData; + + for (int 
i = 0, j = 0; i < numOfCols;) { + if (j >= pBlockData->numOfCols) { + pStatis[i].numOfNull = -1; + i++; + continue; + } + SBlockColV0 *pSBlkCol = ((SBlockColV0 *)(pBlockData->cols)) + j; + if (pStatis[i].colId == pSBlkCol->colId) { + pStatis[i].sum = pSBlkCol->sum; + pStatis[i].max = pSBlkCol->max; + pStatis[i].min = pSBlkCol->min; + pStatis[i].maxIndex = pSBlkCol->maxIndex; + pStatis[i].minIndex = pSBlkCol->minIndex; + pStatis[i].numOfNull = pSBlkCol->numOfNull; + i++; + j++; + } else if (pStatis[i].colId < pSBlkCol->colId) { + pStatis[i].numOfNull = -1; + i++; + } else { + j++; + } + } + } else if (pBlock->aggrStat) { + SAggrBlkData *pAggrBlkData = pReadh->pAggrBlkData; + + for (int i = 0, j = 0; i < numOfCols;) { + if (j >= pBlock->numOfCols) { + pStatis[i].numOfNull = -1; + i++; + continue; + } + SAggrBlkCol *pAggrBlkCol = ((SAggrBlkCol *)(pAggrBlkData)) + j; + if (pStatis[i].colId == pAggrBlkCol->colId) { + pStatis[i].sum = pAggrBlkCol->sum; + pStatis[i].max = pAggrBlkCol->max; + pStatis[i].min = pAggrBlkCol->min; + pStatis[i].maxIndex = pAggrBlkCol->maxIndex; + pStatis[i].minIndex = pAggrBlkCol->minIndex; + pStatis[i].numOfNull = pAggrBlkCol->numOfNull; + i++; + j++; + } else if (pStatis[i].colId < pAggrBlkCol->colId) { + pStatis[i].numOfNull = -1; + i++; + } else { + j++; + } + } + } +} + +static void tsdbResetReadTable(SReadH *pReadh) { + tdResetDataCols(pReadh->pDCols[0]); + tdResetDataCols(pReadh->pDCols[1]); + pReadh->cidx = 0; + pReadh->pBlkIdx = NULL; + pReadh->pTable = NULL; +} + +static void tsdbResetReadFile(SReadH *pReadh) { + tsdbResetReadTable(pReadh); + taosArrayClear(pReadh->aBlkIdx); + tsdbCloseDFileSet(TSDB_READ_FSET(pReadh)); +} + +static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols) { + ASSERT(pBlock->numOfSubBlocks == 0 || pBlock->numOfSubBlocks == 1); + + SDFile *pDFile = (pBlock->last) ? 
TSDB_READ_LAST_FILE(pReadh) : TSDB_READ_DATA_FILE(pReadh); + + tdResetDataCols(pDataCols); + if (tsdbMakeRoom((void **)(&TSDB_READ_BUF(pReadh)), pBlock->len) < 0) return -1; + + SBlockData *pBlockData = (SBlockData *)TSDB_READ_BUF(pReadh); + + if (tsdbSeekDFile(pDFile, pBlock->offset, SEEK_SET) < 0) { + tsdbError("vgId:%d failed to load block data part while seek file %s to offset %" PRId64 " since %s", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), (int64_t)pBlock->offset, tstrerror(terrno)); + return -1; + } + + int64_t nread = tsdbReadDFile(pDFile, TSDB_READ_BUF(pReadh), pBlock->len); + if (nread < 0) { + tsdbError("vgId:%d failed to load block data part while read file %s since %s, offset:%" PRId64 " len :%d", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), tstrerror(terrno), (int64_t)pBlock->offset, + pBlock->len); + return -1; + } + + if (nread < pBlock->len) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbError("vgId:%d block data part in file %s is corrupted, offset:%" PRId64 + " expected bytes:%d read bytes: %" PRId64, + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), (int64_t)pBlock->offset, pBlock->len, nread); + return -1; + } + + int32_t tsize = (int32_t)tsdbBlockStatisSize(pBlock->numOfCols, (uint32_t)pBlock->blkVer); + if (!taosCheckChecksumWhole((uint8_t *)TSDB_READ_BUF(pReadh), tsize)) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbError("vgId:%d block statis part in file %s is corrupted since wrong checksum, offset:%" PRId64 " len :%d", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), (int64_t)pBlock->offset, tsize); + return -1; + } + + ASSERT(tsize < pBlock->len); + ASSERT(pBlockData->numOfCols == pBlock->numOfCols); + + pDataCols->numOfRows = pBlock->numOfRows; + + // Recover the data + int ccol = 0; // loop iter for SBlockCol object + int dcol = 0; // loop iter for SDataCols object + SBlockCol blockCol = {0}; + SBlockCol *pBlockCol = &blockCol; + while (dcol < pDataCols->numOfCols) { + SDataCol 
*pDataCol = &(pDataCols->cols[dcol]); + if (dcol != 0 && ccol >= pBlockData->numOfCols) { + // Set current column as NULL and forward + dataColReset(pDataCol); + dcol++; + continue; + } + + int16_t tcolId = 0; + uint32_t toffset = TSDB_KEY_COL_OFFSET; + int32_t tlen = pBlock->keyLen; + + + if (dcol != 0) { + tsdbGetSBlockCol(pBlock, &pBlockCol, pBlockData->cols, ccol); + tcolId = pBlockCol->colId; + toffset = tsdbGetBlockColOffset(pBlockCol); + tlen = pBlockCol->len; + } else { + ASSERT(pDataCol->colId == tcolId); + } + + if (tcolId == pDataCol->colId) { + if (pBlock->algorithm == TWO_STAGE_COMP) { + int zsize = pDataCol->bytes * pBlock->numOfRows + COMP_OVERFLOW_BYTES; + if (tsdbMakeRoom((void **)(&TSDB_READ_COMP_BUF(pReadh)), zsize) < 0) return -1; + } + + if (tsdbCheckAndDecodeColumnData(pDataCol, POINTER_SHIFT(pBlockData, tsize + toffset), tlen, pBlock->algorithm, + pBlock->numOfRows, pDataCols->maxPoints, TSDB_READ_COMP_BUF(pReadh), + (int)taosTSizeof(TSDB_READ_COMP_BUF(pReadh))) < 0) { + tsdbError("vgId:%d file %s is broken at column %d block offset %" PRId64 " column offset %u", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), tcolId, (int64_t)pBlock->offset, toffset); + return -1; + } + + if (dcol != 0) { + ccol++; + } + dcol++; + } else if (tcolId < pDataCol->colId) { + ccol++; + } else { + // Set current column as NULL and forward + dataColReset(pDataCol); + dcol++; + } + } + + return 0; +} + +static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32_t len, int8_t comp, int numOfRows, + int maxPoints, char *buffer, int bufferSize) { + if (!taosCheckChecksumWhole((uint8_t *)content, len)) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + return -1; + } + + tdAllocMemForCol(pDataCol, maxPoints); + + // Decode the data + if (comp) { + // Need to decompress + int tlen = (*(tDataTypes[pDataCol->type].decompFunc))(content, len - sizeof(TSCKSUM), numOfRows, pDataCol->pData, + pDataCol->spaceSize, comp, buffer, bufferSize); + if (tlen 
<= 0) { + tsdbError("Failed to decompress column, file corrupted, len:%d comp:%d numOfRows:%d maxPoints:%d bufferSize:%d", + len, comp, numOfRows, maxPoints, bufferSize); + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + return -1; + } + pDataCol->len = tlen; + } else { + // No need to decompress, just memcpy it + pDataCol->len = len - sizeof(TSCKSUM); + memcpy(pDataCol->pData, content, pDataCol->len); + } + + if (IS_VAR_DATA_TYPE(pDataCol->type)) { + dataColSetOffset(pDataCol, numOfRows); + } + return 0; +} + +static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols, int16_t *colIds, + int numOfColIds) { + ASSERT(pBlock->numOfSubBlocks == 0 || pBlock->numOfSubBlocks == 1); + ASSERT(colIds[0] == 0); + + SDFile * pDFile = (pBlock->last) ? TSDB_READ_LAST_FILE(pReadh) : TSDB_READ_DATA_FILE(pReadh); + SBlockCol blockCol = {0}; + + tdResetDataCols(pDataCols); + + // If only load timestamp column, no need to load SBlockData part + if (numOfColIds > 1 && tsdbLoadBlockOffset(pReadh, pBlock) < 0) return -1; + + pDataCols->numOfRows = pBlock->numOfRows; + + int dcol = 0; + int ccol = 0; + for (int i = 0; i < numOfColIds; i++) { + int16_t colId = colIds[i]; + SDataCol * pDataCol = NULL; + SBlockCol *pBlockCol = NULL; + + while (true) { + if (dcol >= pDataCols->numOfCols) { + pDataCol = NULL; + break; + } + pDataCol = &pDataCols->cols[dcol]; + if (pDataCol->colId > colId) { + pDataCol = NULL; + break; + } else { + dcol++; + if (pDataCol->colId == colId) break; + } + } + + if (pDataCol == NULL) continue; + ASSERT(pDataCol->colId == colId); + + if (colId == 0) { // load the key row + blockCol.colId = colId; + blockCol.len = pBlock->keyLen; + blockCol.type = pDataCol->type; + blockCol.offset = TSDB_KEY_COL_OFFSET; + pBlockCol = &blockCol; + } else { // load non-key rows + while (true) { + if (ccol >= pBlock->numOfCols) { + pBlockCol = NULL; + break; + } + + pBlockCol = &blockCol; + tsdbGetSBlockCol(pBlock, &pBlockCol, pReadh->pBlkData->cols, ccol); + + 
if (pBlockCol->colId > colId) { + pBlockCol = NULL; + break; + } else { + ccol++; + if (pBlockCol->colId == colId) break; + } + } + + if (pBlockCol == NULL) { + dataColReset(pDataCol); + continue; + } + + ASSERT(pBlockCol->colId == pDataCol->colId); + } + + if (tsdbLoadColData(pReadh, pDFile, pBlock, pBlockCol, pDataCol) < 0) return -1; + } + + return 0; +} + +static int tsdbLoadColData(SReadH *pReadh, SDFile *pDFile, SBlock *pBlock, SBlockCol *pBlockCol, SDataCol *pDataCol) { + ASSERT(pDataCol->colId == pBlockCol->colId); + + STsdb *pRepo = TSDB_READ_REPO(pReadh); + STsdbCfg * pCfg = REPO_CFG(pRepo); + int tsize = pDataCol->bytes * pBlock->numOfRows + COMP_OVERFLOW_BYTES; + + if (tsdbMakeRoom((void **)(&TSDB_READ_BUF(pReadh)), pBlockCol->len) < 0) return -1; + if (tsdbMakeRoom((void **)(&TSDB_READ_COMP_BUF(pReadh)), tsize) < 0) return -1; + + int64_t offset = pBlock->offset + tsdbBlockStatisSize(pBlock->numOfCols, (uint32_t)pBlock->blkVer) + + tsdbGetBlockColOffset(pBlockCol); + if (tsdbSeekDFile(pDFile, offset, SEEK_SET) < 0) { + tsdbError("vgId:%d failed to load block column data while seek file %s to offset %" PRId64 " since %s", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), offset, tstrerror(terrno)); + return -1; + } + + int64_t nread = tsdbReadDFile(pDFile, TSDB_READ_BUF(pReadh), pBlockCol->len); + if (nread < 0) { + tsdbError("vgId:%d failed to load block column data while read file %s since %s, offset:%" PRId64 " len :%d", + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), tstrerror(terrno), offset, pBlockCol->len); + return -1; + } + + if (nread < pBlockCol->len) { + terrno = TSDB_CODE_TDB_FILE_CORRUPTED; + tsdbError("vgId:%d block column data in file %s is corrupted, offset:%" PRId64 " expected bytes:%d" PRIzu + " read bytes: %" PRId64, + TSDB_READ_REPO_ID(pReadh), TSDB_FILE_FULL_NAME(pDFile), offset, pBlockCol->len, nread); + return -1; + } + + if (tsdbCheckAndDecodeColumnData(pDataCol, pReadh->pBuf, pBlockCol->len, pBlock->algorithm, 
pBlock->numOfRows, + pCfg->maxRowsPerFileBlock, pReadh->pCBuf, (int32_t)taosTSizeof(pReadh->pCBuf)) < 0) { + tsdbError("vgId:%d file %s is broken at column %d offset %" PRId64, REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pDFile), + pBlockCol->colId, offset); + return -1; + } + + return 0; +} diff --git a/source/dnode/vnode/tsdb2/src/tsdbRecover.c b/source/dnode/vnode/tsdb2/src/tsdbRecover.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/dnode/vnode/tsdb2/src/tsdbRecover.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/src/tsdbRowMergeBuf.c b/source/dnode/vnode/tsdb2/src/tsdbRowMergeBuf.c new file mode 100644 index 0000000000..5ce580f70f --- /dev/null +++ b/source/dnode/vnode/tsdb2/src/tsdbRowMergeBuf.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbRowMergeBuf.h" +#include "tdataformat.h" + +// row1 has higher priority +/* Merges row1 and row2 via mergeTwoMemRows() using pBuf. When either row is NULL the other one is returned as is; NULL is returned when tsdbMergeBufMakeSureRoom() fails. */ SMemRow tsdbMergeTwoRows(SMergeBuf *pBuf, SMemRow row1, SMemRow row2, STSchema *pSchema1, STSchema *pSchema2) { + if(row2 == NULL) return row1; + if(row1 == NULL) return row2; + ASSERT(pSchema1->version == memRowVersion(row1)); + ASSERT(pSchema2->version == memRowVersion(row2)); + + if(tsdbMergeBufMakeSureRoom(pBuf, pSchema1, pSchema2) < 0) { + return NULL; + } + return mergeTwoMemRows(*pBuf, row1, row2, pSchema1, pSchema2); +} diff --git a/source/dnode/vnode/tsdb2/tests/CMakeLists.txt b/source/dnode/vnode/tsdb2/tests/CMakeLists.txt new file mode 100644 index 0000000000..a3477aef95 --- /dev/null +++ b/source/dnode/vnode/tsdb2/tests/CMakeLists.txt @@ -0,0 +1,6 @@ +AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST) + +add_executable(tsdbTests ${SOURCE_LIST}) +target_link_libraries(tsdbTests gtest gtest_main pthread common tsdb tutil trpc) + +add_test(NAME unit COMMAND ${CMAKE_CURRENT_BINARY_DIR}/tsdbTests) \ No newline at end of file diff --git a/source/dnode/vnode/tsdb2/tests/tsdbTests.cpp b/source/dnode/vnode/tsdb2/tests/tsdbTests.cpp new file mode 100644 index 0000000000..dc804856fd --- /dev/null +++ b/source/dnode/vnode/tsdb2/tests/tsdbTests.cpp @@ -0,0 +1,163 @@ +#include +#include +#include + +#include "tsdb.h" +#include "tsdbMain.h" + +/* Wall-clock time in seconds from gettimeofday(), including the microsecond fraction. */ static double getCurTime() { + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + tv.tv_usec * 1E-6; +} + +/* Parameters describing one insertData() run. */ typedef struct { + STsdbRepo *pRepo; + bool isAscend; + int tid; + uint64_t uid; + int sversion; + TSKEY startTime; + TSKEY interval; + int totalRows; + int rowsPerSubmit; + STSchema * pSchema; +} SInsertInfo; + +/* Writes totalRows / rowsPerSubmit submit messages (integer division, so a remainder is not written), each holding rowsPerSubmit rows, through tsdbInsertData(). Returns 0 on success and -1 when malloc or tsdbInsertData() fails. */ static int insertData(SInsertInfo *pInfo) { + SSubmitMsg *pMsg = + (SSubmitMsg *)malloc(sizeof(SSubmitMsg) + sizeof(SSubmitBlk) + dataRowMaxBytesFromSchema(pInfo->pSchema) * pInfo->rowsPerSubmit); + if (pMsg == NULL) return -1; + TSKEY start_time = pInfo->startTime; + + // Loop to write data + 
double stime = getCurTime(); + + for (int k = 0; k < pInfo->totalRows/pInfo->rowsPerSubmit; k++) { + memset((void *)pMsg, 0, sizeof(SSubmitMsg)); + SSubmitBlk *pBlock = (SSubmitBlk *)pMsg->blocks; + pBlock->uid = pInfo->uid; + pBlock->tid = pInfo->tid; + pBlock->sversion = pInfo->sversion; + pBlock->dataLen = 0; + pBlock->schemaLen = 0; + pBlock->numOfRows = 0; + for (int i = 0; i < pInfo->rowsPerSubmit; i++) { + // start_time += 1000; + if (pInfo->isAscend) { + start_time += pInfo->interval; + } else { + start_time -= pInfo->interval; + } + SDataRow row = (SDataRow)(pBlock->data + pBlock->dataLen); + tdInitDataRow(row, pInfo->pSchema); + + for (int j = 0; j < schemaNCols(pInfo->pSchema); j++) { + STColumn *pTCol = schemaColAt(pInfo->pSchema, j); + if (j == 0) { // Just for timestamp + tdAppendColVal(row, (void *)(&start_time), pTCol->type, pTCol->offset); + } else { // For int + int val = 10; + tdAppendColVal(row, (void *)(&val), pTCol->type, pTCol->offset); + } + } + pBlock->dataLen += dataRowLen(row); + pBlock->numOfRows++; + } + pMsg->length = sizeof(SSubmitMsg) + sizeof(SSubmitBlk) + pBlock->dataLen; + pMsg->numOfBlocks = 1; /* header fields are converted with htonl/htobe64 only after the host-order length has been computed */ + + pBlock->dataLen = htonl(pBlock->dataLen); + pBlock->numOfRows = htonl(pBlock->numOfRows); + pBlock->schemaLen = htonl(pBlock->schemaLen); + pBlock->uid = htobe64(pBlock->uid); + pBlock->tid = htonl(pBlock->tid); + + pBlock->sversion = htonl(pBlock->sversion); + pBlock->padding = htonl(pBlock->padding); + + pMsg->length = htonl(pMsg->length); + pMsg->numOfBlocks = htonl(pMsg->numOfBlocks); + + if (tsdbInsertData(pInfo->pRepo, pMsg, NULL) < 0) { + tfree(pMsg); + return -1; + } + } + + double etime = getCurTime(); + + printf("Spent %f seconds to write %d records\n", etime - stime, pInfo->totalRows); + tfree(pMsg); + return 0; +} + +/* Copies the arguments into pCfg. maxTables is accepted but not stored, since its assignment is commented out. */ static void tsdbSetCfg(STsdbCfg *pCfg, int32_t tsdbId, int32_t cacheBlockSize, int32_t totalBlocks, int32_t maxTables, + int32_t daysPerFile, int32_t keep, int32_t minRows, int32_t maxRows, int8_t precision, + 
int8_t compression) { + pCfg->tsdbId = tsdbId; + pCfg->cacheBlockSize = cacheBlockSize; + pCfg->totalBlocks = totalBlocks; + // pCfg->maxTables = maxTables; + pCfg->daysPerFile = daysPerFile; + pCfg->keep = keep; + pCfg->minRowsPerFileBlock = minRows; + pCfg->maxRowsPerFileBlock = maxRows; + pCfg->precision = precision; + pCfg->compression = compression; +} + +/* Fills pCfg as a TSDB_NORMAL_TABLE with tid 1, name t1 and a 5-column schema whose column 0 is TIMESTAMP and whose other columns are INT. */ static void tsdbSetTableCfg(STableCfg *pCfg) { + STSchemaBuilder schemaBuilder = {0}; + + pCfg->type = TSDB_NORMAL_TABLE; + pCfg->superUid = TSDB_INVALID_SUPER_TABLE_ID; + pCfg->tableId.tid = 1; + pCfg->tableId.uid = 5849583783847394; + tdInitTSchemaBuilder(&schemaBuilder, 0); + + int colId = 0; + for (int i = 0; i < 5; i++) { + tdAddColToSchema(&schemaBuilder, (colId == 0) ? TSDB_DATA_TYPE_TIMESTAMP : TSDB_DATA_TYPE_INT, colId, 0); + colId++; + } + + pCfg->schema = tdGetSchemaFromBuilder(&schemaBuilder); + pCfg->name = strdup("t1"); + + tdDestroyTSchemaBuilder(&schemaBuilder); +} + +/* Calls taosRemoveDir() on ./test/vnode1, then creates and opens a repo there, creates one table and calls insertData() for 10000000 rows in submits of 100. NOTE(review): return values of tsdbCreateRepo, tsdbCreateTable and insertData are not checked, ret is unused, and the strdup-ed rootDir is not freed here. */ TEST(TsdbTest, testInsertSpeed) { + int vnode = 1; + int ret = 0; + STsdbCfg tsdbCfg; + STableCfg tableCfg; + std::string testDir = "./test"; + char * rootDir = strdup((testDir + "/vnode" + std::to_string(vnode)).c_str()); + + tsdbDebugFlag = 131; //NOTE: you must set the flag + + taosRemoveDir(rootDir); + + // Create and open repository + tsdbSetCfg(&tsdbCfg, 1, 16, 4, -1, -1, -1, -1, -1, -1, -1); + tsdbCreateRepo(rootDir, &tsdbCfg); + STsdbRepo *repo = tsdbOpenRepo(rootDir, NULL); + ASSERT_NE(repo, nullptr); + + // Create table + tsdbSetTableCfg(&tableCfg); + tsdbCreateTable(repo, &tableCfg); + + // Insert data + SInsertInfo iInfo = {repo, true, 1, 5849583783847394, 0, 1590000000000, 10, 10000000, 100, tableCfg.schema}; + + insertData(&iInfo); + + tsdbCloseRepo(repo, 1); +} + +/* Returns data cast to a char pointer. */ static char *getTKey(const void *data) { + return (char *)data; +} \ No newline at end of file diff --git a/source/libs/CMakeLists.txt b/source/libs/CMakeLists.txt index 027532bbb1..1dc16c74f7 100644 --- a/source/libs/CMakeLists.txt +++ 
b/source/libs/CMakeLists.txt @@ -12,3 +12,4 @@ add_subdirectory(planner) add_subdirectory(function) add_subdirectory(qcom) add_subdirectory(qworker) +add_subdirectory(tfs) diff --git a/source/libs/catalog/inc/catalogInt.h b/source/libs/catalog/inc/catalogInt.h index f426139c14..91a9c5248c 100644 --- a/source/libs/catalog/inc/catalogInt.h +++ b/source/libs/catalog/inc/catalogInt.h @@ -22,20 +22,31 @@ extern "C" { #include "catalog.h" #include "common.h" -#include "tlog.h" +#include "query.h" #define CTG_DEFAULT_CACHE_CLUSTER_NUMBER 6 #define CTG_DEFAULT_CACHE_VGROUP_NUMBER 100 #define CTG_DEFAULT_CACHE_DB_NUMBER 20 #define CTG_DEFAULT_CACHE_TABLEMETA_NUMBER 100000 +#define CTG_DEFAULT_RENT_SECOND 10 +#define CTG_DEFAULT_RENT_SLOT_SIZE 10 + +#define CTG_RENT_SLOT_SECOND 2 #define CTG_DEFAULT_INVALID_VERSION (-1) +#define CTG_ERR_CODE_TABLE_NOT_EXIST TSDB_CODE_TDB_INVALID_TABLE_ID + enum { CTG_READ = 1, CTG_WRITE, }; +enum { + CTG_RENT_DB = 1, + CTG_RENT_STABLE, +}; + typedef struct SVgroupListCache { int32_t vgroupVersion; SHashObj *cache; // key:vgId, value:SVgroupInfo @@ -51,30 +62,76 @@ typedef struct STableMetaCache { SHashObj *stableCache; //key:suid, value:STableMeta* } STableMetaCache; +typedef struct SRentSlotInfo { + SRWLatch lock; + bool needSort; + SArray *meta; // element is SDbVgVersion or SSTableMetaVersion +} SRentSlotInfo; + +typedef struct SMetaRentMgmt { + int8_t type; + uint16_t slotNum; + uint16_t slotRIdx; + int64_t lastReadMsec; + SRentSlotInfo *slots; +} SMetaRentMgmt; + typedef struct SCatalog { + uint64_t clusterId; SDBVgroupCache dbCache; STableMetaCache tableCache; + SMetaRentMgmt dbRent; + SMetaRentMgmt stableRent; } SCatalog; +typedef struct SCtgApiStat { + +} SCtgApiStat; + +typedef struct SCtgResourceStat { + +} SCtgResourceStat; + +typedef struct SCtgCacheStat { + +} SCtgCacheStat; + +typedef struct SCatalogStat { + SCtgApiStat api; + SCtgResourceStat resource; + SCtgCacheStat cache; +} SCatalogStat; + typedef struct SCatalogMgmt { - 
void *pMsgSender; // used to send messsage to mnode to fetch necessary metadata - SHashObj *pCluster; // items cached for each cluster, the hash key is the cluster-id got from mgmt node - SCatalogCfg cfg; + SHashObj *pCluster; //key: clusterId, value: SCatalog* + SCatalogStat stat; + SCatalogCfg cfg; } SCatalogMgmt; typedef uint32_t (*tableNameHashFp)(const char *, uint32_t); -#define ctgFatal(...) do { if (ctgDebugFlag & DEBUG_FATAL) { taosPrintLog("CTG FATAL ", ctgDebugFlag, __VA_ARGS__); }} while(0) -#define ctgError(...) do { if (ctgDebugFlag & DEBUG_ERROR) { taosPrintLog("CTG ERROR ", ctgDebugFlag, __VA_ARGS__); }} while(0) -#define ctgWarn(...) do { if (ctgDebugFlag & DEBUG_WARN) { taosPrintLog("CTG WARN ", ctgDebugFlag, __VA_ARGS__); }} while(0) -#define ctgInfo(...) do { if (ctgDebugFlag & DEBUG_INFO) { taosPrintLog("CTG ", ctgDebugFlag, __VA_ARGS__); }} while(0) -#define ctgDebug(...) do { if (ctgDebugFlag & DEBUG_DEBUG) { taosPrintLog("CTG ", ctgDebugFlag, __VA_ARGS__); }} while(0) -#define ctgTrace(...) do { if (ctgDebugFlag & DEBUG_TRACE) { taosPrintLog("CTG ", ctgDebugFlag, __VA_ARGS__); }} while(0) -#define ctgDebugL(...) do { if (ctgDebugFlag & DEBUG_DEBUG) { taosPrintLongString("CTG ", ctgDebugFlag, __VA_ARGS__); }} while(0) +#define CTG_IS_META_NONE(type) ((type) == META_TYPE_NON_TABLE) +#define CTG_IS_META_CTABLE(type) ((type) == META_TYPE_CTABLE) +#define CTG_IS_META_TABLE(type) ((type) == META_TYPE_TABLE) +#define CTG_IS_META_BOTH(type) ((type) == META_TYPE_BOTH_TABLE) + +#define CTG_IS_STABLE(isSTable) (1 == (isSTable)) +#define CTG_IS_NOT_STABLE(isSTable) (0 == (isSTable)) +#define CTG_IS_UNKNOWN_STABLE(isSTable) ((isSTable) < 0) +#define CTG_SET_STABLE(isSTable, tbType) do { (isSTable) = ((tbType) == TSDB_SUPER_TABLE) ? 1 : ((tbType) > TSDB_SUPER_TABLE ? 
0 : -1); } while (0) +#define CTG_TBTYPE_MATCH(isSTable, tbType) (CTG_IS_UNKNOWN_STABLE(isSTable) || (CTG_IS_STABLE(isSTable) && (tbType) == TSDB_SUPER_TABLE) || (CTG_IS_NOT_STABLE(isSTable) && (tbType) != TSDB_SUPER_TABLE)) + +#define CTG_TABLE_NOT_EXIST(code) (code == CTG_ERR_CODE_TABLE_NOT_EXIST) + +#define ctgFatal(param, ...) qFatal("CTG:%p " param, pCatalog, __VA_ARGS__) +#define ctgError(param, ...) qError("CTG:%p " param, pCatalog, __VA_ARGS__) +#define ctgWarn(param, ...) qWarn("CTG:%p " param, pCatalog, __VA_ARGS__) +#define ctgInfo(param, ...) qInfo("CTG:%p " param, pCatalog, __VA_ARGS__) +#define ctgDebug(param, ...) qDebug("CTG:%p " param, pCatalog, __VA_ARGS__) +#define ctgTrace(param, ...) qTrace("CTG:%p " param, pCatalog, __VA_ARGS__) #define CTG_ERR_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; return _code; } } while (0) #define CTG_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; } return _code; } while (0) -#define CTG_ERR_LRET(c,...) 
do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { ctgError(__VA_ARGS__); terrno = _code; return _code; } } while (0) #define CTG_ERR_JRET(c) do { code = c; if (code != TSDB_CODE_SUCCESS) { terrno = code; goto _return; } } while (0) #define TD_RWLATCH_WRITE_FLAG_COPY 0x40000000 @@ -82,15 +139,15 @@ typedef uint32_t (*tableNameHashFp)(const char *, uint32_t); #define CTG_LOCK(type, _lock) do { \ if (CTG_READ == (type)) { \ assert(atomic_load_32((_lock)) >= 0); \ - ctgDebug("CTG RLOCK%p:%d, %s:%d B", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + qDebug("CTG RLOCK%p:%d, %s:%d B", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ taosRLockLatch(_lock); \ - ctgDebug("CTG RLOCK%p:%d, %s:%d E", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + qDebug("CTG RLOCK%p:%d, %s:%d E", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ assert(atomic_load_32((_lock)) > 0); \ } else { \ assert(atomic_load_32((_lock)) >= 0); \ - ctgDebug("CTG WLOCK%p:%d, %s:%d B", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + qDebug("CTG WLOCK%p:%d, %s:%d B", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ taosWLockLatch(_lock); \ - ctgDebug("CTG WLOCK%p:%d, %s:%d E", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + qDebug("CTG WLOCK%p:%d, %s:%d E", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ assert(atomic_load_32((_lock)) == TD_RWLATCH_WRITE_FLAG_COPY); \ } \ } while (0) @@ -98,15 +155,15 @@ typedef uint32_t (*tableNameHashFp)(const char *, uint32_t); #define CTG_UNLOCK(type, _lock) do { \ if (CTG_READ == (type)) { \ assert(atomic_load_32((_lock)) > 0); \ - ctgDebug("CTG RULOCK%p:%d, %s:%d B", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + qDebug("CTG RULOCK%p:%d, %s:%d B", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ taosRUnLockLatch(_lock); \ - ctgDebug("CTG RULOCK%p:%d, %s:%d E", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + qDebug("CTG RULOCK%p:%d, %s:%d E", (_lock), atomic_load_32(_lock), 
__FILE__, __LINE__); \ assert(atomic_load_32((_lock)) >= 0); \ } else { \ assert(atomic_load_32((_lock)) == TD_RWLATCH_WRITE_FLAG_COPY); \ - ctgDebug("CTG WULOCK%p:%d, %s:%d B", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + qDebug("CTG WULOCK%p:%d, %s:%d B", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ taosWUnLockLatch(_lock); \ - ctgDebug("CTG WULOCK%p:%d, %s:%d E", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + qDebug("CTG WULOCK%p:%d, %s:%d E", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ assert(atomic_load_32((_lock)) >= 0); \ } \ } while (0) diff --git a/source/libs/catalog/src/catalog.c b/source/libs/catalog/src/catalog.c index b4d51e50a4..94f34b8e17 100644 --- a/source/libs/catalog/src/catalog.c +++ b/source/libs/catalog/src/catalog.c @@ -23,7 +23,7 @@ SCatalogMgmt ctgMgmt = {0}; int32_t ctgGetDBVgroupFromCache(struct SCatalog* pCatalog, const char *dbName, SDBVgroupInfo **dbInfo, bool *inCache) { if (NULL == pCatalog->dbCache.cache) { *inCache = false; - ctgWarn("no db cache"); + ctgWarn("empty db cache, dbName:%s", dbName); return TSDB_CODE_SUCCESS; } @@ -34,7 +34,7 @@ int32_t ctgGetDBVgroupFromCache(struct SCatalog* pCatalog, const char *dbName, S if (NULL == info) { *inCache = false; - ctgWarn("no db cache, dbName:%s", dbName); + ctgWarn("not in db vgroup cache, dbName:%s", dbName); return TSDB_CODE_SUCCESS; } @@ -52,6 +52,8 @@ int32_t ctgGetDBVgroupFromCache(struct SCatalog* pCatalog, const char *dbName, S *dbInfo = info; *inCache = true; + + ctgDebug("Got db vgroup from cache, dbName:%s", dbName); return TSDB_CODE_SUCCESS; } @@ -63,7 +65,13 @@ int32_t ctgGetDBVgroupFromMnode(struct SCatalog* pCatalog, void *pRpc, const SEp SEpSet *pVnodeEpSet = NULL; int32_t msgLen = 0; - CTG_ERR_RET(queryBuildMsg[TMSG_INDEX(TDMT_MND_USE_DB)](input, &msg, 0, &msgLen)); + ctgDebug("try to get db vgroup from mnode, db:%s", input->db); + + int32_t code = queryBuildMsg[TMSG_INDEX(TDMT_MND_USE_DB)](input, &msg, 0, &msgLen); + if 
(code) { + ctgError("Build use db msg failed, code:%x, db:%s", code, input->db); + CTG_ERR_RET(code); + } SRpcMsg rpcMsg = { .msgType = TDMT_MND_USE_DB, @@ -75,19 +83,47 @@ int32_t ctgGetDBVgroupFromMnode(struct SCatalog* pCatalog, void *pRpc, const SEp rpcSendRecv(pRpc, (SEpSet*)pMgmtEps, &rpcMsg, &rpcRsp); if (TSDB_CODE_SUCCESS != rpcRsp.code) { - ctgError("error rsp for use db, code:%x", rpcRsp.code); + ctgError("error rsp for use db, code:%x, db:%s", rpcRsp.code, input->db); CTG_ERR_RET(rpcRsp.code); } - CTG_ERR_RET(queryProcessMsgRsp[TMSG_INDEX(TDMT_MND_USE_DB)](out, rpcRsp.pCont, rpcRsp.contLen)); + code = queryProcessMsgRsp[TMSG_INDEX(TDMT_MND_USE_DB)](out, rpcRsp.pCont, rpcRsp.contLen); + if (code) { + ctgError("Process use db rsp failed, code:%x, db:%s", code, input->db); + CTG_ERR_RET(code); + } return TSDB_CODE_SUCCESS; } +int32_t ctgIsTableMetaExistInCache(struct SCatalog* pCatalog, const char* tbFullName, int32_t *exist) { + if (NULL == pCatalog->tableCache.cache) { + *exist = 0; + ctgWarn("empty tablemeta cache, tbName:%s", tbFullName); + return TSDB_CODE_SUCCESS; + } + + size_t sz = 0; + STableMeta *tbMeta = taosHashGet(pCatalog->tableCache.cache, tbFullName, strlen(tbFullName)); + + if (NULL == tbMeta) { + *exist = 0; + ctgDebug("tablemeta not in cache, tbName:%s", tbFullName); + return TSDB_CODE_SUCCESS; + } + + *exist = 1; + + ctgDebug("tablemeta is in cache, tbName:%s", tbFullName); + + return TSDB_CODE_SUCCESS; +} + int32_t ctgGetTableMetaFromCache(struct SCatalog* pCatalog, const SName* pTableName, STableMeta** pTableMeta, int32_t *exist) { if (NULL == pCatalog->tableCache.cache) { *exist = 0; + ctgWarn("empty tablemeta cache, tbName:%s", pTableName->tname); return TSDB_CODE_SUCCESS; } @@ -101,12 +137,17 @@ int32_t ctgGetTableMetaFromCache(struct SCatalog* pCatalog, const SName* pTableN if (NULL == *pTableMeta) { *exist = 0; + ctgDebug("tablemeta not in cache, tbName:%s", tbFullName); return TSDB_CODE_SUCCESS; } *exist = 1; + + tbMeta = 
*pTableMeta; if (tbMeta->tableType != TSDB_CHILD_TABLE) { + ctgDebug("Got tablemeta from cache, tbName:%s", tbFullName); + return TSDB_CODE_SUCCESS; } @@ -115,7 +156,7 @@ int32_t ctgGetTableMetaFromCache(struct SCatalog* pCatalog, const SName* pTableN STableMeta **stbMeta = taosHashGet(pCatalog->tableCache.stableCache, &tbMeta->suid, sizeof(tbMeta->suid)); if (NULL == stbMeta || NULL == *stbMeta) { CTG_UNLOCK(CTG_READ, &pCatalog->tableCache.stableLock); - qError("no stable:%"PRIx64 " meta in cache", tbMeta->suid); + ctgError("stable not in stableCache, suid:%"PRIx64, tbMeta->suid); tfree(*pTableMeta); *exist = 0; return TSDB_CODE_SUCCESS; @@ -124,7 +165,7 @@ int32_t ctgGetTableMetaFromCache(struct SCatalog* pCatalog, const SName* pTableN if ((*stbMeta)->suid != tbMeta->suid) { CTG_UNLOCK(CTG_READ, &pCatalog->tableCache.stableLock); tfree(*pTableMeta); - ctgError("stable cache error, expected suid:%"PRId64 ",actual suid:%"PRId64, tbMeta->suid, (*stbMeta)->suid); + ctgError("stable suid in stableCache mis-match, expected suid:%"PRIx64 ",actual suid:%"PRIx64, tbMeta->suid, (*stbMeta)->suid); CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); } @@ -132,17 +173,47 @@ int32_t ctgGetTableMetaFromCache(struct SCatalog* pCatalog, const SName* pTableN *pTableMeta = realloc(*pTableMeta, metaSize); if (NULL == *pTableMeta) { CTG_UNLOCK(CTG_READ, &pCatalog->tableCache.stableLock); - ctgError("calloc size[%d] failed", metaSize); + ctgError("realloc size[%d] failed", metaSize); CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); } memcpy(&(*pTableMeta)->sversion, &(*stbMeta)->sversion, metaSize - sizeof(SCTableMeta)); CTG_UNLOCK(CTG_READ, &pCatalog->tableCache.stableLock); + + ctgDebug("Got tablemeta from cache, tbName:%s", tbFullName); return TSDB_CODE_SUCCESS; } +int32_t ctgGetTableTypeFromCache(struct SCatalog* pCatalog, const SName* pTableName, int32_t *tbType) { + if (NULL == pCatalog->tableCache.cache) { + ctgWarn("empty tablemeta cache, tbName:%s", pTableName->tname); + return 
TSDB_CODE_SUCCESS; + } + + char tbFullName[TSDB_TABLE_FNAME_LEN]; + tNameExtractFullName(pTableName, tbFullName); + + size_t sz = 0; + STableMeta *pTableMeta = NULL; + + taosHashGetCloneExt(pCatalog->tableCache.cache, tbFullName, strlen(tbFullName), NULL, (void **)&pTableMeta, &sz); + + if (NULL == pTableMeta) { + ctgWarn("tablemeta not in cache, tbName:%s", tbFullName); + + return TSDB_CODE_SUCCESS; + } + + *tbType = pTableMeta->tableType; + + ctgDebug("Got tabletype from cache, tbName:%s, type:%d", tbFullName, *tbType); + + return TSDB_CODE_SUCCESS; +} + + void ctgGenEpSet(SEpSet *epSet, SVgroupInfo *vgroupInfo) { epSet->inUse = 0; epSet->numOfEps = vgroupInfo->numOfEps; @@ -153,20 +224,19 @@ void ctgGenEpSet(SEpSet *epSet, SVgroupInfo *vgroupInfo) { } } -int32_t ctgGetTableMetaFromMnode(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, STableMetaOutput* output) { - if (NULL == pCatalog || NULL == pRpc || NULL == pMgmtEps || NULL == pTableName || NULL == output) { - CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); - } - - char tbFullName[TSDB_TABLE_FNAME_LEN]; - tNameExtractFullName(pTableName, tbFullName); - +int32_t ctgGetTableMetaFromMnodeImpl(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, char* tbFullName, STableMetaOutput* output) { SBuildTableMetaInput bInput = {.vgId = 0, .dbName = NULL, .tableFullName = tbFullName}; char *msg = NULL; SEpSet *pVnodeEpSet = NULL; int32_t msgLen = 0; - CTG_ERR_RET(queryBuildMsg[TMSG_INDEX(TDMT_MND_STB_META)](&bInput, &msg, 0, &msgLen)); + ctgDebug("try to get table meta from mnode, tbName:%s", tbFullName); + + int32_t code = queryBuildMsg[TMSG_INDEX(TDMT_MND_STB_META)](&bInput, &msg, 0, &msgLen); + if (code) { + ctgError("Build mnode stablemeta msg failed, code:%x", code); + CTG_ERR_RET(code); + } SRpcMsg rpcMsg = { .msgType = TDMT_MND_STB_META, @@ -176,33 +246,57 @@ int32_t ctgGetTableMetaFromMnode(struct SCatalog* pCatalog, void *pRpc, const SE SRpcMsg rpcRsp = {0}; - 
rpcSendRecv(pRpc, (SEpSet*)pMgmtEps, &rpcMsg, &rpcRsp); + rpcSendRecv(pTransporter, (SEpSet*)pMgmtEps, &rpcMsg, &rpcRsp); if (TSDB_CODE_SUCCESS != rpcRsp.code) { - ctgError("error rsp for table meta, code:%x", rpcRsp.code); + if (CTG_TABLE_NOT_EXIST(rpcRsp.code)) { + SET_META_TYPE_NONE(output->metaType); + ctgDebug("stablemeta not exist in mnode, tbName:%s", tbFullName); + return TSDB_CODE_SUCCESS; + } + + ctgError("error rsp for stablemeta from mnode, code:%x, tbName:%s", rpcRsp.code, tbFullName); CTG_ERR_RET(rpcRsp.code); } - CTG_ERR_RET(queryProcessMsgRsp[TMSG_INDEX(TDMT_MND_STB_META)](output, rpcRsp.pCont, rpcRsp.contLen)); + code = queryProcessMsgRsp[TMSG_INDEX(TDMT_MND_STB_META)](output, rpcRsp.pCont, rpcRsp.contLen); + if (code) { + ctgError("Process mnode stablemeta rsp failed, code:%x, tbName:%s", code, tbFullName); + CTG_ERR_RET(code); + } + + ctgDebug("Got table meta from mnode, tbName:%s", tbFullName); return TSDB_CODE_SUCCESS; } +int32_t ctgGetTableMetaFromMnode(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMetaOutput* output) { + char tbFullName[TSDB_TABLE_FNAME_LEN]; + tNameExtractFullName(pTableName, tbFullName); -int32_t ctgGetTableMetaFromVnode(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, SVgroupInfo *vgroupInfo, STableMetaOutput* output) { - if (NULL == pCatalog || NULL == pRpc || NULL == pMgmtEps || NULL == pTableName || NULL == vgroupInfo || NULL == output) { + return ctgGetTableMetaFromMnodeImpl(pCatalog, pTransporter, pMgmtEps, tbFullName, output); +} + +int32_t ctgGetTableMetaFromVnode(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, SVgroupInfo *vgroupInfo, STableMetaOutput* output) { + if (NULL == pCatalog || NULL == pTransporter || NULL == pMgmtEps || NULL == pTableName || NULL == vgroupInfo || NULL == output) { CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } char dbFullName[TSDB_DB_FNAME_LEN]; 
tNameGetFullDbName(pTableName, dbFullName); - SBuildTableMetaInput bInput = {.vgId = vgroupInfo->vgId, .dbName = dbFullName, .tableFullName = pTableName->tname}; + ctgDebug("try to get table meta from vnode, db:%s, tbName:%s", dbFullName, pTableName->tname); + + SBuildTableMetaInput bInput = {.vgId = vgroupInfo->vgId, .dbName = dbFullName, .tableFullName = (char *)pTableName->tname}; char *msg = NULL; SEpSet *pVnodeEpSet = NULL; int32_t msgLen = 0; - CTG_ERR_RET(queryBuildMsg[TMSG_INDEX(TDMT_VND_TABLE_META)](&bInput, &msg, 0, &msgLen)); + int32_t code = queryBuildMsg[TMSG_INDEX(TDMT_VND_TABLE_META)](&bInput, &msg, 0, &msgLen); + if (code) { + ctgError("Build vnode tablemeta msg failed, code:%x, tbName:%s", code, pTableName->tname); + CTG_ERR_RET(code); + } SRpcMsg rpcMsg = { .msgType = TDMT_VND_TABLE_META, @@ -214,14 +308,26 @@ int32_t ctgGetTableMetaFromVnode(struct SCatalog* pCatalog, void *pRpc, const SE SEpSet epSet; ctgGenEpSet(&epSet, vgroupInfo); - rpcSendRecv(pRpc, &epSet, &rpcMsg, &rpcRsp); + rpcSendRecv(pTransporter, &epSet, &rpcMsg, &rpcRsp); if (TSDB_CODE_SUCCESS != rpcRsp.code) { - ctgError("error rsp for table meta, code:%x", rpcRsp.code); + if (CTG_TABLE_NOT_EXIST(rpcRsp.code)) { + SET_META_TYPE_NONE(output->metaType); + ctgDebug("tablemeta not exist in vnode, tbName:%s", pTableName->tname); + return TSDB_CODE_SUCCESS; + } + + ctgError("error rsp for table meta from vnode, code:%x, tbName:%s", rpcRsp.code, pTableName->tname); CTG_ERR_RET(rpcRsp.code); } - CTG_ERR_RET(queryProcessMsgRsp[TMSG_INDEX(TDMT_VND_TABLE_META)](output, rpcRsp.pCont, rpcRsp.contLen)); + code = queryProcessMsgRsp[TMSG_INDEX(TDMT_VND_TABLE_META)](output, rpcRsp.pCont, rpcRsp.contLen); + if (code) { + ctgError("Process vnode tablemeta rsp failed, code:%x, tbName:%s", code, pTableName->tname); + CTG_ERR_RET(code); + } + + ctgDebug("Got table meta from vnode, db:%s, tbName:%s", dbFullName, pTableName->tname); return TSDB_CODE_SUCCESS; } @@ -242,10 +348,11 @@ int32_t 
ctgGetVgInfoFromDB(struct SCatalog *pCatalog, void *pRpc, const SEpSet * SVgroupInfo *vgInfo = NULL; SArray *vgList = NULL; int32_t code = 0; + int32_t vgNum = taosHashGetSize(dbInfo->vgInfo); - vgList = taosArrayInit(taosHashGetSize(dbInfo->vgInfo), sizeof(SVgroupInfo)); + vgList = taosArrayInit(vgNum, sizeof(SVgroupInfo)); if (NULL == vgList) { - ctgError("taosArrayInit failed"); + ctgError("taosArrayInit failed, num:%d", vgNum); CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); } @@ -254,7 +361,7 @@ int32_t ctgGetVgInfoFromDB(struct SCatalog *pCatalog, void *pRpc, const SEpSet * vgInfo = pIter; if (NULL == taosArrayPush(vgList, vgInfo)) { - ctgError("taosArrayPush failed"); + ctgError("taosArrayPush failed, vgId:%d", vgInfo->vgId); CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } @@ -265,6 +372,8 @@ int32_t ctgGetVgInfoFromDB(struct SCatalog *pCatalog, void *pRpc, const SEpSet * *vgroupList = vgList; vgList = NULL; + ctgDebug("Got vg list from DB, vgNum:%d", vgNum); + return TSDB_CODE_SUCCESS; _return: @@ -276,7 +385,7 @@ _return: CTG_RET(code); } -int32_t ctgGetVgInfoFromHashValue(SDBVgroupInfo *dbInfo, const SName *pTableName, SVgroupInfo *pVgroup) { +int32_t ctgGetVgInfoFromHashValue(struct SCatalog *pCatalog, SDBVgroupInfo *dbInfo, const SName *pTableName, SVgroupInfo *pVgroup) { int32_t code = 0; int32_t vgNum = taosHashGetSize(dbInfo->vgInfo); @@ -284,7 +393,7 @@ int32_t ctgGetVgInfoFromHashValue(SDBVgroupInfo *dbInfo, const SName *pTableName tNameGetFullDbName(pTableName, db); if (vgNum <= 0) { - ctgError("db[%s] vgroup cache invalid, vgroup number:%d", db, vgNum); + ctgError("db vgroup cache invalid, db:%s, vgroup number:%d", db, vgNum); CTG_ERR_RET(TSDB_CODE_TSC_DB_NOT_SELECTED); } @@ -302,6 +411,7 @@ int32_t ctgGetVgInfoFromHashValue(SDBVgroupInfo *dbInfo, const SName *pTableName while (pIter) { vgInfo = pIter; if (hashValue >= vgInfo->hashBegin && hashValue <= vgInfo->hashEnd) { + taosHashCancelIterate(dbInfo->vgInfo, pIter); break; } @@ -310,147 +420,311 @@ int32_t 
ctgGetVgInfoFromHashValue(SDBVgroupInfo *dbInfo, const SName *pTableName } if (NULL == vgInfo) { - ctgError("no hash range found for hash value [%u], numOfVgId:%d", hashValue, taosHashGetSize(dbInfo->vgInfo)); - - void *pIter1 = taosHashIterate(dbInfo->vgInfo, NULL); - while (pIter1) { - vgInfo = pIter1; - ctgError("valid range:[%u, %u], vgId:%d", vgInfo->hashBegin, vgInfo->hashEnd, vgInfo->vgId); - pIter1 = taosHashIterate(dbInfo->vgInfo, pIter1); - } - + ctgError("no hash range found for hash value [%u], db:%s, numOfVgId:%d", hashValue, db, taosHashGetSize(dbInfo->vgInfo)); CTG_ERR_JRET(TSDB_CODE_CTG_INTERNAL_ERROR); } *pVgroup = *vgInfo; _return: - CTG_RET(TSDB_CODE_SUCCESS); + CTG_RET(code); } -int32_t ctgGetTableMetaImpl(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, bool forceUpdate, STableMeta** pTableMeta) { - if (NULL == pCatalog || NULL == pRpc || NULL == pMgmtEps || NULL == pTableName || NULL == pTableMeta) { - CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); +int32_t ctgSTableVersionCompare(const void* key1, const void* key2) { + if (((SSTableMetaVersion*)key1)->suid < ((SSTableMetaVersion*)key2)->suid) { + return -1; + } else if (((SSTableMetaVersion*)key1)->suid > ((SSTableMetaVersion*)key2)->suid) { + return 1; + } else { + return 0; } +} + +int32_t ctgDbVgVersionCompare(const void* key1, const void* key2) { + if (((SDbVgVersion*)key1)->dbId < ((SDbVgVersion*)key2)->dbId) { + return -1; + } else if (((SDbVgVersion*)key1)->dbId > ((SDbVgVersion*)key2)->dbId) { + return 1; + } else { + return 0; + } +} + + +int32_t ctgMetaRentInit(SMetaRentMgmt *mgmt, uint32_t rentSec, int8_t type) { + mgmt->slotRIdx = 0; + mgmt->slotNum = rentSec / CTG_RENT_SLOT_SECOND; + mgmt->type = type; + + size_t msgSize = sizeof(SRentSlotInfo) * mgmt->slotNum; - int32_t exist = 0; - - if (!forceUpdate) { - CTG_ERR_RET(ctgGetTableMetaFromCache(pCatalog, pTableName, pTableMeta, &exist)); - - if (exist) { - return TSDB_CODE_SUCCESS; - } + 
mgmt->slots = calloc(1, msgSize); + if (NULL == mgmt->slots) { + qError("calloc %d failed", (int32_t)msgSize); + return TSDB_CODE_CTG_MEM_ERROR; } - CTG_ERR_RET(catalogRenewTableMeta(pCatalog, pRpc, pMgmtEps, pTableName)); - - CTG_ERR_RET(ctgGetTableMetaFromCache(pCatalog, pTableName, pTableMeta, &exist)); - - if (0 == exist) { - ctgError("get table meta from cache failed, but fetch succeed"); - CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); - } + qDebug("meta rent initialized, type:%d, slotNum:%d", type, mgmt->slotNum); return TSDB_CODE_SUCCESS; } -int32_t ctgUpdateTableMetaCache(struct SCatalog *pCatalog, STableMetaOutput *output) { +int32_t ctgMetaRentAdd(SMetaRentMgmt *mgmt, void *meta, int64_t id, int32_t size) { + int16_t widx = abs(id % mgmt->slotNum); + + SRentSlotInfo *slot = &mgmt->slots[widx]; int32_t code = 0; - if (output->metaNum != 1 && output->metaNum != 2) { - ctgError("invalid table meta number[%d] got from meta rsp", output->metaNum); - CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); - } - - if (NULL == output->tbMeta) { - ctgError("no valid table meta got from meta rsp"); - CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); - } - - if (NULL == pCatalog->tableCache.cache) { - pCatalog->tableCache.cache = taosHashInit(ctgMgmt.cfg.maxTblCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); - if (NULL == pCatalog->tableCache.cache) { - ctgError("init hash[%d] for tablemeta cache failed", ctgMgmt.cfg.maxTblCacheNum); - CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + CTG_LOCK(CTG_WRITE, &slot->lock); + if (NULL == slot->meta) { + slot->meta = taosArrayInit(CTG_DEFAULT_RENT_SLOT_SIZE, size); + if (NULL == slot->meta) { + qError("taosArrayInit %d failed, id:%"PRIx64", slot idx:%d, type:%d", CTG_DEFAULT_RENT_SLOT_SIZE, id, widx, mgmt->type); + CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } } - if (NULL == pCatalog->tableCache.stableCache) { - pCatalog->tableCache.stableCache = taosHashInit(ctgMgmt.cfg.maxTblCacheNum, 
taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), true, HASH_ENTRY_LOCK); - if (NULL == pCatalog->tableCache.stableCache) { - ctgError("init hash[%d] for stablemeta cache failed", ctgMgmt.cfg.maxTblCacheNum); - CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); - } + if (NULL == taosArrayPush(slot->meta, meta)) { + qError("taosArrayPush meta to rent failed, id:%"PRIx64", slot idx:%d, type:%d", id, widx, mgmt->type); + CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } - if (output->metaNum == 2) { - if (taosHashPut(pCatalog->tableCache.cache, output->ctbFname, strlen(output->ctbFname), &output->ctbMeta, sizeof(output->ctbMeta)) != 0) { - ctgError("push ctable[%s] to table cache failed", output->ctbFname); - CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); - } + slot->needSort = true; - if (TSDB_SUPER_TABLE != output->tbMeta->tableType) { - ctgError("table type[%d] error, expected:%d", output->tbMeta->tableType, TSDB_SUPER_TABLE); - CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); - } + qDebug("add meta to rent, id:%"PRIx64", slot idx:%d, type:%d", id, widx, mgmt->type); + +_return: + + CTG_UNLOCK(CTG_WRITE, &slot->lock); + CTG_RET(code); +} + +int32_t ctgMetaRentUpdate(SMetaRentMgmt *mgmt, void *meta, int64_t id, int32_t size, __compar_fn_t compare) { + int16_t widx = abs(id % mgmt->slotNum); + + SRentSlotInfo *slot = &mgmt->slots[widx]; + int32_t code = 0; + + CTG_LOCK(CTG_WRITE, &slot->lock); + if (NULL == slot->meta) { + qError("meta in slot is empty, id:%"PRIx64", slot idx:%d, type:%d", id, widx, mgmt->type); + CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } - int32_t tbSize = sizeof(*output->tbMeta) + sizeof(SSchema) * (output->tbMeta->tableInfo.numOfColumns + output->tbMeta->tableInfo.numOfTags); + if (slot->needSort) { + taosArraySort(slot->meta, compare); + slot->needSort = false; + qDebug("slot meta sorted, slot idx:%d, type:%d", widx, mgmt->type); + } - if (TSDB_SUPER_TABLE == output->tbMeta->tableType) { - CTG_LOCK(CTG_WRITE, &pCatalog->tableCache.stableLock); - if 
(taosHashPut(pCatalog->tableCache.cache, output->tbFname, strlen(output->tbFname), output->tbMeta, tbSize) != 0) { - CTG_UNLOCK(CTG_WRITE, &pCatalog->tableCache.stableLock); - ctgError("push table[%s] to table cache failed", output->tbFname); - CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); - } + void *orig = taosArraySearch(slot->meta, &id, compare, TD_EQ); + if (NULL == orig) { + qError("meta not found in slot, id:%"PRIx64", slot idx:%d, type:%d", id, widx, mgmt->type); + CTG_ERR_JRET(TSDB_CODE_CTG_INTERNAL_ERROR); + } - STableMeta *tbMeta = taosHashGet(pCatalog->tableCache.cache, output->tbFname, strlen(output->tbFname)); - if (taosHashPut(pCatalog->tableCache.stableCache, &output->tbMeta->suid, sizeof(output->tbMeta->suid), &tbMeta, POINTER_BYTES) != 0) { - CTG_UNLOCK(CTG_WRITE, &pCatalog->tableCache.stableLock); - ctgError("push suid[%"PRIu64"] to stable cache failed", output->tbMeta->suid); - CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); - } - CTG_UNLOCK(CTG_WRITE, &pCatalog->tableCache.stableLock); - } else { - if (taosHashPut(pCatalog->tableCache.cache, output->tbFname, strlen(output->tbFname), output->tbMeta, tbSize) != 0) { - ctgError("push table[%s] to table cache failed", output->tbFname); - CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); - } + memcpy(orig, meta, size); + + qDebug("meta in rent updated, id:%"PRIx64", slot idx:%d, type:%d", id, widx, mgmt->type); + +_return: + + CTG_UNLOCK(CTG_WRITE, &slot->lock); + + if (code) { + qWarn("meta in rent update failed, will try to add it, code:%x, id:%"PRIx64", slot idx:%d, type:%d", code, id, widx, mgmt->type); + CTG_RET(ctgMetaRentAdd(mgmt, meta, id, size)); } CTG_RET(code); } +int32_t ctgMetaRentGetImpl(SMetaRentMgmt *mgmt, void **res, uint32_t *num, int32_t size) { + int16_t ridx = atomic_add_fetch_16(&mgmt->slotRIdx, 1); + if (ridx >= mgmt->slotNum) { + ridx %= mgmt->slotNum; + atomic_store_16(&mgmt->slotRIdx, ridx); + } -int32_t ctgGetDBVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const char* dbName, 
int32_t forceUpdate, SDBVgroupInfo** dbInfo) { + SRentSlotInfo *slot = &mgmt->slots[ridx]; + int32_t code = 0; + + CTG_LOCK(CTG_READ, &slot->lock); + if (NULL == slot->meta) { + qDebug("empty meta in slot:%d, type:%d", ridx, mgmt->type); + *num = 0; + goto _return; + } + + size_t metaNum = taosArrayGetSize(slot->meta); + if (metaNum <= 0) { + qDebug("no meta in slot:%d, type:%d", ridx, mgmt->type); + *num = 0; + goto _return; + } + + size_t msize = metaNum * size; + *res = malloc(msize); + if (NULL == *res) { + qError("malloc %d failed", (int32_t)msize); + CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); + } + + void *meta = taosArrayGet(slot->meta, 0); + + memcpy(*res, meta, msize); + + *num = (uint32_t)metaNum; + + qDebug("Got %d meta from rent, type:%d", (int32_t)metaNum, mgmt->type); + +_return: + + CTG_UNLOCK(CTG_READ, &slot->lock); + + CTG_RET(code); +} + +int32_t ctgMetaRentGet(SMetaRentMgmt *mgmt, void **res, uint32_t *num, int32_t size) { + while (true) { + int64_t msec = taosGetTimestampMs(); + int64_t lsec = atomic_load_64(&mgmt->lastReadMsec); + if ((msec - lsec) < CTG_RENT_SLOT_SECOND * 1000) { + *res = NULL; + *num = 0; + qDebug("too short time period to get expired meta, type:%d", mgmt->type); + return TSDB_CODE_SUCCESS; + } + + if (lsec != atomic_val_compare_exchange_64(&mgmt->lastReadMsec, lsec, msec)) { + continue; + } + + break; + } + + CTG_ERR_RET(ctgMetaRentGetImpl(mgmt, res, num, size)); + + return TSDB_CODE_SUCCESS; +} + + + +int32_t ctgUpdateTableMetaCache(struct SCatalog *pCatalog, STableMetaOutput *output) { + int32_t code = 0; + + if (NULL == output->tbMeta) { + ctgError("no valid table meta got from meta rsp, tbName:%s", output->tbFname); + CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); + } + + if (NULL == pCatalog->tableCache.cache) { + SHashObj *cache = taosHashInit(ctgMgmt.cfg.maxTblCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + if (NULL == cache) { + ctgError("taosHashInit failed, num:%d", 
ctgMgmt.cfg.maxTblCacheNum); + CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + } + + if (NULL != atomic_val_compare_exchange_ptr(&pCatalog->tableCache.cache, NULL, cache)) { + taosHashCleanup(cache); + } + } + + if (NULL == pCatalog->tableCache.stableCache) { + SHashObj *cache = taosHashInit(ctgMgmt.cfg.maxTblCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), true, HASH_ENTRY_LOCK); + if (NULL == cache) { + ctgError("taosHashInit failed, num:%d", ctgMgmt.cfg.maxTblCacheNum); + CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + } + + if (NULL != atomic_val_compare_exchange_ptr(&pCatalog->tableCache.stableCache, NULL, cache)) { + taosHashCleanup(cache); + } + } + + if (CTG_IS_META_CTABLE(output->metaType) || CTG_IS_META_BOTH(output->metaType)) { + if (taosHashPut(pCatalog->tableCache.cache, output->ctbFname, strlen(output->ctbFname), &output->ctbMeta, sizeof(output->ctbMeta)) != 0) { + ctgError("taosHashPut ctablemeta to cache failed, ctbName:%s", output->ctbFname); + CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + } + + ctgDebug("update child tablemeta to cache, tbName:%s", output->ctbFname); + } + + if (CTG_IS_META_CTABLE(output->metaType)) { + return TSDB_CODE_SUCCESS; + } + + if (CTG_IS_META_BOTH(output->metaType) && TSDB_SUPER_TABLE != output->tbMeta->tableType) { + ctgError("table type error, expected:%d, actual:%d", TSDB_SUPER_TABLE, output->tbMeta->tableType); + CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); + } + + int32_t tbSize = sizeof(*output->tbMeta) + sizeof(SSchema) * (output->tbMeta->tableInfo.numOfColumns + output->tbMeta->tableInfo.numOfTags); + + if (TSDB_SUPER_TABLE == output->tbMeta->tableType) { + bool newAdded = false; + SSTableMetaVersion metaRent = {.suid = output->tbMeta->suid, .sversion = output->tbMeta->sversion, .tversion = output->tbMeta->tversion}; + + CTG_LOCK(CTG_WRITE, &pCatalog->tableCache.stableLock); + if (taosHashPut(pCatalog->tableCache.cache, output->tbFname, strlen(output->tbFname), output->tbMeta, tbSize) != 0) { + CTG_UNLOCK(CTG_WRITE, 
&pCatalog->tableCache.stableLock); + ctgError("taosHashPut tablemeta to cache failed, tbName:%s", output->tbFname); + CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + } + + STableMeta *tbMeta = taosHashGet(pCatalog->tableCache.cache, output->tbFname, strlen(output->tbFname)); + if (taosHashPutExt(pCatalog->tableCache.stableCache, &output->tbMeta->suid, sizeof(output->tbMeta->suid), &tbMeta, POINTER_BYTES, &newAdded) != 0) { + CTG_UNLOCK(CTG_WRITE, &pCatalog->tableCache.stableLock); + ctgError("taosHashPutExt stable to stable cache failed, suid:%"PRIx64, output->tbMeta->suid); + CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + } + CTG_UNLOCK(CTG_WRITE, &pCatalog->tableCache.stableLock); + + ctgDebug("update stable to cache, suid:%"PRIx64, output->tbMeta->suid); + + if (newAdded) { + CTG_ERR_RET(ctgMetaRentAdd(&pCatalog->stableRent, &metaRent, metaRent.suid, sizeof(SSTableMetaVersion))); + } else { + CTG_ERR_RET(ctgMetaRentUpdate(&pCatalog->stableRent, &metaRent, metaRent.suid, sizeof(SSTableMetaVersion), ctgSTableVersionCompare)); + } + } else { + if (taosHashPut(pCatalog->tableCache.cache, output->tbFname, strlen(output->tbFname), output->tbMeta, tbSize) != 0) { + ctgError("taosHashPut tablemeta to cache failed, tbName:%s", output->tbFname); + CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + } + } + + ctgDebug("update tablemeta to cache, tbName:%s", output->tbFname); + + CTG_RET(code); +} + +int32_t ctgGetDBVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const char* dbName, bool forceUpdate, SDBVgroupInfo** dbInfo) { bool inCache = false; - if (0 == forceUpdate) { + if (!forceUpdate) { CTG_ERR_RET(ctgGetDBVgroupFromCache(pCatalog, dbName, dbInfo, &inCache)); - if (inCache) { return TSDB_CODE_SUCCESS; } + + ctgDebug("failed to get DB vgroupInfo from cache, dbName:%s, load it from mnode, update:%d", dbName, forceUpdate); } SUseDbOutput DbOut = {0}; SBuildUseDBInput input = {0}; - strncpy(input.db, dbName, sizeof(input.db)); - input.db[sizeof(input.db) - 1] = 0; + 
tstrncpy(input.db, dbName, tListLen(input.db)); input.vgVersion = CTG_DEFAULT_INVALID_VERSION; while (true) { CTG_ERR_RET(ctgGetDBVgroupFromMnode(pCatalog, pRpc, pMgmtEps, &input, &DbOut)); - CTG_ERR_RET(catalogUpdateDBVgroup(pCatalog, dbName, &DbOut.dbVgroup)); - CTG_ERR_RET(ctgGetDBVgroupFromCache(pCatalog, dbName, dbInfo, &inCache)); if (!inCache) { - ctgWarn("get db vgroup from cache failed, db:%s", dbName); + ctgWarn("can't get db vgroup from cache, will retry, db:%s", dbName); continue; } @@ -466,7 +740,7 @@ int32_t ctgValidateAndRemoveDb(struct SCatalog* pCatalog, const char* dbName, SD if (oldInfo) { CTG_LOCK(CTG_WRITE, &oldInfo->lock); if (dbInfo->vgVersion <= oldInfo->vgVersion) { - ctgInfo("dbName:%s vg will not update, vgVersion:%d , current:%d", dbName, dbInfo->vgVersion, oldInfo->vgVersion); + ctgInfo("db vgVersion is not new, db:%s, vgVersion:%d, current:%d", dbName, dbInfo->vgVersion, oldInfo->vgVersion); CTG_UNLOCK(CTG_WRITE, &oldInfo->lock); taosHashRelease(pCatalog->dbCache.cache, oldInfo); @@ -474,7 +748,7 @@ int32_t ctgValidateAndRemoveDb(struct SCatalog* pCatalog, const char* dbName, SD } if (oldInfo->vgInfo) { - ctgInfo("dbName:%s vg will be cleanup", dbName); + ctgInfo("cleanup db vgInfo, db:%s", dbName); taosHashCleanup(oldInfo->vgInfo); oldInfo->vgInfo = NULL; } @@ -487,10 +761,174 @@ int32_t ctgValidateAndRemoveDb(struct SCatalog* pCatalog, const char* dbName, SD return TSDB_CODE_SUCCESS; } +int32_t ctgRenewTableMetaImpl(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, int32_t isSTable) { + if (NULL == pCatalog || NULL == pTransporter || NULL == pMgmtEps || NULL == pTableName) { + CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); + } + + SVgroupInfo vgroupInfo = {0}; + int32_t code = 0; + + CTG_ERR_RET(catalogGetTableHashVgroup(pCatalog, pTransporter, pMgmtEps, pTableName, &vgroupInfo)); + + STableMetaOutput voutput = {0}; + STableMetaOutput moutput = {0}; + STableMetaOutput *output = &voutput; + + 
if (CTG_IS_STABLE(isSTable)) { + ctgDebug("will renew table meta, supposed to be stable, tbName:%s", pTableName->tname); + + // if get from mnode failed, will not try vnode + CTG_ERR_JRET(ctgGetTableMetaFromMnode(pCatalog, pTransporter, pMgmtEps, pTableName, &moutput)); + + if (CTG_IS_META_NONE(moutput.metaType)) { + CTG_ERR_JRET(ctgGetTableMetaFromVnode(pCatalog, pTransporter, pMgmtEps, pTableName, &vgroupInfo, &voutput)); + } else { + output = &moutput; + } + } else { + ctgDebug("will renew table meta, not supposed to be stable, tbName:%s, isStable:%d", pTableName->tname, isSTable); + + // if get from vnode failed or no table meta, will not try mnode + CTG_ERR_JRET(ctgGetTableMetaFromVnode(pCatalog, pTransporter, pMgmtEps, pTableName, &vgroupInfo, &voutput)); + + if (CTG_IS_META_TABLE(voutput.metaType) && TSDB_SUPER_TABLE == voutput.tbMeta->tableType) { + ctgDebug("will continue to renew table meta since got stable, tbName:%s, metaType:%d", pTableName->tname, voutput.metaType); + + CTG_ERR_JRET(ctgGetTableMetaFromMnodeImpl(pCatalog, pTransporter, pMgmtEps, voutput.tbFname, &moutput)); + + tfree(voutput.tbMeta); + voutput.tbMeta = moutput.tbMeta; + moutput.tbMeta = NULL; + } else if (CTG_IS_META_BOTH(voutput.metaType)) { + int32_t exist = 0; + CTG_ERR_JRET(ctgIsTableMetaExistInCache(pCatalog, voutput.tbFname, &exist)); + if (0 == exist) { + CTG_ERR_JRET(ctgGetTableMetaFromMnodeImpl(pCatalog, pTransporter, pMgmtEps, voutput.tbFname, &moutput)); + + if (CTG_IS_META_NONE(moutput.metaType)) { + SET_META_TYPE_NONE(voutput.metaType); + } + + tfree(voutput.tbMeta); + voutput.tbMeta = moutput.tbMeta; + moutput.tbMeta = NULL; + } else { + SET_META_TYPE_CTABLE(voutput.metaType); + } + } + } + + if (CTG_IS_META_NONE(output->metaType)) { + ctgError("no tablemeta got, tbNmae:%s", pTableName->tname); + CTG_ERR_JRET(CTG_ERR_CODE_TABLE_NOT_EXIST); + } + + CTG_ERR_JRET(ctgUpdateTableMetaCache(pCatalog, output)); + +_return: + + tfree(voutput.tbMeta); + tfree(moutput.tbMeta); + + 
CTG_RET(code); +} + +int32_t ctgGetTableMeta(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, bool forceUpdate, STableMeta** pTableMeta, int32_t isSTable) { + if (NULL == pCatalog || NULL == pRpc || NULL == pMgmtEps || NULL == pTableName || NULL == pTableMeta) { + CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); + } + + int32_t exist = 0; + + if (!forceUpdate) { + CTG_ERR_RET(ctgGetTableMetaFromCache(pCatalog, pTableName, pTableMeta, &exist)); + + if (exist && CTG_TBTYPE_MATCH(isSTable, (*pTableMeta)->tableType)) { + return TSDB_CODE_SUCCESS; + } + } else if (CTG_IS_UNKNOWN_STABLE(isSTable)) { + int32_t tbType = 0; + + CTG_ERR_RET(ctgGetTableTypeFromCache(pCatalog, pTableName, &tbType)); + + CTG_SET_STABLE(isSTable, tbType); + } + + CTG_ERR_RET(ctgRenewTableMetaImpl(pCatalog, pRpc, pMgmtEps, pTableName, isSTable)); + + CTG_ERR_RET(ctgGetTableMetaFromCache(pCatalog, pTableName, pTableMeta, &exist)); + + if (0 == exist) { + ctgError("renew tablemeta succeed but get from cache failed, may be deleted, tbName:%s", pTableName->tname); + CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); + } + + return TSDB_CODE_SUCCESS; +} + +void ctgFreeMetaRent(SMetaRentMgmt *mgmt) { + if (NULL == mgmt->slots) { + return; + } + + for (int32_t i = 0; i < mgmt->slotNum; ++i) { + SRentSlotInfo *slot = &mgmt->slots[i]; + if (slot->meta) { + taosArrayDestroy(slot->meta); + slot->meta = NULL; + } + } + + tfree(mgmt->slots); +} + +void ctgFreeDbCache(SDBVgroupCache *db) { + if (NULL == db->cache) { + return; + } + + SDBVgroupInfo *dbInfo = NULL; + void *pIter = taosHashIterate(db->cache, NULL); + while (pIter) { + dbInfo = pIter; + + if (dbInfo->vgInfo) { + taosHashCleanup(dbInfo->vgInfo); + dbInfo->vgInfo = NULL; + } + + pIter = taosHashIterate(db->cache, pIter); + } + + taosHashCleanup(db->cache); + db->cache = NULL; +} + +void ctgFreeTableMetaCache(STableMetaCache *table) { + if (table->stableCache) { + taosHashCleanup(table->stableCache); + table->stableCache = 
NULL; + } + + if (table->cache) { + taosHashCleanup(table->cache); + table->cache = NULL; + } +} + +void ctgFreeHandle(struct SCatalog* pCatalog) { + ctgFreeMetaRent(&pCatalog->dbRent); + ctgFreeMetaRent(&pCatalog->stableRent); + ctgFreeDbCache(&pCatalog->dbCache); + ctgFreeTableMetaCache(&pCatalog->tableCache); + + free(pCatalog); +} int32_t catalogInit(SCatalogCfg *cfg) { if (ctgMgmt.pCluster) { - ctgError("catalog already init"); + qError("catalog already init"); CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } @@ -504,16 +942,29 @@ int32_t catalogInit(SCatalogCfg *cfg) { if (ctgMgmt.cfg.maxTblCacheNum == 0) { ctgMgmt.cfg.maxTblCacheNum = CTG_DEFAULT_CACHE_TABLEMETA_NUMBER; } + + if (ctgMgmt.cfg.dbRentSec == 0) { + ctgMgmt.cfg.dbRentSec = CTG_DEFAULT_RENT_SECOND; + } + + if (ctgMgmt.cfg.stableRentSec == 0) { + ctgMgmt.cfg.stableRentSec = CTG_DEFAULT_RENT_SECOND; + } } else { ctgMgmt.cfg.maxDBCacheNum = CTG_DEFAULT_CACHE_DB_NUMBER; ctgMgmt.cfg.maxTblCacheNum = CTG_DEFAULT_CACHE_TABLEMETA_NUMBER; + ctgMgmt.cfg.dbRentSec = CTG_DEFAULT_RENT_SECOND; + ctgMgmt.cfg.stableRentSec = CTG_DEFAULT_RENT_SECOND; } - ctgMgmt.pCluster = taosHashInit(CTG_DEFAULT_CACHE_CLUSTER_NUMBER, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_ENTRY_LOCK); + ctgMgmt.pCluster = taosHashInit(CTG_DEFAULT_CACHE_CLUSTER_NUMBER, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); if (NULL == ctgMgmt.pCluster) { - CTG_ERR_LRET(TSDB_CODE_CTG_INTERNAL_ERROR, "init %d cluster cache failed", CTG_DEFAULT_CACHE_CLUSTER_NUMBER); + qError("taosHashInit %d cluster cache failed", CTG_DEFAULT_CACHE_CLUSTER_NUMBER); + CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); } + qDebug("catalog initialized, maxDb:%u, maxTbl:%u, dbRentSec:%u, stableRentSec:%u", ctgMgmt.cfg.maxDBCacheNum, ctgMgmt.cfg.maxTblCacheNum, ctgMgmt.cfg.dbRentSec, ctgMgmt.cfg.stableRentSec); + return TSDB_CODE_SUCCESS; } @@ -523,32 +974,75 @@ int32_t catalogGetHandle(uint64_t clusterId, struct SCatalog** catalogHandle) 
{ } if (NULL == ctgMgmt.pCluster) { - ctgError("cluster cache are not ready"); + qError("cluster cache are not ready, clusterId:%"PRIx64, clusterId); CTG_ERR_RET(TSDB_CODE_CTG_NOT_READY); } - SCatalog **ctg = (SCatalog **)taosHashGet(ctgMgmt.pCluster, (char*)&clusterId, sizeof(clusterId)); + int32_t code = 0; + SCatalog *clusterCtg = NULL; - if (ctg && (*ctg)) { - *catalogHandle = *ctg; - return TSDB_CODE_SUCCESS; - } + while (true) { + SCatalog **ctg = (SCatalog **)taosHashGet(ctgMgmt.pCluster, (char*)&clusterId, sizeof(clusterId)); - SCatalog *clusterCtg = calloc(1, sizeof(SCatalog)); - if (NULL == clusterCtg) { - ctgError("calloc %d failed", (int32_t)sizeof(SCatalog)); - CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); - } + if (ctg && (*ctg)) { + *catalogHandle = *ctg; + qDebug("got catalog handle from cache, clusterId:%"PRIx64", CTG:%p", clusterId, *ctg); + return TSDB_CODE_SUCCESS; + } - if (taosHashPut(ctgMgmt.pCluster, &clusterId, sizeof(clusterId), &clusterCtg, POINTER_BYTES)) { - ctgError("put cluster %"PRIx64" cache to hash failed", clusterId); - tfree(clusterCtg); - CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); + clusterCtg = calloc(1, sizeof(SCatalog)); + if (NULL == clusterCtg) { + qError("calloc %d failed", (int32_t)sizeof(SCatalog)); + CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + } + + clusterCtg->clusterId = clusterId; + + CTG_ERR_JRET(ctgMetaRentInit(&clusterCtg->dbRent, ctgMgmt.cfg.dbRentSec, CTG_RENT_DB)); + CTG_ERR_JRET(ctgMetaRentInit(&clusterCtg->stableRent, ctgMgmt.cfg.stableRentSec, CTG_RENT_STABLE)); + + code = taosHashPut(ctgMgmt.pCluster, &clusterId, sizeof(clusterId), &clusterCtg, POINTER_BYTES); + if (code) { + if (HASH_NODE_EXIST(code)) { + ctgFreeHandle(clusterCtg); + continue; + } + + qError("taosHashPut CTG to cache failed, clusterId:%"PRIx64, clusterId); + CTG_ERR_JRET(TSDB_CODE_CTG_INTERNAL_ERROR); + } + + qDebug("add CTG to cache, clusterId:%"PRIx64", CTG:%p", clusterId, clusterCtg); + + break; } *catalogHandle = clusterCtg; return 
TSDB_CODE_SUCCESS; + +_return: + + ctgFreeHandle(clusterCtg); + + CTG_RET(code); +} + +void catalogFreeHandle(struct SCatalog* pCatalog) { + if (NULL == pCatalog) { + return; + } + + if (taosHashRemove(ctgMgmt.pCluster, &pCatalog->clusterId, sizeof(pCatalog->clusterId))) { + ctgWarn("taosHashRemove from cluster failed, may already be freed, clusterId:%"PRIx64, pCatalog->clusterId); + return; + } + + uint64_t clusterId = pCatalog->clusterId; + + ctgFreeHandle(pCatalog); + + ctgInfo("handle freed, culsterId:%"PRIx64, clusterId); } int32_t catalogGetDBVgroupVersion(struct SCatalog* pCatalog, const char* dbName, int32_t* version) { @@ -558,36 +1052,40 @@ int32_t catalogGetDBVgroupVersion(struct SCatalog* pCatalog, const char* dbName, if (NULL == pCatalog->dbCache.cache) { *version = CTG_DEFAULT_INVALID_VERSION; + ctgInfo("empty db cache, dbName:%s", dbName); return TSDB_CODE_SUCCESS; } SDBVgroupInfo * dbInfo = taosHashAcquire(pCatalog->dbCache.cache, dbName, strlen(dbName)); if (NULL == dbInfo) { *version = CTG_DEFAULT_INVALID_VERSION; + ctgInfo("db not in cache, dbName:%s", dbName); return TSDB_CODE_SUCCESS; } *version = dbInfo->vgVersion; taosHashRelease(pCatalog->dbCache.cache, dbInfo); + ctgDebug("Got db vgVersion from cache, dbName:%s, vgVersion:%d", dbName, *version); + return TSDB_CODE_SUCCESS; } -int32_t catalogGetDBVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const char* dbName, int32_t forceUpdate, SArray** vgroupList) { +int32_t catalogGetDBVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const char* dbName, bool forceUpdate, SArray** vgroupList) { if (NULL == pCatalog || NULL == dbName || NULL == pRpc || NULL == pMgmtEps || NULL == vgroupList) { CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } - SDBVgroupInfo* db = NULL; - int32_t code = 0; + SDBVgroupInfo* db = NULL; SVgroupInfo *vgInfo = NULL; + + int32_t code = 0; SArray *vgList = NULL; - CTG_ERR_JRET(ctgGetDBVgroup(pCatalog, pRpc, pMgmtEps, dbName, forceUpdate, 
&db)); vgList = taosArrayInit(taosHashGetSize(db->vgInfo), sizeof(SVgroupInfo)); if (NULL == vgList) { - ctgError("taosArrayInit failed"); + ctgError("taosArrayInit %d failed", taosHashGetSize(db->vgInfo)); CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } @@ -596,7 +1094,7 @@ int32_t catalogGetDBVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* vgInfo = pIter; if (NULL == taosArrayPush(vgList, vgInfo)) { - ctgError("taosArrayPush failed"); + ctgError("taosArrayPush failed, vgId:%d", vgInfo->vgId); CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } @@ -608,7 +1106,6 @@ int32_t catalogGetDBVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* vgList = NULL; _return: - if (db) { CTG_UNLOCK(CTG_READ, &db->lock); taosHashRelease(pCatalog->dbCache.cache, db); @@ -631,12 +1128,12 @@ int32_t catalogUpdateDBVgroup(struct SCatalog* pCatalog, const char* dbName, SDB } if (NULL == dbInfo->vgInfo || dbInfo->vgVersion < 0 || taosHashGetSize(dbInfo->vgInfo) <= 0) { - ctgError("invalid db vg, dbName:%s", dbName); + ctgError("invalid db vgInfo, dbName:%s, vgInfo:%p, vgVersion:%d", dbName, dbInfo->vgInfo, dbInfo->vgVersion); CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } if (dbInfo->vgVersion < 0) { - ctgWarn("invalid db vgVersion:%d, dbName:%s", dbInfo->vgVersion, dbName); + ctgWarn("db vgVersion less than 0, dbName:%s, vgVersion:%d", dbName, dbInfo->vgVersion); if (pCatalog->dbCache.cache) { CTG_ERR_JRET(ctgValidateAndRemoveDb(pCatalog, dbName, dbInfo)); @@ -644,28 +1141,41 @@ int32_t catalogUpdateDBVgroup(struct SCatalog* pCatalog, const char* dbName, SDB CTG_ERR_JRET(taosHashRemove(pCatalog->dbCache.cache, dbName, strlen(dbName))); } - ctgWarn("remove db [%s] from cache", dbName); + ctgWarn("db removed from cache, db:%s", dbName); goto _return; } if (NULL == pCatalog->dbCache.cache) { - pCatalog->dbCache.cache = taosHashInit(ctgMgmt.cfg.maxDBCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); - if (NULL == pCatalog->dbCache.cache) { - ctgError("init 
hash[%d] for db cache failed", CTG_DEFAULT_CACHE_DB_NUMBER); + SHashObj *cache = taosHashInit(ctgMgmt.cfg.maxDBCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + if (NULL == cache) { + ctgError("taosHashInit %d failed", CTG_DEFAULT_CACHE_DB_NUMBER); CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } + + if (NULL != atomic_val_compare_exchange_ptr(&pCatalog->dbCache.cache, NULL, cache)) { + taosHashCleanup(cache); + } } else { CTG_ERR_JRET(ctgValidateAndRemoveDb(pCatalog, dbName, dbInfo)); } - if (taosHashPut(pCatalog->dbCache.cache, dbName, strlen(dbName), dbInfo, sizeof(*dbInfo)) != 0) { - ctgError("push to vgroup hash cache failed"); + bool newAdded = false; + if (taosHashPutExt(pCatalog->dbCache.cache, dbName, strlen(dbName), dbInfo, sizeof(*dbInfo), &newAdded) != 0) { + ctgError("taosHashPutExt db vgroup to cache failed, db:%s", dbName); CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } + dbInfo->vgInfo = NULL; + + SDbVgVersion vgVersion = {.dbId = dbInfo->dbId, .vgVersion = dbInfo->vgVersion}; + if (newAdded) { + CTG_ERR_JRET(ctgMetaRentAdd(&pCatalog->dbRent, &vgVersion, dbInfo->dbId, sizeof(SDbVgVersion))); + } else { + CTG_ERR_JRET(ctgMetaRentUpdate(&pCatalog->dbRent, &vgVersion, dbInfo->dbId, sizeof(SDbVgVersion), ctgDbVgVersionCompare)); + } + ctgDebug("dbName:%s vgroup updated, vgVersion:%d", dbName, dbInfo->vgVersion); - dbInfo->vgInfo = NULL; _return: @@ -678,34 +1188,23 @@ _return: } int32_t catalogGetTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta) { - return ctgGetTableMetaImpl(pCatalog, pTransporter, pMgmtEps, pTableName, false, pTableMeta); + return ctgGetTableMeta(pCatalog, pTransporter, pMgmtEps, pTableName, false, pTableMeta, -1); } -int32_t catalogRenewTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName) { +int32_t catalogGetSTableMeta(struct SCatalog* pCatalog, void * pTransporter, const 
SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta) { + return ctgGetTableMeta(pCatalog, pTransporter, pMgmtEps, pTableName, false, pTableMeta, 1); +} + +int32_t catalogRenewTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, int32_t isSTable) { if (NULL == pCatalog || NULL == pTransporter || NULL == pMgmtEps || NULL == pTableName) { CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } - SVgroupInfo vgroupInfo = {0}; - int32_t code = 0; - - CTG_ERR_RET(catalogGetTableHashVgroup(pCatalog, pTransporter, pMgmtEps, pTableName, &vgroupInfo)); - - STableMetaOutput output = {0}; - - CTG_ERR_RET(ctgGetTableMetaFromVnode(pCatalog, pTransporter, pMgmtEps, pTableName, &vgroupInfo, &output)); - - //CTG_ERR_RET(ctgGetTableMetaFromMnode(pCatalog, pRpc, pMgmtEps, pTableName, &output)); - - CTG_ERR_JRET(ctgUpdateTableMetaCache(pCatalog, &output)); - -_return: - tfree(output.tbMeta); - CTG_RET(code); + return ctgRenewTableMetaImpl(pCatalog, pTransporter, pMgmtEps, pTableName, isSTable); } -int32_t catalogRenewAndGetTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta) { - return ctgGetTableMetaImpl(pCatalog, pTransporter, pMgmtEps, pTableName, true, pTableMeta); +int32_t catalogRenewAndGetTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta, int32_t isSTable) { + return ctgGetTableMeta(pCatalog, pTransporter, pMgmtEps, pTableName, true, pTableMeta, isSTable); } int32_t catalogGetTableDistVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, SArray** pVgroupList) { @@ -721,29 +1220,39 @@ int32_t catalogGetTableDistVgroup(struct SCatalog* pCatalog, void *pRpc, const S *pVgroupList = NULL; - CTG_ERR_JRET(catalogGetTableMeta(pCatalog, pRpc, pMgmtEps, pTableName, &tbMeta)); + CTG_ERR_JRET(ctgGetTableMeta(pCatalog, pRpc, pMgmtEps, 
pTableName, false, &tbMeta, -1)); char db[TSDB_DB_FNAME_LEN] = {0}; tNameGetFullDbName(pTableName, db); CTG_ERR_JRET(ctgGetDBVgroup(pCatalog, pRpc, pMgmtEps, db, false, &dbVgroup)); + // REMOEV THIS .... + if (0 == tbMeta->vgId) { + SVgroupInfo vgroup = {0}; + + catalogGetTableHashVgroup(pCatalog, pRpc, pMgmtEps, pTableName, &vgroup); + + tbMeta->vgId = vgroup.vgId; + } + // REMOVE THIS .... + if (tbMeta->tableType == TSDB_SUPER_TABLE) { CTG_ERR_JRET(ctgGetVgInfoFromDB(pCatalog, pRpc, pMgmtEps, dbVgroup, pVgroupList)); } else { int32_t vgId = tbMeta->vgId; if (NULL == taosHashGetClone(dbVgroup->vgInfo, &vgId, sizeof(vgId), &vgroupInfo)) { - ctgError("vgId[%d] not found in vgroup list", vgId); + ctgError("table's vgId not found in vgroup list, vgId:%d, tbName:%s", vgId, pTableName->tname); CTG_ERR_JRET(TSDB_CODE_CTG_INTERNAL_ERROR); } vgList = taosArrayInit(1, sizeof(SVgroupInfo)); if (NULL == vgList) { - ctgError("taosArrayInit failed"); + ctgError("taosArrayInit %d failed", (int32_t)sizeof(SVgroupInfo)); CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } if (NULL == taosArrayPush(vgList, &vgroupInfo)) { - ctgError("push vgroupInfo to array failed"); + ctgError("taosArrayPush vgroupInfo to array failed, vgId:%d, tbName:%s", vgId, pTableName->tname); CTG_ERR_JRET(TSDB_CODE_CTG_INTERNAL_ERROR); } @@ -777,7 +1286,7 @@ int32_t catalogGetTableHashVgroup(struct SCatalog *pCatalog, void *pTransporter, CTG_ERR_RET(ctgGetDBVgroup(pCatalog, pTransporter, pMgmtEps, db, false, &dbInfo)); - CTG_ERR_JRET(ctgGetVgInfoFromHashValue(dbInfo, pTableName, pVgroup)); + CTG_ERR_JRET(ctgGetVgInfoFromHashValue(pCatalog, dbInfo, pTableName, pVgroup)); _return: if (dbInfo) { @@ -789,8 +1298,8 @@ _return: } -int32_t catalogGetAllMeta(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SCatalogReq* pReq, SMetaData* pRsp) { - if (NULL == pCatalog || NULL == pRpc || NULL == pMgmtEps || NULL == pReq || NULL == pRsp) { +int32_t catalogGetAllMeta(struct SCatalog* pCatalog, void 
*pTransporter, const SEpSet* pMgmtEps, const SCatalogReq* pReq, SMetaData* pRsp) { + if (NULL == pCatalog || NULL == pTransporter || NULL == pMgmtEps || NULL == pReq || NULL == pRsp) { CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } @@ -799,13 +1308,13 @@ int32_t catalogGetAllMeta(struct SCatalog* pCatalog, void *pRpc, const SEpSet* p if (pReq->pTableName) { int32_t tbNum = (int32_t)taosArrayGetSize(pReq->pTableName); if (tbNum <= 0) { - ctgError("empty table name list"); + ctgError("empty table name list, tbNum:%d", tbNum); CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } pRsp->pTableMeta = taosArrayInit(tbNum, POINTER_BYTES); if (NULL == pRsp->pTableMeta) { - ctgError("taosArrayInit num[%d] failed", tbNum); + ctgError("taosArrayInit %d failed", tbNum); CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); } @@ -813,7 +1322,7 @@ int32_t catalogGetAllMeta(struct SCatalog* pCatalog, void *pRpc, const SEpSet* p SName *name = taosArrayGet(pReq->pTableName, i); STableMeta *pTableMeta = NULL; - CTG_ERR_JRET(catalogGetTableMeta(pCatalog, pRpc, pMgmtEps, name, &pTableMeta)); + CTG_ERR_JRET(ctgGetTableMeta(pCatalog, pTransporter, pMgmtEps, name, false, &pTableMeta, -1)); if (NULL == taosArrayPush(pRsp->pTableMeta, &pTableMeta)) { ctgError("taosArrayPush failed, idx:%d", i); @@ -826,7 +1335,6 @@ int32_t catalogGetAllMeta(struct SCatalog* pCatalog, void *pRpc, const SEpSet* p return TSDB_CODE_SUCCESS; _return: - if (pRsp->pTableMeta) { int32_t aSize = taosArrayGetSize(pRsp->pTableMeta); for (int32_t i = 0; i < aSize; ++i) { @@ -846,16 +1354,49 @@ int32_t catalogGetQnodeList(struct SCatalog* pCatalog, void *pRpc, const SEpSet* CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } + //TODO return TSDB_CODE_SUCCESS; } +int32_t catalogGetExpiredSTables(struct SCatalog* pCatalog, SSTableMetaVersion **stables, uint32_t *num) { + if (NULL == pCatalog || NULL == stables || NULL == num) { + CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); + } + + CTG_RET(ctgMetaRentGet(&pCatalog->stableRent, (void **)stables, num, 
sizeof(SSTableMetaVersion))); +} + +int32_t catalogGetExpiredDBs(struct SCatalog* pCatalog, SDbVgVersion **dbs, uint32_t *num) { + if (NULL == pCatalog || NULL == dbs || NULL == num) { + CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); + } + + CTG_RET(ctgMetaRentGet(&pCatalog->dbRent, (void **)dbs, num, sizeof(SDbVgVersion))); +} + void catalogDestroy(void) { - if (ctgMgmt.pCluster) { - taosHashCleanup(ctgMgmt.pCluster); //TBD - ctgMgmt.pCluster = NULL; + if (NULL == ctgMgmt.pCluster) { + return; } + + SCatalog *pCatalog = NULL; + void *pIter = taosHashIterate(ctgMgmt.pCluster, NULL); + while (pIter) { + pCatalog = *(SCatalog **)pIter; + + if (pCatalog) { + catalogFreeHandle(pCatalog); + } + + pIter = taosHashIterate(ctgMgmt.pCluster, pIter); + } + + taosHashCleanup(ctgMgmt.pCluster); + ctgMgmt.pCluster = NULL; + + qInfo("catalog destroyed"); } diff --git a/source/libs/catalog/test/CMakeLists.txt b/source/libs/catalog/test/CMakeLists.txt index 3c7418bdcc..d12e0f310c 100644 --- a/source/libs/catalog/test/CMakeLists.txt +++ b/source/libs/catalog/test/CMakeLists.txt @@ -16,3 +16,8 @@ TARGET_INCLUDE_DIRECTORIES( PUBLIC "${CMAKE_SOURCE_DIR}/include/libs/catalog/" PRIVATE "${CMAKE_SOURCE_DIR}/source/libs/catalog/inc" ) + +add_test( + NAME catalogTest + COMMAND catalogTest +) diff --git a/source/libs/catalog/test/catalogTests.cpp b/source/libs/catalog/test/catalogTests.cpp index 1d8a48dfcb..49e3ef532f 100644 --- a/source/libs/catalog/test/catalogTests.cpp +++ b/source/libs/catalog/test/catalogTests.cpp @@ -42,10 +42,13 @@ extern "C" int32_t ctgUpdateTableMetaCache(struct SCatalog *pCatalog, STableMeta void ctgTestSetPrepareTableMeta(); void ctgTestSetPrepareCTableMeta(); void ctgTestSetPrepareSTableMeta(); +void ctgTestSetPrepareMultiSTableMeta(); bool ctgTestStop = false; bool ctgTestEnableSleep = false; -bool ctgTestDeadLoop = true; +bool ctgTestDeadLoop = false; +int32_t ctgTestPrintNum = 200000; +int32_t ctgTestMTRunSec = 30; int32_t ctgTestCurrentVgVersion = 0; int32_t 
ctgTestVgVersion = 1; @@ -54,6 +57,8 @@ int32_t ctgTestColNum = 2; int32_t ctgTestTagNum = 1; int32_t ctgTestSVersion = 1; int32_t ctgTestTVersion = 1; +int32_t ctgTestSuid = 2; +int64_t ctgTestDbId = 33; uint64_t ctgTestClusterId = 0x1; char *ctgTestDbname = "1.db1"; @@ -101,7 +106,6 @@ void ctgTestInitLogFile() { const char *defaultLogFileNamePrefix = "taoslog"; const int32_t maxLogFileNum = 10; - ctgDebugFlag = 159; tsAsyncLog = 0; char temp[128] = {0}; @@ -128,7 +132,7 @@ void ctgTestBuildCTableMetaOutput(STableMetaOutput *output) { char tbFullName[TSDB_TABLE_FNAME_LEN]; tNameExtractFullName(&cn, tbFullName); - output->metaNum = 2; + SET_META_TYPE_BOTH_TABLE(output->metaType); strcpy(output->ctbFname, tbFullName); @@ -183,6 +187,7 @@ void ctgTestBuildDBVgroup(SDBVgroupInfo *dbVgroup) { ctgTestCurrentVgVersion = dbVgroup->vgVersion; dbVgroup->hashMethod = 0; + dbVgroup->dbId = ctgTestDbId; dbVgroup->vgInfo = taosHashInit(ctgTestVgNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); vgNum = ctgTestGetVgNumFromVgVersion(dbVgroup->vgVersion); @@ -216,6 +221,7 @@ void ctgTestPrepareDbVgroups(void *shandle, SEpSet *pEpSet, SRpcMsg *pMsg, SRpcM ctgTestCurrentVgVersion = ctgTestVgVersion; rspMsg->vgNum = htonl(ctgTestVgNum); rspMsg->hashMethod = 0; + rspMsg->uid = htobe64(ctgTestDbId); SVgroupInfo *vg = NULL; uint32_t hashUnit = UINT32_MAX / ctgTestVgNum; @@ -338,8 +344,8 @@ void ctgTestPrepareSTableMeta(void *shandle, SEpSet *pEpSet, SRpcMsg *pMsg, SRpc rspMsg->update = 1; rspMsg->sversion = htonl(ctgTestSVersion); rspMsg->tversion = htonl(ctgTestTVersion); - rspMsg->suid = htobe64(0x0000000000000002); - rspMsg->tuid = htobe64(0x0000000000000003); + rspMsg->suid = htobe64(ctgTestSuid); + rspMsg->tuid = htobe64(ctgTestSuid); rspMsg->vgId = 0; SSchema *s = NULL; @@ -365,6 +371,53 @@ void ctgTestPrepareSTableMeta(void *shandle, SEpSet *pEpSet, SRpcMsg *pMsg, SRpc return; } +void ctgTestPrepareMultiSTableMeta(void *shandle, SEpSet *pEpSet, SRpcMsg 
*pMsg, SRpcMsg *pRsp) { + STableMetaMsg *rspMsg = NULL; //todo + static int32_t idx = 1; + + pRsp->code =0; + pRsp->contLen = sizeof(STableMetaMsg) + (ctgTestColNum + ctgTestTagNum) * sizeof(SSchema); + pRsp->pCont = calloc(1, pRsp->contLen); + rspMsg = (STableMetaMsg *)pRsp->pCont; + sprintf(rspMsg->tbFname, "%s.%s_%d", ctgTestDbname, ctgTestSTablename, idx); + sprintf(rspMsg->stbFname, "%s.%s_%d", ctgTestDbname, ctgTestSTablename, idx); + rspMsg->numOfTags = htonl(ctgTestTagNum); + rspMsg->numOfColumns = htonl(ctgTestColNum); + rspMsg->precision = 1; + rspMsg->tableType = TSDB_SUPER_TABLE; + rspMsg->update = 1; + rspMsg->sversion = htonl(ctgTestSVersion); + rspMsg->tversion = htonl(ctgTestTVersion); + rspMsg->suid = htobe64(ctgTestSuid + idx); + rspMsg->tuid = htobe64(ctgTestSuid + idx); + rspMsg->vgId = 0; + + SSchema *s = NULL; + s = &rspMsg->pSchema[0]; + s->type = TSDB_DATA_TYPE_TIMESTAMP; + s->colId = htonl(1); + s->bytes = htonl(8); + strcpy(s->name, "ts"); + + s = &rspMsg->pSchema[1]; + s->type = TSDB_DATA_TYPE_INT; + s->colId = htonl(2); + s->bytes = htonl(4); + strcpy(s->name, "col1s"); + + s = &rspMsg->pSchema[2]; + s->type = TSDB_DATA_TYPE_BINARY; + s->colId = htonl(3); + s->bytes = htonl(12); + strcpy(s->name, "tag1s"); + + ++idx; + + return; +} + + + void ctgTestPrepareDbVgroupsAndNormalMeta(void *shandle, SEpSet *pEpSet, SRpcMsg *pMsg, SRpcMsg *pRsp) { ctgTestPrepareDbVgroups(shandle, pEpSet, pMsg, pRsp); @@ -390,6 +443,14 @@ void ctgTestPrepareDbVgroupsAndSuperMeta(void *shandle, SEpSet *pEpSet, SRpcMsg return; } +void ctgTestPrepareDbVgroupsAndMultiSuperMeta(void *shandle, SEpSet *pEpSet, SRpcMsg *pMsg, SRpcMsg *pRsp) { + ctgTestPrepareDbVgroups(shandle, pEpSet, pMsg, pRsp); + + ctgTestSetPrepareMultiSTableMeta(); + + return; +} + void ctgTestSetPrepareDbVgroups() { @@ -444,6 +505,20 @@ void ctgTestSetPrepareSTableMeta() { } } +void ctgTestSetPrepareMultiSTableMeta() { + static Stub stub; + stub.set(rpcSendRecv, ctgTestPrepareMultiSTableMeta); + { 
+ AddrAny any("libtransport.so"); + std::map result; + any.get_global_func_addr_dynsym("^rpcSendRecv$", result); + for (const auto& f : result) { + stub.set(f.second, ctgTestPrepareMultiSTableMeta); + } + } +} + + void ctgTestSetPrepareDbVgroupsAndNormalMeta() { static Stub stub; stub.set(rpcSendRecv, ctgTestPrepareDbVgroupsAndNormalMeta); @@ -484,6 +559,19 @@ void ctgTestSetPrepareDbVgroupsAndSuperMeta() { } } +void ctgTestSetPrepareDbVgroupsAndMultiSuperMeta() { + static Stub stub; + stub.set(rpcSendRecv, ctgTestPrepareDbVgroupsAndMultiSuperMeta); + { + AddrAny any("libtransport.so"); + std::map result; + any.get_global_func_addr_dynsym("^rpcSendRecv$", result); + for (const auto& f : result) { + stub.set(f.second, ctgTestPrepareDbVgroupsAndMultiSuperMeta); + } + } +} + } @@ -507,7 +595,7 @@ void *ctgTestGetDbVgroupThread(void *param) { if (ctgTestEnableSleep) { usleep(rand()%5); } - if (++n % 50000 == 0) { + if (++n % ctgTestPrintNum == 0) { printf("Get:%d\n", n); } } @@ -531,7 +619,7 @@ void *ctgTestSetDbVgroupThread(void *param) { if (ctgTestEnableSleep) { usleep(rand()%5); } - if (++n % 50000 == 0) { + if (++n % ctgTestPrintNum == 0) { printf("Set:%d\n", n); } } @@ -563,7 +651,7 @@ void *ctgTestGetCtableMetaThread(void *param) { usleep(rand()%5); } - if (++n % 50000 == 0) { + if (++n % ctgTestPrintNum == 0) { printf("Get:%d\n", n); } } @@ -589,7 +677,7 @@ void *ctgTestSetCtableMetaThread(void *param) { if (ctgTestEnableSleep) { usleep(rand()%5); } - if (++n % 50000 == 0) { + if (++n % ctgTestPrintNum == 0) { printf("Set:%d\n", n); } } @@ -600,7 +688,6 @@ void *ctgTestSetCtableMetaThread(void *param) { } -#if 0 TEST(tableMeta, normalTable) { struct SCatalog* pCtg = NULL; @@ -628,6 +715,7 @@ TEST(tableMeta, normalTable) { ASSERT_EQ(vgInfo.vgId, 8); ASSERT_EQ(vgInfo.numOfEps, 3); + ctgTestSetPrepareTableMeta(); STableMeta *tableMeta = NULL; @@ -654,6 +742,41 @@ TEST(tableMeta, normalTable) { ASSERT_EQ(tableMeta->tableInfo.precision, 1); 
ASSERT_EQ(tableMeta->tableInfo.rowSize, 12); + SDbVgVersion *dbs = NULL; + SSTableMetaVersion *stb = NULL; + uint32_t dbNum = 0, stbNum = 0, allDbNum = 0, allStbNum = 0; + int32_t i = 0; + while (i < 5) { + ++i; + code = catalogGetExpiredDBs(pCtg, &dbs, &dbNum); + ASSERT_EQ(code, 0); + code = catalogGetExpiredSTables(pCtg, &stb, &stbNum); + ASSERT_EQ(code, 0); + + if (dbNum) { + printf("got expired db,dbId:%"PRId64"\n", dbs->dbId); + free(dbs); + dbs = NULL; + } else { + printf("no expired db\n"); + } + + if (stbNum) { + printf("got expired stb,suid:%"PRId64"\n", stb->suid); + free(stb); + stb = NULL; + } else { + printf("no expired stb\n"); + } + + allDbNum += dbNum; + allStbNum += stbNum; + sleep(2); + } + + ASSERT_EQ(allDbNum, 1); + ASSERT_EQ(allStbNum, 0); + catalogDestroy(); } @@ -715,6 +838,42 @@ TEST(tableMeta, childTableCase) { ASSERT_EQ(tableMeta->tableInfo.precision, 1); ASSERT_EQ(tableMeta->tableInfo.rowSize, 12); + SDbVgVersion *dbs = NULL; + SSTableMetaVersion *stb = NULL; + uint32_t dbNum = 0, stbNum = 0, allDbNum = 0, allStbNum = 0; + int32_t i = 0; + while (i < 5) { + ++i; + code = catalogGetExpiredDBs(pCtg, &dbs, &dbNum); + ASSERT_EQ(code, 0); + code = catalogGetExpiredSTables(pCtg, &stb, &stbNum); + ASSERT_EQ(code, 0); + + if (dbNum) { + printf("got expired db,dbId:%"PRId64"\n", dbs->dbId); + free(dbs); + dbs = NULL; + } else { + printf("no expired db\n"); + } + + if (stbNum) { + printf("got expired stb,suid:%"PRId64"\n", stb->suid); + free(stb); + stb = NULL; + } else { + printf("no expired stb\n"); + } + + allDbNum += dbNum; + allStbNum += stbNum; + sleep(2); + } + + ASSERT_EQ(allDbNum, 1); + ASSERT_EQ(allStbNum, 1); + + catalogDestroy(); } @@ -745,6 +904,8 @@ TEST(tableMeta, superTableCase) { ASSERT_EQ(tableMeta->tableType, TSDB_SUPER_TABLE); ASSERT_EQ(tableMeta->sversion, ctgTestSVersion); ASSERT_EQ(tableMeta->tversion, ctgTestTVersion); + ASSERT_EQ(tableMeta->uid, ctgTestSuid); + ASSERT_EQ(tableMeta->suid, ctgTestSuid); 
ASSERT_EQ(tableMeta->tableInfo.numOfColumns, ctgTestColNum); ASSERT_EQ(tableMeta->tableInfo.numOfTags, ctgTestTagNum); ASSERT_EQ(tableMeta->tableInfo.precision, 1); @@ -768,7 +929,7 @@ TEST(tableMeta, superTableCase) { ASSERT_EQ(tableMeta->tableInfo.rowSize, 12); tableMeta = NULL; - code = catalogRenewAndGetTableMeta(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &tableMeta); + code = catalogRenewAndGetTableMeta(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &tableMeta, 0); ASSERT_EQ(code, 0); ASSERT_EQ(tableMeta->vgId, 9); ASSERT_EQ(tableMeta->tableType, TSDB_CHILD_TABLE); @@ -779,6 +940,40 @@ TEST(tableMeta, superTableCase) { ASSERT_EQ(tableMeta->tableInfo.precision, 1); ASSERT_EQ(tableMeta->tableInfo.rowSize, 12); + SDbVgVersion *dbs = NULL; + SSTableMetaVersion *stb = NULL; + uint32_t dbNum = 0, stbNum = 0, allDbNum = 0, allStbNum = 0; + int32_t i = 0; + while (i < 5) { + ++i; + code = catalogGetExpiredDBs(pCtg, &dbs, &dbNum); + ASSERT_EQ(code, 0); + code = catalogGetExpiredSTables(pCtg, &stb, &stbNum); + ASSERT_EQ(code, 0); + + if (dbNum) { + printf("got expired db,dbId:%"PRId64"\n", dbs->dbId); + free(dbs); + dbs = NULL; + } else { + printf("no expired db\n"); + } + + if (stbNum) { + printf("got expired stb,suid:%"PRId64"\n", stb->suid); + free(stb); + stb = NULL; + } else { + printf("no expired stb\n"); + } + + allDbNum += dbNum; + allStbNum += stbNum; + sleep(2); + } + + ASSERT_EQ(allDbNum, 1); + ASSERT_EQ(allStbNum, 1); catalogDestroy(); @@ -948,7 +1143,6 @@ TEST(dbVgroup, getSetDbVgroupCase) { catalogDestroy(); } - TEST(multiThread, getSetDbVgroupCase) { struct SCatalog* pCtg = NULL; void *mockPointer = (void *)0x1; @@ -956,6 +1150,7 @@ TEST(multiThread, getSetDbVgroupCase) { SVgroupInfo *pvgInfo = NULL; SDBVgroupInfo dbVgroup = {0}; SArray *vgList = NULL; + ctgTestStop = false; ctgTestInitLogFile(); @@ -988,7 +1183,7 @@ TEST(multiThread, getSetDbVgroupCase) { if (ctgTestDeadLoop) { sleep(1); } else { - sleep(600); + sleep(ctgTestMTRunSec); 
break; } } @@ -999,9 +1194,6 @@ TEST(multiThread, getSetDbVgroupCase) { catalogDestroy(); } -#endif - - TEST(multiThread, ctableMeta) { struct SCatalog* pCtg = NULL; void *mockPointer = (void *)0x1; @@ -1009,6 +1201,7 @@ TEST(multiThread, ctableMeta) { SVgroupInfo *pvgInfo = NULL; SDBVgroupInfo dbVgroup = {0}; SArray *vgList = NULL; + ctgTestStop = false; ctgTestSetPrepareDbVgroupsAndChildMeta(); @@ -1038,7 +1231,7 @@ TEST(multiThread, ctableMeta) { if (ctgTestDeadLoop) { sleep(1); } else { - sleep(600); + sleep(ctgTestMTRunSec); break; } } @@ -1050,6 +1243,78 @@ TEST(multiThread, ctableMeta) { } +TEST(rentTest, allRent) { + struct SCatalog* pCtg = NULL; + void *mockPointer = (void *)0x1; + SVgroupInfo vgInfo = {0}; + SVgroupInfo *pvgInfo = NULL; + SDBVgroupInfo dbVgroup = {0}; + SArray *vgList = NULL; + ctgTestStop = false; + SDbVgVersion *dbs = NULL; + SSTableMetaVersion *stable = NULL; + uint32_t num = 0; + + ctgTestSetPrepareDbVgroupsAndMultiSuperMeta(); + + initQueryModuleMsgHandle(); + + int32_t code = catalogInit(NULL); + ASSERT_EQ(code, 0); + + code = catalogGetHandle(ctgTestClusterId, &pCtg); + ASSERT_EQ(code, 0); + + + SName n = {.type = TSDB_TABLE_NAME_T, .acctId = 1}; + strcpy(n.dbname, "db1"); + + for (int32_t i = 1; i <= 10; ++i) { + sprintf(n.tname, "%s_%d", ctgTestSTablename, i); + + STableMeta *tableMeta = NULL; + code = catalogGetSTableMeta(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &tableMeta); + ASSERT_EQ(code, 0); + ASSERT_EQ(tableMeta->vgId, 0); + ASSERT_EQ(tableMeta->tableType, TSDB_SUPER_TABLE); + ASSERT_EQ(tableMeta->sversion, ctgTestSVersion); + ASSERT_EQ(tableMeta->tversion, ctgTestTVersion); + ASSERT_EQ(tableMeta->uid, ctgTestSuid + i); + ASSERT_EQ(tableMeta->suid, ctgTestSuid + i); + ASSERT_EQ(tableMeta->tableInfo.numOfColumns, ctgTestColNum); + ASSERT_EQ(tableMeta->tableInfo.numOfTags, ctgTestTagNum); + ASSERT_EQ(tableMeta->tableInfo.precision, 1); + ASSERT_EQ(tableMeta->tableInfo.rowSize, 12); + + code = 
catalogGetExpiredDBs(pCtg, &dbs, &num); + ASSERT_EQ(code, 0); + printf("%d - expired dbNum:%d\n", i, num); + if (dbs) { + printf("%d - expired dbId:%"PRId64", vgVersion:%d\n", i, dbs->dbId, dbs->vgVersion); + free(dbs); + dbs = NULL; + } + + code = catalogGetExpiredSTables(pCtg, &stable, &num); + ASSERT_EQ(code, 0); + printf("%d - expired stableNum:%d\n", i, num); + if (stable) { + for (int32_t n = 0; n < num; ++n) { + printf("suid:%"PRId64", sversion:%d, tversion:%d\n", stable[n].suid, stable[n].sversion, stable[n].tversion); + } + free(stable); + stable = NULL; + } + printf("*************************************************\n"); + + sleep(2); + } + + catalogDestroy(); +} + + + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/source/libs/executor/CMakeLists.txt b/source/libs/executor/CMakeLists.txt index a6f70b9e83..ba941ab22d 100644 --- a/source/libs/executor/CMakeLists.txt +++ b/source/libs/executor/CMakeLists.txt @@ -8,5 +8,5 @@ target_include_directories( target_link_libraries( executor - PRIVATE os util common function parser + PRIVATE os util common function parser planner qcom ) \ No newline at end of file diff --git a/source/libs/executor/inc/dataSinkInt.h b/source/libs/executor/inc/dataSinkInt.h new file mode 100644 index 0000000000..1bbf5494dd --- /dev/null +++ b/source/libs/executor/inc/dataSinkInt.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */
+
+#ifndef _DATA_SINK_INT_H
+#define _DATA_SINK_INT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "common.h"
+#include "dataSinkMgt.h"
+
+struct SDataSink;
+struct SDataSinkHandle;
+/*
+ * Data-sink manager state. createDataDispatcher() keeps a pointer to it inside the
+ * dispatcher it builds.
+ */
+typedef struct SDataSinkManager {
+  SDataSinkMgtCfg cfg;    // limits; dataDispatcher.c reads cfg.maxDataBlockNumPerQuery as its queue cap
+  pthread_mutex_t mutex;  // NOTE(review): not touched by dataDispatcher.c; presumably guards manager-wide state - confirm
+} SDataSinkManager;
+/*
+ * Callback types a concrete sink provides. dataDispatcher.c casts the pHandle argument
+ * back to its own handle type, which embeds SDataSinkHandle as its first member.
+ *   FPutDataBlock      - serialize one input block into the sink and write a DS_* status to pStatus;
+ *                        returns an error code
+ *   FEndPut            - mark that the producer will put no more blocks
+ *   FGetDataLength     - return the payload length of the next block, or 0 when nothing is queued
+ *   FGetDataBlock      - copy the next block into pOutput (pOutput->pData is provided by the caller)
+ *   FDestroyDataSinker - release the buffers, the queue and the mutex owned by the sink
+ */
+typedef int32_t (*FPutDataBlock)(struct SDataSinkHandle* pHandle, const SInputData* pInput, int32_t* pStatus);
+typedef void (*FEndPut)(struct SDataSinkHandle* pHandle);
+typedef int32_t (*FGetDataLength)(struct SDataSinkHandle* pHandle, int32_t* pStatus);
+typedef int32_t (*FGetDataBlock)(struct SDataSinkHandle* pHandle, SOutPutData* pOutput, int32_t* pStatus);
+typedef int32_t (*FDestroyDataSinker)(struct SDataSinkHandle* pHandle);
+/*
+ * Function table shared by all sink implementations; filled in by the function that
+ * creates the concrete sink (createDataDispatcher for the dispatcher).
+ */
+typedef struct SDataSinkHandle {
+  FPutDataBlock fPut;
+  FEndPut fEndPut;
+  FGetDataLength fGetLen;
+  FGetDataBlock fGetData;
+  FDestroyDataSinker fDestroy;
+} SDataSinkHandle;
+/*
+ * Create a dispatcher-backed sink for pDataSink (its schema is copied) and store it in *pHandle.
+ * @param pManager   manager whose cfg limits the dispatcher's queue
+ * @param pDataSink  plan node describing the sink
+ * @param pHandle    output
+ * @return TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY (also stored in terrno) when allocation fails
+ */
+int32_t createDataDispatcher(SDataSinkManager* pManager, const struct SDataSink* pDataSink, DataSinkHandle* pHandle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*_DATA_SINK_INT_H*/
diff --git a/source/libs/executor/inc/dataSinkMgt.h b/source/libs/executor/inc/dataSinkMgt.h
new file mode 100644
index 0000000000..d13423b25d
--- /dev/null
+++ b/source/libs/executor/inc/dataSinkMgt.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _DATA_SINK_MGT_H
+#define _DATA_SINK_MGT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "os.h"
+#include "executorimpl.h"
+/*
+ * Status codes handed back through the pStatus out-parameters below.
+ */
+#define DS_CAPACITY_ENOUGH 1  // dataDispatcher.c: queue holds fewer than cfg.maxDataBlockNumPerQuery blocks
+#define DS_CAPACITY_FULL   2  // dataDispatcher.c: queue has reached cfg.maxDataBlockNumPerQuery blocks
+#define DS_NEED_SCHEDULE   3  // NOTE(review): not produced by dataDispatcher.c; see dsScheduleProcess - confirm meaning
+#define DS_END             4  // dataDispatcher.c stores this once its end-put callback has run
+#define DS_IN_PROCESS      5  // presumably "nothing queued yet, producer still running" - TODO confirm
+
+struct SDataSink;
+struct SSDataBlock;
+
+typedef struct SDataSinkMgtCfg {
+  uint32_t maxDataBlockNum;          // NOTE(review): not read by dataDispatcher.c; presumably a manager-wide cap - confirm
+  uint32_t maxDataBlockNumPerQuery;  // dataDispatcher.c refuses a new block once its queue holds this many
+} SDataSinkMgtCfg;
+
+int32_t dsDataSinkMgtInit(SDataSinkMgtCfg *cfg);
+
+typedef void* DataSinkHandle;  // opaque sink handle; createDataDispatcher stores its dispatcher object here
+
+typedef struct SInputData {
+  const SSDataBlock* pData;                // result block; dataDispatcher.c reads info.rows and the pDataBlock columns
+  SHashObj*          pTableRetrieveTsMap;  // iterated by dataDispatcher.c as STableIdInfo items (uid, key)
+} SInputData;
+
+typedef struct SOutPutData {
+  int32_t numOfRows;   // row count of the returned block
+  int8_t  compressed;  // copied from the cached entry; non-zero when the column data was compressed
+  char*   pData;       // caller-provided destination; dataDispatcher.c memcpy()s the block payload into it
+} SOutPutData;
+
+/**
+ * Create a subplan's datasinker handle for all later operations.
+ * @param pDataSink
+ * @param pHandle output
+ * @return error code
+ */
+int32_t dsCreateDataSinker(const struct SDataSink *pDataSink, DataSinkHandle* pHandle);
+
+/**
+ * Put the result set returned by the executor into datasinker.
+ * @param handle
+ * @param pInput  block to store, together with its table retrieve-timestamp map
+ * @param pStatus output, a DS_* status (the dispatcher reports DS_CAPACITY_ENOUGH or DS_CAPACITY_FULL)
+ * @return error code
+ */
+int32_t dsPutDataBlock(DataSinkHandle handle, const SInputData* pInput, int32_t* pStatus);
+
+void dsEndPut(DataSinkHandle handle);  // tell the sink that no more blocks will be put
+
+/**
+ * Get the length of the data returned by the next call to dsGetDataBlock.
+ * @param handle
+ * @param pStatus output, written by the dispatcher when no block is queued
+ * @return data length
+ */
+int32_t dsGetDataLength(DataSinkHandle handle, int32_t* pStatus);
+
+/**
+ * Get data, the caller needs to allocate data memory.
+ * @param handle
+ * @param pOutput output
+ * @param pStatus output
+ * @return error code
+ */
+int32_t dsGetDataBlock(DataSinkHandle handle, SOutPutData* pOutput, int32_t* pStatus);
+
+/**
+ * After DS_NEED_SCHEDULE is reported, the caller needs to put this into the work queue.
+ * NOTE(review): the original wording named dsGetStatus, which this header does not declare.
+ * @param ahandle
+ * @param pItem
+ */
+void dsScheduleProcess(void* ahandle, void* pItem);
+
+/**
+ * Destroy the datasinker handle.
+ * @param handle + */ +void dsDestroyDataSinker(DataSinkHandle handle); + +#ifdef __cplusplus +} +#endif + +#endif /*_DATA_SINK_MGT_H*/ diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index 7e910d5674..2c1bf71638 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -88,37 +88,37 @@ typedef struct SResultRowPool { SArray* pData; // SArray } SResultRowPool; -struct SQueryAttr; -struct SQueryRuntimeEnv; +struct STaskAttr; +struct STaskRuntimeEnv; struct SUdfInfo; -int32_t getOutputInterResultBufSize(struct SQueryAttr* pQueryAttr); +int32_t getOutputInterResultBufSize(struct STaskAttr* pQueryAttr); -size_t getResultRowSize(struct SQueryRuntimeEnv* pRuntimeEnv); +size_t getResultRowSize(struct STaskRuntimeEnv* pRuntimeEnv); int32_t initResultRowInfo(SResultRowInfo* pResultRowInfo, int32_t size, int16_t type); void cleanupResultRowInfo(SResultRowInfo* pResultRowInfo); -void resetResultRowInfo(struct SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo); +void resetResultRowInfo(struct STaskRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo); int32_t numOfClosedResultRows(SResultRowInfo* pResultRowInfo); void closeAllResultRows(SResultRowInfo* pResultRowInfo); int32_t initResultRow(SResultRow *pResultRow); void closeResultRow(SResultRowInfo* pResultRowInfo, int32_t slot); bool isResultRowClosed(SResultRowInfo *pResultRowInfo, int32_t slot); -void clearResultRow(struct SQueryRuntimeEnv* pRuntimeEnv, SResultRow* pResultRow, int16_t type); +void clearResultRow(struct STaskRuntimeEnv* pRuntimeEnv, SResultRow* pResultRow, int16_t type); struct SResultRowEntryInfo* getResultCell(const SResultRow* pRow, int32_t index, int32_t* offset); void* destroyQueryFuncExpr(SExprInfo* pExprInfo, int32_t numOfExpr); void* freeColumnInfo(SColumnInfo* pColumnInfo, int32_t numOfCols); -int32_t getRowNumForMultioutput(struct SQueryAttr* pQueryAttr, bool topBottomQuery, bool stable); +int32_t 
getRowNumForMultioutput(struct STaskAttr* pQueryAttr, bool topBottomQuery, bool stable); static FORCE_INLINE SResultRow *getResultRow(SResultRowInfo *pResultRowInfo, int32_t slot) { assert(pResultRowInfo != NULL && slot >= 0 && slot < pResultRowInfo->size); return pResultRowInfo->pResult[slot]; } -static FORCE_INLINE char* getPosInResultPage(struct SQueryAttr* pQueryAttr, SFilePage* page, int32_t rowOffset, +static FORCE_INLINE char* getPosInResultPage(struct STaskAttr* pQueryAttr, SFilePage* page, int32_t rowOffset, int32_t offset) { assert(rowOffset >= 0 && pQueryAttr != NULL); @@ -155,7 +155,7 @@ bool hasRemainData(SGroupResInfo* pGroupResInfo); bool incNextGroup(SGroupResInfo* pGroupResInfo); int32_t getNumOfTotalRes(SGroupResInfo* pGroupResInfo); -int32_t mergeIntoGroupResult(SGroupResInfo* pGroupResInfo, struct SQueryRuntimeEnv *pRuntimeEnv, int32_t* offset); +int32_t mergeIntoGroupResult(SGroupResInfo* pGroupResInfo, struct STaskRuntimeEnv *pRuntimeEnv, int32_t* offset); int32_t initUdfInfo(struct SUdfInfo* pUdfInfo); diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 907fb4d2bf..a268215d3d 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -21,13 +21,14 @@ #include "tvariant.h" #include "thash.h" -//#include "parser.h" #include "executil.h" #include "taosdef.h" #include "tarray.h" #include "tfilter.h" #include "tlockfree.h" #include "tpagedfile.h" +#include "planner.h" + struct SColumnFilterElem; @@ -65,7 +66,6 @@ enum { QUERY_OVER = 0x4u, }; - typedef struct SResultRowCell { uint64_t groupId; SResultRow *pRow; @@ -100,7 +100,7 @@ typedef struct STableQueryInfo { TSKEY lastKey; int32_t groupIndex; // group id in table list SVariant tag; - STimeWindow win; + STimeWindow win; // todo remove it later STSCursor cur; void* pTable; // for retrieve the page id list SResultRowInfo resInfo; @@ -128,31 +128,34 @@ typedef struct { int64_t sumRunTimes; } 
SOperatorProfResult; -typedef struct SQueryCostInfo { - uint64_t loadStatisTime; - uint64_t loadFileBlockTime; - uint64_t loadDataInCacheTime; - uint64_t loadStatisSize; - uint64_t loadFileBlockSize; - uint64_t loadDataInCacheSize; - - uint64_t loadDataTime; - uint64_t totalRows; - uint64_t totalCheckedRows; - uint32_t totalBlocks; - uint32_t loadBlocks; - uint32_t loadBlockStatis; - uint32_t discardBlocks; - uint64_t elapsedTime; - uint64_t firstStageMergeTime; - uint64_t winInfoSize; - uint64_t tableInfoSize; - uint64_t hashSize; - uint64_t numOfTimeWindows; +typedef struct STaskCostInfo { + int64_t start; + int64_t end; - SArray* queryProfEvents; //SArray - SHashObj* operatorProfResults; //map -} SQueryCostInfo; + uint64_t loadStatisTime; + uint64_t loadFileBlockTime; + uint64_t loadDataInCacheTime; + uint64_t loadStatisSize; + uint64_t loadFileBlockSize; + uint64_t loadDataInCacheSize; + + uint64_t loadDataTime; + uint64_t totalRows; + uint64_t totalCheckedRows; + uint32_t totalBlocks; + uint32_t loadBlocks; + uint32_t loadBlockStatis; + uint32_t discardBlocks; + uint64_t elapsedTime; + uint64_t firstStageMergeTime; + uint64_t winInfoSize; + uint64_t tableInfoSize; + uint64_t hashSize; + uint64_t numOfTimeWindows; + + SArray *queryProfEvents; //SArray + SHashObj *operatorProfResults; //map +} STaskCostInfo; typedef struct { int64_t vgroupLimit; @@ -166,7 +169,7 @@ typedef struct { // The basic query information extracted from the SQueryInfo tree to support the // execution of query in a data node. 
-typedef struct SQueryAttr { +typedef struct STaskAttr { SLimit limit; SLimit slimit; @@ -229,16 +232,40 @@ typedef struct SQueryAttr { STableGroupInfo tableGroupInfo; // table list SArray int32_t vgId; SArray *pUdfInfo; // no need to free -} SQueryAttr; +} STaskAttr; typedef SSDataBlock* (*__operator_fn_t)(void* param, bool* newgroup); typedef void (*__optr_cleanup_fn_t)(void* param, int32_t num); struct SOperatorInfo; -typedef struct SQueryRuntimeEnv { +typedef struct STaskIdInfo { + uint64_t queryId; // this is also a request id + uint64_t subplanId; + uint64_t templateId; + uint64_t taskId; // this is a subplan id +} STaskIdInfo; + +typedef struct STaskInfo { + STaskIdInfo id; + char *content; + uint32_t status; + STimeWindow window; + STaskCostInfo cost; + int64_t owner; // if it is in execution + + STableGroupInfo tableqinfoGroupInfo; // this is a group array list, including SArray structure + pthread_mutex_t lock; // used to synchronize the rsp/query threads +// tsem_t ready; +// int32_t dataReady; // denote if query result is ready or not +// void* rspContext; // response context + char *sql; // query sql string + jmp_buf env; +} STaskInfo; + +typedef struct STaskRuntimeEnv { jmp_buf env; - SQueryAttr* pQueryAttr; + STaskAttr* pQueryAttr; uint32_t status; // query status void* qinfo; uint8_t scanFlag; // denotes reversed scan of data or not @@ -271,7 +298,7 @@ typedef struct SQueryRuntimeEnv { SRspResultInfo resultInfo; SHashObj *pTableRetrieveTsMap; struct SUdfInfo *pUdfInfo; -} SQueryRuntimeEnv; +} STaskRuntimeEnv; enum { OP_IN_EXECUTING = 1, @@ -287,10 +314,11 @@ typedef struct SOperatorInfo { char *name; // name, used to show the query execution plan void *info; // extension attribution SExprInfo *pExpr; - SQueryRuntimeEnv *pRuntimeEnv; + STaskRuntimeEnv *pRuntimeEnv; + STaskInfo *pTaskInfo; - struct SOperatorInfo **upstream; // upstream pointer list - int32_t numOfUpstream; // number of upstream. 
The value is always ONE expect for join operator + struct SOperatorInfo **pDownstream; // downstram pointer list + int32_t numOfDownstream; // number of downstream. The value is always ONE expect for join operator __operator_fn_t exec; __optr_cleanup_fn_t cleanup; } SOperatorInfo; @@ -312,8 +340,8 @@ typedef struct SQInfo { int32_t code; // error code to returned to client int64_t owner; // if it is in execution - SQueryRuntimeEnv runtimeEnv; - SQueryAttr query; + STaskRuntimeEnv runtimeEnv; + STaskAttr query; void* pBuf; // allocated buffer for STableQueryInfo, sizeof(STableQueryInfo)*numOfTables; pthread_mutex_t lock; // used to synchronize the rsp/query threads @@ -322,10 +350,10 @@ typedef struct SQInfo { void* rspContext; // response context int64_t startExecTs; // start to exec timestamp char* sql; // query sql string - SQueryCostInfo summary; + STaskCostInfo summary; } SQInfo; -typedef struct SQueryParam { +typedef struct STaskParam { char *sql; char *tagCond; char *colCond; @@ -345,7 +373,7 @@ typedef struct SQueryParam { int32_t tableScanOperator; SArray *pOperator; struct SUdfInfo *pUdfInfo; -} SQueryParam; +} STaskParam; typedef struct STableScanInfo { void *pQueryHandle; @@ -366,9 +394,12 @@ typedef struct STableScanInfo { SSDataBlock block; int32_t numOfOutput; int64_t elapsedTime; - int32_t tableIndex; - int32_t prevGroupId; // previous table group id + + int32_t prevGroupId; // previous table group id + + int32_t scanFlag; // table scan flag to denote if it is a repeat/reverse/main scan + STimeWindow window; } STableScanInfo; typedef struct STagScanInfo { @@ -512,34 +543,34 @@ typedef struct SOrderOperatorInfo { void appendUpstream(SOperatorInfo* p, SOperatorInfo* pUpstream); -SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv, int32_t repeatTime, int32_t reverseTime); -SOperatorInfo* createTableScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv, int32_t repeatTime); -SOperatorInfo* 
createTableSeqScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv); +SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbQueryHandle, STaskRuntimeEnv* pRuntimeEnv, int32_t repeatTime, int32_t reverseTime); +SOperatorInfo* createTableScanOperator(void* pTsdbQueryHandle, int32_t order, int32_t numOfOutput, int32_t repeatTime); +SOperatorInfo* createTableSeqScanOperator(void* pTsdbQueryHandle, STaskRuntimeEnv* pRuntimeEnv); -SOperatorInfo* createAggregateOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createProjectOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createLimitOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream); -SOperatorInfo* createTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createAllTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createSWindowOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createFillOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, bool multigroupResult); -SOperatorInfo* createGroupbyOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createMultiTableAggOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createMultiTableTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createAllMultiTableTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); 
-SOperatorInfo* createTagScanOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createDistinctOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createTableBlockInfoScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv); -SOperatorInfo* createMultiwaySortOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput, +SOperatorInfo* createAggregateOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createProjectOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createLimitOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream); +SOperatorInfo* createTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createAllTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createSWindowOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createFillOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, bool multigroupResult); +SOperatorInfo* createGroupbyOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createMultiTableAggOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createMultiTableTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createAllMultiTableTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t 
numOfOutput); +SOperatorInfo* createTagScanOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createDistinctOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createTableBlockInfoScanOperator(void* pTsdbQueryHandle, STaskRuntimeEnv* pRuntimeEnv); +SOperatorInfo* createMultiwaySortOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput, int32_t numOfRows, void* merger); -SOperatorInfo* createGlobalAggregateOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, void* param, SArray* pUdfInfo, bool groupResultMixedUp); -SOperatorInfo* createStatewindowOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); -SOperatorInfo* createSLimitOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, void* merger, bool multigroupResult); -SOperatorInfo* createFilterOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, +SOperatorInfo* createGlobalAggregateOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, void* param, SArray* pUdfInfo, bool groupResultMixedUp); +SOperatorInfo* createStatewindowOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput); +SOperatorInfo* createSLimitOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, void* merger, bool multigroupResult); +SOperatorInfo* createFilterOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, SColumnInfo* pCols, int32_t numOfFilter); SOperatorInfo* createJoinOperatorInfo(SOperatorInfo** pUpstream, int32_t numOfUpstream, SSchema* pSchema, int32_t numOfOutput); -SOperatorInfo* 
createOrderOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, SOrder* pOrderVal); +SOperatorInfo* createOrderOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, SOrder* pOrderVal); SSDataBlock* doGlobalAggregate(void* param, bool* newgroup); SSDataBlock* doMultiwayMergeSort(void* param, bool* newgroup); @@ -561,8 +592,8 @@ void updateOutputBuf(SOptrBasicInfo* pBInfo, int32_t *bufCapacity, int32_t numOf void clearOutputBuf(SOptrBasicInfo* pBInfo, int32_t *bufCapacity); void copyTsColoum(SSDataBlock* pRes, SQLFunctionCtx* pCtx, int32_t numOfOutput); -void freeParam(SQueryParam *param); -int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SQueryParam* param); +void freeParam(STaskParam *param); +int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, STaskParam* param); int32_t createQueryFunc(SQueriedTableInfo* pTableInfo, int32_t numOfOutput, SExprInfo** pExprInfo, SSqlExpr** pExprMsg, SColumnInfo* pTagCols, int32_t queryType, void* pMsg, struct SUdfInfo* pUdfInfo); @@ -575,13 +606,13 @@ SGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, SColIndex *pCo SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SGroupbyExpr *pGroupbyExpr, SExprInfo *pExprs, SExprInfo *pSecExprs, STableGroupInfo *pTableGroupInfo, SColumnInfo* pTagCols, SFilterInfo* pFilters, int32_t vgId, char* sql, uint64_t qId, struct SUdfInfo* pUdfInfo); -int32_t initQInfo(STsBufInfo* pTsBufInfo, void* tsdb, void* sourceOptr, SQInfo* pQInfo, SQueryParam* param, char* start, +int32_t initQInfo(STsBufInfo* pTsBufInfo, void* tsdb, void* sourceOptr, SQInfo* pQInfo, STaskParam* param, char* start, int32_t prevResultLen, void* merger); -int32_t createFilterInfo(SQueryAttr* pQueryAttr, uint64_t qId); +int32_t createFilterInfo(STaskAttr* pQueryAttr, uint64_t qId); void freeColumnFilterInfo(SColumnFilterInfo* pFilter, int32_t numOfFilters); -STableQueryInfo *createTableQueryInfo(SQueryAttr* 
pQueryAttr, void* pTable, bool groupbyColumn, STimeWindow win, void* buf); +STableQueryInfo *createTableQueryInfo(STaskAttr* pQueryAttr, void* pTable, bool groupbyColumn, STimeWindow win, void* buf); STableQueryInfo* createTmpTableQueryInfo(STimeWindow win); int32_t buildArithmeticExprFromMsg(SExprInfo *pArithExprInfo, void *pQueryMsg); @@ -590,9 +621,9 @@ bool isQueryKilled(SQInfo *pQInfo); int32_t checkForQueryBuf(size_t numOfTables); bool checkNeedToCompressQueryCol(SQInfo *pQInfo); bool doBuildResCheck(SQInfo* pQInfo); -void setQueryStatus(SQueryRuntimeEnv *pRuntimeEnv, int8_t status); +void setQueryStatus(STaskRuntimeEnv *pRuntimeEnv, int8_t status); -bool onlyQueryTags(SQueryAttr* pQueryAttr); +bool onlyQueryTags(STaskAttr* pQueryAttr); void destroyUdfInfo(struct SUdfInfo* pUdfInfo); bool isValidQInfo(void *param); @@ -607,8 +638,8 @@ void publishQueryAbortEvent(SQInfo* pQInfo, int32_t code); void calculateOperatorProfResults(SQInfo* pQInfo); void queryCostStatis(SQInfo *pQInfo); -void freeQInfo(SQInfo *pQInfo); -void freeQueryAttr(SQueryAttr *pQuery); +void doDestroyTask(SQInfo *pQInfo); +void freeQueryAttr(STaskAttr *pQuery); int32_t getMaximumIdleDurationSec(); diff --git a/source/libs/executor/src/dataDispatcher.c b/source/libs/executor/src/dataDispatcher.c new file mode 100644 index 0000000000..3d8e51d04d --- /dev/null +++ b/source/libs/executor/src/dataDispatcher.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. 
If not, see . + */ + +#include "dataSinkInt.h" +#include "dataSinkMgt.h" +#include "planner.h" +#include "tcompression.h" +#include "tglobal.h" +#include "tqueue.h" + +#define DATA_META_LENGTH(tables) (sizeof(int32_t) + sizeof(STableIdInfo) * taosHashGetSize(tables) + sizeof(SRetrieveTableRsp)) + +typedef struct SDataDispatchBuf { + int32_t useSize; + int32_t allocSize; + char* pData; +} SDataDispatchBuf; + +typedef struct SDataCacheEntry { + int32_t dataLen; + int32_t numOfRows; + int8_t compressed; + char data[]; +} SDataCacheEntry; + +typedef struct SDataDispatchHandle { + SDataSinkHandle sink; + SDataSinkManager* pManager; + SDataBlockSchema schema; + STaosQueue* pDataBlocks; + SDataDispatchBuf nextOutput; + int32_t status; + pthread_mutex_t mutex; +} SDataDispatchHandle; + +static bool needCompress(const SSDataBlock* pData, const SDataBlockSchema* pSchema) { + if (tsCompressColData < 0 || 0 == pData->info.rows) { + return false; + } + + for (int32_t col = 0; col < pSchema->numOfCols; ++col) { + SColumnInfoData* pColRes = taosArrayGet(pData->pDataBlock, col); + int32_t colSize = pColRes->info.bytes * pData->info.rows; + if (NEEDTO_COMPRESS_QUERY(colSize)) { + return true; + } + } + + return false; +} + +static int32_t compressColData(SColumnInfoData *pColRes, int32_t numOfRows, char *data, int8_t compressed) { + int32_t colSize = pColRes->info.bytes * numOfRows; + return (*(tDataTypes[pColRes->info.type].compFunc))( + pColRes->pData, colSize, numOfRows, data, colSize + COMP_OVERFLOW_BYTES, compressed, NULL, 0); +} + +static void copyData(const SInputData* pInput, const SDataBlockSchema* pSchema, char* data, int8_t compressed, int32_t *compLen) { + int32_t *compSizes = (int32_t*)data; + if (compressed) { + data += pSchema->numOfCols * sizeof(int32_t); + } + + for (int32_t col = 0; col < pSchema->numOfCols; ++col) { + SColumnInfoData* pColRes = taosArrayGet(pInput->pData->pDataBlock, col); + if (compressed) { + compSizes[col] = compressColData(pColRes, 
pInput->pData->info.rows, data, compressed); + data += compSizes[col]; + *compLen += compSizes[col]; + compSizes[col] = htonl(compSizes[col]); + } else { + memmove(data, pColRes->pData, pColRes->info.bytes * pInput->pData->info.rows); + data += pColRes->info.bytes * pInput->pData->info.rows; + } + } + + int32_t numOfTables = (int32_t) taosHashGetSize(pInput->pTableRetrieveTsMap); + *(int32_t*)data = htonl(numOfTables); + data += sizeof(int32_t); + + STableIdInfo* item = taosHashIterate(pInput->pTableRetrieveTsMap, NULL); + while (item) { + STableIdInfo* pDst = (STableIdInfo*)data; + pDst->uid = htobe64(item->uid); + pDst->key = htobe64(item->key); + data += sizeof(STableIdInfo); + item = taosHashIterate(pInput->pTableRetrieveTsMap, item); + } +} + +// data format with compress: SDataCacheEntry | cols_data_offset | col1_data col2_data ... | numOfTables | STableIdInfo STableIdInfo ... +// data format: SDataCacheEntry | col1_data col2_data ... | numOfTables | STableIdInfo STableIdInfo ... +static void toDataCacheEntry(const SDataDispatchHandle* pHandle, const SInputData* pInput, SDataDispatchBuf* pBuf) { + SDataCacheEntry* pEntry = (SDataCacheEntry*)pBuf->pData; + pEntry->compressed = (int8_t)needCompress(pInput->pData, &(pHandle->schema)); + pEntry->numOfRows = pInput->pData->info.rows; + + pBuf->useSize = DATA_META_LENGTH(pInput->pTableRetrieveTsMap); + copyData(pInput, &pHandle->schema, pEntry->data, pEntry->compressed, &pEntry->dataLen); + pBuf->useSize += (pEntry->compressed ? 
pEntry->dataLen : pHandle->schema.resultRowSize * pInput->pData->info.rows); + // todo completed +} + +static bool allocBuf(SDataDispatchHandle* pDispatcher, const SInputData* pInput, SDataDispatchBuf* pBuf) { + if (taosQueueSize(pDispatcher->pDataBlocks) >= pDispatcher->pManager->cfg.maxDataBlockNumPerQuery) { + return false; + } + pBuf->allocSize = DATA_META_LENGTH(pInput->pTableRetrieveTsMap) + pDispatcher->schema.resultRowSize * pInput->pData->info.rows; + pBuf->pData = malloc(pBuf->allocSize); + return NULL != pBuf->pData; +} + +static int32_t updateStatus(SDataDispatchHandle* pDispatcher) { + pthread_mutex_lock(&pDispatcher->mutex); + int32_t status = taosQueueSize(pDispatcher->pDataBlocks) < pDispatcher->pManager->cfg.maxDataBlockNumPerQuery ? DS_CAPACITY_ENOUGH : DS_CAPACITY_FULL; + pDispatcher->status = status; + pthread_mutex_unlock(&pDispatcher->mutex); + return status; +} + +static int32_t getStatus(SDataDispatchHandle* pDispatcher) { + pthread_mutex_lock(&pDispatcher->mutex); + int32_t status = pDispatcher->status; + pthread_mutex_unlock(&pDispatcher->mutex); + return status; +} + +static int32_t putDataBlock(SDataSinkHandle* pHandle, const SInputData* pInput, int32_t* pStatus) { + SDataDispatchHandle* pDispatcher = (SDataDispatchHandle*)pHandle; + SDataDispatchBuf* pBuf = taosAllocateQitem(sizeof(SDataDispatchBuf)); + if (NULL == pBuf || !allocBuf(pDispatcher, pInput, pBuf)) { + return TSDB_CODE_QRY_OUT_OF_MEMORY; + } + toDataCacheEntry(pDispatcher, pInput, pBuf); + taosWriteQitem(pDispatcher->pDataBlocks, pBuf); + *pStatus = updateStatus(pDispatcher); + return TSDB_CODE_SUCCESS; +} + +static void endPut(struct SDataSinkHandle* pHandle) { + SDataDispatchHandle* pDispatcher = (SDataDispatchHandle*)pHandle; + pthread_mutex_lock(&pDispatcher->mutex); + pDispatcher->status = DS_END; + pthread_mutex_unlock(&pDispatcher->mutex); +} + +static int32_t getDataLength(SDataSinkHandle* pHandle, int32_t* pStatus) { + SDataDispatchHandle* pDispatcher = 
(SDataDispatchHandle*)pHandle; + if (taosQueueEmpty(pDispatcher->pDataBlocks)) { + *pStatus = getStatus(pDispatcher) ? DS_END : DS_IN_PROCESS; + return 0; + } + SDataDispatchBuf* pBuf = NULL; + taosReadQitem(pDispatcher->pDataBlocks, (void**)&pBuf); + memcpy(&pDispatcher->nextOutput, pBuf, sizeof(SDataDispatchBuf)); + taosFreeQitem(pBuf); + return ((SDataCacheEntry*)(pDispatcher->nextOutput.pData))->dataLen; +} + +static int32_t getDataBlock(SDataSinkHandle* pHandle, SOutPutData* pOutput, int32_t* pStatus) { + SDataDispatchHandle* pDispatcher = (SDataDispatchHandle*)pHandle; + SDataCacheEntry* pEntry = (SDataCacheEntry*)(pDispatcher->nextOutput.pData); + memcpy(pOutput->pData, pEntry->data, pEntry->dataLen); + pOutput->numOfRows = pEntry->numOfRows; + pOutput->compressed = pEntry->compressed; + tfree(pDispatcher->nextOutput.pData); // todo persistent + *pStatus = updateStatus(pDispatcher); + return TSDB_CODE_SUCCESS; +} + +static int32_t destroyDataSinker(SDataSinkHandle* pHandle) { + SDataDispatchHandle* pDispatcher = (SDataDispatchHandle*)pHandle; + tfree(pDispatcher->nextOutput.pData); + while (!taosQueueEmpty(pDispatcher->pDataBlocks)) { + SDataDispatchBuf* pBuf = NULL; + taosReadQitem(pDispatcher->pDataBlocks, (void**)&pBuf); + tfree(pBuf->pData); + taosFreeQitem(pBuf); + } + taosCloseQueue(pDispatcher->pDataBlocks); + pthread_mutex_destroy(&pDispatcher->mutex); +} + +int32_t createDataDispatcher(SDataSinkManager* pManager, const SDataSink* pDataSink, DataSinkHandle* pHandle) { + SDataDispatchHandle* dispatcher = calloc(1, sizeof(SDataDispatchHandle)); + if (NULL == dispatcher) { + terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; + return TSDB_CODE_QRY_OUT_OF_MEMORY; + } + dispatcher->sink.fPut = putDataBlock; + dispatcher->sink.fGetLen = getDataLength; + dispatcher->sink.fGetData = getDataBlock; + dispatcher->sink.fDestroy = destroyDataSinker; + dispatcher->pManager = pManager; + dispatcher->schema = pDataSink->schema; + dispatcher->status = DS_CAPACITY_ENOUGH; + 
dispatcher->pDataBlocks = taosOpenQueue(); + pthread_mutex_init(&dispatcher->mutex, NULL); + if (NULL == dispatcher->pDataBlocks) { + terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; + return TSDB_CODE_QRY_OUT_OF_MEMORY; + } + *pHandle = dispatcher; + return TSDB_CODE_SUCCESS; +} diff --git a/source/libs/executor/src/dataSinkMgt.c b/source/libs/executor/src/dataSinkMgt.c new file mode 100644 index 0000000000..8a96c5d05f --- /dev/null +++ b/source/libs/executor/src/dataSinkMgt.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "dataSinkMgt.h" +#include "dataSinkInt.h" +#include "planner.h" + +static SDataSinkManager gDataSinkManager = {0}; + +int32_t dsDataSinkMgtInit(SDataSinkMgtCfg *cfg) { + gDataSinkManager.cfg = *cfg; + pthread_mutex_init(&gDataSinkManager.mutex, NULL); +} + +int32_t dsCreateDataSinker(const struct SDataSink *pDataSink, DataSinkHandle* pHandle) { + if (DSINK_Dispatch == pDataSink->info.type) { + return createDataDispatcher(&gDataSinkManager, pDataSink, pHandle); + } + return TSDB_CODE_FAILED; +} + +int32_t dsPutDataBlock(DataSinkHandle handle, const SInputData* pInput, int32_t* pStatus) { + SDataSinkHandle* pHandleImpl = (SDataSinkHandle*)handle; + return pHandleImpl->fPut(pHandleImpl, pInput, pStatus); +} + +void dsEndPut(DataSinkHandle handle) { + SDataSinkHandle* pHandleImpl = (SDataSinkHandle*)handle; + return pHandleImpl->fEndPut(pHandleImpl); +} + +int32_t dsGetDataLength(DataSinkHandle handle, int32_t* pStatus) { + SDataSinkHandle* pHandleImpl = (SDataSinkHandle*)handle; + return pHandleImpl->fGetLen(pHandleImpl, pStatus); +} + +int32_t dsGetDataBlock(DataSinkHandle handle, SOutPutData* pOutput, int32_t* pStatus) { + SDataSinkHandle* pHandleImpl = (SDataSinkHandle*)handle; + return pHandleImpl->fGetData(pHandleImpl, pOutput, pStatus); +} + +void dsScheduleProcess(void* ahandle, void* pItem) { + // todo +} + +void dsDestroyDataSinker(DataSinkHandle handle) { + SDataSinkHandle* pHandleImpl = (SDataSinkHandle*)handle; + pHandleImpl->fDestroy(pHandleImpl); +} diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index ac91f906c7..e8ecffb72c 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -30,7 +30,7 @@ typedef struct SCompSupporter { int32_t order; } SCompSupporter; -int32_t getRowNumForMultioutput(SQueryAttr* pQueryAttr, bool topBottomQuery, bool stable) { +int32_t getRowNumForMultioutput(STaskAttr* pQueryAttr, bool topBottomQuery, bool stable) { if (pQueryAttr && 
(!stable)) { for (int16_t i = 0; i < pQueryAttr->numOfOutput; ++i) { // if (pQueryAttr->pExpr1[i].base. == FUNCTION_TOP || pQueryAttr->pExpr1[i].base.functionId == FUNCTION_BOTTOM) { @@ -42,7 +42,7 @@ int32_t getRowNumForMultioutput(SQueryAttr* pQueryAttr, bool topBottomQuery, boo return 1; } -int32_t getOutputInterResultBufSize(SQueryAttr* pQueryAttr) { +int32_t getOutputInterResultBufSize(STaskAttr* pQueryAttr) { int32_t size = 0; for (int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) { @@ -86,7 +86,7 @@ void cleanupResultRowInfo(SResultRowInfo *pResultRowInfo) { tfree(pResultRowInfo->pResult); } -void resetResultRowInfo(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo) { +void resetResultRowInfo(STaskRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo) { if (pResultRowInfo == NULL || pResultRowInfo->capacity == 0) { return; } @@ -136,7 +136,7 @@ void closeResultRow(SResultRowInfo *pResultRowInfo, int32_t slot) { getResultRow(pResultRowInfo, slot)->closed = true; } -void clearResultRow(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResultRow, int16_t type) { +void clearResultRow(STaskRuntimeEnv *pRuntimeEnv, SResultRow *pResultRow, int16_t type) { if (pResultRow == NULL) { return; } @@ -174,8 +174,8 @@ struct SResultRowEntryInfo* getResultCell(const SResultRow* pRow, int32_t index, return NULL; } -size_t getResultRowSize(SQueryRuntimeEnv* pRuntimeEnv) { - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; +size_t getResultRowSize(STaskRuntimeEnv* pRuntimeEnv) { + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; return 0; // return (pQueryAttr->numOfOutput * sizeof(SResultRowEntryInfo)) + pQueryAttr->interBufSize + sizeof(SResultRow); } @@ -393,8 +393,8 @@ int32_t getNumOfTotalRes(SGroupResInfo* pGroupResInfo) { return (int32_t) taosArrayGetSize(pGroupResInfo->pRows); } -static int64_t getNumOfResultWindowRes(SQueryRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow, int32_t* rowCellInfoOffset) { - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; +static 
int64_t getNumOfResultWindowRes(STaskRuntimeEnv* pRuntimeEnv, SResultRow *pResultRow, int32_t* rowCellInfoOffset) { + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; for (int32_t j = 0; j < pQueryAttr->numOfOutput; ++j) { int32_t functionId = 0;//pQueryAttr->pExpr1[j].base.functionId; @@ -488,7 +488,7 @@ int32_t tsDescOrder(const void* p1, const void* p2) { } } -void orderTheResultRows(SQueryRuntimeEnv* pRuntimeEnv) { +void orderTheResultRows(STaskRuntimeEnv* pRuntimeEnv) { __compar_fn_t fn = NULL; if (pRuntimeEnv->pQueryAttr->order.order == TSDB_ORDER_ASC) { fn = tsAscOrder; @@ -499,7 +499,7 @@ void orderTheResultRows(SQueryRuntimeEnv* pRuntimeEnv) { taosArraySort(pRuntimeEnv->pResultRowArrayList, fn); } -static int32_t mergeIntoGroupResultImplRv(SQueryRuntimeEnv *pRuntimeEnv, SGroupResInfo* pGroupResInfo, uint64_t groupId, int32_t* rowCellInfoOffset) { +static int32_t mergeIntoGroupResultImplRv(STaskRuntimeEnv *pRuntimeEnv, SGroupResInfo* pGroupResInfo, uint64_t groupId, int32_t* rowCellInfoOffset) { if (!pGroupResInfo->ordered) { orderTheResultRows(pRuntimeEnv); pGroupResInfo->ordered = true; @@ -528,7 +528,7 @@ static int32_t mergeIntoGroupResultImplRv(SQueryRuntimeEnv *pRuntimeEnv, SGroupR return TSDB_CODE_SUCCESS; } -static UNUSED_FUNC int32_t mergeIntoGroupResultImpl(SQueryRuntimeEnv *pRuntimeEnv, SGroupResInfo* pGroupResInfo, SArray *pTableList, +static UNUSED_FUNC int32_t mergeIntoGroupResultImpl(STaskRuntimeEnv *pRuntimeEnv, SGroupResInfo* pGroupResInfo, SArray *pTableList, int32_t* rowCellInfoOffset) { bool ascQuery = QUERY_IS_ASC_QUERY(pRuntimeEnv->pQueryAttr); @@ -630,7 +630,7 @@ static UNUSED_FUNC int32_t mergeIntoGroupResultImpl(SQueryRuntimeEnv *pRuntimeEn return code; } -int32_t mergeIntoGroupResult(SGroupResInfo* pGroupResInfo, SQueryRuntimeEnv* pRuntimeEnv, int32_t* offset) { +int32_t mergeIntoGroupResult(SGroupResInfo* pGroupResInfo, STaskRuntimeEnv* pRuntimeEnv, int32_t* offset) { int64_t st = taosGetTimestampUs(); while 
(pGroupResInfo->currentGroup < pGroupResInfo->totalGroup) { diff --git a/source/libs/executor/src/executorMain.c b/source/libs/executor/src/executorMain.c new file mode 100644 index 0000000000..b98b7fef5c --- /dev/null +++ b/source/libs/executor/src/executorMain.c @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "os.h" +#include "tcache.h" +#include "tglobal.h" +#include "tmsg.h" +#include "exception.h" + +#include "thash.h" +#include "executorimpl.h" +#include "executor.h" +#include "tlosertree.h" +#include "ttypes.h" +#include "query.h" + +typedef struct STaskMgmt { + pthread_mutex_t lock; + SCacheObj *qinfoPool; // query handle pool + int32_t vgId; + bool closed; +} STaskMgmt; + +static void taskMgmtKillTaskFn(void* handle, void* param1) { + void** fp = (void**)handle; + qKillTask(*fp); +} + +static void freeqinfoFn(void *qhandle) { + void** handle = qhandle; + if (handle == NULL || *handle == NULL) { + return; + } + + qKillTask(*handle); + qDestroyTask(*handle); +} + +void freeParam(STaskParam *param) { + tfree(param->sql); + tfree(param->tagCond); + tfree(param->tbnameCond); + tfree(param->pTableIdList); + taosArrayDestroy(param->pOperator); + tfree(param->pExprs); + tfree(param->pSecExprs); + + tfree(param->pExpr); + tfree(param->pSecExpr); + + tfree(param->pGroupColIndex); + tfree(param->pTagColumnInfo); + tfree(param->pGroupbyExpr); + tfree(param->prevResult); +} + +// todo parse json to get the 
operator tree. + +int32_t qCreateTask(void* tsdb, int32_t vgId, void* pQueryMsg, qTaskInfo_t* pTaskInfo, uint64_t taskId) { + assert(pQueryMsg != NULL && tsdb != NULL); + + int32_t code = TSDB_CODE_SUCCESS; +#if 0 + STaskParam param = {0}; + code = convertQueryMsg(pQueryMsg, ¶m); + if (code != TSDB_CODE_SUCCESS) { + goto _over; + } + + if (pQueryMsg->numOfTables <= 0) { + qError("Invalid number of tables to query, numOfTables:%d", pQueryMsg->numOfTables); + code = TSDB_CODE_QRY_INVALID_MSG; + goto _over; + } + + if (param.pTableIdList == NULL || taosArrayGetSize(param.pTableIdList) == 0) { + qError("qmsg:%p, SQueryTableMsg wrong format", pQueryMsg); + code = TSDB_CODE_QRY_INVALID_MSG; + goto _over; + } + + SQueriedTableInfo info = { .numOfTags = pQueryMsg->numOfTags, .numOfCols = pQueryMsg->numOfCols, .colList = pQueryMsg->tableCols}; + if ((code = createQueryFunc(&info, pQueryMsg->numOfOutput, ¶m.pExprs, param.pExpr, param.pTagColumnInfo, + pQueryMsg->queryType, pQueryMsg, param.pUdfInfo)) != TSDB_CODE_SUCCESS) { + goto _over; + } + + if (param.pSecExpr != NULL) { + if ((code = createIndirectQueryFuncExprFromMsg(pQueryMsg, pQueryMsg->secondStageOutput, ¶m.pSecExprs, param.pSecExpr, param.pExprs, param.pUdfInfo)) != TSDB_CODE_SUCCESS) { + goto _over; + } + } + + if (param.colCond != NULL) { + if ((code = createQueryFilter(param.colCond, pQueryMsg->colCondLen, ¶m.pFilters)) != TSDB_CODE_SUCCESS) { + goto _over; + } + } + + param.pGroupbyExpr = createGroupbyExprFromMsg(pQueryMsg, param.pGroupColIndex, &code); + if ((param.pGroupbyExpr == NULL && pQueryMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) { + goto _over; + } + + bool isSTableQuery = false; + STableGroupInfo tableGroupInfo = {0}; + int64_t st = taosGetTimestampUs(); + + if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_TABLE_QUERY)) { + STableIdInfo *id = taosArrayGet(param.pTableIdList, 0); + + qDebug("qmsg:%p query normal table, uid:%"PRId64", tid:%d", pQueryMsg, id->uid, id->tid); + if 
((code = tsdbGetOneTableGroup(tsdb, id->uid, pQueryMsg->window.skey, &tableGroupInfo)) != TSDB_CODE_SUCCESS) { + goto _over; + } + } else if (TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY|TSDB_QUERY_TYPE_STABLE_QUERY)) { + isSTableQuery = true; + + // also note there's possibility that only one table in the super table + if (!TSDB_QUERY_HAS_TYPE(pQueryMsg->queryType, TSDB_QUERY_TYPE_MULTITABLE_QUERY)) { + STableIdInfo *id = taosArrayGet(param.pTableIdList, 0); + + // group by normal column, do not pass the group by condition to tsdb to group table into different group + int32_t numOfGroupByCols = pQueryMsg->numOfGroupCols; + if (pQueryMsg->numOfGroupCols == 1 && !TSDB_COL_IS_TAG(param.pGroupColIndex->flag)) { + numOfGroupByCols = 0; + } + + qDebug("qmsg:%p query stable, uid:%"PRIu64", tid:%d", pQueryMsg, id->uid, id->tid); + code = tsdbQuerySTableByTagCond(tsdb, id->uid, pQueryMsg->window.skey, param.tagCond, pQueryMsg->tagCondLen, + pQueryMsg->tagNameRelType, param.tbnameCond, &tableGroupInfo, param.pGroupColIndex, numOfGroupByCols); + + if (code != TSDB_CODE_SUCCESS) { + qError("qmsg:%p failed to query stable, reason: %s", pQueryMsg, tstrerror(code)); + goto _over; + } + } else { + code = tsdbGetTableGroupFromIdList(tsdb, param.pTableIdList, &tableGroupInfo); + if (code != TSDB_CODE_SUCCESS) { + goto _over; + } + + qDebug("qmsg:%p query on %u tables in one group from client", pQueryMsg, tableGroupInfo.numOfTables); + } + + int64_t el = taosGetTimestampUs() - st; + qDebug("qmsg:%p tag filter completed, numOfTables:%u, elapsed time:%"PRId64"us", pQueryMsg, tableGroupInfo.numOfTables, el); + } else { + assert(0); + } + + code = checkForQueryBuf(tableGroupInfo.numOfTables); + if (code != TSDB_CODE_SUCCESS) { // not enough query buffer, abort + goto _over; + } + + assert(pQueryMsg->stableQuery == isSTableQuery); + (*pTaskInfo) = createQInfoImpl(pQueryMsg, param.pGroupbyExpr, param.pExprs, param.pSecExprs, &tableGroupInfo, + 
param.pTagColumnInfo, param.pFilters, vgId, param.sql, qId, param.pUdfInfo); + + param.sql = NULL; + param.pExprs = NULL; + param.pSecExprs = NULL; + param.pGroupbyExpr = NULL; + param.pTagColumnInfo = NULL; + param.pFilters = NULL; + + if ((*pTaskInfo) == NULL) { + code = TSDB_CODE_QRY_OUT_OF_MEMORY; + goto _over; + } + param.pUdfInfo = NULL; + + code = initQInfo(&pQueryMsg->tsBuf, tsdb, NULL, *pTaskInfo, ¶m, (char*)pQueryMsg, pQueryMsg->prevResultLen, NULL); + + _over: + if (param.pGroupbyExpr != NULL) { + taosArrayDestroy(param.pGroupbyExpr->columnInfo); + } + + tfree(param.colCond); + + destroyUdfInfo(param.pUdfInfo); + + taosArrayDestroy(param.pTableIdList); + param.pTableIdList = NULL; + + freeParam(¶m); + + for (int32_t i = 0; i < pQueryMsg->numOfCols; i++) { + SColumnInfo* column = pQueryMsg->tableCols + i; + freeColumnFilterInfo(column->flist.filterInfo, column->flist.numOfFilters); + } + + filterFreeInfo(param.pFilters); + + //pTaskInfo already freed in initQInfo, but *pTaskInfo may not pointer to null; + if (code != TSDB_CODE_SUCCESS) { + *pTaskInfo = NULL; + } +#endif + + // if failed to add ref for all tables in this query, abort current query + return code; +} + +#ifdef TEST_IMPL +// wait moment +int waitMoment(SQInfo* pQInfo){ + if(pQInfo->sql) { + int ms = 0; + char* pcnt = strstr(pQInfo->sql, " count(*)"); + if(pcnt) return 0; + + char* pos = strstr(pQInfo->sql, " t_"); + if(pos){ + pos += 3; + ms = atoi(pos); + while(*pos >= '0' && *pos <= '9'){ + pos ++; + } + char unit_char = *pos; + if(unit_char == 'h'){ + ms *= 3600*1000; + } else if(unit_char == 'm'){ + ms *= 60*1000; + } else if(unit_char == 's'){ + ms *= 1000; + } + } + if(ms == 0) return 0; + printf("test wait sleep %dms. 
sql=%s ...\n", ms, pQInfo->sql); + + if(ms < 1000) { + taosMsleep(ms); + } else { + int used_ms = 0; + while(used_ms < ms) { + taosMsleep(1000); + used_ms += 1000; + if(isQueryKilled(pQInfo)){ + printf("test check query is canceled, sleep break.%s\n", pQInfo->sql); + break; + } + } + } + } + return 1; +} +#endif + +bool qExecTask(qTaskInfo_t qinfo, uint64_t *qId) { + SQInfo *pQInfo = (SQInfo *)qinfo; + assert(pQInfo && pQInfo->signature == pQInfo); + int64_t threadId = taosGetSelfPthreadId(); + + int64_t curOwner = 0; + if ((curOwner = atomic_val_compare_exchange_64(&pQInfo->owner, 0, threadId)) != 0) { + qError("QInfo:0x%"PRIx64"-%p qhandle is now executed by thread:%p", pQInfo->qId, pQInfo, (void*) curOwner); + pQInfo->code = TSDB_CODE_QRY_IN_EXEC; + return false; + } + + *qId = pQInfo->qId; + if(pQInfo->startExecTs == 0) + pQInfo->startExecTs = taosGetTimestampMs(); + + if (isQueryKilled(pQInfo)) { + qDebug("QInfo:0x%"PRIx64" it is already killed, abort", pQInfo->qId); + return doBuildResCheck(pQInfo); + } + + STaskRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; + if (pRuntimeEnv->tableqinfoGroupInfo.numOfTables == 0) { + qDebug("QInfo:0x%"PRIx64" no table exists for query, abort", pQInfo->qId); +// setTaskStatus(pRuntimeEnv, QUERY_COMPLETED); + return doBuildResCheck(pQInfo); + } + + // error occurs, record the error code and return to client + int32_t ret = setjmp(pQInfo->runtimeEnv.env); + if (ret != TSDB_CODE_SUCCESS) { + publishQueryAbortEvent(pQInfo, ret); + pQInfo->code = ret; + qDebug("QInfo:0x%"PRIx64" query abort due to error/cancel occurs, code:%s", pQInfo->qId, tstrerror(pQInfo->code)); + return doBuildResCheck(pQInfo); + } + + qDebug("QInfo:0x%"PRIx64" query task is launched", pQInfo->qId); + + bool newgroup = false; + publishOperatorProfEvent(pRuntimeEnv->proot, QUERY_PROF_BEFORE_OPERATOR_EXEC); + + int64_t st = taosGetTimestampUs(); + pRuntimeEnv->outputBuf = pRuntimeEnv->proot->exec(pRuntimeEnv->proot, &newgroup); + pQInfo->summary.elapsedTime += 
(taosGetTimestampUs() - st); +#ifdef TEST_IMPL + waitMoment(pQInfo); +#endif + publishOperatorProfEvent(pRuntimeEnv->proot, QUERY_PROF_AFTER_OPERATOR_EXEC); + pRuntimeEnv->resultInfo.total += GET_NUM_OF_RESULTS(pRuntimeEnv); + + if (isQueryKilled(pQInfo)) { + qDebug("QInfo:0x%"PRIx64" query is killed", pQInfo->qId); + } else if (GET_NUM_OF_RESULTS(pRuntimeEnv) == 0) { + qDebug("QInfo:0x%"PRIx64" over, %u tables queried, total %"PRId64" rows returned", pQInfo->qId, pRuntimeEnv->tableqinfoGroupInfo.numOfTables, + pRuntimeEnv->resultInfo.total); + } else { + qDebug("QInfo:0x%"PRIx64" query paused, %d rows returned, total:%" PRId64 " rows", pQInfo->qId, + GET_NUM_OF_RESULTS(pRuntimeEnv), pRuntimeEnv->resultInfo.total); + } + + return doBuildResCheck(pQInfo); +} + +int32_t qRetrieveQueryResultInfo(qTaskInfo_t qinfo, bool* buildRes, void* pRspContext) { + SQInfo *pQInfo = (SQInfo *)qinfo; + + if (pQInfo == NULL || !isValidQInfo(pQInfo)) { + qError("QInfo invalid qhandle"); + return TSDB_CODE_QRY_INVALID_QHANDLE; + } + + *buildRes = false; + if (IS_QUERY_KILLED(pQInfo)) { + qDebug("QInfo:0x%"PRIx64" query is killed, code:0x%08x", pQInfo->qId, pQInfo->code); + return pQInfo->code; + } + + int32_t code = TSDB_CODE_SUCCESS; + + if (tsRetrieveBlockingModel) { + pQInfo->rspContext = pRspContext; + tsem_wait(&pQInfo->ready); + *buildRes = true; + code = pQInfo->code; + } else { + STaskRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; + STaskAttr *pQueryAttr = pQInfo->runtimeEnv.pQueryAttr; + + pthread_mutex_lock(&pQInfo->lock); + + assert(pQInfo->rspContext == NULL); + if (pQInfo->dataReady == QUERY_RESULT_READY) { + *buildRes = true; + qDebug("QInfo:0x%"PRIx64" retrieve result info, rowsize:%d, rows:%d, code:%s", pQInfo->qId, pQueryAttr->resultRowSize, + GET_NUM_OF_RESULTS(pRuntimeEnv), tstrerror(pQInfo->code)); + } else { + *buildRes = false; + qDebug("QInfo:0x%"PRIx64" retrieve req set query return result after paused", pQInfo->qId); + pQInfo->rspContext = pRspContext; + 
assert(pQInfo->rspContext != NULL); + } + + code = pQInfo->code; + pthread_mutex_unlock(&pQInfo->lock); + } + + return code; +} + +void* qGetResultRetrieveMsg(qTaskInfo_t qinfo) { + SQInfo* pQInfo = (SQInfo*) qinfo; + assert(pQInfo != NULL); + + return pQInfo->rspContext; +} + +int32_t qKillTask(qTaskInfo_t qinfo) { + SQInfo *pQInfo = (SQInfo *)qinfo; + + if (pQInfo == NULL || !isValidQInfo(pQInfo)) { + return TSDB_CODE_QRY_INVALID_QHANDLE; + } + + qDebug("QInfo:0x%"PRIx64" query killed", pQInfo->qId); + setQueryKilled(pQInfo); + + // Wait for the query executing thread being stopped/ + // Once the query is stopped, the owner of qHandle will be cleared immediately. + while (pQInfo->owner != 0) { + taosMsleep(100); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t qIsTaskCompleted(qTaskInfo_t qinfo) { + SQInfo *pQInfo = (SQInfo *)qinfo; + + if (pQInfo == NULL || !isValidQInfo(pQInfo)) { + return TSDB_CODE_QRY_INVALID_QHANDLE; + } + + return isQueryKilled(pQInfo) || Q_STATUS_EQUAL(pQInfo->runtimeEnv.status, QUERY_OVER); +} + +void qDestroyTask(qTaskInfo_t qHandle) { + SQInfo* pQInfo = (SQInfo*) qHandle; + if (!isValidQInfo(pQInfo)) { + return; + } + + qDebug("QInfo:0x%"PRIx64" query completed", pQInfo->qId); + queryCostStatis(pQInfo); // print the query cost summary + doDestroyTask(pQInfo); +} + +void* qOpenTaskMgmt(int32_t vgId) { + const int32_t refreshHandleInterval = 30; // every 30 seconds, refresh handle pool + + char cacheName[128] = {0}; + sprintf(cacheName, "qhandle_%d", vgId); + + STaskMgmt* pTaskMgmt = calloc(1, sizeof(STaskMgmt)); + if (pTaskMgmt == NULL) { + terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; + return NULL; + } + + pTaskMgmt->qinfoPool = taosCacheInit(TSDB_CACHE_PTR_KEY, refreshHandleInterval, true, freeqinfoFn, cacheName); + pTaskMgmt->closed = false; + pTaskMgmt->vgId = vgId; + + pthread_mutex_init(&pTaskMgmt->lock, NULL); + + qDebug("vgId:%d, open queryTaskMgmt success", vgId); + return pTaskMgmt; +} + +void qTaskMgmtNotifyClosing(void* pQMgmt) { + 
if (pQMgmt == NULL) { + return; + } + + STaskMgmt* pQueryMgmt = pQMgmt; + qInfo("vgId:%d, set querymgmt closed, wait for all queries cancelled", pQueryMgmt->vgId); + + pthread_mutex_lock(&pQueryMgmt->lock); + pQueryMgmt->closed = true; + pthread_mutex_unlock(&pQueryMgmt->lock); + + taosCacheRefresh(pQueryMgmt->qinfoPool, taskMgmtKillTaskFn, NULL); +} + +void qQueryMgmtReOpen(void *pQMgmt) { + if (pQMgmt == NULL) { + return; + } + + STaskMgmt *pQueryMgmt = pQMgmt; + qInfo("vgId:%d, set querymgmt reopen", pQueryMgmt->vgId); + + pthread_mutex_lock(&pQueryMgmt->lock); + pQueryMgmt->closed = false; + pthread_mutex_unlock(&pQueryMgmt->lock); +} + +void qCleanupTaskMgmt(void* pQMgmt) { + if (pQMgmt == NULL) { + return; + } + + STaskMgmt* pQueryMgmt = pQMgmt; + int32_t vgId = pQueryMgmt->vgId; + + assert(pQueryMgmt->closed); + + SCacheObj* pqinfoPool = pQueryMgmt->qinfoPool; + pQueryMgmt->qinfoPool = NULL; + + taosCacheCleanup(pqinfoPool); + pthread_mutex_destroy(&pQueryMgmt->lock); + tfree(pQueryMgmt); + + qDebug("vgId:%d, queryMgmt cleanup completed", vgId); +} + +void** qRegisterTask(void* pMgmt, uint64_t qId, void *qInfo) { + if (pMgmt == NULL) { + terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; + return NULL; + } + + STaskMgmt *pQueryMgmt = pMgmt; + if (pQueryMgmt->qinfoPool == NULL) { + qError("QInfo:0x%"PRIx64"-%p failed to add qhandle into qMgmt, since qMgmt is closed", qId, (void*)qInfo); + terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; + return NULL; + } + + pthread_mutex_lock(&pQueryMgmt->lock); + if (pQueryMgmt->closed) { + pthread_mutex_unlock(&pQueryMgmt->lock); + qError("QInfo:0x%"PRIx64"-%p failed to add qhandle into cache, since qMgmt is colsing", qId, (void*)qInfo); + terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; + return NULL; + } else { + void** handle = taosCachePut(pQueryMgmt->qinfoPool, &qId, sizeof(qId), &qInfo, sizeof(TSDB_CACHE_PTR_TYPE), + (getMaximumIdleDurationSec()*1000)); + pthread_mutex_unlock(&pQueryMgmt->lock); + + return handle; + } +} + +void** 
qAcquireTask(void* pMgmt, uint64_t _key) { + STaskMgmt *pQueryMgmt = pMgmt; + + if (pQueryMgmt->closed) { + terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; + return NULL; + } + + if (pQueryMgmt->qinfoPool == NULL) { + terrno = TSDB_CODE_QRY_INVALID_QHANDLE; + return NULL; + } + + void** handle = taosCacheAcquireByKey(pQueryMgmt->qinfoPool, &_key, sizeof(_key)); + if (handle == NULL || *handle == NULL) { + terrno = TSDB_CODE_QRY_INVALID_QHANDLE; + return NULL; + } else { + return handle; + } +} + +void** qReleaseTask(void* pMgmt, void* pQInfo, bool freeHandle) { + STaskMgmt *pQueryMgmt = pMgmt; + if (pQueryMgmt->qinfoPool == NULL) { + return NULL; + } + + taosCacheRelease(pQueryMgmt->qinfoPool, pQInfo, freeHandle); + return 0; +} + +#if 0 +//kill by qid +int32_t qKillQueryByQId(void* pMgmt, int64_t qId, int32_t waitMs, int32_t waitCount) { + int32_t error = TSDB_CODE_SUCCESS; + void** handle = qAcquireTask(pMgmt, qId); + if(handle == NULL) return terrno; + + SQInfo* pQInfo = (SQInfo*)(*handle); + if (pQInfo == NULL || !isValidQInfo(pQInfo)) { + return TSDB_CODE_QRY_INVALID_QHANDLE; + } + qWarn("QId:0x%"PRIx64" be killed(no memory commit).", pQInfo->qId); + setQueryKilled(pQInfo); + + // wait query stop + int32_t loop = 0; + while (pQInfo->owner != 0) { + taosMsleep(waitMs); + if(loop++ > waitCount){ + error = TSDB_CODE_FAILED; + break; + } + } + + qReleaseTask(pMgmt, (void **)&handle, true); + return error; +} + +#endif \ No newline at end of file diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index f119627c69..1d2740f0e0 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -12,24 +12,25 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
*/ -#include #include "os.h" #include "tmsg.h" #include "tglobal.h" #include "ttime.h" - #include "exception.h" + +#include "../../../../contrib/cJson/cJSON.h" #include "executorimpl.h" -#include "thash.h" #include "function.h" #include "tcompare.h" #include "tcompression.h" +#include "thash.h" #include "ttypes.h" +#include "query.h" -#define IS_MASTER_SCAN(runtime) ((runtime)->scanFlag == MASTER_SCAN) +#define IS_MAIN_SCAN(runtime) ((runtime)->scanFlag == MAIN_SCAN) #define IS_REVERSE_SCAN(runtime) ((runtime)->scanFlag == REVERSE_SCAN) #define IS_REPEAT_SCAN(runtime) ((runtime)->scanFlag == REPEAT_SCAN) -#define SET_MASTER_SCAN_FLAG(runtime) ((runtime)->scanFlag = MASTER_SCAN) +#define SET_MAIN_SCAN_FLAG(runtime) ((runtime)->scanFlag = MAIN_SCAN) #define SET_REVERSE_SCAN_FLAG(runtime) ((runtime)->scanFlag = REVERSE_SCAN) #define TSWINDOW_IS_EQUAL(t1, t2) (((t1).skey == (t2).skey) && ((t1).ekey == (t2).ekey)) @@ -41,11 +42,6 @@ #define MULTI_KEY_DELIM "-" -#define TIME_WINDOW_COPY(_dst, _src) do {\ - (_dst).skey = (_src).skey;\ - (_dst).ekey = (_src).ekey;\ -} while (0) - enum { TS_JOIN_TS_EQUAL = 0, TS_JOIN_TS_NOT_EQUALS = 1, @@ -131,40 +127,16 @@ do { \ } \ } while (0) -uint64_t queryHandleId = 0; - int32_t getMaximumIdleDurationSec() { return tsShellActivityTimer * 2; } -int64_t genQueryId(void) { - int64_t uid = 0; - int64_t did = 0;//tsDnodeId; - - uid = did << 54; - - int64_t pid = ((int64_t)taosGetPId()) & 0x3FF; - - uid |= pid << 44; - - int64_t ts = taosGetTimestampMs() & 0x1FFFFFFFF; - - uid |= ts << 11; - - int64_t sid = atomic_add_fetch_64(&queryHandleId, 1) & 0x7FF; - - uid |= sid; - -// //qDebug("gen qid:0x%"PRIx64, uid); - - return uid; -} static int32_t getExprFunctionId(SExprInfo *pExprInfo) { assert(pExprInfo != NULL && pExprInfo->pExpr != NULL && pExprInfo->pExpr->nodeType == TEXPR_UNARYEXPR_NODE); return 0; } -static void getNextTimeWindow(SQueryAttr* pQueryAttr, STimeWindow* tw) { +static void getNextTimeWindow(STaskAttr* pQueryAttr, 
STimeWindow* tw) { int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQueryAttr->order.order); if (pQueryAttr->interval.intervalUnit != 'n' && pQueryAttr->interval.intervalUnit != 'y') { tw->skey += pQueryAttr->interval.sliding * factor; @@ -198,28 +170,28 @@ static void getNextTimeWindow(SQueryAttr* pQueryAttr, STimeWindow* tw) { } static void doSetTagValueToResultBuf(char* output, const char* val, int16_t type, int16_t bytes); -static void setResultOutputBuf(SQueryRuntimeEnv* pRuntimeEnv, SResultRow* pResult, SQLFunctionCtx* pCtx, +static void setResultOutputBuf(STaskRuntimeEnv* pRuntimeEnv, SResultRow* pResult, SQLFunctionCtx* pCtx, int32_t numOfCols, int32_t* rowCellInfoOffset); -void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult, SQLFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowCellInfoOffset); -static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx); +void setResultRowOutputBufInitCtx(STaskRuntimeEnv *pRuntimeEnv, SResultRow *pResult, SQLFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowCellInfoOffset); +static bool functionNeedToExecute(STaskRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx); static void setBlockStatisInfo(SQLFunctionCtx *pCtx, SSDataBlock* pSDataBlock, SColIndex* pColIndex); static void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo); -static bool hasMainOutput(SQueryAttr *pQueryAttr); +static bool hasMainOutput(STaskAttr *pQueryAttr); static SColumnInfo* extractColumnFilterInfo(SExprInfo* pExpr, int32_t numOfOutput, int32_t* numOfFilterCols); -static int32_t setTimestampListJoinInfo(SQueryRuntimeEnv* pRuntimeEnv, SVariant* pTag, STableQueryInfo *pTableQueryInfo); +static int32_t setTimestampListJoinInfo(STaskRuntimeEnv* pRuntimeEnv, SVariant* pTag, STableQueryInfo *pTableQueryInfo); static void releaseQueryBuf(size_t numOfTables); static int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order); -//static STsdbQueryCond 
createTsdbQueryCond(SQueryAttr* pQueryAttr, STimeWindow* win); +//static STsdbQueryCond createTsdbQueryCond(STaskAttr* pQueryAttr, STimeWindow* win); static STableIdInfo createTableIdInfo(STableQueryInfo* pTableQueryInfo); static void setTableScanFilterOperatorInfo(STableScanInfo* pTableScanInfo, SOperatorInfo* pDownstream); -static int32_t getNumOfScanTimes(SQueryAttr* pQueryAttr); +static int32_t getNumOfScanTimes(STaskAttr* pQueryAttr); static void destroyBasicOperatorInfo(void* param, int32_t numOfOutput); static void destroySFillOperatorInfo(void* param, int32_t numOfOutput); @@ -232,32 +204,34 @@ static void destroyStateWindowOperatorInfo(void* param, int32_t numOfOutput); static void destroyAggOperatorInfo(void* param, int32_t numOfOutput); static void destroyOperatorInfo(SOperatorInfo* pOperator); +void setTaskStatus(STaskInfo *pTaskInfo, int8_t status); + static void doSetOperatorCompleted(SOperatorInfo* pOperator) { pOperator->status = OP_EXEC_DONE; - if (pOperator->pRuntimeEnv != NULL) { - setQueryStatus(pOperator->pRuntimeEnv, QUERY_COMPLETED); + if (pOperator->pTaskInfo != NULL) { + setTaskStatus(pOperator->pTaskInfo, QUERY_COMPLETED); } } -static int32_t doCopyToSDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo, int32_t orderType, SSDataBlock* pBlock); +static int32_t doCopyToSDataBlock(STaskRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo, int32_t orderType, SSDataBlock* pBlock); static int32_t getGroupbyColumnIndex(SGroupbyExpr *pGroupbyExpr, SSDataBlock* pDataBlock); -static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SOptrBasicInfo *binf, int32_t numOfCols, char *pData, int16_t type, int16_t bytes, int32_t groupIndex); +static int32_t setGroupResultOutputBuf(STaskRuntimeEnv *pRuntimeEnv, SOptrBasicInfo *binf, int32_t numOfCols, char *pData, int16_t type, int16_t bytes, int32_t groupIndex); static void initCtxOutputBuffer(SQLFunctionCtx* pCtx, int32_t size); -static void 
getAlignQueryTimeWindow(SQueryAttr *pQueryAttr, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win); -static void setResultBufSize(SQueryAttr* pQueryAttr, SRspResultInfo* pResultInfo); -static void setCtxTagForJoin(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, SExprInfo* pExprInfo, void* pTable); -static void setParamForStableStddev(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, int32_t numOfOutput, SExprInfo* pExpr); -static void setParamForStableStddevByColData(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, int32_t numOfOutput, SExprInfo* pExpr, char* val, int16_t bytes); -static void doSetTableGroupOutputBuf(SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo, +static void getAlignQueryTimeWindow(STaskAttr *pQueryAttr, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win); +static void setResultBufSize(STaskAttr* pQueryAttr, SRspResultInfo* pResultInfo); +static void setCtxTagForJoin(STaskRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, SExprInfo* pExprInfo, void* pTable); +static void setParamForStableStddev(STaskRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, int32_t numOfOutput, SExprInfo* pExpr); +static void setParamForStableStddevByColData(STaskRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, int32_t numOfOutput, SExprInfo* pExpr, char* val, int16_t bytes); +static void doSetTableGroupOutputBuf(STaskRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo, SQLFunctionCtx* pCtx, int32_t* rowCellInfoOffset, int32_t numOfOutput, int32_t tableGroupId); -SArray* getOrderCheckColumns(SQueryAttr* pQuery); +SArray* getOrderCheckColumns(STaskAttr* pQuery); typedef struct SRowCompSupporter { - SQueryRuntimeEnv *pRuntimeEnv; + STaskRuntimeEnv *pRuntimeEnv; int16_t dataOffset; __compar_fn_t comFunc; } SRowCompSupporter; @@ -267,7 +241,7 @@ static int compareRowData(const void *a, const void *b, const void *userData) { const SResultRow *pRow2 = (const SResultRow *)b; SRowCompSupporter *supporter = (SRowCompSupporter 
*)userData; - SQueryRuntimeEnv* pRuntimeEnv = supporter->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = supporter->pRuntimeEnv; SFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pRow1->pageId); SFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pRow2->pageId); @@ -279,7 +253,7 @@ static int compareRowData(const void *a, const void *b, const void *userData) { return (in1 != NULL && in2 != NULL) ? supporter->comFunc(in1, in2) : 0; } -static void sortGroupResByOrderList(SGroupResInfo *pGroupResInfo, SQueryRuntimeEnv *pRuntimeEnv, SSDataBlock* pDataBlock) { +static void sortGroupResByOrderList(SGroupResInfo *pGroupResInfo, STaskRuntimeEnv *pRuntimeEnv, SSDataBlock* pDataBlock) { SArray *columnOrderList = getOrderCheckColumns(pRuntimeEnv->pQueryAttr); size_t size = taosArrayGetSize(columnOrderList); taosArrayDestroy(columnOrderList); @@ -375,7 +349,7 @@ static bool isSelectivityWithTagsQuery(SQLFunctionCtx *pCtx, int32_t numOfOutput // return (numOfSelectivity > 0 && hasTags); } -static bool isProjQuery(SQueryAttr *pQueryAttr) { +static bool isProjQuery(STaskAttr *pQueryAttr) { for (int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) { int32_t functId = getExprFunctionId(&pQueryAttr->pExpr1[i]); if (functId != FUNCTION_PRJ && functId != FUNCTION_TAGPRJ) { @@ -398,7 +372,7 @@ static bool hasNull(SColIndex* pColIndex, SColumnDataAgg *pStatis) { return true; } -static void prepareResultListBuffer(SResultRowInfo* pResultRowInfo, SQueryRuntimeEnv* pRuntimeEnv) { +static void prepareResultListBuffer(SResultRowInfo* pResultRowInfo, STaskRuntimeEnv* pRuntimeEnv) { // more than the capacity, reallocate the resources if (pResultRowInfo->size < pResultRowInfo->capacity) { return; @@ -424,7 +398,7 @@ static void prepareResultListBuffer(SResultRowInfo* pResultRowInfo, SQueryRuntim pResultRowInfo->capacity = (int32_t)newCapacity; } -static bool chkResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, char *pData, +static bool 
chkResultRowFromKey(STaskRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, char *pData, int16_t bytes, bool masterscan, uint64_t uid) { bool existed = false; SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, uid); @@ -462,7 +436,7 @@ static bool chkResultRowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *p } -static SResultRow* doSetResultOutBufByKey(SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo, int64_t tid, +static SResultRow* doSetResultOutBufByKey(STaskRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo, int64_t tid, char* pData, int16_t bytes, bool masterscan, uint64_t tableGroupId) { bool existed = false; SET_RES_WINDOW_KEY(pRuntimeEnv->keyBuf, pData, bytes, tableGroupId); @@ -536,7 +510,7 @@ static SResultRow* doSetResultOutBufByKey(SQueryRuntimeEnv* pRuntimeEnv, SResult return pResultRowInfo->pResult[pResultRowInfo->curPos]; } -static void getInitialStartTimeWindow(SQueryAttr* pQueryAttr, TSKEY ts, STimeWindow* w) { +static void getInitialStartTimeWindow(STaskAttr* pQueryAttr, TSKEY ts, STimeWindow* w) { if (QUERY_IS_ASC_QUERY(pQueryAttr)) { getAlignQueryTimeWindow(pQueryAttr, ts, ts, pQueryAttr->window.ekey, w); } else { @@ -561,7 +535,7 @@ static void getInitialStartTimeWindow(SQueryAttr* pQueryAttr, TSKEY ts, STimeWin } // get the correct time window according to the handled timestamp -static STimeWindow getActiveTimeWindow(SResultRowInfo * pResultRowInfo, int64_t ts, SQueryAttr *pQueryAttr) { +static STimeWindow getActiveTimeWindow(SResultRowInfo * pResultRowInfo, int64_t ts, STaskAttr *pQueryAttr) { STimeWindow w = {0}; if (pResultRowInfo->curPos == -1) { // the first window, from the previous stored value @@ -609,7 +583,7 @@ static STimeWindow getActiveTimeWindow(SResultRowInfo * pResultRowInfo, int64_t } // get the correct time window according to the handled timestamp -static STimeWindow getCurrentActiveTimeWindow(SResultRowInfo * pResultRowInfo, int64_t ts, SQueryAttr *pQueryAttr) { +static STimeWindow 
getCurrentActiveTimeWindow(SResultRowInfo * pResultRowInfo, int64_t ts, STaskAttr *pQueryAttr) { STimeWindow w = {0}; if (pResultRowInfo->curPos == -1) { // the first window, from the previous stored value @@ -680,14 +654,14 @@ static int32_t addNewWindowResultBuf(SResultRow *pWindowRes, SDiskbasedResultBuf return 0; } -static bool chkWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, STimeWindow *win, +static bool chkWindowOutputBufByKey(STaskRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, STimeWindow *win, bool masterscan, SResultRow **pResult, int64_t groupId, SQLFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowCellInfoOffset) { assert(win->skey <= win->ekey); return chkResultRowFromKey(pRuntimeEnv, pResultRowInfo, (char *)&win->skey, TSDB_KEYSIZE, masterscan, groupId); } -static int32_t setResultOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, int64_t tid, STimeWindow *win, +static int32_t setResultOutputBufByKey(STaskRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, int64_t tid, STimeWindow *win, bool masterscan, SResultRow **pResult, int64_t tableGroupId, SQLFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowCellInfoOffset) { assert(win->skey <= win->ekey); @@ -816,7 +790,7 @@ static void doUpdateResultRowIndex(SResultRowInfo*pResultRowInfo, TSKEY lastKey, } } -static void updateResultRowInfoActiveIndex(SResultRowInfo* pResultRowInfo, SQueryAttr* pQueryAttr, TSKEY lastKey) { +static void updateResultRowInfoActiveIndex(SResultRowInfo* pResultRowInfo, STaskAttr* pQueryAttr, TSKEY lastKey) { bool ascQuery = QUERY_IS_ASC_QUERY(pQueryAttr); if ((lastKey > pQueryAttr->window.ekey && ascQuery) || (lastKey < pQueryAttr->window.ekey && (!ascQuery))) { closeAllResultRows(pResultRowInfo); @@ -827,10 +801,10 @@ static void updateResultRowInfoActiveIndex(SResultRowInfo* pResultRowInfo, SQuer } } -static int32_t getNumOfRowsInTimeWindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo 
*pDataBlockInfo, TSKEY *pPrimaryColumn, +static int32_t getNumOfRowsInTimeWindow(STaskRuntimeEnv* pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn, int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, bool updateLastKey) { assert(startPos >= 0 && startPos < pDataBlockInfo->rows); - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; STableQueryInfo* item = pRuntimeEnv->current; int32_t num = -1; @@ -867,9 +841,9 @@ static int32_t getNumOfRowsInTimeWindow(SQueryRuntimeEnv* pRuntimeEnv, SDataBloc return num; } -static void doApplyFunctions(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, STimeWindow* pWin, int32_t offset, +static void doApplyFunctions(STaskRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, STimeWindow* pWin, int32_t offset, int32_t forwardStep, TSKEY* tsCol, int32_t numOfTotal, int32_t numOfOutput) { - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; bool hasAggregates = pCtx[0].isAggSet; for (int32_t k = 0; k < numOfOutput; ++k) { @@ -904,7 +878,7 @@ static void doApplyFunctions(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx } } -static int32_t getNextQualifiedWindow(SQueryAttr* pQueryAttr, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, +static int32_t getNextQualifiedWindow(STaskAttr* pQueryAttr, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, TSKEY* primaryKeys, __block_search_fn_t searchFn, int32_t prevPosition) { getNextTimeWindow(pQueryAttr, pNext); @@ -983,7 +957,7 @@ static int32_t getNextQualifiedWindow(SQueryAttr* pQueryAttr, STimeWindow* pNext return startPos; } -static FORCE_INLINE TSKEY reviseWindowEkey(SQueryAttr *pQueryAttr, STimeWindow *pWindow) { +static FORCE_INLINE TSKEY reviseWindowEkey(STaskAttr *pQueryAttr, STimeWindow *pWindow) { TSKEY ekey = -1; if (QUERY_IS_ASC_QUERY(pQueryAttr)) { ekey = pWindow->ekey; @@ -1012,20 +986,20 @@ static void setNotInterpoWindowKey(SQLFunctionCtx* pCtx, 
int32_t numOfOutput, in } } -static void saveDataBlockLastRow(SQueryRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pDataBlockInfo, SArray* pDataBlock, +static void saveDataBlockLastRow(STaskRuntimeEnv* pRuntimeEnv, SDataBlockInfo* pDataBlockInfo, SArray* pDataBlock, int32_t rowIndex) { if (pDataBlock == NULL) { return; } - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; for (int32_t k = 0; k < pQueryAttr->numOfCols; ++k) { SColumnInfoData *pColInfo = taosArrayGet(pDataBlock, k); memcpy(pRuntimeEnv->prevRow[k], ((char*)pColInfo->pData) + (pColInfo->info.bytes * rowIndex), pColInfo->info.bytes); } } -static TSKEY getStartTsKey(SQueryAttr* pQueryAttr, STimeWindow* win, const TSKEY* tsCols, int32_t rows) { +static TSKEY getStartTsKey(STaskAttr* pQueryAttr, STimeWindow* win, const TSKEY* tsCols, int32_t rows) { TSKEY ts = TSKEY_INITIAL_VAL; bool ascQuery = QUERY_IS_ASC_QUERY(pQueryAttr); @@ -1126,7 +1100,7 @@ static void doSetInputDataBlock(SOperatorInfo* pOperator, SQLFunctionCtx* pCtx, } static void doAggregateImpl(SOperatorInfo* pOperator, TSKEY startTs, SQLFunctionCtx* pCtx, SSDataBlock* pSDataBlock) { - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; for (int32_t k = 0; k < pOperator->numOfOutput; ++k) { if (functionNeedToExecute(pRuntimeEnv, &pCtx[k])) { @@ -1136,8 +1110,8 @@ static void doAggregateImpl(SOperatorInfo* pOperator, TSKEY startTs, SQLFunction } } -static void projectApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t numOfOutput) { - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; +static void projectApplyFunctions(STaskRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t numOfOutput) { + STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; for (int32_t k = 0; k < numOfOutput; ++k) { pCtx[k].startTs = pQueryAttr->window.skey; @@ -1161,7 +1135,7 @@ static void projectApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, 
SQLFunctionCtx void doTimeWindowInterpolation(SOperatorInfo* pOperator, SOptrBasicInfo* pInfo, SArray* pDataBlock, TSKEY prevTs, int32_t prevRowIndex, TSKEY curTs, int32_t curRowIndex, TSKEY windowKey, int32_t type) { - SQueryRuntimeEnv *pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv *pRuntimeEnv = pOperator->pRuntimeEnv; SExprInfo* pExpr = pOperator->pExpr; SQLFunctionCtx* pCtx = pInfo->pCtx; @@ -1226,8 +1200,8 @@ void doTimeWindowInterpolation(SOperatorInfo* pOperator, SOptrBasicInfo* pInfo, static bool setTimeWindowInterpolationStartTs(SOperatorInfo* pOperatorInfo, SQLFunctionCtx* pCtx, int32_t pos, int32_t numOfRows, SArray* pDataBlock, const TSKEY* tsCols, STimeWindow* win) { - SQueryRuntimeEnv* pRuntimeEnv = pOperatorInfo->pRuntimeEnv; - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskRuntimeEnv* pRuntimeEnv = pOperatorInfo->pRuntimeEnv; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; bool ascQuery = QUERY_IS_ASC_QUERY(pQueryAttr); @@ -1257,8 +1231,8 @@ static bool setTimeWindowInterpolationStartTs(SOperatorInfo* pOperatorInfo, SQLF static bool setTimeWindowInterpolationEndTs(SOperatorInfo* pOperatorInfo, SQLFunctionCtx* pCtx, int32_t endRowIndex, SArray* pDataBlock, const TSKEY* tsCols, TSKEY blockEkey, STimeWindow* win) { - SQueryRuntimeEnv *pRuntimeEnv = pOperatorInfo->pRuntimeEnv; - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskRuntimeEnv *pRuntimeEnv = pOperatorInfo->pRuntimeEnv; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t numOfOutput = pOperatorInfo->numOfOutput; TSKEY actualEndKey = tsCols[endRowIndex]; @@ -1289,8 +1263,8 @@ static bool setTimeWindowInterpolationEndTs(SOperatorInfo* pOperatorInfo, SQLFun static void doWindowBorderInterpolation(SOperatorInfo* pOperatorInfo, SSDataBlock* pBlock, SQLFunctionCtx* pCtx, SResultRow* pResult, STimeWindow* win, int32_t startPos, int32_t forwardStep) { - SQueryRuntimeEnv* pRuntimeEnv = pOperatorInfo->pRuntimeEnv; - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; 
+ STaskRuntimeEnv* pRuntimeEnv = pOperatorInfo->pRuntimeEnv; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; if (!pQueryAttr->timeWindowInterpo) { return; } @@ -1340,9 +1314,9 @@ static void doWindowBorderInterpolation(SOperatorInfo* pOperatorInfo, SSDataBloc static void hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResultRowInfo, SSDataBlock* pSDataBlock, int32_t tableGroupId) { STableIntervalOperatorInfo* pInfo = (STableIntervalOperatorInfo*) pOperatorInfo->info; - SQueryRuntimeEnv* pRuntimeEnv = pOperatorInfo->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperatorInfo->pRuntimeEnv; int32_t numOfOutput = pOperatorInfo->numOfOutput; - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQueryAttr->order.order); bool ascQuery = QUERY_IS_ASC_QUERY(pQueryAttr); @@ -1361,7 +1335,7 @@ static void hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul TSKEY ts = getStartTsKey(pQueryAttr, &pSDataBlock->info.window, tsCols, pSDataBlock->info.rows); STimeWindow win = getActiveTimeWindow(pResultRowInfo, ts, pQueryAttr); - bool masterScan = IS_MASTER_SCAN(pRuntimeEnv); + bool masterScan = IS_MAIN_SCAN(pRuntimeEnv); SResultRow* pResult = NULL; int32_t ret = setResultOutputBufByKey(pRuntimeEnv, pResultRowInfo, pSDataBlock->info.uid, &win, masterScan, &pResult, tableGroupId, pInfo->pCtx, @@ -1450,9 +1424,9 @@ static void hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul static void hashAllIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResultRowInfo, SSDataBlock* pSDataBlock, int32_t tableGroupId) { STableIntervalOperatorInfo* pInfo = (STableIntervalOperatorInfo*) pOperatorInfo->info; - SQueryRuntimeEnv* pRuntimeEnv = pOperatorInfo->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperatorInfo->pRuntimeEnv; int32_t numOfOutput = pOperatorInfo->numOfOutput; - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* 
pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQueryAttr->order.order); bool ascQuery = QUERY_IS_ASC_QUERY(pQueryAttr); @@ -1469,7 +1443,7 @@ static void hashAllIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pRe TSKEY ts = getStartTsKey(pQueryAttr, &pSDataBlock->info.window, tsCols, pSDataBlock->info.rows); STimeWindow win = getCurrentActiveTimeWindow(pResultRowInfo, ts, pQueryAttr); - bool masterScan = IS_MASTER_SCAN(pRuntimeEnv); + bool masterScan = IS_MAIN_SCAN(pRuntimeEnv); SResultRow* pResult = NULL; int32_t forwardStep = 0; @@ -1525,12 +1499,12 @@ static void hashAllIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pRe static void doHashGroupbyAgg(SOperatorInfo* pOperator, SGroupbyOperatorInfo *pInfo, SSDataBlock *pSDataBlock) { - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; STableQueryInfo* item = pRuntimeEnv->current; SColumnInfoData* pColInfoData = taosArrayGet(pSDataBlock->pDataBlock, pInfo->colIndex); - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int16_t bytes = pColInfoData->info.bytes; int16_t type = pColInfoData->info.type; @@ -1607,13 +1581,13 @@ static void doHashGroupbyAgg(SOperatorInfo* pOperator, SGroupbyOperatorInfo *pIn } static void doSessionWindowAggImpl(SOperatorInfo* pOperator, SSWindowOperatorInfo *pInfo, SSDataBlock *pSDataBlock) { - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; STableQueryInfo* item = pRuntimeEnv->current; // primary timestamp column SColumnInfoData* pColInfoData = taosArrayGet(pSDataBlock->pDataBlock, 0); - bool masterScan = IS_MASTER_SCAN(pRuntimeEnv); + bool masterScan = IS_MAIN_SCAN(pRuntimeEnv); SOptrBasicInfo* pBInfo = &pInfo->binfo; int64_t gap = pOperator->pRuntimeEnv->pQueryAttr->sw.gap; @@ -1692,7 +1666,7 @@ static void setResultRowKey(SResultRow* pResultRow, char* 
pData, int16_t type) { } } -static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SOptrBasicInfo *binfo, int32_t numOfCols, char *pData, int16_t type, int16_t bytes, int32_t groupIndex) { +static int32_t setGroupResultOutputBuf(STaskRuntimeEnv *pRuntimeEnv, SOptrBasicInfo *binfo, int32_t numOfCols, char *pData, int16_t type, int16_t bytes, int32_t groupIndex) { SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; int32_t *rowCellInfoOffset = binfo->rowCellInfoOffset; @@ -1746,9 +1720,9 @@ static int32_t getGroupbyColumnIndex(SGroupbyExpr *pGroupbyExpr, SSDataBlock* pD return -1; } -static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) { +static bool functionNeedToExecute(STaskRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx) { struct SResultRowEntryInfo *pResInfo = GET_RES_INFO(pCtx); - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; // in case of timestamp column, always generated results. 
int32_t functionId = pCtx->functionId; @@ -1843,9 +1817,9 @@ static int32_t setCtxTagColumnInfo(SQLFunctionCtx *pCtx, int32_t numOfOutput) { return TSDB_CODE_SUCCESS; } -static SQLFunctionCtx* createSQLFunctionCtx(SQueryRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput, +static SQLFunctionCtx* createSQLFunctionCtx(STaskRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput, int32_t** rowCellInfoOffset) { - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; SQLFunctionCtx * pFuncCtx = (SQLFunctionCtx *)calloc(numOfOutput, sizeof(SQLFunctionCtx)); if (pFuncCtx == NULL) { @@ -1965,9 +1939,9 @@ static void* destroySQLFunctionCtx(SQLFunctionCtx* pCtx, int32_t numOfOutput) { return NULL; } -static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfTables, SArray* pOperator, void* merger) { +static int32_t setupQueryRuntimeEnv(STaskRuntimeEnv *pRuntimeEnv, int32_t numOfTables, SArray* pOperator, void* merger) { //qDebug("QInfo:0x%"PRIx64" setup runtime env", GET_QID(pRuntimeEnv)); - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; pRuntimeEnv->prevGroupId = INT32_MIN; pRuntimeEnv->pQueryAttr = pQueryAttr; @@ -2019,30 +1993,30 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOf // case OP_MultiTableTimeInterval: { // pRuntimeEnv->proot = // createMultiTableTimeIntervalOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->upstream[0]->info, pRuntimeEnv->proot); +// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); // break; // } // case OP_AllMultiTableTimeInterval: { // pRuntimeEnv->proot = // createAllMultiTableTimeIntervalOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// 
setTableScanFilterOperatorInfo(pRuntimeEnv->proot->upstream[0]->info, pRuntimeEnv->proot); +// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); // break; // } // case OP_TimeWindow: { // pRuntimeEnv->proot = // createTimeIntervalOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// int32_t opType = pRuntimeEnv->proot->upstream[0]->operatorType; +// int32_t opType = pRuntimeEnv->proot->downstream[0]->operatorType; // if (opType != OP_DummyInput && opType != OP_Join) { -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->upstream[0]->info, pRuntimeEnv->proot); +// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); // } // break; // } // case OP_AllTimeWindow: { // pRuntimeEnv->proot = // createAllTimeIntervalOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// int32_t opType = pRuntimeEnv->proot->upstream[0]->operatorType; +// int32_t opType = pRuntimeEnv->proot->downstream[0]->operatorType; // if (opType != OP_DummyInput && opType != OP_Join) { -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->upstream[0]->info, pRuntimeEnv->proot); +// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); // } // break; // } @@ -2050,34 +2024,34 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOf // pRuntimeEnv->proot = // createGroupbyOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); // -// int32_t opType = pRuntimeEnv->proot->upstream[0]->operatorType; +// int32_t opType = pRuntimeEnv->proot->downstream[0]->operatorType; // if (opType != OP_DummyInput) { -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->upstream[0]->info, pRuntimeEnv->proot); +// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); // } // break; // } // case OP_SessionWindow: { // 
pRuntimeEnv->proot = // createSWindowOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// int32_t opType = pRuntimeEnv->proot->upstream[0]->operatorType; +// int32_t opType = pRuntimeEnv->proot->downstream[0]->operatorType; // if (opType != OP_DummyInput) { -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->upstream[0]->info, pRuntimeEnv->proot); +// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); // } // break; // } // case OP_MultiTableAggregate: { // pRuntimeEnv->proot = // createMultiTableAggOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->upstream[0]->info, pRuntimeEnv->proot); +// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); // break; // } // case OP_Aggregate: { // pRuntimeEnv->proot = // createAggregateOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); // -// int32_t opType = pRuntimeEnv->proot->upstream[0]->operatorType; +// int32_t opType = pRuntimeEnv->proot->downstream[0]->operatorType; // if (opType != OP_DummyInput && opType != OP_Join) { -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->upstream[0]->info, pRuntimeEnv->proot); +// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); // } // break; // } @@ -2099,9 +2073,9 @@ static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOf // // case OP_StateWindow: { // pRuntimeEnv->proot = createStatewindowOperatorInfo(pRuntimeEnv, pRuntimeEnv->proot, pQueryAttr->pExpr1, pQueryAttr->numOfOutput); -// int32_t opType = pRuntimeEnv->proot->upstream[0]->operatorType; +// int32_t opType = pRuntimeEnv->proot->downstream[0]->operatorType; // if (opType != OP_DummyInput) { -// setTableScanFilterOperatorInfo(pRuntimeEnv->proot->upstream[0]->info, pRuntimeEnv->proot); +// 
setTableScanFilterOperatorInfo(pRuntimeEnv->proot->downstream[0]->info, pRuntimeEnv->proot); // } // break; // } @@ -2187,8 +2161,8 @@ _clean: return TSDB_CODE_QRY_OUT_OF_MEMORY; } -static void doFreeQueryHandle(SQueryRuntimeEnv* pRuntimeEnv) { - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; +static void doFreeQueryHandle(STaskRuntimeEnv* pRuntimeEnv) { + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; // tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle); pRuntimeEnv->pQueryHandle = NULL; @@ -2197,7 +2171,7 @@ static void doFreeQueryHandle(SQueryRuntimeEnv* pRuntimeEnv) { // assert(pMemRef->ref == 0 && pMemRef->snapshot.imem == NULL && pMemRef->snapshot.mem == NULL); } -static void destroyTsComp(SQueryRuntimeEnv *pRuntimeEnv, SQueryAttr *pQueryAttr) { +static void destroyTsComp(STaskRuntimeEnv *pRuntimeEnv, STaskAttr *pQueryAttr) { if (pQueryAttr->tsCompQuery && pRuntimeEnv->outputBuf && pRuntimeEnv->outputBuf->pDataBlock && taosArrayGetSize(pRuntimeEnv->outputBuf->pDataBlock) > 0) { SColumnInfoData* pColInfoData = taosArrayGet(pRuntimeEnv->outputBuf->pDataBlock, 0); if (pColInfoData) { @@ -2210,8 +2184,8 @@ static void destroyTsComp(SQueryRuntimeEnv *pRuntimeEnv, SQueryAttr *pQueryAttr) } } -static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) { - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; +static void teardownQueryRuntimeEnv(STaskRuntimeEnv *pRuntimeEnv) { + STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; SQInfo* pQInfo = (SQInfo*) pRuntimeEnv->qinfo; //qDebug("QInfo:0x%"PRIx64" teardown runtime env", pQInfo->qId); @@ -2271,7 +2245,7 @@ bool isQueryKilled(SQInfo *pQInfo) { void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED;} -//static bool isFixedOutputQuery(SQueryAttr* pQueryAttr) { +//static bool isFixedOutputQuery(STaskAttr* pQueryAttr) { // if (QUERY_IS_INTERVAL_QUERY(pQueryAttr)) { // return false; // } @@ -2297,7 +2271,7 @@ void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = 
TSDB_CODE_TSC_QUERY_CANCELL //} // todo refactor with isLastRowQuery -//bool isPointInterpoQuery(SQueryAttr *pQueryAttr) { +//bool isPointInterpoQuery(STaskAttr *pQueryAttr) { // for (int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) { // int32_t functionId = pQueryAttr->pExpr1[i].base.functionId; // if (functionId == FUNCTION_INTERP) { @@ -2308,7 +2282,7 @@ void setQueryKilled(SQInfo *pQInfo) { pQInfo->code = TSDB_CODE_TSC_QUERY_CANCELL // return false; //} -static bool isFirstLastRowQuery(SQueryAttr *pQueryAttr) { +static bool isFirstLastRowQuery(STaskAttr *pQueryAttr) { for (int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) { int32_t functionID = getExprFunctionId(&pQueryAttr->pExpr1[i]); if (functionID == FUNCTION_LAST_ROW) { @@ -2319,7 +2293,7 @@ static bool isFirstLastRowQuery(SQueryAttr *pQueryAttr) { return false; } -static bool isCachedLastQuery(SQueryAttr *pQueryAttr) { +static bool isCachedLastQuery(STaskAttr *pQueryAttr) { for (int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) { int32_t functionId = getExprFunctionId(&pQueryAttr->pExpr1[i]); if (functionId == FUNCTION_LAST || functionId == FUNCTION_LAST_DST) { @@ -2354,7 +2328,7 @@ static bool isCachedLastQuery(SQueryAttr *pQueryAttr) { * The following 4 kinds of query are treated as the tags query * tagprj, tid_tag query, count(tbname), 'abc' (user defined constant value column) query */ -bool onlyQueryTags(SQueryAttr* pQueryAttr) { +bool onlyQueryTags(STaskAttr* pQueryAttr) { for(int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) { SExprInfo* pExprInfo = &pQueryAttr->pExpr1[i]; @@ -2373,7 +2347,7 @@ bool onlyQueryTags(SQueryAttr* pQueryAttr) { ///////////////////////////////////////////////////////////////////////////////////////////// -void getAlignQueryTimeWindow(SQueryAttr *pQueryAttr, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) { +void getAlignQueryTimeWindow(STaskAttr *pQueryAttr, int64_t key, int64_t keyFirst, int64_t keyLast, STimeWindow *win) { assert(key >= keyFirst && key <= 
keyLast && pQueryAttr->interval.sliding <= pQueryAttr->interval.interval); win->skey = taosTimeTruncate(key, &pQueryAttr->interval, pQueryAttr->precision); @@ -2394,7 +2368,7 @@ void getAlignQueryTimeWindow(SQueryAttr *pQueryAttr, int64_t key, int64_t keyFir /* * todo add more parameters to check soon.. */ -bool colIdCheck(SQueryAttr *pQueryAttr, uint64_t qId) { +bool colIdCheck(STaskAttr *pQueryAttr, uint64_t qId) { // load data column information is incorrect for (int32_t i = 0; i < pQueryAttr->numOfCols - 1; ++i) { if (pQueryAttr->tableCols[i].colId == pQueryAttr->tableCols[i + 1].colId) { @@ -2408,7 +2382,7 @@ bool colIdCheck(SQueryAttr *pQueryAttr, uint64_t qId) { // todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which // the scan order is not matter -static bool onlyOneQueryType(SQueryAttr *pQueryAttr, int32_t functId, int32_t functIdDst) { +static bool onlyOneQueryType(STaskAttr *pQueryAttr, int32_t functId, int32_t functIdDst) { for (int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) { int32_t functionId = getExprFunctionId(&pQueryAttr->pExpr1[i]); @@ -2425,13 +2399,13 @@ static bool onlyOneQueryType(SQueryAttr *pQueryAttr, int32_t functId, int32_t fu return true; } -static bool onlyFirstQuery(SQueryAttr *pQueryAttr) { return onlyOneQueryType(pQueryAttr, FUNCTION_FIRST, FUNCTION_FIRST_DST); } +static bool onlyFirstQuery(STaskAttr *pQueryAttr) { return onlyOneQueryType(pQueryAttr, FUNCTION_FIRST, FUNCTION_FIRST_DST); } -static bool onlyLastQuery(SQueryAttr *pQueryAttr) { return onlyOneQueryType(pQueryAttr, FUNCTION_LAST, FUNCTION_LAST_DST); } +static bool onlyLastQuery(STaskAttr *pQueryAttr) { return onlyOneQueryType(pQueryAttr, FUNCTION_LAST, FUNCTION_LAST_DST); } -static bool notContainSessionOrStateWindow(SQueryAttr *pQueryAttr) { return !(pQueryAttr->sw.gap > 0 || pQueryAttr->stateWindow); } +static bool notContainSessionOrStateWindow(STaskAttr *pQueryAttr) { return !(pQueryAttr->sw.gap > 0 || pQueryAttr->stateWindow); } -static 
int32_t updateBlockLoadStatus(SQueryAttr *pQuery, int32_t status) { +static int32_t updateBlockLoadStatus(STaskAttr *pQuery, int32_t status) { bool hasFirstLastFunc = false; bool hasOtherFunc = false; @@ -2465,7 +2439,7 @@ static int32_t updateBlockLoadStatus(SQueryAttr *pQuery, int32_t status) { return status; } -static void doUpdateLastKey(SQueryAttr* pQueryAttr) { +static void doUpdateLastKey(STaskAttr* pQueryAttr) { STimeWindow* win = &pQueryAttr->window; size_t num = taosArrayGetSize(pQueryAttr->tableGroupInfo.pGroupList); @@ -2485,7 +2459,7 @@ static void doUpdateLastKey(SQueryAttr* pQueryAttr) { } static void updateDataCheckOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool stableQuery) { - SQueryAttr* pQueryAttr = pQInfo->runtimeEnv.pQueryAttr; + STaskAttr* pQueryAttr = pQInfo->runtimeEnv.pQueryAttr; // in case of point-interpolation query, use asc order scan char msg[] = "QInfo:0x%"PRIx64" scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64 @@ -2580,8 +2554,8 @@ static void updateDataCheckOrder(SQInfo *pQInfo, SQueryTableMsg* pQueryMsg, bool } } -static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) { - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; +static void getIntermediateBufInfo(STaskRuntimeEnv* pRuntimeEnv, int32_t* ps, int32_t* rowsize) { + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t MIN_ROWS_PER_PAGE = 4; *rowsize = (int32_t)(pQueryAttr->resultRowSize * getRowNumForMultioutput(pQueryAttr, pQueryAttr->topBotQuery, pQueryAttr->stableQuery)); @@ -2596,8 +2570,8 @@ static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, i #define IS_PREFILTER_TYPE(_t) ((_t) != TSDB_DATA_TYPE_BINARY && (_t) != TSDB_DATA_TYPE_NCHAR) -//static FORCE_INLINE bool doFilterByBlockStatistics(SQueryRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx, int32_t numOfRows) { -// SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; +//static 
FORCE_INLINE bool doFilterByBlockStatistics(STaskRuntimeEnv* pRuntimeEnv, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx, int32_t numOfRows) { +// STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; // // if (pDataStatis == NULL || pQueryAttr->pFilters == NULL) { // return true; @@ -2606,7 +2580,7 @@ static void getIntermediateBufInfo(SQueryRuntimeEnv* pRuntimeEnv, int32_t* ps, i // return filterRangeExecute(pQueryAttr->pFilters, pDataStatis, pQueryAttr->numOfCols, numOfRows); //} -static bool overlapWithTimeWindow(SQueryAttr* pQueryAttr, SDataBlockInfo* pBlockInfo) { +static bool overlapWithTimeWindow(STaskAttr* pQueryAttr, SDataBlockInfo* pBlockInfo) { STimeWindow w = {0}; TSKEY sk = MIN(pQueryAttr->window.skey, pQueryAttr->window.ekey); @@ -2655,7 +2629,7 @@ static bool overlapWithTimeWindow(SQueryAttr* pQueryAttr, SDataBlockInfo* pBlock return false; } -static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, TSKEY key, bool ascQuery) { +static int32_t doTSJoinFilter(STaskRuntimeEnv *pRuntimeEnv, TSKEY key, bool ascQuery) { STSElem elem = tsBufGetElem(pRuntimeEnv->pTsBuf); #if defined(_DEBUG_VIEW) @@ -2781,7 +2755,7 @@ void doCompactSDataBlock(SSDataBlock* pBlock, int32_t numOfRows, int8_t* p) { } } -void filterRowsInDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SSingleColumnFilterInfo* pFilterInfo, int32_t numOfFilterCols, +void filterRowsInDataBlock(STaskRuntimeEnv* pRuntimeEnv, SSingleColumnFilterInfo* pFilterInfo, int32_t numOfFilterCols, SSDataBlock* pBlock, bool ascQuery) { int32_t numOfRows = pBlock->info.rows; @@ -2823,7 +2797,7 @@ void filterRowsInDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SSingleColumnFilterInf tfree(p); } -void filterColRowsInDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SSDataBlock* pBlock, bool ascQuery) { +void filterColRowsInDataBlock(STaskRuntimeEnv* pRuntimeEnv, SSDataBlock* pBlock, bool ascQuery) { int32_t numOfRows = pBlock->info.rows; int8_t *p = NULL; @@ -2913,22 +2887,35 @@ void doSetFilterColumnInfo(SSingleColumnFilterInfo* 
pFilterInfo, int32_t numOfFi } } -int32_t loadDataBlockOnDemand(SQueryRuntimeEnv* pRuntimeEnv, STableScanInfo* pTableScanInfo, SSDataBlock* pBlock, - uint32_t* status) { - *status = BLK_DATA_NO_NEEDED; - pBlock->pDataBlock = NULL; - pBlock->pBlockAgg = NULL; - - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; - int64_t groupId = pRuntimeEnv->current->groupIndex; - bool ascQuery = QUERY_IS_ASC_QUERY(pQueryAttr); - - SQInfo* pQInfo = pRuntimeEnv->qinfo; - SQueryCostInfo* pCost = &pQInfo->summary; +int32_t loadDataBlock(STaskInfo *pTaskInfo, STableScanInfo* pTableScanInfo, SSDataBlock* pBlock, uint32_t* status) { + STaskCostInfo* pCost = &pTaskInfo->cost; pCost->totalBlocks += 1; pCost->totalRows += pBlock->info.rows; + pCost->totalCheckedRows += pBlock->info.rows; + pCost->loadBlocks += 1; + +// pBlock->pDataBlock = tsdbRetrieveDataBlock(pTableScanInfo->pQueryHandle, NULL); + if (pBlock->pDataBlock == NULL) { + return terrno; + } +} + +int32_t loadDataBlockOnDemand(STaskInfo *pTaskInfo, STableScanInfo* pTableScanInfo, SSDataBlock* pBlock, uint32_t* status) { + *status = BLK_DATA_NO_NEEDED; + + pBlock->pDataBlock = NULL; + pBlock->pBlockAgg = NULL; + +// int64_t groupId = pRuntimeEnv->current->groupIndex; +// bool ascQuery = QUERY_IS_ASC_QUERY(pQueryAttr); + + STaskCostInfo* pCost = &pTaskInfo->cost; + + pCost->totalBlocks += 1; + pCost->totalRows += pBlock->info.rows; +#if 0 if (pRuntimeEnv->pTsBuf != NULL) { (*status) = BLK_DATA_ALL_NEEDED; @@ -2953,7 +2940,7 @@ int32_t loadDataBlockOnDemand(SQueryRuntimeEnv* pRuntimeEnv, STableScanInfo* pTa // Calculate all time windows that are overlapping or contain current data block. // If current data block is contained by all possible time window, do not load current data block. 
if (/*pQueryAttr->pFilters || */pQueryAttr->groupbyColumn || pQueryAttr->sw.gap > 0 || - (QUERY_IS_INTERVAL_QUERY(pQueryAttr) && overlapWithTimeWindow(pQueryAttr, &pBlock->info))) { + (QUERY_IS_INTERVAL_QUERY(pQueryAttr) && overlapWithTimeWindow(pTaskInfo, &pBlock->info))) { (*status) = BLK_DATA_ALL_NEEDED; } @@ -2966,7 +2953,7 @@ int32_t loadDataBlockOnDemand(SQueryRuntimeEnv* pRuntimeEnv, STableScanInfo* pTa if (QUERY_IS_INTERVAL_QUERY(pQueryAttr)) { SResultRow* pResult = NULL; - bool masterScan = IS_MASTER_SCAN(pRuntimeEnv); + bool masterScan = IS_MAIN_SCAN(pRuntimeEnv); TSKEY k = ascQuery? pBlock->info.window.skey : pBlock->info.window.ekey; STimeWindow win = getActiveTimeWindow(pTableScanInfo->pResultRowInfo, k, pQueryAttr); @@ -2995,7 +2982,7 @@ int32_t loadDataBlockOnDemand(SQueryRuntimeEnv* pRuntimeEnv, STableScanInfo* pTa } SDataBlockInfo* pBlockInfo = &pBlock->info; - *status = updateBlockLoadStatus(pRuntimeEnv->pQueryAttr, *status); +// *status = updateBlockLoadStatus(pRuntimeEnv->pQueryAttr, *status); if ((*status) == BLK_DATA_NO_NEEDED || (*status) == BLK_DATA_DISCARD) { //qDebug("QInfo:0x%"PRIx64" data block discard, brange:%" PRId64 "-%" PRId64 ", rows:%d", pQInfo->qId, pBlockInfo->window.skey, @@ -3022,7 +3009,7 @@ int32_t loadDataBlockOnDemand(SQueryRuntimeEnv* pRuntimeEnv, STableScanInfo* pTa if (QUERY_IS_INTERVAL_QUERY(pQueryAttr)) { SResultRow* pResult = NULL; - bool masterScan = IS_MASTER_SCAN(pRuntimeEnv); + bool masterScan = IS_MAIN_SCAN(pRuntimeEnv); TSKEY k = ascQuery? 
pBlock->info.window.skey : pBlock->info.window.ekey; STimeWindow win = getActiveTimeWindow(pTableScanInfo->pResultRowInfo, k, pQueryAttr); @@ -3074,7 +3061,7 @@ int32_t loadDataBlockOnDemand(SQueryRuntimeEnv* pRuntimeEnv, STableScanInfo* pTa // filterColRowsInDataBlock(pRuntimeEnv, pBlock, ascQuery); // } } - +#endif return TSDB_CODE_SUCCESS; } @@ -3184,10 +3171,10 @@ static SColumnInfo* doGetTagColumnInfoById(SColumnInfo* pTagColList, int32_t num } void setTagValue(SOperatorInfo* pOperatorInfo, void *pTable, SQLFunctionCtx* pCtx, int32_t numOfOutput) { - SQueryRuntimeEnv* pRuntimeEnv = pOperatorInfo->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperatorInfo->pRuntimeEnv; SExprInfo *pExpr = pOperatorInfo->pExpr; - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; SExprInfo* pExprInfo = &pExpr[0]; int32_t functionId = getExprFunctionId(pExprInfo); @@ -3242,7 +3229,7 @@ void setTagValue(SOperatorInfo* pOperatorInfo, void *pTable, SQLFunctionCtx* pCt } } -void copyToSDataBlock(SQueryRuntimeEnv* pRuntimeEnv, int32_t threshold, SSDataBlock* pBlock, int32_t* offset) { +void copyToSDataBlock(STaskRuntimeEnv* pRuntimeEnv, int32_t threshold, SSDataBlock* pBlock, int32_t* offset) { SGroupResInfo* pGroupResInfo = &pRuntimeEnv->groupResInfo; pBlock->info.rows = 0; @@ -3293,9 +3280,8 @@ static void updateTableQueryInfoForReverseScan(STableQueryInfo *pTableQueryInfo) } } -static void setupQueryRangeForReverseScan(SQueryRuntimeEnv* pRuntimeEnv) { - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; - +static void setupQueryRangeForReverseScan(STableScanInfo* pTableScanInfo) { +#if 0 int32_t numOfGroups = (int32_t)(GET_NUM_OF_TABLEGROUP(pRuntimeEnv)); for(int32_t i = 0; i < numOfGroups; ++i) { SArray *group = GET_TABLEGROUP(pRuntimeEnv, i); @@ -3314,6 +3300,8 @@ static void setupQueryRangeForReverseScan(SQueryRuntimeEnv* pRuntimeEnv) { // assert(pCheckInfo->pTable == pTableKeyInfo->pTable); } } +#endif + } void 
switchCtxOrder(SQLFunctionCtx* pCtx, int32_t numOfOutput) { @@ -3337,7 +3325,7 @@ int32_t initResultRow(SResultRow *pResultRow) { * +------------+-------------------------------------------+-------------------------------------------+ * offset[0] offset[1] offset[2] */ -void setDefaultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SOptrBasicInfo *pInfo, int64_t uid, int32_t stage) { +void setDefaultOutputBuf(STaskRuntimeEnv *pRuntimeEnv, SOptrBasicInfo *pInfo, int64_t uid, int32_t stage) { SQLFunctionCtx* pCtx = pInfo->pCtx; SSDataBlock* pDataBlock = pInfo->pRes; int32_t* rowCellInfoOffset = pInfo->rowCellInfoOffset; @@ -3461,40 +3449,38 @@ void initCtxOutputBuffer(SQLFunctionCtx* pCtx, int32_t size) { } } -void setQueryStatus(SQueryRuntimeEnv *pRuntimeEnv, int8_t status) { +void setTaskStatus(STaskInfo *pTaskInfo, int8_t status) { if (status == QUERY_NOT_COMPLETED) { - pRuntimeEnv->status = status; + pTaskInfo->status = status; } else { // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first - CLEAR_QUERY_STATUS(pRuntimeEnv, QUERY_NOT_COMPLETED); - pRuntimeEnv->status |= status; + CLEAR_QUERY_STATUS(pTaskInfo, QUERY_NOT_COMPLETED); + pTaskInfo->status |= status; } } -static void setupEnvForReverseScan(SQueryRuntimeEnv *pRuntimeEnv, SResultRowInfo *pResultRowInfo, SQLFunctionCtx* pCtx, int32_t numOfOutput) { - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; - - if (pRuntimeEnv->pTsBuf) { - SWITCH_ORDER(pRuntimeEnv->pTsBuf->cur.order); - bool ret = tsBufNextPos(pRuntimeEnv->pTsBuf); - assert(ret); - } +static void setupEnvForReverseScan(STableScanInfo *pTableScanInfo, SQLFunctionCtx* pCtx, int32_t numOfOutput) { +// if (pRuntimeEnv->pTsBuf) { +// SWITCH_ORDER(pRuntimeEnv->pTsBuf->cur.order); +// bool ret = tsBufNextPos(pRuntimeEnv->pTsBuf); +// assert(ret); +// } // reverse order time range - SWAP(pQueryAttr->window.skey, pQueryAttr->window.ekey, TSKEY); + SWAP(pTableScanInfo->window.skey, pTableScanInfo->window.ekey, TSKEY); - 
SET_REVERSE_SCAN_FLAG(pRuntimeEnv); - setQueryStatus(pRuntimeEnv, QUERY_NOT_COMPLETED); + SET_REVERSE_SCAN_FLAG(pTableScanInfo); +// setTaskStatus(pTableScanInfo, QUERY_NOT_COMPLETED); switchCtxOrder(pCtx, numOfOutput); - SWITCH_ORDER(pQueryAttr->order.order); - setupQueryRangeForReverseScan(pRuntimeEnv); + SWITCH_ORDER(pTableScanInfo->order); + setupQueryRangeForReverseScan(pTableScanInfo); } void finalizeQueryResult(SOperatorInfo* pOperator, SQLFunctionCtx* pCtx, SResultRowInfo* pResultRowInfo, int32_t* rowCellInfoOffset) { - SQueryRuntimeEnv *pRuntimeEnv = pOperator->pRuntimeEnv; - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskRuntimeEnv *pRuntimeEnv = pOperator->pRuntimeEnv; + STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t numOfOutput = pOperator->numOfOutput; if (pQueryAttr->groupbyColumn || QUERY_IS_INTERVAL_QUERY(pQueryAttr) || pQueryAttr->sw.gap > 0 || pQueryAttr->stateWindow) { @@ -3539,7 +3525,7 @@ void finalizeQueryResult(SOperatorInfo* pOperator, SQLFunctionCtx* pCtx, SResult } } -static bool hasMainOutput(SQueryAttr *pQueryAttr) { +static bool hasMainOutput(STaskAttr *pQueryAttr) { for (int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) { int32_t functionId = getExprFunctionId(&pQueryAttr->pExpr1[i]); @@ -3551,7 +3537,7 @@ static bool hasMainOutput(SQueryAttr *pQueryAttr) { return false; } -STableQueryInfo *createTableQueryInfo(SQueryAttr* pQueryAttr, void* pTable, bool groupbyColumn, STimeWindow win, void* buf) { +STableQueryInfo *createTableQueryInfo(STaskAttr* pQueryAttr, void* pTable, bool groupbyColumn, STimeWindow win, void* buf) { STableQueryInfo *pTableQueryInfo = buf; pTableQueryInfo->win = win; @@ -3602,7 +3588,7 @@ void destroyTableQueryInfoImpl(STableQueryInfo *pTableQueryInfo) { cleanupResultRowInfo(&pTableQueryInfo->resInfo); } -void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult, SQLFunctionCtx* pCtx, +void setResultRowOutputBufInitCtx(STaskRuntimeEnv *pRuntimeEnv, SResultRow *pResult, 
SQLFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowCellInfoOffset) { // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group SFilePage* bufPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId); @@ -3635,7 +3621,7 @@ void setResultRowOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pRe } } -void doSetTableGroupOutputBuf(SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo, SQLFunctionCtx* pCtx, +void doSetTableGroupOutputBuf(STaskRuntimeEnv* pRuntimeEnv, SResultRowInfo* pResultRowInfo, SQLFunctionCtx* pCtx, int32_t* rowCellInfoOffset, int32_t numOfOutput, int32_t tableGroupId) { // for simple group by query without interval, all the tables belong to one group result. int64_t uid = 0; @@ -3659,7 +3645,7 @@ void doSetTableGroupOutputBuf(SQueryRuntimeEnv* pRuntimeEnv, SResultRowInfo* pRe setResultRowOutputBufInitCtx(pRuntimeEnv, pResultRow, pCtx, numOfOutput, rowCellInfoOffset); } -void setExecutionContext(SQueryRuntimeEnv* pRuntimeEnv, SOptrBasicInfo* pInfo, int32_t numOfOutput, int32_t tableGroupId, +void setExecutionContext(STaskRuntimeEnv* pRuntimeEnv, SOptrBasicInfo* pInfo, int32_t numOfOutput, int32_t tableGroupId, TSKEY nextKey) { STableQueryInfo *pTableQueryInfo = pRuntimeEnv->current; @@ -3675,7 +3661,7 @@ void setExecutionContext(SQueryRuntimeEnv* pRuntimeEnv, SOptrBasicInfo* pInfo, i pRuntimeEnv->prevGroupId = tableGroupId; } -void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult, SQLFunctionCtx* pCtx, +void setResultOutputBuf(STaskRuntimeEnv *pRuntimeEnv, SResultRow *pResult, SQLFunctionCtx* pCtx, int32_t numOfCols, int32_t* rowCellInfoOffset) { // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group SFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pageId); @@ -3698,8 +3684,8 @@ void setResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SResultRow *pResult, SQLF } } -void setCtxTagForJoin(SQueryRuntimeEnv* pRuntimeEnv, 
SQLFunctionCtx* pCtx, SExprInfo* pExprInfo, void* pTable) { - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; +void setCtxTagForJoin(STaskRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, SExprInfo* pExprInfo, void* pTable) { + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; SSqlExpr* pExpr = &pExprInfo->base; // if (pQueryAttr->stableQuery && (pRuntimeEnv->pTsBuf != NULL) && @@ -3723,8 +3709,8 @@ void setCtxTagForJoin(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, SExpr // } } -int32_t setTimestampListJoinInfo(SQueryRuntimeEnv* pRuntimeEnv, SVariant* pTag, STableQueryInfo *pTableQueryInfo) { - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; +int32_t setTimestampListJoinInfo(STaskRuntimeEnv* pRuntimeEnv, SVariant* pTag, STableQueryInfo *pTableQueryInfo) { + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; assert(pRuntimeEnv->pTsBuf != NULL); @@ -3766,9 +3752,9 @@ int32_t setTimestampListJoinInfo(SQueryRuntimeEnv* pRuntimeEnv, SVariant* pTag, } // TODO refactor: this funciton should be merged with setparamForStableStddevColumnData function. 
-void setParamForStableStddev(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, int32_t numOfOutput, SExprInfo* pExprInfo) { +void setParamForStableStddev(STaskRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, int32_t numOfOutput, SExprInfo* pExprInfo) { #if 0 - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t numOfExprs = pQueryAttr->numOfOutput; for(int32_t i = 0; i < numOfExprs; ++i) { @@ -3801,8 +3787,8 @@ void setParamForStableStddev(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx #endif } -void setParamForStableStddevByColData(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, int32_t numOfOutput, SExprInfo* pExpr, char* val, int16_t bytes) { - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; +void setParamForStableStddevByColData(STaskRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, int32_t numOfOutput, SExprInfo* pExpr, char* val, int16_t bytes) { + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; #if 0 int32_t numOfExprs = pQueryAttr->numOfOutput; for(int32_t i = 0; i < numOfExprs; ++i) { @@ -3842,8 +3828,8 @@ void setParamForStableStddevByColData(SQueryRuntimeEnv* pRuntimeEnv, SQLFunction * merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there * is a previous result generated or not. 
*/ -void setIntervalQueryRange(SQueryRuntimeEnv *pRuntimeEnv, TSKEY key) { - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; +void setIntervalQueryRange(STaskRuntimeEnv *pRuntimeEnv, TSKEY key) { + STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; STableQueryInfo *pTableQueryInfo = pRuntimeEnv->current; SResultRowInfo *pResultRowInfo = &pTableQueryInfo->resInfo; @@ -3887,8 +3873,8 @@ void setIntervalQueryRange(SQueryRuntimeEnv *pRuntimeEnv, TSKEY key) { * @param result */ -static int32_t doCopyToSDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo, int32_t orderType, SSDataBlock* pBlock) { - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; +static int32_t doCopyToSDataBlock(STaskRuntimeEnv* pRuntimeEnv, SGroupResInfo* pGroupResInfo, int32_t orderType, SSDataBlock* pBlock) { + STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t numOfRows = getNumOfTotalRes(pGroupResInfo); int32_t numOfResult = pBlock->info.rows; // there are already exists result rows @@ -3946,7 +3932,7 @@ static int32_t doCopyToSDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SGroupResInfo* return 0; } -static void toSSDataBlock(SGroupResInfo *pGroupResInfo, SQueryRuntimeEnv* pRuntimeEnv, SSDataBlock* pBlock) { +static void toSSDataBlock(SGroupResInfo *pGroupResInfo, STaskRuntimeEnv* pRuntimeEnv, SSDataBlock* pBlock) { assert(pGroupResInfo->currentGroup <= pGroupResInfo->totalGroup); pBlock->info.rows = 0; @@ -3954,7 +3940,7 @@ static void toSSDataBlock(SGroupResInfo *pGroupResInfo, SQueryRuntimeEnv* pRunti return; } - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t orderType = TSDB_ORDER_ASC;//(pQueryAttr->pGroupbyExpr != NULL) ? 
pQueryAttr->pGroupbyExpr->orderType : TSDB_ORDER_ASC; doCopyToSDataBlock(pRuntimeEnv, pGroupResInfo, orderType, pBlock); @@ -3969,9 +3955,9 @@ static void toSSDataBlock(SGroupResInfo *pGroupResInfo, SQueryRuntimeEnv* pRunti } } -static void updateNumOfRowsInResultRows(SQueryRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, int32_t numOfOutput, +static void updateNumOfRowsInResultRows(STaskRuntimeEnv* pRuntimeEnv, SQLFunctionCtx* pCtx, int32_t numOfOutput, SResultRowInfo* pResultRowInfo, int32_t* rowCellInfoOffset) { - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; // update the number of result for each, only update the number of rows for the corresponding window result. if (QUERY_IS_INTERVAL_QUERY(pQueryAttr)) { @@ -4000,8 +3986,8 @@ static int32_t compressQueryColData(SColumnInfoData *pColRes, int32_t numOfRows, } static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data, int8_t compressed, int32_t *compLen) { - SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; + STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; SSDataBlock* pRes = pRuntimeEnv->outputBuf; @@ -4070,7 +4056,7 @@ static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data // Check if query is completed or not for stable query or normal table query respectively. 
if (Q_STATUS_EQUAL(pRuntimeEnv->status, QUERY_COMPLETED) && pRuntimeEnv->proot->status == OP_EXEC_DONE) { - setQueryStatus(pRuntimeEnv, QUERY_OVER); +// setTaskStatus(pOperator->pTaskInfo, QUERY_OVER); } } @@ -4187,8 +4173,8 @@ void calculateOperatorProfResults(SQInfo* pQInfo) { } void queryCostStatis(SQInfo *pQInfo) { - SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; - SQueryCostInfo *pSummary = &pQInfo->summary; + STaskRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + STaskCostInfo *pSummary = &pQInfo->summary; uint64_t hashSize = taosHashGetMemSize(pQInfo->runtimeEnv.pResultRowHashTable); hashSize += taosHashGetMemSize(pRuntimeEnv->tableqinfoGroupInfo.map); @@ -4226,8 +4212,8 @@ void queryCostStatis(SQInfo *pQInfo) { } } -//static void updateOffsetVal(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) { -// SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; +//static void updateOffsetVal(STaskRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo) { +// STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; // STableQueryInfo* pTableQueryInfo = pRuntimeEnv->current; // // int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQueryAttr->order.order); @@ -4262,8 +4248,8 @@ void queryCostStatis(SQInfo *pQInfo) { // pBlockInfo->window.skey, pBlockInfo->window.ekey, pBlockInfo->rows, numOfRes, pQuery->current->lastKey); //} -//void skipBlocks(SQueryRuntimeEnv *pRuntimeEnv) { -// SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; +//void skipBlocks(STaskRuntimeEnv *pRuntimeEnv) { +// STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; // // if (pQueryAttr->limit.offset <= 0 || pQueryAttr->numOfFilterCols > 0) { // return; @@ -4301,8 +4287,8 @@ void queryCostStatis(SQInfo *pQInfo) { // } //} -//static TSKEY doSkipIntervalProcess(SQueryRuntimeEnv* pRuntimeEnv, STimeWindow* win, SDataBlockInfo* pBlockInfo, STableQueryInfo* pTableQueryInfo) { -// SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; +//static TSKEY doSkipIntervalProcess(STaskRuntimeEnv* pRuntimeEnv, STimeWindow* win, 
SDataBlockInfo* pBlockInfo, STableQueryInfo* pTableQueryInfo) { +// STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; // SResultRowInfo *pWindowResInfo = &pRuntimeEnv->resultRowInfo; // // assert(pQueryAttr->limit.offset == 0); @@ -4352,8 +4338,8 @@ void queryCostStatis(SQInfo *pQInfo) { // return true; //} -//static bool skipTimeInterval(SQueryRuntimeEnv *pRuntimeEnv, TSKEY* start) { -// SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; +//static bool skipTimeInterval(STaskRuntimeEnv *pRuntimeEnv, TSKEY* start) { +// STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; // if (QUERY_IS_ASC_QUERY(pQueryAttr)) { // assert(*start <= pRuntimeEnv->current->lastKey); // } else { @@ -4463,18 +4449,18 @@ void queryCostStatis(SQInfo *pQInfo) { //} void appendUpstream(SOperatorInfo* p, SOperatorInfo* pUpstream) { - if (p->upstream == NULL) { - assert(p->numOfUpstream == 0); + if (p->pDownstream == NULL) { + assert(p->numOfDownstream == 0); } - p->upstream = realloc(p->upstream, POINTER_BYTES * (p->numOfUpstream + 1)); - p->upstream[p->numOfUpstream++] = pUpstream; + p->pDownstream = realloc(p->pDownstream, POINTER_BYTES * (p->numOfDownstream + 1)); + p->pDownstream[p->numOfDownstream++] = pUpstream; } static void doDestroyTableQueryInfo(STableGroupInfo* pTableqinfoGroupInfo); -static int32_t setupQueryHandle(void* tsdb, SQueryRuntimeEnv* pRuntimeEnv, int64_t qId, bool isSTableQuery) { - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; +static int32_t setupQueryHandle(void* tsdb, STaskRuntimeEnv* pRuntimeEnv, int64_t qId, bool isSTableQuery) { + STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; #if 0 // TODO set the tags scan handle if (onlyQueryTags(pQueryAttr)) { @@ -4533,9 +4519,9 @@ static int32_t setupQueryHandle(void* tsdb, SQueryRuntimeEnv* pRuntimeEnv, int64 int32_t doInitQInfo(SQInfo* pQInfo, STSBuf* pTsBuf, void* tsdb, void* sourceOptr, int32_t tbScanner, SArray* pOperator, void* param) { - SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + STaskRuntimeEnv 
*pRuntimeEnv = &pQInfo->runtimeEnv; - SQueryAttr *pQueryAttr = pQInfo->runtimeEnv.pQueryAttr; + STaskAttr *pQueryAttr = pQInfo->runtimeEnv.pQueryAttr; pQueryAttr->tsdb = tsdb; if (tsdb != NULL) { @@ -4616,25 +4602,25 @@ int32_t doInitQInfo(SQInfo* pQInfo, STSBuf* pTsBuf, void* tsdb, void* sourceOptr return code; } - setQueryStatus(pRuntimeEnv, QUERY_NOT_COMPLETED); +// setTaskStatus(pOperator->pTaskInfo, QUERY_NOT_COMPLETED); return TSDB_CODE_SUCCESS; } -static void doTableQueryInfoTimeWindowCheck(SQueryAttr* pQueryAttr, STableQueryInfo* pTableQueryInfo) { - if (QUERY_IS_ASC_QUERY(pQueryAttr)) { +static void doTableQueryInfoTimeWindowCheck(STaskInfo* pTaskInfo, STableQueryInfo* pTableQueryInfo, int32_t order) { + if (order == TSDB_ORDER_ASC) { assert( (pTableQueryInfo->win.skey <= pTableQueryInfo->win.ekey) && - (pTableQueryInfo->lastKey >= pTableQueryInfo->win.skey) && - (pTableQueryInfo->win.skey >= pQueryAttr->window.skey && pTableQueryInfo->win.ekey <= pQueryAttr->window.ekey)); + (pTableQueryInfo->lastKey >= pTaskInfo->window.skey) && + (pTableQueryInfo->win.skey >= pTaskInfo->window.skey && pTableQueryInfo->win.ekey <= pTaskInfo->window.ekey)); } else { assert( (pTableQueryInfo->win.skey >= pTableQueryInfo->win.ekey) && - (pTableQueryInfo->lastKey <= pTableQueryInfo->win.skey) && - (pTableQueryInfo->win.skey <= pQueryAttr->window.skey && pTableQueryInfo->win.ekey >= pQueryAttr->window.ekey)); + (pTableQueryInfo->lastKey <= pTaskInfo->window.skey) && + (pTableQueryInfo->win.skey <= pTaskInfo->window.skey && pTableQueryInfo->win.ekey >= pTaskInfo->window.ekey)); } } -//STsdbQueryCond createTsdbQueryCond(SQueryAttr* pQueryAttr, STimeWindow* win) { +//STsdbQueryCond createTsdbQueryCond(STaskAttr* pQueryAttr, STimeWindow* win) { // STsdbQueryCond cond = { // .colList = pQueryAttr->tableCols, // .order = pQueryAttr->order.order, @@ -4676,7 +4662,7 @@ static STableIdInfo createTableIdInfo(STableQueryInfo* pTableQueryInfo) { // } //} -static void 
doCloseAllTimeWindow(SQueryRuntimeEnv* pRuntimeEnv) { +static void doCloseAllTimeWindow(STaskRuntimeEnv* pRuntimeEnv) { size_t numOfGroup = GET_NUM_OF_TABLEGROUP(pRuntimeEnv); for (int32_t i = 0; i < numOfGroup; ++i) { SArray* group = GET_TABLEGROUP(pRuntimeEnv, i); @@ -4693,44 +4679,37 @@ static SSDataBlock* doTableScanImpl(void* param, bool* newgroup) { SOperatorInfo *pOperator = (SOperatorInfo*) param; STableScanInfo *pTableScanInfo = pOperator->info; + STaskInfo *pTaskInfo = pOperator->pTaskInfo; + SSDataBlock *pBlock = &pTableScanInfo->block; - SQueryRuntimeEnv *pRuntimeEnv = pOperator->pRuntimeEnv; - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; - STableGroupInfo *pTableGroupInfo = &pOperator->pRuntimeEnv->tableqinfoGroupInfo; + STableGroupInfo *pTableGroupInfo = &pOperator->pTaskInfo->tableqinfoGroupInfo; *newgroup = false; -#if 0 - while (tsdbNextDataBlock(pTableScanInfo->pQueryHandle)) { + + while (/*tsdbNextDataBlock(pTableScanInfo->pQueryHandle)*/1) { if (isQueryKilled(pOperator->pRuntimeEnv->qinfo)) { longjmp(pOperator->pRuntimeEnv->env, TSDB_CODE_TSC_QUERY_CANCELLED); } pTableScanInfo->numOfBlocks += 1; - tsdbRetrieveDataBlockInfo(pTableScanInfo->pQueryHandle, &pBlock->info); +// tsdbRetrieveDataBlockInfo(pTableScanInfo->pQueryHandle, &pBlock->info); // todo opt - if (pTableGroupInfo->numOfTables > 1 || (pRuntimeEnv->current == NULL && pTableGroupInfo->numOfTables == 1)) { - STableQueryInfo** pTableQueryInfo = - (STableQueryInfo**)taosHashGet(pTableGroupInfo->map, &pBlock->info.uid, sizeof(pBlock->info.uid)); - if (pTableQueryInfo == NULL) { - break; - } - - pRuntimeEnv->current = *pTableQueryInfo; - doTableQueryInfoTimeWindowCheck(pQueryAttr, *pTableQueryInfo); - - if (pRuntimeEnv->enableGroupData) { - if(pTableScanInfo->prevGroupId != -1 && pTableScanInfo->prevGroupId != (*pTableQueryInfo)->groupIndex) { - *newgroup = true; - } - } - - pTableScanInfo->prevGroupId = (*pTableQueryInfo)->groupIndex; - } +// if (pTableGroupInfo->numOfTables > 1 || 
(pRuntimeEnv->current == NULL && pTableGroupInfo->numOfTables == 1)) { +// STableQueryInfo** pTableQueryInfo = +// (STableQueryInfo**)taosHashGet(pTableGroupInfo->map, &pBlock->info.uid, sizeof(pBlock->info.uid)); +// if (pTableQueryInfo == NULL) { +// break; +// } +// +// pRuntimeEnv->current = *pTableQueryInfo; +// doTableQueryInfoTimeWindowCheck(pTaskInfo, *pTableQueryInfo, pTableScanInfo->order); +// } // this function never returns error? uint32_t status; - int32_t code = loadDataBlockOnDemand(pOperator->pRuntimeEnv, pTableScanInfo, pBlock, &status); + int32_t code = loadDataBlock(pTaskInfo, pTableScanInfo, pBlock, &status); +// int32_t code = loadDataBlockOnDemand(pOperator->pRuntimeEnv, pTableScanInfo, pBlock, &status); if (code != TSDB_CODE_SUCCESS) { longjmp(pOperator->pRuntimeEnv->env, code); } @@ -4742,7 +4721,6 @@ static SSDataBlock* doTableScanImpl(void* param, bool* newgroup) { return pBlock; } -#endif return NULL; } @@ -4750,9 +4728,8 @@ static SSDataBlock* doTableScanImpl(void* param, bool* newgroup) { static SSDataBlock* doTableScan(void* param, bool *newgroup) { SOperatorInfo* pOperator = (SOperatorInfo*) param; - STableScanInfo *pTableScanInfo = pOperator->info; - SQueryRuntimeEnv *pRuntimeEnv = pOperator->pRuntimeEnv; - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; + STableScanInfo *pTableScanInfo = pOperator->info; + STaskInfo *pTaskInfo = pOperator->pTaskInfo; SResultRowInfo* pResultRowInfo = pTableScanInfo->pResultRowInfo; *newgroup = false; @@ -4775,14 +4752,14 @@ static SSDataBlock* doTableScan(void* param, bool *newgroup) { // STsdbQueryCond cond = createTsdbQueryCond(pQueryAttr, &pQueryAttr->window); // tsdbResetQueryHandle(pTableScanInfo->pQueryHandle, &cond); - setQueryStatus(pRuntimeEnv, QUERY_NOT_COMPLETED); - pRuntimeEnv->scanFlag = REPEAT_SCAN; - - if (pRuntimeEnv->pTsBuf) { - bool ret = tsBufNextPos(pRuntimeEnv->pTsBuf); - assert(ret); - } + setTaskStatus(pTaskInfo, QUERY_NOT_COMPLETED); + pTableScanInfo->scanFlag = 
REPEAT_SCAN; +// if (pTaskInfo->pTsBuf) { +// bool ret = tsBufNextPos(pRuntimeEnv->pTsBuf); +// assert(ret); +// } +// if (pResultRowInfo->size > 0) { pResultRowInfo->curPos = 0; } @@ -4792,17 +4769,15 @@ static SSDataBlock* doTableScan(void* param, bool *newgroup) { } SSDataBlock *p = NULL; + // todo refactor if (pTableScanInfo->reverseTimes > 0) { - setupEnvForReverseScan(pRuntimeEnv, pTableScanInfo->pResultRowInfo, pTableScanInfo->pCtx, pTableScanInfo->numOfOutput); - + setupEnvForReverseScan(pTableScanInfo, pTableScanInfo->pCtx, pTableScanInfo->numOfOutput); // STsdbQueryCond cond = createTsdbQueryCond(pQueryAttr, &pQueryAttr->window); // tsdbResetQueryHandle(pTableScanInfo->pQueryHandle, &cond); //qDebug("QInfo:0x%"PRIx64" start to reverse scan data blocks due to query func required, qrange:%" PRId64 "-%" PRId64, // GET_QID(pRuntimeEnv), cond.twindow.skey, cond.twindow.ekey); - pRuntimeEnv->scanFlag = REVERSE_SCAN; - pTableScanInfo->times = 1; pTableScanInfo->current = 0; pTableScanInfo->reverseTimes = 0; @@ -4867,31 +4842,31 @@ static SSDataBlock* doBlockInfoScan(void* param, bool* newgroup) { } -SOperatorInfo* createTableScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv, int32_t repeatTime) { - assert(repeatTime > 0); +SOperatorInfo* createTableScanOperator(void* pTsdbQueryHandle, int32_t order, int32_t numOfOutput, int32_t repeatTime) { + assert(repeatTime > 0 && numOfOutput > 0); STableScanInfo* pInfo = calloc(1, sizeof(STableScanInfo)); - pInfo->pQueryHandle = pTsdbQueryHandle; - pInfo->times = repeatTime; - pInfo->reverseTimes = 0; - pInfo->order = pRuntimeEnv->pQueryAttr->order.order; - pInfo->current = 0; -// pInfo->prevGroupId = -1; + pInfo->pQueryHandle = pTsdbQueryHandle; + pInfo->times = repeatTime; + pInfo->reverseTimes = 0; + pInfo->order = order; + pInfo->current = 0; + pInfo->scanFlag = MAIN_SCAN; SOperatorInfo* pOperator = calloc(1, sizeof(SOperatorInfo)); pOperator->name = "TableScanOperator"; -// pOperator->operatorType = 
OP_TableScan; + pOperator->operatorType = OP_TableScan; pOperator->blockingOptr = false; pOperator->status = OP_IN_EXECUTING; pOperator->info = pInfo; - pOperator->numOfOutput = pRuntimeEnv->pQueryAttr->numOfCols; - pOperator->pRuntimeEnv = pRuntimeEnv; + pOperator->numOfOutput = numOfOutput; + pOperator->pRuntimeEnv = NULL; pOperator->exec = doTableScan; return pOperator; } -SOperatorInfo* createTableSeqScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv) { +SOperatorInfo* createTableSeqScanOperator(void* pTsdbQueryHandle, STaskRuntimeEnv* pRuntimeEnv) { STableScanInfo* pInfo = calloc(1, sizeof(STableScanInfo)); pInfo->pQueryHandle = pTsdbQueryHandle; @@ -4915,7 +4890,7 @@ SOperatorInfo* createTableSeqScanOperator(void* pTsdbQueryHandle, SQueryRuntimeE return pOperator; } -SOperatorInfo* createTableBlockInfoScanOperator(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv) { +SOperatorInfo* createTableBlockInfoScanOperator(void* pTsdbQueryHandle, STaskRuntimeEnv* pRuntimeEnv) { STableScanInfo* pInfo = calloc(1, sizeof(STableScanInfo)); pInfo->pQueryHandle = pTsdbQueryHandle; @@ -4998,7 +4973,7 @@ void setTableScanFilterOperatorInfo(STableScanInfo* pTableScanInfo, SOperatorInf } -SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbQueryHandle, SQueryRuntimeEnv* pRuntimeEnv, int32_t repeatTime, int32_t reverseTime) { +SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbQueryHandle, STaskRuntimeEnv* pRuntimeEnv, int32_t repeatTime, int32_t reverseTime) { assert(repeatTime > 0); STableScanInfo* pInfo = calloc(1, sizeof(STableScanInfo)); @@ -5019,7 +4994,7 @@ SOperatorInfo* createDataBlocksOptScanInfo(void* pTsdbQueryHandle, SQueryRuntime return pOptr; } -SArray* getOrderCheckColumns(SQueryAttr* pQuery) { +SArray* getOrderCheckColumns(STaskAttr* pQuery) { int32_t numOfCols = (pQuery->pGroupbyExpr == NULL)? 
0: taosArrayGetSize(pQuery->pGroupbyExpr->columnInfo); SArray* pOrderColumns = NULL; @@ -5058,7 +5033,7 @@ SArray* getOrderCheckColumns(SQueryAttr* pQuery) { return pOrderColumns; } -SArray* getResultGroupCheckColumns(SQueryAttr* pQuery) { +SArray* getResultGroupCheckColumns(STaskAttr* pQuery) { int32_t numOfCols = (pQuery->pGroupbyExpr == NULL)? 0 : taosArrayGetSize(pQuery->pGroupbyExpr->columnInfo); SArray* pOrderColumns = NULL; @@ -5109,7 +5084,7 @@ static void destroySlimitOperatorInfo(void* param, int32_t numOfOutput) { tfree(pInfo->prevRow); } -SOperatorInfo* createGlobalAggregateOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, +SOperatorInfo* createGlobalAggregateOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput, void* param, SArray* pUdfInfo, bool groupResultMixedUp) { SMultiwayMergeInfo* pInfo = calloc(1, sizeof(SMultiwayMergeInfo)); @@ -5172,12 +5147,12 @@ SOperatorInfo* createGlobalAggregateOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, pOperator->exec = doGlobalAggregate; pOperator->cleanup = destroyGlobalAggOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } -SOperatorInfo *createMultiwaySortOperatorInfo(SQueryRuntimeEnv *pRuntimeEnv, SExprInfo *pExpr, int32_t numOfOutput, +SOperatorInfo *createMultiwaySortOperatorInfo(STaskRuntimeEnv *pRuntimeEnv, SExprInfo *pExpr, int32_t numOfOutput, int32_t numOfRows, void *merger) { SMultiwayMergeInfo* pInfo = calloc(1, sizeof(SMultiwayMergeInfo)); @@ -5252,9 +5227,9 @@ static SSDataBlock* doSort(void* param, bool* newgroup) { SSDataBlock* pBlock = NULL; while(1) { - publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); - pBlock = pOperator->upstream[0]->exec(pOperator->upstream[0], newgroup); - publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(pOperator->pDownstream[0], 
QUERY_PROF_BEFORE_OPERATOR_EXEC); + pBlock = pOperator->pDownstream[0]->exec(pOperator->pDownstream[0], newgroup); + publishOperatorProfEvent(pOperator->pDownstream[0], QUERY_PROF_AFTER_OPERATOR_EXEC); // start to flush data into disk and try do multiway merge sort if (pBlock == NULL) { @@ -5288,7 +5263,7 @@ static SSDataBlock* doSort(void* param, bool* newgroup) { return (pInfo->pDataBlock->info.rows > 0)? pInfo->pDataBlock:NULL; } -SOperatorInfo *createOrderOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, SOrder* pOrderVal) { +SOperatorInfo *createOrderOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput, SOrder* pOrderVal) { SOrderOperatorInfo* pInfo = calloc(1, sizeof(SOrderOperatorInfo)); { @@ -5321,7 +5296,7 @@ SOperatorInfo *createOrderOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorI pOperator->cleanup = destroyOrderOperatorInfo; pOperator->pRuntimeEnv = pRuntimeEnv; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } @@ -5339,17 +5314,17 @@ static SSDataBlock* doAggregate(void* param, bool* newgroup) { SAggOperatorInfo* pAggInfo = pOperator->info; SOptrBasicInfo* pInfo = &pAggInfo->binfo; - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t order = pQueryAttr->order.order; - SOperatorInfo* upstream = pOperator->upstream[0]; + SOperatorInfo* downstream = pOperator->pDownstream[0]; while(1) { - publishOperatorProfEvent(upstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); - SSDataBlock* pBlock = upstream->exec(upstream, newgroup); - publishOperatorProfEvent(upstream, QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(downstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); + SSDataBlock* pBlock = downstream->exec(downstream, newgroup); 
+ publishOperatorProfEvent(downstream, QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { break; @@ -5359,8 +5334,8 @@ static SSDataBlock* doAggregate(void* param, bool* newgroup) { setTagValue(pOperator, pRuntimeEnv->current->pTable, pInfo->pCtx, pOperator->numOfOutput); } -// if (upstream->operatorType == OP_DataBlocksOptScan) { -// STableScanInfo* pScanInfo = upstream->info; +// if (downstream->operatorType == OP_DataBlocksOptScan) { +// STableScanInfo* pScanInfo = downstream->info; // order = getTableScanOrder(pScanInfo); // } @@ -5386,7 +5361,7 @@ static SSDataBlock* doSTableAggregate(void* param, bool* newgroup) { SAggOperatorInfo* pAggInfo = pOperator->info; SOptrBasicInfo* pInfo = &pAggInfo->binfo; - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pInfo->pRes); @@ -5398,15 +5373,15 @@ static SSDataBlock* doSTableAggregate(void* param, bool* newgroup) { return pInfo->pRes; } - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t order = pQueryAttr->order.order; - SOperatorInfo* upstream = pOperator->upstream[0]; + SOperatorInfo* downstream = pOperator->pDownstream[0]; while(1) { - publishOperatorProfEvent(upstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); - SSDataBlock* pBlock = upstream->exec(upstream, newgroup); - publishOperatorProfEvent(upstream, QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(downstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); + SSDataBlock* pBlock = downstream->exec(downstream, newgroup); + publishOperatorProfEvent(downstream, QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { break; @@ -5414,8 +5389,8 @@ static SSDataBlock* doSTableAggregate(void* param, bool* newgroup) { setTagValue(pOperator, pRuntimeEnv->current->pTable, pInfo->pCtx, pOperator->numOfOutput); -// if (upstream->operatorType == 
OP_DataBlocksOptScan) { -// STableScanInfo* pScanInfo = upstream->info; +// if (downstream->operatorType == OP_DataBlocksOptScan) { +// STableScanInfo* pScanInfo = downstream->info; // order = getTableScanOrder(pScanInfo); // } @@ -5455,7 +5430,7 @@ static SSDataBlock* doProjectOperation(void* param, bool* newgroup) { SOperatorInfo* pOperator = (SOperatorInfo*) param; SProjectOperatorInfo* pProjectInfo = pOperator->info; - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; SOptrBasicInfo *pInfo = &pProjectInfo->binfo; SSDataBlock* pRes = pInfo->pRes; @@ -5492,16 +5467,16 @@ static SSDataBlock* doProjectOperation(void* param, bool* newgroup) { while(1) { bool prevVal = *newgroup; - // The upstream exec may change the value of the newgroup, so use a local variable instead. - publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); - SSDataBlock* pBlock = pOperator->upstream[0]->exec(pOperator->upstream[0], newgroup); - publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_AFTER_OPERATOR_EXEC); + // The downstream exec may change the value of the newgroup, so use a local variable instead. 
+ publishOperatorProfEvent(pOperator->pDownstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); + SSDataBlock* pBlock = pOperator->pDownstream[0]->exec(pOperator->pDownstream[0], newgroup); + publishOperatorProfEvent(pOperator->pDownstream[0], QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { assert(*newgroup == false); *newgroup = prevVal; - setQueryStatus(pRuntimeEnv, QUERY_COMPLETED); + setTaskStatus(pOperator->pTaskInfo, QUERY_COMPLETED); break; } @@ -5547,13 +5522,13 @@ static SSDataBlock* doLimit(void* param, bool* newgroup) { } SLimitOperatorInfo* pInfo = pOperator->info; - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; SSDataBlock* pBlock = NULL; while (1) { - publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); - pBlock = pOperator->upstream[0]->exec(pOperator->upstream[0], newgroup); - publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(pOperator->pDownstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); + pBlock = pOperator->pDownstream[0]->exec(pOperator->pDownstream[0], newgroup); + publishOperatorProfEvent(pOperator->pDownstream[0], QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { doSetOperatorCompleted(pOperator); @@ -5599,12 +5574,12 @@ static SSDataBlock* doFilter(void* param, bool* newgroup) { } SFilterOperatorInfo* pCondInfo = pOperator->info; - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; while (1) { - publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); - SSDataBlock *pBlock = pOperator->upstream[0]->exec(pOperator->upstream[0], newgroup); - publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(pOperator->pDownstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); + SSDataBlock *pBlock = pOperator->pDownstream[0]->exec(pOperator->pDownstream[0], newgroup); + 
publishOperatorProfEvent(pOperator->pDownstream[0], QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { break; @@ -5631,7 +5606,7 @@ static SSDataBlock* doIntervalAgg(void* param, bool* newgroup) { STableIntervalOperatorInfo* pIntervalInfo = pOperator->info; - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pIntervalInfo->pRes); if (pIntervalInfo->pRes->info.rows == 0 || !hasRemainDataInCurrentGroup(&pRuntimeEnv->groupResInfo)) { @@ -5641,16 +5616,16 @@ static SSDataBlock* doIntervalAgg(void* param, bool* newgroup) { return pIntervalInfo->pRes; } - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t order = pQueryAttr->order.order; STimeWindow win = pQueryAttr->window; - SOperatorInfo* upstream = pOperator->upstream[0]; + SOperatorInfo* downstream = pOperator->pDownstream[0]; while(1) { - publishOperatorProfEvent(upstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); - SSDataBlock* pBlock = upstream->exec(upstream, newgroup); - publishOperatorProfEvent(upstream, QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(downstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); + SSDataBlock* pBlock = downstream->exec(downstream, newgroup); + publishOperatorProfEvent(downstream, QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { break; @@ -5669,7 +5644,7 @@ static SSDataBlock* doIntervalAgg(void* param, bool* newgroup) { pOperator->status = OP_RES_TO_RETURN; closeAllResultRows(&pIntervalInfo->resultRowInfo); - setQueryStatus(pRuntimeEnv, QUERY_COMPLETED); + setTaskStatus(pOperator->pTaskInfo, QUERY_COMPLETED); finalizeQueryResult(pOperator, pIntervalInfo->pCtx, &pIntervalInfo->resultRowInfo, pIntervalInfo->rowCellInfoOffset); initGroupResInfo(&pRuntimeEnv->groupResInfo, &pIntervalInfo->resultRowInfo); @@ -5690,7 +5665,7 @@ static SSDataBlock* 
doAllIntervalAgg(void* param, bool* newgroup) { STableIntervalOperatorInfo* pIntervalInfo = pOperator->info; - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pIntervalInfo->pRes); @@ -5701,16 +5676,16 @@ static SSDataBlock* doAllIntervalAgg(void* param, bool* newgroup) { return pIntervalInfo->pRes; } - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t order = pQueryAttr->order.order; STimeWindow win = pQueryAttr->window; - SOperatorInfo* upstream = pOperator->upstream[0]; + SOperatorInfo* downstream = pOperator->pDownstream[0]; while(1) { - publishOperatorProfEvent(upstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); - SSDataBlock* pBlock = upstream->exec(upstream, newgroup); - publishOperatorProfEvent(upstream, QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(downstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); + SSDataBlock* pBlock = downstream->exec(downstream, newgroup); + publishOperatorProfEvent(downstream, QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { break; @@ -5729,7 +5704,7 @@ static SSDataBlock* doAllIntervalAgg(void* param, bool* newgroup) { pOperator->status = OP_RES_TO_RETURN; closeAllResultRows(&pIntervalInfo->resultRowInfo); - setQueryStatus(pRuntimeEnv, QUERY_COMPLETED); + setTaskStatus(pOperator->pTaskInfo, QUERY_COMPLETED); finalizeQueryResult(pOperator, pIntervalInfo->pCtx, &pIntervalInfo->resultRowInfo, pIntervalInfo->rowCellInfoOffset); initGroupResInfo(&pRuntimeEnv->groupResInfo, &pIntervalInfo->resultRowInfo); @@ -5749,7 +5724,7 @@ static SSDataBlock* doSTableIntervalAgg(void* param, bool* newgroup) { } STableIntervalOperatorInfo* pIntervalInfo = pOperator->info; - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == 
OP_RES_TO_RETURN) { int64_t st = taosGetTimestampUs(); @@ -5765,15 +5740,15 @@ static SSDataBlock* doSTableIntervalAgg(void* param, bool* newgroup) { return pIntervalInfo->pRes; } - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t order = pQueryAttr->order.order; - SOperatorInfo* upstream = pOperator->upstream[0]; + SOperatorInfo* downstream = pOperator->pDownstream[0]; while(1) { - publishOperatorProfEvent(upstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); - SSDataBlock* pBlock = upstream->exec(upstream, newgroup); - publishOperatorProfEvent(upstream, QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(downstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); + SSDataBlock* pBlock = downstream->exec(downstream, newgroup); + publishOperatorProfEvent(downstream, QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { break; @@ -5792,7 +5767,7 @@ static SSDataBlock* doSTableIntervalAgg(void* param, bool* newgroup) { pOperator->status = OP_RES_TO_RETURN; pQueryAttr->order.order = order; // TODO : restore the order doCloseAllTimeWindow(pRuntimeEnv); - setQueryStatus(pRuntimeEnv, QUERY_COMPLETED); + setTaskStatus(pOperator->pTaskInfo, QUERY_COMPLETED); copyToSDataBlock(pRuntimeEnv, 3000, pIntervalInfo->pRes, pIntervalInfo->rowCellInfoOffset); if (pIntervalInfo->pRes->info.rows == 0 || !hasRemainData(&pRuntimeEnv->groupResInfo)) { @@ -5809,7 +5784,7 @@ static SSDataBlock* doAllSTableIntervalAgg(void* param, bool* newgroup) { } STableIntervalOperatorInfo* pIntervalInfo = pOperator->info; - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { copyToSDataBlock(pRuntimeEnv, 3000, pIntervalInfo->pRes, pIntervalInfo->rowCellInfoOffset); @@ -5820,15 +5795,15 @@ static SSDataBlock* doAllSTableIntervalAgg(void* param, bool* newgroup) { return pIntervalInfo->pRes; } - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* 
pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t order = pQueryAttr->order.order; - SOperatorInfo* upstream = pOperator->upstream[0]; + SOperatorInfo* downstream = pOperator->pDownstream[0]; while(1) { - publishOperatorProfEvent(upstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); - SSDataBlock* pBlock = upstream->exec(upstream, newgroup); - publishOperatorProfEvent(upstream, QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(downstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); + SSDataBlock* pBlock = downstream->exec(downstream, newgroup); + publishOperatorProfEvent(downstream, QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { break; @@ -5847,7 +5822,7 @@ static SSDataBlock* doAllSTableIntervalAgg(void* param, bool* newgroup) { pOperator->status = OP_RES_TO_RETURN; pQueryAttr->order.order = order; // TODO : restore the order doCloseAllTimeWindow(pRuntimeEnv); - setQueryStatus(pRuntimeEnv, QUERY_COMPLETED); + setTaskStatus(pOperator->pTaskInfo, QUERY_COMPLETED); int64_t st = taosGetTimestampUs(); copyToSDataBlock(pRuntimeEnv, 3000, pIntervalInfo->pRes, pIntervalInfo->rowCellInfoOffset); @@ -5862,13 +5837,13 @@ static SSDataBlock* doAllSTableIntervalAgg(void* param, bool* newgroup) { } static void doStateWindowAggImpl(SOperatorInfo* pOperator, SStateWindowOperatorInfo *pInfo, SSDataBlock *pSDataBlock) { - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; STableQueryInfo* item = pRuntimeEnv->current; SColumnInfoData* pColInfoData = taosArrayGet(pSDataBlock->pDataBlock, pInfo->colIndex); SOptrBasicInfo* pBInfo = &pInfo->binfo; - bool masterScan = IS_MASTER_SCAN(pRuntimeEnv); + bool masterScan = IS_MAIN_SCAN(pRuntimeEnv); int16_t bytes = pColInfoData->info.bytes; int16_t type = pColInfoData->info.type; @@ -5945,7 +5920,7 @@ static SSDataBlock* doStateWindowAgg(void *param, bool* newgroup) { SStateWindowOperatorInfo* pWindowInfo = pOperator->info; SOptrBasicInfo* pBInfo = &pWindowInfo->binfo; - SQueryRuntimeEnv* 
pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pBInfo->pRes); @@ -5956,14 +5931,14 @@ static SSDataBlock* doStateWindowAgg(void *param, bool* newgroup) { return pBInfo->pRes; } - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t order = pQueryAttr->order.order; STimeWindow win = pQueryAttr->window; - SOperatorInfo* upstream = pOperator->upstream[0]; + SOperatorInfo* downstream = pOperator->pDownstream[0]; while (1) { - publishOperatorProfEvent(upstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); - SSDataBlock* pBlock = upstream->exec(upstream, newgroup); - publishOperatorProfEvent(upstream, QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(downstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); + SSDataBlock* pBlock = downstream->exec(downstream, newgroup); + publishOperatorProfEvent(downstream, QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { break; @@ -5981,7 +5956,7 @@ static SSDataBlock* doStateWindowAgg(void *param, bool* newgroup) { pOperator->status = OP_RES_TO_RETURN; closeAllResultRows(&pBInfo->resultRowInfo); - setQueryStatus(pRuntimeEnv, QUERY_COMPLETED); + setTaskStatus(pOperator->pTaskInfo, QUERY_COMPLETED); finalizeQueryResult(pOperator, pBInfo->pCtx, &pBInfo->resultRowInfo, pBInfo->rowCellInfoOffset); initGroupResInfo(&pRuntimeEnv->groupResInfo, &pBInfo->resultRowInfo); @@ -6004,7 +5979,7 @@ static SSDataBlock* doSessionWindowAgg(void* param, bool* newgroup) { SOptrBasicInfo* pBInfo = &pWindowInfo->binfo; - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pBInfo->pRes); @@ -6015,17 +5990,17 @@ static SSDataBlock* doSessionWindowAgg(void* param, bool* newgroup) { return pBInfo->pRes; } - 
SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; //pQueryAttr->order.order = TSDB_ORDER_ASC; int32_t order = pQueryAttr->order.order; STimeWindow win = pQueryAttr->window; - SOperatorInfo* upstream = pOperator->upstream[0]; + SOperatorInfo* downstream = pOperator->pDownstream[0]; while(1) { - publishOperatorProfEvent(upstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); - SSDataBlock* pBlock = upstream->exec(upstream, newgroup); - publishOperatorProfEvent(upstream, QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(downstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); + SSDataBlock* pBlock = downstream->exec(downstream, newgroup); + publishOperatorProfEvent(downstream, QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { break; } @@ -6041,7 +6016,7 @@ static SSDataBlock* doSessionWindowAgg(void* param, bool* newgroup) { pOperator->status = OP_RES_TO_RETURN; closeAllResultRows(&pBInfo->resultRowInfo); - setQueryStatus(pRuntimeEnv, QUERY_COMPLETED); +// setTaskStatus(pOperator->pTaskInfo, QUERY_COMPLETED); finalizeQueryResult(pOperator, pBInfo->pCtx, &pBInfo->resultRowInfo, pBInfo->rowCellInfoOffset); initGroupResInfo(&pRuntimeEnv->groupResInfo, &pBInfo->resultRowInfo); @@ -6062,7 +6037,7 @@ static SSDataBlock* hashGroupbyAggregate(void* param, bool* newgroup) { SGroupbyOperatorInfo *pInfo = pOperator->info; - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; if (pOperator->status == OP_RES_TO_RETURN) { toSSDataBlock(&pRuntimeEnv->groupResInfo, pRuntimeEnv, pInfo->binfo.pRes); @@ -6073,12 +6048,12 @@ static SSDataBlock* hashGroupbyAggregate(void* param, bool* newgroup) { return pInfo->binfo.pRes; } - SOperatorInfo* upstream = pOperator->upstream[0]; + SOperatorInfo* downstream = pOperator->pDownstream[0]; while(1) { - publishOperatorProfEvent(upstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); - SSDataBlock* pBlock = upstream->exec(upstream, newgroup); - 
publishOperatorProfEvent(upstream, QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(downstream, QUERY_PROF_BEFORE_OPERATOR_EXEC); + SSDataBlock* pBlock = downstream->exec(downstream, newgroup); + publishOperatorProfEvent(downstream, QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { break; } @@ -6095,7 +6070,7 @@ static SSDataBlock* hashGroupbyAggregate(void* param, bool* newgroup) { pOperator->status = OP_RES_TO_RETURN; closeAllResultRows(&pInfo->binfo.resultRowInfo); - setQueryStatus(pRuntimeEnv, QUERY_COMPLETED); +// setTaskStatus(pOperator->pTaskInfo, QUERY_COMPLETED); if (!pRuntimeEnv->pQueryAttr->stableQuery) { // finalize include the update of result rows finalizeQueryResult(pOperator, pInfo->binfo.pCtx, &pInfo->binfo.resultRowInfo, pInfo->binfo.rowCellInfoOffset); @@ -6117,7 +6092,7 @@ static SSDataBlock* hashGroupbyAggregate(void* param, bool* newgroup) { return pInfo->binfo.pRes; } -static void doHandleRemainBlockForNewGroupImpl(SFillOperatorInfo *pInfo, SQueryRuntimeEnv* pRuntimeEnv, bool* newgroup) { +static void doHandleRemainBlockForNewGroupImpl(SFillOperatorInfo *pInfo, STaskRuntimeEnv* pRuntimeEnv, bool* newgroup) { pInfo->totalInputRows = pInfo->existNewGroupBlock->info.rows; int64_t ekey = Q_STATUS_EQUAL(pRuntimeEnv->status, QUERY_COMPLETED)?pRuntimeEnv->pQueryAttr->window.ekey:pInfo->existNewGroupBlock->info.window.ekey; taosResetFillInfo(pInfo->pFillInfo, getFillInfoStart(pInfo->pFillInfo)); @@ -6130,7 +6105,7 @@ static void doHandleRemainBlockForNewGroupImpl(SFillOperatorInfo *pInfo, SQueryR *newgroup = true; } -static void doHandleRemainBlockFromNewGroup(SFillOperatorInfo *pInfo, SQueryRuntimeEnv *pRuntimeEnv, bool *newgroup) { +static void doHandleRemainBlockFromNewGroup(SFillOperatorInfo *pInfo, STaskRuntimeEnv *pRuntimeEnv, bool *newgroup) { if (taosFillHasMoreResults(pInfo->pFillInfo)) { *newgroup = false; doFillTimeIntervalGapsInResults(pInfo->pFillInfo, pInfo->pRes, (int32_t)pRuntimeEnv->resultInfo.capacity, pInfo->p); @@ 
-6155,16 +6130,16 @@ static SSDataBlock* doFill(void* param, bool* newgroup) { return NULL; } - SQueryRuntimeEnv *pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv *pRuntimeEnv = pOperator->pRuntimeEnv; doHandleRemainBlockFromNewGroup(pInfo, pRuntimeEnv, newgroup); if (pInfo->pRes->info.rows > pRuntimeEnv->resultInfo.threshold || (!pInfo->multigroupResult && pInfo->pRes->info.rows > 0)) { return pInfo->pRes; } while(1) { - publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); - SSDataBlock* pBlock = pOperator->upstream[0]->exec(pOperator->upstream[0], newgroup); - publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(pOperator->pDownstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); + SSDataBlock* pBlock = pOperator->pDownstream[0]->exec(pOperator->pDownstream[0], newgroup); + publishOperatorProfEvent(pOperator->pDownstream[0], QUERY_PROF_AFTER_OPERATOR_EXEC); if (*newgroup) { assert(pBlock != NULL); @@ -6220,7 +6195,7 @@ static SSDataBlock* doFill(void* param, bool* newgroup) { } // todo set the attribute of query scan count -static int32_t getNumOfScanTimes(SQueryAttr* pQueryAttr) { +static int32_t getNumOfScanTimes(STaskAttr* pQueryAttr) { for(int32_t i = 0; i < pQueryAttr->numOfOutput; ++i) { int32_t functionId = getExprFunctionId(&pQueryAttr->pExpr1[i]); if (functionId == FUNCTION_STDDEV || functionId == FUNCTION_PERCT) { @@ -6240,23 +6215,23 @@ static void destroyOperatorInfo(SOperatorInfo* pOperator) { pOperator->cleanup(pOperator->info, pOperator->numOfOutput); } - if (pOperator->upstream != NULL) { - for(int32_t i = 0; i < pOperator->numOfUpstream; ++i) { - destroyOperatorInfo(pOperator->upstream[i]); + if (pOperator->pDownstream != NULL) { + for(int32_t i = 0; i < pOperator->numOfDownstream; ++i) { + destroyOperatorInfo(pOperator->pDownstream[i]); } - tfree(pOperator->upstream); - pOperator->numOfUpstream = 0; + tfree(pOperator->pDownstream); + pOperator->numOfDownstream = 
0; } tfree(pOperator->info); tfree(pOperator); } -SOperatorInfo* createAggregateOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* createAggregateOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput) { SAggOperatorInfo* pInfo = calloc(1, sizeof(SAggOperatorInfo)); - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; int32_t numOfRows = (int32_t)(getRowNumForMultioutput(pQueryAttr, pQueryAttr->topBotQuery, pQueryAttr->stableQuery)); pInfo->binfo.pRes = createOutputBuf(pExpr, numOfOutput, numOfRows); @@ -6265,7 +6240,7 @@ SOperatorInfo* createAggregateOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOpera initResultRowInfo(&pInfo->binfo.resultRowInfo, 8, TSDB_DATA_TYPE_INT); pInfo->seed = rand(); - setDefaultOutputBuf(pRuntimeEnv, &pInfo->binfo, pInfo->seed, MASTER_SCAN); + setDefaultOutputBuf(pRuntimeEnv, &pInfo->binfo, pInfo->seed, MAIN_SCAN); SOperatorInfo* pOperator = calloc(1, sizeof(SOperatorInfo)); pOperator->name = "TableAggregate"; @@ -6279,7 +6254,7 @@ SOperatorInfo* createAggregateOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOpera pOperator->exec = doAggregate; pOperator->cleanup = destroyAggOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } @@ -6353,7 +6328,7 @@ static void destroyDistinctOperatorInfo(void* param, int32_t numOfOutput) { pInfo->pRes = destroyOutputBuf(pInfo->pRes); } -SOperatorInfo* createMultiTableAggOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* createMultiTableAggOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput) { SAggOperatorInfo* pInfo = calloc(1, sizeof(SAggOperatorInfo)); size_t tableGroup = GET_NUM_OF_TABLEGROUP(pRuntimeEnv); @@ -6374,12 +6349,12 @@ 
SOperatorInfo* createMultiTableAggOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SO pOperator->exec = doSTableAggregate; pOperator->cleanup = destroyAggOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } -SOperatorInfo* createProjectOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* createProjectOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput) { SProjectOperatorInfo* pInfo = calloc(1, sizeof(SProjectOperatorInfo)); pInfo->seed = rand(); @@ -6390,7 +6365,7 @@ SOperatorInfo* createProjectOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperato pBInfo->pCtx = createSQLFunctionCtx(pRuntimeEnv, pExpr, numOfOutput, &pBInfo->rowCellInfoOffset); initResultRowInfo(&pBInfo->resultRowInfo, 8, TSDB_DATA_TYPE_INT); - setDefaultOutputBuf(pRuntimeEnv, pBInfo, pInfo->seed, MASTER_SCAN); + setDefaultOutputBuf(pRuntimeEnv, pBInfo, pInfo->seed, MAIN_SCAN); SOperatorInfo* pOperator = calloc(1, sizeof(SOperatorInfo)); pOperator->name = "ProjectOperator"; @@ -6404,7 +6379,7 @@ SOperatorInfo* createProjectOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperato pOperator->exec = doProjectOperation; pOperator->cleanup = destroyProjectOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } @@ -6442,7 +6417,7 @@ SColumnInfo* extractColumnFilterInfo(SExprInfo* pExpr, int32_t numOfOutput, int3 return 0; } -SOperatorInfo* createFilterOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, +SOperatorInfo* createFilterOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput, SColumnInfo* pCols, int32_t numOfFilter) { SFilterOperatorInfo* pInfo = calloc(1, sizeof(SFilterOperatorInfo)); @@ -6462,12 +6437,12 @@ SOperatorInfo* createFilterOperatorInfo(SQueryRuntimeEnv* 
pRuntimeEnv, SOperator pOperator->info = pInfo; pOperator->pRuntimeEnv = pRuntimeEnv; pOperator->cleanup = destroyConditionOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } -SOperatorInfo* createLimitOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream) { +SOperatorInfo* createLimitOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream) { SLimitOperatorInfo* pInfo = calloc(1, sizeof(SLimitOperatorInfo)); pInfo->limit = pRuntimeEnv->pQueryAttr->limit.limit; @@ -6480,12 +6455,12 @@ SOperatorInfo* createLimitOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorI pOperator->exec = doLimit; pOperator->info = pInfo; pOperator->pRuntimeEnv = pRuntimeEnv; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } -SOperatorInfo* createTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* createTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput) { STableIntervalOperatorInfo* pInfo = calloc(1, sizeof(STableIntervalOperatorInfo)); pInfo->pCtx = createSQLFunctionCtx(pRuntimeEnv, pExpr, numOfOutput, &pInfo->rowCellInfoOffset); @@ -6505,12 +6480,12 @@ SOperatorInfo* createTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOp pOperator->exec = doIntervalAgg; pOperator->cleanup = destroyBasicOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } -SOperatorInfo* createAllTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* createAllTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput) { STableIntervalOperatorInfo* pInfo = calloc(1, sizeof(STableIntervalOperatorInfo)); pInfo->pCtx = 
createSQLFunctionCtx(pRuntimeEnv, pExpr, numOfOutput, &pInfo->rowCellInfoOffset); @@ -6530,11 +6505,11 @@ SOperatorInfo* createAllTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, pOperator->exec = doAllIntervalAgg; pOperator->cleanup = destroyBasicOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } -SOperatorInfo* createStatewindowOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* createStatewindowOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput) { SStateWindowOperatorInfo* pInfo = calloc(1, sizeof(SStateWindowOperatorInfo)); pInfo->colIndex = -1; pInfo->reptScan = false; @@ -6554,10 +6529,10 @@ SOperatorInfo* createStatewindowOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOpe pOperator->exec = doStateWindowAgg; pOperator->cleanup = destroyStateWindowOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } -SOperatorInfo* createSWindowOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* createSWindowOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput) { SSWindowOperatorInfo* pInfo = calloc(1, sizeof(SSWindowOperatorInfo)); pInfo->binfo.pCtx = createSQLFunctionCtx(pRuntimeEnv, pExpr, numOfOutput, &pInfo->binfo.rowCellInfoOffset); @@ -6579,11 +6554,11 @@ SOperatorInfo* createSWindowOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperato pOperator->exec = doSessionWindowAgg; pOperator->cleanup = destroySWindowOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } -SOperatorInfo* createMultiTableTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* 
createMultiTableTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput) { STableIntervalOperatorInfo* pInfo = calloc(1, sizeof(STableIntervalOperatorInfo)); pInfo->pCtx = createSQLFunctionCtx(pRuntimeEnv, pExpr, numOfOutput, &pInfo->rowCellInfoOffset); @@ -6603,11 +6578,11 @@ SOperatorInfo* createMultiTableTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRunti pOperator->exec = doSTableIntervalAgg; pOperator->cleanup = destroyBasicOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } -SOperatorInfo* createAllMultiTableTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* createAllMultiTableTimeIntervalOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput) { STableIntervalOperatorInfo* pInfo = calloc(1, sizeof(STableIntervalOperatorInfo)); pInfo->pCtx = createSQLFunctionCtx(pRuntimeEnv, pExpr, numOfOutput, &pInfo->rowCellInfoOffset); @@ -6627,20 +6602,20 @@ SOperatorInfo* createAllMultiTableTimeIntervalOperatorInfo(SQueryRuntimeEnv* pRu pOperator->exec = doAllSTableIntervalAgg; pOperator->cleanup = destroyBasicOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } -SOperatorInfo* createGroupbyOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* createGroupbyOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput) { SGroupbyOperatorInfo* pInfo = calloc(1, sizeof(SGroupbyOperatorInfo)); pInfo->colIndex = -1; // group by column index pInfo->binfo.pCtx = createSQLFunctionCtx(pRuntimeEnv, pExpr, numOfOutput, &pInfo->binfo.rowCellInfoOffset); - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr *pQueryAttr = 
pRuntimeEnv->pQueryAttr; pQueryAttr->resultRowSize = (pQueryAttr->resultRowSize * (int32_t)(getRowNumForMultioutput(pQueryAttr, pQueryAttr->topBotQuery, pQueryAttr->stableQuery))); @@ -6660,17 +6635,17 @@ SOperatorInfo* createGroupbyOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperato pOperator->exec = hashGroupbyAggregate; pOperator->cleanup = destroyGroupbyOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } -SOperatorInfo* createFillOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, bool multigroupResult) { +SOperatorInfo* createFillOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput, bool multigroupResult) { SFillOperatorInfo* pInfo = calloc(1, sizeof(SFillOperatorInfo)); pInfo->pRes = createOutputBuf(pExpr, numOfOutput, pRuntimeEnv->resultInfo.capacity); pInfo->multigroupResult = multigroupResult; { - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; struct SFillColInfo* pColInfo = createFillColInfo(pExpr, numOfOutput, pQueryAttr->fillVal); STimeWindow w = TSWINDOW_INITIALIZER; @@ -6699,14 +6674,14 @@ SOperatorInfo* createFillOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorIn pOperator->exec = doFill; pOperator->cleanup = destroySFillOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } -SOperatorInfo* createSLimitOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput, void* pMerger, bool multigroupResult) { +SOperatorInfo* createSLimitOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput, void* pMerger, bool multigroupResult) { SSLimitOperatorInfo* pInfo = calloc(1, sizeof(SSLimitOperatorInfo)); - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr 
= pRuntimeEnv->pQueryAttr; pInfo->orderColumnList = getResultGroupCheckColumns(pQueryAttr); pInfo->slimit = pQueryAttr->slimit; @@ -6747,7 +6722,7 @@ SOperatorInfo* createSLimitOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperator pOperator->pRuntimeEnv = pRuntimeEnv; pOperator->cleanup = destroySlimitOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } @@ -6758,7 +6733,7 @@ static SSDataBlock* doTagScan(void* param, bool* newgroup) { return NULL; } - SQueryRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; + STaskRuntimeEnv* pRuntimeEnv = pOperator->pRuntimeEnv; int32_t maxNumOfTables = (int32_t)pRuntimeEnv->resultInfo.capacity; STagScanInfo *pInfo = pOperator->info; @@ -6770,7 +6745,7 @@ static SSDataBlock* doTagScan(void* param, bool* newgroup) { int32_t functionId = getExprFunctionId(&pOperator->pExpr[0]); if (functionId == FUNCTION_TID_TAG) { // return the tags & table Id - SQueryAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr* pQueryAttr = pRuntimeEnv->pQueryAttr; assert(pQueryAttr->numOfOutput == 1); SExprInfo* pExprInfo = &pOperator->pExpr[0]; @@ -6874,7 +6849,7 @@ static SSDataBlock* doTagScan(void* param, bool* newgroup) { } if (pOperator->status == OP_EXEC_DONE) { - setQueryStatus(pOperator->pRuntimeEnv, QUERY_COMPLETED); + setTaskStatus(pOperator->pRuntimeEnv, QUERY_COMPLETED); } pRes->info.rows = count; @@ -6883,7 +6858,7 @@ static SSDataBlock* doTagScan(void* param, bool* newgroup) { #endif } -SOperatorInfo* createTagScanOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* createTagScanOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SExprInfo* pExpr, int32_t numOfOutput) { STagScanInfo* pInfo = calloc(1, sizeof(STagScanInfo)); pInfo->pRes = createOutputBuf(pExpr, numOfOutput, pRuntimeEnv->resultInfo.capacity); @@ -6968,9 +6943,9 @@ static SSDataBlock* hashDistinct(void* param, bool* newgroup) { SSDataBlock* pBlock = NULL; while(1) { - 
publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); - pBlock = pOperator->upstream[0]->exec(pOperator->upstream[0], newgroup); - publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_AFTER_OPERATOR_EXEC); + publishOperatorProfEvent(pOperator->pDownstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); + pBlock = pOperator->pDownstream[0]->exec(pOperator->pDownstream[0], newgroup); + publishOperatorProfEvent(pOperator->pDownstream[0], QUERY_PROF_AFTER_OPERATOR_EXEC); if (pBlock == NULL) { doSetOperatorCompleted(pOperator); @@ -7021,7 +6996,7 @@ static SSDataBlock* hashDistinct(void* param, bool* newgroup) { return (pInfo->pRes->info.rows > 0)? pInfo->pRes:NULL; } -SOperatorInfo* createDistinctOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { +SOperatorInfo* createDistinctOperatorInfo(STaskRuntimeEnv* pRuntimeEnv, SOperatorInfo* downstream, SExprInfo* pExpr, int32_t numOfOutput) { SDistinctOperatorInfo* pInfo = calloc(1, sizeof(SDistinctOperatorInfo)); pInfo->totalBytes = 0; pInfo->buf = NULL; @@ -7045,7 +7020,7 @@ SOperatorInfo* createDistinctOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperat pOperator->pExpr = pExpr; pOperator->cleanup = destroyDistinctOperatorInfo; - appendUpstream(pOperator, upstream); + appendUpstream(pOperator, downstream); return pOperator; } @@ -7199,6 +7174,92 @@ static int32_t deserializeColFilterInfo(SColumnFilterInfo* pColFilters, int16_t return TSDB_CODE_SUCCESS; } +/** + * { + "Id": { + "QueryId": 20, + "TemplateId": 0, + "SubplanId": 0 + }, + "Node": { + "Name": "TableScan", + "InputSchema": [{ + "Type": 9, + "ColId": 1, + "Bytes": 8 + }, { + "Type": 4, + "ColId": 2, + "Bytes": 4 + }, { + "Type": 8, + "ColId": 3, + "Bytes": 20 + }], + "TableScan": { + "TableId": 1, + "TableType": 3, + "Flag": 0, + "Window": { + "StartKey": 0, + "EndKey": 0 + } + } + }, + "DataSink": { + "Name": "Dispatch", + "Dispatch": { + } + } +} + */ +int32_t parseTaskInfo(const 
char* msg, int32_t len) { + cJSON* pJson = cJSON_Parse(msg); + if (NULL == pJson) { + return TSDB_CODE_INVALID_MSG; + } + + cJSON* pSub = cJSON_GetObjectItem(pJson, "ID"); + if (NULL != pSub) { + printf("Id : %s\n", pSub->valuestring); + } + + cJSON* pNode = cJSON_GetObjectItem(pJson, "Node"); + if (pNode == NULL) { + return TSDB_CODE_INVALID_MSG; + } + + cJSON* pNodeName = cJSON_GetObjectItem(pNode, "name"); + if (pNodeName == NULL) { + return TSDB_CODE_INVALID_MSG; + } + + printf("node name is: %s\n", pNodeName->valuestring); + + cJSON* pNodeSchema = cJSON_GetObjectItem(pNode, "InputSchema"); + if (pNodeSchema == NULL) { + return TSDB_CODE_INVALID_MSG; + } + + cJSON* pOperator = cJSON_GetObjectItem(pNode, pNodeName->valuestring); + if (pOperator == NULL) { + return TSDB_CODE_INVALID_MSG; + } + + cJSON* pTableId = cJSON_GetObjectItem(pOperator, "tableId"); + if (pTableId == NULL) { + return TSDB_CODE_INVALID_MSG; + } + + cJSON* pTimeWindow = cJSON_GetObjectItem(pOperator, "window"); + if (pTimeWindow == NULL) { + return TSDB_CODE_INVALID_MSG; + } + + + +} + /** * pQueryMsg->head has been converted before this function is called. 
* @@ -7207,7 +7268,7 @@ static int32_t deserializeColFilterInfo(SColumnFilterInfo* pColFilters, int16_t * @param pExpr * @return */ -int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, SQueryParam* param) { +int32_t convertQueryMsg(SQueryTableMsg *pQueryMsg, STaskParam* param) { int32_t code = TSDB_CODE_SUCCESS; // if (taosCheckVersion(pQueryMsg->version, version, 3) != 0) { @@ -7924,7 +7985,7 @@ void* doDestroyFilterInfo(SSingleColumnFilterInfo* pFilterInfo, int32_t numOfFil return NULL; } -int32_t createFilterInfo(SQueryAttr* pQueryAttr, uint64_t qId) { +int32_t createFilterInfo(STaskAttr* pQueryAttr, uint64_t qId) { for (int32_t i = 0; i < pQueryAttr->numOfCols; ++i) { // if (pQueryAttr->tableCols[i].flist.numOfFilters > 0 && pQueryAttr->tableCols[i].flist.filterInfo != NULL) { // pQueryAttr->numOfFilterCols++; @@ -7943,7 +8004,7 @@ int32_t createFilterInfo(SQueryAttr* pQueryAttr, uint64_t qId) { return TSDB_CODE_SUCCESS; } -static void doUpdateExprColumnIndex(SQueryAttr *pQueryAttr) { +static void doUpdateExprColumnIndex(STaskAttr *pQueryAttr) { assert(pQueryAttr->pExpr1 != NULL && pQueryAttr != NULL); for (int32_t k = 0; k < pQueryAttr->numOfOutput; ++k) { @@ -7980,7 +8041,7 @@ static void doUpdateExprColumnIndex(SQueryAttr *pQueryAttr) { } } -void setResultBufSize(SQueryAttr* pQueryAttr, SRspResultInfo* pResultInfo) { +void setResultBufSize(STaskAttr* pQueryAttr, SRspResultInfo* pResultInfo) { const int32_t DEFAULT_RESULT_MSG_SIZE = 1024 * (1024 + 512); // the minimum number of rows for projection query @@ -8026,7 +8087,7 @@ SQInfo* createQInfoImpl(SQueryTableMsg* pQueryMsg, SGroupbyExpr* pGroupbyExpr, S // to make sure third party won't overwrite this structure pQInfo->signature = pQInfo; - SQueryAttr* pQueryAttr = &pQInfo->query; + STaskAttr* pQueryAttr = &pQInfo->query; pQInfo->runtimeEnv.pQueryAttr = pQueryAttr; pQueryAttr->tableGroupInfo = *pTableGroupInfo; @@ -8145,7 +8206,7 @@ SQInfo* createQInfoImpl(SQueryTableMsg* pQueryMsg, SGroupbyExpr* 
pGroupbyExpr, S pQueryAttr->window = pQueryMsg->window; updateDataCheckOrder(pQInfo, pQueryMsg, pQueryAttr->stableQuery); - SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; + STaskRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; STimeWindow window = pQueryAttr->window; int32_t index = 0; @@ -8213,7 +8274,7 @@ _cleanup_qinfo: // filterFreeInfo(pFilters); _cleanup: - freeQInfo(pQInfo); + doDestroyTask(pQInfo); return NULL; } @@ -8231,14 +8292,14 @@ bool isValidQInfo(void *param) { return (sig == (uint64_t)pQInfo); } -int32_t initQInfo(STsBufInfo* pTsBufInfo, void* tsdb, void* sourceOptr, SQInfo* pQInfo, SQueryParam* param, char* start, +int32_t initQInfo(STsBufInfo* pTsBufInfo, void* tsdb, void* sourceOptr, SQInfo* pQInfo, STaskParam* param, char* start, int32_t prevResultLen, void* merger) { int32_t code = TSDB_CODE_SUCCESS; - SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; + STaskRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; pRuntimeEnv->qinfo = pQInfo; - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; STSBuf *pTsBuf = NULL; @@ -8271,7 +8332,7 @@ int32_t initQInfo(STsBufInfo* pTsBufInfo, void* tsdb, void* sourceOptr, SQInfo* (!QUERY_IS_ASC_QUERY(pQueryAttr) && (pQueryAttr->window.ekey > pQueryAttr->window.skey))) { //qDebug("QInfo:0x%"PRIx64" no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo->qId, pQueryAttr->window.skey, // pQueryAttr->window.ekey, pQueryAttr->order.order); - setQueryStatus(pRuntimeEnv, QUERY_COMPLETED); +// setTaskStatus(pOperator->pTaskInfo, QUERY_COMPLETED); pRuntimeEnv->tableqinfoGroupInfo.numOfTables = 0; // todo free memory return TSDB_CODE_SUCCESS; @@ -8279,7 +8340,7 @@ int32_t initQInfo(STsBufInfo* pTsBufInfo, void* tsdb, void* sourceOptr, SQInfo* if (pRuntimeEnv->tableqinfoGroupInfo.numOfTables == 0) { //qDebug("QInfo:0x%"PRIx64" no table qualified for tag filter, abort query", pQInfo->qId); - setQueryStatus(pRuntimeEnv, QUERY_COMPLETED); +// 
setTaskStatus(pOperator->pTaskInfo, QUERY_COMPLETED); return TSDB_CODE_SUCCESS; } @@ -8292,7 +8353,7 @@ int32_t initQInfo(STsBufInfo* pTsBufInfo, void* tsdb, void* sourceOptr, SQInfo* _error: // table query ref will be decrease during error handling - freeQInfo(pQInfo); + doDestroyTask(pQInfo); return code; } @@ -8373,20 +8434,20 @@ void* freeColumnInfo(SColumnInfo* pColumnInfo, int32_t numOfCols) { return NULL; } -void freeQInfo(SQInfo *pQInfo) { +void doDestroyTask(SQInfo *pQInfo) { if (!isValidQInfo(pQInfo)) { return; } //qDebug("QInfo:0x%"PRIx64" start to free QInfo", pQInfo->qId); - SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; + STaskRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; releaseQueryBuf(pRuntimeEnv->tableqinfoGroupInfo.numOfTables); doDestroyTableQueryInfo(&pRuntimeEnv->tableqinfoGroupInfo); teardownQueryRuntimeEnv(&pQInfo->runtimeEnv); - SQueryAttr *pQueryAttr = pQInfo->runtimeEnv.pQueryAttr; + STaskAttr *pQueryAttr = pQInfo->runtimeEnv.pQueryAttr; freeQueryAttr(pQueryAttr); // tsdbDestroyTableGroup(&pQueryAttr->tableGroupInfo); @@ -8407,8 +8468,8 @@ void freeQInfo(SQInfo *pQInfo) { int32_t doDumpQueryResult(SQInfo *pQInfo, char *data, int8_t compressed, int32_t *compLen) { // the remained number of retrieved rows, not the interpolated result - SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; - SQueryAttr *pQueryAttr = pQInfo->runtimeEnv.pQueryAttr; + STaskRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; + STaskAttr *pQueryAttr = pQInfo->runtimeEnv.pQueryAttr; // load data from file to msg buffer if (pQueryAttr->tsCompQuery) { @@ -8445,7 +8506,7 @@ int32_t doDumpQueryResult(SQInfo *pQInfo, char *data, int8_t compressed, int32_t // all data returned, set query over if (Q_STATUS_EQUAL(pRuntimeEnv->status, QUERY_COMPLETED)) { - setQueryStatus(pRuntimeEnv, QUERY_OVER); +// setTaskStatus(pOperator->pTaskInfo, QUERY_OVER); } } else { doCopyQueryResultToMsg(pQInfo, (int32_t)pRuntimeEnv->outputBuf->info.rows, data, compressed, compLen); @@ -8456,7 
+8517,7 @@ int32_t doDumpQueryResult(SQInfo *pQInfo, char *data, int8_t compressed, int32_t if (pQueryAttr->limit.limit > 0 && pQueryAttr->limit.limit == pRuntimeEnv->resultInfo.total) { //qDebug("QInfo:0x%"PRIx64" results limitation reached, limitation:%"PRId64, pQInfo->qId, pQueryAttr->limit.limit); - setQueryStatus(pRuntimeEnv, QUERY_OVER); +// setTaskStatus(pOperator->pTaskInfo, QUERY_OVER); } return TSDB_CODE_SUCCESS; @@ -8535,8 +8596,8 @@ int32_t checkForQueryBuf(size_t numOfTables) { } bool checkNeedToCompressQueryCol(SQInfo *pQInfo) { - SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; - SQueryAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; + STaskRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; + STaskAttr *pQueryAttr = pRuntimeEnv->pQueryAttr; SSDataBlock* pRes = pRuntimeEnv->outputBuf; @@ -8569,7 +8630,7 @@ void releaseQueryBuf(size_t numOfTables) { atomic_add_fetch_64(&tsQueryBufferSizeBytes, t); } -void freeQueryAttr(SQueryAttr* pQueryAttr) { +void freeQueryAttr(STaskAttr* pQueryAttr) { if (pQueryAttr != NULL) { if (pQueryAttr->fillVal != NULL) { tfree(pQueryAttr->fillVal); diff --git a/source/libs/index/inc/indexInt.h b/source/libs/index/inc/indexInt.h index 378af4c1d1..90ad1e15f4 100644 --- a/source/libs/index/inc/indexInt.h +++ b/source/libs/index/inc/indexInt.h @@ -19,6 +19,7 @@ #include "index.h" #include "index_fst.h" #include "taos.h" +#include "tchecksum.h" #include "thash.h" #include "tlog.h" diff --git a/source/libs/index/inc/index_fst_counting_writer.h b/source/libs/index/inc/index_fst_counting_writer.h index d7363f2f4c..1e0a88e17f 100644 --- a/source/libs/index/inc/index_fst_counting_writer.h +++ b/source/libs/index/inc/index_fst_counting_writer.h @@ -34,6 +34,7 @@ typedef struct WriterCtx { int (*read)(struct WriterCtx* ctx, uint8_t* buf, int len); int (*flush)(struct WriterCtx* ctx); int (*readFrom)(struct WriterCtx* ctx, uint8_t* buf, int len, int32_t offset); + int (*size)(struct WriterCtx* ctx); WriterType type; union { struct { diff 
--git a/source/libs/index/src/index.c b/source/libs/index/src/index.c index d0b8fa4290..19e9375491 100644 --- a/source/libs/index/src/index.c +++ b/source/libs/index/src/index.c @@ -30,21 +30,20 @@ void* indexQhandle = NULL; -int32_t indexInit() { +void indexInit() { + // refactor later indexQhandle = taosInitScheduler(INDEX_QUEUE_SIZE, INDEX_NUM_OF_THREADS, "index"); - return indexQhandle == NULL ? -1 : 0; - // do nothing } -void indexCleanUp() { taosCleanUpScheduler(indexQhandle); } +void indexCleanUp() { + // refacto later + taosCleanUpScheduler(indexQhandle); +} static int uidCompare(const void* a, const void* b) { + // add more version compare uint64_t u1 = *(uint64_t*)a; uint64_t u2 = *(uint64_t*)b; - if (u1 == u2) { - return 0; - } else { - return u1 < u2 ? -1 : 1; - } + return u1 - u2; } typedef struct SIdxColInfo { int colId; // generated by index internal @@ -61,7 +60,7 @@ static int indexMergeFinalResults(SArray* interResults, EIndexOperatorType oTyp static int indexGenTFile(SIndex* index, IndexCache* cache, SArray* batch); int indexOpen(SIndexOpts* opts, const char* path, SIndex** index) { - // pthread_once(&isInit, indexInit); + pthread_once(&isInit, indexInit); SIndex* sIdx = calloc(1, sizeof(SIndex)); if (sIdx == NULL) { return -1; } diff --git a/source/libs/index/src/index_cache.c b/source/libs/index/src/index_cache.c index 8bc3776ed9..294c8192e8 100644 --- a/source/libs/index/src/index_cache.c +++ b/source/libs/index/src/index_cache.c @@ -21,7 +21,7 @@ #define MAX_INDEX_KEY_LEN 256 // test only, change later #define MEM_TERM_LIMIT 10 * 10000 -#define MEM_THRESHOLD 1024 * 1024 * 2 +#define MEM_THRESHOLD 1024 * 1024 #define MEM_ESTIMATE_RADIO 1.5 static void indexMemRef(MemTable* tbl); diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index bfaeeaaa33..4f782cef26 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -935,7 +935,10 @@ Fst* fstCreate(FstSlice* slice) { uint32_t 
checkSum = 0; len -= sizeof(checkSum); taosDecodeFixedU32(buf + len, &checkSum); - + if (taosCheckChecksum(buf, len, checkSum)) { + // verify fst + return NULL; + } CompiledAddr rootAddr; len -= sizeof(rootAddr); taosDecodeFixedU64(buf + len, &rootAddr); diff --git a/source/libs/index/src/index_fst_counting_writer.c b/source/libs/index/src/index_fst_counting_writer.c index 0763aae857..6db5555aa6 100644 --- a/source/libs/index/src/index_fst_counting_writer.c +++ b/source/libs/index/src/index_fst_counting_writer.c @@ -59,6 +59,13 @@ static int writeCtxDoReadFrom(WriterCtx* ctx, uint8_t* buf, int len, int32_t off } return nRead; } +static int writeCtxGetSize(WriterCtx* ctx) { + if (ctx->type == TFile && ctx->file.readOnly) { + // refactor later + return ctx->file.size; + } + return 0; +} static int writeCtxDoFlush(WriterCtx* ctx) { if (ctx->type == TFile) { // taosFsyncFile(ctx->file.fd); @@ -109,6 +116,7 @@ WriterCtx* writerCtxCreate(WriterType type, const char* path, bool readOnly, int ctx->read = writeCtxDoRead; ctx->flush = writeCtxDoFlush; ctx->readFrom = writeCtxDoReadFrom; + ctx->size = writeCtxGetSize; ctx->offset = 0; ctx->limit = capacity; @@ -159,6 +167,8 @@ int fstCountingWriterWrite(FstCountingWriter* write, uint8_t* buf, uint32_t len) int nWrite = ctx->write(ctx, buf, len); assert(nWrite == len); write->count += len; + + write->summer = taosCalcChecksum(write->summer, buf, len); return len; } int fstCountingWriterRead(FstCountingWriter* write, uint8_t* buf, uint32_t len) { @@ -169,7 +179,10 @@ int fstCountingWriterRead(FstCountingWriter* write, uint8_t* buf, uint32_t len) return nRead; } -uint32_t fstCountingWriterMaskedCheckSum(FstCountingWriter* write) { return 0; } +uint32_t fstCountingWriterMaskedCheckSum(FstCountingWriter* write) { + // opt + return write->summer; +} int fstCountingWriterFlush(FstCountingWriter* write) { WriterCtx* ctx = write->wrt; diff --git a/source/libs/index/src/index_tfile.c b/source/libs/index/src/index_tfile.c index 
90a730d3a9..4b76402560 100644 --- a/source/libs/index/src/index_tfile.c +++ b/source/libs/index/src/index_tfile.c @@ -21,8 +21,11 @@ p * #include "index_fst_counting_writer.h" #include "index_util.h" #include "taosdef.h" +#include "tcoding.h" #include "tcompare.h" +const static uint64_t tfileMagicNumber = 0xdb4775248b80fb57ull; + typedef struct TFileFstIter { FstStreamBuilder* fb; StreamWithState* st; @@ -40,9 +43,12 @@ static void tfileSerialTableIdsToBuf(char* buf, SArray* tableIds); static int tfileWriteHeader(TFileWriter* writer); static int tfileWriteFstOffset(TFileWriter* tw, int32_t offset); static int tfileWriteData(TFileWriter* write, TFileValue* tval); +static int tfileWriteFooter(TFileWriter* write); +// handle file corrupt later static int tfileReaderLoadHeader(TFileReader* reader); static int tfileReaderLoadFst(TFileReader* reader); +static int tfileReaderVerify(TFileReader* reader); static int tfileReaderLoadTableIds(TFileReader* reader, int32_t offset, SArray* result); static SArray* tfileGetFileList(const char* path); @@ -71,7 +77,10 @@ TFileCache* tfileCacheCreate(const char* path) { } TFileReader* reader = tfileReaderCreate(wc); - if (reader == NULL) { goto End; } + if (reader == NULL) { + indexInfo("skip invalid file: %s", file); + continue; + } TFileHeader* header = &reader->header; ICacheKey key = {.suid = header->suid, .colName = header->colName, .nColName = strlen(header->colName)}; @@ -135,8 +144,14 @@ TFileReader* tfileReaderCreate(WriterCtx* ctx) { TFileReader* reader = calloc(1, sizeof(TFileReader)); if (reader == NULL) { return NULL; } - // T_REF_INC(reader); reader->ctx = ctx; + + if (0 != tfileReaderVerify(reader)) { + tfileReaderDestroy(reader); + indexError("invalid tfile, suid: %" PRIu64 ", colName: %s", reader->header.suid, reader->header.colName); + return NULL; + } + // T_REF_INC(reader); if (0 != tfileReaderLoadHeader(reader)) { tfileReaderDestroy(reader); indexError("failed to load index header, suid: %" PRIu64 ", colName: %s", 
reader->header.suid, @@ -293,6 +308,8 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) { fstBuilderFinish(tw->fb); fstBuilderDestroy(tw->fb); tw->fb = NULL; + + tfileWriteFooter(tw); return 0; } void tfileWriterClose(TFileWriter* tw) { @@ -499,6 +516,14 @@ static int tfileWriteData(TFileWriter* write, TFileValue* tval) { } return 0; } +static int tfileWriteFooter(TFileWriter* write) { + char buf[sizeof(tfileMagicNumber) + 1] = {0}; + void* pBuf = (void*)buf; + taosEncodeFixedU64((void**)(void*)&pBuf, tfileMagicNumber); + int nwrite = write->ctx->write(write->ctx, buf, strlen(buf)); + assert(nwrite == sizeof(tfileMagicNumber)); + return nwrite; +} static int tfileReaderLoadHeader(TFileReader* reader) { // TODO simple tfile header later char buf[TFILE_HEADER_SIZE] = {0}; @@ -524,9 +549,14 @@ static int tfileReaderLoadFst(TFileReader* reader) { if (buf == NULL) { return -1; } WriterCtx* ctx = reader->ctx; - int32_t nread = ctx->readFrom(ctx, buf, FST_MAX_SIZE, reader->header.fstOffset); - indexInfo("nread = %d, and fst offset=%d, filename: %s, size: %d ", nread, reader->header.fstOffset, ctx->file.buf, - ctx->file.size); + int size = ctx->size(ctx); + + int64_t ts = taosGetTimestampUs(); + int32_t nread = + ctx->readFrom(ctx, buf, size - reader->header.fstOffset - sizeof(tfileMagicNumber), reader->header.fstOffset); + int64_t cost = taosGetTimestampUs() - ts; + indexInfo("nread = %d, and fst offset=%d, filename: %s, size: %d, time cost: %" PRId64 "us", nread, + reader->header.fstOffset, ctx->file.buf, ctx->file.size, cost); // we assuse fst size less than FST_MAX_SIZE assert(nread > 0 && nread < FST_MAX_SIZE); @@ -555,6 +585,25 @@ static int tfileReaderLoadTableIds(TFileReader* reader, int32_t offset, SArray* free(buf); return 0; } +static int tfileReaderVerify(TFileReader* reader) { + // just validate header and Footer, file corrupted also shuild be verified later + WriterCtx* ctx = reader->ctx; + + uint64_t tMagicNumber = 0; + + char 
buf[sizeof(tMagicNumber) + 1] = {0}; + int size = ctx->size(ctx); + + if (size < sizeof(tMagicNumber) || size <= sizeof(reader->header)) { + return -1; + } else if (ctx->readFrom(ctx, buf, sizeof(tMagicNumber), size - sizeof(tMagicNumber)) != sizeof(tMagicNumber)) { + return -1; + } + + taosDecodeFixedU64(buf, &tMagicNumber); + return tMagicNumber == tfileMagicNumber ? 0 : -1; +} + void tfileReaderRef(TFileReader* reader) { if (reader == NULL) { return; } int ref = T_REF_INC(reader); diff --git a/source/libs/index/test/fstTest.cc b/source/libs/index/test/fstTest.cc index 70671a5f3e..a2c0046f9a 100644 --- a/source/libs/index/test/fstTest.cc +++ b/source/libs/index/test/fstTest.cc @@ -1,4 +1,5 @@ +#include #include #include #include @@ -12,7 +13,6 @@ #include "index_tfile.h" #include "tskiplist.h" #include "tutil.h" - void* callback(void* s) { return s; } static std::string fileName = "/tmp/tindex.tindex"; @@ -293,7 +293,7 @@ void validateTFile(char* arg) { std::thread threads[NUM_OF_THREAD]; // std::vector threads; - TFileReader* reader = tfileReaderOpen(arg, 0, 999992, "tag1"); + TFileReader* reader = tfileReaderOpen(arg, 0, 20000000, "tag1"); for (int i = 0; i < NUM_OF_THREAD; i++) { threads[i] = std::thread(fst_get, reader->fst); @@ -306,13 +306,41 @@ void validateTFile(char* arg) { } tfCleanup(); } + +void iterTFileReader(char* path, char* ver) { + tfInit(); + + int version = atoi(ver); + TFileReader* reader = tfileReaderOpen(path, 0, version, "tag1"); + Iterate* iter = tfileIteratorCreate(reader); + bool tn = iter ? 
iter->next(iter) : false; + int count = 0; + int termCount = 0; + while (tn == true) { + count++; + IterateValue* cv = iter->getValue(iter); + termCount += (int)taosArrayGetSize(cv->val); + printf("col val: %s, size: %d\n", cv->colVal, (int)taosArrayGetSize(cv->val)); + tn = iter->next(iter); + } + printf("total size: %d\n term count: %d\n", count, termCount); + + tfileIteratorDestroy(iter); + tfCleanup(); +} + int main(int argc, char* argv[]) { // tool to check all kind of fst test // if (argc > 1) { validateTFile(argv[1]); } + if (argc > 2) { + // opt + iterTFileReader(argv[1], argv[2]); + } // checkFstCheckIterator(); // checkFstLongTerm(); // checkFstPrefixSearch(); - checkMillonWriteAndReadOfFst(); + // checkMillonWriteAndReadOfFst(); + return 1; } diff --git a/source/libs/index/test/indexTests.cc b/source/libs/index/test/indexTests.cc index 5438f88b76..4f3330b7b3 100644 --- a/source/libs/index/test/indexTests.cc +++ b/source/libs/index/test/indexTests.cc @@ -665,14 +665,19 @@ class IndexObj { size_t numOfTable = 100 * 10000) { std::string tColVal = colVal; size_t colValSize = tColVal.size(); + int skip = 100; + numOfTable /= skip; for (int i = 0; i < numOfTable; i++) { - tColVal[i % colValSize] = 'a' + i % 26; + for (int k = 0; k < 10 && k < colVal.size(); k++) { + // opt + tColVal[rand() % colValSize] = 'a' + k % 26; + } SIndexTerm* term = indexTermCreate(0, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(), tColVal.c_str(), tColVal.size()); SIndexMultiTerm* terms = indexMultiTermCreate(); indexMultiTermAdd(terms, term); - for (size_t i = 0; i < 10; i++) { - int ret = Put(terms, i); + for (size_t j = 0; j < skip; j++) { + int ret = Put(terms, j); assert(ret == 0); } indexMultiTermDestroy(terms); @@ -939,10 +944,11 @@ TEST_F(IndexEnv2, testIndex_read_performance) { TEST_F(IndexEnv2, testIndexMultiTag) { std::string path = "/tmp/multi_tag"; if (index->Init(path) != 0) {} - index->WriteMultiMillonData("tag1", "Hello", 100 * 10000); - 
index->WriteMultiMillonData("tag2", "Test", 100 * 10000); - index->WriteMultiMillonData("tag3", "Test", 100 * 10000); - index->WriteMultiMillonData("tag4", "Test", 100 * 10000); + int64_t st = taosGetTimestampUs(); + int32_t num = 1000 * 10000; + index->WriteMultiMillonData("tag1", "xxxxxxxxxxxxxxx", num); + std::cout << "numOfRow: " << num << "\ttime cost:" << taosGetTimestampUs() - st << std::endl; + // index->WriteMultiMillonData("tag2", "xxxxxxxxxxxxxxxxxxxxxxxxx", 100 * 10000); } TEST_F(IndexEnv2, testLongComVal) { std::string path = "/tmp/long_colVal"; diff --git a/source/libs/parser/inc/astGenerator.h b/source/libs/parser/inc/astGenerator.h index 22806969af..7f357a2bbd 100644 --- a/source/libs/parser/inc/astGenerator.h +++ b/source/libs/parser/inc/astGenerator.h @@ -123,7 +123,7 @@ typedef struct SCreatedTableInfo { SToken name; // table name token SToken stbName; // super table name token , for using clause SArray *pTagNames; // create by using super table, tag name - SArray *pTagVals; // create by using super table, tag value + SArray *pTagVals; // create by using super table, tag value. 
SArray char *fullname; // table full name int8_t igExist; // ignore if exists } SCreatedTableInfo; diff --git a/source/libs/parser/inc/astToMsg.h b/source/libs/parser/inc/astToMsg.h index 0d0769a040..1b7fe5ebc5 100644 --- a/source/libs/parser/inc/astToMsg.h +++ b/source/libs/parser/inc/astToMsg.h @@ -12,7 +12,7 @@ SShowReq* buildShowMsg(SShowInfo* pShowInfo, SParseBasicCtx* pParseCtx, char* ms SCreateDbMsg* buildCreateDbMsg(SCreateDbInfo* pCreateDbInfo, SParseBasicCtx *pCtx, SMsgBuf* pMsgBuf); SCreateStbMsg* buildCreateStbMsg(SCreateTableSql* pCreateTableSql, int32_t* len, SParseBasicCtx* pParseCtx, SMsgBuf* pMsgBuf); SDropStbMsg* buildDropStableMsg(SSqlInfo* pInfo, int32_t* len, SParseBasicCtx* pParseCtx, SMsgBuf* pMsgBuf); -SCreateDnodeMsg *buildCreateDnodeMsg(SSqlInfo* pInfo, int32_t* len, SMsgBuf* pMsgBuf); -SDropDnodeMsg *buildDropDnodeMsg(SSqlInfo* pInfo, int32_t* len, SMsgBuf* pMsgBuf); +SCreateDnodeReq *buildCreateDnodeMsg(SSqlInfo* pInfo, int32_t* len, SMsgBuf* pMsgBuf); +SDropDnodeReq *buildDropDnodeMsg(SSqlInfo* pInfo, int32_t* len, SMsgBuf* pMsgBuf); #endif // TDENGINE_ASTTOMSG_H diff --git a/source/libs/parser/inc/parserInt.h b/source/libs/parser/inc/parserInt.h index d18934d5f5..10ec335fc8 100644 --- a/source/libs/parser/inc/parserInt.h +++ b/source/libs/parser/inc/parserInt.h @@ -44,14 +44,14 @@ void clearAllTableMetaInfo(SQueryStmtInfo* pQueryInfo, bool removeMeta, uint64_t /** * Validate the sql info, according to the corresponding metadata info from catalog. 
- * @param pCatalog - * @param pSqlInfo - * @param pQueryInfo a bounded AST with essential meta data from local buffer or mgmt node - * @param id - * @param msg + * @param pCtx + * @param pInfo + * @param pQueryInfo + * @param msgBuf + * @param msgBufLen * @return */ -int32_t qParserValidateSqlNode(struct SCatalog* pCatalog, SSqlInfo* pSqlInfo, SQueryStmtInfo* pQueryInfo, int64_t id, char* msg, int32_t msgLen); +int32_t qParserValidateSqlNode(SParseBasicCtx *pCtx, SSqlInfo* pInfo, SQueryStmtInfo* pQueryInfo, char* msgBuf, int32_t msgBufLen); /** * validate the ddl ast, and convert the ast to the corresponding message format @@ -62,6 +62,14 @@ int32_t qParserValidateSqlNode(struct SCatalog* pCatalog, SSqlInfo* pSqlInfo, SQ */ SDclStmtInfo* qParserValidateDclSqlNode(SSqlInfo* pInfo, SParseBasicCtx* pCtx, char* msgBuf, int32_t msgBufLen); +/** + * + * @param pInfo + * @param pCtx + * @param msgBuf + * @param msgBufLen + * @return + */ SVnodeModifOpStmtInfo* qParserValidateCreateTbSqlNode(SSqlInfo* pInfo, SParseBasicCtx* pCtx, char* msgBuf, int32_t msgBufLen); /** @@ -90,7 +98,7 @@ int32_t checkForInvalidExpr(SQueryStmtInfo* pQueryInfo, SMsgBuf* pMsgBuf); * @param msgBufLen * @return */ -int32_t qParserExtractRequestedMetaInfo(const SSqlInfo* pSqlInfo, SCatalogReq* pMetaInfo, char* msg, int32_t msgBufLen); +int32_t qParserExtractRequestedMetaInfo(const SSqlInfo* pSqlInfo, SCatalogReq* pMetaInfo, SParseBasicCtx *pCtx, char* msg, int32_t msgBufLen); /** * Destroy the meta data request structure. 
diff --git a/source/libs/parser/src/astGenerator.c b/source/libs/parser/src/astGenerator.c index 0cb3cea95f..34ed8bd355 100644 --- a/source/libs/parser/src/astGenerator.c +++ b/source/libs/parser/src/astGenerator.c @@ -686,7 +686,7 @@ void destroySqlNode(SSqlNode *pSqlNode) { void freeCreateTableInfo(void* p) { SCreatedTableInfo* pInfo = (SCreatedTableInfo*) p; taosArrayDestroy(pInfo->pTagNames); - taosArrayDestroyEx(pInfo->pTagVals, freeItem); + taosArrayDestroy(pInfo->pTagVals); tfree(pInfo->fullname); } diff --git a/source/libs/parser/src/astToMsg.c b/source/libs/parser/src/astToMsg.c index 0c1af8dc3c..1ae45556b4 100644 --- a/source/libs/parser/src/astToMsg.c +++ b/source/libs/parser/src/astToMsg.c @@ -335,7 +335,7 @@ SDropStbMsg* buildDropStableMsg(SSqlInfo* pInfo, int32_t* len, SParseBasicCtx* p return pDropTableMsg; } -SCreateDnodeMsg *buildCreateDnodeMsg(SSqlInfo* pInfo, int32_t* len, SMsgBuf* pMsgBuf) { +SCreateDnodeReq *buildCreateDnodeMsg(SSqlInfo* pInfo, int32_t* len, SMsgBuf* pMsgBuf) { const char* msg1 = "invalid host name (name too long, maximum length 128)"; const char* msg2 = "dnode name can not be string"; const char* msg3 = "port should be an integer that is less than 65535 and greater than 0"; @@ -367,7 +367,7 @@ SCreateDnodeMsg *buildCreateDnodeMsg(SSqlInfo* pInfo, int32_t* len, SMsgBuf* pMs return NULL; } - SCreateDnodeMsg *pCreate = (SCreateDnodeMsg *) calloc(1, sizeof(SCreateDnodeMsg)); + SCreateDnodeReq *pCreate = (SCreateDnodeReq *) calloc(1, sizeof(SCreateDnodeReq)); if (pCreate == NULL) { buildInvalidOperationMsg(pMsgBuf, msg4); return NULL; @@ -376,18 +376,18 @@ SCreateDnodeMsg *buildCreateDnodeMsg(SSqlInfo* pInfo, int32_t* len, SMsgBuf* pMs strncpy(pCreate->fqdn, id->z, id->n); pCreate->port = htonl(val); - *len = sizeof(SCreateDnodeMsg); + *len = sizeof(SCreateDnodeReq); return pCreate; } -SDropDnodeMsg *buildDropDnodeMsg(SSqlInfo* pInfo, int32_t* len, SMsgBuf* pMsgBuf) { +SDropDnodeReq *buildDropDnodeMsg(SSqlInfo* pInfo, int32_t* len, 
SMsgBuf* pMsgBuf) { SToken* pzName = taosArrayGet(pInfo->pMiscInfo->a, 0); char* end = NULL; - SDropDnodeMsg * pDrop = (SDropDnodeMsg *)calloc(1, sizeof(SDropDnodeMsg)); + SDropDnodeReq * pDrop = (SDropDnodeReq *)calloc(1, sizeof(SDropDnodeReq)); pDrop->dnodeId = strtoll(pzName->z, &end, 10); pDrop->dnodeId = htonl(pDrop->dnodeId); - *len = sizeof(SDropDnodeMsg); + *len = sizeof(SDropDnodeReq); if (end - pzName->z != pzName->n) { buildInvalidOperationMsg(pMsgBuf, "invalid dnode id"); diff --git a/source/libs/parser/src/astValidate.c b/source/libs/parser/src/astValidate.c index b2466fd2dc..faa8c526a0 100644 --- a/source/libs/parser/src/astValidate.c +++ b/source/libs/parser/src/astValidate.c @@ -213,7 +213,7 @@ SQueryStmtInfo *createQueryInfo() { pQueryInfo->slimit.limit = -1; pQueryInfo->slimit.offset = 0; - pQueryInfo->pUpstream = taosArrayInit(4, POINTER_BYTES); + pQueryInfo->pDownstream = taosArrayInit(4, POINTER_BYTES); pQueryInfo->window = TSWINDOW_INITIALIZER; pQueryInfo->exprList = calloc(10, POINTER_BYTES); @@ -247,8 +247,8 @@ static void destroyQueryInfoImpl(SQueryStmtInfo* pQueryInfo) { tfree(pQueryInfo->fillVal); tfree(pQueryInfo->buf); - taosArrayDestroy(pQueryInfo->pUpstream); - pQueryInfo->pUpstream = NULL; + taosArrayDestroy(pQueryInfo->pDownstream); + pQueryInfo->pDownstream = NULL; pQueryInfo->bufLen = 0; } @@ -256,9 +256,9 @@ void destroyQueryInfo(SQueryStmtInfo* pQueryInfo) { while (pQueryInfo != NULL) { SQueryStmtInfo* p = pQueryInfo->sibling; - size_t numOfUpstream = taosArrayGetSize(pQueryInfo->pUpstream); + size_t numOfUpstream = taosArrayGetSize(pQueryInfo->pDownstream); for (int32_t i = 0; i < numOfUpstream; ++i) { - SQueryStmtInfo* pUpQueryInfo = taosArrayGetP(pQueryInfo->pUpstream, i); + SQueryStmtInfo* pUpQueryInfo = taosArrayGetP(pQueryInfo->pDownstream, i); destroyQueryInfoImpl(pUpQueryInfo); clearAllTableMetaInfo(pUpQueryInfo, false, 0); tfree(pUpQueryInfo); @@ -288,7 +288,6 @@ static int32_t doValidateSubquery(SSqlNode* pSqlNode, 
int32_t index, SQueryStmtI } pSub->pUdfInfo = pUdfInfo; - pSub->pDownstream = pQueryInfo; int32_t code = validateSqlNode(p, pSub, pMsgBuf); if (code != TSDB_CODE_SUCCESS) { return code; @@ -311,7 +310,7 @@ static int32_t doValidateSubquery(SSqlNode* pSqlNode, int32_t index, SQueryStmtI tstrncpy(pTableMetaInfo1->aliasName, subInfo->aliasName.z, subInfo->aliasName.n + 1); } - taosArrayPush(pQueryInfo->pUpstream, &pSub); + taosArrayPush(pQueryInfo->pDownstream, &pSub); // NOTE: order mix up in subquery not support yet. pQueryInfo->order = pSub->order; @@ -600,7 +599,7 @@ int32_t checkForUnsupportedQuery(SQueryStmtInfo* pQueryInfo, SMsgBuf* pMsgBuf) { return buildInvalidOperationMsg(pMsgBuf, msg1); } - if (f == FUNCTION_BLKINFO && taosArrayGetSize(pQueryInfo->pUpstream) > 0) { + if (f == FUNCTION_BLKINFO && taosArrayGetSize(pQueryInfo->pDownstream) > 0) { return buildInvalidOperationMsg(pMsgBuf, msg1); } @@ -1584,7 +1583,6 @@ int32_t validateSqlNode(SSqlNode* pSqlNode, SQueryStmtInfo* pQueryInfo, SMsgBuf* } pushDownAggFuncExprInfo(pQueryInfo); -// addColumnNodeFromLowerLevel(pQueryInfo); for(int32_t i = 0; i < 1; ++i) { SArray* functionList = extractFunctionList(pQueryInfo->exprList[i]); @@ -3630,12 +3628,39 @@ int32_t evaluateSqlNode(SSqlNode* pNode, int32_t tsPrecision, SMsgBuf* pMsgBuf) return TSDB_CODE_SUCCESS; } -int32_t qParserValidateSqlNode(struct SCatalog* pCatalog, SSqlInfo* pInfo, SQueryStmtInfo* pQueryInfo, int64_t id, char* msgBuf, int32_t msgBufLen) { - assert(pCatalog != NULL && pInfo != NULL); +int32_t setTableVgroupList(SParseBasicCtx *pCtx, SName* name, SVgroupsInfo **pVgList) { + SArray* vgroupList = NULL; + int32_t code = catalogGetTableDistVgroup(pCtx->pCatalog, pCtx->pTransporter, &pCtx->mgmtEpSet, name, &vgroupList); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + int32_t vgroupNum = taosArrayGetSize(vgroupList); + + SVgroupsInfo *vgList = calloc(1, sizeof(SVgroupsInfo) + sizeof(SVgroupMsg) * vgroupNum); + + vgList->numOfVgroups = 
vgroupNum; + + for (int32_t i = 0; i < vgroupNum; ++i) { + SVgroupInfo *vg = taosArrayGet(vgroupList, i); + vgList->vgroups[i].vgId = vg->vgId; + vgList->vgroups[i].numOfEps = vg->numOfEps; + memcpy(vgList->vgroups[i].epAddr, vg->epAddr, sizeof(vgList->vgroups[i].epAddr)); + } + + *pVgList = vgList; + + taosArrayDestroy(vgroupList); + + return TSDB_CODE_SUCCESS; +} + +int32_t qParserValidateSqlNode(SParseBasicCtx *pCtx, SSqlInfo* pInfo, SQueryStmtInfo* pQueryInfo, char* msgBuf, int32_t msgBufLen) { + assert(pCtx != NULL && pInfo != NULL); int32_t code = 0; - SMsgBuf m = {.buf = msgBuf, .len = msgBufLen}; - SMsgBuf *pMsgBuf = &m; + SMsgBuf m = {.buf = msgBuf, .len = msgBufLen}; + SMsgBuf* pMsgBuf = &m; switch (pInfo->type) { #if 0 @@ -3682,22 +3707,6 @@ int32_t qParserValidateSqlNode(struct SCatalog* pCatalog, SSqlInfo* pInfo, SQuer break; } - case TSDB_SQL_USE_DB: { - const char* msg = "invalid db name"; - SToken* pToken = taosArrayGet(pInfo->pMiscInfo->a, 0); - - if (tscValidateName(pToken) != TSDB_CODE_SUCCESS) { - return buildInvalidOperationMsg(pMsgBuf, msg); - } - - int32_t ret = tNameSetDbName(&pTableMetaInfo->name, getAccountId(pSql), pToken); - if (ret != TSDB_CODE_SUCCESS) { - return buildInvalidOperationMsg(pMsgBuf, msg); - } - - break; - } - case TSDB_SQL_RESET_CACHE: { return TSDB_CODE_SUCCESS; } @@ -3712,55 +3721,6 @@ int32_t qParserValidateSqlNode(struct SCatalog* pCatalog, SSqlInfo* pInfo, SQuer break; } - case TSDB_SQL_CREATE_DNODE: { - const char* msg = "invalid host name (ip address)"; - - if (taosArrayGetSize(pInfo->pMiscInfo->a) > 1) { - return buildInvalidOperationMsg(pMsgBuf, msg); - } - - SToken* id = taosArrayGet(pInfo->pMiscInfo->a, 0); - if (id->type == TK_STRING) { - id->n = strdequote(id->z); - } - break; - } - - case TSDB_SQL_CREATE_ACCT: - case TSDB_SQL_ALTER_ACCT: { - const char* msg1 = "invalid state option, available options[no, r, w, all]"; - const char* msg2 = "invalid user/account name"; - const char* msg3 = "name too long"; - - 
SToken* pName = &pInfo->pMiscInfo->user.user; - SToken* pPwd = &pInfo->pMiscInfo->user.passwd; - - if (handlePassword(pCmd, pPwd) != TSDB_CODE_SUCCESS) { - return TSDB_CODE_TSC_INVALID_OPERATION; - } - - if (pName->n >= TSDB_USER_LEN) { - return buildInvalidOperationMsg(pMsgBuf, msg3); - } - - if (tscValidateName(pName) != TSDB_CODE_SUCCESS) { - return buildInvalidOperationMsg(pMsgBuf, msg2); - } - - SCreateAcctInfo* pAcctOpt = &pInfo->pMiscInfo->acctOpt; - if (pAcctOpt->stat.n > 0) { - if (pAcctOpt->stat.z[0] == 'r' && pAcctOpt->stat.n == 1) { - } else if (pAcctOpt->stat.z[0] == 'w' && pAcctOpt->stat.n == 1) { - } else if (strncmp(pAcctOpt->stat.z, "all", 3) == 0 && pAcctOpt->stat.n == 3) { - } else if (strncmp(pAcctOpt->stat.z, "no", 2) == 0 && pAcctOpt->stat.n == 2) { - } else { - return buildInvalidOperationMsg(pMsgBuf, msg1); - } - } - - break; - } - case TSDB_SQL_DESCRIBE_TABLE: { const char* msg1 = "invalid table name"; @@ -3819,7 +3779,7 @@ int32_t qParserValidateSqlNode(struct SCatalog* pCatalog, SSqlInfo* pInfo, SQuer char* pMsg = pCmd->payload; - SCfgDnodeMsg* pCfg = (SCfgDnodeMsg*)pMsg; + SMCfgDnodeReq* pCfg = (SMCfgDnodeReq*)pMsg; SToken* t0 = taosArrayGet(pMiscInfo->a, 0); SToken* t1 = taosArrayGet(pMiscInfo->a, 1); @@ -3865,29 +3825,6 @@ int32_t qParserValidateSqlNode(struct SCatalog* pCatalog, SSqlInfo* pInfo, SQuer return TSDB_CODE_SUCCESS; } - case TSDB_SQL_CREATE_TABLE: { - SCreateTableSql* pCreateTable = pInfo->pCreateTableInfo; - - if (pCreateTable->type == TSQL_CREATE_TABLE || pCreateTable->type == TSQL_CREATE_STABLE) { - if ((code = doCheckForCreateTable(pSql, 0, pInfo)) != TSDB_CODE_SUCCESS) { - return code; - } - - } else if (pCreateTable->type == TSQL_CREATE_TABLE_FROM_STABLE) { - assert(pCmd->numOfCols == 0); - if ((code = doCheckForCreateFromStable(pSql, pInfo)) != TSDB_CODE_SUCCESS) { - return code; - } - - } else if (pCreateTable->type == TSQL_CREATE_STREAM) { - if ((code = doCheckForStream(pSql, pInfo)) != TSDB_CODE_SUCCESS) { - 
return code; - } - } - - break; - } - case TSDB_SQL_SELECT: { const char * msg1 = "no nested query supported in union clause"; code = loadAllTableMeta(pSql, pInfo); @@ -3981,26 +3918,47 @@ int32_t qParserValidateSqlNode(struct SCatalog* pCatalog, SSqlInfo* pInfo, SQuer } break; } - #endif default: return buildInvalidOperationMsg(pMsgBuf, "not support sql expression"); } +#endif + } - SCatalogReq req = {0}; - SMetaData data = {0}; + SCatalogReq req = {0}; + SMetaData data = {0}; // TODO: check if the qnode info has been cached already req.qNodeRequired = true; - code = qParserExtractRequestedMetaInfo(pInfo, &req, msgBuf, msgBufLen); + code = qParserExtractRequestedMetaInfo(pInfo, &req, pCtx, msgBuf, msgBufLen); if (code != TSDB_CODE_SUCCESS) { return code; } // load the meta data from catalog - code = catalogGetAllMeta(pCatalog, NULL, NULL, &req, &data); +// code = catalogGetAllMeta(pCtx->pCatalog, pCtx->pTransporter, &pCtx->mgmtEpSet, &req, &data); + STableMeta* pmt = NULL; + + SName* name = taosArrayGet(req.pTableName, 0); + code = catalogGetTableMeta(pCtx->pCatalog, pCtx->pTransporter, &pCtx->mgmtEpSet, name, &pmt); if (code != TSDB_CODE_SUCCESS) { return code; } + + data.pTableMeta = taosArrayInit(1, POINTER_BYTES); + taosArrayPush(data.pTableMeta, &pmt); + + pQueryInfo->pTableMetaInfo = calloc(1, POINTER_BYTES); + pQueryInfo->pTableMetaInfo[0] = calloc(1, sizeof(STableMetaInfo)); + pQueryInfo->pTableMetaInfo[0]->pTableMeta = pmt; + pQueryInfo->pTableMetaInfo[0]->name = *name; + pQueryInfo->numOfTables = 1; + pQueryInfo->pTableMetaInfo[0]->tagColList = taosArrayInit(4, POINTER_BYTES); + + code = setTableVgroupList(pCtx, name, &pQueryInfo->pTableMetaInfo[0]->vgroupList); + if (code != TSDB_CODE_SUCCESS) { + taosArrayDestroy(data.pTableMeta); + return code; + } // evaluate the sqlnode STableMeta* pTableMeta = (STableMeta*) taosArrayGetP(data.pTableMeta, 0); diff --git a/source/libs/parser/src/dCDAstProcess.c b/source/libs/parser/src/dCDAstProcess.c index 
efe7670089..06729813c1 100644 --- a/source/libs/parser/src/dCDAstProcess.c +++ b/source/libs/parser/src/dCDAstProcess.c @@ -1,12 +1,11 @@ -#include -#include -#include "astToMsg.h" +#include "tmsg.h" +#include "tglobal.h" #include "parserInt.h" +#include "ttime.h" +#include "astToMsg.h" +#include "astGenerator.h" #include "parserUtil.h" #include "queryInfoUtil.h" -#include "tglobal.h" -#include "tmsg.h" -#include "ttime.h" /* is contained in pFieldList or not */ static bool has(SArray* pFieldList, int32_t startIndex, const char* name) { @@ -43,7 +42,7 @@ static int32_t setShowInfo(SShowInfo* pShowInfo, SParseBasicCtx* pCtx, void** ou char dbFname[TSDB_DB_FNAME_LEN] = {0}; tNameGetFullDbName(&name, dbFname); - catalogGetDBVgroup(pCtx->pCatalog, pCtx->pTransporter, &pCtx->mgmtEpSet, dbFname, 0, &array); + catalogGetDBVgroup(pCtx->pCatalog, pCtx->pTransporter, &pCtx->mgmtEpSet, dbFname, false, &array); SVgroupInfo* info = taosArrayGet(array, 0); pShowReq->head.vgId = htonl(info->vgId); @@ -195,6 +194,18 @@ static int32_t doCheckDbOptions(SCreateDbMsg* pCreate, SMsgBuf* pMsgBuf) { TSDB_MIN_VNODES_PER_DB, TSDB_MAX_VNODES_PER_DB); } + val = htonl(pCreate->maxRows); + if (val < TSDB_MIN_MAX_ROW_FBLOCK || val > TSDB_MAX_MAX_ROW_FBLOCK) { + snprintf(msg, tListLen(msg), "invalid number of max rows in file block for DB:%d valid range: [%d, %d]", val, + TSDB_MIN_MAX_ROW_FBLOCK, TSDB_MAX_MAX_ROW_FBLOCK); + } + + val = htonl(pCreate->minRows); + if (val < TSDB_MIN_MIN_ROW_FBLOCK || val > TSDB_MAX_MIN_ROW_FBLOCK) { + snprintf(msg, tListLen(msg), "invalid number of min rows in file block for DB:%d valid range: [%d, %d]", val, + TSDB_MIN_MIN_ROW_FBLOCK, TSDB_MAX_MIN_ROW_FBLOCK); + } + return TSDB_CODE_SUCCESS; } @@ -332,7 +343,6 @@ static int32_t doParseSerializeTagValue(SSchema* pTagSchema, int32_t numOfInputT char* endPtr = NULL; char tmpTokenBuf[TSDB_MAX_TAGS_LEN] = {0}; - SKvParam param = {.builder = pKvRowBuilder, .schema = pSchema}; SToken* pItem = taosArrayGet(pTagValList, 
i); diff --git a/source/libs/parser/src/insertParser.c b/source/libs/parser/src/insertParser.c index 8b3c328cce..04c287baf1 100644 --- a/source/libs/parser/src/insertParser.c +++ b/source/libs/parser/src/insertParser.c @@ -624,12 +624,11 @@ int32_t parseInsertSql(SParseContext* pContext, SVnodeModifOpStmtInfo** pInfo) { if (NULL == context.pVgroupsHashObj || NULL == context.pTableBlockHashObj || NULL == context.pOutput) { terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; - return TSDB_CODE_FAILED; + return TSDB_CODE_TSC_OUT_OF_MEMORY; } *pInfo = context.pOutput; context.pOutput->nodeType = TSDB_SQL_INSERT; - context.pOutput->schemaAttache = pContext->schemaAttached; context.pOutput->payloadType = PAYLOAD_TYPE_KV; int32_t code = skipInsertInto(&context); @@ -638,5 +637,5 @@ int32_t parseInsertSql(SParseContext* pContext, SVnodeModifOpStmtInfo** pInfo) { } destroyInsertParseContext(&context); terrno = code; - return (TSDB_CODE_SUCCESS == code ? TSDB_CODE_SUCCESS : TSDB_CODE_FAILED); + return code; } diff --git a/source/libs/parser/src/parser.c b/source/libs/parser/src/parser.c index 85a8d9e047..f440e6cdfe 100644 --- a/source/libs/parser/src/parser.c +++ b/source/libs/parser/src/parser.c @@ -31,8 +31,8 @@ bool isInsertSql(const char* pStr, size_t length) { } while (1); } -bool qIsDdlQuery(const SQueryNode* pQuery) { - return TSDB_SQL_INSERT != pQuery->type && TSDB_SQL_SELECT != pQuery->type && TSDB_SQL_CREATE_TABLE != pQuery->type; +bool qIsDdlQuery(const SQueryNode* pQueryNode) { + return TSDB_SQL_INSERT != pQueryNode->type && TSDB_SQL_SELECT != pQueryNode->type && TSDB_SQL_CREATE_TABLE != pQueryNode->type; } int32_t parseQuerySql(SParseContext* pCxt, SQueryNode** pQuery) { @@ -44,15 +44,7 @@ int32_t parseQuerySql(SParseContext* pCxt, SQueryNode** pQuery) { } if (!isDqlSqlStatement(&info)) { -// bool toVnode = false; if (info.type == TSDB_SQL_CREATE_TABLE) { -// SCreateTableSql* pCreateSql = info.pCreateTableInfo; -// if (pCreateSql->type == TSQL_CREATE_CTABLE || 
pCreateSql->type == TSQL_CREATE_TABLE) { -// toVnode = true; -// } -// } - -// if (toVnode) { SVnodeModifOpStmtInfo * pModifStmtInfo = qParserValidateCreateTbSqlNode(&info, &pCxt->ctx, pCxt->pMsg, pCxt->msgLen); if (pModifStmtInfo == NULL) { return terrno; @@ -69,13 +61,13 @@ int32_t parseQuerySql(SParseContext* pCxt, SQueryNode** pQuery) { pDcl->nodeType = info.type; } } else { - SQueryStmtInfo* pQueryInfo = calloc(1, sizeof(SQueryStmtInfo)); + SQueryStmtInfo* pQueryInfo = createQueryInfo(); if (pQueryInfo == NULL) { terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; // set correct error code. return terrno; } - int32_t code = qParserValidateSqlNode(pCxt->ctx.pCatalog, &info, pQueryInfo, pCxt->ctx.requestId, pCxt->pMsg, pCxt->msgLen); + int32_t code = qParserValidateSqlNode(&pCxt->ctx, &info, pQueryInfo, pCxt->pMsg, pCxt->msgLen); if (code == TSDB_CODE_SUCCESS) { *pQuery = (SQueryNode*)pQueryInfo; } @@ -97,7 +89,7 @@ int32_t qParserConvertSql(const char* pStr, size_t length, char** pConvertSql) { return 0; } -static int32_t getTableNameFromSqlNode(SSqlNode* pSqlNode, SArray* tableNameList, SMsgBuf* pMsgBuf); +static int32_t getTableNameFromSqlNode(SSqlNode* pSqlNode, SArray* tableNameList, SParseBasicCtx *pCtx, SMsgBuf* pMsgBuf); static int32_t tnameComparFn(const void* p1, const void* p2) { SName* pn1 = (SName*)p1; @@ -121,7 +113,7 @@ static int32_t tnameComparFn(const void* p1, const void* p2) { } } -static int32_t getTableNameFromSubquery(SSqlNode* pSqlNode, SArray* tableNameList, SMsgBuf* pMsgBuf) { +static int32_t getTableNameFromSubquery(SSqlNode* pSqlNode, SArray* tableNameList, SParseBasicCtx *pCtx, SMsgBuf* pMsgBuf) { int32_t numOfSub = (int32_t)taosArrayGetSize(pSqlNode->from->list); for (int32_t j = 0; j < numOfSub; ++j) { @@ -131,12 +123,12 @@ static int32_t getTableNameFromSubquery(SSqlNode* pSqlNode, SArray* tableNameLis for (int32_t i = 0; i < num; ++i) { SSqlNode* p = taosArrayGetP(sub->pSubquery->node, i); if (p->from->type == SQL_FROM_NODE_TABLES) { - int32_t 
code = getTableNameFromSqlNode(p, tableNameList, pMsgBuf); + int32_t code = getTableNameFromSqlNode(p, tableNameList, pCtx, pMsgBuf); if (code != TSDB_CODE_SUCCESS) { return code; } } else { - getTableNameFromSubquery(p, tableNameList, pMsgBuf); + getTableNameFromSubquery(p, tableNameList, pCtx, pMsgBuf); } } } @@ -144,7 +136,7 @@ static int32_t getTableNameFromSubquery(SSqlNode* pSqlNode, SArray* tableNameLis return TSDB_CODE_SUCCESS; } -int32_t getTableNameFromSqlNode(SSqlNode* pSqlNode, SArray* tableNameList, SMsgBuf* pMsgBuf) { +int32_t getTableNameFromSqlNode(SSqlNode* pSqlNode, SArray* tableNameList, SParseBasicCtx *pParseCtx, SMsgBuf* pMsgBuf) { const char* msg1 = "invalid table name"; int32_t numOfTables = (int32_t) taosArrayGetSize(pSqlNode->from->list); @@ -163,7 +155,11 @@ int32_t getTableNameFromSqlNode(SSqlNode* pSqlNode, SArray* tableNameList, SMsgB } SName name = {0}; - strndequote(name.tname, t->z, t->n); + int32_t code = createSName(&name, t, pParseCtx, pMsgBuf); + if (code != TSDB_CODE_SUCCESS) { + return buildInvalidOperationMsg(pMsgBuf, msg1); + } + taosArrayPush(tableNameList, &name); } @@ -174,7 +170,7 @@ static void freePtrElem(void* p) { tfree(*(char**)p); } -int32_t qParserExtractRequestedMetaInfo(const SSqlInfo* pSqlInfo, SCatalogReq* pMetaInfo, char* msg, int32_t msgBufLen) { +int32_t qParserExtractRequestedMetaInfo(const SSqlInfo* pSqlInfo, SCatalogReq* pMetaInfo, SParseBasicCtx *pCtx, char* msg, int32_t msgBufLen) { int32_t code = TSDB_CODE_SUCCESS; SMsgBuf msgBuf = {.buf = msg, .len = msgBufLen}; @@ -190,12 +186,12 @@ int32_t qParserExtractRequestedMetaInfo(const SSqlInfo* pSqlInfo, SCatalogReq* p // load the table meta in the FROM clause if (pSqlNode->from->type == SQL_FROM_NODE_TABLES) { - code = getTableNameFromSqlNode(pSqlNode, pMetaInfo->pTableName, &msgBuf); + code = getTableNameFromSqlNode(pSqlNode, pMetaInfo->pTableName, pCtx, &msgBuf); if (code != TSDB_CODE_SUCCESS) { return code; } } else { - code = 
getTableNameFromSubquery(pSqlNode, pMetaInfo->pTableName, &msgBuf); + code = getTableNameFromSubquery(pSqlNode, pMetaInfo->pTableName, pCtx, &msgBuf); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -241,6 +237,9 @@ void qParserCleanupMetaRequestInfo(SCatalogReq* pMetaReq) { } void qDestroyQuery(SQueryNode* pQueryNode) { + if (NULL == pQueryNode) { + return; + } if (nodeType(pQueryNode) == TSDB_SQL_INSERT || nodeType(pQueryNode) == TSDB_SQL_CREATE_TABLE) { SVnodeModifOpStmtInfo* pModifInfo = (SVnodeModifOpStmtInfo*)pQueryNode; taosArrayDestroy(pModifInfo->pDataBlocks); diff --git a/source/libs/parser/test/parserTests.cpp b/source/libs/parser/test/parserTests.cpp index fe430c5f5e..8758fdbc71 100644 --- a/source/libs/parser/test/parserTests.cpp +++ b/source/libs/parser/test/parserTests.cpp @@ -77,12 +77,15 @@ void sqlCheck(const char* sql, bool valid) { buf.len = 128; buf.buf = msg; + SParseBasicCtx ctx = {0}; + ctx.db = "db1"; + ctx.acctId = 1; SSqlNode* pNode = (SSqlNode*)taosArrayGetP(((SArray*)info1.sub.node), 0); int32_t code = evaluateSqlNode(pNode, TSDB_TIME_PRECISION_NANO, &buf); ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -119,7 +122,11 @@ TEST(testCase, validateAST_test) { ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + SParseBasicCtx ctx = {0}; + ctx.db = "db1"; + ctx.acctId = 1; + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); + ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -177,7 +184,11 @@ TEST(testCase, function_Test) { ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + SParseBasicCtx ctx = {0}; + ctx.db = "db1"; + ctx.acctId = 1; + int32_t ret 
= qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); + ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -223,7 +234,11 @@ TEST(testCase, function_Test2) { ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + SParseBasicCtx ctx = {0}; + ctx.db = "db1"; + ctx.acctId = 1; + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); + ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -269,7 +284,11 @@ TEST(testCase, function_Test3) { ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + SParseBasicCtx ctx = {0}; + ctx.db = "db1"; + ctx.acctId = 1; + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); + ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -314,7 +333,11 @@ TEST(testCase, function_Test4) { ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + SParseBasicCtx ctx = {0}; + ctx.db = "db1"; + ctx.acctId = 1; + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); + ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -362,7 +385,11 @@ TEST(testCase, function_Test5) { ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + SParseBasicCtx ctx = {0}; + ctx.db = "db1"; + ctx.acctId = 1; + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); + ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -447,7 +474,11 @@ TEST(testCase, function_Test6) { ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + SParseBasicCtx ctx = {0}; + ctx.db = "db1"; + ctx.acctId = 1; + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); + ASSERT_EQ(ret, 0); 
ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -525,7 +556,11 @@ TEST(testCase, function_Test6) { ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + SParseBasicCtx ctx = {0}; + ctx.db = "db1"; + ctx.acctId = 1; + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); + ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -587,7 +622,11 @@ TEST(testCase, function_Test6) { ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + SParseBasicCtx ctx = {0}; + ctx.db = "db1"; + ctx.acctId = 1; + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); + ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -636,7 +675,7 @@ TEST(testCase, function_Test6) { code = evaluateSqlNode(pNode, TSDB_TIME_PRECISION_NANO, &buf); ASSERT_EQ(code, 0); - ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -666,7 +705,10 @@ TEST(testCase, function_Test6) { ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + SParseBasicCtx ctx = {0}; + ctx.db = "db1"; + ctx.acctId = 1; + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -688,7 +730,7 @@ TEST(testCase, function_Test6) { code = evaluateSqlNode(pNode, TSDB_TIME_PRECISION_NANO, &buf); ASSERT_EQ(code, 0); - ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); diff --git a/source/libs/parser/test/plannerTest.cpp b/source/libs/parser/test/plannerTest.cpp index 04c3a7d81a..8d9fbadfad 
100644 --- a/source/libs/parser/test/plannerTest.cpp +++ b/source/libs/parser/test/plannerTest.cpp @@ -81,7 +81,8 @@ void generateLogicplan(const char* sql) { ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + SParseBasicCtx ctx = {0}; + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); @@ -121,7 +122,9 @@ TEST(testCase, planner_test) { ASSERT_EQ(code, 0); SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + SParseBasicCtx ctx = {0}; + + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); diff --git a/source/libs/parser/test/tokenizerTest.cpp b/source/libs/parser/test/tokenizerTest.cpp index 3ab6a6531c..ee01a50148 100644 --- a/source/libs/parser/test/tokenizerTest.cpp +++ b/source/libs/parser/test/tokenizerTest.cpp @@ -710,7 +710,11 @@ TEST(testCase, extractMeta_test) { char msg[128] = {0}; SCatalogReq req = {0}; - int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, msg, 128); + + SParseBasicCtx ctx = {0}; + ctx.db = "db1"; + ctx.acctId = 1; + int32_t ret = qParserExtractRequestedMetaInfo(&info1, &req, &ctx, msg, 128); ASSERT_EQ(ret, 0); ASSERT_EQ(taosArrayGetSize(req.pTableName), 1); diff --git a/source/libs/planner/inc/plannerInt.h b/source/libs/planner/inc/plannerInt.h index 31e057f4c0..2a50752c88 100644 --- a/source/libs/planner/inc/plannerInt.h +++ b/source/libs/planner/inc/plannerInt.h @@ -53,8 +53,8 @@ typedef struct SQueryDistPlanNodeInfo { typedef struct SQueryTableInfo { char *tableName; // to be deleted uint64_t uid; // to be deleted - STableMetaInfo* pMeta; - STimeWindow window; + STableMetaInfo *pMeta; + STimeWindow window; } SQueryTableInfo; typedef struct SQueryPlanNode { @@ -106,7 +106,7 @@ int32_t queryPlanToString(struct SQueryPlanNode* 
pQueryNode, char** str); int32_t queryPlanToSql(struct SQueryPlanNode* pQueryNode, char** sql); int32_t createDag(SQueryPlanNode* pQueryNode, struct SCatalog* pCatalog, SQueryDag** pDag, uint64_t requestId); -int32_t setSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep); +void setSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep); int32_t subPlanToString(const SSubplan *pPhyNode, char** str, int32_t* len); int32_t stringToSubplan(const char* str, SSubplan** subplan); diff --git a/source/libs/planner/src/logicPlan.c b/source/libs/planner/src/logicPlan.c index fa7b3776dc..9a9b40473b 100644 --- a/source/libs/planner/src/logicPlan.c +++ b/source/libs/planner/src/logicPlan.c @@ -64,10 +64,11 @@ static int32_t createModificationOpPlan(const SQueryNode* pNode, SQueryPlanNode* } int32_t createSelectPlan(const SQueryStmtInfo* pSelect, SQueryPlanNode** pQueryPlan) { - SArray* upstream = createQueryPlanImpl(pSelect); - assert(taosArrayGetSize(upstream) == 1); - *pQueryPlan = taosArrayGetP(upstream, 0); - taosArrayDestroy(upstream); + SArray* pDownstream = createQueryPlanImpl(pSelect); + assert(taosArrayGetSize(pDownstream) == 1); + + *pQueryPlan = taosArrayGetP(pDownstream, 0); + taosArrayDestroy(pDownstream); return TSDB_CODE_SUCCESS; } @@ -100,23 +101,21 @@ void destroyQueryPlan(SQueryPlanNode* pQueryNode) { //====================================================================================================================== -static SQueryPlanNode* createQueryNode(int32_t type, const char* name, SQueryPlanNode** prev, int32_t numOfPrev, +static SQueryPlanNode* createQueryNode(int32_t type, const char* name, SQueryPlanNode** pChildrenNode, int32_t numOfChildren, SExprInfo** pExpr, int32_t numOfOutput, const void* pExtInfo) { SQueryPlanNode* pNode = calloc(1, sizeof(SQueryPlanNode)); pNode->info.type = type; pNode->info.name = strdup(name); - pNode->numOfExpr = numOfOutput; - pNode->pExpr = 
taosArrayInit(numOfOutput, POINTER_BYTES); - for(int32_t i = 0; i < numOfOutput; ++i) { - taosArrayPush(pNode->pExpr, &pExpr[i]); - } + pNode->pExpr = taosArrayInit(numOfOutput, POINTER_BYTES); + taosArrayAddBatch(pNode->pExpr, pExpr, numOfOutput); + assert(pNode->numOfExpr == numOfOutput); pNode->pChildren = taosArrayInit(4, POINTER_BYTES); - for(int32_t i = 0; i < numOfPrev; ++i) { - taosArrayPush(pNode->pChildren, &prev[i]); + for(int32_t i = 0; i < numOfChildren; ++i) { + taosArrayPush(pNode->pChildren, &pChildrenNode[i]); } switch(type) { @@ -184,8 +183,7 @@ static SQueryPlanNode* createQueryNode(int32_t type, const char* name, SQueryPla return pNode; } -static SQueryPlanNode* doAddTableColumnNode(const SQueryStmtInfo* pQueryInfo, STableMetaInfo* pTableMetaInfo, SQueryTableInfo* info, - SArray* pExprs, SArray* tableCols) { +static SQueryPlanNode* doAddTableColumnNode(const SQueryStmtInfo* pQueryInfo, SQueryTableInfo* info, SArray* pExprs, SArray* tableCols) { if (pQueryInfo->info.onlyTagQuery) { int32_t num = (int32_t) taosArrayGetSize(pExprs); SQueryPlanNode* pNode = createQueryNode(QNODE_TAGSCAN, "TableTagScan", NULL, 0, pExprs->pData, num, info); @@ -193,16 +191,12 @@ static SQueryPlanNode* doAddTableColumnNode(const SQueryStmtInfo* pQueryInfo, ST if (pQueryInfo->info.distinct) { pNode = createQueryNode(QNODE_DISTINCT, "Distinct", &pNode, 1, pExprs->pData, num, NULL); } - return pNode; } SQueryPlanNode* pNode = createQueryNode(QNODE_TABLESCAN, "TableScan", NULL, 0, NULL, 0, info); - if (pQueryInfo->info.projectionQuery) { - int32_t numOfOutput = (int32_t) taosArrayGetSize(pExprs); - pNode = createQueryNode(QNODE_PROJECT, "Projection", &pNode, 1, pExprs->pData, numOfOutput, NULL); - } else { + if (!pQueryInfo->info.projectionQuery) { STableMetaInfo* pTableMetaInfo1 = getMetaInfo(pQueryInfo, 0); // table source column projection, generate the projection expr @@ -262,7 +256,11 @@ static SQueryPlanNode* doCreateQueryPlanForSingleTableImpl(const SQueryStmtInfo* 
pNode = createQueryNode(QNODE_AGGREGATE, "Aggregate", &pNode, 1, p->pData, num, NULL); } } else { - pNode = createQueryNode(QNODE_PROJECT, "Projection", &pNode, 1, p->pData, num, NULL); + // here we can push down the projection to tablescan operator. + pNode->numOfExpr = num; + pNode->pExpr = taosArrayInit(num, POINTER_BYTES); + taosArrayAddAll(pNode->pExpr, p); +// pNode = createQueryNode(QNODE_PROJECT, "Projection", &pNode, 1, p->pData, num, NULL); } } @@ -299,9 +297,11 @@ static SQueryPlanNode* doCreateQueryPlanForSingleTable(const SQueryStmtInfo* pQu tstrncpy(name, pTableMetaInfo->name.tname, TSDB_TABLE_FNAME_LEN); SQueryTableInfo info = {.tableName = strdup(name), .uid = pTableMetaInfo->pTableMeta->uid,}; + info.window = pQueryInfo->window; + info.pMeta = pTableMetaInfo; // handle the only tag query - SQueryPlanNode* pNode = doAddTableColumnNode(pQueryInfo, pTableMetaInfo, &info, pExprs, tableCols); + SQueryPlanNode* pNode = doAddTableColumnNode(pQueryInfo, &info, pExprs, tableCols); if (pQueryInfo->info.onlyTagQuery) { tfree(info.tableName); return pNode; @@ -326,23 +326,23 @@ static bool isAllAggExpr(SArray* pList) { } SArray* createQueryPlanImpl(const SQueryStmtInfo* pQueryInfo) { - SArray* upstream = NULL; + SArray* pDownstream = NULL; - if (pQueryInfo->pUpstream != NULL && taosArrayGetSize(pQueryInfo->pUpstream) > 0) { // subquery in the from clause - upstream = taosArrayInit(4, POINTER_BYTES); + if (pQueryInfo->pDownstream != NULL && taosArrayGetSize(pQueryInfo->pDownstream) > 0) { // subquery in the from clause + pDownstream = taosArrayInit(4, POINTER_BYTES); - size_t size = taosArrayGetSize(pQueryInfo->pUpstream); + size_t size = taosArrayGetSize(pQueryInfo->pDownstream); for(int32_t i = 0; i < size; ++i) { - SQueryStmtInfo* pq = taosArrayGet(pQueryInfo->pUpstream, i); + SQueryStmtInfo* pq = taosArrayGet(pQueryInfo->pDownstream, i); SArray* p = createQueryPlanImpl(pq); - taosArrayAddBatch(upstream, p->pData, (int32_t) taosArrayGetSize(p)); + 
taosArrayAddBatch(pDownstream, p->pData, (int32_t) taosArrayGetSize(p)); } } if (pQueryInfo->numOfTables > 1) { // it is a join query // 1. separate the select clause according to table - taosArrayDestroy(upstream); - upstream = taosArrayInit(5, POINTER_BYTES); + taosArrayDestroy(pDownstream); + pDownstream = taosArrayInit(5, POINTER_BYTES); for(int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { STableMetaInfo* pTableMetaInfo = pQueryInfo->pTableMetaInfo[i]; @@ -365,30 +365,30 @@ SArray* createQueryPlanImpl(const SQueryStmtInfo* pQueryInfo) { columnListCopy(tableColumnList, pQueryInfo->colList, uid); // 4. add the projection query node - SQueryPlanNode* pNode = doAddTableColumnNode(pQueryInfo, pTableMetaInfo, &info, exprList, tableColumnList); + SQueryPlanNode* pNode = doAddTableColumnNode(pQueryInfo, &info, exprList, tableColumnList); columnListDestroy(tableColumnList); // dropAllExprInfo(exprList); - taosArrayPush(upstream, &pNode); + taosArrayPush(pDownstream, &pNode); } // 3. add the join node here SQueryTableInfo info = {0}; int32_t num = (int32_t) taosArrayGetSize(pQueryInfo->exprList[0]); - SQueryPlanNode* pNode = createQueryNode(QNODE_JOIN, "Join", upstream->pData, pQueryInfo->numOfTables, + SQueryPlanNode* pNode = createQueryNode(QNODE_JOIN, "Join", pDownstream->pData, pQueryInfo->numOfTables, pQueryInfo->exprList[0]->pData, num, NULL); // 4. 
add the aggregation or projection execution node pNode = doCreateQueryPlanForSingleTableImpl(pQueryInfo, pNode, &info); - upstream = taosArrayInit(5, POINTER_BYTES); - taosArrayPush(upstream, &pNode); + pDownstream = taosArrayInit(5, POINTER_BYTES); + taosArrayPush(pDownstream, &pNode); } else { // only one table, normal query process STableMetaInfo* pTableMetaInfo = pQueryInfo->pTableMetaInfo[0]; SQueryPlanNode* pNode = doCreateQueryPlanForSingleTable(pQueryInfo, pTableMetaInfo, pQueryInfo->exprList[0], pQueryInfo->colList); - upstream = taosArrayInit(5, POINTER_BYTES); - taosArrayPush(upstream, &pNode); + pDownstream = taosArrayInit(5, POINTER_BYTES); + taosArrayPush(pDownstream, &pNode); } - return upstream; + return pDownstream; } static void doDestroyQueryNode(SQueryPlanNode* pQueryNode) { @@ -434,22 +434,23 @@ static int32_t doPrintPlan(char* buf, SQueryPlanNode* pQueryNode, int32_t level, switch(pQueryNode->info.type) { case QNODE_TABLESCAN: { SQueryTableInfo* pInfo = (SQueryTableInfo*)pQueryNode->pExtInfo; - len1 = sprintf(buf + len, "%s #%" PRIu64 ") time_range: %" PRId64 " - %" PRId64, pInfo->tableName, pInfo->uid, - pInfo->window.skey, pInfo->window.ekey); + len1 = sprintf(buf + len, "%s #%" PRIu64, pInfo->tableName, pInfo->uid); assert(len1 > 0); len += len1; - for (int32_t i = 0; i < pQueryNode->numOfExpr; ++i) { - SColumn* pCol = taosArrayGetP(pQueryNode->pExpr, i); - len1 = sprintf(buf + len, " [%s #%d] ", pCol->name, pCol->info.colId); + len1 = sprintf(buf + len, " , cols:"); + assert(len1 > 0); + len += len1; - assert(len1 > 0); - len += len1; - } - - len1 = sprintf(buf + len, "\n"); + len = printExprInfo(buf, pQueryNode, len); + len1 = sprintf(buf + len, ")"); assert(len1 > 0); + // todo print filter info + len1 = sprintf(buf + len, ") filters:(nil)"); + len += len1; + + len1 = sprintf(buf + len, " time_range: %" PRId64 " - %" PRId64"\n", pInfo->window.skey, pInfo->window.ekey); len += len1; break; } diff --git 
a/source/libs/planner/src/physicalPlan.c b/source/libs/planner/src/physicalPlan.c index e7468e44eb..7c427efb5a 100644 --- a/source/libs/planner/src/physicalPlan.c +++ b/source/libs/planner/src/physicalPlan.c @@ -75,24 +75,16 @@ int32_t dsinkNameToDsinkType(const char* name) { return DSINK_Unknown; } -static SDataSink* initDataSink(int32_t type, int32_t size) { - SDataSink* sink = (SDataSink*)validPointer(calloc(1, size)); - sink->info.type = type; - sink->info.name = dsinkTypeToDsinkName(type); - return sink; -} - -static SDataSink* createDataDispatcher(SPlanContext* pCxt, SQueryPlanNode* pPlanNode) { - SDataDispatcher* dispatcher = (SDataDispatcher*)initDataSink(DSINK_Dispatch, sizeof(SDataDispatcher)); - return (SDataSink*)dispatcher; -} - -static SDataSink* createDataInserter(SPlanContext* pCxt, SVgDataBlocks* pBlocks) { - SDataInserter* inserter = (SDataInserter*)initDataSink(DSINK_Insert, sizeof(SDataInserter)); - inserter->numOfTables = pBlocks->numOfTables; - inserter->size = pBlocks->size; - SWAP(inserter->pData, pBlocks->pData, char*); - return (SDataSink*)inserter; +static bool copySchema(SDataBlockSchema* dst, const SDataBlockSchema* src) { + dst->pSchema = malloc(sizeof(SSlotSchema) * src->numOfCols); + if (NULL == dst->pSchema) { + return false; + } + memcpy(dst->pSchema, src->pSchema, sizeof(SSlotSchema) * src->numOfCols); + dst->numOfCols = src->numOfCols; + dst->resultRowSize = src->resultRowSize; + dst->precision = src->precision; + return true; } static bool toDataBlockSchema(SQueryPlanNode* pPlanNode, SDataBlockSchema* dataBlockSchema) { @@ -102,6 +94,10 @@ static bool toDataBlockSchema(SQueryPlanNode* pPlanNode, SDataBlockSchema* dataB return false; } memcpy(dataBlockSchema->pSchema, pPlanNode->pSchema, sizeof(SSlotSchema) * pPlanNode->numOfCols); + dataBlockSchema->resultRowSize = 0; + for (int32_t i = 0; i < dataBlockSchema->numOfCols; ++i) { + dataBlockSchema->resultRowSize += dataBlockSchema->pSchema[i].bytes; + } return true; } @@ -120,13 
+116,37 @@ static bool cloneExprArray(SArray** dst, SArray* src) { return (TSDB_CODE_SUCCESS == copyAllExprInfo(*dst, src, true) ? true : false); } +static SDataSink* initDataSink(int32_t type, int32_t size, const SPhyNode* pRoot) { + SDataSink* sink = (SDataSink*)validPointer(calloc(1, size)); + sink->info.type = type; + sink->info.name = dsinkTypeToDsinkName(type); + if (NULL !=pRoot && !copySchema(&sink->schema, &pRoot->targetSchema)) { + tfree(sink); + THROW(TSDB_CODE_TSC_OUT_OF_MEMORY); + } + return sink; +} + +static SDataSink* createDataInserter(SPlanContext* pCxt, SVgDataBlocks* pBlocks, const SPhyNode* pRoot) { + SDataInserter* inserter = (SDataInserter*)initDataSink(DSINK_Insert, sizeof(SDataInserter), pRoot); + inserter->numOfTables = pBlocks->numOfTables; + inserter->size = pBlocks->size; + SWAP(inserter->pData, pBlocks->pData, char*); + return (SDataSink*)inserter; +} + +static SDataSink* createDataDispatcher(SPlanContext* pCxt, SQueryPlanNode* pPlanNode, const SPhyNode* pRoot) { + SDataDispatcher* dispatcher = (SDataDispatcher*)initDataSink(DSINK_Dispatch, sizeof(SDataDispatcher), pRoot); + return (SDataSink*)dispatcher; +} + static SPhyNode* initPhyNode(SQueryPlanNode* pPlanNode, int32_t type, int32_t size) { SPhyNode* node = (SPhyNode*)validPointer(calloc(1, size)); node->info.type = type; node->info.name = opTypeToOpName(type); if (!cloneExprArray(&node->pTargets, pPlanNode->pExpr) || !toDataBlockSchema(pPlanNode, &(node->targetSchema))) { free(node); - return NULL; + THROW(TSDB_CODE_TSC_OUT_OF_MEMORY); } return node; } @@ -149,7 +169,7 @@ static SPhyNode* createTagScanNode(SQueryPlanNode* pPlanNode) { static uint8_t getScanFlag(SQueryPlanNode* pPlanNode, SQueryTableInfo* pTable) { // todo - return MASTER_SCAN; + return MAIN_SCAN; } static SPhyNode* createUserTableScanNode(SQueryPlanNode* pPlanNode, SQueryTableInfo* pTable, int32_t op) { @@ -160,9 +180,6 @@ static SPhyNode* createUserTableScanNode(SQueryPlanNode* pPlanNode, SQueryTableI return 
(SPhyNode*)node; } -static SPhyNode* createSingleTableScanNode(SQueryPlanNode* pPlanNode, SQueryTableInfo* pTable) { - return createUserTableScanNode(pPlanNode, pTable, OP_TableScan); -} static bool isSystemTable(SQueryTableInfo* pTable) { // todo @@ -187,7 +204,8 @@ static SSubplan* initSubplan(SPlanContext* pCxt, int32_t type) { SSubplan* subplan = validPointer(calloc(1, sizeof(SSubplan))); subplan->id = pCxt->nextId; ++(pCxt->nextId.subplanId); - subplan->type = type; + + subplan->type = type; subplan->level = 0; if (NULL != pCxt->pCurrentSubplan) { subplan->level = pCxt->pCurrentSubplan->level + 1; @@ -239,9 +257,10 @@ static uint64_t splitSubplanByTable(SPlanContext* pCxt, SQueryPlanNode* pPlanNod for (int32_t i = 0; i < pTable->pMeta->vgroupList->numOfVgroups; ++i) { STORE_CURRENT_SUBPLAN(pCxt); SSubplan* subplan = initSubplan(pCxt, QUERY_TYPE_SCAN); + subplan->msgType = TDMT_VND_QUERY; vgroupMsgToEpSet(&(pTable->pMeta->vgroupList->vgroups[i]), &subplan->execNode); subplan->pNode = createMultiTableScanNode(pPlanNode, pTable); - subplan->pDataSink = createDataDispatcher(pCxt, pPlanNode); + subplan->pDataSink = createDataDispatcher(pCxt, pPlanNode, subplan->pNode); RECOVERY_CURRENT_SUBPLAN(pCxt); } return pCxt->nextId.templateId++; @@ -250,6 +269,7 @@ static uint64_t splitSubplanByTable(SPlanContext* pCxt, SQueryPlanNode* pPlanNod static SPhyNode* createExchangeNode(SPlanContext* pCxt, SQueryPlanNode* pPlanNode, uint64_t srcTemplateId) { SExchangePhyNode* node = (SExchangePhyNode*)initPhyNode(pPlanNode, OP_Exchange, sizeof(SExchangePhyNode)); node->srcTemplateId = srcTemplateId; + node->pSrcEndPoints = validPointer(taosArrayInit(TARRAY_MIN_SIZE, sizeof(SQueryNodeAddr))); return (SPhyNode*)node; } @@ -258,12 +278,20 @@ static bool needMultiNodeScan(SQueryTableInfo* pTable) { return (TSDB_SUPER_TABLE == pTable->pMeta->pTableMeta->tableType); } +static SPhyNode* createSingleTableScanNode(SQueryPlanNode* pPlanNode, SQueryTableInfo* pTable, SSubplan* subplan) { + 
vgroupMsgToEpSet(&(pTable->pMeta->vgroupList->vgroups[0]), &subplan->execNode); + + return createUserTableScanNode(pPlanNode, pTable, OP_TableScan); +} + + static SPhyNode* createTableScanNode(SPlanContext* pCxt, SQueryPlanNode* pPlanNode) { SQueryTableInfo* pTable = (SQueryTableInfo*)pPlanNode->pExtInfo; + if (needMultiNodeScan(pTable)) { return createExchangeNode(pCxt, pPlanNode, splitSubplanByTable(pCxt, pPlanNode, pTable)); } - return createSingleTableScanNode(pPlanNode, pTable); + return createSingleTableScanNode(pPlanNode, pTable, pCxt->pCurrentSubplan); } static SPhyNode* createPhyNode(SPlanContext* pCxt, SQueryPlanNode* pPlanNode) { @@ -275,6 +303,8 @@ static SPhyNode* createPhyNode(SPlanContext* pCxt, SQueryPlanNode* pPlanNode) { case QNODE_TABLESCAN: node = createTableScanNode(pCxt, pPlanNode); break; + case QNODE_PROJECT: +// node = create case QNODE_MODIFY: // Insert is not an operator in a physical plan. break; @@ -305,7 +335,7 @@ static void splitModificationOpSubPlan(SPlanContext* pCxt, SQueryPlanNode* pPlan SVgDataBlocks* blocks = (SVgDataBlocks*)taosArrayGetP(pPayload->payload, i); vgroupInfoToEpSet(&blocks->vg, &subplan->execNode); - subplan->pDataSink = createDataInserter(pCxt, blocks); + subplan->pDataSink = createDataInserter(pCxt, blocks, NULL); subplan->pNode = NULL; subplan->type = QUERY_TYPE_MODIFY; subplan->msgType = pPayload->msgType; @@ -319,12 +349,12 @@ static void createSubplanByLevel(SPlanContext* pCxt, SQueryPlanNode* pRoot) { if (QNODE_MODIFY == pRoot->info.type) { splitModificationOpSubPlan(pCxt, pRoot); } else { - SSubplan* subplan = initSubplan(pCxt, QUERY_TYPE_MERGE); + SSubplan* subplan = initSubplan(pCxt, QUERY_TYPE_SCAN); ++(pCxt->nextId.templateId); subplan->msgType = TDMT_VND_QUERY; subplan->pNode = createPhyNode(pCxt, pRoot); - subplan->pDataSink = createDataDispatcher(pCxt, pRoot); + subplan->pDataSink = createDataDispatcher(pCxt, pRoot, subplan->pNode); } // todo deal subquery } @@ -335,7 +365,7 @@ int32_t 
createDag(SQueryPlanNode* pQueryNode, struct SCatalog* pCatalog, SQueryD .pCatalog = pCatalog, .pDag = validPointer(calloc(1, sizeof(SQueryDag))), .pCurrentSubplan = NULL, - .nextId = {0} // todo queryid + .nextId = {.queryId = requestId}, }; *pDag = context.pDag; @@ -351,6 +381,24 @@ int32_t createDag(SQueryPlanNode* pQueryNode, struct SCatalog* pCatalog, SQueryD return TSDB_CODE_SUCCESS; } -int32_t setSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep) { - //todo +void setExchangSourceNode(uint64_t templateId, SQueryNodeAddr* pEp, SPhyNode* pNode) { + if (NULL == pNode) { + return; + } + if (OP_Exchange == pNode->info.type) { + SExchangePhyNode* pExchange = (SExchangePhyNode*)pNode; + if (templateId == pExchange->srcTemplateId) { + taosArrayPush(pExchange->pSrcEndPoints, pEp); + } + } + if (pNode->pChildren != NULL) { + size_t size = taosArrayGetSize(pNode->pChildren); + for(int32_t i = 0; i < size; ++i) { + setExchangSourceNode(templateId, pEp, taosArrayGetP(pNode->pChildren, i)); + } + } +} + +void setSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* pEp) { + setExchangSourceNode(templateId, pEp, subplan->pNode); } diff --git a/source/libs/planner/src/physicalPlanJson.c b/source/libs/planner/src/physicalPlanJson.c index b25d9a3627..20da1842cf 100644 --- a/source/libs/planner/src/physicalPlanJson.c +++ b/source/libs/planner/src/physicalPlanJson.c @@ -230,9 +230,11 @@ static bool columnInfoToJson(const void* obj, cJSON* jCol) { if (res) { res = cJSON_AddNumberToObject(jCol, jkColumnInfoBytes, col->bytes); } - if (res) { - res = addRawArray(jCol, jkColumnInfoFilterList, columnFilterInfoToJson, col->flist.filterInfo, sizeof(SColumnFilterInfo), col->flist.numOfFilters); + + if (res) { // TODO: temporarily disable it +// res = addRawArray(jCol, jkColumnInfoFilterList, columnFilterInfoToJson, col->flist.filterInfo, sizeof(SColumnFilterInfo), col->flist.numOfFilters); } + return res; } @@ -396,7 +398,7 @@ static 
bool exprNodeFromJson(const cJSON* json, void* obj) { case TEXPR_FUNCTION_NODE: return fromObject(json, jkExprNodeFunction, functionFromJson, exprInfo, false); case TEXPR_COL_NODE: - return fromObject(json, jkExprNodeColumn, schemaFromJson, exprInfo->pSchema, false); + return fromObjectWithAlloc(json, jkExprNodeColumn, schemaFromJson, (void**)&exprInfo->pSchema, sizeof(SSchema), false); case TEXPR_VALUE_NODE: return fromObject(json, jkExprNodeValue, variantFromJson, exprInfo->pVal, false); default: @@ -794,7 +796,6 @@ static cJSON* subplanToJson(const SSubplan* subplan) { } // The 'type', 'level', 'execEpSet', 'pChildren' and 'pParents' fields do not need to be serialized. - bool res = addObject(jSubplan, jkSubplanId, subplanIdToJson, &subplan->id); if (res) { res = addObject(jSubplan, jkSubplanNode, phyNodeToJson, subplan->pNode); @@ -807,6 +808,7 @@ static cJSON* subplanToJson(const SSubplan* subplan) { cJSON_Delete(jSubplan); return NULL; } + return jSubplan; } diff --git a/source/libs/planner/src/planner.c b/source/libs/planner/src/planner.c index b61c7c390f..f80a631413 100644 --- a/source/libs/planner/src/planner.c +++ b/source/libs/planner/src/planner.c @@ -64,6 +64,13 @@ int32_t qCreateQueryDag(const struct SQueryNode* pNode, struct SQueryDag** pDag, return code; } + // + if (logicPlan->info.type != QNODE_MODIFY) { +// char* str = NULL; +// queryPlanToString(logicPlan, &str); +// printf("%s\n", str); + } + code = optimizeQueryPlan(logicPlan); if (TSDB_CODE_SUCCESS != code) { destroyQueryPlan(logicPlan); @@ -81,8 +88,8 @@ int32_t qCreateQueryDag(const struct SQueryNode* pNode, struct SQueryDag** pDag, return TSDB_CODE_SUCCESS; } -int32_t qSetSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep) { - return setSubplanExecutionNode(subplan, templateId, ep); +void qSetSubplanExecutionNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep) { + setSubplanExecutionNode(subplan, templateId, ep); } int32_t qSubPlanToString(const 
SSubplan *subplan, char** str, int32_t* len) { diff --git a/source/libs/qcom/src/querymsg.c b/source/libs/qcom/src/querymsg.c index 507650159f..093e42a3d2 100644 --- a/source/libs/qcom/src/querymsg.c +++ b/source/libs/qcom/src/querymsg.c @@ -97,6 +97,7 @@ int32_t queryProcessUseDBRsp(void* output, char *msg, int32_t msgSize) { pRsp->vgVersion = ntohl(pRsp->vgVersion); pRsp->vgNum = ntohl(pRsp->vgNum); + pRsp->uid = be64toh(pRsp->uid); if (pRsp->vgNum < 0) { qError("invalid db[%s] vgroup number[%d]", pRsp->db, pRsp->vgNum); @@ -111,6 +112,7 @@ int32_t queryProcessUseDBRsp(void* output, char *msg, int32_t msgSize) { pOut->dbVgroup.vgVersion = pRsp->vgVersion; pOut->dbVgroup.hashMethod = pRsp->hashMethod; + pOut->dbVgroup.dbId = pRsp->uid; pOut->dbVgroup.vgInfo = taosHashInit(pRsp->vgNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); if (NULL == pOut->dbVgroup.vgInfo) { qError("hash init[%d] failed", pRsp->vgNum); @@ -149,8 +151,8 @@ static int32_t queryConvertTableMetaMsg(STableMetaMsg* pMetaMsg) { pMetaMsg->numOfColumns = ntohl(pMetaMsg->numOfColumns); pMetaMsg->sversion = ntohl(pMetaMsg->sversion); pMetaMsg->tversion = ntohl(pMetaMsg->tversion); - pMetaMsg->tuid = htobe64(pMetaMsg->tuid); - pMetaMsg->suid = htobe64(pMetaMsg->suid); + pMetaMsg->tuid = be64toh(pMetaMsg->tuid); + pMetaMsg->suid = be64toh(pMetaMsg->suid); pMetaMsg->vgId = ntohl(pMetaMsg->vgId); if (pMetaMsg->numOfTags < 0 || pMetaMsg->numOfTags > TSDB_MAX_TAGS) { @@ -208,7 +210,7 @@ int32_t queryCreateTableMetaFromMsg(STableMetaMsg* msg, bool isSuperTable, STabl pTableMeta->vgId = isSuperTable ? 0 : msg->vgId; pTableMeta->tableType = isSuperTable ? TSDB_SUPER_TABLE : msg->tableType; - pTableMeta->uid = msg->tuid; + pTableMeta->uid = isSuperTable ? 
msg->suid : msg->tuid; pTableMeta->suid = msg->suid; pTableMeta->sversion = msg->sversion; pTableMeta->tversion = msg->tversion; @@ -244,7 +246,7 @@ int32_t queryProcessTableMetaRsp(void* output, char *msg, int32_t msgSize) { } if (pMetaMsg->tableType == TSDB_CHILD_TABLE) { - pOut->metaNum = 2; + SET_META_TYPE_BOTH_TABLE(pOut->metaType); if (pMetaMsg->dbFname[0]) { snprintf(pOut->ctbFname, sizeof(pOut->ctbFname), "%s.%s", pMetaMsg->dbFname, pMetaMsg->tbFname); @@ -261,7 +263,7 @@ int32_t queryProcessTableMetaRsp(void* output, char *msg, int32_t msgSize) { code = queryCreateTableMetaFromMsg(pMetaMsg, true, &pOut->tbMeta); } else { - pOut->metaNum = 1; + SET_META_TYPE_TABLE(pOut->metaType); if (pMetaMsg->dbFname[0]) { snprintf(pOut->tbFname, sizeof(pOut->tbFname), "%s.%s", pMetaMsg->dbFname, pMetaMsg->tbFname); diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h index a7ec39bfde..ada9b247ce 100644 --- a/source/libs/scheduler/inc/schedulerInt.h +++ b/source/libs/scheduler/inc/schedulerInt.h @@ -37,10 +37,10 @@ enum { }; typedef struct SSchedulerMgmt { - uint64_t taskId; - uint64_t sId; + uint64_t taskId; // sequential taksId + uint64_t sId; // schedulerId SSchedulerCfg cfg; - SHashObj *jobs; // key: queryId, value: SQueryJob* + SHashObj *jobs; // key: queryId, value: SQueryJob* } SSchedulerMgmt; typedef struct SSchCallbackParam { @@ -83,52 +83,61 @@ typedef struct SSchJobAttr { typedef struct SSchJob { uint64_t queryId; - int32_t levelNum; - int32_t levelIdx; - int8_t status; SSchJobAttr attr; - SEpSet dataSrcEps; - SEpAddr resEp; + int32_t levelNum; void *transport; SArray *nodeList; // qnode/vnode list, element is SQueryNodeAddr - tsem_t rspSem; - int32_t userFetch; - int32_t remoteFetch; - SSchTask *fetchTask; - - int32_t errCode; - void *res; - int32_t resNumOfRows; + SArray *levels; // Element is SQueryLevel, starting from 0. 
SArray + SArray *subPlans; // subplan pointer copied from DAG, no need to free it in scheduler + int32_t levelIdx; + SEpSet dataSrcEps; SHashObj *execTasks; // executing tasks, key:taskid, value:SQueryTask* SHashObj *succTasks; // succeed tasks, key:taskid, value:SQueryTask* SHashObj *failTasks; // failed tasks, key:taskid, value:SQueryTask* - SArray *levels; // Element is SQueryLevel, starting from 0. SArray - SArray *subPlans; // Element is SArray*, and nested element is SSubplan. The execution level of subplan, starting from 0. SArray - + int8_t status; + SQueryNodeAddr resNode; + tsem_t rspSem; + int32_t userFetch; + int32_t remoteFetch; + SSchTask *fetchTask; + int32_t errCode; + void *res; + int32_t resNumOfRows; SQueryProfileSummary summary; } SSchJob; #define SCH_HAS_QNODE_IN_CLUSTER(type) (false) //TODO CLUSTER TYPE -#define SCH_TASK_READY_TO_LUNCH(task) ((task)->childReady >= taosArrayGetSize((task)->children)) // MAY NEED TO ENHANCE +#define SCH_TASK_READY_TO_LUNCH(task) (atomic_load_32(&(task)->childReady) >= taosArrayGetSize((task)->children)) #define SCH_IS_DATA_SRC_TASK(task) ((task)->plan->type == QUERY_TYPE_SCAN) #define SCH_TASK_NEED_WAIT_ALL(task) ((task)->plan->type == QUERY_TYPE_MODIFY) -#define SCH_JOB_ERR_LOG(param, ...) qError("QID:%"PRIx64 param, job->queryId, __VA_ARGS__) -#define SCH_TASK_ERR_LOG(param, ...) qError("QID:%"PRIx64",TID:%"PRIx64 param, job->queryId, task->taskId, __VA_ARGS__) +#define SCH_SET_TASK_STATUS(task, st) atomic_store_8(&(task)->status, st) +#define SCH_GET_TASK_STATUS(task) atomic_load_8(&(task)->status) + +#define SCH_SET_JOB_STATUS(job, st) atomic_store_8(&(job)->status, st) +#define SCH_GET_JOB_STATUS(job) atomic_load_8(&(job)->status) + +#define SCH_SET_JOB_TYPE(pAttr, type) (pAttr)->queryJob = ((type) != QUERY_TYPE_MODIFY) +#define SCH_JOB_NEED_FETCH(pAttr) ((pAttr)->queryJob) + +#define SCH_JOB_ELOG(param, ...) qError("QID:%"PRIx64" " param, pJob->queryId, __VA_ARGS__) +#define SCH_JOB_DLOG(param, ...) 
qDebug("QID:%"PRIx64" " param, pJob->queryId, __VA_ARGS__) + +#define SCH_TASK_ELOG(param, ...) qError("QID:%"PRIx64",TID:%"PRIx64" " param, pJob->queryId, pTask->taskId, __VA_ARGS__) +#define SCH_TASK_DLOG(param, ...) qDebug("QID:%"PRIx64",TID:%"PRIx64" " param, pJob->queryId, pTask->taskId, __VA_ARGS__) #define SCH_ERR_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; return _code; } } while (0) #define SCH_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; } return _code; } while (0) -#define SCH_ERR_LRET(c,...) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { qError(__VA_ARGS__); terrno = _code; return _code; } } while (0) #define SCH_ERR_JRET(c) do { code = c; if (code != TSDB_CODE_SUCCESS) { terrno = code; goto _return; } } while (0) #define SCH_LOCK(type, _lock) (SCH_READ == (type) ? taosRLockLatch(_lock) : taosWLockLatch(_lock)) #define SCH_UNLOCK(type, _lock) (SCH_READ == (type) ? taosRUnLockLatch(_lock) : taosWUnLockLatch(_lock)) -extern int32_t schLaunchTask(SSchJob *job, SSchTask *task); -extern int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, int32_t msgType); +static int32_t schLaunchTask(SSchJob *job, SSchTask *task); +static int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, int32_t msgType); #ifdef __cplusplus } diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index f5a49e782c..3ac08b5c42 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -20,204 +20,265 @@ static SSchedulerMgmt schMgmt = {0}; -int32_t schBuildTaskRalation(SSchJob *job, SHashObj *planToTask) { - for (int32_t i = 0; i < job->levelNum; ++i) { - SSchLevel *level = taosArrayGet(job->levels, i); +int32_t schValidateStatus(SSchJob *pJob, int8_t oriStatus, int8_t newStatus) { + int32_t code = 0; + +/* + if (oriStatus == newStatus) { + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + switch (oriStatus) { + case 
JOB_TASK_STATUS_NULL: + if (newStatus != JOB_TASK_STATUS_EXECUTING + && newStatus != JOB_TASK_STATUS_FAILED + && newStatus != JOB_TASK_STATUS_NOT_START) { + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + break; + case JOB_TASK_STATUS_NOT_START: + if (newStatus != JOB_TASK_STATUS_CANCELLED) { + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + break; + case JOB_TASK_STATUS_EXECUTING: + if (newStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED + && newStatus != JOB_TASK_STATUS_FAILED + && newStatus != JOB_TASK_STATUS_CANCELLING + && newStatus != JOB_TASK_STATUS_CANCELLED + && newStatus != JOB_TASK_STATUS_DROPPING) { + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + break; + case JOB_TASK_STATUS_PARTIAL_SUCCEED: + if (newStatus != JOB_TASK_STATUS_EXECUTING + && newStatus != JOB_TASK_STATUS_SUCCEED + && newStatus != JOB_TASK_STATUS_CANCELLED) { + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + break; + case JOB_TASK_STATUS_SUCCEED: + case JOB_TASK_STATUS_FAILED: + case JOB_TASK_STATUS_CANCELLING: + if (newStatus != JOB_TASK_STATUS_CANCELLED) { + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + break; + case JOB_TASK_STATUS_CANCELLED: + case JOB_TASK_STATUS_DROPPING: + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + break; + + default: + qError("invalid task status:%d", oriStatus); + return TSDB_CODE_QRY_APP_ERROR; + } +*/ + + return TSDB_CODE_SUCCESS; + +_return: + + SCH_JOB_ELOG("invalid job status update, from %d to %d", oriStatus, newStatus); + SCH_ERR_RET(code); +} + + +int32_t schBuildTaskRalation(SSchJob *pJob, SHashObj *planToTask) { + for (int32_t i = 0; i < pJob->levelNum; ++i) { + SSchLevel *pLevel = taosArrayGet(pJob->levels, i); - for (int32_t m = 0; m < level->taskNum; ++m) { - SSchTask *task = taosArrayGet(level->subTasks, m); - SSubplan *plan = task->plan; - int32_t childNum = plan->pChildren ? (int32_t)taosArrayGetSize(plan->pChildren) : 0; - int32_t parentNum = plan->pParents ? 
(int32_t)taosArrayGetSize(plan->pParents) : 0; + for (int32_t m = 0; m < pLevel->taskNum; ++m) { + SSchTask *pTask = taosArrayGet(pLevel->subTasks, m); + SSubplan *pPlan = pTask->plan; + int32_t childNum = pPlan->pChildren ? (int32_t)taosArrayGetSize(pPlan->pChildren) : 0; + int32_t parentNum = pPlan->pParents ? (int32_t)taosArrayGetSize(pPlan->pParents) : 0; if (childNum > 0) { - task->children = taosArrayInit(childNum, POINTER_BYTES); - if (NULL == task->children) { - qError("taosArrayInit %d failed", childNum); + if (pJob->levelIdx == pLevel->level) { + SCH_JOB_ELOG("invalid query plan, lowest level, childNum:%d", childNum); + SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + + pTask->children = taosArrayInit(childNum, POINTER_BYTES); + if (NULL == pTask->children) { + SCH_TASK_ELOG("taosArrayInit %d children failed", childNum); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } } for (int32_t n = 0; n < childNum; ++n) { - SSubplan **child = taosArrayGet(plan->pChildren, n); + SSubplan **child = taosArrayGet(pPlan->pChildren, n); SSchTask **childTask = taosHashGet(planToTask, child, POINTER_BYTES); if (NULL == childTask || NULL == *childTask) { - qError("subplan relationship error, level:%d, taskIdx:%d, childIdx:%d", i, m, n); + SCH_TASK_ELOG("subplan children relationship error, level:%d, taskIdx:%d, childIdx:%d", i, m, n); SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); } - if (NULL == taosArrayPush(task->children, childTask)) { - qError("taosArrayPush failed"); + if (NULL == taosArrayPush(pTask->children, childTask)) { + SCH_TASK_ELOG("taosArrayPush childTask failed, level:%d, taskIdx:%d, childIdx:%d", i, m, n); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } } if (parentNum > 0) { - task->parents = taosArrayInit(parentNum, POINTER_BYTES); - if (NULL == task->parents) { - qError("taosArrayInit %d failed", parentNum); + if (0 == pLevel->level) { + SCH_TASK_ELOG("invalid task info, level:0, parentNum:%d", parentNum); + SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + + 
pTask->parents = taosArrayInit(parentNum, POINTER_BYTES); + if (NULL == pTask->parents) { + SCH_TASK_ELOG("taosArrayInit %d parents failed", parentNum); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } } for (int32_t n = 0; n < parentNum; ++n) { - SSubplan **parent = taosArrayGet(plan->pParents, n); + SSubplan **parent = taosArrayGet(pPlan->pParents, n); SSchTask **parentTask = taosHashGet(planToTask, parent, POINTER_BYTES); if (NULL == parentTask || NULL == *parentTask) { - qError("subplan relationship error, level:%d, taskIdx:%d, childIdx:%d", i, m, n); + SCH_TASK_ELOG("subplan parent relationship error, level:%d, taskIdx:%d, childIdx:%d", i, m, n); SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); } - if (NULL == taosArrayPush(task->parents, parentTask)) { - qError("taosArrayPush failed"); + if (NULL == taosArrayPush(pTask->parents, parentTask)) { + SCH_TASK_ELOG("taosArrayPush parentTask failed, level:%d, taskIdx:%d, childIdx:%d", i, m, n); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - } + } + + SCH_TASK_DLOG("level:%d, parentNum:%d, childNum:%d", i, parentNum, childNum); } } - SSchLevel *level = taosArrayGet(job->levels, 0); - if (job->attr.queryJob && level->taskNum > 1) { - qError("invalid plan info, level 0, taskNum:%d", level->taskNum); + SSchLevel *pLevel = taosArrayGet(pJob->levels, 0); + if (pJob->attr.queryJob && pLevel->taskNum > 1) { + SCH_JOB_ELOG("invalid query plan, level:0, taskNum:%d", pLevel->taskNum); SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); } - SSchTask *task = taosArrayGet(level->subTasks, 0); - if (task->parents && taosArrayGetSize(task->parents) > 0) { - qError("invalid plan info, level 0, parentNum:%d", (int32_t)taosArrayGetSize(task->parents)); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - - return TSDB_CODE_SUCCESS; } -static SSchTask initTask(SSchJob* pJob, SSubplan* plan, SSchLevel *pLevel) { - SSchTask task = {0}; - if (plan->type == QUERY_TYPE_MODIFY) { - pJob->attr.needFetch = false; - } else { - pJob->attr.queryJob = true; - } +int32_t 
schInitTask(SSchJob* pJob, SSchTask *pTask, SSubplan* pPlan, SSchLevel *pLevel) { + pTask->plan = pPlan; + pTask->level = pLevel; + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START); + pTask->taskId = atomic_add_fetch_64(&schMgmt.taskId, 1); - task.plan = plan; - task.level = pLevel; - task.status = JOB_TASK_STATUS_NOT_START; - task.taskId = atomic_add_fetch_64(&schMgmt.taskId, 1); - - return task; + return TSDB_CODE_SUCCESS; } -static void cleanupTask(SSchTask* pTask) { +void schFreeTask(SSchTask* pTask) { taosArrayDestroy(pTask->candidateAddrs); } -int32_t schValidateAndBuildJob(SQueryDag *dag, SSchJob *pJob) { +int32_t schValidateAndBuildJob(SQueryDag *pDag, SSchJob *pJob) { int32_t code = 0; - pJob->queryId = dag->queryId; + pJob->queryId = pDag->queryId; - if (dag->numOfSubplans <= 0) { - qError("invalid subplan num:%d", dag->numOfSubplans); + if (pDag->numOfSubplans <= 0) { + SCH_JOB_ELOG("invalid subplan num:%d", pDag->numOfSubplans); SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); } - int32_t levelNum = (int32_t)taosArrayGetSize(dag->pSubplans); + int32_t levelNum = (int32_t)taosArrayGetSize(pDag->pSubplans); if (levelNum <= 0) { - qError("invalid level num:%d", levelNum); + SCH_JOB_ELOG("invalid level num:%d", levelNum); SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); } SHashObj *planToTask = taosHashInit(SCHEDULE_DEFAULT_TASK_NUMBER, taosGetDefaultHashFunction(POINTER_BYTES == sizeof(int64_t) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK); if (NULL == planToTask) { - qError("taosHashInit %d failed", SCHEDULE_DEFAULT_TASK_NUMBER); + SCH_JOB_ELOG("taosHashInit %d failed", SCHEDULE_DEFAULT_TASK_NUMBER); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } pJob->levels = taosArrayInit(levelNum, sizeof(SSchLevel)); if (NULL == pJob->levels) { - qError("taosArrayInit %d failed", levelNum); + SCH_JOB_ELOG("taosArrayInit %d failed", levelNum); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - //?? 
- pJob->attr.needFetch = true; - pJob->levelNum = levelNum; pJob->levelIdx = levelNum - 1; - pJob->subPlans = dag->pSubplans; + pJob->subPlans = pDag->pSubplans; SSchLevel level = {0}; - SArray *levelPlans = NULL; - int32_t levelPlanNum = 0; + SArray *plans = NULL; + int32_t taskNum = 0; SSchLevel *pLevel = NULL; level.status = JOB_TASK_STATUS_NOT_START; for (int32_t i = 0; i < levelNum; ++i) { if (NULL == taosArrayPush(pJob->levels, &level)) { - qError("taosArrayPush failed"); + SCH_JOB_ELOG("taosArrayPush level failed, level:%d", i); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } pLevel = taosArrayGet(pJob->levels, i); pLevel->level = i; - levelPlans = taosArrayGetP(dag->pSubplans, i); - if (NULL == levelPlans) { - qError("no level plans for level %d", i); - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - - levelPlanNum = (int32_t)taosArrayGetSize(levelPlans); - if (levelPlanNum <= 0) { - qError("invalid level plans number:%d, level:%d", levelPlanNum, i); - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - - pLevel->taskNum = levelPlanNum; - pLevel->subTasks = taosArrayInit(levelPlanNum, sizeof(SSchTask)); + plans = taosArrayGetP(pDag->pSubplans, i); + if (NULL == plans) { + SCH_JOB_ELOG("empty level plan, level:%d", i); + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + + taskNum = (int32_t)taosArrayGetSize(plans); + if (taskNum <= 0) { + SCH_JOB_ELOG("invalid level plan number:%d, level:%d", taskNum, i); + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + + pLevel->taskNum = taskNum; + + pLevel->subTasks = taosArrayInit(taskNum, sizeof(SSchTask)); if (NULL == pLevel->subTasks) { - qError("taosArrayInit %d failed", levelPlanNum); + SCH_JOB_ELOG("taosArrayInit %d failed", taskNum); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - for (int32_t n = 0; n < levelPlanNum; ++n) { - SSubplan *plan = taosArrayGetP(levelPlans, n); - if (plan->type == QUERY_TYPE_MODIFY) { - pJob->attr.needFetch = false; - } else { - pJob->attr.queryJob = true; - } + for (int32_t n = 0; n < 
taskNum; ++n) { + SSubplan *plan = taosArrayGetP(plans, n); - SSchTask task = initTask(pJob, plan, pLevel); + SCH_SET_JOB_TYPE(&pJob->attr, plan->type); + + SSchTask task = {0}; + SSchTask *pTask = &task; + + schInitTask(pJob, &task, plan, pLevel); + void *p = taosArrayPush(pLevel->subTasks, &task); if (NULL == p) { - qError("taosArrayPush failed"); + SCH_TASK_ELOG("taosArrayPush task to level failed, level:%d, taskIdx:%d", pLevel->level, n); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } if (0 != taosHashPut(planToTask, &plan, POINTER_BYTES, &p, POINTER_BYTES)) { - qError("taosHashPut failed"); + SCH_TASK_ELOG("taosHashPut to planToTaks failed, taskIdx:%d", n); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } + + SCH_TASK_DLOG("task initialized, level:%d", pLevel->level); } + + SCH_JOB_DLOG("level initialized, taskNum:%d", taskNum); } SCH_ERR_JRET(schBuildTaskRalation(pJob, planToTask)); - if (planToTask) { - taosHashCleanup(planToTask); - } - - return TSDB_CODE_SUCCESS; - _return: - if (pLevel->subTasks) { - taosArrayDestroy(pLevel->subTasks); - } if (planToTask) { taosHashCleanup(planToTask); @@ -226,39 +287,47 @@ _return: SCH_RET(code); } -int32_t schSetTaskCandidateAddrs(SSchJob *job, SSchTask *task) { - if (task->candidateAddrs) { +int32_t schSetTaskCandidateAddrs(SSchJob *pJob, SSchTask *pTask) { + if (NULL != pTask->candidateAddrs) { return TSDB_CODE_SUCCESS; } - task->candidateIdx = 0; - task->candidateAddrs = taosArrayInit(SCH_MAX_CONDIDATE_EP_NUM, sizeof(SQueryNodeAddr)); - if (NULL == task->candidateAddrs) { - qError("taosArrayInit failed"); + pTask->candidateIdx = 0; + pTask->candidateAddrs = taosArrayInit(SCH_MAX_CONDIDATE_EP_NUM, sizeof(SQueryNodeAddr)); + if (NULL == pTask->candidateAddrs) { + SCH_TASK_ELOG("taosArrayInit %d condidate addrs failed", SCH_MAX_CONDIDATE_EP_NUM); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - if (task->plan->execNode.numOfEps > 0) { - if (NULL == taosArrayPush(task->candidateAddrs, &task->plan->execNode)) { - 
qError("taosArrayPush failed"); + if (pTask->plan->execNode.numOfEps > 0) { + if (NULL == taosArrayPush(pTask->candidateAddrs, &pTask->plan->execNode)) { + SCH_TASK_ELOG("taosArrayPush execNode to candidate addrs failed, errno:%d", errno); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } + SCH_TASK_DLOG("use execNode from plan as candidate addr, numOfEps:%d", pTask->plan->execNode.numOfEps); + return TSDB_CODE_SUCCESS; } int32_t addNum = 0; - int32_t nodeNum = taosArrayGetSize(job->nodeList); - - for (int32_t i = 0; i < nodeNum && addNum < SCH_MAX_CONDIDATE_EP_NUM; ++i) { - SQueryNodeAddr *naddr = taosArrayGet(job->nodeList, i); + int32_t nodeNum = 0; + if (pJob->nodeList) { + nodeNum = taosArrayGetSize(pJob->nodeList); - if (NULL == taosArrayPush(task->candidateAddrs, &task->plan->execNode)) { - qError("taosArrayPush failed"); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + for (int32_t i = 0; i < nodeNum && addNum < SCH_MAX_CONDIDATE_EP_NUM; ++i) { + SQueryNodeAddr *naddr = taosArrayGet(pJob->nodeList, i); + + if (NULL == taosArrayPush(pTask->candidateAddrs, &pTask->plan->execNode)) { + SCH_TASK_ELOG("taosArrayPush execNode to candidate addrs failed, addNum:%d, errno:%d", addNum, errno); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } } - - ++addNum; + } + + if (addNum <= 0) { + SCH_TASK_ELOG("no available execNode as condidate addr, nodeNum:%d", nodeNum); + return TSDB_CODE_QRY_INVALID_INPUT; } /* @@ -274,13 +343,19 @@ int32_t schSetTaskCandidateAddrs(SSchJob *job, SSchTask *task) { } int32_t schPushTaskToExecList(SSchJob *pJob, SSchTask *pTask) { - if (0 != taosHashPut(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId), &pTask, POINTER_BYTES)) { - qError("failed to add new task, taskId:0x%"PRIx64", reqId:0x"PRIx64", out of memory", pJob->queryId); + int32_t code = taosHashPut(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId), &pTask, POINTER_BYTES); + if (0 != code) { + if (HASH_NODE_EXIST(code)) { + SCH_TASK_ELOG("task already in exec list, code:%x", code); 
+ SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + + SCH_TASK_ELOG("taosHashPut task to exec list failed, errno:%d", errno); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - qDebug("add one task, taskId:0x%"PRIx64", numOfTasks:%d, reqId:0x%"PRIx64, pTask->taskId, taosHashGetSize(pJob->execTasks), - pJob->queryId); + SCH_TASK_DLOG("task added to exec list, numOfTasks:%d", taosHashGetSize(pJob->execTasks)); + return TSDB_CODE_SUCCESS; } @@ -348,7 +423,7 @@ int32_t schProcessOnJobPartialSuccess(SSchJob *job) { job->status = JOB_TASK_STATUS_PARTIAL_SUCCEED; bool needFetch = job->userFetch; - if ((!job->attr.needFetch) && job->attr.syncSchedule) { + if ((!SCH_JOB_NEED_FETCH(&job->attr)) && job->attr.syncSchedule) { tsem_post(&job->rspSem); } @@ -359,14 +434,21 @@ int32_t schProcessOnJobPartialSuccess(SSchJob *job) { return TSDB_CODE_SUCCESS; } -int32_t schProcessOnJobFailure(SSchJob *job, int32_t errCode) { - job->status = JOB_TASK_STATUS_FAILED; - job->errCode = errCode; +int32_t schProcessOnJobFailure(SSchJob *pJob, int32_t errCode) { + int8_t status = SCH_GET_JOB_STATUS(pJob); - atomic_val_compare_exchange_32(&job->remoteFetch, 1, 0); + if (schValidateStatus(pJob, status, JOB_TASK_STATUS_FAILED)) { + SCH_ERR_RET(atomic_load_32(&pJob->errCode)); + } + + SCH_SET_JOB_STATUS(pJob, JOB_TASK_STATUS_FAILED); + + atomic_store_32(&pJob->errCode, errCode); - if (job->userFetch || ((!job->attr.needFetch) && job->attr.syncSchedule)) { - tsem_post(&job->rspSem); + atomic_val_compare_exchange_32(&pJob->remoteFetch, 1, 0); + + if (pJob->userFetch || ((!SCH_JOB_NEED_FETCH(&pJob->attr)) && pJob->attr.syncSchedule)) { + tsem_post(&pJob->rspSem); } return TSDB_CODE_SUCCESS; @@ -379,50 +461,52 @@ int32_t schProcessOnDataFetched(SSchJob *job) { } -int32_t schProcessOnTaskSuccess(SSchJob *job, SSchTask *task) { +int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask) { bool moved = false; + int32_t code = 0; - SCH_ERR_RET(schMoveTaskToSuccList(job, task, &moved)); + 
SCH_ERR_RET(schMoveTaskToSuccList(pJob, pTask, &moved)); if (!moved) { - SCH_TASK_ERR_LOG(" task may already moved, status:%d", task->status); + SCH_TASK_ELOG(" task may already moved, status:%d", pTask->status); return TSDB_CODE_SUCCESS; } - task->status = JOB_TASK_STATUS_SUCCEED; + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_SUCCEED); - int32_t parentNum = task->parents ? (int32_t)taosArrayGetSize(task->parents) : 0; + int32_t parentNum = pTask->parents ? (int32_t)taosArrayGetSize(pTask->parents) : 0; if (parentNum == 0) { - if (task->plan->level != 0) { - qError("level error"); + if (pTask->level->level != 0) { + SCH_TASK_ELOG("no parent task level error, level:%d", pTask->level->level); SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); } int32_t taskDone = 0; - if (SCH_TASK_NEED_WAIT_ALL(task)) { - SCH_LOCK(SCH_WRITE, &task->level->lock); - task->level->taskSucceed++; - taskDone = task->level->taskSucceed + task->level->taskFailed; - SCH_UNLOCK(SCH_WRITE, &task->level->lock); + if (SCH_TASK_NEED_WAIT_ALL(pTask)) { + SCH_LOCK(SCH_WRITE, &pTask->level->lock); + pTask->level->taskSucceed++; + taskDone = pTask->level->taskSucceed + pTask->level->taskFailed; + SCH_UNLOCK(SCH_WRITE, &pTask->level->lock); - if (taskDone < task->level->taskNum) { - qDebug("wait all tasks, done:%d, all:%d", taskDone, task->level->taskNum); + if (taskDone < pTask->level->taskNum) { + SCH_TASK_ELOG("wait all tasks, done:%d, all:%d", taskDone, pTask->level->taskNum); return TSDB_CODE_SUCCESS; + } else if (taskDone > pTask->level->taskNum) { + assert(0); } - if (task->level->taskFailed > 0) { - job->status = JOB_TASK_STATUS_FAILED; - SCH_ERR_RET(schProcessOnJobFailure(job, TSDB_CODE_QRY_APP_ERROR)); + if (pTask->level->taskFailed > 0) { + pJob->status = JOB_TASK_STATUS_FAILED; + SCH_ERR_RET(schProcessOnJobFailure(pJob, TSDB_CODE_QRY_APP_ERROR)); return TSDB_CODE_SUCCESS; } } else { - strncpy(job->resEp.fqdn, task->execAddr.epAddr[task->execAddr.inUse].fqdn, sizeof(job->resEp.fqdn)); - 
job->resEp.port = task->execAddr.epAddr[task->execAddr.inUse].port; + pJob->resNode = pTask->execAddr; } - job->fetchTask = task; - SCH_ERR_RET(schProcessOnJobPartialSuccess(job)); + pJob->fetchTask = pTask; + SCH_ERR_RET(schProcessOnJobPartialSuccess(pJob)); return TSDB_CODE_SUCCESS; } @@ -437,53 +521,56 @@ int32_t schProcessOnTaskSuccess(SSchJob *job, SSchTask *task) { */ for (int32_t i = 0; i < parentNum; ++i) { - SSchTask *par = *(SSchTask **)taosArrayGet(task->parents, i); + SSchTask *par = *(SSchTask **)taosArrayGet(pTask->parents, i); - ++par->childReady; + atomic_add_fetch_32(&par->childReady, 1); - SCH_ERR_RET(qSetSubplanExecutionNode(par->plan, task->plan->id.templateId, &task->execAddr)); + qSetSubplanExecutionNode(par->plan, pTask->plan->id.templateId, &pTask->execAddr); if (SCH_TASK_READY_TO_LUNCH(par)) { - SCH_ERR_RET(schLaunchTask(job, par)); + SCH_ERR_RET(schLaunchTask(pJob, par)); } } return TSDB_CODE_SUCCESS; } -int32_t schProcessOnTaskFailure(SSchJob *job, SSchTask *task, int32_t errCode) { +int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode) { bool needRetry = false; bool moved = false; int32_t taskDone = 0; - SCH_ERR_RET(schTaskCheckAndSetRetry(job, task, errCode, &needRetry)); + SCH_ERR_RET(schTaskCheckAndSetRetry(pJob, pTask, errCode, &needRetry)); if (!needRetry) { - SCH_TASK_ERR_LOG("task failed[%x], no more retry", errCode); + SCH_TASK_ELOG("task failed and no more retry, code:%x", errCode); - SCH_ERR_RET(schMoveTaskToFailList(job, task, &moved)); - if (!moved) { - SCH_TASK_ERR_LOG("task may already moved, status:%d", task->status); - } + if (SCH_GET_TASK_STATUS(pTask) == JOB_TASK_STATUS_EXECUTING) { + SCH_ERR_RET(schMoveTaskToFailList(pJob, pTask, &moved)); + if (!moved) { + SCH_TASK_ELOG("task may already moved, status:%d", pTask->status); + } + } + + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_FAILED); - if (SCH_TASK_NEED_WAIT_ALL(task)) { - SCH_LOCK(SCH_WRITE, &task->level->lock); - 
task->level->taskFailed++; - taskDone = task->level->taskSucceed + task->level->taskFailed; - SCH_UNLOCK(SCH_WRITE, &task->level->lock); + if (SCH_TASK_NEED_WAIT_ALL(pTask)) { + SCH_LOCK(SCH_WRITE, &pTask->level->lock); + pTask->level->taskFailed++; + taskDone = pTask->level->taskSucceed + pTask->level->taskFailed; + SCH_UNLOCK(SCH_WRITE, &pTask->level->lock); - if (taskDone < task->level->taskNum) { - qDebug("wait all tasks, done:%d, all:%d", taskDone, task->level->taskNum); + if (taskDone < pTask->level->taskNum) { + qDebug("wait all tasks, done:%d, all:%d", taskDone, pTask->level->taskNum); return TSDB_CODE_SUCCESS; } } - - job->status = JOB_TASK_STATUS_FAILED; - SCH_ERR_RET(schProcessOnJobFailure(job, errCode)); + + SCH_ERR_RET(schProcessOnJobFailure(pJob, errCode)); - return TSDB_CODE_SUCCESS; + return errCode; } - SCH_ERR_RET(schLaunchTask(job, task)); + SCH_ERR_RET(schLaunchTask(pJob, pTask)); return TSDB_CODE_SUCCESS; } @@ -505,7 +592,7 @@ int32_t schProcessRspMsg(SSchJob *job, SSchTask *task, int32_t msgType, char *ms break; } case TDMT_VND_SUBMIT_RSP: { - if (rspCode != TSDB_CODE_SUCCESS) { + if (rspCode != TSDB_CODE_SUCCESS || NULL == msg) { SCH_ERR_JRET(schProcessOnTaskFailure(job, task, rspCode)); } else { SShellSubmitRspMsg *rsp = (SShellSubmitRspMsg *)msg; @@ -521,7 +608,7 @@ int32_t schProcessRspMsg(SSchJob *job, SSchTask *task, int32_t msgType, char *ms case TDMT_VND_QUERY_RSP: { SQueryTableRsp *rsp = (SQueryTableRsp *)msg; - if (rsp->code != TSDB_CODE_SUCCESS) { + if (rsp->code != TSDB_CODE_SUCCESS || NULL == msg) { SCH_ERR_JRET(schProcessOnTaskFailure(job, task, rsp->code)); } else { code = schBuildAndSendMsg(job, task, TDMT_VND_RES_READY); @@ -534,7 +621,7 @@ int32_t schProcessRspMsg(SSchJob *job, SSchTask *task, int32_t msgType, char *ms case TDMT_VND_RES_READY_RSP: { SResReadyRsp *rsp = (SResReadyRsp *)msg; - if (rsp->code != TSDB_CODE_SUCCESS) { + if (rsp->code != TSDB_CODE_SUCCESS || NULL == msg) { SCH_ERR_JRET(schProcessOnTaskFailure(job, 
task, rsp->code)); } else { code = schProcessOnTaskSuccess(job, task); @@ -549,7 +636,9 @@ int32_t schProcessRspMsg(SSchJob *job, SSchTask *task, int32_t msgType, char *ms SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)msg; job->res = rsp; - job->resNumOfRows = rsp->numOfRows; + if (rsp) { + job->resNumOfRows = rsp->numOfRows; + } SCH_ERR_JRET(schProcessOnDataFetched(job)); break; @@ -658,13 +747,13 @@ int32_t schAsyncSendMsg(void *transport, SEpSet* epSet, uint64_t qId, uint64_t t int32_t code = 0; SMsgSendInfo* pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo)); if (NULL == pMsgSendInfo) { - qError("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); + qError("QID:%"PRIx64 ",TID:%"PRIx64 " calloc %d failed", qId, tId, (int32_t)sizeof(SMsgSendInfo)); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } SSchCallbackParam *param = calloc(1, sizeof(SSchCallbackParam)); if (NULL == param) { - qError("calloc %d failed", (int32_t)sizeof(SSchCallbackParam)); + qError("QID:%"PRIx64 ",TID:%"PRIx64 " calloc %d failed", qId, tId, (int32_t)sizeof(SSchCallbackParam)); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } @@ -682,11 +771,13 @@ int32_t schAsyncSendMsg(void *transport, SEpSet* epSet, uint64_t qId, uint64_t t pMsgSendInfo->fp = fp; int64_t transporterId = 0; + SCH_ERR_JRET(asyncSendMsgToServer(transport, epSet, &transporterId, pMsgSendInfo)); return TSDB_CODE_SUCCESS; _return: + tfree(param); tfree(pMsgSendInfo); @@ -704,109 +795,101 @@ void schConvertAddrToEpSet(SQueryNodeAddr *addr, SEpSet *epSet) { } -int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, int32_t msgType) { +int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType) { uint32_t msgSize = 0; void *msg = NULL; int32_t code = 0; + SEpSet epSet; + + SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); + + schConvertAddrToEpSet(addr, &epSet); switch (msgType) { case TDMT_VND_CREATE_TABLE: case TDMT_VND_SUBMIT: { - if (NULL == task->msg || task->msgLen <= 0) { - qError("submit 
msg is NULL"); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - - msgSize = task->msgLen; - msg = task->msg; + msgSize = pTask->msgLen; + msg = pTask->msg; break; } case TDMT_VND_QUERY: { - if (NULL == task->msg) { - qError("query msg is NULL"); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - - msgSize = sizeof(SSubQueryMsg) + task->msgLen; + msgSize = sizeof(SSubQueryMsg) + pTask->msgLen; msg = calloc(1, msgSize); if (NULL == msg) { - qError("calloc %d failed", msgSize); + SCH_TASK_ELOG("calloc %d failed", msgSize); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } SSubQueryMsg *pMsg = msg; - pMsg->header.vgId = htonl(task->plan->execNode.nodeId); + pMsg->header.vgId = htonl(addr->nodeId); + pMsg->sId = htobe64(schMgmt.sId); - pMsg->queryId = htobe64(job->queryId); - pMsg->taskId = htobe64(task->taskId); - pMsg->contentLen = htonl(task->msgLen); - memcpy(pMsg->msg, task->msg, task->msgLen); + pMsg->queryId = htobe64(pJob->queryId); + pMsg->taskId = htobe64(pTask->taskId); + pMsg->contentLen = htonl(pTask->msgLen); + memcpy(pMsg->msg, pTask->msg, pTask->msgLen); break; } case TDMT_VND_RES_READY: { msgSize = sizeof(SResReadyMsg); msg = calloc(1, msgSize); if (NULL == msg) { - qError("calloc %d failed", msgSize); + SCH_TASK_ELOG("calloc %d failed", msgSize); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } SResReadyMsg *pMsg = msg; - pMsg->header.vgId = htonl(task->plan->execNode.nodeId); + pMsg->header.vgId = htonl(addr->nodeId); + pMsg->sId = htobe64(schMgmt.sId); - pMsg->queryId = htobe64(job->queryId); - pMsg->taskId = htobe64(task->taskId); + pMsg->queryId = htobe64(pJob->queryId); + pMsg->taskId = htobe64(pTask->taskId); break; } case TDMT_VND_FETCH: { - if (NULL == task) { - SCH_ERR_RET(TSDB_CODE_QRY_APP_ERROR); - } msgSize = sizeof(SResFetchMsg); msg = calloc(1, msgSize); if (NULL == msg) { - qError("calloc %d failed", msgSize); + SCH_TASK_ELOG("calloc %d failed", msgSize); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } SResFetchMsg *pMsg = msg; - pMsg->header.vgId = 
htonl(task->plan->execNode.nodeId); + pMsg->header.vgId = htonl(addr->nodeId); + pMsg->sId = htobe64(schMgmt.sId); - pMsg->queryId = htobe64(job->queryId); - pMsg->taskId = htobe64(task->taskId); + pMsg->queryId = htobe64(pJob->queryId); + pMsg->taskId = htobe64(pTask->taskId); break; } case TDMT_VND_DROP_TASK:{ msgSize = sizeof(STaskDropMsg); msg = calloc(1, msgSize); if (NULL == msg) { - qError("calloc %d failed", msgSize); + SCH_TASK_ELOG("calloc %d failed", msgSize); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } STaskDropMsg *pMsg = msg; - pMsg->header.vgId = htonl(task->plan->execNode.nodeId); + pMsg->header.vgId = htonl(addr->nodeId); + pMsg->sId = htobe64(schMgmt.sId); - pMsg->queryId = htobe64(job->queryId); - pMsg->taskId = htobe64(task->taskId); + pMsg->queryId = htobe64(pJob->queryId); + pMsg->taskId = htobe64(pTask->taskId); break; } default: - qError("unknown msg type:%d", msgType); + SCH_TASK_ELOG("unknown msg type:%d", msgType); SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); break; } - SEpSet epSet; - SQueryNodeAddr *addr = taosArrayGet(task->candidateAddrs, task->candidateIdx); - - schConvertAddrToEpSet(addr, &epSet); - SCH_ERR_JRET(schAsyncSendMsg(job->transport, &epSet, job->queryId, task->taskId, msgType, msg, msgSize)); + SCH_ERR_JRET(schAsyncSendMsg(pJob->transport, &epSet, pJob->queryId, pTask->taskId, msgType, msg, msgSize)); return TSDB_CODE_SUCCESS; @@ -816,33 +899,64 @@ _return: SCH_RET(code); } - -int32_t schLaunchTask(SSchJob *job, SSchTask *task) { - SSubplan *plan = task->plan; - SCH_ERR_RET(qSubPlanToString(plan, &task->msg, &task->msgLen)); - SCH_ERR_RET(schSetTaskCandidateAddrs(job, task)); - - if (NULL == task->candidateAddrs || taosArrayGetSize(task->candidateAddrs) <= 0) { - SCH_TASK_ERR_LOG("no valid candidate node for task:%"PRIx64, task->taskId); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); +static FORCE_INLINE bool schJobNeedToStop(SSchJob *pJob, int8_t *pStatus) { + int8_t status = SCH_GET_JOB_STATUS(pJob); + if (pStatus) { + 
*pStatus = status; } - // NOTE: race condition: the task should be put into the hash table before send msg to server - SCH_ERR_RET(schPushTaskToExecList(job, task)); - SCH_ERR_RET(schBuildAndSendMsg(job, task, plan->msgType)); - - task->status = JOB_TASK_STATUS_EXECUTING; - return TSDB_CODE_SUCCESS; + return (status == JOB_TASK_STATUS_FAILED || status == JOB_TASK_STATUS_CANCELLED + || status == JOB_TASK_STATUS_CANCELLING || status == JOB_TASK_STATUS_DROPPING); } -int32_t schLaunchJob(SSchJob *job) { - SSchLevel *level = taosArrayGet(job->levels, job->levelIdx); - for (int32_t i = 0; i < level->taskNum; ++i) { - SSchTask *task = taosArrayGet(level->subTasks, i); - SCH_ERR_RET(schLaunchTask(job, task)); +int32_t schLaunchTask(SSchJob *pJob, SSchTask *pTask) { + int8_t status = 0; + int32_t code = 0; + + if (schJobNeedToStop(pJob, &status)) { + SCH_TASK_ELOG("no need to launch task cause of job status, job status:%d", status); + SCH_ERR_RET(atomic_load_32(&pJob->errCode)); + } + + SSubplan *plan = pTask->plan; + + if (NULL == pTask->msg) { + code = qSubPlanToString(plan, &pTask->msg, &pTask->msgLen); + if (TSDB_CODE_SUCCESS != code || NULL == pTask->msg || pTask->msgLen <= 0) { + SCH_TASK_ELOG("subplanToString error, code:%x, msg:%p, len:%d", code, pTask->msg, pTask->msgLen); + SCH_ERR_JRET(code); + } + } + + SCH_ERR_JRET(schSetTaskCandidateAddrs(pJob, pTask)); + + // NOTE: race condition: the task should be put into the hash table before send msg to server + if (SCH_GET_TASK_STATUS(pTask) != JOB_TASK_STATUS_EXECUTING) { + SCH_ERR_JRET(schPushTaskToExecList(pJob, pTask)); + + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_EXECUTING); } - job->status = JOB_TASK_STATUS_EXECUTING; + SCH_ERR_JRET(schBuildAndSendMsg(pJob, pTask, plan->msgType)); + + return TSDB_CODE_SUCCESS; + +_return: + + code = schProcessOnTaskFailure(pJob, pTask, code); + + SCH_RET(code); +} + +int32_t schLaunchJob(SSchJob *pJob) { + SSchLevel *level = taosArrayGet(pJob->levels, pJob->levelIdx); + + for 
(int32_t i = 0; i < level->taskNum; ++i) { + SSchTask *pTask = taosArrayGet(level->subTasks, i); + SCH_ERR_RET(schLaunchTask(pJob, pTask)); + } + + pJob->status = JOB_TASK_STATUS_EXECUTING; return TSDB_CODE_SUCCESS; } @@ -875,27 +989,90 @@ void schDropJobAllTasks(SSchJob *job) { } } -uint64_t schGenSchId(void) { - uint64_t sId = 0; +int32_t schExecJobImpl(void *transport, SArray *nodeList, SQueryDag* pDag, void** job, bool syncSchedule) { + if (nodeList && taosArrayGetSize(nodeList) <= 0) { + qInfo("QID:%"PRIx64" input nodeList is empty", pDag->queryId); + } - // TODO + int32_t code = 0; + SSchJob *pJob = calloc(1, sizeof(SSchJob)); + if (NULL == pJob) { + qError("QID:%"PRIx64" calloc %d failed", pDag->queryId, (int32_t)sizeof(SSchJob)); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } - qDebug("Gen sId:0x%"PRIx64, sId); + pJob->attr.syncSchedule = syncSchedule; + pJob->transport = transport; + pJob->nodeList = nodeList; - return sId; + SCH_ERR_JRET(schValidateAndBuildJob(pDag, pJob)); + + pJob->execTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); + if (NULL == pJob->execTasks) { + SCH_JOB_ELOG("taosHashInit %d execTasks failed", pDag->numOfSubplans); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pJob->succTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); + if (NULL == pJob->succTasks) { + SCH_JOB_ELOG("taosHashInit %d succTasks failed", pDag->numOfSubplans); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pJob->failTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); + if (NULL == pJob->failTasks) { + SCH_JOB_ELOG("taosHashInit %d failTasks failed", pDag->numOfSubplans); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + tsem_init(&pJob->rspSem, 0, 0); + + code = taosHashPut(schMgmt.jobs, &pJob->queryId, sizeof(pJob->queryId), &pJob, POINTER_BYTES); 
+ if (0 != code) { + if (HASH_NODE_EXIST(code)) { + SCH_JOB_ELOG("job already exist, isQueryJob:%d", pJob->attr.queryJob); + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } else { + SCH_JOB_ELOG("taosHashPut job failed, errno:%d", errno); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + } + + pJob->status = JOB_TASK_STATUS_NOT_START; + + SCH_ERR_JRET(schLaunchJob(pJob)); + + *(SSchJob **)job = pJob; + + if (syncSchedule) { + SCH_JOB_DLOG("will wait for rsp now, job status:%d", SCH_GET_JOB_STATUS(pJob)); + tsem_wait(&pJob->rspSem); + } + + SCH_JOB_DLOG("job exec done, job status:%d", SCH_GET_JOB_STATUS(pJob)); + + return TSDB_CODE_SUCCESS; + +_return: + + *(SSchJob **)job = NULL; + + scheduleFreeJob(pJob); + + SCH_RET(code); } int32_t schedulerInit(SSchedulerCfg *cfg) { if (schMgmt.jobs) { - qError("scheduler already init"); + qError("scheduler already initialized"); return TSDB_CODE_QRY_INVALID_INPUT; } if (cfg) { schMgmt.cfg = *cfg; - if (schMgmt.cfg.maxJobNum <= 0) { + if (schMgmt.cfg.maxJobNum == 0) { schMgmt.cfg.maxJobNum = SCHEDULE_DEFAULT_JOB_NUMBER; } } else { @@ -904,79 +1081,18 @@ int32_t schedulerInit(SSchedulerCfg *cfg) { schMgmt.jobs = taosHashInit(schMgmt.cfg.maxJobNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); if (NULL == schMgmt.jobs) { - SCH_ERR_LRET(TSDB_CODE_QRY_OUT_OF_MEMORY, "init %d schduler jobs failed", schMgmt.cfg.maxJobNum); - } - - schMgmt.sId = schGenSchId(); - - return TSDB_CODE_SUCCESS; -} - - -int32_t scheduleExecJobImpl(void *transport, SArray *nodeList, SQueryDag* pDag, void** pJob, bool syncSchedule) { - if (nodeList && taosArrayGetSize(nodeList) <= 0) { - qInfo("qnodeList is empty"); - } - - int32_t code = 0; - SSchJob *job = calloc(1, sizeof(SSchJob)); - if (NULL == job) { + qError("init schduler jobs failed, num:%u", schMgmt.cfg.maxJobNum); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - job->attr.syncSchedule = syncSchedule; - job->transport = transport; - job->nodeList = nodeList; - - 
SCH_ERR_JRET(schValidateAndBuildJob(pDag, job)); - - job->execTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); - if (NULL == job->execTasks) { - qError("taosHashInit %d failed", pDag->numOfSubplans); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + if (taosGetSystemUUID((char *)&schMgmt.sId, sizeof(schMgmt.sId))) { + qError("generate schdulerId failed, errno:%d", errno); + SCH_ERR_RET(TSDB_CODE_QRY_SYS_ERROR); } - job->succTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); - if (NULL == job->succTasks) { - qError("taosHashInit %d failed", pDag->numOfSubplans); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - job->failTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); - if (NULL == job->failTasks) { - qError("taosHashInit %d failed", pDag->numOfSubplans); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - tsem_init(&job->rspSem, 0, 0); - - code = taosHashPut(schMgmt.jobs, &job->queryId, sizeof(job->queryId), &job, POINTER_BYTES); - if (0 != code) { - if (HASH_NODE_EXIST(code)) { - qError("taosHashPut queryId:0x%"PRIx64" already exist", job->queryId); - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } else { - qError("taosHashPut queryId:0x%"PRIx64" failed", job->queryId); - SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - } - - job->status = JOB_TASK_STATUS_NOT_START; - SCH_ERR_JRET(schLaunchJob(job)); - - *(SSchJob **)pJob = job; - - if (syncSchedule) { - tsem_wait(&job->rspSem); - } - - return TSDB_CODE_SUCCESS; - -_return: - *(SSchJob **)pJob = NULL; - scheduleFreeJob(job); + qInfo("scheduler %"PRIx64" initizlized, maxJob:%u", schMgmt.sId, schMgmt.cfg.maxJobNum); - SCH_RET(code); + return TSDB_CODE_SUCCESS; } int32_t scheduleExecJob(void *transport, SArray *nodeList, SQueryDag* pDag, void** pJob, SQueryResult *pRes) { @@ -984,22 +1100,22 @@ int32_t 
scheduleExecJob(void *transport, SArray *nodeList, SQueryDag* pDag, void SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); } - SCH_ERR_RET(scheduleExecJobImpl(transport, nodeList, pDag, pJob, true)); + SCH_ERR_RET(schExecJobImpl(transport, nodeList, pDag, pJob, true)); SSchJob *job = *(SSchJob **)pJob; - pRes->code = job->errCode; + pRes->code = atomic_load_32(&job->errCode); pRes->numOfRows = job->resNumOfRows; return TSDB_CODE_SUCCESS; } int32_t scheduleAsyncExecJob(void *transport, SArray *nodeList, SQueryDag* pDag, void** pJob) { - if (NULL == transport || NULL == nodeList ||NULL == pDag || NULL == pDag->pSubplans || NULL == pJob) { + if (NULL == transport || /*NULL == nodeList || */NULL == pDag || NULL == pDag->pSubplans || NULL == pJob) { SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); } - return scheduleExecJobImpl(transport, nodeList, pDag, pJob, false); + return schExecJobImpl(transport, nodeList, pDag, pJob, false); } @@ -1011,14 +1127,14 @@ int32_t scheduleFetchRows(void *pJob, void **data) { SSchJob *job = pJob; int32_t code = 0; - if (!job->attr.needFetch) { + if (!SCH_JOB_NEED_FETCH(&job->attr)) { qError("no need to fetch data"); SCH_ERR_RET(TSDB_CODE_QRY_APP_ERROR); } if (job->status == JOB_TASK_STATUS_FAILED) { job->res = NULL; - SCH_RET(job->errCode); + SCH_RET(atomic_load_32(&job->errCode)); } if (job->status == JOB_TASK_STATUS_SUCCEED) { @@ -1038,7 +1154,7 @@ int32_t scheduleFetchRows(void *pJob, void **data) { tsem_wait(&job->rspSem); if (job->status == JOB_TASK_STATUS_FAILED) { - code = job->errCode; + code = atomic_load_32(&job->errCode); } if (job->res && ((SRetrieveTableRsp *)job->res)->completed) { @@ -1090,7 +1206,7 @@ void scheduleFreeJob(void *pJob) { int32_t numOfTasks = taosArrayGetSize(pLevel->subTasks); for(int32_t j = 0; j < numOfTasks; ++j) { SSchTask* pTask = taosArrayGet(pLevel->subTasks, j); - cleanupTask(pTask); + schFreeTask(pTask); } taosArrayDestroy(pLevel->subTasks); @@ -1100,6 +1216,7 @@ void scheduleFreeJob(void *pJob) { 
taosHashCleanup(job->failTasks); taosHashCleanup(job->succTasks); taosArrayDestroy(job->levels); + tfree(job); } diff --git a/source/libs/tfs/CMakeLists.txt b/source/libs/tfs/CMakeLists.txt new file mode 100644 index 0000000000..1b6f662507 --- /dev/null +++ b/source/libs/tfs/CMakeLists.txt @@ -0,0 +1,9 @@ +aux_source_directory(src TFS_SRC) +add_library(tfs STATIC ${TFS_SRC}) +target_include_directories( + tfs + PUBLIC "${CMAKE_SOURCE_DIR}/include/libs/tfs" + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc" +) + +target_link_libraries(tfs os util common) \ No newline at end of file diff --git a/src/tfs/inc/tfsint.h b/source/libs/tfs/inc/tfsint.h similarity index 100% rename from src/tfs/inc/tfsint.h rename to source/libs/tfs/inc/tfsint.h diff --git a/src/tfs/src/tdisk.c b/source/libs/tfs/src/tdisk.c similarity index 100% rename from src/tfs/src/tdisk.c rename to source/libs/tfs/src/tdisk.c diff --git a/src/tfs/src/tfs.c b/source/libs/tfs/src/tfs.c similarity index 97% rename from src/tfs/src/tfs.c rename to source/libs/tfs/src/tfs.c index 547f862c20..83d9905511 100644 --- a/src/tfs/src/tfs.c +++ b/source/libs/tfs/src/tfs.c @@ -15,11 +15,11 @@ #include "os.h" -#include "hash.h" #include "taosdef.h" #include "taoserror.h" #include "tfs.h" #include "tfsint.h" +#include "thash.h" #define TMPNAME_LEN (TSDB_FILENAME_LEN * 2 + 32) @@ -270,7 +270,8 @@ int tfsMkdirRecurAt(const char *rname, int level, int id) { // Some platform may modify the contents of the string passed into dirname(). Others may return a pointer to // internal static storage space that will be overwritten by next call. For case like that, we should not use // the pointer directly in this recursion. 
- // See https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/dirname.3.html + // See + // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/dirname.3.html char *dir = strdup(dirname(s)); if (tfsMkdirRecurAt(dir, level, id) < 0) { @@ -334,7 +335,7 @@ int tfsRename(char *orname, char *nrname) { snprintf(oaname, TMPNAME_LEN, "%s/%s", DISK_DIR(pDisk), orname); snprintf(naname, TMPNAME_LEN, "%s/%s", DISK_DIR(pDisk), nrname); - taosRename(oaname, naname); + taosRenameFile(oaname, naname); } } @@ -504,7 +505,6 @@ static int tfsFormatDir(char *idir, char *odir) { wordfree(&wep); return 0; - } static int tfsCheck() { @@ -599,12 +599,10 @@ void taosGetDisk() { SysDiskSize diskSize; SFSMeta fsMeta; - if (tscEmbedded) { - tfsUpdateInfo(&fsMeta, NULL, 0); - tsTotalDataDirGB = (float)(fsMeta.tsize / unit); - tsUsedDataDirGB = (float)(fsMeta.used / unit); - tsAvailDataDirGB = (float)(fsMeta.avail / unit); - } + tfsUpdateInfo(&fsMeta, NULL, 0); + tsTotalDataDirGB = (float)(fsMeta.tsize / unit); + tsUsedDataDirGB = (float)(fsMeta.used / unit); + tsAvailDataDirGB = (float)(fsMeta.avail / unit); if (taosGetDiskSize(tsLogDir, &diskSize) == 0) { tsTotalLogDirGB = (float)(diskSize.tsize / unit); diff --git a/src/tfs/src/ttier.c b/source/libs/tfs/src/ttier.c similarity index 100% rename from src/tfs/src/ttier.c rename to source/libs/tfs/src/ttier.c diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 0bf56dbaaf..3ea564722b 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -259,31 +259,30 @@ TAOS_DEFINE_ERROR(TSDB_CODE_DND_DNODE_READ_FILE_ERROR, "Read dnode.json error TAOS_DEFINE_ERROR(TSDB_CODE_DND_DNODE_WRITE_FILE_ERROR, "Write dnode.json error") TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_ALREADY_DEPLOYED, "Mnode already deployed") TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_NOT_DEPLOYED, "Mnode not deployed") -TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_ID_INVALID, 
"Mnode Id invalid") -TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_ID_NOT_FOUND, "Mnode Id not found") +TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_INVALID_OPTION, "Mnode option invalid") TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_READ_FILE_ERROR, "Read mnode.json error") TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR, "Write mnode.json error") TAOS_DEFINE_ERROR(TSDB_CODE_DND_QNODE_ALREADY_DEPLOYED, "Qnode already deployed") TAOS_DEFINE_ERROR(TSDB_CODE_DND_QNODE_NOT_DEPLOYED, "Qnode not deployed") -TAOS_DEFINE_ERROR(TSDB_CODE_DND_QNODE_ID_INVALID, "Qnode Id invalid") -TAOS_DEFINE_ERROR(TSDB_CODE_DND_QNODE_ID_NOT_FOUND, "Qnode Id not found") +TAOS_DEFINE_ERROR(TSDB_CODE_DND_QNODE_INVALID_OPTION, "Qnode option invalid") TAOS_DEFINE_ERROR(TSDB_CODE_DND_QNODE_READ_FILE_ERROR, "Read qnode.json error") TAOS_DEFINE_ERROR(TSDB_CODE_DND_QNODE_WRITE_FILE_ERROR, "Write qnode.json error") TAOS_DEFINE_ERROR(TSDB_CODE_DND_SNODE_ALREADY_DEPLOYED, "Snode already deployed") TAOS_DEFINE_ERROR(TSDB_CODE_DND_SNODE_NOT_DEPLOYED, "Snode not deployed") -TAOS_DEFINE_ERROR(TSDB_CODE_DND_SNODE_ID_INVALID, "Snode Id invalid") -TAOS_DEFINE_ERROR(TSDB_CODE_DND_SNODE_ID_NOT_FOUND, "Snode Id not found") +TAOS_DEFINE_ERROR(TSDB_CODE_DND_SNODE_INVALID_OPTION, "Snode option invalid") TAOS_DEFINE_ERROR(TSDB_CODE_DND_SNODE_READ_FILE_ERROR, "Read snode.json error") TAOS_DEFINE_ERROR(TSDB_CODE_DND_SNODE_WRITE_FILE_ERROR, "Write snode.json error") TAOS_DEFINE_ERROR(TSDB_CODE_DND_BNODE_ALREADY_DEPLOYED, "Bnode already deployed") TAOS_DEFINE_ERROR(TSDB_CODE_DND_BNODE_NOT_DEPLOYED, "Bnode not deployed") -TAOS_DEFINE_ERROR(TSDB_CODE_DND_BNODE_ID_INVALID, "Bnode Id invalid") -TAOS_DEFINE_ERROR(TSDB_CODE_DND_BNODE_ID_NOT_FOUND, "Bnode Id not found") +TAOS_DEFINE_ERROR(TSDB_CODE_DND_BNODE_INVALID_OPTION, "Bnode option invalid") TAOS_DEFINE_ERROR(TSDB_CODE_DND_BNODE_READ_FILE_ERROR, "Read bnode.json error") TAOS_DEFINE_ERROR(TSDB_CODE_DND_BNODE_WRITE_FILE_ERROR, "Write bnode.json error") 
-TAOS_DEFINE_ERROR(TSDB_CODE_DND_VNODE_TOO_MANY_VNODES, "Too many vnode directories") +TAOS_DEFINE_ERROR(TSDB_CODE_DND_VNODE_ALREADY_DEPLOYED, "Vnode already deployed") +TAOS_DEFINE_ERROR(TSDB_CODE_DND_VNODE_NOT_DEPLOYED, "Vnode not deployed") +TAOS_DEFINE_ERROR(TSDB_CODE_DND_VNODE_INVALID_OPTION, "Vnode option invalid") TAOS_DEFINE_ERROR(TSDB_CODE_DND_VNODE_READ_FILE_ERROR, "Read vnodes.json error") TAOS_DEFINE_ERROR(TSDB_CODE_DND_VNODE_WRITE_FILE_ERROR, "Write vnodes.json error") +TAOS_DEFINE_ERROR(TSDB_CODE_DND_VNODE_TOO_MANY_VNODES, "Too many vnodes") // vnode TAOS_DEFINE_ERROR(TSDB_CODE_VND_ACTION_IN_PROGRESS, "Action in progress") diff --git a/source/util/src/thash.c b/source/util/src/thash.c index 6ec7072a1b..2b013bfdd0 100644 --- a/source/util/src/thash.c +++ b/source/util/src/thash.c @@ -19,8 +19,9 @@ #include "taos.h" #include "tdef.h" -#define EXT_SIZE 1024 - +// the add ref count operation may trigger the warning if the reference count is greater than the MAX_WARNING_REF_COUNT +#define MAX_WARNING_REF_COUNT 10000 +#define EXT_SIZE 1024 #define HASH_NEED_RESIZE(_h) ((_h)->size >= (_h)->capacity * HASH_DEFAULT_LOAD_FACTOR) #define DO_FREE_HASH_NODE(_n) \ @@ -214,7 +215,7 @@ static FORCE_INLINE bool taosHashTableEmpty(const SHashObj *pHashObj) { return taosHashGetSize(pHashObj) == 0; } -int32_t taosHashPut(SHashObj *pHashObj, const void *key, size_t keyLen, void *data, size_t size) { +int32_t taosHashPutImpl(SHashObj *pHashObj, const void *key, size_t keyLen, void *data, size_t size, bool *newAdded) { uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)keyLen); SHashNode *pNewNode = doCreateHashNode(key, keyLen, data, size, hashVal); if (pNewNode == NULL) { @@ -273,6 +274,10 @@ int32_t taosHashPut(SHashObj *pHashObj, const void *key, size_t keyLen, void *da __rd_unlock((void*) &pHashObj->lock, pHashObj->type); atomic_add_fetch_32(&pHashObj->size, 1); + if (newAdded) { + *newAdded = true; + } + return 0; } else { // not support the update operation, 
return error @@ -289,10 +294,23 @@ int32_t taosHashPut(SHashObj *pHashObj, const void *key, size_t keyLen, void *da // enable resize __rd_unlock((void*) &pHashObj->lock, pHashObj->type); + if (newAdded) { + *newAdded = false; + } + return pHashObj->enableUpdate ? 0 : -2; } } +int32_t taosHashPut(SHashObj *pHashObj, const void *key, size_t keyLen, void *data, size_t size) { + return taosHashPutImpl(pHashObj, key, keyLen, data, size, NULL); +} + +int32_t taosHashPutExt(SHashObj *pHashObj, const void *key, size_t keyLen, void *data, size_t size, bool *newAdded) { + return taosHashPutImpl(pHashObj, key, keyLen, data, size, newAdded); +} + + void *taosHashGet(SHashObj *pHashObj, const void *key, size_t keyLen) { return taosHashGetClone(pHashObj, key, keyLen, NULL); } @@ -909,8 +927,24 @@ void *taosHashIterate(SHashObj *pHashObj, void *p) { if (pNode) { SHashEntry *pe = pHashObj->hashList[slot]; - pNode->count++; - data = GET_HASH_NODE_DATA(pNode); + + uint16_t prevRef = atomic_load_16(&pNode->count); + uint16_t afterRef = atomic_add_fetch_16(&pNode->count, 1); + + // the reference count value is overflow, which will cause the delete node operation immediately. 
+ if (prevRef > afterRef) { + uError("hash entry ref count overflow, prev ref:%d, current ref:%d", prevRef, afterRef); + // restore the value + atomic_sub_fetch_16(&pNode->count, 1); + data = NULL; + } else { + data = GET_HASH_NODE_DATA(pNode); + } + + if (afterRef >= MAX_WARNING_REF_COUNT) { + uWarn("hash entry ref count is abnormally high: %d", afterRef); + } + if (pHashObj->type == HASH_ENTRY_LOCK) { taosWUnLockLatch(&pe->latch); } @@ -918,7 +952,6 @@ void *taosHashIterate(SHashObj *pHashObj, void *p) { __rd_unlock((void*) &pHashObj->lock, pHashObj->type); return data; - } void taosHashCancelIterate(SHashObj *pHashObj, void *p) { diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 95f2fe76e6..c284efbcd4 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -95,7 +95,6 @@ int32_t tsdbDebugFlag = 131; int32_t tqDebugFlag = 131; int32_t cqDebugFlag = 131; int32_t fsDebugFlag = 135; -int32_t ctgDebugFlag = 131; int64_t dbgEmptyW = 0; int64_t dbgWN = 0; diff --git a/source/util/src/tqueue.c b/source/util/src/tqueue.c index 75f5e9cdbc..5cb149d53c 100644 --- a/source/util/src/tqueue.c +++ b/source/util/src/tqueue.c @@ -112,6 +112,13 @@ bool taosQueueEmpty(STaosQueue *queue) { return empty; } +int32_t taosQueueSize(STaosQueue *queue) { + pthread_mutex_lock(&queue->mutex); + int32_t numOfItems = queue->numOfItems; + pthread_mutex_unlock(&queue->mutex); + return numOfItems; +} + void *taosAllocateQitem(int32_t size) { STaosQnode *pNode = (STaosQnode *)calloc(sizeof(STaosQnode) + size, 1); diff --git a/src/client/src/tscProfile.c b/src/client/src/tscProfile.c index e4b1602661..43e7365a59 100644 --- a/src/client/src/tscProfile.c +++ b/src/client/src/tscProfile.c @@ -226,7 +226,7 @@ void tscKillStream(STscObj *pObj, uint32_t killId) { } int tscBuildQueryStreamDesc(void *pMsg, STscObj *pObj) { - SHeartBeatMsg *pHeartbeat = pMsg; + SHeartBeatReq *pHeartbeat = pMsg; int allocedQueriesNum = pHeartbeat->numOfQueries; int allocedStreamsNum = 
pHeartbeat->numOfStreams; @@ -327,7 +327,7 @@ int tscBuildQueryStreamDesc(void *pMsg, STscObj *pObj) { } int32_t msgLen = pHeartbeat->numOfQueries * sizeof(SQueryDesc) + pHeartbeat->numOfStreams * sizeof(SStreamDesc) + - sizeof(SHeartBeatMsg); + sizeof(SHeartBeatReq); pHeartbeat->connId = htonl(pObj->connId); pHeartbeat->numOfQueries = htonl(pHeartbeat->numOfQueries); pHeartbeat->numOfStreams = htonl(pHeartbeat->numOfStreams); diff --git a/src/client/src/tscSQLParser.c b/src/client/src/tscSQLParser.c index eb3f8be25a..c68d7cef06 100644 --- a/src/client/src/tscSQLParser.c +++ b/src/client/src/tscSQLParser.c @@ -776,7 +776,7 @@ int32_t tscValidateSqlInfo(SSqlObj* pSql, struct SSqlInfo* pInfo) { char* pMsg = pCmd->payload; - SCfgDnodeMsg* pCfg = (SCfgDnodeMsg*)pMsg; + SMCfgDnodeReq* pCfg = (SMCfgDnodeReq*)pMsg; SStrToken* t0 = taosArrayGet(pMiscInfo->a, 0); SStrToken* t1 = taosArrayGet(pMiscInfo->a, 1); diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index e0489a76d2..41a55869e6 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -1192,13 +1192,13 @@ int32_t tscBuildCreateFuncMsg(SSqlObj *pSql, SSqlInfo *pInfo) { int32_t tscBuildCreateDnodeMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SCreateDnodeMsg); + pCmd->payloadLen = sizeof(SCreateDnodeReq); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("0x%"PRIx64" failed to malloc for query msg", pSql->self); return TSDB_CODE_TSC_OUT_OF_MEMORY; } - SCreateDnodeMsg *pCreate = (SCreateDnodeMsg *)pCmd->payload; + SCreateDnodeReq *pCreate = (SCreateDnodeReq *)pCmd->payload; SStrToken* t0 = taosArrayGet(pInfo->pMiscInfo->a, 0); strncpy(pCreate->ep, t0->z, t0->n); @@ -1287,7 +1287,7 @@ int32_t tscBuildUserMsg(SSqlObj *pSql, SSqlInfo *pInfo) { int32_t tscBuildCfgDnodeMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SCfgDnodeMsg); + pCmd->payloadLen = 
sizeof(SMCfgDnodeReq); pCmd->msgType = TDMT_MND_CONFIG_DNODE; return TSDB_CODE_SUCCESS; } @@ -1350,13 +1350,13 @@ int32_t tscBuildDropDnodeMsg(SSqlObj *pSql, SSqlInfo *pInfo) { char dnodeEp[TSDB_EP_LEN] = {0}; tstrncpy(dnodeEp, pCmd->payload, TSDB_EP_LEN); - pCmd->payloadLen = sizeof(SDropDnodeMsg); + pCmd->payloadLen = sizeof(SDropDnodeReq); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("0x%"PRIx64" failed to malloc for query msg", pSql->self); return TSDB_CODE_TSC_OUT_OF_MEMORY; } - SDropDnodeMsg * pDrop = (SDropDnodeMsg *)pCmd->payload; + SDropDnodeReq * pDrop = (SDropDnodeReq *)pCmd->payload; tstrncpy(pDrop->ep, dnodeEp, tListLen(pDrop->ep)); pCmd->msgType = TDMT_MND_DROP_DNODE; @@ -1469,7 +1469,7 @@ int32_t tscBuildShowMsg(SSqlObj *pSql, SSqlInfo *pInfo) { int32_t tscBuildKillMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SKillQueryMsg); + pCmd->payloadLen = sizeof(SKillQueryReq); switch (pCmd->command) { case TSDB_SQL_KILL_QUERY: @@ -1862,14 +1862,14 @@ int tscBuildConnectMsg(SSqlObj *pSql, SSqlInfo *pInfo) { STscObj *pObj = pSql->pTscObj; SSqlCmd *pCmd = &pSql->cmd; pCmd->msgType = TDMT_MND_CONNECT; - pCmd->payloadLen = sizeof(SConnectMsg); + pCmd->payloadLen = sizeof(SConnectReq); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("0x%"PRIx64" failed to malloc for query msg", pSql->self); return TSDB_CODE_TSC_OUT_OF_MEMORY; } - SConnectMsg *pConnect = (SConnectMsg*)pCmd->payload; + SConnectReq *pConnect = (SConnectReq*)pCmd->payload; // TODO refactor full_name char *db; // ugly code to move the space @@ -1974,7 +1974,7 @@ int tscBuildHeartBeatMsg(SSqlObj *pSql, SSqlInfo *pInfo) { numOfStreams++; } - int size = numOfQueries * sizeof(SQueryDesc) + numOfStreams * sizeof(SStreamDesc) + sizeof(SHeartBeatMsg) + 100; + int size = numOfQueries * sizeof(SQueryDesc) + numOfStreams * sizeof(SStreamDesc) + sizeof(SHeartBeatReq) + 100; if (TSDB_CODE_SUCCESS != 
tscAllocPayload(pCmd, size)) { pthread_mutex_unlock(&pObj->mutex); tscError("0x%"PRIx64" failed to create heartbeat msg", pSql->self); @@ -1982,7 +1982,7 @@ int tscBuildHeartBeatMsg(SSqlObj *pSql, SSqlInfo *pInfo) { } // TODO the expired hb and client can not be identified by server till now. - SHeartBeatMsg *pHeartbeat = (SHeartBeatMsg *)pCmd->payload; + SHeartBeatReq *pHeartbeat = (SHeartBeatReq *)pCmd->payload; tstrncpy(pHeartbeat->clientVer, version, tListLen(pHeartbeat->clientVer)); pHeartbeat->numOfQueries = numOfQueries; diff --git a/src/tfs/CMakeLists.txt b/src/tfs/CMakeLists.txt deleted file mode 100644 index 7f956f07a2..0000000000 --- a/src/tfs/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.8...3.20) -PROJECT(TDengine) - -INCLUDE_DIRECTORIES(inc) -AUX_SOURCE_DIRECTORY(src SRC) -ADD_LIBRARY(tfs ${SRC}) -TARGET_LINK_LIBRARIES(tfs tutil) - -IF (TD_LINUX) - # Someone has no gtest directory, so comment it - # ADD_SUBDIRECTORY(tests) -ENDIF () diff --git a/tests/script/general/table/basic1.sim b/tests/script/general/table/basic1.sim index 298f663822..b5393a03dc 100644 --- a/tests/script/general/table/basic1.sim +++ b/tests/script/general/table/basic1.sim @@ -44,24 +44,30 @@ print $data10 $data11 $data12 print =============== create child table sql create table c1 using st tags(1) -sql create table c2 using st tags(2) +sql create table c2 using st tags(2) sql show tables if $rows != 2 then return -1 endi +sql create table c3 using st tags(3) c4 using st tags(4) c5 using st tags(5) c6 using st tags(6) c7 using st tags(7) + +sql show tables +if $rows != 7 then + return -1 +endi + print $data00 $data01 $data02 print $data10 $data11 $data22 print $data20 $data11 $data22 -return - print =============== insert data sql insert into c1 values(now+1s, 1) sql insert into c1 values(now+2s, 2) sql insert into c1 values(now+3s, 3) +return print =============== query data sql select * from c1 if $rows != 3 then diff --git 
a/tests/test/c/create_table.c b/tests/test/c/create_table.c index 96c7f87392..f2db9d0a0c 100644 --- a/tests/test/c/create_table.c +++ b/tests/test/c/create_table.c @@ -15,43 +15,39 @@ #define _DEFAULT_SOURCE #include "os.h" - #include "taos.h" -#include "taosdef.h" #include "taoserror.h" -#include "thash.h" -#include "tutil.h" #include "ulog.h" -#define MAX_RANDOM_POINTS 20000 #define GREEN "\033[1;32m" #define NC "\033[0m" char dbName[32] = "db"; char stbName[64] = "st"; -int32_t numOfThreads = 2; -int32_t numOfTables = 10000; +int32_t numOfThreads = 1; +int64_t numOfTables = 200000; int32_t createTable = 1; int32_t insertData = 0; -int32_t batchNum = 1; +int32_t batchNum = 100; int32_t numOfVgroups = 2; typedef struct { - int32_t tableBeginIndex; - int32_t tableEndIndex; + int64_t tableBeginIndex; + int64_t tableEndIndex; int32_t threadIndex; char dbName[32]; char stbName[64]; float createTableSpeed; float insertDataSpeed; + int64_t startMs; pthread_t thread; } SThreadInfo; -void parseArgument(int argc, char *argv[]); +void parseArgument(int32_t argc, char *argv[]); void *threadFunc(void *param); void createDbAndStb(); -int main(int argc, char *argv[]) { +int32_t main(int32_t argc, char *argv[]) { parseArgument(argc, argv); createDbAndStb(); @@ -62,9 +58,9 @@ int main(int argc, char *argv[]) { pthread_attr_setdetachstate(&thattr, PTHREAD_CREATE_JOINABLE); SThreadInfo *pInfo = (SThreadInfo *)calloc(numOfThreads, sizeof(SThreadInfo)); - int32_t numOfTablesPerThread = numOfTables / numOfThreads; + int64_t numOfTablesPerThread = numOfTables / numOfThreads; numOfTables = numOfTablesPerThread * numOfThreads; - for (int i = 0; i < numOfThreads; ++i) { + for (int32_t i = 0; i < numOfThreads; ++i) { pInfo[i].tableBeginIndex = i * numOfTablesPerThread; pInfo[i].tableEndIndex = (i + 1) * numOfTablesPerThread; pInfo[i].threadIndex = i; @@ -74,22 +70,24 @@ int main(int argc, char *argv[]) { } taosMsleep(300); - for (int i = 0; i < numOfThreads; i++) { + for (int32_t i = 0; i 
< numOfThreads; i++) { pthread_join(pInfo[i].thread, NULL); } float createTableSpeed = 0; - for (int i = 0; i < numOfThreads; ++i) { + for (int32_t i = 0; i < numOfThreads; ++i) { createTableSpeed += pInfo[i].createTableSpeed; } float insertDataSpeed = 0; - for (int i = 0; i < numOfThreads; ++i) { + for (int32_t i = 0; i < numOfThreads; ++i) { insertDataSpeed += pInfo[i].insertDataSpeed; } - pPrint("%s total %.1f tables/second, threads:%d %s", GREEN, createTableSpeed, numOfThreads, NC); - pPrint("%s total %.1f rows/second, threads:%d %s", GREEN, insertDataSpeed, numOfThreads, NC); + pPrint("%s total %" PRId64 " tables, %.1f tables/second, threads:%d %s", GREEN, numOfTables, createTableSpeed, + numOfThreads, NC); + pPrint("%s total %" PRId64 " tables, %.1f rows/second, threads:%d %s", GREEN, numOfTables, insertDataSpeed, + numOfThreads, NC); pthread_attr_destroy(&thattr); free(pInfo); @@ -135,10 +133,30 @@ void createDbAndStb() { taos_close(con); } +void printCreateProgress(SThreadInfo *pInfo, int64_t t) { + int64_t endMs = taosGetTimestampMs(); + int64_t totalTables = t - pInfo->tableBeginIndex; + float seconds = (endMs - pInfo->startMs) / 1000.0; + float speed = totalTables / seconds; + pInfo->createTableSpeed = speed; + pPrint("thread:%d, %" PRId64 " tables created, time:%.2f sec, speed:%.1f tables/second, ", pInfo->threadIndex, + totalTables, seconds, speed); +} + +void printInsertProgress(SThreadInfo *pInfo, int64_t t) { + int64_t endMs = taosGetTimestampMs(); + int64_t totalTables = t - pInfo->tableBeginIndex; + float seconds = (endMs - pInfo->startMs) / 1000.0; + float speed = totalTables / seconds; + pInfo->insertDataSpeed = speed; + pPrint("thread:%d, %" PRId64 " rows inserted, time:%.2f sec, speed:%.1f rows/second, ", pInfo->threadIndex, + totalTables, seconds, speed); +} + void *threadFunc(void *param) { SThreadInfo *pInfo = (SThreadInfo *)param; - char qstr[65000]; - int code; + char *qstr = malloc(2000 * 1000); + int32_t code = 0; TAOS *con = 
taos_connect(NULL, "root", "taosdata", NULL, 0); if (con == NULL) { @@ -151,44 +169,59 @@ void *threadFunc(void *param) { taos_free_result(pSql); if (createTable) { - int64_t startMs = taosGetTimestampMs(); - for (int32_t t = pInfo->tableBeginIndex; t < pInfo->tableEndIndex; ++t) { - sprintf(qstr, "create table t%d using %s tags(%d)", t, stbName, t); + pInfo->startMs = taosGetTimestampMs(); + for (int64_t t = pInfo->tableBeginIndex; t < pInfo->tableEndIndex; ++t) { + int64_t batch = (pInfo->tableEndIndex - t); + batch = MIN(batch, batchNum); + + int32_t len = sprintf(qstr, "create table"); + for (int32_t i = 0; i < batch; ++i) { + len += sprintf(qstr + len, " t%" PRId64 " using %s tags(%" PRId64 ")", t + i, stbName, t + i); + } + TAOS_RES *pSql = taos_query(con, qstr); code = taos_errno(pSql); if (code != 0) { - pError("failed to create table t%d, reason:%s", t, tstrerror(code)); + pError("failed to create table t%" PRId64 ", reason:%s", t, tstrerror(code)); } taos_free_result(pSql); + + if (t % 100000 == 0) { + printCreateProgress(pInfo, t); + } + t += (batch - 1); } - int64_t endMs = taosGetTimestampMs(); - int32_t totalTables = pInfo->tableEndIndex - pInfo->tableBeginIndex; - float seconds = (endMs - startMs) / 1000.0; - float speed = totalTables / seconds; - pInfo->createTableSpeed = speed; - pPrint("thread:%d, time:%.2f sec, speed:%.1f tables/second, ", pInfo->threadIndex, seconds, speed); + printCreateProgress(pInfo, pInfo->tableEndIndex); } if (insertData) { - int64_t startMs = taosGetTimestampMs(); - for (int32_t t = pInfo->tableBeginIndex; t < pInfo->tableEndIndex; ++t) { - sprintf(qstr, "insert into %s%d values(now, 1)", stbName, t); + pInfo->startMs = taosGetTimestampMs(); + for (int64_t t = pInfo->tableBeginIndex; t < pInfo->tableEndIndex; ++t) { + int64_t batch = (pInfo->tableEndIndex - t); + batch = MIN(batch, batchNum); + + int32_t len = sprintf(qstr, "insert into"); + for (int32_t i = 0; i < batch; ++i) { + len += sprintf(qstr + len, " t%" PRId64 " 
values(now, %" PRId64 ")", t + i, t + i); + } + TAOS_RES *pSql = taos_query(con, qstr); code = taos_errno(pSql); if (code != 0) { - pError("failed to create table %s%d, reason:%s", stbName, t, tstrerror(code)); + pError("failed to insert table t%" PRId64 ", reason:%s", t, tstrerror(code)); } taos_free_result(pSql); + + if (t % 100000 == 0) { + printInsertProgress(pInfo, t); + } + t += (batch - 1); } - int64_t endMs = taosGetTimestampMs(); - int32_t totalTables = pInfo->tableEndIndex - pInfo->tableBeginIndex; - float seconds = (endMs - startMs) / 1000.0; - float speed = totalTables / seconds; - pInfo->insertDataSpeed = speed; - pPrint("thread:%d, time:%.2f sec, speed:%.1f rows/second, ", pInfo->threadIndex, seconds, speed); + printInsertProgress(pInfo, pInfo->tableEndIndex); } taos_close(con); + free(qstr); return 0; } @@ -205,7 +238,7 @@ void printHelp() { printf("%s%s\n", indent, "-t"); printf("%s%s%s%d\n", indent, indent, "numOfThreads, default is ", numOfThreads); printf("%s%s\n", indent, "-n"); - printf("%s%s%s%d\n", indent, indent, "numOfTables, default is ", numOfTables); + printf("%s%s%s%" PRId64 "\n", indent, indent, "numOfTables, default is ", numOfTables); printf("%s%s\n", indent, "-v"); printf("%s%s%s%d\n", indent, indent, "numOfVgroups, default is ", numOfVgroups); printf("%s%s\n", indent, "-a"); @@ -218,8 +251,8 @@ void printHelp() { exit(EXIT_SUCCESS); } -void parseArgument(int argc, char *argv[]) { - for (int i = 1; i < argc; i++) { +void parseArgument(int32_t argc, char *argv[]) { + for (int32_t i = 1; i < argc; i++) { if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { printHelp(); exit(0); @@ -232,7 +265,7 @@ void parseArgument(int argc, char *argv[]) { } else if (strcmp(argv[i], "-t") == 0) { numOfThreads = atoi(argv[++i]); } else if (strcmp(argv[i], "-n") == 0) { - numOfTables = atoi(argv[++i]); + numOfTables = atoll(argv[++i]); } else if (strcmp(argv[i], "-n") == 0) { numOfVgroups = atoi(argv[++i]); } else if (strcmp(argv[i], 
"-a") == 0) { @@ -248,7 +281,7 @@ void parseArgument(int argc, char *argv[]) { pPrint("%s dbName:%s %s", GREEN, dbName, NC); pPrint("%s stbName:%s %s", GREEN, stbName, NC); pPrint("%s configDir:%s %s", GREEN, configDir, NC); - pPrint("%s numOfTables:%d %s", GREEN, numOfTables, NC); + pPrint("%s numOfTables:%" PRId64 " %s", GREEN, numOfTables, NC); pPrint("%s numOfThreads:%d %s", GREEN, numOfThreads, NC); pPrint("%s numOfVgroups:%d %s", GREEN, numOfVgroups, NC); pPrint("%s createTable:%d %s", GREEN, createTable, NC);