diff --git a/docs/zh/12-taos-sql/20-keywords.md b/docs/zh/12-taos-sql/20-keywords.md index 92c814067e..09fea45b7a 100644 --- a/docs/zh/12-taos-sql/20-keywords.md +++ b/docs/zh/12-taos-sql/20-keywords.md @@ -6,7 +6,8 @@ description: TDengine 保留关键字的详细列表 ## 保留关键字 -目前 TDengine 有将近 200 个内部保留关键字,这些关键字无论大小写均不可以用作库名、表名、STable 名、数据列名及标签列名等。这些关键字列表如下: +目前 TDengine 有将近 200 个内部保留关键字,这些关键字无论大小写如果需要用作库名、表名、STable 名、数据列名及标签列名等,需要使用符合``将关键字括起来使用,例如`ADD`。 +关键字列表如下: ### A @@ -239,6 +240,7 @@ description: TDengine 保留关键字的详细列表 - TOPICS - TRIGGER - TSERIES +- TTL ### U diff --git a/include/common/taosdef.h b/include/common/taosdef.h index 9bfee56e29..bf4de9d4de 100644 --- a/include/common/taosdef.h +++ b/include/common/taosdef.h @@ -65,13 +65,6 @@ typedef enum { TSDB_STATIS_NONE = 1, // statis part not exist } ETsdbStatisStatus; -typedef enum { - TSDB_SMA_STAT_UNKNOWN = -1, // unknown - TSDB_SMA_STAT_OK = 0, // ready to provide service - TSDB_SMA_STAT_EXPIRED = 1, // not ready or expired - TSDB_SMA_STAT_DROPPED = 2, // sma dropped -} ETsdbSmaStat; // bit operation - typedef enum { TSDB_SMA_TYPE_BLOCK = 0, // Block-wise SMA TSDB_SMA_TYPE_TIME_RANGE = 1, // Time-range-wise SMA diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 891c9ab040..37db574d98 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -184,7 +184,6 @@ typedef struct SQueryTableDataCond { STimeWindow twindows; int64_t startVersion; int64_t endVersion; - int64_t schemaVersion; } SQueryTableDataCond; int32_t tEncodeDataBlock(void** buf, const SSDataBlock* pBlock); diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 8efd58739d..9f919fa250 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -845,6 +845,8 @@ typedef struct { int64_t uid; int32_t vgVersion; int32_t vgNum; + int16_t hashPrefix; + int16_t hashSuffix; int8_t hashMethod; SArray* pVgroupInfos; // Array of SVgroupInfo } SUseDbRsp; @@ -1070,6 +1072,7 @@ typedef struct { typedef struct { int32_t vgId; int32_t syncState; + int64_t cacheUsage; int64_t numOfTables; int64_t numOfTimeSeries; int64_t totalStorage; diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index 1fa7dca7dc..44a9e10679 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -116,6 +116,8 @@ typedef struct STableMeta { typedef struct SDBVgInfo { int32_t vgVersion; + int16_t hashPrefix; + int16_t hashSuffix; int8_t hashMethod; int32_t numOfTable; // DB's table num, unit is TSDB_TABLE_NUM_UNIT SHashObj* vgHash; // key:vgId, value:SVgroupInfo diff --git a/include/util/taoserror.h b/include/util/taoserror.h index e39172d74e..d16a599811 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -616,6 +616,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RSMA_FETCH_MSG_MSSED_UP TAOS_DEF_ERROR_CODE(0, 0x3155) #define TSDB_CODE_RSMA_EMPTY_INFO TAOS_DEF_ERROR_CODE(0, 0x3156) #define TSDB_CODE_RSMA_INVALID_SCHEMA TAOS_DEF_ERROR_CODE(0, 0x3157) +#define TSDB_CODE_RSMA_REGEX_MATCH TAOS_DEF_ERROR_CODE(0, 0x3158) //index #define TSDB_CODE_INDEX_REBUILDING TAOS_DEF_ERROR_CODE(0, 0x3200) diff --git a/include/util/trbtree.h b/include/util/trbtree.h new file mode 100644 index 0000000000..f6d37e3d75 --- /dev/null +++ b/include/util/trbtree.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_UTIL_RBTREE_H_ +#define _TD_UTIL_RBTREE_H_ + +#include "os.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct SRBTree SRBTree; +typedef struct SRBTreeNode SRBTreeNode; +typedef struct SRBTreeIter SRBTreeIter; + +typedef int32_t (*tRBTreeCmprFn)(const void *, const void *); + +// SRBTree ============================================= +#define tRBTreeMin(T) ((T)->min == ((T)->NIL) ? NULL : (T)->min) +#define tRBTreeMax(T) ((T)->max == ((T)->NIL) ? NULL : (T)->max) + +void tRBTreeCreate(SRBTree *pTree, tRBTreeCmprFn cmprFn); +SRBTreeNode *tRBTreePut(SRBTree *pTree, SRBTreeNode *z); +void tRBTreeDrop(SRBTree *pTree, SRBTreeNode *z); +SRBTreeNode *tRBTreeDropByKey(SRBTree *pTree, void *pKey); +SRBTreeNode *tRBTreeGet(SRBTree *pTree, void *pKey); + +// SRBTreeIter ============================================= +#define tRBTreeIterCreate(tree, ascend) \ + (SRBTreeIter) { .asc = (ascend), .pTree = (tree), .pNode = (ascend) ? (tree)->min : (tree)->max } + +SRBTreeNode *tRBTreeIterNext(SRBTreeIter *pIter); + +// STRUCT ============================================= +typedef enum { RED, BLACK } ECOLOR; +struct SRBTreeNode { + ECOLOR color; + SRBTreeNode *parent; + SRBTreeNode *left; + SRBTreeNode *right; +}; + +#define RBTREE_NODE_PAYLOAD(N) ((const void *)&(N)[1]) + +struct SRBTree { + tRBTreeCmprFn cmprFn; + int64_t n; + SRBTreeNode *root; + SRBTreeNode *min; + SRBTreeNode *max; + SRBTreeNode *NIL; + SRBTreeNode NILNODE; +}; + +struct SRBTreeIter { + int8_t asc; + SRBTree *pTree; + SRBTreeNode *pNode; +}; + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_UTIL_RBTREE_H_*/ \ No newline at end of file diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index bf92a9ba6a..b739aedca0 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -488,7 +488,7 @@ int taos_options_imp(TSDB_OPTION option, const char *str) { */ uint64_t generateRequestId() { static uint64_t hashId = 0; - static int32_t requestSerialId = 0; + static uint32_t requestSerialId = 0; if (hashId == 0) { char uid[64] = {0}; @@ -507,7 +507,8 @@ uint64_t generateRequestId() { while (true) { int64_t ts = taosGetTimestampMs(); uint64_t pid = taosGetPId(); - int32_t val = atomic_add_fetch_32(&requestSerialId, 1); + uint32_t val = atomic_add_fetch_32(&requestSerialId, 1); + if (val >= 0xFFFF) atomic_store_32(&requestSerialId, 0); id = ((hashId & 0x0FFF) << 52) | ((pid & 0x0FFF) << 40) | ((ts & 0xFFFFFF) << 16) | (val & 0xFFFF); if (id) { diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 56e3527f96..84a827ed78 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -73,6 +73,8 @@ static int32_t hbProcessDBInfoRsp(void *value, int32_t valueLen, struct SCatalog vgInfo->vgVersion = rsp->vgVersion; vgInfo->hashMethod = rsp->hashMethod; + vgInfo->hashPrefix = rsp->hashPrefix; + vgInfo->hashSuffix = rsp->hashSuffix; vgInfo->vgHash = taosHashInit(rsp->vgNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); if (NULL == vgInfo->vgHash) { taosMemoryFree(vgInfo); diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index f91ceb3184..5ebc2729f8 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1672,7 +1672,12 @@ static int32_t doConvertJson(SReqResultInfo* pResultInfo, int32_t numOfCols, int break; } } - if (!needConvert) return TSDB_CODE_SUCCESS; + + if (!needConvert) { + return TSDB_CODE_SUCCESS; + } + + tscDebug("start to convert form json format string"); char* p = (char*)pResultInfo->pData; int32_t dataLen = estimateJsonLen(pResultInfo, numOfCols, numOfRows); diff --git a/source/client/src/clientRawBlockWrite.c b/source/client/src/clientRawBlockWrite.c index 9f244a19c8..c135965f07 100644 --- a/source/client/src/clientRawBlockWrite.c +++ b/source/client/src/clientRawBlockWrite.c @@ -765,6 +765,7 @@ static int32_t taosCreateTable(TAOS* taos, void* meta, int32_t metaLen) { } taosArrayPush(pRequest->tableList, &pName); + pCreateReq->flags |= TD_CREATE_IF_NOT_EXISTS; // change tag cid to new cid if (pCreateReq->type == TSDB_CHILD_TABLE) { STableMeta* pTableMeta = NULL; diff --git a/source/common/src/systable.c b/source/common/src/systable.c index 3b6de2e399..61fc530f57 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -207,6 +207,7 @@ static const SSysDbTableSchema vgroupsSchema[] = { {.name = "v3_dnode", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = true}, {.name = "v3_status", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, {.name = "status", .bytes = 12 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, + {.name = "cacheload", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = true}, {.name = "nfiles", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = true}, {.name = "file_size", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = true}, {.name = "tsma", .bytes = 1, .type = TSDB_DATA_TYPE_TINYINT, .sysInfo = true}, diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index bcb96dafcc..16b8e55cf7 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -2120,6 +2120,7 @@ void blockEncode(const SSDataBlock* pBlock, char* data, int32_t* dataLen, int32_ int32_t* rows = (int32_t*)data; *rows = pBlock->info.rows; data += sizeof(int32_t); + ASSERT(*rows > 0); int32_t* cols = (int32_t*)data; *cols = numOfCols; @@ -2183,6 +2184,8 @@ void blockEncode(const SSDataBlock* pBlock, char* data, int32_t* dataLen, int32_ *actualLen = *dataLen; *groupId = pBlock->info.groupId; + ASSERT(*dataLen > 0); + uDebug("build data block, actualLen:%d, rows:%d, cols:%d", *dataLen, *rows, *cols); } const char* blockDecode(SSDataBlock* pBlock, const char* pData) { diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 908f7c014e..c436e4ffd2 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -129,10 +129,6 @@ int32_t tsMinIntervalTime = 1; int32_t tsQueryBufferSize = -1; int64_t tsQueryBufferSizeBytes = -1; -// tsdb config -// For backward compatibility -bool tsdbForceKeepFile = false; - int32_t tsDiskCfgNum = 0; SDiskCfg tsDiskCfg[TFS_MAX_DISKS] = {0}; diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 9b6737a210..ea25094d10 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -994,6 +994,7 @@ int32_t tSerializeSStatusReq(void *buf, int32_t bufLen, SStatusReq *pReq) { SVnodeLoad *pload = taosArrayGet(pReq->pVloads, i); if (tEncodeI32(&encoder, pload->vgId) < 0) return -1; if (tEncodeI32(&encoder, pload->syncState) < 0) return -1; + if (tEncodeI64(&encoder, pload->cacheUsage) < 0) return -1; if (tEncodeI64(&encoder, pload->numOfTables) < 0) return -1; if (tEncodeI64(&encoder, pload->numOfTimeSeries) < 0) return -1; if (tEncodeI64(&encoder, pload->totalStorage) < 0) return -1; @@ -1063,6 +1064,7 @@ int32_t tDeserializeSStatusReq(void *buf, int32_t bufLen, SStatusReq *pReq) { SVnodeLoad vload = {0}; if (tDecodeI32(&decoder, &vload.vgId) < 0) return -1; if (tDecodeI32(&decoder, &vload.syncState) < 0) return -1; + if (tDecodeI64(&decoder, &vload.cacheUsage) < 0) return -1; if (tDecodeI64(&decoder, &vload.numOfTables) < 0) return -1; if (tDecodeI64(&decoder, &vload.numOfTimeSeries) < 0) return -1; if (tDecodeI64(&decoder, &vload.totalStorage) < 0) return -1; @@ -2461,6 +2463,8 @@ int32_t tSerializeSUseDbRspImp(SEncoder *pEncoder, const SUseDbRsp *pRsp) { if (tEncodeI64(pEncoder, pRsp->uid) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->vgVersion) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->vgNum) < 0) return -1; + if (tEncodeI16(pEncoder, pRsp->hashPrefix) < 0) return -1; + if (tEncodeI16(pEncoder, pRsp->hashSuffix) < 0) return -1; if (tEncodeI8(pEncoder, pRsp->hashMethod) < 0) return -1; for (int32_t i = 0; i < pRsp->vgNum; ++i) { @@ -2512,6 +2516,8 @@ int32_t tDeserializeSUseDbRspImp(SDecoder *pDecoder, SUseDbRsp *pRsp) { if (tDecodeI64(pDecoder, &pRsp->uid) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->vgVersion) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->vgNum) < 0) return -1; + if (tDecodeI16(pDecoder, &pRsp->hashPrefix) < 0) return -1; + if (tDecodeI16(pDecoder, &pRsp->hashSuffix) < 0) return -1; if (tDecodeI8(pDecoder, &pRsp->hashMethod) < 0) return -1; if (pRsp->vgNum <= 0) { diff --git a/source/common/src/trow.c b/source/common/src/trow.c index 565498a47b..9880fe362e 100644 --- a/source/common/src/trow.c +++ b/source/common/src/trow.c @@ -538,12 +538,12 @@ bool tdSTSRowIterGetTpVal(STSRowIter *pIter, col_type_t colType, int32_t offset, } else { pVal->val = POINTER_SHIFT(TD_ROW_DATA(pRow), offset); } - return TSDB_CODE_SUCCESS; + return true; } if (tdGetBitmapValType(pIter->pBitmap, pIter->colIdx - 1, &pVal->valType, 0) != TSDB_CODE_SUCCESS) { pVal->valType = TD_VTYPE_NONE; - return terrno; + return true; } if (pVal->valType == TD_VTYPE_NORM) { diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 2268a18455..9632be1b24 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -343,6 +343,7 @@ typedef struct { uint32_t hashEnd; char dbName[TSDB_DB_FNAME_LEN]; int64_t dbUid; + int64_t cacheUsage; int64_t numOfTables; int64_t numOfTimeSeries; int64_t totalStorage; diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 77c9d0bb79..939439ca56 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -1171,6 +1171,8 @@ int32_t mndExtractDbInfo(SMnode *pMnode, SDbObj *pDb, SUseDbRsp *pRsp, const SUs pRsp->vgVersion = pDb->vgVersion; pRsp->vgNum = taosArrayGetSize(pRsp->pVgroupInfos); pRsp->hashMethod = pDb->cfg.hashMethod; + pRsp->hashPrefix = pDb->cfg.hashPrefix; + pRsp->hashSuffix = pDb->cfg.hashSuffix; return 0; } @@ -1303,6 +1305,8 @@ int32_t mndValidateDbInfo(SMnode *pMnode, SDbVgVersion *pDbs, int32_t numOfDbs, usedbRsp.vgVersion = pDb->vgVersion; usedbRsp.vgNum = (int32_t)taosArrayGetSize(usedbRsp.pVgroupInfos); usedbRsp.hashMethod = pDb->cfg.hashMethod; + usedbRsp.hashPrefix = pDb->cfg.hashPrefix; + usedbRsp.hashSuffix = pDb->cfg.hashSuffix; taosArrayPush(batchUseRsp.pArray, &usedbRsp); mndReleaseDb(pMnode, pDb); diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index fc5e20ef28..26b4080d14 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -347,6 +347,7 @@ static int32_t mndProcessStatusReq(SRpcMsg *pReq) { SVgObj *pVgroup = mndAcquireVgroup(pMnode, pVload->vgId); if (pVgroup != NULL) { if (pVload->syncState == TAOS_SYNC_STATE_LEADER) { + pVgroup->cacheUsage = pVload->cacheUsage; pVgroup->numOfTables = pVload->numOfTables; pVgroup->numOfTimeSeries = pVload->numOfTimeSeries; pVgroup->totalStorage = pVload->totalStorage; @@ -853,8 +854,8 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { } int32_t code = -1; - SSdb *pSdb = pMnode->pSdb; - void *pIter = NULL; + SSdb *pSdb = pMnode->pSdb; + void *pIter = NULL; while (1) { SDnodeObj *pDnode = NULL; pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pDnode); @@ -877,7 +878,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { sdbRelease(pSdb, pDnode); } - + if (code == -1) { terrno = TSDB_CODE_MND_DNODE_NOT_EXIST; } diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index d5f9f8a5cf..de29dea511 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -696,6 +696,9 @@ static int32_t mndRetrieveVgroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *p pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataAppendNULL(pColInfo, numOfRows); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataAppend(pColInfo, numOfRows, (const char *)&pVgroup->cacheUsage, false); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataAppendNULL(pColInfo, numOfRows); diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index a3e17f5377..7a99d26683 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -29,6 +29,7 @@ target_sources( # sma "src/sma/smaEnv.c" "src/sma/smaUtil.c" + "src/sma/smaFS.c" "src/sma/smaOpen.c" "src/sma/smaCommit.c" "src/sma/smaRollup.c" @@ -49,6 +50,10 @@ target_sources( "src/tsdb/tsdbSnapshot.c" "src/tsdb/tsdbCacheRead.c" "src/tsdb/tsdbRetention.c" + "src/tsdb/tsdbDiskData.c" + "src/tsdb/tsdbCompress.c" + "src/tsdb/tsdbCompact.c" + "src/tsdb/tsdbMergeTree.c" # tq "src/tq/tq.c" diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 0fea96764e..8e545eb527 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -155,6 +155,7 @@ int32_t tsdbGetTableSchema(SVnode *pVnode, int64_t uid, STSchema **pSchema, int6 void tsdbCacheSetCapacity(SVnode *pVnode, size_t capacity); size_t tsdbCacheGetCapacity(SVnode *pVnode); +size_t tsdbCacheGetUsage(SVnode *pVnode); // tq typedef struct SMetaTableInfo { diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index c29c4cb6c4..c9d1c3d015 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -38,9 +38,10 @@ typedef struct SSmaEnv SSmaEnv; typedef struct SSmaStat SSmaStat; typedef struct STSmaStat STSmaStat; typedef struct SRSmaStat SRSmaStat; -typedef struct SSmaKey SSmaKey; +typedef struct SRSmaRef SRSmaRef; typedef struct SRSmaInfo SRSmaInfo; typedef struct SRSmaInfoItem SRSmaInfoItem; +typedef struct SRSmaFS SRSmaFS; typedef struct SQTaskFile SQTaskFile; typedef struct SQTaskFReader SQTaskFReader; typedef struct SQTaskFWriter SQTaskFWriter; @@ -54,10 +55,21 @@ struct SSmaEnv { #define SMA_ENV_FLG_CLOSE ((int8_t)0x1) +struct SRSmaRef { + int64_t refId; // for SRSmaStat + int64_t suid; +}; + typedef struct { int8_t inited; int32_t rsetId; void *tmrHandle; // shared by all fetch tasks + /** + * @brief key: void* of SRSmaInfoItem, value: SRSmaRef + * N.B. Although there is a very small possibility that "void*" point to different objects while with the same + * address after release/renew, the functionality is not affected as it just used to fetch the rsma results. + */ + SHashObj *refHash; // shared by all vgroups } SSmaMgmt; #define SMA_ENV_LOCK(env) (&(env)->lock) @@ -73,20 +85,25 @@ struct STSmaStat { struct SQTaskFile { volatile int32_t nRef; - int64_t commitID; + int32_t padding; + int64_t version; int64_t size; }; struct SQTaskFReader { - SSma *pSma; - SQTaskFile fTask; - TdFilePtr pReadH; + SSma *pSma; + int64_t version; + TdFilePtr pReadH; }; struct SQTaskFWriter { - SSma *pSma; - SQTaskFile fTask; - TdFilePtr pWriteH; - char *fname; + SSma *pSma; + int64_t version; + TdFilePtr pWriteH; + char *fname; +}; + +struct SRSmaFS { + SArray *aQTaskInf; // array of SQTaskFile }; struct SRSmaStat { @@ -98,7 +115,7 @@ struct SRSmaStat { volatile int32_t nFetchAll; // active number of fetch all int8_t triggerStat; // shared by fetch tasks int8_t commitStat; // 0 not in committing, 1 in committing - SArray *aTaskFile; // qTaskFiles committed recently(for recovery/snapshot r/w) + SRSmaFS fs; // for recovery/snapshot r/w SHashObj *infoHash; // key: suid, value: SRSmaInfo tsem_t notEmpty; // has items in queue buffer }; @@ -118,6 +135,7 @@ struct SSmaStat { #define RSMA_TRIGGER_STAT(r) (&(r)->triggerStat) #define RSMA_COMMIT_STAT(r) (&(r)->commitStat) #define RSMA_REF_ID(r) ((r)->refId) +#define RSMA_FS(r) (&(r)->fs) #define RSMA_FS_LOCK(r) (&(r)->lock) struct SRSmaInfoItem { @@ -130,9 +148,9 @@ struct SRSmaInfoItem { }; struct SRSmaInfo { + SSma *pSma; STSchema *pTSchema; int64_t suid; - int64_t refId; // refId of SRSmaStat int64_t lastRecv; // ms int8_t assigned; // 0 idle, 1 assgined for exec int8_t delFlag; @@ -163,14 +181,6 @@ enum { TASK_TRIGGER_STAT_DROPPED = 5, }; -enum { - RSMA_ROLE_CREATE = 0, - RSMA_ROLE_DROP = 1, - RSMA_ROLE_SUBMIT = 2, - RSMA_ROLE_FETCH = 3, - RSMA_ROLE_ITERATE = 4, -}; - enum { RSMA_RESTORE_REBOOT = 1, RSMA_RESTORE_SYNC = 2, @@ -182,88 +192,34 @@ typedef enum { RSMA_EXEC_COMMIT = 3, // triggered by commit } ERsmaExecType; -void tdDestroySmaEnv(SSmaEnv *pSmaEnv); -void *tdFreeSmaEnv(SSmaEnv *pSmaEnv); - -int32_t tdDropTSma(SSma *pSma, char *pMsg); -int32_t tdDropTSmaData(SSma *pSma, int64_t indexUid); -int32_t tdInsertRSmaData(SSma *pSma, char *msg); - +// sma +int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType); +void tdDestroySmaEnv(SSmaEnv *pSmaEnv); +void *tdFreeSmaEnv(SSmaEnv *pSmaEnv); int32_t tdRefSmaStat(SSma *pSma, SSmaStat *pStat); int32_t tdUnRefSmaStat(SSma *pSma, SSmaStat *pStat); -int32_t tdRefRSmaInfo(SSma *pSma, SRSmaInfo *pRSmaInfo); -int32_t tdUnRefRSmaInfo(SSma *pSma, SRSmaInfo *pRSmaInfo); - +int32_t tdLockSma(SSma *pSma); +int32_t tdUnLockSma(SSma *pSma); void *tdAcquireSmaRef(int32_t rsetId, int64_t refId); int32_t tdReleaseSmaRef(int32_t rsetId, int64_t refId); -int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType); +// rsma +int32_t tdRefRSmaInfo(SSma *pSma, SRSmaInfo *pRSmaInfo); +int32_t tdUnRefRSmaInfo(SSma *pSma, SRSmaInfo *pRSmaInfo); +void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree); +int32_t tdRSmaFSOpen(SSma *pSma, int64_t version); +void tdRSmaFSClose(SRSmaFS *fs); +int32_t tdRSmaFSRef(SSma *pSma, SRSmaStat *pStat, int64_t version); +void tdRSmaFSUnRef(SSma *pSma, SRSmaStat *pStat, int64_t version); +int32_t tdRSmaFSUpsertQTaskFile(SRSmaFS *pFS, SQTaskFile *qTaskFile); +int32_t tdRSmaRestore(SSma *pSma, int8_t type, int64_t committedVer); +int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName); +int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type); +int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash); +int32_t tdRSmaProcessRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer); -int32_t tdLockSma(SSma *pSma); -int32_t tdUnLockSma(SSma *pSma); - -static FORCE_INLINE int8_t tdSmaStat(STSmaStat *pTStat) { - if (pTStat) { - return atomic_load_8(&pTStat->state); - } - return TSDB_SMA_STAT_UNKNOWN; -} - -static FORCE_INLINE bool tdSmaStatIsOK(STSmaStat *pTStat, int8_t *state) { - if (!pTStat) { - return false; - } - - if (state) { - *state = atomic_load_8(&pTStat->state); - return *state == TSDB_SMA_STAT_OK; - } - return atomic_load_8(&pTStat->state) == TSDB_SMA_STAT_OK; -} - -static FORCE_INLINE bool tdSmaStatIsExpired(STSmaStat *pTStat) { - return pTStat ? (atomic_load_8(&pTStat->state) & TSDB_SMA_STAT_EXPIRED) : true; -} - -static FORCE_INLINE bool tdSmaStatIsDropped(STSmaStat *pTStat) { - return pTStat ? (atomic_load_8(&pTStat->state) & TSDB_SMA_STAT_DROPPED) : true; -} - -static FORCE_INLINE void tdSmaStatSetOK(STSmaStat *pTStat) { - if (pTStat) { - atomic_store_8(&pTStat->state, TSDB_SMA_STAT_OK); - } -} - -static FORCE_INLINE void tdSmaStatSetExpired(STSmaStat *pTStat) { - if (pTStat) { - atomic_or_fetch_8(&pTStat->state, TSDB_SMA_STAT_EXPIRED); - } -} - -static FORCE_INLINE void tdSmaStatSetDropped(STSmaStat *pTStat) { - if (pTStat) { - atomic_or_fetch_8(&pTStat->state, TSDB_SMA_STAT_DROPPED); - } -} - -void tdRSmaQTaskInfoGetFileName(int32_t vid, int64_t version, char *outputName); -void tdRSmaQTaskInfoGetFullName(int32_t vid, int64_t version, const char *path, char *outputName); -int32_t tdCloneRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); -void tdFreeQTaskInfo(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level); -static int32_t tdDestroySmaState(SSmaStat *pSmaStat, int8_t smaType); -void *tdFreeSmaState(SSmaStat *pSmaStat, int8_t smaType); -void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree); -int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash); -int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type); - -int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName); -int32_t tdProcessRSmaRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer); -int32_t tdRsmaRestore(SSma *pSma, int8_t type, int64_t committedVer); - -int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg); -int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg); -int32_t tdProcessTSmaGetDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days); +void tdRSmaQTaskInfoGetFileName(int32_t vid, int64_t version, char *outputName); +void tdRSmaQTaskInfoGetFullName(int32_t vid, int64_t version, const char *path, char *outputName); // smaFileUtil ================ diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index d1f5cfb122..d45a6f19f0 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -42,15 +42,15 @@ typedef struct SMemTable SMemTable; typedef struct STbDataIter STbDataIter; typedef struct SMapData SMapData; typedef struct SBlockIdx SBlockIdx; -typedef struct SBlock SBlock; -typedef struct SBlockL SBlockL; +typedef struct SDataBlk SDataBlk; +typedef struct SSttBlk SSttBlk; typedef struct SColData SColData; typedef struct SDiskDataHdr SDiskDataHdr; typedef struct SBlockData SBlockData; typedef struct SDelFile SDelFile; typedef struct SHeadFile SHeadFile; typedef struct SDataFile SDataFile; -typedef struct SLastFile SLastFile; +typedef struct SSttFile SSttFile; typedef struct SSmaFile SSmaFile; typedef struct SDFileSet SDFileSet; typedef struct SDataFWriter SDataFWriter; @@ -64,10 +64,15 @@ typedef struct STsdbReadSnap STsdbReadSnap; typedef struct SBlockInfo SBlockInfo; typedef struct SSmaInfo SSmaInfo; typedef struct SBlockCol SBlockCol; +typedef struct SVersionRange SVersionRange; +typedef struct SLDataIter SLDataIter; -#define TSDB_FILE_DLMT ((uint32_t)0xF00AFA0F) -#define TSDB_MAX_SUBBLOCKS 8 -#define TSDB_FHDR_SIZE 512 +#define TSDB_FILE_DLMT ((uint32_t)0xF00AFA0F) +#define TSDB_MAX_SUBBLOCKS 8 +#define TSDB_MAX_STT_FILE 16 +#define TSDB_DEFAULT_STT_FILE 8 +#define TSDB_FHDR_SIZE 512 +#define TSDB_DEFAULT_PAGE_SIZE 4096 #define HAS_NONE ((int8_t)0x1) #define HAS_NULL ((int8_t)0x2) @@ -79,6 +84,14 @@ typedef struct SBlockCol SBlockCol; #define TSDBKEY_MIN ((TSDBKEY){.ts = TSKEY_MIN, .version = VERSION_MIN}) #define TSDBKEY_MAX ((TSDBKEY){.ts = TSKEY_MAX, .version = VERSION_MAX}) +#define PAGE_CONTENT_SIZE(PAGE) ((PAGE) - sizeof(TSCKSUM)) +#define LOGIC_TO_FILE_OFFSET(LOFFSET, PAGE) \ + ((LOFFSET) / PAGE_CONTENT_SIZE(PAGE) * (PAGE) + (LOFFSET) % PAGE_CONTENT_SIZE(PAGE)) +#define FILE_TO_LOGIC_OFFSET(OFFSET, PAGE) ((OFFSET) / (PAGE)*PAGE_CONTENT_SIZE(PAGE) + (OFFSET) % (PAGE)) +#define PAGE_OFFSET(PGNO, PAGE) (((PGNO)-1) * (PAGE)) +#define OFFSET_PGNO(OFFSET, PAGE) ((OFFSET) / (PAGE) + 1) +#define LOGIC_TO_FILE_SIZE(LSIZE, PAGE) OFFSET_PGNO(LOGIC_TO_FILE_OFFSET(LSIZE, PAGE), PAGE) * (PAGE) + // tsdbUtil.c ============================================================================================== // TSDBROW #define TSDBROW_TS(ROW) (((ROW)->type == 0) ? (ROW)->pTSRow->ts : (ROW)->pBlockData->aTSKEY[(ROW)->iRow]) @@ -111,15 +124,15 @@ int32_t tTABLEIDCmprFn(const void *p1, const void *p2); int32_t tPutBlockCol(uint8_t *p, void *ph); int32_t tGetBlockCol(uint8_t *p, void *ph); int32_t tBlockColCmprFn(const void *p1, const void *p2); -// SBlock -void tBlockReset(SBlock *pBlock); -int32_t tPutBlock(uint8_t *p, void *ph); -int32_t tGetBlock(uint8_t *p, void *ph); -int32_t tBlockCmprFn(const void *p1, const void *p2); -bool tBlockHasSma(SBlock *pBlock); -// SBlockL -int32_t tPutBlockL(uint8_t *p, void *ph); -int32_t tGetBlockL(uint8_t *p, void *ph); +// SDataBlk +void tDataBlkReset(SDataBlk *pBlock); +int32_t tPutDataBlk(uint8_t *p, void *ph); +int32_t tGetDataBlk(uint8_t *p, void *ph); +int32_t tDataBlkCmprFn(const void *p1, const void *p2); +bool tDataBlkHasSma(SDataBlk *pDataBlk); +// SSttBlk +int32_t tPutSttBlk(uint8_t *p, void *ph); +int32_t tGetSttBlk(uint8_t *p, void *ph); // SBlockIdx int32_t tPutBlockIdx(uint8_t *p, void *ph); int32_t tGetBlockIdx(uint8_t *p, void *ph); @@ -170,6 +183,7 @@ int32_t tGetDelData(uint8_t *p, void *ph); void tMapDataReset(SMapData *pMapData); void tMapDataClear(SMapData *pMapData); int32_t tMapDataPutItem(SMapData *pMapData, void *pItem, int32_t (*tPutItemFn)(uint8_t *, void *)); +int32_t tMapDataCopy(SMapData *pFrom, SMapData *pTo); void tMapDataGetItemByIdx(SMapData *pMapData, int32_t idx, void *pItem, int32_t (*tGetItemFn)(uint8_t *, void *)); int32_t tMapDataSearch(SMapData *pMapData, void *pSearchItem, int32_t (*tGetItemFn)(uint8_t *, void *), int32_t (*tItemCmprFn)(const void *, const void *), void *pItem); @@ -191,7 +205,6 @@ int32_t tsdbCmprColData(SColData *pColData, int8_t cmprAlg, SBlockCol *pBlockCol uint8_t **ppBuf); int32_t tsdbDecmprColData(uint8_t *pIn, SBlockCol *pBlockCol, int8_t cmprAlg, int32_t nVal, SColData *pColData, uint8_t **ppBuf); -int32_t tsdbReadAndCheck(TdFilePtr pFD, int64_t offset, uint8_t **ppOut, int32_t size, int8_t toCheck); // tsdbMemTable ============================================================================================== // SMemTable int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable); @@ -215,7 +228,7 @@ bool tsdbDelFileIsSame(SDelFile *pDelFile1, SDelFile *pDelFile2); int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype); int32_t tPutHeadFile(uint8_t *p, SHeadFile *pHeadFile); int32_t tPutDataFile(uint8_t *p, SDataFile *pDataFile); -int32_t tPutLastFile(uint8_t *p, SLastFile *pLastFile); +int32_t tPutSttFile(uint8_t *p, SSttFile *pSttFile); int32_t tPutSmaFile(uint8_t *p, SSmaFile *pSmaFile); int32_t tPutDelFile(uint8_t *p, SDelFile *pDelFile); int32_t tGetDelFile(uint8_t *p, SDelFile *pDelFile); @@ -224,7 +237,7 @@ int32_t tGetDFileSet(uint8_t *p, SDFileSet *pSet); void tsdbHeadFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SHeadFile *pHeadF, char fname[]); void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, char fname[]); -void tsdbLastFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SLastFile *pLastF, char fname[]); +void tsdbSttFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSttFile *pSttF, char fname[]); void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, char fname[]); // SDelFile void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]); @@ -250,7 +263,7 @@ int32_t tsdbDataFWriterClose(SDataFWriter **ppWriter, int8_t sync); int32_t tsdbUpdateDFileSetHeader(SDataFWriter *pWriter); int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx); int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *pMapData, SBlockIdx *pBlockIdx); -int32_t tsdbWriteBlockL(SDataFWriter *pWriter, SArray *aBlockL); +int32_t tsdbWriteSttBlk(SDataFWriter *pWriter, SArray *aSttBlk); int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, SBlockInfo *pBlkInfo, SSmaInfo *pSmaInfo, int8_t cmprAlg, int8_t toLast); @@ -260,10 +273,10 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS int32_t tsdbDataFReaderClose(SDataFReader **ppReader); int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx); int32_t tsdbReadBlock(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *pMapData); -int32_t tsdbReadBlockL(SDataFReader *pReader, SArray *aBlockL); -int32_t tsdbReadBlockSma(SDataFReader *pReader, SBlock *pBlock, SArray *aColumnDataAgg); -int32_t tsdbReadDataBlock(SDataFReader *pReader, SBlock *pBlock, SBlockData *pBlockData); -int32_t tsdbReadLastBlock(SDataFReader *pReader, SBlockL *pBlockL, SBlockData *pBlockData); +int32_t tsdbReadSttBlk(SDataFReader *pReader, int32_t iStt, SArray *aSttBlk); +int32_t tsdbReadBlockSma(SDataFReader *pReader, SDataBlk *pBlock, SArray *aColumnDataAgg); +int32_t tsdbReadDataBlock(SDataFReader *pReader, SDataBlk *pBlock, SBlockData *pBlockData); +int32_t tsdbReadSttBlock(SDataFReader *pReader, int32_t iStt, SSttBlk *pSttBlk, SBlockData *pBlockData); // SDelFWriter int32_t tsdbDelFWriterOpen(SDelFWriter **ppWriter, SDelFile *pFile, STsdb *pTsdb); int32_t tsdbDelFWriterClose(SDelFWriter **ppWriter, int8_t sync); @@ -278,6 +291,8 @@ int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx); // tsdbRead.c ============================================================================================== int32_t tsdbTakeReadSnap(STsdb *pTsdb, STsdbReadSnap **ppSnap); void tsdbUntakeReadSnap(STsdb *pTsdb, STsdbReadSnap *pSnap); +// tsdbMerge.c ============================================================================================== +int32_t tsdbMerge(STsdb *pTsdb); #define TSDB_CACHE_NO(c) ((c).cacheLast == 0) #define TSDB_CACHE_LAST_ROW(c) (((c).cacheLast & 1) > 0) @@ -324,6 +339,11 @@ struct TSDBKEY { TSKEY ts; }; +struct SVersionRange { + uint64_t minVer; + uint64_t maxVer; +}; + typedef struct SMemSkipListNode SMemSkipListNode; struct SMemSkipListNode { int8_t level; @@ -416,7 +436,7 @@ struct SSmaInfo { int32_t size; }; -struct SBlock { +struct SDataBlk { TSDBKEY minKey; TSDBKEY maxKey; int64_t minVer; @@ -428,7 +448,7 @@ struct SBlock { SSmaInfo smaInfo; }; -struct SBlockL { +struct SSttBlk { int64_t suid; int64_t minUid; int64_t maxUid; @@ -467,12 +487,6 @@ struct SBlockData { SArray *aColData; // SArray }; -// ================== TSDB global config -extern bool tsdbForceKeepFile; - -#define TSDB_FS_ITER_FORWARD TSDB_ORDER_ASC -#define TSDB_FS_ITER_BACKWARD TSDB_ORDER_DESC - struct TABLEID { tb_uid_t suid; tb_uid_t uid; @@ -536,7 +550,7 @@ struct SDataFile { int64_t size; }; -struct SLastFile { +struct SSttFile { volatile int32_t nRef; int64_t commitID; @@ -556,8 +570,9 @@ struct SDFileSet { int32_t fid; SHeadFile *pHeadF; SDataFile *pDataF; - SLastFile *pLastF; SSmaFile *pSmaF; + uint8_t nSttF; + SSttFile *aSttF[TSDB_MAX_STT_FILE]; }; struct SRowIter { @@ -572,37 +587,76 @@ struct SRowMerger { SArray *pArray; // SArray }; +typedef struct { + char *path; + int32_t szPage; + int32_t flag; + TdFilePtr pFD; + int64_t pgno; + uint8_t *pBuf; + int64_t szFile; +} STsdbFD; + struct SDelFWriter { - STsdb *pTsdb; - SDelFile fDel; - TdFilePtr pWriteH; - + STsdb *pTsdb; + SDelFile fDel; + STsdbFD *pWriteH; uint8_t *aBuf[1]; }; -struct SDataFWriter { - STsdb *pTsdb; - SDFileSet wSet; - - TdFilePtr pHeadFD; - TdFilePtr pDataFD; - TdFilePtr pLastFD; - TdFilePtr pSmaFD; - - SHeadFile fHead; - SDataFile fData; - SLastFile fLast; - SSmaFile fSma; - - uint8_t *aBuf[4]; -}; - struct STsdbReadSnap { SMemTable *pMem; SMemTable *pIMem; STsdbFS fs; }; +struct SDataFWriter { + STsdb *pTsdb; + SDFileSet wSet; + + STsdbFD *pHeadFD; + STsdbFD *pDataFD; + STsdbFD *pSmaFD; + STsdbFD *pSttFD; + + SHeadFile fHead; + SDataFile fData; + SSmaFile fSma; + SSttFile fStt[TSDB_MAX_STT_FILE]; + + uint8_t *aBuf[4]; +}; + +struct SDataFReader { + STsdb *pTsdb; + SDFileSet *pSet; + STsdbFD *pHeadFD; + STsdbFD *pDataFD; + STsdbFD *pSmaFD; + STsdbFD *aSttFD[TSDB_MAX_STT_FILE]; + uint8_t *aBuf[3]; +}; + +typedef struct { + int64_t suid; + int64_t uid; + TSDBROW row; +} SRowInfo; + +typedef struct SMergeTree { + int8_t backward; + SRBTree rbt; + SArray *pIterList; + SLDataIter *pIter; +} SMergeTree; + +int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, SDataFReader *pFReader, uint64_t suid, uint64_t uid, + STimeWindow *pTimeWindow, SVersionRange *pVerRange); +void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter); +bool tMergeTreeNext(SMergeTree *pMTree); +TSDBROW tMergeTreeGetRow(SMergeTree *pMTree); +void tMergeTreeClose(SMergeTree *pMTree); + // ========== inline functions ========== static FORCE_INLINE int32_t tsdbKeyCmprFn(const void *p1, const void *p2) { TSDBKEY *pKey1 = (TSDBKEY *)p1; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index c644f1d08c..62aba649a3 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -36,6 +36,7 @@ #include "tlosertree.h" #include "tlrucache.h" #include "tmsgcb.h" +#include "trbtree.h" #include "tref.h" #include "tskiplist.h" #include "tstream.h" @@ -70,8 +71,8 @@ typedef struct SStreamTaskReader SStreamTaskReader; typedef struct SStreamTaskWriter SStreamTaskWriter; typedef struct SStreamStateReader SStreamStateReader; typedef struct SStreamStateWriter SStreamStateWriter; -typedef struct SRsmaSnapReader SRsmaSnapReader; -typedef struct SRsmaSnapWriter SRsmaSnapWriter; +typedef struct SRSmaSnapReader SRSmaSnapReader; +typedef struct SRSmaSnapWriter SRSmaSnapWriter; typedef struct SSnapDataHdr SSnapDataHdr; #define VNODE_META_DIR "meta" @@ -103,7 +104,7 @@ int metaCreateSTable(SMeta* pMeta, int64_t version, SVCreateStbReq* int metaAlterSTable(SMeta* pMeta, int64_t version, SVCreateStbReq* pReq); int metaDropSTable(SMeta* pMeta, int64_t verison, SVDropStbReq* pReq, SArray* tbUidList); int metaCreateTable(SMeta* pMeta, int64_t version, SVCreateTbReq* pReq, STableMetaRsp** pMetaRsp); -int metaDropTable(SMeta* pMeta, int64_t version, SVDropTbReq* pReq, SArray* tbUids); +int metaDropTable(SMeta* pMeta, int64_t version, SVDropTbReq* pReq, SArray* tbUids, int64_t *tbUid); int metaTtlDropTable(SMeta* pMeta, int64_t ttl, SArray* tbUids); int metaAlterTable(SMeta* pMeta, int64_t version, SVAlterTbReq* pReq, STableMetaRsp* pMetaRsp); SSchemaWrapper* metaGetTableSchema(SMeta* pMeta, tb_uid_t uid, int32_t sver, bool isinline); @@ -207,7 +208,7 @@ int32_t tdProcessRSmaCreate(SSma* pSma, SVCreateStbReq* pReq); int32_t tdProcessRSmaSubmit(SSma* pSma, void* pMsg, int32_t inputType); int32_t tdProcessRSmaDrop(SSma* pSma, SVDropStbReq* pReq); int32_t tdFetchTbUidList(SSma* pSma, STbUidStore** ppStore, tb_uid_t suid, tb_uid_t uid); -int32_t tdUpdateTbUidList(SSma* pSma, STbUidStore* pUidStore); +int32_t tdUpdateTbUidList(SSma* pSma, STbUidStore* pUidStore, bool isAdd); void tdUidStoreDestory(STbUidStore* pStore); void* tdUidStoreFree(STbUidStore* pStore); @@ -247,14 +248,14 @@ int32_t tqOffsetSnapWrite(STqOffsetWriter* pWriter, uint8_t* pData, uint32_t nDa // SStreamTaskReader ====================================== // SStreamStateWriter ===================================== // SStreamStateReader ===================================== -// SRsmaSnapReader ======================================== -int32_t rsmaSnapReaderOpen(SSma* pSma, int64_t sver, int64_t ever, SRsmaSnapReader** ppReader); -int32_t rsmaSnapReaderClose(SRsmaSnapReader** ppReader); -int32_t rsmaSnapRead(SRsmaSnapReader* pReader, uint8_t** ppData); -// SRsmaSnapWriter ======================================== -int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, SRsmaSnapWriter** ppWriter); -int32_t rsmaSnapWrite(SRsmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData); -int32_t rsmaSnapWriterClose(SRsmaSnapWriter** ppWriter, int8_t rollback); +// SRSmaSnapReader ======================================== +int32_t rsmaSnapReaderOpen(SSma* pSma, int64_t sver, int64_t ever, SRSmaSnapReader** ppReader); +int32_t rsmaSnapReaderClose(SRSmaSnapReader** ppReader); +int32_t rsmaSnapRead(SRSmaSnapReader* pReader, uint8_t** ppData); +// SRSmaSnapWriter ======================================== +int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, SRSmaSnapWriter** ppWriter); +int32_t rsmaSnapWrite(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData); +int32_t rsmaSnapWriterClose(SRSmaSnapWriter** ppWriter, int8_t rollback); typedef struct { int8_t streamType; // sma or other diff --git a/source/dnode/vnode/src/meta/metaQuery.c b/source/dnode/vnode/src/meta/metaQuery.c index 9cbacd4e36..a34569b08e 100644 --- a/source/dnode/vnode/src/meta/metaQuery.c +++ b/source/dnode/vnode/src/meta/metaQuery.c @@ -291,6 +291,38 @@ _query: tDecoderClear(&dc); goto _exit; } + { // Traverse to find the previous qualified data + TBC *pCur; + tdbTbcOpen(pMeta->pTbDb, &pCur, NULL); + STbDbKey key = {.version = sver, .uid = INT64_MAX}; + int c = 0; + tdbTbcMoveTo(pCur, &key, sizeof(key), &c); + if(c < 0){ + tdbTbcMoveToPrev(pCur); + } + + void *pKey = NULL; + void *pVal = NULL; + int vLen = 0, kLen = 0; + while(1){ + int32_t ret = tdbTbcPrev(pCur, &pKey, &kLen, &pVal, &vLen); + if (ret < 0) break; + + STbDbKey *tmp = (STbDbKey*)pKey; + if(tmp->uid != uid){ + continue; + } + SDecoder dcNew = {0}; + SMetaEntry meNew = {0}; + tDecoderInit(&dcNew, pVal, vLen); + metaDecodeEntry(&dcNew, &meNew); + pSchema = tCloneSSchemaWrapper(&meNew.stbEntry.schemaRow); + tDecoderClear(&dcNew); + tdbTbcClose(pCur); + goto _exit; + } + tdbTbcClose(pCur); + } } else if (me.type == TSDB_CHILD_TABLE) { uid = me.ctbEntry.suid; tDecoderClear(&dc); diff --git a/source/dnode/vnode/src/meta/metaTable.c b/source/dnode/vnode/src/meta/metaTable.c index 583a2e098f..2c11b9bf0f 100644 --- a/source/dnode/vnode/src/meta/metaTable.c +++ b/source/dnode/vnode/src/meta/metaTable.c @@ -474,7 +474,7 @@ _err: return -1; } -int metaDropTable(SMeta *pMeta, int64_t version, SVDropTbReq *pReq, SArray *tbUids) { +int metaDropTable(SMeta *pMeta, int64_t version, SVDropTbReq *pReq, SArray *tbUids, tb_uid_t *tbUid) { void *pData = NULL; int nData = 0; int rc = 0; @@ -496,6 +496,10 @@ int metaDropTable(SMeta *pMeta, int64_t version, SVDropTbReq *pReq, SArray *tbUi taosArrayPush(tbUids, &uid); } + if ((type == TSDB_CHILD_TABLE) && tbUid) { + *tbUid = uid; + } + tdbFree(pData); return 0; } diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 3cf50a035a..fd300ef34a 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -15,13 +15,15 @@ #include "sma.h" +extern SSmaMgmt smaMgmt; + static int32_t tdProcessRSmaSyncPreCommitImpl(SSma *pSma); static int32_t tdProcessRSmaSyncCommitImpl(SSma *pSma); static int32_t tdProcessRSmaSyncPostCommitImpl(SSma *pSma); static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma); static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma); static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma); -static int32_t tdCleanupQTaskInfoFiles(SSma *pSma, SRSmaStat *pRSmaStat); +static int32_t tdUpdateQTaskInfoFiles(SSma *pSma, SRSmaStat *pRSmaStat); /** * @brief Only applicable to Rollup SMA @@ -166,114 +168,51 @@ static int32_t tdProcessRSmaSyncCommitImpl(SSma *pSma) { return TSDB_CODE_SUCCESS; } -static int32_t tdCleanupQTaskInfoFiles(SSma *pSma, SRSmaStat *pRSmaStat) { - SVnode *pVnode = pSma->pVnode; - int64_t committed = pRSmaStat->commitAppliedVer; - TdDirPtr pDir = NULL; - TdDirEntryPtr pDirEntry = NULL; - char dir[TSDB_FILENAME_LEN]; - const char *pattern = "v[0-9]+qinf\\.v([0-9]+)?$"; - regex_t regex; - int code = 0; - - tdGetVndDirName(TD_VID(pVnode), tfsGetPrimaryPath(pVnode->pTfs), VNODE_RSMA_DIR, true, dir); - - // Resource allocation and init - if ((code = regcomp(®ex, pattern, REG_EXTENDED)) != 0) { - char errbuf[128]; - regerror(code, ®ex, errbuf, sizeof(errbuf)); - smaWarn("vgId:%d, rsma post commit, regcomp for %s failed since %s", TD_VID(pVnode), dir, errbuf); - return TSDB_CODE_FAILED; - } - - if ((pDir = taosOpenDir(dir)) == NULL) { - regfree(®ex); - terrno = TAOS_SYSTEM_ERROR(errno); - smaDebug("vgId:%d, rsma post commit, open dir %s failed since %s", TD_VID(pVnode), dir, terrstr()); - return TSDB_CODE_FAILED; - } - - int32_t dirLen = strlen(dir); - char *dirEnd = POINTER_SHIFT(dir, dirLen); - regmatch_t regMatch[2]; - while ((pDirEntry = taosReadDir(pDir)) != NULL) { - char *entryName = taosGetDirEntryName(pDirEntry); - if (!entryName) { - continue; - } - - code = regexec(®ex, entryName, 2, regMatch, 0); - - if (code == 0) { - // match - int64_t version = -1; - sscanf((const char *)POINTER_SHIFT(entryName, regMatch[1].rm_so), "%" PRIi64, &version); - if ((version < committed) && (version > -1)) { - strncpy(dirEnd, entryName, TSDB_FILENAME_LEN - dirLen); - if (taosRemoveFile(dir) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - smaWarn("vgId:%d, committed version:%" PRIi64 ", failed to remove %s since %s", TD_VID(pVnode), committed, - dir, terrstr()); - } else { - smaDebug("vgId:%d, committed version:%" PRIi64 ", success to remove %s", TD_VID(pVnode), committed, dir); - } - } - } else if (code == REG_NOMATCH) { - // not match - smaTrace("vgId:%d, rsma post commit, not match %s", TD_VID(pVnode), entryName); - continue; - } else { - // has other error - char errbuf[128]; - regerror(code, ®ex, errbuf, sizeof(errbuf)); - smaWarn("vgId:%d, rsma post commit, regexec failed since %s", TD_VID(pVnode), errbuf); - - taosCloseDir(&pDir); - regfree(®ex); - return TSDB_CODE_FAILED; - } - } - - taosCloseDir(&pDir); - regfree(®ex); - - return TSDB_CODE_SUCCESS; -} - // SQTaskFile ====================================================== -// int32_t tCmprQTaskFile(void const *lhs, void const *rhs) { -// int64_t *lCommitted = *(int64_t *)lhs; -// SQTaskFile *rQTaskF = (SQTaskFile *)rhs; -// if (lCommitted < rQTaskF->commitID) { -// return -1; -// } else if (lCommitted > rQTaskF->commitID) { -// return 1; -// } - -// return 0; -// } - -#if 0 /** * @brief At most time, there is only one qtaskinfo file committed latest in aTaskFile. Sometimes, there would be * multiple qtaskinfo files supporting snapshot replication. * * @param pSma - * @param pRSmaStat + * @param pStat * @return int32_t */ -static int32_t tdCleanupQTaskInfoFiles(SSma *pSma, SRSmaStat *pRSmaStat) { - SVnode *pVnode = pSma->pVnode; - int64_t committed = pRSmaStat->commitAppliedVer; - SArray *aTaskFile = pRSmaStat->aTaskFile; +static int32_t tdUpdateQTaskInfoFiles(SSma *pSma, SRSmaStat *pStat) { + SVnode *pVnode = pSma->pVnode; + SRSmaFS *pFS = RSMA_FS(pStat); + int64_t committed = pStat->commitAppliedVer; + char qTaskInfoFullName[TSDB_FILENAME_LEN]; - void *qTaskFile = taosArraySearch(aTaskFile, committed, tCmprQTaskFile, TD_LE); - + taosWLockLatch(RSMA_FS_LOCK(pStat)); + for (int32_t i = 0; i < taosArrayGetSize(pFS->aQTaskInf);) { + SQTaskFile *pTaskF = taosArrayGet(pFS->aQTaskInf, i); + int32_t oldVal = atomic_fetch_sub_32(&pTaskF->nRef, 1); + if ((oldVal <= 1) && (pTaskF->version < committed)) { + tdRSmaQTaskInfoGetFullName(TD_VID(pVnode), pTaskF->version, tfsGetPrimaryPath(pVnode->pTfs), qTaskInfoFullName); + if (taosRemoveFile(qTaskInfoFullName) < 0) { + smaWarn("vgId:%d, cleanup qinf, committed %" PRIi64 ", failed to remove %s since %s", TD_VID(pVnode), committed, + qTaskInfoFullName, tstrerror(TAOS_SYSTEM_ERROR(errno))); + } else { + smaDebug("vgId:%d, cleanup qinf, committed %" PRIi64 ", success to remove %s", TD_VID(pVnode), committed, + qTaskInfoFullName); + } + taosArrayRemove(pFS->aQTaskInf, i); + continue; + } + ++i; + } + + SQTaskFile qFile = {.nRef = 1, .padding = 0, .version = committed, .size = 0}; + if (tdRSmaFSUpsertQTaskFile(pFS, &qFile) < 0) { + taosWUnLockLatch(RSMA_FS_LOCK(pStat)); + return TSDB_CODE_FAILED; + } + + taosWUnLockLatch(RSMA_FS_LOCK(pStat)); return TSDB_CODE_SUCCESS; } -#endif /** * @brief post-commit for rollup sma @@ -290,8 +229,7 @@ static int32_t tdProcessRSmaSyncPostCommitImpl(SSma *pSma) { SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pSma); - // cleanup outdated qtaskinfo files - tdCleanupQTaskInfoFiles(pSma, pRSmaStat); + tdUpdateQTaskInfoFiles(pSma, pRSmaStat); return TSDB_CODE_SUCCESS; } @@ -427,8 +365,7 @@ static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma) { return TSDB_CODE_SUCCESS; } - SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); - SArray *rsmaDeleted = NULL; + SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); // step 1: merge qTaskInfo and iQTaskInfo // lock @@ -441,11 +378,7 @@ static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma) { if (RSMA_INFO_IS_DEL(pRSmaInfo)) { int32_t refVal = T_REF_VAL_GET(pRSmaInfo); if (refVal == 0) { - if (!rsmaDeleted) { - if ((rsmaDeleted = taosArrayInit(1, sizeof(tb_uid_t)))) { - taosArrayPush(rsmaDeleted, pSuid); - } - } + taosHashRemove(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(*pSuid)); } else { smaDebug( "vgId:%d, rsma async post commit, not free rsma info since ref is %d although already deleted for " @@ -471,25 +404,10 @@ static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma) { #endif } - for (int32_t i = 0; i < taosArrayGetSize(rsmaDeleted); ++i) { - tb_uid_t *pSuid = taosArrayGet(rsmaDeleted, i); - void *pRSmaInfo = taosHashGet(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t)); - if ((pRSmaInfo = *(SRSmaInfo **)pRSmaInfo)) { - tdFreeRSmaInfo(pSma, pRSmaInfo, true); - smaDebug( - "vgId:%d, rsma async post commit, free rsma info since already deleted and ref is 0 for " - "table:%" PRIi64, - SMA_VID(pSma), *pSuid); - } - taosHashRemove(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t)); - } - taosArrayDestroy(rsmaDeleted); - // unlock // taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); - // step 2: cleanup outdated qtaskinfo files - tdCleanupQTaskInfoFiles(pSma, pRSmaStat); + tdUpdateQTaskInfoFiles(pSma, pRSmaStat); atomic_store_8(RSMA_COMMIT_STAT(pRSmaStat), 0); diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index 32a419022a..2595a629ba 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -28,6 +28,8 @@ static int32_t tdInitSmaEnv(SSma *pSma, int8_t smaType, SSmaEnv **ppEnv); static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pSma); static int32_t tdRsmaStartExecutor(const SSma *pSma); static int32_t tdRsmaStopExecutor(const SSma *pSma); +static int32_t tdDestroySmaState(SSmaStat *pSmaStat, int8_t smaType); +static void *tdFreeSmaState(SSmaStat *pSmaStat, int8_t smaType); static void *tdFreeTSmaStat(STSmaStat *pStat); static void tdDestroyRSmaStat(void *pRSmaStat); @@ -59,12 +61,23 @@ int32_t smaInit() { return TSDB_CODE_FAILED; } + int32_t type = (8 == POINTER_BYTES) ? TSDB_DATA_TYPE_UBIGINT : TSDB_DATA_TYPE_UINT; + smaMgmt.refHash = taosHashInit(64, taosGetDefaultHashFunction(type), true, HASH_ENTRY_LOCK); + if (!smaMgmt.refHash) { + taosCloseRef(smaMgmt.rsetId); + atomic_store_8(&smaMgmt.inited, 0); + smaError("failed to init sma tmr hanle since %s", terrstr()); + return TSDB_CODE_FAILED; + } + // init fetch timer handle smaMgmt.tmrHandle = taosTmrInit(10000, 100, 10000, "RSMA"); if (!smaMgmt.tmrHandle) { taosCloseRef(smaMgmt.rsetId); + taosHashCleanup(smaMgmt.refHash); + smaMgmt.refHash = NULL; atomic_store_8(&smaMgmt.inited, 0); - smaError("failed to init sma tmr hanle since %s", terrstr()); + smaError("failed to init sma tmr handle since %s", terrstr()); return TSDB_CODE_FAILED; } @@ -93,6 +106,8 @@ void smaCleanUp() { if (old == 1) { taosCloseRef(smaMgmt.rsetId); + taosHashCleanup(smaMgmt.refHash); + smaMgmt.refHash = NULL; taosTmrCleanUp(smaMgmt.tmrHandle); smaInfo("sma mgmt env is cleaned up, rsetId:%d, tmrHandle:%p", smaMgmt.rsetId, smaMgmt.tmrHandle); atomic_store_8(&smaMgmt.inited, 0); @@ -195,6 +210,21 @@ int32_t tdUnRefRSmaInfo(SSma *pSma, SRSmaInfo *pRSmaInfo) { return 0; } +static void tRSmaInfoHashFreeNode(void *data) { + SRSmaInfo *pRSmaInfo = NULL; + SRSmaInfoItem *pItem = NULL; + + if ((pRSmaInfo = *(SRSmaInfo **)data)) { + if ((pItem = RSMA_INFO_ITEM((SRSmaInfo *)pRSmaInfo, 0)) && pItem->level) { + taosHashRemove(smaMgmt.refHash, &pItem, POINTER_BYTES); + } + if ((pItem = RSMA_INFO_ITEM((SRSmaInfo *)pRSmaInfo, 1)) && pItem->level) { + taosHashRemove(smaMgmt.refHash, &pItem, POINTER_BYTES); + } + tdFreeRSmaInfo(pRSmaInfo->pSma, pRSmaInfo, true); + } +} + static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pSma) { ASSERT(pSmaStat != NULL); @@ -240,10 +270,16 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS if (!RSMA_INFO_HASH(pRSmaStat)) { return TSDB_CODE_FAILED; } + taosHashSetFreeFp(RSMA_INFO_HASH(pRSmaStat), tRSmaInfoHashFreeNode); if (tdRsmaStartExecutor(pSma) < 0) { return TSDB_CODE_FAILED; } + + if (!(RSMA_FS(pRSmaStat)->aQTaskInf = taosArrayInit(1, sizeof(SQTaskFile)))) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return TSDB_CODE_FAILED; + } } else if (smaType == TSDB_SMA_TYPE_TIME_RANGE) { // TODO } else { @@ -278,14 +314,6 @@ static void tdDestroyRSmaStat(void *pRSmaStat) { tsem_destroy(&(pStat->notEmpty)); // step 2: destroy the rsma info and associated fetch tasks - if (taosHashGetSize(RSMA_INFO_HASH(pStat)) > 0) { - void *infoHash = taosHashIterate(RSMA_INFO_HASH(pStat), NULL); - while (infoHash) { - SRSmaInfo *pSmaInfo = *(SRSmaInfo **)infoHash; - tdFreeRSmaInfo(pSma, pSmaInfo, true); - infoHash = taosHashIterate(RSMA_INFO_HASH(pStat), infoHash); - } - } taosHashCleanup(RSMA_INFO_HASH(pStat)); // step 3: wait for all triggered fetch tasks to finish @@ -307,12 +335,15 @@ static void tdDestroyRSmaStat(void *pRSmaStat) { // step 4: tdRsmaStopExecutor(pSma); - // step 5: free pStat + // step 5: + tdRSmaFSClose(RSMA_FS(pStat)); + + // step 6: free pStat taosMemoryFreeClear(pStat); } } -void *tdFreeSmaState(SSmaStat *pSmaStat, int8_t smaType) { +static void *tdFreeSmaState(SSmaStat *pSmaStat, int8_t smaType) { tdDestroySmaState(pSmaStat, smaType); if (smaType == TSDB_SMA_TYPE_TIME_RANGE) { taosMemoryFreeClear(pSmaStat); @@ -329,7 +360,7 @@ void *tdFreeSmaState(SSmaStat *pSmaStat, int8_t smaType) { * @return int32_t */ -int32_t tdDestroySmaState(SSmaStat *pSmaStat, int8_t smaType) { +static int32_t tdDestroySmaState(SSmaStat *pSmaStat, int8_t smaType) { if (pSmaStat) { if (smaType == TSDB_SMA_TYPE_TIME_RANGE) { tdDestroyTSmaStat(SMA_STAT_TSMA(pSmaStat)); @@ -337,7 +368,7 @@ int32_t tdDestroySmaState(SSmaStat *pSmaStat, int8_t smaType) { SRSmaStat *pRSmaStat = &pSmaStat->rsmaStat; int32_t vid = SMA_VID(pRSmaStat->pSma); int64_t refId = RSMA_REF_ID(pRSmaStat); - if (taosRemoveRef(smaMgmt.rsetId, RSMA_REF_ID(pRSmaStat)) < 0) { + if (taosRemoveRef(smaMgmt.rsetId, refId) < 0) { smaError("vgId:%d, remove refId:%" PRIi64 " from rsmaRef:%" PRIi32 " failed since %s", vid, refId, smaMgmt.rsetId, terrstr()); } else { diff --git a/source/dnode/vnode/src/sma/smaFS.c b/source/dnode/vnode/src/sma/smaFS.c new file mode 100644 index 0000000000..4387db553e --- /dev/null +++ b/source/dnode/vnode/src/sma/smaFS.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "sma.h" + +// ================================================================================================= + +static int32_t tdFetchQTaskInfoFiles(SSma *pSma, int64_t version, SArray **output); +static int32_t tdQTaskInfCmprFn1(const void *p1, const void *p2); +static int32_t tdQTaskInfCmprFn2(const void *p1, const void *p2); +/** + * @brief Open RSma FS from qTaskInfo files + * + * @param pSma + * @param version + * @return int32_t + */ +int32_t tdRSmaFSOpen(SSma *pSma, int64_t version) { + SVnode *pVnode = pSma->pVnode; + int64_t commitID = pVnode->state.commitID; + SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); + SRSmaStat *pStat = NULL; + SArray *output = NULL; + + terrno = TSDB_CODE_SUCCESS; + + if (!pEnv) { + return TSDB_CODE_SUCCESS; + } + + if (tdFetchQTaskInfoFiles(pSma, version, &output) < 0) { + goto _end; + } + + pStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); + + for (int32_t i = 0; i < taosArrayGetSize(output); ++i) { + int32_t vid = 0; + int64_t version = -1; + sscanf((const char *)taosArrayGetP(output, i), "v%dqinfo.v%" PRIi64, &vid, &version); + SQTaskFile qTaskFile = {.version = version, .nRef = 1}; + if ((terrno = tdRSmaFSUpsertQTaskFile(RSMA_FS(pStat), &qTaskFile)) < 0) { + goto _end; + } + smaInfo("vgId:%d, open fs, version:%" PRIi64 ", ref:%" PRIi64, TD_VID(pVnode), qTaskFile.version, qTaskFile.nRef); + } + +_end: + for (int32_t i = 0; i < taosArrayGetSize(output); ++i) { + void *ptr = taosArrayGetP(output, i); + taosMemoryFreeClear(ptr); + } + taosArrayDestroy(output); + + if (terrno != TSDB_CODE_SUCCESS) { + smaError("vgId:%d, open rsma fs failed since %s", TD_VID(pVnode), terrstr()); + return TSDB_CODE_FAILED; + } + return TSDB_CODE_SUCCESS; +} + +void tdRSmaFSClose(SRSmaFS *fs) { taosArrayDestroy(fs->aQTaskInf); } + +static int32_t tdQTaskInfCmprFn1(const void *p1, const void *p2) { + if (*(int64_t *)p1 < ((SQTaskFile *)p2)->version) { + return -1; + } else if (*(int64_t *)p1 > ((SQTaskFile *)p2)->version) { + return 1; + } + return 0; +} + +int32_t tdRSmaFSRef(SSma *pSma, SRSmaStat *pStat, int64_t version) { + SArray *aQTaskInf = RSMA_FS(pStat)->aQTaskInf; + SQTaskFile *pTaskF = NULL; + int32_t oldVal = 0; + + taosRLockLatch(RSMA_FS_LOCK(pStat)); + if ((pTaskF = taosArraySearch(aQTaskInf, &version, tdQTaskInfCmprFn1, TD_EQ))) { + oldVal = atomic_fetch_add_32(&pTaskF->nRef, 1); + ASSERT(oldVal > 0); + } + taosRUnLockLatch(RSMA_FS_LOCK(pStat)); + return oldVal; +} + +void tdRSmaFSUnRef(SSma *pSma, SRSmaStat *pStat, int64_t version) { + SVnode *pVnode = pSma->pVnode; + SArray *aQTaskInf = RSMA_FS(pStat)->aQTaskInf; + char qTaskFullName[TSDB_FILENAME_LEN]; + SQTaskFile *pTaskF = NULL; + int32_t idx = -1; + + taosWLockLatch(RSMA_FS_LOCK(pStat)); + if ((idx = taosArraySearchIdx(aQTaskInf, &version, tdQTaskInfCmprFn1, TD_EQ)) >= 0) { + ASSERT(idx < taosArrayGetSize(aQTaskInf)); + pTaskF = taosArrayGet(aQTaskInf, idx); + if (atomic_sub_fetch_32(&pTaskF->nRef, 1) <= 0) { + tdRSmaQTaskInfoGetFullName(TD_VID(pVnode), pTaskF->version, tfsGetPrimaryPath(pVnode->pTfs), qTaskFullName); + if (taosRemoveFile(qTaskFullName) < 0) { + smaWarn("vgId:%d, failed to remove %s since %s", TD_VID(pVnode), qTaskFullName, + tstrerror(TAOS_SYSTEM_ERROR(errno))); + } else { + smaDebug("vgId:%d, success to remove %s", TD_VID(pVnode), qTaskFullName); + } + taosArrayRemove(aQTaskInf, idx); + } + } + taosWUnLockLatch(RSMA_FS_LOCK(pStat)); +} + +/** + * @brief Fetch qtaskfiles LE than version + * + * @param pSma + * @param version + * @param output + * @return int32_t + */ +static int32_t tdFetchQTaskInfoFiles(SSma *pSma, int64_t version, SArray **output) { + SVnode *pVnode = pSma->pVnode; + TdDirPtr pDir = NULL; + TdDirEntryPtr pDirEntry = NULL; + char dir[TSDB_FILENAME_LEN]; + const char *pattern = "v[0-9]+qinf\\.v([0-9]+)?$"; + regex_t regex; + int code = 0; + + tdGetVndDirName(TD_VID(pVnode), tfsGetPrimaryPath(pVnode->pTfs), VNODE_RSMA_DIR, true, dir); + + if (!taosCheckExistFile(dir)) { + smaDebug("vgId:%d, fetch qtask files, no need as dir %s not exist", TD_VID(pVnode), dir); + return TSDB_CODE_SUCCESS; + } + + // Resource allocation and init + if ((code = regcomp(®ex, pattern, REG_EXTENDED)) != 0) { + terrno = TSDB_CODE_RSMA_REGEX_MATCH; + char errbuf[128]; + regerror(code, ®ex, errbuf, sizeof(errbuf)); + smaWarn("vgId:%d, fetch qtask files, regcomp for %s failed since %s", TD_VID(pVnode), dir, errbuf); + return TSDB_CODE_FAILED; + } + + if (!(pDir = taosOpenDir(dir))) { + regfree(®ex); + terrno = TAOS_SYSTEM_ERROR(errno); + smaError("vgId:%d, fetch qtask files, open dir %s failed since %s", TD_VID(pVnode), dir, terrstr()); + return TSDB_CODE_FAILED; + } + + int32_t dirLen = strlen(dir); + char *dirEnd = POINTER_SHIFT(dir, dirLen); + regmatch_t regMatch[2]; + while ((pDirEntry = taosReadDir(pDir))) { + char *entryName = taosGetDirEntryName(pDirEntry); + if (!entryName) { + continue; + } + + code = regexec(®ex, entryName, 2, regMatch, 0); + + if (code == 0) { + // match + smaInfo("vgId:%d, fetch qtask files, max ver:%" PRIi64 ", %s found", TD_VID(pVnode), version, entryName); + + int64_t ver = -1; + sscanf((const char *)POINTER_SHIFT(entryName, regMatch[1].rm_so), "%" PRIi64, &ver); + if ((ver <= version) && (ver > -1)) { + if (!(*output)) { + if (!(*output = taosArrayInit(1, POINTER_BYTES))) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _end; + } + } + char *entryDup = strdup(entryName); + if (!entryDup) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _end; + } + if (!taosArrayPush(*output, &entryDup)) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _end; + } + } else { + } + } else if (code == REG_NOMATCH) { + // not match + smaTrace("vgId:%d, fetch qtask files, not match %s", TD_VID(pVnode), entryName); + continue; + } else { + // has other error + char errbuf[128]; + regerror(code, ®ex, errbuf, sizeof(errbuf)); + smaWarn("vgId:%d, fetch qtask files, regexec failed since %s", TD_VID(pVnode), errbuf); + terrno = TSDB_CODE_RSMA_REGEX_MATCH; + goto _end; + } + } +_end: + taosCloseDir(&pDir); + regfree(®ex); + return terrno == 0 ? TSDB_CODE_SUCCESS : TSDB_CODE_FAILED; +} + +static int32_t tdQTaskFileCmprFn2(const void *p1, const void *p2) { + if (((SQTaskFile *)p1)->version < ((SQTaskFile *)p2)->version) { + return -1; + } else if (((SQTaskFile *)p1)->version > ((SQTaskFile *)p2)->version) { + return 1; + } + + return 0; +} + +int32_t tdRSmaFSUpsertQTaskFile(SRSmaFS *pFS, SQTaskFile *qTaskFile) { + int32_t code = 0; + int32_t idx = taosArraySearchIdx(pFS->aQTaskInf, qTaskFile, tdQTaskFileCmprFn2, TD_GE); + + if (idx < 0) { + idx = taosArrayGetSize(pFS->aQTaskInf); + } else { + SQTaskFile *pTaskF = (SQTaskFile *)taosArrayGet(pFS->aQTaskInf, idx); + int32_t c = tdQTaskFileCmprFn2(pTaskF, qTaskFile); + if (c == 0) { + pTaskF->nRef = qTaskFile->nRef; + pTaskF->version = qTaskFile->version; + pTaskF->size = qTaskFile->size; + goto _exit; + } + } + + if (taosArrayInsert(pFS->aQTaskInf, idx, qTaskFile) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + +_exit: + return code; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/sma/smaOpen.c b/source/dnode/vnode/src/sma/smaOpen.c index 3c3097bb2f..d9ffda279f 100644 --- a/source/dnode/vnode/src/sma/smaOpen.c +++ b/source/dnode/vnode/src/sma/smaOpen.c @@ -150,7 +150,7 @@ int32_t smaOpen(SVnode *pVnode) { } // restore the rsma - if (tdRsmaRestore(pSma, RSMA_RESTORE_REBOOT, pVnode->state.committed) < 0) { + if (tdRSmaRestore(pSma, RSMA_RESTORE_REBOOT, pVnode->state.committed) < 0) { goto _err; } } @@ -181,8 +181,8 @@ int32_t smaClose(SSma *pSma) { * @param committedVer * @return int32_t */ -int32_t tdRsmaRestore(SSma *pSma, int8_t type, int64_t committedVer) { +int32_t tdRSmaRestore(SSma *pSma, int8_t type, int64_t committedVer) { ASSERT(VND_IS_RSMA(pSma->pVnode)); - return tdProcessRSmaRestoreImpl(pSma, type, committedVer); + return tdRSmaProcessRestoreImpl(pSma, type, committedVer); } \ No newline at end of file diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index f2063e3067..fdd173cdf7 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -20,7 +20,7 @@ #define RSMA_QTASKEXEC_SMOOTH_SIZE (100) // cnt #define RSMA_SUBMIT_BATCH_SIZE (1024) // cnt #define RSMA_FETCH_DELAY_MAX (900000) // ms -#define RSMA_FETCH_ACTIVE_MAX (1800) // ms +#define RSMA_FETCH_ACTIVE_MAX (1000) // ms #define RSMA_FETCH_INTERVAL (5000) // ms SSmaMgmt smaMgmt = { @@ -34,7 +34,7 @@ typedef struct SRSmaQTaskInfoItem SRSmaQTaskInfoItem; typedef struct SRSmaQTaskInfoIter SRSmaQTaskInfoIter; static int32_t tdUidStorePut(STbUidStore *pStore, tb_uid_t suid, tb_uid_t *uid); -static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids); +static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, bool isAdd); static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo, int8_t idx); static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, @@ -42,10 +42,12 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSiz static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid); static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); static void tdFreeRSmaSubmitItems(SArray *pItems); -static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo, SArray *pSubmitArr); +static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo); static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, int64_t suid); static void tdRSmaFetchTrigger(void *param, void *tmrId); +static int32_t tdRSmaInfoClone(SSma *pSma, SRSmaInfo *pInfo); +static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level); static int32_t tdRSmaQTaskInfoIterInit(SRSmaQTaskInfoIter *pIter, STFile *pTFile); static int32_t tdRSmaQTaskInfoIterNextBlock(SRSmaQTaskInfoIter *pIter, bool *isFinish); static int32_t tdRSmaQTaskInfoRestore(SSma *pSma, int8_t type, SRSmaQTaskInfoIter *pIter); @@ -96,7 +98,7 @@ static FORCE_INLINE int32_t tdRSmaQTaskInfoContLen(int32_t lenWithHead) { static FORCE_INLINE void tdRSmaQTaskInfoIterDestroy(SRSmaQTaskInfoIter *pIter) { taosMemoryFreeClear(pIter->pBuf); } -void tdFreeQTaskInfo(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level) { +static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level) { // Note: free/kill may in RC if (!taskHandle || !(*taskHandle)) return; qTaskInfo_t otaskHandle = atomic_load_ptr(taskHandle); @@ -123,20 +125,20 @@ void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) { SRSmaInfoItem *pItem = &pInfo->items[i]; if (isDeepFree && pItem->tmrId) { - smaDebug("vgId:%d, stop fetch timer %p for table %" PRIi64 " level %d", SMA_VID(pSma), pInfo->suid, - pItem->tmrId, i + 1); + smaDebug("vgId:%d, stop fetch timer %p for table %" PRIi64 " level %d", SMA_VID(pSma), pItem->tmrId, + pInfo->suid, i + 1); taosTmrStopA(&pItem->tmrId); } if (isDeepFree && pInfo->taskInfo[i]) { - tdFreeQTaskInfo(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1); + tdRSmaQTaskInfoFree(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1); } else { smaDebug("vgId:%d, table %" PRIi64 " no need to destroy rsma info level %d since empty taskInfo", SMA_VID(pSma), pInfo->suid, i + 1); } if (pInfo->iTaskInfo[i]) { - tdFreeQTaskInfo(&pInfo->iTaskInfo[i], SMA_VID(pSma), i + 1); + tdRSmaQTaskInfoFree(&pInfo->iTaskInfo[i], SMA_VID(pSma), i + 1); } else { smaDebug("vgId:%d, table %" PRIi64 " no need to destroy rsma info level %d since empty iTaskInfo", SMA_VID(pSma), pInfo->suid, i + 1); @@ -173,7 +175,7 @@ static FORCE_INLINE int32_t tdUidStoreInit(STbUidStore **pStore) { return TSDB_CODE_SUCCESS; } -static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids) { +static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, bool isAdd) { SRSmaInfo *pRSmaInfo = NULL; if (!suid || !tbUids) { @@ -197,7 +199,7 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids) for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pRSmaInfo->taskInfo[i]) { - if (((terrno = qUpdateQualifiedTableId(pRSmaInfo->taskInfo[i], tbUids, true)) < 0)) { + if (((terrno = qUpdateQualifiedTableId(pRSmaInfo->taskInfo[i], tbUids, isAdd)) < 0)) { tdReleaseRSmaInfo(pSma, pRSmaInfo); smaError("vgId:%d, update tbUidList failed for uid:%" PRIi64 " level %d since %s", SMA_VID(pSma), *suid, i, terrstr()); @@ -213,12 +215,12 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids) return TSDB_CODE_SUCCESS; } -int32_t tdUpdateTbUidList(SSma *pSma, STbUidStore *pStore) { +int32_t tdUpdateTbUidList(SSma *pSma, STbUidStore *pStore, bool isAdd) { if (!pStore || (taosArrayGetSize(pStore->tbUids) == 0)) { return TSDB_CODE_SUCCESS; } - if (tdUpdateTbUidListImpl(pSma, &pStore->suid, pStore->tbUids) != TSDB_CODE_SUCCESS) { + if (tdUpdateTbUidListImpl(pSma, &pStore->suid, pStore->tbUids, isAdd) != TSDB_CODE_SUCCESS) { return TSDB_CODE_FAILED; } @@ -227,7 +229,7 @@ int32_t tdUpdateTbUidList(SSma *pSma, STbUidStore *pStore) { tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); SArray *pTbUids = *(SArray **)pIter; - if (tdUpdateTbUidListImpl(pSma, pTbSuid, pTbUids) != TSDB_CODE_SUCCESS) { + if (tdUpdateTbUidListImpl(pSma, pTbSuid, pTbUids, isAdd) != TSDB_CODE_SUCCESS) { taosHashCancelIterate(pStore->uidHash, pIter); return TSDB_CODE_FAILED; } @@ -313,10 +315,17 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat } pItem->level = idx == 0 ? TSDB_RETENTION_L1 : TSDB_RETENTION_L2; + ASSERT(pItem->level > 0); + + SRSmaRef rsmaRef = {.refId = pStat->refId, .suid = pRSmaInfo->suid}; + taosHashPut(smaMgmt.refHash, &pItem, POINTER_BYTES, &rsmaRef, sizeof(rsmaRef)); + taosTmrReset(tdRSmaFetchTrigger, pItem->maxDelay, pItem, smaMgmt.tmrHandle, &pItem->tmrId); - smaInfo("vgId:%d, table:%" PRIi64 " level:%" PRIi8 " maxdelay:%" PRIi64 " watermark:%" PRIi64 + + smaInfo("vgId:%d, item:%p table:%" PRIi64 " level:%" PRIi8 " maxdelay:%" PRIi64 " watermark:%" PRIi64 ", finally maxdelay:%" PRIi32, - TD_VID(pVnode), pRSmaInfo->suid, idx + 1, param->maxdelay[idx], param->watermark[idx], pItem->maxDelay); + TD_VID(pVnode), pItem, pRSmaInfo->suid, idx + 1, param->maxdelay[idx], param->watermark[idx], + pItem->maxDelay); } return TSDB_CODE_SUCCESS; } @@ -330,7 +339,7 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat * @param tbName * @return int32_t */ -int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName) { +int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName) { if ((param->qmsgLen[0] == 0) && (param->qmsgLen[1] == 0)) { smaDebug("vgId:%d, no qmsg1/qmsg2 for rollup table %s %" PRIi64, SMA_VID(pSma), tbName, suid); return TSDB_CODE_SUCCESS; @@ -368,7 +377,10 @@ int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION; goto _err; } + pRSmaInfo->pSma = pSma; pRSmaInfo->pTSchema = pTSchema; + pRSmaInfo->suid = suid; + T_REF_INIT_VAL(pRSmaInfo, 1); if (!(pRSmaInfo->queue = taosOpenQueue())) { goto _err; } @@ -382,9 +394,6 @@ int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con if (!(pRSmaInfo->iQall = taosAllocateQall())) { goto _err; } - pRSmaInfo->suid = suid; - pRSmaInfo->refId = RSMA_REF_ID(pStat); - T_REF_INIT_VAL(pRSmaInfo, 1); if (tdSetRSmaInfoItemParams(pSma, param, pStat, pRSmaInfo, 0) < 0) { goto _err; @@ -427,7 +436,7 @@ int32_t tdProcessRSmaCreate(SSma *pSma, SVCreateStbReq *pReq) { return TSDB_CODE_SUCCESS; } - return tdProcessRSmaCreateImpl(pSma, &pReq->rsmaParam, pReq->suid, pReq->name); + return tdRSmaProcessCreateImpl(pSma, &pReq->rsmaParam, pReq->suid, pReq->name); } /** @@ -654,15 +663,15 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma if (taosArrayGetSize(pResList) == 0) { if (terrno == 0) { - // smaDebug("vgId:%d, no rsma %" PRIi8 " data fetched yet", SMA_VID(pSma), pItem->level); + // smaDebug("vgId:%d, no rsma level %" PRIi8 " data fetched yet", SMA_VID(pSma), pItem->level); } else { - smaDebug("vgId:%d, no rsma %" PRIi8 " data fetched since %s", SMA_VID(pSma), pItem->level, terrstr()); + smaDebug("vgId:%d, no rsma level %" PRIi8 " data fetched since %s", SMA_VID(pSma), pItem->level, terrstr()); goto _err; } break; } else { - smaDebug("vgId:%d, rsma %" PRIi8 " data fetched", SMA_VID(pSma), pItem->level); + smaDebug("vgId:%d, rsma level %" PRIi8 " data fetched", SMA_VID(pSma), pItem->level); } #if 0 char flag[10] = {0}; @@ -676,21 +685,22 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma // TODO: the schema update should be handled later(TD-17965) if (buildSubmitReqFromDataBlock(&pReq, output, pTSchema, SMA_VID(pSma), suid) < 0) { - smaError("vgId:%d, build submit req for rsma stable %" PRIi64 " level %" PRIi8 " failed since %s", - SMA_VID(pSma), suid, pItem->level, terrstr()); + smaError("vgId:%d, build submit req for rsma table %" PRIi64 " level %" PRIi8 " failed since %s", SMA_VID(pSma), + suid, pItem->level, terrstr()); goto _err; } if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) { taosMemoryFreeClear(pReq); - smaError("vgId:%d, process submit req for rsma stable %" PRIi64 " level %" PRIi8 " failed since %s", + smaError("vgId:%d, process submit req for rsma table %" PRIi64 " level %" PRIi8 " failed since %s", SMA_VID(pSma), suid, pItem->level, terrstr()); goto _err; } - taosMemoryFreeClear(pReq); - smaDebug("vgId:%d, process submit req for rsma table %" PRIi64 " level %" PRIi8 " version:%" PRIi64, - SMA_VID(pSma), suid, pItem->level, output->info.version); + smaDebug("vgId:%d, process submit req for rsma table %" PRIi64 " level %" PRIi8 " ver %" PRIi64 " len %" PRIu32, + SMA_VID(pSma), suid, pItem->level, output->info.version, htonl(pReq->header.contLen)); + + taosMemoryFreeClear(pReq); } } @@ -817,6 +827,95 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, return TSDB_CODE_SUCCESS; } +static int32_t tdCloneQTaskInfo(SSma *pSma, qTaskInfo_t dstTaskInfo, qTaskInfo_t srcTaskInfo, SRSmaParam *param, + tb_uid_t suid, int8_t idx) { + SVnode *pVnode = pSma->pVnode; + char *pOutput = NULL; + int32_t len = 0; + + if ((terrno = qSerializeTaskStatus(srcTaskInfo, &pOutput, &len)) < 0) { + smaError("vgId:%d, rsma clone, table %" PRIi64 " serialize qTaskInfo failed since %s", TD_VID(pVnode), suid, + terrstr()); + goto _err; + } + + SReadHandle handle = { + .meta = pVnode->pMeta, + .vnode = pVnode, + .initTqReader = 1, + }; + ASSERT(!dstTaskInfo); + dstTaskInfo = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle); + if (!dstTaskInfo) { + terrno = TSDB_CODE_RSMA_QTASKINFO_CREATE; + goto _err; + } + + if (qDeserializeTaskStatus(dstTaskInfo, pOutput, len) < 0) { + smaError("vgId:%d, rsma clone, restore rsma task for table:%" PRIi64 " failed since %s", TD_VID(pVnode), suid, + terrstr()); + goto _err; + } + + smaDebug("vgId:%d, rsma clone, restore rsma task for table:%" PRIi64 " succeed", TD_VID(pVnode), suid); + + taosMemoryFreeClear(pOutput); + return TSDB_CODE_SUCCESS; +_err: + taosMemoryFreeClear(pOutput); + tdRSmaQTaskInfoFree(dstTaskInfo, TD_VID(pVnode), idx + 1); + smaError("vgId:%d, rsma clone, restore rsma task for table:%" PRIi64 " failed since %s", TD_VID(pVnode), suid, + terrstr()); + return TSDB_CODE_FAILED; +} + +/** + * @brief Clone qTaskInfo of SRSmaInfo + * + * @param pSma + * @param pInfo + * @return int32_t + */ +static int32_t tdRSmaInfoClone(SSma *pSma, SRSmaInfo *pInfo) { + SRSmaParam *param = NULL; + if (!pInfo) { + return TSDB_CODE_SUCCESS; + } + + SMetaReader mr = {0}; + metaReaderInit(&mr, SMA_META(pSma), 0); + smaDebug("vgId:%d, rsma clone qTaskInfo for suid:%" PRIi64, SMA_VID(pSma), pInfo->suid); + if (metaGetTableEntryByUid(&mr, pInfo->suid) < 0) { + smaError("vgId:%d, rsma clone, failed to get table meta for %" PRIi64 " since %s", SMA_VID(pSma), pInfo->suid, + terrstr()); + goto _err; + } + ASSERT(mr.me.type == TSDB_SUPER_TABLE); + ASSERT(mr.me.uid == pInfo->suid); + if (TABLE_IS_ROLLUP(mr.me.flags)) { + param = &mr.me.stbEntry.rsmaParam; + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + if (!pInfo->iTaskInfo[i]) { + continue; + } + if (tdCloneQTaskInfo(pSma, pInfo->taskInfo[i], pInfo->iTaskInfo[i], param, pInfo->suid, i) < 0) { + goto _err; + } + } + smaDebug("vgId:%d, rsma clone env success for %" PRIi64, SMA_VID(pSma), pInfo->suid); + } else { + terrno = TSDB_CODE_RSMA_INVALID_SCHEMA; + goto _err; + } + + metaReaderClear(&mr); + return TSDB_CODE_SUCCESS; +_err: + metaReaderClear(&mr); + smaError("vgId:%d, rsma clone env failed for %" PRIi64 " since %s", SMA_VID(pSma), pInfo->suid, terrstr()); + return TSDB_CODE_FAILED; +} + /** * @brief During async commit, the SRSmaInfo object would be COW from iRSmaInfoHash and write lock should be applied. * @@ -848,7 +947,7 @@ static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid) { return NULL; } if (!pRSmaInfo->taskInfo[0]) { - if (tdCloneRSmaInfo(pSma, pRSmaInfo) < 0) { + if (tdRSmaInfoClone(pSma, pRSmaInfo) < 0) { // taosRUnLockLatch(SMA_ENV_LOCK(pEnv)); return NULL; } @@ -1006,7 +1105,7 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) { " qmsgLen:%" PRIi32, TD_VID(pVnode), suid, i, param->maxdelay[i], param->watermark[i], param->qmsgLen[i]); } - if (tdProcessRSmaCreateImpl(pSma, &mr.me.stbEntry.rsmaParam, suid, mr.me.name) < 0) { + if (tdRSmaProcessCreateImpl(pSma, &mr.me.stbEntry.rsmaParam, suid, mr.me.name) < 0) { smaError("vgId:%d, rsma restore env failed for %" PRIi64 " since %s", TD_VID(pVnode), suid, terrstr()); goto _err; } @@ -1019,7 +1118,7 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) { goto _err; } - if (tdUpdateTbUidList(pVnode->pSma, &uidStore) < 0) { + if (tdUpdateTbUidList(pVnode->pSma, &uidStore, true) < 0) { smaError("vgId:%d, rsma restore, update tb uid list failed for %" PRIi64 " since %s", TD_VID(pVnode), suid, terrstr()); goto _err; @@ -1118,7 +1217,7 @@ static int32_t tdRSmaRestoreTSDataReload(SSma *pSma) { return TSDB_CODE_SUCCESS; } -int32_t tdProcessRSmaRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer) { +int32_t tdRSmaProcessRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer) { // step 1: iterate all stables to restore the rsma env int64_t nTables = 0; if (tdRSmaRestoreQTaskInfoInit(pSma, &nTables) < 0) { @@ -1139,6 +1238,12 @@ int32_t tdProcessRSmaRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer) if (tdRSmaRestoreTSDataReload(pSma) < 0) { goto _err; } + + // step 4: open SRSmaFS for qTaskFiles + if (tdRSmaFSOpen(pSma, qtaskFileVer) < 0) { + goto _err; + } + smaInfo("vgId:%d, restore rsma task %" PRIi8 " from qtaskf %" PRIi64 " succeed", SMA_VID(pSma), type, qtaskFileVer); return TSDB_CODE_SUCCESS; _err: @@ -1468,26 +1573,46 @@ _err: * @param tmrId */ static void tdRSmaFetchTrigger(void *param, void *tmrId) { - SRSmaInfoItem *pItem = param; + SRSmaRef *pRSmaRef = NULL; SSma *pSma = NULL; - SRSmaInfo *pRSmaInfo = tdGetRSmaInfoByItem(pItem); + SRSmaStat *pStat = NULL; + SRSmaInfo *pRSmaInfo = NULL; + SRSmaInfoItem *pItem = NULL; - if (RSMA_INFO_IS_DEL(pRSmaInfo)) { - smaDebug("rsma fetch task not start since rsma info already deleted, rsetId:%" PRIi64 " refId:%d)", smaMgmt.rsetId, - pRSmaInfo->refId); + if (!(pRSmaRef = taosHashGet(smaMgmt.refHash, ¶m, POINTER_BYTES))) { + smaDebug("rsma fetch task not start since rsma info item:%p not exist in refHash:%p, rsetId:%" PRIi64, param, + *(int64_t *)¶m, smaMgmt.refHash, smaMgmt.rsetId); return; } - SRSmaStat *pStat = (SRSmaStat *)tdAcquireSmaRef(smaMgmt.rsetId, pRSmaInfo->refId); - - if (!pStat) { + if (!(pStat = (SRSmaStat *)tdAcquireSmaRef(smaMgmt.rsetId, pRSmaRef->refId))) { smaDebug("rsma fetch task not start since rsma stat already destroyed, rsetId:%" PRIi64 " refId:%d)", - smaMgmt.rsetId, pRSmaInfo->refId); + smaMgmt.rsetId, pRSmaRef->refId); // pRSmaRef freed in taosHashRemove + taosHashRemove(smaMgmt.refHash, ¶m, POINTER_BYTES); return; } pSma = pStat->pSma; + if (!(pRSmaInfo = tdAcquireRSmaInfoBySuid(pSma, pRSmaRef->suid))) { + smaDebug("rsma fetch task not start since rsma info not exist, rsetId:%" PRIi64 " refId:%d)", smaMgmt.rsetId, + pRSmaRef->refId); // pRSmaRef freed in taosHashRemove + tdReleaseSmaRef(smaMgmt.rsetId, pRSmaRef->refId); + taosHashRemove(smaMgmt.refHash, ¶m, POINTER_BYTES); + return; + } + + if (RSMA_INFO_IS_DEL(pRSmaInfo)) { + smaDebug("rsma fetch task not start since rsma info already deleted, rsetId:%" PRIi64 " refId:%d)", smaMgmt.rsetId, + pRSmaRef->refId); // pRSmaRef freed in taosHashRemove + tdReleaseRSmaInfo(pSma, pRSmaInfo); + tdReleaseSmaRef(smaMgmt.rsetId, pRSmaRef->refId); + taosHashRemove(smaMgmt.refHash, ¶m, POINTER_BYTES); + return; + } + + pItem = *(SRSmaInfoItem **)¶m; + // if rsma trigger stat in paused, cancelled or finished, not start fetch task int8_t rsmaTriggerStat = atomic_load_8(RSMA_TRIGGER_STAT(pStat)); switch (rsmaTriggerStat) { @@ -1495,11 +1620,12 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) { case TASK_TRIGGER_STAT_CANCELLED: { smaDebug("vgId:%d, rsma fetch task not start for level %" PRIi8 " since stat is %" PRIi8 ", rsetId rsetId:%" PRIi64 " refId:%d", - SMA_VID(pSma), pItem->level, rsmaTriggerStat, smaMgmt.rsetId, pRSmaInfo->refId); + SMA_VID(pSma), pItem->level, rsmaTriggerStat, smaMgmt.rsetId, pRSmaRef->refId); if (rsmaTriggerStat == TASK_TRIGGER_STAT_PAUSED) { taosTmrReset(tdRSmaFetchTrigger, RSMA_FETCH_INTERVAL, pItem, smaMgmt.tmrHandle, &pItem->tmrId); } - tdReleaseSmaRef(smaMgmt.rsetId, pRSmaInfo->refId); + tdReleaseRSmaInfo(pSma, pRSmaInfo); + tdReleaseSmaRef(smaMgmt.rsetId, pRSmaRef->refId); return; } default: @@ -1544,11 +1670,11 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) { _end: taosTmrReset(tdRSmaFetchTrigger, pItem->maxDelay, pItem, smaMgmt.tmrHandle, &pItem->tmrId); - tdReleaseSmaRef(smaMgmt.rsetId, pRSmaInfo->refId); + tdReleaseRSmaInfo(pSma, pRSmaInfo); + tdReleaseSmaRef(smaMgmt.rsetId, pRSmaRef->refId); } static void tdFreeRSmaSubmitItems(SArray *pItems) { - ASSERT(taosArrayGetSize(pItems) > 0); for (int32_t i = 0; i < taosArrayGetSize(pItems); ++i) { taosFreeQitem(*(void **)taosArrayGet(pItems, i)); } @@ -1560,10 +1686,9 @@ static void tdFreeRSmaSubmitItems(SArray *pItems) { * * @param pSma * @param pInfo - * @param pSubmitArr * @return int32_t */ -static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo, SArray *pSubmitArr) { +static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { SSDataBlock dataBlock = {.info.type = STREAM_GET_ALL}; for (int8_t i = 1; i <= TSDB_RETENTION_L2; ++i) { SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, i - 1); @@ -1576,16 +1701,16 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo, SArray *pSubmi int64_t curMs = taosGetTimestampMs(); if ((pItem->nSkipped * pItem->maxDelay) > RSMA_FETCH_DELAY_MAX) { - smaInfo("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nSkipped:%" PRIi8 " maxDelay:%d, fetch executed", - SMA_VID(pSma), pInfo->suid, i, pItem->nSkipped, pItem->maxDelay); + smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nSkipped:%" PRIi8 " maxDelay:%d, fetch executed", + SMA_VID(pSma), pInfo->suid, i, pItem->nSkipped, pItem->maxDelay); } else if (((curMs - pInfo->lastRecv) < RSMA_FETCH_ACTIVE_MAX)) { ++pItem->nSkipped; - smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " curMs:%" PRIi64 " lastRecv:%" PRIi64 ", fetch skipped ", + smaTrace("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " curMs:%" PRIi64 " lastRecv:%" PRIi64 ", fetch skipped ", SMA_VID(pSma), pInfo->suid, i, curMs, pInfo->lastRecv); continue; } else { - smaInfo("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " curMs:%" PRIi64 " lastRecv:%" PRIi64 ", fetch executed ", - SMA_VID(pSma), pInfo->suid, i, curMs, pInfo->lastRecv); + smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " curMs:%" PRIi64 " lastRecv:%" PRIi64 ", fetch executed ", + SMA_VID(pSma), pInfo->suid, i, curMs, pInfo->lastRecv); } pItem->nSkipped = 0; @@ -1609,10 +1734,8 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo, SArray *pSubmi } _end: - tdReleaseRSmaInfo(pSma, pInfo); return TSDB_CODE_SUCCESS; _err: - tdReleaseRSmaInfo(pSma, pInfo); return TSDB_CODE_FAILED; } @@ -1709,7 +1832,7 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) { if (oldStat == 0 || ((oldStat == 2) && atomic_load_8(RSMA_TRIGGER_STAT(pRSmaStat)) < TASK_TRIGGER_STAT_PAUSED)) { atomic_fetch_add_32(&pRSmaStat->nFetchAll, 1); - tdRSmaFetchAllResult(pSma, pInfo, pSubmitArr); + tdRSmaFetchAllResult(pSma, pInfo); if (0 == atomic_sub_fetch_32(&pRSmaStat->nFetchAll, 1)) { atomic_store_8(RSMA_COMMIT_STAT(pRSmaStat), 0); } diff --git a/source/dnode/vnode/src/sma/smaSnapshot.c b/source/dnode/vnode/src/sma/smaSnapshot.c index 335c15a539..5a0167a75f 100644 --- a/source/dnode/vnode/src/sma/smaSnapshot.c +++ b/source/dnode/vnode/src/sma/smaSnapshot.c @@ -15,11 +15,13 @@ #include "sma.h" -static int32_t rsmaSnapReadQTaskInfo(SRsmaSnapReader* pReader, uint8_t** ppData); -static int32_t rsmaSnapWriteQTaskInfo(SRsmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData); +static int32_t rsmaSnapReadQTaskInfo(SRSmaSnapReader* pReader, uint8_t** ppData); +static int32_t rsmaSnapWriteQTaskInfo(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData); +static int32_t rsmaQTaskInfSnapReaderOpen(SRSmaSnapReader* pReader, int64_t version); +static int32_t rsmaQTaskInfSnapReaderClose(SQTaskFReader** ppReader); -// SRsmaSnapReader ======================================== -struct SRsmaSnapReader { +// SRSmaSnapReader ======================================== +struct SRSmaSnapReader { SSma* pSma; int64_t sver; int64_t ever; @@ -33,13 +35,13 @@ struct SRsmaSnapReader { SQTaskFReader* pQTaskFReader; }; -int32_t rsmaSnapReaderOpen(SSma* pSma, int64_t sver, int64_t ever, SRsmaSnapReader** ppReader) { +int32_t rsmaSnapReaderOpen(SSma* pSma, int64_t sver, int64_t ever, SRSmaSnapReader** ppReader) { int32_t code = 0; SVnode* pVnode = pSma->pVnode; - SRsmaSnapReader* pReader = NULL; + SRSmaSnapReader* pReader = NULL; // alloc - pReader = (SRsmaSnapReader*)taosMemoryCalloc(1, sizeof(*pReader)); + pReader = (SRSmaSnapReader*)taosMemoryCalloc(1, sizeof(*pReader)); if (pReader == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -48,7 +50,7 @@ int32_t rsmaSnapReaderOpen(SSma* pSma, int64_t sver, int64_t ever, SRsmaSnapRead pReader->sver = sver; pReader->ever = ever; - // rsma1/rsma2 + // open rsma1/rsma2 for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pSma->pRSmaTsdb[i]) { code = tsdbSnapReaderOpen(pSma->pRSmaTsdb[i], sver, ever, i == 0 ? SNAP_DATA_RSMA1 : SNAP_DATA_RSMA2, @@ -59,51 +61,112 @@ int32_t rsmaSnapReaderOpen(SSma* pSma, int64_t sver, int64_t ever, SRsmaSnapRead } } - // qtaskinfo - // 1. add ref to qtaskinfo.v${ever} if exists and then start to replicate - char qTaskInfoFullName[TSDB_FILENAME_LEN]; - tdRSmaQTaskInfoGetFullName(TD_VID(pVnode), ever, tfsGetPrimaryPath(pVnode->pTfs), qTaskInfoFullName); - - if (!taosCheckExistFile(qTaskInfoFullName)) { - smaInfo("vgId:%d, vnode snapshot rsma reader for qtaskinfo not need as %s not exists", TD_VID(pVnode), - qTaskInfoFullName); - } else { - pReader->pQTaskFReader = taosMemoryCalloc(1, sizeof(SQTaskFReader)); - if (!pReader->pQTaskFReader) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - TdFilePtr qTaskF = taosOpenFile(qTaskInfoFullName, TD_FILE_READ); - if (!qTaskF) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - pReader->pQTaskFReader->pReadH = qTaskF; -#if 0 - SQTaskFile* pQTaskF = &pReader->pQTaskFReader->fTask; - pQTaskF->nRef = 1; -#endif + // open qtaskinfo + if ((code = rsmaQTaskInfSnapReaderOpen(pReader, ever)) < 0) { + goto _err; } *ppReader = pReader; - smaInfo("vgId:%d, vnode snapshot rsma reader opened %s succeed", TD_VID(pVnode), qTaskInfoFullName); + return TSDB_CODE_SUCCESS; _err: - smaError("vgId:%d, vnode snapshot rsma reader opened failed since %s", TD_VID(pVnode), tstrerror(code)); + smaError("vgId:%d, vnode snapshot rsma reader open failed since %s", TD_VID(pVnode), tstrerror(code)); return TSDB_CODE_FAILED; } -static int32_t rsmaSnapReadQTaskInfo(SRsmaSnapReader* pReader, uint8_t** ppBuf) { +static int32_t rsmaQTaskInfSnapReaderOpen(SRSmaSnapReader* pReader, int64_t version) { + int32_t code = 0; + SSma* pSma = pReader->pSma; + SVnode* pVnode = pSma->pVnode; + SSmaEnv* pEnv = NULL; + SRSmaStat* pStat = NULL; + + if (!(pEnv = SMA_RSMA_ENV(pSma))) { + smaInfo("vgId:%d, vnode snapshot rsma reader for qtaskinfo version %" PRIi64 " not need as env is NULL", + TD_VID(pVnode), version); + return TSDB_CODE_SUCCESS; + } + + pStat = (SRSmaStat*)SMA_ENV_STAT(pEnv); + + int32_t ref = tdRSmaFSRef(pReader->pSma, pStat, version); + if (ref < 1) { + smaInfo("vgId:%d, vnode snapshot rsma reader for qtaskinfo version %" PRIi64 " not need as ref is %d", + TD_VID(pVnode), version, ref); + return TSDB_CODE_SUCCESS; + } + + char qTaskInfoFullName[TSDB_FILENAME_LEN]; + tdRSmaQTaskInfoGetFullName(TD_VID(pVnode), version, tfsGetPrimaryPath(pVnode->pTfs), qTaskInfoFullName); + + if (!taosCheckExistFile(qTaskInfoFullName)) { + tdRSmaFSUnRef(pSma, pStat, version); + smaInfo("vgId:%d, vnode snapshot rsma reader for qtaskinfo version %" PRIi64 " not need as %s not exists", + TD_VID(pVnode), qTaskInfoFullName); + return TSDB_CODE_SUCCESS; + } + + pReader->pQTaskFReader = taosMemoryCalloc(1, sizeof(SQTaskFReader)); + if (!pReader->pQTaskFReader) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _end; + } + + TdFilePtr fp = taosOpenFile(qTaskInfoFullName, TD_FILE_READ); + if (!fp) { + code = TAOS_SYSTEM_ERROR(errno); + taosMemoryFreeClear(pReader->pQTaskFReader); + goto _end; + } + + pReader->pQTaskFReader->pReadH = fp; + pReader->pQTaskFReader->pSma = pSma; + pReader->pQTaskFReader->version = pReader->ever; + +_end: + if (code < 0) { + tdRSmaFSUnRef(pSma, pStat, version); + smaError("vgId:%d, vnode snapshot rsma reader open %s succeed", TD_VID(pVnode), qTaskInfoFullName); + return TSDB_CODE_FAILED; + } + + smaInfo("vgId:%d, vnode snapshot rsma reader open %s succeed", TD_VID(pVnode), qTaskInfoFullName); + return TSDB_CODE_SUCCESS; +} + +static int32_t rsmaQTaskInfSnapReaderClose(SQTaskFReader** ppReader) { + if (!(*ppReader)) { + return TSDB_CODE_SUCCESS; + } + + SSma* pSma = (*ppReader)->pSma; + SRSmaStat* pStat = SMA_RSMA_STAT(pSma); + int64_t version = (*ppReader)->version; + + taosCloseFile(&(*ppReader)->pReadH); + tdRSmaFSUnRef(pSma, pStat, version); + taosMemoryFreeClear(*ppReader); + smaInfo("vgId:%d, vnode snapshot rsma reader closed for qTaskInfo version %" PRIi64, SMA_VID(pSma), version); + + return TSDB_CODE_SUCCESS; +} + +static int32_t rsmaSnapReadQTaskInfo(SRSmaSnapReader* pReader, uint8_t** ppBuf) { int32_t code = 0; SSma* pSma = pReader->pSma; int64_t n = 0; uint8_t* pBuf = NULL; SQTaskFReader* qReader = pReader->pQTaskFReader; + if (!qReader) { + *ppBuf = NULL; + smaInfo("vgId:%d, vnode snapshot rsma reader qtaskinfo, qTaskReader is NULL", SMA_VID(pSma)); + return 0; + } + if (!qReader->pReadH) { *ppBuf = NULL; - smaInfo("vgId:%d, vnode snapshot rsma reader qtaskinfo, readh is empty", SMA_VID(pSma)); + smaInfo("vgId:%d, vnode snapshot rsma reader qtaskinfo, readh is NULL", SMA_VID(pSma)); return 0; } @@ -153,7 +216,7 @@ _err: return code; } -int32_t rsmaSnapRead(SRsmaSnapReader* pReader, uint8_t** ppData) { +int32_t rsmaSnapRead(SRSmaSnapReader* pReader, uint8_t** ppData) { int32_t code = 0; *ppData = NULL; @@ -205,9 +268,9 @@ _err: return code; } -int32_t rsmaSnapReaderClose(SRsmaSnapReader** ppReader) { +int32_t rsmaSnapReaderClose(SRSmaSnapReader** ppReader) { int32_t code = 0; - SRsmaSnapReader* pReader = *ppReader; + SRSmaSnapReader* pReader = *ppReader; for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pReader->pDataReader[i]) { @@ -215,11 +278,7 @@ int32_t rsmaSnapReaderClose(SRsmaSnapReader** ppReader) { } } - if (pReader->pQTaskFReader) { - taosCloseFile(&pReader->pQTaskFReader->pReadH); - taosMemoryFreeClear(pReader->pQTaskFReader); - smaInfo("vgId:%d, vnode snapshot rsma reader closed for qTaskInfo", SMA_VID(pReader->pSma)); - } + rsmaQTaskInfSnapReaderClose(&pReader->pQTaskFReader); smaInfo("vgId:%d, vnode snapshot rsma reader closed", SMA_VID(pReader->pSma)); @@ -227,8 +286,8 @@ int32_t rsmaSnapReaderClose(SRsmaSnapReader** ppReader) { return code; } -// SRsmaSnapWriter ======================================== -struct SRsmaSnapWriter { +// SRSmaSnapWriter ======================================== +struct SRSmaSnapWriter { SSma* pSma; int64_t sver; int64_t ever; @@ -244,13 +303,13 @@ struct SRsmaSnapWriter { SQTaskFWriter* pQTaskFWriter; }; -int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, SRsmaSnapWriter** ppWriter) { +int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, SRSmaSnapWriter** ppWriter) { int32_t code = 0; - SRsmaSnapWriter* pWriter = NULL; + SRSmaSnapWriter* pWriter = NULL; SVnode* pVnode = pSma->pVnode; // alloc - pWriter = (SRsmaSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); + pWriter = (SRSmaSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); if (pWriter == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -301,9 +360,9 @@ _err: return code; } -int32_t rsmaSnapWriterClose(SRsmaSnapWriter** ppWriter, int8_t rollback) { +int32_t rsmaSnapWriterClose(SRSmaSnapWriter** ppWriter, int8_t rollback) { int32_t code = 0; - SRsmaSnapWriter* pWriter = *ppWriter; + SRSmaSnapWriter* pWriter = *ppWriter; SVnode* pVnode = pWriter->pSma->pVnode; if (rollback) { @@ -332,7 +391,7 @@ int32_t rsmaSnapWriterClose(SRsmaSnapWriter** ppWriter, int8_t rollback) { pWriter->pQTaskFWriter->fname, qTaskInfoFullName); // rsma restore - if ((code = tdRsmaRestore(pWriter->pSma, RSMA_RESTORE_SYNC, pWriter->ever)) < 0) { + if ((code = tdRSmaRestore(pWriter->pSma, RSMA_RESTORE_SYNC, pWriter->ever)) < 0) { goto _err; } smaInfo("vgId:%d, vnode snapshot rsma writer restore from %s succeed", SMA_VID(pWriter->pSma), qTaskInfoFullName); @@ -349,7 +408,7 @@ _err: return code; } -int32_t rsmaSnapWrite(SRsmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { +int32_t rsmaSnapWrite(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { int32_t code = 0; SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; @@ -377,7 +436,7 @@ _err: return code; } -static int32_t rsmaSnapWriteQTaskInfo(SRsmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { +static int32_t rsmaSnapWriteQTaskInfo(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { int32_t code = 0; SQTaskFWriter* qWriter = pWriter->pQTaskFWriter; diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c index 1687cd46a0..173391b769 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ b/source/dnode/vnode/src/sma/smaTimeRange.c @@ -20,6 +20,10 @@ #define SMA_STORAGE_MINUTES_DAY 1440 #define SMA_STORAGE_SPLIT_FACTOR 14400 // least records in tsma file +static int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg); +static int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg); +static int32_t tdProcessTSmaGetDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days); + // TODO: Who is responsible for resource allocate and release? int32_t tdProcessTSmaInsert(SSma *pSma, int64_t indexUid, const char *msg) { int32_t code = TSDB_CODE_SUCCESS; @@ -59,7 +63,7 @@ int32_t smaGetTSmaDays(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t * * @param days unit is minute * @return int32_t */ -int32_t tdProcessTSmaGetDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days) { +static int32_t tdProcessTSmaGetDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days) { SDecoder coder = {0}; tDecoderInit(&coder, pCont, contLen); @@ -106,7 +110,7 @@ _err: * @param pMsg * @return int32_t */ -int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg) { +static int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg) { SSmaCfg *pCfg = (SSmaCfg *)pMsg; if (TD_VID(pSma->pVnode) == pCfg->dstVgId) { @@ -145,7 +149,7 @@ int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg) { * @param msg * @return int32_t */ -int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { +static int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { const SArray *pDataBlocks = (const SArray *)msg; // TODO: destroy SSDataBlocks(msg) if (!pDataBlocks) { diff --git a/source/dnode/vnode/src/sma/smaUtil.c b/source/dnode/vnode/src/sma/smaUtil.c index d771797963..a4ba0a61a5 100644 --- a/source/dnode/vnode/src/sma/smaUtil.c +++ b/source/dnode/vnode/src/sma/smaUtil.c @@ -305,93 +305,4 @@ int32_t tdReleaseSmaRef(int32_t rsetId, int64_t refId) { smaDebug("rsma release ref for rsetId:%" PRIi64 " refId:%d success", rsetId, refId); return TSDB_CODE_SUCCESS; -} - -static int32_t tdCloneQTaskInfo(SSma *pSma, qTaskInfo_t dstTaskInfo, qTaskInfo_t srcTaskInfo, SRSmaParam *param, - tb_uid_t suid, int8_t idx) { - SVnode *pVnode = pSma->pVnode; - char *pOutput = NULL; - int32_t len = 0; - - if ((terrno = qSerializeTaskStatus(srcTaskInfo, &pOutput, &len)) < 0) { - smaError("vgId:%d, rsma clone, table %" PRIi64 " serialize qTaskInfo failed since %s", TD_VID(pVnode), suid, - terrstr()); - goto _err; - } - - SReadHandle handle = { - .meta = pVnode->pMeta, - .vnode = pVnode, - .initTqReader = 1, - }; - ASSERT(!dstTaskInfo); - dstTaskInfo = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle); - if (!dstTaskInfo) { - terrno = TSDB_CODE_RSMA_QTASKINFO_CREATE; - goto _err; - } - - if (qDeserializeTaskStatus(dstTaskInfo, pOutput, len) < 0) { - smaError("vgId:%d, rsma clone, restore rsma task for table:%" PRIi64 " failed since %s", TD_VID(pVnode), suid, - terrstr()); - goto _err; - } - - smaDebug("vgId:%d, rsma clone, restore rsma task for table:%" PRIi64 " succeed", TD_VID(pVnode), suid); - - taosMemoryFreeClear(pOutput); - return TSDB_CODE_SUCCESS; -_err: - taosMemoryFreeClear(pOutput); - tdFreeQTaskInfo(dstTaskInfo, TD_VID(pVnode), idx + 1); - smaError("vgId:%d, rsma clone, restore rsma task for table:%" PRIi64 " failed since %s", TD_VID(pVnode), suid, - terrstr()); - return TSDB_CODE_FAILED; -} - -/** - * @brief Clone qTaskInfo of SRSmaInfo - * - * @param pSma - * @param pInfo - * @return int32_t - */ -int32_t tdCloneRSmaInfo(SSma *pSma, SRSmaInfo *pInfo) { - SRSmaParam *param = NULL; - if (!pInfo) { - return TSDB_CODE_SUCCESS; - } - - SMetaReader mr = {0}; - metaReaderInit(&mr, SMA_META(pSma), 0); - smaDebug("vgId:%d, rsma clone qTaskInfo for suid:%" PRIi64, SMA_VID(pSma), pInfo->suid); - if (metaGetTableEntryByUid(&mr, pInfo->suid) < 0) { - smaError("vgId:%d, rsma clone, failed to get table meta for %" PRIi64 " since %s", SMA_VID(pSma), pInfo->suid, - terrstr()); - goto _err; - } - ASSERT(mr.me.type == TSDB_SUPER_TABLE); - ASSERT(mr.me.uid == pInfo->suid); - if (TABLE_IS_ROLLUP(mr.me.flags)) { - param = &mr.me.stbEntry.rsmaParam; - for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - if (!pInfo->iTaskInfo[i]) { - continue; - } - if (tdCloneQTaskInfo(pSma, pInfo->taskInfo[i], pInfo->iTaskInfo[i], param, pInfo->suid, i) < 0) { - goto _err; - } - } - smaDebug("vgId:%d, rsma clone env success for %" PRIi64, SMA_VID(pSma), pInfo->suid); - } else { - terrno = TSDB_CODE_RSMA_INVALID_SCHEMA; - goto _err; - } - - metaReaderClear(&mr); - return TSDB_CODE_SUCCESS; -_err: - metaReaderClear(&mr); - smaError("vgId:%d, rsma clone env failed for %" PRIi64 " since %s", SMA_VID(pSma), pInfo->suid, terrstr()); - return TSDB_CODE_FAILED; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index 61c6877555..9ac62b4b59 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -418,31 +418,17 @@ typedef enum { } SFSLASTNEXTROWSTATES; typedef struct { - SFSLASTNEXTROWSTATES state; // [input] - STsdb *pTsdb; // [input] - SBlockIdx *pBlockIdxExp; // [input] - STSchema *pTSchema; // [input] + SFSLASTNEXTROWSTATES state; // [input] + STsdb *pTsdb; // [input] tb_uid_t suid; tb_uid_t uid; int32_t nFileSet; int32_t iFileSet; SArray *aDFileSet; SDataFReader *pDataFReader; - SArray *aBlockL; - SBlockL *pBlockL; - SBlockData *pBlockDataL; - SBlockData blockDataL; - int32_t nRow; - int32_t iRow; TSDBROW row; - /* - SArray *aBlockIdx; - SBlockIdx *pBlockIdx; - SMapData blockMap; - int32_t nBlock; - int32_t iBlock; - SBlock block; - */ + + SMergeTree mergeTree; } SFSLastNextRowIter; static int32_t getNextRowFromFSLast(void *iter, TSDBROW **ppRow) { @@ -451,22 +437,16 @@ static int32_t getNextRowFromFSLast(void *iter, TSDBROW **ppRow) { switch (state->state) { case SFSLASTNEXTROW_FS: - // state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet; state->nFileSet = taosArrayGetSize(state->aDFileSet); state->iFileSet = state->nFileSet; - state->pBlockDataL = NULL; - case SFSLASTNEXTROW_FILESET: { SDFileSet *pFileSet = NULL; _next_fileset: if (--state->iFileSet >= 0) { pFileSet = (SDFileSet *)taosArrayGet(state->aDFileSet, state->iFileSet); } else { - if (state->pBlockDataL) { - tBlockDataDestroy(state->pBlockDataL, 1); - state->pBlockDataL = NULL; - } + // tMergeTreeClose(&state->mergeTree); *ppRow = NULL; return code; @@ -475,68 +455,24 @@ static int32_t getNextRowFromFSLast(void *iter, TSDBROW **ppRow) { code = tsdbDataFReaderOpen(&state->pDataFReader, state->pTsdb, pFileSet); if (code) goto _err; - if (!state->aBlockL) { - state->aBlockL = taosArrayInit(0, sizeof(SBlockL)); - } else { - taosArrayClear(state->aBlockL); - } - - code = tsdbReadBlockL(state->pDataFReader, state->aBlockL); - if (code) goto _err; - - // SBlockL *pBlockL = (SBlockL *)taosArrayGet(state->aBlockL, state->iBlockL); - - state->pBlockL = taosArraySearch(state->aBlockL, state->pBlockIdxExp, tCmprBlockL, TD_EQ); - if (!state->pBlockL) { + tMergeTreeOpen(&state->mergeTree, 1, state->pDataFReader, state->suid, state->uid, + &(STimeWindow){.skey = TSKEY_MIN, .ekey = TSKEY_MAX}, + &(SVersionRange){.minVer = 0, .maxVer = UINT64_MAX}); + bool hasVal = tMergeTreeNext(&state->mergeTree); + if (!hasVal) { + state->state = SFSLASTNEXTROW_FILESET; + // tMergeTreeClose(&state->mergeTree); goto _next_fileset; } - - int64_t suid = state->pBlockL->suid; - int64_t uid = state->pBlockL->maxUid; - - if (!state->pBlockDataL) { - state->pBlockDataL = &state->blockDataL; - - tBlockDataCreate(state->pBlockDataL); - } - code = tBlockDataInit(state->pBlockDataL, suid, suid ? 0 : uid, state->pTSchema); - if (code) goto _err; - } - case SFSLASTNEXTROW_BLOCKDATA: - code = tsdbReadLastBlock(state->pDataFReader, state->pBlockL, state->pBlockDataL); - if (code) goto _err; - - state->nRow = state->blockDataL.nRow; - state->iRow = state->nRow - 1; - - if (!state->pBlockDataL->uid) { - while (state->pBlockIdxExp->uid != state->pBlockDataL->aUid[state->iRow]) { - --state->iRow; - } - } - state->state = SFSLASTNEXTROW_BLOCKROW; + } case SFSLASTNEXTROW_BLOCKROW: - if (state->pBlockDataL->uid) { - if (state->iRow >= 0) { - state->row = tsdbRowFromBlockData(state->pBlockDataL, state->iRow); - *ppRow = &state->row; - - if (--state->iRow < 0) { - state->state = SFSLASTNEXTROW_FILESET; - } - } - } else { - if (state->iRow >= 0 && state->pBlockIdxExp->uid == state->pBlockDataL->aUid[state->iRow]) { - state->row = tsdbRowFromBlockData(state->pBlockDataL, state->iRow); - *ppRow = &state->row; - - if (--state->iRow < 0 || state->pBlockIdxExp->uid != state->pBlockDataL->aUid[state->iRow]) { - state->state = SFSLASTNEXTROW_FILESET; - } - } + state->row = tMergeTreeGetRow(&state->mergeTree); + *ppRow = &state->row; + bool hasVal = tMergeTreeNext(&state->mergeTree); + if (!hasVal) { + state->state = SFSLASTNEXTROW_FILESET; } - return code; default: ASSERT(0); @@ -548,15 +484,6 @@ _err: tsdbDataFReaderClose(&state->pDataFReader); state->pDataFReader = NULL; } - if (state->aBlockL) { - taosArrayDestroy(state->aBlockL); - state->aBlockL = NULL; - } - if (state->pBlockDataL) { - tBlockDataDestroy(state->pBlockDataL, 1); - state->pBlockDataL = NULL; - } - *ppRow = NULL; return code; @@ -574,14 +501,6 @@ int32_t clearNextRowFromFSLast(void *iter) { tsdbDataFReaderClose(&state->pDataFReader); state->pDataFReader = NULL; } - if (state->aBlockL) { - taosArrayDestroy(state->aBlockL); - state->aBlockL = NULL; - } - if (state->pBlockDataL) { - tBlockDataDestroy(state->pBlockDataL, 1); - state->pBlockDataL = NULL; - } return code; } @@ -609,7 +528,7 @@ typedef struct SFSNextRowIter { SMapData blockMap; int32_t nBlock; int32_t iBlock; - SBlock block; + SDataBlk block; SBlockData blockData; SBlockData *pBlockData; int32_t nRow; @@ -684,13 +603,13 @@ static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow) { } case SFSNEXTROW_BLOCKDATA: if (state->iBlock >= 0) { - SBlock block = {0}; + SDataBlk block = {0}; - tBlockReset(&block); + tDataBlkReset(&block); // tBlockDataReset(&state->blockData); tBlockDataReset(state->pBlockData); - tMapDataGetItemByIdx(&state->blockMap, state->iBlock, &block, tGetBlock); + tMapDataGetItemByIdx(&state->blockMap, state->iBlock, &block, tGetDataBlk); /* code = tsdbReadBlockData(state->pDataFReader, &state->blockIdx, &block, &state->blockData, NULL, NULL); */ tBlockDataReset(state->pBlockData); code = tBlockDataInit(state->pBlockData, state->suid, state->uid, state->pTSchema); @@ -878,7 +797,7 @@ static bool tsdbKeyDeleted(TSDBKEY *key, SArray *pSkyline, int64_t *iSkyline) { if (key->ts > pItemBack->ts) { return false; } else if (key->ts >= pItemFront->ts && key->ts <= pItemBack->ts) { - if ((key->version <= pItemFront->version || key->ts == pItemBack->ts && key->version <= pItemBack->version)) { + if (key->version <= pItemFront->version || (key->ts == pItemBack->ts && key->version <= pItemBack->version)) { return true; } else { return false; @@ -972,8 +891,6 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs pIter->fsLastState.state = (SFSLASTNEXTROWSTATES)SFSNEXTROW_FS; pIter->fsLastState.pTsdb = pTsdb; pIter->fsLastState.aDFileSet = pIter->pReadSnap->fs.aDFileSet; - pIter->fsLastState.pBlockIdxExp = &pIter->idx; - pIter->fsLastState.pTSchema = pTSchema; pIter->fsLastState.suid = suid; pIter->fsLastState.uid = uid; @@ -1372,25 +1289,33 @@ int32_t tsdbCacheGetLastH(SLRUCache *pCache, tb_uid_t uid, STsdb *pTsdb, LRUHand // getTableCacheKeyS(uid, "l", key, &keyLen); getTableCacheKey(uid, 1, key, &keyLen); LRUHandle *h = taosLRUCacheLookup(pCache, key, keyLen); - if (h) { - } else { - SArray *pLastArray = NULL; - code = mergeLast(uid, pTsdb, &pLastArray); - // if table's empty or error, return code of -1 - // if (code < 0 || pRow == NULL) { - if (code < 0 || pLastArray == NULL) { - *handle = NULL; - return 0; - } - - _taos_lru_deleter_t deleter = deleteTableCacheLast; - LRUStatus status = - taosLRUCacheInsert(pCache, key, keyLen, pLastArray, pLastArray->capacity, deleter, NULL, TAOS_LRU_PRIORITY_LOW); - if (status != TAOS_LRU_STATUS_OK) { - code = -1; - } + if (!h) { + taosThreadMutexLock(&pTsdb->lruMutex); h = taosLRUCacheLookup(pCache, key, keyLen); + if (!h) { + SArray *pLastArray = NULL; + code = mergeLast(uid, pTsdb, &pLastArray); + // if table's empty or error, return code of -1 + // if (code < 0 || pRow == NULL) { + if (code < 0 || pLastArray == NULL) { + *handle = NULL; + return 0; + } + + _taos_lru_deleter_t deleter = deleteTableCacheLast; + LRUStatus status = taosLRUCacheInsert(pCache, key, keyLen, pLastArray, pLastArray->capacity, deleter, NULL, + TAOS_LRU_PRIORITY_LOW); + if (status != TAOS_LRU_STATUS_OK) { + code = -1; + } + + taosThreadMutexUnlock(&pTsdb->lruMutex); + + h = taosLRUCacheLookup(pCache, key, keyLen); + } else { + taosThreadMutexUnlock(&pTsdb->lruMutex); + } } *handle = h; @@ -1411,3 +1336,5 @@ void tsdbCacheSetCapacity(SVnode *pVnode, size_t capacity) { } size_t tsdbCacheGetCapacity(SVnode *pVnode) { return taosLRUCacheGetCapacity(pVnode->pTsdb->lruCache); } + +size_t tsdbCacheGetUsage(SVnode *pVnode) { return taosLRUCacheGetUsage(pVnode->pTsdb->lruCache); } diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit.c b/source/dnode/vnode/src/tsdb/tsdbCommit.c index 04a6de8472..fb06203605 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit.c @@ -20,11 +20,26 @@ typedef struct { STSchema *pTSchema; } SSkmInfo; +typedef enum { MEMORY_DATA_ITER = 0, LAST_DATA_ITER } EDataIterT; + typedef struct { - int64_t suid; - int64_t uid; - TSDBROW row; -} SRowInfo; + SRBTreeNode n; + SRowInfo r; + EDataIterT type; + union { + struct { + int32_t iTbDataP; + STbDataIter iter; + }; // memory data iter + struct { + int32_t iStt; + SArray *aSttBlk; + int32_t iSttBlk; + SBlockData bData; + int32_t iRow; + }; // stt file data iter + }; +} SDataIter; typedef struct { STsdb *pTsdb; @@ -35,8 +50,9 @@ typedef struct { int32_t minRow; int32_t maxRow; int8_t cmprAlg; - SArray *aTbDataP; - STsdbFS fs; + int8_t maxLast; + SArray *aTbDataP; // memory + STsdbFS fs; // disk // -------------- TSKEY nextKey; // reset by each table commit int32_t commitFid; @@ -45,25 +61,24 @@ typedef struct { // commit file data struct { SDataFReader *pReader; - // data - SArray *aBlockIdx; // SArray - int32_t iBlockIdx; - SBlockIdx *pBlockIdx; - SMapData mBlock; // SMapData - SBlockData bData; - // last - SArray *aBlockL; // SArray - int32_t iBlockL; - SBlockData bDatal; - int32_t iRow; - SRowInfo *pRowInfo; - SRowInfo rowInfo; + SArray *aBlockIdx; // SArray + int32_t iBlockIdx; + SBlockIdx *pBlockIdx; + SMapData mBlock; // SMapData + SBlockData bData; } dReader; + struct { + SDataIter *pIter; + SRBTree rbt; + SDataIter dataIter; + SDataIter aDataIter[TSDB_MAX_STT_FILE]; + int8_t toLastOnly; + }; struct { SDataFWriter *pWriter; SArray *aBlockIdx; // SArray - SArray *aBlockL; // SArray - SMapData mBlock; // SMapData + SArray *aSttBlk; // SArray + SMapData mBlock; // SMapData SBlockData bData; SBlockData bDatal; } dWriter; @@ -82,6 +97,26 @@ static int32_t tsdbCommitData(SCommitter *pCommitter); static int32_t tsdbCommitDel(SCommitter *pCommitter); static int32_t tsdbCommitCache(SCommitter *pCommitter); static int32_t tsdbEndCommit(SCommitter *pCommitter, int32_t eno); +static int32_t tsdbNextCommitRow(SCommitter *pCommitter); + +static int32_t tRowInfoCmprFn(const void *p1, const void *p2) { + SRowInfo *pInfo1 = (SRowInfo *)p1; + SRowInfo *pInfo2 = (SRowInfo *)p2; + + if (pInfo1->suid < pInfo2->suid) { + return -1; + } else if (pInfo1->suid > pInfo2->suid) { + return 1; + } + + if (pInfo1->uid < pInfo2->uid) { + return -1; + } else if (pInfo1->uid > pInfo2->uid) { + return 1; + } + + return tsdbRowCmprFn(&pInfo1->row, &pInfo2->row); +} int32_t tsdbBegin(STsdb *pTsdb) { int32_t code = 0; @@ -294,7 +329,10 @@ static int32_t tsdbCommitterUpdateTableSchema(SCommitter *pCommitter, int64_t su int32_t code = 0; if (suid) { - if (pCommitter->skmTable.suid == suid) goto _exit; + if (pCommitter->skmTable.suid == suid) { + pCommitter->skmTable.uid = uid; + goto _exit; + } } else { if (pCommitter->skmTable.uid == uid) goto _exit; } @@ -334,54 +372,6 @@ _exit: return code; } -static int32_t tsdbCommitterNextLastRow(SCommitter *pCommitter) { - int32_t code = 0; - - ASSERT(pCommitter->dReader.pReader); - ASSERT(pCommitter->dReader.pRowInfo); - - SBlockData *pBlockDatal = &pCommitter->dReader.bDatal; - pCommitter->dReader.iRow++; - if (pCommitter->dReader.iRow < pBlockDatal->nRow) { - if (pBlockDatal->uid) { - pCommitter->dReader.pRowInfo->uid = pBlockDatal->uid; - } else { - pCommitter->dReader.pRowInfo->uid = pBlockDatal->aUid[pCommitter->dReader.iRow]; - } - pCommitter->dReader.pRowInfo->row = tsdbRowFromBlockData(pBlockDatal, pCommitter->dReader.iRow); - } else { - pCommitter->dReader.iBlockL++; - if (pCommitter->dReader.iBlockL < taosArrayGetSize(pCommitter->dReader.aBlockL)) { - SBlockL *pBlockL = (SBlockL *)taosArrayGet(pCommitter->dReader.aBlockL, pCommitter->dReader.iBlockL); - int64_t suid = pBlockL->suid; - int64_t uid = pBlockL->maxUid; - - code = tsdbCommitterUpdateTableSchema(pCommitter, suid, uid); - if (code) goto _exit; - - code = tBlockDataInit(pBlockDatal, suid, suid ? 0 : uid, pCommitter->skmTable.pTSchema); - if (code) goto _exit; - - code = tsdbReadLastBlock(pCommitter->dReader.pReader, pBlockL, pBlockDatal); - if (code) goto _exit; - - pCommitter->dReader.iRow = 0; - pCommitter->dReader.pRowInfo->suid = pBlockDatal->suid; - if (pBlockDatal->uid) { - pCommitter->dReader.pRowInfo->uid = pBlockDatal->uid; - } else { - pCommitter->dReader.pRowInfo->uid = pBlockDatal->aUid[0]; - } - pCommitter->dReader.pRowInfo->row = tsdbRowFromBlockData(pBlockDatal, pCommitter->dReader.iRow); - } else { - pCommitter->dReader.pRowInfo = NULL; - } - } - -_exit: - return code; -} - static int32_t tsdbCommitterNextTableData(SCommitter *pCommitter) { int32_t code = 0; @@ -404,6 +394,85 @@ _exit: return code; } +static int32_t tsdbOpenCommitIter(SCommitter *pCommitter) { + int32_t code = 0; + + pCommitter->pIter = NULL; + tRBTreeCreate(&pCommitter->rbt, tRowInfoCmprFn); + + // memory + TSDBKEY tKey = {.ts = pCommitter->minKey, .version = VERSION_MIN}; + SDataIter *pIter = &pCommitter->dataIter; + pIter->type = MEMORY_DATA_ITER; + pIter->iTbDataP = 0; + for (; pIter->iTbDataP < taosArrayGetSize(pCommitter->aTbDataP); pIter->iTbDataP++) { + STbData *pTbData = (STbData *)taosArrayGetP(pCommitter->aTbDataP, pIter->iTbDataP); + tsdbTbDataIterOpen(pTbData, &tKey, 0, &pIter->iter); + TSDBROW *pRow = tsdbTbDataIterGet(&pIter->iter); + if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { + pCommitter->nextKey = TMIN(pCommitter->nextKey, TSDBROW_TS(pRow)); + pRow = NULL; + } + + if (pRow == NULL) continue; + + pIter->r.suid = pTbData->suid; + pIter->r.uid = pTbData->uid; + pIter->r.row = *pRow; + break; + } + ASSERT(pIter->iTbDataP < taosArrayGetSize(pCommitter->aTbDataP)); + tRBTreePut(&pCommitter->rbt, (SRBTreeNode *)pIter); + + // disk + pCommitter->toLastOnly = 0; + SDataFReader *pReader = pCommitter->dReader.pReader; + if (pReader) { + if (pReader->pSet->nSttF >= pCommitter->maxLast) { + int8_t iIter = 0; + for (int32_t iStt = 0; iStt < pReader->pSet->nSttF; iStt++) { + pIter = &pCommitter->aDataIter[iIter]; + pIter->type = LAST_DATA_ITER; + pIter->iStt = iStt; + + code = tsdbReadSttBlk(pCommitter->dReader.pReader, iStt, pIter->aSttBlk); + if (code) goto _err; + + if (taosArrayGetSize(pIter->aSttBlk) == 0) continue; + + pIter->iSttBlk = 0; + SSttBlk *pSttBlk = (SSttBlk *)taosArrayGet(pIter->aSttBlk, 0); + code = tsdbReadSttBlock(pCommitter->dReader.pReader, iStt, pSttBlk, &pIter->bData); + if (code) goto _err; + + pIter->iRow = 0; + pIter->r.suid = pIter->bData.suid; + pIter->r.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[0]; + pIter->r.row = tsdbRowFromBlockData(&pIter->bData, 0); + + tRBTreePut(&pCommitter->rbt, (SRBTreeNode *)pIter); + iIter++; + } + } else { + for (int32_t iStt = 0; iStt < pReader->pSet->nSttF; iStt++) { + SSttFile *pSttFile = pReader->pSet->aSttF[iStt]; + if (pSttFile->size > pSttFile->offset) { + pCommitter->toLastOnly = 1; + break; + } + } + } + } + + code = tsdbNextCommitRow(pCommitter); + if (code) goto _err; + + return code; + +_err: + return code; +} + static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { int32_t code = 0; STsdb *pTsdb = pCommitter->pTsdb; @@ -416,8 +485,8 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { pCommitter->nextKey = TSKEY_MAX; // Reader - pRSet = (SDFileSet *)taosArraySearch(pCommitter->fs.aDFileSet, &(SDFileSet){.fid = pCommitter->commitFid}, - tDFileSetCmprFn, TD_EQ); + SDFileSet tDFileSet = {.fid = pCommitter->commitFid}; + pRSet = (SDFileSet *)taosArraySearch(pCommitter->fs.aDFileSet, &tDFileSet, tDFileSetCmprFn, TD_EQ); if (pRSet) { code = tsdbDataFReaderOpen(&pCommitter->dReader.pReader, pTsdb, pRSet); if (code) goto _err; @@ -427,68 +496,58 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { if (code) goto _err; pCommitter->dReader.iBlockIdx = 0; - if (pCommitter->dReader.iBlockIdx < taosArrayGetSize(pCommitter->dReader.aBlockIdx)) { - pCommitter->dReader.pBlockIdx = - (SBlockIdx *)taosArrayGet(pCommitter->dReader.aBlockIdx, pCommitter->dReader.iBlockIdx); - + if (taosArrayGetSize(pCommitter->dReader.aBlockIdx) > 0) { + pCommitter->dReader.pBlockIdx = (SBlockIdx *)taosArrayGet(pCommitter->dReader.aBlockIdx, 0); code = tsdbReadBlock(pCommitter->dReader.pReader, pCommitter->dReader.pBlockIdx, &pCommitter->dReader.mBlock); if (code) goto _err; } else { pCommitter->dReader.pBlockIdx = NULL; } tBlockDataReset(&pCommitter->dReader.bData); - - // last - code = tsdbReadBlockL(pCommitter->dReader.pReader, pCommitter->dReader.aBlockL); - if (code) goto _err; - - pCommitter->dReader.iBlockL = -1; - pCommitter->dReader.iRow = -1; - pCommitter->dReader.pRowInfo = &pCommitter->dReader.rowInfo; - tBlockDataReset(&pCommitter->dReader.bDatal); - code = tsdbCommitterNextLastRow(pCommitter); - if (code) goto _err; } else { pCommitter->dReader.pBlockIdx = NULL; - pCommitter->dReader.pRowInfo = NULL; } // Writer - SHeadFile fHead; - SDataFile fData; - SLastFile fLast; - SSmaFile fSma; - SDFileSet wSet = {.pHeadF = &fHead, .pDataF = &fData, .pLastF = &fLast, .pSmaF = &fSma}; + SHeadFile fHead = {.commitID = pCommitter->commitID}; + SDataFile fData = {.commitID = pCommitter->commitID}; + SSmaFile fSma = {.commitID = pCommitter->commitID}; + SSttFile fStt = {.commitID = pCommitter->commitID}; + SDFileSet wSet = {.fid = pCommitter->commitFid, .pHeadF = &fHead, .pDataF = &fData, .pSmaF = &fSma}; if (pRSet) { - wSet.diskId = pRSet->diskId; - wSet.fid = pCommitter->commitFid; - fHead = (SHeadFile){.commitID = pCommitter->commitID, .size = 0, .offset = 0}; + ASSERT(pRSet->nSttF <= pCommitter->maxLast); fData = *pRSet->pDataF; - fLast = (SLastFile){.commitID = pCommitter->commitID, .size = 0, .offset = 0}; fSma = *pRSet->pSmaF; + wSet.diskId = pRSet->diskId; + if (pRSet->nSttF < pCommitter->maxLast) { + for (int32_t iStt = 0; iStt < pRSet->nSttF; iStt++) { + wSet.aSttF[iStt] = pRSet->aSttF[iStt]; + } + wSet.nSttF = pRSet->nSttF + 1; + } else { + wSet.nSttF = 1; + } } else { SDiskID did = {0}; - tfsAllocDisk(pTsdb->pVnode->pTfs, 0, &did); - tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did); - wSet.diskId = did; - wSet.fid = pCommitter->commitFid; - fHead = (SHeadFile){.commitID = pCommitter->commitID, .size = 0, .offset = 0}; - fData = (SDataFile){.commitID = pCommitter->commitID, .size = 0}; - fLast = (SLastFile){.commitID = pCommitter->commitID, .size = 0, .offset = 0}; - fSma = (SSmaFile){.commitID = pCommitter->commitID, .size = 0}; + wSet.nSttF = 1; } + wSet.aSttF[wSet.nSttF - 1] = &fStt; code = tsdbDataFWriterOpen(&pCommitter->dWriter.pWriter, pTsdb, &wSet); if (code) goto _err; taosArrayClear(pCommitter->dWriter.aBlockIdx); - taosArrayClear(pCommitter->dWriter.aBlockL); + taosArrayClear(pCommitter->dWriter.aSttBlk); tMapDataReset(&pCommitter->dWriter.mBlock); tBlockDataReset(&pCommitter->dWriter.bData); tBlockDataReset(&pCommitter->dWriter.bDatal); + // open iter + code = tsdbOpenCommitIter(pCommitter); + if (code) goto _err; + _exit: return code; @@ -497,18 +556,14 @@ _err: return code; } -static int32_t tsdbCommitDataBlock(SCommitter *pCommitter, SBlock *pBlock) { +static int32_t tsdbCommitDataBlock(SCommitter *pCommitter) { int32_t code = 0; SBlockData *pBlockData = &pCommitter->dWriter.bData; - SBlock block; + SDataBlk block; ASSERT(pBlockData->nRow > 0); - if (pBlock) { - block = *pBlock; // as a subblock - } else { - tBlockReset(&block); // as a new block - } + tDataBlkReset(&block); // info block.nRow += pBlockData->nRow; @@ -539,8 +594,8 @@ static int32_t tsdbCommitDataBlock(SCommitter *pCommitter, SBlock *pBlock) { ((block.nSubBlock == 1) && !block.hasDup) ? &block.smaInfo : NULL, pCommitter->cmprAlg, 0); if (code) goto _err; - // put SBlock - code = tMapDataPutItem(&pCommitter->dWriter.mBlock, &block, tPutBlock); + // put SDataBlk + code = tMapDataPutItem(&pCommitter->dWriter.mBlock, &block, tPutDataBlk); if (code) goto _err; // clear @@ -555,7 +610,7 @@ _err: static int32_t tsdbCommitLastBlock(SCommitter *pCommitter) { int32_t code = 0; - SBlockL blockL; + SSttBlk blockL; SBlockData *pBlockData = &pCommitter->dWriter.bDatal; ASSERT(pBlockData->nRow > 0); @@ -580,8 +635,8 @@ static int32_t tsdbCommitLastBlock(SCommitter *pCommitter) { code = tsdbWriteBlockData(pCommitter->dWriter.pWriter, pBlockData, &blockL.bInfo, NULL, pCommitter->cmprAlg, 1); if (code) goto _err; - // push SBlockL - if (taosArrayPush(pCommitter->dWriter.aBlockL, &blockL) == NULL) { + // push SSttBlk + if (taosArrayPush(pCommitter->dWriter.aSttBlk, &blockL) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } @@ -596,501 +651,6 @@ _err: return code; } -static int32_t tsdbMergeCommitData(SCommitter *pCommitter, STbDataIter *pIter, SBlock *pBlock) { - int32_t code = 0; - STbData *pTbData = pIter->pTbData; - SBlockData *pBlockDataR = &pCommitter->dReader.bData; - SBlockData *pBlockDataW = &pCommitter->dWriter.bData; - - code = tsdbReadDataBlock(pCommitter->dReader.pReader, pBlock, pBlockDataR); - if (code) goto _err; - - tBlockDataClear(pBlockDataW); - int32_t iRow = 0; - TSDBROW row; - TSDBROW *pRow1 = tsdbTbDataIterGet(pIter); - TSDBROW *pRow2 = &row; - *pRow2 = tsdbRowFromBlockData(pBlockDataR, iRow); - while (pRow1 && pRow2) { - int32_t c = tsdbRowCmprFn(pRow1, pRow2); - - if (c < 0) { - code = tsdbCommitterUpdateRowSchema(pCommitter, pTbData->suid, pTbData->uid, TSDBROW_SVERSION(pRow1)); - if (code) goto _err; - - code = tBlockDataAppendRow(pBlockDataW, pRow1, pCommitter->skmRow.pTSchema, pTbData->uid); - if (code) goto _err; - - // next - tsdbTbDataIterNext(pIter); - pRow1 = tsdbTbDataIterGet(pIter); - } else if (c > 0) { - code = tBlockDataAppendRow(pBlockDataW, pRow2, NULL, pTbData->uid); - if (code) goto _err; - - iRow++; - if (iRow < pBlockDataR->nRow) { - *pRow2 = tsdbRowFromBlockData(pBlockDataR, iRow); - } else { - pRow2 = NULL; - } - } else { - ASSERT(0); - } - - // check - if (pBlockDataW->nRow >= pCommitter->maxRow * 4 / 5) { - code = tsdbCommitDataBlock(pCommitter, NULL); - if (code) goto _err; - } - } - - while (pRow2) { - code = tBlockDataAppendRow(pBlockDataW, pRow2, NULL, pTbData->uid); - if (code) goto _err; - - iRow++; - if (iRow < pBlockDataR->nRow) { - *pRow2 = tsdbRowFromBlockData(pBlockDataR, iRow); - } else { - pRow2 = NULL; - } - - // check - if (pBlockDataW->nRow >= pCommitter->maxRow * 4 / 5) { - code = tsdbCommitDataBlock(pCommitter, NULL); - if (code) goto _err; - } - } - - // check - if (pBlockDataW->nRow > 0) { - code = tsdbCommitDataBlock(pCommitter, NULL); - if (code) goto _err; - } - - return code; - -_err: - tsdbError("vgId:%d, tsdb merge commit data failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbCommitTableMemData(SCommitter *pCommitter, STbDataIter *pIter, TSDBKEY toKey) { - int32_t code = 0; - STbData *pTbData = pIter->pTbData; - SBlockData *pBlockData = &pCommitter->dWriter.bData; - - tBlockDataClear(pBlockData); - TSDBROW *pRow = tsdbTbDataIterGet(pIter); - while (true) { - if (pRow == NULL) { - if (pBlockData->nRow > 0) { - goto _write_block; - } else { - break; - } - } - - // update schema - code = tsdbCommitterUpdateRowSchema(pCommitter, pTbData->suid, pTbData->uid, TSDBROW_SVERSION(pRow)); - if (code) goto _err; - - // append - code = tBlockDataAppendRow(pBlockData, pRow, pCommitter->skmRow.pTSchema, pTbData->uid); - if (code) goto _err; - - tsdbTbDataIterNext(pIter); - pRow = tsdbTbDataIterGet(pIter); - if (pRow) { - TSDBKEY rowKey = TSDBROW_KEY(pRow); - if (tsdbKeyCmprFn(&rowKey, &toKey) >= 0) { - pRow = NULL; - } - } - - if (pBlockData->nRow >= pCommitter->maxRow * 4 / 5) { - _write_block: - code = tsdbCommitDataBlock(pCommitter, NULL); - if (code) goto _err; - } - } - - return code; - -_err: - tsdbError("vgId:%d, tsdb commit table mem data failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbGetNumOfRowsLessThan(STbDataIter *pIter, TSDBKEY key) { - int32_t nRow = 0; - - STbDataIter iter = *pIter; - while (true) { - TSDBROW *pRow = tsdbTbDataIterGet(&iter); - if (pRow == NULL) break; - - int32_t c = tsdbKeyCmprFn(&TSDBROW_KEY(pRow), &key); - if (c < 0) { - nRow++; - tsdbTbDataIterNext(&iter); - } else if (c > 0) { - break; - } else { - ASSERT(0); - } - } - - return nRow; -} - -static int32_t tsdbMergeAsSubBlock(SCommitter *pCommitter, STbDataIter *pIter, SBlock *pBlock) { - int32_t code = 0; - STbData *pTbData = pIter->pTbData; - SBlockData *pBlockData = &pCommitter->dWriter.bData; - - tBlockDataClear(pBlockData); - TSDBROW *pRow = tsdbTbDataIterGet(pIter); - while (true) { - if (pRow == NULL) break; - - code = tsdbCommitterUpdateRowSchema(pCommitter, pTbData->suid, pTbData->uid, TSDBROW_SVERSION(pRow)); - if (code) goto _err; - - code = tBlockDataAppendRow(pBlockData, pRow, pCommitter->skmRow.pTSchema, pTbData->uid); - if (code) goto _err; - - tsdbTbDataIterNext(pIter); - pRow = tsdbTbDataIterGet(pIter); - if (pRow) { - TSDBKEY rowKey = TSDBROW_KEY(pRow); - if (tsdbKeyCmprFn(&rowKey, &pBlock->maxKey) > 0) { - pRow = NULL; - } - } - } - - ASSERT(pBlockData->nRow > 0 && pBlock->nRow + pBlockData->nRow <= pCommitter->maxRow); - - code = tsdbCommitDataBlock(pCommitter, pBlock); - if (code) goto _err; - - return code; - -_err: - tsdbError("vgId:%d, tsdb merge as subblock failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbMergeCommitLast(SCommitter *pCommitter, STbDataIter *pIter) { - int32_t code = 0; - STbData *pTbData = pIter->pTbData; - int32_t nRow = tsdbGetNumOfRowsLessThan(pIter, (TSDBKEY){.ts = pCommitter->maxKey + 1, .version = VERSION_MIN}); - - if (pCommitter->dReader.pRowInfo && tTABLEIDCmprFn(pTbData, pCommitter->dReader.pRowInfo) == 0) { - if (pCommitter->dReader.pRowInfo->suid) { // super table - for (int32_t iRow = pCommitter->dReader.iRow; iRow < pCommitter->dReader.bDatal.nRow; iRow++) { - if (pTbData->uid != pCommitter->dReader.bDatal.aUid[iRow]) break; - nRow++; - } - } else { // normal table - ASSERT(pCommitter->dReader.iRow == 0); - nRow += pCommitter->dReader.bDatal.nRow; - } - } - - if (nRow == 0) goto _exit; - - TSDBROW *pRow = tsdbTbDataIterGet(pIter); - if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { - pRow = NULL; - } - - SRowInfo *pRowInfo = pCommitter->dReader.pRowInfo; - if (pRowInfo && pRowInfo->uid != pTbData->uid) { - pRowInfo = NULL; - } - - while (nRow) { - SBlockData *pBlockData; - int8_t toData; - - if (nRow < pCommitter->minRow) { // to .last - toData = 0; - pBlockData = &pCommitter->dWriter.bDatal; - - // commit and reset block data schema if need - // QUESTION: Is there a case that pBlockData->nRow == 0 but need to change schema ? - if (pBlockData->suid || pBlockData->uid) { - if (pBlockData->suid != pTbData->suid || pBlockData->suid == 0) { - if (pBlockData->nRow > 0) { - code = tsdbCommitLastBlock(pCommitter); - if (code) goto _err; - } - - tBlockDataReset(pBlockData); - } - } - - // set block data schema if need - if (pBlockData->suid == 0 && pBlockData->uid == 0) { - code = tsdbCommitterUpdateTableSchema(pCommitter, pTbData->suid, pTbData->uid); - if (code) goto _err; - - code = - tBlockDataInit(pBlockData, pTbData->suid, pTbData->suid ? 0 : pTbData->uid, pCommitter->skmTable.pTSchema); - if (code) goto _err; - } - - if (pBlockData->nRow + nRow > pCommitter->maxRow) { - code = tsdbCommitLastBlock(pCommitter); - if (code) goto _err; - } - } else { // to .data - toData = 1; - pBlockData = &pCommitter->dWriter.bData; - ASSERT(pBlockData->nRow == 0); - } - - while (pRow && pRowInfo) { - int32_t c = tsdbRowCmprFn(pRow, &pRowInfo->row); - if (c < 0) { - code = tsdbCommitterUpdateRowSchema(pCommitter, pTbData->suid, pTbData->uid, TSDBROW_SVERSION(pRow)); - if (code) goto _err; - - code = tBlockDataAppendRow(pBlockData, pRow, pCommitter->skmRow.pTSchema, pTbData->uid); - if (code) goto _err; - - tsdbTbDataIterNext(pIter); - pRow = tsdbTbDataIterGet(pIter); - if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { - pRow = NULL; - } - } else if (c > 0) { - code = tBlockDataAppendRow(pBlockData, &pRowInfo->row, NULL, pTbData->uid); - if (code) goto _err; - - code = tsdbCommitterNextLastRow(pCommitter); - if (code) goto _err; - - pRowInfo = pCommitter->dReader.pRowInfo; - if (pRowInfo && pRowInfo->uid != pTbData->uid) { - pRowInfo = NULL; - } - } else { - ASSERT(0); - } - - nRow--; - if (toData) { - if (nRow == 0 || pBlockData->nRow >= pCommitter->maxRow * 4 / 5) { - code = tsdbCommitDataBlock(pCommitter, NULL); - if (code) goto _err; - goto _outer_break; - } - } - } - - while (pRow) { - code = tsdbCommitterUpdateRowSchema(pCommitter, pTbData->suid, pTbData->uid, TSDBROW_SVERSION(pRow)); - if (code) goto _err; - - code = tBlockDataAppendRow(pBlockData, pRow, pCommitter->skmRow.pTSchema, pTbData->uid); - if (code) goto _err; - - tsdbTbDataIterNext(pIter); - pRow = tsdbTbDataIterGet(pIter); - if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { - pRow = NULL; - } - - nRow--; - if (toData) { - if (nRow == 0 || pBlockData->nRow >= pCommitter->maxRow * 4 / 5) { - code = tsdbCommitDataBlock(pCommitter, NULL); - if (code) goto _err; - goto _outer_break; - } - } - } - - while (pRowInfo) { - code = tBlockDataAppendRow(pBlockData, &pRowInfo->row, NULL, pTbData->uid); - if (code) goto _err; - - code = tsdbCommitterNextLastRow(pCommitter); - if (code) goto _err; - - pRowInfo = pCommitter->dReader.pRowInfo; - if (pRowInfo && pRowInfo->uid != pTbData->uid) { - pRowInfo = NULL; - } - - nRow--; - if (toData) { - if (nRow == 0 || pBlockData->nRow >= pCommitter->maxRow * 4 / 5) { - code = tsdbCommitDataBlock(pCommitter, NULL); - if (code) goto _err; - goto _outer_break; - } - } - } - - _outer_break: - ASSERT(nRow >= 0); - } - -_exit: - return code; - -_err: - tsdbError("vgId:%d tsdb merge commit last failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbCommitTableData(SCommitter *pCommitter, STbData *pTbData) { - int32_t code = 0; - - ASSERT(pCommitter->dReader.pBlockIdx == NULL || tTABLEIDCmprFn(pCommitter->dReader.pBlockIdx, pTbData) >= 0); - ASSERT(pCommitter->dReader.pRowInfo == NULL || tTABLEIDCmprFn(pCommitter->dReader.pRowInfo, pTbData) >= 0); - - // merge commit table data - STbDataIter iter = {0}; - STbDataIter *pIter = &iter; - TSDBROW *pRow; - - tsdbTbDataIterOpen(pTbData, &(TSDBKEY){.ts = pCommitter->minKey, .version = VERSION_MIN}, 0, pIter); - pRow = tsdbTbDataIterGet(pIter); - if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { - pRow = NULL; - } - - if (pRow == NULL) { - if (pCommitter->dReader.pBlockIdx && tTABLEIDCmprFn(pCommitter->dReader.pBlockIdx, pTbData) == 0) { - SBlockIdx blockIdx = {.suid = pTbData->suid, .uid = pTbData->uid}; - code = tsdbWriteBlock(pCommitter->dWriter.pWriter, &pCommitter->dReader.mBlock, &blockIdx); - if (code) goto _err; - - if (taosArrayPush(pCommitter->dWriter.aBlockIdx, &blockIdx) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - } - - goto _exit; - } - - int32_t iBlock = 0; - SBlock block; - SBlock *pBlock = █ - if (pCommitter->dReader.pBlockIdx && tTABLEIDCmprFn(pTbData, pCommitter->dReader.pBlockIdx) == 0) { - tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pBlock, tGetBlock); - } else { - pBlock = NULL; - } - - code = tsdbCommitterUpdateTableSchema(pCommitter, pTbData->suid, pTbData->uid); - if (code) goto _err; - - tMapDataReset(&pCommitter->dWriter.mBlock); - code = tBlockDataInit(&pCommitter->dReader.bData, pTbData->suid, pTbData->uid, pCommitter->skmTable.pTSchema); - if (code) goto _err; - code = tBlockDataInit(&pCommitter->dWriter.bData, pTbData->suid, pTbData->uid, pCommitter->skmTable.pTSchema); - if (code) goto _err; - - // .data merge - while (pBlock && pRow) { - int32_t c = tBlockCmprFn(pBlock, &(SBlock){.minKey = TSDBROW_KEY(pRow), .maxKey = TSDBROW_KEY(pRow)}); - if (c < 0) { // disk - code = tMapDataPutItem(&pCommitter->dWriter.mBlock, pBlock, tPutBlock); - if (code) goto _err; - - // next - iBlock++; - if (iBlock < pCommitter->dReader.mBlock.nItem) { - tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pBlock, tGetBlock); - } else { - pBlock = NULL; - } - } else if (c > 0) { // memory - code = tsdbCommitTableMemData(pCommitter, pIter, pBlock->minKey); - if (code) goto _err; - - // next - pRow = tsdbTbDataIterGet(pIter); - if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { - pRow = NULL; - } - } else { // merge - int32_t nOvlp = tsdbGetNumOfRowsLessThan(pIter, pBlock->maxKey); - - ASSERT(nOvlp > 0); - - if (pBlock->nRow + nOvlp <= pCommitter->maxRow && pBlock->nSubBlock < TSDB_MAX_SUBBLOCKS) { - code = tsdbMergeAsSubBlock(pCommitter, pIter, pBlock); - if (code) goto _err; - } else { - code = tsdbMergeCommitData(pCommitter, pIter, pBlock); - if (code) goto _err; - } - - // next - pRow = tsdbTbDataIterGet(pIter); - if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { - pRow = NULL; - } - iBlock++; - if (iBlock < pCommitter->dReader.mBlock.nItem) { - tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pBlock, tGetBlock); - } else { - pBlock = NULL; - } - } - } - - while (pBlock) { - code = tMapDataPutItem(&pCommitter->dWriter.mBlock, pBlock, tPutBlock); - if (code) goto _err; - - // next - iBlock++; - if (iBlock < pCommitter->dReader.mBlock.nItem) { - tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pBlock, tGetBlock); - } else { - pBlock = NULL; - } - } - - // .data append and .last merge - code = tsdbMergeCommitLast(pCommitter, pIter); - if (code) goto _err; - - // end - if (pCommitter->dWriter.mBlock.nItem > 0) { - SBlockIdx blockIdx = {.suid = pTbData->suid, .uid = pTbData->uid}; - code = tsdbWriteBlock(pCommitter->dWriter.pWriter, &pCommitter->dWriter.mBlock, &blockIdx); - if (code) goto _err; - - if (taosArrayPush(pCommitter->dWriter.aBlockIdx, &blockIdx) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - } - -_exit: - pRow = tsdbTbDataIterGet(pIter); - if (pRow) { - pCommitter->nextKey = TMIN(pCommitter->nextKey, TSDBROW_TS(pRow)); - } - - return code; - -_err: - tsdbError("vgId:%d tsdb commit table data failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); - return code; -} - static int32_t tsdbCommitFileDataEnd(SCommitter *pCommitter) { int32_t code = 0; @@ -1098,8 +658,8 @@ static int32_t tsdbCommitFileDataEnd(SCommitter *pCommitter) { code = tsdbWriteBlockIdx(pCommitter->dWriter.pWriter, pCommitter->dWriter.aBlockIdx); if (code) goto _err; - // write aBlockL - code = tsdbWriteBlockL(pCommitter->dWriter.pWriter, pCommitter->dWriter.aBlockL); + // write aSttBlk + code = tsdbWriteSttBlk(pCommitter->dWriter.pWriter, pCommitter->dWriter.aSttBlk); if (code) goto _err; // update file header @@ -1130,10 +690,7 @@ _err: static int32_t tsdbMoveCommitData(SCommitter *pCommitter, TABLEID toTable) { int32_t code = 0; - // .data - while (true) { - if (pCommitter->dReader.pBlockIdx == NULL || tTABLEIDCmprFn(pCommitter->dReader.pBlockIdx, &toTable) >= 0) break; - + while (pCommitter->dReader.pBlockIdx && tTABLEIDCmprFn(pCommitter->dReader.pBlockIdx, &toTable) < 0) { SBlockIdx blockIdx = *pCommitter->dReader.pBlockIdx; code = tsdbWriteBlock(pCommitter->dWriter.pWriter, &pCommitter->dReader.mBlock, &blockIdx); if (code) goto _err; @@ -1147,71 +704,6 @@ static int32_t tsdbMoveCommitData(SCommitter *pCommitter, TABLEID toTable) { if (code) goto _err; } - // .last - while (true) { - if (pCommitter->dReader.pRowInfo == NULL || tTABLEIDCmprFn(pCommitter->dReader.pRowInfo, &toTable) >= 0) break; - - SBlockData *pBlockDataR = &pCommitter->dReader.bDatal; - SBlockData *pBlockDataW = &pCommitter->dWriter.bDatal; - tb_uid_t suid = pCommitter->dReader.pRowInfo->suid; - tb_uid_t uid = pCommitter->dReader.pRowInfo->uid; - - ASSERT((pBlockDataR->suid && !pBlockDataR->uid) || (!pBlockDataR->suid && pBlockDataR->uid)); - ASSERT(pBlockDataR->nRow > 0); - - // commit and reset block data schema if need - if (pBlockDataW->suid || pBlockDataW->uid) { - if (pBlockDataW->suid != suid || pBlockDataW->suid == 0) { - if (pBlockDataW->nRow > 0) { - code = tsdbCommitLastBlock(pCommitter); - if (code) goto _err; - } - tBlockDataReset(pBlockDataW); - } - } - - // set block data schema if need - if (pBlockDataW->suid == 0 && pBlockDataW->uid == 0) { - code = tsdbCommitterUpdateTableSchema(pCommitter, suid, uid); - if (code) goto _err; - - code = tBlockDataInit(pBlockDataW, suid, suid ? 0 : uid, pCommitter->skmTable.pTSchema); - if (code) goto _err; - } - - // check if it can make sure that one table data in one block - int32_t nRow = 0; - if (pBlockDataR->suid) { - int32_t iRow = pCommitter->dReader.iRow; - while ((iRow < pBlockDataR->nRow) && (pBlockDataR->aUid[iRow] == uid)) { - nRow++; - iRow++; - } - } else { - ASSERT(pCommitter->dReader.iRow == 0); - nRow = pBlockDataR->nRow; - } - - ASSERT(nRow > 0 && nRow < pCommitter->minRow); - - if (pBlockDataW->nRow + nRow > pCommitter->maxRow) { - ASSERT(pBlockDataW->nRow > 0); - - code = tsdbCommitLastBlock(pCommitter); - if (code) goto _err; - } - - while (nRow > 0) { - code = tBlockDataAppendRow(pBlockDataW, &pCommitter->dReader.pRowInfo->row, NULL, uid); - if (code) goto _err; - - code = tsdbCommitterNextLastRow(pCommitter); - if (code) goto _err; - - nRow--; - } - } - return code; _err: @@ -1219,6 +711,7 @@ _err: return code; } +static int32_t tsdbCommitFileDataImpl(SCommitter *pCommitter); static int32_t tsdbCommitFileData(SCommitter *pCommitter) { int32_t code = 0; STsdb *pTsdb = pCommitter->pTsdb; @@ -1228,33 +721,10 @@ static int32_t tsdbCommitFileData(SCommitter *pCommitter) { code = tsdbCommitFileDataStart(pCommitter); if (code) goto _err; - // commit file data impl - for (int32_t iTbData = 0; iTbData < taosArrayGetSize(pCommitter->aTbDataP); iTbData++) { - STbData *pTbData = (STbData *)taosArrayGetP(pCommitter->aTbDataP, iTbData); - - // move commit until current (suid, uid) - code = tsdbMoveCommitData(pCommitter, *(TABLEID *)pTbData); - if (code) goto _err; - - // commit current table data - code = tsdbCommitTableData(pCommitter, pTbData); - if (code) goto _err; - - // move next reader table data if need - if (pCommitter->dReader.pBlockIdx && tTABLEIDCmprFn(pTbData, pCommitter->dReader.pBlockIdx) == 0) { - code = tsdbCommitterNextTableData(pCommitter); - if (code) goto _err; - } - } - - code = tsdbMoveCommitData(pCommitter, (TABLEID){.suid = INT64_MAX, .uid = INT64_MAX}); + // impl + code = tsdbCommitFileDataImpl(pCommitter); if (code) goto _err; - if (pCommitter->dWriter.bDatal.nRow > 0) { - code = tsdbCommitLastBlock(pCommitter); - if (code) goto _err; - } - // commit file data end code = tsdbCommitFileDataEnd(pCommitter); if (code) goto _err; @@ -1287,12 +757,12 @@ static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter) { pCommitter->minRow = pTsdb->pVnode->config.tsdbCfg.minRows; pCommitter->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; pCommitter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; + pCommitter->maxLast = TSDB_DEFAULT_STT_FILE; // TODO: make it as a config pCommitter->aTbDataP = tsdbMemTableGetTbDataArray(pTsdb->imem); if (pCommitter->aTbDataP == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - code = tsdbFSCopy(pTsdb, &pCommitter->fs); if (code) goto _err; @@ -1306,7 +776,7 @@ _err: static int32_t tsdbCommitDataStart(SCommitter *pCommitter) { int32_t code = 0; - // Reader + // reader pCommitter->dReader.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); if (pCommitter->dReader.aBlockIdx == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -1316,24 +786,28 @@ static int32_t tsdbCommitDataStart(SCommitter *pCommitter) { code = tBlockDataCreate(&pCommitter->dReader.bData); if (code) goto _exit; - pCommitter->dReader.aBlockL = taosArrayInit(0, sizeof(SBlockL)); - if (pCommitter->dReader.aBlockL == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; + // merger + for (int32_t iStt = 0; iStt < TSDB_MAX_STT_FILE; iStt++) { + SDataIter *pIter = &pCommitter->aDataIter[iStt]; + pIter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); + if (pIter->aSttBlk == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + code = tBlockDataCreate(&pIter->bData); + if (code) goto _exit; } - code = tBlockDataCreate(&pCommitter->dReader.bDatal); - if (code) goto _exit; - - // Writer + // writer pCommitter->dWriter.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); if (pCommitter->dWriter.aBlockIdx == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } - pCommitter->dWriter.aBlockL = taosArrayInit(0, sizeof(SBlockL)); - if (pCommitter->dWriter.aBlockL == NULL) { + pCommitter->dWriter.aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); + if (pCommitter->dWriter.aSttBlk == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } @@ -1349,16 +823,21 @@ _exit: } static void tsdbCommitDataEnd(SCommitter *pCommitter) { - // Reader + // reader taosArrayDestroy(pCommitter->dReader.aBlockIdx); tMapDataClear(&pCommitter->dReader.mBlock); tBlockDataDestroy(&pCommitter->dReader.bData, 1); - taosArrayDestroy(pCommitter->dReader.aBlockL); - tBlockDataDestroy(&pCommitter->dReader.bDatal, 1); - // Writer + // merger + for (int32_t iStt = 0; iStt < TSDB_MAX_STT_FILE; iStt++) { + SDataIter *pIter = &pCommitter->aDataIter[iStt]; + taosArrayDestroy(pIter->aSttBlk); + tBlockDataDestroy(&pIter->bData, 1); + } + + // writer taosArrayDestroy(pCommitter->dWriter.aBlockIdx); - taosArrayDestroy(pCommitter->dWriter.aBlockL); + taosArrayDestroy(pCommitter->dWriter.aSttBlk); tMapDataClear(&pCommitter->dWriter.mBlock); tBlockDataDestroy(&pCommitter->dWriter.bData, 1); tBlockDataDestroy(&pCommitter->dWriter.bDatal, 1); @@ -1389,7 +868,7 @@ static int32_t tsdbCommitData(SCommitter *pCommitter) { tsdbCommitDataEnd(pCommitter); _exit: - tsdbDebug("vgId:%d, commit data done, nRow:%" PRId64, TD_VID(pTsdb->pVnode), pMemTable->nRow); + tsdbInfo("vgId:%d, commit data done, nRow:%" PRId64, TD_VID(pTsdb->pVnode), pMemTable->nRow); return code; _err: @@ -1515,6 +994,11 @@ static int32_t tsdbEndCommit(SCommitter *pCommitter, int32_t eno) { tsdbFSDestroy(&pCommitter->fs); taosArrayDestroy(pCommitter->aTbDataP); + // if (pCommitter->toMerge) { + // code = tsdbMerge(pTsdb); + // if (code) goto _err; + // } + tsdbInfo("vgId:%d, tsdb end commit", TD_VID(pTsdb->pVnode)); return code; @@ -1522,3 +1006,478 @@ _err: tsdbError("vgId:%d, tsdb end commit failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } + +// ================================================================================ + +static FORCE_INLINE SRowInfo *tsdbGetCommitRow(SCommitter *pCommitter) { + return (pCommitter->pIter) ? &pCommitter->pIter->r : NULL; +} + +static int32_t tsdbNextCommitRow(SCommitter *pCommitter) { + int32_t code = 0; + + if (pCommitter->pIter) { + SDataIter *pIter = pCommitter->pIter; + if (pCommitter->pIter->type == MEMORY_DATA_ITER) { // memory + tsdbTbDataIterNext(&pIter->iter); + TSDBROW *pRow = tsdbTbDataIterGet(&pIter->iter); + while (true) { + if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { + pCommitter->nextKey = TMIN(pCommitter->nextKey, TSDBROW_TS(pRow)); + pRow = NULL; + } + + if (pRow) { + pIter->r.suid = pIter->iter.pTbData->suid; + pIter->r.uid = pIter->iter.pTbData->uid; + pIter->r.row = *pRow; + break; + } + + pIter->iTbDataP++; + if (pIter->iTbDataP < taosArrayGetSize(pCommitter->aTbDataP)) { + STbData *pTbData = (STbData *)taosArrayGetP(pCommitter->aTbDataP, pIter->iTbDataP); + TSDBKEY keyFrom = {.ts = pCommitter->minKey, .version = VERSION_MIN}; + tsdbTbDataIterOpen(pTbData, &keyFrom, 0, &pIter->iter); + pRow = tsdbTbDataIterGet(&pIter->iter); + continue; + } else { + pCommitter->pIter = NULL; + break; + } + } + } else if (pCommitter->pIter->type == LAST_DATA_ITER) { // last file + pIter->iRow++; + if (pIter->iRow < pIter->bData.nRow) { + pIter->r.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[pIter->iRow]; + pIter->r.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow); + } else { + pIter->iSttBlk++; + if (pIter->iSttBlk < taosArrayGetSize(pIter->aSttBlk)) { + SSttBlk *pSttBlk = (SSttBlk *)taosArrayGet(pIter->aSttBlk, pIter->iSttBlk); + + code = tsdbReadSttBlock(pCommitter->dReader.pReader, pIter->iStt, pSttBlk, &pIter->bData); + if (code) goto _exit; + + pIter->iRow = 0; + pIter->r.suid = pIter->bData.suid; + pIter->r.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[0]; + pIter->r.row = tsdbRowFromBlockData(&pIter->bData, 0); + } else { + pCommitter->pIter = NULL; + } + } + } else { + ASSERT(0); + } + + // compare with min in RB Tree + pIter = (SDataIter *)tRBTreeMin(&pCommitter->rbt); + if (pCommitter->pIter && pIter) { + int32_t c = tRowInfoCmprFn(&pCommitter->pIter->r, &pIter->r); + if (c > 0) { + tRBTreePut(&pCommitter->rbt, (SRBTreeNode *)pCommitter->pIter); + pCommitter->pIter = NULL; + } else { + ASSERT(c); + } + } + } + + if (pCommitter->pIter == NULL) { + pCommitter->pIter = (SDataIter *)tRBTreeMin(&pCommitter->rbt); + if (pCommitter->pIter) { + tRBTreeDrop(&pCommitter->rbt, (SRBTreeNode *)pCommitter->pIter); + } + } + +_exit: + return code; +} + +static int32_t tsdbCommitAheadBlock(SCommitter *pCommitter, SDataBlk *pDataBlk) { + int32_t code = 0; + SBlockData *pBlockData = &pCommitter->dWriter.bData; + SRowInfo *pRowInfo = tsdbGetCommitRow(pCommitter); + TABLEID id = {.suid = pRowInfo->suid, .uid = pRowInfo->uid}; + + tBlockDataClear(pBlockData); + while (pRowInfo) { + ASSERT(pRowInfo->row.type == 0); + code = tsdbCommitterUpdateRowSchema(pCommitter, id.suid, id.uid, TSDBROW_SVERSION(&pRowInfo->row)); + if (code) goto _err; + + code = tBlockDataAppendRow(pBlockData, &pRowInfo->row, pCommitter->skmRow.pTSchema, id.uid); + if (code) goto _err; + + code = tsdbNextCommitRow(pCommitter); + if (code) goto _err; + + pRowInfo = tsdbGetCommitRow(pCommitter); + if (pRowInfo) { + if (pRowInfo->suid != id.suid || pRowInfo->uid != id.uid) { + pRowInfo = NULL; + } else { + TSDBKEY tKey = TSDBROW_KEY(&pRowInfo->row); + if (tsdbKeyCmprFn(&tKey, &pDataBlk->minKey) >= 0) pRowInfo = NULL; + } + } + + if (pBlockData->nRow >= pCommitter->maxRow) { + code = tsdbCommitDataBlock(pCommitter); + if (code) goto _err; + } + } + + if (pBlockData->nRow) { + code = tsdbCommitDataBlock(pCommitter); + if (code) goto _err; + } + + return code; + +_err: + tsdbError("vgId:%d, tsdb commit ahead block failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); + return code; +} + +static int32_t tsdbCommitMergeBlock(SCommitter *pCommitter, SDataBlk *pDataBlk) { + int32_t code = 0; + SRowInfo *pRowInfo = tsdbGetCommitRow(pCommitter); + TABLEID id = {.suid = pRowInfo->suid, .uid = pRowInfo->uid}; + SBlockData *pBDataR = &pCommitter->dReader.bData; + SBlockData *pBDataW = &pCommitter->dWriter.bData; + + code = tsdbReadDataBlock(pCommitter->dReader.pReader, pDataBlk, pBDataR); + if (code) goto _err; + + tBlockDataClear(pBDataW); + int32_t iRow = 0; + TSDBROW row = tsdbRowFromBlockData(pBDataR, 0); + TSDBROW *pRow = &row; + + while (pRow && pRowInfo) { + int32_t c = tsdbRowCmprFn(pRow, &pRowInfo->row); + if (c < 0) { + code = tBlockDataAppendRow(pBDataW, pRow, NULL, id.uid); + if (code) goto _err; + + iRow++; + if (iRow < pBDataR->nRow) { + row = tsdbRowFromBlockData(pBDataR, iRow); + } else { + pRow = NULL; + } + } else if (c > 0) { + ASSERT(pRowInfo->row.type == 0); + code = tsdbCommitterUpdateRowSchema(pCommitter, id.suid, id.uid, TSDBROW_SVERSION(&pRowInfo->row)); + if (code) goto _err; + + code = tBlockDataAppendRow(pBDataW, &pRowInfo->row, pCommitter->skmRow.pTSchema, id.uid); + if (code) goto _err; + + code = tsdbNextCommitRow(pCommitter); + if (code) goto _err; + + pRowInfo = tsdbGetCommitRow(pCommitter); + if (pRowInfo) { + if (pRowInfo->suid != id.suid || pRowInfo->uid != id.uid) { + pRowInfo = NULL; + } else { + TSDBKEY tKey = TSDBROW_KEY(&pRowInfo->row); + if (tsdbKeyCmprFn(&tKey, &pDataBlk->maxKey) > 0) pRowInfo = NULL; + } + } + } else { + ASSERT(0); + } + + if (pBDataW->nRow >= pCommitter->maxRow) { + code = tsdbCommitDataBlock(pCommitter); + if (code) goto _err; + } + } + + while (pRow) { + code = tBlockDataAppendRow(pBDataW, pRow, NULL, id.uid); + if (code) goto _err; + + iRow++; + if (iRow < pBDataR->nRow) { + row = tsdbRowFromBlockData(pBDataR, iRow); + } else { + pRow = NULL; + } + + if (pBDataW->nRow >= pCommitter->maxRow) { + code = tsdbCommitDataBlock(pCommitter); + if (code) goto _err; + } + } + + if (pBDataW->nRow) { + code = tsdbCommitDataBlock(pCommitter); + if (code) goto _err; + } + + return code; + +_err: + tsdbError("vgId:%d, tsdb commit merge block failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); + return code; +} + +static int32_t tsdbMergeTableData(SCommitter *pCommitter, TABLEID id) { + int32_t code = 0; + SBlockIdx *pBlockIdx = pCommitter->dReader.pBlockIdx; + + ASSERT(pBlockIdx == NULL || tTABLEIDCmprFn(pBlockIdx, &id) >= 0); + if (pBlockIdx && pBlockIdx->suid == id.suid && pBlockIdx->uid == id.uid) { + int32_t iBlock = 0; + SDataBlk block; + SDataBlk *pDataBlk = █ + SRowInfo *pRowInfo = tsdbGetCommitRow(pCommitter); + + ASSERT(pRowInfo->suid == id.suid && pRowInfo->uid == id.uid); + + tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pDataBlk, tGetDataBlk); + while (pDataBlk && pRowInfo) { + SDataBlk tBlock = {.minKey = TSDBROW_KEY(&pRowInfo->row), .maxKey = TSDBROW_KEY(&pRowInfo->row)}; + int32_t c = tDataBlkCmprFn(pDataBlk, &tBlock); + + if (c < 0) { + code = tMapDataPutItem(&pCommitter->dWriter.mBlock, pDataBlk, tPutDataBlk); + if (code) goto _err; + + iBlock++; + if (iBlock < pCommitter->dReader.mBlock.nItem) { + tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pDataBlk, tGetDataBlk); + } else { + pDataBlk = NULL; + } + } else if (c > 0) { + code = tsdbCommitAheadBlock(pCommitter, pDataBlk); + if (code) goto _err; + + pRowInfo = tsdbGetCommitRow(pCommitter); + if (pRowInfo && (pRowInfo->suid != id.suid || pRowInfo->uid != id.uid)) pRowInfo = NULL; + } else { + code = tsdbCommitMergeBlock(pCommitter, pDataBlk); + if (code) goto _err; + + iBlock++; + if (iBlock < pCommitter->dReader.mBlock.nItem) { + tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pDataBlk, tGetDataBlk); + } else { + pDataBlk = NULL; + } + pRowInfo = tsdbGetCommitRow(pCommitter); + if (pRowInfo && (pRowInfo->suid != id.suid || pRowInfo->uid != id.uid)) pRowInfo = NULL; + } + } + + while (pDataBlk) { + code = tMapDataPutItem(&pCommitter->dWriter.mBlock, pDataBlk, tPutDataBlk); + if (code) goto _err; + + iBlock++; + if (iBlock < pCommitter->dReader.mBlock.nItem) { + tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pDataBlk, tGetDataBlk); + } else { + pDataBlk = NULL; + } + } + + code = tsdbCommitterNextTableData(pCommitter); + if (code) goto _err; + } + +_exit: + return code; + +_err: + tsdbError("vgId:%d tsdb merge table data failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); + return code; +} + +static int32_t tsdbInitLastBlockIfNeed(SCommitter *pCommitter, TABLEID id) { + int32_t code = 0; + + SBlockData *pBDatal = &pCommitter->dWriter.bDatal; + if (pBDatal->suid || pBDatal->uid) { + if ((pBDatal->suid != id.suid) || (id.suid == 0)) { + if (pBDatal->nRow) { + code = tsdbCommitLastBlock(pCommitter); + if (code) goto _exit; + } + tBlockDataReset(pBDatal); + } + } + + if (!pBDatal->suid && !pBDatal->uid) { + ASSERT(pCommitter->skmTable.suid == id.suid); + ASSERT(pCommitter->skmTable.uid == id.uid); + code = tBlockDataInit(pBDatal, id.suid, id.suid ? 0 : id.uid, pCommitter->skmTable.pTSchema); + if (code) goto _exit; + } + +_exit: + return code; +} + +static int32_t tsdbAppendLastBlock(SCommitter *pCommitter) { + int32_t code = 0; + + SBlockData *pBData = &pCommitter->dWriter.bData; + SBlockData *pBDatal = &pCommitter->dWriter.bDatal; + + TABLEID id = {.suid = pBData->suid, .uid = pBData->uid}; + code = tsdbInitLastBlockIfNeed(pCommitter, id); + if (code) goto _err; + + for (int32_t iRow = 0; iRow < pBData->nRow; iRow++) { + TSDBROW row = tsdbRowFromBlockData(pBData, iRow); + code = tBlockDataAppendRow(pBDatal, &row, NULL, pBData->uid); + if (code) goto _err; + + if (pBDatal->nRow >= pCommitter->maxRow) { + code = tsdbCommitLastBlock(pCommitter); + if (code) goto _err; + } + } + + return code; + +_err: + return code; +} + +static int32_t tsdbCommitTableData(SCommitter *pCommitter, TABLEID id) { + int32_t code = 0; + + SRowInfo *pRowInfo = tsdbGetCommitRow(pCommitter); + if (pRowInfo && (pRowInfo->suid != id.suid || pRowInfo->uid != id.uid)) { + pRowInfo = NULL; + } + + if (pRowInfo == NULL) goto _exit; + + SBlockData *pBData; + if (pCommitter->toLastOnly) { + pBData = &pCommitter->dWriter.bDatal; + code = tsdbInitLastBlockIfNeed(pCommitter, id); + if (code) goto _err; + } else { + pBData = &pCommitter->dWriter.bData; + ASSERT(pBData->nRow == 0); + } + + while (pRowInfo) { + STSchema *pTSchema = NULL; + if (pRowInfo->row.type == 0) { + code = tsdbCommitterUpdateRowSchema(pCommitter, id.suid, id.uid, TSDBROW_SVERSION(&pRowInfo->row)); + if (code) goto _err; + pTSchema = pCommitter->skmRow.pTSchema; + } + + code = tBlockDataAppendRow(pBData, &pRowInfo->row, pTSchema, id.uid); + if (code) goto _err; + + code = tsdbNextCommitRow(pCommitter); + if (code) goto _err; + + pRowInfo = tsdbGetCommitRow(pCommitter); + if (pRowInfo && (pRowInfo->suid != id.suid || pRowInfo->uid != id.uid)) { + pRowInfo = NULL; + } + + if (pBData->nRow >= pCommitter->maxRow) { + if (pCommitter->toLastOnly) { + code = tsdbCommitLastBlock(pCommitter); + if (code) goto _err; + } else { + code = tsdbCommitDataBlock(pCommitter); + if (code) goto _err; + } + } + } + + if (!pCommitter->toLastOnly && pBData->nRow) { + if (pBData->nRow > pCommitter->minRow) { + code = tsdbCommitDataBlock(pCommitter); + if (code) goto _err; + } else { + code = tsdbAppendLastBlock(pCommitter); + if (code) goto _err; + } + } + +_exit: + return code; + +_err: + tsdbError("vgId:%d tsdb commit table data failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); + return code; +} + +static int32_t tsdbCommitFileDataImpl(SCommitter *pCommitter) { + int32_t code = 0; + + SRowInfo *pRowInfo; + TABLEID id = {0}; + while ((pRowInfo = tsdbGetCommitRow(pCommitter)) != NULL) { + ASSERT(pRowInfo->suid != id.suid || pRowInfo->uid != id.uid); + id.suid = pRowInfo->suid; + id.uid = pRowInfo->uid; + + code = tsdbMoveCommitData(pCommitter, id); + if (code) goto _err; + + // start + tMapDataReset(&pCommitter->dWriter.mBlock); + + // impl + code = tsdbCommitterUpdateTableSchema(pCommitter, id.suid, id.uid); + if (code) goto _err; + code = tBlockDataInit(&pCommitter->dReader.bData, id.suid, id.uid, pCommitter->skmTable.pTSchema); + if (code) goto _err; + code = tBlockDataInit(&pCommitter->dWriter.bData, id.suid, id.uid, pCommitter->skmTable.pTSchema); + if (code) goto _err; + + /* merge with data in .data file */ + code = tsdbMergeTableData(pCommitter, id); + if (code) goto _err; + + /* handle remain table data */ + code = tsdbCommitTableData(pCommitter, id); + if (code) goto _err; + + // end + if (pCommitter->dWriter.mBlock.nItem > 0) { + SBlockIdx blockIdx = {.suid = id.suid, .uid = id.uid}; + code = tsdbWriteBlock(pCommitter->dWriter.pWriter, &pCommitter->dWriter.mBlock, &blockIdx); + if (code) goto _err; + + if (taosArrayPush(pCommitter->dWriter.aBlockIdx, &blockIdx) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + } + } + + id.suid = INT64_MAX; + id.uid = INT64_MAX; + code = tsdbMoveCommitData(pCommitter, id); + if (code) goto _err; + + if (pCommitter->dWriter.bDatal.nRow > 0) { + code = tsdbCommitLastBlock(pCommitter); + if (code) goto _err; + } + + return code; + +_err: + tsdbError("vgId:%d tsdb commit file data impl failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); + return code; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbCompact.c b/source/dnode/vnode/src/tsdb/tsdbCompact.c new file mode 100644 index 0000000000..fb3917be64 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbCompact.c @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdb.h" + +typedef struct { + STsdb *pTsdb; + STsdbFS fs; +} STsdbCompactor; + +int32_t tsdbCompact(STsdb *pTsdb) { + int32_t code = 0; + // TODO + return code; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbCompress.c b/source/dnode/vnode/src/tsdb/tsdbCompress.c new file mode 100644 index 0000000000..76be7c1070 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbCompress.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdb.h" + +// Integer ===================================================== +typedef struct { + int8_t rawCopy; + int64_t prevVal; + int32_t nVal; + int32_t nBuf; + uint8_t *pBuf; +} SIntCompressor; + +#define I64_SAFE_ADD(a, b) (((a) >= 0 && (b) <= INT64_MAX - (b)) || ((a) < 0 && (b) >= INT64_MIN - (a))) +#define SIMPLE8B_MAX ((uint64_t)1152921504606846974LL) + +static int32_t tsdbCmprI64(SIntCompressor *pCompressor, int64_t val) { + int32_t code = 0; + + // raw copy + if (pCompressor->rawCopy) { + memcpy(pCompressor->pBuf + pCompressor->nBuf, &val, sizeof(val)); + pCompressor->nBuf += sizeof(val); + pCompressor->nVal++; + goto _exit; + } + + if (!I64_SAFE_ADD(val, pCompressor->prevVal)) { + pCompressor->rawCopy = 1; + // TODO: decompress and copy + pCompressor->nVal++; + goto _exit; + } + + int64_t diff = val - pCompressor->prevVal; + uint8_t zigzag = ZIGZAGE(int64_t, diff); + + if (zigzag >= SIMPLE8B_MAX) { + pCompressor->rawCopy = 1; + // TODO: decompress and copy + pCompressor->nVal++; + goto _exit; + } + +_exit: + return code; +} + +// Timestamp ===================================================== + +// Float ===================================================== \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbDiskData.c b/source/dnode/vnode/src/tsdb/tsdbDiskData.c new file mode 100644 index 0000000000..3bd71f0ea6 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbDiskData.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdb.h" + +typedef struct SDiskColBuilder SDiskColBuilder; +struct SDiskColBuilder { + uint8_t flags; + uint8_t *pBitMap; + int32_t *aOffset; + int32_t nData; + uint8_t *pData; +}; + +int32_t tDiskColAddVal(SDiskColBuilder *pBuilder, SColVal *pColVal) { + int32_t code = 0; + // TODO + return code; +} + +// ================================================================ +typedef struct SDiskDataBuilder SDiskDataBuilder; +struct SDiskDataBuilder { + SDiskDataHdr hdr; + SArray *aBlockCol; // SArray +}; + +int32_t tDiskDataBuilderCreate(SDiskDataBuilder **ppBuilder) { + int32_t code = 0; + // TODO + return code; +} + +void tDiskDataBuilderDestroy(SDiskDataBuilder *pBuilder) { + // TODO +} + +void tDiskDataBuilderInit(SDiskDataBuilder *pBuilder, int64_t suid, int64_t uid, STSchema *pTSchema, int8_t cmprAlg) { + pBuilder->hdr = (SDiskDataHdr){.delimiter = TSDB_FILE_DLMT, // + .fmtVer = 0, + .suid = suid, + .uid = uid, + .cmprAlg = cmprAlg}; +} + +void tDiskDataBuilderReset(SDiskDataBuilder *pBuilder) { + // TODO +} + +int32_t tDiskDataBuilderAddRow(SDiskDataBuilder *pBuilder, TSDBROW *pRow, STSchema *pTSchema, int64_t uid) { + int32_t code = 0; + + // uid (todo) + + // version (todo) + + // TSKEY (todo) + + SRowIter iter = {0}; + tRowIterInit(&iter, pRow, pTSchema); + + for (int32_t iDiskCol = 0; iDiskCol < 0; iDiskCol++) { + } + + return code; +} + +int32_t tDiskDataBuilderGet(SDiskDataBuilder *pBuilder, uint8_t **ppData) { + int32_t code = 0; + // TODO + return code; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index 247de99338..8ab733aa56 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -21,6 +21,9 @@ static int32_t tsdbEncodeFS(uint8_t *p, STsdbFS *pFS) { int8_t hasDel = pFS->pDelFile ? 1 : 0; uint32_t nSet = taosArrayGetSize(pFS->aDFileSet); + // version + n += tPutI8(p ? p + n : p, 0); + // SDelFile n += tPutI8(p ? p + n : p, hasDel); if (hasDel) { @@ -110,7 +113,7 @@ _err: // taosRemoveFile(fname); // } -// // last +// // stt // if (isSameDisk && pFrom->pLastF->commitID == pTo->pLastF->commitID) { // if (pFrom->pLastF->size > pTo->pLastF->size) { // code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_LAST_FILE); @@ -140,7 +143,7 @@ _err: // tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); // taosRemoveFile(fname); -// // last +// // stt // tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); // taosRemoveFile(fname); @@ -254,8 +257,10 @@ void tsdbFSDestroy(STsdbFS *pFS) { SDFileSet *pSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet); taosMemoryFree(pSet->pHeadF); taosMemoryFree(pSet->pDataF); - taosMemoryFree(pSet->pLastF); taosMemoryFree(pSet->pSmaF); + for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { + taosMemoryFree(pSet->aSttF[iStt]); + } } taosArrayDestroy(pFS->aDFileSet); @@ -290,7 +295,7 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (size != pSet->pHeadF->size) { + if (size != LOGIC_TO_FILE_SIZE(pSet->pHeadF->size, TSDB_DEFAULT_PAGE_SIZE)) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } @@ -301,38 +306,40 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (size < pSet->pDataF->size) { + if (size < LOGIC_TO_FILE_SIZE(pSet->pDataF->size, TSDB_DEFAULT_PAGE_SIZE)) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pSet->pDataF->size) { + } else if (size > LOGIC_TO_FILE_SIZE(pSet->pDataF->size, TSDB_DEFAULT_PAGE_SIZE)) { code = tsdbDFileRollback(pTsdb, pSet, TSDB_DATA_FILE); if (code) goto _err; } - // last =========== - tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); - if (taosStatFile(fname, &size, NULL)) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - if (size != pSet->pLastF->size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - // sma ============= tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (size < pSet->pSmaF->size) { + if (size < LOGIC_TO_FILE_SIZE(pSet->pSmaF->size, TSDB_DEFAULT_PAGE_SIZE)) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pSet->pSmaF->size) { + } else if (size > LOGIC_TO_FILE_SIZE(pSet->pSmaF->size, TSDB_DEFAULT_PAGE_SIZE)) { code = tsdbDFileRollback(pTsdb, pSet, TSDB_SMA_FILE); if (code) goto _err; } + + // stt =========== + for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { + tsdbSttFileName(pTsdb, pSet->diskId, pSet->fid, pSet->aSttF[iStt], fname); + if (taosStatFile(fname, &size, NULL)) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + if (size != LOGIC_TO_FILE_SIZE(pSet->aSttF[iStt]->size, TSDB_DEFAULT_PAGE_SIZE)) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _err; + } + } } { @@ -360,10 +367,12 @@ static int32_t tsdbRecoverFS(STsdb *pTsdb, uint8_t *pData, int64_t nData) { int32_t code = 0; int8_t hasDel; uint32_t nSet; - int32_t n; + int32_t n = 0; + + // version + n += tGetI8(pData + n, NULL); // SDelFile - n = 0; n += tGetI8(pData + n, &hasDel); if (hasDel) { pTsdb->fs.pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); @@ -382,41 +391,15 @@ static int32_t tsdbRecoverFS(STsdb *pTsdb, uint8_t *pData, int64_t nData) { taosArrayClear(pTsdb->fs.aDFileSet); n += tGetU32v(pData + n, &nSet); for (uint32_t iSet = 0; iSet < nSet; iSet++) { - SDFileSet fSet; + SDFileSet fSet = {0}; - // head - fSet.pHeadF = (SHeadFile *)taosMemoryCalloc(1, sizeof(SHeadFile)); - if (fSet.pHeadF == NULL) { + int32_t nt = tGetDFileSet(pData + n, &fSet); + if (nt < 0) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - fSet.pHeadF->nRef = 1; - // data - fSet.pDataF = (SDataFile *)taosMemoryCalloc(1, sizeof(SDataFile)); - if (fSet.pDataF == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - fSet.pDataF->nRef = 1; - - // last - fSet.pLastF = (SLastFile *)taosMemoryCalloc(1, sizeof(SLastFile)); - if (fSet.pLastF == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - fSet.pLastF->nRef = 1; - - // sma - fSet.pSmaF = (SSmaFile *)taosMemoryCalloc(1, sizeof(SSmaFile)); - if (fSet.pSmaF == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - fSet.pSmaF->nRef = 1; - - n += tGetDFileSet(pData + n, &fSet); + n += nt; if (taosArrayPush(pTsdb->fs.aDFileSet, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -532,13 +515,15 @@ int32_t tsdbFSClose(STsdb *pTsdb) { ASSERT(pSet->pDataF->nRef == 1); taosMemoryFree(pSet->pDataF); - // last - ASSERT(pSet->pLastF->nRef == 1); - taosMemoryFree(pSet->pLastF); - // sma ASSERT(pSet->pSmaF->nRef == 1); taosMemoryFree(pSet->pSmaF); + + // stt + for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { + ASSERT(pSet->aSttF[iStt]->nRef == 1); + taosMemoryFree(pSet->aSttF[iStt]); + } } taosArrayDestroy(pTsdb->fs.aDFileSet); @@ -586,15 +571,7 @@ int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS) { } *fSet.pDataF = *pSet->pDataF; - // data - fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); - if (fSet.pLastF == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; - } - *fSet.pLastF = *pSet->pLastF; - - // last + // sma fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); if (fSet.pSmaF == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -602,6 +579,16 @@ int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS) { } *fSet.pSmaF = *pSet->pSmaF; + // stt + for (fSet.nSttF = 0; fSet.nSttF < pSet->nSttF; fSet.nSttF++) { + fSet.aSttF[fSet.nSttF] = (SSttFile *)taosMemoryMalloc(sizeof(SSttFile)); + if (fSet.aSttF[fSet.nSttF] == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.aSttF[fSet.nSttF] = *pSet->aSttF[fSet.nSttF]; + } + if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; @@ -651,14 +638,38 @@ int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet) { if (c == 0) { *pDFileSet->pHeadF = *pSet->pHeadF; *pDFileSet->pDataF = *pSet->pDataF; - *pDFileSet->pLastF = *pSet->pLastF; *pDFileSet->pSmaF = *pSet->pSmaF; + // stt + if (pSet->nSttF > pDFileSet->nSttF) { + ASSERT(pSet->nSttF == pDFileSet->nSttF + 1); + + pDFileSet->aSttF[pDFileSet->nSttF] = (SSttFile *)taosMemoryMalloc(sizeof(SSttFile)); + if (pDFileSet->aSttF[pDFileSet->nSttF] == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *pDFileSet->aSttF[pDFileSet->nSttF] = *pSet->aSttF[pSet->nSttF - 1]; + pDFileSet->nSttF++; + } else if (pSet->nSttF < pDFileSet->nSttF) { + ASSERT(pSet->nSttF == 1); + for (int32_t iStt = 1; iStt < pDFileSet->nSttF; iStt++) { + taosMemoryFree(pDFileSet->aSttF[iStt]); + } + + *pDFileSet->aSttF[0] = *pSet->aSttF[0]; + pDFileSet->nSttF = 1; + } else { + for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { + *pDFileSet->aSttF[iStt] = *pSet->aSttF[iStt]; + } + } goto _exit; } } - SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid}; + ASSERT(pSet->nSttF == 1); + SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid, .nSttF = 1}; // head fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); @@ -676,15 +687,7 @@ int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet) { } *fSet.pDataF = *pSet->pDataF; - // data - fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); - if (fSet.pLastF == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; - } - *fSet.pLastF = *pSet->pLastF; - - // last + // sma fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); if (fSet.pSmaF == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -692,6 +695,14 @@ int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet) { } *fSet.pSmaF = *pSet->pSmaF; + // stt + fSet.aSttF[0] = (SSttFile *)taosMemoryMalloc(sizeof(SSttFile)); + if (fSet.aSttF[0] == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.aSttF[0] = *pSet->aSttF[0]; + if (taosArrayInsert(pFS->aDFileSet, idx, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; @@ -836,27 +847,6 @@ int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFSNew) { pSetOld->pDataF->size = pSetNew->pDataF->size; } - // last - fSet.pLastF = pSetOld->pLastF; - if ((!sameDisk) || (pSetOld->pLastF->commitID != pSetNew->pLastF->commitID)) { - pSetOld->pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); - if (pSetOld->pLastF == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - *pSetOld->pLastF = *pSetNew->pLastF; - pSetOld->pLastF->nRef = 1; - - nRef = atomic_sub_fetch_32(&fSet.pLastF->nRef, 1); - if (nRef == 0) { - tsdbLastFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pLastF, fname); - taosRemoveFile(fname); - taosMemoryFree(fSet.pLastF); - } - } else { - ASSERT(pSetOld->pLastF->size == pSetNew->pLastF->size); - } - // sma fSet.pSmaF = pSetOld->pSmaF; if ((!sameDisk) || (pSetOld->pSmaF->commitID != pSetNew->pSmaF->commitID)) { @@ -879,6 +869,84 @@ int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFSNew) { pSetOld->pSmaF->size = pSetNew->pSmaF->size; } + // stt + if (sameDisk) { + if (pSetNew->nSttF > pSetOld->nSttF) { + ASSERT(pSetNew->nSttF = pSetOld->nSttF + 1); + pSetOld->aSttF[pSetOld->nSttF] = (SSttFile *)taosMemoryMalloc(sizeof(SSttFile)); + if (pSetOld->aSttF[pSetOld->nSttF] == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->aSttF[pSetOld->nSttF] = *pSetNew->aSttF[pSetOld->nSttF]; + pSetOld->aSttF[pSetOld->nSttF]->nRef = 1; + pSetOld->nSttF++; + } else if (pSetNew->nSttF < pSetOld->nSttF) { + ASSERT(pSetNew->nSttF == 1); + for (int32_t iStt = 0; iStt < pSetOld->nSttF; iStt++) { + SSttFile *pSttFile = pSetOld->aSttF[iStt]; + nRef = atomic_sub_fetch_32(&pSttFile->nRef, 1); + if (nRef == 0) { + tsdbSttFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSttFile, fname); + taosRemoveFile(fname); + taosMemoryFree(pSttFile); + } + pSetOld->aSttF[iStt] = NULL; + } + + pSetOld->nSttF = 1; + pSetOld->aSttF[0] = (SSttFile *)taosMemoryMalloc(sizeof(SSttFile)); + if (pSetOld->aSttF[0] == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->aSttF[0] = *pSetNew->aSttF[0]; + pSetOld->aSttF[0]->nRef = 1; + } else { + for (int32_t iStt = 0; iStt < pSetOld->nSttF; iStt++) { + if (pSetOld->aSttF[iStt]->commitID != pSetNew->aSttF[iStt]->commitID) { + SSttFile *pSttFile = pSetOld->aSttF[iStt]; + nRef = atomic_sub_fetch_32(&pSttFile->nRef, 1); + if (nRef == 0) { + tsdbSttFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSttFile, fname); + taosRemoveFile(fname); + taosMemoryFree(pSttFile); + } + + pSetOld->aSttF[iStt] = (SSttFile *)taosMemoryMalloc(sizeof(SSttFile)); + if (pSetOld->aSttF[iStt] == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->aSttF[iStt] = *pSetNew->aSttF[iStt]; + pSetOld->aSttF[iStt]->nRef = 1; + } else { + ASSERT(pSetOld->aSttF[iStt]->size == pSetOld->aSttF[iStt]->size); + ASSERT(pSetOld->aSttF[iStt]->offset == pSetOld->aSttF[iStt]->offset); + } + } + } + } else { + ASSERT(pSetOld->nSttF == pSetNew->nSttF); + for (int32_t iStt = 0; iStt < pSetOld->nSttF; iStt++) { + SSttFile *pSttFile = pSetOld->aSttF[iStt]; + nRef = atomic_sub_fetch_32(&pSttFile->nRef, 1); + if (nRef == 0) { + tsdbSttFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSttFile, fname); + taosRemoveFile(fname); + taosMemoryFree(pSttFile); + } + + pSetOld->aSttF[iStt] = (SSttFile *)taosMemoryMalloc(sizeof(SSttFile)); + if (pSetOld->aSttF[iStt] == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->aSttF[iStt] = *pSetNew->aSttF[iStt]; + pSetOld->aSttF[iStt]->nRef = 1; + } + } + if (!sameDisk) { pSetOld->diskId = pSetNew->diskId; } @@ -902,13 +970,6 @@ int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFSNew) { taosMemoryFree(pSetOld->pDataF); } - nRef = atomic_sub_fetch_32(&pSetOld->pLastF->nRef, 1); - if (nRef == 0) { - tsdbLastFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pLastF, fname); - taosRemoveFile(fname); - taosMemoryFree(pSetOld->pLastF); - } - nRef = atomic_sub_fetch_32(&pSetOld->pSmaF->nRef, 1); if (nRef == 0) { tsdbSmaFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pSmaF, fname); @@ -916,12 +977,20 @@ int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFSNew) { taosMemoryFree(pSetOld->pSmaF); } + for (int8_t iStt = 0; iStt < pSetOld->nSttF; iStt++) { + nRef = atomic_sub_fetch_32(&pSetOld->aSttF[iStt]->nRef, 1); + if (nRef == 0) { + tsdbSttFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->aSttF[iStt], fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->aSttF[iStt]); + } + } + taosArrayRemove(pTsdb->fs.aDFileSet, iOld); continue; _add_new: - fSet.diskId = pSetNew->diskId; - fSet.fid = pSetNew->fid; + fSet = (SDFileSet){.diskId = pSetNew->diskId, .fid = pSetNew->fid, .nSttF = 1}; // head fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); @@ -941,15 +1010,6 @@ int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFSNew) { *fSet.pDataF = *pSetNew->pDataF; fSet.pDataF->nRef = 1; - // last - fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); - if (fSet.pLastF == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - *fSet.pLastF = *pSetNew->pLastF; - fSet.pLastF->nRef = 1; - // sma fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); if (fSet.pSmaF == NULL) { @@ -959,6 +1019,16 @@ int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFSNew) { *fSet.pSmaF = *pSetNew->pSmaF; fSet.pSmaF->nRef = 1; + // stt + ASSERT(pSetNew->nSttF == 1); + fSet.aSttF[0] = (SSttFile *)taosMemoryMalloc(sizeof(SSttFile)); + if (fSet.aSttF[0] == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.aSttF[0] = *pSetNew->aSttF[0]; + fSet.aSttF[0]->nRef = 1; + if (taosArrayInsert(pTsdb->fs.aDFileSet, iOld, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -1002,12 +1072,14 @@ int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS) { nRef = atomic_fetch_add_32(&pSet->pDataF->nRef, 1); ASSERT(nRef > 0); - nRef = atomic_fetch_add_32(&pSet->pLastF->nRef, 1); - ASSERT(nRef > 0); - nRef = atomic_fetch_add_32(&pSet->pSmaF->nRef, 1); ASSERT(nRef > 0); + for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { + nRef = atomic_fetch_add_32(&pSet->aSttF[iStt]->nRef, 1); + ASSERT(nRef > 0); + } + if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; @@ -1053,15 +1125,6 @@ void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS) { taosMemoryFree(pSet->pDataF); } - // last - nRef = atomic_sub_fetch_32(&pSet->pLastF->nRef, 1); - ASSERT(nRef >= 0); - if (nRef == 0) { - tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); - taosRemoveFile(fname); - taosMemoryFree(pSet->pLastF); - } - // sma nRef = atomic_sub_fetch_32(&pSet->pSmaF->nRef, 1); ASSERT(nRef >= 0); @@ -1070,6 +1133,18 @@ void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS) { taosRemoveFile(fname); taosMemoryFree(pSet->pSmaF); } + + // stt + for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { + nRef = atomic_sub_fetch_32(&pSet->aSttF[iStt]->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbSttFileName(pTsdb, pSet->diskId, pSet->fid, pSet->aSttF[iStt], fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->aSttF[iStt]); + /* code */ + } + } } taosArrayDestroy(pFS->aDFileSet); diff --git a/source/dnode/vnode/src/tsdb/tsdbFile.c b/source/dnode/vnode/src/tsdb/tsdbFile.c index 00d2ac848f..619fc17f5b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile.c @@ -53,22 +53,22 @@ static int32_t tGetDataFile(uint8_t *p, SDataFile *pDataFile) { return n; } -int32_t tPutLastFile(uint8_t *p, SLastFile *pLastFile) { +int32_t tPutSttFile(uint8_t *p, SSttFile *pSttFile) { int32_t n = 0; - n += tPutI64v(p ? p + n : p, pLastFile->commitID); - n += tPutI64v(p ? p + n : p, pLastFile->size); - n += tPutI64v(p ? p + n : p, pLastFile->offset); + n += tPutI64v(p ? p + n : p, pSttFile->commitID); + n += tPutI64v(p ? p + n : p, pSttFile->size); + n += tPutI64v(p ? p + n : p, pSttFile->offset); return n; } -static int32_t tGetLastFile(uint8_t *p, SLastFile *pLastFile) { +static int32_t tGetSttFile(uint8_t *p, SSttFile *pSttFile) { int32_t n = 0; - n += tGetI64v(p + n, &pLastFile->commitID); - n += tGetI64v(p + n, &pLastFile->size); - n += tGetI64v(p + n, &pLastFile->offset); + n += tGetI64v(p + n, &pSttFile->commitID); + n += tGetI64v(p + n, &pSttFile->size); + n += tGetI64v(p + n, &pSttFile->offset); return n; } @@ -102,9 +102,9 @@ void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pDataF->commitID, ".data"); } -void tsdbLastFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SLastFile *pLastF, char fname[]) { +void tsdbSttFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSttFile *pSttF, char fname[]) { snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pLastF->commitID, ".last"); + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pSttF->commitID, ".stt"); } void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, char fname[]) { @@ -148,7 +148,7 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { } // ftruncate - if (taosFtruncateFile(pFD, size) < 0) { + if (taosFtruncateFile(pFD, LOGIC_TO_FILE_SIZE(size, TSDB_DEFAULT_PAGE_SIZE)) < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } @@ -194,9 +194,11 @@ int32_t tPutDFileSet(uint8_t *p, SDFileSet *pSet) { n += tPutDataFile(p ? p + n : p, pSet->pDataF); n += tPutSmaFile(p ? p + n : p, pSet->pSmaF); - // last - n += tPutU8(p ? p + n : p, 1); // for future compatibility - n += tPutLastFile(p ? p + n : p, pSet->pLastF); + // stt + n += tPutU8(p ? p + n : p, pSet->nSttF); + for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { + n += tPutSttFile(p ? p + n : p, pSet->aSttF[iStt]); + } return n; } @@ -208,15 +210,40 @@ int32_t tGetDFileSet(uint8_t *p, SDFileSet *pSet) { n += tGetI32v(p + n, &pSet->diskId.id); n += tGetI32v(p + n, &pSet->fid); - // data + // head + pSet->pHeadF = (SHeadFile *)taosMemoryCalloc(1, sizeof(SHeadFile)); + if (pSet->pHeadF == NULL) { + return -1; + } + pSet->pHeadF->nRef = 1; n += tGetHeadFile(p + n, pSet->pHeadF); + + // data + pSet->pDataF = (SDataFile *)taosMemoryCalloc(1, sizeof(SDataFile)); + if (pSet->pDataF == NULL) { + return -1; + } + pSet->pDataF->nRef = 1; n += tGetDataFile(p + n, pSet->pDataF); + + // sma + pSet->pSmaF = (SSmaFile *)taosMemoryCalloc(1, sizeof(SSmaFile)); + if (pSet->pSmaF == NULL) { + return -1; + } + pSet->pSmaF->nRef = 1; n += tGetSmaFile(p + n, pSet->pSmaF); - // last - uint8_t nLast; - n += tGetU8(p + n, &nLast); - n += tGetLastFile(p + n, pSet->pLastF); + // stt + n += tGetU8(p + n, &pSet->nSttF); + for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { + pSet->aSttF[iStt] = (SSttFile *)taosMemoryCalloc(1, sizeof(SSttFile)); + if (pSet->aSttF[iStt] == NULL) { + return -1; + } + pSet->aSttF[iStt]->nRef = 1; + n += tGetSttFile(p + n, pSet->aSttF[iStt]); + } return n; } diff --git a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c new file mode 100644 index 0000000000..a072f22fa9 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdb.h" + +// SLDataIter ================================================= +struct SLDataIter { + SRBTreeNode node; + SSttBlk *pSttBlk; + SDataFReader *pReader; + int32_t iStt; + int8_t backward; + SArray *aSttBlk; + int32_t iSttBlk; + SBlockData bData[2]; + int32_t loadIndex; + int32_t iRow; + SRowInfo rInfo; + uint64_t uid; + STimeWindow timeWindow; + SVersionRange verRange; +}; + +static SBlockData *getCurrentBlock(SLDataIter *pIter) { return &pIter->bData[pIter->loadIndex]; } + +static SBlockData *getNextBlock(SLDataIter *pIter) { + pIter->loadIndex ^= 1; + return getCurrentBlock(pIter); +} + +int32_t tLDataIterOpen(struct SLDataIter **pIter, SDataFReader *pReader, int32_t iStt, int8_t backward, uint64_t suid, + uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pRange) { + int32_t code = 0; + *pIter = taosMemoryCalloc(1, sizeof(SLDataIter)); + if (*pIter == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + (*pIter)->uid = uid; + (*pIter)->pReader = pReader; + (*pIter)->iStt = iStt; + (*pIter)->backward = backward; + (*pIter)->verRange = *pRange; + (*pIter)->timeWindow = *pTimeWindow; + (*pIter)->aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); + if ((*pIter)->aSttBlk == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + code = tBlockDataCreate(&(*pIter)->bData[0]); + if (code) { + goto _exit; + } + + code = tBlockDataCreate(&(*pIter)->bData[1]); + if (code) { + goto _exit; + } + + code = tsdbReadSttBlk(pReader, iStt, (*pIter)->aSttBlk); + if (code) { + goto _exit; + } + + size_t size = taosArrayGetSize((*pIter)->aSttBlk); + + // find the start block + int32_t index = -1; + if (!backward) { // asc + for (int32_t i = 0; i < size; ++i) { + SSttBlk *p = taosArrayGet((*pIter)->aSttBlk, i); + if (p->suid != suid) { + continue; + } + + if (p->minUid <= uid && p->maxUid >= uid) { + index = i; + break; + } + } + } else { // desc + for (int32_t i = size - 1; i >= 0; --i) { + SSttBlk *p = taosArrayGet((*pIter)->aSttBlk, i); + if (p->suid != suid) { + continue; + } + + if (p->minUid <= uid && p->maxUid >= uid) { + index = i; + break; + } + } + } + + (*pIter)->iSttBlk = index; + if (index != -1) { + (*pIter)->pSttBlk = taosArrayGet((*pIter)->aSttBlk, (*pIter)->iSttBlk); + } + +_exit: + return code; +} + +void tLDataIterClose(SLDataIter *pIter) { + tBlockDataDestroy(&pIter->bData[0], 1); + tBlockDataDestroy(&pIter->bData[1], 1); + taosArrayDestroy(pIter->aSttBlk); + taosMemoryFree(pIter); +} + +void tLDataIterNextBlock(SLDataIter *pIter) { + int32_t step = pIter->backward ? -1 : 1; + pIter->iSttBlk += step; + + int32_t index = -1; + size_t size = taosArrayGetSize(pIter->aSttBlk); + for (int32_t i = pIter->iSttBlk; i < size && i >= 0; i += step) { + SSttBlk *p = taosArrayGet(pIter->aSttBlk, i); + if ((!pIter->backward) && p->minUid > pIter->uid) { + break; + } + + if (pIter->backward && p->maxUid < pIter->uid) { + break; + } + + // check uid firstly + if (p->minUid <= pIter->uid && p->maxUid >= pIter->uid) { + if ((!pIter->backward) && p->minKey > pIter->timeWindow.ekey) { + break; + } + + if (pIter->backward && p->maxKey < pIter->timeWindow.skey) { + break; + } + + // check time range secondly + if (p->minKey <= pIter->timeWindow.ekey && p->maxKey >= pIter->timeWindow.skey) { + if ((!pIter->backward) && p->minVer > pIter->verRange.maxVer) { + break; + } + + if (pIter->backward && p->maxVer < pIter->verRange.minVer) { + break; + } + + if (p->minVer <= pIter->verRange.maxVer && p->maxVer >= pIter->verRange.minVer) { + index = i; + break; + } + } + } + } + + if (index == -1) { + pIter->pSttBlk = NULL; + } else { + pIter->pSttBlk = (SSttBlk *)taosArrayGet(pIter->aSttBlk, pIter->iSttBlk); + } +} + +static void findNextValidRow(SLDataIter *pIter) { + int32_t step = pIter->backward ? -1 : 1; + + bool hasVal = false; + int32_t i = pIter->iRow; + SBlockData *pBlockData = getCurrentBlock(pIter); + + for (; i < pBlockData->nRow && i >= 0; i += step) { + if (pBlockData->aUid != NULL) { + if (!pIter->backward) { + if (pBlockData->aUid[i] < pIter->uid) { + continue; + } else if (pBlockData->aUid[i] > pIter->uid) { + break; + } + } else { + if (pBlockData->aUid[i] > pIter->uid) { + continue; + } else if (pBlockData->aUid[i] < pIter->uid) { + break; + } + } + } + + int64_t ts = pBlockData->aTSKEY[i]; + if (!pIter->backward) { // asc + if (ts > pIter->timeWindow.ekey) { // no more data + break; + } else if (ts < pIter->timeWindow.skey) { + continue; + } + } else { + if (ts < pIter->timeWindow.skey) { + break; + } else if (ts > pIter->timeWindow.ekey) { + continue; + } + } + + int64_t ver = pBlockData->aVersion[i]; + if (ver < pIter->verRange.minVer) { + continue; + } + + // todo opt handle desc case + if (ver > pIter->verRange.maxVer) { + continue; + } + + hasVal = true; + break; + } + + pIter->iRow = (hasVal) ? i : -1; +} + +bool tLDataIterNextRow(SLDataIter *pIter) { + int32_t code = 0; + int32_t step = pIter->backward ? -1 : 1; + + // no qualified last file block in current file, no need to fetch row + if (pIter->pSttBlk == NULL) { + return false; + } + + int32_t iBlockL = pIter->iSttBlk; + SBlockData *pBlockData = getCurrentBlock(pIter); + + if (pBlockData->nRow == 0 && pIter->pSttBlk != NULL) { // current block not loaded yet + pBlockData = getNextBlock(pIter); + code = tsdbReadSttBlock(pIter->pReader, pIter->iStt, pIter->pSttBlk, pBlockData); + if (code != TSDB_CODE_SUCCESS) { + goto _exit; + } + + pIter->iRow = (pIter->backward) ? pBlockData->nRow : -1; + } + + pIter->iRow += step; + + while (1) { + findNextValidRow(pIter); + + if (pIter->iRow >= pBlockData->nRow || pIter->iRow < 0) { + tLDataIterNextBlock(pIter); + if (pIter->pSttBlk == NULL) { // no more data + goto _exit; + } + } else { + break; + } + + if (iBlockL != pIter->iSttBlk) { + pBlockData = getNextBlock(pIter); + code = tsdbReadSttBlock(pIter->pReader, pIter->iStt, pIter->pSttBlk, pBlockData); + if (code) { + goto _exit; + } + pIter->iRow = pIter->backward ? (pBlockData->nRow - 1) : 0; + } + } + + pIter->rInfo.suid = pBlockData->suid; + pIter->rInfo.uid = pBlockData->uid; + pIter->rInfo.row = tsdbRowFromBlockData(pBlockData, pIter->iRow); + +_exit: + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + } + + return (code == TSDB_CODE_SUCCESS) && (pIter->pSttBlk != NULL); +} + +SRowInfo *tLDataIterGet(SLDataIter *pIter) { return &pIter->rInfo; } + +// SMergeTree ================================================= +static FORCE_INLINE int32_t tLDataIterCmprFn(const void *p1, const void *p2) { + SLDataIter *pIter1 = (SLDataIter *)(((uint8_t *)p1) - sizeof(SRBTreeNode)); + SLDataIter *pIter2 = (SLDataIter *)(((uint8_t *)p2) - sizeof(SRBTreeNode)); + + TSDBKEY key1 = TSDBROW_KEY(&pIter1->rInfo.row); + TSDBKEY key2 = TSDBROW_KEY(&pIter2->rInfo.row); + + if (key1.ts < key2.ts) { + return -1; + } else if (key1.ts > key2.ts) { + return 1; + } else { + if (key1.version < key2.version) { + return -1; + } else if (key1.version > key2.version) { + return 1; + } else { + return 0; + } + } +} + +int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, SDataFReader *pFReader, uint64_t suid, uint64_t uid, + STimeWindow *pTimeWindow, SVersionRange *pVerRange) { + pMTree->backward = backward; + pMTree->pIter = NULL; + pMTree->pIterList = taosArrayInit(4, POINTER_BYTES); + if (pMTree->pIterList == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + tRBTreeCreate(&pMTree->rbt, tLDataIterCmprFn); + int32_t code = TSDB_CODE_OUT_OF_MEMORY; + + struct SLDataIter *pIterList[TSDB_DEFAULT_STT_FILE] = {0}; + for (int32_t i = 0; i < pFReader->pSet->nSttF; ++i) { // open all last file + code = tLDataIterOpen(&pIterList[i], pFReader, i, pMTree->backward, suid, uid, pTimeWindow, pVerRange); + if (code != TSDB_CODE_SUCCESS) { + goto _end; + } + + bool hasVal = tLDataIterNextRow(pIterList[i]); + if (hasVal) { + taosArrayPush(pMTree->pIterList, &pIterList[i]); + tMergeTreeAddIter(pMTree, pIterList[i]); + } else { + tLDataIterClose(pIterList[i]); + } + } + + return code; + +_end: + tMergeTreeClose(pMTree); + return code; +} + +void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter) { tRBTreePut(&pMTree->rbt, (SRBTreeNode *)pIter); } + +bool tMergeTreeNext(SMergeTree *pMTree) { + int32_t code = TSDB_CODE_SUCCESS; + if (pMTree->pIter) { + SLDataIter *pIter = pMTree->pIter; + + bool hasVal = tLDataIterNextRow(pIter); + if (!hasVal) { + pMTree->pIter = NULL; + } + + // compare with min in RB Tree + pIter = (SLDataIter *)tRBTreeMin(&pMTree->rbt); + if (pMTree->pIter && pIter) { + int32_t c = pMTree->rbt.cmprFn(RBTREE_NODE_PAYLOAD(&pMTree->pIter->node), RBTREE_NODE_PAYLOAD(&pIter->node)); + if (c > 0) { + tRBTreePut(&pMTree->rbt, (SRBTreeNode *)pMTree->pIter); + pMTree->pIter = NULL; + } else { + ASSERT(c); + } + } + } + + if (pMTree->pIter == NULL) { + pMTree->pIter = (SLDataIter *)tRBTreeMin(&pMTree->rbt); + if (pMTree->pIter) { + tRBTreeDrop(&pMTree->rbt, (SRBTreeNode *)pMTree->pIter); + } + } + + return pMTree->pIter != NULL; +} + +TSDBROW tMergeTreeGetRow(SMergeTree *pMTree) { return pMTree->pIter->rInfo.row; } + +void tMergeTreeClose(SMergeTree *pMTree) { + size_t size = taosArrayGetSize(pMTree->pIterList); + for (int32_t i = 0; i < size; ++i) { + SLDataIter *pIter = taosArrayGetP(pMTree->pIterList, i); + tLDataIterClose(pIter); + } + + pMTree->pIterList = taosArrayDestroy(pMTree->pIterList); + pMTree->pIter = NULL; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 6a25633357..8a51fc73a6 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -34,21 +34,20 @@ typedef struct { typedef struct { int32_t numOfBlocks; - int32_t numOfLastBlocks; + int32_t numOfLastFiles; } SBlockNumber; typedef struct STableBlockScanInfo { uint64_t uid; TSKEY lastKey; - SMapData mapData; // block info (compressed) - SArray* pBlockList; // block data index list - SIterInfo iter; // mem buffer skip list iterator - SIterInfo iiter; // imem buffer skip list iterator - SArray* delSkyline; // delete info for this table - int32_t fileDelIndex; // file block delete index - int32_t lastBlockDelIndex;// delete index for last block - bool iterInit; // whether to initialize the in-memory skip list iterator or not - int16_t indexInBlockL;// row position in last block + SMapData mapData; // block info (compressed) + SArray* pBlockList; // block data index list + SIterInfo iter; // mem buffer skip list iterator + SIterInfo iiter; // imem buffer skip list iterator + SArray* delSkyline; // delete info for this table + int32_t fileDelIndex; // file block delete index + int32_t lastBlockDelIndex; // delete index for last block + bool iterInit; // whether to initialize the in-memory skip list iterator or not } STableBlockScanInfo; typedef struct SBlockOrderWrapper { @@ -83,28 +82,20 @@ typedef struct SBlockLoadSuppInfo { char** buildBuf; // build string tmp buffer, todo remove it later after all string format being updated. } SBlockLoadSuppInfo; -typedef struct SVersionRange { - uint64_t minVer; - uint64_t maxVer; -} SVersionRange; - typedef struct SLastBlockReader { - SArray* pBlockL; - int32_t currentBlockIndex; - SBlockData lastBlockData; STimeWindow window; SVersionRange verRange; int32_t order; uint64_t uid; - int16_t* rowIndex; // row index ptr, usually from the STableBlockScanInfo->indexInBlockL + SMergeTree mergeTree; } SLastBlockReader; typedef struct SFilesetIter { - int32_t numOfFiles; // number of total files - int32_t index; // current accessed index in the list - SArray* pFileList; // data file list + int32_t numOfFiles; // number of total files + int32_t index; // current accessed index in the list + SArray* pFileList; // data file list int32_t order; - SLastBlockReader* pLastBlockReader; // last file block reader + SLastBlockReader* pLastBlockReader; // last file block reader } SFilesetIter; typedef struct SFileDataBlockInfo { @@ -116,9 +107,9 @@ typedef struct SFileDataBlockInfo { typedef struct SDataBlockIter { int32_t numOfBlocks; int32_t index; - SArray* blockList; // SArray + SArray* blockList; // SArray int32_t order; - SBlock block; // current SBlock data + SDataBlk block; // current SDataBlk data SHashObj* pTableMap; } SDataBlockIter; @@ -130,8 +121,8 @@ typedef struct SFileBlockDumpInfo { } SFileBlockDumpInfo; typedef struct SUidOrderCheckInfo { - uint64_t* tableUidList; // access table uid list in uid ascending order list - int32_t currentIndex; // index in table uid list + uint64_t* tableUidList; // access table uid list in uid ascending order list + int32_t currentIndex; // index in table uid list } SUidOrderCheckInfo; typedef struct SReaderStatus { @@ -139,9 +130,9 @@ typedef struct SReaderStatus { bool composedDataBlock; // the returned data block is a composed block or not SHashObj* pTableMap; // SHash STableBlockScanInfo* pTableIter; // table iterator used in building in-memory buffer data blocks. - SUidOrderCheckInfo uidCheckInfo; // check all table in uid order + SUidOrderCheckInfo uidCheckInfo; // check all table in uid order SFileBlockDumpInfo fBlockDumpInfo; - SDFileSet* pCurrentFileset; // current opened file set + SDFileSet* pCurrentFileset; // current opened file set SBlockData fileBlockData; SFilesetIter fileIter; SDataBlockIter blockIter; @@ -175,29 +166,31 @@ static int buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, i static TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader); static int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader, SRowMerger* pMerger); -static int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, SRowMerger* pMerger); +static int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, + SRowMerger* pMerger); static int32_t doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDelList, SRowMerger* pMerger, STsdbReader* pReader); static int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* pTSRow, uint64_t uid); static int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, - int32_t rowIndex); + int32_t rowIndex); static void setComposedBlockFlag(STsdbReader* pReader, bool composed); static bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order); -static int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, STSRow** pTSRow, - STsdbReader* pReader, bool* freeTSRow); -static int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, - STSRow** pTSRow); -static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, STsdbReader* pReader); +static int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, + STSRow** pTSRow, STsdbReader* pReader, bool* freeTSRow); +static int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, + STsdbReader* pReader, STSRow** pTSRow); +static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, + STsdbReader* pReader); static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData, STbData* piMemTbData); static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, int8_t* pLevel); static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); -static int64_t getCurrentKeyInLastBlock(SLastBlockReader* pLastBlockReader); -static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); -static int32_t doBuildDataBlock(STsdbReader* pReader); +static int64_t getCurrentKeyInLastBlock(SLastBlockReader* pLastBlockReader); +static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); +static int32_t doBuildDataBlock(STsdbReader* pReader); static int32_t setColumnIdSlotList(STsdbReader* pReader, SSDataBlock* pBlock) { SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; @@ -233,16 +226,13 @@ static SHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, const STableK return NULL; } + int32_t step = ASCENDING_TRAVERSE(pTsdbReader->order)? 1:-1; for (int32_t j = 0; j < numOfTables; ++j) { - STableBlockScanInfo info = {.lastKey = 0, .uid = idList[j].uid, .indexInBlockL = INITIAL_ROW_INDEX_VAL}; + STableBlockScanInfo info = {.lastKey = 0, .uid = idList[j].uid}; if (ASCENDING_TRAVERSE(pTsdbReader->order)) { - if (info.lastKey == INT64_MIN || info.lastKey < pTsdbReader->window.skey) { - info.lastKey = pTsdbReader->window.skey; - } - - ASSERT(info.lastKey >= pTsdbReader->window.skey && info.lastKey <= pTsdbReader->window.ekey); + info.lastKey = pTsdbReader->window.skey - step; } else { - info.lastKey = pTsdbReader->window.skey; + info.lastKey = pTsdbReader->window.ekey - step; } taosHashPut(pTableMap, &info.uid, sizeof(uint64_t), &info, sizeof(info)); @@ -256,7 +246,7 @@ static SHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, const STableK return pTableMap; } -static void resetDataBlockScanInfo(SHashObj* pTableMap) { +static void resetDataBlockScanInfo(SHashObj* pTableMap, int64_t ts) { STableBlockScanInfo* p = NULL; while ((p = taosHashIterate(pTableMap, p)) != NULL) { @@ -266,9 +256,8 @@ static void resetDataBlockScanInfo(SHashObj* pTableMap) { p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter); } - p->fileDelIndex = -1; - p->delSkyline = taosArrayDestroy(p->delSkyline); - p->lastBlockDelIndex = INITIAL_ROW_INDEX_VAL; + p->delSkyline = taosArrayDestroy(p->delSkyline); + p->lastKey = ts; } } @@ -330,7 +319,8 @@ static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* cap } // init file iterator -static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, STsdbReader* pReader/*int32_t order, const char* idstr*/) { +static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, + STsdbReader* pReader /*int32_t order, const char* idstr*/) { size_t numOfFileset = taosArrayGetSize(aDFileSet); pIter->index = ASCENDING_TRAVERSE(pReader->order) ? -1 : numOfFileset; @@ -345,20 +335,16 @@ static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, STsdb tsdbError("failed to prepare the last block iterator, code:%d %s", tstrerror(code), pReader->idStr); return code; } - - SLastBlockReader* pLReader = pIter->pLastBlockReader; - pLReader->pBlockL = taosArrayInit(4, sizeof(SBlockL)); - pLReader->order = pReader->order; - pLReader->window = pReader->window; - pLReader->verRange = pReader->verRange; - pLReader->currentBlockIndex = -1; - - int32_t code = tBlockDataCreate(&pLReader->lastBlockData); - if (code != TSDB_CODE_SUCCESS) { - return code; - } } + SLastBlockReader* pLReader = pIter->pLastBlockReader; + pLReader->order = pReader->order; + pLReader->window = pReader->window; + pLReader->verRange = pReader->verRange; + + pLReader->uid = 0; + tMergeTreeClose(&pLReader->mergeTree); + tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, pReader->idStr); return TSDB_CODE_SUCCESS; } @@ -372,6 +358,9 @@ static bool filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader) { return false; } + pIter->pLastBlockReader->uid = 0; + tMergeTreeClose(&pIter->pLastBlockReader->mergeTree); + // check file the time range of coverage STimeWindow win = {0}; @@ -580,14 +569,12 @@ static void cleanupTableScanInfo(SHashObj* pTableMap) { } // reset the index in last block when handing a new file - px->indexInBlockL = INITIAL_ROW_INDEX_VAL; tMapDataClear(&px->mapData); taosArrayClear(px->pBlockList); } } -static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SArray* pLastBlockIndex, - SBlockNumber * pBlockNum, SArray* pQualifiedLastBlock) { +static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockNumber* pBlockNum) { int32_t numOfQTable = 0; size_t sizeInDisk = 0; size_t numOfTables = taosArrayGetSize(pIndexList); @@ -605,8 +592,8 @@ static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SArray* sizeInDisk += pScanInfo->mapData.nData; for (int32_t j = 0; j < pScanInfo->mapData.nItem; ++j) { - SBlock block = {0}; - tMapDataGetItemByIdx(&pScanInfo->mapData, j, &block, tGetBlock); + SDataBlk block = {0}; + tMapDataGetItemByIdx(&pScanInfo->mapData, j, &block, tGetDataBlk); // 1. time range check if (block.minKey.ts > pReader->window.ekey || block.maxKey.ts < pReader->window.skey) { @@ -632,36 +619,14 @@ static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SArray* } } - size_t numOfLast = taosArrayGetSize(pLastBlockIndex); - for(int32_t i = 0; i < numOfLast; ++i) { - SBlockL* pLastBlock = taosArrayGet(pLastBlockIndex, i); - if (pLastBlock->suid != pReader->suid) { - continue; - } - - { - // 1. time range check - if (pLastBlock->minKey > pReader->window.ekey || pLastBlock->maxKey < pReader->window.skey) { - continue; - } - - // 2. version range check - if (pLastBlock->minVer > pReader->verRange.maxVer || pLastBlock->maxVer < pReader->verRange.minVer) { - continue; - } - - pBlockNum->numOfLastBlocks += 1; - taosArrayPush(pQualifiedLastBlock, pLastBlock); - } - } - - int32_t total = pBlockNum->numOfLastBlocks + pBlockNum->numOfBlocks; + pBlockNum->numOfLastFiles = pReader->pFileReader->pSet->nSttF; + int32_t total = pBlockNum->numOfLastFiles + pBlockNum->numOfBlocks; double el = (taosGetTimestampUs() - st) / 1000.0; tsdbDebug( - "load block of %d tables completed, blocks:%d in %d tables, lastBlock:%d, block-info-size:%.2f Kb, elapsed " + "load block of %d tables completed, blocks:%d in %d tables, last-files:%d, block-info-size:%.2f Kb, elapsed " "time:%.2f ms %s", - numOfTables, pBlockNum->numOfBlocks, numOfQTable, pBlockNum->numOfLastBlocks, sizeInDisk / 1000.0, el, + numOfTables, pBlockNum->numOfBlocks, numOfQTable, pBlockNum->numOfLastFiles, sizeInDisk / 1000.0, el, pReader->idStr); pReader->cost.numOfBlocks += total; @@ -702,7 +667,7 @@ static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) { return pBlockInfo; } -static SBlock* getCurrentBlock(SDataBlockIter* pBlockIter) { return &pBlockIter->block; } +static SDataBlk* getCurrentBlock(SDataBlockIter* pBlockIter) { return &pBlockIter->block; } static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) { SReaderStatus* pStatus = &pReader->status; @@ -710,7 +675,7 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn SBlockData* pBlockData = &pStatus->fileBlockData; SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter); - SBlock* pBlock = getCurrentBlock(pBlockIter); + SDataBlk* pBlock = getCurrentBlock(pBlockIter); SSDataBlock* pResBlock = pReader->pResBlock; int32_t numOfOutputCols = blockDataGetNumOfCols(pResBlock); @@ -795,8 +760,8 @@ static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockI SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; ASSERT(pBlockInfo != NULL); - SBlock* pBlock = getCurrentBlock(pBlockIter); - int32_t code = tsdbReadDataBlock(pReader->pFileReader, pBlock, pBlockData); + SDataBlk* pBlock = getCurrentBlock(pBlockIter); + int32_t code = tsdbReadDataBlock(pReader->pFileReader, pBlock, pBlockData); if (code != TSDB_CODE_SUCCESS) { tsdbError("%p error occurs in loading file block, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 ", rows:%d, code:%s %s", @@ -872,8 +837,8 @@ static int32_t doSetCurrentBlock(SDataBlockIter* pBlockIter) { SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); if (pBlockInfo != NULL) { STableBlockScanInfo* pScanInfo = taosHashGet(pBlockIter->pTableMap, &pBlockInfo->uid, sizeof(pBlockInfo->uid)); - int32_t* mapDataIndex = taosArrayGet(pScanInfo->pBlockList, pBlockInfo->tbBlockIdx); - tMapDataGetItemByIdx(&pScanInfo->mapData, *mapDataIndex, &pBlockIter->block, tGetBlock); + int32_t* mapDataIndex = taosArrayGet(pScanInfo->pBlockList, pBlockInfo->tbBlockIdx); + tMapDataGetItemByIdx(&pScanInfo->mapData, *mapDataIndex, &pBlockIter->block, tGetDataBlk); } #if 0 @@ -924,12 +889,12 @@ static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIte } sup.pDataBlockInfo[sup.numOfTables] = (SBlockOrderWrapper*)buf; - SBlock block = {0}; + SDataBlk block = {0}; for (int32_t k = 0; k < num; ++k) { SBlockOrderWrapper wrapper = {0}; int32_t* mapDataIndex = taosArrayGet(pTableScanInfo->pBlockList, k); - tMapDataGetItemByIdx(&pTableScanInfo->mapData, *mapDataIndex, &block, tGetBlock); + tMapDataGetItemByIdx(&pTableScanInfo->mapData, *mapDataIndex, &block, tGetDataBlk); wrapper.uid = pTableScanInfo->uid; wrapper.offset = block.aSubBlock[0].offset; @@ -990,8 +955,8 @@ static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIte } int64_t et = taosGetTimestampUs(); - tsdbDebug("%p %d data blocks access order completed, elapsed time:%.2f ms %s", pReader, numOfBlocks, (et - st) / 1000.0, - pReader->idStr); + tsdbDebug("%p %d data blocks access order completed, elapsed time:%.2f ms %s", pReader, numOfBlocks, + (et - st) / 1000.0, pReader->idStr); cleanupBlockOrderSupporter(&sup); taosMemoryFree(pTree); @@ -1018,15 +983,15 @@ static bool blockIteratorNext(SDataBlockIter* pBlockIter) { /** * This is an two rectangles overlap cases. */ -static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* pVerRange, SBlock* pBlock) { +static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* pVerRange, SDataBlk* pBlock) { return (pWindow->ekey < pBlock->maxKey.ts && pWindow->ekey >= pBlock->minKey.ts) || (pWindow->skey > pBlock->minKey.ts && pWindow->skey <= pBlock->maxKey.ts) || (pVerRange->minVer > pBlock->minVer && pVerRange->minVer <= pBlock->maxVer) || (pVerRange->maxVer < pBlock->maxVer && pVerRange->maxVer >= pBlock->minVer); } -static SBlock* getNeighborBlockOfSameTable(SFileDataBlockInfo* pFBlockInfo, STableBlockScanInfo* pTableBlockScanInfo, - int32_t* nextIndex, int32_t order) { +static SDataBlk* getNeighborBlockOfSameTable(SFileDataBlockInfo* pFBlockInfo, STableBlockScanInfo* pTableBlockScanInfo, + int32_t* nextIndex, int32_t order) { bool asc = ASCENDING_TRAVERSE(order); if (asc && pFBlockInfo->tbBlockIdx >= taosArrayGetSize(pTableBlockScanInfo->pBlockList) - 1) { return NULL; @@ -1039,10 +1004,10 @@ static SBlock* getNeighborBlockOfSameTable(SFileDataBlockInfo* pFBlockInfo, STab int32_t step = asc ? 1 : -1; *nextIndex = pFBlockInfo->tbBlockIdx + step; - SBlock* pBlock = taosMemoryCalloc(1, sizeof(SBlock)); - int32_t* indexInMapdata = taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex); + SDataBlk* pBlock = taosMemoryCalloc(1, sizeof(SDataBlk)); + int32_t* indexInMapdata = taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex); - tMapDataGetItemByIdx(&pTableBlockScanInfo->mapData, *indexInMapdata, pBlock, tGetBlock); + tMapDataGetItemByIdx(&pTableBlockScanInfo->mapData, *indexInMapdata, pBlock, tGetDataBlk); return pBlock; } @@ -1085,7 +1050,7 @@ static int32_t setFileBlockActiveInBlockIter(SDataBlockIter* pBlockIter, int32_t return TSDB_CODE_SUCCESS; } -static bool overlapWithNeighborBlock(SBlock* pBlock, SBlock* pNeighbor, int32_t order) { +static bool overlapWithNeighborBlock(SDataBlk* pBlock, SDataBlk* pNeighbor, int32_t order) { // it is the last block in current file, no chance to overlap with neighbor blocks. if (ASCENDING_TRAVERSE(order)) { return pBlock->maxKey.ts == pNeighbor->minKey.ts; @@ -1094,19 +1059,19 @@ static bool overlapWithNeighborBlock(SBlock* pBlock, SBlock* pNeighbor, int32_t } } -static bool bufferDataInFileBlockGap(int32_t order, TSDBKEY key, SBlock* pBlock) { +static bool bufferDataInFileBlockGap(int32_t order, TSDBKEY key, SDataBlk* pBlock) { bool ascScan = ASCENDING_TRAVERSE(order); return (ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts <= pBlock->minKey.ts)) || (!ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts >= pBlock->maxKey.ts)); } -static bool keyOverlapFileBlock(TSDBKEY key, SBlock* pBlock, SVersionRange* pVerRange) { +static bool keyOverlapFileBlock(TSDBKEY key, SDataBlk* pBlock, SVersionRange* pVerRange) { return (key.ts >= pBlock->minKey.ts && key.ts <= pBlock->maxKey.ts) && (pBlock->maxVer >= pVerRange->minVer) && (pBlock->minVer <= pVerRange->maxVer); } -static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock) { +static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, const SDataBlk* pBlock) { size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline); for (int32_t i = pBlockScanInfo->fileDelIndex; i < num; i += 1) { @@ -1140,7 +1105,7 @@ static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, cons return false; } -static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock, int32_t order) { +static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SDataBlk* pBlock, int32_t order) { if (pBlockScanInfo->delSkyline == NULL) { return false; } @@ -1175,10 +1140,10 @@ static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBl // 3. current timestamp should not be overlap with each other // 4. output buffer should be large enough to hold all rows in current block // 5. delete info should not overlap with current block data -static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pFBlock, SBlock* pBlock, +static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pFBlock, SDataBlk* pBlock, STableBlockScanInfo* pScanInfo, TSDBKEY key, SLastBlockReader* pLastBlockReader) { - int32_t neighborIndex = 0; - SBlock* pNeighbor = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &neighborIndex, pReader->order); + int32_t neighborIndex = 0; + SDataBlk* pNeighbor = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &neighborIndex, pReader->order); // overlap with neighbor bool overlapWithNeighbor = false; @@ -1192,11 +1157,14 @@ static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pFBloc bool overlapWithDel = overlapWithDelSkyline(pScanInfo, pBlock, pReader->order); // todo here we need to each key in the last files to identify if it is really overlapped with last block + // todo bool overlapWithlastBlock = false; - if (taosArrayGetSize(pLastBlockReader->pBlockL) > 0 && (pLastBlockReader->currentBlockIndex != -1)) { - SBlockL *pBlockL = taosArrayGet(pLastBlockReader->pBlockL, pLastBlockReader->currentBlockIndex); - overlapWithlastBlock = !(pBlock->maxKey.ts < pBlockL->minKey || pBlock->minKey.ts > pBlockL->maxKey); +#if 0 + if (taosArrayGetSize(pLastBlockReader->pSstBlk) > 0 && (pLastBlockReader->currentBlockIndex != -1)) { + SSttBlk* pSstBlk = taosArrayGet(pLastBlockReader->pSstBlk, pLastBlockReader->currentBlockIndex); + overlapWithlastBlock = !(pBlock->maxKey.ts < pSstBlk->minKey || pBlock->minKey.ts > pSstBlk->maxKey); } +#endif bool moreThanOutputCapacity = pBlock->nRow > pReader->capacity; bool partiallyRequired = dataBlockPartiallyRequired(&pReader->window, &pReader->verRange, pBlock); @@ -1295,18 +1263,16 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; int64_t tsLast = INT64_MIN; - if ((pLastBlockReader->lastBlockData.nRow > 0) && hasDataInLastBlock(pLastBlockReader)) { + if (hasDataInLastBlock(pLastBlockReader)) { tsLast = getCurrentKeyInLastBlock(pLastBlockReader); } - TSDBKEY k = TSDBROW_KEY(pRow); - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - - SBlockData* pLastBlockData = &pLastBlockReader->lastBlockData; + TSDBKEY k = TSDBROW_KEY(pRow); + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); int64_t minKey = 0; if (pReader->order == TSDB_ORDER_ASC) { - minKey = INT64_MAX; // chosen the minimum value + minKey = INT64_MAX; // chosen the minimum value if (minKey > tsLast && hasDataInLastBlock(pLastBlockReader)) { minKey = tsLast; } @@ -1336,7 +1302,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* bool init = false; // ASC: file block ---> last block -----> imem -----> mem - //DESC: mem -----> imem -----> last block -----> file block + // DESC: mem -----> imem -----> last block -----> file block if (pReader->order == TSDB_ORDER_ASC) { if (minKey == key) { init = true; @@ -1345,7 +1311,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* } if (minKey == tsLast) { - TSDBROW fRow1 = tsdbRowFromBlockData(pLastBlockData, *pLastBlockReader->rowIndex); + TSDBROW fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); if (init) { tRowMerge(&merge, &fRow1); } else { @@ -1374,7 +1340,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* } if (minKey == tsLast) { - TSDBROW fRow1 = tsdbRowFromBlockData(pLastBlockData, *pLastBlockReader->rowIndex); + TSDBROW fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); if (init) { tRowMerge(&merge, &fRow1); } else { @@ -1411,14 +1377,13 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData, bool mergeBlockData) { SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SBlockData* pLastBlockData = &pLastBlockReader->lastBlockData; - int64_t tsLastBlock = getCurrentKeyInLastBlock(pLastBlockReader); + // SBlockData* pLastBlockData = &pLastBlockReader->lastBlockData; + int64_t tsLastBlock = getCurrentKeyInLastBlock(pLastBlockReader); STSRow* pTSRow = NULL; SRowMerger merge = {0}; - TSDBROW fRow = tsdbRowFromBlockData(pLastBlockData, *pLastBlockReader->rowIndex); - + TSDBROW fRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); tRowMergerInit(&merge, &fRow, pReader->pSchema); doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, &merge); @@ -1445,7 +1410,7 @@ static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader if (pBlockData->nRow > 0) { // no last block available, only data block exists - if (pLastBlockReader->lastBlockData.nRow == 0 || (!hasDataInLastBlock(pLastBlockReader))) { + if (!hasDataInLastBlock(pLastBlockReader)) { return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader); } @@ -1455,7 +1420,7 @@ static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader ASSERT(ts >= key); if (ASCENDING_TRAVERSE(pReader->order)) { - if (key < ts) { // imem, mem are all empty, file blocks (data blocks and last block) exist + if (key < ts) { // imem, mem are all empty, file blocks (data blocks and last block) exist return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader); } else if (key == ts) { STSRow* pTSRow = NULL; @@ -1463,6 +1428,10 @@ static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader tRowMergerInit(&merge, &fRow, pReader->pSchema); doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge); + + TSDBROW fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + tRowMerge(&merge, &fRow1); + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, ts, &merge); int32_t code = tRowMergerGetRow(&merge, &pTSRow); @@ -1499,7 +1468,6 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pDelList, pReader); ASSERT(pRow != NULL && piRow != NULL); - SBlockData* pLastBlockData = &pLastBlockReader->lastBlockData; int64_t tsLast = INT64_MIN; if (hasDataInLastBlock(pLastBlockReader)) { tsLast = getCurrentKeyInLastBlock(pLastBlockReader); @@ -1529,7 +1497,7 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* minKey = tsLast; } } else { - minKey = INT64_MIN; // let find the maximum ts value + minKey = INT64_MIN; // let find the maximum ts value if (minKey < k.ts) { minKey = k.ts; } @@ -1560,7 +1528,7 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* } if (minKey == tsLast) { - TSDBROW fRow1 = tsdbRowFromBlockData(pLastBlockData, *pLastBlockReader->rowIndex); + TSDBROW fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); if (init) { tRowMerge(&merge, &fRow1); } else { @@ -1610,7 +1578,7 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* } if (minKey == tsLast) { - TSDBROW fRow1 = tsdbRowFromBlockData(pLastBlockData, *pLastBlockReader->rowIndex); + TSDBROW fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); if (init) { tRowMerge(&merge, &fRow1); } else { @@ -1788,261 +1756,6 @@ static int32_t doMergeThreeLevelRows(STsdbReader* pReader, STableBlockScanInfo* } #endif -static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, - STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) { - // it is an multi-table data block - if (pBlockData->aUid != NULL) { - uint64_t uid = pBlockData->aUid[pDumpInfo->rowIndex]; - if (uid != pBlockScanInfo->uid) { // move to next row - return false; - } - } - - // check for version and time range - int64_t ver = pBlockData->aVersion[pDumpInfo->rowIndex]; - if (ver > pReader->verRange.maxVer || ver < pReader->verRange.minVer) { - return false; - } - - int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex]; - if (ts > pReader->window.ekey || ts < pReader->window.skey) { - return false; - } - - TSDBKEY k = {.ts = ts, .version = ver}; - if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, &k, pReader->order)) { - return false; - } - - return true; -} - -static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); } - -static void initLastBlockReader(SLastBlockReader* pLastBlockReader, uint64_t uid, int16_t* startPos) { - pLastBlockReader->uid = uid; - pLastBlockReader->rowIndex = startPos; - - if (*startPos == -1) { - if (ASCENDING_TRAVERSE(pLastBlockReader->order)) { - // do nothing - } else { - *startPos = pLastBlockReader->lastBlockData.nRow; - } - } -} - -static void setAllRowsChecked(SLastBlockReader *pLastBlockReader) { - *pLastBlockReader->rowIndex = ALL_ROWS_CHECKED_INDEX; -} - -static bool nextRowInLastBlock(SLastBlockReader *pLastBlockReader, STableBlockScanInfo* pBlockScanInfo) { - bool asc = ASCENDING_TRAVERSE(pLastBlockReader->order); - int32_t step = (asc) ? 1 : -1; - if (*pLastBlockReader->rowIndex == ALL_ROWS_CHECKED_INDEX) { - return false; - } - - *(pLastBlockReader->rowIndex) += step; - - SBlockData* pBlockData = &pLastBlockReader->lastBlockData; - for(int32_t i = *(pLastBlockReader->rowIndex); i < pBlockData->nRow && i >= 0; i += step) { - if (pBlockData->aUid != NULL) { - if (asc) { - if (pBlockData->aUid[i] < pLastBlockReader->uid) { - continue; - } else if (pBlockData->aUid[i] > pLastBlockReader->uid) { - break; - } - } else { - if (pBlockData->aUid[i] > pLastBlockReader->uid) { - continue; - } else if (pBlockData->aUid[i] < pLastBlockReader->uid) { - break; - } - } - } - - int64_t ts = pBlockData->aTSKEY[i]; - if (ts < pLastBlockReader->window.skey) { - continue; - } - - int64_t ver = pBlockData->aVersion[i]; - if (ver < pLastBlockReader->verRange.minVer) { - continue; - } - - // no data any more, todo opt handle desc case - if (ts > pLastBlockReader->window.ekey) { - continue; - } - - // todo opt handle desc case - if (ver > pLastBlockReader->verRange.maxVer) { - continue; - } - - TSDBKEY k = {.ts = ts, .version = ver}; - if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->lastBlockDelIndex, &k, pLastBlockReader->order)) { - continue; - } - - *(pLastBlockReader->rowIndex) = i; - return true; - } - - // set all data is consumed in last block - setAllRowsChecked(pLastBlockReader); - return false; -} - -static int64_t getCurrentKeyInLastBlock(SLastBlockReader* pLastBlockReader) { - SBlockData* pBlockData = &pLastBlockReader->lastBlockData; - return pBlockData->aTSKEY[*pLastBlockReader->rowIndex]; -} - -static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader) { - if (*pLastBlockReader->rowIndex == ALL_ROWS_CHECKED_INDEX) { - return false; - } - - ASSERT(pLastBlockReader->lastBlockData.nRow > 0); - return true; -} - -int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, STsdbReader* pReader) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - - if (tryCopyDistinctRowFromFileBlock(pReader, pBlockData, key, pDumpInfo)) { - return TSDB_CODE_SUCCESS; - } else { - STSRow* pTSRow = NULL; - SRowMerger merge = {0}; - - tRowMergerInit(&merge, &fRow, pReader->pSchema); - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge); - int32_t code = tRowMergerGetRow(&merge, &pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow, pBlockScanInfo->uid); - - taosMemoryFree(pTSRow); - tRowMergerClear(&merge); - return TSDB_CODE_SUCCESS; - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, - SBlockData* pBlockData, SLastBlockReader* pLastBlockReader) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - int64_t key = (pBlockData->nRow > 0)? pBlockData->aTSKEY[pDumpInfo->rowIndex]:INT64_MIN; - TSDBROW* pRow = getValidMemRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader); - TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader); - - if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal) { - return doMergeMultiLevelRows(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); - } else { - // imem + file + last block - if (pBlockScanInfo->iiter.hasVal) { - return doMergeBufAndFileRows(pReader, pBlockScanInfo, piRow, &pBlockScanInfo->iiter, key, pLastBlockReader); - } - - // mem + file + last block - if (pBlockScanInfo->iter.hasVal) { - return doMergeBufAndFileRows(pReader, pBlockScanInfo, pRow, &pBlockScanInfo->iter, key, pLastBlockReader); - } - - // files data blocks + last block - return mergeFileBlockAndLastBlock(pReader, pLastBlockReader, key, pBlockScanInfo, pBlockData); - } -} - -static int32_t buildComposedDataBlock(STsdbReader* pReader) { - SSDataBlock* pResBlock = pReader->pResBlock; - - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); - - STableBlockScanInfo* pBlockScanInfo = NULL; - if (pBlockInfo != NULL) { - pBlockScanInfo = taosHashGet(pReader->status.pTableMap, &pBlockInfo->uid, sizeof(pBlockInfo->uid)); - } else { - pBlockScanInfo = pReader->status.pTableIter; - } - - SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SBlockData* pBlockData = &pReader->status.fileBlockData; - int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1; - - int64_t st = taosGetTimestampUs(); - - while (1) { - // todo check the validate of row in file block - bool hasBlockData = false; - { - while (pBlockData->nRow > 0) { // find the first qualified row in data block - if (isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) { - hasBlockData = true; - break; - } - - pDumpInfo->rowIndex += step; - - SBlock* pBlock = getCurrentBlock(&pReader->status.blockIter); - if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) { - setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); - break; - } - } - } - - bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); - - // no data in last block and block, no need to proceed. - if ((hasBlockData == false) && (hasBlockLData == false)) { - break; - } - - buildComposedDataBlockImpl(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); - - // currently loaded file data block is consumed - if ((pBlockData->nRow > 0) && (pDumpInfo->rowIndex >= pBlockData->nRow || pDumpInfo->rowIndex < 0)) { - SBlock* pBlock = getCurrentBlock(&pReader->status.blockIter); - setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); - break; - } - - if (pResBlock->info.rows >= pReader->capacity) { - break; - } - } - - pResBlock->info.uid = pBlockScanInfo->uid; - blockDataUpdateTsWindow(pResBlock, 0); - - setComposedBlockFlag(pReader, true); - int64_t et = taosGetTimestampUs(); - - if (pResBlock->info.rows > 0) { - tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 - " rows:%d, elapsed time:%.2f ms %s", - pReader, pBlockScanInfo->uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, - pResBlock->info.rows, (et - st) / 1000.0, pReader->idStr); - } - - return TSDB_CODE_SUCCESS; -} - -void setComposedBlockFlag(STsdbReader* pReader, bool composed) { pReader->status.composedDataBlock = composed; } - static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) { if (pBlockScanInfo->iterInit) { return TSDB_CODE_SUCCESS; @@ -2107,6 +1820,225 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea return TSDB_CODE_SUCCESS; } +static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, + STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) { + // it is an multi-table data block + if (pBlockData->aUid != NULL) { + uint64_t uid = pBlockData->aUid[pDumpInfo->rowIndex]; + if (uid != pBlockScanInfo->uid) { // move to next row + return false; + } + } + + // check for version and time range + int64_t ver = pBlockData->aVersion[pDumpInfo->rowIndex]; + if (ver > pReader->verRange.maxVer || ver < pReader->verRange.minVer) { + return false; + } + + int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex]; + if (ts > pReader->window.ekey || ts < pReader->window.skey) { + return false; + } + + TSDBKEY k = {.ts = ts, .version = ver}; + if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, &k, pReader->order)) { + return false; + } + + return true; +} + +static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); } + +static bool nextRowFromLastBlocks(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pBlockScanInfo) { + while (1) { + bool hasVal = tMergeTreeNext(&pLastBlockReader->mergeTree); + if (!hasVal) { + return false; + } + + TSDBROW row = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + TSDBKEY k = TSDBROW_KEY(&row); + if (!hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->lastBlockDelIndex, &k, pLastBlockReader->order)) { + return true; + } + } +} + +static bool initLastBlockReader(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pBlockScanInfo, + STsdbReader* pReader) { + // the last block reader has been initialized for this table. + if (pLastBlockReader->uid == pBlockScanInfo->uid) { + return true; + } + + if (pLastBlockReader->uid != 0) { + tMergeTreeClose(&pLastBlockReader->mergeTree); + } + + initMemDataIterator(pBlockScanInfo, pReader); + pLastBlockReader->uid = pBlockScanInfo->uid; + + int32_t step = ASCENDING_TRAVERSE(pLastBlockReader->order)? 1:-1; + STimeWindow w = pLastBlockReader->window; + if (ASCENDING_TRAVERSE(pLastBlockReader->order)) { + w.skey = pBlockScanInfo->lastKey + step; + } else { + w.ekey = pBlockScanInfo->lastKey + step; + } + + int32_t code = + tMergeTreeOpen(&pLastBlockReader->mergeTree, (pLastBlockReader->order == TSDB_ORDER_DESC), pReader->pFileReader, + pReader->suid, pBlockScanInfo->uid, &w, &pLastBlockReader->verRange); + if (code != TSDB_CODE_SUCCESS) { + return false; + } + + return nextRowFromLastBlocks(pLastBlockReader, pBlockScanInfo); +} + +static int64_t getCurrentKeyInLastBlock(SLastBlockReader* pLastBlockReader) { + TSDBROW row = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + TSDBKEY key = TSDBROW_KEY(&row); + return key.ts; +} + +static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader) { return pLastBlockReader->mergeTree.pIter != NULL; } + +int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, + STsdbReader* pReader) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + + if (tryCopyDistinctRowFromFileBlock(pReader, pBlockData, key, pDumpInfo)) { + return TSDB_CODE_SUCCESS; + } else { + STSRow* pTSRow = NULL; + SRowMerger merge = {0}; + + tRowMergerInit(&merge, &fRow, pReader->pSchema); + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge); + int32_t code = tRowMergerGetRow(&merge, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow, pBlockScanInfo->uid); + + taosMemoryFree(pTSRow); + tRowMergerClear(&merge); + return TSDB_CODE_SUCCESS; + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, + SBlockData* pBlockData, SLastBlockReader* pLastBlockReader) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + int64_t key = (pBlockData->nRow > 0) ? pBlockData->aTSKEY[pDumpInfo->rowIndex] : INT64_MIN; + TSDBROW* pRow = getValidMemRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader); + TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader); + + if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal) { + return doMergeMultiLevelRows(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); + } else { + // imem + file + last block + if (pBlockScanInfo->iiter.hasVal) { + return doMergeBufAndFileRows(pReader, pBlockScanInfo, piRow, &pBlockScanInfo->iiter, key, pLastBlockReader); + } + + // mem + file + last block + if (pBlockScanInfo->iter.hasVal) { + return doMergeBufAndFileRows(pReader, pBlockScanInfo, pRow, &pBlockScanInfo->iter, key, pLastBlockReader); + } + + // files data blocks + last block + return mergeFileBlockAndLastBlock(pReader, pLastBlockReader, key, pBlockScanInfo, pBlockData); + } +} + +static int32_t buildComposedDataBlock(STsdbReader* pReader) { + SSDataBlock* pResBlock = pReader->pResBlock; + + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); + + STableBlockScanInfo* pBlockScanInfo = NULL; + if (pBlockInfo != NULL) { + pBlockScanInfo = taosHashGet(pReader->status.pTableMap, &pBlockInfo->uid, sizeof(pBlockInfo->uid)); + } else { + pBlockScanInfo = pReader->status.pTableIter; + } + + SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + SBlockData* pBlockData = &pReader->status.fileBlockData; + int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1; + + int64_t st = taosGetTimestampUs(); + + while (1) { + // todo check the validate of row in file block + bool hasBlockData = false; + { + while (pBlockData->nRow > 0) { // find the first qualified row in data block + if (isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) { + hasBlockData = true; + break; + } + + pDumpInfo->rowIndex += step; + + SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); + if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) { + setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); + break; + } + } + } + + bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); + + // no data in last block and block, no need to proceed. + if ((hasBlockData == false) && (hasBlockLData == false)) { + break; + } + + buildComposedDataBlockImpl(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); + + // currently loaded file data block is consumed + if ((pBlockData->nRow > 0) && (pDumpInfo->rowIndex >= pBlockData->nRow || pDumpInfo->rowIndex < 0)) { + SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); + setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); + break; + } + + if (pResBlock->info.rows >= pReader->capacity) { + break; + } + } + + pResBlock->info.uid = pBlockScanInfo->uid; + blockDataUpdateTsWindow(pResBlock, 0); + + setComposedBlockFlag(pReader, true); + int64_t et = taosGetTimestampUs(); + + if (pResBlock->info.rows > 0) { + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 + " rows:%d, elapsed time:%.2f ms %s", + pReader, pBlockScanInfo->uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, (et - st) / 1000.0, pReader->idStr); + } + + return TSDB_CODE_SUCCESS; +} + +void setComposedBlockFlag(STsdbReader* pReader, bool composed) { pReader->status.composedDataBlock = composed; } + int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData, STbData* piMemTbData) { if (pBlockScanInfo->delSkyline != NULL) { @@ -2190,9 +2122,7 @@ _err: } static TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { - TSDBKEY key = {.ts = TSKEY_INITIAL_VAL}; - - initMemDataIterator(pScanInfo, pReader); + TSDBKEY key = {.ts = TSKEY_INITIAL_VAL}; TSDBROW* pRow = getValidMemRow(&pScanInfo->iter, pScanInfo->delSkyline, pReader); if (pRow != NULL) { key = TSDBROW_KEY(pRow); @@ -2212,12 +2142,10 @@ static TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* p static int32_t moveToNextFile(STsdbReader* pReader, SBlockNumber* pBlockNum) { SReaderStatus* pStatus = &pReader->status; pBlockNum->numOfBlocks = 0; - pBlockNum->numOfLastBlocks = 0; + pBlockNum->numOfLastFiles = 0; size_t numOfTables = taosHashGetSize(pReader->status.pTableMap); SArray* pIndexList = taosArrayInit(numOfTables, sizeof(SBlockIdx)); - SArray* pLastBlocks = pStatus->fileIter.pLastBlockReader->pBlockL; - taosArrayClear(pLastBlocks); while (1) { bool hasNext = filesetIteratorNext(&pStatus->fileIter, pReader); @@ -2232,32 +2160,16 @@ static int32_t moveToNextFile(STsdbReader* pReader, SBlockNumber* pBlockNum) { return code; } - code = tsdbReadBlockL(pReader->pFileReader, pLastBlocks); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroy(pIndexList); - return code; - } - - if (taosArrayGetSize(pIndexList) > 0 || taosArrayGetSize(pLastBlocks) > 0) { - SArray* pQLastBlock = taosArrayInit(4, sizeof(SBlockL)); - - code = doLoadFileBlock(pReader, pIndexList, pLastBlocks, pBlockNum, pQLastBlock); + if (taosArrayGetSize(pIndexList) > 0 || pReader->pFileReader->pSet->nSttF > 0) { + code = doLoadFileBlock(pReader, pIndexList, pBlockNum); if (code != TSDB_CODE_SUCCESS) { taosArrayDestroy(pIndexList); - taosArrayDestroy(pQLastBlock); return code; } - if (pBlockNum->numOfBlocks + pBlockNum->numOfLastBlocks > 0) { - ASSERT(taosArrayGetSize(pQLastBlock) == pBlockNum->numOfLastBlocks); - taosArrayClear(pLastBlocks); - taosArrayAddAll(pLastBlocks, pQLastBlock); - - taosArrayDestroy(pQLastBlock); + if (pBlockNum->numOfBlocks + pBlockNum->numOfLastFiles > 0) { break; } - - taosArrayDestroy(pQLastBlock); } // no blocks in current file, try next files @@ -2267,81 +2179,22 @@ static int32_t moveToNextFile(STsdbReader* pReader, SBlockNumber* pBlockNum) { return TSDB_CODE_SUCCESS; } -static int32_t doLoadRelatedLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo *pBlockScanInfo, STsdbReader* pReader) { - SArray* pBlocks = pLastBlockReader->pBlockL; - SBlockL* pBlock = NULL; - - uint64_t uid = pBlockScanInfo->uid; - int32_t totalLastBlocks = (int32_t)taosArrayGetSize(pBlocks); - - initMemDataIterator(pBlockScanInfo, pReader); - - // find the correct SBlockL. todo binary search - int32_t index = -1; - for (int32_t i = 0; i < totalLastBlocks; ++i) { - SBlockL* p = taosArrayGet(pBlocks, i); - if (p->minUid <= uid && p->maxUid >= uid) { - index = i; - pBlock = p; - break; - } - } - - if (index == -1) { - pLastBlockReader->currentBlockIndex = index; - tBlockDataReset(&pLastBlockReader->lastBlockData); - return TSDB_CODE_SUCCESS; - } - - // the required last datablock has already loaded - if (index == pLastBlockReader->currentBlockIndex) { - return TSDB_CODE_SUCCESS; - } - - int64_t st = taosGetTimestampUs(); - int32_t code = tBlockDataInit(&pLastBlockReader->lastBlockData, pReader->suid, pReader->suid ? 0 : uid, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - tsdbError("%p init block data failed, code:%s %s", pReader, tstrerror(code), pReader->idStr); - return code; - } - - code = tsdbReadLastBlock(pReader->pFileReader, pBlock, &pLastBlockReader->lastBlockData); - - double el = (taosGetTimestampUs() - st) / 1000.0; - if (code != TSDB_CODE_SUCCESS) { - tsdbError("%p error occurs in loading last block into buffer, last block index:%d, total:%d code:%s %s", pReader, - pLastBlockReader->currentBlockIndex, totalLastBlocks, tstrerror(code), pReader->idStr); - } else { - tsdbDebug("%p load last block completed, uid:%" PRIu64 - " last block index:%d, total:%d rows:%d, minVer:%d, maxVer:%d, brange:%" PRId64 "-%" PRId64 - " elapsed time:%.2f ms, %s", - pReader, uid, index, totalLastBlocks, pBlock->nRow, pBlock->minVer, pBlock->maxVer, pBlock->minKey, - pBlock->maxKey, el, pReader->idStr); - } - - pLastBlockReader->currentBlockIndex = index; - pReader->cost.lastBlockLoad += 1; - pReader->cost.lastBlockLoadTime += el; - - return TSDB_CODE_SUCCESS; -} - static int32_t uidComparFunc(const void* p1, const void* p2) { - uint64_t pu1 = *(uint64_t*) p1; - uint64_t pu2 = *(uint64_t*) p2; + uint64_t pu1 = *(uint64_t*)p1; + uint64_t pu2 = *(uint64_t*)p2; if (pu1 == pu2) { return 0; } else { - return (pu1 < pu2)? -1:1; + return (pu1 < pu2) ? -1 : 1; } } -static void extractOrderedTableUidList(SUidOrderCheckInfo *pOrderCheckInfo, SReaderStatus* pStatus) { +static void extractOrderedTableUidList(SUidOrderCheckInfo* pOrderCheckInfo, SReaderStatus* pStatus) { int32_t index = 0; int32_t total = taosHashGetSize(pStatus->pTableMap); void* p = taosHashIterate(pStatus->pTableMap, NULL); - while(p != NULL) { + while (p != NULL) { STableBlockScanInfo* pScanInfo = p; pOrderCheckInfo->tableUidList[index++] = pScanInfo->uid; p = taosHashIterate(pStatus->pTableMap, p); @@ -2351,9 +2204,12 @@ static void extractOrderedTableUidList(SUidOrderCheckInfo *pOrderCheckInfo, SRea } static int32_t initOrderCheckInfo(SUidOrderCheckInfo* pOrderCheckInfo, SReaderStatus* pStatus) { - if (pOrderCheckInfo->tableUidList == NULL) { - int32_t total = taosHashGetSize(pStatus->pTableMap); + int32_t total = taosHashGetSize(pStatus->pTableMap); + if (total == 0) { + return TSDB_CODE_SUCCESS; + } + if (pOrderCheckInfo->tableUidList == NULL) { pOrderCheckInfo->currentIndex = 0; pOrderCheckInfo->tableUidList = taosMemoryMalloc(total * sizeof(uint64_t)); if (pOrderCheckInfo->tableUidList == NULL) { @@ -2361,20 +2217,17 @@ static int32_t initOrderCheckInfo(SUidOrderCheckInfo* pOrderCheckInfo, SReaderSt } extractOrderedTableUidList(pOrderCheckInfo, pStatus); - uint64_t uid = pOrderCheckInfo->tableUidList[0]; pStatus->pTableIter = taosHashGet(pStatus->pTableMap, &uid, sizeof(uid)); } else { if (pStatus->pTableIter == NULL) { // it is the last block of a new file -// ASSERT(pOrderCheckInfo->currentIndex == taosHashGetSize(pStatus->pTableMap)); - pOrderCheckInfo->currentIndex = 0; uint64_t uid = pOrderCheckInfo->tableUidList[pOrderCheckInfo->currentIndex]; pStatus->pTableIter = taosHashGet(pStatus->pTableMap, &uid, sizeof(uid)); // the tableMap has already updated if (pStatus->pTableIter == NULL) { - void* p = taosMemoryRealloc(pOrderCheckInfo->tableUidList, taosHashGetSize(pStatus->pTableMap)*sizeof(uint64_t)); + void* p = taosMemoryRealloc(pOrderCheckInfo->tableUidList, total * sizeof(uint64_t)); if (p == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } @@ -2391,7 +2244,7 @@ static int32_t initOrderCheckInfo(SUidOrderCheckInfo* pOrderCheckInfo, SReaderSt return TSDB_CODE_SUCCESS; } -static bool moveToNextTable(SUidOrderCheckInfo *pOrderedCheckInfo, SReaderStatus* pStatus) { +static bool moveToNextTable(SUidOrderCheckInfo* pOrderedCheckInfo, SReaderStatus* pStatus) { pOrderedCheckInfo->currentIndex += 1; if (pOrderedCheckInfo->currentIndex >= taosHashGetSize(pStatus->pTableMap)) { pStatus->pTableIter = NULL; @@ -2405,38 +2258,20 @@ static bool moveToNextTable(SUidOrderCheckInfo *pOrderedCheckInfo, SReaderStatus } static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; + SReaderStatus* pStatus = &pReader->status; SLastBlockReader* pLastBlockReader = pStatus->fileIter.pLastBlockReader; - SUidOrderCheckInfo *pOrderedCheckInfo = &pStatus->uidCheckInfo; - int32_t code = initOrderCheckInfo(pOrderedCheckInfo, pStatus); + SUidOrderCheckInfo* pOrderedCheckInfo = &pStatus->uidCheckInfo; + int32_t code = initOrderCheckInfo(pOrderedCheckInfo, pStatus); if (code != TSDB_CODE_SUCCESS || (taosHashGetSize(pStatus->pTableMap) == 0)) { return code; } - while(1) { + while (1) { // load the last data block of current table STableBlockScanInfo* pScanInfo = pStatus->pTableIter; - code = doLoadRelatedLastBlock(pLastBlockReader, pScanInfo, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - if (pLastBlockReader->currentBlockIndex != -1) { - initLastBlockReader(pLastBlockReader, pScanInfo->uid, &pScanInfo->indexInBlockL); - int32_t index = pScanInfo->indexInBlockL; - - if (index == INITIAL_ROW_INDEX_VAL || index == pLastBlockReader->lastBlockData.nRow) { - bool hasData = nextRowInLastBlock(pLastBlockReader, pScanInfo); - if (!hasData) { // current table does not have rows in last block, try next table - bool hasNexTable = moveToNextTable(pOrderedCheckInfo, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - continue; - } - } - } else { // no data in last block, try next table + bool hasVal = initLastBlockReader(pLastBlockReader, pScanInfo, pReader); + if (!hasVal) { bool hasNexTable = moveToNextTable(pOrderedCheckInfo, pStatus); if (!hasNexTable) { return TSDB_CODE_SUCCESS; @@ -2462,9 +2297,8 @@ static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { } static int32_t doBuildDataBlock(STsdbReader* pReader) { - TSDBKEY key = {0}; - int32_t code = TSDB_CODE_SUCCESS; - SBlock* pBlock = NULL; + int32_t code = TSDB_CODE_SUCCESS; + SDataBlk* pBlock = NULL; SReaderStatus* pStatus = &pReader->status; SDataBlockIter* pBlockIter = &pStatus->blockIter; @@ -2482,21 +2316,8 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { pBlock = getCurrentBlock(pBlockIter); } - { - key = getCurrentKeyInBuf(pScanInfo, pReader); - - // load the last data block of current table - code = doLoadRelatedLastBlock(pLastBlockReader, pScanInfo, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - // note: the lastblock may be null here - initLastBlockReader(pLastBlockReader, pScanInfo->uid, &pScanInfo->indexInBlockL); - if (pScanInfo->indexInBlockL == INITIAL_ROW_INDEX_VAL || pScanInfo->indexInBlockL == pLastBlockReader->lastBlockData.nRow) { - bool hasData = nextRowInLastBlock(pLastBlockReader, pScanInfo); - } - } + initLastBlockReader(pLastBlockReader, pScanInfo, pReader); + TSDBKEY key = getCurrentKeyInBuf(pScanInfo, pReader); if (pBlockInfo == NULL) { // build data block from last data file ASSERT(pBlockIter->numOfBlocks == 0); @@ -2524,11 +2345,11 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { if (hasDataInLastBlock(pLastBlockReader) && !ASCENDING_TRAVERSE(pReader->order)) { // only return the rows in last block int64_t tsLast = getCurrentKeyInLastBlock(pLastBlockReader); - ASSERT (tsLast >= pBlock->maxKey.ts); + ASSERT(tsLast >= pBlock->maxKey.ts); tBlockDataReset(&pReader->status.fileBlockData); code = buildComposedDataBlock(pReader); - } else { // whole block is required, return it directly + } else { // whole block is required, return it directly SDataBlockInfo* pInfo = &pReader->pResBlock->info; pInfo->rows = pBlock->nRow; pInfo->uid = pScanInfo->uid; @@ -2575,7 +2396,7 @@ static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { // set the correct start position in case of the first/last file block, according to the query time window static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter) { - SBlock* pBlock = getCurrentBlock(pBlockIter); + SDataBlk* pBlock = getCurrentBlock(pBlockIter); SReaderStatus* pStatus = &pReader->status; @@ -2595,7 +2416,7 @@ static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBl } // all data files are consumed, try data in buffer - if (num.numOfBlocks + num.numOfLastBlocks == 0) { + if (num.numOfBlocks + num.numOfLastFiles == 0) { pReader->status.loadFromFile = false; return code; } @@ -2603,14 +2424,11 @@ static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBl // initialize the block iterator for a new fileset if (num.numOfBlocks > 0) { code = initBlockIterator(pReader, pBlockIter, num.numOfBlocks); - } else { // no block data, only last block exists + } else { // no block data, only last block exists tBlockDataReset(&pReader->status.fileBlockData); resetDataBlockIterator(pBlockIter, pReader->order); } - SLastBlockReader* pLReader = pReader->status.fileIter.pLastBlockReader; - pLReader->currentBlockIndex = -1; - // set the correct start position according to the query time window initBlockDumpInfo(pReader, pBlockIter); return code; @@ -2628,7 +2446,7 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { SDataBlockIter* pBlockIter = &pReader->status.blockIter; if (pBlockIter->numOfBlocks == 0) { - _begin: + _begin: code = doLoadLastBlockSequentially(pReader); if (code != TSDB_CODE_SUCCESS) { return code; @@ -2675,21 +2493,24 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { bool hasNext = blockIteratorNext(&pReader->status.blockIter); if (hasNext) { // check for the next block in the block accessed order list initBlockDumpInfo(pReader, pBlockIter); - } else if (taosArrayGetSize(pReader->status.fileIter.pLastBlockReader->pBlockL) > 0) { // data blocks in current file are exhausted, let's try the next file now - tBlockDataReset(&pReader->status.fileBlockData); - resetDataBlockIterator(pBlockIter, pReader->order); - goto _begin; } else { - code = initForFirstBlockInFile(pReader, pBlockIter); - - // error happens or all the data files are completely checked - if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { - return code; - } - - // this file does not have blocks, let's start check the last block file - if (pBlockIter->numOfBlocks == 0) { + if (pReader->status.pCurrentFileset->nSttF > 0) { + // data blocks in current file are exhausted, let's try the next file now + tBlockDataReset(&pReader->status.fileBlockData); + resetDataBlockIterator(pBlockIter, pReader->order); goto _begin; + } else { + code = initForFirstBlockInFile(pReader, pBlockIter); + + // error happens or all the data files are completely checked + if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { + return code; + } + + // this file does not have blocks, let's start check the last block file + if (pBlockIter->numOfBlocks == 0) { + goto _begin; + } } } } @@ -2948,7 +2769,7 @@ typedef enum { CHECK_FILEBLOCK_QUIT = 0x2, } CHECK_FILEBLOCK_STATE; -static int32_t checkForNeighborFileBlock(STsdbReader* pReader, STableBlockScanInfo* pScanInfo, SBlock* pBlock, +static int32_t checkForNeighborFileBlock(STsdbReader* pReader, STableBlockScanInfo* pScanInfo, SDataBlk* pBlock, SFileDataBlockInfo* pFBlock, SRowMerger* pMerger, int64_t key, CHECK_FILEBLOCK_STATE* state) { SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; @@ -2957,8 +2778,8 @@ static int32_t checkForNeighborFileBlock(STsdbReader* pReader, STableBlockScanIn *state = CHECK_FILEBLOCK_QUIT; int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1; - int32_t nextIndex = -1; - SBlock* pNeighborBlock = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &nextIndex, pReader->order); + int32_t nextIndex = -1; + SDataBlk* pNeighborBlock = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &nextIndex, pReader->order); if (pNeighborBlock == NULL) { // do nothing return 0; } @@ -3022,7 +2843,7 @@ int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pSc CHECK_FILEBLOCK_STATE st; SFileDataBlockInfo* pFileBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); - SBlock* pCurrentBlock = getCurrentBlock(&pReader->status.blockIter); + SDataBlk* pCurrentBlock = getCurrentBlock(&pReader->status.blockIter); checkForNeighborFileBlock(pReader, pScanInfo, pCurrentBlock, pFileBlockInfo, pMerger, key, &st); if (st == CHECK_FILEBLOCK_QUIT) { break; @@ -3033,11 +2854,13 @@ int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pSc return TSDB_CODE_SUCCESS; } -int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, SRowMerger* pMerger) { - while(nextRowInLastBlock(pLastBlockReader, pScanInfo)) { +int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, + SRowMerger* pMerger) { + pScanInfo->lastKey = ts; + while (nextRowFromLastBlocks(pLastBlockReader, pScanInfo)) { int64_t next1 = getCurrentKeyInLastBlock(pLastBlockReader); if (next1 == ts) { - TSDBROW fRow1 = tsdbRowFromBlockData(&pLastBlockReader->lastBlockData, *pLastBlockReader->rowIndex); + TSDBROW fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); tRowMerge(pMerger, &fRow1); } else { break; @@ -3101,7 +2924,7 @@ int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, } int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, - STSRow** pTSRow) { + STSRow** pTSRow) { SRowMerger merge = {0}; TSDBKEY k = TSDBROW_KEY(pRow); @@ -3175,7 +2998,8 @@ int32_t tsdbGetNextRowInMem(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pR } if (pBlockScanInfo->iter.hasVal && pRow != NULL) { - return doMergeMemTableMultiRows(pRow, pBlockScanInfo->uid, &pBlockScanInfo->iter, pDelList, pTSRow, pReader, freeTSRow); + return doMergeMemTableMultiRows(pRow, pBlockScanInfo->uid, &pBlockScanInfo->iter, pDelList, pTSRow, pReader, + freeTSRow); } if (pBlockScanInfo->iiter.hasVal && piRow != NULL) { @@ -3229,7 +3053,8 @@ int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* return TSDB_CODE_SUCCESS; } -int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, int32_t rowIndex) { +int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, + int32_t rowIndex) { int32_t i = 0, j = 0; int32_t outputRowIndex = pResBlock->info.rows; @@ -3306,7 +3131,7 @@ int32_t tsdbSetTableId(STsdbReader* pReader, int64_t uid) { ASSERT(pReader != NULL); taosHashClear(pReader->status.pTableMap); - STableBlockScanInfo info = {.lastKey = 0, .uid = uid, .indexInBlockL = INITIAL_ROW_INDEX_VAL}; + STableBlockScanInfo info = {.lastKey = 0, .uid = uid}; taosHashPut(pReader->status.pTableMap, &info.uid, sizeof(uint64_t), &info, sizeof(info)); return TDB_CODE_SUCCESS; } @@ -3327,7 +3152,6 @@ void* tsdbGetIvtIdx(SMeta* pMeta) { uint64_t getReaderMaxVersion(STsdbReader* pReader) { return pReader->verRange.maxVer; } - // ====================================== EXPOSED APIs ====================================== int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTableList, STsdbReader** ppReader, const char* idstr) { @@ -3377,10 +3201,10 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl } if (pCond->suid != 0) { - pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pReader->suid, pCond->schemaVersion); + pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pReader->suid, pCond->endVersion); } else if (taosArrayGetSize(pTableList) > 0) { STableKeyInfo* pKey = taosArrayGet(pTableList, 0); - pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pKey->uid, pCond->schemaVersion); + pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pKey->uid, pCond->endVersion); } int32_t numOfTables = taosArrayGetSize(pTableList); @@ -3450,7 +3274,6 @@ void tsdbReaderClose(STsdbReader* pReader) { } SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - tsdbUntakeReadSnap(pReader->pTsdb, pReader->pReadSnap); taosMemoryFreeClear(pSupInfo->plist); taosMemoryFree(pSupInfo->colIds); @@ -3461,6 +3284,7 @@ void tsdbReaderClose(STsdbReader* pReader) { taosMemoryFreeClear(pSupInfo->buildBuf[i]); } } + taosMemoryFree(pSupInfo->buildBuf); tBlockDataDestroy(&pReader->status.fileBlockData, true); @@ -3474,12 +3298,13 @@ void tsdbReaderClose(STsdbReader* pReader) { tsdbDataFReaderClose(&pReader->pFileReader); } + tsdbUntakeReadSnap(pReader->pTsdb, pReader->pReadSnap); + taosMemoryFree(pReader->status.uidCheckInfo.tableUidList); SFilesetIter* pFilesetIter = &pReader->status.fileIter; if (pFilesetIter->pLastBlockReader != NULL) { - tBlockDataDestroy(&pFilesetIter->pLastBlockReader->lastBlockData, true); - taosArrayDestroy(pFilesetIter->pLastBlockReader->pBlockL); + tMergeTreeClose(&pFilesetIter->pLastBlockReader->mergeTree); taosMemoryFree(pFilesetIter->pLastBlockReader); } @@ -3603,12 +3428,12 @@ int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SColumnDataAgg*** pBlockS SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter); - SBlock* pBlock = getCurrentBlock(&pReader->status.blockIter); - int64_t stime = taosGetTimestampUs(); + SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); + int64_t stime = taosGetTimestampUs(); SBlockLoadSuppInfo* pSup = &pReader->suppInfo; - if (tBlockHasSma(pBlock)) { + if (tDataBlkHasSma(pBlock)) { code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pSup->pColAgg); if (code != TSDB_CODE_SUCCESS) { tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64 ", code:%s, %s", 0, pFBlock->uid, tstrerror(code), @@ -3723,11 +3548,12 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { tsdbDataFReaderClose(&pReader->pFileReader); int32_t numOfTables = taosHashGetSize(pReader->status.pTableMap); - tsdbDataFReaderClose(&pReader->pFileReader); initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader); resetDataBlockIterator(&pReader->status.blockIter, pReader->order); - resetDataBlockScanInfo(pReader->status.pTableMap); + + int64_t ts = ASCENDING_TRAVERSE(pReader->order)?pReader->window.skey-1:pReader->window.ekey+1; + resetDataBlockScanInfo(pReader->status.pTableMap, ts); int32_t code = 0; SDataBlockIter* pBlockIter = &pReader->status.blockIter; @@ -3785,7 +3611,7 @@ int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTa while (true) { if (hasNext) { - SBlock* pBlock = getCurrentBlock(pBlockIter); + SDataBlk* pBlock = getCurrentBlock(pBlockIter); int32_t numOfRows = pBlock->nRow; pTableBlockInfo->totalRows += numOfRows; @@ -3953,4 +3779,4 @@ void tsdbUntakeReadSnap(STsdb* pTsdb, STsdbReadSnap* pSnap) { } tsdbTrace("vgId:%d, untake read snapshot", TD_VID(pTsdb->pVnode)); -} +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index c8f3862071..128bfe37da 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -15,902 +15,192 @@ #include "tsdb.h" -// SDelFWriter ==================================================== -int32_t tsdbDelFWriterOpen(SDelFWriter **ppWriter, SDelFile *pFile, STsdb *pTsdb) { - int32_t code = 0; - char fname[TSDB_FILENAME_LEN]; - char hdr[TSDB_FHDR_SIZE] = {0}; - SDelFWriter *pDelFWriter; - int64_t n; - - // alloc - pDelFWriter = (SDelFWriter *)taosMemoryCalloc(1, sizeof(*pDelFWriter)); - if (pDelFWriter == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pDelFWriter->pTsdb = pTsdb; - pDelFWriter->fDel = *pFile; - - tsdbDelFileName(pTsdb, pFile, fname); - pDelFWriter->pWriteH = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE); - if (pDelFWriter->pWriteH == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // update header - n = taosWriteFile(pDelFWriter->pWriteH, &hdr, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - pDelFWriter->fDel.size = TSDB_FHDR_SIZE; - pDelFWriter->fDel.offset = 0; - - *ppWriter = pDelFWriter; - return code; - -_err: - tsdbError("vgId:%d, failed to open del file writer since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - *ppWriter = NULL; - return code; -} - -int32_t tsdbDelFWriterClose(SDelFWriter **ppWriter, int8_t sync) { - int32_t code = 0; - SDelFWriter *pWriter = *ppWriter; - STsdb *pTsdb = pWriter->pTsdb; - - // sync - if (sync && taosFsyncFile(pWriter->pWriteH) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // close - if (taosCloseFile(&pWriter->pWriteH) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t *); iBuf++) { - tFree(pWriter->aBuf[iBuf]); - } - taosMemoryFree(pWriter); - - *ppWriter = NULL; - return code; - -_err: - tsdbError("vgId:%d, failed to close del file writer since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} - -int32_t tsdbWriteDelData(SDelFWriter *pWriter, SArray *aDelData, SDelIdx *pDelIdx) { - int32_t code = 0; - int64_t size; - int64_t n; - - // prepare - size = sizeof(uint32_t); - for (int32_t iDelData = 0; iDelData < taosArrayGetSize(aDelData); iDelData++) { - size += tPutDelData(NULL, taosArrayGet(aDelData, iDelData)); - } - size += sizeof(TSCKSUM); - - // alloc - code = tRealloc(&pWriter->aBuf[0], size); - if (code) goto _err; - - // build - n = 0; - n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT); - for (int32_t iDelData = 0; iDelData < taosArrayGetSize(aDelData); iDelData++) { - n += tPutDelData(pWriter->aBuf[0] + n, taosArrayGet(aDelData, iDelData)); - } - taosCalcChecksumAppend(0, pWriter->aBuf[0], size); - - ASSERT(n + sizeof(TSCKSUM) == size); - - // write - n = taosWriteFile(pWriter->pWriteH, pWriter->aBuf[0], size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - ASSERT(n == size); - - // update - pDelIdx->offset = pWriter->fDel.size; - pDelIdx->size = size; - pWriter->fDel.size += size; - - return code; - -_err: - tsdbError("vgId:%d, failed to write del data since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); - return code; -} - -int32_t tsdbWriteDelIdx(SDelFWriter *pWriter, SArray *aDelIdx) { +// =============== PAGE-WISE FILE =============== +static int32_t tsdbOpenFile(const char *path, int32_t szPage, int32_t flag, STsdbFD **ppFD) { int32_t code = 0; - int64_t size; - int64_t n; - SDelIdx *pDelIdx; + STsdbFD *pFD; - // prepare - size = sizeof(uint32_t); - for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) { - size += tPutDelIdx(NULL, taosArrayGet(aDelIdx, iDelIdx)); - } - size += sizeof(TSCKSUM); + *ppFD = NULL; - // alloc - code = tRealloc(&pWriter->aBuf[0], size); - if (code) goto _err; - - // build - n = 0; - n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT); - for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) { - n += tPutDelIdx(pWriter->aBuf[0] + n, taosArrayGet(aDelIdx, iDelIdx)); - } - taosCalcChecksumAppend(0, pWriter->aBuf[0], size); - - ASSERT(n + sizeof(TSCKSUM) == size); - - // write - n = taosWriteFile(pWriter->pWriteH, pWriter->aBuf[0], size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // update - pWriter->fDel.offset = pWriter->fDel.size; - pWriter->fDel.size += size; - - return code; - -_err: - tsdbError("vgId:%d, write del idx failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); - return code; -} - -int32_t tsdbUpdateDelFileHdr(SDelFWriter *pWriter) { - int32_t code = 0; - char hdr[TSDB_FHDR_SIZE]; - int64_t size = TSDB_FHDR_SIZE; - int64_t n; - - // build - memset(hdr, 0, size); - tPutDelFile(hdr, &pWriter->fDel); - taosCalcChecksumAppend(0, hdr, size); - - // seek - if (taosLSeekFile(pWriter->pWriteH, 0, SEEK_SET) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // write - n = taosWriteFile(pWriter->pWriteH, hdr, size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - return code; - -_err: - tsdbError("vgId:%d, update del file hdr failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); - return code; -} - -// SDelFReader ==================================================== -struct SDelFReader { - STsdb *pTsdb; - SDelFile fDel; - TdFilePtr pReadH; - - uint8_t *aBuf[1]; -}; - -int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb) { - int32_t code = 0; - char fname[TSDB_FILENAME_LEN]; - SDelFReader *pDelFReader; - int64_t n; - - // alloc - pDelFReader = (SDelFReader *)taosMemoryCalloc(1, sizeof(*pDelFReader)); - if (pDelFReader == NULL) { + pFD = (STsdbFD *)taosMemoryCalloc(1, sizeof(*pFD) + strlen(path) + 1); + if (pFD == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; + goto _exit; } - // open impl - pDelFReader->pTsdb = pTsdb; - pDelFReader->fDel = *pFile; - - tsdbDelFileName(pTsdb, pFile, fname); - pDelFReader->pReadH = taosOpenFile(fname, TD_FILE_READ); - if (pDelFReader->pReadH == NULL) { + pFD->path = (char *)&pFD[1]; + strcpy(pFD->path, path); + pFD->szPage = szPage; + pFD->flag = flag; + pFD->pFD = taosOpenFile(path, flag); + if (pFD->pFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); - taosMemoryFree(pDelFReader); - goto _err; + goto _exit; } + pFD->szPage = szPage; + pFD->pgno = 0; + pFD->pBuf = taosMemoryCalloc(1, szPage); + if (pFD->pBuf == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + taosMemoryFree(pFD); + goto _exit; + } + if (taosStatFile(path, &pFD->szFile, NULL) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _exit; + } + ASSERT(pFD->szFile % szPage == 0); + pFD->szFile = pFD->szFile / szPage; + *ppFD = pFD; _exit: - *ppReader = pDelFReader; - return code; - -_err: - tsdbError("vgId:%d, del file reader open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - *ppReader = NULL; return code; } -int32_t tsdbDelFReaderClose(SDelFReader **ppReader) { - int32_t code = 0; - SDelFReader *pReader = *ppReader; +static void tsdbCloseFile(STsdbFD **ppFD) { + STsdbFD *pFD = *ppFD; + taosMemoryFree(pFD->pBuf); + taosCloseFile(&pFD->pFD); + taosMemoryFree(pFD); + *ppFD = NULL; +} - if (pReader) { - if (taosCloseFile(&pReader->pReadH) < 0) { +static int32_t tsdbWriteFilePage(STsdbFD *pFD) { + int32_t code = 0; + + if (pFD->pgno > 0) { + int64_t n = taosLSeekFile(pFD->pFD, PAGE_OFFSET(pFD->pgno, pFD->szPage), SEEK_SET); + if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _exit; } - for (int32_t iBuf = 0; iBuf < sizeof(pReader->aBuf) / sizeof(uint8_t *); iBuf++) { - tFree(pReader->aBuf[iBuf]); + + taosCalcChecksumAppend(0, pFD->pBuf, pFD->szPage); + + n = taosWriteFile(pFD->pFD, pFD->pBuf, pFD->szPage); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _exit; + } + + if (pFD->szFile < pFD->pgno) { + pFD->szFile = pFD->pgno; } - taosMemoryFree(pReader); } - *ppReader = NULL; + pFD->pgno = 0; _exit: return code; } -int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData) { +static int32_t tsdbReadFilePage(STsdbFD *pFD, int64_t pgno) { int32_t code = 0; - int64_t offset = pDelIdx->offset; - int64_t size = pDelIdx->size; - int64_t n; - taosArrayClear(aDelData); + ASSERT(pgno <= pFD->szFile); // seek - if (taosLSeekFile(pReader->pReadH, offset, SEEK_SET) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // alloc - code = tRealloc(&pReader->aBuf[0], size); - if (code) goto _err; - - // read - n = taosReadFile(pReader->pReadH, pReader->aBuf[0], size); + int64_t offset = PAGE_OFFSET(pgno, pFD->szPage); + int64_t n = taosLSeekFile(pFD->pFD, offset, SEEK_SET); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } else if (n < size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - // check - if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - // // decode - n = 0; - - uint32_t delimiter; - n += tGetU32(pReader->aBuf[0] + n, &delimiter); - while (n < size - sizeof(TSCKSUM)) { - SDelData delData; - n += tGetDelData(pReader->aBuf[0] + n, &delData); - - if (taosArrayPush(aDelData, &delData) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - } - - ASSERT(n == size - sizeof(TSCKSUM)); - - return code; - -_err: - tsdbError("vgId:%d, read del data failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - return code; -} - -int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx) { - int32_t code = 0; - int32_t n; - int64_t offset = pReader->fDel.offset; - int64_t size = pReader->fDel.size - offset; - - taosArrayClear(aDelIdx); - - // seek - if (taosLSeekFile(pReader->pReadH, offset, SEEK_SET) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // alloc - code = tRealloc(&pReader->aBuf[0], size); - if (code) goto _err; - - // read - n = taosReadFile(pReader->pReadH, pReader->aBuf[0], size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } else if (n < size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - // check - if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - // decode - n = 0; - uint32_t delimiter; - n += tGetU32(pReader->aBuf[0] + n, &delimiter); - ASSERT(delimiter == TSDB_FILE_DLMT); - - while (n < size - sizeof(TSCKSUM)) { - SDelIdx delIdx; - - n += tGetDelIdx(pReader->aBuf[0] + n, &delIdx); - - if (taosArrayPush(aDelIdx, &delIdx) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - } - - ASSERT(n == size - sizeof(TSCKSUM)); - - return code; - -_err: - tsdbError("vgId:%d, read del idx failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - return code; -} - -// SDataFReader ==================================================== -struct SDataFReader { - STsdb *pTsdb; - SDFileSet *pSet; - TdFilePtr pHeadFD; - TdFilePtr pDataFD; - TdFilePtr pLastFD; - TdFilePtr pSmaFD; - - uint8_t *aBuf[3]; -}; - -int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pSet) { - int32_t code = 0; - SDataFReader *pReader; - char fname[TSDB_FILENAME_LEN]; - - // alloc - pReader = (SDataFReader *)taosMemoryCalloc(1, sizeof(*pReader)); - if (pReader == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pReader->pTsdb = pTsdb; - pReader->pSet = pSet; - - // open impl - // head - tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); - pReader->pHeadFD = taosOpenFile(fname, TD_FILE_READ); - if (pReader->pHeadFD == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // data - tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); - pReader->pDataFD = taosOpenFile(fname, TD_FILE_READ); - if (pReader->pDataFD == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // last - tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); - pReader->pLastFD = taosOpenFile(fname, TD_FILE_READ); - if (pReader->pLastFD == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // sma - tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); - pReader->pSmaFD = taosOpenFile(fname, TD_FILE_READ); - if (pReader->pSmaFD == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - *ppReader = pReader; - return code; - -_err: - tsdbError("vgId:%d, tsdb data file reader open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - *ppReader = NULL; - return code; -} - -int32_t tsdbDataFReaderClose(SDataFReader **ppReader) { - int32_t code = 0; - if (*ppReader == NULL) goto _exit; - - if (taosCloseFile(&(*ppReader)->pHeadFD) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - if (taosCloseFile(&(*ppReader)->pDataFD) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - if (taosCloseFile(&(*ppReader)->pLastFD) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - if (taosCloseFile(&(*ppReader)->pSmaFD) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - for (int32_t iBuf = 0; iBuf < sizeof((*ppReader)->aBuf) / sizeof(uint8_t *); iBuf++) { - tFree((*ppReader)->aBuf[iBuf]); - } - - taosMemoryFree(*ppReader); - -_exit: - *ppReader = NULL; - return code; - -_err: - tsdbError("vgId:%d, data file reader close failed since %s", TD_VID((*ppReader)->pTsdb->pVnode), tstrerror(code)); - return code; -} - -int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx) { - int32_t code = 0; - int64_t offset = pReader->pSet->pHeadF->offset; - int64_t size = pReader->pSet->pHeadF->size - offset; - int64_t n; - uint32_t delimiter; - - taosArrayClear(aBlockIdx); - if (size == 0) { goto _exit; } - // alloc - code = tRealloc(&pReader->aBuf[0], size); - if (code) goto _err; - - // seek - if (taosLSeekFile(pReader->pHeadFD, offset, SEEK_SET) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - // read - n = taosReadFile(pReader->pHeadFD, pReader->aBuf[0], size); + n = taosReadFile(pFD->pFD, pFD->pBuf, pFD->szPage); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } else if (n < size) { + goto _exit; + } else if (n < pFD->szPage) { code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - // check - if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - // decode - n = 0; - n = tGetU32(pReader->aBuf[0] + n, &delimiter); - ASSERT(delimiter == TSDB_FILE_DLMT); - - while (n < size - sizeof(TSCKSUM)) { - SBlockIdx blockIdx; - n += tGetBlockIdx(pReader->aBuf[0] + n, &blockIdx); - - if (taosArrayPush(aBlockIdx, &blockIdx) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - } - - ASSERT(n + sizeof(TSCKSUM) == size); - -_exit: - return code; - -_err: - tsdbError("vgId:%d, read block idx failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - return code; -} - -int32_t tsdbReadBlockL(SDataFReader *pReader, SArray *aBlockL) { - int32_t code = 0; - int64_t offset = pReader->pSet->pLastF->offset; - int64_t size = pReader->pSet->pLastF->size - offset; - int64_t n; - uint32_t delimiter; - - taosArrayClear(aBlockL); - if (size == 0) { goto _exit; } - // alloc - code = tRealloc(&pReader->aBuf[0], size); - if (code) goto _err; - - // seek - if (taosLSeekFile(pReader->pLastFD, offset, SEEK_SET) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // read - n = taosReadFile(pReader->pLastFD, pReader->aBuf[0], size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } else if (n < size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - // check - if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) { + if (!taosCheckChecksumWhole(pFD->pBuf, pFD->szPage)) { code = TSDB_CODE_FILE_CORRUPTED; - goto _err; + goto _exit; } - // decode - n = 0; - n = tGetU32(pReader->aBuf[0] + n, &delimiter); - ASSERT(delimiter == TSDB_FILE_DLMT); - - while (n < size - sizeof(TSCKSUM)) { - SBlockL blockl; - n += tGetBlockL(pReader->aBuf[0] + n, &blockl); - - if (taosArrayPush(aBlockL, &blockl) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - } - - ASSERT(n + sizeof(TSCKSUM) == size); + pFD->pgno = pgno; _exit: return code; - -_err: - tsdbError("vgId:%d read blockl failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - return code; } -int32_t tsdbReadBlock(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *mBlock) { +static int32_t tsdbWriteFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) { int32_t code = 0; - int64_t offset = pBlockIdx->offset; - int64_t size = pBlockIdx->size; - int64_t n; - int64_t tn; + int64_t fOffset = LOGIC_TO_FILE_OFFSET(offset, pFD->szPage); + int64_t pgno = OFFSET_PGNO(fOffset, pFD->szPage); + int64_t bOffset = fOffset % pFD->szPage; + int64_t n = 0; - // alloc - code = tRealloc(&pReader->aBuf[0], size); - if (code) goto _err; + do { + if (pFD->pgno != pgno) { + code = tsdbWriteFilePage(pFD); + if (code) goto _exit; - // seek - if (taosLSeekFile(pReader->pHeadFD, offset, SEEK_SET) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // read - n = taosReadFile(pReader->pHeadFD, pReader->aBuf[0], size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } else if (n < size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - // check - if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - // decode - n = 0; - - uint32_t delimiter; - n += tGetU32(pReader->aBuf[0] + n, &delimiter); - ASSERT(delimiter == TSDB_FILE_DLMT); - - tn = tGetMapData(pReader->aBuf[0] + n, mBlock); - if (tn < 0) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - n += tn; - ASSERT(n + sizeof(TSCKSUM) == size); - - return code; - -_err: - tsdbError("vgId:%d, read block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - return code; -} - -int32_t tsdbReadBlockSma(SDataFReader *pReader, SBlock *pBlock, SArray *aColumnDataAgg) { - int32_t code = 0; - SSmaInfo *pSmaInfo = &pBlock->smaInfo; - - ASSERT(pSmaInfo->size > 0); - - taosArrayClear(aColumnDataAgg); - - // alloc - int32_t size = pSmaInfo->size + sizeof(TSCKSUM); - code = tRealloc(&pReader->aBuf[0], size); - if (code) goto _err; - - // seek - int64_t n = taosLSeekFile(pReader->pSmaFD, pSmaInfo->offset, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } else if (n < pSmaInfo->offset) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - // read - n = taosReadFile(pReader->pSmaFD, pReader->aBuf[0], size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } else if (n < size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - // check - if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - // decode - n = 0; - while (n < pSmaInfo->size) { - SColumnDataAgg sma; - - n += tGetColumnDataAgg(pReader->aBuf[0] + n, &sma); - if (taosArrayPush(aColumnDataAgg, &sma) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; + if (pgno <= pFD->szFile) { + code = tsdbReadFilePage(pFD, pgno); + if (code) goto _exit; + } else { + pFD->pgno = pgno; + } } - } - return code; + int64_t nWrite = TMIN(PAGE_CONTENT_SIZE(pFD->szPage) - bOffset, size - n); + memcpy(pFD->pBuf + bOffset, pBuf + n, nWrite); -_err: - tsdbError("vgId:%d tsdb read block sma failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + pgno++; + bOffset = 0; + n += nWrite; + } while (n < size); + +_exit: return code; } -static int32_t tsdbReadBlockDataImpl(SDataFReader *pReader, SBlockInfo *pBlkInfo, int8_t fromLast, - SBlockData *pBlockData) { +static int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) { int32_t code = 0; + int64_t n = 0; + int64_t fOffset = LOGIC_TO_FILE_OFFSET(offset, pFD->szPage); + int64_t pgno = OFFSET_PGNO(fOffset, pFD->szPage); + int32_t szPgCont = PAGE_CONTENT_SIZE(pFD->szPage); + int64_t bOffset = fOffset % pFD->szPage; - tBlockDataClear(pBlockData); + ASSERT(pgno && pgno <= pFD->szFile); + ASSERT(bOffset < szPgCont); - TdFilePtr pFD = fromLast ? pReader->pLastFD : pReader->pDataFD; - - // uid + version + tskey - code = tsdbReadAndCheck(pFD, pBlkInfo->offset, &pReader->aBuf[0], pBlkInfo->szKey, 1); - if (code) goto _err; - SDiskDataHdr hdr; - uint8_t *p = pReader->aBuf[0] + tGetDiskDataHdr(pReader->aBuf[0], &hdr); - - ASSERT(hdr.delimiter == TSDB_FILE_DLMT); - ASSERT(pBlockData->suid == hdr.suid); - ASSERT(pBlockData->uid == hdr.uid); - - pBlockData->nRow = hdr.nRow; - - // uid - if (hdr.uid == 0) { - ASSERT(hdr.szUid); - code = tsdbDecmprData(p, hdr.szUid, TSDB_DATA_TYPE_BIGINT, hdr.cmprAlg, (uint8_t **)&pBlockData->aUid, - sizeof(int64_t) * hdr.nRow, &pReader->aBuf[1]); - if (code) goto _err; - } else { - ASSERT(!hdr.szUid); - } - p += hdr.szUid; - - // version - code = tsdbDecmprData(p, hdr.szVer, TSDB_DATA_TYPE_BIGINT, hdr.cmprAlg, (uint8_t **)&pBlockData->aVersion, - sizeof(int64_t) * hdr.nRow, &pReader->aBuf[1]); - if (code) goto _err; - p += hdr.szVer; - - // TSKEY - code = tsdbDecmprData(p, hdr.szKey, TSDB_DATA_TYPE_TIMESTAMP, hdr.cmprAlg, (uint8_t **)&pBlockData->aTSKEY, - sizeof(TSKEY) * hdr.nRow, &pReader->aBuf[1]); - if (code) goto _err; - p += hdr.szKey; - - ASSERT(p - pReader->aBuf[0] == pBlkInfo->szKey - sizeof(TSCKSUM)); - - // read and decode columns - if (taosArrayGetSize(pBlockData->aIdx) == 0) goto _exit; - - if (hdr.szBlkCol > 0) { - int64_t offset = pBlkInfo->offset + pBlkInfo->szKey; - code = tsdbReadAndCheck(pFD, offset, &pReader->aBuf[0], hdr.szBlkCol + sizeof(TSCKSUM), 1); - if (code) goto _err; - } - - SBlockCol blockCol = {.cid = 0}; - SBlockCol *pBlockCol = &blockCol; - int32_t n = 0; - - for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockData->aIdx); iColData++) { - SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iColData); - - while (pBlockCol && pBlockCol->cid < pColData->cid) { - if (n < hdr.szBlkCol) { - n += tGetBlockCol(pReader->aBuf[0] + n, pBlockCol); - } else { - ASSERT(n == hdr.szBlkCol); - pBlockCol = NULL; - } + while (n < size) { + if (pFD->pgno != pgno) { + code = tsdbReadFilePage(pFD, pgno); + if (code) goto _exit; } - if (pBlockCol == NULL || pBlockCol->cid > pColData->cid) { - // add a lot of NONE - for (int32_t iRow = 0; iRow < hdr.nRow; iRow++) { - code = tColDataAppendValue(pColData, &COL_VAL_NONE(pColData->cid, pColData->type)); - if (code) goto _err; - } - } else { - ASSERT(pBlockCol->type == pColData->type); - ASSERT(pBlockCol->flag && pBlockCol->flag != HAS_NONE); + int64_t nRead = TMIN(szPgCont - bOffset, size - n); + memcpy(pBuf + n, pFD->pBuf + bOffset, nRead); - if (pBlockCol->flag == HAS_NULL) { - // add a lot of NULL - for (int32_t iRow = 0; iRow < hdr.nRow; iRow++) { - code = tColDataAppendValue(pColData, &COL_VAL_NULL(pBlockCol->cid, pBlockCol->type)); - if (code) goto _err; - } - } else { - // decode from binary - int64_t offset = pBlkInfo->offset + pBlkInfo->szKey + hdr.szBlkCol + sizeof(TSCKSUM) + pBlockCol->offset; - int32_t size = pBlockCol->szBitmap + pBlockCol->szOffset + pBlockCol->szValue + sizeof(TSCKSUM); - - code = tsdbReadAndCheck(pFD, offset, &pReader->aBuf[1], size, 0); - if (code) goto _err; - - code = tsdbDecmprColData(pReader->aBuf[1], pBlockCol, hdr.cmprAlg, hdr.nRow, pColData, &pReader->aBuf[2]); - if (code) goto _err; - } - } + n += nRead; + pgno++; + bOffset = 0; } _exit: return code; - -_err: - tsdbError("vgId:%d tsdb read block data impl failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - return code; } -int32_t tsdbReadDataBlock(SDataFReader *pReader, SBlock *pBlock, SBlockData *pBlockData) { +static int32_t tsdbFsyncFile(STsdbFD *pFD) { int32_t code = 0; - code = tsdbReadBlockDataImpl(pReader, &pBlock->aSubBlock[0], 0, pBlockData); - if (code) goto _err; + code = tsdbWriteFilePage(pFD); + if (code) goto _exit; - if (pBlock->nSubBlock > 1) { - SBlockData bData1; - SBlockData bData2; - - // create - code = tBlockDataCreate(&bData1); - if (code) goto _err; - code = tBlockDataCreate(&bData2); - if (code) goto _err; - - // init - tBlockDataInitEx(&bData1, pBlockData); - tBlockDataInitEx(&bData2, pBlockData); - - for (int32_t iSubBlock = 1; iSubBlock < pBlock->nSubBlock; iSubBlock++) { - code = tsdbReadBlockDataImpl(pReader, &pBlock->aSubBlock[iSubBlock], 0, &bData1); - if (code) { - tBlockDataDestroy(&bData1, 1); - tBlockDataDestroy(&bData2, 1); - goto _err; - } - - code = tBlockDataCopy(pBlockData, &bData2); - if (code) { - tBlockDataDestroy(&bData1, 1); - tBlockDataDestroy(&bData2, 1); - goto _err; - } - - code = tBlockDataMerge(&bData1, &bData2, pBlockData); - if (code) { - tBlockDataDestroy(&bData1, 1); - tBlockDataDestroy(&bData2, 1); - goto _err; - } - } - - tBlockDataDestroy(&bData1, 1); - tBlockDataDestroy(&bData2, 1); + if (taosFsyncFile(pFD->pFD) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _exit; } - return code; - -_err: - tsdbError("vgId:%d tsdb read data block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - return code; -} - -int32_t tsdbReadLastBlock(SDataFReader *pReader, SBlockL *pBlockL, SBlockData *pBlockData) { - int32_t code = 0; - - code = tsdbReadBlockDataImpl(pReader, &pBlockL->bInfo, 1, pBlockData); - if (code) goto _err; - - return code; - -_err: - tsdbError("vgId:%d tsdb read last block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); +_exit: return code; } @@ -919,6 +209,7 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS int32_t code = 0; int32_t flag; int64_t n; + int32_t szPage = TSDB_DEFAULT_PAGE_SIZE; SDataFWriter *pWriter = NULL; char fname[TSDB_FILENAME_LEN]; char hdr[TSDB_FHDR_SIZE] = {0}; @@ -929,128 +220,72 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - if (code) goto _err; pWriter->pTsdb = pTsdb; pWriter->wSet = (SDFileSet){.diskId = pSet->diskId, .fid = pSet->fid, .pHeadF = &pWriter->fHead, .pDataF = &pWriter->fData, - .pLastF = &pWriter->fLast, - .pSmaF = &pWriter->fSma}; + .pSmaF = &pWriter->fSma, + .nSttF = pSet->nSttF}; pWriter->fHead = *pSet->pHeadF; pWriter->fData = *pSet->pDataF; - pWriter->fLast = *pSet->pLastF; pWriter->fSma = *pSet->pSmaF; + for (int8_t iStt = 0; iStt < pSet->nSttF; iStt++) { + pWriter->wSet.aSttF[iStt] = &pWriter->fStt[iStt]; + pWriter->fStt[iStt] = *pSet->aSttF[iStt]; + } // head - flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; + flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; tsdbHeadFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fHead, fname); - pWriter->pHeadFD = taosOpenFile(fname, flag); - if (pWriter->pHeadFD == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - n = taosWriteFile(pWriter->pHeadFD, hdr, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - ASSERT(n == TSDB_FHDR_SIZE); + code = tsdbOpenFile(fname, szPage, flag, &pWriter->pHeadFD); + if (code) goto _err; + code = tsdbWriteFile(pWriter->pHeadFD, 0, hdr, TSDB_FHDR_SIZE); + if (code) goto _err; pWriter->fHead.size += TSDB_FHDR_SIZE; // data if (pWriter->fData.size == 0) { - flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; + flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; } else { - flag = TD_FILE_WRITE; + flag = TD_FILE_READ | TD_FILE_WRITE; } tsdbDataFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fData, fname); - pWriter->pDataFD = taosOpenFile(fname, flag); - if (pWriter->pDataFD == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbOpenFile(fname, szPage, flag, &pWriter->pDataFD); + if (code) goto _err; if (pWriter->fData.size == 0) { - n = taosWriteFile(pWriter->pDataFD, hdr, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - + code = tsdbWriteFile(pWriter->pDataFD, 0, hdr, TSDB_FHDR_SIZE); + if (code) goto _err; pWriter->fData.size += TSDB_FHDR_SIZE; - } else { - n = taosLSeekFile(pWriter->pDataFD, 0, SEEK_END); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - ASSERT(n == pWriter->fData.size); - } - - // last - if (pWriter->fLast.size == 0) { - flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; - } else { - flag = TD_FILE_WRITE; - } - tsdbLastFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fLast, fname); - pWriter->pLastFD = taosOpenFile(fname, flag); - if (pWriter->pLastFD == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - if (pWriter->fLast.size == 0) { - n = taosWriteFile(pWriter->pLastFD, hdr, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - pWriter->fLast.size += TSDB_FHDR_SIZE; - } else { - n = taosLSeekFile(pWriter->pLastFD, 0, SEEK_END); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - ASSERT(n == pWriter->fLast.size); } // sma if (pWriter->fSma.size == 0) { - flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; + flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; } else { - flag = TD_FILE_WRITE; + flag = TD_FILE_READ | TD_FILE_WRITE; } tsdbSmaFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fSma, fname); - pWriter->pSmaFD = taosOpenFile(fname, flag); - if (pWriter->pSmaFD == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbOpenFile(fname, szPage, flag, &pWriter->pSmaFD); + if (code) goto _err; if (pWriter->fSma.size == 0) { - n = taosWriteFile(pWriter->pSmaFD, hdr, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbWriteFile(pWriter->pSmaFD, 0, hdr, TSDB_FHDR_SIZE); + if (code) goto _err; pWriter->fSma.size += TSDB_FHDR_SIZE; - } else { - n = taosLSeekFile(pWriter->pSmaFD, 0, SEEK_END); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - ASSERT(n == pWriter->fSma.size); } + // stt + ASSERT(pWriter->fStt[pSet->nSttF - 1].size == 0); + flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; + tsdbSttFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fStt[pSet->nSttF - 1], fname); + code = tsdbOpenFile(fname, szPage, flag, &pWriter->pSttFD); + if (code) goto _err; + code = tsdbWriteFile(pWriter->pSttFD, 0, hdr, TSDB_FHDR_SIZE); + if (code) goto _err; + pWriter->fStt[pWriter->wSet.nSttF - 1].size += TSDB_FHDR_SIZE; + *ppWriter = pWriter; return code; @@ -1068,46 +303,23 @@ int32_t tsdbDataFWriterClose(SDataFWriter **ppWriter, int8_t sync) { pTsdb = (*ppWriter)->pTsdb; if (sync) { - if (taosFsyncFile((*ppWriter)->pHeadFD) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbFsyncFile((*ppWriter)->pHeadFD); + if (code) goto _err; - if (taosFsyncFile((*ppWriter)->pDataFD) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbFsyncFile((*ppWriter)->pDataFD); + if (code) goto _err; - if (taosFsyncFile((*ppWriter)->pLastFD) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbFsyncFile((*ppWriter)->pSmaFD); + if (code) goto _err; - if (taosFsyncFile((*ppWriter)->pSmaFD) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbFsyncFile((*ppWriter)->pSttFD); + if (code) goto _err; } - if (taosCloseFile(&(*ppWriter)->pHeadFD) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - if (taosCloseFile(&(*ppWriter)->pDataFD) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - if (taosCloseFile(&(*ppWriter)->pLastFD) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - if (taosCloseFile(&(*ppWriter)->pSmaFD) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + tsdbCloseFile(&(*ppWriter)->pHeadFD); + tsdbCloseFile(&(*ppWriter)->pDataFD); + tsdbCloseFile(&(*ppWriter)->pSmaFD); + tsdbCloseFile(&(*ppWriter)->pSttFD); for (int32_t iBuf = 0; iBuf < sizeof((*ppWriter)->aBuf) / sizeof(uint8_t *); iBuf++) { tFree((*ppWriter)->aBuf[iBuf]); @@ -1130,70 +342,26 @@ int32_t tsdbUpdateDFileSetHeader(SDataFWriter *pWriter) { // head ============== memset(hdr, 0, TSDB_FHDR_SIZE); tPutHeadFile(hdr, &pWriter->fHead); - taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); - - n = taosLSeekFile(pWriter->pHeadFD, 0, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - n = taosWriteFile(pWriter->pHeadFD, hdr, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbWriteFile(pWriter->pHeadFD, 0, hdr, TSDB_FHDR_SIZE); + if (code) goto _err; // data ============== memset(hdr, 0, TSDB_FHDR_SIZE); tPutDataFile(hdr, &pWriter->fData); - taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); - - n = taosLSeekFile(pWriter->pDataFD, 0, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - n = taosWriteFile(pWriter->pDataFD, hdr, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // last ============== - memset(hdr, 0, TSDB_FHDR_SIZE); - tPutLastFile(hdr, &pWriter->fLast); - taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); - - n = taosLSeekFile(pWriter->pLastFD, 0, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - n = taosWriteFile(pWriter->pLastFD, hdr, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbWriteFile(pWriter->pDataFD, 0, hdr, TSDB_FHDR_SIZE); + if (code) goto _err; // sma ============== memset(hdr, 0, TSDB_FHDR_SIZE); tPutSmaFile(hdr, &pWriter->fSma); - taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + code = tsdbWriteFile(pWriter->pSmaFD, 0, hdr, TSDB_FHDR_SIZE); + if (code) goto _err; - n = taosLSeekFile(pWriter->pSmaFD, 0, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - n = taosWriteFile(pWriter->pSmaFD, hdr, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + // stt ============== + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutSttFile(hdr, &pWriter->fStt[pWriter->wSet.nSttF - 1]); + code = tsdbWriteFile(pWriter->pSttFD, 0, hdr, TSDB_FHDR_SIZE); + if (code) goto _err; return code; @@ -1205,7 +373,7 @@ _err: int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx) { int32_t code = 0; SHeadFile *pHeadFile = &pWriter->fHead; - int64_t size = 0; + int64_t size; int64_t n; // check @@ -1215,11 +383,10 @@ int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx) { } // prepare - size = sizeof(uint32_t); + size = 0; for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(aBlockIdx); iBlockIdx++) { size += tPutBlockIdx(NULL, taosArrayGet(aBlockIdx, iBlockIdx)); } - size += sizeof(TSCKSUM); // alloc code = tRealloc(&pWriter->aBuf[0], size); @@ -1227,28 +394,23 @@ int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx) { // build n = 0; - n = tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT); for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(aBlockIdx); iBlockIdx++) { n += tPutBlockIdx(pWriter->aBuf[0] + n, taosArrayGet(aBlockIdx, iBlockIdx)); } - taosCalcChecksumAppend(0, pWriter->aBuf[0], size); - - ASSERT(n + sizeof(TSCKSUM) == size); + ASSERT(n == size); // write - n = taosWriteFile(pWriter->pHeadFD, pWriter->aBuf[0], size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbWriteFile(pWriter->pHeadFD, pHeadFile->size, pWriter->aBuf[0], size); + if (code) goto _err; // update pHeadFile->offset = pHeadFile->size; pHeadFile->size += size; _exit: - tsdbTrace("vgId:%d write block idx, offset:%" PRId64 " size:%" PRId64 " nBlockIdx:%d", TD_VID(pWriter->pTsdb->pVnode), - pHeadFile->offset, size, taosArrayGetSize(aBlockIdx)); + // tsdbTrace("vgId:%d write block idx, offset:%" PRId64 " size:%" PRId64 " nBlockIdx:%d", + // TD_VID(pWriter->pTsdb->pVnode), + // pHeadFile->offset, size, taosArrayGetSize(aBlockIdx)); return code; _err: @@ -1265,24 +427,16 @@ int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *mBlock, SBlockIdx *pBloc ASSERT(mBlock->nItem > 0); // alloc - size = sizeof(uint32_t) + tPutMapData(NULL, mBlock) + sizeof(TSCKSUM); + size = tPutMapData(NULL, mBlock); code = tRealloc(&pWriter->aBuf[0], size); if (code) goto _err; // build - n = 0; - n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT); - n += tPutMapData(pWriter->aBuf[0] + n, mBlock); - taosCalcChecksumAppend(0, pWriter->aBuf[0], size); - - ASSERT(n + sizeof(TSCKSUM) == size); + n = tPutMapData(pWriter->aBuf[0], mBlock); // write - n = taosWriteFile(pWriter->pHeadFD, pWriter->aBuf[0], size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbWriteFile(pWriter->pHeadFD, pHeadFile->size, pWriter->aBuf[0], size); + if (code) goto _err; // update pBlockIdx->offset = pHeadFile->size; @@ -1300,24 +454,23 @@ _err: return code; } -int32_t tsdbWriteBlockL(SDataFWriter *pWriter, SArray *aBlockL) { - int32_t code = 0; - SLastFile *pLastFile = &pWriter->fLast; - int64_t size; - int64_t n; +int32_t tsdbWriteSttBlk(SDataFWriter *pWriter, SArray *aSttBlk) { + int32_t code = 0; + SSttFile *pSttFile = &pWriter->fStt[pWriter->wSet.nSttF - 1]; + int64_t size; + int64_t n; // check - if (taosArrayGetSize(aBlockL) == 0) { - pLastFile->offset = pLastFile->size; + if (taosArrayGetSize(aSttBlk) == 0) { + pSttFile->offset = pSttFile->size; goto _exit; } // size - size = sizeof(uint32_t); // TSDB_FILE_DLMT - for (int32_t iBlockL = 0; iBlockL < taosArrayGetSize(aBlockL); iBlockL++) { - size += tPutBlockL(NULL, taosArrayGet(aBlockL, iBlockL)); + size = 0; + for (int32_t iBlockL = 0; iBlockL < taosArrayGetSize(aSttBlk); iBlockL++) { + size += tPutSttBlk(NULL, taosArrayGet(aSttBlk, iBlockL)); } - size += sizeof(TSCKSUM); // alloc code = tRealloc(&pWriter->aBuf[0], size); @@ -1325,28 +478,21 @@ int32_t tsdbWriteBlockL(SDataFWriter *pWriter, SArray *aBlockL) { // encode n = 0; - n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT); - for (int32_t iBlockL = 0; iBlockL < taosArrayGetSize(aBlockL); iBlockL++) { - n += tPutBlockL(pWriter->aBuf[0] + n, taosArrayGet(aBlockL, iBlockL)); + for (int32_t iBlockL = 0; iBlockL < taosArrayGetSize(aSttBlk); iBlockL++) { + n += tPutSttBlk(pWriter->aBuf[0] + n, taosArrayGet(aSttBlk, iBlockL)); } - taosCalcChecksumAppend(0, pWriter->aBuf[0], size); - - ASSERT(n + sizeof(TSCKSUM) == size); // write - n = taosWriteFile(pWriter->pLastFD, pWriter->aBuf[0], size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbWriteFile(pWriter->pSttFD, pSttFile->size, pWriter->aBuf[0], size); + if (code) goto _err; // update - pLastFile->offset = pLastFile->size; - pLastFile->size += size; + pSttFile->offset = pSttFile->size; + pSttFile->size += size; _exit: - tsdbTrace("vgId:%d tsdb write blockl, loffset:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), - pLastFile->offset, size); + tsdbTrace("vgId:%d tsdb write stt block, loffset:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), + pSttFile->offset, size); return code; _err: @@ -1354,30 +500,6 @@ _err: return code; } -static void tsdbUpdateBlockInfo(SBlockData *pBlockData, SBlock *pBlock) { - for (int32_t iRow = 0; iRow < pBlockData->nRow; iRow++) { - TSDBKEY key = {.ts = pBlockData->aTSKEY[iRow], .version = pBlockData->aVersion[iRow]}; - - if (iRow == 0) { - if (tsdbKeyCmprFn(&pBlock->minKey, &key) > 0) { - pBlock->minKey = key; - } - } else { - if (pBlockData->aTSKEY[iRow] == pBlockData->aTSKEY[iRow - 1]) { - pBlock->hasDup = 1; - } - } - - if (iRow == pBlockData->nRow - 1 && tsdbKeyCmprFn(&pBlock->maxKey, &key) < 0) { - pBlock->maxKey = key; - } - - pBlock->minVer = TMIN(pBlock->minVer, key.version); - pBlock->maxVer = TMAX(pBlock->maxVer, key.version); - } - pBlock->nRow += pBlockData->nRow; -} - static int32_t tsdbWriteBlockSma(SDataFWriter *pWriter, SBlockData *pBlockData, SSmaInfo *pSmaInfo) { int32_t code = 0; @@ -1400,21 +522,14 @@ static int32_t tsdbWriteBlockSma(SDataFWriter *pWriter, SBlockData *pBlockData, // write if (pSmaInfo->size) { - int32_t size = pSmaInfo->size + sizeof(TSCKSUM); - - code = tRealloc(&pWriter->aBuf[0], size); + code = tRealloc(&pWriter->aBuf[0], pSmaInfo->size); if (code) goto _err; - taosCalcChecksumAppend(0, pWriter->aBuf[0], size); - - int64_t n = taosWriteFile(pWriter->pSmaFD, pWriter->aBuf[0], size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbWriteFile(pWriter->pSmaFD, pWriter->fSma.size, pWriter->aBuf[0], pSmaInfo->size); + if (code) goto _err; pSmaInfo->offset = pWriter->fSma.size; - pWriter->fSma.size += size; + pWriter->fSma.size += pSmaInfo->size; } return code; @@ -1430,7 +545,11 @@ int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, SBlock ASSERT(pBlockData->nRow > 0); - pBlkInfo->offset = toLast ? pWriter->fLast.size : pWriter->fData.size; + if (toLast) { + pBlkInfo->offset = pWriter->fStt[pWriter->wSet.nSttF - 1].size; + } else { + pBlkInfo->offset = pWriter->fData.size; + } pBlkInfo->szBlock = 0; pBlkInfo->szKey = 0; @@ -1439,42 +558,34 @@ int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, SBlock if (code) goto _err; // write ================= - TdFilePtr pFD = toLast ? pWriter->pLastFD : pWriter->pDataFD; + STsdbFD *pFD = toLast ? pWriter->pSttFD : pWriter->pDataFD; pBlkInfo->szKey = aBufN[3] + aBufN[2]; pBlkInfo->szBlock = aBufN[0] + aBufN[1] + aBufN[2] + aBufN[3]; - int64_t n = taosWriteFile(pFD, pWriter->aBuf[3], aBufN[3]); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + int64_t offset = pBlkInfo->offset; + code = tsdbWriteFile(pFD, offset, pWriter->aBuf[3], aBufN[3]); + if (code) goto _err; + offset += aBufN[3]; - n = taosWriteFile(pFD, pWriter->aBuf[2], aBufN[2]); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbWriteFile(pFD, offset, pWriter->aBuf[2], aBufN[2]); + if (code) goto _err; + offset += aBufN[2]; if (aBufN[1]) { - n = taosWriteFile(pFD, pWriter->aBuf[1], aBufN[1]); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbWriteFile(pFD, offset, pWriter->aBuf[1], aBufN[1]); + if (code) goto _err; + offset += aBufN[1]; } if (aBufN[0]) { - n = taosWriteFile(pFD, pWriter->aBuf[0], aBufN[0]); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + code = tsdbWriteFile(pFD, offset, pWriter->aBuf[0], aBufN[0]); + if (code) goto _err; } // update info if (toLast) { - pWriter->fLast.size += pBlkInfo->szBlock; + pWriter->fStt[pWriter->wSet.nSttF - 1].size += pBlkInfo->szBlock; } else { pWriter->fData.size += pBlkInfo->szBlock; } @@ -1553,9 +664,9 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { taosCloseFile(&pOutFD); taosCloseFile(&PInFD); - // last - tsdbLastFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pLastF, fNameFrom); - tsdbLastFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pLastF, fNameTo); + // stt + tsdbSttFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->aSttF[0], fNameFrom); + tsdbSttFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->aSttF[0], fNameTo); pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { @@ -1569,7 +680,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pLastF->size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->aSttF[0]->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1607,3 +718,702 @@ _err: tsdbError("vgId:%d, tsdb DFileSet copy failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } + +// SDataFReader ==================================================== +int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pSet) { + int32_t code = 0; + SDataFReader *pReader; + int32_t szPage = TSDB_DEFAULT_PAGE_SIZE; + char fname[TSDB_FILENAME_LEN]; + + // alloc + pReader = (SDataFReader *)taosMemoryCalloc(1, sizeof(*pReader)); + if (pReader == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + pReader->pTsdb = pTsdb; + pReader->pSet = pSet; + + // head + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); + code = tsdbOpenFile(fname, szPage, TD_FILE_READ, &pReader->pHeadFD); + if (code) goto _err; + + // data + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); + code = tsdbOpenFile(fname, szPage, TD_FILE_READ, &pReader->pDataFD); + if (code) goto _err; + + // sma + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); + code = tsdbOpenFile(fname, szPage, TD_FILE_READ, &pReader->pSmaFD); + if (code) goto _err; + + // stt + for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { + tsdbSttFileName(pTsdb, pSet->diskId, pSet->fid, pSet->aSttF[iStt], fname); + code = tsdbOpenFile(fname, szPage, TD_FILE_READ, &pReader->aSttFD[iStt]); + if (code) goto _err; + } + + *ppReader = pReader; + return code; + +_err: + tsdbError("vgId:%d, tsdb data file reader open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + *ppReader = NULL; + return code; +} + +int32_t tsdbDataFReaderClose(SDataFReader **ppReader) { + int32_t code = 0; + if (*ppReader == NULL) return code; + + // head + tsdbCloseFile(&(*ppReader)->pHeadFD); + + // data + tsdbCloseFile(&(*ppReader)->pDataFD); + + // sma + tsdbCloseFile(&(*ppReader)->pSmaFD); + + // stt + for (int32_t iStt = 0; iStt < TSDB_MAX_STT_FILE; iStt++) { + if ((*ppReader)->aSttFD[iStt]) { + tsdbCloseFile(&(*ppReader)->aSttFD[iStt]); + } + } + + for (int32_t iBuf = 0; iBuf < sizeof((*ppReader)->aBuf) / sizeof(uint8_t *); iBuf++) { + tFree((*ppReader)->aBuf[iBuf]); + } + taosMemoryFree(*ppReader); + *ppReader = NULL; + return code; + +_err: + tsdbError("vgId:%d, data file reader close failed since %s", TD_VID((*ppReader)->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx) { + int32_t code = 0; + SHeadFile *pHeadFile = pReader->pSet->pHeadF; + int64_t offset = pHeadFile->offset; + int64_t size = pHeadFile->size - offset; + + taosArrayClear(aBlockIdx); + if (size == 0) return code; + + // alloc + code = tRealloc(&pReader->aBuf[0], size); + if (code) goto _err; + + // read + code = tsdbReadFile(pReader->pHeadFD, offset, pReader->aBuf[0], size); + if (code) goto _err; + + // decode + int64_t n = 0; + while (n < size) { + SBlockIdx blockIdx; + n += tGetBlockIdx(pReader->aBuf[0] + n, &blockIdx); + + if (taosArrayPush(aBlockIdx, &blockIdx) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + } + ASSERT(n == size); + + return code; + +_err: + tsdbError("vgId:%d, read block idx failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbReadSttBlk(SDataFReader *pReader, int32_t iStt, SArray *aSttBlk) { + int32_t code = 0; + SSttFile *pSttFile = pReader->pSet->aSttF[iStt]; + int64_t offset = pSttFile->offset; + int64_t size = pSttFile->size - offset; + + taosArrayClear(aSttBlk); + if (size == 0) return code; + + // alloc + code = tRealloc(&pReader->aBuf[0], size); + if (code) goto _err; + + // read + code = tsdbReadFile(pReader->aSttFD[iStt], offset, pReader->aBuf[0], size); + if (code) goto _err; + + // decode + int64_t n = 0; + while (n < size) { + SSttBlk sttBlk; + n += tGetSttBlk(pReader->aBuf[0] + n, &sttBlk); + + if (taosArrayPush(aSttBlk, &sttBlk) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + } + ASSERT(n == size); + + return code; + +_err: + tsdbError("vgId:%d read stt blk failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbReadBlock(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *mBlock) { + int32_t code = 0; + int64_t offset = pBlockIdx->offset; + int64_t size = pBlockIdx->size; + + // alloc + code = tRealloc(&pReader->aBuf[0], size); + if (code) goto _err; + + // read + code = tsdbReadFile(pReader->pHeadFD, offset, pReader->aBuf[0], size); + if (code) goto _err; + + // decode + int64_t n = tGetMapData(pReader->aBuf[0], mBlock); + if (n < 0) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + ASSERT(n == size); + + return code; + +_err: + tsdbError("vgId:%d, read block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbReadBlockSma(SDataFReader *pReader, SDataBlk *pDataBlk, SArray *aColumnDataAgg) { + int32_t code = 0; + SSmaInfo *pSmaInfo = &pDataBlk->smaInfo; + + ASSERT(pSmaInfo->size > 0); + + taosArrayClear(aColumnDataAgg); + + // alloc + code = tRealloc(&pReader->aBuf[0], pSmaInfo->size); + if (code) goto _err; + + // read + code = tsdbReadFile(pReader->pSmaFD, pSmaInfo->offset, pReader->aBuf[0], pSmaInfo->size); + if (code) goto _err; + + // decode + int32_t n = 0; + while (n < pSmaInfo->size) { + SColumnDataAgg sma; + n += tGetColumnDataAgg(pReader->aBuf[0] + n, &sma); + + if (taosArrayPush(aColumnDataAgg, &sma) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + } + ASSERT(n == pSmaInfo->size); + return code; + +_err: + tsdbError("vgId:%d tsdb read block sma failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + return code; +} + +static int32_t tsdbReadBlockDataImpl(SDataFReader *pReader, SBlockInfo *pBlkInfo, SBlockData *pBlockData) { + int32_t code = 0; + + tBlockDataClear(pBlockData); + + STsdbFD *pFD = pReader->pDataFD; + + // uid + version + tskey + code = tRealloc(&pReader->aBuf[0], pBlkInfo->szKey); + if (code) goto _err; + + code = tsdbReadFile(pFD, pBlkInfo->offset, pReader->aBuf[0], pBlkInfo->szKey); + if (code) goto _err; + + SDiskDataHdr hdr; + uint8_t *p = pReader->aBuf[0] + tGetDiskDataHdr(pReader->aBuf[0], &hdr); + + ASSERT(hdr.delimiter == TSDB_FILE_DLMT); + ASSERT(pBlockData->suid == hdr.suid); + ASSERT(pBlockData->uid == hdr.uid); + + pBlockData->nRow = hdr.nRow; + + // uid + if (hdr.uid == 0) { + ASSERT(hdr.szUid); + code = tsdbDecmprData(p, hdr.szUid, TSDB_DATA_TYPE_BIGINT, hdr.cmprAlg, (uint8_t **)&pBlockData->aUid, + sizeof(int64_t) * hdr.nRow, &pReader->aBuf[1]); + if (code) goto _err; + } else { + ASSERT(!hdr.szUid); + } + p += hdr.szUid; + + // version + code = tsdbDecmprData(p, hdr.szVer, TSDB_DATA_TYPE_BIGINT, hdr.cmprAlg, (uint8_t **)&pBlockData->aVersion, + sizeof(int64_t) * hdr.nRow, &pReader->aBuf[1]); + if (code) goto _err; + p += hdr.szVer; + + // TSKEY + code = tsdbDecmprData(p, hdr.szKey, TSDB_DATA_TYPE_TIMESTAMP, hdr.cmprAlg, (uint8_t **)&pBlockData->aTSKEY, + sizeof(TSKEY) * hdr.nRow, &pReader->aBuf[1]); + if (code) goto _err; + p += hdr.szKey; + + ASSERT(p - pReader->aBuf[0] == pBlkInfo->szKey); + + // read and decode columns + if (taosArrayGetSize(pBlockData->aIdx) == 0) goto _exit; + + if (hdr.szBlkCol > 0) { + int64_t offset = pBlkInfo->offset + pBlkInfo->szKey; + + code = tRealloc(&pReader->aBuf[0], hdr.szBlkCol); + if (code) goto _err; + + code = tsdbReadFile(pFD, offset, pReader->aBuf[0], hdr.szBlkCol); + if (code) goto _err; + } + + SBlockCol blockCol = {.cid = 0}; + SBlockCol *pBlockCol = &blockCol; + int32_t n = 0; + + for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockData->aIdx); iColData++) { + SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iColData); + + while (pBlockCol && pBlockCol->cid < pColData->cid) { + if (n < hdr.szBlkCol) { + n += tGetBlockCol(pReader->aBuf[0] + n, pBlockCol); + } else { + ASSERT(n == hdr.szBlkCol); + pBlockCol = NULL; + } + } + + if (pBlockCol == NULL || pBlockCol->cid > pColData->cid) { + // add a lot of NONE + for (int32_t iRow = 0; iRow < hdr.nRow; iRow++) { + code = tColDataAppendValue(pColData, &COL_VAL_NONE(pColData->cid, pColData->type)); + if (code) goto _err; + } + } else { + ASSERT(pBlockCol->type == pColData->type); + ASSERT(pBlockCol->flag && pBlockCol->flag != HAS_NONE); + + if (pBlockCol->flag == HAS_NULL) { + // add a lot of NULL + for (int32_t iRow = 0; iRow < hdr.nRow; iRow++) { + code = tColDataAppendValue(pColData, &COL_VAL_NULL(pBlockCol->cid, pBlockCol->type)); + if (code) goto _err; + } + } else { + // decode from binary + int64_t offset = pBlkInfo->offset + pBlkInfo->szKey + hdr.szBlkCol + pBlockCol->offset; + int32_t size = pBlockCol->szBitmap + pBlockCol->szOffset + pBlockCol->szValue; + + code = tRealloc(&pReader->aBuf[1], size); + if (code) goto _err; + + code = tsdbReadFile(pFD, offset, pReader->aBuf[1], size); + if (code) goto _err; + + code = tsdbDecmprColData(pReader->aBuf[1], pBlockCol, hdr.cmprAlg, hdr.nRow, pColData, &pReader->aBuf[2]); + if (code) goto _err; + } + } + } + +_exit: + return code; + +_err: + tsdbError("vgId:%d tsdb read block data impl failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbReadDataBlock(SDataFReader *pReader, SDataBlk *pDataBlk, SBlockData *pBlockData) { + int32_t code = 0; + + code = tsdbReadBlockDataImpl(pReader, &pDataBlk->aSubBlock[0], pBlockData); + if (code) goto _err; + + if (pDataBlk->nSubBlock > 1) { + SBlockData bData1; + SBlockData bData2; + + // create + code = tBlockDataCreate(&bData1); + if (code) goto _err; + code = tBlockDataCreate(&bData2); + if (code) goto _err; + + // init + tBlockDataInitEx(&bData1, pBlockData); + tBlockDataInitEx(&bData2, pBlockData); + + for (int32_t iSubBlock = 1; iSubBlock < pDataBlk->nSubBlock; iSubBlock++) { + code = tsdbReadBlockDataImpl(pReader, &pDataBlk->aSubBlock[iSubBlock], &bData1); + if (code) { + tBlockDataDestroy(&bData1, 1); + tBlockDataDestroy(&bData2, 1); + goto _err; + } + + code = tBlockDataCopy(pBlockData, &bData2); + if (code) { + tBlockDataDestroy(&bData1, 1); + tBlockDataDestroy(&bData2, 1); + goto _err; + } + + code = tBlockDataMerge(&bData1, &bData2, pBlockData); + if (code) { + tBlockDataDestroy(&bData1, 1); + tBlockDataDestroy(&bData2, 1); + goto _err; + } + } + + tBlockDataDestroy(&bData1, 1); + tBlockDataDestroy(&bData2, 1); + } + + return code; + +_err: + tsdbError("vgId:%d tsdb read data block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbReadSttBlock(SDataFReader *pReader, int32_t iStt, SSttBlk *pSttBlk, SBlockData *pBlockData) { + int32_t code = 0; + + // alloc + code = tRealloc(&pReader->aBuf[0], pSttBlk->bInfo.szBlock); + if (code) goto _err; + + // read + code = tsdbReadFile(pReader->aSttFD[iStt], pSttBlk->bInfo.offset, pReader->aBuf[0], pSttBlk->bInfo.szBlock); + if (code) goto _err; + + // decmpr + code = tDecmprBlockData(pReader->aBuf[0], pSttBlk->bInfo.szBlock, pBlockData, &pReader->aBuf[1]); + if (code) goto _err; + + return code; + +_err: + tsdbError("vgId:%d tsdb read stt block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + return code; +} + +// SDelFWriter ==================================================== +int32_t tsdbDelFWriterOpen(SDelFWriter **ppWriter, SDelFile *pFile, STsdb *pTsdb) { + int32_t code = 0; + char fname[TSDB_FILENAME_LEN]; + uint8_t hdr[TSDB_FHDR_SIZE] = {0}; + SDelFWriter *pDelFWriter; + int64_t n; + + // alloc + pDelFWriter = (SDelFWriter *)taosMemoryCalloc(1, sizeof(*pDelFWriter)); + if (pDelFWriter == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + pDelFWriter->pTsdb = pTsdb; + pDelFWriter->fDel = *pFile; + + tsdbDelFileName(pTsdb, pFile, fname); + code = + tsdbOpenFile(fname, TSDB_DEFAULT_PAGE_SIZE, TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE, &pDelFWriter->pWriteH); + if (code) goto _err; + + // update header + code = tsdbWriteFile(pDelFWriter->pWriteH, 0, hdr, TSDB_FHDR_SIZE); + if (code) goto _err; + + pDelFWriter->fDel.size = TSDB_FHDR_SIZE; + pDelFWriter->fDel.offset = 0; + + *ppWriter = pDelFWriter; + return code; + +_err: + tsdbError("vgId:%d, failed to open del file writer since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + *ppWriter = NULL; + return code; +} + +int32_t tsdbDelFWriterClose(SDelFWriter **ppWriter, int8_t sync) { + int32_t code = 0; + SDelFWriter *pWriter = *ppWriter; + STsdb *pTsdb = pWriter->pTsdb; + + // sync + if (sync) { + code = tsdbFsyncFile(pWriter->pWriteH); + if (code) goto _err; + } + + // close + tsdbCloseFile(&pWriter->pWriteH); + + for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t *); iBuf++) { + tFree(pWriter->aBuf[iBuf]); + } + taosMemoryFree(pWriter); + + *ppWriter = NULL; + return code; + +_err: + tsdbError("vgId:%d, failed to close del file writer since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbWriteDelData(SDelFWriter *pWriter, SArray *aDelData, SDelIdx *pDelIdx) { + int32_t code = 0; + int64_t size; + int64_t n; + + // prepare + size = 0; + for (int32_t iDelData = 0; iDelData < taosArrayGetSize(aDelData); iDelData++) { + size += tPutDelData(NULL, taosArrayGet(aDelData, iDelData)); + } + + // alloc + code = tRealloc(&pWriter->aBuf[0], size); + if (code) goto _err; + + // build + n = 0; + for (int32_t iDelData = 0; iDelData < taosArrayGetSize(aDelData); iDelData++) { + n += tPutDelData(pWriter->aBuf[0] + n, taosArrayGet(aDelData, iDelData)); + } + ASSERT(n == size); + + // write + code = tsdbWriteFile(pWriter->pWriteH, pWriter->fDel.size, pWriter->aBuf[0], size); + if (code) goto _err; + + // update + pDelIdx->offset = pWriter->fDel.size; + pDelIdx->size = size; + pWriter->fDel.size += size; + + return code; + +_err: + tsdbError("vgId:%d, failed to write del data since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbWriteDelIdx(SDelFWriter *pWriter, SArray *aDelIdx) { + int32_t code = 0; + int64_t size; + int64_t n; + SDelIdx *pDelIdx; + + // prepare + size = 0; + for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) { + size += tPutDelIdx(NULL, taosArrayGet(aDelIdx, iDelIdx)); + } + + // alloc + code = tRealloc(&pWriter->aBuf[0], size); + if (code) goto _err; + + // build + n = 0; + for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) { + n += tPutDelIdx(pWriter->aBuf[0] + n, taosArrayGet(aDelIdx, iDelIdx)); + } + ASSERT(n == size); + + // write + code = tsdbWriteFile(pWriter->pWriteH, pWriter->fDel.size, pWriter->aBuf[0], size); + if (code) goto _err; + + // update + pWriter->fDel.offset = pWriter->fDel.size; + pWriter->fDel.size += size; + + return code; + +_err: + tsdbError("vgId:%d, write del idx failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbUpdateDelFileHdr(SDelFWriter *pWriter) { + int32_t code = 0; + char hdr[TSDB_FHDR_SIZE] = {0}; + int64_t size = TSDB_FHDR_SIZE; + int64_t n; + + // build + tPutDelFile(hdr, &pWriter->fDel); + + // write + code = tsdbWriteFile(pWriter->pWriteH, 0, hdr, size); + if (code) goto _err; + + return code; + +_err: + tsdbError("vgId:%d, update del file hdr failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); + return code; +} +// SDelFReader ==================================================== +struct SDelFReader { + STsdb *pTsdb; + SDelFile fDel; + STsdbFD *pReadH; + uint8_t *aBuf[1]; +}; + +int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb) { + int32_t code = 0; + char fname[TSDB_FILENAME_LEN]; + SDelFReader *pDelFReader; + int64_t n; + + // alloc + pDelFReader = (SDelFReader *)taosMemoryCalloc(1, sizeof(*pDelFReader)); + if (pDelFReader == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + // open impl + pDelFReader->pTsdb = pTsdb; + pDelFReader->fDel = *pFile; + + tsdbDelFileName(pTsdb, pFile, fname); + code = tsdbOpenFile(fname, TSDB_DEFAULT_PAGE_SIZE, TD_FILE_READ, &pDelFReader->pReadH); + if (code) goto _err; + + *ppReader = pDelFReader; + return code; + +_err: + tsdbError("vgId:%d, del file reader open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + *ppReader = NULL; + return code; +} + +int32_t tsdbDelFReaderClose(SDelFReader **ppReader) { + int32_t code = 0; + SDelFReader *pReader = *ppReader; + + if (pReader) { + tsdbCloseFile(&pReader->pReadH); + for (int32_t iBuf = 0; iBuf < sizeof(pReader->aBuf) / sizeof(uint8_t *); iBuf++) { + tFree(pReader->aBuf[iBuf]); + } + taosMemoryFree(pReader); + } + *ppReader = NULL; + +_exit: + return code; +} + +int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData) { + int32_t code = 0; + int64_t offset = pDelIdx->offset; + int64_t size = pDelIdx->size; + int64_t n; + + taosArrayClear(aDelData); + + // alloc + code = tRealloc(&pReader->aBuf[0], size); + if (code) goto _err; + + // read + code = tsdbReadFile(pReader->pReadH, offset, pReader->aBuf[0], size); + if (code) goto _err; + + // // decode + n = 0; + while (n < size) { + SDelData delData; + n += tGetDelData(pReader->aBuf[0] + n, &delData); + + if (taosArrayPush(aDelData, &delData) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + } + ASSERT(n == size); + + return code; + +_err: + tsdbError("vgId:%d, read del data failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx) { + int32_t code = 0; + int32_t n; + int64_t offset = pReader->fDel.offset; + int64_t size = pReader->fDel.size - offset; + + taosArrayClear(aDelIdx); + + // alloc + code = tRealloc(&pReader->aBuf[0], size); + if (code) goto _err; + + // read + code = tsdbReadFile(pReader->pReadH, offset, pReader->aBuf[0], size); + if (code) goto _err; + + // decode + n = 0; + while (n < size) { + SDelIdx delIdx; + + n += tGetDelIdx(pReader->aBuf[0] + n, &delIdx); + + if (taosArrayPush(aDelIdx, &delIdx) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + } + + ASSERT(n == size); + + return code; + +_err: + tsdbError("vgId:%d, read del idx failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + return code; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index a30b9154ab..d10613e719 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -60,7 +60,7 @@ int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now) { if (expLevel < 0) { taosMemoryFree(pSet->pHeadF); taosMemoryFree(pSet->pDataF); - taosMemoryFree(pSet->pLastF); + taosMemoryFree(pSet->aSttF[0]); taosMemoryFree(pSet->pSmaF); taosArrayRemove(fs.aDFileSet, iSet); iSet--; diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index ab2b2b617a..9fc5639c5e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -27,13 +27,13 @@ struct STsdbSnapReader { int32_t fid; SDataFReader* pDataFReader; SArray* aBlockIdx; // SArray - SArray* aBlockL; // SArray + SArray* aSstBlk; // SArray SBlockIdx* pBlockIdx; - SBlockL* pBlockL; + SSttBlk* pSstBlk; int32_t iBlockIdx; int32_t iBlockL; - SMapData mBlock; // SMapData + SMapData mBlock; // SMapData int32_t iBlock; SBlockData oBlockData; SBlockData nBlockData; @@ -64,7 +64,7 @@ static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { code = tsdbReadBlockIdx(pReader->pDataFReader, pReader->aBlockIdx); if (code) goto _err; - code = tsdbReadBlockL(pReader->pDataFReader, pReader->aBlockL); + code = tsdbReadSttBlk(pReader->pDataFReader, 0, pReader->aSstBlk); if (code) goto _err; // init @@ -82,13 +82,13 @@ static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { pReader->iBlockL = 0; while (true) { - if (pReader->iBlockL >= taosArrayGetSize(pReader->aBlockL)) { - pReader->pBlockL = NULL; + if (pReader->iBlockL >= taosArrayGetSize(pReader->aSstBlk)) { + pReader->pSstBlk = NULL; break; } - pReader->pBlockL = (SBlockL*)taosArrayGet(pReader->aBlockL, pReader->iBlockL); - if (pReader->pBlockL->minVer <= pReader->ever && pReader->pBlockL->maxVer >= pReader->sver) { + pReader->pSstBlk = (SSttBlk*)taosArrayGet(pReader->aSstBlk, pReader->iBlockL); + if (pReader->pSstBlk->minVer <= pReader->ever && pReader->pSstBlk->maxVer >= pReader->sver) { // TODO break; } @@ -101,8 +101,8 @@ static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { } while (true) { - if (pReader->pBlockIdx && pReader->pBlockL) { - TABLEID id = {.suid = pReader->pBlockL->suid, .uid = pReader->pBlockL->minUid}; + if (pReader->pBlockIdx && pReader->pSstBlk) { + TABLEID id = {.suid = pReader->pSstBlk->suid, .uid = pReader->pSstBlk->minUid}; ASSERT(0); @@ -115,8 +115,8 @@ static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { // } } else if (pReader->pBlockIdx) { while (pReader->iBlock < pReader->mBlock.nItem) { - SBlock block; - tMapDataGetItemByIdx(&pReader->mBlock, pReader->iBlock, &block, tGetBlock); + SDataBlk block; + tMapDataGetItemByIdx(&pReader->mBlock, pReader->iBlock, &block, tGetDataBlk); if (block.minVer <= pReader->ever && block.maxVer >= pReader->sver) { // load data (todo) @@ -142,18 +142,18 @@ static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { } if (*ppData) goto _exit; - } else if (pReader->pBlockL) { - while (pReader->pBlockL) { - if (pReader->pBlockL->minVer <= pReader->ever && pReader->pBlockL->maxVer >= pReader->sver) { + } else if (pReader->pSstBlk) { + while (pReader->pSstBlk) { + if (pReader->pSstBlk->minVer <= pReader->ever && pReader->pSstBlk->maxVer >= pReader->sver) { // load data (todo) } // next pReader->iBlockL++; - if (pReader->iBlockL < taosArrayGetSize(pReader->aBlockL)) { - pReader->pBlockL = (SBlockL*)taosArrayGetSize(pReader->aBlockL); + if (pReader->iBlockL < taosArrayGetSize(pReader->aSstBlk)) { + pReader->pSstBlk = (SSttBlk*)taosArrayGetSize(pReader->aSstBlk); } else { - pReader->pBlockL = NULL; + pReader->pSstBlk = NULL; } if (*ppData) goto _exit; @@ -298,8 +298,8 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - pReader->aBlockL = taosArrayInit(0, sizeof(SBlockL)); - if (pReader->aBlockL == NULL) { + pReader->aSstBlk = taosArrayInit(0, sizeof(SSttBlk)); + if (pReader->aSstBlk == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } @@ -338,7 +338,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { if (pReader->pDataFReader) { tsdbDataFReaderClose(&pReader->pDataFReader); } - taosArrayDestroy(pReader->aBlockL); + taosArrayDestroy(pReader->aSstBlk); taosArrayDestroy(pReader->aBlockIdx); tMapDataClear(&pReader->mBlock); tBlockDataDestroy(&pReader->oBlockData, 1); @@ -426,24 +426,24 @@ struct STsdbSnapWriter { SArray* aBlockIdx; // SArray int32_t iBlockIdx; SBlockIdx* pBlockIdx; - SMapData mBlock; // SMapData + SMapData mBlock; // SMapData int32_t iBlock; SBlockData* pBlockData; int32_t iRow; SBlockData bDataR; - SArray* aBlockL; // SArray + SArray* aSstBlk; // SArray int32_t iBlockL; SBlockData lDataR; SDataFWriter* pDataFWriter; SBlockIdx* pBlockIdxW; // NULL when no committing table - SBlock blockW; + SDataBlk blockW; SBlockData bDataW; SBlockIdx blockIdxW; - SMapData mBlockW; // SMapData + SMapData mBlockW; // SMapData SArray* aBlockIdxW; // SArray - SArray* aBlockLW; // SArray + SArray* aBlockLW; // SArray // for del file SDelFReader* pDelFReader; @@ -475,10 +475,10 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { // &pWriter->blockW, pWriter->cmprAlg); if (code) goto _err; - code = tMapDataPutItem(&pWriter->mBlockW, &pWriter->blockW, tPutBlock); + code = tMapDataPutItem(&pWriter->mBlockW, &pWriter->blockW, tPutDataBlk); if (code) goto _err; - tBlockReset(&pWriter->blockW); + tDataBlkReset(&pWriter->blockW); tBlockDataClear(&pWriter->bDataW); } @@ -499,15 +499,15 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { // &pWriter->blockW, pWriter->cmprAlg); // if (code) goto _err; - code = tMapDataPutItem(&pWriter->mBlockW, &pWriter->blockW, tPutBlock); + code = tMapDataPutItem(&pWriter->mBlockW, &pWriter->blockW, tPutDataBlk); if (code) goto _err; } while (true) { if (pWriter->iBlock >= pWriter->mBlock.nItem) break; - SBlock block; - tMapDataGetItemByIdx(&pWriter->mBlock, pWriter->iBlock, &block, tGetBlock); + SDataBlk block; + tMapDataGetItemByIdx(&pWriter->mBlock, pWriter->iBlock, &block, tGetDataBlk); // if (block.last) { // code = tsdbReadBlockData(pWriter->pDataFReader, pWriter->pBlockIdx, &block, &pWriter->bDataR, NULL, NULL); @@ -520,13 +520,13 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { // if (code) goto _err; // } - code = tMapDataPutItem(&pWriter->mBlockW, &block, tPutBlock); + code = tMapDataPutItem(&pWriter->mBlockW, &block, tPutDataBlk); if (code) goto _err; pWriter->iBlock++; } - // SBlock + // SDataBlk // code = tsdbWriteBlock(pWriter->pDataFWriter, &pWriter->mBlockW, NULL, pWriter->pBlockIdxW); // if (code) goto _err; @@ -553,10 +553,10 @@ static int32_t tsdbSnapMoveWriteTableData(STsdbSnapWriter* pWriter, SBlockIdx* p if (code) goto _err; // SBlockData - SBlock block; + SDataBlk block; tMapDataReset(&pWriter->mBlockW); for (int32_t iBlock = 0; iBlock < pWriter->mBlock.nItem; iBlock++) { - tMapDataGetItemByIdx(&pWriter->mBlock, iBlock, &block, tGetBlock); + tMapDataGetItemByIdx(&pWriter->mBlock, iBlock, &block, tGetDataBlk); // if (block.last) { // code = tsdbReadBlockData(pWriter->pDataFReader, pBlockIdx, &block, &pWriter->bDataR, NULL, NULL); @@ -570,11 +570,11 @@ static int32_t tsdbSnapMoveWriteTableData(STsdbSnapWriter* pWriter, SBlockIdx* p // if (code) goto _err; // } - code = tMapDataPutItem(&pWriter->mBlockW, &block, tPutBlock); + code = tMapDataPutItem(&pWriter->mBlockW, &block, tPutDataBlk); if (code) goto _err; } - // SBlock + // SDataBlk SBlockIdx blockIdx = {.suid = pBlockIdx->suid, .uid = pBlockIdx->uid}; code = tsdbWriteBlock(pWriter->pDataFWriter, &pWriter->mBlockW, &blockIdx); if (code) goto _err; @@ -642,10 +642,10 @@ static int32_t tsdbSnapWriteTableDataImpl(STsdbSnapWriter* pWriter) { while (true) { if (pWriter->iBlock >= pWriter->mBlock.nItem) break; - SBlock block; - int32_t c; + SDataBlk block; + int32_t c; - tMapDataGetItemByIdx(&pWriter->mBlock, pWriter->iBlock, &block, tGetBlock); + tMapDataGetItemByIdx(&pWriter->mBlock, pWriter->iBlock, &block, tGetDataBlk); // if (block.last) { // pWriter->pBlockData = &pWriter->bDataR; @@ -668,14 +668,14 @@ static int32_t tsdbSnapWriteTableDataImpl(STsdbSnapWriter* pWriter) { // &pWriter->blockW, pWriter->cmprAlg); // if (code) goto _err; - code = tMapDataPutItem(&pWriter->mBlockW, &pWriter->blockW, tPutBlock); + code = tMapDataPutItem(&pWriter->mBlockW, &pWriter->blockW, tPutDataBlk); if (code) goto _err; - tBlockReset(&pWriter->blockW); + tDataBlkReset(&pWriter->blockW); tBlockDataClear(&pWriter->bDataW); } - code = tMapDataPutItem(&pWriter->mBlockW, &block, tPutBlock); + code = tMapDataPutItem(&pWriter->mBlockW, &block, tPutDataBlk); if (code) goto _err; pWriter->iBlock++; @@ -719,10 +719,10 @@ static int32_t tsdbSnapWriteTableDataImpl(STsdbSnapWriter* pWriter) { // &pWriter->blockW, pWriter->cmprAlg); // if (code) goto _err; - code = tMapDataPutItem(&pWriter->mBlockW, &pWriter->blockW, tPutBlock); + code = tMapDataPutItem(&pWriter->mBlockW, &pWriter->blockW, tPutDataBlk); if (code) goto _err; - tBlockReset(&pWriter->blockW); + tDataBlkReset(&pWriter->blockW); tBlockDataClear(&pWriter->bDataW); } @@ -803,7 +803,7 @@ static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, TABLEID id) { pWriter->pBlockIdxW->suid = id.suid; pWriter->pBlockIdxW->uid = id.uid; - tBlockReset(&pWriter->blockW); + tDataBlkReset(&pWriter->blockW); tBlockDataReset(&pWriter->bDataW); tMapDataReset(&pWriter->mBlockW); } @@ -845,7 +845,7 @@ static int32_t tsdbSnapWriteDataEnd(STsdbSnapWriter* pWriter) { // write remain stuff if (taosArrayGetSize(pWriter->aBlockLW) > 0) { - code = tsdbWriteBlockL(pWriter->pDataFWriter, pWriter->aBlockIdxW); + code = tsdbWriteSttBlk(pWriter->pDataFWriter, pWriter->aBlockIdxW); if (code) goto _err; } @@ -911,12 +911,12 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 code = tsdbReadBlockIdx(pWriter->pDataFReader, pWriter->aBlockIdx); if (code) goto _err; - code = tsdbReadBlockL(pWriter->pDataFReader, pWriter->aBlockL); + code = tsdbReadSttBlk(pWriter->pDataFReader, 0, pWriter->aSstBlk); if (code) goto _err; } else { ASSERT(pWriter->pDataFReader == NULL); taosArrayClear(pWriter->aBlockIdx); - taosArrayClear(pWriter->aBlockL); + taosArrayClear(pWriter->aSstBlk); } pWriter->iBlockIdx = 0; pWriter->pBlockIdx = NULL; @@ -931,23 +931,25 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 // write SHeadFile fHead; SDataFile fData; - SLastFile fLast; + SSttFile fLast; SSmaFile fSma; - SDFileSet wSet = {.pHeadF = &fHead, .pDataF = &fData, .pLastF = &fLast, .pSmaF = &fSma}; + SDFileSet wSet = {.pHeadF = &fHead, .pDataF = &fData, .aSttF[0] = &fLast, .pSmaF = &fSma}; if (pSet) { wSet.diskId = pSet->diskId; wSet.fid = fid; + wSet.nSttF = 1; fHead = (SHeadFile){.commitID = pWriter->commitID, .offset = 0, .size = 0}; fData = *pSet->pDataF; - fLast = (SLastFile){.commitID = pWriter->commitID, .size = 0}; + fLast = (SSttFile){.commitID = pWriter->commitID, .size = 0}; fSma = *pSet->pSmaF; } else { wSet.diskId = (SDiskID){.level = 0, .id = 0}; wSet.fid = fid; + wSet.nSttF = 1; fHead = (SHeadFile){.commitID = pWriter->commitID, .offset = 0, .size = 0}; fData = (SDataFile){.commitID = pWriter->commitID, .size = 0}; - fLast = (SLastFile){.commitID = pWriter->commitID, .size = 0, .offset = 0}; + fLast = (SSttFile){.commitID = pWriter->commitID, .size = 0, .offset = 0}; fSma = (SSmaFile){.commitID = pWriter->commitID, .size = 0}; } @@ -1145,8 +1147,8 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr code = tBlockDataCreate(&pWriter->bDataR); if (code) goto _err; - pWriter->aBlockL = taosArrayInit(0, sizeof(SBlockL)); - if (pWriter->aBlockL == NULL) { + pWriter->aSstBlk = taosArrayInit(0, sizeof(SSttBlk)); + if (pWriter->aSstBlk == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } @@ -1159,7 +1161,7 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr code = tBlockDataCreate(&pWriter->bDataW); if (code) goto _err; - pWriter->aBlockLW = taosArrayInit(0, sizeof(SBlockL)); + pWriter->aBlockLW = taosArrayInit(0, sizeof(SSttBlk)); if (pWriter->aBlockLW == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index cfb04881e6..caeca45e01 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -51,6 +51,22 @@ _exit: return code; } +int32_t tMapDataCopy(SMapData *pFrom, SMapData *pTo) { + int32_t code = 0; + + pTo->nItem = pFrom->nItem; + pTo->nData = pFrom->nData; + code = tRealloc((uint8_t **)&pTo->aOffset, sizeof(int32_t) * pFrom->nItem); + if (code) goto _exit; + code = tRealloc(&pTo->pData, pFrom->nData); + if (code) goto _exit; + memcpy(pTo->aOffset, pFrom->aOffset, sizeof(int32_t) * pFrom->nItem); + memcpy(pTo->pData, pFrom->pData, pFrom->nData); + +_exit: + return code; +} + int32_t tMapDataSearch(SMapData *pMapData, void *pSearchItem, int32_t (*tGetItemFn)(uint8_t *, void *), int32_t (*tItemCmprFn)(const void *, const void *), void *pItem) { int32_t code = 0; @@ -198,7 +214,7 @@ int32_t tCmprBlockIdx(void const *lhs, void const *rhs) { int32_t tCmprBlockL(void const *lhs, void const *rhs) { SBlockIdx *lBlockIdx = (SBlockIdx *)lhs; - SBlockL *rBlockL = (SBlockL *)rhs; + SSttBlk *rBlockL = (SSttBlk *)rhs; if (lBlockIdx->suid < rBlockL->suid) { return -1; @@ -215,69 +231,69 @@ int32_t tCmprBlockL(void const *lhs, void const *rhs) { return 0; } -// SBlock ====================================================== -void tBlockReset(SBlock *pBlock) { - *pBlock = (SBlock){.minKey = TSDBKEY_MAX, .maxKey = TSDBKEY_MIN, .minVer = VERSION_MAX, .maxVer = VERSION_MIN}; +// SDataBlk ====================================================== +void tDataBlkReset(SDataBlk *pDataBlk) { + *pDataBlk = (SDataBlk){.minKey = TSDBKEY_MAX, .maxKey = TSDBKEY_MIN, .minVer = VERSION_MAX, .maxVer = VERSION_MIN}; } -int32_t tPutBlock(uint8_t *p, void *ph) { - int32_t n = 0; - SBlock *pBlock = (SBlock *)ph; +int32_t tPutDataBlk(uint8_t *p, void *ph) { + int32_t n = 0; + SDataBlk *pDataBlk = (SDataBlk *)ph; - n += tPutI64v(p ? p + n : p, pBlock->minKey.version); - n += tPutI64v(p ? p + n : p, pBlock->minKey.ts); - n += tPutI64v(p ? p + n : p, pBlock->maxKey.version); - n += tPutI64v(p ? p + n : p, pBlock->maxKey.ts); - n += tPutI64v(p ? p + n : p, pBlock->minVer); - n += tPutI64v(p ? p + n : p, pBlock->maxVer); - n += tPutI32v(p ? p + n : p, pBlock->nRow); - n += tPutI8(p ? p + n : p, pBlock->hasDup); - n += tPutI8(p ? p + n : p, pBlock->nSubBlock); - for (int8_t iSubBlock = 0; iSubBlock < pBlock->nSubBlock; iSubBlock++) { - n += tPutI64v(p ? p + n : p, pBlock->aSubBlock[iSubBlock].offset); - n += tPutI32v(p ? p + n : p, pBlock->aSubBlock[iSubBlock].szBlock); - n += tPutI32v(p ? p + n : p, pBlock->aSubBlock[iSubBlock].szKey); + n += tPutI64v(p ? p + n : p, pDataBlk->minKey.version); + n += tPutI64v(p ? p + n : p, pDataBlk->minKey.ts); + n += tPutI64v(p ? p + n : p, pDataBlk->maxKey.version); + n += tPutI64v(p ? p + n : p, pDataBlk->maxKey.ts); + n += tPutI64v(p ? p + n : p, pDataBlk->minVer); + n += tPutI64v(p ? p + n : p, pDataBlk->maxVer); + n += tPutI32v(p ? p + n : p, pDataBlk->nRow); + n += tPutI8(p ? p + n : p, pDataBlk->hasDup); + n += tPutI8(p ? p + n : p, pDataBlk->nSubBlock); + for (int8_t iSubBlock = 0; iSubBlock < pDataBlk->nSubBlock; iSubBlock++) { + n += tPutI64v(p ? p + n : p, pDataBlk->aSubBlock[iSubBlock].offset); + n += tPutI32v(p ? p + n : p, pDataBlk->aSubBlock[iSubBlock].szBlock); + n += tPutI32v(p ? p + n : p, pDataBlk->aSubBlock[iSubBlock].szKey); } - if (pBlock->nSubBlock == 1 && !pBlock->hasDup) { - n += tPutI64v(p ? p + n : p, pBlock->smaInfo.offset); - n += tPutI32v(p ? p + n : p, pBlock->smaInfo.size); + if (pDataBlk->nSubBlock == 1 && !pDataBlk->hasDup) { + n += tPutI64v(p ? p + n : p, pDataBlk->smaInfo.offset); + n += tPutI32v(p ? p + n : p, pDataBlk->smaInfo.size); } return n; } -int32_t tGetBlock(uint8_t *p, void *ph) { - int32_t n = 0; - SBlock *pBlock = (SBlock *)ph; +int32_t tGetDataBlk(uint8_t *p, void *ph) { + int32_t n = 0; + SDataBlk *pDataBlk = (SDataBlk *)ph; - n += tGetI64v(p + n, &pBlock->minKey.version); - n += tGetI64v(p + n, &pBlock->minKey.ts); - n += tGetI64v(p + n, &pBlock->maxKey.version); - n += tGetI64v(p + n, &pBlock->maxKey.ts); - n += tGetI64v(p + n, &pBlock->minVer); - n += tGetI64v(p + n, &pBlock->maxVer); - n += tGetI32v(p + n, &pBlock->nRow); - n += tGetI8(p + n, &pBlock->hasDup); - n += tGetI8(p + n, &pBlock->nSubBlock); - for (int8_t iSubBlock = 0; iSubBlock < pBlock->nSubBlock; iSubBlock++) { - n += tGetI64v(p + n, &pBlock->aSubBlock[iSubBlock].offset); - n += tGetI32v(p + n, &pBlock->aSubBlock[iSubBlock].szBlock); - n += tGetI32v(p + n, &pBlock->aSubBlock[iSubBlock].szKey); + n += tGetI64v(p + n, &pDataBlk->minKey.version); + n += tGetI64v(p + n, &pDataBlk->minKey.ts); + n += tGetI64v(p + n, &pDataBlk->maxKey.version); + n += tGetI64v(p + n, &pDataBlk->maxKey.ts); + n += tGetI64v(p + n, &pDataBlk->minVer); + n += tGetI64v(p + n, &pDataBlk->maxVer); + n += tGetI32v(p + n, &pDataBlk->nRow); + n += tGetI8(p + n, &pDataBlk->hasDup); + n += tGetI8(p + n, &pDataBlk->nSubBlock); + for (int8_t iSubBlock = 0; iSubBlock < pDataBlk->nSubBlock; iSubBlock++) { + n += tGetI64v(p + n, &pDataBlk->aSubBlock[iSubBlock].offset); + n += tGetI32v(p + n, &pDataBlk->aSubBlock[iSubBlock].szBlock); + n += tGetI32v(p + n, &pDataBlk->aSubBlock[iSubBlock].szKey); } - if (pBlock->nSubBlock == 1 && !pBlock->hasDup) { - n += tGetI64v(p + n, &pBlock->smaInfo.offset); - n += tGetI32v(p + n, &pBlock->smaInfo.size); + if (pDataBlk->nSubBlock == 1 && !pDataBlk->hasDup) { + n += tGetI64v(p + n, &pDataBlk->smaInfo.offset); + n += tGetI32v(p + n, &pDataBlk->smaInfo.size); } else { - pBlock->smaInfo.offset = 0; - pBlock->smaInfo.size = 0; + pDataBlk->smaInfo.offset = 0; + pDataBlk->smaInfo.size = 0; } return n; } -int32_t tBlockCmprFn(const void *p1, const void *p2) { - SBlock *pBlock1 = (SBlock *)p1; - SBlock *pBlock2 = (SBlock *)p2; +int32_t tDataBlkCmprFn(const void *p1, const void *p2) { + SDataBlk *pBlock1 = (SDataBlk *)p1; + SDataBlk *pBlock2 = (SDataBlk *)p2; if (tsdbKeyCmprFn(&pBlock1->maxKey, &pBlock2->minKey) < 0) { return -1; @@ -288,48 +304,48 @@ int32_t tBlockCmprFn(const void *p1, const void *p2) { return 0; } -bool tBlockHasSma(SBlock *pBlock) { - if (pBlock->nSubBlock > 1) return false; - if (pBlock->hasDup) return false; +bool tDataBlkHasSma(SDataBlk *pDataBlk) { + if (pDataBlk->nSubBlock > 1) return false; + if (pDataBlk->hasDup) return false; - return pBlock->smaInfo.size > 0; + return pDataBlk->smaInfo.size > 0; } -// SBlockL ====================================================== -int32_t tPutBlockL(uint8_t *p, void *ph) { +// SSttBlk ====================================================== +int32_t tPutSttBlk(uint8_t *p, void *ph) { int32_t n = 0; - SBlockL *pBlockL = (SBlockL *)ph; + SSttBlk *pSttBlk = (SSttBlk *)ph; - n += tPutI64(p ? p + n : p, pBlockL->suid); - n += tPutI64(p ? p + n : p, pBlockL->minUid); - n += tPutI64(p ? p + n : p, pBlockL->maxUid); - n += tPutI64v(p ? p + n : p, pBlockL->minKey); - n += tPutI64v(p ? p + n : p, pBlockL->maxKey); - n += tPutI64v(p ? p + n : p, pBlockL->minVer); - n += tPutI64v(p ? p + n : p, pBlockL->maxVer); - n += tPutI32v(p ? p + n : p, pBlockL->nRow); - n += tPutI64v(p ? p + n : p, pBlockL->bInfo.offset); - n += tPutI32v(p ? p + n : p, pBlockL->bInfo.szBlock); - n += tPutI32v(p ? p + n : p, pBlockL->bInfo.szKey); + n += tPutI64(p ? p + n : p, pSttBlk->suid); + n += tPutI64(p ? p + n : p, pSttBlk->minUid); + n += tPutI64(p ? p + n : p, pSttBlk->maxUid); + n += tPutI64v(p ? p + n : p, pSttBlk->minKey); + n += tPutI64v(p ? p + n : p, pSttBlk->maxKey); + n += tPutI64v(p ? p + n : p, pSttBlk->minVer); + n += tPutI64v(p ? p + n : p, pSttBlk->maxVer); + n += tPutI32v(p ? p + n : p, pSttBlk->nRow); + n += tPutI64v(p ? p + n : p, pSttBlk->bInfo.offset); + n += tPutI32v(p ? p + n : p, pSttBlk->bInfo.szBlock); + n += tPutI32v(p ? p + n : p, pSttBlk->bInfo.szKey); return n; } -int32_t tGetBlockL(uint8_t *p, void *ph) { +int32_t tGetSttBlk(uint8_t *p, void *ph) { int32_t n = 0; - SBlockL *pBlockL = (SBlockL *)ph; + SSttBlk *pSttBlk = (SSttBlk *)ph; - n += tGetI64(p + n, &pBlockL->suid); - n += tGetI64(p + n, &pBlockL->minUid); - n += tGetI64(p + n, &pBlockL->maxUid); - n += tGetI64v(p + n, &pBlockL->minKey); - n += tGetI64v(p + n, &pBlockL->maxKey); - n += tGetI64v(p + n, &pBlockL->minVer); - n += tGetI64v(p + n, &pBlockL->maxVer); - n += tGetI32v(p + n, &pBlockL->nRow); - n += tGetI64v(p + n, &pBlockL->bInfo.offset); - n += tGetI32v(p + n, &pBlockL->bInfo.szBlock); - n += tGetI32v(p + n, &pBlockL->bInfo.szKey); + n += tGetI64(p + n, &pSttBlk->suid); + n += tGetI64(p + n, &pSttBlk->minUid); + n += tGetI64(p + n, &pSttBlk->maxUid); + n += tGetI64v(p + n, &pSttBlk->minKey); + n += tGetI64v(p + n, &pSttBlk->maxKey); + n += tGetI64v(p + n, &pSttBlk->minVer); + n += tGetI64v(p + n, &pSttBlk->maxVer); + n += tGetI32v(p + n, &pSttBlk->nRow); + n += tGetI64v(p + n, &pSttBlk->bInfo.offset); + n += tGetI32v(p + n, &pSttBlk->bInfo.szBlock); + n += tGetI32v(p + n, &pSttBlk->bInfo.szKey); return n; } @@ -1532,7 +1548,7 @@ int32_t tCmprBlockData(SBlockData *pBlockData, int8_t cmprAlg, uint8_t **ppOut, if (code) goto _exit; blockCol.offset = aBufN[0]; - aBufN[0] = aBufN[0] + blockCol.szBitmap + blockCol.szOffset + blockCol.szValue + sizeof(TSCKSUM); + aBufN[0] = aBufN[0] + blockCol.szBitmap + blockCol.szOffset + blockCol.szValue; } code = tRealloc(&aBuf[1], hdr.szBlkCol + tPutBlockCol(NULL, &blockCol)); @@ -1540,15 +1556,8 @@ int32_t tCmprBlockData(SBlockData *pBlockData, int8_t cmprAlg, uint8_t **ppOut, hdr.szBlkCol += tPutBlockCol(aBuf[1] + hdr.szBlkCol, &blockCol); } - aBufN[1] = 0; - if (hdr.szBlkCol > 0) { - aBufN[1] = hdr.szBlkCol + sizeof(TSCKSUM); - - code = tRealloc(&aBuf[1], aBufN[1]); - if (code) goto _exit; - - taosCalcChecksumAppend(0, aBuf[1], aBufN[1]); - } + // SBlockCol + aBufN[1] = hdr.szBlkCol; // uid + version + tskey aBufN[2] = 0; @@ -1569,16 +1578,11 @@ int32_t tCmprBlockData(SBlockData *pBlockData, int8_t cmprAlg, uint8_t **ppOut, if (code) goto _exit; aBufN[2] += hdr.szKey; - aBufN[2] += sizeof(TSCKSUM); - code = tRealloc(&aBuf[2], aBufN[2]); - if (code) goto _exit; - // hdr aBufN[3] = tPutDiskDataHdr(NULL, &hdr); code = tRealloc(&aBuf[3], aBufN[3]); if (code) goto _exit; tPutDiskDataHdr(aBuf[3], &hdr); - taosCalcChecksumAppend(taosCalcChecksum(0, aBuf[3], aBufN[3]), aBuf[2], aBufN[2]); // aggragate if (ppOut) { @@ -1603,17 +1607,13 @@ _exit: int32_t tDecmprBlockData(uint8_t *pIn, int32_t szIn, SBlockData *pBlockData, uint8_t *aBuf[]) { int32_t code = 0; - tBlockDataClear(pBlockData); + tBlockDataReset(pBlockData); int32_t n = 0; SDiskDataHdr hdr = {0}; // SDiskDataHdr n += tGetDiskDataHdr(pIn + n, &hdr); - if (!taosCheckChecksumWhole(pIn, n + hdr.szUid + hdr.szVer + hdr.szKey + sizeof(TSCKSUM))) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _exit; - } ASSERT(hdr.delimiter == TSDB_FILE_DLMT); pBlockData->suid = hdr.suid; @@ -1641,7 +1641,7 @@ int32_t tDecmprBlockData(uint8_t *pIn, int32_t szIn, SBlockData *pBlockData, uin code = tsdbDecmprData(pIn + n, hdr.szKey, TSDB_DATA_TYPE_TIMESTAMP, hdr.cmprAlg, (uint8_t **)&pBlockData->aTSKEY, sizeof(TSKEY) * hdr.nRow, &aBuf[0]); if (code) goto _exit; - n = n + hdr.szKey + sizeof(TSCKSUM); + n += hdr.szKey; // loop to decode each column data if (hdr.szBlkCol == 0) goto _exit; @@ -1663,8 +1663,8 @@ int32_t tDecmprBlockData(uint8_t *pIn, int32_t szIn, SBlockData *pBlockData, uin if (code) goto _exit; } } else { - code = tsdbDecmprColData(pIn + n + hdr.szBlkCol + sizeof(TSCKSUM) + blockCol.offset, &blockCol, hdr.cmprAlg, - hdr.nRow, pColData, &aBuf[0]); + code = tsdbDecmprColData(pIn + n + hdr.szBlkCol + blockCol.offset, &blockCol, hdr.cmprAlg, hdr.nRow, pColData, + &aBuf[0]); if (code) goto _exit; } } @@ -2046,12 +2046,6 @@ int32_t tsdbCmprColData(SColData *pColData, int8_t cmprAlg, SBlockCol *pBlockCol } size += pBlockCol->szValue; - // checksum - size += sizeof(TSCKSUM); - code = tRealloc(ppOut, nOut + size); - if (code) goto _exit; - taosCalcChecksumAppend(0, *ppOut + nOut, size); - _exit: return code; } @@ -2060,12 +2054,6 @@ int32_t tsdbDecmprColData(uint8_t *pIn, SBlockCol *pBlockCol, int8_t cmprAlg, in uint8_t **ppBuf) { int32_t code = 0; - int32_t size = pBlockCol->szBitmap + pBlockCol->szOffset + pBlockCol->szValue + sizeof(TSCKSUM); - if (!taosCheckChecksumWhole(pIn, size)) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _exit; - } - ASSERT(pColData->cid == pBlockCol->cid); ASSERT(pColData->type == pBlockCol->type); pColData->smaOn = pBlockCol->smaOn; @@ -2137,37 +2125,3 @@ int32_t tsdbDecmprColData(uint8_t *pIn, SBlockCol *pBlockCol, int8_t cmprAlg, in _exit: return code; } - -int32_t tsdbReadAndCheck(TdFilePtr pFD, int64_t offset, uint8_t **ppOut, int32_t size, int8_t toCheck) { - int32_t code = 0; - - // alloc - code = tRealloc(ppOut, size); - if (code) goto _exit; - - // seek - int64_t n = taosLSeekFile(pFD, offset, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _exit; - } - - // read - n = taosReadFile(pFD, *ppOut, size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _exit; - } else if (n < size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _exit; - } - - // check - if (toCheck && !taosCheckChecksumWhole(*ppOut, size)) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _exit; - } - -_exit: - return code; -} diff --git a/source/dnode/vnode/src/tsdb/tsdbWrite.c b/source/dnode/vnode/src/tsdb/tsdbWrite.c index 383652531e..0a9fbf92a4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbWrite.c +++ b/source/dnode/vnode/src/tsdb/tsdbWrite.c @@ -39,7 +39,7 @@ int tsdbInsertData(STsdb *pTsdb, int64_t version, SSubmitReq *pMsg, SSubmitRsp * SSubmitBlkRsp r = {0}; tGetSubmitMsgNext(&msgIter, &pBlock); if (pBlock == NULL) break; - if (tsdbInsertTableData(pTsdb, version, &msgIter, pBlock, &r) < 0) { + if ((terrno = tsdbInsertTableData(pTsdb, version, &msgIter, pBlock, &r)) < 0) { return -1; } diff --git a/source/dnode/vnode/src/vnd/vnodeQuery.c b/source/dnode/vnode/src/vnd/vnodeQuery.c index 23ca3ddb8d..8cfe1d8adf 100644 --- a/source/dnode/vnode/src/vnd/vnodeQuery.c +++ b/source/dnode/vnode/src/vnd/vnodeQuery.c @@ -368,6 +368,7 @@ _exit: int32_t vnodeGetLoad(SVnode *pVnode, SVnodeLoad *pLoad) { pLoad->vgId = TD_VID(pVnode); pLoad->syncState = syncGetMyRole(pVnode->sync); + pLoad->cacheUsage = tsdbCacheGetUsage(pVnode); pLoad->numOfTables = metaGetTbNum(pVnode->pMeta); pLoad->numOfTimeSeries = metaGetTimeSeriesNum(pVnode->pMeta); pLoad->totalStorage = (int64_t)3 * 1073741824; diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 5a81f91919..03ab5e8285 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -39,7 +39,7 @@ struct SVSnapReader { SStreamStateReader *pStreamStateReader; // rsma int8_t rsmaDone; - SRsmaSnapReader *pRsmaReader; + SRSmaSnapReader *pRsmaReader; }; int32_t vnodeSnapReaderOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapReader **ppReader) { @@ -241,7 +241,7 @@ struct SVSnapWriter { SStreamTaskWriter *pStreamTaskWriter; SStreamStateWriter *pStreamStateWriter; // rsma - SRsmaSnapWriter *pRsmaSnapWriter; + SRSmaSnapWriter *pRsmaSnapWriter; }; int32_t vnodeSnapWriterOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapWriter **ppWriter) { diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 2724d4cbfd..51d83d8eed 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -534,7 +534,7 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t version, void *pR } tqUpdateTbUidList(pVnode->pTq, tbUids, true); - if (tdUpdateTbUidList(pVnode->pSma, pStore) < 0) { + if (tdUpdateTbUidList(pVnode->pSma, pStore, true) < 0) { goto _exit; } tdUidStoreFree(pStore); @@ -692,6 +692,7 @@ static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t version, void *pReq SEncoder encoder = {0}; int32_t ret; SArray *tbUids = NULL; + STbUidStore *pStore = NULL; pRsp->msgType = TDMT_VND_DROP_TABLE_RSP; pRsp->pCont = NULL; @@ -715,9 +716,10 @@ static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t version, void *pReq for (int32_t iReq = 0; iReq < req.nReqs; iReq++) { SVDropTbReq *pDropTbReq = req.pReqs + iReq; SVDropTbRsp dropTbRsp = {0}; + tb_uid_t tbUid = 0; /* code */ - ret = metaDropTable(pVnode->pMeta, version, pDropTbReq, tbUids); + ret = metaDropTable(pVnode->pMeta, version, pDropTbReq, tbUids, &tbUid); if (ret < 0) { if (pDropTbReq->igNotExists && terrno == TSDB_CODE_VND_TABLE_NOT_EXIST) { dropTbRsp.code = TSDB_CODE_SUCCESS; @@ -726,15 +728,18 @@ static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t version, void *pReq } } else { dropTbRsp.code = TSDB_CODE_SUCCESS; + if (tbUid > 0) tdFetchTbUidList(pVnode->pSma, &pStore, pDropTbReq->suid, tbUid); } taosArrayPush(rsp.pArray, &dropTbRsp); } tqUpdateTbUidList(pVnode->pTq, tbUids, false); + tdUpdateTbUidList(pVnode->pSma, pStore, false); _exit: taosArrayDestroy(tbUids); + tdUidStoreFree(pStore); tDecoderClear(&decoder); tEncodeSize(tEncodeSVDropTbBatchRsp, &rsp, pRsp->contLen, ret); pRsp->pCont = rpcMallocCont(pRsp->contLen); diff --git a/source/libs/catalog/src/ctgDbg.c b/source/libs/catalog/src/ctgDbg.c index bd3402dc39..d215242307 100644 --- a/source/libs/catalog/src/ctgDbg.c +++ b/source/libs/catalog/src/ctgDbg.c @@ -367,18 +367,22 @@ void ctgdShowDBCache(SCatalog* pCtg, SHashObj *dbHash) { int32_t stbNum = dbCache->stbCache ? taosHashGetSize(dbCache->stbCache) : 0; int32_t vgVersion = CTG_DEFAULT_INVALID_VERSION; int32_t hashMethod = -1; + int16_t hashPrefix = 0; + int16_t hashSuffix = 0; int32_t vgNum = 0; if (dbCache->vgCache.vgInfo) { vgVersion = dbCache->vgCache.vgInfo->vgVersion; hashMethod = dbCache->vgCache.vgInfo->hashMethod; + hashPrefix = dbCache->vgCache.vgInfo->hashPrefix; + hashSuffix = dbCache->vgCache.vgInfo->hashSuffix; if (dbCache->vgCache.vgInfo->vgHash) { vgNum = taosHashGetSize(dbCache->vgCache.vgInfo->vgHash); } } - ctgDebug("[%d] db [%.*s][0x%"PRIx64"] %s: metaNum:%d, stbNum:%d, vgVersion:%d, hashMethod:%d, vgNum:%d", - i, (int32_t)len, dbFName, dbCache->dbId, dbCache->deleted?"deleted":"", metaNum, stbNum, vgVersion, hashMethod, vgNum); + ctgDebug("[%d] db [%.*s][0x%"PRIx64"] %s: metaNum:%d, stbNum:%d, vgVersion:%d, hashMethod:%d, prefix:%d, suffix:%d, vgNum:%d", + i, (int32_t)len, dbFName, dbCache->dbId, dbCache->deleted?"deleted":"", metaNum, stbNum, vgVersion, hashMethod, hashPrefix, hashSuffix, vgNum); pIter = taosHashIterate(dbHash, pIter); } diff --git a/source/libs/catalog/src/ctgUtil.c b/source/libs/catalog/src/ctgUtil.c index e28234ab76..296100ce6d 100644 --- a/source/libs/catalog/src/ctgUtil.c +++ b/source/libs/catalog/src/ctgUtil.c @@ -848,15 +848,11 @@ int32_t ctgGetVgInfoFromHashValue(SCatalog *pCtg, SDBVgInfo *dbInfo, const SName CTG_ERR_RET(TSDB_CODE_TSC_DB_NOT_SELECTED); } - tableNameHashFp fp = NULL; SVgroupInfo *vgInfo = NULL; - - CTG_ERR_RET(ctgGetHashFunction(dbInfo->hashMethod, &fp)); - char tbFullName[TSDB_TABLE_FNAME_LEN]; tNameExtractFullName(pTableName, tbFullName); - uint32_t hashValue = (*fp)(tbFullName, (uint32_t)strlen(tbFullName)); + uint32_t hashValue = taosGetTbHashVal(tbFullName, (uint32_t)strlen(tbFullName), dbInfo->hashMethod, dbInfo->hashPrefix, dbInfo->hashSuffix); void *pIter = taosHashIterate(dbInfo->vgHash, NULL); while (pIter) { @@ -919,11 +915,7 @@ int32_t ctgGetVgInfosFromHashValue(SCatalog *pCtg, SCtgTaskReq* tReq, SDBVgInfo CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); } - tableNameHashFp fp = NULL; SVgroupInfo *vgInfo = NULL; - - CTG_ERR_RET(ctgGetHashFunction(dbInfo->hashMethod, &fp)); - int32_t tbNum = taosArrayGetSize(pNames); if (1 == vgNum) { @@ -975,7 +967,7 @@ int32_t ctgGetVgInfosFromHashValue(SCatalog *pCtg, SCtgTaskReq* tReq, SDBVgInfo tbNameLen = offset + strlen(pName->tname); strcpy(tbFullName + offset, pName->tname); - uint32_t hashValue = (*fp)(tbFullName, (uint32_t)tbNameLen); + uint32_t hashValue = taosGetTbHashVal(tbFullName, (uint32_t)strlen(tbFullName), dbInfo->hashMethod, dbInfo->hashPrefix, dbInfo->hashSuffix); SVgroupInfo **p = taosArraySearch(pVgList, &hashValue, ctgHashValueComp, TD_EQ); diff --git a/source/libs/catalog/test/catalogTests.cpp b/source/libs/catalog/test/catalogTests.cpp index 0be85333dc..c01c269e64 100644 --- a/source/libs/catalog/test/catalogTests.cpp +++ b/source/libs/catalog/test/catalogTests.cpp @@ -218,6 +218,8 @@ void ctgTestBuildDBVgroup(SDBVgInfo **pdbVgroup) { ctgTestCurrentVgVersion = dbVgroup->vgVersion; dbVgroup->hashMethod = 0; + dbVgroup->hashPrefix = 0; + dbVgroup->hashSuffix = 0; dbVgroup->vgHash = taosHashInit(ctgTestVgNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); vgNum = ctgTestGetVgNumFromVgVersion(dbVgroup->vgVersion); diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index 9e7fcc2227..0722c2b306 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -87,9 +87,7 @@ struct SqlFunctionCtx; size_t getResultRowSize(struct SqlFunctionCtx* pCtx, int32_t numOfOutput); void initResultRowInfo(SResultRowInfo* pResultRowInfo); - -void initResultRow(SResultRow* pResultRow); -void closeResultRow(SResultRow* pResultRow); +void closeResultRow(SResultRow* pResultRow); struct SResultRowEntryInfo* getResultEntryInfo(const SResultRow* pRow, int32_t index, const int32_t* offset); diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index c248015604..a9826e0018 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -591,6 +591,24 @@ typedef struct SMergeAlignedIntervalAggOperatorInfo { SNode* pCondition; } SMergeAlignedIntervalAggOperatorInfo; +typedef struct SStreamIntervalOperatorInfo { + // SOptrBasicInfo should be first, SAggSupporter should be second for stream encode + SOptrBasicInfo binfo; // basic info + SAggSupporter aggSup; // aggregate supporter + SExprSupp scalarSupp; // supporter for perform scalar function + SGroupResInfo groupResInfo; // multiple results build supporter + SInterval interval; // interval info + int32_t primaryTsIndex; // primary time stamp slot id from result of downstream operator. + STimeWindowAggSupp twAggSup; + bool invertible; + bool ignoreExpiredData; + SArray* pRecycledPages; + SArray* pDelWins; // SWinRes + int32_t delIndex; + SSDataBlock* pDelRes; + bool isFinal; +} SStreamIntervalOperatorInfo; + typedef struct SStreamFinalIntervalOperatorInfo { // SOptrBasicInfo should be first, SAggSupporter should be second for stream encode SOptrBasicInfo binfo; // basic info @@ -909,8 +927,7 @@ void initLimitInfo(const SNode* pLimit, const SNode* pSLimit, SLimitInfo* pLi void doApplyFunctions(SExecTaskInfo* taskInfo, SqlFunctionCtx* pCtx, SColumnInfoData* pTimeWindowData, int32_t offset, int32_t forwardStep, int32_t numOfTotal, int32_t numOfOutput); -int32_t extractDataBlockFromFetchRsp(SSDataBlock* pRes, char* pData, int32_t numOfOutput, SArray* pColList, - char** pNextStart); +int32_t extractDataBlockFromFetchRsp(SSDataBlock* pRes, char* pData, SArray* pColList, char** pNextStart); void updateLoadRemoteInfo(SLoadRemoteDataInfo* pInfo, int32_t numOfRows, int32_t dataLen, int64_t startTs, SOperatorInfo* pOperator); @@ -1004,6 +1021,8 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh SExecTaskInfo* pTaskInfo); SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild); +SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, + SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); diff --git a/source/libs/executor/src/dataDeleter.c b/source/libs/executor/src/dataDeleter.c index 06b7c13fa2..40198615ea 100644 --- a/source/libs/executor/src/dataDeleter.c +++ b/source/libs/executor/src/dataDeleter.c @@ -168,7 +168,9 @@ static void getDataLength(SDataSinkHandle* pHandle, int64_t* pLen, bool* pQueryE taosReadQitem(pDeleter->pDataBlocks, (void**)&pBuf); memcpy(&pDeleter->nextOutput, pBuf, sizeof(SDataDeleterBuf)); taosFreeQitem(pBuf); - *pLen = ((SDataCacheEntry*)(pDeleter->nextOutput.pData))->dataLen; + + SDataCacheEntry* pEntry = (SDataCacheEntry*)pDeleter->nextOutput.pData; + *pLen = pEntry->dataLen; *pQueryEnd = pDeleter->queryEnd; qDebug("got data len %" PRId64 ", row num %d in sink", *pLen, ((SDataCacheEntry*)(pDeleter->nextOutput.pData))->numOfRows); } diff --git a/source/libs/executor/src/dataDispatcher.c b/source/libs/executor/src/dataDispatcher.c index 20396046ba..1697ed63fb 100644 --- a/source/libs/executor/src/dataDispatcher.c +++ b/source/libs/executor/src/dataDispatcher.c @@ -93,6 +93,8 @@ static void toDataCacheEntry(SDataDispatchHandle* pHandle, const SInputData* pIn pBuf->useSize = sizeof(SDataCacheEntry); blockEncode(pInput->pData, pEntry->data, &pEntry->dataLen, numOfCols, pEntry->compressed); + ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data+8)); + ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data+8+4)); pBuf->useSize += pEntry->dataLen; @@ -170,7 +172,13 @@ static void getDataLength(SDataSinkHandle* pHandle, int64_t* pLen, bool* pQueryE taosReadQitem(pDispatcher->pDataBlocks, (void**)&pBuf); memcpy(&pDispatcher->nextOutput, pBuf, sizeof(SDataDispatchBuf)); taosFreeQitem(pBuf); - *pLen = ((SDataCacheEntry*)(pDispatcher->nextOutput.pData))->dataLen; + + SDataCacheEntry* pEntry = (SDataCacheEntry*)pDispatcher->nextOutput.pData; + *pLen = pEntry->dataLen; + + ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data+8)); + ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data+8+4)); + *pQueryEnd = pDispatcher->queryEnd; qDebug("got data len %" PRId64 ", row num %d in sink", *pLen, ((SDataCacheEntry*)(pDispatcher->nextOutput.pData))->numOfRows); } @@ -191,6 +199,9 @@ static int32_t getDataBlock(SDataSinkHandle* pHandle, SOutputData* pOutput) { pOutput->numOfCols = pEntry->numOfCols; pOutput->compressed = pEntry->compressed; + ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data+8)); + ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data+8+4)); + atomic_sub_fetch_64(&pDispatcher->cachedSize, pEntry->dataLen); atomic_sub_fetch_64(&gDataSinkStat.cachedSize, pEntry->dataLen); diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 715eb4780c..3965b7e5b2 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -1302,7 +1302,6 @@ int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysi pCond->type = TIMEWINDOW_RANGE_CONTAINED; pCond->startVersion = -1; pCond->endVersion = -1; - pCond->schemaVersion = -1; // pCond->type = pTableScanNode->scanFlag; int32_t j = 0; diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 037bf543b1..95415e1113 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -97,6 +97,8 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu } } +static FORCE_INLINE void streamInputBlockDataDestory(void* pBlock) { blockDataDestroy((SSDataBlock*)pBlock); } + void tdCleanupStreamInputDataBlock(qTaskInfo_t tinfo) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; if (!pTaskInfo || !pTaskInfo->pRoot || pTaskInfo->pRoot->numOfDownstream <= 0) { @@ -107,11 +109,7 @@ void tdCleanupStreamInputDataBlock(qTaskInfo_t tinfo) { if (pOptrInfo->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { SStreamScanInfo* pInfo = pOptrInfo->info; if (pInfo->blockType == STREAM_INPUT__DATA_BLOCK) { - for (int32_t i = 0; i < taosArrayGetSize(pInfo->pBlockLists); ++i) { - SSDataBlock* p = *(SSDataBlock**)taosArrayGet(pInfo->pBlockLists, i); - taosArrayDestroy(p->pDataBlock); - taosMemoryFreeClear(p); - } + taosArrayClearP(pInfo->pBlockLists, streamInputBlockDataDestory); } else { ASSERT(0); } @@ -733,7 +731,6 @@ int32_t initQueryTableDataCondForTmq(SQueryTableDataCond* pCond, SSnapContext* s pCond->type = TIMEWINDOW_RANGE_CONTAINED; pCond->startVersion = -1; pCond->endVersion = sContext->snapVersion; - pCond->schemaVersion = sContext->snapVersion; for (int32_t i = 0; i < pCond->numOfCols; ++i) { pCond->colList[i].type = mtInfo.schema->pSchema[i].type; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index e17b994f0e..205bcd58df 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1866,13 +1866,11 @@ void updateLoadRemoteInfo(SLoadRemoteDataInfo* pInfo, int32_t numOfRows, int32_t pOperator->resultInfo.totalRows += numOfRows; } -int32_t extractDataBlockFromFetchRsp(SSDataBlock* pRes, char* pData, int32_t numOfOutput, SArray* pColList, - char** pNextStart) { +int32_t extractDataBlockFromFetchRsp(SSDataBlock* pRes, char* pData, SArray* pColList, char** pNextStart) { if (pColList == NULL) { // data from other sources blockDataCleanup(pRes); *pNextStart = (char*)blockDecode(pRes, pData); } else { // extract data according to pColList - ASSERT(numOfOutput == taosArrayGetSize(pColList)); char* pStart = pData; int32_t numOfCols = htonl(*(int32_t*)pStart); @@ -1970,7 +1968,7 @@ static void concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SExchangeIn char* pStart = pRetrieveRsp->data; while (index++ < pRetrieveRsp->numOfBlocks) { SSDataBlock* pb = createOneDataBlock(pExchangeInfo->pDummyBlock, false); - code = extractDataBlockFromFetchRsp(pb, pStart, pRetrieveRsp->numOfCols, NULL, &pStart); + code = extractDataBlockFromFetchRsp(pb, pStart, NULL, &pStart); if (code != 0) { taosMemoryFreeClear(pDataInfo->pRsp); goto _error; @@ -2095,7 +2093,7 @@ static int32_t seqLoadRemoteData(SOperatorInfo* pOperator) { SRetrieveTableRsp* pRetrieveRsp = pDataInfo->pRsp; char* pStart = pRetrieveRsp->data; - int32_t code = extractDataBlockFromFetchRsp(NULL, pStart, pRetrieveRsp->numOfCols, NULL, &pStart); + int32_t code = extractDataBlockFromFetchRsp(NULL, pStart, NULL, &pStart); if (pRsp->completed == 1) { qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " execId:%d numOfRows:%d, rowsOfSource:%" PRIu64 @@ -3731,7 +3729,6 @@ static int32_t initTableblockDistQueryCond(uint64_t uid, SQueryTableDataCond* pC pCond->type = TIMEWINDOW_RANGE_CONTAINED; pCond->startVersion = -1; pCond->endVersion = -1; - pCond->schemaVersion = -1; return TSDB_CODE_SUCCESS; } @@ -3916,7 +3913,7 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo pOptr = createAggregateOperatorInfo(ops[0], pExprInfo, num, pResBlock, pAggNode->node.pConditions, pScalarExprInfo, numOfScalarExpr, pAggNode->mergeDataBlock, pTaskInfo); } - } else if (QUERY_NODE_PHYSICAL_PLAN_HASH_INTERVAL == type || QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type) { + } else if (QUERY_NODE_PHYSICAL_PLAN_HASH_INTERVAL == type) { SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; SExprInfo* pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &num); @@ -3941,6 +3938,8 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo pOptr = createIntervalOperatorInfo(ops[0], pExprInfo, num, pResBlock, &interval, tsSlotId, &as, pIntervalPhyNode, pTaskInfo, isStream); + } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type) { + pOptr = createStreamIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL == type) { SMergeAlignedIntervalPhysiNode* pIntervalPhyNode = (SMergeAlignedIntervalPhysiNode*)pPhyNode; pOptr = createMergeAlignedIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 60670b1a49..c099c1c24c 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -695,6 +695,7 @@ static void destroyTableScanOperatorInfo(void* param) { cleanupQueryTableDataCond(&pTableScanInfo->cond); tsdbReaderClose(pTableScanInfo->dataReader); + pTableScanInfo->dataReader = NULL; if (pTableScanInfo->pColMatchInfo != NULL) { taosArrayDestroy(pTableScanInfo->pColMatchInfo); @@ -2648,7 +2649,7 @@ static SSDataBlock* doSysTableScan(SOperatorInfo* pOperator) { } char* pStart = pRsp->data; - extractDataBlockFromFetchRsp(pInfo->pRes, pRsp->data, pOperator->exprSupp.numOfExprs, pInfo->scanCols, &pStart); + extractDataBlockFromFetchRsp(pInfo->pRes, pRsp->data, pInfo->scanCols, &pStart); updateLoadRemoteInfo(&pInfo->loadInfo, pRsp->numOfRows, pRsp->compLen, startTs, pOperator); // todo log the filter info diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 5a056754eb..d773f8a629 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -622,7 +622,8 @@ static void doInterpUnclosedTimeWindow(SOperatorInfo* pOperatorInfo, int32_t num if (pr->closed) { ASSERT(isResultRowInterpolated(pr, RESULT_ROW_START_INTERP) && isResultRowInterpolated(pr, RESULT_ROW_END_INTERP)); - tdListPopHead(pResultRowInfo->openWindow); + SListNode* pNode = tdListPopHead(pResultRowInfo->openWindow); + taosMemoryFree(pNode); continue; } @@ -651,7 +652,8 @@ static void doInterpUnclosedTimeWindow(SOperatorInfo* pOperatorInfo, int32_t num if (isResultRowInterpolated(pResult, RESULT_ROW_END_INTERP)) { closeResultRow(pr); - tdListPopHead(pResultRowInfo->openWindow); + SListNode* pNode = tdListPopHead(pResultRowInfo->openWindow); + taosMemoryFree(pNode); } else { // the remains are can not be closed yet. break; } @@ -937,7 +939,7 @@ bool isOverdue(TSKEY ts, STimeWindowAggSupp* pSup) { bool isCloseWindow(STimeWindow* pWin, STimeWindowAggSupp* pSup) { return isOverdue(pWin->ekey, pSup); } static void hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResultRowInfo, SSDataBlock* pBlock, - int32_t scanFlag, SHashObj* pUpdatedMap) { + int32_t scanFlag) { SIntervalAggOperatorInfo* pInfo = (SIntervalAggOperatorInfo*)pOperatorInfo->info; SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; @@ -953,21 +955,11 @@ static void hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul STimeWindow win = getActiveTimeWindow(pInfo->aggSup.pResultBuf, pResultRowInfo, ts, &pInfo->interval, pInfo->inputOrder); - int32_t ret = TSDB_CODE_SUCCESS; - if ((!pInfo->ignoreExpiredData || !isCloseWindow(&win, &pInfo->twAggSup)) && - inSlidingWindow(&pInfo->interval, &win, &pBlock->info)) { - ret = setTimeWindowOutputBuf(pResultRowInfo, &win, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, pSup->pCtx, - numOfOutput, pSup->rowEntryInfoOffset, &pInfo->aggSup, pTaskInfo); - if (ret != TSDB_CODE_SUCCESS || pResult == NULL) { - T_LONG_JMP(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - if (pInfo->execModel == OPTR_EXEC_MODEL_STREAM && pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { - saveWinResultRow(pResult, tableGroupId, pUpdatedMap); - setResultBufPageDirty(pInfo->aggSup.pResultBuf, &pResultRowInfo->cur); - } + int32_t ret = setTimeWindowOutputBuf(pResultRowInfo, &win, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, pSup->pCtx, + numOfOutput, pSup->rowEntryInfoOffset, &pInfo->aggSup, pTaskInfo); + if (ret != TSDB_CODE_SUCCESS || pResult == NULL) { + T_LONG_JMP(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); } - TSKEY ekey = ascScan ? win.ekey : win.skey; int32_t forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->inputOrder); @@ -989,12 +981,9 @@ static void hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul doWindowBorderInterpolation(pInfo, pBlock, pResult, &win, startPos, forwardRows, pSup); } - if ((!pInfo->ignoreExpiredData || !isCloseWindow(&win, &pInfo->twAggSup)) && - inSlidingWindow(&pInfo->interval, &win, &pBlock->info)) { - updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &win, true); - doApplyFunctions(pTaskInfo, pSup->pCtx, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, pBlock->info.rows, - numOfOutput); - } + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &win, true); + doApplyFunctions(pTaskInfo, pSup->pCtx, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, pBlock->info.rows, + numOfOutput); doCloseWindow(pResultRowInfo, pInfo, pResult); @@ -1005,13 +994,6 @@ static void hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul if (startPos < 0) { break; } - if (pInfo->ignoreExpiredData && isCloseWindow(&nextWin, &pInfo->twAggSup)) { - ekey = ascScan ? nextWin.ekey : nextWin.skey; - forwardRows = - getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->inputOrder); - continue; - } - // null data, failed to allocate more memory buffer int32_t code = setTimeWindowOutputBuf(pResultRowInfo, &nextWin, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset, &pInfo->aggSup, pTaskInfo); @@ -1019,11 +1001,6 @@ static void hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul T_LONG_JMP(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); } - if (pInfo->execModel == OPTR_EXEC_MODEL_STREAM && pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { - saveWinResultRow(pResult, tableGroupId, pUpdatedMap); - setResultBufPageDirty(pInfo->aggSup.pResultBuf, &pResultRowInfo->cur); - } - ekey = ascScan ? nextWin.ekey : nextWin.skey; forwardRows = getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, pInfo->inputOrder); @@ -1128,7 +1105,7 @@ static int32_t doOpenIntervalAgg(SOperatorInfo* pOperator) { setInputDataBlock(pOperator, pSup->pCtx, pBlock, pInfo->inputOrder, scanFlag, true); blockDataUpdateTsWindow(pBlock, pInfo->primaryTsIndex); - hashIntervalAgg(pOperator, &pInfo->binfo.resultRowInfo, pBlock, scanFlag, NULL); + hashIntervalAgg(pOperator, &pInfo->binfo.resultRowInfo, pBlock, scanFlag); } initGroupedResultInfo(&pInfo->groupResInfo, pInfo->aggSup.pResultRowHashTable, pInfo->resultTsOrder); @@ -1579,141 +1556,6 @@ static void doBuildDeleteResult(SArray* pWins, int32_t* index, SSDataBlock* pBlo } } -static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { - SIntervalAggOperatorInfo* pInfo = pOperator->info; - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - - pInfo->inputOrder = TSDB_ORDER_ASC; - SExprSupp* pSup = &pOperator->exprSupp; - - if (pOperator->status == OP_EXEC_DONE) { - return NULL; - } - - if (pOperator->status == OP_RES_TO_RETURN) { - doBuildDeleteResult(pInfo->pDelWins, &pInfo->delIndex, pInfo->pDelRes); - if (pInfo->pDelRes->info.rows > 0) { - printDataBlock(pInfo->pDelRes, "single interval"); - return pInfo->pDelRes; - } - - doBuildResultDatablock(pOperator, &pInfo->binfo, &pInfo->groupResInfo, pInfo->aggSup.pResultBuf); - if (pInfo->binfo.pRes->info.rows == 0 || !hasRemainResults(&pInfo->groupResInfo)) { - pOperator->status = OP_EXEC_DONE; - qDebug("===stream===single interval is done"); - freeAllPages(pInfo->pRecycledPages, pInfo->aggSup.pResultBuf); - } - printDataBlock(pInfo->binfo.pRes, "single interval"); - return pInfo->binfo.pRes->info.rows == 0 ? NULL : pInfo->binfo.pRes; - } - - SOperatorInfo* downstream = pOperator->pDownstream[0]; - - SArray* pUpdated = taosArrayInit(4, POINTER_BYTES); // SResKeyPos - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - SHashObj* pUpdatedMap = taosHashInit(1024, hashFn, false, HASH_NO_LOCK); - - SStreamState* pState = pTaskInfo->streamInfo.pState; - - while (1) { - SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); - if (pBlock == NULL) { - break; - } - // qInfo("===stream===%ld", pBlock->info.version); - printDataBlock(pBlock, "single interval recv"); - - if (pBlock->info.type == STREAM_CLEAR) { - doClearWindows(&pInfo->aggSup, &pOperator->exprSupp, &pInfo->interval, pOperator->exprSupp.numOfExprs, pBlock, - NULL); - qDebug("%s clear existed time window results for updates checked", GET_TASKID(pTaskInfo)); - continue; - } - if (pBlock->info.type == STREAM_DELETE_DATA) { - doDeleteSpecifyIntervalWindow(&pInfo->aggSup, pBlock, pInfo->pDelWins, &pInfo->interval, pUpdatedMap); - continue; - } else if (pBlock->info.type == STREAM_GET_ALL) { - getAllIntervalWindow(pInfo->aggSup.pResultRowHashTable, pUpdatedMap); - continue; - } - - if (pBlock->info.type == STREAM_NORMAL && pBlock->info.version != 0) { - // set input version - pTaskInfo->version = pBlock->info.version; - } - - if (pInfo->scalarSupp.pExprInfo != NULL) { - SExprSupp* pExprSup = &pInfo->scalarSupp; - projectApplyFunctions(pExprSup->pExprInfo, pBlock, pBlock, pExprSup->pCtx, pExprSup->numOfExprs, NULL); - } - - // The timewindow that overlaps the timestamps of the input pBlock need to be recalculated and return to the - // caller. Note that all the time window are not close till now. - // the pDataBlock are always the same one, no need to call this again - setInputDataBlock(pOperator, pSup->pCtx, pBlock, pInfo->inputOrder, MAIN_SCAN, true); - if (pInfo->invertible) { - setInverFunction(pSup->pCtx, pOperator->exprSupp.numOfExprs, pBlock->info.type); - } - - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); - hashIntervalAgg(pOperator, &pInfo->binfo.resultRowInfo, pBlock, MAIN_SCAN, pUpdatedMap); - } - -#if 0 - if (pState) { - printf(">>>>>>>> stream read backend\n"); - SWinKey key = { - .ts = 1, - .groupId = 2, - }; - char* val = NULL; - int32_t sz; - if (streamStateGet(pState, &key, (void**)&val, &sz) < 0) { - ASSERT(0); - } - printf("stream read %s %d\n", val, sz); - streamFreeVal(val); - - SStreamStateCur* pCur = streamStateGetCur(pState, &key); - ASSERT(pCur); - while (streamStateCurNext(pState, pCur) == 0) { - SWinKey key1; - const void* val1; - if (streamStateGetKVByCur(pCur, &key1, &val1, &sz) < 0) { - break; - } - printf("stream iter key groupId:%d ts:%d, value %s %d\n", key1.groupId, key1.ts, val1, sz); - } - streamStateFreeCur(pCur); - } -#endif - - pOperator->status = OP_RES_TO_RETURN; - closeIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, &pInfo->interval, NULL, pUpdatedMap, - pInfo->pRecycledPages, pInfo->aggSup.pResultBuf); - - void* pIte = NULL; - while ((pIte = taosHashIterate(pUpdatedMap, pIte)) != NULL) { - taosArrayPush(pUpdated, pIte); - } - taosArraySort(pUpdated, resultrowComparAsc); - - finalizeUpdatedResult(pOperator->exprSupp.numOfExprs, pInfo->aggSup.pResultBuf, pUpdated, pSup->rowEntryInfoOffset); - initMultiResInfoFromArrayList(&pInfo->groupResInfo, pUpdated); - blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); - removeDeleteResults(pUpdatedMap, pInfo->pDelWins); - taosHashCleanup(pUpdatedMap); - doBuildDeleteResult(pInfo->pDelWins, &pInfo->delIndex, pInfo->pDelRes); - if (pInfo->pDelRes->info.rows > 0) { - printDataBlock(pInfo->pDelRes, "single interval"); - return pInfo->pDelRes; - } - - doBuildResultDatablock(pOperator, &pInfo->binfo, &pInfo->groupResInfo, pInfo->aggSup.pResultBuf); - printDataBlock(pInfo->binfo.pRes, "single interval"); - return pInfo->binfo.pRes->info.rows == 0 ? NULL : pInfo->binfo.pRes; -} - static void destroyStateWindowOperatorInfo(void* param) { SStateWindowOperatorInfo* pInfo = (SStateWindowOperatorInfo*)param; cleanupBasicInfo(&pInfo->binfo); @@ -1731,6 +1573,10 @@ void destroyIntervalOperatorInfo(void* param) { SIntervalAggOperatorInfo* pInfo = (SIntervalAggOperatorInfo*)param; cleanupBasicInfo(&pInfo->binfo); cleanupAggSup(&pInfo->aggSup); + cleanupExprSupp(&pInfo->scalarSupp); + + tdListFree(pInfo->binfo.resultRowInfo.openWindow); + pInfo->pRecycledPages = taosArrayDestroy(pInfo->pRecycledPages); pInfo->pInterpCols = taosArrayDestroy(pInfo->pInterpCols); taosArrayDestroyEx(pInfo->pPrevValues, freeItem); @@ -1919,7 +1765,7 @@ SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pOperator->status = OP_NOT_OPENED; pOperator->info = pInfo; - pOperator->fpSet = createOperatorFpSet(doOpenIntervalAgg, doBuildIntervalResult, doStreamIntervalAgg, NULL, + pOperator->fpSet = createOperatorFpSet(doOpenIntervalAgg, doBuildIntervalResult, NULL, NULL, destroyIntervalOperatorInfo, aggEncodeResultRow, aggDecodeResultRow, NULL); if (nodeType(pPhyNode) == QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL) { @@ -5621,3 +5467,311 @@ _error: pTaskInfo->code = code; return NULL; } + +static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResultRowInfo, + SSDataBlock* pBlock, int32_t scanFlag, SHashObj* pUpdatedMap) { + SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)pOperatorInfo->info; + + SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; + SExprSupp* pSup = &pOperatorInfo->exprSupp; + + int32_t startPos = 0; + int32_t numOfOutput = pSup->numOfExprs; + SColumnInfoData* pColDataInfo = taosArrayGet(pBlock->pDataBlock, pInfo->primaryTsIndex); + TSKEY* tsCols = (TSKEY*)pColDataInfo->pData; + uint64_t tableGroupId = pBlock->info.groupId; + bool ascScan = true; + TSKEY ts = getStartTsKey(&pBlock->info.window, tsCols); + SResultRow* pResult = NULL; + + STimeWindow win = + getActiveTimeWindow(pInfo->aggSup.pResultBuf, pResultRowInfo, ts, &pInfo->interval, TSDB_ORDER_ASC); + int32_t ret = TSDB_CODE_SUCCESS; + if ((!pInfo->ignoreExpiredData || !isCloseWindow(&win, &pInfo->twAggSup)) && + inSlidingWindow(&pInfo->interval, &win, &pBlock->info)) { + ret = setTimeWindowOutputBuf(pResultRowInfo, &win, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, pSup->pCtx, + numOfOutput, pSup->rowEntryInfoOffset, &pInfo->aggSup, pTaskInfo); + if (ret != TSDB_CODE_SUCCESS || pResult == NULL) { + T_LONG_JMP(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); + } + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { + saveWinResultRow(pResult, tableGroupId, pUpdatedMap); + setResultBufPageDirty(pInfo->aggSup.pResultBuf, &pResultRowInfo->cur); + } + } + + TSKEY ekey = ascScan ? win.ekey : win.skey; + int32_t forwardRows = + getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, TSDB_ORDER_ASC); + ASSERT(forwardRows > 0); + + if ((!pInfo->ignoreExpiredData || !isCloseWindow(&win, &pInfo->twAggSup)) && + inSlidingWindow(&pInfo->interval, &win, &pBlock->info)) { + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &win, true); + doApplyFunctions(pTaskInfo, pSup->pCtx, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, pBlock->info.rows, + numOfOutput); + } + + STimeWindow nextWin = win; + while (1) { + int32_t prevEndPos = forwardRows - 1 + startPos; + startPos = getNextQualifiedWindow(&pInfo->interval, &nextWin, &pBlock->info, tsCols, prevEndPos, TSDB_ORDER_ASC); + if (startPos < 0) { + break; + } + if (pInfo->ignoreExpiredData && isCloseWindow(&nextWin, &pInfo->twAggSup)) { + ekey = ascScan ? nextWin.ekey : nextWin.skey; + forwardRows = + getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, TSDB_ORDER_ASC); + continue; + } + + // null data, failed to allocate more memory buffer + int32_t code = setTimeWindowOutputBuf(pResultRowInfo, &nextWin, (scanFlag == MAIN_SCAN), &pResult, tableGroupId, + pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset, &pInfo->aggSup, pTaskInfo); + if (code != TSDB_CODE_SUCCESS || pResult == NULL) { + T_LONG_JMP(pTaskInfo->env, TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { + saveWinResultRow(pResult, tableGroupId, pUpdatedMap); + setResultBufPageDirty(pInfo->aggSup.pResultBuf, &pResultRowInfo->cur); + } + + ekey = ascScan ? nextWin.ekey : nextWin.skey; + forwardRows = + getNumOfRowsInTimeWindow(&pBlock->info, tsCols, startPos, ekey, binarySearchForKey, NULL, TSDB_ORDER_ASC); + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &nextWin, true); + doApplyFunctions(pTaskInfo, pSup->pCtx, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, pBlock->info.rows, + numOfOutput); + } +} + +static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + int64_t maxTs = INT64_MIN; + SExprSupp* pSup = &pOperator->exprSupp; + + if (pOperator->status == OP_EXEC_DONE) { + return NULL; + } + + if (pOperator->status == OP_RES_TO_RETURN) { + doBuildDeleteResult(pInfo->pDelWins, &pInfo->delIndex, pInfo->pDelRes); + if (pInfo->pDelRes->info.rows > 0) { + printDataBlock(pInfo->pDelRes, "single interval"); + return pInfo->pDelRes; + } + + doBuildResultDatablock(pOperator, &pInfo->binfo, &pInfo->groupResInfo, pInfo->aggSup.pResultBuf); + if (pInfo->binfo.pRes->info.rows == 0 || !hasRemainResults(&pInfo->groupResInfo)) { + pOperator->status = OP_EXEC_DONE; + qDebug("===stream===single interval is done"); + freeAllPages(pInfo->pRecycledPages, pInfo->aggSup.pResultBuf); + } + printDataBlock(pInfo->binfo.pRes, "single interval"); + return pInfo->binfo.pRes->info.rows == 0 ? NULL : pInfo->binfo.pRes; + } + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + + SArray* pUpdated = taosArrayInit(4, POINTER_BYTES); // SResKeyPos + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + SHashObj* pUpdatedMap = taosHashInit(1024, hashFn, false, HASH_NO_LOCK); + + SStreamState* pState = pTaskInfo->streamInfo.pState; + + while (1) { + SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + if (pBlock == NULL) { + break; + } + printDataBlock(pBlock, "single interval recv"); + + if (pBlock->info.type == STREAM_CLEAR) { + doClearWindows(&pInfo->aggSup, &pOperator->exprSupp, &pInfo->interval, pOperator->exprSupp.numOfExprs, pBlock, + NULL); + qDebug("%s clear existed time window results for updates checked", GET_TASKID(pTaskInfo)); + continue; + } else if (pBlock->info.type == STREAM_DELETE_DATA) { + doDeleteSpecifyIntervalWindow(&pInfo->aggSup, pBlock, pInfo->pDelWins, &pInfo->interval, pUpdatedMap); + continue; + } else if (pBlock->info.type == STREAM_GET_ALL) { + getAllIntervalWindow(pInfo->aggSup.pResultRowHashTable, pUpdatedMap); + continue; + } + + if (pBlock->info.type == STREAM_NORMAL && pBlock->info.version != 0) { + // set input version + pTaskInfo->version = pBlock->info.version; + } + + if (pInfo->scalarSupp.pExprInfo != NULL) { + SExprSupp* pExprSup = &pInfo->scalarSupp; + projectApplyFunctions(pExprSup->pExprInfo, pBlock, pBlock, pExprSup->pCtx, pExprSup->numOfExprs, NULL); + } + + // The timewindow that overlaps the timestamps of the input pBlock need to be recalculated and return to the + // caller. Note that all the time window are not close till now. + // the pDataBlock are always the same one, no need to call this again + setInputDataBlock(pOperator, pSup->pCtx, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); + if (pInfo->invertible) { + setInverFunction(pSup->pCtx, pOperator->exprSupp.numOfExprs, pBlock->info.type); + } + + maxTs = TMAX(maxTs, pBlock->info.window.ekey); + doStreamIntervalAggImpl(pOperator, &pInfo->binfo.resultRowInfo, pBlock, MAIN_SCAN, pUpdatedMap); + } + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, maxTs); + +#if 0 + if (pState) { + printf(">>>>>>>> stream read backend\n"); + SWinKey key = { + .ts = 1, + .groupId = 2, + }; + char* val = NULL; + int32_t sz; + if (streamStateGet(pState, &key, (void**)&val, &sz) < 0) { + ASSERT(0); + } + printf("stream read %s %d\n", val, sz); + streamFreeVal(val); + + SStreamStateCur* pCur = streamStateGetCur(pState, &key); + ASSERT(pCur); + while (streamStateCurNext(pState, pCur) == 0) { + SWinKey key1; + const void* val1; + if (streamStateGetKVByCur(pCur, &key1, &val1, &sz) < 0) { + break; + } + printf("stream iter key groupId:%d ts:%d, value %s %d\n", key1.groupId, key1.ts, val1, sz); + } + streamStateFreeCur(pCur); + } +#endif + + pOperator->status = OP_RES_TO_RETURN; + closeIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, &pInfo->interval, NULL, pUpdatedMap, + pInfo->pRecycledPages, pInfo->aggSup.pResultBuf); + + void* pIte = NULL; + while ((pIte = taosHashIterate(pUpdatedMap, pIte)) != NULL) { + taosArrayPush(pUpdated, pIte); + } + taosArraySort(pUpdated, resultrowComparAsc); + + finalizeUpdatedResult(pOperator->exprSupp.numOfExprs, pInfo->aggSup.pResultBuf, pUpdated, pSup->rowEntryInfoOffset); + initMultiResInfoFromArrayList(&pInfo->groupResInfo, pUpdated); + blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); + removeDeleteResults(pUpdatedMap, pInfo->pDelWins); + taosHashCleanup(pUpdatedMap); + doBuildDeleteResult(pInfo->pDelWins, &pInfo->delIndex, pInfo->pDelRes); + if (pInfo->pDelRes->info.rows > 0) { + printDataBlock(pInfo->pDelRes, "single interval"); + return pInfo->pDelRes; + } + + doBuildResultDatablock(pOperator, &pInfo->binfo, &pInfo->groupResInfo, pInfo->aggSup.pResultBuf); + printDataBlock(pInfo->binfo.pRes, "single interval"); + return pInfo->binfo.pRes->info.rows == 0 ? NULL : pInfo->binfo.pRes; +} + +void destroyStreamIntervalOperatorInfo(void* param) { + SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)param; + cleanupBasicInfo(&pInfo->binfo); + cleanupAggSup(&pInfo->aggSup); + pInfo->pRecycledPages = taosArrayDestroy(pInfo->pRecycledPages); + + pInfo->pDelWins = taosArrayDestroy(pInfo->pDelWins); + pInfo->pDelRes = blockDataDestroy(pInfo->pDelRes); + + cleanupGroupResInfo(&pInfo->groupResInfo); + colDataDestroy(&pInfo->twAggSup.timeWindowData); + taosMemoryFreeClear(param); +} + +SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, + SExecTaskInfo* pTaskInfo) { + SStreamIntervalOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamIntervalOperatorInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + if (pInfo == NULL || pOperator == NULL) { + goto _error; + } + SStreamIntervalPhysiNode* pIntervalPhyNode = (SStreamIntervalPhysiNode*)pPhyNode; + + int32_t numOfCols = 0; + SExprInfo* pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &numOfCols); + ASSERT(numOfCols > 0); + SSDataBlock* pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc); + SInterval interval = {.interval = pIntervalPhyNode->interval, + .sliding = pIntervalPhyNode->sliding, + .intervalUnit = pIntervalPhyNode->intervalUnit, + .slidingUnit = pIntervalPhyNode->slidingUnit, + .offset = pIntervalPhyNode->offset, + .precision = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision, }; + STimeWindowAggSupp twAggSupp = {.waterMark = pIntervalPhyNode->window.watermark, + .calTrigger = pIntervalPhyNode->window.triggerType, + .maxTs = INT64_MIN, }; + ASSERT(twAggSupp.calTrigger != STREAM_TRIGGER_MAX_DELAY); + pOperator->pTaskInfo = pTaskInfo; + pInfo->interval = interval; + pInfo->twAggSup = twAggSupp; + pInfo->ignoreExpiredData = pIntervalPhyNode->window.igExpired; + pInfo->isFinal = false; + + if (pIntervalPhyNode->window.pExprs != NULL) { + int32_t numOfScalar = 0; + SExprInfo* pScalarExprInfo = createExprInfo(pIntervalPhyNode->window.pExprs, NULL, &numOfScalar); + int32_t code = initExprSupp(&pInfo->scalarSupp, pScalarExprInfo, numOfScalar); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + } + + pInfo->primaryTsIndex = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId;; + initResultSizeInfo(&pOperator->resultInfo, 4096); + SExprSupp* pSup = &pOperator->exprSupp; + size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; + int32_t code = initAggInfo(pSup, &pInfo->aggSup, pExprInfo, numOfCols, keyBufSize, pTaskInfo->id.str); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + initBasicInfo(&pInfo->binfo, pResBlock); + initStreamFunciton(pSup->pCtx, pSup->numOfExprs); + initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); + + pInfo->invertible = allInvertible(pSup->pCtx, numOfCols); + pInfo->invertible = false; // Todo(liuyao): Dependent TSDB API + pInfo->pRecycledPages = taosArrayInit(4, sizeof(int32_t)); + pInfo->pDelWins = taosArrayInit(4, sizeof(SWinKey)); + pInfo->delIndex = 0; + pInfo->pDelRes = createSpecialDataBlock(STREAM_DELETE_RESULT); + initResultRowInfo(&pInfo->binfo.resultRowInfo); + + pOperator->name = "StreamIntervalOperator"; + pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL; + pOperator->blocking = true; + pOperator->status = OP_NOT_OPENED; + pOperator->info = pInfo; + pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doStreamIntervalAgg, NULL, NULL, + destroyStreamIntervalOperatorInfo, aggEncodeResultRow, aggDecodeResultRow, NULL); + + initIntervalDownStream(downstream, pPhyNode->type, &pInfo->aggSup, &pInfo->interval, pInfo->twAggSup.waterMark); + code = appendDownstream(pOperator, &downstream, 1); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + return pOperator; + +_error: + destroyStreamIntervalOperatorInfo(pInfo); + taosMemoryFreeClear(pOperator); + pTaskInfo->code = code; + return NULL; +} diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 168cd21c44..63fc9d9e1c 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -227,9 +227,9 @@ static int32_t sortComparInit(SMsortComparParam* cmpParam, SArray* pSources, int continue; } - SPageInfo* pPgInfo = *(SPageInfo**)taosArrayGet(pSource->pageIdList, pSource->pageIndex); + int32_t* pPgId = taosArrayGet(pSource->pageIdList, pSource->pageIndex); - void* pPage = getBufPage(pHandle->pBuf, getPageId(pPgInfo)); + void* pPage = getBufPage(pHandle->pBuf, *pPgId); code = blockDataFromBuf(pSource->src.pBlock, pPage); if (code != TSDB_CODE_SUCCESS) { return code; @@ -302,9 +302,9 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource *pSource, SMultiwayMergeT pSource->pageIndex = -1; pSource->src.pBlock = blockDataDestroy(pSource->src.pBlock); } else { - SPageInfo* pPgInfo = *(SPageInfo**)taosArrayGet(pSource->pageIdList, pSource->pageIndex); + int32_t* pPgId = taosArrayGet(pSource->pageIdList, pSource->pageIndex); - void* pPage = getBufPage(pHandle->pBuf, getPageId(pPgInfo)); + void* pPage = getBufPage(pHandle->pBuf, *pPgId); int32_t code = blockDataFromBuf(pSource->src.pBlock, pPage); if (code != TSDB_CODE_SUCCESS) { return code; diff --git a/source/libs/monitor/src/monMsg.c b/source/libs/monitor/src/monMsg.c index 8fa7e88605..bbee8b1166 100644 --- a/source/libs/monitor/src/monMsg.c +++ b/source/libs/monitor/src/monMsg.c @@ -510,6 +510,7 @@ int32_t tSerializeSMonVloadInfo(void *buf, int32_t bufLen, SMonVloadInfo *pInfo) SVnodeLoad *pLoad = taosArrayGet(pInfo->pVloads, i); if (tEncodeI32(&encoder, pLoad->vgId) < 0) return -1; if (tEncodeI32(&encoder, pLoad->syncState) < 0) return -1; + if (tEncodeI64(&encoder, pLoad->cacheUsage) < 0) return -1; if (tEncodeI64(&encoder, pLoad->numOfTables) < 0) return -1; if (tEncodeI64(&encoder, pLoad->numOfTimeSeries) < 0) return -1; if (tEncodeI64(&encoder, pLoad->totalStorage) < 0) return -1; @@ -544,6 +545,7 @@ int32_t tDeserializeSMonVloadInfo(void *buf, int32_t bufLen, SMonVloadInfo *pInf SVnodeLoad load = {0}; if (tDecodeI32(&decoder, &load.vgId) < 0) return -1; if (tDecodeI32(&decoder, &load.syncState) < 0) return -1; + if (tDecodeI64(&decoder, &load.cacheUsage) < 0) return -1; if (tDecodeI64(&decoder, &load.numOfTables) < 0) return -1; if (tDecodeI64(&decoder, &load.numOfTimeSeries) < 0) return -1; if (tDecodeI64(&decoder, &load.totalStorage) < 0) return -1; @@ -594,7 +596,6 @@ int32_t tDeserializeSMonMloadInfo(void *buf, int32_t bufLen, SMonMloadInfo *pInf return 0; } - int32_t tSerializeSQnodeLoad(void *buf, int32_t bufLen, SQnodeLoad *pInfo) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -639,5 +640,3 @@ int32_t tDeserializeSQnodeLoad(void *buf, int32_t bufLen, SQnodeLoad *pInfo) { tDecoderClear(&decoder); return 0; } - - diff --git a/source/libs/qcom/src/querymsg.c b/source/libs/qcom/src/querymsg.c index e2d3ac1583..e54937114c 100644 --- a/source/libs/qcom/src/querymsg.c +++ b/source/libs/qcom/src/querymsg.c @@ -38,6 +38,8 @@ int32_t queryBuildUseDbOutput(SUseDbOutput *pOut, SUseDbRsp *usedbRsp) { pOut->dbVgroup->vgVersion = usedbRsp->vgVersion; pOut->dbVgroup->hashMethod = usedbRsp->hashMethod; + pOut->dbVgroup->hashPrefix = usedbRsp->hashPrefix; + pOut->dbVgroup->hashSuffix = usedbRsp->hashSuffix; qDebug("Got %d vgroup for db %s", usedbRsp->vgNum, usedbRsp->db); diff --git a/source/libs/transport/src/transComm.c b/source/libs/transport/src/transComm.c index a4d679b281..dea96fa3ac 100644 --- a/source/libs/transport/src/transComm.c +++ b/source/libs/transport/src/transComm.c @@ -287,10 +287,10 @@ void transCtxMerge(STransCtx* dst, STransCtx* src) { STransCtxVal* sVal = (STransCtxVal*)iter; key = taosHashGetKey(sVal, &klen); - STransCtxVal* dVal = taosHashGet(dst->args, key, klen); - if (dVal) { - dst->freeFunc(dVal->val); - } + // STransCtxVal* dVal = taosHashGet(dst->args, key, klen); + // if (dVal) { + // dst->freeFunc(dVal->val); + // } taosHashPut(dst->args, key, klen, sVal, sizeof(*sVal)); iter = taosHashIterate(src->args, iter); } diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 3117152af6..044cdc86b4 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -618,6 +618,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_REMOVE_EXISTS, "Rsma remove exists" TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_FETCH_MSG_MSSED_UP, "Rsma fetch msg is messed up") TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_EMPTY_INFO, "Rsma info is empty") TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_INVALID_SCHEMA, "Rsma invalid schema") +TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_REGEX_MATCH, "Rsma regex match") //index TAOS_DEFINE_ERROR(TSDB_CODE_INDEX_REBUILDING, "Index is rebuilding") diff --git a/source/util/src/trbtree.c b/source/util/src/trbtree.c index 0970485dad..65f1bac60a 100644 --- a/source/util/src/trbtree.c +++ b/source/util/src/trbtree.c @@ -13,179 +13,297 @@ * along with this program. If not, see . */ -#include "os.h" +#include "trbtree.h" -typedef int32_t (*tRBTreeCmprFn)(void *, void *); - -typedef struct SRBTree SRBTree; -typedef struct SRBTreeNode SRBTreeNode; -typedef struct SRBTreeIter SRBTreeIter; - -struct SRBTreeNode { - enum { RED, BLACK } color; - SRBTreeNode *parent; - SRBTreeNode *left; - SRBTreeNode *right; - uint8_t payload[]; -}; - -struct SRBTree { - tRBTreeCmprFn cmprFn; - SRBTreeNode *root; -}; - -struct SRBTreeIter { - SRBTree *pTree; -}; - -#define RBTREE_NODE_COLOR(N) ((N) ? (N)->color : BLACK) - -// APIs ================================================ -static void tRBTreeRotateLeft(SRBTree *pTree, SRBTreeNode *pNode) { - SRBTreeNode *right = pNode->right; - - pNode->right = right->left; - if (pNode->right) { - pNode->right->parent = pNode; +static void tRBTreeRotateLeft(SRBTree *pTree, SRBTreeNode *x) { + SRBTreeNode *y = x->right; + x->right = y->left; + if (y->left != pTree->NIL) { + y->left->parent = x; } - - right->parent = pNode->parent; - if (pNode->parent == NULL) { - pTree->root = right; - } else if (pNode == pNode->parent->left) { - pNode->parent->left = right; + y->parent = x->parent; + if (x->parent == pTree->NIL) { + pTree->root = y; + } else if (x == x->parent->left) { + x->parent->left = y; } else { - pNode->parent->right = right; + x->parent->right = y; } - - right->left = pNode; - pNode->parent = right; + y->left = x; + x->parent = y; } -static void tRBTreeRotateRight(SRBTree *pTree, SRBTreeNode *pNode) { - SRBTreeNode *left = pNode->left; - - pNode->left = left->right; - if (pNode->left) { - pNode->left->parent = pNode; +static void tRBTreeRotateRight(SRBTree *pTree, SRBTreeNode *x) { + SRBTreeNode *y = x->left; + x->left = y->right; + if (y->right != pTree->NIL) { + y->right->parent = x; } - - left->parent = pNode->parent; - if (pNode->parent == NULL) { - pTree->root = left; - } else if (pNode == pNode->parent->left) { - pNode->parent->left = left; + y->parent = x->parent; + if (x->parent == pTree->NIL) { + pTree->root = y; + } else if (x == x->parent->right) { + x->parent->right = y; } else { - pNode->parent->right = left; + x->parent->left = y; } - - left->right = pNode; - pNode->parent = left; + y->right = x; + x->parent = y; } -#define tRBTreeCreate(compare) \ - (SRBTree) { .cmprFn = (compare), .root = NULL } +static void tRBTreePutFix(SRBTree *pTree, SRBTreeNode *z) { + while (z->parent->color == RED) { + if (z->parent == z->parent->parent->left) { // z.parent is the left child -SRBTreeNode *tRBTreePut(SRBTree *pTree, SRBTreeNode *pNew) { - pNew->left = NULL; - pNew->right = NULL; - pNew->color = RED; + SRBTreeNode *y = z->parent->parent->right; // uncle of z - // insert - if (pTree->root == NULL) { - pNew->parent = NULL; - pTree->root = pNew; - } else { - SRBTreeNode *pNode = pTree->root; - while (true) { - ASSERT(pNode); - - int32_t c = pTree->cmprFn(pNew->payload, pNode->payload); - if (c < 0) { - if (pNode->left) { - pNode = pNode->left; - } else { - pNew->parent = pNode; - pNode->left = pNew; - break; - } - } else if (c > 0) { - if (pNode->right) { - pNode = pNode->right; - } else { - pNew->parent = pNode; - pNode->right = pNew; - break; + if (y->color == RED) { // case 1 + z->parent->color = BLACK; + y->color = BLACK; + z->parent->parent->color = RED; + z = z->parent->parent; + } else { // case2 or case3 + if (z == z->parent->right) { // case2 + z = z->parent; // marked z.parent as new z + tRBTreeRotateLeft(pTree, z); } + // case3 + z->parent->color = BLACK; // made parent black + z->parent->parent->color = RED; // made parent red + tRBTreeRotateRight(pTree, z->parent->parent); + } + } else { // z.parent is the right child + SRBTreeNode *y = z->parent->parent->left; // uncle of z + + if (y->color == RED) { + z->parent->color = BLACK; + y->color = BLACK; + z->parent->parent->color = RED; + z = z->parent->parent; } else { - return NULL; + if (z == z->parent->left) { + z = z->parent; // marked z.parent as new z + tRBTreeRotateRight(pTree, z); + } + z->parent->color = BLACK; // made parent black + z->parent->parent->color = RED; // made parent red + tRBTreeRotateLeft(pTree, z->parent->parent); } } } + pTree->root->color = BLACK; +} + +static void tRBTreeTransplant(SRBTree *pTree, SRBTreeNode *u, SRBTreeNode *v) { + if (u->parent == pTree->NIL) + pTree->root = v; + else if (u == u->parent->left) + u->parent->left = v; + else + u->parent->right = v; + v->parent = u->parent; +} + +static void tRBTreeDropFix(SRBTree *pTree, SRBTreeNode *x) { + while (x != pTree->root && x->color == BLACK) { + if (x == x->parent->left) { + SRBTreeNode *w = x->parent->right; + if (w->color == RED) { + w->color = BLACK; + x->parent->color = RED; + tRBTreeRotateLeft(pTree, x->parent); + w = x->parent->right; + } + if (w->left->color == BLACK && w->right->color == BLACK) { + w->color = RED; + x = x->parent; + } else { + if (w->right->color == BLACK) { + w->left->color = BLACK; + w->color = RED; + tRBTreeRotateRight(pTree, w); + w = x->parent->right; + } + w->color = x->parent->color; + x->parent->color = BLACK; + w->right->color = BLACK; + tRBTreeRotateLeft(pTree, x->parent); + x = pTree->root; + } + } else { + SRBTreeNode *w = x->parent->left; + if (w->color == RED) { + w->color = BLACK; + x->parent->color = RED; + tRBTreeRotateRight(pTree, x->parent); + w = x->parent->left; + } + if (w->right->color == BLACK && w->left->color == BLACK) { + w->color = RED; + x = x->parent; + } else { + if (w->left->color == BLACK) { + w->right->color = BLACK; + w->color = RED; + tRBTreeRotateLeft(pTree, w); + w = x->parent->left; + } + w->color = x->parent->color; + x->parent->color = BLACK; + w->left->color = BLACK; + tRBTreeRotateRight(pTree, x->parent); + x = pTree->root; + } + } + } + x->color = BLACK; +} + +static SRBTreeNode *tRBTreeSuccessor(SRBTree *pTree, SRBTreeNode *pNode) { + if (pNode->right != pTree->NIL) { + pNode = pNode->right; + while (pNode->left != pTree->NIL) { + pNode = pNode->left; + } + } else { + while (true) { + if (pNode->parent == pTree->NIL || pNode == pNode->parent->left) { + pNode = pNode->parent; + break; + } else { + pNode = pNode->parent; + } + } + } + + return pNode; +} + +static SRBTreeNode *tRBTreePredecessor(SRBTree *pTree, SRBTreeNode *pNode) { + if (pNode->left != pTree->NIL) { + pNode = pNode->left; + while (pNode->right != pTree->NIL) { + pNode = pNode->right; + } + } else { + while (true) { + if (pNode->parent == pTree->NIL || pNode == pNode->parent->right) { + pNode = pNode->parent; + break; + } else { + pNode = pNode->parent; + } + } + } + + return pNode; +} + +void tRBTreeCreate(SRBTree *pTree, tRBTreeCmprFn cmprFn) { + pTree->cmprFn = cmprFn; + pTree->n = 0; + pTree->NIL = &pTree->NILNODE; + pTree->NIL->color = BLACK; + pTree->NIL->parent = NULL; + pTree->NIL->left = NULL; + pTree->NIL->right = NULL; + pTree->root = pTree->NIL; + pTree->min = pTree->NIL; + pTree->max = pTree->NIL; +} + +SRBTreeNode *tRBTreePut(SRBTree *pTree, SRBTreeNode *z) { + SRBTreeNode *y = pTree->NIL; // variable for the parent of the added node + SRBTreeNode *temp = pTree->root; + + while (temp != pTree->NIL) { + y = temp; + + int32_t c = pTree->cmprFn(RBTREE_NODE_PAYLOAD(z), RBTREE_NODE_PAYLOAD(temp)); + if (c < 0) { + temp = temp->left; + } else if (c > 0) { + temp = temp->right; + } else { + return NULL; + } + } + z->parent = y; + + if (y == pTree->NIL) { + pTree->root = z; + } else if (pTree->cmprFn(RBTREE_NODE_PAYLOAD(z), RBTREE_NODE_PAYLOAD(y)) < 0) { + y->left = z; + } else { + y->right = z; + } + + z->color = RED; + z->left = pTree->NIL; + z->right = pTree->NIL; + + tRBTreePutFix(pTree, z); + + // update min/max node + if (pTree->min == pTree->NIL || pTree->cmprFn(RBTREE_NODE_PAYLOAD(pTree->min), RBTREE_NODE_PAYLOAD(z)) > 0) { + pTree->min = z; + } + if (pTree->max == pTree->NIL || pTree->cmprFn(RBTREE_NODE_PAYLOAD(pTree->max), RBTREE_NODE_PAYLOAD(z)) < 0) { + pTree->max = z; + } + pTree->n++; + return z; +} + +void tRBTreeDrop(SRBTree *pTree, SRBTreeNode *z) { + SRBTreeNode *y = z; + SRBTreeNode *x; + ECOLOR y_orignal_color = y->color; + + // update min/max node + if (pTree->min == z) { + pTree->min = tRBTreeSuccessor(pTree, pTree->min); + } + if (pTree->max == z) { + pTree->max = tRBTreePredecessor(pTree, pTree->max); + } + + // drop impl + if (z->left == pTree->NIL) { + x = z->right; + tRBTreeTransplant(pTree, z, z->right); + } else if (z->right == pTree->NIL) { + x = z->left; + tRBTreeTransplant(pTree, z, z->left); + } else { + y = tRBTreeSuccessor(pTree, z); + y_orignal_color = y->color; + x = y->right; + if (y->parent == z) { + x->parent = z; + } else { + tRBTreeTransplant(pTree, y, y->right); + y->right = z->right; + y->right->parent = y; + } + tRBTreeTransplant(pTree, z, y); + y->left = z->left; + y->left->parent = y; + y->color = z->color; + } // fix - SRBTreeNode *pNode = pNew; - while (pNode->parent && pNode->parent->color == RED) { - SRBTreeNode *p = pNode->parent; - SRBTreeNode *g = p->parent; - - if (p == g->left) { - SRBTreeNode *u = g->right; - - if (RBTREE_NODE_COLOR(u) == RED) { - p->color = BLACK; - u->color = BLACK; - g->color = RED; - pNode = g; - } else { - if (pNode == p->right) { - pNode = p; - tRBTreeRotateLeft(pTree, pNode); - } - pNode->parent->color = BLACK; - pNode->parent->parent->color = RED; - tRBTreeRotateRight(pTree, pNode->parent->parent); - } - } else { - SRBTreeNode *u = g->left; - - if (RBTREE_NODE_COLOR(u) == RED) { - p->color = BLACK; - u->color = BLACK; - g->color = RED; - } else { - if (pNode == p->left) { - pNode = p; - tRBTreeRotateRight(pTree, pNode); - } - pNode->parent->color = BLACK; - pNode->parent->parent->color = RED; - tRBTreeRotateLeft(pTree, pNode->parent->parent); - } - } + if (y_orignal_color == BLACK) { + tRBTreeDropFix(pTree, x); } - - pTree->root->color = BLACK; - return pNew; + pTree->n--; } -SRBTreeNode *tRBTreeDrop(SRBTree *pTree, void *pKey) { - SRBTreeNode *pNode = pTree->root; - - while (pNode) { - int32_t c = pTree->cmprFn(pKey, pNode->payload); - - if (c < 0) { - pNode = pNode->left; - } else if (c > 0) { - pNode = pNode->right; - } else { - break; - } - } +SRBTreeNode *tRBTreeDropByKey(SRBTree *pTree, void *pKey) { + SRBTreeNode *pNode = tRBTreeGet(pTree, pKey); if (pNode) { - // TODO + tRBTreeDrop(pTree, pNode); } return pNode; @@ -194,8 +312,8 @@ SRBTreeNode *tRBTreeDrop(SRBTree *pTree, void *pKey) { SRBTreeNode *tRBTreeGet(SRBTree *pTree, void *pKey) { SRBTreeNode *pNode = pTree->root; - while (pNode) { - int32_t c = pTree->cmprFn(pKey, pNode->payload); + while (pNode != pTree->NIL) { + int32_t c = pTree->cmprFn(pKey, RBTREE_NODE_PAYLOAD(pNode)); if (c < 0) { pNode = pNode->left; @@ -206,5 +324,23 @@ SRBTreeNode *tRBTreeGet(SRBTree *pTree, void *pKey) { } } - return pNode; + return (pNode == pTree->NIL) ? NULL : pNode; } + +// SRBTreeIter ================================================ +SRBTreeNode *tRBTreeIterNext(SRBTreeIter *pIter) { + SRBTreeNode *pNode = pIter->pNode; + + if (pIter->pNode != pIter->pTree->NIL) { + if (pIter->asc) { + // ascend + pIter->pNode = tRBTreeSuccessor(pIter->pTree, pIter->pNode); + } else { + // descend + pIter->pNode = tRBTreePredecessor(pIter->pTree, pIter->pNode); + } + } + +_exit: + return (pNode == pIter->pTree->NIL) ? NULL : pNode; +} \ No newline at end of file diff --git a/source/util/test/CMakeLists.txt b/source/util/test/CMakeLists.txt index d2a503e661..6e42ef7e75 100644 --- a/source/util/test/CMakeLists.txt +++ b/source/util/test/CMakeLists.txt @@ -75,4 +75,12 @@ target_link_libraries(taosbsearchTest os util gtest_main) add_test( NAME taosbsearchTest COMMAND taosbsearchTest +) + +# trbtreeTest +add_executable(rbtreeTest "trbtreeTest.cpp") +target_link_libraries(rbtreeTest os util gtest_main) +add_test( + NAME rbtreeTest + COMMAND rbtreeTest ) \ No newline at end of file diff --git a/source/util/test/trbtreeTest.cpp b/source/util/test/trbtreeTest.cpp new file mode 100644 index 0000000000..cabf315df0 --- /dev/null +++ b/source/util/test/trbtreeTest.cpp @@ -0,0 +1,40 @@ +#include + +#include +#include + +#include "trbtree.h" + +static int32_t tCmprInteger(const void *p1, const void *p2) { + if (*(int *)p1 < *(int *)p2) { + return -1; + } else if (*(int *)p1 > *(int *)p2) { + return 1; + } + return 0; +} + +TEST(trbtreeTest, rbtree_test1) { +#if 0 + SRBTree rt; + tRBTreeCreate(&rt, tCmprInteger); + int a[] = {1, 3, 4, 2, 7, 5, 8}; + + for (int i = 0; i < sizeof(a) / sizeof(a[0]); i++) { + SRBTreeNode *pNode = (SRBTreeNode *)taosMemoryMalloc(sizeof(*pNode) + sizeof(int)); + *(int *)pNode->payload = a[i]; + + tRBTreePut(&rt, pNode); + } + + SRBTreeIter rti = tRBTreeIterCreate(&rt, 1); + SRBTreeNode *pNode = tRBTreeIterNext(&rti); + int la = 0; + while (pNode) { + GTEST_ASSERT_GT(*(int *)pNode->payload, la); + la = *(int *)pNode->payload; + // printf("%d\n", la); + pNode = tRBTreeIterNext(&rti); + } +#endif +} \ No newline at end of file diff --git a/tests/script/tsim/parser/slimit_alter_tags.sim b/tests/script/tsim/parser/slimit_alter_tags.sim index 3827b14b45..b5afbfa56e 100644 --- a/tests/script/tsim/parser/slimit_alter_tags.sim +++ b/tests/script/tsim/parser/slimit_alter_tags.sim @@ -128,6 +128,7 @@ if $rows != 5 then return -1 endi if $data00 != $rowNum then + print expect $rowNum , actual: $data00 return -1 endi if $data10 != $rowNum then diff --git a/tests/script/tsim/stream/basic1.sim b/tests/script/tsim/stream/basic1.sim index d9777d5133..70ca1179b0 100644 --- a/tests/script/tsim/stream/basic1.sim +++ b/tests/script/tsim/stream/basic1.sim @@ -588,4 +588,38 @@ if $data00 != 5 then goto loop3 endi +#max,min selectivity +sql create database test3 vgroups 1; +sql use test3; +sql create stable st(ts timestamp, a int, b int , c int) tags(ta int,tb int,tc int); +sql create table ts1 using st tags(1,1,1); +sql create stream stream_t3 trigger at_once into streamtST3 as select ts, min(a) c6, a, b, c, ta, tb, tc from ts1 interval(10s) ; + +sql insert into ts1 values(1648791211000,1,2,3); +sleep 50 +sql insert into ts1 values(1648791222001,2,2,3); +sleep 50 + +$loop_count = 0 +loop3: +sql select * from streamtST3; + +sleep 300 +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +# row 0 +if $data02 != 1 then + print =====data02=$data02 + goto loop3 +endi + +# row 1 +if $data12 != 2 then + print =====data12=$data12 + goto loop3 +endi + system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/stream/distributeInterval0.sim b/tests/script/tsim/stream/distributeInterval0.sim index b6b427343e..9b2e940556 100644 --- a/tests/script/tsim/stream/distributeInterval0.sim +++ b/tests/script/tsim/stream/distributeInterval0.sim @@ -198,7 +198,7 @@ endi sql select _wstart, count(*) c1, count(d) c2 , sum(a) c3 , max(b) c4, min(c) c5, avg(d) from st interval(10s); -sql create database test1 vgroups 1; +sql create database test1 vgroups 4; sql use test1; sql create stable st(ts timestamp, a int, b int , c int) tags(ta int,tb int,tc int); sql create table ts1 using st tags(1,1,1); @@ -232,4 +232,43 @@ if $data11 != 2 then goto loop2 endi +#max,min selectivity +sql create database test3 vgroups 4; +sql use test3; +sql create stable st(ts timestamp, a int, b int , c int) tags(ta int,tb int,tc int); +sql create table ts1 using st tags(1,1,1); +sql create table ts2 using st tags(2,2,2); +sql create stream stream_t3 trigger at_once into streamtST3 as select ts, min(a) c6, a, b, c, ta, tb, tc from st interval(10s) ; + +sql insert into ts1 values(1648791211000,1,2,3); +sleep 50 +sql insert into ts1 values(1648791222001,2,2,3); +sleep 50 +sql insert into ts2 values(1648791211000,1,2,3); +sleep 50 +sql insert into ts2 values(1648791222001,2,2,3); +sleep 50 + +$loop_count = 0 +loop3: +sql select * from streamtST3; + +sleep 300 +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +# row 0 +if $data02 != 1 then + print =====data02=$data02 + goto loop3 +endi + +# row 1 +if $data12 != 2 then + print =====data12=$data12 + goto loop3 +endi + system sh/stop_dnodes.sh diff --git a/tests/script/tsim/stream/partitionbyColumn0.sim b/tests/script/tsim/stream/partitionbyColumn0.sim index d91d4b7bf0..24fdb9c994 100644 --- a/tests/script/tsim/stream/partitionbyColumn0.sim +++ b/tests/script/tsim/stream/partitionbyColumn0.sim @@ -24,7 +24,7 @@ sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); $loop_count = 0 loop0: -sleep 100 +sleep 50 sql select * from streamt order by c1, c4, c2, c3; $loop_count = $loop_count + 1 @@ -48,7 +48,7 @@ sql insert into t1 values(1648791213000,1,2,3,1.0); $loop_count = 0 loop1: -sleep 100 +sleep 50 sql select * from streamt order by c1, c4, c2, c3; $loop_count = $loop_count + 1 @@ -71,7 +71,7 @@ sql insert into t1 values(1648791213000,2,2,3,1.0); $loop_count = 0 loop2: -sleep 100 +sleep 50 sql select * from streamt order by c1, c4, c2, c3; $loop_count = $loop_count + 1 @@ -97,7 +97,7 @@ sql insert into t1 values(1648791213002,1,2,3,1.0); $loop_count = 0 loop3: -sleep 100 +sleep 50 sql select * from streamt order by c1, c4, c2, c3; $loop_count = $loop_count + 1 @@ -134,7 +134,7 @@ sql insert into t1 values(1648791213001,1,2,3,1.0) (1648791223001,2,2,3,1.0) (16 $loop_count = 0 loop4: -sleep 100 +sleep 50 sql select * from streamt order by c1, c4, c2, c3; $loop_count = $loop_count + 1 @@ -208,7 +208,7 @@ sql insert into t1 values(1648791213001,1,2,3,2.0); $loop_count = 0 loop5: -sleep 100 +sleep 50 sql select * from streamt1 order by c1, c4, c2, c3; $loop_count = $loop_count + 1 @@ -229,7 +229,7 @@ sql insert into t1 values(1648791213001,1,1,6,2.0) (1648791223002,1,1,7,2.0); $loop_count = 0 loop6: -sleep 100 +sleep 50 sql select * from streamt1 order by c1, c4, c2, c3; $loop_count = $loop_count + 1 @@ -294,7 +294,7 @@ sql insert into t2 values(1648791213000,NULL,NULL,NULL,NULL); $loop_count = 0 loop7: -sleep 100 +sleep 50 sql select * from test.streamt2 order by c1, c2, c3; $loop_count = $loop_count + 1 @@ -318,7 +318,7 @@ sql insert into t2 values(1648791213000,1,2,3,1.0); $loop_count = 0 loop8: -sleep 100 +sleep 50 sql select * from test.streamt2 order by c1, c2, c3; $loop_count = $loop_count + 1 @@ -342,7 +342,7 @@ sql insert into t2 values(1648791213000,2,2,3,1.0); $loop_count = 0 loop9: -sleep 100 +sleep 50 sql select * from test.streamt2 order by c1, c2, c3; $loop_count = $loop_count + 1 @@ -372,7 +372,7 @@ sql insert into t2 values(1648791213002,1,2,3,1.0); $loop_count = 0 loop10: -sleep 100 +sleep 50 sql select * from test.streamt2 order by c1, c2, c3; $loop_count = $loop_count + 1 @@ -414,7 +414,7 @@ sql insert into t2 values(1648791213001,1,2,3,1.0) (1648791223001,2,2,3,1.0) (16 $loop_count = 0 loop11: -sleep 100 +sleep 50 sql select * from test.streamt2 order by c1, c2, c3; $loop_count = $loop_count + 1 @@ -492,7 +492,7 @@ sql insert into t4 values(1648791213000,1,2,3,1.0); $loop_count = 0 loop13: -sleep 100 +sleep 50 sql select * from test.streamt4 order by c1, c2, c3; $loop_count = $loop_count + 1 @@ -534,7 +534,7 @@ sql insert into t1 values(1648791213000,1,2,3,1.0); $loop_count = 0 loop14: -sleep 100 +sleep 50 sql select * from test.streamt4 order by c1, c2, c3; $loop_count = $loop_count + 1 diff --git a/tests/script/tsim/stream/partitionbyColumn1.sim b/tests/script/tsim/stream/partitionbyColumn1.sim index 7f5c53ebe3..1742d52cf0 100644 --- a/tests/script/tsim/stream/partitionbyColumn1.sim +++ b/tests/script/tsim/stream/partitionbyColumn1.sim @@ -24,11 +24,11 @@ sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); $loop_count = 0 loop0: -sleep 300 +sleep 50 sql select * from streamt order by c1, c4, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi @@ -45,12 +45,14 @@ endi sql insert into t1 values(1648791213000,1,2,3,1.0); +$loop_count = 0 + loop1: -sleep 300 +sleep 50 sql select * from streamt order by c1, c4, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi @@ -66,12 +68,14 @@ endi sql insert into t1 values(1648791213000,2,2,3,1.0); +$loop_count = 0 + loop2: -sleep 300 +sleep 50 sql select * from streamt order by c1, c4, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi @@ -90,12 +94,14 @@ sql insert into t1 values(1648791213001,2,2,3,1.0); sql insert into t1 values(1648791213002,2,2,3,1.0); sql insert into t1 values(1648791213002,1,2,3,1.0); +$loop_count = 0 + loop3: -sleep 300 +sleep 50 sql select * from streamt order by c1, c4, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi @@ -125,12 +131,14 @@ sql insert into t1 values(1648791223002,3,2,3,1.0); sql insert into t1 values(1648791223003,3,2,3,1.0); sql insert into t1 values(1648791213001,1,2,3,1.0) (1648791223001,2,2,3,1.0) (1648791223003,1,2,3,1.0); +$loop_count = 0 + loop4: -sleep 300 +sleep 50 sql select * from streamt order by c1, c4, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi @@ -199,11 +207,11 @@ sql insert into t1 values(1648791213001,1,2,3,2.0); $loop_count = 0 loop5: -sleep 300 +sleep 50 sql select * from streamt1 order by c1, c4, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi @@ -217,12 +225,14 @@ sql insert into t1 values(1648791223001,1,2,5,2.0); sql insert into t1 values(1648791223002,1,2,5,2.0); sql insert into t1 values(1648791213001,1,1,6,2.0) (1648791223002,1,1,7,2.0); +$loop_count = 0 + loop6: -sleep 300 +sleep 50 sql select * from streamt1 order by c1, c4, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi @@ -282,11 +292,11 @@ sql insert into t2 values(1648791213000,NULL,NULL,NULL,NULL); $loop_count = 0 loop7: -sleep 300 +sleep 50 sql select * from test.streamt2 order by c1, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi @@ -303,12 +313,14 @@ endi sql insert into t1 values(1648791213000,1,2,3,1.0); sql insert into t2 values(1648791213000,1,2,3,1.0); +$loop_count = 0 + loop8: -sleep 300 +sleep 50 sql select * from test.streamt2 order by c1, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi @@ -324,12 +336,15 @@ endi sql insert into t1 values(1648791213000,2,2,3,1.0); sql insert into t2 values(1648791213000,2,2,3,1.0); + +$loop_count = 0 + loop9: -sleep 300 +sleep 50 sql select * from test.streamt2 order by c1, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi @@ -352,12 +367,14 @@ sql insert into t2 values(1648791213001,2,2,3,1.0); sql insert into t2 values(1648791213002,2,2,3,1.0); sql insert into t2 values(1648791213002,1,2,3,1.0); +$loop_count = 0 + loop10: -sleep 300 +sleep 50 sql select * from test.streamt2 order by c1, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi @@ -373,7 +390,7 @@ endi if $data11 != 2 thenloop4 print =====data11=$data11 - goto loop3 + goto loop10 endi if $data12 != 1 then @@ -392,12 +409,14 @@ sql insert into t2 values(1648791223002,3,2,3,1.0); sql insert into t2 values(1648791223003,3,2,3,1.0); sql insert into t2 values(1648791213001,1,2,3,1.0) (1648791223001,2,2,3,1.0) (1648791223003,1,2,3,1.0); +$loop_count = 0 + loop11: -sleep 300 +sleep 50 sql select * from test.streamt2 order by c1, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi @@ -470,17 +489,17 @@ sql insert into t4 values(1648791213000,1,2,3,1.0); $loop_count = 0 loop13: -sleep 300 +sleep 50 sql select * from test.streamt4 order by c1, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi if $rows != 2 then print =====rows=$rows - goto loop14 + goto loop13 endi if $data01 != 1 then @@ -495,12 +514,12 @@ endi if $data11 != 3 then print =====data11=$data11 - goto loop11 + goto loop13 endi if $data12 != 2 then print =====data12=$data12 - goto loop11 + goto loop13 endi sql insert into t4 values(1648791213000,2,2,3,1.0); @@ -509,12 +528,14 @@ sql insert into t1 values(1648791233000,2,2,3,1.0); sql insert into t1 values(1648791213000,1,2,3,1.0); +$loop_count = 0 + loop14: -sleep 300 +sleep 50 sql select * from test.streamt4 order by c1, c2, c3; $loop_count = $loop_count + 1 -if $loop_count == 10 then +if $loop_count == 20 then return -1 endi diff --git a/tests/script/tsim/stream/partitionbyColumn2.sim b/tests/script/tsim/stream/partitionbyColumn2.sim index 3d9acbcac5..75d01b17ec 100644 --- a/tests/script/tsim/stream/partitionbyColumn2.sim +++ b/tests/script/tsim/stream/partitionbyColumn2.sim @@ -40,6 +40,8 @@ endi sql insert into t1 values(1648791213000,1,1,3,1.0); +$loop_count = 0 + loop1: sleep 300 sql select * from streamt order by c1, c4, c2, c3; @@ -61,6 +63,8 @@ endi sql insert into t1 values(1648791213000,2,1,3,1.0); +$loop_count = 0 + loop2: sleep 300 sql select * from streamt order by c1, c4, c2, c3; @@ -85,6 +89,8 @@ sql insert into t1 values(1648791213001,2,1,3,1.0); sql insert into t1 values(1648791213002,2,1,3,1.0); sql insert into t1 values(1648791213002,1,1,3,1.0); +$loop_count = 0 + loop3: sleep 300 sql select * from streamt order by c1, c4, c2, c3; @@ -120,6 +126,8 @@ sql insert into t1 values(1648791223002,3,2,3,1.0); sql insert into t1 values(1648791223003,3,2,3,1.0); sql insert into t1 values(1648791213001,1,1,3,1.0) (1648791223001,2,2,3,1.0) (1648791223003,1,2,3,1.0); +$loop_count = 0 + loop4: sleep 300 sql select * from streamt order by c1, c4, c2, c3; @@ -212,6 +220,8 @@ sql insert into t1 values(1648791223001,1,2,2,5); sql insert into t1 values(1648791223002,1,2,2,6); sql insert into t1 values(1648791213001,1,1,1,7) (1648791223002,1,1,2,8); +$loop_count = 0 + loop6: sleep 300 sql select * from streamt1 order by c1, c4, c2, c3; diff --git a/tests/system-test/fulltest.sh b/tests/system-test/fulltest.sh index 4305ceff56..11df13d451 100755 --- a/tests/system-test/fulltest.sh +++ b/tests/system-test/fulltest.sh @@ -156,8 +156,8 @@ python3 ./test.py -f 2-query/sin.py python3 ./test.py -f 2-query/sin.py -R python3 ./test.py -f 2-query/smaTest.py python3 ./test.py -f 2-query/smaTest.py -R -python3 ./test.py -f 2-query/sml.py -python3 ./test.py -f 2-query/sml.py -R +#python3 ./test.py -f 2-query/sml.py +#python3 ./test.py -f 2-query/sml.py -R python3 ./test.py -f 2-query/spread.py python3 ./test.py -f 2-query/spread.py -R python3 ./test.py -f 2-query/sqrt.py @@ -512,6 +512,6 @@ python3 ./test.py -f 2-query/count_partition.py -Q 3 python3 ./test.py -f 2-query/max_partition.py -Q 3 python3 ./test.py -f 2-query/last_row.py -Q 3 python3 ./test.py -f 2-query/tsbsQuery.py -Q 3 -python3 ./test.py -f 2-query/sml.py -Q 3 +#python3 ./test.py -f 2-query/sml.py -Q 3 python3 ./test.py -f 2-query/interp.py -Q 3 diff --git a/utils/test/c/sml_test.c b/utils/test/c/sml_test.c index 1fd1def263..ca3d464da7 100644 --- a/utils/test/c/sml_test.c +++ b/utils/test/c/sml_test.c @@ -92,7 +92,7 @@ int smlProcess_telnet_Test() { int smlProcess_json1_Test() { TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0); - TAOS_RES *pRes = taos_query(taos, "create database if not exists sml_db schemaless 1"); + TAOS_RES *pRes = taos_query(taos, "create database if not exists sml_db"); taos_free_result(pRes); pRes = taos_query(taos, "use sml_db"); @@ -112,7 +112,7 @@ int smlProcess_json1_Test() { " }," " {" " \"metric\": \"sys.cpu.nice\"," - " \"timestamp\": 1346846400," + " \"timestamp\": 1662344042," " \"value\": 9," " \"tags\": {" " \"host\": \"web02\"," @@ -141,7 +141,7 @@ int smlProcess_json2_Test() { "{" " \"metric\": \"meter_current0\"," " \"timestamp\": {" - " \"value\" : 1346846400," + " \"value\" : 1662344042," " \"type\" : \"s\"" " }," " \"value\": {" @@ -181,7 +181,7 @@ int smlProcess_json3_Test() { "{" " \"metric\": \"meter_current1\"," " \"timestamp\": {" - " \"value\" : 1346846400," + " \"value\" : 1662344042," " \"type\" : \"s\"" " }," " \"value\": {" @@ -249,7 +249,7 @@ int smlProcess_json4_Test() { "{" " \"metric\": \"meter_current2\"," " \"timestamp\": {" - " \"value\" : 1346846500000," + " \"value\" : 1662344042000," " \"type\" : \"ms\"" " }," " \"value\": \"ni\","