From d5e9169769389eac3c736139158dffdc8554fab4 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Mon, 8 Apr 2024 10:31:49 +0800 Subject: [PATCH] cos/multi-write: dnode part including mnode/vnode/tsdb --- source/dnode/mgmt/CMakeLists.txt | 8 + source/dnode/mgmt/exe/dmMain.c | 37 +- source/dnode/mgmt/mgmt_mnode/src/mmHandle.c | 2 + source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 40 +- source/dnode/mgmt/mgmt_vnode/src/vmInt.c | 5 +- source/dnode/mgmt/node_mgmt/CMakeLists.txt | 10 +- source/dnode/mnode/impl/inc/mndDef.h | 3 + source/dnode/mnode/impl/src/mndDb.c | 170 +++++- source/dnode/mnode/impl/src/mndMain.c | 18 +- source/dnode/mnode/impl/src/mndStb.c | 66 +- source/dnode/mnode/impl/src/mndVgroup.c | 13 +- source/dnode/vnode/inc/vnode.h | 19 +- source/dnode/vnode/src/inc/tsdb.h | 3 +- source/dnode/vnode/src/inc/vnodeInt.h | 25 +- source/dnode/vnode/src/tsdb/tsdbCommit2.c | 39 -- source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c | 11 +- source/dnode/vnode/src/tsdb/tsdbDataFileRW.c | 15 +- source/dnode/vnode/src/tsdb/tsdbDataFileRW.h | 1 + source/dnode/vnode/src/tsdb/tsdbDef.h | 2 +- source/dnode/vnode/src/tsdb/tsdbFS2.c | 96 ++- source/dnode/vnode/src/tsdb/tsdbFSet2.c | 6 +- source/dnode/vnode/src/tsdb/tsdbFSetRW.c | 1 + source/dnode/vnode/src/tsdb/tsdbFSetRW.h | 1 + source/dnode/vnode/src/tsdb/tsdbFile2.c | 145 ++++- source/dnode/vnode/src/tsdb/tsdbFile2.h | 4 +- source/dnode/vnode/src/tsdb/tsdbMerge.c | 7 +- .../dnode/vnode/src/tsdb/tsdbReaderWriter.c | 219 +++++-- source/dnode/vnode/src/tsdb/tsdbRetention.c | 569 ++++++++++++++---- source/dnode/vnode/src/tsdb/tsdbSttFileRW.c | 6 +- source/dnode/vnode/src/tsdb/tsdbUpgrade.c | 6 +- source/dnode/vnode/src/tsdb/tsdbUtil.c | 4 +- source/dnode/vnode/src/vnd/vnodeCfg.c | 24 +- source/dnode/vnode/src/vnd/vnodeOpen.c | 39 +- source/dnode/vnode/src/vnd/vnodeRetention.c | 10 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 82 ++- 35 files changed, 1255 insertions(+), 451 deletions(-) diff --git a/source/dnode/mgmt/CMakeLists.txt b/source/dnode/mgmt/CMakeLists.txt index 762b8fd529..e6be56cfb7 100644 --- a/source/dnode/mgmt/CMakeLists.txt +++ b/source/dnode/mgmt/CMakeLists.txt @@ -14,6 +14,14 @@ target_include_directories( PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/node_mgmt/inc" ) +IF (TD_ENTERPRISE) + IF(${BUILD_WITH_S3}) + add_definitions(-DUSE_S3) + ELSEIF(${BUILD_WITH_COS}) + add_definitions(-DUSE_COS) + ENDIF() +ENDIF() + IF (TD_LINUX_64 AND JEMALLOC_ENABLED) ADD_DEFINITIONS(-DTD_JEMALLOC_ENABLED -I${CMAKE_BINARY_DIR}/build/include -L${CMAKE_BINARY_DIR}/build/lib -Wl,-rpath,${CMAKE_BINARY_DIR}/build/lib -ljemalloc) SET(LINK_JEMALLOC "-L${CMAKE_BINARY_DIR}/build/lib -ljemalloc") diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index f9456f1729..4f8858f3c2 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -27,15 +27,15 @@ #include "cus_name.h" #else #ifndef CUS_NAME - #define CUS_NAME "TDengine" +#define CUS_NAME "TDengine" #endif #ifndef CUS_PROMPT - #define CUS_PROMPT "taos" +#define CUS_PROMPT "taos" #endif #ifndef CUS_EMAIL - #define CUS_EMAIL "" +#define CUS_EMAIL "" #endif #endif // clang-format off @@ -58,6 +58,7 @@ static struct { bool dumpSdb; bool generateGrant; bool memDbg; + bool checkS3; bool printAuth; bool printVersion; bool printHelp; @@ -169,7 +170,7 @@ static int32_t dmParseArgs(int32_t argc, char const *argv[]) { return -1; } } else if (strcmp(argv[i], "-a") == 0) { - if(i < argc - 1) { + if (i < argc - 1) { if (strlen(argv[++i]) >= PATH_MAX) { printf("apollo url overflow"); return -1; @@ -182,7 +183,7 @@ static int32_t dmParseArgs(int32_t argc, char const *argv[]) { } else if (strcmp(argv[i], "-s") == 0) { global.dumpSdb = true; } else if (strcmp(argv[i], "-E") == 0) { - if(i < argc - 1) { + if (i < argc - 1) { if (strlen(argv[++i]) >= PATH_MAX) { printf("env file path overflow"); return -1; @@ -207,6 +208,8 @@ static int32_t dmParseArgs(int32_t argc, char const *argv[]) { cmdEnvIndex++; } else if (strcmp(argv[i], "-dm") == 0) { global.memDbg = true; + } else if (strcmp(argv[i], "--checks3") == 0) { + global.checkS3 = true; } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "--usage") == 0 || strcmp(argv[i], "-?") == 0) { global.printHelp = true; @@ -267,8 +270,21 @@ static void dmDumpCfg() { cfgDumpCfg(pCfg, 0, true); } +static int32_t dmCheckS3() { + int32_t code = 0; + SConfig *pCfg = taosGetCfg(); + cfgDumpCfgS3(pCfg, 0, true); +#if defined(USE_S3) + extern int32_t s3CheckCfg(); + + code = s3CheckCfg(); +#endif + return code; +} + static int32_t dmInitLog() { - return taosCreateLog(CUS_PROMPT"dlog", 1, configDir, global.envCmd, global.envFile, global.apolloUrl, global.pArgs, 0); + return taosCreateLog(CUS_PROMPT "dlog", 1, configDir, global.envCmd, global.envFile, global.apolloUrl, global.pArgs, + 0); } static void taosCleanupArgs() { @@ -355,6 +371,15 @@ int mainWindows(int argc, char **argv) { return -1; } + if (global.checkS3) { + int32_t code = dmCheckS3(); + taosCleanupCfg(); + taosCloseLog(); + taosCleanupArgs(); + taosConvDestroy(); + return code; + } + if (global.dumpConfig) { dmDumpCfg(); taosCleanupCfg(); diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index a3a5f1d971..3d3b7df3be 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -141,6 +141,7 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_MND_ALTER_DB, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_COMPACT_DB, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_TRIM_DB, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MND_S3MIGRATE_DB, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_GET_DB_CFG, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_VGROUP_LIST, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_REDISTRIBUTE_VGROUP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; @@ -208,6 +209,7 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_STB_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_TTL_TABLE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TRIM_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_S3MIGRATE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_CREATE_SMA_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_SMA_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_SUBSCRIBE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index bfac0bab9d..3b64dfcfd1 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -53,7 +53,7 @@ void vmGetVnodeLoadsLite(SVnodeMgmt *pMgmt, SMonVloadInfo *pInfo) { SVnodeObj **ppVnode = pIter; if (ppVnode == NULL || *ppVnode == NULL) continue; - SVnodeObj *pVnode = *ppVnode; + SVnodeObj *pVnode = *ppVnode; if (!pVnode->failed) { SVnodeLoadLite vload = {0}; if (vnodeGetLoadLite(pVnode->pImpl, &vload) == 0) { @@ -157,6 +157,10 @@ static void vmGenerateVnodeCfg(SCreateVnodeReq *pCreate, SVnodeCfg *pCfg) { pCfg->hashSuffix = pCreate->hashSuffix; pCfg->tsdbPageSize = pCreate->tsdbPageSize * 1024; + pCfg->s3ChunkSize = pCreate->s3ChunkSize; + pCfg->s3KeepLocal = pCreate->s3KeepLocal; + pCfg->s3Compact = pCreate->s3Compact; + pCfg->standby = 0; pCfg->syncCfg.replicaNum = 0; pCfg->syncCfg.totalReplicaNum = 0; @@ -236,17 +240,18 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { dInfo( "vgId:%d, vnode management handle msgType:%s, start to create vnode, page:%d pageSize:%d buffer:%d szPage:%d " "szBuf:%" PRIu64 ", cacheLast:%d cacheLastSize:%d sstTrigger:%d tsdbPageSize:%d %d dbname:%s dbId:%" PRId64 - ", days:%d keep0:%d keep1:%d keep2:%d keepTimeOffset%d tsma:%d precision:%d compression:%d minRows:%d maxRows:%d" + ", days:%d keep0:%d keep1:%d keep2:%d keepTimeOffset%d s3ChunkSize:%d s3KeepLocal:%d s3Compact:%d tsma:%d " + "precision:%d compression:%d minRows:%d maxRows:%d" ", wal fsync:%d level:%d retentionPeriod:%d retentionSize:%" PRId64 " rollPeriod:%d segSize:%" PRId64 ", hash method:%d begin:%u end:%u prefix:%d surfix:%d replica:%d selfIndex:%d " "learnerReplica:%d learnerSelfIndex:%d strict:%d changeVersion:%d", req.vgId, TMSG_INFO(pMsg->msgType), req.pages, req.pageSize, req.buffer, req.pageSize * 1024, (uint64_t)req.buffer * 1024 * 1024, req.cacheLast, req.cacheLastSize, req.sstTrigger, req.tsdbPageSize, req.tsdbPageSize * 1024, req.db, req.dbUid, req.daysPerFile, req.daysToKeep0, req.daysToKeep1, req.daysToKeep2, - req.keepTimeOffset, req.isTsma, req.precision, req.compression, req.minRows, req.maxRows, req.walFsyncPeriod, - req.walLevel, req.walRetentionPeriod, req.walRetentionSize, req.walRollPeriod, req.walSegmentSize, req.hashMethod, - req.hashBegin, req.hashEnd, req.hashPrefix, req.hashSuffix, req.replica, req.selfIndex, req.learnerReplica, - req.learnerSelfIndex, req.strict, req.changeVersion); + req.keepTimeOffset, req.s3ChunkSize, req.s3KeepLocal, req.s3Compact, req.isTsma, req.precision, req.compression, + req.minRows, req.maxRows, req.walFsyncPeriod, req.walLevel, req.walRetentionPeriod, req.walRetentionSize, + req.walRollPeriod, req.walSegmentSize, req.hashMethod, req.hashBegin, req.hashEnd, req.hashPrefix, req.hashSuffix, + req.replica, req.selfIndex, req.learnerReplica, req.learnerSelfIndex, req.strict, req.changeVersion); for (int32_t i = 0; i < req.replica; ++i) { dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", req.vgId, i, req.replicas[i].fqdn, req.replicas[i].port, @@ -345,7 +350,7 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { _OVER: if (code != 0) { vnodeClose(pImpl); - vnodeDestroy(0, path, pMgmt->pTfs); + vnodeDestroy(0, path, pMgmt->pTfs, 0); } else { dInfo("vgId:%d, vnode management handle msgType:%s, end to create vnode, vnode is created", req.vgId, TMSG_INFO(pMsg->msgType)); @@ -356,7 +361,7 @@ _OVER: return code; } -//alter replica doesn't use this, but restore dnode still use this +// alter replica doesn't use this, but restore dnode still use this int32_t vmProcessAlterVnodeTypeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { SAlterVnodeTypeReq req = {0}; if (tDeserializeSAlterVnodeReplicaReq(pMsg->pCont, pMsg->contLen, &req) != 0) { @@ -398,8 +403,8 @@ int32_t vmProcessAlterVnodeTypeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { dInfo("node:%s, catched up leader, continue to process alter-node-type-request", pMgmt->name); int32_t vgId = req.vgId; - dInfo("vgId:%d, start to alter vnode type replica:%d selfIndex:%d strict:%d changeVersion:%d", - vgId, req.replica, req.selfIndex, req.strict, req.changeVersion); + dInfo("vgId:%d, start to alter vnode type replica:%d selfIndex:%d strict:%d changeVersion:%d", vgId, req.replica, + req.selfIndex, req.strict, req.changeVersion); for (int32_t i = 0; i < req.replica; ++i) { SReplica *pReplica = &req.replicas[i]; dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->id); @@ -482,12 +487,12 @@ int32_t vmProcessCheckLearnCatchupReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return -1; } - if(req.learnerReplicas == 0){ + if (req.learnerReplicas == 0) { req.learnerSelfIndex = -1; } - dInfo("vgId:%d, vnode management handle msgType:%s, start to process check-learner-catchup-request", - req.vgId, TMSG_INFO(pMsg->msgType)); + dInfo("vgId:%d, vnode management handle msgType:%s, start to process check-learner-catchup-request", req.vgId, + TMSG_INFO(pMsg->msgType)); SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId); if (pVnode == NULL) { @@ -499,7 +504,7 @@ int32_t vmProcessCheckLearnCatchupReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { ESyncRole role = vnodeGetRole(pVnode->pImpl); dInfo("vgId:%d, checking node role:%d", req.vgId, role); - if(role == TAOS_SYNC_ROLE_VOTER){ + if (role == TAOS_SYNC_ROLE_VOTER) { dError("vgId:%d, failed to alter vnode type since node already is role:%d", req.vgId, role); terrno = TSDB_CODE_VND_ALREADY_IS_VOTER; vmReleaseVnode(pMgmt, pVnode); @@ -507,7 +512,7 @@ int32_t vmProcessCheckLearnCatchupReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { } dInfo("vgId:%d, checking node catch up", req.vgId); - if(vnodeIsCatchUp(pVnode->pImpl) != 1){ + if (vnodeIsCatchUp(pVnode->pImpl) != 1) { terrno = TSDB_CODE_VND_NOT_CATCH_UP; vmReleaseVnode(pMgmt, pVnode); return -1; @@ -517,8 +522,8 @@ int32_t vmProcessCheckLearnCatchupReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { vmReleaseVnode(pMgmt, pVnode); - dInfo("vgId:%d, vnode management handle msgType:%s, end to process check-learner-catchup-request", - req.vgId, TMSG_INFO(pMsg->msgType)); + dInfo("vgId:%d, vnode management handle msgType:%s, end to process check-learner-catchup-request", req.vgId, + TMSG_INFO(pMsg->msgType)); return 0; } @@ -848,6 +853,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_HASHRANGE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_COMPACT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TRIM, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_S3MIGRATE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_CREATE_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_VNODE_TYPE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index 3dfc2bd96f..296372a955 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -15,9 +15,9 @@ #define _DEFAULT_SOURCE #include "vmInt.h" +#include "libs/function/tudf.h" #include "tfs.h" #include "vnd.h" -#include "libs/function/tudf.h" int32_t vmGetPrimaryDisk(SVnodeMgmt *pMgmt, int32_t vgId) { int32_t diskId = -1; @@ -234,6 +234,7 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal) dInfo("vgId:%d, commit data finished", pVnode->vgId); } + int32_t nodeId = vnodeNodeId(pVnode->pImpl); vnodeClose(pVnode->pImpl); pVnode->pImpl = NULL; @@ -250,7 +251,7 @@ _closed: if (pVnode->dropped) { dInfo("vgId:%d, vnode is destroyed, dropped:%d", pVnode->vgId, pVnode->dropped); snprintf(path, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, pVnode->vgId); - vnodeDestroy(pVnode->vgId, path, pMgmt->pTfs); + vnodeDestroy(pVnode->vgId, path, pMgmt->pTfs, nodeId); } vmFreeVnodeObj(&pVnode); diff --git a/source/dnode/mgmt/node_mgmt/CMakeLists.txt b/source/dnode/mgmt/node_mgmt/CMakeLists.txt index 0cdc68345a..82b9384d66 100644 --- a/source/dnode/mgmt/node_mgmt/CMakeLists.txt +++ b/source/dnode/mgmt/node_mgmt/CMakeLists.txt @@ -4,15 +4,13 @@ target_link_libraries( dnode mgmt_mnode mgmt_qnode mgmt_snode mgmt_vnode mgmt_dnode monitorfw ) -IF (TD_STORAGE) - +IF (TD_ENTERPRISE) IF(${BUILD_WITH_S3}) - add_definitions(-DUSE_S3) + add_definitions(-DUSE_S3) ELSEIF(${BUILD_WITH_COS}) - add_definitions(-DUSE_COS) + add_definitions(-DUSE_COS) ENDIF() - -ENDIF () +ENDIF() IF (DEFINED GRANT_CFG_INCLUDE_DIR) add_definitions(-DGRANTS_CFG) diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 4a092057ce..a26dbe890b 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -343,6 +343,9 @@ typedef struct { int32_t walRollPeriod; int64_t walRetentionSize; int64_t walSegmentSize; + int32_t s3ChunkSize; + int32_t s3KeepLocal; + int8_t s3Compact; } SDbCfg; typedef struct { diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 37c2d19bd4..8430b5c6b9 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "mndDb.h" +#include "audit.h" #include "mndCluster.h" #include "mndDnode.h" #include "mndIndex.h" @@ -30,12 +31,11 @@ #include "mndVgroup.h" #include "mndView.h" #include "systable.h" -#include "tjson.h" #include "thttp.h" -#include "audit.h" +#include "tjson.h" #define DB_VER_NUMBER 1 -#define DB_RESERVE_SIZE 42 +#define DB_RESERVE_SIZE 33 static SSdbRow *mndDbActionDecode(SSdbRaw *pRaw); static int32_t mndDbActionInsert(SSdb *pSdb, SDbObj *pDb); @@ -43,14 +43,15 @@ static int32_t mndDbActionDelete(SSdb *pSdb, SDbObj *pDb); static int32_t mndDbActionUpdate(SSdb *pSdb, SDbObj *pOld, SDbObj *pNew); static int32_t mndNewDbActionValidate(SMnode *pMnode, STrans *pTrans, SSdbRaw *pRaw); -static int32_t mndProcessCreateDbReq(SRpcMsg *pReq); -static int32_t mndProcessAlterDbReq(SRpcMsg *pReq); -static int32_t mndProcessDropDbReq(SRpcMsg *pReq); -static int32_t mndProcessUseDbReq(SRpcMsg *pReq); -static int32_t mndProcessTrimDbReq(SRpcMsg *pReq); -static int32_t mndRetrieveDbs(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rowsCapacity); -static void mndCancelGetNextDb(SMnode *pMnode, void *pIter); -static int32_t mndProcessGetDbCfgReq(SRpcMsg *pReq); +static int32_t mndProcessCreateDbReq(SRpcMsg *pReq); +static int32_t mndProcessAlterDbReq(SRpcMsg *pReq); +static int32_t mndProcessDropDbReq(SRpcMsg *pReq); +static int32_t mndProcessUseDbReq(SRpcMsg *pReq); +static int32_t mndProcessTrimDbReq(SRpcMsg *pReq); +static int32_t mndProcessS3MigrateDbReq(SRpcMsg *pReq); +static int32_t mndRetrieveDbs(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rowsCapacity); +static void mndCancelGetNextDb(SMnode *pMnode, void *pIter); +static int32_t mndProcessGetDbCfgReq(SRpcMsg *pReq); #ifndef TD_ENTERPRISE int32_t mndProcessCompactDbReq(SRpcMsg *pReq) { return TSDB_CODE_OPS_NOT_SUPPORT; } @@ -75,6 +76,7 @@ int32_t mndInitDb(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_MND_COMPACT_DB, mndProcessCompactDbReq); mndSetMsgHandle(pMnode, TDMT_MND_TRIM_DB, mndProcessTrimDbReq); mndSetMsgHandle(pMnode, TDMT_MND_GET_DB_CFG, mndProcessGetDbCfgReq); + mndSetMsgHandle(pMnode, TDMT_MND_S3MIGRATE_DB, mndProcessS3MigrateDbReq); mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_DB, mndRetrieveDbs); mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_DB, mndCancelGetNextDb); @@ -139,6 +141,9 @@ SSdbRaw *mndDbActionEncode(SDbObj *pDb) { SDB_SET_INT32(pRaw, dataPos, pDb->cfg.tsdbPageSize, _OVER) SDB_SET_INT64(pRaw, dataPos, pDb->compactStartTime, _OVER) SDB_SET_INT32(pRaw, dataPos, pDb->cfg.keepTimeOffset, _OVER) + SDB_SET_INT32(pRaw, dataPos, pDb->cfg.s3ChunkSize, _OVER) + SDB_SET_INT32(pRaw, dataPos, pDb->cfg.s3KeepLocal, _OVER) + SDB_SET_INT8(pRaw, dataPos, pDb->cfg.s3Compact, _OVER) SDB_SET_RESERVE(pRaw, dataPos, DB_RESERVE_SIZE, _OVER) SDB_SET_DATALEN(pRaw, dataPos, _OVER) @@ -230,6 +235,9 @@ static SSdbRow *mndDbActionDecode(SSdbRaw *pRaw) { SDB_GET_INT32(pRaw, dataPos, &pDb->cfg.tsdbPageSize, _OVER) SDB_GET_INT64(pRaw, dataPos, &pDb->compactStartTime, _OVER) SDB_GET_INT32(pRaw, dataPos, &pDb->cfg.keepTimeOffset, _OVER) + SDB_GET_INT32(pRaw, dataPos, &pDb->cfg.s3ChunkSize, _OVER) + SDB_GET_INT32(pRaw, dataPos, &pDb->cfg.s3KeepLocal, _OVER) + SDB_GET_INT8(pRaw, dataPos, &pDb->cfg.s3Compact, _OVER) SDB_GET_RESERVE(pRaw, dataPos, DB_RESERVE_SIZE, _OVER) taosInitRWLatch(&pDb->lock); @@ -319,6 +327,9 @@ static int32_t mndDbActionUpdate(SSdb *pSdb, SDbObj *pOld, SDbObj *pNew) { pOld->cfg.minRows = pNew->cfg.minRows; pOld->cfg.maxRows = pNew->cfg.maxRows; pOld->cfg.tsdbPageSize = pNew->cfg.tsdbPageSize; + pOld->cfg.s3ChunkSize = pNew->cfg.s3ChunkSize; + pOld->cfg.s3KeepLocal = pNew->cfg.s3KeepLocal; + pOld->cfg.s3Compact = pNew->cfg.s3Compact; pOld->compactStartTime = pNew->compactStartTime; taosWUnLockLatch(&pOld->lock); return 0; @@ -414,6 +425,10 @@ static int32_t mndCheckDbCfg(SMnode *pMnode, SDbCfg *pCfg) { if (pCfg->tsdbPageSize < TSDB_MIN_TSDB_PAGESIZE || pCfg->tsdbPageSize > TSDB_MAX_TSDB_PAGESIZE) return -1; if (taosArrayGetSize(pCfg->pRetensions) != pCfg->numOfRetensions) return -1; + if (pCfg->s3ChunkSize < TSDB_MIN_S3_CHUNK_SIZE || pCfg->s3ChunkSize > TSDB_MAX_S3_CHUNK_SIZE) return -1; + if (pCfg->s3KeepLocal < TSDB_MIN_S3_KEEP_LOCAL || pCfg->s3KeepLocal > TSDB_MAX_S3_KEEP_LOCAL) return -1; + if (pCfg->s3Compact < TSDB_MIN_S3_COMPACT || pCfg->s3Compact > TSDB_MAX_S3_COMPACT) return -1; + terrno = 0; return terrno; } @@ -448,6 +463,9 @@ static int32_t mndCheckInChangeDbCfg(SMnode *pMnode, SDbCfg *pCfg) { terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES; return -1; } + if (pCfg->s3ChunkSize < TSDB_MIN_S3_CHUNK_SIZE || pCfg->s3ChunkSize > TSDB_MAX_S3_CHUNK_SIZE) return -1; + if (pCfg->s3KeepLocal < TSDB_MIN_S3_KEEP_LOCAL || pCfg->s3KeepLocal > TSDB_MAX_S3_KEEP_LOCAL) return -1; + if (pCfg->s3Compact < TSDB_MIN_S3_COMPACT || pCfg->s3Compact > TSDB_MAX_S3_COMPACT) return -1; terrno = 0; return terrno; @@ -484,6 +502,9 @@ static void mndSetDefaultDbCfg(SDbCfg *pCfg) { if (pCfg->walSegmentSize < 0) pCfg->walSegmentSize = TSDB_DEFAULT_DB_WAL_SEGMENT_SIZE; if (pCfg->sstTrigger <= 0) pCfg->sstTrigger = TSDB_DEFAULT_SST_TRIGGER; if (pCfg->tsdbPageSize <= 0) pCfg->tsdbPageSize = TSDB_DEFAULT_TSDB_PAGESIZE; + if (pCfg->s3ChunkSize < 0) pCfg->s3ChunkSize = TSDB_DEFAULT_S3_CHUNK_SIZE; + if (pCfg->s3KeepLocal <= 0) pCfg->s3KeepLocal = TSDB_DEFAULT_S3_KEEP_LOCAL; + if (pCfg->s3Compact <= 0) pCfg->s3Compact = TSDB_DEFAULT_S3_COMPACT; } static int32_t mndSetCreateDbPrepareAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { @@ -628,6 +649,9 @@ static int32_t mndCreateDb(SMnode *pMnode, SRpcMsg *pReq, SCreateDbReq *pCreate, .sstTrigger = pCreate->sstTrigger, .hashPrefix = pCreate->hashPrefix, .hashSuffix = pCreate->hashSuffix, + .s3ChunkSize = pCreate->s3ChunkSize, + .s3KeepLocal = pCreate->s3KeepLocal, + .s3Compact = pCreate->s3Compact, .tsdbPageSize = pCreate->tsdbPageSize, }; @@ -698,17 +722,17 @@ _OVER: return code; } -static void mndBuildAuditDetailInt32(char* detail, char* tmp, char* format, int32_t para){ - if(para > 0){ - if(strlen(detail) > 0) strcat(detail, ", "); +static void mndBuildAuditDetailInt32(char *detail, char *tmp, char *format, int32_t para) { + if (para > 0) { + if (strlen(detail) > 0) strcat(detail, ", "); sprintf(tmp, format, para); strcat(detail, tmp); } } -static void mndBuildAuditDetailInt64(char* detail, char* tmp, char* format, int64_t para){ - if(para > 0){ - if(strlen(detail) > 0) strcat(detail, ", "); +static void mndBuildAuditDetailInt64(char *detail, char *tmp, char *format, int64_t para) { + if (para > 0) { + if (strlen(detail) > 0) strcat(detail, ", "); sprintf(tmp, format, para); strcat(detail, tmp); } @@ -893,6 +917,18 @@ static int32_t mndSetDbCfgFromAlterDbReq(SDbObj *pDb, SAlterDbReq *pAlter) { terrno = 0; } + if (pAlter->s3KeepLocal > TSDB_MIN_S3_KEEP_LOCAL && pAlter->s3KeepLocal != pDb->cfg.s3KeepLocal) { + pDb->cfg.s3KeepLocal = pAlter->s3KeepLocal; + pDb->vgVersion++; + terrno = 0; + } + + if (pAlter->s3Compact > TSDB_MIN_S3_COMPACT && pAlter->s3Compact != pDb->cfg.s3Compact) { + pDb->cfg.s3Compact = pAlter->s3Compact; + pDb->vgVersion++; + terrno = 0; + } + return terrno; } @@ -1081,6 +1117,9 @@ static void mndDumpDbCfgInfo(SDbCfgRsp *cfgRsp, SDbObj *pDb) { cfgRsp->pRetensions = taosArrayDup(pDb->cfg.pRetensions, NULL); cfgRsp->schemaless = pDb->cfg.schemaless; cfgRsp->sstTrigger = pDb->cfg.sstTrigger; + cfgRsp->s3ChunkSize = pDb->cfg.s3ChunkSize; + cfgRsp->s3KeepLocal = pDb->cfg.s3KeepLocal; + cfgRsp->s3Compact = pDb->cfg.s3Compact; } static int32_t mndProcessGetDbCfgReq(SRpcMsg *pReq) { @@ -1277,7 +1316,7 @@ static int32_t mndDropDb(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb) { if (mndDropStreamByDb(pMnode, pTrans, pDb) != 0) goto _OVER; #ifdef TD_ENTERPRISE if (mndDropViewByDb(pMnode, pTrans, pDb) != 0) goto _OVER; -#endif +#endif if (mndDropSmasByDb(pMnode, pTrans, pDb) != 0) goto _OVER; if (mndDropIdxsByDb(pMnode, pTrans, pDb) != 0) goto _OVER; if (mndSetDropDbRedoActions(pMnode, pTrans, pDb) != 0) goto _OVER; @@ -1562,21 +1601,19 @@ int32_t mndValidateDbInfo(SMnode *pMnode, SDbCacheInfo *pDbs, int32_t numOfDbs, int32_t numOfTable = mndGetDBTableNum(pDb, pMnode); - if (pDbCacheInfo->vgVersion >= pDb->vgVersion && - pDbCacheInfo->cfgVersion >= pDb->cfgVersion && - numOfTable == pDbCacheInfo->numOfTable && - pDbCacheInfo->stateTs == pDb->stateTs) { + if (pDbCacheInfo->vgVersion >= pDb->vgVersion && pDbCacheInfo->cfgVersion >= pDb->cfgVersion && + numOfTable == pDbCacheInfo->numOfTable && pDbCacheInfo->stateTs == pDb->stateTs) { mTrace("db:%s, valid dbinfo, vgVersion:%d cfgVersion:%d stateTs:%" PRId64 " numOfTables:%d, not changed vgVersion:%d cfgVersion:%d stateTs:%" PRId64 " numOfTables:%d", - pDbCacheInfo->dbFName, pDbCacheInfo->vgVersion, pDbCacheInfo->cfgVersion, pDbCacheInfo->stateTs, pDbCacheInfo->numOfTable, - pDb->vgVersion, pDb->cfgVersion, pDb->stateTs, numOfTable); + pDbCacheInfo->dbFName, pDbCacheInfo->vgVersion, pDbCacheInfo->cfgVersion, pDbCacheInfo->stateTs, + pDbCacheInfo->numOfTable, pDb->vgVersion, pDb->cfgVersion, pDb->stateTs, numOfTable); mndReleaseDb(pMnode, pDb); continue; } else { mInfo("db:%s, valid dbinfo, vgVersion:%d cfgVersion:%d stateTs:%" PRId64 " numOfTables:%d, changed to vgVersion:%d cfgVersion:%d stateTs:%" PRId64 " numOfTables:%d", - pDbCacheInfo->dbFName, pDbCacheInfo->vgVersion, pDbCacheInfo->cfgVersion, pDbCacheInfo->stateTs, pDbCacheInfo->numOfTable, - pDb->vgVersion, pDb->cfgVersion, pDb->stateTs, numOfTable); + pDbCacheInfo->dbFName, pDbCacheInfo->vgVersion, pDbCacheInfo->cfgVersion, pDbCacheInfo->stateTs, + pDbCacheInfo->numOfTable, pDb->vgVersion, pDb->cfgVersion, pDb->stateTs, numOfTable); } if (pDbCacheInfo->cfgVersion < pDb->cfgVersion) { @@ -1584,8 +1621,7 @@ int32_t mndValidateDbInfo(SMnode *pMnode, SDbCacheInfo *pDbs, int32_t numOfDbs, mndDumpDbCfgInfo(rsp.cfgRsp, pDb); } - if (pDbCacheInfo->vgVersion < pDb->vgVersion || - numOfTable != pDbCacheInfo->numOfTable || + if (pDbCacheInfo->vgVersion < pDb->vgVersion || numOfTable != pDbCacheInfo->numOfTable || pDbCacheInfo->stateTs != pDb->stateTs) { rsp.useDbRsp = taosMemoryCalloc(1, sizeof(SUseDbRsp)); rsp.useDbRsp->pVgroupInfos = taosArrayInit(pDb->cfg.numOfVgroups, sizeof(SVgroupInfo)); @@ -1695,6 +1731,77 @@ _OVER: return code; } +static int32_t mndS3MigrateDb(SMnode *pMnode, SDbObj *pDb) { + SSdb *pSdb = pMnode->pSdb; + SVgObj *pVgroup = NULL; + void *pIter = NULL; + SVS3MigrateDbReq s3migrateReq = {.timestamp = taosGetTimestampSec()}; + int32_t reqLen = tSerializeSVS3MigrateDbReq(NULL, 0, &s3migrateReq); + int32_t contLen = reqLen + sizeof(SMsgHead); + + while (1) { + pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup); + if (pIter == NULL) break; + + if (pVgroup->dbUid != pDb->uid) continue; + + SMsgHead *pHead = rpcMallocCont(contLen); + if (pHead == NULL) { + sdbCancelFetch(pSdb, pVgroup); + sdbRelease(pSdb, pVgroup); + continue; + } + pHead->contLen = htonl(contLen); + pHead->vgId = htonl(pVgroup->vgId); + tSerializeSVS3MigrateDbReq((char *)pHead + sizeof(SMsgHead), contLen, &s3migrateReq); + + SRpcMsg rpcMsg = {.msgType = TDMT_VND_S3MIGRATE, .pCont = pHead, .contLen = contLen}; + SEpSet epSet = mndGetVgroupEpset(pMnode, pVgroup); + int32_t code = tmsgSendReq(&epSet, &rpcMsg); + if (code != 0) { + mError("vgId:%d, failed to send vnode-s3migrate request to vnode since 0x%x", pVgroup->vgId, code); + } else { + mInfo("vgId:%d, send vnode-s3migrate request to vnode, time:%d", pVgroup->vgId, s3migrateReq.timestamp); + } + sdbRelease(pSdb, pVgroup); + } + + return 0; +} + +static int32_t mndProcessS3MigrateDbReq(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + int32_t code = -1; + SDbObj *pDb = NULL; + SS3MigrateDbReq s3migrateReq = {0}; + + if (tDeserializeSS3MigrateDbReq(pReq->pCont, pReq->contLen, &s3migrateReq) != 0) { + terrno = TSDB_CODE_INVALID_MSG; + goto _OVER; + } + + mInfo("db:%s, start to s3migrate", s3migrateReq.db); + + pDb = mndAcquireDb(pMnode, s3migrateReq.db); + if (pDb == NULL) { + goto _OVER; + } + + if (mndCheckDbPrivilege(pMnode, pReq->info.conn.user, MND_OPER_TRIM_DB, pDb) != 0) { + goto _OVER; + } + + code = mndS3MigrateDb(pMnode, pDb); + +_OVER: + if (code != 0) { + mError("db:%s, failed to process s3migrate db req since %s", s3migrateReq.db, terrstr()); + } + + mndReleaseDb(pMnode, pDb); + return code; +} + const char *mndGetDbStr(const char *src) { char *pos = strstr(src, TS_PATH_DELIMITER); if (pos != NULL) ++pos; @@ -1991,6 +2098,13 @@ static void mndDumpDbInfoData(SMnode *pMnode, SSDataBlock *pBlock, SDbObj *pDb, pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, rows, (const char *)&pDb->cfg.keepTimeOffset, false); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, rows, (const char *)&pDb->cfg.s3ChunkSize, false); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, rows, (const char *)&pDb->cfg.s3KeepLocal, false); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, rows, (const char *)&pDb->cfg.s3Compact, false); } taosMemoryFree(buf); diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 69a0bd477d..0bb1b63028 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -133,6 +133,14 @@ static void mndPullupTtl(SMnode *pMnode) { } static void mndPullupTrimDb(SMnode *pMnode) { + mTrace("pullup s3migrate"); + int32_t contLen = 0; + void *pReq = mndBuildTimerMsg(&contLen); + SRpcMsg rpcMsg = {.msgType = TDMT_MND_S3MIGRATE_DB_TIMER, .pCont = pReq, .contLen = contLen}; + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); +} + +static void mndPullupS3MigrateDb(SMnode *pMnode) { mTrace("pullup trim"); int32_t contLen = 0; void *pReq = mndBuildTimerMsg(&contLen); @@ -302,6 +310,7 @@ static int32_t minCronTime() { int32_t min = INT32_MAX; min = TMIN(min, tsTtlPushIntervalSec); min = TMIN(min, tsTrimVDbIntervalSec); + min = TMIN(min, tsS3MigrateIntervalSec); min = TMIN(min, tsTransPullupInterval); min = TMIN(min, tsCompactPullupInterval); min = TMIN(min, tsMqRebalanceInterval); @@ -325,6 +334,10 @@ void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) { mndPullupTrimDb(pMnode); } + if (tsS3MigrateEnabled && sec % tsS3MigrateIntervalSec == 0) { + mndPullupS3MigrateDb(pMnode); + } + if (sec % tsTransPullupInterval == 0) { mndPullupTrans(pMnode); } @@ -768,7 +781,8 @@ _OVER: pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER || pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER || pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE || - pMsg->msgType == TDMT_MND_STREAM_CHECKPOINT_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT) { + pMsg->msgType == TDMT_MND_STREAM_CHECKPOINT_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT || + pMsg->msgType == TDMT_MND_S3MIGRATE_DB_TIMER) { mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored, pMnode->stopped, state.restored, syncStr(state.state)); return -1; @@ -912,7 +926,7 @@ int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgr if (pObj->id == pMnode->selfDnodeId) { pClusterInfo->first_ep_dnode_id = pObj->id; tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep)); - //pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f; + // pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f; pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode); // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f); tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role)); diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 7ee1b36916..5bc855207f 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -42,6 +42,8 @@ static int32_t mndStbActionDelete(SSdb *pSdb, SStbObj *pStb); static int32_t mndStbActionUpdate(SSdb *pSdb, SStbObj *pOld, SStbObj *pNew); static int32_t mndProcessTtlTimer(SRpcMsg *pReq); static int32_t mndProcessTrimDbTimer(SRpcMsg *pReq); +static int32_t mndProcessS3MigrateDbTimer(SRpcMsg *pReq); +static int32_t mndProcessS3MigrateDbRsp(SRpcMsg *pReq); static int32_t mndProcessCreateStbReq(SRpcMsg *pReq); static int32_t mndProcessAlterStbReq(SRpcMsg *pReq); static int32_t mndProcessDropStbReq(SRpcMsg *pReq); @@ -82,6 +84,8 @@ int32_t mndInitStb(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_MND_TABLE_META, mndProcessTableMetaReq); mndSetMsgHandle(pMnode, TDMT_MND_TTL_TIMER, mndProcessTtlTimer); mndSetMsgHandle(pMnode, TDMT_MND_TRIM_DB_TIMER, mndProcessTrimDbTimer); + mndSetMsgHandle(pMnode, TDMT_VND_S3MIGRATE_RSP, mndProcessS3MigrateDbRsp); + mndSetMsgHandle(pMnode, TDMT_MND_S3MIGRATE_DB_TIMER, mndProcessS3MigrateDbTimer); mndSetMsgHandle(pMnode, TDMT_MND_TABLE_CFG, mndProcessTableCfgReq); // mndSetMsgHandle(pMnode, TDMT_MND_SYSTABLE_RETRIEVE, mndProcessRetrieveStbReq); @@ -762,9 +766,9 @@ int32_t mndBuildStbFromReq(SMnode *pMnode, SStbObj *pDst, SMCreateStbReq *pCreat memcpy(pDst->db, pDb->name, TSDB_DB_FNAME_LEN); pDst->createdTime = taosGetTimestampMs(); pDst->updateTime = pDst->createdTime; - pDst->uid = - (pCreate->source == TD_REQ_FROM_TAOX_OLD || pCreate->source == TD_REQ_FROM_TAOX) - ? pCreate->suid : mndGenerateUid(pCreate->name, TSDB_TABLE_FNAME_LEN); + pDst->uid = (pCreate->source == TD_REQ_FROM_TAOX_OLD || pCreate->source == TD_REQ_FROM_TAOX) + ? pCreate->suid + : mndGenerateUid(pCreate->name, TSDB_TABLE_FNAME_LEN); pDst->dbUid = pDb->uid; pDst->tagVer = 1; pDst->colVer = 1; @@ -983,6 +987,43 @@ static int32_t mndProcessTrimDbTimer(SRpcMsg *pReq) { return 0; } +static int32_t mndProcessS3MigrateDbTimer(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + SSdb *pSdb = pMnode->pSdb; + SVgObj *pVgroup = NULL; + void *pIter = NULL; + SVS3MigrateDbReq s3migrateReq = {.timestamp = taosGetTimestampSec()}; + int32_t reqLen = tSerializeSVS3MigrateDbReq(NULL, 0, &s3migrateReq); + int32_t contLen = reqLen + sizeof(SMsgHead); + + while (1) { + pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup); + if (pIter == NULL) break; + + SMsgHead *pHead = rpcMallocCont(contLen); + if (pHead == NULL) { + sdbCancelFetch(pSdb, pVgroup); + sdbRelease(pSdb, pVgroup); + continue; + } + pHead->contLen = htonl(contLen); + pHead->vgId = htonl(pVgroup->vgId); + tSerializeSVS3MigrateDbReq((char *)pHead + sizeof(SMsgHead), reqLen, &s3migrateReq); + + SRpcMsg rpcMsg = {.msgType = TDMT_VND_S3MIGRATE, .pCont = pHead, .contLen = contLen}; + SEpSet epSet = mndGetVgroupEpset(pMnode, pVgroup); + int32_t code = tmsgSendReq(&epSet, &rpcMsg); + if (code != 0) { + mError("vgId:%d, timer failed to send vnode-s3migrate request to vnode since 0x%x", pVgroup->vgId, code); + } else { + mInfo("vgId:%d, timer send vnode-s3migrate request to vnode, time:%d", pVgroup->vgId, s3migrateReq.timestamp); + } + sdbRelease(pSdb, pVgroup); + } + + return 0; +} + static int32_t mndFindSuperTableTagIndex(const SStbObj *pStb, const char *tagName) { for (int32_t tag = 0; tag < pStb->numOfTags; tag++) { if (strcmp(pStb->pTags[tag].name, tagName) == 0) { @@ -1131,7 +1172,8 @@ static int32_t mndProcessCreateStbReq(SRpcMsg *pReq) { } } else if (terrno != TSDB_CODE_MND_STB_NOT_EXIST) { goto _OVER; - } else if ((createReq.source == TD_REQ_FROM_TAOX_OLD || createReq.source == TD_REQ_FROM_TAOX) && (createReq.tagVer != 1 || createReq.colVer != 1)) { + } else if ((createReq.source == TD_REQ_FROM_TAOX_OLD || createReq.source == TD_REQ_FROM_TAOX) && + (createReq.tagVer != 1 || createReq.colVer != 1)) { mInfo("stb:%s, alter table does not need to be done, because table is deleted", createReq.name); code = 0; goto _OVER; @@ -1182,14 +1224,13 @@ static int32_t mndProcessCreateStbReq(SRpcMsg *pReq) { SName name = {0}; tNameFromString(&name, createReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - if(createReq.sql == NULL && createReq.sqlLen == 0){ + if (createReq.sql == NULL && createReq.sqlLen == 0) { char detail[1000] = {0}; sprintf(detail, "dbname:%s, stable name:%s", name.dbname, name.tname); auditRecord(pReq, pMnode->clusterId, "createStb", name.dbname, name.tname, detail, strlen(detail)); - } - else{ + } else { auditRecord(pReq, pMnode->clusterId, "createStb", name.dbname, name.tname, createReq.sql, createReq.sqlLen); } _OVER: @@ -1571,7 +1612,7 @@ static int32_t mndAlterStbTagBytes(SMnode *pMnode, const SStbObj *pOld, SStbObj for (int32_t i = 0; i < pOld->numOfTags; ++i) { nLen += (pOld->pTags[i].colId == colId) ? pField->bytes : pOld->pTags[i].bytes; } - + if (nLen > TSDB_MAX_TAGS_LEN) { terrno = TSDB_CODE_PAR_INVALID_TAGS_LENGTH; return -1; @@ -1934,7 +1975,7 @@ static int32_t mndBuildStbCfgImp(SDbObj *pDb, SStbObj *pStb, const char *tbName, return 0; } -static int32_t mndValidateStbVersion(SMnode *pMnode, SSTableVersion* pStbVer, bool* schema, bool* sma) { +static int32_t mndValidateStbVersion(SMnode *pMnode, SSTableVersion *pStbVer, bool *schema, bool *sma) { char tbFName[TSDB_TABLE_FNAME_LEN] = {0}; snprintf(tbFName, sizeof(tbFName), "%s.%s", pStbVer->dbFName, pStbVer->stbName); @@ -1964,7 +2005,7 @@ static int32_t mndValidateStbVersion(SMnode *pMnode, SSTableVersion* pStbVer, bo } else { *schema = false; } - + if (pStbVer->smaVer && pStbVer->smaVer != pStb->smaVer) { *sma = true; } else { @@ -2535,6 +2576,7 @@ static int32_t mndCheckDropStbForStream(SMnode *pMnode, const char *stbFullName, static int32_t mndProcessDropTtltbRsp(SRpcMsg *pRsp) { return 0; } static int32_t mndProcessTrimDbRsp(SRpcMsg *pRsp) { return 0; } +static int32_t mndProcessS3MigrateDbRsp(SRpcMsg *pRsp) { return 0; } static int32_t mndProcessDropStbReq(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; @@ -2748,8 +2790,8 @@ int32_t mndValidateStbInfo(SMnode *pMnode, SSTableVersion *pStbVersions, int32_t pStbVersion->tversion = ntohl(pStbVersion->tversion); pStbVersion->smaVer = ntohl(pStbVersion->smaVer); - bool schema = false; - bool sma = false; + bool schema = false; + bool sma = false; int32_t code = mndValidateStbVersion(pMnode, pStbVersion, &schema, &sma); if (TSDB_CODE_SUCCESS != code) { STableMetaRsp metaRsp = {0}; diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index b0290191bc..612db94bb6 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -31,10 +31,10 @@ #define VGROUP_VER_NUMBER 1 #define VGROUP_RESERVE_SIZE 64 -static int32_t mndVgroupActionInsert(SSdb *pSdb, SVgObj *pVgroup); -static int32_t mndVgroupActionDelete(SSdb *pSdb, SVgObj *pVgroup); -static int32_t mndVgroupActionUpdate(SSdb *pSdb, SVgObj *pOld, SVgObj *pNew); -static int32_t mndNewVgActionValidate(SMnode *pMnode, STrans *pTrans, SSdbRaw *pRaw); +static int32_t mndVgroupActionInsert(SSdb *pSdb, SVgObj *pVgroup); +static int32_t mndVgroupActionDelete(SSdb *pSdb, SVgObj *pVgroup); +static int32_t mndVgroupActionUpdate(SSdb *pSdb, SVgObj *pOld, SVgObj *pNew); +static int32_t mndNewVgActionValidate(SMnode *pMnode, STrans *pTrans, SSdbRaw *pRaw); static int32_t mndRetrieveVgroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); static void mndCancelGetNextVgroup(SMnode *pMnode, void *pIter); @@ -275,6 +275,9 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg createReq.daysToKeep1 = pDb->cfg.daysToKeep1; createReq.daysToKeep2 = pDb->cfg.daysToKeep2; createReq.keepTimeOffset = pDb->cfg.keepTimeOffset; + createReq.s3ChunkSize = pDb->cfg.s3ChunkSize; + createReq.s3KeepLocal = pDb->cfg.s3KeepLocal; + createReq.s3Compact = pDb->cfg.s3Compact; createReq.minRows = pDb->cfg.minRows; createReq.maxRows = pDb->cfg.maxRows; createReq.walFsyncPeriod = pDb->cfg.walFsyncPeriod; @@ -398,6 +401,8 @@ static void *mndBuildAlterVnodeConfigReq(SMnode *pMnode, SDbObj *pDb, SVgObj *pV alterReq.minRows = pDb->cfg.minRows; alterReq.walRetentionPeriod = pDb->cfg.walRetentionPeriod; alterReq.walRetentionSize = pDb->cfg.walRetentionSize; + alterReq.s3KeepLocal = pDb->cfg.s3KeepLocal; + alterReq.s3Compact = pDb->cfg.s3Compact; mInfo("vgId:%d, build alter vnode config req", pVgroup->vgId); int32_t contLen = tSerializeSAlterVnodeConfigReq(NULL, 0, &alterReq); diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 294e75602e..7c033ac0f3 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -57,7 +57,7 @@ int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnod int32_t diskPrimary, STfs *pTfs); int32_t vnodeRestoreVgroupId(const char *srcPath, const char *dstPath, int32_t srcVgId, int32_t dstVgId, int32_t diskPrimary, STfs *pTfs); -void vnodeDestroy(int32_t vgId, const char *path, STfs *pTfs); +void vnodeDestroy(int32_t vgId, const char *path, STfs *pTfs, int32_t nodeId); SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgCb, bool force); void vnodePreClose(SVnode *pVnode); void vnodePostClose(SVnode *pVnode); @@ -69,7 +69,7 @@ int32_t vnodeBegin(SVnode *pVnode); int32_t vnodeStart(SVnode *pVnode); void vnodeStop(SVnode *pVnode); int64_t vnodeGetSyncHandle(SVnode *pVnode); -int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnapshot); +int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnapshot); void vnodeGetInfo(void *pVnode, const char **dbname, int32_t *vgId, int64_t *numOfTables, int64_t *numOfNormalTables); int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t contLen); int32_t vnodeGetTableList(void *pVnode, int8_t type, SArray *pList); @@ -134,8 +134,8 @@ tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name); int32_t metaGetCachedTbGroup(void *pVnode, tb_uid_t suid, const uint8_t *pKey, int32_t keyLen, SArray **pList); int32_t metaPutTbGroupToCache(void *pVnode, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, int32_t payloadLen); -bool metaTbInFilterCache(SMeta *pMeta, const void* key, int8_t type); -int32_t metaPutTbToFilterCache(SMeta *pMeta, const void* key, int8_t type); +bool metaTbInFilterCache(SMeta *pMeta, const void *key, int8_t type); +int32_t metaPutTbToFilterCache(SMeta *pMeta, const void *key, int8_t type); int32_t metaSizeOfTbFilterCache(SMeta *pMeta, int8_t type); int32_t metaInitTbFilterCache(SMeta *pMeta); @@ -172,13 +172,13 @@ void *tsdbGetIvtIdx2(SMeta *pMeta); uint64_t tsdbGetReaderMaxVersion2(STsdbReader *pReader); void tsdbReaderSetCloseFlag(STsdbReader *pReader); int64_t tsdbGetLastTimestamp2(SVnode *pVnode, void *pTableList, int32_t numOfTables, const char *pIdStr); -void tsdbSetFilesetDelimited(STsdbReader* pReader); -void tsdbReaderSetNotifyCb(STsdbReader* pReader, TsdReaderNotifyCbFn notifyFn, void* param); +void tsdbSetFilesetDelimited(STsdbReader *pReader); +void tsdbReaderSetNotifyCb(STsdbReader *pReader, TsdReaderNotifyCbFn notifyFn, void *param); int32_t tsdbReuseCacherowsReader(void *pReader, void *pTableIdList, int32_t numOfTables); int32_t tsdbCacherowsReaderOpen(void *pVnode, int32_t type, void *pTableIdList, int32_t numOfTables, int32_t numOfCols, SArray *pCidList, int32_t *pSlotIds, uint64_t suid, void **pReader, const char *idstr, - SArray* pFuncTypeList); + SArray *pFuncTypeList); int32_t tsdbRetrieveCacheRows(void *pReader, SSDataBlock *pResBlock, const int32_t *slotIds, const int32_t *dstSlotIds, SArray *pTableUids); void *tsdbCacherowsReaderClose(void *pReader); @@ -234,7 +234,7 @@ int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, i bool tqNextDataBlockFilterOut(STqReader *pReader, SHashObj *filterOutUids); int32_t tqRetrieveDataBlock(STqReader *pReader, SSDataBlock **pRes, const char *idstr); int32_t tqRetrieveTaosxBlock(STqReader *pReader, SArray *blocks, SArray *schemas, SSubmitTbData **pSubmitTbDataRet); -int32_t tqGetStreamExecInfo(SVnode* pVnode, int64_t streamId, int64_t* pDelay, bool* fhFinished); +int32_t tqGetStreamExecInfo(SVnode *pVnode, int64_t streamId, int64_t *pDelay, bool *fhFinished); // sma int32_t smaGetTSmaDays(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days); @@ -310,6 +310,9 @@ struct SVnodeCfg { int16_t hashPrefix; int16_t hashSuffix; int32_t tsdbPageSize; + int32_t s3ChunkSize; + int32_t s3KeepLocal; + int8_t s3Compact; }; #define TABLE_ROLLUP_ON ((int8_t)0x1) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index cac3be9ee3..a598e31fcc 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -279,7 +279,7 @@ int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx); // tsdbRead.c ============================================================================================== int32_t tsdbTakeReadSnap2(STsdbReader *pReader, _query_reseek_func_t reseek, STsdbReadSnap **ppSnap); void tsdbUntakeReadSnap2(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive); -int32_t tsdbGetTableSchema(SMeta* pMeta, int64_t uid, STSchema** pSchema, int64_t* suid); +int32_t tsdbGetTableSchema(SMeta *pMeta, int64_t uid, STSchema **pSchema, int64_t *suid); // tsdbMerge.c ============================================================================================== typedef struct { @@ -639,6 +639,7 @@ typedef struct { STsdb *pTsdb; const char *objName; uint8_t s3File; + int32_t lcn; int32_t fid; int64_t cid; int64_t blkno; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index e610161544..35d32948e0 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -108,13 +108,13 @@ typedef struct SQueryNode SQueryNode; #define VNODE_METRIC_SQL_COUNT "taos_sql_req:count" -#define VNODE_METRIC_TAG_NAME_SQL_TYPE "sql_type" +#define VNODE_METRIC_TAG_NAME_SQL_TYPE "sql_type" #define VNODE_METRIC_TAG_NAME_CLUSTER_ID "cluster_id" -#define VNODE_METRIC_TAG_NAME_DNODE_ID "dnode_id" -#define VNODE_METRIC_TAG_NAME_DNODE_EP "dnode_ep" -#define VNODE_METRIC_TAG_NAME_VGROUP_ID "vgroup_id" -#define VNODE_METRIC_TAG_NAME_USERNAME "username" -#define VNODE_METRIC_TAG_NAME_RESULT "result" +#define VNODE_METRIC_TAG_NAME_DNODE_ID "dnode_id" +#define VNODE_METRIC_TAG_NAME_DNODE_EP "dnode_ep" +#define VNODE_METRIC_TAG_NAME_VGROUP_ID "vgroup_id" +#define VNODE_METRIC_TAG_NAME_USERNAME "username" +#define VNODE_METRIC_TAG_NAME_RESULT "result" #define VNODE_METRIC_TAG_VALUE_INSERT_AFFECTED_ROWS "inserted_rows" //#define VNODE_METRIC_TAG_VALUE_INSERT "insert" @@ -239,6 +239,7 @@ int32_t tsdbCacheNewNTableColumn(STsdb* pTsdb, int64_t uid, int16_t cid, int8_t int32_t tsdbCacheDropNTableColumn(STsdb* pTsdb, int64_t uid, int16_t cid, int8_t col_type); int32_t tsdbCompact(STsdb* pTsdb, SCompactInfo* pInfo); int32_t tsdbRetention(STsdb* tsdb, int64_t now, int32_t sync); +int32_t tsdbS3Migrate(STsdb* tsdb, int64_t now, int32_t sync); int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq2* pMsg); int tsdbInsertData(STsdb* pTsdb, int64_t version, SSubmitReq2* pMsg, SSubmitRsp2* pRsp); int32_t tsdbInsertTableData(STsdb* pTsdb, int64_t version, SSubmitTbData* pSubmitTbData, int32_t* affectedRows); @@ -461,12 +462,12 @@ typedef struct SVCommitSched { int64_t maxWaitMs; } SVCommitSched; -typedef struct SVMonitorObj{ - char strClusterId[TSDB_CLUSTER_ID_LEN]; - char strDnodeId[TSDB_NODE_ID_LEN]; - char strVgId[TSDB_VGROUP_ID_LEN]; - taos_counter_t *insertCounter; -}SVMonitorObj; +typedef struct SVMonitorObj { + char strClusterId[TSDB_CLUSTER_ID_LEN]; + char strDnodeId[TSDB_NODE_ID_LEN]; + char strVgId[TSDB_VGROUP_ID_LEN]; + taos_counter_t* insertCounter; +} SVMonitorObj; struct SVnode { char* path; diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.c b/source/dnode/vnode/src/tsdb/tsdbCommit2.c index dc76aa61b2..34476c5cfd 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit2.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.c @@ -46,7 +46,6 @@ typedef struct { STFileSet *fset; TABLEID tbid[1]; bool hasTSData; - bool skipTsRow; } ctx[1]; // reader @@ -128,21 +127,8 @@ static int32_t tsdbCommitTSData(SCommitter2 *committer) { continue; } } - /* - extern int8_t tsS3Enabled; - int32_t nlevel = tfsGetLevel(committer->tsdb->pVnode->pTfs); - committer->ctx->skipTsRow = false; - if (tsS3Enabled && nlevel > 1 && committer->ctx->did.level == nlevel - 1) { - committer->ctx->skipTsRow = true; - } - */ int64_t ts = TSDBROW_TS(&row->row); - - if (committer->ctx->skipTsRow && ts <= committer->ctx->maxKey) { - ts = committer->ctx->maxKey + 1; - } - if (ts > committer->ctx->maxKey) { committer->ctx->nextKey = TMIN(committer->ctx->nextKey, ts); code = tsdbIterMergerSkipTableData(committer->dataIterMerger, committer->ctx->tbid); @@ -403,31 +389,6 @@ static int32_t tsdbCommitFileSetBegin(SCommitter2 *committer) { // reset nextKey committer->ctx->nextKey = TSKEY_MAX; - committer->ctx->skipTsRow = false; - - extern int8_t tsS3Enabled; - extern int32_t tsS3UploadDelaySec; - long s3Size(const char *object_name); - int32_t nlevel = tfsGetLevel(committer->tsdb->pVnode->pTfs); - if (tsS3Enabled && nlevel > 1 && committer->ctx->fset) { - STFileObj *fobj = committer->ctx->fset->farr[TSDB_FTYPE_DATA]; - if (fobj && fobj->f->did.level == nlevel - 1) { - // if exists on s3 or local mtime < committer->ctx->now - tsS3UploadDelay - const char *object_name = taosDirEntryBaseName((char *)fobj->fname); - - if (taosCheckExistFile(fobj->fname)) { - int32_t mtime = 0; - taosStatFile(fobj->fname, NULL, &mtime, NULL); - if (mtime < committer->ctx->now - tsS3UploadDelaySec) { - committer->ctx->skipTsRow = true; - } - } else /*if (s3Size(object_name) > 0) */ { - committer->ctx->skipTsRow = true; - } - } - // new fset can be written with ts data - } - _exit: if (code) { TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c index d2e3cb08e5..f8cf591777 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRAW.c @@ -29,15 +29,16 @@ int32_t tsdbDataFileRAWReaderOpen(const char *fname, const SDataFileRAWReaderCon reader[0]->config[0] = config[0]; + int32_t lcn = config->file.lcn; if (fname) { if (fname) { - code = tsdbOpenFile(fname, config->tsdb, TD_FILE_READ, &reader[0]->fd); + code = tsdbOpenFile(fname, config->tsdb, TD_FILE_READ, &reader[0]->fd, lcn); TSDB_CHECK_CODE(code, lino, _exit); } } else { char fname1[TSDB_FILENAME_LEN]; tsdbTFileName(config->tsdb, &config->file, fname1); - code = tsdbOpenFile(fname1, config->tsdb, TD_FILE_READ, &reader[0]->fd); + code = tsdbOpenFile(fname1, config->tsdb, TD_FILE_READ, &reader[0]->fd, lcn); TSDB_CHECK_CODE(code, lino, _exit); } @@ -113,8 +114,8 @@ static int32_t tsdbDataFileRAWWriterCloseAbort(SDataFileRAWWriter *writer) { static int32_t tsdbDataFileRAWWriterDoClose(SDataFileRAWWriter *writer) { return 0; } static int32_t tsdbDataFileRAWWriterCloseCommit(SDataFileRAWWriter *writer, TFileOpArray *opArr) { - int32_t code = 0; - int32_t lino = 0; + int32_t code = 0; + int32_t lino = 0; ASSERT(writer->ctx->offset <= writer->file.size); ASSERT(writer->config->fid == writer->file.fid); @@ -151,7 +152,7 @@ static int32_t tsdbDataFileRAWWriterOpenDataFD(SDataFileRAWWriter *writer) { } tsdbTFileName(writer->config->tsdb, &writer->file, fname); - code = tsdbOpenFile(fname, writer->config->tsdb, flag, &writer->fd); + code = tsdbOpenFile(fname, writer->config->tsdb, flag, &writer->fd, writer->file.lcn); TSDB_CHECK_CODE(code, lino, _exit); _exit: diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c index e1625c9ddb..3f35bcb166 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c @@ -97,7 +97,8 @@ int32_t tsdbDataFileReaderOpen(const char *fname[], const SDataFileReaderConfig if (fname) { for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) { if (fname[i]) { - code = tsdbOpenFile(fname[i], config->tsdb, TD_FILE_READ, &reader[0]->fd[i]); + int32_t lcn = config->files[i].file.lcn; + code = tsdbOpenFile(fname[i], config->tsdb, TD_FILE_READ, &reader[0]->fd[i], lcn); TSDB_CHECK_CODE(code, lino, _exit); } } @@ -106,7 +107,8 @@ int32_t tsdbDataFileReaderOpen(const char *fname[], const SDataFileReaderConfig if (config->files[i].exist) { char fname1[TSDB_FILENAME_LEN]; tsdbTFileName(config->tsdb, &config->files[i].file, fname1); - code = tsdbOpenFile(fname1, config->tsdb, TD_FILE_READ, &reader[0]->fd[i]); + int32_t lcn = config->files[i].file.lcn; + code = tsdbOpenFile(fname1, config->tsdb, TD_FILE_READ, &reader[0]->fd[i], lcn); TSDB_CHECK_CODE(code, lino, _exit); } } @@ -303,7 +305,7 @@ int32_t tsdbDataFileReadBlockDataByColumn(SDataFileReader *reader, const SBrinRe } int64_t szHint = 0; - if (bData->nColData > 3) { + if (bData->nColData > 2) { int64_t offset = 0; SBlockCol bc = {.cid = 0}; SBlockCol *blockCol = &bc; @@ -642,6 +644,7 @@ static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { .fid = writer->config->fid, .cid = writer->config->cid, .size = 0, + .lcn = writer->config->lcn == -1 ? 0 : -1, .minVer = VERSION_MAX, .maxVer = VERSION_MIN, }; @@ -1620,8 +1623,9 @@ static int32_t tsdbDataFileWriterOpenDataFD(SDataFileWriter *writer) { flag |= (TD_FILE_CREATE | TD_FILE_TRUNC); } + int32_t lcn = writer->files[ftype].lcn; tsdbTFileName(writer->config->tsdb, &writer->files[ftype], fname); - code = tsdbOpenFile(fname, writer->config->tsdb, flag, &writer->fd[ftype]); + code = tsdbOpenFile(fname, writer->config->tsdb, flag, &writer->fd[ftype], lcn); TSDB_CHECK_CODE(code, lino, _exit); if (writer->files[ftype].size == 0) { @@ -1789,8 +1793,9 @@ static int32_t tsdbDataFileWriterOpenTombFD(SDataFileWriter *writer) { int32_t flag = (TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + int32_t lcn = writer->files[ftype].lcn; tsdbTFileName(writer->config->tsdb, writer->files + ftype, fname); - code = tsdbOpenFile(fname, writer->config->tsdb, flag, &writer->fd[ftype]); + code = tsdbOpenFile(fname, writer->config->tsdb, flag, &writer->fd[ftype], lcn); TSDB_CHECK_CODE(code, lino, _exit); uint8_t hdr[TSDB_FHDR_SIZE] = {0}; diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h index c4aed6e787..c6d754b014 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h @@ -79,6 +79,7 @@ typedef struct SDataFileWriterConfig { int64_t cid; SDiskID did; int64_t compactVersion; + int32_t lcn; struct { bool exist; STFile file; diff --git a/source/dnode/vnode/src/tsdb/tsdbDef.h b/source/dnode/vnode/src/tsdb/tsdbDef.h index 0f512e1306..6aa6e32804 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDef.h +++ b/source/dnode/vnode/src/tsdb/tsdbDef.h @@ -31,7 +31,7 @@ typedef struct SFDataPtr { int64_t size; } SFDataPtr; -extern int32_t tsdbOpenFile(const char *path, STsdb *pTsdb, int32_t flag, STsdbFD **ppFD); +extern int32_t tsdbOpenFile(const char *path, STsdb *pTsdb, int32_t flag, STsdbFD **ppFD, int32_t lcn); extern void tsdbCloseFile(STsdbFD **ppFD); extern int32_t tsdbWriteFile(STsdbFD *pFD, int64_t offset, const uint8_t *pBuf, int64_t size); extern int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size, int64_t szHint); diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 1d2fd60df2..6195c37ab2 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -20,7 +20,7 @@ #define BLOCK_COMMIT_FACTOR 3 -extern void remove_file(const char *fname, bool last_level); +extern void remove_file(const char *fname); #define TSDB_FS_EDIT_MIN TSDB_FEDIT_COMMIT #define TSDB_FS_EDIT_MAX (TSDB_FEDIT_MERGE + 1) @@ -355,17 +355,32 @@ static int32_t tsdbFSDoScanAndFixFile(STFileSystem *fs, const STFileObj *fobj) { // check file existence if (!taosCheckExistFile(fobj->fname)) { - if (tsS3Enabled) { - const char *object_name = taosDirEntryBaseName((char *)fobj->fname); - long s3_size = s3Size(object_name); - if (s3_size > 0) { - return 0; + bool found = false; + + if (tsS3Enabled && fobj->f->lcn > 1) { + char fname1[TSDB_FILENAME_LEN]; + tsdbTFileLastChunkName(fs->tsdb, fobj->f, fname1); + if (!taosCheckExistFile(fname1)) { + code = TSDB_CODE_FILE_CORRUPTED; + tsdbError("vgId:%d %s failed since file:%s does not exist", TD_VID(fs->tsdb->pVnode), __func__, fname1); + return code; } + + found = true; + /* + const char *object_name = taosDirEntryBaseName((char *)fobj->fname); + long s3_size = s3Size(object_name); + if (s3_size > 0) { + return 0; + } + */ } - code = TSDB_CODE_FILE_CORRUPTED; - tsdbError("vgId:%d %s failed since file:%s does not exist", TD_VID(fs->tsdb->pVnode), __func__, fobj->fname); - return code; + if (!found) { + code = TSDB_CODE_FILE_CORRUPTED; + tsdbError("vgId:%d %s failed since file:%s does not exist", TD_VID(fs->tsdb->pVnode), __func__, fobj->fname); + return code; + } } { // TODO: check file size @@ -530,9 +545,9 @@ static int32_t tsdbFSDoSanAndFix(STFileSystem *fs) { if (taosIsDir(file->aname)) continue; if (tsdbFSGetFileObjHashEntry(&fobjHash, file->aname) == NULL && - strncmp(file->aname + strlen(file->aname) - 3, ".cp", 3)) { - int32_t nlevel = tfsGetLevel(fs->tsdb->pVnode->pTfs); - remove_file(file->aname, nlevel > 1 && file->did.level == nlevel - 1); + strncmp(file->aname + strlen(file->aname) - 3, ".cp", 3) && + strncmp(file->aname + strlen(file->aname) - 5, ".data", 5)) { + remove_file(file->aname); } } @@ -900,57 +915,28 @@ int32_t tsdbFSEditCommit(STFileSystem *fs) { continue; } - bool skipMerge = false; + // bool skipMerge = false; int32_t numFile = TARRAY2_SIZE(lvl->fobjArr); if (numFile >= sttTrigger && (!fset->mergeScheduled)) { - // launch merge - { - extern int8_t tsS3Enabled; - extern int32_t tsS3UploadDelaySec; - long s3Size(const char *object_name); - int32_t nlevel = tfsGetLevel(fs->tsdb->pVnode->pTfs); - if (tsS3Enabled && nlevel > 1) { - STFileObj *fobj = fset->farr[TSDB_FTYPE_DATA]; - if (fobj && fobj->f->did.level == nlevel - 1) { - // if exists on s3 or local mtime < committer->ctx->now - tsS3UploadDelay - const char *object_name = taosDirEntryBaseName((char *)fobj->fname); + code = tsdbTFileSetOpenChannel(fset); + TSDB_CHECK_CODE(code, lino, _exit); - if (taosCheckExistFile(fobj->fname)) { - int32_t now = taosGetTimestampSec(); - int32_t mtime = 0; - taosStatFile(fobj->fname, NULL, &mtime, NULL); - if (mtime < now - tsS3UploadDelaySec) { - skipMerge = true; - } - } else /* if (s3Size(object_name) > 0) */ { - skipMerge = true; - } - } - // new fset can be written with ts data - } + SMergeArg *arg = taosMemoryMalloc(sizeof(*arg)); + if (arg == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); } - if (!skipMerge) { - code = tsdbTFileSetOpenChannel(fset); - TSDB_CHECK_CODE(code, lino, _exit); + arg->tsdb = fs->tsdb; + arg->fid = fset->fid; - SMergeArg *arg = taosMemoryMalloc(sizeof(*arg)); - if (arg == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - arg->tsdb = fs->tsdb; - arg->fid = fset->fid; - - code = vnodeAsyncC(vnodeAsyncHandle[1], fset->bgTaskChannel, EVA_PRIORITY_HIGH, tsdbMerge, taosMemoryFree, - arg, NULL); - TSDB_CHECK_CODE(code, lino, _exit); - fset->mergeScheduled = true; - } + code = vnodeAsyncC(vnodeAsyncHandle[1], fset->bgTaskChannel, EVA_PRIORITY_HIGH, tsdbMerge, taosMemoryFree, arg, + NULL); + TSDB_CHECK_CODE(code, lino, _exit); + fset->mergeScheduled = true; } - if (numFile >= sttTrigger * BLOCK_COMMIT_FACTOR && !skipMerge) { + if (numFile >= sttTrigger * BLOCK_COMMIT_FACTOR) { tsdbFSSetBlockCommit(fset, true); } else { tsdbFSSetBlockCommit(fset, false); diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.c b/source/dnode/vnode/src/tsdb/tsdbFSet2.c index e088f54930..a2cdd9f381 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.c @@ -386,7 +386,7 @@ int32_t tsdbTFileSetApplyEdit(STsdb *pTsdb, const STFileSet *fset1, STFileSet *f fobj2->f[0] = fobj1->f[0]; } } else { - tsdbTFileObjRemove(fobj2); + tsdbTFileObjRemoveUpdateLC(fobj2); code = tsdbTFileObjInit(pTsdb, fobj1->f, &fset2->farr[ftype]); if (code) return code; } @@ -585,7 +585,7 @@ int32_t tsdbTFileSetRangeClear(STFileSetRange **fsr) { return 0; } -int32_t tsdbTFileSetRangeArrayDestroy(TFileSetRangeArray** ppArr) { +int32_t tsdbTFileSetRangeArrayDestroy(TFileSetRangeArray **ppArr) { if (ppArr && ppArr[0]) { TARRAY2_DESTROY(ppArr[0], tsdbTFileSetRangeClear); taosMemoryFree(ppArr[0]); @@ -663,4 +663,4 @@ bool tsdbTFileSetIsEmpty(const STFileSet *fset) { int32_t tsdbTFileSetOpenChannel(STFileSet *fset) { if (VNODE_ASYNC_VALID_CHANNEL_ID(fset->bgTaskChannel)) return 0; return vnodeAChannelInit(vnodeAsyncHandle[1], &fset->bgTaskChannel); -} \ No newline at end of file +} diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRW.c b/source/dnode/vnode/src/tsdb/tsdbFSetRW.c index e6b3cf8f54..6eec43baff 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSetRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSetRW.c @@ -149,6 +149,7 @@ int32_t tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer) { .skmTb = writer[0]->skmTb, .skmRow = writer[0]->skmRow, .bufArr = writer[0]->bufArr, + .lcn = config->lcn, }; for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ++ftype) { dataWriterConfig.files[ftype].exist = config->files[ftype].exist; diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRW.h b/source/dnode/vnode/src/tsdb/tsdbFSetRW.h index 0a8049cded..640e9db5cc 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSetRW.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSetRW.h @@ -37,6 +37,7 @@ typedef struct { int64_t cid; SDiskID did; int32_t level; + int32_t lcn; struct { bool exist; STFile file; diff --git a/source/dnode/vnode/src/tsdb/tsdbFile2.c b/source/dnode/vnode/src/tsdb/tsdbFile2.c index 8cd9304188..792d94ce73 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile2.c @@ -15,6 +15,7 @@ #include "tsdbFile2.h" #include "cos.h" +#include "vnd.h" // to_json static int32_t head_to_json(const STFile *file, cJSON *json); @@ -42,24 +43,9 @@ static const struct { [TSDB_FTYPE_STT] = {"stt", stt_to_json, stt_from_json}, }; -void remove_file(const char *fname, bool last_level) { - int32_t code = taosRemoveFile(fname); - if (code) { - if (tsS3Enabled && last_level) { - const char *object_name = taosDirEntryBaseName((char *)fname); - long s3_size = tsS3Enabled ? s3Size(object_name) : 0; - if (!strncmp(fname + strlen(fname) - 5, ".data", 5) && s3_size > 0) { - s3DeleteObjects(&object_name, 1); - tsdbInfo("file:%s is removed from s3", fname); - } else { - tsdbError("file:%s remove failed", fname); - } - } else { - tsdbError("file:%s remove failed", fname); - } - } else { - tsdbInfo("file:%s is removed", fname); - } +void remove_file(const char *fname) { + taosRemoveFile(fname); + tsdbInfo("file:%s is removed", fname); } static int32_t tfile_to_json(const STFile *file, cJSON *json) { @@ -73,6 +59,11 @@ static int32_t tfile_to_json(const STFile *file, cJSON *json) { return TSDB_CODE_OUT_OF_MEMORY; } + /* lcn - last chunk number */ + if (cJSON_AddNumberToObject(json, "lcn", file->lcn) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + /* fid */ if (cJSON_AddNumberToObject(json, "fid", file->fid) == NULL) { return TSDB_CODE_OUT_OF_MEMORY; @@ -121,6 +112,14 @@ static int32_t tfile_from_json(const cJSON *json, STFile *file) { return TSDB_CODE_FILE_CORRUPTED; } + /* lcn */ + item = cJSON_GetObjectItem(json, "lcn"); + if (cJSON_IsNumber(item)) { + file->lcn = item->valuedouble; + } else { + // return TSDB_CODE_FILE_CORRUPTED; + } + /* fid */ item = cJSON_GetObjectItem(json, "fid"); if (cJSON_IsNumber(item)) { @@ -236,7 +235,8 @@ int32_t tsdbTFileObjInit(STsdb *pTsdb, const STFile *f, STFileObj **fobj) { fobj[0]->state = TSDB_FSTATE_LIVE; fobj[0]->ref = 1; tsdbTFileName(pTsdb, f, fobj[0]->fname); - fobj[0]->nlevel = tfsGetLevel(pTsdb->pVnode->pTfs); + // fobj[0]->nlevel = tfsGetLevel(pTsdb->pVnode->pTfs); + fobj[0]->nlevel = vnodeNodeId(pTsdb->pVnode); return 0; } @@ -258,7 +258,7 @@ int32_t tsdbTFileObjUnref(STFileObj *fobj) { tsdbTrace("unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); if (nRef == 0) { if (fobj->state == TSDB_FSTATE_DEAD) { - remove_file(fobj->fname, fobj->nlevel > 1 && fobj->f->did.level == fobj->nlevel - 1); + remove_file(fobj->fname); } taosMemoryFree(fobj); } @@ -266,6 +266,61 @@ int32_t tsdbTFileObjUnref(STFileObj *fobj) { return 0; } +static void tsdbTFileObjRemoveLC(STFileObj *fobj, bool remove_all) { + if (fobj->f->type != TSDB_FTYPE_DATA) { + remove_file(fobj->fname); + return; + } + + if (!remove_all) { + if (fobj->f->lcn < 1) { + remove_file(fobj->fname); + return; + } else { + // remove local last chunk file + char lc_path[TSDB_FILENAME_LEN]; + tstrncpy(lc_path, fobj->fname, TSDB_FQDN_LEN); + + char *dot = strrchr(lc_path, '.'); + if (!dot) { + tsdbError("unexpected path: %s", lc_path); + return; + } + snprintf(dot + 1, TSDB_FQDN_LEN - (dot + 1 - lc_path), "%d.data", fobj->f->lcn); + + remove_file(lc_path); + } + } else { + // delete by data file prefix + char lc_path[TSDB_FILENAME_LEN]; + tstrncpy(lc_path, fobj->fname, TSDB_FQDN_LEN); + + char *object_name = taosDirEntryBaseName(lc_path); + int32_t node_id = fobj->nlevel; + char object_name_prefix[TSDB_FILENAME_LEN]; + snprintf(object_name_prefix, TSDB_FQDN_LEN, "%d/%s", node_id, object_name); + + char *dot = strrchr(object_name_prefix, '.'); + if (!dot) { + tsdbError("unexpected path: %s", object_name_prefix); + return; + } + *(dot + 1) = 0; + + s3DeleteObjectsByPrefix(object_name_prefix); + + // remove local last chunk file + dot = strrchr(lc_path, '.'); + if (!dot) { + tsdbError("unexpected path: %s", lc_path); + return; + } + snprintf(dot + 1, TSDB_FQDN_LEN - (dot + 1 - lc_path), "%d.data", fobj->f->lcn); + + remove_file(lc_path); + } +} + int32_t tsdbTFileObjRemove(STFileObj *fobj) { taosThreadMutexLock(&fobj->mutex); ASSERT(fobj->state == TSDB_FSTATE_LIVE && fobj->ref > 0); @@ -274,7 +329,21 @@ int32_t tsdbTFileObjRemove(STFileObj *fobj) { taosThreadMutexUnlock(&fobj->mutex); tsdbTrace("remove unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); if (nRef == 0) { - remove_file(fobj->fname, fobj->nlevel > 1 && fobj->f->did.level == fobj->nlevel - 1); + tsdbTFileObjRemoveLC(fobj, true); + taosMemoryFree(fobj); + } + return 0; +} + +int32_t tsdbTFileObjRemoveUpdateLC(STFileObj *fobj) { + taosThreadMutexLock(&fobj->mutex); + ASSERT(fobj->state == TSDB_FSTATE_LIVE && fobj->ref > 0); + fobj->state = TSDB_FSTATE_DEAD; + int32_t nRef = --fobj->ref; + taosThreadMutexUnlock(&fobj->mutex); + tsdbTrace("remove unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); + if (nRef == 0) { + tsdbTFileObjRemoveLC(fobj, false); taosMemoryFree(fobj); } return 0; @@ -310,13 +379,45 @@ int32_t tsdbTFileName(STsdb *pTsdb, const STFile *f, char fname[]) { return 0; } +int32_t tsdbTFileLastChunkName(STsdb *pTsdb, const STFile *f, char fname[]) { + SVnode *pVnode = pTsdb->pVnode; + STfs *pTfs = pVnode->pTfs; + + if (pTfs) { + snprintf(fname, // + TSDB_FILENAME_LEN, // + "%s%s%s%sv%df%dver%" PRId64 ".%d.%s", // + tfsGetDiskPath(pTfs, f->did), // + TD_DIRSEP, // + pTsdb->path, // + TD_DIRSEP, // + TD_VID(pVnode), // + f->fid, // + f->cid, // + f->lcn, // + g_tfile_info[f->type].suffix); + } else { + snprintf(fname, // + TSDB_FILENAME_LEN, // + "%s%sv%df%dver%" PRId64 ".%d.%s", // + pTsdb->path, // + TD_DIRSEP, // + TD_VID(pVnode), // + f->fid, // + f->cid, // + f->lcn, // + g_tfile_info[f->type].suffix); + } + return 0; +} + bool tsdbIsSameTFile(const STFile *f1, const STFile *f2) { if (f1->type != f2->type) return false; if (f1->did.level != f2->did.level) return false; if (f1->did.id != f2->did.id) return false; if (f1->fid != f2->fid) return false; if (f1->cid != f2->cid) return false; - if (f1->s3flag != f2->s3flag) return false; + if (f1->lcn != f2->lcn) return false; return true; } diff --git a/source/dnode/vnode/src/tsdb/tsdbFile2.h b/source/dnode/vnode/src/tsdb/tsdbFile2.h index b94f7a9fd0..70e167c1eb 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFile2.h @@ -45,6 +45,7 @@ enum { int32_t tsdbTFileToJson(const STFile *f, cJSON *json); int32_t tsdbJsonToTFile(const cJSON *json, tsdb_ftype_t ftype, STFile *f); int32_t tsdbTFileName(STsdb *pTsdb, const STFile *f, char fname[]); +int32_t tsdbTFileLastChunkName(STsdb *pTsdb, const STFile *f, char fname[]); bool tsdbIsSameTFile(const STFile *f1, const STFile *f2); bool tsdbIsTFileChanged(const STFile *f1, const STFile *f2); @@ -53,12 +54,13 @@ int32_t tsdbTFileObjInit(STsdb *pTsdb, const STFile *f, STFileObj **fobj); int32_t tsdbTFileObjRef(STFileObj *fobj); int32_t tsdbTFileObjUnref(STFileObj *fobj); int32_t tsdbTFileObjRemove(STFileObj *fobj); +int32_t tsdbTFileObjRemoveUpdateLC(STFileObj *fobj); int32_t tsdbTFileObjCmpr(const STFileObj **fobj1, const STFileObj **fobj2); struct STFile { tsdb_ftype_t type; SDiskID did; // disk id - int32_t s3flag; + int32_t lcn; // last chunk number int32_t fid; // file id int64_t cid; // commit id int64_t size; diff --git a/source/dnode/vnode/src/tsdb/tsdbMerge.c b/source/dnode/vnode/src/tsdb/tsdbMerge.c index 7f33f08794..87f48acaac 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMerge.c +++ b/source/dnode/vnode/src/tsdb/tsdbMerge.c @@ -544,7 +544,7 @@ int32_t tsdbMerge(void *arg) { TSDB_CHECK_CODE(code, lino, _exit); if (merger->fset == NULL) return 0; - + /* bool skipMerge = false; { extern int8_t tsS3Enabled; @@ -565,7 +565,8 @@ int32_t tsdbMerge(void *arg) { if (mtime < now - tsS3UploadDelaySec) { skipMerge = true; } - } else /* if (s3Size(object_name) > 0) */ { + } else // if (s3Size(object_name) > 0) + { skipMerge = true; } } @@ -577,7 +578,7 @@ int32_t tsdbMerge(void *arg) { code = 0; goto _exit; } - + */ // do merge tsdbDebug("vgId:%d merge begin, fid:%d", TD_VID(tsdb->pVnode), merger->fid); code = tsdbDoMerge(merger); diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index babf8c75fb..c33f9c48d1 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -15,16 +15,48 @@ #include "cos.h" #include "tsdb.h" +#include "vnd.h" static int32_t tsdbOpenFileImpl(STsdbFD *pFD) { int32_t code = 0; const char *path = pFD->path; int32_t szPage = pFD->szPage; int32_t flag = pFD->flag; + int64_t lc_size = 0; pFD->pFD = taosOpenFile(path, flag); if (pFD->pFD == NULL) { - int errsv = errno; + if (tsS3Enabled && pFD->lcn > 1 && !strncmp(path + strlen(path) - 5, ".data", 5)) { + char lc_path[TSDB_FILENAME_LEN]; + tstrncpy(lc_path, path, TSDB_FQDN_LEN); + + char *dot = strrchr(lc_path, '.'); + if (!dot) { + tsdbError("unexpected path: %s", lc_path); + code = TAOS_SYSTEM_ERROR(ENOENT); + goto _exit; + } + snprintf(dot + 1, TSDB_FQDN_LEN - (dot + 1 - lc_path), "%d.data", pFD->lcn); + + pFD->pFD = taosOpenFile(lc_path, flag); + if (pFD->pFD == NULL) { + code = TAOS_SYSTEM_ERROR(errno); + // taosMemoryFree(pFD); + goto _exit; + } + if (taosStatFile(lc_path, &lc_size, NULL, NULL) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + // taosCloseFile(&pFD->pFD); + // taosMemoryFree(pFD); + goto _exit; + } + } else { + tsdbInfo("no file: %s", path); + code = TAOS_SYSTEM_ERROR(errno); + // taosMemoryFree(pFD); + goto _exit; + } + /* const char *object_name = taosDirEntryBaseName((char *)path); long s3_size = 0; if (tsS3Enabled) { @@ -43,7 +75,6 @@ static int32_t tsdbOpenFileImpl(STsdbFD *pFD) { s3Get(object_name, path); pFD->pFD = taosOpenFile(path, flag); - if (pFD->pFD == NULL) { code = TAOS_SYSTEM_ERROR(ENOENT); // taosMemoryFree(pFD); @@ -57,12 +88,7 @@ static int32_t tsdbOpenFileImpl(STsdbFD *pFD) { pFD->objName = object_name; // pFD->szFile = s3_size; #endif - } else { - tsdbInfo("no file: %s", path); - code = TAOS_SYSTEM_ERROR(errsv); - // taosMemoryFree(pFD); - goto _exit; - } + */ } pFD->pBuf = taosMemoryCalloc(1, szPage); @@ -73,26 +99,33 @@ static int32_t tsdbOpenFileImpl(STsdbFD *pFD) { goto _exit; } + if (lc_size > 0) { + SVnodeCfg *pCfg = &pFD->pTsdb->pVnode->config; + int64_t chunksize = (int64_t)pCfg->tsdbPageSize * pCfg->s3ChunkSize; + + pFD->szFile = lc_size + chunksize * (pFD->lcn - 1); + } + // not check file size when reading data files. - if (flag != TD_FILE_READ && !pFD->s3File) { - if (taosStatFile(path, &pFD->szFile, NULL, NULL) < 0) { + if (flag != TD_FILE_READ /* && !pFD->s3File*/) { + if (!lc_size && taosStatFile(path, &pFD->szFile, NULL, NULL) < 0) { code = TAOS_SYSTEM_ERROR(errno); // taosMemoryFree(pFD->pBuf); // taosCloseFile(&pFD->pFD); // taosMemoryFree(pFD); goto _exit; } - - ASSERT(pFD->szFile % szPage == 0); - pFD->szFile = pFD->szFile / szPage; } + ASSERT(pFD->szFile % szPage == 0); + pFD->szFile = pFD->szFile / szPage; + _exit: return code; } // =============== PAGE-WISE FILE =============== -int32_t tsdbOpenFile(const char *path, STsdb *pTsdb, int32_t flag, STsdbFD **ppFD) { +int32_t tsdbOpenFile(const char *path, STsdb *pTsdb, int32_t flag, STsdbFD **ppFD, int32_t lcn) { int32_t code = 0; STsdbFD *pFD = NULL; int32_t szPage = pTsdb->pVnode->config.tsdbPageSize; @@ -111,6 +144,7 @@ int32_t tsdbOpenFile(const char *path, STsdb *pTsdb, int32_t flag, STsdbFD **ppF pFD->flag = flag; pFD->szPage = szPage; pFD->pgno = 0; + pFD->lcn = lcn; pFD->pTsdb = pTsdb; *ppFD = pFD; @@ -123,9 +157,9 @@ void tsdbCloseFile(STsdbFD **ppFD) { STsdbFD *pFD = *ppFD; if (pFD) { taosMemoryFree(pFD->pBuf); - if (!pFD->s3File) { - taosCloseFile(&pFD->pFD); - } + // if (!pFD->s3File) { + taosCloseFile(&pFD->pFD); + //} taosMemoryFree(pFD); *ppFD = NULL; } @@ -141,12 +175,18 @@ static int32_t tsdbWriteFilePage(STsdbFD *pFD) { } } - if (pFD->s3File) { - tsdbWarn("%s file: %s", __func__, pFD->path); - return code; - } if (pFD->pgno > 0) { - int64_t n = taosLSeekFile(pFD->pFD, PAGE_OFFSET(pFD->pgno, pFD->szPage), SEEK_SET); + int64_t offset = PAGE_OFFSET(pFD->pgno, pFD->szPage); + if (pFD->lcn > 1) { + SVnodeCfg *pCfg = &pFD->pTsdb->pVnode->config; + int64_t chunksize = (int64_t)pCfg->tsdbPageSize * pCfg->s3ChunkSize; + int64_t chunkoffset = chunksize * (pFD->lcn - 1); + + offset -= chunkoffset; + } + ASSERT(offset >= 0); + + int64_t n = taosLSeekFile(pFD->pFD, offset, SEEK_SET); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _exit; @@ -182,7 +222,15 @@ static int32_t tsdbReadFilePage(STsdbFD *pFD, int64_t pgno) { } int64_t offset = PAGE_OFFSET(pgno, pFD->szPage); + if (pFD->lcn > 1) { + SVnodeCfg *pCfg = &pFD->pTsdb->pVnode->config; + int64_t chunksize = (int64_t)pCfg->tsdbPageSize * pCfg->s3ChunkSize; + int64_t chunkoffset = chunksize * (pFD->lcn - 1); + offset -= chunkoffset; + } + ASSERT(offset >= 0); + /* if (pFD->s3File) { LRUHandle *handle = NULL; @@ -203,24 +251,25 @@ static int32_t tsdbReadFilePage(STsdbFD *pFD, int64_t pgno) { tsdbCacheRelease(pFD->pTsdb->bCache, handle); } else { - // seek - int64_t n = taosLSeekFile(pFD->pFD, offset, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _exit; - } - - // read - n = taosReadFile(pFD->pFD, pFD->pBuf, pFD->szPage); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _exit; - } else if (n < pFD->szPage) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _exit; - } + */ + // seek + int64_t n = taosLSeekFile(pFD->pFD, offset, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _exit; } + // read + n = taosReadFile(pFD->pFD, pFD->pBuf, pFD->szPage); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _exit; + } else if (n < pFD->szPage) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _exit; + } + //} + // check if (pgno > 1 && !taosCheckChecksumWhole(pFD->pBuf, pFD->szPage)) { code = TSDB_CODE_FILE_CORRUPTED; @@ -294,6 +343,74 @@ _exit: return code; } +static int32_t tsdbReadFileBlock(STsdbFD *pFD, int64_t offset, int64_t size, bool check, uint8_t **ppBlock) { + int32_t code = 0; + SVnodeCfg *pCfg = &pFD->pTsdb->pVnode->config; + int64_t chunksize = (int64_t)pCfg->tsdbPageSize * pCfg->s3ChunkSize; + int64_t cOffset = offset % chunksize; + int64_t n = 0; + + char *object_name = taosDirEntryBaseName(pFD->path); + char object_name_prefix[TSDB_FILENAME_LEN]; + int32_t node_id = vnodeNodeId(pFD->pTsdb->pVnode); + snprintf(object_name_prefix, TSDB_FQDN_LEN, "%d/%s", node_id, object_name); + + char *dot = strrchr(object_name_prefix, '.'); + if (!dot) { + tsdbError("unexpected path: %s", object_name_prefix); + code = TAOS_SYSTEM_ERROR(ENOENT); + goto _exit; + } + + char *buf = taosMemoryCalloc(1, size); + + for (int32_t chunkno = offset / chunksize + 1; n < size; ++chunkno) { + int64_t nRead = TMIN(chunksize - cOffset, size - n); + + if (chunkno >= pFD->lcn) { + // read last chunk + int64_t ret = taosLSeekFile(pFD->pFD, chunksize * (chunkno - pFD->lcn) + cOffset, SEEK_SET); + if (ret < 0) { + code = TAOS_SYSTEM_ERROR(errno); + taosMemoryFree(buf); + goto _exit; + } + + ret = taosReadFile(pFD->pFD, buf + n, nRead); + if (ret < 0) { + code = TAOS_SYSTEM_ERROR(errno); + taosMemoryFree(buf); + goto _exit; + } else if (ret < nRead) { + code = TSDB_CODE_FILE_CORRUPTED; + taosMemoryFree(buf); + goto _exit; + } + } else { + uint8_t *pBlock = NULL; + + snprintf(dot + 1, TSDB_FQDN_LEN - (dot + 1 - object_name_prefix), "%d.data", chunkno); + + code = s3GetObjectBlock(object_name_prefix, cOffset, nRead, check, &pBlock); + if (code != TSDB_CODE_SUCCESS) { + taosMemoryFree(buf); + goto _exit; + } + + memcpy(buf + n, pBlock, nRead); + taosMemoryFree(pBlock); + } + + n += nRead; + cOffset = 0; + } + + *ppBlock = buf; + +_exit: + return code; +} + static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size, int64_t szHint) { int32_t code = 0; int64_t n = 0; @@ -356,15 +473,22 @@ static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64 } int64_t retrieve_size = (pgnoEnd - pgno + 1) * pFD->szPage; + /* code = s3GetObjectBlock(pFD->objName, retrieve_offset, retrieve_size, 1, &pBlock); if (code != TSDB_CODE_SUCCESS) { goto _exit; } - + */ + code = tsdbReadFileBlock(pFD, retrieve_offset, retrieve_size, 1, &pBlock); + if (code != TSDB_CODE_SUCCESS) { + goto _exit; + } // 3, Store Pages in Cache int nPage = pgnoEnd - pgno + 1; for (int i = 0; i < nPage; ++i) { - tsdbCacheSetPageS3(pFD->pTsdb->pgCache, pFD, pgno, pBlock + i * pFD->szPage); + if (pFD->szFile != pgno) { // DONOT cache last volatile page + tsdbCacheSetPageS3(pFD->pTsdb->pgCache, pFD, pgno, pBlock + i * pFD->szPage); + } if (szHint > 0 && n >= size) { ++pgno; @@ -404,7 +528,7 @@ int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size, } } - if (pFD->s3File && tsS3BlockSize < 0) { + if (pFD->lcn > 1 /*pFD->s3File && tsS3BlockSize < 0*/) { return tsdbReadFileS3(pFD, offset, pBuf, size, szHint); } else { return tsdbReadFileImp(pFD, offset, pBuf, size); @@ -416,11 +540,12 @@ _exit: int32_t tsdbFsyncFile(STsdbFD *pFD) { int32_t code = 0; - + /* if (pFD->s3File) { tsdbWarn("%s file: %s", __func__, pFD->path); return code; } + */ code = tsdbWriteFilePage(pFD); if (code) goto _exit; @@ -452,23 +577,23 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS // head tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); - code = tsdbOpenFile(fname, pTsdb, TD_FILE_READ, &pReader->pHeadFD); + code = tsdbOpenFile(fname, pTsdb, TD_FILE_READ, &pReader->pHeadFD, 0); TSDB_CHECK_CODE(code, lino, _exit); // data tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); - code = tsdbOpenFile(fname, pTsdb, TD_FILE_READ, &pReader->pDataFD); + code = tsdbOpenFile(fname, pTsdb, TD_FILE_READ, &pReader->pDataFD, 0); TSDB_CHECK_CODE(code, lino, _exit); // sma tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); - code = tsdbOpenFile(fname, pTsdb, TD_FILE_READ, &pReader->pSmaFD); + code = tsdbOpenFile(fname, pTsdb, TD_FILE_READ, &pReader->pSmaFD, 0); TSDB_CHECK_CODE(code, lino, _exit); // stt for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { tsdbSttFileName(pTsdb, pSet->diskId, pSet->fid, pSet->aSttF[iStt], fname); - code = tsdbOpenFile(fname, pTsdb, TD_FILE_READ, &pReader->aSttFD[iStt]); + code = tsdbOpenFile(fname, pTsdb, TD_FILE_READ, &pReader->aSttFD[iStt], 0); TSDB_CHECK_CODE(code, lino, _exit); } @@ -875,7 +1000,7 @@ int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb pDelFReader->fDel = *pFile; tsdbDelFileName(pTsdb, pFile, fname); - code = tsdbOpenFile(fname, pTsdb, TD_FILE_READ, &pDelFReader->pReadH); + code = tsdbOpenFile(fname, pTsdb, TD_FILE_READ, &pDelFReader->pReadH, 0); if (code) { taosMemoryFree(pDelFReader); goto _exit; diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index 9ecb2fd5ba..2e0b44f5f8 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -38,24 +38,42 @@ static int32_t tsdbDoRemoveFileObject(SRTNer *rtner, const STFileObj *fobj) { return TARRAY2_APPEND(rtner->fopArr, op); } -static int32_t tsdbRemoveFileObjectS3(SRTNer *rtner, const STFileObj *fobj) { - int32_t code = 0, lino = 0; +static int32_t tsdbDoCopyFileLC(SRTNer *rtner, const STFileObj *from, const STFile *to) { + int32_t code = 0; + int32_t lino = 0; + TdFilePtr fdFrom = NULL, fdTo = NULL; + char fname_from[TSDB_FILENAME_LEN]; + char fname_to[TSDB_FILENAME_LEN]; - STFileOp op = { - .optype = TSDB_FOP_REMOVE, - .fid = fobj->f->fid, - .of = fobj->f[0], - }; + tsdbTFileLastChunkName(rtner->tsdb, from->f, fname_from); + tsdbTFileLastChunkName(rtner->tsdb, to, fname_to); - code = TARRAY2_APPEND(rtner->fopArr, op); + fdFrom = taosOpenFile(fname_from, TD_FILE_READ); + if (fdFrom == NULL) code = terrno; TSDB_CHECK_CODE(code, lino, _exit); - const char *object_name = taosDirEntryBaseName((char *)fobj->fname); - s3DeleteObjects(&object_name, 1); + tsdbInfo("vgId: %d, open tofile: %s size: %" PRId64, TD_VID(rtner->tsdb->pVnode), fname_to, from->f->size); + + fdTo = taosOpenFile(fname_to, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + if (fdTo == NULL) code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + + SVnodeCfg *pCfg = &rtner->tsdb->pVnode->config; + int64_t chunksize = (int64_t)pCfg->tsdbPageSize * pCfg->s3ChunkSize; + int64_t lc_size = tsdbLogicToFileSize(to->size, rtner->szPage) - chunksize * (to->lcn - 1); + int64_t n = taosFSendFile(fdTo, fdFrom, 0, lc_size); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + TSDB_CHECK_CODE(code, lino, _exit); + } + taosCloseFile(&fdFrom); + taosCloseFile(&fdTo); _exit: if (code) { TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + if (fdFrom) taosCloseFile(&fdFrom); + if (fdTo) taosCloseFile(&fdTo); } return code; } @@ -97,38 +115,11 @@ _exit: return code; } -static int32_t tsdbCopyFileS3(SRTNer *rtner, const STFileObj *from, const STFile *to) { - int32_t code = 0; - int32_t lino = 0; - - char fname[TSDB_FILENAME_LEN]; - TdFilePtr fdFrom = NULL; - // TdFilePtr fdTo = NULL; - - tsdbTFileName(rtner->tsdb, to, fname); - - fdFrom = taosOpenFile(from->fname, TD_FILE_READ); - if (fdFrom == NULL) code = terrno; - TSDB_CHECK_CODE(code, lino, _exit); - - char *object_name = taosDirEntryBaseName(fname); - code = s3PutObjectFromFile2(from->fname, object_name, 1); - TSDB_CHECK_CODE(code, lino, _exit); - - taosCloseFile(&fdFrom); - -_exit: - if (code) { - TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); - taosCloseFile(&fdFrom); - } - return code; -} - static int32_t tsdbDoMigrateFileObj(SRTNer *rtner, const STFileObj *fobj, const SDiskID *did) { int32_t code = 0; int32_t lino = 0; STFileOp op = {0}; + int32_t lcn = fobj->f->lcn; // remove old op = (STFileOp){ @@ -153,6 +144,7 @@ static int32_t tsdbDoMigrateFileObj(SRTNer *rtner, const STFileObj *fobj, const .maxVer = fobj->f->maxVer, .cid = fobj->f->cid, .size = fobj->f->size, + .lcn = lcn, .stt[0] = { .level = fobj->f->stt[0].level, @@ -164,59 +156,14 @@ static int32_t tsdbDoMigrateFileObj(SRTNer *rtner, const STFileObj *fobj, const TSDB_CHECK_CODE(code, lino, _exit); // do copy the file - code = tsdbDoCopyFile(rtner, fobj, &op.nf); - TSDB_CHECK_CODE(code, lino, _exit); -_exit: - if (code) { - TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + if (lcn < 1) { + code = tsdbDoCopyFile(rtner, fobj, &op.nf); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbDoCopyFileLC(rtner, fobj, &op.nf); + TSDB_CHECK_CODE(code, lino, _exit); } - return code; -} - -static int32_t tsdbMigrateDataFileS3(SRTNer *rtner, const STFileObj *fobj, const SDiskID *did) { - int32_t code = 0; - int32_t lino = 0; - STFileOp op = {0}; - - // remove old - op = (STFileOp){ - .optype = TSDB_FOP_REMOVE, - .fid = fobj->f->fid, - .of = fobj->f[0], - }; - - code = TARRAY2_APPEND(rtner->fopArr, op); - TSDB_CHECK_CODE(code, lino, _exit); - - // create new - op = (STFileOp){ - .optype = TSDB_FOP_CREATE, - .fid = fobj->f->fid, - .nf = - { - .type = fobj->f->type, - .did = did[0], - .fid = fobj->f->fid, - .minVer = fobj->f->minVer, - .maxVer = fobj->f->maxVer, - .cid = fobj->f->cid, - .size = fobj->f->size, - .stt[0] = - { - .level = fobj->f->stt[0].level, - }, - }, - }; - - op.nf.s3flag = true; - - code = TARRAY2_APPEND(rtner->fopArr, op); - TSDB_CHECK_CODE(code, lino, _exit); - - // do copy the file - code = tsdbCopyFileS3(rtner, fobj, &op.nf); - TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { @@ -327,38 +274,14 @@ static int32_t tsdbDoRetentionOnFileSet(SRTNer *rtner, STFileSet *fset) { for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX && (fobj = fset->farr[ftype], 1); ++ftype) { if (fobj == NULL) continue; - int32_t nlevel = tfsGetLevel(rtner->tsdb->pVnode->pTfs); - if (fobj->f->did.level == did.level) { - if (tsS3Enabled && nlevel > 1 && TSDB_FTYPE_DATA == ftype && did.level == nlevel - 1 && - taosCheckExistFile(fobj->fname)) { - int32_t mtime = 0; - taosStatFile(fobj->fname, NULL, &mtime, NULL); - if (mtime < rtner->now - tsS3UploadDelaySec) { - tsdbInfo("file:%s size: %" PRId64 " do migrate s3", fobj->fname, fobj->f->size); - code = tsdbMigrateDataFileS3(rtner, fobj, &did); - TSDB_CHECK_CODE(code, lino, _exit); - } - } - + /* + code = tsdbCheckMigrateS3(rtner, fobj, ftype, &did); + TSDB_CHECK_CODE(code, lino, _exit); + */ continue; } - /* - if (tsS3Enabled && nlevel > 1 && TSDB_FTYPE_DATA == ftype && did.level == nlevel - 1) { - code = tsdbMigrateDataFileS3(rtner, fobj, &did); - TSDB_CHECK_CODE(code, lino, _exit); - } else { - if (tsS3Enabled) { - int64_t fsize = 0; - if (taosStatFile(fobj->fname, &fsize, NULL, NULL) < 0) { - code = TAOS_SYSTEM_ERROR(terrno); - tsdbError("vgId:%d %s failed since file:%s stat failed, reason:%s", TD_VID(rtner->tsdb->pVnode), - __func__, fobj->fname, tstrerror(code)); TSDB_CHECK_CODE(code, lino, _exit); - } - s3EvictCache(fobj->fname, fsize * 2); - } - */ if (fobj->f->did.level > did.level) { continue; } @@ -367,7 +290,6 @@ static int32_t tsdbDoRetentionOnFileSet(SRTNer *rtner, STFileSet *fset) { code = tsdbDoMigrateFileObj(rtner, fobj, &did); TSDB_CHECK_CODE(code, lino, _exit); - //} } // stt @@ -471,3 +393,414 @@ int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync) { return code; } + +static int32_t tsdbS3FidLevel(int32_t fid, STsdbKeepCfg *pKeepCfg, int32_t s3KeepLocal, int64_t nowSec) { + int32_t localFid; + TSKEY key; + + if (pKeepCfg->precision == TSDB_TIME_PRECISION_MILLI) { + nowSec = nowSec * 1000; + } else if (pKeepCfg->precision == TSDB_TIME_PRECISION_MICRO) { + nowSec = nowSec * 1000000l; + } else if (pKeepCfg->precision == TSDB_TIME_PRECISION_NANO) { + nowSec = nowSec * 1000000000l; + } else { + ASSERT(0); + } + + nowSec = nowSec - pKeepCfg->keepTimeOffset * tsTickPerHour[pKeepCfg->precision]; + + key = nowSec - s3KeepLocal * tsTickPerMin[pKeepCfg->precision]; + localFid = tsdbKeyFid(key, pKeepCfg->days, pKeepCfg->precision); + + if (fid >= localFid) { + return 0; + } else { + return 1; + } +} + +static int32_t tsdbCopyFileS3(SRTNer *rtner, const STFileObj *from, const STFile *to) { + int32_t code = 0; + int32_t lino = 0; + + char fname[TSDB_FILENAME_LEN]; + TdFilePtr fdFrom = NULL; + // TdFilePtr fdTo = NULL; + + tsdbTFileName(rtner->tsdb, to, fname); + + fdFrom = taosOpenFile(from->fname, TD_FILE_READ); + if (fdFrom == NULL) code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + + char *object_name = taosDirEntryBaseName(fname); + code = s3PutObjectFromFile2(from->fname, object_name, 1); + TSDB_CHECK_CODE(code, lino, _exit); + + taosCloseFile(&fdFrom); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + taosCloseFile(&fdFrom); + } + return code; +} + +static int32_t tsdbMigrateDataFileLCS3(SRTNer *rtner, const STFileObj *fobj, int64_t size, int64_t chunksize) { + int32_t code = 0; + int32_t lino = 0; + STFileOp op = {0}; + TdFilePtr fdFrom = NULL, fdTo = NULL; + int32_t lcn = fobj->f->lcn + (size - 1) / chunksize; + + // remove old + op = (STFileOp){ + .optype = TSDB_FOP_REMOVE, + .fid = fobj->f->fid, + .of = fobj->f[0], + }; + + code = TARRAY2_APPEND(rtner->fopArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + // create new + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = fobj->f->fid, + .nf = + { + .type = fobj->f->type, + .did = fobj->f->did, + .fid = fobj->f->fid, + .minVer = fobj->f->minVer, + .maxVer = fobj->f->maxVer, + .cid = fobj->f->cid, + .size = fobj->f->size, + .lcn = lcn, + .stt[0] = + { + .level = fobj->f->stt[0].level, + }, + }, + }; + + code = TARRAY2_APPEND(rtner->fopArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + char fname[TSDB_FILENAME_LEN]; + tsdbTFileName(rtner->tsdb, &op.nf, fname); + char *object_name = taosDirEntryBaseName(fname); + char object_name_prefix[TSDB_FILENAME_LEN]; + int32_t node_id = vnodeNodeId(rtner->tsdb->pVnode); + snprintf(object_name_prefix, TSDB_FQDN_LEN, "%d/%s", node_id, object_name); + + char *dot = strrchr(object_name_prefix, '.'); + if (!dot) { + tsdbError("vgId:%d, incorrect lcn: %d, %s at line %d", TD_VID(rtner->tsdb->pVnode), lcn, __func__, lino); + return -1; + } + + char *dot2 = strchr(object_name, '.'); + if (!dot) { + tsdbError("vgId:%d, incorrect lcn: %d, %s at line %d", TD_VID(rtner->tsdb->pVnode), lcn, __func__, lino); + return -1; + } + snprintf(dot2 + 1, TSDB_FQDN_LEN - (dot2 + 1 - object_name), "%d.data", fobj->f->lcn); + + // do copy the file + for (int32_t cn = fobj->f->lcn; cn < lcn; ++cn) { + snprintf(dot + 1, TSDB_FQDN_LEN - (dot + 1 - object_name_prefix), "%d.data", cn); + int64_t c_offset = chunksize * (cn - fobj->f->lcn); + + code = s3PutObjectFromFileOffset(fname, object_name_prefix, c_offset, chunksize); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // copy last chunk + int64_t lc_offset = chunksize * (lcn - fobj->f->lcn); + int64_t lc_size = size - lc_offset; + + snprintf(dot2 + 1, TSDB_FQDN_LEN - (dot2 + 1 - object_name), "%d.data", fobj->f->lcn); + + fdFrom = taosOpenFile(fname, TD_FILE_READ); + if (fdFrom == NULL) code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + + tsdbInfo("vgId: %d, open lcfile: %s size: %" PRId64, TD_VID(rtner->tsdb->pVnode), fname, lc_size); + + snprintf(dot2 + 1, TSDB_FQDN_LEN - (dot2 + 1 - object_name), "%d.data", lcn); + fdTo = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + if (fdTo == NULL) code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + + int64_t n = taosFSendFile(fdTo, fdFrom, &lc_offset, lc_size); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + TSDB_CHECK_CODE(code, lino, _exit); + } + taosCloseFile(&fdFrom); + taosCloseFile(&fdTo); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + if (fdFrom) taosCloseFile(&fdFrom); + if (fdTo) taosCloseFile(&fdTo); + } + return code; +} + +static int32_t tsdbMigrateDataFileS3(SRTNer *rtner, const STFileObj *fobj, int64_t size, int64_t chunksize) { + int32_t code = 0; + int32_t lino = 0; + STFileOp op = {0}; + int32_t lcn = (size - 1) / chunksize + 1; + + // remove old + op = (STFileOp){ + .optype = TSDB_FOP_REMOVE, + .fid = fobj->f->fid, + .of = fobj->f[0], + }; + + code = TARRAY2_APPEND(rtner->fopArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + // create new + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = fobj->f->fid, + .nf = + { + .type = fobj->f->type, + .did = fobj->f->did, + .fid = fobj->f->fid, + .minVer = fobj->f->minVer, + .maxVer = fobj->f->maxVer, + .cid = fobj->f->cid, + .size = fobj->f->size, + .lcn = lcn, + .stt[0] = + { + .level = fobj->f->stt[0].level, + }, + }, + }; + + code = TARRAY2_APPEND(rtner->fopArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + char fname[TSDB_FILENAME_LEN]; + tsdbTFileName(rtner->tsdb, &op.nf, fname); + char *object_name = taosDirEntryBaseName(fname); + char object_name_prefix[TSDB_FILENAME_LEN]; + int32_t node_id = vnodeNodeId(rtner->tsdb->pVnode); + snprintf(object_name_prefix, TSDB_FQDN_LEN, "%d/%s", node_id, object_name); + + char *dot = strrchr(object_name_prefix, '.'); + if (!dot) { + tsdbError("vgId:%d, incorrect lcn: %d, %s at line %d", TD_VID(rtner->tsdb->pVnode), lcn, __func__, lino); + return -1; + } + + // do copy the file + for (int32_t cn = 1; cn < lcn; ++cn) { + snprintf(dot + 1, TSDB_FQDN_LEN - (dot + 1 - object_name_prefix), "%d.data", cn); + int64_t c_offset = chunksize * (cn - 1); + + code = s3PutObjectFromFileOffset(fobj->fname, object_name_prefix, c_offset, chunksize); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // copy last chunk + TdFilePtr fdFrom = NULL, fdTo = NULL; + int64_t lc_offset = (int64_t)(lcn - 1) * chunksize; + int64_t lc_size = size - lc_offset; + + dot = strchr(object_name, '.'); + if (!dot) { + tsdbError("vgId:%d, incorrect lcn: %d, %s at line %d", TD_VID(rtner->tsdb->pVnode), lcn, __func__, lino); + return -1; + } + snprintf(dot + 1, TSDB_FQDN_LEN - (dot + 1 - object_name), "%d.data", lcn); + + fdFrom = taosOpenFile(fobj->fname, TD_FILE_READ); + if (fdFrom == NULL) code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + + tsdbInfo("vgId: %d, open lcfile: %s size: %" PRId64, TD_VID(rtner->tsdb->pVnode), fname, fobj->f->size); + + fdTo = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + if (fdTo == NULL) code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + + int64_t n = taosFSendFile(fdTo, fdFrom, &lc_offset, lc_size); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + TSDB_CHECK_CODE(code, lino, _exit); + } + taosCloseFile(&fdFrom); + taosCloseFile(&fdTo); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + taosCloseFile(&fdFrom); + taosCloseFile(&fdTo); + } + return code; +} + +static int32_t tsdbDoS3MigrateOnFileSet(SRTNer *rtner, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + STFileObj *fobj = fset->farr[TSDB_FTYPE_DATA]; + if (!fobj) return code; + + int32_t expLevel = tsdbFidLevel(fset->fid, &rtner->tsdb->keepCfg, rtner->now); + if (expLevel < 0) return code; // expired + + SVnodeCfg *pCfg = &rtner->tsdb->pVnode->config; + int32_t s3KeepLocal = pCfg->s3KeepLocal; + int32_t s3ExpLevel = tsdbS3FidLevel(fset->fid, &rtner->tsdb->keepCfg, s3KeepLocal, rtner->now); + if (s3ExpLevel < 1) return code; // keep on local storage + + int64_t chunksize = (int64_t)pCfg->tsdbPageSize * pCfg->s3ChunkSize; + int32_t lcn = fobj->f->lcn; + + if (lcn < 1 && taosCheckExistFile(fobj->fname)) { + int32_t mtime = 0; + int64_t size = 0; + taosStatFile(fobj->fname, &size, &mtime, NULL); + if (size > chunksize && mtime < rtner->now - tsS3UploadDelaySec) { + if (pCfg->s3Compact && lcn < 0) { + extern int32_t tsdbAsyncCompact(STsdb * tsdb, const STimeWindow *tw, bool sync); + + STimeWindow win = {0}; + tsdbFidKeyRange(fset->fid, rtner->tsdb->keepCfg.days, rtner->tsdb->keepCfg.precision, &win.skey, &win.ekey); + + tsdbInfo("vgId:%d, compact begin lcn: %d.", TD_VID(rtner->tsdb->pVnode), lcn); + tsdbAsyncCompact(rtner->tsdb, &win, pCfg->sttTrigger == 1); + tsdbInfo("vgId:%d, compact end lcn: %d.", TD_VID(rtner->tsdb->pVnode), lcn); + return code; + } + + code = tsdbMigrateDataFileS3(rtner, fobj, size, chunksize); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + if (lcn <= 1) { + tsdbError("vgId:%d, incorrect lcn: %d, %s at line %d", TD_VID(rtner->tsdb->pVnode), lcn, __func__, lino); + return code; + } + char fname1[TSDB_FILENAME_LEN]; + tsdbTFileLastChunkName(rtner->tsdb, fobj->f, fname1); + + if (taosCheckExistFile(fname1)) { + int32_t mtime = 0; + int64_t size = 0; + taosStatFile(fname1, &size, &mtime, NULL); + if (size > chunksize && mtime < rtner->now - tsS3UploadDelaySec) { + code = tsdbMigrateDataFileLCS3(rtner, fobj, size, chunksize); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + tsdbError("vgId:%d, file: %s not found, %s at line %d", TD_VID(rtner->tsdb->pVnode), fname1, __func__, lino); + return code; + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDoS3MigrateAsync(void *arg) { + int32_t code = 0; + int32_t lino = 0; + SRTNer rtner[1] = {0}; + + code = tsdbDoRetentionBegin(arg, rtner); + TSDB_CHECK_CODE(code, lino, _exit); + + STFileSet *fset; + TARRAY2_FOREACH(rtner->fsetArr, fset) { + if (fset->fid != ((SRtnArg *)arg)->fid) continue; + + code = tsdbDoS3MigrateOnFileSet(rtner, fset); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbDoRetentionEnd(rtner); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + if (TARRAY2_DATA(rtner->fopArr)) { + TARRAY2_DESTROY(rtner->fopArr, NULL); + } + TFileSetArray **fsetArr = &rtner->fsetArr; + if (fsetArr[0]) { + tsdbFSDestroyCopySnapshot(&rtner->fsetArr); + } + + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbS3Migrate(STsdb *tsdb, int64_t now, int32_t sync) { + int32_t code = 0; + + if (!tsS3Enabled) { + return code; + } + + taosThreadMutexLock(&tsdb->mutex); + + if (tsdb->bgTaskDisabled) { + taosThreadMutexUnlock(&tsdb->mutex); + return 0; + } + + STFileSet *fset; + TARRAY2_FOREACH(tsdb->pFS->fSetArr, fset) { + code = tsdbTFileSetOpenChannel(fset); + if (code) { + taosThreadMutexUnlock(&tsdb->mutex); + return code; + } + + SRtnArg *arg = taosMemoryMalloc(sizeof(*arg)); + if (arg == NULL) { + taosThreadMutexUnlock(&tsdb->mutex); + return TSDB_CODE_OUT_OF_MEMORY; + } + + arg->tsdb = tsdb; + arg->now = now; + arg->fid = fset->fid; + + if (sync) { + code = vnodeAsyncC(vnodeAsyncHandle[0], tsdb->pVnode->commitChannel, EVA_PRIORITY_LOW, tsdbDoS3MigrateAsync, + tsdbFreeRtnArg, arg, NULL); + } else { + code = vnodeAsyncC(vnodeAsyncHandle[1], fset->bgTaskChannel, EVA_PRIORITY_LOW, tsdbDoS3MigrateAsync, + tsdbFreeRtnArg, arg, NULL); + } + if (code) { + tsdbFreeRtnArg(arg); + taosThreadMutexUnlock(&tsdb->mutex); + return code; + } + } + + taosThreadMutexUnlock(&tsdb->mutex); + + return code; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c index f26c6540df..b2a091fbb3 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c @@ -47,12 +47,12 @@ int32_t tsdbSttFileReaderOpen(const char *fname, const SSttFileReaderConfig *con // open file if (fname) { - code = tsdbOpenFile(fname, config->tsdb, TD_FILE_READ, &reader[0]->fd); + code = tsdbOpenFile(fname, config->tsdb, TD_FILE_READ, &reader[0]->fd, 0); TSDB_CHECK_CODE(code, lino, _exit); } else { char fname1[TSDB_FILENAME_LEN]; tsdbTFileName(config->tsdb, config->file, fname1); - code = tsdbOpenFile(fname1, config->tsdb, TD_FILE_READ, &reader[0]->fd); + code = tsdbOpenFile(fname1, config->tsdb, TD_FILE_READ, &reader[0]->fd, 0); TSDB_CHECK_CODE(code, lino, _exit); } @@ -642,7 +642,7 @@ static int32_t tsdbSttFWriterDoOpen(SSttFileWriter *writer) { char fname[TSDB_FILENAME_LEN]; tsdbTFileName(writer->config->tsdb, writer->file, fname); - code = tsdbOpenFile(fname, writer->config->tsdb, flag, &writer->fd); + code = tsdbOpenFile(fname, writer->config->tsdb, flag, &writer->fd, 0); TSDB_CHECK_CODE(code, lino, _exit); uint8_t hdr[TSDB_FHDR_SIZE] = {0}; diff --git a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c index 876c0df4a0..b8872a0401 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c +++ b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c @@ -80,7 +80,7 @@ static int32_t tsdbUpgradeHead(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader * char fname[TSDB_FILENAME_LEN]; tsdbTFileName(tsdb, &file, fname); - code = tsdbOpenFile(fname, tsdb, TD_FILE_READ | TD_FILE_WRITE, &ctx->fd); + code = tsdbOpenFile(fname, tsdb, TD_FILE_READ | TD_FILE_WRITE, &ctx->fd, 0); TSDB_CHECK_CODE(code, lino, _exit); // convert @@ -258,7 +258,7 @@ static int32_t tsdbUpgradeSttFile(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReade code = tsdbTFileObjInit(tsdb, &file, &fobj); TSDB_CHECK_CODE(code, lino, _exit1); - code = tsdbOpenFile(fobj->fname, tsdb, TD_FILE_READ | TD_FILE_WRITE, &ctx->fd); + code = tsdbOpenFile(fobj->fname, tsdb, TD_FILE_READ | TD_FILE_WRITE, &ctx->fd, 0); TSDB_CHECK_CODE(code, lino, _exit1); for (int32_t iSttBlk = 0; iSttBlk < taosArrayGetSize(aSttBlk); iSttBlk++) { @@ -413,7 +413,7 @@ static int32_t tsdbUpgradeOpenTombFile(STsdb *tsdb, STFileSet *fset, STsdbFD **f } char fname[TSDB_FILENAME_LEN] = {0}; - code = tsdbOpenFile(fobj[0]->fname, tsdb, TD_FILE_READ | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_CREATE, fd); + code = tsdbOpenFile(fobj[0]->fname, tsdb, TD_FILE_READ | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_CREATE, fd, 0); TSDB_CHECK_CODE(code, lino, _exit); uint8_t hdr[TSDB_FHDR_SIZE] = {0}; diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index 44621bf4e6..076db8b3ca 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -642,8 +642,8 @@ SColVal *tsdbRowIterNext(STSDBRowIter *pIter) { return &pIter->cv; } - if (pIter->iColData < pIter->pRow->pBlockData->nColData) { - tColDataGetValue(&pIter->pRow->pBlockData->aColData[pIter->iColData], pIter->pRow->iRow, &pIter->cv); + if (pIter->iColData <= pIter->pRow->pBlockData->nColData) { + tColDataGetValue(&pIter->pRow->pBlockData->aColData[pIter->iColData - 1], pIter->pRow->iRow, &pIter->cv); ++pIter->iColData; return &pIter->cv; } else { diff --git a/source/dnode/vnode/src/vnd/vnodeCfg.c b/source/dnode/vnode/src/vnd/vnodeCfg.c index 07bfa6c719..53edc4da76 100644 --- a/source/dnode/vnode/src/vnd/vnodeCfg.c +++ b/source/dnode/vnode/src/vnd/vnodeCfg.c @@ -51,6 +51,9 @@ const SVnodeCfg vnodeCfgDefault = {.vgId = -1, .hashEnd = 0, .hashMethod = 0, .sttTrigger = TSDB_DEFAULT_SST_TRIGGER, + .s3ChunkSize = TSDB_DEFAULT_S3_CHUNK_SIZE, + .s3KeepLocal = TSDB_DEFAULT_S3_KEEP_LOCAL, + .s3Compact = TSDB_DEFAULT_S3_COMPACT, .tsdbPageSize = TSDB_DEFAULT_PAGE_SIZE}; int vnodeCheckCfg(const SVnodeCfg *pCfg) { @@ -106,6 +109,9 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) { if (tjsonAddIntegerToObject(pJson, "keep1", pCfg->tsdbCfg.keep1) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "keep2", pCfg->tsdbCfg.keep2) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "keepTimeOffset", pCfg->tsdbCfg.keepTimeOffset) < 0) return -1; + if (tjsonAddIntegerToObject(pJson, "s3ChunkSize", pCfg->s3ChunkSize) < 0) return -1; + if (tjsonAddIntegerToObject(pJson, "s3KeepLocal", pCfg->s3KeepLocal) < 0) return -1; + if (tjsonAddIntegerToObject(pJson, "s3Compact", pCfg->s3Compact) < 0) return -1; if (pCfg->tsdbCfg.retentions[0].keep > 0) { int32_t nRetention = 1; if (pCfg->tsdbCfg.retentions[1].freq > 0) { @@ -154,9 +160,8 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) { SJson *nodeInfo = tjsonCreateArray(); if (nodeInfo == NULL) return -1; if (tjsonAddItemToObject(pJson, "syncCfg.nodeInfo", nodeInfo) < 0) return -1; - vDebug("vgId:%d, encode config, replicas:%d totalReplicas:%d selfIndex:%d changeVersion:%d", - pCfg->vgId, pCfg->syncCfg.replicaNum, - pCfg->syncCfg.totalReplicaNum, pCfg->syncCfg.myIndex, pCfg->syncCfg.changeVersion); + vDebug("vgId:%d, encode config, replicas:%d totalReplicas:%d selfIndex:%d changeVersion:%d", pCfg->vgId, + pCfg->syncCfg.replicaNum, pCfg->syncCfg.totalReplicaNum, pCfg->syncCfg.myIndex, pCfg->syncCfg.changeVersion); for (int i = 0; i < pCfg->syncCfg.totalReplicaNum; ++i) { SJson *info = tjsonCreateObject(); SNodeInfo *pNode = (SNodeInfo *)&pCfg->syncCfg.nodeInfo[i]; @@ -317,6 +322,19 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) { pCfg->tsdbPageSize = TSDB_DEFAULT_TSDB_PAGESIZE * 1024; } + tjsonGetNumberValue(pJson, "s3ChunkSize", pCfg->s3ChunkSize, code); + if (code < 0) { + pCfg->s3ChunkSize = TSDB_DEFAULT_S3_CHUNK_SIZE; + } + tjsonGetNumberValue(pJson, "s3KeepLocal", pCfg->s3KeepLocal, code); + if (code < 0) { + pCfg->s3KeepLocal = TSDB_DEFAULT_S3_KEEP_LOCAL; + } + tjsonGetNumberValue(pJson, "s3Compact", pCfg->s3Compact, code); + if (code < 0) { + pCfg->s3Compact = TSDB_DEFAULT_S3_COMPACT; + } + return 0; } diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 63ca7251f5..463077590e 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -305,14 +305,14 @@ int32_t vnodeRestoreVgroupId(const char *srcPath, const char *dstPath, int32_t s return dstVgId; } -void vnodeDestroy(int32_t vgId, const char *path, STfs *pTfs) { +void vnodeDestroy(int32_t vgId, const char *path, STfs *pTfs, int32_t nodeId) { vInfo("path:%s is removed while destroy vnode", path); tfsRmdir(pTfs, path); - int32_t nlevel = tfsGetLevel(pTfs); - if (vgId > 0 && nlevel > 1 && tsS3Enabled) { + // int32_t nlevel = tfsGetLevel(pTfs); + if (nodeId > 0 && vgId > 0 /*&& nlevel > 1*/ && tsS3Enabled) { char vnode_prefix[TSDB_FILENAME_LEN]; - snprintf(vnode_prefix, TSDB_FILENAME_LEN, "v%df", vgId); + snprintf(vnode_prefix, TSDB_FILENAME_LEN, "%d/v%df", nodeId, vgId); s3DeleteObjectsByPrefix(vnode_prefix); } } @@ -480,30 +480,29 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC vnodeRollback(pVnode); } - snprintf(pVnode->monitor.strClusterId, TSDB_CLUSTER_ID_LEN, "%"PRId64, pVnode->config.syncCfg.nodeInfo[0].clusterId); - snprintf(pVnode->monitor.strDnodeId, TSDB_NODE_ID_LEN, "%"PRId32, pVnode->config.syncCfg.nodeInfo[0].nodeId); - snprintf(pVnode->monitor.strVgId, TSDB_VGROUP_ID_LEN, "%"PRId32, pVnode->config.vgId); + snprintf(pVnode->monitor.strClusterId, TSDB_CLUSTER_ID_LEN, "%" PRId64, pVnode->config.syncCfg.nodeInfo[0].clusterId); + snprintf(pVnode->monitor.strDnodeId, TSDB_NODE_ID_LEN, "%" PRId32, pVnode->config.syncCfg.nodeInfo[0].nodeId); + snprintf(pVnode->monitor.strVgId, TSDB_VGROUP_ID_LEN, "%" PRId32, pVnode->config.vgId); - if(tsEnableMonitor && pVnode->monitor.insertCounter == NULL){ + if (tsEnableMonitor && pVnode->monitor.insertCounter == NULL) { taos_counter_t *counter = NULL; counter = taos_collector_registry_get_metric(VNODE_METRIC_SQL_COUNT); - if(counter == NULL){ - int32_t label_count = 7; - const char *sample_labels[] = {VNODE_METRIC_TAG_NAME_SQL_TYPE, VNODE_METRIC_TAG_NAME_CLUSTER_ID, - VNODE_METRIC_TAG_NAME_DNODE_ID, VNODE_METRIC_TAG_NAME_DNODE_EP, - VNODE_METRIC_TAG_NAME_VGROUP_ID, VNODE_METRIC_TAG_NAME_USERNAME, - VNODE_METRIC_TAG_NAME_RESULT}; - counter = taos_counter_new(VNODE_METRIC_SQL_COUNT, "counter for insert sql", - label_count, sample_labels); - vInfo("vgId:%d, new metric:%p",TD_VID(pVnode), counter); - if(taos_collector_registry_register_metric(counter) == 1){ + if (counter == NULL) { + int32_t label_count = 7; + const char *sample_labels[] = {VNODE_METRIC_TAG_NAME_SQL_TYPE, VNODE_METRIC_TAG_NAME_CLUSTER_ID, + VNODE_METRIC_TAG_NAME_DNODE_ID, VNODE_METRIC_TAG_NAME_DNODE_EP, + VNODE_METRIC_TAG_NAME_VGROUP_ID, VNODE_METRIC_TAG_NAME_USERNAME, + VNODE_METRIC_TAG_NAME_RESULT}; + counter = taos_counter_new(VNODE_METRIC_SQL_COUNT, "counter for insert sql", label_count, sample_labels); + vInfo("vgId:%d, new metric:%p", TD_VID(pVnode), counter); + if (taos_collector_registry_register_metric(counter) == 1) { taos_counter_destroy(counter); counter = taos_collector_registry_get_metric(VNODE_METRIC_SQL_COUNT); - vInfo("vgId:%d, get metric from registry:%p",TD_VID(pVnode), counter); + vInfo("vgId:%d, get metric from registry:%p", TD_VID(pVnode), counter); } } pVnode->monitor.insertCounter = counter; - vInfo("vgId:%d, succeed to set metric:%p",TD_VID(pVnode), counter); + vInfo("vgId:%d, succeed to set metric:%p", TD_VID(pVnode), counter); } return pVnode; diff --git a/source/dnode/vnode/src/vnd/vnodeRetention.c b/source/dnode/vnode/src/vnd/vnodeRetention.c index c510c0fe92..5db20b8fc7 100644 --- a/source/dnode/vnode/src/vnd/vnodeRetention.c +++ b/source/dnode/vnode/src/vnd/vnodeRetention.c @@ -23,4 +23,12 @@ int32_t vnodeDoRetention(SVnode *pVnode, int64_t now) { if (TSDB_CODE_SUCCESS == code) code = smaRetention(pVnode->pSma, now); return code; -} \ No newline at end of file +} + +int32_t vnodeDoS3Migrate(SVnode *pVnode, int64_t now) { + int32_t code = TSDB_CODE_SUCCESS; + + code = tsdbS3Migrate(pVnode->pTsdb, now, pVnode->config.sttTrigger == 1); + + return code; +} diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index e32d4b70e0..2c2c64873b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -31,15 +31,16 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, static int32_t vnodeProcessAlterTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, SRpcMsg *pOriginRpc); -static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, - SRpcMsg *pOriginalMsg); +static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, + SRpcMsg *pOriginalMsg); static int32_t vnodeProcessCreateTSmaReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterConfigReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessDropTtlTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessTrimReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); -static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, - SRpcMsg *pOriginalMsg); +static int32_t vnodeProcessS3MigrateReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); +static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, + SRpcMsg *pOriginalMsg); static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessCreateIndexReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessDropIndexReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); @@ -540,6 +541,9 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg case TDMT_VND_TRIM: if (vnodeProcessTrimReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; break; + case TDMT_VND_S3MIGRATE: + if (vnodeProcessS3MigrateReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; + break; case TDMT_VND_CREATE_SMA: if (vnodeProcessCreateTSmaReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; break; @@ -846,6 +850,26 @@ _exit: return code; } +extern int32_t vnodeDoS3Migrate(SVnode *pVnode, int64_t now); + +static int32_t vnodeProcessS3MigrateReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { + int32_t code = 0; + SVS3MigrateDbReq s3migrateReq = {0}; + + // decode + if (tDeserializeSVS3MigrateDbReq(pReq, len, &s3migrateReq) != 0) { + code = TSDB_CODE_INVALID_MSG; + goto _exit; + } + + vInfo("vgId:%d, s3migrate vnode request will be processed, time:%d", pVnode->config.vgId, s3migrateReq.timestamp); + + code = vnodeDoS3Migrate(pVnode, s3migrateReq.timestamp); + +_exit: + return code; +} + static int32_t vnodeProcessDropTtlTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { SVDropTtlTableReq ttlReq = {0}; if (tDeserializeSVDropTtlTableReq(pReq, len, &ttlReq) != 0) { @@ -1489,8 +1513,8 @@ static int32_t vnodeRebuildSubmitReqMsg(SSubmitReq2 *pSubmitReq, void **ppMsg) { return code; } -static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, - SRpcMsg *pOriginalMsg) { +static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, + SRpcMsg *pOriginalMsg) { int32_t code = 0; terrno = 0; @@ -1709,10 +1733,14 @@ _exit: atomic_add_fetch_64(&pVnode->statis.nInsertSuccess, pSubmitRsp->affectedRows); atomic_add_fetch_64(&pVnode->statis.nBatchInsert, 1); - if(tsEnableMonitor && pSubmitRsp->affectedRows > 0 && strlen(pOriginalMsg->info.conn.user) > 0){ - const char *sample_labels[] = {VNODE_METRIC_TAG_VALUE_INSERT_AFFECTED_ROWS, pVnode->monitor.strClusterId, - pVnode->monitor.strDnodeId, tsLocalEp, pVnode->monitor.strVgId, - pOriginalMsg->info.conn.user, "Success"}; + if (tsEnableMonitor && pSubmitRsp->affectedRows > 0 && strlen(pOriginalMsg->info.conn.user) > 0) { + const char *sample_labels[] = {VNODE_METRIC_TAG_VALUE_INSERT_AFFECTED_ROWS, + pVnode->monitor.strClusterId, + pVnode->monitor.strDnodeId, + tsLocalEp, + pVnode->monitor.strVgId, + pOriginalMsg->info.conn.user, + "Success"}; taos_counter_add(pVnode->monitor.insertCounter, pSubmitRsp->affectedRows, sample_labels); } @@ -1725,14 +1753,14 @@ _exit: atomic_add_fetch_64(&pVnode->statis.nBatchInsertSuccess, 1); code = tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len); - const char *batch_sample_labels[] = {VNODE_METRIC_TAG_VALUE_INSERT, pVnode->monitor.strClusterId, - pVnode->monitor.strDnodeId, tsLocalEp, pVnode->monitor.strVgId, + const char *batch_sample_labels[] = {VNODE_METRIC_TAG_VALUE_INSERT, pVnode->monitor.strClusterId, + pVnode->monitor.strDnodeId, tsLocalEp, pVnode->monitor.strVgId, pOriginalMsg->info.conn.user, "Success"}; taos_counter_inc(pVnode->monitor.insertCounter, batch_sample_labels); } else{ - const char *batch_sample_labels[] = {VNODE_METRIC_TAG_VALUE_INSERT, pVnode->monitor.strClusterId, - pVnode->monitor.strDnodeId, tsLocalEp, pVnode->monitor.strVgId, + const char *batch_sample_labels[] = {VNODE_METRIC_TAG_VALUE_INSERT, pVnode->monitor.strClusterId, + pVnode->monitor.strDnodeId, tsLocalEp, pVnode->monitor.strVgId, pOriginalMsg->info.conn.user, "Failed"}; taos_counter_inc(pVnode->monitor.insertCounter, batch_sample_labels); } @@ -1845,11 +1873,13 @@ static int32_t vnodeProcessAlterConfigReq(SVnode *pVnode, int64_t ver, void *pRe } vInfo("vgId:%d, start to alter vnode config, page:%d pageSize:%d buffer:%d szPage:%d szBuf:%" PRIu64 - " cacheLast:%d cacheLastSize:%d days:%d keep0:%d keep1:%d keep2:%d keepTimeOffset:%d fsync:%d level:%d " + " cacheLast:%d cacheLastSize:%d days:%d keep0:%d keep1:%d keep2:%d keepTimeOffset:%d s3KeepLocal:%d " + "s3Compact:%d fsync:%d level:%d " "walRetentionPeriod:%d walRetentionSize:%d", TD_VID(pVnode), req.pages, req.pageSize, req.buffer, req.pageSize * 1024, (uint64_t)req.buffer * 1024 * 1024, req.cacheLast, req.cacheLastSize, req.daysPerFile, req.daysToKeep0, req.daysToKeep1, req.daysToKeep2, - req.keepTimeOffset, req.walFsyncPeriod, req.walLevel, req.walRetentionPeriod, req.walRetentionSize); + req.keepTimeOffset, req.s3KeepLocal, req.s3Compact, req.walFsyncPeriod, req.walLevel, req.walRetentionPeriod, + req.walRetentionSize); if (pVnode->config.cacheLastSize != req.cacheLastSize) { pVnode->config.cacheLastSize = req.cacheLastSize; @@ -1933,6 +1963,13 @@ static int32_t vnodeProcessAlterConfigReq(SVnode *pVnode, int64_t ver, void *pRe pVnode->config.tsdbCfg.minRows = req.minRows; } + if (req.s3KeepLocal != -1 && req.s3KeepLocal != pVnode->config.s3KeepLocal) { + pVnode->config.s3KeepLocal = req.s3KeepLocal; + } + if (req.s3Compact != -1 && req.s3Compact != pVnode->config.s3Compact) { + pVnode->config.s3Compact = req.s3Compact; + } + if (walChanged) { walAlter(pVnode->pWal, &pVnode->config.walCfg); } @@ -1992,8 +2029,8 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe return 0; } -static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, - SRpcMsg *pOriginalMsg) { +static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, + SRpcMsg *pOriginalMsg) { int32_t code = 0; SDecoder *pCoder = &(SDecoder){0}; SDeleteRes *pRes = &(SDeleteRes){0}; @@ -2038,14 +2075,14 @@ static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, in _err: /* if(code == TSDB_CODE_SUCCESS){ - const char *batch_sample_labels[] = {VNODE_METRIC_TAG_VALUE_DELETE, pVnode->monitor.strClusterId, - pVnode->monitor.strDnodeId, tsLocalEp, pVnode->monitor.strVgId, + const char *batch_sample_labels[] = {VNODE_METRIC_TAG_VALUE_DELETE, pVnode->monitor.strClusterId, + pVnode->monitor.strDnodeId, tsLocalEp, pVnode->monitor.strVgId, pOriginalMsg->info.conn.user, "Success"}; taos_counter_inc(pVnode->monitor.insertCounter, batch_sample_labels); } else{ - const char *batch_sample_labels[] = {VNODE_METRIC_TAG_VALUE_DELETE, pVnode->monitor.strClusterId, - pVnode->monitor.strDnodeId, tsLocalEp, pVnode->monitor.strVgId, + const char *batch_sample_labels[] = {VNODE_METRIC_TAG_VALUE_DELETE, pVnode->monitor.strClusterId, + pVnode->monitor.strDnodeId, tsLocalEp, pVnode->monitor.strVgId, pOriginalMsg->info.conn.user, "Failed"}; taos_counter_inc(pVnode->monitor.insertCounter, batch_sample_labels); } @@ -2123,4 +2160,5 @@ static int32_t vnodeProcessConfigChangeReq(SVnode *pVnode, int64_t ver, void *pR int32_t vnodeProcessCompactVnodeReqImpl(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { return 0; } +int32_t tsdbAsyncCompact(STsdb *tsdb, const STimeWindow *tw, bool sync); #endif