From 2dc480adf632360694843fd181665308cf1d85e8 Mon Sep 17 00:00:00 2001 From: lichuang Date: Fri, 29 Oct 2021 14:31:21 +0800 Subject: [PATCH 01/94] [TD-10645][raft]sync manager --- include/libs/sync/sync.h | 2 +- source/libs/sync/inc/{raftInt.h => raft.h} | 18 ++- source/libs/sync/inc/syncInt.h | 57 +++++++++ source/libs/sync/src/sync.c | 130 ++++++++++++++++++++- 4 files changed, 189 insertions(+), 18 deletions(-) rename source/libs/sync/inc/{raftInt.h => raft.h} (74%) create mode 100644 source/libs/sync/inc/syncInt.h diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 9ffd74c229..f9d348d77e 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -152,7 +152,7 @@ int32_t syncPropose(SSyncNode* syncNode, SSyncBuffer buffer, void* pData, bool i // int32_t syncRemoveNode(SSyncNode syncNode, const SNodeInfo *pNode); -extern int32_t syncDebugFlag; +extern int32_t sDebugFlag; #ifdef __cplusplus } diff --git a/source/libs/sync/inc/raftInt.h b/source/libs/sync/inc/raft.h similarity index 74% rename from source/libs/sync/inc/raftInt.h rename to source/libs/sync/inc/raft.h index 75c1c2187f..78c0c97ed6 100644 --- a/source/libs/sync/inc/raftInt.h +++ b/source/libs/sync/inc/raft.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 TAOS Data, Inc. + * Copyright (c) 2019 TAOS Data, Inc. * * This program is free software: you can use, redistribute, and/or modify * it under the terms of the GNU Affero General Public License, version 3 @@ -13,15 +13,11 @@ * along with this program. If not, see . 
*/ -#ifndef _TD_RAFT_INT_H_ -#define _TD_RAFT_INT_H_ +#ifndef _TD_LIBS_SYNC_RAFT_H +#define _TD_LIBS_SYNC_RAFT_H -#ifdef __cplusplus -extern "C" { -#endif +typedef struct SSyncRaft { + +} SSyncRaft; -#ifdef __cplusplus -} -#endif - -#endif /*_TD_RAFT_INT_H_*/ \ No newline at end of file +#endif /* _TD_LIBS_SYNC_RAFT_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h new file mode 100644 index 0000000000..33cbd836a1 --- /dev/null +++ b/source/libs/sync/inc/syncInt.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_LIBS_SYNC_INT_H +#define _TD_LIBS_SYNC_INT_H + +#include "thash.h" +#include "os.h" +#include "sync.h" +#include "raft.h" +#include "tlog.h" + +#define TAOS_SYNC_MAX_WORKER 3 + +typedef struct SSyncWorker { + pthread_t thread; +} SSyncWorker; + +struct SSyncNode { + pthread_mutex_t mutex; + SyncGroupId vgId; + SSyncRaft raft; +}; + +typedef struct SSyncManager { + pthread_mutex_t mutex; + + // worker threads + SSyncWorker worker[TAOS_SYNC_MAX_WORKER]; + + // vgroup hash table + SHashObj* vgroupTable; + +} SSyncManager; + +extern SSyncManager* gSyncManager; + +#define syncFatal(...) do { if (sDebugFlag & DEBUG_FATAL) { taosPrintLog("SYNC FATAL ", 255, __VA_ARGS__); }} while(0) +#define syncError(...) do { if (sDebugFlag & DEBUG_ERROR) { taosPrintLog("SYNC ERROR ", 255, __VA_ARGS__); }} while(0) +#define syncWarn(...) 
do { if (sDebugFlag & DEBUG_WARN) { taosPrintLog("SYNC WARN ", 255, __VA_ARGS__); }} while(0) +#define syncInfo(...) do { if (sDebugFlag & DEBUG_INFO) { taosPrintLog("SYNC ", 255, __VA_ARGS__); }} while(0) +#define syncDebug(...) do { if (sDebugFlag & DEBUG_DEBUG) { taosPrintLog("SYNC ", sDebugFlag, __VA_ARGS__); }} while(0) +#define syncTrace(...) do { if (sDebugFlag & DEBUG_TRACE) { taosPrintLog("SYNC ", sDebugFlag, __VA_ARGS__); }} while(0) + +#endif /* _TD_LIBS_SYNC_INT_H */ \ No newline at end of file diff --git a/source/libs/sync/src/sync.c b/source/libs/sync/src/sync.c index 879f2d4f6d..a974a17ad2 100644 --- a/source/libs/sync/src/sync.c +++ b/source/libs/sync/src/sync.c @@ -13,14 +13,132 @@ * along with this program. If not, see . */ -#include "sync.h" +#include "syncInt.h" -int32_t syncInit() { return 0; } +SSyncManager* gSyncManager = NULL; -void syncCleanUp() {} +static int syncOpenWorkerPool(SSyncManager* syncManager); +static int syncCloseWorkerPool(SSyncManager* syncManager); +static void *syncWorkerMain(void *argv); -SSyncNode* syncStart(const SSyncInfo* pInfo) { return NULL; } +int32_t syncInit() { + if (gSyncManager != NULL) { + return 0; + } -void syncStop(const SSyncNode* pNode) {} + gSyncManager = (SSyncManager*)malloc(sizeof(SSyncManager)); + if (gSyncManager == NULL) { + syncError("malloc SSyncManager fail"); + return -1; + } -void syncReconfig(const SSyncNode* pNode, const SSyncCluster* pCfg) {} \ No newline at end of file + pthread_mutex_init(&gSyncManager->mutex, NULL); + // init worker pool + if (syncOpenWorkerPool(gSyncManager) != 0) { + syncCleanUp(); + return -1; + } + + // init vgroup hash table + gSyncManager->vgroupTable = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); + if (gSyncManager->vgroupTable == NULL) { + syncCleanUp(); + return -1; + } + return 0; +} + +void syncCleanUp() { + if (gSyncManager == NULL) { + return; + } + pthread_mutex_lock(&gSyncManager->mutex); + if 
(gSyncManager->vgroupTable) { + taosHashCleanup(gSyncManager->vgroupTable); + } + syncCloseWorkerPool(gSyncManager); + pthread_mutex_unlock(&gSyncManager->mutex); + pthread_mutex_destroy(&gSyncManager->mutex); + free(gSyncManager); + gSyncManager = NULL; +} + +SSyncNode* syncStart(const SSyncInfo* pInfo) { + pthread_mutex_lock(&gSyncManager->mutex); + + SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &pInfo->vgId, sizeof(SyncGroupId)); + if (ppNode != NULL) { + syncInfo("vgroup %d already exist", pInfo->vgId); + pthread_mutex_unlock(&gSyncManager->mutex); + return *ppNode; + } + + SSyncNode *pNode = (SSyncNode*)malloc(sizeof(SSyncNode)); + if (pNode == NULL) { + syncInfo("malloc vgroup %d node fail", pInfo->vgId); + pthread_mutex_unlock(&gSyncManager->mutex); + return NULL; + } + + pthread_mutex_init(&pNode->mutex, NULL); + + taosHashPut(gSyncManager->vgroupTable, &pInfo->vgId, sizeof(SyncGroupId), &pNode, sizeof(SSyncNode *)); + + pthread_mutex_unlock(&gSyncManager->mutex); + return NULL; +} + +void syncStop(const SSyncNode* pNode) { + pthread_mutex_lock(&gSyncManager->mutex); + + SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &pNode->vgId, sizeof(SyncGroupId)); + if (ppNode == NULL) { + syncInfo("vgroup %d not exist", pNode->vgId); + pthread_mutex_unlock(&gSyncManager->mutex); + return; + } + assert(*ppNode == pNode); + + taosHashRemove(gSyncManager->vgroupTable, &pNode->vgId, sizeof(SyncGroupId)); + pthread_mutex_unlock(&gSyncManager->mutex); + + pthread_mutex_destroy(&pNode->mutex); + free(*ppNode); +} + +void syncReconfig(const SSyncNode* pNode, const SSyncCluster* pCfg) {} + +static int syncOpenWorkerPool(SSyncManager* syncManager) { + int i; + pthread_attr_t thattr; + + pthread_attr_init(&thattr); + pthread_attr_setdetachstate(&thattr, PTHREAD_CREATE_JOINABLE); + + for (i = 0; i < TAOS_SYNC_MAX_WORKER; ++i) { + SSyncWorker* pWorker = &(syncManager->worker[i]); + + if (pthread_create(&(pWorker->thread), &thattr, (void 
*)syncWorkerMain, pWorker) != 0) { + syncError("failed to create sync worker since %s", strerror(errno)); + + return -1; + } + } + + pthread_attr_destroy(&thattr); + + return 0; +} + +static int syncCloseWorkerPool(SSyncManager* syncManager) { + return 0; +} + +static void *syncWorkerMain(void *argv) { + SSyncWorker* pWorker = (SSyncWorker *)argv; + + taosBlockSIGPIPE(); + setThreadName("syncWorker"); + + return NULL; +} \ No newline at end of file From c319d1cb12840088e0c60fc0db54b92ef0bd17a4 Mon Sep 17 00:00:00 2001 From: lichuang Date: Fri, 29 Oct 2021 16:05:25 +0800 Subject: [PATCH 02/94] [TD-10645][raft]add raft module --- include/libs/sync/sync.h | 7 ++- source/libs/sync/inc/raft.h | 12 ++++- source/libs/sync/inc/raft_message.h | 76 +++++++++++++++++++++++++++++ source/libs/sync/inc/syncInt.h | 3 ++ source/libs/sync/src/raft.c | 74 ++++++++++++++++++++++++++++ source/libs/sync/src/raft_message.c | 17 +++++++ source/libs/sync/src/sync.c | 21 +++++++- 7 files changed, 206 insertions(+), 4 deletions(-) create mode 100644 source/libs/sync/inc/raft_message.h create mode 100644 source/libs/sync/src/raft.c create mode 100644 source/libs/sync/src/raft_message.c diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index f9d348d77e..1c228675bd 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -89,6 +89,10 @@ typedef struct SSyncLogStore { // write log with given index int32_t (*logWrite)(struct SSyncLogStore* logStore, SyncIndex index, SSyncBuffer* pBuf); + // read log from given index with limit, return the actual num in nBuf + int32_t (*logRead)(struct SSyncLogStore* logStore, SyncIndex index, int limit, + SSyncBuffer* pBuf, int* nBuf); + // mark log with given index has been commtted int32_t (*logCommit)(struct SSyncLogStore* logStore, SyncIndex index); @@ -102,6 +106,7 @@ typedef struct SSyncLogStore { typedef struct SSyncServerState { SyncNodeId voteFor; SSyncTerm term; + SyncIndex commitIndex; } SSyncServerState; typedef struct 
SSyncClusterConfig { @@ -146,7 +151,7 @@ SSyncNode* syncStart(const SSyncInfo*); void syncReconfig(const SSyncNode*, const SSyncCluster*); void syncStop(const SSyncNode*); -int32_t syncPropose(SSyncNode* syncNode, SSyncBuffer buffer, void* pData, bool isWeak); +int32_t syncPropose(SSyncNode* syncNode, const SSyncBuffer* pBuf, void* pData, bool isWeak); // int32_t syncAddNode(SSyncNode syncNode, const SNodeInfo *pNode); diff --git a/source/libs/sync/inc/raft.h b/source/libs/sync/inc/raft.h index 78c0c97ed6..0df46db3fc 100644 --- a/source/libs/sync/inc/raft.h +++ b/source/libs/sync/inc/raft.h @@ -16,8 +16,18 @@ #ifndef _TD_LIBS_SYNC_RAFT_H #define _TD_LIBS_SYNC_RAFT_H +#include "sync.h" +#include "raft_message.h" + typedef struct SSyncRaft { - + // owner sync node + SSyncNode* pNode; + + SSyncInfo info; + } SSyncRaft; +int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo); +int32_t syncRaftStep(SSyncRaft* pRaft, const RaftMessage* pMsg); + #endif /* _TD_LIBS_SYNC_RAFT_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/raft_message.h b/source/libs/sync/inc/raft_message.h new file mode 100644 index 0000000000..cb0552500a --- /dev/null +++ b/source/libs/sync/inc/raft_message.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef _TD_LIBS_SYNC_RAFT_MESSAGE_H +#define _TD_LIBS_SYNC_RAFT_MESSAGE_H + +#include "sync.h" + +/** + * below define message type which handled by Raft node thread + * internal message, which communicate in threads, start with RAFT_MSG_INTERNAL_*, + * internal message use pointer only, need not to be decode/encode + * outter message start with RAFT_MSG_*, need to implement its decode/encode functions + **/ +typedef enum RaftMessageType { + // client propose a cmd + RAFT_MSG_INTERNAL_PROP = 1, + + RAFT_MSG_APPEND, + RAFT_MSG_APPEND_RESP, + + RAFT_MSG_VOTE, + RAFT_MSG_VOTE_RESP, + + RAFT_MSG_PRE_VOTE, + RAFT_MSG_PRE_VOTE_RESP, + +} RaftMessageType; + +typedef struct RaftMsgInternal_Prop { + const SSyncBuffer *pBuf; + bool isWeak; + void* pData; +} RaftMsgInternal_Prop; + +typedef struct RaftMessage { + RaftMessageType msgType; + SSyncTerm term; + SyncNodeId from; + SyncNodeId to; + + union { + RaftMsgInternal_Prop propose; + }; +} RaftMessage; + +static FORCE_INLINE RaftMessage* syncInitPropMsg(RaftMessage* pMsg, const SSyncBuffer* pBuf, void* pData, bool isWeak) { + *pMsg = (RaftMessage) { + .msgType = RAFT_MSG_INTERNAL_PROP, + .propose = (RaftMsgInternal_Prop) { + .isWeak = isWeak, + .pBuf = pBuf, + .pData = pData, + }, + }; + + return pMsg; +} + +static FORCE_INLINE bool syncIsInternalMsg(const RaftMessage* pMsg) { + return pMsg->msgType == RAFT_MSG_INTERNAL_PROP; +} + +#endif /* _TD_LIBS_SYNC_RAFT_MESSAGE_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 33cbd836a1..c1c3ed17a8 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -40,6 +40,9 @@ typedef struct SSyncManager { // worker threads SSyncWorker worker[TAOS_SYNC_MAX_WORKER]; + // sync net worker + SSyncWorker netWorker; + // vgroup hash table SHashObj* vgroupTable; diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c new file mode 100644 index 0000000000..109b08902a --- /dev/null 
+++ b/source/libs/sync/src/raft.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "raft.h" +#include "syncInt.h" + +#ifndef MIN +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) +#endif + +#define RAFT_READ_LOG_MAX_NUM 100 + +int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { + SSyncNode* pNode = pRaft->pNode; + SSyncServerState serverState; + SStateManager* stateManager; + SSyncLogStore* logStore; + SSyncFSM* fsm; + SyncIndex initIndex = pInfo->snapshotIndex; + SSyncBuffer buffer[RAFT_READ_LOG_MAX_NUM]; + int nBuf, limit, i; + + memcpy(&pRaft->info, pInfo, sizeof(SSyncInfo)); + stateManager = &(pRaft->info.stateManager); + logStore = &(pRaft->info.logStore); + fsm = &(pRaft->info.fsm); + + // read server state + if (stateManager->readServerState(stateManager, &serverState) != 0) { + syncError("readServerState for vgid %d fail", pInfo->vgId); + return -1; + } + assert(initIndex <= serverState.commitIndex); + + // restore fsm state from snapshot index + 1, until commitIndex + ++initIndex; + while (initIndex < serverState.commitIndex) { + limit = MIN(RAFT_READ_LOG_MAX_NUM, serverState.commitIndex - initIndex); + + if (logStore->logRead(logStore, initIndex, limit, buffer, &nBuf) != 0) { + return -1; + } + assert(limit == nBuf); + + for (i = 0; i < limit; ++i) { + fsm->applyLog(fsm, initIndex + i, &(buffer[i]), NULL); + free(buffer[i].data); + } + initIndex += nBuf; + } + assert(initIndex == 
serverState.commitIndex); + + syncInfo("restore vgid %d state: snapshot index:", pInfo->vgId); + return 0; +} + +int32_t syncRaftStep(SSyncRaft* pRaft, const RaftMessage* pMsg) { + if (!syncIsInternalMsg(pMsg)) { + free(pMsg); + } + return 0; +} \ No newline at end of file diff --git a/source/libs/sync/src/raft_message.c b/source/libs/sync/src/raft_message.c new file mode 100644 index 0000000000..d35efce9db --- /dev/null +++ b/source/libs/sync/src/raft_message.c @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "raft_message.h" + diff --git a/source/libs/sync/src/sync.c b/source/libs/sync/src/sync.c index a974a17ad2..e627cf8bc1 100644 --- a/source/libs/sync/src/sync.c +++ b/source/libs/sync/src/sync.c @@ -75,7 +75,15 @@ SSyncNode* syncStart(const SSyncInfo* pInfo) { SSyncNode *pNode = (SSyncNode*)malloc(sizeof(SSyncNode)); if (pNode == NULL) { - syncInfo("malloc vgroup %d node fail", pInfo->vgId); + syncError("malloc vgroup %d node fail", pInfo->vgId); + pthread_mutex_unlock(&gSyncManager->mutex); + return NULL; + } + + // start raft + pNode->raft.pNode = pNode; + if (syncRaftStart(&pNode->raft, pInfo) != 0) { + syncError("raft start at %d node fail", pInfo->vgId); pthread_mutex_unlock(&gSyncManager->mutex); return NULL; } @@ -102,10 +110,19 @@ void syncStop(const SSyncNode* pNode) { taosHashRemove(gSyncManager->vgroupTable, &pNode->vgId, sizeof(SyncGroupId)); pthread_mutex_unlock(&gSyncManager->mutex); - pthread_mutex_destroy(&pNode->mutex); + pthread_mutex_destroy(&((*ppNode)->mutex)); free(*ppNode); } +int32_t syncPropose(SSyncNode* syncNode, const SSyncBuffer* pBuf, void* pData, bool isWeak) { + RaftMessage msg; + + pthread_mutex_lock(&syncNode->mutex); + int32_t ret = syncRaftStep(&syncNode->raft, syncInitPropMsg(&msg, pBuf, pData, isWeak)); + pthread_mutex_unlock(&syncNode->mutex); + return ret; +} + void syncReconfig(const SSyncNode* pNode, const SSyncCluster* pCfg) {} static int syncOpenWorkerPool(SSyncManager* syncManager) { From 5b7261d63fab335351123b0c6b025f28aa48c9fb Mon Sep 17 00:00:00 2001 From: lichuang Date: Fri, 29 Oct 2021 17:09:25 +0800 Subject: [PATCH 03/94] [TD-10645][raft]add sync node timer --- source/libs/sync/inc/raft.h | 1 + source/libs/sync/inc/raft_message.h | 2 ++ source/libs/sync/inc/syncInt.h | 5 +++++ source/libs/sync/src/raft.c | 8 ++++--- source/libs/sync/src/raft_message.c | 5 +++++ source/libs/sync/src/sync.c | 33 ++++++++++++++++++++++++++++- 6 files changed, 50 insertions(+), 4 deletions(-) diff --git 
a/source/libs/sync/inc/raft.h b/source/libs/sync/inc/raft.h index 0df46db3fc..f81040658e 100644 --- a/source/libs/sync/inc/raft.h +++ b/source/libs/sync/inc/raft.h @@ -29,5 +29,6 @@ typedef struct SSyncRaft { int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo); int32_t syncRaftStep(SSyncRaft* pRaft, const RaftMessage* pMsg); +int32_t syncRaftTick(SSyncRaft* pRaft); #endif /* _TD_LIBS_SYNC_RAFT_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/raft_message.h b/source/libs/sync/inc/raft_message.h index cb0552500a..faf14840c9 100644 --- a/source/libs/sync/inc/raft_message.h +++ b/source/libs/sync/inc/raft_message.h @@ -73,4 +73,6 @@ static FORCE_INLINE bool syncIsInternalMsg(const RaftMessage* pMsg) { return pMsg->msgType == RAFT_MSG_INTERNAL_PROP; } +void syncFreeMessage(const RaftMessage* pMsg); + #endif /* _TD_LIBS_SYNC_RAFT_MESSAGE_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index c1c3ed17a8..81cb686781 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -30,8 +30,10 @@ typedef struct SSyncWorker { struct SSyncNode { pthread_mutex_t mutex; + int32_t refCount; SyncGroupId vgId; SSyncRaft raft; + void* syncTimer; }; typedef struct SSyncManager { @@ -46,6 +48,9 @@ typedef struct SSyncManager { // vgroup hash table SHashObj* vgroupTable; + // timer manager + void* syncTimerManager; + } SSyncManager; extern SSyncManager* gSyncManager; diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c index 109b08902a..23442803c4 100644 --- a/source/libs/sync/src/raft.c +++ b/source/libs/sync/src/raft.c @@ -67,8 +67,10 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { } int32_t syncRaftStep(SSyncRaft* pRaft, const RaftMessage* pMsg) { - if (!syncIsInternalMsg(pMsg)) { - free(pMsg); - } + syncFreeMessage(pMsg); + return 0; +} + +int32_t syncRaftTick(SSyncRaft* pRaft) { return 0; } \ No newline at end of file diff --git 
a/source/libs/sync/src/raft_message.c b/source/libs/sync/src/raft_message.c index d35efce9db..912314daf2 100644 --- a/source/libs/sync/src/raft_message.c +++ b/source/libs/sync/src/raft_message.c @@ -15,3 +15,8 @@ #include "raft_message.h" +void syncFreeMessage(const RaftMessage* pMsg) { + if (!syncIsInternalMsg(pMsg)) { + free((RaftMessage*)pMsg); + } +} \ No newline at end of file diff --git a/source/libs/sync/src/sync.c b/source/libs/sync/src/sync.c index e627cf8bc1..a9df02f818 100644 --- a/source/libs/sync/src/sync.c +++ b/source/libs/sync/src/sync.c @@ -14,12 +14,16 @@ */ #include "syncInt.h" +#include "ttimer.h" SSyncManager* gSyncManager = NULL; +#define SYNC_TICK_TIMER 50 + static int syncOpenWorkerPool(SSyncManager* syncManager); static int syncCloseWorkerPool(SSyncManager* syncManager); static void *syncWorkerMain(void *argv); +static void syncNodeTick(void *param, void *tmrId); int32_t syncInit() { if (gSyncManager != NULL) { @@ -33,6 +37,14 @@ int32_t syncInit() { } pthread_mutex_init(&gSyncManager->mutex, NULL); + + // init sync timer manager + gSyncManager->syncTimerManager = taosTmrInit(1000, 50, 10000, "SYNC"); + if (gSyncManager->syncTimerManager == NULL) { + syncCleanUp(); + return -1; + } + // init worker pool if (syncOpenWorkerPool(gSyncManager) != 0) { syncCleanUp(); @@ -56,6 +68,7 @@ void syncCleanUp() { if (gSyncManager->vgroupTable) { taosHashCleanup(gSyncManager->vgroupTable); } + taosTmrCleanUp(gSyncManager->syncTimerManager); syncCloseWorkerPool(gSyncManager); pthread_mutex_unlock(&gSyncManager->mutex); pthread_mutex_destroy(&gSyncManager->mutex); @@ -80,6 +93,8 @@ SSyncNode* syncStart(const SSyncInfo* pInfo) { return NULL; } + pNode->syncTimer = taosTmrStart(syncNodeTick, SYNC_TICK_TIMER, (void*)pInfo->vgId, gSyncManager->syncTimerManager); + // start raft pNode->raft.pNode = pNode; if (syncRaftStart(&pNode->raft, pInfo) != 0) { @@ -106,7 +121,8 @@ void syncStop(const SSyncNode* pNode) { return; } assert(*ppNode == pNode); - + 
taosTmrStop(pNode->syncTimer); + taosHashRemove(gSyncManager->vgroupTable, &pNode->vgId, sizeof(SyncGroupId)); pthread_mutex_unlock(&gSyncManager->mutex); @@ -158,4 +174,19 @@ static void *syncWorkerMain(void *argv) { setThreadName("syncWorker"); return NULL; +} + +static void syncNodeTick(void *param, void *tmrId) { + SyncGroupId vgId = (SyncGroupId)param; + SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &vgId, sizeof(SyncGroupId)); + if (ppNode == NULL) { + return; + } + SSyncNode *pNode = *ppNode; + + pthread_mutex_lock(&pNode->mutex); + syncRaftTick(&pNode->raft); + pthread_mutex_unlock(&pNode->mutex); + + pNode->syncTimer = taosTmrStart(syncNodeTick, SYNC_TICK_TIMER, (void*)pNode->vgId, gSyncManager->syncTimerManager); } \ No newline at end of file From e779eed956acbcd47d531653ea72f1722ec327bf Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 10:33:53 +0800 Subject: [PATCH 04/94] more --- include/common/trequest.h | 31 +++++++++ include/server/vnode/meta/impl/metaImpl.h | 58 +++++++++++++++++ include/server/vnode/meta/meta.h | 74 +++++----------------- source/dnode/vnode/meta/src/metaMain.c | 12 ++-- source/dnode/vnode/meta/test/metaTests.cpp | 6 +- 5 files changed, 114 insertions(+), 67 deletions(-) create mode 100644 include/common/trequest.h create mode 100644 include/server/vnode/meta/impl/metaImpl.h diff --git a/include/common/trequest.h b/include/common/trequest.h new file mode 100644 index 0000000000..d9e5bf9a92 --- /dev/null +++ b/include/common/trequest.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_TREQUEST_H_ +#define _TD_TREQUEST_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct SRequest SRequest; +typedef struct SReqBatch SReqBatch; +typedef struct SReqBatchIter SReqBatchIter; + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_TREQUEST_H_*/ \ No newline at end of file diff --git a/include/server/vnode/meta/impl/metaImpl.h b/include/server/vnode/meta/impl/metaImpl.h new file mode 100644 index 0000000000..1e2bf944ec --- /dev/null +++ b/include/server/vnode/meta/impl/metaImpl.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef _TD_META_IMPL_H_ +#define _TD_META_IMPL_H_ + +#include "os.h" + +#include "taosmsg.h" + +#ifdef __cplusplus +extern "C" { +#endif +typedef uint64_t tb_uid_t; + +typedef enum { META_INIT_TABLE = 0, META_SUPER_TABLE = 1, META_CHILD_TABLE = 2, META_NORMAL_TABLE = 3 } EMetaTableT; +typedef struct SSuperTableOpts { + tb_uid_t uid; + STSchema *pSchema; // (ts timestamp, a int) + STSchema *pTagSchema; // (tag1 binary(10), tag2 int) +} SSuperTableOpts; + +typedef struct SChildTableOpts { + tb_uid_t suid; // super table uid + SKVRow tags; // tag value of the child table +} SChildTableOpts; + +typedef struct SNormalTableOpts { + STSchema *pSchema; +} SNormalTableOpts; + +struct STableOptions { + int8_t type; + char * name; + union { + SSuperTableOpts superOpts; + SChildTableOpts childOpts; + SNormalTableOpts normalOpts; + }; +}; + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_META_IMPL_H_*/ \ No newline at end of file diff --git a/include/server/vnode/meta/meta.h b/include/server/vnode/meta/meta.h index ae81f995d7..264e8716cb 100644 --- a/include/server/vnode/meta/meta.h +++ b/include/server/vnode/meta/meta.h @@ -16,81 +16,37 @@ #ifndef _TD_META_H_ #define _TD_META_H_ -#include "taosmsg.h" - -#include "os.h" +#include "impl/metaImpl.h" #ifdef __cplusplus extern "C" { #endif -/* ------------------------ APIs Exposed ------------------------ */ - // Types exported -typedef uint64_t tb_uid_t; -typedef struct SMeta SMeta; -typedef struct SMetaOpts SMetaOpts; -typedef struct SMetaQueryHandle SMetaQueryHandle; -typedef struct SMetaQueryOpts SMetaQueryOpts; -typedef struct STableOpts STableOpts; +typedef struct SMeta SMeta; +typedef struct SMetaOptions SMetaOptions; +typedef struct STableOptions STableOptions; // SMeta operations -int metaCreate(const char *path); -void metaDestroy(const char *path); -SMeta *metaOpen(SMetaOpts *); +SMeta *metaOpen(const char *path, const SMetaOptions *); void metaClose(SMeta *); -int metaCreateTable(SMeta *, const STableOpts *); 
-int metaDropTable(SMeta *, uint64_t tuid_t); -int metaAlterTable(SMeta *, void *); +void metaRemove(const char *path); +int metaCreateTable(SMeta *pMeta, const STableOptions *); +int metaDropTable(SMeta *pMeta, tb_uid_t uid); int metaCommit(SMeta *); // Options -SMetaOpts *metaOptionsCreate(); -void metaOptionsDestroy(SMetaOpts *); -void metaOptionsSetCache(SMetaOpts *, size_t capacity); - -// SMetaQueryHandle -SMetaQueryHandle *metaQueryHandleCreate(SMetaQueryOpts *); -void metaQueryHandleDestroy(SMetaQueryHandle *); - -// SMetaQueryOpts -SMetaQueryOpts *metaQueryOptionsCreate(); -void metaQueryOptionsDestroy(SMetaQueryOpts *); +SMetaOptions *metaOptionsCreate(); +void metaOptionsDestroy(SMetaOptions *); +void metaOptionsSetCache(SMetaOptions *, size_t capacity); // STableOpts #define META_TABLE_OPTS_DECLARE(name) STableOpts name = {0} -void metaNormalTableOptsInit(STableOpts *, const char *name, const STSchema *pSchema); -void metaSuperTableOptsInit(STableOpts *, const char *name, tb_uid_t uid, const STSchema *pSchema, +void metaNormalTableOptsInit(STableOptions *, const char *name, const STSchema *pSchema); +void metaSuperTableOptsInit(STableOptions *, const char *name, tb_uid_t uid, const STSchema *pSchema, const STSchema *pTagSchema); -void metaChildTableOptsInit(STableOpts *, const char *name, tb_uid_t suid, const SKVRow tags); -void metaTableOptsClear(STableOpts *); - -/* ------------------------ Impl should hidden ------------------------ */ -typedef enum { META_INIT_TABLE = 0, META_SUPER_TABLE = 1, META_CHILD_TABLE = 2, META_NORMAL_TABLE = 3 } EMetaTableT; -typedef struct SSuperTableOpts { - tb_uid_t uid; - STSchema *pSchema; // (ts timestamp, a int) - STSchema *pTagSchema; // (tag1 binary(10), tag2 int) -} SSuperTableOpts; - -typedef struct SChildTableOpts { - tb_uid_t suid; // super table uid - SKVRow tags; // tag value of the child table -} SChildTableOpts; - -typedef struct SNormalTableOpts { - STSchema *pSchema; -} SNormalTableOpts; - -struct 
STableOpts { - int8_t type; - char * name; - union { - SSuperTableOpts superOpts; - SChildTableOpts childOpts; - SNormalTableOpts normalOpts; - }; -}; +void metaChildTableOptsInit(STableOptions *, const char *name, tb_uid_t suid, const SKVRow tags); +void metaTableOptsClear(STableOptions *); #ifdef __cplusplus } diff --git a/source/dnode/vnode/meta/src/metaMain.c b/source/dnode/vnode/meta/src/metaMain.c index 4efcd67908..8844055a98 100644 --- a/source/dnode/vnode/meta/src/metaMain.c +++ b/source/dnode/vnode/meta/src/metaMain.c @@ -21,7 +21,7 @@ static int metaCreateSuperTable(SMeta *pMeta, const char *tbname, const SSuperTa static int metaCreateChildTable(SMeta *pMeta, const char *tbname, const SChildTableOpts *pChildTableOpts); static int metaCreateNormalTable(SMeta *pMeta, const char *tbname, const SNormalTableOpts *pNormalTableOpts); -SMeta *metaOpen(SMetaOpts *pMetaOpts) { +SMeta *metaOpen(const char *path, const SMetaOptions *pMetaOpts) { SMeta *pMeta = NULL; pMeta = (SMeta *)calloc(1, sizeof(*pMeta)); @@ -81,7 +81,7 @@ void metaClose(SMeta *pMeta) { } } -int metaCreateTable(SMeta *pMeta, const STableOpts *pTableOpts) { +int metaCreateTable(SMeta *pMeta, const STableOptions *pTableOpts) { size_t vallen; char * pUid; @@ -213,13 +213,13 @@ static int metaCreateNormalTable(SMeta *pMeta, const char *tbname, const SNormal return 0; } -void metaNormalTableOptsInit(STableOpts *pTableOpts, const char *name, const STSchema *pSchema) { +void metaNormalTableOptsInit(STableOptions *pTableOpts, const char *name, const STSchema *pSchema) { pTableOpts->type = META_NORMAL_TABLE; pTableOpts->name = strdup(name); pTableOpts->normalOpts.pSchema = tdDupSchema(pSchema); } -void metaSuperTableOptsInit(STableOpts *pTableOpts, const char *name, tb_uid_t uid, const STSchema *pSchema, +void metaSuperTableOptsInit(STableOptions *pTableOpts, const char *name, tb_uid_t uid, const STSchema *pSchema, const STSchema *pTagSchema) { pTableOpts->type = META_SUPER_TABLE; pTableOpts->name = 
strdup(name); @@ -228,14 +228,14 @@ void metaSuperTableOptsInit(STableOpts *pTableOpts, const char *name, tb_uid_t u pTableOpts->superOpts.pTagSchema = tdDupSchema(pTagSchema); } -void metaChildTableOptsInit(STableOpts *pTableOpts, const char *name, tb_uid_t suid, const SKVRow tags) { +void metaChildTableOptsInit(STableOptions *pTableOpts, const char *name, tb_uid_t suid, const SKVRow tags) { pTableOpts->type = META_CHILD_TABLE; pTableOpts->name = strdup(name); pTableOpts->childOpts.suid = suid; pTableOpts->childOpts.tags = tdKVRowDup(tags); } -void metaTableOptsClear(STableOpts *pTableOpts) { +void metaTableOptsClear(STableOptions *pTableOpts) { switch (pTableOpts->type) { case META_NORMAL_TABLE: tfree(pTableOpts->name); diff --git a/source/dnode/vnode/meta/test/metaTests.cpp b/source/dnode/vnode/meta/test/metaTests.cpp index 727d44f341..49d6b99c9f 100644 --- a/source/dnode/vnode/meta/test/metaTests.cpp +++ b/source/dnode/vnode/meta/test/metaTests.cpp @@ -4,6 +4,7 @@ #include "meta.h" +#if 0 static STSchema *metaGetSimpleSchema() { STSchema * pSchema = NULL; STSchemaBuilder sb = {0}; @@ -38,7 +39,7 @@ static SKVRow metaGetSimpleTags() { TEST(MetaTest, DISABLED_meta_create_1m_normal_tables_test) { // Open Meta - SMeta *meta = metaOpen(NULL); + SMeta *meta = metaOpen(NULL, NULL); std::cout << "Meta is opened!" << std::endl; // Create 1000000 normal tables @@ -100,4 +101,5 @@ TEST(MetaTest, meta_create_1m_child_tables_test) { // Destroy Meta metaDestroy("meta"); std::cout << "Meta is destroyed!" 
<< std::endl; -} \ No newline at end of file +} +#endif \ No newline at end of file From fca35ceb29ee8556474812abf7ce25c4e5c16a19 Mon Sep 17 00:00:00 2001 From: lichuang Date: Tue, 2 Nov 2021 10:49:23 +0800 Subject: [PATCH 05/94] [TD-10645][raft]add sync rpc client and server --- source/libs/sync/CMakeLists.txt | 1 + source/libs/sync/inc/syncInt.h | 11 +++- source/libs/sync/src/sync.c | 106 +++++++++++++++++++++++++++++++- 3 files changed, 113 insertions(+), 5 deletions(-) diff --git a/source/libs/sync/CMakeLists.txt b/source/libs/sync/CMakeLists.txt index 124f4a1fee..37ee5194c8 100644 --- a/source/libs/sync/CMakeLists.txt +++ b/source/libs/sync/CMakeLists.txt @@ -4,6 +4,7 @@ add_library(sync ${SYNC_SRC}) target_link_libraries( sync PUBLIC common + PUBLIC transport PUBLIC util PUBLIC wal ) diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 81cb686781..73015e87a1 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -39,12 +39,17 @@ struct SSyncNode { typedef struct SSyncManager { pthread_mutex_t mutex; + // sync server rpc + void* serverRpc; + // rpc server hash table base on FQDN:port key + SHashObj* rpcServerTable; + + // sync client rpc + void* clientRpc; + // worker threads SSyncWorker worker[TAOS_SYNC_MAX_WORKER]; - // sync net worker - SSyncWorker netWorker; - // vgroup hash table SHashObj* vgroupTable; diff --git a/source/libs/sync/src/sync.c b/source/libs/sync/src/sync.c index a9df02f818..e3d0606c08 100644 --- a/source/libs/sync/src/sync.c +++ b/source/libs/sync/src/sync.c @@ -14,12 +14,20 @@ */ #include "syncInt.h" +#include "trpc.h" #include "ttimer.h" SSyncManager* gSyncManager = NULL; #define SYNC_TICK_TIMER 50 +#define SYNC_ACTIVITY_TIMER 5 +#define SYNC_SERVER_WORKER 2 +static void syncProcessRsp(SRpcMsg *pMsg, SRpcEpSet *pEpSet); +static void syncProcessReqMsg(SRpcMsg *pMsg, SRpcEpSet *pEpSet); + +static int syncInitRpcServer(SSyncManager* syncManager, const SSyncCluster* pSyncCfg); 
+static int syncInitRpcClient(SSyncManager* syncManager); static int syncOpenWorkerPool(SSyncManager* syncManager); static int syncCloseWorkerPool(SSyncManager* syncManager); static void *syncWorkerMain(void *argv); @@ -30,7 +38,7 @@ int32_t syncInit() { return 0; } - gSyncManager = (SSyncManager*)malloc(sizeof(SSyncManager)); + gSyncManager = (SSyncManager*)calloc(sizeof(SSyncManager), 0); if (gSyncManager == NULL) { syncError("malloc SSyncManager fail"); return -1; @@ -38,6 +46,12 @@ int32_t syncInit() { pthread_mutex_init(&gSyncManager->mutex, NULL); + // init client rpc + if (syncInitRpcClient(gSyncManager) != 0) { + syncCleanUp(); + return -1; + } + // init sync timer manager gSyncManager->syncTimerManager = taosTmrInit(1000, 50, 10000, "SYNC"); if (gSyncManager->syncTimerManager == NULL) { @@ -68,7 +82,13 @@ void syncCleanUp() { if (gSyncManager->vgroupTable) { taosHashCleanup(gSyncManager->vgroupTable); } - taosTmrCleanUp(gSyncManager->syncTimerManager); + if (gSyncManager->clientRpc) { + rpcClose(gSyncManager->clientRpc); + syncInfo("sync inter-sync rpc client is closed"); + } + if (gSyncManager->syncTimerManager) { + taosTmrCleanUp(gSyncManager->syncTimerManager); + } syncCloseWorkerPool(gSyncManager); pthread_mutex_unlock(&gSyncManager->mutex); pthread_mutex_destroy(&gSyncManager->mutex); @@ -86,6 +106,12 @@ SSyncNode* syncStart(const SSyncInfo* pInfo) { return *ppNode; } + // init rpc server + if (syncInitRpcServer(gSyncManager, &pInfo->syncCfg) != 0) { + pthread_mutex_unlock(&gSyncManager->mutex); + return NULL; + } + SSyncNode *pNode = (SSyncNode*)malloc(sizeof(SSyncNode)); if (pNode == NULL) { syncError("malloc vgroup %d node fail", pInfo->vgId); @@ -141,6 +167,82 @@ int32_t syncPropose(SSyncNode* syncNode, const SSyncBuffer* pBuf, void* pData, b void syncReconfig(const SSyncNode* pNode, const SSyncCluster* pCfg) {} +// process rpc rsp message from other sync server +static void syncProcessRsp(SRpcMsg *pMsg, SRpcEpSet *pEpSet) { + +} + +// process rpc 
message from other sync server +static void syncProcessReqMsg(SRpcMsg *pMsg, SRpcEpSet *pEpSet) { + +} + +static int syncInitRpcServer(SSyncManager* syncManager, const SSyncCluster* pSyncCfg) { + if (gSyncManager->rpcServerTable == NULL) { + gSyncManager->rpcServerTable = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + if (gSyncManager->rpcServerTable == NULL) { + syncError("init sync rpc server hash table error"); + return -1; + } + } + assert(pSyncCfg->selfIndex < pSyncCfg->replica && pSyncCfg->selfIndex >= 0); + const SNodeInfo* pNode = &(pSyncCfg->nodeInfo[pSyncCfg->replica]); + char buffer[20] = {'\0'}; + snprintf(buffer, sizeof(buffer), "%s:%d", &(pNode->nodeFqdn[0]), pNode->nodePort); + size_t len = strlen(buffer); + void** ppRpcServer = taosHashGet(gSyncManager->rpcServerTable, buffer, len); + if (ppRpcServer != NULL) { + // already inited + syncInfo("sync rpc server for %s already exist", buffer); + return 0; + } + + SRpcInit rpcInit; + memset(&rpcInit, 0, sizeof(rpcInit)); + rpcInit.localPort = pNode->nodePort; + rpcInit.label = "sync-server"; + rpcInit.numOfThreads = SYNC_SERVER_WORKER; + rpcInit.cfp = syncProcessReqMsg; + rpcInit.sessions = TSDB_MAX_VNODES << 4; + rpcInit.connType = TAOS_CONN_SERVER; + rpcInit.idleTime = SYNC_ACTIVITY_TIMER * 1000; + + void* rpcServer = rpcOpen(&rpcInit); + if (rpcServer == NULL) { + syncInfo("rpcOpen for sync rpc server for %s fail", buffer); + return -1; + } + + taosHashPut(gSyncManager->rpcServerTable, buffer, strlen(buffer), rpcServer, len); + syncInfo("sync rpc server for %s init success", buffer); + + return 0; +} + +static int syncInitRpcClient(SSyncManager* syncManager) { + char secret[TSDB_KEY_LEN] = "secret"; + SRpcInit rpcInit; + memset(&rpcInit, 0, sizeof(rpcInit)); + rpcInit.label = "sync-client"; + rpcInit.numOfThreads = 1; + rpcInit.cfp = syncProcessRsp; + rpcInit.sessions = TSDB_MAX_VNODES << 4; + rpcInit.connType = TAOS_CONN_CLIENT; + 
rpcInit.idleTime = SYNC_ACTIVITY_TIMER * 1000; + rpcInit.user = "t"; + rpcInit.ckey = "key"; + rpcInit.secret = secret; + + syncManager->clientRpc = rpcOpen(&rpcInit); + if (syncManager->clientRpc == NULL) { + syncError("failed to init sync rpc client"); + return -1; + } + + syncInfo("sync inter-sync rpc client is initialized"); + return 0; +} + static int syncOpenWorkerPool(SSyncManager* syncManager) { int i; pthread_attr_t thattr; From 40d58ce1a2d94f1a816e15adb534e6f81d7357fc Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 13:57:16 +0800 Subject: [PATCH 06/94] more --- include/server/vnode/meta/impl/metaImpl.h | 4 +++ include/server/vnode/meta/meta.h | 5 ++-- source/dnode/vnode/meta/src/metaCommit.c | 14 +++++++++++ source/dnode/vnode/meta/src/metaOptions.c | 30 +++++++++++++++++++++++ source/dnode/vnode/meta/src/metaTable.c | 14 +++++++++++ 5 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 source/dnode/vnode/meta/src/metaCommit.c create mode 100644 source/dnode/vnode/meta/src/metaOptions.c create mode 100644 source/dnode/vnode/meta/src/metaTable.c diff --git a/include/server/vnode/meta/impl/metaImpl.h b/include/server/vnode/meta/impl/metaImpl.h index 1e2bf944ec..a9d70500cb 100644 --- a/include/server/vnode/meta/impl/metaImpl.h +++ b/include/server/vnode/meta/impl/metaImpl.h @@ -25,6 +25,10 @@ extern "C" { #endif typedef uint64_t tb_uid_t; +struct SMetaOptions { + size_t lruCacheSize; // LRU cache size +}; + typedef enum { META_INIT_TABLE = 0, META_SUPER_TABLE = 1, META_CHILD_TABLE = 2, META_NORMAL_TABLE = 3 } EMetaTableT; typedef struct SSuperTableOpts { tb_uid_t uid; diff --git a/include/server/vnode/meta/meta.h b/include/server/vnode/meta/meta.h index 264e8716cb..f1d18e27ff 100644 --- a/include/server/vnode/meta/meta.h +++ b/include/server/vnode/meta/meta.h @@ -36,9 +36,8 @@ int metaDropTable(SMeta *pMeta, tb_uid_t uid); int metaCommit(SMeta *); // Options -SMetaOptions *metaOptionsCreate(); -void 
metaOptionsDestroy(SMetaOptions *); -void metaOptionsSetCache(SMetaOptions *, size_t capacity); +void metaOptionsInit(SMetaOptions *); +void metaOptionsClear(SMetaOptions *); // STableOpts #define META_TABLE_OPTS_DECLARE(name) STableOpts name = {0} diff --git a/source/dnode/vnode/meta/src/metaCommit.c b/source/dnode/vnode/meta/src/metaCommit.c new file mode 100644 index 0000000000..f2f48bbc8a --- /dev/null +++ b/source/dnode/vnode/meta/src/metaCommit.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ diff --git a/source/dnode/vnode/meta/src/metaOptions.c b/source/dnode/vnode/meta/src/metaOptions.c new file mode 100644 index 0000000000..646ac873b1 --- /dev/null +++ b/source/dnode/vnode/meta/src/metaOptions.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "meta.h" + +const static SMetaOptions defaultMetaOptions = {.lruCacheSize = 0}; + +static void metaOptionsCopy(SMetaOptions *pDest, const SMetaOptions *pSrc); + +/* ------------------------ EXPOSED METHODS ------------------------ */ +void metaOptionsInit(SMetaOptions *pMetaOptions) { metaOptionsCopy(pMetaOptions, &defaultMetaOptions); } + +void metaOptionsClear(SMetaOptions *pMetaOptions) { + // TODO +} + +/* ------------------------ STATIC METHODS ------------------------ */ +static void metaOptionsCopy(SMetaOptions *pDest, const SMetaOptions *pSrc) { memcpy(pDest, pSrc, sizeof(*pSrc)); } \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaTable.c b/source/dnode/vnode/meta/src/metaTable.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/dnode/vnode/meta/src/metaTable.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ \ No newline at end of file From e9bf4fceb057819994e36541d7fd354bb332a150 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 15:06:56 +0800 Subject: [PATCH 07/94] refact --- include/os/osDir.h | 2 +- source/dnode/vnode/meta/inc/metaDB.h | 39 +++++++ source/dnode/vnode/meta/inc/metaDef.h | 14 +-- source/dnode/vnode/meta/inc/metaOptions.h | 33 ++++++ source/dnode/vnode/meta/inc/metaTbOptions.h | 27 +++++ source/dnode/vnode/meta/inc/metaUid.h | 13 +-- source/dnode/vnode/meta/src/metaDB.c | 14 +++ source/dnode/vnode/meta/src/metaMain.c | 113 ++++++++++++-------- source/dnode/vnode/meta/src/metaOptions.c | 10 +- source/dnode/vnode/meta/src/metaTbOptions.c | 14 +++ source/dnode/vnode/meta/src/metaUid.c | 4 +- source/dnode/vnode/meta/test/CMakeLists.txt | 48 ++++----- source/os/src/osDir.c | 2 +- 13 files changed, 239 insertions(+), 94 deletions(-) create mode 100644 source/dnode/vnode/meta/inc/metaDB.h create mode 100644 source/dnode/vnode/meta/inc/metaOptions.h create mode 100644 source/dnode/vnode/meta/inc/metaTbOptions.h create mode 100644 source/dnode/vnode/meta/src/metaDB.c create mode 100644 source/dnode/vnode/meta/src/metaTbOptions.c diff --git a/include/os/osDir.h b/include/os/osDir.h index 3ee3be2c10..8aefaa171a 100644 --- a/include/os/osDir.h +++ b/include/os/osDir.h @@ -22,7 +22,7 @@ extern "C" { void taosRemoveDir(const char *dirname); bool taosDirExist(char *dirname); -bool taosMkDir(char *dirname); +bool taosMkDir(const char *dirname); void taosRemoveOldFiles(char *dirname, int32_t keepDays); bool taosExpandDir(char *dirname, char *outname, int32_t maxlen); bool taosRealPath(char *dirname, int32_t maxlen); diff --git a/source/dnode/vnode/meta/inc/metaDB.h b/source/dnode/vnode/meta/inc/metaDB.h new file mode 100644 index 0000000000..299b8e0350 --- /dev/null +++ b/source/dnode/vnode/meta/inc/metaDB.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_META_DB_H_ +#define _TD_META_DB_H_ + +#include "meta.h" +#include "tkv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct SMetaDB { + STkvDb * pDB; + STkvDb * pIdx; + STkvCache *pCache; +} SMetaDB; + +int metaOpenDB(SMeta *pMeta); +void metaCloseDB(SMeta *pMeta); + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_META_DB_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/meta/inc/metaDef.h b/source/dnode/vnode/meta/inc/metaDef.h index f5d0b7f74c..ef0cc9e4c2 100644 --- a/source/dnode/vnode/meta/inc/metaDef.h +++ b/source/dnode/vnode/meta/inc/metaDef.h @@ -16,22 +16,18 @@ #ifndef _TD_META_DEF_H_ #define _TD_META_DEF_H_ +#include "metaDB.h" #include "metaUid.h" -#include "tkv.h" #ifdef __cplusplus extern "C" { #endif struct SMeta { - STableUidGenerator uidGenerator; - - STkvDb* tableDb; // uid->table obj - STkvDb* tbnameDb; // tbname --> uid - STkvDb* schemaDb; // uid+version --> schema - STkvDb* tagDb; // uid --> tag - STkvDb* tagIdx; // TODO: need to integrate lucene or our own - // STkvCache* metaCache; // TODO: add a global cache here + char* path; // path of current meta + STbUidGenerator uidGenerator; // meta table UID generator + SMetaDB* pMetaDB; // meta DB for real storage engine + SMetaOptions options; // meta option }; #ifdef __cplusplus diff --git a/source/dnode/vnode/meta/inc/metaOptions.h b/source/dnode/vnode/meta/inc/metaOptions.h new file mode 100644 index 
0000000000..7033a873df --- /dev/null +++ b/source/dnode/vnode/meta/inc/metaOptions.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_META_OPTIONS_H_ +#define _TD_META_OPTIONS_H_ + +#include "meta.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const SMetaOptions defaultMetaOptions; + +int metaValidateOptions(const SMetaOptions *); +void metaOptionsCopy(SMetaOptions *pDest, const SMetaOptions *pSrc); +#ifdef __cplusplus +} +#endif + +#endif /*_TD_META_OPTIONS_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/meta/inc/metaTbOptions.h b/source/dnode/vnode/meta/inc/metaTbOptions.h new file mode 100644 index 0000000000..cd2b2ee0e8 --- /dev/null +++ b/source/dnode/vnode/meta/inc/metaTbOptions.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef _TD_META_TABLE_OPTIONS_H_ +#define _TD_META_TABLE_OPTIONS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_META_TABLE_OPTIONS_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/meta/inc/metaUid.h b/source/dnode/vnode/meta/inc/metaUid.h index 37c3fac6ba..86f4e26cec 100644 --- a/source/dnode/vnode/meta/inc/metaUid.h +++ b/source/dnode/vnode/meta/inc/metaUid.h @@ -23,21 +23,18 @@ extern "C" { #endif /* ------------------------ APIS EXPOSED ------------------------ */ -typedef struct STableUidGenerator STableUidGenerator; +typedef struct STbUidGenerator { + tb_uid_t nextUid; +} STbUidGenerator; // tb_uid_t #define IVLD_TB_UID 0 -tb_uid_t generateUid(STableUidGenerator *); +tb_uid_t generateUid(STbUidGenerator *); // STableUidGenerator -void tableUidGeneratorInit(STableUidGenerator *, tb_uid_t suid); +void tableUidGeneratorInit(STbUidGenerator *, tb_uid_t suid); #define tableUidGeneratorClear(ug) -/* ------------------------ FOR TEST AND COMPILE ONLY ------------------------ */ -struct STableUidGenerator { - tb_uid_t nextUid; -}; - #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaMain.c b/source/dnode/vnode/meta/src/metaMain.c index 8844055a98..0615bf1e50 100644 --- a/source/dnode/vnode/meta/src/metaMain.c +++ b/source/dnode/vnode/meta/src/metaMain.c @@ -13,74 +13,62 @@ * along with this program. If not, see . */ -#include "meta.h" -#include "metaDef.h" #include "tcoding.h" -static int metaCreateSuperTable(SMeta *pMeta, const char *tbname, const SSuperTableOpts *pSuperTableOpts); -static int metaCreateChildTable(SMeta *pMeta, const char *tbname, const SChildTableOpts *pChildTableOpts); -static int metaCreateNormalTable(SMeta *pMeta, const char *tbname, const SNormalTableOpts *pNormalTableOpts); +#include "meta.h" +#include "metaDef.h" +#include "metaOptions.h" +#include "metaDB.h" -SMeta *metaOpen(const char *path, const SMetaOptions *pMetaOpts) { +static SMeta *metaNew(const char *path, const SMetaOptions *pMetaOptions); +static void metaFree(SMeta *pMeta); +static int metaCreateSuperTable(SMeta *pMeta, const char *tbname, const SSuperTableOpts *pSuperTableOpts); +static int metaCreateChildTable(SMeta *pMeta, const char *tbname, const SChildTableOpts *pChildTableOpts); +static int metaCreateNormalTable(SMeta *pMeta, const char *tbname, const SNormalTableOpts *pNormalTableOpts); + +SMeta *metaOpen(const char *path, const SMetaOptions *pMetaOptions) { SMeta *pMeta = NULL; - pMeta = (SMeta *)calloc(1, sizeof(*pMeta)); - if (pMeta == NULL) { + // Set default options + if (pMetaOptions == NULL) { + pMetaOptions = &defaultMetaOptions; + } + + // Validate the options + if (metaValidateOptions(pMetaOptions) < 0) { + // TODO: deal with error return NULL; } - // TODO: check if file exists and handle the error - taosMkDir("meta"); + // Allocate handle + pMeta = metaNew(path, pMetaOptions); + if (pMeta == NULL) { + // TODO: handle error + return NULL; + } - // Open tableDb - STkvOpts *tableDbOpts = tkvOptsCreate(); - tkvOptsSetCreateIfMissing(tableDbOpts, 1); - 
pMeta->tableDb = tkvOpen(tableDbOpts, "meta/table_db"); - tkvOptsDestroy(tableDbOpts); + // Create META path + taosMkDir(path); - // Open tbnameDb - STkvOpts *tbnameDbOpts = tkvOptsCreate(); - tkvOptsSetCreateIfMissing(tbnameDbOpts, 1); - pMeta->tbnameDb = tkvOpen(tbnameDbOpts, "meta/tbname_db"); - tkvOptsDestroy(tbnameDbOpts); + // Open the DBs needed + if (metaOpenDB(pMeta) < 0) { + // TODO: handle error + metaFree(pMeta); + return NULL; + } - // Open schemaDb - STkvOpts *schemaDbOpts = tkvOptsCreate(); - tkvOptsSetCreateIfMissing(schemaDbOpts, 1); - pMeta->schemaDb = tkvOpen(schemaDbOpts, "meta/schema_db"); - tkvOptsDestroy(schemaDbOpts); - - // Open tagDb - STkvOpts *tagDbOpts = tkvOptsCreate(); - tkvOptsSetCreateIfMissing(tagDbOpts, 1); - pMeta->tagDb = tkvOpen(tagDbOpts, "meta/tag_db"); - tkvOptsDestroy(tagDbOpts); - - // Open tagIdx - STkvOpts *tagIdxDbOpts = tkvOptsCreate(); - tkvOptsSetCreateIfMissing(tagIdxDbOpts, 1); - pMeta->tagIdx = tkvOpen(tagIdxDbOpts, "meta/tag_idx_db"); - tkvOptsDestroy(tagIdxDbOpts); - - // TODO: need to figure out how to persist the START UID - tableUidGeneratorInit(&(pMeta->uidGenerator), IVLD_TB_UID); return pMeta; } void metaClose(SMeta *pMeta) { if (pMeta) { tableUidGeneratorClear(&pMeta->uidGenerator); - - tkvClose(pMeta->tagIdx); - tkvClose(pMeta->tagDb); - tkvClose(pMeta->schemaDb); - tkvClose(pMeta->tbnameDb); - tkvClose(pMeta->tableDb); - + metaCloseDB(pMeta); free(pMeta); } } +#if 0 int metaCreateTable(SMeta *pMeta, const STableOptions *pTableOpts) { size_t vallen; char * pUid; @@ -106,8 +94,37 @@ int metaCreateTable(SMeta *pMeta, const STableOptions *pTableOpts) { return 0; } +#endif /* ------------------------ STATIC METHODS ------------------------ */ +static SMeta *metaNew(const char *path, const SMetaOptions *pMetaOptions) { + SMeta *pMeta; + size_t psize = strlen(path); + + pMeta = (SMeta *)calloc(1, sizeof(*pMeta)); + if (pMeta == NULL) { + return NULL; + } + + pMeta->path = strdup(path); + if (pMeta->path == 
NULL) { + return NULL; + } + + metaOptionsCopy(&(pMeta->options), pMetaOptions); + + return pMeta; +}; + +static void metaFree(SMeta *pMeta) { + if (pMeta) { + tfree(pMeta->path); + free(pMeta); + } +} + +// OLD ------------------------------------------------------------------- +#if 0 static int metaCreateSuperTable(SMeta *pMeta, const char *tbname, const SSuperTableOpts *pSuperTableOpts) { size_t vallen; size_t keylen; @@ -258,3 +275,5 @@ void metaTableOptsClear(STableOptions *pTableOpts) { } void metaDestroy(const char *path) { taosRemoveDir(path); } + +#endif \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaOptions.c b/source/dnode/vnode/meta/src/metaOptions.c index 646ac873b1..0f6ba9a9fb 100644 --- a/source/dnode/vnode/meta/src/metaOptions.c +++ b/source/dnode/vnode/meta/src/metaOptions.c @@ -26,5 +26,11 @@ void metaOptionsClear(SMetaOptions *pMetaOptions) { // TODO } -/* ------------------------ STATIC METHODS ------------------------ */ -static void metaOptionsCopy(SMetaOptions *pDest, const SMetaOptions *pSrc) { memcpy(pDest, pSrc, sizeof(*pSrc)); } \ No newline at end of file +int metaValidateOptions(const SMetaOptions *pMetaOptions) { + // TODO + return 0; +} + +void metaOptionsCopy(SMetaOptions *pDest, const SMetaOptions *pSrc) { memcpy(pDest, pSrc, sizeof(*pSrc)); } + +/* ------------------------ STATIC METHODS ------------------------ */ \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaTbOptions.c b/source/dnode/vnode/meta/src/metaTbOptions.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/dnode/vnode/meta/src/metaTbOptions.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaUid.c b/source/dnode/vnode/meta/src/metaUid.c index 80afa490f3..eadacf67d3 100644 --- a/source/dnode/vnode/meta/src/metaUid.c +++ b/source/dnode/vnode/meta/src/metaUid.c @@ -15,12 +15,12 @@ #include "metaUid.h" -tb_uid_t generateUid(STableUidGenerator *pGen) { +tb_uid_t generateUid(STbUidGenerator *pGen) { // Generate a new table UID return ++(pGen->nextUid); } -void tableUidGeneratorInit(STableUidGenerator *pGen, tb_uid_t suid) { +void tableUidGeneratorInit(STbUidGenerator *pGen, tb_uid_t suid) { // Init a generator pGen->nextUid = suid; } \ No newline at end of file diff --git a/source/dnode/vnode/meta/test/CMakeLists.txt b/source/dnode/vnode/meta/test/CMakeLists.txt index b37ba6abd4..e4ea7839c8 100644 --- a/source/dnode/vnode/meta/test/CMakeLists.txt +++ b/source/dnode/vnode/meta/test/CMakeLists.txt @@ -1,24 +1,24 @@ -add_executable(metaTest "") -target_sources(metaTest - PRIVATE - "../src/metaMain.c" - "../src/metaUid.c" - "metaTests.cpp" -) -target_include_directories(metaTest - PUBLIC - "${CMAKE_SOURCE_DIR}/include/server/vnode/meta" - "${CMAKE_CURRENT_SOURCE_DIR}/../inc" -) -target_link_libraries(metaTest - os - util - common - gtest_main - tkv -) -enable_testing() -add_test( - NAME meta_test - COMMAND metaTest -) \ No newline at end of file +# add_executable(metaTest "") +# target_sources(metaTest +# PRIVATE +# "../src/metaMain.c" +# "../src/metaUid.c" +# "metaTests.cpp" +# ) +# target_include_directories(metaTest +# PUBLIC +# "${CMAKE_SOURCE_DIR}/include/server/vnode/meta" +# "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +# ) +# target_link_libraries(metaTest +# os +# util 
+# common +# gtest_main +# tkv +# ) +# enable_testing() +# add_test( +# NAME meta_test +# COMMAND metaTest +# ) \ No newline at end of file diff --git a/source/os/src/osDir.c b/source/os/src/osDir.c index 17ab88edf6..cd5f561918 100644 --- a/source/os/src/osDir.c +++ b/source/os/src/osDir.c @@ -60,7 +60,7 @@ void taosRemoveDir(const char *dirname) { bool taosDirExist(char *dirname) { return access(dirname, F_OK) == 0; } -bool taosMkDir(char *dirname) { +bool taosMkDir(const char *dirname) { int32_t code = mkdir(dirname, 0755); if (code < 0 && errno == EEXIST) { return true; From 1984f4031609c5ad0e64ee1352508938c721a9ed Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 15:24:55 +0800 Subject: [PATCH 08/94] refact --- source/dnode/vnode/meta/inc/metaDef.h | 4 +- source/dnode/vnode/meta/src/metaCommit.c | 7 +++ source/dnode/vnode/meta/src/metaMain.c | 58 ++++++++++++------------ source/dnode/vnode/meta/src/metaTable.c | 14 +++++- 4 files changed, 52 insertions(+), 31 deletions(-) diff --git a/source/dnode/vnode/meta/inc/metaDef.h b/source/dnode/vnode/meta/inc/metaDef.h index ef0cc9e4c2..bfb297ea4d 100644 --- a/source/dnode/vnode/meta/inc/metaDef.h +++ b/source/dnode/vnode/meta/inc/metaDef.h @@ -25,9 +25,9 @@ extern "C" { struct SMeta { char* path; // path of current meta - STbUidGenerator uidGenerator; // meta table UID generator - SMetaDB* pMetaDB; // meta DB for real storage engine SMetaOptions options; // meta option + SMetaDB* pMetaDB; // meta DB for real storage engine + STbUidGenerator uidGenerator; // meta table UID generator }; #ifdef __cplusplus diff --git a/source/dnode/vnode/meta/src/metaCommit.c b/source/dnode/vnode/meta/src/metaCommit.c index f2f48bbc8a..805e4f3e3d 100644 --- a/source/dnode/vnode/meta/src/metaCommit.c +++ b/source/dnode/vnode/meta/src/metaCommit.c @@ -12,3 +12,10 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
*/ + +#include "meta.h" + +int metaCommit(SMeta *pMeta) { + // TODO + return 0; +} \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaMain.c b/source/dnode/vnode/meta/src/metaMain.c index 0615bf1e50..c0fcf6898a 100644 --- a/source/dnode/vnode/meta/src/metaMain.c +++ b/source/dnode/vnode/meta/src/metaMain.c @@ -16,9 +16,9 @@ #include "tcoding.h" #include "meta.h" +#include "metaDB.h" #include "metaDef.h" #include "metaOptions.h" -#include "metaDB.h" static SMeta *metaNew(const char *path, const SMetaOptions *pMetaOptions); static void metaFree(SMeta *pMeta); @@ -57,6 +57,8 @@ SMeta *metaOpen(const char *path, const SMetaOptions *pMetaOptions) { return NULL; } + tableUidGeneratorInit(&(pMeta->uidGenerator), IVLD_TB_UID); + return pMeta; } @@ -68,33 +70,7 @@ void metaClose(SMeta *pMeta) { } } -#if 0 -int metaCreateTable(SMeta *pMeta, const STableOptions *pTableOpts) { - size_t vallen; - char * pUid; - - // Check if table already exists - pUid = tkvGet(pMeta->tbnameDb, NULL, pTableOpts->name, strlen(pTableOpts->name), &vallen); - if (pUid) { - free(pUid); - // Table already exists, return error code - return -1; - } - - switch (pTableOpts->type) { - case META_SUPER_TABLE: - return metaCreateSuperTable(pMeta, pTableOpts->name, &(pTableOpts->superOpts)); - case META_CHILD_TABLE: - return metaCreateChildTable(pMeta, pTableOpts->name, &(pTableOpts->childOpts)); - case META_NORMAL_TABLE: - return metaCreateNormalTable(pMeta, pTableOpts->name, &(pTableOpts->normalOpts)); - default: - ASSERT(0); - } - - return 0; -} -#endif +void metaRemove(const char *path) { taosRemoveDir(path); } /* ------------------------ STATIC METHODS ------------------------ */ static SMeta *metaNew(const char *path, const SMetaOptions *pMetaOptions) { @@ -125,6 +101,32 @@ static void metaFree(SMeta *pMeta) { // OLD ------------------------------------------------------------------- #if 0 +int metaCreateTable(SMeta *pMeta, const STableOptions *pTableOpts) { + size_t vallen; + 
char * pUid; + + // Check if table already exists + pUid = tkvGet(pMeta->tbnameDb, NULL, pTableOpts->name, strlen(pTableOpts->name), &vallen); + if (pUid) { + free(pUid); + // Table already exists, return error code + return -1; + } + + switch (pTableOpts->type) { + case META_SUPER_TABLE: + return metaCreateSuperTable(pMeta, pTableOpts->name, &(pTableOpts->superOpts)); + case META_CHILD_TABLE: + return metaCreateChildTable(pMeta, pTableOpts->name, &(pTableOpts->childOpts)); + case META_NORMAL_TABLE: + return metaCreateNormalTable(pMeta, pTableOpts->name, &(pTableOpts->normalOpts)); + default: + ASSERT(0); + } + + return 0; +} + static int metaCreateSuperTable(SMeta *pMeta, const char *tbname, const SSuperTableOpts *pSuperTableOpts) { size_t vallen; size_t keylen; diff --git a/source/dnode/vnode/meta/src/metaTable.c b/source/dnode/vnode/meta/src/metaTable.c index 6dea4a4e57..029d43dc85 100644 --- a/source/dnode/vnode/meta/src/metaTable.c +++ b/source/dnode/vnode/meta/src/metaTable.c @@ -11,4 +11,16 @@ * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
- */ \ No newline at end of file + */ + +#include "meta.h" + +int metaCreateTable(SMeta *pMeta, const STableOptions *pTbOptions) { + // TODO + return 0; +} + +int metaDropTable(SMeta *pMeta, tb_uid_t uid) { + // TODO + return 0; +} From ff33e67f5b716573686a2dcf0c30dbe389a158d5 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 15:27:14 +0800 Subject: [PATCH 09/94] refact --- source/dnode/vnode/meta/inc/metaDef.h | 2 +- source/dnode/vnode/meta/inc/{metaUid.h => metaTbUid.h} | 0 source/dnode/vnode/meta/src/{metaUid.c => metaTbUid.c} | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename source/dnode/vnode/meta/inc/{metaUid.h => metaTbUid.h} (100%) rename source/dnode/vnode/meta/src/{metaUid.c => metaTbUid.c} (97%) diff --git a/source/dnode/vnode/meta/inc/metaDef.h b/source/dnode/vnode/meta/inc/metaDef.h index bfb297ea4d..aaae07d6f4 100644 --- a/source/dnode/vnode/meta/inc/metaDef.h +++ b/source/dnode/vnode/meta/inc/metaDef.h @@ -17,7 +17,7 @@ #define _TD_META_DEF_H_ #include "metaDB.h" -#include "metaUid.h" +#include "metaTbUid.h" #ifdef __cplusplus extern "C" { diff --git a/source/dnode/vnode/meta/inc/metaUid.h b/source/dnode/vnode/meta/inc/metaTbUid.h similarity index 100% rename from source/dnode/vnode/meta/inc/metaUid.h rename to source/dnode/vnode/meta/inc/metaTbUid.h diff --git a/source/dnode/vnode/meta/src/metaUid.c b/source/dnode/vnode/meta/src/metaTbUid.c similarity index 97% rename from source/dnode/vnode/meta/src/metaUid.c rename to source/dnode/vnode/meta/src/metaTbUid.c index eadacf67d3..87b1199fd9 100644 --- a/source/dnode/vnode/meta/src/metaUid.c +++ b/source/dnode/vnode/meta/src/metaTbUid.c @@ -13,7 +13,7 @@ * along with this program. If not, see . 
*/ -#include "metaUid.h" +#include "metaTbUid.h" tb_uid_t generateUid(STbUidGenerator *pGen) { // Generate a new table UID From 24a0966da6afdb6dcde16b204675fe6ecf6dde3b Mon Sep 17 00:00:00 2001 From: lichuang Date: Tue, 2 Nov 2021 15:50:27 +0800 Subject: [PATCH 10/94] [TD-10645][raft]add raft progress --- include/libs/sync/sync.h | 16 +- source/libs/sync/inc/raft.h | 35 ++- source/libs/sync/inc/raft_progress.h | 181 +++++++++++++ source/libs/sync/inc/raft_unstable_log.h | 115 ++++++++ source/libs/sync/inc/syncInt.h | 1 + source/libs/sync/inc/sync_type.h | 33 +++ source/libs/sync/src/raft.c | 23 +- source/libs/sync/src/raft_progress.c | 317 +++++++++++++++++++++++ source/libs/sync/src/raft_unstable_log.c | 21 ++ 9 files changed, 727 insertions(+), 15 deletions(-) create mode 100644 source/libs/sync/inc/raft_progress.h create mode 100644 source/libs/sync/inc/raft_unstable_log.h create mode 100644 source/libs/sync/inc/sync_type.h create mode 100644 source/libs/sync/src/raft_progress.c create mode 100644 source/libs/sync/src/raft_unstable_log.c diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 1c228675bd..ef8773f5cc 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -61,13 +61,13 @@ typedef struct { typedef struct SSyncFSM { void* pData; - // apply committed log, bufs will be free by raft module + // apply committed log, bufs will be free by sync module int32_t (*applyLog)(struct SSyncFSM* fsm, SyncIndex index, const SSyncBuffer* buf, void* pData); // cluster commit callback int32_t (*onClusterChanged)(struct SSyncFSM* fsm, const SSyncCluster* cluster, void* pData); - // fsm return snapshot in ppBuf, bufs will be free by raft module + // fsm return snapshot in ppBuf, bufs will be free by sync module // TODO: getSnapshot SHOULD be async? 
int32_t (*getSnapshot)(struct SSyncFSM* fsm, SSyncBuffer** ppBuf, int32_t* objId, bool* isLast); @@ -89,18 +89,24 @@ typedef struct SSyncLogStore { // write log with given index int32_t (*logWrite)(struct SSyncLogStore* logStore, SyncIndex index, SSyncBuffer* pBuf); - // read log from given index with limit, return the actual num in nBuf + /** + * read log from given index(included) with limit, return the actual num in nBuf, + * pBuf will be free in sync module + **/ int32_t (*logRead)(struct SSyncLogStore* logStore, SyncIndex index, int limit, SSyncBuffer* pBuf, int* nBuf); // mark log with given index has been commtted int32_t (*logCommit)(struct SSyncLogStore* logStore, SyncIndex index); - // prune log before given index + // prune log before given index(not included) int32_t (*logPrune)(struct SSyncLogStore* logStore, SyncIndex index); - // rollback log after given index + // rollback log after given index(included) int32_t (*logRollback)(struct SSyncLogStore* logStore, SyncIndex index); + + // return last index of log + SyncIndex (*logLastIndex)(struct SSyncLogStore* logStore); } SSyncLogStore; typedef struct SSyncServerState { diff --git a/source/libs/sync/inc/raft.h b/source/libs/sync/inc/raft.h index f81040658e..869baecdda 100644 --- a/source/libs/sync/inc/raft.h +++ b/source/libs/sync/inc/raft.h @@ -17,15 +17,46 @@ #define _TD_LIBS_SYNC_RAFT_H #include "sync.h" +#include "sync_type.h" #include "raft_message.h" -typedef struct SSyncRaft { +typedef struct SSyncRaftProgress SSyncRaftProgress; + +typedef struct RaftLeaderState { + int nProgress; + SSyncRaftProgress* progress; +} RaftLeaderState; + +typedef struct SSyncRaftIOMethods { + SyncTime (*time)(SSyncRaft*); + +} SSyncRaftIOMethods; + +struct SSyncRaft { // owner sync node SSyncNode* pNode; SSyncInfo info; -} SSyncRaft; + // election timeout tick(random in [3:6] tick) + uint16_t electionTick; + + // heartbeat timeout tick(default: 1 tick) + uint16_t heartbeatTick; + + int installSnapShotTimeoutMS; + + 
// + int heartbeatTimeoutMS; + + bool preVote; + + SSyncRaftIOMethods io; + + RaftLeaderState leaderState; + + SSyncRaftUnstableLog *log; +}; int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo); int32_t syncRaftStep(SSyncRaft* pRaft, const RaftMessage* pMsg); diff --git a/source/libs/sync/inc/raft_progress.h b/source/libs/sync/inc/raft_progress.h new file mode 100644 index 0000000000..73aa9db59f --- /dev/null +++ b/source/libs/sync/inc/raft_progress.h @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TD_SYNC_RAFT_PROGRESS_H +#define TD_SYNC_RAFT_PROGRESS_H + +#include "sync_type.h" + +/** + * SSyncRaftInflights is a sliding window for the inflight messages. + * Thus inflight effectively limits both the number of inflight messages + * and the bandwidth each Progress can use. + * When inflights is full, no more message should be sent. + * When a leader sends out a message, the index of the last + * entry should be added to inflights. The index MUST be added + * into inflights in order. + * When a leader receives a reply, the previous inflights should + * be freed by calling syncRaftInflightFreeTo with the index of the last + * received entry. 
+ **/ +typedef struct SSyncRaftInflights { + /* the starting index in the buffer */ + int start; + + /* number of inflights in the buffer */ + int count; + + /* the size of the buffer */ + int size; + + /** + * buffer contains the index of the last entry + * inside one message. + **/ + SyncIndex* buffer; +} SSyncRaftInflights; + +/** + * State defines how the leader should interact with the follower. + * + * When in PROGRESS_PROBE, leader sends at most one replication message + * per heartbeat interval. It also probes actual progress of the follower. + * + * When in PROGRESS_REPLICATE, leader optimistically increases next + * to the latest entry sent after sending replication message. This is + * an optimized state for fast replicating log entries to the follower. + * + * When in PROGRESS_SNAPSHOT, leader should have sent out snapshot + * before and stops sending any replication message. + * + * PROGRESS_PROBE is the initial state. + **/ +typedef enum RaftProgressState { + PROGRESS_PROBE = 0, + PROGRESS_REPLICATE, + PROGRESS_SNAPSHOT, +} RaftProgressState; + +/** + * Progress represents a follower’s progress in the view of the leader. Leader maintains + * progresses of all followers, and sends entries to the follower based on its progress. + **/ +struct SSyncRaftProgress { + SyncIndex nextIndex; + + SyncIndex matchIndex; + + RaftProgressState state; + + /** + * paused is used in PROGRESS_PROBE. + * When paused is true, raft should pause sending replication message to this peer. + **/ + bool paused; + + /** + * pendingSnapshotIndex is used in PROGRESS_SNAPSHOT. + * If there is a pending snapshot, the pendingSnapshotIndex will be set to the + * index of the snapshot. If pendingSnapshotIndex is set, the replication process of + * this Progress will be paused. raft will not resend snapshot until the pending one + * is reported to be failed. + **/ + SyncIndex pendingSnapshotIndex; + + /** + * recentActive is true if the progress is recently active. 
Receiving any messages + * from the corresponding follower indicates the progress is active. + * RecentActive can be reset to false after an election timeout. + **/ + bool recentActive; + + /** + * flow control sliding window + **/ + SSyncRaftInflights inflights; +}; + +int syncRaftProgressCreate(SSyncRaft* pRaft); +//int syncRaftProgressRecreate(SSyncRaft* pRaft, const RaftConfiguration* configuration); + +/** + * syncRaftProgressMaybeUpdate returns false if the given lastIndex index comes from i-th node's log. + * Otherwise it updates the progress and returns true. + **/ +bool syncRaftProgressMaybeUpdate(SSyncRaft* pRaft, int i, SyncIndex lastIndex); + +void syncRaftProgressOptimisticNextIndex(SSyncRaft* pRaft, int i, SyncIndex nextIndex); + +/** + * syncRaftProgressMaybeDecrTo returns false if the given to index comes from an out of order message. + * Otherwise it decreases the progress next index to min(rejected, last) and returns true. + **/ +bool syncRaftProgressMaybeDecrTo(SSyncRaft* pRaft, int i, + SyncIndex rejected, SyncIndex lastIndex); + +/** + * syncRaftProgressIsPaused returns whether sending log entries to this node has been + * paused. A node may be paused because it has rejected recent + * MsgApps, is currently waiting for a snapshot, or has reached the + * MaxInflightMsgs limit. 
+ **/ +bool syncRaftProgressIsPaused(SSyncRaft* pRaft, int i); + +void syncRaftProgressFailure(SSyncRaft* pRaft, int i); + +bool syncRaftProgressNeedAbortSnapshot(SSyncRaft* pRaft, int i); + +/** + * return true if i-th node's log is up-todate + **/ +bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, int i); + +void syncRaftProgressBecomeProbe(SSyncRaft* pRaft, int i); + +void syncRaftProgressBecomeReplicate(SSyncRaft* pRaft, int i); + +void syncRaftProgressBecomeSnapshot(SSyncRaft* pRaft, int i, SyncIndex snapshotIndex); + +int syncRaftInflightReset(SSyncRaftInflights* inflights); +bool syncRaftInflightFull(SSyncRaftInflights* inflights); +void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex); +void syncRaftInflightFreeTo(SSyncRaftInflights* inflights, SyncIndex toIndex); +void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights); + +#if 0 + +void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i); + +SyncIndex syncRaftProgressNextIndex(SSyncRaft* pRaft, int i); + +SyncIndex syncRaftProgressMatchIndex(SSyncRaft* pRaft, int i); + +void syncRaftProgressUpdateLastSend(SSyncRaft* pRaft, int i); + +void syncRaftProgressUpdateSnapshotLastSend(SSyncRaft* pRaft, int i); + +bool syncRaftProgressResetRecentRecv(SSyncRaft* pRaft, int i); + +void syncRaftProgressMarkRecentRecv(SSyncRaft* pRaft, int i); + +bool syncRaftProgressGetRecentRecv(SSyncRaft* pRaft, int i); + +void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i); + +RaftProgressState syncRaftProgressState(SSyncRaft* pRaft, int i); + +#endif + +#endif /* TD_SYNC_RAFT_PROGRESS_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/raft_unstable_log.h b/source/libs/sync/inc/raft_unstable_log.h new file mode 100644 index 0000000000..2b7b30c15a --- /dev/null +++ b/source/libs/sync/inc/raft_unstable_log.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TD_SYNC_RAFT_UNSTABLE_LOG_H +#define TD_SYNC_RAFT_UNSTABLE_LOG_H + +#include "sync_type.h" + +/* in-memory unstable raft log storage */ +struct SSyncRaftUnstableLog { +#if 0 + /* Circular buffer of log entries */ + RaftEntry *entries; + + /* size of Circular buffer */ + int size; + + /* Indexes of used slots [front, back) */ + int front, back; + + /* Index of first entry is offset + 1 */ + SyncIndex offset; + + /* meta data of snapshot */ + SSyncRaftUnstableLog snapshot; +#endif +}; + +/** + * return index of last in memory log, return 0 if log is empty + **/ +SyncIndex syncRaftLogLastIndex(SSyncRaftUnstableLog* pLog); + +#if 0 +void raftLogInit(RaftLog* pLog); + +void raftLogClose(RaftLog* pLog); + +/** + * When startup populating log entrues loaded from disk, + * init raft memory log with snapshot index,term and log start idnex. + **/ +/* +void raftLogStart(RaftLog* pLog, + RaftSnapshotMeta snapshot, + SyncIndex startIndex); +*/ +/** + * Get the number of entries the log. + **/ +int raftLogNumEntries(const RaftLog* pLog); + + + +/** + * return last term of in memory log, return 0 if log is empty + **/ +SSyncTerm raftLogLastTerm(RaftLog* pLog); + +/** + * return term of log with the given index, return 0 if the term of index cannot be found + * , errCode will save the error code. 
+ **/ +SSyncTerm raftLogTermOf(RaftLog* pLog, SyncIndex index, RaftCode* errCode); + +/** + * Get the last index of the most recent snapshot. Return 0 if there are no * + * snapshots. + **/ +SyncIndex raftLogSnapshotIndex(RaftLog* pLog); + +/* Append a new entry to the log. */ +int raftLogAppend(RaftLog* pLog, + SSyncTerm term, + const SSyncBuffer *buf); + +/** + * acquire log from given index onwards. + **/ +/* +int raftLogAcquire(RaftLog* pLog, + SyncIndex index, + RaftEntry **ppEntries, + int *n); + +void raftLogRelease(RaftLog* pLog, + SyncIndex index, + RaftEntry *pEntries, + int n); +*/ +/* Delete all entries from the given index (included) onwards. */ +void raftLogTruncate(RaftLog* pLog, SyncIndex index); + +/** + * when taking a new snapshot, the function will update the last snapshot information and delete + * all entries up last_index - trailing (included). If the log contains no entry + * a last_index - trailing, then no entry will be deleted. + **/ +void raftLogSnapshot(RaftLog* pLog, SyncIndex index, SyncIndex trailing); + +#endif + +#endif /* TD_SYNC_RAFT_UNSTABLE_LOG_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 73015e87a1..f99fb066ae 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -19,6 +19,7 @@ #include "thash.h" #include "os.h" #include "sync.h" +#include "sync_type.h" #include "raft.h" #include "tlog.h" diff --git a/source/libs/sync/inc/sync_type.h b/source/libs/sync/inc/sync_type.h new file mode 100644 index 0000000000..2c9f24287a --- /dev/null +++ b/source/libs/sync/inc/sync_type.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_LIBS_SYNC_TYPE_H +#define _TD_LIBS_SYNC_TYPE_H + +typedef int32_t SyncTime; + +typedef struct SSyncRaftUnstableLog SSyncRaftUnstableLog; + +typedef struct SSyncRaft SSyncRaft; + +#ifndef MIN +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) +#endif + +#ifndef MAX +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#endif + +#endif /* _TD_LIBS_SYNC_TYPE_H */ diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c index 23442803c4..42b220e642 100644 --- a/source/libs/sync/src/raft.c +++ b/source/libs/sync/src/raft.c @@ -16,12 +16,10 @@ #include "raft.h" #include "syncInt.h" -#ifndef MIN -#define MIN(x, y) (((x) < (y)) ? (x) : (y)) -#endif - #define RAFT_READ_LOG_MAX_NUM 100 +static void syncRaftBecomeFollower(SSyncRaft* pRaft, SSyncTerm term); + int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { SSyncNode* pNode = pRaft->pNode; SSyncServerState serverState; @@ -44,10 +42,10 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { } assert(initIndex <= serverState.commitIndex); - // restore fsm state from snapshot index + 1, until commitIndex + // restore fsm state from snapshot index + 1 until commitIndex ++initIndex; - while (initIndex < serverState.commitIndex) { - limit = MIN(RAFT_READ_LOG_MAX_NUM, serverState.commitIndex - initIndex); + while (initIndex <= serverState.commitIndex) { + limit = MIN(RAFT_READ_LOG_MAX_NUM, serverState.commitIndex - initIndex + 1); if (logStore->logRead(logStore, initIndex, limit, buffer, &nBuf) != 0) { return -1; @@ -62,7 +60,11 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { } assert(initIndex == serverState.commitIndex); - syncInfo("restore vgid 
%d state: snapshot index:", pInfo->vgId); + pRaft->heartbeatTick = 1; + + syncRaftBecomeFollower(pRaft, 1); + + syncInfo("restore vgid %d state: snapshot index success", pInfo->vgId); return 0; } @@ -73,4 +75,9 @@ int32_t syncRaftStep(SSyncRaft* pRaft, const RaftMessage* pMsg) { int32_t syncRaftTick(SSyncRaft* pRaft) { return 0; +} + +static void syncRaftBecomeFollower(SSyncRaft* pRaft, SSyncTerm term) { + pRaft->electionTick = taosRand() % 3 + 3; + return; } \ No newline at end of file diff --git a/source/libs/sync/src/raft_progress.c b/source/libs/sync/src/raft_progress.c new file mode 100644 index 0000000000..0f51d20531 --- /dev/null +++ b/source/libs/sync/src/raft_progress.c @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "raft.h" +#include "raft_unstable_log.h" +#include "raft_progress.h" +#include "sync.h" +#include "syncInt.h" + +static void resetProgressState(SSyncRaftProgress* progress, RaftProgressState state); + +static void resumeProgress(SSyncRaftProgress* progress); +static void pauseProgress(SSyncRaftProgress* progress); + +int syncRaftProgressCreate(SSyncRaft* pRaft) { + +/* + inflights->buffer = (SyncIndex*)malloc(sizeof(SyncIndex) * pRaft->maxInflightMsgs); + if (inflights->buffer == NULL) { + return RAFT_OOM; + } + inflights->size = pRaft->maxInflightMsgs; +*/ +} + +/* +int syncRaftProgressRecreate(SSyncRaft* pRaft, const RaftConfiguration* configuration) { + +} +*/ + +bool syncRaftProgressMaybeUpdate(SSyncRaft* pRaft, int i, SyncIndex lastIndex) { + assert(i >= 0 && i < pRaft->leaderState.nProgress); + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + bool updated = false; + + if (progress->matchIndex < lastIndex) { + progress->matchIndex = lastIndex; + updated = true; + resumeProgress(progress); + } + if (progress->nextIndex < lastIndex + 1) { + progress->nextIndex = lastIndex + 1; + } + + return updated; +} + +void syncRaftProgressOptimisticNextIndex(SSyncRaft* pRaft, int i, SyncIndex nextIndex) { + assert(i >= 0 && i < pRaft->leaderState.nProgress); + pRaft->leaderState.progress[i].nextIndex = nextIndex + 1; +} + +bool syncRaftProgressMaybeDecrTo(SSyncRaft* pRaft, int i, + SyncIndex rejected, SyncIndex lastIndex) { + assert(i >= 0 && i < pRaft->leaderState.nProgress); + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + + if (progress->state == PROGRESS_REPLICATE) { + /** + * the rejection must be stale if the progress has matched and "rejected" + * is smaller than "match". 
+ **/ + if (rejected <= progress->matchIndex) { + syncDebug("match index is up to date,ignore"); + return false; + } + + /* directly decrease next to match + 1 */ + progress->nextIndex = progress->matchIndex + 1; + //syncRaftProgressBecomeProbe(raft, i); + return true; + } + + if (rejected != progress->nextIndex - 1) { + syncDebug("rejected index %" PRId64 " different from next index %" PRId64 " -> ignore" + , rejected, progress->nextIndex); + return false; + } + + progress->nextIndex = MIN(rejected, lastIndex + 1); + if (progress->nextIndex < 1) { + progress->nextIndex = 1; + } + + resumeProgress(progress); + return true; +} + +static void resumeProgress(SSyncRaftProgress* progress) { + progress->paused = false; +} + +static void pauseProgress(SSyncRaftProgress* progress) { + progress->paused = true; +} + +bool syncRaftProgressIsPaused(SSyncRaft* pRaft, int i) { + assert(i >= 0 && i < pRaft->leaderState.nProgress); + + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + + switch (progress->state) { + case PROGRESS_PROBE: + return progress->paused; + case PROGRESS_REPLICATE: + return syncRaftInflightFull(&progress->inflights); + case PROGRESS_SNAPSHOT: + return true; + default: + syncFatal("error sync state:%d", progress->state); + } +} + +void syncRaftProgressFailure(SSyncRaft* pRaft, int i) { + assert(i >= 0 && i < pRaft->leaderState.nProgress); + + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + + progress->pendingSnapshotIndex = 0; +} + +bool syncRaftProgressNeedAbortSnapshot(SSyncRaft* pRaft, int i) { + assert(i >= 0 && i < pRaft->leaderState.nProgress); + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + + return progress->state == PROGRESS_SNAPSHOT && progress->matchIndex >= progress->pendingSnapshotIndex; +} + +bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, int i) { + assert(i >= 0 && i < pRaft->leaderState.nProgress); + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + return 
syncRaftLogLastIndex(pRaft->log) + 1 == progress->nextIndex; +} + +void syncRaftProgressBecomeProbe(SSyncRaft* pRaft, int i) { + assert(i >= 0 && i < pRaft->leaderState.nProgress); + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + /** + * If the original state is ProgressStateSnapshot, progress knows that + * the pending snapshot has been sent to this peer successfully, then + * probes from pendingSnapshot + 1. + **/ + if (progress->state == PROGRESS_SNAPSHOT) { + SyncIndex pendingSnapshotIndex = progress->pendingSnapshotIndex; + resetProgressState(progress, PROGRESS_PROBE); + progress->nextIndex = MAX(progress->matchIndex + 1, pendingSnapshotIndex + 1); + } else { + resetProgressState(progress, PROGRESS_PROBE); + progress->nextIndex = progress->matchIndex + 1; + } +} + +void syncRaftProgressBecomeReplicate(SSyncRaft* pRaft, int i) { + assert(i >= 0 && i < pRaft->leaderState.nProgress); + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + resetProgressState(progress, PROGRESS_REPLICATE); + progress->nextIndex = progress->matchIndex + 1; +} + +void syncRaftProgressBecomeSnapshot(SSyncRaft* pRaft, int i, SyncIndex snapshotIndex) { + assert(i >= 0 && i < pRaft->leaderState.nProgress); + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + resetProgressState(progress, PROGRESS_SNAPSHOT); + progress->pendingSnapshotIndex = snapshotIndex; +} + +static void resetProgressState(SSyncRaftProgress* progress, RaftProgressState state) { + progress->paused = false; + progress->pendingSnapshotIndex = 0; + progress->state = state; + syncRaftInflightReset(&(progress->inflights)); +} + + +int syncRaftInflightReset(SSyncRaftInflights* inflights) { + inflights->count = 0; + inflights->start = 0; + + return 0; +} + +bool syncRaftInflightFull(SSyncRaftInflights* inflights) { + return inflights->count == inflights->size; +} + +void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex) { + 
assert(!syncRaftInflightFull(inflights)); + + int next = inflights->start + inflights->count; + int size = inflights->size; + /* is next wrapped around buffer? */ + if (next >= size) { + next -= size; + } + + inflights->buffer[next] = inflightIndex; + inflights->count++; +} + +void syncRaftInflightFreeTo(SSyncRaftInflights* inflights, SyncIndex toIndex) { + if (inflights->count == 0 || toIndex < inflights->buffer[inflights->start]) { + return; + } + + int i, idx; + for (i = 0, idx = inflights->start; i < inflights->count; i++) { + if (toIndex < inflights->buffer[idx]) { + break; + } + + int size = inflights->size; + idx++; + if (idx >= size) { + idx -= size; + } + } + + inflights->count -= i; + inflights->start = idx; + assert(inflights->count >= 0); + if (inflights->count == 0) { + inflights->start = 0; + } +} + +void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights) { + syncRaftInflightFreeTo(inflights, inflights->buffer[inflights->start]); +} + + + + + +#if 0 + +SyncIndex syncRaftProgressNextIndex(SSyncRaft* pRaft, int i) { + return pRaft->leaderState.progress[i].nextIndex; +} + +SyncIndex syncRaftProgressMatchIndex(SSyncRaft* pRaft, int i) { + return pRaft->leaderState.progress[i].matchIndex; +} + +void syncRaftProgressUpdateLastSend(SSyncRaft* pRaft, int i) { + pRaft->leaderState.progress[i].lastSend = pRaft->io.time(pRaft); +} + +void syncRaftProgressUpdateSnapshotLastSend(SSyncRaft* pRaft, int i) { + pRaft->leaderState.progress[i].lastSendSnapshot = pRaft->io.time(pRaft); +} + +bool syncRaftProgressResetRecentRecv(SSyncRaft* pRaft, int i) { + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + bool prev = progress->recentRecv; + progress->recentRecv = false; + return prev; +} + +void syncRaftProgressMarkRecentRecv(SSyncRaft* pRaft, int i) { + pRaft->leaderState.progress[i].recentRecv = true; +} + +bool syncRaftProgressGetRecentRecv(SSyncRaft* pRaft, int i) { + return pRaft->leaderState.progress[i].recentRecv; +} + +void 
syncRaftProgressBecomeSnapshot(SSyncRaft* pRaft, int i) { + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + resetProgressState(progress, PROGRESS_SNAPSHOT); + progress->pendingSnapshotIndex = raftLogSnapshotIndex(pRaft->log); +} + +void syncRaftProgressBecomeProbe(SSyncRaft* pRaft, int i) { + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + + if (progress->state == PROGRESS_SNAPSHOT) { + assert(progress->pendingSnapshotIndex > 0); + SyncIndex pendingSnapshotIndex = progress->pendingSnapshotIndex; + resetProgressState(progress, PROGRESS_PROBE); + progress->nextIndex = max(progress->matchIndex + 1, pendingSnapshotIndex); + } else { + resetProgressState(progress, PROGRESS_PROBE); + progress->nextIndex = progress->matchIndex + 1; + } +} + +void syncRaftProgressBecomeReplicate(SSyncRaft* pRaft, int i) { + resetProgressState(pRaft->leaderState.progress, PROGRESS_REPLICATE); + pRaft->leaderState.progress->nextIndex = pRaft->leaderState.progress->matchIndex + 1; +} + +void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i) { + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + progress->pendingSnapshotIndex = 0; + progress->state = PROGRESS_PROBE; +} + +RaftProgressState syncRaftProgressState(SSyncRaft* pRaft, int i) { + return pRaft->leaderState.progress[i].state; +} + + + +#endif \ No newline at end of file diff --git a/source/libs/sync/src/raft_unstable_log.c b/source/libs/sync/src/raft_unstable_log.c new file mode 100644 index 0000000000..4735242d3c --- /dev/null +++ b/source/libs/sync/src/raft_unstable_log.c @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "sync.h" +#include "raft_unstable_log.h" + +SyncIndex syncRaftLogLastIndex(SSyncRaftUnstableLog* pLog) { + return 0; +} \ No newline at end of file From e04bccdca4207141adb6acee82113966d5c34019 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 16:17:49 +0800 Subject: [PATCH 11/94] refact --- source/dnode/vnode/meta/inc/metaDef.h | 4 +-- source/dnode/vnode/meta/src/metaDB.c | 39 +++++++++++++++++++++++++- source/dnode/vnode/meta/src/metaMain.c | 4 +-- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/meta/inc/metaDef.h b/source/dnode/vnode/meta/inc/metaDef.h index aaae07d6f4..71ba42b985 100644 --- a/source/dnode/vnode/meta/inc/metaDef.h +++ b/source/dnode/vnode/meta/inc/metaDef.h @@ -26,8 +26,8 @@ extern "C" { struct SMeta { char* path; // path of current meta SMetaOptions options; // meta option - SMetaDB* pMetaDB; // meta DB for real storage engine - STbUidGenerator uidGenerator; // meta table UID generator + SMetaDB metaDB; // meta DB for real storage engine + STbUidGenerator uidGnrt; // meta table UID generator }; #ifdef __cplusplus diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index 6dea4a4e57..73d54d7682 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -11,4 +11,41 @@ * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
- */ \ No newline at end of file + */ + +#include "meta.h" +#include "metaDef.h" + +int metaOpenDB(SMeta *pMeta) { + /* TODO */ + pMeta->metaDB.pDB = tkvOpen(NULL, "db"); + if (pMeta->metaDB.pDB == NULL) { + // TODO + return -1; + } + + pMeta->metaDB.pIdx = tkvOpen(NULL, "index"); + if (pMeta->metaDB.pIdx == NULL) { + /* TODO */ + return -1; + } + + { /* TODO: for cache*/ + } + + return 0; +} + +void metaCloseDB(SMeta *pMeta) { /* TODO */ + { + // TODO: clear cache + } + + if (pMeta->metaDB.pIdx) { + tkvClose(pMeta->metaDB.pIdx); + } + + if (pMeta->metaDB.pDB) { + tkvClose(pMeta->metaDB.pIdx); + } +} \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaMain.c b/source/dnode/vnode/meta/src/metaMain.c index c0fcf6898a..d30c938989 100644 --- a/source/dnode/vnode/meta/src/metaMain.c +++ b/source/dnode/vnode/meta/src/metaMain.c @@ -57,14 +57,14 @@ SMeta *metaOpen(const char *path, const SMetaOptions *pMetaOptions) { return NULL; } - tableUidGeneratorInit(&(pMeta->uidGenerator), IVLD_TB_UID); + tableUidGeneratorInit(&(pMeta->uidGnrt), IVLD_TB_UID); return pMeta; } void metaClose(SMeta *pMeta) { if (pMeta) { - tableUidGeneratorClear(&pMeta->uidGenerator); + tableUidGeneratorClear(&pMeta->uidGnrt); metaCloseDB(pMeta); free(pMeta); } From 7f26ac3de07a7632663ea80b3df864d1ee1cf7be Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 16:19:44 +0800 Subject: [PATCH 12/94] refact --- source/dnode/vnode/meta/src/metaMain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/meta/src/metaMain.c b/source/dnode/vnode/meta/src/metaMain.c index d30c938989..c04bee265f 100644 --- a/source/dnode/vnode/meta/src/metaMain.c +++ b/source/dnode/vnode/meta/src/metaMain.c @@ -66,7 +66,7 @@ void metaClose(SMeta *pMeta) { if (pMeta) { tableUidGeneratorClear(&pMeta->uidGnrt); metaCloseDB(pMeta); - free(pMeta); + metaFree(pMeta); } } From 330013c506d0710278cc1618c7d0403cac1fabec Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 
2 Nov 2021 16:21:55 +0800 Subject: [PATCH 13/94] refact --- source/dnode/vnode/meta/src/metaMain.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/meta/src/metaMain.c b/source/dnode/vnode/meta/src/metaMain.c index c04bee265f..5f82ca4e87 100644 --- a/source/dnode/vnode/meta/src/metaMain.c +++ b/source/dnode/vnode/meta/src/metaMain.c @@ -22,9 +22,6 @@ static SMeta *metaNew(const char *path, const SMetaOptions *pMetaOptions); static void metaFree(SMeta *pMeta); -static int metaCreateSuperTable(SMeta *pMeta, const char *tbname, const SSuperTableOpts *pSuperTableOpts); -static int metaCreateChildTable(SMeta *pMeta, const char *tbname, const SChildTableOpts *pChildTableOpts); -static int metaCreateNormalTable(SMeta *pMeta, const char *tbname, const SNormalTableOpts *pNormalTableOpts); SMeta *metaOpen(const char *path, const SMetaOptions *pMetaOptions) { SMeta *pMeta = NULL; @@ -101,6 +98,10 @@ static void metaFree(SMeta *pMeta) { // OLD ------------------------------------------------------------------- #if 0 +static int metaCreateSuperTable(SMeta *pMeta, const char *tbname, const SSuperTableOpts *pSuperTableOpts); +static int metaCreateChildTable(SMeta *pMeta, const char *tbname, const SChildTableOpts *pChildTableOpts); +static int metaCreateNormalTable(SMeta *pMeta, const char *tbname, const SNormalTableOpts *pNormalTableOpts); + int metaCreateTable(SMeta *pMeta, const STableOptions *pTableOpts) { size_t vallen; char * pUid; From 45de6cfee00671e874db2aceaddf574f4ff64b3c Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 16:51:20 +0800 Subject: [PATCH 14/94] more --- cmake/cmake.options | 2 +- source/dnode/vnode/meta/inc/metaDB.h | 10 +++--- source/dnode/vnode/meta/src/metaDB.c | 50 ++++++++++++++++++++-------- 3 files changed, 43 insertions(+), 19 deletions(-) diff --git a/cmake/cmake.options b/cmake/cmake.options index 74b0d9fdbb..c1964762c7 100644 --- a/cmake/cmake.options +++ b/cmake/cmake.options @@ -16,7 
+16,7 @@ option( option( BUILD_WITH_ROCKSDB "If build with rocksdb" - OFF + ON ) option( diff --git a/source/dnode/vnode/meta/inc/metaDB.h b/source/dnode/vnode/meta/inc/metaDB.h index 299b8e0350..7a9dcc9c34 100644 --- a/source/dnode/vnode/meta/inc/metaDB.h +++ b/source/dnode/vnode/meta/inc/metaDB.h @@ -17,16 +17,18 @@ #define _TD_META_DB_H_ #include "meta.h" -#include "tkv.h" +// #include "tkv.h" + +#include "rocksdb/c.h" #ifdef __cplusplus extern "C" { #endif typedef struct SMetaDB { - STkvDb * pDB; - STkvDb * pIdx; - STkvCache *pCache; + rocksdb_t * pDB; + rocksdb_t * pIdx; + rocksdb_cache_t *pCache; } SMetaDB; int metaOpenDB(SMeta *pMeta); diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index 73d54d7682..6b7ea1a0b2 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -17,35 +17,57 @@ #include "metaDef.h" int metaOpenDB(SMeta *pMeta) { - /* TODO */ - pMeta->metaDB.pDB = tkvOpen(NULL, "db"); + char * err = NULL; + rocksdb_options_t *pOpts; + + pOpts = rocksdb_options_create(); + if (pOpts == NULL) { + // TODO: handle error + return -1; + } + + // Create LRU cache + if (pMeta->options.lruCacheSize) { + pMeta->metaDB.pCache = rocksdb_cache_create_lru(pMeta->options.lruCacheSize); + if (pMeta->metaDB.pCache == NULL) { + // TODO: handle error + return -1; + } + + rocksdb_options_set_row_cache(pOpts, pMeta->metaDB.pCache); + } + + // Open raw data DB + pMeta->metaDB.pDB = rocksdb_open(pOpts, "db", &err); if (pMeta->metaDB.pDB == NULL) { - // TODO + // TODO: handle error return -1; } - pMeta->metaDB.pIdx = tkvOpen(NULL, "index"); + // Open index DB + pMeta->metaDB.pIdx = rocksdb_open(pOpts, "index", &err); if (pMeta->metaDB.pIdx == NULL) { - /* TODO */ + // TODO: handle error + rocksdb_close(pMeta->metaDB.pDB); return -1; } - { /* TODO: for cache*/ - } - return 0; } void metaCloseDB(SMeta *pMeta) { /* TODO */ - { - // TODO: clear cache - } - + // Close index DB if (pMeta->metaDB.pIdx) { 
- tkvClose(pMeta->metaDB.pIdx); + rocksdb_close(pMeta->metaDB.pIdx); } + // Close raw data DB if (pMeta->metaDB.pDB) { - tkvClose(pMeta->metaDB.pIdx); + rocksdb_close(pMeta->metaDB.pDB); + } + + // Destroy cache + if (pMeta->metaDB.pCache) { + rocksdb_cache_destroy(pMeta->metaDB.pCache); } } \ No newline at end of file From 733506d432ce4896975561b54c2b4945323f1fee Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 17:02:03 +0800 Subject: [PATCH 15/94] refact --- source/dnode/vnode/meta/src/metaDB.c | 43 +++++++++++++++++++--------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index 6b7ea1a0b2..8c30c32d5d 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -18,13 +18,8 @@ int metaOpenDB(SMeta *pMeta) { char * err = NULL; - rocksdb_options_t *pOpts; - - pOpts = rocksdb_options_create(); - if (pOpts == NULL) { - // TODO: handle error - return -1; - } + rocksdb_options_t *dbOptions; + rocksdb_options_t *idxOptions; // Create LRU cache if (pMeta->options.lruCacheSize) { @@ -33,25 +28,47 @@ int metaOpenDB(SMeta *pMeta) { // TODO: handle error return -1; } - - rocksdb_options_set_row_cache(pOpts, pMeta->metaDB.pCache); } - // Open raw data DB - pMeta->metaDB.pDB = rocksdb_open(pOpts, "db", &err); + // Open raw data DB --------------------------- + dbOptions = rocksdb_options_create(); + if (dbOptions == NULL) { + // TODO: handle error + return -1; + } + + if (pMeta->metaDB.pCache) { + rocksdb_options_set_row_cache(dbOptions, pMeta->metaDB.pCache); + } + + pMeta->metaDB.pDB = rocksdb_open(dbOptions, "db", &err); if (pMeta->metaDB.pDB == NULL) { // TODO: handle error return -1; } - // Open index DB - pMeta->metaDB.pIdx = rocksdb_open(pOpts, "index", &err); + rocksdb_options_destroy(dbOptions); + + // Open index DB --------------------------- + idxOptions = rocksdb_options_create(); + if (idxOptions == NULL) { + // TODO: 
handle error + return -1; + } + + if (pMeta->metaDB.pCache) { + rocksdb_options_set_row_cache(dbOptions, pMeta->metaDB.pCache); + } + + pMeta->metaDB.pIdx = rocksdb_open(idxOptions, "index", &err); if (pMeta->metaDB.pIdx == NULL) { // TODO: handle error rocksdb_close(pMeta->metaDB.pDB); return -1; } + rocksdb_options_destroy(idxOptions); + return 0; } From 5469e93829868e474c010efdb3609500749f6e88 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 17:30:59 +0800 Subject: [PATCH 16/94] refact --- source/dnode/vnode/meta/inc/metaDef.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/meta/inc/metaDef.h b/source/dnode/vnode/meta/inc/metaDef.h index 71ba42b985..b5976a943f 100644 --- a/source/dnode/vnode/meta/inc/metaDef.h +++ b/source/dnode/vnode/meta/inc/metaDef.h @@ -24,9 +24,9 @@ extern "C" { #endif struct SMeta { - char* path; // path of current meta - SMetaOptions options; // meta option - SMetaDB metaDB; // meta DB for real storage engine + char* path; // path of current meta + SMetaOptions options; // meta option + SMetaDB metaDB; // meta DB for real storage engine STbUidGenerator uidGnrt; // meta table UID generator }; From 36d3adc23fa9d258a7d2fbe05e1110ad8adc1e1a Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 17:49:10 +0800 Subject: [PATCH 17/94] refact --- source/dnode/vnode/meta/inc/metaCache.h | 32 +++++++++++++++++++++++++ source/dnode/vnode/meta/inc/metaDB.h | 9 ------- source/dnode/vnode/meta/inc/metaDef.h | 13 ++++++---- source/dnode/vnode/meta/inc/metaIdx.h | 32 +++++++++++++++++++++++++ source/dnode/vnode/meta/src/metaCache.c | 26 ++++++++++++++++++++ source/dnode/vnode/meta/src/metaDB.c | 4 ++++ source/dnode/vnode/meta/src/metaIdx.c | 23 ++++++++++++++++++ 7 files changed, 125 insertions(+), 14 deletions(-) create mode 100644 source/dnode/vnode/meta/inc/metaCache.h create mode 100644 source/dnode/vnode/meta/inc/metaIdx.h create mode 100644 
source/dnode/vnode/meta/src/metaCache.c create mode 100644 source/dnode/vnode/meta/src/metaIdx.c diff --git a/source/dnode/vnode/meta/inc/metaCache.h b/source/dnode/vnode/meta/inc/metaCache.h new file mode 100644 index 0000000000..60cedce29c --- /dev/null +++ b/source/dnode/vnode/meta/inc/metaCache.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_META_CACHE_H_ +#define _TD_META_CACHE_H_ + +#include "meta.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int metaOpenCache(SMeta *pMeta); +void metaCloseCache(SMeta *pMeta); + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_META_CACHE_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/meta/inc/metaDB.h b/source/dnode/vnode/meta/inc/metaDB.h index 7a9dcc9c34..2c075d751c 100644 --- a/source/dnode/vnode/meta/inc/metaDB.h +++ b/source/dnode/vnode/meta/inc/metaDB.h @@ -17,20 +17,11 @@ #define _TD_META_DB_H_ #include "meta.h" -// #include "tkv.h" - -#include "rocksdb/c.h" #ifdef __cplusplus extern "C" { #endif -typedef struct SMetaDB { - rocksdb_t * pDB; - rocksdb_t * pIdx; - rocksdb_cache_t *pCache; -} SMetaDB; - int metaOpenDB(SMeta *pMeta); void metaCloseDB(SMeta *pMeta); diff --git a/source/dnode/vnode/meta/inc/metaDef.h b/source/dnode/vnode/meta/inc/metaDef.h index b5976a943f..395ca7c7bd 100644 --- a/source/dnode/vnode/meta/inc/metaDef.h +++ b/source/dnode/vnode/meta/inc/metaDef.h @@ -16,7 +16,8 @@ #ifndef _TD_META_DEF_H_ #define 
_TD_META_DEF_H_ -#include "metaDB.h" +#include "rocksdb/c.h" + #include "metaTbUid.h" #ifdef __cplusplus @@ -24,10 +25,12 @@ extern "C" { #endif struct SMeta { - char* path; // path of current meta - SMetaOptions options; // meta option - SMetaDB metaDB; // meta DB for real storage engine - STbUidGenerator uidGnrt; // meta table UID generator + char * path; // path of current meta + SMetaOptions options; // meta option + rocksdb_t * pDB; + rocksdb_t * pIdx; + rocksdb_cache_t *pCache; + STbUidGenerator uidGnrt; // meta table UID generator }; #ifdef __cplusplus diff --git a/source/dnode/vnode/meta/inc/metaIdx.h b/source/dnode/vnode/meta/inc/metaIdx.h new file mode 100644 index 0000000000..ed1f6ac94c --- /dev/null +++ b/source/dnode/vnode/meta/inc/metaIdx.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_META_IDX_H_ +#define _TD_META_IDX_H_ + +#include "meta.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int metaOpenIdx(SMeta *pMeta); +void metaCloseIdx(SMeta *pMeta); + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_META_IDX_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaCache.c b/source/dnode/vnode/meta/src/metaCache.c new file mode 100644 index 0000000000..1e848d434e --- /dev/null +++ b/source/dnode/vnode/meta/src/metaCache.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "meta.h" +#include "metaDef.h" + +int metaOpenCache(SMeta *pMeta) { + // TODO + return 0; +} + +void metaCloseCache(SMeta *pMeta) { + // TODO +} \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index 8c30c32d5d..5c3d9d0a02 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -17,6 +17,7 @@ #include "metaDef.h" int metaOpenDB(SMeta *pMeta) { +#if 0 char * err = NULL; rocksdb_options_t *dbOptions; rocksdb_options_t *idxOptions; @@ -69,10 +70,12 @@ int metaOpenDB(SMeta *pMeta) { rocksdb_options_destroy(idxOptions); +#endif return 0; } void metaCloseDB(SMeta *pMeta) { /* TODO */ +#if 0 // Close index DB if (pMeta->metaDB.pIdx) { rocksdb_close(pMeta->metaDB.pIdx); @@ -87,4 +90,5 @@ void metaCloseDB(SMeta *pMeta) { /* TODO */ if (pMeta->metaDB.pCache) { rocksdb_cache_destroy(pMeta->metaDB.pCache); } +#endif } \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaIdx.c b/source/dnode/vnode/meta/src/metaIdx.c new file mode 100644 index 0000000000..f04489d21e --- /dev/null +++ b/source/dnode/vnode/meta/src/metaIdx.c @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "metaIdx.h" + +int metaOpenIdx(SMeta *pMeta) { + /* TODO */ + return 0; +} + +void metaCloseIdx(SMeta *pMeta) { /* TODO */ } \ No newline at end of file From 8fbd3f49cabaeb0bda46bdc544b3a5c480919d9c Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 17:54:55 +0800 Subject: [PATCH 18/94] refact --- source/dnode/vnode/meta/inc/metaCache.h | 4 ++++ source/dnode/vnode/meta/inc/metaDB.h | 4 ++++ source/dnode/vnode/meta/inc/metaDef.h | 17 +++++++++-------- source/dnode/vnode/meta/inc/metaIdx.h | 4 ++++ 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/source/dnode/vnode/meta/inc/metaCache.h b/source/dnode/vnode/meta/inc/metaCache.h index 60cedce29c..92c5a09c0c 100644 --- a/source/dnode/vnode/meta/inc/metaCache.h +++ b/source/dnode/vnode/meta/inc/metaCache.h @@ -16,12 +16,16 @@ #ifndef _TD_META_CACHE_H_ #define _TD_META_CACHE_H_ +#include "rocksdb/c.h" + #include "meta.h" #ifdef __cplusplus extern "C" { #endif +typedef rocksdb_cache_t meta_cache_t; + int metaOpenCache(SMeta *pMeta); void metaCloseCache(SMeta *pMeta); diff --git a/source/dnode/vnode/meta/inc/metaDB.h b/source/dnode/vnode/meta/inc/metaDB.h index 2c075d751c..3381b05f22 100644 --- a/source/dnode/vnode/meta/inc/metaDB.h +++ b/source/dnode/vnode/meta/inc/metaDB.h @@ -16,12 +16,16 @@ #ifndef _TD_META_DB_H_ #define _TD_META_DB_H_ +#include "rocksdb/c.h" + #include "meta.h" #ifdef __cplusplus extern "C" { #endif +typedef rocksdb_t meta_db_t; + int metaOpenDB(SMeta *pMeta); void metaCloseDB(SMeta *pMeta); diff --git a/source/dnode/vnode/meta/inc/metaDef.h b/source/dnode/vnode/meta/inc/metaDef.h index 395ca7c7bd..a81bd931e4 100644 --- 
a/source/dnode/vnode/meta/inc/metaDef.h +++ b/source/dnode/vnode/meta/inc/metaDef.h @@ -16,8 +16,9 @@ #ifndef _TD_META_DEF_H_ #define _TD_META_DEF_H_ -#include "rocksdb/c.h" - +#include "metaCache.h" +#include "metaDB.h" +#include "metaIdx.h" #include "metaTbUid.h" #ifdef __cplusplus @@ -25,12 +26,12 @@ extern "C" { #endif struct SMeta { - char * path; // path of current meta - SMetaOptions options; // meta option - rocksdb_t * pDB; - rocksdb_t * pIdx; - rocksdb_cache_t *pCache; - STbUidGenerator uidGnrt; // meta table UID generator + char* path; // path of current meta + SMetaOptions options; // meta option + meta_db_t* pDB; // raw data db + meta_index_t* pIdx; // tag index + meta_cache_t* pCache; // LRU cache + STbUidGenerator uidGnrt; // meta table UID generator }; #ifdef __cplusplus diff --git a/source/dnode/vnode/meta/inc/metaIdx.h b/source/dnode/vnode/meta/inc/metaIdx.h index ed1f6ac94c..4a897228a9 100644 --- a/source/dnode/vnode/meta/inc/metaIdx.h +++ b/source/dnode/vnode/meta/inc/metaIdx.h @@ -16,12 +16,16 @@ #ifndef _TD_META_IDX_H_ #define _TD_META_IDX_H_ +#include "rocksdb/c.h" + #include "meta.h" #ifdef __cplusplus extern "C" { #endif +typedef rocksdb_t meta_index_t; + int metaOpenIdx(SMeta *pMeta); void metaCloseIdx(SMeta *pMeta); From f184d1d3d4755cae8acd6081028de0fc37c8cc4b Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 2 Nov 2021 18:20:16 +0800 Subject: [PATCH 19/94] refact --- source/dnode/vnode/meta/inc/metaOptions.h | 1 + source/dnode/vnode/meta/inc/metaQuery.h | 27 +++++++++++++++++++++++ source/dnode/vnode/meta/src/metaQuery.c | 14 ++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 source/dnode/vnode/meta/inc/metaQuery.h create mode 100644 source/dnode/vnode/meta/src/metaQuery.c diff --git a/source/dnode/vnode/meta/inc/metaOptions.h b/source/dnode/vnode/meta/inc/metaOptions.h index 7033a873df..500f2d5e59 100644 --- a/source/dnode/vnode/meta/inc/metaOptions.h +++ b/source/dnode/vnode/meta/inc/metaOptions.h @@ -26,6 +26,7 @@ 
extern const SMetaOptions defaultMetaOptions; int metaValidateOptions(const SMetaOptions *); void metaOptionsCopy(SMetaOptions *pDest, const SMetaOptions *pSrc); + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/meta/inc/metaQuery.h b/source/dnode/vnode/meta/inc/metaQuery.h new file mode 100644 index 0000000000..110df8dd45 --- /dev/null +++ b/source/dnode/vnode/meta/inc/metaQuery.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_META_QUERY_H_ +#define _TD_META_QUERY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_META_QUERY_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaQuery.c b/source/dnode/vnode/meta/src/metaQuery.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/dnode/vnode/meta/src/metaQuery.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. 
If not, see . + */ \ No newline at end of file From 27f465a8849f8371fa1f9ea734297dc619d12486 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Wed, 3 Nov 2021 10:00:49 +0800 Subject: [PATCH 20/94] refact --- source/dnode/vnode/meta/inc/metaTbUid.h | 10 ++++----- source/dnode/vnode/meta/src/metaMain.c | 29 ++++++++++++++++++++----- source/dnode/vnode/meta/src/metaTbUid.c | 18 +++++++++------ 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/source/dnode/vnode/meta/inc/metaTbUid.h b/source/dnode/vnode/meta/inc/metaTbUid.h index 86f4e26cec..07d1f6635b 100644 --- a/source/dnode/vnode/meta/inc/metaTbUid.h +++ b/source/dnode/vnode/meta/inc/metaTbUid.h @@ -27,13 +27,13 @@ typedef struct STbUidGenerator { tb_uid_t nextUid; } STbUidGenerator; +// STableUidGenerator +int metaOpenUidGnrt(SMeta *pMeta); +void metaCloseUidGnrt(SMeta *pMeta); + // tb_uid_t #define IVLD_TB_UID 0 -tb_uid_t generateUid(STbUidGenerator *); - -// STableUidGenerator -void tableUidGeneratorInit(STbUidGenerator *, tb_uid_t suid); -#define tableUidGeneratorClear(ug) +tb_uid_t metaGenerateUid(SMeta *pMeta); #ifdef __cplusplus } diff --git a/source/dnode/vnode/meta/src/metaMain.c b/source/dnode/vnode/meta/src/metaMain.c index 5f82ca4e87..5ffcfb86cb 100644 --- a/source/dnode/vnode/meta/src/metaMain.c +++ b/source/dnode/vnode/meta/src/metaMain.c @@ -44,25 +44,41 @@ SMeta *metaOpen(const char *path, const SMetaOptions *pMetaOptions) { return NULL; } - // Create META path + // Create META path (TODO) taosMkDir(path); - // Open the DBs needed - if (metaOpenDB(pMeta) < 0) { + // Open meta cache + if (metaOpenCache(pMeta) < 0) { // TODO: handle error - metaFree(pMeta); return NULL; } - tableUidGeneratorInit(&(pMeta->uidGnrt), IVLD_TB_UID); + // Open meta db + if (metaOpenDB(pMeta) < 0) { + // TODO: handle error + return NULL; + } + + // Open meta index + if (metaOpenIdx(pMeta) < 0) { + // TODO: handle error + return NULL; + } + + // Open meta table uid generator + if (metaOpenUidGnrt(pMeta) < 0) { + 
return NULL; + } return pMeta; } void metaClose(SMeta *pMeta) { if (pMeta) { - tableUidGeneratorClear(&pMeta->uidGnrt); + metaCloseUidGnrt(pMeta); + metaCloseIdx(pMeta); metaCloseDB(pMeta); + metaCloseCache(pMeta); metaFree(pMeta); } } @@ -81,6 +97,7 @@ static SMeta *metaNew(const char *path, const SMetaOptions *pMetaOptions) { pMeta->path = strdup(path); if (pMeta->path == NULL) { + metaFree(pMeta); return NULL; } diff --git a/source/dnode/vnode/meta/src/metaTbUid.c b/source/dnode/vnode/meta/src/metaTbUid.c index 87b1199fd9..be85b45d95 100644 --- a/source/dnode/vnode/meta/src/metaTbUid.c +++ b/source/dnode/vnode/meta/src/metaTbUid.c @@ -13,14 +13,18 @@ * along with this program. If not, see . */ -#include "metaTbUid.h" +#include "meta.h" +#include "metaDef.h" -tb_uid_t generateUid(STbUidGenerator *pGen) { - // Generate a new table UID - return ++(pGen->nextUid); +int metaOpenUidGnrt(SMeta *pMeta) { + // Init a generator + pMeta->uidGnrt.nextUid = IVLD_TB_UID; + return 0; } -void tableUidGeneratorInit(STbUidGenerator *pGen, tb_uid_t suid) { - // Init a generator - pGen->nextUid = suid; +void metaCloseUidGnrt(SMeta *pMeta) { /* TODO */ } + +tb_uid_t metaGenerateUid(SMeta *pMeta) { + // Generate a new table UID + return ++(pMeta->uidGnrt.nextUid); } \ No newline at end of file From 15b8a7f55a7417e6694e67486a801d158cbf2c07 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Wed, 3 Nov 2021 10:34:53 +0800 Subject: [PATCH 21/94] more --- source/dnode/vnode/meta/src/metaCache.c | 13 ++++- source/dnode/vnode/meta/src/metaDB.c | 70 +------------------------ source/dnode/vnode/meta/src/metaIdx.c | 31 +++++++++-- source/dnode/vnode/meta/src/metaMain.c | 69 +++++++++++++++--------- 4 files changed, 86 insertions(+), 97 deletions(-) diff --git a/source/dnode/vnode/meta/src/metaCache.c b/source/dnode/vnode/meta/src/metaCache.c index 1e848d434e..9166f1724a 100644 --- a/source/dnode/vnode/meta/src/metaCache.c +++ b/source/dnode/vnode/meta/src/metaCache.c @@ -18,9 +18,20 @@ int 
metaOpenCache(SMeta *pMeta) { // TODO + if (pMeta->options.lruCacheSize) { + pMeta->pCache = rocksdb_cache_create_lru(pMeta->options.lruCacheSize); + if (pMeta->pCache == NULL) { + // TODO: handle error + return -1; + } + } + return 0; } void metaCloseCache(SMeta *pMeta) { - // TODO + if (pMeta->pCache) { + rocksdb_cache_destroy(pMeta->pCache); + pMeta->pCache = NULL; + } } \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index 5c3d9d0a02..2fcf424971 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -17,78 +17,10 @@ #include "metaDef.h" int metaOpenDB(SMeta *pMeta) { -#if 0 - char * err = NULL; - rocksdb_options_t *dbOptions; - rocksdb_options_t *idxOptions; + /* TODO */ - // Create LRU cache - if (pMeta->options.lruCacheSize) { - pMeta->metaDB.pCache = rocksdb_cache_create_lru(pMeta->options.lruCacheSize); - if (pMeta->metaDB.pCache == NULL) { - // TODO: handle error - return -1; - } - } - - // Open raw data DB --------------------------- - dbOptions = rocksdb_options_create(); - if (dbOptions == NULL) { - // TODO: handle error - return -1; - } - - if (pMeta->metaDB.pCache) { - rocksdb_options_set_row_cache(dbOptions, pMeta->metaDB.pCache); - } - - pMeta->metaDB.pDB = rocksdb_open(dbOptions, "db", &err); - if (pMeta->metaDB.pDB == NULL) { - // TODO: handle error - return -1; - } - - rocksdb_options_destroy(dbOptions); - - // Open index DB --------------------------- - idxOptions = rocksdb_options_create(); - if (idxOptions == NULL) { - // TODO: handle error - return -1; - } - - if (pMeta->metaDB.pCache) { - rocksdb_options_set_row_cache(dbOptions, pMeta->metaDB.pCache); - } - - pMeta->metaDB.pIdx = rocksdb_open(idxOptions, "index", &err); - if (pMeta->metaDB.pIdx == NULL) { - // TODO: handle error - rocksdb_close(pMeta->metaDB.pDB); - return -1; - } - - rocksdb_options_destroy(idxOptions); - -#endif return 0; } void metaCloseDB(SMeta *pMeta) { /* TODO 
*/ -#if 0 - // Close index DB - if (pMeta->metaDB.pIdx) { - rocksdb_close(pMeta->metaDB.pIdx); - } - - // Close raw data DB - if (pMeta->metaDB.pDB) { - rocksdb_close(pMeta->metaDB.pDB); - } - - // Destroy cache - if (pMeta->metaDB.pCache) { - rocksdb_cache_destroy(pMeta->metaDB.pCache); - } -#endif } \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaIdx.c b/source/dnode/vnode/meta/src/metaIdx.c index f04489d21e..786b2d1018 100644 --- a/source/dnode/vnode/meta/src/metaIdx.c +++ b/source/dnode/vnode/meta/src/metaIdx.c @@ -13,11 +13,36 @@ * along with this program. If not, see . */ -#include "metaIdx.h" +#include "meta.h" +#include "metaDef.h" int metaOpenIdx(SMeta *pMeta) { - /* TODO */ + char idxDir[128]; // TODO + char * err = NULL; + rocksdb_options_t *options = rocksdb_options_create(); + + // TODO + sprintf(idxDir, "%s/index", pMeta->path); + + if (pMeta->pCache) { + rocksdb_options_set_row_cache(options, pMeta->pCache); + } + + pMeta->pIdx = rocksdb_open(options, idxDir, &err); + if (pMeta->pIdx == NULL) { + // TODO: handle error + rocksdb_options_destroy(options); + return -1; + } + + rocksdb_options_destroy(options); + return 0; } -void metaCloseIdx(SMeta *pMeta) { /* TODO */ } \ No newline at end of file +void metaCloseIdx(SMeta *pMeta) { /* TODO */ + if (pMeta->pIdx) { + rocksdb_close(pMeta->pIdx); + pMeta->pIdx = NULL; + } +} \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaMain.c b/source/dnode/vnode/meta/src/metaMain.c index 5ffcfb86cb..000b10a126 100644 --- a/source/dnode/vnode/meta/src/metaMain.c +++ b/source/dnode/vnode/meta/src/metaMain.c @@ -22,6 +22,8 @@ static SMeta *metaNew(const char *path, const SMetaOptions *pMetaOptions); static void metaFree(SMeta *pMeta); +static int metaOpenImpl(SMeta *pMeta); +static void metaCloseImpl(SMeta *pMeta); SMeta *metaOpen(const char *path, const SMetaOptions *pMetaOptions) { SMeta *pMeta = NULL; @@ -47,26 +49,9 @@ SMeta *metaOpen(const char *path, const 
SMetaOptions *pMetaOptions) { // Create META path (TODO) taosMkDir(path); - // Open meta cache - if (metaOpenCache(pMeta) < 0) { - // TODO: handle error - return NULL; - } - - // Open meta db - if (metaOpenDB(pMeta) < 0) { - // TODO: handle error - return NULL; - } - - // Open meta index - if (metaOpenIdx(pMeta) < 0) { - // TODO: handle error - return NULL; - } - - // Open meta table uid generator - if (metaOpenUidGnrt(pMeta) < 0) { + // Open meta + if (metaOpenImpl(pMeta) < 0) { + metaFree(pMeta); return NULL; } @@ -75,10 +60,7 @@ SMeta *metaOpen(const char *path, const SMetaOptions *pMetaOptions) { void metaClose(SMeta *pMeta) { if (pMeta) { - metaCloseUidGnrt(pMeta); - metaCloseIdx(pMeta); - metaCloseDB(pMeta); - metaCloseCache(pMeta); + metaCloseImpl(pMeta); metaFree(pMeta); } } @@ -113,6 +95,45 @@ static void metaFree(SMeta *pMeta) { } } +static int metaOpenImpl(SMeta *pMeta) { + // Open meta cache + if (metaOpenCache(pMeta) < 0) { + // TODO: handle error + metaCloseImpl(pMeta); + return -1; + } + + // Open meta db + if (metaOpenDB(pMeta) < 0) { + // TODO: handle error + metaCloseImpl(pMeta); + return -1; + } + + // Open meta index + if (metaOpenIdx(pMeta) < 0) { + // TODO: handle error + metaCloseImpl(pMeta); + return -1; + } + + // Open meta table uid generator + if (metaOpenUidGnrt(pMeta) < 0) { + // TODO: handle error + metaCloseImpl(pMeta); + return -1; + } + + return 0; +} + +static void metaCloseImpl(SMeta *pMeta) { + metaCloseUidGnrt(pMeta); + metaCloseIdx(pMeta); + metaCloseDB(pMeta); + metaCloseCache(pMeta); +} + // OLD ------------------------------------------------------------------- #if 0 static int metaCreateSuperTable(SMeta *pMeta, const char *tbname, const SSuperTableOpts *pSuperTableOpts); From 5f0e19747f2ea73ec21586ddf5787f5ed0d286c3 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Wed, 3 Nov 2021 10:41:07 +0800 Subject: [PATCH 22/94] more --- source/dnode/vnode/meta/src/metaDB.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 
insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index 2fcf424971..04c43d343e 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -17,10 +17,32 @@ #include "metaDef.h" int metaOpenDB(SMeta *pMeta) { - /* TODO */ + char dbDir[128]; + char * err = NULL; + rocksdb_options_t *options = rocksdb_options_create(); + + // TODO + sprintf(dbDir, "%s/db", pMeta->path); + + if (pMeta->pCache) { + rocksdb_options_set_row_cache(options, pMeta->pCache); + } + + pMeta->pDB = rocksdb_open(options, dbDir, &err); + if (pMeta->pDB == NULL) { + // TODO: handle error + rocksdb_options_destroy(options); + return -1; + } + + rocksdb_options_destroy(options); return 0; } -void metaCloseDB(SMeta *pMeta) { /* TODO */ +void metaCloseDB(SMeta *pMeta) { + if (pMeta->pDB) { + rocksdb_close(pMeta->pDB); + pMeta->pDB = NULL; + } } \ No newline at end of file From 903236fa5ef4412af6022a041ccd3a50029178a7 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Wed, 3 Nov 2021 11:42:17 +0800 Subject: [PATCH 23/94] more --- include/server/vnode/tsdb/impl/tsdbImpl.h | 1 + source/dnode/vnode/tsdb/inc/tsdbOptions.h | 4 ++++ source/dnode/vnode/tsdb/src/tsdbMain.c | 18 +++++++++++++++++- source/dnode/vnode/tsdb/src/tsdbOptions.c | 20 +++++++++++++++++++- 4 files changed, 41 insertions(+), 2 deletions(-) diff --git a/include/server/vnode/tsdb/impl/tsdbImpl.h b/include/server/vnode/tsdb/impl/tsdbImpl.h index 15f611c703..04d2ec43c9 100644 --- a/include/server/vnode/tsdb/impl/tsdbImpl.h +++ b/include/server/vnode/tsdb/impl/tsdbImpl.h @@ -23,6 +23,7 @@ extern "C" { #endif struct STsdbOptions { + size_t lruCacheSize; /* TODO */ }; diff --git a/source/dnode/vnode/tsdb/inc/tsdbOptions.h b/source/dnode/vnode/tsdb/inc/tsdbOptions.h index 4d6a250424..a186482939 100644 --- a/source/dnode/vnode/tsdb/inc/tsdbOptions.h +++ b/source/dnode/vnode/tsdb/inc/tsdbOptions.h @@ -20,6 +20,10 @@ extern "C" { #endif +extern 
const STsdbOptions defautlTsdbOptions; + +int tsdbValidateOptions(const STsdbOptions *); + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/tsdb/src/tsdbMain.c b/source/dnode/vnode/tsdb/src/tsdbMain.c index 61e887dd45..9f473c3be1 100644 --- a/source/dnode/vnode/tsdb/src/tsdbMain.c +++ b/source/dnode/vnode/tsdb/src/tsdbMain.c @@ -13,4 +13,20 @@ * along with this program. If not, see . */ -#include "tsdbDef.h" \ No newline at end of file +#include "tsdbDef.h" + +STsdb *tsdbOpen(const char *path, const STsdbOptions *pTsdbOptions) { + STsdb *pTsdb = NULL; + /* TODO */ + return pTsdb; +} + +void tsdbClose(STsdb *pTsdb) { + if (pTsdb) { + /* TODO */ + } +} + +void tsdbRemove(const char *path) { taosRemoveDir(path); } + +/* ------------------------ STATIC METHODS ------------------------ */ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb/src/tsdbOptions.c b/source/dnode/vnode/tsdb/src/tsdbOptions.c index 6dea4a4e57..1e1a859285 100644 --- a/source/dnode/vnode/tsdb/src/tsdbOptions.c +++ b/source/dnode/vnode/tsdb/src/tsdbOptions.c @@ -11,4 +11,22 @@ * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
- */ \ No newline at end of file + */ + +#include "tsdbDef.h" + +const STsdbOptions defautlTsdbOptions = {.lruCacheSize = 0}; + +int tsdbOptionsInit(STsdbOptions *pTsdbOptions) { + // TODO + return 0; +} + +void tsdbOptionsClear(STsdbOptions *pTsdbOptions) { + // TODO +} + +int tsdbValidateOptions(const STsdbOptions *pTsdbOptions) { + // TODO + return 0; +} From 349a6a47711dbd26dcc3d97df411b2aa2a74b185 Mon Sep 17 00:00:00 2001 From: lichuang Date: Wed, 3 Nov 2021 11:47:44 +0800 Subject: [PATCH 24/94] [TD-10645][raft]add raft message handle --- include/libs/sync/sync.h | 4 +- source/libs/sync/inc/raft.h | 66 +++++++++-- source/libs/sync/inc/raft_message.h | 56 ++++++--- source/libs/sync/inc/raft_progress.h | 1 + source/libs/sync/src/raft.c | 168 +++++++++++++++++++++++++-- source/libs/sync/src/raft_message.c | 4 +- source/libs/sync/src/raft_progress.c | 15 +-- source/libs/sync/src/sync.c | 2 +- 8 files changed, 271 insertions(+), 45 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index ef8773f5cc..ced9cc72fc 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -133,9 +133,9 @@ typedef struct SStateManager { int32_t (*readServerState)(struct SStateManager* stateMng, SSyncServerState* state); - // void (*saveCluster)(struct SStateManager* stateMng, const SSyncClusterConfig* cluster); + void (*saveCluster)(struct SStateManager* stateMng, const SSyncClusterConfig* cluster); - // const SSyncClusterConfig* (*readCluster)(struct SStateManager* stateMng); + const SSyncClusterConfig* (*readCluster)(struct SStateManager* stateMng); } SStateManager; typedef struct { diff --git a/source/libs/sync/inc/raft.h b/source/libs/sync/inc/raft.h index 869baecdda..0e2d1769b3 100644 --- a/source/libs/sync/inc/raft.h +++ b/source/libs/sync/inc/raft.h @@ -20,6 +20,8 @@ #include "sync_type.h" #include "raft_message.h" +#define SYNC_NON_NODE_ID -1 + typedef struct SSyncRaftProgress SSyncRaftProgress; typedef struct RaftLeaderState { @@ -28,38 
+30,84 @@ typedef struct RaftLeaderState { } RaftLeaderState; typedef struct SSyncRaftIOMethods { - SyncTime (*time)(SSyncRaft*); + } SSyncRaftIOMethods; +typedef int (*SyncRaftStepFp)(SSyncRaft* pRaft, const SSyncMessage* pMsg); +typedef void (*SyncRaftTickFp)(SSyncRaft* pRaft); + struct SSyncRaft { // owner sync node SSyncNode* pNode; SSyncInfo info; + SSyncTerm term; + SyncNodeId voteFor; + + SyncNodeId selfId; + + /** + * the leader id + **/ + SyncNodeId leaderId; + + /** + * leadTransferee is id of the leader transfer target when its value is not zero. + * Follow the procedure defined in raft thesis 3.10. + **/ + SyncNodeId leadTransferee; + + /** + * New configuration is ignored if there exists unapplied configuration. + **/ + bool pendingConf; + + ESyncRole state; + + /** + * number of ticks since it reached last electionTimeout when it is leader + * or candidate. + * number of ticks since it reached last electionTimeout or received a + * valid message from current leader when it is a follower. + **/ + uint16_t electionElapsed; + + /** + * number of ticks since it reached last heartbeatTimeout. + * only leader keeps heartbeatElapsed. 
+ **/ + uint16_t heartbeatElapsed; + // election timeout tick(random in [3:6] tick) - uint16_t electionTick; + uint16_t electionTimeoutTick; // heartbeat timeout tick(default: 1 tick) - uint16_t heartbeatTick; - - int installSnapShotTimeoutMS; - - // - int heartbeatTimeoutMS; + uint16_t heartbeatTimeoutTick; bool preVote; + bool checkQuorum; SSyncRaftIOMethods io; RaftLeaderState leaderState; SSyncRaftUnstableLog *log; + + SyncRaftStepFp stepFp; + + SyncRaftTickFp tickFp; }; int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo); -int32_t syncRaftStep(SSyncRaft* pRaft, const RaftMessage* pMsg); +int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg); int32_t syncRaftTick(SSyncRaft* pRaft); + +void syncRaftBecomeFollower(SSyncRaft* pRaft, SSyncTerm term, SyncNodeId leaderId); +void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft); +bool syncRaftIsPromotable(SSyncRaft* pRaft); +bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft); + #endif /* _TD_LIBS_SYNC_RAFT_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/raft_message.h b/source/libs/sync/inc/raft_message.h index faf14840c9..9e690855c7 100644 --- a/source/libs/sync/inc/raft_message.h +++ b/source/libs/sync/inc/raft_message.h @@ -28,15 +28,14 @@ typedef enum RaftMessageType { // client propose a cmd RAFT_MSG_INTERNAL_PROP = 1, - RAFT_MSG_APPEND, - RAFT_MSG_APPEND_RESP, + // node election timeout + RAFT_MSG_INTERNAL_ELECTION = 2, - RAFT_MSG_VOTE, - RAFT_MSG_VOTE_RESP, - - RAFT_MSG_PRE_VOTE, - RAFT_MSG_PRE_VOTE_RESP, + RAFT_MSG_VOTE = 3, + RAFT_MSG_VOTE_RESP = 4, + RAFT_MSG_PRE_VOTE = 5, + RAFT_MSG_PRE_VOTE_RESP = 6, } RaftMessageType; typedef struct RaftMsgInternal_Prop { @@ -45,7 +44,15 @@ typedef struct RaftMsgInternal_Prop { void* pData; } RaftMsgInternal_Prop; -typedef struct RaftMessage { +typedef struct RaftMsgInternal_Election { + +} RaftMsgInternal_Election; + +typedef struct RaftMsg_PreVoteResp { + bool reject; +} RaftMsg_PreVoteResp; + +typedef struct SSyncMessage { 
RaftMessageType msgType; SSyncTerm term; SyncNodeId from; @@ -53,12 +60,17 @@ typedef struct RaftMessage { union { RaftMsgInternal_Prop propose; - }; -} RaftMessage; -static FORCE_INLINE RaftMessage* syncInitPropMsg(RaftMessage* pMsg, const SSyncBuffer* pBuf, void* pData, bool isWeak) { - *pMsg = (RaftMessage) { + RaftMsgInternal_Election election; + + RaftMsg_PreVoteResp preVoteResp; + }; +} SSyncMessage; + +static FORCE_INLINE SSyncMessage* syncInitPropMsg(SSyncMessage* pMsg, const SSyncBuffer* pBuf, void* pData, bool isWeak) { + *pMsg = (SSyncMessage) { .msgType = RAFT_MSG_INTERNAL_PROP, + .term = 0, .propose = (RaftMsgInternal_Prop) { .isWeak = isWeak, .pBuf = pBuf, @@ -69,10 +81,24 @@ static FORCE_INLINE RaftMessage* syncInitPropMsg(RaftMessage* pMsg, const SSyncB return pMsg; } -static FORCE_INLINE bool syncIsInternalMsg(const RaftMessage* pMsg) { - return pMsg->msgType == RAFT_MSG_INTERNAL_PROP; +static FORCE_INLINE SSyncMessage* syncInitElectionMsg(SSyncMessage* pMsg, SyncNodeId from) { + *pMsg = (SSyncMessage) { + .msgType = RAFT_MSG_INTERNAL_ELECTION, + .term = 0, + .from = from, + .election = (RaftMsgInternal_Election) { + + }, + }; + + return pMsg; } -void syncFreeMessage(const RaftMessage* pMsg); +static FORCE_INLINE bool syncIsInternalMsg(const SSyncMessage* pMsg) { + return pMsg->msgType == RAFT_MSG_INTERNAL_PROP || + pMsg->msgType == RAFT_MSG_INTERNAL_ELECTION; +} + +void syncFreeMessage(const SSyncMessage* pMsg); #endif /* _TD_LIBS_SYNC_RAFT_MESSAGE_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/raft_progress.h b/source/libs/sync/inc/raft_progress.h index 73aa9db59f..159a80fa0e 100644 --- a/source/libs/sync/inc/raft_progress.h +++ b/source/libs/sync/inc/raft_progress.h @@ -148,6 +148,7 @@ void syncRaftProgressBecomeReplicate(SSyncRaft* pRaft, int i); void syncRaftProgressBecomeSnapshot(SSyncRaft* pRaft, int i, SyncIndex snapshotIndex); +/* inflights APIs */ int syncRaftInflightReset(SSyncRaftInflights* inflights); bool 
syncRaftInflightFull(SSyncRaftInflights* inflights); void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex); diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c index 42b220e642..09f29cbd28 100644 --- a/source/libs/sync/src/raft.c +++ b/source/libs/sync/src/raft.c @@ -18,7 +18,20 @@ #define RAFT_READ_LOG_MAX_NUM 100 -static void syncRaftBecomeFollower(SSyncRaft* pRaft, SSyncTerm term); +static bool preHandleMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); +static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); +static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); + +static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg); +static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg); +static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg); + +static void tickElection(SSyncRaft* pRaft); +static void tickHeartbeat(SSyncRaft* pRaft); + +static void abortLeaderTransfer(SSyncRaft* pRaft); + +static void resetRaft(SSyncRaft* pRaft, SSyncTerm term); int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { SSyncNode* pNode = pRaft->pNode; @@ -30,6 +43,8 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { SSyncBuffer buffer[RAFT_READ_LOG_MAX_NUM]; int nBuf, limit, i; + memset(pRaft, 0, sizeof(SSyncRaft)); + memcpy(&pRaft->info, pInfo, sizeof(SSyncInfo)); stateManager = &(pRaft->info.stateManager); logStore = &(pRaft->info.logStore); @@ -60,15 +75,30 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { } assert(initIndex == serverState.commitIndex); - pRaft->heartbeatTick = 1; + pRaft->heartbeatTimeoutTick = 1; - syncRaftBecomeFollower(pRaft, 1); + syncRaftBecomeFollower(pRaft, pRaft->term, SYNC_NON_NODE_ID); syncInfo("restore vgid %d state: snapshot index success", pInfo->vgId); return 0; } -int32_t syncRaftStep(SSyncRaft* pRaft, const RaftMessage* pMsg) { +int32_t syncRaftStep(SSyncRaft* pRaft, const 
SSyncMessage* pMsg) { + syncDebug("from "); + if (preHandleMessage(pRaft, pMsg)) { + syncFreeMessage(pMsg); + return 0; + } + + RaftMessageType msgType = pMsg->msgType; + if (msgType == RAFT_MSG_INTERNAL_ELECTION) { + + } else if (msgType == RAFT_MSG_VOTE || msgType == RAFT_MSG_PRE_VOTE) { + + } else { + pRaft->stepFp(pRaft, pMsg); + } + syncFreeMessage(pMsg); return 0; } @@ -77,7 +107,131 @@ int32_t syncRaftTick(SSyncRaft* pRaft) { return 0; } -static void syncRaftBecomeFollower(SSyncRaft* pRaft, SSyncTerm term) { - pRaft->electionTick = taosRand() % 3 + 3; - return; +void syncRaftBecomeFollower(SSyncRaft* pRaft, SSyncTerm term, SyncNodeId leaderId) { + pRaft->stepFp = stepFollower; + resetRaft(pRaft, term); + pRaft->tickFp = tickElection; + pRaft->leaderId = leaderId; + pRaft->state = TAOS_SYNC_ROLE_FOLLOWER; +} + +void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft) { + // electionTimeoutTick in [3,6] tick + pRaft->electionTimeoutTick = taosRand() % 4 + 3; +} + +bool syncRaftIsPromotable(SSyncRaft* pRaft) { + return pRaft->info.syncCfg.selfIndex >= 0 && + pRaft->info.syncCfg.selfIndex < pRaft->info.syncCfg.replica && + pRaft->selfId != SYNC_NON_NODE_ID; +} + +bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft) { + return pRaft->electionElapsed >= pRaft->electionTimeoutTick; +} + +/** + * pre-handle message, return true is no need to continue + * Handle the message term, which may result in our stepping down to a follower. + **/ +static bool preHandleMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + // local message? 
+ if (pMsg->term == 0) { + return false; + } + + if (pMsg->term > pRaft->term) { + return preHandleNewTermMessage(pRaft, pMsg); + } + + return preHandleOldTermMessage(pRaft, pMsg);; +} + +static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + SyncNodeId leaderId = pMsg->from; + RaftMessageType msgType = pMsg->msgType; + + if (msgType == RAFT_MSG_VOTE || msgType == RAFT_MSG_PRE_VOTE) { + leaderId = SYNC_NON_NODE_ID; + } + + if (msgType == RAFT_MSG_PRE_VOTE) { + // Never change our term in response to a PreVote + } else if (msgType == RAFT_MSG_PRE_VOTE_RESP && !pMsg->preVoteResp.reject) { + /** + * We send pre-vote requests with a term in our future. If the + * pre-vote is granted, we will increment our term when we get a + * quorum. If it is not, the term comes from the node that + * rejected our vote so we should become a follower at the new + * term. + **/ + } else { + syncRaftBecomeFollower(pRaft, pMsg->term, leaderId); + } + + return false; +} + +static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + + // if receive old term message, no need to continue + return true; +} + +static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + return 0; +} + +static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + return 0; +} + +static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + return 0; +} + +/** + * tickElection is run by followers and candidates per tick. 
+ **/ +static void tickElection(SSyncRaft* pRaft) { + pRaft->electionElapsed += 1; + + if (!syncRaftIsPromotable(pRaft)) { + return; + } + + if (!syncRaftIsPastElectionTimeout(pRaft)) { + return; + } + + // election timeout + pRaft->electionElapsed = 0; + SSyncMessage msg; + syncRaftStep(pRaft, syncInitElectionMsg(&msg, pRaft->selfId)); +} + +static void tickHeartbeat(SSyncRaft* pRaft) { + +} + +static void abortLeaderTransfer(SSyncRaft* pRaft) { + pRaft->leadTransferee = SYNC_NON_NODE_ID; +} + +static void resetRaft(SSyncRaft* pRaft, SSyncTerm term) { + if (pRaft->term != term) { + pRaft->term = term; + pRaft->voteFor = SYNC_NON_NODE_ID; + } + + pRaft->leaderId = SYNC_NON_NODE_ID; + + pRaft->electionElapsed = 0; + pRaft->heartbeatElapsed = 0; + + syncRaftRandomizedElectionTimeout(pRaft); + + abortLeaderTransfer(pRaft); + + pRaft->pendingConf = false; } \ No newline at end of file diff --git a/source/libs/sync/src/raft_message.c b/source/libs/sync/src/raft_message.c index 912314daf2..d17a5b732b 100644 --- a/source/libs/sync/src/raft_message.c +++ b/source/libs/sync/src/raft_message.c @@ -15,8 +15,8 @@ #include "raft_message.h" -void syncFreeMessage(const RaftMessage* pMsg) { +void syncFreeMessage(const SSyncMessage* pMsg) { if (!syncIsInternalMsg(pMsg)) { - free((RaftMessage*)pMsg); + free((SSyncMessage*)pMsg); } } \ No newline at end of file diff --git a/source/libs/sync/src/raft_progress.c b/source/libs/sync/src/raft_progress.c index 0f51d20531..ba09973f48 100644 --- a/source/libs/sync/src/raft_progress.c +++ b/source/libs/sync/src/raft_progress.c @@ -177,14 +177,6 @@ void syncRaftProgressBecomeSnapshot(SSyncRaft* pRaft, int i, SyncIndex snapshotI progress->pendingSnapshotIndex = snapshotIndex; } -static void resetProgressState(SSyncRaftProgress* progress, RaftProgressState state) { - progress->paused = false; - progress->pendingSnapshotIndex = 0; - progress->state = state; - syncRaftInflightReset(&(progress->inflights)); -} - - int 
syncRaftInflightReset(SSyncRaftInflights* inflights) { inflights->count = 0; inflights->start = 0; @@ -240,7 +232,12 @@ void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights) { syncRaftInflightFreeTo(inflights, inflights->buffer[inflights->start]); } - +static void resetProgressState(SSyncRaftProgress* progress, RaftProgressState state) { + progress->paused = false; + progress->pendingSnapshotIndex = 0; + progress->state = state; + syncRaftInflightReset(&(progress->inflights)); +} diff --git a/source/libs/sync/src/sync.c b/source/libs/sync/src/sync.c index e3d0606c08..9077be3f2d 100644 --- a/source/libs/sync/src/sync.c +++ b/source/libs/sync/src/sync.c @@ -157,7 +157,7 @@ void syncStop(const SSyncNode* pNode) { } int32_t syncPropose(SSyncNode* syncNode, const SSyncBuffer* pBuf, void* pData, bool isWeak) { - RaftMessage msg; + SSyncMessage msg; pthread_mutex_lock(&syncNode->mutex); int32_t ret = syncRaftStep(&syncNode->raft, syncInitPropMsg(&msg, pBuf, pData, isWeak)); From 552051da5a6c7075cadf00918006afa0e59cf6d4 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Wed, 3 Nov 2021 13:37:26 +0800 Subject: [PATCH 25/94] refact --- include/server/vnode/meta/impl/metaImpl.h | 2 ++ include/server/vnode/meta/meta.h | 16 ++++++++-------- source/dnode/vnode/meta/src/metaDB.c | 1 + source/dnode/vnode/meta/src/metaIdx.c | 1 + source/dnode/vnode/meta/src/metaTable.c | 2 +- .../meta/test/{metaTests.cpp => metaAPITest.cpp} | 2 +- 6 files changed, 14 insertions(+), 10 deletions(-) rename source/dnode/vnode/meta/test/{metaTests.cpp => metaAPITest.cpp} (100%) diff --git a/include/server/vnode/meta/impl/metaImpl.h b/include/server/vnode/meta/impl/metaImpl.h index a9d70500cb..7987ddf203 100644 --- a/include/server/vnode/meta/impl/metaImpl.h +++ b/include/server/vnode/meta/impl/metaImpl.h @@ -29,6 +29,7 @@ struct SMetaOptions { size_t lruCacheSize; // LRU cache size }; +#if 0 typedef enum { META_INIT_TABLE = 0, META_SUPER_TABLE = 1, META_CHILD_TABLE = 2, META_NORMAL_TABLE = 3 
} EMetaTableT; typedef struct SSuperTableOpts { tb_uid_t uid; @@ -54,6 +55,7 @@ struct STableOptions { SNormalTableOpts normalOpts; }; }; +#endif #ifdef __cplusplus } diff --git a/include/server/vnode/meta/meta.h b/include/server/vnode/meta/meta.h index f1d18e27ff..b94ffc7a5d 100644 --- a/include/server/vnode/meta/meta.h +++ b/include/server/vnode/meta/meta.h @@ -23,15 +23,15 @@ extern "C" { #endif // Types exported -typedef struct SMeta SMeta; -typedef struct SMetaOptions SMetaOptions; -typedef struct STableOptions STableOptions; +typedef struct SMeta SMeta; +typedef struct SMetaOptions SMetaOptions; +typedef struct STbOptions STbOptions; // SMeta operations SMeta *metaOpen(const char *path, const SMetaOptions *); void metaClose(SMeta *); void metaRemove(const char *path); -int metaCreateTable(SMeta *pMeta, const STableOptions *); +int metaCreateTable(SMeta *pMeta, const STbOptions *); int metaDropTable(SMeta *pMeta, tb_uid_t uid); int metaCommit(SMeta *); @@ -41,11 +41,11 @@ void metaOptionsClear(SMetaOptions *); // STableOpts #define META_TABLE_OPTS_DECLARE(name) STableOpts name = {0} -void metaNormalTableOptsInit(STableOptions *, const char *name, const STSchema *pSchema); -void metaSuperTableOptsInit(STableOptions *, const char *name, tb_uid_t uid, const STSchema *pSchema, +void metaNormalTableOptsInit(STbOptions *, const char *name, const STSchema *pSchema); +void metaSuperTableOptsInit(STbOptions *, const char *name, tb_uid_t uid, const STSchema *pSchema, const STSchema *pTagSchema); -void metaChildTableOptsInit(STableOptions *, const char *name, tb_uid_t suid, const SKVRow tags); -void metaTableOptsClear(STableOptions *); +void metaChildTableOptsInit(STbOptions *, const char *name, tb_uid_t suid, const SKVRow tags); +void metaTableOptsClear(STbOptions *); #ifdef __cplusplus } diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index 04c43d343e..1dbb88587a 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ 
b/source/dnode/vnode/meta/src/metaDB.c @@ -27,6 +27,7 @@ int metaOpenDB(SMeta *pMeta) { if (pMeta->pCache) { rocksdb_options_set_row_cache(options, pMeta->pCache); } + rocksdb_options_set_create_if_missing(options, 1); pMeta->pDB = rocksdb_open(options, dbDir, &err); if (pMeta->pDB == NULL) { diff --git a/source/dnode/vnode/meta/src/metaIdx.c b/source/dnode/vnode/meta/src/metaIdx.c index 786b2d1018..29353cd511 100644 --- a/source/dnode/vnode/meta/src/metaIdx.c +++ b/source/dnode/vnode/meta/src/metaIdx.c @@ -27,6 +27,7 @@ int metaOpenIdx(SMeta *pMeta) { if (pMeta->pCache) { rocksdb_options_set_row_cache(options, pMeta->pCache); } + rocksdb_options_set_create_if_missing(options, 1); pMeta->pIdx = rocksdb_open(options, idxDir, &err); if (pMeta->pIdx == NULL) { diff --git a/source/dnode/vnode/meta/src/metaTable.c b/source/dnode/vnode/meta/src/metaTable.c index 029d43dc85..de1f8bba90 100644 --- a/source/dnode/vnode/meta/src/metaTable.c +++ b/source/dnode/vnode/meta/src/metaTable.c @@ -15,7 +15,7 @@ #include "meta.h" -int metaCreateTable(SMeta *pMeta, const STableOptions *pTbOptions) { +int metaCreateTable(SMeta *pMeta, const STbOptions *pTbOptions) { // TODO return 0; } diff --git a/source/dnode/vnode/meta/test/metaTests.cpp b/source/dnode/vnode/meta/test/metaAPITest.cpp similarity index 100% rename from source/dnode/vnode/meta/test/metaTests.cpp rename to source/dnode/vnode/meta/test/metaAPITest.cpp index 49d6b99c9f..0d79882018 100644 --- a/source/dnode/vnode/meta/test/metaTests.cpp +++ b/source/dnode/vnode/meta/test/metaAPITest.cpp @@ -1,10 +1,10 @@ +#if 0 #include #include #include #include "meta.h" -#if 0 static STSchema *metaGetSimpleSchema() { STSchema * pSchema = NULL; STSchemaBuilder sb = {0}; From 770b1bb1c0c7e2ecad388dfff8bed9931762c03a Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Wed, 3 Nov 2021 14:49:20 +0800 Subject: [PATCH 26/94] more --- include/server/vnode/meta/impl/metaImpl.h | 49 ++++++++++++--------- source/dnode/vnode/meta/inc/metaDB.h | 2 + 
source/dnode/vnode/meta/inc/metaDef.h | 2 + source/dnode/vnode/meta/inc/metaIdx.h | 2 + source/dnode/vnode/meta/inc/metaTbOptions.h | 4 ++ source/dnode/vnode/meta/src/metaDB.c | 10 +++++ source/dnode/vnode/meta/src/metaIdx.c | 6 ++- source/dnode/vnode/meta/src/metaTable.c | 32 ++++++++++++-- source/dnode/vnode/meta/src/metaTbOptions.c | 9 +++- 9 files changed, 90 insertions(+), 26 deletions(-) diff --git a/include/server/vnode/meta/impl/metaImpl.h b/include/server/vnode/meta/impl/metaImpl.h index 7987ddf203..d6f3bbbcfe 100644 --- a/include/server/vnode/meta/impl/metaImpl.h +++ b/include/server/vnode/meta/impl/metaImpl.h @@ -25,37 +25,44 @@ extern "C" { #endif typedef uint64_t tb_uid_t; +/* ------------------------ SMetaOptions ------------------------ */ struct SMetaOptions { - size_t lruCacheSize; // LRU cache size + size_t lruCacheSize; // LRU cache size }; -#if 0 -typedef enum { META_INIT_TABLE = 0, META_SUPER_TABLE = 1, META_CHILD_TABLE = 2, META_NORMAL_TABLE = 3 } EMetaTableT; -typedef struct SSuperTableOpts { +/* ------------------------ STbOptions ------------------------ */ +typedef struct { +} SSMAOptions; + +// super table options +typedef struct { tb_uid_t uid; - STSchema *pSchema; // (ts timestamp, a int) - STSchema *pTagSchema; // (tag1 binary(10), tag2 int) -} SSuperTableOpts; + STSchema* pSchema; + STSchema* pTagSchema; +} SSTbOptions; -typedef struct SChildTableOpts { - tb_uid_t suid; // super table uid - SKVRow tags; // tag value of the child table -} SChildTableOpts; +// child table options +typedef struct { + tb_uid_t suid; + SKVRow tags; +} SCTbOptions; -typedef struct SNormalTableOpts { - STSchema *pSchema; -} SNormalTableOpts; +// normal table options +typedef struct { + SSchema* pSchame; +} SNTbOptions; -struct STableOptions { - int8_t type; - char * name; +struct STbOptions { + uint8_t type; + char* name; + uint64_t ttl; // time to live + SSMAOptions bsma; // Block-wise sma union { - SSuperTableOpts superOpts; - SChildTableOpts childOpts; - 
SNormalTableOpts normalOpts; + SSTbOptions stbOptions; + SNTbOptions ntbOptions; + SCTbOptions ctbOptions; }; }; -#endif #ifdef __cplusplus } diff --git a/source/dnode/vnode/meta/inc/metaDB.h b/source/dnode/vnode/meta/inc/metaDB.h index 3381b05f22..8d7482acbb 100644 --- a/source/dnode/vnode/meta/inc/metaDB.h +++ b/source/dnode/vnode/meta/inc/metaDB.h @@ -28,6 +28,8 @@ typedef rocksdb_t meta_db_t; int metaOpenDB(SMeta *pMeta); void metaCloseDB(SMeta *pMeta); +int metaSaveTableToDB(SMeta *pMeta, const STbOptions *pTbOptions); +int metaRemoveTableFromDb(SMeta *pMeta, tb_uid_t uid); #ifdef __cplusplus } diff --git a/source/dnode/vnode/meta/inc/metaDef.h b/source/dnode/vnode/meta/inc/metaDef.h index a81bd931e4..5c4ae3428c 100644 --- a/source/dnode/vnode/meta/inc/metaDef.h +++ b/source/dnode/vnode/meta/inc/metaDef.h @@ -16,9 +16,11 @@ #ifndef _TD_META_DEF_H_ #define _TD_META_DEF_H_ +#include "meta.h" #include "metaCache.h" #include "metaDB.h" #include "metaIdx.h" +#include "metaOptions.h" #include "metaTbUid.h" #ifdef __cplusplus diff --git a/source/dnode/vnode/meta/inc/metaIdx.h b/source/dnode/vnode/meta/inc/metaIdx.h index 4a897228a9..d43df9afc3 100644 --- a/source/dnode/vnode/meta/inc/metaIdx.h +++ b/source/dnode/vnode/meta/inc/metaIdx.h @@ -28,6 +28,8 @@ typedef rocksdb_t meta_index_t; int metaOpenIdx(SMeta *pMeta); void metaCloseIdx(SMeta *pMeta); +int metaSaveTableToIdx(SMeta *pMeta, const STbOptions *pTbOptions); +int metaRemoveTableFromIdx(SMeta *pMeta, tb_uid_t uid); #ifdef __cplusplus } diff --git a/source/dnode/vnode/meta/inc/metaTbOptions.h b/source/dnode/vnode/meta/inc/metaTbOptions.h index cd2b2ee0e8..1da68ffd52 100644 --- a/source/dnode/vnode/meta/inc/metaTbOptions.h +++ b/source/dnode/vnode/meta/inc/metaTbOptions.h @@ -16,10 +16,14 @@ #ifndef _TD_META_TABLE_OPTIONS_H_ #define _TD_META_TABLE_OPTIONS_H_ +#include "meta.h" + #ifdef __cplusplus extern "C" { #endif +int metaValidateTbOptions(SMeta *pMeta, const STbOptions *); + #ifdef __cplusplus } #endif diff 
--git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index 1dbb88587a..a8e63b6156 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -46,4 +46,14 @@ void metaCloseDB(SMeta *pMeta) { rocksdb_close(pMeta->pDB); pMeta->pDB = NULL; } +} + +int metaSaveTableToDB(SMeta *pMeta, const STbOptions *pTbOptions) { + // TODO + return 0; +} + +int metaRemoveTableFromDb(SMeta *pMeta, tb_uid_t uid) { + /* TODO */ + return 0; } \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaIdx.c b/source/dnode/vnode/meta/src/metaIdx.c index 29353cd511..54cc8bd461 100644 --- a/source/dnode/vnode/meta/src/metaIdx.c +++ b/source/dnode/vnode/meta/src/metaIdx.c @@ -13,7 +13,6 @@ * along with this program. If not, see . */ -#include "meta.h" #include "metaDef.h" int metaOpenIdx(SMeta *pMeta) { @@ -46,4 +45,9 @@ void metaCloseIdx(SMeta *pMeta) { /* TODO */ rocksdb_close(pMeta->pIdx); pMeta->pIdx = NULL; } +} + +int metaSaveTableToIdx(SMeta *pMeta, const STbOptions *pTbOptions) { + // TODO + return 0; } \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaTable.c b/source/dnode/vnode/meta/src/metaTable.c index de1f8bba90..b41d9313d5 100644 --- a/source/dnode/vnode/meta/src/metaTable.c +++ b/source/dnode/vnode/meta/src/metaTable.c @@ -13,14 +13,40 @@ * along with this program. If not, see . 
*/ -#include "meta.h" +#include "metaDef.h" int metaCreateTable(SMeta *pMeta, const STbOptions *pTbOptions) { - // TODO + // Validate the tbOptions + if (metaValidateTbOptions(pTbOptions) < 0) { + // TODO: handle error + return -1; + } + + // TODO: add atomicity + + if (metaSaveTableToDB(pMeta, pTbOptions) < 0) { + // TODO: handle error + return -1; + } + + if (metaSaveTableToIdx(pMeta, pTbOptions) < 0) { + // TODO: handle error + return -1; + } + return 0; } int metaDropTable(SMeta *pMeta, tb_uid_t uid) { - // TODO + if (metaRemoveTableFromIdx(pMeta, uid) < 0) { + // TODO: handle error + return -1; + } + + if (metaRemoveTableFromIdx(pMeta, uid) < 0) { + // TODO + return -1; + } + return 0; } diff --git a/source/dnode/vnode/meta/src/metaTbOptions.c b/source/dnode/vnode/meta/src/metaTbOptions.c index 6dea4a4e57..1f855aef23 100644 --- a/source/dnode/vnode/meta/src/metaTbOptions.c +++ b/source/dnode/vnode/meta/src/metaTbOptions.c @@ -11,4 +11,11 @@ * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
- */ \ No newline at end of file + */ + +#include "metaDef.h" + +int metaValidateTbOptions(SMeta *pMeta, const STbOptions *pTbOptions) { + // TODO + return 0; +} \ No newline at end of file From aab981f667f930117088f1e17c65d22d1488ed32 Mon Sep 17 00:00:00 2001 From: lichuang Date: Wed, 3 Nov 2021 15:30:54 +0800 Subject: [PATCH 27/94] [TD-10645][raft]add raft election message handle --- include/libs/sync/sync.h | 5 +- source/libs/sync/inc/raft.h | 16 ++++- source/libs/sync/inc/raft_message.h | 17 +++-- source/libs/sync/inc/raft_unstable_log.h | 6 +- source/libs/sync/src/raft.c | 62 ++++++++++++++-- .../sync/src/raft_handle_election_message.c | 71 +++++++++++++++++++ source/libs/sync/src/raft_message.c | 2 +- 7 files changed, 162 insertions(+), 17 deletions(-) create mode 100644 source/libs/sync/src/raft_handle_election_message.c diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index ced9cc72fc..b938bbba77 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -26,12 +26,13 @@ extern "C" { typedef int32_t SyncNodeId; typedef int32_t SyncGroupId; typedef int64_t SyncIndex; -typedef uint64_t SSyncTerm; +typedef uint64_t SyncTerm; typedef enum { TAOS_SYNC_ROLE_FOLLOWER = 0, TAOS_SYNC_ROLE_CANDIDATE = 1, TAOS_SYNC_ROLE_LEADER = 2, + TAOS_SYNC_ROLE_PRE_CANDIDATE = 3, } ESyncRole; typedef struct { @@ -111,7 +112,7 @@ typedef struct SSyncLogStore { typedef struct SSyncServerState { SyncNodeId voteFor; - SSyncTerm term; + SyncTerm term; SyncIndex commitIndex; } SSyncServerState; diff --git a/source/libs/sync/inc/raft.h b/source/libs/sync/inc/raft.h index 0e2d1769b3..702fcd00cf 100644 --- a/source/libs/sync/inc/raft.h +++ b/source/libs/sync/inc/raft.h @@ -43,10 +43,11 @@ struct SSyncRaft { SSyncInfo info; - SSyncTerm term; + SyncTerm term; SyncNodeId voteFor; SyncNodeId selfId; + SyncGroupId selfGroupId; /** * the leader id @@ -100,14 +101,25 @@ struct SSyncRaft { SyncRaftTickFp tickFp; }; +typedef enum { + SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0, 
+ SYNC_RAFT_CAMPAIGN_ELECTION = 1, +} SyncRaftCampaignType; + int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo); int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg); int32_t syncRaftTick(SSyncRaft* pRaft); -void syncRaftBecomeFollower(SSyncRaft* pRaft, SSyncTerm term, SyncNodeId leaderId); +void syncRaftBecomeFollower(SSyncRaft* pRaft, SyncTerm term, SyncNodeId leaderId); +void syncRaftBecomePreCandidate(SSyncRaft* pRaft); +void syncRaftBecomeCandidate(SSyncRaft* pRaft); +void syncRaftBecomeLeader(SSyncRaft* pRaft); + void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft); bool syncRaftIsPromotable(SSyncRaft* pRaft); bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft); +int syncRaftQuorum(SSyncRaft* pRaft); +int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, RaftMessageType msgType, bool accept); #endif /* _TD_LIBS_SYNC_RAFT_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/raft_message.h b/source/libs/sync/inc/raft_message.h index 9e690855c7..71fe37bebd 100644 --- a/source/libs/sync/inc/raft_message.h +++ b/source/libs/sync/inc/raft_message.h @@ -17,6 +17,7 @@ #define _TD_LIBS_SYNC_RAFT_MESSAGE_H #include "sync.h" +#include "sync_type.h" /** * below define message type which handled by Raft node thread @@ -54,7 +55,7 @@ typedef struct RaftMsg_PreVoteResp { typedef struct SSyncMessage { RaftMessageType msgType; - SSyncTerm term; + SyncTerm term; SyncNodeId from; SyncNodeId to; @@ -94,11 +95,19 @@ static FORCE_INLINE SSyncMessage* syncInitElectionMsg(SSyncMessage* pMsg, SyncNo return pMsg; } -static FORCE_INLINE bool syncIsInternalMsg(const SSyncMessage* pMsg) { - return pMsg->msgType == RAFT_MSG_INTERNAL_PROP || - pMsg->msgType == RAFT_MSG_INTERNAL_ELECTION; +static FORCE_INLINE bool syncIsInternalMsg(RaftMessageType msgType) { + return msgType == RAFT_MSG_INTERNAL_PROP || + msgType == RAFT_MSG_INTERNAL_ELECTION; +} + +static FORCE_INLINE RaftMessageType SyncRaftVoteRespMsgType(RaftMessageType msgType) { + if 
(msgType == RAFT_MSG_VOTE) return RAFT_MSG_PRE_VOTE_RESP; + return RAFT_MSG_PRE_VOTE_RESP; } void syncFreeMessage(const SSyncMessage* pMsg); +// message handlers +void syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); + #endif /* _TD_LIBS_SYNC_RAFT_MESSAGE_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/raft_unstable_log.h b/source/libs/sync/inc/raft_unstable_log.h index 2b7b30c15a..0c9957cb90 100644 --- a/source/libs/sync/inc/raft_unstable_log.h +++ b/source/libs/sync/inc/raft_unstable_log.h @@ -67,13 +67,13 @@ int raftLogNumEntries(const RaftLog* pLog); /** * return last term of in memory log, return 0 if log is empty **/ -SSyncTerm raftLogLastTerm(RaftLog* pLog); +SyncTerm raftLogLastTerm(RaftLog* pLog); /** * return term of log with the given index, return 0 if the term of index cannot be found * , errCode will save the error code. **/ -SSyncTerm raftLogTermOf(RaftLog* pLog, SyncIndex index, RaftCode* errCode); +SyncTerm raftLogTermOf(RaftLog* pLog, SyncIndex index, RaftCode* errCode); /** * Get the last index of the most recent snapshot. Return 0 if there are no * @@ -83,7 +83,7 @@ SyncIndex raftLogSnapshotIndex(RaftLog* pLog); /* Append a new entry to the log. 
*/ int raftLogAppend(RaftLog* pLog, - SSyncTerm term, + SyncTerm term, const SSyncBuffer *buf); /** diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c index 09f29cbd28..87750eca9e 100644 --- a/source/libs/sync/src/raft.c +++ b/source/libs/sync/src/raft.c @@ -31,7 +31,7 @@ static void tickHeartbeat(SSyncRaft* pRaft); static void abortLeaderTransfer(SSyncRaft* pRaft); -static void resetRaft(SSyncRaft* pRaft, SSyncTerm term); +static void resetRaft(SSyncRaft* pRaft, SyncTerm term); int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { SSyncNode* pNode = pRaft->pNode; @@ -84,7 +84,9 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { } int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - syncDebug("from "); + syncDebug("from %d, to %d, type:%d, term:%" PRId64 ", state:%d", + pMsg->from, pMsg->to, pMsg->msgType, pMsg->term, pRaft->state); + if (preHandleMessage(pRaft, pMsg)) { syncFreeMessage(pMsg); return 0; @@ -92,7 +94,7 @@ int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg) { RaftMessageType msgType = pMsg->msgType; if (msgType == RAFT_MSG_INTERNAL_ELECTION) { - + syncRaftHandleElectionMessage(pRaft, pMsg); } else if (msgType == RAFT_MSG_VOTE || msgType == RAFT_MSG_PRE_VOTE) { } else { @@ -107,7 +109,7 @@ int32_t syncRaftTick(SSyncRaft* pRaft) { return 0; } -void syncRaftBecomeFollower(SSyncRaft* pRaft, SSyncTerm term, SyncNodeId leaderId) { +void syncRaftBecomeFollower(SSyncRaft* pRaft, SyncTerm term, SyncNodeId leaderId) { pRaft->stepFp = stepFollower; resetRaft(pRaft, term); pRaft->tickFp = tickElection; @@ -115,6 +117,40 @@ void syncRaftBecomeFollower(SSyncRaft* pRaft, SSyncTerm term, SyncNodeId leaderI pRaft->state = TAOS_SYNC_ROLE_FOLLOWER; } +void syncRaftBecomePreCandidate(SSyncRaft* pRaft) { + /** + * Becoming a pre-candidate changes our step functions and state, + * but doesn't change anything else. In particular it does not increase + * r.Term or change r.Vote. 
+ **/ + pRaft->stepFp = stepCandidate; + pRaft->tickFp = tickElection; + pRaft->state = TAOS_SYNC_ROLE_PRE_CANDIDATE; + syncInfo("[%d:%d] became pre-candidate at term %d" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); +} + +void syncRaftBecomeCandidate(SSyncRaft* pRaft) { + pRaft->stepFp = stepCandidate; + // become candidate make term+1 + resetRaft(pRaft, pRaft->term + 1); + pRaft->tickFp = tickElection; + pRaft->voteFor = pRaft->selfId; + pRaft->state = TAOS_SYNC_ROLE_CANDIDATE; + syncInfo("[%d:%d] became candidate at term %d" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); +} + +void syncRaftBecomeLeader(SSyncRaft* pRaft) { + assert(pRaft->state != TAOS_SYNC_ROLE_FOLLOWER); + + pRaft->stepFp = stepLeader; + resetRaft(pRaft, pRaft->term); + pRaft->leaderId = pRaft->leaderId; + pRaft->state = TAOS_SYNC_ROLE_LEADER; + // TODO: check if there is pending config log + + syncInfo("[%d:%d] became leader at term %d" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); +} + void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft) { // electionTimeoutTick in [3,6] tick pRaft->electionTimeoutTick = taosRand() % 4 + 3; @@ -130,6 +166,20 @@ bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft) { return pRaft->electionElapsed >= pRaft->electionTimeoutTick; } +int syncRaftQuorum(SSyncRaft* pRaft) { + return pRaft->leaderState.nProgress / 2 + 1; +} + +int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, RaftMessageType msgType, bool accept) { + if (accept) { + + } else { + + } + + +} + /** * pre-handle message, return true is no need to continue * Handle the message term, which may result in our stepping down to a follower. @@ -166,6 +216,8 @@ static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) * term. 
**/ } else { + syncInfo("%d [term:%" PRId64 "] received a %d message with higher term from %d [term:%" PRId64 "]", + pRaft->selfId, pRaft->term, msgType, pMsg->from, pMsg->term); syncRaftBecomeFollower(pRaft, pMsg->term, leaderId); } @@ -218,7 +270,7 @@ static void abortLeaderTransfer(SSyncRaft* pRaft) { pRaft->leadTransferee = SYNC_NON_NODE_ID; } -static void resetRaft(SSyncRaft* pRaft, SSyncTerm term) { +static void resetRaft(SSyncRaft* pRaft, SyncTerm term) { if (pRaft->term != term) { pRaft->term = term; pRaft->voteFor = SYNC_NON_NODE_ID; diff --git a/source/libs/sync/src/raft_handle_election_message.c b/source/libs/sync/src/raft_handle_election_message.c new file mode 100644 index 0000000000..2586cd918d --- /dev/null +++ b/source/libs/sync/src/raft_handle_election_message.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "syncInt.h" +#include "raft.h" +#include "raft_message.h" + +static void campaign(SSyncRaft* pRaft, SyncRaftCampaignType cType); + +void syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + if (pRaft->state == TAOS_SYNC_ROLE_LEADER) { + syncDebug("%d ignoring RAFT_MSG_INTERNAL_ELECTION because already leader", pRaft->selfId); + return; + } + + // TODO: is there pending uncommitted config? 
+ + syncInfo("[%d:%d] is starting a new election at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); + + if (pRaft->preVote) { + + } else { + + } +} + +static void campaign(SSyncRaft* pRaft, SyncRaftCampaignType cType) { + SyncTerm term; + RaftMessageType voteMsgType; + + if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { + syncRaftBecomePreCandidate(pRaft); + voteMsgType = RAFT_MSG_PRE_VOTE; + // PreVote RPCs are sent for the next term before we've incremented r.Term. + term = pRaft->term + 1; + } else { + syncRaftBecomeCandidate(pRaft); + voteMsgType = RAFT_MSG_VOTE; + term = pRaft->term; + } + + int quorum = syncRaftQuorum(pRaft); + int granted = syncRaftNumOfGranted(pRaft, pRaft->selfId, SyncRaftVoteRespMsgType(voteMsgType), true); + if (quorum <= granted) { + /** + * We won the election after voting for ourselves (which must mean that + * this is a single-node cluster). Advance to the next state. + **/ + if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { + campaign(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); + } else { + syncRaftBecomeLeader(pRaft); + } + return; + } + + // broadcast vote message to other peers + +} \ No newline at end of file diff --git a/source/libs/sync/src/raft_message.c b/source/libs/sync/src/raft_message.c index d17a5b732b..e706127f29 100644 --- a/source/libs/sync/src/raft_message.c +++ b/source/libs/sync/src/raft_message.c @@ -16,7 +16,7 @@ #include "raft_message.h" void syncFreeMessage(const SSyncMessage* pMsg) { - if (!syncIsInternalMsg(pMsg)) { + if (!syncIsInternalMsg(pMsg->msgType)) { free((SSyncMessage*)pMsg); } } \ No newline at end of file From 2c60c32ec48f35d65038a3963a48864e517ed2a3 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Wed, 3 Nov 2021 16:13:18 +0800 Subject: [PATCH 28/94] refact --- source/dnode/vnode/meta/inc/metaDB.h | 8 ++++- source/dnode/vnode/meta/src/metaDB.c | 51 +++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/meta/inc/metaDB.h 
b/source/dnode/vnode/meta/inc/metaDB.h index 8d7482acbb..b1531d2fd7 100644 --- a/source/dnode/vnode/meta/inc/metaDB.h +++ b/source/dnode/vnode/meta/inc/metaDB.h @@ -24,7 +24,13 @@ extern "C" { #endif -typedef rocksdb_t meta_db_t; +typedef struct { + rocksdb_t *tbDb; // uid -> tb obj + rocksdb_t *nameDb; // name -> uid + rocksdb_t *tagDb; // uid -> tag + rocksdb_t *schemaDb; // uid+version -> schema + rocksdb_t *mapDb; // suid -> uid_list +} meta_db_t; int metaOpenDB(SMeta *pMeta); void metaCloseDB(SMeta *pMeta); diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index a8e63b6156..d7e1d39c61 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -13,11 +13,21 @@ * along with this program. If not, see . */ -#include "meta.h" #include "metaDef.h" +#define META_OPEN_DB_IMPL(pDB, options, dir, err) \ + do { \ + pDB = rocksdb_open(options, dir, &err); \ + if (pDB == NULL) { \ + metaCloseDB(pMeta); \ + rocksdb_options_destroy(options); \ + return -1; \ + } \ + } while (0) + int metaOpenDB(SMeta *pMeta) { char dbDir[128]; + char dir[128]; char * err = NULL; rocksdb_options_t *options = rocksdb_options_create(); @@ -29,21 +39,52 @@ int metaOpenDB(SMeta *pMeta) { } rocksdb_options_set_create_if_missing(options, 1); - pMeta->pDB = rocksdb_open(options, dbDir, &err); + pMeta->pDB = (meta_db_t *)calloc(1, sizeof(*(pMeta->pDB))); if (pMeta->pDB == NULL) { // TODO: handle error - rocksdb_options_destroy(options); return -1; } - rocksdb_options_destroy(options); + // tbDb + sprintf(dir, "%s/tb_db", dbDir); + META_OPEN_DB_IMPL(pMeta->pDB->tbDb, options, dir, err); + // nameDb + sprintf(dir, "%s/name_db", dbDir); + META_OPEN_DB_IMPL(pMeta->pDB->nameDb, options, dir, err); + + // tagDb + sprintf(dir, "%s/tag_db", dbDir); + META_OPEN_DB_IMPL(pMeta->pDB->tagDb, options, dir, err); + + // schemaDb + sprintf(dir, "%s/schema_db", dbDir); + META_OPEN_DB_IMPL(pMeta->pDB->schemaDb, options, dir, err); + + // mapDb 
+ sprintf(dir, "%s/map_db", dbDir); + META_OPEN_DB_IMPL(pMeta->pDB->mapDb, options, dir, err); + + rocksdb_options_destroy(options); return 0; } +#define META_CLOSE_DB_IMPL(pDB) \ + do { \ + if (pDB) { \ + rocksdb_close(pDB); \ + pDB = NULL; \ + } \ + } while (0) + void metaCloseDB(SMeta *pMeta) { if (pMeta->pDB) { - rocksdb_close(pMeta->pDB); + META_CLOSE_DB_IMPL(pMeta->pDB->mapDb); + META_CLOSE_DB_IMPL(pMeta->pDB->schemaDb); + META_CLOSE_DB_IMPL(pMeta->pDB->tagDb); + META_CLOSE_DB_IMPL(pMeta->pDB->nameDb); + META_CLOSE_DB_IMPL(pMeta->pDB->tbDb); + free(pMeta->pDB); pMeta->pDB = NULL; } } From 3a527836cc64b293d61229ac1b7752d07dd3da3c Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Wed, 3 Nov 2021 16:21:14 +0800 Subject: [PATCH 29/94] more --- source/dnode/vnode/meta/src/metaDB.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index d7e1d39c61..6630eb8a2c 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -42,6 +42,7 @@ int metaOpenDB(SMeta *pMeta) { pMeta->pDB = (meta_db_t *)calloc(1, sizeof(*(pMeta->pDB))); if (pMeta->pDB == NULL) { // TODO: handle error + rocksdb_options_destroy(options); return -1; } From 8cfcade69dcfaddc4159972290f5808a2c736ca5 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Wed, 3 Nov 2021 17:55:56 +0800 Subject: [PATCH 30/94] more --- include/server/vnode/meta/impl/metaImpl.h | 4 ++ source/dnode/vnode/meta/inc/metaTbOptions.h | 3 +- source/dnode/vnode/meta/src/metaDB.c | 48 ++++++++++++++++++++- source/dnode/vnode/meta/src/metaTbOptions.c | 5 +++ 4 files changed, 58 insertions(+), 2 deletions(-) diff --git a/include/server/vnode/meta/impl/metaImpl.h b/include/server/vnode/meta/impl/metaImpl.h index d6f3bbbcfe..e6cf2de901 100644 --- a/include/server/vnode/meta/impl/metaImpl.h +++ b/include/server/vnode/meta/impl/metaImpl.h @@ -31,6 +31,10 @@ struct SMetaOptions { }; /* ------------------------ STbOptions 
------------------------ */ +#define META_NORMAL_TABLE ((uint8_t)1) +#define META_SUPER_TABLE ((uint8_t)2) +#define META_CHILD_TABLE ((uint8_t)3) + typedef struct { } SSMAOptions; diff --git a/source/dnode/vnode/meta/inc/metaTbOptions.h b/source/dnode/vnode/meta/inc/metaTbOptions.h index 1da68ffd52..b0fbd3a463 100644 --- a/source/dnode/vnode/meta/inc/metaTbOptions.h +++ b/source/dnode/vnode/meta/inc/metaTbOptions.h @@ -22,7 +22,8 @@ extern "C" { #endif -int metaValidateTbOptions(SMeta *pMeta, const STbOptions *); +int metaValidateTbOptions(SMeta *pMeta, const STbOptions *); +size_t metaEncodeTbObjFromTbOptions(const STbOptions *, void *pBuf, size_t bsize); #ifdef __cplusplus } diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index 6630eb8a2c..580608e851 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -91,7 +91,53 @@ void metaCloseDB(SMeta *pMeta) { } int metaSaveTableToDB(SMeta *pMeta, const STbOptions *pTbOptions) { - // TODO + tb_uid_t uid; + char * err = NULL; + size_t size; + char pBuf[1024]; // TODO + + rocksdb_writeoptions_t *wopt = rocksdb_writeoptions_create(); + + // Generate a uid for child and normal table + if (pTbOptions->type == META_SUPER_TABLE) { + uid = pTbOptions->stbOptions.uid; + } else { + uid = metaGenerateUid(pMeta); + } + + // Save tbname -> uid to tbnameDB + rocksdb_put(pMeta->pDB->nameDb, wopt, pTbOptions->name, strlen(pTbOptions->name), (char *)(&uid), sizeof(uid), &err); + + // Save uid -> tb_obj to tbDB + size = metaEncodeTbObjFromTbOptions(pTbOptions, pBuf, 1024); + rocksdb_put(pMeta->pDB->tbDb, wopt, (char *)(&uid), sizeof(uid), pBuf, size, &err); + + switch (pTbOptions->type) { + case META_NORMAL_TABLE: + // save schemaDB + rocksdb_put(pMeta->pDB->schemaDb, wopt, NULL /* TODO */, NULL /* TODO */, NULL /* TODO */, NULL /* TODO */, &err); + break; + case META_SUPER_TABLE: + // save schemaDB + rocksdb_put(pMeta->pDB->schemaDb, wopt, NULL /* TODO */, 
NULL /* TODO */, NULL /* TODO */, NULL /* TODO */, &err); + + // save mapDB (really need?) + rocksdb_put(pMeta->pDB->mapDb, wopt, (char *)(&uid), sizeof(uid), "", 0, &err); + break; + case META_CHILD_TABLE: + // save tagDB + rocksdb_put(pMeta->pDB->tagDb, wopt, NULL /* TODO */, 0 /* TODO */, NULL /* TODO */, 0 /* TODO */, &err); + + // save mapDB + rocksdb_put(pMeta->pDB->mapDb, wopt, (char *)(&(pTbOptions->ctbOptions.suid)), sizeof(tb_uid_t), NULL /* TODO */, + 0 /* TODO */, &err); + break; + default: + ASSERT(0); + } + + rocksdb_writeoptions_destroy(wopt); + return 0; } diff --git a/source/dnode/vnode/meta/src/metaTbOptions.c b/source/dnode/vnode/meta/src/metaTbOptions.c index 1f855aef23..2c4093bf44 100644 --- a/source/dnode/vnode/meta/src/metaTbOptions.c +++ b/source/dnode/vnode/meta/src/metaTbOptions.c @@ -18,4 +18,9 @@ int metaValidateTbOptions(SMeta *pMeta, const STbOptions *pTbOptions) { // TODO return 0; +} + +size_t metaEncodeTbObjFromTbOptions(const STbOptions *pTbOptions, void *pBuf, size_t bsize) { + // TODO + return 0; } \ No newline at end of file From b8304e818d421372b236550c71e5e01eaaad9357 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Wed, 3 Nov 2021 17:57:49 +0800 Subject: [PATCH 31/94] refact --- source/dnode/vnode/meta/inc/metaDef.h | 1 + source/dnode/vnode/meta/src/metaTable.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/meta/inc/metaDef.h b/source/dnode/vnode/meta/inc/metaDef.h index 5c4ae3428c..7fb2440bd1 100644 --- a/source/dnode/vnode/meta/inc/metaDef.h +++ b/source/dnode/vnode/meta/inc/metaDef.h @@ -22,6 +22,7 @@ #include "metaIdx.h" #include "metaOptions.h" #include "metaTbUid.h" +#include "metaTbOptions.h" #ifdef __cplusplus extern "C" { diff --git a/source/dnode/vnode/meta/src/metaTable.c b/source/dnode/vnode/meta/src/metaTable.c index b41d9313d5..d4a1ad3e38 100644 --- a/source/dnode/vnode/meta/src/metaTable.c +++ b/source/dnode/vnode/meta/src/metaTable.c @@ -17,7 +17,7 @@ int 
metaCreateTable(SMeta *pMeta, const STbOptions *pTbOptions) { // Validate the tbOptions - if (metaValidateTbOptions(pTbOptions) < 0) { + if (metaValidateTbOptions(pMeta, pTbOptions) < 0) { // TODO: handle error return -1; } From 446b14f315536822ad314cd661939acd5a236a51 Mon Sep 17 00:00:00 2001 From: lichuang Date: Thu, 4 Nov 2021 09:53:52 +0800 Subject: [PATCH 32/94] [TD-10645][raft]replace SRpcEpSet to SEpSet --- source/libs/sync/src/sync.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/libs/sync/src/sync.c b/source/libs/sync/src/sync.c index 9077be3f2d..fa35917668 100644 --- a/source/libs/sync/src/sync.c +++ b/source/libs/sync/src/sync.c @@ -23,8 +23,8 @@ SSyncManager* gSyncManager = NULL; #define SYNC_ACTIVITY_TIMER 5 #define SYNC_SERVER_WORKER 2 -static void syncProcessRsp(SRpcMsg *pMsg, SRpcEpSet *pEpSet); -static void syncProcessReqMsg(SRpcMsg *pMsg, SRpcEpSet *pEpSet); +static void syncProcessRsp(SRpcMsg *pMsg, SEpSet *pEpSet); +static void syncProcessReqMsg(SRpcMsg *pMsg, SEpSet *pEpSet); static int syncInitRpcServer(SSyncManager* syncManager, const SSyncCluster* pSyncCfg); static int syncInitRpcClient(SSyncManager* syncManager); @@ -168,12 +168,12 @@ int32_t syncPropose(SSyncNode* syncNode, const SSyncBuffer* pBuf, void* pData, b void syncReconfig(const SSyncNode* pNode, const SSyncCluster* pCfg) {} // process rpc rsp message from other sync server -static void syncProcessRsp(SRpcMsg *pMsg, SRpcEpSet *pEpSet) { +static void syncProcessRsp(SRpcMsg *pMsg, SEpSet *pEpSet) { } // process rpc message from other sync server -static void syncProcessReqMsg(SRpcMsg *pMsg, SRpcEpSet *pEpSet) { +static void syncProcessReqMsg(SRpcMsg *pMsg, SEpSet *pEpSet) { } From f7a8ef266cdaf56a63fee13691eae66b593e3ed2 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 4 Nov 2021 10:30:23 +0800 Subject: [PATCH 33/94] more --- include/server/vnode/meta/impl/metaImpl.h | 2 +- source/dnode/vnode/meta/src/metaDB.c | 66 +++++++++++++++++++++-- 2 
files changed, 62 insertions(+), 6 deletions(-) diff --git a/include/server/vnode/meta/impl/metaImpl.h b/include/server/vnode/meta/impl/metaImpl.h index e6cf2de901..c9506bc102 100644 --- a/include/server/vnode/meta/impl/metaImpl.h +++ b/include/server/vnode/meta/impl/metaImpl.h @@ -53,7 +53,7 @@ typedef struct { // normal table options typedef struct { - SSchema* pSchame; + STSchema* pSchame; } SNTbOptions; struct STbOptions { diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index 580608e851..e4c9d8ce97 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -15,6 +15,12 @@ #include "metaDef.h" +static void metaSaveSchemaDB(SMeta *pMeta, tb_uid_t uid, STSchema *pSchema); +static void metaGetSchemaDBKey(char key[], tb_uid_t uid, int sversion); +static int metaSaveMapDB(SMeta *pMeta, tb_uid_t suid, tb_uid_t uid); + +#define SCHEMA_KEY_LEN (sizeof(tb_uid_t) + sizeof(int)) + #define META_OPEN_DB_IMPL(pDB, options, dir, err) \ do { \ pDB = rocksdb_open(options, dir, &err); \ @@ -115,22 +121,22 @@ int metaSaveTableToDB(SMeta *pMeta, const STbOptions *pTbOptions) { switch (pTbOptions->type) { case META_NORMAL_TABLE: // save schemaDB - rocksdb_put(pMeta->pDB->schemaDb, wopt, NULL /* TODO */, NULL /* TODO */, NULL /* TODO */, NULL /* TODO */, &err); + metaSaveSchemaDB(pMeta, uid, pTbOptions->ntbOptions.pSchame); break; case META_SUPER_TABLE: // save schemaDB - rocksdb_put(pMeta->pDB->schemaDb, wopt, NULL /* TODO */, NULL /* TODO */, NULL /* TODO */, NULL /* TODO */, &err); + metaSaveSchemaDB(pMeta, uid, pTbOptions->stbOptions.pSchema); // save mapDB (really need?) 
rocksdb_put(pMeta->pDB->mapDb, wopt, (char *)(&uid), sizeof(uid), "", 0, &err); break; case META_CHILD_TABLE: // save tagDB - rocksdb_put(pMeta->pDB->tagDb, wopt, NULL /* TODO */, 0 /* TODO */, NULL /* TODO */, 0 /* TODO */, &err); + rocksdb_put(pMeta->pDB->tagDb, wopt, (char *)(&uid), sizeof(uid), pTbOptions->ctbOptions.tags, + kvRowLen(pTbOptions->ctbOptions.tags), &err); // save mapDB - rocksdb_put(pMeta->pDB->mapDb, wopt, (char *)(&(pTbOptions->ctbOptions.suid)), sizeof(tb_uid_t), NULL /* TODO */, - 0 /* TODO */, &err); + metaSaveMapDB(pMeta, pTbOptions->ctbOptions.suid, uid); break; default: ASSERT(0); @@ -143,5 +149,55 @@ int metaSaveTableToDB(SMeta *pMeta, const STbOptions *pTbOptions) { int metaRemoveTableFromDb(SMeta *pMeta, tb_uid_t uid) { /* TODO */ + return 0; +} + +/* ------------------------ STATIC METHODS ------------------------ */ +static void metaSaveSchemaDB(SMeta *pMeta, tb_uid_t uid, STSchema *pSchema) { + char key[64]; + char pBuf[1024]; + char * ppBuf = pBuf; + size_t vsize; + char * err = NULL; + + rocksdb_writeoptions_t *wopt = rocksdb_writeoptions_create(); + + metaGetSchemaDBKey(key, uid, schemaVersion(pSchema)); + vsize = tdEncodeSchema((void **)(&ppBuf), pSchema); + rocksdb_put(pMeta->pDB->schemaDb, wopt, key, SCHEMA_KEY_LEN, pBuf, vsize, &err); + + rocksdb_writeoptions_destroy(wopt); +} + +static void metaGetSchemaDBKey(char *key, tb_uid_t uid, int sversion) { + *(tb_uid_t *)key = uid; + *(int *)POINTER_SHIFT(key, sizeof(tb_uid_t)) = sversion; +} + +static int metaSaveMapDB(SMeta *pMeta, tb_uid_t suid, tb_uid_t uid) { + size_t vlen; + char * val; + char * err = NULL; + + rocksdb_readoptions_t *ropt = rocksdb_readoptions_create(); + val = rocksdb_get(pMeta->pDB->mapDb, ropt, (char *)(&suid), sizeof(suid), &vlen, &err); + rocksdb_readoptions_destroy(ropt); + + void *nval = malloc(vlen + sizeof(uid)); + if (nval == NULL) { + return -1; + } + + if (vlen) { + memcpy(nval, val, vlen); + } + memcpy(POINTER_SHIFT(nval, vlen), (void *)(&uid), 
sizeof(uid)); + + rocksdb_writeoptions_t *wopt = rocksdb_writeoptions_create(); + + rocksdb_put(pMeta->pDB->mapDb, wopt, (char *)(&suid), sizeof(suid), nval, vlen + sizeof(uid), &err); + + rocksdb_writeoptions_destroy(wopt); + return 0; } \ No newline at end of file From d72faa1e6b382ddbd59b99e21f3433f17834f520 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 4 Nov 2021 10:54:52 +0800 Subject: [PATCH 34/94] more --- include/server/vnode/meta/impl/metaImpl.h | 2 +- source/dnode/vnode/meta/src/metaTbOptions.c | 27 +++++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/include/server/vnode/meta/impl/metaImpl.h b/include/server/vnode/meta/impl/metaImpl.h index c9506bc102..90ced02f30 100644 --- a/include/server/vnode/meta/impl/metaImpl.h +++ b/include/server/vnode/meta/impl/metaImpl.h @@ -59,7 +59,7 @@ typedef struct { struct STbOptions { uint8_t type; char* name; - uint64_t ttl; // time to live + uint32_t ttl; // time to live in (SECONDS) SSMAOptions bsma; // Block-wise sma union { SSTbOptions stbOptions; diff --git a/source/dnode/vnode/meta/src/metaTbOptions.c b/source/dnode/vnode/meta/src/metaTbOptions.c index 2c4093bf44..9bf9607df7 100644 --- a/source/dnode/vnode/meta/src/metaTbOptions.c +++ b/source/dnode/vnode/meta/src/metaTbOptions.c @@ -14,6 +14,7 @@ */ #include "metaDef.h" +#include "tcoding.h" int metaValidateTbOptions(SMeta *pMeta, const STbOptions *pTbOptions) { // TODO @@ -21,6 +22,28 @@ int metaValidateTbOptions(SMeta *pMeta, const STbOptions *pTbOptions) { } size_t metaEncodeTbObjFromTbOptions(const STbOptions *pTbOptions, void *pBuf, size_t bsize) { - // TODO - return 0; + void **ppBuf = &pBuf; + int tlen = 0; + + tlen += taosEncodeFixedU8(ppBuf, pTbOptions->type); + tlen += taosEncodeString(ppBuf, pTbOptions->name); + tlen += taosEncodeFixedU32(ppBuf, pTbOptions->ttl); + + switch (pTbOptions->type) { + case META_SUPER_TABLE: + tlen += taosEncodeFixedU64(ppBuf, pTbOptions->stbOptions.uid); + tlen += 
tdEncodeSchema(ppBuf, pTbOptions->stbOptions.pTagSchema); + // TODO: encode schema version array + break; + case META_CHILD_TABLE: + tlen += taosEncodeFixedU64(ppBuf, pTbOptions->ctbOptions.suid); + break; + case META_NORMAL_TABLE: + // TODO: encode schema version array + break; + default: + break; + } + + return tlen; } \ No newline at end of file From 441d07f7460ff59f61fcc121305dee82a31d9251 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 4 Nov 2021 10:57:21 +0800 Subject: [PATCH 35/94] more --- source/dnode/vnode/meta/inc/metaTbTag.h | 27 +++++++++++++++++++++++++ source/dnode/vnode/meta/src/metaTbTag.c | 14 +++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 source/dnode/vnode/meta/inc/metaTbTag.h create mode 100644 source/dnode/vnode/meta/src/metaTbTag.c diff --git a/source/dnode/vnode/meta/inc/metaTbTag.h b/source/dnode/vnode/meta/inc/metaTbTag.h new file mode 100644 index 0000000000..15b660be92 --- /dev/null +++ b/source/dnode/vnode/meta/inc/metaTbTag.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef _TD_META_TB_TAG_H_ +#define _TD_META_TB_TAG_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_META_TB_TAG_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/meta/src/metaTbTag.c b/source/dnode/vnode/meta/src/metaTbTag.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/dnode/vnode/meta/src/metaTbTag.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ \ No newline at end of file From c9c6aca0eb24c7051f8d34dcffd6055488fbd699 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 4 Nov 2021 10:59:02 +0800 Subject: [PATCH 36/94] more --- source/dnode/vnode/meta/inc/metaDef.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/meta/inc/metaDef.h b/source/dnode/vnode/meta/inc/metaDef.h index 7fb2440bd1..562476a439 100644 --- a/source/dnode/vnode/meta/inc/metaDef.h +++ b/source/dnode/vnode/meta/inc/metaDef.h @@ -21,8 +21,9 @@ #include "metaDB.h" #include "metaIdx.h" #include "metaOptions.h" -#include "metaTbUid.h" #include "metaTbOptions.h" +#include "metaTbTag.h" +#include "metaTbUid.h" #ifdef __cplusplus extern "C" { From e05e6dba9aaa3448fb546eef7b0d34387ca258e6 Mon Sep 17 00:00:00 2001 From: lichuang Date: Thu, 4 Nov 2021 12:39:45 +0800 Subject: [PATCH 37/94] [TD-10645][raft]add raft election message handle --- include/libs/sync/sync.h | 1 - source/libs/sync/inc/raft.h | 35 ++++++---- source/libs/sync/inc/raft_configuration.h | 26 +++++++ source/libs/sync/inc/raft_log.h | 42 ++++++++++++ source/libs/sync/inc/raft_message.h | 49 ++++++++++--- source/libs/sync/inc/raft_unstable_log.h | 2 +- source/libs/sync/inc/sync_type.h | 10 ++- source/libs/sync/src/raft.c | 68 +++++++++++++------ source/libs/sync/src/raft_configuration.c | 25 +++++++ .../sync/src/raft_handle_election_message.c | 37 ++++++++-- source/libs/sync/src/raft_log.c | 36 ++++++++++ source/libs/sync/src/raft_unstable_log.c | 4 +- 12 files changed, 285 insertions(+), 50 deletions(-) create mode 100644 source/libs/sync/inc/raft_configuration.h create mode 100644 source/libs/sync/inc/raft_log.h create mode 100644 source/libs/sync/src/raft_configuration.c create mode 100644 source/libs/sync/src/raft_log.c diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index b938bbba77..726fbc0621 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -32,7 +32,6 @@ typedef enum { TAOS_SYNC_ROLE_FOLLOWER 
= 0, TAOS_SYNC_ROLE_CANDIDATE = 1, TAOS_SYNC_ROLE_LEADER = 2, - TAOS_SYNC_ROLE_PRE_CANDIDATE = 3, } ESyncRole; typedef struct { diff --git a/source/libs/sync/inc/raft.h b/source/libs/sync/inc/raft.h index 702fcd00cf..44ee6a3b69 100644 --- a/source/libs/sync/inc/raft.h +++ b/source/libs/sync/inc/raft.h @@ -29,9 +29,16 @@ typedef struct RaftLeaderState { SSyncRaftProgress* progress; } RaftLeaderState; +typedef struct RaftCandidateState { + /* votes results */ + bool votes[TSDB_MAX_REPLICA]; + + /* true if in pre-vote phase */ + bool inPreVote; +} RaftCandidateState; + typedef struct SSyncRaftIOMethods { - - + int (*send)(const SSyncMessage* pMsg, const SNodeInfo* pNode); } SSyncRaftIOMethods; typedef int (*SyncRaftStepFp)(SSyncRaft* pRaft, const SSyncMessage* pMsg); @@ -41,7 +48,10 @@ struct SSyncRaft { // owner sync node SSyncNode* pNode; - SSyncInfo info; + //SSyncInfo info; + SSyncFSM fsm; + SSyncLogStore logStore; + SStateManager stateManager; SyncTerm term; SyncNodeId voteFor; @@ -65,6 +75,8 @@ struct SSyncRaft { **/ bool pendingConf; + SSyncCluster cluster; + ESyncRole state; /** @@ -92,25 +104,22 @@ struct SSyncRaft { SSyncRaftIOMethods io; - RaftLeaderState leaderState; - - SSyncRaftUnstableLog *log; + union { + RaftLeaderState leaderState; + RaftCandidateState candidateState; + }; + + SSyncRaftLog *log; SyncRaftStepFp stepFp; SyncRaftTickFp tickFp; }; -typedef enum { - SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0, - SYNC_RAFT_CAMPAIGN_ELECTION = 1, -} SyncRaftCampaignType; - int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo); int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg); int32_t syncRaftTick(SSyncRaft* pRaft); - void syncRaftBecomeFollower(SSyncRaft* pRaft, SyncTerm term, SyncNodeId leaderId); void syncRaftBecomePreCandidate(SSyncRaft* pRaft); void syncRaftBecomeCandidate(SSyncRaft* pRaft); @@ -120,6 +129,6 @@ void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft); bool syncRaftIsPromotable(SSyncRaft* pRaft); bool 
syncRaftIsPastElectionTimeout(SSyncRaft* pRaft); int syncRaftQuorum(SSyncRaft* pRaft); -int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, RaftMessageType msgType, bool accept); +int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, bool preVote, bool accept); #endif /* _TD_LIBS_SYNC_RAFT_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/raft_configuration.h b/source/libs/sync/inc/raft_configuration.h new file mode 100644 index 0000000000..ed0cc33115 --- /dev/null +++ b/source/libs/sync/inc/raft_configuration.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_LIBS_SYNC_RAFT_CONFIGURATION_H +#define _TD_LIBS_SYNC_RAFT_CONFIGURATION_H + +#include "sync.h" +#include "sync_type.h" + +int syncRaftConfigurationIndexOfVoter(SSyncRaft *pRaft, SyncNodeId id); + +int syncRaftConfigurationVoterCount(SSyncRaft *pRaft); + +#endif /* _TD_LIBS_SYNC_RAFT_CONFIGURATION_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/raft_log.h b/source/libs/sync/inc/raft_log.h new file mode 100644 index 0000000000..7ffb946c82 --- /dev/null +++ b/source/libs/sync/inc/raft_log.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_LIBS_SYNC_RAFT_LOG_H +#define _TD_LIBS_SYNC_RAFT_LOG_H + +#include "sync.h" +#include "sync_type.h" + +struct SSyncRaftLog { + SyncIndex uncommittedConfigIndex; + + SyncIndex commitIndex; + + SyncIndex appliedIndex; + + +}; + +SSyncRaftLog* syncRaftLogOpen(); + +SyncIndex syncRaftLogLastIndex(SSyncRaftLog* pLog); + +SyncTerm syncRaftLogLastTerm(SSyncRaftLog* pLog); + +int syncRaftLogNumOfPendingConf(SSyncRaftLog* pLog); + +bool syncRaftHasUnappliedLog(SSyncRaftLog* pLog); + +#endif /* _TD_LIBS_SYNC_RAFT_LOG_H */ diff --git a/source/libs/sync/inc/raft_message.h b/source/libs/sync/inc/raft_message.h index 71fe37bebd..d4736d6169 100644 --- a/source/libs/sync/inc/raft_message.h +++ b/source/libs/sync/inc/raft_message.h @@ -35,8 +35,7 @@ typedef enum RaftMessageType { RAFT_MSG_VOTE = 3, RAFT_MSG_VOTE_RESP = 4, - RAFT_MSG_PRE_VOTE = 5, - RAFT_MSG_PRE_VOTE_RESP = 6, + } RaftMessageType; typedef struct RaftMsgInternal_Prop { @@ -49,13 +48,21 @@ typedef struct RaftMsgInternal_Election { } RaftMsgInternal_Election; -typedef struct RaftMsg_PreVoteResp { +typedef struct RaftMsg_Vote { + SyncRaftCampaignType cType; + SyncIndex lastIndex; + SyncTerm lastTerm; +} RaftMsg_Vote; + +typedef struct RaftMsg_VoteResp { bool reject; -} RaftMsg_PreVoteResp; + SyncRaftCampaignType cType; +} RaftMsg_VoteResp; typedef struct SSyncMessage { RaftMessageType msgType; SyncTerm term; + SyncGroupId groupId; SyncNodeId from; SyncNodeId to; @@ -64,7 +71,8 @@ typedef struct SSyncMessage { RaftMsgInternal_Election election; - RaftMsg_PreVoteResp preVoteResp; + RaftMsg_Vote vote; + RaftMsg_VoteResp voteResp; }; } SSyncMessage; @@ -95,14 +103,39 @@ static FORCE_INLINE 
SSyncMessage* syncInitElectionMsg(SSyncMessage* pMsg, SyncNo return pMsg; } +static FORCE_INLINE SSyncMessage* syncNewVoteMsg(SyncGroupId groupId, SyncNodeId from, SyncNodeId to, + SyncTerm term, SyncRaftCampaignType cType, + SyncIndex lastIndex, SyncTerm lastTerm) { + SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(SSyncMessage)); + if (pMsg == NULL) { + return NULL; + } + *pMsg = (SSyncMessage) { + .groupId = groupId, + .from = from, + .to = to, + .term = term, + .vote = (RaftMsg_Vote) { + .cType = cType, + .lastIndex = lastIndex, + .lastTerm = lastTerm, + }, + }; + + return pMsg; +} + static FORCE_INLINE bool syncIsInternalMsg(RaftMessageType msgType) { return msgType == RAFT_MSG_INTERNAL_PROP || msgType == RAFT_MSG_INTERNAL_ELECTION; } -static FORCE_INLINE RaftMessageType SyncRaftVoteRespMsgType(RaftMessageType msgType) { - if (msgType == RAFT_MSG_VOTE) return RAFT_MSG_PRE_VOTE_RESP; - return RAFT_MSG_PRE_VOTE_RESP; +static FORCE_INLINE bool syncIsPreVoteRespMsg(SSyncMessage* pMsg) { + return pMsg->msgType == RAFT_MSG_VOTE_RESP && pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION; +} + +static FORCE_INLINE bool syncIsPreVoteMsg(SSyncMessage* pMsg) { + return pMsg->msgType == RAFT_MSG_VOTE && pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION; } void syncFreeMessage(const SSyncMessage* pMsg); diff --git a/source/libs/sync/inc/raft_unstable_log.h b/source/libs/sync/inc/raft_unstable_log.h index 0c9957cb90..0748a425a1 100644 --- a/source/libs/sync/inc/raft_unstable_log.h +++ b/source/libs/sync/inc/raft_unstable_log.h @@ -41,7 +41,7 @@ struct SSyncRaftUnstableLog { /** * return index of last in memory log, return 0 if log is empty **/ -SyncIndex syncRaftLogLastIndex(SSyncRaftUnstableLog* pLog); +//SyncIndex syncRaftLogLastIndex(SSyncRaftUnstableLog* pLog); #if 0 void raftLogInit(RaftLog* pLog); diff --git a/source/libs/sync/inc/sync_type.h b/source/libs/sync/inc/sync_type.h index 2c9f24287a..4343e607cb 100644 --- a/source/libs/sync/inc/sync_type.h +++ 
b/source/libs/sync/inc/sync_type.h @@ -18,10 +18,10 @@ typedef int32_t SyncTime; -typedef struct SSyncRaftUnstableLog SSyncRaftUnstableLog; - typedef struct SSyncRaft SSyncRaft; +typedef struct SSyncRaftLog SSyncRaftLog; + #ifndef MIN #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #endif @@ -30,4 +30,10 @@ typedef struct SSyncRaft SSyncRaft; #define MAX(x, y) (((x) > (y)) ? (x) : (y)) #endif +typedef enum { + SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0, + SYNC_RAFT_CAMPAIGN_ELECTION = 1, + SYNC_RAFT_CAMPAIGN_TRANSFER = 3, +} SyncRaftCampaignType; + #endif /* _TD_LIBS_SYNC_TYPE_H */ diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c index 87750eca9e..a6e013758e 100644 --- a/source/libs/sync/src/raft.c +++ b/source/libs/sync/src/raft.c @@ -14,6 +14,7 @@ */ #include "raft.h" +#include "raft_configuration.h" #include "syncInt.h" #define RAFT_READ_LOG_MAX_NUM 100 @@ -22,6 +23,7 @@ static bool preHandleMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); +static int convertClear(SSyncRaft* pRaft); static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg); static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg); static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg); @@ -45,11 +47,18 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { memset(pRaft, 0, sizeof(SSyncRaft)); - memcpy(&pRaft->info, pInfo, sizeof(SSyncInfo)); - stateManager = &(pRaft->info.stateManager); - logStore = &(pRaft->info.logStore); - fsm = &(pRaft->info.fsm); + memcpy(&pRaft->fsm, &pInfo->fsm, sizeof(SSyncFSM)); + memcpy(&pRaft->logStore, &pInfo->logStore, sizeof(SSyncLogStore)); + memcpy(&pRaft->stateManager, &pInfo->stateManager, sizeof(SStateManager)); + stateManager = &(pRaft->stateManager); + logStore = &(pRaft->logStore); + fsm = &(pRaft->fsm); + + // open raft log + if 
((pRaft->log = syncRaftLogOpen()) == NULL) { + return -1; + } // read server state if (stateManager->readServerState(stateManager, &serverState) != 0) { syncError("readServerState for vgid %d fail", pInfo->vgId); @@ -79,7 +88,8 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { syncRaftBecomeFollower(pRaft, pRaft->term, SYNC_NON_NODE_ID); - syncInfo("restore vgid %d state: snapshot index success", pInfo->vgId); + syncInfo("[%d:%d] restore vgid %d state: snapshot index success", + pRaft->selfGroupId, pRaft->selfId, pInfo->vgId); return 0; } @@ -95,7 +105,7 @@ int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg) { RaftMessageType msgType = pMsg->msgType; if (msgType == RAFT_MSG_INTERNAL_ELECTION) { syncRaftHandleElectionMessage(pRaft, pMsg); - } else if (msgType == RAFT_MSG_VOTE || msgType == RAFT_MSG_PRE_VOTE) { + } else if (msgType == RAFT_MSG_VOTE) { } else { pRaft->stepFp(pRaft, pMsg); @@ -125,11 +135,13 @@ void syncRaftBecomePreCandidate(SSyncRaft* pRaft) { **/ pRaft->stepFp = stepCandidate; pRaft->tickFp = tickElection; - pRaft->state = TAOS_SYNC_ROLE_PRE_CANDIDATE; + pRaft->state = TAOS_SYNC_ROLE_CANDIDATE; + pRaft->candidateState.inPreVote = true; syncInfo("[%d:%d] became pre-candidate at term %d" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); } void syncRaftBecomeCandidate(SSyncRaft* pRaft) { + pRaft->candidateState.inPreVote = false; pRaft->stepFp = stepCandidate; // become candidate make term+1 resetRaft(pRaft, pRaft->term + 1); @@ -157,9 +169,7 @@ void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft) { } bool syncRaftIsPromotable(SSyncRaft* pRaft) { - return pRaft->info.syncCfg.selfIndex >= 0 && - pRaft->info.syncCfg.selfIndex < pRaft->info.syncCfg.replica && - pRaft->selfId != SYNC_NON_NODE_ID; + return pRaft->selfId != SYNC_NON_NODE_ID; } bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft) { @@ -167,17 +177,29 @@ bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft) { } int syncRaftQuorum(SSyncRaft* pRaft) { 
- return pRaft->leaderState.nProgress / 2 + 1; + return pRaft->cluster.replica / 2 + 1; } -int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, RaftMessageType msgType, bool accept) { +int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, bool preVote, bool accept) { if (accept) { - + syncInfo("[%d:%d] received (pre-vote %d) from %d at term %" PRId64 "", + pRaft->selfGroupId, pRaft->selfId, preVote, id, pRaft->term); } else { - + syncInfo("[%d:%d] received rejection from %d at term %" PRId64 "", + pRaft->selfGroupId, pRaft->selfId, id, pRaft->term); } + int voteIndex = syncRaftConfigurationIndexOfVoter(pRaft, id); + assert(voteIndex < pRaft->cluster.replica && voteIndex >= 0); + pRaft->candidateState.votes[voteIndex] = accept; + int granted = 0; + int i; + for (i = 0; i < pRaft->cluster.replica; ++i) { + if (pRaft->candidateState.votes[i]) granted++; + } + + return granted; } /** @@ -201,13 +223,13 @@ static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) SyncNodeId leaderId = pMsg->from; RaftMessageType msgType = pMsg->msgType; - if (msgType == RAFT_MSG_VOTE || msgType == RAFT_MSG_PRE_VOTE) { + if (msgType == RAFT_MSG_VOTE) { leaderId = SYNC_NON_NODE_ID; } - if (msgType == RAFT_MSG_PRE_VOTE) { + if (syncIsPreVoteMsg(pMsg)) { // Never change our term in response to a PreVote - } else if (msgType == RAFT_MSG_PRE_VOTE_RESP && !pMsg->preVoteResp.reject) { + } else if (syncIsPreVoteRespMsg(pMsg) && !pMsg->voteResp.reject) { /** * We send pre-vote requests with a term in our future. If the * pre-vote is granted, we will increment our term when we get a @@ -216,8 +238,8 @@ static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) * term. 
**/ } else { - syncInfo("%d [term:%" PRId64 "] received a %d message with higher term from %d [term:%" PRId64 "]", - pRaft->selfId, pRaft->term, msgType, pMsg->from, pMsg->term); + syncInfo("[%d:%d] [term:%" PRId64 "] received a %d message with higher term from %d [term:%" PRId64 "]", + pRaft->selfGroupId, pRaft->selfId, pRaft->term, msgType, pMsg->from, pMsg->term); syncRaftBecomeFollower(pRaft, pMsg->term, leaderId); } @@ -230,15 +252,23 @@ static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) return true; } +static int convertClear(SSyncRaft* pRaft) { + +} + static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + convertClear(pRaft); return 0; } static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + convertClear(pRaft); + memset(pRaft->candidateState.votes, 0, sizeof(bool) * TSDB_MAX_REPLICA); return 0; } static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + convertClear(pRaft); return 0; } diff --git a/source/libs/sync/src/raft_configuration.c b/source/libs/sync/src/raft_configuration.c new file mode 100644 index 0000000000..6f3a27e7c0 --- /dev/null +++ b/source/libs/sync/src/raft_configuration.c @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "raft_configuration.h" +#include "raft.h" + +int syncRaftConfigurationIndexOfVoter(SSyncRaft *pRaft, SyncNodeId id) { + return (int)(id); +} + +int syncRaftConfigurationVoterCount(SSyncRaft *pRaft) { + return pRaft->cluster.replica; +} \ No newline at end of file diff --git a/source/libs/sync/src/raft_handle_election_message.c b/source/libs/sync/src/raft_handle_election_message.c index 2586cd918d..0d2004dec2 100644 --- a/source/libs/sync/src/raft_handle_election_message.c +++ b/source/libs/sync/src/raft_handle_election_message.c @@ -25,34 +25,41 @@ void syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { return; } - // TODO: is there pending uncommitted config? + // if there is pending uncommitted config,cannot campaign + if (syncRaftLogNumOfPendingConf(pRaft->log) > 0 && syncRaftHasUnappliedLog(pRaft->log)) { + syncWarn("[%d:%d] cannot campaign at term %" PRId64 " since there are still pending configuration changes to apply", + pRaft->selfGroupId, pRaft->selfId, pRaft->term); + return; + } syncInfo("[%d:%d] is starting a new election at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); if (pRaft->preVote) { - + campaign(pRaft, SYNC_RAFT_CAMPAIGN_PRE_ELECTION); } else { - + campaign(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); } } static void campaign(SSyncRaft* pRaft, SyncRaftCampaignType cType) { SyncTerm term; + bool preVote; RaftMessageType voteMsgType; if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { syncRaftBecomePreCandidate(pRaft); - voteMsgType = RAFT_MSG_PRE_VOTE; + preVote = true; // PreVote RPCs are sent for the next term before we've incremented r.Term. 
term = pRaft->term + 1; } else { syncRaftBecomeCandidate(pRaft); voteMsgType = RAFT_MSG_VOTE; term = pRaft->term; + preVote = false; } int quorum = syncRaftQuorum(pRaft); - int granted = syncRaftNumOfGranted(pRaft, pRaft->selfId, SyncRaftVoteRespMsgType(voteMsgType), true); + int granted = syncRaftNumOfGranted(pRaft, pRaft->selfId, preVote, true); if (quorum <= granted) { /** * We won the election after voting for ourselves (which must mean that @@ -67,5 +74,25 @@ static void campaign(SSyncRaft* pRaft, SyncRaftCampaignType cType) { } // broadcast vote message to other peers + int i; + SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); + SyncTerm lastTerm = syncRaftLogLastTerm(pRaft->log); + for (i = 0; i < pRaft->cluster.replica; ++i) { + if (i == pRaft->cluster.selfIndex) { + continue; + } + SyncNodeId nodeId = pRaft->cluster.nodeInfo[i].nodeId; + + SSyncMessage* pMsg = syncNewVoteMsg(pRaft->selfGroupId, pRaft->selfId, nodeId, term, cType, lastIndex, lastTerm); + if (pMsg == NULL) { + continue; + } + + syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %d] sent %d request to %d at term %" PRId64 "", + pRaft->selfGroupId, pRaft->selfId, lastTerm, + lastIndex, voteMsgType, nodeId, pRaft->term); + + pRaft->io.send(pMsg, &(pRaft->cluster.nodeInfo[i])); + } } \ No newline at end of file diff --git a/source/libs/sync/src/raft_log.c b/source/libs/sync/src/raft_log.c new file mode 100644 index 0000000000..46c4e4b304 --- /dev/null +++ b/source/libs/sync/src/raft_log.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "raft_log.h" + +SSyncRaftLog* syncRaftLogOpen() { + return NULL; +} + +SyncIndex syncRaftLogLastIndex(SSyncRaftLog* pLog) { + return 0; +} + +SyncTerm syncRaftLogLastTerm(SSyncRaftLog* pLog) { + return 0; +} + +int syncRaftLogNumOfPendingConf(SSyncRaftLog* pLog) { + return 0; +} + +bool syncRaftHasUnappliedLog(SSyncRaftLog* pLog) { + return pLog->commitIndex > pLog->appliedIndex; +} \ No newline at end of file diff --git a/source/libs/sync/src/raft_unstable_log.c b/source/libs/sync/src/raft_unstable_log.c index 4735242d3c..e798e20662 100644 --- a/source/libs/sync/src/raft_unstable_log.c +++ b/source/libs/sync/src/raft_unstable_log.c @@ -16,6 +16,8 @@ #include "sync.h" #include "raft_unstable_log.h" +/* SyncIndex syncRaftLogLastIndex(SSyncRaftUnstableLog* pLog) { return 0; -} \ No newline at end of file +} +*/ \ No newline at end of file From d8c74763f9441f035106040dddfec73d8e64d122 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 4 Nov 2021 14:00:19 +0800 Subject: [PATCH 38/94] more --- source/dnode/vnode/tsdb/inc/tsdbIdx.h | 27 +++++++++++++++++++++++++++ source/dnode/vnode/tsdb/inc/tsdbSMA.h | 27 +++++++++++++++++++++++++++ source/dnode/vnode/tsdb/src/tsdbIdx.c | 14 ++++++++++++++ source/dnode/vnode/tsdb/src/tsdbSMA.c | 14 ++++++++++++++ 4 files changed, 82 insertions(+) create mode 100644 source/dnode/vnode/tsdb/inc/tsdbIdx.h create mode 100644 source/dnode/vnode/tsdb/inc/tsdbSMA.h create mode 100644 source/dnode/vnode/tsdb/src/tsdbIdx.c create mode 100644 source/dnode/vnode/tsdb/src/tsdbSMA.c diff --git a/source/dnode/vnode/tsdb/inc/tsdbIdx.h b/source/dnode/vnode/tsdb/inc/tsdbIdx.h new file mode 100644 index 0000000000..73b2c5e6c5 --- /dev/null +++ b/source/dnode/vnode/tsdb/inc/tsdbIdx.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_TSDB_IDX_H_ +#define _TD_TSDB_IDX_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_TSDB_IDX_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb/inc/tsdbSMA.h b/source/dnode/vnode/tsdb/inc/tsdbSMA.h new file mode 100644 index 0000000000..800a276bc8 --- /dev/null +++ b/source/dnode/vnode/tsdb/inc/tsdbSMA.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_TSDB_SMA_H_ +#define _TD_TSDB_SMA_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_TSDB_SMA_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb/src/tsdbIdx.c b/source/dnode/vnode/tsdb/src/tsdbIdx.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/dnode/vnode/tsdb/src/tsdbIdx.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ \ No newline at end of file diff --git a/source/dnode/vnode/tsdb/src/tsdbSMA.c b/source/dnode/vnode/tsdb/src/tsdbSMA.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/dnode/vnode/tsdb/src/tsdbSMA.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ \ No newline at end of file From c25d174fc2499dcc39c17f6d6789c29ba0bf4204 Mon Sep 17 00:00:00 2001 From: lichuang Date: Thu, 4 Nov 2021 14:56:21 +0800 Subject: [PATCH 39/94] [TD-10645][raft]add raft vote resp message handle --- source/libs/sync/inc/raft.h | 9 ++- source/libs/sync/inc/raft_configuration.h | 1 + source/libs/sync/inc/raft_message.h | 13 ++-- source/libs/sync/inc/sync_type.h | 8 +- source/libs/sync/src/raft.c | 48 +++++++++--- source/libs/sync/src/raft_election.c | 75 +++++++++++++++++++ .../sync/src/raft_handle_election_message.c | 75 +++---------------- .../sync/src/raft_handle_vote_resp_message.c | 57 ++++++++++++++ source/libs/sync/src/raft_progress.c | 2 +- 9 files changed, 203 insertions(+), 85 deletions(-) create mode 100644 source/libs/sync/src/raft_election.c create mode 100644 source/libs/sync/src/raft_handle_vote_resp_message.c diff --git a/source/libs/sync/inc/raft.h b/source/libs/sync/inc/raft.h index 44ee6a3b69..cba9434414 100644 --- a/source/libs/sync/inc/raft.h +++ b/source/libs/sync/inc/raft.h @@ -31,7 +31,7 @@ typedef struct RaftLeaderState { typedef struct RaftCandidateState { /* votes results */ - bool votes[TSDB_MAX_REPLICA]; + SyncRaftVoteRespType votes[TSDB_MAX_REPLICA]; /* true if in pre-vote phase */ bool inPreVote; @@ -125,10 +125,15 @@ void syncRaftBecomePreCandidate(SSyncRaft* pRaft); void syncRaftBecomeCandidate(SSyncRaft* pRaft); void syncRaftBecomeLeader(SSyncRaft* pRaft); +void syncRaftStartElection(SSyncRaft* pRaft, SyncRaftElectionType cType); + +void syncRaftTriggerReplicate(SSyncRaft* pRaft); + void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft); bool syncRaftIsPromotable(SSyncRaft* pRaft); bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft); int syncRaftQuorum(SSyncRaft* pRaft); -int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, bool preVote, bool accept); +int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, + bool preVote, bool accept, int* rejectNum); #endif /* _TD_LIBS_SYNC_RAFT_H */ \ No 
newline at end of file diff --git a/source/libs/sync/inc/raft_configuration.h b/source/libs/sync/inc/raft_configuration.h index ed0cc33115..993f863f33 100644 --- a/source/libs/sync/inc/raft_configuration.h +++ b/source/libs/sync/inc/raft_configuration.h @@ -19,6 +19,7 @@ #include "sync.h" #include "sync_type.h" +// return -1 if cannot find this id int syncRaftConfigurationIndexOfVoter(SSyncRaft *pRaft, SyncNodeId id); int syncRaftConfigurationVoterCount(SSyncRaft *pRaft); diff --git a/source/libs/sync/inc/raft_message.h b/source/libs/sync/inc/raft_message.h index d4736d6169..da2e3bc52f 100644 --- a/source/libs/sync/inc/raft_message.h +++ b/source/libs/sync/inc/raft_message.h @@ -35,7 +35,7 @@ typedef enum RaftMessageType { RAFT_MSG_VOTE = 3, RAFT_MSG_VOTE_RESP = 4, - + RAFT_MSG_APPEND = 5, } RaftMessageType; typedef struct RaftMsgInternal_Prop { @@ -49,14 +49,14 @@ typedef struct RaftMsgInternal_Election { } RaftMsgInternal_Election; typedef struct RaftMsg_Vote { - SyncRaftCampaignType cType; + SyncRaftElectionType cType; SyncIndex lastIndex; SyncTerm lastTerm; } RaftMsg_Vote; typedef struct RaftMsg_VoteResp { bool reject; - SyncRaftCampaignType cType; + SyncRaftElectionType cType; } RaftMsg_VoteResp; typedef struct SSyncMessage { @@ -104,7 +104,7 @@ static FORCE_INLINE SSyncMessage* syncInitElectionMsg(SSyncMessage* pMsg, SyncNo } static FORCE_INLINE SSyncMessage* syncNewVoteMsg(SyncGroupId groupId, SyncNodeId from, SyncNodeId to, - SyncTerm term, SyncRaftCampaignType cType, + SyncTerm term, SyncRaftElectionType cType, SyncIndex lastIndex, SyncTerm lastTerm) { SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(SSyncMessage)); if (pMsg == NULL) { @@ -134,13 +134,14 @@ static FORCE_INLINE bool syncIsPreVoteRespMsg(SSyncMessage* pMsg) { return pMsg->msgType == RAFT_MSG_VOTE_RESP && pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION; } -static FORCE_INLINE bool syncIsPreVoteMsg(SSyncMessage* pMsg) { +static FORCE_INLINE bool syncIsPreVoteMsg(const SSyncMessage* 
pMsg) { return pMsg->msgType == RAFT_MSG_VOTE && pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION; } void syncFreeMessage(const SSyncMessage* pMsg); // message handlers -void syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); +int syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); +int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); #endif /* _TD_LIBS_SYNC_RAFT_MESSAGE_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/sync_type.h b/source/libs/sync/inc/sync_type.h index 4343e607cb..f9632f6ae8 100644 --- a/source/libs/sync/inc/sync_type.h +++ b/source/libs/sync/inc/sync_type.h @@ -34,6 +34,12 @@ typedef enum { SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0, SYNC_RAFT_CAMPAIGN_ELECTION = 1, SYNC_RAFT_CAMPAIGN_TRANSFER = 3, -} SyncRaftCampaignType; +} SyncRaftElectionType; + +typedef enum { + SYNC_RAFT_VOTE_RESP_UNKNOWN = 0, + SYNC_RAFT_VOTE_RESP_GRANT = 1, + SYNC_RAFT_VOTE_RESP_REJECT = 2, +} SyncRaftVoteRespType; #endif /* _TD_LIBS_SYNC_TYPE_H */ diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c index a6e013758e..83ae76fa5e 100644 --- a/source/libs/sync/src/raft.c +++ b/source/libs/sync/src/raft.c @@ -15,6 +15,7 @@ #include "raft.h" #include "raft_configuration.h" +#include "raft_log.h" #include "syncInt.h" #define RAFT_READ_LOG_MAX_NUM 100 @@ -120,14 +121,19 @@ int32_t syncRaftTick(SSyncRaft* pRaft) { } void syncRaftBecomeFollower(SSyncRaft* pRaft, SyncTerm term, SyncNodeId leaderId) { + convertClear(pRaft); + pRaft->stepFp = stepFollower; resetRaft(pRaft, term); pRaft->tickFp = tickElection; pRaft->leaderId = leaderId; pRaft->state = TAOS_SYNC_ROLE_FOLLOWER; + syncInfo("[%d:%d] became followe at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); } void syncRaftBecomePreCandidate(SSyncRaft* pRaft) { + convertClear(pRaft); + memset(pRaft->candidateState.votes, SYNC_RAFT_VOTE_RESP_UNKNOWN, sizeof(SyncRaftVoteRespType) * TSDB_MAX_REPLICA); /** 
* Becoming a pre-candidate changes our step functions and state, * but doesn't change anything else. In particular it does not increase @@ -137,10 +143,13 @@ void syncRaftBecomePreCandidate(SSyncRaft* pRaft) { pRaft->tickFp = tickElection; pRaft->state = TAOS_SYNC_ROLE_CANDIDATE; pRaft->candidateState.inPreVote = true; - syncInfo("[%d:%d] became pre-candidate at term %d" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); + syncInfo("[%d:%d] became pre-candidate at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); } void syncRaftBecomeCandidate(SSyncRaft* pRaft) { + convertClear(pRaft); + memset(pRaft->candidateState.votes, SYNC_RAFT_VOTE_RESP_UNKNOWN, sizeof(SyncRaftVoteRespType) * TSDB_MAX_REPLICA); + pRaft->candidateState.inPreVote = false; pRaft->stepFp = stepCandidate; // become candidate make term+1 @@ -148,7 +157,7 @@ void syncRaftBecomeCandidate(SSyncRaft* pRaft) { pRaft->tickFp = tickElection; pRaft->voteFor = pRaft->selfId; pRaft->state = TAOS_SYNC_ROLE_CANDIDATE; - syncInfo("[%d:%d] became candidate at term %d" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); + syncInfo("[%d:%d] became candidate at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); } void syncRaftBecomeLeader(SSyncRaft* pRaft) { @@ -160,7 +169,11 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft) { pRaft->state = TAOS_SYNC_ROLE_LEADER; // TODO: check if there is pending config log - syncInfo("[%d:%d] became leader at term %d" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); + syncInfo("[%d:%d] became leader at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); +} + +void syncRaftTriggerReplicate(SSyncRaft* pRaft) { + } void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft) { @@ -180,7 +193,7 @@ int syncRaftQuorum(SSyncRaft* pRaft) { return pRaft->cluster.replica / 2 + 1; } -int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, bool preVote, bool accept) { +int syncRaftNumOfGranted(SSyncRaft* pRaft, 
SyncNodeId id, bool preVote, bool accept, int* rejectNum) { if (accept) { syncInfo("[%d:%d] received (pre-vote %d) from %d at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, preVote, id, pRaft->term); @@ -188,17 +201,20 @@ int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, bool preVote, bool acc syncInfo("[%d:%d] received rejection from %d at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, id, pRaft->term); } - + int voteIndex = syncRaftConfigurationIndexOfVoter(pRaft, id); assert(voteIndex < pRaft->cluster.replica && voteIndex >= 0); + assert(pRaft->candidateState.votes[voteIndex] == SYNC_RAFT_VOTE_RESP_UNKNOWN); - pRaft->candidateState.votes[voteIndex] = accept; - int granted = 0; + pRaft->candidateState.votes[voteIndex] = accept ? SYNC_RAFT_VOTE_RESP_GRANT : SYNC_RAFT_VOTE_RESP_REJECT; + int granted = 0, rejected = 0; int i; for (i = 0; i < pRaft->cluster.replica; ++i) { - if (pRaft->candidateState.votes[i]) granted++; + if (pRaft->candidateState.votes[i] == SYNC_RAFT_VOTE_RESP_GRANT) granted++; + else if (pRaft->candidateState.votes[i] == SYNC_RAFT_VOTE_RESP_REJECT) rejected++; } + if (rejectNum) *rejectNum = rejected; return granted; } @@ -262,8 +278,20 @@ static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg) { } static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - convertClear(pRaft); - memset(pRaft->candidateState.votes, 0, sizeof(bool) * TSDB_MAX_REPLICA); + /** + * Only handle vote responses corresponding to our candidacy (while in + * StateCandidate, we may get stale MsgPreVoteResp messages in this term from + * our pre-candidate state). 
+ **/ + RaftMessageType msgType = pMsg->msgType; + + if (msgType == RAFT_MSG_INTERNAL_PROP) { + return 0; + } + + if (msgType == RAFT_MSG_VOTE_RESP) { + return 0; + } return 0; } diff --git a/source/libs/sync/src/raft_election.c b/source/libs/sync/src/raft_election.c new file mode 100644 index 0000000000..7ebeb45254 --- /dev/null +++ b/source/libs/sync/src/raft_election.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "syncInt.h" +#include "raft.h" +#include "raft_message.h" + +void syncRaftStartElection(SSyncRaft* pRaft, SyncRaftElectionType cType) { + SyncTerm term; + bool preVote; + RaftMessageType voteMsgType; + + if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { + syncRaftBecomePreCandidate(pRaft); + preVote = true; + // PreVote RPCs are sent for the next term before we've incremented r.Term. + term = pRaft->term + 1; + } else { + syncRaftBecomeCandidate(pRaft); + voteMsgType = RAFT_MSG_VOTE; + term = pRaft->term; + preVote = false; + } + + int quorum = syncRaftQuorum(pRaft); + int granted = syncRaftNumOfGranted(pRaft, pRaft->selfId, preVote, true, NULL); + if (quorum <= granted) { + /** + * We won the election after voting for ourselves (which must mean that + * this is a single-node cluster). Advance to the next state. 
+ **/ + if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { + syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); + } else { + syncRaftBecomeLeader(pRaft); + } + return; + } + + // broadcast vote message to other peers + int i; + SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); + SyncTerm lastTerm = syncRaftLogLastTerm(pRaft->log); + for (i = 0; i < pRaft->cluster.replica; ++i) { + if (i == pRaft->cluster.selfIndex) { + continue; + } + + SyncNodeId nodeId = pRaft->cluster.nodeInfo[i].nodeId; + + SSyncMessage* pMsg = syncNewVoteMsg(pRaft->selfGroupId, pRaft->selfId, + nodeId, term, cType, lastIndex, lastTerm); + if (pMsg == NULL) { + continue; + } + + syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %d] sent %d request to %d at term %" PRId64 "", + pRaft->selfGroupId, pRaft->selfId, lastTerm, + lastIndex, voteMsgType, nodeId, pRaft->term); + + pRaft->io.send(pMsg, &(pRaft->cluster.nodeInfo[i])); + } +} \ No newline at end of file diff --git a/source/libs/sync/src/raft_handle_election_message.c b/source/libs/sync/src/raft_handle_election_message.c index 0d2004dec2..19471846ba 100644 --- a/source/libs/sync/src/raft_handle_election_message.c +++ b/source/libs/sync/src/raft_handle_election_message.c @@ -15,84 +15,29 @@ #include "syncInt.h" #include "raft.h" +#include "raft_log.h" #include "raft_message.h" -static void campaign(SSyncRaft* pRaft, SyncRaftCampaignType cType); - -void syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { +int syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { if (pRaft->state == TAOS_SYNC_ROLE_LEADER) { syncDebug("%d ignoring RAFT_MSG_INTERNAL_ELECTION because already leader", pRaft->selfId); - return; + return 0; } - // if there is pending uncommitted config,cannot campaign + // if there is pending uncommitted config,cannot start election if (syncRaftLogNumOfPendingConf(pRaft->log) > 0 && syncRaftHasUnappliedLog(pRaft->log)) { - syncWarn("[%d:%d] cannot campaign at term %" PRId64 " 
since there are still pending configuration changes to apply", + syncWarn("[%d:%d] cannot syncRaftStartElection at term %" PRId64 " since there are still pending configuration changes to apply", pRaft->selfGroupId, pRaft->selfId, pRaft->term); - return; + return 0; } syncInfo("[%d:%d] is starting a new election at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); if (pRaft->preVote) { - campaign(pRaft, SYNC_RAFT_CAMPAIGN_PRE_ELECTION); + syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_PRE_ELECTION); } else { - campaign(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); + syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); } + + return 0; } - -static void campaign(SSyncRaft* pRaft, SyncRaftCampaignType cType) { - SyncTerm term; - bool preVote; - RaftMessageType voteMsgType; - - if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { - syncRaftBecomePreCandidate(pRaft); - preVote = true; - // PreVote RPCs are sent for the next term before we've incremented r.Term. - term = pRaft->term + 1; - } else { - syncRaftBecomeCandidate(pRaft); - voteMsgType = RAFT_MSG_VOTE; - term = pRaft->term; - preVote = false; - } - - int quorum = syncRaftQuorum(pRaft); - int granted = syncRaftNumOfGranted(pRaft, pRaft->selfId, preVote, true); - if (quorum <= granted) { - /** - * We won the election after voting for ourselves (which must mean that - * this is a single-node cluster). Advance to the next state. 
- **/ - if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { - campaign(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); - } else { - syncRaftBecomeLeader(pRaft); - } - return; - } - - // broadcast vote message to other peers - int i; - SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); - SyncTerm lastTerm = syncRaftLogLastTerm(pRaft->log); - for (i = 0; i < pRaft->cluster.replica; ++i) { - if (i == pRaft->cluster.selfIndex) { - continue; - } - - SyncNodeId nodeId = pRaft->cluster.nodeInfo[i].nodeId; - - SSyncMessage* pMsg = syncNewVoteMsg(pRaft->selfGroupId, pRaft->selfId, nodeId, term, cType, lastIndex, lastTerm); - if (pMsg == NULL) { - continue; - } - - syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %d] sent %d request to %d at term %" PRId64 "", - pRaft->selfGroupId, pRaft->selfId, lastTerm, - lastIndex, voteMsgType, nodeId, pRaft->term); - - pRaft->io.send(pMsg, &(pRaft->cluster.nodeInfo[i])); - } -} \ No newline at end of file diff --git a/source/libs/sync/src/raft_handle_vote_resp_message.c b/source/libs/sync/src/raft_handle_vote_resp_message.c new file mode 100644 index 0000000000..e5d5d6cae7 --- /dev/null +++ b/source/libs/sync/src/raft_handle_vote_resp_message.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "syncInt.h" +#include "raft.h" +#include "raft_message.h" + +int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + int granted, rejected; + int quorum; + int voterIndex; + + voterIndex = syncRaftConfigurationIndexOfVoter(pRaft, pMsg->from); + if (voterIndex == -1) { + syncError("[%d:%d] recv vote resp from unknown server %d", pRaft->selfGroupId, pRaft->selfId, pMsg->from); + return 0; + } + + if (pRaft->state != TAOS_SYNC_ROLE_CANDIDATE) { + syncError("[%d:%d] is not candidate, ignore vote resp", pRaft->selfGroupId, pRaft->selfId); + return 0; + } + + granted = syncRaftNumOfGranted(pRaft, pMsg->from, + pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION, + !pMsg->voteResp.reject, &rejected); + quorum = syncRaftQuorum(pRaft); + + syncInfo("[%d:%d] [quorum:%d] has received %d votes and %d vote rejections", + pRaft->selfGroupId, pRaft->selfId, quorum, granted, rejected); + + if (granted >= quorum) { + if (pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { + syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); + } else { + syncRaftBecomeLeader(pRaft); + syncRaftTriggerReplicate(pRaft); + } + + return 0; + } else if (rejected == quorum) { + syncRaftBecomeFollower(pRaft, pRaft->term, SYNC_NON_NODE_ID); + } + return 0; +} \ No newline at end of file diff --git a/source/libs/sync/src/raft_progress.c b/source/libs/sync/src/raft_progress.c index ba09973f48..458f829394 100644 --- a/source/libs/sync/src/raft_progress.c +++ b/source/libs/sync/src/raft_progress.c @@ -14,7 +14,7 @@ */ #include "raft.h" -#include "raft_unstable_log.h" +#include "raft_log.h" #include "raft_progress.h" #include "sync.h" #include "syncInt.h" From da106e29b2b0435d8b466f36901e610990bece99 Mon Sep 17 00:00:00 2001 From: lichuang Date: Thu, 4 Nov 2021 15:51:30 +0800 Subject: [PATCH 40/94] [TD-10645][raft]add raft vote message handle --- source/libs/sync/inc/raft.h | 2 + source/libs/sync/inc/raft_log.h | 2 + source/libs/sync/inc/raft_message.h 
| 36 ++++++++++-- source/libs/sync/src/raft.c | 4 +- source/libs/sync/src/raft_election.c | 3 +- .../libs/sync/src/raft_handle_vote_message.c | 57 +++++++++++++++++++ .../sync/src/raft_handle_vote_resp_message.c | 2 +- source/libs/sync/src/raft_log.c | 4 ++ 8 files changed, 101 insertions(+), 9 deletions(-) create mode 100644 source/libs/sync/src/raft_handle_vote_message.c diff --git a/source/libs/sync/inc/raft.h b/source/libs/sync/inc/raft.h index cba9434414..2ce2dcb5de 100644 --- a/source/libs/sync/inc/raft.h +++ b/source/libs/sync/inc/raft.h @@ -38,6 +38,7 @@ typedef struct RaftCandidateState { } RaftCandidateState; typedef struct SSyncRaftIOMethods { + // send SSyncMessage to node int (*send)(const SSyncMessage* pMsg, const SNodeInfo* pNode); } SSyncRaftIOMethods; @@ -104,6 +105,7 @@ struct SSyncRaft { SSyncRaftIOMethods io; + // union different state data union { RaftLeaderState leaderState; RaftCandidateState candidateState; diff --git a/source/libs/sync/inc/raft_log.h b/source/libs/sync/inc/raft_log.h index 7ffb946c82..3545bf7ba1 100644 --- a/source/libs/sync/inc/raft_log.h +++ b/source/libs/sync/inc/raft_log.h @@ -35,6 +35,8 @@ SyncIndex syncRaftLogLastIndex(SSyncRaftLog* pLog); SyncTerm syncRaftLogLastTerm(SSyncRaftLog* pLog); +bool syncRaftLogIsUptodate(SSyncRaftLog* pLog, SyncIndex index, SyncTerm term); + int syncRaftLogNumOfPendingConf(SSyncRaftLog* pLog); bool syncRaftHasUnappliedLog(SSyncRaftLog* pLog); diff --git a/source/libs/sync/inc/raft_message.h b/source/libs/sync/inc/raft_message.h index da2e3bc52f..d51822f8b3 100644 --- a/source/libs/sync/inc/raft_message.h +++ b/source/libs/sync/inc/raft_message.h @@ -20,10 +20,13 @@ #include "sync_type.h" /** - * below define message type which handled by Raft node thread - * internal message, which communicate in threads, start with RAFT_MSG_INTERNAL_*, - * internal message use pointer only, need not to be decode/encode - * outter message start with RAFT_MSG_*, need to implement its decode/encode functions 
+ * below define message type which handled by Raft. + * + * internal message, which communicate between threads, start with RAFT_MSG_INTERNAL_*. + * internal message use pointer only and stack memory, need not to be decode/encode and free. + * + * outter message start with RAFT_MSG_*, which communicate between cluster peers, + * need to implement its decode/encode functions. **/ typedef enum RaftMessageType { // client propose a cmd @@ -36,6 +39,7 @@ typedef enum RaftMessageType { RAFT_MSG_VOTE_RESP = 4, RAFT_MSG_APPEND = 5, + RAFT_MSG_APPEND_RESP = 6, } RaftMessageType; typedef struct RaftMsgInternal_Prop { @@ -55,7 +59,7 @@ typedef struct RaftMsg_Vote { } RaftMsg_Vote; typedef struct RaftMsg_VoteResp { - bool reject; + bool rejected; SyncRaftElectionType cType; } RaftMsg_VoteResp; @@ -115,6 +119,7 @@ static FORCE_INLINE SSyncMessage* syncNewVoteMsg(SyncGroupId groupId, SyncNodeId .from = from, .to = to, .term = term, + .msgType = RAFT_MSG_VOTE, .vote = (RaftMsg_Vote) { .cType = cType, .lastIndex = lastIndex, @@ -125,6 +130,26 @@ static FORCE_INLINE SSyncMessage* syncNewVoteMsg(SyncGroupId groupId, SyncNodeId return pMsg; } +static FORCE_INLINE SSyncMessage* syncNewVoteRespMsg(SyncGroupId groupId, SyncNodeId from, SyncNodeId to, + SyncRaftElectionType cType, bool rejected) { + SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(SSyncMessage)); + if (pMsg == NULL) { + return NULL; + } + *pMsg = (SSyncMessage) { + .groupId = groupId, + .from = from, + .to = to, + .msgType = RAFT_MSG_VOTE_RESP, + .voteResp = (RaftMsg_VoteResp) { + .cType = cType, + .rejected = rejected, + }, + }; + + return pMsg; +} + static FORCE_INLINE bool syncIsInternalMsg(RaftMessageType msgType) { return msgType == RAFT_MSG_INTERNAL_PROP || msgType == RAFT_MSG_INTERNAL_ELECTION; @@ -142,6 +167,7 @@ void syncFreeMessage(const SSyncMessage* pMsg); // message handlers int syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); +int syncRaftHandleVoteMessage(SSyncRaft* pRaft, 
const SSyncMessage* pMsg); int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); #endif /* _TD_LIBS_SYNC_RAFT_MESSAGE_H */ \ No newline at end of file diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c index 83ae76fa5e..6e8e359305 100644 --- a/source/libs/sync/src/raft.c +++ b/source/libs/sync/src/raft.c @@ -107,7 +107,7 @@ int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg) { if (msgType == RAFT_MSG_INTERNAL_ELECTION) { syncRaftHandleElectionMessage(pRaft, pMsg); } else if (msgType == RAFT_MSG_VOTE) { - + syncRaftHandleVoteMessage(pRaft, pMsg); } else { pRaft->stepFp(pRaft, pMsg); } @@ -245,7 +245,7 @@ static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) if (syncIsPreVoteMsg(pMsg)) { // Never change our term in response to a PreVote - } else if (syncIsPreVoteRespMsg(pMsg) && !pMsg->voteResp.reject) { + } else if (syncIsPreVoteRespMsg(pMsg) && !pMsg->voteResp.rejected) { /** * We send pre-vote requests with a term in our future. 
If the * pre-vote is granted, we will increment our term when we get a diff --git a/source/libs/sync/src/raft_election.c b/source/libs/sync/src/raft_election.c index 7ebeb45254..bb4a7541c2 100644 --- a/source/libs/sync/src/raft_election.c +++ b/source/libs/sync/src/raft_election.c @@ -15,6 +15,7 @@ #include "syncInt.h" #include "raft.h" +#include "raft_log.h" #include "raft_message.h" void syncRaftStartElection(SSyncRaft* pRaft, SyncRaftElectionType cType) { @@ -66,7 +67,7 @@ void syncRaftStartElection(SSyncRaft* pRaft, SyncRaftElectionType cType) { continue; } - syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %d] sent %d request to %d at term %" PRId64 "", + syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %" PRId64 "] sent %d request to %d at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, lastTerm, lastIndex, voteMsgType, nodeId, pRaft->term); diff --git a/source/libs/sync/src/raft_handle_vote_message.c b/source/libs/sync/src/raft_handle_vote_message.c new file mode 100644 index 0000000000..a575c5df1a --- /dev/null +++ b/source/libs/sync/src/raft_handle_vote_message.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "syncInt.h" +#include "raft.h" +#include "raft_log.h" +#include "raft_message.h" + +static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); + +int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + SSyncMessage* pRespMsg; + int voteIndex = syncRaftConfigurationIndexOfVoter(pRaft, pMsg->from); + if (voteIndex == -1) { + return 0; + } + bool grant; + SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); + SyncTerm lastTerm = syncRaftLogLastTerm(pRaft->log); + + grant = canGrantVoteMessage(pRaft, pMsg); + pRespMsg = syncNewVoteRespMsg(pRaft->selfGroupId, pRaft->selfId, pMsg->to, pMsg->vote.cType, !grant); + if (pRespMsg == NULL) { + return 0; + } + syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %" PRId64 ", vote: %d] %s for %d" \ + "[logterm: %" PRId64 ", index: %" PRId64 ", vote: %d] at term %" PRId64 "", + pRaft->selfGroupId, pRaft->selfId, lastTerm, lastIndex, pRaft->voteFor, + grant ? "grant" : "reject", + pMsg->from, pMsg->vote.lastTerm, pMsg->vote.lastIndex, pRaft->term); + + pRaft->io.send(pRespMsg, &(pRaft->cluster.nodeInfo[voteIndex])); + return 0; +} + +static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + if (!(pRaft->voteFor == SYNC_NON_NODE_ID || pMsg->term > pRaft->term || pRaft->voteFor == pMsg->from)) { + return false; + } + if (!syncRaftLogIsUptodate(pRaft, pMsg->vote.lastIndex, pMsg->vote.lastTerm)) { + return false; + } + + return true; +} \ No newline at end of file diff --git a/source/libs/sync/src/raft_handle_vote_resp_message.c b/source/libs/sync/src/raft_handle_vote_resp_message.c index e5d5d6cae7..a155f0fe63 100644 --- a/source/libs/sync/src/raft_handle_vote_resp_message.c +++ b/source/libs/sync/src/raft_handle_vote_resp_message.c @@ -35,7 +35,7 @@ int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { granted = syncRaftNumOfGranted(pRaft, pMsg->from, pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION, - 
!pMsg->voteResp.reject, &rejected); + !pMsg->voteResp.rejected, &rejected); quorum = syncRaftQuorum(pRaft); syncInfo("[%d:%d] [quorum:%d] has received %d votes and %d vote rejections", diff --git a/source/libs/sync/src/raft_log.c b/source/libs/sync/src/raft_log.c index 46c4e4b304..f93595e9f3 100644 --- a/source/libs/sync/src/raft_log.c +++ b/source/libs/sync/src/raft_log.c @@ -27,6 +27,10 @@ SyncTerm syncRaftLogLastTerm(SSyncRaftLog* pLog) { return 0; } +bool syncRaftLogIsUptodate(SSyncRaftLog* pLog, SyncIndex index, SyncTerm term) { + return true; +} + int syncRaftLogNumOfPendingConf(SSyncRaftLog* pLog) { return 0; } From aee5ebd1ced03863c7b9a3267176f317ffe53b8b Mon Sep 17 00:00:00 2001 From: lichuang Date: Fri, 5 Nov 2021 15:03:56 +0800 Subject: [PATCH 41/94] [TD-10645][raft]add raft append message handle --- source/libs/sync/inc/raft.h | 12 +-- source/libs/sync/inc/raft_log.h | 9 ++ source/libs/sync/inc/raft_message.h | 52 +++++++++-- source/libs/sync/inc/raft_progress.h | 43 +++++++-- source/libs/sync/inc/raft_replication.h | 25 ++++++ source/libs/sync/inc/sync_type.h | 15 +++- source/libs/sync/src/raft.c | 53 ++++++++--- source/libs/sync/src/raft_election.c | 2 +- .../libs/sync/src/raft_handle_vote_message.c | 7 +- .../sync/src/raft_handle_vote_resp_message.c | 4 +- source/libs/sync/src/raft_log.c | 9 ++ source/libs/sync/src/raft_progress.c | 16 +--- source/libs/sync/src/raft_replication.c | 90 +++++++++++++++++++ 13 files changed, 287 insertions(+), 50 deletions(-) create mode 100644 source/libs/sync/inc/raft_replication.h create mode 100644 source/libs/sync/src/raft_replication.c diff --git a/source/libs/sync/inc/raft.h b/source/libs/sync/inc/raft.h index 2ce2dcb5de..dd3eed9e02 100644 --- a/source/libs/sync/inc/raft.h +++ b/source/libs/sync/inc/raft.h @@ -20,8 +20,6 @@ #include "sync_type.h" #include "raft_message.h" -#define SYNC_NON_NODE_ID -1 - typedef struct SSyncRaftProgress SSyncRaftProgress; typedef struct RaftLeaderState { @@ -49,7 +47,8 @@ struct 
SSyncRaft { // owner sync node SSyncNode* pNode; - //SSyncInfo info; + int maxMsgSize; + SSyncFSM fsm; SSyncLogStore logStore; SStateManager stateManager; @@ -74,7 +73,7 @@ struct SSyncRaft { /** * New configuration is ignored if there exists unapplied configuration. **/ - bool pendingConf; + bool hasPendingConf; SSyncCluster cluster; @@ -94,6 +93,9 @@ struct SSyncRaft { **/ uint16_t heartbeatElapsed; + // current tick count since start up + uint32_t currentTick; + // election timeout tick(random in [3:6] tick) uint16_t electionTimeoutTick; @@ -129,7 +131,7 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft); void syncRaftStartElection(SSyncRaft* pRaft, SyncRaftElectionType cType); -void syncRaftTriggerReplicate(SSyncRaft* pRaft); +void syncRaftTriggerHeartbeat(SSyncRaft* pRaft); void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft); bool syncRaftIsPromotable(SSyncRaft* pRaft); diff --git a/source/libs/sync/inc/raft_log.h b/source/libs/sync/inc/raft_log.h index 3545bf7ba1..41b605b0d2 100644 --- a/source/libs/sync/inc/raft_log.h +++ b/source/libs/sync/inc/raft_log.h @@ -19,6 +19,10 @@ #include "sync.h" #include "sync_type.h" +struct SSyncRaftEntry { + +}; + struct SSyncRaftLog { SyncIndex uncommittedConfigIndex; @@ -41,4 +45,9 @@ int syncRaftLogNumOfPendingConf(SSyncRaftLog* pLog); bool syncRaftHasUnappliedLog(SSyncRaftLog* pLog); +SyncTerm syncRaftLogTermOf(SSyncRaftLog* pLog, SyncIndex index); + +int syncRaftLogAcquire(SSyncRaftLog* pLog, SyncIndex index, int maxMsgSize, + SSyncRaftEntry **ppEntries, int *n); + #endif /* _TD_LIBS_SYNC_RAFT_LOG_H */ diff --git a/source/libs/sync/inc/raft_message.h b/source/libs/sync/inc/raft_message.h index d51822f8b3..58090a31f1 100644 --- a/source/libs/sync/inc/raft_message.h +++ b/source/libs/sync/inc/raft_message.h @@ -63,12 +63,28 @@ typedef struct RaftMsg_VoteResp { SyncRaftElectionType cType; } RaftMsg_VoteResp; +typedef struct RaftMsg_Append_Entries { + // index of log entry preceeding new ones + SyncIndex prevIndex; + + // 
term of entry at prevIndex + SyncTerm prevTerm; + + // leader's commit index. + SyncIndex commitIndex; + + // size of the log entries array + int nEntries; + + // log entries array + SSyncRaftEntry* entries; +} RaftMsg_Append_Entries; + typedef struct SSyncMessage { RaftMessageType msgType; SyncTerm term; SyncGroupId groupId; SyncNodeId from; - SyncNodeId to; union { RaftMsgInternal_Prop propose; @@ -77,6 +93,8 @@ typedef struct SSyncMessage { RaftMsg_Vote vote; RaftMsg_VoteResp voteResp; + + RaftMsg_Append_Entries appendEntries; }; } SSyncMessage; @@ -107,7 +125,7 @@ static FORCE_INLINE SSyncMessage* syncInitElectionMsg(SSyncMessage* pMsg, SyncNo return pMsg; } -static FORCE_INLINE SSyncMessage* syncNewVoteMsg(SyncGroupId groupId, SyncNodeId from, SyncNodeId to, +static FORCE_INLINE SSyncMessage* syncNewVoteMsg(SyncGroupId groupId, SyncNodeId from, SyncTerm term, SyncRaftElectionType cType, SyncIndex lastIndex, SyncTerm lastTerm) { SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(SSyncMessage)); @@ -117,7 +135,6 @@ static FORCE_INLINE SSyncMessage* syncNewVoteMsg(SyncGroupId groupId, SyncNodeId *pMsg = (SSyncMessage) { .groupId = groupId, .from = from, - .to = to, .term = term, .msgType = RAFT_MSG_VOTE, .vote = (RaftMsg_Vote) { @@ -130,7 +147,7 @@ static FORCE_INLINE SSyncMessage* syncNewVoteMsg(SyncGroupId groupId, SyncNodeId return pMsg; } -static FORCE_INLINE SSyncMessage* syncNewVoteRespMsg(SyncGroupId groupId, SyncNodeId from, SyncNodeId to, +static FORCE_INLINE SSyncMessage* syncNewVoteRespMsg(SyncGroupId groupId, SyncNodeId from, SyncRaftElectionType cType, bool rejected) { SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(SSyncMessage)); if (pMsg == NULL) { @@ -139,7 +156,6 @@ static FORCE_INLINE SSyncMessage* syncNewVoteRespMsg(SyncGroupId groupId, SyncNo *pMsg = (SSyncMessage) { .groupId = groupId, .from = from, - .to = to, .msgType = RAFT_MSG_VOTE_RESP, .voteResp = (RaftMsg_VoteResp) { .cType = cType, @@ -150,12 +166,36 @@ static FORCE_INLINE 
SSyncMessage* syncNewVoteRespMsg(SyncGroupId groupId, SyncNo return pMsg; } +static FORCE_INLINE SSyncMessage* syncNewAppendMsg(SyncGroupId groupId, SyncNodeId from, + SyncTerm term, SyncIndex prevIndex, SyncTerm prevTerm, + SyncIndex commitIndex, int nEntries, SSyncRaftEntry* entries) { + SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(SSyncMessage)); + if (pMsg == NULL) { + return NULL; + } + *pMsg = (SSyncMessage) { + .groupId = groupId, + .from = from, + .term = term, + .msgType = RAFT_MSG_APPEND, + .appendEntries = (RaftMsg_Append_Entries) { + .prevIndex = prevIndex, + .prevTerm = prevTerm, + .commitIndex = commitIndex, + .nEntries = nEntries, + .entries = entries, + }, + }; + + return pMsg; +} + static FORCE_INLINE bool syncIsInternalMsg(RaftMessageType msgType) { return msgType == RAFT_MSG_INTERNAL_PROP || msgType == RAFT_MSG_INTERNAL_ELECTION; } -static FORCE_INLINE bool syncIsPreVoteRespMsg(SSyncMessage* pMsg) { +static FORCE_INLINE bool syncIsPreVoteRespMsg(const SSyncMessage* pMsg) { return pMsg->msgType == RAFT_MSG_VOTE_RESP && pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION; } diff --git a/source/libs/sync/inc/raft_progress.h b/source/libs/sync/inc/raft_progress.h index 159a80fa0e..5840468a5d 100644 --- a/source/libs/sync/inc/raft_progress.h +++ b/source/libs/sync/inc/raft_progress.h @@ -85,6 +85,9 @@ struct SSyncRaftProgress { **/ bool paused; + // last send append message tick + uint32_t lastSendTick; + /** * pendingSnapshotIndex is used in PROGRESS_SNAPSHOT. 
* If there is a pending snapshot, the pendingSnapshotIndex will be set to the @@ -116,7 +119,9 @@ int syncRaftProgressCreate(SSyncRaft* pRaft); **/ bool syncRaftProgressMaybeUpdate(SSyncRaft* pRaft, int i, SyncIndex lastIndex); -void syncRaftProgressOptimisticNextIndex(SSyncRaft* pRaft, int i, SyncIndex nextIndex); +static FORCE_INLINE void syncRaftProgressOptimisticNextIndex(SSyncRaftProgress* progress, SyncIndex nextIndex) { + progress->nextIndex = nextIndex + 1; +} /** * syncRaftProgressMaybeDecrTo returns false if the given to index comes from an out of order message. @@ -131,7 +136,35 @@ bool syncRaftProgressMaybeDecrTo(SSyncRaft* pRaft, int i, * MsgApps, is currently waiting for a snapshot, or has reached the * MaxInflightMsgs limit. **/ -bool syncRaftProgressIsPaused(SSyncRaft* pRaft, int i); +bool syncRaftProgressIsPaused(SSyncRaftProgress* progress); + +static FORCE_INLINE void syncRaftProgressPause(SSyncRaftProgress* progress) { + progress->paused = true; +} + +static FORCE_INLINE SyncIndex syncRaftProgressNextIndex(SSyncRaftProgress* progress) { + return progress->nextIndex; +} + +static FORCE_INLINE RaftProgressState syncRaftProgressInReplicate(SSyncRaftProgress* progress) { + return progress->state == PROGRESS_REPLICATE; +} + +static FORCE_INLINE RaftProgressState syncRaftProgressInSnapshot(SSyncRaftProgress* progress) { + return progress->state == PROGRESS_SNAPSHOT; +} + +static FORCE_INLINE RaftProgressState syncRaftProgressInProbe(SSyncRaftProgress* progress) { + return progress->state == PROGRESS_PROBE; +} + +static FORCE_INLINE bool syncRaftProgressRecentActive(SSyncRaftProgress* progress) { + return progress->recentActive; +} + +static FORCE_INLINE bool syncRaftProgressUpdateSendTick(SSyncRaftProgress* progress, SyncTick current) { + return progress->lastSendTick = current; +} void syncRaftProgressFailure(SSyncRaft* pRaft, int i); @@ -159,7 +192,7 @@ void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights); void 
syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i); -SyncIndex syncRaftProgressNextIndex(SSyncRaft* pRaft, int i); + SyncIndex syncRaftProgressMatchIndex(SSyncRaft* pRaft, int i); @@ -171,12 +204,10 @@ bool syncRaftProgressResetRecentRecv(SSyncRaft* pRaft, int i); void syncRaftProgressMarkRecentRecv(SSyncRaft* pRaft, int i); -bool syncRaftProgressGetRecentRecv(SSyncRaft* pRaft, int i); + void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i); -RaftProgressState syncRaftProgressState(SSyncRaft* pRaft, int i); - #endif #endif /* TD_SYNC_RAFT_PROGRESS_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/raft_replication.h b/source/libs/sync/inc/raft_replication.h new file mode 100644 index 0000000000..e457063980 --- /dev/null +++ b/source/libs/sync/inc/raft_replication.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef TD_SYNC_RAFT_REPLICATION_H +#define TD_SYNC_RAFT_REPLICATION_H + +#include "sync.h" +#include "syncInt.h" +#include "sync_type.h" + +int syncRaftReplicate(SSyncRaft* pRaft, int i); + +#endif /* TD_SYNC_RAFT_REPLICATION_H */ diff --git a/source/libs/sync/inc/sync_type.h b/source/libs/sync/inc/sync_type.h index f9632f6ae8..130243a72a 100644 --- a/source/libs/sync/inc/sync_type.h +++ b/source/libs/sync/inc/sync_type.h @@ -16,12 +16,18 @@ #ifndef _TD_LIBS_SYNC_TYPE_H #define _TD_LIBS_SYNC_TYPE_H +#define SYNC_NON_NODE_ID -1 +#define SYNC_NON_TERM 0 + typedef int32_t SyncTime; +typedef uint32_t SyncTick; typedef struct SSyncRaft SSyncRaft; typedef struct SSyncRaftLog SSyncRaftLog; +typedef struct SSyncRaftEntry SSyncRaftEntry; + #ifndef MIN #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #endif @@ -32,13 +38,18 @@ typedef struct SSyncRaftLog SSyncRaftLog; typedef enum { SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0, - SYNC_RAFT_CAMPAIGN_ELECTION = 1, - SYNC_RAFT_CAMPAIGN_TRANSFER = 3, + SYNC_RAFT_CAMPAIGN_ELECTION = 1, + SYNC_RAFT_CAMPAIGN_TRANSFER = 2, } SyncRaftElectionType; typedef enum { + // the init vote resp status SYNC_RAFT_VOTE_RESP_UNKNOWN = 0, + + // grant the vote request SYNC_RAFT_VOTE_RESP_GRANT = 1, + + //reject the vote request SYNC_RAFT_VOTE_RESP_REJECT = 2, } SyncRaftVoteRespType; diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c index 6e8e359305..dca5c4cf08 100644 --- a/source/libs/sync/src/raft.c +++ b/source/libs/sync/src/raft.c @@ -29,6 +29,8 @@ static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg); static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg); static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg); +static int triggerAll(SSyncRaft* pRaft); + static void tickElection(SSyncRaft* pRaft); static void tickHeartbeat(SSyncRaft* pRaft); @@ -95,8 +97,8 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { } int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg) 
{ - syncDebug("from %d, to %d, type:%d, term:%" PRId64 ", state:%d", - pMsg->from, pMsg->to, pMsg->msgType, pMsg->term, pRaft->state); + syncDebug("from %d, type:%d, term:%" PRId64 ", state:%d", + pMsg->from, pMsg->msgType, pMsg->term, pRaft->state); if (preHandleMessage(pRaft, pMsg)) { syncFreeMessage(pMsg); @@ -117,6 +119,7 @@ int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg) { } int32_t syncRaftTick(SSyncRaft* pRaft) { + pRaft->currentTick += 1; return 0; } @@ -168,12 +171,22 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft) { pRaft->leaderId = pRaft->leaderId; pRaft->state = TAOS_SYNC_ROLE_LEADER; // TODO: check if there is pending config log + int nPendingConf = syncRaftLogNumOfPendingConf(pRaft->log); + if (nPendingConf > 1) { + syncFatal("unexpected multiple uncommitted config entry"); + } + if (nPendingConf == 1) { + pRaft->hasPendingConf = true; + } syncInfo("[%d:%d] became leader at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); + + // after become leader, send initial heartbeat + syncRaftTriggerHeartbeat(pRaft); } -void syncRaftTriggerReplicate(SSyncRaft* pRaft) { - +void syncRaftTriggerHeartbeat(SSyncRaft* pRaft) { + triggerAll(pRaft); } void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft) { @@ -219,7 +232,7 @@ int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, bool preVote, bool acc } /** - * pre-handle message, return true is no need to continue + * pre-handle message, return true means no need to continue * Handle the message term, which may result in our stepping down to a follower. 
**/ static bool preHandleMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { @@ -230,9 +243,11 @@ static bool preHandleMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { if (pMsg->term > pRaft->term) { return preHandleNewTermMessage(pRaft, pMsg); + } else if (pMsg->term < pRaft->term) { + return preHandleOldTermMessage(pRaft, pMsg); } - return preHandleOldTermMessage(pRaft, pMsg);; + return false; } static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { @@ -240,6 +255,7 @@ static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) RaftMessageType msgType = pMsg->msgType; if (msgType == RAFT_MSG_VOTE) { + // TODO leaderId = SYNC_NON_NODE_ID; } @@ -263,7 +279,7 @@ static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) } static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - + // TODO // if receive old term message, no need to continue return true; } @@ -273,7 +289,7 @@ static int convertClear(SSyncRaft* pRaft) { } static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - convertClear(pRaft); + return 0; } @@ -290,6 +306,7 @@ static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg) { } if (msgType == RAFT_MSG_VOTE_RESP) { + syncRaftHandleVoteRespMessage(pRaft, pMsg); return 0; } return 0; @@ -324,6 +341,22 @@ static void tickHeartbeat(SSyncRaft* pRaft) { } +/** + * trigger I/O requests for newly appended log entries or heartbeats. 
+ **/ +static int triggerAll(SSyncRaft* pRaft) { + assert(pRaft->state == TAOS_SYNC_ROLE_LEADER); + int i; + + for (i = 0; i < pRaft->cluster.replica; ++i) { + if (i == pRaft->cluster.selfIndex) { + continue; + } + + + } +} + static void abortLeaderTransfer(SSyncRaft* pRaft) { pRaft->leadTransferee = SYNC_NON_NODE_ID; } @@ -343,5 +376,5 @@ static void resetRaft(SSyncRaft* pRaft, SyncTerm term) { abortLeaderTransfer(pRaft); - pRaft->pendingConf = false; -} \ No newline at end of file + pRaft->hasPendingConf = false; +} diff --git a/source/libs/sync/src/raft_election.c b/source/libs/sync/src/raft_election.c index bb4a7541c2..4ffb8d0943 100644 --- a/source/libs/sync/src/raft_election.c +++ b/source/libs/sync/src/raft_election.c @@ -62,7 +62,7 @@ void syncRaftStartElection(SSyncRaft* pRaft, SyncRaftElectionType cType) { SyncNodeId nodeId = pRaft->cluster.nodeInfo[i].nodeId; SSyncMessage* pMsg = syncNewVoteMsg(pRaft->selfGroupId, pRaft->selfId, - nodeId, term, cType, lastIndex, lastTerm); + term, cType, lastIndex, lastTerm); if (pMsg == NULL) { continue; } diff --git a/source/libs/sync/src/raft_handle_vote_message.c b/source/libs/sync/src/raft_handle_vote_message.c index a575c5df1a..87ef468d57 100644 --- a/source/libs/sync/src/raft_handle_vote_message.c +++ b/source/libs/sync/src/raft_handle_vote_message.c @@ -15,6 +15,7 @@ #include "syncInt.h" #include "raft.h" +#include "raft_configuration.h" #include "raft_log.h" #include "raft_message.h" @@ -31,12 +32,12 @@ int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { SyncTerm lastTerm = syncRaftLogLastTerm(pRaft->log); grant = canGrantVoteMessage(pRaft, pMsg); - pRespMsg = syncNewVoteRespMsg(pRaft->selfGroupId, pRaft->selfId, pMsg->to, pMsg->vote.cType, !grant); + pRespMsg = syncNewVoteRespMsg(pRaft->selfGroupId, pRaft->selfId, pMsg->vote.cType, !grant); if (pRespMsg == NULL) { return 0; } syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %" PRId64 ", vote: %d] %s for %d" \ - "[logterm: %" PRId64 ", 
index: %" PRId64 ", vote: %d] at term %" PRId64 "", + "[logterm: %" PRId64 ", index: %" PRId64 "] at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, lastTerm, lastIndex, pRaft->voteFor, grant ? "grant" : "reject", pMsg->from, pMsg->vote.lastTerm, pMsg->vote.lastIndex, pRaft->term); @@ -49,7 +50,7 @@ static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { if (!(pRaft->voteFor == SYNC_NON_NODE_ID || pMsg->term > pRaft->term || pRaft->voteFor == pMsg->from)) { return false; } - if (!syncRaftLogIsUptodate(pRaft, pMsg->vote.lastIndex, pMsg->vote.lastTerm)) { + if (!syncRaftLogIsUptodate(pRaft->log, pMsg->vote.lastIndex, pMsg->vote.lastTerm)) { return false; } diff --git a/source/libs/sync/src/raft_handle_vote_resp_message.c b/source/libs/sync/src/raft_handle_vote_resp_message.c index a155f0fe63..6e88b03b5a 100644 --- a/source/libs/sync/src/raft_handle_vote_resp_message.c +++ b/source/libs/sync/src/raft_handle_vote_resp_message.c @@ -15,6 +15,7 @@ #include "syncInt.h" #include "raft.h" +#include "raft_configuration.h" #include "raft_message.h" int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { @@ -45,8 +46,7 @@ int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { if (pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); } else { - syncRaftBecomeLeader(pRaft); - syncRaftTriggerReplicate(pRaft); + syncRaftBecomeLeader(pRaft); } return 0; diff --git a/source/libs/sync/src/raft_log.c b/source/libs/sync/src/raft_log.c index f93595e9f3..ee51fcbef3 100644 --- a/source/libs/sync/src/raft_log.c +++ b/source/libs/sync/src/raft_log.c @@ -37,4 +37,13 @@ int syncRaftLogNumOfPendingConf(SSyncRaftLog* pLog) { bool syncRaftHasUnappliedLog(SSyncRaftLog* pLog) { return pLog->commitIndex > pLog->appliedIndex; +} + +SyncTerm syncRaftLogTermOf(SSyncRaftLog* pLog, SyncIndex index) { + return SYNC_NON_TERM; +} + +int 
syncRaftLogAcquire(SSyncRaftLog* pLog, SyncIndex index, int maxMsgSize, + SSyncRaftEntry **ppEntries, int *n) { + return 0; } \ No newline at end of file diff --git a/source/libs/sync/src/raft_progress.c b/source/libs/sync/src/raft_progress.c index 458f829394..8133b670ff 100644 --- a/source/libs/sync/src/raft_progress.c +++ b/source/libs/sync/src/raft_progress.c @@ -22,7 +22,6 @@ static void resetProgressState(SSyncRaftProgress* progress, RaftProgressState state); static void resumeProgress(SSyncRaftProgress* progress); -static void pauseProgress(SSyncRaftProgress* progress); int syncRaftProgressCreate(SSyncRaft* pRaft) { @@ -58,11 +57,6 @@ bool syncRaftProgressMaybeUpdate(SSyncRaft* pRaft, int i, SyncIndex lastIndex) { return updated; } -void syncRaftProgressOptimisticNextIndex(SSyncRaft* pRaft, int i, SyncIndex nextIndex) { - assert(i >= 0 && i < pRaft->leaderState.nProgress); - pRaft->leaderState.progress[i].nextIndex = nextIndex + 1; -} - bool syncRaftProgressMaybeDecrTo(SSyncRaft* pRaft, int i, SyncIndex rejected, SyncIndex lastIndex) { assert(i >= 0 && i < pRaft->leaderState.nProgress); @@ -103,15 +97,7 @@ static void resumeProgress(SSyncRaftProgress* progress) { progress->paused = false; } -static void pauseProgress(SSyncRaftProgress* progress) { - progress->paused = true; -} - -bool syncRaftProgressIsPaused(SSyncRaft* pRaft, int i) { - assert(i >= 0 && i < pRaft->leaderState.nProgress); - - SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); - +bool syncRaftProgressIsPaused(SSyncRaftProgress* progress) { switch (progress->state) { case PROGRESS_PROBE: return progress->paused; diff --git a/source/libs/sync/src/raft_replication.c b/source/libs/sync/src/raft_replication.c new file mode 100644 index 0000000000..02d9804f7e --- /dev/null +++ b/source/libs/sync/src/raft_replication.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "raft.h" +#include "raft_log.h" +#include "raft_progress.h" +#include "raft_replication.h" + +static int sendSnapshot(SSyncRaft* pRaft, int i); +static int sendAppendEntries(SSyncRaft* pRaft, int i, SyncIndex index, SyncTerm term); + +int syncRaftReplicate(SSyncRaft* pRaft, int i) { + assert(pRaft->state == TAOS_SYNC_ROLE_LEADER); + assert(i >= 0 && i < pRaft->leaderState.nProgress); + + SyncNodeId nodeId = pRaft->cluster.nodeInfo[i].nodeId; + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + if (syncRaftProgressIsPaused(progress)) { + syncInfo("node %d paused", nodeId); + return 0; + } + + SyncIndex nextIndex = syncRaftProgressNextIndex(progress); + SyncIndex prevIndex = nextIndex - 1; + SyncTerm prevTerm = syncRaftLogTermOf(pRaft->log, prevIndex); + + if (prevTerm == SYNC_NON_TERM && !syncRaftProgressInSnapshot(progress)) { + goto send_snapshot; + } + +send_snapshot: + if (syncRaftProgressRecentActive(progress)) { + /* Only send a snapshot when we have heard from the server */ + return sendSnapshot(pRaft, i); + } else { + /* Send empty AppendEntries RPC when we haven't heard from the server */ + prevIndex = syncRaftLogLastIndex(pRaft->log); + prevTerm = syncRaftLogLastTerm(pRaft->log); + return sendAppendEntries(pRaft, i, prevIndex, prevTerm); + } +} + +static int sendSnapshot(SSyncRaft* pRaft, int i) { + return 0; +} + +static int sendAppendEntries(SSyncRaft* pRaft, int i, SyncIndex prevIndex, 
SyncTerm prevTerm) { + SyncIndex nextIndex = prevIndex + 1; + SSyncRaftEntry *entries; + int nEntry; + SNodeInfo* pNode = &(pRaft->cluster.nodeInfo[i]); + SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); + syncRaftLogAcquire(pRaft->log, nextIndex, pRaft->maxMsgSize, &entries, &nEntry); + + SSyncMessage* msg = syncNewAppendMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term, + prevIndex, prevTerm, pRaft->log->commitIndex, + nEntry, entries); + + if (msg == NULL) { + return 0; + } + + pRaft->io.send(msg, pNode); + + if (syncRaftProgressInReplicate(progress)) { + SyncIndex lastIndex = nextIndex + nEntry; + syncRaftProgressOptimisticNextIndex(progress, lastIndex); + syncRaftInflightAdd(&progress->inflights, lastIndex); + } else if (syncRaftProgressInProbe(progress)) { + syncRaftProgressPause(progress); + } else { + + } + + syncRaftProgressUpdateSendTick(progress, pRaft->currentTick); + + return 0; +} \ No newline at end of file From 4022f360a7b87822d37801afe77dc6643af158f7 Mon Sep 17 00:00:00 2001 From: lichuang Date: Fri, 5 Nov 2021 16:35:07 +0800 Subject: [PATCH 42/94] [TD-10645][raft]add raft append message handle --- source/libs/sync/inc/raft_configuration.h | 2 +- source/libs/sync/inc/raft_log.h | 11 ++++ source/libs/sync/inc/raft_message.h | 34 ++++++++-- source/libs/sync/src/raft.c | 42 +++++++++++-- source/libs/sync/src/raft_configuration.c | 2 +- .../src/raft_handle_append_entries_message.c | 49 +++++++++++++++ .../libs/sync/src/raft_handle_vote_message.c | 2 +- .../sync/src/raft_handle_vote_resp_message.c | 2 +- source/libs/sync/src/raft_log.c | 9 +++ source/libs/sync/src/raft_replication.c | 63 +++++++++++++++++-- 10 files changed, 198 insertions(+), 18 deletions(-) create mode 100644 source/libs/sync/src/raft_handle_append_entries_message.c diff --git a/source/libs/sync/inc/raft_configuration.h b/source/libs/sync/inc/raft_configuration.h index 993f863f33..ac9bbb5e55 100644 --- a/source/libs/sync/inc/raft_configuration.h +++ 
b/source/libs/sync/inc/raft_configuration.h @@ -20,7 +20,7 @@ #include "sync_type.h" // return -1 if cannot find this id -int syncRaftConfigurationIndexOfVoter(SSyncRaft *pRaft, SyncNodeId id); +int syncRaftConfigurationIndexOfNode(SSyncRaft *pRaft, SyncNodeId id); int syncRaftConfigurationVoterCount(SSyncRaft *pRaft); diff --git a/source/libs/sync/inc/raft_log.h b/source/libs/sync/inc/raft_log.h index 41b605b0d2..bab9932fb5 100644 --- a/source/libs/sync/inc/raft_log.h +++ b/source/libs/sync/inc/raft_log.h @@ -37,6 +37,8 @@ SSyncRaftLog* syncRaftLogOpen(); SyncIndex syncRaftLogLastIndex(SSyncRaftLog* pLog); +SyncIndex syncRaftLogSnapshotIndex(SSyncRaftLog* pLog); + SyncTerm syncRaftLogLastTerm(SSyncRaftLog* pLog); bool syncRaftLogIsUptodate(SSyncRaftLog* pLog, SyncIndex index, SyncTerm term); @@ -50,4 +52,13 @@ SyncTerm syncRaftLogTermOf(SSyncRaftLog* pLog, SyncIndex index); int syncRaftLogAcquire(SSyncRaftLog* pLog, SyncIndex index, int maxMsgSize, SSyncRaftEntry **ppEntries, int *n); +void syncRaftLogRelease(SSyncRaftLog* pLog, SyncIndex index, + SSyncRaftEntry *pEntries, int n); + +bool syncRaftLogMatchTerm(); + +static FORCE_INLINE bool syncRaftLogIsCommitted(SSyncRaftLog* pLog, SyncIndex index) { + return pLog->commitIndex > index; +} + #endif /* _TD_LIBS_SYNC_RAFT_LOG_H */ diff --git a/source/libs/sync/inc/raft_message.h b/source/libs/sync/inc/raft_message.h index 58090a31f1..2cb625d1fb 100644 --- a/source/libs/sync/inc/raft_message.h +++ b/source/libs/sync/inc/raft_message.h @@ -65,10 +65,10 @@ typedef struct RaftMsg_VoteResp { typedef struct RaftMsg_Append_Entries { // index of log entry preceeding new ones - SyncIndex prevIndex; + SyncIndex index; // term of entry at prevIndex - SyncTerm prevTerm; + SyncTerm term; // leader's commit index. 
SyncIndex commitIndex; @@ -80,6 +80,10 @@ typedef struct RaftMsg_Append_Entries { SSyncRaftEntry* entries; } RaftMsg_Append_Entries; +typedef struct RaftMsg_Append_Resp { + SyncIndex index; +} RaftMsg_Append_Resp; + typedef struct SSyncMessage { RaftMessageType msgType; SyncTerm term; @@ -95,6 +99,7 @@ typedef struct SSyncMessage { RaftMsg_VoteResp voteResp; RaftMsg_Append_Entries appendEntries; + RaftMsg_Append_Resp appendResp; }; } SSyncMessage; @@ -167,7 +172,7 @@ static FORCE_INLINE SSyncMessage* syncNewVoteRespMsg(SyncGroupId groupId, SyncNo } static FORCE_INLINE SSyncMessage* syncNewAppendMsg(SyncGroupId groupId, SyncNodeId from, - SyncTerm term, SyncIndex prevIndex, SyncTerm prevTerm, + SyncTerm term, SyncIndex logIndex, SyncTerm logTerm, SyncIndex commitIndex, int nEntries, SSyncRaftEntry* entries) { SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(SSyncMessage)); if (pMsg == NULL) { @@ -179,8 +184,8 @@ static FORCE_INLINE SSyncMessage* syncNewAppendMsg(SyncGroupId groupId, SyncNode .term = term, .msgType = RAFT_MSG_APPEND, .appendEntries = (RaftMsg_Append_Entries) { - .prevIndex = prevIndex, - .prevTerm = prevTerm, + .index = logIndex, + .term = logTerm, .commitIndex = commitIndex, .nEntries = nEntries, .entries = entries, @@ -190,6 +195,24 @@ static FORCE_INLINE SSyncMessage* syncNewAppendMsg(SyncGroupId groupId, SyncNode return pMsg; } +static FORCE_INLINE SSyncMessage* syncNewEmptyAppendRespMsg(SyncGroupId groupId, SyncNodeId from, SyncTerm term) { + SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(SSyncMessage)); + if (pMsg == NULL) { + return NULL; + } + *pMsg = (SSyncMessage) { + .groupId = groupId, + .from = from, + .term = term, + .msgType = RAFT_MSG_APPEND_RESP, + .appendResp = (RaftMsg_Append_Resp) { + + }, + }; + + return pMsg; +} + static FORCE_INLINE bool syncIsInternalMsg(RaftMessageType msgType) { return msgType == RAFT_MSG_INTERNAL_PROP || msgType == RAFT_MSG_INTERNAL_ELECTION; @@ -209,5 +232,6 @@ void syncFreeMessage(const SSyncMessage* 
pMsg); int syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); +int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); #endif /* _TD_LIBS_SYNC_RAFT_MESSAGE_H */ \ No newline at end of file diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c index dca5c4cf08..39e7a80d0b 100644 --- a/source/libs/sync/src/raft.c +++ b/source/libs/sync/src/raft.c @@ -16,6 +16,7 @@ #include "raft.h" #include "raft_configuration.h" #include "raft_log.h" +#include "raft_replication.h" #include "syncInt.h" #define RAFT_READ_LOG_MAX_NUM 100 @@ -215,7 +216,7 @@ int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, bool preVote, bool acc pRaft->selfGroupId, pRaft->selfId, id, pRaft->term); } - int voteIndex = syncRaftConfigurationIndexOfVoter(pRaft, id); + int voteIndex = syncRaftConfigurationIndexOfNode(pRaft, id); assert(voteIndex < pRaft->cluster.replica && voteIndex >= 0); assert(pRaft->candidateState.votes[voteIndex] == SYNC_RAFT_VOTE_RESP_UNKNOWN); @@ -279,8 +280,38 @@ static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) } static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - // TODO - // if receive old term message, no need to continue + if (pRaft->checkQuorum && pMsg->msgType == RAFT_MSG_APPEND) { + /** + * We have received messages from a leader at a lower term. It is possible + * that these messages were simply delayed in the network, but this could + * also mean that this node has advanced its term number during a network + * partition, and it is now unable to either win an election or to rejoin + * the majority on the old term. 
If checkQuorum is false, this will be + * handled by incrementing term numbers in response to MsgVote with a + * higher term, but if checkQuorum is true we may not advance the term on + * MsgVote and must generate other messages to advance the term. The net + * result of these two features is to minimize the disruption caused by + * nodes that have been removed from the cluster's configuration: a + * removed node will send MsgVotes (or MsgPreVotes) which will be ignored, + * but it will not receive MsgApp or MsgHeartbeat, so it will not create + * disruptive term increases + **/ + int peerIndex = syncRaftConfigurationIndexOfNode(pRaft, pMsg->from); + if (peerIndex < 0) { + return true; + } + SSyncMessage* msg = syncNewEmptyAppendRespMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term); + if (msg == NULL) { + return true; + } + + pRaft->io.send(msg, &(pRaft->cluster.nodeInfo[peerIndex])); + } else { + // ignore other cases + syncInfo("[%d:%d] [term:%" PRId64 "] ignored a %d message with lower term from %d [term:%" PRId64 "]", + pRaft->selfGroupId, pRaft->selfId, pRaft->term, pMsg->msgType, pMsg->from, pMsg->term); + } + return true; } @@ -308,6 +339,9 @@ static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg) { if (msgType == RAFT_MSG_VOTE_RESP) { syncRaftHandleVoteRespMessage(pRaft, pMsg); return 0; + } else if (msgType == RAFT_MSG_APPEND) { + syncRaftBecomeFollower(pRaft, pRaft->term, pMsg->from); + syncRaftHandleAppendEntriesMessage(pRaft, pMsg); } return 0; } @@ -353,7 +387,7 @@ static int triggerAll(SSyncRaft* pRaft) { continue; } - + syncRaftReplicate(pRaft, i); } } diff --git a/source/libs/sync/src/raft_configuration.c b/source/libs/sync/src/raft_configuration.c index 6f3a27e7c0..e16cb34989 100644 --- a/source/libs/sync/src/raft_configuration.c +++ b/source/libs/sync/src/raft_configuration.c @@ -16,7 +16,7 @@ #include "raft_configuration.h" #include "raft.h" -int syncRaftConfigurationIndexOfVoter(SSyncRaft *pRaft, SyncNodeId id) { +int 
syncRaftConfigurationIndexOfNode(SSyncRaft *pRaft, SyncNodeId id) { return (int)(id); } diff --git a/source/libs/sync/src/raft_handle_append_entries_message.c b/source/libs/sync/src/raft_handle_append_entries_message.c new file mode 100644 index 0000000000..d4d362848f --- /dev/null +++ b/source/libs/sync/src/raft_handle_append_entries_message.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "syncInt.h" +#include "raft.h" +#include "raft_log.h" +#include "raft_configuration.h" +#include "raft_message.h" + +int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { + RaftMsg_Append_Entries *appendEntries = &(pMsg->appendEntries); + + int peerIndex = syncRaftConfigurationIndexOfNode(pRaft, pMsg->from); + + if (peerIndex < 0) { + return 0; + } + + SSyncMessage* pRespMsg = syncNewEmptyAppendRespMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term); + if (pRespMsg == NULL) { + return 0; + } + + RaftMsg_Append_Entries *appendResp = &(pMsg->appendResp); + // ignore committed logs + if (syncRaftLogIsCommitted(pRaft->log, appendEntries->index)) { + appendResp->index = pRaft->log->commitIndex; + goto out; + } + + syncInfo("[%d:%d] recv append from %d index %" PRId64"", + pRaft->selfGroupId, pRaft->selfId, pMsg->from, appendEntries->index); + +out: + pRaft->io.send(pRespMsg, &(pRaft->cluster.nodeInfo[peerIndex])); + return 0; +} \ No newline at end of file diff --git 
a/source/libs/sync/src/raft_handle_vote_message.c b/source/libs/sync/src/raft_handle_vote_message.c index 87ef468d57..2fab8ad5a9 100644 --- a/source/libs/sync/src/raft_handle_vote_message.c +++ b/source/libs/sync/src/raft_handle_vote_message.c @@ -23,7 +23,7 @@ static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { SSyncMessage* pRespMsg; - int voteIndex = syncRaftConfigurationIndexOfVoter(pRaft, pMsg->from); + int voteIndex = syncRaftConfigurationIndexOfNode(pRaft, pMsg->from); if (voteIndex == -1) { return 0; } diff --git a/source/libs/sync/src/raft_handle_vote_resp_message.c b/source/libs/sync/src/raft_handle_vote_resp_message.c index 6e88b03b5a..05464256af 100644 --- a/source/libs/sync/src/raft_handle_vote_resp_message.c +++ b/source/libs/sync/src/raft_handle_vote_resp_message.c @@ -23,7 +23,7 @@ int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { int quorum; int voterIndex; - voterIndex = syncRaftConfigurationIndexOfVoter(pRaft, pMsg->from); + voterIndex = syncRaftConfigurationIndexOfNode(pRaft, pMsg->from); if (voterIndex == -1) { syncError("[%d:%d] recv vote resp from unknown server %d", pRaft->selfGroupId, pRaft->selfId, pMsg->from); return 0; diff --git a/source/libs/sync/src/raft_log.c b/source/libs/sync/src/raft_log.c index ee51fcbef3..a26650cbb7 100644 --- a/source/libs/sync/src/raft_log.c +++ b/source/libs/sync/src/raft_log.c @@ -23,6 +23,10 @@ SyncIndex syncRaftLogLastIndex(SSyncRaftLog* pLog) { return 0; } +SyncIndex syncRaftLogSnapshotIndex(SSyncRaftLog* pLog) { + return 0; +} + SyncTerm syncRaftLogLastTerm(SSyncRaftLog* pLog) { return 0; } @@ -46,4 +50,9 @@ SyncTerm syncRaftLogTermOf(SSyncRaftLog* pLog, SyncIndex index) { int syncRaftLogAcquire(SSyncRaftLog* pLog, SyncIndex index, int maxMsgSize, SSyncRaftEntry **ppEntries, int *n) { return 0; +} + +void syncRaftLogRelease(SSyncRaftLog* pLog, SyncIndex index, + 
SSyncRaftEntry *pEntries, int n) { + return; } \ No newline at end of file diff --git a/source/libs/sync/src/raft_replication.c b/source/libs/sync/src/raft_replication.c index 02d9804f7e..b6ff1fb329 100644 --- a/source/libs/sync/src/raft_replication.c +++ b/source/libs/sync/src/raft_replication.c @@ -33,13 +33,62 @@ int syncRaftReplicate(SSyncRaft* pRaft, int i) { } SyncIndex nextIndex = syncRaftProgressNextIndex(progress); - SyncIndex prevIndex = nextIndex - 1; - SyncTerm prevTerm = syncRaftLogTermOf(pRaft->log, prevIndex); + SyncIndex snapshotIndex = syncRaftLogSnapshotIndex(pRaft->log); + bool inSnapshot = syncRaftProgressInSnapshot(progress); + SyncIndex prevIndex; + SyncTerm prevTerm; - if (prevTerm == SYNC_NON_TERM && !syncRaftProgressInSnapshot(progress)) { - goto send_snapshot; + /** + * From Section 3.5: + * + * When sending an AppendEntries RPC, the leader includes the index and + * term of the entry in its log that immediately precedes the new + * entries. If the follower does not find an entry in its log with the + * same index and term, then it refuses the new entries. The consistency + * check acts as an induction step: the initial empty state of the logs + * satisfies the Log Matching Property, and the consistency check + * preserves the Log Matching Property whenever logs are extended. As a + * result, whenever AppendEntries returns successfully, the leader knows + * that the follower's log is identical to its own log up through the new + * entries (Log Matching Property in Figure 3.2). + **/ + if (nextIndex == 1) { + /** + * We're including the very first entry, so prevIndex and prevTerm are + * null. If the first entry is not available anymore, send the last + * snapshot if we're not already sending one. 
+ **/ + if (snapshotIndex > 0 && !inSnapshot) { + goto send_snapshot; + } + + // otherwise send append entries from start + prevIndex = 0; + prevTerm = 0; + } else { + /** + * Set prevIndex and prevTerm to the index and term of the entry at + * nextIndex - 1. + **/ + prevIndex = nextIndex - 1; + prevTerm = syncRaftLogTermOf(pRaft->log, prevIndex); + /** + * If the entry is not anymore in our log, send the last snapshot if we're + * not doing so already. + **/ + if (prevTerm == SYNC_NON_TERM && !inSnapshot) { + goto send_snapshot; + } } + /* Send empty AppendEntries RPC when installing a snaphot */ + if (inSnapshot) { + prevIndex = syncRaftLogLastIndex(pRaft->log); + prevTerm = syncRaftLogLastTerm(pRaft->log); + } + + return sendAppendEntries(pRaft, i, prevIndex, prevTerm); + send_snapshot: if (syncRaftProgressRecentActive(progress)) { /* Only send a snapshot when we have heard from the server */ @@ -69,7 +118,7 @@ static int sendAppendEntries(SSyncRaft* pRaft, int i, SyncIndex prevIndex, SyncT nEntry, entries); if (msg == NULL) { - return 0; + goto err_release_log; } pRaft->io.send(msg, pNode); @@ -87,4 +136,8 @@ static int sendAppendEntries(SSyncRaft* pRaft, int i, SyncIndex prevIndex, SyncT syncRaftProgressUpdateSendTick(progress, pRaft->currentTick); return 0; + +err_release_log: + syncRaftLogRelease(pRaft->log, nextIndex, entries, nEntry); + return 0; } \ No newline at end of file From 9edc17e575b2bb1493650a52c37af099289deabe Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Fri, 5 Nov 2021 17:17:21 +0800 Subject: [PATCH 43/94] fix tqHandleDel bug --- source/dnode/vnode/tq/src/tq.c | 3 +++ source/dnode/vnode/tq/src/tqMetaStore.c | 22 +++++++++++----------- source/dnode/vnode/tq/test/tqMetaTest.cpp | 7 +++++++ 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/source/dnode/vnode/tq/src/tq.c b/source/dnode/vnode/tq/src/tq.c index 1aa8f231c3..cf98e3e1a4 100644 --- a/source/dnode/vnode/tq/src/tq.c +++ b/source/dnode/vnode/tq/src/tq.c @@ -22,6 +22,9 @@ // 
//handle management message // + +int tqGetgHandleSSize(const TqGroupHandle *gHandle); + static int tqProtoCheck(TmqMsgHead *pMsg) { return pMsg->protoVer == 0; } diff --git a/source/dnode/vnode/tq/src/tqMetaStore.c b/source/dnode/vnode/tq/src/tqMetaStore.c index a4c2b90491..eb2c3404fc 100644 --- a/source/dnode/vnode/tq/src/tqMetaStore.c +++ b/source/dnode/vnode/tq/src/tqMetaStore.c @@ -27,15 +27,14 @@ static int32_t tqHandlePutCommitted(TqMetaStore*, int64_t key, void* value); static void* tqHandleGetUncommitted(TqMetaStore*, int64_t key); static inline void tqLinkUnpersist(TqMetaStore *pMeta, TqMetaList* pNode) { - if(pNode->unpersistNext == NULL) { - pNode->unpersistNext = pMeta->unpersistHead->unpersistNext; - pNode->unpersistPrev = pMeta->unpersistHead; - pMeta->unpersistHead->unpersistNext->unpersistPrev = pNode; - pMeta->unpersistHead->unpersistNext = pNode; - } + if(pNode->unpersistNext == NULL) { + pNode->unpersistNext = pMeta->unpersistHead->unpersistNext; + pNode->unpersistPrev = pMeta->unpersistHead; + pMeta->unpersistHead->unpersistNext->unpersistPrev = pNode; + pMeta->unpersistHead->unpersistNext = pNode; + } } - typedef struct TqMetaPageBuf { int16_t offset; char buffer[TQ_PAGE_SIZE]; @@ -401,7 +400,7 @@ void* tqHandleGet(TqMetaStore* pMeta, int64_t key) { TqMetaList* pNode = pMeta->bucket[bucketKey]; while(pNode) { if(pNode->handle.key == key) { - if(pNode->handle.valueInUse != NULL) { + if(pNode->handle.valueInUse != NULL && pNode->handle.valueInUse != TQ_DELETE_TOKEN) { return pNode->handle.valueInUse; } else { return NULL; @@ -546,9 +545,10 @@ int32_t tqHandleDel(TqMetaStore* pMeta, int64_t key) { int64_t bucketKey = key & TQ_BUCKET_SIZE; TqMetaList* pNode = pMeta->bucket[bucketKey]; while(pNode) { - if(pNode->handle.valueInTxn - && pNode->handle.valueInTxn != TQ_DELETE_TOKEN) { - pMeta->deleter(pNode->handle.valueInTxn); + if(pNode->handle.valueInTxn != TQ_DELETE_TOKEN) { + if(pNode->handle.valueInTxn) { + 
pMeta->deleter(pNode->handle.valueInTxn); + } pNode->handle.valueInTxn = TQ_DELETE_TOKEN; tqLinkUnpersist(pMeta, pNode); return 0; diff --git a/source/dnode/vnode/tq/test/tqMetaTest.cpp b/source/dnode/vnode/tq/test/tqMetaTest.cpp index 20a0368c4c..a1021233db 100644 --- a/source/dnode/vnode/tq/test/tqMetaTest.cpp +++ b/source/dnode/vnode/tq/test/tqMetaTest.cpp @@ -130,4 +130,11 @@ TEST_F(TqMetaTest, deleteTest) { tqHandleCommit(pMeta, 1); pFoo = (Foo*) tqHandleGet(pMeta, 1); EXPECT_EQ(pFoo == NULL, true); + + tqStoreClose(pMeta); + pMeta = tqStoreOpen(pathName, + FooSerializer, FooDeserializer, FooDeleter); + ASSERT(pMeta); + pFoo = (Foo*) tqHandleGet(pMeta, 1); + EXPECT_EQ(pFoo == NULL, true); } From 5ebc77961d36c786d2cccafcea607fd396852a7a Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Fri, 5 Nov 2021 17:48:58 +0800 Subject: [PATCH 44/94] fix tq invalid free --- source/dnode/vnode/tq/inc/tqMetaStore.h | 2 ++ source/dnode/vnode/tq/src/tqMetaStore.c | 11 +++++-- source/dnode/vnode/tq/test/tqMetaTest.cpp | 40 +++++++++++++++++++++++ 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/tq/inc/tqMetaStore.h b/source/dnode/vnode/tq/inc/tqMetaStore.h index 52cc767409..3d1473a443 100644 --- a/source/dnode/vnode/tq/inc/tqMetaStore.h +++ b/source/dnode/vnode/tq/inc/tqMetaStore.h @@ -92,6 +92,8 @@ int32_t tqStoreClose(TqMetaStore*); //int32_t tqStoreDelete(TqMetaStore*); //int32_t TqStoreCommitAll(TqMetaStore*); int32_t tqStorePersist(TqMetaStore*); +//clean deleted idx and data from persistent file +int32_t tqStoreCompact(TqMetaStore*); void* tqHandleGet(TqMetaStore*, int64_t key); int32_t tqHandleMovePut(TqMetaStore*, int64_t key, void* value); diff --git a/source/dnode/vnode/tq/src/tqMetaStore.c b/source/dnode/vnode/tq/src/tqMetaStore.c index eb2c3404fc..079aae7435 100644 --- a/source/dnode/vnode/tq/src/tqMetaStore.c +++ b/source/dnode/vnode/tq/src/tqMetaStore.c @@ -153,9 +153,9 @@ TqMetaStore* tqStoreOpen(const char* path, } else { 
pNode->handle.valueInUse = TQ_DELETE_TOKEN; } - serializedObj = POINTER_SHIFT(serializedObj, serializedObj->ssize); - if(serializedObj->ssize != sizeof(TqSerializedHead)) { - pMeta->deserializer(serializedObj, &pNode->handle.valueInTxn); + TqSerializedHead* ptr = POINTER_SHIFT(serializedObj, serializedObj->ssize); + if(ptr->ssize != sizeof(TqSerializedHead)) { + pMeta->deserializer(ptr, &pNode->handle.valueInTxn); } else { pNode->handle.valueInTxn = TQ_DELETE_TOKEN; } @@ -591,3 +591,8 @@ int32_t tqHandleClear(TqMetaStore* pMeta, int64_t key) { } return -2; } + +//TODO: clean deleted idx and data from persistent file +int32_t tqStoreCompact(TqMetaStore *pMeta) { + return 0; +} diff --git a/source/dnode/vnode/tq/test/tqMetaTest.cpp b/source/dnode/vnode/tq/test/tqMetaTest.cpp index a1021233db..4bf56a0a56 100644 --- a/source/dnode/vnode/tq/test/tqMetaTest.cpp +++ b/source/dnode/vnode/tq/test/tqMetaTest.cpp @@ -57,6 +57,10 @@ TEST_F(TqMetaTest, copyPutTest) { Foo* pFoo = (Foo*) tqHandleGet(pMeta, 1); EXPECT_EQ(pFoo == NULL, true); + + tqHandleCommit(pMeta, 1); + pFoo = (Foo*) tqHandleGet(pMeta, 1); + EXPECT_EQ(pFoo->a, 3); } TEST_F(TqMetaTest, persistTest) { @@ -135,6 +139,42 @@ TEST_F(TqMetaTest, deleteTest) { pMeta = tqStoreOpen(pathName, FooSerializer, FooDeserializer, FooDeleter); ASSERT(pMeta); + pFoo = (Foo*) tqHandleGet(pMeta, 1); EXPECT_EQ(pFoo == NULL, true); } + +TEST_F(TqMetaTest, intxnPersist) { + Foo* pFoo = (Foo*)malloc(sizeof(Foo)); + pFoo->a = 3; + tqHandleMovePut(pMeta, 1, pFoo); + tqHandleCommit(pMeta, 1); + + Foo* pBar = (Foo*)malloc(sizeof(Foo)); + pBar->a = 4; + tqHandleMovePut(pMeta, 1, pBar); + + Foo* pFoo1 = (Foo*)tqHandleGet(pMeta, 1); + EXPECT_EQ(pFoo1->a, 3); + + tqStoreClose(pMeta); + pMeta = tqStoreOpen(pathName, + FooSerializer, FooDeserializer, FooDeleter); + ASSERT(pMeta); + + pFoo1 = (Foo*)tqHandleGet(pMeta, 1); + EXPECT_EQ(pFoo1->a, 3); + + tqHandleCommit(pMeta, 1); + + pFoo1 = (Foo*)tqHandleGet(pMeta, 1); + EXPECT_EQ(pFoo1->a, 4); + + 
tqStoreClose(pMeta); + pMeta = tqStoreOpen(pathName, + FooSerializer, FooDeserializer, FooDeleter); + ASSERT(pMeta); + + pFoo1 = (Foo*)tqHandleGet(pMeta, 1); + EXPECT_EQ(pFoo1->a, 4); +} From 1f1f6c5af607724e10b7ee67fde1d8d465d9f628 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Fri, 5 Nov 2021 20:18:22 +0800 Subject: [PATCH 45/94] remove tqHandleClear interface --- source/dnode/vnode/tq/inc/tqMetaStore.h | 8 ++---- source/dnode/vnode/tq/src/tqMetaStore.c | 35 ++----------------------- 2 files changed, 4 insertions(+), 39 deletions(-) diff --git a/source/dnode/vnode/tq/inc/tqMetaStore.h b/source/dnode/vnode/tq/inc/tqMetaStore.h index 3d1473a443..73a3d26aeb 100644 --- a/source/dnode/vnode/tq/inc/tqMetaStore.h +++ b/source/dnode/vnode/tq/inc/tqMetaStore.h @@ -98,15 +98,11 @@ int32_t tqStoreCompact(TqMetaStore*); void* tqHandleGet(TqMetaStore*, int64_t key); int32_t tqHandleMovePut(TqMetaStore*, int64_t key, void* value); int32_t tqHandleCopyPut(TqMetaStore*, int64_t key, void* value, size_t vsize); -//do commit -int32_t tqHandleCommit(TqMetaStore*, int64_t key); -//delete uncommitted -int32_t tqHandleAbort(TqMetaStore*, int64_t key); //delete committed kv pair //notice that a delete action still needs to be committed int32_t tqHandleDel(TqMetaStore*, int64_t key); -//delete both committed and uncommitted -int32_t tqHandleClear(TqMetaStore*, int64_t key); +int32_t tqHandleCommit(TqMetaStore*, int64_t key); +int32_t tqHandleAbort(TqMetaStore*, int64_t key); #ifdef __cplusplus } diff --git a/source/dnode/vnode/tq/src/tqMetaStore.c b/source/dnode/vnode/tq/src/tqMetaStore.c index 079aae7435..9b91a8e051 100644 --- a/source/dnode/vnode/tq/src/tqMetaStore.c +++ b/source/dnode/vnode/tq/src/tqMetaStore.c @@ -400,7 +400,8 @@ void* tqHandleGet(TqMetaStore* pMeta, int64_t key) { TqMetaList* pNode = pMeta->bucket[bucketKey]; while(pNode) { if(pNode->handle.key == key) { - if(pNode->handle.valueInUse != NULL && pNode->handle.valueInUse != TQ_DELETE_TOKEN) { + 
if(pNode->handle.valueInUse != NULL + && pNode->handle.valueInUse != TQ_DELETE_TOKEN) { return pNode->handle.valueInUse; } else { return NULL; @@ -560,38 +561,6 @@ int32_t tqHandleDel(TqMetaStore* pMeta, int64_t key) { return -1; } -int32_t tqHandleClear(TqMetaStore* pMeta, int64_t key) { - int64_t bucketKey = key & TQ_BUCKET_SIZE; - TqMetaList* pNode = pMeta->bucket[bucketKey]; - bool exist = false; - while(pNode) { - if(pNode->handle.key == key) { - if(pNode->handle.valueInUse != NULL) { - exist = true; - if(pNode->handle.valueInUse != TQ_DELETE_TOKEN) { - pMeta->deleter(pNode->handle.valueInUse); - } - pNode->handle.valueInUse = TQ_DELETE_TOKEN; - } - if(pNode->handle.valueInTxn != NULL) { - exist = true; - if(pNode->handle.valueInTxn != TQ_DELETE_TOKEN) { - pMeta->deleter(pNode->handle.valueInTxn); - } - pNode->handle.valueInTxn = TQ_DELETE_TOKEN; - } - if(exist) { - tqLinkUnpersist(pMeta, pNode); - return 0; - } - return -1; - } else { - pNode = pNode->next; - } - } - return -2; -} - //TODO: clean deleted idx and data from persistent file int32_t tqStoreCompact(TqMetaStore *pMeta) { return 0; From 993d28df0f6be539e60337862d13af04f253d47d Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Sun, 7 Nov 2021 15:58:32 +0800 Subject: [PATCH 46/94] save --- include/server/vnode/impl/vnodeImpl.h | 39 ++++++++++++++++++++++ include/server/vnode/vnode.h | 26 +++++++++++---- source/dnode/vnode/impl/CMakeLists.txt | 1 + source/dnode/vnode/impl/inc/vnodeDef.h | 32 ++++++++++++++++++ source/dnode/vnode/impl/inc/vnodeInt.h | 10 ------ source/dnode/vnode/impl/inc/vnodeRead.h | 2 ++ source/dnode/vnode/impl/inc/vnodeWrite.h | 5 ++- source/dnode/vnode/impl/src/vnodeInt.c | 3 ++ source/dnode/vnode/impl/src/vnodeMain.c | 27 +++++++++++++++ source/dnode/vnode/impl/src/vnodeOptions.c | 20 +++++++++++ 10 files changed, 148 insertions(+), 17 deletions(-) create mode 100644 include/server/vnode/impl/vnodeImpl.h create mode 100644 source/dnode/vnode/impl/inc/vnodeDef.h create mode 100644 
source/dnode/vnode/impl/src/vnodeMain.c create mode 100644 source/dnode/vnode/impl/src/vnodeOptions.c diff --git a/include/server/vnode/impl/vnodeImpl.h b/include/server/vnode/impl/vnodeImpl.h new file mode 100644 index 0000000000..580b770246 --- /dev/null +++ b/include/server/vnode/impl/vnodeImpl.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_VNODE_IMPL_H_ +#define _TD_VNODE_IMPL_H_ + +#include "os.h" + +#include "meta.h" +#include "tq.h" +#include "tsdb.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct SVnodeOptions { + STsdbOptions tsdbOptions; + SMetaOptions metaOptions; + // STqOptions tqOptions; // TODO +}; + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_VNODE_IMPL_H_*/ \ No newline at end of file diff --git a/include/server/vnode/vnode.h b/include/server/vnode/vnode.h index e570cf4261..501c934730 100644 --- a/include/server/vnode/vnode.h +++ b/include/server/vnode/vnode.h @@ -16,15 +16,29 @@ #ifndef _TD_VNODE_H_ #define _TD_VNODE_H_ -#include "os.h" -#include "taosmsg.h" -#include "trpc.h" +#include "impl/vnodeImpl.h" #ifdef __cplusplus extern "C" { #endif -typedef struct SVnode SVnode; +/* ------------------------ TYPES EXPOSED ------------------------ */ +typedef struct SVnode SVnode; +typedef struct SVnodeOptions SVnodeOptions; + +/* ------------------------ SVnode ------------------------ */ +SVnode *vnodeOpen(const char *path, const SVnodeOptions *pVnodeOptions); +void 
vnodeClose(SVnode *pVnode); +void vnodeDestroy(const char *path); + +/* ------------------------ SVnodeOptions ------------------------ */ +void vnodeOptionsInit(SVnodeOptions *); +void vnodeOptionsClear(SVnodeOptions *); + +#if 1 + +#include "taosMsg.h" +#include "trpc.h" typedef struct { char db[TSDB_FULL_DB_NAME_LEN]; @@ -70,8 +84,6 @@ typedef struct { int32_t vnodeInit(SVnodePara); void vnodeCleanup(); -SVnode *vnodeOpen(int32_t vgId, const char *path); -void vnodeClose(SVnode *pVnode); int32_t vnodeAlter(SVnode *pVnode, const SVnodeCfg *pCfg); SVnode *vnodeCreate(int32_t vgId, const char *path, const SVnodeCfg *pCfg); void vnodeDrop(SVnode *pVnode); @@ -85,6 +97,8 @@ int32_t vnodeAppendMsg(SVnodeMsg *pMsg, SRpcMsg *pRpcMsg); void vnodeCleanupMsg(SVnodeMsg *pMsg); void vnodeProcessMsg(SVnode *pVnode, SVnodeMsg *pMsg, EVnMsgType msgType); +#endif + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/impl/CMakeLists.txt b/source/dnode/vnode/impl/CMakeLists.txt index 81744df79c..fa26b18d0b 100644 --- a/source/dnode/vnode/impl/CMakeLists.txt +++ b/source/dnode/vnode/impl/CMakeLists.txt @@ -7,6 +7,7 @@ target_include_directories( ) target_link_libraries( vnode + PUBLIC os PUBLIC transport PUBLIC meta PUBLIC tq diff --git a/source/dnode/vnode/impl/inc/vnodeDef.h b/source/dnode/vnode/impl/inc/vnodeDef.h new file mode 100644 index 0000000000..6a327ceefa --- /dev/null +++ b/source/dnode/vnode/impl/inc/vnodeDef.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_VNODE_DEF_H_ +#define _TD_VNODE_DEF_H_ + +#include "vnode.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct SVnode { +}; + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_VNODE_DEF_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/impl/inc/vnodeInt.h b/source/dnode/vnode/impl/inc/vnodeInt.h index d7b84be83d..957690a451 100644 --- a/source/dnode/vnode/impl/inc/vnodeInt.h +++ b/source/dnode/vnode/impl/inc/vnodeInt.h @@ -39,16 +39,6 @@ extern int32_t vDebugFlag; #define vDebug(...) { if (vDebugFlag & DEBUG_DEBUG) { taosPrintLog("VND ", vDebugFlag, __VA_ARGS__); }} #define vTrace(...) { if (vDebugFlag & DEBUG_TRACE) { taosPrintLog("VND ", vDebugFlag, __VA_ARGS__); }} -typedef struct SVnode { - int32_t vgId; - SVnodeCfg cfg; - SMeta *pMeta; - STsdb *pTsdb; - STQ *pTQ; - SWal *pWal; - SSyncNode *pSync; -} SVnode; - #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/impl/inc/vnodeRead.h b/source/dnode/vnode/impl/inc/vnodeRead.h index 5ce84b2ebf..6f5028d00e 100644 --- a/source/dnode/vnode/impl/inc/vnodeRead.h +++ b/source/dnode/vnode/impl/inc/vnodeRead.h @@ -21,6 +21,8 @@ extern "C" { #endif #include "vnodeInt.h" +typedef void SVnodeMsg; + void vnodeProcessReadMsg(SVnode *pVnode, SVnodeMsg *pMsg); #ifdef __cplusplus diff --git a/source/dnode/vnode/impl/inc/vnodeWrite.h b/source/dnode/vnode/impl/inc/vnodeWrite.h index 11fe9836f6..6ee773f79b 100644 --- a/source/dnode/vnode/impl/inc/vnodeWrite.h +++ b/source/dnode/vnode/impl/inc/vnodeWrite.h @@ -16,10 +16,13 @@ #ifndef _TD_VNODE_WRITE_H_ #define _TD_VNODE_WRITE_H_ +#include "vnode.h" + #ifdef __cplusplus extern "C" { #endif -#include "vnodeInt.h" + +typedef void SVnodeMsg; void vnodeProcessWriteMsg(SVnode* pVnode, SVnodeMsg* pMsg); diff --git a/source/dnode/vnode/impl/src/vnodeInt.c b/source/dnode/vnode/impl/src/vnodeInt.c index 427a6dae4d..aded02ab04 
100644 --- a/source/dnode/vnode/impl/src/vnodeInt.c +++ b/source/dnode/vnode/impl/src/vnodeInt.c @@ -13,6 +13,7 @@ * along with this program. If not, see . */ +#if 0 #define _DEFAULT_SOURCE #include "vnodeInt.h" #include "tqueue.h" @@ -70,3 +71,5 @@ void vnodeProcessMsg(SVnode *pVnode, SVnodeMsg *pMsg, EVnMsgType msgType) { break; } } + +#endif \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeMain.c b/source/dnode/vnode/impl/src/vnodeMain.c new file mode 100644 index 0000000000..f074dd9876 --- /dev/null +++ b/source/dnode/vnode/impl/src/vnodeMain.c @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "vnodeDef.h" + +SVnode *vnodeOpen(const char *path, const SVnodeOptions *pVnodeOptions) { + SVnode *pVnode = NULL; + /* TODO */ + return pVnode; +} + +void vnodeCloee(SVnode *pVnode) { /* TODO */ +} + +void vnodeDestroy(const char *path) { taosRemoveDir(path); } \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeOptions.c b/source/dnode/vnode/impl/src/vnodeOptions.c new file mode 100644 index 0000000000..a384eb9e53 --- /dev/null +++ b/source/dnode/vnode/impl/src/vnodeOptions.c @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "vnodeDef.h" + +void vnodeOptionsInit(SVnodeOptions *pVnodeOptions) { /* TODO */ } + +void vnodeOptionsClear(SVnodeOptions *pVnodeOptions) { /* TODO */ } \ No newline at end of file From 113bfd51ba40618179f196fda090854c0c2ef3ea Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 09:39:20 +0800 Subject: [PATCH 47/94] make compile --- include/server/vnode/vnode.h | 2 +- source/dnode/vnode/impl/inc/vnodeRead.h | 2 -- source/dnode/vnode/impl/inc/vnodeWrite.h | 2 -- source/dnode/vnode/impl/src/vnodeInt.c | 6 +----- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/include/server/vnode/vnode.h b/include/server/vnode/vnode.h index 71e28f72de..66e17bdf48 100644 --- a/include/server/vnode/vnode.h +++ b/include/server/vnode/vnode.h @@ -37,7 +37,7 @@ void vnodeOptionsClear(SVnodeOptions *); #if 1 -#include "taosMsg.h" +#include "taosmsg.h" #include "trpc.h" typedef struct { diff --git a/source/dnode/vnode/impl/inc/vnodeRead.h b/source/dnode/vnode/impl/inc/vnodeRead.h index 6f5028d00e..5ce84b2ebf 100644 --- a/source/dnode/vnode/impl/inc/vnodeRead.h +++ b/source/dnode/vnode/impl/inc/vnodeRead.h @@ -21,8 +21,6 @@ extern "C" { #endif #include "vnodeInt.h" -typedef void SVnodeMsg; - void vnodeProcessReadMsg(SVnode *pVnode, SVnodeMsg *pMsg); #ifdef __cplusplus diff --git a/source/dnode/vnode/impl/inc/vnodeWrite.h b/source/dnode/vnode/impl/inc/vnodeWrite.h index 6ee773f79b..57f19b11c4 100644 --- a/source/dnode/vnode/impl/inc/vnodeWrite.h +++ b/source/dnode/vnode/impl/inc/vnodeWrite.h @@ -22,8 +22,6 @@ extern "C" { #endif -typedef void SVnodeMsg; - void vnodeProcessWriteMsg(SVnode* pVnode, SVnodeMsg* pMsg); #ifdef __cplusplus 
diff --git a/source/dnode/vnode/impl/src/vnodeInt.c b/source/dnode/vnode/impl/src/vnodeInt.c index aded02ab04..5f40fa5f73 100644 --- a/source/dnode/vnode/impl/src/vnodeInt.c +++ b/source/dnode/vnode/impl/src/vnodeInt.c @@ -13,7 +13,6 @@ * along with this program. If not, see . */ -#if 0 #define _DEFAULT_SOURCE #include "vnodeInt.h" #include "tqueue.h" @@ -21,7 +20,6 @@ int32_t vnodeInit(SVnodePara para) { return 0; } void vnodeCleanup() {} -SVnode *vnodeOpen(int32_t vgId, const char *path) { return NULL; } void vnodeClose(SVnode *pVnode) {} int32_t vnodeAlter(SVnode *pVnode, const SVnodeCfg *pCfg) { return 0; } SVnode *vnodeCreate(int32_t vgId, const char *path, const SVnodeCfg *pCfg) { return NULL; } @@ -70,6 +68,4 @@ void vnodeProcessMsg(SVnode *pVnode, SVnodeMsg *pMsg, EVnMsgType msgType) { case VN_MSG_TYPE_FETCH: break; } -} - -#endif \ No newline at end of file +} \ No newline at end of file From d78480998f278b7445769bcbbabdf9146dee9790 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 10:04:24 +0800 Subject: [PATCH 48/94] more --- source/dnode/mgmt/src/dnodeVnodes.c | 2 +- source/dnode/vnode/impl/inc/vnodeDef.h | 6 ++++ source/dnode/vnode/impl/inc/vnodeOptions.h | 33 ++++++++++++++++++ source/dnode/vnode/impl/src/vnodeInt.c | 1 - source/dnode/vnode/impl/src/vnodeMain.c | 40 ++++++++++++++++++++-- source/dnode/vnode/impl/src/vnodeOptions.c | 13 +++++-- 6 files changed, 88 insertions(+), 7 deletions(-) create mode 100644 source/dnode/vnode/impl/inc/vnodeOptions.h diff --git a/source/dnode/mgmt/src/dnodeVnodes.c b/source/dnode/mgmt/src/dnodeVnodes.c index c23773f92f..4ec9e1dc60 100644 --- a/source/dnode/mgmt/src/dnodeVnodes.c +++ b/source/dnode/mgmt/src/dnodeVnodes.c @@ -376,7 +376,7 @@ static void *dnodeOpenVnodeFunc(void *param) { char path[PATH_MAX + 20] = {0}; snprintf(path, sizeof(path),"%s/vnode%d", tsVnodeDir, pVnode->vgId); - SVnode *pImpl = vnodeOpen(pVnode->vgId, path); + SVnode *pImpl = vnodeOpen(path, NULL); if (pImpl == NULL) { 
dError("vgId:%d, failed to open vnode by thread:%d", pVnode->vgId, pThread->threadIndex); pThread->failed++; diff --git a/source/dnode/vnode/impl/inc/vnodeDef.h b/source/dnode/vnode/impl/inc/vnodeDef.h index 6a327ceefa..a42cd93b01 100644 --- a/source/dnode/vnode/impl/inc/vnodeDef.h +++ b/source/dnode/vnode/impl/inc/vnodeDef.h @@ -17,12 +17,18 @@ #define _TD_VNODE_DEF_H_ #include "vnode.h" +#include "vnodeOptions.h" #ifdef __cplusplus extern "C" { #endif struct SVnode { + char* path; + SVnodeOptions options; + SMeta* pMeta; + STsdb* pTsdb; + STQ* pTq; }; #ifdef __cplusplus diff --git a/source/dnode/vnode/impl/inc/vnodeOptions.h b/source/dnode/vnode/impl/inc/vnodeOptions.h new file mode 100644 index 0000000000..4b31de1966 --- /dev/null +++ b/source/dnode/vnode/impl/inc/vnodeOptions.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef _TD_VNODE_OPTIONS_H_ +#define _TD_VNODE_OPTIONS_H_ + +#include "vnode.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const SVnodeOptions defaultVnodeOptions; + +int vnodeValidateOptions(const SVnodeOptions*); + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_VNODE_OPTIONS_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeInt.c b/source/dnode/vnode/impl/src/vnodeInt.c index 5f40fa5f73..2cbdf318a2 100644 --- a/source/dnode/vnode/impl/src/vnodeInt.c +++ b/source/dnode/vnode/impl/src/vnodeInt.c @@ -20,7 +20,6 @@ int32_t vnodeInit(SVnodePara para) { return 0; } void vnodeCleanup() {} -void vnodeClose(SVnode *pVnode) {} int32_t vnodeAlter(SVnode *pVnode, const SVnodeCfg *pCfg) { return 0; } SVnode *vnodeCreate(int32_t vgId, const char *path, const SVnodeCfg *pCfg) { return NULL; } void vnodeDrop(SVnode *pVnode) {} diff --git a/source/dnode/vnode/impl/src/vnodeMain.c b/source/dnode/vnode/impl/src/vnodeMain.c index f074dd9876..d531b44f15 100644 --- a/source/dnode/vnode/impl/src/vnodeMain.c +++ b/source/dnode/vnode/impl/src/vnodeMain.c @@ -15,13 +15,47 @@ #include "vnodeDef.h" +static SVnode *vnodeNew(const char *path, const SVnodeOptions *pVnodeOptions); +static void vnodeFree(SVnode *pVnode); + SVnode *vnodeOpen(const char *path, const SVnodeOptions *pVnodeOptions) { SVnode *pVnode = NULL; - /* TODO */ + + // Set default options + if (pVnodeOptions == NULL) { + pVnodeOptions = &defaultVnodeOptions; + } + + // Validate options + if (vnodeValidateOptions(pVnodeOptions) < 0) { + // TODO + return NULL; + } + + pVnode = vnodeNew(path, pVnodeOptions); + if (pVnode == NULL) { + // TODO: handle error + return NULL; + } + + taosMkDir(path); + return pVnode; } -void vnodeCloee(SVnode *pVnode) { /* TODO */ +void vnodeClose(SVnode *pVnode) { /* TODO */ } -void vnodeDestroy(const char *path) { taosRemoveDir(path); } \ No newline at end of file +void vnodeDestroy(const char *path) { taosRemoveDir(path); } + +/* 
------------------------ STATIC METHODS ------------------------ */ +static SVnode *vnodeNew(const char *path, const SVnodeOptions *pVnodeOptions) { + // TODO + return NULL; +} + +static void vnodeFree(SVnode *pVnode) { + if (pVnode) { + // TODO + } +} \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeOptions.c b/source/dnode/vnode/impl/src/vnodeOptions.c index a384eb9e53..ebf1dc35ba 100644 --- a/source/dnode/vnode/impl/src/vnodeOptions.c +++ b/source/dnode/vnode/impl/src/vnodeOptions.c @@ -15,6 +15,15 @@ #include "vnodeDef.h" -void vnodeOptionsInit(SVnodeOptions *pVnodeOptions) { /* TODO */ } +const SVnodeOptions defaultVnodeOptions = {0}; /* TODO */ -void vnodeOptionsClear(SVnodeOptions *pVnodeOptions) { /* TODO */ } \ No newline at end of file +void vnodeOptionsInit(SVnodeOptions *pVnodeOptions) { /* TODO */ +} + +void vnodeOptionsClear(SVnodeOptions *pVnodeOptions) { /* TODO */ +} + +int vnodeValidateOptions(const SVnodeOptions *pVnodeOptions) { + // TODO + return 0; +} \ No newline at end of file From d5af05860e3bde6f45ff42785494c27a6d1db139 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 10:17:51 +0800 Subject: [PATCH 49/94] refact --- source/dnode/vnode/impl/inc/vnodeOptions.h | 3 +- source/dnode/vnode/impl/src/vnodeMain.c | 39 ++++++++++++++++++++-- source/dnode/vnode/impl/src/vnodeOptions.c | 4 +++ 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/impl/inc/vnodeOptions.h b/source/dnode/vnode/impl/inc/vnodeOptions.h index 4b31de1966..edb4be2a77 100644 --- a/source/dnode/vnode/impl/inc/vnodeOptions.h +++ b/source/dnode/vnode/impl/inc/vnodeOptions.h @@ -24,7 +24,8 @@ extern "C" { extern const SVnodeOptions defaultVnodeOptions; -int vnodeValidateOptions(const SVnodeOptions*); +int vnodeValidateOptions(const SVnodeOptions *); +void vnodeOptionsCopy(SVnodeOptions *pDest, const SVnodeOptions *pSrc); #ifdef __cplusplus } diff --git a/source/dnode/vnode/impl/src/vnodeMain.c 
b/source/dnode/vnode/impl/src/vnodeMain.c index d531b44f15..cc9a1d76bc 100644 --- a/source/dnode/vnode/impl/src/vnodeMain.c +++ b/source/dnode/vnode/impl/src/vnodeMain.c @@ -17,6 +17,8 @@ static SVnode *vnodeNew(const char *path, const SVnodeOptions *pVnodeOptions); static void vnodeFree(SVnode *pVnode); +static int vnodeOpenImpl(SVnode *pVnode); +static void vnodeCloseImpl(SVnode *pVnode); SVnode *vnodeOpen(const char *path, const SVnodeOptions *pVnodeOptions) { SVnode *pVnode = NULL; @@ -32,6 +34,7 @@ SVnode *vnodeOpen(const char *path, const SVnodeOptions *pVnodeOptions) { return NULL; } + // Create the handle pVnode = vnodeNew(path, pVnodeOptions); if (pVnode == NULL) { // TODO: handle error @@ -40,22 +43,52 @@ SVnode *vnodeOpen(const char *path, const SVnodeOptions *pVnodeOptions) { taosMkDir(path); + // Open the vnode + if (vnodeOpenImpl(pVnode) < 0) { + // TODO: handle error + return NULL; + } + return pVnode; } -void vnodeClose(SVnode *pVnode) { /* TODO */ +void vnodeClose(SVnode *pVnode) { + if (pVnode) { + vnodeCloseImpl(pVnode); + vnodeFree(pVnode); + } } void vnodeDestroy(const char *path) { taosRemoveDir(path); } /* ------------------------ STATIC METHODS ------------------------ */ static SVnode *vnodeNew(const char *path, const SVnodeOptions *pVnodeOptions) { - // TODO + SVnode *pVnode = NULL; + + pVnode = (SVnode *)calloc(1, sizeof(*pVnode)); + if (pVnode == NULL) { + // TODO + return NULL; + } + + pVnode->path = strdup(path); + vnodeOptionsCopy(&(pVnode->options), pVnodeOptions); + return NULL; } static void vnodeFree(SVnode *pVnode) { if (pVnode) { - // TODO + tfree(pVnode->path); + free(pVnode); } +} + +static int vnodeOpenImpl(SVnode *pVnode) { + // TODO + return 0; +} + +static void vnodeCloseImpl(SVnode *pVnode) { + // TODO } \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeOptions.c b/source/dnode/vnode/impl/src/vnodeOptions.c index ebf1dc35ba..d5f986d439 100644 --- a/source/dnode/vnode/impl/src/vnodeOptions.c +++ 
b/source/dnode/vnode/impl/src/vnodeOptions.c @@ -26,4 +26,8 @@ void vnodeOptionsClear(SVnodeOptions *pVnodeOptions) { /* TODO */ int vnodeValidateOptions(const SVnodeOptions *pVnodeOptions) { // TODO return 0; +} + +void vnodeOptionsCopy(SVnodeOptions *pDest, const SVnodeOptions *pSrc) { + memcpy((void *)pDest, (void *)pSrc, sizeof(SVnodeOptions)); } \ No newline at end of file From 8b608cd3b71d892bea310603b8e1b6e6c211d60d Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 10:32:36 +0800 Subject: [PATCH 50/94] refact --- include/server/vnode/impl/vnodeImpl.h | 1 + include/server/vnode/vnode.h | 4 ++++ source/dnode/vnode/impl/src/vnodeWrite.c | 13 +++++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/include/server/vnode/impl/vnodeImpl.h b/include/server/vnode/impl/vnodeImpl.h index 580b770246..0e8a47742c 100644 --- a/include/server/vnode/impl/vnodeImpl.h +++ b/include/server/vnode/impl/vnodeImpl.h @@ -17,6 +17,7 @@ #define _TD_VNODE_IMPL_H_ #include "os.h" +#include "trequest.h" #include "meta.h" #include "tq.h" diff --git a/include/server/vnode/vnode.h b/include/server/vnode/vnode.h index 66e17bdf48..227bfe6594 100644 --- a/include/server/vnode/vnode.h +++ b/include/server/vnode/vnode.h @@ -30,6 +30,10 @@ typedef struct SVnodeOptions SVnodeOptions; SVnode *vnodeOpen(const char *path, const SVnodeOptions *pVnodeOptions); void vnodeClose(SVnode *pVnode); void vnodeDestroy(const char *path); +int vnodeProcessWriteReqs(SVnode *pVnode, SReqBatch *pReqBatch); +int vnodeApplyWriteReqs(SVnode *pVnode, SReqBatch *pReqBatch); +int vnodeProcessReadReq(SVnode *pVnode, SRequest *pReq); +int vnodeProcessSyncReq(SVnode *pVnode, SRequest *pReq); /* ------------------------ SVnodeOptions ------------------------ */ void vnodeOptionsInit(SVnodeOptions *); diff --git a/source/dnode/vnode/impl/src/vnodeWrite.c b/source/dnode/vnode/impl/src/vnodeWrite.c index dc12ffb241..d4acadd695 100644 --- a/source/dnode/vnode/impl/src/vnodeWrite.c +++ 
b/source/dnode/vnode/impl/src/vnodeWrite.c @@ -13,5 +13,14 @@ * along with this program. If not, see . */ -#define _DEFAULT_SOURCE -#include "vnodeWrite.h" +#include "vnodeDef.h" + +int vnodeProcessWriteReqs(SVnode *pVnode, SReqBatch *pReqBatch) { + /* TODO */ + return 0; +} + +int vnodeApplyWriteReqs(SVnode *pVnode, SReqBatch *pReqBatch) { + /* TODO */ + return 0; +} \ No newline at end of file From 6538f86e5fb09c510d9ffe1a34dc08d56db76379 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 10:58:46 +0800 Subject: [PATCH 51/94] more --- include/common/trequest.h | 16 ++++++++++++++++ source/dnode/vnode/impl/src/vnodeWrite.c | 24 ++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/include/common/trequest.h b/include/common/trequest.h index d9e5bf9a92..6932122bc9 100644 --- a/include/common/trequest.h +++ b/include/common/trequest.h @@ -20,10 +20,26 @@ extern "C" { #endif +/* ------------------------ TYPES EXPOSED ------------------------ */ typedef struct SRequest SRequest; typedef struct SReqBatch SReqBatch; typedef struct SReqBatchIter SReqBatchIter; +// SRequest + +// SReqBatch + +// SReqBatchIter +void tdInitRBIter(SReqBatchIter *pIter, SReqBatch *pReqBatch); +const SRequest *tdRBIterNext(SReqBatchIter *pIter); +void tdClearRBIter(SReqBatchIter *pIter); + +/* ------------------------ TYPES DEFINITION ------------------------ */ +struct SReqBatchIter { + int iReq; + SReqBatch *pReqBatch; +}; + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/impl/src/vnodeWrite.c b/source/dnode/vnode/impl/src/vnodeWrite.c index d4acadd695..179ff42cd9 100644 --- a/source/dnode/vnode/impl/src/vnodeWrite.c +++ b/source/dnode/vnode/impl/src/vnodeWrite.c @@ -15,12 +15,36 @@ #include "vnodeDef.h" +static int vnodeApplyWriteRequest(SVnode *pVnode, const SRequest *pRequest); + int vnodeProcessWriteReqs(SVnode *pVnode, SReqBatch *pReqBatch) { /* TODO */ return 0; } int vnodeApplyWriteReqs(SVnode *pVnode, SReqBatch *pReqBatch) { + SReqBatchIter 
rbIter; + + tdInitRBIter(&rbIter, pReqBatch); + + for (;;) { + const SRequest *pReq = tdRBIterNext(&rbIter); + if (pReq == NULL) { + break; + } + + if (vnodeApplyWriteRequest(pVnode, pReq) < 0) { + // TODO + } + } + + tdClearRBIter(&rbIter); + + return 0; +} + +/* ------------------------ STATIC METHODS ------------------------ */ +static int vnodeApplyWriteRequest(SVnode *pVnode, const SRequest *pRequest) { /* TODO */ return 0; } \ No newline at end of file From e26fbd80fda7b069d0338de4976d556242e071ff Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 11:43:39 +0800 Subject: [PATCH 52/94] integrate nuraft --- CMakeLists.txt | 5 +++++ cmake/cmake.options | 6 +++++ cmake/nuraft_CMakeLists.txt.in | 12 ++++++++++ deps/CMakeLists.txt | 6 +++++ include/server/vnode/vnode.h | 2 +- source/dnode/vnode/impl/src/vnodeWrite.c | 28 +++--------------------- 6 files changed, 33 insertions(+), 26 deletions(-) create mode 100644 cmake/nuraft_CMakeLists.txt.in diff --git a/CMakeLists.txt b/CMakeLists.txt index abb39c310a..ce3a4014ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,11 @@ if(${BUILD_WITH_LUCENE}) cat("${CMAKE_SUPPORT_DIR}/lucene_CMakeLists.txt.in" ${DEPS_TMP_FILE}) endif(${BUILD_WITH_LUCENE}) +## NuRaft +if(${BUILD_WITH_NURAFT}) + cat("${CMAKE_SUPPORT_DIR}/nuraft_CMakeLists.txt.in" ${DEPS_TMP_FILE}) +endif(${BUILD_WITH_NURAFT}) + ## download dependencies configure_file(${DEPS_TMP_FILE} "${CMAKE_SOURCE_DIR}/deps/deps-download/CMakeLists.txt") execute_process(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" . 
diff --git a/cmake/cmake.options b/cmake/cmake.options index c1964762c7..1d789dd75e 100644 --- a/cmake/cmake.options +++ b/cmake/cmake.options @@ -25,6 +25,12 @@ option( OFF ) +option( + BUILD_WITH_NURAFT + "If build with NuRaft" + OFF +) + option( BUILD_DEPENDENCY_TESTS "If build dependency tests" diff --git a/cmake/nuraft_CMakeLists.txt.in b/cmake/nuraft_CMakeLists.txt.in new file mode 100644 index 0000000000..ba31480850 --- /dev/null +++ b/cmake/nuraft_CMakeLists.txt.in @@ -0,0 +1,12 @@ + +# NuRaft +ExternalProject_Add(NuRaft + GIT_REPOSITORY https://github.com/eBay/NuRaft.git + GIT_TAG v1.3.0 + SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/nuraft" + BINARY_DIR "${CMAKE_SOURCE_DIR}/deps/nuraft" + CONFIGURE_COMMAND "./prepare.sh" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" + ) \ No newline at end of file diff --git a/deps/CMakeLists.txt b/deps/CMakeLists.txt index e35417b4c5..0dcb7a240c 100644 --- a/deps/CMakeLists.txt +++ b/deps/CMakeLists.txt @@ -67,6 +67,12 @@ if(${BUILD_WITH_LUCENE}) add_subdirectory(lucene) endif(${BUILD_WITH_LUCENE}) +# NuRaft +if(${BUILD_WITH_NURAFT}) + add_subdirectory(nuraft) +endif(${BUILD_WITH_NURAFT}) + + # ================================================================================================ # DEPENDENCY TEST # ================================================================================================ diff --git a/include/server/vnode/vnode.h b/include/server/vnode/vnode.h index 227bfe6594..2b678751f6 100644 --- a/include/server/vnode/vnode.h +++ b/include/server/vnode/vnode.h @@ -31,7 +31,7 @@ SVnode *vnodeOpen(const char *path, const SVnodeOptions *pVnodeOptions); void vnodeClose(SVnode *pVnode); void vnodeDestroy(const char *path); int vnodeProcessWriteReqs(SVnode *pVnode, SReqBatch *pReqBatch); -int vnodeApplyWriteReqs(SVnode *pVnode, SReqBatch *pReqBatch); +int vnodeApplyWriteRequest(SVnode *pVnode, const SRequest *pRequest); int vnodeProcessReadReq(SVnode *pVnode, SRequest *pReq); int 
vnodeProcessSyncReq(SVnode *pVnode, SRequest *pReq); diff --git a/source/dnode/vnode/impl/src/vnodeWrite.c b/source/dnode/vnode/impl/src/vnodeWrite.c index 179ff42cd9..486ff9c634 100644 --- a/source/dnode/vnode/impl/src/vnodeWrite.c +++ b/source/dnode/vnode/impl/src/vnodeWrite.c @@ -15,36 +15,14 @@ #include "vnodeDef.h" -static int vnodeApplyWriteRequest(SVnode *pVnode, const SRequest *pRequest); - int vnodeProcessWriteReqs(SVnode *pVnode, SReqBatch *pReqBatch) { /* TODO */ return 0; } -int vnodeApplyWriteReqs(SVnode *pVnode, SReqBatch *pReqBatch) { - SReqBatchIter rbIter; - - tdInitRBIter(&rbIter, pReqBatch); - - for (;;) { - const SRequest *pReq = tdRBIterNext(&rbIter); - if (pReq == NULL) { - break; - } - - if (vnodeApplyWriteRequest(pVnode, pReq) < 0) { - // TODO - } - } - - tdClearRBIter(&rbIter); - +int vnodeApplyWriteRequest(SVnode *pVnode, const SRequest *pRequest) { + /* TODO */ return 0; } -/* ------------------------ STATIC METHODS ------------------------ */ -static int vnodeApplyWriteRequest(SVnode *pVnode, const SRequest *pRequest) { - /* TODO */ - return 0; -} \ No newline at end of file +/* ------------------------ STATIC METHODS ------------------------ */ \ No newline at end of file From 5704d5e2ee63b700dd62b17d4adda8423e1d4bcb Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 13:40:45 +0800 Subject: [PATCH 53/94] refact --- source/dnode/vnode/impl/inc/vnodeRequest.h | 27 ++++++++++++++++++++++ source/dnode/vnode/impl/src/vnodeMain.c | 24 ++++++++++++++++++- source/dnode/vnode/impl/src/vnodeOptions.c | 1 + source/dnode/vnode/impl/src/vnodeRequest.c | 14 +++++++++++ source/dnode/vnode/impl/src/vnodeWrite.c | 1 + source/dnode/vnode/meta/src/metaOptions.c | 6 ++--- 6 files changed, 68 insertions(+), 5 deletions(-) create mode 100644 source/dnode/vnode/impl/inc/vnodeRequest.h create mode 100644 source/dnode/vnode/impl/src/vnodeRequest.c diff --git a/source/dnode/vnode/impl/inc/vnodeRequest.h b/source/dnode/vnode/impl/inc/vnodeRequest.h 
new file mode 100644 index 0000000000..af909fb636 --- /dev/null +++ b/source/dnode/vnode/impl/inc/vnodeRequest.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_VNODE_REQUEST_H_ +#define _TD_VNODE_REQUEST_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_VNODE_REQUEST_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeMain.c b/source/dnode/vnode/impl/src/vnodeMain.c index cc9a1d76bc..392f8a2ba8 100644 --- a/source/dnode/vnode/impl/src/vnodeMain.c +++ b/source/dnode/vnode/impl/src/vnodeMain.c @@ -85,10 +85,32 @@ static void vnodeFree(SVnode *pVnode) { } static int vnodeOpenImpl(SVnode *pVnode) { + char dir[TSDB_FILENAME_LEN]; + + // Open meta + sprintf(dir, "%s/meta", pVnode->path); + if (metaOpen(dir, &(pVnode->options.metaOptions)) < 0) { + // TODO: handle error + return -1; + } + + // Open tsdb + sprintf(dir, "%s/tsdb", pVnode->path); + if (tsdbOpen(dir, &(pVnode->options.tsdbOptions)) < 0) { + // TODO: handle error + return -1; + } + + // TODO: Open TQ + // TODO return 0; } static void vnodeCloseImpl(SVnode *pVnode) { - // TODO + if (pVnode) { + // TODO: Close TQ + tsdbClose(pVnode->pTsdb); + metaClose(pVnode->pMeta); + } } \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeOptions.c b/source/dnode/vnode/impl/src/vnodeOptions.c index d5f986d439..5f519416b9 100644 --- 
a/source/dnode/vnode/impl/src/vnodeOptions.c +++ b/source/dnode/vnode/impl/src/vnodeOptions.c @@ -18,6 +18,7 @@ const SVnodeOptions defaultVnodeOptions = {0}; /* TODO */ void vnodeOptionsInit(SVnodeOptions *pVnodeOptions) { /* TODO */ + vnodeOptionsCopy(pVnodeOptions, &defaultVnodeOptions); } void vnodeOptionsClear(SVnodeOptions *pVnodeOptions) { /* TODO */ diff --git a/source/dnode/vnode/impl/src/vnodeRequest.c b/source/dnode/vnode/impl/src/vnodeRequest.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/dnode/vnode/impl/src/vnodeRequest.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeWrite.c b/source/dnode/vnode/impl/src/vnodeWrite.c index 486ff9c634..401c2add9c 100644 --- a/source/dnode/vnode/impl/src/vnodeWrite.c +++ b/source/dnode/vnode/impl/src/vnodeWrite.c @@ -21,6 +21,7 @@ int vnodeProcessWriteReqs(SVnode *pVnode, SReqBatch *pReqBatch) { } int vnodeApplyWriteRequest(SVnode *pVnode, const SRequest *pRequest) { + int type; /* TODO */ return 0; } diff --git a/source/dnode/vnode/meta/src/metaOptions.c b/source/dnode/vnode/meta/src/metaOptions.c index 0f6ba9a9fb..f92cd73cae 100644 --- a/source/dnode/vnode/meta/src/metaOptions.c +++ b/source/dnode/vnode/meta/src/metaOptions.c @@ -13,11 +13,9 @@ * along with this program. If not, see . 
*/ -#include "meta.h" +#include "metaDef.h" -const static SMetaOptions defaultMetaOptions = {.lruCacheSize = 0}; - -static void metaOptionsCopy(SMetaOptions *pDest, const SMetaOptions *pSrc); +const SMetaOptions defaultMetaOptions = {.lruCacheSize = 0}; /* ------------------------ EXPOSED METHODS ------------------------ */ void metaOptionsInit(SMetaOptions *pMetaOptions) { metaOptionsCopy(pMetaOptions, &defaultMetaOptions); } From 244b346f1c78081e1d78a2b4cbe6a4c1d0c192ad Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 13:49:26 +0800 Subject: [PATCH 54/94] more --- source/dnode/vnode/impl/test/CMakeLists.txt | 12 +++++----- .../dnode/vnode/impl/test/vnodeApiTests.cpp | 7 ++++++ .../vnode/impl/test/vnodeMemAllocatorTest.cpp | 22 ------------------- source/dnode/vnode/impl/test/vnodeTests.cpp | 0 4 files changed, 12 insertions(+), 29 deletions(-) create mode 100644 source/dnode/vnode/impl/test/vnodeApiTests.cpp delete mode 100644 source/dnode/vnode/impl/test/vnodeMemAllocatorTest.cpp delete mode 100644 source/dnode/vnode/impl/test/vnodeTests.cpp diff --git a/source/dnode/vnode/impl/test/CMakeLists.txt b/source/dnode/vnode/impl/test/CMakeLists.txt index 9c09be56fb..83506a4fde 100644 --- a/source/dnode/vnode/impl/test/CMakeLists.txt +++ b/source/dnode/vnode/impl/test/CMakeLists.txt @@ -1,9 +1,7 @@ -# vnodeMemAllocatorTest -add_executable(VMATest "") -target_sources(VMATest +# Vnode API test +add_executable(vnodeApiTests "") +target_sources(vnodeApiTests PRIVATE - "../src/vnodeMemAllocator.c" - "vnodeMemAllocatorTest.cpp" + "vnodeApiTests.cpp" ) -target_include_directories(VMATest PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../inc") -target_link_libraries(VMATest os gtest_main vnode) \ No newline at end of file +target_link_libraries(vnodeApiTests vnode gtest gtest_main) \ No newline at end of file diff --git a/source/dnode/vnode/impl/test/vnodeApiTests.cpp b/source/dnode/vnode/impl/test/vnodeApiTests.cpp new file mode 100644 index 0000000000..81831fa167 --- 
/dev/null +++ b/source/dnode/vnode/impl/test/vnodeApiTests.cpp @@ -0,0 +1,7 @@ +#include +#include + +TEST(vnodeApiTest, vnodeOpen_test) { + // TODO + std::cout << "This is in vnodeApiTest" << std::endl; +} diff --git a/source/dnode/vnode/impl/test/vnodeMemAllocatorTest.cpp b/source/dnode/vnode/impl/test/vnodeMemAllocatorTest.cpp deleted file mode 100644 index f2d07e6aa5..0000000000 --- a/source/dnode/vnode/impl/test/vnodeMemAllocatorTest.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -#include "vnodeMemAllocator.h" - -TEST(VMATest, basic_create_and_destroy_test) { - SVnodeMemAllocator *vma = VMACreate(1024, 512, 64); - EXPECT_TRUE(vma != nullptr); - EXPECT_EQ(vma->full, false); - EXPECT_EQ(vma->ssize, 512); - EXPECT_EQ(vma->threshold, 64); - EXPECT_EQ(vma->inuse->tsize, 1024); - VMADestroy(vma); - - vma = VMACreate(1024, 512, 1024); - EXPECT_TRUE(vma != nullptr); - VMADestroy(vma); - - vma = VMACreate(1024, 512, 1025); - EXPECT_TRUE(vma == nullptr); - VMADestroy(vma); -} \ No newline at end of file diff --git a/source/dnode/vnode/impl/test/vnodeTests.cpp b/source/dnode/vnode/impl/test/vnodeTests.cpp deleted file mode 100644 index e69de29bb2..0000000000 From e7c9b83a9eb869c7566147f441a34f91e7d0c970 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 13:55:51 +0800 Subject: [PATCH 55/94] more --- source/dnode/vnode/impl/src/vnodeMain.c | 2 +- source/dnode/vnode/impl/test/vnodeApiTests.cpp | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/impl/src/vnodeMain.c b/source/dnode/vnode/impl/src/vnodeMain.c index 392f8a2ba8..493edbb77c 100644 --- a/source/dnode/vnode/impl/src/vnodeMain.c +++ b/source/dnode/vnode/impl/src/vnodeMain.c @@ -74,7 +74,7 @@ static SVnode *vnodeNew(const char *path, const SVnodeOptions *pVnodeOptions) { pVnode->path = strdup(path); vnodeOptionsCopy(&(pVnode->options), pVnodeOptions); - return NULL; + return pVnode; } static void vnodeFree(SVnode *pVnode) { diff --git 
a/source/dnode/vnode/impl/test/vnodeApiTests.cpp b/source/dnode/vnode/impl/test/vnodeApiTests.cpp index 81831fa167..65aa0f506c 100644 --- a/source/dnode/vnode/impl/test/vnodeApiTests.cpp +++ b/source/dnode/vnode/impl/test/vnodeApiTests.cpp @@ -1,7 +1,13 @@ #include #include -TEST(vnodeApiTest, vnodeOpen_test) { - // TODO - std::cout << "This is in vnodeApiTest" << std::endl; +#include "vnode.h" + +TEST(vnodeApiTest, vnodeOpen_vnodeClose_test) { + // Create and open a vnode + SVnode *pVnode = vnodeOpen("vnode1", NULL); + ASSERT_NE(pVnode, nullptr); + + // Close the vnode + vnodeClose(pVnode); } From 641f5b567e1f86ba284a30dbb098a522a3ac61fd Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 14:06:15 +0800 Subject: [PATCH 56/94] more --- source/dnode/vnode/tsdb/inc/tsdbOptions.h | 3 +- source/dnode/vnode/tsdb/src/tsdbMain.c | 68 ++++++++++++++++++++++- source/dnode/vnode/tsdb/src/tsdbOptions.c | 2 + 3 files changed, 69 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/tsdb/inc/tsdbOptions.h b/source/dnode/vnode/tsdb/inc/tsdbOptions.h index a186482939..ffd409099a 100644 --- a/source/dnode/vnode/tsdb/inc/tsdbOptions.h +++ b/source/dnode/vnode/tsdb/inc/tsdbOptions.h @@ -22,7 +22,8 @@ extern "C" { extern const STsdbOptions defautlTsdbOptions; -int tsdbValidateOptions(const STsdbOptions *); +int tsdbValidateOptions(const STsdbOptions *); +void tsdbOptionsCopy(STsdbOptions *pDest, const STsdbOptions *pSrc); #ifdef __cplusplus } diff --git a/source/dnode/vnode/tsdb/src/tsdbMain.c b/source/dnode/vnode/tsdb/src/tsdbMain.c index 9f473c3be1..10b6c2aa65 100644 --- a/source/dnode/vnode/tsdb/src/tsdbMain.c +++ b/source/dnode/vnode/tsdb/src/tsdbMain.c @@ -15,18 +15,80 @@ #include "tsdbDef.h" +static STsdb *tsdbNew(const char *path, const STsdbOptions *pTsdbOptions); +static void tsdbFree(STsdb *pTsdb); +static int tsdbOpenImpl(STsdb *pTsdb); +static void tsdbCloseImpl(STsdb *pTsdb); + STsdb *tsdbOpen(const char *path, const STsdbOptions *pTsdbOptions) { 
STsdb *pTsdb = NULL; - /* TODO */ + + // Set default TSDB Options + if (pTsdbOptions == NULL) { + pTsdbOptions = &defautlTsdbOptions; + } + + // Validate the options + if (tsdbValidateOptions(pTsdbOptions) < 0) { + // TODO: handle error + return NULL; + } + + // Create the handle + pTsdb = tsdbNew(path, pTsdbOptions); + if (pTsdb == NULL) { + // TODO: handle error + return NULL; + } + + taosMkDir(path); + + // Open the TSDB + if (tsdbOpenImpl(pTsdb) < 0) { + // TODO: handle error + return NULL; + } + return pTsdb; } void tsdbClose(STsdb *pTsdb) { if (pTsdb) { - /* TODO */ + tsdbCloseImpl(pTsdb); + tsdbFree(pTsdb); } } void tsdbRemove(const char *path) { taosRemoveDir(path); } -/* ------------------------ STATIC METHODS ------------------------ */ \ No newline at end of file +/* ------------------------ STATIC METHODS ------------------------ */ +static STsdb *tsdbNew(const char *path, const STsdbOptions *pTsdbOptions) { + STsdb *pTsdb = NULL; + + pTsdb = (STsdb *)calloc(1, sizeof(STsdb)); + if (pTsdb == NULL) { + // TODO: handle error + return NULL; + } + + pTsdb->path = strdup(path); + tsdbOptionsCopy(&(pTsdb->options), pTsdbOptions); + + return pTsdb; +} + +static void tsdbFree(STsdb *pTsdb) { + if (pTsdb) { + tfree(pTsdb->path); + free(pTsdb); + } +} + +static int tsdbOpenImpl(STsdb *pTsdb) { + // TODO + return 0; +} + +static void tsdbCloseImpl(STsdb *pTsdb) { + // TODO +} \ No newline at end of file diff --git a/source/dnode/vnode/tsdb/src/tsdbOptions.c b/source/dnode/vnode/tsdb/src/tsdbOptions.c index 1e1a859285..3a1102f048 100644 --- a/source/dnode/vnode/tsdb/src/tsdbOptions.c +++ b/source/dnode/vnode/tsdb/src/tsdbOptions.c @@ -30,3 +30,5 @@ int tsdbValidateOptions(const STsdbOptions *pTsdbOptions) { // TODO return 0; } + +void tsdbOptionsCopy(STsdbOptions *pDest, const STsdbOptions *pSrc) { memcpy(pDest, pSrc, sizeof(STsdbOptions)); } \ No newline at end of file From beef26b525fc94972a7e84b6442c85a3a271cc1a Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: 
Mon, 8 Nov 2021 14:21:10 +0800 Subject: [PATCH 57/94] more --- source/dnode/vnode/impl/src/vnodeMain.c | 6 ++++-- source/dnode/vnode/meta/src/metaDB.c | 14 +++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/source/dnode/vnode/impl/src/vnodeMain.c b/source/dnode/vnode/impl/src/vnodeMain.c index 493edbb77c..a0c1d38ea9 100644 --- a/source/dnode/vnode/impl/src/vnodeMain.c +++ b/source/dnode/vnode/impl/src/vnodeMain.c @@ -89,14 +89,16 @@ static int vnodeOpenImpl(SVnode *pVnode) { // Open meta sprintf(dir, "%s/meta", pVnode->path); - if (metaOpen(dir, &(pVnode->options.metaOptions)) < 0) { + pVnode->pMeta = metaOpen(dir, &(pVnode->options.metaOptions)); + if (pVnode->pMeta == NULL) { // TODO: handle error return -1; } // Open tsdb sprintf(dir, "%s/tsdb", pVnode->path); - if (tsdbOpen(dir, &(pVnode->options.tsdbOptions)) < 0) { + pVnode->pTsdb = tsdbOpen(dir, &(pVnode->options.tsdbOptions)); + if (pVnode->pTsdb == NULL) { // TODO: handle error return -1; } diff --git a/source/dnode/vnode/meta/src/metaDB.c b/source/dnode/vnode/meta/src/metaDB.c index e4c9d8ce97..8865678508 100644 --- a/source/dnode/vnode/meta/src/metaDB.c +++ b/source/dnode/vnode/meta/src/metaDB.c @@ -32,14 +32,10 @@ static int metaSaveMapDB(SMeta *pMeta, tb_uid_t suid, tb_uid_t uid); } while (0) int metaOpenDB(SMeta *pMeta) { - char dbDir[128]; char dir[128]; char * err = NULL; rocksdb_options_t *options = rocksdb_options_create(); - // TODO - sprintf(dbDir, "%s/db", pMeta->path); - if (pMeta->pCache) { rocksdb_options_set_row_cache(options, pMeta->pCache); } @@ -53,23 +49,23 @@ int metaOpenDB(SMeta *pMeta) { } // tbDb - sprintf(dir, "%s/tb_db", dbDir); + sprintf(dir, "%s/tb_db", pMeta->path); META_OPEN_DB_IMPL(pMeta->pDB->tbDb, options, dir, err); // nameDb - sprintf(dir, "%s/name_db", dbDir); + sprintf(dir, "%s/name_db", pMeta->path); META_OPEN_DB_IMPL(pMeta->pDB->nameDb, options, dir, err); // tagDb - sprintf(dir, "%s/tag_db", dbDir); + sprintf(dir, "%s/tag_db", pMeta->path); 
META_OPEN_DB_IMPL(pMeta->pDB->tagDb, options, dir, err); // schemaDb - sprintf(dir, "%s/schema_db", dbDir); + sprintf(dir, "%s/schema_db", pMeta->path); META_OPEN_DB_IMPL(pMeta->pDB->schemaDb, options, dir, err); // mapDb - sprintf(dir, "%s/map_db", dbDir); + sprintf(dir, "%s/map_db", pMeta->path); META_OPEN_DB_IMPL(pMeta->pDB->mapDb, options, dir, err); rocksdb_options_destroy(options); From a1a677f6503574bcf00b1fc02392e16061e91128 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 14:34:48 +0800 Subject: [PATCH 58/94] more --- source/dnode/vnode/impl/inc/vnodeDef.h | 2 ++ source/dnode/vnode/impl/inc/vnodeStateMgr.h | 30 +++++++++++++++++++++ source/dnode/vnode/impl/src/vnodeStateMgr.c | 14 ++++++++++ 3 files changed, 46 insertions(+) create mode 100644 source/dnode/vnode/impl/inc/vnodeStateMgr.h create mode 100644 source/dnode/vnode/impl/src/vnodeStateMgr.c diff --git a/source/dnode/vnode/impl/inc/vnodeDef.h b/source/dnode/vnode/impl/inc/vnodeDef.h index a42cd93b01..e5ce92c210 100644 --- a/source/dnode/vnode/impl/inc/vnodeDef.h +++ b/source/dnode/vnode/impl/inc/vnodeDef.h @@ -18,6 +18,7 @@ #include "vnode.h" #include "vnodeOptions.h" +#include "vnodeStateMgr.h" #ifdef __cplusplus extern "C" { @@ -26,6 +27,7 @@ extern "C" { struct SVnode { char* path; SVnodeOptions options; + SVState state; SMeta* pMeta; STsdb* pTsdb; STQ* pTq; diff --git a/source/dnode/vnode/impl/inc/vnodeStateMgr.h b/source/dnode/vnode/impl/inc/vnodeStateMgr.h new file mode 100644 index 0000000000..a32f682846 --- /dev/null +++ b/source/dnode/vnode/impl/inc/vnodeStateMgr.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _TD_VNODE_STATE_MGR_H_ +#define _TD_VNODE_STATE_MGR_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { +} SVState; + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_VNODE_STATE_MGR_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeStateMgr.c b/source/dnode/vnode/impl/src/vnodeStateMgr.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/dnode/vnode/impl/src/vnodeStateMgr.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ \ No newline at end of file From 9dd59b41e0614b51a5d19426afa7eeccb45b4ba4 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 14:50:20 +0800 Subject: [PATCH 59/94] more --- include/server/vnode/impl/vnodeImpl.h | 1 + .../dnode/vnode/impl/inc/vnodeAllocatorPool.h | 35 +++++ source/dnode/vnode/impl/inc/vnodeDef.h | 14 +- .../dnode/vnode/impl/inc/vnodeMemAllocator.h | 55 -------- .../dnode/vnode/impl/src/vnodeAllocatorPool.c | 25 ++++ source/dnode/vnode/impl/src/vnodeMain.c | 7 + .../dnode/vnode/impl/src/vnodeMemAllocator.c | 124 ------------------ 7 files changed, 76 insertions(+), 185 deletions(-) create mode 100644 source/dnode/vnode/impl/inc/vnodeAllocatorPool.h delete mode 100644 source/dnode/vnode/impl/inc/vnodeMemAllocator.h create mode 100644 source/dnode/vnode/impl/src/vnodeAllocatorPool.c delete mode 100644 source/dnode/vnode/impl/src/vnodeMemAllocator.c diff --git a/include/server/vnode/impl/vnodeImpl.h b/include/server/vnode/impl/vnodeImpl.h index 0e8a47742c..1b09361bc9 100644 --- a/include/server/vnode/impl/vnodeImpl.h +++ b/include/server/vnode/impl/vnodeImpl.h @@ -28,6 +28,7 @@ extern "C" { #endif struct SVnodeOptions { + size_t wsize; STsdbOptions tsdbOptions; SMetaOptions metaOptions; // STqOptions tqOptions; // TODO diff --git a/source/dnode/vnode/impl/inc/vnodeAllocatorPool.h b/source/dnode/vnode/impl/inc/vnodeAllocatorPool.h new file mode 100644 index 0000000000..ece9122513 --- /dev/null +++ b/source/dnode/vnode/impl/inc/vnodeAllocatorPool.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_VNODE_ALLOCATOR_POOL_H_ +#define _TD_VNODE_ALLOCATOR_POOL_H_ + +#include "vnode.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { +} SVAllocatorPool; + +int vnodeOpenAllocatorPool(SVnode *pVnode); +void vnodeCloseAllocatorPool(SVnode *pVnode); + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_VNODE_ALLOCATOR_POOL_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/impl/inc/vnodeDef.h b/source/dnode/vnode/impl/inc/vnodeDef.h index e5ce92c210..649de7f5ff 100644 --- a/source/dnode/vnode/impl/inc/vnodeDef.h +++ b/source/dnode/vnode/impl/inc/vnodeDef.h @@ -17,6 +17,7 @@ #define _TD_VNODE_DEF_H_ #include "vnode.h" +#include "vnodeAllocatorPool.h" #include "vnodeOptions.h" #include "vnodeStateMgr.h" @@ -25,12 +26,13 @@ extern "C" { #endif struct SVnode { - char* path; - SVnodeOptions options; - SVState state; - SMeta* pMeta; - STsdb* pTsdb; - STQ* pTq; + char* path; + SVnodeOptions options; + SVState state; + SVAllocatorPool pool; + SMeta* pMeta; + STsdb* pTsdb; + STQ* pTq; }; #ifdef __cplusplus diff --git a/source/dnode/vnode/impl/inc/vnodeMemAllocator.h b/source/dnode/vnode/impl/inc/vnodeMemAllocator.h deleted file mode 100644 index 76aa2c6714..0000000000 --- a/source/dnode/vnode/impl/inc/vnodeMemAllocator.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. 
If not, see . - */ - -#ifndef _TD_VNODE_MEM_ALLOCATOR_H_ -#define _TD_VNODE_MEM_ALLOCATOR_H_ - -#include "vnodeInt.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct SVnodeMemAllocator SVnodeMemAllocator; - -SVnodeMemAllocator *VMACreate(size_t size /* base size */, size_t ssize /* step size */, - size_t threshold /* threshold size when full*/); -void VMADestroy(SVnodeMemAllocator *pvma); -void VMAReset(SVnodeMemAllocator *pvma); -void * VMAMalloc(SVnodeMemAllocator *pvma, size_t size); -void VMAFree(SVnodeMemAllocator *pvma, void *ptr); -bool VMAIsFull(SVnodeMemAllocator *pvma); - -// ------------------ FOR TEST ONLY ------------------ -typedef struct SVMANode { - struct SVMANode *prev; - size_t tsize; - size_t used; - char data[]; -} SVMANode; - -struct SVnodeMemAllocator { - bool full; // if allocator is full - size_t threshold; // threshold; - size_t ssize; // step size to allocate - SVMANode *inuse; // inuse node to allocate - SVMANode node; // basic node to use -}; - -#ifdef __cplusplus -} -#endif - -#endif /*_TD_VNODE_MEM_ALLOCATOR_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeAllocatorPool.c b/source/dnode/vnode/impl/src/vnodeAllocatorPool.c new file mode 100644 index 0000000000..36f3e7face --- /dev/null +++ b/source/dnode/vnode/impl/src/vnodeAllocatorPool.c @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "vnodeDef.h" + +int vnodeOpenAllocatorPool(SVnode *pVnode) { + // TODO + return 0; +} + +void vnodeCloseAllocatorPool(SVnode *pVnode) { + // TODO +} \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeMain.c b/source/dnode/vnode/impl/src/vnodeMain.c index a0c1d38ea9..6c0fe9c974 100644 --- a/source/dnode/vnode/impl/src/vnodeMain.c +++ b/source/dnode/vnode/impl/src/vnodeMain.c @@ -87,6 +87,12 @@ static void vnodeFree(SVnode *pVnode) { static int vnodeOpenImpl(SVnode *pVnode) { char dir[TSDB_FILENAME_LEN]; + // Open allocator pool + if (vnodeOpenAllocatorPool(pVnode) < 0) { + // TODO: handle error + return -1; + } + // Open meta sprintf(dir, "%s/meta", pVnode->path); pVnode->pMeta = metaOpen(dir, &(pVnode->options.metaOptions)); @@ -111,6 +117,7 @@ static int vnodeOpenImpl(SVnode *pVnode) { static void vnodeCloseImpl(SVnode *pVnode) { if (pVnode) { + vnodeCloseAllocatorPool(pVnode); // TODO: Close TQ tsdbClose(pVnode->pTsdb); metaClose(pVnode->pMeta); diff --git a/source/dnode/vnode/impl/src/vnodeMemAllocator.c b/source/dnode/vnode/impl/src/vnodeMemAllocator.c deleted file mode 100644 index 29909df491..0000000000 --- a/source/dnode/vnode/impl/src/vnodeMemAllocator.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#include "vnodeMemAllocator.h" - -#define VMA_IS_FULL(pvma) \ - (((pvma)->inuse != &((pvma)->node)) || ((pvma)->inuse->tsize - (pvma)->inuse->used < (pvma)->threshold)) - -static SVMANode *VMANodeNew(size_t size); -static void VMANodeFree(SVMANode *node); - -SVnodeMemAllocator *VMACreate(size_t size, size_t ssize, size_t threshold) { - SVnodeMemAllocator *pvma = NULL; - - if (size < threshold) { - return NULL; - } - - pvma = (SVnodeMemAllocator *)malloc(sizeof(*pvma) + size); - if (pvma) { - pvma->full = false; - pvma->threshold = threshold; - pvma->ssize = ssize; - pvma->inuse = &(pvma->node); - - pvma->inuse->prev = NULL; - pvma->inuse->tsize = size; - pvma->inuse->used = 0; - } - - return pvma; -} - -void VMADestroy(SVnodeMemAllocator *pvma) { - if (pvma) { - VMAReset(pvma); - free(pvma); - } -} - -void VMAReset(SVnodeMemAllocator *pvma) { - while (pvma->inuse != &(pvma->node)) { - SVMANode *node = pvma->inuse; - pvma->inuse = node->prev; - VMANodeFree(node); - } - - pvma->inuse->used = 0; - pvma->full = false; -} - -void *VMAMalloc(SVnodeMemAllocator *pvma, size_t size) { - void * ptr = NULL; - size_t tsize = size + sizeof(size_t); - - if (pvma->inuse->tsize - pvma->inuse->used < tsize) { - SVMANode *pNode = VMANodeNew(MAX(pvma->ssize, tsize)); - if (pNode == NULL) { - return NULL; - } - - pNode->prev = pvma->inuse; - pvma->inuse = pNode; - } - - ptr = pvma->inuse->data + pvma->inuse->used; - pvma->inuse->used += tsize; - *(size_t *)ptr = size; - ptr = POINTER_SHIFT(ptr, sizeof(size_t)); - - pvma->full = VMA_IS_FULL(pvma); - - return ptr; -} - -void VMAFree(SVnodeMemAllocator *pvma, void *ptr) { - if (ptr) { - size_t size = *(size_t *)POINTER_SHIFT(ptr, -sizeof(size_t)); - if (POINTER_SHIFT(ptr, size) == pvma->inuse->data + pvma->inuse->used) { - pvma->inuse->used -= (size + sizeof(size_t)); - - if ((pvma->inuse->used == 0) && (pvma->inuse != &(pvma->node))) { - SVMANode *node = pvma->inuse; - pvma->inuse = node->prev; - VMANodeFree(node); - } - - 
pvma->full = VMA_IS_FULL(pvma); - } - } -} - -bool VMAIsFull(SVnodeMemAllocator *pvma) { return pvma->full; } - -static SVMANode *VMANodeNew(size_t size) { - SVMANode *node = NULL; - - node = (SVMANode *)malloc(sizeof(*node) + size); - if (node) { - node->prev = NULL; - node->tsize = size; - node->used = 0; - } - - return node; -} - -static void VMANodeFree(SVMANode *node) { - if (node) { - free(node); - } -} \ No newline at end of file From 5a17541b7134a3dd2596131196990a763fa6d4fb Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 15:10:42 +0800 Subject: [PATCH 60/94] more --- .../dnode/vnode/impl/inc/vnodeAllocatorPool.h | 8 +++-- source/dnode/vnode/impl/inc/vnodeDef.h | 1 + .../dnode/vnode/impl/inc/vnodeMemAllocator.h | 30 +++++++++++++++++++ .../dnode/vnode/impl/src/vnodeAllocatorPool.c | 2 +- .../dnode/vnode/impl/src/vnodeMemAllocator.c | 14 +++++++++ 5 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 source/dnode/vnode/impl/inc/vnodeMemAllocator.h create mode 100644 source/dnode/vnode/impl/src/vnodeMemAllocator.c diff --git a/source/dnode/vnode/impl/inc/vnodeAllocatorPool.h b/source/dnode/vnode/impl/inc/vnodeAllocatorPool.h index ece9122513..53fa927241 100644 --- a/source/dnode/vnode/impl/inc/vnodeAllocatorPool.h +++ b/source/dnode/vnode/impl/inc/vnodeAllocatorPool.h @@ -17,16 +17,20 @@ #define _TD_VNODE_ALLOCATOR_POOL_H_ #include "vnode.h" +#include "vnodeMemAllocator.h" #ifdef __cplusplus extern "C" { #endif typedef struct { + int nexta; + int enda; + SVMemAllocator allocators[3]; } SVAllocatorPool; -int vnodeOpenAllocatorPool(SVnode *pVnode); -void vnodeCloseAllocatorPool(SVnode *pVnode); +int vnodeOpenAllocatorPool(SVnode *pVnode); +void vnodeCloseAllocatorPool(SVnode *pVnode); #ifdef __cplusplus } diff --git a/source/dnode/vnode/impl/inc/vnodeDef.h b/source/dnode/vnode/impl/inc/vnodeDef.h index 649de7f5ff..012e6fc5d1 100644 --- a/source/dnode/vnode/impl/inc/vnodeDef.h +++ b/source/dnode/vnode/impl/inc/vnodeDef.h @@ -30,6 
+30,7 @@ struct SVnode { SVnodeOptions options; SVState state; SVAllocatorPool pool; + SVMemAllocator* inuse; SMeta* pMeta; STsdb* pTsdb; STQ* pTq; diff --git a/source/dnode/vnode/impl/inc/vnodeMemAllocator.h b/source/dnode/vnode/impl/inc/vnodeMemAllocator.h new file mode 100644 index 0000000000..784c70dd68 --- /dev/null +++ b/source/dnode/vnode/impl/inc/vnodeMemAllocator.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_VNODE_MEM_ALLOCATOR_H_ +#define _TD_VNODE_MEM_ALLOCATOR_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { +} SVMemAllocator; + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_VNODE_MEM_ALLOCATOR_H_*/ diff --git a/source/dnode/vnode/impl/src/vnodeAllocatorPool.c b/source/dnode/vnode/impl/src/vnodeAllocatorPool.c index 36f3e7face..82dad85add 100644 --- a/source/dnode/vnode/impl/src/vnodeAllocatorPool.c +++ b/source/dnode/vnode/impl/src/vnodeAllocatorPool.c @@ -21,5 +21,5 @@ int vnodeOpenAllocatorPool(SVnode *pVnode) { } void vnodeCloseAllocatorPool(SVnode *pVnode) { - // TODO + // TODO } \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeMemAllocator.c b/source/dnode/vnode/impl/src/vnodeMemAllocator.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/dnode/vnode/impl/src/vnodeMemAllocator.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ \ No newline at end of file From ccf8f14fdb59a5301ccc4753dc4e3fd278685153 Mon Sep 17 00:00:00 2001 From: lichuang Date: Mon, 8 Nov 2021 15:58:19 +0800 Subject: [PATCH 61/94] [TD-10645][raft]add raft progress tracker --- source/libs/sync/inc/raft.h | 75 +++++++------ source/libs/sync/inc/raft_log.h | 14 ++- source/libs/sync/inc/raft_progress.h | 26 +++-- .../sync/inc/sync_raft_progress_tracker.h | 100 ++++++++++++++++++ source/libs/sync/inc/sync_raft_quorum_joint.h | 30 ++++++ source/libs/sync/inc/sync_type.h | 10 ++ source/libs/sync/src/raft.c | 78 ++++++++++++-- .../src/raft_handle_append_entries_message.c | 4 +- .../libs/sync/src/raft_handle_vote_message.c | 2 +- source/libs/sync/src/raft_log.c | 4 + source/libs/sync/src/raft_progress.c | 44 +++----- source/libs/sync/src/raft_replication.c | 5 + .../sync/src/sync_raft_progress_tracker.c | 41 +++++++ 13 files changed, 349 insertions(+), 84 deletions(-) create mode 100644 source/libs/sync/inc/sync_raft_progress_tracker.h create mode 100644 source/libs/sync/inc/sync_raft_quorum_joint.h create mode 100644 source/libs/sync/src/sync_raft_progress_tracker.c diff --git a/source/libs/sync/inc/raft.h b/source/libs/sync/inc/raft.h index dd3eed9e02..795ea7cc99 100644 --- a/source/libs/sync/inc/raft.h +++ b/source/libs/sync/inc/raft.h @@ -20,17 +20,12 @@ #include "sync_type.h" #include "raft_message.h" -typedef struct SSyncRaftProgress SSyncRaftProgress; typedef struct RaftLeaderState { 
- int nProgress; - SSyncRaftProgress* progress; + } RaftLeaderState; typedef struct RaftCandidateState { - /* votes results */ - SyncRaftVoteRespType votes[TSDB_MAX_REPLICA]; - /* true if in pre-vote phase */ bool inPreVote; } RaftCandidateState; @@ -47,17 +42,34 @@ struct SSyncRaft { // owner sync node SSyncNode* pNode; - int maxMsgSize; + SSyncCluster cluster; + + SyncNodeId selfId; + SyncGroupId selfGroupId; + + SSyncRaftIOMethods io; SSyncFSM fsm; SSyncLogStore logStore; SStateManager stateManager; + union { + RaftLeaderState leaderState; + RaftCandidateState candidateState; + }; + SyncTerm term; SyncNodeId voteFor; - SyncNodeId selfId; - SyncGroupId selfGroupId; + SSyncRaftLog *log; + + int maxMsgSize; + SSyncRaftProgressTracker *tracker; + + ESyncRole state; + + // isLearner is true if the local raft node is a learner. + bool isLearner; /** * the leader id @@ -70,15 +82,23 @@ struct SSyncRaft { **/ SyncNodeId leadTransferee; - /** - * New configuration is ignored if there exists unapplied configuration. + /** + * Only one conf change may be pending (in the log, but not yet + * applied) at a time. This is enforced via pendingConfIndex, which + * is set to a value >= the log index of the latest pending + * configuration change (if any). Config changes are only allowed to + * be proposed if the leader's applied index is greater than this + * value. **/ - bool hasPendingConf; - - SSyncCluster cluster; - - ESyncRole state; + SyncIndex pendingConfigIndex; + /** + * an estimate of the size of the uncommitted tail of the Raft log. Used to + * prevent unbounded log growth. Only maintained by the leader. Reset on + * term changes. + **/ + uint32_t uncommittedSize; + /** * number of ticks since it reached last electionTimeout when it is leader * or candidate. 
@@ -96,24 +116,19 @@ struct SSyncRaft { // current tick count since start up uint32_t currentTick; - // election timeout tick(random in [3:6] tick) - uint16_t electionTimeoutTick; - - // heartbeat timeout tick(default: 1 tick) - uint16_t heartbeatTimeoutTick; - bool preVote; bool checkQuorum; - SSyncRaftIOMethods io; + int heartbeatTimeout; + int electionTimeout; - // union different state data - union { - RaftLeaderState leaderState; - RaftCandidateState candidateState; - }; - - SSyncRaftLog *log; + /** + * randomizedElectionTimeout is a random number between + * [electiontimeout, 2 * electiontimeout - 1]. It gets reset + * when raft changes its state to follower or candidate. + **/ + int randomizedElectionTimeout; + bool disableProposalForwarding; SyncRaftStepFp stepFp; diff --git a/source/libs/sync/inc/raft_log.h b/source/libs/sync/inc/raft_log.h index bab9932fb5..a44f5a7273 100644 --- a/source/libs/sync/inc/raft_log.h +++ b/source/libs/sync/inc/raft_log.h @@ -19,8 +19,18 @@ #include "sync.h" #include "sync_type.h" -struct SSyncRaftEntry { +typedef enum SyncEntryType { + SYNC_ENTRY_TYPE_LOG = 1, +}SyncEntryType; +struct SSyncRaftEntry { + SyncTerm term; + + SyncIndex index; + + SyncEntryType type; + + SSyncBuffer buffer; }; struct SSyncRaftLog { @@ -49,6 +59,8 @@ bool syncRaftHasUnappliedLog(SSyncRaftLog* pLog); SyncTerm syncRaftLogTermOf(SSyncRaftLog* pLog, SyncIndex index); +int syncRaftLogAppend(SSyncRaftLog* pLog, SSyncRaftEntry *pEntries, int n); + int syncRaftLogAcquire(SSyncRaftLog* pLog, SyncIndex index, int maxMsgSize, SSyncRaftEntry **ppEntries, int *n); diff --git a/source/libs/sync/inc/raft_progress.h b/source/libs/sync/inc/raft_progress.h index 5840468a5d..41d66d59d0 100644 --- a/source/libs/sync/inc/raft_progress.h +++ b/source/libs/sync/inc/raft_progress.h @@ -73,6 +73,8 @@ typedef enum RaftProgressState { * progresses of all followers, and sends entries to the follower based on its progress. 
**/ struct SSyncRaftProgress { + SyncNodeId id; + SyncIndex nextIndex; SyncIndex matchIndex; @@ -108,16 +110,18 @@ struct SSyncRaftProgress { * flow control sliding window **/ SSyncRaftInflights inflights; + + // IsLearner is true if this progress is tracked for a learner. + bool isLearner; }; -int syncRaftProgressCreate(SSyncRaft* pRaft); -//int syncRaftProgressRecreate(SSyncRaft* pRaft, const RaftConfiguration* configuration); +void syncRaftInitProgress(SSyncRaft* pRaft, SSyncRaftProgress* progress); /** * syncRaftProgressMaybeUpdate returns false if the given lastIndex index comes from i-th node's log. * Otherwise it updates the progress and returns true. **/ -bool syncRaftProgressMaybeUpdate(SSyncRaft* pRaft, int i, SyncIndex lastIndex); +bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex); static FORCE_INLINE void syncRaftProgressOptimisticNextIndex(SSyncRaftProgress* progress, SyncIndex nextIndex) { progress->nextIndex = nextIndex + 1; @@ -127,7 +131,7 @@ static FORCE_INLINE void syncRaftProgressOptimisticNextIndex(SSyncRaftProgress* * syncRaftProgressMaybeDecrTo returns false if the given to index comes from an out of order message. * Otherwise it decreases the progress next index to min(rejected, last) and returns true. 
**/ -bool syncRaftProgressMaybeDecrTo(SSyncRaft* pRaft, int i, +bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress, SyncIndex rejected, SyncIndex lastIndex); /** @@ -166,20 +170,20 @@ static FORCE_INLINE bool syncRaftProgressUpdateSendTick(SSyncRaftProgress* progr return progress->lastSendTick = current; } -void syncRaftProgressFailure(SSyncRaft* pRaft, int i); +void syncRaftProgressFailure(SSyncRaftProgress* progress); -bool syncRaftProgressNeedAbortSnapshot(SSyncRaft* pRaft, int i); +bool syncRaftProgressNeedAbortSnapshot(SSyncRaftProgress* progress); /** - * return true if i-th node's log is up-todate + * return true if progress's log is up-todate **/ -bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, int i); +bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress); -void syncRaftProgressBecomeProbe(SSyncRaft* pRaft, int i); +void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress); -void syncRaftProgressBecomeReplicate(SSyncRaft* pRaft, int i); +void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress); -void syncRaftProgressBecomeSnapshot(SSyncRaft* pRaft, int i, SyncIndex snapshotIndex); +void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex); /* inflights APIs */ int syncRaftInflightReset(SSyncRaftInflights* inflights); diff --git a/source/libs/sync/inc/sync_raft_progress_tracker.h b/source/libs/sync/inc/sync_raft_progress_tracker.h new file mode 100644 index 0000000000..ffc134fec4 --- /dev/null +++ b/source/libs/sync/inc/sync_raft_progress_tracker.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H +#define _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H + +#include "sync_type.h" +#include "sync_raft_quorum_joint.h" +#include "raft_progress.h" + +struct SSyncRaftProgressTrackerConfig { + SSyncRaftQuorumJointConfig voters; + + /** AutoLeave is true if the configuration is joint and a transition to the + * incoming configuration should be carried out automatically by Raft when + * this is possible. If false, the configuration will be joint until the + * application initiates the transition manually. + **/ + bool autoLeave; + + /** + * Learners is a set of IDs corresponding to the learners active in the + * current configuration. + * + * Invariant: Learners and Voters does not intersect, i.e. if a peer is in + * either half of the joint config, it can't be a learner; if it is a + * learner it can't be in either half of the joint config. This invariant + * simplifies the implementation since it allows peers to have clarity about + * its current role without taking into account joint consensus. + **/ + SyncNodeId learners[TSDB_MAX_REPLICA]; + + /** + * When we turn a voter into a learner during a joint consensus transition, + * we cannot add the learner directly when entering the joint state. This is + * because this would violate the invariant that the intersection of + * voters and learners is empty. For example, assume a Voter is removed and + * immediately re-added as a learner (or in other words, it is demoted): + * + * Initially, the configuration will be + * + * voters: {1 2 3} + * learners: {} + * + * and we want to demote 3. 
Entering the joint configuration, we naively get + * + * voters: {1 2} & {1 2 3} + * learners: {3} + * + * but this violates the invariant (3 is both voter and learner). Instead, + * we get + * + * voters: {1 2} & {1 2 3} + * learners: {} + * next_learners: {3} + * + * Where 3 is now still purely a voter, but we are remembering the intention + * to make it a learner upon transitioning into the final configuration: + * + * voters: {1 2} + * learners: {3} + * next_learners: {} + * + * Note that next_learners is not used while adding a learner that is not + * also a voter in the joint config. In this case, the learner is added + * right away when entering the joint configuration, so that it is caught up + * as soon as possible. + **/ + SyncNodeId learnersNext[TSDB_MAX_REPLICA]; +}; + +struct SSyncRaftProgressTracker { + SSyncRaftProgressTrackerConfig config; + + SSyncRaftProgress progressMap[TSDB_MAX_REPLICA]; + + SyncRaftVoteRespType votes[TSDB_MAX_REPLICA]; + int maxInflight; +}; + +SSyncRaftProgressTracker* syncRaftOpenProgressTracker(); + +void syncRaftResetVotes(SSyncRaftProgressTracker*); + +typedef void (*visitProgressFp)(SSyncRaftProgress* progress, void* arg); +void syncRaftProgressVisit(SSyncRaftProgressTracker*, visitProgressFp visit, void* arg); + +#endif /* _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H */ diff --git a/source/libs/sync/inc/sync_raft_quorum_joint.h b/source/libs/sync/inc/sync_raft_quorum_joint.h new file mode 100644 index 0000000000..4f7424db7e --- /dev/null +++ b/source/libs/sync/inc/sync_raft_quorum_joint.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H +#define _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H + +#include "taosdef.h" +#include "sync.h" + +/** + * JointConfig is a configuration of two groups of (possibly overlapping) + * majority configurations. Decisions require the support of both majorities. + **/ +typedef struct SSyncRaftQuorumJointConfig { + SyncNodeId majorityConfig[2][TSDB_MAX_REPLICA]; +}SSyncRaftQuorumJointConfig; + +#endif /* _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H */ diff --git a/source/libs/sync/inc/sync_type.h b/source/libs/sync/inc/sync_type.h index 130243a72a..9faebe94b2 100644 --- a/source/libs/sync/inc/sync_type.h +++ b/source/libs/sync/inc/sync_type.h @@ -16,6 +16,9 @@ #ifndef _TD_LIBS_SYNC_TYPE_H #define _TD_LIBS_SYNC_TYPE_H +#include +#include "osMath.h" + #define SYNC_NON_NODE_ID -1 #define SYNC_NON_TERM 0 @@ -24,10 +27,16 @@ typedef uint32_t SyncTick; typedef struct SSyncRaft SSyncRaft; +typedef struct SSyncRaftProgress SSyncRaftProgress; +typedef struct SSyncRaftProgressTrackerConfig SSyncRaftProgressTrackerConfig; + +typedef struct SSyncRaftProgressTracker SSyncRaftProgressTracker; + typedef struct SSyncRaftLog SSyncRaftLog; typedef struct SSyncRaftEntry SSyncRaftEntry; +#if 0 #ifndef MIN #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #endif @@ -35,6 +44,7 @@ typedef struct SSyncRaftEntry SSyncRaftEntry; #ifndef MAX #define MAX(x, y) (((x) > (y)) ? 
(x) : (y)) #endif +#endif typedef enum { SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0, diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c index 39e7a80d0b..4a3654131c 100644 --- a/source/libs/sync/src/raft.c +++ b/source/libs/sync/src/raft.c @@ -17,6 +17,7 @@ #include "raft_configuration.h" #include "raft_log.h" #include "raft_replication.h" +#include "sync_raft_progress_tracker.h" #include "syncInt.h" #define RAFT_READ_LOG_MAX_NUM 100 @@ -35,6 +36,9 @@ static int triggerAll(SSyncRaft* pRaft); static void tickElection(SSyncRaft* pRaft); static void tickHeartbeat(SSyncRaft* pRaft); +static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n); +static bool maybeCommit(SSyncRaft* pRaft); + static void abortLeaderTransfer(SSyncRaft* pRaft); static void resetRaft(SSyncRaft* pRaft, SyncTerm term); @@ -59,6 +63,12 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { logStore = &(pRaft->logStore); fsm = &(pRaft->fsm); + // init progress tracker + pRaft->tracker = syncRaftOpenProgressTracker(); + if (pRaft->tracker == NULL) { + return -1; + } + // open raft log if ((pRaft->log = syncRaftLogOpen()) == NULL) { return -1; @@ -88,7 +98,7 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { } assert(initIndex == serverState.commitIndex); - pRaft->heartbeatTimeoutTick = 1; + //pRaft->heartbeatTimeoutTick = 1; syncRaftBecomeFollower(pRaft, pRaft->term, SYNC_NON_NODE_ID); @@ -137,7 +147,7 @@ void syncRaftBecomeFollower(SSyncRaft* pRaft, SyncTerm term, SyncNodeId leaderId void syncRaftBecomePreCandidate(SSyncRaft* pRaft) { convertClear(pRaft); - memset(pRaft->candidateState.votes, SYNC_RAFT_VOTE_RESP_UNKNOWN, sizeof(SyncRaftVoteRespType) * TSDB_MAX_REPLICA); + /** * Becoming a pre-candidate changes our step functions and state, * but doesn't change anything else. 
In particular it does not increase @@ -152,7 +162,6 @@ void syncRaftBecomePreCandidate(SSyncRaft* pRaft) { void syncRaftBecomeCandidate(SSyncRaft* pRaft) { convertClear(pRaft); - memset(pRaft->candidateState.votes, SYNC_RAFT_VOTE_RESP_UNKNOWN, sizeof(SyncRaftVoteRespType) * TSDB_MAX_REPLICA); pRaft->candidateState.inPreVote = false; pRaft->stepFp = stepCandidate; @@ -176,14 +185,22 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft) { if (nPendingConf > 1) { syncFatal("unexpected multiple uncommitted config entry"); } - if (nPendingConf == 1) { - pRaft->hasPendingConf = true; - } syncInfo("[%d:%d] became leader at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); - // after become leader, send initial heartbeat - syncRaftTriggerHeartbeat(pRaft); + // after become leader, send a no-op log + SSyncRaftEntry* entry = (SSyncRaftEntry*)malloc(sizeof(SSyncRaftEntry)); + if (entry == NULL) { + return; + } + *entry = (SSyncRaftEntry) { + .buffer = (SSyncBuffer) { + .data = NULL, + .len = 0, + } + }; + appendEntries(pRaft, entry, 1); + //syncRaftTriggerHeartbeat(pRaft); } void syncRaftTriggerHeartbeat(SSyncRaft* pRaft) { @@ -192,7 +209,7 @@ void syncRaftTriggerHeartbeat(SSyncRaft* pRaft) { void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft) { // electionTimeoutTick in [3,6] tick - pRaft->electionTimeoutTick = taosRand() % 4 + 3; + pRaft->randomizedElectionTimeout = taosRand() % 4 + 3; } bool syncRaftIsPromotable(SSyncRaft* pRaft) { @@ -200,7 +217,7 @@ bool syncRaftIsPromotable(SSyncRaft* pRaft) { } bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft) { - return pRaft->electionElapsed >= pRaft->electionTimeoutTick; + return pRaft->electionElapsed >= pRaft->randomizedElectionTimeout; } int syncRaftQuorum(SSyncRaft* pRaft) { @@ -208,6 +225,7 @@ int syncRaftQuorum(SSyncRaft* pRaft) { } int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, bool preVote, bool accept, int* rejectNum) { +/* if (accept) { syncInfo("[%d:%d] received (pre-vote %d) from %d at term 
%" PRId64 "", pRaft->selfGroupId, pRaft->selfId, preVote, id, pRaft->term); @@ -230,6 +248,8 @@ int syncRaftNumOfGranted(SSyncRaft* pRaft, SyncNodeId id, bool preVote, bool acc if (rejectNum) *rejectNum = rejected; return granted; +*/ + return 0; } /** @@ -375,6 +395,34 @@ static void tickHeartbeat(SSyncRaft* pRaft) { } +static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) { + SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); + SyncTerm term = pRaft->term; + int i; + + for (i = 0; i < n; ++i) { + entries[i].term = term; + entries[i].index = lastIndex + 1 + i; + } + + syncRaftLogAppend(pRaft->log, entries, n); + + SSyncRaftProgress* progress = &(pRaft->tracker->progressMap[pRaft->cluster.selfIndex]); + syncRaftProgressMaybeUpdate(progress, lastIndex); + // Regardless of maybeCommit's return, our caller will call bcastAppend. + maybeCommit(pRaft); +} + +/** + * maybeCommit attempts to advance the commit index. Returns true if + * the commit index changed (in which case the caller should call + * r.bcastAppend). + **/ +static bool maybeCommit(SSyncRaft* pRaft) { + + return true; +} + /** * trigger I/O requests for newly appended log entries or heartbeats. 
**/ @@ -395,6 +443,10 @@ static void abortLeaderTransfer(SSyncRaft* pRaft) { pRaft->leadTransferee = SYNC_NON_NODE_ID; } +static void initProgress(SSyncRaftProgress* progress, void* arg) { + syncRaftInitProgress((SSyncRaft*)arg, progress); +} + static void resetRaft(SSyncRaft* pRaft, SyncTerm term) { if (pRaft->term != term) { pRaft->term = term; @@ -410,5 +462,9 @@ static void resetRaft(SSyncRaft* pRaft, SyncTerm term) { abortLeaderTransfer(pRaft); - pRaft->hasPendingConf = false; + syncRaftResetVotes(pRaft->tracker); + syncRaftProgressVisit(pRaft->tracker, initProgress, pRaft); + + pRaft->pendingConfigIndex = 0; + pRaft->uncommittedSize = 0; } diff --git a/source/libs/sync/src/raft_handle_append_entries_message.c b/source/libs/sync/src/raft_handle_append_entries_message.c index d4d362848f..8c014a56bc 100644 --- a/source/libs/sync/src/raft_handle_append_entries_message.c +++ b/source/libs/sync/src/raft_handle_append_entries_message.c @@ -20,7 +20,7 @@ #include "raft_message.h" int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - RaftMsg_Append_Entries *appendEntries = &(pMsg->appendEntries); + const RaftMsg_Append_Entries *appendEntries = &(pMsg->appendEntries); int peerIndex = syncRaftConfigurationIndexOfNode(pRaft, pMsg->from); @@ -33,7 +33,7 @@ int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMs return 0; } - RaftMsg_Append_Entries *appendResp = &(pMsg->appendResp); + RaftMsg_Append_Entries *appendResp = &(pRespMsg->appendResp); // ignore committed logs if (syncRaftLogIsCommitted(pRaft->log, appendEntries->index)) { appendResp->index = pRaft->log->commitIndex; diff --git a/source/libs/sync/src/raft_handle_vote_message.c b/source/libs/sync/src/raft_handle_vote_message.c index 2fab8ad5a9..709e319c3e 100644 --- a/source/libs/sync/src/raft_handle_vote_message.c +++ b/source/libs/sync/src/raft_handle_vote_message.c @@ -36,7 +36,7 @@ int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* 
pMsg) { if (pRespMsg == NULL) { return 0; } - syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %" PRId64 ", vote: %d] %s for %d" \ + syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %" PRId64 ", vote: %d] %s for %d"\ "[logterm: %" PRId64 ", index: %" PRId64 "] at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, lastTerm, lastIndex, pRaft->voteFor, grant ? "grant" : "reject", diff --git a/source/libs/sync/src/raft_log.c b/source/libs/sync/src/raft_log.c index a26650cbb7..0654dbea6b 100644 --- a/source/libs/sync/src/raft_log.c +++ b/source/libs/sync/src/raft_log.c @@ -47,6 +47,10 @@ SyncTerm syncRaftLogTermOf(SSyncRaftLog* pLog, SyncIndex index) { return SYNC_NON_TERM; } +int syncRaftLogAppend(SSyncRaftLog* pLog, SSyncRaftEntry *pEntries, int n) { + +} + int syncRaftLogAcquire(SSyncRaftLog* pLog, SyncIndex index, int maxMsgSize, SSyncRaftEntry **ppEntries, int *n) { return 0; diff --git a/source/libs/sync/src/raft_progress.c b/source/libs/sync/src/raft_progress.c index 8133b670ff..6edc808698 100644 --- a/source/libs/sync/src/raft_progress.c +++ b/source/libs/sync/src/raft_progress.c @@ -40,9 +40,15 @@ int syncRaftProgressRecreate(SSyncRaft* pRaft, const RaftConfiguration* configur } */ -bool syncRaftProgressMaybeUpdate(SSyncRaft* pRaft, int i, SyncIndex lastIndex) { - assert(i >= 0 && i < pRaft->leaderState.nProgress); - SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); +void syncRaftInitProgress(SSyncRaft* pRaft, SSyncRaftProgress* progress) { + *progress = (SSyncRaftProgress) { + .matchIndex = progress->id == pRaft->selfId ? 
syncRaftLogLastIndex(pRaft->log) : 0, + .nextIndex = syncRaftLogLastIndex(pRaft->log) + 1, + //.inflights = + }; +} + +bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex) { bool updated = false; if (progress->matchIndex < lastIndex) { @@ -57,11 +63,8 @@ bool syncRaftProgressMaybeUpdate(SSyncRaft* pRaft, int i, SyncIndex lastIndex) { return updated; } -bool syncRaftProgressMaybeDecrTo(SSyncRaft* pRaft, int i, +bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress, SyncIndex rejected, SyncIndex lastIndex) { - assert(i >= 0 && i < pRaft->leaderState.nProgress); - SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); - if (progress->state == PROGRESS_REPLICATE) { /** * the rejection must be stale if the progress has matched and "rejected" @@ -110,30 +113,19 @@ bool syncRaftProgressIsPaused(SSyncRaftProgress* progress) { } } -void syncRaftProgressFailure(SSyncRaft* pRaft, int i) { - assert(i >= 0 && i < pRaft->leaderState.nProgress); - - SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); - +void syncRaftProgressFailure(SSyncRaftProgress* progress) { progress->pendingSnapshotIndex = 0; } -bool syncRaftProgressNeedAbortSnapshot(SSyncRaft* pRaft, int i) { - assert(i >= 0 && i < pRaft->leaderState.nProgress); - SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); - +bool syncRaftProgressNeedAbortSnapshot(SSyncRaftProgress* progress) { return progress->state == PROGRESS_SNAPSHOT && progress->matchIndex >= progress->pendingSnapshotIndex; } -bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, int i) { - assert(i >= 0 && i < pRaft->leaderState.nProgress); - SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); +bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress) { return syncRaftLogLastIndex(pRaft->log) + 1 == progress->nextIndex; } -void syncRaftProgressBecomeProbe(SSyncRaft* pRaft, int i) { - assert(i >= 0 && i < pRaft->leaderState.nProgress); - SSyncRaftProgress* 
progress = &(pRaft->leaderState.progress[i]); +void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress) { /** * If the original state is ProgressStateSnapshot, progress knows that * the pending snapshot has been sent to this peer successfully, then @@ -149,16 +141,12 @@ void syncRaftProgressBecomeProbe(SSyncRaft* pRaft, int i) { } } -void syncRaftProgressBecomeReplicate(SSyncRaft* pRaft, int i) { - assert(i >= 0 && i < pRaft->leaderState.nProgress); - SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); +void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress) { resetProgressState(progress, PROGRESS_REPLICATE); progress->nextIndex = progress->matchIndex + 1; } -void syncRaftProgressBecomeSnapshot(SSyncRaft* pRaft, int i, SyncIndex snapshotIndex) { - assert(i >= 0 && i < pRaft->leaderState.nProgress); - SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); +void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex) { resetProgressState(progress, PROGRESS_SNAPSHOT); progress->pendingSnapshotIndex = snapshotIndex; } diff --git a/source/libs/sync/src/raft_replication.c b/source/libs/sync/src/raft_replication.c index b6ff1fb329..473499b795 100644 --- a/source/libs/sync/src/raft_replication.c +++ b/source/libs/sync/src/raft_replication.c @@ -22,6 +22,7 @@ static int sendSnapshot(SSyncRaft* pRaft, int i); static int sendAppendEntries(SSyncRaft* pRaft, int i, SyncIndex index, SyncTerm term); int syncRaftReplicate(SSyncRaft* pRaft, int i) { +#if 0 assert(pRaft->state == TAOS_SYNC_ROLE_LEADER); assert(i >= 0 && i < pRaft->leaderState.nProgress); @@ -99,6 +100,8 @@ send_snapshot: prevTerm = syncRaftLogLastTerm(pRaft->log); return sendAppendEntries(pRaft, i, prevIndex, prevTerm); } +#endif + return 0; } static int sendSnapshot(SSyncRaft* pRaft, int i) { @@ -106,6 +109,7 @@ static int sendSnapshot(SSyncRaft* pRaft, int i) { } static int sendAppendEntries(SSyncRaft* pRaft, int i, SyncIndex prevIndex, SyncTerm 
prevTerm) { +#if 0 SyncIndex nextIndex = prevIndex + 1; SSyncRaftEntry *entries; int nEntry; @@ -139,5 +143,6 @@ static int sendAppendEntries(SSyncRaft* pRaft, int i, SyncIndex prevIndex, SyncT err_release_log: syncRaftLogRelease(pRaft->log, nextIndex, entries, nEntry); +#endif return 0; } \ No newline at end of file diff --git a/source/libs/sync/src/sync_raft_progress_tracker.c b/source/libs/sync/src/sync_raft_progress_tracker.c new file mode 100644 index 0000000000..d349cbb9b2 --- /dev/null +++ b/source/libs/sync/src/sync_raft_progress_tracker.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "sync_raft_progress_tracker.h" + +SSyncRaftProgressTracker* syncRaftOpenProgressTracker() { + SSyncRaftProgressTracker* tracker = (SSyncRaftProgressTracker*)malloc(sizeof(SSyncRaftProgressTracker)); + if (tracker == NULL) { + return NULL; + } + + return tracker; +} + +void syncRaftResetVotes(SSyncRaftProgressTracker* tracker) { + memset(tracker->votes, SYNC_RAFT_VOTE_RESP_UNKNOWN, sizeof(SyncRaftVoteRespType) * TSDB_MAX_REPLICA); +} + +void syncRaftProgressVisit(SSyncRaftProgressTracker* tracker, visitProgressFp visit, void* arg) { + int i; + for (i = 0; i < TSDB_MAX_REPLICA; ++i) { + SSyncRaftProgress* progress = &(tracker->progressMap[i]); + if (progress->id == SYNC_NON_NODE_ID) { + continue; + } + + visit(progress, arg); + } +} \ No newline at end of file From 5e9909a1e67013617037d387fe323ff84a251b7e Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 17:13:22 +0800 Subject: [PATCH 62/94] more --- include/util/amalloc.h | 52 ---------------- include/util/mallocator.h | 57 +++++++++++++++++ source/dnode/vnode/impl/inc/vnodeInt.h | 1 - source/util/src/arenaAllocator.c | 14 +++++ source/util/src/heapAllocator.c | 86 ++++++++++++++++++++++++++ 5 files changed, 157 insertions(+), 53 deletions(-) delete mode 100644 include/util/amalloc.h create mode 100644 include/util/mallocator.h create mode 100644 source/util/src/arenaAllocator.c create mode 100644 source/util/src/heapAllocator.c diff --git a/include/util/amalloc.h b/include/util/amalloc.h deleted file mode 100644 index 938e1caa4c..0000000000 --- a/include/util/amalloc.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_AMALLOC_H_ -#define _TD_AMALLOC_H_ - -#include "os.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define AMALLOC_APIS \ - void *(*malloc)(void *, size_t size); \ - void *(*calloc)(void *, size_t nmemb, size_t size); \ - void *(*realloc)(void *, size_t size); \ - void (*free)(void *ptr); - -// Interfaces to implement -typedef struct { - AMALLOC_APIS -} SMemAllocatorIf; - -typedef struct { - void *impl; - AMALLOC_APIS -} SMemAllocator; - -#define amalloc(allocator, size) ((allocator) ? (*((allocator)->malloc))((allocator)->impl, (size)) : malloc(size)) -#define acalloc(allocator, nmemb, size) \ - ((allocator) ? (*((allocator)->calloc))((allocator)->impl, (nmemb), (size)) : calloc((nmemb), (size))) -#define arealloc(allocator, ptr, size) \ - ((allocator) ? (*((allocator)->realloc))((allocator)->impl, (ptr), (size)) : realloc((ptr), (size))) -#define afree(allocator, ptr, size) ((allocator) ? (*((allocator)->free))((allocator)->impl, (ptr), (size)) : free(ptr)) - -#ifdef __cplusplus -} -#endif - -#endif /*_TD_AMALLOC_H_*/ \ No newline at end of file diff --git a/include/util/mallocator.h b/include/util/mallocator.h new file mode 100644 index 0000000000..87cccdbedf --- /dev/null +++ b/include/util/mallocator.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_MALLOCATOR_H_ +#define _TD_MALLOCATOR_H_ + +#include "os.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct SMemAllocator SMemAllocator; + +#define MALLOCATOR_APIS \ + void *(*malloc)(SMemAllocator *, size_t size); \ + void *(*calloc)(SMemAllocator *, size_t nmemb, size_t size); \ + void *(*realloc)(SMemAllocator *, size_t size); \ + void (*free)(SMemAllocator *, void *ptr); \ + size_t (*usage)(SMemAllocator *); + +// Interfaces to implement +typedef struct { + MALLOCATOR_APIS +} SMemAllocatorIf; + +struct SMemAllocator { + void * impl; + size_t usize; + MALLOCATOR_APIS +}; + +// heap allocator +SMemAllocator *tdCreateHeapAllocator(); +void tdDestroyHeapAllocator(SMemAllocator *pMemAllocator); + +// arena allocator +SMemAllocator *tdCreateArenaAllocator(size_t size); +void tdDestroyArenaAllocator(SMemAllocator *); + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_MALLOCATOR_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/impl/inc/vnodeInt.h b/source/dnode/vnode/impl/inc/vnodeInt.h index 957690a451..48977ff046 100644 --- a/source/dnode/vnode/impl/inc/vnodeInt.h +++ b/source/dnode/vnode/impl/inc/vnodeInt.h @@ -18,7 +18,6 @@ #include "vnode.h" -#include "amalloc.h" #include "meta.h" #include "sync.h" #include "tlog.h" diff --git a/source/util/src/arenaAllocator.c b/source/util/src/arenaAllocator.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/util/src/arenaAllocator.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ \ No newline at end of file diff --git a/source/util/src/heapAllocator.c b/source/util/src/heapAllocator.c new file mode 100644 index 0000000000..3b0d975e7e --- /dev/null +++ b/source/util/src/heapAllocator.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "mallocator.h" + +typedef struct { + char name[64]; +} SHeapAllocator; + +static SHeapAllocator *haNew(); +static void haDestroy(SHeapAllocator *pha); +static void * haMalloc(SMemAllocator *pMemAllocator, size_t size); +void * haCalloc(SMemAllocator *pMemAllocator, size_t nmemb, size_t size); +static void haFree(SMemAllocator *pMemAllocator, void *ptr); + +SMemAllocator *tdCreateHeapAllocator() { + SMemAllocator *pMemAllocator = NULL; + + pMemAllocator = (SMemAllocator *)calloc(1, sizeof(*pMemAllocator)); + if (pMemAllocator == NULL) { + // TODO: handle error + return NULL; + } + + pMemAllocator->impl = haNew(); + if (pMemAllocator->impl == NULL) { + tdDestroyHeapAllocator(pMemAllocator); + return NULL; + } + + pMemAllocator->usage = 0; + pMemAllocator->malloc = haMalloc; + pMemAllocator->calloc = haCalloc; + pMemAllocator->realloc = NULL; + pMemAllocator->free = haFree; + pMemAllocator->usage = NULL; + + return pMemAllocator; +} + +void tdDestroyHeapAllocator(SMemAllocator *pMemAllocator) { + if (pMemAllocator) { + // TODO + } +} + +/* ------------------------ STATIC METHODS ------------------------ */ +static SHeapAllocator *haNew() { + SHeapAllocator *pha = NULL; + /* TODO */ + return pha; +} + +static void haDestroy(SHeapAllocator *pha) { + // TODO +} + +static void *haMalloc(SMemAllocator *pMemAllocator, size_t size) { + void *ptr = NULL; + + ptr = malloc(size); + if (ptr) { + } + + return ptr; +} + +void *haCalloc(SMemAllocator *pMemAllocator, size_t nmemb, size_t size) { + /* TODO */ + return NULL; +} + +static void haFree(SMemAllocator *pMemAllocator, void *ptr) { /* TODO */ +} \ No newline at end of file From 867b90728774995cfb1103bffb9d938cf5a19b23 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 8 Nov 2021 17:47:28 +0800 Subject: [PATCH 63/94] refact --- source/util/src/heapAllocator.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/util/src/heapAllocator.c b/source/util/src/heapAllocator.c index 3b0d975e7e..645277b386 
100644 --- a/source/util/src/heapAllocator.c +++ b/source/util/src/heapAllocator.c @@ -40,7 +40,6 @@ SMemAllocator *tdCreateHeapAllocator() { return NULL; } - pMemAllocator->usage = 0; pMemAllocator->malloc = haMalloc; pMemAllocator->calloc = haCalloc; pMemAllocator->realloc = NULL; From 9dae1f317bb44b135a1fa3f92c9de9bc60f6ec66 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Mon, 8 Nov 2021 18:23:26 +0800 Subject: [PATCH 64/94] add buffer io for tq --- source/dnode/vnode/tq/inc/tqMetaStore.h | 11 ++- source/dnode/vnode/tq/src/tqMetaStore.c | 86 +++++++++++++++++------ source/dnode/vnode/tq/test/tqMetaTest.cpp | 2 - 3 files changed, 75 insertions(+), 24 deletions(-) diff --git a/source/dnode/vnode/tq/inc/tqMetaStore.h b/source/dnode/vnode/tq/inc/tqMetaStore.h index 73a3d26aeb..2b3ddc8765 100644 --- a/source/dnode/vnode/tq/inc/tqMetaStore.h +++ b/source/dnode/vnode/tq/inc/tqMetaStore.h @@ -27,10 +27,17 @@ extern "C" { #define TQ_BUCKET_SIZE 0xFF #define TQ_PAGE_SIZE 4096 //key + offset + size -#define TQ_IDX_ENTRY_SIZE 24 +#define TQ_IDX_SIZE 24 +//4096 / 24 +#define TQ_MAX_IDX_ONE_PAGE 170 +//24 * 170 +#define TQ_IDX_PAGE_BODY_SIZE 4080 +//4096 - 4080 +#define TQ_IDX_PAGE_HEAD_SIZE 16 + inline static int TqMaxEntryOnePage() { //170 - return TQ_PAGE_SIZE / TQ_IDX_ENTRY_SIZE; + return TQ_PAGE_SIZE / TQ_IDX_SIZE; } inline static int TqEmptyTail() { //16 diff --git a/source/dnode/vnode/tq/src/tqMetaStore.c b/source/dnode/vnode/tq/src/tqMetaStore.c index 9b91a8e051..e99d98edb3 100644 --- a/source/dnode/vnode/tq/src/tqMetaStore.c +++ b/source/dnode/vnode/tq/src/tqMetaStore.c @@ -35,10 +35,38 @@ static inline void tqLinkUnpersist(TqMetaStore *pMeta, TqMetaList* pNode) { } } -typedef struct TqMetaPageBuf { - int16_t offset; - char buffer[TQ_PAGE_SIZE]; -} TqMetaPageBuf; +static inline int tqSeekLastPage(int fd) { + int offset = lseek(fd, 0, SEEK_END); + int pageNo = offset / TQ_PAGE_SIZE; + int curPageOffset = pageNo * TQ_PAGE_SIZE; + return lseek(fd, curPageOffset, 
SEEK_SET); +} + +//TODO: the struct is tightly coupled with index entry +typedef struct TqIdxPageHead { + int16_t writeOffset; + int8_t unused[14]; +} TqIdxPageHead; + +typedef struct TqIdxPageBuf { + TqIdxPageHead head; + char buffer[TQ_IDX_PAGE_BODY_SIZE]; +} TqIdxPageBuf; + +static inline int tqReadLastPage(int fd, TqIdxPageBuf* pBuf) { + int offset = tqSeekLastPage(fd); + int nBytes; + if((nBytes = read(fd, pBuf, TQ_PAGE_SIZE)) == -1) { + return -1; + } + if(nBytes == 0) { + memset(pBuf, 0, TQ_PAGE_SIZE); + pBuf->head.writeOffset = TQ_IDX_PAGE_HEAD_SIZE; + } + ASSERT(nBytes == 0 || nBytes == pBuf->head.writeOffset); + + return lseek(fd, offset, SEEK_SET); +} TqMetaStore* tqStoreOpen(const char* path, int serializer(const void* pObj, TqSerializedHead** ppHead), @@ -102,27 +130,31 @@ TqMetaStore* tqStoreOpen(const char* path, pMeta->deleter = deleter; //read idx file and load into memory - char idxBuf[TQ_PAGE_SIZE]; + /*char idxBuf[TQ_PAGE_SIZE];*/ + TqIdxPageBuf idxBuf; TqSerializedHead* serializedObj = malloc(TQ_PAGE_SIZE); if(serializedObj == NULL) { //TODO:memory insufficient } int idxRead; int allocated = TQ_PAGE_SIZE; - while((idxRead = read(idxFd, idxBuf, TQ_PAGE_SIZE))) { + bool readEnd = false; + while((idxRead = read(idxFd, &idxBuf, TQ_PAGE_SIZE))) { if(idxRead == -1) { //TODO: handle error ASSERT(false); } + ASSERT(idxBuf.head.writeOffset == idxRead); //loop read every entry - for(int i = 0; i < idxRead; i += TQ_IDX_ENTRY_SIZE) { + for(int i = 0; i < idxBuf.head.writeOffset - TQ_IDX_PAGE_HEAD_SIZE; i += TQ_IDX_SIZE) { TqMetaList *pNode = malloc(sizeof(TqMetaList)); if(pNode == NULL) { //TODO: free memory and return error } memset(pNode, 0, sizeof(TqMetaList)); - memcpy(&pNode->handle, &idxBuf[i], TQ_IDX_ENTRY_SIZE); - lseek(fileFd, pNode->handle.offset, SEEK_CUR); + memcpy(&pNode->handle, &idxBuf.buffer[i], TQ_IDX_SIZE); + + lseek(fileFd, pNode->handle.offset, SEEK_SET); if(allocated < pNode->handle.serializedSize) { void *ptr = realloc(serializedObj, 
pNode->handle.serializedSize); if(ptr == NULL) { @@ -263,8 +295,8 @@ int32_t tqStoreDelete(TqMetaStore* pMeta) { //TODO: wrap in tfile int32_t tqStorePersist(TqMetaStore* pMeta) { - char writeBuf[TQ_PAGE_SIZE]; - int64_t* bufPtr = (int64_t*)writeBuf; + TqIdxPageBuf idxBuf; + int64_t* bufPtr = (int64_t*)idxBuf.buffer; TqMetaList *pHead = pMeta->unpersistHead; TqMetaList *pNode = pHead->unpersistNext; TqSerializedHead *pSHead = malloc(sizeof(TqSerializedHead)); @@ -277,6 +309,17 @@ int32_t tqStorePersist(TqMetaStore* pMeta) { pSHead->ssize = sizeof(TqSerializedHead); int allocatedSize = sizeof(TqSerializedHead); int offset = lseek(pMeta->fileFd, 0, SEEK_CUR); + + tqReadLastPage(pMeta->idxFd, &idxBuf); + + if(idxBuf.head.writeOffset == TQ_PAGE_SIZE) { + lseek(pMeta->idxFd, 0, SEEK_END); + memset(&idxBuf, 0, TQ_PAGE_SIZE); + idxBuf.head.writeOffset = TQ_IDX_PAGE_HEAD_SIZE; + } else { + bufPtr = POINTER_SHIFT(&idxBuf, idxBuf.head.writeOffset); + } + while(pHead != pNode) { int nBytes = 0; @@ -307,18 +350,21 @@ int32_t tqStorePersist(TqMetaStore* pMeta) { ASSERT(nBytesTxn == pSHead->ssize); nBytes += nBytesTxn; } + pNode->handle.offset = offset; + offset += nBytes; //write idx file //TODO: endian check and convert *(bufPtr++) = pNode->handle.key; *(bufPtr++) = pNode->handle.offset; *(bufPtr++) = (int64_t)nBytes; - if((char*)(bufPtr + 3) > writeBuf + TQ_PAGE_SIZE) { - nBytes = write(pMeta->idxFd, writeBuf, sizeof(writeBuf)); + idxBuf.head.writeOffset += TQ_IDX_SIZE; + if(idxBuf.head.writeOffset >= TQ_PAGE_SIZE) { + nBytes = write(pMeta->idxFd, &idxBuf, TQ_PAGE_SIZE); //TODO: handle error with tfile - ASSERT(nBytes == sizeof(writeBuf)); - memset(writeBuf, 0, TQ_PAGE_SIZE); - bufPtr = (int64_t*)writeBuf; + ASSERT(nBytes == TQ_PAGE_SIZE); + memset(&idxBuf, 0, TQ_PAGE_SIZE); + bufPtr = (int64_t*)&idxBuf.buffer; } //remove from unpersist list pHead->unpersistNext = pNode->unpersistNext; @@ -350,11 +396,11 @@ int32_t tqStorePersist(TqMetaStore* pMeta) { //write left bytes 
free(pSHead); - if((char*)bufPtr != writeBuf) { - int used = (char*)bufPtr - writeBuf; - int nBytes = write(pMeta->idxFd, writeBuf, used); + //TODO: write new version in tfile + if((char*)bufPtr != idxBuf.buffer) { + int nBytes = write(pMeta->idxFd, &idxBuf, idxBuf.head.writeOffset); //TODO: handle error in tfile - ASSERT(nBytes == used); + ASSERT(nBytes == idxBuf.head.writeOffset); } //TODO: using fsync in tfile fsync(pMeta->idxFd); diff --git a/source/dnode/vnode/tq/test/tqMetaTest.cpp b/source/dnode/vnode/tq/test/tqMetaTest.cpp index 4bf56a0a56..d0511c2e2c 100644 --- a/source/dnode/vnode/tq/test/tqMetaTest.cpp +++ b/source/dnode/vnode/tq/test/tqMetaTest.cpp @@ -86,8 +86,6 @@ TEST_F(TqMetaTest, persistTest) { pBar = (Foo*)tqHandleGet(pMeta, 2); EXPECT_EQ(pBar == NULL, true); - - //taosRemoveDir(pathName); } TEST_F(TqMetaTest, uncommittedTest) { From e17f573e0e9f18b02a5cbf367c5f454bb0b0d9b8 Mon Sep 17 00:00:00 2001 From: lichuang Date: Tue, 9 Nov 2021 10:53:08 +0800 Subject: [PATCH 65/94] [TD-10645][raft]add raft progress tracker --- source/libs/sync/inc/raft.h | 7 +- source/libs/sync/inc/raft_progress.h | 217 ---------------- source/libs/sync/inc/sync_raft_inflights.h | 77 ++++++ source/libs/sync/inc/sync_raft_progress.h | 235 ++++++++++++++++++ .../sync/inc/sync_raft_progress_tracker.h | 4 +- source/libs/sync/src/raft.c | 6 +- source/libs/sync/src/raft_replication.c | 2 +- source/libs/sync/src/sync_raft_inflights.c | 104 ++++++++ .../{raft_progress.c => sync_raft_progress.c} | 188 ++++++-------- .../sync/src/sync_raft_progress_tracker.c | 6 +- 10 files changed, 498 insertions(+), 348 deletions(-) delete mode 100644 source/libs/sync/inc/raft_progress.h create mode 100644 source/libs/sync/inc/sync_raft_inflights.h create mode 100644 source/libs/sync/inc/sync_raft_progress.h create mode 100644 source/libs/sync/src/sync_raft_inflights.c rename source/libs/sync/src/{raft_progress.c => sync_raft_progress.c} (59%) diff --git a/source/libs/sync/inc/raft.h 
b/source/libs/sync/inc/raft.h index 795ea7cc99..c8bf63f81c 100644 --- a/source/libs/sync/inc/raft.h +++ b/source/libs/sync/inc/raft.h @@ -44,6 +44,7 @@ struct SSyncRaft { SSyncCluster cluster; + int selfIndex; SyncNodeId selfId; SyncGroupId selfGroupId; @@ -113,9 +114,6 @@ struct SSyncRaft { **/ uint16_t heartbeatElapsed; - // current tick count since start up - uint32_t currentTick; - bool preVote; bool checkQuorum; @@ -130,6 +128,9 @@ struct SSyncRaft { int randomizedElectionTimeout; bool disableProposalForwarding; + // current tick count since start up + uint32_t currentTick; + SyncRaftStepFp stepFp; SyncRaftTickFp tickFp; diff --git a/source/libs/sync/inc/raft_progress.h b/source/libs/sync/inc/raft_progress.h deleted file mode 100644 index 41d66d59d0..0000000000 --- a/source/libs/sync/inc/raft_progress.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TD_SYNC_RAFT_PROGRESS_H -#define TD_SYNC_RAFT_PROGRESS_H - -#include "sync_type.h" - -/** - * SSyncRaftInflights is a sliding window for the inflight messages. - * Thus inflight effectively limits both the number of inflight messages - * and the bandwidth each Progress can use. - * When inflights is full, no more message should be sent. - * When a leader sends out a message, the index of the last - * entry should be added to inflights. The index MUST be added - * into inflights in order. 
- * When a leader receives a reply, the previous inflights should - * be freed by calling syncRaftInflightFreeTo with the index of the last - * received entry. - **/ -typedef struct SSyncRaftInflights { - /* the starting index in the buffer */ - int start; - - /* number of inflights in the buffer */ - int count; - - /* the size of the buffer */ - int size; - - /** - * buffer contains the index of the last entry - * inside one message. - **/ - SyncIndex* buffer; -} SSyncRaftInflights; - -/** - * State defines how the leader should interact with the follower. - * - * When in PROGRESS_PROBE, leader sends at most one replication message - * per heartbeat interval. It also probes actual progress of the follower. - * - * When in PROGRESS_REPLICATE, leader optimistically increases next - * to the latest entry sent after sending replication message. This is - * an optimized state for fast replicating log entries to the follower. - * - * When in PROGRESS_SNAPSHOT, leader should have sent out snapshot - * before and stops sending any replication message. - * - * PROGRESS_PROBE is the initial state. - **/ -typedef enum RaftProgressState { - PROGRESS_PROBE = 0, - PROGRESS_REPLICATE, - PROGRESS_SNAPSHOT, -} RaftProgressState; - -/** - * Progress represents a follower’s progress in the view of the leader. Leader maintains - * progresses of all followers, and sends entries to the follower based on its progress. - **/ -struct SSyncRaftProgress { - SyncNodeId id; - - SyncIndex nextIndex; - - SyncIndex matchIndex; - - RaftProgressState state; - - /** - * paused is used in PROGRESS_PROBE. - * When paused is true, raft should pause sending replication message to this peer. - **/ - bool paused; - - // last send append message tick - uint32_t lastSendTick; - - /** - * pendingSnapshotIndex is used in PROGRESS_SNAPSHOT. - * If there is a pending snapshot, the pendingSnapshotIndex will be set to the - * index of the snapshot. 
If pendingSnapshotIndex is set, the replication process of - * this Progress will be paused. raft will not resend snapshot until the pending one - * is reported to be failed. - **/ - SyncIndex pendingSnapshotIndex; - - /** - * recentActive is true if the progress is recently active. Receiving any messages - * from the corresponding follower indicates the progress is active. - * RecentActive can be reset to false after an election timeout. - **/ - bool recentActive; - - /** - * flow control sliding window - **/ - SSyncRaftInflights inflights; - - // IsLearner is true if this progress is tracked for a learner. - bool isLearner; -}; - -void syncRaftInitProgress(SSyncRaft* pRaft, SSyncRaftProgress* progress); - -/** - * syncRaftProgressMaybeUpdate returns false if the given lastIndex index comes from i-th node's log. - * Otherwise it updates the progress and returns true. - **/ -bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex); - -static FORCE_INLINE void syncRaftProgressOptimisticNextIndex(SSyncRaftProgress* progress, SyncIndex nextIndex) { - progress->nextIndex = nextIndex + 1; -} - -/** - * syncRaftProgressMaybeDecrTo returns false if the given to index comes from an out of order message. - * Otherwise it decreases the progress next index to min(rejected, last) and returns true. - **/ -bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress, - SyncIndex rejected, SyncIndex lastIndex); - -/** - * syncRaftProgressIsPaused returns whether sending log entries to this node has been - * paused. A node may be paused because it has rejected recent - * MsgApps, is currently waiting for a snapshot, or has reached the - * MaxInflightMsgs limit. 
- **/ -bool syncRaftProgressIsPaused(SSyncRaftProgress* progress); - -static FORCE_INLINE void syncRaftProgressPause(SSyncRaftProgress* progress) { - progress->paused = true; -} - -static FORCE_INLINE SyncIndex syncRaftProgressNextIndex(SSyncRaftProgress* progress) { - return progress->nextIndex; -} - -static FORCE_INLINE RaftProgressState syncRaftProgressInReplicate(SSyncRaftProgress* progress) { - return progress->state == PROGRESS_REPLICATE; -} - -static FORCE_INLINE RaftProgressState syncRaftProgressInSnapshot(SSyncRaftProgress* progress) { - return progress->state == PROGRESS_SNAPSHOT; -} - -static FORCE_INLINE RaftProgressState syncRaftProgressInProbe(SSyncRaftProgress* progress) { - return progress->state == PROGRESS_PROBE; -} - -static FORCE_INLINE bool syncRaftProgressRecentActive(SSyncRaftProgress* progress) { - return progress->recentActive; -} - -static FORCE_INLINE bool syncRaftProgressUpdateSendTick(SSyncRaftProgress* progress, SyncTick current) { - return progress->lastSendTick = current; -} - -void syncRaftProgressFailure(SSyncRaftProgress* progress); - -bool syncRaftProgressNeedAbortSnapshot(SSyncRaftProgress* progress); - -/** - * return true if progress's log is up-todate - **/ -bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress); - -void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress); - -void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress); - -void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex); - -/* inflights APIs */ -int syncRaftInflightReset(SSyncRaftInflights* inflights); -bool syncRaftInflightFull(SSyncRaftInflights* inflights); -void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex); -void syncRaftInflightFreeTo(SSyncRaftInflights* inflights, SyncIndex toIndex); -void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights); - -#if 0 - -void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i); - - - -SyncIndex 
syncRaftProgressMatchIndex(SSyncRaft* pRaft, int i); - -void syncRaftProgressUpdateLastSend(SSyncRaft* pRaft, int i); - -void syncRaftProgressUpdateSnapshotLastSend(SSyncRaft* pRaft, int i); - -bool syncRaftProgressResetRecentRecv(SSyncRaft* pRaft, int i); - -void syncRaftProgressMarkRecentRecv(SSyncRaft* pRaft, int i); - - - -void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i); - -#endif - -#endif /* TD_SYNC_RAFT_PROGRESS_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/sync_raft_inflights.h b/source/libs/sync/inc/sync_raft_inflights.h new file mode 100644 index 0000000000..6d249c9274 --- /dev/null +++ b/source/libs/sync/inc/sync_raft_inflights.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TD_SYNC_RAFT_INFLIGHTS_H +#define TD_SYNC_RAFT_INFLIGHTS_H + +#include "sync.h" + +/** + * SSyncRaftInflights limits the number of MsgApp (represented by the largest index + * contained within) sent to followers but not yet acknowledged by them. Callers + * use syncRaftInflightFull() to check whether more messages can be sent, + * call syncRaftInflightAdd() whenever they are sending a new append, + * and release "quota" via FreeLE() whenever an ack is received. 
+**/ +typedef struct SSyncRaftInflights { + /* the starting index in the buffer */ + int start; + + /* number of inflights in the buffer */ + int count; + + /* the size of the buffer */ + int size; + + /** + * buffer contains the index of the last entry + * inside one message. + **/ + SyncIndex* buffer; +} SSyncRaftInflights; + +SSyncRaftInflights* syncRaftOpenInflights(int size); +void syncRaftCloseInflights(SSyncRaftInflights*); + +static FORCE_INLINE void syncRaftInflightReset(SSyncRaftInflights* inflights) { + inflights->count = 0; + inflights->start = 0; +} + +static FORCE_INLINE bool syncRaftInflightFull(SSyncRaftInflights* inflights) { + return inflights->count == inflights->size; +} + +/** + * syncRaftInflightAdd notifies the Inflights that a new message with the given index is being + * dispatched. syncRaftInflightFull() must be called prior to syncRaftInflightAdd() + * to verify that there is room for one more message, + * and consecutive calls to add syncRaftInflightAdd() must provide a + * monotonic sequence of indexes. + **/ +void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex); + +/** + * syncRaftInflightFreeLE frees the inflights smaller or equal to the given `to` flight. + **/ +void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex); + +/** + * syncRaftInflightFreeFirstOne releases the first inflight. + * This is a no-op if nothing is inflight. + **/ +void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights); + +#endif /* TD_SYNC_RAFT_INFLIGHTS_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/sync_raft_progress.h b/source/libs/sync/inc/sync_raft_progress.h new file mode 100644 index 0000000000..1f693219be --- /dev/null +++ b/source/libs/sync/inc/sync_raft_progress.h @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TD_SYNC_RAFT_PROGRESS_H +#define TD_SYNC_RAFT_PROGRESS_H + +#include "sync_type.h" +#include "sync_raft_inflights.h" + +/** + * State defines how the leader should interact with the follower. + * + * When in PROGRESS_STATE_PROBE, leader sends at most one replication message + * per heartbeat interval. It also probes actual progress of the follower. + * + * When in PROGRESS_STATE_REPLICATE, leader optimistically increases next + * to the latest entry sent after sending replication message. This is + * an optimized state for fast replicating log entries to the follower. + * + * When in PROGRESS_STATE_SNAPSHOT, leader should have sent out snapshot + * before and stops sending any replication message. + * + * PROGRESS_STATE_PROBE is the initial state. + **/ +typedef enum RaftProgressState { + /** + * StateProbe indicates a follower whose last index isn't known. Such a + * follower is "probed" (i.e. an append sent periodically) to narrow down + * its last index. In the ideal (and common) case, only one round of probing + * is necessary as the follower will react with a hint. Followers that are + * probed over extended periods of time are often offline. + **/ + PROGRESS_STATE_PROBE = 0, + + /** + * StateReplicate is the state steady in which a follower eagerly receives + * log entries to append to its log. 
+ **/ + PROGRESS_STATE_REPLICATE, + + /** + * StateSnapshot indicates a follower that needs log entries not available + * from the leader's Raft log. Such a follower needs a full snapshot to + * return to StateReplicate. + **/ + PROGRESS_STATE_SNAPSHOT, +} RaftProgressState; + +/** + * Progress represents a follower’s progress in the view of the leader. Leader maintains + * progresses of all followers, and sends entries to the follower based on its progress. + **/ +struct SSyncRaftProgress { + SyncIndex nextIndex; + + SyncIndex matchIndex; + + /** + * State defines how the leader should interact with the follower. + * + * When in StateProbe, leader sends at most one replication message + * per heartbeat interval. It also probes actual progress of the follower. + * + * When in StateReplicate, leader optimistically increases next + * to the latest entry sent after sending replication message. This is + * an optimized state for fast replicating log entries to the follower. + * + * When in StateSnapshot, leader should have sent out snapshot + * before and stops sending any replication message. + **/ + RaftProgressState state; + + /** + * pendingSnapshotIndex is used in PROGRESS_STATE_SNAPSHOT. + * If there is a pending snapshot, the pendingSnapshotIndex will be set to the + * index of the snapshot. If pendingSnapshotIndex is set, the replication process of + * this Progress will be paused. raft will not resend snapshot until the pending one + * is reported to be failed. + **/ + SyncIndex pendingSnapshotIndex; + + /** + * recentActive is true if the progress is recently active. Receiving any messages + * from the corresponding follower indicates the progress is active. + * RecentActive can be reset to false after an election timeout. + **/ + bool recentActive; + + /** + * probeSent is used while this follower is in StateProbe. When probeSent is + * true, raft should pause sending replication message to this peer until + * probeSent is reset. 
See ProbeAcked() and IsPaused(). + **/ + bool probeSent; + + /** + * inflights is a sliding window for the inflight messages. + * Each inflight message contains one or more log entries. + * The max number of entries per message is defined in raft config as MaxSizePerMsg. + * Thus inflight effectively limits both the number of inflight messages + * and the bandwidth each Progress can use. + * When inflights is Full, no more message should be sent. + * When a leader sends out a message, the index of the last + * entry should be added to inflights. The index MUST be added + * into inflights in order. + * When a leader receives a reply, the previous inflights should + * be freed by calling inflights.FreeLE with the index of the last + * received entry. + **/ + SSyncRaftInflights* inflights; + + /** + * IsLearner is true if this progress is tracked for a learner. + **/ + bool isLearner; +}; + +void syncRaftInitProgress(int i, SSyncRaft* pRaft, SSyncRaftProgress* progress); + +/** + * syncRaftProgressBecomeProbe transitions into StateProbe. Next is reset to Match+1 or, + * optionally and if larger, the index of the pending snapshot. + **/ +void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress); + +/** + * syncRaftProgressBecomeReplicate transitions into StateReplicate, resetting Next to Match+1. + **/ +void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress); + +/** + * syncRaftProgressMaybeUpdate is called when an MsgAppResp arrives from the follower, with the + * index acked by it. The method returns false if the given n index comes from + * an outdated message. Otherwise it updates the progress and returns true. + **/ +bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex); + +/** + * syncRaftProgressOptimisticNextIndex signals that appends all the way up to and including index n + * are in-flight. As a result, Next is increased to n+1. 
+ **/ +static FORCE_INLINE void syncRaftProgressOptimisticNextIndex(SSyncRaftProgress* progress, SyncIndex nextIndex) { + progress->nextIndex = nextIndex + 1; +} + +/** + * syncRaftProgressMaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The + * arguments are the index of the append message rejected by the follower, and + * the hint that we want to decrease to. + * + * Rejections can happen spuriously as messages are sent out of order or + * duplicated. In such cases, the rejection pertains to an index that the + * Progress already knows were previously acknowledged, and false is returned + * without changing the Progress. + * + * If the rejection is genuine, Next is lowered sensibly, and the Progress is + * cleared for sending log entries. +**/ +bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress, + SyncIndex rejected, SyncIndex matchHint); + +/** + * syncRaftProgressIsPaused returns whether sending log entries to this node has been throttled. + * This is done when a node has rejected recent MsgApps, is currently waiting + * for a snapshot, or has reached the MaxInflightMsgs limit. In normal + * operation, this is false. A throttled node will be contacted less frequently + * until it has reached a state in which it's able to accept a steady stream of + * log entries again. 
+ **/ +bool syncRaftProgressIsPaused(SSyncRaftProgress* progress); + +static FORCE_INLINE SyncIndex syncRaftProgressNextIndex(SSyncRaftProgress* progress) { + return progress->nextIndex; +} + +static FORCE_INLINE RaftProgressState syncRaftProgressInReplicate(SSyncRaftProgress* progress) { + return progress->state == PROGRESS_STATE_REPLICATE; +} + +static FORCE_INLINE RaftProgressState syncRaftProgressInSnapshot(SSyncRaftProgress* progress) { + return progress->state == PROGRESS_STATE_SNAPSHOT; +} + +static FORCE_INLINE RaftProgressState syncRaftProgressInProbe(SSyncRaftProgress* progress) { + return progress->state == PROGRESS_STATE_PROBE; +} + +static FORCE_INLINE bool syncRaftProgressRecentActive(SSyncRaftProgress* progress) { + return progress->recentActive; +} + +/** + * return true if progress's log is up-todate + **/ +bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress); + +void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex); + + + +#if 0 + +void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i); + + + +SyncIndex syncRaftProgressMatchIndex(SSyncRaft* pRaft, int i); + +void syncRaftProgressUpdateLastSend(SSyncRaft* pRaft, int i); + +void syncRaftProgressUpdateSnapshotLastSend(SSyncRaft* pRaft, int i); + +bool syncRaftProgressResetRecentRecv(SSyncRaft* pRaft, int i); + +void syncRaftProgressMarkRecentRecv(SSyncRaft* pRaft, int i); + + + +void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i); + +#endif + +#endif /* TD_SYNC_RAFT_PROGRESS_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/sync_raft_progress_tracker.h b/source/libs/sync/inc/sync_raft_progress_tracker.h index ffc134fec4..40d43895c8 100644 --- a/source/libs/sync/inc/sync_raft_progress_tracker.h +++ b/source/libs/sync/inc/sync_raft_progress_tracker.h @@ -18,7 +18,7 @@ #include "sync_type.h" #include "sync_raft_quorum_joint.h" -#include "raft_progress.h" +#include "sync_raft_progress.h" struct 
SSyncRaftProgressTrackerConfig { SSyncRaftQuorumJointConfig voters; @@ -94,7 +94,7 @@ SSyncRaftProgressTracker* syncRaftOpenProgressTracker(); void syncRaftResetVotes(SSyncRaftProgressTracker*); -typedef void (*visitProgressFp)(SSyncRaftProgress* progress, void* arg); +typedef void (*visitProgressFp)(int i, SSyncRaftProgress* progress, void* arg); void syncRaftProgressVisit(SSyncRaftProgressTracker*, visitProgressFp visit, void* arg); #endif /* _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H */ diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c index 4a3654131c..b43a35c03e 100644 --- a/source/libs/sync/src/raft.c +++ b/source/libs/sync/src/raft.c @@ -102,6 +102,8 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { syncRaftBecomeFollower(pRaft, pRaft->term, SYNC_NON_NODE_ID); + pRaft->selfIndex = pRaft->cluster.selfIndex; + syncInfo("[%d:%d] restore vgid %d state: snapshot index success", pRaft->selfGroupId, pRaft->selfId, pInfo->vgId); return 0; @@ -443,8 +445,8 @@ static void abortLeaderTransfer(SSyncRaft* pRaft) { pRaft->leadTransferee = SYNC_NON_NODE_ID; } -static void initProgress(SSyncRaftProgress* progress, void* arg) { - syncRaftInitProgress((SSyncRaft*)arg, progress); +static void initProgress(int i, SSyncRaftProgress* progress, void* arg) { + syncRaftInitProgress(i, (SSyncRaft*)arg, progress); } static void resetRaft(SSyncRaft* pRaft, SyncTerm term) { diff --git a/source/libs/sync/src/raft_replication.c b/source/libs/sync/src/raft_replication.c index 473499b795..3c7216239a 100644 --- a/source/libs/sync/src/raft_replication.c +++ b/source/libs/sync/src/raft_replication.c @@ -15,7 +15,7 @@ #include "raft.h" #include "raft_log.h" -#include "raft_progress.h" +#include "sync_raft_progress.h" #include "raft_replication.h" static int sendSnapshot(SSyncRaft* pRaft, int i); diff --git a/source/libs/sync/src/sync_raft_inflights.c b/source/libs/sync/src/sync_raft_inflights.c new file mode 100644 index 0000000000..3d740b5a9e --- /dev/null 
+++ b/source/libs/sync/src/sync_raft_inflights.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "sync_raft_inflights.h" + +SSyncRaftInflights* syncRaftOpenInflights(int size) { + SSyncRaftInflights* inflights = (SSyncRaftInflights*)malloc(sizeof(SSyncRaftInflights)); + if (inflights == NULL) { + return NULL; + } + SyncIndex* buffer = (SyncIndex*)malloc(sizeof(SyncIndex) * size); + if (buffer == NULL) { + free(inflights); + return NULL; + } + *inflights = (SSyncRaftInflights) { + .buffer = buffer, + .count = 0, + .size = 0, + .start = 0, + }; + + return inflights; +} + +void syncRaftCloseInflights(SSyncRaftInflights* inflights) { + free(inflights->buffer); + free(inflights); +} + +/** + * syncRaftInflightAdd notifies the Inflights that a new message with the given index is being + * dispatched. syncRaftInflightFull() must be called prior to syncRaftInflightAdd() + * to verify that there is room for one more message, + * and consecutive calls to add syncRaftInflightAdd() must provide a + * monotonic sequence of indexes. + **/ +void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex) { + assert(!syncRaftInflightFull(inflights)); + + int next = inflights->start + inflights->count; + int size = inflights->size; + /* is next wrapped around buffer? 
*/ + if (next >= size) { + next -= size; + } + + inflights->buffer[next] = inflightIndex; + inflights->count++; +} + +/** + * syncRaftInflightFreeLE frees the inflights smaller or equal to the given `to` flight. + **/ +void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex) { + if (inflights->count == 0 || toIndex < inflights->buffer[inflights->start]) { + /* out of the left side of the window */ + return; + } + + int i, idx; + for (i = 0, idx = inflights->start; i < inflights->count; i++) { + if (toIndex < inflights->buffer[idx]) { // found the first large inflight + break; + } + + // increase index and maybe rotate + int size = inflights->size; + idx++; + if (idx >= size) { + idx -= size; + } + } + + // free i inflights and set new start index + inflights->count -= i; + inflights->start = idx; + assert(inflights->count >= 0); + if (inflights->count == 0) { + // inflights is empty, reset the start index so that we don't grow the + // buffer unnecessarily. + inflights->start = 0; + } +} + +/** + * syncRaftInflightFreeFirstOne releases the first inflight. + * This is a no-op if nothing is inflight. 
+ **/ +void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights) { + syncRaftInflightFreeLE(inflights, inflights->buffer[inflights->start]); +} diff --git a/source/libs/sync/src/raft_progress.c b/source/libs/sync/src/sync_raft_progress.c similarity index 59% rename from source/libs/sync/src/raft_progress.c rename to source/libs/sync/src/sync_raft_progress.c index 6edc808698..ec98be7dfa 100644 --- a/source/libs/sync/src/raft_progress.c +++ b/source/libs/sync/src/sync_raft_progress.c @@ -15,57 +15,50 @@ #include "raft.h" #include "raft_log.h" -#include "raft_progress.h" +#include "sync_raft_progress.h" +#include "sync_raft_progress_tracker.h" #include "sync.h" #include "syncInt.h" static void resetProgressState(SSyncRaftProgress* progress, RaftProgressState state); +static void probeAcked(SSyncRaftProgress* progress); static void resumeProgress(SSyncRaftProgress* progress); -int syncRaftProgressCreate(SSyncRaft* pRaft) { - -/* - inflights->buffer = (SyncIndex*)malloc(sizeof(SyncIndex) * pRaft->maxInflightMsgs); - if (inflights->buffer == NULL) { - return RAFT_OOM; +void syncRaftInitProgress(int i, SSyncRaft* pRaft, SSyncRaftProgress* progress) { + SSyncRaftInflights* inflights = syncRaftOpenInflights(pRaft->tracker->maxInflight); + if (inflights == NULL) { + return; } - inflights->size = pRaft->maxInflightMsgs; -*/ -} - -/* -int syncRaftProgressRecreate(SSyncRaft* pRaft, const RaftConfiguration* configuration) { - -} -*/ - -void syncRaftInitProgress(SSyncRaft* pRaft, SSyncRaftProgress* progress) { *progress = (SSyncRaftProgress) { - .matchIndex = progress->id == pRaft->selfId ? syncRaftLogLastIndex(pRaft->log) : 0, + .matchIndex = i == pRaft->selfIndex ? syncRaftLogLastIndex(pRaft->log) : 0, .nextIndex = syncRaftLogLastIndex(pRaft->log) + 1, - //.inflights = + .inflights = inflights, }; } +/** + * syncRaftProgressMaybeUpdate is called when an MsgAppResp arrives from the follower, with the + * index acked by it. 
The method returns false if the given n index comes from + * an outdated message. Otherwise it updates the progress and returns true. + **/ bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex) { bool updated = false; if (progress->matchIndex < lastIndex) { progress->matchIndex = lastIndex; updated = true; - resumeProgress(progress); - } - if (progress->nextIndex < lastIndex + 1) { - progress->nextIndex = lastIndex + 1; + probeAcked(progress); } + progress->nextIndex = MAX(progress->nextIndex, lastIndex + 1); + return updated; } bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress, - SyncIndex rejected, SyncIndex lastIndex) { - if (progress->state == PROGRESS_REPLICATE) { + SyncIndex rejected, SyncIndex matchHint) { + if (progress->state == PROGRESS_STATE_REPLICATE) { /** * the rejection must be stale if the progress has matched and "rejected" * is smaller than "match". @@ -77,143 +70,102 @@ bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress, /* directly decrease next to match + 1 */ progress->nextIndex = progress->matchIndex + 1; - //syncRaftProgressBecomeProbe(raft, i); return true; } + /** + * The rejection must be stale if "rejected" does not match next - 1. This + * is because non-replicating followers are probed one entry at a time. + **/ if (rejected != progress->nextIndex - 1) { syncDebug("rejected index %" PRId64 " different from next index %" PRId64 " -> ignore" , rejected, progress->nextIndex); return false; } - progress->nextIndex = MIN(rejected, lastIndex + 1); - if (progress->nextIndex < 1) { - progress->nextIndex = 1; - } + progress->nextIndex = MAX(MIN(rejected, matchHint + 1), 1); - resumeProgress(progress); + progress->probeSent = false; return true; } -static void resumeProgress(SSyncRaftProgress* progress) { - progress->paused = false; -} - +/** + * syncRaftProgressIsPaused returns whether sending log entries to this node has been throttled. 
+ * This is done when a node has rejected recent MsgApps, is currently waiting + * for a snapshot, or has reached the MaxInflightMsgs limit. In normal + * operation, this is false. A throttled node will be contacted less frequently + * until it has reached a state in which it's able to accept a steady stream of + * log entries again. + **/ bool syncRaftProgressIsPaused(SSyncRaftProgress* progress) { switch (progress->state) { - case PROGRESS_PROBE: - return progress->paused; - case PROGRESS_REPLICATE: - return syncRaftInflightFull(&progress->inflights); - case PROGRESS_SNAPSHOT: + case PROGRESS_STATE_PROBE: + return progress->probeSent; + case PROGRESS_STATE_REPLICATE: + return syncRaftInflightFull(progress->inflights); + case PROGRESS_STATE_SNAPSHOT: return true; default: syncFatal("error sync state:%d", progress->state); } } -void syncRaftProgressFailure(SSyncRaftProgress* progress) { - progress->pendingSnapshotIndex = 0; -} - -bool syncRaftProgressNeedAbortSnapshot(SSyncRaftProgress* progress) { - return progress->state == PROGRESS_SNAPSHOT && progress->matchIndex >= progress->pendingSnapshotIndex; -} - bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress) { return syncRaftLogLastIndex(pRaft->log) + 1 == progress->nextIndex; } +/** + * syncRaftProgressBecomeProbe transitions into StateProbe. Next is reset to Match+1 or, + * optionally and if larger, the index of the pending snapshot. + **/ void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress) { /** * If the original state is ProgressStateSnapshot, progress knows that * the pending snapshot has been sent to this peer successfully, then * probes from pendingSnapshot + 1. 
**/ - if (progress->state == PROGRESS_SNAPSHOT) { + if (progress->state == PROGRESS_STATE_SNAPSHOT) { SyncIndex pendingSnapshotIndex = progress->pendingSnapshotIndex; - resetProgressState(progress, PROGRESS_PROBE); + resetProgressState(progress, PROGRESS_STATE_PROBE); progress->nextIndex = MAX(progress->matchIndex + 1, pendingSnapshotIndex + 1); } else { - resetProgressState(progress, PROGRESS_PROBE); + resetProgressState(progress, PROGRESS_STATE_PROBE); progress->nextIndex = progress->matchIndex + 1; } } +/** + * syncRaftProgressBecomeReplicate transitions into StateReplicate, resetting Next to Match+1. + **/ void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress) { - resetProgressState(progress, PROGRESS_REPLICATE); + resetProgressState(progress, PROGRESS_STATE_REPLICATE); progress->nextIndex = progress->matchIndex + 1; } void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex) { - resetProgressState(progress, PROGRESS_SNAPSHOT); + resetProgressState(progress, PROGRESS_STATE_SNAPSHOT); progress->pendingSnapshotIndex = snapshotIndex; } -int syncRaftInflightReset(SSyncRaftInflights* inflights) { - inflights->count = 0; - inflights->start = 0; - - return 0; -} - -bool syncRaftInflightFull(SSyncRaftInflights* inflights) { - return inflights->count == inflights->size; -} - -void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex) { - assert(!syncRaftInflightFull(inflights)); - - int next = inflights->start + inflights->count; - int size = inflights->size; - /* is next wrapped around buffer? 
*/ - if (next >= size) { - next -= size; - } - - inflights->buffer[next] = inflightIndex; - inflights->count++; -} - -void syncRaftInflightFreeTo(SSyncRaftInflights* inflights, SyncIndex toIndex) { - if (inflights->count == 0 || toIndex < inflights->buffer[inflights->start]) { - return; - } - - int i, idx; - for (i = 0, idx = inflights->start; i < inflights->count; i++) { - if (toIndex < inflights->buffer[idx]) { - break; - } - - int size = inflights->size; - idx++; - if (idx >= size) { - idx -= size; - } - } - - inflights->count -= i; - inflights->start = idx; - assert(inflights->count >= 0); - if (inflights->count == 0) { - inflights->start = 0; - } -} - -void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights) { - syncRaftInflightFreeTo(inflights, inflights->buffer[inflights->start]); -} - +/** + * ResetState moves the Progress into the specified State, resetting ProbeSent, + * PendingSnapshot, and Inflights. + **/ static void resetProgressState(SSyncRaftProgress* progress, RaftProgressState state) { - progress->paused = false; + progress->probeSent = false; progress->pendingSnapshotIndex = 0; progress->state = state; - syncRaftInflightReset(&(progress->inflights)); + syncRaftInflightReset(progress->inflights); } - +/** + * probeAcked is called when this peer has accepted an append. It resets + * ProbeSent to signal that additional append messages should be sent without + * further delay. 
+ **/ +static void probeAcked(SSyncRaftProgress* progress) { + progress->probeSent = false; +} #if 0 @@ -250,33 +202,33 @@ bool syncRaftProgressGetRecentRecv(SSyncRaft* pRaft, int i) { void syncRaftProgressBecomeSnapshot(SSyncRaft* pRaft, int i) { SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); - resetProgressState(progress, PROGRESS_SNAPSHOT); + resetProgressState(progress, PROGRESS_STATE_SNAPSHOT); progress->pendingSnapshotIndex = raftLogSnapshotIndex(pRaft->log); } void syncRaftProgressBecomeProbe(SSyncRaft* pRaft, int i) { SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); - if (progress->state == PROGRESS_SNAPSHOT) { + if (progress->state == PROGRESS_STATE_SNAPSHOT) { assert(progress->pendingSnapshotIndex > 0); SyncIndex pendingSnapshotIndex = progress->pendingSnapshotIndex; - resetProgressState(progress, PROGRESS_PROBE); + resetProgressState(progress, PROGRESS_STATE_PROBE); progress->nextIndex = max(progress->matchIndex + 1, pendingSnapshotIndex); } else { - resetProgressState(progress, PROGRESS_PROBE); + resetProgressState(progress, PROGRESS_STATE_PROBE); progress->nextIndex = progress->matchIndex + 1; } } void syncRaftProgressBecomeReplicate(SSyncRaft* pRaft, int i) { - resetProgressState(pRaft->leaderState.progress, PROGRESS_REPLICATE); + resetProgressState(pRaft->leaderState.progress, PROGRESS_STATE_REPLICATE); pRaft->leaderState.progress->nextIndex = pRaft->leaderState.progress->matchIndex + 1; } void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i) { SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); progress->pendingSnapshotIndex = 0; - progress->state = PROGRESS_PROBE; + progress->state = PROGRESS_STATE_PROBE; } RaftProgressState syncRaftProgressState(SSyncRaft* pRaft, int i) { diff --git a/source/libs/sync/src/sync_raft_progress_tracker.c b/source/libs/sync/src/sync_raft_progress_tracker.c index d349cbb9b2..7104794cbb 100644 --- a/source/libs/sync/src/sync_raft_progress_tracker.c +++ 
b/source/libs/sync/src/sync_raft_progress_tracker.c @@ -32,10 +32,6 @@ void syncRaftProgressVisit(SSyncRaftProgressTracker* tracker, visitProgressFp vi int i; for (i = 0; i < TSDB_MAX_REPLICA; ++i) { SSyncRaftProgress* progress = &(tracker->progressMap[i]); - if (progress->id == SYNC_NON_NODE_ID) { - continue; - } - - visit(progress, arg); + visit(i, progress, arg); } } \ No newline at end of file From 17578d2427b98a2800a34827b7a0174a02629acb Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 9 Nov 2021 10:59:34 +0800 Subject: [PATCH 66/94] more --- include/util/mallocator.h | 8 +- source/util/src/arenaAllocator.c | 14 --- source/util/src/heapAllocator.c | 85 --------------- source/util/src/mallocator.c | 180 +++++++++++++++++++++++++++++++ 4 files changed, 187 insertions(+), 100 deletions(-) delete mode 100644 source/util/src/arenaAllocator.c delete mode 100644 source/util/src/heapAllocator.c create mode 100644 source/util/src/mallocator.c diff --git a/include/util/mallocator.h b/include/util/mallocator.h index 87cccdbedf..a4705bdd2c 100644 --- a/include/util/mallocator.h +++ b/include/util/mallocator.h @@ -27,7 +27,7 @@ typedef struct SMemAllocator SMemAllocator; #define MALLOCATOR_APIS \ void *(*malloc)(SMemAllocator *, size_t size); \ void *(*calloc)(SMemAllocator *, size_t nmemb, size_t size); \ - void *(*realloc)(SMemAllocator *, size_t size); \ + void *(*realloc)(SMemAllocator *, void *ptr, size_t size); \ void (*free)(SMemAllocator *, void *ptr); \ size_t (*usage)(SMemAllocator *); @@ -50,6 +50,12 @@ void tdDestroyHeapAllocator(SMemAllocator *pMemAllocator); SMemAllocator *tdCreateArenaAllocator(size_t size); void tdDestroyArenaAllocator(SMemAllocator *); +#define mMalloc(pMemAllocator, size) (*(pMemAllocator->malloc))(pMemAllocator, size) +#define mCalloc(pMemAllocator, nmemb, size) (*(pMemAllocator->calloc))(pMemAllocator, nmemb, size) +#define mRealloc(pMemAllocator, ptr, size) (*(pMemAllocator->realloc))(pMemAllocator, ptr, size) +#define 
mFree(pMemAllocator, ptr) (*(pMemAllocator->free))(pMemAllocator, ptr) +#define mUsage(pMemAllocator) (*(pMemAllocator->usage))(pMemAllocator) + #ifdef __cplusplus } #endif diff --git a/source/util/src/arenaAllocator.c b/source/util/src/arenaAllocator.c deleted file mode 100644 index 6dea4a4e57..0000000000 --- a/source/util/src/arenaAllocator.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ \ No newline at end of file diff --git a/source/util/src/heapAllocator.c b/source/util/src/heapAllocator.c deleted file mode 100644 index 645277b386..0000000000 --- a/source/util/src/heapAllocator.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#include "mallocator.h" - -typedef struct { - char name[64]; -} SHeapAllocator; - -static SHeapAllocator *haNew(); -static void haDestroy(SHeapAllocator *pha); -static void * haMalloc(SMemAllocator *pMemAllocator, size_t size); -void * haCalloc(SMemAllocator *pMemAllocator, size_t nmemb, size_t size); -static void haFree(SMemAllocator *pMemAllocator, void *ptr); - -SMemAllocator *tdCreateHeapAllocator() { - SMemAllocator *pMemAllocator = NULL; - - pMemAllocator = (SMemAllocator *)calloc(1, sizeof(*pMemAllocator)); - if (pMemAllocator == NULL) { - // TODO: handle error - return NULL; - } - - pMemAllocator->impl = haNew(); - if (pMemAllocator->impl == NULL) { - tdDestroyHeapAllocator(pMemAllocator); - return NULL; - } - - pMemAllocator->malloc = haMalloc; - pMemAllocator->calloc = haCalloc; - pMemAllocator->realloc = NULL; - pMemAllocator->free = haFree; - pMemAllocator->usage = NULL; - - return pMemAllocator; -} - -void tdDestroyHeapAllocator(SMemAllocator *pMemAllocator) { - if (pMemAllocator) { - // TODO - } -} - -/* ------------------------ STATIC METHODS ------------------------ */ -static SHeapAllocator *haNew() { - SHeapAllocator *pha = NULL; - /* TODO */ - return pha; -} - -static void haDestroy(SHeapAllocator *pha) { - // TODO -} - -static void *haMalloc(SMemAllocator *pMemAllocator, size_t size) { - void *ptr = NULL; - - ptr = malloc(size); - if (ptr) { - } - - return ptr; -} - -void *haCalloc(SMemAllocator *pMemAllocator, size_t nmemb, size_t size) { - /* TODO */ - return NULL; -} - -static void haFree(SMemAllocator *pMemAllocator, void *ptr) { /* TODO */ -} \ No newline at end of file diff --git a/source/util/src/mallocator.c b/source/util/src/mallocator.c new file mode 100644 index 0000000000..0bd8f0742a --- /dev/null +++ b/source/util/src/mallocator.c @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "mallocator.h" + +/* ------------------------ HEAP ALLOCATOR ------------------------ */ +typedef struct { + size_t tusage; +} SHeapAllocator; + +static void * haMalloc(SMemAllocator *pma, size_t size); +static void * haCalloc(SMemAllocator *pma, size_t nmemb, size_t size); +static void * haRealloc(SMemAllocator *pma, void *ptr, size_t size); +static void haFree(SMemAllocator *pma, void *ptr); +static size_t haUsage(SMemAllocator *pma); + +SMemAllocator *tdCreateHeapAllocator() { + SMemAllocator *pma = NULL; + + pma = calloc(1, sizeof(SMemAllocator) + sizeof(SHeapAllocator)); + if (pma) { + pma->impl = POINTER_SHIFT(pma, sizeof(SMemAllocator)); + pma->malloc = haMalloc; + pma->calloc = haCalloc; + pma->realloc = haRealloc; + pma->free = haFree; + pma->usage = haUsage; + } + + return pma; +} + +void tdDestroyHeapAllocator(SMemAllocator *pMemAllocator) { + // TODO +} + +static void *haMalloc(SMemAllocator *pma, size_t size) { + void * ptr; + size_t tsize = size + sizeof(size_t); + SHeapAllocator *pha = (SHeapAllocator *)(pma->impl); + + ptr = malloc(tsize); + if (ptr) { + *(size_t *)ptr = size; + ptr = POINTER_SHIFT(ptr, sizeof(size_t)); + atomic_fetch_add_64(&(pha->tusage), tsize); + } + + return ptr; +} + +static void *haCalloc(SMemAllocator *pma, size_t nmemb, size_t size) { + void * ptr; + size_t tsize = nmemb * size; + + ptr = haMalloc(pma, tsize); + if (ptr) { + memset(ptr, 0, tsize); + } + + return ptr; +} + 
+static void *haRealloc(SMemAllocator *pma, void *ptr, size_t size) { + size_t psize; + size_t tsize = size + sizeof(size_t); + + if (ptr == NULL) { + psize = 0; + } else { + psize = *(size_t *)POINTER_SHIFT(ptr, -sizeof(size_t)); + } + + if (psize < size) { + // TODO + } else { + return ptr; + } +} + +static void haFree(SMemAllocator *pma, void *ptr) { /* TODO */ + SHeapAllocator *pha = (SHeapAllocator *)(pma->impl); + if (ptr) { + size_t tsize = *(size_t *)POINTER_SHIFT(ptr, -sizeof(size_t)) + sizeof(size_t); + atomic_fetch_sub_64(&(pha->tusage), tsize); + free(POINTER_SHIFT(ptr, -sizeof(size_t))); + } +} + +static size_t haUsage(SMemAllocator *pma) { return ((SHeapAllocator *)(pma->impl))->tusage; } + +/* ------------------------ ARENA ALLOCATOR ------------------------ */ +typedef struct { + size_t usage; +} SArenaAllocator; + +#if 0 +SMemAllocator *pDefaultMA; + +typedef struct { + char name[64]; +} SHeapAllocator; + +static SHeapAllocator *haNew(); +static void haDestroy(SHeapAllocator *pha); +static void * haMalloc(SMemAllocator *pMemAllocator, size_t size); +void * haCalloc(SMemAllocator *pMemAllocator, size_t nmemb, size_t size); +static void haFree(SMemAllocator *pMemAllocator, void *ptr); + +SMemAllocator *tdCreateHeapAllocator() { + SMemAllocator *pMemAllocator = NULL; + + pMemAllocator = (SMemAllocator *)calloc(1, sizeof(*pMemAllocator)); + if (pMemAllocator == NULL) { + // TODO: handle error + return NULL; + } + + pMemAllocator->impl = haNew(); + if (pMemAllocator->impl == NULL) { + tdDestroyHeapAllocator(pMemAllocator); + return NULL; + } + + pMemAllocator->malloc = haMalloc; + pMemAllocator->calloc = haCalloc; + pMemAllocator->realloc = NULL; + pMemAllocator->free = haFree; + pMemAllocator->usage = NULL; + + return pMemAllocator; +} + +void tdDestroyHeapAllocator(SMemAllocator *pMemAllocator) { + if (pMemAllocator) { + // TODO + } +} + +/* ------------------------ STATIC METHODS ------------------------ */ +static SHeapAllocator *haNew() { + 
SHeapAllocator *pha = NULL; + /* TODO */ + return pha; +} + +static void haDestroy(SHeapAllocator *pha) { + // TODO +} + +static void *haMalloc(SMemAllocator *pMemAllocator, size_t size) { + void *ptr = NULL; + + ptr = malloc(size); + if (ptr) { + } + + return ptr; +} + +void *haCalloc(SMemAllocator *pMemAllocator, size_t nmemb, size_t size) { + /* TODO */ + return NULL; +} + +static void haFree(SMemAllocator *pMemAllocator, void *ptr) { /* TODO */ +} +#endif \ No newline at end of file From e495508337fbe21e45f37f0f6659d2cd4bd49be2 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 9 Nov 2021 11:00:47 +0800 Subject: [PATCH 67/94] more --- source/util/src/mallocator.c | 75 +----------------------------------- 1 file changed, 1 insertion(+), 74 deletions(-) diff --git a/source/util/src/mallocator.c b/source/util/src/mallocator.c index 0bd8f0742a..1819396ccd 100644 --- a/source/util/src/mallocator.c +++ b/source/util/src/mallocator.c @@ -104,77 +104,4 @@ static size_t haUsage(SMemAllocator *pma) { return ((SHeapAllocator *)(pma->impl /* ------------------------ ARENA ALLOCATOR ------------------------ */ typedef struct { size_t usage; -} SArenaAllocator; - -#if 0 -SMemAllocator *pDefaultMA; - -typedef struct { - char name[64]; -} SHeapAllocator; - -static SHeapAllocator *haNew(); -static void haDestroy(SHeapAllocator *pha); -static void * haMalloc(SMemAllocator *pMemAllocator, size_t size); -void * haCalloc(SMemAllocator *pMemAllocator, size_t nmemb, size_t size); -static void haFree(SMemAllocator *pMemAllocator, void *ptr); - -SMemAllocator *tdCreateHeapAllocator() { - SMemAllocator *pMemAllocator = NULL; - - pMemAllocator = (SMemAllocator *)calloc(1, sizeof(*pMemAllocator)); - if (pMemAllocator == NULL) { - // TODO: handle error - return NULL; - } - - pMemAllocator->impl = haNew(); - if (pMemAllocator->impl == NULL) { - tdDestroyHeapAllocator(pMemAllocator); - return NULL; - } - - pMemAllocator->malloc = haMalloc; - pMemAllocator->calloc = haCalloc; - 
pMemAllocator->realloc = NULL; - pMemAllocator->free = haFree; - pMemAllocator->usage = NULL; - - return pMemAllocator; -} - -void tdDestroyHeapAllocator(SMemAllocator *pMemAllocator) { - if (pMemAllocator) { - // TODO - } -} - -/* ------------------------ STATIC METHODS ------------------------ */ -static SHeapAllocator *haNew() { - SHeapAllocator *pha = NULL; - /* TODO */ - return pha; -} - -static void haDestroy(SHeapAllocator *pha) { - // TODO -} - -static void *haMalloc(SMemAllocator *pMemAllocator, size_t size) { - void *ptr = NULL; - - ptr = malloc(size); - if (ptr) { - } - - return ptr; -} - -void *haCalloc(SMemAllocator *pMemAllocator, size_t nmemb, size_t size) { - /* TODO */ - return NULL; -} - -static void haFree(SMemAllocator *pMemAllocator, void *ptr) { /* TODO */ -} -#endif \ No newline at end of file +} SArenaAllocator; \ No newline at end of file From 1d7b85d93006ceb561154aebcb726abfe9e63722 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 9 Nov 2021 11:22:25 +0800 Subject: [PATCH 68/94] refact --- source/dnode/vnode/impl/inc/vnodeCommit.h | 5 ++-- source/dnode/vnode/impl/inc/vnodeDef.h | 4 ++- source/dnode/vnode/impl/src/vnodeCommit.c | 6 ++-- source/dnode/vnode/impl/src/vnodeWrite.c | 36 +++++++++++++++++++++-- 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/source/dnode/vnode/impl/inc/vnodeCommit.h b/source/dnode/vnode/impl/inc/vnodeCommit.h index 544a42f0e8..c8ff4947aa 100644 --- a/source/dnode/vnode/impl/inc/vnodeCommit.h +++ b/source/dnode/vnode/impl/inc/vnodeCommit.h @@ -16,13 +16,14 @@ #ifndef _TD_VNODE_COMMIT_H_ #define _TD_VNODE_COMMIT_H_ -#include "vnodeInt.h" +#include "vnode.h" #ifdef __cplusplus extern "C" { #endif -int vnodeAsyncCommit(SVnode *pVnode); +bool vnodeShouldCommit(SVnode *pVnode); +int vnodeAsyncCommit(SVnode *pVnode); #ifdef __cplusplus } diff --git a/source/dnode/vnode/impl/inc/vnodeDef.h b/source/dnode/vnode/impl/inc/vnodeDef.h index 012e6fc5d1..62b8ea0b3a 100644 --- 
a/source/dnode/vnode/impl/inc/vnodeDef.h +++ b/source/dnode/vnode/impl/inc/vnodeDef.h @@ -16,10 +16,12 @@ #ifndef _TD_VNODE_DEF_H_ #define _TD_VNODE_DEF_H_ +#include "mallocator.h" #include "vnode.h" #include "vnodeAllocatorPool.h" #include "vnodeOptions.h" #include "vnodeStateMgr.h" +#include "vnodeCommit.h" #ifdef __cplusplus extern "C" { @@ -30,7 +32,7 @@ struct SVnode { SVnodeOptions options; SVState state; SVAllocatorPool pool; - SVMemAllocator* inuse; + SMemAllocator* inuse; SMeta* pMeta; STsdb* pTsdb; STQ* pTq; diff --git a/source/dnode/vnode/impl/src/vnodeCommit.c b/source/dnode/vnode/impl/src/vnodeCommit.c index 826589e8c9..18a0c6d91d 100644 --- a/source/dnode/vnode/impl/src/vnodeCommit.c +++ b/source/dnode/vnode/impl/src/vnodeCommit.c @@ -13,13 +13,15 @@ * along with this program. If not, see . */ -#include "vnodeInt.h" +#include "vnodeDef.h" static int vnodeStartCommit(SVnode *pVnode); static int vnodeEndCommit(SVnode *pVnode); +bool vnodeShouldCommit(SVnode *pVnode) { return false; } + int vnodeAsyncCommit(SVnode *pVnode) { - #if 0 +#if 0 if (vnodeStartCommit(pVnode) < 0) { // TODO } diff --git a/source/dnode/vnode/impl/src/vnodeWrite.c b/source/dnode/vnode/impl/src/vnodeWrite.c index 401c2add9c..3126633411 100644 --- a/source/dnode/vnode/impl/src/vnodeWrite.c +++ b/source/dnode/vnode/impl/src/vnodeWrite.c @@ -21,9 +21,39 @@ int vnodeProcessWriteReqs(SVnode *pVnode, SReqBatch *pReqBatch) { } int vnodeApplyWriteRequest(SVnode *pVnode, const SRequest *pRequest) { - int type; - /* TODO */ - return 0; + int reqType; /* TODO */ + size_t reqSize; /* TODO */ + int code = 0; + + // Copy the request to vnode buffer + SRequest *pReq = mMalloc(pVnode->inuse, reqSize); + if (pReq == NULL) { + // TODO: handle error + } + + // Push the request to TQ so consumers can consume + tqPushMsg(pVnode->pTq, pReq, 0); + + // Process the request + switch (reqType) { + case TSDB_MSG_TYPE_CREATE_TABLE: + code = metaCreateTable(pVnode->pMeta, NULL /* TODO */); + break; + case 
TSDB_MSG_TYPE_DROP_TABLE: + code = metaDropTable(pVnode->pMeta, 0 /* TODO */); + break; + /* TODO */ + default: + break; + } + + if (vnodeShouldCommit(pVnode)) { + if (vnodeAsyncCommit(pVnode) < 0) { + // TODO: handle error + } + } + + return code; } /* ------------------------ STATIC METHODS ------------------------ */ \ No newline at end of file From 694d8aa9c9679a53cdee1bef1c3032bcbe86ddba Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 9 Nov 2021 11:45:01 +0800 Subject: [PATCH 69/94] more --- source/dnode/vnode/impl/inc/vnodeDef.h | 3 ++- source/dnode/vnode/impl/inc/vnodeSync.h | 27 ++++++++++++++++++++++++ source/dnode/vnode/impl/src/vnodeSync.c | 14 ++++++++++++ source/dnode/vnode/impl/src/vnodeWrite.c | 2 ++ 4 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 source/dnode/vnode/impl/inc/vnodeSync.h create mode 100644 source/dnode/vnode/impl/src/vnodeSync.c diff --git a/source/dnode/vnode/impl/inc/vnodeDef.h b/source/dnode/vnode/impl/inc/vnodeDef.h index 62b8ea0b3a..3443c072d7 100644 --- a/source/dnode/vnode/impl/inc/vnodeDef.h +++ b/source/dnode/vnode/impl/inc/vnodeDef.h @@ -17,11 +17,12 @@ #define _TD_VNODE_DEF_H_ #include "mallocator.h" +#include "sync.h" #include "vnode.h" #include "vnodeAllocatorPool.h" +#include "vnodeCommit.h" #include "vnodeOptions.h" #include "vnodeStateMgr.h" -#include "vnodeCommit.h" #ifdef __cplusplus extern "C" { diff --git a/source/dnode/vnode/impl/inc/vnodeSync.h b/source/dnode/vnode/impl/inc/vnodeSync.h new file mode 100644 index 0000000000..7831c8ca80 --- /dev/null +++ b/source/dnode/vnode/impl/inc/vnodeSync.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TD_VNODE_SYNC_H_ +#define _TD_VNODE_SYNC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_VNODE_SYNC_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeSync.c b/source/dnode/vnode/impl/src/vnodeSync.c new file mode 100644 index 0000000000..6dea4a4e57 --- /dev/null +++ b/source/dnode/vnode/impl/src/vnodeSync.c @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ \ No newline at end of file diff --git a/source/dnode/vnode/impl/src/vnodeWrite.c b/source/dnode/vnode/impl/src/vnodeWrite.c index 3126633411..764f034810 100644 --- a/source/dnode/vnode/impl/src/vnodeWrite.c +++ b/source/dnode/vnode/impl/src/vnodeWrite.c @@ -31,6 +31,8 @@ int vnodeApplyWriteRequest(SVnode *pVnode, const SRequest *pRequest) { // TODO: handle error } + memcpy(pReq, pRequest, reqSize); + // Push the request to TQ so consumers can consume tqPushMsg(pVnode->pTq, pReq, 0); From 79e5aeaa093e0fb6e2bcf91454c0784cdb9ccd2f Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 9 Nov 2021 13:24:22 +0800 Subject: [PATCH 70/94] refact --- include/server/vnode/impl/vnodeImpl.h | 41 ------------------------- include/server/vnode/vnode.h | 15 ++++++++- source/dnode/vnode/impl/inc/vnodeDef.h | 1 + source/dnode/vnode/impl/inc/vnodeSync.h | 4 +++ 4 files changed, 19 insertions(+), 42 deletions(-) delete mode 100644 include/server/vnode/impl/vnodeImpl.h diff --git a/include/server/vnode/impl/vnodeImpl.h b/include/server/vnode/impl/vnodeImpl.h deleted file mode 100644 index 1b09361bc9..0000000000 --- a/include/server/vnode/impl/vnodeImpl.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#ifndef _TD_VNODE_IMPL_H_ -#define _TD_VNODE_IMPL_H_ - -#include "os.h" -#include "trequest.h" - -#include "meta.h" -#include "tq.h" -#include "tsdb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct SVnodeOptions { - size_t wsize; - STsdbOptions tsdbOptions; - SMetaOptions metaOptions; - // STqOptions tqOptions; // TODO -}; - -#ifdef __cplusplus -} -#endif - -#endif /*_TD_VNODE_IMPL_H_*/ \ No newline at end of file diff --git a/include/server/vnode/vnode.h b/include/server/vnode/vnode.h index 2b678751f6..44f8ffdc66 100644 --- a/include/server/vnode/vnode.h +++ b/include/server/vnode/vnode.h @@ -16,7 +16,12 @@ #ifndef _TD_VNODE_H_ #define _TD_VNODE_H_ -#include "impl/vnodeImpl.h" +#include "os.h" +#include "trequest.h" + +#include "meta.h" +#include "tq.h" +#include "tsdb.h" #ifdef __cplusplus extern "C" { @@ -39,6 +44,14 @@ int vnodeProcessSyncReq(SVnode *pVnode, SRequest *pReq); void vnodeOptionsInit(SVnodeOptions *); void vnodeOptionsClear(SVnodeOptions *); +/* ------------------------ STRUCT DEFINITIONS ------------------------ */ +struct SVnodeOptions { + size_t wsize; + STsdbOptions tsdbOptions; + SMetaOptions metaOptions; + // STqOptions tqOptions; // TODO +}; + #if 1 #include "taosmsg.h" diff --git a/source/dnode/vnode/impl/inc/vnodeDef.h b/source/dnode/vnode/impl/inc/vnodeDef.h index 3443c072d7..07a65ae4c0 100644 --- a/source/dnode/vnode/impl/inc/vnodeDef.h +++ b/source/dnode/vnode/impl/inc/vnodeDef.h @@ -23,6 +23,7 @@ #include "vnodeCommit.h" #include "vnodeOptions.h" #include "vnodeStateMgr.h" +#include "vnodeSync.h" #ifdef __cplusplus extern "C" { diff --git a/source/dnode/vnode/impl/inc/vnodeSync.h b/source/dnode/vnode/impl/inc/vnodeSync.h index 7831c8ca80..712958bf56 100644 --- a/source/dnode/vnode/impl/inc/vnodeSync.h +++ b/source/dnode/vnode/impl/inc/vnodeSync.h @@ -20,6 +20,10 @@ extern "C" { #endif +typedef struct { + /* data */ +} SVnodeSync; + #ifdef __cplusplus } #endif From 4b5030deceaa0246e9b810dc36e13f0cf45e3956 Mon Sep 17 
00:00:00 2001 From: Hongze Cheng Date: Tue, 9 Nov 2021 13:37:11 +0800 Subject: [PATCH 71/94] refact --- docs/Doxyfile | 2579 +++++++++++++++++++++++ source/dnode/vnode/impl/inc/vnodeDef.h | 16 +- source/dnode/vnode/impl/src/vnodeRead.c | 3 +- 3 files changed, 2588 insertions(+), 10 deletions(-) create mode 100644 docs/Doxyfile diff --git a/docs/Doxyfile b/docs/Doxyfile new file mode 100644 index 0000000000..c3e86e0081 --- /dev/null +++ b/docs/Doxyfile @@ -0,0 +1,2579 @@ +# Doxyfile 1.8.17 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the configuration +# file that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# https://www.gnu.org/software/libiconv/ for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. 
+ +PROJECT_NAME = "TDengine 3.0" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give the viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = /mnt1/hzcheng/work/TDengine/debug + +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise cause +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO.
+ +ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all generated output in the proper direction. +# Possible values are: None, LTR, RTL and Context. +# The default value is: None. + +OUTPUT_TEXT_DIRECTION = None + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. 
Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. 
+# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful if your file system doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = NO + +# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line +# such as +# /*************** +# as being the beginning of a Javadoc-style comment "banner". If set to NO, the +# Javadoc-style will behave just like regular comments and it will not be +# interpreted by doxygen. +# The default value is: NO. + +JAVADOC_BANNER = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO.
+ +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines (in the resulting output). You can put ^^ in the value part of an +# alias to insert a newline as if a physical newline was in the original file. 
+# When you need a literal { or } or , in the value part of an alias you have to +# escape them by means of a backslash (\), this can lead to conflicts with the +# commands \{ and \} for these it is advised to use the version @{ and @} or use +# a double escape (\\{ and \\}) + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice +# sources only. Doxygen will then generate output that is more tailored for that +# language. For instance, namespaces will be presented as modules, types will be +# separated into more groups, etc. +# The default value is: NO. 
+ +OPTIMIZE_OUTPUT_SLICE = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, JavaScript, +# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, +# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser +# tries to guess whether the code is fixed or free formatted code, this is the +# default for Fortran type files), VHDL, tcl. For instance to make doxygen treat +# .inc files as Fortran files (default is PHP), and .f files as C (default is +# Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See https://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibility issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up +# to that level are automatically included in the table of contents, even if +# they do not have an id attribute. +# Note: This feature currently applies only to Markdown headings. +# Minimum value: 0, maximum value: 99, default value: 5.
+# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +TOC_INCLUDE_HEADINGS = 5 + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match function declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES.
+ +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# If one adds a struct or class to a group and this option is enabled, then also +# any nested class or struct is added to the same group. By default this option +# is disabled and one has to add nested compounds explicitly via \ingroup. +# The default value is: NO. + +GROUP_NESTED_COMPOUNDS = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. 
+ +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. 
+# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual +# methods of a class will be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIV_VIRTUAL = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. If set to YES, local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO, only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. 
+ +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO, these classes will be included in the various overviews. This option +# has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# declarations. If set to NO, these declarations will be included in the +# documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO, these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES, upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# (including Cygwin) and Mac users are advised to set this option to NO. +# The default value is: system dependent.
+ +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES, the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. If set to +# YES the compound reference will be hidden. +# The default value is: NO. + +HIDE_COMPOUND_REFERENCE= NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. 
Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. 
+ +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. This list is created by putting \todo commands in the documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test +# list. This list is created by putting \test commands in the documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if ... \endif and \cond +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES, the +# list will mention the files that were used to generate the documentation. +# The default value is: YES. 
+ +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. 
See also https://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. + +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO, doxygen will only warn about wrong or incomplete +# parameter documentation, but not about the absence of documentation. 
If +# EXTRACT_ALL is set to YES then this flag will automatically be disabled. +# The default value is: NO. + +WARN_NO_PARAMDOC = NO + +# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when +# a warning is encountered. +# The default value is: NO. + +WARN_AS_ERROR = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING +# Note: If this tag is empty the current directory is searched. + +INPUT = include source + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: https://www.gnu.org/software/libiconv/) for the list of +# possible encodings. +# The default value is: UTF-8. 
+ +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# read by doxygen. +# +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, +# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, +# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, +# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), +# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen +# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f, *.for, *.tcl, *.vhd, +# *.vhdl, *.ucf, *.qsf and *.ice. + +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.idl \ + *.ddl \ + *.odl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.cs \ + *.d \ + *.php \ + *.php4 \ + *.php5 \ + *.phtml \ + *.inc \ + *.m \ + *.markdown \ + *.md \ + *.mm \ + *.dox \ + *.doc \ + *.txt \ + *.py \ + *.pyw \ + *.f90 \ + *.f95 \ + *.f03 \ + *.f08 \ + *.f \ + *.for \ + *.tcl \ + *.vhd \ + *.vhdl \ + *.ucf \ + *.qsf \ + *.ice + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. 
+ +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. 
+ +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# <filter> <input-file> +# +# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will also be used to filter the input files that are used for +# producing the source files to browse (i.e.
when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# entity all documented functions referencing it will be listed. +# The default value is: NO.
+ +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. + +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see https://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. 
+# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = YES + +# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the +# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the +# cost of reduced performance. This can be particularly helpful with template +# rich C++ code for which doxygen's built-in parser lacks the necessary type +# information. +# Note: The availability of this option depends on whether or not doxygen was +# generated with the -Duse_libclang=ON option for CMake. +# The default value is: NO. + +CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +CLANG_OPTIONS = + +# If clang assisted parsing is enabled you can provide the clang parser with the +# path to the compilation database (see: +# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files +# were built. This is equivalent to specifying the "-p" option to a clang tool, +# such as clang-check. These options will then be passed to the parser. +# Note: The availability of this option depends on whether or not doxygen was +# generated with the -Duse_libclang=ON option for CMake.
+ +CLANG_DATABASE_PATH = + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML, the header file must include any scripts and style sheets +# that doxygen needs, which depend on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). For an example see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# https://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. 
+# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to YES can help to show when doxygen was last run and thus if the +# documentation is up to date. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = NO + +# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML +# documentation will contain a main index with vertical navigation menus that +# are dynamically created via JavaScript. If disabled, the navigation index will +# consist of multiple levels of tabs that are statically embedded in every HTML +# page. Disable this option to support browsers that do not have JavaScript, +# like the Qt help browser. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES.
+ +HTML_DYNAMIC_MENUS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries to 1 will produce a fully collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a fully expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: https://developer.apple.com/xcode/), introduced with OSX +# 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy +# genXcode/_index.html for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed.
A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. 
Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler (hhc.exe). If non-empty, +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the master .chm file (NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
+ +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. 
For more information please see Qt Help Project / Custom +# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated; together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files need +# to be copied into the plugins directory of Eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying, Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+ +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_TRANSPARENT = YES + +# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands +# to create new LaTeX commands to be used in formulas as building blocks. See +# the section "Including formulas" for details. 
+ +FORMULA_MACROFILE = + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# https://www.mathjax.org) which uses client side JavaScript for the rendering +# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX +# installed or if you want formulas to look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from https://www.mathjax.org before deployment. +# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/ + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering.
For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use + S +# (what the is depends on the OS and browser, but it is typically +# , /