[TD-4599]<fix>: fix false dnode offline

This commit is contained in:
Minglei Jin 2021-06-09 20:31:11 +08:00
parent 3bf8a5dd7e
commit 490caa967b
5 changed files with 18 additions and 19 deletions

View File

@ -405,7 +405,7 @@ void bnReset() {
if (pDnode == NULL) break; if (pDnode == NULL) break;
// while master change, should reset dnode to offline // while master change, should reset dnode to offline
mInfo("dnode:%d set access:%d to 0", pDnode->dnodeId, pDnode->lastAccess); mInfo("dnode:%d set access:%" PRId64 " to 0", pDnode->dnodeId, pDnode->lastAccess);
pDnode->lastAccess = 0; pDnode->lastAccess = 0;
if (pDnode->status != TAOS_DN_STATUS_DROPPING) { if (pDnode->status != TAOS_DN_STATUS_DROPPING) {
pDnode->status = TAOS_DN_STATUS_OFFLINE; pDnode->status = TAOS_DN_STATUS_OFFLINE;
@ -499,7 +499,7 @@ static bool bnMontiorDropping() {
if (dnodeIsMasterEp(pDnode->dnodeEp)) continue; if (dnodeIsMasterEp(pDnode->dnodeEp)) continue;
if (mnodeGetDnodesNum() <= 1) continue; if (mnodeGetDnodesNum() <= 1) continue;
mLInfo("dnode:%d, set to removing state for it offline:%d seconds", pDnode->dnodeId, mLInfo("dnode:%d, set to removing state for it offline:%" PRId64 " seconds", pDnode->dnodeId,
tsAccessSquence - pDnode->lastAccess); tsAccessSquence - pDnode->lastAccess);
pDnode->status = TAOS_DN_STATUS_DROPPING; pDnode->status = TAOS_DN_STATUS_DROPPING;
@ -574,8 +574,8 @@ void bnCheckStatus() {
if (pDnode->status != TAOS_DN_STATUS_DROPPING && pDnode->status != TAOS_DN_STATUS_OFFLINE) { if (pDnode->status != TAOS_DN_STATUS_DROPPING && pDnode->status != TAOS_DN_STATUS_OFFLINE) {
pDnode->status = TAOS_DN_STATUS_OFFLINE; pDnode->status = TAOS_DN_STATUS_OFFLINE;
pDnode->offlineReason = TAOS_DN_OFF_STATUS_MSG_TIMEOUT; pDnode->offlineReason = TAOS_DN_OFF_STATUS_MSG_TIMEOUT;
mInfo("dnode:%d, set to offline state, access seq:%d last seq:%d laststat:%d", pDnode->dnodeId, tsAccessSquence, mInfo("dnode:%d, set to offline state, access seq:%" PRId64 " last seq:%" PRId64 " laststat:%d", pDnode->dnodeId,
pDnode->lastAccess, pDnode->status); tsAccessSquence, pDnode->lastAccess, pDnode->status);
bnSetVgroupOffline(pDnode); bnSetVgroupOffline(pDnode);
bnStartTimer(3000); bnStartTimer(3000);
} }

View File

@ -101,13 +101,13 @@ static void bnProcessTimer(void *handle, void *tmrId) {
if (!sdbIsMaster()) return; if (!sdbIsMaster()) return;
if (tsBnThread.stop) return; if (tsBnThread.stop) return;
tsBnThread.timer = NULL;
tsAccessSquence++;
bnStartTimer(-1);
bnCheckStatus();
if (handle == NULL) { if (handle == NULL) {
tsBnThread.timer = NULL;
++tsAccessSquence;
bnStartTimer(-1);
bnCheckStatus();
if (tsAccessSquence % tsBalanceInterval == 0) { if (tsAccessSquence % tsBalanceInterval == 0) {
mDebug("balance function is scheduled by timer"); mDebug("balance function is scheduled by timer");
bnPostSignal(); bnPostSignal();

View File

@ -48,9 +48,8 @@ typedef struct SDnodeObj {
int32_t dnodeId; int32_t dnodeId;
int32_t openVnodes; int32_t openVnodes;
int64_t createdTime; int64_t createdTime;
int32_t resever0; // from dnode status msg, config information int64_t lastAccess;
int32_t customScore; // config by user int32_t customScore; // config by user
uint32_t lastAccess;
uint16_t numOfCores; // from dnode status msg uint16_t numOfCores; // from dnode status msg
uint16_t dnodePort; uint16_t dnodePort;
char dnodeFqdn[TSDB_FQDN_LEN]; char dnodeFqdn[TSDB_FQDN_LEN];

View File

@ -77,7 +77,7 @@ void * mnodeGetDnodeByEp(char *ep);
void mnodeUpdateDnode(SDnodeObj *pDnode); void mnodeUpdateDnode(SDnodeObj *pDnode);
int32_t mnodeDropDnode(SDnodeObj *pDnode, void *pMsg); int32_t mnodeDropDnode(SDnodeObj *pDnode, void *pMsg);
extern int32_t tsAccessSquence; extern int64_t tsAccessSquence;
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -39,8 +39,8 @@
#include "mnodePeer.h" #include "mnodePeer.h"
#include "mnodeCluster.h" #include "mnodeCluster.h"
int32_t tsAccessSquence = 0; int64_t tsAccessSquence = 0;
int64_t tsDnodeRid = -1; int64_t tsDnodeRid = -1;
static void * tsDnodeSdb = NULL; static void * tsDnodeSdb = NULL;
static int32_t tsDnodeUpdateSize = 0; static int32_t tsDnodeUpdateSize = 0;
extern void * tsMnodeSdb; extern void * tsMnodeSdb;
@ -567,7 +567,7 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
mnodeGetClusterId()); mnodeGetClusterId());
return TSDB_CODE_MND_INVALID_CLUSTER_ID; return TSDB_CODE_MND_INVALID_CLUSTER_ID;
} else { } else {
mTrace("dnode:%d, status received, access times %d openVnodes:%d:%d", pDnode->dnodeId, pDnode->lastAccess, mTrace("dnode:%d, status received, access times %" PRId64 " openVnodes:%d:%d", pDnode->dnodeId, pDnode->lastAccess,
htons(pStatus->openVnodes), pDnode->openVnodes); htons(pStatus->openVnodes), pDnode->openVnodes);
} }
} }
@ -629,9 +629,9 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
bnNotify(); bnNotify();
} }
if (!tsEnableBalance) { int32_t numOfMnodes = mnodeGetMnodesNum();
int32_t numOfMnodes = mnodeGetMnodesNum(); if (numOfMnodes < tsNumOfMnodes && numOfMnodes < mnodeGetOnlineDnodesNum()) {
if (numOfMnodes < tsNumOfMnodes) bnNotify(); bnNotify();
} }
if (openVnodes != pDnode->openVnodes) { if (openVnodes != pDnode->openVnodes) {