Merge pull request #6442 from taosdata/fix/TD-4599

[TD-4599]<fix>: fix false dnode offline
This commit is contained in:
Shengliang Guan 2021-06-11 10:10:58 +08:00 committed by GitHub
commit 9f5943c46b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 16 additions and 22 deletions

View File

@ -220,10 +220,6 @@ int32_t bnAllocVnodes(SVgObj *pVgroup) {
} }
static bool bnCheckVgroupReady(SVgObj *pVgroup, SVnodeGid *pRmVnode) { static bool bnCheckVgroupReady(SVgObj *pVgroup, SVnodeGid *pRmVnode) {
if (pVgroup->lbTime + 5 * tsStatusInterval > tsAccessSquence) {
return false;
}
int32_t rmVnodeVer = 0; int32_t rmVnodeVer = 0;
for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) { for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) {
SVnodeGid *pVnode = pVgroup->vnodeGid + i; SVnodeGid *pVnode = pVgroup->vnodeGid + i;
@ -405,7 +401,7 @@ void bnReset() {
if (pDnode == NULL) break; if (pDnode == NULL) break;
// while master change, should reset dnode to offline // while master change, should reset dnode to offline
mInfo("dnode:%d set access:%d to 0", pDnode->dnodeId, pDnode->lastAccess); mInfo("dnode:%d set access:%" PRId64 " to 0", pDnode->dnodeId, pDnode->lastAccess);
pDnode->lastAccess = 0; pDnode->lastAccess = 0;
if (pDnode->status != TAOS_DN_STATUS_DROPPING) { if (pDnode->status != TAOS_DN_STATUS_DROPPING) {
pDnode->status = TAOS_DN_STATUS_OFFLINE; pDnode->status = TAOS_DN_STATUS_OFFLINE;
@ -499,7 +495,7 @@ static bool bnMontiorDropping() {
if (dnodeIsMasterEp(pDnode->dnodeEp)) continue; if (dnodeIsMasterEp(pDnode->dnodeEp)) continue;
if (mnodeGetDnodesNum() <= 1) continue; if (mnodeGetDnodesNum() <= 1) continue;
mLInfo("dnode:%d, set to removing state for it offline:%d seconds", pDnode->dnodeId, mLInfo("dnode:%d, set to removing state for it offline:%" PRId64 " seconds", pDnode->dnodeId,
tsAccessSquence - pDnode->lastAccess); tsAccessSquence - pDnode->lastAccess);
pDnode->status = TAOS_DN_STATUS_DROPPING; pDnode->status = TAOS_DN_STATUS_DROPPING;
@ -574,8 +570,8 @@ void bnCheckStatus() {
if (pDnode->status != TAOS_DN_STATUS_DROPPING && pDnode->status != TAOS_DN_STATUS_OFFLINE) { if (pDnode->status != TAOS_DN_STATUS_DROPPING && pDnode->status != TAOS_DN_STATUS_OFFLINE) {
pDnode->status = TAOS_DN_STATUS_OFFLINE; pDnode->status = TAOS_DN_STATUS_OFFLINE;
pDnode->offlineReason = TAOS_DN_OFF_STATUS_MSG_TIMEOUT; pDnode->offlineReason = TAOS_DN_OFF_STATUS_MSG_TIMEOUT;
mInfo("dnode:%d, set to offline state, access seq:%d last seq:%d laststat:%d", pDnode->dnodeId, tsAccessSquence, mInfo("dnode:%d, set to offline state, access seq:%" PRId64 " last seq:%" PRId64 " laststat:%d", pDnode->dnodeId,
pDnode->lastAccess, pDnode->status); tsAccessSquence, pDnode->lastAccess, pDnode->status);
bnSetVgroupOffline(pDnode); bnSetVgroupOffline(pDnode);
bnStartTimer(3000); bnStartTimer(3000);
} }

View File

@ -102,12 +102,12 @@ static void bnProcessTimer(void *handle, void *tmrId) {
if (tsBnThread.stop) return; if (tsBnThread.stop) return;
tsBnThread.timer = NULL; tsBnThread.timer = NULL;
tsAccessSquence++;
bnStartTimer(-1); bnStartTimer(-1);
bnCheckStatus(); bnCheckStatus();
if (handle == NULL) { if (handle == NULL) {
++tsAccessSquence;
if (tsAccessSquence % tsBalanceInterval == 0) { if (tsAccessSquence % tsBalanceInterval == 0) {
mDebug("balance function is scheduled by timer"); mDebug("balance function is scheduled by timer");
bnPostSignal(); bnPostSignal();
@ -122,8 +122,7 @@ static void bnProcessTimer(void *handle, void *tmrId) {
void bnStartTimer(int32_t mseconds) { void bnStartTimer(int32_t mseconds) {
if (tsBnThread.stop) return; if (tsBnThread.stop) return;
bool updateSoon = (mseconds != -1); if (mseconds != -1) {
if (updateSoon) {
mTrace("balance function will be called after %d ms", mseconds); mTrace("balance function will be called after %d ms", mseconds);
taosTmrReset(bnProcessTimer, mseconds, (void *)(int64_t)mseconds, tsMnodeTmr, &tsBnThread.timer); taosTmrReset(bnProcessTimer, mseconds, (void *)(int64_t)mseconds, tsMnodeTmr, &tsBnThread.timer);
} else { } else {
@ -132,5 +131,5 @@ void bnStartTimer(int32_t mseconds) {
} }
void bnNotify() { void bnNotify() {
bnStartTimer(500); bnStartTimer(500);
} }

View File

@ -48,9 +48,8 @@ typedef struct SDnodeObj {
int32_t dnodeId; int32_t dnodeId;
int32_t openVnodes; int32_t openVnodes;
int64_t createdTime; int64_t createdTime;
int32_t resever0; // from dnode status msg, config information int64_t lastAccess;
int32_t customScore; // config by user int32_t customScore; // config by user
uint32_t lastAccess;
uint16_t numOfCores; // from dnode status msg uint16_t numOfCores; // from dnode status msg
uint16_t dnodePort; uint16_t dnodePort;
char dnodeFqdn[TSDB_FQDN_LEN]; char dnodeFqdn[TSDB_FQDN_LEN];

View File

@ -77,7 +77,7 @@ void * mnodeGetDnodeByEp(char *ep);
void mnodeUpdateDnode(SDnodeObj *pDnode); void mnodeUpdateDnode(SDnodeObj *pDnode);
int32_t mnodeDropDnode(SDnodeObj *pDnode, void *pMsg); int32_t mnodeDropDnode(SDnodeObj *pDnode, void *pMsg);
extern int32_t tsAccessSquence; extern int64_t tsAccessSquence;
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -39,8 +39,8 @@
#include "mnodePeer.h" #include "mnodePeer.h"
#include "mnodeCluster.h" #include "mnodeCluster.h"
int32_t tsAccessSquence = 0; int64_t tsAccessSquence = 0;
int64_t tsDnodeRid = -1; int64_t tsDnodeRid = -1;
static void * tsDnodeSdb = NULL; static void * tsDnodeSdb = NULL;
static int32_t tsDnodeUpdateSize = 0; static int32_t tsDnodeUpdateSize = 0;
extern void * tsMnodeSdb; extern void * tsMnodeSdb;
@ -567,7 +567,7 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
mnodeGetClusterId()); mnodeGetClusterId());
return TSDB_CODE_MND_INVALID_CLUSTER_ID; return TSDB_CODE_MND_INVALID_CLUSTER_ID;
} else { } else {
mTrace("dnode:%d, status received, access times %d openVnodes:%d:%d", pDnode->dnodeId, pDnode->lastAccess, mTrace("dnode:%d, status received, access times %" PRId64 " openVnodes:%d:%d", pDnode->dnodeId, pDnode->lastAccess,
htons(pStatus->openVnodes), pDnode->openVnodes); htons(pStatus->openVnodes), pDnode->openVnodes);
} }
} }
@ -629,9 +629,9 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
bnNotify(); bnNotify();
} }
if (!tsEnableBalance) { int32_t numOfMnodes = mnodeGetMnodesNum();
int32_t numOfMnodes = mnodeGetMnodesNum(); if (numOfMnodes < tsNumOfMnodes && numOfMnodes < mnodeGetOnlineDnodesNum() && !pDnode->isMgmt) {
if (numOfMnodes < tsNumOfMnodes) bnNotify(); bnNotify();
} }
if (openVnodes != pDnode->openVnodes) { if (openVnodes != pDnode->openVnodes) {