enh: add sync offline state

This commit is contained in:
Shengliang Guan 2022-11-30 12:02:54 +08:00
parent 3850b58a3d
commit 5556fd0c45
6 changed files with 14 additions and 9 deletions

View File

@ -67,6 +67,7 @@ typedef struct SWal SWal;
typedef struct SSyncRaftEntry SSyncRaftEntry; typedef struct SSyncRaftEntry SSyncRaftEntry;
typedef enum { typedef enum {
TAOS_SYNC_STATE_OFFLINE = 0,
TAOS_SYNC_STATE_FOLLOWER = 100, TAOS_SYNC_STATE_FOLLOWER = 100,
TAOS_SYNC_STATE_CANDIDATE = 101, TAOS_SYNC_STATE_CANDIDATE = 101,
TAOS_SYNC_STATE_LEADER = 102, TAOS_SYNC_STATE_LEADER = 102,

View File

@ -150,7 +150,7 @@ static void dmGetServerRunStatus(SDnodeMgmt *pMgmt, SServerStatusRsp *pStatus) {
SServerStatusRsp statusRsp = {0}; SServerStatusRsp statusRsp = {0};
SMonMloadInfo minfo = {0}; SMonMloadInfo minfo = {0};
(*pMgmt->getMnodeLoadsFp)(&minfo); (*pMgmt->getMnodeLoadsFp)(&minfo);
if (minfo.isMnode && minfo.load.syncState == TAOS_SYNC_STATE_ERROR) { if (minfo.isMnode && (minfo.load.syncState == TAOS_SYNC_STATE_ERROR || minfo.load.syncState == TAOS_SYNC_STATE_OFFLINE)) {
pStatus->statusCode = TSDB_SRV_STATUS_SERVICE_DEGRADED; pStatus->statusCode = TSDB_SRV_STATUS_SERVICE_DEGRADED;
snprintf(pStatus->details, sizeof(pStatus->details), "mnode sync state is %s", syncStr(minfo.load.syncState)); snprintf(pStatus->details, sizeof(pStatus->details), "mnode sync state is %s", syncStr(minfo.load.syncState));
return; return;
@ -160,7 +160,7 @@ static void dmGetServerRunStatus(SDnodeMgmt *pMgmt, SServerStatusRsp *pStatus) {
(*pMgmt->getVnodeLoadsFp)(&vinfo); (*pMgmt->getVnodeLoadsFp)(&vinfo);
for (int32_t i = 0; i < taosArrayGetSize(vinfo.pVloads); ++i) { for (int32_t i = 0; i < taosArrayGetSize(vinfo.pVloads); ++i) {
SVnodeLoad *pLoad = taosArrayGet(vinfo.pVloads, i); SVnodeLoad *pLoad = taosArrayGet(vinfo.pVloads, i);
if (pLoad->syncState == TAOS_SYNC_STATE_ERROR) { if (pLoad->syncState == TAOS_SYNC_STATE_ERROR || pLoad->syncState == TAOS_SYNC_STATE_OFFLINE) {
pStatus->statusCode = TSDB_SRV_STATUS_SERVICE_DEGRADED; pStatus->statusCode = TSDB_SRV_STATUS_SERVICE_DEGRADED;
snprintf(pStatus->details, sizeof(pStatus->details), "vnode:%d sync state is %s", pLoad->vgId, snprintf(pStatus->details, sizeof(pStatus->details), "vnode:%d sync state is %s", pLoad->vgId,
syncStr(pLoad->syncState)); syncStr(pLoad->syncState));

View File

@ -151,10 +151,10 @@ static void mndSetVgroupOffline(SMnode *pMnode, int32_t dnodeId, int64_t curMs)
bool roleChanged = false; bool roleChanged = false;
for (int32_t vg = 0; vg < pVgroup->replica; ++vg) { for (int32_t vg = 0; vg < pVgroup->replica; ++vg) {
if (pVgroup->vnodeGid[vg].dnodeId == dnodeId) { if (pVgroup->vnodeGid[vg].dnodeId == dnodeId) {
if (pVgroup->vnodeGid[vg].syncState != TAOS_SYNC_STATE_ERROR) { if (pVgroup->vnodeGid[vg].syncState != TAOS_SYNC_STATE_OFFLINE) {
mInfo("vgId:%d, state changed by offline check, old state:%s restored:%d new state:error restored:0", mInfo("vgId:%d, state changed by offline check, old state:%s restored:%d new state:error restored:0",
pVgroup->vgId, syncStr(pVgroup->vnodeGid[vg].syncState), pVgroup->vnodeGid[vg].syncRestore); pVgroup->vgId, syncStr(pVgroup->vnodeGid[vg].syncState), pVgroup->vnodeGid[vg].syncRestore);
pVgroup->vnodeGid[vg].syncState = TAOS_SYNC_STATE_ERROR; pVgroup->vnodeGid[vg].syncState = TAOS_SYNC_STATE_OFFLINE;
pVgroup->vnodeGid[vg].syncRestore = 0; pVgroup->vnodeGid[vg].syncRestore = 0;
roleChanged = true; roleChanged = true;
} }
@ -756,7 +756,7 @@ int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgr
tstrncpy(desc.status, "ready", sizeof(desc.status)); tstrncpy(desc.status, "ready", sizeof(desc.status));
pClusterInfo->vgroups_alive++; pClusterInfo->vgroups_alive++;
} }
if (pVgid->syncState != TAOS_SYNC_STATE_ERROR) { if (pVgid->syncState != TAOS_SYNC_STATE_ERROR && pVgid->syncState != TAOS_SYNC_STATE_OFFLINE) {
pClusterInfo->vnodes_alive++; pClusterInfo->vnodes_alive++;
} }
pClusterInfo->vnodes_total++; pClusterInfo->vnodes_total++;

View File

@ -185,7 +185,7 @@ static int32_t mndMnodeActionInsert(SSdb *pSdb, SMnodeObj *pObj) {
return -1; return -1;
} }
pObj->syncState = TAOS_SYNC_STATE_ERROR; pObj->syncState = TAOS_SYNC_STATE_OFFLINE;
mndReloadSyncConfig(pSdb->pMnode); mndReloadSyncConfig(pSdb->pMnode);
return 0; return 0;
} }

View File

@ -887,7 +887,7 @@ int32_t mndAddVnodeToVgroup(SMnode *pMnode, SVgObj *pVgroup, SArray *pArray) {
} }
pVgid->dnodeId = pDnode->id; pVgid->dnodeId = pDnode->id;
pVgid->syncState = TAOS_SYNC_STATE_ERROR; pVgid->syncState = TAOS_SYNC_STATE_OFFLINE;
mInfo("db:%s, vgId:%d, vn:%d is added, memory:%" PRId64 ", dnode:%d avail:%" PRId64 " used:%" PRId64, mInfo("db:%s, vgId:%d, vn:%d is added, memory:%" PRId64 ", dnode:%d avail:%" PRId64 " used:%" PRId64,
pVgroup->dbName, pVgroup->vgId, pVgroup->replica, vgMem, pVgid->dnodeId, pDnode->memAvail, pDnode->memUsed); pVgroup->dbName, pVgroup->vgId, pVgroup->replica, vgMem, pVgid->dnodeId, pDnode->memAvail, pDnode->memUsed);
@ -1193,7 +1193,7 @@ static int32_t mndAddIncVgroupReplicaToTrans(SMnode *pMnode, STrans *pTrans, SDb
SVnodeGid *pGid = &pVgroup->vnodeGid[pVgroup->replica]; SVnodeGid *pGid = &pVgroup->vnodeGid[pVgroup->replica];
pVgroup->replica++; pVgroup->replica++;
pGid->dnodeId = newDnodeId; pGid->dnodeId = newDnodeId;
pGid->syncState = TAOS_SYNC_STATE_ERROR; pGid->syncState = TAOS_SYNC_STATE_OFFLINE;
for (int32_t i = 0; i < pVgroup->replica - 1; ++i) { for (int32_t i = 0; i < pVgroup->replica - 1; ++i) {
if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, pVgroup, pVgroup->vnodeGid[i].dnodeId) != 0) return -1; if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, pVgroup, pVgroup->vnodeGid[i].dnodeId) != 0) return -1;

View File

@ -2457,8 +2457,12 @@ const char* syncStr(ESyncState state) {
return "candidate"; return "candidate";
case TAOS_SYNC_STATE_LEADER: case TAOS_SYNC_STATE_LEADER:
return "leader"; return "leader";
default: case TAOS_SYNC_STATE_ERROR:
return "error"; return "error";
case TAOS_SYNC_STATE_OFFLINE:
return "offline";
default:
return "unknown";
} }
} }