process the status msg only once

This commit is contained in:
Shengliang Guan 2021-12-29 22:22:09 -08:00
parent 28359096b3
commit 62f620fa65
4 changed files with 66 additions and 59 deletions

View File

@ -397,7 +397,7 @@ void dndSendStatusMsg(SDnode *pDnode) {
static void dndUpdateDnodeCfg(SDnode *pDnode, SDnodeCfg *pCfg) { static void dndUpdateDnodeCfg(SDnode *pDnode, SDnodeCfg *pCfg) {
SDnodeMgmt *pMgmt = &pDnode->dmgmt; SDnodeMgmt *pMgmt = &pDnode->dmgmt;
if (pMgmt->dnodeId == 0) { if (pMgmt->dnodeId == 0) {
dInfo("set dnodeId:%d clusterId:% " PRId64, pCfg->dnodeId, pCfg->clusterId); dInfo("set dnodeId:%d clusterId:%" PRId64, pCfg->dnodeId, pCfg->clusterId);
taosWLockLatch(&pMgmt->latch); taosWLockLatch(&pMgmt->latch);
pMgmt->dnodeId = pCfg->dnodeId; pMgmt->dnodeId = pCfg->dnodeId;
pMgmt->clusterId = pCfg->clusterId; pMgmt->clusterId = pCfg->clusterId;
@ -440,19 +440,21 @@ static void dndProcessStatusRsp(SDnode *pDnode, SRpcMsg *pMsg) {
} }
SStatusRsp *pRsp = pMsg->pCont; SStatusRsp *pRsp = pMsg->pCont;
SDnodeCfg *pCfg = &pRsp->dnodeCfg; if (pMsg->pCont != NULL) {
pCfg->dnodeId = htonl(pCfg->dnodeId); SDnodeCfg *pCfg = &pRsp->dnodeCfg;
pCfg->clusterId = htobe64(pCfg->clusterId); pCfg->dnodeId = htonl(pCfg->dnodeId);
dndUpdateDnodeCfg(pDnode, pCfg); pCfg->clusterId = htobe64(pCfg->clusterId);
dndUpdateDnodeCfg(pDnode, pCfg);
SDnodeEps *pDnodeEps = &pRsp->dnodeEps; SDnodeEps *pDnodeEps = &pRsp->dnodeEps;
pDnodeEps->num = htonl(pDnodeEps->num); pDnodeEps->num = htonl(pDnodeEps->num);
for (int32_t i = 0; i < pDnodeEps->num; ++i) { for (int32_t i = 0; i < pDnodeEps->num; ++i) {
pDnodeEps->eps[i].id = htonl(pDnodeEps->eps[i].id); pDnodeEps->eps[i].id = htonl(pDnodeEps->eps[i].id);
pDnodeEps->eps[i].port = htons(pDnodeEps->eps[i].port); pDnodeEps->eps[i].port = htons(pDnodeEps->eps[i].port);
}
dndUpdateDnodeEps(pDnode, pDnodeEps);
} }
dndUpdateDnodeEps(pDnode, pDnodeEps);
pMgmt->statusSent = 0; pMgmt->statusSent = 0;
} }

View File

@ -28,7 +28,7 @@ SDnodeObj *mndAcquireDnode(SMnode *pMnode, int32_t dnodeId);
void mndReleaseDnode(SMnode *pMnode, SDnodeObj *pDnode); void mndReleaseDnode(SMnode *pMnode, SDnodeObj *pDnode);
SEpSet mndGetDnodeEpset(SDnodeObj *pDnode); SEpSet mndGetDnodeEpset(SDnodeObj *pDnode);
int32_t mndGetDnodeSize(SMnode *pMnode); int32_t mndGetDnodeSize(SMnode *pMnode);
bool mndIsDnodeOnline(SMnode *pMnode, SDnodeObj *pDnode); bool mndIsDnodeOnline(SMnode *pMnode, SDnodeObj *pDnode, int64_t curMs);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -206,9 +206,8 @@ int32_t mndGetDnodeSize(SMnode *pMnode) {
return sdbGetSize(pSdb, SDB_DNODE); return sdbGetSize(pSdb, SDB_DNODE);
} }
bool mndIsDnodeOnline(SMnode *pMnode, SDnodeObj *pDnode) { bool mndIsDnodeOnline(SMnode *pMnode, SDnodeObj *pDnode, int64_t curMs) {
int64_t ms = taosGetTimestampMs(); int64_t interval = ABS(pDnode->lastAccessTime - curMs);
int64_t interval = ABS(pDnode->lastAccessTime - ms);
if (interval > 3500 * pMnode->cfg.statusInterval) { if (interval > 3500 * pMnode->cfg.statusInterval) {
if (pDnode->rebootTime > 0) { if (pDnode->rebootTime > 0) {
pDnode->offlineReason = DND_REASON_STATUS_MSG_TIMEOUT; pDnode->offlineReason = DND_REASON_STATUS_MSG_TIMEOUT;
@ -313,33 +312,37 @@ static int32_t mndProcessStatusMsg(SMnodeMsg *pMsg) {
} }
} }
if (pStatus->sver != pMnode->cfg.sver) { int64_t curMs = taosGetTimestampMs();
if (pDnode != NULL) { bool online = mndIsDnodeOnline(pMnode, pDnode, curMs);
pDnode->offlineReason = DND_REASON_VERSION_NOT_MATCH; bool needCheckCfg = !(online && pDnode->rebootTime == pStatus->rebootTime);
}
mError("dnode:%d, status msg version:%d not match cluster:%d", pStatus->dnodeId, pStatus->sver, pMnode->cfg.sver);
terrno = TSDB_CODE_MND_INVALID_MSG_VERSION;
goto PROCESS_STATUS_MSG_OVER;
}
if (pStatus->dnodeId == 0) { if (needCheckCfg) {
mDebug("dnode:%d %s, first access, set clusterId %" PRId64, pDnode->id, pDnode->ep, pMnode->clusterId); if (pStatus->sver != pMnode->cfg.sver) {
} else {
if (pStatus->clusterId != pMnode->clusterId) {
if (pDnode != NULL) { if (pDnode != NULL) {
pDnode->offlineReason = DND_REASON_CLUSTER_ID_NOT_MATCH; pDnode->offlineReason = DND_REASON_VERSION_NOT_MATCH;
} }
mError("dnode:%d, clusterId %" PRId64 " not match exist %" PRId64, pDnode->id, pStatus->clusterId, mError("dnode:%d, status msg version:%d not match cluster:%d", pStatus->dnodeId, pStatus->sver, pMnode->cfg.sver);
pMnode->clusterId); terrno = TSDB_CODE_MND_INVALID_MSG_VERSION;
terrno = TSDB_CODE_MND_INVALID_CLUSTER_ID;
goto PROCESS_STATUS_MSG_OVER; goto PROCESS_STATUS_MSG_OVER;
} else {
pDnode->accessTimes++;
mTrace("dnode:%d, status received, access times %d", pDnode->id, pDnode->accessTimes);
} }
}
if (/*pDnode->status == DND_STATUS_OFFLINE*/1) { if (pStatus->dnodeId == 0) {
mDebug("dnode:%d %s, first access, set clusterId %" PRId64, pDnode->id, pDnode->ep, pMnode->clusterId);
} else {
if (pStatus->clusterId != pMnode->clusterId) {
if (pDnode != NULL) {
pDnode->offlineReason = DND_REASON_CLUSTER_ID_NOT_MATCH;
}
mError("dnode:%d, clusterId %" PRId64 " not match exist %" PRId64, pDnode->id, pStatus->clusterId,
pMnode->clusterId);
terrno = TSDB_CODE_MND_INVALID_CLUSTER_ID;
goto PROCESS_STATUS_MSG_OVER;
} else {
pDnode->accessTimes++;
mTrace("dnode:%d, status received, access times %d", pDnode->id, pDnode->accessTimes);
}
}
// Verify whether the cluster parameters are consistent when status change from offline to ready // Verify whether the cluster parameters are consistent when status change from offline to ready
int32_t ret = mndCheckClusterCfgPara(pMnode, &pStatus->clusterCfg); int32_t ret = mndCheckClusterCfgPara(pMnode, &pStatus->clusterCfg);
if (0 != ret) { if (0 != ret) {
@ -350,28 +353,28 @@ static int32_t mndProcessStatusMsg(SMnodeMsg *pMsg) {
} }
mInfo("dnode:%d, from offline to online", pDnode->id); mInfo("dnode:%d, from offline to online", pDnode->id);
pDnode->rebootTime = pStatus->rebootTime;
pDnode->numOfCores = pStatus->numOfCores;
pDnode->numOfSupportVnodes = pStatus->numOfSupportVnodes;
int32_t numOfEps = mndGetDnodeSize(pMnode);
int32_t contLen = sizeof(SStatusRsp) + numOfEps * sizeof(SDnodeEp);
SStatusRsp *pRsp = rpcMallocCont(contLen);
if (pRsp == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto PROCESS_STATUS_MSG_OVER;
}
pRsp->dnodeCfg.dnodeId = htonl(pDnode->id);
pRsp->dnodeCfg.clusterId = htobe64(pMnode->clusterId);
mndGetDnodeData(pMnode, &pRsp->dnodeEps, numOfEps);
pMsg->contLen = contLen;
pMsg->pCont = pRsp;
} }
pDnode->rebootTime = pStatus->rebootTime; pDnode->lastAccessTime = curMs;
pDnode->numOfCores = pStatus->numOfCores;
pDnode->numOfSupportVnodes = pStatus->numOfSupportVnodes;
pDnode->lastAccessTime = taosGetTimestampMs();
int32_t numOfEps = mndGetDnodeSize(pMnode);
int32_t contLen = sizeof(SStatusRsp) + numOfEps * sizeof(SDnodeEp);
SStatusRsp *pRsp = rpcMallocCont(contLen);
if (pRsp == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto PROCESS_STATUS_MSG_OVER;
}
pRsp->dnodeCfg.dnodeId = htonl(pDnode->id);
pRsp->dnodeCfg.clusterId = htobe64(pMnode->clusterId);
// mndGetDnodeData(pMnode, &pRsp->dnodeEps, numOfEps);
pMsg->contLen = contLen;
pMsg->pCont = pRsp;
code = 0; code = 0;
PROCESS_STATUS_MSG_OVER: PROCESS_STATUS_MSG_OVER:
@ -682,11 +685,12 @@ static int32_t mndRetrieveDnodes(SMnodeMsg *pMsg, SShowObj *pShow, char *data, i
int32_t cols = 0; int32_t cols = 0;
SDnodeObj *pDnode = NULL; SDnodeObj *pDnode = NULL;
char *pWrite; char *pWrite;
int64_t curMs = taosGetTimestampMs();
while (numOfRows < rows) { while (numOfRows < rows) {
pShow->pIter = sdbFetch(pSdb, SDB_DNODE, pShow->pIter, (void **)&pDnode); pShow->pIter = sdbFetch(pSdb, SDB_DNODE, pShow->pIter, (void **)&pDnode);
if (pShow->pIter == NULL) break; if (pShow->pIter == NULL) break;
bool online = mndIsDnodeOnline(pMnode, pDnode); bool online = mndIsDnodeOnline(pMnode, pDnode, curMs);
cols = 0; cols = 0;

View File

@ -260,7 +260,8 @@ static SArray *mndBuildDnodesArray(SMnode *pMnode) {
pDnode->numOfVnodes++; pDnode->numOfVnodes++;
} }
bool online = mndIsDnodeOnline(pMnode, pDnode); int64_t curMs = taosGetTimestampMs();
bool online = mndIsDnodeOnline(pMnode, pDnode, curMs);
if (online) { if (online) {
taosArrayPush(pArray, pDnode); taosArrayPush(pArray, pDnode);
} }