From 95ca88432f92e586956ef452ac65e02c38d8a0e5 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Sat, 11 Jun 2022 10:58:57 +0800 Subject: [PATCH 1/3] fix: save sync config in mnode --- source/dnode/mnode/sdb/src/sdb.c | 5 ++++- source/dnode/mnode/sdb/src/sdbFile.c | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/source/dnode/mnode/sdb/src/sdb.c b/source/dnode/mnode/sdb/src/sdb.c index c11aaaaa8a..060bba0ce4 100644 --- a/source/dnode/mnode/sdb/src/sdb.c +++ b/source/dnode/mnode/sdb/src/sdb.c @@ -163,7 +163,10 @@ void sdbSetApplyIndex(SSdb *pSdb, int64_t index) { pSdb->curVer = index; } void sdbSetApplyTerm(SSdb *pSdb, int64_t term) { pSdb->curTerm = term; } -void sdbSetCurConfig(SSdb *pSdb, int64_t config) { pSdb->curConfig = config; } +void sdbSetCurConfig(SSdb *pSdb, int64_t config) { + mInfo("mnode sync config set from %" PRId64 " to %" PRId64, pSdb->curConfig, config); + pSdb->curConfig = config; +} int64_t sdbGetApplyIndex(SSdb *pSdb) { return pSdb->curVer; } diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index b32abc3eaa..9a0afb51ea 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -432,8 +432,8 @@ static int32_t sdbWriteFileImp(SSdb *pSdb) { } else { pSdb->lastCommitVer = pSdb->curVer; pSdb->lastCommitTerm = pSdb->curTerm; - mDebug("write sdb file successfully, ver:%" PRId64 " term:%" PRId64 " file:%s", pSdb->lastCommitVer, - pSdb->lastCommitTerm, curfile); + mDebug("write sdb file successfully, index:%" PRId64 " term:%" PRId64 " config:%" PRId64 " file:%s", + pSdb->lastCommitVer, pSdb->lastCommitTerm, pSdb->curConfig, curfile); } terrno = code; From d702cb1486fa3353294a2ef0b6a34a7c800e921d Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Sat, 11 Jun 2022 12:55:16 +0800 Subject: [PATCH 2/3] refactor: adjust mnode log --- source/dnode/mnode/impl/src/mndSync.c | 4 ++-- source/dnode/mnode/impl/src/mndTrans.c | 6 +++--- source/dnode/mnode/sdb/src/sdb.c | 6 ++++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index a0f722b3d2..adc86df829 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -200,14 +200,14 @@ int32_t mndInitSync(SMnode *pMnode) { return -1; } - mDebug("mnode sync is opened, id:%" PRId64, pMgmt->sync); + mDebug("mnode-sync is opened, id:%" PRId64, pMgmt->sync); return 0; } void mndCleanupSync(SMnode *pMnode) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; syncStop(pMgmt->sync); - mDebug("mnode sync is stopped, id:%" PRId64, pMgmt->sync); + mDebug("mnode-sync is stopped, id:%" PRId64, pMgmt->sync); tsem_destroy(&pMgmt->syncSem); memset(pMgmt, 0, sizeof(SSyncMgmt)); diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index a689c89037..310f2fffbc 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -297,7 +297,7 @@ static SSdbRow *mndTransActionDecode(SSdbRaw *pRaw) { SDB_GET_INT32(pRaw, dataPos, &dataLen, _OVER) action.pRaw = taosMemoryMalloc(dataLen); if (action.pRaw == NULL) goto _OVER; - mTrace("raw:%p, is created", pData); + // mTrace("raw:%p, is created", pData); SDB_GET_BINARY(pRaw, dataPos, (void *)action.pRaw, dataLen, _OVER); if (taosArrayPush(pTrans->redoActions, &action) == NULL) goto _OVER; action.pRaw = NULL; @@ -330,7 +330,7 @@ static SSdbRow *mndTransActionDecode(SSdbRaw *pRaw) { SDB_GET_INT32(pRaw, dataPos, &dataLen, _OVER) action.pRaw = taosMemoryMalloc(dataLen); if (action.pRaw == NULL) goto _OVER; - mTrace("raw:%p, is created", pData); + // mTrace("raw:%p, is created", action.pRaw); SDB_GET_BINARY(pRaw, dataPos, (void *)action.pRaw, dataLen, _OVER); if (taosArrayPush(pTrans->undoActions, &action) == NULL) goto _OVER; action.pRaw = NULL; @@ -363,7 +363,7 @@ static SSdbRow *mndTransActionDecode(SSdbRaw *pRaw) { SDB_GET_INT32(pRaw, dataPos, &dataLen, _OVER) action.pRaw = taosMemoryMalloc(dataLen); if (action.pRaw == NULL) goto _OVER; - mTrace("raw:%p, is created", action.pRaw); + // mTrace("raw:%p, is created", action.pRaw); SDB_GET_BINARY(pRaw, dataPos, (void *)action.pRaw, dataLen, _OVER); if (taosArrayPush(pTrans->commitActions, &action) == NULL) goto _OVER; action.pRaw = NULL; diff --git a/source/dnode/mnode/sdb/src/sdb.c b/source/dnode/mnode/sdb/src/sdb.c index 060bba0ce4..39e9c75888 100644 --- a/source/dnode/mnode/sdb/src/sdb.c +++ b/source/dnode/mnode/sdb/src/sdb.c @@ -164,8 +164,10 @@ void sdbSetApplyIndex(SSdb *pSdb, int64_t index) { pSdb->curVer = index; } void sdbSetApplyTerm(SSdb *pSdb, int64_t term) { pSdb->curTerm = term; } void sdbSetCurConfig(SSdb *pSdb, int64_t config) { - mInfo("mnode sync config set from %" PRId64 " to %" PRId64, pSdb->curConfig, config); - pSdb->curConfig = config; + if (pSdb->curConfig != config) { + mDebug("mnode sync config set from %" PRId64 " to %" PRId64, pSdb->curConfig, config); + pSdb->curConfig = config; + } } int64_t sdbGetApplyIndex(SSdb *pSdb) { return pSdb->curVer; } From 3fc793dbe03e45614aec512388de00d0715b9256 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Sat, 11 Jun 2022 13:09:45 +0800 Subject: [PATCH 3/3] enh: save dnodeVer in dnodeEps.json --- source/dnode/mgmt/mgmt_dnode/src/dmHandle.c | 12 +++++--- source/dnode/mgmt/node_util/src/dmEps.c | 31 ++++++++------------- source/dnode/mnode/impl/src/mndDnode.c | 4 +-- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c b/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c index fbd46db183..3e55469a4a 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c @@ -38,9 +38,13 @@ static void dmProcessStatusRsp(SDnodeMgmt *pMgmt, SRpcMsg *pRsp) { SStatusRsp statusRsp = {0}; if (pRsp->pCont != NULL && pRsp->contLen > 0 && tDeserializeSStatusRsp(pRsp->pCont, pRsp->contLen, &statusRsp) == 0) { - pMgmt->pData->dnodeVer = statusRsp.dnodeVer; - dmUpdateDnodeCfg(pMgmt, &statusRsp.dnodeCfg); - dmUpdateEps(pMgmt->pData, statusRsp.pDnodeEps); + dTrace("status msg received from mnode, dnodeVer:%" PRId64 " saved:%" PRId64, statusRsp.dnodeVer, + pMgmt->pData->dnodeVer); + if (pMgmt->pData->dnodeVer != statusRsp.dnodeVer) { + pMgmt->pData->dnodeVer = statusRsp.dnodeVer; + dmUpdateDnodeCfg(pMgmt, &statusRsp.dnodeCfg); + dmUpdateEps(pMgmt->pData, statusRsp.pDnodeEps); + } } rpcFreeCont(pRsp->pCont); tFreeSStatusRsp(&statusRsp); @@ -89,7 +93,7 @@ void dmSendStatusReq(SDnodeMgmt *pMgmt) { SRpcMsg rpcMsg = {.pCont = pHead, .contLen = contLen, .msgType = TDMT_MND_STATUS, .info.ahandle = (void *)0x9527}; SRpcMsg rpcRsp = {0}; - dTrace("send status msg to mnode"); + dTrace("send status msg to mnode, dnodeVer:%" PRId64, req.dnodeVer); SEpSet epSet = {0}; dmGetMnodeEpSet(pMgmt->pData, &epSet); diff --git a/source/dnode/mgmt/node_util/src/dmEps.c b/source/dnode/mgmt/node_util/src/dmEps.c index 6f6896ff9f..096fc753b2 100644 --- a/source/dnode/mgmt/node_util/src/dmEps.c +++ b/source/dnode/mgmt/node_util/src/dmEps.c @@ -81,6 +81,13 @@ int32_t dmReadEps(SDnodeData *pData) { } pData->dnodeId = dnodeId->valueint; + cJSON *dnodeVer = cJSON_GetObjectItem(root, "dnodeVer"); + if (!dnodeVer || dnodeVer->type != cJSON_String) { + dError("failed to read %s since dnodeVer not found", file); + goto _OVER; + } + pData->dnodeVer = atoll(dnodeVer->valuestring); + cJSON *clusterId = cJSON_GetObjectItem(root, "clusterId"); if (!clusterId || clusterId->type != cJSON_String) { dError("failed to read %s since clusterId not found", file); @@ -193,6 +200,7 @@ int32_t dmWriteEps(SDnodeData *pData) { len += snprintf(content + len, maxLen - len, "{\n"); len += snprintf(content + len, maxLen - len, " \"dnodeId\": %d,\n", pData->dnodeId); + len += snprintf(content + len, maxLen - len, " \"dnodeVer\": \"%" PRId64 "\",\n", pData->dnodeVer); len += snprintf(content + len, maxLen - len, " \"clusterId\": \"%" PRId64 "\",\n", pData->clusterId); len += snprintf(content + len, maxLen - len, " \"dropped\": %d,\n", pData->dropped); len += snprintf(content + len, maxLen - len, " \"dnodes\": [{\n"); @@ -224,30 +232,15 @@ int32_t dmWriteEps(SDnodeData *pData) { } pData->updateTime = taosGetTimestampMs(); - dDebug("successed to write %s", realfile); + dDebug("successed to write %s, dnodeVer:%" PRId64, realfile, pData->dnodeVer); return 0; } void dmUpdateEps(SDnodeData *pData, SArray *eps) { - int32_t numOfEps = taosArrayGetSize(eps); - if (numOfEps <= 0) return; - taosThreadRwlockWrlock(&pData->lock); - - int32_t numOfEpsOld = (int32_t)taosArrayGetSize(pData->dnodeEps); - if (numOfEps != numOfEpsOld) { - dDebug("new dnode list get from mnode"); - dmResetEps(pData, eps); - dmWriteEps(pData); - } else { - int32_t size = numOfEps * sizeof(SDnodeEp); - if (memcmp(pData->dnodeEps->pData, eps->pData, size) != 0) { - dDebug("new dnode list get from mnode"); - dmResetEps(pData, eps); - dmWriteEps(pData); - } - } - + dDebug("new dnode list get from mnode, dnodeVer:%" PRId64, pData->dnodeVer); + dmResetEps(pData, eps); + dmWriteEps(pData); taosThreadRwlockUnlock(&pData->lock); } diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 3fab870277..73eea70195 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -385,7 +385,7 @@ static int32_t mndProcessStatusReq(SRpcMsg *pReq) { int64_t dnodeVer = sdbGetTableVer(pMnode->pSdb, SDB_DNODE) + sdbGetTableVer(pMnode->pSdb, SDB_MNODE); int64_t curMs = taosGetTimestampMs(); bool online = mndIsDnodeOnline(pDnode, curMs); - bool dnodeChanged = (statusReq.dnodeVer != dnodeVer); + bool dnodeChanged = (statusReq.dnodeVer == 0) || (statusReq.dnodeVer != dnodeVer); bool reboot = (pDnode->rebootTime != statusReq.rebootTime); bool needCheck = !online || dnodeChanged || reboot; @@ -427,7 +427,7 @@ static int32_t mndProcessStatusReq(SRpcMsg *pReq) { if (!online) { mInfo("dnode:%d, from offline to online", pDnode->id); } else { - mDebug("dnode:%d, send dnode epset, online:%d dnode_ver:%" PRId64 ":%" PRId64 " reboot:%d", pDnode->id, online, + mDebug("dnode:%d, send dnode epset, online:%d dnodeVer:%" PRId64 ":%" PRId64 " reboot:%d", pDnode->id, online, statusReq.dnodeVer, dnodeVer, reboot); }