From 56eba758064cc4bcb92d7f53b3f67ae13ab0bbcc Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 6 Mar 2023 19:26:51 +0800 Subject: [PATCH 1/3] enh: print mnode epSet on sending dnode status failure --- source/dnode/mgmt/mgmt_dnode/src/dmHandle.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c b/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c index 0724fcc63a..1dd87d0543 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c @@ -61,6 +61,16 @@ static void dmProcessStatusRsp(SDnodeMgmt *pMgmt, SRpcMsg *pRsp) { rpcFreeCont(pRsp->pCont); } +void dmEpSetToStr(char *buf, int32_t len, SEpSet *epSet) { + int32_t n = 0; + n += snprintf(buf + n, len - n, "%s", "{"); + for (int i = 0; i < epSet->numOfEps; i++) { + n += snprintf(buf + n, len - n, "%s:%d%s", epSet->eps[i].fqdn, epSet->eps[i].port, + (i + 1 < epSet->numOfEps ? ", " : "")); + } + n += snprintf(buf + n, len - n, "%s", "}"); +} + void dmSendStatusReq(SDnodeMgmt *pMgmt) { SStatusReq req = {0}; @@ -119,11 +129,9 @@ void dmSendStatusReq(SDnodeMgmt *pMgmt) { dmGetMnodeEpSet(pMgmt->pData, &epSet); rpcSendRecv(pMgmt->msgCb.clientRpc, &epSet, &rpcMsg, &rpcRsp); if (rpcRsp.code != 0) { - dError("failed to send status req since %s, numOfEps:%d inUse:%d", tstrerror(rpcRsp.code), epSet.numOfEps, - epSet.inUse); - for (int32_t i = 0; i < epSet.numOfEps; ++i) { - dDebug("index:%d, mnode ep:%s:%u", i, epSet.eps[i].fqdn, epSet.eps[i].port); - } + char tbuf[256]; + dmEpSetToStr(tbuf, sizeof(tbuf), &epSet); + dError("failed to send status req since %s, epSet:%s, inUse:%d", tstrerror(rpcRsp.code), tbuf, epSet.inUse); } dmProcessStatusRsp(pMgmt, &rpcRsp); } From c0a835ff58e99312637efd810680e061efa6e49c Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 6 Mar 2023 19:34:24 +0800 Subject: [PATCH 2/3] enh: rotate mnode epSet on sending dnode status failure --- source/dnode/mgmt/mgmt_dnode/src/dmHandle.c | 1 + source/dnode/mgmt/node_util/inc/dmUtil.h | 1 + source/dnode/mgmt/node_util/src/dmEps.c | 22 +++++++++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c b/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c index 1dd87d0543..228f301aec 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c @@ -129,6 +129,7 @@ void dmSendStatusReq(SDnodeMgmt *pMgmt) { dmGetMnodeEpSet(pMgmt->pData, &epSet); rpcSendRecv(pMgmt->msgCb.clientRpc, &epSet, &rpcMsg, &rpcRsp); if (rpcRsp.code != 0) { + dmRotateMnodeEpSet(pMgmt->pData); char tbuf[256]; dmEpSetToStr(tbuf, sizeof(tbuf), &epSet); dError("failed to send status req since %s, epSet:%s, inUse:%d", tstrerror(rpcRsp.code), tbuf, epSet.inUse); diff --git a/source/dnode/mgmt/node_util/inc/dmUtil.h b/source/dnode/mgmt/node_util/inc/dmUtil.h index 55ee6d6973..cfdea40477 100644 --- a/source/dnode/mgmt/node_util/inc/dmUtil.h +++ b/source/dnode/mgmt/node_util/inc/dmUtil.h @@ -166,6 +166,7 @@ int32_t dmReadEps(SDnodeData *pData); int32_t dmWriteEps(SDnodeData *pData); void dmUpdateEps(SDnodeData *pData, SArray *pDnodeEps); void dmGetMnodeEpSet(SDnodeData *pData, SEpSet *pEpSet); +void dmRotateMnodeEpSet(SDnodeData *pData); void dmGetMnodeEpSetForRedirect(SDnodeData *pData, SRpcMsg *pMsg, SEpSet *pEpSet); void dmSetMnodeEpSet(SDnodeData *pData, SEpSet *pEpSet); bool dmUpdateDnodeInfo(void *pData, int32_t *dnodeId, int64_t *clusterId, char *fqdn, uint16_t *port); diff --git a/source/dnode/mgmt/node_util/src/dmEps.c b/source/dnode/mgmt/node_util/src/dmEps.c index e9ab8a0460..784d2b425b 100644 --- a/source/dnode/mgmt/node_util/src/dmEps.c +++ b/source/dnode/mgmt/node_util/src/dmEps.c @@ -325,6 +325,28 @@ void dmGetMnodeEpSet(SDnodeData *pData, SEpSet *pEpSet) { taosThreadRwlockUnlock(&pData->lock); } +static FORCE_INLINE void dmSwapEps(SEp *epLhs, SEp *epRhs) { + SEp epTmp; + + epTmp.port = epLhs->port; + tstrncpy(epTmp.fqdn, epLhs->fqdn, tListLen(epTmp.fqdn)); + + epLhs->port = epRhs->port; + tstrncpy(epLhs->fqdn, epRhs->fqdn, tListLen(epLhs->fqdn)); + + epRhs->port = epTmp.port; + tstrncpy(epRhs->fqdn, epTmp.fqdn, tListLen(epRhs->fqdn)); +} + +void dmRotateMnodeEpSet(SDnodeData *pData) { + taosThreadRwlockRdlock(&pData->lock); + SEpSet *pEpSet = &pData->mnodeEps; + for (int i = 1; i < pEpSet->numOfEps; i++) { + dmSwapEps(&pEpSet->eps[i - 1], &pEpSet->eps[i]); + } + taosThreadRwlockUnlock(&pData->lock); +} + void dmGetMnodeEpSetForRedirect(SDnodeData *pData, SRpcMsg *pMsg, SEpSet *pEpSet) { dmGetMnodeEpSet(pData, pEpSet); dTrace("msg is redirected, handle:%p num:%d use:%d", pMsg->info.handle, pEpSet->numOfEps, pEpSet->inUse); From 62d4729eb84c94e778ca7df4c6b9fb9e1e734414 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 7 Mar 2023 15:19:47 +0800 Subject: [PATCH 3/3] enh: print term in logging msg of raftStoreWriteFile --- source/libs/sync/src/syncRaftStore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncRaftStore.c b/source/libs/sync/src/syncRaftStore.c index 68e735cf0d..bd15567c87 100644 --- a/source/libs/sync/src/syncRaftStore.c +++ b/source/libs/sync/src/syncRaftStore.c @@ -139,7 +139,7 @@ int32_t raftStoreWriteFile(SSyncNode *pNode) { if (taosRenameFile(file, realfile) != 0) goto _OVER; code = 0; - sInfo("vgId:%d, succeed to write raft store file:%s, len:%d", pNode->vgId, realfile, len); + sInfo("vgId:%d, succeed to write raft store file:%s, term:%" PRId64, pNode->vgId, realfile, pStore->currentTerm); _OVER: if (pJson != NULL) tjsonDelete(pJson);