From f30d9a034e3da70238d1325e5a41ad40c647eb7b Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 10 Jun 2022 15:39:11 +0800 Subject: [PATCH 1/3] feat: redistribute vgroup to dnodes --- include/util/taoserror.h | 3 +- source/dnode/mgmt/mgmt_mnode/src/mmHandle.c | 3 + source/dnode/mnode/impl/src/mndMain.c | 4 +- source/dnode/mnode/impl/src/mndTrans.c | 4 +- source/dnode/mnode/impl/src/mndVgroup.c | 142 +++++++++------ source/dnode/vnode/src/vnd/vnodeSync.c | 1 + source/util/src/terror.c | 3 +- ...istribute_vgroup_replica3_move_1_vnode.sim | 165 ++++++++++++++++++ 8 files changed, 270 insertions(+), 55 deletions(-) create mode 100644 tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim diff --git a/include/util/taoserror.h b/include/util/taoserror.h index ae0191e6d2..ce6a3f2ce7 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -85,7 +85,6 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RPC_NETWORK_UNAVAIL TAOS_DEF_ERROR_CODE(0, 0x0102) #define TSDB_CODE_RPC_FQDN_ERROR TAOS_DEF_ERROR_CODE(0, 0x0103) #define TSDB_CODE_RPC_PORT_EADDRINUSE TAOS_DEF_ERROR_CODE(0, 0x0104) -#define TSDB_CODE_RPC_INDIRECT_NETWORK_UNAVAIL TAOS_DEF_ERROR_CODE(0, 0x0105) //client #define TSDB_CODE_TSC_INVALID_OPERATION TAOS_DEF_ERROR_CODE(0, 0x0200) @@ -220,6 +219,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_MND_VGROUP_ALREADY_IN_DNODE TAOS_DEF_ERROR_CODE(0, 0x0392) #define TSDB_CODE_MND_VGROUP_UN_CHANGED TAOS_DEF_ERROR_CODE(0, 0x0393) #define TSDB_CODE_MND_HAS_OFFLINE_DNODE TAOS_DEF_ERROR_CODE(0, 0x0394) +#define TSDB_CODE_MND_INVALID_REPLICA TAOS_DEF_ERROR_CODE(0, 0x0395) // mnode-stable #define TSDB_CODE_MND_STB_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03A0) @@ -260,6 +260,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_MND_TRANS_CONFLICT TAOS_DEF_ERROR_CODE(0, 0x03D3) #define TSDB_CODE_MND_TRANS_UNKNOW_ERROR TAOS_DEF_ERROR_CODE(0, 0x03D4) #define TSDB_CODE_MND_TRANS_CLOG_IS_NULL TAOS_DEF_ERROR_CODE(0, 0x03D5) +#define TSDB_CODE_MND_TRANS_NETWORK_UNAVAILL TAOS_DEF_ERROR_CODE(0, 0x03D6) // mnode-mq #define TSDB_CODE_MND_TOPIC_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03E0) diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index 264dc74e36..2589bbd690 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -170,6 +170,9 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_MND_COMPACT_DB, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_GET_DB_CFG, mmPutNodeMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_VGROUP_LIST, mmPutNodeMsgToReadQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MND_REDISTRIBUTE_VGROUP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MND_MERGE_VGROUP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MND_BALANCE_VGROUP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_CREATE_FUNC, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_RETRIEVE_FUNC, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_DROP_FUNC, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 813e4c30b5..27d13d66b6 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -217,8 +217,8 @@ static int32_t mndInitSteps(SMnode *pMnode) { if (mndAllocStep(pMnode, "mnode-cluster", mndInitCluster, mndCleanupCluster) != 0) return -1; if (mndAllocStep(pMnode, "mnode-mnode", mndInitMnode, mndCleanupMnode) != 0) return -1; if (mndAllocStep(pMnode, "mnode-qnode", mndInitQnode, mndCleanupQnode) != 0) return -1; - if (mndAllocStep(pMnode, "mnode-qnode", mndInitSnode, mndCleanupSnode) != 0) return -1; - if (mndAllocStep(pMnode, "mnode-qnode", mndInitBnode, mndCleanupBnode) != 0) return -1; + if (mndAllocStep(pMnode, "mnode-snode", mndInitSnode, mndCleanupSnode) != 0) return -1; + if (mndAllocStep(pMnode, "mnode-bnode", mndInitBnode, mndCleanupBnode) != 0) return -1; if (mndAllocStep(pMnode, "mnode-dnode", mndInitDnode, mndCleanupDnode) != 0) return -1; if (mndAllocStep(pMnode, "mnode-user", mndInitUser, mndCleanupUser) != 0) return -1; if (mndAllocStep(pMnode, "mnode-grant", mndInitGrant, mndCleanupGrant) != 0) return -1; diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 1e98a3bbf9..1124baf286 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -781,7 +781,7 @@ static void mndTransSendRpcRsp(SMnode *pMnode, STrans *pTrans) { sendRsp = true; } } else { - if (pTrans->stage == TRN_STAGE_REDO_ACTION && pTrans->failedTimes > 3) { + if (pTrans->stage == TRN_STAGE_REDO_ACTION && pTrans->failedTimes > 2) { if (code == 0) code = TSDB_CODE_MND_TRANS_UNKNOW_ERROR; sendRsp = true; } @@ -791,7 +791,7 @@ static void mndTransSendRpcRsp(SMnode *pMnode, STrans *pTrans) { mDebug("trans:%d, send rsp, code:0x%x stage:%s app:%p", pTrans->id, code, mndTransStr(pTrans->stage), pTrans->rpcInfo.ahandle); if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { - code = TSDB_CODE_RPC_INDIRECT_NETWORK_UNAVAIL; + code = TSDB_CODE_MND_TRANS_NETWORK_UNAVAILL; } SRpcMsg rspMsg = {.code = code, .info = pTrans->rpcInfo}; diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index 76e65ddd92..1c395fa767 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -59,6 +59,10 @@ int32_t mndInitVgroup(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_DND_DROP_VNODE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_COMPACT_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_MND_REDISTRIBUTE_VGROUP, mndProcessRedistributeVgroupMsg); + mndSetMsgHandle(pMnode, TDMT_MND_MERGE_VGROUP, mndProcessSplitVgroupMsg); + mndSetMsgHandle(pMnode, TDMT_MND_BALANCE_VGROUP, mndProcessBalanceVgroupMsg); + mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_VGROUP, mndRetrieveVgroups); mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_VGROUP, mndCancelGetNextVgroup); mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_VNODES, mndRetrieveVnodes); @@ -1009,10 +1013,10 @@ static int32_t mndAddDecVgroupReplicaFromTrans(SMnode *pMnode, STrans *pTrans, S if (pGid == NULL) return 0; + pVgroup->replica--; memcpy(&delGid, pGid, sizeof(SVnodeGid)); memcpy(pGid, &pVgroup->vnodeGid[pVgroup->replica], sizeof(SVnodeGid)); memset(&pVgroup->vnodeGid[pVgroup->replica], 0, sizeof(SVnodeGid)); - pVgroup->replica--; if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, pVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; if (mndAddDropVnodeAction(pMnode, pTrans, pDb, pVgroup, &delGid, true) != 0) return -1; @@ -1040,11 +1044,36 @@ static int32_t mndRedistributeVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, mInfo("vgId:%d, vnode:%d dnode:%d", newVg.vgId, i, newVg.vnodeGid[i].dnodeId); } - if (mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew1->id) != 0) goto _OVER; - if (mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld1->id) != 0) goto _OVER; - if (pNew2 != NULL) { + if (pNew1 != pOld1) { + int32_t numOfVnodes = mndGetVnodesNum(pMnode, pNew1->id); + if (numOfVnodes >= pNew1->numOfSupportVnodes) { + mError("vgId:%d, no enough vnodes in dnode:%d, numOfVnodes:%d support:%d", newVg.vgId, pNew1->id, numOfVnodes, + pNew1->numOfSupportVnodes); + terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES; + goto _OVER; + } + if (mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew1->id) != 0) goto _OVER; + if (mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld1->id) != 0) goto _OVER; + } + if (pNew2 != pOld2) { + int32_t numOfVnodes = mndGetVnodesNum(pMnode, pNew2->id); + if (numOfVnodes >= pNew2->numOfSupportVnodes) { + mError("vgId:%d, no enough vnodes in dnode:%d, numOfVnodes:%d support:%d", newVg.vgId, pNew2->id, numOfVnodes, + pNew2->numOfSupportVnodes); + terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES; + goto _OVER; + } if (mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew2->id) != 0) goto _OVER; if (mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld2->id) != 0) goto _OVER; + } + if (pNew3 != pOld3) { + int32_t numOfVnodes = mndGetVnodesNum(pMnode, pNew3->id); + if (numOfVnodes >= pNew3->numOfSupportVnodes) { + mError("vgId:%d, no enough vnodes in dnode:%d, numOfVnodes:%d support:%d", newVg.vgId, pNew3->id, numOfVnodes, + pNew3->numOfSupportVnodes); + terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES; + goto _OVER; + } if (mndAddIncVgroupReplicaToTrans(pMnode, pTrans, pDb, &newVg, pNew3->id) != 0) goto _OVER; if (mndAddDecVgroupReplicaFromTrans(pMnode, pTrans, pDb, &newVg, pOld3->id) != 0) goto _OVER; } @@ -1070,88 +1099,105 @@ _OVER: } static int32_t mndProcessRedistributeVgroupMsg(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - SUserObj *pUser = NULL; - SDnodeObj *pNew1 = NULL; - SDnodeObj *pNew2 = NULL; - SDnodeObj *pNew3 = NULL; - SDnodeObj *pOld1 = NULL; - SDnodeObj *pOld2 = NULL; - SDnodeObj *pOld3 = NULL; - SVgObj *pVgroup = NULL; - SDbObj *pDb = NULL; - int32_t code = -1; - int64_t curMs = taosGetTimestampMs(); - SMDropMnodeReq redReq = {0}; + SMnode *pMnode = pReq->info.node; + SUserObj *pUser = NULL; + SDnodeObj *pNew1 = NULL; + SDnodeObj *pNew2 = NULL; + SDnodeObj *pNew3 = NULL; + SDnodeObj *pOld1 = NULL; + SDnodeObj *pOld2 = NULL; + SDnodeObj *pOld3 = NULL; + SVgObj *pVgroup = NULL; + SDbObj *pDb = NULL; + int32_t code = -1; + int64_t curMs = taosGetTimestampMs(); -#if 0 - if (tDeserializeSCreateDropMQSBNodeReq(pReq->pCont, pReq->contLen, &dropReq) != 0) { + SRedistributeVgroupReq redReq = {0}; + if (tDeserializeSRedistributeVgroupReq(pReq->pCont, pReq->contLen, &redReq) != 0) { terrno = TSDB_CODE_INVALID_MSG; goto _OVER; } -#endif - mDebug("vgId:%d, start to redistribute", 2); + mInfo("vgId:%d, start to redistribute to dnode %d:%d:%d", redReq.vgId, redReq.dnodeId1, redReq.dnodeId2, + redReq.dnodeId3); pUser = mndAcquireUser(pMnode, pReq->conn.user); if (pUser == NULL) { terrno = TSDB_CODE_MND_NO_USER_FROM_CONN; goto _OVER; } - if (mndCheckNodeAuth(pUser) != 0) { - goto _OVER; - } + if (mndCheckNodeAuth(pUser) != 0) goto _OVER; - pVgroup = mndAcquireVgroup(pMnode, 2); + pVgroup = mndAcquireVgroup(pMnode, redReq.vgId); if (pVgroup == NULL) goto _OVER; pDb = mndAcquireDb(pMnode, pVgroup->dbName); if (pDb == NULL) goto _OVER; if (pVgroup->replica == 1) { - pNew1 = mndAcquireDnode(pMnode, 1); + if (redReq.dnodeId2 != -1 || redReq.dnodeId3 != -1) { + terrno = TSDB_CODE_MND_INVALID_REPLICA; + goto _OVER; + } + pNew1 = mndAcquireDnode(pMnode, redReq.dnodeId1); pOld1 = mndAcquireDnode(pMnode, pVgroup->vnodeGid[0].dnodeId); - if (pNew1 == NULL || pOld1 == NULL) goto _OVER; - if (!mndIsDnodeOnline(pNew1, curMs) || !mndIsDnodeOnline(pOld1, curMs)) { - terrno = TSDB_CODE_NODE_OFFLINE; + if (pNew1 == NULL || pOld1 == NULL) { + terrno = TSDB_CODE_MND_DNODE_NOT_EXIST; goto _OVER; } if (pNew1 == pOld1) { terrno = TSDB_CODE_MND_VGROUP_UN_CHANGED; goto _OVER; } - if (mndRedistributeVgroup(pMnode, pReq, pDb, pVgroup, pNew1, pOld1, NULL, NULL, NULL, NULL) != 0) goto _OVER; - } - - if (pVgroup->replica == 3) { - pNew1 = mndAcquireDnode(pMnode, 1); - pNew2 = mndAcquireDnode(pMnode, 2); - pNew3 = mndAcquireDnode(pMnode, 3); + if (!mndIsDnodeOnline(pNew1, curMs) || !mndIsDnodeOnline(pOld1, curMs)) { + terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE; + goto _OVER; + } + code = mndRedistributeVgroup(pMnode, pReq, pDb, pVgroup, pNew1, pOld1, NULL, NULL, NULL, NULL); + } else if (pVgroup->replica == 3) { + if (redReq.dnodeId2 == -1 || redReq.dnodeId3 == -1) { + terrno = TSDB_CODE_MND_INVALID_REPLICA; + goto _OVER; + } + pNew1 = mndAcquireDnode(pMnode, redReq.dnodeId1); + pNew2 = mndAcquireDnode(pMnode, redReq.dnodeId2); + pNew3 = mndAcquireDnode(pMnode, redReq.dnodeId3); pOld1 = mndAcquireDnode(pMnode, pVgroup->vnodeGid[0].dnodeId); pOld2 = mndAcquireDnode(pMnode, pVgroup->vnodeGid[1].dnodeId); pOld3 = mndAcquireDnode(pMnode, pVgroup->vnodeGid[2].dnodeId); - if (pNew1 == NULL || pOld1 == NULL || pNew2 == NULL || pOld2 == NULL || pNew3 == NULL || pOld3 == NULL) goto _OVER; - if (!mndIsDnodeOnline(pNew1, curMs) || !mndIsDnodeOnline(pOld1, curMs) || !mndIsDnodeOnline(pNew2, curMs) || - !mndIsDnodeOnline(pOld2, curMs) || !mndIsDnodeOnline(pNew3, curMs) || !mndIsDnodeOnline(pOld3, curMs)) { - terrno = TSDB_CODE_NODE_OFFLINE; + if (pNew1 == NULL || pOld1 == NULL || pNew2 == NULL || pOld2 == NULL || pNew3 == NULL || pOld3 == NULL) { + terrno = TSDB_CODE_MND_DNODE_NOT_EXIST; goto _OVER; } - bool changed = true; - if (pNew1 != pOld1 || pNew1 != pOld2 || pNew1 != pOld3) changed = true; - if (pNew2 != pOld1 || pNew2 != pOld2 || pNew2 != pOld3) changed = true; - if (pNew3 != pOld1 || pNew3 != pOld2 || pNew3 != pOld3) changed = true; + if (pNew1 == pNew2 || pNew1 == pNew3 || pNew2 == pNew3) { + terrno = TSDB_CODE_MND_INVALID_REPLICA; + goto _OVER; + } + bool changed = false; + if (pNew1 != pOld1 && pNew1 != pOld2 && pNew1 != pOld3) changed = true; + if (pNew2 != pOld1 && pNew2 != pOld2 && pNew2 != pOld3) changed = true; + if (pNew3 != pOld1 && pNew3 != pOld2 && pNew3 != pOld3) changed = true; if (!changed) { terrno = TSDB_CODE_MND_VGROUP_UN_CHANGED; goto _OVER; } - if (mndRedistributeVgroup(pMnode, pReq, pDb, pVgroup, pNew1, pOld1, pNew2, pOld2, pNew3, pOld3) != 0) goto _OVER; + if (!mndIsDnodeOnline(pNew1, curMs) || !mndIsDnodeOnline(pOld1, curMs) || !mndIsDnodeOnline(pNew2, curMs) || + !mndIsDnodeOnline(pOld2, curMs) || !mndIsDnodeOnline(pNew3, curMs) || !mndIsDnodeOnline(pOld3, curMs)) { + terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE; + goto _OVER; + } + code = mndRedistributeVgroup(pMnode, pReq, pDb, pVgroup, pNew1, pOld1, pNew2, pOld2, pNew3, pOld3); + } else { + terrno = TSDB_CODE_MND_INVALID_REPLICA; + goto _OVER; } if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { - mDebug("vgId:%d, failed to redistribute since %s", 1, terrstr()); + mError("vgId:%d, failed to redistribute to dnode %d %d %d since %s", redReq.vgId, redReq.dnodeId1, redReq.dnodeId2, + redReq.dnodeId3, terrstr()); } mndReleaseDnode(pMnode, pNew1); @@ -1303,9 +1349,7 @@ static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq) { goto _OVER; } - if (mndCheckNodeAuth(pUser) != 0) { - goto _OVER; - } + if (mndCheckNodeAuth(pUser) != 0) goto _OVER; code = mndSplitVgroup(pMnode, pReq, pDb, pVgroup); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 816c0cfac9..087dcac7b7 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -187,6 +187,7 @@ static void vnodeSyncReconfig(struct SSyncFSM *pFsm, SSyncCfg newCfg, SReConfigC // todo rpc response here // build rpc msg // put into apply queue + vnodePostBlockMsg(pVnode, TDMT_VND_ALTER_REPLICA); } static void vnodeSyncCommitMsg(SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 71c348f810..b5c54b780d 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -90,7 +90,6 @@ TAOS_DEFINE_ERROR(TSDB_CODE_RPC_AUTH_FAILURE, "Authentication failur TAOS_DEFINE_ERROR(TSDB_CODE_RPC_NETWORK_UNAVAIL, "Unable to establish connection") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_FQDN_ERROR, "Unable to resolve FQDN") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_PORT_EADDRINUSE, "Port already in use") -TAOS_DEFINE_ERROR(TSDB_CODE_RPC_INDIRECT_NETWORK_UNAVAIL, "Unable to establish connection") //client TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_OPERATION, "Invalid operation") @@ -225,6 +224,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_VGROUP_NOT_IN_DNODE, "Vgroup not in dnode") TAOS_DEFINE_ERROR(TSDB_CODE_MND_VGROUP_NOT_EXIST, "Vgroup does not exist") TAOS_DEFINE_ERROR(TSDB_CODE_MND_VGROUP_UN_CHANGED, "Vgroup distribution has not changed") TAOS_DEFINE_ERROR(TSDB_CODE_MND_HAS_OFFLINE_DNODE, "Offline dnode exists") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_REPLICA, "Invalid vgroup replica") // mnode-stable TAOS_DEFINE_ERROR(TSDB_CODE_MND_STB_ALREADY_EXIST, "STable already exists") @@ -265,6 +265,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_INVALID_STAGE, "Invalid stage to kill TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_CONFLICT, "Conflict transaction not completed") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_UNKNOW_ERROR, "Unknown transaction error") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_CLOG_IS_NULL, "Transaction commitlog is null") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_NETWORK_UNAVAILL, "Unable to establish connection While execute transaction") // mnode-mq TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOPIC_ALREADY_EXIST, "Topic already exists") diff --git a/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim b/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim new file mode 100644 index 0000000000..f44061ce6e --- /dev/null +++ b/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim @@ -0,0 +1,165 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/deploy.sh -n dnode2 -i 2 +system sh/deploy.sh -n dnode3 -i 3 +system sh/deploy.sh -n dnode4 -i 4 +system sh/deploy.sh -n dnode5 -i 5 +system sh/cfg.sh -n dnode1 -c supportVnodes -v 0 +system sh/exec.sh -n dnode1 -s start +system sh/exec.sh -n dnode2 -s start +system sh/exec.sh -n dnode3 -s start +system sh/exec.sh -n dnode4 -s start +#system sh/exec.sh -n dnode5 -s start +sql connect +sql create user u1 pass 'taosdata' + +print =============== step1 create dnode2 +sql create dnode $hostname port 7200 +sql create dnode $hostname port 7300 +sql create dnode $hostname port 7400 +sql create dnode $hostname port 7500 + +$x = 0 +step1: + $ = $x + 1 + sleep 1000 + if $x == 10 then + print ====> dnode not ready! + return -1 + endi +sql show dnodes +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 +print ===> $data20 $data21 $data22 $data23 $data24 $data25 +print ===> $data30 $data31 $data32 $data33 $data34 $data35 +print ===> $data40 $data41 $data42 $data43 $data44 $data45 +if $rows != 5 then + return -1 +endi +if $data(1)[4] != ready then + goto step1 +endi +if $data(2)[4] != ready then + goto step1 +endi +if $data(3)[4] != ready then + goto step1 +endi +if $data(4)[4] != ready then + goto step1 +endi +#if $data(5)[4] != ready then +# goto step1 +#endi + +print =============== step2: create db +sql create database d1 vgroups 1 replica 3 + +# Invalid vgroup +sql_error redistribute vgroup 3 dnode 5 dnode 3 dnode 4 +# un changed +sql_error redistribute vgroup 2 dnode 2 dnode 3 dnode 4 +# no enought vnodes +sql_error redistribute vgroup 2 dnode 1 dnode 3 dnode 4 +# offline vnodes +sql_error redistribute vgroup 2 dnode 5 dnode 3 dnode 4 +# Invalid replica +sql_error redistribute vgroup 2 dnode 5 +sql_error redistribute vgroup 2 dnode 5 dnode 3 +sql_error redistribute vgroup 2 dnode 2 dnode 3 +sql_error redistribute vgroup 2 dnode 2 dnode 2 +sql_error redistribute vgroup 3 dnode 2 dnode 2 + +system sh/exec.sh -n dnode5 -s start +$x = 0 +step2: + $ = $x + 1 + sleep 1000 + if $x == 10 then + print ====> dnode not ready! + return -1 + endi +sql show dnodes +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 +print ===> $data20 $data21 $data22 $data23 $data24 $data25 +print ===> $data30 $data31 $data32 $data33 $data34 $data35 +print ===> $data40 $data41 $data42 $data43 $data44 $data45 +if $rows != 5 then + return -1 +endi +if $data(1)[4] != ready then + goto step2 +endi +if $data(2)[4] != ready then + goto step2 +endi +if $data(3)[4] != ready then + goto step2 +endi +if $data(4)[4] != ready then + goto step2 +endi +if $data(5)[4] != ready then + goto step2 +endi + +return +print =============== step3: move follower +$leaderExist = 0 +$leaderVnode = 0 +$follower1 = 0 +$follower2 = 0 + +$x = 0 +step3: + $ = $x + 1 + sleep 1000 + if $x == 10 then + print ====> db not ready! + return -1 + endi +sql show d1.vgroups +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +if $rows != 1 then + return -1 +endi +if $data(2)[3] == leader then + $leaderExist = 1 + $leaderVnode = 4 + $follower1 = 2 + $follower2 = 3 +endi +if $data(2)[4] != ready then + $leaderExist = 1 + $leaderVnode = 3 + $follower1 = 2 + $follower2 = 4 +endi +if $data(3)[4] != ready then + $leaderExist = 1 + $leaderVnode = 2 + $follower1 = 3 + $follower2 = 4 +endi +if $leaderExist != 1 then + goto step3 +endi + +print redistribute vgroup 2 dnode $follower1 dnode $follower2 dnode 5 +sql redistribute vgroup 2 dnode $follower1 dnode $follower2 dnode 5 + +print =============== step4: move leader + + +return + +print =============== step3: drop dnode 3 + + +return +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode2 -s stop -x SIGINT +system sh/exec.sh -n dnode3 -s stop -x SIGINT +system sh/exec.sh -n dnode4 -s stop -x SIGINT +system sh/exec.sh -n dnode5 -s stop -x SIGINT From b90f1f246c5251adfab1a8c0d4a3fff00fc25cc0 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 10 Jun 2022 16:31:33 +0800 Subject: [PATCH 2/3] refactor: adjust mnode trace log --- source/dnode/mnode/sdb/src/sdbFile.c | 2 +- source/dnode/mnode/sdb/src/sdbHash.c | 2 ++ source/dnode/mnode/sdb/src/sdbRaw.c | 4 ++++ source/dnode/mnode/sdb/src/sdbRow.c | 4 ++++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index f98ecf5343..b32abc3eaa 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -357,7 +357,7 @@ static int32_t sdbWriteFileImp(SSdb *pSdb) { SdbEncodeFp encodeFp = pSdb->encodeFps[i]; if (encodeFp == NULL) continue; - mTrace("write %s to sdb file, total %d rows", sdbTableName(i), sdbGetSize(pSdb, i)); + mDebug("write %s to sdb file, total %d rows", sdbTableName(i), sdbGetSize(pSdb, i)); SHashObj *hash = pSdb->hashObjs[i]; TdThreadRwlock *pLock = &pSdb->locks[i]; diff --git a/source/dnode/mnode/sdb/src/sdbHash.c b/source/dnode/mnode/sdb/src/sdbHash.c index d1b1e31635..71792a2354 100644 --- a/source/dnode/mnode/sdb/src/sdbHash.c +++ b/source/dnode/mnode/sdb/src/sdbHash.c @@ -83,6 +83,7 @@ const char *sdbStatusName(ESdbStatus status) { } void sdbPrintOper(SSdb *pSdb, SSdbRow *pRow, const char *oper) { +#if 0 EKeyType keyType = pSdb->keyTypes[pRow->type]; if (keyType == SDB_KEY_BINARY) { @@ -96,6 +97,7 @@ void sdbPrintOper(SSdb *pSdb, SSdbRow *pRow, const char *oper) { pRow->refCount, oper, pRow->pObj, sdbStatusName(pRow->status)); } else { } +#endif } static SHashObj *sdbGetHash(SSdb *pSdb, int32_t type) { diff --git a/source/dnode/mnode/sdb/src/sdbRaw.c b/source/dnode/mnode/sdb/src/sdbRaw.c index 7720a8e88a..95985cd3d9 100644 --- a/source/dnode/mnode/sdb/src/sdbRaw.c +++ b/source/dnode/mnode/sdb/src/sdbRaw.c @@ -37,13 +37,17 @@ SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen) { pRaw->sver = sver; pRaw->dataLen = dataLen; +#if 0 mTrace("raw:%p, is created, len:%d table:%s", pRaw, dataLen, sdbTableName(type)); +#endif return pRaw; } void sdbFreeRaw(SSdbRaw *pRaw) { if (pRaw != NULL) { +#if 0 mTrace("raw:%p, is freed", pRaw); +#endif taosMemoryFree(pRaw); } } diff --git a/source/dnode/mnode/sdb/src/sdbRow.c b/source/dnode/mnode/sdb/src/sdbRow.c index e57a6b028b..b362ee3a45 100644 --- a/source/dnode/mnode/sdb/src/sdbRow.c +++ b/source/dnode/mnode/sdb/src/sdbRow.c @@ -23,7 +23,9 @@ SSdbRow *sdbAllocRow(int32_t objSize) { return NULL; } +#if 0 mTrace("row:%p, is created, len:%d", pRow->pObj, objSize); +#endif return pRow; } @@ -45,6 +47,8 @@ void sdbFreeRow(SSdb *pSdb, SSdbRow *pRow, bool callFunc) { sdbPrintOper(pSdb, pRow, "free"); +#if 0 mTrace("row:%p, is freed", pRow->pObj); +#endif taosMemoryFreeClear(pRow); } From 56ce7b1baa0511553f18242423895d89eff2a353 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 10 Jun 2022 16:45:31 +0800 Subject: [PATCH 3/3] enh: adjust show transcations output --- source/common/src/systable.c | 2 +- source/dnode/mnode/impl/inc/mndDef.h | 6 +- source/dnode/mnode/impl/src/mndTrans.c | 90 ++++++++++++------- ...istribute_vgroup_replica3_move_1_vnode.sim | 32 +++++-- 4 files changed, 87 insertions(+), 43 deletions(-) diff --git a/source/common/src/systable.c b/source/common/src/systable.c index 2b59354d60..cb38a3cf70 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -221,7 +221,7 @@ static const SSysDbTableSchema transSchema[] = { {.name = "db", .bytes = SYSTABLE_SCH_DB_NAME_LEN, .type = TSDB_DATA_TYPE_VARCHAR}, {.name = "failed_times", .bytes = 4, .type = TSDB_DATA_TYPE_INT}, {.name = "last_exec_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP}, - {.name = "last_error", .bytes = (TSDB_TRANS_ERROR_LEN - 1) + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR}, + {.name = "last_action_info", .bytes = (TSDB_TRANS_ERROR_LEN - 1) + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR}, }; static const SSysDbTableSchema configSchema[] = { diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 382f8dd55f..ad0a384507 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -120,10 +120,10 @@ typedef struct { SArray* commitActions; int64_t createdTime; int64_t lastExecTime; - int32_t lastErrorAction; + int32_t lastAction; int32_t lastErrorNo; - tmsg_t lastErrorMsgType; - SEpSet lastErrorEpset; + tmsg_t lastMsgType; + SEpSet lastEpset; char dbname[TSDB_DB_FNAME_LEN]; int32_t startFunc; int32_t stopFunc; diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 1124baf286..a689c89037 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -894,10 +894,19 @@ static int32_t mndTransWriteSingleLog(SMnode *pMnode, STrans *pTrans, STransActi code = 0; mDebug("trans:%d, %s:%d write to sdb, type:%s status:%s", pTrans->id, mndTransStr(pAction->stage), pAction->id, sdbTableName(pAction->pRaw->type), sdbStatusName(pAction->pRaw->status)); + + pTrans->lastAction = pAction->id; + pTrans->lastMsgType = pAction->msgType; + pTrans->lastEpset = pAction->epSet; + pTrans->lastErrorNo = 0; } else { pAction->errCode = (terrno != 0) ? terrno : code; mError("trans:%d, %s:%d failed to write sdb since %s, type:%s status:%s", pTrans->id, mndTransStr(pAction->stage), pAction->id, terrstr(), sdbTableName(pAction->pRaw->type), sdbStatusName(pAction->pRaw->status)); + pTrans->lastAction = pAction->id; + pTrans->lastMsgType = pAction->msgType; + pTrans->lastEpset = pAction->epSet; + pTrans->lastErrorNo = pAction->errCode; } return code; @@ -933,27 +942,48 @@ static int32_t mndTransSendSingleMsg(SMnode *pMnode, STrans *pTrans, STransActio pAction->msgReceived = 0; pAction->errCode = 0; mDebug("trans:%d, %s:%d is sent, %s", pTrans->id, mndTransStr(pAction->stage), pAction->id, detail); + + pTrans->lastAction = pAction->id; + pTrans->lastMsgType = pAction->msgType; + pTrans->lastEpset = pAction->epSet; + if (pTrans->lastErrorNo == 0) { + pTrans->lastErrorNo = TSDB_CODE_ACTION_IN_PROGRESS; + } } else { pAction->msgSent = 0; pAction->msgReceived = 0; pAction->errCode = (terrno != 0) ? terrno : code; mError("trans:%d, %s:%d not send since %s, %s", pTrans->id, mndTransStr(pAction->stage), pAction->id, terrstr(), detail); + + pTrans->lastAction = pAction->id; + pTrans->lastMsgType = pAction->msgType; + pTrans->lastEpset = pAction->epSet; + pTrans->lastErrorNo = pAction->errCode; } return code; } +static int32_t mndTransExecNullMsg(SMnode *pMnode, STrans *pTrans, STransAction *pAction) { + pAction->rawWritten = 0; + pAction->errCode = 0; + mDebug("trans:%d, %s:%d null action executed", pTrans->id, mndTransStr(pAction->stage), pAction->id); + + pTrans->lastAction = pAction->id; + pTrans->lastMsgType = pAction->msgType; + pTrans->lastEpset = pAction->epSet; + pTrans->lastErrorNo == 0; + return 0; +} + static int32_t mndTransExecSingleAction(SMnode *pMnode, STrans *pTrans, STransAction *pAction) { if (pAction->actionType == TRANS_ACTION_RAW) { return mndTransWriteSingleLog(pMnode, pTrans, pAction); } else if (pAction->actionType == TRANS_ACTION_MSG) { return mndTransSendSingleMsg(pMnode, pTrans, pAction); } else { - pAction->rawWritten = 0; - pAction->errCode = 0; - mDebug("trans:%d, %s:%d null action executed", pTrans->id, mndTransStr(pAction->stage), pAction->id); - return 0; + return mndTransExecNullMsg(pMnode, pTrans, pAction); } } @@ -994,19 +1024,19 @@ static int32_t mndTransExecuteActions(SMnode *pMnode, STrans *pTrans, SArray *pA if (numOfExecuted == numOfActions) { if (errCode == 0) { - pTrans->lastErrorAction = 0; + pTrans->lastAction = 0; pTrans->lastErrorNo = 0; - pTrans->lastErrorMsgType = 0; - memset(&pTrans->lastErrorEpset, 0, sizeof(pTrans->lastErrorEpset)); + pTrans->lastMsgType = 0; + memset(&pTrans->lastEpset, 0, sizeof(pTrans->lastEpset)); mDebug("trans:%d, all %d actions execute successfully", pTrans->id, numOfActions); return 0; } else { mError("trans:%d, all %d actions executed, code:0x%x", pTrans->id, numOfActions, errCode & 0XFFFF); if (pErrAction != NULL) { - pTrans->lastErrorMsgType = pErrAction->msgType; - pTrans->lastErrorAction = pErrAction->id; + pTrans->lastMsgType = pErrAction->msgType; + pTrans->lastAction = pErrAction->id; pTrans->lastErrorNo = pErrAction->errCode; - pTrans->lastErrorEpset = pErrAction->epSet; + pTrans->lastEpset = pErrAction->epSet; } mndTransResetActions(pMnode, pTrans, pArray); terrno = errCode; @@ -1073,15 +1103,15 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) } if (code == 0) { - pTrans->lastErrorAction = 0; + pTrans->lastAction = 0; pTrans->lastErrorNo = 0; - pTrans->lastErrorMsgType = 0; - memset(&pTrans->lastErrorEpset, 0, sizeof(pTrans->lastErrorEpset)); + pTrans->lastMsgType = 0; + memset(&pTrans->lastEpset, 0, sizeof(pTrans->lastEpset)); } else { - pTrans->lastErrorMsgType = pAction->msgType; - pTrans->lastErrorAction = action; - pTrans->lastErrorNo = pAction->errCode; - pTrans->lastErrorEpset = pAction->epSet; + pTrans->lastMsgType = pAction->msgType; + pTrans->lastAction = action; + pTrans->lastErrorNo = code; + pTrans->lastEpset = pAction->epSet; } if (code == 0) { @@ -1432,23 +1462,21 @@ static int32_t mndRetrieveTrans(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBl pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataAppend(pColInfo, numOfRows, (const char *)&pTrans->lastExecTime, false); - char lastError[TSDB_TRANS_ERROR_LEN + VARSTR_HEADER_SIZE] = {0}; - char detail[TSDB_TRANS_ERROR_LEN] = {0}; - if (pTrans->lastErrorNo != 0) { - int32_t len = snprintf(detail, sizeof(detail), "action:%d errno:0x%x(%s) ", pTrans->lastErrorAction, - pTrans->lastErrorNo & 0xFFFF, tstrerror(pTrans->lastErrorNo)); - SEpSet epset = pTrans->lastErrorEpset; - if (epset.numOfEps > 0) { - len += snprintf(detail + len, sizeof(detail) - len, "msgType:%s numOfEps:%d inUse:%d ", - TMSG_INFO(pTrans->lastErrorMsgType), epset.numOfEps, epset.inUse); - for (int32_t i = 0; i < pTrans->lastErrorEpset.numOfEps; ++i) { - len += snprintf(detail + len, sizeof(detail) - len, "ep:%d-%s:%u ", i, epset.eps[i].fqdn, epset.eps[i].port); - } + char lastInfo[TSDB_TRANS_ERROR_LEN + VARSTR_HEADER_SIZE] = {0}; + char detail[TSDB_TRANS_ERROR_LEN] = {0}; + int32_t len = snprintf(detail, sizeof(detail), "action:%d code:0x%x(%s) ", pTrans->lastAction, + pTrans->lastErrorNo & 0xFFFF, tstrerror(pTrans->lastErrorNo)); + SEpSet epset = pTrans->lastEpset; + if (epset.numOfEps > 0) { + len += snprintf(detail + len, sizeof(detail) - len, "msgType:%s numOfEps:%d inUse:%d ", + TMSG_INFO(pTrans->lastMsgType), epset.numOfEps, epset.inUse); + for (int32_t i = 0; i < pTrans->lastEpset.numOfEps; ++i) { + len += snprintf(detail + len, sizeof(detail) - len, "ep:%d-%s:%u ", i, epset.eps[i].fqdn, epset.eps[i].port); } } - STR_WITH_MAXSIZE_TO_VARSTR(lastError, detail, pShow->pMeta->pSchemas[cols].bytes); + STR_WITH_MAXSIZE_TO_VARSTR(lastInfo, detail, pShow->pMeta->pSchemas[cols].bytes); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataAppend(pColInfo, numOfRows, (const char *)lastError, false); + colDataAppend(pColInfo, numOfRows, (const char *)lastInfo, false); numOfRows++; sdbRelease(pSdb, pTrans); diff --git a/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim b/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim index f44061ce6e..cb9f4173ac 100644 --- a/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim +++ b/tests/script/tsim/dnode/redistribute_vgroup_replica3_move_1_vnode.sim @@ -104,8 +104,7 @@ if $data(5)[4] != ready then goto step2 endi -return -print =============== step3: move follower +print =============== step31: move follower $leaderExist = 0 $leaderVnode = 0 $follower1 = 0 @@ -120,23 +119,23 @@ step3: return -1 endi sql show d1.vgroups -print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 if $rows != 1 then return -1 endi -if $data(2)[3] == leader then +if $data(2)[4] == leader then $leaderExist = 1 $leaderVnode = 4 $follower1 = 2 $follower2 = 3 endi -if $data(2)[4] != ready then +if $data(2)[6] == leader then $leaderExist = 1 $leaderVnode = 3 $follower1 = 2 $follower2 = 4 endi -if $data(3)[4] != ready then +if $data(2)[8] == leader then $leaderExist = 1 $leaderVnode = 2 $follower1 = 3 @@ -146,8 +145,25 @@ if $leaderExist != 1 then goto step3 endi -print redistribute vgroup 2 dnode $follower1 dnode $follower2 dnode 5 -sql redistribute vgroup 2 dnode $follower1 dnode $follower2 dnode 5 +print leader $leaderVnode +print follower1 $follower1 +print follower2 $follower2 + +print =============== step32: move follower2 +print redistribute vgroup 2 dnode $leaderVnode dnode $follower2 dnode 5 +sql redistribute vgroup 2 dnode $leaderVnode dnode $follower2 dnode 5 +return +print =============== step33: move follower1 +print redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode 5 +sql redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode 5 + +print =============== step34: move follower2 +print redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower2 +sql redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower2 + +print =============== step35: move follower1 +print redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower1 +sql redistribute vgroup 2 dnode $leaderVnode dnode 5 dnode $follower1 print =============== step4: move leader