From 150dd5af3f78619fec7eaacabc29d8c051fc5cfa Mon Sep 17 00:00:00 2001 From: dmchen Date: Tue, 24 Dec 2024 10:50:56 +0000 Subject: [PATCH 01/11] fix/TS-5805-check-arb --- source/dnode/mnode/impl/src/mndArbGroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/dnode/mnode/impl/src/mndArbGroup.c b/source/dnode/mnode/impl/src/mndArbGroup.c index 3879e8e6e2..57de63b31d 100644 --- a/source/dnode/mnode/impl/src/mndArbGroup.c +++ b/source/dnode/mnode/impl/src/mndArbGroup.c @@ -634,6 +634,7 @@ void mndArbCheckSync(SArbGroup *pArbGroup, int64_t nowMs, ECheckSyncOp *pOp, SAr mInfo("arb skip to set assigned leader to vgId:%d dnodeId:%d, arb group is not sync", vgId, pMember->info.dnodeId); } + *pOp = CHECK_SYNC_CHECK_SYNC; return; } From 0da606c7fbcdc6a589863ce33851ae1ea39f6f6a Mon Sep 17 00:00:00 2001 From: dmchen Date: Tue, 24 Dec 2024 12:14:47 +0000 Subject: [PATCH 02/11] show code --- include/libs/sync/sync.h | 2 +- source/common/src/systable.c | 3 +- source/dnode/mnode/impl/inc/mndArbGroup.h | 2 +- source/dnode/mnode/impl/inc/mndDef.h | 1 + source/dnode/mnode/impl/src/mndArbGroup.c | 23 +++++-- source/dnode/vnode/src/vnd/vnodeSvr.c | 16 ++--- source/libs/sync/src/syncMain.c | 2 +- source/os/src/osLocale.c | 2 +- tests/army/cluster/arbgroup_sync.py | 81 +++++++++++++++++++++++ 9 files changed, 114 insertions(+), 18 deletions(-) create mode 100644 tests/army/cluster/arbgroup_sync.py diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 50c096258e..3ded45bc64 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -292,7 +292,7 @@ int32_t syncUpdateArbTerm(int64_t rid, SyncTerm arbTerm); SSyncState syncGetState(int64_t rid); int32_t syncGetArbToken(int64_t rid, char* outToken); -int32_t syncGetAssignedLogSynced(int64_t rid); +int32_t syncCheckSynced(int64_t rid); void syncGetRetryEpSet(int64_t rid, SEpSet* pEpSet); const char* syncStr(ESyncState state); diff --git a/source/common/src/systable.c b/source/common/src/systable.c index cb08046399..d1ddb3204f 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -73,7 +73,8 @@ static const SSysDbTableSchema arbGroupsSchema[] = { {.name = "vgroup_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = true}, {.name = "v1_dnode", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, {.name = "v2_dnode", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, - {.name = "is_sync", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, + {.name = "is_sync", .bytes = 100, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, + //{.name = "sync_code", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = true}, {.name = "assigned_dnode", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, {.name = "assigned_token", .bytes = TSDB_ARB_TOKEN_SIZE + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, {.name = "assigned_acked", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, diff --git a/source/dnode/mnode/impl/inc/mndArbGroup.h b/source/dnode/mnode/impl/inc/mndArbGroup.h index 66ef3f766b..8abc435205 100644 --- a/source/dnode/mnode/impl/inc/mndArbGroup.h +++ b/source/dnode/mnode/impl/inc/mndArbGroup.h @@ -41,7 +41,7 @@ int32_t mndSetDropArbGroupCommitLogs(STrans *pTrans, SArbGroup *pGroup); bool mndUpdateArbGroupByHeartBeat(SArbGroup *pGroup, SVArbHbRspMember *pRspMember, int64_t nowMs, int32_t dnodeId, SArbGroup *pNewGroup); bool mndUpdateArbGroupByCheckSync(SArbGroup *pGroup, int32_t vgId, char *member0Token, char *member1Token, - bool newIsSync, SArbGroup *pNewGroup); + bool newIsSync, SArbGroup *pNewGroup, int32_t code); bool mndUpdateArbGroupBySetAssignedLeader(SArbGroup *pGroup, int32_t vgId, char *memberToken, int32_t errcode, SArbGroup *pNewGroup); diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index e3d2ad6d34..2cf55b88ea 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -308,6 +308,7 @@ typedef struct { int64_t dbUid; SArbGroupMember members[TSDB_ARB_GROUP_MEMBER_NUM]; int8_t isSync; + int32_t code; SArbAssignedLeader assignedLeader; int64_t version; diff --git a/source/dnode/mnode/impl/src/mndArbGroup.c b/source/dnode/mnode/impl/src/mndArbGroup.c index 57de63b31d..d57f265add 100644 --- a/source/dnode/mnode/impl/src/mndArbGroup.c +++ b/source/dnode/mnode/impl/src/mndArbGroup.c @@ -1066,7 +1066,7 @@ static int32_t mndUpdateArbHeartBeat(SMnode *pMnode, int32_t dnodeId, SArray *me } bool mndUpdateArbGroupByCheckSync(SArbGroup *pGroup, int32_t vgId, char *member0Token, char *member1Token, - bool newIsSync, SArbGroup *pNewGroup) { + bool newIsSync, SArbGroup *pNewGroup, int32_t code) { bool updateIsSync = false; (void)taosThreadMutexLock(&pGroup->mutex); @@ -1089,6 +1089,7 @@ bool mndUpdateArbGroupByCheckSync(SArbGroup *pGroup, int32_t vgId, char *member0 if (pGroup->isSync != newIsSync) { mndArbGroupDupObj(pGroup, pNewGroup); pNewGroup->isSync = newIsSync; + pNewGroup->code = code; mInfo("vgId:%d, arb isSync updating, new isSync:%d", vgId, newIsSync); updateIsSync = true; @@ -1099,7 +1100,8 @@ _OVER: return updateIsSync; } -static int32_t mndUpdateArbSync(SMnode *pMnode, int32_t vgId, char *member0Token, char *member1Token, bool newIsSync) { +static int32_t mndUpdateArbSync(SMnode *pMnode, int32_t vgId, char *member0Token, char *member1Token, bool newIsSync, + int32_t rsp_code) { int32_t code = 0; SArbGroup *pGroup = sdbAcquire(pMnode->pSdb, SDB_ARBGROUP, &vgId); if (pGroup == NULL) { @@ -1110,7 +1112,8 @@ static int32_t mndUpdateArbSync(SMnode *pMnode, int32_t vgId, char *member0Token } SArbGroup newGroup = {0}; - bool updateIsSync = mndUpdateArbGroupByCheckSync(pGroup, vgId, member0Token, member1Token, newIsSync, &newGroup); + bool updateIsSync = + mndUpdateArbGroupByCheckSync(pGroup, vgId, member0Token, member1Token, newIsSync, &newGroup, rsp_code); if (updateIsSync) { if (mndPullupArbUpdateGroup(pMnode, &newGroup) != 0) { mInfo("failed to pullup update arb sync, vgId:%d, since %s", vgId, terrstr()); @@ -1186,6 +1189,7 @@ static int32_t mndProcessArbCheckSyncRsp(SRpcMsg *pRsp) { TAOS_RETURN(code); } + mInfo("vgId:%d, arb check-sync-rsp received, errCode:%d", syncRsp.vgId, syncRsp.errCode); if (mndArbCheckToken(arbToken, syncRsp.arbToken) != 0) { mInfo("skip update arb sync for vgId:%d, arb token mismatch, local:[%s] msg:[%s]", syncRsp.vgId, arbToken, syncRsp.arbToken); @@ -1194,7 +1198,8 @@ static int32_t mndProcessArbCheckSyncRsp(SRpcMsg *pRsp) { } bool newIsSync = (syncRsp.errCode == TSDB_CODE_SUCCESS); - if ((code = mndUpdateArbSync(pMnode, syncRsp.vgId, syncRsp.member0Token, syncRsp.member1Token, newIsSync)) != 0) { + if ((code = mndUpdateArbSync(pMnode, syncRsp.vgId, syncRsp.member0Token, syncRsp.member1Token, newIsSync, + syncRsp.errCode)) != 0) { mInfo("failed to update arb sync for vgId:%d, since:%s", syncRsp.vgId, terrstr()); goto _OVER; } @@ -1333,8 +1338,16 @@ static int32_t mndRetrieveArbGroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock &lino, _OVER); } + char strSync[100] = {0}; + sprintf(strSync, "%d,%d", pGroup->isSync, pGroup->code); + char sync[100 + VARSTR_HEADER_SIZE] = {0}; + STR_WITH_MAXSIZE_TO_VARSTR(sync, strSync, 100 + VARSTR_HEADER_SIZE); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - RETRIEVE_CHECK_GOTO(colDataSetVal(pColInfo, numOfRows, (const char *)&pGroup->isSync, false), pGroup, &lino, _OVER); + RETRIEVE_CHECK_GOTO(colDataSetVal(pColInfo, numOfRows, (const char *)sync, false), pGroup, &lino, _OVER); + + // pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + // RETRIEVE_CHECK_GOTO(colDataSetVal(pColInfo, numOfRows, (const char *)&pGroup->code, false), pGroup, &lino, + // _OVER); if (pGroup->assignedLeader.dnodeId != 0) { pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index e82209e03f..837af4b0a8 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -53,8 +53,8 @@ static int32_t vnodeProcessArbCheckSyncReq(SVnode *pVnode, void *pReq, int32_t l static int32_t vnodeProcessDropTSmaCtbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, SRpcMsg *pOriginRpc); -static int32_t vnodePreCheckAssignedLogSyncd(SVnode *pVnode, char *member0Token, char *member1Token); -static int32_t vnodeCheckAssignedLogSyncd(SVnode *pVnode, char *member0Token, char *member1Token); +static int32_t vnodeCheckToken(SVnode *pVnode, char *member0Token, char *member1Token); +static int32_t vnodeCheckSyncd(SVnode *pVnode, char *member0Token, char *member1Token); static int32_t vnodeProcessFetchTtlExpiredTbs(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); extern int32_t vnodeProcessKillCompactReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); @@ -480,7 +480,7 @@ static int32_t vnodePreProcessArbCheckSyncMsg(SVnode *pVnode, SRpcMsg *pMsg) { return TSDB_CODE_INVALID_MSG; } - int32_t ret = vnodePreCheckAssignedLogSyncd(pVnode, syncReq.member0Token, syncReq.member1Token); + int32_t ret = vnodeCheckToken(pVnode, syncReq.member0Token, syncReq.member1Token); if (ret != 0) { vError("vgId:%d, failed to preprocess arb check sync request since %s", TD_VID(pVnode), tstrerror(ret)); } @@ -2501,7 +2501,7 @@ static int32_t vnodeProcessConfigChangeReq(SVnode *pVnode, int64_t ver, void *pR return 0; } -static int32_t vnodePreCheckAssignedLogSyncd(SVnode *pVnode, char *member0Token, char *member1Token) { +static int32_t vnodeCheckToken(SVnode *pVnode, char *member0Token, char *member1Token) { SSyncState syncState = syncGetState(pVnode->sync); if (syncState.state != TAOS_SYNC_STATE_LEADER) { return terrno = TSDB_CODE_SYN_NOT_LEADER; @@ -2521,13 +2521,13 @@ static int32_t vnodePreCheckAssignedLogSyncd(SVnode *pVnode, char *member0Token, return 0; } -static int32_t vnodeCheckAssignedLogSyncd(SVnode *pVnode, char *member0Token, char *member1Token) { - int32_t code = vnodePreCheckAssignedLogSyncd(pVnode, member0Token, member1Token); +static int32_t vnodeCheckSyncd(SVnode *pVnode, char *member0Token, char *member1Token) { + int32_t code = vnodeCheckToken(pVnode, member0Token, member1Token); if (code != 0) { return code; } - return syncGetAssignedLogSynced(pVnode->sync); + return syncCheckSynced(pVnode->sync); } static int32_t vnodeProcessArbCheckSyncReq(SVnode *pVnode, void *pReq, int32_t len, SRpcMsg *pRsp) { @@ -2551,7 +2551,7 @@ static int32_t vnodeProcessArbCheckSyncReq(SVnode *pVnode, void *pReq, int32_t l syncRsp.member1Token = syncReq.member1Token; syncRsp.vgId = TD_VID(pVnode); - if (vnodeCheckAssignedLogSyncd(pVnode, syncReq.member0Token, syncReq.member1Token) != 0) { + if (vnodeCheckSyncd(pVnode, syncReq.member0Token, syncReq.member1Token) != 0) { vError("vgId:%d, failed to check assigned log syncd", TD_VID(pVnode)); } syncRsp.errCode = terrno; diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index fbde104f4e..ddb7400eee 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -699,7 +699,7 @@ int32_t syncGetArbToken(int64_t rid, char* outToken) { TAOS_RETURN(code); } -int32_t syncGetAssignedLogSynced(int64_t rid) { +int32_t syncCheckSynced(int64_t rid) { int32_t code = 0; SSyncNode* pSyncNode = syncNodeAcquire(rid); if (pSyncNode == NULL) { diff --git a/source/os/src/osLocale.c b/source/os/src/osLocale.c index 484436fafe..ba5b19adde 100644 --- a/source/os/src/osLocale.c +++ b/source/os/src/osLocale.c @@ -84,7 +84,7 @@ int32_t taosSetSystemLocale(const char *inLocale) { if (NULL == locale) { terrno = TSDB_CODE_INVALID_PARA; uError("failed to set locale:%s", inLocale); - return terrno; + return 0; } tstrncpy(tsLocale, locale, TD_LOCALE_LEN); diff --git a/tests/army/cluster/arbgroup_sync.py b/tests/army/cluster/arbgroup_sync.py new file mode 100644 index 0000000000..9fd8e7b1f3 --- /dev/null +++ b/tests/army/cluster/arbgroup_sync.py @@ -0,0 +1,81 @@ +import taos +import sys +import os +import subprocess +import glob +import shutil +import time + +from frame.log import * +from frame.cases import * +from frame.sql import * +from frame.srvCtl import * +from frame.caseBase import * +from frame import * +from frame.autogen import * +from frame import epath +# from frame.server.dnodes import * +# from frame.server.cluster import * + + +class TDTestCase(TBase): + + def init(self, conn, logSql, replicaVar=1): + updatecfgDict = {'dDebugFlag':131} + super(TDTestCase, self).init(conn, logSql, replicaVar=1, checkColName="c1") + + self.valgrind = 0 + self.db = "test" + self.stb = "meters" + self.childtable_count = 10 + tdSql.init(conn.cursor(), logSql) + + def run(self): + tdSql.execute('CREATE DATABASE db vgroups 1 replica 2;') + + time.sleep(1) + + count = 0 + + while count < 100: + tdSql.query("show arbgroups;") + + if tdSql.getData(0, 4) == 1: + break + + tdLog.info("wait 1 seconds for is sync") + time.sleep(1) + + count += 1 + + + tdSql.query("show db.vgroups;") + + if(tdSql.getData(0, 4) == "follower") and (tdSql.getData(0, 6) == "leader"): + tdLog.info("stop dnode2") + sc.dnodeStop(2) + + if(tdSql.getData(0, 6) == "follower") and (tdSql.getData(0, 4) == "leader"): + tdLog.info("stop dnode 3") + sc.dnodeStop(3) + + + count = 0 + while count < 100: + tdSql.query("show db.vgroups;") + + if(tdSql.getData(0, 4) == "assigned ") or (tdSql.getData(0, 6) == "assigned "): + break + + tdLog.info("wait 1 seconds for set assigned") + time.sleep(1) + + count += 1 + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) From d3ab816838807291857f89b170d2b6532ba96b50 Mon Sep 17 00:00:00 2001 From: dmchen Date: Wed, 25 Dec 2024 02:10:21 +0000 Subject: [PATCH 03/11] fix/TS-5805-check-arb-updatetime --- include/common/tmsg.h | 2 + source/common/src/msg/tmsg.c | 4 ++ source/dnode/mnode/impl/inc/mndDef.h | 1 + source/dnode/mnode/impl/src/mndArbGroup.c | 61 +++++++++++++++++++++-- 4 files changed, 64 insertions(+), 4 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 0b6a8b3f1b..dcb971fd45 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2608,6 +2608,8 @@ typedef struct { int8_t assignedAcked; SMArbUpdateGroupAssigned assignedLeader; int64_t version; + int32_t code; + int64_t updateTimeMs; } SMArbUpdateGroup; typedef struct { diff --git a/source/common/src/msg/tmsg.c b/source/common/src/msg/tmsg.c index 166c889947..4a42b8b9a8 100644 --- a/source/common/src/msg/tmsg.c +++ b/source/common/src/msg/tmsg.c @@ -8295,6 +8295,8 @@ int32_t tSerializeSMArbUpdateGroupBatchReq(void *buf, int32_t bufLen, SMArbUpdat TAOS_CHECK_EXIT(tEncodeI32(&encoder, pGroup->assignedLeader.dnodeId)); TAOS_CHECK_EXIT(tEncodeCStr(&encoder, pGroup->assignedLeader.token)); TAOS_CHECK_EXIT(tEncodeI64(&encoder, pGroup->version)); + TAOS_CHECK_EXIT(tEncodeI32(&encoder, pGroup->code)); + TAOS_CHECK_EXIT(tEncodeI64(&encoder, pGroup->updateTimeMs)); } for (int32_t i = 0; i < sz; i++) { @@ -8348,6 +8350,8 @@ int32_t tDeserializeSMArbUpdateGroupBatchReq(void *buf, int32_t bufLen, SMArbUpd TAOS_CHECK_EXIT(tDecodeCStrTo(&decoder, group.assignedLeader.token)); TAOS_CHECK_EXIT(tDecodeI64(&decoder, &group.version)); group.assignedLeader.acked = false; + TAOS_CHECK_EXIT(tDecodeI32(&decoder, &group.code)); + TAOS_CHECK_EXIT(tDecodeI64(&decoder, &group.updateTimeMs)); if (taosArrayPush(updateArray, &group) == NULL) { TAOS_CHECK_EXIT(terrno); diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 2cf55b88ea..a44b4f4de1 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -309,6 +309,7 @@ typedef struct { SArbGroupMember members[TSDB_ARB_GROUP_MEMBER_NUM]; int8_t isSync; int32_t code; + int64_t updateTimeMs; SArbAssignedLeader assignedLeader; int64_t version; diff --git a/source/dnode/mnode/impl/src/mndArbGroup.c b/source/dnode/mnode/impl/src/mndArbGroup.c index d57f265add..3e4c301e70 100644 --- a/source/dnode/mnode/impl/src/mndArbGroup.c +++ b/source/dnode/mnode/impl/src/mndArbGroup.c @@ -138,6 +138,8 @@ SSdbRaw *mndArbGroupActionEncode(SArbGroup *pGroup) { SDB_SET_BINARY(pRaw, dataPos, pLeader->token, TSDB_ARB_TOKEN_SIZE, _OVER) SDB_SET_INT64(pRaw, dataPos, pGroup->version, _OVER) SDB_SET_INT8(pRaw, dataPos, pLeader->acked, _OVER) + SDB_SET_INT32(pRaw, dataPos, pGroup->code, _OVER) + SDB_SET_INT64(pRaw, dataPos, pGroup->updateTimeMs, _OVER) SDB_SET_RESERVE(pRaw, dataPos, ARBGROUP_RESERVE_SIZE, _OVER) @@ -194,6 +196,8 @@ SSdbRow *mndArbGroupActionDecode(SSdbRaw *pRaw) { SDB_GET_BINARY(pRaw, dataPos, pLeader->token, TSDB_ARB_TOKEN_SIZE, _OVER) SDB_GET_INT64(pRaw, dataPos, &pGroup->version, _OVER) SDB_GET_INT8(pRaw, dataPos, &pLeader->acked, _OVER) + SDB_GET_INT32(pRaw, dataPos, &pGroup->code, _OVER) + SDB_GET_INT64(pRaw, dataPos, &pGroup->updateTimeMs, _OVER) pGroup->mutexInited = false; @@ -249,6 +253,8 @@ static int32_t mndArbGroupActionUpdate(SSdb *pSdb, SArbGroup *pOld, SArbGroup *p tstrncpy(pOld->assignedLeader.token, pNew->assignedLeader.token, TSDB_ARB_TOKEN_SIZE); pOld->assignedLeader.acked = pNew->assignedLeader.acked; pOld->version++; + pOld->code = pNew->code; + pOld->updateTimeMs = pNew->updateTimeMs; mInfo( "arbgroup:%d, perform update action. members[0].token:%s, members[1].token:%s, isSync:%d, as-dnodeid:%d, " @@ -762,6 +768,8 @@ static void mndInitArbUpdateGroup(SArbGroup *pGroup, SMArbUpdateGroup *outGroup) outGroup->assignedLeader.token = pGroup->assignedLeader.token; // just copy the pointer outGroup->assignedLeader.acked = pGroup->assignedLeader.acked; outGroup->version = pGroup->version; + outGroup->code = pGroup->code; + outGroup->updateTimeMs = pGroup->updateTimeMs; } static int32_t mndPullupArbUpdateGroup(SMnode *pMnode, SArbGroup *pNewGroup) { @@ -858,7 +866,7 @@ static int32_t mndProcessArbUpdateGroupBatchReq(SRpcMsg *pReq) { } SMnode *pMnode = pReq->info.node; - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_ARBGROUP, NULL, "update-arbgroup"); + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_ARBGROUP, NULL, "upd-bat-arbgroup"); if (pTrans == NULL) { mError("failed to update arbgroup in create trans, since %s", terrstr()); goto _OVER; @@ -880,6 +888,16 @@ static int32_t mndProcessArbUpdateGroupBatchReq(SRpcMsg *pReq) { tstrncpy(newGroup.assignedLeader.token, pUpdateGroup->assignedLeader.token, TSDB_ARB_TOKEN_SIZE); newGroup.assignedLeader.acked = pUpdateGroup->assignedLeader.acked; newGroup.version = pUpdateGroup->version; + newGroup.code = pUpdateGroup->code; + newGroup.updateTimeMs = pUpdateGroup->updateTimeMs; + + mInfo( + "trans:%d, used to update arbgroup:%d, member0:[%d][%s] member1:[%d][%s] isSync:%d assigned:[%d][%s][%d], %d, " + "%" PRId64, + pTrans->id, newGroup.vgId, newGroup.members[0].info.dnodeId, newGroup.members[0].state.token, + newGroup.members[1].info.dnodeId, newGroup.members[1].state.token, newGroup.isSync, + newGroup.assignedLeader.dnodeId, newGroup.assignedLeader.token, newGroup.assignedLeader.acked, + pUpdateGroup->code, pUpdateGroup->updateTimeMs); SArbGroup *pOldGroup = sdbAcquire(pMnode->pSdb, SDB_ARBGROUP, &newGroup.vgId); if (!pOldGroup) { @@ -1090,8 +1108,9 @@ bool mndUpdateArbGroupByCheckSync(SArbGroup *pGroup, int32_t vgId, char *member0 mndArbGroupDupObj(pGroup, pNewGroup); pNewGroup->isSync = newIsSync; pNewGroup->code = code; + pNewGroup->updateTimeMs = taosGetTimestampMs(); - mInfo("vgId:%d, arb isSync updating, new isSync:%d", vgId, newIsSync); + mInfo("vgId:%d, arb isSync updating, new isSync:%d, timeStamp:%" PRId64, vgId, newIsSync, pNewGroup->updateTimeMs); updateIsSync = true; } @@ -1189,7 +1208,7 @@ static int32_t mndProcessArbCheckSyncRsp(SRpcMsg *pRsp) { TAOS_RETURN(code); } - mInfo("vgId:%d, arb check-sync-rsp received, errCode:%d", syncRsp.vgId, syncRsp.errCode); + mInfo("vgId:%d, vnode-arb-check-sync-rsp received, errCode:%d", syncRsp.vgId, syncRsp.errCode); if (mndArbCheckToken(arbToken, syncRsp.arbToken) != 0) { mInfo("skip update arb sync for vgId:%d, arb token mismatch, local:[%s] msg:[%s]", syncRsp.vgId, arbToken, syncRsp.arbToken); @@ -1297,6 +1316,35 @@ _OVER: return code; } +static char *formatTimestamp(char *buf, int64_t val, int precision) { + time_t tt; + if (precision == TSDB_TIME_PRECISION_MICRO) { + tt = (time_t)(val / 1000000); + } + if (precision == TSDB_TIME_PRECISION_NANO) { + tt = (time_t)(val / 1000000000); + } else { + tt = (time_t)(val / 1000); + } + + struct tm tm; + if (taosLocalTime(&tt, &tm, NULL, 0, NULL) == NULL) { + mError("failed to get local time"); + return NULL; + } + size_t pos = taosStrfTime(buf, 32, "%Y-%m-%d %H:%M:%S", &tm); + + if (precision == TSDB_TIME_PRECISION_MICRO) { + sprintf(buf + pos, ".%06d", (int)(val % 1000000)); + } else if (precision == TSDB_TIME_PRECISION_NANO) { + sprintf(buf + pos, ".%09d", (int)(val % 1000000000)); + } else { + sprintf(buf + pos, ".%03d", (int)(val % 1000)); + } + + return buf; +} + static int32_t mndRetrieveArbGroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows) { SMnode *pMnode = pReq->info.node; SSdb *pSdb = pMnode->pSdb; @@ -1338,8 +1386,13 @@ static int32_t mndRetrieveArbGroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock &lino, _OVER); } + mInfo("vgId:%d, arb group sync:%d, code:%d, update time:%" PRId64, pGroup->vgId, pGroup->isSync, pGroup->code, + pGroup->updateTimeMs); + char strSync[100] = {0}; - sprintf(strSync, "%d,%d", pGroup->isSync, pGroup->code); + char bufUpdateTime[40] = {0}; + (void)formatTimestamp(bufUpdateTime, pGroup->updateTimeMs, TSDB_TIME_PRECISION_MILLI); + sprintf(strSync, "%d,%d,%s", pGroup->isSync, pGroup->code, bufUpdateTime); char sync[100 + VARSTR_HEADER_SIZE] = {0}; STR_WITH_MAXSIZE_TO_VARSTR(sync, strSync, 100 + VARSTR_HEADER_SIZE); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); From 1bc47199be856462baafce08a92280af2d7dbb2d Mon Sep 17 00:00:00 2001 From: dmchen Date: Wed, 25 Dec 2024 02:43:51 +0000 Subject: [PATCH 04/11] fix/TS-5805-check-arb --- source/dnode/mnode/impl/inc/mndArbGroup.h | 4 +- source/dnode/mnode/impl/src/mndArbGroup.c | 80 +++++++++---------- .../mnode/impl/test/arbgroup/arbgroup.cpp | 6 +- 3 files changed, 45 insertions(+), 45 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndArbGroup.h b/source/dnode/mnode/impl/inc/mndArbGroup.h index 8abc435205..b3fe1dacd4 100644 --- a/source/dnode/mnode/impl/inc/mndArbGroup.h +++ b/source/dnode/mnode/impl/inc/mndArbGroup.h @@ -38,8 +38,8 @@ int32_t mndSetCreateArbGroupCommitLogs(STrans *pTrans, SArbGroup *pGroup); int32_t mndSetDropArbGroupPrepareLogs(STrans *pTrans, SArbGroup *pGroup); int32_t mndSetDropArbGroupCommitLogs(STrans *pTrans, SArbGroup *pGroup); -bool mndUpdateArbGroupByHeartBeat(SArbGroup *pGroup, SVArbHbRspMember *pRspMember, int64_t nowMs, int32_t dnodeId, - SArbGroup *pNewGroup); +bool mndCheckArbGroupByHeartBeat(SArbGroup *pGroup, SVArbHbRspMember *pRspMember, int64_t nowMs, int32_t dnodeId, + SArbGroup *pNewGroup); bool mndUpdateArbGroupByCheckSync(SArbGroup *pGroup, int32_t vgId, char *member0Token, char *member1Token, bool newIsSync, SArbGroup *pNewGroup, int32_t code); bool mndUpdateArbGroupBySetAssignedLeader(SArbGroup *pGroup, int32_t vgId, char *memberToken, int32_t errcode, diff --git a/source/dnode/mnode/impl/src/mndArbGroup.c b/source/dnode/mnode/impl/src/mndArbGroup.c index 3e4c301e70..89192950e1 100644 --- a/source/dnode/mnode/impl/src/mndArbGroup.c +++ b/source/dnode/mnode/impl/src/mndArbGroup.c @@ -34,9 +34,9 @@ static void mndArbGroupDupObj(SArbGroup *pGroup, SArbGroup *pNew); static void mndArbGroupSetAssignedLeader(SArbGroup *pGroup, int32_t index); static void mndArbGroupResetAssignedLeader(SArbGroup *pGroup); -static int32_t mndArbGroupUpdateTrans(SMnode *pMnode, SArbGroup *pNew); -static int32_t mndPullupArbUpdateGroup(SMnode *pMnode, SArbGroup *pNewGroup); -static int32_t mndPullupArbUpdateGroupBatch(SMnode *pMnode, SArray *newGroupArray); +// static int32_t mndArbGroupUpdateTrans(SMnode *pMnode, SArbGroup *pNew); +static int32_t mndUpdateArbGroup(SMnode *pMnode, SArbGroup *pNewGroup); +static int32_t mndBatchUpdateArbGroup(SMnode *pMnode, SArray *newGroupArray); static int32_t mndProcessArbHbTimer(SRpcMsg *pReq); static int32_t mndProcessArbCheckSyncTimer(SRpcMsg *pReq); @@ -728,7 +728,7 @@ static int32_t mndProcessArbCheckSyncTimer(SRpcMsg *pReq) { } } - TAOS_CHECK_EXIT(mndPullupArbUpdateGroupBatch(pMnode, pUpdateArray)); + TAOS_CHECK_EXIT(mndBatchUpdateArbGroup(pMnode, pUpdateArray)); _exit: if (code != 0) { @@ -772,7 +772,7 @@ static void mndInitArbUpdateGroup(SArbGroup *pGroup, SMArbUpdateGroup *outGroup) outGroup->updateTimeMs = pGroup->updateTimeMs; } -static int32_t mndPullupArbUpdateGroup(SMnode *pMnode, SArbGroup *pNewGroup) { +static int32_t mndUpdateArbGroup(SMnode *pMnode, SArbGroup *pNewGroup) { if (taosHashGet(arbUpdateHash, &pNewGroup->vgId, sizeof(pNewGroup->vgId)) != NULL) { mInfo("vgId:%d, arb skip to pullup arb-update-group request, since it is in process", pNewGroup->vgId); return 0; @@ -805,7 +805,7 @@ _OVER: return ret; } -static int32_t mndPullupArbUpdateGroupBatch(SMnode *pMnode, SArray *newGroupArray) { +static int32_t mndBatchUpdateArbGroup(SMnode *pMnode, SArray *newGroupArray) { int32_t ret = -1; size_t sz = taosArrayGetSize(newGroupArray); @@ -963,41 +963,41 @@ static void mndArbGroupResetAssignedLeader(SArbGroup *pGroup) { pGroup->assignedLeader.acked = false; } -static int32_t mndArbGroupUpdateTrans(SMnode *pMnode, SArbGroup *pNew) { - int32_t code = -1; - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_ARBGROUP, NULL, "update-arbgroup"); - if (pTrans == NULL) { - mError("failed to update arbgroup in create trans, vgId:%d, since %s", pNew->vgId, terrstr()); - if (terrno != 0) code = terrno; - goto _OVER; - } +// static int32_t mndArbGroupUpdateTrans(SMnode *pMnode, SArbGroup *pNew) { +// int32_t code = -1; +// STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_ARBGROUP, NULL, "update-arbgroup"); +// if (pTrans == NULL) { +// mError("failed to update arbgroup in create trans, vgId:%d, since %s", pNew->vgId, terrstr()); +// if (terrno != 0) code = terrno; +// goto _OVER; +// } - mInfo("trans:%d, used to update arbgroup:%d, member0:[%d][%s] member1:[%d][%s] isSync:%d assigned:[%d][%s][%d]", - pTrans->id, pNew->vgId, pNew->members[0].info.dnodeId, pNew->members[0].state.token, - pNew->members[1].info.dnodeId, pNew->members[1].state.token, pNew->isSync, pNew->assignedLeader.dnodeId, - pNew->assignedLeader.token, pNew->assignedLeader.acked); +// mInfo("trans:%d, used to update arbgroup:%d, member0:[%d][%s] member1:[%d][%s] isSync:%d assigned:[%d][%s][%d]", +// pTrans->id, pNew->vgId, pNew->members[0].info.dnodeId, pNew->members[0].state.token, +// pNew->members[1].info.dnodeId, pNew->members[1].state.token, pNew->isSync, pNew->assignedLeader.dnodeId, +// pNew->assignedLeader.token, pNew->assignedLeader.acked); - mndTransAddArbGroupId(pTrans, pNew->vgId); - if ((code = mndTransCheckConflict(pMnode, pTrans)) != 0) { - goto _OVER; - } +// mndTransAddArbGroupId(pTrans, pNew->vgId); +// if ((code = mndTransCheckConflict(pMnode, pTrans)) != 0) { +// goto _OVER; +// } - if ((code = mndSetCreateArbGroupCommitLogs(pTrans, pNew)) != 0) { - mError("failed to update arbgroup in set commit log, vgId:%d, since %s", pNew->vgId, tstrerror(code)); - goto _OVER; - } +// if ((code = mndSetCreateArbGroupCommitLogs(pTrans, pNew)) != 0) { +// mError("failed to update arbgroup in set commit log, vgId:%d, since %s", pNew->vgId, tstrerror(code)); +// goto _OVER; +// } - if ((code = mndTransPrepare(pMnode, pTrans)) != 0) goto _OVER; +// if ((code = mndTransPrepare(pMnode, pTrans)) != 0) goto _OVER; - code = 0; +// code = 0; -_OVER: - mndTransDrop(pTrans); - return code; -} +// _OVER: +// mndTransDrop(pTrans); +// return code; +// } -bool mndUpdateArbGroupByHeartBeat(SArbGroup *pGroup, SVArbHbRspMember *pRspMember, int64_t nowMs, int32_t dnodeId, - SArbGroup *pNewGroup) { +bool mndCheckArbGroupByHeartBeat(SArbGroup *pGroup, SVArbHbRspMember *pRspMember, int64_t nowMs, int32_t dnodeId, + SArbGroup *pNewGroup) { bool updateToken = false; SArbGroupMember *pMember = NULL; @@ -1052,7 +1052,7 @@ _OVER: return updateToken; } -static int32_t mndUpdateArbHeartBeat(SMnode *pMnode, int32_t dnodeId, SArray *memberArray) { +static int32_t mndUpdateArbGroupsByHeartBeat(SMnode *pMnode, int32_t dnodeId, SArray *memberArray) { int64_t nowMs = taosGetTimestampMs(); size_t size = taosArrayGetSize(memberArray); SArray *pUpdateArray = taosArrayInit(size, sizeof(SArbGroup)); @@ -1067,7 +1067,7 @@ static int32_t mndUpdateArbHeartBeat(SMnode *pMnode, int32_t dnodeId, SArray *me continue; } - bool updateToken = mndUpdateArbGroupByHeartBeat(pGroup, pRspMember, nowMs, dnodeId, &newGroup); + bool updateToken = mndCheckArbGroupByHeartBeat(pGroup, pRspMember, nowMs, dnodeId, &newGroup); if (updateToken) { if (taosArrayPush(pUpdateArray, &newGroup) == NULL) { mError("failed to push newGroup to updateArray, but continue at this hearbear"); @@ -1077,7 +1077,7 @@ static int32_t mndUpdateArbHeartBeat(SMnode *pMnode, int32_t dnodeId, SArray *me sdbRelease(pMnode->pSdb, pGroup); } - TAOS_CHECK_RETURN(mndPullupArbUpdateGroupBatch(pMnode, pUpdateArray)); + TAOS_CHECK_RETURN(mndBatchUpdateArbGroup(pMnode, pUpdateArray)); taosArrayDestroy(pUpdateArray); return 0; @@ -1134,7 +1134,7 @@ static int32_t mndUpdateArbSync(SMnode *pMnode, int32_t vgId, char *member0Token bool updateIsSync = mndUpdateArbGroupByCheckSync(pGroup, vgId, member0Token, member1Token, newIsSync, &newGroup, rsp_code); if (updateIsSync) { - if (mndPullupArbUpdateGroup(pMnode, &newGroup) != 0) { + if (mndUpdateArbGroup(pMnode, &newGroup) != 0) { mInfo("failed to pullup update arb sync, vgId:%d, since %s", vgId, terrstr()); } } @@ -1173,7 +1173,7 @@ static int32_t mndProcessArbHbRsp(SRpcMsg *pRsp) { goto _OVER; } - TAOS_CHECK_GOTO(mndUpdateArbHeartBeat(pMnode, arbHbRsp.dnodeId, arbHbRsp.hbMembers), NULL, _OVER); + TAOS_CHECK_GOTO(mndUpdateArbGroupsByHeartBeat(pMnode, arbHbRsp.dnodeId, arbHbRsp.hbMembers), NULL, _OVER); code = 0; _OVER: @@ -1303,7 +1303,7 @@ static int32_t mndProcessArbSetAssignedLeaderRsp(SRpcMsg *pRsp) { bool updateAssigned = mndUpdateArbGroupBySetAssignedLeader(pGroup, setAssignedRsp.vgId, setAssignedRsp.memberToken, pRsp->code, &newGroup); if (updateAssigned) { - if ((code = mndPullupArbUpdateGroup(pMnode, &newGroup)) != 0) { + if ((code = mndUpdateArbGroup(pMnode, &newGroup)) != 0) { mInfo("failed to pullup update arb assigned for vgId:%d, since:%s", setAssignedRsp.vgId, tstrerror(code)); goto _OVER; } diff --git a/source/dnode/mnode/impl/test/arbgroup/arbgroup.cpp b/source/dnode/mnode/impl/test/arbgroup/arbgroup.cpp index d5ca019420..0c67d7f487 100644 --- a/source/dnode/mnode/impl/test/arbgroup/arbgroup.cpp +++ b/source/dnode/mnode/impl/test/arbgroup/arbgroup.cpp @@ -127,7 +127,7 @@ TEST_F(ArbgroupTest, 02_process_heart_beat_rsp) { int32_t nowMs = group.members[0].state.lastHbMs + 10; SArbGroup newGroup = {0}; - bool updateToken = mndUpdateArbGroupByHeartBeat(&group, &rspMember, nowMs, dnodeId, &newGroup); + bool updateToken = mndCheckArbGroupByHeartBeat(&group, &rspMember, nowMs, dnodeId, &newGroup); ASSERT_EQ(updateToken, false); ASSERT_NE(group.members[0].state.responsedHbSeq, rspMember.hbSeq); @@ -142,7 +142,7 @@ TEST_F(ArbgroupTest, 02_process_heart_beat_rsp) { int32_t nowMs = group.members[0].state.lastHbMs + 10; SArbGroup newGroup = {0}; - bool updateToken = mndUpdateArbGroupByHeartBeat(&group, &rspMember, nowMs, dnodeId, &newGroup); + bool updateToken = mndCheckArbGroupByHeartBeat(&group, &rspMember, nowMs, dnodeId, &newGroup); ASSERT_EQ(updateToken, false); ASSERT_EQ(group.members[0].state.responsedHbSeq, rspMember.hbSeq); @@ -157,7 +157,7 @@ TEST_F(ArbgroupTest, 02_process_heart_beat_rsp) { int32_t nowMs = group.members[0].state.lastHbMs + 10; SArbGroup newGroup = {0}; - bool updateToken = mndUpdateArbGroupByHeartBeat(&group, &rspMember, nowMs, dnodeId, &newGroup); + bool updateToken = mndCheckArbGroupByHeartBeat(&group, &rspMember, nowMs, dnodeId, &newGroup); ASSERT_EQ(updateToken, true); ASSERT_EQ(group.members[0].state.responsedHbSeq, rspMember.hbSeq); From 1aec478c455df4a5b43d55f7d03aa2d09d627083 Mon Sep 17 00:00:00 2001 From: dmchen Date: Thu, 26 Dec 2024 03:06:33 +0000 Subject: [PATCH 05/11] fix/TS-5805-check-arb --- source/dnode/mnode/impl/test/arbgroup/arbgroup.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/mnode/impl/test/arbgroup/arbgroup.cpp b/source/dnode/mnode/impl/test/arbgroup/arbgroup.cpp index 0c67d7f487..b5cf14fd72 100644 --- a/source/dnode/mnode/impl/test/arbgroup/arbgroup.cpp +++ b/source/dnode/mnode/impl/test/arbgroup/arbgroup.cpp @@ -201,7 +201,7 @@ TEST_F(ArbgroupTest, 03_process_check_sync_rsp) { bool newIsSync = false; SArbGroup newGroup = {0}; - bool updateIsSync = mndUpdateArbGroupByCheckSync(&group, vgId, member0Token, member1Token, newIsSync, &newGroup); + bool updateIsSync = mndUpdateArbGroupByCheckSync(&group, vgId, member0Token, member1Token, newIsSync, &newGroup, 0); ASSERT_EQ(updateIsSync, false); } @@ -214,7 +214,7 @@ TEST_F(ArbgroupTest, 03_process_check_sync_rsp) { bool newIsSync = true; SArbGroup newGroup = {0}; - bool updateIsSync = mndUpdateArbGroupByCheckSync(&group, vgId, member0Token, member1Token, newIsSync, &newGroup); + bool updateIsSync = mndUpdateArbGroupByCheckSync(&group, vgId, member0Token, member1Token, newIsSync, &newGroup, 0); ASSERT_EQ(updateIsSync, true); ASSERT_EQ(newGroup.isSync, true); From 946a0c1bb51c451a19cd5da80b234849dfb85946 Mon Sep 17 00:00:00 2001 From: dmchen Date: Mon, 10 Feb 2025 02:58:42 +0000 Subject: [PATCH 06/11] fix/TS-5805-check-arb-remve-useless-code --- source/dnode/mnode/impl/src/mndArbGroup.c | 45 +++-------------------- source/os/src/osLocale.c | 2 +- 2 files changed, 7 insertions(+), 40 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndArbGroup.c b/source/dnode/mnode/impl/src/mndArbGroup.c index 89192950e1..7ea0be38e6 100644 --- a/source/dnode/mnode/impl/src/mndArbGroup.c +++ b/source/dnode/mnode/impl/src/mndArbGroup.c @@ -34,7 +34,6 @@ static void mndArbGroupDupObj(SArbGroup *pGroup, SArbGroup *pNew); static void mndArbGroupSetAssignedLeader(SArbGroup *pGroup, int32_t index); static void mndArbGroupResetAssignedLeader(SArbGroup *pGroup); -// static int32_t mndArbGroupUpdateTrans(SMnode *pMnode, SArbGroup *pNew); static int32_t mndUpdateArbGroup(SMnode *pMnode, SArbGroup *pNewGroup); static int32_t mndBatchUpdateArbGroup(SMnode *pMnode, SArray *newGroupArray); @@ -963,39 +962,6 @@ static void mndArbGroupResetAssignedLeader(SArbGroup *pGroup) { pGroup->assignedLeader.acked = false; } -// static int32_t mndArbGroupUpdateTrans(SMnode *pMnode, SArbGroup *pNew) { -// int32_t code = -1; -// STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_ARBGROUP, NULL, "update-arbgroup"); -// if (pTrans == NULL) { -// mError("failed to update arbgroup in create trans, vgId:%d, since %s", pNew->vgId, terrstr()); -// if (terrno != 0) code = terrno; -// goto _OVER; -// } - -// mInfo("trans:%d, used to update arbgroup:%d, member0:[%d][%s] member1:[%d][%s] isSync:%d assigned:[%d][%s][%d]", -// pTrans->id, pNew->vgId, pNew->members[0].info.dnodeId, pNew->members[0].state.token, -// pNew->members[1].info.dnodeId, pNew->members[1].state.token, pNew->isSync, pNew->assignedLeader.dnodeId, -// pNew->assignedLeader.token, pNew->assignedLeader.acked); - -// mndTransAddArbGroupId(pTrans, pNew->vgId); -// if ((code = mndTransCheckConflict(pMnode, pTrans)) != 0) { -// goto _OVER; -// } - -// if ((code = mndSetCreateArbGroupCommitLogs(pTrans, pNew)) != 0) { -// mError("failed to update arbgroup in set commit log, vgId:%d, since %s", pNew->vgId, tstrerror(code)); -// goto _OVER; -// } - -// if ((code = mndTransPrepare(pMnode, pTrans)) != 0) goto _OVER; - -// code = 0; - -// _OVER: -// mndTransDrop(pTrans); -// return code; -// } - bool mndCheckArbGroupByHeartBeat(SArbGroup *pGroup, SVArbHbRspMember *pRspMember, int64_t nowMs, int32_t dnodeId, SArbGroup *pNewGroup) { bool updateToken = false; @@ -1392,16 +1358,17 @@ static int32_t mndRetrieveArbGroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock char strSync[100] = {0}; char bufUpdateTime[40] = {0}; (void)formatTimestamp(bufUpdateTime, pGroup->updateTimeMs, TSDB_TIME_PRECISION_MILLI); - sprintf(strSync, "%d,%d,%s", pGroup->isSync, pGroup->code, bufUpdateTime); + if (pGroup->isSync == 1) { + tsnprintf(strSync, 100, "true(code:%d,%s)", pGroup->code, bufUpdateTime); + } else { + tsnprintf(strSync, 100, "false(code:%d,%s)", 100, pGroup->code, bufUpdateTime); + } + char sync[100 + VARSTR_HEADER_SIZE] = {0}; STR_WITH_MAXSIZE_TO_VARSTR(sync, strSync, 100 + VARSTR_HEADER_SIZE); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); RETRIEVE_CHECK_GOTO(colDataSetVal(pColInfo, numOfRows, (const char *)sync, false), pGroup, &lino, _OVER); - // pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - // RETRIEVE_CHECK_GOTO(colDataSetVal(pColInfo, numOfRows, (const char *)&pGroup->code, false), pGroup, &lino, - // _OVER); - if (pGroup->assignedLeader.dnodeId != 0) { pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); RETRIEVE_CHECK_GOTO(colDataSetVal(pColInfo, numOfRows, (const char *)&pGroup->assignedLeader.dnodeId, false), diff --git a/source/os/src/osLocale.c b/source/os/src/osLocale.c index ba5b19adde..484436fafe 100644 --- a/source/os/src/osLocale.c +++ b/source/os/src/osLocale.c @@ -84,7 +84,7 @@ int32_t taosSetSystemLocale(const char *inLocale) { if (NULL == locale) { terrno = TSDB_CODE_INVALID_PARA; uError("failed to set locale:%s", inLocale); - return 0; + return terrno; } tstrncpy(tsLocale, locale, TD_LOCALE_LEN); From b1dc31a2bde7777ffc2eebb3e4964fbd60fee128 Mon Sep 17 00:00:00 2001 From: dmchen Date: Mon, 10 Feb 2025 06:57:50 +0000 Subject: [PATCH 07/11] fix/TS-5805-check-arb-add-column --- source/common/src/systable.c | 4 ++-- source/dnode/mnode/impl/src/mndArbGroup.c | 18 +++++++++++++----- tests/army/cluster/arbgroup_sync.py | 15 +++++++++++---- tests/army/cluster/arbitrator.py | 14 +++++++++++--- 4 files changed, 37 insertions(+), 14 deletions(-) diff --git a/source/common/src/systable.c b/source/common/src/systable.c index 72b9fa0082..bc2c52e570 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -73,8 +73,8 @@ static const SSysDbTableSchema arbGroupsSchema[] = { {.name = "vgroup_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = true}, {.name = "v1_dnode", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, {.name = "v2_dnode", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, - {.name = "is_sync", .bytes = 100, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, - //{.name = "sync_code", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = true}, + {.name = "is_sync", .bytes = 6, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, + {.name = "check_sync_code", .bytes = 100, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, {.name = "assigned_dnode", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, {.name = "assigned_token", .bytes = TSDB_ARB_TOKEN_SIZE + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, {.name = "assigned_acked", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, diff --git a/source/dnode/mnode/impl/src/mndArbGroup.c b/source/dnode/mnode/impl/src/mndArbGroup.c index 7ea0be38e6..bf35c3bc5e 100644 --- a/source/dnode/mnode/impl/src/mndArbGroup.c +++ b/source/dnode/mnode/impl/src/mndArbGroup.c @@ -1355,13 +1355,11 @@ static int32_t mndRetrieveArbGroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock mInfo("vgId:%d, arb group sync:%d, code:%d, update time:%" PRId64, pGroup->vgId, pGroup->isSync, pGroup->code, pGroup->updateTimeMs); - char strSync[100] = {0}; - char bufUpdateTime[40] = {0}; - (void)formatTimestamp(bufUpdateTime, pGroup->updateTimeMs, TSDB_TIME_PRECISION_MILLI); + char strSync[6] = {0}; if (pGroup->isSync == 1) { - tsnprintf(strSync, 100, "true(code:%d,%s)", pGroup->code, bufUpdateTime); + tsnprintf(strSync, 6, "true"); } else { - tsnprintf(strSync, 100, "false(code:%d,%s)", 100, pGroup->code, bufUpdateTime); + tsnprintf(strSync, 6, "false"); } char sync[100 + VARSTR_HEADER_SIZE] = {0}; @@ -1369,6 +1367,16 @@ static int32_t mndRetrieveArbGroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); RETRIEVE_CHECK_GOTO(colDataSetVal(pColInfo, numOfRows, (const char *)sync, false), pGroup, &lino, _OVER); + char strCheckSyncCode[100] = {0}; + char bufUpdateTime[40] = {0}; + (void)formatTimestamp(bufUpdateTime, pGroup->updateTimeMs, TSDB_TIME_PRECISION_MILLI); + tsnprintf(strCheckSyncCode, 100, "%d(%s)", pGroup->code, bufUpdateTime); + + char checkSyncCode[100 + VARSTR_HEADER_SIZE] = {0}; + STR_WITH_MAXSIZE_TO_VARSTR(checkSyncCode, strCheckSyncCode, 100 + VARSTR_HEADER_SIZE); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + RETRIEVE_CHECK_GOTO(colDataSetVal(pColInfo, numOfRows, (const char *)checkSyncCode, false), pGroup, &lino, _OVER); + if (pGroup->assignedLeader.dnodeId != 0) { pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); RETRIEVE_CHECK_GOTO(colDataSetVal(pColInfo, numOfRows, (const char *)&pGroup->assignedLeader.dnodeId, false), diff --git a/tests/army/cluster/arbgroup_sync.py b/tests/army/cluster/arbgroup_sync.py index 9fd8e7b1f3..9152263a59 100644 --- a/tests/army/cluster/arbgroup_sync.py +++ b/tests/army/cluster/arbgroup_sync.py @@ -40,14 +40,17 @@ class TDTestCase(TBase): while count < 100: tdSql.query("show arbgroups;") - if tdSql.getData(0, 4) == 1: + if tdSql.getData(0, 4) == 'true': break - tdLog.info("wait 1 seconds for is sync") + tdLog.info("wait %d seconds for is sync"%count) time.sleep(1) count += 1 - + + if count == 100: + tdLog.exit("arbgroup sync failed") + return tdSql.query("show db.vgroups;") @@ -67,10 +70,14 @@ class TDTestCase(TBase): if(tdSql.getData(0, 4) == "assigned ") or (tdSql.getData(0, 6) == "assigned "): break - tdLog.info("wait 1 seconds for set assigned") + tdLog.info("wait %d seconds for set assigned"%count) time.sleep(1) count += 1 + + if count == 100: + tdLog.exit("check assigned failed") + return def stop(self): tdSql.close() diff --git a/tests/army/cluster/arbitrator.py b/tests/army/cluster/arbitrator.py index 385358e5cc..947397b395 100644 --- a/tests/army/cluster/arbitrator.py +++ b/tests/army/cluster/arbitrator.py @@ -46,13 +46,17 @@ class TDTestCase(TBase): while count < 100: tdSql.query("show arbgroups;") - if tdSql.getData(0, 4) == 1: + if tdSql.getData(0, 4) == 'true': break - tdLog.info("wait 1 seconds for is sync") + tdLog.info("wait %d seconds for is sync"%count) time.sleep(1) count += 1 + + if count == 100: + tdLog.exit("arbgroup sync failed") + return tdSql.query("show db.vgroups;") @@ -73,10 +77,14 @@ class TDTestCase(TBase): if(tdSql.getData(0, 4) == "assigned ") or (tdSql.getData(0, 6) == "assigned "): break - tdLog.info("wait 1 seconds for set assigned") + tdLog.info("wait %d seconds for set assigned"%count) time.sleep(1) count += 1 + + if count == 100: + tdLog.exit("check assigned failed") + return tdSql.execute("INSERT INTO d0 VALUES (NOW, 10.3, 219, 0.31);") From 966e36bc83221836ee3b8abd39b1c2d656a94a9f Mon Sep 17 00:00:00 2001 From: dmchen Date: Mon, 10 Feb 2025 09:15:16 +0000 Subject: [PATCH 08/11] fix/TS-5805-check-arb-fix-case --- tests/system-test/0-others/information_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system-test/0-others/information_schema.py b/tests/system-test/0-others/information_schema.py index 390bf3d9dd..5bdb744f6c 100644 --- a/tests/system-test/0-others/information_schema.py +++ b/tests/system-test/0-others/information_schema.py @@ -222,7 +222,7 @@ class TDTestCase: tdSql.query("select * from information_schema.ins_columns where db_name ='information_schema'") tdLog.info(len(tdSql.queryResult)) - tdSql.checkEqual(True, len(tdSql.queryResult) in range(312, 313)) + tdSql.checkEqual(True, len(tdSql.queryResult) in range(313, 314)) tdSql.query("select * from information_schema.ins_columns where db_name ='performance_schema'") tdSql.checkEqual(61, len(tdSql.queryResult)) From 13d0a9b5e28495583d9a22fc89cdbcca7ce9dee1 Mon Sep 17 00:00:00 2001 From: dmchen Date: Tue, 11 Feb 2025 01:47:52 +0000 Subject: [PATCH 09/11] fix/TS-5805-check-arb-msg --- source/common/src/msg/tmsg.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/source/common/src/msg/tmsg.c b/source/common/src/msg/tmsg.c index c6a1522a37..052fcbe2ba 100644 --- a/source/common/src/msg/tmsg.c +++ b/source/common/src/msg/tmsg.c @@ -8295,8 +8295,6 @@ int32_t tSerializeSMArbUpdateGroupBatchReq(void *buf, int32_t bufLen, SMArbUpdat TAOS_CHECK_EXIT(tEncodeI32(&encoder, pGroup->assignedLeader.dnodeId)); TAOS_CHECK_EXIT(tEncodeCStr(&encoder, pGroup->assignedLeader.token)); TAOS_CHECK_EXIT(tEncodeI64(&encoder, pGroup->version)); - TAOS_CHECK_EXIT(tEncodeI32(&encoder, pGroup->code)); - TAOS_CHECK_EXIT(tEncodeI64(&encoder, pGroup->updateTimeMs)); } for (int32_t i = 0; i < sz; i++) { @@ -8304,6 +8302,12 @@ int32_t tSerializeSMArbUpdateGroupBatchReq(void *buf, int32_t bufLen, SMArbUpdat TAOS_CHECK_EXIT(tEncodeI8(&encoder, pGroup->assignedLeader.acked)); } + for (int32_t i = 0; i < sz; i++) { + SMArbUpdateGroup *pGroup = taosArrayGet(pReq->updateArray, i); + TAOS_CHECK_EXIT(tEncodeI32(&encoder, pGroup->code)); + TAOS_CHECK_EXIT(tEncodeI64(&encoder, pGroup->updateTimeMs)); + } + tEndEncode(&encoder); _exit: @@ -8350,8 +8354,6 @@ int32_t tDeserializeSMArbUpdateGroupBatchReq(void *buf, int32_t bufLen, SMArbUpd TAOS_CHECK_EXIT(tDecodeCStrTo(&decoder, group.assignedLeader.token)); TAOS_CHECK_EXIT(tDecodeI64(&decoder, &group.version)); group.assignedLeader.acked = false; - TAOS_CHECK_EXIT(tDecodeI32(&decoder, &group.code)); - TAOS_CHECK_EXIT(tDecodeI64(&decoder, &group.updateTimeMs)); if (taosArrayPush(updateArray, &group) == NULL) { TAOS_CHECK_EXIT(terrno); @@ -8365,6 +8367,14 @@ int32_t tDeserializeSMArbUpdateGroupBatchReq(void *buf, int32_t bufLen, SMArbUpd } } + if (!tDecodeIsEnd(&decoder)) { + for (int32_t i = 0; i < sz; i++) { + SMArbUpdateGroup *pGroup = taosArrayGet(updateArray, i); + TAOS_CHECK_EXIT(tDecodeI32(&decoder, &pGroup->code)); + TAOS_CHECK_EXIT(tDecodeI64(&decoder, &pGroup->updateTimeMs)); + } + } + pReq->updateArray = updateArray; tEndDecode(&decoder); From 635b4a5cb1be08a8bf73e567505444bf15ffa193 Mon Sep 17 00:00:00 2001 From: dmchen Date: Tue, 11 Feb 2025 02:56:12 +0000 Subject: [PATCH 10/11] fix/TS-5805-check-arb-trans-compatable --- source/dnode/mnode/impl/src/mndArbGroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndArbGroup.c b/source/dnode/mnode/impl/src/mndArbGroup.c index bf35c3bc5e..03aeb2e0c0 100644 --- a/source/dnode/mnode/impl/src/mndArbGroup.c +++ b/source/dnode/mnode/impl/src/mndArbGroup.c @@ -22,7 +22,7 @@ #include "mndVgroup.h" #define ARBGROUP_VER_NUMBER 1 -#define ARBGROUP_RESERVE_SIZE 63 +#define ARBGROUP_RESERVE_SIZE 51 static SHashObj *arbUpdateHash = NULL; @@ -639,7 +639,7 @@ void mndArbCheckSync(SArbGroup *pArbGroup, int64_t nowMs, ECheckSyncOp *pOp, SAr mInfo("arb skip to set assigned leader to vgId:%d dnodeId:%d, arb group is not sync", vgId, pMember->info.dnodeId); } - *pOp = CHECK_SYNC_CHECK_SYNC; + //*pOp = CHECK_SYNC_CHECK_SYNC; return; } From d128c3927ddcea1c42e423e69919cfdb6603de2d Mon Sep 17 00:00:00 2001 From: dmchen Date: Wed, 12 Feb 2025 02:04:13 +0000 Subject: [PATCH 11/11] fix/TS-5805-check-arb-fix-comment --- source/common/src/systable.c | 2 +- source/dnode/mnode/impl/src/mndArbGroup.c | 17 ++++------------- tests/army/cluster/arbgroup_sync.py | 2 +- tests/army/cluster/arbitrator.py | 2 +- 4 files changed, 7 insertions(+), 16 deletions(-) diff --git a/source/common/src/systable.c b/source/common/src/systable.c index bc2c52e570..4deb1bba24 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -73,7 +73,7 @@ static const SSysDbTableSchema arbGroupsSchema[] = { {.name = "vgroup_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = true}, {.name = "v1_dnode", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, {.name = "v2_dnode", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, - {.name = "is_sync", .bytes = 6, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, + {.name = "is_sync", .bytes = 1, .type = TSDB_DATA_TYPE_BOOL, .sysInfo = true}, {.name = "check_sync_code", .bytes = 100, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, {.name = "assigned_dnode", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, {.name = "assigned_token", .bytes = TSDB_ARB_TOKEN_SIZE + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, diff --git a/source/dnode/mnode/impl/src/mndArbGroup.c b/source/dnode/mnode/impl/src/mndArbGroup.c index 03aeb2e0c0..aa893b667a 100644 --- a/source/dnode/mnode/impl/src/mndArbGroup.c +++ b/source/dnode/mnode/impl/src/mndArbGroup.c @@ -1352,25 +1352,16 @@ static int32_t mndRetrieveArbGroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock &lino, _OVER); } - mInfo("vgId:%d, arb group sync:%d, code:%d, update time:%" PRId64, pGroup->vgId, pGroup->isSync, pGroup->code, - pGroup->updateTimeMs); + mInfo("vgId:%d, arb group sync:%d, code:%s, update time:%" PRId64, pGroup->vgId, pGroup->isSync, + tstrerror(pGroup->code), pGroup->updateTimeMs); - char strSync[6] = {0}; - if (pGroup->isSync == 1) { - tsnprintf(strSync, 6, "true"); - } else { - tsnprintf(strSync, 6, "false"); - } - - char sync[100 + VARSTR_HEADER_SIZE] = {0}; - STR_WITH_MAXSIZE_TO_VARSTR(sync, strSync, 100 + VARSTR_HEADER_SIZE); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - RETRIEVE_CHECK_GOTO(colDataSetVal(pColInfo, numOfRows, (const char *)sync, false), pGroup, &lino, _OVER); + RETRIEVE_CHECK_GOTO(colDataSetVal(pColInfo, numOfRows, (const char *)&pGroup->isSync, false), pGroup, &lino, _OVER); char strCheckSyncCode[100] = {0}; char bufUpdateTime[40] = {0}; (void)formatTimestamp(bufUpdateTime, pGroup->updateTimeMs, TSDB_TIME_PRECISION_MILLI); - tsnprintf(strCheckSyncCode, 100, "%d(%s)", pGroup->code, bufUpdateTime); + tsnprintf(strCheckSyncCode, 100, "%s(%s)", tstrerror(pGroup->code), bufUpdateTime); char checkSyncCode[100 + VARSTR_HEADER_SIZE] = {0}; STR_WITH_MAXSIZE_TO_VARSTR(checkSyncCode, strCheckSyncCode, 100 + VARSTR_HEADER_SIZE); diff --git a/tests/army/cluster/arbgroup_sync.py b/tests/army/cluster/arbgroup_sync.py index 9152263a59..ccf45d2dc9 100644 --- a/tests/army/cluster/arbgroup_sync.py +++ b/tests/army/cluster/arbgroup_sync.py @@ -40,7 +40,7 @@ class TDTestCase(TBase): while count < 100: tdSql.query("show arbgroups;") - if tdSql.getData(0, 4) == 'true': + if tdSql.getData(0, 4) == True: break tdLog.info("wait %d seconds for is sync"%count) diff --git a/tests/army/cluster/arbitrator.py b/tests/army/cluster/arbitrator.py index 947397b395..98881c5fc4 100644 --- a/tests/army/cluster/arbitrator.py +++ b/tests/army/cluster/arbitrator.py @@ -46,7 +46,7 @@ class TDTestCase(TBase): while count < 100: tdSql.query("show arbgroups;") - if tdSql.getData(0, 4) == 'true': + if tdSql.getData(0, 4) == True: break tdLog.info("wait %d seconds for is sync"%count)