fix:rebalance not only one in once timer

This commit is contained in:
wangmm0220 2023-04-17 19:59:11 +08:00
parent f65cd36e80
commit 1771a54256
6 changed files with 31 additions and 63 deletions

View File

@ -2071,7 +2071,6 @@ static FORCE_INLINE void* tDeserializeSMVSubscribeReq(void* buf, SMVSubscribeReq
typedef struct { typedef struct {
char key[TSDB_SUBSCRIBE_KEY_LEN]; char key[TSDB_SUBSCRIBE_KEY_LEN];
SArray* lostConsumers; // SArray<int64_t>
SArray* removedConsumers; // SArray<int64_t> SArray* removedConsumers; // SArray<int64_t>
SArray* newConsumers; // SArray<int64_t> SArray* newConsumers; // SArray<int64_t>
} SMqRebInfo; } SMqRebInfo;
@ -2082,10 +2081,6 @@ static FORCE_INLINE SMqRebInfo* tNewSMqRebSubscribe(const char* key) {
return NULL; return NULL;
} }
tstrncpy(pRebInfo->key, key, TSDB_SUBSCRIBE_KEY_LEN); tstrncpy(pRebInfo->key, key, TSDB_SUBSCRIBE_KEY_LEN);
pRebInfo->lostConsumers = taosArrayInit(0, sizeof(int64_t));
if (pRebInfo->lostConsumers == NULL) {
goto _err;
}
pRebInfo->removedConsumers = taosArrayInit(0, sizeof(int64_t)); pRebInfo->removedConsumers = taosArrayInit(0, sizeof(int64_t));
if (pRebInfo->removedConsumers == NULL) { if (pRebInfo->removedConsumers == NULL) {
goto _err; goto _err;
@ -2096,7 +2091,6 @@ static FORCE_INLINE SMqRebInfo* tNewSMqRebSubscribe(const char* key) {
} }
return pRebInfo; return pRebInfo;
_err: _err:
taosArrayDestroy(pRebInfo->lostConsumers);
taosArrayDestroy(pRebInfo->removedConsumers); taosArrayDestroy(pRebInfo->removedConsumers);
taosArrayDestroy(pRebInfo->newConsumers); taosArrayDestroy(pRebInfo->newConsumers);
taosMemoryFreeClear(pRebInfo); taosMemoryFreeClear(pRebInfo);

View File

@ -137,12 +137,12 @@ typedef enum {
} EDndReason; } EDndReason;
typedef enum { typedef enum {
CONSUMER_UPDATE__TOUCH = 1, CONSUMER_UPDATE__TOUCH = 1, // rebalance req do not need change consume topic
CONSUMER_UPDATE__ADD, CONSUMER_UPDATE__ADD,
CONSUMER_UPDATE__REMOVE, CONSUMER_UPDATE__REMOVE,
CONSUMER_UPDATE__LOST, CONSUMER_UPDATE__LOST,
CONSUMER_UPDATE__RECOVER, CONSUMER_UPDATE__RECOVER,
CONSUMER_UPDATE__MODIFY, CONSUMER_UPDATE__MODIFY, // subscribe req need change consume topic
} ECsmUpdateType; } ECsmUpdateType;
typedef struct { typedef struct {
@ -624,7 +624,7 @@ typedef struct {
SArray* rebVgs; // SArray<SMqRebOutputVg> SArray* rebVgs; // SArray<SMqRebOutputVg>
SArray* newConsumers; // SArray<int64_t> SArray* newConsumers; // SArray<int64_t>
SArray* removedConsumers; // SArray<int64_t> SArray* removedConsumers; // SArray<int64_t>
SArray* touchedConsumers; // SArray<int64_t> SArray* modifyConsumers; // SArray<int64_t>
SMqSubscribeObj* pSub; SMqSubscribeObj* pSub;
SMqSubActionLogEntry* pLogEntry; SMqSubActionLogEntry* pLogEntry;
} SMqRebOutputObj; } SMqRebOutputObj;

View File

@ -247,7 +247,6 @@ static SMqRebInfo *mndGetOrCreateRebSub(SHashObj *pHash, const char *key) {
static void freeRebalanceItem(void *param) { static void freeRebalanceItem(void *param) {
SMqRebInfo *pInfo = param; SMqRebInfo *pInfo = param;
taosArrayDestroy(pInfo->lostConsumers);
taosArrayDestroy(pInfo->newConsumers); taosArrayDestroy(pInfo->newConsumers);
taosArrayDestroy(pInfo->removedConsumers); taosArrayDestroy(pInfo->removedConsumers);
} }
@ -335,7 +334,7 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) {
taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId); taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId);
} }
taosRUnLockLatch(&pConsumer->lock); taosRUnLockLatch(&pConsumer->lock);
} else if (status == MQ_CONSUMER_STATUS__MODIFY) { } else {
taosRLockLatch(&pConsumer->lock); taosRLockLatch(&pConsumer->lock);
int32_t newTopicNum = taosArrayGetSize(pConsumer->rebNewTopics); int32_t newTopicNum = taosArrayGetSize(pConsumer->rebNewTopics);
@ -356,8 +355,6 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) {
taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId); taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId);
} }
taosRUnLockLatch(&pConsumer->lock); taosRUnLockLatch(&pConsumer->lock);
} else {
// do nothing
} }
mndReleaseConsumer(pMnode, pConsumer); mndReleaseConsumer(pMnode, pConsumer);
@ -917,34 +914,22 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer,
taosWLockLatch(&pOldConsumer->lock); taosWLockLatch(&pOldConsumer->lock);
if (pNewConsumer->updateType == CONSUMER_UPDATE__MODIFY) { if (pNewConsumer->updateType == CONSUMER_UPDATE__MODIFY) {
/*A(taosArrayGetSize(pOldConsumer->rebNewTopics) == 0);*/ SArray *tmp = pOldConsumer->rebNewTopics;
/*A(taosArrayGetSize(pOldConsumer->rebRemovedTopics) == 0);*/ pOldConsumer->rebNewTopics = pNewConsumer->rebNewTopics;
pNewConsumer->rebNewTopics = tmp;
// this new consumer has identical topics with one existed consumers. tmp = pOldConsumer->rebRemovedTopics;
if (taosArrayGetSize(pNewConsumer->rebNewTopics) == 0 && taosArrayGetSize(pNewConsumer->rebRemovedTopics) == 0) { pOldConsumer->rebRemovedTopics = pNewConsumer->rebRemovedTopics;
pOldConsumer->status = MQ_CONSUMER_STATUS__READY; pNewConsumer->rebRemovedTopics = tmp;
} else {
SArray *tmp = pOldConsumer->rebNewTopics;
pOldConsumer->rebNewTopics = pNewConsumer->rebNewTopics;
pNewConsumer->rebNewTopics = tmp;
tmp = pOldConsumer->rebRemovedTopics; tmp = pOldConsumer->assignedTopics;
pOldConsumer->rebRemovedTopics = pNewConsumer->rebRemovedTopics; pOldConsumer->assignedTopics = pNewConsumer->assignedTopics;
pNewConsumer->rebRemovedTopics = tmp; pNewConsumer->assignedTopics = tmp;
tmp = pOldConsumer->assignedTopics; pOldConsumer->subscribeTime = pNewConsumer->upTime;
pOldConsumer->assignedTopics = pNewConsumer->assignedTopics; pOldConsumer->status = MQ_CONSUMER_STATUS__MODIFY;
pNewConsumer->assignedTopics = tmp;
pOldConsumer->subscribeTime = pNewConsumer->upTime;
pOldConsumer->status = MQ_CONSUMER_STATUS__MODIFY;
}
} else if (pNewConsumer->updateType == CONSUMER_UPDATE__LOST) { } else if (pNewConsumer->updateType == CONSUMER_UPDATE__LOST) {
/*A(taosArrayGetSize(pOldConsumer->rebNewTopics) == 0);*/
/*A(taosArrayGetSize(pOldConsumer->rebRemovedTopics) == 0);*/
int32_t sz = taosArrayGetSize(pOldConsumer->currentTopics); int32_t sz = taosArrayGetSize(pOldConsumer->currentTopics);
/*pOldConsumer->rebRemovedTopics = taosArrayInit(sz, sizeof(void *));*/
for (int32_t i = 0; i < sz; i++) { for (int32_t i = 0; i < sz; i++) {
char *topic = taosStrdup(taosArrayGetP(pOldConsumer->currentTopics, i)); char *topic = taosStrdup(taosArrayGetP(pOldConsumer->currentTopics, i));
taosArrayPush(pOldConsumer->rebRemovedTopics, &topic); taosArrayPush(pOldConsumer->rebRemovedTopics, &topic);
@ -958,9 +943,6 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer,
pOldConsumer->consumerId, mndConsumerStatusName(status), mndConsumerStatusName(pOldConsumer->status), pOldConsumer->consumerId, mndConsumerStatusName(status), mndConsumerStatusName(pOldConsumer->status),
pOldConsumer->rebalanceTime, (int)taosArrayGetSize(pOldConsumer->rebRemovedTopics)); pOldConsumer->rebalanceTime, (int)taosArrayGetSize(pOldConsumer->rebRemovedTopics));
} else if (pNewConsumer->updateType == CONSUMER_UPDATE__RECOVER) { } else if (pNewConsumer->updateType == CONSUMER_UPDATE__RECOVER) {
/*A(taosArrayGetSize(pOldConsumer->currentTopics) == 0);*/
/*A(taosArrayGetSize(pOldConsumer->rebNewTopics) == 0);*/
int32_t sz = taosArrayGetSize(pOldConsumer->assignedTopics); int32_t sz = taosArrayGetSize(pOldConsumer->assignedTopics);
for (int32_t i = 0; i < sz; i++) { for (int32_t i = 0; i < sz; i++) {
char *topic = taosStrdup(taosArrayGetP(pOldConsumer->assignedTopics, i)); char *topic = taosStrdup(taosArrayGetP(pOldConsumer->assignedTopics, i));
@ -976,7 +958,6 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer,
pOldConsumer->rebalanceTime = pNewConsumer->upTime; pOldConsumer->rebalanceTime = pNewConsumer->upTime;
} else if (pNewConsumer->updateType == CONSUMER_UPDATE__ADD) { } else if (pNewConsumer->updateType == CONSUMER_UPDATE__ADD) {
ASSERT(taosArrayGetSize(pNewConsumer->rebNewTopics) == 1 && taosArrayGetSize(pNewConsumer->rebRemovedTopics) == 0);
char *pNewTopic = taosStrdup(taosArrayGetP(pNewConsumer->rebNewTopics, 0)); char *pNewTopic = taosStrdup(taosArrayGetP(pNewConsumer->rebNewTopics, 0));
// not exist in current topic // not exist in current topic
@ -1015,15 +996,7 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer,
(int)taosArrayGetSize(pOldConsumer->rebRemovedTopics)); (int)taosArrayGetSize(pOldConsumer->rebRemovedTopics));
} else if (pNewConsumer->updateType == CONSUMER_UPDATE__REMOVE) { } else if (pNewConsumer->updateType == CONSUMER_UPDATE__REMOVE) {
/*A(taosArrayGetSize(pNewConsumer->rebNewTopics) == 0);*/
/*A(taosArrayGetSize(pNewConsumer->rebRemovedTopics) == 1);*/
char *removedTopic = taosArrayGetP(pNewConsumer->rebRemovedTopics, 0); char *removedTopic = taosArrayGetP(pNewConsumer->rebRemovedTopics, 0);
#if 0
for (int32_t i = 0; i < taosArrayGetSize(pOldConsumer->rebNewTopics); i++) {
char *topic = taosArrayGetP(pOldConsumer->rebNewTopics, i);
A(strcmp(topic, removedTopic) != 0);
}
#endif
// remove from removed topic // remove from removed topic
removeFromRemoveTopicList(pOldConsumer, removedTopic); removeFromRemoveTopicList(pOldConsumer, removedTopic);

View File

@ -289,9 +289,9 @@ static void transferVgroupsForConsumers(SMqRebOutputObj *pOutput, SHashObj *pHas
SMqConsumerEp *pConsumerEp = (SMqConsumerEp *)pIter; SMqConsumerEp *pConsumerEp = (SMqConsumerEp *)pIter;
int32_t consumerVgNum = taosArrayGetSize(pConsumerEp->vgs); int32_t consumerVgNum = taosArrayGetSize(pConsumerEp->vgs);
// all old consumers still existing are touched // all old consumers still existing need to be modified
// TODO optimize: touch only consumer whose vgs changed // TODO optimize: modify only consumer whose vgs changed
taosArrayPush(pOutput->touchedConsumers, &pConsumerEp->consumerId); taosArrayPush(pOutput->modifyConsumers, &pConsumerEp->consumerId);
if (consumerVgNum > minVgCnt) { if (consumerVgNum > minVgCnt) {
if (imbCnt < imbConsumerNum) { if (imbCnt < imbConsumerNum) {
if (consumerVgNum == minVgCnt + 1) { if (consumerVgNum == minVgCnt + 1) {
@ -409,7 +409,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR
if (taosArrayGetSize(pConsumerEp->vgs) == minVgCnt) { if (taosArrayGetSize(pConsumerEp->vgs) == minVgCnt) {
pRemovedIter = taosHashIterate(pHash, pRemovedIter); pRemovedIter = taosHashIterate(pHash, pRemovedIter);
if (pRemovedIter == NULL) { if (pRemovedIter == NULL) {
mError("sub:%s removed iter is null", pSubKey); mInfo("sub:%s removed iter is null", pSubKey);
break; break;
} }
@ -496,9 +496,9 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu
// 3. commit log: consumer to update status and epoch // 3. commit log: consumer to update status and epoch
// 3.1 set touched consumer // 3.1 set touched consumer
int32_t consumerNum = taosArrayGetSize(pOutput->touchedConsumers); int32_t consumerNum = taosArrayGetSize(pOutput->modifyConsumers);
for (int32_t i = 0; i < consumerNum; i++) { for (int32_t i = 0; i < consumerNum; i++) {
int64_t consumerId = *(int64_t *)taosArrayGet(pOutput->touchedConsumers, i); int64_t consumerId = *(int64_t *)taosArrayGet(pOutput->modifyConsumers, i);
SMqConsumerObj *pConsumerOld = mndAcquireConsumer(pMnode, consumerId); SMqConsumerObj *pConsumerOld = mndAcquireConsumer(pMnode, consumerId);
SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(pConsumerOld->consumerId, pConsumerOld->cgroup); SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(pConsumerOld->consumerId, pConsumerOld->cgroup);
pConsumerNew->updateType = CONSUMER_UPDATE__TOUCH; pConsumerNew->updateType = CONSUMER_UPDATE__TOUCH;
@ -578,15 +578,15 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) {
SMnode *pMnode = pMsg->info.node; SMnode *pMnode = pMsg->info.node;
SMqDoRebalanceMsg *pReq = pMsg->pCont; SMqDoRebalanceMsg *pReq = pMsg->pCont;
void *pIter = NULL; void *pIter = NULL;
bool rebalanceOnce = false; // to ensure only once. // bool rebalanceOnce = false; // to ensure only once.
mInfo("mq re-balance start, total required re-balanced trans:%d", taosHashGetSize(pReq->rebSubHash)); mInfo("mq re-balance start, total required re-balanced trans:%d", taosHashGetSize(pReq->rebSubHash));
// here we only handle one topic rebalance requirement to ensure the atomic execution of this transaction. // here we only handle one topic rebalance requirement to ensure the atomic execution of this transaction.
while (1) { while (1) {
if (rebalanceOnce) { // if (rebalanceOnce) {
break; // break;
} // }
pIter = taosHashIterate(pReq->rebSubHash, pIter); pIter = taosHashIterate(pReq->rebSubHash, pIter);
if (pIter == NULL) { if (pIter == NULL) {
@ -598,7 +598,7 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) {
SMqRebOutputObj rebOutput = {0}; SMqRebOutputObj rebOutput = {0};
rebOutput.newConsumers = taosArrayInit(0, sizeof(int64_t)); rebOutput.newConsumers = taosArrayInit(0, sizeof(int64_t));
rebOutput.removedConsumers = taosArrayInit(0, sizeof(int64_t)); rebOutput.removedConsumers = taosArrayInit(0, sizeof(int64_t));
rebOutput.touchedConsumers = taosArrayInit(0, sizeof(int64_t)); rebOutput.modifyConsumers = taosArrayInit(0, sizeof(int64_t));
rebOutput.rebVgs = taosArrayInit(0, sizeof(SMqRebOutputVg)); rebOutput.rebVgs = taosArrayInit(0, sizeof(SMqRebOutputVg));
SMqRebInfo *pRebInfo = (SMqRebInfo *)pIter; SMqRebInfo *pRebInfo = (SMqRebInfo *)pIter;
@ -656,13 +656,14 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) {
} }
taosArrayDestroy(rebOutput.newConsumers); taosArrayDestroy(rebOutput.newConsumers);
taosArrayDestroy(rebOutput.touchedConsumers); taosArrayDestroy(rebOutput.modifyConsumers);
taosArrayDestroy(rebOutput.removedConsumers); taosArrayDestroy(rebOutput.removedConsumers);
taosArrayDestroy(rebOutput.rebVgs); taosArrayDestroy(rebOutput.rebVgs);
tDeleteSubscribeObj(rebOutput.pSub); tDeleteSubscribeObj(rebOutput.pSub);
taosMemoryFree(rebOutput.pSub); taosMemoryFree(rebOutput.pSub);
rebalanceOnce = true; // taosSsleep(100);
// rebalanceOnce = true;
} }
// reset flag // reset flag

View File

@ -196,13 +196,13 @@ static int32_t sdbUpdateRow(SSdb *pSdb, SHashObj *hash, SSdbRaw *pRaw, SSdbRow *
SSdbRow *pOldRow = *ppOldRow; SSdbRow *pOldRow = *ppOldRow;
pOldRow->status = pRaw->status; pOldRow->status = pRaw->status;
sdbPrintOper(pSdb, pOldRow, "update"); sdbPrintOper(pSdb, pOldRow, "update");
sdbUnLock(pSdb, type);
int32_t code = 0; int32_t code = 0;
SdbUpdateFp updateFp = pSdb->updateFps[type]; SdbUpdateFp updateFp = pSdb->updateFps[type];
if (updateFp != NULL) { if (updateFp != NULL) {
code = (*updateFp)(pSdb, pOldRow->pObj, pNewRow->pObj); code = (*updateFp)(pSdb, pOldRow->pObj, pNewRow->pObj);
} }
sdbUnLock(pSdb, type);
// sdbUnLock(pSdb, type); // sdbUnLock(pSdb, type);
sdbFreeRow(pSdb, pNewRow, false); sdbFreeRow(pSdb, pNewRow, false);

View File

@ -226,7 +226,7 @@ class TDTestCase:
event.wait() event.wait()
tdLog.info("start consume processor") tdLog.info("start consume processor")
pollDelay = 100 pollDelay = 20
showMsg = 1 showMsg = 1
showRow = 1 showRow = 1
self.startTmqSimProcess(buildPath,cfgPath,pollDelay,parameterDict["dbName"],showMsg, showRow) self.startTmqSimProcess(buildPath,cfgPath,pollDelay,parameterDict["dbName"],showMsg, showRow)