From fc21140e03f1bf17885779cefc60b3170917ee0f Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 3 Jan 2023 15:33:50 +0800 Subject: [PATCH 1/4] enh: remove assert from mnode --- source/dnode/mgmt/mgmt_snode/src/smWorker.c | 6 ++++-- source/dnode/mnode/impl/src/mndDb.c | 2 +- source/dnode/mnode/impl/src/mndDef.c | 2 +- source/dnode/mnode/impl/src/mndMnode.c | 1 - source/dnode/mnode/impl/src/mndShow.c | 2 +- source/dnode/mnode/impl/src/mndSma.c | 18 +++++++++++------- source/dnode/mnode/impl/src/mndStb.c | 16 ++++++++++++---- source/dnode/mnode/impl/src/mndSubscribe.c | 1 + source/dnode/mnode/impl/src/mndTopic.c | 8 +++++--- 9 files changed, 36 insertions(+), 20 deletions(-) diff --git a/source/dnode/mgmt/mgmt_snode/src/smWorker.c b/source/dnode/mgmt/mgmt_snode/src/smWorker.c index 2ed960546a..e8402eb7c0 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smWorker.c +++ b/source/dnode/mgmt/mgmt_snode/src/smWorker.c @@ -157,8 +157,10 @@ int32_t smPutMsgToQueue(SSnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) { smPutNodeMsgToWriteQueue(pMgmt, pMsg); break; default: - ASSERTS(0, "msg:%p failed to put into snode queue since %s, type:%s qtype:%d", pMsg, terrstr(), - TMSG_INFO(pMsg->msgType), qtype); + terrno = TSDB_CODE_INVALID_PARA; + rpcFreeCont(pMsg->pCont); + taosFreeQitem(pMsg); + return -1; } return 0; } diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 8a4f6c2195..87b5a5c42d 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -112,7 +112,6 @@ static SSdbRaw *mndDbActionEncode(SDbObj *pDb) { SDB_SET_INT8(pRaw, dataPos, pDb->cfg.hashMethod, _OVER) SDB_SET_INT32(pRaw, dataPos, pDb->cfg.numOfRetensions, _OVER) for (int32_t i = 0; i < pDb->cfg.numOfRetensions; ++i) { - ASSERT(taosArrayGetSize(pDb->cfg.pRetensions) == pDb->cfg.numOfRetensions); SRetention *pRetension = taosArrayGet(pDb->cfg.pRetensions, i); SDB_SET_INT64(pRaw, dataPos, pRetension->freq, _OVER) SDB_SET_INT64(pRaw, dataPos, pRetension->keep, _OVER) @@ -364,6 +363,7 @@ static int32_t mndCheckDbCfg(SMnode *pMnode, SDbCfg *pCfg) { if (pCfg->hashPrefix < TSDB_MIN_HASH_PREFIX || pCfg->hashPrefix > TSDB_MAX_HASH_PREFIX) return -1; if (pCfg->hashSuffix < TSDB_MIN_HASH_SUFFIX || pCfg->hashSuffix > TSDB_MAX_HASH_SUFFIX) return -1; if (pCfg->tsdbPageSize < TSDB_MIN_TSDB_PAGESIZE || pCfg->tsdbPageSize > TSDB_MAX_TSDB_PAGESIZE) return -1; + if (taosArrayGetSize(pCfg->pRetensions) != pCfg->numOfRetensions) return -1; terrno = 0; return terrno; diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index b5c2fb05b3..a5f77513de 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -489,7 +489,7 @@ int32_t tEncodeSubscribeObj(void **buf, const SMqSubscribeObj *pSub) { tlen += tEncodeSMqConsumerEp(buf, pConsumerEp); cnt++; } - ASSERT(cnt == sz); + if(cnt != sz) return -1; tlen += taosEncodeArray(buf, pSub->unassignedVgs, (FEncode)tEncodeSMqVgEp); tlen += taosEncodeString(buf, pSub->dbName); return tlen; diff --git a/source/dnode/mnode/impl/src/mndMnode.c b/source/dnode/mnode/impl/src/mndMnode.c index 37fc9f79e1..c8c8e06c5e 100644 --- a/source/dnode/mnode/impl/src/mndMnode.c +++ b/source/dnode/mnode/impl/src/mndMnode.c @@ -763,7 +763,6 @@ static void mndReloadSyncConfig(SMnode *pMnode) { mInfo("vgId:1, mnode sync not reconfig since readyMnodes:%d updatingMnodes:%d", readyMnodes, updatingMnodes); return; } - // ASSERT(0); if (cfg.myIndex == -1) { #if 1 diff --git a/source/dnode/mnode/impl/src/mndShow.c b/source/dnode/mnode/impl/src/mndShow.c index 6a7e2aaa51..7a8de4099f 100644 --- a/source/dnode/mnode/impl/src/mndShow.c +++ b/source/dnode/mnode/impl/src/mndShow.c @@ -111,7 +111,7 @@ static int32_t convertToRetrieveType(char *name, int32_t len) { } else if (strncasecmp(name, TSDB_INS_TABLE_USER_PRIVILEGES, len) == 0) { type = TSDB_MGMT_TABLE_PRIVILEGES; } else { - // ASSERT(0); + mError("invalid show name:%s len:%d", name, len); } return type; diff --git a/source/dnode/mnode/impl/src/mndSma.c b/source/dnode/mnode/impl/src/mndSma.c index 2d1f6eb505..141bb1df60 100644 --- a/source/dnode/mnode/impl/src/mndSma.c +++ b/source/dnode/mnode/impl/src/mndSma.c @@ -488,7 +488,7 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea memcpy(smaObj.db, pDb->name, TSDB_DB_FNAME_LEN); smaObj.createdTime = taosGetTimestampMs(); smaObj.uid = mndGenerateUid(pCreate->name, TSDB_TABLE_FNAME_LEN); - ASSERT(smaObj.uid != 0); + char resultTbName[TSDB_TABLE_FNAME_LEN + 16] = {0}; snprintf(resultTbName, TSDB_TABLE_FNAME_LEN + 16, "%s_td_tsma_rst_tb", pCreate->name); memcpy(smaObj.dstTbName, resultTbName, TSDB_TABLE_FNAME_LEN); @@ -558,13 +558,15 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea SNode *pAst = NULL; if (nodesStringToNode(streamObj.ast, &pAst) < 0) { - ASSERT(0); + terrno = TSDB_CODE_MND_INVALID_SMA_OPTION; + mError("sma:%s, failed to create since parse ast error", smaObj.name); return -1; } // extract output schema from ast if (qExtractResultSchema(pAst, (int32_t *)&streamObj.outputSchema.nCols, &streamObj.outputSchema.pSchema) != 0) { - ASSERT(0); + terrno = TSDB_CODE_MND_INVALID_SMA_OPTION; + mError("sma:%s, failed to create since extract result schema error", smaObj.name); return -1; } @@ -579,15 +581,18 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea }; if (qCreateQueryPlan(&cxt, &pPlan, NULL) < 0) { - ASSERT(0); + terrno = TSDB_CODE_MND_INVALID_SMA_OPTION; + mError("sma:%s, failed to create since create query plan error", smaObj.name); return -1; } // save physcial plan if (nodesNodeToString((SNode *)pPlan, false, &streamObj.physicalPlan, NULL) != 0) { - ASSERT(0); + terrno = TSDB_CODE_MND_INVALID_SMA_OPTION; + mError("sma:%s, failed to create since save physcial plan error", smaObj.name); return -1; } + if (pAst != NULL) nodesDestroyNode(pAst); nodesDestroyNode((SNode *)pPlan); @@ -826,14 +831,13 @@ static int32_t mndDropSma(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SSmaObj *p if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { mError("stream:%s, failed to drop task since %s", pStream->name, terrstr()); sdbRelease(pMnode->pSdb, pStream); - ASSERT(0); goto _OVER; } // drop stream if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { + mError("stream:%s, failed to drop log since %s", pStream->name, terrstr()); sdbRelease(pMnode->pSdb, pStream); - ASSERT(0); goto _OVER; } } diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 54b3202d24..d504a94700 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -1177,7 +1177,9 @@ static int32_t mndCheckAlterColForTopic(SMnode *pMnode, const char *stbFullName, SNode *pAst = NULL; if (nodesStringToNode(pTopic->ast, &pAst) != 0) { - ASSERT(0); + terrno = TSDB_CODE_MND_FIELD_CONFLICT_WITH_TOPIC; + mError("topic:%s, create ast error", pTopic->name); + sdbRelease(pSdb, pTopic); return -1; } @@ -1222,7 +1224,9 @@ static int32_t mndCheckAlterColForStream(SMnode *pMnode, const char *stbFullName SNode *pAst = NULL; if (nodesStringToNode(pStream->ast, &pAst) != 0) { - ASSERT(0); + terrno = TSDB_CODE_MND_INVALID_STREAM_OPTION; + mError("stream:%s, create ast error", pStream->name); + sdbRelease(pSdb, pStream); return -1; } @@ -2094,7 +2098,9 @@ static int32_t mndCheckDropStbForTopic(SMnode *pMnode, const char *stbFullName, SNode *pAst = NULL; if (nodesStringToNode(pTopic->ast, &pAst) != 0) { - ASSERT(0); + terrno = TSDB_CODE_MND_INVALID_TOPIC_OPTION; + mError("topic:%s, create ast error", pTopic->name); + sdbRelease(pSdb, pTopic); return -1; } @@ -2141,7 +2147,9 @@ static int32_t mndCheckDropStbForStream(SMnode *pMnode, const char *stbFullName, SNode *pAst = NULL; if (nodesStringToNode(pStream->ast, &pAst) != 0) { - ASSERT(0); + terrno = TSDB_CODE_MND_INVALID_STREAM_OPTION; + mError("stream:%s, create ast error", pStream->name); + sdbRelease(pSdb, pStream); return -1; } diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 418474baa6..854be1c52d 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -693,6 +693,7 @@ static SSdbRaw *mndSubActionEncode(SMqSubscribeObj *pSub) { terrno = TSDB_CODE_OUT_OF_MEMORY; void *buf = NULL; int32_t tlen = tEncodeSubscribeObj(NULL, pSub); + if (tlen <= 0) goto SUB_ENCODE_OVER; int32_t size = sizeof(int32_t) + tlen + MND_SUBSCRIBE_RESERVE_SIZE; SSdbRaw *pRaw = sdbAllocRaw(SDB_SUBSCRIBE, MND_SUBSCRIBE_VER_NUMBER, size); diff --git a/source/dnode/mnode/impl/src/mndTopic.c b/source/dnode/mnode/impl/src/mndTopic.c index 071edf992f..bf3827c090 100644 --- a/source/dnode/mnode/impl/src/mndTopic.c +++ b/source/dnode/mnode/impl/src/mndTopic.c @@ -384,7 +384,11 @@ static int32_t mndCreateTopic(SMnode *pMnode, SRpcMsg *pReq, SCMCreateTopicReq * topicObj.subType = pCreate->subType; topicObj.withMeta = pCreate->withMeta; if (topicObj.withMeta) { - ASSERT(topicObj.subType != TOPIC_SUB_TYPE__COLUMN); + if (topicObj.subType == TOPIC_SUB_TYPE__COLUMN) { + terrno = TSDB_CODE_MND_INVALID_TOPIC_OPTION; + mError("topic:%s, failed to create since %s", pCreate->name, terrstr()); + return -1; + } } if (pCreate->subType == TOPIC_SUB_TYPE__COLUMN) { @@ -499,7 +503,6 @@ static int32_t mndCreateTopic(SMnode *pMnode, SRpcMsg *pReq, SCMCreateTopicReq * if (code < 0) { sdbRelease(pSdb, pVgroup); mndTransDrop(pTrans); - ASSERT(0); return -1; } void *buf = taosMemoryCalloc(1, sizeof(SMsgHead) + len); @@ -723,7 +726,6 @@ static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { // TODO check if rebalancing if (mndDropSubByTopic(pMnode, pTrans, dropReq.name) < 0) { - /*ASSERT(0);*/ mError("topic:%s, failed to drop since %s", pTopic->name, terrstr()); mndTransDrop(pTrans); mndReleaseTopic(pMnode, pTopic); From 9a10242f7daf4bd980cfc7573acfba906e41f3ac Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 3 Jan 2023 16:16:58 +0800 Subject: [PATCH 2/4] enh: remove assert from mnode consumer --- include/util/taoserror.h | 1 + source/dnode/mnode/impl/src/mndSubscribe.c | 126 ++++++++++++++------- source/util/src/terror.c | 3 +- 3 files changed, 88 insertions(+), 42 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index b315432be1..51f336c3aa 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -345,6 +345,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_MND_TOPIC_SUBSCRIBED TAOS_DEF_ERROR_CODE(0, 0x03EB) #define TSDB_CODE_MND_CGROUP_USED TAOS_DEF_ERROR_CODE(0, 0x03EC) #define TSDB_CODE_MND_TOPIC_MUST_BE_DELETED TAOS_DEF_ERROR_CODE(0, 0x03ED) +#define TSDB_CODE_MND_INVALID_SUB_OPTION TAOS_DEF_ERROR_CODE(0, 0x03EE) #define TSDB_CODE_MND_IN_REBALANCE TAOS_DEF_ERROR_CODE(0, 0x03EF) // mnode-stream diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 854be1c52d..5cd0adf084 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -96,8 +96,12 @@ static SMqSubscribeObj *mndCreateSub(SMnode *pMnode, const SMqTopicObj *pTopic, pSub->subType = pTopic->subType; pSub->withMeta = pTopic->withMeta; - ASSERT(pSub->unassignedVgs->size == 0); - ASSERT(taosHashGetSize(pSub->consumerHash) == 0); + if (pSub->unassignedVgs->size != 0 || taosHashGetSize(pSub->consumerHash) != 0) { + tDeleteSubscribeObj(pSub); + taosMemoryFree(pSub); + terrno = TSDB_CODE_MND_INVALID_SUB_OPTION; + return NULL; + } if (mndSchedInitSubEp(pMnode, pTopic, pSub) < 0) { tDeleteSubscribeObj(pSub); @@ -105,8 +109,12 @@ static SMqSubscribeObj *mndCreateSub(SMnode *pMnode, const SMqTopicObj *pTopic, return NULL; } - ASSERT(pSub->unassignedVgs->size > 0); - ASSERT(taosHashGetSize(pSub->consumerHash) == 0); + if (pSub->unassignedVgs->size <= 0 || taosHashGetSize(pSub->consumerHash) != 0) { + tDeleteSubscribeObj(pSub); + taosMemoryFree(pSub); + terrno = TSDB_CODE_MND_INVALID_SUB_OPTION; + return NULL; + } return pSub; } @@ -144,7 +152,10 @@ static int32_t mndBuildSubChangeReq(void **pBuf, int32_t *pLen, const SMqSubscri static int32_t mndPersistSubChangeVgReq(SMnode *pMnode, STrans *pTrans, const SMqSubscribeObj *pSub, const SMqRebOutputVg *pRebVg) { - ASSERT(pRebVg->oldConsumerId != pRebVg->newConsumerId); + if (pRebVg->oldConsumerId == pRebVg->newConsumerId) { + terrno = TSDB_CODE_MND_INVALID_SUB_OPTION; + return -1; + } void *buf; int32_t tlen; @@ -155,8 +166,8 @@ static int32_t mndPersistSubChangeVgReq(SMnode *pMnode, STrans *pTrans, const SM int32_t vgId = pRebVg->pVgEp->vgId; SVgObj *pVgObj = mndAcquireVgroup(pMnode, vgId); if (pVgObj == NULL) { - ASSERT(0); taosMemoryFree(buf); + terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } @@ -206,9 +217,9 @@ static SMqRebInfo *mndGetOrCreateRebSub(SHashObj *pHash, const char *key) { } static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqRebOutputObj *pOutput) { - int32_t totalVgNum = pOutput->pSub->vgNum; - - mInfo("mq rebalance: subscription: %s, vgNum: %d", pOutput->pSub->key, pOutput->pSub->vgNum); + int32_t totalVgNum = pOutput->pSub->vgNum; + const char *sub = pOutput->pSub->key; + mInfo("sub:%s, mq rebalance vgNum:%d", sub, pOutput->pSub->vgNum); // 1. build temporary hash(vgId -> SMqRebOutputVg) to store modified vg SHashObj *pHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK); @@ -218,11 +229,24 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR int32_t actualRemoved = 0; for (int32_t i = 0; i < removedNum; i++) { int64_t consumerId = *(int64_t *)taosArrayGet(pInput->pRebInfo->removedConsumers, i); - ASSERT(consumerId > 0); + if (consumerId <= 0) { + mError("sub:%s, mq rebalance cunsumerId:%" PRId64 " <= 0", sub, consumerId); + continue; + } + SMqConsumerEp *pConsumerEp = taosHashGet(pOutput->pSub->consumerHash, &consumerId, sizeof(int64_t)); - ASSERT(pConsumerEp); + if (pConsumerEp == NULL) { + mError("sub:%s, mq rebalance ep is null, cunsumberId:%" PRId64, sub, consumerId); + continue; + } + if (pConsumerEp) { - ASSERT(consumerId == pConsumerEp->consumerId); + if (consumerId != pConsumerEp->consumerId) { + mError("sub:%s, mq rebalance cunsumberId:%" PRId64 " not matched saved:%" PRId64, sub, consumerId, + pConsumerEp->consumerId); + continue; + } + actualRemoved++; int32_t consumerVgNum = taosArrayGetSize(pConsumerEp->vgs); for (int32_t j = 0; j < consumerVgNum; j++) { @@ -233,7 +257,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR .pVgEp = pVgEp, }; taosHashPut(pHash, &pVgEp->vgId, sizeof(int32_t), &outputVg, sizeof(SMqRebOutputVg)); - mInfo("mq rebalance: remove vgId:%d from consumer:%" PRId64, pVgEp->vgId, consumerId); + mInfo("sub:%s, mq rebalance remove vgId:%d from consumer:%" PRId64, sub, pVgEp->vgId, consumerId); } taosArrayDestroy(pConsumerEp->vgs); taosHashRemove(pOutput->pSub->consumerHash, &consumerId, sizeof(int64_t)); @@ -241,7 +265,10 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR taosArrayPush(pOutput->removedConsumers, &consumerId); } } - ASSERT(removedNum == actualRemoved); + + if (removedNum != actualRemoved) { + mError("sub:%s, mq rebalance removedNum:%d not matched with actual:%d", sub, removedNum, actualRemoved); + } // if previously no consumer, there are vgs not assigned { @@ -254,7 +281,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR .pVgEp = pVgEp, }; taosHashPut(pHash, &pVgEp->vgId, sizeof(int32_t), &rebOutput, sizeof(SMqRebOutputVg)); - mInfo("mq rebalance: remove vgId:%d from unassigned", pVgEp->vgId); + mInfo("sub:%s, mq rebalance remove vgId:%d from unassigned", sub, pVgEp->vgId); } } @@ -268,8 +295,8 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR minVgCnt = totalVgNum / afterRebConsumerNum; imbConsumerNum = totalVgNum % afterRebConsumerNum; } - mInfo("mq rebalance: %d consumer after rebalance, at least %d vg each, %d consumer has more vg", afterRebConsumerNum, - minVgCnt, imbConsumerNum); + mInfo("sub:%s, mq rebalance %d consumer after rebalance, at least %d vg each, %d consumer has more vg", sub, + afterRebConsumerNum, minVgCnt, imbConsumerNum); // 4. first scan: remove consumer more than wanted, put to remove hash int32_t imbCnt = 0; @@ -278,7 +305,11 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR pIter = taosHashIterate(pOutput->pSub->consumerHash, pIter); if (pIter == NULL) break; SMqConsumerEp *pConsumerEp = (SMqConsumerEp *)pIter; - ASSERT(pConsumerEp->consumerId > 0); + if (pConsumerEp->consumerId <= 0) { + mError("sub:%s, mq rebalance cunsumberId:%" PRId64 " <= 0", sub, pConsumerEp->consumerId); + continue; + } + int32_t consumerVgNum = taosArrayGetSize(pConsumerEp->vgs); // all old consumers still existing are touched // TODO optimize: touch only consumer whose vgs changed @@ -298,7 +329,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR .pVgEp = pVgEp, }; taosHashPut(pHash, &pVgEp->vgId, sizeof(int32_t), &outputVg, sizeof(SMqRebOutputVg)); - mInfo("mq rebalance: remove vgId:%d from consumer:%" PRId64 ",(first scan)", pVgEp->vgId, + mInfo("sub:%s, mq rebalance remove vgId:%d from consumer:%" PRId64 ",(first scan)", sub, pVgEp->vgId, pConsumerEp->consumerId); } imbCnt++; @@ -313,7 +344,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR .pVgEp = pVgEp, }; taosHashPut(pHash, &pVgEp->vgId, sizeof(int32_t), &outputVg, sizeof(SMqRebOutputVg)); - mInfo("mq rebalance: remove vgId:%d from consumer:%" PRId64 ",(first scan)", pVgEp->vgId, + mInfo("sub:%s, mq rebalance remove vgId:%d from consumer:%" PRId64 ",(first scan)", sub, pVgEp->vgId, pConsumerEp->consumerId); } } @@ -325,13 +356,17 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR int32_t consumerNum = taosArrayGetSize(pInput->pRebInfo->newConsumers); for (int32_t i = 0; i < consumerNum; i++) { int64_t consumerId = *(int64_t *)taosArrayGet(pInput->pRebInfo->newConsumers, i); - ASSERT(consumerId > 0); + if (consumerId <= 0) { + mError("sub:%s, mq rebalance cunsumberId:%" PRId64 " <= 0", sub, consumerId); + continue; + } + SMqConsumerEp newConsumerEp; newConsumerEp.consumerId = consumerId; newConsumerEp.vgs = taosArrayInit(0, sizeof(void *)); taosHashPut(pOutput->pSub->consumerHash, &consumerId, sizeof(int64_t), &newConsumerEp, sizeof(SMqConsumerEp)); taosArrayPush(pOutput->newConsumers, &consumerId); - mInfo("mq rebalance: add new consumer:%" PRId64, consumerId); + mInfo("sub:%s, mq rebalance add new consumer:%" PRId64, sub, consumerId); } } @@ -344,13 +379,18 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR pIter = taosHashIterate(pOutput->pSub->consumerHash, pIter); if (pIter == NULL) break; SMqConsumerEp *pConsumerEp = (SMqConsumerEp *)pIter; - ASSERT(pConsumerEp->consumerId > 0); + if (pConsumerEp->consumerId <= 0) { + mError("sub:%s, mq rebalance cunsumberId:%" PRId64 " <= 0", sub, pConsumerEp->consumerId); + continue; + } // push until equal minVg while (taosArrayGetSize(pConsumerEp->vgs) < minVgCnt) { // iter hash and find one vg pRemovedIter = taosHashIterate(pHash, pRemovedIter); - ASSERT(pRemovedIter); + mError("sub:%s, removed iter is null", sub); + continue; + pRebVg = (SMqRebOutputVg *)pRemovedIter; // push taosArrayPush(pConsumerEp->vgs, &pRebVg->pVgEp); @@ -361,7 +401,6 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR } } - ASSERT(pIter == NULL); // 7. handle unassigned vg if (taosHashGetSize(pOutput->pSub->consumerHash) != 0) { // if has consumer, assign all left vg @@ -377,9 +416,12 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR } while (1) { pIter = taosHashIterate(pOutput->pSub->consumerHash, pIter); - ASSERT(pIter); pConsumerEp = (SMqConsumerEp *)pIter; - ASSERT(pConsumerEp->consumerId > 0); + if (pConsumerEp == NULL || pConsumerEp->consumerId <= 0) { + mError("sub:%s, mq rebalance cunsumberId invalid", sub); + continue; + } + if (taosArrayGetSize(pConsumerEp->vgs) == minVgCnt) { break; } @@ -404,19 +446,23 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR pIter = taosHashIterate(pHash, pIter); if (pIter == NULL) break; pRebOutput = (SMqRebOutputVg *)pIter; - ASSERT(pRebOutput->newConsumerId == -1); + if (pRebOutput->newConsumerId != 1) { + mError("sub:%s, mq rebalance output consumerId:%" PRId64 " not -1", sub, pRebOutput->newConsumerId); + continue; + } + taosArrayPush(pOutput->pSub->unassignedVgs, &pRebOutput->pVgEp); taosArrayPush(pOutput->rebVgs, pRebOutput); - mInfo("mq rebalance: unassign vgId:%d (second scan)", pRebOutput->pVgEp->vgId); + mInfo("sub:%s, mq rebalance unassign vgId:%d (second scan)", sub, pRebOutput->pVgEp->vgId); } } // 8. generate logs - mInfo("mq rebalance: calculation completed, rebalanced vg:"); + mInfo("sub:%s, mq rebalance calculation completed, rebalanced vg", sub); for (int32_t i = 0; i < taosArrayGetSize(pOutput->rebVgs); i++) { SMqRebOutputVg *pOutputRebVg = taosArrayGet(pOutput->rebVgs, i); - mInfo("mq rebalance: vgId:%d, moved from consumer:%" PRId64 ", to consumer:%" PRId64, pOutputRebVg->pVgEp->vgId, - pOutputRebVg->oldConsumerId, pOutputRebVg->newConsumerId); + mInfo("sub:%s, mq rebalance vgId:%d, moved from consumer:%" PRId64 ", to consumer:%" PRId64, sub, + pOutputRebVg->pVgEp->vgId, pOutputRebVg->oldConsumerId, pOutputRebVg->newConsumerId); } { void *pIter = NULL; @@ -425,10 +471,11 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR if (pIter == NULL) break; SMqConsumerEp *pConsumerEp = (SMqConsumerEp *)pIter; int32_t sz = taosArrayGetSize(pConsumerEp->vgs); - mInfo("mq rebalance: final cfg: consumer %" PRId64 " has %d vg", pConsumerEp->consumerId, sz); + mInfo("sub:%s, mq rebalance final cfg: consumer %" PRId64 " has %d vg", sub, pConsumerEp->consumerId, sz); for (int32_t i = 0; i < sz; i++) { SMqVgEp *pVgEp = taosArrayGetP(pConsumerEp->vgs, i); - mInfo("mq rebalance: final cfg: vg %d to consumer %" PRId64 "", pVgEp->vgId, pConsumerEp->consumerId); + mInfo("sub:%s, mq rebalance final cfg: vg %d to consumer %" PRId64 "", sub, pVgEp->vgId, + pConsumerEp->consumerId); } } } @@ -487,7 +534,8 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu consumerNum = taosArrayGetSize(pOutput->newConsumers); for (int32_t i = 0; i < consumerNum; i++) { int64_t consumerId = *(int64_t *)taosArrayGet(pOutput->newConsumers, i); - ASSERT(consumerId > 0); + if (consumerId <= 0) continue; + SMqConsumerObj *pConsumerOld = mndAcquireConsumer(pMnode, consumerId); SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(pConsumerOld->consumerId, pConsumerOld->cgroup); pConsumerNew->updateType = CONSUMER_UPDATE__ADD; @@ -497,7 +545,6 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu taosArrayPush(pConsumerNew->rebNewTopics, &topic); mndReleaseConsumer(pMnode, pConsumerOld); if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) { - ASSERT(0); tDeleteSMqConsumerObj(pConsumerNew); taosMemoryFree(pConsumerNew); goto REB_FAIL; @@ -510,7 +557,8 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu consumerNum = taosArrayGetSize(pOutput->removedConsumers); for (int32_t i = 0; i < consumerNum; i++) { int64_t consumerId = *(int64_t *)taosArrayGet(pOutput->removedConsumers, i); - ASSERT(consumerId > 0); + if (consumerId <= 0) continue; + SMqConsumerObj *pConsumerOld = mndAcquireConsumer(pMnode, consumerId); SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(pConsumerOld->consumerId, pConsumerOld->cgroup); pConsumerNew->updateType = CONSUMER_UPDATE__REMOVE; @@ -520,7 +568,6 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu taosArrayPush(pConsumerNew->rebRemovedTopics, &topic); mndReleaseConsumer(pMnode, pConsumerOld); if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) { - ASSERT(0); tDeleteSMqConsumerObj(pConsumerNew); taosMemoryFree(pConsumerNew); goto REB_FAIL; @@ -577,7 +624,6 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { char cgroup[TSDB_CGROUP_LEN]; mndSplitSubscribeKey(pRebInfo->key, topic, cgroup, true); SMqTopicObj *pTopic = mndAcquireTopic(pMnode, topic); - /*ASSERT(pTopic);*/ if (pTopic == NULL) { mError("mq rebalance %s failed since topic %s not exist, abort", pRebInfo->key, topic); continue; @@ -586,7 +632,6 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { rebOutput.pSub = mndCreateSub(pMnode, pTopic, pRebInfo->key); memcpy(rebOutput.pSub->dbName, pTopic->db, TSDB_DB_FNAME_LEN); - ASSERT(taosHashGetSize(rebOutput.pSub->consumerHash) == 0); taosRUnLockLatch(&pTopic->lock); mndReleaseTopic(pMnode, pTopic); @@ -606,7 +651,6 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { // if add more consumer to balanced subscribe, // possibly no vg is changed - /*ASSERT(taosArrayGetSize(rebOutput.rebVgs) != 0);*/ if (mndPersistRebResult(pMnode, pMsg, &rebOutput) < 0) { mError("mq rebalance persist rebalance output error, possibly vnode splitted or dropped"); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 6f8b0d8e04..14c3df38a7 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -276,8 +276,9 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_SUBSCRIBE_NOT_EXIST, "Subcribe not exist") TAOS_DEFINE_ERROR(TSDB_CODE_MND_OFFSET_NOT_EXIST, "Offset not exist") TAOS_DEFINE_ERROR(TSDB_CODE_MND_CONSUMER_NOT_READY, "Consumer not ready") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOPIC_SUBSCRIBED, "Topic subscribed cannot be dropped") -TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOPIC_MUST_BE_DELETED, "Topic must be dropped first") TAOS_DEFINE_ERROR(TSDB_CODE_MND_CGROUP_USED, "Consumer group being used by some consumer") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOPIC_MUST_BE_DELETED, "Topic must be dropped first") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_SUB_OPTION, "Invalid subscribe option") TAOS_DEFINE_ERROR(TSDB_CODE_MND_IN_REBALANCE, "Topic being rebalanced") // mnode-stream From e4ab8f986bd06a1eb41192f896b024114bed86b7 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 3 Jan 2023 17:06:43 +0800 Subject: [PATCH 3/4] remove assert --- source/dnode/mnode/impl/src/mndSubscribe.c | 57 +++------------------- 1 file changed, 7 insertions(+), 50 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 5cd0adf084..4a263b5bce 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -96,26 +96,12 @@ static SMqSubscribeObj *mndCreateSub(SMnode *pMnode, const SMqTopicObj *pTopic, pSub->subType = pTopic->subType; pSub->withMeta = pTopic->withMeta; - if (pSub->unassignedVgs->size != 0 || taosHashGetSize(pSub->consumerHash) != 0) { - tDeleteSubscribeObj(pSub); - taosMemoryFree(pSub); - terrno = TSDB_CODE_MND_INVALID_SUB_OPTION; - return NULL; - } - if (mndSchedInitSubEp(pMnode, pTopic, pSub) < 0) { tDeleteSubscribeObj(pSub); taosMemoryFree(pSub); return NULL; } - if (pSub->unassignedVgs->size <= 0 || taosHashGetSize(pSub->consumerHash) != 0) { - tDeleteSubscribeObj(pSub); - taosMemoryFree(pSub); - terrno = TSDB_CODE_MND_INVALID_SUB_OPTION; - return NULL; - } - return pSub; } @@ -229,24 +215,10 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR int32_t actualRemoved = 0; for (int32_t i = 0; i < removedNum; i++) { int64_t consumerId = *(int64_t *)taosArrayGet(pInput->pRebInfo->removedConsumers, i); - if (consumerId <= 0) { - mError("sub:%s, mq rebalance cunsumerId:%" PRId64 " <= 0", sub, consumerId); - continue; - } SMqConsumerEp *pConsumerEp = taosHashGet(pOutput->pSub->consumerHash, &consumerId, sizeof(int64_t)); - if (pConsumerEp == NULL) { - mError("sub:%s, mq rebalance ep is null, cunsumberId:%" PRId64, sub, consumerId); - continue; - } if (pConsumerEp) { - if (consumerId != pConsumerEp->consumerId) { - mError("sub:%s, mq rebalance cunsumberId:%" PRId64 " not matched saved:%" PRId64, sub, consumerId, - pConsumerEp->consumerId); - continue; - } - actualRemoved++; int32_t consumerVgNum = taosArrayGetSize(pConsumerEp->vgs); for (int32_t j = 0; j < consumerVgNum; j++) { @@ -305,10 +277,6 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR pIter = taosHashIterate(pOutput->pSub->consumerHash, pIter); if (pIter == NULL) break; SMqConsumerEp *pConsumerEp = (SMqConsumerEp *)pIter; - if (pConsumerEp->consumerId <= 0) { - mError("sub:%s, mq rebalance cunsumberId:%" PRId64 " <= 0", sub, pConsumerEp->consumerId); - continue; - } int32_t consumerVgNum = taosArrayGetSize(pConsumerEp->vgs); // all old consumers still existing are touched @@ -356,10 +324,6 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR int32_t consumerNum = taosArrayGetSize(pInput->pRebInfo->newConsumers); for (int32_t i = 0; i < consumerNum; i++) { int64_t consumerId = *(int64_t *)taosArrayGet(pInput->pRebInfo->newConsumers, i); - if (consumerId <= 0) { - mError("sub:%s, mq rebalance cunsumberId:%" PRId64 " <= 0", sub, consumerId); - continue; - } SMqConsumerEp newConsumerEp; newConsumerEp.consumerId = consumerId; @@ -379,10 +343,6 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR pIter = taosHashIterate(pOutput->pSub->consumerHash, pIter); if (pIter == NULL) break; SMqConsumerEp *pConsumerEp = (SMqConsumerEp *)pIter; - if (pConsumerEp->consumerId <= 0) { - mError("sub:%s, mq rebalance cunsumberId:%" PRId64 " <= 0", sub, pConsumerEp->consumerId); - continue; - } // push until equal minVg while (taosArrayGetSize(pConsumerEp->vgs) < minVgCnt) { @@ -417,10 +377,6 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR while (1) { pIter = taosHashIterate(pOutput->pSub->consumerHash, pIter); pConsumerEp = (SMqConsumerEp *)pIter; - if (pConsumerEp == NULL || pConsumerEp->consumerId <= 0) { - mError("sub:%s, mq rebalance cunsumberId invalid", sub); - continue; - } if (taosArrayGetSize(pConsumerEp->vgs) == minVgCnt) { break; @@ -446,10 +402,6 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR pIter = taosHashIterate(pHash, pIter); if (pIter == NULL) break; pRebOutput = (SMqRebOutputVg *)pIter; - if (pRebOutput->newConsumerId != 1) { - mError("sub:%s, mq rebalance output consumerId:%" PRId64 " not -1", sub, pRebOutput->newConsumerId); - continue; - } taosArrayPush(pOutput->pSub->unassignedVgs, &pRebOutput->pVgEp); taosArrayPush(pOutput->rebVgs, pRebOutput); @@ -534,7 +486,6 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu consumerNum = taosArrayGetSize(pOutput->newConsumers); for (int32_t i = 0; i < consumerNum; i++) { int64_t consumerId = *(int64_t *)taosArrayGet(pOutput->newConsumers, i); - if (consumerId <= 0) continue; SMqConsumerObj *pConsumerOld = mndAcquireConsumer(pMnode, consumerId); SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(pConsumerOld->consumerId, pConsumerOld->cgroup); @@ -557,7 +508,6 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu consumerNum = taosArrayGetSize(pOutput->removedConsumers); for (int32_t i = 0; i < consumerNum; i++) { int64_t consumerId = *(int64_t *)taosArrayGet(pOutput->removedConsumers, i); - if (consumerId <= 0) continue; SMqConsumerObj *pConsumerOld = mndAcquireConsumer(pMnode, consumerId); SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(pConsumerOld->consumerId, pConsumerOld->cgroup); @@ -631,6 +581,13 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { taosRLockLatch(&pTopic->lock); rebOutput.pSub = mndCreateSub(pMnode, pTopic, pRebInfo->key); + + if (rebOutput.pSub == NULL) { + mError("mq rebalance %s failed create sub since %s, abort", pRebInfo->key, terrstr()); + taosRUnLockLatch(&pTopic->lock); + mndReleaseTopic(pMnode, pTopic); + continue; + } memcpy(rebOutput.pSub->dbName, pTopic->db, TSDB_DB_FNAME_LEN); taosRUnLockLatch(&pTopic->lock); From 9df158e1d9756a9812b75a9425cf88ab0e2e78f1 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 3 Jan 2023 20:12:25 +0800 Subject: [PATCH 4/4] fix: update assert info --- source/dnode/mnode/impl/src/mndSubscribe.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 4a263b5bce..b8ef185199 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -348,8 +348,10 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR while (taosArrayGetSize(pConsumerEp->vgs) < minVgCnt) { // iter hash and find one vg pRemovedIter = taosHashIterate(pHash, pRemovedIter); - mError("sub:%s, removed iter is null", sub); - continue; + if (pRemovedIter == NULL) { + mError("sub:%s, removed iter is null", sub); + continue; + } pRebVg = (SMqRebOutputVg *)pRemovedIter; // push