diff --git a/source/dnode/mgmt/daemon/src/daemon.c b/source/dnode/mgmt/daemon/src/daemon.c
index 6c4fae406e..70dca0e4df 100644
--- a/source/dnode/mgmt/daemon/src/daemon.c
+++ b/source/dnode/mgmt/daemon/src/daemon.c
@@ -139,7 +139,7 @@ void dmnWaitSignal() {
 void dmnInitOption(SDnodeOpt *pOption) {
   pOption->sver = 30000000; //3.0.0.0
   pOption->numOfCores = tsNumOfCores;
-  pOption->numOfSupportVnodes = 1;
+  pOption->numOfSupportVnodes = 16;
   pOption->numOfCommitThreads = 1;
   pOption->statusInterval = tsStatusInterval;
   pOption->numOfThreadsPerCore = tsNumOfThreadsPerCore;
diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h
index ac9fe35f53..1a1306c3da 100644
--- a/source/dnode/mnode/impl/inc/mndDef.h
+++ b/source/dnode/mnode/impl/inc/mndDef.h
@@ -124,6 +124,7 @@ typedef struct {
   int64_t rebootTime;
   int64_t lastAccessTime;
   int32_t accessTimes;
+  int16_t numOfVnodes;
   int16_t numOfSupportVnodes;
   int16_t numOfCores;
   EDndStatus status;
diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c
index dd69a34dcc..9263fca695 100644
--- a/source/dnode/mnode/impl/src/mndTrans.c
+++ b/source/dnode/mnode/impl/src/mndTrans.c
@@ -442,7 +442,7 @@ static int32_t mndTransSync(SMnode *pMnode, STrans *pTrans) {
   }
   sdbSetRawStatus(pRaw, SDB_STATUS_READY);
 
-  mTrace("trans:%d, sync to other nodes", pTrans->id);
+  mDebug("trans:%d, sync to other nodes", pTrans->id);
   int32_t code = mndSyncPropose(pMnode, pRaw);
   if (code != 0) {
     mError("trans:%d, failed to sync since %s", pTrans->id, terrstr());
@@ -450,7 +450,7 @@ static int32_t mndTransSync(SMnode *pMnode, STrans *pTrans) {
     return -1;
   }
 
-  mTrace("trans:%d, sync finished", pTrans->id);
+  mDebug("trans:%d, sync finished", pTrans->id);
 
   code = sdbWrite(pMnode->pSdb, pRaw);
   if (code != 0) {
diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c
index 98382232ef..06e62d2528 100644
--- a/source/dnode/mnode/impl/src/mndVgroup.c
+++ b/source/dnode/mnode/impl/src/mndVgroup.c
@@ -86,7 +86,6 @@ SSdbRaw *mndVgroupActionEncode(SVgObj *pVgroup) {
   for (int8_t i = 0; i < pVgroup->replica; ++i) {
     SVnodeGid *pVgid = &pVgroup->vnodeGid[i];
     SDB_SET_INT32(pRaw, dataPos, pVgid->dnodeId)
-    SDB_SET_INT8(pRaw, dataPos, pVgid->role)
   }
   SDB_SET_RESERVE(pRaw, dataPos, TSDB_VGROUP_RESERVE_SIZE)
   SDB_SET_DATALEN(pRaw, dataPos);
@@ -121,7 +120,6 @@ SSdbRow *mndVgroupActionDecode(SSdbRaw *pRaw) {
   for (int8_t i = 0; i < pVgroup->replica; ++i) {
     SVnodeGid *pVgid = &pVgroup->vnodeGid[i];
     SDB_GET_INT32(pRaw, pRow, dataPos, &pVgid->dnodeId)
-    SDB_GET_INT8(pRaw, pRow, dataPos, (int8_t *)&pVgid->role)
   }
   SDB_GET_RESERVE(pRaw, pRow, dataPos, TSDB_VGROUP_RESERVE_SIZE)
 
@@ -237,44 +235,100 @@ SDropVnodeMsg *mndBuildDropVnodeMsg(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *p
   return pDrop;
 }
 
-static int32_t mndGetAvailableDnode(SMnode *pMnode, SVgObj *pVgroup) {
+// Collects the dnodes that are in ready status into a new array, recording each dnode's vnode load.
+// Returns NULL with terrno set if the array cannot be allocated; the caller destroys the array.
+static SArray *mndBuildDnodesArray(SMnode *pMnode) {
   SSdb *pSdb = pMnode->pSdb;
-  int32_t allocedVnodes = 0;
-  void *pIter = NULL;
+  int32_t numOfDnodes = mndGetDnodeSize(pMnode);
+  SArray *pArray = taosArrayInit(numOfDnodes, sizeof(SDnodeObj));
+  if (pArray == NULL) {
+    terrno = TSDB_CODE_OUT_OF_MEMORY;
+    return NULL;
+  }
 
-  while (allocedVnodes < pVgroup->replica) {
+  void *pIter = NULL;
+  while (1) {
     SDnodeObj *pDnode = NULL;
     pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pDnode);
     if (pIter == NULL) break;
 
-    // todo
-    if (mndIsDnodeInReadyStatus(pMnode, pDnode)) {
-      SVnodeGid *pVgid = &pVgroup->vnodeGid[allocedVnodes];
-      pVgid->dnodeId = pDnode->id;
-      if (pVgroup->replica == 1) {
-        pVgid->role = TAOS_SYNC_STATE_LEADER;
-      } else {
-        pVgid->role = TAOS_SYNC_STATE_FOLLOWER;
-      }
-      allocedVnodes++;
+    int32_t numOfVnodes = mndGetVnodesNum(pMnode, pDnode->id);
+    // Start from the current count on every call; otherwise the mnode adjustment below
+    // keeps accumulating on the object fetched from sdb and the sort key ignores real load.
+    pDnode->numOfVnodes = (int16_t)numOfVnodes;
+
+    bool isMnode = mndIsMnode(pMnode, pDnode->id);
+    if (isMnode) {
+      pDnode->numOfVnodes++;
     }
+
+    bool isReady = mndIsDnodeInReadyStatus(pMnode, pDnode);
+    if (isReady) {
+      taosArrayPush(pArray, pDnode);
+    }
+
+    mDebug("dnode:%d, numOfVnodes:%d numOfSupportVnodes:%d isMnode:%d ready:%d", pDnode->id, numOfVnodes,
+           pDnode->numOfSupportVnodes, isMnode, isReady);
     sdbRelease(pSdb, pDnode);
   }
 
-  if (allocedVnodes != pVgroup->replica) {
-    terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES;
-    return -1;
+  return pArray;
+}
+
+// Sort comparator: orders dnodes by load ratio (numOfVnodes / numOfSupportVnodes), least loaded first.
+static int32_t mndCompareDnodeVnodes(SDnodeObj *pDnode1, SDnodeObj *pDnode2) {
+  float d1Score = (float)pDnode1->numOfVnodes / pDnode1->numOfSupportVnodes;
+  float d2Score = (float)pDnode2->numOfVnodes / pDnode2->numOfSupportVnodes;
+  // A sort comparator must yield <0, 0 or >0; a 0/1 result is not a valid ordering.
+  return (d1Score > d2Score) - (d1Score < d2Score);
+}
+
+// Assigns the vgroup's replicas to the least loaded dnodes of pArray and bumps their load.
+// Returns 0 on success, -1 with terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES otherwise.
+static int32_t mndGetAvailableDnode(SMnode *pMnode, SVgObj *pVgroup, SArray *pArray) {
+  taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes);
+
+  for (int32_t v = 0; v < pVgroup->replica; ++v) {
+    SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
+    SDnodeObj *pDnode = taosArrayGet(pArray, v);
+    // ">=": a dnode already at its supported count has no room for another vnode.
+    if (pDnode == NULL || pDnode->numOfVnodes >= pDnode->numOfSupportVnodes) {
+      terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES;
+      return -1;
+    }
+
+    pVgid->dnodeId = pDnode->id;
+    if (pVgroup->replica == 1) {
+      pVgid->role = TAOS_SYNC_STATE_LEADER;
+    } else {
+      pVgid->role = TAOS_SYNC_STATE_FOLLOWER;
+    }
+
+    mDebug("db:%s, vgId:%d, vindex:%d dnodeId:%d is alloced", pVgroup->dbName, pVgroup->vgId, v, pVgid->dnodeId);
+    pDnode->numOfVnodes++;
   }
+
   return 0;
 }
 
 int32_t mndAllocVgroup(SMnode *pMnode, SDbObj *pDb, SVgObj **ppVgroups) {
-  SVgObj *pVgroups = calloc(pDb->cfg.numOfVgroups, sizeof(SVgObj));
+  int32_t code = -1;
+  SArray *pArray = NULL;
+  SVgObj *pVgroups = NULL;
+
+  pVgroups = calloc(pDb->cfg.numOfVgroups, sizeof(SVgObj));
   if (pVgroups == NULL) {
     terrno = TSDB_CODE_OUT_OF_MEMORY;
-    return -1;
+    goto ALLOC_VGROUP_OVER;
   }
 
+  pArray = mndBuildDnodesArray(pMnode);
+  if (pArray == NULL) {
+    goto ALLOC_VGROUP_OVER;
+  }
+
+  mDebug("db:%s, total %d dnodes used to create %d vgroups (%d vnodes)", pDb->name, (int32_t)taosArrayGetSize(pArray),
+         pDb->cfg.numOfVgroups, pDb->cfg.numOfVgroups * pDb->cfg.replications);
   int32_t allocedVgroups = 0;
   int32_t maxVgId = sdbGetMaxId(pMnode->pSdb, SDB_VGROUP);
   uint32_t hashMin = 0;
@@ -298,17 +352,23 @@ int32_t mndAllocVgroup(SMnode *pMnode, SDbObj *pDb, SVgObj **ppVgroups) {
     pVgroup->dbUid = pDb->uid;
     pVgroup->replica = pDb->cfg.replications;
 
-    if (mndGetAvailableDnode(pMnode, pVgroup) != 0) {
+    if (mndGetAvailableDnode(pMnode, pVgroup, pArray) != 0) {
       terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES;
-      free(pVgroups);
-      return -1;
+      goto ALLOC_VGROUP_OVER;
     }
 
     allocedVgroups++;
   }
 
   *ppVgroups = pVgroups;
-  return 0;
+  code = 0;
+
+  mDebug("db:%s, %d vgroups is alloced, replica:%d", pDb->name, pDb->cfg.numOfVgroups, pDb->cfg.replications);
+
+ALLOC_VGROUP_OVER:
+  if (code != 0) free(pVgroups);
+  taosArrayDestroy(pArray);
+  return code;
 }
 
 SEpSet mndGetVgroupEpset(SMnode *pMnode, SVgObj *pVgroup) {