[TD-638]
This commit is contained in:
parent
0b7dea8ce2
commit
f6a3c2432c
|
@ -616,6 +616,16 @@ static void dnodeSendStatusMsg(void *handle, void *tmrId) {
|
|||
pStatus->numOfCores = htons((uint16_t) tsNumOfCores);
|
||||
pStatus->diskAvailable = tsAvailDataDirGB;
|
||||
pStatus->alternativeRole = (uint8_t) tsAlternativeRole;
|
||||
|
||||
// fill cluster cfg parameters
|
||||
pStatus->ClusterCfgPara.numOfMnodes = tsNumOfMnodes;
|
||||
pStatus->ClusterCfgPara.mnodeEqualVnodeNum = tsMnodeEqualVnodeNum;
|
||||
pStatus->ClusterCfgPara.offlineThreshold = tsOfflineThreshold;
|
||||
pStatus->ClusterCfgPara.statusInterval = tsStatusInterval;
|
||||
strcpy(pStatus->ClusterCfgPara.arbitrator, tsArbitrator);
|
||||
strcpy(pStatus->ClusterCfgPara.timezone, tsTimezone);
|
||||
strcpy(pStatus->ClusterCfgPara.locale, tsLocale);
|
||||
strcpy(pStatus->ClusterCfgPara.charset, tsCharset);
|
||||
|
||||
vnodeBuildStatusMsg(pStatus);
|
||||
contLen = sizeof(SDMStatusMsg) + pStatus->openVnodes * sizeof(SVnodeLoad);
|
||||
|
|
|
@ -121,6 +121,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_DNODE_NOT_EXIST, 0, 0x0331, "mnode dnod
|
|||
TAOS_DEFINE_ERROR(TSDB_CODE_MND_VGROUP_NOT_EXIST, 0, 0x0332, "mnode vgroup not exist")
|
||||
TAOS_DEFINE_ERROR(TSDB_CODE_MND_NO_REMOVE_MASTER, 0, 0x0333, "mnode cant not remove master")
|
||||
TAOS_DEFINE_ERROR(TSDB_CODE_MND_NO_ENOUGH_DNODES, 0, 0x0334, "mnode no enough dnodes")
|
||||
TAOS_DEFINE_ERROR(TSDB_CODE_MND_CLUSTER_CFG_INCONSISTENT, 0, 0x0335, "mnode cluster cfg inconsistent")
|
||||
|
||||
TAOS_DEFINE_ERROR(TSDB_CODE_MND_ACCT_ALREADY_EXIST, 0, 0x0340, "mnode accounts already exist")
|
||||
TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_ACCT, 0, 0x0341, "mnode invalid account")
|
||||
|
|
|
@ -557,18 +557,30 @@ typedef struct {
|
|||
} SDMMnodeInfos;
|
||||
|
||||
typedef struct {
|
||||
uint32_t version;
|
||||
int32_t dnodeId;
|
||||
char dnodeEp[TSDB_EP_LEN];
|
||||
uint32_t moduleStatus;
|
||||
uint32_t lastReboot; // time stamp for last reboot
|
||||
uint16_t numOfTotalVnodes; // from config file
|
||||
uint16_t openVnodes;
|
||||
uint16_t numOfCores;
|
||||
float diskAvailable; // GB
|
||||
uint8_t alternativeRole;
|
||||
uint8_t reserve[15];
|
||||
SVnodeLoad load[];
|
||||
int32_t numOfMnodes; // tsNumOfMnodes
|
||||
int32_t mnodeEqualVnodeNum; // tsMnodeEqualVnodeNum
|
||||
int32_t offlineThreshold; // tsOfflineThreshold
|
||||
int32_t statusInterval; // tsStatusInterval
|
||||
char arbitrator[TSDB_EP_LEN]; // tsArbitrator
|
||||
char timezone[64]; // tsTimezone
|
||||
char locale[TSDB_LOCALE_LEN]; // tsLocale
|
||||
char charset[TSDB_LOCALE_LEN]; // tsCharset
|
||||
} SClusterCfg;
|
||||
|
||||
typedef struct {
|
||||
uint32_t version;
|
||||
int32_t dnodeId;
|
||||
char dnodeEp[TSDB_EP_LEN];
|
||||
uint32_t moduleStatus;
|
||||
uint32_t lastReboot; // time stamp for last reboot
|
||||
uint16_t numOfTotalVnodes; // from config file
|
||||
uint16_t openVnodes;
|
||||
uint16_t numOfCores;
|
||||
float diskAvailable; // GB
|
||||
uint8_t alternativeRole;
|
||||
uint8_t reserve[15];
|
||||
SClusterCfg ClusterCfgPara;
|
||||
SVnodeLoad load[];
|
||||
} SDMStatusMsg;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
@ -277,6 +277,20 @@ static void mnodeProcessCfgDnodeMsgRsp(SRpcMsg *rpcMsg) {
|
|||
mPrint("cfg dnode rsp is received");
|
||||
}
|
||||
|
||||
static bool mnodeCheckClusterCfgPara(const SClusterCfg *clusterCfg) {
|
||||
if (clusterCfg->numOfMnodes != tsNumOfMnodes) return false;
|
||||
if (clusterCfg->mnodeEqualVnodeNum != tsMnodeEqualVnodeNum) return false;
|
||||
if (clusterCfg->offlineThreshold != tsOfflineThreshold) return false;
|
||||
if (clusterCfg->statusInterval != tsStatusInterval) return false;
|
||||
|
||||
if (0 != strncasecmp(clusterCfg->arbitrator, tsArbitrator, strlen(tsArbitrator))) return false;
|
||||
if (0 != strncasecmp(clusterCfg->timezone, tsTimezone, strlen(tsTimezone))) return false;
|
||||
if (0 != strncasecmp(clusterCfg->locale, tsLocale, strlen(tsLocale))) return false;
|
||||
if (0 != strncasecmp(clusterCfg->charset, tsCharset, strlen(tsCharset))) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
|
||||
SDMStatusMsg *pStatus = pMsg->rpcMsg.pCont;
|
||||
pStatus->dnodeId = htonl(pStatus->dnodeId);
|
||||
|
@ -312,7 +326,6 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
|
|||
pDnode->alternativeRole = pStatus->alternativeRole;
|
||||
pDnode->totalVnodes = pStatus->numOfTotalVnodes;
|
||||
pDnode->moduleStatus = pStatus->moduleStatus;
|
||||
pDnode->lastAccess = tsAccessSquence;
|
||||
|
||||
if (pStatus->dnodeId == 0) {
|
||||
mTrace("dnode:%d %s, first access", pDnode->dnodeId, pDnode->dnodeEp);
|
||||
|
@ -338,6 +351,14 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
|
|||
}
|
||||
|
||||
if (pDnode->status == TAOS_DN_STATUS_OFFLINE) {
|
||||
// Verify that the cluster parameters are consistent when the status changes from offline to ready
|
||||
bool ret = mnodeCheckClusterCfgPara(&(pStatus->ClusterCfgPara));
|
||||
if (false == ret) {
|
||||
mnodeDecDnodeRef(pDnode);
|
||||
mError("dnode %s cluster cfg parameters inconsistent", pStatus->dnodeEp);
|
||||
return TSDB_CODE_MND_CLUSTER_CFG_INCONSISTENT;
|
||||
}
|
||||
|
||||
mTrace("dnode:%d, from offline to online", pDnode->dnodeId);
|
||||
pDnode->status = TAOS_DN_STATUS_READY;
|
||||
balanceUpdateMnode();
|
||||
|
@ -352,6 +373,8 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
|
|||
return TSDB_CODE_MND_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
pDnode->lastAccess = tsAccessSquence;
|
||||
|
||||
mnodeGetMnodeInfos(&pRsp->mnodes);
|
||||
|
||||
pRsp->dnodeCfg.dnodeId = htonl(pDnode->dnodeId);
|
||||
|
|
|
@ -0,0 +1,190 @@
|
|||
system sh/stop_dnodes.sh
|
||||
system sh/deploy.sh -n dnode1 -i 1
|
||||
system sh/deploy.sh -n dnode2 -i 2
|
||||
system sh/deploy.sh -n dnode3 -i 3
|
||||
system sh/deploy.sh -n dnode4 -i 4
|
||||
system sh/deploy.sh -n dnode5 -i 5
|
||||
system sh/deploy.sh -n dnode6 -i 6
|
||||
system sh/deploy.sh -n dnode7 -i 7
|
||||
|
||||
|
||||
system sh/cfg.sh -n dnode1 -c numOfMnodes -v 2
|
||||
system sh/cfg.sh -n dnode1 -c mnodeEqualVnodeNum -v 4
|
||||
system sh/cfg.sh -n dnode1 -c offlineThreshold -v 15
|
||||
system sh/cfg.sh -n dnode1 -c statusInterval -v 3
|
||||
system sh/cfg.sh -n dnode1 -c arbitrator -v $arbitrator
|
||||
#system sh/cfg.sh -n dnode1 -c timezone -v ""
|
||||
#system sh/cfg.sh -n dnode1 -c locale -v ""
|
||||
#system sh/cfg.sh -n dnode1 -c charset -v ""
|
||||
system sh/cfg.sh -n dnode1 -c balanceInterval -v 10
|
||||
|
||||
######## dnode2: same parameters as dnode1
|
||||
system sh/cfg.sh -n dnode2 -c numOfMnodes -v 2
|
||||
system sh/cfg.sh -n dnode2 -c mnodeEqualVnodeNum -v 4
|
||||
system sh/cfg.sh -n dnode2 -c offlineThreshold -v 15
|
||||
system sh/cfg.sh -n dnode2 -c statusInterval -v 3
|
||||
system sh/cfg.sh -n dnode2 -c arbitrator -v $arbitrator
|
||||
#system sh/cfg.sh -n dnode2 -c timezone -v ""
|
||||
#system sh/cfg.sh -n dnode2 -c locale -v ""
|
||||
#system sh/cfg.sh -n dnode2 -c charset -v ""
|
||||
system sh/cfg.sh -n dnode2 -c balanceInterval -v 10
|
||||
|
||||
######## dnode3: one parameter (numOfMnodes) differs from dnode1
|
||||
system sh/cfg.sh -n dnode3 -c numOfMnodes -v 3
|
||||
system sh/cfg.sh -n dnode3 -c mnodeEqualVnodeNum -v 4
|
||||
system sh/cfg.sh -n dnode3 -c offlineThreshold -v 15
|
||||
system sh/cfg.sh -n dnode3 -c statusInterval -v 3
|
||||
system sh/cfg.sh -n dnode3 -c arbitrator -v $arbitrator
|
||||
#system sh/cfg.sh -n dnode3 -c timezone -v ""
|
||||
#system sh/cfg.sh -n dnode3 -c locale -v ""
|
||||
#system sh/cfg.sh -n dnode3 -c charset -v ""
|
||||
system sh/cfg.sh -n dnode3 -c balanceInterval -v 10
|
||||
|
||||
######## dnode4: one parameter (mnodeEqualVnodeNum) differs from dnode1
|
||||
system sh/cfg.sh -n dnode4 -c numOfMnodes -v 2
|
||||
system sh/cfg.sh -n dnode4 -c mnodeEqualVnodeNum -v 5
|
||||
system sh/cfg.sh -n dnode4 -c offlineThreshold -v 15
|
||||
system sh/cfg.sh -n dnode4 -c statusInterval -v 3
|
||||
system sh/cfg.sh -n dnode4 -c arbitrator -v $arbitrator
|
||||
#system sh/cfg.sh -n dnode4 -c timezone -v ""
|
||||
#system sh/cfg.sh -n dnode4 -c locale -v ""
|
||||
#system sh/cfg.sh -n dnode4 -c charset -v ""
|
||||
system sh/cfg.sh -n dnode4 -c balanceInterval -v 10
|
||||
|
||||
######## dnode5: one parameter (offlineThreshold) differs from dnode1
|
||||
system sh/cfg.sh -n dnode5 -c numOfMnodes -v 2
|
||||
system sh/cfg.sh -n dnode5 -c mnodeEqualVnodeNum -v 4
|
||||
system sh/cfg.sh -n dnode5 -c offlineThreshold -v 16
|
||||
system sh/cfg.sh -n dnode5 -c statusInterval -v 3
|
||||
system sh/cfg.sh -n dnode5 -c arbitrator -v $arbitrator
|
||||
#system sh/cfg.sh -n dnode5 -c timezone -v ""
|
||||
#system sh/cfg.sh -n dnode5 -c locale -v ""
|
||||
#system sh/cfg.sh -n dnode5 -c charset -v ""
|
||||
system sh/cfg.sh -n dnode5 -c balanceInterval -v 10
|
||||
|
||||
|
||||
######## dnode6: one parameter (statusInterval) differs from dnode1
|
||||
system sh/cfg.sh -n dnode6 -c numOfMnodes -v 2
|
||||
system sh/cfg.sh -n dnode6 -c mnodeEqualVnodeNum -v 4
|
||||
system sh/cfg.sh -n dnode6 -c offlineThreshold -v 15
|
||||
system sh/cfg.sh -n dnode6 -c statusInterval -v 2
|
||||
system sh/cfg.sh -n dnode6 -c arbitrator -v $arbitrator
|
||||
#system sh/cfg.sh -n dnode6 -c timezone -v ""
|
||||
#system sh/cfg.sh -n dnode6 -c locale -v ""
|
||||
#system sh/cfg.sh -n dnode6 -c charset -v ""
|
||||
system sh/cfg.sh -n dnode6 -c balanceInterval -v 10
|
||||
|
||||
|
||||
######## dnode7: one parameter (arbitrator) differs from dnode1
|
||||
system sh/cfg.sh -n dnode7 -c numOfMnodes -v 2
|
||||
system sh/cfg.sh -n dnode7 -c mnodeEqualVnodeNum -v 4
|
||||
system sh/cfg.sh -n dnode7 -c offlineThreshold -v 15
|
||||
system sh/cfg.sh -n dnode7 -c statusInterval -v 3
|
||||
system sh/cfg.sh -n dnode7 -c arbitrator -v "plum-VirtualBox:8001"
|
||||
#system sh/cfg.sh -n dnode7 -c timezone -v ""
|
||||
#system sh/cfg.sh -n dnode7 -c locale -v ""
|
||||
#system sh/cfg.sh -n dnode7 -c charset -v ""
|
||||
system sh/cfg.sh -n dnode7 -c balanceInterval -v 10
|
||||
|
||||
print ============== step0: start tarbitrator
|
||||
system sh/exec_tarbitrator.sh -s start
|
||||
|
||||
print ============== step1: start dnode1
|
||||
system sh/exec.sh -n dnode1 -s start
|
||||
sleep 3000
|
||||
sql connect
|
||||
|
||||
print ============== step2: start dnode2~7 and add into cluster
|
||||
system sh/exec.sh -n dnode2 -s start
|
||||
system sh/exec.sh -n dnode3 -s start
|
||||
system sh/exec.sh -n dnode4 -s start
|
||||
system sh/exec.sh -n dnode5 -s start
|
||||
system sh/exec.sh -n dnode6 -s start
|
||||
system sh/exec.sh -n dnode7 -s start
|
||||
sql create dnode $hostname2
|
||||
sql create dnode $hostname3
|
||||
sql create dnode $hostname4
|
||||
sql create dnode $hostname5
|
||||
sql create dnode $hostname6
|
||||
sql create dnode $hostname7
|
||||
sleep 10000
|
||||
|
||||
# Poll until all seven dnodes are listed by "show dnodes". The retry counter
# makes the case fail (return -1) after 10 attempts instead of looping
# forever when the cluster never reaches the expected row count.
$loopCnt = 0
wait_dnode_created:
$loopCnt = $loopCnt + 1
if $loopCnt == 10 then
  return -1
endi
sql show dnodes
if $rows != 7 then
  sleep 2000
  goto wait_dnode_created
endi
|
||||
print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1
|
||||
print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2
|
||||
print $data0_3 $data1_3 $data2_3 $data3_3 $data4_3
|
||||
print $data0_4 $data1_4 $data2_4 $data3_4 $data4_4
|
||||
print $data0_5 $data1_5 $data2_5 $data3_5 $data4_5
|
||||
print $data0_6 $data1_6 $data2_6 $data3_6 $data4_6
|
||||
print $data0_7 $data1_7 $data2_7 $data3_7 $data4_7
|
||||
$dnode1Status = $data4_1
|
||||
$dnode2Status = $data4_2
|
||||
$dnode3Status = $data4_3
|
||||
$dnode4Status = $data4_4
|
||||
$dnode5Status = $data4_5
|
||||
$dnode6Status = $data4_6
|
||||
$dnode7Status = $data4_7
|
||||
|
||||
if $dnode1Status != ready then
|
||||
return -1
|
||||
endi
|
||||
if $dnode2Status != ready then
|
||||
return -1
|
||||
endi
|
||||
if $dnode3Status != offline then
|
||||
return -1
|
||||
endi
|
||||
if $dnode4Status != offline then
|
||||
return -1
|
||||
endi
|
||||
if $dnode5Status != offline then
|
||||
return -1
|
||||
endi
|
||||
if $dnode6Status != offline then
|
||||
return -1
|
||||
endi
|
||||
if $dnode7Status != offline then
|
||||
return -1
|
||||
endi
|
||||
|
||||
sleep 10000
|
||||
|
||||
# Poll until the dnodes that stayed offline have been dropped and only two
# rows remain in "show dnodes". The retry counter turns a cluster that never
# drops them into a test failure (return -1) instead of an endless loop.
# The bound of 30 attempts is deliberately generous relative to the
# offlineThreshold of 15 configured above (presumably seconds -- confirm).
$loopCnt = 0
wait_dnode_offline_overtime_dropped:
$loopCnt = $loopCnt + 1
if $loopCnt == 30 then
  return -1
endi
sql show dnodes
print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1
print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2
print $data0_3 $data1_3 $data2_3 $data3_3 $data4_3
print $data0_4 $data1_4 $data2_4 $data3_4 $data4_4
print $data0_5 $data1_5 $data2_5 $data3_5 $data4_5
print $data0_6 $data1_6 $data2_6 $data3_6 $data4_6
print $data0_7 $data1_7 $data2_7 $data3_7 $data4_7
if $rows != 2 then
  sleep 2000
  goto wait_dnode_offline_overtime_dropped
endi
|
||||
print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1
|
||||
print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2
|
||||
print $data0_3 $data1_3 $data2_3 $data3_3 $data4_3
|
||||
print $data0_4 $data1_4 $data2_4 $data3_4 $data4_4
|
||||
print $data0_5 $data1_5 $data2_5 $data3_5 $data4_5
|
||||
print $data0_6 $data1_6 $data2_6 $data3_6 $data4_6
|
||||
print $data0_7 $data1_7 $data2_7 $data3_7 $data4_7
|
||||
$dnode1Status = $data4_1
|
||||
$dnode2Status = $data4_2
|
||||
$dnode3Status = $data4_3
|
||||
$dnode4Status = $data4_4
|
||||
$dnode5Status = $data4_5
|
||||
$dnode6Status = $data4_6
|
||||
$dnode7Status = $data4_7
|
||||
|
||||
if $dnode1Status != ready then
|
||||
return -1
|
||||
endi
|
||||
if $dnode2Status != ready then
|
||||
return -1
|
||||
endi
|
|
@ -5,11 +5,11 @@ system sh/deploy.sh -n dnode3 -i 3
|
|||
system sh/deploy.sh -n dnode4 -i 4
|
||||
system sh/deploy.sh -n dnode5 -i 5
|
||||
|
||||
system sh/cfg.sh -n dnode1 -c numOfMPeers -v 1
|
||||
system sh/cfg.sh -n dnode2 -c numOfMPeers -v 1
|
||||
system sh/cfg.sh -n dnode3 -c numOfMPeers -v 1
|
||||
system sh/cfg.sh -n dnode4 -c numOfMPeers -v 1
|
||||
system sh/cfg.sh -n dnode5 -c numOfMPeers -v 1
|
||||
system sh/cfg.sh -n dnode1 -c numOfMnodes -v 1
|
||||
system sh/cfg.sh -n dnode2 -c numOfMnodes -v 1
|
||||
system sh/cfg.sh -n dnode3 -c numOfMnodes -v 1
|
||||
system sh/cfg.sh -n dnode4 -c numOfMnodes -v 1
|
||||
system sh/cfg.sh -n dnode5 -c numOfMnodes -v 1
|
||||
|
||||
system sh/cfg.sh -n dnode1 -c walLevel -v 1
|
||||
system sh/cfg.sh -n dnode2 -c walLevel -v 1
|
||||
|
|
|
@ -96,7 +96,12 @@ endi
|
|||
print ============== step3: stop dnode4, and remove its vnodeX subdirector
|
||||
system sh/exec.sh -n dnode4 -s stop -x SIGINT
|
||||
sleep $sleepTimer
|
||||
$loopCnt = 0
|
||||
wait_dnode4_offline_0:
|
||||
$loopCnt = $loopCnt + 1
|
||||
if $loopCnt == 10 then
|
||||
return -1
|
||||
endi
|
||||
sql show dnodes
|
||||
if $rows != 4 then
|
||||
sleep 2000
|
||||
|
@ -148,7 +153,14 @@ sleep 1000
|
|||
print ============== step4: restart dnode4, waiting sync end
|
||||
system sh/exec.sh -n dnode4 -s start
|
||||
sleep $sleepTimer
|
||||
|
||||
$loopCnt = 0
|
||||
wait_dnode4_reready:
|
||||
$loopCnt = $loopCnt + 1
|
||||
if $loopCnt == 10 then
|
||||
return -1
|
||||
endi
|
||||
|
||||
sql show dnodes
|
||||
if $rows != 4 then
|
||||
sleep 2000
|
||||
|
@ -171,7 +183,13 @@ if $dnode4Status != ready then
|
|||
goto wait_dnode4_reready
|
||||
endi
|
||||
|
||||
$loopCnt = 0
|
||||
wait_dnode4_vgroup_slave:
|
||||
$loopCnt = $loopCnt + 1
|
||||
if $loopCnt == 10 then
|
||||
return -1
|
||||
endi
|
||||
|
||||
sql show vgroups
|
||||
if $rows != 1 then
|
||||
sleep 2000
|
||||
|
@ -200,7 +218,13 @@ system sh/exec.sh -n dnode2 -s stop
|
|||
system sh/exec.sh -n dnode3 -s stop
|
||||
sleep $sleepTimer
|
||||
|
||||
|
||||
$loopCnt = 0
|
||||
wait_dnode23_offline:
|
||||
$loopCnt = $loopCnt + 1
|
||||
if $loopCnt == 10 then
|
||||
return -1
|
||||
endi
|
||||
sql show dnodes
|
||||
if $rows != 4 then
|
||||
sleep 2000
|
||||
|
@ -231,7 +255,13 @@ if $dnode4Status != ready then
|
|||
goto wait_dnode23_offline
|
||||
endi
|
||||
|
||||
$loopCnt = 0
|
||||
wait_dnode4_vgroup_master:
|
||||
$loopCnt = $loopCnt + 1
|
||||
if $loopCnt == 10 then
|
||||
return -1
|
||||
endi
|
||||
|
||||
sql show vgroups
|
||||
if $rows != 1 then
|
||||
sleep 2000
|
||||
|
|
Loading…
Reference in New Issue