homework-jianmu/tests/system-test/eco-system/util/restartDnodes.py

84 lines
2.2 KiB
Python

import time
import os
import subprocess
import random
import platform
class dnode():
def __init__(self, pid, path):
self.pid = pid
self.path = path
# run exePath no wait finished
def runNoWait(exePath):
if platform.system().lower() == 'windows':
cmd = f"mintty -h never {exePath}"
else:
cmd = f"nohup {exePath} > /dev/null 2>&1 & "
if os.system(cmd) != 0:
return False
else:
return True
# get online dnodes
def getDnodes():
cmd = "ps aux | grep taosd | awk '{{print $2,$11,$12,$13}}'"
result = os.system(cmd)
result=subprocess.check_output(cmd,shell=True)
strout = result.decode('utf-8').split("\n")
dnodes = []
for line in strout:
cols = line.split(' ')
if len(cols) != 4:
continue
exepath = cols[1]
if len(exepath) < 5 :
continue
if exepath[-5:] != 'taosd':
continue
# add to list
path = cols[1] + " " + cols[2] + " " + cols[3]
dnodes.append(dnode(cols[0], path))
print(" show dnodes cnt=%d...\n"%(len(dnodes)))
for dn in dnodes:
print(f" pid={dn.pid} path={dn.path}")
return dnodes
def restartDnodes(dnodes, cnt, seconds):
print(f"start dnode cnt={cnt} wait={seconds}s")
selects = random.sample(dnodes, cnt)
for select in selects:
print(f" kill -9 {select.pid}")
cmd = f"kill -9 {select.pid}"
os.system(cmd)
print(f" restart {select.path}")
if runNoWait(select.path) == False:
print(f"run {select.path} failed.")
raise Exception("exe failed.")
print(f" sleep {seconds}s ...")
time.sleep(seconds)
def run():
# kill seconds interval
killLoop = 10
minKill = 1
maxKill = 10
for i in range(killLoop):
dnodes = getDnodes()
killCnt = 0
if len(dnodes) > 0:
killCnt = random.randint(1, len(dnodes))
restartDnodes(dnodes, killCnt, random.randint(1, 5))
seconds = random.randint(minKill, maxKill)
print(f"----------- kill loop i={i} killCnt={killCnt} done. do sleep {seconds}s ... \n")
time.sleep(seconds)
if __name__ == '__main__':
run()