Minor tweaks for crash_gen tool, re-demonstrating TD-997
This commit is contained in:
parent
b115abf1bc
commit
a79c5b1278
|
@ -136,7 +136,10 @@ class WorkerThread:
|
|||
|
||||
# clean up
|
||||
if (gConfig.per_thread_db_connection): # type: ignore
|
||||
self._dbConn.close()
|
||||
if self._dbConn.isOpen: #sometimes it is not open
|
||||
self._dbConn.close()
|
||||
else:
|
||||
logger.warning("Cleaning up worker thread, dbConn already closed")
|
||||
|
||||
def _doTaskLoop(self):
|
||||
# while self._curStep < self._pool.maxSteps:
|
||||
|
@ -146,6 +149,7 @@ class WorkerThread:
|
|||
try:
|
||||
tc.crossStepBarrier() # shared barrier first, INCLUDING the last one
|
||||
except threading.BrokenBarrierError as err: # main thread timed out
|
||||
print("_bto", end="")
|
||||
logger.debug("[TRD] Worker thread exiting due to main thread barrier time-out")
|
||||
break
|
||||
|
||||
|
@ -153,6 +157,7 @@ class WorkerThread:
|
|||
self.crossStepGate() # then per-thread gate, after being tapped
|
||||
logger.debug("[TRD] Worker thread [{}] exited step gate...".format(self._tid))
|
||||
if not self._tc.isRunning():
|
||||
print("_wts", end="")
|
||||
logger.debug("[TRD] Thread Coordinator not running any more, worker thread now stopping...")
|
||||
break
|
||||
|
||||
|
@ -169,6 +174,7 @@ class WorkerThread:
|
|||
logger.debug("[TRD] Worker thread [{}] finished executing task".format(self._tid))
|
||||
|
||||
self._dbInUse = False # there may be changes between steps
|
||||
# print("_wtd", end=None) # worker thread died
|
||||
|
||||
def verifyThreadSelf(self): # ensure we are called by this own thread
|
||||
if (threading.get_ident() != self._thread.ident):
|
||||
|
@ -197,12 +203,15 @@ class WorkerThread:
|
|||
# self._curStep += 1 # off to a new step...
|
||||
|
||||
def tapStepGate(self): # give it a tap, release the thread waiting there
|
||||
self.verifyThreadAlive()
|
||||
# self.verifyThreadAlive()
|
||||
self.verifyThreadMain() # only allowed for main thread
|
||||
|
||||
logger.debug("[TRD] Tapping worker thread {}".format(self._tid))
|
||||
self._stepGate.set() # wake up!
|
||||
time.sleep(0) # let the released thread run a bit
|
||||
if self._thread.is_alive():
|
||||
logger.debug("[TRD] Tapping worker thread {}".format(self._tid))
|
||||
self._stepGate.set() # wake up!
|
||||
time.sleep(0) # let the released thread run a bit
|
||||
else:
|
||||
print("_tad", end="") # Thread already dead
|
||||
|
||||
def execSql(self, sql): # TODO: expose DbConn directly
|
||||
return self.getDbConn().execute(sql)
|
||||
|
@ -332,6 +341,7 @@ class ThreadCoordinator:
|
|||
# end, and maybe signal them to stop
|
||||
else:
|
||||
raise
|
||||
return transitionFailed
|
||||
|
||||
self.resetExecutedTasks() # clear the tasks after we are done
|
||||
# Get ready for next step
|
||||
|
@ -373,7 +383,13 @@ class ThreadCoordinator:
|
|||
break # do transition only if tasks are error free
|
||||
|
||||
# Ending previous step
|
||||
transitionFailed = self._doTransition() # To start, we end step -1 first
|
||||
try:
|
||||
transitionFailed = self._doTransition() # To start, we end step -1 first
|
||||
except taos.error.ProgrammingError as err:
|
||||
transitionFailed = True
|
||||
errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno # correct error scheme
|
||||
logger.info("Transition failed: errno=0x{:X}, msg: {}".format(errno2, err))
|
||||
|
||||
# Then we move on to the next step
|
||||
self._releaseAllWorkerThreads(transitionFailed)
|
||||
|
||||
|
@ -788,12 +804,15 @@ class DbConnNative(DbConn):
|
|||
else:
|
||||
projPath = selfPath[:selfPath.find("tests")]
|
||||
|
||||
buildPath = None
|
||||
for root, dirs, files in os.walk(projPath):
|
||||
if ("taosd" in files):
|
||||
rootRealPath = os.path.dirname(os.path.realpath(root))
|
||||
if ("packaging" not in rootRealPath):
|
||||
buildPath = root[:len(root) - len("/build/bin")]
|
||||
break
|
||||
if buildPath == None:
|
||||
raise RuntimeError("Failed to determine buildPath, selfPath={}".format(self_path))
|
||||
return buildPath
|
||||
|
||||
|
||||
|
@ -1015,29 +1034,11 @@ class StateDbOnly(AnyState):
|
|||
if (not self.hasTask(tasks, TaskCreateDb)):
|
||||
# only if we don't create any more
|
||||
self.assertAtMostOneSuccess(tasks, TaskDropDb)
|
||||
self.assertIfExistThenSuccess(tasks, TaskDropDb)
|
||||
# self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not true in massively parrallel cases
|
||||
# Nothing to be said about adding data task
|
||||
# if ( self.hasSuccess(tasks, DropDbTask) ): # dropped the DB
|
||||
# self.assertHasTask(tasks, DropDbTask) # implied by hasSuccess
|
||||
# self.assertAtMostOneSuccess(tasks, DropDbTask)
|
||||
# self._state = self.STATE_EMPTY
|
||||
# if ( self.hasSuccess(tasks, TaskCreateSuperTable) ): # did not drop db, create table success
|
||||
# # self.assertHasTask(tasks, CreateFixedTableTask) # tried to create table
|
||||
# if ( not self.hasTask(tasks, TaskDropSuperTable) ):
|
||||
# self.assertAtMostOneSuccess(tasks, TaskCreateSuperTable) # at most 1 attempt is successful, if we don't drop anything
|
||||
# self.assertNoTask(tasks, DropDbTask) # should have have tried
|
||||
# if ( not self.hasSuccess(tasks, AddFixedDataTask) ): # just created table, no data yet
|
||||
# # can't say there's add-data attempts, since they may all fail
|
||||
# self._state = self.STATE_TABLE_ONLY
|
||||
# else:
|
||||
# self._state = self.STATE_HAS_DATA
|
||||
# What about AddFixedData?
|
||||
# elif ( self.hasSuccess(tasks, AddFixedDataTask) ):
|
||||
# self._state = self.STATE_HAS_DATA
|
||||
# else: # no success in dropping db tasks, no success in create fixed table? read data should also fail
|
||||
# # raise RuntimeError("Unexpected no-success scenario") # We might just landed all failure tasks,
|
||||
# self._state = self.STATE_DB_ONLY # no change
|
||||
|
||||
# TODO: restore the below, the problem exists, although unlikely in real-world
|
||||
# if (gSvcMgr!=None) and gSvcMgr.isRestarting():
|
||||
# if (gSvcMgr == None) or (not gSvcMgr.isRestarting()) :
|
||||
# self.assertIfExistThenSuccess(tasks, TaskDropDb)
|
||||
|
||||
|
||||
class StateSuperTableOnly(AnyState):
|
||||
|
@ -1192,7 +1193,7 @@ class StateMechine:
|
|||
# case of multiple creation and drops
|
||||
|
||||
if self._curState.canDropDb():
|
||||
if gSvcMgr == None: # only if we are not restarting service
|
||||
if gSvcMgr == None: # only if we are running as client-only
|
||||
self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
|
||||
# self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
|
||||
# case of drop-create-drop
|
||||
|
@ -1522,8 +1523,7 @@ class Task():
|
|||
try:
|
||||
self._executeInternal(te, wt) # TODO: no return value?
|
||||
except taos.error.ProgrammingError as err:
|
||||
errno2 = err.errno if (
|
||||
err.errno > 0) else 0x80000000 + err.errno # correct error scheme
|
||||
errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno # correct error scheme
|
||||
if (gConfig.continue_on_exception): # user choose to continue
|
||||
self.logDebug("[=] Continue after TAOS exception: errno=0x{:X}, msg: {}, SQL: {}".format(
|
||||
errno2, err, wt.getDbConn().getLastSql()))
|
||||
|
@ -1961,6 +1961,7 @@ class TaskRestartService(StateTransitionTask):
|
|||
return state.canDropFixedSuperTable() # Basicallly when we have the super table
|
||||
return False # don't run this otherwise
|
||||
|
||||
CHANCE_TO_RESTART_SERVICE = 100
|
||||
def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
|
||||
if not gConfig.auto_start_service: # only execute when we are in -a mode
|
||||
print("_a", end="", flush=True)
|
||||
|
@ -1972,7 +1973,7 @@ class TaskRestartService(StateTransitionTask):
|
|||
return
|
||||
self._isRunning = True
|
||||
|
||||
if Dice.throw(50) == 0: # 1 in N chance
|
||||
if Dice.throw(self.CHANCE_TO_RESTART_SERVICE) == 0: # 1 in N chance
|
||||
dbc = wt.getDbConn()
|
||||
dbc.execute("show databases") # simple delay, align timing with other workers
|
||||
gSvcMgr.restart()
|
||||
|
@ -2459,11 +2460,12 @@ class TdeSubProcess:
|
|||
# for f in filelist:
|
||||
# filePath = os.path.join(logPath, f)
|
||||
# print("Removing log file: {}".format(filePath))
|
||||
# os.remove(filePath)
|
||||
logPathSaved = logPath + "_" + time.strftime('%Y-%m-%d-%H-%M-%S')
|
||||
logger.info("Saving old log files to: {}".format(logPathSaved))
|
||||
os.rename(logPath, logPathSaved)
|
||||
os.mkdir(logPath) # recreate
|
||||
# os.remove(filePath)
|
||||
if os.path.exists(logPath):
|
||||
logPathSaved = logPath + "_" + time.strftime('%Y-%m-%d-%H-%M-%S')
|
||||
logger.info("Saving old log files to: {}".format(logPathSaved))
|
||||
os.rename(logPath, logPathSaved)
|
||||
# os.mkdir(logPath) # recreate, no need actually, TDengine will auto-create with proper perms
|
||||
|
||||
|
||||
svcCmd = [taosdPath, '-c', cfgPath]
|
||||
|
@ -2622,13 +2624,13 @@ class ClientManager:
|
|||
def prepare(self):
|
||||
self._printLastNumbers()
|
||||
|
||||
def run(self, svcMgr):
|
||||
def run(self, svcMgr):
|
||||
self._printLastNumbers()
|
||||
|
||||
dbManager = DbManager() # Regular function
|
||||
thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
|
||||
self.tc = ThreadCoordinator(thPool, dbManager)
|
||||
|
||||
|
||||
self.tc.run()
|
||||
# print("exec stats: {}".format(self.tc.getExecStats()))
|
||||
# print("TC failed = {}".format(self.tc.isFailed()))
|
||||
|
|
Loading…
Reference in New Issue