Merge pull request #15423 from taosdata/enh/crash_gen
Enhanced crash_gen tool to report better error for deadlocks
This commit is contained in:
commit
6f3bf5b741
|
@ -371,7 +371,9 @@ class ThreadCoordinator:
|
|||
if isinstance(err, CrashGenError): # our own transition failure
|
||||
Logging.info("State transition error")
|
||||
# TODO: saw an error here once, let's print out stack info for err?
|
||||
traceback.print_stack()
|
||||
traceback.print_stack() # Stack frame to here.
|
||||
Logging.info("Caused by:")
|
||||
traceback.print_exception(*sys.exc_info()) # Ref: https://www.geeksforgeeks.org/how-to-print-exception-stack-trace-in-python/
|
||||
transitionFailed = True
|
||||
self._te = None # Not running any more
|
||||
self._execStats.registerFailure("State transition error: {}".format(err))
|
||||
|
@ -741,7 +743,8 @@ class AnyState:
|
|||
sCnt += 1
|
||||
if (sCnt >= 2):
|
||||
raise CrashGenError(
|
||||
"Unexpected more than 1 success with task: {}, in task set: {}".format(
|
||||
"Unexpected more than 1 success at state: {}, with task: {}, in task set: {}".format(
|
||||
self.__class__.__name__,
|
||||
cls.__name__, # verified just now that isinstance(task, cls)
|
||||
[c.__class__.__name__ for c in tasks]
|
||||
))
|
||||
|
@ -756,8 +759,11 @@ class AnyState:
|
|||
if task.isSuccess():
|
||||
sCnt += 1
|
||||
if (exists and sCnt <= 0):
|
||||
raise CrashGenError("Unexpected zero success for task type: {}, from tasks: {}"
|
||||
.format(cls, tasks))
|
||||
raise CrashGenError("Unexpected zero success at state: {}, with task: {}, in task set: {}".format(
|
||||
self.__class__.__name__,
|
||||
cls.__name__, # verified just now that isinstance(task, cls)
|
||||
[c.__class__.__name__ for c in tasks]
|
||||
))
|
||||
|
||||
def assertNoTask(self, tasks, cls):
|
||||
for task in tasks:
|
||||
|
@ -809,8 +815,6 @@ class StateEmpty(AnyState):
|
|||
]
|
||||
|
||||
def verifyTasksToState(self, tasks, newState):
|
||||
if Config.getConfig().ignore_errors: # if we are asked to ignore certain errors, let's not verify CreateDB success.
|
||||
return
|
||||
if (self.hasSuccess(tasks, TaskCreateDb)
|
||||
): # at EMPTY, if there's succes in creating DB
|
||||
if (not self.hasTask(tasks, TaskDropDb)): # and no drop_db tasks
|
||||
|
@ -995,16 +999,17 @@ class StateMechine:
|
|||
dbc.execute("show dnodes")
|
||||
|
||||
# Generic Checks, first based on the start state
|
||||
if self._curState.canCreateDb():
|
||||
self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb)
|
||||
# self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in
|
||||
# case of multiple creation and drops
|
||||
if not Config.getConfig().ignore_errors: # verify state, only if we are asked not to ignore certain errors.
|
||||
if self._curState.canCreateDb():
|
||||
self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb)
|
||||
# self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in
|
||||
# case of multiple creation and drops
|
||||
|
||||
if self._curState.canDropDb():
|
||||
if gSvcMgr == None: # only if we are running as client-only
|
||||
self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
|
||||
# self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
|
||||
# case of drop-create-drop
|
||||
if self._curState.canDropDb():
|
||||
if gSvcMgr == None: # only if we are running as client-only
|
||||
self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
|
||||
# self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
|
||||
# case of drop-create-drop
|
||||
|
||||
# if self._state.canCreateFixedTable():
|
||||
# self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped
|
||||
|
@ -1026,7 +1031,8 @@ class StateMechine:
|
|||
newState = self._findCurrentState(dbc)
|
||||
Logging.debug("[STT] New DB state determined: {}".format(newState))
|
||||
# can old state move to new state through the tasks?
|
||||
self._curState.verifyTasksToState(tasks, newState)
|
||||
if not Config.getConfig().ignore_errors: # verify state, only if we are asked not to ignore certain errors.
|
||||
self._curState.verifyTasksToState(tasks, newState)
|
||||
self._curState = newState
|
||||
|
||||
def pickTaskType(self):
|
||||
|
@ -2231,16 +2237,14 @@ class TaskAddData(StateTransitionTask):
|
|||
class ThreadStacks: # stack info for all threads
|
||||
def __init__(self):
|
||||
self._allStacks = {}
|
||||
allFrames = sys._current_frames() # All current stack frames
|
||||
allFrames = sys._current_frames() # All current stack frames, keyed with "ident"
|
||||
for th in threading.enumerate(): # For each thread
|
||||
if th.ident is None:
|
||||
continue
|
||||
stack = traceback.extract_stack(allFrames[th.ident]) # Get stack for a thread
|
||||
shortTid = th.ident % 10000
|
||||
stack = traceback.extract_stack(allFrames[th.ident]) #type: ignore # Get stack for a thread
|
||||
shortTid = th.native_id % 10000 #type: ignore
|
||||
self._allStacks[shortTid] = stack # Was using th.native_id
|
||||
|
||||
def print(self, filteredEndName = None, filterInternal = False):
|
||||
for tIdent, stack in self._allStacks.items(): # for each thread, stack frames top to bottom
|
||||
for shortTid, stack in self._allStacks.items(): # for each thread, stack frames top to bottom
|
||||
lastFrame = stack[-1]
|
||||
if filteredEndName: # we need to filter out stacks that match this name
|
||||
if lastFrame.name == filteredEndName : # end did not match
|
||||
|
@ -2252,7 +2256,9 @@ class ThreadStacks: # stack info for all threads
|
|||
'__init__']: # the thread that extracted the stack
|
||||
continue # ignore
|
||||
# Now print
|
||||
print("\n<----- Thread Info for LWP/ID: {} (most recent call last) <-----".format(tIdent))
|
||||
print("\n<----- Thread Info for LWP/ID: {} (most recent call last) <-----".format(shortTid))
|
||||
lastSqlForThread = DbConn.fetchSqlForThread(shortTid)
|
||||
print("Last SQL statement attempted from thread {} is: {}".format(shortTid, lastSqlForThread))
|
||||
stackFrame = 0
|
||||
for frame in stack: # was using: reversed(stack)
|
||||
# print(frame)
|
||||
|
|
|
@ -27,6 +27,26 @@ class DbConn:
|
|||
TYPE_REST = "rest-api"
|
||||
TYPE_INVALID = "invalid"
|
||||
|
||||
# class variables
|
||||
lastSqlFromThreads : dict[int, str] = {} # stored by thread id, obtained from threading.current_thread().ident%10000
|
||||
|
||||
@classmethod
|
||||
def saveSqlForCurrentThread(cls, sql: str):
|
||||
'''
|
||||
Let us save the last SQL statement on a per-thread basis, so that when later we
|
||||
run into a dead-lock situation, we can pick out the deadlocked thread, and use
|
||||
that information to find what what SQL statement is stuck.
|
||||
'''
|
||||
th = threading.current_thread()
|
||||
shortTid = th.native_id % 10000 #type: ignore
|
||||
cls.lastSqlFromThreads[shortTid] = sql # Save this for later
|
||||
|
||||
@classmethod
|
||||
def fetchSqlForThread(cls, shortTid : int) -> str :
|
||||
if shortTid not in cls.lastSqlFromThreads:
|
||||
raise CrashGenError("No last-attempted-SQL found for thread id: {}".format(shortTid))
|
||||
return cls.lastSqlFromThreads[shortTid]
|
||||
|
||||
@classmethod
|
||||
def create(cls, connType, dbTarget):
|
||||
if connType == cls.TYPE_NATIVE:
|
||||
|
@ -163,6 +183,7 @@ class DbConnRest(DbConn):
|
|||
|
||||
def _doSql(self, sql):
|
||||
self._lastSql = sql # remember this, last SQL attempted
|
||||
self.saveSqlForCurrentThread(sql) # Save in global structure too. #TODO: combine with above
|
||||
try:
|
||||
r = requests.post(self._url,
|
||||
data = sql,
|
||||
|
@ -392,6 +413,7 @@ class DbConnNative(DbConn):
|
|||
"Cannot exec SQL unless db connection is open", CrashGenError.DB_CONNECTION_NOT_OPEN)
|
||||
Logging.debug("[SQL] Executing SQL: {}".format(sql))
|
||||
self._lastSql = sql
|
||||
self.saveSqlForCurrentThread(sql) # Save in global structure too. #TODO: combine with above
|
||||
nRows = self._tdSql.execute(sql)
|
||||
cls = self.__class__
|
||||
cls.totalRequests += 1
|
||||
|
@ -407,6 +429,7 @@ class DbConnNative(DbConn):
|
|||
"Cannot query database until connection is open, restarting?", CrashGenError.DB_CONNECTION_NOT_OPEN)
|
||||
Logging.debug("[SQL] Executing SQL: {}".format(sql))
|
||||
self._lastSql = sql
|
||||
self.saveSqlForCurrentThread(sql) # Save in global structure too. #TODO: combine with above
|
||||
nRows = self._tdSql.query(sql)
|
||||
cls = self.__class__
|
||||
cls.totalRequests += 1
|
||||
|
|
Loading…
Reference in New Issue