Merge pull request #2861 from taosdata/feature/crash_gen
Now able to create a service-side core dump problem
This commit is contained in:
commit
09dd9aabb9
|
@ -238,7 +238,7 @@ class WorkerThread:
|
||||||
|
|
||||||
|
|
||||||
class ThreadCoordinator:
|
class ThreadCoordinator:
|
||||||
WORKER_THREAD_TIMEOUT = 30
|
WORKER_THREAD_TIMEOUT = 60 # one minute
|
||||||
|
|
||||||
def __init__(self, pool: ThreadPool, dbManager):
|
def __init__(self, pool: ThreadPool, dbManager):
|
||||||
self._curStep = -1 # first step is 0
|
self._curStep = -1 # first step is 0
|
||||||
|
@ -388,7 +388,9 @@ class ThreadCoordinator:
|
||||||
except taos.error.ProgrammingError as err:
|
except taos.error.ProgrammingError as err:
|
||||||
transitionFailed = True
|
transitionFailed = True
|
||||||
errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno # correct error scheme
|
errno2 = err.errno if (err.errno > 0) else 0x80000000 + err.errno # correct error scheme
|
||||||
logger.info("Transition failed: errno=0x{:X}, msg: {}".format(errno2, err))
|
errMsg = "Transition failed: errno=0x{:X}, msg: {}".format(errno2, err)
|
||||||
|
logger.info(errMsg)
|
||||||
|
self._execStats.registerFailure(errMsg)
|
||||||
|
|
||||||
# Then we move on to the next step
|
# Then we move on to the next step
|
||||||
self._releaseAllWorkerThreads(transitionFailed)
|
self._releaseAllWorkerThreads(transitionFailed)
|
||||||
|
@ -812,7 +814,7 @@ class DbConnNative(DbConn):
|
||||||
buildPath = root[:len(root) - len("/build/bin")]
|
buildPath = root[:len(root) - len("/build/bin")]
|
||||||
break
|
break
|
||||||
if buildPath == None:
|
if buildPath == None:
|
||||||
raise RuntimeError("Failed to determine buildPath, selfPath={}".format(self_path))
|
raise RuntimeError("Failed to determine buildPath, selfPath={}".format(selfPath))
|
||||||
return buildPath
|
return buildPath
|
||||||
|
|
||||||
|
|
||||||
|
@ -2292,6 +2294,12 @@ class ServiceManagerThread:
|
||||||
self._thread.daemon = True # thread dies with the program
|
self._thread.daemon = True # thread dies with the program
|
||||||
self._thread.start()
|
self._thread.start()
|
||||||
|
|
||||||
|
self._thread2 = threading.Thread(
|
||||||
|
target=self.svcErrorReader,
|
||||||
|
args=(self._tdeSubProcess.getStdErr(), self._ipcQueue))
|
||||||
|
self._thread2.daemon = True # thread dies with the program
|
||||||
|
self._thread2.start()
|
||||||
|
|
||||||
# wait for service to start
|
# wait for service to start
|
||||||
for i in range(0, 10):
|
for i in range(0, 10):
|
||||||
time.sleep(1.0)
|
time.sleep(1.0)
|
||||||
|
@ -2320,12 +2328,12 @@ class ServiceManagerThread:
|
||||||
raise RuntimeError("sub process object missing")
|
raise RuntimeError("sub process object missing")
|
||||||
|
|
||||||
self._status = MainExec.STATUS_STOPPING
|
self._status = MainExec.STATUS_STOPPING
|
||||||
self._tdeSubProcess.stop()
|
retCode = self._tdeSubProcess.stop()
|
||||||
|
print("Attempted to stop sub process, got return code: {}".format(retCode))
|
||||||
|
|
||||||
if self._tdeSubProcess.isRunning(): # still running
|
if self._tdeSubProcess.isRunning(): # still running
|
||||||
print(
|
print("FAILED to stop sub process, it is still running... pid = {}".format(
|
||||||
"FAILED to stop sub process, it is still running... pid = {}".format(
|
self._tdeSubProcess.getPid()))
|
||||||
self.subProcess.pid))
|
|
||||||
else:
|
else:
|
||||||
self._tdeSubProcess = None # not running any more
|
self._tdeSubProcess = None # not running any more
|
||||||
self.join() # stop the thread, change the status, etc.
|
self.join() # stop the thread, change the status, etc.
|
||||||
|
@ -2341,6 +2349,9 @@ class ServiceManagerThread:
|
||||||
self._thread.join()
|
self._thread.join()
|
||||||
self._thread = None
|
self._thread = None
|
||||||
self._status = MainExec.STATUS_STOPPED
|
self._status = MainExec.STATUS_STOPPED
|
||||||
|
# STD ERR thread
|
||||||
|
self._thread2.join()
|
||||||
|
self._thread2 = None
|
||||||
else:
|
else:
|
||||||
print("Joining empty thread, doing nothing")
|
print("Joining empty thread, doing nothing")
|
||||||
|
|
||||||
|
@ -2421,6 +2432,10 @@ class ServiceManagerThread:
|
||||||
print("\nNo more output from IO thread managing TDengine service")
|
print("\nNo more output from IO thread managing TDengine service")
|
||||||
out.close()
|
out.close()
|
||||||
|
|
||||||
|
def svcErrorReader(self, err: IO, queue):
|
||||||
|
for line in iter(err.readline, b''):
|
||||||
|
print("\nTD Svc STDERR: {}".format(line))
|
||||||
|
|
||||||
|
|
||||||
class TdeSubProcess:
|
class TdeSubProcess:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -2429,9 +2444,15 @@ class TdeSubProcess:
|
||||||
def getStdOut(self):
|
def getStdOut(self):
|
||||||
return self.subProcess.stdout
|
return self.subProcess.stdout
|
||||||
|
|
||||||
|
def getStdErr(self):
|
||||||
|
return self.subProcess.stderr
|
||||||
|
|
||||||
def isRunning(self):
|
def isRunning(self):
|
||||||
return self.subProcess is not None
|
return self.subProcess is not None
|
||||||
|
|
||||||
|
def getPid(self):
|
||||||
|
return self.subProcess.pid
|
||||||
|
|
||||||
def getBuildPath(self):
|
def getBuildPath(self):
|
||||||
selfPath = os.path.dirname(os.path.realpath(__file__))
|
selfPath = os.path.dirname(os.path.realpath(__file__))
|
||||||
if ("community" in selfPath):
|
if ("community" in selfPath):
|
||||||
|
@ -2467,24 +2488,28 @@ class TdeSubProcess:
|
||||||
os.rename(logPath, logPathSaved)
|
os.rename(logPath, logPathSaved)
|
||||||
# os.mkdir(logPath) # recreate, no need actually, TDengine will auto-create with proper perms
|
# os.mkdir(logPath) # recreate, no need actually, TDengine will auto-create with proper perms
|
||||||
|
|
||||||
|
|
||||||
svcCmd = [taosdPath, '-c', cfgPath]
|
svcCmd = [taosdPath, '-c', cfgPath]
|
||||||
|
# svcCmdSingle = "{} -c {}".format(taosdPath, cfgPath)
|
||||||
# svcCmd = ['vmstat', '1']
|
# svcCmd = ['vmstat', '1']
|
||||||
if self.subProcess: # already there
|
if self.subProcess: # already there
|
||||||
raise RuntimeError("Corrupt process state")
|
raise RuntimeError("Corrupt process state")
|
||||||
|
|
||||||
|
# print("Starting service: {}".format(svcCmd))
|
||||||
self.subProcess = subprocess.Popen(
|
self.subProcess = subprocess.Popen(
|
||||||
svcCmd,
|
svcCmd, shell=False,
|
||||||
|
# svcCmdSingle, shell=True, # capture core dump?
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
# bufsize=1, # not supported in binary mode
|
# bufsize=1, # not supported in binary mode
|
||||||
close_fds=ON_POSIX) # had text=True, which interfered with reading EOF
|
close_fds=ON_POSIX
|
||||||
|
) # had text=True, which interfered with reading EOF
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
if not self.subProcess:
|
if not self.subProcess:
|
||||||
print("Sub process already stopped")
|
print("Sub process already stopped")
|
||||||
return
|
return -1
|
||||||
|
|
||||||
retCode = self.subProcess.poll()
|
retCode = self.subProcess.poll() # contains real sub process return code
|
||||||
if retCode: # valid return code, process ended
|
if retCode: # valid return code, process ended
|
||||||
self.subProcess = None
|
self.subProcess = None
|
||||||
else: # process still alive, let's interrupt it
|
else: # process still alive, let's interrupt it
|
||||||
|
@ -2495,11 +2520,15 @@ class TdeSubProcess:
|
||||||
self.subProcess.send_signal(signal.SIGINT)
|
self.subProcess.send_signal(signal.SIGINT)
|
||||||
try:
|
try:
|
||||||
self.subProcess.wait(10)
|
self.subProcess.wait(10)
|
||||||
|
retCode = self.subProcess.returncode
|
||||||
except subprocess.TimeoutExpired as err:
|
except subprocess.TimeoutExpired as err:
|
||||||
print("Time out waiting for TDengine service process to exit")
|
print("Time out waiting for TDengine service process to exit")
|
||||||
|
retCode = -3
|
||||||
else:
|
else:
|
||||||
print("TDengine service process terminated successfully from SIG_INT")
|
print("TDengine service process terminated successfully from SIG_INT")
|
||||||
|
retCode = -4
|
||||||
self.subProcess = None
|
self.subProcess = None
|
||||||
|
return retCode
|
||||||
|
|
||||||
class ThreadStacks: # stack info for all threads
|
class ThreadStacks: # stack info for all threads
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
Loading…
Reference in New Issue