Refactored crash_gen to have the TdeSubProcess own the SvcMgrThread object, also switched to Pylance
This commit is contained in:
parent bcbb6017c0
commit 5d1d5cadc2
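Editor's note on the shape of the change: before this commit, ServiceManagerThread created and owned the TdeSubProcess; afterwards the ownership is inverted, so TdeSubProcess is constructed directly from a command line and log directory, spawns the OS process, and creates the ServiceManagerThread that drains its output, while TdeInstance only holds an Optional[TdeSubProcess] (None meaning "not running"). The sketch below illustrates that layout only; signatures are simplified and status tracking, stderr handling, and log files are left out, so it is not the code in this diff.

```python
import subprocess
import threading
from typing import List, Optional


class ServiceManagerThread:
    """Monitors a sub process's stdout; it no longer owns the process."""
    def __init__(self, sub_proc: "TdeSubProcess", log_dir: str):
        self._thread = threading.Thread(target=self._drain, args=(sub_proc,), daemon=True)
        self._thread.start()

    def _drain(self, sub_proc: "TdeSubProcess"):
        for _line in sub_proc.popen.stdout:  # read until EOF, i.e. until the process exits
            pass

    def stop(self):
        self._thread.join()


class TdeSubProcess:
    """Owns both the OS process and the thread that watches it (the new layout)."""
    def __init__(self, cmd_line: List[str], log_dir: str):
        self.popen = subprocess.Popen(cmd_line, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        self._sm_thread = ServiceManagerThread(self, log_dir)  # created here, owned here

    def stop(self):
        self.popen.terminate()
        self.popen.wait()
        self._sm_thread.stop()


class TdeInstance:
    """Holds at most one sub process; None means 'not running'."""
    def __init__(self):
        self._sub_process: Optional[TdeSubProcess] = None

    def start(self, cmd_line: List[str], log_dir: str):
        self._sub_process = TdeSubProcess(cmd_line, log_dir)

    def stop(self):
        if self._sub_process is not None:
            self._sub_process.stop()
            self._sub_process = None
```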
@@ -1,6 +1,7 @@
from .connection import TDengineConnection
from .cursor import TDengineCursor
from .error import Error

# Globals
threadsafety = 0
@@ -3,3 +3,4 @@ from crash_gen.service_manager import ServiceManager, TdeInstance, TdeSubProcess
from crash_gen.misc import Logging, Status, CrashGenError, Dice, Helper, Progress
from crash_gen.db import DbConn, MyTDSql, DbConnNative, DbManager
from crash_gen.settings import Settings
from crash_gen.types import DirPath
@@ -15,7 +15,7 @@
# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel
from __future__ import annotations

-from typing import Set
+from typing import Any, Set, Tuple
from typing import Dict
from typing import List
from typing import Optional # Type hinting, ref: https://stackoverflow.com/questions/19202633/python-3-type-hinting-for-none
@@ -57,8 +57,8 @@ if sys.version_info[0] < 3:

# Command-line/Environment Configurations, will set a bit later
# ConfigNameSpace = argparse.Namespace
-gConfig: argparse.Namespace
-gSvcMgr: ServiceManager # TODO: refactor this hack, use dep injection
+# gConfig: argparse.Namespace
+gSvcMgr: Optional[ServiceManager] # TODO: refactor this hack, use dep injection
# logger: logging.Logger
gContainer: Container

@ -81,20 +81,20 @@ class WorkerThread:
|
|||
self._stepGate = threading.Event()
|
||||
|
||||
# Let us have a DB connection of our own
|
||||
if (gConfig.per_thread_db_connection): # type: ignore
|
||||
if (Settings.getConfig().per_thread_db_connection): # type: ignore
|
||||
# print("connector_type = {}".format(gConfig.connector_type))
|
||||
tInst = gContainer.defTdeInstance
|
||||
if gConfig.connector_type == 'native':
|
||||
if Settings.getConfig().connector_type == 'native':
|
||||
self._dbConn = DbConn.createNative(tInst.getDbTarget())
|
||||
elif gConfig.connector_type == 'rest':
|
||||
elif Settings.getConfig().connector_type == 'rest':
|
||||
self._dbConn = DbConn.createRest(tInst.getDbTarget())
|
||||
elif gConfig.connector_type == 'mixed':
|
||||
elif Settings.getConfig().connector_type == 'mixed':
|
||||
if Dice.throw(2) == 0: # 1/2 chance
|
||||
self._dbConn = DbConn.createNative(tInst.getDbTarget())
|
||||
else:
|
||||
self._dbConn = DbConn.createRest(tInst.getDbTarget())
|
||||
else:
|
||||
raise RuntimeError("Unexpected connector type: {}".format(gConfig.connector_type))
|
||||
raise RuntimeError("Unexpected connector type: {}".format(Settings.getConfig().connector_type))
|
||||
|
||||
# self._dbInUse = False # if "use db" was executed already
|
||||
|
||||
|
@ -123,14 +123,14 @@ class WorkerThread:
|
|||
# self.isSleeping = False
|
||||
Logging.info("Starting to run thread: {}".format(self._tid))
|
||||
|
||||
if (gConfig.per_thread_db_connection): # type: ignore
|
||||
if (Settings.getConfig().per_thread_db_connection): # type: ignore
|
||||
Logging.debug("Worker thread openning database connection")
|
||||
self._dbConn.open()
|
||||
|
||||
self._doTaskLoop()
|
||||
|
||||
# clean up
|
||||
if (gConfig.per_thread_db_connection): # type: ignore
|
||||
if (Settings.getConfig().per_thread_db_connection): # type: ignore
|
||||
if self._dbConn.isOpen: #sometimes it is not open
|
||||
self._dbConn.close()
|
||||
else:
|
||||
|
@ -158,7 +158,7 @@ class WorkerThread:
|
|||
|
||||
# Before we fetch the task and run it, let's ensure we properly "use" the database (not needed any more)
|
||||
try:
|
||||
if (gConfig.per_thread_db_connection): # most likely TRUE
|
||||
if (Settings.getConfig().per_thread_db_connection): # most likely TRUE
|
||||
if not self._dbConn.isOpen: # might have been closed during server auto-restart
|
||||
self._dbConn.open()
|
||||
# self.useDb() # might encounter exceptions. TODO: catch
|
||||
|
@ -232,7 +232,7 @@ class WorkerThread:
|
|||
return self.getDbConn().getQueryResult()
|
||||
|
||||
def getDbConn(self) -> DbConn :
|
||||
if (gConfig.per_thread_db_connection):
|
||||
if (Settings.getConfig().per_thread_db_connection):
|
||||
return self._dbConn
|
||||
else:
|
||||
return self._tc.getDbManager().getDbConn()
|
||||
|
@ -254,7 +254,7 @@ class ThreadCoordinator:
|
|||
self._pool = pool
|
||||
# self._wd = wd
|
||||
self._te = None # prepare for every new step
|
||||
self._dbManager = dbManager
|
||||
self._dbManager = dbManager # type: Optional[DbManager] # may be freed
|
||||
self._executedTasks: List[Task] = [] # in a given step
|
||||
self._lock = threading.RLock() # sync access for a few things
|
||||
|
||||
|
@ -266,9 +266,13 @@ class ThreadCoordinator:
|
|||
self._stepStartTime = None # Track how long it takes to execute each step
|
||||
|
||||
def getTaskExecutor(self):
|
||||
if self._te is None:
|
||||
raise CrashGenError("Unexpected empty TE")
|
||||
return self._te
|
||||
|
||||
def getDbManager(self) -> DbManager:
|
||||
if self._dbManager is None:
|
||||
raise ChildProcessError("Unexpected empty _dbManager")
|
||||
return self._dbManager
|
||||
|
||||
def crossStepBarrier(self, timeout=None):
|
||||
|
@ -279,7 +283,7 @@ class ThreadCoordinator:
|
|||
self._execStats.registerFailure("User Interruption")
|
||||
|
||||
def _runShouldEnd(self, transitionFailed, hasAbortedTask, workerTimeout):
|
||||
maxSteps = gConfig.max_steps # type: ignore
|
||||
maxSteps = Settings.getConfig().max_steps # type: ignore
|
||||
if self._curStep >= (maxSteps - 1): # maxStep==10, last curStep should be 9
|
||||
return True
|
||||
if self._runStatus != Status.STATUS_RUNNING:
|
||||
|
@ -384,7 +388,7 @@ class ThreadCoordinator:
|
|||
hasAbortedTask = False
|
||||
workerTimeout = False
|
||||
while not self._runShouldEnd(transitionFailed, hasAbortedTask, workerTimeout):
|
||||
if not gConfig.debug: # print this only if we are not in debug mode
|
||||
if not Settings.getConfig().debug: # print this only if we are not in debug mode
|
||||
Progress.emit(Progress.STEP_BOUNDARY)
|
||||
# print(".", end="", flush=True)
|
||||
# if (self._curStep % 2) == 0: # print memory usage once every 10 steps
|
||||
|
@ -469,7 +473,7 @@ class ThreadCoordinator:
|
|||
self._pool = None
|
||||
self._te = None
|
||||
self._dbManager = None
|
||||
self._executedTasks = None
|
||||
self._executedTasks = []
|
||||
self._lock = None
|
||||
self._stepBarrier = None
|
||||
self._execStats = None
|
||||
|
@ -508,18 +512,18 @@ class ThreadCoordinator:
|
|||
''' Initialize multiple databases, invoked at __ini__() time '''
|
||||
self._dbs = [] # type: List[Database]
|
||||
dbc = self.getDbManager().getDbConn()
|
||||
if gConfig.max_dbs == 0:
|
||||
if Settings.getConfig().max_dbs == 0:
|
||||
self._dbs.append(Database(0, dbc))
|
||||
else:
|
||||
baseDbNumber = int(datetime.datetime.now().timestamp( # Don't use Dice/random, as they are deterministic
|
||||
)*333) % 888 if gConfig.dynamic_db_table_names else 0
|
||||
for i in range(gConfig.max_dbs):
|
||||
)*333) % 888 if Settings.getConfig().dynamic_db_table_names else 0
|
||||
for i in range(Settings.getConfig().max_dbs):
|
||||
self._dbs.append(Database(baseDbNumber + i, dbc))
|
||||
|
||||
def pickDatabase(self):
|
||||
idxDb = 0
|
||||
if gConfig.max_dbs != 0 :
|
||||
idxDb = Dice.throw(gConfig.max_dbs) # 0 to N-1
|
||||
if Settings.getConfig().max_dbs != 0 :
|
||||
idxDb = Dice.throw(Settings.getConfig().max_dbs) # 0 to N-1
|
||||
db = self._dbs[idxDb] # type: Database
|
||||
return db
|
||||
|
||||
|
@ -563,7 +567,7 @@ class ThreadPool:
|
|||
workerThread._thread.join()
|
||||
|
||||
def cleanup(self):
|
||||
self.threadList = None # maybe clean up each?
|
||||
self.threadList = [] # maybe clean up each?
|
||||
|
||||
# A queue of continguous POSITIVE integers, used by DbManager to generate continuous numbers
|
||||
# for new table names
|
||||
|
@ -673,7 +677,7 @@ class AnyState:
|
|||
|
||||
# Each sub state tells us the "info", about itself, so we can determine
|
||||
# on things like canDropDB()
|
||||
def getInfo(self):
|
||||
def getInfo(self) -> List[Any]:
|
||||
raise RuntimeError("Must be overriden by child classes")
|
||||
|
||||
def equals(self, other):
|
||||
|
@ -701,7 +705,7 @@ class AnyState:
|
|||
def canDropDb(self):
|
||||
# If user requests to run up to a number of DBs,
|
||||
# we'd then not do drop_db operations any more
|
||||
if gConfig.max_dbs > 0 or gConfig.use_shadow_db :
|
||||
if Settings.getConfig().max_dbs > 0 or Settings.getConfig().use_shadow_db :
|
||||
return False
|
||||
return self._info[self.CAN_DROP_DB]
|
||||
|
||||
|
@ -709,7 +713,7 @@ class AnyState:
|
|||
return self._info[self.CAN_CREATE_FIXED_SUPER_TABLE]
|
||||
|
||||
def canDropFixedSuperTable(self):
|
||||
if gConfig.use_shadow_db: # duplicate writes to shaddow DB, in which case let's disable dropping s-table
|
||||
if Settings.getConfig().use_shadow_db: # duplicate writes to shaddow DB, in which case let's disable dropping s-table
|
||||
return False
|
||||
return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]
|
||||
|
||||
|
@ -911,7 +915,7 @@ class StateMechine:
|
|||
|
||||
# May be slow, use cautionsly...
|
||||
def getTaskTypes(self): # those that can run (directly/indirectly) from the current state
|
||||
def typesToStrings(types):
|
||||
def typesToStrings(types) -> List:
|
||||
ss = []
|
||||
for t in types:
|
||||
ss.append(t.__name__)
|
||||
|
@ -1030,13 +1034,14 @@ class StateMechine:
|
|||
|
||||
# ref:
|
||||
# https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
|
||||
def _weighted_choice_sub(self, weights):
|
||||
def _weighted_choice_sub(self, weights) -> int:
|
||||
# TODO: use our dice to ensure it being determinstic?
|
||||
rnd = random.random() * sum(weights)
|
||||
for i, w in enumerate(weights):
|
||||
rnd -= w
|
||||
if rnd < 0:
|
||||
return i
|
||||
raise CrashGenError("Unexpected no choice")
|
||||
|
||||
class Database:
|
||||
''' We use this to represent an actual TDengine database inside a service instance,
|
||||
|
@@ -1048,8 +1053,8 @@ class Database:
'''
_clsLock = threading.Lock() # class wide lock
_lastInt = 101 # next one is initial integer
-_lastTick = 0
-_lastLaggingTick = 0 # lagging tick, for out-of-sequence (oos) data insertions
+_lastTick = None # Optional[datetime]
+_lastLaggingTick = None # Optional[datetime] # lagging tick, for out-of-sequence (oos) data insertions

def __init__(self, dbNum: int, dbc: DbConn): # TODO: remove dbc
self._dbNum = dbNum # we assign a number to databases, for our testing purpose
@@ -1114,14 +1119,14 @@
Fetch a timestamp tick, with some random factor, may not be unique.
'''
with cls._clsLock: # prevent duplicate tick
-if cls._lastLaggingTick==0 or cls._lastTick==0 : # not initialized
+if cls._lastLaggingTick is None or cls._lastTick is None : # not initialized
# 10k at 1/20 chance, should be enough to avoid overlaps
tick = cls.setupLastTick()
cls._lastTick = tick
cls._lastLaggingTick = tick + datetime.timedelta(0, -60*2) # lagging behind 2 minutes, should catch up fast
# if : # should be quite a bit into the future

-if gConfig.mix_oos_data and Dice.throw(20) == 0: # if asked to do so, and 1 in 20 chance, return lagging tick
+if Settings.getConfig().mix_oos_data and Dice.throw(20) == 0: # if asked to do so, and 1 in 20 chance, return lagging tick
cls._lastLaggingTick += datetime.timedelta(0, 1) # pick the next sequence from the lagging tick sequence
return cls._lastLaggingTick
else: # regular
@ -1303,10 +1308,10 @@ class Task():
|
|||
]:
|
||||
return True # These are the ALWAYS-ACCEPTABLE ones
|
||||
# This case handled below already.
|
||||
# elif (errno in [ 0x0B ]) and gConfig.auto_start_service:
|
||||
# elif (errno in [ 0x0B ]) and Settings.getConfig().auto_start_service:
|
||||
# return True # We may get "network unavilable" when restarting service
|
||||
elif gConfig.ignore_errors: # something is specified on command line
|
||||
moreErrnos = [int(v, 0) for v in gConfig.ignore_errors.split(',')]
|
||||
elif Settings.getConfig().ignore_errors: # something is specified on command line
|
||||
moreErrnos = [int(v, 0) for v in Settings.getConfig().ignore_errors.split(',')]
|
||||
if errno in moreErrnos:
|
||||
return True
|
||||
elif errno == 0x200 : # invalid SQL, we need to div in a bit more
|
||||
|
@ -1342,7 +1347,7 @@ class Task():
|
|||
self._executeInternal(te, wt) # TODO: no return value?
|
||||
except taos.error.ProgrammingError as err:
|
||||
errno2 = Helper.convertErrno(err.errno)
|
||||
if (gConfig.continue_on_exception): # user choose to continue
|
||||
if (Settings.getConfig().continue_on_exception): # user choose to continue
|
||||
self.logDebug("[=] Continue after TAOS exception: errno=0x{:X}, msg: {}, SQL: {}".format(
|
||||
errno2, err, wt.getDbConn().getLastSql()))
|
||||
self._err = err
|
||||
|
@ -1357,7 +1362,7 @@ class Task():
|
|||
self.__class__.__name__,
|
||||
errno2, err, wt.getDbConn().getLastSql())
|
||||
self.logDebug(errMsg)
|
||||
if gConfig.debug:
|
||||
if Settings.getConfig().debug:
|
||||
# raise # so that we see full stack
|
||||
traceback.print_exc()
|
||||
print(
|
||||
|
@ -1422,11 +1427,11 @@ class Task():
|
|||
class ExecutionStats:
|
||||
def __init__(self):
|
||||
# total/success times for a task
|
||||
self._execTimes: Dict[str, [int, int]] = {}
|
||||
self._execTimes: Dict[str, List[int]] = {}
|
||||
self._tasksInProgress = 0
|
||||
self._lock = threading.Lock()
|
||||
self._firstTaskStartTime = None
|
||||
self._execStartTime = None
|
||||
self._firstTaskStartTime = 0.0
|
||||
self._execStartTime = 0.0
|
||||
self._errors = {}
|
||||
self._elapsedTime = 0.0 # total elapsed time
|
||||
self._accRunTime = 0.0 # accumulated run time
|
||||
|
@ -1471,7 +1476,7 @@ class ExecutionStats:
|
|||
self._tasksInProgress -= 1
|
||||
if self._tasksInProgress == 0: # all tasks have stopped
|
||||
self._accRunTime += (time.time() - self._firstTaskStartTime)
|
||||
self._firstTaskStartTime = None
|
||||
self._firstTaskStartTime = 0.0
|
||||
|
||||
def registerFailure(self, reason):
|
||||
self._failed = True
|
||||
|
@ -1555,7 +1560,7 @@ class StateTransitionTask(Task):
|
|||
def getRegTableName(cls, i):
|
||||
if ( StateTransitionTask._baseTableNumber is None): # Set it one time
|
||||
StateTransitionTask._baseTableNumber = Dice.throw(
|
||||
999) if gConfig.dynamic_db_table_names else 0
|
||||
999) if Settings.getConfig().dynamic_db_table_names else 0
|
||||
return "reg_table_{}".format(StateTransitionTask._baseTableNumber + i)
|
||||
|
||||
def execute(self, wt: WorkerThread):
|
||||
|
@ -1575,14 +1580,14 @@ class TaskCreateDb(StateTransitionTask):
|
|||
def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
|
||||
# was: self.execWtSql(wt, "create database db")
|
||||
repStr = ""
|
||||
if gConfig.num_replicas != 1:
|
||||
# numReplica = Dice.throw(gConfig.max_replicas) + 1 # 1,2 ... N
|
||||
numReplica = gConfig.num_replicas # fixed, always
|
||||
if Settings.getConfig().num_replicas != 1:
|
||||
# numReplica = Dice.throw(Settings.getConfig().max_replicas) + 1 # 1,2 ... N
|
||||
numReplica = Settings.getConfig().num_replicas # fixed, always
|
||||
repStr = "replica {}".format(numReplica)
|
||||
updatePostfix = "update 1" if gConfig.verify_data else "" # allow update only when "verify data" is active
|
||||
updatePostfix = "update 1" if Settings.getConfig().verify_data else "" # allow update only when "verify data" is active
|
||||
dbName = self._db.getName()
|
||||
self.execWtSql(wt, "create database {} {} {} ".format(dbName, repStr, updatePostfix ) )
|
||||
if dbName == "db_0" and gConfig.use_shadow_db:
|
||||
if dbName == "db_0" and Settings.getConfig().use_shadow_db:
|
||||
self.execWtSql(wt, "create database {} {} {} ".format("db_s", repStr, updatePostfix ) )
|
||||
|
||||
class TaskDropDb(StateTransitionTask):
|
||||
|
@ -1887,7 +1892,7 @@ class TaskDropSuperTable(StateTransitionTask):
|
|||
if Dice.throw(2) == 0:
|
||||
# print("_7_", end="", flush=True)
|
||||
tblSeq = list(range(
|
||||
2 + (self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES)))
|
||||
2 + (self.LARGE_NUMBER_OF_TABLES if Settings.getConfig().larger_data else self.SMALL_NUMBER_OF_TABLES)))
|
||||
random.shuffle(tblSeq)
|
||||
tickOutput = False # if we have spitted out a "d" character for "drop regular table"
|
||||
isSuccess = True
|
||||
|
@ -1953,13 +1958,13 @@ class TaskRestartService(StateTransitionTask):
|
|||
|
||||
@classmethod
|
||||
def canBeginFrom(cls, state: AnyState):
|
||||
if gConfig.auto_start_service:
|
||||
if Settings.getConfig().auto_start_service:
|
||||
return state.canDropFixedSuperTable() # Basicallly when we have the super table
|
||||
return False # don't run this otherwise
|
||||
|
||||
CHANCE_TO_RESTART_SERVICE = 200
|
||||
def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
|
||||
if not gConfig.auto_start_service: # only execute when we are in -a mode
|
||||
if not Settings.getConfig().auto_start_service: # only execute when we are in -a mode
|
||||
print("_a", end="", flush=True)
|
||||
return
|
||||
|
||||
|
@ -1981,12 +1986,12 @@ class TaskAddData(StateTransitionTask):
|
|||
activeTable: Set[int] = set()
|
||||
|
||||
# We use these two files to record operations to DB, useful for power-off tests
|
||||
fAddLogReady = None # type: io.TextIOWrapper
|
||||
fAddLogDone = None # type: io.TextIOWrapper
|
||||
fAddLogReady = None # type: Optional[io.TextIOWrapper]
|
||||
fAddLogDone = None # type: Optional[io.TextIOWrapper]
|
||||
|
||||
@classmethod
|
||||
def prepToRecordOps(cls):
|
||||
if gConfig.record_ops:
|
||||
if Settings.getConfig().record_ops:
|
||||
if (cls.fAddLogReady is None):
|
||||
Logging.info(
|
||||
"Recording in a file operations to be performed...")
|
||||
|
@ -2004,7 +2009,7 @@ class TaskAddData(StateTransitionTask):
|
|||
return state.canAddData()
|
||||
|
||||
def _addDataInBatch(self, db, dbc, regTableName, te: TaskExecutor):
|
||||
numRecords = self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS
|
||||
numRecords = self.LARGE_NUMBER_OF_RECORDS if Settings.getConfig().larger_data else self.SMALL_NUMBER_OF_RECORDS
|
||||
fullTableName = db.getName() + '.' + regTableName
|
||||
|
||||
sql = "INSERT INTO {} VALUES ".format(fullTableName)
|
||||
|
@ -2016,21 +2021,23 @@ class TaskAddData(StateTransitionTask):
|
|||
dbc.execute(sql)
|
||||
|
||||
def _addData(self, db: Database, dbc, regTableName, te: TaskExecutor): # implied: NOT in batches
|
||||
numRecords = self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS
|
||||
numRecords = self.LARGE_NUMBER_OF_RECORDS if Settings.getConfig().larger_data else self.SMALL_NUMBER_OF_RECORDS
|
||||
|
||||
for j in range(numRecords): # number of records per table
|
||||
nextInt = db.getNextInt()
|
||||
nextTick = db.getNextTick()
|
||||
nextColor = db.getNextColor()
|
||||
if gConfig.record_ops:
|
||||
if Settings.getConfig().record_ops:
|
||||
self.prepToRecordOps()
|
||||
if self.fAddLogReady is None:
|
||||
raise CrashGenError("Unexpected empty fAddLogReady")
|
||||
self.fAddLogReady.write("Ready to write {} to {}\n".format(nextInt, regTableName))
|
||||
self.fAddLogReady.flush()
|
||||
os.fsync(self.fAddLogReady.fileno())
|
||||
|
||||
# TODO: too ugly trying to lock the table reliably, refactor...
|
||||
fullTableName = db.getName() + '.' + regTableName
|
||||
if gConfig.verify_data:
|
||||
if Settings.getConfig().verify_data:
|
||||
self.lockTable(fullTableName)
|
||||
# print("_w" + str(nextInt % 100), end="", flush=True) # Trace what was written
|
||||
|
||||
|
@ -2043,7 +2050,7 @@ class TaskAddData(StateTransitionTask):
|
|||
dbc.execute(sql)
|
||||
|
||||
# Quick hack, attach an update statement here. TODO: create an "update" task
|
||||
if (not gConfig.use_shadow_db) and Dice.throw(5) == 0: # 1 in N chance, plus not using shaddow DB
|
||||
if (not Settings.getConfig().use_shadow_db) and Dice.throw(5) == 0: # 1 in N chance, plus not using shaddow DB
|
||||
nextInt = db.getNextInt()
|
||||
nextColor = db.getNextColor()
|
||||
sql = "INSERt INTO {} VALUES ('{}', {}, '{}');".format( # "INSERt" means "update" here
|
||||
|
@ -2054,12 +2061,12 @@ class TaskAddData(StateTransitionTask):
|
|||
dbc.execute(sql)
|
||||
|
||||
except: # Any exception at all
|
||||
if gConfig.verify_data:
|
||||
if Settings.getConfig().verify_data:
|
||||
self.unlockTable(fullTableName)
|
||||
raise
|
||||
|
||||
# Now read it back and verify, we might encounter an error if table is dropped
|
||||
if gConfig.verify_data: # only if command line asks for it
|
||||
if Settings.getConfig().verify_data: # only if command line asks for it
|
||||
try:
|
||||
readBack = dbc.queryScalar("SELECT speed from {}.{} WHERE ts='{}'".
|
||||
format(db.getName(), regTableName, nextTick))
|
||||
|
@ -2086,7 +2093,9 @@ class TaskAddData(StateTransitionTask):
|
|||
# Successfully wrote the data into the DB, let's record it somehow
|
||||
te.recordDataMark(nextInt)
|
||||
|
||||
if gConfig.record_ops:
|
||||
if Settings.getConfig().record_ops:
|
||||
if self.fAddLogDone is None:
|
||||
raise CrashGenError("Unexpected empty fAddLogDone")
|
||||
self.fAddLogDone.write("Wrote {} to {}\n".format(nextInt, regTableName))
|
||||
self.fAddLogDone.flush()
|
||||
os.fsync(self.fAddLogDone.fileno())
|
||||
|
@ -2095,8 +2104,8 @@ class TaskAddData(StateTransitionTask):
|
|||
# ds = self._dbManager # Quite DANGEROUS here, may result in multi-thread client access
|
||||
db = self._db
|
||||
dbc = wt.getDbConn()
|
||||
numTables = self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES
|
||||
numRecords = self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS
|
||||
numTables = self.LARGE_NUMBER_OF_TABLES if Settings.getConfig().larger_data else self.SMALL_NUMBER_OF_TABLES
|
||||
numRecords = self.LARGE_NUMBER_OF_RECORDS if Settings.getConfig().larger_data else self.SMALL_NUMBER_OF_RECORDS
|
||||
tblSeq = list(range(numTables ))
|
||||
random.shuffle(tblSeq) # now we have random sequence
|
||||
for i in tblSeq:
|
||||
|
@ -2126,7 +2135,9 @@ class ThreadStacks: # stack info for all threads
|
|||
def __init__(self):
|
||||
self._allStacks = {}
|
||||
allFrames = sys._current_frames()
|
||||
for th in threading.enumerate():
|
||||
for th in threading.enumerate():
|
||||
if th.ident is None:
|
||||
continue
|
||||
stack = traceback.extract_stack(allFrames[th.ident])
|
||||
self._allStacks[th.native_id] = stack
|
||||
|
||||
|
@@ -2247,14 +2258,15 @@ class ClientManager:

def run(self, svcMgr):
# self._printLastNumbers()
-global gConfig
+# global gConfig

# Prepare Tde Instance
global gContainer
tInst = gContainer.defTdeInstance = TdeInstance() # "subdir to hold the instance"

-dbManager = DbManager(gConfig.connector_type, tInst.getDbTarget()) # Regular function
-thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
+cfg = Settings.getConfig()
+dbManager = DbManager(cfg.connector_type, tInst.getDbTarget()) # Regular function
+thPool = ThreadPool(cfg.num_threads, cfg.max_steps)
self.tc = ThreadCoordinator(thPool, dbManager)

Logging.info("Starting client instance: {}".format(tInst))
@@ -2267,7 +2279,8 @@ class ClientManager:


# Release global variables
-gConfig = None
+# gConfig = None
+Settings.clearConfig()
gSvcMgr = None
logger = None

@ -2298,7 +2311,7 @@ class ClientManager:
|
|||
class MainExec:
|
||||
def __init__(self):
|
||||
self._clientMgr = None
|
||||
self._svcMgr = None # type: ServiceManager
|
||||
self._svcMgr = None # type: Optional[ServiceManager]
|
||||
|
||||
signal.signal(signal.SIGTERM, self.sigIntHandler)
|
||||
signal.signal(signal.SIGINT, self.sigIntHandler)
|
||||
|
@ -2318,7 +2331,7 @@ class MainExec:
|
|||
|
||||
def runClient(self):
|
||||
global gSvcMgr
|
||||
if gConfig.auto_start_service:
|
||||
if Settings.getConfig().auto_start_service:
|
||||
gSvcMgr = self._svcMgr = ServiceManager(1) # hack alert
|
||||
gSvcMgr.startTaosServices() # we start, don't run
|
||||
|
||||
|
@ -2327,13 +2340,13 @@ class MainExec:
|
|||
try:
|
||||
ret = self._clientMgr.run(self._svcMgr) # stop TAOS service inside
|
||||
except requests.exceptions.ConnectionError as err:
|
||||
Logging.warning("Failed to open REST connection to DB: {}".format(err.getMessage()))
|
||||
Logging.warning("Failed to open REST connection to DB: {}".format(err))
|
||||
# don't raise
|
||||
return ret
|
||||
|
||||
def runService(self):
|
||||
global gSvcMgr
|
||||
gSvcMgr = self._svcMgr = ServiceManager(gConfig.num_dnodes) # save it in a global variable TODO: hack alert
|
||||
gSvcMgr = self._svcMgr = ServiceManager(Settings.getConfig().num_dnodes) # save it in a global variable TODO: hack alert
|
||||
|
||||
gSvcMgr.run() # run to some end state
|
||||
gSvcMgr = self._svcMgr = None
|
||||
|
@@ -2467,20 +2480,20 @@ class MainExec:
action='store_true',
help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')

-global gConfig
-gConfig = parser.parse_args()
-Settings.setConfig(gConfig) # TODO: fix this hack, consolidate this global var
+# global gConfig
+config = parser.parse_args()
+Settings.setConfig(config) # TODO: fix this hack, consolidate this global var

# Sanity check for arguments
-if gConfig.use_shadow_db and gConfig.max_dbs>1 :
+if Settings.getConfig().use_shadow_db and Settings.getConfig().max_dbs>1 :
raise CrashGenError("Cannot combine use-shadow-db with max-dbs of more than 1")

-Logging.clsInit(gConfig)
+Logging.clsInit(Settings.getConfig())

Dice.seed(0) # initial seeding of dice

def run(self):
-if gConfig.run_tdengine: # run server
+if Settings.getConfig().run_tdengine: # run server
try:
self.runService()
return 0 # success
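Editor's note: most of the churn in this file is mechanical, replacing reads of the module-level gConfig namespace with Settings.getConfig(), which gives the type checker a single typed access point instead of a mutable global. A rough sketch of the resulting flow follows; the option name and helper are illustrative, not the real parser from this file.

```python
import argparse

from crash_gen.settings import Settings


def install_config() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument('--max-dbs', type=int, default=0)  # stand-in for the real option set
    Settings.setConfig(parser.parse_args([]))               # installed once at startup


def can_drop_db() -> bool:
    # call sites read through the accessor instead of touching a global
    return Settings.getConfig().max_dbs == 0


install_config()
print(can_drop_db())  # True with the default of 0
```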
@@ -5,6 +5,7 @@ import time
import threading
import requests
from requests.auth import HTTPBasicAuth
+from crash_gen.types import QueryResult

import taos
from util.sql import *
@@ -18,7 +19,7 @@ import datetime
import traceback
# from .service_manager import TdeInstance

-import crash_gen.settings
+from crash_gen.settings import Settings

class DbConn:
TYPE_NATIVE = "native-c"
@ -79,7 +80,7 @@ class DbConn:
|
|||
raise RuntimeError("Cannot query database until connection is open")
|
||||
nRows = self.query(sql)
|
||||
if nRows != 1:
|
||||
raise taos.error.ProgrammingError(
|
||||
raise CrashGenError(
|
||||
"Unexpected result for query: {}, rows = {}".format(sql, nRows),
|
||||
(CrashGenError.INVALID_EMPTY_RESULT if nRows==0 else CrashGenError.INVALID_MULTIPLE_RESULT)
|
||||
)
|
||||
|
@ -115,7 +116,7 @@ class DbConn:
|
|||
try:
|
||||
self.execute(sql)
|
||||
return True # ignore num of results, return success
|
||||
except taos.error.ProgrammingError as err:
|
||||
except taos.error.Error as err:
|
||||
return False # failed, for whatever TAOS reason
|
||||
# Not possile to reach here, non-TAOS exception would have been thrown
|
||||
|
||||
|
@ -126,7 +127,7 @@ class DbConn:
|
|||
def openByType(self):
|
||||
raise RuntimeError("Unexpected execution, should be overriden")
|
||||
|
||||
def getQueryResult(self):
|
||||
def getQueryResult(self) -> QueryResult :
|
||||
raise RuntimeError("Unexpected execution, should be overriden")
|
||||
|
||||
def getResultRows(self):
|
||||
|
@ -221,7 +222,7 @@ class DbConnRest(DbConn):
|
|||
class MyTDSql:
|
||||
# Class variables
|
||||
_clsLock = threading.Lock() # class wide locking
|
||||
longestQuery = None # type: str
|
||||
longestQuery = '' # type: str
|
||||
longestQueryTime = 0.0 # seconds
|
||||
lqStartTime = 0.0
|
||||
# lqEndTime = 0.0 # Not needed, as we have the two above already
|
||||
|
@ -261,7 +262,7 @@ class MyTDSql:
|
|||
cls.lqStartTime = startTime
|
||||
|
||||
# Now write to the shadow database
|
||||
if crash_gen.settings.gConfig.use_shadow_db:
|
||||
if Settings.getConfig().use_shadow_db:
|
||||
if sql[:11] == "INSERT INTO":
|
||||
if sql[:16] == "INSERT INTO db_0":
|
||||
sql2 = "INSERT INTO db_s" + sql[16:]
|
||||
|
@ -453,31 +454,11 @@ class DbManager():
|
|||
''' Release the underlying DB connection upon deletion of DbManager '''
|
||||
self.cleanUp()
|
||||
|
||||
def getDbConn(self):
|
||||
def getDbConn(self) -> DbConn :
|
||||
if self._dbConn is None:
|
||||
raise CrashGenError("Unexpected empty DbConn")
|
||||
return self._dbConn
|
||||
|
||||
# TODO: not used any more, to delete
|
||||
def pickAndAllocateTable(self): # pick any table, and "use" it
|
||||
return self.tableNumQueue.pickAndAllocate()
|
||||
|
||||
# TODO: Not used any more, to delete
|
||||
def addTable(self):
|
||||
with self._lock:
|
||||
tIndex = self.tableNumQueue.push()
|
||||
return tIndex
|
||||
|
||||
# Not used any more, to delete
|
||||
def releaseTable(self, i): # return the table back, so others can use it
|
||||
self.tableNumQueue.release(i)
|
||||
|
||||
# TODO: not used any more, delete
|
||||
def getTableNameToDelete(self):
|
||||
tblNum = self.tableNumQueue.pop() # TODO: race condition!
|
||||
if (not tblNum): # maybe false
|
||||
return False
|
||||
|
||||
return "table_{}".format(tblNum)
|
||||
|
||||
def cleanUp(self):
|
||||
if self._dbConn:
|
||||
self._dbConn.close()
|
||||
|
|
|
@ -3,6 +3,7 @@ import random
|
|||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
import taos
|
||||
|
||||
|
@ -39,11 +40,11 @@ class MyLoggingAdapter(logging.LoggerAdapter):
|
|||
|
||||
|
||||
class Logging:
|
||||
logger = None
|
||||
logger = None # type: Optional[MyLoggingAdapter]
|
||||
|
||||
@classmethod
|
||||
def getLogger(cls):
|
||||
return logger
|
||||
return cls.logger
|
||||
|
||||
@classmethod
|
||||
def clsInit(cls, gConfig): # TODO: refactor away gConfig
|
||||
|
@ -60,7 +61,7 @@ class Logging:
|
|||
# Logging adapter, to be used as a logger
|
||||
# print("setting logger variable")
|
||||
# global logger
|
||||
cls.logger = MyLoggingAdapter(_logger, [])
|
||||
cls.logger = MyLoggingAdapter(_logger, {})
|
||||
|
||||
if (gConfig.debug):
|
||||
cls.logger.setLevel(logging.DEBUG) # default seems to be INFO
|
||||
|
@ -84,6 +85,7 @@ class Logging:
|
|||
cls.logger.error(msg)
|
||||
|
||||
class Status:
|
||||
STATUS_EMPTY = 99
|
||||
STATUS_STARTING = 1
|
||||
STATUS_RUNNING = 2
|
||||
STATUS_STOPPING = 3
|
||||
|
@ -95,12 +97,16 @@ class Status:
|
|||
def __repr__(self):
|
||||
return "[Status: v={}]".format(self._status)
|
||||
|
||||
def set(self, status):
|
||||
def set(self, status: int):
|
||||
self._status = status
|
||||
|
||||
def get(self):
|
||||
return self._status
|
||||
|
||||
def isEmpty(self):
|
||||
''' Empty/Undefined '''
|
||||
return self._status == Status.STATUS_EMPTY
|
||||
|
||||
def isStarting(self):
|
||||
return self._status == Status.STATUS_STARTING
|
||||
|
||||
|
@ -117,6 +123,9 @@ class Status:
|
|||
def isStable(self):
|
||||
return self.isRunning() or self.isStopped()
|
||||
|
||||
def isActive(self):
|
||||
return self.isStarting() or self.isRunning() or self.isStopping()
|
||||
|
||||
# Deterministic random number generator
|
||||
class Dice():
|
||||
seeded = False # static, uninitialized
|
||||
|
|
|
@ -3,12 +3,12 @@ from __future__ import annotations
|
|||
import os
|
||||
import io
|
||||
import sys
|
||||
from enum import Enum
|
||||
import threading
|
||||
import signal
|
||||
import logging
|
||||
import time
|
||||
from subprocess import PIPE, Popen, TimeoutExpired
|
||||
|
||||
from typing import IO, List, NewType, Optional
|
||||
|
||||
try:
|
||||
|
@ -16,12 +16,12 @@ try:
|
|||
except:
|
||||
print("Psutil module needed, please install: sudo pip3 install psutil")
|
||||
sys.exit(-1)
|
||||
|
||||
from queue import Queue, Empty
|
||||
|
||||
from .misc import Logging, Status, CrashGenError, Dice, Helper, Progress
|
||||
from .db import DbConn, DbTarget
|
||||
import crash_gen.settings
|
||||
from crash_gen.misc import CrashGenError, Dice, Helper, Logging, Progress, Status
|
||||
from crash_gen.db import DbConn, DbTarget
|
||||
from crash_gen.settings import Settings
|
||||
from crash_gen.types import DirPath
|
||||
|
||||
class TdeInstance():
|
||||
"""
|
||||
|
@@ -70,7 +70,10 @@ class TdeInstance():
self._fepPort = fepPort

self._tInstNum = tInstNum
-self._smThread = ServiceManagerThread()
+
+# An "Tde Instance" will *contain* a "sub process" object, with will/may use a thread internally
+# self._smThread = ServiceManagerThread()
+self._subProcess = None # type: Optional[TdeSubProcess]

def getDbTarget(self):
return DbTarget(self.getCfgDir(), self.getHostAddr(), self._port)
@ -155,21 +158,21 @@ quorum 2
|
|||
def getExecFile(self): # .../taosd
|
||||
return self._buildDir + "/build/bin/taosd"
|
||||
|
||||
def getRunDir(self): # TODO: rename to "root dir" ?!
|
||||
return self._buildDir + self._subdir
|
||||
def getRunDir(self) -> DirPath : # TODO: rename to "root dir" ?!
|
||||
return DirPath(self._buildDir + self._subdir)
|
||||
|
||||
def getCfgDir(self): # path, not file
|
||||
return self.getRunDir() + "/cfg"
|
||||
def getCfgDir(self) -> DirPath : # path, not file
|
||||
return DirPath(self.getRunDir() + "/cfg")
|
||||
|
||||
def getLogDir(self):
|
||||
return self.getRunDir() + "/log"
|
||||
def getLogDir(self) -> DirPath :
|
||||
return DirPath(self.getRunDir() + "/log")
|
||||
|
||||
def getHostAddr(self):
|
||||
return "127.0.0.1"
|
||||
|
||||
def getServiceCmdLine(self): # to start the instance
|
||||
cmdLine = []
|
||||
if crash_gen.settings.gConfig.track_memory_leaks:
|
||||
if Settings.getConfig().track_memory_leaks:
|
||||
Logging.info("Invoking VALGRIND on service...")
|
||||
cmdLine = ['valgrind', '--leak-check=yes']
|
||||
# TODO: move "exec -c" into Popen(), we can both "use shell" and NOT fork so ask to lose kill control
|
||||
|
@@ -199,27 +202,46 @@ quorum 2
dbc.close()

def getStatus(self):
-return self._smThread.getStatus()
+# return self._smThread.getStatus()
+if self._subProcess is None:
+return Status(Status.STATUS_EMPTY)
+return self._subProcess.getStatus()

-def getSmThread(self):
-return self._smThread
+# def getSmThread(self):
+# return self._smThread

def start(self):
-if not self.getStatus().isStopped():
+if self.getStatus().isActive():
raise CrashGenError("Cannot start instance from status: {}".format(self.getStatus()))

Logging.info("Starting TDengine instance: {}".format(self))
self.generateCfgFile() # service side generates config file, client does not
self.rotateLogs()

-self._smThread.start(self.getServiceCmdLine(), self.getLogDir()) # May raise exceptions
+# self._smThread.start(self.getServiceCmdLine(), self.getLogDir()) # May raise exceptions
+self._subProcess = TdeSubProcess(self.getServiceCmdLine(), self.getLogDir())

def stop(self):
-self._smThread.stop()
+self._subProcess.stop()
+self._subProcess = None

def isFirst(self):
return self._tInstNum == 0

+def printFirst10Lines(self):
+if self._subProcess is None:
+Logging.warning("Incorrect TI status for procIpcBatch-10 operation")
+return
+self._subProcess.procIpcBatch(trimToTarget=10, forceOutput=True)
+
+def procIpcBatch(self):
+if self._subProcess is None:
+Logging.warning("Incorrect TI status for procIpcBatch operation")
+return
+self._subProcess.procIpcBatch() # may enounter EOF and change status to STOPPED
+if self._subProcess.getStatus().isStopped():
+self._subProcess.stop()
+self._subProcess = None

class TdeSubProcess:
"""
@@ -237,16 +259,21 @@ class TdeSubProcess:
# RET_TIME_OUT = -3
# RET_SUCCESS = -4

-def __init__(self, po: Popen):
-self._popen = po # type: Popen
-# if tInst is None:
-# raise CrashGenError("Empty instance not allowed in TdeSubProcess")
-# self._tInst = tInst # Default create at ServiceManagerThread
+def __init__(self, cmdLine: List[str], logDir: DirPath):
+# Create the process + managing thread immediately
+
+Logging.info("Attempting to start TAOS sub process...")
+self._popen = self._start(cmdLine) # the actual sub process
+self._smThread = ServiceManagerThread(self, logDir) # A thread to manage the sub process, mostly to process the IO
+Logging.info("Successfully started TAOS process: {}".format(self))


def __repr__(self):
# if self.subProcess is None:
# return '[TdeSubProc: Empty]'
-return '[TdeSubProc: pid = {}]'.format(self.getPid())
+return '[TdeSubProc: pid = {}, status = {}]'.format(
+self.getPid(), self.getStatus() )

def getStdOut(self):
return self._popen.stdout
@ -261,14 +288,14 @@ class TdeSubProcess:
|
|||
def getPid(self):
|
||||
return self._popen.pid
|
||||
|
||||
@classmethod
|
||||
def start(cls, cmdLine):
|
||||
def _start(self, cmdLine) -> Popen :
|
||||
ON_POSIX = 'posix' in sys.builtin_module_names
|
||||
|
||||
# Sanity check
|
||||
# if self.subProcess: # already there
|
||||
# raise RuntimeError("Corrupt process state")
|
||||
|
||||
|
||||
# Prepare environment variables for coverage information
|
||||
# Ref: https://stackoverflow.com/questions/2231227/python-subprocess-popen-with-a-modified-environment
|
||||
myEnv = os.environ.copy()
|
||||
|
@ -279,7 +306,7 @@ class TdeSubProcess:
|
|||
# print("Starting TDengine via Shell: {}".format(cmdLineStr))
|
||||
|
||||
# useShell = True # Needed to pass environments into it
|
||||
popen = Popen(
|
||||
return Popen(
|
||||
' '.join(cmdLine), # ' '.join(cmdLine) if useShell else cmdLine,
|
||||
shell=True, # Always use shell, since we need to pass ENV vars
|
||||
stdout=PIPE,
|
||||
|
@ -287,15 +314,15 @@ class TdeSubProcess:
|
|||
close_fds=ON_POSIX,
|
||||
env=myEnv
|
||||
) # had text=True, which interferred with reading EOF
|
||||
return cls(popen)
|
||||
|
||||
STOP_SIGNAL = signal.SIGINT # signal.SIGKILL/SIGINT # What signal to use (in kill) to stop a taosd process?
|
||||
SIG_KILL_RETCODE = 137 # ref: https://stackoverflow.com/questions/43268156/process-finished-with-exit-code-137-in-pycharm
|
||||
|
||||
@classmethod
|
||||
def stop(cls, tsp: TdeSubProcess):
|
||||
def stop(self):
|
||||
"""
|
||||
Stop a sub process, DO NOT return anything, process all conditions INSIDE
|
||||
Stop a sub process, DO NOT return anything, process all conditions INSIDE.
|
||||
|
||||
Calling function should immediately delete/unreference the object
|
||||
|
||||
Common POSIX signal values (from man -7 signal):
|
||||
SIGHUP 1
|
||||
|
@@ -315,11 +342,17 @@ class TdeSubProcess:
"""
# self._popen should always be valid.

-# if not self.subProcess:
-# Logging.error("Sub process already stopped")
-# return
+Logging.info("Terminating TDengine service running as the sub process...")
+if self.getStatus().isStopped():
+Logging.info("Service already stopped")
+return
+if self.getStatus().isStopping():
+Logging.info("Service is already being stopped, pid: {}".format(self.getPid()))
+return

-retCode = tsp._popen.poll() # ret -N means killed with signal N, otherwise it's from exit(N)
+self.setStatus(Status.STATUS_STOPPING)
+
+retCode = self._popen.poll() # ret -N means killed with signal N, otherwise it's from exit(N)
if retCode: # valid return code, process ended
# retCode = -retCode # only if valid
Logging.warning("TSP.stop(): process ended itself")
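Editor's note: the early return above relies on the subprocess return-code convention that poll() yields None while the child is still running, a non-negative status if it exited on its own, and -N if signal N killed it. A standalone, POSIX-only illustration (not taken from this repository):

```python
import signal
import subprocess

proc = subprocess.Popen(["sleep", "60"])
print(proc.poll())                 # None: still running, so stop() has to signal it

proc.send_signal(signal.SIGINT)    # the same STOP_SIGNAL the diff uses
ret = proc.wait()
print(ret, ret == -signal.SIGINT)  # -2 True: the negative code names the signal
```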
@@ -327,9 +360,12 @@ class TdeSubProcess:
return

# process still alive, let's interrupt it
-cls._stopForSure(tsp._popen, cls.STOP_SIGNAL) # success if no exception
+self._stopForSure(self._popen, self.STOP_SIGNAL) # success if no exception

# sub process should end, then IPC queue should end, causing IO thread to end
+self._smThread.stop() # stop for sure too
+
+self.setStatus(Status.STATUS_STOPPED)

@classmethod
def _stopForSure(cls, proc: Popen, sig: int):
@ -357,13 +393,13 @@ class TdeSubProcess:
|
|||
Logging.info("Killing sub-sub process {} with signal {}".format(child.pid, sig))
|
||||
child.send_signal(sig)
|
||||
try:
|
||||
retCode = child.wait(20)
|
||||
if (- retCode) == signal.SIGSEGV: # Crashed
|
||||
retCode = child.wait(20) # type: ignore
|
||||
if (- retCode) == signal.SIGSEGV: # type: ignore # Crashed
|
||||
Logging.warning("Process {} CRASHED, please check CORE file!".format(child.pid))
|
||||
elif (- retCode) == sig :
|
||||
elif (- retCode) == sig : # type: ignore
|
||||
Logging.info("Sub-sub process terminated with expected return code {}".format(sig))
|
||||
else:
|
||||
Logging.warning("Process terminated, EXPECTING ret code {}, got {}".format(sig, -retCode))
|
||||
Logging.warning("Process terminated, EXPECTING ret code {}, got {}".format(sig, -retCode)) # type: ignore
|
||||
return True # terminated successfully
|
||||
except psutil.TimeoutExpired as err:
|
||||
Logging.warning("Failed to kill sub-sub process {} with signal {}".format(child.pid, sig))
|
||||
|
@ -408,6 +444,15 @@ class TdeSubProcess:
|
|||
return
|
||||
raise CrashGenError("Failed to stop process, pid={}".format(pid))
|
||||
|
||||
def getStatus(self):
|
||||
return self._smThread.getStatus()
|
||||
|
||||
def setStatus(self, status):
|
||||
self._smThread.setStatus(status)
|
||||
|
||||
def procIpcBatch(self, trimToTarget=0, forceOutput=False):
|
||||
self._smThread.procIpcBatch(trimToTarget, forceOutput)
|
||||
|
||||
class ServiceManager:
|
||||
PAUSE_BETWEEN_IPC_CHECK = 1.2 # seconds between checks on STDOUT of sub process
|
||||
|
||||
|
@ -504,10 +549,10 @@ class ServiceManager:
|
|||
def isActive(self):
|
||||
"""
|
||||
Determine if the service/cluster is active at all, i.e. at least
|
||||
one thread is not "stopped".
|
||||
one instance is active
|
||||
"""
|
||||
for ti in self._tInsts:
|
||||
if not ti.getStatus().isStopped():
|
||||
if ti.getStatus().isActive():
|
||||
return True
|
||||
return False
|
||||
|
||||
|
@ -545,10 +590,10 @@ class ServiceManager:
|
|||
# while self.isRunning() or self.isRestarting() : # for as long as the svc mgr thread is still here
|
||||
status = ti.getStatus()
|
||||
if status.isRunning():
|
||||
th = ti.getSmThread()
|
||||
th.procIpcBatch() # regular processing,
|
||||
# th = ti.getSmThread()
|
||||
ti.procIpcBatch() # regular processing,
|
||||
if status.isStopped():
|
||||
th.procIpcBatch() # one last time?
|
||||
ti.procIpcBatch() # one last time?
|
||||
# self._updateThreadStatus()
|
||||
|
||||
time.sleep(self.PAUSE_BETWEEN_IPC_CHECK) # pause, before next round
|
||||
|
@ -578,7 +623,8 @@ class ServiceManager:
|
|||
if not ti.isFirst():
|
||||
tFirst = self._getFirstInstance()
|
||||
tFirst.createDnode(ti.getDbTarget())
|
||||
ti.getSmThread().procIpcBatch(trimToTarget=10, forceOutput=True) # for printing 10 lines
|
||||
ti.printFirst10Lines()
|
||||
# ti.getSmThread().procIpcBatch(trimToTarget=10, forceOutput=True) # for printing 10 lines
|
||||
|
||||
def stopTaosServices(self):
|
||||
with self._lock:
|
||||
|
@@ -624,21 +670,24 @@ class ServiceManagerThread:
"""
MAX_QUEUE_SIZE = 10000

-def __init__(self):
+def __init__(self, subProc: TdeSubProcess, logDir: str):
# Set the sub process
-self._tdeSubProcess = None # type: TdeSubProcess
+# self._tdeSubProcess = None # type: TdeSubProcess

# Arrange the TDengine instance
# self._tInstNum = tInstNum # instance serial number in cluster, ZERO based
# self._tInst = tInst or TdeInstance() # Need an instance

-self._thread = None # The actual thread, # type: threading.Thread
-self._thread2 = None # watching stderr
+# self._thread = None # type: Optional[threading.Thread] # The actual thread, # type: threading.Thread
+# self._thread2 = None # type: Optional[threading.Thread] Thread # watching stderr
self._status = Status(Status.STATUS_STOPPED) # The status of the underlying service, actually.

+self._start(subProc, logDir)

def __repr__(self):
-return "[SvcMgrThread: status={}, subProc={}]".format(
-self.getStatus(), self._tdeSubProcess)
+raise CrashGenError("SMT status moved to TdeSubProcess")
+# return "[SvcMgrThread: status={}, subProc={}]".format(
+# self.getStatus(), self._tdeSubProcess)

def getStatus(self):
'''
@ -646,29 +695,33 @@ class ServiceManagerThread:
|
|||
'''
|
||||
return self._status
|
||||
|
||||
def setStatus(self, statusVal: int):
|
||||
self._status.set(statusVal)
|
||||
|
||||
# Start the thread (with sub process), and wait for the sub service
|
||||
# to become fully operational
|
||||
def start(self, cmdLine : str, logDir: str):
|
||||
def _start(self, subProc :TdeSubProcess, logDir: str):
|
||||
'''
|
||||
Request the manager thread to start a new sub process, and manage it.
|
||||
|
||||
:param cmdLine: the command line to invoke
|
||||
:param logDir: the logging directory, to hold stdout/stderr files
|
||||
'''
|
||||
if self._thread:
|
||||
raise RuntimeError("Unexpected _thread")
|
||||
if self._tdeSubProcess:
|
||||
raise RuntimeError("TDengine sub process already created/running")
|
||||
# if self._thread:
|
||||
# raise RuntimeError("Unexpected _thread")
|
||||
# if self._tdeSubProcess:
|
||||
# raise RuntimeError("TDengine sub process already created/running")
|
||||
|
||||
Logging.info("Attempting to start TAOS service: {}".format(self))
|
||||
# Moved to TdeSubProcess
|
||||
# Logging.info("Attempting to start TAOS service: {}".format(self))
|
||||
|
||||
self._status.set(Status.STATUS_STARTING)
|
||||
self._tdeSubProcess = TdeSubProcess.start(cmdLine) # TODO: verify process is running
|
||||
# self._tdeSubProcess = TdeSubProcess.start(cmdLine) # TODO: verify process is running
|
||||
|
||||
self._ipcQueue = Queue() # type: Queue
|
||||
self._thread = threading.Thread( # First thread captures server OUTPUT
|
||||
target=self.svcOutputReader,
|
||||
args=(self._tdeSubProcess.getStdOut(), self._ipcQueue, logDir))
|
||||
args=(subProc.getStdOut(), self._ipcQueue, logDir))
|
||||
self._thread.daemon = True # thread dies with the program
|
||||
self._thread.start()
|
||||
time.sleep(0.01)
|
||||
|
@ -680,7 +733,7 @@ class ServiceManagerThread:
|
|||
|
||||
self._thread2 = threading.Thread( # 2nd thread captures server ERRORs
|
||||
target=self.svcErrorReader,
|
||||
args=(self._tdeSubProcess.getStdErr(), self._ipcQueue, logDir))
|
||||
args=(subProc.getStdErr(), self._ipcQueue, logDir))
|
||||
self._thread2.daemon = True # thread dies with the program
|
||||
self._thread2.start()
|
||||
time.sleep(0.01)
|
||||
|
@ -695,14 +748,14 @@ class ServiceManagerThread:
|
|||
Progress.emit(Progress.SERVICE_START_NAP)
|
||||
# print("_zz_", end="", flush=True)
|
||||
if self._status.isRunning():
|
||||
Logging.info("[] TDengine service READY to process requests")
|
||||
Logging.info("[] TAOS service started: {}".format(self))
|
||||
Logging.info("[] TDengine service READY to process requests: pid={}".format(subProc.getPid()))
|
||||
# Logging.info("[] TAOS service started: {}".format(self))
|
||||
# self._verifyDnode(self._tInst) # query and ensure dnode is ready
|
||||
# Logging.debug("[] TAOS Dnode verified: {}".format(self))
|
||||
return # now we've started
|
||||
# TODO: handle failure-to-start better?
|
||||
self.procIpcBatch(100, True) # display output before cronking out, trim to last 20 msgs, force output
|
||||
raise RuntimeError("TDengine service did not start successfully: {}".format(self))
|
||||
raise RuntimeError("TDengine service DID NOT achieve READY status: pid={}".format(subProc.getPid()))
|
||||
|
||||
def _verifyDnode(self, tInst: TdeInstance):
|
||||
dbc = DbConn.createNative(tInst.getDbTarget())
|
||||
|
@ -722,29 +775,23 @@ class ServiceManagerThread:
|
|||
break
|
||||
if not isValid:
|
||||
print("Failed to start dnode, sleep for a while")
|
||||
time.sleep(600)
|
||||
time.sleep(10.0)
|
||||
raise RuntimeError("Failed to start Dnode, expected port not found: {}".
|
||||
format(tInst.getPort()))
|
||||
dbc.close()
|
||||
|
||||
def stop(self):
|
||||
# can be called from both main thread or signal handler
|
||||
Logging.info("Terminating TDengine service running as the sub process...")
|
||||
if self.getStatus().isStopped():
|
||||
Logging.info("Service already stopped")
|
||||
return
|
||||
if self.getStatus().isStopping():
|
||||
Logging.info("Service is already being stopped, pid: {}".format(self._tdeSubProcess.getPid()))
|
||||
return
|
||||
|
||||
# Linux will send Control-C generated SIGINT to the TDengine process
|
||||
# already, ref:
|
||||
# https://unix.stackexchange.com/questions/176235/fork-and-how-signals-are-delivered-to-processes
|
||||
if not self._tdeSubProcess:
|
||||
raise RuntimeError("sub process object missing")
|
||||
# if not self._tdeSubProcess:
|
||||
# raise RuntimeError("sub process object missing")
|
||||
|
||||
self._status.set(Status.STATUS_STOPPING)
|
||||
TdeSubProcess.stop(self._tdeSubProcess) # must stop, no matter what
|
||||
self._tdeSubProcess = None
|
||||
# self._status.set(Status.STATUS_STOPPING)
|
||||
# TdeSubProcess.stop(self._tdeSubProcess) # must stop, no matter what
|
||||
# self._tdeSubProcess = None
|
||||
# if not self._tdeSubProcess.stop(): # everything withing
|
||||
# if self._tdeSubProcess.isRunning(): # still running, should now never happen
|
||||
# Logging.error("FAILED to stop sub process, it is still running... pid = {}".format(
|
||||
|
@ -757,29 +804,28 @@ class ServiceManagerThread:
|
|||
outputLines = 10 # for last output
|
||||
if self.getStatus().isStopped():
|
||||
self.procIpcBatch(outputLines) # one last time
|
||||
Logging.debug("End of TDengine Service Output: {}".format(self))
|
||||
Logging.debug("End of TDengine Service Output")
|
||||
Logging.info("----- TDengine Service (managed by SMT) is now terminated -----\n")
|
||||
else:
|
||||
print("WARNING: SMT did not terminate as expected: {}".format(self))
|
||||
print("WARNING: SMT did not terminate as expected")
|
||||
|
||||
def join(self):
|
||||
# TODO: sanity check
|
||||
if not self.getStatus().isStopping():
|
||||
s = self.getStatus()
|
||||
if s.isStopping() or s.isStopped(): # we may be stopping ourselves, or have been stopped/killed by others
|
||||
if self._thread or self._thread2 :
|
||||
if self._thread:
|
||||
self._thread.join()
|
||||
self._thread = None
|
||||
if self._thread2: # STD ERR thread
|
||||
self._thread2.join()
|
||||
self._thread2 = None
|
||||
else:
|
||||
Logging.warning("Joining empty thread, doing nothing")
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"SMT.Join(): Unexpected status: {}".format(self._status))
|
||||
|
||||
if self._thread or self._thread2 :
|
||||
if self._thread:
|
||||
self._thread.join()
|
||||
self._thread = None
|
||||
if self._thread2: # STD ERR thread
|
||||
self._thread2.join()
|
||||
self._thread2 = None
|
||||
else:
|
||||
print("Joining empty thread, doing nothing")
|
||||
|
||||
self._status.set(Status.STATUS_STOPPED)
|
||||
|
||||
def _trimQueue(self, targetSize):
|
||||
if targetSize <= 0:
|
||||
return # do nothing
|
||||
|
@ -798,6 +844,10 @@ class ServiceManagerThread:
|
|||
TD_READY_MSG = "TDengine is initialized successfully"
|
||||
|
||||
def procIpcBatch(self, trimToTarget=0, forceOutput=False):
|
||||
'''
|
||||
Process a batch of STDOUT/STDERR data, until we read EMPTY from
|
||||
the pipe.
|
||||
'''
|
||||
self._trimQueue(trimToTarget) # trim if necessary
|
||||
# Process all the output generated by the underlying sub process,
|
||||
# managed by IO thread
|
||||
|
@ -887,7 +937,8 @@ class ServiceManagerThread:
|
|||
|
||||
# queue.put(line)
|
||||
# meaning sub process must have died
|
||||
Logging.info("EOF for TDengine STDOUT: {}".format(self))
|
||||
Logging.info("EOF found TDengine STDOUT, marking the process as terminated")
|
||||
self.setStatus(Status.STATUS_STOPPED)
|
||||
out.close() # Close the stream
|
||||
fOut.close() # Close the output file
|
||||
|
||||
|
@ -898,6 +949,6 @@ class ServiceManagerThread:
|
|||
for line in iter(err.readline, b''):
|
||||
fErr.write(line)
|
||||
Logging.info("TDengine STDERR: {}".format(line))
|
||||
Logging.info("EOF for TDengine STDERR: {}".format(self))
|
||||
Logging.info("EOF for TDengine STDERR")
|
||||
err.close()
|
||||
fErr.close()
|
|
@@ -1,15 +1,29 @@
from __future__ import annotations
import argparse
from typing import Optional

-gConfig: argparse.Namespace
+from crash_gen.misc import CrashGenError
+
+# gConfig: Optional[argparse.Namespace]

class Settings:
+_config = None # type Optional[argparse.Namespace]

@classmethod
def init(cls):
-global gConfig
-gConfig = []
+cls._config = None

@classmethod
-def setConfig(cls, config):
-global gConfig
-gConfig = config
+def setConfig(cls, config: argparse.Namespace):
+cls._config = config

+@classmethod
+# TODO: check items instead of exposing everything
+def getConfig(cls) -> argparse.Namespace:
+if cls._config is None:
+raise CrashGenError("invalid state")
+return cls._config
+
+@classmethod
+def clearConfig(cls):
+cls._config = None
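Editor's note: the accessor is deliberately strict, so reading configuration before setConfig() (or after clearConfig()) raises instead of silently returning None; that is what lets call sites drop their own guards. A small usage sketch, assuming the crash_gen package is importable:

```python
import argparse

from crash_gen.misc import CrashGenError
from crash_gen.settings import Settings

try:
    Settings.getConfig()                       # nothing installed yet
except CrashGenError as err:
    print("expected:", err)

Settings.setConfig(argparse.Namespace(debug=True))
print(Settings.getConfig().debug)              # True
Settings.clearConfig()                         # back to the uninitialized state
```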
@@ -0,0 +1,5 @@
+from typing import Any, List, NewType
+
+DirPath = NewType('DirPath', str)
+
+QueryResult = NewType('QueryResult', List[List[Any]])
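Editor's note: DirPath and QueryResult are typing.NewType aliases, free at runtime but distinct types to Pylance, which fits the stricter checking this commit moves to. A short illustration; rotate_logs is a hypothetical function, not one from the codebase:

```python
from crash_gen.types import DirPath


def rotate_logs(log_dir: DirPath) -> None:
    print("rotating logs under", log_dir)


rotate_logs(DirPath("/tmp/taos/log"))  # OK: value is explicitly wrapped
rotate_logs("/tmp/taos/log")           # flagged by the type checker; still runs fine
```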