fix(stream): fix sma error.
This commit is contained in:
parent
9d8f6f37fe
commit
402c091df0
|
@ -267,11 +267,11 @@ typedef struct SCheckpointInfo {
|
||||||
|
|
||||||
typedef struct SStreamStatus {
|
typedef struct SStreamStatus {
|
||||||
int8_t taskStatus;
|
int8_t taskStatus;
|
||||||
int8_t checkDownstream;
|
int8_t checkDownstream; // downstream tasks are all ready now, if this flag is set
|
||||||
int8_t schedStatus;
|
int8_t schedStatus;
|
||||||
int8_t keepTaskStatus;
|
int8_t keepTaskStatus;
|
||||||
bool transferState;
|
bool transferState;
|
||||||
TdThreadMutex lock;
|
int8_t timerActive; // timer is active
|
||||||
} SStreamStatus;
|
} SStreamStatus;
|
||||||
|
|
||||||
typedef struct SHistDataRange {
|
typedef struct SHistDataRange {
|
||||||
|
|
|
@ -168,16 +168,53 @@ void tqNotifyClose(STQ* pTq) {
|
||||||
}
|
}
|
||||||
|
|
||||||
SStreamTask* pTask = *(SStreamTask**)pIter;
|
SStreamTask* pTask = *(SStreamTask**)pIter;
|
||||||
tqDebug("vgId:%d s-task:%s set dropping flag", pTq->pStreamMeta->vgId, pTask->id.idStr);
|
tqDebug("vgId:%d s-task:%s set closing flag", pTq->pStreamMeta->vgId, pTask->id.idStr);
|
||||||
pTask->status.taskStatus = TASK_STATUS__STOP;
|
pTask->status.taskStatus = TASK_STATUS__STOP;
|
||||||
|
|
||||||
int64_t st = taosGetTimestampMs();
|
int64_t st = taosGetTimestampMs();
|
||||||
qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS);
|
qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS);
|
||||||
|
|
||||||
int64_t el = taosGetTimestampMs() - st;
|
int64_t el = taosGetTimestampMs() - st;
|
||||||
tqDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pTq->pStreamMeta->vgId, pTask->id.idStr, el);
|
tqDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pTq->pStreamMeta->vgId, pTask->id.idStr, el);
|
||||||
}
|
}
|
||||||
|
|
||||||
taosWUnLockLatch(&pTq->pStreamMeta->lock);
|
taosWUnLockLatch(&pTq->pStreamMeta->lock);
|
||||||
|
|
||||||
|
tqDebug("vgId:%d start to check all tasks", pTq->pStreamMeta->vgId);
|
||||||
|
|
||||||
|
int64_t st = taosGetTimestampMs();
|
||||||
|
while(1) {
|
||||||
|
taosMsleep(1000);
|
||||||
|
|
||||||
|
bool inTimer = false;
|
||||||
|
|
||||||
|
taosWLockLatch(&pTq->pStreamMeta->lock);
|
||||||
|
pIter = NULL;
|
||||||
|
|
||||||
|
while(1) {
|
||||||
|
pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter);
|
||||||
|
if (pIter == NULL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
SStreamTask* pTask = *(SStreamTask**)pIter;
|
||||||
|
if (pTask->status.timerActive == 1) {
|
||||||
|
inTimer = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
taosWUnLockLatch(&pTq->pStreamMeta->lock);
|
||||||
|
|
||||||
|
if (inTimer) {
|
||||||
|
tqDebug("vgId:%d some tasks in timer, wait for 1sec and recheck", pTq->pStreamMeta->vgId);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t el = taosGetTimestampMs() - st;
|
||||||
|
tqDebug("vgId:%d all stream tasks are not in timer, continue close, elapsed time:%"PRId64" ms", pTq->pStreamMeta->vgId, el);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -280,18 +280,53 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) {
|
void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) {
|
||||||
taosWLockLatch(&pMeta->lock);
|
SStreamTask* pTask = NULL;
|
||||||
|
|
||||||
|
// pre-delete operation
|
||||||
|
taosWLockLatch(&pMeta->lock);
|
||||||
SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t));
|
SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t));
|
||||||
if (ppTask) {
|
if (ppTask) {
|
||||||
SStreamTask* pTask = *ppTask;
|
pTask = *ppTask;
|
||||||
|
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING);
|
||||||
|
} else {
|
||||||
|
qDebug("vgId:%d failed to find the task:0x%x, it may be dropped already", pMeta->vgId, taskId);
|
||||||
|
taosWUnLockLatch(&pMeta->lock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
taosWUnLockLatch(&pMeta->lock);
|
||||||
|
|
||||||
|
qDebug("s-task:0x%x set task status:%s", taskId, streamGetTaskStatusStr(TASK_STATUS__DROPPING));
|
||||||
|
|
||||||
|
while(1) {
|
||||||
|
taosRLockLatch(&pMeta->lock);
|
||||||
|
ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t));
|
||||||
|
|
||||||
|
if (ppTask) {
|
||||||
|
if ((*ppTask)->status.timerActive == 0) {
|
||||||
|
taosRUnLockLatch(&pMeta->lock);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
taosMsleep(10);
|
||||||
|
qDebug("s-task:%s wait for quit from timer", (*ppTask)->id.idStr);
|
||||||
|
taosRUnLockLatch(&pMeta->lock);
|
||||||
|
} else {
|
||||||
|
taosRUnLockLatch(&pMeta->lock);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// let's do delete of stream task
|
||||||
|
taosWLockLatch(&pMeta->lock);
|
||||||
|
ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t));
|
||||||
|
if (ppTask) {
|
||||||
taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t));
|
taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t));
|
||||||
tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), pMeta->txn);
|
tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), pMeta->txn);
|
||||||
|
|
||||||
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING);
|
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING);
|
||||||
int32_t num = taosArrayGetSize(pMeta->pTaskList);
|
ASSERT(pTask->status.timerActive == 0);
|
||||||
|
|
||||||
|
int32_t num = taosArrayGetSize(pMeta->pTaskList);
|
||||||
qDebug("s-task:%s set the drop task flag, remain running s-task:%d", pTask->id.idStr, num - 1);
|
qDebug("s-task:%s set the drop task flag, remain running s-task:%d", pTask->id.idStr, num - 1);
|
||||||
for (int32_t i = 0; i < num; ++i) {
|
for (int32_t i = 0; i < num; ++i) {
|
||||||
int32_t* pTaskId = taosArrayGet(pMeta->pTaskList, i);
|
int32_t* pTaskId = taosArrayGet(pMeta->pTaskList, i);
|
||||||
|
|
|
@ -446,20 +446,54 @@ static void doCheckDownstreamStatus(SStreamTask* pTask, SStreamTask* pHTask) {
|
||||||
streamTaskCheckDownstreamTasks(pHTask);
|
streamTaskCheckDownstreamTasks(pHTask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct SStreamTaskRetryInfo {
|
||||||
|
SStreamMeta* pMeta;
|
||||||
|
int32_t taskId;
|
||||||
|
} SStreamTaskRetryInfo;
|
||||||
|
|
||||||
static void tryLaunchHistoryTask(void* param, void* tmrId) {
|
static void tryLaunchHistoryTask(void* param, void* tmrId) {
|
||||||
SStreamTask* pTask = param;
|
SStreamTaskRetryInfo* pInfo = param;
|
||||||
|
SStreamMeta* pMeta = pInfo->pMeta;
|
||||||
|
|
||||||
SStreamMeta* pMeta = pTask->pMeta;
|
qDebug("s-task:0x%x in timer to launch history task", pInfo->taskId);
|
||||||
SStreamTask** pHTask = taosHashGet(pMeta->pTasks, &pTask->historyTaskId.taskId, sizeof(pTask->historyTaskId.taskId));
|
|
||||||
if (pHTask == NULL) {
|
|
||||||
qWarn("s-task:%s vgId:%d failed to launch history task:0x%x, since it is not built yet", pTask->id.idStr,
|
|
||||||
pMeta->vgId, pTask->historyTaskId.taskId);
|
|
||||||
|
|
||||||
taosTmrReset(tryLaunchHistoryTask, 100, pTask, streamEnv.timer, &pTask->timer);
|
taosWLockLatch(&pMeta->lock);
|
||||||
return;
|
SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &pInfo->taskId, sizeof(int32_t));
|
||||||
|
if (ppTask) {
|
||||||
|
ASSERT((*ppTask)->status.timerActive == 1);
|
||||||
|
if (streamTaskShouldStop(&(*ppTask)->status)) {
|
||||||
|
qDebug("s-task:%s status:%s quit timer task", (*ppTask)->id.idStr,
|
||||||
|
streamGetTaskStatusStr((*ppTask)->status.taskStatus));
|
||||||
|
(*ppTask)->status.timerActive = 0;
|
||||||
|
taosWUnLockLatch(&pMeta->lock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
taosWUnLockLatch(&pMeta->lock);
|
||||||
|
|
||||||
doCheckDownstreamStatus(pTask, *pHTask);
|
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pInfo->taskId);
|
||||||
|
if (pTask != NULL) {
|
||||||
|
ASSERT(pTask->status.timerActive == 1);
|
||||||
|
|
||||||
|
// abort the timer if intend to stop task
|
||||||
|
SStreamTask* pHTask = streamMetaAcquireTask(pMeta, pTask->historyTaskId.taskId);
|
||||||
|
if (pHTask == NULL && pTask->status.taskStatus == TASK_STATUS__NORMAL) {
|
||||||
|
qWarn("s-task:%s vgId:%d failed to launch history task:0x%x, since it may not be built or have been destroyed",
|
||||||
|
pTask->id.idStr, pMeta->vgId, pTask->historyTaskId.taskId);
|
||||||
|
|
||||||
|
taosTmrReset(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer, &pTask->timer);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
doCheckDownstreamStatus(pTask, pHTask);
|
||||||
|
|
||||||
|
// not in timer anymore
|
||||||
|
pTask->status.timerActive = 0;
|
||||||
|
streamMetaReleaseTask(pMeta, pHTask);
|
||||||
|
streamMetaReleaseTask(pMeta, pTask);
|
||||||
|
} else {
|
||||||
|
qError("s-task:0x%x failed to load task", pInfo->taskId);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo fix the bug: 2. race condition
|
// todo fix the bug: 2. race condition
|
||||||
|
@ -474,9 +508,16 @@ int32_t streamCheckHistoryTaskDownstrem(SStreamTask* pTask) {
|
||||||
pMeta->vgId, pTask->historyTaskId.taskId);
|
pMeta->vgId, pTask->historyTaskId.taskId);
|
||||||
|
|
||||||
if (pTask->timer == NULL) {
|
if (pTask->timer == NULL) {
|
||||||
pTask->timer = taosTmrStart(tryLaunchHistoryTask, 100, pTask, streamEnv.timer);
|
SStreamTaskRetryInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamTaskRetryInfo));
|
||||||
|
pInfo->taskId = pTask->id.taskId;
|
||||||
|
pInfo->pMeta = pTask->pMeta;
|
||||||
|
|
||||||
|
pTask->timer = taosTmrStart(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer);
|
||||||
if (pTask->timer == NULL) {
|
if (pTask->timer == NULL) {
|
||||||
// todo failed to create timer
|
// todo failed to create timer
|
||||||
|
} else {
|
||||||
|
pTask->status.timerActive = 1; // timer is active
|
||||||
|
qDebug("s-task:%s set time active flag", pTask->id.idStr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -602,6 +643,7 @@ int32_t tDecodeStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishRe
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// todo handle race condition, this task may be destroyed
|
||||||
void streamPrepareNdoCheckDownstream(SStreamTask* pTask) {
|
void streamPrepareNdoCheckDownstream(SStreamTask* pTask) {
|
||||||
if (pTask->info.fillHistory) {
|
if (pTask->info.fillHistory) {
|
||||||
qDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr);
|
qDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr);
|
||||||
|
|
|
@ -22,7 +22,7 @@ class TDTestCase:
|
||||||
def init(self, conn, logSql, replicaVar=1):
|
def init(self, conn, logSql, replicaVar=1):
|
||||||
self.replicaVar = int(replicaVar)
|
self.replicaVar = int(replicaVar)
|
||||||
tdLog.debug("start to execute %s" % __file__)
|
tdLog.debug("start to execute %s" % __file__)
|
||||||
tdSql.init(conn.cursor())
|
tdSql.init(conn.cursor(), True)
|
||||||
self.dbname = 'db'
|
self.dbname = 'db'
|
||||||
self.setsql = TDSetSql()
|
self.setsql = TDSetSql()
|
||||||
self.stbname = 'stb'
|
self.stbname = 'stb'
|
||||||
|
@ -76,8 +76,8 @@ class TDTestCase:
|
||||||
tdSql.execute(f'drop database {self.dbname}')
|
tdSql.execute(f'drop database {self.dbname}')
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.topic_name_check()
|
# self.topic_name_check()
|
||||||
self.db_name_check()
|
# self.db_name_check()
|
||||||
self.stream_name_check()
|
self.stream_name_check()
|
||||||
def stop(self):
|
def stop(self):
|
||||||
tdSql.close()
|
tdSql.close()
|
||||||
|
|
Loading…
Reference in New Issue