From e010d35839f318bcb6ac0297a1d22c001d96d4a1 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 14 Feb 2023 14:12:17 +0800 Subject: [PATCH 1/3] fix: remove code --- tools/shell/src/shellAuto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/shell/src/shellAuto.c b/tools/shell/src/shellAuto.c index 6e50a97c02..0492a76f8d 100644 --- a/tools/shell/src/shellAuto.c +++ b/tools/shell/src/shellAuto.c @@ -640,7 +640,7 @@ bool shellAutoInit() { void shellSetConn(TAOS* conn) { varCon = conn; // init database and stable - updateTireValue(WT_VAR_DBNAME, false); + //updateTireValue(WT_VAR_DBNAME, false); } // exit shell auto funciton, shell exit call once @@ -1977,7 +1977,7 @@ void callbackAutoTab(char* sqlstr, TAOS* pSql, bool usedb) { if (dealUseDB(sql)) { // change to new db - updateTireValue(WT_VAR_STABLE, false); + //updateTireValue(WT_VAR_STABLE, false); return; } From 957ed637b042f19c407c661d375193f933092bae Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 15 Feb 2023 16:49:21 +0800 Subject: [PATCH 2/3] fix: rsp type mismatch after link broken is processed --- source/libs/scheduler/inc/schInt.h | 1 + source/libs/scheduler/src/schRemote.c | 8 ++++---- source/libs/scheduler/src/schTask.c | 5 ++++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/source/libs/scheduler/inc/schInt.h b/source/libs/scheduler/inc/schInt.h index e8216fcd7c..14eb21565b 100644 --- a/source/libs/scheduler/inc/schInt.h +++ b/source/libs/scheduler/inc/schInt.h @@ -230,6 +230,7 @@ typedef struct SSchTask { SSchRedirectCtx redirectCtx; // task redirect context bool waitRetry; // wait for retry int32_t execId; // task current execute index + int32_t failedExecId; // last failed task execute index SSchLevel *level; // level SRWLatch planLock; // task update plan lock SSubplan *plan; // subplan diff --git a/source/libs/scheduler/src/schRemote.c b/source/libs/scheduler/src/schRemote.c index b6de9383d7..9c4ed65dd2 100644 --- a/source/libs/scheduler/src/schRemote.c +++ b/source/libs/scheduler/src/schRemote.c @@ -34,12 +34,12 @@ int32_t schValidateRspMsgType(SSchJob *pJob, SSchTask *pTask, int32_t msgType) { if (lastMsgType != reqMsgType) { SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), TMSG_INFO(msgType)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + SCH_ERR_RET(TSDB_CODE_QW_MSG_ERROR); } if (taskStatus != JOB_TASK_STATUS_PART_SUCC) { SCH_TASK_ELOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), TMSG_INFO(msgType)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + SCH_ERR_RET(TSDB_CODE_QW_MSG_ERROR); } return TSDB_CODE_SUCCESS; @@ -60,13 +60,13 @@ int32_t schValidateRspMsgType(SSchJob *pJob, SSchTask *pTask, int32_t msgType) { if (lastMsgType != reqMsgType) { SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), TMSG_INFO(msgType)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + SCH_ERR_RET(TSDB_CODE_QW_MSG_ERROR); } if (taskStatus != JOB_TASK_STATUS_EXEC) { SCH_TASK_ELOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), TMSG_INFO(msgType)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + SCH_ERR_RET(TSDB_CODE_QW_MSG_ERROR); } return TSDB_CODE_SUCCESS; diff --git a/source/libs/scheduler/src/schTask.c b/source/libs/scheduler/src/schTask.c index 8e60222ca6..bdab739327 100644 --- a/source/libs/scheduler/src/schTask.c +++ b/source/libs/scheduler/src/schTask.c @@ -64,6 +64,7 @@ int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel * pTask->plan = pPlan; pTask->level = pLevel; pTask->execId = -1; + pTask->failedExecId = -2; pTask->timeoutUsec = SCH_DEFAULT_TASK_TIMEOUT_USEC; pTask->taskId = schGenTaskId(); @@ -166,7 +167,7 @@ int32_t schUpdateTaskHandle(SSchJob *pJob, SSchTask *pTask, bool dropExecNode, v schUpdateTaskExecNode(pJob, pTask, handle, execId); - if ((execId != pTask->execId) || pTask->waitRetry) { // ignore it + if ((execId != pTask->execId || execId <= pTask->failedExecId) || pTask->waitRetry) { // ignore it SCH_TASK_DLOG("handle not updated since execId %d is already not current execId %d, waitRetry %d", execId, pTask->execId, pTask->waitRetry); SCH_ERR_RET(TSDB_CODE_SCH_IGNORE_ERROR); @@ -182,6 +183,8 @@ int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode) return TSDB_CODE_SCH_IGNORE_ERROR; } + pTask->failedExecId = pTask->execId; + int8_t jobStatus = 0; if (schJobNeedToStop(pJob, &jobStatus)) { SCH_TASK_DLOG("no more task failure processing cause of job status %s", jobTaskStatusStr(jobStatus)); From 892d6c61c9961f834962cf20ada0e73ae24e150e Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 16 Feb 2023 10:06:01 +0800 Subject: [PATCH 3/3] fix: task dropped error code --- source/libs/qworker/inc/qwInt.h | 2 ++ source/libs/qworker/src/qwUtil.c | 26 +++++++------------------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/source/libs/qworker/inc/qwInt.h b/source/libs/qworker/inc/qwInt.h index bde05d4116..f14df8e57c 100644 --- a/source/libs/qworker/inc/qwInt.h +++ b/source/libs/qworker/inc/qwInt.h @@ -206,6 +206,8 @@ typedef struct SQWorkerMgmt { int32_t paramIdx; } SQWorkerMgmt; +#define QW_CTX_NOT_EXISTS_ERR_CODE(mgmt) (atomic_load_8(&(mgmt)->nodeStopped) ? TSDB_CODE_VND_STOPPED : TSDB_CODE_QRY_TASK_CTX_NOT_EXIST) + #define QW_FPARAMS_DEF SQWorker *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, int64_t rId, int32_t eId #define QW_IDS() sId, qId, tId, rId, eId #define QW_FPARAMS() mgmt, QW_IDS() diff --git a/source/libs/qworker/src/qwUtil.c b/source/libs/qworker/src/qwUtil.c index 7ee7c50c96..a342e48cc1 100644 --- a/source/libs/qworker/src/qwUtil.c +++ b/source/libs/qworker/src/qwUtil.c @@ -213,15 +213,9 @@ int32_t qwAcquireTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx) { QW_SET_QTID(id, qId, tId, eId); *ctx = taosHashAcquire(mgmt->ctxHash, id, sizeof(id)); - int8_t nodeStopped = atomic_load_8(&mgmt->nodeStopped); if (NULL == (*ctx)) { - if (!nodeStopped) { - QW_TASK_DLOG_E("task ctx not exist, may be dropped"); - QW_ERR_RET(TSDB_CODE_QRY_TASK_CTX_NOT_EXIST); - } else { - QW_TASK_DLOG_E("node stopped"); - QW_ERR_RET(TSDB_CODE_VND_STOPPED); - } + QW_TASK_DLOG_E("acquired task ctx not exist, may be dropped"); + QW_ERR_RET(QW_CTX_NOT_EXISTS_ERR_CODE(mgmt)); } return TSDB_CODE_SUCCESS; @@ -232,16 +226,9 @@ int32_t qwGetTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx) { QW_SET_QTID(id, qId, tId, eId); *ctx = taosHashGet(mgmt->ctxHash, id, sizeof(id)); - int8_t nodeStopped = atomic_load_8(&mgmt->nodeStopped); - if (NULL == (*ctx)) { - if (!nodeStopped) { - QW_TASK_DLOG_E("task ctx not exist, may be dropped"); - QW_ERR_RET(TSDB_CODE_QRY_TASK_CTX_NOT_EXIST); - } else { - QW_TASK_DLOG_E("node stopped"); - QW_ERR_RET(TSDB_CODE_VND_STOPPED); - } + QW_TASK_DLOG_E("get task ctx not exist, may be dropped"); + QW_ERR_RET(QW_CTX_NOT_EXISTS_ERR_CODE(mgmt)); } return TSDB_CODE_SUCCESS; @@ -334,7 +321,8 @@ int32_t qwDropTaskCtx(QW_FPARAMS_DEF) { SQWTaskCtx *ctx = taosHashGet(mgmt->ctxHash, id, sizeof(id)); if (NULL == ctx) { - QW_ERR_RET(TSDB_CODE_QRY_TASK_CTX_NOT_EXIST); + QW_TASK_DLOG_E("drop task ctx not exist, may be dropped"); + QW_ERR_RET(QW_CTX_NOT_EXISTS_ERR_CODE(mgmt)); } octx = *ctx; @@ -346,7 +334,7 @@ int32_t qwDropTaskCtx(QW_FPARAMS_DEF) { if (taosHashRemove(mgmt->ctxHash, id, sizeof(id))) { QW_TASK_ELOG_E("taosHashRemove from ctx hash failed"); - QW_ERR_RET(TSDB_CODE_QRY_TASK_CTX_NOT_EXIST); + QW_ERR_RET(QW_CTX_NOT_EXISTS_ERR_CODE(mgmt)); } qwFreeTaskCtx(&octx);