rescheduler timeout task
This commit is contained in:
parent
f6c6083aad
commit
b6e60082ae
|
@ -564,7 +564,8 @@ int32_t* taosGetErrno();
|
||||||
#define TSDB_CODE_SCH_STATUS_ERROR TAOS_DEF_ERROR_CODE(0, 0x2501)
|
#define TSDB_CODE_SCH_STATUS_ERROR TAOS_DEF_ERROR_CODE(0, 0x2501)
|
||||||
#define TSDB_CODE_SCH_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x2502)
|
#define TSDB_CODE_SCH_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x2502)
|
||||||
#define TSDB_CODE_SCH_IGNORE_ERROR TAOS_DEF_ERROR_CODE(0, 0x2503)
|
#define TSDB_CODE_SCH_IGNORE_ERROR TAOS_DEF_ERROR_CODE(0, 0x2503)
|
||||||
#define TSDB_CODE_QW_MSG_ERROR TAOS_DEF_ERROR_CODE(0, 0x2504)
|
#define TSDB_CODE_SCH_TIMEOUT_ERROR TAOS_DEF_ERROR_CODE(0, 0x2504)
|
||||||
|
#define TSDB_CODE_QW_MSG_ERROR TAOS_DEF_ERROR_CODE(0, 0x2550)
|
||||||
|
|
||||||
//parser
|
//parser
|
||||||
#define TSDB_CODE_PAR_SYNTAX_ERROR TAOS_DEF_ERROR_CODE(0, 0x2600)
|
#define TSDB_CODE_PAR_SYNTAX_ERROR TAOS_DEF_ERROR_CODE(0, 0x2600)
|
||||||
|
|
|
@ -131,7 +131,6 @@ void destroyTscObj(void *pObj) {
|
||||||
hbDeregisterConn(pTscObj->pAppInfo->pAppHbMgr, connKey);
|
hbDeregisterConn(pTscObj->pAppInfo->pAppHbMgr, connKey);
|
||||||
atomic_sub_fetch_64(&pTscObj->pAppInfo->numOfConns, 1);
|
atomic_sub_fetch_64(&pTscObj->pAppInfo->numOfConns, 1);
|
||||||
closeAllRequests(pTscObj->pRequests);
|
closeAllRequests(pTscObj->pRequests);
|
||||||
schedulerStopTransport(pTscObj->pAppInfo->pTransporter);
|
|
||||||
tscDebug("connObj 0x%" PRIx64 " destroyed, totalConn:%" PRId64, pTscObj->id, pTscObj->pAppInfo->numOfConns);
|
tscDebug("connObj 0x%" PRIx64 " destroyed, totalConn:%" PRId64, pTscObj->id, pTscObj->pAppInfo->numOfConns);
|
||||||
taosThreadMutexDestroy(&pTscObj->mutex);
|
taosThreadMutexDestroy(&pTscObj->mutex);
|
||||||
taosMemoryFreeClear(pTscObj);
|
taosMemoryFreeClear(pTscObj);
|
||||||
|
|
|
@ -66,10 +66,11 @@ void taos_cleanup(void) {
|
||||||
|
|
||||||
hbMgrCleanUp();
|
hbMgrCleanUp();
|
||||||
|
|
||||||
rpcCleanup();
|
|
||||||
catalogDestroy();
|
catalogDestroy();
|
||||||
schedulerDestroy();
|
schedulerDestroy();
|
||||||
|
|
||||||
|
rpcCleanup();
|
||||||
|
|
||||||
tscInfo("all local resources released");
|
tscInfo("all local resources released");
|
||||||
taosCleanupCfg();
|
taosCleanupCfg();
|
||||||
taosCloseLog();
|
taosCloseLog();
|
||||||
|
|
|
@ -274,7 +274,7 @@ int32_t qWorkerPreprocessQueryMsg(void *qWorkerMgmt, SRpcMsg *pMsg) {
|
||||||
uint64_t tId = msg->taskId;
|
uint64_t tId = msg->taskId;
|
||||||
int64_t rId = msg->refId;
|
int64_t rId = msg->refId;
|
||||||
|
|
||||||
SQWMsg qwMsg = {.node = node, .msg = msg->msg + msg->sqlLen, .msgLen = msg->phyLen, .connInfo = pMsg->info};
|
SQWMsg qwMsg = {.msg = msg->msg + msg->sqlLen, .msgLen = msg->phyLen, .connInfo = pMsg->info};
|
||||||
|
|
||||||
QW_SCH_TASK_DLOG("prerocessQuery start, handle:%p", pMsg->info.handle);
|
QW_SCH_TASK_DLOG("prerocessQuery start, handle:%p", pMsg->info.handle);
|
||||||
QW_ERR_RET(qwPrerocessQuery(QW_FPARAMS(), &qwMsg));
|
QW_ERR_RET(qwPrerocessQuery(QW_FPARAMS(), &qwMsg));
|
||||||
|
|
|
@ -163,9 +163,11 @@ typedef struct SSchTaskProfile {
|
||||||
|
|
||||||
typedef struct SSchTask {
|
typedef struct SSchTask {
|
||||||
uint64_t taskId; // task id
|
uint64_t taskId; // task id
|
||||||
int32_t execIdx; // task current execute try index
|
|
||||||
SRWLatch lock; // task lock
|
SRWLatch lock; // task lock
|
||||||
|
int32_t maxExecTimes; // task may exec times
|
||||||
|
int32_t execIdx; // task current execute try index
|
||||||
SSchLevel *level; // level
|
SSchLevel *level; // level
|
||||||
|
SRWLatch planLock; // task update plan lock
|
||||||
SSubplan *plan; // subplan
|
SSubplan *plan; // subplan
|
||||||
char *msg; // operator tree
|
char *msg; // operator tree
|
||||||
int32_t msgLen; // msg length
|
int32_t msgLen; // msg length
|
||||||
|
@ -233,23 +235,36 @@ extern SSchedulerMgmt schMgmt;
|
||||||
#define SCH_LOG_TASK_START_TS(_task) \
|
#define SCH_LOG_TASK_START_TS(_task) \
|
||||||
do { \
|
do { \
|
||||||
int64_t us = taosGetTimestampUs(); \
|
int64_t us = taosGetTimestampUs(); \
|
||||||
(_task)->profile.tryUseTime[(_task)->execIdx] = us; \
|
int32_t idx = (_task)->execIdx % SCH_TASK_MAX_EXEC_TIMES; \
|
||||||
|
(_task)->profile.execUseTime[idx] = us; \
|
||||||
if (0 == (_task)->execIdx) { \
|
if (0 == (_task)->execIdx) { \
|
||||||
(_task)->profile.startTs = us; \
|
(_task)->profile.startTs = us; \
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#define SCH_LOG_TASK_WAIT_TS(_task) \
|
||||||
|
do { \
|
||||||
|
int64_t us = taosGetTimestampUs(); \
|
||||||
|
int32_t idx = (_task)->execIdx % SCH_TASK_MAX_EXEC_TIMES; \
|
||||||
|
(_task)->profile.waitTime += us - (_task)->profile.execUseTime[idx]; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
#define SCH_LOG_TASK_END_TS(_task) \
|
#define SCH_LOG_TASK_END_TS(_task) \
|
||||||
do { \
|
do { \
|
||||||
int64_t us = taosGetTimestampUs(); \
|
int64_t us = taosGetTimestampUs(); \
|
||||||
(_task)->profile.tryUseTime[(_task)->execIdx] = us - (_task)->profile.tryUseTime[(_task)->execIdx]; \
|
int32_t idx = (_task)->execIdx % SCH_TASK_MAX_EXEC_TIMES; \
|
||||||
|
(_task)->profile.execUseTime[idx] = us - (_task)->profile.execUseTime[idx]; \
|
||||||
(_task)->profile.endTs = us; \
|
(_task)->profile.endTs = us; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define SCH_TASK_TIMEOUT(_task) ((taosGetTimestampUs() - (_task)->profile.tryUseTime[(_task)->execIdx]) > (_taks)->timeoutUsec)
|
#define SCH_TASK_TIMEOUT(_task) ((taosGetTimestampUs() - (_task)->profile.execUseTime[(_task)->execIdx % SCH_TASK_MAX_EXEC_TIMES]) > (_task)->timeoutUsec)
|
||||||
|
|
||||||
#define SCH_TASK_READY_FOR_LAUNCH(readyNum, task) ((readyNum) >= taosArrayGetSize((task)->children))
|
#define SCH_TASK_READY_FOR_LAUNCH(readyNum, task) ((readyNum) >= taosArrayGetSize((task)->children))
|
||||||
|
|
||||||
|
#define SCH_LOCK_TASK(_task) SCH_LOCK(SCH_WRITE, &(_task)->lock)
|
||||||
|
#define SCH_UNLOCK_TASK(_task) SCH_UNLOCK(SCH_WRITE, &(_task)->lock)
|
||||||
|
|
||||||
#define SCH_TASK_ID(_task) ((_task) ? (_task)->taskId : -1)
|
#define SCH_TASK_ID(_task) ((_task) ? (_task)->taskId : -1)
|
||||||
#define SCH_SET_TASK_LASTMSG_TYPE(_task, _type) do { if(_task) { atomic_store_32(&(_task)->lastMsgType, _type); } } while (0)
|
#define SCH_SET_TASK_LASTMSG_TYPE(_task, _type) do { if(_task) { atomic_store_32(&(_task)->lastMsgType, _type); } } while (0)
|
||||||
#define SCH_GET_TASK_LASTMSG_TYPE(_task) ((_task) ? atomic_load_32(&(_task)->lastMsgType) : -1)
|
#define SCH_GET_TASK_LASTMSG_TYPE(_task) ((_task) ? atomic_load_32(&(_task)->lastMsgType) : -1)
|
||||||
|
@ -351,6 +366,7 @@ int32_t schAsyncExecJob(void *pTrans, SArray *pNodeList, SQueryPlan *pDag, int64
|
||||||
int32_t schFetchRows(SSchJob *pJob);
|
int32_t schFetchRows(SSchJob *pJob);
|
||||||
int32_t schAsyncFetchRows(SSchJob *pJob);
|
int32_t schAsyncFetchRows(SSchJob *pJob);
|
||||||
int32_t schUpdateTaskHandle(SSchJob *pJob, SSchTask *pTask, int32_t msgType, void *handle, int32_t execIdx);
|
int32_t schUpdateTaskHandle(SSchJob *pJob, SSchTask *pTask, int32_t msgType, void *handle, int32_t execIdx);
|
||||||
|
int32_t schProcessOnTaskStatusRsp(SQueryNodeEpId* pEpId, SArray* pStatusList);
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
@ -29,6 +29,7 @@ int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *
|
||||||
pTask->plan = pPlan;
|
pTask->plan = pPlan;
|
||||||
pTask->level = pLevel;
|
pTask->level = pLevel;
|
||||||
pTask->execIdx = -1;
|
pTask->execIdx = -1;
|
||||||
|
pTask->maxExecTimes = SCH_TASK_MAX_EXEC_TIMES;
|
||||||
pTask->timeoutUsec = SCH_DEFAULT_TASK_TIMEOUT_USEC;
|
pTask->timeoutUsec = SCH_DEFAULT_TASK_TIMEOUT_USEC;
|
||||||
SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START);
|
SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START);
|
||||||
pTask->taskId = schGenTaskId();
|
pTask->taskId = schGenTaskId();
|
||||||
|
@ -142,6 +143,7 @@ void schDeregisterTaskHb(SSchJob *pJob, SSchTask *pTask) {
|
||||||
SSchHbTrans *hb = taosHashGet(schMgmt.hbConnections, &epId, sizeof(SQueryNodeEpId));
|
SSchHbTrans *hb = taosHashGet(schMgmt.hbConnections, &epId, sizeof(SQueryNodeEpId));
|
||||||
if (NULL == hb) {
|
if (NULL == hb) {
|
||||||
SCH_TASK_ELOG("nodeId %d fqdn %s port %d not in hb connections", epId.nodeId, epId.ep.fqdn, epId.ep.port);
|
SCH_TASK_ELOG("nodeId %d fqdn %s port %d not in hb connections", epId.nodeId, epId.ep.fqdn, epId.ep.port);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
atomic_sub_fetch_64(&hb->taskNum, 1);
|
atomic_sub_fetch_64(&hb->taskNum, 1);
|
||||||
|
@ -360,7 +362,7 @@ int32_t schRecordTaskSucceedNode(SSchJob *pJob, SSchTask *pTask) {
|
||||||
int32_t schAppendTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, int32_t execIdx) {
|
int32_t schAppendTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, int32_t execIdx) {
|
||||||
SSchNodeInfo nodeInfo = {.addr = *addr, .handle = NULL};
|
SSchNodeInfo nodeInfo = {.addr = *addr, .handle = NULL};
|
||||||
|
|
||||||
if (NULL == taosHashPut(pTask->execNodes, &execIdx, sizeof(execIdx), &nodeInfo, sizeof(nodeInfo))) {
|
if (taosHashPut(pTask->execNodes, &execIdx, sizeof(execIdx), &nodeInfo, sizeof(nodeInfo))) {
|
||||||
SCH_TASK_ELOG("taosHashPut nodeInfo to execNodes failed, errno:%d", errno);
|
SCH_TASK_ELOG("taosHashPut nodeInfo to execNodes failed, errno:%d", errno);
|
||||||
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
|
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
|
||||||
}
|
}
|
||||||
|
@ -384,7 +386,7 @@ int32_t schDropTaskExecNode(SSchJob *pJob, SSchTask *pTask, void *handle, int32_
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t schUpdateTaskExecNode(SSchTask *pTask, void *handle, int32_t execIdx) {
|
int32_t schUpdateTaskExecNode(SSchTask *pTask, void *handle, int32_t execIdx) {
|
||||||
if (taosArrayGetSize(pTask->execNodes) <= 0) {
|
if (taosHashGetSize(pTask->execNodes) <= 0) {
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -714,7 +716,17 @@ int32_t schTaskCheckSetRetry(SSchJob *pJob, SSchTask *pTask, int32_t errCode, bo
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((pTask->execIdx + 1) >= SCH_TASK_MAX_EXEC_TIMES) {
|
if (TSDB_CODE_SCH_TIMEOUT_ERROR == errCode) {
|
||||||
|
pTask->maxExecTimes++;
|
||||||
|
if (pTask->timeoutUsec < SCH_MAX_TASK_TIMEOUT_USEC) {
|
||||||
|
pTask->timeoutUsec *= 2;
|
||||||
|
if (pTask->timeoutUsec > SCH_MAX_TASK_TIMEOUT_USEC) {
|
||||||
|
pTask->timeoutUsec = SCH_MAX_TASK_TIMEOUT_USEC;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((pTask->execIdx + 1) >= pTask->maxExecTimes) {
|
||||||
*needRetry = false;
|
*needRetry = false;
|
||||||
SCH_TASK_DLOG("task no more retry since reach max try times, execIdx:%d", pTask->execIdx);
|
SCH_TASK_DLOG("task no more retry since reach max try times, execIdx:%d", pTask->execIdx);
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
|
@ -737,7 +749,7 @@ int32_t schTaskCheckSetRetry(SSchJob *pJob, SSchTask *pTask, int32_t errCode, bo
|
||||||
} else {
|
} else {
|
||||||
int32_t candidateNum = taosArrayGetSize(pTask->candidateAddrs);
|
int32_t candidateNum = taosArrayGetSize(pTask->candidateAddrs);
|
||||||
|
|
||||||
if ((pTask->candidateIdx + 1) >= candidateNum) {
|
if ((pTask->candidateIdx + 1) >= candidateNum && (TSDB_CODE_SCH_TIMEOUT_ERROR != errCode)) {
|
||||||
*needRetry = false;
|
*needRetry = false;
|
||||||
SCH_TASK_DLOG("task no more retry since all candiates tried, candidateIdx:%d, candidateNum:%d",
|
SCH_TASK_DLOG("task no more retry since all candiates tried, candidateIdx:%d, candidateNum:%d",
|
||||||
pTask->candidateIdx, candidateNum);
|
pTask->candidateIdx, candidateNum);
|
||||||
|
@ -767,7 +779,10 @@ int32_t schHandleTaskRetry(SSchJob *pJob, SSchTask *pTask) {
|
||||||
if (SCH_IS_DATA_SRC_TASK(pTask)) {
|
if (SCH_IS_DATA_SRC_TASK(pTask)) {
|
||||||
SCH_SWITCH_EPSET(&pTask->plan->execNode);
|
SCH_SWITCH_EPSET(&pTask->plan->execNode);
|
||||||
} else {
|
} else {
|
||||||
++pTask->candidateIdx;
|
int32_t candidateNum = taosArrayGetSize(pTask->candidateAddrs);
|
||||||
|
if (++pTask->candidateIdx >= candidateNum) {
|
||||||
|
pTask->candidateIdx = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SCH_ERR_RET(schLaunchTask(pJob, pTask));
|
SCH_ERR_RET(schLaunchTask(pJob, pTask));
|
||||||
|
@ -942,7 +957,11 @@ void schProcessOnDataFetched(SSchJob *job) {
|
||||||
int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode) {
|
int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode) {
|
||||||
int8_t status = 0;
|
int8_t status = 0;
|
||||||
|
|
||||||
|
if (errCode == TSDB_CODE_SCH_TIMEOUT_ERROR) {
|
||||||
|
SCH_LOG_TASK_WAIT_TS(pTask);
|
||||||
|
} else {
|
||||||
SCH_LOG_TASK_END_TS(pTask);
|
SCH_LOG_TASK_END_TS(pTask);
|
||||||
|
}
|
||||||
|
|
||||||
if (schJobNeedToStop(pJob, &status)) {
|
if (schJobNeedToStop(pJob, &status)) {
|
||||||
SCH_TASK_DLOG("task failed not processed cause of job status, job status:%s", jobTaskStatusStr(status));
|
SCH_TASK_DLOG("task failed not processed cause of job status, job status:%s", jobTaskStatusStr(status));
|
||||||
|
@ -1145,12 +1164,46 @@ int32_t schProcessOnExplainDone(SSchJob *pJob, SSchTask *pTask, SRetrieveTableRs
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void schDropTaskOnExecNode(SSchJob *pJob, SSchTask *pTask) {
|
||||||
|
if (NULL == pTask->execNodes) {
|
||||||
|
SCH_TASK_DLOG("no exec address, status:%s", SCH_GET_TASK_STATUS_STR(pTask));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t size = (int32_t)taosHashGetSize(pTask->execNodes);
|
||||||
|
|
||||||
|
if (size <= 0) {
|
||||||
|
SCH_TASK_DLOG("task has no execNodes, no need to drop it, status:%s", SCH_GET_TASK_STATUS_STR(pTask));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
SSchNodeInfo *nodeInfo = taosHashIterate(pTask->execNodes, NULL);
|
||||||
|
while (nodeInfo) {
|
||||||
|
SCH_SET_TASK_HANDLE(pTask, nodeInfo->handle);
|
||||||
|
|
||||||
|
schBuildAndSendMsg(pJob, pTask, &nodeInfo->addr, TDMT_VND_DROP_TASK);
|
||||||
|
|
||||||
|
nodeInfo = taosHashIterate(pTask->execNodes, nodeInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
SCH_TASK_DLOG("task has %d exec address", size);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int32_t schRescheduleTask(SSchJob *pJob, SSchTask *pTask) {
|
int32_t schRescheduleTask(SSchJob *pJob, SSchTask *pTask) {
|
||||||
if (SCH_IS_DATA_SRC_QRY_TASK(pTask)) {
|
if (SCH_IS_DATA_SRC_QRY_TASK(pTask)) {
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SCH_LOCK_TASK(pTask);
|
||||||
|
if (JOB_TASK_STATUS_EXECUTING == pTask->status && pJob->fetchTask != pTask) {
|
||||||
|
schDropTaskOnExecNode(pJob, pTask);
|
||||||
|
taosHashClear(pTask->execNodes);
|
||||||
|
schProcessOnTaskFailure(pJob, pTask, TSDB_CODE_SCH_TIMEOUT_ERROR);
|
||||||
|
}
|
||||||
|
SCH_UNLOCK_TASK(pTask);
|
||||||
|
|
||||||
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t schProcessOnTaskStatusRsp(SQueryNodeEpId* pEpId, SArray* pStatusList) {
|
int32_t schProcessOnTaskStatusRsp(SQueryNodeEpId* pEpId, SArray* pStatusList) {
|
||||||
|
@ -1193,6 +1246,7 @@ int32_t schProcessOnTaskStatusRsp(SQueryNodeEpId* pEpId, SArray* pStatusList) {
|
||||||
schReleaseJob(taskStatus->refId);
|
schReleaseJob(taskStatus->refId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1339,30 +1393,6 @@ int32_t schLaunchJob(SSchJob *pJob) {
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
void schDropTaskOnExecNode(SSchJob *pJob, SSchTask *pTask) {
|
|
||||||
if (NULL == pTask->execNodes) {
|
|
||||||
SCH_TASK_DLOG("no exec address, status:%s", SCH_GET_TASK_STATUS_STR(pTask));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t size = (int32_t)taosHashGetSize(pTask->execNodes);
|
|
||||||
|
|
||||||
if (size <= 0) {
|
|
||||||
SCH_TASK_DLOG("task has no execNodes, no need to drop it, status:%s", SCH_GET_TASK_STATUS_STR(pTask));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
SSchNodeInfo *nodeInfo = taosHashIterate(pTask->execNodes, NULL);
|
|
||||||
while (nodeInfo) {
|
|
||||||
SCH_SET_TASK_HANDLE(pTask, nodeInfo->handle);
|
|
||||||
|
|
||||||
schBuildAndSendMsg(pJob, pTask, &nodeInfo->addr, TDMT_VND_DROP_TASK);
|
|
||||||
|
|
||||||
nodeInfo = taosHashIterate(pTask->execNodes, nodeInfo);
|
|
||||||
}
|
|
||||||
|
|
||||||
SCH_TASK_DLOG("task has %d exec address", size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void schDropTaskInHashList(SSchJob *pJob, SHashObj *list) {
|
void schDropTaskInHashList(SSchJob *pJob, SHashObj *list) {
|
||||||
if (!SCH_IS_NEED_DROP_JOB(pJob)) {
|
if (!SCH_IS_NEED_DROP_JOB(pJob)) {
|
||||||
|
|
|
@ -92,8 +92,7 @@ int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, ch
|
||||||
int8_t status = 0;
|
int8_t status = 0;
|
||||||
|
|
||||||
if (schJobNeedToStop(pJob, &status)) {
|
if (schJobNeedToStop(pJob, &status)) {
|
||||||
SCH_TASK_ELOG("rsp not processed cause of job status, job status:%s, rspCode:0x%x", jobTaskStatusStr(status),
|
SCH_TASK_ELOG("rsp not processed cause of job status, job status:%s, rspCode:0x%x", jobTaskStatusStr(status), rspCode);
|
||||||
rspCode);
|
|
||||||
taosMemoryFreeClear(msg);
|
taosMemoryFreeClear(msg);
|
||||||
SCH_RET(atomic_load_32(&pJob->errCode));
|
SCH_RET(atomic_load_32(&pJob->errCode));
|
||||||
}
|
}
|
||||||
|
@ -364,6 +363,8 @@ int32_t schHandleCallback(void *param, const SDataBuf *pMsg, int32_t msgType, in
|
||||||
|
|
||||||
SCH_ERR_JRET(schGetTaskInJob(pJob, pParam->taskId, &pTask));
|
SCH_ERR_JRET(schGetTaskInJob(pJob, pParam->taskId, &pTask));
|
||||||
|
|
||||||
|
SCH_LOCK_TASK(pTask);
|
||||||
|
|
||||||
SCH_TASK_DLOG("rsp msg received, type:%s, handle:%p, code:%s", TMSG_INFO(msgType), pMsg->handle, tstrerror(rspCode));
|
SCH_TASK_DLOG("rsp msg received, type:%s, handle:%p, code:%s", TMSG_INFO(msgType), pMsg->handle, tstrerror(rspCode));
|
||||||
|
|
||||||
if (pParam->execIdx != pTask->execIdx) {
|
if (pParam->execIdx != pTask->execIdx) {
|
||||||
|
@ -377,6 +378,10 @@ int32_t schHandleCallback(void *param, const SDataBuf *pMsg, int32_t msgType, in
|
||||||
|
|
||||||
_return:
|
_return:
|
||||||
|
|
||||||
|
if (pTask) {
|
||||||
|
SCH_UNLOCK_TASK(pTask);
|
||||||
|
}
|
||||||
|
|
||||||
if (pJob) {
|
if (pJob) {
|
||||||
schReleaseJob(pParam->refId);
|
schReleaseJob(pParam->refId);
|
||||||
}
|
}
|
||||||
|
@ -667,7 +672,7 @@ int32_t schRegisterHbConnection(SSchJob *pJob, SSchTask *pTask, SQueryNodeEpId *
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId) {
|
int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId, SArray* taskAction) {
|
||||||
SSchedulerHbReq req = {0};
|
SSchedulerHbReq req = {0};
|
||||||
int32_t code = 0;
|
int32_t code = 0;
|
||||||
SRpcCtx rpcCtx = {0};
|
SRpcCtx rpcCtx = {0};
|
||||||
|
|
|
@ -176,10 +176,6 @@ int32_t scheduleCancelJob(int64_t job) {
|
||||||
SCH_RET(code);
|
SCH_RET(code);
|
||||||
}
|
}
|
||||||
|
|
||||||
void schedulerStopTransport(void *pTrans) {
|
|
||||||
// CLOSE && REMOVE RELATED HB CONNECTIONS
|
|
||||||
}
|
|
||||||
|
|
||||||
void schedulerFreeJob(int64_t job) {
|
void schedulerFreeJob(int64_t job) {
|
||||||
SSchJob *pJob = schAcquireJob(job);
|
SSchJob *pJob = schAcquireJob(job);
|
||||||
if (NULL == pJob) {
|
if (NULL == pJob) {
|
||||||
|
|
|
@ -444,6 +444,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_CTG_VG_META_MISMATCH, "table meta and vgroup
|
||||||
//scheduler
|
//scheduler
|
||||||
TAOS_DEFINE_ERROR(TSDB_CODE_SCH_STATUS_ERROR, "scheduler status error")
|
TAOS_DEFINE_ERROR(TSDB_CODE_SCH_STATUS_ERROR, "scheduler status error")
|
||||||
TAOS_DEFINE_ERROR(TSDB_CODE_SCH_INTERNAL_ERROR, "scheduler internal error")
|
TAOS_DEFINE_ERROR(TSDB_CODE_SCH_INTERNAL_ERROR, "scheduler internal error")
|
||||||
|
TAOS_DEFINE_ERROR(TSDB_CODE_SCH_TIMEOUT_ERROR, "Task timeout")
|
||||||
TAOS_DEFINE_ERROR(TSDB_CODE_QW_MSG_ERROR, "Invalid msg order")
|
TAOS_DEFINE_ERROR(TSDB_CODE_QW_MSG_ERROR, "Invalid msg order")
|
||||||
|
|
||||||
// parser
|
// parser
|
||||||
|
|
Loading…
Reference in New Issue