no refact
This commit is contained in:
parent
6e358fd42f
commit
af406ab31a
|
@ -47,7 +47,7 @@ option(
|
|||
option(
|
||||
BUILD_WITH_UV
|
||||
"If build with libuv"
|
||||
ON
|
||||
OFF
|
||||
)
|
||||
|
||||
option(
|
||||
|
|
|
@ -13,14 +13,16 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "catalog.h"
|
||||
#include "query.h"
|
||||
#include "schedulerInt.h"
|
||||
#include "tmsg.h"
|
||||
#include "query.h"
|
||||
#include "catalog.h"
|
||||
|
||||
static SSchedulerMgmt schMgmt = {0};
|
||||
|
||||
uint64_t schGenTaskId(void) { return atomic_add_fetch_64(&schMgmt.taskId, 1); }
|
||||
uint64_t schGenTaskId(void) {
|
||||
return atomic_add_fetch_64(&schMgmt.taskId, 1);
|
||||
}
|
||||
|
||||
uint64_t schGenUUID(void) {
|
||||
static uint64_t hashId = 0;
|
||||
|
@ -44,6 +46,7 @@ uint64_t schGenUUID(void) {
|
|||
return id;
|
||||
}
|
||||
|
||||
|
||||
int32_t schInitTask(SSchJob* pJob, SSchTask *pTask, SSubplan* pPlan, SSchLevel *pLevel) {
|
||||
pTask->plan = pPlan;
|
||||
pTask->level = pLevel;
|
||||
|
@ -78,6 +81,7 @@ void schFreeTask(SSchTask *pTask) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
int32_t schValidateTaskReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t msgType) {
|
||||
int32_t lastMsgType = atomic_load_32(&pTask->lastMsgType);
|
||||
|
||||
|
@ -93,10 +97,8 @@ int32_t schValidateTaskReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t m
|
|||
SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR);
|
||||
}
|
||||
|
||||
if (SCH_GET_TASK_STATUS(pTask) != JOB_TASK_STATUS_EXECUTING &&
|
||||
SCH_GET_TASK_STATUS(pTask) != JOB_TASK_STATUS_PARTIAL_SUCCEED) {
|
||||
SCH_TASK_ELOG("rsp msg conflicted with task status, status:%d, rspType:%d", SCH_GET_TASK_STATUS(pTask),
|
||||
msgType);
|
||||
if (SCH_GET_TASK_STATUS(pTask) != JOB_TASK_STATUS_EXECUTING && SCH_GET_TASK_STATUS(pTask) != JOB_TASK_STATUS_PARTIAL_SUCCEED) {
|
||||
SCH_TASK_ELOG("rsp msg conflicted with task status, status:%d, rspType:%d", SCH_GET_TASK_STATUS(pTask), msgType);
|
||||
SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR);
|
||||
}
|
||||
|
||||
|
@ -110,6 +112,7 @@ int32_t schValidateTaskReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t m
|
|||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int32_t schCheckAndUpdateJobStatus(SSchJob *pJob, int8_t newStatus) {
|
||||
int32_t code = 0;
|
||||
|
||||
|
@ -136,16 +139,19 @@ int32_t schCheckAndUpdateJobStatus(SSchJob *pJob, int8_t newStatus) {
|
|||
|
||||
break;
|
||||
case JOB_TASK_STATUS_EXECUTING:
|
||||
if (newStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED && newStatus != JOB_TASK_STATUS_FAILED &&
|
||||
newStatus != JOB_TASK_STATUS_CANCELLING && newStatus != JOB_TASK_STATUS_CANCELLED &&
|
||||
newStatus != JOB_TASK_STATUS_DROPPING) {
|
||||
if (newStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED
|
||||
&& newStatus != JOB_TASK_STATUS_FAILED
|
||||
&& newStatus != JOB_TASK_STATUS_CANCELLING
|
||||
&& newStatus != JOB_TASK_STATUS_CANCELLED
|
||||
&& newStatus != JOB_TASK_STATUS_DROPPING) {
|
||||
SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR);
|
||||
}
|
||||
|
||||
break;
|
||||
case JOB_TASK_STATUS_PARTIAL_SUCCEED:
|
||||
if (newStatus != JOB_TASK_STATUS_FAILED && newStatus != JOB_TASK_STATUS_SUCCEED &&
|
||||
newStatus != JOB_TASK_STATUS_DROPPING) {
|
||||
if (newStatus != JOB_TASK_STATUS_FAILED
|
||||
&& newStatus != JOB_TASK_STATUS_SUCCEED
|
||||
&& newStatus != JOB_TASK_STATUS_DROPPING) {
|
||||
SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR);
|
||||
}
|
||||
|
||||
|
@ -185,6 +191,7 @@ _return:
|
|||
SCH_ERR_RET(code);
|
||||
}
|
||||
|
||||
|
||||
int32_t schBuildTaskRalation(SSchJob *pJob, SHashObj *planToTask) {
|
||||
for (int32_t i = 0; i < pJob->levelNum; ++i) {
|
||||
SSchLevel *pLevel = taosArrayGet(pJob->levels, i);
|
||||
|
@ -267,6 +274,7 @@ int32_t schBuildTaskRalation(SSchJob *pJob, SHashObj *planToTask) {
|
|||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int32_t schRecordTaskSucceedNode(SSchTask *pTask) {
|
||||
SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, atomic_load_8(&pTask->candidateIdx));
|
||||
|
||||
|
@ -277,6 +285,7 @@ int32_t schRecordTaskSucceedNode(SSchTask *pTask) {
|
|||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int32_t schRecordTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr) {
|
||||
if (NULL == taosArrayPush(pTask->execAddrs, addr)) {
|
||||
SCH_TASK_ELOG("taosArrayPush addr to execAddr list failed, errno:%d", errno);
|
||||
|
@ -286,6 +295,7 @@ int32_t schRecordTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *ad
|
|||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int32_t schValidateAndBuildJob(SQueryDag *pDag, SSchJob *pJob) {
|
||||
int32_t code = 0;
|
||||
pJob->queryId = pDag->queryId;
|
||||
|
@ -301,10 +311,7 @@ int32_t schValidateAndBuildJob(SQueryDag *pDag, SSchJob *pJob) {
|
|||
SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
|
||||
}
|
||||
|
||||
SHashObj *planToTask = taosHashInit(
|
||||
SCHEDULE_DEFAULT_TASK_NUMBER,
|
||||
taosGetDefaultHashFunction(POINTER_BYTES == sizeof(int64_t) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_INT), false,
|
||||
HASH_NO_LOCK);
|
||||
SHashObj *planToTask = taosHashInit(SCHEDULE_DEFAULT_TASK_NUMBER, taosGetDefaultHashFunction(POINTER_BYTES == sizeof(int64_t) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
|
||||
if (NULL == planToTask) {
|
||||
SCH_JOB_ELOG("taosHashInit %d failed", SCHEDULE_DEFAULT_TASK_NUMBER);
|
||||
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
|
||||
|
@ -521,6 +528,7 @@ int32_t schMoveTaskToFailList(SSchJob *pJob, SSchTask *pTask, bool *moved) {
|
|||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int32_t schMoveTaskToExecList(SSchJob *pJob, SSchTask *pTask, bool *moved) {
|
||||
if (0 != taosHashRemove(pJob->succTasks, &pTask->taskId, sizeof(pTask->taskId))) {
|
||||
SCH_TASK_WLOG("remove task from succTask list failed, may not exist, status:%d", SCH_GET_TASK_STATUS(pTask));
|
||||
|
@ -546,6 +554,7 @@ int32_t schMoveTaskToExecList(SSchJob *pJob, SSchTask *pTask, bool *moved) {
|
|||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int32_t schTaskCheckAndSetRetry(SSchJob *job, SSchTask *task, int32_t errCode, bool *needRetry) {
|
||||
// TODO set retry or not based on task type/errCode/retry times/job status/available eps...
|
||||
// TODO if needRetry, set task retry info
|
||||
|
@ -574,6 +583,8 @@ int32_t schProcessOnJobFailureImpl(SSchJob *pJob, int32_t status, int32_t errCod
|
|||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Note: no more error processing, handled in function internal
|
||||
int32_t schProcessOnJobFailure(SSchJob *pJob, int32_t errCode) {
|
||||
SCH_RET(schProcessOnJobFailureImpl(pJob, JOB_TASK_STATUS_FAILED, errCode));
|
||||
|
@ -584,6 +595,7 @@ int32_t schProcessOnJobDropped(SSchJob *pJob, int32_t errCode) {
|
|||
SCH_RET(schProcessOnJobFailureImpl(pJob, JOB_TASK_STATUS_DROPPING, errCode));
|
||||
}
|
||||
|
||||
|
||||
// Note: no more error processing, handled in function internal
|
||||
int32_t schFetchFromRemote(SSchJob *pJob) {
|
||||
int32_t code = 0;
|
||||
|
@ -614,6 +626,7 @@ _return:
|
|||
return code;
|
||||
}
|
||||
|
||||
|
||||
// Note: no more error processing, handled in function internal
|
||||
int32_t schProcessOnJobPartialSuccess(SSchJob *pJob) {
|
||||
int32_t code = 0;
|
||||
|
@ -692,6 +705,7 @@ _return:
|
|||
SCH_ERR_RET(errCode);
|
||||
}
|
||||
|
||||
|
||||
// Note: no more error processing, handled in function internal
|
||||
int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask) {
|
||||
bool moved = false;
|
||||
|
@ -720,7 +734,7 @@ int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask) {
|
|||
SCH_UNLOCK(SCH_WRITE, &pTask->level->lock);
|
||||
|
||||
if (taskDone < pTask->level->taskNum) {
|
||||
SCH_TASK_ELOG("wait all tasks, done:%d, all:%d", taskDone, pTask->level->taskNum);
|
||||
SCH_TASK_DLOG("wait all tasks, done:%d, all:%d", taskDone, pTask->level->taskNum);
|
||||
|
||||
return TSDB_CODE_SUCCESS;
|
||||
} else if (taskDone > pTask->level->taskNum) {
|
||||
|
@ -782,8 +796,7 @@ _return:
|
|||
SCH_ERR_RET(code);
|
||||
}
|
||||
|
||||
int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, char *msg, int32_t msgSize,
|
||||
int32_t rspCode) {
|
||||
int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, char *msg, int32_t msgSize, int32_t rspCode) {
|
||||
int32_t code = 0;
|
||||
|
||||
SCH_ERR_JRET(schValidateTaskReceivedMsgType(pJob, pTask, msgType));
|
||||
|
@ -837,7 +850,7 @@ int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, ch
|
|||
SResReadyRsp *rsp = (SResReadyRsp *)msg;
|
||||
|
||||
if (rspCode != TSDB_CODE_SUCCESS || NULL == msg || rsp->code != TSDB_CODE_SUCCESS) {
|
||||
SCH_ERR_RET(schProcessOnTaskFailure(pJob, pTask, rsp->code));
|
||||
SCH_ERR_RET(schProcessOnTaskFailure(pJob, pTask, rspCode));
|
||||
}
|
||||
|
||||
SCH_ERR_RET(schProcessOnTaskSuccess(pJob, pTask));
|
||||
|
@ -857,12 +870,14 @@ int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, ch
|
|||
SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR);
|
||||
}
|
||||
|
||||
SCH_ERR_JRET(schProcessOnDataFetched(pJob));
|
||||
break;
|
||||
atomic_store_ptr(&pJob->res, rsp);
|
||||
atomic_store_32(&pJob->resNumOfRows, rsp->numOfRows);
|
||||
|
||||
if (rsp->completed) {
|
||||
SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_SUCCEED);
|
||||
}
|
||||
|
||||
SCH_ERR_JRET(schProcessOnDataFetched(pJob));
|
||||
|
||||
break;
|
||||
}
|
||||
case TDMT_VND_DROP_TASK: {
|
||||
|
@ -884,6 +899,7 @@ _return:
|
|||
SCH_RET(code);
|
||||
}
|
||||
|
||||
|
||||
int32_t schHandleCallback(void* param, const SDataBuf* pMsg, int32_t msgType, int32_t rspCode) {
|
||||
int32_t code = 0;
|
||||
SSchCallbackParam *pParam = (SSchCallbackParam *)param;
|
||||
|
@ -980,8 +996,8 @@ int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp) {
|
|||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t schAsyncSendMsg(void *transport, SEpSet *epSet, uint64_t qId, uint64_t tId, int32_t msgType, void *msg,
|
||||
uint32_t msgSize) {
|
||||
|
||||
int32_t schAsyncSendMsg(void *transport, SEpSet* epSet, uint64_t qId, uint64_t tId, int32_t msgType, void *msg, uint32_t msgSize) {
|
||||
int32_t code = 0;
|
||||
SMsgSendInfo* pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo));
|
||||
if (NULL == pMsgSendInfo) {
|
||||
|
@ -1166,10 +1182,11 @@ static FORCE_INLINE bool schJobNeedToStop(SSchJob *pJob, int8_t *pStatus) {
|
|||
*pStatus = status;
|
||||
}
|
||||
|
||||
return (status == JOB_TASK_STATUS_FAILED || status == JOB_TASK_STATUS_CANCELLED ||
|
||||
status == JOB_TASK_STATUS_CANCELLING || status == JOB_TASK_STATUS_DROPPING);
|
||||
return (status == JOB_TASK_STATUS_FAILED || status == JOB_TASK_STATUS_CANCELLED
|
||||
|| status == JOB_TASK_STATUS_CANCELLING || status == JOB_TASK_STATUS_DROPPING);
|
||||
}
|
||||
|
||||
|
||||
// Note: no more error processing, handled in function internal
|
||||
int32_t schLaunchTask(SSchJob *pJob, SSchTask *pTask) {
|
||||
int8_t status = 0;
|
||||
|
@ -1289,22 +1306,19 @@ int32_t schExecJobImpl(void *transport, SArray *nodeList, SQueryDag *pDag, struc
|
|||
|
||||
SCH_ERR_JRET(schValidateAndBuildJob(pDag, pJob));
|
||||
|
||||
pJob->execTasks =
|
||||
taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
|
||||
pJob->execTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
|
||||
if (NULL == pJob->execTasks) {
|
||||
SCH_JOB_ELOG("taosHashInit %d execTasks failed", pDag->numOfSubplans);
|
||||
SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
|
||||
}
|
||||
|
||||
pJob->succTasks =
|
||||
taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
|
||||
pJob->succTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
|
||||
if (NULL == pJob->succTasks) {
|
||||
SCH_JOB_ELOG("taosHashInit %d succTasks failed", pDag->numOfSubplans);
|
||||
SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
|
||||
}
|
||||
|
||||
pJob->failTasks =
|
||||
taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
|
||||
pJob->failTasks = taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
|
||||
if (NULL == pJob->failTasks) {
|
||||
SCH_JOB_ELOG("taosHashInit %d failTasks failed", pDag->numOfSubplans);
|
||||
SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
|
||||
|
@ -1351,8 +1365,10 @@ int32_t schCancelJob(SSchJob *pJob) {
|
|||
//TODO
|
||||
|
||||
//TODO MOVE ALL TASKS FROM EXEC LIST TO FAIL LIST
|
||||
|
||||
}
|
||||
|
||||
|
||||
int32_t schedulerInit(SSchedulerCfg *cfg) {
|
||||
if (schMgmt.jobs) {
|
||||
qError("scheduler already initialized");
|
||||
|
@ -1369,8 +1385,7 @@ int32_t schedulerInit(SSchedulerCfg *cfg) {
|
|||
schMgmt.cfg.maxJobNum = SCHEDULE_DEFAULT_JOB_NUMBER;
|
||||
}
|
||||
|
||||
schMgmt.jobs =
|
||||
taosHashInit(schMgmt.cfg.maxJobNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
|
||||
schMgmt.jobs = taosHashInit(schMgmt.cfg.maxJobNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK);
|
||||
if (NULL == schMgmt.jobs) {
|
||||
qError("init schduler jobs failed, num:%u", schMgmt.cfg.maxJobNum);
|
||||
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
|
||||
|
@ -1535,6 +1550,7 @@ _return:
|
|||
SCH_RET(code);
|
||||
}
|
||||
|
||||
|
||||
int32_t scheduleFetchRows(SSchJob *pJob, void** pData) {
|
||||
if (NULL == pJob || NULL == pData) {
|
||||
SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
|
||||
|
@ -1713,3 +1729,4 @@ void schedulerDestroy(void) {
|
|||
schMgmt.jobs = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue