enh: add schedule policy
This commit is contained in:
parent
a8569fc44b
commit
a0f7cc6790
|
@ -26,8 +26,10 @@ extern "C" {
|
||||||
extern tsem_t schdRspSem;
|
extern tsem_t schdRspSem;
|
||||||
|
|
||||||
typedef struct SSchedulerCfg {
|
typedef struct SSchedulerCfg {
|
||||||
uint32_t maxJobNum;
|
uint32_t maxJobNum;
|
||||||
int32_t maxNodeTableNum;
|
int32_t maxNodeTableNum;
|
||||||
|
SCH_POLICY schPolicy;
|
||||||
|
bool enableReSchedule;
|
||||||
} SSchedulerCfg;
|
} SSchedulerCfg;
|
||||||
|
|
||||||
typedef struct SQueryProfileSummary {
|
typedef struct SQueryProfileSummary {
|
||||||
|
@ -96,6 +98,8 @@ int32_t schedulerGetTasksStatus(int64_t job, SArray *pSub);
|
||||||
|
|
||||||
void schedulerStopQueryHb(void *pTrans);
|
void schedulerStopQueryHb(void *pTrans);
|
||||||
|
|
||||||
|
int32_t schedulerUpdatePolicy(int32_t policy);
|
||||||
|
int32_t schedulerEnableReSchedule(bool enableResche);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cancel query job
|
* Cancel query job
|
||||||
|
|
|
@ -359,8 +359,7 @@ void taos_init_imp(void) {
|
||||||
SCatalogCfg cfg = {.maxDBCacheNum = 100, .maxTblCacheNum = 100};
|
SCatalogCfg cfg = {.maxDBCacheNum = 100, .maxTblCacheNum = 100};
|
||||||
catalogInit(&cfg);
|
catalogInit(&cfg);
|
||||||
|
|
||||||
SSchedulerCfg scfg = {.maxJobNum = 100};
|
schedulerInit();
|
||||||
schedulerInit(&scfg);
|
|
||||||
tscDebug("starting to initialize TAOS driver");
|
tscDebug("starting to initialize TAOS driver");
|
||||||
|
|
||||||
taosSetCoreDump(true);
|
taosSetCoreDump(true);
|
||||||
|
|
|
@ -129,6 +129,7 @@ void taos_close(TAOS *taos) {
|
||||||
|
|
||||||
STscObj *pObj = acquireTscObj(*(int64_t *)taos);
|
STscObj *pObj = acquireTscObj(*(int64_t *)taos);
|
||||||
if (NULL == pObj) {
|
if (NULL == pObj) {
|
||||||
|
taosMemoryFree(taos);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -77,6 +77,10 @@ extern "C" {
|
||||||
#define EXPLAIN_MODE_FORMAT "mode=%s"
|
#define EXPLAIN_MODE_FORMAT "mode=%s"
|
||||||
#define EXPLAIN_STRING_TYPE_FORMAT "%s"
|
#define EXPLAIN_STRING_TYPE_FORMAT "%s"
|
||||||
|
|
||||||
|
#define COMMAND_RESET_LOG "resetLog"
|
||||||
|
#define COMMAND_SCHEDULE_POLICY "schedulePolicy"
|
||||||
|
#define COMMAND_ENABLE_RESCHEDULE "enableReSchedule"
|
||||||
|
|
||||||
typedef struct SExplainGroup {
|
typedef struct SExplainGroup {
|
||||||
int32_t nodeNum;
|
int32_t nodeNum;
|
||||||
int32_t physiPlanExecNum;
|
int32_t physiPlanExecNum;
|
||||||
|
|
|
@ -479,7 +479,42 @@ static int32_t execShowCreateSTable(SShowCreateTableStmt* pStmt, SRetrieveTableR
|
||||||
return execShowCreateTable(pStmt, pRsp);
|
return execShowCreateTable(pStmt, pRsp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int32_t execAlterCmd(char* cmd, char* value, bool* processed) {
|
||||||
|
int32_t code = 0;
|
||||||
|
|
||||||
|
if (0 == strcasecmp(cmd, COMMAND_RESET_LOG)) {
|
||||||
|
taosResetLog();
|
||||||
|
cfgDumpCfg(tsCfg, 0, false);
|
||||||
|
} else if (0 == strcasecmp(cmd, COMMAND_SCHEDULE_POLICY)) {
|
||||||
|
code = schedulerUpdatePolicy(atoi(value));
|
||||||
|
} else if (0 == strcasecmp(cmd, COMMAND_ENABLE_RESCHEDULE)) {
|
||||||
|
code = schedulerEnableReSchedule(atoi(value));
|
||||||
|
} else {
|
||||||
|
goto _return;
|
||||||
|
}
|
||||||
|
|
||||||
|
*processed = true;
|
||||||
|
|
||||||
|
_return:
|
||||||
|
|
||||||
|
if (code) {
|
||||||
|
terrno = code;
|
||||||
|
}
|
||||||
|
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
|
||||||
static int32_t execAlterLocal(SAlterLocalStmt* pStmt) {
|
static int32_t execAlterLocal(SAlterLocalStmt* pStmt) {
|
||||||
|
bool processed = false;
|
||||||
|
|
||||||
|
if (execAlterCmd(pStmt->config, pStmt->value, &processed)) {
|
||||||
|
return terrno;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (processed) {
|
||||||
|
goto _return;
|
||||||
|
}
|
||||||
|
|
||||||
if (cfgSetItem(tsCfg, pStmt->config, pStmt->value, CFG_STYPE_ALTER_CMD)) {
|
if (cfgSetItem(tsCfg, pStmt->config, pStmt->value, CFG_STYPE_ALTER_CMD)) {
|
||||||
return terrno;
|
return terrno;
|
||||||
}
|
}
|
||||||
|
@ -488,6 +523,8 @@ static int32_t execAlterLocal(SAlterLocalStmt* pStmt) {
|
||||||
return terrno;
|
return terrno;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_return:
|
||||||
|
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -388,6 +388,11 @@ static void destroyDataSinkNode(SDataSinkNode* pNode) { nodesDestroyNode((SNode*
|
||||||
|
|
||||||
static void destroyExprNode(SExprNode* pExpr) { taosArrayDestroy(pExpr->pAssociation); }
|
static void destroyExprNode(SExprNode* pExpr) { taosArrayDestroy(pExpr->pAssociation); }
|
||||||
|
|
||||||
|
static void nodesDestroyNodePointer(void* node) {
|
||||||
|
SNode* pNode = *(SNode**)node;
|
||||||
|
nodesDestroyNode(pNode);
|
||||||
|
}
|
||||||
|
|
||||||
void nodesDestroyNode(SNode* pNode) {
|
void nodesDestroyNode(SNode* pNode) {
|
||||||
if (NULL == pNode) {
|
if (NULL == pNode) {
|
||||||
return;
|
return;
|
||||||
|
@ -718,6 +723,7 @@ void nodesDestroyNode(SNode* pNode) {
|
||||||
}
|
}
|
||||||
taosArrayDestroy(pQuery->pDbList);
|
taosArrayDestroy(pQuery->pDbList);
|
||||||
taosArrayDestroy(pQuery->pTableList);
|
taosArrayDestroy(pQuery->pTableList);
|
||||||
|
taosArrayDestroyEx(pQuery->pPlaceholderValues, nodesDestroyNodePointer);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case QUERY_NODE_LOGIC_PLAN_SCAN: {
|
case QUERY_NODE_LOGIC_PLAN_SCAN: {
|
||||||
|
|
|
@ -24,7 +24,7 @@ extern "C" {
|
||||||
#include "dataSinkMgt.h"
|
#include "dataSinkMgt.h"
|
||||||
|
|
||||||
int32_t qwAbortPrerocessQuery(QW_FPARAMS_DEF);
|
int32_t qwAbortPrerocessQuery(QW_FPARAMS_DEF);
|
||||||
int32_t qwPrerocessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg);
|
int32_t qwPreprocessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg);
|
||||||
int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, const char* sql);
|
int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, const char* sql);
|
||||||
int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg);
|
int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg);
|
||||||
int32_t qwProcessReady(QW_FPARAMS_DEF, SQWMsg *qwMsg);
|
int32_t qwProcessReady(QW_FPARAMS_DEF, SQWMsg *qwMsg);
|
||||||
|
|
|
@ -318,7 +318,7 @@ int32_t qWorkerPreprocessQueryMsg(void *qWorkerMgmt, SRpcMsg *pMsg) {
|
||||||
SQWMsg qwMsg = {.msg = msg->msg + msg->sqlLen, .msgLen = msg->phyLen, .connInfo = pMsg->info};
|
SQWMsg qwMsg = {.msg = msg->msg + msg->sqlLen, .msgLen = msg->phyLen, .connInfo = pMsg->info};
|
||||||
|
|
||||||
QW_SCH_TASK_DLOG("prerocessQuery start, handle:%p", pMsg->info.handle);
|
QW_SCH_TASK_DLOG("prerocessQuery start, handle:%p", pMsg->info.handle);
|
||||||
QW_ERR_RET(qwPrerocessQuery(QW_FPARAMS(), &qwMsg));
|
QW_ERR_RET(qwPreprocessQuery(QW_FPARAMS(), &qwMsg));
|
||||||
QW_SCH_TASK_DLOG("prerocessQuery end, handle:%p", pMsg->info.handle);
|
QW_SCH_TASK_DLOG("prerocessQuery end, handle:%p", pMsg->info.handle);
|
||||||
|
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
|
|
|
@ -469,7 +469,7 @@ int32_t qwAbortPrerocessQuery(QW_FPARAMS_DEF) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int32_t qwPrerocessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) {
|
int32_t qwPreprocessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) {
|
||||||
int32_t code = 0;
|
int32_t code = 0;
|
||||||
bool queryRsped = false;
|
bool queryRsped = false;
|
||||||
SSubplan *plan = NULL;
|
SSubplan *plan = NULL;
|
||||||
|
|
|
@ -28,15 +28,6 @@ extern "C" {
|
||||||
#include "trpc.h"
|
#include "trpc.h"
|
||||||
#include "command.h"
|
#include "command.h"
|
||||||
|
|
||||||
#define SCHEDULE_DEFAULT_MAX_JOB_NUM 1000
|
|
||||||
#define SCHEDULE_DEFAULT_MAX_TASK_NUM 1000
|
|
||||||
#define SCHEDULE_DEFAULT_MAX_NODE_TABLE_NUM 200 // unit is TSDB_TABLE_NUM_UNIT
|
|
||||||
|
|
||||||
#define SCH_DEFAULT_TASK_TIMEOUT_USEC 10000000
|
|
||||||
#define SCH_MAX_TASK_TIMEOUT_USEC 60000000
|
|
||||||
|
|
||||||
#define SCH_MAX_CANDIDATE_EP_NUM TSDB_MAX_REPLICA
|
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
SCH_READ = 1,
|
SCH_READ = 1,
|
||||||
SCH_WRITE,
|
SCH_WRITE,
|
||||||
|
@ -54,6 +45,24 @@ typedef enum {
|
||||||
SCH_OP_GET_STATUS,
|
SCH_OP_GET_STATUS,
|
||||||
} SCH_OP_TYPE;
|
} SCH_OP_TYPE;
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
SCH_LOAD_SEQ = 1,
|
||||||
|
SCH_RANDOM,
|
||||||
|
SCH_ALL,
|
||||||
|
} SCH_POLICY;
|
||||||
|
|
||||||
|
#define SCHEDULE_DEFAULT_MAX_JOB_NUM 1000
|
||||||
|
#define SCHEDULE_DEFAULT_MAX_TASK_NUM 1000
|
||||||
|
#define SCHEDULE_DEFAULT_MAX_NODE_TABLE_NUM 200 // unit is TSDB_TABLE_NUM_UNIT
|
||||||
|
#define SCHEDULE_DEFAULT_POLICY SCH_LOAD_SEQ
|
||||||
|
|
||||||
|
#define SCH_DEFAULT_TASK_TIMEOUT_USEC 10000000
|
||||||
|
#define SCH_MAX_TASK_TIMEOUT_USEC 60000000
|
||||||
|
#define SCH_MAX_CANDIDATE_EP_NUM TSDB_MAX_REPLICA
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct SSchDebug {
|
typedef struct SSchDebug {
|
||||||
bool lockEnable;
|
bool lockEnable;
|
||||||
bool apiEnable;
|
bool apiEnable;
|
||||||
|
@ -184,34 +193,36 @@ typedef struct SSchLevel {
|
||||||
|
|
||||||
typedef struct SSchTaskProfile {
|
typedef struct SSchTaskProfile {
|
||||||
int64_t startTs;
|
int64_t startTs;
|
||||||
int64_t* execTime;
|
SArray* execTime;
|
||||||
int64_t waitTime;
|
int64_t waitTime;
|
||||||
int64_t endTs;
|
int64_t endTs;
|
||||||
} SSchTaskProfile;
|
} SSchTaskProfile;
|
||||||
|
|
||||||
typedef struct SSchTask {
|
typedef struct SSchTask {
|
||||||
uint64_t taskId; // task id
|
uint64_t taskId; // task id
|
||||||
SRWLatch lock; // task reentrant lock
|
SRWLatch lock; // task reentrant lock
|
||||||
int32_t maxExecTimes; // task may exec times
|
int32_t maxExecTimes; // task max exec times
|
||||||
int32_t execId; // task current execute try index
|
int32_t maxRetryTimes; // task max retry times
|
||||||
SSchLevel *level; // level
|
int32_t retryTimes; // task retry times
|
||||||
SRWLatch planLock; // task update plan lock
|
int32_t execId; // task current execute index
|
||||||
SSubplan *plan; // subplan
|
SSchLevel *level; // level
|
||||||
char *msg; // operator tree
|
SRWLatch planLock; // task update plan lock
|
||||||
int32_t msgLen; // msg length
|
SSubplan *plan; // subplan
|
||||||
int8_t status; // task status
|
char *msg; // operator tree
|
||||||
int32_t lastMsgType; // last sent msg type
|
int32_t msgLen; // msg length
|
||||||
int64_t timeoutUsec; // taks timeout useconds before reschedule
|
int8_t status; // task status
|
||||||
SQueryNodeAddr succeedAddr; // task executed success node address
|
int32_t lastMsgType; // last sent msg type
|
||||||
int8_t candidateIdx; // current try condidation index
|
int64_t timeoutUsec; // task timeout useconds before reschedule
|
||||||
SArray *candidateAddrs; // condidate node addresses, element is SQueryNodeAddr
|
SQueryNodeAddr succeedAddr; // task executed success node address
|
||||||
SHashObj *execNodes; // all tried node for current task, element is SSchNodeInfo
|
int8_t candidateIdx; // current try condidation index
|
||||||
SSchTaskProfile profile; // task execution profile
|
SArray *candidateAddrs; // condidate node addresses, element is SQueryNodeAddr
|
||||||
int32_t childReady; // child task ready number
|
SHashObj *execNodes; // all tried node for current task, element is SSchNodeInfo
|
||||||
SArray *children; // the datasource tasks,from which to fetch the result, element is SQueryTask*
|
SSchTaskProfile profile; // task execution profile
|
||||||
SArray *parents; // the data destination tasks, get data from current task, element is SQueryTask*
|
int32_t childReady; // child task ready number
|
||||||
void* handle; // task send handle
|
SArray *children; // the datasource tasks,from which to fetch the result, element is SQueryTask*
|
||||||
bool registerdHb; // registered in hb
|
SArray *parents; // the data destination tasks, get data from current task, element is SQueryTask*
|
||||||
|
void* handle; // task send handle
|
||||||
|
bool registerdHb; // registered in hb
|
||||||
} SSchTask;
|
} SSchTask;
|
||||||
|
|
||||||
typedef struct SSchJobAttr {
|
typedef struct SSchJobAttr {
|
||||||
|
@ -298,7 +309,6 @@ extern SSchedulerMgmt schMgmt;
|
||||||
#define SCH_TASK_NEED_FLOW_CTRL(_job, _task) (SCH_IS_DATA_BIND_QRY_TASK(_task) && SCH_JOB_NEED_FLOW_CTRL(_job) && SCH_IS_LEVEL_UNFINISHED((_task)->level))
|
#define SCH_TASK_NEED_FLOW_CTRL(_job, _task) (SCH_IS_DATA_BIND_QRY_TASK(_task) && SCH_JOB_NEED_FLOW_CTRL(_job) && SCH_IS_LEVEL_UNFINISHED((_task)->level))
|
||||||
#define SCH_FETCH_TYPE(_pSrcTask) (SCH_IS_DATA_BIND_QRY_TASK(_pSrcTask) ? TDMT_SCH_FETCH : TDMT_SCH_MERGE_FETCH)
|
#define SCH_FETCH_TYPE(_pSrcTask) (SCH_IS_DATA_BIND_QRY_TASK(_pSrcTask) ? TDMT_SCH_FETCH : TDMT_SCH_MERGE_FETCH)
|
||||||
#define SCH_TASK_NEED_FETCH(_task) ((_task)->plan->subplanType != SUBPLAN_TYPE_MODIFY)
|
#define SCH_TASK_NEED_FETCH(_task) ((_task)->plan->subplanType != SUBPLAN_TYPE_MODIFY)
|
||||||
#define SCH_TASK_MAX_EXEC_TIMES(_levelIdx, _levelNum) (SCH_MAX_CANDIDATE_EP_NUM * ((_levelNum) - (_levelIdx)))
|
|
||||||
|
|
||||||
#define SCH_SET_JOB_TYPE(_job, type) do { if ((type) != SUBPLAN_TYPE_MODIFY) { (_job)->attr.queryJob = true; } } while (0)
|
#define SCH_SET_JOB_TYPE(_job, type) do { if ((type) != SUBPLAN_TYPE_MODIFY) { (_job)->attr.queryJob = true; } } while (0)
|
||||||
#define SCH_IS_QUERY_JOB(_job) ((_job)->attr.queryJob)
|
#define SCH_IS_QUERY_JOB(_job) ((_job)->attr.queryJob)
|
||||||
|
@ -320,8 +330,7 @@ extern SSchedulerMgmt schMgmt;
|
||||||
#define SCH_LOG_TASK_START_TS(_task) \
|
#define SCH_LOG_TASK_START_TS(_task) \
|
||||||
do { \
|
do { \
|
||||||
int64_t us = taosGetTimestampUs(); \
|
int64_t us = taosGetTimestampUs(); \
|
||||||
int32_t idx = (_task)->execId % (_task)->maxExecTimes; \
|
taosArrayPush((_task)->profile.execTime, &us); \
|
||||||
(_task)->profile.execTime[idx] = us; \
|
|
||||||
if (0 == (_task)->execId) { \
|
if (0 == (_task)->execId) { \
|
||||||
(_task)->profile.startTs = us; \
|
(_task)->profile.startTs = us; \
|
||||||
} \
|
} \
|
||||||
|
@ -330,8 +339,7 @@ extern SSchedulerMgmt schMgmt;
|
||||||
#define SCH_LOG_TASK_WAIT_TS(_task) \
|
#define SCH_LOG_TASK_WAIT_TS(_task) \
|
||||||
do { \
|
do { \
|
||||||
int64_t us = taosGetTimestampUs(); \
|
int64_t us = taosGetTimestampUs(); \
|
||||||
int32_t idx = (_task)->execId % (_task)->maxExecTimes; \
|
(_task)->profile.waitTime += us - *(int64_t*)taosArrayGet((_task)->profile.execTime, (_task)->execId); \
|
||||||
(_task)->profile.waitTime += us - (_task)->profile.execTime[idx]; \
|
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
@ -339,7 +347,8 @@ extern SSchedulerMgmt schMgmt;
|
||||||
do { \
|
do { \
|
||||||
int64_t us = taosGetTimestampUs(); \
|
int64_t us = taosGetTimestampUs(); \
|
||||||
int32_t idx = (_task)->execId % (_task)->maxExecTimes; \
|
int32_t idx = (_task)->execId % (_task)->maxExecTimes; \
|
||||||
(_task)->profile.execTime[idx] = us - (_task)->profile.execTime[idx]; \
|
int64_t *startts = taosArrayGet((_task)->profile.execTime, (_task)->execId); \
|
||||||
|
*startts = us - *startts; \
|
||||||
(_task)->profile.endTs = us; \
|
(_task)->profile.endTs = us; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,18 @@ void schFreeTask(SSchJob *pJob, SSchTask *pTask) {
|
||||||
taosHashCleanup(pTask->execNodes);
|
taosHashCleanup(pTask->execNodes);
|
||||||
}
|
}
|
||||||
|
|
||||||
taosMemoryFree(pTask->profile.execTime);
|
taosArrayDestroy(pTask->profile.execTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
void schInitTaskRetryTimes(SSchJob *pJob, SSchTask *pTask, SSchLevel *pLevel, int32_t levelNum) {
|
||||||
|
if (SCH_IS_DATA_BIND_TASK(pTask) || (!SCH_IS_QUERY_JOB(pJob)) || (SCH_ALL != schMgmt.cfg.schPolicy)) {
|
||||||
|
pTask->maxRetryTimes = SCH_MAX_CANDIDATE_EP_NUM;
|
||||||
|
} else {
|
||||||
|
int32_t nodeNum = taosArrayGetSize(pJob->nodeList);
|
||||||
|
pTask->maxRetryTimes = TMAX(nodeNum, SCH_MAX_CANDIDATE_EP_NUM);
|
||||||
|
}
|
||||||
|
|
||||||
|
pTask->maxExecTimes = pTask->maxRetryTimes * (levelNum - pLevel->level);
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *pLevel, int32_t levelNum) {
|
int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *pLevel, int32_t levelNum) {
|
||||||
|
@ -51,12 +62,14 @@ int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *
|
||||||
pTask->plan = pPlan;
|
pTask->plan = pPlan;
|
||||||
pTask->level = pLevel;
|
pTask->level = pLevel;
|
||||||
pTask->execId = -1;
|
pTask->execId = -1;
|
||||||
pTask->maxExecTimes = SCH_TASK_MAX_EXEC_TIMES(pLevel->level, levelNum);
|
|
||||||
pTask->timeoutUsec = SCH_DEFAULT_TASK_TIMEOUT_USEC;
|
pTask->timeoutUsec = SCH_DEFAULT_TASK_TIMEOUT_USEC;
|
||||||
pTask->taskId = schGenTaskId();
|
pTask->taskId = schGenTaskId();
|
||||||
pTask->execNodes =
|
pTask->execNodes =
|
||||||
taosHashInit(SCH_MAX_CANDIDATE_EP_NUM, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
|
taosHashInit(SCH_MAX_CANDIDATE_EP_NUM, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
|
||||||
pTask->profile.execTime = taosMemoryCalloc(pTask->maxExecTimes, sizeof(int64_t));
|
|
||||||
|
schInitTaskRetryTimes(pJob, pTask, pLevel, levelNum);
|
||||||
|
|
||||||
|
pTask->profile.execTime = taosArrayInit(pTask->maxExecTimes, sizeof(int64_t));
|
||||||
if (NULL == pTask->execNodes || NULL == pTask->profile.execTime) {
|
if (NULL == pTask->execNodes || NULL == pTask->profile.execTime) {
|
||||||
SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
|
SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
|
||||||
}
|
}
|
||||||
|
@ -67,7 +80,7 @@ int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *
|
||||||
|
|
||||||
_return:
|
_return:
|
||||||
|
|
||||||
taosMemoryFreeClear(pTask->profile.execTime);
|
taosArrayDestroy(pTask->profile.execTime);
|
||||||
taosHashCleanup(pTask->execNodes);
|
taosHashCleanup(pTask->execNodes);
|
||||||
|
|
||||||
SCH_RET(code);
|
SCH_RET(code);
|
||||||
|
@ -285,6 +298,10 @@ int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t schRescheduleTask(SSchJob *pJob, SSchTask *pTask) {
|
int32_t schRescheduleTask(SSchJob *pJob, SSchTask *pTask) {
|
||||||
|
if (!schMgmt.cfg.enableReSchedule) {
|
||||||
|
return TSDB_CODE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
if (SCH_IS_DATA_BIND_TASK(pTask)) {
|
if (SCH_IS_DATA_BIND_TASK(pTask)) {
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -320,6 +337,7 @@ int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf *pData, int32
|
||||||
taosMemoryFreeClear(pTask->msg);
|
taosMemoryFreeClear(pTask->msg);
|
||||||
pTask->msgLen = 0;
|
pTask->msgLen = 0;
|
||||||
pTask->lastMsgType = 0;
|
pTask->lastMsgType = 0;
|
||||||
|
pTask->retryTimes = 0;
|
||||||
memset(&pTask->succeedAddr, 0, sizeof(pTask->succeedAddr));
|
memset(&pTask->succeedAddr, 0, sizeof(pTask->succeedAddr));
|
||||||
|
|
||||||
if (SCH_IS_DATA_BIND_TASK(pTask)) {
|
if (SCH_IS_DATA_BIND_TASK(pTask)) {
|
||||||
|
@ -493,9 +511,15 @@ int32_t schTaskCheckSetRetry(SSchJob *pJob, SSchTask *pTask, int32_t errCode, bo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((pTask->retryTimes + 1) > pTask->maxRetryTimes) {
|
||||||
|
*needRetry = false;
|
||||||
|
SCH_TASK_DLOG("task no more retry since reach max retry times, retryTimes:%d/%d", pTask->retryTimes, pTask->maxRetryTimes);
|
||||||
|
return TSDB_CODE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
if ((pTask->execId + 1) >= pTask->maxExecTimes) {
|
if ((pTask->execId + 1) >= pTask->maxExecTimes) {
|
||||||
*needRetry = false;
|
*needRetry = false;
|
||||||
SCH_TASK_DLOG("task no more retry since reach max try times, execId:%d", pTask->execId);
|
SCH_TASK_DLOG("task no more retry since reach max exec times, execId:%d/%d", pTask->execId, pTask->maxExecTimes);
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -647,10 +671,31 @@ int32_t schUpdateTaskCandidateAddr(SSchJob *pJob, SSchTask *pTask, SEpSet *pEpSe
|
||||||
|
|
||||||
int32_t schSwitchTaskCandidateAddr(SSchJob *pJob, SSchTask *pTask) {
|
int32_t schSwitchTaskCandidateAddr(SSchJob *pJob, SSchTask *pTask) {
|
||||||
int32_t candidateNum = taosArrayGetSize(pTask->candidateAddrs);
|
int32_t candidateNum = taosArrayGetSize(pTask->candidateAddrs);
|
||||||
if (++pTask->candidateIdx >= candidateNum) {
|
if (candidateNum <= 1) {
|
||||||
pTask->candidateIdx = 0;
|
goto _return;
|
||||||
}
|
}
|
||||||
SCH_TASK_DLOG("switch task candiateIdx to %d", pTask->candidateIdx);
|
|
||||||
|
switch (schMgmt.cfg.schPolicy) {
|
||||||
|
case SCH_LOAD_SEQ:
|
||||||
|
case SCH_ALL:
|
||||||
|
default:
|
||||||
|
if (++pTask->candidateIdx >= candidateNum) {
|
||||||
|
pTask->candidateIdx = 0;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SCH_RANDOM: {
|
||||||
|
int32_t lastIdx = pTask->candidateIdx;
|
||||||
|
while (lastIdx == pTask->candidateIdx) {
|
||||||
|
pTask->candidateIdx = taosRand() % candidateNum;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_return:
|
||||||
|
|
||||||
|
SCH_TASK_DLOG("switch task candiateIdx to %d/%d", pTask->candidateIdx, candidateNum);
|
||||||
|
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -737,6 +782,7 @@ int32_t schLaunchTaskImpl(SSchJob *pJob, SSchTask *pTask) {
|
||||||
|
|
||||||
atomic_add_fetch_32(&pTask->level->taskLaunchedNum, 1);
|
atomic_add_fetch_32(&pTask->level->taskLaunchedNum, 1);
|
||||||
pTask->execId++;
|
pTask->execId++;
|
||||||
|
pTask->retryTimes++;
|
||||||
|
|
||||||
SCH_TASK_DLOG("start to launch task's %dth exec", pTask->execId);
|
SCH_TASK_DLOG("start to launch task's %dth exec", pTask->execId);
|
||||||
|
|
||||||
|
|
|
@ -22,26 +22,19 @@ SSchedulerMgmt schMgmt = {
|
||||||
.jobRef = -1,
|
.jobRef = -1,
|
||||||
};
|
};
|
||||||
|
|
||||||
int32_t schedulerInit(SSchedulerCfg *cfg) {
|
int32_t schedulerInit() {
|
||||||
if (schMgmt.jobRef >= 0) {
|
if (schMgmt.jobRef >= 0) {
|
||||||
qError("scheduler already initialized");
|
qError("scheduler already initialized");
|
||||||
return TSDB_CODE_QRY_INVALID_INPUT;
|
return TSDB_CODE_QRY_INVALID_INPUT;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cfg) {
|
schMgmt.cfg.maxJobNum = SCHEDULE_DEFAULT_MAX_JOB_NUM;
|
||||||
schMgmt.cfg = *cfg;
|
schMgmt.cfg.maxNodeTableNum = SCHEDULE_DEFAULT_MAX_NODE_TABLE_NUM;
|
||||||
|
schMgmt.cfg.schPolicy = SCHEDULE_DEFAULT_POLICY;
|
||||||
if (schMgmt.cfg.maxJobNum == 0) {
|
schMgmt.cfg.enableReSchedule = true;
|
||||||
schMgmt.cfg.maxJobNum = SCHEDULE_DEFAULT_MAX_JOB_NUM;
|
|
||||||
}
|
|
||||||
if (schMgmt.cfg.maxNodeTableNum <= 0) {
|
|
||||||
schMgmt.cfg.maxNodeTableNum = SCHEDULE_DEFAULT_MAX_NODE_TABLE_NUM;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
schMgmt.cfg.maxJobNum = SCHEDULE_DEFAULT_MAX_JOB_NUM;
|
|
||||||
schMgmt.cfg.maxNodeTableNum = SCHEDULE_DEFAULT_MAX_NODE_TABLE_NUM;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
qDebug("schedule policy init to %d", schMgmt.cfg.schPolicy);
|
||||||
|
|
||||||
schMgmt.jobRef = taosOpenRef(schMgmt.cfg.maxJobNum, schFreeJobImpl);
|
schMgmt.jobRef = taosOpenRef(schMgmt.cfg.maxJobNum, schFreeJobImpl);
|
||||||
if (schMgmt.jobRef < 0) {
|
if (schMgmt.jobRef < 0) {
|
||||||
qError("init schduler jobRef failed, num:%u", schMgmt.cfg.maxJobNum);
|
qError("init schduler jobRef failed, num:%u", schMgmt.cfg.maxJobNum);
|
||||||
|
@ -130,6 +123,26 @@ void schedulerStopQueryHb(void *pTrans) {
|
||||||
schCleanClusterHb(pTrans);
|
schCleanClusterHb(pTrans);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int32_t schedulerUpdatePolicy(int32_t policy) {
|
||||||
|
switch (policy) {
|
||||||
|
case SCH_LOAD_SEQ:
|
||||||
|
case SCH_RANDOM:
|
||||||
|
case SCH_ALL:
|
||||||
|
schMgmt.cfg.schPolicy = policy;
|
||||||
|
qDebug("schedule policy updated to %d", schMgmt.cfg.schPolicy);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return TSDB_CODE_TSC_INVALID_INPUT;
|
||||||
|
}
|
||||||
|
|
||||||
|
return TSDB_CODE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t schedulerEnableReSchedule(bool enableResche) {
|
||||||
|
schMgmt.cfg.enableReSchedule = enableResche;
|
||||||
|
return TSDB_CODE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
void schedulerFreeJob(int64_t* jobId, int32_t errCode) {
|
void schedulerFreeJob(int64_t* jobId, int32_t errCode) {
|
||||||
if (0 == *jobId) {
|
if (0 == *jobId) {
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -477,7 +477,7 @@ void* schtRunJobThread(void *aa) {
|
||||||
schtInitLogFile();
|
schtInitLogFile();
|
||||||
|
|
||||||
|
|
||||||
int32_t code = schedulerInit(NULL);
|
int32_t code = schedulerInit();
|
||||||
assert(code == 0);
|
assert(code == 0);
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2685,6 +2685,8 @@ int main(int argc, char *argv[])
|
||||||
|
|
||||||
runAll(taos);
|
runAll(taos);
|
||||||
|
|
||||||
|
taos_close(taos);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue