/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
#ifndef _TD_SCHEDULER_INT_H_
|
|
#define _TD_SCHEDULER_INT_H_
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
#include "os.h"
|
|
#include "tarray.h"
|
|
#include "planner.h"
|
|
#include "scheduler.h"
|
|
#include "thash.h"
|
|
#include "trpc.h"
|
|
#include "command.h"
|
|
|
|
/* Default capacity limits for the scheduler. */
#define SCHEDULE_DEFAULT_MAX_JOB_NUM        1000
#define SCHEDULE_DEFAULT_MAX_TASK_NUM       1000
#define SCHEDULE_DEFAULT_MAX_NODE_TABLE_NUM 200  // unit is TSDB_TABLE_NUM_UNIT

/* Task timeout bounds in microseconds: 10 s default, 60 s maximum. */
#define SCH_DEFAULT_TASK_TIMEOUT_USEC 10000000
#define SCH_MAX_TASK_TIMEOUT_USEC     60000000

/*
 * SCH_TASK_MAX_EXEC_TIMES is the number of per-attempt timing slots in
 * SSchTaskProfile.execUseTime; the profiling macros wrap execIdx modulo it.
 * SCH_MAX_CANDIDATE_EP_NUM mirrors TSDB_MAX_REPLICA.
 */
#define SCH_TASK_MAX_EXEC_TIMES  5
#define SCH_MAX_CANDIDATE_EP_NUM TSDB_MAX_REPLICA
|
|
|
|
/* Latch mode selector passed as the first argument of SCH_LOCK / SCH_UNLOCK. */
enum {
  SCH_READ = 1,
  SCH_WRITE,
};
|
|
|
|
/*
 * Callback kind identifiers.
 * NOTE(review): presumably stored in SSchJob.userCb to tell an exec callback
 * from a fetch callback — confirm against the implementation files.
 */
enum {
  SCH_EXEC_CB = 1,
  SCH_FETCH_CB,
};
|
|
|
|
/*
 * Two opaque pointers that together identify a transport endpoint.
 * NOTE(review): what pTrans and pHandle point at is decided by the RPC layer
 * and is not visible in this header — confirm before dereferencing.
 */
typedef struct SSchTrans {
  void *pTrans;
  void *pHandle;
} SSchTrans;
|
|
|
|
typedef struct SSchHbTrans {
|
|
SRWLatch lock;
|
|
int64_t taskNum;
|
|
SRpcCtx rpcCtx;
|
|
SSchTrans trans;
|
|
} SSchHbTrans;
|
|
|
|
/*
 * Statistics containers. The three leaf structs currently carry no counters.
 * When WINDOWS is defined each one gets a placeholder member, presumably
 * because that toolchain rejects empty structs (the member is literally
 * named avoidCompilationErrors).
 */
typedef struct SSchApiStat {
#ifdef WINDOWS
  size_t avoidCompilationErrors;
#endif
} SSchApiStat;

typedef struct SSchRuntimeStat {
#ifdef WINDOWS
  size_t avoidCompilationErrors;
#endif
} SSchRuntimeStat;

typedef struct SSchJobStat {
#ifdef WINDOWS
  size_t avoidCompilationErrors;
#endif
} SSchJobStat;

/* Aggregate of the API, runtime and job statistics blocks. */
typedef struct SSchStat {
  SSchApiStat     api;
  SSchRuntimeStat runtime;
  SSchJobStat     job;
} SSchStat;
|
|
|
|
typedef struct SSchResInfo {
|
|
SQueryResult* queryRes;
|
|
void** fetchRes;
|
|
schedulerExecCallback execFp;
|
|
schedulerFetchCallback fetchFp;
|
|
void* userParam;
|
|
} SSchResInfo;
|
|
|
|
typedef struct SSchedulerMgmt {
|
|
uint64_t taskId; // sequential taksId
|
|
uint64_t sId; // schedulerId
|
|
SSchedulerCfg cfg;
|
|
bool exit;
|
|
int32_t jobRef;
|
|
int32_t jobNum;
|
|
SSchStat stat;
|
|
SRWLatch hbLock;
|
|
SHashObj *hbConnections;
|
|
} SSchedulerMgmt;
|
|
|
|
/*
 * Common first member of the callback parameter structs
 * (SSchTaskCallbackParam, SSchHbCallbackParam); isHbParam tells them apart.
 */
typedef struct SSchCallbackParamHeader {
  bool isHbParam;
} SSchCallbackParamHeader;
|
|
|
|
typedef struct SSchTaskCallbackParam {
|
|
SSchCallbackParamHeader head;
|
|
uint64_t queryId;
|
|
int64_t refId;
|
|
uint64_t taskId;
|
|
int32_t execIdx;
|
|
void *pTrans;
|
|
} SSchTaskCallbackParam;
|
|
|
|
typedef struct SSchHbCallbackParam {
|
|
SSchCallbackParamHeader head;
|
|
SQueryNodeEpId nodeEpId;
|
|
void *pTrans;
|
|
} SSchHbCallbackParam;
|
|
|
|
typedef struct SSchFlowControl {
|
|
SRWLatch lock;
|
|
bool sorted;
|
|
int32_t tableNumSum;
|
|
uint32_t execTaskNum;
|
|
SArray *taskList; // Element is SSchTask*
|
|
} SSchFlowControl;
|
|
|
|
typedef struct SSchNodeInfo {
|
|
SQueryNodeAddr addr;
|
|
void *handle;
|
|
} SSchNodeInfo;
|
|
|
|
typedef struct SSchLevel {
|
|
int32_t level;
|
|
int8_t status;
|
|
SRWLatch lock;
|
|
int32_t taskFailed;
|
|
int32_t taskSucceed;
|
|
int32_t taskNum;
|
|
int32_t taskLaunchedNum;
|
|
int32_t taskDoneNum;
|
|
SArray *subTasks; // Element is SQueryTask
|
|
} SSchLevel;
|
|
|
|
typedef struct SSchTaskProfile {
|
|
int64_t startTs;
|
|
int64_t execUseTime[SCH_TASK_MAX_EXEC_TIMES];
|
|
int64_t waitTime;
|
|
int64_t endTs;
|
|
} SSchTaskProfile;
|
|
|
|
typedef struct SSchTask {
|
|
uint64_t taskId; // task id
|
|
SRWLatch lock; // task lock
|
|
int32_t maxExecTimes; // task may exec times
|
|
int32_t execIdx; // task current execute try index
|
|
SSchLevel *level; // level
|
|
SRWLatch planLock; // task update plan lock
|
|
SSubplan *plan; // subplan
|
|
char *msg; // operator tree
|
|
int32_t msgLen; // msg length
|
|
int8_t status; // task status
|
|
int32_t lastMsgType; // last sent msg type
|
|
int64_t timeoutUsec; // taks timeout useconds before reschedule
|
|
SQueryNodeAddr succeedAddr; // task executed success node address
|
|
int8_t candidateIdx; // current try condidation index
|
|
SArray *candidateAddrs; // condidate node addresses, element is SQueryNodeAddr
|
|
SHashObj *execNodes; // all tried node for current task, element is SSchNodeInfo
|
|
SSchTaskProfile profile; // task execution profile
|
|
int32_t childReady; // child task ready number
|
|
SArray *children; // the datasource tasks,from which to fetch the result, element is SQueryTask*
|
|
SArray *parents; // the data destination tasks, get data from current task, element is SQueryTask*
|
|
void* handle; // task send handle
|
|
bool registerdHb; // registered in hb
|
|
} SSchTask;
|
|
|
|
typedef struct SSchJobAttr {
|
|
EExplainMode explainMode;
|
|
bool syncSchedule;
|
|
bool queryJob;
|
|
bool needFlowCtrl;
|
|
} SSchJobAttr;
|
|
|
|
typedef struct SSchJob {
|
|
int64_t refId;
|
|
uint64_t queryId;
|
|
SSchJobAttr attr;
|
|
int32_t levelNum;
|
|
int32_t taskNum;
|
|
SRequestConnInfo conn;
|
|
SArray *nodeList; // qnode/vnode list, SArray<SQueryNodeAddr>
|
|
SArray *levels; // starting from 0. SArray<SSchLevel>
|
|
SNodeList *subPlans; // subplan pointer copied from DAG, no need to free it in scheduler
|
|
|
|
SArray *dataSrcTasks; // SArray<SQueryTask*>
|
|
int32_t levelIdx;
|
|
SEpSet dataSrcEps;
|
|
SHashObj *taskList;
|
|
SHashObj *execTasks; // executing tasks, key:taskid, value:SQueryTask*
|
|
SHashObj *succTasks; // succeed tasks, key:taskid, value:SQueryTask*
|
|
SHashObj *failTasks; // failed tasks, key:taskid, value:SQueryTask*
|
|
SHashObj *flowCtrl; // key is ep, element is SSchFlowControl
|
|
|
|
SExplainCtx *explainCtx;
|
|
int8_t status;
|
|
SQueryNodeAddr resNode;
|
|
tsem_t rspSem;
|
|
int8_t userFetch;
|
|
int32_t remoteFetch;
|
|
SSchTask *fetchTask;
|
|
int32_t errCode;
|
|
SRWLatch resLock;
|
|
SQueryExecRes execRes;
|
|
void *resData; //TODO free it or not
|
|
int32_t resNumOfRows;
|
|
SSchResInfo userRes;
|
|
const char *sql;
|
|
int32_t userCb;
|
|
SQueryProfileSummary summary;
|
|
} SSchJob;
|
|
|
|
// Scheduler-wide management object; only declared here, the definition lives in another translation unit.
extern SSchedulerMgmt schMgmt;
|
|
|
|
/*
 * Task timing helpers. Each macro reads taosGetTimestampUs() once and works on
 * slot (execIdx % SCH_TASK_MAX_EXEC_TIMES) of (_task)->profile.execUseTime.
 *
 *   SCH_LOG_TASK_START_TS  stores the current timestamp in the slot; when
 *                          execIdx is 0 it also records profile.startTs.
 *   SCH_LOG_TASK_WAIT_TS   adds (now - slot value) to profile.waitTime.
 *   SCH_LOG_TASK_END_TS    replaces the slot value with (now - slot value) and
 *                          records profile.endTs.
 *
 * The macro-local variables carry a trailing underscore so that an argument
 * expression mentioning `idx` or `us` is not captured by them. _task is
 * evaluated more than once, so it must be free of side effects.
 *
 * NOTE(review): `%` keeps the sign of execIdx, so a negative execIdx would
 * select a slot before the array — confirm callers never pass one.
 */
#define SCH_LOG_TASK_START_TS(_task)                                    \
  do {                                                                  \
    int64_t schNowUs_ = taosGetTimestampUs();                           \
    int32_t schSlot_ = (_task)->execIdx % SCH_TASK_MAX_EXEC_TIMES;      \
    (_task)->profile.execUseTime[schSlot_] = schNowUs_;                 \
    if (0 == (_task)->execIdx) {                                        \
      (_task)->profile.startTs = schNowUs_;                             \
    }                                                                   \
  } while (0)

#define SCH_LOG_TASK_WAIT_TS(_task)                                                  \
  do {                                                                               \
    int64_t schNowUs_ = taosGetTimestampUs();                                        \
    int32_t schSlot_ = (_task)->execIdx % SCH_TASK_MAX_EXEC_TIMES;                   \
    (_task)->profile.waitTime += schNowUs_ - (_task)->profile.execUseTime[schSlot_]; \
  } while (0)

#define SCH_LOG_TASK_END_TS(_task)                                                                      \
  do {                                                                                                  \
    int64_t schNowUs_ = taosGetTimestampUs();                                                           \
    int32_t schSlot_ = (_task)->execIdx % SCH_TASK_MAX_EXEC_TIMES;                                      \
    (_task)->profile.execUseTime[schSlot_] = schNowUs_ - (_task)->profile.execUseTime[schSlot_];        \
    (_task)->profile.endTs = schNowUs_;                                                                 \
  } while (0)
|
|
|
|
/*
 * True when the time elapsed since the value stored in the current attempt
 * slot (execIdx % SCH_TASK_MAX_EXEC_TIMES of profile.execUseTime) exceeds
 * timeoutUsec. This is only meaningful while the slot still holds a start
 * timestamp, i.e. before SCH_LOG_TASK_END_TS overwrites it.
 */
#define SCH_TASK_TIMEOUT(_task)                                                                          \
  ((taosGetTimestampUs() - (_task)->profile.execUseTime[(_task)->execIdx % SCH_TASK_MAX_EXEC_TIMES]) > \
   (_task)->timeoutUsec)

/* True when readyNum has reached the number of entries in task->children. */
#define SCH_TASK_READY_FOR_LAUNCH(readyNum, task) ((readyNum) >= taosArrayGetSize((task)->children))
|
|
|
|
/* Take / release the task latch in SCH_WRITE mode. */
#define SCH_LOCK_TASK(_task)   SCH_LOCK(SCH_WRITE, &(_task)->lock)
#define SCH_UNLOCK_TASK(_task) SCH_UNLOCK(SCH_WRITE, &(_task)->lock)

/*
 * NULL-tolerant task accessors. For a NULL task, SCH_TASK_ID yields -1
 * converted to the type of taskId (all bits set for uint64_t) and
 * SCH_GET_TASK_LASTMSG_TYPE yields -1; SCH_SET_TASK_LASTMSG_TYPE does nothing.
 */
#define SCH_TASK_ID(_task) ((_task) ? (_task)->taskId : -1)
#define SCH_SET_TASK_LASTMSG_TYPE(_task, _type)          \
  do {                                                   \
    if (_task) {                                         \
      atomic_store_32(&(_task)->lastMsgType, (_type));   \
    }                                                    \
  } while (0)
#define SCH_GET_TASK_LASTMSG_TYPE(_task) ((_task) ? atomic_load_32(&(_task)->lastMsgType) : -1)
|
|
|
|
/*
 * Task classification by subplan type and level.
 *   SCH_IS_DATA_SRC_QRY_TASK  the plan is a SCAN subplan.
 *   SCH_IS_DATA_SRC_TASK      the plan is a SCAN or MODIFY subplan.
 *   SCH_IS_LEAF_TASK          the task's level index is the last one of the
 *                             job (level + 1 == levelNum).
 * task->plan and _task->level are dereferenced without a NULL check.
 */
#define SCH_IS_DATA_SRC_QRY_TASK(task) ((task)->plan->subplanType == SUBPLAN_TYPE_SCAN)
#define SCH_IS_DATA_SRC_TASK(task) \
  (((task)->plan->subplanType == SUBPLAN_TYPE_SCAN) || ((task)->plan->subplanType == SUBPLAN_TYPE_MODIFY))
#define SCH_IS_LEAF_TASK(_job, _task) (((_task)->level->level + 1) == (_job)->levelNum)
|
|
|
|
/* Task status accessors: the status byte is read and written through the 8-bit atomic helpers. */
#define SCH_SET_TASK_STATUS(task, st) atomic_store_8(&(task)->status, (st))
#define SCH_GET_TASK_STATUS(task)     atomic_load_8(&(task)->status)
#define SCH_GET_TASK_STATUS_STR(task) jobTaskStatusStr(SCH_GET_TASK_STATUS(task))

/*
 * Task send-handle accessors. The getter tolerates a NULL task and yields
 * NULL; the setter dereferences _task unconditionally.
 */
#define SCH_GET_TASK_HANDLE(_task)          ((_task) ? (_task)->handle : NULL)
#define SCH_SET_TASK_HANDLE(_task, _handle) ((_task)->handle = (_handle))
|
|
|
|
/* Job status accessors: the status byte is read and written through the 8-bit atomic helpers. */
#define SCH_SET_JOB_STATUS(job, st) atomic_store_8(&(job)->status, (st))
#define SCH_GET_JOB_STATUS(job)     atomic_load_8(&(job)->status)
#define SCH_GET_JOB_STATUS_STR(job) jobTaskStatusStr(SCH_GET_JOB_STATUS(job))
|
|
|
|
/*
 * Flow-control predicates. A task needs flow control when it is a SCAN data
 * source task, its job has needFlowCtrl set, and its level still has tasks
 * that were not launched.
 *
 * The two setter macros are wrapped in parentheses so that they remain a
 * single expression when used inside a larger one (e.g. after `!`).
 */
#define SCH_SET_JOB_NEED_FLOW_CTRL(_job) ((_job)->attr.needFlowCtrl = true)
#define SCH_JOB_NEED_FLOW_CTRL(_job)     ((_job)->attr.needFlowCtrl)
#define SCH_TASK_NEED_FLOW_CTRL(_job, _task) \
  (SCH_IS_DATA_SRC_QRY_TASK(_task) && SCH_JOB_NEED_FLOW_CTRL(_job) && SCH_IS_LEVEL_UNFINISHED((_task)->level))

/*
 * Job type predicates, all derived from attr.queryJob, which is set to
 * (type != SUBPLAN_TYPE_MODIFY). SCH_IS_EXPLAIN_JOB is true only for
 * EXPLAIN_MODE_ANALYZE.
 */
#define SCH_SET_JOB_TYPE(_job, type) ((_job)->attr.queryJob = ((type) != SUBPLAN_TYPE_MODIFY))
#define SCH_IS_QUERY_JOB(_job)       ((_job)->attr.queryJob)
#define SCH_JOB_NEED_FETCH(_job)     SCH_IS_QUERY_JOB(_job)
#define SCH_IS_WAIT_ALL_JOB(_job)    (!SCH_IS_QUERY_JOB(_job))
#define SCH_IS_NEED_DROP_JOB(_job)   (SCH_IS_QUERY_JOB(_job))
#define SCH_IS_EXPLAIN_JOB(_job)     (EXPLAIN_MODE_ANALYZE == (_job)->attr.explainMode)
|
|
|
|
/* True while fewer tasks have been launched than the level contains. */
#define SCH_IS_LEVEL_UNFINISHED(_level) ((_level)->taskLaunchedNum < (_level)->taskNum)

/*
 * Endpoint-set helpers on an address that embeds an epSet.
 *   SCH_GET_CUR_EP       pointer to the endpoint selected by epSet.inUse.
 *   SCH_SWITCH_EPSET     advance inUse to the next endpoint, wrapping around.
 *   SCH_TASK_NUM_OF_EPS  number of endpoints in the set.
 * NOTE(review): SCH_SWITCH_EPSET divides by numOfEps, so an empty endpoint
 * set (numOfEps == 0) must be excluded by the caller — confirm.
 */
#define SCH_GET_CUR_EP(_addr)      (&(_addr)->epSet.eps[(_addr)->epSet.inUse])
#define SCH_SWITCH_EPSET(_addr)    ((_addr)->epSet.inUse = ((_addr)->epSet.inUse + 1) % (_addr)->epSet.numOfEps)
#define SCH_TASK_NUM_OF_EPS(_addr) ((_addr)->epSet.numOfEps)
|
|
|
|
/*
 * Logging helpers that prefix every message with the query id (job macros) or
 * with the query id and task id (task macros).
 *
 * They rely on variables named `pJob` (and `pTask` for the task macros)
 * being in scope at the call site. `param` must be a string literal because
 * it is concatenated onto the prefix, and at least one variadic argument
 * must follow it.
 */
#define SCH_JOB_ELOG(param, ...) qError("QID:0x%" PRIx64 " " param, pJob->queryId, __VA_ARGS__)
#define SCH_JOB_DLOG(param, ...) qDebug("QID:0x%" PRIx64 " " param, pJob->queryId, __VA_ARGS__)

#define SCH_TASK_ELOG(param, ...) \
  qError("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__)
#define SCH_TASK_DLOG(param, ...) \
  qDebug("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__)
#define SCH_TASK_DLOGL(param, ...) \
  qDebugL("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__)
#define SCH_TASK_WLOG(param, ...) \
  qWarn("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__)
|
|
|
|
/*
 * Error-propagation helpers. Each evaluates `c` exactly once; the argument
 * is parenthesized so that any expression can be passed.
 *
 *   SCH_ERR_RET(c)   on failure sets terrno and returns the code from the
 *                    enclosing function; on success falls through.
 *   SCH_RET(c)       always returns the code, setting terrno on failure.
 *   SCH_ERR_JRET(c)  stores the code in the caller's `code` variable and, on
 *                    failure, sets terrno and jumps to the caller's
 *                    `_return` label.
 */
#define SCH_ERR_RET(c)                \
  do {                                \
    int32_t _code = (c);              \
    if (_code != TSDB_CODE_SUCCESS) { \
      terrno = _code;                 \
      return _code;                   \
    }                                 \
  } while (0)

#define SCH_RET(c)                    \
  do {                                \
    int32_t _code = (c);              \
    if (_code != TSDB_CODE_SUCCESS) { \
      terrno = _code;                 \
    }                                 \
    return _code;                     \
  } while (0)

#define SCH_ERR_JRET(c)              \
  do {                               \
    code = (c);                      \
    if (code != TSDB_CODE_SUCCESS) { \
      terrno = code;                 \
      goto _return;                  \
    }                                \
  } while (0)
|
|
|
|
/*
 * Latch wrappers: SCH_READ selects the read-latch call, and any other `type`
 * value (normally SCH_WRITE) selects the write-latch call.
 */
#define SCH_LOCK(type, _lock)   (SCH_READ == (type) ? taosRLockLatch(_lock) : taosWLockLatch(_lock))
#define SCH_UNLOCK(type, _lock) (SCH_READ == (type) ? taosRUnLockLatch(_lock) : taosWUnLockLatch(_lock))
|
|
|
|
|
|
void schDeregisterTaskHb(SSchJob *pJob, SSchTask *pTask);
|
|
void schCleanClusterHb(void* pTrans);
|
|
int32_t schLaunchTask(SSchJob *job, SSchTask *task);
|
|
int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, SQueryNodeAddr *addr, int32_t msgType);
|
|
SSchJob *schAcquireJob(int64_t refId);
|
|
int32_t schReleaseJob(int64_t refId);
|
|
void schFreeFlowCtrl(SSchJob *pJob);
|
|
int32_t schChkJobNeedFlowCtrl(SSchJob *pJob, SSchLevel *pLevel);
|
|
int32_t schDecTaskFlowQuota(SSchJob *pJob, SSchTask *pTask);
|
|
int32_t schCheckIncTaskFlowQuota(SSchJob *pJob, SSchTask *pTask, bool *enough);
|
|
int32_t schLaunchTasksInFlowCtrlList(SSchJob *pJob, SSchTask *pTask);
|
|
int32_t schLaunchTaskImpl(SSchJob *pJob, SSchTask *pTask);
|
|
int32_t schFetchFromRemote(SSchJob *pJob);
|
|
int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode);
|
|
int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId, SArray* taskAction);
|
|
int32_t schCloneSMsgSendInfo(void *src, void **dst);
|
|
int32_t schValidateAndBuildJob(SQueryPlan *pDag, SSchJob *pJob);
|
|
void schFreeJobImpl(void *job);
|
|
int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx);
|
|
int32_t schEnsureHbConnection(SSchJob *pJob, SSchTask *pTask);
|
|
int32_t schUpdateHbConnection(SQueryNodeEpId *epId, SSchTrans *trans);
|
|
int32_t schHandleHbCallback(void *param, const SDataBuf *pMsg, int32_t code);
|
|
void schFreeRpcCtx(SRpcCtx *pCtx);
|
|
int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp);
|
|
bool schJobNeedToStop(SSchJob *pJob, int8_t *pStatus);
|
|
int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask);
|
|
int32_t schSaveJobQueryRes(SSchJob *pJob, SQueryTableRsp *rsp);
|
|
int32_t schProcessOnExplainDone(SSchJob *pJob, SSchTask *pTask, SRetrieveTableRsp *pRsp);
|
|
void schProcessOnDataFetched(SSchJob *job);
|
|
int32_t schGetTaskInJob(SSchJob *pJob, uint64_t taskId, SSchTask **pTask);
|
|
void schFreeRpcCtxVal(const void *arg);
|
|
int32_t schMakeBrokenLinkVal(SSchJob *pJob, SSchTask *pTask, SRpcBrokenlinkVal *brokenVal, bool isHb);
|
|
int32_t schAppendTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, int32_t execIdx);
|
|
int32_t schExecStaticExplainJob(SSchedulerReq *pReq, int64_t *job, bool sync);
|
|
int32_t schExecJobImpl(SSchedulerReq *pReq, int64_t *job, SQueryResult* pRes, bool sync);
|
|
int32_t schChkUpdateJobStatus(SSchJob *pJob, int8_t newStatus);
|
|
int32_t schCancelJob(SSchJob *pJob);
|
|
int32_t schProcessOnJobDropped(SSchJob *pJob, int32_t errCode);
|
|
uint64_t schGenTaskId(void);
|
|
void schCloseJobRef(void);
|
|
int32_t schExecJob(SSchedulerReq *pReq, int64_t *pJob, SQueryResult *pRes);
|
|
int32_t schAsyncExecJob(SSchedulerReq *pReq, int64_t *pJob);
|
|
int32_t schFetchRows(SSchJob *pJob);
|
|
int32_t schAsyncFetchRows(SSchJob *pJob);
|
|
int32_t schUpdateTaskHandle(SSchJob *pJob, SSchTask *pTask, bool dropExecNode, void *handle, int32_t execIdx);
|
|
int32_t schProcessOnTaskStatusRsp(SQueryNodeEpId* pEpId, SArray* pStatusList);
|
|
void schFreeSMsgSendInfo(SMsgSendInfo *msgSendInfo);
|
|
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /*_TD_SCHEDULER_INT_H_*/
|