From 4a55ed07b3a82bc6893f358dbd26b911a9908877 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 28 Jun 2022 10:34:51 +0800 Subject: [PATCH 01/14] feature: query redirect --- include/common/tmsgdef.h | 16 ++++++----- source/client/src/clientEnv.c | 2 +- source/client/src/clientImpl.c | 2 +- source/dnode/mgmt/mgmt_mnode/src/mmHandle.c | 10 +++---- source/dnode/mgmt/mgmt_qnode/src/qmHandle.c | 12 ++++---- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 12 ++++---- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 2 +- source/dnode/mnode/impl/src/mndMain.c | 6 ++-- source/dnode/mnode/impl/src/mndQuery.c | 24 ++++++++-------- source/dnode/qnode/src/qnode.c | 14 +++++----- source/dnode/vnode/src/vnd/vnodeSvr.c | 14 +++++----- source/libs/executor/src/executorimpl.c | 2 +- source/libs/function/src/udfd.c | 2 +- source/libs/parser/src/parTranslater.c | 2 +- source/libs/planner/src/planPhysiCreater.c | 2 +- source/libs/qcom/src/queryUtil.c | 2 +- source/libs/qworker/src/qwMsg.c | 6 ++-- source/libs/qworker/test/qworkerTests.cpp | 16 +++++------ source/libs/scheduler/src/schJob.c | 6 ++-- source/libs/scheduler/src/schRemote.c | 28 +++++++++---------- source/libs/scheduler/test/schedulerTests.cpp | 10 +++---- 21 files changed, 96 insertions(+), 94 deletions(-) diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index acf08bd47e..cbf0c4b2fe 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -167,10 +167,6 @@ enum { TD_NEW_MSG_SEG(TDMT_VND_MSG) TD_DEF_MSG_TYPE(TDMT_VND_SUBMIT, "submit", SSubmitReq, SSubmitRsp) - TD_DEF_MSG_TYPE(TDMT_VND_QUERY, "query", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_VND_QUERY_CONTINUE, "query-continue", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_VND_QUERY_HEARTBEAT, "query-heartbeat", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_VND_FETCH, "fetch", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_CREATE_TABLE, "create-table", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_TABLE, "alter-table", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DROP_TABLE, "drop-table", NULL, NULL) @@ -184,12 +180,9 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_MQ_VG_CHANGE, "vnode-mq-vg-change", SMqRebVgReq, SMqRebVgRsp) TD_DEF_MSG_TYPE(TDMT_VND_MQ_VG_DELETE, "vnode-mq-vg-delete", SMqVDeleteReq, SMqVDeleteRsp) TD_DEF_MSG_TYPE(TDMT_VND_MQ_COMMIT_OFFSET, "vnode-commit-offset", STqOffset, STqOffset) - TD_DEF_MSG_TYPE(TDMT_VND_CANCEL_TASK, "vnode-cancel-task", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_VND_DROP_TASK, "vnode-drop-task", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_CREATE_TOPIC, "vnode-create-topic", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_TOPIC, "vnode-alter-topic", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DROP_TOPIC, "vnode-drop-topic", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_VND_EXPLAIN, "vnode-explain", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_SUBSCRIBE, "vnode-subscribe", SMVSubscribeReq, SMVSubscribeRsp) TD_DEF_MSG_TYPE(TDMT_VND_CONSUME, "vnode-consume", SMqPollReq, SMqDataBlkRsp) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL) @@ -206,6 +199,15 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_COMPACT, "compact", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DROP_TTL_TABLE, "drop-ttl-stb", NULL, NULL) + TD_NEW_MSG_SEG(TDMT_SCH_MSG) + TD_DEF_MSG_TYPE(TDMT_SCH_QUERY, "query", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SCH_QUERY_CONTINUE, "query-continue", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SCH_QUERY_HEARTBEAT, "query-heartbeat", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SCH_FETCH, "fetch", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SCH_CANCEL_TASK, "vnode-cancel-task", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SCH_DROP_TASK, "vnode-drop-task", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SCH_EXPLAIN, "vnode-explain", NULL, NULL) + TD_NEW_MSG_SEG(TDMT_STREAM_MSG) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DEPLOY, "stream-task-deploy", SStreamTaskDeployReq, SStreamTaskDeployRsp) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DROP, "stream-task-drop", NULL, NULL) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 22b491994e..a36c7a0048 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -91,7 +91,7 @@ void closeTransporter(SAppInstInfo *pAppInfo) { static bool clientRpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY) { - if (msgType == TDMT_VND_QUERY || msgType == TDMT_VND_FETCH) { + if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_FETCH) { return false; } return true; diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 8c63046323..632b8441c4 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -808,7 +808,7 @@ int32_t handleQueryExecRsp(SRequestObj* pRequest) { code = handleSubmitExecRes(pRequest, pRes->res, pCatalog, &epset); break; } - case TDMT_VND_QUERY: { + case TDMT_SCH_QUERY: { code = handleQueryExecRes(pRequest, pRes->res, pCatalog, &epset); break; } diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index 3bf6d9b04f..fbf3379490 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -209,10 +209,10 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_MND_SHOW_VARIABLES, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_SERVER_VERSION, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY, mmPutMsgToQueryQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY_CONTINUE, mmPutMsgToQueryQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY_HEARTBEAT, mmPutMsgToFetchQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_FETCH, mmPutMsgToFetchQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY, mmPutMsgToQueryQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_CONTINUE, mmPutMsgToQueryQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_HEARTBEAT, mmPutMsgToFetchQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH, mmPutMsgToFetchQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_CREATE_STB_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_STB_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_STB_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; @@ -220,7 +220,7 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_SMA_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_MQ_VG_CHANGE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_MQ_VG_DELETE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_TASK, mmPutMsgToFetchQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_DROP_TASK, mmPutMsgToFetchQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c b/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c index 864f5b485a..1a7b5c1a1b 100644 --- a/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c +++ b/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c @@ -107,14 +107,14 @@ SArray *qmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_MON_QM_INFO, qmPutNodeMsgToMonitorQueue, 0) == NULL) goto _OVER; // Requests handled by VNODE - if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY, qmPutNodeMsgToQueryQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY_CONTINUE, qmPutNodeMsgToQueryQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_FETCH, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY, qmPutNodeMsgToQueryQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_CONTINUE, qmPutNodeMsgToQueryQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_FETCH_RSP, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY_HEARTBEAT, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_HEARTBEAT, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_CANCEL_TASK, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_TASK, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_CANCEL_TASK, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_DROP_TASK, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; code = 0; _OVER: diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 00d83f0ad4..938e5f32b0 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -324,16 +324,16 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_MON_VM_LOAD, vmPutMsgToMonitorQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_SUBMIT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY_CONTINUE, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_FETCH, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_CONTINUE, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_TABLE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_UPDATE_TAG_VAL, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TABLE_META, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TABLE_CFG, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TABLES_META, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_CANCEL_TASK, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_TASK, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_CANCEL_TASK, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_DROP_TASK, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_CREATE_STB, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_TTL_TABLE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_STB, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; @@ -349,7 +349,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_MQ_COMMIT_OFFSET, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_CONSUME, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DELETE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY_HEARTBEAT, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_HEARTBEAT, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 7e31cc3144..c1b5b86d4c 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -251,7 +251,7 @@ static inline void dmReleaseHandle(SRpcHandleInfo *pHandle, int8_t type) { static bool rpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY) { - if (msgType == TDMT_VND_QUERY || msgType == TDMT_VND_FETCH) { + if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_FETCH) { return false; } return true; diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 136afb714e..0a9c5a82c8 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -527,9 +527,9 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { static int32_t mndCheckMnodeState(SRpcMsg *pMsg) { if (!IsReq(pMsg)) return 0; - if (pMsg->msgType == TDMT_VND_QUERY || pMsg->msgType == TDMT_VND_QUERY_CONTINUE || - pMsg->msgType == TDMT_VND_QUERY_HEARTBEAT || pMsg->msgType == TDMT_VND_FETCH || - pMsg->msgType == TDMT_VND_DROP_TASK) { + if (pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || + pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT || pMsg->msgType == TDMT_SCH_FETCH || + pMsg->msgType == TDMT_SCH_DROP_TASK) { return 0; } if (mndAcquireRpcRef(pMsg->info.node) == 0) return 0; diff --git a/source/dnode/mnode/impl/src/mndQuery.c b/source/dnode/mnode/impl/src/mndQuery.c index 671152f9c6..b67a11997e 100644 --- a/source/dnode/mnode/impl/src/mndQuery.c +++ b/source/dnode/mnode/impl/src/mndQuery.c @@ -19,13 +19,13 @@ #include "qworker.h" int32_t mndPreProcessQueryMsg(SRpcMsg *pMsg) { - if (TDMT_VND_QUERY != pMsg->msgType) return 0; + if (TDMT_SCH_QUERY != pMsg->msgType) return 0; SMnode *pMnode = pMsg->info.node; return qWorkerPreprocessQueryMsg(pMnode->pQuery, pMsg); } void mndPostProcessQueryMsg(SRpcMsg *pMsg) { - if (TDMT_VND_QUERY != pMsg->msgType) return; + if (TDMT_SCH_QUERY != pMsg->msgType) return; SMnode *pMnode = pMsg->info.node; qWorkerAbortPreprocessQueryMsg(pMnode->pQuery, pMsg); } @@ -37,19 +37,19 @@ int32_t mndProcessQueryMsg(SRpcMsg *pMsg) { mTrace("msg:%p, in query queue is processing", pMsg); switch (pMsg->msgType) { - case TDMT_VND_QUERY: + case TDMT_SCH_QUERY: code = qWorkerProcessQueryMsg(&handle, pMnode->pQuery, pMsg, 0); break; - case TDMT_VND_QUERY_CONTINUE: + case TDMT_SCH_QUERY_CONTINUE: code = qWorkerProcessCQueryMsg(&handle, pMnode->pQuery, pMsg, 0); break; - case TDMT_VND_FETCH: + case TDMT_SCH_FETCH: code = qWorkerProcessFetchMsg(pMnode, pMnode->pQuery, pMsg, 0); break; - case TDMT_VND_DROP_TASK: + case TDMT_SCH_DROP_TASK: code = qWorkerProcessDropMsg(pMnode, pMnode->pQuery, pMsg, 0); break; - case TDMT_VND_QUERY_HEARTBEAT: + case TDMT_SCH_QUERY_HEARTBEAT: code = qWorkerProcessHbMsg(pMnode, pMnode->pQuery, pMsg, 0); break; default: @@ -67,11 +67,11 @@ int32_t mndInitQuery(SMnode *pMnode) { return -1; } - mndSetMsgHandle(pMnode, TDMT_VND_QUERY, mndProcessQueryMsg); - mndSetMsgHandle(pMnode, TDMT_VND_QUERY_CONTINUE, mndProcessQueryMsg); - mndSetMsgHandle(pMnode, TDMT_VND_FETCH, mndProcessQueryMsg); - mndSetMsgHandle(pMnode, TDMT_VND_DROP_TASK, mndProcessQueryMsg); - mndSetMsgHandle(pMnode, TDMT_VND_QUERY_HEARTBEAT, mndProcessQueryMsg); + mndSetMsgHandle(pMnode, TDMT_SCH_QUERY, mndProcessQueryMsg); + mndSetMsgHandle(pMnode, TDMT_SCH_QUERY_CONTINUE, mndProcessQueryMsg); + mndSetMsgHandle(pMnode, TDMT_SCH_FETCH, mndProcessQueryMsg); + mndSetMsgHandle(pMnode, TDMT_SCH_DROP_TASK, mndProcessQueryMsg); + mndSetMsgHandle(pMnode, TDMT_SCH_QUERY_HEARTBEAT, mndProcessQueryMsg); return 0; } diff --git a/source/dnode/qnode/src/qnode.c b/source/dnode/qnode/src/qnode.c index ebaf73a952..a0aa640ed6 100644 --- a/source/dnode/qnode/src/qnode.c +++ b/source/dnode/qnode/src/qnode.c @@ -65,7 +65,7 @@ int32_t qndGetLoad(SQnode *pQnode, SQnodeLoad *pLoad) { } int32_t qndPreprocessQueryMsg(SQnode *pQnode, SRpcMsg * pMsg) { - if (TDMT_VND_QUERY != pMsg->msgType) { + if (TDMT_SCH_QUERY != pMsg->msgType) { return 0; } @@ -78,28 +78,28 @@ int32_t qndProcessQueryMsg(SQnode *pQnode, int64_t ts, SRpcMsg *pMsg) { qTrace("message in qnode queue is processing"); switch (pMsg->msgType) { - case TDMT_VND_QUERY: + case TDMT_SCH_QUERY: code = qWorkerProcessQueryMsg(&handle, pQnode->pQuery, pMsg, ts); break; - case TDMT_VND_QUERY_CONTINUE: + case TDMT_SCH_QUERY_CONTINUE: code = qWorkerProcessCQueryMsg(&handle, pQnode->pQuery, pMsg, ts); break; - case TDMT_VND_FETCH: + case TDMT_SCH_FETCH: code = qWorkerProcessFetchMsg(pQnode, pQnode->pQuery, pMsg, ts); break; case TDMT_VND_FETCH_RSP: code = qWorkerProcessFetchRsp(pQnode, pQnode->pQuery, pMsg, ts); break; - case TDMT_VND_CANCEL_TASK: + case TDMT_SCH_CANCEL_TASK: code = qWorkerProcessCancelMsg(pQnode, pQnode->pQuery, pMsg, ts); break; - case TDMT_VND_DROP_TASK: + case TDMT_SCH_DROP_TASK: code = qWorkerProcessDropMsg(pQnode, pQnode->pQuery, pMsg, ts); break; case TDMT_VND_CONSUME: // code = tqProcessConsumeReq(pQnode->pTq, pMsg); // break; - case TDMT_VND_QUERY_HEARTBEAT: + case TDMT_SCH_QUERY_HEARTBEAT: code = qWorkerProcessHbMsg(pQnode, pQnode->pQuery, pMsg, ts); break; default: diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 72c766e0ae..63b7289661 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -216,7 +216,7 @@ _err: } int32_t vnodePreprocessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { - if (TDMT_VND_QUERY != pMsg->msgType) { + if (TDMT_SCH_QUERY != pMsg->msgType) { return 0; } @@ -227,9 +227,9 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { vTrace("message in vnode query queue is processing"); SReadHandle handle = {.meta = pVnode->pMeta, .config = &pVnode->config, .vnode = pVnode, .pMsgCb = &pVnode->msgCb}; switch (pMsg->msgType) { - case TDMT_VND_QUERY: + case TDMT_SCH_QUERY: return qWorkerProcessQueryMsg(&handle, pVnode->pQuery, pMsg, 0); - case TDMT_VND_QUERY_CONTINUE: + case TDMT_SCH_QUERY_CONTINUE: return qWorkerProcessCQueryMsg(&handle, pVnode->pQuery, pMsg, 0); default: vError("unknown msg type:%d in query queue", pMsg->msgType); @@ -243,15 +243,15 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); switch (pMsg->msgType) { - case TDMT_VND_FETCH: + case TDMT_SCH_FETCH: return qWorkerProcessFetchMsg(pVnode, pVnode->pQuery, pMsg, 0); case TDMT_VND_FETCH_RSP: return qWorkerProcessFetchRsp(pVnode, pVnode->pQuery, pMsg, 0); - case TDMT_VND_CANCEL_TASK: + case TDMT_SCH_CANCEL_TASK: return qWorkerProcessCancelMsg(pVnode, pVnode->pQuery, pMsg, 0); - case TDMT_VND_DROP_TASK: + case TDMT_SCH_DROP_TASK: return qWorkerProcessDropMsg(pVnode, pVnode->pQuery, pMsg, 0); - case TDMT_VND_QUERY_HEARTBEAT: + case TDMT_SCH_QUERY_HEARTBEAT: return qWorkerProcessHbMsg(pVnode, pVnode->pQuery, pMsg, 0); case TDMT_VND_TABLE_META: return vnodeGetTableMeta(pVnode, pMsg); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 17e5482b60..a4a46c6e63 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -2035,7 +2035,7 @@ static int32_t doSendFetchDataRequest(SExchangeInfo* pExchangeInfo, SExecTaskInf pMsgSendInfo->param = pWrapper; pMsgSendInfo->msgInfo.pData = pMsg; pMsgSendInfo->msgInfo.len = sizeof(SResFetchReq); - pMsgSendInfo->msgType = TDMT_VND_FETCH; + pMsgSendInfo->msgType = TDMT_SCH_FETCH; pMsgSendInfo->fp = loadRemoteDataCallback; int64_t transporterId = 0; diff --git a/source/libs/function/src/udfd.c b/source/libs/function/src/udfd.c index 364ee0692f..c6c1f00395 100644 --- a/source/libs/function/src/udfd.c +++ b/source/libs/function/src/udfd.c @@ -549,7 +549,7 @@ int32_t udfdLoadUdf(char *udfName, SUdf *udf) { static bool udfdRpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY) { - if (msgType == TDMT_VND_QUERY || msgType == TDMT_VND_FETCH) { + if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_FETCH) { return false; } return true; diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 45240536e3..dc149f0224 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -5965,7 +5965,7 @@ static int32_t setQuery(STranslateContext* pCxt, SQuery* pQuery) { case QUERY_NODE_EXPLAIN_STMT: pQuery->execMode = QUERY_EXEC_MODE_SCHEDULE; pQuery->haveResultSet = true; - pQuery->msgType = TDMT_VND_QUERY; + pQuery->msgType = TDMT_SCH_QUERY; break; case QUERY_NODE_DELETE_STMT: pQuery->execMode = QUERY_EXEC_MODE_SCHEDULE; diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index 46747af3a9..77df96f82d 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -1555,7 +1555,7 @@ static int32_t createPhysiSubplan(SPhysiPlanContext* pCxt, SLogicSubplan* pLogic if (SUBPLAN_TYPE_MODIFY == pLogicSubplan->subplanType) { code = buildVnodeModifySubplan(pCxt, pLogicSubplan, pSubplan); } else { - pSubplan->msgType = TDMT_VND_QUERY; + pSubplan->msgType = TDMT_SCH_QUERY; code = createPhysiNode(pCxt, pLogicSubplan->pNode, pSubplan, &pSubplan->pNode); if (TSDB_CODE_SUCCESS == code && !pCxt->pPlanCxt->streamQuery && !pCxt->pPlanCxt->topicQuery) { code = createDataDispatcher(pCxt, pSubplan->pNode, &pSubplan->pDataSink); diff --git a/source/libs/qcom/src/queryUtil.c b/source/libs/qcom/src/queryUtil.c index 9ba149a33b..40c5807057 100644 --- a/source/libs/qcom/src/queryUtil.c +++ b/source/libs/qcom/src/queryUtil.c @@ -216,7 +216,7 @@ void destroyQueryExecRes(SQueryExecRes* pRes) { tFreeSSubmitRsp((SSubmitRsp*)pRes->res); break; } - case TDMT_VND_QUERY: { + case TDMT_SCH_QUERY: { taosArrayDestroy((SArray*)pRes->res); break; } diff --git a/source/libs/qworker/src/qwMsg.c b/source/libs/qworker/src/qwMsg.c index 82a62b5c5a..8c1371fce9 100644 --- a/source/libs/qworker/src/qwMsg.c +++ b/source/libs/qworker/src/qwMsg.c @@ -169,7 +169,7 @@ int32_t qwBuildAndSendCQueryMsg(QW_FPARAMS_DEF, SRpcHandleInfo *pConn) { req->taskId = tId; SRpcMsg pNewMsg = { - .msgType = TDMT_VND_QUERY_CONTINUE, + .msgType = TDMT_SCH_QUERY_CONTINUE, .pCont = req, .contLen = sizeof(SQueryContinueReq), .code = 0, @@ -202,7 +202,7 @@ int32_t qwRegisterQueryBrokenLinkArg(QW_FPARAMS_DEF, SRpcHandleInfo *pConn) { req->refId = htobe64(rId); SRpcMsg brokenMsg = { - .msgType = TDMT_VND_DROP_TASK, + .msgType = TDMT_SCH_DROP_TASK, .pCont = req, .contLen = sizeof(STaskDropReq), .code = TSDB_CODE_RPC_NETWORK_UNAVAIL, @@ -236,7 +236,7 @@ int32_t qwRegisterHbBrokenLinkArg(SQWorker *mgmt, uint64_t sId, SRpcHandleInfo * } SRpcMsg brokenMsg = { - .msgType = TDMT_VND_QUERY_HEARTBEAT, + .msgType = TDMT_SCH_QUERY_HEARTBEAT, .pCont = msg, .contLen = msgSize, .code = TSDB_CODE_RPC_NETWORK_UNAVAIL, diff --git a/source/libs/qworker/test/qworkerTests.cpp b/source/libs/qworker/test/qworkerTests.cpp index 5bb6acee60..7c7d262167 100644 --- a/source/libs/qworker/test/qworkerTests.cpp +++ b/source/libs/qworker/test/qworkerTests.cpp @@ -122,7 +122,7 @@ void qwtBuildQueryReqMsg(SRpcMsg *queryRpc) { qwtqueryMsg.taskId = htobe64(1); qwtqueryMsg.phyLen = htonl(100); qwtqueryMsg.sqlLen = 0; - queryRpc->msgType = TDMT_VND_QUERY; + queryRpc->msgType = TDMT_SCH_QUERY; queryRpc->pCont = &qwtqueryMsg; queryRpc->contLen = sizeof(SSubQueryMsg) + 100; } @@ -131,7 +131,7 @@ void qwtBuildFetchReqMsg(SResFetchReq *fetchMsg, SRpcMsg *fetchRpc) { fetchMsg->sId = htobe64(1); fetchMsg->queryId = htobe64(atomic_load_64(&qwtTestQueryId)); fetchMsg->taskId = htobe64(1); - fetchRpc->msgType = TDMT_VND_FETCH; + fetchRpc->msgType = TDMT_SCH_FETCH; fetchRpc->pCont = fetchMsg; fetchRpc->contLen = sizeof(SResFetchReq); } @@ -140,7 +140,7 @@ void qwtBuildDropReqMsg(STaskDropReq *dropMsg, SRpcMsg *dropRpc) { dropMsg->sId = htobe64(1); dropMsg->queryId = htobe64(atomic_load_64(&qwtTestQueryId)); dropMsg->taskId = htobe64(1); - dropRpc->msgType = TDMT_VND_DROP_TASK; + dropRpc->msgType = TDMT_SCH_DROP_TASK; dropRpc->pCont = dropMsg; dropRpc->contLen = sizeof(STaskDropReq); } @@ -756,9 +756,9 @@ void *queryQueueThread(void *param) { } } - if (TDMT_VND_QUERY == queryRpc->msgType) { + if (TDMT_SCH_QUERY == queryRpc->msgType) { qWorkerProcessQueryMsg(mockPointer, mgmt, queryRpc, 0); - } else if (TDMT_VND_QUERY_CONTINUE == queryRpc->msgType) { + } else if (TDMT_SCH_QUERY_CONTINUE == queryRpc->msgType) { qWorkerProcessCQueryMsg(mockPointer, mgmt, queryRpc, 0); } else { printf("unknown msg in query queue, type:%d\n", queryRpc->msgType); @@ -813,13 +813,13 @@ void *fetchQueueThread(void *param) { } switch (fetchRpc->msgType) { - case TDMT_VND_FETCH: + case TDMT_SCH_FETCH: qWorkerProcessFetchMsg(mockPointer, mgmt, fetchRpc, 0); break; - case TDMT_VND_CANCEL_TASK: + case TDMT_SCH_CANCEL_TASK: qWorkerProcessCancelMsg(mockPointer, mgmt, fetchRpc, 0); break; - case TDMT_VND_DROP_TASK: + case TDMT_SCH_DROP_TASK: qWorkerProcessDropMsg(mockPointer, mgmt, fetchRpc, 0); break; default: diff --git a/source/libs/scheduler/src/schJob.c b/source/libs/scheduler/src/schJob.c index 89f355d78c..54d0d2f2e5 100644 --- a/source/libs/scheduler/src/schJob.c +++ b/source/libs/scheduler/src/schJob.c @@ -1183,7 +1183,7 @@ int32_t schFetchFromRemote(SSchJob *pJob) { return TSDB_CODE_SUCCESS; } - SCH_ERR_JRET(schBuildAndSendMsg(pJob, pJob->fetchTask, &pJob->resNode, TDMT_VND_FETCH)); + SCH_ERR_JRET(schBuildAndSendMsg(pJob, pJob->fetchTask, &pJob->resNode, TDMT_SCH_FETCH)); return TSDB_CODE_SUCCESS; @@ -1222,7 +1222,7 @@ void schDropTaskOnExecNode(SSchJob *pJob, SSchTask *pTask) { while (nodeInfo) { SCH_SET_TASK_HANDLE(pTask, nodeInfo->handle); - schBuildAndSendMsg(pJob, pTask, &nodeInfo->addr, TDMT_VND_DROP_TASK); + schBuildAndSendMsg(pJob, pTask, &nodeInfo->addr, TDMT_SCH_DROP_TASK); nodeInfo = taosHashIterate(pTask->execNodes, nodeInfo); } @@ -1307,7 +1307,7 @@ int32_t schSaveJobQueryRes(SSchJob *pJob, SQueryTableRsp *rsp) { tbInfo.tversion = rsp->tversion; taosArrayPush((SArray *)pJob->execRes.res, &tbInfo); - pJob->execRes.msgType = TDMT_VND_QUERY; + pJob->execRes.msgType = TDMT_SCH_QUERY; } return TSDB_CODE_SUCCESS; diff --git a/source/libs/scheduler/src/schRemote.c b/source/libs/scheduler/src/schRemote.c index 0bd747785c..a1691a3dd3 100644 --- a/source/libs/scheduler/src/schRemote.c +++ b/source/libs/scheduler/src/schRemote.c @@ -568,22 +568,22 @@ int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp) { case TDMT_VND_SUBMIT: *fp = schHandleSubmitCallback; break; - case TDMT_VND_QUERY: + case TDMT_SCH_QUERY: *fp = schHandleQueryCallback; break; case TDMT_VND_DELETE: *fp = schHandleDeleteCallback; break; - case TDMT_VND_EXPLAIN: + case TDMT_SCH_EXPLAIN: *fp = schHandleExplainCallback; break; - case TDMT_VND_FETCH: + case TDMT_SCH_FETCH: *fp = schHandleFetchCallback; break; - case TDMT_VND_DROP_TASK: + case TDMT_SCH_DROP_TASK: *fp = schHandleDropCallback; break; - case TDMT_VND_QUERY_HEARTBEAT: + case TDMT_SCH_QUERY_HEARTBEAT: *fp = schHandleHbCallback; break; case TDMT_SCH_LINK_BROKEN: @@ -694,7 +694,7 @@ int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { int32_t msgType = TDMT_VND_QUERY_HEARTBEAT_RSP; __async_send_cb_fn_t fp = NULL; - SCH_ERR_JRET(schGetCallbackFp(TDMT_VND_QUERY_HEARTBEAT, &fp)); + SCH_ERR_JRET(schGetCallbackFp(TDMT_SCH_QUERY_HEARTBEAT, &fp)); param->nodeEpId = epId; param->pTrans = pJob->conn.pTrans; @@ -784,7 +784,7 @@ int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { } SSchTrans trans = {.pTrans = pJob->conn.pTrans, .pHandle = SCH_GET_TASK_HANDLE(pTask)}; - SCH_ERR_JRET(schGenerateCallBackInfo(pJob, pTask, NULL, 0, TDMT_VND_EXPLAIN, &trans, false, &pExplainMsgSendInfo)); + SCH_ERR_JRET(schGenerateCallBackInfo(pJob, pTask, NULL, 0, TDMT_SCH_EXPLAIN, &trans, false, &pExplainMsgSendInfo)); int32_t msgType = TDMT_VND_EXPLAIN_RSP; SRpcCtxVal ctxVal = {.val = pExplainMsgSendInfo, .clone = schCloneSMsgSendInfo}; @@ -882,7 +882,7 @@ int32_t schAsyncSendMsg(SSchJob *pJob, SSchTask *pTask, SSchTrans *trans, SQuery SEpSet *epSet = &addr->epSet; SMsgSendInfo *pMsgSendInfo = NULL; - bool isHb = (TDMT_VND_QUERY_HEARTBEAT == msgType); + bool isHb = (TDMT_SCH_QUERY_HEARTBEAT == msgType); SCH_ERR_JRET(schGenerateCallBackInfo(pJob, pTask, msg, msgSize, msgType, trans, isHb, &pMsgSendInfo)); SCH_ERR_JRET(schUpdateSendTargetInfo(pMsgSendInfo, addr, pTask)); @@ -926,7 +926,7 @@ int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId, SArray* taskAction) { int32_t code = 0; SRpcCtx rpcCtx = {0}; SSchTrans trans = {0}; - int32_t msgType = TDMT_VND_QUERY_HEARTBEAT; + int32_t msgType = TDMT_SCH_QUERY_HEARTBEAT; req.header.vgId = nodeEpId->nodeId; req.sId = schMgmt.sId; @@ -1032,7 +1032,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, tSerializeSVDeleteReq(msg, msgSize, &req); break; } - case TDMT_VND_QUERY: { + case TDMT_SCH_QUERY: { SCH_ERR_RET(schMakeQueryRpcCtx(pJob, pTask, &rpcCtx)); uint32_t len = strlen(pJob->sql); @@ -1060,7 +1060,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, persistHandle = true; break; } - case TDMT_VND_FETCH: { + case TDMT_SCH_FETCH: { msgSize = sizeof(SResFetchReq); msg = taosMemoryCalloc(1, msgSize); if (NULL == msg) { @@ -1078,7 +1078,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, break; } - case TDMT_VND_DROP_TASK: { + case TDMT_SCH_DROP_TASK: { msgSize = sizeof(STaskDropReq); msg = taosMemoryCalloc(1, msgSize); if (NULL == msg) { @@ -1096,7 +1096,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, pMsg->refId = htobe64(pJob->refId); break; } - case TDMT_VND_QUERY_HEARTBEAT: { + case TDMT_SCH_QUERY_HEARTBEAT: { SCH_ERR_RET(schMakeHbRpcCtx(pJob, pTask, &rpcCtx)); SSchedulerHbReq req = {0}; @@ -1135,7 +1135,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, SCH_ERR_JRET(schAsyncSendMsg(pJob, pTask, &trans, addr, msgType, msg, msgSize, persistHandle, (rpcCtx.args ? &rpcCtx : NULL))); - if (msgType == TDMT_VND_QUERY) { + if (msgType == TDMT_SCH_QUERY) { SCH_ERR_RET(schAppendTaskExecNode(pJob, pTask, addr, pTask->execIdx)); } diff --git a/source/libs/scheduler/test/schedulerTests.cpp b/source/libs/scheduler/test/schedulerTests.cpp index e5cc3cd481..43822e4f5b 100644 --- a/source/libs/scheduler/test/schedulerTests.cpp +++ b/source/libs/scheduler/test/schedulerTests.cpp @@ -118,7 +118,7 @@ void schtBuildQueryDag(SQueryPlan *dag) { scanPlan->level = 1; scanPlan->pParents = nodesMakeList(); scanPlan->pNode = (SPhysiNode*)taosMemoryCalloc(1, sizeof(SPhysiNode)); - scanPlan->msgType = TDMT_VND_QUERY; + scanPlan->msgType = TDMT_SCH_QUERY; mergePlan->id.queryId = qId; mergePlan->id.groupId = schtMergeTemplateId; @@ -130,7 +130,7 @@ void schtBuildQueryDag(SQueryPlan *dag) { mergePlan->pChildren = nodesMakeList(); mergePlan->pParents = NULL; mergePlan->pNode = (SPhysiNode*)taosMemoryCalloc(1, sizeof(SPhysiNode)); - mergePlan->msgType = TDMT_VND_QUERY; + mergePlan->msgType = TDMT_SCH_QUERY; merge->pNodeList = nodesMakeList(); scan->pNodeList = nodesMakeList(); @@ -181,7 +181,7 @@ void schtBuildQueryFlowCtrlDag(SQueryPlan *dag) { scanPlan[i].level = 1; scanPlan[i].pParents = nodesMakeList(); scanPlan[i].pNode = (SPhysiNode*)taosMemoryCalloc(1, sizeof(SPhysiNode)); - scanPlan[i].msgType = TDMT_VND_QUERY; + scanPlan[i].msgType = TDMT_SCH_QUERY; nodesListAppend(scanPlan[i].pParents, (SNode*)mergePlan); nodesListAppend(mergePlan->pChildren, (SNode*)(scanPlan + i)); @@ -198,7 +198,7 @@ void schtBuildQueryFlowCtrlDag(SQueryPlan *dag) { mergePlan->pParents = NULL; mergePlan->pNode = (SPhysiNode*)taosMemoryCalloc(1, sizeof(SPhysiNode)); - mergePlan->msgType = TDMT_VND_QUERY; + mergePlan->msgType = TDMT_SCH_QUERY; nodesListAppend(merge->pNodeList, (SNode*)mergePlan); @@ -896,7 +896,7 @@ TEST(queryTest, flowCtrlCase) { taosHashCancelIterate(pJob->execTasks, pIter); - if (task->lastMsgType == TDMT_VND_QUERY) { + if (task->lastMsgType == TDMT_SCH_QUERY) { SQueryTableRsp rsp = {0}; code = schHandleResponseMsg(pJob, task, TDMT_VND_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); From 6d8fd7e50607fc192b39c6407aa9596a1543c0c2 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 29 Jun 2022 10:51:22 +0800 Subject: [PATCH 02/14] feat: query redirect --- include/common/tmsgdef.h | 7 +- include/libs/qcom/query.h | 16 ++- source/client/src/clientEnv.c | 5 +- source/client/src/clientImpl.c | 5 +- source/dnode/mgmt/mgmt_mnode/src/mmHandle.c | 1 + source/dnode/mgmt/mgmt_qnode/src/qmHandle.c | 1 + source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 1 + source/dnode/mgmt/node_mgmt/src/dmTransport.c | 2 +- source/dnode/mnode/impl/src/mndMain.c | 6 +- source/dnode/mnode/impl/src/mndQuery.c | 6 +- source/dnode/qnode/src/qnode.c | 3 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 3 +- source/libs/function/src/udfd.c | 2 +- source/libs/planner/src/planPhysiCreater.c | 6 +- source/libs/qcom/src/queryUtil.c | 3 +- source/libs/scheduler/inc/schedulerInt.h | 1 + source/libs/scheduler/src/schJob.c | 107 +++++++++++++++++- source/libs/scheduler/src/schRemote.c | 31 +++-- 18 files changed, 170 insertions(+), 36 deletions(-) diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index cbf0c4b2fe..df14b0b34e 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -201,12 +201,13 @@ enum { TD_NEW_MSG_SEG(TDMT_SCH_MSG) TD_DEF_MSG_TYPE(TDMT_SCH_QUERY, "query", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SCH_MERGE_QUERY, "merge-query", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_QUERY_CONTINUE, "query-continue", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_QUERY_HEARTBEAT, "query-heartbeat", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_FETCH, "fetch", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_SCH_CANCEL_TASK, "vnode-cancel-task", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_SCH_DROP_TASK, "vnode-drop-task", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_SCH_EXPLAIN, "vnode-explain", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SCH_CANCEL_TASK, "cancel-task", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SCH_DROP_TASK, "drop-task", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SCH_EXPLAIN, "explain", NULL, NULL) TD_NEW_MSG_SEG(TDMT_STREAM_MSG) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DEPLOY, "stream-task-deploy", SStreamTaskDeployReq, SStreamTaskDeployRsp) diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index 0b767e96f6..3681c2f071 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -138,6 +138,7 @@ typedef struct SDataBuf { void* pData; uint32_t len; void* handle; + SEpSet* pEpSet; } SDataBuf; typedef struct STargetInfo { @@ -234,13 +235,24 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t #define NEED_CLIENT_HANDLE_ERROR(_code) \ (NEED_CLIENT_RM_TBLMETA_ERROR(_code) || NEED_CLIENT_REFRESH_VG_ERROR(_code) || \ NEED_CLIENT_REFRESH_TBLMETA_ERROR(_code)) +#define NEED_REDIRECT_ERROR(_code) \ + ((_code) == TSDB_CODE_RPC_REDIRECT || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || \ + (_code) == TSDB_CODE_NODE_NOT_DEPLOYED || (_code) == TSDB_CODE_SYN_NOT_LEADER || \ + (_code) == TSDB_CODE_APP_NOT_READY) + #define NEED_CLIENT_RM_TBLMETA_REQ(_type) \ ((_type) == TDMT_VND_CREATE_TABLE || (_type) == TDMT_VND_CREATE_STB || (_type) == TDMT_VND_DROP_TABLE || \ (_type) == TDMT_VND_DROP_STB) +#define NEED_SCHEDULER_REDIRECT_ERROR(_code) \ + ((_code) == TSDB_CODE_RPC_REDIRECT || (_code) == TSDB_CODE_NODE_NOT_DEPLOYED || \ + (_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_APP_NOT_READY) + #define NEED_SCHEDULER_RETRY_ERROR(_code) \ - ((_code) == TSDB_CODE_RPC_REDIRECT || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || \ - (_code) == TSDB_CODE_SCH_TIMEOUT_ERROR) + (NEED_SCHEDULER_REDIRECT_ERROR(_code) || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_SCH_TIMEOUT_ERROR) + + + #define REQUEST_TOTAL_EXEC_TIMES 2 diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index a36c7a0048..4cc79096d1 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -89,9 +89,8 @@ void closeTransporter(SAppInstInfo *pAppInfo) { } static bool clientRpcRfp(int32_t code, tmsg_t msgType) { - if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || - code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY) { - if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_FETCH) { + if (NEED_REDIRECT_ERROR(code)) { + if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH) { return false; } return true; diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 632b8441c4..68a225f64c 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -808,7 +808,8 @@ int32_t handleQueryExecRsp(SRequestObj* pRequest) { code = handleSubmitExecRes(pRequest, pRes->res, pCatalog, &epset); break; } - case TDMT_SCH_QUERY: { + case TDMT_SCH_QUERY: + case TDMT_SCH_MERGE_QUERY: { code = handleQueryExecRes(pRequest, pRes->res, pCatalog, &epset); break; } @@ -1306,7 +1307,7 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { updateTargetEpSet(pSendInfo, pTscObj, pMsg, pEpSet); - SDataBuf buf = {.len = pMsg->contLen, .pData = NULL, .handle = pMsg->info.handle}; + SDataBuf buf = {.len = pMsg->contLen, .pData = NULL, .handle = pMsg->info.handle, .pEpSet = pEpSet}; if (pMsg->contLen > 0) { buf.pData = taosMemoryCalloc(1, pMsg->contLen); diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index fbf3379490..59d68b2110 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -210,6 +210,7 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_MND_SERVER_VERSION, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY, mmPutMsgToQueryQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_MERGE_QUERY, mmPutMsgToQueryQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_CONTINUE, mmPutMsgToQueryQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_HEARTBEAT, mmPutMsgToFetchQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH, mmPutMsgToFetchQueue, 1) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c b/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c index 1a7b5c1a1b..5911daaa2b 100644 --- a/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c +++ b/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c @@ -108,6 +108,7 @@ SArray *qmGetMsgHandles() { // Requests handled by VNODE if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY, qmPutNodeMsgToQueryQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_MERGE_QUERY, qmPutNodeMsgToQueryQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_CONTINUE, qmPutNodeMsgToQueryQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_FETCH_RSP, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 938e5f32b0..5bc9582527 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -325,6 +325,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_SUBMIT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_MERGE_QUERY, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_CONTINUE, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_TABLE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index c1b5b86d4c..75e69e3716 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -251,7 +251,7 @@ static inline void dmReleaseHandle(SRpcHandleInfo *pHandle, int8_t type) { static bool rpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY) { - if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_FETCH) { + if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH) { return false; } return true; diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 0a9c5a82c8..0f353b20a3 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -527,9 +527,9 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { static int32_t mndCheckMnodeState(SRpcMsg *pMsg) { if (!IsReq(pMsg)) return 0; - if (pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || - pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT || pMsg->msgType == TDMT_SCH_FETCH || - pMsg->msgType == TDMT_SCH_DROP_TASK) { + if (pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY || + pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT || + pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_DROP_TASK) { return 0; } if (mndAcquireRpcRef(pMsg->info.node) == 0) return 0; diff --git a/source/dnode/mnode/impl/src/mndQuery.c b/source/dnode/mnode/impl/src/mndQuery.c index b67a11997e..aec99fa3b7 100644 --- a/source/dnode/mnode/impl/src/mndQuery.c +++ b/source/dnode/mnode/impl/src/mndQuery.c @@ -19,13 +19,13 @@ #include "qworker.h" int32_t mndPreProcessQueryMsg(SRpcMsg *pMsg) { - if (TDMT_SCH_QUERY != pMsg->msgType) return 0; + if (TDMT_SCH_QUERY != pMsg->msgType && TDMT_SCH_MERGE_QUERY != pMsg->msgType) return 0; SMnode *pMnode = pMsg->info.node; return qWorkerPreprocessQueryMsg(pMnode->pQuery, pMsg); } void mndPostProcessQueryMsg(SRpcMsg *pMsg) { - if (TDMT_SCH_QUERY != pMsg->msgType) return; + if (TDMT_SCH_QUERY != pMsg->msgType && TDMT_SCH_MERGE_QUERY != pMsg->msgType) return; SMnode *pMnode = pMsg->info.node; qWorkerAbortPreprocessQueryMsg(pMnode->pQuery, pMsg); } @@ -38,6 +38,7 @@ int32_t mndProcessQueryMsg(SRpcMsg *pMsg) { mTrace("msg:%p, in query queue is processing", pMsg); switch (pMsg->msgType) { case TDMT_SCH_QUERY: + case TDMT_SCH_MERGE_QUERY: code = qWorkerProcessQueryMsg(&handle, pMnode->pQuery, pMsg, 0); break; case TDMT_SCH_QUERY_CONTINUE: @@ -68,6 +69,7 @@ int32_t mndInitQuery(SMnode *pMnode) { } mndSetMsgHandle(pMnode, TDMT_SCH_QUERY, mndProcessQueryMsg); + mndSetMsgHandle(pMnode, TDMT_SCH_MERGE_QUERY, mndProcessQueryMsg); mndSetMsgHandle(pMnode, TDMT_SCH_QUERY_CONTINUE, mndProcessQueryMsg); mndSetMsgHandle(pMnode, TDMT_SCH_FETCH, mndProcessQueryMsg); mndSetMsgHandle(pMnode, TDMT_SCH_DROP_TASK, mndProcessQueryMsg); diff --git a/source/dnode/qnode/src/qnode.c b/source/dnode/qnode/src/qnode.c index a0aa640ed6..5909d6f599 100644 --- a/source/dnode/qnode/src/qnode.c +++ b/source/dnode/qnode/src/qnode.c @@ -65,7 +65,7 @@ int32_t qndGetLoad(SQnode *pQnode, SQnodeLoad *pLoad) { } int32_t qndPreprocessQueryMsg(SQnode *pQnode, SRpcMsg * pMsg) { - if (TDMT_SCH_QUERY != pMsg->msgType) { + if (TDMT_SCH_QUERY != pMsg->msgType && TDMT_SCH_MERGE_QUERY != pMsg->msgType) { return 0; } @@ -79,6 +79,7 @@ int32_t qndProcessQueryMsg(SQnode *pQnode, int64_t ts, SRpcMsg *pMsg) { switch (pMsg->msgType) { case TDMT_SCH_QUERY: + case TDMT_SCH_MERGE_QUERY: code = qWorkerProcessQueryMsg(&handle, pQnode->pQuery, pMsg, ts); break; case TDMT_SCH_QUERY_CONTINUE: diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 63b7289661..64fa6c705d 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -216,7 +216,7 @@ _err: } int32_t vnodePreprocessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { - if (TDMT_SCH_QUERY != pMsg->msgType) { + if (TDMT_SCH_QUERY != pMsg->msgType && TDMT_SCH_MERGE_QUERY != pMsg->msgType) { return 0; } @@ -228,6 +228,7 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { SReadHandle handle = {.meta = pVnode->pMeta, .config = &pVnode->config, .vnode = pVnode, .pMsgCb = &pVnode->msgCb}; switch (pMsg->msgType) { case TDMT_SCH_QUERY: + case TDMT_SCH_MERGE_QUERY: return qWorkerProcessQueryMsg(&handle, pVnode->pQuery, pMsg, 0); case TDMT_SCH_QUERY_CONTINUE: return qWorkerProcessCQueryMsg(&handle, pVnode->pQuery, pMsg, 0); diff --git a/source/libs/function/src/udfd.c b/source/libs/function/src/udfd.c index c6c1f00395..4aad544e19 100644 --- a/source/libs/function/src/udfd.c +++ b/source/libs/function/src/udfd.c @@ -549,7 +549,7 @@ int32_t udfdLoadUdf(char *udfName, SUdf *udf) { static bool udfdRpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY) { - if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_FETCH) { + if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH) { return false; } return true; diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index 77df96f82d..cbb12fee0e 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -1555,7 +1555,11 @@ static int32_t createPhysiSubplan(SPhysiPlanContext* pCxt, SLogicSubplan* pLogic if (SUBPLAN_TYPE_MODIFY == pLogicSubplan->subplanType) { code = buildVnodeModifySubplan(pCxt, pLogicSubplan, pSubplan); } else { - pSubplan->msgType = TDMT_SCH_QUERY; + if (SUBPLAN_TYPE_SCAN == pSubplan->subplanType) { + pSubplan->msgType = TDMT_SCH_QUERY; + } else { + pSubplan->msgType = TDMT_SCH_MERGE_QUERY; + } code = createPhysiNode(pCxt, pLogicSubplan->pNode, pSubplan, &pSubplan->pNode); if (TSDB_CODE_SUCCESS == code && !pCxt->pPlanCxt->streamQuery && !pCxt->pPlanCxt->topicQuery) { code = createDataDispatcher(pCxt, pSubplan->pNode, &pSubplan->pDataSink); diff --git a/source/libs/qcom/src/queryUtil.c b/source/libs/qcom/src/queryUtil.c index 40c5807057..69d48b2179 100644 --- a/source/libs/qcom/src/queryUtil.c +++ b/source/libs/qcom/src/queryUtil.c @@ -216,7 +216,8 @@ void destroyQueryExecRes(SQueryExecRes* pRes) { tFreeSSubmitRsp((SSubmitRsp*)pRes->res); break; } - case TDMT_SCH_QUERY: { + case TDMT_SCH_QUERY: + case TDMT_SCH_MERGE_QUERY: { taosArrayDestroy((SArray*)pRes->res); break; } diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h index a119795787..4e36bd3f10 100644 --- a/source/libs/scheduler/inc/schedulerInt.h +++ b/source/libs/scheduler/inc/schedulerInt.h @@ -383,6 +383,7 @@ char* schGetOpStr(SCH_OP_TYPE type); int32_t schBeginOperation(SSchJob *pJob, SCH_OP_TYPE type, bool sync); int32_t schInitJob(SSchedulerReq *pReq, SSchJob **pSchJob); int32_t schSetJobQueryRes(SSchJob* pJob, SQueryResult* pRes); +int32_t schUpdateTaskCandidateAddr(SSchTask *pTask, SEpSet* pEpSet); #ifdef __cplusplus diff --git a/source/libs/scheduler/src/schJob.c b/source/libs/scheduler/src/schJob.c index 54d0d2f2e5..fd27193b0f 100644 --- a/source/libs/scheduler/src/schJob.c +++ b/source/libs/scheduler/src/schJob.c @@ -682,6 +682,25 @@ int32_t schSetTaskCandidateAddrs(SSchJob *pJob, SSchTask *pTask) { return TSDB_CODE_SUCCESS; } +int32_t schUpdateTaskCandidateAddr(SSchTask *pTask, SEpSet* pEpSet) { + if (NULL == pTask->candidateAddrs || 1 != taosArrayGetSize(pTask->candidateAddrs)) { + SCH_TASK_ELOG("not able to update cndidate addr, addr num %d", (pTask->candidateAddrs ? taosArrayGetSize(pTask->candidateAddrs): 0)); + SCH_ERR_RET(TSDB_CODE_APP_ERROR); + } + + SQueryNodeAddr* pAddr = taosArrayGet(pTask->candidateAddrs, 0); + + SEp* pOld = &pAddr->epSet.eps[pAddr->epSet.inUse]; + SEp* pNew = &pEpSet->eps[pEpSet->inUse]; + + SCH_TASK_DLOG("update task ep from %s:%d to %s:%d", pOld->fqdn, pOld->port, pNew->fqdn, pNew->port); + + memcpy(&pAddr->epSet, pEpSet, sizeof(pAddr->epSet)); + + return TSDB_CODE_SUCCESS; +} + + int32_t schRemoveTaskFromExecList(SSchJob *pJob, SSchTask *pTask) { int32_t code = taosHashRemove(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId)); if (code) { @@ -821,7 +840,6 @@ int32_t schTaskCheckSetRetry(SSchJob *pJob, SSchTask *pTask, int32_t errCode, bo return TSDB_CODE_SUCCESS; } - // TODO CHECK epList/condidateList if (SCH_IS_DATA_SRC_TASK(pTask)) { if ((pTask->execIdx + 1) >= SCH_TASK_NUM_OF_EPS(&pTask->plan->execNode)) { *needRetry = false; @@ -853,7 +871,6 @@ int32_t schHandleTaskRetry(SSchJob *pJob, SSchTask *pTask) { SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START); if (SCH_TASK_NEED_FLOW_CTRL(pJob, pTask)) { - SCH_ERR_RET(schDecTaskFlowQuota(pJob, pTask)); SCH_ERR_RET(schLaunchTasksInFlowCtrlList(pJob, pTask)); } @@ -1237,7 +1254,8 @@ int32_t schRescheduleTask(SSchJob *pJob, SSchTask *pTask) { } SCH_LOCK_TASK(pTask); - if (JOB_TASK_STATUS_EXECUTING == pTask->status && pJob->fetchTask != pTask && taosArrayGetSize(pTask->candidateAddrs) > 1) { + if (SCH_TASK_TIMEOUT(pTask) && JOB_TASK_STATUS_EXECUTING == pTask->status && + pJob->fetchTask != pTask && taosArrayGetSize(pTask->candidateAddrs) > 1) { SCH_TASK_DLOG("task execIdx %d will be rescheduled now", pTask->execIdx); schDropTaskOnExecNode(pJob, pTask); taosHashClear(pTask->execNodes); @@ -1281,7 +1299,7 @@ int32_t schProcessOnTaskStatusRsp(SQueryNodeEpId* pEpId, SArray* pStatusList) { continue; } - if (taskStatus->status == JOB_TASK_STATUS_NOT_START && SCH_TASK_TIMEOUT(pTask)) { + if (taskStatus->status == JOB_TASK_STATUS_NOT_START) { schRescheduleTask(pJob, pTask); } @@ -1657,5 +1675,86 @@ _return: SCH_RET(code); } +int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, int32_t rspCode) { + int32_t code = 0; + + if ((pTask->execIdx + 1) >= pTask->maxExecTimes) { + SCH_TASK_DLOG("task no more retry since reach max try times, execIdx:%d", pTask->execIdx); + SCH_UNLOCK_TASK(pTask); + schProcessOnJobFailure(pJob, rspCode); + return TSDB_CODE_SUCCESS; + } + + SCH_TASK_DLOG("task will be redirected now, status:%d", SCH_GET_TASK_STATUS_STR(pTask)); + + schDropTaskOnExecNode(pJob, pTask); + taosHashClear(pTask->execNodes); + SCH_ERR_JRET(schRemoveTaskFromExecList(pJob, pTask)); + schDeregisterTaskHb(pJob, pTask); + atomic_sub_fetch_32(&pTask->level->taskLaunchedNum, 1); + taosMemoryFreeClear(pTask->msg); + pTask->msgLen = 0; + pTask->lastMsgType = 0; + memset(&pTask->succeedAddr, 0, sizeof(pTask->succeedAddr)); + + if (SCH_IS_DATA_SRC_QRY_TASK(pTask)) { + if (SCH_TASK_NEED_FLOW_CTRL(pJob, pTask)) { + if (JOB_TASK_STATUS_EXECUTING == SCH_GET_TASK_STATUS(pTask)) { + SCH_ERR_JRET(schLaunchTasksInFlowCtrlList(pJob, pTask)); + } + } + } else { + pTask->childReady = 0; + + int32_t childrenNum = taosArrayGetSize(pTask->children); + for (int32_t i = 0; i < childrenNum; ++i) { + SSchTask* pChild = taosArrayGetP(pTask->children, i); + SCH_LOCK_TASK(pChild); + code = schDoTaskRedirect(pJob, pChild, rspCode); + SCH_UNLOCK_TASK(pChild); + SCH_ERR_JRET(code); + } + + qClearSubplanExecutionNode(pTask->plan); + } + + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START); + + SCH_ERR_JRET(schLaunchTask(pJob, pTask)); + + SCH_UNLOCK_TASK(pTask); + + return TSDB_CODE_SUCCESS; + +_return: + + code = schProcessOnTaskFailure(pJob, pTask, code); + + SCH_UNLOCK_TASK(pTask); + + SCH_RET(code); +} + +int32_t schHandleRedirect(SSchJob *pJob, SSchTask *pTask, int32_t msgType, SDataBuf* pData, int32_t rspCode) { + int32_t code = 0; + + if (SCH_IS_DATA_SRC_QRY_TASK(pTask)) { + if (NULL == pData->pEpSet) { + SCH_TASK_ELOG("no epset updated while got error %s", tstrerror(rspCode)); + SCH_ERR_JRET(rspCode); + } + + SCH_ERR_JRET(schUpdateTaskCandidateAddr(pTask, pData->pEpSet)); + } + + schDoTaskRedirect(pJob, pTask, rspCode); + +_return: + + schProcessOnTaskFailure(pJob, pTask, code); + + SCH_RET(code); +} + diff --git a/source/libs/scheduler/src/schRemote.c b/source/libs/scheduler/src/schRemote.c index a1691a3dd3..76d2061936 100644 --- a/source/libs/scheduler/src/schRemote.c +++ b/source/libs/scheduler/src/schRemote.c @@ -90,15 +90,6 @@ int32_t schValidateReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t msgTy int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, char *msg, int32_t msgSize, int32_t rspCode) { int32_t code = 0; - int8_t status = 0; - - if (schJobNeedToStop(pJob, &status)) { - SCH_TASK_ELOG("rsp not processed cause of job status, job status:%s, rspCode:0x%x", jobTaskStatusStr(status), rspCode); - taosMemoryFreeClear(msg); - SCH_RET(atomic_load_32(&pJob->errCode)); - } - - SCH_ERR_JRET(schValidateReceivedMsgType(pJob, pTask, msgType)); switch (msgType) { case TDMT_VND_CREATE_TABLE_RSP: { @@ -392,8 +383,23 @@ int32_t schHandleCallback(void *param, const SDataBuf *pMsg, int32_t msgType, in bool dropExecNode = (msgType == TDMT_SCH_LINK_BROKEN || rspCode == TSDB_CODE_RPC_NETWORK_UNAVAIL); SCH_ERR_JRET(schUpdateTaskHandle(pJob, pTask, dropExecNode, pMsg->handle, pParam->execIdx)); + + int8_t status = 0; + if (schJobNeedToStop(pJob, &status)) { + SCH_TASK_ELOG("rsp will not be processed cause of job status %s, rspCode:0x%x", jobTaskStatusStr(status), rspCode); + code = atomic_load_32(&pJob->errCode); + goto _return; + } + + SCH_ERR_JRET(schValidateReceivedMsgType(pJob, pTask, msgType)); + + if (NEED_SCHEDULER_REDIRECT_ERROR(rspCode) || ((rspCode == TSDB_CODE_RPC_NETWORK_UNAVAIL) && msgSize > 0)) { + code = schHandleRedirect(pJob, pTask, msgType, pMsg, rspCode); + goto _return; + } SCH_ERR_JRET(schHandleResponseMsg(pJob, pTask, msgType, pMsg->pData, pMsg->len, rspCode)); + pMsg->pData = NULL; _return: @@ -405,6 +411,7 @@ _return: schReleaseJob(pParam->refId); } + taosMemoryFreeClear(pMsg->pData); taosMemoryFreeClear(param); SCH_RET(code); } @@ -569,6 +576,7 @@ int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp) { *fp = schHandleSubmitCallback; break; case TDMT_SCH_QUERY: + case TDMT_SCH_MERGE_QUERY: *fp = schHandleQueryCallback; break; case TDMT_VND_DELETE: @@ -1032,7 +1040,8 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, tSerializeSVDeleteReq(msg, msgSize, &req); break; } - case TDMT_SCH_QUERY: { + case TDMT_SCH_QUERY: + case TDMT_SCH_MERGE_QUERY: { SCH_ERR_RET(schMakeQueryRpcCtx(pJob, pTask, &rpcCtx)); uint32_t len = strlen(pJob->sql); @@ -1135,7 +1144,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, SCH_ERR_JRET(schAsyncSendMsg(pJob, pTask, &trans, addr, msgType, msg, msgSize, persistHandle, (rpcCtx.args ? &rpcCtx : NULL))); - if (msgType == TDMT_SCH_QUERY) { + if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY) { SCH_ERR_RET(schAppendTaskExecNode(pJob, pTask, addr, pTask->execIdx)); } From 5df4cd9054eeb3e17e383696ca0500ca1d69be88 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 29 Jun 2022 11:27:01 +0800 Subject: [PATCH 03/14] feat: query redirect --- include/libs/planner/planner.h | 2 +- source/libs/planner/src/planner.c | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/libs/planner/planner.h b/include/libs/planner/planner.h index 727cdd8ad6..2ab703df95 100644 --- a/include/libs/planner/planner.h +++ b/include/libs/planner/planner.h @@ -48,7 +48,7 @@ int32_t qCreateQueryPlan(SPlanContext* pCxt, SQueryPlan** pPlan, SArray* pExecNo // @pSource one execution location of this group of datasource subplans int32_t qSetSubplanExecutionNode(SSubplan* pSubplan, int32_t groupId, SDownstreamSourceNode* pSource); -int32_t qClearSubplanExecutionNode(SSubplan* pSubplan, int32_t groupId); +int32_t qClearSubplanExecutionNode(SSubplan* pSubplan); // Convert to subplan to string for the scheduler to send to the executor int32_t qSubPlanToString(const SSubplan* pSubplan, char** pStr, int32_t* pLen); diff --git a/source/libs/planner/src/planner.c b/source/libs/planner/src/planner.c index 1b9d16311c..b1268358fb 100644 --- a/source/libs/planner/src/planner.c +++ b/source/libs/planner/src/planner.c @@ -85,11 +85,24 @@ int32_t qSetSubplanExecutionNode(SSubplan* subplan, int32_t groupId, SDownstream return setSubplanExecutionNode(subplan->pNode, groupId, pSource); } -int32_t qClearSubplanExecutionNode(SSubplan* pSubplan, int32_t groupId) { - // todo - return TSDB_CODE_FAILED; +static void clearSubplanExecutionNode(SPhysiNode* pNode) { + if (QUERY_NODE_PHYSICAL_PLAN_EXCHANGE == nodeType(pNode)) { + SExchangePhysiNode* pExchange = (SExchangePhysiNode*)pNode; + NODES_DESTORY_LIST(pExchange->pSrcEndPoints); + } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE == nodeType(pNode)) { + SMergePhysiNode* pMerge = (SMergePhysiNode*)pNode; + pMerge->numOfChannels = LIST_LENGTH(pMerge->node.pChildren); + SNode* pChild = NULL; + FOREACH(pChild, pMerge->node.pChildren) { NODES_DESTORY_LIST(((SExchangePhysiNode*)pChild)->pSrcEndPoints); } + } + + SNode* pChild = NULL; + FOREACH(pChild, pNode->pChildren) { clearSubplanExecutionNode((SPhysiNode*)pChild); } } +void qClearSubplanExecutionNode(SSubplan* pSubplan) { clearSubplanExecutionNode(pSubplan->pNode); } + + int32_t qSubPlanToString(const SSubplan* pSubplan, char** pStr, int32_t* pLen) { if (SUBPLAN_TYPE_MODIFY == pSubplan->subplanType && NULL == pSubplan->pNode) { SDataInserterNode* insert = (SDataInserterNode*)pSubplan->pDataSink; From c6440a7a3c08496f555cc81a7ba9c1570ece8dc2 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 29 Jun 2022 11:33:37 +0800 Subject: [PATCH 04/14] feat: query redirect --- include/common/tmsgdef.h | 4 +--- source/libs/transport/src/transSvr.c | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index df14b0b34e..008bdcc294 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -208,6 +208,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_SCH_CANCEL_TASK, "cancel-task", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_DROP_TASK, "drop-task", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_EXPLAIN, "explain", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SCH_LINK_BROKEN, "link-broken", NULL, NULL) TD_NEW_MSG_SEG(TDMT_STREAM_MSG) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DEPLOY, "stream-task-deploy", SStreamTaskDeployReq, SStreamTaskDeployRsp) @@ -217,9 +218,6 @@ enum { TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RECOVER, "stream-task-recover", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL) - TD_NEW_MSG_SEG(TDMT_SCH_MSG) - TD_DEF_MSG_TYPE(TDMT_SCH_LINK_BROKEN, "link-broken", NULL, NULL) - TD_NEW_MSG_SEG(TDMT_MON_MSG) TD_DEF_MSG_TYPE(TDMT_MON_MM_INFO, "monitor-minfo", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MON_VM_INFO, "monitor-vinfo", NULL, NULL) diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 9809f3d564..f53507c8aa 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -1042,7 +1042,7 @@ void transReleaseSrvHandle(void* handle) { m->type = Release; tTrace("%s conn %p start to release", transLabel(pThrd->pTransInst), exh->handle); - transSendAsync(pThrd->asyncPool, &m->q); + transAsyncSend(pThrd->asyncPool, &m->q); transReleaseExHandle(refId); return; _return1: @@ -1071,7 +1071,7 @@ void transSendResponse(const STransMsg* msg) { STraceId* trace = (STraceId*)&msg->info.traceId; tGTrace("conn %p start to send resp (1/2)", exh->handle); - transSendAsync(pThrd->asyncPool, &m->q); + transAsyncSend(pThrd->asyncPool, &m->q); transReleaseExHandle(refId); return; _return1: @@ -1100,7 +1100,7 @@ void transRegisterMsg(const STransMsg* msg) { m->type = Register; tTrace("%s conn %p start to register brokenlink callback", transLabel(pThrd->pTransInst), exh->handle); - transSendAsync(pThrd->asyncPool, &m->q); + transAsyncSend(pThrd->asyncPool, &m->q); transReleaseExHandle(refId); return; From 3501475f602d3c54acdb2f3b5217c8d7d51df315 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 29 Jun 2022 16:11:41 +0800 Subject: [PATCH 05/14] enh: add new code --- include/util/taoserror.h | 1 + source/util/src/terror.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 08e09dca2f..db6f1c0e1e 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -86,6 +86,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RPC_NETWORK_UNAVAIL TAOS_DEF_ERROR_CODE(0, 0x0102) #define TSDB_CODE_RPC_FQDN_ERROR TAOS_DEF_ERROR_CODE(0, 0x0103) #define TSDB_CODE_RPC_PORT_EADDRINUSE TAOS_DEF_ERROR_CODE(0, 0x0104) +#define TSDB_CODE_RPC_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0105) //client #define TSDB_CODE_TSC_INVALID_OPERATION TAOS_DEF_ERROR_CODE(0, 0x0200) diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 96f0fb21ca..3ab307dc3f 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -89,8 +89,8 @@ TAOS_DEFINE_ERROR(TSDB_CODE_REF_NOT_EXIST, "Ref is not there") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_REDIRECT, "Redirect") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_AUTH_FAILURE, "Authentication failure") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_NETWORK_UNAVAIL, "Unable to establish connection") -TAOS_DEFINE_ERROR(TSDB_CODE_RPC_FQDN_ERROR, "Unable to resolve FQDN") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_PORT_EADDRINUSE, "Port already in use") +TAOS_DEFINE_ERROR(TSDB_CODE_RPC_BROKEN_LINK, "Conn is broken") //client TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_OPERATION, "Invalid operation") From 86e379f081a59925c3798851d32d0940b3ed73f2 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 29 Jun 2022 17:11:30 +0800 Subject: [PATCH 06/14] feat: add retry --- source/libs/transport/src/transCli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 587e2bad98..e21237ec10 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -399,7 +399,7 @@ void cliHandleExcept(SCliConn* pConn) { STransConnCtx* pCtx = pMsg ? pMsg->ctx : NULL; STransMsg transMsg = {0}; - transMsg.code = TSDB_CODE_RPC_NETWORK_UNAVAIL; + transMsg.code = TSDB_CODE_RPC_BROKEN_LINK; transMsg.msgType = pMsg ? pMsg->msg.msgType + 1 : 0; transMsg.info.ahandle = NULL; From 914e06e8cb44d1f9139d6eb488571e6736f4e50b Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 29 Jun 2022 17:15:08 +0800 Subject: [PATCH 07/14] feat: query redirect --- include/libs/planner/planner.h | 2 +- include/libs/qcom/query.h | 3 +- source/client/src/clientHb.c | 2 +- source/client/src/clientImpl.c | 2 +- source/client/src/clientMsgHandler.c | 16 ++-- source/client/src/tmq.c | 8 +- source/dnode/mgmt/mgmt_qnode/src/qmHandle.c | 2 +- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 2 +- source/dnode/qnode/src/qnode.c | 2 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 2 +- source/libs/catalog/src/ctgRemote.c | 2 +- source/libs/executor/src/executorimpl.c | 2 +- source/libs/executor/src/scanoperator.c | 2 +- source/libs/qworker/inc/qwInt.h | 3 + source/libs/qworker/inc/qwMsg.h | 2 +- source/libs/qworker/src/qwMsg.c | 18 ++-- source/libs/qworker/src/qworker.c | 17 ++-- source/libs/qworker/test/qworkerTests.cpp | 7 +- source/libs/scheduler/inc/schedulerInt.h | 5 +- source/libs/scheduler/src/schJob.c | 10 +-- source/libs/scheduler/src/schRemote.c | 85 +++++-------------- source/libs/scheduler/test/schedulerTests.cpp | 18 ++-- source/libs/transport/src/transSvr.c | 2 + 23 files changed, 87 insertions(+), 127 deletions(-) diff --git a/include/libs/planner/planner.h b/include/libs/planner/planner.h index 2ab703df95..b4c75da131 100644 --- a/include/libs/planner/planner.h +++ b/include/libs/planner/planner.h @@ -48,7 +48,7 @@ int32_t qCreateQueryPlan(SPlanContext* pCxt, SQueryPlan** pPlan, SArray* pExecNo // @pSource one execution location of this group of datasource subplans int32_t qSetSubplanExecutionNode(SSubplan* pSubplan, int32_t groupId, SDownstreamSourceNode* pSource); -int32_t qClearSubplanExecutionNode(SSubplan* pSubplan); +void qClearSubplanExecutionNode(SSubplan* pSubplan); // Convert to subplan to string for the scheduler to send to the executor int32_t qSubPlanToString(const SSubplan* pSubplan, char** pStr, int32_t* pLen); diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index 5767926e5e..92131e354a 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -135,6 +135,7 @@ typedef struct STableMetaOutput { } STableMetaOutput; typedef struct SDataBuf { + int32_t msgType; void* pData; uint32_t len; void* handle; @@ -147,7 +148,7 @@ typedef struct STargetInfo { int32_t vgId; } STargetInfo; -typedef int32_t (*__async_send_cb_fn_t)(void* param, const SDataBuf* pMsg, int32_t code); +typedef int32_t (*__async_send_cb_fn_t)(void* param, SDataBuf* pMsg, int32_t code); typedef int32_t (*__async_exec_fn_t)(void* param); typedef struct SRequestConnInfo { diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 2de630e181..48fa2d7938 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -262,7 +262,7 @@ static int32_t hbQueryHbRspHandle(SAppHbMgr *pAppHbMgr, SClientHbRsp *pRsp) { return TSDB_CODE_SUCCESS; } -static int32_t hbAsyncCallBack(void *param, const SDataBuf *pMsg, int32_t code) { +static int32_t hbAsyncCallBack(void *param, SDataBuf *pMsg, int32_t code) { static int32_t emptyRspNum = 0; if (code != 0) { taosMemoryFreeClear(param); diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 68a225f64c..b20fd4b9b0 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1307,7 +1307,7 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { updateTargetEpSet(pSendInfo, pTscObj, pMsg, pEpSet); - SDataBuf buf = {.len = pMsg->contLen, .pData = NULL, .handle = pMsg->info.handle, .pEpSet = pEpSet}; + SDataBuf buf = {.msgType = pMsg->msgType, .len = pMsg->contLen, .pData = NULL, .handle = pMsg->info.handle, .pEpSet = pEpSet}; if (pMsg->contLen > 0) { buf.pData = taosMemoryCalloc(1, pMsg->contLen); diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index 5c30df4ae2..ce866d2c20 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -26,7 +26,7 @@ static void setErrno(SRequestObj* pRequest, int32_t code) { terrno = code; } -int32_t genericRspCallback(void* param, const SDataBuf* pMsg, int32_t code) { +int32_t genericRspCallback(void* param, SDataBuf* pMsg, int32_t code) { SRequestObj* pRequest = param; setErrno(pRequest, code); @@ -39,7 +39,7 @@ int32_t genericRspCallback(void* param, const SDataBuf* pMsg, int32_t code) { return code; } -int32_t processConnectRsp(void* param, const SDataBuf* pMsg, int32_t code) { +int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) { SRequestObj* pRequest = param; if (code != TSDB_CODE_SUCCESS) { taosMemoryFree(pMsg->pData); @@ -116,7 +116,7 @@ SMsgSendInfo* buildMsgInfoImpl(SRequestObj* pRequest) { return pMsgSendInfo; } -int32_t processCreateDbRsp(void* param, const SDataBuf* pMsg, int32_t code) { +int32_t processCreateDbRsp(void* param, SDataBuf* pMsg, int32_t code) { // todo rsp with the vnode id list SRequestObj* pRequest = param; taosMemoryFree(pMsg->pData); @@ -132,7 +132,7 @@ int32_t processCreateDbRsp(void* param, const SDataBuf* pMsg, int32_t code) { return code; } -int32_t processUseDbRsp(void* param, const SDataBuf* pMsg, int32_t code) { +int32_t processUseDbRsp(void* param, SDataBuf* pMsg, int32_t code) { SRequestObj* pRequest = param; if (TSDB_CODE_MND_DB_NOT_EXIST == code) { @@ -211,7 +211,7 @@ int32_t processUseDbRsp(void* param, const SDataBuf* pMsg, int32_t code) { return 0; } -int32_t processCreateSTableRsp(void* param, const SDataBuf* pMsg, int32_t code) { +int32_t processCreateSTableRsp(void* param, SDataBuf* pMsg, int32_t code) { assert(pMsg != NULL && param != NULL); SRequestObj* pRequest = param; @@ -229,7 +229,7 @@ int32_t processCreateSTableRsp(void* param, const SDataBuf* pMsg, int32_t code) return code; } -int32_t processDropDbRsp(void* param, const SDataBuf* pMsg, int32_t code) { +int32_t processDropDbRsp(void* param, SDataBuf* pMsg, int32_t code) { SRequestObj* pRequest = param; if (code != TSDB_CODE_SUCCESS) { setErrno(pRequest, code); @@ -250,7 +250,7 @@ int32_t processDropDbRsp(void* param, const SDataBuf* pMsg, int32_t code) { return code; } -int32_t processAlterStbRsp(void* param, const SDataBuf* pMsg, int32_t code) { +int32_t processAlterStbRsp(void* param, SDataBuf* pMsg, int32_t code) { SRequestObj* pRequest = param; if (code != TSDB_CODE_SUCCESS) { setErrno(pRequest, code); @@ -357,7 +357,7 @@ static int32_t buildShowVariablesRsp(SArray* pVars, SRetrieveTableRsp** pRsp) { return TSDB_CODE_SUCCESS; } -int32_t processShowVariablesRsp(void* param, const SDataBuf* pMsg, int32_t code) { +int32_t processShowVariablesRsp(void* param, SDataBuf* pMsg, int32_t code) { SRequestObj* pRequest = param; if (code != TSDB_CODE_SUCCESS) { setErrno(pRequest, code); diff --git a/source/client/src/tmq.c b/source/client/src/tmq.c index 637a7ee5dd..165deaa6c5 100644 --- a/source/client/src/tmq.c +++ b/source/client/src/tmq.c @@ -372,7 +372,7 @@ int32_t tmqCommitCb(void* param, const SDataBuf* pMsg, int32_t code) { return 0; } -int32_t tmqCommitCb2(void* param, const SDataBuf* pBuf, int32_t code) { +int32_t tmqCommitCb2(void* param, SDataBuf* pBuf, int32_t code) { SMqCommitCbParam2* pParam = (SMqCommitCbParam2*)param; SMqCommitCbParamSet* pParamSet = (SMqCommitCbParamSet*)pParam->params; // push into array @@ -862,7 +862,7 @@ void tmqClearUnhandleMsg(tmq_t* tmq) { } } -int32_t tmqSubscribeCb(void* param, const SDataBuf* pMsg, int32_t code) { +int32_t tmqSubscribeCb(void* param, SDataBuf* pMsg, int32_t code) { SMqSubscribeCbParam* pParam = (SMqSubscribeCbParam*)param; pParam->rspErr = code; /*tmq_t* tmq = pParam->tmq;*/ @@ -1116,7 +1116,7 @@ int32_t tmqGetSkipLogNum(tmq_message_t* tmq_message) { } #endif -int32_t tmqPollCb(void* param, const SDataBuf* pMsg, int32_t code) { +int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { SMqPollCbParam* pParam = (SMqPollCbParam*)param; SMqClientVg* pVg = pParam->pVg; SMqClientTopic* pTopic = pParam->pTopic; @@ -1368,7 +1368,7 @@ bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, SMqAskEpRsp* pRsp) { } #endif -int32_t tmqAskEpCb(void* param, const SDataBuf* pMsg, int32_t code) { +int32_t tmqAskEpCb(void* param, SDataBuf* pMsg, int32_t code) { SMqAskEpCbParam* pParam = (SMqAskEpCbParam*)param; tmq_t* tmq = pParam->tmq; int8_t async = pParam->async; diff --git a/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c b/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c index 5911daaa2b..1f22eefddf 100644 --- a/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c +++ b/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c @@ -111,7 +111,7 @@ SArray *qmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SCH_MERGE_QUERY, qmPutNodeMsgToQueryQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_CONTINUE, qmPutNodeMsgToQueryQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_FETCH_RSP, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH_RSP, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_HEARTBEAT, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_CANCEL_TASK, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 75e69e3716..4e581fd28e 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -86,7 +86,7 @@ static void dmProcessRpcMsg(SDnode *pDnode, SRpcMsg *pRpc, SEpSet *pEpSet) { return; case TDMT_MND_SYSTABLE_RETRIEVE_RSP: case TDMT_DND_SYSTABLE_RETRIEVE_RSP: - case TDMT_VND_FETCH_RSP: + case TDMT_SCH_FETCH_RSP: qWorkerProcessFetchRsp(NULL, NULL, pRpc, 0); return; case TDMT_MND_STATUS_RSP: diff --git a/source/dnode/qnode/src/qnode.c b/source/dnode/qnode/src/qnode.c index 5909d6f599..cfc63b083d 100644 --- a/source/dnode/qnode/src/qnode.c +++ b/source/dnode/qnode/src/qnode.c @@ -88,7 +88,7 @@ int32_t qndProcessQueryMsg(SQnode *pQnode, int64_t ts, SRpcMsg *pMsg) { case TDMT_SCH_FETCH: code = qWorkerProcessFetchMsg(pQnode, pQnode->pQuery, pMsg, ts); break; - case TDMT_VND_FETCH_RSP: + case TDMT_SCH_FETCH_RSP: code = qWorkerProcessFetchRsp(pQnode, pQnode->pQuery, pMsg, ts); break; case TDMT_SCH_CANCEL_TASK: diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 64fa6c705d..d0a1b5417f 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -246,7 +246,7 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { switch (pMsg->msgType) { case TDMT_SCH_FETCH: return qWorkerProcessFetchMsg(pVnode, pVnode->pQuery, pMsg, 0); - case TDMT_VND_FETCH_RSP: + case TDMT_SCH_FETCH_RSP: return qWorkerProcessFetchRsp(pVnode, pVnode->pQuery, pMsg, 0); case TDMT_SCH_CANCEL_TASK: return qWorkerProcessCancelMsg(pVnode, pVnode->pQuery, pMsg, 0); diff --git a/source/libs/catalog/src/ctgRemote.c b/source/libs/catalog/src/ctgRemote.c index 304da88888..59ad009527 100644 --- a/source/libs/catalog/src/ctgRemote.c +++ b/source/libs/catalog/src/ctgRemote.c @@ -241,7 +241,7 @@ int32_t ctgProcessRspMsg(void* out, int32_t reqType, char* msg, int32_t msgSize, } -int32_t ctgHandleMsgCallback(void *param, const SDataBuf *pMsg, int32_t rspCode) { +int32_t ctgHandleMsgCallback(void *param, SDataBuf *pMsg, int32_t rspCode) { SCtgTaskCallbackParam* cbParam = (SCtgTaskCallbackParam*)param; int32_t code = 0; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 9b38f46ca6..c6f1096bfe 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1933,7 +1933,7 @@ typedef struct SFetchRspHandleWrapper { int32_t sourceIndex; } SFetchRspHandleWrapper; -int32_t loadRemoteDataCallback(void* param, const SDataBuf* pMsg, int32_t code) { +int32_t loadRemoteDataCallback(void* param, SDataBuf* pMsg, int32_t code) { SFetchRspHandleWrapper* pWrapper = (SFetchRspHandleWrapper*)param; SExchangeInfo* pExchangeInfo = taosAcquireRef(exchangeObjRefPool, pWrapper->exchangeId); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index c51ef44154..146b6ca76b 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1322,7 +1322,7 @@ static void getDBNameFromCondition(SNode* pCondition, const char* dbName) { nodesWalkExpr(pCondition, getDBNameFromConditionWalker, (char*)dbName); } -static int32_t loadSysTableCallback(void* param, const SDataBuf* pMsg, int32_t code) { +static int32_t loadSysTableCallback(void* param, SDataBuf* pMsg, int32_t code) { SOperatorInfo* operator=(SOperatorInfo*) param; SSysTableScanInfo* pScanResInfo = (SSysTableScanInfo*)operator->info; if (TSDB_CODE_SUCCESS == code) { diff --git a/source/libs/qworker/inc/qwInt.h b/source/libs/qworker/inc/qwInt.h index 2368b13dd6..4edd7a8a6e 100644 --- a/source/libs/qworker/inc/qwInt.h +++ b/source/libs/qworker/inc/qwInt.h @@ -82,6 +82,7 @@ extern SQWDebug gQWDebug; typedef struct SQWMsg { void *node; int32_t code; + int32_t msgType; char *msg; int32_t msgLen; SRpcHandleInfo connInfo; @@ -100,6 +101,7 @@ typedef struct SQWHbInfo { typedef struct SQWPhaseInput { int32_t code; + int32_t msgType; } SQWPhaseInput; typedef struct SQWPhaseOutput { @@ -119,6 +121,7 @@ typedef struct SQWTaskCtx { int8_t phase; int8_t taskType; int8_t explain; + int32_t queryType; bool queryFetched; bool queryEnd; diff --git a/source/libs/qworker/inc/qwMsg.h b/source/libs/qworker/inc/qwMsg.h index 8c7c030dce..9e9d1f44cb 100644 --- a/source/libs/qworker/inc/qwMsg.h +++ b/source/libs/qworker/inc/qwMsg.h @@ -39,7 +39,7 @@ int32_t qwBuildAndSendFetchRsp(SRpcHandleInfo *pConn, SRetrieveTableRsp *pRsp, i int32_t code); void qwBuildFetchRsp(void *msg, SOutputData *input, int32_t len, bool qComplete); int32_t qwBuildAndSendCQueryMsg(QW_FPARAMS_DEF, SRpcHandleInfo *pConn); -int32_t qwBuildAndSendQueryRsp(SRpcHandleInfo *pConn, int32_t code, STbVerInfo* tbInfo); +int32_t qwBuildAndSendQueryRsp(int32_t rspType, SRpcHandleInfo *pConn, int32_t code, STbVerInfo* tbInfo); int32_t qwBuildAndSendExplainRsp(SRpcHandleInfo *pConn, SExplainExecInfo *execInfo, int32_t num); void qwFreeFetchRsp(void *msg); int32_t qwMallocFetchRsp(int32_t length, SRetrieveTableRsp **rsp); diff --git a/source/libs/qworker/src/qwMsg.c b/source/libs/qworker/src/qwMsg.c index 8c1371fce9..70a6a70c44 100644 --- a/source/libs/qworker/src/qwMsg.c +++ b/source/libs/qworker/src/qwMsg.c @@ -43,7 +43,7 @@ void qwFreeFetchRsp(void *msg) { } } -int32_t qwBuildAndSendQueryRsp(SRpcHandleInfo *pConn, int32_t code, STbVerInfo* tbInfo) { +int32_t qwBuildAndSendQueryRsp(int32_t rspType, SRpcHandleInfo *pConn, int32_t code, STbVerInfo* tbInfo) { SQueryTableRsp *pRsp = (SQueryTableRsp *)rpcMallocCont(sizeof(SQueryTableRsp)); pRsp->code = code; if (tbInfo) { @@ -53,7 +53,7 @@ int32_t qwBuildAndSendQueryRsp(SRpcHandleInfo *pConn, int32_t code, STbVerInfo* } SRpcMsg rpcRsp = { - .msgType = TDMT_VND_QUERY_RSP, + .msgType = rspType, .pCont = pRsp, .contLen = sizeof(*pRsp), .code = code, @@ -73,7 +73,7 @@ int32_t qwBuildAndSendExplainRsp(SRpcHandleInfo *pConn, SExplainExecInfo *execIn tSerializeSExplainRsp(pRsp, contLen, &rsp); SRpcMsg rpcRsp = { - .msgType = TDMT_VND_EXPLAIN_RSP, + .msgType = TDMT_SCH_EXPLAIN_RSP, .pCont = pRsp, .contLen = contLen, .code = 0, @@ -92,7 +92,7 @@ int32_t qwBuildAndSendHbRsp(SRpcHandleInfo *pConn, SSchedulerHbRsp *pStatus, int tSerializeSSchedulerHbRsp(pRsp, contLen, pStatus); SRpcMsg rpcRsp = { - .msgType = TDMT_VND_QUERY_HEARTBEAT_RSP, + .msgType = TDMT_SCH_QUERY_HEARTBEAT_RSP, .contLen = contLen, .pCont = pRsp, .code = code, @@ -112,7 +112,7 @@ int32_t qwBuildAndSendFetchRsp(SRpcHandleInfo *pConn, SRetrieveTableRsp *pRsp, i } SRpcMsg rpcRsp = { - .msgType = TDMT_VND_FETCH_RSP, + .msgType = TDMT_SCH_FETCH_RSP, .pCont = pRsp, .contLen = sizeof(*pRsp) + dataLength, .code = code, @@ -129,7 +129,7 @@ int32_t qwBuildAndSendCancelRsp(SRpcHandleInfo *pConn, int32_t code) { pRsp->code = code; SRpcMsg rpcRsp = { - .msgType = TDMT_VND_CANCEL_TASK_RSP, + .msgType = TDMT_SCH_CANCEL_TASK_RSP, .pCont = pRsp, .contLen = sizeof(*pRsp), .code = code, @@ -145,7 +145,7 @@ int32_t qwBuildAndSendDropRsp(SRpcHandleInfo *pConn, int32_t code) { pRsp->code = code; SRpcMsg rpcRsp = { - .msgType = TDMT_VND_DROP_TASK_RSP, + .msgType = TDMT_SCH_DROP_TASK_RSP, .pCont = pRsp, .contLen = sizeof(*pRsp), .code = code, @@ -325,9 +325,9 @@ int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg, int uint64_t tId = msg->taskId; int64_t rId = msg->refId; - SQWMsg qwMsg = {.node = node, .msg = msg->msg + msg->sqlLen, .msgLen = msg->phyLen, .connInfo = pMsg->info}; + SQWMsg qwMsg = {.node = node, .msg = msg->msg + msg->sqlLen, .msgLen = msg->phyLen, .connInfo = pMsg->info, .msgType = pMsg->msgType}; char * sql = strndup(msg->msg, msg->sqlLen); - QW_SCH_TASK_DLOG("processQuery start, node:%p, handle:%p, sql:%s", node, pMsg->info.handle, sql); + QW_SCH_TASK_DLOG("processQuery start, node:%p, type:%s, handle:%p, sql:%s", node, TMSG_INFO(pMsg->msgType), pMsg->info.handle, sql); QW_ERR_RET(qwProcessQuery(QW_FPARAMS(), &qwMsg, msg->taskType, msg->explain, sql)); QW_SCH_TASK_DLOG("processQuery end, node:%p", node); diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 4250785f7c..8ab293d0ad 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -417,17 +417,10 @@ int32_t qwHandlePostPhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inp } if (QW_PHASE_POST_QUERY == phase) { -#if 0 - if (QW_IS_EVENT_RECEIVED(ctx, QW_EVENT_READY)) { - readyConnection = &ctx->connInfo; - QW_SET_EVENT_PROCESSED(ctx, QW_EVENT_READY); - } -#else connInfo = ctx->ctrlConnInfo; rspConnection = &connInfo; QW_SET_EVENT_PROCESSED(ctx, QW_EVENT_READY); -#endif } if (QW_IS_EVENT_RECEIVED(ctx, QW_EVENT_DROP)) { @@ -458,8 +451,8 @@ _return: } if (rspConnection) { - qwBuildAndSendQueryRsp(rspConnection, code, ctx ? &ctx->tbInfo : NULL); - QW_TASK_DLOG("ready msg rsped, handle:%p, code:%x - %s", rspConnection->handle, code, tstrerror(code)); + qwBuildAndSendQueryRsp(input->msgType + 1, rspConnection, code, ctx ? &ctx->tbInfo : NULL); + QW_TASK_DLOG("query msg rsped, handle:%p, code:%x - %s", rspConnection->handle, code, tstrerror(code)); } if (ctx) { @@ -530,8 +523,9 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType, int8_t ex QW_ERR_JRET(qwGetTaskCtx(QW_FPARAMS(), &ctx)); - atomic_store_8(&ctx->taskType, taskType); - atomic_store_8(&ctx->explain, explain); + ctx->taskType = taskType; + ctx->explain = explain; + ctx->queryType = qwMsg->msgType; QW_TASK_DLOGL("subplan json string, len:%d, %s", qwMsg->msgLen, qwMsg->msg); @@ -571,6 +565,7 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType, int8_t ex _return: input.code = code; + input.msgType = qwMsg->msgType; code = qwHandlePostPhaseEvents(QW_FPARAMS(), QW_PHASE_POST_QUERY, &input, NULL); // if (!queryRsped) { diff --git a/source/libs/qworker/test/qworkerTests.cpp b/source/libs/qworker/test/qworkerTests.cpp index 7c7d262167..bc37400249 100644 --- a/source/libs/qworker/test/qworkerTests.cpp +++ b/source/libs/qworker/test/qworkerTests.cpp @@ -202,7 +202,8 @@ void qwtSendReqToDnode(void* pVnode, struct SEpSet* epSet, struct SRpcMsg* pReq) void qwtRpcSendResponse(const SRpcMsg *pRsp) { switch (pRsp->msgType) { - case TDMT_VND_QUERY_RSP: { + case TDMT_SCH_QUERY_RSP: + case TDMT_SCH_MERGE_QUERY_RSP: { SQueryTableRsp *rsp = (SQueryTableRsp *)pRsp->pCont; if (pRsp->code) { @@ -213,7 +214,7 @@ void qwtRpcSendResponse(const SRpcMsg *pRsp) { rpcFreeCont(rsp); break; } - case TDMT_VND_FETCH_RSP: { + case TDMT_SCH_FETCH_RSP: { SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)pRsp->pCont; if (0 == pRsp->code && 0 == rsp->completed) { @@ -229,7 +230,7 @@ void qwtRpcSendResponse(const SRpcMsg *pRsp) { break; } - case TDMT_VND_DROP_TASK_RSP: { + case TDMT_SCH_DROP_TASK_RSP: { STaskDropRsp *rsp = (STaskDropRsp *)pRsp->pCont; rpcFreeCont(rsp); diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h index 4e36bd3f10..fb0c43b0ff 100644 --- a/source/libs/scheduler/inc/schedulerInt.h +++ b/source/libs/scheduler/inc/schedulerInt.h @@ -353,7 +353,7 @@ void schFreeJobImpl(void *job); int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx); int32_t schEnsureHbConnection(SSchJob *pJob, SSchTask *pTask); int32_t schUpdateHbConnection(SQueryNodeEpId *epId, SSchTrans *trans); -int32_t schHandleHbCallback(void *param, const SDataBuf *pMsg, int32_t code); +int32_t schHandleHbCallback(void *param, SDataBuf *pMsg, int32_t code); void schFreeRpcCtx(SRpcCtx *pCtx); int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp); bool schJobNeedToStop(SSchJob *pJob, int8_t *pStatus); @@ -383,7 +383,8 @@ char* schGetOpStr(SCH_OP_TYPE type); int32_t schBeginOperation(SSchJob *pJob, SCH_OP_TYPE type, bool sync); int32_t schInitJob(SSchedulerReq *pReq, SSchJob **pSchJob); int32_t schSetJobQueryRes(SSchJob* pJob, SQueryResult* pRes); -int32_t schUpdateTaskCandidateAddr(SSchTask *pTask, SEpSet* pEpSet); +int32_t schUpdateTaskCandidateAddr(SSchJob *pJob, SSchTask *pTask, SEpSet* pEpSet); +int32_t schHandleRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf* pData, int32_t rspCode); #ifdef __cplusplus diff --git a/source/libs/scheduler/src/schJob.c b/source/libs/scheduler/src/schJob.c index fd27193b0f..643594f4e0 100644 --- a/source/libs/scheduler/src/schJob.c +++ b/source/libs/scheduler/src/schJob.c @@ -682,9 +682,9 @@ int32_t schSetTaskCandidateAddrs(SSchJob *pJob, SSchTask *pTask) { return TSDB_CODE_SUCCESS; } -int32_t schUpdateTaskCandidateAddr(SSchTask *pTask, SEpSet* pEpSet) { +int32_t schUpdateTaskCandidateAddr(SSchJob *pJob, SSchTask *pTask, SEpSet* pEpSet) { if (NULL == pTask->candidateAddrs || 1 != taosArrayGetSize(pTask->candidateAddrs)) { - SCH_TASK_ELOG("not able to update cndidate addr, addr num %d", (pTask->candidateAddrs ? taosArrayGetSize(pTask->candidateAddrs): 0)); + SCH_TASK_ELOG("not able to update cndidate addr, addr num %d", (int32_t)(pTask->candidateAddrs ? taosArrayGetSize(pTask->candidateAddrs): 0)); SCH_ERR_RET(TSDB_CODE_APP_ERROR); } @@ -1685,7 +1685,7 @@ int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, int32_t rspCode) { return TSDB_CODE_SUCCESS; } - SCH_TASK_DLOG("task will be redirected now, status:%d", SCH_GET_TASK_STATUS_STR(pTask)); + SCH_TASK_DLOG("task will be redirected now, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); schDropTaskOnExecNode(pJob, pTask); taosHashClear(pTask->execNodes); @@ -1735,7 +1735,7 @@ _return: SCH_RET(code); } -int32_t schHandleRedirect(SSchJob *pJob, SSchTask *pTask, int32_t msgType, SDataBuf* pData, int32_t rspCode) { +int32_t schHandleRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf* pData, int32_t rspCode) { int32_t code = 0; if (SCH_IS_DATA_SRC_QRY_TASK(pTask)) { @@ -1744,7 +1744,7 @@ int32_t schHandleRedirect(SSchJob *pJob, SSchTask *pTask, int32_t msgType, SData SCH_ERR_JRET(rspCode); } - SCH_ERR_JRET(schUpdateTaskCandidateAddr(pTask, pData->pEpSet)); + SCH_ERR_JRET(schUpdateTaskCandidateAddr(pJob, pTask, pData->pEpSet)); } schDoTaskRedirect(pJob, pTask, rspCode); diff --git a/source/libs/scheduler/src/schRemote.c b/source/libs/scheduler/src/schRemote.c index 76d2061936..91f5ff979c 100644 --- a/source/libs/scheduler/src/schRemote.c +++ b/source/libs/scheduler/src/schRemote.c @@ -28,9 +28,10 @@ int32_t schValidateReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t msgTy int32_t reqMsgType = msgType - 1; switch (msgType) { case TDMT_SCH_LINK_BROKEN: - case TDMT_VND_EXPLAIN_RSP: + case TDMT_SCH_EXPLAIN_RSP: return TSDB_CODE_SUCCESS; - case TDMT_VND_QUERY_RSP: // query_rsp may be processed later than ready_rsp + case TDMT_SCH_MERGE_QUERY_RSP: + case TDMT_SCH_QUERY_RSP: // query_rsp may be processed later than ready_rsp if (lastMsgType != reqMsgType && -1 != lastMsgType) { SCH_TASK_DLOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), TMSG_INFO(msgType)); @@ -43,7 +44,7 @@ int32_t schValidateReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t msgTy SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); return TSDB_CODE_SUCCESS; - case TDMT_VND_FETCH_RSP: + case TDMT_SCH_FETCH_RSP: if (lastMsgType != reqMsgType && -1 != lastMsgType) { SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), TMSG_INFO(msgType)); @@ -238,7 +239,8 @@ int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, ch break; } - case TDMT_VND_QUERY_RSP: { + case TDMT_SCH_QUERY_RSP: + case TDMT_SCH_MERGE_QUERY_RSP: { SQueryTableRsp *rsp = (SQueryTableRsp *)msg; SCH_ERR_JRET(rspCode); @@ -255,7 +257,7 @@ int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, ch break; } - case TDMT_VND_EXPLAIN_RSP: { + case TDMT_SCH_EXPLAIN_RSP: { SCH_ERR_JRET(rspCode); if (NULL == msg) { SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); @@ -285,7 +287,7 @@ int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, ch } break; } - case TDMT_VND_FETCH_RSP: { + case TDMT_SCH_FETCH_RSP: { SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)msg; SCH_ERR_JRET(rspCode); @@ -333,8 +335,8 @@ int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, ch schProcessOnDataFetched(pJob); break; } - case TDMT_VND_DROP_TASK_RSP: { - // SHOULD NEVER REACH HERE + case TDMT_SCH_DROP_TASK_RSP: { + // NEVER REACH HERE SCH_TASK_ELOG("invalid status to handle drop task rsp, refId:0x%" PRIx64, pJob->refId); SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR); break; @@ -358,8 +360,9 @@ _return: } -int32_t schHandleCallback(void *param, const SDataBuf *pMsg, int32_t msgType, int32_t rspCode) { +int32_t schHandleCallback(void *param, SDataBuf *pMsg, int32_t rspCode) { int32_t code = 0; + int32_t msgType = pMsg->msgType; SSchTaskCallbackParam *pParam = (SSchTaskCallbackParam *)param; SSchTask *pTask = NULL; @@ -393,8 +396,8 @@ int32_t schHandleCallback(void *param, const SDataBuf *pMsg, int32_t msgType, in SCH_ERR_JRET(schValidateReceivedMsgType(pJob, pTask, msgType)); - if (NEED_SCHEDULER_REDIRECT_ERROR(rspCode) || ((rspCode == TSDB_CODE_RPC_NETWORK_UNAVAIL) && msgSize > 0)) { - code = schHandleRedirect(pJob, pTask, msgType, pMsg, rspCode); + if (NEED_SCHEDULER_REDIRECT_ERROR(rspCode) || ((rspCode == TSDB_CODE_RPC_NETWORK_UNAVAIL) && pMsg->len > 0)) { + code = schHandleRedirect(pJob, pTask, (SDataBuf *)pMsg, rspCode); goto _return; } @@ -416,46 +419,14 @@ _return: SCH_RET(code); } -int32_t schHandleSubmitCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_SUBMIT_RSP, code); -} - -int32_t schHandleCreateTbCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_CREATE_TABLE_RSP, code); -} - -int32_t schHandleDropTbCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_DROP_TABLE_RSP, code); -} - -int32_t schHandleAlterTbCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_ALTER_TABLE_RSP, code); -} - -int32_t schHandleQueryCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_QUERY_RSP, code); -} - -int32_t schHandleDeleteCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_DELETE_RSP, code); -} - -int32_t schHandleFetchCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_FETCH_RSP, code); -} - -int32_t schHandleExplainCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_EXPLAIN_RSP, code); -} - -int32_t schHandleDropCallback(void *param, const SDataBuf *pMsg, int32_t code) { +int32_t schHandleDropCallback(void *param, SDataBuf *pMsg, int32_t code) { SSchTaskCallbackParam *pParam = (SSchTaskCallbackParam *)param; qDebug("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " drop task rsp received, code:0x%x", pParam->queryId, pParam->taskId, code); taosMemoryFreeClear(param); return TSDB_CODE_SUCCESS; } -int32_t schHandleLinkBrokenCallback(void *param, const SDataBuf *pMsg, int32_t code) { +int32_t schHandleLinkBrokenCallback(void *param, SDataBuf *pMsg, int32_t code) { SSchCallbackParamHeader *head = (SSchCallbackParamHeader *)param; rpcReleaseHandle(pMsg->handle, TAOS_CONN_CLIENT); @@ -468,7 +439,7 @@ int32_t schHandleLinkBrokenCallback(void *param, const SDataBuf *pMsg, int32_t c SCH_ERR_RET(schBuildAndSendHbMsg(&hbParam->nodeEpId, NULL)); } else { - SCH_ERR_RET(schHandleCallback(param, pMsg, TDMT_SCH_LINK_BROKEN, code)); + SCH_ERR_RET(schHandleCallback(param, pMsg, code)); } return TSDB_CODE_SUCCESS; @@ -564,29 +535,15 @@ _return: int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp) { switch (msgType) { case TDMT_VND_CREATE_TABLE: - *fp = schHandleCreateTbCallback; - break; case TDMT_VND_DROP_TABLE: - *fp = schHandleDropTbCallback; - break; case TDMT_VND_ALTER_TABLE: - *fp = schHandleAlterTbCallback; - break; case TDMT_VND_SUBMIT: - *fp = schHandleSubmitCallback; - break; case TDMT_SCH_QUERY: case TDMT_SCH_MERGE_QUERY: - *fp = schHandleQueryCallback; - break; case TDMT_VND_DELETE: - *fp = schHandleDeleteCallback; - break; case TDMT_SCH_EXPLAIN: - *fp = schHandleExplainCallback; - break; case TDMT_SCH_FETCH: - *fp = schHandleFetchCallback; + *fp = schHandleCallback; break; case TDMT_SCH_DROP_TASK: *fp = schHandleDropCallback; @@ -700,7 +657,7 @@ int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - int32_t msgType = TDMT_VND_QUERY_HEARTBEAT_RSP; + int32_t msgType = TDMT_SCH_QUERY_HEARTBEAT_RSP; __async_send_cb_fn_t fp = NULL; SCH_ERR_JRET(schGetCallbackFp(TDMT_SCH_QUERY_HEARTBEAT, &fp)); @@ -730,7 +687,7 @@ _return: SCH_RET(code); } -int32_t schHandleHbCallback(void *param, const SDataBuf *pMsg, int32_t code) { +int32_t schHandleHbCallback(void *param, SDataBuf *pMsg, int32_t code) { SSchedulerHbRsp rsp = {0}; SSchTaskCallbackParam *pParam = (SSchTaskCallbackParam *)param; @@ -794,7 +751,7 @@ int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { SSchTrans trans = {.pTrans = pJob->conn.pTrans, .pHandle = SCH_GET_TASK_HANDLE(pTask)}; SCH_ERR_JRET(schGenerateCallBackInfo(pJob, pTask, NULL, 0, TDMT_SCH_EXPLAIN, &trans, false, &pExplainMsgSendInfo)); - int32_t msgType = TDMT_VND_EXPLAIN_RSP; + int32_t msgType = TDMT_SCH_EXPLAIN_RSP; SRpcCtxVal ctxVal = {.val = pExplainMsgSendInfo, .clone = schCloneSMsgSendInfo}; if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); diff --git a/source/libs/scheduler/test/schedulerTests.cpp b/source/libs/scheduler/test/schedulerTests.cpp index 43822e4f5b..22b08e9037 100644 --- a/source/libs/scheduler/test/schedulerTests.cpp +++ b/source/libs/scheduler/test/schedulerTests.cpp @@ -412,7 +412,7 @@ void *schtCreateFetchRspThread(void *param) { rsp->completed = 1; rsp->numOfRows = 10; - code = schHandleResponseMsg(pJob, pJob->fetchTask, TDMT_VND_FETCH_RSP, (char *)rsp, sizeof(*rsp), 0); + code = schHandleResponseMsg(pJob, pJob->fetchTask, TDMT_SCH_FETCH_RSP, (char *)rsp, sizeof(*rsp), 0); schReleaseJob(job); @@ -445,7 +445,7 @@ void *schtFetchRspThread(void *aa) { dataBuf.pData = rsp; dataBuf.len = sizeof(*rsp); - code = schHandleCallback(param, &dataBuf, TDMT_VND_FETCH_RSP, 0); + code = schHandleCallback(param, &dataBuf, TDMT_SCH_FETCH_RSP, 0); assert(code == 0 || code); } @@ -547,7 +547,7 @@ void* schtRunJobThread(void *aa) { dataBuf.pData = &rsp; dataBuf.len = sizeof(rsp); - code = schHandleCallback(param, &dataBuf, TDMT_VND_QUERY_RSP, 0); + code = schHandleCallback(param, &dataBuf, TDMT_SCH_QUERY_RSP, 0); assert(code == 0 || code); pIter = taosHashIterate(execTasks, pIter); @@ -566,7 +566,7 @@ void* schtRunJobThread(void *aa) { dataBuf.pData = &rsp; dataBuf.len = sizeof(rsp); - code = schHandleCallback(param, &dataBuf, TDMT_VND_QUERY_RSP, 0); + code = schHandleCallback(param, &dataBuf, TDMT_SCH_QUERY_RSP, 0); assert(code == 0 || code); pIter = taosHashIterate(execTasks, pIter); @@ -677,7 +677,7 @@ TEST(queryTest, normalCase) { SSchTask *task = *(SSchTask **)pIter; SQueryTableRsp rsp = {0}; - code = schHandleResponseMsg(pJob, task, TDMT_VND_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); + code = schHandleResponseMsg(pJob, task, TDMT_SCH_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); ASSERT_EQ(code, 0); pIter = taosHashIterate(pJob->execTasks, pIter); @@ -688,7 +688,7 @@ TEST(queryTest, normalCase) { SSchTask *task = *(SSchTask **)pIter; SQueryTableRsp rsp = {0}; - code = schHandleResponseMsg(pJob, task, TDMT_VND_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); + code = schHandleResponseMsg(pJob, task, TDMT_SCH_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); ASSERT_EQ(code, 0); pIter = taosHashIterate(pJob->execTasks, pIter); @@ -780,7 +780,7 @@ TEST(queryTest, readyFirstCase) { SSchTask *task = *(SSchTask **)pIter; SQueryTableRsp rsp = {0}; - code = schHandleResponseMsg(pJob, task, TDMT_VND_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); + code = schHandleResponseMsg(pJob, task, TDMT_SCH_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); ASSERT_EQ(code, 0); pIter = taosHashIterate(pJob->execTasks, pIter); @@ -791,7 +791,7 @@ TEST(queryTest, readyFirstCase) { SSchTask *task = *(SSchTask **)pIter; SQueryTableRsp rsp = {0}; - code = schHandleResponseMsg(pJob, task, TDMT_VND_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); + code = schHandleResponseMsg(pJob, task, TDMT_SCH_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); ASSERT_EQ(code, 0); pIter = taosHashIterate(pJob->execTasks, pIter); @@ -898,7 +898,7 @@ TEST(queryTest, flowCtrlCase) { if (task->lastMsgType == TDMT_SCH_QUERY) { SQueryTableRsp rsp = {0}; - code = schHandleResponseMsg(pJob, task, TDMT_VND_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); + code = schHandleResponseMsg(pJob, task, TDMT_SCH_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); ASSERT_EQ(code, 0); } else { diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index f53507c8aa..aacae590a0 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -1114,4 +1114,6 @@ _return2: rpcFreeCont(msg->pCont); } +int transGetConnInfo(void* thandle, STransHandleInfo* pConnInfo) { return -1; } + #endif From 53efe75146118cbd821df10ad52f8f5bad83c263 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 29 Jun 2022 17:56:35 +0800 Subject: [PATCH 08/14] feat: query redirect --- include/libs/qcom/query.h | 7 ++++--- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 2 +- source/libs/function/src/udfd.c | 2 +- source/libs/qworker/src/qwMsg.c | 8 ++++---- source/libs/scheduler/inc/schedulerInt.h | 2 ++ source/libs/scheduler/src/schRemote.c | 4 ++-- source/libs/transport/src/transCli.c | 2 +- 7 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index 92131e354a..9881c8cb44 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -239,7 +239,7 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t #define NEED_REDIRECT_ERROR(_code) \ ((_code) == TSDB_CODE_RPC_REDIRECT || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || \ (_code) == TSDB_CODE_NODE_NOT_DEPLOYED || (_code) == TSDB_CODE_SYN_NOT_LEADER || \ - (_code) == TSDB_CODE_APP_NOT_READY) + (_code) == TSDB_CODE_APP_NOT_READY || (_code) == TSDB_CODE_RPC_BROKEN_LINK) #define NEED_CLIENT_RM_TBLMETA_REQ(_type) \ ((_type) == TDMT_VND_CREATE_TABLE || (_type) == TDMT_VND_CREATE_STB || (_type) == TDMT_VND_DROP_TABLE || \ @@ -249,8 +249,9 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t ((_code) == TSDB_CODE_RPC_REDIRECT || (_code) == TSDB_CODE_NODE_NOT_DEPLOYED || \ (_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_APP_NOT_READY) -#define NEED_SCHEDULER_RETRY_ERROR(_code) \ - (NEED_SCHEDULER_REDIRECT_ERROR(_code) || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_SCH_TIMEOUT_ERROR) +#define NEED_SCHEDULER_RETRY_ERROR(_code) \ + (NEED_SCHEDULER_REDIRECT_ERROR(_code) || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || \ + (_code) == TSDB_CODE_SCH_TIMEOUT_ERROR || (_code) == TSDB_CODE_RPC_BROKEN_LINK) diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 4e581fd28e..91912bb764 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -250,7 +250,7 @@ static inline void dmReleaseHandle(SRpcHandleInfo *pHandle, int8_t type) { static bool rpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || - code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY) { + code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY || code == TSDB_CODE_RPC_BROKEN_LINK) { if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH) { return false; } diff --git a/source/libs/function/src/udfd.c b/source/libs/function/src/udfd.c index 4aad544e19..708ea4bd38 100644 --- a/source/libs/function/src/udfd.c +++ b/source/libs/function/src/udfd.c @@ -548,7 +548,7 @@ int32_t udfdLoadUdf(char *udfName, SUdf *udf) { } static bool udfdRpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || - code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY) { + code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY || code == TSDB_CODE_RPC_BROKEN_LINK) { if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH) { return false; } diff --git a/source/libs/qworker/src/qwMsg.c b/source/libs/qworker/src/qwMsg.c index 70a6a70c44..30772ff1ac 100644 --- a/source/libs/qworker/src/qwMsg.c +++ b/source/libs/qworker/src/qwMsg.c @@ -205,7 +205,7 @@ int32_t qwRegisterQueryBrokenLinkArg(QW_FPARAMS_DEF, SRpcHandleInfo *pConn) { .msgType = TDMT_SCH_DROP_TASK, .pCont = req, .contLen = sizeof(STaskDropReq), - .code = TSDB_CODE_RPC_NETWORK_UNAVAIL, + .code = TSDB_CODE_RPC_BROKEN_LINK, .info = *pConn, }; @@ -239,7 +239,7 @@ int32_t qwRegisterHbBrokenLinkArg(SQWorker *mgmt, uint64_t sId, SRpcHandleInfo * .msgType = TDMT_SCH_QUERY_HEARTBEAT, .pCont = msg, .contLen = msgSize, - .code = TSDB_CODE_RPC_NETWORK_UNAVAIL, + .code = TSDB_CODE_RPC_BROKEN_LINK, .info = *pConn, }; @@ -484,7 +484,7 @@ int32_t qWorkerProcessDropMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg, int6 SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .code = pMsg->code, .connInfo = pMsg->info}; - if (TSDB_CODE_RPC_NETWORK_UNAVAIL == pMsg->code) { + if (TSDB_CODE_RPC_BROKEN_LINK == pMsg->code) { QW_SCH_TASK_DLOG("receive drop task due to network broken, error:%s", tstrerror(pMsg->code)); } @@ -522,7 +522,7 @@ int32_t qWorkerProcessHbMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg, int64_ uint64_t sId = req.sId; SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .code = pMsg->code, .connInfo = pMsg->info}; - if (TSDB_CODE_RPC_NETWORK_UNAVAIL == pMsg->code) { + if (TSDB_CODE_RPC_BROKEN_LINK == pMsg->code) { QW_SCH_DLOG("receive Hb msg due to network broken, error:%s", tstrerror(pMsg->code)); } diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h index fb0c43b0ff..8dfc703dd9 100644 --- a/source/libs/scheduler/inc/schedulerInt.h +++ b/source/libs/scheduler/inc/schedulerInt.h @@ -306,6 +306,8 @@ extern SSchedulerMgmt schMgmt; #define SCH_IS_WAIT_ALL_JOB(_job) (!SCH_IS_QUERY_JOB(_job)) #define SCH_IS_NEED_DROP_JOB(_job) (SCH_IS_QUERY_JOB(_job)) #define SCH_IS_EXPLAIN_JOB(_job) (EXPLAIN_MODE_ANALYZE == (_job)->attr.explainMode) +#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL) +#define SCH_SUB_TASK_NETWORK_ERR(_code, _len) (((_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_BROKEN_LINK) && ((_len) > 0)) #define SCH_IS_LEVEL_UNFINISHED(_level) ((_level)->taskLaunchedNum < (_level)->taskNum) #define SCH_GET_CUR_EP(_addr) (&(_addr)->epSet.eps[(_addr)->epSet.inUse]) diff --git a/source/libs/scheduler/src/schRemote.c b/source/libs/scheduler/src/schRemote.c index 91f5ff979c..3688cb0240 100644 --- a/source/libs/scheduler/src/schRemote.c +++ b/source/libs/scheduler/src/schRemote.c @@ -384,7 +384,7 @@ int32_t schHandleCallback(void *param, SDataBuf *pMsg, int32_t rspCode) { goto _return; } - bool dropExecNode = (msgType == TDMT_SCH_LINK_BROKEN || rspCode == TSDB_CODE_RPC_NETWORK_UNAVAIL); + bool dropExecNode = (msgType == TDMT_SCH_LINK_BROKEN || SCH_NETWORK_ERR(rspCode)); SCH_ERR_JRET(schUpdateTaskHandle(pJob, pTask, dropExecNode, pMsg->handle, pParam->execIdx)); int8_t status = 0; @@ -396,7 +396,7 @@ int32_t schHandleCallback(void *param, SDataBuf *pMsg, int32_t rspCode) { SCH_ERR_JRET(schValidateReceivedMsgType(pJob, pTask, msgType)); - if (NEED_SCHEDULER_REDIRECT_ERROR(rspCode) || ((rspCode == TSDB_CODE_RPC_NETWORK_UNAVAIL) && pMsg->len > 0)) { + if (NEED_SCHEDULER_REDIRECT_ERROR(rspCode) || SCH_SUB_TASK_NETWORK_ERR(rspCode, pMsg->len > 0)) { code = schHandleRedirect(pJob, pTask, (SDataBuf *)pMsg, rspCode); goto _return; } diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index e21237ec10..5de907f8cb 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -328,7 +328,7 @@ void cliHandleResp(SCliConn* conn) { tDebug("%s conn %p construct ahandle %p by %s, persist: 1", CONN_GET_INST_LABEL(conn), conn, transMsg.info.ahandle, TMSG_INFO(transMsg.msgType)); if (!CONN_RELEASE_BY_SERVER(conn) && transMsg.info.ahandle == NULL) { - transMsg.code = TSDB_CODE_RPC_NETWORK_UNAVAIL; + transMsg.code = TSDB_CODE_RPC_BROKEN_LINK; transMsg.info.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); tDebug("%s conn %p construct ahandle %p due brokenlink, persist: 1", CONN_GET_INST_LABEL(conn), conn, transMsg.info.ahandle); From e389dcedc8fb3bdebd20b107c0f446e62d4df56a Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 30 Jun 2022 10:30:36 +0800 Subject: [PATCH 09/14] add query epset --- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 1 + source/dnode/mnode/impl/src/mndMain.c | 23 ++++++++++--------- source/dnode/vnode/src/vnd/vnodeSync.c | 10 ++++---- source/libs/transport/src/transCli.c | 23 +++++++++++++------ 4 files changed, 34 insertions(+), 23 deletions(-) diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 91912bb764..939d6f52f3 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -41,6 +41,7 @@ static inline void dmBuildMnodeRedirectRsp(SDnode *pDnode, SRpcMsg *pMsg) { } static inline void dmSendRedirectRsp(SRpcMsg *pMsg, const SEpSet *pNewEpSet) { + pMsg->info.hasEpSet = 1; SRpcMsg rsp = {.code = TSDB_CODE_RPC_REDIRECT, .info = pMsg->info}; int32_t contLen = tSerializeSEpSet(NULL, 0, pNewEpSet); diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index ff237c5a0c..c39c9847a9 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -58,7 +58,7 @@ static void *mndBuildTimerMsg(int32_t *pContLen) { static void mndPullupTrans(SMnode *pMnode) { int32_t contLen = 0; - void *pReq = mndBuildTimerMsg(&contLen); + void * pReq = mndBuildTimerMsg(&contLen); if (pReq != NULL) { SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pReq, .contLen = contLen}; tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); @@ -67,14 +67,14 @@ static void mndPullupTrans(SMnode *pMnode) { static void mndTtlTimer(SMnode *pMnode) { int32_t contLen = 0; - void *pReq = mndBuildTimerMsg(&contLen); + void * pReq = mndBuildTimerMsg(&contLen); SRpcMsg rpcMsg = {.msgType = TDMT_MND_TTL_TIMER, .pCont = pReq, .contLen = contLen}; tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); } static void mndCalMqRebalance(SMnode *pMnode) { int32_t contLen = 0; - void *pReq = mndBuildTimerMsg(&contLen); + void * pReq = mndBuildTimerMsg(&contLen); if (pReq != NULL) { SRpcMsg rpcMsg = {.msgType = TDMT_MND_MQ_TIMER, .pCont = pReq, .contLen = contLen}; tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); @@ -83,7 +83,7 @@ static void mndCalMqRebalance(SMnode *pMnode) { static void mndPullupTelem(SMnode *pMnode) { int32_t contLen = 0; - void *pReq = mndBuildTimerMsg(&contLen); + void * pReq = mndBuildTimerMsg(&contLen); if (pReq != NULL) { SRpcMsg rpcMsg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pReq, .contLen = contLen}; tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); @@ -395,7 +395,7 @@ void mndStop(SMnode *pMnode) { } int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { - SMnode *pMnode = pMsg->info.node; + SMnode * pMnode = pMsg->info.node; SSyncMgmt *pMgmt = &pMnode->syncMgmt; int32_t code = 0; @@ -413,7 +413,7 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { } do { - char *syncNodeStr = sync2SimpleStr(pMgmt->sync); + char * syncNodeStr = sync2SimpleStr(pMgmt->sync); static int64_t mndTick = 0; if (++mndTick % 10 == 1) { mTrace("vgId:%d, sync trace msg:%s, %s", syncGetVgId(pMgmt->sync), TMSG_INFO(pMsg->msgType), syncNodeStr); @@ -527,8 +527,8 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { static int32_t mndCheckMnodeState(SRpcMsg *pMsg) { if (!IsReq(pMsg)) return 0; - if (pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY || - pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT || + if (pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY || + pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT || pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_DROP_TASK) { return 0; } @@ -552,6 +552,7 @@ static int32_t mndCheckMnodeState(SRpcMsg *pMsg) { int32_t contLen = tSerializeSEpSet(NULL, 0, &epSet); pMsg->info.rsp = rpcMallocCont(contLen); + pMsg->info.hasEpSet = 1; if (pMsg->info.rsp != NULL) { tSerializeSEpSet(pMsg->info.rsp, contLen, &epSet); pMsg->info.rspLen = contLen; @@ -578,7 +579,7 @@ static int32_t mndCheckMsgContent(SRpcMsg *pMsg) { } int32_t mndProcessRpcMsg(SRpcMsg *pMsg) { - SMnode *pMnode = pMsg->info.node; + SMnode * pMnode = pMsg->info.node; const STraceId *trace = &pMsg->info.traceId; MndMsgFp fp = pMnode->msgFp[TMSG_INDEX(pMsg->msgType)]; @@ -631,7 +632,7 @@ int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgr SMonStbInfo *pStbInfo, SMonGrantInfo *pGrantInfo) { if (mndAcquireRpcRef(pMnode) != 0) return -1; - SSdb *pSdb = pMnode->pSdb; + SSdb * pSdb = pMnode->pSdb; int64_t ms = taosGetTimestampMs(); pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc)); @@ -712,7 +713,7 @@ int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgr pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries; tstrncpy(desc.status, "unsynced", sizeof(desc.status)); for (int32_t i = 0; i < pVgroup->replica; ++i) { - SVnodeGid *pVgid = &pVgroup->vnodeGid[i]; + SVnodeGid * pVgid = &pVgroup->vnodeGid[i]; SMonVnodeDesc *pVnDesc = &desc.vnodes[i]; pVnDesc->dnode_id = pVgid->dnodeId; tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->role), sizeof(pVnDesc->vnode_role)); diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index d324a76438..67e507858f 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -119,7 +119,7 @@ static int32_t vnodeProcessAlterReplicaReq(SVnode *pVnode, SRpcMsg *pMsg) { } void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { - SVnode *pVnode = pInfo->ahandle; + SVnode * pVnode = pInfo->ahandle; int32_t vgId = pVnode->config.vgId; int32_t code = 0; SRpcMsg *pMsg = NULL; @@ -178,7 +178,7 @@ void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { for (int32_t i = 0; i < newEpSet.numOfEps; ++i) { vGTrace("vgId:%d, msg:%p redirect:%d ep:%s:%u", vgId, pMsg, i, newEpSet.eps[i].fqdn, newEpSet.eps[i].port); } - + pMsg->info.hasEpSet = 1; SRpcMsg rsp = {.code = TSDB_CODE_RPC_REDIRECT, .info = pMsg->info}; tmsgSendRedirectRsp(&rsp, &newEpSet); } else { @@ -199,7 +199,7 @@ void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { } void vnodeApplyMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { - SVnode *pVnode = pInfo->ahandle; + SVnode * pVnode = pInfo->ahandle; int32_t vgId = pVnode->config.vgId; int32_t code = 0; SRpcMsg *pMsg = NULL; @@ -240,7 +240,7 @@ int32_t vnodeProcessSyncReq(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) { STraceId *trace = &pMsg->info.traceId; do { - char *syncNodeStr = sync2SimpleStr(pVnode->sync); + char * syncNodeStr = sync2SimpleStr(pVnode->sync); static int64_t vndTick = 0; if (++vndTick % 10 == 1) { vGTrace("vgId:%d, sync trace msg:%s, %s", syncGetVgId(pVnode->sync), TMSG_INFO(pMsg->msgType), syncNodeStr); @@ -375,7 +375,7 @@ static void vnodeSyncReconfig(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SReCon } static void vnodeSyncCommitMsg(SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { - SVnode *pVnode = pFsm->data; + SVnode * pVnode = pFsm->data; SSnapshot snapshot = {0}; SyncIndex beginIndex = SYNC_INDEX_INVALID; char logBuf[256] = {0}; diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 5de907f8cb..b48939f9f9 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1014,19 +1014,28 @@ void cliCompareAndSwap(int8_t* val, int8_t exp, int8_t newVal) { } bool cliTryToExtractEpSet(STransMsg* pResp, SEpSet* dst) { - if (pResp == NULL || pResp->info.hasEpSet == 0) { + if ((pResp == NULL || pResp->info.hasEpSet == 0)) { + return false; + } + // rebuild resp msg + SEpSet epset; + if (tDeserializeSEpSet(pResp->pCont, pResp->contLen, &epset) < 0) { return false; } - tDeserializeSEpSet(pResp->pCont, pResp->contLen, dst); int32_t tlen = tSerializeSEpSet(NULL, 0, dst); - int32_t bufLen = pResp->contLen - tlen; - char* buf = rpcMallocCont(bufLen); - - memcpy(buf, (char*)pResp->pCont + tlen, bufLen); + char* buf = NULL; + int32_t len = pResp->contLen - tlen; + if (len != 0) { + buf = rpcMallocCont(len); + memcpy(buf, (char*)pResp->pCont + tlen, len); + } + rpcFreeCont(pResp->pCont); pResp->pCont = buf; - pResp->contLen = bufLen; + pResp->contLen = len; + + *dst = epset; return true; } int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { From a535df75902d2fb9326012a5e4c7376ac45c86de Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 30 Jun 2022 11:04:49 +0800 Subject: [PATCH 10/14] feat: query redirect --- include/common/tmsg.h | 5 ++ include/libs/nodes/plannodes.h | 1 + source/common/src/tmsg.c | 2 + source/libs/executor/src/executorimpl.c | 35 ++++---- source/libs/nodes/src/nodesCloneFuncs.c | 1 + source/libs/nodes/src/nodesCodeFuncs.c | 6 ++ source/libs/qworker/inc/qwInt.h | 27 +++--- source/libs/qworker/src/qwDbg.c | 59 ++++++++++++- source/libs/qworker/src/qwMsg.c | 46 ++++++++++ source/libs/qworker/src/qwUtil.c | 28 +++--- source/libs/qworker/src/qworker.c | 35 +++++++- source/libs/scheduler/inc/schedulerInt.h | 27 +++--- source/libs/scheduler/src/schJob.c | 106 +++++++++++++---------- source/libs/scheduler/src/schRemote.c | 15 ++-- 14 files changed, 282 insertions(+), 111 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index c5b0b89311..2876105748 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1494,6 +1494,7 @@ typedef struct SSubQueryMsg { uint64_t queryId; uint64_t taskId; int64_t refId; + int32_t execId; int8_t taskType; int8_t explain; uint32_t sqlLen; // the query sql, @@ -1513,6 +1514,7 @@ typedef struct { uint64_t sId; uint64_t queryId; uint64_t taskId; + int32_t execId; } SQueryContinueReq; typedef struct { @@ -1534,6 +1536,7 @@ typedef struct { uint64_t sId; uint64_t queryId; uint64_t taskId; + int32_t execId; } SResFetchReq; typedef struct { @@ -1545,6 +1548,7 @@ typedef struct { uint64_t queryId; uint64_t taskId; int64_t refId; + int32_t execId; int8_t status; } STaskStatus; @@ -1602,6 +1606,7 @@ typedef struct { uint64_t queryId; uint64_t taskId; int64_t refId; + int32_t execId; } STaskDropReq; typedef struct { diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index 4671c8b81e..8031b16d93 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -346,6 +346,7 @@ typedef struct SDownstreamSourceNode { SQueryNodeAddr addr; uint64_t taskId; uint64_t schedId; + int32_t execId; } SDownstreamSourceNode; typedef struct SExchangePhysiNode { diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index e9b5c67d76..23b33674e4 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -4312,6 +4312,7 @@ int32_t tSerializeSSchedulerHbRsp(void *buf, int32_t bufLen, SSchedulerHbRsp *pR if (tEncodeU64(&encoder, status->queryId) < 0) return -1; if (tEncodeU64(&encoder, status->taskId) < 0) return -1; if (tEncodeI64(&encoder, status->refId) < 0) return -1; + if (tEncodeI32(&encoder, status->execId) < 0) return -1; if (tEncodeI8(&encoder, status->status) < 0) return -1; } } else { @@ -4342,6 +4343,7 @@ int32_t tDeserializeSSchedulerHbRsp(void *buf, int32_t bufLen, SSchedulerHbRsp * if (tDecodeU64(&decoder, &status.queryId) < 0) return -1; if (tDecodeU64(&decoder, &status.taskId) < 0) return -1; if (tDecodeI64(&decoder, &status.refId) < 0) return -1; + if (tDecodeI32(&decoder, &status.execId) < 0) return -1; if (tDecodeI8(&decoder, &status.status) < 0) return -1; taosArrayPush(pRsp->taskStatus, &status); } diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index c6f1096bfe..535272bc42 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -2010,13 +2010,14 @@ static int32_t doSendFetchDataRequest(SExchangeInfo* pExchangeInfo, SExecTaskInf ASSERT(pDataInfo->status == EX_SOURCE_DATA_NOT_READY); - qDebug("%s build fetch msg and send to vgId:%d, ep:%s, taskId:0x%" PRIx64 ", %d/%" PRIzu, GET_TASKID(pTaskInfo), - pSource->addr.nodeId, pSource->addr.epSet.eps[0].fqdn, pSource->taskId, sourceIndex, totalSources); + qDebug("%s build fetch msg and send to vgId:%d, ep:%s, taskId:0x%" PRIx64 ", execId:%d, %d/%" PRIzu, GET_TASKID(pTaskInfo), + pSource->addr.nodeId, pSource->addr.epSet.eps[0].fqdn, pSource->taskId, pSource->execId, sourceIndex, totalSources); pMsg->header.vgId = htonl(pSource->addr.nodeId); pMsg->sId = htobe64(pSource->schedId); pMsg->taskId = htobe64(pSource->taskId); pMsg->queryId = htobe64(pTaskInfo->id.queryId); + pMsg->execId = htonl(pSource->execId); // send the fetch remote task result reques SMsgSendInfo* pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); @@ -2145,9 +2146,9 @@ static SSDataBlock* concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SEx SSDataBlock* pRes = pExchangeInfo->pResult; SLoadRemoteDataInfo* pLoadInfo = &pExchangeInfo->loadInfo; if (pRsp->numOfRows == 0) { - qDebug("%s vgId:%d, taskId:0x%" PRIx64 " index:%d completed, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64 + qDebug("%s vgId:%d, taskId:0x%" PRIx64 " execId:%d index:%d completed, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64 ", completed:%d try next %d/%" PRIzu, - GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, i, pDataInfo->totalRows, + GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, i, pDataInfo->totalRows, pExchangeInfo->loadInfo.totalRows, completed + 1, i + 1, totalSources); pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED; completed += 1; @@ -2165,17 +2166,17 @@ static SSDataBlock* concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SEx } if (pRsp->completed == 1) { - qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 + qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " execId:%d" " index:%d completed, numOfRows:%d, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64 ", totalBytes:%" PRIu64 ", completed:%d try next %d/%" PRIzu, - GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, i, pRes->info.rows, pDataInfo->totalRows, + GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, i, pRes->info.rows, pDataInfo->totalRows, pLoadInfo->totalRows, pLoadInfo->totalSize, completed + 1, i + 1, totalSources); completed += 1; pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED; } else { - qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " numOfRows:%d, totalRows:%" PRIu64 + qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " execId:%d numOfRows:%d, totalRows:%" PRIu64 ", totalBytes:%" PRIu64, - GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pRes->info.rows, pLoadInfo->totalRows, + GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, pRes->info.rows, pLoadInfo->totalRows, pLoadInfo->totalSize); } @@ -2249,8 +2250,8 @@ static SSDataBlock* seqLoadRemoteData(SOperatorInfo* pOperator) { SDownstreamSourceNode* pSource = taosArrayGet(pExchangeInfo->pSources, pExchangeInfo->current); if (pDataInfo->code != TSDB_CODE_SUCCESS) { - qError("%s vgId:%d, taskID:0x%" PRIx64 " error happens, code:%s", GET_TASKID(pTaskInfo), pSource->addr.nodeId, - pSource->taskId, tstrerror(pDataInfo->code)); + qError("%s vgId:%d, taskID:0x%" PRIx64 " execId:%d error happens, code:%s", GET_TASKID(pTaskInfo), pSource->addr.nodeId, + pSource->taskId, pSource->execId, tstrerror(pDataInfo->code)); pOperator->pTaskInfo->code = pDataInfo->code; return NULL; } @@ -2258,9 +2259,9 @@ static SSDataBlock* seqLoadRemoteData(SOperatorInfo* pOperator) { SRetrieveTableRsp* pRsp = pDataInfo->pRsp; SLoadRemoteDataInfo* pLoadInfo = &pExchangeInfo->loadInfo; if (pRsp->numOfRows == 0) { - qDebug("%s vgId:%d, taskID:0x%" PRIx64 " %d of total completed, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64 + qDebug("%s vgId:%d, taskID:0x%" PRIx64 " execId:%d %d of total completed, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64 " try next", - GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pExchangeInfo->current + 1, + GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, pExchangeInfo->current + 1, pDataInfo->totalRows, pLoadInfo->totalRows); pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED; @@ -2276,17 +2277,17 @@ static SSDataBlock* seqLoadRemoteData(SOperatorInfo* pOperator) { pTableRsp->compLen, pTableRsp->numOfCols, startTs, &pDataInfo->totalRows, NULL); if (pRsp->completed == 1) { - qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " numOfRows:%d, rowsOfSource:%" PRIu64 + qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " execId:%d numOfRows:%d, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64 ", totalBytes:%" PRIu64 " try next %d/%" PRIzu, - GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pRes->info.rows, pDataInfo->totalRows, + GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, pRes->info.rows, pDataInfo->totalRows, pLoadInfo->totalRows, pLoadInfo->totalSize, pExchangeInfo->current + 1, totalSources); pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED; pExchangeInfo->current += 1; } else { - qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " numOfRows:%d, totalRows:%" PRIu64 + qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " execId:%d numOfRows:%d, totalRows:%" PRIu64 ", totalBytes:%" PRIu64, - GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pRes->info.rows, pLoadInfo->totalRows, + GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, pSource->execId, pRes->info.rows, pLoadInfo->totalRows, pLoadInfo->totalSize); } @@ -2378,7 +2379,7 @@ static int32_t initExchangeOperator(SExchangePhysiNode* pExNode, SExchangeInfo* } for (int32_t i = 0; i < numOfSources; ++i) { - SNodeListNode* pNode = (SNodeListNode*)nodesListGetNode((SNodeList*)pExNode->pSrcEndPoints, i); + SDownstreamSourceNode* pNode = (SDownstreamSourceNode*)nodesListGetNode((SNodeList*)pExNode->pSrcEndPoints, i); taosArrayPush(pInfo->pSources, pNode); } diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index b372bf75fc..97b8baec58 100644 --- a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -582,6 +582,7 @@ static int32_t downstreamSourceCopy(const SDownstreamSourceNode* pSrc, SDownstre COPY_OBJECT_FIELD(addr, sizeof(SQueryNodeAddr)); COPY_SCALAR_FIELD(taskId); COPY_SCALAR_FIELD(schedId); + COPY_SCALAR_FIELD(execId); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index df7429bd88..4375a7b04c 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -3441,6 +3441,9 @@ static int32_t downstreamSourceNodeToJson(const void* pObj, SJson* pJson) { if (TSDB_CODE_SUCCESS == code) { code = tjsonAddIntegerToObject(pJson, jkDownstreamSourceSchedId, pNode->schedId); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddIntegerToObject(pJson, jkDownstreamSourceSchedId, pNode->execId); + } return code; } @@ -3455,6 +3458,9 @@ static int32_t jsonToDownstreamSourceNode(const SJson* pJson, void* pObj) { if (TSDB_CODE_SUCCESS == code) { code = tjsonGetUBigIntValue(pJson, jkDownstreamSourceSchedId, &pNode->schedId); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonGetIntValue(pJson, jkDownstreamSourceSchedId, &pNode->execId); + } return code; } diff --git a/source/libs/qworker/inc/qwInt.h b/source/libs/qworker/inc/qwInt.h index 4edd7a8a6e..4fa2615470 100644 --- a/source/libs/qworker/inc/qwInt.h +++ b/source/libs/qworker/inc/qwInt.h @@ -75,6 +75,7 @@ typedef struct SQWDebug { bool lockEnable; bool statusEnable; bool dumpEnable; + bool tmp; } SQWDebug; extern SQWDebug gQWDebug; @@ -122,6 +123,7 @@ typedef struct SQWTaskCtx { int8_t taskType; int8_t explain; int32_t queryType; + int32_t execId; bool queryFetched; bool queryEnd; @@ -200,8 +202,8 @@ typedef struct SQWorkerMgmt { int32_t paramIdx; } SQWorkerMgmt; -#define QW_FPARAMS_DEF SQWorker *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, int64_t rId -#define QW_IDS() sId, qId, tId, rId +#define QW_FPARAMS_DEF SQWorker *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, int64_t rId, int32_t eId +#define QW_IDS() sId, qId, tId, rId, eId #define QW_FPARAMS() mgmt, QW_IDS() #define QW_STAT_INC(_item, _n) atomic_add_fetch_64(&(_item), _n) @@ -226,15 +228,18 @@ typedef struct SQWorkerMgmt { #define QW_TASK_READY(status) \ (status == JOB_TASK_STATUS_SUCCEED || status == JOB_TASK_STATUS_FAILED || status == JOB_TASK_STATUS_CANCELLED || \ status == JOB_TASK_STATUS_PARTIAL_SUCCEED) -#define QW_SET_QTID(id, qId, tId) \ - do { \ - *(uint64_t *)(id) = (qId); \ - *(uint64_t *)((char *)(id) + sizeof(qId)) = (tId); \ +#define QW_SET_QTID(id, qId, tId, eId) \ + do { \ + *(uint64_t *)(id) = (qId); \ + *(uint64_t *)((char *)(id) + sizeof(qId)) = (tId); \ + *(int32_t *)((char *)(id) + sizeof(qId) + sizeof(tId)) = (eId); \ } while (0) -#define QW_GET_QTID(id, qId, tId) \ - do { \ - (qId) = *(uint64_t *)(id); \ - (tId) = *(uint64_t *)((char *)(id) + sizeof(qId)); \ + +#define QW_GET_QTID(id, qId, tId, eId) \ + do { \ + (qId) = *(uint64_t *)(id); \ + (tId) = *(uint64_t *)((char *)(id) + sizeof(qId)); \ + (eId) = *(int32_t *)((char *)(id) + sizeof(qId) + sizeof(tId)); \ } while (0) #define QW_ERR_RET(c) \ @@ -365,6 +370,8 @@ void qwFreeTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx *ctx); void qwDbgDumpMgmtInfo(SQWorker *mgmt); int32_t qwDbgValidateStatus(QW_FPARAMS_DEF, int8_t oriStatus, int8_t newStatus, bool *ignore); +int32_t qwDbgBuildAndSendRedirectRsp(int32_t rspType, SRpcHandleInfo *pConn, int32_t code, SEpSet *pEpSet); +int32_t qwAddTaskCtx(QW_FPARAMS_DEF); #ifdef __cplusplus diff --git a/source/libs/qworker/src/qwDbg.c b/source/libs/qworker/src/qwDbg.c index add9700a3a..0fa01a304c 100644 --- a/source/libs/qworker/src/qwDbg.c +++ b/source/libs/qworker/src/qwDbg.c @@ -9,7 +9,7 @@ #include "tmsg.h" #include "tname.h" -SQWDebug gQWDebug = {.statusEnable = true, .dumpEnable = false}; +SQWDebug gQWDebug = {.statusEnable = true, .dumpEnable = false, .tmp = true}; int32_t qwDbgValidateStatus(QW_FPARAMS_DEF, int8_t oriStatus, int8_t newStatus, bool *ignore) { if (!gQWDebug.statusEnable) { @@ -121,3 +121,60 @@ void qwDbgDumpMgmtInfo(SQWorker *mgmt) { } +int32_t qwDbgBuildAndSendRedirectRsp(int32_t rspType, SRpcHandleInfo *pConn, int32_t code, SEpSet *pEpSet) { + int32_t contLen = 0; + char* rsp = NULL; + + if (pEpSet) { + contLen = tSerializeSEpSet(NULL, 0, pEpSet); + rsp = rpcMallocCont(contLen); + tSerializeSEpSet(rsp, contLen, pEpSet); + } + + SRpcMsg rpcRsp = { + .msgType = rspType, + .pCont = rsp, + .contLen = contLen, + .code = code, + .info = *pConn, + }; + + tmsgSendRsp(&rpcRsp); + + qDebug("response %s msg, code: %s", TMSG_INFO(rspType), tstrerror(code)); + + return TSDB_CODE_SUCCESS; +} + + +int32_t qwDbgEnableDebug(char *option) { + if (0 == strcasecmp(option, "lock")) { + gQWDebug.lockEnable = true; + qDebug("qw lock debug enabled"); + return TSDB_CODE_SUCCESS; + } + + if (0 == strcasecmp(option, "status")) { + gQWDebug.statusEnable = true; + qDebug("qw status debug enabled"); + return TSDB_CODE_SUCCESS; + } + + if (0 == strcasecmp(option, "dump")) { + gQWDebug.dumpEnable = true; + qDebug("qw dump debug enabled"); + return TSDB_CODE_SUCCESS; + } + + if (0 == strcasecmp(option, "tmp")) { + gQWDebug.tmp = true; + qDebug("qw tmp debug enabled"); + return TSDB_CODE_SUCCESS; + } + + qError("invalid qw debug option:%s", option); + + return TSDB_CODE_APP_ERROR; +} + + diff --git a/source/libs/qworker/src/qwMsg.c b/source/libs/qworker/src/qwMsg.c index 30772ff1ac..cc642caa70 100644 --- a/source/libs/qworker/src/qwMsg.c +++ b/source/libs/qworker/src/qwMsg.c @@ -156,6 +156,41 @@ int32_t qwBuildAndSendDropRsp(SRpcHandleInfo *pConn, int32_t code) { return TSDB_CODE_SUCCESS; } +int32_t qwBuildAndSendDropMsg(QW_FPARAMS_DEF, SRpcHandleInfo *pConn) { + STaskDropReq *req = (STaskDropReq *)rpcMallocCont(sizeof(STaskDropReq)); + if (NULL == req) { + QW_SCH_TASK_ELOG("rpcMallocCont %d failed", (int32_t)sizeof(STaskDropReq)); + QW_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + req->header.vgId = mgmt->nodeId; + req->sId = sId; + req->queryId = qId; + req->taskId = tId; + req->refId = rId; + req->execId = eId; + + SRpcMsg pNewMsg = { + .msgType = TDMT_SCH_DROP_TASK, + .pCont = req, + .contLen = sizeof(STaskDropReq), + .code = 0, + .info = *pConn, + }; + + int32_t code = tmsgPutToQueue(&mgmt->msgCb, FETCH_QUEUE, &pNewMsg); + if (TSDB_CODE_SUCCESS != code) { + QW_SCH_TASK_ELOG("put drop task msg to queue failed, vgId:%d, code:%s", mgmt->nodeId, tstrerror(code)); + rpcFreeCont(req); + QW_ERR_RET(code); + } + + QW_SCH_TASK_DLOG("drop task msg put to queue, vgId:%d", mgmt->nodeId); + + return TSDB_CODE_SUCCESS; +} + + int32_t qwBuildAndSendCQueryMsg(QW_FPARAMS_DEF, SRpcHandleInfo *pConn) { SQueryContinueReq *req = (SQueryContinueReq *)rpcMallocCont(sizeof(SQueryContinueReq)); if (NULL == req) { @@ -167,6 +202,7 @@ int32_t qwBuildAndSendCQueryMsg(QW_FPARAMS_DEF, SRpcHandleInfo *pConn) { req->sId = sId; req->queryId = qId; req->taskId = tId; + req->execId = eId; SRpcMsg pNewMsg = { .msgType = TDMT_SCH_QUERY_CONTINUE, @@ -266,6 +302,7 @@ int32_t qWorkerPreprocessQueryMsg(void *qWorkerMgmt, SRpcMsg *pMsg) { msg->queryId = be64toh(msg->queryId); msg->taskId = be64toh(msg->taskId); msg->refId = be64toh(msg->refId); + msg->execId = ntohl(msg->execId); msg->phyLen = ntohl(msg->phyLen); msg->sqlLen = ntohl(msg->sqlLen); @@ -273,6 +310,7 @@ int32_t qWorkerPreprocessQueryMsg(void *qWorkerMgmt, SRpcMsg *pMsg) { uint64_t qId = msg->queryId; uint64_t tId = msg->taskId; int64_t rId = msg->refId; + int32_t eId = msg->execId; SQWMsg qwMsg = {.msg = msg->msg + msg->sqlLen, .msgLen = msg->phyLen, .connInfo = pMsg->info}; @@ -295,6 +333,7 @@ int32_t qWorkerAbortPreprocessQueryMsg(void *qWorkerMgmt, SRpcMsg *pMsg) { uint64_t qId = msg->queryId; uint64_t tId = msg->taskId; int64_t rId = msg->refId; + int32_t eId = msg->execId; QW_SCH_TASK_DLOG("Abort prerocessQuery start, handle:%p", pMsg->info.handle); qwAbortPrerocessQuery(QW_FPARAMS()); @@ -324,6 +363,7 @@ int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg, int uint64_t qId = msg->queryId; uint64_t tId = msg->taskId; int64_t rId = msg->refId; + int32_t eId = msg->execId; SQWMsg qwMsg = {.node = node, .msg = msg->msg + msg->sqlLen, .msgLen = msg->phyLen, .connInfo = pMsg->info, .msgType = pMsg->msgType}; char * sql = strndup(msg->msg, msg->sqlLen); @@ -356,6 +396,7 @@ int32_t qWorkerProcessCQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg, in uint64_t qId = msg->queryId; uint64_t tId = msg->taskId; int64_t rId = 0; + int32_t eId = msg->execId; SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .connInfo = pMsg->info}; @@ -387,11 +428,13 @@ int32_t qWorkerProcessFetchMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg, int msg->sId = be64toh(msg->sId); msg->queryId = be64toh(msg->queryId); msg->taskId = be64toh(msg->taskId); + msg->execId = ntohl(msg->execId); uint64_t sId = msg->sId; uint64_t qId = msg->queryId; uint64_t tId = msg->taskId; int64_t rId = 0; + int32_t eId = msg->execId; SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .connInfo = pMsg->info}; @@ -476,11 +519,13 @@ int32_t qWorkerProcessDropMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg, int6 msg->queryId = be64toh(msg->queryId); msg->taskId = be64toh(msg->taskId); msg->refId = be64toh(msg->refId); + msg->execId = ntohl(msg->execId); uint64_t sId = msg->sId; uint64_t qId = msg->queryId; uint64_t tId = msg->taskId; int64_t rId = msg->refId; + int32_t eId = msg->execId; SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .code = pMsg->code, .connInfo = pMsg->info}; @@ -553,6 +598,7 @@ int32_t qWorkerProcessDeleteMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg, SR uint64_t qId = req.queryId; uint64_t tId = req.taskId; int64_t rId = 0; + int32_t eId = 0; SQWMsg qwMsg = {.node = node, .msg = req.msg, .msgLen = req.phyLen, .connInfo = pMsg->info}; QW_SCH_TASK_DLOG("processDelete start, node:%p, handle:%p, sql:%s", node, pMsg->info.handle, req.sql); diff --git a/source/libs/qworker/src/qwUtil.c b/source/libs/qworker/src/qwUtil.c index 0759cf360a..1fb0a34314 100644 --- a/source/libs/qworker/src/qwUtil.c +++ b/source/libs/qworker/src/qwUtil.c @@ -135,8 +135,8 @@ int32_t qwAcquireScheduler(SQWorker *mgmt, uint64_t sId, int32_t rwType, SQWSchS void qwReleaseScheduler(int32_t rwType, SQWorker *mgmt) { QW_UNLOCK(rwType, &mgmt->schLock); } int32_t qwAcquireTaskStatus(QW_FPARAMS_DEF, int32_t rwType, SQWSchStatus *sch, SQWTaskStatus **task) { - char id[sizeof(qId) + sizeof(tId)] = {0}; - QW_SET_QTID(id, qId, tId); + char id[sizeof(qId) + sizeof(tId) + sizeof(eId)] = {0}; + QW_SET_QTID(id, qId, tId, eId); QW_LOCK(rwType, &sch->tasksLock); *task = taosHashGet(sch->tasksHash, id, sizeof(id)); @@ -151,8 +151,8 @@ int32_t qwAcquireTaskStatus(QW_FPARAMS_DEF, int32_t rwType, SQWSchStatus *sch, S int32_t qwAddTaskStatusImpl(QW_FPARAMS_DEF, SQWSchStatus *sch, int32_t rwType, int32_t status, SQWTaskStatus **task) { int32_t code = 0; - char id[sizeof(qId) + sizeof(tId)] = {0}; - QW_SET_QTID(id, qId, tId); + char id[sizeof(qId) + sizeof(tId) + sizeof(eId)] = {0}; + QW_SET_QTID(id, qId, tId, eId); SQWTaskStatus ntask = {0}; ntask.status = status; @@ -207,8 +207,8 @@ int32_t qwAddAcquireTaskStatus(QW_FPARAMS_DEF, int32_t rwType, SQWSchStatus *sch void qwReleaseTaskStatus(int32_t rwType, SQWSchStatus *sch) { QW_UNLOCK(rwType, &sch->tasksLock); } int32_t qwAcquireTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx) { - char id[sizeof(qId) + sizeof(tId)] = {0}; - QW_SET_QTID(id, qId, tId); + char id[sizeof(qId) + sizeof(tId) + sizeof(eId)] = {0}; + QW_SET_QTID(id, qId, tId, eId); *ctx = taosHashAcquire(mgmt->ctxHash, id, sizeof(id)); if (NULL == (*ctx)) { @@ -220,8 +220,8 @@ int32_t qwAcquireTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx) { } int32_t qwGetTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx) { - char id[sizeof(qId) + sizeof(tId)] = {0}; - QW_SET_QTID(id, qId, tId); + char id[sizeof(qId) + sizeof(tId) + sizeof(eId)] = {0}; + QW_SET_QTID(id, qId, tId, eId); *ctx = taosHashGet(mgmt->ctxHash, id, sizeof(id)); if (NULL == (*ctx)) { @@ -233,8 +233,8 @@ int32_t qwGetTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx) { } int32_t qwAddTaskCtxImpl(QW_FPARAMS_DEF, bool acquire, SQWTaskCtx **ctx) { - char id[sizeof(qId) + sizeof(tId)] = {0}; - QW_SET_QTID(id, qId, tId); + char id[sizeof(qId) + sizeof(tId) + sizeof(eId)] = {0}; + QW_SET_QTID(id, qId, tId, eId); SQWTaskCtx nctx = {0}; @@ -314,8 +314,8 @@ void qwFreeTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx *ctx) { } int32_t qwDropTaskCtx(QW_FPARAMS_DEF) { - char id[sizeof(qId) + sizeof(tId)] = {0}; - QW_SET_QTID(id, qId, tId); + char id[sizeof(qId) + sizeof(tId) + sizeof(eId)] = {0}; + QW_SET_QTID(id, qId, tId, eId); SQWTaskCtx octx; SQWTaskCtx *ctx = taosHashGet(mgmt->ctxHash, id, sizeof(id)); @@ -348,8 +348,8 @@ int32_t qwDropTaskStatus(QW_FPARAMS_DEF) { SQWTaskStatus *task = NULL; int32_t code = 0; - char id[sizeof(qId) + sizeof(tId)] = {0}; - QW_SET_QTID(id, qId, tId); + char id[sizeof(qId) + sizeof(tId) + sizeof(eId)] = {0}; + QW_SET_QTID(id, qId, tId, eId); if (qwAcquireScheduler(mgmt, sId, QW_WRITE, &sch)) { QW_TASK_WLOG_E("scheduler does not exist"); diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 8ab293d0ad..2b23f7a27f 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -168,7 +168,7 @@ int32_t qwGenerateSchHbRsp(SQWorker *mgmt, SQWSchStatus *sch, SQWHbInfo *hbInfo) // TODO GET EXECUTOR API TO GET MORE INFO - QW_GET_QTID(key, status.queryId, status.taskId); + QW_GET_QTID(key, status.queryId, status.taskId, status.execId); status.status = taskStatus->status; status.refId = taskStatus->refId; @@ -493,7 +493,9 @@ int32_t qwPrerocessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) { QW_ERR_JRET(qwRegisterQueryBrokenLinkArg(QW_FPARAMS(), &qwMsg->connInfo)); - QW_ERR_JRET(qwAddAcquireTaskCtx(QW_FPARAMS(), &ctx)); + QW_ERR_JRET(qwAddTaskCtx(QW_FPARAMS())); + + QW_ERR_JRET(qwAcquireTaskCtx(QW_FPARAMS(), &ctx)); ctx->ctrlConnInfo = qwMsg->connInfo; @@ -562,6 +564,33 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType, int8_t ex QW_ERR_JRET(qwExecTask(QW_FPARAMS(), ctx, NULL)); } + + if (gQWDebug.tmp) { +#if 0 + SEpSet epSet = {0}; + epSet.inUse = 1; + epSet.numOfEps = 3; + strcpy(epSet.eps[0].fqdn, "localhost"); + epSet.eps[0].port = 7100; + strcpy(epSet.eps[1].fqdn, "localhost"); + epSet.eps[1].port = 7200; + strcpy(epSet.eps[2].fqdn, "localhost"); + epSet.eps[2].port = 7300; + + qwDbgBuildAndSendRedirectRsp(pMsg->msgType + 1, &pMsg->info, TSDB_CODE_RPC_REDIRECT, &epSet); + gQWDebug.tmp = false; + return TSDB_CODE_SUCCESS; +#else + if (TDMT_SCH_MERGE_QUERY == qwMsg->msgType) { + ctx->phase = QW_PHASE_POST_QUERY; + qwDbgBuildAndSendRedirectRsp(qwMsg->msgType + 1, &qwMsg->connInfo, TSDB_CODE_RPC_REDIRECT, NULL); + gQWDebug.tmp = false; + return TSDB_CODE_SUCCESS; + } +#endif + } + + _return: input.code = code; @@ -734,8 +763,6 @@ int32_t qwProcessDrop(QW_FPARAMS_DEF, SQWMsg *qwMsg) { SQWTaskCtx *ctx = NULL; bool locked = false; - // TODO : TASK ALREADY REMOVED AND A NEW DROP MSG RECEIVED - QW_ERR_JRET(qwAcquireTaskCtx(QW_FPARAMS(), &ctx)); QW_LOCK(QW_WRITE, &ctx->lock); diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h index 8dfc703dd9..8595e41640 100644 --- a/source/libs/scheduler/inc/schedulerInt.h +++ b/source/libs/scheduler/inc/schedulerInt.h @@ -125,7 +125,7 @@ typedef struct SSchTaskCallbackParam { uint64_t queryId; int64_t refId; uint64_t taskId; - int32_t execIdx; + int32_t execId; void *pTrans; } SSchTaskCallbackParam; @@ -171,7 +171,7 @@ typedef struct SSchTask { uint64_t taskId; // task id SRWLatch lock; // task lock int32_t maxExecTimes; // task may exec times - int32_t execIdx; // task current execute try index + int32_t execId; // task current execute try index SSchLevel *level; // level SRWLatch planLock; // task update plan lock SSubplan *plan; // subplan @@ -243,9 +243,9 @@ extern SSchedulerMgmt schMgmt; #define SCH_LOG_TASK_START_TS(_task) \ do { \ int64_t us = taosGetTimestampUs(); \ - int32_t idx = (_task)->execIdx % SCH_TASK_MAX_EXEC_TIMES; \ + int32_t idx = (_task)->execId % SCH_TASK_MAX_EXEC_TIMES; \ (_task)->profile.execUseTime[idx] = us; \ - if (0 == (_task)->execIdx) { \ + if (0 == (_task)->execId) { \ (_task)->profile.startTs = us; \ } \ } while (0) @@ -253,7 +253,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_LOG_TASK_WAIT_TS(_task) \ do { \ int64_t us = taosGetTimestampUs(); \ - int32_t idx = (_task)->execIdx % SCH_TASK_MAX_EXEC_TIMES; \ + int32_t idx = (_task)->execId % SCH_TASK_MAX_EXEC_TIMES; \ (_task)->profile.waitTime += us - (_task)->profile.execUseTime[idx]; \ } while (0) @@ -261,12 +261,12 @@ extern SSchedulerMgmt schMgmt; #define SCH_LOG_TASK_END_TS(_task) \ do { \ int64_t us = taosGetTimestampUs(); \ - int32_t idx = (_task)->execIdx % SCH_TASK_MAX_EXEC_TIMES; \ + int32_t idx = (_task)->execId % SCH_TASK_MAX_EXEC_TIMES; \ (_task)->profile.execUseTime[idx] = us - (_task)->profile.execUseTime[idx]; \ (_task)->profile.endTs = us; \ } while (0) -#define SCH_TASK_TIMEOUT(_task) ((taosGetTimestampUs() - (_task)->profile.execUseTime[(_task)->execIdx % SCH_TASK_MAX_EXEC_TIMES]) > (_task)->timeoutUsec) +#define SCH_TASK_TIMEOUT(_task) ((taosGetTimestampUs() - (_task)->profile.execUseTime[(_task)->execId % SCH_TASK_MAX_EXEC_TIMES]) > (_task)->timeoutUsec) #define SCH_TASK_READY_FOR_LAUNCH(readyNum, task) ((readyNum) >= taosArrayGetSize((task)->children)) @@ -274,6 +274,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_UNLOCK_TASK(_task) SCH_UNLOCK(SCH_WRITE, &(_task)->lock) #define SCH_TASK_ID(_task) ((_task) ? (_task)->taskId : -1) +#define SCH_TASK_EID(_task) ((_task) ? (_task)->execId : -1) #define SCH_SET_TASK_LASTMSG_TYPE(_task, _type) do { if(_task) { atomic_store_32(&(_task)->lastMsgType, _type); } } while (0) #define SCH_GET_TASK_LASTMSG_TYPE(_task) ((_task) ? atomic_load_32(&(_task)->lastMsgType) : -1) @@ -318,13 +319,13 @@ extern SSchedulerMgmt schMgmt; #define SCH_JOB_DLOG(param, ...) qDebug("QID:0x%" PRIx64 " " param, pJob->queryId, __VA_ARGS__) #define SCH_TASK_ELOG(param, ...) \ - qError("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__) + qError("QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, pJob->queryId, SCH_TASK_ID(pTask), SCH_TASK_EID(pTask),__VA_ARGS__) #define SCH_TASK_DLOG(param, ...) \ - qDebug("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__) + qDebug("QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, pJob->queryId, SCH_TASK_ID(pTask), SCH_TASK_EID(pTask),__VA_ARGS__) #define SCH_TASK_DLOGL(param, ...) \ - qDebugL("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__) + qDebugL("QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, pJob->queryId, SCH_TASK_ID(pTask), SCH_TASK_EID(pTask),__VA_ARGS__) #define SCH_TASK_WLOG(param, ...) \ - qWarn("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__) + qWarn("QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, pJob->queryId, SCH_TASK_ID(pTask), SCH_TASK_EID(pTask),__VA_ARGS__) #define SCH_ERR_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; return _code; } } while (0) #define SCH_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; } return _code; } while (0) @@ -366,7 +367,7 @@ void schProcessOnDataFetched(SSchJob *job); int32_t schGetTaskInJob(SSchJob *pJob, uint64_t taskId, SSchTask **pTask); void schFreeRpcCtxVal(const void *arg); int32_t schMakeBrokenLinkVal(SSchJob *pJob, SSchTask *pTask, SRpcBrokenlinkVal *brokenVal, bool isHb); -int32_t schAppendTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, int32_t execIdx); +int32_t schAppendTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, int32_t execId); int32_t schExecStaticExplainJob(SSchedulerReq *pReq, int64_t *job, bool sync); int32_t schExecJobImpl(SSchedulerReq *pReq, SSchJob *pJob, bool sync); int32_t schUpdateJobStatus(SSchJob *pJob, int8_t newStatus); @@ -378,7 +379,7 @@ int32_t schExecJob(SSchedulerReq *pReq, int64_t *pJob, SQueryResult *pRes); int32_t schAsyncExecJob(SSchedulerReq *pReq, int64_t *pJob); int32_t schFetchRows(SSchJob *pJob); int32_t schAsyncFetchRows(SSchJob *pJob); -int32_t schUpdateTaskHandle(SSchJob *pJob, SSchTask *pTask, bool dropExecNode, void *handle, int32_t execIdx); +int32_t schUpdateTaskHandle(SSchJob *pJob, SSchTask *pTask, bool dropExecNode, void *handle, int32_t execId); int32_t schProcessOnTaskStatusRsp(SQueryNodeEpId* pEpId, SArray* pStatusList); void schFreeSMsgSendInfo(SMsgSendInfo *msgSendInfo); char* schGetOpStr(SCH_OP_TYPE type); diff --git a/source/libs/scheduler/src/schJob.c b/source/libs/scheduler/src/schJob.c index 643594f4e0..26824738e9 100644 --- a/source/libs/scheduler/src/schJob.c +++ b/source/libs/scheduler/src/schJob.c @@ -28,7 +28,7 @@ FORCE_INLINE int32_t schReleaseJob(int64_t refId) { qDebug("sch release jobId:0x int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *pLevel) { pTask->plan = pPlan; pTask->level = pLevel; - pTask->execIdx = -1; + pTask->execId = -1; pTask->maxExecTimes = SCH_TASK_MAX_EXEC_TIMES; pTask->timeoutUsec = SCH_DEFAULT_TASK_TIMEOUT_USEC; SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START); @@ -428,59 +428,59 @@ int32_t schRecordTaskSucceedNode(SSchJob *pJob, SSchTask *pTask) { return TSDB_CODE_SUCCESS; } -int32_t schAppendTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, int32_t execIdx) { +int32_t schAppendTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, int32_t execId) { SSchNodeInfo nodeInfo = {.addr = *addr, .handle = NULL}; - if (taosHashPut(pTask->execNodes, &execIdx, sizeof(execIdx), &nodeInfo, sizeof(nodeInfo))) { + if (taosHashPut(pTask->execNodes, &execId, sizeof(execId), &nodeInfo, sizeof(nodeInfo))) { SCH_TASK_ELOG("taosHashPut nodeInfo to execNodes failed, errno:%d", errno); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - SCH_TASK_DLOG("task execNode added, execIdx:%d", execIdx); + SCH_TASK_DLOG("task execNode added, execId:%d", execId); return TSDB_CODE_SUCCESS; } -int32_t schDropTaskExecNode(SSchJob *pJob, SSchTask *pTask, void *handle, int32_t execIdx) { +int32_t schDropTaskExecNode(SSchJob *pJob, SSchTask *pTask, void *handle, int32_t execId) { if (NULL == pTask->execNodes) { return TSDB_CODE_SUCCESS; } - if (taosHashRemove(pTask->execNodes, &execIdx, sizeof(execIdx))) { - SCH_TASK_ELOG("fail to remove execIdx %d from execNodeList", execIdx); + if (taosHashRemove(pTask->execNodes, &execId, sizeof(execId))) { + SCH_TASK_ELOG("fail to remove execId %d from execNodeList", execId); } else { - SCH_TASK_DLOG("execIdx %d removed from execNodeList", execIdx); + SCH_TASK_DLOG("execId %d removed from execNodeList", execId); } - if (execIdx != pTask->execIdx) { // ignore it - SCH_TASK_DLOG("execIdx %d is not current execIdx %d", execIdx, pTask->execIdx); + if (execId != pTask->execId) { // ignore it + SCH_TASK_DLOG("execId %d is not current execId %d", execId, pTask->execId); SCH_RET(TSDB_CODE_SCH_IGNORE_ERROR); } return TSDB_CODE_SUCCESS; } -int32_t schUpdateTaskExecNode(SSchJob *pJob, SSchTask *pTask, void *handle, int32_t execIdx) { +int32_t schUpdateTaskExecNode(SSchJob *pJob, SSchTask *pTask, void *handle, int32_t execId) { if (taosHashGetSize(pTask->execNodes) <= 0) { return TSDB_CODE_SUCCESS; } - SSchNodeInfo *nodeInfo = taosHashGet(pTask->execNodes, &execIdx, sizeof(execIdx)); + SSchNodeInfo *nodeInfo = taosHashGet(pTask->execNodes, &execId, sizeof(execId)); nodeInfo->handle = handle; - SCH_TASK_DLOG("handle updated to %p for execIdx %d", handle, execIdx); + SCH_TASK_DLOG("handle updated to %p for execId %d", handle, execId); return TSDB_CODE_SUCCESS; } -int32_t schUpdateTaskHandle(SSchJob *pJob, SSchTask *pTask, bool dropExecNode, void *handle, int32_t execIdx) { +int32_t schUpdateTaskHandle(SSchJob *pJob, SSchTask *pTask, bool dropExecNode, void *handle, int32_t execId) { if (dropExecNode) { - SCH_RET(schDropTaskExecNode(pJob, pTask, handle, execIdx)); + SCH_RET(schDropTaskExecNode(pJob, pTask, handle, execId)); } SCH_SET_TASK_HANDLE(pTask, handle); - schUpdateTaskExecNode(pJob, pTask, handle, execIdx); + schUpdateTaskExecNode(pJob, pTask, handle, execId); return TSDB_CODE_SUCCESS; } @@ -828,9 +828,9 @@ int32_t schTaskCheckSetRetry(SSchJob *pJob, SSchTask *pTask, int32_t errCode, bo } } - if ((pTask->execIdx + 1) >= pTask->maxExecTimes) { + if ((pTask->execId + 1) >= pTask->maxExecTimes) { *needRetry = false; - SCH_TASK_DLOG("task no more retry since reach max try times, execIdx:%d", pTask->execIdx); + SCH_TASK_DLOG("task no more retry since reach max try times, execId:%d", pTask->execId); return TSDB_CODE_SUCCESS; } @@ -841,9 +841,9 @@ int32_t schTaskCheckSetRetry(SSchJob *pJob, SSchTask *pTask, int32_t errCode, bo } if (SCH_IS_DATA_SRC_TASK(pTask)) { - if ((pTask->execIdx + 1) >= SCH_TASK_NUM_OF_EPS(&pTask->plan->execNode)) { + if ((pTask->execId + 1) >= SCH_TASK_NUM_OF_EPS(&pTask->plan->execNode)) { *needRetry = false; - SCH_TASK_DLOG("task no more retry since all ep tried, execIdx:%d, epNum:%d", pTask->execIdx, + SCH_TASK_DLOG("task no more retry since all ep tried, execId:%d, epNum:%d", pTask->execId, SCH_TASK_NUM_OF_EPS(&pTask->plan->execNode)); return TSDB_CODE_SUCCESS; } @@ -859,7 +859,7 @@ int32_t schTaskCheckSetRetry(SSchJob *pJob, SSchTask *pTask, int32_t errCode, bo } *needRetry = true; - SCH_TASK_DLOG("task need the %dth retry, errCode:%x - %s", pTask->execIdx + 1, errCode, tstrerror(errCode)); + SCH_TASK_DLOG("task need the %dth retry, errCode:%x - %s", pTask->execId + 1, errCode, tstrerror(errCode)); return TSDB_CODE_SUCCESS; } @@ -1171,6 +1171,7 @@ int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask) { SDownstreamSourceNode source = {.type = QUERY_NODE_DOWNSTREAM_SOURCE, .taskId = pTask->taskId, .schedId = schMgmt.sId, + .execId = pTask->execId, .addr = pTask->succeedAddr}; qSetSubplanExecutionNode(parent->plan, pTask->plan->id.groupId, &source); SCH_UNLOCK(SCH_WRITE, &parent->lock); @@ -1256,7 +1257,7 @@ int32_t schRescheduleTask(SSchJob *pJob, SSchTask *pTask) { SCH_LOCK_TASK(pTask); if (SCH_TASK_TIMEOUT(pTask) && JOB_TASK_STATUS_EXECUTING == pTask->status && pJob->fetchTask != pTask && taosArrayGetSize(pTask->candidateAddrs) > 1) { - SCH_TASK_DLOG("task execIdx %d will be rescheduled now", pTask->execIdx); + SCH_TASK_DLOG("task execId %d will be rescheduled now", pTask->execId); schDropTaskOnExecNode(pJob, pTask); taosHashClear(pTask->execNodes); schProcessOnTaskFailure(pJob, pTask, TSDB_CODE_SCH_TIMEOUT_ERROR); @@ -1283,7 +1284,7 @@ int32_t schProcessOnTaskStatusRsp(SQueryNodeEpId* pEpId, SArray* pStatusList) { continue; } - SCH_JOB_DLOG("TID:0x%" PRIx64 " task status in server: %s", taskStatus->taskId, jobTaskStatusStr(taskStatus->status)); + SCH_JOB_DLOG("TID:0x%" PRIx64 "EID:%d task status in server: %s", taskStatus->taskId, taskStatus->execId, jobTaskStatusStr(taskStatus->status)); pTask = NULL; schGetTaskInJob(pJob, taskStatus->taskId, &pTask); @@ -1292,6 +1293,13 @@ int32_t schProcessOnTaskStatusRsp(SQueryNodeEpId* pEpId, SArray* pStatusList) { schReleaseJob(taskStatus->refId); continue; } + + if (taskStatus->execId != pTask->execId) { + // TODO DROP TASK FROM SERVER!!!! + SCH_TASK_DLOG("EID %d in hb rsp mis-match", taskStatus->execId); + schReleaseJob(taskStatus->refId); + continue; + } if (taskStatus->status == JOB_TASK_STATUS_FAILED) { // RECORD AND HANDLE ERROR!!!! @@ -1362,9 +1370,9 @@ int32_t schLaunchTaskImpl(SSchJob *pJob, SSchTask *pTask) { int32_t code = 0; atomic_add_fetch_32(&pTask->level->taskLaunchedNum, 1); - pTask->execIdx++; + pTask->execId++; - SCH_TASK_DLOG("start to launch task's %dth exec", pTask->execIdx); + SCH_TASK_DLOG("start to launch task's %dth exec", pTask->execId); SCH_LOG_TASK_START_TS(pTask); @@ -1677,10 +1685,14 @@ _return: int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, int32_t rspCode) { int32_t code = 0; + int8_t status = 0; + if (schJobNeedToStop(pJob, &status)) { + SCH_TASK_ELOG("redirect will no continue cause of job status %s", jobTaskStatusStr(status)); + SCH_RET(atomic_load_32(&pJob->errCode)); + } - if ((pTask->execIdx + 1) >= pTask->maxExecTimes) { - SCH_TASK_DLOG("task no more retry since reach max try times, execIdx:%d", pTask->execIdx); - SCH_UNLOCK_TASK(pTask); + if ((pTask->execId + 1) >= pTask->maxExecTimes) { + SCH_TASK_DLOG("task no more retry since reach max try times, execId:%d", pTask->execId); schProcessOnJobFailure(pJob, rspCode); return TSDB_CODE_SUCCESS; } @@ -1703,34 +1715,36 @@ int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, int32_t rspCode) { SCH_ERR_JRET(schLaunchTasksInFlowCtrlList(pJob, pTask)); } } - } else { - pTask->childReady = 0; + + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START); - int32_t childrenNum = taosArrayGetSize(pTask->children); - for (int32_t i = 0; i < childrenNum; ++i) { - SSchTask* pChild = taosArrayGetP(pTask->children, i); - SCH_LOCK_TASK(pChild); - code = schDoTaskRedirect(pJob, pChild, rspCode); - SCH_UNLOCK_TASK(pChild); - SCH_ERR_JRET(code); - } - - qClearSubplanExecutionNode(pTask->plan); + SCH_ERR_JRET(schLaunchTask(pJob, pTask)); + + return TSDB_CODE_SUCCESS; } + + // merge plan + + pTask->childReady = 0; + + qClearSubplanExecutionNode(pTask->plan); + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START); - SCH_ERR_JRET(schLaunchTask(pJob, pTask)); - - SCH_UNLOCK_TASK(pTask); + int32_t childrenNum = taosArrayGetSize(pTask->children); + for (int32_t i = 0; i < childrenNum; ++i) { + SSchTask* pChild = taosArrayGetP(pTask->children, i); + SCH_LOCK_TASK(pChild); + schDoTaskRedirect(pJob, pChild, rspCode); + SCH_UNLOCK_TASK(pChild); + } return TSDB_CODE_SUCCESS; _return: code = schProcessOnTaskFailure(pJob, pTask, code); - - SCH_UNLOCK_TASK(pTask); SCH_RET(code); } @@ -1747,7 +1761,7 @@ int32_t schHandleRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf* pData, int32 SCH_ERR_JRET(schUpdateTaskCandidateAddr(pJob, pTask, pData->pEpSet)); } - schDoTaskRedirect(pJob, pTask, rspCode); + SCH_RET(schDoTaskRedirect(pJob, pTask, rspCode)); _return: diff --git a/source/libs/scheduler/src/schRemote.c b/source/libs/scheduler/src/schRemote.c index 3688cb0240..3d547ffbf8 100644 --- a/source/libs/scheduler/src/schRemote.c +++ b/source/libs/scheduler/src/schRemote.c @@ -379,13 +379,13 @@ int32_t schHandleCallback(void *param, SDataBuf *pMsg, int32_t rspCode) { SCH_TASK_DLOG("rsp msg received, type:%s, handle:%p, code:%s", TMSG_INFO(msgType), pMsg->handle, tstrerror(rspCode)); - if (pParam->execIdx != pTask->execIdx) { - SCH_TASK_DLOG("execIdx %d mis-match current execIdx %d", pParam->execIdx, pTask->execIdx); + if (pParam->execId != pTask->execId) { + SCH_TASK_DLOG("execId %d mis-match current execId %d", pParam->execId, pTask->execId); goto _return; } bool dropExecNode = (msgType == TDMT_SCH_LINK_BROKEN || SCH_NETWORK_ERR(rspCode)); - SCH_ERR_JRET(schUpdateTaskHandle(pJob, pTask, dropExecNode, pMsg->handle, pParam->execIdx)); + SCH_ERR_JRET(schUpdateTaskHandle(pJob, pTask, dropExecNode, pMsg->handle, pParam->execId)); int8_t status = 0; if (schJobNeedToStop(pJob, &status)) { @@ -401,7 +401,7 @@ int32_t schHandleCallback(void *param, SDataBuf *pMsg, int32_t rspCode) { goto _return; } - SCH_ERR_JRET(schHandleResponseMsg(pJob, pTask, msgType, pMsg->pData, pMsg->len, rspCode)); + code = schHandleResponseMsg(pJob, pTask, msgType, pMsg->pData, pMsg->len, rspCode); pMsg->pData = NULL; _return: @@ -458,7 +458,7 @@ int32_t schMakeCallbackParam(SSchJob *pJob, SSchTask *pTask, int32_t msgType, bo param->refId = pJob->refId; param->taskId = SCH_TASK_ID(pTask); param->pTrans = pJob->conn.pTrans; - param->execIdx = pTask->execIdx; + param->execId = pTask->execId; *pParam = param; return TSDB_CODE_SUCCESS; @@ -1015,6 +1015,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, pMsg->queryId = htobe64(pJob->queryId); pMsg->taskId = htobe64(pTask->taskId); pMsg->refId = htobe64(pJob->refId); + pMsg->execId = htonl(pTask->execId); pMsg->taskType = TASK_TYPE_TEMP; pMsg->explain = SCH_IS_EXPLAIN_JOB(pJob); pMsg->phyLen = htonl(pTask->msgLen); @@ -1041,6 +1042,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, pMsg->sId = htobe64(schMgmt.sId); pMsg->queryId = htobe64(pJob->queryId); pMsg->taskId = htobe64(pTask->taskId); + pMsg->execId = htonl(pTask->execId); break; } @@ -1060,6 +1062,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, pMsg->queryId = htobe64(pJob->queryId); pMsg->taskId = htobe64(pTask->taskId); pMsg->refId = htobe64(pJob->refId); + pMsg->execId = htobe64(pTask->execId); break; } case TDMT_SCH_QUERY_HEARTBEAT: { @@ -1102,7 +1105,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, (rpcCtx.args ? &rpcCtx : NULL))); if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY) { - SCH_ERR_RET(schAppendTaskExecNode(pJob, pTask, addr, pTask->execIdx)); + SCH_ERR_RET(schAppendTaskExecNode(pJob, pTask, addr, pTask->execId)); } return TSDB_CODE_SUCCESS; From 47bc36872f47cb2338e29f3f24a4f90ea1bf09d1 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 30 Jun 2022 13:44:26 +0800 Subject: [PATCH 11/14] feat: query redirect --- include/common/tmsg.h | 1 + source/client/src/clientImpl.c | 4 ++-- source/libs/nodes/src/nodesCodeFuncs.c | 5 +++-- source/libs/qworker/inc/qwInt.h | 20 ++++++++--------- source/libs/qworker/src/qwMsg.c | 4 +++- source/libs/qworker/src/qworker.c | 30 ++++++++++++++------------ source/libs/scheduler/src/schJob.c | 5 +++-- source/libs/scheduler/src/schRemote.c | 2 +- 8 files changed, 39 insertions(+), 32 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 2876105748..4ba668cbf5 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1594,6 +1594,7 @@ typedef struct { uint64_t queryId; uint64_t taskId; int64_t refId; + int32_t execId; } STaskCancelReq; typedef struct { diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index b20fd4b9b0..51e43f927b 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1294,10 +1294,10 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { */ int32_t elapsed = pRequest->metric.rsp - pRequest->metric.start; if (pMsg->code == TSDB_CODE_SUCCESS) { - tscDebug("0x%" PRIx64 " message:%s, code:%s rspLen:%d, elapsed:%d ms, reqId:0x%" PRIx64, pRequest->self, + tscDebug("0x%" PRIx64 " rsp msg:%s, code:%s rspLen:%d, elapsed:%d ms, reqId:0x%" PRIx64, pRequest->self, TMSG_INFO(pMsg->msgType), tstrerror(pMsg->code), pMsg->contLen, elapsed / 1000, pRequest->requestId); } else { - tscError("0x%" PRIx64 " SQL cmd:%s, code:%s rspLen:%d, elapsed time:%d ms, reqId:0x%" PRIx64, pRequest->self, + tscError("0x%" PRIx64 " rsp msg:%s, code:%s rspLen:%d, elapsed time:%d ms, reqId:0x%" PRIx64, pRequest->self, TMSG_INFO(pMsg->msgType), tstrerror(pMsg->code), pMsg->contLen, elapsed / 1000, pRequest->requestId); } diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index 4375a7b04c..72263feea1 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -3430,6 +3430,7 @@ static int32_t jsonToSlotDescNode(const SJson* pJson, void* pObj) { static const char* jkDownstreamSourceAddr = "Addr"; static const char* jkDownstreamSourceTaskId = "TaskId"; static const char* jkDownstreamSourceSchedId = "SchedId"; +static const char* jkDownstreamSourceExecId = "ExecId"; static int32_t downstreamSourceNodeToJson(const void* pObj, SJson* pJson) { const SDownstreamSourceNode* pNode = (const SDownstreamSourceNode*)pObj; @@ -3442,7 +3443,7 @@ static int32_t downstreamSourceNodeToJson(const void* pObj, SJson* pJson) { code = tjsonAddIntegerToObject(pJson, jkDownstreamSourceSchedId, pNode->schedId); } if (TSDB_CODE_SUCCESS == code) { - code = tjsonAddIntegerToObject(pJson, jkDownstreamSourceSchedId, pNode->execId); + code = tjsonAddIntegerToObject(pJson, jkDownstreamSourceExecId, pNode->execId); } return code; @@ -3459,7 +3460,7 @@ static int32_t jsonToDownstreamSourceNode(const SJson* pJson, void* pObj) { code = tjsonGetUBigIntValue(pJson, jkDownstreamSourceSchedId, &pNode->schedId); } if (TSDB_CODE_SUCCESS == code) { - code = tjsonGetIntValue(pJson, jkDownstreamSourceSchedId, &pNode->execId); + code = tjsonGetIntValue(pJson, jkDownstreamSourceExecId, &pNode->execId); } return code; diff --git a/source/libs/qworker/inc/qwInt.h b/source/libs/qworker/inc/qwInt.h index 4fa2615470..6faffa13b3 100644 --- a/source/libs/qworker/inc/qwInt.h +++ b/source/libs/qworker/inc/qwInt.h @@ -281,22 +281,22 @@ typedef struct SQWorkerMgmt { #define QW_SCH_ELOG(param, ...) qError("QW:%p SID:%" PRIx64 " " param, mgmt, sId, __VA_ARGS__) #define QW_SCH_DLOG(param, ...) qDebug("QW:%p SID:%" PRIx64 " " param, mgmt, sId, __VA_ARGS__) -#define QW_TASK_ELOG(param, ...) qError("QW:%p QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, mgmt, qId, tId, __VA_ARGS__) -#define QW_TASK_WLOG(param, ...) qWarn("QW:%p QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, mgmt, qId, tId, __VA_ARGS__) -#define QW_TASK_DLOG(param, ...) qDebug("QW:%p QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, mgmt, qId, tId, __VA_ARGS__) +#define QW_TASK_ELOG(param, ...) qError("QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, qId, tId, eId, __VA_ARGS__) +#define QW_TASK_WLOG(param, ...) qWarn("QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, qId, tId, eId, __VA_ARGS__) +#define QW_TASK_DLOG(param, ...) qDebug("QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, qId, tId, eId, __VA_ARGS__) #define QW_TASK_DLOGL(param, ...) \ - qDebugL("QW:%p QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, mgmt, qId, tId, __VA_ARGS__) + qDebugL("QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, qId, tId, eId, __VA_ARGS__) -#define QW_TASK_ELOG_E(param) qError("QW:%p QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, mgmt, qId, tId) -#define QW_TASK_WLOG_E(param) qWarn("QW:%p QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, mgmt, qId, tId) -#define QW_TASK_DLOG_E(param) qDebug("QW:%p QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, mgmt, qId, tId) +#define QW_TASK_ELOG_E(param) qError("QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, qId, tId, eId) +#define QW_TASK_WLOG_E(param) qWarn("QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, qId, tId, eId) +#define QW_TASK_DLOG_E(param) qDebug("QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, qId, tId, eId) #define QW_SCH_TASK_ELOG(param, ...) \ - qError("QW:%p SID:0x%" PRIx64 ",QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, mgmt, sId, qId, tId, __VA_ARGS__) + qError("QW:%p SID:0x%" PRIx64 ",QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, mgmt, sId, qId, tId, eId, __VA_ARGS__) #define QW_SCH_TASK_WLOG(param, ...) \ - qWarn("QW:%p SID:0x%" PRIx64 ",QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, mgmt, sId, qId, tId, __VA_ARGS__) + qWarn("QW:%p SID:0x%" PRIx64 ",QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, mgmt, sId, qId, tId, eId, __VA_ARGS__) #define QW_SCH_TASK_DLOG(param, ...) \ - qDebug("QW:%p SID:0x%" PRIx64 ",QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, mgmt, sId, qId, tId, __VA_ARGS__) + qDebug("QW:%p SID:0x%" PRIx64 ",QID:0x%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d " param, mgmt, sId, qId, tId, eId, __VA_ARGS__) #define QW_LOCK_DEBUG(...) \ do { \ diff --git a/source/libs/qworker/src/qwMsg.c b/source/libs/qworker/src/qwMsg.c index cc642caa70..cc4228f7c7 100644 --- a/source/libs/qworker/src/qwMsg.c +++ b/source/libs/qworker/src/qwMsg.c @@ -480,11 +480,13 @@ int32_t qWorkerProcessCancelMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg, in msg->queryId = be64toh(msg->queryId); msg->taskId = be64toh(msg->taskId); msg->refId = be64toh(msg->refId); + msg->execId = ntohl(msg->execId); uint64_t sId = msg->sId; uint64_t qId = msg->queryId; uint64_t tId = msg->taskId; int64_t rId = msg->refId; + int32_t eId = msg->execId; SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .connInfo = pMsg->info}; @@ -598,7 +600,7 @@ int32_t qWorkerProcessDeleteMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg, SR uint64_t qId = req.queryId; uint64_t tId = req.taskId; int64_t rId = 0; - int32_t eId = 0; + int32_t eId = -1; SQWMsg qwMsg = {.node = node, .msg = req.msg, .msgLen = req.phyLen, .connInfo = pMsg->info}; QW_SCH_TASK_DLOG("processDelete start, node:%p, handle:%p, sql:%s", node, pMsg->info.handle, req.sql); diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 2b23f7a27f..cd48452df2 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -566,20 +566,22 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType, int8_t ex if (gQWDebug.tmp) { -#if 0 - SEpSet epSet = {0}; - epSet.inUse = 1; - epSet.numOfEps = 3; - strcpy(epSet.eps[0].fqdn, "localhost"); - epSet.eps[0].port = 7100; - strcpy(epSet.eps[1].fqdn, "localhost"); - epSet.eps[1].port = 7200; - strcpy(epSet.eps[2].fqdn, "localhost"); - epSet.eps[2].port = 7300; - - qwDbgBuildAndSendRedirectRsp(pMsg->msgType + 1, &pMsg->info, TSDB_CODE_RPC_REDIRECT, &epSet); - gQWDebug.tmp = false; - return TSDB_CODE_SUCCESS; +#if 1 + if (TDMT_SCH_QUERY == qwMsg->msgType) { + SEpSet epSet = {0}; + epSet.inUse = 1; + epSet.numOfEps = 3; + strcpy(epSet.eps[0].fqdn, "localhost"); + epSet.eps[0].port = 7100; + strcpy(epSet.eps[1].fqdn, "localhost"); + epSet.eps[1].port = 7200; + strcpy(epSet.eps[2].fqdn, "localhost"); + epSet.eps[2].port = 7300; + + qwDbgBuildAndSendRedirectRsp(qwMsg->msgType + 1, &qwMsg->connInfo, TSDB_CODE_RPC_REDIRECT, &epSet); + gQWDebug.tmp = false; + return TSDB_CODE_SUCCESS; + } #else if (TDMT_SCH_MERGE_QUERY == qwMsg->msgType) { ctx->phase = QW_PHASE_POST_QUERY; diff --git a/source/libs/scheduler/src/schJob.c b/source/libs/scheduler/src/schJob.c index 26824738e9..29179536df 100644 --- a/source/libs/scheduler/src/schJob.c +++ b/source/libs/scheduler/src/schJob.c @@ -1276,6 +1276,9 @@ int32_t schProcessOnTaskStatusRsp(SQueryNodeEpId* pEpId, SArray* pStatusList) { for (int32_t i = 0; i < taskNum; ++i) { STaskStatus *taskStatus = taosArrayGet(pStatusList, i); + qDebug("QID:%" PRIx64 ",TID:0x%" PRIx64 ",EID:%d task status in server: %s", + taskStatus->queryId, taskStatus->taskId, taskStatus->execId, jobTaskStatusStr(taskStatus->status)); + SSchJob *pJob = schAcquireJob(taskStatus->refId); if (NULL == pJob) { qWarn("job not found, refId:0x%" PRIx64 ",QID:0x%" PRIx64 ",TID:0x%" PRIx64, taskStatus->refId, @@ -1284,8 +1287,6 @@ int32_t schProcessOnTaskStatusRsp(SQueryNodeEpId* pEpId, SArray* pStatusList) { continue; } - SCH_JOB_DLOG("TID:0x%" PRIx64 "EID:%d task status in server: %s", taskStatus->taskId, taskStatus->execId, jobTaskStatusStr(taskStatus->status)); - pTask = NULL; schGetTaskInJob(pJob, taskStatus->taskId, &pTask); if (NULL == pTask) { diff --git a/source/libs/scheduler/src/schRemote.c b/source/libs/scheduler/src/schRemote.c index 3d547ffbf8..69e41d3111 100644 --- a/source/libs/scheduler/src/schRemote.c +++ b/source/libs/scheduler/src/schRemote.c @@ -1062,7 +1062,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, pMsg->queryId = htobe64(pJob->queryId); pMsg->taskId = htobe64(pTask->taskId); pMsg->refId = htobe64(pJob->refId); - pMsg->execId = htobe64(pTask->execId); + pMsg->execId = htonl(pTask->execId); break; } case TDMT_SCH_QUERY_HEARTBEAT: { From 25b3f88ebe9f731440c8facee72f0611e585aad4 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 30 Jun 2022 13:55:41 +0800 Subject: [PATCH 12/14] add query epset --- source/libs/transport/src/transCli.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index b48939f9f9..77cc5dbaf7 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1054,7 +1054,8 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { */ STransConnCtx* pCtx = pMsg->ctx; int32_t code = pResp->code; - if (pTransInst->retry != NULL && pTransInst->retry(code, pResp->msgType - 1)) { + bool retry = (pTransInst->retry != NULL && pTransInst->retry(code, pResp->msgType - 1)) ? true : false; + if (retry) { pMsg->sent = 0; pCtx->retryCnt += 1; if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { @@ -1083,11 +1084,13 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { STraceId* trace = &pResp->info.traceId; - if (cliTryToExtractEpSet(pResp, &pCtx->epSet)) { + bool hasEpSet = cliTryToExtractEpSet(pResp, &pCtx->epSet); + if (hasEpSet) { char tbuf[256] = {0}; EPSET_DEBUG_STR(&pCtx->epSet, tbuf); tGTrace("%s conn %p extract epset from msg", CONN_GET_INST_LABEL(pConn), pConn); } + if (pCtx->pSem != NULL) { tGTrace("%s conn %p(sync) handle resp", CONN_GET_INST_LABEL(pConn), pConn); if (pCtx->pRsp == NULL) { @@ -1099,10 +1102,14 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { pCtx->pRsp = NULL; } else { tGTrace("%s conn %p handle resp", CONN_GET_INST_LABEL(pConn), pConn); - if (!cliIsEpsetUpdated(code, pCtx)) { - pTransInst->cfp(pTransInst->parent, pResp, NULL); - } else { + if (retry == false && hasEpSet == true) { pTransInst->cfp(pTransInst->parent, pResp, &pCtx->epSet); + } else { + if (!cliIsEpsetUpdated(code, pCtx)) { + pTransInst->cfp(pTransInst->parent, pResp, NULL); + } else { + pTransInst->cfp(pTransInst->parent, pResp, &pCtx->epSet); + } } } return 0; From a8e526a6de3bfd5b59461ccfe81f3e998b0865e7 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 30 Jun 2022 14:31:12 +0800 Subject: [PATCH 13/14] feat: support query redirect --- source/libs/qworker/src/qwDbg.c | 32 +++++++++++++++++++++++++++++- source/libs/qworker/src/qworker.c | 29 --------------------------- source/libs/scheduler/src/schJob.c | 12 ++++++----- 3 files changed, 38 insertions(+), 35 deletions(-) diff --git a/source/libs/qworker/src/qwDbg.c b/source/libs/qworker/src/qwDbg.c index 0fa01a304c..68058334ab 100644 --- a/source/libs/qworker/src/qwDbg.c +++ b/source/libs/qworker/src/qwDbg.c @@ -9,7 +9,7 @@ #include "tmsg.h" #include "tname.h" -SQWDebug gQWDebug = {.statusEnable = true, .dumpEnable = false, .tmp = true}; +SQWDebug gQWDebug = {.statusEnable = true, .dumpEnable = false, .tmp = false}; int32_t qwDbgValidateStatus(QW_FPARAMS_DEF, int8_t oriStatus, int8_t newStatus, bool *ignore) { if (!gQWDebug.statusEnable) { @@ -138,6 +138,7 @@ int32_t qwDbgBuildAndSendRedirectRsp(int32_t rspType, SRpcHandleInfo *pConn, int .code = code, .info = *pConn, }; + rpcRsp.info.hasEpSet = 1; tmsgSendRsp(&rpcRsp); @@ -146,6 +147,35 @@ int32_t qwDbgBuildAndSendRedirectRsp(int32_t rspType, SRpcHandleInfo *pConn, int return TSDB_CODE_SUCCESS; } +int32_t qwDbgResponseREdirect(SQWMsg *qwMsg, SQWTaskCtx *ctx) { + if (gQWDebug.tmp) { + if (TDMT_SCH_QUERY == qwMsg->msgType) { + SEpSet epSet = {0}; + epSet.inUse = 1; + epSet.numOfEps = 3; + strcpy(epSet.eps[0].fqdn, "localhost"); + epSet.eps[0].port = 7100; + strcpy(epSet.eps[1].fqdn, "localhost"); + epSet.eps[1].port = 7200; + strcpy(epSet.eps[2].fqdn, "localhost"); + epSet.eps[2].port = 7300; + + qwDbgBuildAndSendRedirectRsp(qwMsg->msgType + 1, &qwMsg->connInfo, TSDB_CODE_RPC_REDIRECT, &epSet); + gQWDebug.tmp = false; + return TSDB_CODE_SUCCESS; + } + + if (TDMT_SCH_MERGE_QUERY == qwMsg->msgType) { + ctx->phase = QW_PHASE_POST_QUERY; + qwDbgBuildAndSendRedirectRsp(qwMsg->msgType + 1, &qwMsg->connInfo, TSDB_CODE_RPC_REDIRECT, NULL); + gQWDebug.tmp = false; + return TSDB_CODE_SUCCESS; + } + } + + return TSDB_CODE_SUCCESS; +} + int32_t qwDbgEnableDebug(char *option) { if (0 == strcasecmp(option, "lock")) { diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index cd48452df2..949b67249f 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -564,35 +564,6 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType, int8_t ex QW_ERR_JRET(qwExecTask(QW_FPARAMS(), ctx, NULL)); } - - if (gQWDebug.tmp) { -#if 1 - if (TDMT_SCH_QUERY == qwMsg->msgType) { - SEpSet epSet = {0}; - epSet.inUse = 1; - epSet.numOfEps = 3; - strcpy(epSet.eps[0].fqdn, "localhost"); - epSet.eps[0].port = 7100; - strcpy(epSet.eps[1].fqdn, "localhost"); - epSet.eps[1].port = 7200; - strcpy(epSet.eps[2].fqdn, "localhost"); - epSet.eps[2].port = 7300; - - qwDbgBuildAndSendRedirectRsp(qwMsg->msgType + 1, &qwMsg->connInfo, TSDB_CODE_RPC_REDIRECT, &epSet); - gQWDebug.tmp = false; - return TSDB_CODE_SUCCESS; - } -#else - if (TDMT_SCH_MERGE_QUERY == qwMsg->msgType) { - ctx->phase = QW_PHASE_POST_QUERY; - qwDbgBuildAndSendRedirectRsp(qwMsg->msgType + 1, &qwMsg->connInfo, TSDB_CODE_RPC_REDIRECT, NULL); - gQWDebug.tmp = false; - return TSDB_CODE_SUCCESS; - } -#endif - } - - _return: input.code = code; diff --git a/source/libs/scheduler/src/schJob.c b/source/libs/scheduler/src/schJob.c index 29179536df..b2a96cbb23 100644 --- a/source/libs/scheduler/src/schJob.c +++ b/source/libs/scheduler/src/schJob.c @@ -1684,7 +1684,7 @@ _return: SCH_RET(code); } -int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, int32_t rspCode) { +int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf* pData, int32_t rspCode) { int32_t code = 0; int8_t status = 0; if (schJobNeedToStop(pJob, &status)) { @@ -1711,6 +1711,10 @@ int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, int32_t rspCode) { memset(&pTask->succeedAddr, 0, sizeof(pTask->succeedAddr)); if (SCH_IS_DATA_SRC_QRY_TASK(pTask)) { + if (pData) { + SCH_ERR_JRET(schUpdateTaskCandidateAddr(pJob, pTask, pData->pEpSet)); + } + if (SCH_TASK_NEED_FLOW_CTRL(pJob, pTask)) { if (JOB_TASK_STATUS_EXECUTING == SCH_GET_TASK_STATUS(pTask)) { SCH_ERR_JRET(schLaunchTasksInFlowCtrlList(pJob, pTask)); @@ -1737,7 +1741,7 @@ int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, int32_t rspCode) { for (int32_t i = 0; i < childrenNum; ++i) { SSchTask* pChild = taosArrayGetP(pTask->children, i); SCH_LOCK_TASK(pChild); - schDoTaskRedirect(pJob, pChild, rspCode); + schDoTaskRedirect(pJob, pChild, NULL, rspCode); SCH_UNLOCK_TASK(pChild); } @@ -1758,11 +1762,9 @@ int32_t schHandleRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf* pData, int32 SCH_TASK_ELOG("no epset updated while got error %s", tstrerror(rspCode)); SCH_ERR_JRET(rspCode); } - - SCH_ERR_JRET(schUpdateTaskCandidateAddr(pJob, pTask, pData->pEpSet)); } - SCH_RET(schDoTaskRedirect(pJob, pTask, rspCode)); + SCH_RET(schDoTaskRedirect(pJob, pTask, pData, rspCode)); _return: From 4fc1a6faecc52a76a46eb667690e6c632783539d Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 30 Jun 2022 15:59:42 +0800 Subject: [PATCH 14/14] fix connect error --- source/libs/transport/src/transCli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 4ec54beebf..d03f0fd903 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -400,7 +400,7 @@ void cliHandleExcept(SCliConn* pConn) { STransConnCtx* pCtx = pMsg ? pMsg->ctx : NULL; STransMsg transMsg = {0}; - transMsg.code = TSDB_CODE_RPC_BROKEN_LINK; + transMsg.code = pConn->broken ? TSDB_CODE_RPC_BROKEN_LINK : TSDB_CODE_RPC_NETWORK_UNAVAIL; transMsg.msgType = pMsg ? pMsg->msg.msgType + 1 : 0; transMsg.info.ahandle = NULL;