From f51203bb30abe868fb0ceec4ac809ad50eac390b Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 18 Mar 2022 18:04:57 +0800 Subject: [PATCH 01/59] feature/scheduler --- include/libs/qcom/query.h | 2 + source/libs/qcom/src/queryUtil.c | 9 +- source/libs/qworker/inc/qworkerInt.h | 28 ++-- source/libs/qworker/inc/qworkerMsg.h | 1 + source/libs/qworker/src/qworker.c | 74 +++++----- source/libs/qworker/src/qworkerMsg.c | 103 +++++++++----- source/libs/scheduler/src/scheduler.c | 193 +++++++++++++++++++++++--- 7 files changed, 303 insertions(+), 107 deletions(-) diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index affa265d53..102a5954d6 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -146,6 +146,8 @@ int32_t cleanupTaskQueue(); */ int32_t taosAsyncExec(__async_exec_fn_t execFn, void* execParam, int32_t* code); +int32_t asyncSendMsgToServerExt(void* pTransporter, SEpSet* epSet, int64_t* pTransporterId, const SMsgSendInfo* pInfo, bool persistHandle, SRpcCtx *ctx); + /** * Asynchronously send message to server, after the response received, the callback will be incured. 
* diff --git a/source/libs/qcom/src/queryUtil.c b/source/libs/qcom/src/queryUtil.c index 44f1c454c9..e37b738e33 100644 --- a/source/libs/qcom/src/queryUtil.c +++ b/source/libs/qcom/src/queryUtil.c @@ -140,7 +140,7 @@ int32_t taosAsyncExec(__async_exec_fn_t execFn, void* execParam, int32_t* code) return 0; } -int32_t asyncSendMsgToServer(void* pTransporter, SEpSet* epSet, int64_t* pTransporterId, const SMsgSendInfo* pInfo) { +int32_t asyncSendMsgToServerExt(void* pTransporter, SEpSet* epSet, int64_t* pTransporterId, const SMsgSendInfo* pInfo, bool persistHandle, SRpcCtx *ctx) { char* pMsg = rpcMallocCont(pInfo->msgInfo.len); if (NULL == pMsg) { qError("0x%" PRIx64 " msg:%s malloc failed", pInfo->requestId, TMSG_INFO(pInfo->msgType)); @@ -154,14 +154,19 @@ int32_t asyncSendMsgToServer(void* pTransporter, SEpSet* epSet, int64_t* pTransp .contLen = pInfo->msgInfo.len, .ahandle = (void*)pInfo, .handle = pInfo->msgInfo.handle, + .persistHandle = persistHandle, .code = 0}; assert(pInfo->fp != NULL); - rpcSendRequest(pTransporter, epSet, &rpcMsg, pTransporterId); + rpcSendRequestWithCtx(pTransporter, epSet, &rpcMsg, pTransporterId, ctx); return TSDB_CODE_SUCCESS; } +int32_t asyncSendMsgToServer(void* pTransporter, SEpSet* epSet, int64_t* pTransporterId, const SMsgSendInfo* pInfo) { + return asyncSendMsgToServerExt(pTransporter, epSet, pTransporterId, pInfo, false, NULL); +} + char *jobTaskStatusStr(int32_t status) { switch (status) { case JOB_TASK_STATUS_NULL: diff --git a/source/libs/qworker/inc/qworkerInt.h b/source/libs/qworker/inc/qworkerInt.h index 6e2d482c05..56249421c1 100644 --- a/source/libs/qworker/inc/qworkerInt.h +++ b/source/libs/qworker/inc/qworkerInt.h @@ -70,11 +70,16 @@ typedef struct SQWDebug { bool statusEnable; } SQWDebug; +typedef struct SQWConnInfo { + void *handle; + void *ahandle; +} SQWConnInfo; + typedef struct SQWMsg { - void *node; - char *msg; - int32_t msgLen; - void *connection; + void *node; + char *msg; + int32_t msgLen; + SQWConnInfo 
connInfo; } SQWMsg; typedef struct SQWHbInfo { @@ -100,10 +105,6 @@ typedef struct SQWTaskCtx { SRWLatch lock; int8_t phase; int8_t taskType; - - void *readyConnection; - void *dropConnection; - void *cancelConnection; bool emptyRes; bool queryFetched; @@ -112,6 +113,7 @@ typedef struct SQWTaskCtx { bool queryInQueue; int32_t rspCode; + SQWConnInfo connInfo; int8_t events[QW_EVENT_MAX]; qTaskInfo_t taskHandle; @@ -119,11 +121,11 @@ typedef struct SQWTaskCtx { } SQWTaskCtx; typedef struct SQWSchStatus { - int32_t lastAccessTs; // timestamp in second - uint64_t hbSeqId; - void *hbConnection; - SRWLatch tasksLock; - SHashObj *tasksHash; // key:queryId+taskId, value: SQWTaskStatus + int32_t lastAccessTs; // timestamp in second + uint64_t hbSeqId; + SQWConnInfo *hbConnection; + SRWLatch tasksLock; + SHashObj *tasksHash; // key:queryId+taskId, value: SQWTaskStatus } SQWSchStatus; // Qnode/Vnode level task management diff --git a/source/libs/qworker/inc/qworkerMsg.h b/source/libs/qworker/inc/qworkerMsg.h index ecb5dbd654..141e8f7916 100644 --- a/source/libs/qworker/inc/qworkerMsg.h +++ b/source/libs/qworker/inc/qworkerMsg.h @@ -41,6 +41,7 @@ void qwFreeFetchRsp(void *msg); int32_t qwMallocFetchRsp(int32_t length, SRetrieveTableRsp **rsp); int32_t qwGetSchTasksStatus(SQWorkerMgmt *mgmt, uint64_t sId, SSchedulerStatusRsp **rsp); int32_t qwBuildAndSendHbRsp(SRpcMsg *pMsg, SSchedulerHbRsp *rsp, int32_t code); +int32_t qwRegisterBrokenLinkArg(QW_FPARAMS_DEF, SQWConnInfo *pConn); diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 60eb501dd2..5c73674365 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -402,6 +402,9 @@ int32_t qwKillTaskHandle(QW_FPARAMS_DEF, SQWTaskCtx *ctx) { void qwFreeTask(QW_FPARAMS_DEF, SQWTaskCtx *ctx) { + rpcReleaseHandle(ctx->connInfo.handle, CONN_SERVER); + ctx->connInfo.handle = NULL; + qwFreeTaskHandle(QW_FPARAMS(), &ctx->taskHandle); if (ctx->sinkHandle) { @@ -729,7 +732,7 
@@ int32_t qwHandlePrePhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inpu if (QW_IS_EVENT_RECEIVED(ctx, QW_EVENT_DROP)) { QW_ERR_JRET(qwDropTask(QW_FPARAMS())); - dropConnection = ctx->dropConnection; + dropConnection = &ctx->connInfo; QW_ERR_JRET(TSDB_CODE_QRY_TASK_DROPPED); break; } @@ -763,7 +766,7 @@ int32_t qwHandlePrePhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inpu if (QW_IS_EVENT_RECEIVED(ctx, QW_EVENT_DROP)) { QW_ERR_JRET(qwDropTask(QW_FPARAMS())); - dropConnection = ctx->dropConnection; + dropConnection = &ctx->connInfo; QW_ERR_JRET(TSDB_CODE_QRY_TASK_DROPPED); } @@ -807,9 +810,8 @@ _return: int32_t qwHandlePostPhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *input, SQWPhaseOutput *output) { int32_t code = 0; SQWTaskCtx *ctx = NULL; + SQWConnInfo connInfo = {0}; void *readyConnection = NULL; - void *dropConnection = NULL; - void *cancelConnection = NULL; QW_TASK_DLOG("start to handle event at phase %s", qwPhaseStr(phase)); @@ -826,11 +828,18 @@ int32_t qwHandlePostPhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inp if (NULL == ctx->taskHandle && NULL == ctx->sinkHandle) { ctx->emptyRes = true; } - + +#if 0 if (QW_IS_EVENT_RECEIVED(ctx, QW_EVENT_READY)) { - readyConnection = ctx->readyConnection; + readyConnection = &ctx->connInfo; QW_SET_EVENT_PROCESSED(ctx, QW_EVENT_READY); } +#else + connInfo.handle = ctx->connInfo.handle; + readyConnection = &connInfo; + + QW_SET_EVENT_PROCESSED(ctx, QW_EVENT_READY); +#endif } if (QW_IS_EVENT_RECEIVED(ctx, QW_EVENT_DROP)) { @@ -841,7 +850,6 @@ int32_t qwHandlePostPhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inp QW_ERR_JRET(qwDropTask(QW_FPARAMS())); - dropConnection = ctx->dropConnection; QW_ERR_JRET(TSDB_CODE_QRY_TASK_DROPPED); } @@ -869,21 +877,11 @@ _return: qwReleaseTaskCtx(mgmt, ctx); } - if (readyConnection) { + if (TSDB_CODE_SUCCESS == code && readyConnection) { qwBuildAndSendReadyRsp(readyConnection, code); QW_TASK_DLOG("ready msg rsped, code:%x - %s", code, 
tstrerror(code)); } - if (dropConnection) { - qwBuildAndSendDropRsp(dropConnection, code); - QW_TASK_DLOG("drop msg rsped, code:%x - %s", code, tstrerror(code)); - } - - if (cancelConnection) { - qwBuildAndSendCancelRsp(cancelConnection, code); - QW_TASK_DLOG("cancel msg rsped, code:%x - %s", code, tstrerror(code)); - } - if (code) { qwUpdateTaskStatus(QW_FPARAMS(), JOB_TASK_STATUS_FAILED); } @@ -893,17 +891,17 @@ _return: QW_RET(code); } - int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType) { int32_t code = 0; bool queryRsped = false; - bool needStop = false; struct SSubplan *plan = NULL; SQWPhaseInput input = {0}; qTaskInfo_t pTaskInfo = NULL; DataSinkHandle sinkHandle = NULL; SQWTaskCtx *ctx = NULL; + QW_ERR_JRET(qwRegisterBrokenLinkArg(QW_FPARAMS(), &qwMsg->connInfo)); + QW_ERR_JRET(qwHandlePrePhaseEvents(QW_FPARAMS(), QW_PHASE_PRE_QUERY, &input, NULL)); QW_ERR_JRET(qwGetTaskCtx(QW_FPARAMS(), &ctx)); @@ -927,7 +925,7 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType) { QW_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); } - QW_ERR_JRET(qwBuildAndSendQueryRsp(qwMsg->connection, code)); + QW_ERR_JRET(qwBuildAndSendQueryRsp(&qwMsg->connInfo, code)); QW_TASK_DLOG("query msg rsped, code:%x - %s", code, tstrerror(code)); queryRsped = true; @@ -945,7 +943,7 @@ _return: code = qwHandlePostPhaseEvents(QW_FPARAMS(), QW_PHASE_POST_QUERY, &input, NULL); if (!queryRsped) { - qwBuildAndSendQueryRsp(qwMsg->connection, code); + qwBuildAndSendQueryRsp(&qwMsg->connInfo, code); QW_TASK_DLOG("query msg rsped, code:%x - %s", code, tstrerror(code)); } @@ -968,8 +966,9 @@ int32_t qwProcessReady(QW_FPARAMS_DEF, SQWMsg *qwMsg) { } if (ctx->phase == QW_PHASE_PRE_QUERY) { + ctx->connInfo.handle == qwMsg->connInfo.handle; + ctx->connInfo.ahandle = qwMsg->connInfo.ahandle; QW_SET_EVENT_RECEIVED(ctx, QW_EVENT_READY); - ctx->readyConnection = qwMsg->connection; needRsp = false; QW_TASK_DLOG_E("ready msg will not rsp now"); goto _return; @@ -1007,7 +1006,7 @@ 
_return: } if (needRsp) { - qwBuildAndSendReadyRsp(qwMsg->connection, code); + qwBuildAndSendReadyRsp(&qwMsg->connInfo, code); QW_TASK_DLOG("ready msg rsped, code:%x - %s", code, tstrerror(code)); } @@ -1050,7 +1049,7 @@ int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) { QW_SET_EVENT_PROCESSED(ctx, QW_EVENT_FETCH); - qwBuildAndSendFetchRsp(qwMsg->connection, rsp, dataLen, code); + qwBuildAndSendFetchRsp(&qwMsg->connInfo, rsp, dataLen, code); QW_TASK_DLOG("fetch msg rsped, code:%x, dataLen:%d", code, dataLen); } else { atomic_store_8(&ctx->queryContinue, 1); @@ -1067,7 +1066,7 @@ _return: QW_SET_EVENT_PROCESSED(ctx, QW_EVENT_FETCH); qwFreeFetchRsp(rsp); rsp = NULL; - qwBuildAndSendFetchRsp(qwMsg->connection, rsp, 0, code); + qwBuildAndSendFetchRsp(&qwMsg->connInfo, rsp, 0, code); QW_TASK_DLOG("fetch msg rsped, code:%x - %s", code, tstrerror(code)); } @@ -1102,6 +1101,8 @@ int32_t qwProcessFetch(QW_FPARAMS_DEF, SQWMsg *qwMsg) { QW_ERR_JRET(qwGetResFromSink(QW_FPARAMS(), ctx, &dataLen, &rsp, &sOutput)); if (NULL == rsp) { + atomic_store_ptr(&ctx->connInfo.handle, qwMsg->connInfo.handle); + atomic_store_ptr(&ctx->connInfo.ahandle, qwMsg->connInfo.ahandle); QW_SET_EVENT_RECEIVED(ctx, QW_EVENT_FETCH); } else { bool qComplete = (DS_BUF_EMPTY == sOutput.bufStatus && sOutput.queryEnd); @@ -1123,7 +1124,7 @@ int32_t qwProcessFetch(QW_FPARAMS_DEF, SQWMsg *qwMsg) { atomic_store_8(&ctx->queryInQueue, 1); - QW_ERR_JRET(qwBuildAndSendCQueryMsg(QW_FPARAMS(), qwMsg->connection)); + QW_ERR_JRET(qwBuildAndSendCQueryMsg(QW_FPARAMS(), &qwMsg->connInfo)); } } @@ -1143,7 +1144,7 @@ _return: } if (code || rsp) { - qwBuildAndSendFetchRsp(qwMsg->connection, rsp, dataLen, code); + qwBuildAndSendFetchRsp(&qwMsg->connInfo, rsp, dataLen, code); QW_TASK_DLOG("fetch msg rsped, code:%x, dataLen:%d", code, dataLen); } @@ -1180,8 +1181,9 @@ int32_t qwProcessDrop(QW_FPARAMS_DEF, SQWMsg *qwMsg) { // task not started } - if (!needRsp) { - ctx->dropConnection = qwMsg->connection; + if (!needRsp) { 
+ ctx->connInfo.handle == qwMsg->connInfo.handle; + ctx->connInfo.ahandle = qwMsg->connInfo.ahandle; QW_SET_EVENT_RECEIVED(ctx, QW_EVENT_DROP); } @@ -1205,7 +1207,7 @@ _return: } if (TSDB_CODE_SUCCESS != code || needRsp) { - QW_ERR_RET(qwBuildAndSendDropRsp(qwMsg->connection, code)); + QW_ERR_RET(qwBuildAndSendDropRsp(&qwMsg->connInfo, code)); QW_TASK_DLOG("drop msg rsped, code:%x", code); } @@ -1223,27 +1225,25 @@ int32_t qwProcessHb(SQWorkerMgmt *mgmt, SQWMsg *qwMsg, SSchedulerHbReq *req) { QW_ERR_JRET(qwAcquireAddScheduler(mgmt, req->sId, QW_READ, &sch)); - atomic_store_ptr(&sch->hbConnection, qwMsg->connection); + atomic_store_ptr(&sch->hbConnection, qwMsg->connInfo); ++sch->hbSeqId; rsp.seqId = sch->hbSeqId; - QW_DLOG("hb connection updated, seqId:%" PRIx64 ", sId:%" PRIx64 ", nodeId:%d, fqdn:%s, port:%d, connection:%p", - sch->hbSeqId, req->sId, req->epId.nodeId, req->epId.ep.fqdn, req->epId.ep.port, qwMsg->connection); + QW_DLOG("hb connection updated, seqId:%" PRIx64 ", sId:%" PRIx64 ", nodeId:%d, fqdn:%s, port:%d, handle:%p, ahandle:%p", + sch->hbSeqId, req->sId, req->epId.nodeId, req->epId.ep.fqdn, req->epId.ep.port, qwMsg->connInfo.handle, qwMsg->connInfo.ahandle); qwReleaseScheduler(QW_READ, mgmt); _return: - qwBuildAndSendHbRsp(qwMsg->connection, &rsp, code); + qwBuildAndSendHbRsp(&qwMsg->connInfo, &rsp, code); QW_RET(code); } void qwProcessHbTimerEvent(void *param, void *tmrId) { - return; - SQWorkerMgmt *mgmt = (SQWorkerMgmt *)param; SQWSchStatus *sch = NULL; int32_t taskNum = 0; diff --git a/source/libs/qworker/src/qworkerMsg.c b/source/libs/qworker/src/qworkerMsg.c index 98b917c525..b07ddb7196 100644 --- a/source/libs/qworker/src/qworkerMsg.c +++ b/source/libs/qworker/src/qworkerMsg.c @@ -26,6 +26,8 @@ int32_t qwMallocFetchRsp(int32_t length, SRetrieveTableRsp **rsp) { return TSDB_CODE_SUCCESS; } + + void qwBuildFetchRsp(void *msg, SOutputData *input, int32_t len, bool qComplete) { SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)msg; @@ -44,8 +46,7 
@@ void qwFreeFetchRsp(void *msg) { } } -int32_t qwBuildAndSendQueryRsp(void *connection, int32_t code) { - SRpcMsg *pMsg = (SRpcMsg *)connection; +int32_t qwBuildAndSendQueryRsp(SQWConnInfo *pConn, int32_t code) { SQueryTableRsp rsp = {.code = code}; int32_t contLen = tSerializeSQueryTableRsp(NULL, 0, &rsp); @@ -54,8 +55,8 @@ int32_t qwBuildAndSendQueryRsp(void *connection, int32_t code) { SRpcMsg rpcRsp = { .msgType = TDMT_VND_QUERY_RSP, - .handle = pMsg->handle, - .ahandle = pMsg->ahandle, + .handle = pConn->handle, + .ahandle = pConn->ahandle, .pCont = msg, .contLen = contLen, .code = code, @@ -66,15 +67,14 @@ int32_t qwBuildAndSendQueryRsp(void *connection, int32_t code) { return TSDB_CODE_SUCCESS; } -int32_t qwBuildAndSendReadyRsp(void *connection, int32_t code) { - SRpcMsg *pMsg = (SRpcMsg *)connection; +int32_t qwBuildAndSendReadyRsp(SQWConnInfo *pConn, int32_t code) { SResReadyRsp *pRsp = (SResReadyRsp *)rpcMallocCont(sizeof(SResReadyRsp)); pRsp->code = code; SRpcMsg rpcRsp = { .msgType = TDMT_VND_RES_READY_RSP, - .handle = pMsg->handle, - .ahandle = pMsg->ahandle, + .handle = pConn->handle, + .ahandle = pConn->ahandle, .pCont = pRsp, .contLen = sizeof(*pRsp), .code = code, @@ -85,15 +85,15 @@ int32_t qwBuildAndSendReadyRsp(void *connection, int32_t code) { return TSDB_CODE_SUCCESS; } -int32_t qwBuildAndSendHbRsp(SRpcMsg *pMsg, SSchedulerHbRsp *pStatus, int32_t code) { +int32_t qwBuildAndSendHbRsp(SQWConnInfo *pConn, SSchedulerHbRsp *pStatus, int32_t code) { int32_t contLen = tSerializeSSchedulerHbRsp(NULL, 0, pStatus); void *pRsp = rpcMallocCont(contLen); tSerializeSSchedulerHbRsp(pRsp, contLen, pStatus); SRpcMsg rpcRsp = { .msgType = TDMT_VND_QUERY_HEARTBEAT_RSP, - .handle = pMsg->handle, - .ahandle = pMsg->ahandle, + .handle = pConn->handle, + .ahandle = pConn->ahandle, .pCont = pRsp, .contLen = contLen, .code = code, @@ -104,9 +104,7 @@ int32_t qwBuildAndSendHbRsp(SRpcMsg *pMsg, SSchedulerHbRsp *pStatus, int32_t cod return TSDB_CODE_SUCCESS; } -int32_t 
qwBuildAndSendFetchRsp(void *connection, SRetrieveTableRsp *pRsp, int32_t dataLength, int32_t code) { - SRpcMsg *pMsg = (SRpcMsg *)connection; - +int32_t qwBuildAndSendFetchRsp(SQWConnInfo *pConn, SRetrieveTableRsp *pRsp, int32_t dataLength, int32_t code) { if (NULL == pRsp) { pRsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); memset(pRsp, 0, sizeof(SRetrieveTableRsp)); @@ -115,8 +113,8 @@ int32_t qwBuildAndSendFetchRsp(void *connection, SRetrieveTableRsp *pRsp, int32_ SRpcMsg rpcRsp = { .msgType = TDMT_VND_FETCH_RSP, - .handle = pMsg->handle, - .ahandle = pMsg->ahandle, + .handle = pConn->handle, + .ahandle = pConn->ahandle, .pCont = pRsp, .contLen = sizeof(*pRsp) + dataLength, .code = code, @@ -127,14 +125,14 @@ int32_t qwBuildAndSendFetchRsp(void *connection, SRetrieveTableRsp *pRsp, int32_ return TSDB_CODE_SUCCESS; } -int32_t qwBuildAndSendCancelRsp(SRpcMsg *pMsg, int32_t code) { +int32_t qwBuildAndSendCancelRsp(SQWConnInfo *pConn, int32_t code) { STaskCancelRsp *pRsp = (STaskCancelRsp *)rpcMallocCont(sizeof(STaskCancelRsp)); pRsp->code = code; SRpcMsg rpcRsp = { .msgType = TDMT_VND_CANCEL_TASK_RSP, - .handle = pMsg->handle, - .ahandle = pMsg->ahandle, + .handle = pConn->handle, + .ahandle = pConn->ahandle, .pCont = pRsp, .contLen = sizeof(*pRsp), .code = code, @@ -144,15 +142,14 @@ int32_t qwBuildAndSendCancelRsp(SRpcMsg *pMsg, int32_t code) { return TSDB_CODE_SUCCESS; } -int32_t qwBuildAndSendDropRsp(void *connection, int32_t code) { - SRpcMsg *pMsg = (SRpcMsg *)connection; +int32_t qwBuildAndSendDropRsp(SQWConnInfo *pConn, int32_t code) { STaskDropRsp *pRsp = (STaskDropRsp *)rpcMallocCont(sizeof(STaskDropRsp)); pRsp->code = code; SRpcMsg rpcRsp = { .msgType = TDMT_VND_DROP_TASK_RSP, - .handle = pMsg->handle, - .ahandle = pMsg->ahandle, + .handle = pConn->handle, + .ahandle = pConn->ahandle, .pCont = pRsp, .contLen = sizeof(*pRsp), .code = code, @@ -234,8 +231,7 @@ int32_t qwBuildAndSendShowFetchRsp(SRpcMsg *pMsg, SVShowTablesFetchReq* 
pFetchRe return TSDB_CODE_SUCCESS; } -int32_t qwBuildAndSendCQueryMsg(QW_FPARAMS_DEF, void *connection) { - SRpcMsg *pMsg = (SRpcMsg *)connection; +int32_t qwBuildAndSendCQueryMsg(QW_FPARAMS_DEF, SQWConnInfo *pConn) { SQueryContinueReq * req = (SQueryContinueReq *)rpcMallocCont(sizeof(SQueryContinueReq)); if (NULL == req) { QW_SCH_TASK_ELOG("rpcMallocCont %d failed", (int32_t)sizeof(SQueryContinueReq)); @@ -248,8 +244,8 @@ int32_t qwBuildAndSendCQueryMsg(QW_FPARAMS_DEF, void *connection) { req->taskId = tId; SRpcMsg pNewMsg = { - .handle = pMsg->handle, - .ahandle = pMsg->ahandle, + .handle = pConn->handle, + .ahandle = pConn->ahandle, .msgType = TDMT_VND_QUERY_CONTINUE, .pCont = req, .contLen = sizeof(SQueryContinueReq), @@ -268,6 +264,35 @@ int32_t qwBuildAndSendCQueryMsg(QW_FPARAMS_DEF, void *connection) { return TSDB_CODE_SUCCESS; } + +int32_t qwRegisterBrokenLinkArg(QW_FPARAMS_DEF, SQWConnInfo *pConn) { + STaskDropReq * req = (STaskDropReq *)rpcMallocCont(sizeof(STaskDropReq)); + if (NULL == req) { + QW_SCH_TASK_ELOG("rpcMallocCont %d failed", (int32_t)sizeof(STaskDropReq)); + QW_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + req->header.vgId = mgmt->nodeId; + req->sId = sId; + req->queryId = qId; + req->taskId = tId; + req->refId = rId; + + SRpcMsg pMsg = { + .handle = pConn->handle, + .ahandle = pConn->ahandle, + .msgType = TDMT_VND_DROP_TASK, + .pCont = req, + .contLen = sizeof(STaskDropReq), + .code = TSDB_CODE_RPC_NETWORK_UNAVAIL, + }; + + rpcRegisterBrokenLinkArg(&pMsg); + + return TSDB_CODE_SUCCESS; +} + + int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { if (NULL == node || NULL == qWorkerMgmt || NULL == pMsg) { QW_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); @@ -294,7 +319,9 @@ int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { uint64_t tId = msg->taskId; int64_t rId = msg->refId; - SQWMsg qwMsg = {.node = node, .msg = msg->msg + msg->sqlLen, .msgLen = msg->phyLen, .connection = pMsg}; + SQWMsg qwMsg = 
{.node = node, .msg = msg->msg + msg->sqlLen, .msgLen = msg->phyLen}; + qwMsg.connInfo.handle = pMsg->handle; + qwMsg.connInfo.ahandle = pMsg->ahandle; char* sql = strndup(msg->msg, msg->sqlLen); QW_SCH_TASK_DLOG("processQuery start, node:%p, sql:%s", node, sql); @@ -326,7 +353,9 @@ int32_t qWorkerProcessCQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { uint64_t tId = msg->taskId; int64_t rId = 0; - SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .connection = pMsg}; + SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0}; + qwMsg.connInfo.handle = pMsg->handle; + qwMsg.connInfo.ahandle = pMsg->ahandle; QW_SCH_TASK_DLOG("processCQuery start, node:%p", node); @@ -358,7 +387,9 @@ int32_t qWorkerProcessReadyMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg){ uint64_t tId = msg->taskId; int64_t rId = 0; - SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .connection = pMsg}; + SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0}; + qwMsg.connInfo.handle = pMsg->handle; + qwMsg.connInfo.ahandle = pMsg->ahandle; QW_SCH_TASK_DLOG("processReady start, node:%p", node); @@ -418,7 +449,9 @@ int32_t qWorkerProcessFetchMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { uint64_t tId = msg->taskId; int64_t rId = 0; - SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .connection = pMsg}; + SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0}; + qwMsg.connInfo.handle = pMsg->handle; + qwMsg.connInfo.ahandle = pMsg->ahandle; QW_SCH_TASK_DLOG("processFetch start, node:%p", node); @@ -484,7 +517,9 @@ int32_t qWorkerProcessDropMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { uint64_t tId = msg->taskId; int64_t rId = msg->refId; - SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .connection = pMsg}; + SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0}; + qwMsg.connInfo.handle = pMsg->handle; + qwMsg.connInfo.ahandle = pMsg->ahandle; QW_SCH_TASK_DLOG("processDrop start, node:%p", node); @@ -516,7 +551,9 @@ int32_t 
qWorkerProcessHbMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { } uint64_t sId = req.sId; - SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .connection = pMsg}; + SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0}; + qwMsg.connInfo.handle = pMsg->handle; + qwMsg.connInfo.ahandle = pMsg->ahandle; QW_SCH_DLOG("processHb start, node:%p", node); diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 189124ef23..1f76457c13 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -70,6 +70,22 @@ int32_t schInitTask(SSchJob* pJob, SSchTask *pTask, SSubplan* pPlan, SSchLevel * return TSDB_CODE_SUCCESS; } +void schFreeRpcCtx(SRpcCtx *pCtx) { + if (NULL == pCtx) { + return; + } + void *pIter = taosHashIterate(pCtx->args, NULL); + while (pIter) { + SRpcCtxVal *ctxVal = (SRpcCtxVal *)pIter; + + ctxVal->free(ctxVal->v); + + pIter = taosHashIterate(pCtx->args, pIter); + } + + taosHashCleanup(pCtx->args); +} + void schFreeTask(SSchTask* pTask) { if (pTask->candidateAddrs) { taosArrayDestroy(pTask->candidateAddrs); @@ -106,30 +122,41 @@ static FORCE_INLINE bool schJobNeedToStop(SSchJob *pJob, int8_t *pStatus) { int32_t schValidateTaskReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t msgType) { int32_t lastMsgType = SCH_GET_TASK_LASTMSG_TYPE(pTask); int32_t taskStatus = SCH_GET_TASK_STATUS(pTask); + int32_t reqMsgType = msgType - 1; switch (msgType) { + case TDMT_VND_QUERY_RSP: // query_rsp may be processed later than ready_rsp + if (lastMsgType != reqMsgType) { + SCH_TASK_DLOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), TMSG_INFO(msgType)); + } + + if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { + SCH_TASK_DLOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), TMSG_INFO(msgType)); + } + + SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); + return; + case 
TDMT_VND_RES_READY_RSP: + reqMsgType = TDMT_VND_QUERY; + break; case TDMT_VND_CREATE_TABLE_RSP: case TDMT_VND_SUBMIT_RSP: - case TDMT_VND_QUERY_RSP: - case TDMT_VND_RES_READY_RSP: case TDMT_VND_FETCH_RSP: - case TDMT_VND_DROP_TASK: - if (lastMsgType != (msgType - 1)) { - SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), TMSG_INFO(msgType)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); - } - - if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { - SCH_TASK_ELOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), TMSG_INFO(msgType)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); - } - break; default: SCH_TASK_ELOG("unknown rsp msg, type:%s, status:%s", TMSG_INFO(msgType), jobTaskStatusStr(taskStatus)); - SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); } + if (lastMsgType != reqMsgType) { + SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), TMSG_INFO(msgType)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { + SCH_TASK_ELOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), TMSG_INFO(msgType)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); return TSDB_CODE_SUCCESS; @@ -1006,7 +1033,7 @@ int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, ch SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); } - SCH_ERR_JRET(schBuildAndSendMsg(pJob, pTask, NULL, TDMT_VND_RES_READY)); + //SCH_ERR_JRET(schBuildAndSendMsg(pJob, pTask, NULL, TDMT_VND_RES_READY)); break; } @@ -1212,8 +1239,122 @@ int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp) { return TSDB_CODE_SUCCESS; } +void schFreeRpcCtxVal(void *arg) { + if (NULL == arg) { + return; + } + + SMsgSendInfo* pMsgSendInfo = arg; + tfree(pMsgSendInfo->param); + 
tfree(pMsgSendInfo); +} + +int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { + int32_t code = 0; + SSchCallbackParam *param = NULL; + SMsgSendInfo* pMsgSendInfo = NULL; -int32_t schAsyncSendMsg(SSchJob *pJob, SSchTask *pTask, void *transport, SEpSet* epSet, int32_t msgType, void *msg, uint32_t msgSize) { + pCtx->args = taosHashInit(1, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); + if (NULL == pCtx->args) { + SCH_TASK_ELOG("taosHashInit %d RpcCtx failed", 1); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo)); + if (NULL == pMsgSendInfo) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + param = calloc(1, sizeof(SSchCallbackParam)); + if (NULL == param) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchCallbackParam)); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + int32_t msgType = TDMT_VND_RES_READY_RSP; + __async_send_cb_fn_t fp = NULL; + SCH_ERR_JRET(schGetCallbackFp(TDMT_VND_RES_READY, &fp)); + + param->queryId = pJob->queryId; + param->refId = pJob->refId; + param->taskId = SCH_TASK_ID(pTask); + param->transport = pJob->transport; + + pMsgSendInfo->param = param; + pMsgSendInfo->fp = fp; + + SRpcCtxVal ctxVal = {.v = pMsgSendInfo, .len = sizeof(SMsgSendInfo), .free = schFreeRpcCtxVal}; + if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { + SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + return TSDB_CODE_SUCCESS; + +_return: + + taosHashCleanup(pCtx->args); + tfree(param); + tfree(pMsgSendInfo); + + SCH_RET(code); +} + +int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { + int32_t code = 0; + SSchCallbackParam *param = NULL; + SMsgSendInfo* pMsgSendInfo = NULL; + + pCtx->args = taosHashInit(1, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, 
HASH_ENTRY_LOCK); + if (NULL == pCtx->args) { + SCH_TASK_ELOG("taosHashInit %d RpcCtx failed", 1); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo)); + if (NULL == pMsgSendInfo) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + param = calloc(1, sizeof(SSchCallbackParam)); + if (NULL == param) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchCallbackParam)); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + int32_t msgType = TDMT_VND_QUERY_HEARTBEAT_RSP; + __async_send_cb_fn_t fp = NULL; + SCH_ERR_JRET(schGetCallbackFp(TDMT_VND_QUERY_HEARTBEAT, &fp)); + + param->queryId = pJob->queryId; + param->refId = pJob->refId; + param->taskId = SCH_TASK_ID(pTask); + param->transport = pJob->transport; + + pMsgSendInfo->param = param; + pMsgSendInfo->fp = fp; + + SRpcCtxVal ctxVal = {.v = pMsgSendInfo, .len = sizeof(SMsgSendInfo), .free = schFreeRpcCtxVal}; + if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { + SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + return TSDB_CODE_SUCCESS; + +_return: + + taosHashCleanup(pCtx->args); + tfree(param); + tfree(pMsgSendInfo); + + SCH_RET(code); +} + + +int32_t schAsyncSendMsg(SSchJob *pJob, SSchTask *pTask, void *transport, SEpSet* epSet, int32_t msgType, void *msg, uint32_t msgSize, bool persistHandle, SRpcCtx *ctx) { int32_t code = 0; SSchTrans *trans = (SSchTrans *)transport; @@ -1237,7 +1378,6 @@ int32_t schAsyncSendMsg(SSchJob *pJob, SSchTask *pTask, void *transport, SEpSet* param->refId = pJob->refId; param->taskId = SCH_TASK_ID(pTask); param->transport = trans->transInst; - pMsgSendInfo->param = param; pMsgSendInfo->msgInfo.pData = msg; @@ -1247,7 +1387,7 @@ int32_t schAsyncSendMsg(SSchJob *pJob, SSchTask *pTask, void *transport, SEpSet* pMsgSendInfo->fp = fp; int64_t transporterId = 0; - code = 
asyncSendMsgToServer(trans->transInst, epSet, &transporterId, pMsgSendInfo); + code = asyncSendMsgToServerExt(trans->transInst, epSet, &transporterId, pMsgSendInfo, persistHandle, ctx); if (code) { SCH_ERR_JRET(code); } @@ -1267,6 +1407,9 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, void *msg = NULL; int32_t code = 0; bool isCandidateAddr = false; + bool persistHandle = false; + SRpcCtx rpcCtx = {0}; + if (NULL == addr) { addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); isCandidateAddr = true; @@ -1289,8 +1432,9 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, } case TDMT_VND_QUERY: { + SCH_ERR_RET(schMakeQueryRpcCtx(pJob, pTask, &rpcCtx)); + uint32_t len = strlen(pJob->sql); - msgSize = sizeof(SSubQueryMsg) + pTask->msgLen + len; msg = calloc(1, msgSize); if (NULL == msg) { @@ -1310,7 +1454,8 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, memcpy(pMsg->msg, pJob->sql, len); memcpy(pMsg->msg + len, pTask->msg, pTask->msgLen); - + + persistHandle = true; break; } @@ -1367,6 +1512,8 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, break; } case TDMT_VND_QUERY_HEARTBEAT: { + SCH_ERR_RET(schMakeHbRpcCtx(pJob, pTask, &rpcCtx)); + SSchedulerHbReq req = {0}; req.sId = schMgmt.sId; req.header.vgId = addr->nodeId; @@ -1387,6 +1534,8 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, SCH_JOB_ELOG("tSerializeSSchedulerHbReq hbReq failed, size:%d", msgSize); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } + + persistHandle = true; break; } default: @@ -1398,7 +1547,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, SCH_SET_TASK_LASTMSG_TYPE(pTask, msgType); SSchTrans trans = {.transInst = pJob->transport, .transHandle = pTask ? 
pTask->handle : NULL}; - SCH_ERR_JRET(schAsyncSendMsg(pJob, pTask, &trans, &epSet, msgType, msg, msgSize)); + SCH_ERR_JRET(schAsyncSendMsg(pJob, pTask, &trans, &epSet, msgType, msg, msgSize, persistHandle, (rpcCtx.args ? &rpcCtx : NULL))); if (isCandidateAddr) { SCH_ERR_RET(schRecordTaskExecNode(pJob, pTask, addr)); @@ -1409,7 +1558,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, _return: SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); - + schFreeRpcCtx(&rpcCtx); tfree(msg); SCH_RET(code); } From a2856bb2b41b716f3c209cba8aa1b645410fcf39 Mon Sep 17 00:00:00 2001 From: dapan Date: Mon, 21 Mar 2022 08:32:22 +0800 Subject: [PATCH 02/59] feature/scheduler --- source/libs/qworker/test/qworkerTests.cpp | 18 +-- source/libs/scheduler/test/schedulerTests.cpp | 110 ++++++++++++++++++ 2 files changed, 119 insertions(+), 9 deletions(-) diff --git a/source/libs/qworker/test/qworkerTests.cpp b/source/libs/qworker/test/qworkerTests.cpp index cc4233bd47..bed476198a 100644 --- a/source/libs/qworker/test/qworkerTests.cpp +++ b/source/libs/qworker/test/qworkerTests.cpp @@ -884,8 +884,8 @@ TEST(seqTest, normalCase) { code = qWorkerProcessQueryMsg(mockPointer, mgmt, &queryRpc); ASSERT_EQ(code, 0); - code = qWorkerProcessReadyMsg(mockPointer, mgmt, &readyRpc); - ASSERT_EQ(code, 0); + //code = qWorkerProcessReadyMsg(mockPointer, mgmt, &readyRpc); + //ASSERT_EQ(code, 0); code = qWorkerProcessFetchMsg(mockPointer, mgmt, &fetchRpc); ASSERT_EQ(code, 0); @@ -976,12 +976,12 @@ TEST(seqTest, randCase) { qwtBuildQueryReqMsg(&queryRpc); code = qWorkerProcessQueryMsg(mockPointer, mgmt, &queryRpc); } else if (r >= maxr/5 && r < maxr * 2/5) { - printf("Ready,%d\n", t++); - qwtBuildReadyReqMsg(&readyMsg, &readyRpc); - code = qWorkerProcessReadyMsg(mockPointer, mgmt, &readyRpc); - if (qwtTestEnableSleep) { - taosUsleep(1); - } + //printf("Ready,%d\n", t++); + //qwtBuildReadyReqMsg(&readyMsg, &readyRpc); + //code = qWorkerProcessReadyMsg(mockPointer, mgmt, &readyRpc); + //if 
(qwtTestEnableSleep) { + // taosUsleep(1); + //} } else if (r >= maxr * 2/5 && r < maxr* 3/5) { printf("Fetch,%d\n", t++); qwtBuildFetchReqMsg(&fetchMsg, &fetchRpc); @@ -1042,7 +1042,7 @@ TEST(seqTest, multithreadRand) { pthread_t t1,t2,t3,t4,t5,t6; pthread_create(&(t1), &thattr, queryThread, mgmt); - pthread_create(&(t2), &thattr, readyThread, NULL); + //pthread_create(&(t2), &thattr, readyThread, NULL); pthread_create(&(t3), &thattr, fetchThread, NULL); pthread_create(&(t4), &thattr, dropThread, NULL); pthread_create(&(t5), &thattr, statusThread, NULL); diff --git a/source/libs/scheduler/test/schedulerTests.cpp b/source/libs/scheduler/test/schedulerTests.cpp index 376ed1e2bc..1ecb9347f3 100644 --- a/source/libs/scheduler/test/schedulerTests.cpp +++ b/source/libs/scheduler/test/schedulerTests.cpp @@ -713,6 +713,116 @@ TEST(queryTest, normalCase) { schedulerDestroy(); } +TEST(queryTest, readyFirstCase) { + void *mockPointer = (void *)0x1; + char *clusterId = "cluster1"; + char *dbname = "1.db1"; + char *tablename = "table1"; + SVgroupInfo vgInfo = {0}; + int64_t job = 0; + SQueryPlan dag; + + memset(&dag, 0, sizeof(dag)); + + SArray *qnodeList = taosArrayInit(1, sizeof(SEp)); + + SEp qnodeAddr = {0}; + strcpy(qnodeAddr.fqdn, "qnode0.ep"); + qnodeAddr.port = 6031; + taosArrayPush(qnodeList, &qnodeAddr); + + int32_t code = schedulerInit(NULL); + ASSERT_EQ(code, 0); + + schtBuildQueryDag(&dag); + + schtSetPlanToString(); + schtSetExecNode(); + schtSetAsyncSendMsgToServer(); + + code = schedulerAsyncExecJob(mockPointer, qnodeList, &dag, "select * from tb", &job); + ASSERT_EQ(code, 0); + + + SSchJob *pJob = schAcquireJob(job); + + pIter = taosHashIterate(pJob->execTasks, NULL); + while (pIter) { + SSchTask *task = *(SSchTask **)pIter; + + SResReadyRsp rsp = {0}; + code = schHandleResponseMsg(pJob, task, TDMT_VND_RES_READY_RSP, (char *)&rsp, sizeof(rsp), 0); + printf("code:%d", code); + ASSERT_EQ(code, 0); + pIter = taosHashIterate(pJob->execTasks, pIter); + } + + void 
*pIter = taosHashIterate(pJob->execTasks, NULL); + while (pIter) { + SSchTask *task = *(SSchTask **)pIter; + + SQueryTableRsp rsp = {0}; + code = schHandleResponseMsg(pJob, task, TDMT_VND_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); + + ASSERT_EQ(code, 0); + pIter = taosHashIterate(pJob->execTasks, pIter); + } + + pIter = taosHashIterate(pJob->execTasks, NULL); + while (pIter) { + SSchTask *task = *(SSchTask **)pIter; + + SResReadyRsp rsp = {0}; + code = schHandleResponseMsg(pJob, task, TDMT_VND_RES_READY_RSP, (char *)&rsp, sizeof(rsp), 0); + ASSERT_EQ(code, 0); + + pIter = taosHashIterate(pJob->execTasks, pIter); + } + + pIter = taosHashIterate(pJob->execTasks, NULL); + while (pIter) { + SSchTask *task = *(SSchTask **)pIter; + + SQueryTableRsp rsp = {0}; + code = schHandleResponseMsg(pJob, task, TDMT_VND_QUERY_RSP, (char *)&rsp, sizeof(rsp), 0); + + ASSERT_EQ(code, 0); + pIter = taosHashIterate(pJob->execTasks, pIter); + } + + + + pthread_attr_t thattr; + pthread_attr_init(&thattr); + + pthread_t thread1; + pthread_create(&(thread1), &thattr, schtCreateFetchRspThread, &job); + + void *data = NULL; + code = schedulerFetchRows(job, &data); + ASSERT_EQ(code, 0); + + SRetrieveTableRsp *pRsp = (SRetrieveTableRsp *)data; + ASSERT_EQ(pRsp->completed, 1); + ASSERT_EQ(pRsp->numOfRows, 10); + tfree(data); + + data = NULL; + code = schedulerFetchRows(job, &data); + ASSERT_EQ(code, 0); + ASSERT_TRUE(data == NULL); + + schReleaseJob(job); + + schedulerFreeJob(job); + + schtFreeQueryDag(&dag); + + schedulerDestroy(); +} + + + TEST(queryTest, flowCtrlCase) { void *mockPointer = (void *)0x1; char *clusterId = "cluster1"; From d3212463dd9f7cb46ffb1b9f603fc55aa1df7ff6 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 21 Mar 2022 16:37:19 +0800 Subject: [PATCH 03/59] handle except --- include/libs/transport/trpc.h | 14 +++++++-- source/libs/index/src/index_fst_automation.c | 10 +----- source/libs/transport/inc/transComm.h | 1 + source/libs/transport/src/transCli.c | 6 ++++ 
source/libs/transport/src/transComm.c | 10 ++++++ source/libs/transport/test/transUT.cc | 33 +++++++++++++++----- 6 files changed, 54 insertions(+), 20 deletions(-) diff --git a/include/libs/transport/trpc.h b/include/libs/transport/trpc.h index 02029a996c..c2cce3a05d 100644 --- a/include/libs/transport/trpc.h +++ b/include/libs/transport/trpc.h @@ -52,8 +52,8 @@ typedef struct { char user[TSDB_USER_LEN]; SRpcMsg rpcMsg; int32_t rspLen; - void *pRsp; - void *pNode; + void * pRsp; + void * pNode; } SNodeMsg; typedef struct SRpcInit { @@ -87,7 +87,15 @@ typedef struct { } SRpcCtxVal; typedef struct { - SHashObj *args; + int32_t msgType; + void * val; + int32_t len; + void (*free)(void *arg); +} SRpcBrokenlinkVal; + +typedef struct { + SHashObj * args; + SRpcBrokenlinkVal brokenVal; } SRpcCtx; int32_t rpcInit(); diff --git a/source/libs/index/src/index_fst_automation.c b/source/libs/index/src/index_fst_automation.c index ed1ad7a374..c7f964f2ba 100644 --- a/source/libs/index/src/index_fst_automation.c +++ b/source/libs/index/src/index_fst_automation.c @@ -154,15 +154,7 @@ AutomationCtx* automCtxCreate(void* data, AutomationType atype) { // add more search type } - char* dst = NULL; - if (data != NULL) { - char* src = (char*)data; - size_t len = strlen(src); - dst = (char*)calloc(1, len * sizeof(char) + 1); - memcpy(dst, src, len); - } - - ctx->data = dst; + ctx->data = strdup((char*)data); ctx->type = atype; ctx->stdata = (void*)sv; return ctx; diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index 8cfde8267d..32a0cf0d54 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -277,6 +277,7 @@ void transCtxCleanup(STransCtx* ctx); void transCtxClear(STransCtx* ctx); void transCtxMerge(STransCtx* dst, STransCtx* src); void* transCtxDumpVal(STransCtx* ctx, int32_t key); +void* transCtxDumpBrokenlinkVal(STransCtx* ctx, int32_t* msgType); // queue sending msgs typedef struct { diff --git 
a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index c0ee9b9ca5..ea73e07c80 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -210,6 +210,9 @@ void cliHandleResp(SCliConn* conn) { STransConnCtx* pCtx = pMsg ? pMsg->ctx : NULL; if (pMsg == NULL && !CONN_NO_PERSIST_BY_APP(conn)) { transMsg.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); + if (transMsg.ahandle == NULL) { + transMsg.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); + } } else { transMsg.ahandle = pCtx ? pCtx->ahandle : NULL; } @@ -282,6 +285,9 @@ void cliHandleExcept(SCliConn* pConn) { if (pMsg == NULL && !CONN_NO_PERSIST_BY_APP(pConn)) { transMsg.ahandle = transCtxDumpVal(&pConn->ctx, transMsg.msgType); + if (transMsg.ahandle == NULL) { + transMsg.ahandle = transCtxDumpBrokenlinkVal(&pConn->ctx, (int32_t*)&(transMsg.msgType)); + } } else { transMsg.ahandle = pCtx ? pCtx->ahandle : NULL; } diff --git a/source/libs/transport/src/transComm.c b/source/libs/transport/src/transComm.c index 209475ca05..5684c332c0 100644 --- a/source/libs/transport/src/transComm.c +++ b/source/libs/transport/src/transComm.c @@ -238,12 +238,14 @@ void transCtxCleanup(STransCtx* ctx) { iter->free(iter->val); iter = taosHashIterate(ctx->args, iter); } + taosHashCleanup(ctx->args); } void transCtxMerge(STransCtx* dst, STransCtx* src) { if (dst->args == NULL) { dst->args = src->args; + dst->brokenVal = src->brokenVal; src->args = NULL; return; } @@ -275,6 +277,14 @@ void* transCtxDumpVal(STransCtx* ctx, int32_t key) { memcpy(ret, (char*)cVal->val, cVal->len); return (void*)ret; } +void* transCtxDumpBrokenlinkVal(STransCtx* ctx, int32_t* msgType) { + char* ret = calloc(1, ctx->brokenVal.len); + + memcpy(ret, (char*)(ctx->brokenVal.val), ctx->brokenVal.len); + *msgType = ctx->brokenVal.msgType; + + return (void*)ret; +} void transQueueInit(STransQueue* queue, void (*free)(void* arg)) { queue->q = taosArrayInit(2, 
sizeof(void*)); diff --git a/source/libs/transport/test/transUT.cc b/source/libs/transport/test/transUT.cc index 0b1b1834df..89cb66e9cf 100644 --- a/source/libs/transport/test/transUT.cc +++ b/source/libs/transport/test/transUT.cc @@ -364,9 +364,12 @@ TEST_F(TransEnv, srvReleaseHandle) { SRpcMsg resp = {0}; tr->SetSrvContinueSend(processReleaseHandleCb); // tr->Restart(processReleaseHandleCb); - void *handle = NULL; + void * handle = NULL; + SRpcMsg req = {0}; for (int i = 0; i < 1; i++) { - SRpcMsg req = {.handle = resp.handle, .persistHandle = 1}; + memset(&req, 0, sizeof(req)); + req.handle = resp.handle; + req.persistHandle = 1; req.msgType = 1; req.pCont = rpcMallocCont(10); req.contLen = 10; @@ -378,8 +381,11 @@ TEST_F(TransEnv, srvReleaseHandle) { } TEST_F(TransEnv, cliReleaseHandleExcept) { SRpcMsg resp = {0}; + SRpcMsg req = {0}; for (int i = 0; i < 3; i++) { - SRpcMsg req = {.handle = resp.handle, .persistHandle = 1}; + memset(&req, 0, sizeof(req)); + req.handle = resp.handle; + req.persistHandle = 1; req.msgType = 1; req.pCont = rpcMallocCont(10); req.contLen = 10; @@ -396,8 +402,10 @@ TEST_F(TransEnv, cliReleaseHandleExcept) { } TEST_F(TransEnv, srvContinueSend) { tr->SetSrvContinueSend(processContinueSend); + SRpcMsg req = {0}, resp = {0}; for (int i = 0; i < 10; i++) { - SRpcMsg req = {0}, resp = {0}; + memset(&req, 0, sizeof(req)); + memset(&resp, 0, sizeof(resp)); req.msgType = 1; req.pCont = rpcMallocCont(10); req.contLen = 10; @@ -410,8 +418,10 @@ TEST_F(TransEnv, srvPersistHandleExcept) { tr->SetSrvContinueSend(processContinueSend); // tr->SetCliPersistFp(cliPersistHandle); SRpcMsg resp = {0}; + SRpcMsg req = {0}; for (int i = 0; i < 5; i++) { - SRpcMsg req = {.handle = resp.handle}; + memset(&req, 0, sizeof(req)); + req.handle = resp.handle; req.msgType = 1; req.pCont = rpcMallocCont(10); req.contLen = 10; @@ -428,8 +438,10 @@ TEST_F(TransEnv, srvPersistHandleExcept) { TEST_F(TransEnv, cliPersistHandleExcept) { 
tr->SetSrvContinueSend(processContinueSend); SRpcMsg resp = {0}; + SRpcMsg req = {0}; for (int i = 0; i < 5; i++) { - SRpcMsg req = {.handle = resp.handle}; + memset(&req, 0, sizeof(req)); + req.handle = resp.handle; req.msgType = 1; req.pCont = rpcMallocCont(10); req.contLen = 10; @@ -450,8 +462,11 @@ TEST_F(TransEnv, multiCliPersistHandleExcept) { TEST_F(TransEnv, queryExcept) { tr->SetSrvContinueSend(processRegisterFailure); SRpcMsg resp = {0}; + SRpcMsg req = {0}; for (int i = 0; i < 5; i++) { - SRpcMsg req = {.handle = resp.handle, .persistHandle = 1}; + memset(&req, 0, sizeof(req)); + req.handle = resp.handle; + req.persistHandle = 1; req.msgType = 1; req.pCont = rpcMallocCont(10); req.contLen = 10; @@ -466,8 +481,10 @@ TEST_F(TransEnv, queryExcept) { } TEST_F(TransEnv, noResp) { SRpcMsg resp = {0}; + SRpcMsg req = {0}; for (int i = 0; i < 5; i++) { - SRpcMsg req = {.noResp = 1}; + memset(&req, 0, sizeof(req)); + req.noResp = 1; req.msgType = 1; req.pCont = rpcMallocCont(10); req.contLen = 10; From 43336c0a3928f1e3c43fceff7291f106776e9864 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Mon, 21 Mar 2022 16:43:27 +0800 Subject: [PATCH 04/59] feature/scheduler --- include/common/tmsg.h | 1 - include/libs/qcom/query.h | 2 +- source/common/src/tmsg.c | 2 - source/dnode/mnode/impl/src/mndDb.c | 4 +- source/dnode/vnode/src/vnd/vnodeQuery.c | 5 +- source/libs/qcom/src/queryUtil.c | 4 +- source/libs/qworker/inc/qworkerInt.h | 8 +- source/libs/qworker/inc/qworkerMsg.h | 14 +-- source/libs/qworker/src/qworker.c | 43 +++++---- source/libs/qworker/src/qworkerMsg.c | 10 ++- source/libs/scheduler/inc/schedulerInt.h | 1 - source/libs/scheduler/src/scheduler.c | 88 +++++++++---------- source/libs/scheduler/test/schedulerTests.cpp | 12 +-- 13 files changed, 95 insertions(+), 99 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 9269858645..1bdc07ba6f 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1112,7 +1112,6 @@ int32_t 
tDeserializeSSchedulerHbReq(void* buf, int32_t bufLen, SSchedulerHbReq* void tFreeSSchedulerHbReq(SSchedulerHbReq* pReq); typedef struct { - uint64_t seqId; SQueryNodeEpId epId; SArray* taskStatus; // SArray } SSchedulerHbRsp; diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index de01ae918f..9b500ff0db 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -150,7 +150,7 @@ int32_t cleanupTaskQueue(); */ int32_t taosAsyncExec(__async_exec_fn_t execFn, void* execParam, int32_t* code); -int32_t asyncSendMsgToServerExt(void* pTransporter, SEpSet* epSet, int64_t* pTransporterId, const SMsgSendInfo* pInfo, bool persistHandle, SRpcCtx *ctx); +int32_t asyncSendMsgToServerExt(void* pTransporter, SEpSet* epSet, int64_t* pTransporterId, const SMsgSendInfo* pInfo, bool persistHandle, void *ctx); /** * Asynchronously send message to server, after the response received, the callback will be incured. diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index d5909e2bb7..06d124cb28 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -2567,7 +2567,6 @@ int32_t tSerializeSSchedulerHbRsp(void *buf, int32_t bufLen, SSchedulerHbRsp *pR tCoderInit(&encoder, TD_LITTLE_ENDIAN, buf, bufLen, TD_ENCODER); if (tStartEncode(&encoder) < 0) return -1; - if (tEncodeU64(&encoder, pRsp->seqId) < 0) return -1; if (tEncodeI32(&encoder, pRsp->epId.nodeId) < 0) return -1; if (tEncodeU16(&encoder, pRsp->epId.ep.port) < 0) return -1; if (tEncodeCStr(&encoder, pRsp->epId.ep.fqdn) < 0) return -1; @@ -2596,7 +2595,6 @@ int32_t tDeserializeSSchedulerHbRsp(void *buf, int32_t bufLen, SSchedulerHbRsp * tCoderInit(&decoder, TD_LITTLE_ENDIAN, buf, bufLen, TD_DECODER); if (tStartDecode(&decoder) < 0) return -1; - if (tDecodeU64(&decoder, &pRsp->seqId) < 0) return -1; if (tDecodeI32(&decoder, &pRsp->epId.nodeId) < 0) return -1; if (tDecodeU16(&decoder, &pRsp->epId.ep.port) < 0) return -1; if (tDecodeCStrTo(&decoder, pRsp->epId.ep.fqdn) < 0) 
return -1; diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index ead56c1f8c..8f2a7ef2e5 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -986,7 +986,9 @@ static int32_t mndProcessUseDbReq(SNodeMsg *pReq) { usedbRsp.vgVersion = usedbReq.vgVersion; code = 0; } - usedbRsp.vgNum = taosArrayGetSize(usedbRsp.pVgroupInfos); + usedbRsp.vgNum = taosArrayGetSize(usedbRsp.pVgroupInfos); + + // no jump, need to construct rsp } else { pDb = mndAcquireDb(pMnode, usedbReq.db); if (pDb == NULL) { diff --git a/source/dnode/vnode/src/vnd/vnodeQuery.c b/source/dnode/vnode/src/vnd/vnodeQuery.c index 0ac60ea72d..5762e81ba4 100644 --- a/source/dnode/vnode/src/vnd/vnodeQuery.c +++ b/source/dnode/vnode/src/vnd/vnodeQuery.c @@ -33,9 +33,8 @@ int vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { SReadHandle handle = {.reader = pVnode->pTsdb, .meta = pVnode->pMeta, .config = &pVnode->config}; switch (pMsg->msgType) { - case TDMT_VND_QUERY: { + case TDMT_VND_QUERY: return qWorkerProcessQueryMsg(&handle, pVnode->pQuery, pMsg); - } case TDMT_VND_QUERY_CONTINUE: return qWorkerProcessCQueryMsg(&handle, pVnode->pQuery, pMsg); default: @@ -205,7 +204,7 @@ _exit: rpcSendResponse(&rpcMsg); - return code; + return TSDB_CODE_SUCCESS; } static void freeItemHelper(void *pItem) { diff --git a/source/libs/qcom/src/queryUtil.c b/source/libs/qcom/src/queryUtil.c index b8162f596f..0cf46edf11 100644 --- a/source/libs/qcom/src/queryUtil.c +++ b/source/libs/qcom/src/queryUtil.c @@ -140,7 +140,7 @@ int32_t taosAsyncExec(__async_exec_fn_t execFn, void* execParam, int32_t* code) return 0; } -int32_t asyncSendMsgToServerExt(void* pTransporter, SEpSet* epSet, int64_t* pTransporterId, const SMsgSendInfo* pInfo, bool persistHandle, SRpcCtx *ctx) { +int32_t asyncSendMsgToServerExt(void* pTransporter, SEpSet* epSet, int64_t* pTransporterId, const SMsgSendInfo* pInfo, bool persistHandle, void *rpcCtx) { char* pMsg = 
rpcMallocCont(pInfo->msgInfo.len); if (NULL == pMsg) { qError("0x%" PRIx64 " msg:%s malloc failed", pInfo->requestId, TMSG_INFO(pInfo->msgType)); @@ -163,7 +163,7 @@ int32_t asyncSendMsgToServerExt(void* pTransporter, SEpSet* epSet, int64_t* pTra assert(pInfo->fp != NULL); - rpcSendRequestWithCtx(pTransporter, epSet, &rpcMsg, pTransporterId, ctx); + rpcSendRequestWithCtx(pTransporter, epSet, &rpcMsg, pTransporterId, rpcCtx); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/qworker/inc/qworkerInt.h b/source/libs/qworker/inc/qworkerInt.h index 9d51e1ccbf..ab55b4b76d 100644 --- a/source/libs/qworker/inc/qworkerInt.h +++ b/source/libs/qworker/inc/qworkerInt.h @@ -84,7 +84,7 @@ typedef struct SQWMsg { typedef struct SQWHbInfo { SSchedulerHbRsp rsp; - void *connection; + SQWConnInfo connInfo; } SQWHbInfo; typedef struct SQWPhaseInput { @@ -122,8 +122,8 @@ typedef struct SQWTaskCtx { typedef struct SQWSchStatus { int32_t lastAccessTs; // timestamp in second - uint64_t hbSeqId; - SQWConnInfo *hbConnection; + SRWLatch connLock; + SQWConnInfo connInfo; SRWLatch tasksLock; SHashObj *tasksHash; // key:queryId+taskId, value: SQWTaskStatus } SQWSchStatus; @@ -225,8 +225,6 @@ typedef struct SQWorkerMgmt { } \ } while (0) -int32_t qwBuildAndSendCancelRsp(SRpcMsg *pMsg, int32_t code); - #ifdef __cplusplus } #endif diff --git a/source/libs/qworker/inc/qworkerMsg.h b/source/libs/qworker/inc/qworkerMsg.h index 141e8f7916..be1d47a189 100644 --- a/source/libs/qworker/inc/qworkerMsg.h +++ b/source/libs/qworker/inc/qworkerMsg.h @@ -30,17 +30,17 @@ int32_t qwProcessFetch(QW_FPARAMS_DEF, SQWMsg *qwMsg); int32_t qwProcessDrop(QW_FPARAMS_DEF, SQWMsg *qwMsg); int32_t qwProcessHb(SQWorkerMgmt *mgmt, SQWMsg *qwMsg, SSchedulerHbReq *req); -int32_t qwBuildAndSendDropRsp(void *connection, int32_t code); -int32_t qwBuildAndSendCancelRsp(SRpcMsg *pMsg, int32_t code); -int32_t qwBuildAndSendFetchRsp(void *connection, SRetrieveTableRsp *pRsp, int32_t dataLength, int32_t code); +int32_t 
qwBuildAndSendDropRsp(SQWConnInfo *pConn, int32_t code); +int32_t qwBuildAndSendCancelRsp(SQWConnInfo *pConn, int32_t code); +int32_t qwBuildAndSendFetchRsp(SQWConnInfo *pConn, SRetrieveTableRsp *pRsp, int32_t dataLength, int32_t code); void qwBuildFetchRsp(void *msg, SOutputData *input, int32_t len, bool qComplete); -int32_t qwBuildAndSendCQueryMsg(QW_FPARAMS_DEF, void *connection); -int32_t qwBuildAndSendReadyRsp(void *connection, int32_t code); -int32_t qwBuildAndSendQueryRsp(void *connection, int32_t code); +int32_t qwBuildAndSendCQueryMsg(QW_FPARAMS_DEF, SQWConnInfo *pConn); +int32_t qwBuildAndSendReadyRsp(SQWConnInfo *pConn, int32_t code); +int32_t qwBuildAndSendQueryRsp(SQWConnInfo *pConn, int32_t code); void qwFreeFetchRsp(void *msg); int32_t qwMallocFetchRsp(int32_t length, SRetrieveTableRsp **rsp); int32_t qwGetSchTasksStatus(SQWorkerMgmt *mgmt, uint64_t sId, SSchedulerStatusRsp **rsp); -int32_t qwBuildAndSendHbRsp(SRpcMsg *pMsg, SSchedulerHbRsp *rsp, int32_t code); +int32_t qwBuildAndSendHbRsp(SQWConnInfo *pConn, SSchedulerHbRsp *rsp, int32_t code); int32_t qwRegisterBrokenLinkArg(QW_FPARAMS_DEF, SQWConnInfo *pConn); diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index c025567fb2..3b01c2c29e 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -402,7 +402,7 @@ int32_t qwKillTaskHandle(QW_FPARAMS_DEF, SQWTaskCtx *ctx) { void qwFreeTask(QW_FPARAMS_DEF, SQWTaskCtx *ctx) { - rpcReleaseHandle(ctx->connInfo.handle, CONN_SERVER); + rpcReleaseHandle(ctx->connInfo.handle, TAOS_CONN_SERVER); ctx->connInfo.handle = NULL; qwFreeTaskHandle(QW_FPARAMS(), &ctx->taskHandle); @@ -591,8 +591,7 @@ int32_t qwGenerateSchHbRsp(SQWorkerMgmt *mgmt, SQWSchStatus *sch, SQWHbInfo *hbI return TSDB_CODE_QRY_OUT_OF_MEMORY; } - hbInfo->connection = sch->hbConnection; - hbInfo->rsp.seqId = -1; + hbInfo->connInfo = sch->connInfo; void *key = NULL; size_t keyLen = 0; @@ -947,7 +946,7 @@ _return: QW_TASK_DLOG("query 
msg rsped, code:%x - %s", code, tstrerror(code)); } - QW_RET(code); + QW_RET(TSDB_CODE_SUCCESS); } int32_t qwProcessReady(QW_FPARAMS_DEF, SQWMsg *qwMsg) { @@ -1010,7 +1009,7 @@ _return: QW_TASK_DLOG("ready msg rsped, code:%x - %s", code, tstrerror(code)); } - QW_RET(code); + QW_RET(TSDB_CODE_SUCCESS); } @@ -1081,7 +1080,9 @@ _return: } while (true); input.code = code; - QW_RET(qwHandlePostPhaseEvents(QW_FPARAMS(), QW_PHASE_POST_CQUERY, &input, NULL)); + qwHandlePostPhaseEvents(QW_FPARAMS(), QW_PHASE_POST_CQUERY, &input, NULL); + + QW_RET(TSDB_CODE_SUCCESS); } @@ -1148,7 +1149,7 @@ _return: QW_TASK_DLOG("fetch msg rsped, code:%x, dataLen:%d", code, dataLen); } - QW_RET(code); + QW_RET(TSDB_CODE_SUCCESS); } @@ -1207,12 +1208,12 @@ _return: } if (TSDB_CODE_SUCCESS != code || needRsp) { - QW_ERR_RET(qwBuildAndSendDropRsp(&qwMsg->connInfo, code)); + qwBuildAndSendDropRsp(&qwMsg->connInfo, code); QW_TASK_DLOG("drop msg rsped, code:%x", code); } - QW_RET(code); + QW_RET(TSDB_CODE_SUCCESS); } int32_t qwProcessHb(SQWorkerMgmt *mgmt, SQWMsg *qwMsg, SSchedulerHbReq *req) { @@ -1220,18 +1221,22 @@ int32_t qwProcessHb(SQWorkerMgmt *mgmt, SQWMsg *qwMsg, SSchedulerHbReq *req) { SSchedulerHbRsp rsp = {0}; SQWSchStatus *sch = NULL; uint64_t seqId = 0; + void *origHandle = NULL; memcpy(&rsp.epId, &req->epId, sizeof(req->epId)); QW_ERR_JRET(qwAcquireAddScheduler(mgmt, req->sId, QW_READ, &sch)); - atomic_store_ptr(&sch->hbConnection, qwMsg->connInfo); - ++sch->hbSeqId; - - rsp.seqId = sch->hbSeqId; - - QW_DLOG("hb connection updated, seqId:%" PRIx64 ", sId:%" PRIx64 ", nodeId:%d, fqdn:%s, port:%d, handle:%p, ahandle:%p", - sch->hbSeqId, req->sId, req->epId.nodeId, req->epId.ep.fqdn, req->epId.ep.port, qwMsg->connInfo.handle, qwMsg->connInfo.ahandle); + QW_LOCK(QW_WRITE, &sch->connLock); + + origHandle = sch->connInfo.handle; + + memcpy(&sch->connInfo, &qwMsg->connInfo, sizeof(qwMsg->connInfo)); + + QW_UNLOCK(QW_WRITE, &sch->connLock); + + QW_DLOG("hb connection updated, sId:%" PRIx64 
", nodeId:%d, fqdn:%s, port:%d, handle:%p, ahandle:%p", + req->sId, req->epId.nodeId, req->epId.ep.fqdn, req->epId.ep.port, qwMsg->connInfo.handle, qwMsg->connInfo.ahandle); qwReleaseScheduler(QW_READ, mgmt); @@ -1239,7 +1244,7 @@ _return: qwBuildAndSendHbRsp(&qwMsg->connInfo, &rsp, code); - QW_RET(code); + QW_RET(TSDB_CODE_SUCCESS); } @@ -1288,8 +1293,8 @@ _return: QW_UNLOCK(QW_READ, &mgmt->schLock); for (int32_t j = 0; j < i; ++j) { - QW_DLOG("hb on connection %p, taskNum:%d", rspList[j].connection, (rspList[j].rsp.taskStatus ? (int32_t)taosArrayGetSize(rspList[j].rsp.taskStatus) : 0)); - qwBuildAndSendHbRsp(rspList[j].connection, &rspList[j].rsp, code); + QW_DLOG("hb on connection handle %p, taskNum:%d", rspList[j].connInfo.handle, (rspList[j].rsp.taskStatus ? (int32_t)taosArrayGetSize(rspList[j].rsp.taskStatus) : 0)); + qwBuildAndSendHbRsp(&rspList[j].connInfo, &rspList[j].rsp, code); tFreeSSchedulerHbRsp(&rspList[j].rsp); } diff --git a/source/libs/qworker/src/qworkerMsg.c b/source/libs/qworker/src/qworkerMsg.c index b07ddb7196..42d5b94397 100644 --- a/source/libs/qworker/src/qworkerMsg.c +++ b/source/libs/qworker/src/qworkerMsg.c @@ -484,11 +484,15 @@ int32_t qWorkerProcessCancelMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { msg->taskId = be64toh(msg->taskId); msg->refId = be64toh(msg->refId); + SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0}; + qwMsg.connInfo.handle = pMsg->handle; + qwMsg.connInfo.ahandle = pMsg->ahandle; + //QW_ERR_JRET(qwCancelTask(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId)); _return: - QW_ERR_RET(qwBuildAndSendCancelRsp(pMsg, code)); + QW_ERR_RET(qwBuildAndSendCancelRsp(&qwMsg.connInfo, code)); return TSDB_CODE_SUCCESS; } @@ -572,7 +576,7 @@ int32_t qWorkerProcessShowMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { int32_t code = 0; SVShowTablesReq *pReq = pMsg->pCont; - QW_ERR_RET(qwBuildAndSendShowRsp(pMsg, code)); + QW_RET(qwBuildAndSendShowRsp(pMsg, code)); } int32_t qWorkerProcessShowFetchMsg(void *node, void 
*qWorkerMgmt, SRpcMsg *pMsg) { @@ -581,7 +585,7 @@ int32_t qWorkerProcessShowFetchMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) } SVShowTablesFetchReq *pFetchReq = pMsg->pCont; - QW_ERR_RET(qwBuildAndSendShowFetchRsp(pMsg, pFetchReq)); + QW_RET(qwBuildAndSendShowFetchRsp(pMsg, pFetchReq)); } diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h index 1c40f255cf..2c0311d593 100644 --- a/source/libs/scheduler/inc/schedulerInt.h +++ b/source/libs/scheduler/inc/schedulerInt.h @@ -44,7 +44,6 @@ typedef struct SSchTrans { typedef struct SSchHbTrans { SRWLatch lock; - uint64_t seqId; SSchTrans trans; } SSchHbTrans; diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index fc28427c05..7bad1860a9 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -18,6 +18,7 @@ #include "schedulerInt.h" #include "tmsg.h" #include "tref.h" +#include "trpc.h" SSchedulerMgmt schMgmt = {0}; @@ -73,7 +74,7 @@ void schFreeRpcCtx(SRpcCtx *pCtx) { while (pIter) { SRpcCtxVal *ctxVal = (SRpcCtxVal *)pIter; - ctxVal->free(ctxVal->v); + ctxVal->free(ctxVal->val); pIter = taosHashIterate(pCtx->args, pIter); } @@ -127,7 +128,7 @@ int32_t schValidateTaskReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t m } SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); - return; + return TSDB_CODE_SUCCESS; case TDMT_VND_RES_READY_RSP: reqMsgType = TDMT_VND_QUERY; break; @@ -658,52 +659,42 @@ int32_t schHandleTaskRetry(SSchJob *pJob, SSchTask *pTask) { return TSDB_CODE_SUCCESS; } +int32_t schRegisterHbConnection(SQueryNodeEpId *epId, bool *exist) { + int32_t code = 0; + SSchHbTrans hb = {0}; + + code = taosHashPut(schMgmt.hbConnections, epId, sizeof(SQueryNodeEpId), &hb, sizeof(SSchHbTrans)); + if (code) { + if (HASH_NODE_EXIST(code)) { + *exist = true; + return TSDB_CODE_SUCCESS; + } + + qError("taosHashPut hb trans failed, nodeId:%d, fqdn:%s, port:%d", epId->nodeId, epId->ep.fqdn, 
epId->ep.port); + SCH_ERR_RET(code); + } + + return TSDB_CODE_SUCCESS; +} + + int32_t schUpdateHbConnection(SQueryNodeEpId *epId, SSchHbTrans *trans) { int32_t code = 0; SSchHbTrans *hb = NULL; - while (true) { - hb = taosHashGet(schMgmt.hbConnections, epId, sizeof(SQueryNodeEpId)); - if (NULL == hb) { - code = taosHashPut(schMgmt.hbConnections, epId, sizeof(SQueryNodeEpId), trans, sizeof(SSchHbTrans)); - if (code) { - if (HASH_NODE_EXIST(code)) { - continue; - } - - qError("taosHashPut hb trans failed, nodeId:%d, fqdn:%s, port:%d", epId->nodeId, epId->ep.fqdn, epId->ep.port); - SCH_ERR_RET(code); - } - - qDebug("hb connection updated, seqId:%" PRIx64 ", sId:%" PRIx64 - ", nodeId:%d, fqdn:%s, port:%d, instance:%p, connection:%p", - trans->seqId, schMgmt.sId, epId->nodeId, epId->ep.fqdn, epId->ep.port, trans->trans.transInst, - trans->trans.transHandle); - - return TSDB_CODE_SUCCESS; - } - - break; + hb = taosHashGet(schMgmt.hbConnections, epId, sizeof(SQueryNodeEpId)); + if (NULL == hb) { + qError("taosHashGet hb connection failed, nodeId:%d, fqdn:%s, port:%d", epId->nodeId, epId->ep.fqdn, epId->ep.port); + SCH_ERR_RET(code); } SCH_LOCK(SCH_WRITE, &hb->lock); - - if (hb->seqId >= trans->seqId) { - qDebug("hb trans seqId is old, seqId:%" PRId64 ", currentId:%" PRId64 ", nodeId:%d, fqdn:%s, port:%d", trans->seqId, - hb->seqId, epId->nodeId, epId->ep.fqdn, epId->ep.port); - - SCH_UNLOCK(SCH_WRITE, &hb->lock); - return TSDB_CODE_SUCCESS; - } - - hb->seqId = trans->seqId; memcpy(&hb->trans, &trans->trans, sizeof(trans->trans)); - SCH_UNLOCK(SCH_WRITE, &hb->lock); - qDebug("hb connection updated, seqId:%" PRIx64 ", sId:%" PRIx64 + qDebug("hb connection updated, sId:%" PRIx64 ", nodeId:%d, fqdn:%s, port:%d, instance:%p, connection:%p", - trans->seqId, schMgmt.sId, epId->nodeId, epId->ep.fqdn, epId->ep.port, trans->trans.transInst, + schMgmt.sId, epId->nodeId, epId->ep.fqdn, epId->ep.port, trans->trans.transInst, trans->trans.transHandle); return TSDB_CODE_SUCCESS; @@ 
-1159,14 +1150,11 @@ int32_t schHandleHbCallback(void *param, const SDataBuf *pMsg, int32_t code) { SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); } - if (rsp.seqId != (uint64_t)-1) { - SSchHbTrans trans = {0}; - trans.seqId = rsp.seqId; - trans.trans.transInst = pParam->transport; - trans.trans.transHandle = pMsg->handle; + SSchHbTrans trans = {0}; + trans.trans.transInst = pParam->transport; + trans.trans.transHandle = pMsg->handle; - SCH_RET(schUpdateHbConnection(&rsp.epId, &trans)); - } + SCH_RET(schUpdateHbConnection(&rsp.epId, &trans)); int32_t taskNum = (int32_t)taosArrayGetSize(rsp.taskStatus); for (int32_t i = 0; i < taskNum; ++i) { @@ -1232,7 +1220,7 @@ void schFreeRpcCtxVal(void *arg) { tfree(pMsgSendInfo->param); tfree(pMsgSendInfo); } - + int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { int32_t code = 0; SSchCallbackParam *param = NULL; @@ -1268,7 +1256,7 @@ int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { pMsgSendInfo->param = param; pMsgSendInfo->fp = fp; - SRpcCtxVal ctxVal = {.v = pMsgSendInfo, .len = sizeof(SMsgSendInfo), .free = schFreeRpcCtxVal}; + SRpcCtxVal ctxVal = {.val = pMsgSendInfo, .len = sizeof(SMsgSendInfo), .free = schFreeRpcCtxVal}; if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1320,7 +1308,7 @@ int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { pMsgSendInfo->param = param; pMsgSendInfo->fp = fp; - SRpcCtxVal ctxVal = {.v = pMsgSendInfo, .len = sizeof(SMsgSendInfo), .free = schFreeRpcCtxVal}; + SRpcCtxVal ctxVal = {.val = pMsgSendInfo, .len = sizeof(SMsgSendInfo), .free = schFreeRpcCtxVal}; if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1557,7 +1545,11 @@ int32_t 
schEnsureHbConnection(SSchJob *pJob, SSchTask *pTask) { SSchHbTrans *hb = taosHashGet(schMgmt.hbConnections, &epId, sizeof(SQueryNodeEpId)); if (NULL == hb) { - SCH_ERR_RET(schBuildAndSendMsg(pJob, NULL, addr, TDMT_VND_QUERY_HEARTBEAT)); + bool exist = false; + SCH_ERR_RET(schRegisterHbConnection(&epId, &exist)); + if (!exist) { + SCH_ERR_RET(schBuildAndSendMsg(pJob, NULL, addr, TDMT_VND_QUERY_HEARTBEAT)); + } } return TSDB_CODE_SUCCESS; diff --git a/source/libs/scheduler/test/schedulerTests.cpp b/source/libs/scheduler/test/schedulerTests.cpp index bf73617d3e..503f5de5f8 100644 --- a/source/libs/scheduler/test/schedulerTests.cpp +++ b/source/libs/scheduler/test/schedulerTests.cpp @@ -746,7 +746,7 @@ TEST(queryTest, readyFirstCase) { SSchJob *pJob = schAcquireJob(job); - pIter = taosHashIterate(pJob->execTasks, NULL); + void *pIter = taosHashIterate(pJob->execTasks, NULL); while (pIter) { SSchTask *task = *(SSchTask **)pIter; @@ -757,7 +757,7 @@ TEST(queryTest, readyFirstCase) { pIter = taosHashIterate(pJob->execTasks, pIter); } - void *pIter = taosHashIterate(pJob->execTasks, NULL); + pIter = taosHashIterate(pJob->execTasks, NULL); while (pIter) { SSchTask *task = *(SSchTask **)pIter; @@ -792,11 +792,11 @@ TEST(queryTest, readyFirstCase) { - pthread_attr_t thattr; - pthread_attr_init(&thattr); + TdThreadAttr thattr; + taosThreadAttrInit(&thattr); - pthread_t thread1; - pthread_create(&(thread1), &thattr, schtCreateFetchRspThread, &job); + TdThread thread1; + taosThreadCreate(&(thread1), &thattr, schtCreateFetchRspThread, &job); void *data = NULL; code = schedulerFetchRows(job, &data); From 195ca4ab2568bc067afdb25b5a4d3833339ee737 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 22 Mar 2022 11:32:53 +0800 Subject: [PATCH 05/59] feature/scheduler --- include/common/tmsgdef.h | 4 + source/libs/scheduler/inc/schedulerInt.h | 26 +- source/libs/scheduler/src/scheduler.c | 378 ++++++++++++++++-- source/libs/scheduler/test/schedulerTests.cpp | 14 +- 4 files changed, 365 
insertions(+), 57 deletions(-) diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 73a78131dc..a1f6209650 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -204,6 +204,10 @@ enum { TD_DEF_MSG_TYPE(TDMT_SND_TASK_DEPLOY, "snode-task-deploy", SStreamTaskDeployReq, SStreamTaskDeployRsp) TD_DEF_MSG_TYPE(TDMT_SND_TASK_EXEC, "snode-task-exec", SStreamTaskExecReq, SStreamTaskExecRsp) + // Requests handled by SCHEDULER + TD_NEW_MSG_SEG(TDMT_SCH_MSG) + TD_DEF_MSG_TYPE(TDMT_SCH_LINK_BROKEN, "scheduler-link-broken", NULL, NULL) + #if defined(TD_MSG_NUMBER_) TDMT_MAX #endif diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h index 2c0311d593..fe27b85b40 100644 --- a/source/libs/scheduler/inc/schedulerInt.h +++ b/source/libs/scheduler/inc/schedulerInt.h @@ -25,6 +25,7 @@ extern "C" { #include "planner.h" #include "scheduler.h" #include "thash.h" +#include "trpc.h" #define SCHEDULE_DEFAULT_MAX_JOB_NUM 1000 #define SCHEDULE_DEFAULT_MAX_TASK_NUM 1000 @@ -44,6 +45,7 @@ typedef struct SSchTrans { typedef struct SSchHbTrans { SRWLatch lock; + SRpcCtx rpcCtx; SSchTrans trans; } SSchHbTrans; @@ -75,12 +77,23 @@ typedef struct SSchedulerMgmt { SHashObj *hbConnections; } SSchedulerMgmt; -typedef struct SSchCallbackParam { - uint64_t queryId; - int64_t refId; - uint64_t taskId; - void *transport; -} SSchCallbackParam; +typedef struct SSchCallbackParamHeader { + bool isHbParam; +} SSchCallbackParamHeader; + +typedef struct SSchTaskCallbackParam { + SSchCallbackParamHeader head; + uint64_t queryId; + int64_t refId; + uint64_t taskId; + void *transport; +} SSchTaskCallbackParam; + +typedef struct SSchHbCallbackParam { + SSchCallbackParamHeader head; + SQueryNodeEpId nodeEpId; + void *transport; +} SSchHbCallbackParam; typedef struct SSchFlowControl { SRWLatch lock; @@ -227,6 +240,7 @@ int32_t schLaunchTasksInFlowCtrlList(SSchJob *pJob, SSchTask *pTask); int32_t schLaunchTaskImpl(SSchJob *pJob, SSchTask 
*pTask); int32_t schFetchFromRemote(SSchJob *pJob); int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode); +int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId); #ifdef __cplusplus diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 7bad1860a9..23233fc4c3 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -74,12 +74,16 @@ void schFreeRpcCtx(SRpcCtx *pCtx) { while (pIter) { SRpcCtxVal *ctxVal = (SRpcCtxVal *)pIter; - ctxVal->free(ctxVal->val); + (*ctxVal->free)(ctxVal->val); pIter = taosHashIterate(pCtx->args, pIter); } taosHashCleanup(pCtx->args); + + if (pCtx->brokenVal.free) { + (*pCtx->brokenVal.free)(pCtx->brokenVal.val); + } } void schFreeTask(SSchTask* pTask) { @@ -118,6 +122,8 @@ int32_t schValidateTaskReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t m int32_t taskStatus = SCH_GET_TASK_STATUS(pTask); int32_t reqMsgType = msgType - 1; switch (msgType) { + case TDMT_SCH_LINK_BROKEN: + return TSDB_CODE_SUCCESS; case TDMT_VND_QUERY_RSP: // query_rsp may be processed later than ready_rsp if (lastMsgType != reqMsgType) { SCH_TASK_DLOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), TMSG_INFO(msgType)); @@ -659,26 +665,7 @@ int32_t schHandleTaskRetry(SSchJob *pJob, SSchTask *pTask) { return TSDB_CODE_SUCCESS; } -int32_t schRegisterHbConnection(SQueryNodeEpId *epId, bool *exist) { - int32_t code = 0; - SSchHbTrans hb = {0}; - - code = taosHashPut(schMgmt.hbConnections, epId, sizeof(SQueryNodeEpId), &hb, sizeof(SSchHbTrans)); - if (code) { - if (HASH_NODE_EXIST(code)) { - *exist = true; - return TSDB_CODE_SUCCESS; - } - - qError("taosHashPut hb trans failed, nodeId:%d, fqdn:%s, port:%d", epId->nodeId, epId->ep.fqdn, epId->ep.port); - SCH_ERR_RET(code); - } - - return TSDB_CODE_SUCCESS; -} - - -int32_t schUpdateHbConnection(SQueryNodeEpId *epId, SSchHbTrans *trans) { +int32_t 
schUpdateHbConnection(SQueryNodeEpId *epId, SSchTrans *trans) { int32_t code = 0; SSchHbTrans *hb = NULL; @@ -689,13 +676,13 @@ int32_t schUpdateHbConnection(SQueryNodeEpId *epId, SSchHbTrans *trans) { } SCH_LOCK(SCH_WRITE, &hb->lock); - memcpy(&hb->trans, &trans->trans, sizeof(trans->trans)); + memcpy(&hb->trans, trans, sizeof(*trans)); SCH_UNLOCK(SCH_WRITE, &hb->lock); qDebug("hb connection updated, sId:%" PRIx64 - ", nodeId:%d, fqdn:%s, port:%d, instance:%p, connection:%p", - schMgmt.sId, epId->nodeId, epId->ep.fqdn, epId->ep.port, trans->trans.transInst, - trans->trans.transHandle); + ", nodeId:%d, fqdn:%s, port:%d, instance:%p, handle:%p", + schMgmt.sId, epId->nodeId, epId->ep.fqdn, epId->ep.port, trans->transInst, + trans->transHandle); return TSDB_CODE_SUCCESS; } @@ -1058,6 +1045,10 @@ int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, ch SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR); break; } + case TDMT_SCH_LINK_BROKEN: + SCH_TASK_ELOG("link broken received, error:%x - %s", rspCode, tstrerror(rspCode)); + SCH_ERR_JRET(rspCode); + break; default: SCH_TASK_ELOG("unknown rsp msg, type:%d, status:%s", msgType, SCH_GET_TASK_STATUS_STR(pTask)); SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); @@ -1072,7 +1063,7 @@ _return: int32_t schHandleCallback(void *param, const SDataBuf *pMsg, int32_t msgType, int32_t rspCode) { int32_t code = 0; - SSchCallbackParam *pParam = (SSchCallbackParam *)param; + SSchTaskCallbackParam *pParam = (SSchTaskCallbackParam *)param; SSchTask *pTask = NULL; SSchJob *pJob = schAcquireJob(pParam->refId); @@ -1131,7 +1122,7 @@ int32_t schHandleReadyCallback(void *param, const SDataBuf *pMsg, int32_t code) } int32_t schHandleDropCallback(void *param, const SDataBuf *pMsg, int32_t code) { - SSchCallbackParam *pParam = (SSchCallbackParam *)param; + SSchTaskCallbackParam *pParam = (SSchTaskCallbackParam *)param; qDebug("QID:%" PRIx64 ",TID:%" PRIx64 " drop task rsp received, code:%x", pParam->queryId, pParam->taskId, code); } 
@@ -1143,16 +1134,16 @@ int32_t schHandleHbCallback(void *param, const SDataBuf *pMsg, int32_t code) { SSchedulerHbRsp rsp = {0}; - SSchCallbackParam *pParam = (SSchCallbackParam *)param; + SSchHbCallbackParam *pParam = (SSchHbCallbackParam *)param; if (tDeserializeSSchedulerHbRsp(pMsg->pData, pMsg->len, &rsp)) { qError("invalid hb rsp msg, size:%d", pMsg->len); SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); } - SSchHbTrans trans = {0}; - trans.trans.transInst = pParam->transport; - trans.trans.transHandle = pMsg->handle; + SSchTrans trans = {0}; + trans.transInst = pParam->transport; + trans.transHandle = pMsg->handle; SCH_RET(schUpdateHbConnection(&rsp.epId, &trans)); @@ -1180,6 +1171,24 @@ _return: SCH_RET(code); } +int32_t schHandleLinkBrokenCallback(void *param, const SDataBuf *pMsg, int32_t code) { + SSchCallbackParamHeader *head = (SSchCallbackParamHeader *)param; + rpcReleaseHandle(pMsg->handle, TAOS_CONN_CLIENT); + + if (head->isHbParam) { + SSchHbCallbackParam *hbParam = (SSchHbCallbackParam *)param; + SSchTrans trans = {.transInst = hbParam->transport, .transHandle = NULL}; + SCH_ERR_RET(schUpdateHbConnection(&hbParam->nodeEpId, &trans)); + + SCH_ERR_RET(schBuildAndSendHbMsg(&hbParam->nodeEpId)); + } else { + SCH_ERR_RET(schHandleCallback(param, pMsg, TDMT_SCH_LINK_BROKEN, code)); + } + + return TSDB_CODE_SUCCESS; +} + + int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp) { switch (msgType) { case TDMT_VND_CREATE_TABLE: @@ -1203,6 +1212,9 @@ int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp) { case TDMT_VND_QUERY_HEARTBEAT: *fp = schHandleHbCallback; break; + case TDMT_SCH_LINK_BROKEN: + *fp = schHandleLinkBrokenCallback; + break; default: qError("unknown msg type for callback, msgType:%d", msgType); SCH_ERR_RET(TSDB_CODE_QRY_APP_ERROR); @@ -1221,9 +1233,84 @@ void schFreeRpcCtxVal(void *arg) { tfree(pMsgSendInfo); } +int32_t schMakeTaskCallbackParam(SSchJob *pJob, SSchTask *pTask, void **pParam) { + SSchTaskCallbackParam 
*param = calloc(1, sizeof(SSchTaskCallbackParam)); + if (NULL == param) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchTaskCallbackParam)); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + param->queryId = pJob->queryId; + param->refId = pJob->refId; + param->taskId = SCH_TASK_ID(pTask); + param->transport = pJob->transport; + + *pParam = param; + + return TSDB_CODE_SUCCESS; +} + +int32_t schMakeHbCallbackParam(SSchJob *pJob, SSchTask *pTask, void **pParam) { + SSchHbCallbackParam *param = calloc(1, sizeof(SSchHbCallbackParam)); + if (NULL == param) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchHbCallbackParam)); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + param->head.isHbParam = true; + + SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); + + param->nodeEpId.nodeId = addr->nodeId; + memcpy(¶m->nodeEpId.ep, SCH_GET_CUR_EP(addr), sizeof(SEp)); + param->transport = pJob->transport; + + *pParam = param; + + return TSDB_CODE_SUCCESS; +} + + +int32_t schMakeBrokenLinkVal(SSchJob *pJob, SSchTask *pTask, SRpcBrokenlinkVal *brokenVal, bool isHb) { + int32_t code = 0; + SMsgSendInfo* pMsgSendInfo = NULL; + + pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo)); + if (NULL == pMsgSendInfo) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + if (isHb) { + SCH_ERR_JRET(schMakeHbCallbackParam(pJob, pTask, &pMsgSendInfo->param)); + } else { + SCH_ERR_JRET(schMakeTaskCallbackParam(pJob, pTask, &pMsgSendInfo->param)); + } + + int32_t msgType = TDMT_SCH_LINK_BROKEN; + __async_send_cb_fn_t fp = NULL; + SCH_ERR_JRET(schGetCallbackFp(msgType, &fp)); + + pMsgSendInfo->fp = fp; + + brokenVal->msgType = msgType; + brokenVal->val = pMsgSendInfo; + brokenVal->len = sizeof(SMsgSendInfo); + brokenVal->free = schFreeRpcCtxVal; + + return TSDB_CODE_SUCCESS; + +_return: + + tfree(pMsgSendInfo->param); + tfree(pMsgSendInfo); + + SCH_RET(code); +} + int32_t 
schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { int32_t code = 0; - SSchCallbackParam *param = NULL; + SSchTaskCallbackParam *param = NULL; SMsgSendInfo* pMsgSendInfo = NULL; pCtx->args = taosHashInit(1, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); @@ -1238,9 +1325,9 @@ int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - param = calloc(1, sizeof(SSchCallbackParam)); + param = calloc(1, sizeof(SSchTaskCallbackParam)); if (NULL == param) { - SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchCallbackParam)); + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchTaskCallbackParam)); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } @@ -1262,6 +1349,8 @@ int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } + SCH_ERR_JRET(schMakeBrokenLinkVal(pJob, pTask, &pCtx->brokenVal, false)); + return TSDB_CODE_SUCCESS; _return: @@ -1275,8 +1364,13 @@ _return: int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { int32_t code = 0; - SSchCallbackParam *param = NULL; + SSchHbCallbackParam *param = NULL; SMsgSendInfo* pMsgSendInfo = NULL; + SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); + SQueryNodeEpId epId = {0}; + + epId.nodeId = addr->nodeId; + memcpy(&epId.ep, SCH_GET_CUR_EP(addr), sizeof(SEp)); pCtx->args = taosHashInit(1, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); if (NULL == pCtx->args) { @@ -1290,9 +1384,9 @@ int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - param = calloc(1, sizeof(SSchCallbackParam)); + param = calloc(1, sizeof(SSchHbCallbackParam)); if (NULL == param) { - SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchCallbackParam)); + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchHbCallbackParam)); 
SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } @@ -1300,9 +1394,7 @@ int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { __async_send_cb_fn_t fp = NULL; SCH_ERR_JRET(schGetCallbackFp(TDMT_VND_QUERY_HEARTBEAT, &fp)); - param->queryId = pJob->queryId; - param->refId = pJob->refId; - param->taskId = SCH_TASK_ID(pTask); + param->nodeEpId = epId; param->transport = pJob->transport; pMsgSendInfo->param = param; @@ -1314,6 +1406,8 @@ int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } + SCH_ERR_JRET(schMakeBrokenLinkVal(pJob, pTask, &pCtx->brokenVal, true)); + return TSDB_CODE_SUCCESS; _return: @@ -1326,6 +1420,118 @@ _return: } +int32_t schRegisterHbConnection(SSchJob *pJob, SSchTask *pTask, SQueryNodeEpId *epId, bool *exist) { + int32_t code = 0; + SSchHbTrans hb = {0}; + + hb.trans.transInst = pJob->transport; + + SCH_ERR_RET(schMakeHbRpcCtx(pJob, pTask, &hb.rpcCtx)); + + code = taosHashPut(schMgmt.hbConnections, epId, sizeof(SQueryNodeEpId), &hb, sizeof(SSchHbTrans)); + if (code) { + schFreeRpcCtx(&hb.rpcCtx); + + if (HASH_NODE_EXIST(code)) { + *exist = true; + return TSDB_CODE_SUCCESS; + } + + qError("taosHashPut hb trans failed, nodeId:%d, fqdn:%s, port:%d", epId->nodeId, epId->ep.fqdn, epId->ep.port); + SCH_ERR_RET(code); + } + + return TSDB_CODE_SUCCESS; +} + + + +int32_t schCloneCallbackParam(SSchCallbackParamHeader *pSrc, SSchCallbackParamHeader **pDst) { + if (pSrc->isHbParam) { + SSchHbCallbackParam *dst = malloc(sizeof(SSchHbCallbackParam)); + if (NULL == dst) { + qError("malloc SSchHbCallbackParam failed"); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + memcpy(dst, pSrc, sizeof(*dst)); + *pDst = (SSchCallbackParamHeader *)dst; + + return TSDB_CODE_SUCCESS; + } + + SSchTaskCallbackParam *dst = malloc(sizeof(SSchTaskCallbackParam)); + if (NULL == dst) { + qError("malloc SSchTaskCallbackParam failed"); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + memcpy(dst, pSrc, 
sizeof(*dst)); + *pDst = (SSchCallbackParamHeader *)dst; + + return TSDB_CODE_SUCCESS; +} + +int32_t schCloneSMsgSendInfo(SMsgSendInfo *pSrc, SMsgSendInfo **pDst) { + int32_t code = 0; + SMsgSendInfo *dst = malloc(sizeof(*pSrc)); + if (NULL == dst) { + qError("malloc SMsgSendInfo for rpcCtx failed, len:%d", (int32_t)sizeof(*pSrc)); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + memcpy(dst, pSrc, sizeof(*pSrc)); + dst->param = NULL; + + SCH_ERR_JRET(schCloneCallbackParam(pSrc->param, (SSchCallbackParamHeader **)&dst->param)); + + return TSDB_CODE_SUCCESS; + +_return: + + tfree(dst); + SCH_RET(code); +} + +int32_t schCloneHbRpcCtx(SRpcCtx *pSrc, SRpcCtx *pDst) { + int32_t code = 0; + memcpy(&pDst->brokenVal, &pSrc->brokenVal, sizeof(pSrc->brokenVal)); + pDst->brokenVal.val = NULL; + + SCH_ERR_RET(schCloneSMsgSendInfo(pSrc->brokenVal.val, (SMsgSendInfo **)&pDst->brokenVal.val)); + + pDst->args = taosHashInit(1, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); + if (NULL == pDst->args) { + qError("taosHashInit %d RpcCtx failed", 1); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SRpcCtxVal dst = {0}; + void *pIter = taosHashIterate(pSrc->args, NULL); + while (pIter) { + SRpcCtxVal *pVal = (SRpcCtxVal *)pIter; + int32_t *msgType = taosHashGetKey(pIter, NULL); + + SCH_ERR_JRET(schCloneSMsgSendInfo(pVal->val, (SMsgSendInfo **)&dst.val)); + + if (taosHashPut(pDst->args, msgType, sizeof(*msgType), pVal, sizeof(*pVal))) { + qError("taosHashPut msg %d to rpcCtx failed", *msgType); + (*dst.free)(dst.val); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pIter = taosHashIterate(pSrc->args, pIter); + } + + return TSDB_CODE_SUCCESS; + +_return: + + schFreeRpcCtx(pDst); + SCH_RET(code); +} + + int32_t schAsyncSendMsg(SSchJob *pJob, SSchTask *pTask, void *transport, SEpSet* epSet, int32_t msgType, void *msg, uint32_t msgSize, bool persistHandle, SRpcCtx *ctx) { int32_t code = 0; @@ -1337,9 +1543,9 @@ int32_t schAsyncSendMsg(SSchJob *pJob, 
SSchTask *pTask, void *transport, SEpSet* SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - SSchCallbackParam *param = calloc(1, sizeof(SSchCallbackParam)); + SSchTaskCallbackParam *param = calloc(1, sizeof(SSchTaskCallbackParam)); if (NULL == param) { - SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchCallbackParam)); + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchTaskCallbackParam)); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } @@ -1374,6 +1580,90 @@ _return: SCH_RET(code); } +int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId) { + SSchedulerHbReq req = {0}; + int32_t code = 0; + SRpcCtx rpcCtx = {0}; + SSchTrans trans = {0}; + int32_t msgType = TDMT_VND_QUERY_HEARTBEAT; + req.sId = schMgmt.sId; + req.header.vgId = nodeEpId->nodeId; + memcpy(&req.epId, nodeEpId, sizeof(SQueryNodeEpId)); + + SSchHbTrans *hb = taosHashGet(schMgmt.hbConnections, nodeEpId, sizeof(SQueryNodeEpId)); + if (NULL == hb) { + qError("taosHashGet hb connection failed, nodeId:%d, fqdn:%s, port:%d", nodeEpId->nodeId, nodeEpId->ep.fqdn, nodeEpId->ep.port); + SCH_ERR_RET(code); + } + + SCH_LOCK(SCH_WRITE, &hb->lock); + code = schCloneHbRpcCtx(&hb->rpcCtx, &rpcCtx); + memcpy(&trans, &hb->trans, sizeof(trans)); + SCH_UNLOCK(SCH_WRITE, &hb->lock); + + SCH_ERR_RET(code); + + int32_t msgSize = tSerializeSSchedulerHbReq(NULL, 0, &req); + if (msgSize < 0) { + qError("tSerializeSSchedulerHbReq hbReq failed, size:%d", msgSize); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + void *msg = calloc(1, msgSize); + if (NULL == msg) { + qError("calloc hb req %d failed", msgSize); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + if (tSerializeSSchedulerHbReq(msg, msgSize, &req) < 0) { + qError("tSerializeSSchedulerHbReq hbReq failed, size:%d", msgSize); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SMsgSendInfo *pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo)); + if (NULL == pMsgSendInfo) { + qError("calloc SMsgSendInfo failed"); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + 
SSchTaskCallbackParam *param = calloc(1, sizeof(SSchTaskCallbackParam)); + if (NULL == param) { + qError("calloc SSchTaskCallbackParam failed"); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + __async_send_cb_fn_t fp = NULL; + SCH_ERR_JRET(schGetCallbackFp(msgType, &fp)); + + param->transport = trans.transInst; + + pMsgSendInfo->param = param; + pMsgSendInfo->msgInfo.pData = msg; + pMsgSendInfo->msgInfo.len = msgSize; + pMsgSendInfo->msgInfo.handle = trans.transHandle; + pMsgSendInfo->msgType = msgType; + pMsgSendInfo->fp = fp; + + int64_t transporterId = 0; + SEpSet epSet = {.inUse = 0, .numOfEps = 1}; + memcpy(&epSet.eps[0], &nodeEpId->ep, sizeof(nodeEpId->ep)); + + code = asyncSendMsgToServerExt(trans.transInst, &epSet, &transporterId, pMsgSendInfo, true, &rpcCtx); + if (code) { + SCH_ERR_JRET(code); + } + + qDebug("req msg sent, type:%d, %s", msgType, TMSG_INFO(msgType)); + return TSDB_CODE_SUCCESS; + +_return: + + tfree(msg); + tfree(param); + tfree(pMsgSendInfo); + schFreeRpcCtx(&rpcCtx); + SCH_RET(code); +} + int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, int32_t msgType) { uint32_t msgSize = 0; void *msg = NULL; @@ -1546,9 +1836,9 @@ int32_t schEnsureHbConnection(SSchJob *pJob, SSchTask *pTask) { SSchHbTrans *hb = taosHashGet(schMgmt.hbConnections, &epId, sizeof(SQueryNodeEpId)); if (NULL == hb) { bool exist = false; - SCH_ERR_RET(schRegisterHbConnection(&epId, &exist)); + SCH_ERR_RET(schRegisterHbConnection(pJob, pTask, &epId, &exist)); if (!exist) { - SCH_ERR_RET(schBuildAndSendMsg(pJob, NULL, addr, TDMT_VND_QUERY_HEARTBEAT)); + SCH_ERR_RET(schBuildAndSendHbMsg(&epId)); } } diff --git a/source/libs/scheduler/test/schedulerTests.cpp b/source/libs/scheduler/test/schedulerTests.cpp index 503f5de5f8..d94bc45802 100644 --- a/source/libs/scheduler/test/schedulerTests.cpp +++ b/source/libs/scheduler/test/schedulerTests.cpp @@ -387,7 +387,7 @@ void *schtCreateFetchRspThread(void *param) { void *schtFetchRspThread(void *aa) { 
SDataBuf dataBuf = {0}; - SSchCallbackParam* param = NULL; + SSchTaskCallbackParam* param = NULL; while (!schtTestStop) { if (0 == atomic_val_compare_exchange_32(&schtStartFetch, 1, 0)) { @@ -396,7 +396,7 @@ void *schtFetchRspThread(void *aa) { taosUsleep(1); - param = (SSchCallbackParam *)calloc(1, sizeof(*param)); + param = (SSchTaskCallbackParam *)calloc(1, sizeof(*param)); param->queryId = schtQueryId; param->taskId = schtFetchTaskId; @@ -449,7 +449,7 @@ void* schtRunJobThread(void *aa) { schtSetAsyncSendMsgToServer(); SSchJob *pJob = NULL; - SSchCallbackParam *param = NULL; + SSchTaskCallbackParam *param = NULL; SHashObj *execTasks = NULL; SDataBuf dataBuf = {0}; uint32_t jobFinished = 0; @@ -484,7 +484,7 @@ void* schtRunJobThread(void *aa) { pIter = taosHashIterate(pJob->execTasks, pIter); } - param = (SSchCallbackParam *)calloc(1, sizeof(*param)); + param = (SSchTaskCallbackParam *)calloc(1, sizeof(*param)); param->refId = queryJobRefId; param->queryId = pJob->queryId; @@ -504,7 +504,7 @@ void* schtRunJobThread(void *aa) { } - param = (SSchCallbackParam *)calloc(1, sizeof(*param)); + param = (SSchTaskCallbackParam *)calloc(1, sizeof(*param)); param->refId = queryJobRefId; param->queryId = pJob->queryId; @@ -524,7 +524,7 @@ void* schtRunJobThread(void *aa) { } - param = (SSchCallbackParam *)calloc(1, sizeof(*param)); + param = (SSchTaskCallbackParam *)calloc(1, sizeof(*param)); param->refId = queryJobRefId; param->queryId = pJob->queryId; @@ -544,7 +544,7 @@ void* schtRunJobThread(void *aa) { } - param = (SSchCallbackParam *)calloc(1, sizeof(*param)); + param = (SSchTaskCallbackParam *)calloc(1, sizeof(*param)); param->refId = queryJobRefId; param->queryId = pJob->queryId; From 5a202e89e4a0e0252d369cb5afd15cc39f0c5166 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 22 Mar 2022 14:06:30 +0800 Subject: [PATCH 06/59] feature/scheduler --- source/libs/qworker/inc/qworkerInt.h | 11 ++++++----- source/libs/qworker/src/qworker.c | 14 +++++++++----- 
source/libs/scheduler/src/scheduler.c | 16 ++++++++++++---- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/source/libs/qworker/inc/qworkerInt.h b/source/libs/qworker/inc/qworkerInt.h index ab55b4b76d..785e809fe4 100644 --- a/source/libs/qworker/inc/qworkerInt.h +++ b/source/libs/qworker/inc/qworkerInt.h @@ -121,11 +121,12 @@ typedef struct SQWTaskCtx { } SQWTaskCtx; typedef struct SQWSchStatus { - int32_t lastAccessTs; // timestamp in second - SRWLatch connLock; - SQWConnInfo connInfo; - SRWLatch tasksLock; - SHashObj *tasksHash; // key:queryId+taskId, value: SQWTaskStatus + int32_t lastAccessTs; // timestamp in second + SRWLatch connLock; + SQWConnInfo hbConnInfo; + SQueryNodeEpId epId; + SRWLatch tasksLock; + SHashObj *tasksHash; // key:queryId+taskId, value: SQWTaskStatus } SQWSchStatus; // Qnode/Vnode level task management diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 3b01c2c29e..33e859d354 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -580,6 +580,9 @@ int32_t qwExecTask(QW_FPARAMS_DEF, SQWTaskCtx *ctx, bool *queryEnd) { int32_t qwGenerateSchHbRsp(SQWorkerMgmt *mgmt, SQWSchStatus *sch, SQWHbInfo *hbInfo) { int32_t taskNum = 0; + hbInfo->connInfo = sch->hbConnInfo; + hbInfo->rsp.epId = sch->epId; + QW_LOCK(QW_READ, &sch->tasksLock); taskNum = taosHashGetSize(sch->tasksHash); @@ -591,8 +594,6 @@ int32_t qwGenerateSchHbRsp(SQWorkerMgmt *mgmt, SQWSchStatus *sch, SQWHbInfo *hbI return TSDB_CODE_QRY_OUT_OF_MEMORY; } - hbInfo->connInfo = sch->connInfo; - void *key = NULL; size_t keyLen = 0; int32_t i = 0; @@ -1228,10 +1229,13 @@ int32_t qwProcessHb(SQWorkerMgmt *mgmt, SQWMsg *qwMsg, SSchedulerHbReq *req) { QW_ERR_JRET(qwAcquireAddScheduler(mgmt, req->sId, QW_READ, &sch)); QW_LOCK(QW_WRITE, &sch->connLock); + + if (sch->hbConnInfo.handle) { + rpcReleaseHandle(sch->hbConnInfo.handle, TAOS_CONN_SERVER); + } - origHandle = sch->connInfo.handle; - - 
memcpy(&sch->connInfo, &qwMsg->connInfo, sizeof(qwMsg->connInfo)); + memcpy(&sch->hbConnInfo, &qwMsg->connInfo, sizeof(qwMsg->connInfo)); + memcpy(&sch->epId, &req->epId, sizeof(req->epId)); QW_UNLOCK(QW_WRITE, &sch->connLock); diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 23233fc4c3..01cf0d8d8a 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -672,15 +672,14 @@ int32_t schUpdateHbConnection(SQueryNodeEpId *epId, SSchTrans *trans) { hb = taosHashGet(schMgmt.hbConnections, epId, sizeof(SQueryNodeEpId)); if (NULL == hb) { qError("taosHashGet hb connection failed, nodeId:%d, fqdn:%s, port:%d", epId->nodeId, epId->ep.fqdn, epId->ep.port); - SCH_ERR_RET(code); + SCH_ERR_RET(TSDB_CODE_QRY_APP_ERROR); } SCH_LOCK(SCH_WRITE, &hb->lock); memcpy(&hb->trans, trans, sizeof(*trans)); SCH_UNLOCK(SCH_WRITE, &hb->lock); - qDebug("hb connection updated, sId:%" PRIx64 - ", nodeId:%d, fqdn:%s, port:%d, instance:%p, handle:%p", + qDebug("hb connection updated, sId:%" PRIx64 ", nodeId:%d, fqdn:%s, port:%d, instance:%p, handle:%p", schMgmt.sId, epId->nodeId, epId->ep.fqdn, epId->ep.port, trans->transInst, trans->transHandle); @@ -1563,6 +1562,9 @@ int32_t schAsyncSendMsg(SSchJob *pJob, SSchTask *pTask, void *transport, SEpSet* pMsgSendInfo->msgInfo.handle = trans->transHandle; pMsgSendInfo->msgType = msgType; pMsgSendInfo->fp = fp; + + qDebug("start to send %s msg, refId:%" PRIx64 "instance:%p, handle:%p", + TMSG_INFO(msgType), pJob->refId, trans->transInst, trans->transHandle); int64_t transporterId = 0; code = asyncSendMsgToServerExt(trans->transInst, epSet, &transporterId, pMsgSendInfo, persistHandle, ctx); @@ -1646,13 +1648,17 @@ int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId) { int64_t transporterId = 0; SEpSet epSet = {.inUse = 0, .numOfEps = 1}; memcpy(&epSet.eps[0], &nodeEpId->ep, sizeof(nodeEpId->ep)); + + qDebug("start to send hb msg, instance:%p, handle:%p, fqdn:%s, 
port:%d", trans.transInst, trans.transHandle, nodeEpId->ep.fqdn, nodeEpId->ep.port); code = asyncSendMsgToServerExt(trans.transInst, &epSet, &transporterId, pMsgSendInfo, true, &rpcCtx); if (code) { + qError("fail to send hb msg, instance:%p, handle:%p, fqdn:%s, port:%d, error:%x - %s", + trans.transInst, trans.transHandle, nodeEpId->ep.fqdn, nodeEpId->ep.port, code, tstrerror(code)); SCH_ERR_JRET(code); } - qDebug("req msg sent, type:%d, %s", msgType, TMSG_INFO(msgType)); + qDebug("hb msg sent"); return TSDB_CODE_SUCCESS; _return: @@ -1833,6 +1839,7 @@ int32_t schEnsureHbConnection(SSchJob *pJob, SSchTask *pTask) { epId.nodeId = addr->nodeId; memcpy(&epId.ep, SCH_GET_CUR_EP(addr), sizeof(SEp)); +#if 0 SSchHbTrans *hb = taosHashGet(schMgmt.hbConnections, &epId, sizeof(SQueryNodeEpId)); if (NULL == hb) { bool exist = false; @@ -1841,6 +1848,7 @@ int32_t schEnsureHbConnection(SSchJob *pJob, SSchTask *pTask) { SCH_ERR_RET(schBuildAndSendHbMsg(&epId)); } } +#endif return TSDB_CODE_SUCCESS; } From ed477de87ee86f8ed6efe6850adaf6d77a7f3995 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 22 Mar 2022 14:51:26 +0800 Subject: [PATCH 07/59] handle except --- source/libs/transport/src/transSrv.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/source/libs/transport/src/transSrv.c b/source/libs/transport/src/transSrv.c index c6032a9569..6b34f864ed 100644 --- a/source/libs/transport/src/transSrv.c +++ b/source/libs/transport/src/transSrv.c @@ -199,6 +199,7 @@ static void uvHandleReq(SSrvConn* pConn) { if (pHead->persist == 1) { pConn->status = ConnAcquire; transRefSrvHandle(pConn); + tDebug("server conn %p acquired by server app", pConn); } } if (pConn->status == ConnNormal && pHead->noResp == 0) { @@ -725,7 +726,7 @@ void uvHandleRelease(SSrvMsg* msg, SWorkThrdObj* thrd) { } void uvHandleResp(SSrvMsg* msg, SWorkThrdObj* thrd) { // send msg to client - tDebug("server conn %p start to send resp", msg->pConn); + tDebug("server conn %p start to send resp 
(2/2)", msg->pConn); uvStartSendResp(msg); } void uvHandleRegister(SSrvMsg* msg, SWorkThrdObj* thrd) { @@ -735,9 +736,11 @@ void uvHandleRegister(SSrvMsg* msg, SWorkThrdObj* thrd) { if (!transQueuePush(&conn->srvMsgs, msg)) { return; } + transQueuePop(&conn->srvMsgs); conn->regArg.notifyCount = 0; conn->regArg.init = 1; conn->regArg.msg = msg->msg; + tDebug("server conn %p register brokenlink callback succ", conn); if (conn->broken) { STrans* pTransInst = conn->pTransInst; @@ -836,7 +839,7 @@ void transSendResponse(const STransMsg* pMsg) { srvMsg->pConn = pConn; srvMsg->msg = *pMsg; srvMsg->type = Normal; - tTrace("server conn %p start to send resp", pConn); + tTrace("server conn %p start to send resp (1/2)", pConn); transSendAsync(pThrd->asyncPool, &srvMsg->q); } void transRegisterMsg(const STransMsg* msg) { From 3b2a50050ba1a3fb71f783afdfb97ea51fa369ee Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 22 Mar 2022 15:17:09 +0800 Subject: [PATCH 08/59] feature/scheduler --- source/libs/qworker/src/qworker.c | 34 ++++++++++++++------------- source/libs/qworker/src/qworkerMsg.c | 19 ++++++++++----- source/libs/scheduler/src/scheduler.c | 17 ++++++++++++-- 3 files changed, 46 insertions(+), 24 deletions(-) diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 33e859d354..ca0dd4a965 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -697,8 +697,8 @@ int32_t qwGetResFromSink(QW_FPARAMS_DEF, SQWTaskCtx *ctx, int32_t *dataLen, void int32_t qwHandlePrePhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *input, SQWPhaseOutput *output) { int32_t code = 0; SQWTaskCtx *ctx = NULL; - void *dropConnection = NULL; - void *cancelConnection = NULL; + SQWConnInfo *dropConnection = NULL; + SQWConnInfo *cancelConnection = NULL; QW_TASK_DLOG("start to handle event at phase %s", qwPhaseStr(phase)); @@ -793,12 +793,12 @@ _return: if (dropConnection) { qwBuildAndSendDropRsp(dropConnection, code); - 
QW_TASK_DLOG("drop msg rsped, code:%x - %s", code, tstrerror(code)); + QW_TASK_DLOG("drop rsp send, handle:%p, code:%x - %s", dropConnection->handle, code, tstrerror(code)); } if (cancelConnection) { qwBuildAndSendCancelRsp(cancelConnection, code); - QW_TASK_DLOG("cancel msg rsped, code:%x - %s", code, tstrerror(code)); + QW_TASK_DLOG("cancel rsp send, handle:%p, code:%x - %s", cancelConnection->handle, code, tstrerror(code)); } QW_TASK_DLOG("end to handle event at phase %s, code:%x - %s", qwPhaseStr(phase), code, tstrerror(code)); @@ -811,7 +811,7 @@ int32_t qwHandlePostPhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inp int32_t code = 0; SQWTaskCtx *ctx = NULL; SQWConnInfo connInfo = {0}; - void *readyConnection = NULL; + SQWConnInfo *readyConnection = NULL; QW_TASK_DLOG("start to handle event at phase %s", qwPhaseStr(phase)); @@ -879,7 +879,7 @@ _return: if (TSDB_CODE_SUCCESS == code && readyConnection) { qwBuildAndSendReadyRsp(readyConnection, code); - QW_TASK_DLOG("ready msg rsped, code:%x - %s", code, tstrerror(code)); + QW_TASK_DLOG("ready msg rsped, handle:%p, code:%x - %s", readyConnection->handle, code, tstrerror(code)); } if (code) { @@ -907,7 +907,9 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType) { QW_ERR_JRET(qwGetTaskCtx(QW_FPARAMS(), &ctx)); atomic_store_8(&ctx->taskType, taskType); - + atomic_store_ptr(&ctx->connInfo.handle, qwMsg->connInfo.handle); + atomic_store_ptr(&ctx->connInfo.ahandle, qwMsg->connInfo.ahandle); + code = qStringToSubplan(qwMsg->msg, &plan); if (TSDB_CODE_SUCCESS != code) { QW_TASK_ELOG("task string to subplan failed, code:%x - %s", code, tstrerror(code)); @@ -926,7 +928,7 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType) { } QW_ERR_JRET(qwBuildAndSendQueryRsp(&qwMsg->connInfo, code)); - QW_TASK_DLOG("query msg rsped, code:%x - %s", code, tstrerror(code)); + QW_TASK_DLOG("query msg rsped, handle:%p, code:%x - %s", qwMsg->connInfo.handle, code, tstrerror(code)); 
queryRsped = true; @@ -944,7 +946,7 @@ _return: if (!queryRsped) { qwBuildAndSendQueryRsp(&qwMsg->connInfo, code); - QW_TASK_DLOG("query msg rsped, code:%x - %s", code, tstrerror(code)); + QW_TASK_DLOG("query msg rsped, handle:%p, code:%x - %s", qwMsg->connInfo.handle, code, tstrerror(code)); } QW_RET(TSDB_CODE_SUCCESS); @@ -1007,7 +1009,7 @@ _return: if (needRsp) { qwBuildAndSendReadyRsp(&qwMsg->connInfo, code); - QW_TASK_DLOG("ready msg rsped, code:%x - %s", code, tstrerror(code)); + QW_TASK_DLOG("ready msg rsped, handle:%p, code:%x - %s", qwMsg->connInfo.handle, code, tstrerror(code)); } QW_RET(TSDB_CODE_SUCCESS); @@ -1050,7 +1052,7 @@ int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) { QW_SET_EVENT_PROCESSED(ctx, QW_EVENT_FETCH); qwBuildAndSendFetchRsp(&qwMsg->connInfo, rsp, dataLen, code); - QW_TASK_DLOG("fetch msg rsped, code:%x, dataLen:%d", code, dataLen); + QW_TASK_DLOG("fetch rsp send, handle:%p, code:%x - %s, dataLen:%d", qwMsg->connInfo.handle, code, tstrerror(code), dataLen); } else { atomic_store_8(&ctx->queryContinue, 1); } @@ -1067,7 +1069,7 @@ _return: qwFreeFetchRsp(rsp); rsp = NULL; qwBuildAndSendFetchRsp(&qwMsg->connInfo, rsp, 0, code); - QW_TASK_DLOG("fetch msg rsped, code:%x - %s", code, tstrerror(code)); + QW_TASK_DLOG("fetch rsp send, handle:%p, code:%x - %s, dataLen:%d", qwMsg->connInfo.handle, code, tstrerror(code), 0); } QW_LOCK(QW_WRITE, &ctx->lock); @@ -1147,7 +1149,7 @@ _return: if (code || rsp) { qwBuildAndSendFetchRsp(&qwMsg->connInfo, rsp, dataLen, code); - QW_TASK_DLOG("fetch msg rsped, code:%x, dataLen:%d", code, dataLen); + QW_TASK_DLOG("fetch rsp send, handle:%p, code:%x - %s, dataLen:%d", qwMsg->connInfo.handle, code, tstrerror(code), dataLen); } QW_RET(TSDB_CODE_SUCCESS); @@ -1210,8 +1212,7 @@ _return: if (TSDB_CODE_SUCCESS != code || needRsp) { qwBuildAndSendDropRsp(&qwMsg->connInfo, code); - - QW_TASK_DLOG("drop msg rsped, code:%x", code); + QW_TASK_DLOG("drop rsp send, handle:%p, code:%x - %s", qwMsg->connInfo.handle, 
code, tstrerror(code)); } QW_RET(TSDB_CODE_SUCCESS); @@ -1247,6 +1248,7 @@ int32_t qwProcessHb(SQWorkerMgmt *mgmt, SQWMsg *qwMsg, SSchedulerHbReq *req) { _return: qwBuildAndSendHbRsp(&qwMsg->connInfo, &rsp, code); + QW_DLOG("hb rsp send, handle:%p, code:%x - %s", qwMsg->connInfo.handle, code, tstrerror(code)); QW_RET(TSDB_CODE_SUCCESS); } @@ -1297,8 +1299,8 @@ _return: QW_UNLOCK(QW_READ, &mgmt->schLock); for (int32_t j = 0; j < i; ++j) { - QW_DLOG("hb on connection handle %p, taskNum:%d", rspList[j].connInfo.handle, (rspList[j].rsp.taskStatus ? (int32_t)taosArrayGetSize(rspList[j].rsp.taskStatus) : 0)); qwBuildAndSendHbRsp(&rspList[j].connInfo, &rspList[j].rsp, code); + QW_DLOG("hb rsp send, handle:%p, code:%x - %s, taskNum:%d", rspList[j].connInfo.handle, code, tstrerror(code), (rspList[j].rsp.taskStatus ? (int32_t)taosArrayGetSize(rspList[j].rsp.taskStatus) : 0)); tFreeSSchedulerHbRsp(&rspList[j].rsp); } diff --git a/source/libs/qworker/src/qworkerMsg.c b/source/libs/qworker/src/qworkerMsg.c index 42d5b94397..b3921368e1 100644 --- a/source/libs/qworker/src/qworkerMsg.c +++ b/source/libs/qworker/src/qworkerMsg.c @@ -324,7 +324,7 @@ int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { qwMsg.connInfo.ahandle = pMsg->ahandle; char* sql = strndup(msg->msg, msg->sqlLen); - QW_SCH_TASK_DLOG("processQuery start, node:%p, sql:%s", node, sql); + QW_SCH_TASK_DLOG("processQuery start, node:%p, handle:%p, sql:%s", node, pMsg->handle, sql); tfree(sql); QW_ERR_RET(qwProcessQuery(QW_FPARAMS(), &qwMsg, msg->taskType)); @@ -357,7 +357,7 @@ int32_t qWorkerProcessCQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { qwMsg.connInfo.handle = pMsg->handle; qwMsg.connInfo.ahandle = pMsg->ahandle; - QW_SCH_TASK_DLOG("processCQuery start, node:%p", node); + QW_SCH_TASK_DLOG("processCQuery start, node:%p, handle:%p", node, pMsg->handle); QW_ERR_RET(qwProcessCQuery(QW_FPARAMS(), &qwMsg)); @@ -391,7 +391,7 @@ int32_t qWorkerProcessReadyMsg(void *node, void 
*qWorkerMgmt, SRpcMsg *pMsg){ qwMsg.connInfo.handle = pMsg->handle; qwMsg.connInfo.ahandle = pMsg->ahandle; - QW_SCH_TASK_DLOG("processReady start, node:%p", node); + QW_SCH_TASK_DLOG("processReady start, node:%p, handle:%p", node, pMsg->handle); QW_ERR_RET(qwProcessReady(QW_FPARAMS(), &qwMsg)); @@ -453,7 +453,7 @@ int32_t qWorkerProcessFetchMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { qwMsg.connInfo.handle = pMsg->handle; qwMsg.connInfo.ahandle = pMsg->ahandle; - QW_SCH_TASK_DLOG("processFetch start, node:%p", node); + QW_SCH_TASK_DLOG("processFetch start, node:%p, handle:%p", node, pMsg->handle); QW_ERR_RET(qwProcessFetch(QW_FPARAMS(), &qwMsg)); @@ -472,6 +472,7 @@ int32_t qWorkerProcessCancelMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { return TSDB_CODE_QRY_INVALID_INPUT; } + SQWorkerMgmt *mgmt = (SQWorkerMgmt *)qWorkerMgmt; int32_t code = 0; STaskCancelReq *msg = pMsg->pCont; if (NULL == msg || pMsg->contLen < sizeof(*msg)) { @@ -484,6 +485,11 @@ int32_t qWorkerProcessCancelMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { msg->taskId = be64toh(msg->taskId); msg->refId = be64toh(msg->refId); + uint64_t sId = msg->sId; + uint64_t qId = msg->queryId; + uint64_t tId = msg->taskId; + int64_t rId = msg->refId; + SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0}; qwMsg.connInfo.handle = pMsg->handle; qwMsg.connInfo.ahandle = pMsg->ahandle; @@ -493,6 +499,7 @@ int32_t qWorkerProcessCancelMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { _return: QW_ERR_RET(qwBuildAndSendCancelRsp(&qwMsg.connInfo, code)); + QW_SCH_TASK_DLOG("cancel rsp send, handle:%p, code:%x - %s", qwMsg.connInfo.handle, code, tstrerror(code)); return TSDB_CODE_SUCCESS; } @@ -525,7 +532,7 @@ int32_t qWorkerProcessDropMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { qwMsg.connInfo.handle = pMsg->handle; qwMsg.connInfo.ahandle = pMsg->ahandle; - QW_SCH_TASK_DLOG("processDrop start, node:%p", node); + QW_SCH_TASK_DLOG("processDrop start, node:%p, handle:%p", node, 
pMsg->handle); QW_ERR_RET(qwProcessDrop(QW_FPARAMS(), &qwMsg)); @@ -559,7 +566,7 @@ int32_t qWorkerProcessHbMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { qwMsg.connInfo.handle = pMsg->handle; qwMsg.connInfo.ahandle = pMsg->ahandle; - QW_SCH_DLOG("processHb start, node:%p", node); + QW_SCH_DLOG("processHb start, node:%p, handle:%p", node, pMsg->handle); QW_ERR_RET(qwProcessHb(mgmt, &qwMsg, &req)); diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 01cf0d8d8a..2bd18277bf 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -137,7 +137,18 @@ int32_t schValidateTaskReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t m return TSDB_CODE_SUCCESS; case TDMT_VND_RES_READY_RSP: reqMsgType = TDMT_VND_QUERY; - break; + if (lastMsgType != reqMsgType && -1 != lastMsgType) { + SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", (lastMsgType > 0 ? TMSG_INFO(lastMsgType) : "null"), TMSG_INFO(msgType)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { + SCH_TASK_ELOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), TMSG_INFO(msgType)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); + return TSDB_CODE_SUCCESS; case TDMT_VND_CREATE_TABLE_RSP: case TDMT_VND_SUBMIT_RSP: case TDMT_VND_FETCH_RSP: @@ -1085,7 +1096,7 @@ int32_t schHandleCallback(void *param, const SDataBuf *pMsg, int32_t msgType, in } pTask = *task; - SCH_TASK_DLOG("rsp msg received, type:%s, code:%s", TMSG_INFO(msgType), tstrerror(rspCode)); + SCH_TASK_DLOG("rsp msg received, type:%s, handle:%p, code:%s", TMSG_INFO(msgType), pMsg->handle, tstrerror(rspCode)); pTask->handle = pMsg->handle; SCH_ERR_JRET(schHandleResponseMsg(pJob, pTask, msgType, pMsg->pData, pMsg->len, rspCode)); @@ -1174,6 +1185,8 @@ int32_t 
schHandleLinkBrokenCallback(void *param, const SDataBuf *pMsg, int32_t c SSchCallbackParamHeader *head = (SSchCallbackParamHeader *)param; rpcReleaseHandle(pMsg->handle, TAOS_CONN_CLIENT); + qDebug("handle %p is broken", pMsg->handle); + if (head->isHbParam) { SSchHbCallbackParam *hbParam = (SSchHbCallbackParam *)param; SSchTrans trans = {.transInst = hbParam->transport, .transHandle = NULL}; From 5e85c21f3674a0c0d113f17d639aa4e2fd6a519c Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 22 Mar 2022 16:12:14 +0800 Subject: [PATCH 09/59] feature/scheduler --- source/libs/qworker/src/qworkerMsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/qworker/src/qworkerMsg.c b/source/libs/qworker/src/qworkerMsg.c index b3921368e1..41b12ad552 100644 --- a/source/libs/qworker/src/qworkerMsg.c +++ b/source/libs/qworker/src/qworkerMsg.c @@ -74,7 +74,7 @@ int32_t qwBuildAndSendReadyRsp(SQWConnInfo *pConn, int32_t code) { SRpcMsg rpcRsp = { .msgType = TDMT_VND_RES_READY_RSP, .handle = pConn->handle, - .ahandle = pConn->ahandle, + .ahandle = NULL, .pCont = pRsp, .contLen = sizeof(*pRsp), .code = code, From 28c930f920407e6bd6c9dfe2aa4d261ae6a9a9a6 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 22 Mar 2022 16:19:55 +0800 Subject: [PATCH 10/59] handle except --- source/libs/transport/src/transCli.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index ea73e07c80..958ffe2aa6 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -649,6 +649,7 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrdObj* pThrd) { cliSend(conn); } else { conn = cliCreateConn(pThrd); + transCtxMerge(&conn->ctx, &pCtx->appCtx); transQueuePush(&conn->cliMsgs, pMsg); conn->hThrdIdx = pCtx->hThrdIdx; From 4b98359e9ac099e3c99f266bbfbeccf5d64a3242 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 22 Mar 2022 16:51:45 +0800 Subject: [PATCH 11/59] feature/scheduler --- 
source/libs/qworker/src/qworker.c | 2 +- source/libs/scheduler/src/scheduler.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index ca0dd4a965..8476d3d3a3 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -1100,7 +1100,7 @@ int32_t qwProcessFetch(QW_FPARAMS_DEF, SQWMsg *qwMsg) { QW_ERR_JRET(qwHandlePrePhaseEvents(QW_FPARAMS(), QW_PHASE_PRE_FETCH, &input, NULL)); QW_ERR_JRET(qwGetTaskCtx(QW_FPARAMS(), &ctx)); - + SOutputData sOutput = {0}; QW_ERR_JRET(qwGetResFromSink(QW_FPARAMS(), ctx, &dataLen, &rsp, &sOutput)); diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 2bd18277bf..dc064da3ff 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -1852,7 +1852,7 @@ int32_t schEnsureHbConnection(SSchJob *pJob, SSchTask *pTask) { epId.nodeId = addr->nodeId; memcpy(&epId.ep, SCH_GET_CUR_EP(addr), sizeof(SEp)); -#if 0 +#if 1 SSchHbTrans *hb = taosHashGet(schMgmt.hbConnections, &epId, sizeof(SQueryNodeEpId)); if (NULL == hb) { bool exist = false; From 3c82cae1f1895b98f7dbb6cecf6d2a39194c5caf Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 22 Mar 2022 17:01:02 +0800 Subject: [PATCH 12/59] merge trans --- source/libs/transport/src/transCli.c | 2 +- source/libs/transport/src/transSrv.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 958ffe2aa6..59c4ffad06 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -265,7 +265,7 @@ _RETURN: void cliHandleExcept(SCliConn* pConn) { if (transQueueEmpty(&pConn->cliMsgs)) { - if (pConn->broken == true || CONN_NO_PERSIST_BY_APP(pConn)) { + if (pConn->broken == true && CONN_NO_PERSIST_BY_APP(pConn)) { transUnrefCliHandle(pConn); return; } diff --git 
a/source/libs/transport/src/transSrv.c b/source/libs/transport/src/transSrv.c index 6b34f864ed..e60bcd66c3 100644 --- a/source/libs/transport/src/transSrv.c +++ b/source/libs/transport/src/transSrv.c @@ -291,7 +291,11 @@ void uvOnSendCb(uv_write_t* req, int status) { memset(&conn->regArg, 0, sizeof(conn->regArg)); } transQueuePop(&conn->srvMsgs); - free(msg); + tfree(msg); + msg = (SSrvMsg*)transQueuePop(&conn->srvMsgs); + if (msg != NULL) { + uvStartSendRespInternal(msg); + } } else { uvStartSendRespInternal(msg); } From 11bd7b19bec4252dabeef480a45ca89e70d0b43f Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 22 Mar 2022 17:50:49 +0800 Subject: [PATCH 13/59] feature/scheduler --- source/libs/scheduler/src/scheduler.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index dc064da3ff..434f926ae7 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -1158,6 +1158,8 @@ int32_t schHandleHbCallback(void *param, const SDataBuf *pMsg, int32_t code) { SCH_RET(schUpdateHbConnection(&rsp.epId, &trans)); int32_t taskNum = (int32_t)taosArrayGetSize(rsp.taskStatus); + qDebug("%d task status in hb rsp, nodeId:%d, fqdn:%s, port:%d", taskNum, rsp.epId.nodeId, rsp.epId.ep.fqdn, rsp.epId.ep.port); + for (int32_t i = 0; i < taskNum; ++i) { STaskStatus *taskStatus = taosArrayGet(rsp.taskStatus, i); From 0ee40a21953ede4f60669964b7f47f947fa7191a Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 22 Mar 2022 17:52:14 +0800 Subject: [PATCH 14/59] handle except --- source/libs/transport/src/transSrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/transport/src/transSrv.c b/source/libs/transport/src/transSrv.c index e60bcd66c3..b514bdba3f 100644 --- a/source/libs/transport/src/transSrv.c +++ b/source/libs/transport/src/transSrv.c @@ -558,7 +558,7 @@ static bool addHandleToWorkloop(void* arg) { // conn set QUEUE_INIT(&pThrd->conn); - 
pThrd->asyncPool = transCreateAsyncPool(pThrd->loop, 4, pThrd, uvWorkerAsyncCb); + pThrd->asyncPool = transCreateAsyncPool(pThrd->loop, 1, pThrd, uvWorkerAsyncCb); uv_read_start((uv_stream_t*)pThrd->pipe, uvAllocConnBufferCb, uvOnConnectionCb); return true; } From 97ad141579aae4f9190db05ff99f3d40249e8ff7 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 22 Mar 2022 19:00:55 +0800 Subject: [PATCH 15/59] feature/scheduler --- source/libs/scheduler/src/scheduler.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 434f926ae7..e74507bdf7 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -1499,6 +1499,8 @@ int32_t schCloneSMsgSendInfo(SMsgSendInfo *pSrc, SMsgSendInfo **pDst) { SCH_ERR_JRET(schCloneCallbackParam(pSrc->param, (SSchCallbackParamHeader **)&dst->param)); + *pDst = dst; + return TSDB_CODE_SUCCESS; _return: From d595fe6ba1b70b24199fb26962e0bccb1242e03b Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 22 Mar 2022 20:54:31 +0800 Subject: [PATCH 16/59] add ahandle --- source/dnode/mgmt/container/src/dndInt.c | 2 +- source/libs/transport/inc/transComm.h | 18 ++++-- source/libs/transport/src/transCli.c | 74 ++++++++++++++++++------ source/libs/transport/src/transComm.c | 26 ++++++++- source/libs/transport/src/transSrv.c | 5 +- 5 files changed, 95 insertions(+), 30 deletions(-) diff --git a/source/dnode/mgmt/container/src/dndInt.c b/source/dnode/mgmt/container/src/dndInt.c index 8ad4351a88..dc1bde6b06 100644 --- a/source/dnode/mgmt/container/src/dndInt.c +++ b/source/dnode/mgmt/container/src/dndInt.c @@ -135,6 +135,6 @@ void dndProcessStartupReq(SDnode *pDnode, SRpcMsg *pReq) { dDebug("startup req is sent, step:%s desc:%s finished:%d", pStartup->name, pStartup->desc, pStartup->finished); - SRpcMsg rpcRsp = {.handle = pReq->handle, .pCont = pStartup, .contLen = sizeof(SStartupReq)}; + SRpcMsg rpcRsp = {.handle = pReq->handle, 
.pCont = pStartup, .contLen = sizeof(SStartupReq), .ahandle = NULL}; rpcSendResponse(&rpcRsp); } diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index 32a0cf0d54..962acfee2c 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -158,7 +158,8 @@ typedef struct { char secured : 2; char spi : 2; - uint32_t code; // del later + uint64_t ahandle; // ahandle assigned by client + uint32_t code; // del later uint32_t msgType; int32_t msgLen; uint8_t content[0]; // message body starts from here @@ -296,20 +297,25 @@ void transQueueInit(STransQueue* queue, void (*free)(void* arg)); * if queue'size > 1, return false; else return true */ bool transQueuePush(STransQueue* queue, void* arg); +/* + * the size of queue + */ +int32_t transQueueSize(STransQueue* queue); /* * pop head from queue */ - void* transQueuePop(STransQueue* queue); /* - * get head from queue + * get ith from queue */ -void* transQueueGet(STransQueue* queue); - +void* transQueueGet(STransQueue* queue, int i); +/* + * rm ith from queue + */ +void* transQueueRm(STransQueue* queue, int i); /* * queue empty or not */ - bool transQueueEmpty(STransQueue* queue); /* * clear queue diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 59c4ffad06..8694d4098c 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -25,12 +25,11 @@ typedef struct SCliConn { void* hostThrd; SConnBuffer readBuf; void* data; - // SArray* cliMsgs; - STransQueue cliMsgs; - queue conn; - uint64_t expireTime; - int hThrdIdx; - STransCtx ctx; + STransQueue cliMsgs; + queue conn; + uint64_t expireTime; + int hThrdIdx; + STransCtx ctx; bool broken; // link broken or not ConnStatus status; // @@ -151,6 +150,22 @@ static void destroyThrdObj(SCliThrdObj* pThrd); } \ } while (0) +#define CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle) \ + do { \ + int i = 0, sz = 
transQueueSize(&conn->cliMsgs); \ + for (; i < sz; i++) { \ + pMsg = transQueueGet(&conn->cliMsgs, i); \ + if (pMsg != NULL && (uint64_t)pMsg->ctx->ahandle == ahandle) { \ + break; \ + } \ + } \ + if (i == sz) { \ + pMsg = NULL; \ + } else { \ + pMsg = transQueueRm(&conn->cliMsgs, i); \ + } \ + } while (0) + #define CONN_HANDLE_THREAD_QUIT(thrd) \ do { \ if (thrd->quit) { \ @@ -205,16 +220,36 @@ void cliHandleResp(SCliConn* conn) { CONN_SHOULD_RELEASE(conn, pHead); - SCliMsg* pMsg = transQueuePop(&conn->cliMsgs); + SCliMsg* pMsg = NULL; + STransConnCtx* pCtx = NULL; - STransConnCtx* pCtx = pMsg ? pMsg->ctx : NULL; - if (pMsg == NULL && !CONN_NO_PERSIST_BY_APP(conn)) { - transMsg.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); - if (transMsg.ahandle == NULL) { - transMsg.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); + if (CONN_NO_PERSIST_BY_APP(conn)) { + pMsg = transQueuePop(&conn->cliMsgs); + pCtx = pMsg ? pMsg->ctx : NULL; + if (pMsg == NULL && !CONN_NO_PERSIST_BY_APP(conn)) { + transMsg.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); + if (transMsg.ahandle == NULL) { + transMsg.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); + } + tDebug("cli conn %p construct ahandle %p, persist: 0", conn, transMsg.ahandle); + } else { + transMsg.ahandle = pCtx ? pCtx->ahandle : NULL; + tDebug("cli conn %p get ahandle %p, persist: 0", conn, transMsg.ahandle); } } else { - transMsg.ahandle = pCtx ? pCtx->ahandle : NULL; + uint64_t ahandle = (uint64_t)pHead->ahandle; + CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle); + if (pMsg == NULL) { + transMsg.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); + if (transMsg.ahandle == NULL) { + transMsg.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); + } + tDebug("cli conn %p construct ahandle %p, persist: 1", conn, transMsg.ahandle); + } else { + pCtx = pMsg ? pMsg->ctx : NULL; + transMsg.ahandle = pCtx ? 
pCtx->ahandle : NULL; + tDebug("cli conn %p get ahandle %p, persist: 1", conn, transMsg.ahandle); + } } // buf's mem alread translated to transMsg.pCont transClearBuffer(&conn->readBuf); @@ -259,8 +294,6 @@ void cliHandleResp(SCliConn* conn) { if (!uv_is_active((uv_handle_t*)&pThrd->timer) && pTransInst->idleTime > 0) { // uv_timer_start((uv_timer_t*)&pThrd->timer, cliTimeoutCb, CONN_PERSIST_TIME(pRpc->idleTime) / 2, 0); } -_RETURN: - return; } void cliHandleExcept(SCliConn* pConn) { @@ -282,11 +315,14 @@ void cliHandleExcept(SCliConn* pConn) { transMsg.code = TSDB_CODE_RPC_NETWORK_UNAVAIL; transMsg.msgType = pMsg ? pMsg->msg.msgType + 1 : 0; transMsg.ahandle = NULL; + transMsg.handle = pConn; if (pMsg == NULL && !CONN_NO_PERSIST_BY_APP(pConn)) { transMsg.ahandle = transCtxDumpVal(&pConn->ctx, transMsg.msgType); + tDebug("cli conn %p construct msgType %s ahandle %p", pConn, TMSG_INFO(transMsg.msgType), transMsg.ahandle); if (transMsg.ahandle == NULL) { transMsg.ahandle = transCtxDumpBrokenlinkVal(&pConn->ctx, (int32_t*)&(transMsg.msgType)); + tDebug("cli conn %p construct brokenlink ahandle %p", pConn, transMsg.ahandle); } } else { transMsg.ahandle = pCtx ? 
pCtx->ahandle : NULL; @@ -472,7 +508,7 @@ static void cliDestroy(uv_handle_t* handle) { static bool cliHandleNoResp(SCliConn* conn) { bool res = false; if (!transQueueEmpty(&conn->cliMsgs)) { - SCliMsg* pMsg = transQueueGet(&conn->cliMsgs); + SCliMsg* pMsg = transQueueGet(&conn->cliMsgs, 0); if (REQUEST_NO_RESP(&pMsg->msg)) { transQueuePop(&conn->cliMsgs); // taosArrayRemove(msgs, 0); @@ -510,7 +546,7 @@ void cliSend(SCliConn* pConn) { // assert(taosArrayGetSize(pConn->cliMsgs) > 0); assert(!transQueueEmpty(&pConn->cliMsgs)); - SCliMsg* pCliMsg = transQueueGet(&pConn->cliMsgs); + SCliMsg* pCliMsg = transQueueGet(&pConn->cliMsgs, 0); STransConnCtx* pCtx = pCliMsg->ctx; SCliThrdObj* pThrd = pConn->hostThrd; @@ -522,7 +558,9 @@ void cliSend(SCliConn* pConn) { pMsg->contLen = 0; } STransMsgHead* pHead = transHeadFromCont(pMsg->pCont); - int msgLen = transMsgLenFromCont(pMsg->contLen); + pHead->ahandle = (uint64_t)pCtx->ahandle; + + int msgLen = transMsgLenFromCont(pMsg->contLen); if (!pConn->secured) { char* buf = calloc(1, msgLen + sizeof(STransUserMsg)); diff --git a/source/libs/transport/src/transComm.c b/source/libs/transport/src/transComm.c index 5684c332c0..87355ac8d0 100644 --- a/source/libs/transport/src/transComm.c +++ b/source/libs/transport/src/transComm.c @@ -305,14 +305,34 @@ void* transQueuePop(STransQueue* queue) { taosArrayRemove(queue->q, 0); return ptr; } - -void* transQueueGet(STransQueue* queue) { +int32_t transQueueSize(STransQueue* queue) { + // Get size + return taosArrayGetSize(queue->q); +} +void* transQueueGet(STransQueue* queue, int i) { if (taosArrayGetSize(queue->q) == 0) { return NULL; } - void* ptr = taosArrayGetP(queue->q, 0); + if (i >= taosArrayGetSize(queue->q)) { + return NULL; + } + + void* ptr = taosArrayGetP(queue->q, i); return ptr; } + +void* transQueueRm(STransQueue* queue, int i) { + if (taosArrayGetSize(queue->q) == 0) { + return NULL; + } + if (i >= taosArrayGetSize(queue->q)) { + return NULL; + } + void* ptr = 
taosArrayGetP(queue->q, i); + taosArrayRemove(queue->q, i); + return ptr; +} + bool transQueueEmpty(STransQueue* queue) { // return taosArrayGetSize(queue->q) == 0; diff --git a/source/libs/transport/src/transSrv.c b/source/libs/transport/src/transSrv.c index b514bdba3f..3ff893b365 100644 --- a/source/libs/transport/src/transSrv.c +++ b/source/libs/transport/src/transSrv.c @@ -190,7 +190,7 @@ static void uvHandleReq(SSrvConn* pConn) { transMsg.pCont = pHead->content; transMsg.msgType = pHead->msgType; transMsg.code = pHead->code; - transMsg.ahandle = NULL; + transMsg.ahandle = (void*)pHead->ahandle; transMsg.handle = NULL; transClearBuffer(&pConn->readBuf); @@ -280,7 +280,7 @@ void uvOnSendCb(uv_write_t* req, int status) { destroySmsg(msg); // send second data, just use for push if (!transQueueEmpty(&conn->srvMsgs)) { - msg = (SSrvMsg*)transQueueGet(&conn->srvMsgs); + msg = (SSrvMsg*)transQueueGet(&conn->srvMsgs, 0); if (msg->type == Register && conn->status == ConnAcquire) { conn->regArg.notifyCount = 0; conn->regArg.init = 1; @@ -326,6 +326,7 @@ static void uvPrepareSendData(SSrvMsg* smsg, uv_buf_t* wb) { pMsg->contLen = 0; } STransMsgHead* pHead = transHeadFromCont(pMsg->pCont); + pHead->ahandle = (uint64_t)pMsg->ahandle; // pHead->secured = pMsg->code == 0 ? 
1 : 0; // if (!pConn->secured) { From dbe325e11803bae31d15b151b508913718cff29f Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 22 Mar 2022 22:27:46 +0800 Subject: [PATCH 17/59] add ahandle --- source/libs/transport/src/transSrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/transport/src/transSrv.c b/source/libs/transport/src/transSrv.c index 3ff893b365..691c572022 100644 --- a/source/libs/transport/src/transSrv.c +++ b/source/libs/transport/src/transSrv.c @@ -559,7 +559,7 @@ static bool addHandleToWorkloop(void* arg) { // conn set QUEUE_INIT(&pThrd->conn); - pThrd->asyncPool = transCreateAsyncPool(pThrd->loop, 1, pThrd, uvWorkerAsyncCb); + pThrd->asyncPool = transCreateAsyncPool(pThrd->loop, 5, pThrd, uvWorkerAsyncCb); uv_read_start((uv_stream_t*)pThrd->pipe, uvAllocConnBufferCb, uvOnConnectionCb); return true; } From 2126e4c2a63f412e62ebabf59721b7eda2d22a3d Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 23 Mar 2022 09:10:46 +0800 Subject: [PATCH 18/59] feature/scheduler --- source/libs/qworker/src/qworker.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index fc3f97ec88..941deb03cd 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -1048,6 +1048,8 @@ int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) { bool qComplete = (DS_BUF_EMPTY == sOutput.bufStatus && sOutput.queryEnd); qwBuildFetchRsp(rsp, &sOutput, dataLen, qComplete); atomic_store_8((int8_t*)&ctx->queryEnd, qComplete); + + qwMsg->connInfo = ctx->connInfo; QW_SET_EVENT_PROCESSED(ctx, QW_EVENT_FETCH); @@ -1068,6 +1070,8 @@ _return: QW_SET_EVENT_PROCESSED(ctx, QW_EVENT_FETCH); qwFreeFetchRsp(rsp); rsp = NULL; + + qwMsg->connInfo = ctx->connInfo; qwBuildAndSendFetchRsp(&qwMsg->connInfo, rsp, 0, code); QW_TASK_DLOG("fetch rsp send, handle:%p, code:%x - %s, dataLen:%d", qwMsg->connInfo.handle, code, tstrerror(code), 0); } @@ -1107,6 +1111,7 @@ int32_t 
qwProcessFetch(QW_FPARAMS_DEF, SQWMsg *qwMsg) { if (NULL == rsp) { atomic_store_ptr(&ctx->connInfo.handle, qwMsg->connInfo.handle); atomic_store_ptr(&ctx->connInfo.ahandle, qwMsg->connInfo.ahandle); + QW_SET_EVENT_RECEIVED(ctx, QW_EVENT_FETCH); } else { bool qComplete = (DS_BUF_EMPTY == sOutput.bufStatus && sOutput.queryEnd); From 3a01542c5a78d8fea42137076c05b44aa78e0355 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 23 Mar 2022 11:37:34 +0800 Subject: [PATCH 19/59] merge trans --- source/dnode/mgmt/container/src/dndInt.c | 3 +- source/dnode/mgmt/dnode/src/dmWorker.c | 4 +- source/libs/transport/inc/transComm.h | 2 +- source/libs/transport/src/transCli.c | 57 +++++++++++++++++------- source/libs/transport/src/transSrv.c | 40 ++++++++--------- 5 files changed, 65 insertions(+), 41 deletions(-) diff --git a/source/dnode/mgmt/container/src/dndInt.c b/source/dnode/mgmt/container/src/dndInt.c index dc1bde6b06..ca80dcb9e4 100644 --- a/source/dnode/mgmt/container/src/dndInt.c +++ b/source/dnode/mgmt/container/src/dndInt.c @@ -135,6 +135,7 @@ void dndProcessStartupReq(SDnode *pDnode, SRpcMsg *pReq) { dDebug("startup req is sent, step:%s desc:%s finished:%d", pStartup->name, pStartup->desc, pStartup->finished); - SRpcMsg rpcRsp = {.handle = pReq->handle, .pCont = pStartup, .contLen = sizeof(SStartupReq), .ahandle = NULL}; + SRpcMsg rpcRsp = { + .handle = pReq->handle, .pCont = pStartup, .contLen = sizeof(SStartupReq), .ahandle = pReq->ahandle}; rpcSendResponse(&rpcRsp); } diff --git a/source/dnode/mgmt/dnode/src/dmWorker.c b/source/dnode/mgmt/dnode/src/dmWorker.c index b62c18655a..1390298567 100644 --- a/source/dnode/mgmt/dnode/src/dmWorker.c +++ b/source/dnode/mgmt/dnode/src/dmWorker.c @@ -23,7 +23,7 @@ static void *dmThreadRoutine(void *param) { SDnodeMgmt *pMgmt = param; - SDnode *pDnode = pMgmt->pDnode; + SDnode * pDnode = pMgmt->pDnode; int64_t lastStatusTime = taosGetTimestampMs(); int64_t lastMonitorTime = lastStatusTime; @@ -55,7 +55,7 @@ static void 
*dmThreadRoutine(void *param) { static void dmProcessQueue(SQueueInfo *pInfo, SNodeMsg *pMsg) { SDnodeMgmt *pMgmt = pInfo->ahandle; - SDnode *pDnode = pMgmt->pDnode; + SDnode * pDnode = pMgmt->pDnode; SRpcMsg *pRpc = &pMsg->rpcMsg; int32_t code = -1; dTrace("msg:%p, will be processed in dnode queue", pMsg); diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index 962acfee2c..bd915d6ebe 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -183,7 +183,7 @@ typedef struct { #pragma pack(pop) typedef enum { Normal, Quit, Release, Register } STransMsgType; -typedef enum { ConnNormal, ConnAcquire, ConnRelease, ConnBroken } ConnStatus; +typedef enum { ConnNormal, ConnAcquire, ConnRelease, ConnBroken, ConnInPool } ConnStatus; #define container_of(ptr, type, member) ((type*)((char*)(ptr)-offsetof(type, member))) #define RPC_RESERVE_SIZE (sizeof(STranConnCtx)) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 8694d4098c..7150ec470e 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -53,6 +53,7 @@ typedef struct SCliMsg { queue q; uint64_t st; STransMsgType type; + int sent; //(0: no send, 1: alread sent) } SCliMsg; typedef struct SCliThrdObj { @@ -135,6 +136,8 @@ static void destroyThrdObj(SCliThrdObj* pThrd); #define CONN_SHOULD_RELEASE(conn, head) \ do { \ if ((head)->release == 1 && (head->msgLen) == sizeof(*head)) { \ + uint64_t ahandle = head->ahandle; \ + CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle); \ conn->status = ConnRelease; \ transClearBuffer(&conn->readBuf); \ transFreeMsg(transContFromHead((char*)head)); \ @@ -146,6 +149,7 @@ static void destroyThrdObj(SCliThrdObj* pThrd); SCliThrdObj* thrd = conn->hostThrd; \ addConnToPool(thrd->pool, conn); \ } \ + destroyCmsg(pMsg); \ return; \ } \ } while (0) @@ -198,8 +202,18 @@ static void* cliWorkThread(void* arg); bool cliMaySendCachedMsg(SCliConn* 
conn) { if (!transQueueEmpty(&conn->cliMsgs)) { + SCliMsg* pCliMsg = NULL; + int i = 0; + do { + pCliMsg = transQueueGet(&conn->cliMsgs, i++); + if (pCliMsg && 0 == pCliMsg->sent) { + break; + } + } while (pCliMsg != NULL); + if (pCliMsg == NULL) { + return false; + } cliSend(conn); - return true; } return false; } @@ -218,33 +232,27 @@ void cliHandleResp(SCliConn* conn) { transMsg.msgType = pHead->msgType; transMsg.ahandle = NULL; - CONN_SHOULD_RELEASE(conn, pHead); - SCliMsg* pMsg = NULL; STransConnCtx* pCtx = NULL; + CONN_SHOULD_RELEASE(conn, pHead); if (CONN_NO_PERSIST_BY_APP(conn)) { pMsg = transQueuePop(&conn->cliMsgs); + /// uint64_t ahandle = (uint64_t)pHead->ahandle; + // CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle); pCtx = pMsg ? pMsg->ctx : NULL; - if (pMsg == NULL && !CONN_NO_PERSIST_BY_APP(conn)) { - transMsg.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); - if (transMsg.ahandle == NULL) { - transMsg.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); - } - tDebug("cli conn %p construct ahandle %p, persist: 0", conn, transMsg.ahandle); - } else { - transMsg.ahandle = pCtx ? pCtx->ahandle : NULL; - tDebug("cli conn %p get ahandle %p, persist: 0", conn, transMsg.ahandle); - } + transMsg.ahandle = pCtx ? pCtx->ahandle : NULL; + tDebug("cli conn %p get ahandle %p, persist: 0", conn, transMsg.ahandle); } else { uint64_t ahandle = (uint64_t)pHead->ahandle; CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle); if (pMsg == NULL) { transMsg.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); + tDebug("cli conn %p construct ahandle %p by %d, persist: 1", conn, transMsg.ahandle, transMsg.msgType); if (transMsg.ahandle == NULL) { + tDebug("cli conn %p construct ahandle %p due brokenlink, persist: 1", conn, transMsg.ahandle); transMsg.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); } - tDebug("cli conn %p construct ahandle %p, persist: 1", conn, transMsg.ahandle); } else { pCtx = pMsg ? 
pMsg->ctx : NULL; transMsg.ahandle = pCtx ? pCtx->ahandle : NULL; @@ -419,7 +427,7 @@ static void addConnToPool(void* pool, SCliConn* conn) { conn->expireTime = taosGetTimestampMs() + CONN_PERSIST_TIME(pTransInst->idleTime); transCtxCleanup(&conn->ctx); transQueueClear(&conn->cliMsgs); - conn->status = ConnNormal; + conn->status = ConnInPool; char key[128] = {0}; tstrncpy(key, conn->ip, strlen(conn->ip)); @@ -546,7 +554,21 @@ void cliSend(SCliConn* pConn) { // assert(taosArrayGetSize(pConn->cliMsgs) > 0); assert(!transQueueEmpty(&pConn->cliMsgs)); - SCliMsg* pCliMsg = transQueueGet(&pConn->cliMsgs, 0); + + SCliMsg* pCliMsg = NULL; + int i = 0; + do { + pCliMsg = transQueueGet(&pConn->cliMsgs, i++); + if (pCliMsg && 0 == pCliMsg->sent) { + break; + } + } while (pCliMsg != NULL); + if (pCliMsg == NULL) { + return; + } + + pCliMsg->sent = 1; + STransConnCtx* pCtx = pCliMsg->ctx; SCliThrdObj* pThrd = pConn->hostThrd; @@ -558,7 +580,7 @@ void cliSend(SCliConn* pConn) { pMsg->contLen = 0; } STransMsgHead* pHead = transHeadFromCont(pMsg->pCont); - pHead->ahandle = (uint64_t)pCtx->ahandle; + pHead->ahandle = pCtx != NULL ? 
(uint64_t)pCtx->ahandle : 0; int msgLen = transMsgLenFromCont(pMsg->contLen); @@ -868,6 +890,7 @@ void transReleaseCliHandle(void* handle) { STransMsg tmsg = {.handle = handle}; SCliMsg* cmsg = calloc(1, sizeof(SCliMsg)); + cmsg->msg = tmsg; cmsg->type = Release; diff --git a/source/libs/transport/src/transSrv.c b/source/libs/transport/src/transSrv.c index 691c572022..15dcc29232 100644 --- a/source/libs/transport/src/transSrv.c +++ b/source/libs/transport/src/transSrv.c @@ -93,25 +93,25 @@ typedef struct SServerObj { static const char* notify = "a"; -#define CONN_SHOULD_RELEASE(conn, head) \ - do { \ - if ((head)->release == 1 && (head->msgLen) == sizeof(*head)) { \ - conn->status = ConnRelease; \ - transClearBuffer(&conn->readBuf); \ - transFreeMsg(transContFromHead((char*)head)); \ - tTrace("server conn %p received release request", conn); \ - \ - STransMsg tmsg = {.handle = (void*)conn, .code = 0}; \ - SSrvMsg* srvMsg = calloc(1, sizeof(SSrvMsg)); \ - srvMsg->msg = tmsg; \ - srvMsg->type = Release; \ - srvMsg->pConn = conn; \ - if (!transQueuePush(&conn->srvMsgs, srvMsg)) { \ - return; \ - } \ - uvStartSendRespInternal(srvMsg); \ - return; \ - } \ +#define CONN_SHOULD_RELEASE(conn, head) \ + do { \ + if ((head)->release == 1 && (head->msgLen) == sizeof(*head)) { \ + conn->status = ConnRelease; \ + transClearBuffer(&conn->readBuf); \ + transFreeMsg(transContFromHead((char*)head)); \ + tTrace("server conn %p received release request", conn); \ + \ + STransMsg tmsg = {.code = 0, .handle = (void*)conn, .ahandle = NULL}; \ + SSrvMsg* srvMsg = calloc(1, sizeof(SSrvMsg)); \ + srvMsg->msg = tmsg; \ + srvMsg->type = Release; \ + srvMsg->pConn = conn; \ + if (!transQueuePush(&conn->srvMsgs, srvMsg)) { \ + return; \ + } \ + uvStartSendRespInternal(srvMsg); \ + return; \ + } \ } while (0) static void uvAllocConnBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf); @@ -823,7 +823,7 @@ void transReleaseSrvHandle(void* handle) { SSrvConn* pConn = handle; 
SWorkThrdObj* pThrd = pConn->hostThrd; - STransMsg tmsg = {.handle = handle, .code = 0}; + STransMsg tmsg = {.code = 0, .handle = handle, .ahandle = NULL}; SSrvMsg* srvMsg = calloc(1, sizeof(SSrvMsg)); srvMsg->msg = tmsg; From c80da5f7186aba6dce193f520c20b376c0cb55c2 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 23 Mar 2022 13:40:34 +0800 Subject: [PATCH 20/59] feature/scheduler --- source/libs/qworker/src/qworker.c | 4 ++-- source/libs/scheduler/src/scheduler.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 941deb03cd..e61123ef91 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -166,7 +166,7 @@ int32_t qwSetTaskStatus(QW_FPARAMS_DEF, SQWTaskStatus *task, int8_t status) { } -int32_t qwAddSchedulerImpl(SQWorkerMgmt *mgmt, uint64_t sId, int32_t rwType, SQWSchStatus **sch) { +int32_t qwAddSchedulerImpl(SQWorkerMgmt *mgmt, uint64_t sId, int32_t rwType) { SQWSchStatus newSch = {0}; newSch.tasksHash = taosHashInit(mgmt->cfg.maxSchTaskNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_NO_LOCK); if (NULL == newSch.tasksHash) { @@ -200,7 +200,7 @@ int32_t qwAcquireSchedulerImpl(SQWorkerMgmt *mgmt, uint64_t sId, int32_t rwType, QW_UNLOCK(rwType, &mgmt->schLock); if (QW_NOT_EXIST_ADD == nOpt) { - QW_ERR_RET(qwAddSchedulerImpl(mgmt, sId, rwType, sch)); + QW_ERR_RET(qwAddSchedulerImpl(mgmt, sId, rwType)); nOpt = QW_NOT_EXIST_RET_ERR; diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index e74507bdf7..529f27188e 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -125,7 +125,7 @@ int32_t schValidateTaskReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t m case TDMT_SCH_LINK_BROKEN: return TSDB_CODE_SUCCESS; case TDMT_VND_QUERY_RSP: // query_rsp may be processed later than ready_rsp - if (lastMsgType != reqMsgType) { + if 
(lastMsgType != reqMsgType && -1 != lastMsgType && TDMT_VND_FETCH != lastMsgType) { SCH_TASK_DLOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), TMSG_INFO(msgType)); } @@ -1776,6 +1776,7 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, pMsg->sId = htobe64(schMgmt.sId); pMsg->queryId = htobe64(pJob->queryId); pMsg->taskId = htobe64(pTask->taskId); + break; } case TDMT_VND_DROP_TASK: { From eb081fc2b6cc17de862e21d931ec2d9c06f91d83 Mon Sep 17 00:00:00 2001 From: ubuntu Date: Wed, 23 Mar 2022 14:55:43 +0800 Subject: [PATCH 21/59] handle except --- source/libs/transport/src/transCli.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 7150ec470e..5617b0aa37 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -238,11 +238,17 @@ void cliHandleResp(SCliConn* conn) { if (CONN_NO_PERSIST_BY_APP(conn)) { pMsg = transQueuePop(&conn->cliMsgs); - /// uint64_t ahandle = (uint64_t)pHead->ahandle; - // CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle); - pCtx = pMsg ? pMsg->ctx : NULL; - transMsg.ahandle = pCtx ? pCtx->ahandle : NULL; - tDebug("cli conn %p get ahandle %p, persist: 0", conn, transMsg.ahandle); + pCtx = pMsg ? pMsg->ctx: NULL; + if (pMsg == NULL && !CONN_NO_PERSIST_BY_APP(conn)) { + transMsg.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); + if (transMsg.ahandle == NULL) { + transMsg.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); + } + tDebug("cli conn %p construct ahandle %p, persist: 0", conn, transMsg.ahandle); + } else { + transMsg.ahandle = pCtx ? 
pCtx->ahandle : NULL; + tDebug("cli conn %p get ahandle %p, persist: 0", conn, transMsg.ahandle); + } } else { uint64_t ahandle = (uint64_t)pHead->ahandle; CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle); @@ -414,8 +420,8 @@ static SCliConn* getConnFromPool(void* pool, char* ip, uint32_t port) { } queue* h = QUEUE_HEAD(&plist->conn); QUEUE_REMOVE(h); - SCliConn* conn = QUEUE_DATA(h, SCliConn, conn); + conn->status = ConnNormal; QUEUE_INIT(&conn->conn); return conn; } From 0336c44d6c9892f40a09219d2d3f741376e842ce Mon Sep 17 00:00:00 2001 From: ubuntu Date: Wed, 23 Mar 2022 15:20:28 +0800 Subject: [PATCH 22/59] handle except --- source/libs/transport/src/transCli.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 5617b0aa37..d0cb9af710 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -320,7 +320,7 @@ void cliHandleExcept(SCliConn* pConn) { SCliThrdObj* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; - do { + while(!transQueueEmpty(&pConn->cliMsgs)){ SCliMsg* pMsg = transQueuePop(&pConn->cliMsgs); STransConnCtx* pCtx = pMsg ? 
pMsg->ctx : NULL; @@ -343,16 +343,19 @@ void cliHandleExcept(SCliConn* pConn) { } if (pCtx == NULL || pCtx->pSem == NULL) { - tTrace("%s cli conn %p handle resp", pTransInst->label, pConn); + tTrace("%s cli conn %p handle except", pTransInst->label, pConn); + if (transMsg.ahandle == NULL) { + continue; + } (pTransInst->cfp)(pTransInst->parent, &transMsg, NULL); } else { - tTrace("%s cli conn(sync) %p handle resp", pTransInst->label, pConn); + tTrace("%s cli conn(sync) %p handle except", pTransInst->label, pConn); memcpy((char*)(pCtx->pRsp), (char*)(&transMsg), sizeof(transMsg)); tsem_post(pCtx->pSem); } destroyCmsg(pMsg); tTrace("%s cli conn %p start to destroy", CONN_GET_INST_LABEL(pConn), pConn); - } while (!transQueueEmpty(&pConn->cliMsgs)); + }; transUnrefCliHandle(pConn); } From f6b5bfae9c28d915d5687340d0dd08bd6372ca65 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 23 Mar 2022 16:03:36 +0800 Subject: [PATCH 23/59] feature/scheduler --- source/libs/scheduler/src/scheduler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 529f27188e..ea12ba25d7 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -1155,7 +1155,7 @@ int32_t schHandleHbCallback(void *param, const SDataBuf *pMsg, int32_t code) { trans.transInst = pParam->transport; trans.transHandle = pMsg->handle; - SCH_RET(schUpdateHbConnection(&rsp.epId, &trans)); + SCH_ERR_RET(schUpdateHbConnection(&rsp.epId, &trans)); int32_t taskNum = (int32_t)taosArrayGetSize(rsp.taskStatus); qDebug("%d task status in hb rsp, nodeId:%d, fqdn:%s, port:%d", taskNum, rsp.epId.nodeId, rsp.epId.ep.fqdn, rsp.epId.ep.port); From ed5b28329110a49a7668779d4142205ab9fb0da5 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 23 Mar 2022 17:19:04 +0800 Subject: [PATCH 24/59] feature/scheduler --- include/libs/transport/trpc.h | 14 ++++---- source/client/src/clientEnv.c | 4 +++ 
source/client/src/clientImpl.c | 2 ++ source/libs/scheduler/inc/schedulerInt.h | 1 + source/libs/scheduler/src/scheduler.c | 36 ++++++++++++-------- source/libs/transport/src/transComm.c | 12 +++---- source/libs/transport/test/transportTests.cc | 18 ++++------ tools/shell/src/shellEngine.c | 1 + 8 files changed, 48 insertions(+), 40 deletions(-) diff --git a/include/libs/transport/trpc.h b/include/libs/transport/trpc.h index c2cce3a05d..a506c6fe98 100644 --- a/include/libs/transport/trpc.h +++ b/include/libs/transport/trpc.h @@ -81,16 +81,16 @@ typedef struct SRpcInit { } SRpcInit; typedef struct { - void * val; - int32_t len; - void (*free)(void *arg); + void *val; + int32_t (*clone)(void *src, void **dst); + void (*free)(void *arg); } SRpcCtxVal; typedef struct { - int32_t msgType; - void * val; - int32_t len; - void (*free)(void *arg); + int32_t msgType; + void *val; + int32_t (*clone)(void *src, void **dst); + void (*free)(void *arg); } SRpcBrokenlinkVal; typedef struct { diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 6ebf9e71e0..fd6a72962d 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -185,6 +185,10 @@ static void doDestroyRequest(void *p) { doFreeReqResultInfo(&pRequest->body.resInfo); qDestroyQueryPlan(pRequest->body.pDag); + if (pRequest->body.queryJob != 0) { + schedulerFreeJob(pRequest->body.queryJob); + } + if (pRequest->body.showInfo.pArray != NULL) { taosArrayDestroy(pRequest->body.showInfo.pArray); } diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index a7f15cbe45..70cd00ff11 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -330,6 +330,8 @@ SRequestObj* execQuery(STscObj* pTscObj, const char* sql, int sqlLen) { pRequest->code = code; break; } + + destroyRequest(pRequest); } return pRequest; diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h index fe27b85b40..9d973e4437 
100644 --- a/source/libs/scheduler/inc/schedulerInt.h +++ b/source/libs/scheduler/inc/schedulerInt.h @@ -241,6 +241,7 @@ int32_t schLaunchTaskImpl(SSchJob *pJob, SSchTask *pTask); int32_t schFetchFromRemote(SSchJob *pJob); int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode); int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId); +int32_t schCloneSMsgSendInfo(void *src, void **dst); #ifdef __cplusplus diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index ea12ba25d7..ca471326e0 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -1172,6 +1172,8 @@ int32_t schHandleHbCallback(void *param, const SDataBuf *pMsg, int32_t code) { } // TODO + + SCH_JOB_DLOG("TID:0x%" PRIx64 " task status in server: %s", taskStatus->taskId, jobTaskStatusStr(taskStatus->status)); schReleaseJob(taskStatus->refId); } @@ -1309,7 +1311,7 @@ int32_t schMakeBrokenLinkVal(SSchJob *pJob, SSchTask *pTask, SRpcBrokenlinkVal * brokenVal->msgType = msgType; brokenVal->val = pMsgSendInfo; - brokenVal->len = sizeof(SMsgSendInfo); + brokenVal->clone = schCloneSMsgSendInfo; brokenVal->free = schFreeRpcCtxVal; return TSDB_CODE_SUCCESS; @@ -1357,7 +1359,7 @@ int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { pMsgSendInfo->param = param; pMsgSendInfo->fp = fp; - SRpcCtxVal ctxVal = {.val = pMsgSendInfo, .len = sizeof(SMsgSendInfo), .free = schFreeRpcCtxVal}; + SRpcCtxVal ctxVal = {.val = pMsgSendInfo, .clone = schCloneSMsgSendInfo, .free = schFreeRpcCtxVal}; if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1414,7 +1416,7 @@ int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { pMsgSendInfo->param = param; pMsgSendInfo->fp = fp; - SRpcCtxVal ctxVal = {.val = pMsgSendInfo, .len = sizeof(SMsgSendInfo), .free = 
schFreeRpcCtxVal}; + SRpcCtxVal ctxVal = {.val = pMsgSendInfo, .clone = schCloneSMsgSendInfo, .free = schFreeRpcCtxVal}; if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1486,26 +1488,27 @@ int32_t schCloneCallbackParam(SSchCallbackParamHeader *pSrc, SSchCallbackParamHe return TSDB_CODE_SUCCESS; } -int32_t schCloneSMsgSendInfo(SMsgSendInfo *pSrc, SMsgSendInfo **pDst) { +int32_t schCloneSMsgSendInfo(void *src, void **dst) { + SMsgSendInfo *pSrc = src; int32_t code = 0; - SMsgSendInfo *dst = malloc(sizeof(*pSrc)); - if (NULL == dst) { + SMsgSendInfo *pDst = malloc(sizeof(*pSrc)); + if (NULL == pDst) { qError("malloc SMsgSendInfo for rpcCtx failed, len:%d", (int32_t)sizeof(*pSrc)); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - memcpy(dst, pSrc, sizeof(*pSrc)); - dst->param = NULL; + memcpy(pDst, pSrc, sizeof(*pSrc)); + pDst->param = NULL; - SCH_ERR_JRET(schCloneCallbackParam(pSrc->param, (SSchCallbackParamHeader **)&dst->param)); + SCH_ERR_JRET(schCloneCallbackParam(pSrc->param, (SSchCallbackParamHeader **)&pDst->param)); - *pDst = dst; + *dst = pDst; return TSDB_CODE_SUCCESS; _return: - tfree(dst); + tfree(pDst); SCH_RET(code); } @@ -1514,7 +1517,7 @@ int32_t schCloneHbRpcCtx(SRpcCtx *pSrc, SRpcCtx *pDst) { memcpy(&pDst->brokenVal, &pSrc->brokenVal, sizeof(pSrc->brokenVal)); pDst->brokenVal.val = NULL; - SCH_ERR_RET(schCloneSMsgSendInfo(pSrc->brokenVal.val, (SMsgSendInfo **)&pDst->brokenVal.val)); + SCH_ERR_RET(schCloneSMsgSendInfo(pSrc->brokenVal.val, &pDst->brokenVal.val)); pDst->args = taosHashInit(1, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); if (NULL == pDst->args) { @@ -1528,9 +1531,12 @@ int32_t schCloneHbRpcCtx(SRpcCtx *pSrc, SRpcCtx *pDst) { SRpcCtxVal *pVal = (SRpcCtxVal *)pIter; int32_t *msgType = taosHashGetKey(pIter, NULL); - SCH_ERR_JRET(schCloneSMsgSendInfo(pVal->val, (SMsgSendInfo 
**)&dst.val)); + dst = *pVal; + dst.val = NULL; - if (taosHashPut(pDst->args, msgType, sizeof(*msgType), pVal, sizeof(*pVal))) { + SCH_ERR_JRET(schCloneSMsgSendInfo(pVal->val, &dst.val)); + + if (taosHashPut(pDst->args, msgType, sizeof(*msgType), &dst, sizeof(dst))) { qError("taosHashPut msg %d to rpcCtx failed", *msgType); (*dst.free)(dst.val); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -2047,7 +2053,7 @@ void schFreeJobImpl(void *job) { taosArrayDestroy(pJob->nodeList); tfree(pJob->resData); - tfree(pJob); + free(pJob); qDebug("QID:0x%" PRIx64 " job freed, refId:%" PRIx64 ", pointer:%p", queryId, refId, pJob); } diff --git a/source/libs/transport/src/transComm.c b/source/libs/transport/src/transComm.c index 87355ac8d0..6833594e7d 100644 --- a/source/libs/transport/src/transComm.c +++ b/source/libs/transport/src/transComm.c @@ -273,17 +273,17 @@ void* transCtxDumpVal(STransCtx* ctx, int32_t key) { if (cVal == NULL) { return NULL; } - char* ret = calloc(1, cVal->len); - memcpy(ret, (char*)cVal->val, cVal->len); - return (void*)ret; + void *ret = NULL; + (*cVal->clone)(cVal->val, &ret); + return ret; } void* transCtxDumpBrokenlinkVal(STransCtx* ctx, int32_t* msgType) { - char* ret = calloc(1, ctx->brokenVal.len); + void *ret = NULL; + (*ctx->brokenVal.clone)(ctx->brokenVal.val, &ret); - memcpy(ret, (char*)(ctx->brokenVal.val), ctx->brokenVal.len); *msgType = ctx->brokenVal.msgType; - return (void*)ret; + return ret; } void transQueueInit(STransQueue* queue, void (*free)(void* arg)) { diff --git a/source/libs/transport/test/transportTests.cc b/source/libs/transport/test/transportTests.cc index 65d9302994..ad2da87435 100644 --- a/source/libs/transport/test/transportTests.cc +++ b/source/libs/transport/test/transportTests.cc @@ -156,17 +156,15 @@ TEST_F(TransCtxEnv, mergeTest) { STransCtx *src = (STransCtx *)calloc(1, sizeof(STransCtx)); transCtxInit(src); { - STransCtxVal val1 = {.val = NULL, .len = 0, .free = free}; + STransCtxVal val1 = {.val = NULL, .free = 
free}; val1.val = malloc(12); - val1.len = 12; taosHashPut(src->args, &key, sizeof(key), &val1, sizeof(val1)); key++; } { - STransCtxVal val1 = {.val = NULL, .len = 0, .free = free}; + STransCtxVal val1 = {.val = NULL, .free = free}; val1.val = malloc(12); - val1.len = 12; taosHashPut(src->args, &key, sizeof(key), &val1, sizeof(val1)); key++; } @@ -178,17 +176,15 @@ TEST_F(TransCtxEnv, mergeTest) { STransCtx *src = (STransCtx *)calloc(1, sizeof(STransCtx)); transCtxInit(src); { - STransCtxVal val1 = {.val = NULL, .len = 0, .free = free}; + STransCtxVal val1 = {.val = NULL, .free = free}; val1.val = malloc(12); - val1.len = 12; taosHashPut(src->args, &key, sizeof(key), &val1, sizeof(val1)); key++; } { - STransCtxVal val1 = {.val = NULL, .len = 0, .free = free}; + STransCtxVal val1 = {.val = NULL, .free = free}; val1.val = malloc(12); - val1.len = 12; taosHashPut(src->args, &key, sizeof(key), &val1, sizeof(val1)); key++; } @@ -202,19 +198,17 @@ TEST_F(TransCtxEnv, mergeTest) { STransCtx *src = (STransCtx *)calloc(1, sizeof(STransCtx)); transCtxInit(src); { - STransCtxVal val1 = {.val = NULL, .len = 0, .free = free}; + STransCtxVal val1 = {.val = NULL, .free = free}; val1.val = calloc(1, 11); memcpy(val1.val, val.c_str(), val.size()); - val1.len = 11; taosHashPut(src->args, &key, sizeof(key), &val1, sizeof(val1)); key++; } { - STransCtxVal val1 = {.val = NULL, .len = 0, .free = free}; + STransCtxVal val1 = {.val = NULL, .free = free}; val1.val = calloc(1, 11); memcpy(val1.val, val.c_str(), val.size()); - val1.len = 11; taosHashPut(src->args, &key, sizeof(key), &val1, sizeof(val1)); key++; } diff --git a/tools/shell/src/shellEngine.c b/tools/shell/src/shellEngine.c index 1b35afb57d..c8fb901c0d 100644 --- a/tools/shell/src/shellEngine.c +++ b/tools/shell/src/shellEngine.c @@ -358,6 +358,7 @@ void shellRunCommandOnServer(TAOS *con, char command[]) { } else { printf("Query interrupted (%s), %d row(s) in set (%.6fs)\n", taos_errstr(pSql), numOfRows, (et - st) / 1E6); } + 
taos_free_result(pSql); } else { int num_rows_affacted = taos_affected_rows(pSql); taos_free_result(pSql); From e11d53f5ca0b000bc75f50b8a6f5b3ebf8b44809 Mon Sep 17 00:00:00 2001 From: ubuntu Date: Wed, 23 Mar 2022 17:55:19 +0800 Subject: [PATCH 25/59] handle except --- source/libs/transport/src/transComm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/transport/src/transComm.c b/source/libs/transport/src/transComm.c index 6833594e7d..2d6485b346 100644 --- a/source/libs/transport/src/transComm.c +++ b/source/libs/transport/src/transComm.c @@ -240,6 +240,7 @@ void transCtxCleanup(STransCtx* ctx) { } taosHashCleanup(ctx->args); + ctx->args = NULL; } void transCtxMerge(STransCtx* dst, STransCtx* src) { From 4cd1d75a416149eaf1f1b6e1910f138422f1ac2a Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 23 Mar 2022 18:53:25 +0800 Subject: [PATCH 26/59] feature/scheduler --- source/libs/scheduler/src/scheduler.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index ca471326e0..5a1a8581b0 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -1143,14 +1143,13 @@ int32_t schHandleHbCallback(void *param, const SDataBuf *pMsg, int32_t code) { } SSchedulerHbRsp rsp = {0}; - - SSchHbCallbackParam *pParam = (SSchHbCallbackParam *)param; - if (tDeserializeSSchedulerHbRsp(pMsg->pData, pMsg->len, &rsp)) { qError("invalid hb rsp msg, size:%d", pMsg->len); SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); } + SSchTaskCallbackParam *pParam = (SSchTaskCallbackParam *)param; + SSchTrans trans = {0}; trans.transInst = pParam->transport; trans.transHandle = pMsg->handle; From af7d630eb5690fa5fd9c983faadff2aad8b713e8 Mon Sep 17 00:00:00 2001 From: ubuntu Date: Wed, 23 Mar 2022 20:12:44 +0800 Subject: [PATCH 27/59] handle except --- source/libs/transport/src/transCli.c | 40 +++++++++++++--------------- 1 file changed, 19 insertions(+), 21 
deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index d0cb9af710..5370306683 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -169,6 +169,19 @@ static void destroyThrdObj(SCliThrdObj* pThrd); pMsg = transQueueRm(&conn->cliMsgs, i); \ } \ } while (0) +#define CONN_GET_NEXT_SENDMSG(conn) \ + do { \ + int i = 0; \ + do { \ + pCliMsg = transQueueGet(&conn->cliMsgs, i++); \ + if (pCliMsg && 0 == pCliMsg->sent) { \ + break; \ + } \ + } while (pCliMsg != NULL); \ + if (pCliMsg == NULL) { \ + goto _RETURN; \ + } \ + } while (0) #define CONN_HANDLE_THREAD_QUIT(thrd) \ do { \ @@ -203,19 +216,12 @@ static void* cliWorkThread(void* arg); bool cliMaySendCachedMsg(SCliConn* conn) { if (!transQueueEmpty(&conn->cliMsgs)) { SCliMsg* pCliMsg = NULL; - int i = 0; - do { - pCliMsg = transQueueGet(&conn->cliMsgs, i++); - if (pCliMsg && 0 == pCliMsg->sent) { - break; - } - } while (pCliMsg != NULL); - if (pCliMsg == NULL) { - return false; - } + CONN_GET_NEXT_SENDMSG(conn); cliSend(conn); } return false; +_RETURN: + return false; } void cliHandleResp(SCliConn* conn) { SCliThrdObj* pThrd = conn->hostThrd; @@ -565,17 +571,7 @@ void cliSend(SCliConn* pConn) { assert(!transQueueEmpty(&pConn->cliMsgs)); SCliMsg* pCliMsg = NULL; - int i = 0; - do { - pCliMsg = transQueueGet(&pConn->cliMsgs, i++); - if (pCliMsg && 0 == pCliMsg->sent) { - break; - } - } while (pCliMsg != NULL); - if (pCliMsg == NULL) { - return; - } - + CONN_GET_NEXT_SENDMSG(pConn); pCliMsg->sent = 1; STransConnCtx* pCtx = pCliMsg->ctx; @@ -630,6 +626,8 @@ void cliSend(SCliConn* pConn) { pConn->writeReq.data = pConn; uv_write(&pConn->writeReq, (uv_stream_t*)pConn->stream, &wb, 1, cliSendCb); + return; +_RETURN: return; } From 3cbce27a9faba9c2b58faced885a208edff37529 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 24 Mar 2022 09:25:09 +0800 Subject: [PATCH 28/59] handle except --- source/libs/transport/src/transCli.c | 35 
++++++++++++++++------------ 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index d0cb9af710..fd91742789 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -193,7 +193,7 @@ static void destroyThrdObj(SCliThrdObj* pThrd); } \ } while (0) #define CONN_NO_PERSIST_BY_APP(conn) ((conn)->status == ConnNormal && T_REF_VAL_GET(conn) == 1) - +#define CONN_RELEASE_BY_SERVER(conn) ((conn)->status == ConnRelease && T_REF_VAL_GET(conn) == 1) #define REQUEST_NO_RESP(msg) ((msg)->noResp == 1) #define REQUEST_PERSIS_HANDLE(msg) ((msg)->persistHandle == 1) #define REQUEST_RELEASE_HANDLE(cmsg) ((cmsg)->type == Release) @@ -238,26 +238,26 @@ void cliHandleResp(SCliConn* conn) { if (CONN_NO_PERSIST_BY_APP(conn)) { pMsg = transQueuePop(&conn->cliMsgs); - pCtx = pMsg ? pMsg->ctx: NULL; - if (pMsg == NULL && !CONN_NO_PERSIST_BY_APP(conn)) { - transMsg.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); - if (transMsg.ahandle == NULL) { - transMsg.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); - } - tDebug("cli conn %p construct ahandle %p, persist: 0", conn, transMsg.ahandle); - } else { - transMsg.ahandle = pCtx ? pCtx->ahandle : NULL; - tDebug("cli conn %p get ahandle %p, persist: 0", conn, transMsg.ahandle); - } + pCtx = pMsg ? pMsg->ctx : NULL; + if (pMsg == NULL && !CONN_NO_PERSIST_BY_APP(conn)) { + transMsg.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); + if (transMsg.ahandle == NULL) { + transMsg.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); + } + tDebug("cli conn %p construct ahandle %p, persist: 0", conn, transMsg.ahandle); + } else { + transMsg.ahandle = pCtx ? 
pCtx->ahandle : NULL; + tDebug("cli conn %p get ahandle %p, persist: 0", conn, transMsg.ahandle); + } } else { uint64_t ahandle = (uint64_t)pHead->ahandle; CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle); if (pMsg == NULL) { transMsg.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); tDebug("cli conn %p construct ahandle %p by %d, persist: 1", conn, transMsg.ahandle, transMsg.msgType); - if (transMsg.ahandle == NULL) { - tDebug("cli conn %p construct ahandle %p due brokenlink, persist: 1", conn, transMsg.ahandle); + if (!CONN_RELEASE_BY_SERVER(conn)&& transMsg.ahandle = NULL) { transMsg.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); + tDebug("cli conn %p construct ahandle %p due brokenlink, persist: 1", conn, transMsg.ahandle); } } else { pCtx = pMsg ? pMsg->ctx : NULL; @@ -284,6 +284,11 @@ void cliHandleResp(SCliConn* conn) { // transUnrefCliHandle(conn); return; } + if (CONN_RELEASE_BY_SERVER(conn) && transMsg.ahandle == NULL) { + tTrace("except, server continue send while cli ignore it"); + // transUnrefCliHandle(conn); + return; + } if (pCtx == NULL || pCtx->pSem == NULL) { tTrace("%s cli conn %p handle resp", pTransInst->label, conn); @@ -320,7 +325,7 @@ void cliHandleExcept(SCliConn* pConn) { SCliThrdObj* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; - while(!transQueueEmpty(&pConn->cliMsgs)){ + while (!transQueueEmpty(&pConn->cliMsgs)) { SCliMsg* pMsg = transQueuePop(&pConn->cliMsgs); STransConnCtx* pCtx = pMsg ? 
pMsg->ctx : NULL; From f9f5e6009e9946875e8bcdb1d8a5f1682ab6a919 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 24 Mar 2022 09:29:03 +0800 Subject: [PATCH 29/59] feature/scheduler --- source/libs/qworker/src/qworker.c | 33 +++++++++++++++++++-------- source/libs/scheduler/src/scheduler.c | 14 +++++++++++- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index e61123ef91..876807d17c 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -519,7 +519,8 @@ int32_t qwExecTask(QW_FPARAMS_DEF, SQWTaskCtx *ctx, bool *queryEnd) { while (true) { QW_TASK_DLOG("start to execTask, loopIdx:%d", i++); - + + taosSsleep(20); code = qExecTask(*taskHandle, &pRes, &useconds); if (code) { QW_TASK_ELOG("qExecTask failed, code:%x - %s", code, tstrerror(code)); @@ -730,9 +731,13 @@ int32_t qwHandlePrePhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inpu } if (QW_IS_EVENT_RECEIVED(ctx, QW_EVENT_DROP)) { - QW_ERR_JRET(qwDropTask(QW_FPARAMS())); - dropConnection = &ctx->connInfo; + QW_ERR_JRET(qwDropTask(QW_FPARAMS())); + dropConnection = NULL; + + qwBuildAndSendDropRsp(&ctx->connInfo, code); + QW_TASK_DLOG("drop rsp send, handle:%p, code:%x - %s", ctx->connInfo.handle, code, tstrerror(code)); + QW_ERR_JRET(TSDB_CODE_QRY_TASK_DROPPED); break; } @@ -764,9 +769,13 @@ int32_t qwHandlePrePhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inpu } if (QW_IS_EVENT_RECEIVED(ctx, QW_EVENT_DROP)) { - QW_ERR_JRET(qwDropTask(QW_FPARAMS())); - dropConnection = &ctx->connInfo; + QW_ERR_JRET(qwDropTask(QW_FPARAMS())); + dropConnection = NULL; + + qwBuildAndSendDropRsp(&ctx->connInfo, code); + QW_TASK_DLOG("drop rsp send, handle:%p, code:%x - %s", ctx->connInfo.handle, code, tstrerror(code)); + QW_ERR_JRET(TSDB_CODE_QRY_TASK_DROPPED); } @@ -847,6 +856,9 @@ int32_t qwHandlePostPhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inp QW_TASK_WLOG("drop received at wrong 
phase %s", qwPhaseStr(phase)); QW_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); } + + qwBuildAndSendDropRsp(&ctx->connInfo, code); + QW_TASK_DLOG("drop rsp send, handle:%p, code:%x - %s", ctx->connInfo.handle, code, tstrerror(code)); QW_ERR_JRET(qwDropTask(QW_FPARAMS())); @@ -1163,7 +1175,7 @@ _return: int32_t qwProcessDrop(QW_FPARAMS_DEF, SQWMsg *qwMsg) { int32_t code = 0; - bool needRsp = false; + bool rsped = false; SQWTaskCtx *ctx = NULL; bool locked = false; @@ -1184,13 +1196,16 @@ int32_t qwProcessDrop(QW_FPARAMS_DEF, SQWMsg *qwMsg) { QW_ERR_JRET(qwKillTaskHandle(QW_FPARAMS(), ctx)); qwUpdateTaskStatus(QW_FPARAMS(), JOB_TASK_STATUS_DROPPING); } else if (ctx->phase > 0) { + qwBuildAndSendDropRsp(&ctx->connInfo, code); + QW_TASK_DLOG("drop rsp send, handle:%p, code:%x - %s", ctx->connInfo.handle, code, tstrerror(code)); + QW_ERR_JRET(qwDropTask(QW_FPARAMS())); - needRsp = true; + rsped = true; } else { // task not started } - if (!needRsp) { + if (!rsped) { ctx->connInfo.handle == qwMsg->connInfo.handle; ctx->connInfo.ahandle = qwMsg->connInfo.ahandle; @@ -1215,7 +1230,7 @@ _return: qwReleaseTaskCtx(mgmt, ctx); } - if (TSDB_CODE_SUCCESS != code || needRsp) { + if (TSDB_CODE_SUCCESS != code) { qwBuildAndSendDropRsp(&qwMsg->connInfo, code); QW_TASK_DLOG("drop rsp send, handle:%p, code:%x - %s", qwMsg->connInfo.handle, code, tstrerror(code)); } diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 5a1a8581b0..77f8ccf8cc 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -147,11 +147,23 @@ int32_t schValidateTaskReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t m SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); } + SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); + return TSDB_CODE_SUCCESS; + case TDMT_VND_FETCH_RSP: + if (lastMsgType != reqMsgType && -1 != lastMsgType) { + SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), TMSG_INFO(msgType)); + 
SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { + SCH_TASK_ELOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), TMSG_INFO(msgType)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); return TSDB_CODE_SUCCESS; case TDMT_VND_CREATE_TABLE_RSP: case TDMT_VND_SUBMIT_RSP: - case TDMT_VND_FETCH_RSP: break; default: SCH_TASK_ELOG("unknown rsp msg, type:%s, status:%s", TMSG_INFO(msgType), jobTaskStatusStr(taskStatus)); From 9b52c22741bc1392c9325212365d6b7fb661e3b4 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 24 Mar 2022 09:50:57 +0800 Subject: [PATCH 30/59] handle except --- source/libs/transport/src/transCli.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 4269865993..aee60d9a94 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -206,7 +206,8 @@ static void destroyThrdObj(SCliThrdObj* pThrd); } \ } while (0) #define CONN_NO_PERSIST_BY_APP(conn) ((conn)->status == ConnNormal && T_REF_VAL_GET(conn) == 1) -#define CONN_RELEASE_BY_SERVER(conn) ((conn)->status == ConnRelease && T_REF_VAL_GET(conn) == 1) +#define CONN_RELEASE_BY_SERVER(conn) \ + (((conn)->status == ConnRelease || (conn)->status == ConnInPool) && T_REF_VAL_GET(conn) == 1) #define REQUEST_NO_RESP(msg) ((msg)->noResp == 1) #define REQUEST_PERSIS_HANDLE(msg) ((msg)->persistHandle == 1) #define REQUEST_RELEASE_HANDLE(cmsg) ((cmsg)->type == Release) @@ -261,7 +262,7 @@ void cliHandleResp(SCliConn* conn) { if (pMsg == NULL) { transMsg.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); tDebug("cli conn %p construct ahandle %p by %d, persist: 1", conn, transMsg.ahandle, transMsg.msgType); - if (!CONN_RELEASE_BY_SERVER(conn)&& transMsg.ahandle = NULL) { + if 
(!CONN_RELEASE_BY_SERVER(conn) && transMsg.ahandle == NULL) { transMsg.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); tDebug("cli conn %p construct ahandle %p due brokenlink, persist: 1", conn, transMsg.ahandle); } From 7ea13edca89f23c1b56b4208f81e4ccd086afa56 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 24 Mar 2022 11:46:32 +0800 Subject: [PATCH 31/59] feature/scheduler --- source/libs/qworker/src/qworker.c | 7 ++-- source/libs/scheduler/inc/schedulerInt.h | 9 ++++- source/libs/scheduler/src/scheduler.c | 45 ++++++++++++++---------- source/libs/transport/src/transCli.c | 13 ++++--- 4 files changed, 45 insertions(+), 29 deletions(-) diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 876807d17c..07e31d549e 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -520,7 +520,6 @@ int32_t qwExecTask(QW_FPARAMS_DEF, SQWTaskCtx *ctx, bool *queryEnd) { while (true) { QW_TASK_DLOG("start to execTask, loopIdx:%d", i++); - taosSsleep(20); code = qExecTask(*taskHandle, &pRes, &useconds); if (code) { QW_TASK_ELOG("qExecTask failed, code:%x - %s", code, tstrerror(code)); @@ -1196,8 +1195,8 @@ int32_t qwProcessDrop(QW_FPARAMS_DEF, SQWMsg *qwMsg) { QW_ERR_JRET(qwKillTaskHandle(QW_FPARAMS(), ctx)); qwUpdateTaskStatus(QW_FPARAMS(), JOB_TASK_STATUS_DROPPING); } else if (ctx->phase > 0) { - qwBuildAndSendDropRsp(&ctx->connInfo, code); - QW_TASK_DLOG("drop rsp send, handle:%p, code:%x - %s", ctx->connInfo.handle, code, tstrerror(code)); + qwBuildAndSendDropRsp(&qwMsg->connInfo, code); + QW_TASK_DLOG("drop rsp send, handle:%p, code:%x - %s", qwMsg->connInfo.handle, code, tstrerror(code)); QW_ERR_JRET(qwDropTask(QW_FPARAMS())); rsped = true; @@ -1206,7 +1205,7 @@ int32_t qwProcessDrop(QW_FPARAMS_DEF, SQWMsg *qwMsg) { } if (!rsped) { - ctx->connInfo.handle == qwMsg->connInfo.handle; + ctx->connInfo.handle = qwMsg->connInfo.handle; ctx->connInfo.ahandle = 
qwMsg->connInfo.ahandle; QW_SET_EVENT_RECEIVED(ctx, QW_EVENT_DROP); diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h index 9d973e4437..c1c4359607 100644 --- a/source/libs/scheduler/inc/schedulerInt.h +++ b/source/libs/scheduler/inc/schedulerInt.h @@ -103,6 +103,11 @@ typedef struct SSchFlowControl { SArray *taskList; // Element is SSchTask* } SSchFlowControl; +typedef struct SSchNodeInfo { + SQueryNodeAddr addr; + void *handle; +} SSchNodeInfo; + typedef struct SSchLevel { int32_t level; int8_t status; @@ -128,7 +133,7 @@ typedef struct SSchTask { SQueryNodeAddr succeedAddr; // task executed success node address int8_t candidateIdx; // current try condidation index SArray *candidateAddrs; // condidate node addresses, element is SQueryNodeAddr - SArray *execAddrs; // all tried node for current task, element is SQueryNodeAddr + SArray *execNodes; // all tried node for current task, element is SSchNodeInfo SQueryProfileSummary summary; // task execution summary int32_t childReady; // child task ready number SArray *children; // the datasource tasks,from which to fetch the result, element is SQueryTask* @@ -190,6 +195,8 @@ extern SSchedulerMgmt schMgmt; #define SCH_GET_TASK_STATUS(task) atomic_load_8(&(task)->status) #define SCH_GET_TASK_STATUS_STR(task) jobTaskStatusStr(SCH_GET_TASK_STATUS(task)) +#define SCH_GET_TASK_HANDLE(_task) ((_task) ? 
(_task)->handle : NULL) +#define SCH_SET_TASK_HANDLE(_task, _handle) ((_task)->handle = (_handle)) #define SCH_SET_JOB_STATUS(job, st) atomic_store_8(&(job)->status, st) #define SCH_GET_JOB_STATUS(job) atomic_load_8(&(job)->status) diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 77f8ccf8cc..e612f3ae59 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -57,9 +57,9 @@ int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel * pTask->level = pLevel; SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START); pTask->taskId = schGenTaskId(); - pTask->execAddrs = taosArrayInit(SCH_MAX_CANDIDATE_EP_NUM, sizeof(SQueryNodeAddr)); - if (NULL == pTask->execAddrs) { - SCH_TASK_ELOG("taosArrayInit %d exec addrs failed", SCH_MAX_CANDIDATE_EP_NUM); + pTask->execNodes = taosArrayInit(SCH_MAX_CANDIDATE_EP_NUM, sizeof(SSchNodeInfo)); + if (NULL == pTask->execNodes) { + SCH_TASK_ELOG("taosArrayInit %d execNodes failed", SCH_MAX_CANDIDATE_EP_NUM); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } @@ -101,8 +101,8 @@ void schFreeTask(SSchTask* pTask) { taosArrayDestroy(pTask->parents); } - if (pTask->execAddrs) { - taosArrayDestroy(pTask->execAddrs); + if (pTask->execNodes) { + taosArrayDestroy(pTask->execNodes); } } @@ -355,12 +355,16 @@ int32_t schRecordTaskSucceedNode(SSchJob *pJob, SSchTask *pTask) { return TSDB_CODE_SUCCESS; } -int32_t schRecordTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr) { - if (NULL == taosArrayPush(pTask->execAddrs, addr)) { - SCH_TASK_ELOG("taosArrayPush addr to execAddr list failed, errno:%d", errno); +int32_t schRecordTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, void *handle) { + SSchNodeInfo nodeInfo = {.addr = *addr, .handle = handle}; + + if (NULL == taosArrayPush(pTask->execNodes, &nodeInfo)) { + SCH_TASK_ELOG("taosArrayPush nodeInfo to execNodes list failed, errno:%d", errno); 
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } + SCH_TASK_DLOG("task execNode recorded, handle:%p", handle); + return TSDB_CODE_SUCCESS; } @@ -1090,7 +1094,7 @@ int32_t schHandleCallback(void *param, const SDataBuf *pMsg, int32_t msgType, in SSchJob *pJob = schAcquireJob(pParam->refId); if (NULL == pJob) { - qError("QID:0x%" PRIx64 ",TID:0x%" PRIx64 "taosAcquireRef job failed, may be dropped, refId:%" PRIx64, + qWarn("QID:0x%" PRIx64 ",TID:0x%" PRIx64 "taosAcquireRef job failed, may be dropped, refId:%" PRIx64, pParam->queryId, pParam->taskId, pParam->refId); SCH_ERR_JRET(TSDB_CODE_QRY_JOB_FREED); } @@ -1110,7 +1114,7 @@ int32_t schHandleCallback(void *param, const SDataBuf *pMsg, int32_t msgType, in pTask = *task; SCH_TASK_DLOG("rsp msg received, type:%s, handle:%p, code:%s", TMSG_INFO(msgType), pMsg->handle, tstrerror(rspCode)); - pTask->handle = pMsg->handle; + SCH_SET_TASK_HANDLE(pTask, pMsg->handle); SCH_ERR_JRET(schHandleResponseMsg(pJob, pTask, msgType, pMsg->pData, pMsg->len, rspCode)); _return: @@ -1849,11 +1853,11 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, SCH_SET_TASK_LASTMSG_TYPE(pTask, msgType); - SSchTrans trans = {.transInst = pJob->transport, .transHandle = pTask ? pTask->handle : NULL}; + SSchTrans trans = {.transInst = pJob->transport, .transHandle = SCH_GET_TASK_HANDLE(pTask)}; SCH_ERR_JRET(schAsyncSendMsg(pJob, pTask, &trans, &epSet, msgType, msg, msgSize, persistHandle, (rpcCtx.args ? 
&rpcCtx : NULL))); - if (isCandidateAddr) { - SCH_ERR_RET(schRecordTaskExecNode(pJob, pTask, addr)); + if (msgType == TDMT_VND_QUERY) { + SCH_ERR_RET(schRecordTaskExecNode(pJob, pTask, addr, trans.transHandle)); } return TSDB_CODE_SUCCESS; @@ -1935,6 +1939,8 @@ int32_t schLaunchTask(SSchJob *pJob, SSchTask *pTask) { bool enough = false; int32_t code = 0; + SCH_SET_TASK_HANDLE(pTask, NULL); + if (SCH_TASK_NEED_FLOW_CTRL(pJob, pTask)) { SCH_ERR_JRET(schCheckIncTaskFlowQuota(pJob, pTask, &enough)); @@ -1975,23 +1981,24 @@ int32_t schLaunchJob(SSchJob *pJob) { } void schDropTaskOnExecutedNode(SSchJob *pJob, SSchTask *pTask) { - if (NULL == pTask->execAddrs) { + if (NULL == pTask->execNodes) { SCH_TASK_DLOG("no exec address, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); return; } - int32_t size = (int32_t)taosArrayGetSize(pTask->execAddrs); + int32_t size = (int32_t)taosArrayGetSize(pTask->execNodes); if (size <= 0) { - SCH_TASK_DLOG("task has no exec address, no need to drop it, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); + SCH_TASK_DLOG("task has no execNodes, no need to drop it, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); return; } - SQueryNodeAddr *addr = NULL; + SSchNodeInfo *nodeInfo = NULL; for (int32_t i = 0; i < size; ++i) { - addr = (SQueryNodeAddr *)taosArrayGet(pTask->execAddrs, i); + nodeInfo = (SSchNodeInfo *)taosArrayGet(pTask->execNodes, i); + SCH_SET_TASK_HANDLE(pTask, nodeInfo->handle); - schBuildAndSendMsg(pJob, pTask, addr, TDMT_VND_DROP_TASK); + schBuildAndSendMsg(pJob, pTask, &nodeInfo->addr, TDMT_VND_DROP_TASK); } SCH_TASK_DLOG("task has %d exec address", size); diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 4269865993..23bc43693e 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -261,7 +261,7 @@ void cliHandleResp(SCliConn* conn) { if (pMsg == NULL) { transMsg.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); tDebug("cli conn %p construct ahandle 
%p by %d, persist: 1", conn, transMsg.ahandle, transMsg.msgType); - if (!CONN_RELEASE_BY_SERVER(conn)&& transMsg.ahandle = NULL) { + if (!CONN_RELEASE_BY_SERVER(conn)&& transMsg.ahandle == NULL) { transMsg.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); tDebug("cli conn %p construct ahandle %p due brokenlink, persist: 1", conn, transMsg.ahandle); } @@ -330,10 +330,12 @@ void cliHandleExcept(SCliConn* pConn) { } SCliThrdObj* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; - - while (!transQueueEmpty(&pConn->cliMsgs)) { + bool once = false; + do { SCliMsg* pMsg = transQueuePop(&pConn->cliMsgs); - + if (pMsg == NULL && once) { + break; + } STransConnCtx* pCtx = pMsg ? pMsg->ctx : NULL; STransMsg transMsg = {0}; @@ -356,6 +358,7 @@ void cliHandleExcept(SCliConn* pConn) { if (pCtx == NULL || pCtx->pSem == NULL) { tTrace("%s cli conn %p handle except", pTransInst->label, pConn); if (transMsg.ahandle == NULL) { + once = true; continue; } (pTransInst->cfp)(pTransInst->parent, &transMsg, NULL); @@ -366,7 +369,7 @@ void cliHandleExcept(SCliConn* pConn) { } destroyCmsg(pMsg); tTrace("%s cli conn %p start to destroy", CONN_GET_INST_LABEL(pConn), pConn); - }; + } while (!transQueueEmpty(&pConn->cliMsgs)); transUnrefCliHandle(pConn); } From 622484280f550532d4a8c335ac3c3233507385ab Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 24 Mar 2022 11:53:24 +0800 Subject: [PATCH 32/59] handle except --- source/libs/transport/src/transCli.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 5cce976a15..01cd899dfd 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -205,7 +205,8 @@ static void destroyThrdObj(SCliThrdObj* pThrd); transRefCliHandle(conn); \ } \ } while (0) -#define CONN_NO_PERSIST_BY_APP(conn) ((conn)->status == ConnNormal && T_REF_VAL_GET(conn) == 1) +#define 
CONN_NO_PERSIST_BY_APP(conn) \ + (((conn)->status == ConnNormal || (conn->status) == ConnInPool) T_REF_VAL_GET(conn) == 1) #define CONN_RELEASE_BY_SERVER(conn) \ (((conn)->status == ConnRelease || (conn)->status == ConnInPool) && T_REF_VAL_GET(conn) == 1) #define REQUEST_NO_RESP(msg) ((msg)->noResp == 1) @@ -331,12 +332,12 @@ void cliHandleExcept(SCliConn* pConn) { } SCliThrdObj* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; - bool once = false; + bool once = false; do { SCliMsg* pMsg = transQueuePop(&pConn->cliMsgs); if (pMsg == NULL && once) { break; - } + } STransConnCtx* pCtx = pMsg ? pMsg->ctx : NULL; STransMsg transMsg = {0}; From 7cbc04cfbf0bf2f9c631b396acee8a59cc992495 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 24 Mar 2022 12:23:48 +0800 Subject: [PATCH 33/59] handle except --- source/libs/transport/src/transCli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 01cd899dfd..75d1893d59 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -206,7 +206,7 @@ static void destroyThrdObj(SCliThrdObj* pThrd); } \ } while (0) #define CONN_NO_PERSIST_BY_APP(conn) \ - (((conn)->status == ConnNormal || (conn->status) == ConnInPool) T_REF_VAL_GET(conn) == 1) + (((conn)->status == ConnNormal || (conn)->status == ConnInPool) && T_REF_VAL_GET(conn) == 1) #define CONN_RELEASE_BY_SERVER(conn) \ (((conn)->status == ConnRelease || (conn)->status == ConnInPool) && T_REF_VAL_GET(conn) == 1) #define REQUEST_NO_RESP(msg) ((msg)->noResp == 1) From 99b24c71dda47bd7860078342d29ee6768e88db7 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 24 Mar 2022 12:34:48 +0800 Subject: [PATCH 34/59] handle except --- source/libs/transport/src/transCli.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 75d1893d59..3b8ea5858f 
100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -326,6 +326,7 @@ void cliHandleResp(SCliConn* conn) { void cliHandleExcept(SCliConn* pConn) { if (transQueueEmpty(&pConn->cliMsgs)) { if (pConn->broken == true && CONN_NO_PERSIST_BY_APP(pConn)) { + tTrace("%s cli conn %p handle except, persist:0", CONN_GET_INST_LABEL(pConn), pConn); transUnrefCliHandle(pConn); return; } @@ -348,10 +349,12 @@ void cliHandleExcept(SCliConn* pConn) { if (pMsg == NULL && !CONN_NO_PERSIST_BY_APP(pConn)) { transMsg.ahandle = transCtxDumpVal(&pConn->ctx, transMsg.msgType); - tDebug("cli conn %p construct msgType %s ahandle %p", pConn, TMSG_INFO(transMsg.msgType), transMsg.ahandle); + tDebug("%s cli conn %p construct ahandle %p by %s", CONN_GET_INST_LABEL(pConn), pConn, transMsg.ahandle, + TMSG_INFO(transMsg.msgType)); if (transMsg.ahandle == NULL) { transMsg.ahandle = transCtxDumpBrokenlinkVal(&pConn->ctx, (int32_t*)&(transMsg.msgType)); - tDebug("cli conn %p construct brokenlink ahandle %p", pConn, transMsg.ahandle); + tDebug("%s cli conn %p construct ahandle %p due to brokenlink", CONN_GET_INST_LABEL(pConn), pConn, + transMsg.ahandle); } } else { transMsg.ahandle = pCtx ? 
pCtx->ahandle : NULL; From 97d587e379b54deaaea097e7f84530b9e6a69df6 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 24 Mar 2022 14:37:53 +0800 Subject: [PATCH 35/59] feature/scheduler --- source/dnode/mgmt/vnode/src/vmWorker.c | 4 +-- source/libs/qworker/inc/qworkerInt.h | 8 +++-- source/libs/qworker/src/qworker.c | 42 +++++++++++++++++++++++--- source/libs/qworker/src/qworkerMsg.c | 14 ++++++--- 4 files changed, 54 insertions(+), 14 deletions(-) diff --git a/source/dnode/mgmt/vnode/src/vmWorker.c b/source/dnode/mgmt/vnode/src/vmWorker.c index 6c7d513c58..c493e44bda 100644 --- a/source/dnode/mgmt/vnode/src/vmWorker.c +++ b/source/dnode/mgmt/vnode/src/vmWorker.c @@ -165,8 +165,8 @@ static int32_t vmPutNodeMsgToQueue(SVnodesMgmt *pMgmt, SNodeMsg *pMsg, EQueueTyp int32_t code = -1; SMsgHead *pHead = pRpc->pCont; - pHead->contLen = htonl(pHead->contLen); - pHead->vgId = htonl(pHead->vgId); + pHead->contLen = ntohl(pHead->contLen); + pHead->vgId = ntohl(pHead->vgId); SVnodeObj *pVnode = vmAcquireVnode(pMgmt, pHead->vgId); if (pVnode == NULL) { diff --git a/source/libs/qworker/inc/qworkerInt.h b/source/libs/qworker/inc/qworkerInt.h index 41891c7cac..f6bc204227 100644 --- a/source/libs/qworker/inc/qworkerInt.h +++ b/source/libs/qworker/inc/qworkerInt.h @@ -69,6 +69,7 @@ enum { typedef struct SQWDebug { bool lockEnable; bool statusEnable; + bool dumpEnable; } SQWDebug; typedef struct SQWConnInfo { @@ -123,9 +124,9 @@ typedef struct SQWTaskCtx { typedef struct SQWSchStatus { int32_t lastAccessTs; // timestamp in second - SRWLatch connLock; + SRWLatch hbConnLock; SQWConnInfo hbConnInfo; - SQueryNodeEpId epId; + SQueryNodeEpId hbEpId; SRWLatch tasksLock; SHashObj *tasksHash; // key:queryId+taskId, value: SQWTaskStatus } SQWSchStatus; @@ -175,6 +176,9 @@ typedef struct SQWorkerMgmt { #define QW_ELOG(param, ...) qError("QW:%p " param, mgmt, __VA_ARGS__) #define QW_DLOG(param, ...) qDebug("QW:%p " param, mgmt, __VA_ARGS__) +#define QW_DUMP(param, ...) 
do { if (gQWDebug.dumpEnable) { qDebug("QW:%p " param, mgmt, __VA_ARGS__); } } while (0) + + #define QW_SCH_ELOG(param, ...) qError("QW:%p SID:%"PRIx64" " param, mgmt, sId, __VA_ARGS__) #define QW_SCH_DLOG(param, ...) qDebug("QW:%p SID:%"PRIx64" " param, mgmt, sId, __VA_ARGS__) diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 07e31d549e..53235cae68 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -9,7 +9,7 @@ #include "tname.h" #include "dataSinkMgt.h" -SQWDebug gQWDebug = {.statusEnable = true}; +SQWDebug gQWDebug = {.statusEnable = true, .dumpEnable = true}; int32_t qwDbgValidateStatus(QW_FPARAMS_DEF, int8_t oriStatus, int8_t newStatus, bool *ignore) { if (!gQWDebug.statusEnable) { @@ -103,6 +103,36 @@ _return: QW_RET(code); } +void qwDbgDumpSchInfo(SQWSchStatus *sch, int32_t i) { + +} + +void qwDbgDumpMgmtInfo(SQWorkerMgmt *mgmt) { + if (!gQWDebug.dumpEnable) { + return; + } + + QW_LOCK(QW_READ, &mgmt->schLock); + + QW_DUMP("total remain schduler num:%d", taosHashGetSize(mgmt->schHash)); + + void *key = NULL; + size_t keyLen = 0; + int32_t i = 0; + SQWSchStatus *sch = NULL; + + void *pIter = taosHashIterate(mgmt->schHash, NULL); + while (pIter) { + sch = (SQWSchStatus *)pIter; + qwDbgDumpSchInfo(sch, i); + ++i; + pIter = taosHashIterate(mgmt->schHash, pIter); + } + + QW_UNLOCK(QW_READ, &mgmt->schLock); + + QW_DUMP("total remain ctx num:%d", taosHashGetSize(mgmt->ctxHash)); +} char *qwPhaseStr(int32_t phase) { switch (phase) { @@ -581,7 +611,7 @@ int32_t qwGenerateSchHbRsp(SQWorkerMgmt *mgmt, SQWSchStatus *sch, SQWHbInfo *hbI int32_t taskNum = 0; hbInfo->connInfo = sch->hbConnInfo; - hbInfo->rsp.epId = sch->epId; + hbInfo->rsp.epId = sch->hbEpId; QW_LOCK(QW_READ, &sch->tasksLock); @@ -1248,16 +1278,16 @@ int32_t qwProcessHb(SQWorkerMgmt *mgmt, SQWMsg *qwMsg, SSchedulerHbReq *req) { QW_ERR_JRET(qwAcquireAddScheduler(mgmt, req->sId, QW_READ, &sch)); - QW_LOCK(QW_WRITE, &sch->connLock); 
+ QW_LOCK(QW_WRITE, &sch->hbConnLock); if (sch->hbConnInfo.handle) { rpcReleaseHandle(sch->hbConnInfo.handle, TAOS_CONN_SERVER); } memcpy(&sch->hbConnInfo, &qwMsg->connInfo, sizeof(qwMsg->connInfo)); - memcpy(&sch->epId, &req->epId, sizeof(req->epId)); + memcpy(&sch->hbEpId, &req->epId, sizeof(req->epId)); - QW_UNLOCK(QW_WRITE, &sch->connLock); + QW_UNLOCK(QW_WRITE, &sch->hbConnLock); QW_DLOG("hb connection updated, sId:%" PRIx64 ", nodeId:%d, fqdn:%s, port:%d, handle:%p, ahandle:%p", req->sId, req->epId.nodeId, req->epId.ep.fqdn, req->epId.ep.port, qwMsg->connInfo.handle, qwMsg->connInfo.ahandle); @@ -1280,6 +1310,8 @@ void qwProcessHbTimerEvent(void *param, void *tmrId) { SQWHbInfo *rspList = NULL; int32_t code = 0; + qwDbgDumpMgmtInfo(mgmt); + QW_LOCK(QW_READ, &mgmt->schLock); int32_t schNum = taosHashGetSize(mgmt->schHash); diff --git a/source/libs/qworker/src/qworkerMsg.c b/source/libs/qworker/src/qworkerMsg.c index e6a1260de5..97ef53aaea 100644 --- a/source/libs/qworker/src/qworkerMsg.c +++ b/source/libs/qworker/src/qworkerMsg.c @@ -272,11 +272,11 @@ int32_t qwRegisterBrokenLinkArg(QW_FPARAMS_DEF, SQWConnInfo *pConn) { QW_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - req->header.vgId = mgmt->nodeId; - req->sId = sId; - req->queryId = qId; - req->taskId = tId; - req->refId = rId; + req->header.vgId = htonl(mgmt->nodeId); + req->sId = htobe64(sId); + req->queryId = htobe64(qId); + req->taskId = htobe64(tId); + req->refId = htobe64(rId); SRpcMsg pMsg = { .handle = pConn->handle, @@ -532,6 +532,10 @@ int32_t qWorkerProcessDropMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { qwMsg.connInfo.handle = pMsg->handle; qwMsg.connInfo.ahandle = pMsg->ahandle; + if (TSDB_CODE_RPC_NETWORK_UNAVAIL == pMsg->code) { + QW_SCH_TASK_DLOG("receive drop task due to network broken, error:%s", tstrerror(pMsg->code)); + } + QW_SCH_TASK_DLOG("processDrop start, node:%p, handle:%p", node, pMsg->handle); QW_ERR_RET(qwProcessDrop(QW_FPARAMS(), &qwMsg)); From 
c643c48287cea81f12ceed068323a5b493438220 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 24 Mar 2022 15:53:01 +0800 Subject: [PATCH 36/59] handle except --- source/libs/transport/src/transComm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/libs/transport/src/transComm.c b/source/libs/transport/src/transComm.c index 2d6485b346..8b668092dd 100644 --- a/source/libs/transport/src/transComm.c +++ b/source/libs/transport/src/transComm.c @@ -280,6 +280,9 @@ void* transCtxDumpVal(STransCtx* ctx, int32_t key) { } void* transCtxDumpBrokenlinkVal(STransCtx* ctx, int32_t* msgType) { void *ret = NULL; + if (ctx->brokenVal.clone == NULL) { + return ret; + } (*ctx->brokenVal.clone)(ctx->brokenVal.val, &ret); *msgType = ctx->brokenVal.msgType; From e0524091fbd3edd29a5ea1a3424c9f44dd597b28 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 24 Mar 2022 16:31:26 +0800 Subject: [PATCH 37/59] handle except --- source/libs/transport/src/transComm.c | 23 +++++++++++++++-------- source/libs/transport/src/transSrv.c | 3 +-- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/source/libs/transport/src/transComm.c b/source/libs/transport/src/transComm.c index 8b668092dd..fd5bd18344 100644 --- a/source/libs/transport/src/transComm.c +++ b/source/libs/transport/src/transComm.c @@ -274,14 +274,14 @@ void* transCtxDumpVal(STransCtx* ctx, int32_t key) { if (cVal == NULL) { return NULL; } - void *ret = NULL; + void* ret = NULL; (*cVal->clone)(cVal->val, &ret); return ret; } void* transCtxDumpBrokenlinkVal(STransCtx* ctx, int32_t* msgType) { - void *ret = NULL; + void* ret = NULL; if (ctx->brokenVal.clone == NULL) { - return ret; + return ret; } (*ctx->brokenVal.clone)(ctx->brokenVal.val, &ret); @@ -295,6 +295,9 @@ void transQueueInit(STransQueue* queue, void (*free)(void* arg)) { queue->free = free; } bool transQueuePush(STransQueue* queue, void* arg) { + if (queue->q == NULL) { + return true; + } taosArrayPush(queue->q, &arg); if (taosArrayGetSize(queue->q) > 1) { 
return false; @@ -302,7 +305,7 @@ bool transQueuePush(STransQueue* queue, void* arg) { return true; } void* transQueuePop(STransQueue* queue) { - if (taosArrayGetSize(queue->q) == 0) { + if (queue->q == NULL || taosArrayGetSize(queue->q) == 0) { return NULL; } void* ptr = taosArrayGetP(queue->q, 0); @@ -310,11 +313,13 @@ void* transQueuePop(STransQueue* queue) { return ptr; } int32_t transQueueSize(STransQueue* queue) { - // Get size + if (queue->q == NULL) { + return 0; + } return taosArrayGetSize(queue->q); } void* transQueueGet(STransQueue* queue, int i) { - if (taosArrayGetSize(queue->q) == 0) { + if (queue->q == NULL || taosArrayGetSize(queue->q) == 0) { return NULL; } if (i >= taosArrayGetSize(queue->q)) { @@ -326,7 +331,7 @@ void* transQueueGet(STransQueue* queue, int i) { } void* transQueueRm(STransQueue* queue, int i) { - if (taosArrayGetSize(queue->q) == 0) { + if (queue->q == NULL || taosArrayGetSize(queue->q) == 0) { return NULL; } if (i >= taosArrayGetSize(queue->q)) { @@ -338,7 +343,9 @@ void* transQueueRm(STransQueue* queue, int i) { } bool transQueueEmpty(STransQueue* queue) { - // + if (queue->q == NULL) { + return true; + } return taosArrayGetSize(queue->q) == 0; } void transQueueClear(STransQueue* queue) { diff --git a/source/libs/transport/src/transSrv.c b/source/libs/transport/src/transSrv.c index 15dcc29232..b4052aea46 100644 --- a/source/libs/transport/src/transSrv.c +++ b/source/libs/transport/src/transSrv.c @@ -623,8 +623,6 @@ static void destroyConn(SSrvConn* conn, bool clear) { return; } transDestroyBuffer(&conn->readBuf); - - transQueueDestroy(&conn->srvMsgs); if (clear) { tTrace("server conn %p to be destroyed", conn); uv_shutdown_t* req = malloc(sizeof(uv_shutdown_t)); @@ -640,6 +638,7 @@ static void uvDestroyConn(uv_handle_t* handle) { tDebug("server conn %p destroy", conn); uv_timer_stop(&conn->pTimer); + transQueueDestroy(&conn->srvMsgs); QUEUE_REMOVE(&conn->queue); free(conn->pTcp); // free(conn); From 
c563dccbb80f11a8b1e81c18027e9bb6ff5e8431 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 24 Mar 2022 16:38:23 +0800 Subject: [PATCH 38/59] handle quit except --- source/libs/transport/src/transSrv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/libs/transport/src/transSrv.c b/source/libs/transport/src/transSrv.c index b4052aea46..787c538f2a 100644 --- a/source/libs/transport/src/transSrv.c +++ b/source/libs/transport/src/transSrv.c @@ -838,6 +838,9 @@ void transSendResponse(const STransMsg* pMsg) { } SSrvConn* pConn = pMsg->handle; SWorkThrdObj* pThrd = pConn->hostThrd; + if (pThrd->quit) { + return; + } SSrvMsg* srvMsg = calloc(1, sizeof(SSrvMsg)); srvMsg->pConn = pConn; From cec51ef719a41aca125f8a78f5f7b5e9f8bc0852 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 24 Mar 2022 16:38:44 +0800 Subject: [PATCH 39/59] feature/scheduler --- include/libs/catalog/catalog.h | 2 +- source/libs/catalog/src/catalog.c | 23 +++++++++++----------- source/libs/parser/src/parTranslater.c | 27 ++++++++++++++++++++------ 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/include/libs/catalog/catalog.h b/include/libs/catalog/catalog.h index dd5f7fc104..9f0d4b11c2 100644 --- a/include/libs/catalog/catalog.h +++ b/include/libs/catalog/catalog.h @@ -112,7 +112,7 @@ int32_t catalogUpdateDBVgInfo(SCatalog* pCatalog, const char* dbName, uint64_t d int32_t catalogRemoveDB(SCatalog* pCatalog, const char* dbName, uint64_t dbId); -int32_t catalogRemoveTableMeta(SCatalog* pCtg, SName* pTableName); +int32_t catalogRemoveTableMeta(SCatalog* pCtg, const SName* pTableName); int32_t catalogRemoveStbMeta(SCatalog* pCtg, const char* dbFName, uint64_t dbId, const char* stbName, uint64_t suid); diff --git a/source/libs/catalog/src/catalog.c b/source/libs/catalog/src/catalog.c index 8f67482650..a3a52c8b6d 100644 --- a/source/libs/catalog/src/catalog.c +++ b/source/libs/catalog/src/catalog.c @@ -406,9 +406,9 @@ _return: } -int32_t ctgPushRmStbMsgInQueue(SCatalog* pCtg, 
const char *dbFName, int64_t dbId, const char *stbName, uint64_t suid) { +int32_t ctgPushRmStbMsgInQueue(SCatalog* pCtg, const char *dbFName, int64_t dbId, const char *stbName, uint64_t suid, bool syncReq) { int32_t code = 0; - SCtgMetaAction action= {.act = CTG_ACT_REMOVE_STB}; + SCtgMetaAction action= {.act = CTG_ACT_REMOVE_STB, .syncReq = syncReq}; SCtgRemoveStbMsg *msg = malloc(sizeof(SCtgRemoveStbMsg)); if (NULL == msg) { ctgError("malloc %d failed", (int32_t)sizeof(SCtgRemoveStbMsg)); @@ -435,9 +435,9 @@ _return: -int32_t ctgPushRmTblMsgInQueue(SCatalog* pCtg, const char *dbFName, int64_t dbId, const char *tbName) { +int32_t ctgPushRmTblMsgInQueue(SCatalog* pCtg, const char *dbFName, int64_t dbId, const char *tbName, bool syncReq) { int32_t code = 0; - SCtgMetaAction action= {.act = CTG_ACT_REMOVE_TBL}; + SCtgMetaAction action= {.act = CTG_ACT_REMOVE_TBL, .syncReq = syncReq}; SCtgRemoveTblMsg *msg = malloc(sizeof(SCtgRemoveTblMsg)); if (NULL == msg) { ctgError("malloc %d failed", (int32_t)sizeof(SCtgRemoveTblMsg)); @@ -496,7 +496,7 @@ _return: int32_t ctgPushUpdateTblMsgInQueue(SCatalog* pCtg, STableMetaOutput *output, bool syncReq) { int32_t code = 0; - SCtgMetaAction action= {.act = CTG_ACT_UPDATE_TBL}; + SCtgMetaAction action= {.act = CTG_ACT_UPDATE_TBL, .syncReq = syncReq}; SCtgUpdateTblMsg *msg = malloc(sizeof(SCtgUpdateTblMsg)); if (NULL == msg) { ctgError("malloc %d failed", (int32_t)sizeof(SCtgUpdateTblMsg)); @@ -1843,6 +1843,7 @@ int32_t ctgRefreshTblMeta(SCatalog* pCtg, void *pTrans, const SEpSet* pMgmtEps, if (CTG_IS_META_NULL(output->metaType)) { ctgError("no tbmeta got, tbNmae:%s", tNameGetTableName(pTableName)); + catalogRemoveTableMeta(pCtg, pTableName); CTG_ERR_JRET(CTG_ERR_CODE_TABLE_NOT_EXIST); } @@ -1951,9 +1952,9 @@ _return: } if (TSDB_SUPER_TABLE == tbType) { - ctgPushRmStbMsgInQueue(pCtg, dbFName, dbId, pTableName->tname, suid); + ctgPushRmStbMsgInQueue(pCtg, dbFName, dbId, pTableName->tname, suid, false); } else { - 
ctgPushRmTblMsgInQueue(pCtg, dbFName, dbId, pTableName->tname); + ctgPushRmTblMsgInQueue(pCtg, dbFName, dbId, pTableName->tname, false); } } @@ -2534,7 +2535,7 @@ int32_t catalogUpdateVgEpSet(SCatalog* pCtg, const char* dbFName, int32_t vgId, } -int32_t catalogRemoveTableMeta(SCatalog* pCtg, SName* pTableName) { +int32_t catalogRemoveTableMeta(SCatalog* pCtg, const SName* pTableName) { CTG_API_ENTER(); int32_t code = 0; @@ -2561,9 +2562,9 @@ int32_t catalogRemoveTableMeta(SCatalog* pCtg, SName* pTableName) { tNameGetFullDbName(pTableName, dbFName); if (TSDB_SUPER_TABLE == tblMeta->tableType) { - CTG_ERR_JRET(ctgPushRmStbMsgInQueue(pCtg, dbFName, dbId, pTableName->tname, tblMeta->suid)); + CTG_ERR_JRET(ctgPushRmStbMsgInQueue(pCtg, dbFName, dbId, pTableName->tname, tblMeta->suid, true)); } else { - CTG_ERR_JRET(ctgPushRmTblMsgInQueue(pCtg, dbFName, dbId, pTableName->tname)); + CTG_ERR_JRET(ctgPushRmTblMsgInQueue(pCtg, dbFName, dbId, pTableName->tname, true)); } @@ -2588,7 +2589,7 @@ int32_t catalogRemoveStbMeta(SCatalog* pCtg, const char* dbFName, uint64_t dbId, CTG_API_LEAVE(TSDB_CODE_SUCCESS); } - CTG_ERR_JRET(ctgPushRmStbMsgInQueue(pCtg, dbFName, dbId, stbName, suid)); + CTG_ERR_JRET(ctgPushRmStbMsgInQueue(pCtg, dbFName, dbId, stbName, suid, true)); CTG_API_LEAVE(TSDB_CODE_SUCCESS); diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 100fc3f107..19f7f47558 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -84,12 +84,18 @@ static SName* toName(int32_t acctId, const char* pDbName, const char* pTableName return pName; } -static int32_t collectUseDatabase(const char* pFullDbName, SHashObj* pDbs) { +static int32_t collectUseDatabaseImpl(const char* pFullDbName, SHashObj* pDbs) { SFullDatabaseName name = {0}; strcpy(name.fullDbName, pFullDbName); return taosHashPut(pDbs, pFullDbName, strlen(pFullDbName), &name, sizeof(SFullDatabaseName)); } +static int32_t 
collectUseDatabase(const SName* pName, SHashObj* pDbs) { + char dbFName[TSDB_DB_FNAME_LEN] = {0}; + tNameGetFullDbName(pName, dbFName); + return collectUseDatabaseImpl(dbFName, pDbs); +} + static int32_t collectUseTable(const SName* pName, SHashObj* pDbs) { char fullName[TSDB_TABLE_FNAME_LEN]; tNameExtractFullName(pName, fullName); @@ -98,7 +104,10 @@ static int32_t collectUseTable(const SName* pName, SHashObj* pDbs) { static int32_t getTableMetaImpl(STranslateContext* pCxt, const SName* pName, STableMeta** pMeta) { SParseContext* pParCxt = pCxt->pParseCxt; - int32_t code = collectUseTable(pName, pCxt->pTables); + int32_t code = collectUseDatabase(pName, pCxt->pDbs); + if (TSDB_CODE_SUCCESS == code) { + code = collectUseTable(pName, pCxt->pTables); + } if (TSDB_CODE_SUCCESS == code) { code = catalogGetTableMeta(pParCxt->pCatalog, pParCxt->pTransporter, &pParCxt->mgmtEpSet, pName, pMeta); } @@ -117,7 +126,10 @@ static int32_t getTableMeta(STranslateContext* pCxt, const char* pDbName, const static int32_t getTableDistVgInfo(STranslateContext* pCxt, const SName* pName, SArray** pVgInfo) { SParseContext* pParCxt = pCxt->pParseCxt; - int32_t code = collectUseTable(pName, pCxt->pTables); + int32_t code = collectUseDatabase(pName, pCxt->pDbs); + if (TSDB_CODE_SUCCESS == code) { + code = collectUseTable(pName, pCxt->pTables); + } if (TSDB_CODE_SUCCESS == code) { code = catalogGetTableDistVgInfo(pParCxt->pCatalog, pParCxt->pTransporter, &pParCxt->mgmtEpSet, pName, pVgInfo); } @@ -131,7 +143,7 @@ static int32_t getDBVgInfoImpl(STranslateContext* pCxt, const SName* pName, SArr SParseContext* pParCxt = pCxt->pParseCxt; char fullDbName[TSDB_DB_FNAME_LEN]; tNameGetFullDbName(pName, fullDbName); - int32_t code = collectUseDatabase(fullDbName, pCxt->pDbs); + int32_t code = collectUseDatabaseImpl(fullDbName, pCxt->pDbs); if (TSDB_CODE_SUCCESS == code) { code = catalogGetDBVgInfo(pParCxt->pCatalog, pParCxt->pTransporter, &pParCxt->mgmtEpSet, fullDbName, pVgInfo); } @@ -151,7 +163,10 
@@ static int32_t getDBVgInfo(STranslateContext* pCxt, const char* pDbName, SArray* static int32_t getTableHashVgroupImpl(STranslateContext* pCxt, const SName* pName, SVgroupInfo* pInfo) { SParseContext* pParCxt = pCxt->pParseCxt; - int32_t code = collectUseTable(pName, pCxt->pTables); + int32_t code = collectUseDatabase(pName, pCxt->pDbs); + if (TSDB_CODE_SUCCESS == code) { + code = collectUseTable(pName, pCxt->pTables); + } if (TSDB_CODE_SUCCESS == code) { code = catalogGetTableHashVgroup(pParCxt->pCatalog, pParCxt->pTransporter, &pParCxt->mgmtEpSet, pName, pInfo); } @@ -170,7 +185,7 @@ static int32_t getTableHashVgroup(STranslateContext* pCxt, const char* pDbName, static int32_t getDBVgVersion(STranslateContext* pCxt, const char* pDbFName, int32_t* pVersion, int64_t* pDbId, int32_t* pTableNum) { SParseContext* pParCxt = pCxt->pParseCxt; - int32_t code = collectUseDatabase(pDbFName, pCxt->pDbs); + int32_t code = collectUseDatabaseImpl(pDbFName, pCxt->pDbs); if (TSDB_CODE_SUCCESS == code) { code = catalogGetDBVgVersion(pParCxt->pCatalog, pDbFName, pVersion, pDbId, pTableNum); } From 354fc37f64f39da8c2442d15838389e9cade00b4 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 24 Mar 2022 19:02:42 +0800 Subject: [PATCH 40/59] feature/scheduler --- source/libs/nodes/src/nodesCodeFuncs.c | 2 +- source/libs/parser/src/parTranslater.c | 2 +- source/libs/scheduler/inc/schedulerInt.h | 2 ++ source/libs/scheduler/src/scheduler.c | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index 9859d4a9b9..b648e18809 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -1480,7 +1480,7 @@ static int32_t jsonToDatum(const SJson* pJson, void* pObj) { case TSDB_DATA_TYPE_NCHAR: case TSDB_DATA_TYPE_VARCHAR: case TSDB_DATA_TYPE_VARBINARY: { - pNode->datum.p = calloc(1, pNode->node.resType.bytes); + pNode->datum.p = calloc(1, 
pNode->node.resType.bytes + VARSTR_HEADER_SIZE + 1); if (NULL == pNode->datum.p) { code = TSDB_CODE_OUT_OF_MEMORY; break; diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 8959934af8..429895fc9d 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -398,7 +398,7 @@ static EDealRes translateValue(STranslateContext* pCxt, SValueNode* pVal) { case TSDB_DATA_TYPE_NCHAR: case TSDB_DATA_TYPE_VARCHAR: case TSDB_DATA_TYPE_VARBINARY: { - pVal->datum.p = calloc(1, pVal->node.resType.bytes + VARSTR_HEADER_SIZE); + pVal->datum.p = calloc(1, pVal->node.resType.bytes + VARSTR_HEADER_SIZE + 1); if (NULL == pVal->datum.p) { return generateDealNodeErrMsg(pCxt, TSDB_CODE_OUT_OF_MEMORY); } diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h index c1c4359607..fd46d10161 100644 --- a/source/libs/scheduler/inc/schedulerInt.h +++ b/source/libs/scheduler/inc/schedulerInt.h @@ -224,6 +224,8 @@ extern SSchedulerMgmt schMgmt; qError("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__) #define SCH_TASK_DLOG(param, ...) \ qDebug("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__) +#define SCH_TASK_DLOGL(param, ...) \ + qDebugL("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__) #define SCH_TASK_WLOG(param, ...) 
\ qWarn("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__) diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index e612f3ae59..46b27b0ac4 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -1919,7 +1919,7 @@ int32_t schLaunchTaskImpl(SSchJob *pJob, SSchTask *pTask) { pTask->msgLen); SCH_ERR_RET(code); } else { - SCH_TASK_DLOG("physical plan len:%d, %s", pTask->msgLen, pTask->msg); + SCH_TASK_DLOGL("physical plan len:%d, %s", pTask->msgLen, pTask->msg); } } From 05e2108fba9282005a8c6fa494f1ba28110c52b0 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 08:47:58 +0800 Subject: [PATCH 41/59] feature/scheduler --- source/libs/qworker/inc/qworkerInt.h | 1 + source/libs/qworker/src/qworker.c | 2 ++ source/libs/scheduler/inc/schedulerInt.h | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/source/libs/qworker/inc/qworkerInt.h b/source/libs/qworker/inc/qworkerInt.h index f6bc204227..573eaed2e6 100644 --- a/source/libs/qworker/inc/qworkerInt.h +++ b/source/libs/qworker/inc/qworkerInt.h @@ -185,6 +185,7 @@ typedef struct SQWorkerMgmt { #define QW_TASK_ELOG(param, ...) qError("QW:%p QID:0x%"PRIx64",TID:0x%"PRIx64" " param, mgmt, qId, tId, __VA_ARGS__) #define QW_TASK_WLOG(param, ...) qWarn("QW:%p QID:0x%"PRIx64",TID:0x%"PRIx64" " param, mgmt, qId, tId, __VA_ARGS__) #define QW_TASK_DLOG(param, ...) qDebug("QW:%p QID:0x%"PRIx64",TID:0x%"PRIx64" " param, mgmt, qId, tId, __VA_ARGS__) +#define QW_TASK_DLOGL(param, ...) 
qDebugL("QW:%p QID:0x%"PRIx64",TID:0x%"PRIx64" " param, mgmt, qId, tId, __VA_ARGS__) #define QW_TASK_ELOG_E(param) qError("QW:%p QID:0x%"PRIx64",TID:0x%"PRIx64" " param, mgmt, qId, tId) #define QW_TASK_WLOG_E(param) qWarn("QW:%p QID:0x%"PRIx64",TID:0x%"PRIx64" " param, mgmt, qId, tId) diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 44192d7463..e42582e019 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -950,6 +950,8 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType) { atomic_store_ptr(&ctx->connInfo.handle, qwMsg->connInfo.handle); atomic_store_ptr(&ctx->connInfo.ahandle, qwMsg->connInfo.ahandle); + QW_TASK_DLOGL("subplan json string, len:%d, %s", qwMsg->msgLen, qwMsg->msg); + code = qStringToSubplan(qwMsg->msg, &plan); if (TSDB_CODE_SUCCESS != code) { QW_TASK_ELOG("task string to subplan failed, code:%x - %s", code, tstrerror(code)); diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h index fd46d10161..518da6e2b8 100644 --- a/source/libs/scheduler/inc/schedulerInt.h +++ b/source/libs/scheduler/inc/schedulerInt.h @@ -225,7 +225,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_TASK_DLOG(param, ...) \ qDebug("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__) #define SCH_TASK_DLOGL(param, ...) \ - qDebugL("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__) + qDebugL("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__) #define SCH_TASK_WLOG(param, ...) 
\ qWarn("QID:0x%" PRIx64 ",TID:0x%" PRIx64 " " param, pJob->queryId, SCH_TASK_ID(pTask), __VA_ARGS__) From ac225d58be4c41da7924ffabeb98d6ddce0a25e3 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 09:53:25 +0800 Subject: [PATCH 42/59] feature/scheduler --- source/libs/parser/src/parTranslater.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 429895fc9d..61c7bfa35c 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -403,7 +403,7 @@ static EDealRes translateValue(STranslateContext* pCxt, SValueNode* pVal) { return generateDealNodeErrMsg(pCxt, TSDB_CODE_OUT_OF_MEMORY); } varDataSetLen(pVal->datum.p, pVal->node.resType.bytes); - strcpy(varDataVal(pVal->datum.p), pVal->literal); + strncpy(varDataVal(pVal->datum.p), pVal->literal, pVal->node.resType.bytes); break; } case TSDB_DATA_TYPE_TIMESTAMP: { From f323ac14433dd4cdec483fe14e1e8120d8ea3039 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 10:07:30 +0800 Subject: [PATCH 43/59] feature/scheduler --- source/libs/nodes/src/nodesCodeFuncs.c | 3 ++- source/libs/parser/src/parTranslater.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index b648e18809..6e60c6cadc 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -1480,13 +1480,14 @@ static int32_t jsonToDatum(const SJson* pJson, void* pObj) { case TSDB_DATA_TYPE_NCHAR: case TSDB_DATA_TYPE_VARCHAR: case TSDB_DATA_TYPE_VARBINARY: { - pNode->datum.p = calloc(1, pNode->node.resType.bytes + VARSTR_HEADER_SIZE + 1); + pNode->datum.p = calloc(1, pNode->node.resType.bytes + VARSTR_HEADER_SIZE + 1 + 100); if (NULL == pNode->datum.p) { code = TSDB_CODE_OUT_OF_MEMORY; break; } varDataSetLen(pNode->datum.p, pNode->node.resType.bytes); code = 
tjsonGetStringValue(pJson, jkValueDatum, varDataVal(pNode->datum.p)); + nodesDebug("!!!!!!!!!len:%d,string:%s", pNode->node.resType.bytes, varDataVal(pNode->datum.p)); break; } case TSDB_DATA_TYPE_JSON: diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 61c7bfa35c..d8ea4fbfc6 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -404,6 +404,7 @@ static EDealRes translateValue(STranslateContext* pCxt, SValueNode* pVal) { } varDataSetLen(pVal->datum.p, pVal->node.resType.bytes); strncpy(varDataVal(pVal->datum.p), pVal->literal, pVal->node.resType.bytes); + parserDebug("!!!!!!!!!!!!value:%s,len:%d", pVal->literal, pVal->node.resType.bytes); break; } case TSDB_DATA_TYPE_TIMESTAMP: { From 397f1f3b032fa7ee118e40563eb98a8be42d4c41 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 10:09:04 +0800 Subject: [PATCH 44/59] feature/scheduler --- source/libs/qworker/inc/qworkerInt.h | 2 ++ source/libs/qworker/src/qworker.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/source/libs/qworker/inc/qworkerInt.h b/source/libs/qworker/inc/qworkerInt.h index f6bc204227..57355cd988 100644 --- a/source/libs/qworker/inc/qworkerInt.h +++ b/source/libs/qworker/inc/qworkerInt.h @@ -185,6 +185,8 @@ typedef struct SQWorkerMgmt { #define QW_TASK_ELOG(param, ...) qError("QW:%p QID:0x%"PRIx64",TID:0x%"PRIx64" " param, mgmt, qId, tId, __VA_ARGS__) #define QW_TASK_WLOG(param, ...) qWarn("QW:%p QID:0x%"PRIx64",TID:0x%"PRIx64" " param, mgmt, qId, tId, __VA_ARGS__) #define QW_TASK_DLOG(param, ...) qDebug("QW:%p QID:0x%"PRIx64",TID:0x%"PRIx64" " param, mgmt, qId, tId, __VA_ARGS__) +#define QW_TASK_DLOGL(param, ...) 
qDebugL("QW:%p QID:0x%"PRIx64",TID:0x%"PRIx64" " param, mgmt, qId, tId, __VA_ARGS__) + #define QW_TASK_ELOG_E(param) qError("QW:%p QID:0x%"PRIx64",TID:0x%"PRIx64" " param, mgmt, qId, tId) #define QW_TASK_WLOG_E(param) qWarn("QW:%p QID:0x%"PRIx64",TID:0x%"PRIx64" " param, mgmt, qId, tId) diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 44192d7463..7b49b96928 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -950,6 +950,8 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType) { atomic_store_ptr(&ctx->connInfo.handle, qwMsg->connInfo.handle); atomic_store_ptr(&ctx->connInfo.ahandle, qwMsg->connInfo.ahandle); + QW_TASK_DLOGL("subplan json string, len:%d, %s", qwMsg->msgLen, qwMsg->msg); + code = qStringToSubplan(qwMsg->msg, &plan); if (TSDB_CODE_SUCCESS != code) { QW_TASK_ELOG("task string to subplan failed, code:%x - %s", code, tstrerror(code)); From fad5a80bab03ec67008e4cf3874c210493a2ce11 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 13:27:14 +0800 Subject: [PATCH 45/59] feature/scheduler --- include/libs/qworker/qworker.h | 1 + source/dnode/mgmt/mnode/src/mmMsg.c | 2 +- source/dnode/mnode/impl/CMakeLists.txt | 2 +- source/dnode/mnode/impl/inc/mndInt.h | 5 +- source/dnode/mnode/impl/inc/mndQuery.h | 33 ++++++++++ source/dnode/mnode/impl/src/mndQuery.c | 70 ++++++++++++++++++++++ source/dnode/mnode/impl/src/mnode.c | 2 + source/libs/nodes/src/nodesCodeFuncs.c | 3 +- source/libs/parser/src/parTranslater.c | 17 +++--- source/libs/planner/src/planPhysiCreater.c | 13 ++-- 10 files changed, 131 insertions(+), 17 deletions(-) create mode 100644 source/dnode/mnode/impl/inc/mndQuery.h create mode 100644 source/dnode/mnode/impl/src/mndQuery.c diff --git a/include/libs/qworker/qworker.h b/include/libs/qworker/qworker.h index 944ac97ddb..0846841cef 100644 --- a/include/libs/qworker/qworker.h +++ b/include/libs/qworker/qworker.h @@ -28,6 +28,7 @@ enum { 
NODE_TYPE_VNODE = 1, NODE_TYPE_QNODE, NODE_TYPE_SNODE, + NODE_TYPE_MNODE, }; diff --git a/source/dnode/mgmt/mnode/src/mmMsg.c b/source/dnode/mgmt/mnode/src/mmMsg.c index 1cae2220ad..f38bf3a65f 100644 --- a/source/dnode/mgmt/mnode/src/mmMsg.c +++ b/source/dnode/mgmt/mnode/src/mmMsg.c @@ -159,6 +159,6 @@ void mmInitMsgHandles(SMgmtWrapper *pWrapper) { dndSetMsgHandle(pWrapper, TDMT_VND_QUERY, (NodeMsgFp)mmProcessReadMsg, MND_VGID); dndSetMsgHandle(pWrapper, TDMT_VND_QUERY_CONTINUE, (NodeMsgFp)mmProcessReadMsg, MND_VGID); dndSetMsgHandle(pWrapper, TDMT_VND_FETCH, (NodeMsgFp)mmProcessReadMsg, MND_VGID); - dndSetMsgHandle(pWrapper, TDMT_VND_FETCH_RSP, (NodeMsgFp)mmProcessReadMsg, MND_VGID); + dndSetMsgHandle(pWrapper, TDMT_VND_DROP_TASK, (NodeMsgFp)mmProcessReadMsg, MND_VGID); } diff --git a/source/dnode/mnode/impl/CMakeLists.txt b/source/dnode/mnode/impl/CMakeLists.txt index 514bba19f4..60bf366504 100644 --- a/source/dnode/mnode/impl/CMakeLists.txt +++ b/source/dnode/mnode/impl/CMakeLists.txt @@ -6,7 +6,7 @@ target_include_directories( PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc" ) target_link_libraries( - mnode scheduler sdb wal transport cjson sync monitor + mnode scheduler sdb wal transport cjson sync monitor executor qworker ) if(${BUILD_TEST}) diff --git a/source/dnode/mnode/impl/inc/mndInt.h b/source/dnode/mnode/impl/inc/mndInt.h index 20e85973be..f89e9d8fe0 100644 --- a/source/dnode/mnode/impl/inc/mndInt.h +++ b/source/dnode/mnode/impl/inc/mndInt.h @@ -59,6 +59,8 @@ typedef struct SMnodeLoad { int64_t compStorage; } SMnodeLoad; +typedef struct SQWorkerMgmt SQHandle; + typedef struct { const char *name; MndInitFp initFp; @@ -112,6 +114,7 @@ typedef struct SMnode { SSdb *pSdb; SMgmtWrapper *pWrapper; SArray *pSteps; + SQHandle *pQuery; SShowMgmt showMgmt; SProfileMgmt profileMgmt; STelemMgmt telemMgmt; @@ -119,7 +122,7 @@ typedef struct SMnode { SHashObj *infosMeta; SGrantInfo grant; MndMsgFp msgFp[TDMT_MAX]; - SMsgCb msgCb; + SMsgCb msgCb; } SMnode; void 
mndSetMsgHandle(SMnode *pMnode, tmsg_t msgType, MndMsgFp fp); diff --git a/source/dnode/mnode/impl/inc/mndQuery.h b/source/dnode/mnode/impl/inc/mndQuery.h new file mode 100644 index 0000000000..7fab80de77 --- /dev/null +++ b/source/dnode/mnode/impl/inc/mndQuery.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _TD_MND_QUERY_H_ +#define _TD_MND_QUERY_H_ + +#include "mndInt.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int32_t mndInitQuery(SMnode *pMnode); +void mndCleanupQuery(SMnode *pMnode); + + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_MND_QUERY_H_*/ diff --git a/source/dnode/mnode/impl/src/mndQuery.c b/source/dnode/mnode/impl/src/mndQuery.c new file mode 100644 index 0000000000..e93a0d9b17 --- /dev/null +++ b/source/dnode/mnode/impl/src/mndQuery.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include "mndQuery.h" +#include "mndMnode.h" +#include "executor.h" +#include "qworker.h" + +int32_t mndProcessQueryMsg(SNodeMsg *pReq) { + mTrace("message in query queue is processing"); + SMnode *pMnode = pReq->pNode; + SReadHandle handle = {0}; + + switch (pReq->rpcMsg.msgType) { + case TDMT_VND_QUERY: + return qWorkerProcessQueryMsg(&handle, pMnode->pQuery, &pReq->rpcMsg); + case TDMT_VND_QUERY_CONTINUE: + return qWorkerProcessCQueryMsg(&handle, pMnode->pQuery, &pReq->rpcMsg); + default: + mError("unknown msg type:%d in query queue", pReq->rpcMsg.msgType); + return TSDB_CODE_VND_APP_ERROR; + } +} + +int32_t mndProcessFetchMsg(SNodeMsg *pReq) { + mTrace("message in fetch queue is processing"); + SMnode *pMnode = pReq->pNode; + + switch (pReq->rpcMsg.msgType) { + case TDMT_VND_FETCH: + return qWorkerProcessFetchMsg(pMnode, pMnode->pQuery, &pReq->rpcMsg); + case TDMT_VND_DROP_TASK: + return qWorkerProcessDropMsg(pMnode, pMnode->pQuery, &pReq->rpcMsg); + case TDMT_VND_QUERY_HEARTBEAT: + return qWorkerProcessHbMsg(pMnode, pMnode->pQuery, &pReq->rpcMsg); + default: + mError("unknown msg type:%d in fetch queue", pReq->rpcMsg.msgType); + return TSDB_CODE_VND_APP_ERROR; + } +} + +int32_t mndInitQuery(SMnode *pMnode) { + int32_t code = qWorkerInit(NODE_TYPE_MNODE, MND_VGID, NULL, (void **)&pMnode->pQuery, &pMnode->msgCb); + if (code) { + return code; + } + + mndSetMsgHandle(pMnode, TDMT_VND_QUERY, mndProcessQueryMsg); + mndSetMsgHandle(pMnode, TDMT_VND_QUERY_CONTINUE, mndProcessQueryMsg); + mndSetMsgHandle(pMnode, TDMT_VND_FETCH, mndProcessFetchMsg); + mndSetMsgHandle(pMnode, TDMT_VND_DROP_TASK, mndProcessFetchMsg); + mndSetMsgHandle(pMnode, TDMT_VND_QUERY_HEARTBEAT, mndProcessFetchMsg); + + return 0; +} + +void mndCleanupQuery(SMnode *pMnode) { qWorkerDestroy((void **)&pMnode->pQuery); } + diff --git a/source/dnode/mnode/impl/src/mnode.c b/source/dnode/mnode/impl/src/mnode.c index c4d389a379..754bed030b 100644 --- a/source/dnode/mnode/impl/src/mnode.c +++ 
b/source/dnode/mnode/impl/src/mnode.c @@ -39,6 +39,7 @@ #include "mndTrans.h" #include "mndUser.h" #include "mndVgroup.h" +#include "mndQuery.h" #define MQ_TIMER_MS 3000 #define TRNAS_TIMER_MS 6000 @@ -217,6 +218,7 @@ static int32_t mndInitSteps(SMnode *pMnode) { // if (mndAllocStep(pMnode, "mnode-timer", mndInitTimer, NULL) != 0) return -1; if (mndAllocStep(pMnode, "mnode-profile", mndInitProfile, mndCleanupProfile) != 0) return -1; if (mndAllocStep(pMnode, "mnode-show", mndInitShow, mndCleanupShow) != 0) return -1; + if (mndAllocStep(pMnode, "mnode-query", mndInitQuery, mndCleanupQuery) != 0) return -1; if (mndAllocStep(pMnode, "mnode-sync", mndInitSync, mndCleanupSync) != 0) return -1; if (mndAllocStep(pMnode, "mnode-telem", mndInitTelem, mndCleanupTelem) != 0) return -1; if (mndAllocStep(pMnode, "mnode-timer", NULL, mndCleanupTimer) != 0) return -1; diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index 9859d4a9b9..9b453d17bc 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -1480,13 +1480,14 @@ static int32_t jsonToDatum(const SJson* pJson, void* pObj) { case TSDB_DATA_TYPE_NCHAR: case TSDB_DATA_TYPE_VARCHAR: case TSDB_DATA_TYPE_VARBINARY: { - pNode->datum.p = calloc(1, pNode->node.resType.bytes); + pNode->datum.p = calloc(1, pNode->node.resType.bytes + VARSTR_HEADER_SIZE + 1 + 100); if (NULL == pNode->datum.p) { code = TSDB_CODE_OUT_OF_MEMORY; break; } varDataSetLen(pNode->datum.p, pNode->node.resType.bytes); code = tjsonGetStringValue(pJson, jkValueDatum, varDataVal(pNode->datum.p)); + nodesDebug("varchar len:%d,string:%s", pNode->node.resType.bytes, varDataVal(pNode->datum.p)); break; } case TSDB_DATA_TYPE_JSON: diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index f82ce2c1b4..2761f3ef36 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -383,12 +383,13 @@ static EDealRes 
translateValue(STranslateContext* pCxt, SValueNode* pVal) { case TSDB_DATA_TYPE_NCHAR: case TSDB_DATA_TYPE_VARCHAR: case TSDB_DATA_TYPE_VARBINARY: { - pVal->datum.p = calloc(1, pVal->node.resType.bytes + VARSTR_HEADER_SIZE); + pVal->datum.p = calloc(1, pVal->node.resType.bytes + VARSTR_HEADER_SIZE + 1); if (NULL == pVal->datum.p) { return generateDealNodeErrMsg(pCxt, TSDB_CODE_OUT_OF_MEMORY); } varDataSetLen(pVal->datum.p, pVal->node.resType.bytes); - strcpy(varDataVal(pVal->datum.p), pVal->literal); + strncpy(varDataVal(pVal->datum.p), pVal->literal, pVal->node.resType.bytes); + parserDebug("varchar value:%s,len:%d", pVal->literal, pVal->node.resType.bytes); break; } case TSDB_DATA_TYPE_TIMESTAMP: { @@ -599,9 +600,9 @@ static int32_t toVgroupsInfo(SArray* pVgs, SVgroupsInfo** pVgsInfo) { static int32_t setSysTableVgroupList(STranslateContext* pCxt, SName* pName, SRealTableNode* pRealTable) { // todo release - // if (0 != strcmp(pRealTable->table.tableName, TSDB_INS_TABLE_USER_TABLES)) { - // return TSDB_CODE_SUCCESS; - // } + if (0 != strcmp(pRealTable->table.tableName, TSDB_INS_TABLE_USER_TABLES)) { + return TSDB_CODE_SUCCESS; + } int32_t code = TSDB_CODE_SUCCESS; SArray* vgroupList = NULL; @@ -613,9 +614,9 @@ static int32_t setSysTableVgroupList(STranslateContext* pCxt, SName* pName, SRea if (TSDB_CODE_SUCCESS == code) { // todo remove - if (NULL != vgroupList && taosArrayGetSize(vgroupList) > 0 && 0 != strcmp(pRealTable->table.tableName, TSDB_INS_TABLE_USER_TABLES)) { - taosArrayPopTailBatch(vgroupList, taosArrayGetSize(vgroupList) - 1); - } + //if (NULL != vgroupList && taosArrayGetSize(vgroupList) > 0 && 0 != strcmp(pRealTable->table.tableName, TSDB_INS_TABLE_USER_TABLES)) { + // taosArrayPopTailBatch(vgroupList, taosArrayGetSize(vgroupList) - 1); + //} code = toVgroupsInfo(vgroupList, &pRealTable->pVgroupList); } diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index 4077c57f2c..c8ea98687b 100644 --- 
a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -307,11 +307,14 @@ static int32_t createSystemTableScanPhysiNode(SPhysiPlanContext* pCxt, SSubplan* vgroupInfoToNodeAddr(pScanLogicNode->pVgroupList->vgroups, &pSubplan->execNode); taosArrayPush(pCxt->pExecNodeList, &pSubplan->execNode); } else { - for (int32_t i = 0; i < pScanLogicNode->pVgroupList->numOfVgroups; ++i) { - SQueryNodeAddr addr; - vgroupInfoToNodeAddr(pScanLogicNode->pVgroupList->vgroups + i, &addr); - taosArrayPush(pCxt->pExecNodeList, &addr); - } + SQueryNodeAddr addr = { .nodeId = MND_VGID, .epSet = pCxt->pPlanCxt->mgmtEpSet }; + taosArrayPush(pCxt->pExecNodeList, &addr); + + //for (int32_t i = 0; i < pScanLogicNode->pVgroupList->numOfVgroups; ++i) { + // SQueryNodeAddr addr; + // vgroupInfoToNodeAddr(pScanLogicNode->pVgroupList->vgroups + i, &addr); + // taosArrayPush(pCxt->pExecNodeList, &addr); + //} } pScan->mgmtEpSet = pCxt->pPlanCxt->mgmtEpSet; tNameGetFullDbName(&pScanLogicNode->tableName, pSubplan->dbFName); From 77898958bb9c3d2181844198fef548da2f158b9d Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 14:26:17 +0800 Subject: [PATCH 46/59] feature/qnode --- source/dnode/mgmt/mnode/src/mmMsg.c | 1 + source/dnode/mgmt/mnode/src/mmWorker.c | 3 ++- source/libs/scheduler/src/scheduler.c | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/source/dnode/mgmt/mnode/src/mmMsg.c b/source/dnode/mgmt/mnode/src/mmMsg.c index f38bf3a65f..d45a6c54f3 100644 --- a/source/dnode/mgmt/mnode/src/mmMsg.c +++ b/source/dnode/mgmt/mnode/src/mmMsg.c @@ -160,5 +160,6 @@ void mmInitMsgHandles(SMgmtWrapper *pWrapper) { dndSetMsgHandle(pWrapper, TDMT_VND_QUERY_CONTINUE, (NodeMsgFp)mmProcessReadMsg, MND_VGID); dndSetMsgHandle(pWrapper, TDMT_VND_FETCH, (NodeMsgFp)mmProcessReadMsg, MND_VGID); dndSetMsgHandle(pWrapper, TDMT_VND_DROP_TASK, (NodeMsgFp)mmProcessReadMsg, MND_VGID); + dndSetMsgHandle(pWrapper, TDMT_VND_QUERY_HEARTBEAT, 
(NodeMsgFp)mmProcessReadMsg, MND_VGID); } diff --git a/source/dnode/mgmt/mnode/src/mmWorker.c b/source/dnode/mgmt/mnode/src/mmWorker.c index d6b150106d..ef0dc5923a 100644 --- a/source/dnode/mgmt/mnode/src/mmWorker.c +++ b/source/dnode/mgmt/mnode/src/mmWorker.c @@ -92,8 +92,9 @@ int32_t mmPutMsgToReadQueue(SMgmtWrapper *pWrapper, SRpcMsg *pRpc) { int32_t mmStartWorker(SMnodeMgmt *pMgmt) { SSingleWorkerCfg cfg = {.minNum = 0, .maxNum = 1, .name = "mnode-read", .fp = (FItem)mmProcessQueue, .param = pMgmt}; + SSingleWorkerCfg readCfg = {.minNum = 2, .maxNum = 2, .name = "mnode-read", .fp = (FItem)mmProcessQueue, .param = pMgmt}; - if (tSingleWorkerInit(&pMgmt->readWorker, &cfg) != 0) { + if (tSingleWorkerInit(&pMgmt->readWorker, &readCfg) != 0) { dError("failed to start mnode-read worker since %s", terrstr()); return -1; } diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 46b27b0ac4..57d7131b8e 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -1626,8 +1626,9 @@ int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId) { SRpcCtx rpcCtx = {0}; SSchTrans trans = {0}; int32_t msgType = TDMT_VND_QUERY_HEARTBEAT; + + req.header.vgId = htonl(nodeEpId->nodeId); req.sId = schMgmt.sId; - req.header.vgId = nodeEpId->nodeId; memcpy(&req.epId, nodeEpId, sizeof(SQueryNodeEpId)); SSchHbTrans *hb = taosHashGet(schMgmt.hbConnections, nodeEpId, sizeof(SQueryNodeEpId)); From c428c9395e94f7784a20ad10e5f172094a38e6b1 Mon Sep 17 00:00:00 2001 From: Shengliang Date: Fri, 25 Mar 2022 14:52:24 +0800 Subject: [PATCH 47/59] handle except --- source/libs/transport/src/transCli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 3b8ea5858f..3a24d08a58 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -159,7 +159,7 @@ static void destroyThrdObj(SCliThrdObj* pThrd); 
int i = 0, sz = transQueueSize(&conn->cliMsgs); \ for (; i < sz; i++) { \ pMsg = transQueueGet(&conn->cliMsgs, i); \ - if (pMsg != NULL && (uint64_t)pMsg->ctx->ahandle == ahandle) { \ + if (pMsg != NULL && pMsg->ctx != NULL && (uint64_t)pMsg->ctx->ahandle == ahandle) { \ break; \ } \ } \ From 9addf717c9da4473a350862896b60ce6350fef49 Mon Sep 17 00:00:00 2001 From: Shengliang Date: Fri, 25 Mar 2022 14:55:58 +0800 Subject: [PATCH 48/59] handle except --- source/libs/transport/src/transSrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/transport/src/transSrv.c b/source/libs/transport/src/transSrv.c index 787c538f2a..3950a42ef0 100644 --- a/source/libs/transport/src/transSrv.c +++ b/source/libs/transport/src/transSrv.c @@ -292,7 +292,7 @@ void uvOnSendCb(uv_write_t* req, int status) { } transQueuePop(&conn->srvMsgs); tfree(msg); - msg = (SSrvMsg*)transQueuePop(&conn->srvMsgs); + msg = (SSrvMsg*)transQueueGet(&conn->srvMsgs, 0); if (msg != NULL) { uvStartSendRespInternal(msg); } From aec400e15cf1a3985be2d44a13c86cee5c1a79c9 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 25 Mar 2022 15:12:45 +0800 Subject: [PATCH 49/59] handle except --- source/libs/transport/src/transCli.c | 28 ++++++++++++++-------------- source/libs/transport/src/transSrv.c | 12 +++++++----- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 3b8ea5858f..9af26f9d67 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -154,20 +154,20 @@ static void destroyThrdObj(SCliThrdObj* pThrd); } \ } while (0) -#define CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle) \ - do { \ - int i = 0, sz = transQueueSize(&conn->cliMsgs); \ - for (; i < sz; i++) { \ - pMsg = transQueueGet(&conn->cliMsgs, i); \ - if (pMsg != NULL && (uint64_t)pMsg->ctx->ahandle == ahandle) { \ - break; \ - } \ - } \ - if (i == sz) { \ - pMsg = NULL; \ - } else { \ - pMsg = 
transQueueRm(&conn->cliMsgs, i); \ - } \ +#define CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle) \ + do { \ + int i = 0, sz = transQueueSize(&conn->cliMsgs); \ + for (; i < sz; i++) { \ + pMsg = transQueueGet(&conn->cliMsgs, i); \ + if (pMsg != NULL && pMsg->ctx != NULL && (uint64_t)pMsg->ctx->ahandle == ahandle) { \ + break; \ + } \ + } \ + if (i == sz) { \ + pMsg = NULL; \ + } else { \ + pMsg = transQueueRm(&conn->cliMsgs, i); \ + } \ } while (0) #define CONN_GET_NEXT_SENDMSG(conn) \ do { \ diff --git a/source/libs/transport/src/transSrv.c b/source/libs/transport/src/transSrv.c index 787c538f2a..6b0413c40e 100644 --- a/source/libs/transport/src/transSrv.c +++ b/source/libs/transport/src/transSrv.c @@ -644,6 +644,7 @@ static void uvDestroyConn(uv_handle_t* handle) { // free(conn); if (thrd->quit && QUEUE_IS_EMPTY(&thrd->conn)) { + tTrace("work thread quit"); uv_loop_close(thrd->loop); uv_stop(thrd->loop); } @@ -705,12 +706,12 @@ End: return NULL; } void uvHandleQuit(SSrvMsg* msg, SWorkThrdObj* thrd) { + thrd->quit = true; if (QUEUE_IS_EMPTY(&thrd->conn)) { uv_loop_close(thrd->loop); uv_stop(thrd->loop); } else { destroyAllConn(thrd); - thrd->quit = true; } free(msg); } @@ -773,15 +774,16 @@ void sendQuitToWorkThrd(SWorkThrdObj* pThrd) { void transCloseServer(void* arg) { // impl later SServerObj* srv = arg; - for (int i = 0; i < srv->numOfThreads; i++) { - sendQuitToWorkThrd(srv->pThreadObj[i]); - destroyWorkThrd(srv->pThreadObj[i]); - } tDebug("send quit msg to accept thread"); uv_async_send(srv->pAcceptAsync); taosThreadJoin(srv->thread, NULL); + for (int i = 0; i < srv->numOfThreads; i++) { + sendQuitToWorkThrd(srv->pThreadObj[i]); + destroyWorkThrd(srv->pThreadObj[i]); + } + free(srv->pThreadObj); free(srv->pAcceptAsync); free(srv->loop); From 8cfc194c1cca1055fea2d8a2504f4ead6bd3e5d2 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 15:23:12 +0800 Subject: [PATCH 50/59] feature/scheduler --- source/libs/nodes/src/nodesCodeFuncs.c | 1 + 1 file changed, 
1 insertion(+) diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index 6e60c6cadc..11cae9a142 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -1418,6 +1418,7 @@ static int32_t datumToJson(const void* pObj, SJson* pJson) { case TSDB_DATA_TYPE_VARCHAR: case TSDB_DATA_TYPE_VARBINARY: code = tjsonAddStringToObject(pJson, jkValueDatum, varDataVal(pNode->datum.p)); + nodesDebug("!!!!!!!!tojson, value:%s", varDataVal(pNode->datum.p)); break; case TSDB_DATA_TYPE_JSON: case TSDB_DATA_TYPE_DECIMAL: From d47015d4ebe1a986c7f0431824a79944f1e505c2 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 15:29:59 +0800 Subject: [PATCH 51/59] feature/scheduler --- source/libs/nodes/src/nodesCloneFuncs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index 12cd6f1bc1..60692323f5 100644 --- a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -141,12 +141,12 @@ static SNode* valueNodeCopy(const SValueNode* pSrc, SValueNode* pDst) { case TSDB_DATA_TYPE_NCHAR: case TSDB_DATA_TYPE_VARCHAR: case TSDB_DATA_TYPE_VARBINARY: - pDst->datum.p = malloc(pSrc->node.resType.bytes + VARSTR_HEADER_SIZE); + pDst->datum.p = malloc(pSrc->node.resType.bytes + VARSTR_HEADER_SIZE + 1); if (NULL == pDst->datum.p) { nodesDestroyNode(pDst); return NULL; } - memcpy(pDst->datum.p, pSrc->datum.p, pSrc->node.resType.bytes + VARSTR_HEADER_SIZE); + memcpy(pDst->datum.p, pSrc->datum.p, pSrc->node.resType.bytes + VARSTR_HEADER_SIZE + 1); break; case TSDB_DATA_TYPE_JSON: case TSDB_DATA_TYPE_DECIMAL: From 0a0699e4f934ad08c357fc5fa7d85204830b8b56 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 15:32:16 +0800 Subject: [PATCH 52/59] feature/scheduler --- source/libs/nodes/src/nodesCodeFuncs.c | 9 +-------- source/libs/parser/src/parTranslater.c | 1 - 2 files 
changed, 1 insertion(+), 9 deletions(-) diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index 39a1b24e2e..eed97234cc 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -2430,11 +2430,4 @@ int32_t nodesStringToList(const char* pStr, SNodeList** pList) { if (NULL == pJson) { return TSDB_CODE_FAILED; } - int32_t code = jsonToNodeListImpl(pJson, pList); - if (TSDB_CODE_SUCCESS != code) { - nodesDestroyList(*pList); - terrno = code; - return code; - } - return TSDB_CODE_SUCCESS; -} + int32_t \ No newline at end of file diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index af4351500d..1998fbc626 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -404,7 +404,6 @@ static EDealRes translateValue(STranslateContext* pCxt, SValueNode* pVal) { } varDataSetLen(pVal->datum.p, pVal->node.resType.bytes); strncpy(varDataVal(pVal->datum.p), pVal->literal, pVal->node.resType.bytes); - parserDebug("!!!!!!!!!!!!value:%s,len:%d", pVal->literal, pVal->node.resType.bytes); break; } case TSDB_DATA_TYPE_TIMESTAMP: { From e18d7de960234b43dc356a243093b2ae115c72a5 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 15:38:04 +0800 Subject: [PATCH 53/59] feature/qnode --- include/common/tmsgcb.h | 2 +- source/dnode/mgmt/mnode/inc/mmInt.h | 5 ++++- source/dnode/mgmt/mnode/src/mmInt.c | 5 +++-- source/dnode/mgmt/mnode/src/mmMsg.c | 10 +++++----- source/dnode/mgmt/mnode/src/mmWorker.c | 19 +++++++++++++++++-- source/dnode/mnode/impl/src/mnode.c | 4 ++-- 6 files changed, 32 insertions(+), 13 deletions(-) diff --git a/include/common/tmsgcb.h b/include/common/tmsgcb.h index 54a145ff33..784ec6234b 100644 --- a/include/common/tmsgcb.h +++ b/include/common/tmsgcb.h @@ -25,7 +25,7 @@ extern "C" { typedef struct SRpcMsg SRpcMsg; typedef struct SEpSet SEpSet; typedef struct SMgmtWrapper SMgmtWrapper; -typedef 
enum { QUERY_QUEUE, FETCH_QUEUE, WRITE_QUEUE, APPLY_QUEUE, SYNC_QUEUE, QUEUE_MAX } EQueueType; +typedef enum { QUERY_QUEUE, FETCH_QUEUE, READ_QUEUE, WRITE_QUEUE, APPLY_QUEUE, SYNC_QUEUE, QUEUE_MAX } EQueueType; typedef int32_t (*PutToQueueFp)(SMgmtWrapper* pWrapper, SRpcMsg* pReq); typedef int32_t (*GetQueueSizeFp)(SMgmtWrapper* pWrapper, int32_t vgId, EQueueType qtype); diff --git a/source/dnode/mgmt/mnode/inc/mmInt.h b/source/dnode/mgmt/mnode/inc/mmInt.h index d57088474f..cd4585048b 100644 --- a/source/dnode/mgmt/mnode/inc/mmInt.h +++ b/source/dnode/mgmt/mnode/inc/mmInt.h @@ -28,6 +28,7 @@ typedef struct SMnodeMgmt { SDnode *pDnode; SMgmtWrapper *pWrapper; const char *path; + SSingleWorker queryWorker; SSingleWorker readWorker; SSingleWorker writeWorker; SSingleWorker syncWorker; @@ -57,11 +58,13 @@ void mmStopWorker(SMnodeMgmt *pMgmt); int32_t mmProcessWriteMsg(SMnodeMgmt *pMgmt, SNodeMsg *pMsg); int32_t mmProcessSyncMsg(SMnodeMgmt *pMgmt, SNodeMsg *pMsg); int32_t mmProcessReadMsg(SMnodeMgmt *pMgmt, SNodeMsg *pMsg); +int32_t mmProcessQueryMsg(SMnodeMgmt *pMgmt, SNodeMsg *pMsg); int32_t mmPutMsgToWriteQueue(SMgmtWrapper *pWrapper, SRpcMsg *pRpcMsg); int32_t mmPutMsgToReadQueue(SMgmtWrapper *pWrapper, SRpcMsg *pRpcMsg); +int32_t mmPutMsgToQueryQueue(SMgmtWrapper *pWrapper, SRpcMsg *pRpc); #ifdef __cplusplus } #endif -#endif /*_TD_DND_MNODE_INT_H_*/ \ No newline at end of file +#endif /*_TD_DND_MNODE_INT_H_*/ diff --git a/source/dnode/mgmt/mnode/src/mmInt.c b/source/dnode/mgmt/mnode/src/mmInt.c index 1f60007be1..591bc5aad7 100644 --- a/source/dnode/mgmt/mnode/src/mmInt.c +++ b/source/dnode/mgmt/mnode/src/mmInt.c @@ -45,7 +45,8 @@ static void mmInitOption(SMnodeMgmt *pMgmt, SMnodeOpt *pOption) { SMsgCb msgCb = {0}; msgCb.pWrapper = pMgmt->pWrapper; - msgCb.queueFps[QUERY_QUEUE] = mmPutMsgToReadQueue; + msgCb.queueFps[QUERY_QUEUE] = mmPutMsgToQueryQueue; + msgCb.queueFps[READ_QUEUE] = mmPutMsgToReadQueue; msgCb.queueFps[WRITE_QUEUE] = mmPutMsgToWriteQueue; 
msgCb.sendReqFp = dndSendReqToDnode; msgCb.sendMnodeReqFp = dndSendReqToMnode; @@ -258,4 +259,4 @@ int32_t mmMonitorMnodeInfo(SMgmtWrapper *pWrapper, SMonClusterInfo *pClusterInfo SMonGrantInfo *pGrantInfo) { SMnodeMgmt *pMgmt = pWrapper->pMgmt; return mndGetMonitorInfo(pMgmt->pMnode, pClusterInfo, pVgroupInfo, pGrantInfo); -} \ No newline at end of file +} diff --git a/source/dnode/mgmt/mnode/src/mmMsg.c b/source/dnode/mgmt/mnode/src/mmMsg.c index d45a6c54f3..d04077baf8 100644 --- a/source/dnode/mgmt/mnode/src/mmMsg.c +++ b/source/dnode/mgmt/mnode/src/mmMsg.c @@ -156,10 +156,10 @@ void mmInitMsgHandles(SMgmtWrapper *pWrapper) { dndSetMsgHandle(pWrapper, TDMT_VND_CREATE_SMA_RSP, (NodeMsgFp)mmProcessWriteMsg, VND_VGID); dndSetMsgHandle(pWrapper, TDMT_VND_DROP_SMA_RSP, (NodeMsgFp)mmProcessWriteMsg, VND_VGID); - dndSetMsgHandle(pWrapper, TDMT_VND_QUERY, (NodeMsgFp)mmProcessReadMsg, MND_VGID); - dndSetMsgHandle(pWrapper, TDMT_VND_QUERY_CONTINUE, (NodeMsgFp)mmProcessReadMsg, MND_VGID); - dndSetMsgHandle(pWrapper, TDMT_VND_FETCH, (NodeMsgFp)mmProcessReadMsg, MND_VGID); - dndSetMsgHandle(pWrapper, TDMT_VND_DROP_TASK, (NodeMsgFp)mmProcessReadMsg, MND_VGID); - dndSetMsgHandle(pWrapper, TDMT_VND_QUERY_HEARTBEAT, (NodeMsgFp)mmProcessReadMsg, MND_VGID); + dndSetMsgHandle(pWrapper, TDMT_VND_QUERY, (NodeMsgFp)mmProcessQueryMsg, MND_VGID); + dndSetMsgHandle(pWrapper, TDMT_VND_QUERY_CONTINUE, (NodeMsgFp)mmProcessQueryMsg, MND_VGID); + dndSetMsgHandle(pWrapper, TDMT_VND_FETCH, (NodeMsgFp)mmProcessQueryMsg, MND_VGID); + dndSetMsgHandle(pWrapper, TDMT_VND_DROP_TASK, (NodeMsgFp)mmProcessQueryMsg, MND_VGID); + dndSetMsgHandle(pWrapper, TDMT_VND_QUERY_HEARTBEAT, (NodeMsgFp)mmProcessQueryMsg, MND_VGID); } diff --git a/source/dnode/mgmt/mnode/src/mmWorker.c b/source/dnode/mgmt/mnode/src/mmWorker.c index ef0dc5923a..1b408bbde6 100644 --- a/source/dnode/mgmt/mnode/src/mmWorker.c +++ b/source/dnode/mgmt/mnode/src/mmWorker.c @@ -61,6 +61,10 @@ int32_t mmProcessReadMsg(SMnodeMgmt *pMgmt, 
SNodeMsg *pMsg) { return mmPutMsgToWorker(pMgmt, &pMgmt->readWorker, pMsg); } +int32_t mmProcessQueryMsg(SMnodeMgmt *pMgmt, SNodeMsg *pMsg) { + return mmPutMsgToWorker(pMgmt, &pMgmt->queryWorker, pMsg); +} + static int32_t mmPutRpcMsgToWorker(SMnodeMgmt *pMgmt, SSingleWorker *pWorker, SRpcMsg *pRpc) { SNodeMsg *pMsg = taosAllocateQitem(sizeof(SNodeMsg)); if (pMsg == NULL) { @@ -90,11 +94,21 @@ int32_t mmPutMsgToReadQueue(SMgmtWrapper *pWrapper, SRpcMsg *pRpc) { return mmPutRpcMsgToWorker(pMgmt, &pMgmt->readWorker, pRpc); } +int32_t mmPutMsgToQueryQueue(SMgmtWrapper *pWrapper, SRpcMsg *pRpc) { + SMnodeMgmt *pMgmt = pWrapper->pMgmt; + return mmPutRpcMsgToWorker(pMgmt, &pMgmt->queryWorker, pRpc); +} + + int32_t mmStartWorker(SMnodeMgmt *pMgmt) { SSingleWorkerCfg cfg = {.minNum = 0, .maxNum = 1, .name = "mnode-read", .fp = (FItem)mmProcessQueue, .param = pMgmt}; - SSingleWorkerCfg readCfg = {.minNum = 2, .maxNum = 2, .name = "mnode-read", .fp = (FItem)mmProcessQueue, .param = pMgmt}; - if (tSingleWorkerInit(&pMgmt->readWorker, &readCfg) != 0) { + if (tSingleWorkerInit(&pMgmt->queryWorker, &cfg) != 0) { + dError("failed to start mnode-query worker since %s", terrstr()); + return -1; + } + + if (tSingleWorkerInit(&pMgmt->readWorker, &cfg) != 0) { dError("failed to start mnode-read worker since %s", terrstr()); return -1; } @@ -115,6 +129,7 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt) { void mmStopWorker(SMnodeMgmt *pMgmt) { tSingleWorkerCleanup(&pMgmt->readWorker); + tSingleWorkerCleanup(&pMgmt->queryWorker); tSingleWorkerCleanup(&pMgmt->writeWorker); tSingleWorkerCleanup(&pMgmt->syncWorker); dDebug("mnode workers are closed"); diff --git a/source/dnode/mnode/impl/src/mnode.c b/source/dnode/mnode/impl/src/mnode.c index 754bed030b..2a58968511 100644 --- a/source/dnode/mnode/impl/src/mnode.c +++ b/source/dnode/mnode/impl/src/mnode.c @@ -80,7 +80,7 @@ static void mndCalMqRebalance(void *param, void *tmrId) { .pCont = pReq, .contLen = contLen, }; - 
tmsgPutToQueue(&pMnode->msgCb, QUERY_QUEUE, &rpcMsg); + tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); } taosTmrReset(mndCalMqRebalance, MQ_TIMER_MS, pMnode, pMnode->timer, &pMnode->mqTimer); @@ -92,7 +92,7 @@ static void mndPullupTelem(void *param, void *tmrId) { int32_t contLen = 0; void *pReq = mndBuildTimerMsg(&contLen); SRpcMsg rpcMsg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pReq, .contLen = contLen}; - tmsgPutToQueue(&pMnode->msgCb, QUERY_QUEUE, &rpcMsg); + tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); } taosTmrReset(mndPullupTelem, TELEM_TIMER_MS, pMnode, pMnode->timer, &pMnode->telemTimer); From f9644f2611a591021da7d0b705666de16cfbdcb2 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 15:42:09 +0800 Subject: [PATCH 54/59] feature/qnode --- source/libs/nodes/src/nodesCodeFuncs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index 606bef6b89..b648e18809 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -1418,7 +1418,6 @@ static int32_t datumToJson(const void* pObj, SJson* pJson) { case TSDB_DATA_TYPE_VARCHAR: case TSDB_DATA_TYPE_VARBINARY: code = tjsonAddStringToObject(pJson, jkValueDatum, varDataVal(pNode->datum.p)); - nodesDebug("!!!!!!!!tojson, value:%s", varDataVal(pNode->datum.p)); break; case TSDB_DATA_TYPE_JSON: case TSDB_DATA_TYPE_DECIMAL: @@ -1488,7 +1487,6 @@ static int32_t jsonToDatum(const SJson* pJson, void* pObj) { } varDataSetLen(pNode->datum.p, pNode->node.resType.bytes); code = tjsonGetStringValue(pJson, jkValueDatum, varDataVal(pNode->datum.p)); - nodesDebug("varchar len:%d,string:%s", pNode->node.resType.bytes, varDataVal(pNode->datum.p)); break; } case TSDB_DATA_TYPE_JSON: @@ -2430,4 +2428,11 @@ int32_t nodesStringToList(const char* pStr, SNodeList** pList) { if (NULL == pJson) { return TSDB_CODE_FAILED; } - int32_t \ No newline at end of file + 
int32_t code = jsonToNodeListImpl(pJson, pList); + if (TSDB_CODE_SUCCESS != code) { + nodesDestroyList(*pList); + terrno = code; + return code; + } + return TSDB_CODE_SUCCESS; +} From 64f0fe2f8282fa5be90698a3492ca26c6b2991af Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 15:43:27 +0800 Subject: [PATCH 55/59] feature/scheduler --- source/libs/nodes/src/nodesCodeFuncs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index eed97234cc..b648e18809 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -1418,7 +1418,6 @@ static int32_t datumToJson(const void* pObj, SJson* pJson) { case TSDB_DATA_TYPE_VARCHAR: case TSDB_DATA_TYPE_VARBINARY: code = tjsonAddStringToObject(pJson, jkValueDatum, varDataVal(pNode->datum.p)); - nodesDebug("!!!!!!!!tojson, value:%s", varDataVal(pNode->datum.p)); break; case TSDB_DATA_TYPE_JSON: case TSDB_DATA_TYPE_DECIMAL: @@ -1488,7 +1487,6 @@ static int32_t jsonToDatum(const SJson* pJson, void* pObj) { } varDataSetLen(pNode->datum.p, pNode->node.resType.bytes); code = tjsonGetStringValue(pJson, jkValueDatum, varDataVal(pNode->datum.p)); - nodesDebug("!!!!!!!!!len:%d,string:%s", pNode->node.resType.bytes, varDataVal(pNode->datum.p)); break; } case TSDB_DATA_TYPE_JSON: @@ -2430,4 +2428,11 @@ int32_t nodesStringToList(const char* pStr, SNodeList** pList) { if (NULL == pJson) { return TSDB_CODE_FAILED; } - int32_t \ No newline at end of file + int32_t code = jsonToNodeListImpl(pJson, pList); + if (TSDB_CODE_SUCCESS != code) { + nodesDestroyList(*pList); + terrno = code; + return code; + } + return TSDB_CODE_SUCCESS; +} From aa518a92134dca4cce46bd51c25d44f7684ed074 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 25 Mar 2022 16:07:07 +0800 Subject: [PATCH 56/59] feature/qnode --- source/libs/transport/src/transSrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/source/libs/transport/src/transSrv.c b/source/libs/transport/src/transSrv.c index 3950a42ef0..787c538f2a 100644 --- a/source/libs/transport/src/transSrv.c +++ b/source/libs/transport/src/transSrv.c @@ -292,7 +292,7 @@ void uvOnSendCb(uv_write_t* req, int status) { } transQueuePop(&conn->srvMsgs); tfree(msg); - msg = (SSrvMsg*)transQueueGet(&conn->srvMsgs, 0); + msg = (SSrvMsg*)transQueuePop(&conn->srvMsgs); if (msg != NULL) { uvStartSendRespInternal(msg); } From 6591f1f9b5d963beef9add8532fc552a0ff27491 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 25 Mar 2022 17:53:32 +0800 Subject: [PATCH 57/59] handle except --- source/libs/transport/src/transSrv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/transport/src/transSrv.c b/source/libs/transport/src/transSrv.c index 6b0413c40e..d69ad6c983 100644 --- a/source/libs/transport/src/transSrv.c +++ b/source/libs/transport/src/transSrv.c @@ -292,7 +292,8 @@ void uvOnSendCb(uv_write_t* req, int status) { } transQueuePop(&conn->srvMsgs); tfree(msg); - msg = (SSrvMsg*)transQueuePop(&conn->srvMsgs); + + msg = (SSrvMsg*)transQueueGet(&conn->srvMsgs, 0); if (msg != NULL) { uvStartSendRespInternal(msg); } From 4188934040100f7b9a514fcb71fba2a2124016fd Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Sat, 26 Mar 2022 09:12:47 +0800 Subject: [PATCH 58/59] feature/qnode --- include/util/tdef.h | 2 ++ source/dnode/mgmt/mnode/src/mmWorker.c | 27 ++++++++++++++++++++- source/dnode/mnode/impl/src/mndInfoSchema.c | 1 + source/libs/executor/src/executorimpl.c | 1 + tests/script/tsim/db/error1.sim | 8 +----- 5 files changed, 31 insertions(+), 8 deletions(-) diff --git a/include/util/tdef.h b/include/util/tdef.h index fd8194e63e..655deb4625 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -109,6 +109,8 @@ extern const int32_t TYPE_BYTES[15]; #define TSDB_INS_TABLE_USER_USERS "user_users" #define TSDB_INS_TABLE_VGROUPS "vgroups" +#define TSDB_INS_USER_STABLES_DBNAME_COLID 2 + #define 
TSDB_TICK_PER_SECOND(precision) \ ((int64_t)((precision) == TSDB_TIME_PRECISION_MILLI ? 1e3L \ : ((precision) == TSDB_TIME_PRECISION_MICRO ? 1e6L : 1e9L))) diff --git a/source/dnode/mgmt/mnode/src/mmWorker.c b/source/dnode/mgmt/mnode/src/mmWorker.c index 1b408bbde6..27489b45d0 100644 --- a/source/dnode/mgmt/mnode/src/mmWorker.c +++ b/source/dnode/mgmt/mnode/src/mmWorker.c @@ -44,6 +44,30 @@ static void mmProcessQueue(SQueueInfo *pInfo, SNodeMsg *pMsg) { taosFreeQitem(pMsg); } +static void mmProcessQueryQueue(SQueueInfo *pInfo, SNodeMsg *pMsg) { + SMnodeMgmt *pMgmt = pInfo->ahandle; + + dTrace("msg:%p, will be processed in mnode queue", pMsg); + SRpcMsg *pRpc = &pMsg->rpcMsg; + int32_t code = -1; + + pMsg->pNode = pMgmt->pMnode; + code = mndProcessMsg(pMsg); + + if (pRpc->msgType & 1U) { + if (pRpc->handle == NULL) return; + if (code != 0) { + SRpcMsg rsp = {.handle = pRpc->handle, .code = code, .ahandle = pRpc->ahandle}; + dndSendRsp(pMgmt->pWrapper, &rsp); + } + } + + dTrace("msg:%p, is freed, result:0x%04x:%s", pMsg, code & 0XFFFF, tstrerror(code)); + rpcFreeCont(pRpc->pCont); + taosFreeQitem(pMsg); +} + + static int32_t mmPutMsgToWorker(SMnodeMgmt *pMgmt, SSingleWorker *pWorker, SNodeMsg *pMsg) { dTrace("msg:%p, put into worker %s", pMsg, pWorker->name); return taosWriteQitem(pWorker->queue, pMsg); @@ -102,8 +126,9 @@ int32_t mmPutMsgToQueryQueue(SMgmtWrapper *pWrapper, SRpcMsg *pRpc) { int32_t mmStartWorker(SMnodeMgmt *pMgmt) { SSingleWorkerCfg cfg = {.minNum = 0, .maxNum = 1, .name = "mnode-read", .fp = (FItem)mmProcessQueue, .param = pMgmt}; + SSingleWorkerCfg queryCfg = {.minNum = 0, .maxNum = 1, .name = "mnode-query", .fp = (FItem)mmProcessQueryQueue, .param = pMgmt}; - if (tSingleWorkerInit(&pMgmt->queryWorker, &cfg) != 0) { + if (tSingleWorkerInit(&pMgmt->queryWorker, &queryCfg) != 0) { dError("failed to start mnode-query worker since %s", terrstr()); return -1; } diff --git a/source/dnode/mnode/impl/src/mndInfoSchema.c 
b/source/dnode/mnode/impl/src/mndInfoSchema.c index 964c4ab424..aa110ff8b5 100644 --- a/source/dnode/mnode/impl/src/mndInfoSchema.c +++ b/source/dnode/mnode/impl/src/mndInfoSchema.c @@ -19,6 +19,7 @@ #define SYSTABLE_SCH_TABLE_NAME_LEN ((TSDB_TABLE_NAME_LEN - 1) + VARSTR_HEADER_SIZE) #define SYSTABLE_SCH_DB_NAME_LEN ((TSDB_DB_NAME_LEN - 1) + VARSTR_HEADER_SIZE) +//!!!! Note: only APPEND columns in below tables, NO insert !!!! static const SInfosTableSchema dnodesSchema[] = {{.name = "id", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT}, {.name = "endpoint", .bytes = TSDB_EP_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}, {.name = "vnodes", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT}, diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 177d09be76..dd47beb165 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -5612,6 +5612,7 @@ static SSDataBlock* doSysTableScan(SOperatorInfo *pOperator, bool* newgroup) { pInfo->req.type = pInfo->type; strncpy(pInfo->req.tb, tNameGetTableName(&pInfo->name), tListLen(pInfo->req.tb)); + getFullDBNameFromCondition(pInfo->pCondition, pInfo->req.db)); int32_t contLen = tSerializeSRetrieveTableReq(NULL, 0, &pInfo->req); char* buf1 = calloc(1, contLen); diff --git a/tests/script/tsim/db/error1.sim b/tests/script/tsim/db/error1.sim index 09f0149a5b..73d9e6fab6 100644 --- a/tests/script/tsim/db/error1.sim +++ b/tests/script/tsim/db/error1.sim @@ -16,17 +16,11 @@ create1: return -1 endi -# todo remove -sql create database useless_db - sql show dnodes if $data4_2 != ready then goto create1 endi -# todo remove -sql drop database useless_db - print ========== stop dnode2 system sh/exec.sh -n dnode2 -s stop -x SIGKILL @@ -103,4 +97,4 @@ if $data03 != 0 then endi system sh/exec.sh -n dnode1 -s stop -x SIGINT -system sh/exec.sh -n dnode2 -s stop -x SIGINT \ No newline at end of file +system sh/exec.sh -n dnode2 -s stop -x SIGINT From 
96eee800635a1a66e584253f7bb6567f77ec98bc Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Sat, 26 Mar 2022 15:03:22 +0800 Subject: [PATCH 59/59] feature/qnode --- include/libs/nodes/plannodes.h | 3 + include/libs/parser/parser.h | 1 + include/libs/planner/planner.h | 1 + include/libs/transport/trpc.h | 2 +- source/client/src/clientImpl.c | 3 +- source/dnode/mnode/impl/src/mndShow.c | 7 ++ source/dnode/mnode/impl/src/mndStb.c | 2 +- source/libs/executor/inc/executorimpl.h | 4 +- source/libs/executor/src/executorimpl.c | 90 +++++++++++++++++-- source/libs/nodes/src/nodesCloneFuncs.c | 1 + source/libs/nodes/src/nodesCodeFuncs.c | 14 +++ source/libs/parser/src/parTranslater.c | 1 + source/libs/planner/src/planLogicCreater.c | 1 + source/libs/planner/src/planPhysiCreater.c | 8 +- source/libs/qworker/src/qworker.c | 2 +- source/libs/scheduler/src/scheduler.c | 73 +++++++-------- source/libs/scheduler/test/schedulerTests.cpp | 2 +- tests/script/tsim/db/basic1.sim | 9 +- tests/script/tsim/db/basic6.sim | 11 ++- tests/script/tsim/db/error1.sim | 1 + tests/script/tsim/dnode/basic1.sim | 8 +- tests/script/tsim/tmq/basic.sim | 6 -- tests/script/tsim/user/basic1.sim | 8 +- 23 files changed, 170 insertions(+), 88 deletions(-) diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index 794e0ca85a..4d025eb9b7 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -48,6 +48,7 @@ typedef struct SScanLogicNode { uint8_t scanFlag; // denotes reversed scan of data or not STimeWindow scanRange; SName tableName; + bool showRewrite; } SScanLogicNode; typedef struct SJoinLogicNode { @@ -171,6 +172,8 @@ typedef SScanPhysiNode SStreamScanPhysiNode; typedef struct SSystemTableScanPhysiNode { SScanPhysiNode scan; SEpSet mgmtEpSet; + bool showRewrite; + int32_t accountId; } SSystemTableScanPhysiNode; typedef struct STableScanPhysiNode { diff --git a/include/libs/parser/parser.h b/include/libs/parser/parser.h index 2254298e5c..0747534721 100644 
--- a/include/libs/parser/parser.h +++ b/include/libs/parser/parser.h @@ -54,6 +54,7 @@ typedef struct SQuery { int32_t msgType; SArray* pDbList; SArray* pTableList; + bool showRewrite; } SQuery; int32_t qParseQuerySql(SParseContext* pCxt, SQuery** pQuery); diff --git a/include/libs/planner/planner.h b/include/libs/planner/planner.h index 38b30ec01e..8db78fccf5 100644 --- a/include/libs/planner/planner.h +++ b/include/libs/planner/planner.h @@ -29,6 +29,7 @@ typedef struct SPlanContext { SNode* pAstRoot; bool topicQuery; bool streamQuery; + bool showRewrite; } SPlanContext; // Create the physical plan for the query, according to the AST. diff --git a/include/libs/transport/trpc.h b/include/libs/transport/trpc.h index e5286e65f2..8125de7647 100644 --- a/include/libs/transport/trpc.h +++ b/include/libs/transport/trpc.h @@ -90,7 +90,7 @@ typedef struct { int32_t msgType; void *val; int32_t (*clone)(void *src, void **dst); - void (*free)(void *arg); + void (*freeFunc)(const void *arg); } SRpcBrokenlinkVal; typedef struct { diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index c99c7050dc..6e65a4267f 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -199,7 +199,8 @@ int32_t getPlan(SRequestObj* pRequest, SQuery* pQuery, SQueryPlan** pPlan, SArra .queryId = pRequest->requestId, .acctId = pRequest->pTscObj->acctId, .mgmtEpSet = getEpSet_s(&pRequest->pTscObj->pAppInfo->mgmtEp), - .pAstRoot = pQuery->pRoot + .pAstRoot = pQuery->pRoot, + .showRewrite = pQuery->showRewrite }; int32_t code = qCreateQueryPlan(&cxt, pPlan, pNodeList); if (code != 0) { diff --git a/source/dnode/mnode/impl/src/mndShow.c b/source/dnode/mnode/impl/src/mndShow.c index a830ee7104..dff918f135 100644 --- a/source/dnode/mnode/impl/src/mndShow.c +++ b/source/dnode/mnode/impl/src/mndShow.c @@ -357,6 +357,13 @@ static int32_t mndProcessRetrieveSysTableReq(SNodeMsg *pReq) { // if free flag is set, client wants to clean the resources if 
((retrieveReq.free & TSDB_QUERY_TYPE_FREE_RESOURCE) != TSDB_QUERY_TYPE_FREE_RESOURCE) { rowsRead = (*retrieveFp)(pReq, (SShowObj*) pShow, pRsp->data, rowsToRead); + if (rowsRead < 0) { + terrno = rowsRead; + rpcFreeCont(pRsp); + mDebug("show:0x%" PRIx64 ", retrieve completed", pShow->id); + mndReleaseShowObj((SShowObj*) pShow, true); + return -1; + } } mDebug("show:0x%" PRIx64 ", stop retrieve data, rowsRead:%d rowsToRead:%d", pShow->id, rowsRead, rowsToRead); diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index acf45a31d9..fdd03be710 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -1613,7 +1613,7 @@ static int32_t mndRetrieveStb(SNodeMsg *pReq, SShowObj *pShow, char *data, int32 SDbObj* pDb = NULL; if (strlen(pShow->db) > 0) { pDb = mndAcquireDb(pMnode, pShow->db); - if (pDb == NULL) return 0; + if (pDb == NULL) return terrno; } while (numOfRows < rows) { diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index c582873315..ead830394e 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -449,6 +449,8 @@ typedef struct SSysTableScanInfo { SEpSet epSet; tsem_t ready; + int32_t accountId; + bool showRewrite; SNode* pCondition; // db_name filter condition, to discard data that are not in current database void *pCur; // cursor for iterate the local table meta store. 
SArray *scanCols; // SArray scan column id list @@ -655,7 +657,7 @@ SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SExprInfo* p SOperatorInfo* createOrderOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, SArray* pOrderVal, SExecTaskInfo* pTaskInfo); SOperatorInfo* createSortedMergeOperatorInfo(SOperatorInfo** downstream, int32_t numOfDownstream, SExprInfo* pExprInfo, int32_t num, SArray* pOrderVal, SArray* pGroupInfo, SExecTaskInfo* pTaskInfo); SOperatorInfo* createSysTableScanOperatorInfo(void* pSysTableReadHandle, SSDataBlock* pResBlock, const SName* pName, - SNode* pCondition, SEpSet epset, SArray* colList, SExecTaskInfo* pTaskInfo); + SNode* pCondition, SEpSet epset, SArray* colList, SExecTaskInfo* pTaskInfo, bool showRewrite, int32_t accountId); SOperatorInfo* createLimitOperatorInfo(SOperatorInfo* downstream, SLimit* pLimit, SExecTaskInfo* pTaskInfo); SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, SSDataBlock* pResBlock, SInterval* pInterval, diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 2ce4549776..4f7f3d2d7e 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -5489,7 +5489,8 @@ SOperatorInfo* createStreamScanOperatorInfo(void *streamReadHandle, SSDataBlock* } static int32_t loadSysTableContentCb(void* param, const SDataBuf* pMsg, int32_t code) { - SSysTableScanInfo* pScanResInfo = (SSysTableScanInfo*) param; + SOperatorInfo* operator = (SOperatorInfo *)param; + SSysTableScanInfo* pScanResInfo = (SSysTableScanInfo *)operator->info; if (TSDB_CODE_SUCCESS == code) { pScanResInfo->pRsp = pMsg->pData; @@ -5498,6 +5499,8 @@ static int32_t loadSysTableContentCb(void* param, const SDataBuf* pMsg, int32_t pRsp->useconds = htobe64(pRsp->useconds); pRsp->handle = htobe64(pRsp->handle); pRsp->compLen = htonl(pRsp->compLen); + } else { + 
operator->pTaskInfo->code = code; } tsem_post(&pScanResInfo->ready); @@ -5544,6 +5547,64 @@ static SSDataBlock* doFilterResult(SSysTableScanInfo* pInfo) { return pInfo->pRes->info.rows == 0? NULL:pInfo->pRes; } +EDealRes getDBNameFromConditionWalker(SNode* pNode, void* pContext) { + int32_t code = TSDB_CODE_SUCCESS; + ENodeType nType = nodeType(pNode); + + switch (nType) { + case QUERY_NODE_OPERATOR: { + SOperatorNode *node = (SOperatorNode *)pNode; + + if (OP_TYPE_EQUAL == node->opType) { + *(int32_t *)pContext = 1; + return DEAL_RES_CONTINUE; + } + + *(int32_t *)pContext = 0; + + return DEAL_RES_IGNORE_CHILD; + } + case QUERY_NODE_COLUMN: { + if (1 != *(int32_t *)pContext) { + return DEAL_RES_CONTINUE; + } + + SColumnNode *node = (SColumnNode *)pNode; + if (TSDB_INS_USER_STABLES_DBNAME_COLID == node->colId) { + *(int32_t *)pContext = 2; + return DEAL_RES_CONTINUE; + } + + *(int32_t *)pContext = 0; + return DEAL_RES_CONTINUE; + } + case QUERY_NODE_VALUE: { + if (2 != *(int32_t *)pContext) { + return DEAL_RES_CONTINUE; + } + + SValueNode *node = (SValueNode *)pNode; + char *dbName = nodesGetValueFromNode(node); + strncpy(pContext, varDataVal(dbName), varDataLen(dbName)); + *((char *)pContext + varDataLen(dbName)) = 0; + return DEAL_RES_ERROR; // stop walk + } + default: + break; + } + + return DEAL_RES_CONTINUE; +} + + +void getDBNameFromCondition(SNode *pCondition, char *dbName) { + if (NULL == pCondition) { + return; + } + + nodesWalkNode(pCondition, getDBNameFromConditionWalker, dbName); +} + static SSDataBlock* doSysTableScan(SOperatorInfo *pOperator, bool* newgroup) { // build message and send to mnode to fetch the content of system tables. 
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; @@ -5600,7 +5661,11 @@ static SSDataBlock* doSysTableScan(SOperatorInfo *pOperator, bool* newgroup) { pInfo->req.type = pInfo->type; strncpy(pInfo->req.tb, tNameGetTableName(&pInfo->name), tListLen(pInfo->req.tb)); - getFullDBNameFromCondition(pInfo->pCondition, pInfo->req.db)); + if (pInfo->showRewrite) { + char dbName[TSDB_DB_NAME_LEN] = {0}; + getDBNameFromCondition(pInfo->pCondition, dbName); + sprintf(pInfo->req.db, "%d.%s", pInfo->accountId, dbName); + } int32_t contLen = tSerializeSRetrieveTableReq(NULL, 0, &pInfo->req); char* buf1 = taosMemoryCalloc(1, contLen); @@ -5614,7 +5679,7 @@ static SSDataBlock* doSysTableScan(SOperatorInfo *pOperator, bool* newgroup) { return NULL; } - pMsgSendInfo->param = pInfo; + pMsgSendInfo->param = pOperator; pMsgSendInfo->msgInfo.pData = buf1; pMsgSendInfo->msgInfo.len = contLen; pMsgSendInfo->msgType = TDMT_MND_SYSTABLE_RETRIEVE; @@ -5624,6 +5689,10 @@ static SSDataBlock* doSysTableScan(SOperatorInfo *pOperator, bool* newgroup) { int32_t code = asyncSendMsgToServer(pInfo->pTransporter, &pInfo->epSet, &transporterId, pMsgSendInfo); tsem_wait(&pInfo->ready); + if (pTaskInfo->code) { + return NULL; + } + SRetrieveMetaTableRsp* pRsp = pInfo->pRsp; pInfo->req.showId = pRsp->handle; @@ -5645,7 +5714,7 @@ static SSDataBlock* doSysTableScan(SOperatorInfo *pOperator, bool* newgroup) { } SOperatorInfo* createSysTableScanOperatorInfo(void* pSysTableReadHandle, SSDataBlock* pResBlock, const SName* pName, - SNode* pCondition, SEpSet epset, SArray* colList, SExecTaskInfo* pTaskInfo) { + SNode* pCondition, SEpSet epset, SArray* colList, SExecTaskInfo* pTaskInfo, bool showRewrite, int32_t accountId) { SSysTableScanInfo* pInfo = taosMemoryCalloc(1, sizeof(SSysTableScanInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (pInfo == NULL || pOperator == NULL) { @@ -5655,10 +5724,12 @@ SOperatorInfo* createSysTableScanOperatorInfo(void* pSysTableReadHandle, SSDataB 
return NULL; } - pInfo->pRes = pResBlock; - pInfo->capacity = 4096; - pInfo->pCondition = pCondition; - pInfo->scanCols = colList; + pInfo->accountId = accountId; + pInfo->showRewrite = showRewrite; + pInfo->pRes = pResBlock; + pInfo->capacity = 4096; + pInfo->pCondition = pCondition; + pInfo->scanCols = colList; // TODO remove it int32_t tableType = 0; @@ -8531,7 +8602,8 @@ SOperatorInfo* doCreateOperatorTreeNode(SPhysiNode* pPhyNode, SExecTaskInfo* pTa SArray* colList = extractScanColumnId(pScanNode->pScanCols); SOperatorInfo* pOperator = createSysTableScanOperatorInfo(pHandle->meta, pResBlock, &pScanNode->tableName, - pScanNode->node.pConditions, pSysScanPhyNode->mgmtEpSet, colList, pTaskInfo); + pScanNode->node.pConditions, pSysScanPhyNode->mgmtEpSet, + colList, pTaskInfo, pSysScanPhyNode->showRewrite, pSysScanPhyNode->accountId); return pOperator; } else { ASSERT(0); diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index 078f3ab3e6..a9660c8573 100644 --- a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -237,6 +237,7 @@ static SNode* logicScanCopy(const SScanLogicNode* pSrc, SScanLogicNode* pDst) { COPY_SCALAR_FIELD(scanFlag); COPY_SCALAR_FIELD(scanRange); COPY_SCALAR_FIELD(tableName); + COPY_SCALAR_FIELD(showRewrite); return (SNode*)pDst; } diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index 1acc3c51c5..d7caf6e511 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -764,6 +764,8 @@ static int32_t jsonToEpSet(const SJson* pJson, void* pObj) { } static const char* jkSysTableScanPhysiPlanMnodeEpSet = "MnodeEpSet"; +static const char* jkSysTableScanPhysiPlanShowRewrite = "ShowRewrite"; +static const char* jkSysTableScanPhysiPlanAccountId = "AccountId"; static int32_t physiSysTableScanNodeToJson(const void* pObj, SJson* pJson) { const SSystemTableScanPhysiNode* pNode = (const 
SSystemTableScanPhysiNode*)pObj; @@ -772,6 +774,12 @@ static int32_t physiSysTableScanNodeToJson(const void* pObj, SJson* pJson) { if (TSDB_CODE_SUCCESS == code) { code = tjsonAddObject(pJson, jkSysTableScanPhysiPlanMnodeEpSet, epSetToJson, &pNode->mgmtEpSet); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddBoolToObject(pJson, jkSysTableScanPhysiPlanShowRewrite, pNode->showRewrite); + } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddIntegerToObject(pJson, jkSysTableScanPhysiPlanAccountId, pNode->accountId); + } return code; } @@ -783,6 +791,12 @@ static int32_t jsonToPhysiSysTableScanNode(const SJson* pJson, void* pObj) { if (TSDB_CODE_SUCCESS == code) { code = tjsonToObject(pJson, jkSysTableScanPhysiPlanMnodeEpSet, jsonToEpSet, &pNode->mgmtEpSet); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonGetBoolValue(pJson, jkSysTableScanPhysiPlanShowRewrite, &pNode->showRewrite); + } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonGetNumberValue(pJson, jkSysTableScanPhysiPlanAccountId, pNode->accountId); + } return code; } diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 2f722bb82b..1c721f3caf 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -1962,6 +1962,7 @@ static int32_t rewriteShow(STranslateContext* pCxt, SQuery* pQuery) { code = createShowCondition((SShowStmt*)pQuery->pRoot, pStmt); } if (TSDB_CODE_SUCCESS == code) { + pQuery->showRewrite = true; nodesDestroyNode(pQuery->pRoot); pQuery->pRoot = (SNode*)pStmt; } diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 84fa52a070..6ea476a334 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -161,6 +161,7 @@ static int32_t createScanLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect pScan->tableName.acctId = pCxt->pPlanCxt->acctId; strcpy(pScan->tableName.dbname, pRealTable->table.dbName); 
strcpy(pScan->tableName.tname, pRealTable->table.tableName); + pScan->showRewrite = pCxt->pPlanCxt->showRewrite; // set columns to scan SNodeList* pCols = NULL; diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index 4b000da4e5..d8d090d280 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -331,18 +331,14 @@ static int32_t createSystemTableScanPhysiNode(SPhysiPlanContext* pCxt, SSubplan* return TSDB_CODE_OUT_OF_MEMORY; } + pScan->showRewrite = pScanLogicNode->showRewrite; + pScan->accountId = pCxt->pPlanCxt->acctId; if (0 == strcmp(pScanLogicNode->tableName.tname, TSDB_INS_TABLE_USER_TABLES)) { vgroupInfoToNodeAddr(pScanLogicNode->pVgroupList->vgroups, &pSubplan->execNode); taosArrayPush(pCxt->pExecNodeList, &pSubplan->execNode); } else { SQueryNodeAddr addr = { .nodeId = MND_VGID, .epSet = pCxt->pPlanCxt->mgmtEpSet }; taosArrayPush(pCxt->pExecNodeList, &addr); - - //for (int32_t i = 0; i < pScanLogicNode->pVgroupList->numOfVgroups; ++i) { - // SQueryNodeAddr addr; - // vgroupInfoToNodeAddr(pScanLogicNode->pVgroupList->vgroups + i, &addr); - // taosArrayPush(pCxt->pExecNodeList, &addr); - //} } pScan->mgmtEpSet = pCxt->pPlanCxt->mgmtEpSet; tNameGetFullDbName(&pScanLogicNode->tableName, pSubplan->dbFName); diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index e7681116e0..70ca0f736b 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -917,7 +917,7 @@ _return: qwReleaseTaskCtx(mgmt, ctx); } - if (TSDB_CODE_SUCCESS == code && readyConnection) { + if (readyConnection) { qwBuildAndSendReadyRsp(readyConnection, code); QW_TASK_DLOG("ready msg rsped, handle:%p, code:%x - %s", readyConnection->handle, code, tstrerror(code)); } diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 38196f485f..5af13d97ca 100644 --- a/source/libs/scheduler/src/scheduler.c 
+++ b/source/libs/scheduler/src/scheduler.c @@ -74,15 +74,15 @@ void schFreeRpcCtx(SRpcCtx *pCtx) { while (pIter) { SRpcCtxVal *ctxVal = (SRpcCtxVal *)pIter; - (*ctxVal->free)(ctxVal->val); + (*ctxVal->freeFunc)(ctxVal->val); pIter = taosHashIterate(pCtx->args, pIter); } taosHashCleanup(pCtx->args); - if (pCtx->brokenVal.free) { - (*pCtx->brokenVal.free)(pCtx->brokenVal.val); + if (pCtx->brokenVal.freeFunc) { + (*pCtx->brokenVal.freeFunc)(pCtx->brokenVal.val); } } @@ -1254,18 +1254,18 @@ int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp) { return TSDB_CODE_SUCCESS; } -void schFreeRpcCtxVal(void *arg) { +void schFreeRpcCtxVal(const void *arg) { if (NULL == arg) { return; } - SMsgSendInfo* pMsgSendInfo = arg; - tfree(pMsgSendInfo->param); - tfree(pMsgSendInfo); + SMsgSendInfo* pMsgSendInfo = (SMsgSendInfo *)arg; + taosMemoryFreeClear(pMsgSendInfo->param); + taosMemoryFreeClear(pMsgSendInfo); } int32_t schMakeTaskCallbackParam(SSchJob *pJob, SSchTask *pTask, void **pParam) { - SSchTaskCallbackParam *param = calloc(1, sizeof(SSchTaskCallbackParam)); + SSchTaskCallbackParam *param = taosMemoryCalloc(1, sizeof(SSchTaskCallbackParam)); if (NULL == param) { SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchTaskCallbackParam)); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1282,7 +1282,7 @@ int32_t schMakeTaskCallbackParam(SSchJob *pJob, SSchTask *pTask, void **pParam) } int32_t schMakeHbCallbackParam(SSchJob *pJob, SSchTask *pTask, void **pParam) { - SSchHbCallbackParam *param = calloc(1, sizeof(SSchHbCallbackParam)); + SSchHbCallbackParam *param = taosMemoryCalloc(1, sizeof(SSchHbCallbackParam)); if (NULL == param) { SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchHbCallbackParam)); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1306,7 +1306,7 @@ int32_t schMakeBrokenLinkVal(SSchJob *pJob, SSchTask *pTask, SRpcBrokenlinkVal * int32_t code = 0; SMsgSendInfo* pMsgSendInfo = NULL; - pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo)); + pMsgSendInfo = 
taosMemoryCalloc(1, sizeof(SMsgSendInfo)); if (NULL == pMsgSendInfo) { SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1327,14 +1327,14 @@ int32_t schMakeBrokenLinkVal(SSchJob *pJob, SSchTask *pTask, SRpcBrokenlinkVal * brokenVal->msgType = msgType; brokenVal->val = pMsgSendInfo; brokenVal->clone = schCloneSMsgSendInfo; - brokenVal->free = schFreeRpcCtxVal; + brokenVal->freeFunc = schFreeRpcCtxVal; return TSDB_CODE_SUCCESS; _return: - tfree(pMsgSendInfo->param); - tfree(pMsgSendInfo); + taosMemoryFreeClear(pMsgSendInfo->param); + taosMemoryFreeClear(pMsgSendInfo); SCH_RET(code); } @@ -1350,13 +1350,13 @@ int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo)); + pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); if (NULL == pMsgSendInfo) { SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - param = calloc(1, sizeof(SSchTaskCallbackParam)); + param = taosMemoryCalloc(1, sizeof(SSchTaskCallbackParam)); if (NULL == param) { SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchTaskCallbackParam)); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1374,7 +1374,7 @@ int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { pMsgSendInfo->param = param; pMsgSendInfo->fp = fp; - SRpcCtxVal ctxVal = {.val = pMsgSendInfo, .clone = schCloneSMsgSendInfo, .free = schFreeRpcCtxVal}; + SRpcCtxVal ctxVal = {.val = pMsgSendInfo, .clone = schCloneSMsgSendInfo, .freeFunc = schFreeRpcCtxVal}; if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1387,8 +1387,8 @@ int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { _return: taosHashCleanup(pCtx->args); - tfree(param); - 
tfree(pMsgSendInfo); + taosMemoryFreeClear(param); + taosMemoryFreeClear(pMsgSendInfo); SCH_RET(code); } @@ -1409,13 +1409,13 @@ int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo)); + pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); if (NULL == pMsgSendInfo) { SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - param = calloc(1, sizeof(SSchHbCallbackParam)); + param = taosMemoryCalloc(1, sizeof(SSchHbCallbackParam)); if (NULL == param) { SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchHbCallbackParam)); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1431,7 +1431,7 @@ int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { pMsgSendInfo->param = param; pMsgSendInfo->fp = fp; - SRpcCtxVal ctxVal = {.val = pMsgSendInfo, .clone = schCloneSMsgSendInfo, .free = schFreeRpcCtxVal}; + SRpcCtxVal ctxVal = {.val = pMsgSendInfo, .clone = schCloneSMsgSendInfo, .freeFunc = schFreeRpcCtxVal}; if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1444,8 +1444,8 @@ int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { _return: taosHashCleanup(pCtx->args); - tfree(param); - tfree(pMsgSendInfo); + taosMemoryFreeClear(param); + taosMemoryFreeClear(pMsgSendInfo); SCH_RET(code); } @@ -1479,7 +1479,7 @@ int32_t schRegisterHbConnection(SSchJob *pJob, SSchTask *pTask, SQueryNodeEpId * int32_t schCloneCallbackParam(SSchCallbackParamHeader *pSrc, SSchCallbackParamHeader **pDst) { if (pSrc->isHbParam) { - SSchHbCallbackParam *dst = malloc(sizeof(SSchHbCallbackParam)); + SSchHbCallbackParam *dst = taosMemoryMalloc(sizeof(SSchHbCallbackParam)); if (NULL == dst) { qError("malloc SSchHbCallbackParam failed"); 
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1491,7 +1491,7 @@ int32_t schCloneCallbackParam(SSchCallbackParamHeader *pSrc, SSchCallbackParamHe return TSDB_CODE_SUCCESS; } - SSchTaskCallbackParam *dst = malloc(sizeof(SSchTaskCallbackParam)); + SSchTaskCallbackParam *dst = taosMemoryMalloc(sizeof(SSchTaskCallbackParam)); if (NULL == dst) { qError("malloc SSchTaskCallbackParam failed"); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1506,7 +1506,7 @@ int32_t schCloneCallbackParam(SSchCallbackParamHeader *pSrc, SSchCallbackParamHe int32_t schCloneSMsgSendInfo(void *src, void **dst) { SMsgSendInfo *pSrc = src; int32_t code = 0; - SMsgSendInfo *pDst = malloc(sizeof(*pSrc)); + SMsgSendInfo *pDst = taosMemoryMalloc(sizeof(*pSrc)); if (NULL == pDst) { qError("malloc SMsgSendInfo for rpcCtx failed, len:%d", (int32_t)sizeof(*pSrc)); SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1523,7 +1523,7 @@ int32_t schCloneSMsgSendInfo(void *src, void **dst) { _return: - tfree(pDst); + taosMemoryFreeClear(pDst); SCH_RET(code); } @@ -1553,7 +1553,7 @@ int32_t schCloneHbRpcCtx(SRpcCtx *pSrc, SRpcCtx *pDst) { if (taosHashPut(pDst->args, msgType, sizeof(*msgType), &dst, sizeof(dst))) { qError("taosHashPut msg %d to rpcCtx failed", *msgType); - (*dst.free)(dst.val); + (*dst.freeFunc)(dst.val); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } @@ -1601,8 +1601,9 @@ int32_t schAsyncSendMsg(SSchJob *pJob, SSchTask *pTask, void *transport, SEpSet* pMsgSendInfo->msgType = msgType; pMsgSendInfo->fp = fp; - qDebug("start to send %s msg, refId:%" PRIx64 "instance:%p, handle:%p", - TMSG_INFO(msgType), pJob->refId, trans->transInst, trans->transHandle); + qDebug("start to send %s msg to node[%d,%s,%d], refId:%" PRIx64 "instance:%p, handle:%p", + TMSG_INFO(msgType), ntohl(((SMsgHead *)msg)->vgId), epSet->eps[epSet->inUse].fqdn, epSet->eps[epSet->inUse].port, + pJob->refId, trans->transInst, trans->transHandle); int64_t transporterId = 0; code = asyncSendMsgToServerExt(trans->transInst, epSet, 
&transporterId, pMsgSendInfo, persistHandle, ctx); @@ -1649,7 +1650,7 @@ int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId) { qError("tSerializeSSchedulerHbReq hbReq failed, size:%d", msgSize); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - void *msg = calloc(1, msgSize); + void *msg = taosMemoryCalloc(1, msgSize); if (NULL == msg) { qError("calloc hb req %d failed", msgSize); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1660,13 +1661,13 @@ int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId) { SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - SMsgSendInfo *pMsgSendInfo = calloc(1, sizeof(SMsgSendInfo)); + SMsgSendInfo *pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); if (NULL == pMsgSendInfo) { qError("calloc SMsgSendInfo failed"); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - SSchTaskCallbackParam *param = calloc(1, sizeof(SSchTaskCallbackParam)); + SSchTaskCallbackParam *param = taosMemoryCalloc(1, sizeof(SSchTaskCallbackParam)); if (NULL == param) { qError("calloc SSchTaskCallbackParam failed"); SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); @@ -1702,9 +1703,9 @@ int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId) { _return: - tfree(msg); - tfree(param); - tfree(pMsgSendInfo); + taosMemoryFreeClear(msg); + taosMemoryFreeClear(param); + taosMemoryFreeClear(pMsgSendInfo); schFreeRpcCtx(&rpcCtx); SCH_RET(code); } diff --git a/source/libs/scheduler/test/schedulerTests.cpp b/source/libs/scheduler/test/schedulerTests.cpp index 445d95132b..cf04b06579 100644 --- a/source/libs/scheduler/test/schedulerTests.cpp +++ b/source/libs/scheduler/test/schedulerTests.cpp @@ -805,7 +805,7 @@ TEST(queryTest, readyFirstCase) { SRetrieveTableRsp *pRsp = (SRetrieveTableRsp *)data; ASSERT_EQ(pRsp->completed, 1); ASSERT_EQ(pRsp->numOfRows, 10); - tfree(data); + taosMemoryFreeClear(data); data = NULL; code = schedulerFetchRows(job, &data); diff --git a/tests/script/tsim/db/basic1.sim b/tests/script/tsim/db/basic1.sim index 7877bfc3a7..c07ebd0400 100644 --- 
a/tests/script/tsim/db/basic1.sim +++ b/tests/script/tsim/db/basic1.sim @@ -39,11 +39,10 @@ endi print =============== drop database sql drop database d1 -# todo release -#sql show databases -#if $rows != 1 then -# return -1 -#endi +sql show databases +if $rows != 1 then + return -1 +endi print =============== more databases sql create database d2 vgroups 2 diff --git a/tests/script/tsim/db/basic6.sim b/tests/script/tsim/db/basic6.sim index 48b3fccd47..7e57fe8f1b 100644 --- a/tests/script/tsim/db/basic6.sim +++ b/tests/script/tsim/db/basic6.sim @@ -58,11 +58,10 @@ endi print =============== step3 sql drop database $db -# todo release -#sql show databases -#if $rows != 1 then -# return -1 -#endi +sql show databases +if $rows != 1 then + return -1 +endi print =============== step4 sql_error drop database $db @@ -319,4 +318,4 @@ if $rows != 0 then return -1 endi -system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file +system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/db/error1.sim b/tests/script/tsim/db/error1.sim index 73d9e6fab6..6f62228ae7 100644 --- a/tests/script/tsim/db/error1.sim +++ b/tests/script/tsim/db/error1.sim @@ -61,6 +61,7 @@ endi print ========== stop dnode2 system sh/exec.sh -n dnode2 -s stop -x SIGKILL +sleep 1000 print =============== create database sql_error drop database d1 diff --git a/tests/script/tsim/dnode/basic1.sim b/tests/script/tsim/dnode/basic1.sim index c5b83aa3a3..6f0d5f88b8 100644 --- a/tests/script/tsim/dnode/basic1.sim +++ b/tests/script/tsim/dnode/basic1.sim @@ -5,9 +5,6 @@ system sh/exec.sh -n dnode1 -s start system sh/exec.sh -n dnode2 -s start sql connect -# todo remove -sql create database useless_db - print =============== show dnodes sql show dnodes; if $rows != 1 then @@ -83,9 +80,6 @@ if $data02 != master then return -1 endi -# todo remove -sql drop database useless_db - print =============== create database sql create database d1 vgroups 4; sql create database d2; @@ -202,4 
+196,4 @@ if $data00 != 1 then endi system sh/exec.sh -n dnode1 -s stop -x SIGINT -system sh/exec.sh -n dnode2 -s stop -x SIGINT \ No newline at end of file +system sh/exec.sh -n dnode2 -s stop -x SIGINT diff --git a/tests/script/tsim/tmq/basic.sim b/tests/script/tsim/tmq/basic.sim index 3e42c2cbd7..876cf7e266 100644 --- a/tests/script/tsim/tmq/basic.sim +++ b/tests/script/tsim/tmq/basic.sim @@ -6,9 +6,6 @@ system sh/exec.sh -n dnode1 -s start sleep 500 sql connect -# todo remove -sql create database useless_db - $loop_cnt = 0 check_dnode_ready: $loop_cnt = $loop_cnt + 1 @@ -26,9 +23,6 @@ if $data04 != ready then goto check_dnode_ready endi -# todo remove -sql drop database useless_db - #root@trd02 /data2/dnode $ tmq_demo --help #Used to tmq_demo # -c Configuration directory, default is diff --git a/tests/script/tsim/user/basic1.sim b/tests/script/tsim/user/basic1.sim index e14aa3af2f..7af5ba8d00 100644 --- a/tests/script/tsim/user/basic1.sim +++ b/tests/script/tsim/user/basic1.sim @@ -3,9 +3,6 @@ system sh/deploy.sh -n dnode1 -i 1 system sh/exec.sh -n dnode1 -s start sql connect -# todo remove -sql create database useless_db - print =============== show users sql show users if $rows != 1 then @@ -74,7 +71,4 @@ print $data10 $data11 $data22 print $data20 $data11 $data22 print $data30 $data31 $data32 -# todo remove -sql drop database useless_db - -system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file +system sh/exec.sh -n dnode1 -s stop -x SIGINT