From 249aecacda766ee5b2560ae5416985b201c80d69 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Sat, 21 May 2022 21:26:27 +0800 Subject: [PATCH 01/32] enh(sync) sync/mnode integration --- include/common/tmsgdef.h | 2 + include/dnode/mnode/mnode.h | 3 + source/dnode/mgmt/mgmt_mnode/inc/mmInt.h | 2 + source/dnode/mgmt/mgmt_mnode/src/mmInt.c | 5 + source/dnode/mgmt/mgmt_mnode/src/mmWorker.c | 42 ++++++++ source/dnode/mnode/impl/inc/mndInt.h | 4 +- source/dnode/mnode/impl/src/mndSync.c | 107 +++++++++++++++++++- source/dnode/mnode/impl/src/mndTrans.c | 5 +- source/dnode/mnode/impl/src/mnode.c | 20 +++- source/libs/sync/src/syncMain.c | 19 ++-- 10 files changed, 193 insertions(+), 16 deletions(-) diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 455898585a..bf683fc9ac 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -158,6 +158,8 @@ enum { TD_DEF_MSG_TYPE(TDMT_MND_DROP_INDEX, "mnode-drop-index", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_GET_DB_CFG, "mnode-get-db-cfg", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_GET_INDEX, "mnode-get-index", NULL, NULL) + + TD_DEF_MSG_TYPE(TDMT_MND_APPLY_MSG, "mnode-apply-msg", NULL, NULL) // Requests handled by VNODE TD_NEW_MSG_SEG(TDMT_VND_MSG) diff --git a/include/dnode/mnode/mnode.h b/include/dnode/mnode/mnode.h index 28c470a443..16cb4cdd70 100644 --- a/include/dnode/mnode/mnode.h +++ b/include/dnode/mnode/mnode.h @@ -90,6 +90,9 @@ int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad); */ int32_t mndProcessMsg(SRpcMsg *pMsg); + +int32_t mndProcessApplyMsg(SRpcMsg *pMsg); + /** * @brief Generate machine code */ diff --git a/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h b/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h index 75e83d6547..380ae63b8d 100644 --- a/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h +++ b/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h @@ -33,6 +33,7 @@ typedef struct SMnodeMgmt { SSingleWorker readWorker; SSingleWorker writeWorker; SSingleWorker syncWorker; + SSingleWorker applyWorker; SSingleWorker monitorWorker; SReplica replicas[TSDB_MAX_REPLICA]; int8_t replica; @@ -59,6 +60,7 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt); void mmStopWorker(SMnodeMgmt *pMgmt); int32_t mmPutNodeMsgToWriteQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t mmPutNodeMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); +int32_t mmPutNodeMsgToApplyQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t mmPutNodeMsgToReadQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t mmPutNodeMsgToQueryQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t mmPutNodeMsgToMonitorQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmInt.c b/source/dnode/mgmt/mgmt_mnode/src/mmInt.c index 4f7fd4a1c0..68b2889278 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmInt.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmInt.c @@ -122,6 +122,11 @@ static int32_t mmOpen(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) { return -1; } + if (syncInit() != 0) { + dError("failed to init sync since %s", terrstr()); + return -1; + } + SMnodeMgmt *pMgmt = taosMemoryCalloc(1, sizeof(SMnodeMgmt)); if (pMgmt == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c index c4314a57b1..04fe891ce9 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c @@ -56,6 +56,32 @@ static void mmProcessQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { taosFreeQitem(pMsg); } + +static void mmProcessApplyQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { + SMnodeMgmt *pMgmt = pInfo->ahandle; + int32_t code = -1; + tmsg_t msgType = pMsg->msgType; + bool isRequest = msgType & 1U; + dTrace("msg:%p, get from mnode-query queue", pMsg); + + pMsg->info.node = pMgmt->pMnode; + + mndProcessApplyMsg(pMsg); + + /* + if (isRequest) { + if (pMsg->info.handle != NULL && code != 0) { + if (code != 0 && terrno != 0) code = terrno; + mmSendRsp(pMsg, code); + } + } + */ + + dTrace("msg:%p, is freed, code:0x%x", pMsg, code); + rpcFreeCont(pMsg->pCont); + taosFreeQitem(pMsg); +} + static void mmProcessQueryQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { SMnodeMgmt *pMgmt = pInfo->ahandle; int32_t code = -1; @@ -92,6 +118,10 @@ int32_t mmPutNodeMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) { return mmPutNodeMsgToWorker(&pMgmt->syncWorker, pMsg); } +int32_t mmPutNodeMsgToApplyQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) { + return mmPutNodeMsgToWorker(&pMgmt->applyWorker, pMsg); +} + int32_t mmPutNodeMsgToReadQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) { return mmPutNodeMsgToWorker(&pMgmt->readWorker, pMsg); } @@ -179,6 +209,18 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt) { return -1; } + SSingleWorkerCfg aCfg = { + .min = 1, + .max = 1, + .name = "mnode-apply", + .fp = (FItem)mmProcessApplyQueue, + .param = pMgmt, + }; + if (tSingleWorkerInit(&pMgmt->applyWorker, &aCfg) != 0) { + dError("failed to start mnode mnode-apply worker since %s", terrstr()); + return -1; + } + SSingleWorkerCfg mCfg = { .min = 1, .max = 1, diff --git a/source/dnode/mnode/impl/inc/mndInt.h b/source/dnode/mnode/impl/inc/mndInt.h index 5258fa9e02..0982096d25 100644 --- a/source/dnode/mnode/impl/inc/mndInt.h +++ b/source/dnode/mnode/impl/inc/mndInt.h @@ -75,7 +75,9 @@ typedef struct { int32_t errCode; sem_t syncSem; SWal *pWal; - SSyncNode *pSyncNode; + //SSyncNode *pSyncNode; + int64_t sync; + ESyncState state; } SSyncMgmt; diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 3dbe3241a7..e7144f5673 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -46,6 +46,12 @@ static void mndCloseWal(SMnode *pMnode) { } static int32_t mndRestoreWal(SMnode *pMnode) { + +// do nothing +return 0; + +#if 0 + SWal *pWal = pMnode->syncMgmt.pWal; SSdb *pSdb = pMnode->pSdb; int64_t lastSdbVer = sdbUpdateVer(pSdb, 0); @@ -114,6 +120,70 @@ static int32_t mndRestoreWal(SMnode *pMnode) { _OVER: walCloseReadHandle(pHandle); return code; + +#endif + +} + +int32_t mndSyncEqMsg(const SMsgCb* msgcb, SRpcMsg *pMsg) { + int32_t ret = 0; + if (msgcb->queueFps[SYNC_QUEUE] != NULL) { + tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); + } else { + mError("mndSyncEqMsg queue is NULL, SYNC_QUEUE:%d", SYNC_QUEUE); + } + return ret; +} + +int32_t mndSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { + int32_t ret = 0; + pMsg->info.noResp = 1; + tmsgSendReq(pEpSet, pMsg); + return ret; +} + +void mndSyncCommitCb(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { + SyncIndex beginIndex = SYNC_INDEX_INVALID; + if (pFsm->FpGetSnapshot != NULL) { + SSnapshot snapshot; + pFsm->FpGetSnapshot(pFsm, &snapshot); + beginIndex = snapshot.lastApplyIndex; + } + + if (cbMeta.index > beginIndex) { + SMnode *pMnode = pFsm->data; + SSyncMgmt *pMgmt = &pMnode->syncMgmt; + + mndProcessApplyMsg((SRpcMsg*)pMsg); + //mmPutNodeMsgToApplyQueue(pMnode->pWrapper->pMgmt, pMsg); + + if (cbMeta.state == TAOS_SYNC_STATE_LEADER) { + tsem_post(&pMgmt->syncSem); + } + } +} + +void mndSyncPreCommitCb(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { + // strict consistent, do nothing +} + +void mndSyncRollBackCb(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { + // strict consistent, do nothing +} + +int32_t mndSyncGetSnapshotCb(struct SSyncFSM *pFsm, SSnapshot *pSnapshot) { + // snapshot + return 0; +} + +SSyncFSM *syncMnodeMakeFsm(SMnode *pMnode) { + SSyncFSM *pFsm = (SSyncFSM *)taosMemoryMalloc(sizeof(SSyncFSM)); + pFsm->data = pMnode; + pFsm->FpCommitCb = mndSyncCommitCb; + pFsm->FpPreCommitCb = mndSyncPreCommitCb; + pFsm->FpRollBackCb = mndSyncRollBackCb; + pFsm->FpGetSnapshot = mndSyncGetSnapshotCb; + return pFsm; } int32_t mndInitSync(SMnode *pMnode) { @@ -133,7 +203,27 @@ int32_t mndInitSync(SMnode *pMnode) { if (pMnode->selfId == 1) { pMgmt->state = TAOS_SYNC_STATE_LEADER; } - pMgmt->pSyncNode = NULL; + + // pMgmt->pSyncNode = NULL; + SSyncInfo syncInfo; + syncInfo.vgId = 1; + SSyncCfg *pCfg = &(syncInfo.syncCfg); + pCfg->replicaNum = pMnode->replica; + pCfg->myIndex = pMnode->selfIndex; + for (int i = 0; i < pMnode->replica; ++i) { + snprintf((pCfg->nodeInfo)->nodeFqdn, sizeof((pCfg->nodeInfo)->nodeFqdn), "%s", (pMnode->replicas)[i].fqdn); + (pCfg->nodeInfo)->nodePort = (pMnode->replicas)[i].port; + } + snprintf(syncInfo.path, sizeof(syncInfo.path), "%s/sync", pMnode->path); + syncInfo.pWal = pMnode->syncMgmt.pWal; + + syncInfo.pFsm = syncMnodeMakeFsm(pMnode); + syncInfo.FpSendMsg = mndSendMsg; + syncInfo.FpEqMsg = mndSyncEqMsg; + + pMnode->syncMgmt.sync = syncOpen(&syncInfo); + ASSERT(pMnode->syncMgmt.sync > 0); + return 0; } @@ -157,6 +247,7 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw) { SWal *pWal = pMnode->syncMgmt.pWal; SSdb *pSdb = pMnode->pSdb; +#if 0 int64_t ver = sdbUpdateVer(pSdb, 1); if (walWrite(pWal, ver, 1, pRaw, sdbGetRawTotalSize(pRaw)) < 0) { sdbUpdateVer(pSdb, -1); @@ -168,24 +259,32 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw) { walCommit(pWal, ver); walFsync(pWal, true); -#if 1 return 0; + #else + if (pMnode->replica == 1) return 0; SSyncMgmt *pMgmt = &pMnode->syncMgmt; pMgmt->errCode = 0; - SSyncBuffer buf = {.data = pRaw, .len = sdbGetRawTotalSize(pRaw)}; + //SSyncBuffer buf = {.data = pRaw, .len = sdbGetRawTotalSize(pRaw)}; + + SRpcMsg rpcMsg; + rpcMsg.code = TDMT_MND_APPLY_MSG; + rpcMsg.contLen = sdbGetRawTotalSize(pRaw); + rpcMsg.pCont = rpcMallocCont(rpcMsg.contLen); + memcpy(rpcMsg.pCont, pRaw, rpcMsg.contLen); bool isWeak = false; - int32_t code = syncPropose(pMgmt->pSyncNode, &buf, pMnode, isWeak); + int32_t code = syncPropose(pMgmt->sync, &rpcMsg, isWeak); if (code != 0) return code; tsem_wait(&pMgmt->syncSem); return pMgmt->errCode; #endif + } bool mndIsMaster(SMnode *pMnode) { diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 35ecaa748e..f4fa4beaf1 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -683,11 +683,14 @@ static int32_t mndTransSync(SMnode *pMnode, STrans *pTrans) { mDebug("trans:%d, sync finished", pTrans->id); - code = sdbWrite(pMnode->pSdb, pRaw); +// do it in state machine commit cb +#if 0 + code = sdbWriteWithout(pMnode->pSdb, pRaw); if (code != 0) { mError("trans:%d, failed to write sdb since %s", pTrans->id, terrstr()); return -1; } +#endif return 0; } diff --git a/source/dnode/mnode/impl/src/mnode.c b/source/dnode/mnode/impl/src/mnode.c index 8c805dd8c7..f8e5a65f0f 100644 --- a/source/dnode/mnode/impl/src/mnode.c +++ b/source/dnode/mnode/impl/src/mnode.c @@ -336,9 +336,25 @@ int32_t mndAlter(SMnode *pMnode, const SMnodeOpt *pOption) { return 0; } -int32_t mndStart(SMnode *pMnode) { return mndInitTimer(pMnode); } +int32_t mndStart(SMnode *pMnode) { + syncStart(pMnode->syncMgmt.sync); + return mndInitTimer(pMnode); +} -void mndStop(SMnode *pMnode) { return mndCleanupTimer(pMnode); } +void mndStop(SMnode *pMnode) { + syncStop(pMnode->syncMgmt.sync); + return mndCleanupTimer(pMnode); +} + +int32_t mndProcessApplyMsg(SRpcMsg *pMsg) { + + SSdbRaw *pRaw = pMsg->pCont; + SMnode *pMnode = pMsg->info.node; + int32_t code = sdbWriteWithoutFree(pMnode->pSdb, pRaw); + rpcFreeCont(pMsg->pCont); + + return code; +} int32_t mndProcessMsg(SRpcMsg *pMsg) { SMnode *pMnode = pMsg->info.node; diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index d9ff60bbe2..9c02b91ef0 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -55,14 +55,17 @@ static void syncFreeNode(void* param); // --------------------------------- int32_t syncInit() { - int32_t ret; - tsNodeRefId = taosOpenRef(200, syncFreeNode); - if (tsNodeRefId < 0) { - sError("failed to init node ref"); - syncCleanUp(); - ret = -1; - } else { - ret = syncEnvStart(); + int32_t ret = 0; + + if (!syncEnvIsStart()) { + tsNodeRefId = taosOpenRef(200, syncFreeNode); + if (tsNodeRefId < 0) { + sError("failed to init node ref"); + syncCleanUp(); + ret = -1; + } else { + ret = syncEnvStart(); + } } return ret; From a1cd6839a7d0af955a5142bccccbd7a6e7f30848 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Sat, 21 May 2022 22:15:23 +0800 Subject: [PATCH 02/32] enh(sync) sync/mnode integration --- include/dnode/mnode/mnode.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/dnode/mnode/mnode.h b/include/dnode/mnode/mnode.h index 16cb4cdd70..ec4b4dd06d 100644 --- a/include/dnode/mnode/mnode.h +++ b/include/dnode/mnode/mnode.h @@ -90,6 +90,7 @@ int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad); */ int32_t mndProcessMsg(SRpcMsg *pMsg); +int32_t mndProcessSyncMsg(SRpcMsg *pMsg); int32_t mndProcessApplyMsg(SRpcMsg *pMsg); From b20794f365058bbc6d6a7266530f046e6d81003b Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Sat, 21 May 2022 22:35:02 +0800 Subject: [PATCH 03/32] enh(sync) sync/mnode integration --- source/dnode/mgmt/mgmt_mnode/src/mmHandle.c | 10 +++ source/dnode/mgmt/mgmt_mnode/src/mmWorker.c | 9 +- source/dnode/mnode/impl/src/mnode.c | 98 +++++++++++++++++++++ 3 files changed, 116 insertions(+), 1 deletion(-) diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index 2ce42d7a5f..a894a4962d 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -237,6 +237,16 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_SYNC_VNODE_RSP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_COMPACT_VNODE_RSP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_SYNC_TIMEOUT, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_SYNC_PING, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_SYNC_PING_REPLY, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_SYNC_CLIENT_REQUEST, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_SYNC_CLIENT_REQUEST_REPLY, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_SYNC_REQUEST_VOTE, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_SYNC_REQUEST_VOTE_REPLY, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_SYNC_APPEND_ENTRIES, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_SYNC_APPEND_ENTRIES_REPLY, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; + code = 0; _OVER: diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c index 04fe891ce9..8574e01226 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c @@ -56,6 +56,13 @@ static void mmProcessQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { taosFreeQitem(pMsg); } +static void mmProcessSyncQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { + SMnodeMgmt *pMgmt = pInfo->ahandle; + pMsg->info.node = pMgmt->pMnode; + + mndProcessSyncMsg(pMsg); + return; +} static void mmProcessApplyQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { SMnodeMgmt *pMgmt = pInfo->ahandle; @@ -201,7 +208,7 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt) { .min = 1, .max = 1, .name = "mnode-sync", - .fp = (FItem)mmProcessQueue, + .fp = (FItem)mmProcessSyncQueue, .param = pMgmt, }; if (tSingleWorkerInit(&pMgmt->syncWorker, &sCfg) != 0) { diff --git a/source/dnode/mnode/impl/src/mnode.c b/source/dnode/mnode/impl/src/mnode.c index f8e5a65f0f..23baa43e97 100644 --- a/source/dnode/mnode/impl/src/mnode.c +++ b/source/dnode/mnode/impl/src/mnode.c @@ -356,6 +356,104 @@ int32_t mndProcessApplyMsg(SRpcMsg *pMsg) { return code; } +#include "syncTools.h" + +int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { + SMnode *pMnode = pMsg->info.node; + void *ahandle = pMsg->info.ahandle; + + int32_t ret = TAOS_SYNC_PROPOSE_OTHER_ERROR; + + if (syncEnvIsStart()) { + SSyncNode *pSyncNode = syncNodeAcquire(pMnode->syncMgmt.sync); + assert(pSyncNode != NULL); + + ESyncState state = syncGetMyRole(pMnode->syncMgmt.sync); + SyncTerm currentTerm = syncGetMyTerm(pMnode->syncMgmt.sync); + + SMsgHead *pHead = pMsg->pCont; + + char logBuf[512]; + char *syncNodeStr = sync2SimpleStr(pMnode->syncMgmt.sync); + snprintf(logBuf, sizeof(logBuf), "==vnodeProcessSyncReq== msgType:%d, syncNode: %s", pMsg->msgType, syncNodeStr); + syncRpcMsgLog2(logBuf, pMsg); + taosMemoryFree(syncNodeStr); + + SRpcMsg *pRpcMsg = pMsg; + + if (pRpcMsg->msgType == TDMT_VND_SYNC_TIMEOUT) { + SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); + syncTimeoutDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_VND_SYNC_PING) { + SyncPing *pSyncMsg = syncPingFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnPingCb(pSyncNode, pSyncMsg); + syncPingDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_VND_SYNC_PING_REPLY) { + SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); + syncPingReplyDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_VND_SYNC_CLIENT_REQUEST) { + SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg); + syncClientRequestDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_VND_SYNC_REQUEST_VOTE) { + SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg); + syncRequestVoteDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_VND_SYNC_REQUEST_VOTE_REPLY) { + SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg); + syncRequestVoteReplyDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_VND_SYNC_APPEND_ENTRIES) { + SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg); + syncAppendEntriesDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_VND_SYNC_APPEND_ENTRIES_REPLY) { + SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + + ret = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg); + syncAppendEntriesReplyDestroy(pSyncMsg); + + } else { + mError("==mndProcessSyncMsg== error msg type:%d", pRpcMsg->msgType); + ret = TAOS_SYNC_PROPOSE_OTHER_ERROR; + } + + syncNodeRelease(pSyncNode); + } else { + mError("==mndProcessSyncMsg== error syncEnv stop"); + ret = TAOS_SYNC_PROPOSE_OTHER_ERROR; + } + + return ret; + + + return 0; +} + int32_t mndProcessMsg(SRpcMsg *pMsg) { SMnode *pMnode = pMsg->info.node; void *ahandle = pMsg->info.ahandle; From e06f9fd05ce82b24398b00387dbf9426dd3bd1ee Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Sun, 22 May 2022 12:34:22 +0800 Subject: [PATCH 04/32] refactor: sync integrate into mnode --- include/common/tmsgdef.h | 1 - include/dnode/mnode/mnode.h | 2 - source/dnode/mgmt/mgmt_mnode/src/mmWorker.c | 19 +------- source/dnode/mnode/impl/inc/mndInt.h | 7 +-- source/dnode/mnode/impl/src/mndSync.c | 53 +++++++-------------- source/dnode/mnode/impl/src/mnode.c | 13 ++--- source/libs/sync/src/syncMain.c | 1 - 7 files changed, 25 insertions(+), 71 deletions(-) diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index bf683fc9ac..e8e931daa5 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -158,7 +158,6 @@ enum { TD_DEF_MSG_TYPE(TDMT_MND_DROP_INDEX, "mnode-drop-index", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_GET_DB_CFG, "mnode-get-db-cfg", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_GET_INDEX, "mnode-get-index", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_MND_APPLY_MSG, "mnode-apply-msg", NULL, NULL) // Requests handled by VNODE diff --git a/include/dnode/mnode/mnode.h b/include/dnode/mnode/mnode.h index ec4b4dd06d..5b028d5055 100644 --- a/include/dnode/mnode/mnode.h +++ b/include/dnode/mnode/mnode.h @@ -89,9 +89,7 @@ int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad); * @return int32_t 0 for success, -1 for failure. */ int32_t mndProcessMsg(SRpcMsg *pMsg); - int32_t mndProcessSyncMsg(SRpcMsg *pMsg); - int32_t mndProcessApplyMsg(SRpcMsg *pMsg); /** diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c index 89d657d2d5..0f1d325237 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c @@ -59,32 +59,17 @@ static void mmProcessQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { static void mmProcessSyncQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { SMnodeMgmt *pMgmt = pInfo->ahandle; pMsg->info.node = pMgmt->pMnode; - mndProcessSyncMsg(pMsg); - return; } static void mmProcessApplyQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { SMnodeMgmt *pMgmt = pInfo->ahandle; - int32_t code = -1; - tmsg_t msgType = pMsg->msgType; - bool isRequest = msgType & 1U; - dTrace("msg:%p, get from mnode-query queue", pMsg); + dTrace("msg:%p, get from mnode-apply queue", pMsg); pMsg->info.node = pMgmt->pMnode; - mndProcessApplyMsg(pMsg); - /* - if (isRequest) { - if (pMsg->info.handle != NULL && code != 0) { - if (code != 0 && terrno != 0) code = terrno; - mmSendRsp(pMsg, code); - } - } - */ - - dTrace("msg:%p, is freed, code:0x%x", pMsg, code); + dTrace("msg:%p, is freed", pMsg); rpcFreeCont(pMsg->pCont); taosFreeQitem(pMsg); } diff --git a/source/dnode/mnode/impl/inc/mndInt.h b/source/dnode/mnode/impl/inc/mndInt.h index 0982096d25..16945b1403 100644 --- a/source/dnode/mnode/impl/inc/mndInt.h +++ b/source/dnode/mnode/impl/inc/mndInt.h @@ -19,6 +19,7 @@ #include "mndDef.h" #include "sdb.h" +#include "syncTools.h" #include "tcache.h" #include "tdatablock.h" #include "tglobal.h" @@ -31,12 +32,14 @@ extern "C" { #endif +// clang-format off #define mFatal(...) { if (mDebugFlag & DEBUG_FATAL) { taosPrintLog("MND FATAL ", DEBUG_FATAL, 255, __VA_ARGS__); }} #define mError(...) { if (mDebugFlag & DEBUG_ERROR) { taosPrintLog("MND ERROR ", DEBUG_ERROR, 255, __VA_ARGS__); }} #define mWarn(...) { if (mDebugFlag & DEBUG_WARN) { taosPrintLog("MND WARN ", DEBUG_WARN, 255, __VA_ARGS__); }} #define mInfo(...) { if (mDebugFlag & DEBUG_INFO) { taosPrintLog("MND ", DEBUG_INFO, 255, __VA_ARGS__); }} #define mDebug(...) { if (mDebugFlag & DEBUG_DEBUG) { taosPrintLog("MND ", DEBUG_DEBUG, mDebugFlag, __VA_ARGS__); }} #define mTrace(...) { if (mDebugFlag & DEBUG_TRACE) { taosPrintLog("MND ", DEBUG_TRACE, mDebugFlag, __VA_ARGS__); }} +// clang-format on #define SYSTABLE_SCH_TABLE_NAME_LEN ((TSDB_TABLE_NAME_LEN - 1) + VARSTR_HEADER_SIZE) #define SYSTABLE_SCH_DB_NAME_LEN ((TSDB_DB_NAME_LEN - 1) + VARSTR_HEADER_SIZE) @@ -75,9 +78,7 @@ typedef struct { int32_t errCode; sem_t syncSem; SWal *pWal; - //SSyncNode *pSyncNode; - int64_t sync; - + int64_t sync; ESyncState state; } SSyncMgmt; diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index e7144f5673..d8eb021462 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -125,24 +125,11 @@ _OVER: } -int32_t mndSyncEqMsg(const SMsgCb* msgcb, SRpcMsg *pMsg) { - int32_t ret = 0; - if (msgcb->queueFps[SYNC_QUEUE] != NULL) { - tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); - } else { - mError("mndSyncEqMsg queue is NULL, SYNC_QUEUE:%d", SYNC_QUEUE); - } - return ret; -} +int32_t mndSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { return tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); } -int32_t mndSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { - int32_t ret = 0; - pMsg->info.noResp = 1; - tmsgSendReq(pEpSet, pMsg); - return ret; -} +int32_t mndSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { return tmsgSendReq(pEpSet, pMsg); } -void mndSyncCommitCb(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { +void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { SyncIndex beginIndex = SYNC_INDEX_INVALID; if (pFsm->FpGetSnapshot != NULL) { SSnapshot snapshot; @@ -163,26 +150,26 @@ void mndSyncCommitCb(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMe } } -void mndSyncPreCommitCb(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { +void mndSyncPreCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { // strict consistent, do nothing } -void mndSyncRollBackCb(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { +void mndSyncRollBackMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { // strict consistent, do nothing } -int32_t mndSyncGetSnapshotCb(struct SSyncFSM *pFsm, SSnapshot *pSnapshot) { +int32_t mndSyncGetSnapshot(struct SSyncFSM *pFsm, SSnapshot *pSnapshot) { // snapshot return 0; } -SSyncFSM *syncMnodeMakeFsm(SMnode *pMnode) { - SSyncFSM *pFsm = (SSyncFSM *)taosMemoryMalloc(sizeof(SSyncFSM)); +SSyncFSM *mndSyncMakeFsm(SMnode *pMnode) { + SSyncFSM *pFsm = taosMemoryCalloc(1, sizeof(SSyncFSM)); pFsm->data = pMnode; - pFsm->FpCommitCb = mndSyncCommitCb; - pFsm->FpPreCommitCb = mndSyncPreCommitCb; - pFsm->FpRollBackCb = mndSyncRollBackCb; - pFsm->FpGetSnapshot = mndSyncGetSnapshotCb; + pFsm->FpCommitCb = mndSyncCommitMsg; + pFsm->FpPreCommitCb = mndSyncPreCommitMsg; + pFsm->FpRollBackCb = mndSyncRollBackMsg; + pFsm->FpGetSnapshot = mndSyncGetSnapshot; return pFsm; } @@ -195,18 +182,11 @@ int32_t mndInitSync(SMnode *pMnode) { return -1; } - if (mndRestoreWal(pMnode) < 0) { - mError("failed to restore wal since %s", terrstr()); - return -1; - } - if (pMnode->selfId == 1) { pMgmt->state = TAOS_SYNC_STATE_LEADER; } - - // pMgmt->pSyncNode = NULL; - SSyncInfo syncInfo; - syncInfo.vgId = 1; + + SSyncInfo syncInfo = {.vgId = 1}; SSyncCfg *pCfg = &(syncInfo.syncCfg); pCfg->replicaNum = pMnode->replica; pCfg->myIndex = pMnode->selfIndex; @@ -216,9 +196,8 @@ int32_t mndInitSync(SMnode *pMnode) { } snprintf(syncInfo.path, sizeof(syncInfo.path), "%s/sync", pMnode->path); syncInfo.pWal = pMnode->syncMgmt.pWal; - - syncInfo.pFsm = syncMnodeMakeFsm(pMnode); - syncInfo.FpSendMsg = mndSendMsg; + syncInfo.pFsm = mndSyncMakeFsm(pMnode); + syncInfo.FpSendMsg = mndSyncSendMsg; syncInfo.FpEqMsg = mndSyncEqMsg; pMnode->syncMgmt.sync = syncOpen(&syncInfo); diff --git a/source/dnode/mnode/impl/src/mnode.c b/source/dnode/mnode/impl/src/mnode.c index 02f4e0872c..297174beb7 100644 --- a/source/dnode/mnode/impl/src/mnode.c +++ b/source/dnode/mnode/impl/src/mnode.c @@ -343,25 +343,18 @@ int32_t mndStart(SMnode *pMnode) { void mndStop(SMnode *pMnode) { syncStop(pMnode->syncMgmt.sync); - return mndCleanupTimer(pMnode); + return mndCleanupTimer(pMnode); } int32_t mndProcessApplyMsg(SRpcMsg *pMsg) { - SSdbRaw *pRaw = pMsg->pCont; - SMnode *pMnode = pMsg->info.node; - int32_t code = sdbWriteWithoutFree(pMnode->pSdb, pRaw); - rpcFreeCont(pMsg->pCont); - - return code; + SMnode *pMnode = pMsg->info.node; + return sdbWriteWithoutFree(pMnode->pSdb, pRaw); } -#include "syncTools.h" - int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { SMnode *pMnode = pMsg->info.node; void *ahandle = pMsg->info.ahandle; - int32_t ret = TAOS_SYNC_PROPOSE_OTHER_ERROR; if (syncEnvIsStart()) { diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 9c02b91ef0..f4f09d0f7b 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -13,7 +13,6 @@ * along with this program. If not, see . */ -#include #include "sync.h" #include "syncAppendEntries.h" #include "syncAppendEntriesReply.h" From 32d48f3b8fab0b2aae3d3ebb5be7c3b848ff9e92 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Sun, 22 May 2022 16:42:44 +0800 Subject: [PATCH 05/32] refactor: sync integrate into mnode --- source/dnode/mnode/impl/src/mndSync.c | 57 ++++++++++++++------------- source/dnode/mnode/impl/src/mnode.c | 5 ++- source/dnode/mnode/sdb/src/sdb.c | 10 +---- 3 files changed, 34 insertions(+), 38 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index d8eb021462..df289aa285 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -46,9 +46,8 @@ static void mndCloseWal(SMnode *pMnode) { } static int32_t mndRestoreWal(SMnode *pMnode) { - -// do nothing -return 0; + // do nothing + return 0; #if 0 @@ -122,7 +121,6 @@ _OVER: return code; #endif - } int32_t mndSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { return tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); } @@ -132,7 +130,7 @@ int32_t mndSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { return tmsgSendReq void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { SyncIndex beginIndex = SYNC_INDEX_INVALID; if (pFsm->FpGetSnapshot != NULL) { - SSnapshot snapshot; + SSnapshot snapshot = {0}; pFsm->FpGetSnapshot(pFsm, &snapshot); beginIndex = snapshot.lastApplyIndex; } @@ -141,8 +139,9 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM SMnode *pMnode = pFsm->data; SSyncMgmt *pMgmt = &pMnode->syncMgmt; - mndProcessApplyMsg((SRpcMsg*)pMsg); - //mmPutNodeMsgToApplyQueue(pMnode->pWrapper->pMgmt, pMsg); + SRpcMsg *pApplyMsg = (SRpcMsg *)pMsg; + pApplyMsg->info.node = pFsm->data; + mndProcessApplyMsg(pApplyMsg); if (cbMeta.state == TAOS_SYNC_STATE_LEADER) { tsem_post(&pMgmt->syncSem); @@ -182,19 +181,16 @@ int32_t mndInitSync(SMnode *pMnode) { return -1; } - if (pMnode->selfId == 1) { - pMgmt->state = TAOS_SYNC_STATE_LEADER; - } - SSyncInfo syncInfo = {.vgId = 1}; SSyncCfg *pCfg = &(syncInfo.syncCfg); pCfg->replicaNum = pMnode->replica; pCfg->myIndex = pMnode->selfIndex; - for (int i = 0; i < pMnode->replica; ++i) { - snprintf((pCfg->nodeInfo)->nodeFqdn, sizeof((pCfg->nodeInfo)->nodeFqdn), "%s", (pMnode->replicas)[i].fqdn); - (pCfg->nodeInfo)->nodePort = (pMnode->replicas)[i].port; + for (int32_t i = 0; i < pMnode->replica; ++i) { + SNodeInfo *pNodeInfo = &pCfg->nodeInfo[i]; + tstrncpy(pNodeInfo->nodeFqdn, pMnode->replicas[i].fqdn, sizeof(pNodeInfo->nodeFqdn)); + pNodeInfo->nodePort = pMnode->replicas[i].port; } - snprintf(syncInfo.path, sizeof(syncInfo.path), "%s/sync", pMnode->path); + snprintf(syncInfo.path, sizeof(syncInfo.path), "%s%ssync", pMnode->path, TD_DIRSEP); syncInfo.pWal = pMnode->syncMgmt.pWal; syncInfo.pFsm = mndSyncMakeFsm(pMnode); syncInfo.FpSendMsg = mndSyncSendMsg; @@ -242,31 +238,38 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw) { #else - if (pMnode->replica == 1) return 0; - SSyncMgmt *pMgmt = &pMnode->syncMgmt; pMgmt->errCode = 0; - //SSyncBuffer buf = {.data = pRaw, .len = sdbGetRawTotalSize(pRaw)}; - - SRpcMsg rpcMsg; - rpcMsg.code = TDMT_MND_APPLY_MSG; - rpcMsg.contLen = sdbGetRawTotalSize(pRaw); - rpcMsg.pCont = rpcMallocCont(rpcMsg.contLen); - memcpy(rpcMsg.pCont, pRaw, rpcMsg.contLen); + SRpcMsg rsp = {0}; + rsp.code = TDMT_MND_APPLY_MSG; + rsp.contLen = sdbGetRawTotalSize(pRaw); + rsp.pCont = rpcMallocCont(rsp.contLen); + memcpy(rsp.pCont, pRaw, rsp.contLen); bool isWeak = false; - int32_t code = syncPropose(pMgmt->sync, &rpcMsg, isWeak); + int32_t code = syncPropose(pMgmt->sync, &rsp, isWeak); + if (code == 0) { + tsem_wait(&pMgmt->syncSem); + } else if (code == TAOS_SYNC_PROPOSE_NOT_LEADER) { + terrno = TSDB_CODE_APP_NOT_READY; + mError("failed to propose raw:%p since not leader", pRaw); + return -1; + } else if (code == TAOS_SYNC_PROPOSE_OTHER_ERROR) { + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + mError("failed to propose raw:%p since sync internal error", pRaw); + } else { + assert(0); + } if (code != 0) return code; - tsem_wait(&pMgmt->syncSem); return pMgmt->errCode; #endif - } bool mndIsMaster(SMnode *pMnode) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; + pMgmt->state = syncGetMyRole(pMgmt->sync); return pMgmt->state == TAOS_SYNC_STATE_LEADER; } diff --git a/source/dnode/mnode/impl/src/mnode.c b/source/dnode/mnode/impl/src/mnode.c index 297174beb7..9a68e0e1f0 100644 --- a/source/dnode/mnode/impl/src/mnode.c +++ b/source/dnode/mnode/impl/src/mnode.c @@ -336,8 +336,9 @@ int32_t mndAlter(SMnode *pMnode, const SMnodeOpt *pOption) { return 0; } -int32_t mndStart(SMnode *pMnode) { - syncStart(pMnode->syncMgmt.sync); +int32_t mndStart(SMnode *pMnode) { + syncSetMsgCb(pMnode->syncMgmt.sync, &pMnode->msgCb); + syncStart(pMnode->syncMgmt.sync); return mndInitTimer(pMnode); } diff --git a/source/dnode/mnode/sdb/src/sdb.c b/source/dnode/mnode/sdb/src/sdb.c index 1f11a77e6c..6e5dde57a6 100644 --- a/source/dnode/mnode/sdb/src/sdb.c +++ b/source/dnode/mnode/sdb/src/sdb.c @@ -31,11 +31,9 @@ SSdb *sdbInit(SSdbOpt *pOption) { char path[PATH_MAX + 100] = {0}; snprintf(path, sizeof(path), "%s%sdata", pOption->path, TD_DIRSEP); pSdb->currDir = strdup(path); - snprintf(path, sizeof(path), "%s%ssync", pOption->path, TD_DIRSEP); - pSdb->syncDir = strdup(path); snprintf(path, sizeof(path), "%s%stmp", pOption->path, TD_DIRSEP); pSdb->tmpDir = strdup(path); - if (pSdb->currDir == NULL || pSdb->currDir == NULL || pSdb->currDir == NULL) { + if (pSdb->currDir == NULL || pSdb->tmpDir == NULL) { sdbCleanup(pSdb); terrno = TSDB_CODE_OUT_OF_MEMORY; mError("failed to init sdb since %s", terrstr()); @@ -149,12 +147,6 @@ static int32_t sdbCreateDir(SSdb *pSdb) { return -1; } - if (taosMkDir(pSdb->syncDir) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - mError("failed to create dir:%s since %s", pSdb->syncDir, terrstr()); - return -1; - } - if (taosMkDir(pSdb->tmpDir) != 0) { terrno = TAOS_SYSTEM_ERROR(errno); mError("failed to create dir:%s since %s", pSdb->tmpDir, terrstr()); From 6c1d1927c935bee7bee168c42000d0218d0dee10 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 23 May 2022 09:58:21 +0800 Subject: [PATCH 06/32] refactor: sync integrate into mnode --- include/dnode/mnode/sdb/sdb.h | 9 +-- source/dnode/mnode/impl/src/mndSync.c | 71 +++++++++----------- source/dnode/mnode/impl/test/sdb/sdbTest.cpp | 15 ++--- source/dnode/mnode/sdb/src/sdb.c | 4 +- 4 files changed, 44 insertions(+), 55 deletions(-) diff --git a/include/dnode/mnode/sdb/sdb.h b/include/dnode/mnode/sdb/sdb.h index 2abe0e5c73..3a885dd3da 100644 --- a/include/dnode/mnode/sdb/sdb.h +++ b/include/dnode/mnode/sdb/sdb.h @@ -304,13 +304,14 @@ int32_t sdbGetMaxId(SSdb *pSdb, ESdbType type); int64_t sdbGetTableVer(SSdb *pSdb, ESdbType type); /** - * @brief Update the version of sdb + * @brief Update the index of sdb * * @param pSdb The sdb object. - * @param val The update value of the version. - * @return int32_t The current version of sdb + * @param index The update value of the apply index. + * @return int32_t The current index of sdb */ -int64_t sdbUpdateVer(SSdb *pSdb, int32_t val); +void sdbSetApplyIndex(SSdb *pSdb, int64_t index); +int64_t sdbGetApplyIndex(SSdb *pSdb); SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen); void sdbFreeRaw(SSdbRaw *pRaw); diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 0b7ed04819..df7949f6a5 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -128,38 +128,35 @@ int32_t mndSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { return tmsgPutToQueue int32_t mndSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { return tmsgSendReq(pEpSet, pMsg); } void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { - SyncIndex beginIndex = SYNC_INDEX_INVALID; - if (pFsm->FpGetSnapshot != NULL) { - SSnapshot snapshot = {0}; - pFsm->FpGetSnapshot(pFsm, &snapshot); - beginIndex = snapshot.lastApplyIndex; - } - - if (cbMeta.index > beginIndex) { - SMnode *pMnode = pFsm->data; - SSyncMgmt *pMgmt = &pMnode->syncMgmt; - - SRpcMsg *pApplyMsg = (SRpcMsg *)pMsg; - pApplyMsg->info.node = pFsm->data; - mndProcessApplyMsg(pApplyMsg); - sdbUpdateVer(pMnode->pSdb, 1); + SMnode *pMnode = pFsm->data; + SSdb *pSdb = pMnode->pSdb; + SSyncMgmt *pMgmt = &pMnode->syncMgmt; + SyncIndex lastApply = sdbGetApplyIndex(pSdb); + SSdbRaw *pRaw = pMsg->pCont; + if (cbMeta.index > lastApply) { + mTrace("ver:%" PRId64 ", apply raw:%p to sdb, role:%s", cbMeta.index, pRaw, syncStr(cbMeta.state)); + sdbWriteWithoutFree(pMnode->pSdb, pRaw); + sdbSetApplyIndex(pMnode->pSdb, cbMeta.index); if (cbMeta.state == TAOS_SYNC_STATE_LEADER) { tsem_post(&pMgmt->syncSem); } + } else { + mTrace("ver:%" PRId64 ", already apply raw:%p to sdb, last:%" PRId64, cbMeta.index, pRaw, lastApply); } } -void mndSyncPreCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { +static void mndSyncPreCommitMsg(SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { // strict consistent, do nothing } -void mndSyncRollBackMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { +static void mndSyncRollBackMsg(SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { // strict consistent, do nothing } -int32_t mndSyncGetSnapshot(struct SSyncFSM *pFsm, SSnapshot *pSnapshot) { - // snapshot +static int32_t mndSyncGetSnapshot(SSyncFSM *pFsm, SSnapshot *pSnapshot) { + SMnode *pMnode = pFsm->data; + pSnapshot->lastApplyIndex = sdbGetApplyIndex(pMnode->pSdb); return 0; } @@ -182,23 +179,25 @@ int32_t mndInitSync(SMnode *pMnode) { return -1; } - SSyncInfo syncInfo = {.vgId = 1}; - SSyncCfg *pCfg = &(syncInfo.syncCfg); + SSyncInfo syncInfo = {.vgId = 1, .FpSendMsg = mndSyncSendMsg, .FpEqMsg = mndSyncEqMsg}; + snprintf(syncInfo.path, sizeof(syncInfo.path), "%s%ssync", pMnode->path, TD_DIRSEP); + syncInfo.pWal = pMgmt->pWal; + syncInfo.pFsm = mndSyncMakeFsm(pMnode); + + SSyncCfg *pCfg = &syncInfo.syncCfg; pCfg->replicaNum = pMnode->replica; pCfg->myIndex = pMnode->selfIndex; for (int32_t i = 0; i < pMnode->replica; ++i) { - SNodeInfo *pNodeInfo = &pCfg->nodeInfo[i]; - tstrncpy(pNodeInfo->nodeFqdn, pMnode->replicas[i].fqdn, sizeof(pNodeInfo->nodeFqdn)); - pNodeInfo->nodePort = pMnode->replicas[i].port; + SNodeInfo *pNode = &pCfg->nodeInfo[i]; + tstrncpy(pNode->nodeFqdn, pMnode->replicas[i].fqdn, sizeof(pNode->nodeFqdn)); + pNode->nodePort = pMnode->replicas[i].port; } - snprintf(syncInfo.path, sizeof(syncInfo.path), "%s%ssync", pMnode->path, TD_DIRSEP); - syncInfo.pWal = pMnode->syncMgmt.pWal; - syncInfo.pFsm = mndSyncMakeFsm(pMnode); - syncInfo.FpSendMsg = mndSyncSendMsg; - syncInfo.FpEqMsg = mndSyncEqMsg; - pMnode->syncMgmt.sync = syncOpen(&syncInfo); - ASSERT(pMnode->syncMgmt.sync > 0); + pMgmt->sync = syncOpen(&syncInfo); + if (pMgmt->sync <= 0) { + mError("failed to open sync since %s", terrstr()); + return -1; + } return 0; } @@ -209,16 +208,6 @@ void mndCleanupSync(SMnode *pMnode) { mndCloseWal(pMnode); } -static int32_t mndSyncApplyCb(struct SSyncFSM *fsm, SyncIndex index, const SSyncBuffer *buf, void *pData) { - SMnode *pMnode = pData; - SSyncMgmt *pMgmt = &pMnode->syncMgmt; - - pMgmt->errCode = 0; - tsem_post(&pMgmt->syncSem); - - return 0; -} - int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw) { SWal *pWal = pMnode->syncMgmt.pWal; SSdb *pSdb = pMnode->pSdb; diff --git a/source/dnode/mnode/impl/test/sdb/sdbTest.cpp b/source/dnode/mnode/impl/test/sdb/sdbTest.cpp index b93adf9930..6808c412d8 100644 --- a/source/dnode/mnode/impl/test/sdb/sdbTest.cpp +++ b/source/dnode/mnode/impl/test/sdb/sdbTest.cpp @@ -493,9 +493,8 @@ TEST_F(MndTestSdb, 01_Write_Str) { ASSERT_EQ(sdbGetSize(pSdb, SDB_USER), 2); ASSERT_EQ(sdbGetMaxId(pSdb, SDB_USER), -1); ASSERT_EQ(sdbGetTableVer(pSdb, SDB_USER), 2 ); - ASSERT_EQ(sdbUpdateVer(pSdb, 0), -1); - ASSERT_EQ(sdbUpdateVer(pSdb, 1), 0); - ASSERT_EQ(sdbUpdateVer(pSdb, -1), -1); + sdbSetApplyIndex(pSdb, -1); + ASSERT_EQ(sdbGetApplyIndex(pSdb), -1); ASSERT_EQ(mnode.insertTimes, 2); ASSERT_EQ(mnode.deleteTimes, 0); @@ -537,9 +536,6 @@ TEST_F(MndTestSdb, 01_Write_Str) { ASSERT_EQ(sdbGetSize(pSdb, SDB_USER), 3); ASSERT_EQ(sdbGetTableVer(pSdb, SDB_USER), 4); - ASSERT_EQ(sdbUpdateVer(pSdb, 0), -1); - ASSERT_EQ(sdbUpdateVer(pSdb, 1), 0); - ASSERT_EQ(sdbUpdateVer(pSdb, -1), -1); ASSERT_EQ(mnode.insertTimes, 3); ASSERT_EQ(mnode.deleteTimes, 0); @@ -704,8 +700,9 @@ TEST_F(MndTestSdb, 01_Write_Str) { } // write version - ASSERT_EQ(sdbUpdateVer(pSdb, 1), 0); - ASSERT_EQ(sdbUpdateVer(pSdb, 1), 1); + sdbSetApplyIndex(pSdb, 0); + sdbSetApplyIndex(pSdb, 1); + ASSERT_EQ(sdbGetApplyIndex(pSdb), 1); ASSERT_EQ(sdbWriteFile(pSdb), 0); ASSERT_EQ(sdbWriteFile(pSdb), 0); @@ -775,7 +772,7 @@ TEST_F(MndTestSdb, 01_Read_Str) { ASSERT_EQ(sdbGetSize(pSdb, SDB_USER), 2); ASSERT_EQ(sdbGetMaxId(pSdb, SDB_USER), -1); ASSERT_EQ(sdbGetTableVer(pSdb, SDB_USER), 5); - ASSERT_EQ(sdbUpdateVer(pSdb, 0), 1); + ASSERT_EQ(sdbGetApplyIndex(pSdb), 1); ASSERT_EQ(mnode.insertTimes, 4); ASSERT_EQ(mnode.deleteTimes, 0); diff --git a/source/dnode/mnode/sdb/src/sdb.c b/source/dnode/mnode/sdb/src/sdb.c index 6e5dde57a6..10bf4126c5 100644 --- a/source/dnode/mnode/sdb/src/sdb.c +++ b/source/dnode/mnode/sdb/src/sdb.c @@ -156,4 +156,6 @@ static int32_t sdbCreateDir(SSdb *pSdb) { return 0; } -int64_t sdbUpdateVer(SSdb *pSdb, int32_t val) { return atomic_add_fetch_64(&pSdb->curVer, val); } \ No newline at end of file +void sdbSetApplyIndex(SSdb *pSdb, int64_t index) { pSdb->curVer = index; } + +int64_t sdbGetApplyIndex(SSdb *pSdb) { return pSdb->curVer; } From c38a8055bac5384e1d4f97f93c355297fd4a843c Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 23 May 2022 11:08:31 +0800 Subject: [PATCH 07/32] fix: add error code if malloc failed --- include/libs/transport/trpc.h | 20 +-- source/dnode/mnode/impl/src/mndSync.c | 173 +++++--------------------- source/libs/transport/src/trans.c | 39 +++--- 3 files changed, 61 insertions(+), 171 deletions(-) diff --git a/include/libs/transport/trpc.h b/include/libs/transport/trpc.h index fcb00ddf01..754a203471 100644 --- a/include/libs/transport/trpc.h +++ b/include/libs/transport/trpc.h @@ -28,7 +28,7 @@ extern "C" { #define TAOS_CONN_CLIENT 1 #define IsReq(pMsg) (pMsg->msgType & 1U) -extern int tsRpcHeadSize; +extern int32_t tsRpcHeadSize; typedef struct { uint32_t clientIp; @@ -69,10 +69,10 @@ typedef struct SRpcInit { char localFqdn[TSDB_FQDN_LEN]; uint16_t localPort; // local port char * label; // for debug purpose - int numOfThreads; // number of threads to handle connections - int sessions; // number of sessions allowed + int32_t numOfThreads; // number of threads to handle connections + int32_t sessions; // number of sessions allowed int8_t connType; // TAOS_CONN_UDP, TAOS_CONN_TCPC, TAOS_CONN_TCPS - int idleTime; // milliseconds, 0 means idle timer is disabled + int32_t idleTime; // milliseconds, 0 means idle timer is disabled // the following is for client app ecurity only char *user; // user name @@ -108,9 +108,9 @@ int32_t rpcInit(); void rpcCleanup(); void * rpcOpen(const SRpcInit *pRpc); void rpcClose(void *); -void * rpcMallocCont(int contLen); +void * rpcMallocCont(int32_t contLen); void rpcFreeCont(void *pCont); -void * rpcReallocCont(void *ptr, int contLen); +void * rpcReallocCont(void *ptr, int32_t contLen); // Because taosd supports multi-process mode // These functions should not be used on the server side @@ -121,10 +121,10 @@ void rpcRegisterBrokenLinkArg(SRpcMsg *msg); void rpcReleaseHandle(void *handle, int8_t type); // just release client conn to rpc instance, no close sock // These functions will not be called in the child process -void rpcSendRedirectRsp(void *pConn, const SEpSet *pEpSet); -void rpcSendRequestWithCtx(void *thandle, const SEpSet *pEpSet, SRpcMsg *pMsg, int64_t *rid, SRpcCtx *ctx); -int rpcGetConnInfo(void *thandle, SRpcConnInfo *pInfo); -void rpcSendRecv(void *shandle, SEpSet *pEpSet, SRpcMsg *pReq, SRpcMsg *pRsp); +void rpcSendRedirectRsp(void *pConn, const SEpSet *pEpSet); +void rpcSendRequestWithCtx(void *thandle, const SEpSet *pEpSet, SRpcMsg *pMsg, int64_t *rid, SRpcCtx *ctx); +int32_t rpcGetConnInfo(void *thandle, SRpcConnInfo *pInfo); +void rpcSendRecv(void *shandle, SEpSet *pEpSet, SRpcMsg *pReq, SRpcMsg *pRsp); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index df7949f6a5..825b52dd9b 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -17,112 +17,6 @@ #include "mndSync.h" #include "mndTrans.h" -static int32_t mndInitWal(SMnode *pMnode) { - SSyncMgmt *pMgmt = &pMnode->syncMgmt; - - char path[PATH_MAX] = {0}; - snprintf(path, sizeof(path), "%s%swal", pMnode->path, TD_DIRSEP); - SWalCfg cfg = { - .vgId = 1, - .fsyncPeriod = 0, - .rollPeriod = -1, - .segSize = -1, - .retentionPeriod = -1, - .retentionSize = -1, - .level = TAOS_WAL_FSYNC, - }; - pMgmt->pWal = walOpen(path, &cfg); - if (pMgmt->pWal == NULL) return -1; - - return 0; -} - -static void mndCloseWal(SMnode *pMnode) { - SSyncMgmt *pMgmt = &pMnode->syncMgmt; - if (pMgmt->pWal != NULL) { - walClose(pMgmt->pWal); - pMgmt->pWal = NULL; - } -} - -static int32_t mndRestoreWal(SMnode *pMnode) { - // do nothing - return 0; - -#if 0 - - SWal *pWal = pMnode->syncMgmt.pWal; - SSdb *pSdb = pMnode->pSdb; - int64_t lastSdbVer = sdbUpdateVer(pSdb, 0); - int32_t code = -1; - - SWalReadHandle *pHandle = walOpenReadHandle(pWal); - if (pHandle == NULL) return -1; - - int64_t first = walGetFirstVer(pWal); - int64_t last = walGetLastVer(pWal); - mDebug("start to restore wal, sdbver:%" PRId64 ", first:%" PRId64 " last:%" PRId64, lastSdbVer, first, last); - - first = TMAX(lastSdbVer + 1, first); - for (int64_t ver = first; ver >= 0 && ver <= last; ++ver) { - if (walReadWithHandle(pHandle, ver) < 0) { - mError("ver:%" PRId64 ", failed to read from wal since %s", ver, terrstr()); - goto _OVER; - } - - SWalHead *pHead = pHandle->pHead; - int64_t sdbVer = sdbUpdateVer(pSdb, 0); - if (sdbVer + 1 != ver) { - terrno = TSDB_CODE_SDB_INVALID_WAl_VER; - mError("ver:%" PRId64 ", failed to write to sdb, since inconsistent with sdbver:%" PRId64, ver, sdbVer); - goto _OVER; - } - - mTrace("ver:%" PRId64 ", will be restored, content:%p", ver, pHead->head.body); - if (sdbWriteWithoutFree(pSdb, (void *)pHead->head.body) < 0) { - mError("ver:%" PRId64 ", failed to write to sdb since %s", ver, terrstr()); - goto _OVER; - } - - sdbUpdateVer(pSdb, 1); - mDebug("ver:%" PRId64 ", is restored", ver); - } - - int64_t sdbVer = sdbUpdateVer(pSdb, 0); - mDebug("restore wal finished, sdbver:%" PRId64, sdbVer); - - mndTransPullup(pMnode); - sdbVer = sdbUpdateVer(pSdb, 0); - mDebug("pullup trans finished, sdbver:%" PRId64, sdbVer); - - if (sdbVer != lastSdbVer) { - mInfo("sdb restored from %" PRId64 " to %" PRId64 ", write file", lastSdbVer, sdbVer); - if (sdbWriteFile(pSdb) != 0) { - goto _OVER; - } - - if (walCommit(pWal, sdbVer) != 0) { - goto _OVER; - } - - if (walBeginSnapshot(pWal, sdbVer) < 0) { - goto _OVER; - } - - if (walEndSnapshot(pWal) < 0) { - goto _OVER; - } - } - - code = 0; - -_OVER: - walCloseReadHandle(pHandle); - return code; - -#endif -} - int32_t mndSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { return tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); } int32_t mndSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { return tmsgSendReq(pEpSet, pMsg); } @@ -131,18 +25,20 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM SMnode *pMnode = pFsm->data; SSdb *pSdb = pMnode->pSdb; SSyncMgmt *pMgmt = &pMnode->syncMgmt; - SyncIndex lastApply = sdbGetApplyIndex(pSdb); SSdbRaw *pRaw = pMsg->pCont; - if (cbMeta.index > lastApply) { + SSnapshot snapshot = {0}; + (*pFsm->FpGetSnapshot)(pFsm, &snapshot); + + if (cbMeta.index > snapshot.lastApplyIndex) { mTrace("ver:%" PRId64 ", apply raw:%p to sdb, role:%s", cbMeta.index, pRaw, syncStr(cbMeta.state)); - sdbWriteWithoutFree(pMnode->pSdb, pRaw); - sdbSetApplyIndex(pMnode->pSdb, cbMeta.index); + sdbWriteWithoutFree(pSdb, pRaw); + sdbSetApplyIndex(pSdb, cbMeta.index); if (cbMeta.state == TAOS_SYNC_STATE_LEADER) { tsem_post(&pMgmt->syncSem); } } else { - mTrace("ver:%" PRId64 ", already apply raw:%p to sdb, last:%" PRId64, cbMeta.index, pRaw, lastApply); + mTrace("ver:%" PRId64 ", already apply raw:%p to sdb, last:%" PRId64, cbMeta.index, pRaw, snapshot.lastApplyIndex); } } @@ -174,7 +70,20 @@ int32_t mndInitSync(SMnode *pMnode) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; tsem_init(&pMgmt->syncSem, 0, 0); - if (mndInitWal(pMnode) < 0) { + char path[PATH_MAX + 20] = {0}; + snprintf(path, sizeof(path), "%s%swal", pMnode->path, TD_DIRSEP); + SWalCfg cfg = { + .vgId = 1, + .fsyncPeriod = 0, + .rollPeriod = -1, + .segSize = -1, + .retentionPeriod = -1, + .retentionSize = -1, + .level = TAOS_WAL_FSYNC, + }; + + pMgmt->pWal = walOpen(path, &cfg); + if (pMgmt->pWal == NULL) { mError("failed to open wal since %s", terrstr()); return -1; } @@ -205,56 +114,36 @@ int32_t mndInitSync(SMnode *pMnode) { void mndCleanupSync(SMnode *pMnode) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; tsem_destroy(&pMgmt->syncSem); - mndCloseWal(pMnode); + if (pMgmt->pWal != NULL) { + walClose(pMgmt->pWal); + } + + memset(pMgmt, 0, sizeof(SSyncMgmt)); } int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw) { - SWal *pWal = pMnode->syncMgmt.pWal; - SSdb *pSdb = pMnode->pSdb; - -#if 0 - int64_t ver = sdbUpdateVer(pSdb, 1); - if (walWrite(pWal, ver, 1, pRaw, sdbGetRawTotalSize(pRaw)) < 0) { - sdbUpdateVer(pSdb, -1); - mError("ver:%" PRId64 ", failed to write raw:%p to wal since %s", ver, pRaw, terrstr()); - return -1; - } - - mTrace("ver:%" PRId64 ", write to wal, raw:%p", ver, pRaw); - walCommit(pWal, ver); - walFsync(pWal, true); - - return 0; - -#else - SSyncMgmt *pMgmt = &pMnode->syncMgmt; pMgmt->errCode = 0; - SRpcMsg rsp = {0}; - rsp.code = TDMT_MND_APPLY_MSG; - rsp.contLen = sdbGetRawTotalSize(pRaw); + SRpcMsg rsp = {.code = TDMT_MND_APPLY_MSG, .contLen = sdbGetRawTotalSize(pRaw)}; rsp.pCont = rpcMallocCont(rsp.contLen); + if (rsp.pCont == NULL) return -1; memcpy(rsp.pCont, pRaw, rsp.contLen); - bool isWeak = false; - int32_t code = syncPropose(pMgmt->sync, &rsp, isWeak); + const bool isWeak = false; + int32_t code = syncPropose(pMgmt->sync, &rsp, isWeak); if (code == 0) { tsem_wait(&pMgmt->syncSem); } else if (code == TAOS_SYNC_PROPOSE_NOT_LEADER) { terrno = TSDB_CODE_APP_NOT_READY; - mError("failed to propose raw:%p since not leader", pRaw); - return -1; } else if (code == TAOS_SYNC_PROPOSE_OTHER_ERROR) { terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - mError("failed to propose raw:%p since sync internal error", pRaw); } else { - assert(0); + terrno = TSDB_CODE_APP_ERROR; } if (code != 0) return code; return pMgmt->errCode; -#endif } bool mndIsMaster(SMnode *pMnode) { diff --git a/source/libs/transport/src/trans.c b/source/libs/transport/src/trans.c index 5627dbfbf5..9e71c87fa5 100644 --- a/source/libs/transport/src/trans.c +++ b/source/libs/transport/src/trans.c @@ -17,7 +17,7 @@ #include "transComm.h" -void* (*taosInitHandle[])(uint32_t ip, uint32_t port, char* label, int numOfThreads, void* fp, void* shandle) = { +void* (*taosInitHandle[])(uint32_t ip, uint32_t port, char* label, int32_t numOfThreads, void* fp, void* shandle) = { transInitServer, transInitClient}; void (*taosCloseHandle[])(void* arg) = {transCloseServer, transCloseClient}; @@ -77,37 +77,38 @@ void rpcClose(void* arg) { taosMemoryFree(pRpc); return; } -void* rpcMallocCont(int contLen) { - int size = contLen + TRANS_MSG_OVERHEAD; - char* start = (char*)taosMemoryCalloc(1, (size_t)size); +void* rpcMallocCont(int32_t contLen) { + int32_t size = contLen + TRANS_MSG_OVERHEAD; + char* start = taosMemoryCalloc(1, size); if (start == NULL) { tError("failed to malloc msg, size:%d", size); + terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } else { tTrace("malloc mem:%p size:%d", start, size); } + return start + sizeof(STransMsgHead); } -void rpcFreeCont(void* cont) { - // impl - if (cont == NULL) { - return; - } +void rpcFreeCont(void* cont) { + if (cont == NULL) return; taosMemoryFree((char*)cont - TRANS_MSG_OVERHEAD); tTrace("free mem: %p", (char*)cont - TRANS_MSG_OVERHEAD); } -void* rpcReallocCont(void* ptr, int contLen) { - if (ptr == NULL) { - return rpcMallocCont(contLen); - } - char* st = (char*)ptr - TRANS_MSG_OVERHEAD; - int sz = contLen + TRANS_MSG_OVERHEAD; + +void* rpcReallocCont(void* ptr, int32_t contLen) { + if (ptr == NULL) return rpcMallocCont(contLen); + + char* st = (char*)ptr - TRANS_MSG_OVERHEAD; + int32_t sz = contLen + TRANS_MSG_OVERHEAD; st = taosMemoryRealloc(st, sz); if (st == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } + return st + TRANS_MSG_OVERHEAD; } @@ -116,8 +117,8 @@ void rpcSendRedirectRsp(void* thandle, const SEpSet* pEpSet) { assert(0); } -int rpcReportProgress(void* pConn, char* pCont, int contLen) { return -1; } -void rpcCancelRequest(int64_t rid) { return; } +int32_t rpcReportProgress(void* pConn, char* pCont, int32_t contLen) { return -1; } +void rpcCancelRequest(int64_t rid) { return; } void rpcSendRequest(void* shandle, const SEpSet* pEpSet, SRpcMsg* pMsg, int64_t* pRid) { transSendRequest(shandle, pEpSet, pMsg, NULL); @@ -129,8 +130,8 @@ void rpcSendRecv(void* shandle, SEpSet* pEpSet, SRpcMsg* pMsg, SRpcMsg* pRsp) { transSendRecv(shandle, pEpSet, pMsg, pRsp); } -void rpcSendResponse(const SRpcMsg* pMsg) { transSendResponse(pMsg); } -int rpcGetConnInfo(void* thandle, SRpcConnInfo* pInfo) { return transGetConnInfo((void*)thandle, pInfo); } +void rpcSendResponse(const SRpcMsg* pMsg) { transSendResponse(pMsg); } +int32_t rpcGetConnInfo(void* thandle, SRpcConnInfo* pInfo) { return transGetConnInfo((void*)thandle, pInfo); } void rpcRefHandle(void* handle, int8_t type) { assert(type == TAOS_CONN_SERVER || type == TAOS_CONN_CLIENT); From 97b1e95ad8ea575d047d87a61888c3f9f1880332 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Mon, 23 May 2022 12:14:53 +0800 Subject: [PATCH 08/32] enh(sync) sync/mnode integration, add term, currentTerm in cbMeta --- include/libs/sync/sync.h | 2 ++ source/dnode/mnode/impl/src/mndSync.c | 32 ++++++++++++++++++------ source/libs/sync/src/syncAppendEntries.c | 2 ++ source/libs/sync/src/syncCommit.c | 4 ++- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 9b6593e4b5..872785abe5 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -78,6 +78,8 @@ typedef struct SFsmCbMeta { int32_t code; ESyncState state; uint64_t seqNum; + SyncTerm term; + SyncTerm currentTerm; } SFsmCbMeta; typedef struct SSyncFSM { diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 0b7ed04819..5eec46056b 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -139,14 +139,32 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM SMnode *pMnode = pFsm->data; SSyncMgmt *pMgmt = &pMnode->syncMgmt; - SRpcMsg *pApplyMsg = (SRpcMsg *)pMsg; - pApplyMsg->info.node = pFsm->data; - mndProcessApplyMsg(pApplyMsg); - sdbUpdateVer(pMnode->pSdb, 1); + if (cbMeta.term < cbMeta.currentTerm) { + // restoring + + SRpcMsg *pApplyMsg = (SRpcMsg *)pMsg; + pApplyMsg->info.node = pFsm->data; + mndProcessApplyMsg(pApplyMsg); + //sdbUpdateVer(pMnode->pSdb, 1); ==> sdbSetVer(cbMeta.index, cbMeta.term); + + // mndTransPullup(pMnode); + + } else if (cbMeta.term == cbMeta.currentTerm) { + // restore finish + + SRpcMsg *pApplyMsg = (SRpcMsg *)pMsg; + pApplyMsg->info.node = pFsm->data; + mndProcessApplyMsg(pApplyMsg); + + //sdbUpdateVer(pMnode->pSdb, 1); ==> sdbSetVer(cbMeta.index, cbMeta.term); + + if (cbMeta.state == TAOS_SYNC_STATE_LEADER) { + tsem_post(&pMgmt->syncSem); + } + } else { + ASSERT(0); + } - if (cbMeta.state == TAOS_SYNC_STATE_LEADER) { - tsem_post(&pMgmt->syncSem); - } } } diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 1a5d418e75..bf1eeb4186 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -332,6 +332,8 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { cbMeta.code = 0; cbMeta.state = ths->state; cbMeta.seqNum = pEntry->seqNum; + cbMeta.term = pEntry->term; + cbMeta.currentTerm = ths->pRaftStore->currentTerm; ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, cbMeta); } diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 0f17cf267e..01e408e543 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -110,6 +110,8 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { cbMeta.code = 0; cbMeta.state = pSyncNode->state; cbMeta.seqNum = pEntry->seqNum; + cbMeta.term = pEntry->term; + cbMeta.currentTerm = pSyncNode->pRaftStore->currentTerm; pSyncNode->pFsm->FpCommitCb(pSyncNode->pFsm, &rpcMsg, cbMeta); } @@ -162,4 +164,4 @@ bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) { } } return false; -} \ No newline at end of file +} From 89c1e823374ec12dd421ed972d427e754e36ae3f Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 23 May 2022 13:05:35 +0800 Subject: [PATCH 09/32] refactor: sync integrate into mnode --- source/dnode/mnode/impl/inc/mndInt.h | 5 +++-- source/dnode/mnode/impl/inc/mndSync.h | 3 +++ source/dnode/mnode/impl/src/mndSync.c | 24 +++++++++++++++++++++++- source/dnode/mnode/impl/src/mndTrans.c | 10 ---------- source/dnode/mnode/impl/src/mnode.c | 16 +++++++--------- 5 files changed, 36 insertions(+), 22 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndInt.h b/source/dnode/mnode/impl/inc/mndInt.h index 16945b1403..5a1653b937 100644 --- a/source/dnode/mnode/impl/inc/mndInt.h +++ b/source/dnode/mnode/impl/inc/mndInt.h @@ -75,9 +75,10 @@ typedef struct { } STelemMgmt; typedef struct { - int32_t errCode; - sem_t syncSem; SWal *pWal; + int32_t errCode; + bool restored; + sem_t syncSem; int64_t sync; ESyncState state; } SSyncMgmt; diff --git a/source/dnode/mnode/impl/inc/mndSync.h b/source/dnode/mnode/impl/inc/mndSync.h index fe557cdeac..7f35ff5927 100644 --- a/source/dnode/mnode/impl/inc/mndSync.h +++ b/source/dnode/mnode/impl/inc/mndSync.h @@ -25,7 +25,10 @@ extern "C" { int32_t mndInitSync(SMnode *pMnode); void mndCleanupSync(SMnode *pMnode); bool mndIsMaster(SMnode *pMnode); +bool mndIsRestored(SMnode *pMnode); int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw); +void mndSyncStart(SMnode *pMnode); +void mndSyncStop(SMnode *pMnode); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 825b52dd9b..b8ee63d05e 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -68,7 +68,6 @@ SSyncFSM *mndSyncMakeFsm(SMnode *pMnode) { int32_t mndInitSync(SMnode *pMnode) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; - tsem_init(&pMgmt->syncSem, 0, 0); char path[PATH_MAX + 20] = {0}; snprintf(path, sizeof(path), "%s%swal", pMnode->path, TD_DIRSEP); @@ -102,6 +101,7 @@ int32_t mndInitSync(SMnode *pMnode) { pNode->nodePort = pMnode->replicas[i].port; } + tsem_init(&pMgmt->syncSem, 0, 0); pMgmt->sync = syncOpen(&syncInfo); if (pMgmt->sync <= 0) { mError("failed to open sync since %s", terrstr()); @@ -146,8 +146,30 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw) { return pMgmt->errCode; } +void mndSyncStart(SMnode *pMnode) { + SSdb *pSdb = pMnode->pSdb; + int64_t lastApplyIndex = sdbGetApplyIndex(pSdb); + + syncSetMsgCb(pMnode->syncMgmt.sync, &pMnode->msgCb); + syncStart(pMnode->syncMgmt.sync); + + int64_t applyIndex = sdbGetApplyIndex(pSdb); + mndTransPullup(pMnode); + mDebug("pullup trans finished, applyIndex:%" PRId64, applyIndex); + if (applyIndex != lastApplyIndex) { + mInfo("sdb restored from %" PRId64 " to %" PRId64 ", write file", lastApplyIndex, applyIndex); + sdbWriteFile(pSdb); + } + + pMnode->syncMgmt.restored = true; +} + +void mndSyncStop(SMnode *pMnode) { syncStop(pMnode->syncMgmt.sync); } + bool mndIsMaster(SMnode *pMnode) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; pMgmt->state = syncGetMyRole(pMgmt->sync); return pMgmt->state == TAOS_SYNC_STATE_LEADER; } + +bool mndIsRestored(SMnode *pMnode) { return pMnode->syncMgmt.restored; } \ No newline at end of file diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 8e19b023cc..e4a29365e7 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -682,16 +682,6 @@ static int32_t mndTransSync(SMnode *pMnode, STrans *pTrans) { } mDebug("trans:%d, sync finished", pTrans->id); - -// do it in state machine commit cb -#if 0 - code = sdbWriteWithout(pMnode->pSdb, pRaw); - if (code != 0) { - mError("trans:%d, failed to write sdb since %s", pTrans->id, terrstr()); - return -1; - } -#endif - return 0; } diff --git a/source/dnode/mnode/impl/src/mnode.c b/source/dnode/mnode/impl/src/mnode.c index 9a68e0e1f0..554556f5b0 100644 --- a/source/dnode/mnode/impl/src/mnode.c +++ b/source/dnode/mnode/impl/src/mnode.c @@ -86,7 +86,7 @@ static void *mndThreadFp(void *param) { lastTime++; taosMsleep(100); if (pMnode->stopped) break; - if (!mndIsMaster(pMnode)) continue; + if (!mndIsMaster(pMnode) || !mndIsRestored(pMnode)) continue; if (lastTime % (tsTransPullupInterval * 10) == 0) { mndPullupTrans(pMnode); @@ -337,13 +337,12 @@ int32_t mndAlter(SMnode *pMnode, const SMnodeOpt *pOption) { } int32_t mndStart(SMnode *pMnode) { - syncSetMsgCb(pMnode->syncMgmt.sync, &pMnode->msgCb); - syncStart(pMnode->syncMgmt.sync); + mndSyncStart(pMnode); return mndInitTimer(pMnode); } -void mndStop(SMnode *pMnode) { - syncStop(pMnode->syncMgmt.sync); +void mndStop(SMnode *pMnode) { + mndSyncStop(pMnode); return mndCleanupTimer(pMnode); } @@ -357,7 +356,7 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { SMnode *pMnode = pMsg->info.node; void *ahandle = pMsg->info.ahandle; int32_t ret = TAOS_SYNC_PROPOSE_OTHER_ERROR; - + if (syncEnvIsStart()) { SSyncNode *pSyncNode = syncNodeAcquire(pMnode->syncMgmt.sync); assert(pSyncNode != NULL); @@ -444,7 +443,6 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { return ret; - return 0; } @@ -454,7 +452,7 @@ int32_t mndProcessMsg(SRpcMsg *pMsg) { mTrace("msg:%p, will be processed, type:%s app:%p", pMsg, TMSG_INFO(pMsg->msgType), ahandle); if (IsReq(pMsg)) { - if (!mndIsMaster(pMnode)) { + if (!mndIsMaster(pMnode) || !mndIsRestored(pMnode)) { terrno = TSDB_CODE_APP_NOT_READY; mDebug("msg:%p, failed to process since %s, app:%p", pMsg, terrstr(), ahandle); return -1; @@ -514,7 +512,7 @@ int64_t mndGenerateUid(char *name, int32_t len) { int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgroupInfo *pVgroupInfo, SMonGrantInfo *pGrantInfo) { - if (!mndIsMaster(pMnode)) return -1; + if (!mndIsMaster(pMnode) || !mndIsRestored(pMnode)) return -1; SSdb *pSdb = pMnode->pSdb; int64_t ms = taosGetTimestampMs(); From 68b7f6946d50644ccfb614ccd2f335b4f3d6f84e Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 23 May 2022 13:14:54 +0800 Subject: [PATCH 10/32] refactor: sync integrate into mnode --- include/dnode/mnode/sdb/sdb.h | 3 +++ source/dnode/mnode/impl/src/mndSync.c | 1 + source/dnode/mnode/sdb/src/sdb.c | 5 +++++ source/dnode/mnode/sdb/src/sdbFile.c | 22 +++++++++++++++++++--- tests/test/c/sdbDump.c | 3 ++- 5 files changed, 30 insertions(+), 4 deletions(-) diff --git a/include/dnode/mnode/sdb/sdb.h b/include/dnode/mnode/sdb/sdb.h index 3a885dd3da..94d41a7416 100644 --- a/include/dnode/mnode/sdb/sdb.h +++ b/include/dnode/mnode/sdb/sdb.h @@ -312,6 +312,8 @@ int64_t sdbGetTableVer(SSdb *pSdb, ESdbType type); */ void sdbSetApplyIndex(SSdb *pSdb, int64_t index); int64_t sdbGetApplyIndex(SSdb *pSdb); +void sdbSetApplyTerm(SSdb *pSdb, int64_t term); +int64_t sdbGetApplyTerm(SSdb *pSdb); SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen); void sdbFreeRaw(SSdbRaw *pRaw); @@ -340,6 +342,7 @@ typedef struct SSdb { char *tmpDir; int64_t lastCommitVer; int64_t curVer; + int64_t curTerm; int64_t tableVer[SDB_MAX]; int64_t maxId[SDB_MAX]; EKeyType keyTypes[SDB_MAX]; diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index b8ee63d05e..28b8382e7c 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -53,6 +53,7 @@ static void mndSyncRollBackMsg(SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta c static int32_t mndSyncGetSnapshot(SSyncFSM *pFsm, SSnapshot *pSnapshot) { SMnode *pMnode = pFsm->data; pSnapshot->lastApplyIndex = sdbGetApplyIndex(pMnode->pSdb); + pSnapshot->lastApplyTerm = sdbGetApplyTerm(pMnode->pSdb); return 0; } diff --git a/source/dnode/mnode/sdb/src/sdb.c b/source/dnode/mnode/sdb/src/sdb.c index 10bf4126c5..7b90d8acb5 100644 --- a/source/dnode/mnode/sdb/src/sdb.c +++ b/source/dnode/mnode/sdb/src/sdb.c @@ -53,6 +53,7 @@ SSdb *sdbInit(SSdbOpt *pOption) { } pSdb->curVer = -1; + pSdb->curTerm = -1; pSdb->lastCommitVer = -1; pSdb->pMnode = pOption->pMnode; mDebug("sdb init successfully"); @@ -159,3 +160,7 @@ static int32_t sdbCreateDir(SSdb *pSdb) { void sdbSetApplyIndex(SSdb *pSdb, int64_t index) { pSdb->curVer = index; } int64_t sdbGetApplyIndex(SSdb *pSdb) { return pSdb->curVer; } + +void sdbSetApplyTerm(SSdb *pSdb, int64_t term) { pSdb->curTerm = term; } + +int64_t sdbGetApplyTerm(SSdb *pSdb) { return pSdb->curTerm; } diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index a391ea8d03..b000c208c8 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -65,6 +65,16 @@ static int32_t sdbReadFileHead(SSdb *pSdb, TdFilePtr pFile) { return -1; } + ret = taosReadFile(pFile, &pSdb->curTerm, sizeof(int64_t)); + if (ret < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + if (ret != sizeof(int64_t)) { + terrno = TSDB_CODE_FILE_CORRUPTED; + return -1; + } + for (int32_t i = 0; i < SDB_TABLE_SIZE; ++i) { int64_t maxId = 0; ret = taosReadFile(pFile, &maxId, sizeof(int64_t)); @@ -123,6 +133,11 @@ static int32_t sdbWriteFileHead(SSdb *pSdb, TdFilePtr pFile) { return -1; } + if (taosWriteFile(pFile, &pSdb->curTerm, sizeof(int64_t)) != sizeof(int64_t)) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + for (int32_t i = 0; i < SDB_TABLE_SIZE; ++i) { int64_t maxId = 0; if (i < SDB_MAX) { @@ -182,6 +197,7 @@ int32_t sdbReadFile(SSdb *pSdb) { if (sdbReadFileHead(pSdb, pFile) != 0) { mError("failed to read file:%s head since %s", file, terrstr()); pSdb->curVer = -1; + pSdb->curTerm = -1; taosMemoryFree(pRaw); taosCloseFile(&pFile); return -1; @@ -256,8 +272,8 @@ static int32_t sdbWriteFileImp(SSdb *pSdb) { char curfile[PATH_MAX] = {0}; snprintf(curfile, sizeof(curfile), "%s%ssdb.data", pSdb->currDir, TD_DIRSEP); - mDebug("start to write file:%s, current ver:%" PRId64 ", commit ver:%" PRId64, curfile, pSdb->curVer, - pSdb->lastCommitVer); + mDebug("start to write file:%s, current ver:%" PRId64 " term:%" PRId64 ", commit ver:%" PRId64, curfile, pSdb->curVer, + pSdb->curTerm, pSdb->lastCommitVer); TdFilePtr pFile = taosOpenFile(tmpfile, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); if (pFile == NULL) { @@ -350,7 +366,7 @@ static int32_t sdbWriteFileImp(SSdb *pSdb) { mError("failed to write file:%s since %s", curfile, tstrerror(code)); } else { pSdb->lastCommitVer = pSdb->curVer; - mDebug("write file:%s successfully, ver:%" PRId64, curfile, pSdb->lastCommitVer); + mDebug("write file:%s successfully, ver:%" PRId64 " term:%" PRId64, curfile, pSdb->lastCommitVer, pSdb->curTerm); } terrno = code; diff --git a/tests/test/c/sdbDump.c b/tests/test/c/sdbDump.c index 2bc60f777c..1d3eba7cde 100644 --- a/tests/test/c/sdbDump.c +++ b/tests/test/c/sdbDump.c @@ -262,7 +262,7 @@ void dumpCluster(SSdb *pSdb, SJson *json) { } void dumpTrans(SSdb *pSdb, SJson *json) { - void *pIter = NULL; + void *pIter = NULL; SJson *items = tjsonCreateObject(); tjsonAddItemToObject(json, "transactions", items); @@ -294,6 +294,7 @@ void dumpTrans(SSdb *pSdb, SJson *json) { void dumpHeader(SSdb *pSdb, SJson *json) { tjsonAddIntegerToObject(json, "sver", 1); tjsonAddStringToObject(json, "curVer", i642str(pSdb->curVer)); + tjsonAddStringToObject(json, "curTerm", i642str(pSdb->curTerm)); SJson *maxIdsJson = tjsonCreateObject(); tjsonAddItemToObject(json, "maxIds", maxIdsJson); From cb2527f71f7a7c6894e8eecf193428a727a00f77 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 23 May 2022 13:44:49 +0800 Subject: [PATCH 11/32] refactor: sync integrate into mnode --- include/dnode/mnode/mnode.h | 1 - source/dnode/mgmt/mgmt_mnode/inc/mmInt.h | 2 -- source/dnode/mgmt/mgmt_mnode/src/mmWorker.c | 28 --------------------- source/dnode/mnode/impl/src/mnode.c | 6 ----- 4 files changed, 37 deletions(-) diff --git a/include/dnode/mnode/mnode.h b/include/dnode/mnode/mnode.h index 5b028d5055..f2c8c916c8 100644 --- a/include/dnode/mnode/mnode.h +++ b/include/dnode/mnode/mnode.h @@ -90,7 +90,6 @@ int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad); */ int32_t mndProcessMsg(SRpcMsg *pMsg); int32_t mndProcessSyncMsg(SRpcMsg *pMsg); -int32_t mndProcessApplyMsg(SRpcMsg *pMsg); /** * @brief Generate machine code diff --git a/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h b/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h index 380ae63b8d..75e83d6547 100644 --- a/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h +++ b/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h @@ -33,7 +33,6 @@ typedef struct SMnodeMgmt { SSingleWorker readWorker; SSingleWorker writeWorker; SSingleWorker syncWorker; - SSingleWorker applyWorker; SSingleWorker monitorWorker; SReplica replicas[TSDB_MAX_REPLICA]; int8_t replica; @@ -60,7 +59,6 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt); void mmStopWorker(SMnodeMgmt *pMgmt); int32_t mmPutNodeMsgToWriteQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t mmPutNodeMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); -int32_t mmPutNodeMsgToApplyQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t mmPutNodeMsgToReadQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t mmPutNodeMsgToQueryQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t mmPutNodeMsgToMonitorQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c index 0f1d325237..f408992aa6 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c @@ -62,18 +62,6 @@ static void mmProcessSyncQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { mndProcessSyncMsg(pMsg); } -static void mmProcessApplyQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { - SMnodeMgmt *pMgmt = pInfo->ahandle; - dTrace("msg:%p, get from mnode-apply queue", pMsg); - - pMsg->info.node = pMgmt->pMnode; - mndProcessApplyMsg(pMsg); - - dTrace("msg:%p, is freed", pMsg); - rpcFreeCont(pMsg->pCont); - taosFreeQitem(pMsg); -} - static int32_t mmPutNodeMsgToWorker(SSingleWorker *pWorker, SRpcMsg *pMsg) { dTrace("msg:%p, put into worker %s, type:%s", pMsg, pWorker->name, TMSG_INFO(pMsg->msgType)); taosWriteQitem(pWorker->queue, pMsg); @@ -88,10 +76,6 @@ int32_t mmPutNodeMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) { return mmPutNodeMsgToWorker(&pMgmt->syncWorker, pMsg); } -int32_t mmPutNodeMsgToApplyQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) { - return mmPutNodeMsgToWorker(&pMgmt->applyWorker, pMsg); -} - int32_t mmPutNodeMsgToReadQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) { return mmPutNodeMsgToWorker(&pMgmt->readWorker, pMsg); } @@ -179,18 +163,6 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt) { return -1; } - SSingleWorkerCfg aCfg = { - .min = 1, - .max = 1, - .name = "mnode-apply", - .fp = (FItem)mmProcessApplyQueue, - .param = pMgmt, - }; - if (tSingleWorkerInit(&pMgmt->applyWorker, &aCfg) != 0) { - dError("failed to start mnode mnode-apply worker since %s", terrstr()); - return -1; - } - SSingleWorkerCfg mCfg = { .min = 1, .max = 1, diff --git a/source/dnode/mnode/impl/src/mnode.c b/source/dnode/mnode/impl/src/mnode.c index 554556f5b0..4e3c8b8773 100644 --- a/source/dnode/mnode/impl/src/mnode.c +++ b/source/dnode/mnode/impl/src/mnode.c @@ -346,12 +346,6 @@ void mndStop(SMnode *pMnode) { return mndCleanupTimer(pMnode); } -int32_t mndProcessApplyMsg(SRpcMsg *pMsg) { - SSdbRaw *pRaw = pMsg->pCont; - SMnode *pMnode = pMsg->info.node; - return sdbWriteWithoutFree(pMnode->pSdb, pRaw); -} - int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { SMnode *pMnode = pMsg->info.node; void *ahandle = pMsg->info.ahandle; From 8bbae8dd125c38d1f8dc042b08c1ab02f1bbaabe Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 23 May 2022 14:32:44 +0800 Subject: [PATCH 12/32] refactor: pullup after sorting by transId --- source/dnode/mnode/impl/src/mndTrans.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index e4a29365e7..821b04a017 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -1343,19 +1343,35 @@ _OVER: return code; } -void mndTransPullup(SMnode *pMnode) { - STrans *pTrans = NULL; - void *pIter = NULL; +static int32_t mndCompareTransId(int32_t *pTransId1, int32_t *pTransId2) { return *pTransId1 >= *pTransId2 ? 1 : 0; } +void mndTransPullup(SMnode *pMnode) { + SSdb *pSdb = pMnode->pSdb; + SArray *pArray = taosArrayInit(sdbGetSize(pSdb, SDB_TRANS), sizeof(int32_t)); + if (pArray == NULL) return; + + void *pIter = NULL; while (1) { + STrans *pTrans = NULL; pIter = sdbFetch(pMnode->pSdb, SDB_TRANS, pIter, (void **)&pTrans); if (pIter == NULL) break; + taosArrayPush(pArray, &pTrans->id); + sdbRelease(pSdb, pTrans); + } - mndTransExecute(pMnode, pTrans); - sdbRelease(pMnode->pSdb, pTrans); + taosArraySort(pArray, (__compar_fn_t)mndCompareTransId); + + for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) { + int32_t *pTransId = taosArrayGet(pArray, i); + STrans *pTrans = mndAcquireTrans(pMnode, *pTransId); + if (pTrans != NULL) { + mndTransExecute(pMnode, pTrans); + } + mndReleaseTrans(pMnode, pTrans); } sdbWriteFile(pMnode->pSdb); + taosArrayDestroy(pArray); } static int32_t mndRetrieveTrans(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows) { From 4535722957103e94d3bbd595047af4bc92079238 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Mon, 23 May 2022 15:41:04 +0800 Subject: [PATCH 13/32] enh(sync) sync/mnode integration, syncStart async -> sync --- include/libs/sync/sync.h | 2 +- source/libs/sync/inc/syncIO.h | 4 ++-- source/libs/sync/inc/syncInt.h | 5 +++++ source/libs/sync/src/syncAppendEntries.c | 20 ++++++++++++++++- source/libs/sync/src/syncCommit.c | 25 ++++++++++++++++++---- source/libs/sync/src/syncMain.c | 21 +++++++++++++++++- source/libs/sync/test/syncSnapshotTest.cpp | 2 ++ 7 files changed, 70 insertions(+), 9 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 872785abe5..1eed353f33 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -87,6 +87,7 @@ typedef struct SSyncFSM { void (*FpCommitCb)(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta); void (*FpPreCommitCb)(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta); void (*FpRollBackCb)(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta); + void (*FpRestoreFinish)(struct SSyncFSM* pFsm); int32_t (*FpGetSnapshot)(struct SSyncFSM* pFsm, SSnapshot* pSnapshot); int32_t (*FpRestoreSnapshot)(struct SSyncFSM* pFsm, const SSnapshot* snapshot); } SSyncFSM; @@ -119,7 +120,6 @@ typedef struct SSyncLogStore { } SSyncLogStore; - typedef struct SSyncInfo { SyncGroupId vgId; SSyncCfg syncCfg; diff --git a/source/libs/sync/inc/syncIO.h b/source/libs/sync/inc/syncIO.h index f65a317694..b69c087b5f 100644 --- a/source/libs/sync/inc/syncIO.h +++ b/source/libs/sync/inc/syncIO.h @@ -36,8 +36,8 @@ typedef struct SSyncIO { STaosQueue *pMsgQ; STaosQset * pQset; TdThread consumerTid; - void *serverRpc; - void *clientRpc; + void * serverRpc; + void * clientRpc; SEpSet myAddr; SMsgCb msgcb; diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 768e1c1cf1..d3345620e9 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -147,6 +147,11 @@ typedef struct SSyncNode { // tools SSyncRespMgr* pSyncRespMgr; + // restore state + bool restoreFinish; + sem_t restoreSem; + SSnapshot* pSnapshot; + } SSyncNode; // open/close -------------- diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index bf1eeb4186..18735f5b71 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -324,7 +324,6 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { SRpcMsg rpcMsg; syncEntry2OriginalRpc(pEntry, &rpcMsg); - // if (ths->pFsm->FpCommitCb != NULL && pEntry->originalRpcType != TDMT_VND_SYNC_NOOP) { if (ths->pFsm->FpCommitCb != NULL && syncUtilUserCommit(pEntry->originalRpcType)) { SFsmCbMeta cbMeta; cbMeta.index = pEntry->index; @@ -335,6 +334,15 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { cbMeta.term = pEntry->term; cbMeta.currentTerm = ths->pRaftStore->currentTerm; ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, cbMeta); + + bool needExecute = true; + if (ths->pSnapshot != NULL && cbMeta.index <= ths->pSnapshot->lastApplyIndex) { + needExecute = false; + } + + if (needExecute) { + ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, cbMeta); + } } // config change @@ -351,6 +359,16 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { } } + // restore finish + if (pEntry->index == ths->pLogStore->getLastIndex(ths->pLogStore)) { + if (ths->restoreFinish == false) { + ths->pFsm->FpRestoreFinish(ths->pFsm); + ths->restoreFinish = true; + + tsem_post(&ths->restoreSem); + } + } + rpcFreeCont(rpcMsg.pCont); syncEntryDestory(pEntry); } diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 01e408e543..557f69a2ce 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -102,7 +102,6 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { SRpcMsg rpcMsg; syncEntry2OriginalRpc(pEntry, &rpcMsg); - // if (pSyncNode->pFsm->FpCommitCb != NULL && pEntry->originalRpcType != TDMT_VND_SYNC_NOOP) { if (pSyncNode->pFsm->FpCommitCb != NULL && syncUtilUserCommit(pEntry->originalRpcType)) { SFsmCbMeta cbMeta; cbMeta.index = pEntry->index; @@ -110,9 +109,17 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { cbMeta.code = 0; cbMeta.state = pSyncNode->state; cbMeta.seqNum = pEntry->seqNum; - cbMeta.term = pEntry->term; - cbMeta.currentTerm = pSyncNode->pRaftStore->currentTerm; - pSyncNode->pFsm->FpCommitCb(pSyncNode->pFsm, &rpcMsg, cbMeta); + cbMeta.term = pEntry->term; + cbMeta.currentTerm = pSyncNode->pRaftStore->currentTerm; + + bool needExecute = true; + if (pSyncNode->pSnapshot != NULL && cbMeta.index <= pSyncNode->pSnapshot->lastApplyIndex) { + needExecute = false; + } + + if (needExecute) { + pSyncNode->pFsm->FpCommitCb(pSyncNode->pFsm, &rpcMsg, cbMeta); + } } // config change @@ -129,6 +136,16 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { } } + // restore finish + if (pEntry->index == pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore)) { + if (pSyncNode->restoreFinish == false) { + pSyncNode->pFsm->FpRestoreFinish(pSyncNode->pFsm); + pSyncNode->restoreFinish = true; + + tsem_post(&pSyncNode->restoreSem); + } + } + rpcFreeCont(rpcMsg.pCont); syncEntryDestory(pEntry); } diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index f4f09d0f7b..bffebc1693 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -242,7 +242,7 @@ int32_t syncGetAndDelRespRpc(int64_t rid, uint64_t index, SRpcMsg* msg) { return ret; } -void syncSetMsgCb(int64_t rid, const SMsgCb *msgcb) { +void syncSetMsgCb(int64_t rid, const SMsgCb* msgcb) { SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); if (pSyncNode == NULL) { sTrace("syncSetQ get pSyncNode is NULL, rid:%ld", rid); @@ -492,6 +492,15 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pSyncInfo) { pSyncNode->pSyncRespMgr = syncRespMgrCreate(NULL, 0); assert(pSyncNode->pSyncRespMgr != NULL); + // restore state + pSyncNode->restoreFinish = false; + pSyncNode->pSnapshot = NULL; + if (pSyncNode->pFsm->FpGetSnapshot != NULL) { + pSyncNode->pSnapshot = taosMemoryMalloc(sizeof(SSnapshot)); + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, pSyncNode->pSnapshot); + } + tsem_init(&(pSyncNode->restoreSem), 0, 0); + // start in syncNodeStart // start raft // syncNodeBecomeFollower(pSyncNode); @@ -511,6 +520,8 @@ void syncNodeStart(SSyncNode* pSyncNode) { // use this now syncNodeAppendNoop(pSyncNode); syncMaybeAdvanceCommitIndex(pSyncNode); // maybe only one replica + + tsem_wait(&pSyncNode->restoreSem); return; } @@ -520,6 +531,8 @@ void syncNodeStart(SSyncNode* pSyncNode) { int32_t ret = 0; // ret = syncNodeStartPingTimer(pSyncNode); assert(ret == 0); + + tsem_wait(&pSyncNode->restoreSem); } void syncNodeStartStandBy(SSyncNode* pSyncNode) { @@ -556,6 +569,12 @@ void syncNodeClose(SSyncNode* pSyncNode) { taosMemoryFree(pSyncNode->pFsm); } + if (pSyncNode->pSnapshot != NULL) { + taosMemoryFree(pSyncNode->pSnapshot); + } + + tsem_destroy(&pSyncNode->restoreSem); + // free memory in syncFreeNode // taosMemoryFree(pSyncNode); } diff --git a/source/libs/sync/test/syncSnapshotTest.cpp b/source/libs/sync/test/syncSnapshotTest.cpp index 62bda5b22e..8ccd698907 100644 --- a/source/libs/sync/test/syncSnapshotTest.cpp +++ b/source/libs/sync/test/syncSnapshotTest.cpp @@ -160,6 +160,8 @@ SyncClientRequest *step1(const SRpcMsg *pMsg) { } int main(int argc, char **argv) { + sprintf(tsTempDir, "%s", "."); + // taosInitLog((char *)"syncTest.log", 100000, 10); tsAsyncLog = 0; sDebugFlag = 143 + 64; From dc82c1ba226f74805edbd519b959b4254b805572 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Mon, 23 May 2022 16:00:03 +0800 Subject: [PATCH 14/32] enh(sync) sync/mnode integration, syncStart async -> sync --- source/dnode/mnode/impl/src/mndSync.c | 39 ++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 5eec46056b..16c6b7f7fa 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -128,6 +128,18 @@ int32_t mndSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { return tmsgPutToQueue int32_t mndSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { return tmsgSendReq(pEpSet, pMsg); } void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { + SRpcMsg *pApplyMsg = (SRpcMsg *)pMsg; + pApplyMsg->info.node = pFsm->data; + mndProcessApplyMsg(pApplyMsg); + //sdbUpdateVer(pMnode->pSdb, 1); ==> sdbSetVer(cbMeta.index, cbMeta.term); + + if (cbMeta.state == TAOS_SYNC_STATE_LEADER) { + SMnode *pMnode = pFsm->data; + SSyncMgmt *pMgmt = &pMnode->syncMgmt; + tsem_post(&pMgmt->syncSem); + } + +#if 0 SyncIndex beginIndex = SYNC_INDEX_INVALID; if (pFsm->FpGetSnapshot != NULL) { SSnapshot snapshot = {0}; @@ -166,8 +178,23 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM } } +#endif + } +int32_t mndSyncGetSnapshot(struct SSyncFSM *pFsm, SSnapshot *pSnapshot) { + SMnode *pMnode = pFsm->data; + pSnapshot->lastApplyIndex = pMnode->pSdb->lastCommitVer; + //pSnapshot->lastApplyTerm = ...; + return 0; +} + +void mndRestoreFinish(struct SSyncFSM* pFsm) { + SMnode *pMnode = pFsm->data; + mndTransPullup(pMnode); +} + +#if 0 void mndSyncPreCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { // strict consistent, do nothing } @@ -175,19 +202,17 @@ void mndSyncPreCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta void mndSyncRollBackMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { // strict consistent, do nothing } - -int32_t mndSyncGetSnapshot(struct SSyncFSM *pFsm, SSnapshot *pSnapshot) { - // snapshot - return 0; -} +#endif SSyncFSM *mndSyncMakeFsm(SMnode *pMnode) { SSyncFSM *pFsm = taosMemoryCalloc(1, sizeof(SSyncFSM)); pFsm->data = pMnode; pFsm->FpCommitCb = mndSyncCommitMsg; - pFsm->FpPreCommitCb = mndSyncPreCommitMsg; - pFsm->FpRollBackCb = mndSyncRollBackMsg; + pFsm->FpPreCommitCb = NULL; + pFsm->FpRollBackCb = NULL; pFsm->FpGetSnapshot = mndSyncGetSnapshot; + pFsm->FpRestoreFinish = mndRestoreFinish; + pFsm->FpRestoreSnapshot = NULL; return pFsm; } From 2be77666d49ab15b6dea9ff3b3101c286c0f0360 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 23 May 2022 16:39:36 +0800 Subject: [PATCH 15/32] test: delete expired utest of mnode --- source/dnode/mgmt/test/mnode/CMakeLists.txt | 8 ++++---- source/dnode/mnode/impl/test/trans/CMakeLists.txt | 8 ++++---- source/dnode/mnode/impl/test/trans/trans2.cpp | 9 +++++++++ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/source/dnode/mgmt/test/mnode/CMakeLists.txt b/source/dnode/mgmt/test/mnode/CMakeLists.txt index e83f5dbbec..788cf53976 100644 --- a/source/dnode/mgmt/test/mnode/CMakeLists.txt +++ b/source/dnode/mgmt/test/mnode/CMakeLists.txt @@ -4,7 +4,7 @@ target_link_libraries( dmnodeTest sut ) -add_test( - NAME dmnodeTest - COMMAND dmnodeTest -) +#add_test( +# NAME dmnodeTest +# COMMAND dmnodeTest +#) diff --git a/source/dnode/mnode/impl/test/trans/CMakeLists.txt b/source/dnode/mnode/impl/test/trans/CMakeLists.txt index 55fc3abbc2..22ff85563f 100644 --- a/source/dnode/mnode/impl/test/trans/CMakeLists.txt +++ b/source/dnode/mnode/impl/test/trans/CMakeLists.txt @@ -31,7 +31,7 @@ target_include_directories( PUBLIC "${TD_SOURCE_DIR}/include/dnode/mnode" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../inc" ) -add_test( - NAME transTest2 - COMMAND transTest2 -) +#add_test( +# NAME transTest2 +# COMMAND transTest2 +#) diff --git a/source/dnode/mnode/impl/test/trans/trans2.cpp b/source/dnode/mnode/impl/test/trans/trans2.cpp index c4ed48fe60..08ab70a989 100644 --- a/source/dnode/mnode/impl/test/trans/trans2.cpp +++ b/source/dnode/mnode/impl/test/trans/trans2.cpp @@ -23,6 +23,11 @@ int32_t sendReq(const SEpSet *pEpSet, SRpcMsg *pMsg) { return -1; } +int32_t putToQueue(void *pMgmt, SRpcMsg *pMsg) { + terrno = TSDB_CODE_INVALID_PTR; + return -1; +} + class MndTestTrans2 : public ::testing::Test { protected: static void InitLog() { @@ -55,6 +60,9 @@ class MndTestTrans2 : public ::testing::Test { msgCb.reportStartupFp = reportStartup; msgCb.sendReqFp = sendReq; msgCb.sendRspFp = sendRsp; + msgCb.queueFps[SYNC_QUEUE] = putToQueue; + msgCb.queueFps[WRITE_QUEUE] = putToQueue; + msgCb.queueFps[READ_QUEUE] = putToQueue; msgCb.mgmt = (SMgmtWrapper *)(&msgCb); // hack tmsgSetDefault(&msgCb); @@ -77,6 +85,7 @@ class MndTestTrans2 : public ::testing::Test { static void SetUpTestSuite() { InitLog(); walInit(); + syncInit(); InitMnode(); } From ee4b694a3540c9f185c5e349625e6d8a4ca73cd6 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Mon, 23 May 2022 18:10:04 +0800 Subject: [PATCH 16/32] enh(sync) sync/mnode integration, syncStart async -> sync --- source/libs/sync/src/syncAppendEntries.c | 4 +++- source/libs/sync/src/syncCommit.c | 4 +++- source/libs/sync/test/syncConfigChangeTest.cpp | 5 +++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 18735f5b71..ef66d580fb 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -362,7 +362,9 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { // restore finish if (pEntry->index == ths->pLogStore->getLastIndex(ths->pLogStore)) { if (ths->restoreFinish == false) { - ths->pFsm->FpRestoreFinish(ths->pFsm); + if (ths->pFsm->FpRestoreFinish != NULL) { + ths->pFsm->FpRestoreFinish(ths->pFsm); + } ths->restoreFinish = true; tsem_post(&ths->restoreSem); diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 557f69a2ce..aa4bcb4fae 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -139,7 +139,9 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { // restore finish if (pEntry->index == pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore)) { if (pSyncNode->restoreFinish == false) { - pSyncNode->pFsm->FpRestoreFinish(pSyncNode->pFsm); + if (pSyncNode->pFsm->FpRestoreFinish != NULL) { + pSyncNode->pFsm->FpRestoreFinish(pSyncNode->pFsm); + } pSyncNode->restoreFinish = true; tsem_post(&pSyncNode->restoreSem); diff --git a/source/libs/sync/test/syncConfigChangeTest.cpp b/source/libs/sync/test/syncConfigChangeTest.cpp index cff692239a..0850ef6343 100644 --- a/source/libs/sync/test/syncConfigChangeTest.cpp +++ b/source/libs/sync/test/syncConfigChangeTest.cpp @@ -73,12 +73,17 @@ int32_t GetSnapshotCb(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { return 0; } +void FpRestoreFinishCb(struct SSyncFSM* pFsm) { + sTrace("==callback== ==FpRestoreFinishCb=="); +} + SSyncFSM* createFsm() { SSyncFSM* pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); pFsm->FpCommitCb = CommitCb; pFsm->FpPreCommitCb = PreCommitCb; pFsm->FpRollBackCb = RollBackCb; pFsm->FpGetSnapshot = GetSnapshotCb; + pFsm->FpRestoreFinish = FpRestoreFinishCb; return pFsm; } From 5ec450d01cac6888af809566c356ce7ab6a547ef Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 23 May 2022 18:15:31 +0800 Subject: [PATCH 17/32] fix: stop sync after queue is empty --- source/dnode/mnode/impl/src/mndSync.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 28b8382e7c..68f1d0cd8e 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -109,11 +109,15 @@ int32_t mndInitSync(SMnode *pMnode) { return -1; } + mDebug("mnode sync is opened, id:%" PRId64, pMgmt->sync); return 0; } void mndCleanupSync(SMnode *pMnode) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; + syncStop(pMgmt->sync); + mDebug("sync:%" PRId64 " is stopped", pMgmt->sync); + tsem_destroy(&pMgmt->syncSem); if (pMgmt->pWal != NULL) { walClose(pMgmt->pWal); @@ -163,9 +167,10 @@ void mndSyncStart(SMnode *pMnode) { } pMnode->syncMgmt.restored = true; + mDebug("sync:%" PRId64 " is started", pMnode->syncMgmt.sync); } -void mndSyncStop(SMnode *pMnode) { syncStop(pMnode->syncMgmt.sync); } +void mndSyncStop(SMnode *pMnode) {} bool mndIsMaster(SMnode *pMnode) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; From aeb86ee8ac6d27554e4b010bc690b4aaa95e8e80 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Mon, 23 May 2022 18:22:04 +0800 Subject: [PATCH 18/32] enh(sync) vnode FpRestoreFinish --- source/dnode/vnode/src/vnd/vnodeSync.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 8659c41807..538d3f7f87 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -141,5 +141,6 @@ SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) { pFsm->FpPreCommitCb = vnodeSyncPreCommitMsg; pFsm->FpRollBackCb = vnodeSyncRollBackMsg; pFsm->FpGetSnapshot = vnodeSyncGetSnapshot; + pFsm->FpRestoreFinish = NULL; return pFsm; } \ No newline at end of file From 102a08f44be0445e2ea34c3485dd532250765aae Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Mon, 23 May 2022 20:26:12 +0800 Subject: [PATCH 19/32] enh(sync) sync/mnode integration, add log --- source/libs/sync/src/syncAppendEntries.c | 1 + source/libs/sync/src/syncCommit.c | 1 + 2 files changed, 2 insertions(+) diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index ef66d580fb..5f90325f65 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -368,6 +368,7 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { ths->restoreFinish = true; tsem_post(&ths->restoreSem); + sInfo("==syncNodeOnAppendEntriesCb== RestoreFinish tsem_post"); } } diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index aa4bcb4fae..f5486cd1f0 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -145,6 +145,7 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { pSyncNode->restoreFinish = true; tsem_post(&pSyncNode->restoreSem); + sInfo("==syncMaybeAdvanceCommitIndex== RestoreFinish tsem_post"); } } From 3629958b43a6bddd72fedf633c158ec09948053f Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Mon, 23 May 2022 20:31:35 +0800 Subject: [PATCH 20/32] feat(tmq): serializer and deserializer for tq exec --- include/util/tencode.h | 2 +- source/dnode/vnode/src/inc/tq.h | 7 ++-- source/dnode/vnode/src/tq/tq.c | 57 +++++++++++++++++++++++++++++---- source/libs/wal/src/walMgmt.c | 16 ++++----- 4 files changed, 65 insertions(+), 17 deletions(-) diff --git a/include/util/tencode.h b/include/util/tencode.h index af38d694e2..cbacd59fa7 100644 --- a/include/util/tencode.h +++ b/include/util/tencode.h @@ -82,7 +82,7 @@ typedef struct { do { \ SEncoder coder = {0}; \ tEncoderInit(&coder, NULL, 0); \ - if ((E)(&coder, S) == 0) { \ + if ((E)(&coder, S) >= 0) { \ SIZE = coder.pos; \ RET = 0; \ } else { \ diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index f89df4a96f..40b490da47 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -20,9 +20,9 @@ #include "executor.h" #include "os.h" -#include "tcache.h" #include "thash.h" #include "tmsg.h" +#include "tqueue.h" #include "trpc.h" #include "ttimer.h" #include "wal.h" @@ -86,6 +86,9 @@ typedef struct { qTaskInfo_t task[5]; } STqExec; +int32_t tEncodeSTqExec(SEncoder* pEncoder, const STqExec* pExec); +int32_t tDecodeSTqExec(SDecoder* pDecoder, STqExec* pExec); + struct STQ { char* path; SHashObj* pushMgr; // consumerId -> STqExec* @@ -93,7 +96,7 @@ struct STQ { SHashObj* pStreamTasks; SVnode* pVnode; SWal* pWal; - // TDB* pTdb; + TDB* pTdb; }; typedef struct { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 6ca523b580..f7b4ba93a6 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -14,14 +14,25 @@ */ #include "tq.h" -#include "tqueue.h" int32_t tqInit() { - // + int8_t old = atomic_val_compare_exchange_8(&tqMgmt.inited, 0, 1); + if (old == 0) { + tqMgmt.timer = taosTmrInit(10000, 100, 10000, "TQ"); + if (tqMgmt.timer == NULL) { + atomic_store_8(&tqMgmt.inited, 0); + return -1; + } + } return 0; } -void tqCleanUp() {} +void tqCleanUp() { + int8_t old = atomic_val_compare_exchange_8(&tqMgmt.inited, 1, 2); + if (old != 1) return; + taosTmrCleanUp(tqMgmt.timer); + atomic_store_8(&tqMgmt.inited, 0); +} STQ* tqOpen(const char* path, SVnode* pVnode, SWal* pWal) { STQ* pTq = taosMemoryMalloc(sizeof(STQ)); @@ -32,9 +43,9 @@ STQ* tqOpen(const char* path, SVnode* pVnode, SWal* pWal) { pTq->path = strdup(path); pTq->pVnode = pVnode; pTq->pWal = pWal; - /*if (tdbOpen(path, 4096, 1, &pTq->pTdb) < 0) {*/ - /*ASSERT(0);*/ - /*}*/ + if (tdbOpen(path, 4096, 1, &pTq->pTdb) < 0) { + ASSERT(0); + } pTq->execs = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK); @@ -51,11 +62,45 @@ void tqClose(STQ* pTq) { taosHashCleanup(pTq->execs); taosHashCleanup(pTq->pStreamTasks); taosHashCleanup(pTq->pushMgr); + tdbClose(pTq->pTdb); taosMemoryFree(pTq); } // TODO } +int32_t tEncodeSTqExec(SEncoder* pEncoder, const STqExec* pExec) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeCStr(pEncoder, pExec->subKey) < 0) return -1; + if (tEncodeI64(pEncoder, pExec->consumerId) < 0) return -1; + if (tEncodeI32(pEncoder, pExec->epoch) < 0) return -1; + if (tEncodeI8(pEncoder, pExec->subType) < 0) return -1; + if (tEncodeI8(pEncoder, pExec->withTbName) < 0) return -1; + if (tEncodeI8(pEncoder, pExec->withSchema) < 0) return -1; + if (tEncodeI8(pEncoder, pExec->withTag) < 0) return -1; + if (pExec->subType == TOPIC_SUB_TYPE__TABLE) { + if (tEncodeCStr(pEncoder, pExec->qmsg) < 0) return -1; + // TODO encode modified exec + } + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeSTqExec(SDecoder* pDecoder, STqExec* pExec) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeCStrTo(pDecoder, pExec->subKey) < 0) return -1; + if (tDecodeI64(pDecoder, &pExec->consumerId) < 0) return -1; + if (tDecodeI32(pDecoder, &pExec->epoch) < 0) return -1; + if (tDecodeI8(pDecoder, &pExec->subType) < 0) return -1; + if (tDecodeI8(pDecoder, &pExec->withTbName) < 0) return -1; + if (tDecodeI8(pDecoder, &pExec->withSchema) < 0) return -1; + if (tDecodeI8(pDecoder, &pExec->withTag) < 0) return -1; + if (pExec->subType == TOPIC_SUB_TYPE__TABLE) { + if (tDecodeCStrAlloc(pDecoder, &pExec->qmsg) < 0) return -1; + // TODO decode modified exec + } + tEndDecode(pDecoder); + return 0; +} int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { void* pIter = NULL; while (1) { diff --git a/source/libs/wal/src/walMgmt.c b/source/libs/wal/src/walMgmt.c index ada1f599f2..f2f423ddf5 100644 --- a/source/libs/wal/src/walMgmt.c +++ b/source/libs/wal/src/walMgmt.c @@ -14,17 +14,17 @@ */ #define _DEFAULT_SOURCE -#include "tcompare.h" #include "os.h" #include "taoserror.h" +#include "tcompare.h" #include "tref.h" #include "walInt.h" typedef struct { - int8_t stop; - int8_t inited; - uint32_t seq; - int32_t refSetId; + int8_t stop; + int8_t inited; + uint32_t seq; + int32_t refSetId; TdThread thread; } SWalMgmt; @@ -53,13 +53,14 @@ int32_t walInit() { } void walCleanUp() { - int8_t old = atomic_val_compare_exchange_8(&tsWal.inited, 1, 0); - if (old == 0) { + int8_t old = atomic_val_compare_exchange_8(&tsWal.inited, 1, 2); + if (old != 1) { return; } walStopThread(); taosCloseRef(tsWal.refSetId); wInfo("wal module is cleaned up"); + atomic_store_8(&tsWal.inited, 0); } SWal *walOpen(const char *path, SWalCfg *pCfg) { @@ -126,7 +127,6 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) { } if (walCheckAndRepairIdx(pWal) < 0) { - } wDebug("vgId:%d, wal:%p is opened, level:%d fsyncPeriod:%d", pWal->cfg.vgId, pWal, pWal->cfg.level, From 0ad9c4cebd4a5eb4fcb62768e940d84b9ad8615c Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Mon, 23 May 2022 20:40:37 +0800 Subject: [PATCH 21/32] fix: init once --- source/dnode/vnode/src/inc/tq.h | 2 +- source/dnode/vnode/src/tq/tq.c | 22 +++++++++++---- source/libs/wal/src/walMgmt.c | 47 ++++++++++++++++++++------------- 3 files changed, 47 insertions(+), 24 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 40b490da47..ad3f8cc869 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -104,7 +104,7 @@ typedef struct { tmr_h timer; } STqMgmt; -static STqMgmt tqMgmt; +static STqMgmt tqMgmt = {0}; // init once int tqInit(); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index f7b4ba93a6..bd48ed9b4c 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -16,22 +16,34 @@ #include "tq.h" int32_t tqInit() { - int8_t old = atomic_val_compare_exchange_8(&tqMgmt.inited, 0, 1); + int8_t old; + while (1) { + old = atomic_val_compare_exchange_8(&tqMgmt.inited, 0, 2); + if (old != 2) break; + } + if (old == 0) { tqMgmt.timer = taosTmrInit(10000, 100, 10000, "TQ"); if (tqMgmt.timer == NULL) { atomic_store_8(&tqMgmt.inited, 0); return -1; } + atomic_store_8(&tqMgmt.inited, 1); } return 0; } void tqCleanUp() { - int8_t old = atomic_val_compare_exchange_8(&tqMgmt.inited, 1, 2); - if (old != 1) return; - taosTmrCleanUp(tqMgmt.timer); - atomic_store_8(&tqMgmt.inited, 0); + int8_t old; + while (1) { + old = atomic_val_compare_exchange_8(&tqMgmt.inited, 1, 2); + if (old != 2) break; + } + + if (old == 1) { + taosTmrCleanUp(tqMgmt.timer); + atomic_store_8(&tqMgmt.inited, 0); + } } STQ* tqOpen(const char* path, SVnode* pVnode, SWal* pWal) { diff --git a/source/libs/wal/src/walMgmt.c b/source/libs/wal/src/walMgmt.c index f2f423ddf5..71cd6de73f 100644 --- a/source/libs/wal/src/walMgmt.c +++ b/source/libs/wal/src/walMgmt.c @@ -36,31 +36,42 @@ static void walFreeObj(void *pWal); int64_t walGetSeq() { return (int64_t)atomic_load_32(&tsWal.seq); } int32_t walInit() { - int8_t old = atomic_val_compare_exchange_8(&tsWal.inited, 0, 1); - if (old == 1) return 0; - - tsWal.refSetId = taosOpenRef(TSDB_MIN_VNODES, walFreeObj); - - int32_t code = walCreateThread(); - if (code != 0) { - wError("failed to init wal module since %s", tstrerror(code)); - atomic_store_8(&tsWal.inited, 0); - return code; + int8_t old; + while (1) { + old = atomic_val_compare_exchange_8(&tsWal.inited, 0, 2); + if (old != 2) break; + } + + if (old == 0) { + tsWal.refSetId = taosOpenRef(TSDB_MIN_VNODES, walFreeObj); + + int32_t code = walCreateThread(); + if (code != 0) { + wError("failed to init wal module since %s", tstrerror(code)); + atomic_store_8(&tsWal.inited, 0); + return code; + } + + wInfo("wal module is initialized, rsetId:%d", tsWal.refSetId); + atomic_store_8(&tsWal.inited, 1); } - wInfo("wal module is initialized, rsetId:%d", tsWal.refSetId); return 0; } void walCleanUp() { - int8_t old = atomic_val_compare_exchange_8(&tsWal.inited, 1, 2); - if (old != 1) { - return; + int8_t old; + while (1) { + old = atomic_val_compare_exchange_8(&tsWal.inited, 1, 2); + if (old != 2) break; + } + + if (old == 1) { + walStopThread(); + taosCloseRef(tsWal.refSetId); + wInfo("wal module is cleaned up"); + atomic_store_8(&tsWal.inited, 0); } - walStopThread(); - taosCloseRef(tsWal.refSetId); - wInfo("wal module is cleaned up"); - atomic_store_8(&tsWal.inited, 0); } SWal *walOpen(const char *path, SWalCfg *pCfg) { From a878b3edd5baa20d02d9affd9661728d9d35d438 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 23 May 2022 21:08:00 +0800 Subject: [PATCH 22/32] fix: commit log should not be null --- include/util/taoserror.h | 1 + source/dnode/mnode/impl/inc/mndTopic.h | 2 +- source/dnode/mnode/impl/src/mndConsumer.c | 2 +- source/dnode/mnode/impl/src/mndDnode.c | 16 ++++++------- source/dnode/mnode/impl/src/mndOffset.c | 3 ++- source/dnode/mnode/impl/src/mndStb.c | 4 +--- source/dnode/mnode/impl/src/mndStream.c | 8 +++---- source/dnode/mnode/impl/src/mndSubscribe.c | 2 +- source/dnode/mnode/impl/src/mndTopic.c | 26 +++++++++++----------- source/dnode/mnode/impl/src/mndTrans.c | 6 +++++ source/dnode/mnode/impl/src/mndUser.c | 24 ++++++++++---------- source/util/src/terror.c | 1 + 12 files changed, 51 insertions(+), 44 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 9868c2cc0d..cda08a96a9 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -253,6 +253,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_MND_TRANS_INVALID_STAGE TAOS_DEF_ERROR_CODE(0, 0x03D2) #define TSDB_CODE_MND_TRANS_CONFLICT TAOS_DEF_ERROR_CODE(0, 0x03D3) #define TSDB_CODE_MND_TRANS_UNKNOW_ERROR TAOS_DEF_ERROR_CODE(0, 0x03D4) +#define TSDB_CODE_MND_TRANS_CLOG_IS_NULL TAOS_DEF_ERROR_CODE(0, 0x03D5) // mnode-mq #define TSDB_CODE_MND_TOPIC_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03E0) diff --git a/source/dnode/mnode/impl/inc/mndTopic.h b/source/dnode/mnode/impl/inc/mndTopic.h index d7e6f9c87b..c5c4800e02 100644 --- a/source/dnode/mnode/impl/inc/mndTopic.h +++ b/source/dnode/mnode/impl/inc/mndTopic.h @@ -35,7 +35,7 @@ int32_t mndDropTopicByDB(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); const char *mndTopicGetShowName(const char topic[TSDB_TOPIC_FNAME_LEN]); -int32_t mndSetTopicRedoLogs(SMnode *pMnode, STrans *pTrans, SMqTopicObj *pTopic); +int32_t mndSetTopicCommitLogs(SMnode *pMnode, STrans *pTrans, SMqTopicObj *pTopic); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index 8b2799833b..1bb003bab9 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -419,7 +419,7 @@ static int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) { SMqTopicObj topicObj = {0}; memcpy(&topicObj, pTopic, sizeof(SMqTopicObj)); topicObj.refConsumerCnt = pTopic->refConsumerCnt + 1; - if (mndSetTopicRedoLogs(pMnode, pTrans, &topicObj) != 0) goto SUBSCRIBE_OVER; + if (mndSetTopicCommitLogs(pMnode, pTrans, &topicObj) != 0) goto SUBSCRIBE_OVER; mndReleaseTopic(pMnode, pTopic); } diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 01ff08cef9..0cac7fd86b 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -448,13 +448,13 @@ static int32_t mndCreateDnode(SMnode *pMnode, SRpcMsg *pReq, SCreateDnodeReq *pC } mDebug("trans:%d, used to create dnode:%s", pTrans->id, dnodeObj.ep); - SSdbRaw *pRedoRaw = mndDnodeActionEncode(&dnodeObj); - if (pRedoRaw == NULL || mndTransAppendRedolog(pTrans, pRedoRaw) != 0) { - mError("trans:%d, failed to append redo log since %s", pTrans->id, terrstr()); + SSdbRaw *pCommitRaw = mndDnodeActionEncode(&dnodeObj); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); mndTransDrop(pTrans); return -1; } - sdbSetRawStatus(pRedoRaw, SDB_STATUS_READY); + sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); if (mndTransPrepare(pMnode, pTrans) != 0) { mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); @@ -524,13 +524,13 @@ static int32_t mndDropDnode(SMnode *pMnode, SRpcMsg *pReq, SDnodeObj *pDnode) { } mDebug("trans:%d, used to drop dnode:%d", pTrans->id, pDnode->id); - SSdbRaw *pRedoRaw = mndDnodeActionEncode(pDnode); - if (pRedoRaw == NULL || mndTransAppendRedolog(pTrans, pRedoRaw) != 0) { - mError("trans:%d, failed to append redo log since %s", pTrans->id, terrstr()); + SSdbRaw *pCommitRaw = mndDnodeActionEncode(pDnode); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); mndTransDrop(pTrans); return -1; } - sdbSetRawStatus(pRedoRaw, SDB_STATUS_DROPPED); + sdbSetRawStatus(pCommitRaw, SDB_STATUS_DROPPED); if (mndTransPrepare(pMnode, pTrans) != 0) { mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); diff --git a/source/dnode/mnode/impl/src/mndOffset.c b/source/dnode/mnode/impl/src/mndOffset.c index 6f42d66625..dca07f6a6d 100644 --- a/source/dnode/mnode/impl/src/mndOffset.c +++ b/source/dnode/mnode/impl/src/mndOffset.c @@ -153,6 +153,7 @@ int32_t mndCreateOffsets(STrans *pTrans, const char *cgroup, const char *topicNa return -1; } sdbSetRawStatus(pOffsetRaw, SDB_STATUS_READY); + // commit log or redo log? if (mndTransAppendRedolog(pTrans, pOffsetRaw) < 0) { return -1; } @@ -188,7 +189,7 @@ static int32_t mndProcessCommitOffsetReq(SRpcMsg *pMsg) { pOffsetObj->offset = pOffset->offset; SSdbRaw *pOffsetRaw = mndOffsetActionEncode(pOffsetObj); sdbSetRawStatus(pOffsetRaw, SDB_STATUS_READY); - mndTransAppendRedolog(pTrans, pOffsetRaw); + mndTransAppendCommitlog(pTrans, pOffsetRaw); if (create) { taosMemoryFree(pOffsetObj); } else { diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index f6043615ab..61f115e2ba 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -743,9 +743,7 @@ static int32_t mndCreateStb(SMnode *pMnode, SRpcMsg *pReq, SMCreateStbReq *pCrea mDebug("trans:%d, used to create stb:%s", pTrans->id, pCreate->name); - if (mndBuildStbFromReq(pMnode, &stbObj, pCreate, pDb) != 0) { - goto _OVER; - } + if (mndBuildStbFromReq(pMnode, &stbObj, pCreate, pDb) != 0) goto _OVER; if (mndAddStbToTrans(pMnode, pTrans, pDb, &stbObj) < 0) goto _OVER; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 7b6383a470..9de6138689 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -279,13 +279,13 @@ int32_t mndAddStreamToTrans(SMnode *pMnode, SStreamObj *pStream, const char *ast } mDebug("trans:%d, used to create stream:%s", pTrans->id, pStream->name); - SSdbRaw *pRedoRaw = mndStreamActionEncode(pStream); - if (pRedoRaw == NULL || mndTransAppendRedolog(pTrans, pRedoRaw) != 0) { - mError("trans:%d, failed to append redo log since %s", pTrans->id, terrstr()); + SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); mndTransDrop(pTrans); return -1; } - sdbSetRawStatus(pRedoRaw, SDB_STATUS_READY); + sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); return 0; } diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index c82472eec0..3713bd501a 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -479,7 +479,7 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu SMqTopicObj topicObj = {0}; memcpy(&topicObj, pTopic, sizeof(SMqTopicObj)); topicObj.refConsumerCnt = pTopic->refConsumerCnt - consumerNum; - if (mndSetTopicRedoLogs(pMnode, pTrans, &topicObj) != 0) goto REB_FAIL; + if (mndSetTopicCommitLogs(pMnode, pTrans, &topicObj) != 0) goto REB_FAIL; } } diff --git a/source/dnode/mnode/impl/src/mndTopic.c b/source/dnode/mnode/impl/src/mndTopic.c index b9b01a4391..ec3d30ff07 100644 --- a/source/dnode/mnode/impl/src/mndTopic.c +++ b/source/dnode/mnode/impl/src/mndTopic.c @@ -386,14 +386,14 @@ static int32_t mndCreateTopic(SMnode *pMnode, SRpcMsg *pReq, SCMCreateTopicReq * } mDebug("trans:%d, used to create topic:%s", pTrans->id, pCreate->name); - SSdbRaw *pRedoRaw = mndTopicActionEncode(&topicObj); - if (pRedoRaw == NULL || mndTransAppendRedolog(pTrans, pRedoRaw) != 0) { - mError("trans:%d, failed to append redo log since %s", pTrans->id, terrstr()); + SSdbRaw *pCommitRaw = mndTopicActionEncode(&topicObj); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); taosMemoryFreeClear(topicObj.physicalPlan); mndTransDrop(pTrans); return -1; } - sdbSetRawStatus(pRedoRaw, SDB_STATUS_READY); + sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); if (mndTransPrepare(pMnode, pTrans) != 0) { mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); @@ -473,13 +473,13 @@ CREATE_TOPIC_OVER: } static int32_t mndDropTopic(SMnode *pMnode, STrans *pTrans, SRpcMsg *pReq, SMqTopicObj *pTopic) { - SSdbRaw *pRedoRaw = mndTopicActionEncode(pTopic); - if (pRedoRaw == NULL || mndTransAppendRedolog(pTrans, pRedoRaw) != 0) { - mError("trans:%d, failed to append redo log since %s", pTrans->id, terrstr()); + SSdbRaw *pCommitRaw = mndTopicActionEncode(pTopic); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); mndTransDrop(pTrans); return -1; } - sdbSetRawStatus(pRedoRaw, SDB_STATUS_DROPPED); + sdbSetRawStatus(pCommitRaw, SDB_STATUS_DROPPED); if (mndTransPrepare(pMnode, pTrans) != 0) { mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); @@ -627,11 +627,11 @@ static int32_t mndRetrieveTopic(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBl return numOfRows; } -int32_t mndSetTopicRedoLogs(SMnode *pMnode, STrans *pTrans, SMqTopicObj *pTopic) { - SSdbRaw *pRedoRaw = mndTopicActionEncode(pTopic); - if (pRedoRaw == NULL) return -1; - if (mndTransAppendCommitlog(pTrans, pRedoRaw) != 0) return -1; - if (sdbSetRawStatus(pRedoRaw, SDB_STATUS_READY) != 0) return -1; +int32_t mndSetTopicCommitLogs(SMnode *pMnode, STrans *pTrans, SMqTopicObj *pTopic) { + SSdbRaw *pCommitRaw = mndTopicActionEncode(pTopic); + if (pCommitRaw == NULL) return -1; + if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) return -1; + if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY) != 0) return -1; return 0; } diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 5d205197d1..f567e3f3cf 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -768,6 +768,12 @@ int32_t mndTransPrepare(SMnode *pMnode, STrans *pTrans) { return -1; } + if (taosArrayGetSize(pTrans->commitLogs) <= 0) { + terrno = TSDB_CODE_MND_TRANS_CLOG_IS_NULL; + mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); + return -1; + } + mDebug("trans:%d, prepare transaction", pTrans->id); if (mndTransSync(pMnode, pTrans) != 0) { mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c index 88e646e765..5f2147a5fe 100644 --- a/source/dnode/mnode/impl/src/mndUser.c +++ b/source/dnode/mnode/impl/src/mndUser.c @@ -272,13 +272,13 @@ static int32_t mndCreateUser(SMnode *pMnode, char *acct, SCreateUserReq *pCreate } mDebug("trans:%d, used to create user:%s", pTrans->id, pCreate->user); - SSdbRaw *pRedoRaw = mndUserActionEncode(&userObj); - if (pRedoRaw == NULL || mndTransAppendRedolog(pTrans, pRedoRaw) != 0) { - mError("trans:%d, failed to append redo log since %s", pTrans->id, terrstr()); + SSdbRaw *pCommitRaw = mndUserActionEncode(&userObj); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + mError("trans:%d, failed to commit redo log since %s", pTrans->id, terrstr()); mndTransDrop(pTrans); return -1; } - sdbSetRawStatus(pRedoRaw, SDB_STATUS_READY); + sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); if (mndTransPrepare(pMnode, pTrans) != 0) { mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); @@ -352,13 +352,13 @@ static int32_t mndAlterUser(SMnode *pMnode, SUserObj *pOld, SUserObj *pNew, SRpc } mDebug("trans:%d, used to alter user:%s", pTrans->id, pOld->user); - SSdbRaw *pRedoRaw = mndUserActionEncode(pNew); - if (pRedoRaw == NULL || mndTransAppendRedolog(pTrans, pRedoRaw) != 0) { - mError("trans:%d, failed to append redo log since %s", pTrans->id, terrstr()); + SSdbRaw *pCommitRaw = mndUserActionEncode(pNew); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); mndTransDrop(pTrans); return -1; } - sdbSetRawStatus(pRedoRaw, SDB_STATUS_READY); + sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); if (mndTransPrepare(pMnode, pTrans) != 0) { mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); @@ -559,13 +559,13 @@ static int32_t mndDropUser(SMnode *pMnode, SRpcMsg *pReq, SUserObj *pUser) { } mDebug("trans:%d, used to drop user:%s", pTrans->id, pUser->user); - SSdbRaw *pRedoRaw = mndUserActionEncode(pUser); - if (pRedoRaw == NULL || mndTransAppendRedolog(pTrans, pRedoRaw) != 0) { - mError("trans:%d, failed to append redo log since %s", pTrans->id, terrstr()); + SSdbRaw *pCommitRaw = mndUserActionEncode(pUser); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); mndTransDrop(pTrans); return -1; } - sdbSetRawStatus(pRedoRaw, SDB_STATUS_DROPPED); + sdbSetRawStatus(pCommitRaw, SDB_STATUS_DROPPED); if (mndTransPrepare(pMnode, pTrans) != 0) { mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 3890a55ff1..7c4f0fa2dd 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -259,6 +259,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_NOT_EXIST, "Transaction not exist TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_INVALID_STAGE, "Invalid stage to kill") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_CONFLICT, "Conflict transaction not completed") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_UNKNOW_ERROR, "Unknown transaction error") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_CLOG_IS_NULL, "Transaction commitlog is null") // mnode-mq TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOPIC_ALREADY_EXIST, "Topic already exists") From 31c0ba6d78dcda7a3d2f2175e7d9bb41e9d29c2b Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 23 May 2022 21:24:50 +0800 Subject: [PATCH 23/32] refactor: remove utest of transTest --- source/dnode/mnode/impl/test/trans/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/dnode/mnode/impl/test/trans/CMakeLists.txt b/source/dnode/mnode/impl/test/trans/CMakeLists.txt index 55fc3abbc2..023c8caa62 100644 --- a/source/dnode/mnode/impl/test/trans/CMakeLists.txt +++ b/source/dnode/mnode/impl/test/trans/CMakeLists.txt @@ -31,7 +31,7 @@ target_include_directories( PUBLIC "${TD_SOURCE_DIR}/include/dnode/mnode" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../inc" ) -add_test( - NAME transTest2 - COMMAND transTest2 -) +# add_test( +# NAME transTest2 +# COMMAND transTest2 +# ) From 1d7bdbc351cf788bf445eb28f0ebc3616061763e Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Mon, 23 May 2022 21:37:13 +0800 Subject: [PATCH 24/32] fix(sync) sync/mnode integration dead lock --- source/libs/sync/inc/syncInt.h | 2 +- source/libs/sync/src/syncAppendEntries.c | 5 ++++- source/libs/sync/src/syncCommit.c | 5 ++++- source/libs/sync/src/syncMain.c | 27 ++++++++++++++++++++++-- source/libs/transport/inc/transComm.h | 2 +- 5 files changed, 35 insertions(+), 6 deletions(-) diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index d3345620e9..9246041b81 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -149,7 +149,7 @@ typedef struct SSyncNode { // restore state bool restoreFinish; - sem_t restoreSem; + //sem_t restoreSem; SSnapshot* pSnapshot; } SSyncNode; diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 5f90325f65..fa735e71c0 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -366,9 +366,12 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { ths->pFsm->FpRestoreFinish(ths->pFsm); } ths->restoreFinish = true; + sInfo("==syncNodeOnAppendEntriesCb== restoreFinish set true %p vgId:%d", ths, ths->vgId); + /* tsem_post(&ths->restoreSem); - sInfo("==syncNodeOnAppendEntriesCb== RestoreFinish tsem_post"); + sInfo("==syncNodeOnAppendEntriesCb== RestoreFinish tsem_post %p", ths); + */ } } diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index f5486cd1f0..18c6f8930a 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -143,9 +143,12 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { pSyncNode->pFsm->FpRestoreFinish(pSyncNode->pFsm); } pSyncNode->restoreFinish = true; + sInfo("==syncMaybeAdvanceCommitIndex== restoreFinish set true %p vgId:%d", pSyncNode, pSyncNode->vgId); + /* tsem_post(&pSyncNode->restoreSem); - sInfo("==syncMaybeAdvanceCommitIndex== RestoreFinish tsem_post"); + sInfo("==syncMaybeAdvanceCommitIndex== RestoreFinish tsem_post %p", pSyncNode); + */ } } diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index bffebc1693..821ed364e8 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -499,7 +499,7 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pSyncInfo) { pSyncNode->pSnapshot = taosMemoryMalloc(sizeof(SSnapshot)); pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, pSyncNode->pSnapshot); } - tsem_init(&(pSyncNode->restoreSem), 0, 0); + //tsem_init(&(pSyncNode->restoreSem), 0, 0); // start in syncNodeStart // start raft @@ -521,7 +521,19 @@ void syncNodeStart(SSyncNode* pSyncNode) { syncNodeAppendNoop(pSyncNode); syncMaybeAdvanceCommitIndex(pSyncNode); // maybe only one replica + /* + sInfo("==syncNodeStart== RestoreFinish begin 1 replica tsem_wait %p", pSyncNode); tsem_wait(&pSyncNode->restoreSem); + sInfo("==syncNodeStart== RestoreFinish end 1 replica tsem_wait %p", pSyncNode); + */ + + /* + while (pSyncNode->restoreFinish != true) { + taosMsleep(10); + } + */ + + sInfo("==syncNodeStart== restoreFinish ok 1 replica %p vgId:%d", pSyncNode, pSyncNode->vgId); return; } @@ -532,7 +544,18 @@ void syncNodeStart(SSyncNode* pSyncNode) { // ret = syncNodeStartPingTimer(pSyncNode); assert(ret == 0); + /* + sInfo("==syncNodeStart== RestoreFinish begin multi replica tsem_wait %p", pSyncNode); tsem_wait(&pSyncNode->restoreSem); + sInfo("==syncNodeStart== RestoreFinish end multi replica tsem_wait %p", pSyncNode); + */ + + /* + while (pSyncNode->restoreFinish != true) { + taosMsleep(10); + } + */ + sInfo("==syncNodeStart== restoreFinish ok multi replica %p vgId:%d", pSyncNode, pSyncNode->vgId); } void syncNodeStartStandBy(SSyncNode* pSyncNode) { @@ -573,7 +596,7 @@ void syncNodeClose(SSyncNode* pSyncNode) { taosMemoryFree(pSyncNode->pSnapshot); } - tsem_destroy(&pSyncNode->restoreSem); + //tsem_destroy(&pSyncNode->restoreSem); // free memory in syncFreeNode // taosMemoryFree(pSyncNode); diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index e71e19edce..30f799f39e 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -94,7 +94,7 @@ typedef void* queue[2]; /* Return the structure holding the given element. */ #define QUEUE_DATA(e, type, field) ((type*)((void*)((char*)(e)-offsetof(type, field)))) -#define TRANS_RETRY_COUNT_LIMIT 20 // retry count limit +#define TRANS_RETRY_COUNT_LIMIT 100 // retry count limit #define TRANS_RETRY_INTERVAL 15 // ms retry interval #define TRANS_CONN_TIMEOUT 3 // connect timeout From 679c946029b42adcf8f5a2c6d5f02de8c5e45b2c Mon Sep 17 00:00:00 2001 From: dapan1121 <89396746@qq.com> Date: Mon, 23 May 2022 21:45:47 +0800 Subject: [PATCH 25/32] scheduler refactor --- source/libs/scheduler/inc/schedulerInt.h | 28 +- source/libs/scheduler/src/schFlowCtrl.c | 2 +- source/libs/scheduler/src/schJob.c | 1312 +++++++++++ source/libs/scheduler/src/schRemote.c | 1231 ++++++++++ source/libs/scheduler/src/schUtil.c | 92 + source/libs/scheduler/src/scheduler.c | 2696 +--------------------- 6 files changed, 2664 insertions(+), 2697 deletions(-) create mode 100644 source/libs/scheduler/src/schJob.c create mode 100644 source/libs/scheduler/src/schRemote.c create mode 100644 source/libs/scheduler/src/schUtil.c diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h index 9e0878e118..ffac0f856d 100644 --- a/source/libs/scheduler/inc/schedulerInt.h +++ b/source/libs/scheduler/inc/schedulerInt.h @@ -264,7 +264,7 @@ int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, SQueryNodeAddr *addr, i SSchJob *schAcquireJob(int64_t refId); int32_t schReleaseJob(int64_t refId); void schFreeFlowCtrl(SSchJob *pJob); -int32_t schCheckJobNeedFlowCtrl(SSchJob *pJob, SSchLevel *pLevel); +int32_t schChkJobNeedFlowCtrl(SSchJob *pJob, SSchLevel *pLevel); int32_t schDecTaskFlowQuota(SSchJob *pJob, SSchTask *pTask); int32_t schCheckIncTaskFlowQuota(SSchJob *pJob, SSchTask *pTask, bool *enough); int32_t schLaunchTasksInFlowCtrlList(SSchJob *pJob, SSchTask *pTask); @@ -275,6 +275,32 @@ int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId); int32_t schCloneSMsgSendInfo(void *src, void **dst); int32_t schValidateAndBuildJob(SQueryPlan *pDag, SSchJob *pJob); void schFreeJobImpl(void *job); +int32_t schMakeHbCallbackParam(SSchJob *pJob, SSchTask *pTask, void **pParam); +int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx); +int32_t schEnsureHbConnection(SSchJob *pJob, SSchTask *pTask); +int32_t schUpdateHbConnection(SQueryNodeEpId *epId, SSchTrans *trans); +int32_t schHandleHbCallback(void *param, const SDataBuf *pMsg, int32_t code); +void schFreeRpcCtx(SRpcCtx *pCtx); +int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp); +bool schJobNeedToStop(SSchJob *pJob, int8_t *pStatus); +int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask); +int32_t schSaveJobQueryRes(SSchJob *pJob, SResReadyRsp *rsp); +int32_t schProcessOnExplainDone(SSchJob *pJob, SSchTask *pTask, SRetrieveTableRsp *pRsp); +void schProcessOnDataFetched(SSchJob *job); +int32_t schGetTaskFromTaskList(SHashObj *pTaskList, uint64_t taskId, SSchTask **pTask); +int32_t schUpdateTaskExecNodeHandle(SSchTask *pTask, void *handle, int32_t rspCode); +void schFreeRpcCtxVal(const void *arg); +int32_t schMakeBrokenLinkVal(SSchJob *pJob, SSchTask *pTask, SRpcBrokenlinkVal *brokenVal, bool isHb); +int32_t schRecordTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, void *handle); +int32_t schExecStaticExplain(void *transport, SArray *pNodeList, SQueryPlan *pDag, int64_t *job, const char *sql, + bool syncSchedule); +int32_t schExecJobImpl(void *transport, SArray *pNodeList, SQueryPlan *pDag, int64_t *job, const char *sql, + int64_t startTs, bool sync); +int32_t schChkUpdateJobStatus(SSchJob *pJob, int8_t newStatus); +int32_t schCancelJob(SSchJob *pJob); +int32_t schProcessOnJobDropped(SSchJob *pJob, int32_t errCode); +uint64_t schGenTaskId(void); +void schCloseJobRef(void); #ifdef __cplusplus diff --git a/source/libs/scheduler/src/schFlowCtrl.c b/source/libs/scheduler/src/schFlowCtrl.c index fbcca403bb..85d205f5f2 100644 --- a/source/libs/scheduler/src/schFlowCtrl.c +++ b/source/libs/scheduler/src/schFlowCtrl.c @@ -40,7 +40,7 @@ void schFreeFlowCtrl(SSchJob *pJob) { pJob->flowCtrl = NULL; } -int32_t schCheckJobNeedFlowCtrl(SSchJob *pJob, SSchLevel *pLevel) { +int32_t schChkJobNeedFlowCtrl(SSchJob *pJob, SSchLevel *pLevel) { if (!SCH_IS_QUERY_JOB(pJob)) { SCH_JOB_DLOG("job no need flow ctrl, queryJob:%d", SCH_IS_QUERY_JOB(pJob)); return TSDB_CODE_SUCCESS; diff --git a/source/libs/scheduler/src/schJob.c b/source/libs/scheduler/src/schJob.c new file mode 100644 index 0000000000..14f4646397 --- /dev/null +++ b/source/libs/scheduler/src/schJob.c @@ -0,0 +1,1312 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "catalog.h" +#include "command.h" +#include "query.h" +#include "schedulerInt.h" +#include "tmsg.h" +#include "tref.h" +#include "trpc.h" + +FORCE_INLINE SSchJob *schAcquireJob(int64_t refId) { return (SSchJob *)taosAcquireRef(schMgmt.jobRef, refId); } + +FORCE_INLINE int32_t schReleaseJob(int64_t refId) { return taosReleaseRef(schMgmt.jobRef, refId); } + +int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *pLevel) { + pTask->plan = pPlan; + pTask->level = pLevel; + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START); + pTask->taskId = schGenTaskId(); + pTask->execNodes = taosArrayInit(SCH_MAX_CANDIDATE_EP_NUM, sizeof(SSchNodeInfo)); + if (NULL == pTask->execNodes) { + SCH_TASK_ELOG("taosArrayInit %d execNodes failed", SCH_MAX_CANDIDATE_EP_NUM); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t schInitJob(SSchJob **pSchJob, SQueryPlan *pDag, void *transport, SArray *pNodeList, const char *sql, + int64_t startTs, bool syncSchedule) { + int32_t code = 0; + int64_t refId = -1; + SSchJob *pJob = taosMemoryCalloc(1, sizeof(SSchJob)); + if (NULL == pJob) { + qError("QID:%" PRIx64 " calloc %d failed", pDag->queryId, (int32_t)sizeof(SSchJob)); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pJob->attr.explainMode = pDag->explainInfo.mode; + pJob->attr.syncSchedule = syncSchedule; + pJob->transport = transport; + pJob->sql = sql; + + if (pNodeList != NULL) { + pJob->nodeList = taosArrayDup(pNodeList); + } + + SCH_ERR_JRET(schValidateAndBuildJob(pDag, pJob)); + + if (SCH_IS_EXPLAIN_JOB(pJob)) { + SCH_ERR_JRET(qExecExplainBegin(pDag, &pJob->explainCtx, startTs)); + } + + pJob->execTasks = + taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); + if (NULL == pJob->execTasks) { + SCH_JOB_ELOG("taosHashInit %d execTasks failed", pDag->numOfSubplans); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pJob->succTasks = + taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); + if (NULL == pJob->succTasks) { + SCH_JOB_ELOG("taosHashInit %d succTasks failed", pDag->numOfSubplans); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pJob->failTasks = + taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); + if (NULL == pJob->failTasks) { + SCH_JOB_ELOG("taosHashInit %d failTasks failed", pDag->numOfSubplans); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + tsem_init(&pJob->rspSem, 0, 0); + + refId = taosAddRef(schMgmt.jobRef, pJob); + if (refId < 0) { + SCH_JOB_ELOG("taosAddRef job failed, error:%s", tstrerror(terrno)); + SCH_ERR_JRET(terrno); + } + + atomic_add_fetch_32(&schMgmt.jobNum, 1); + + if (NULL == schAcquireJob(refId)) { + SCH_JOB_ELOG("schAcquireJob job failed, refId:%" PRIx64, refId); + SCH_ERR_JRET(TSDB_CODE_SCH_STATUS_ERROR); + } + + pJob->refId = refId; + + SCH_JOB_DLOG("job refId:%" PRIx64, pJob->refId); + + pJob->status = JOB_TASK_STATUS_NOT_START; + + *pSchJob = pJob; + + return TSDB_CODE_SUCCESS; + +_return: + + if (refId < 0) { + schFreeJobImpl(pJob); + } else { + taosRemoveRef(schMgmt.jobRef, refId); + } + SCH_RET(code); +} + +void schFreeTask(SSchTask *pTask) { + if (pTask->candidateAddrs) { + taosArrayDestroy(pTask->candidateAddrs); + } + + taosMemoryFreeClear(pTask->msg); + + if (pTask->children) { + taosArrayDestroy(pTask->children); + } + + if (pTask->parents) { + taosArrayDestroy(pTask->parents); + } + + if (pTask->execNodes) { + taosArrayDestroy(pTask->execNodes); + } +} + +FORCE_INLINE bool schJobNeedToStop(SSchJob *pJob, int8_t *pStatus) { + int8_t status = SCH_GET_JOB_STATUS(pJob); + if (pStatus) { + *pStatus = status; + } + + return (status == JOB_TASK_STATUS_FAILED || status == JOB_TASK_STATUS_CANCELLED || + status == JOB_TASK_STATUS_CANCELLING || status == JOB_TASK_STATUS_DROPPING || + status == JOB_TASK_STATUS_SUCCEED); +} + +int32_t schChkUpdateJobStatus(SSchJob *pJob, int8_t newStatus) { + int32_t code = 0; + + int8_t oriStatus = 0; + + while (true) { + oriStatus = SCH_GET_JOB_STATUS(pJob); + + if (oriStatus == newStatus) { + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + switch (oriStatus) { + case JOB_TASK_STATUS_NULL: + if (newStatus != JOB_TASK_STATUS_NOT_START) { + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + break; + case JOB_TASK_STATUS_NOT_START: + if (newStatus != JOB_TASK_STATUS_EXECUTING) { + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + break; + case JOB_TASK_STATUS_EXECUTING: + if (newStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED && newStatus != JOB_TASK_STATUS_FAILED && + newStatus != JOB_TASK_STATUS_CANCELLING && newStatus != JOB_TASK_STATUS_CANCELLED && + newStatus != JOB_TASK_STATUS_DROPPING) { + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + break; + case JOB_TASK_STATUS_PARTIAL_SUCCEED: + if (newStatus != JOB_TASK_STATUS_FAILED && newStatus != JOB_TASK_STATUS_SUCCEED && + newStatus != JOB_TASK_STATUS_DROPPING) { + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + break; + case JOB_TASK_STATUS_SUCCEED: + case JOB_TASK_STATUS_FAILED: + case JOB_TASK_STATUS_CANCELLING: + if (newStatus != JOB_TASK_STATUS_DROPPING) { + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + break; + case JOB_TASK_STATUS_CANCELLED: + case JOB_TASK_STATUS_DROPPING: + SCH_ERR_JRET(TSDB_CODE_QRY_JOB_FREED); + break; + + default: + SCH_JOB_ELOG("invalid job status:%s", jobTaskStatusStr(oriStatus)); + SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + + if (oriStatus != atomic_val_compare_exchange_8(&pJob->status, oriStatus, newStatus)) { + continue; + } + + SCH_JOB_DLOG("job status updated from %s to %s", jobTaskStatusStr(oriStatus), jobTaskStatusStr(newStatus)); + + break; + } + + return TSDB_CODE_SUCCESS; + +_return: + + SCH_JOB_ELOG("invalid job status update, from %s to %s", jobTaskStatusStr(oriStatus), jobTaskStatusStr(newStatus)); + SCH_ERR_RET(code); + return TSDB_CODE_SUCCESS; +} + +int32_t schBuildTaskRalation(SSchJob *pJob, SHashObj *planToTask) { + for (int32_t i = 0; i < pJob->levelNum; ++i) { + SSchLevel *pLevel = taosArrayGet(pJob->levels, i); + + for (int32_t m = 0; m < pLevel->taskNum; ++m) { + SSchTask *pTask = taosArrayGet(pLevel->subTasks, m); + SSubplan *pPlan = pTask->plan; + int32_t childNum = pPlan->pChildren ? (int32_t)LIST_LENGTH(pPlan->pChildren) : 0; + int32_t parentNum = pPlan->pParents ? (int32_t)LIST_LENGTH(pPlan->pParents) : 0; + + if (childNum > 0) { + if (pJob->levelIdx == pLevel->level) { + SCH_JOB_ELOG("invalid query plan, lowest level, childNum:%d", childNum); + SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + + pTask->children = taosArrayInit(childNum, POINTER_BYTES); + if (NULL == pTask->children) { + SCH_TASK_ELOG("taosArrayInit %d children failed", childNum); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + } + + for (int32_t n = 0; n < childNum; ++n) { + SSubplan *child = (SSubplan *)nodesListGetNode(pPlan->pChildren, n); + SSchTask **childTask = taosHashGet(planToTask, &child, POINTER_BYTES); + if (NULL == childTask || NULL == *childTask) { + SCH_TASK_ELOG("subplan children relationship error, level:%d, taskIdx:%d, childIdx:%d", i, m, n); + SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + + if (NULL == taosArrayPush(pTask->children, childTask)) { + SCH_TASK_ELOG("taosArrayPush childTask failed, level:%d, taskIdx:%d, childIdx:%d", i, m, n); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SCH_TASK_DLOG("children info, the %d child TID %" PRIx64, n, (*childTask)->taskId); + } + + if (parentNum > 0) { + if (0 == pLevel->level) { + SCH_TASK_ELOG("invalid task info, level:0, parentNum:%d", parentNum); + SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + + pTask->parents = taosArrayInit(parentNum, POINTER_BYTES); + if (NULL == pTask->parents) { + SCH_TASK_ELOG("taosArrayInit %d parents failed", parentNum); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + } else { + if (0 != pLevel->level) { + SCH_TASK_ELOG("invalid task info, level:%d, parentNum:%d", pLevel->level, parentNum); + SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + } + + for (int32_t n = 0; n < parentNum; ++n) { + SSubplan *parent = (SSubplan *)nodesListGetNode(pPlan->pParents, n); + SSchTask **parentTask = taosHashGet(planToTask, &parent, POINTER_BYTES); + if (NULL == parentTask || NULL == *parentTask) { + SCH_TASK_ELOG("subplan parent relationship error, level:%d, taskIdx:%d, childIdx:%d", i, m, n); + SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + + if (NULL == taosArrayPush(pTask->parents, parentTask)) { + SCH_TASK_ELOG("taosArrayPush parentTask failed, level:%d, taskIdx:%d, childIdx:%d", i, m, n); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SCH_TASK_DLOG("parents info, the %d parent TID %" PRIx64, n, (*parentTask)->taskId); + } + + SCH_TASK_DLOG("level:%d, parentNum:%d, childNum:%d", i, parentNum, childNum); + } + } + + SSchLevel *pLevel = taosArrayGet(pJob->levels, 0); + if (SCH_IS_QUERY_JOB(pJob) && pLevel->taskNum > 1) { + SCH_JOB_ELOG("invalid query plan, level:0, taskNum:%d", pLevel->taskNum); + SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t schRecordTaskSucceedNode(SSchJob *pJob, SSchTask *pTask) { + SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); + if (NULL == addr) { + SCH_TASK_ELOG("taosArrayGet candidate addr failed, idx:%d, size:%d", pTask->candidateIdx, + (int32_t)taosArrayGetSize(pTask->candidateAddrs)); + SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + + pTask->succeedAddr = *addr; + + return TSDB_CODE_SUCCESS; +} + +int32_t schRecordTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, void *handle) { + SSchNodeInfo nodeInfo = {.addr = *addr, .handle = handle}; + + if (NULL == taosArrayPush(pTask->execNodes, &nodeInfo)) { + SCH_TASK_ELOG("taosArrayPush nodeInfo to execNodes list failed, errno:%d", errno); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SCH_TASK_DLOG("task execNode recorded, handle:%p", handle); + + return TSDB_CODE_SUCCESS; +} + +int32_t schRecordQueryDataSrc(SSchJob *pJob, SSchTask *pTask) { + if (!SCH_IS_DATA_SRC_QRY_TASK(pTask)) { + return TSDB_CODE_SUCCESS; + } + + taosArrayPush(pJob->dataSrcTasks, &pTask); + + return TSDB_CODE_SUCCESS; +} + + +int32_t schValidateAndBuildJob(SQueryPlan *pDag, SSchJob *pJob) { + int32_t code = 0; + pJob->queryId = pDag->queryId; + + if (pDag->numOfSubplans <= 0) { + SCH_JOB_ELOG("invalid subplan num:%d", pDag->numOfSubplans); + SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); + } + + pJob->dataSrcTasks = taosArrayInit(pDag->numOfSubplans, POINTER_BYTES); + if (NULL == pJob->dataSrcTasks) { + SCH_ERR_RET(TSDB_CODE_OUT_OF_MEMORY); + } + + int32_t levelNum = (int32_t)LIST_LENGTH(pDag->pSubplans); + if (levelNum <= 0) { + SCH_JOB_ELOG("invalid level num:%d", levelNum); + SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); + } + + SHashObj *planToTask = taosHashInit( + SCHEDULE_DEFAULT_MAX_TASK_NUM, + taosGetDefaultHashFunction(POINTER_BYTES == sizeof(int64_t) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_INT), false, + HASH_NO_LOCK); + if (NULL == planToTask) { + SCH_JOB_ELOG("taosHashInit %d failed", SCHEDULE_DEFAULT_MAX_TASK_NUM); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pJob->levels = taosArrayInit(levelNum, sizeof(SSchLevel)); + if (NULL == pJob->levels) { + SCH_JOB_ELOG("taosArrayInit %d failed", levelNum); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pJob->levelNum = levelNum; + pJob->levelIdx = levelNum - 1; + + pJob->subPlans = pDag->pSubplans; + + SSchLevel level = {0}; + SNodeListNode *plans = NULL; + int32_t taskNum = 0; + SSchLevel *pLevel = NULL; + + level.status = JOB_TASK_STATUS_NOT_START; + + for (int32_t i = 0; i < levelNum; ++i) { + if (NULL == taosArrayPush(pJob->levels, &level)) { + SCH_JOB_ELOG("taosArrayPush level failed, level:%d", i); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pLevel = taosArrayGet(pJob->levels, i); + pLevel->level = i; + + plans = (SNodeListNode *)nodesListGetNode(pDag->pSubplans, i); + if (NULL == plans) { + SCH_JOB_ELOG("empty level plan, level:%d", i); + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + + taskNum = (int32_t)LIST_LENGTH(plans->pNodeList); + if (taskNum <= 0) { + SCH_JOB_ELOG("invalid level plan number:%d, level:%d", taskNum, i); + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + + pLevel->taskNum = taskNum; + + pLevel->subTasks = taosArrayInit(taskNum, sizeof(SSchTask)); + if (NULL == pLevel->subTasks) { + SCH_JOB_ELOG("taosArrayInit %d failed", taskNum); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + for (int32_t n = 0; n < taskNum; ++n) { + SSubplan *plan = (SSubplan *)nodesListGetNode(plans->pNodeList, n); + + SCH_SET_JOB_TYPE(pJob, plan->subplanType); + + SSchTask task = {0}; + SSchTask *pTask = &task; + + SCH_ERR_JRET(schInitTask(pJob, &task, plan, pLevel)); + + void *p = taosArrayPush(pLevel->subTasks, &task); + if (NULL == p) { + SCH_TASK_ELOG("taosArrayPush task to level failed, level:%d, taskIdx:%d", pLevel->level, n); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SCH_ERR_JRET(schRecordQueryDataSrc(pJob, p)); + + if (0 != taosHashPut(planToTask, &plan, POINTER_BYTES, &p, POINTER_BYTES)) { + SCH_TASK_ELOG("taosHashPut to planToTaks failed, taskIdx:%d", n); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + ++pJob->taskNum; + } + + SCH_JOB_DLOG("level initialized, taskNum:%d", taskNum); + } + + SCH_ERR_JRET(schBuildTaskRalation(pJob, planToTask)); + +_return: + if (planToTask) { + taosHashCleanup(planToTask); + } + + SCH_RET(code); +} + +int32_t schSetTaskCandidateAddrs(SSchJob *pJob, SSchTask *pTask) { + if (NULL != pTask->candidateAddrs) { + return TSDB_CODE_SUCCESS; + } + + pTask->candidateIdx = 0; + pTask->candidateAddrs = taosArrayInit(SCH_MAX_CANDIDATE_EP_NUM, sizeof(SQueryNodeAddr)); + if (NULL == pTask->candidateAddrs) { + SCH_TASK_ELOG("taosArrayInit %d condidate addrs failed", SCH_MAX_CANDIDATE_EP_NUM); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + if (pTask->plan->execNode.epSet.numOfEps > 0) { + if (NULL == taosArrayPush(pTask->candidateAddrs, &pTask->plan->execNode)) { + SCH_TASK_ELOG("taosArrayPush execNode to candidate addrs failed, errno:%d", errno); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SCH_TASK_DLOG("use execNode from plan as candidate addr, numOfEps:%d", pTask->plan->execNode.epSet.numOfEps); + + return TSDB_CODE_SUCCESS; + } + + int32_t addNum = 0; + int32_t nodeNum = 0; + if (pJob->nodeList) { + nodeNum = taosArrayGetSize(pJob->nodeList); + + for (int32_t i = 0; i < nodeNum && addNum < SCH_MAX_CANDIDATE_EP_NUM; ++i) { + SQueryNodeAddr *naddr = taosArrayGet(pJob->nodeList, i); + + if (NULL == taosArrayPush(pTask->candidateAddrs, naddr)) { + SCH_TASK_ELOG("taosArrayPush execNode to candidate addrs failed, addNum:%d, errno:%d", addNum, errno); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + ++addNum; + } + } + + if (addNum <= 0) { + SCH_TASK_ELOG("no available execNode as candidates, nodeNum:%d", nodeNum); + SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); + } + + /* + for (int32_t i = 0; i < job->dataSrcEps.numOfEps && addNum < SCH_MAX_CANDIDATE_EP_NUM; ++i) { + strncpy(epSet->fqdn[epSet->numOfEps], job->dataSrcEps.fqdn[i], sizeof(job->dataSrcEps.fqdn[i])); + epSet->port[epSet->numOfEps] = job->dataSrcEps.port[i]; + + ++epSet->numOfEps; + } + */ + + return TSDB_CODE_SUCCESS; +} + +int32_t schRemoveTaskFromExecList(SSchJob *pJob, SSchTask *pTask) { + int32_t code = taosHashRemove(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId)); + if (code) { + SCH_TASK_ELOG("task failed to rm from execTask list, code:%x", code); + SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + + return TSDB_CODE_SUCCESS; +} + + +int32_t schPushTaskToExecList(SSchJob *pJob, SSchTask *pTask) { + int32_t code = taosHashPut(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId), &pTask, POINTER_BYTES); + if (0 != code) { + if (HASH_NODE_EXIST(code)) { + SCH_TASK_ELOG("task already in execTask list, code:%x", code); + SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + + SCH_TASK_ELOG("taosHashPut task to execTask list failed, errno:%d", errno); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SCH_TASK_DLOG("task added to execTask list, numOfTasks:%d", taosHashGetSize(pJob->execTasks)); + + return TSDB_CODE_SUCCESS; +} + +int32_t schMoveTaskToSuccList(SSchJob *pJob, SSchTask *pTask, bool *moved) { + if (0 != taosHashRemove(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId))) { + SCH_TASK_WLOG("remove task from execTask list failed, may not exist, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); + } else { + SCH_TASK_DLOG("task removed from execTask list, numOfTasks:%d", taosHashGetSize(pJob->execTasks)); + } + + int32_t code = taosHashPut(pJob->succTasks, &pTask->taskId, sizeof(pTask->taskId), &pTask, POINTER_BYTES); + if (0 != code) { + if (HASH_NODE_EXIST(code)) { + *moved = true; + SCH_TASK_ELOG("task already in succTask list, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + SCH_TASK_ELOG("taosHashPut task to succTask list failed, errno:%d", errno); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + *moved = true; + + SCH_TASK_DLOG("task moved to succTask list, numOfTasks:%d", taosHashGetSize(pJob->succTasks)); + + return TSDB_CODE_SUCCESS; +} + +int32_t schMoveTaskToFailList(SSchJob *pJob, SSchTask *pTask, bool *moved) { + *moved = false; + + if (0 != taosHashRemove(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId))) { + SCH_TASK_WLOG("remove task from execTask list failed, may not exist, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); + } + + int32_t code = taosHashPut(pJob->failTasks, &pTask->taskId, sizeof(pTask->taskId), &pTask, POINTER_BYTES); + if (0 != code) { + if (HASH_NODE_EXIST(code)) { + *moved = true; + + SCH_TASK_WLOG("task already in failTask list, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + SCH_TASK_ELOG("taosHashPut task to failTask list failed, errno:%d", errno); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + *moved = true; + + SCH_TASK_DLOG("task moved to failTask list, numOfTasks:%d", taosHashGetSize(pJob->failTasks)); + + return TSDB_CODE_SUCCESS; +} + +int32_t schMoveTaskToExecList(SSchJob *pJob, SSchTask *pTask, bool *moved) { + if (0 != taosHashRemove(pJob->succTasks, &pTask->taskId, sizeof(pTask->taskId))) { + SCH_TASK_WLOG("remove task from succTask list failed, may not exist, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); + } + + int32_t code = taosHashPut(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId), &pTask, POINTER_BYTES); + if (0 != code) { + if (HASH_NODE_EXIST(code)) { + *moved = true; + + SCH_TASK_ELOG("task already in execTask list, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + SCH_TASK_ELOG("taosHashPut task to execTask list failed, errno:%d", errno); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + *moved = true; + + SCH_TASK_DLOG("task moved to execTask list, numOfTasks:%d", taosHashGetSize(pJob->execTasks)); + + return TSDB_CODE_SUCCESS; +} + +int32_t schTaskCheckSetRetry(SSchJob *pJob, SSchTask *pTask, int32_t errCode, bool *needRetry) { + int8_t status = 0; + ++pTask->tryTimes; + + if (schJobNeedToStop(pJob, &status)) { + *needRetry = false; + SCH_TASK_DLOG("task no more retry cause of job status, job status:%s", jobTaskStatusStr(status)); + return TSDB_CODE_SUCCESS; + } + + if (pTask->tryTimes >= REQUEST_MAX_TRY_TIMES) { + *needRetry = false; + SCH_TASK_DLOG("task no more retry since reach max try times, tryTimes:%d", pTask->tryTimes); + return TSDB_CODE_SUCCESS; + } + + if (!NEED_SCHEDULER_RETRY_ERROR(errCode)) { + *needRetry = false; + SCH_TASK_DLOG("task no more retry cause of errCode, errCode:%x - %s", errCode, tstrerror(errCode)); + return TSDB_CODE_SUCCESS; + } + + // TODO CHECK epList/condidateList + if (SCH_IS_DATA_SRC_TASK(pTask)) { + if (pTask->tryTimes >= SCH_TASK_NUM_OF_EPS(&pTask->plan->execNode)) { + *needRetry = false; + SCH_TASK_DLOG("task no more retry since all ep tried, tryTimes:%d, epNum:%d", pTask->tryTimes, + SCH_TASK_NUM_OF_EPS(&pTask->plan->execNode)); + return TSDB_CODE_SUCCESS; + } + } else { + int32_t candidateNum = taosArrayGetSize(pTask->candidateAddrs); + + if ((pTask->candidateIdx + 1) >= candidateNum) { + *needRetry = false; + SCH_TASK_DLOG("task no more retry since all candiates tried, candidateIdx:%d, candidateNum:%d", + pTask->candidateIdx, candidateNum); + return TSDB_CODE_SUCCESS; + } + } + + *needRetry = true; + SCH_TASK_DLOG("task need the %dth retry, errCode:%x - %s", pTask->tryTimes, errCode, tstrerror(errCode)); + + return TSDB_CODE_SUCCESS; +} + +int32_t schHandleTaskRetry(SSchJob *pJob, SSchTask *pTask) { + atomic_sub_fetch_32(&pTask->level->taskLaunchedNum, 1); + + SCH_ERR_RET(schRemoveTaskFromExecList(pJob, pTask)); + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START); + + if (SCH_TASK_NEED_FLOW_CTRL(pJob, pTask)) { + SCH_ERR_RET(schDecTaskFlowQuota(pJob, pTask)); + SCH_ERR_RET(schLaunchTasksInFlowCtrlList(pJob, pTask)); + } + + if (SCH_IS_DATA_SRC_TASK(pTask)) { + SCH_SWITCH_EPSET(&pTask->plan->execNode); + } else { + ++pTask->candidateIdx; + } + + SCH_ERR_RET(schLaunchTask(pJob, pTask)); + + return TSDB_CODE_SUCCESS; +} + +void schUpdateJobErrCode(SSchJob *pJob, int32_t errCode) { + if (TSDB_CODE_SUCCESS == errCode) { + return; + } + + int32_t origCode = atomic_load_32(&pJob->errCode); + if (TSDB_CODE_SUCCESS == origCode) { + if (origCode == atomic_val_compare_exchange_32(&pJob->errCode, origCode, errCode)) { + goto _return; + } + + origCode = atomic_load_32(&pJob->errCode); + } + + if (NEED_CLIENT_HANDLE_ERROR(origCode)) { + return; + } + + if (NEED_CLIENT_HANDLE_ERROR(errCode)) { + atomic_store_32(&pJob->errCode, errCode); + goto _return; + } + + return; + +_return: + + SCH_JOB_DLOG("job errCode updated to %x - %s", errCode, tstrerror(errCode)); +} + +int32_t schProcessOnJobFailureImpl(SSchJob *pJob, int32_t status, int32_t errCode) { + // if already FAILED, no more processing + SCH_ERR_RET(schChkUpdateJobStatus(pJob, status)); + + schUpdateJobErrCode(pJob, errCode); + + if (atomic_load_8(&pJob->userFetch) || pJob->attr.syncSchedule) { + tsem_post(&pJob->rspSem); + } + + int32_t code = atomic_load_32(&pJob->errCode); + + SCH_JOB_DLOG("job failed with error: %s", tstrerror(code)); + + SCH_RET(code); +} + +// Note: no more task error processing, handled in function internal +int32_t schProcessOnJobFailure(SSchJob *pJob, int32_t errCode) { + SCH_RET(schProcessOnJobFailureImpl(pJob, JOB_TASK_STATUS_FAILED, errCode)); +} + +// Note: no more error processing, handled in function internal +int32_t schProcessOnJobDropped(SSchJob *pJob, int32_t errCode) { + SCH_RET(schProcessOnJobFailureImpl(pJob, JOB_TASK_STATUS_DROPPING, errCode)); +} + +// Note: no more task error processing, handled in function internal +int32_t schProcessOnJobPartialSuccess(SSchJob *pJob) { + int32_t code = 0; + + SCH_ERR_RET(schChkUpdateJobStatus(pJob, JOB_TASK_STATUS_PARTIAL_SUCCEED)); + + if (pJob->attr.syncSchedule) { + tsem_post(&pJob->rspSem); + } + + if (atomic_load_8(&pJob->userFetch)) { + SCH_ERR_JRET(schFetchFromRemote(pJob)); + } + + return TSDB_CODE_SUCCESS; + +_return: + + SCH_RET(schProcessOnJobFailure(pJob, code)); +} + +void schProcessOnDataFetched(SSchJob *job) { + atomic_val_compare_exchange_32(&job->remoteFetch, 1, 0); + tsem_post(&job->rspSem); +} + +// Note: no more task error processing, handled in function internal +int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode) { + int8_t status = 0; + + if (schJobNeedToStop(pJob, &status)) { + SCH_TASK_DLOG("task failed not processed cause of job status, job status:%s", jobTaskStatusStr(status)); + SCH_RET(atomic_load_32(&pJob->errCode)); + } + + bool needRetry = false; + bool moved = false; + int32_t taskDone = 0; + int32_t code = 0; + + SCH_TASK_DLOG("taskOnFailure, code:%s", tstrerror(errCode)); + + SCH_ERR_JRET(schTaskCheckSetRetry(pJob, pTask, errCode, &needRetry)); + + if (!needRetry) { + SCH_TASK_ELOG("task failed and no more retry, code:%s", tstrerror(errCode)); + + if (SCH_GET_TASK_STATUS(pTask) == JOB_TASK_STATUS_EXECUTING) { + SCH_ERR_JRET(schMoveTaskToFailList(pJob, pTask, &moved)); + } else { + SCH_TASK_ELOG("task not in executing list, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); + SCH_ERR_JRET(TSDB_CODE_SCH_STATUS_ERROR); + } + + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_FAILED); + + if (SCH_IS_WAIT_ALL_JOB(pJob)) { + SCH_LOCK(SCH_WRITE, &pTask->level->lock); + pTask->level->taskFailed++; + taskDone = pTask->level->taskSucceed + pTask->level->taskFailed; + SCH_UNLOCK(SCH_WRITE, &pTask->level->lock); + + schUpdateJobErrCode(pJob, errCode); + + if (taskDone < pTask->level->taskNum) { + SCH_TASK_DLOG("need to wait other tasks, doneNum:%d, allNum:%d", taskDone, pTask->level->taskNum); + SCH_RET(errCode); + } + } + } else { + SCH_ERR_JRET(schHandleTaskRetry(pJob, pTask)); + + return TSDB_CODE_SUCCESS; + } + +_return: + + SCH_RET(schProcessOnJobFailure(pJob, errCode)); +} + +int32_t schLaunchNextLevelTasks(SSchJob *pJob, SSchTask *pTask) { + if (!SCH_IS_QUERY_JOB(pJob)) { + return TSDB_CODE_SUCCESS; + } + + SSchLevel *pLevel = pTask->level; + int32_t doneNum = atomic_add_fetch_32(&pLevel->taskDoneNum, 1); + if (doneNum == pLevel->taskNum) { + pJob->levelIdx--; + + pLevel = taosArrayGet(pJob->levels, pJob->levelIdx); + for (int32_t i = 0; i < pLevel->taskNum; ++i) { + SSchTask *pTask = taosArrayGet(pLevel->subTasks, i); + + if (pTask->children && taosArrayGetSize(pTask->children) > 0) { + continue; + } + + SCH_ERR_RET(schLaunchTask(pJob, pTask)); + } + } + + return TSDB_CODE_SUCCESS; +} + + +// Note: no more task error processing, handled in function internal +int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask) { + bool moved = false; + int32_t code = 0; + + SCH_TASK_DLOG("taskOnSuccess, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); + + SCH_ERR_JRET(schMoveTaskToSuccList(pJob, pTask, &moved)); + + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_PARTIAL_SUCCEED); + + SCH_ERR_JRET(schRecordTaskSucceedNode(pJob, pTask)); + + SCH_ERR_JRET(schLaunchTasksInFlowCtrlList(pJob, pTask)); + + int32_t parentNum = pTask->parents ? (int32_t)taosArrayGetSize(pTask->parents) : 0; + if (parentNum == 0) { + int32_t taskDone = 0; + if (SCH_IS_WAIT_ALL_JOB(pJob)) { + SCH_LOCK(SCH_WRITE, &pTask->level->lock); + pTask->level->taskSucceed++; + taskDone = pTask->level->taskSucceed + pTask->level->taskFailed; + SCH_UNLOCK(SCH_WRITE, &pTask->level->lock); + + if (taskDone < pTask->level->taskNum) { + SCH_TASK_DLOG("wait all tasks, done:%d, all:%d", taskDone, pTask->level->taskNum); + return TSDB_CODE_SUCCESS; + } else if (taskDone > pTask->level->taskNum) { + SCH_TASK_ELOG("taskDone number invalid, done:%d, total:%d", taskDone, pTask->level->taskNum); + } + + if (pTask->level->taskFailed > 0) { + SCH_RET(schProcessOnJobFailure(pJob, 0)); + } else { + SCH_RET(schProcessOnJobPartialSuccess(pJob)); + } + } else { + pJob->resNode = pTask->succeedAddr; + } + + pJob->fetchTask = pTask; + + SCH_ERR_JRET(schMoveTaskToExecList(pJob, pTask, &moved)); + + SCH_RET(schProcessOnJobPartialSuccess(pJob)); + } + + /* + if (SCH_IS_DATA_SRC_TASK(task) && job->dataSrcEps.numOfEps < SCH_MAX_CANDIDATE_EP_NUM) { + strncpy(job->dataSrcEps.fqdn[job->dataSrcEps.numOfEps], task->execAddr.fqdn, sizeof(task->execAddr.fqdn)); + job->dataSrcEps.port[job->dataSrcEps.numOfEps] = task->execAddr.port; + + ++job->dataSrcEps.numOfEps; + } + */ + + for (int32_t i = 0; i < parentNum; ++i) { + SSchTask *par = *(SSchTask **)taosArrayGet(pTask->parents, i); + int32_t readyNum = atomic_add_fetch_32(&par->childReady, 1); + + SCH_LOCK(SCH_WRITE, &par->lock); + SDownstreamSourceNode source = {.type = QUERY_NODE_DOWNSTREAM_SOURCE, + .taskId = pTask->taskId, + .schedId = schMgmt.sId, + .addr = pTask->succeedAddr}; + qSetSubplanExecutionNode(par->plan, pTask->plan->id.groupId, &source); + SCH_UNLOCK(SCH_WRITE, &par->lock); + + if (SCH_TASK_READY_FOR_LAUNCH(readyNum, par)) { + SCH_ERR_RET(schLaunchTask(pJob, par)); + } + } + + SCH_ERR_RET(schLaunchNextLevelTasks(pJob, pTask)); + + return TSDB_CODE_SUCCESS; + +_return: + + SCH_RET(schProcessOnJobFailure(pJob, code)); +} + +// Note: no more error processing, handled in function internal +int32_t schFetchFromRemote(SSchJob *pJob) { + int32_t code = 0; + + if (atomic_val_compare_exchange_32(&pJob->remoteFetch, 0, 1) != 0) { + SCH_JOB_ELOG("prior fetching not finished, remoteFetch:%d", atomic_load_32(&pJob->remoteFetch)); + return TSDB_CODE_SUCCESS; + } + + void *resData = atomic_load_ptr(&pJob->resData); + if (resData) { + atomic_val_compare_exchange_32(&pJob->remoteFetch, 1, 0); + + SCH_JOB_DLOG("res already fetched, res:%p", resData); + return TSDB_CODE_SUCCESS; + } + + SCH_ERR_JRET(schBuildAndSendMsg(pJob, pJob->fetchTask, &pJob->resNode, TDMT_VND_FETCH)); + + return TSDB_CODE_SUCCESS; + +_return: + + atomic_val_compare_exchange_32(&pJob->remoteFetch, 1, 0); + + SCH_RET(schProcessOnTaskFailure(pJob, pJob->fetchTask, code)); +} + +int32_t schProcessOnExplainDone(SSchJob *pJob, SSchTask *pTask, SRetrieveTableRsp *pRsp) { + SCH_TASK_DLOG("got explain rsp, rows:%d, complete:%d", htonl(pRsp->numOfRows), pRsp->completed); + + atomic_store_32(&pJob->resNumOfRows, htonl(pRsp->numOfRows)); + atomic_store_ptr(&pJob->resData, pRsp); + + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_SUCCEED); + + schProcessOnDataFetched(pJob); + + return TSDB_CODE_SUCCESS; +} + +int32_t schSaveJobQueryRes(SSchJob *pJob, SResReadyRsp *rsp) { + if (rsp->tbFName[0]) { + if (NULL == pJob->queryRes) { + pJob->queryRes = taosArrayInit(pJob->taskNum, sizeof(STbVerInfo)); + if (NULL == pJob->queryRes) { + SCH_ERR_RET(TSDB_CODE_OUT_OF_MEMORY); + } + } + + STbVerInfo tbInfo; + strcpy(tbInfo.tbFName, rsp->tbFName); + tbInfo.sversion = rsp->sversion; + tbInfo.tversion = rsp->tversion; + + taosArrayPush((SArray *)pJob->queryRes, &tbInfo); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t schGetTaskFromTaskList(SHashObj *pTaskList, uint64_t taskId, SSchTask **pTask) { + int32_t s = taosHashGetSize(pTaskList); + if (s <= 0) { + return TSDB_CODE_SUCCESS; + } + + SSchTask **task = taosHashGet(pTaskList, &taskId, sizeof(taskId)); + if (NULL == task || NULL == (*task)) { + return TSDB_CODE_SUCCESS; + } + + *pTask = *task; + + return TSDB_CODE_SUCCESS; +} + +int32_t schUpdateTaskExecNodeHandle(SSchTask *pTask, void *handle, int32_t rspCode) { + if (rspCode || NULL == pTask->execNodes || taosArrayGetSize(pTask->execNodes) > 1 || + taosArrayGetSize(pTask->execNodes) <= 0) { + return TSDB_CODE_SUCCESS; + } + + SSchNodeInfo *nodeInfo = taosArrayGet(pTask->execNodes, 0); + nodeInfo->handle = handle; + + return TSDB_CODE_SUCCESS; +} + +int32_t schLaunchTaskImpl(SSchJob *pJob, SSchTask *pTask) { + int8_t status = 0; + int32_t code = 0; + + atomic_add_fetch_32(&pTask->level->taskLaunchedNum, 1); + + if (schJobNeedToStop(pJob, &status)) { + SCH_TASK_DLOG("no need to launch task cause of job status, job status:%s", jobTaskStatusStr(status)); + + SCH_RET(atomic_load_32(&pJob->errCode)); + } + + // NOTE: race condition: the task should be put into the hash table before send msg to server + if (SCH_GET_TASK_STATUS(pTask) != JOB_TASK_STATUS_EXECUTING) { + SCH_ERR_RET(schPushTaskToExecList(pJob, pTask)); + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_EXECUTING); + } + + SSubplan *plan = pTask->plan; + + if (NULL == pTask->msg) { // TODO add more detailed reason for failure + code = qSubPlanToString(plan, &pTask->msg, &pTask->msgLen); + if (TSDB_CODE_SUCCESS != code) { + SCH_TASK_ELOG("failed to create physical plan, code:%s, msg:%p, len:%d", tstrerror(code), pTask->msg, + pTask->msgLen); + SCH_ERR_RET(code); + } else { + SCH_TASK_DLOGL("physical plan len:%d, %s", pTask->msgLen, pTask->msg); + } + } + + SCH_ERR_RET(schSetTaskCandidateAddrs(pJob, pTask)); + + if (SCH_IS_QUERY_JOB(pJob)) { + SCH_ERR_RET(schEnsureHbConnection(pJob, pTask)); + } + + SCH_ERR_RET(schBuildAndSendMsg(pJob, pTask, NULL, plan->msgType)); + + return TSDB_CODE_SUCCESS; +} + +// Note: no more error processing, handled in function internal +int32_t schLaunchTask(SSchJob *pJob, SSchTask *pTask) { + bool enough = false; + int32_t code = 0; + + SCH_SET_TASK_HANDLE(pTask, NULL); + + if (SCH_TASK_NEED_FLOW_CTRL(pJob, pTask)) { + SCH_ERR_JRET(schCheckIncTaskFlowQuota(pJob, pTask, &enough)); + + if (enough) { + SCH_ERR_JRET(schLaunchTaskImpl(pJob, pTask)); + } + } else { + SCH_ERR_JRET(schLaunchTaskImpl(pJob, pTask)); + } + + return TSDB_CODE_SUCCESS; + +_return: + + SCH_RET(schProcessOnTaskFailure(pJob, pTask, code)); +} + +int32_t schLaunchLevelTasks(SSchJob *pJob, SSchLevel *level) { + for (int32_t i = 0; i < level->taskNum; ++i) { + SSchTask *pTask = taosArrayGet(level->subTasks, i); + + SCH_ERR_RET(schLaunchTask(pJob, pTask)); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t schLaunchJob(SSchJob *pJob) { + SSchLevel *level = taosArrayGet(pJob->levels, pJob->levelIdx); + + SCH_ERR_RET(schChkUpdateJobStatus(pJob, JOB_TASK_STATUS_EXECUTING)); + + SCH_ERR_RET(schChkJobNeedFlowCtrl(pJob, level)); + + SCH_ERR_RET(schLaunchLevelTasks(pJob, level)); + + return TSDB_CODE_SUCCESS; +} + +void schDropTaskOnExecNode(SSchJob *pJob, SSchTask *pTask) { + if (NULL == pTask->execNodes) { + SCH_TASK_DLOG("no exec address, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); + return; + } + + int32_t size = (int32_t)taosArrayGetSize(pTask->execNodes); + + if (size <= 0) { + SCH_TASK_DLOG("task has no execNodes, no need to drop it, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); + return; + } + + SSchNodeInfo *nodeInfo = NULL; + for (int32_t i = 0; i < size; ++i) { + nodeInfo = (SSchNodeInfo *)taosArrayGet(pTask->execNodes, i); + SCH_SET_TASK_HANDLE(pTask, nodeInfo->handle); + + schBuildAndSendMsg(pJob, pTask, &nodeInfo->addr, TDMT_VND_DROP_TASK); + } + + SCH_TASK_DLOG("task has %d exec address", size); +} + +void schDropTaskInHashList(SSchJob *pJob, SHashObj *list) { + if (!SCH_IS_NEED_DROP_JOB(pJob)) { + return; + } + + void *pIter = taosHashIterate(list, NULL); + while (pIter) { + SSchTask *pTask = *(SSchTask **)pIter; + + schDropTaskOnExecNode(pJob, pTask); + + pIter = taosHashIterate(list, pIter); + } +} + +void schDropJobAllTasks(SSchJob *pJob) { + schDropTaskInHashList(pJob, pJob->execTasks); + schDropTaskInHashList(pJob, pJob->succTasks); + schDropTaskInHashList(pJob, pJob->failTasks); +} + +int32_t schCancelJob(SSchJob *pJob) { + // TODO + return TSDB_CODE_SUCCESS; + // TODO MOVE ALL TASKS FROM EXEC LIST TO FAIL LIST +} + +void schFreeJobImpl(void *job) { + if (NULL == job) { + return; + } + + SSchJob *pJob = job; + uint64_t queryId = pJob->queryId; + int64_t refId = pJob->refId; + + if (pJob->status == JOB_TASK_STATUS_EXECUTING) { + schCancelJob(pJob); + } + + schDropJobAllTasks(pJob); + + pJob->subPlans = NULL; // it is a reference to pDag->pSubplans + + int32_t numOfLevels = taosArrayGetSize(pJob->levels); + for (int32_t i = 0; i < numOfLevels; ++i) { + SSchLevel *pLevel = taosArrayGet(pJob->levels, i); + + int32_t numOfTasks = taosArrayGetSize(pLevel->subTasks); + for (int32_t j = 0; j < numOfTasks; ++j) { + SSchTask *pTask = taosArrayGet(pLevel->subTasks, j); + schFreeTask(pTask); + } + + taosArrayDestroy(pLevel->subTasks); + } + + schFreeFlowCtrl(pJob); + + taosHashCleanup(pJob->execTasks); + taosHashCleanup(pJob->failTasks); + taosHashCleanup(pJob->succTasks); + + taosArrayDestroy(pJob->levels); + taosArrayDestroy(pJob->nodeList); + taosArrayDestroy(pJob->dataSrcTasks); + + qExplainFreeCtx(pJob->explainCtx); + + if (SCH_IS_QUERY_JOB(pJob)) { + taosArrayDestroy((SArray *)pJob->queryRes); + } else { + tFreeSSubmitRsp((SSubmitRsp*)pJob->queryRes); + } + + taosMemoryFreeClear(pJob->resData); + taosMemoryFreeClear(pJob); + + qDebug("QID:0x%" PRIx64 " job freed, refId:%" PRIx64 ", pointer:%p", queryId, refId, pJob); + + atomic_sub_fetch_32(&schMgmt.jobNum, 1); + + schCloseJobRef(); +} + +int32_t schExecJobImpl(void *transport, SArray *pNodeList, SQueryPlan *pDag, int64_t *job, const char *sql, + int64_t startTs, bool sync) { + qDebug("QID:0x%" PRIx64 " job started", pDag->queryId); + + if (pNodeList == NULL || taosArrayGetSize(pNodeList) <= 0) { + qDebug("QID:0x%" PRIx64 " input exec nodeList is empty", pDag->queryId); + } + + int32_t code = 0; + SSchJob *pJob = NULL; + SCH_ERR_JRET(schInitJob(&pJob, pDag, transport, pNodeList, sql, startTs, sync)); + + SCH_ERR_JRET(schLaunchJob(pJob)); + + *job = pJob->refId; + + if (sync) { + SCH_JOB_DLOG("will wait for rsp now, job status:%s", SCH_GET_JOB_STATUS_STR(pJob)); + tsem_wait(&pJob->rspSem); + } + + SCH_JOB_DLOG("job exec done, job status:%s", SCH_GET_JOB_STATUS_STR(pJob)); + + schReleaseJob(pJob->refId); + + return TSDB_CODE_SUCCESS; + +_return: + + schFreeJobImpl(pJob); + SCH_RET(code); +} + +int32_t schExecStaticExplain(void *transport, SArray *pNodeList, SQueryPlan *pDag, int64_t *job, const char *sql, + bool syncSchedule) { + qDebug("QID:0x%" PRIx64 " job started", pDag->queryId); + + int32_t code = 0; + SSchJob *pJob = taosMemoryCalloc(1, sizeof(SSchJob)); + if (NULL == pJob) { + qError("QID:%" PRIx64 " calloc %d failed", pDag->queryId, (int32_t)sizeof(SSchJob)); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pJob->sql = sql; + pJob->attr.queryJob = true; + pJob->attr.explainMode = pDag->explainInfo.mode; + pJob->queryId = pDag->queryId; + pJob->subPlans = pDag->pSubplans; + + SCH_ERR_JRET(qExecStaticExplain(pDag, (SRetrieveTableRsp **)&pJob->resData)); + + int64_t refId = taosAddRef(schMgmt.jobRef, pJob); + if (refId < 0) { + SCH_JOB_ELOG("taosAddRef job failed, error:%s", tstrerror(terrno)); + SCH_ERR_JRET(terrno); + } + + if (NULL == schAcquireJob(refId)) { + SCH_JOB_ELOG("schAcquireJob job failed, refId:%" PRIx64, refId); + SCH_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + pJob->refId = refId; + + SCH_JOB_DLOG("job refId:%" PRIx64, pJob->refId); + + pJob->status = JOB_TASK_STATUS_PARTIAL_SUCCEED; + *job = pJob->refId; + SCH_JOB_DLOG("job exec done, job status:%s", SCH_GET_JOB_STATUS_STR(pJob)); + + schReleaseJob(pJob->refId); + + return TSDB_CODE_SUCCESS; + +_return: + + schFreeJobImpl(pJob); + SCH_RET(code); +} + + diff --git a/source/libs/scheduler/src/schRemote.c b/source/libs/scheduler/src/schRemote.c new file mode 100644 index 0000000000..6d9f6b435f --- /dev/null +++ b/source/libs/scheduler/src/schRemote.c @@ -0,0 +1,1231 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "catalog.h" +#include "command.h" +#include "query.h" +#include "schedulerInt.h" +#include "tmsg.h" +#include "tref.h" +#include "trpc.h" + + +int32_t schValidateReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t msgType) { + int32_t lastMsgType = SCH_GET_TASK_LASTMSG_TYPE(pTask); + int32_t taskStatus = SCH_GET_TASK_STATUS(pTask); + int32_t reqMsgType = msgType - 1; + switch (msgType) { + case TDMT_SCH_LINK_BROKEN: + case TDMT_VND_EXPLAIN_RSP: + return TSDB_CODE_SUCCESS; + case TDMT_VND_QUERY_RSP: // query_rsp may be processed later than ready_rsp + if (lastMsgType != reqMsgType && -1 != lastMsgType && TDMT_VND_FETCH != lastMsgType) { + SCH_TASK_DLOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), + TMSG_INFO(msgType)); + } + + if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { + SCH_TASK_DLOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), + TMSG_INFO(msgType)); + } + + SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); + return TSDB_CODE_SUCCESS; + case TDMT_VND_RES_READY_RSP: + reqMsgType = TDMT_VND_QUERY; + if (lastMsgType != reqMsgType && -1 != lastMsgType) { + SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", + (lastMsgType > 0 ? TMSG_INFO(lastMsgType) : "null"), TMSG_INFO(msgType)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { + SCH_TASK_ELOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), + TMSG_INFO(msgType)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); + return TSDB_CODE_SUCCESS; + case TDMT_VND_FETCH_RSP: + if (lastMsgType != reqMsgType && -1 != lastMsgType) { + SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), + TMSG_INFO(msgType)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { + SCH_TASK_ELOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), + TMSG_INFO(msgType)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); + return TSDB_CODE_SUCCESS; + case TDMT_VND_CREATE_TABLE_RSP: + case TDMT_VND_DROP_TABLE_RSP: + case TDMT_VND_ALTER_TABLE_RSP: + case TDMT_VND_SUBMIT_RSP: + break; + default: + SCH_TASK_ELOG("unknown rsp msg, type:%s, status:%s", TMSG_INFO(msgType), jobTaskStatusStr(taskStatus)); + SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); + } + + if (lastMsgType != reqMsgType) { + SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), + TMSG_INFO(msgType)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { + SCH_TASK_ELOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), + TMSG_INFO(msgType)); + SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); + } + + SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); + + return TSDB_CODE_SUCCESS; +} + +// Note: no more task error processing, handled in function internal +int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, char *msg, int32_t msgSize, + int32_t rspCode) { + int32_t code = 0; + int8_t status = 0; + + if (schJobNeedToStop(pJob, &status)) { + SCH_TASK_ELOG("rsp not processed cause of job status, job status:%s, rspCode:0x%x", jobTaskStatusStr(status), + rspCode); + SCH_RET(atomic_load_32(&pJob->errCode)); + } + + SCH_ERR_JRET(schValidateReceivedMsgType(pJob, pTask, msgType)); + + switch (msgType) { + case TDMT_VND_CREATE_TABLE_RSP: { + SVCreateTbBatchRsp batchRsp = {0}; + if (msg) { + SDecoder coder = {0}; + tDecoderInit(&coder, msg, msgSize); + code = tDecodeSVCreateTbBatchRsp(&coder, &batchRsp); + if (TSDB_CODE_SUCCESS == code && batchRsp.nRsps > 0) { + for (int32_t i = 0; i < batchRsp.nRsps; ++i) { + SVCreateTbRsp *rsp = batchRsp.pRsps + i; + if (TSDB_CODE_SUCCESS != rsp->code) { + code = rsp->code; + tDecoderClear(&coder); + SCH_ERR_JRET(code); + } + } + } + tDecoderClear(&coder); + SCH_ERR_JRET(code); + } + + SCH_ERR_JRET(rspCode); + SCH_ERR_RET(schProcessOnTaskSuccess(pJob, pTask)); + break; + } + case TDMT_VND_DROP_TABLE_RSP: { + SVDropTbBatchRsp batchRsp = {0}; + if (msg) { + SDecoder coder = {0}; + tDecoderInit(&coder, msg, msgSize); + code = tDecodeSVDropTbBatchRsp(&coder, &batchRsp); + if (TSDB_CODE_SUCCESS == code && batchRsp.nRsps > 0) { + for (int32_t i = 0; i < batchRsp.nRsps; ++i) { + SVDropTbRsp *rsp = batchRsp.pRsps + i; + if (TSDB_CODE_SUCCESS != rsp->code) { + code = rsp->code; + tDecoderClear(&coder); + SCH_ERR_JRET(code); + } + } + } + tDecoderClear(&coder); + SCH_ERR_JRET(code); + } + + SCH_ERR_JRET(rspCode); + SCH_ERR_RET(schProcessOnTaskSuccess(pJob, pTask)); + break; + } + case TDMT_VND_ALTER_TABLE_RSP: { + SVAlterTbRsp rsp = {0}; + if (msg) { + SDecoder coder = {0}; + tDecoderInit(&coder, msg, msgSize); + code = tDecodeSVAlterTbRsp(&coder, &rsp); + tDecoderClear(&coder); + SCH_ERR_JRET(code); + SCH_ERR_JRET(rsp.code); + } + + SCH_ERR_JRET(rspCode); + + if (NULL == msg) { + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + SCH_ERR_RET(schProcessOnTaskSuccess(pJob, pTask)); + break; + } + case TDMT_VND_SUBMIT_RSP: { + SCH_ERR_JRET(rspCode); + + if (msg) { + SDecoder coder = {0}; + SSubmitRsp *rsp = taosMemoryMalloc(sizeof(*rsp)); + tDecoderInit(&coder, msg, msgSize); + code = tDecodeSSubmitRsp(&coder, rsp); + if (code) { + SCH_TASK_ELOG("decode submitRsp failed, code:%d", code); + tFreeSSubmitRsp(rsp); + SCH_ERR_JRET(code); + } + + if (rsp->nBlocks > 0) { + for (int32_t i = 0; i < rsp->nBlocks; ++i) { + SSubmitBlkRsp *blk = rsp->pBlocks + i; + if (TSDB_CODE_SUCCESS != blk->code) { + code = blk->code; + tFreeSSubmitRsp(rsp); + SCH_ERR_JRET(code); + } + } + } + + atomic_add_fetch_32(&pJob->resNumOfRows, rsp->affectedRows); + SCH_TASK_DLOG("submit succeed, affectedRows:%d", rsp->affectedRows); + + SCH_LOCK(SCH_WRITE, &pJob->resLock); + if (pJob->queryRes) { + SSubmitRsp *sum = pJob->queryRes; + sum->affectedRows += rsp->affectedRows; + sum->nBlocks += rsp->nBlocks; + sum->pBlocks = taosMemoryRealloc(sum->pBlocks, sum->nBlocks * sizeof(*sum->pBlocks)); + memcpy(sum->pBlocks + sum->nBlocks - rsp->nBlocks, rsp->pBlocks, rsp->nBlocks * sizeof(*sum->pBlocks)); + taosMemoryFree(rsp->pBlocks); + taosMemoryFree(rsp); + } else { + pJob->queryRes = rsp; + } + SCH_UNLOCK(SCH_WRITE, &pJob->resLock); + } + + SCH_ERR_RET(schProcessOnTaskSuccess(pJob, pTask)); + + break; + } + case TDMT_VND_QUERY_RSP: { + SQueryTableRsp rsp = {0}; + if (msg) { + SCH_ERR_JRET(tDeserializeSQueryTableRsp(msg, msgSize, &rsp)); + SCH_ERR_JRET(rsp.code); + } + + SCH_ERR_JRET(rspCode); + + if (NULL == msg) { + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + + // SCH_ERR_JRET(schBuildAndSendMsg(pJob, pTask, NULL, TDMT_VND_RES_READY)); + + break; + } + case TDMT_VND_RES_READY_RSP: { + SResReadyRsp *rsp = (SResReadyRsp *)msg; + + SCH_ERR_JRET(rspCode); + if (NULL == msg) { + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + SCH_ERR_JRET(rsp->code); + + SCH_ERR_JRET(schSaveJobQueryRes(pJob, rsp)); + + SCH_ERR_RET(schProcessOnTaskSuccess(pJob, pTask)); + + break; + } + case TDMT_VND_EXPLAIN_RSP: { + SCH_ERR_JRET(rspCode); + if (NULL == msg) { + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + + if (!SCH_IS_EXPLAIN_JOB(pJob)) { + SCH_TASK_ELOG("invalid msg received for none explain query, msg type:%s", TMSG_INFO(msgType)); + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + + if (pJob->resData) { + SCH_TASK_ELOG("explain result is already generated, res:%p", pJob->resData); + SCH_ERR_JRET(TSDB_CODE_SCH_STATUS_ERROR); + } + + SExplainRsp rsp = {0}; + if (tDeserializeSExplainRsp(msg, msgSize, &rsp)) { + taosMemoryFree(rsp.subplanInfo); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SRetrieveTableRsp *pRsp = NULL; + SCH_ERR_JRET(qExplainUpdateExecInfo(pJob->explainCtx, &rsp, pTask->plan->id.groupId, &pRsp)); + + if (pRsp) { + SCH_ERR_JRET(schProcessOnExplainDone(pJob, pTask, pRsp)); + } + break; + } + case TDMT_VND_FETCH_RSP: { + SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)msg; + + SCH_ERR_JRET(rspCode); + if (NULL == msg) { + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + + if (SCH_IS_EXPLAIN_JOB(pJob)) { + if (rsp->completed) { + SRetrieveTableRsp *pRsp = NULL; + SCH_ERR_JRET(qExecExplainEnd(pJob->explainCtx, &pRsp)); + if (pRsp) { + SCH_ERR_JRET(schProcessOnExplainDone(pJob, pTask, pRsp)); + } + + return TSDB_CODE_SUCCESS; + } + + atomic_val_compare_exchange_32(&pJob->remoteFetch, 1, 0); + + SCH_ERR_JRET(schFetchFromRemote(pJob)); + + return TSDB_CODE_SUCCESS; + } + + if (pJob->resData) { + SCH_TASK_ELOG("got fetch rsp while res already exists, res:%p", pJob->resData); + taosMemoryFreeClear(rsp); + SCH_ERR_JRET(TSDB_CODE_SCH_STATUS_ERROR); + } + + atomic_store_ptr(&pJob->resData, rsp); + atomic_add_fetch_32(&pJob->resNumOfRows, htonl(rsp->numOfRows)); + + if (rsp->completed) { + SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_SUCCEED); + } + + SCH_TASK_DLOG("got fetch rsp, rows:%d, complete:%d", htonl(rsp->numOfRows), rsp->completed); + + schProcessOnDataFetched(pJob); + break; + } + case TDMT_VND_DROP_TASK_RSP: { + // SHOULD NEVER REACH HERE + SCH_TASK_ELOG("invalid status to handle drop task rsp, refId:%" PRIx64, pJob->refId); + SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR); + break; + } + case TDMT_SCH_LINK_BROKEN: + SCH_TASK_ELOG("link broken received, error:%x - %s", rspCode, tstrerror(rspCode)); + SCH_ERR_JRET(rspCode); + break; + default: + SCH_TASK_ELOG("unknown rsp msg, type:%d, status:%s", msgType, SCH_GET_TASK_STATUS_STR(pTask)); + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + + return TSDB_CODE_SUCCESS; + +_return: + + SCH_RET(schProcessOnTaskFailure(pJob, pTask, code)); +} + + +int32_t schHandleCallback(void *param, const SDataBuf *pMsg, int32_t msgType, int32_t rspCode) { + int32_t code = 0; + SSchTaskCallbackParam *pParam = (SSchTaskCallbackParam *)param; + SSchTask *pTask = NULL; + + SSchJob *pJob = schAcquireJob(pParam->refId); + if (NULL == pJob) { + qWarn("QID:0x%" PRIx64 ",TID:0x%" PRIx64 "taosAcquireRef job failed, may be dropped, refId:%" PRIx64, + pParam->queryId, pParam->taskId, pParam->refId); + SCH_ERR_JRET(TSDB_CODE_QRY_JOB_FREED); + } + + schGetTaskFromTaskList(pJob->execTasks, pParam->taskId, &pTask); + if (NULL == pTask) { + if (TDMT_VND_EXPLAIN_RSP == msgType) { + schGetTaskFromTaskList(pJob->succTasks, pParam->taskId, &pTask); + } else { + SCH_JOB_ELOG("task not found in execTask list, refId:%" PRIx64 ", taskId:%" PRIx64, pParam->refId, + pParam->taskId); + SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + } + + if (NULL == pTask) { + SCH_JOB_ELOG("task not found in execList & succList, refId:%" PRIx64 ", taskId:%" PRIx64, pParam->refId, + pParam->taskId); + SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR); + } + + SCH_TASK_DLOG("rsp msg received, type:%s, handle:%p, code:%s", TMSG_INFO(msgType), pMsg->handle, tstrerror(rspCode)); + + SCH_SET_TASK_HANDLE(pTask, pMsg->handle); + schUpdateTaskExecNodeHandle(pTask, pMsg->handle, rspCode); + + SCH_ERR_JRET(schHandleResponseMsg(pJob, pTask, msgType, pMsg->pData, pMsg->len, rspCode)); + +_return: + if (pJob) { + schReleaseJob(pParam->refId); + } + + taosMemoryFreeClear(param); + SCH_RET(code); +} + +int32_t schHandleSubmitCallback(void *param, const SDataBuf *pMsg, int32_t code) { + return schHandleCallback(param, pMsg, TDMT_VND_SUBMIT_RSP, code); +} + +int32_t schHandleCreateTbCallback(void *param, const SDataBuf *pMsg, int32_t code) { + return schHandleCallback(param, pMsg, TDMT_VND_CREATE_TABLE_RSP, code); +} + +int32_t schHandleDropTbCallback(void *param, const SDataBuf *pMsg, int32_t code) { + return schHandleCallback(param, pMsg, TDMT_VND_DROP_TABLE_RSP, code); +} + +int32_t schHandleAlterTbCallback(void *param, const SDataBuf *pMsg, int32_t code) { + return schHandleCallback(param, pMsg, TDMT_VND_ALTER_TABLE_RSP, code); +} + +int32_t schHandleQueryCallback(void *param, const SDataBuf *pMsg, int32_t code) { + return schHandleCallback(param, pMsg, TDMT_VND_QUERY_RSP, code); +} + +int32_t schHandleFetchCallback(void *param, const SDataBuf *pMsg, int32_t code) { + return schHandleCallback(param, pMsg, TDMT_VND_FETCH_RSP, code); +} + +int32_t schHandleReadyCallback(void *param, const SDataBuf *pMsg, int32_t code) { + return schHandleCallback(param, pMsg, TDMT_VND_RES_READY_RSP, code); +} + +int32_t schHandleExplainCallback(void *param, const SDataBuf *pMsg, int32_t code) { + return schHandleCallback(param, pMsg, TDMT_VND_EXPLAIN_RSP, code); +} + +int32_t schHandleDropCallback(void *param, const SDataBuf *pMsg, int32_t code) { + SSchTaskCallbackParam *pParam = (SSchTaskCallbackParam *)param; + qDebug("QID:%" PRIx64 ",TID:%" PRIx64 " drop task rsp received, code:%x", pParam->queryId, pParam->taskId, code); + return TSDB_CODE_SUCCESS; +} + +int32_t schHandleLinkBrokenCallback(void *param, const SDataBuf *pMsg, int32_t code) { + SSchCallbackParamHeader *head = (SSchCallbackParamHeader *)param; + rpcReleaseHandle(pMsg->handle, TAOS_CONN_CLIENT); + + qDebug("handle %p is broken", pMsg->handle); + + if (head->isHbParam) { + SSchHbCallbackParam *hbParam = (SSchHbCallbackParam *)param; + SSchTrans trans = {.transInst = hbParam->transport, .transHandle = NULL}; + SCH_ERR_RET(schUpdateHbConnection(&hbParam->nodeEpId, &trans)); + + SCH_ERR_RET(schBuildAndSendHbMsg(&hbParam->nodeEpId)); + } else { + SCH_ERR_RET(schHandleCallback(param, pMsg, TDMT_SCH_LINK_BROKEN, code)); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t schGenerateCallBackInfo(SSchJob *pJob, SSchTask *pTask, int32_t msgType, SMsgSendInfo **pMsgSendInfo) { + int32_t code = 0; + SMsgSendInfo *msgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); + if (NULL == msgSendInfo) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SSchTaskCallbackParam *param = taosMemoryCalloc(1, sizeof(SSchTaskCallbackParam)); + if (NULL == param) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchTaskCallbackParam)); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + __async_send_cb_fn_t fp = NULL; + SCH_ERR_JRET(schGetCallbackFp(msgType, &fp)); + + param->queryId = pJob->queryId; + param->refId = pJob->refId; + param->taskId = SCH_TASK_ID(pTask); + param->transport = pJob->transport; + + msgSendInfo->param = param; + msgSendInfo->fp = fp; + + *pMsgSendInfo = msgSendInfo; + + return TSDB_CODE_SUCCESS; + +_return: + + taosMemoryFree(param); + taosMemoryFree(msgSendInfo); + + SCH_RET(code); +} + + +int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp) { + switch (msgType) { + case TDMT_VND_CREATE_TABLE: + *fp = schHandleCreateTbCallback; + break; + case TDMT_VND_DROP_TABLE: + *fp = schHandleDropTbCallback; + break; + case TDMT_VND_ALTER_TABLE: + *fp = schHandleAlterTbCallback; + break; + case TDMT_VND_SUBMIT: + *fp = schHandleSubmitCallback; + break; + case TDMT_VND_QUERY: + *fp = schHandleQueryCallback; + break; + case TDMT_VND_RES_READY: + *fp = schHandleReadyCallback; + break; + case TDMT_VND_EXPLAIN: + *fp = schHandleExplainCallback; + break; + case TDMT_VND_FETCH: + *fp = schHandleFetchCallback; + break; + case TDMT_VND_DROP_TASK: + *fp = schHandleDropCallback; + break; + case TDMT_VND_QUERY_HEARTBEAT: + *fp = schHandleHbCallback; + break; + case TDMT_SCH_LINK_BROKEN: + *fp = schHandleLinkBrokenCallback; + break; + default: + qError("unknown msg type for callback, msgType:%d", msgType); + SCH_ERR_RET(TSDB_CODE_QRY_APP_ERROR); + } + + return TSDB_CODE_SUCCESS; +} + + +int32_t schMakeHbCallbackParam(SSchJob *pJob, SSchTask *pTask, void **pParam) { + SSchHbCallbackParam *param = taosMemoryCalloc(1, sizeof(SSchHbCallbackParam)); + if (NULL == param) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchHbCallbackParam)); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + param->head.isHbParam = true; + + SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); + + param->nodeEpId.nodeId = addr->nodeId; + memcpy(¶m->nodeEpId.ep, SCH_GET_CUR_EP(addr), sizeof(SEp)); + param->transport = pJob->transport; + + *pParam = param; + + return TSDB_CODE_SUCCESS; +} + +int32_t schCloneHbRpcCtx(SRpcCtx *pSrc, SRpcCtx *pDst) { + int32_t code = 0; + memcpy(&pDst->brokenVal, &pSrc->brokenVal, sizeof(pSrc->brokenVal)); + pDst->brokenVal.val = NULL; + + SCH_ERR_RET(schCloneSMsgSendInfo(pSrc->brokenVal.val, &pDst->brokenVal.val)); + + pDst->args = taosHashInit(1, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); + if (NULL == pDst->args) { + qError("taosHashInit %d RpcCtx failed", 1); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SRpcCtxVal dst = {0}; + void *pIter = taosHashIterate(pSrc->args, NULL); + while (pIter) { + SRpcCtxVal *pVal = (SRpcCtxVal *)pIter; + int32_t *msgType = taosHashGetKey(pIter, NULL); + + dst = *pVal; + dst.val = NULL; + + SCH_ERR_JRET(schCloneSMsgSendInfo(pVal->val, &dst.val)); + + if (taosHashPut(pDst->args, msgType, sizeof(*msgType), &dst, sizeof(dst))) { + qError("taosHashPut msg %d to rpcCtx failed", *msgType); + (*dst.freeFunc)(dst.val); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pIter = taosHashIterate(pSrc->args, pIter); + } + + return TSDB_CODE_SUCCESS; + +_return: + + schFreeRpcCtx(pDst); + SCH_RET(code); +} + + +int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { + int32_t code = 0; + SSchHbCallbackParam *param = NULL; + SMsgSendInfo *pMsgSendInfo = NULL; + SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); + SQueryNodeEpId epId = {0}; + + epId.nodeId = addr->nodeId; + memcpy(&epId.ep, SCH_GET_CUR_EP(addr), sizeof(SEp)); + + pCtx->args = taosHashInit(1, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); + if (NULL == pCtx->args) { + SCH_TASK_ELOG("taosHashInit %d RpcCtx failed", 1); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); + if (NULL == pMsgSendInfo) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + param = taosMemoryCalloc(1, sizeof(SSchHbCallbackParam)); + if (NULL == param) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchHbCallbackParam)); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + int32_t msgType = TDMT_VND_QUERY_HEARTBEAT_RSP; + __async_send_cb_fn_t fp = NULL; + SCH_ERR_JRET(schGetCallbackFp(TDMT_VND_QUERY_HEARTBEAT, &fp)); + + param->nodeEpId = epId; + param->transport = pJob->transport; + + pMsgSendInfo->param = param; + pMsgSendInfo->fp = fp; + + SRpcCtxVal ctxVal = {.val = pMsgSendInfo, .clone = schCloneSMsgSendInfo, .freeFunc = schFreeRpcCtxVal}; + if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { + SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SCH_ERR_JRET(schMakeBrokenLinkVal(pJob, pTask, &pCtx->brokenVal, true)); + + return TSDB_CODE_SUCCESS; + +_return: + + taosHashCleanup(pCtx->args); + taosMemoryFreeClear(param); + taosMemoryFreeClear(pMsgSendInfo); + + SCH_RET(code); +} + +int32_t schRegisterHbConnection(SSchJob *pJob, SSchTask *pTask, SQueryNodeEpId *epId, bool *exist) { + int32_t code = 0; + SSchHbTrans hb = {0}; + + hb.trans.transInst = pJob->transport; + + SCH_ERR_RET(schMakeHbRpcCtx(pJob, pTask, &hb.rpcCtx)); + + code = taosHashPut(schMgmt.hbConnections, epId, sizeof(SQueryNodeEpId), &hb, sizeof(SSchHbTrans)); + if (code) { + schFreeRpcCtx(&hb.rpcCtx); + + if (HASH_NODE_EXIST(code)) { + *exist = true; + return TSDB_CODE_SUCCESS; + } + + qError("taosHashPut hb trans failed, nodeId:%d, fqdn:%s, port:%d", epId->nodeId, epId->ep.fqdn, epId->ep.port); + SCH_ERR_RET(code); + } + + return TSDB_CODE_SUCCESS; +} + + +int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId) { + SSchedulerHbReq req = {0}; + int32_t code = 0; + SRpcCtx rpcCtx = {0}; + SSchTrans trans = {0}; + int32_t msgType = TDMT_VND_QUERY_HEARTBEAT; + + req.header.vgId = nodeEpId->nodeId; + req.sId = schMgmt.sId; + memcpy(&req.epId, nodeEpId, sizeof(SQueryNodeEpId)); + + SSchHbTrans *hb = taosHashGet(schMgmt.hbConnections, nodeEpId, sizeof(SQueryNodeEpId)); + if (NULL == hb) { + qError("taosHashGet hb connection failed, nodeId:%d, fqdn:%s, port:%d", nodeEpId->nodeId, nodeEpId->ep.fqdn, + nodeEpId->ep.port); + SCH_ERR_RET(code); + } + + SCH_LOCK(SCH_WRITE, &hb->lock); + code = schCloneHbRpcCtx(&hb->rpcCtx, &rpcCtx); + memcpy(&trans, &hb->trans, sizeof(trans)); + SCH_UNLOCK(SCH_WRITE, &hb->lock); + + SCH_ERR_RET(code); + + int32_t msgSize = tSerializeSSchedulerHbReq(NULL, 0, &req); + if (msgSize < 0) { + qError("tSerializeSSchedulerHbReq hbReq failed, size:%d", msgSize); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + void *msg = taosMemoryCalloc(1, msgSize); + if (NULL == msg) { + qError("calloc hb req %d failed", msgSize); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + if (tSerializeSSchedulerHbReq(msg, msgSize, &req) < 0) { + qError("tSerializeSSchedulerHbReq hbReq failed, size:%d", msgSize); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SMsgSendInfo *pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); + if (NULL == pMsgSendInfo) { + qError("calloc SMsgSendInfo failed"); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SSchTaskCallbackParam *param = taosMemoryCalloc(1, sizeof(SSchTaskCallbackParam)); + if (NULL == param) { + qError("calloc SSchTaskCallbackParam failed"); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + __async_send_cb_fn_t fp = NULL; + SCH_ERR_JRET(schGetCallbackFp(msgType, &fp)); + + param->transport = trans.transInst; + + pMsgSendInfo->param = param; + pMsgSendInfo->msgInfo.pData = msg; + pMsgSendInfo->msgInfo.len = msgSize; + pMsgSendInfo->msgInfo.handle = trans.transHandle; + pMsgSendInfo->msgType = msgType; + pMsgSendInfo->fp = fp; + + int64_t transporterId = 0; + SEpSet epSet = {.inUse = 0, .numOfEps = 1}; + memcpy(&epSet.eps[0], &nodeEpId->ep, sizeof(nodeEpId->ep)); + + qDebug("start to send hb msg, instance:%p, handle:%p, fqdn:%s, port:%d", trans.transInst, trans.transHandle, + nodeEpId->ep.fqdn, nodeEpId->ep.port); + + code = asyncSendMsgToServerExt(trans.transInst, &epSet, &transporterId, pMsgSendInfo, true, &rpcCtx); + if (code) { + qError("fail to send hb msg, instance:%p, handle:%p, fqdn:%s, port:%d, error:%x - %s", trans.transInst, + trans.transHandle, nodeEpId->ep.fqdn, nodeEpId->ep.port, code, tstrerror(code)); + SCH_ERR_JRET(code); + } + + qDebug("hb msg sent"); + return TSDB_CODE_SUCCESS; + +_return: + + taosMemoryFreeClear(msg); + taosMemoryFreeClear(param); + taosMemoryFreeClear(pMsgSendInfo); + schFreeRpcCtx(&rpcCtx); + SCH_RET(code); +} + + +int32_t schEnsureHbConnection(SSchJob *pJob, SSchTask *pTask) { + SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); + SQueryNodeEpId epId = {0}; + + epId.nodeId = addr->nodeId; + memcpy(&epId.ep, SCH_GET_CUR_EP(addr), sizeof(SEp)); + + SSchHbTrans *hb = taosHashGet(schMgmt.hbConnections, &epId, sizeof(SQueryNodeEpId)); + if (NULL == hb) { + bool exist = false; + SCH_ERR_RET(schRegisterHbConnection(pJob, pTask, &epId, &exist)); + if (!exist) { + SCH_ERR_RET(schBuildAndSendHbMsg(&epId)); + } + } + + return TSDB_CODE_SUCCESS; +} + +int32_t schUpdateHbConnection(SQueryNodeEpId *epId, SSchTrans *trans) { + int32_t code = 0; + SSchHbTrans *hb = NULL; + + hb = taosHashGet(schMgmt.hbConnections, epId, sizeof(SQueryNodeEpId)); + if (NULL == hb) { + qError("taosHashGet hb connection failed, nodeId:%d, fqdn:%s, port:%d", epId->nodeId, epId->ep.fqdn, epId->ep.port); + SCH_ERR_RET(TSDB_CODE_QRY_APP_ERROR); + } + + SCH_LOCK(SCH_WRITE, &hb->lock); + memcpy(&hb->trans, trans, sizeof(*trans)); + SCH_UNLOCK(SCH_WRITE, &hb->lock); + + qDebug("hb connection updated, sId:%" PRIx64 ", nodeId:%d, fqdn:%s, port:%d, instance:%p, handle:%p", schMgmt.sId, + epId->nodeId, epId->ep.fqdn, epId->ep.port, trans->transInst, trans->transHandle); + + return TSDB_CODE_SUCCESS; +} + +int32_t schHandleHbCallback(void *param, const SDataBuf *pMsg, int32_t code) { + SSchedulerHbRsp rsp = {0}; + SSchTaskCallbackParam *pParam = (SSchTaskCallbackParam *)param; + + if (code) { + qError("hb rsp error:%s", tstrerror(code)); + SCH_ERR_JRET(code); + } + + if (tDeserializeSSchedulerHbRsp(pMsg->pData, pMsg->len, &rsp)) { + qError("invalid hb rsp msg, size:%d", pMsg->len); + SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + + SSchTrans trans = {0}; + trans.transInst = pParam->transport; + trans.transHandle = pMsg->handle; + + SCH_ERR_JRET(schUpdateHbConnection(&rsp.epId, &trans)); + + int32_t taskNum = (int32_t)taosArrayGetSize(rsp.taskStatus); + qDebug("%d task status in hb rsp, nodeId:%d, fqdn:%s, port:%d", taskNum, rsp.epId.nodeId, rsp.epId.ep.fqdn, + rsp.epId.ep.port); + + for (int32_t i = 0; i < taskNum; ++i) { + STaskStatus *taskStatus = taosArrayGet(rsp.taskStatus, i); + + SSchJob *pJob = schAcquireJob(taskStatus->refId); + if (NULL == pJob) { + qWarn("job not found, refId:0x%" PRIx64 ",QID:0x%" PRIx64 ",TID:0x%" PRIx64, taskStatus->refId, + taskStatus->queryId, taskStatus->taskId); + // TODO DROP TASK FROM SERVER!!!! + continue; + } + + // TODO + + SCH_JOB_DLOG("TID:0x%" PRIx64 " task status in server: %s", taskStatus->taskId, + jobTaskStatusStr(taskStatus->status)); + + schReleaseJob(taskStatus->refId); + } + +_return: + + tFreeSSchedulerHbRsp(&rsp); + taosMemoryFree(param); + + SCH_RET(code); +} + +int32_t schMakeCallbackParam(SSchJob *pJob, SSchTask *pTask, void **pParam) { + SSchTaskCallbackParam *param = taosMemoryCalloc(1, sizeof(SSchTaskCallbackParam)); + if (NULL == param) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchTaskCallbackParam)); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + param->queryId = pJob->queryId; + param->refId = pJob->refId; + param->taskId = SCH_TASK_ID(pTask); + param->transport = pJob->transport; + + *pParam = param; + + return TSDB_CODE_SUCCESS; +} + +int32_t schMakeBrokenLinkVal(SSchJob *pJob, SSchTask *pTask, SRpcBrokenlinkVal *brokenVal, bool isHb) { + int32_t code = 0; + SMsgSendInfo *pMsgSendInfo = NULL; + + pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); + if (NULL == pMsgSendInfo) { + SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + if (isHb) { + SCH_ERR_JRET(schMakeHbCallbackParam(pJob, pTask, &pMsgSendInfo->param)); + } else { + SCH_ERR_JRET(schMakeCallbackParam(pJob, pTask, &pMsgSendInfo->param)); + } + + int32_t msgType = TDMT_SCH_LINK_BROKEN; + __async_send_cb_fn_t fp = NULL; + SCH_ERR_JRET(schGetCallbackFp(msgType, &fp)); + + pMsgSendInfo->fp = fp; + + brokenVal->msgType = msgType; + brokenVal->val = pMsgSendInfo; + brokenVal->clone = schCloneSMsgSendInfo; + brokenVal->freeFunc = schFreeRpcCtxVal; + + return TSDB_CODE_SUCCESS; + +_return: + + taosMemoryFreeClear(pMsgSendInfo->param); + taosMemoryFreeClear(pMsgSendInfo); + + SCH_RET(code); +} + +int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { + int32_t code = 0; + SMsgSendInfo *pReadyMsgSendInfo = NULL; + SMsgSendInfo *pExplainMsgSendInfo = NULL; + + pCtx->args = taosHashInit(1, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); + if (NULL == pCtx->args) { + SCH_TASK_ELOG("taosHashInit %d RpcCtx failed", 1); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SCH_ERR_JRET(schGenerateCallBackInfo(pJob, pTask, TDMT_VND_RES_READY, &pReadyMsgSendInfo)); + SCH_ERR_JRET(schGenerateCallBackInfo(pJob, pTask, TDMT_VND_EXPLAIN, &pExplainMsgSendInfo)); + + int32_t msgType = TDMT_VND_RES_READY_RSP; + SRpcCtxVal ctxVal = {.val = pReadyMsgSendInfo, .clone = schCloneSMsgSendInfo, .freeFunc = schFreeRpcCtxVal}; + if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { + SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + msgType = TDMT_VND_EXPLAIN_RSP; + ctxVal.val = pExplainMsgSendInfo; + if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { + SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SCH_ERR_JRET(schMakeBrokenLinkVal(pJob, pTask, &pCtx->brokenVal, false)); + + return TSDB_CODE_SUCCESS; + +_return: + + taosHashCleanup(pCtx->args); + + if (pReadyMsgSendInfo) { + taosMemoryFreeClear(pReadyMsgSendInfo->param); + taosMemoryFreeClear(pReadyMsgSendInfo); + } + + if (pExplainMsgSendInfo) { + taosMemoryFreeClear(pExplainMsgSendInfo->param); + taosMemoryFreeClear(pExplainMsgSendInfo); + } + + SCH_RET(code); +} + +int32_t schCloneCallbackParam(SSchCallbackParamHeader *pSrc, SSchCallbackParamHeader **pDst) { + if (pSrc->isHbParam) { + SSchHbCallbackParam *dst = taosMemoryMalloc(sizeof(SSchHbCallbackParam)); + if (NULL == dst) { + qError("malloc SSchHbCallbackParam failed"); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + memcpy(dst, pSrc, sizeof(*dst)); + *pDst = (SSchCallbackParamHeader *)dst; + + return TSDB_CODE_SUCCESS; + } + + SSchTaskCallbackParam *dst = taosMemoryMalloc(sizeof(SSchTaskCallbackParam)); + if (NULL == dst) { + qError("malloc SSchTaskCallbackParam failed"); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + memcpy(dst, pSrc, sizeof(*dst)); + *pDst = (SSchCallbackParamHeader *)dst; + + return TSDB_CODE_SUCCESS; +} + +int32_t schCloneSMsgSendInfo(void *src, void **dst) { + SMsgSendInfo *pSrc = src; + int32_t code = 0; + SMsgSendInfo *pDst = taosMemoryMalloc(sizeof(*pSrc)); + if (NULL == pDst) { + qError("malloc SMsgSendInfo for rpcCtx failed, len:%d", (int32_t)sizeof(*pSrc)); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + memcpy(pDst, pSrc, sizeof(*pSrc)); + pDst->param = NULL; + + SCH_ERR_JRET(schCloneCallbackParam(pSrc->param, (SSchCallbackParamHeader **)&pDst->param)); + + *dst = pDst; + + return TSDB_CODE_SUCCESS; + +_return: + + taosMemoryFreeClear(pDst); + SCH_RET(code); +} + + +int32_t schAsyncSendMsg(SSchJob *pJob, SSchTask *pTask, void *transport, SEpSet *epSet, int32_t msgType, void *msg, + uint32_t msgSize, bool persistHandle, SRpcCtx *ctx) { + int32_t code = 0; + + SSchTrans *trans = (SSchTrans *)transport; + + SMsgSendInfo *pMsgSendInfo = NULL; + SCH_ERR_JRET(schGenerateCallBackInfo(pJob, pTask, msgType, &pMsgSendInfo)); + + pMsgSendInfo->msgInfo.pData = msg; + pMsgSendInfo->msgInfo.len = msgSize; + pMsgSendInfo->msgInfo.handle = trans->transHandle; + pMsgSendInfo->msgType = msgType; + + qDebug("start to send %s msg to node[%d,%s,%d], refId:%" PRIx64 "instance:%p, handle:%p", TMSG_INFO(msgType), + ntohl(((SMsgHead *)msg)->vgId), epSet->eps[epSet->inUse].fqdn, epSet->eps[epSet->inUse].port, pJob->refId, + trans->transInst, trans->transHandle); + + int64_t transporterId = 0; + code = asyncSendMsgToServerExt(trans->transInst, epSet, &transporterId, pMsgSendInfo, persistHandle, ctx); + if (code) { + SCH_ERR_JRET(code); + } + + SCH_TASK_DLOG("req msg sent, refId:%" PRIx64 ", type:%d, %s", pJob->refId, msgType, TMSG_INFO(msgType)); + return TSDB_CODE_SUCCESS; + +_return: + + if (pMsgSendInfo) { + taosMemoryFreeClear(pMsgSendInfo->param); + taosMemoryFreeClear(pMsgSendInfo); + } + + SCH_RET(code); +} + +int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, int32_t msgType) { + uint32_t msgSize = 0; + void *msg = NULL; + int32_t code = 0; + bool isCandidateAddr = false; + bool persistHandle = false; + SRpcCtx rpcCtx = {0}; + + if (NULL == addr) { + addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); + isCandidateAddr = true; + } + + SEpSet epSet = addr->epSet; + + switch (msgType) { + case TDMT_VND_CREATE_TABLE: + case TDMT_VND_DROP_TABLE: + case TDMT_VND_ALTER_TABLE: + case TDMT_VND_SUBMIT: { + msgSize = pTask->msgLen; + msg = taosMemoryCalloc(1, msgSize); + if (NULL == msg) { + SCH_TASK_ELOG("calloc %d failed", msgSize); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + memcpy(msg, pTask->msg, msgSize); + break; + } + + case TDMT_VND_QUERY: { + SCH_ERR_RET(schMakeQueryRpcCtx(pJob, pTask, &rpcCtx)); + + uint32_t len = strlen(pJob->sql); + msgSize = sizeof(SSubQueryMsg) + pTask->msgLen + len; + msg = taosMemoryCalloc(1, msgSize); + if (NULL == msg) { + SCH_TASK_ELOG("calloc %d failed", msgSize); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SSubQueryMsg *pMsg = msg; + pMsg->header.vgId = htonl(addr->nodeId); + pMsg->sId = htobe64(schMgmt.sId); + pMsg->queryId = htobe64(pJob->queryId); + pMsg->taskId = htobe64(pTask->taskId); + pMsg->refId = htobe64(pJob->refId); + pMsg->taskType = TASK_TYPE_TEMP; + pMsg->explain = SCH_IS_EXPLAIN_JOB(pJob); + pMsg->phyLen = htonl(pTask->msgLen); + pMsg->sqlLen = htonl(len); + + memcpy(pMsg->msg, pJob->sql, len); + memcpy(pMsg->msg + len, pTask->msg, pTask->msgLen); + + persistHandle = true; + break; + } + + case TDMT_VND_RES_READY: { + msgSize = sizeof(SResReadyReq); + msg = taosMemoryCalloc(1, msgSize); + if (NULL == msg) { + SCH_TASK_ELOG("calloc %d failed", msgSize); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SResReadyReq *pMsg = msg; + + pMsg->header.vgId = htonl(addr->nodeId); + + pMsg->sId = htobe64(schMgmt.sId); + pMsg->queryId = htobe64(pJob->queryId); + pMsg->taskId = htobe64(pTask->taskId); + break; + } + case TDMT_VND_FETCH: { + msgSize = sizeof(SResFetchReq); + msg = taosMemoryCalloc(1, msgSize); + if (NULL == msg) { + SCH_TASK_ELOG("calloc %d failed", msgSize); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + SResFetchReq *pMsg = msg; + + pMsg->header.vgId = htonl(addr->nodeId); + + pMsg->sId = htobe64(schMgmt.sId); + pMsg->queryId = htobe64(pJob->queryId); + pMsg->taskId = htobe64(pTask->taskId); + + break; + } + case TDMT_VND_DROP_TASK: { + msgSize = sizeof(STaskDropReq); + msg = taosMemoryCalloc(1, msgSize); + if (NULL == msg) { + SCH_TASK_ELOG("calloc %d failed", msgSize); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + STaskDropReq *pMsg = msg; + + pMsg->header.vgId = htonl(addr->nodeId); + + pMsg->sId = htobe64(schMgmt.sId); + pMsg->queryId = htobe64(pJob->queryId); + pMsg->taskId = htobe64(pTask->taskId); + pMsg->refId = htobe64(pJob->refId); + break; + } + case TDMT_VND_QUERY_HEARTBEAT: { + SCH_ERR_RET(schMakeHbRpcCtx(pJob, pTask, &rpcCtx)); + + SSchedulerHbReq req = {0}; + req.sId = schMgmt.sId; + req.header.vgId = addr->nodeId; + req.epId.nodeId = addr->nodeId; + memcpy(&req.epId.ep, SCH_GET_CUR_EP(addr), sizeof(SEp)); + + msgSize = tSerializeSSchedulerHbReq(NULL, 0, &req); + if (msgSize < 0) { + SCH_JOB_ELOG("tSerializeSSchedulerHbReq hbReq failed, size:%d", msgSize); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + msg = taosMemoryCalloc(1, msgSize); + if (NULL == msg) { + SCH_JOB_ELOG("calloc %d failed", msgSize); + SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + if (tSerializeSSchedulerHbReq(msg, msgSize, &req) < 0) { + SCH_JOB_ELOG("tSerializeSSchedulerHbReq hbReq failed, size:%d", msgSize); + SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + persistHandle = true; + break; + } + default: + SCH_TASK_ELOG("unknown msg type to send, msgType:%d", msgType); + SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); + break; + } + + SCH_SET_TASK_LASTMSG_TYPE(pTask, msgType); + + SSchTrans trans = {.transInst = pJob->transport, .transHandle = SCH_GET_TASK_HANDLE(pTask)}; + SCH_ERR_JRET(schAsyncSendMsg(pJob, pTask, &trans, &epSet, msgType, msg, msgSize, persistHandle, + (rpcCtx.args ? &rpcCtx : NULL))); + + if (msgType == TDMT_VND_QUERY) { + SCH_ERR_RET(schRecordTaskExecNode(pJob, pTask, addr, trans.transHandle)); + } + + return TSDB_CODE_SUCCESS; + +_return: + + SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); + schFreeRpcCtx(&rpcCtx); + + taosMemoryFreeClear(msg); + SCH_RET(code); +} + + + diff --git a/source/libs/scheduler/src/schUtil.c b/source/libs/scheduler/src/schUtil.c new file mode 100644 index 0000000000..57a86ba125 --- /dev/null +++ b/source/libs/scheduler/src/schUtil.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "catalog.h" +#include "command.h" +#include "query.h" +#include "schedulerInt.h" +#include "tmsg.h" +#include "tref.h" +#include "trpc.h" + +void schCloseJobRef(void) { + if (!atomic_load_8((int8_t *)&schMgmt.exit)) { + return; + } + + SCH_LOCK(SCH_WRITE, &schMgmt.lock); + if (atomic_load_32(&schMgmt.jobNum) <= 0 && schMgmt.jobRef >= 0) { + taosCloseRef(schMgmt.jobRef); + schMgmt.jobRef = -1; + } + SCH_UNLOCK(SCH_WRITE, &schMgmt.lock); +} + +uint64_t schGenTaskId(void) { return atomic_add_fetch_64(&schMgmt.taskId, 1); } + +uint64_t schGenUUID(void) { + static uint64_t hashId = 0; + static int32_t requestSerialId = 0; + + if (hashId == 0) { + char uid[64]; + int32_t code = taosGetSystemUUID(uid, tListLen(uid)); + if (code != TSDB_CODE_SUCCESS) { + qError("Failed to get the system uid, reason:%s", tstrerror(TAOS_SYSTEM_ERROR(errno))); + } else { + hashId = MurmurHash3_32(uid, strlen(uid)); + } + } + + int64_t ts = taosGetTimestampMs(); + uint64_t pid = taosGetPId(); + int32_t val = atomic_add_fetch_32(&requestSerialId, 1); + + uint64_t id = ((hashId & 0x0FFF) << 52) | ((pid & 0x0FFF) << 40) | ((ts & 0xFFFFFF) << 16) | (val & 0xFFFF); + return id; +} + + +void schFreeRpcCtxVal(const void *arg) { + if (NULL == arg) { + return; + } + + SMsgSendInfo *pMsgSendInfo = (SMsgSendInfo *)arg; + taosMemoryFreeClear(pMsgSendInfo->param); + taosMemoryFreeClear(pMsgSendInfo); +} + +void schFreeRpcCtx(SRpcCtx *pCtx) { + if (NULL == pCtx) { + return; + } + void *pIter = taosHashIterate(pCtx->args, NULL); + while (pIter) { + SRpcCtxVal *ctxVal = (SRpcCtxVal *)pIter; + + (*ctxVal->freeFunc)(ctxVal->val); + + pIter = taosHashIterate(pCtx->args, pIter); + } + + taosHashCleanup(pCtx->args); + + if (pCtx->brokenVal.freeFunc) { + (*pCtx->brokenVal.freeFunc)(pCtx->brokenVal.val); + } +} + + diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 7a75c00e6e..bd2c7e5b49 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -25,2563 +25,6 @@ SSchedulerMgmt schMgmt = { .jobRef = -1, }; -FORCE_INLINE SSchJob *schAcquireJob(int64_t refId) { return (SSchJob *)taosAcquireRef(schMgmt.jobRef, refId); } - -FORCE_INLINE int32_t schReleaseJob(int64_t refId) { return taosReleaseRef(schMgmt.jobRef, refId); } - -uint64_t schGenTaskId(void) { return atomic_add_fetch_64(&schMgmt.taskId, 1); } - -#if 0 -uint64_t schGenUUID(void) { - static uint64_t hashId = 0; - static int32_t requestSerialId = 0; - - if (hashId == 0) { - char uid[64]; - int32_t code = taosGetSystemUUID(uid, tListLen(uid)); - if (code != TSDB_CODE_SUCCESS) { - qError("Failed to get the system uid, reason:%s", tstrerror(TAOS_SYSTEM_ERROR(errno))); - } else { - hashId = MurmurHash3_32(uid, strlen(uid)); - } - } - - int64_t ts = taosGetTimestampMs(); - uint64_t pid = taosGetPId(); - int32_t val = atomic_add_fetch_32(&requestSerialId, 1); - - uint64_t id = ((hashId & 0x0FFF) << 52) | ((pid & 0x0FFF) << 40) | ((ts & 0xFFFFFF) << 16) | (val & 0xFFFF); - return id; -} -#endif - -int32_t schInitTask(SSchJob *pJob, SSchTask *pTask, SSubplan *pPlan, SSchLevel *pLevel) { - pTask->plan = pPlan; - pTask->level = pLevel; - SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START); - pTask->taskId = schGenTaskId(); - pTask->execNodes = taosArrayInit(SCH_MAX_CANDIDATE_EP_NUM, sizeof(SSchNodeInfo)); - if (NULL == pTask->execNodes) { - SCH_TASK_ELOG("taosArrayInit %d execNodes failed", SCH_MAX_CANDIDATE_EP_NUM); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - return TSDB_CODE_SUCCESS; -} - -int32_t schInitJob(SSchJob **pSchJob, SQueryPlan *pDag, void *transport, SArray *pNodeList, const char *sql, - int64_t startTs, bool syncSchedule) { - int32_t code = 0; - int64_t refId = -1; - SSchJob *pJob = taosMemoryCalloc(1, sizeof(SSchJob)); - if (NULL == pJob) { - qError("QID:%" PRIx64 " calloc %d failed", pDag->queryId, (int32_t)sizeof(SSchJob)); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - pJob->attr.explainMode = pDag->explainInfo.mode; - pJob->attr.syncSchedule = syncSchedule; - pJob->transport = transport; - pJob->sql = sql; - - if (pNodeList != NULL) { - pJob->nodeList = taosArrayDup(pNodeList); - } - - SCH_ERR_JRET(schValidateAndBuildJob(pDag, pJob)); - - if (SCH_IS_EXPLAIN_JOB(pJob)) { - SCH_ERR_JRET(qExecExplainBegin(pDag, &pJob->explainCtx, startTs)); - } - - pJob->execTasks = - taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); - if (NULL == pJob->execTasks) { - SCH_JOB_ELOG("taosHashInit %d execTasks failed", pDag->numOfSubplans); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - pJob->succTasks = - taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); - if (NULL == pJob->succTasks) { - SCH_JOB_ELOG("taosHashInit %d succTasks failed", pDag->numOfSubplans); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - pJob->failTasks = - taosHashInit(pDag->numOfSubplans, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_ENTRY_LOCK); - if (NULL == pJob->failTasks) { - SCH_JOB_ELOG("taosHashInit %d failTasks failed", pDag->numOfSubplans); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - tsem_init(&pJob->rspSem, 0, 0); - - refId = taosAddRef(schMgmt.jobRef, pJob); - if (refId < 0) { - SCH_JOB_ELOG("taosAddRef job failed, error:%s", tstrerror(terrno)); - SCH_ERR_JRET(terrno); - } - - atomic_add_fetch_32(&schMgmt.jobNum, 1); - - if (NULL == schAcquireJob(refId)) { - SCH_JOB_ELOG("schAcquireJob job failed, refId:%" PRIx64, refId); - SCH_ERR_JRET(TSDB_CODE_SCH_STATUS_ERROR); - } - - pJob->refId = refId; - - SCH_JOB_DLOG("job refId:%" PRIx64, pJob->refId); - - pJob->status = JOB_TASK_STATUS_NOT_START; - - *pSchJob = pJob; - - return TSDB_CODE_SUCCESS; - -_return: - - if (refId < 0) { - schFreeJobImpl(pJob); - } else { - taosRemoveRef(schMgmt.jobRef, refId); - } - SCH_RET(code); -} - -void schFreeRpcCtx(SRpcCtx *pCtx) { - if (NULL == pCtx) { - return; - } - void *pIter = taosHashIterate(pCtx->args, NULL); - while (pIter) { - SRpcCtxVal *ctxVal = (SRpcCtxVal *)pIter; - - (*ctxVal->freeFunc)(ctxVal->val); - - pIter = taosHashIterate(pCtx->args, pIter); - } - - taosHashCleanup(pCtx->args); - - if (pCtx->brokenVal.freeFunc) { - (*pCtx->brokenVal.freeFunc)(pCtx->brokenVal.val); - } -} - -void schFreeTask(SSchTask *pTask) { - if (pTask->candidateAddrs) { - taosArrayDestroy(pTask->candidateAddrs); - } - - taosMemoryFreeClear(pTask->msg); - - if (pTask->children) { - taosArrayDestroy(pTask->children); - } - - if (pTask->parents) { - taosArrayDestroy(pTask->parents); - } - - if (pTask->execNodes) { - taosArrayDestroy(pTask->execNodes); - } -} - -static FORCE_INLINE bool schJobNeedToStop(SSchJob *pJob, int8_t *pStatus) { - int8_t status = SCH_GET_JOB_STATUS(pJob); - if (pStatus) { - *pStatus = status; - } - - return (status == JOB_TASK_STATUS_FAILED || status == JOB_TASK_STATUS_CANCELLED || - status == JOB_TASK_STATUS_CANCELLING || status == JOB_TASK_STATUS_DROPPING || - status == JOB_TASK_STATUS_SUCCEED); -} - -int32_t schValidateTaskReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t msgType) { - int32_t lastMsgType = SCH_GET_TASK_LASTMSG_TYPE(pTask); - int32_t taskStatus = SCH_GET_TASK_STATUS(pTask); - int32_t reqMsgType = msgType - 1; - switch (msgType) { - case TDMT_SCH_LINK_BROKEN: - case TDMT_VND_EXPLAIN_RSP: - return TSDB_CODE_SUCCESS; - case TDMT_VND_QUERY_RSP: // query_rsp may be processed later than ready_rsp - if (lastMsgType != reqMsgType && -1 != lastMsgType && TDMT_VND_FETCH != lastMsgType) { - SCH_TASK_DLOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), - TMSG_INFO(msgType)); - } - - if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { - SCH_TASK_DLOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), - TMSG_INFO(msgType)); - } - - SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); - return TSDB_CODE_SUCCESS; - case TDMT_VND_RES_READY_RSP: - reqMsgType = TDMT_VND_QUERY; - if (lastMsgType != reqMsgType && -1 != lastMsgType) { - SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", - (lastMsgType > 0 ? TMSG_INFO(lastMsgType) : "null"), TMSG_INFO(msgType)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); - } - - if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { - SCH_TASK_ELOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), - TMSG_INFO(msgType)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); - } - - SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); - return TSDB_CODE_SUCCESS; - case TDMT_VND_FETCH_RSP: - if (lastMsgType != reqMsgType && -1 != lastMsgType) { - SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), - TMSG_INFO(msgType)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); - } - - if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { - SCH_TASK_ELOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), - TMSG_INFO(msgType)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); - } - - SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); - return TSDB_CODE_SUCCESS; - case TDMT_VND_CREATE_TABLE_RSP: - case TDMT_VND_DROP_TABLE_RSP: - case TDMT_VND_ALTER_TABLE_RSP: - case TDMT_VND_SUBMIT_RSP: - break; - default: - SCH_TASK_ELOG("unknown rsp msg, type:%s, status:%s", TMSG_INFO(msgType), jobTaskStatusStr(taskStatus)); - SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); - } - - if (lastMsgType != reqMsgType) { - SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), - TMSG_INFO(msgType)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); - } - - if (taskStatus != JOB_TASK_STATUS_EXECUTING && taskStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED) { - SCH_TASK_ELOG("rsp msg conflicted with task status, status:%s, rspType:%s", jobTaskStatusStr(taskStatus), - TMSG_INFO(msgType)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); - } - - SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); - - return TSDB_CODE_SUCCESS; -} - -int32_t schCheckAndUpdateJobStatus(SSchJob *pJob, int8_t newStatus) { - int32_t code = 0; - - int8_t oriStatus = 0; - - while (true) { - oriStatus = SCH_GET_JOB_STATUS(pJob); - - if (oriStatus == newStatus) { - SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); - } - - switch (oriStatus) { - case JOB_TASK_STATUS_NULL: - if (newStatus != JOB_TASK_STATUS_NOT_START) { - SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); - } - - break; - case JOB_TASK_STATUS_NOT_START: - if (newStatus != JOB_TASK_STATUS_EXECUTING) { - SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); - } - - break; - case JOB_TASK_STATUS_EXECUTING: - if (newStatus != JOB_TASK_STATUS_PARTIAL_SUCCEED && newStatus != JOB_TASK_STATUS_FAILED && - newStatus != JOB_TASK_STATUS_CANCELLING && newStatus != JOB_TASK_STATUS_CANCELLED && - newStatus != JOB_TASK_STATUS_DROPPING) { - SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); - } - - break; - case JOB_TASK_STATUS_PARTIAL_SUCCEED: - if (newStatus != JOB_TASK_STATUS_FAILED && newStatus != JOB_TASK_STATUS_SUCCEED && - newStatus != JOB_TASK_STATUS_DROPPING) { - SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); - } - - break; - case JOB_TASK_STATUS_SUCCEED: - case JOB_TASK_STATUS_FAILED: - case JOB_TASK_STATUS_CANCELLING: - if (newStatus != JOB_TASK_STATUS_DROPPING) { - SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); - } - - break; - case JOB_TASK_STATUS_CANCELLED: - case JOB_TASK_STATUS_DROPPING: - SCH_ERR_JRET(TSDB_CODE_QRY_JOB_FREED); - break; - - default: - SCH_JOB_ELOG("invalid job status:%s", jobTaskStatusStr(oriStatus)); - SCH_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); - } - - if (oriStatus != atomic_val_compare_exchange_8(&pJob->status, oriStatus, newStatus)) { - continue; - } - - SCH_JOB_DLOG("job status updated from %s to %s", jobTaskStatusStr(oriStatus), jobTaskStatusStr(newStatus)); - - break; - } - - return TSDB_CODE_SUCCESS; - -_return: - - SCH_JOB_ELOG("invalid job status update, from %s to %s", jobTaskStatusStr(oriStatus), jobTaskStatusStr(newStatus)); - SCH_ERR_RET(code); - return TSDB_CODE_SUCCESS; -} - -int32_t schBuildTaskRalation(SSchJob *pJob, SHashObj *planToTask) { - for (int32_t i = 0; i < pJob->levelNum; ++i) { - SSchLevel *pLevel = taosArrayGet(pJob->levels, i); - - for (int32_t m = 0; m < pLevel->taskNum; ++m) { - SSchTask *pTask = taosArrayGet(pLevel->subTasks, m); - SSubplan *pPlan = pTask->plan; - int32_t childNum = pPlan->pChildren ? (int32_t)LIST_LENGTH(pPlan->pChildren) : 0; - int32_t parentNum = pPlan->pParents ? (int32_t)LIST_LENGTH(pPlan->pParents) : 0; - - if (childNum > 0) { - if (pJob->levelIdx == pLevel->level) { - SCH_JOB_ELOG("invalid query plan, lowest level, childNum:%d", childNum); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - - pTask->children = taosArrayInit(childNum, POINTER_BYTES); - if (NULL == pTask->children) { - SCH_TASK_ELOG("taosArrayInit %d children failed", childNum); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - } - - for (int32_t n = 0; n < childNum; ++n) { - SSubplan *child = (SSubplan *)nodesListGetNode(pPlan->pChildren, n); - SSchTask **childTask = taosHashGet(planToTask, &child, POINTER_BYTES); - if (NULL == childTask || NULL == *childTask) { - SCH_TASK_ELOG("subplan children relationship error, level:%d, taskIdx:%d, childIdx:%d", i, m, n); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - - if (NULL == taosArrayPush(pTask->children, childTask)) { - SCH_TASK_ELOG("taosArrayPush childTask failed, level:%d, taskIdx:%d, childIdx:%d", i, m, n); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SCH_TASK_DLOG("children info, the %d child TID %" PRIx64, n, (*childTask)->taskId); - } - - if (parentNum > 0) { - if (0 == pLevel->level) { - SCH_TASK_ELOG("invalid task info, level:0, parentNum:%d", parentNum); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - - pTask->parents = taosArrayInit(parentNum, POINTER_BYTES); - if (NULL == pTask->parents) { - SCH_TASK_ELOG("taosArrayInit %d parents failed", parentNum); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - } else { - if (0 != pLevel->level) { - SCH_TASK_ELOG("invalid task info, level:%d, parentNum:%d", pLevel->level, parentNum); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - } - - for (int32_t n = 0; n < parentNum; ++n) { - SSubplan *parent = (SSubplan *)nodesListGetNode(pPlan->pParents, n); - SSchTask **parentTask = taosHashGet(planToTask, &parent, POINTER_BYTES); - if (NULL == parentTask || NULL == *parentTask) { - SCH_TASK_ELOG("subplan parent relationship error, level:%d, taskIdx:%d, childIdx:%d", i, m, n); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - - if (NULL == taosArrayPush(pTask->parents, parentTask)) { - SCH_TASK_ELOG("taosArrayPush parentTask failed, level:%d, taskIdx:%d, childIdx:%d", i, m, n); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SCH_TASK_DLOG("parents info, the %d parent TID %" PRIx64, n, (*parentTask)->taskId); - } - - SCH_TASK_DLOG("level:%d, parentNum:%d, childNum:%d", i, parentNum, childNum); - } - } - - SSchLevel *pLevel = taosArrayGet(pJob->levels, 0); - if (SCH_IS_QUERY_JOB(pJob) && pLevel->taskNum > 1) { - SCH_JOB_ELOG("invalid query plan, level:0, taskNum:%d", pLevel->taskNum); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - - return TSDB_CODE_SUCCESS; -} - -int32_t schRecordTaskSucceedNode(SSchJob *pJob, SSchTask *pTask) { - SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); - if (NULL == addr) { - SCH_TASK_ELOG("taosArrayGet candidate addr failed, idx:%d, size:%d", pTask->candidateIdx, - (int32_t)taosArrayGetSize(pTask->candidateAddrs)); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - - pTask->succeedAddr = *addr; - - return TSDB_CODE_SUCCESS; -} - -int32_t schRecordTaskExecNode(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, void *handle) { - SSchNodeInfo nodeInfo = {.addr = *addr, .handle = handle}; - - if (NULL == taosArrayPush(pTask->execNodes, &nodeInfo)) { - SCH_TASK_ELOG("taosArrayPush nodeInfo to execNodes list failed, errno:%d", errno); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SCH_TASK_DLOG("task execNode recorded, handle:%p", handle); - - return TSDB_CODE_SUCCESS; -} - -int32_t schRecordQueryDataSrc(SSchJob *pJob, SSchTask *pTask) { - if (!SCH_IS_DATA_SRC_QRY_TASK(pTask)) { - return TSDB_CODE_SUCCESS; - } - - taosArrayPush(pJob->dataSrcTasks, &pTask); - - return TSDB_CODE_SUCCESS; -} - - -int32_t schValidateAndBuildJob(SQueryPlan *pDag, SSchJob *pJob) { - int32_t code = 0; - pJob->queryId = pDag->queryId; - - if (pDag->numOfSubplans <= 0) { - SCH_JOB_ELOG("invalid subplan num:%d", pDag->numOfSubplans); - SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); - } - - pJob->dataSrcTasks = taosArrayInit(pDag->numOfSubplans, POINTER_BYTES); - if (NULL == pJob->dataSrcTasks) { - SCH_ERR_RET(TSDB_CODE_OUT_OF_MEMORY); - } - - int32_t levelNum = (int32_t)LIST_LENGTH(pDag->pSubplans); - if (levelNum <= 0) { - SCH_JOB_ELOG("invalid level num:%d", levelNum); - SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); - } - - SHashObj *planToTask = taosHashInit( - SCHEDULE_DEFAULT_MAX_TASK_NUM, - taosGetDefaultHashFunction(POINTER_BYTES == sizeof(int64_t) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_INT), false, - HASH_NO_LOCK); - if (NULL == planToTask) { - SCH_JOB_ELOG("taosHashInit %d failed", SCHEDULE_DEFAULT_MAX_TASK_NUM); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - pJob->levels = taosArrayInit(levelNum, sizeof(SSchLevel)); - if (NULL == pJob->levels) { - SCH_JOB_ELOG("taosArrayInit %d failed", levelNum); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - pJob->levelNum = levelNum; - pJob->levelIdx = levelNum - 1; - - pJob->subPlans = pDag->pSubplans; - - SSchLevel level = {0}; - SNodeListNode *plans = NULL; - int32_t taskNum = 0; - SSchLevel *pLevel = NULL; - - level.status = JOB_TASK_STATUS_NOT_START; - - for (int32_t i = 0; i < levelNum; ++i) { - if (NULL == taosArrayPush(pJob->levels, &level)) { - SCH_JOB_ELOG("taosArrayPush level failed, level:%d", i); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - pLevel = taosArrayGet(pJob->levels, i); - pLevel->level = i; - - plans = (SNodeListNode *)nodesListGetNode(pDag->pSubplans, i); - if (NULL == plans) { - SCH_JOB_ELOG("empty level plan, level:%d", i); - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - - taskNum = (int32_t)LIST_LENGTH(plans->pNodeList); - if (taskNum <= 0) { - SCH_JOB_ELOG("invalid level plan number:%d, level:%d", taskNum, i); - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - - pLevel->taskNum = taskNum; - - pLevel->subTasks = taosArrayInit(taskNum, sizeof(SSchTask)); - if (NULL == pLevel->subTasks) { - SCH_JOB_ELOG("taosArrayInit %d failed", taskNum); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - for (int32_t n = 0; n < taskNum; ++n) { - SSubplan *plan = (SSubplan *)nodesListGetNode(plans->pNodeList, n); - - SCH_SET_JOB_TYPE(pJob, plan->subplanType); - - SSchTask task = {0}; - SSchTask *pTask = &task; - - SCH_ERR_JRET(schInitTask(pJob, &task, plan, pLevel)); - - void *p = taosArrayPush(pLevel->subTasks, &task); - if (NULL == p) { - SCH_TASK_ELOG("taosArrayPush task to level failed, level:%d, taskIdx:%d", pLevel->level, n); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SCH_ERR_JRET(schRecordQueryDataSrc(pJob, p)); - - if (0 != taosHashPut(planToTask, &plan, POINTER_BYTES, &p, POINTER_BYTES)) { - SCH_TASK_ELOG("taosHashPut to planToTaks failed, taskIdx:%d", n); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - ++pJob->taskNum; - } - - SCH_JOB_DLOG("level initialized, taskNum:%d", taskNum); - } - - SCH_ERR_JRET(schBuildTaskRalation(pJob, planToTask)); - -_return: - if (planToTask) { - taosHashCleanup(planToTask); - } - - SCH_RET(code); -} - -int32_t schSetTaskCandidateAddrs(SSchJob *pJob, SSchTask *pTask) { - if (NULL != pTask->candidateAddrs) { - return TSDB_CODE_SUCCESS; - } - - pTask->candidateIdx = 0; - pTask->candidateAddrs = taosArrayInit(SCH_MAX_CANDIDATE_EP_NUM, sizeof(SQueryNodeAddr)); - if (NULL == pTask->candidateAddrs) { - SCH_TASK_ELOG("taosArrayInit %d condidate addrs failed", SCH_MAX_CANDIDATE_EP_NUM); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - if (pTask->plan->execNode.epSet.numOfEps > 0) { - if (NULL == taosArrayPush(pTask->candidateAddrs, &pTask->plan->execNode)) { - SCH_TASK_ELOG("taosArrayPush execNode to candidate addrs failed, errno:%d", errno); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SCH_TASK_DLOG("use execNode from plan as candidate addr, numOfEps:%d", pTask->plan->execNode.epSet.numOfEps); - - return TSDB_CODE_SUCCESS; - } - - int32_t addNum = 0; - int32_t nodeNum = 0; - if (pJob->nodeList) { - nodeNum = taosArrayGetSize(pJob->nodeList); - - for (int32_t i = 0; i < nodeNum && addNum < SCH_MAX_CANDIDATE_EP_NUM; ++i) { - SQueryNodeAddr *naddr = taosArrayGet(pJob->nodeList, i); - - if (NULL == taosArrayPush(pTask->candidateAddrs, naddr)) { - SCH_TASK_ELOG("taosArrayPush execNode to candidate addrs failed, addNum:%d, errno:%d", addNum, errno); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - ++addNum; - } - } - - if (addNum <= 0) { - SCH_TASK_ELOG("no available execNode as candidates, nodeNum:%d", nodeNum); - SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); - } - - /* - for (int32_t i = 0; i < job->dataSrcEps.numOfEps && addNum < SCH_MAX_CANDIDATE_EP_NUM; ++i) { - strncpy(epSet->fqdn[epSet->numOfEps], job->dataSrcEps.fqdn[i], sizeof(job->dataSrcEps.fqdn[i])); - epSet->port[epSet->numOfEps] = job->dataSrcEps.port[i]; - - ++epSet->numOfEps; - } - */ - - return TSDB_CODE_SUCCESS; -} - -int32_t schRemoveTaskFromExecList(SSchJob *pJob, SSchTask *pTask) { - int32_t code = taosHashRemove(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId)); - if (code) { - SCH_TASK_ELOG("task failed to rm from execTask list, code:%x", code); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - - return TSDB_CODE_SUCCESS; -} - - -int32_t schPushTaskToExecList(SSchJob *pJob, SSchTask *pTask) { - int32_t code = taosHashPut(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId), &pTask, POINTER_BYTES); - if (0 != code) { - if (HASH_NODE_EXIST(code)) { - SCH_TASK_ELOG("task already in execTask list, code:%x", code); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - - SCH_TASK_ELOG("taosHashPut task to execTask list failed, errno:%d", errno); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SCH_TASK_DLOG("task added to execTask list, numOfTasks:%d", taosHashGetSize(pJob->execTasks)); - - return TSDB_CODE_SUCCESS; -} - -int32_t schMoveTaskToSuccList(SSchJob *pJob, SSchTask *pTask, bool *moved) { - if (0 != taosHashRemove(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId))) { - SCH_TASK_WLOG("remove task from execTask list failed, may not exist, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); - } else { - SCH_TASK_DLOG("task removed from execTask list, numOfTasks:%d", taosHashGetSize(pJob->execTasks)); - } - - int32_t code = taosHashPut(pJob->succTasks, &pTask->taskId, sizeof(pTask->taskId), &pTask, POINTER_BYTES); - if (0 != code) { - if (HASH_NODE_EXIST(code)) { - *moved = true; - SCH_TASK_ELOG("task already in succTask list, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); - } - - SCH_TASK_ELOG("taosHashPut task to succTask list failed, errno:%d", errno); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - *moved = true; - - SCH_TASK_DLOG("task moved to succTask list, numOfTasks:%d", taosHashGetSize(pJob->succTasks)); - - return TSDB_CODE_SUCCESS; -} - -int32_t schMoveTaskToFailList(SSchJob *pJob, SSchTask *pTask, bool *moved) { - *moved = false; - - if (0 != taosHashRemove(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId))) { - SCH_TASK_WLOG("remove task from execTask list failed, may not exist, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); - } - - int32_t code = taosHashPut(pJob->failTasks, &pTask->taskId, sizeof(pTask->taskId), &pTask, POINTER_BYTES); - if (0 != code) { - if (HASH_NODE_EXIST(code)) { - *moved = true; - - SCH_TASK_WLOG("task already in failTask list, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); - } - - SCH_TASK_ELOG("taosHashPut task to failTask list failed, errno:%d", errno); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - *moved = true; - - SCH_TASK_DLOG("task moved to failTask list, numOfTasks:%d", taosHashGetSize(pJob->failTasks)); - - return TSDB_CODE_SUCCESS; -} - -int32_t schMoveTaskToExecList(SSchJob *pJob, SSchTask *pTask, bool *moved) { - if (0 != taosHashRemove(pJob->succTasks, &pTask->taskId, sizeof(pTask->taskId))) { - SCH_TASK_WLOG("remove task from succTask list failed, may not exist, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); - } - - int32_t code = taosHashPut(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId), &pTask, POINTER_BYTES); - if (0 != code) { - if (HASH_NODE_EXIST(code)) { - *moved = true; - - SCH_TASK_ELOG("task already in execTask list, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); - SCH_ERR_RET(TSDB_CODE_SCH_STATUS_ERROR); - } - - SCH_TASK_ELOG("taosHashPut task to execTask list failed, errno:%d", errno); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - *moved = true; - - SCH_TASK_DLOG("task moved to execTask list, numOfTasks:%d", taosHashGetSize(pJob->execTasks)); - - return TSDB_CODE_SUCCESS; -} - -int32_t schTaskCheckSetRetry(SSchJob *pJob, SSchTask *pTask, int32_t errCode, bool *needRetry) { - int8_t status = 0; - ++pTask->tryTimes; - - if (schJobNeedToStop(pJob, &status)) { - *needRetry = false; - SCH_TASK_DLOG("task no more retry cause of job status, job status:%s", jobTaskStatusStr(status)); - return TSDB_CODE_SUCCESS; - } - - if (pTask->tryTimes >= REQUEST_MAX_TRY_TIMES) { - *needRetry = false; - SCH_TASK_DLOG("task no more retry since reach max try times, tryTimes:%d", pTask->tryTimes); - return TSDB_CODE_SUCCESS; - } - - if (!NEED_SCHEDULER_RETRY_ERROR(errCode)) { - *needRetry = false; - SCH_TASK_DLOG("task no more retry cause of errCode, errCode:%x - %s", errCode, tstrerror(errCode)); - return TSDB_CODE_SUCCESS; - } - - // TODO CHECK epList/condidateList - if (SCH_IS_DATA_SRC_TASK(pTask)) { - if (pTask->tryTimes >= SCH_TASK_NUM_OF_EPS(&pTask->plan->execNode)) { - *needRetry = false; - SCH_TASK_DLOG("task no more retry since all ep tried, tryTimes:%d, epNum:%d", pTask->tryTimes, - SCH_TASK_NUM_OF_EPS(&pTask->plan->execNode)); - return TSDB_CODE_SUCCESS; - } - } else { - int32_t candidateNum = taosArrayGetSize(pTask->candidateAddrs); - - if ((pTask->candidateIdx + 1) >= candidateNum) { - *needRetry = false; - SCH_TASK_DLOG("task no more retry since all candiates tried, candidateIdx:%d, candidateNum:%d", - pTask->candidateIdx, candidateNum); - return TSDB_CODE_SUCCESS; - } - } - - *needRetry = true; - SCH_TASK_DLOG("task need the %dth retry, errCode:%x - %s", pTask->tryTimes, errCode, tstrerror(errCode)); - - return TSDB_CODE_SUCCESS; -} - -int32_t schHandleTaskRetry(SSchJob *pJob, SSchTask *pTask) { - atomic_sub_fetch_32(&pTask->level->taskLaunchedNum, 1); - - SCH_ERR_RET(schRemoveTaskFromExecList(pJob, pTask)); - SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_NOT_START); - - if (SCH_TASK_NEED_FLOW_CTRL(pJob, pTask)) { - SCH_ERR_RET(schDecTaskFlowQuota(pJob, pTask)); - SCH_ERR_RET(schLaunchTasksInFlowCtrlList(pJob, pTask)); - } - - if (SCH_IS_DATA_SRC_TASK(pTask)) { - SCH_SWITCH_EPSET(&pTask->plan->execNode); - } else { - ++pTask->candidateIdx; - } - - SCH_ERR_RET(schLaunchTask(pJob, pTask)); - - return TSDB_CODE_SUCCESS; -} - -int32_t schUpdateHbConnection(SQueryNodeEpId *epId, SSchTrans *trans) { - int32_t code = 0; - SSchHbTrans *hb = NULL; - - hb = taosHashGet(schMgmt.hbConnections, epId, sizeof(SQueryNodeEpId)); - if (NULL == hb) { - qError("taosHashGet hb connection failed, nodeId:%d, fqdn:%s, port:%d", epId->nodeId, epId->ep.fqdn, epId->ep.port); - SCH_ERR_RET(TSDB_CODE_QRY_APP_ERROR); - } - - SCH_LOCK(SCH_WRITE, &hb->lock); - memcpy(&hb->trans, trans, sizeof(*trans)); - SCH_UNLOCK(SCH_WRITE, &hb->lock); - - qDebug("hb connection updated, sId:%" PRIx64 ", nodeId:%d, fqdn:%s, port:%d, instance:%p, handle:%p", schMgmt.sId, - epId->nodeId, epId->ep.fqdn, epId->ep.port, trans->transInst, trans->transHandle); - - return TSDB_CODE_SUCCESS; -} - -void schUpdateJobErrCode(SSchJob *pJob, int32_t errCode) { - if (TSDB_CODE_SUCCESS == errCode) { - return; - } - - int32_t origCode = atomic_load_32(&pJob->errCode); - if (TSDB_CODE_SUCCESS == origCode) { - if (origCode == atomic_val_compare_exchange_32(&pJob->errCode, origCode, errCode)) { - goto _return; - } - - origCode = atomic_load_32(&pJob->errCode); - } - - if (NEED_CLIENT_HANDLE_ERROR(origCode)) { - return; - } - - if (NEED_CLIENT_HANDLE_ERROR(errCode)) { - atomic_store_32(&pJob->errCode, errCode); - goto _return; - } - - return; - -_return: - - SCH_JOB_DLOG("job errCode updated to %x - %s", errCode, tstrerror(errCode)); -} - -int32_t schProcessOnJobFailureImpl(SSchJob *pJob, int32_t status, int32_t errCode) { - // if already FAILED, no more processing - SCH_ERR_RET(schCheckAndUpdateJobStatus(pJob, status)); - - schUpdateJobErrCode(pJob, errCode); - - if (atomic_load_8(&pJob->userFetch) || pJob->attr.syncSchedule) { - tsem_post(&pJob->rspSem); - } - - int32_t code = atomic_load_32(&pJob->errCode); - - SCH_JOB_DLOG("job failed with error: %s", tstrerror(code)); - - SCH_RET(code); -} - -// Note: no more task error processing, handled in function internal -int32_t schProcessOnJobFailure(SSchJob *pJob, int32_t errCode) { - SCH_RET(schProcessOnJobFailureImpl(pJob, JOB_TASK_STATUS_FAILED, errCode)); -} - -// Note: no more error processing, handled in function internal -int32_t schProcessOnJobDropped(SSchJob *pJob, int32_t errCode) { - SCH_RET(schProcessOnJobFailureImpl(pJob, JOB_TASK_STATUS_DROPPING, errCode)); -} - -// Note: no more task error processing, handled in function internal -int32_t schProcessOnJobPartialSuccess(SSchJob *pJob) { - int32_t code = 0; - - SCH_ERR_RET(schCheckAndUpdateJobStatus(pJob, JOB_TASK_STATUS_PARTIAL_SUCCEED)); - - if (pJob->attr.syncSchedule) { - tsem_post(&pJob->rspSem); - } - - if (atomic_load_8(&pJob->userFetch)) { - SCH_ERR_JRET(schFetchFromRemote(pJob)); - } - - return TSDB_CODE_SUCCESS; - -_return: - - SCH_RET(schProcessOnJobFailure(pJob, code)); -} - -void schProcessOnDataFetched(SSchJob *job) { - atomic_val_compare_exchange_32(&job->remoteFetch, 1, 0); - tsem_post(&job->rspSem); -} - -// Note: no more task error processing, handled in function internal -int32_t schProcessOnTaskFailure(SSchJob *pJob, SSchTask *pTask, int32_t errCode) { - int8_t status = 0; - - if (schJobNeedToStop(pJob, &status)) { - SCH_TASK_DLOG("task failed not processed cause of job status, job status:%s", jobTaskStatusStr(status)); - SCH_RET(atomic_load_32(&pJob->errCode)); - } - - bool needRetry = false; - bool moved = false; - int32_t taskDone = 0; - int32_t code = 0; - - SCH_TASK_DLOG("taskOnFailure, code:%s", tstrerror(errCode)); - - SCH_ERR_JRET(schTaskCheckSetRetry(pJob, pTask, errCode, &needRetry)); - - if (!needRetry) { - SCH_TASK_ELOG("task failed and no more retry, code:%s", tstrerror(errCode)); - - if (SCH_GET_TASK_STATUS(pTask) == JOB_TASK_STATUS_EXECUTING) { - SCH_ERR_JRET(schMoveTaskToFailList(pJob, pTask, &moved)); - } else { - SCH_TASK_ELOG("task not in executing list, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); - SCH_ERR_JRET(TSDB_CODE_SCH_STATUS_ERROR); - } - - SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_FAILED); - - if (SCH_IS_WAIT_ALL_JOB(pJob)) { - SCH_LOCK(SCH_WRITE, &pTask->level->lock); - pTask->level->taskFailed++; - taskDone = pTask->level->taskSucceed + pTask->level->taskFailed; - SCH_UNLOCK(SCH_WRITE, &pTask->level->lock); - - schUpdateJobErrCode(pJob, errCode); - - if (taskDone < pTask->level->taskNum) { - SCH_TASK_DLOG("need to wait other tasks, doneNum:%d, allNum:%d", taskDone, pTask->level->taskNum); - SCH_RET(errCode); - } - } - } else { - SCH_ERR_JRET(schHandleTaskRetry(pJob, pTask)); - - return TSDB_CODE_SUCCESS; - } - -_return: - - SCH_RET(schProcessOnJobFailure(pJob, errCode)); -} - -int32_t schLaunchNextLevelTasks(SSchJob *pJob, SSchTask *pTask) { - if (!SCH_IS_QUERY_JOB(pJob)) { - return TSDB_CODE_SUCCESS; - } - - SSchLevel *pLevel = pTask->level; - int32_t doneNum = atomic_add_fetch_32(&pLevel->taskDoneNum, 1); - if (doneNum == pLevel->taskNum) { - pJob->levelIdx--; - - pLevel = taosArrayGet(pJob->levels, pJob->levelIdx); - for (int32_t i = 0; i < pLevel->taskNum; ++i) { - SSchTask *pTask = taosArrayGet(pLevel->subTasks, i); - - if (pTask->children && taosArrayGetSize(pTask->children) > 0) { - continue; - } - - SCH_ERR_RET(schLaunchTask(pJob, pTask)); - } - } - - return TSDB_CODE_SUCCESS; -} - - -// Note: no more task error processing, handled in function internal -int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask) { - bool moved = false; - int32_t code = 0; - - SCH_TASK_DLOG("taskOnSuccess, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); - - SCH_ERR_JRET(schMoveTaskToSuccList(pJob, pTask, &moved)); - - SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_PARTIAL_SUCCEED); - - SCH_ERR_JRET(schRecordTaskSucceedNode(pJob, pTask)); - - SCH_ERR_JRET(schLaunchTasksInFlowCtrlList(pJob, pTask)); - - int32_t parentNum = pTask->parents ? (int32_t)taosArrayGetSize(pTask->parents) : 0; - if (parentNum == 0) { - int32_t taskDone = 0; - if (SCH_IS_WAIT_ALL_JOB(pJob)) { - SCH_LOCK(SCH_WRITE, &pTask->level->lock); - pTask->level->taskSucceed++; - taskDone = pTask->level->taskSucceed + pTask->level->taskFailed; - SCH_UNLOCK(SCH_WRITE, &pTask->level->lock); - - if (taskDone < pTask->level->taskNum) { - SCH_TASK_DLOG("wait all tasks, done:%d, all:%d", taskDone, pTask->level->taskNum); - return TSDB_CODE_SUCCESS; - } else if (taskDone > pTask->level->taskNum) { - SCH_TASK_ELOG("taskDone number invalid, done:%d, total:%d", taskDone, pTask->level->taskNum); - } - - if (pTask->level->taskFailed > 0) { - SCH_RET(schProcessOnJobFailure(pJob, 0)); - } else { - SCH_RET(schProcessOnJobPartialSuccess(pJob)); - } - } else { - pJob->resNode = pTask->succeedAddr; - } - - pJob->fetchTask = pTask; - - SCH_ERR_JRET(schMoveTaskToExecList(pJob, pTask, &moved)); - - SCH_RET(schProcessOnJobPartialSuccess(pJob)); - } - - /* - if (SCH_IS_DATA_SRC_TASK(task) && job->dataSrcEps.numOfEps < SCH_MAX_CANDIDATE_EP_NUM) { - strncpy(job->dataSrcEps.fqdn[job->dataSrcEps.numOfEps], task->execAddr.fqdn, sizeof(task->execAddr.fqdn)); - job->dataSrcEps.port[job->dataSrcEps.numOfEps] = task->execAddr.port; - - ++job->dataSrcEps.numOfEps; - } - */ - - for (int32_t i = 0; i < parentNum; ++i) { - SSchTask *par = *(SSchTask **)taosArrayGet(pTask->parents, i); - int32_t readyNum = atomic_add_fetch_32(&par->childReady, 1); - - SCH_LOCK(SCH_WRITE, &par->lock); - SDownstreamSourceNode source = {.type = QUERY_NODE_DOWNSTREAM_SOURCE, - .taskId = pTask->taskId, - .schedId = schMgmt.sId, - .addr = pTask->succeedAddr}; - qSetSubplanExecutionNode(par->plan, pTask->plan->id.groupId, &source); - SCH_UNLOCK(SCH_WRITE, &par->lock); - - if (SCH_TASK_READY_FOR_LAUNCH(readyNum, par)) { - SCH_ERR_RET(schLaunchTask(pJob, par)); - } - } - - SCH_ERR_RET(schLaunchNextLevelTasks(pJob, pTask)); - - return TSDB_CODE_SUCCESS; - -_return: - - SCH_RET(schProcessOnJobFailure(pJob, code)); -} - -// Note: no more error processing, handled in function internal -int32_t schFetchFromRemote(SSchJob *pJob) { - int32_t code = 0; - - if (atomic_val_compare_exchange_32(&pJob->remoteFetch, 0, 1) != 0) { - SCH_JOB_ELOG("prior fetching not finished, remoteFetch:%d", atomic_load_32(&pJob->remoteFetch)); - return TSDB_CODE_SUCCESS; - } - - void *resData = atomic_load_ptr(&pJob->resData); - if (resData) { - atomic_val_compare_exchange_32(&pJob->remoteFetch, 1, 0); - - SCH_JOB_DLOG("res already fetched, res:%p", resData); - return TSDB_CODE_SUCCESS; - } - - SCH_ERR_JRET(schBuildAndSendMsg(pJob, pJob->fetchTask, &pJob->resNode, TDMT_VND_FETCH)); - - return TSDB_CODE_SUCCESS; - -_return: - - atomic_val_compare_exchange_32(&pJob->remoteFetch, 1, 0); - - SCH_RET(schProcessOnTaskFailure(pJob, pJob->fetchTask, code)); -} - -int32_t schProcessOnExplainDone(SSchJob *pJob, SSchTask *pTask, SRetrieveTableRsp *pRsp) { - SCH_TASK_DLOG("got explain rsp, rows:%d, complete:%d", htonl(pRsp->numOfRows), pRsp->completed); - - atomic_store_32(&pJob->resNumOfRows, htonl(pRsp->numOfRows)); - atomic_store_ptr(&pJob->resData, pRsp); - - SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_SUCCEED); - - schProcessOnDataFetched(pJob); - - return TSDB_CODE_SUCCESS; -} - -int32_t schSaveJobQueryRes(SSchJob *pJob, SResReadyRsp *rsp) { - if (rsp->tbFName[0]) { - if (NULL == pJob->queryRes) { - pJob->queryRes = taosArrayInit(pJob->taskNum, sizeof(STbVerInfo)); - if (NULL == pJob->queryRes) { - SCH_ERR_RET(TSDB_CODE_OUT_OF_MEMORY); - } - } - - STbVerInfo tbInfo; - strcpy(tbInfo.tbFName, rsp->tbFName); - tbInfo.sversion = rsp->sversion; - tbInfo.tversion = rsp->tversion; - - taosArrayPush((SArray *)pJob->queryRes, &tbInfo); - } - - return TSDB_CODE_SUCCESS; -} - - -// Note: no more task error processing, handled in function internal -int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t msgType, char *msg, int32_t msgSize, - int32_t rspCode) { - int32_t code = 0; - int8_t status = 0; - - if (schJobNeedToStop(pJob, &status)) { - SCH_TASK_ELOG("rsp not processed cause of job status, job status:%s, rspCode:0x%x", jobTaskStatusStr(status), - rspCode); - SCH_RET(atomic_load_32(&pJob->errCode)); - } - - SCH_ERR_JRET(schValidateTaskReceivedMsgType(pJob, pTask, msgType)); - - switch (msgType) { - case TDMT_VND_CREATE_TABLE_RSP: { - SVCreateTbBatchRsp batchRsp = {0}; - if (msg) { - SDecoder coder = {0}; - tDecoderInit(&coder, msg, msgSize); - code = tDecodeSVCreateTbBatchRsp(&coder, &batchRsp); - if (TSDB_CODE_SUCCESS == code && batchRsp.nRsps > 0) { - for (int32_t i = 0; i < batchRsp.nRsps; ++i) { - SVCreateTbRsp *rsp = batchRsp.pRsps + i; - if (TSDB_CODE_SUCCESS != rsp->code) { - code = rsp->code; - tDecoderClear(&coder); - SCH_ERR_JRET(code); - } - } - } - tDecoderClear(&coder); - SCH_ERR_JRET(code); - } - - SCH_ERR_JRET(rspCode); - SCH_ERR_RET(schProcessOnTaskSuccess(pJob, pTask)); - break; - } - case TDMT_VND_DROP_TABLE_RSP: { - SVDropTbBatchRsp batchRsp = {0}; - if (msg) { - SDecoder coder = {0}; - tDecoderInit(&coder, msg, msgSize); - code = tDecodeSVDropTbBatchRsp(&coder, &batchRsp); - if (TSDB_CODE_SUCCESS == code && batchRsp.nRsps > 0) { - for (int32_t i = 0; i < batchRsp.nRsps; ++i) { - SVDropTbRsp *rsp = batchRsp.pRsps + i; - if (TSDB_CODE_SUCCESS != rsp->code) { - code = rsp->code; - tDecoderClear(&coder); - SCH_ERR_JRET(code); - } - } - } - tDecoderClear(&coder); - SCH_ERR_JRET(code); - } - - SCH_ERR_JRET(rspCode); - SCH_ERR_RET(schProcessOnTaskSuccess(pJob, pTask)); - break; - } - case TDMT_VND_ALTER_TABLE_RSP: { - SVAlterTbRsp rsp = {0}; - if (msg) { - SDecoder coder = {0}; - tDecoderInit(&coder, msg, msgSize); - code = tDecodeSVAlterTbRsp(&coder, &rsp); - tDecoderClear(&coder); - SCH_ERR_JRET(code); - SCH_ERR_JRET(rsp.code); - } - - SCH_ERR_JRET(rspCode); - - if (NULL == msg) { - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - SCH_ERR_RET(schProcessOnTaskSuccess(pJob, pTask)); - break; - } - case TDMT_VND_SUBMIT_RSP: { - SCH_ERR_JRET(rspCode); - - if (msg) { - SDecoder coder = {0}; - SSubmitRsp *rsp = taosMemoryMalloc(sizeof(*rsp)); - tDecoderInit(&coder, msg, msgSize); - code = tDecodeSSubmitRsp(&coder, rsp); - if (code) { - SCH_TASK_ELOG("decode submitRsp failed, code:%d", code); - tFreeSSubmitRsp(rsp); - SCH_ERR_JRET(code); - } - - if (rsp->nBlocks > 0) { - for (int32_t i = 0; i < rsp->nBlocks; ++i) { - SSubmitBlkRsp *blk = rsp->pBlocks + i; - if (TSDB_CODE_SUCCESS != blk->code) { - code = blk->code; - tFreeSSubmitRsp(rsp); - SCH_ERR_JRET(code); - } - } - } - - atomic_add_fetch_32(&pJob->resNumOfRows, rsp->affectedRows); - SCH_TASK_DLOG("submit succeed, affectedRows:%d", rsp->affectedRows); - - SCH_LOCK(SCH_WRITE, &pJob->resLock); - if (pJob->queryRes) { - SSubmitRsp *sum = pJob->queryRes; - sum->affectedRows += rsp->affectedRows; - sum->nBlocks += rsp->nBlocks; - sum->pBlocks = taosMemoryRealloc(sum->pBlocks, sum->nBlocks * sizeof(*sum->pBlocks)); - memcpy(sum->pBlocks + sum->nBlocks - rsp->nBlocks, rsp->pBlocks, rsp->nBlocks * sizeof(*sum->pBlocks)); - taosMemoryFree(rsp->pBlocks); - taosMemoryFree(rsp); - } else { - pJob->queryRes = rsp; - } - SCH_UNLOCK(SCH_WRITE, &pJob->resLock); - } - - SCH_ERR_RET(schProcessOnTaskSuccess(pJob, pTask)); - - break; - } - case TDMT_VND_QUERY_RSP: { - SQueryTableRsp rsp = {0}; - if (msg) { - SCH_ERR_JRET(tDeserializeSQueryTableRsp(msg, msgSize, &rsp)); - SCH_ERR_JRET(rsp.code); - } - - SCH_ERR_JRET(rspCode); - - if (NULL == msg) { - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - - // SCH_ERR_JRET(schBuildAndSendMsg(pJob, pTask, NULL, TDMT_VND_RES_READY)); - - break; - } - case TDMT_VND_RES_READY_RSP: { - SResReadyRsp *rsp = (SResReadyRsp *)msg; - - SCH_ERR_JRET(rspCode); - if (NULL == msg) { - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - SCH_ERR_JRET(rsp->code); - - SCH_ERR_JRET(schSaveJobQueryRes(pJob, rsp)); - - SCH_ERR_RET(schProcessOnTaskSuccess(pJob, pTask)); - - break; - } - case TDMT_VND_EXPLAIN_RSP: { - SCH_ERR_JRET(rspCode); - if (NULL == msg) { - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - - if (!SCH_IS_EXPLAIN_JOB(pJob)) { - SCH_TASK_ELOG("invalid msg received for none explain query, msg type:%s", TMSG_INFO(msgType)); - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - - if (pJob->resData) { - SCH_TASK_ELOG("explain result is already generated, res:%p", pJob->resData); - SCH_ERR_JRET(TSDB_CODE_SCH_STATUS_ERROR); - } - - SExplainRsp rsp = {0}; - if (tDeserializeSExplainRsp(msg, msgSize, &rsp)) { - taosMemoryFree(rsp.subplanInfo); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SRetrieveTableRsp *pRsp = NULL; - SCH_ERR_JRET(qExplainUpdateExecInfo(pJob->explainCtx, &rsp, pTask->plan->id.groupId, &pRsp)); - - if (pRsp) { - SCH_ERR_JRET(schProcessOnExplainDone(pJob, pTask, pRsp)); - } - break; - } - case TDMT_VND_FETCH_RSP: { - SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)msg; - - SCH_ERR_JRET(rspCode); - if (NULL == msg) { - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - - if (SCH_IS_EXPLAIN_JOB(pJob)) { - if (rsp->completed) { - SRetrieveTableRsp *pRsp = NULL; - SCH_ERR_JRET(qExecExplainEnd(pJob->explainCtx, &pRsp)); - if (pRsp) { - SCH_ERR_JRET(schProcessOnExplainDone(pJob, pTask, pRsp)); - } - - return TSDB_CODE_SUCCESS; - } - - atomic_val_compare_exchange_32(&pJob->remoteFetch, 1, 0); - - SCH_ERR_JRET(schFetchFromRemote(pJob)); - - return TSDB_CODE_SUCCESS; - } - - if (pJob->resData) { - SCH_TASK_ELOG("got fetch rsp while res already exists, res:%p", pJob->resData); - taosMemoryFreeClear(rsp); - SCH_ERR_JRET(TSDB_CODE_SCH_STATUS_ERROR); - } - - atomic_store_ptr(&pJob->resData, rsp); - atomic_add_fetch_32(&pJob->resNumOfRows, htonl(rsp->numOfRows)); - - if (rsp->completed) { - SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_SUCCEED); - } - - SCH_TASK_DLOG("got fetch rsp, rows:%d, complete:%d", htonl(rsp->numOfRows), rsp->completed); - - schProcessOnDataFetched(pJob); - break; - } - case TDMT_VND_DROP_TASK_RSP: { - // SHOULD NEVER REACH HERE - SCH_TASK_ELOG("invalid status to handle drop task rsp, refId:%" PRIx64, pJob->refId); - SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR); - break; - } - case TDMT_SCH_LINK_BROKEN: - SCH_TASK_ELOG("link broken received, error:%x - %s", rspCode, tstrerror(rspCode)); - SCH_ERR_JRET(rspCode); - break; - default: - SCH_TASK_ELOG("unknown rsp msg, type:%d, status:%s", msgType, SCH_GET_TASK_STATUS_STR(pTask)); - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - - return TSDB_CODE_SUCCESS; - -_return: - - SCH_RET(schProcessOnTaskFailure(pJob, pTask, code)); -} - -int32_t schGetTaskFromTaskList(SHashObj *pTaskList, uint64_t taskId, SSchTask **pTask) { - int32_t s = taosHashGetSize(pTaskList); - if (s <= 0) { - return TSDB_CODE_SUCCESS; - } - - SSchTask **task = taosHashGet(pTaskList, &taskId, sizeof(taskId)); - if (NULL == task || NULL == (*task)) { - return TSDB_CODE_SUCCESS; - } - - *pTask = *task; - - return TSDB_CODE_SUCCESS; -} - -int32_t schUpdateTaskExecNodeHandle(SSchTask *pTask, void *handle, int32_t rspCode) { - if (rspCode || NULL == pTask->execNodes || taosArrayGetSize(pTask->execNodes) > 1 || - taosArrayGetSize(pTask->execNodes) <= 0) { - return TSDB_CODE_SUCCESS; - } - - SSchNodeInfo *nodeInfo = taosArrayGet(pTask->execNodes, 0); - nodeInfo->handle = handle; - - return TSDB_CODE_SUCCESS; -} - -int32_t schHandleCallback(void *param, const SDataBuf *pMsg, int32_t msgType, int32_t rspCode) { - int32_t code = 0; - SSchTaskCallbackParam *pParam = (SSchTaskCallbackParam *)param; - SSchTask *pTask = NULL; - - SSchJob *pJob = schAcquireJob(pParam->refId); - if (NULL == pJob) { - qWarn("QID:0x%" PRIx64 ",TID:0x%" PRIx64 "taosAcquireRef job failed, may be dropped, refId:%" PRIx64, - pParam->queryId, pParam->taskId, pParam->refId); - SCH_ERR_JRET(TSDB_CODE_QRY_JOB_FREED); - } - - schGetTaskFromTaskList(pJob->execTasks, pParam->taskId, &pTask); - if (NULL == pTask) { - if (TDMT_VND_EXPLAIN_RSP == msgType) { - schGetTaskFromTaskList(pJob->succTasks, pParam->taskId, &pTask); - } else { - SCH_JOB_ELOG("task not found in execTask list, refId:%" PRIx64 ", taskId:%" PRIx64, pParam->refId, - pParam->taskId); - SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - } - - if (NULL == pTask) { - SCH_JOB_ELOG("task not found in execList & succList, refId:%" PRIx64 ", taskId:%" PRIx64, pParam->refId, - pParam->taskId); - SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR); - } - - SCH_TASK_DLOG("rsp msg received, type:%s, handle:%p, code:%s", TMSG_INFO(msgType), pMsg->handle, tstrerror(rspCode)); - - SCH_SET_TASK_HANDLE(pTask, pMsg->handle); - schUpdateTaskExecNodeHandle(pTask, pMsg->handle, rspCode); - SCH_ERR_JRET(schHandleResponseMsg(pJob, pTask, msgType, pMsg->pData, pMsg->len, rspCode)); - -_return: - if (pJob) { - schReleaseJob(pParam->refId); - } - - taosMemoryFreeClear(param); - SCH_RET(code); -} - -int32_t schHandleSubmitCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_SUBMIT_RSP, code); -} - -int32_t schHandleCreateTableCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_CREATE_TABLE_RSP, code); -} - -int32_t schHandleDropTableCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_DROP_TABLE_RSP, code); -} - -int32_t schHandleAlterTableCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_ALTER_TABLE_RSP, code); -} - -int32_t schHandleQueryCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_QUERY_RSP, code); -} - -int32_t schHandleFetchCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_FETCH_RSP, code); -} - -int32_t schHandleReadyCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_RES_READY_RSP, code); -} - -int32_t schHandleExplainCallback(void *param, const SDataBuf *pMsg, int32_t code) { - return schHandleCallback(param, pMsg, TDMT_VND_EXPLAIN_RSP, code); -} - -int32_t schHandleDropCallback(void *param, const SDataBuf *pMsg, int32_t code) { - SSchTaskCallbackParam *pParam = (SSchTaskCallbackParam *)param; - qDebug("QID:%" PRIx64 ",TID:%" PRIx64 " drop task rsp received, code:%x", pParam->queryId, pParam->taskId, code); - return TSDB_CODE_SUCCESS; -} - -int32_t schHandleHbCallback(void *param, const SDataBuf *pMsg, int32_t code) { - SSchedulerHbRsp rsp = {0}; - SSchTaskCallbackParam *pParam = (SSchTaskCallbackParam *)param; - - if (code) { - qError("hb rsp error:%s", tstrerror(code)); - SCH_ERR_JRET(code); - } - - if (tDeserializeSSchedulerHbRsp(pMsg->pData, pMsg->len, &rsp)) { - qError("invalid hb rsp msg, size:%d", pMsg->len); - SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - - SSchTrans trans = {0}; - trans.transInst = pParam->transport; - trans.transHandle = pMsg->handle; - - SCH_ERR_JRET(schUpdateHbConnection(&rsp.epId, &trans)); - - int32_t taskNum = (int32_t)taosArrayGetSize(rsp.taskStatus); - qDebug("%d task status in hb rsp, nodeId:%d, fqdn:%s, port:%d", taskNum, rsp.epId.nodeId, rsp.epId.ep.fqdn, - rsp.epId.ep.port); - - for (int32_t i = 0; i < taskNum; ++i) { - STaskStatus *taskStatus = taosArrayGet(rsp.taskStatus, i); - - SSchJob *pJob = schAcquireJob(taskStatus->refId); - if (NULL == pJob) { - qWarn("job not found, refId:0x%" PRIx64 ",QID:0x%" PRIx64 ",TID:0x%" PRIx64, taskStatus->refId, - taskStatus->queryId, taskStatus->taskId); - // TODO DROP TASK FROM SERVER!!!! - continue; - } - - // TODO - - SCH_JOB_DLOG("TID:0x%" PRIx64 " task status in server: %s", taskStatus->taskId, - jobTaskStatusStr(taskStatus->status)); - - schReleaseJob(taskStatus->refId); - } - -_return: - - tFreeSSchedulerHbRsp(&rsp); - taosMemoryFree(param); - - SCH_RET(code); -} - -int32_t schHandleLinkBrokenCallback(void *param, const SDataBuf *pMsg, int32_t code) { - SSchCallbackParamHeader *head = (SSchCallbackParamHeader *)param; - rpcReleaseHandle(pMsg->handle, TAOS_CONN_CLIENT); - - qDebug("handle %p is broken", pMsg->handle); - - if (head->isHbParam) { - SSchHbCallbackParam *hbParam = (SSchHbCallbackParam *)param; - SSchTrans trans = {.transInst = hbParam->transport, .transHandle = NULL}; - SCH_ERR_RET(schUpdateHbConnection(&hbParam->nodeEpId, &trans)); - - SCH_ERR_RET(schBuildAndSendHbMsg(&hbParam->nodeEpId)); - } else { - SCH_ERR_RET(schHandleCallback(param, pMsg, TDMT_SCH_LINK_BROKEN, code)); - } - - return TSDB_CODE_SUCCESS; -} - -int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp) { - switch (msgType) { - case TDMT_VND_CREATE_TABLE: - *fp = schHandleCreateTableCallback; - break; - case TDMT_VND_DROP_TABLE: - *fp = schHandleDropTableCallback; - break; - case TDMT_VND_ALTER_TABLE: - *fp = schHandleAlterTableCallback; - break; - case TDMT_VND_SUBMIT: - *fp = schHandleSubmitCallback; - break; - case TDMT_VND_QUERY: - *fp = schHandleQueryCallback; - break; - case TDMT_VND_RES_READY: - *fp = schHandleReadyCallback; - break; - case TDMT_VND_EXPLAIN: - *fp = schHandleExplainCallback; - break; - case TDMT_VND_FETCH: - *fp = schHandleFetchCallback; - break; - case TDMT_VND_DROP_TASK: - *fp = schHandleDropCallback; - break; - case TDMT_VND_QUERY_HEARTBEAT: - *fp = schHandleHbCallback; - break; - case TDMT_SCH_LINK_BROKEN: - *fp = schHandleLinkBrokenCallback; - break; - default: - qError("unknown msg type for callback, msgType:%d", msgType); - SCH_ERR_RET(TSDB_CODE_QRY_APP_ERROR); - } - - return TSDB_CODE_SUCCESS; -} - -int32_t schGenerateTaskCallBackAHandle(SSchJob *pJob, SSchTask *pTask, int32_t msgType, SMsgSendInfo **pMsgSendInfo) { - int32_t code = 0; - SMsgSendInfo *msgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); - if (NULL == msgSendInfo) { - SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SSchTaskCallbackParam *param = taosMemoryCalloc(1, sizeof(SSchTaskCallbackParam)); - if (NULL == param) { - SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchTaskCallbackParam)); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - __async_send_cb_fn_t fp = NULL; - SCH_ERR_JRET(schGetCallbackFp(msgType, &fp)); - - param->queryId = pJob->queryId; - param->refId = pJob->refId; - param->taskId = SCH_TASK_ID(pTask); - param->transport = pJob->transport; - - msgSendInfo->param = param; - msgSendInfo->fp = fp; - - *pMsgSendInfo = msgSendInfo; - - return TSDB_CODE_SUCCESS; - -_return: - - taosMemoryFree(param); - taosMemoryFree(msgSendInfo); - - SCH_RET(code); -} - -void schFreeRpcCtxVal(const void *arg) { - if (NULL == arg) { - return; - } - - SMsgSendInfo *pMsgSendInfo = (SMsgSendInfo *)arg; - taosMemoryFreeClear(pMsgSendInfo->param); - taosMemoryFreeClear(pMsgSendInfo); -} - -int32_t schMakeTaskCallbackParam(SSchJob *pJob, SSchTask *pTask, void **pParam) { - SSchTaskCallbackParam *param = taosMemoryCalloc(1, sizeof(SSchTaskCallbackParam)); - if (NULL == param) { - SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchTaskCallbackParam)); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - param->queryId = pJob->queryId; - param->refId = pJob->refId; - param->taskId = SCH_TASK_ID(pTask); - param->transport = pJob->transport; - - *pParam = param; - - return TSDB_CODE_SUCCESS; -} - -int32_t schMakeHbCallbackParam(SSchJob *pJob, SSchTask *pTask, void **pParam) { - SSchHbCallbackParam *param = taosMemoryCalloc(1, sizeof(SSchHbCallbackParam)); - if (NULL == param) { - SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchHbCallbackParam)); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - param->head.isHbParam = true; - - SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); - - param->nodeEpId.nodeId = addr->nodeId; - memcpy(¶m->nodeEpId.ep, SCH_GET_CUR_EP(addr), sizeof(SEp)); - param->transport = pJob->transport; - - *pParam = param; - - return TSDB_CODE_SUCCESS; -} - -int32_t schMakeBrokenLinkVal(SSchJob *pJob, SSchTask *pTask, SRpcBrokenlinkVal *brokenVal, bool isHb) { - int32_t code = 0; - SMsgSendInfo *pMsgSendInfo = NULL; - - pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); - if (NULL == pMsgSendInfo) { - SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - if (isHb) { - SCH_ERR_JRET(schMakeHbCallbackParam(pJob, pTask, &pMsgSendInfo->param)); - } else { - SCH_ERR_JRET(schMakeTaskCallbackParam(pJob, pTask, &pMsgSendInfo->param)); - } - - int32_t msgType = TDMT_SCH_LINK_BROKEN; - __async_send_cb_fn_t fp = NULL; - SCH_ERR_JRET(schGetCallbackFp(msgType, &fp)); - - pMsgSendInfo->fp = fp; - - brokenVal->msgType = msgType; - brokenVal->val = pMsgSendInfo; - brokenVal->clone = schCloneSMsgSendInfo; - brokenVal->freeFunc = schFreeRpcCtxVal; - - return TSDB_CODE_SUCCESS; - -_return: - - taosMemoryFreeClear(pMsgSendInfo->param); - taosMemoryFreeClear(pMsgSendInfo); - - SCH_RET(code); -} - -int32_t schMakeQueryRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { - int32_t code = 0; - SMsgSendInfo *pReadyMsgSendInfo = NULL; - SMsgSendInfo *pExplainMsgSendInfo = NULL; - - pCtx->args = taosHashInit(1, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); - if (NULL == pCtx->args) { - SCH_TASK_ELOG("taosHashInit %d RpcCtx failed", 1); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SCH_ERR_JRET(schGenerateTaskCallBackAHandle(pJob, pTask, TDMT_VND_RES_READY, &pReadyMsgSendInfo)); - SCH_ERR_JRET(schGenerateTaskCallBackAHandle(pJob, pTask, TDMT_VND_EXPLAIN, &pExplainMsgSendInfo)); - - int32_t msgType = TDMT_VND_RES_READY_RSP; - SRpcCtxVal ctxVal = {.val = pReadyMsgSendInfo, .clone = schCloneSMsgSendInfo, .freeFunc = schFreeRpcCtxVal}; - if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { - SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - msgType = TDMT_VND_EXPLAIN_RSP; - ctxVal.val = pExplainMsgSendInfo; - if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { - SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SCH_ERR_JRET(schMakeBrokenLinkVal(pJob, pTask, &pCtx->brokenVal, false)); - - return TSDB_CODE_SUCCESS; - -_return: - - taosHashCleanup(pCtx->args); - - if (pReadyMsgSendInfo) { - taosMemoryFreeClear(pReadyMsgSendInfo->param); - taosMemoryFreeClear(pReadyMsgSendInfo); - } - - if (pExplainMsgSendInfo) { - taosMemoryFreeClear(pExplainMsgSendInfo->param); - taosMemoryFreeClear(pExplainMsgSendInfo); - } - - SCH_RET(code); -} - -int32_t schMakeHbRpcCtx(SSchJob *pJob, SSchTask *pTask, SRpcCtx *pCtx) { - int32_t code = 0; - SSchHbCallbackParam *param = NULL; - SMsgSendInfo *pMsgSendInfo = NULL; - SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); - SQueryNodeEpId epId = {0}; - - epId.nodeId = addr->nodeId; - memcpy(&epId.ep, SCH_GET_CUR_EP(addr), sizeof(SEp)); - - pCtx->args = taosHashInit(1, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); - if (NULL == pCtx->args) { - SCH_TASK_ELOG("taosHashInit %d RpcCtx failed", 1); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); - if (NULL == pMsgSendInfo) { - SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SMsgSendInfo)); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - param = taosMemoryCalloc(1, sizeof(SSchHbCallbackParam)); - if (NULL == param) { - SCH_TASK_ELOG("calloc %d failed", (int32_t)sizeof(SSchHbCallbackParam)); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - int32_t msgType = TDMT_VND_QUERY_HEARTBEAT_RSP; - __async_send_cb_fn_t fp = NULL; - SCH_ERR_JRET(schGetCallbackFp(TDMT_VND_QUERY_HEARTBEAT, &fp)); - - param->nodeEpId = epId; - param->transport = pJob->transport; - - pMsgSendInfo->param = param; - pMsgSendInfo->fp = fp; - - SRpcCtxVal ctxVal = {.val = pMsgSendInfo, .clone = schCloneSMsgSendInfo, .freeFunc = schFreeRpcCtxVal}; - if (taosHashPut(pCtx->args, &msgType, sizeof(msgType), &ctxVal, sizeof(ctxVal))) { - SCH_TASK_ELOG("taosHashPut msg %d to rpcCtx failed", msgType); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SCH_ERR_JRET(schMakeBrokenLinkVal(pJob, pTask, &pCtx->brokenVal, true)); - - return TSDB_CODE_SUCCESS; - -_return: - - taosHashCleanup(pCtx->args); - taosMemoryFreeClear(param); - taosMemoryFreeClear(pMsgSendInfo); - - SCH_RET(code); -} - -int32_t schRegisterHbConnection(SSchJob *pJob, SSchTask *pTask, SQueryNodeEpId *epId, bool *exist) { - int32_t code = 0; - SSchHbTrans hb = {0}; - - hb.trans.transInst = pJob->transport; - - SCH_ERR_RET(schMakeHbRpcCtx(pJob, pTask, &hb.rpcCtx)); - - code = taosHashPut(schMgmt.hbConnections, epId, sizeof(SQueryNodeEpId), &hb, sizeof(SSchHbTrans)); - if (code) { - schFreeRpcCtx(&hb.rpcCtx); - - if (HASH_NODE_EXIST(code)) { - *exist = true; - return TSDB_CODE_SUCCESS; - } - - qError("taosHashPut hb trans failed, nodeId:%d, fqdn:%s, port:%d", epId->nodeId, epId->ep.fqdn, epId->ep.port); - SCH_ERR_RET(code); - } - - return TSDB_CODE_SUCCESS; -} - -int32_t schCloneCallbackParam(SSchCallbackParamHeader *pSrc, SSchCallbackParamHeader **pDst) { - if (pSrc->isHbParam) { - SSchHbCallbackParam *dst = taosMemoryMalloc(sizeof(SSchHbCallbackParam)); - if (NULL == dst) { - qError("malloc SSchHbCallbackParam failed"); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - memcpy(dst, pSrc, sizeof(*dst)); - *pDst = (SSchCallbackParamHeader *)dst; - - return TSDB_CODE_SUCCESS; - } - - SSchTaskCallbackParam *dst = taosMemoryMalloc(sizeof(SSchTaskCallbackParam)); - if (NULL == dst) { - qError("malloc SSchTaskCallbackParam failed"); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - memcpy(dst, pSrc, sizeof(*dst)); - *pDst = (SSchCallbackParamHeader *)dst; - - return TSDB_CODE_SUCCESS; -} - -int32_t schCloneSMsgSendInfo(void *src, void **dst) { - SMsgSendInfo *pSrc = src; - int32_t code = 0; - SMsgSendInfo *pDst = taosMemoryMalloc(sizeof(*pSrc)); - if (NULL == pDst) { - qError("malloc SMsgSendInfo for rpcCtx failed, len:%d", (int32_t)sizeof(*pSrc)); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - memcpy(pDst, pSrc, sizeof(*pSrc)); - pDst->param = NULL; - - SCH_ERR_JRET(schCloneCallbackParam(pSrc->param, (SSchCallbackParamHeader **)&pDst->param)); - - *dst = pDst; - - return TSDB_CODE_SUCCESS; - -_return: - - taosMemoryFreeClear(pDst); - SCH_RET(code); -} - -int32_t schCloneHbRpcCtx(SRpcCtx *pSrc, SRpcCtx *pDst) { - int32_t code = 0; - memcpy(&pDst->brokenVal, &pSrc->brokenVal, sizeof(pSrc->brokenVal)); - pDst->brokenVal.val = NULL; - - SCH_ERR_RET(schCloneSMsgSendInfo(pSrc->brokenVal.val, &pDst->brokenVal.val)); - - pDst->args = taosHashInit(1, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); - if (NULL == pDst->args) { - qError("taosHashInit %d RpcCtx failed", 1); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SRpcCtxVal dst = {0}; - void *pIter = taosHashIterate(pSrc->args, NULL); - while (pIter) { - SRpcCtxVal *pVal = (SRpcCtxVal *)pIter; - int32_t *msgType = taosHashGetKey(pIter, NULL); - - dst = *pVal; - dst.val = NULL; - - SCH_ERR_JRET(schCloneSMsgSendInfo(pVal->val, &dst.val)); - - if (taosHashPut(pDst->args, msgType, sizeof(*msgType), &dst, sizeof(dst))) { - qError("taosHashPut msg %d to rpcCtx failed", *msgType); - (*dst.freeFunc)(dst.val); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - pIter = taosHashIterate(pSrc->args, pIter); - } - - return TSDB_CODE_SUCCESS; - -_return: - - schFreeRpcCtx(pDst); - SCH_RET(code); -} - -int32_t schAsyncSendMsg(SSchJob *pJob, SSchTask *pTask, void *transport, SEpSet *epSet, int32_t msgType, void *msg, - uint32_t msgSize, bool persistHandle, SRpcCtx *ctx) { - int32_t code = 0; - - SSchTrans *trans = (SSchTrans *)transport; - - SMsgSendInfo *pMsgSendInfo = NULL; - SCH_ERR_JRET(schGenerateTaskCallBackAHandle(pJob, pTask, msgType, &pMsgSendInfo)); - - pMsgSendInfo->msgInfo.pData = msg; - pMsgSendInfo->msgInfo.len = msgSize; - pMsgSendInfo->msgInfo.handle = trans->transHandle; - pMsgSendInfo->msgType = msgType; - - qDebug("start to send %s msg to node[%d,%s,%d], refId:%" PRIx64 "instance:%p, handle:%p", TMSG_INFO(msgType), - ntohl(((SMsgHead *)msg)->vgId), epSet->eps[epSet->inUse].fqdn, epSet->eps[epSet->inUse].port, pJob->refId, - trans->transInst, trans->transHandle); - - int64_t transporterId = 0; - code = asyncSendMsgToServerExt(trans->transInst, epSet, &transporterId, pMsgSendInfo, persistHandle, ctx); - if (code) { - SCH_ERR_JRET(code); - } - - SCH_TASK_DLOG("req msg sent, refId:%" PRIx64 ", type:%d, %s", pJob->refId, msgType, TMSG_INFO(msgType)); - return TSDB_CODE_SUCCESS; - -_return: - - if (pMsgSendInfo) { - taosMemoryFreeClear(pMsgSendInfo->param); - taosMemoryFreeClear(pMsgSendInfo); - } - - SCH_RET(code); -} - -int32_t schBuildAndSendHbMsg(SQueryNodeEpId *nodeEpId) { - SSchedulerHbReq req = {0}; - int32_t code = 0; - SRpcCtx rpcCtx = {0}; - SSchTrans trans = {0}; - int32_t msgType = TDMT_VND_QUERY_HEARTBEAT; - - req.header.vgId = nodeEpId->nodeId; - req.sId = schMgmt.sId; - memcpy(&req.epId, nodeEpId, sizeof(SQueryNodeEpId)); - - SSchHbTrans *hb = taosHashGet(schMgmt.hbConnections, nodeEpId, sizeof(SQueryNodeEpId)); - if (NULL == hb) { - qError("taosHashGet hb connection failed, nodeId:%d, fqdn:%s, port:%d", nodeEpId->nodeId, nodeEpId->ep.fqdn, - nodeEpId->ep.port); - SCH_ERR_RET(code); - } - - SCH_LOCK(SCH_WRITE, &hb->lock); - code = schCloneHbRpcCtx(&hb->rpcCtx, &rpcCtx); - memcpy(&trans, &hb->trans, sizeof(trans)); - SCH_UNLOCK(SCH_WRITE, &hb->lock); - - SCH_ERR_RET(code); - - int32_t msgSize = tSerializeSSchedulerHbReq(NULL, 0, &req); - if (msgSize < 0) { - qError("tSerializeSSchedulerHbReq hbReq failed, size:%d", msgSize); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - void *msg = taosMemoryCalloc(1, msgSize); - if (NULL == msg) { - qError("calloc hb req %d failed", msgSize); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - if (tSerializeSSchedulerHbReq(msg, msgSize, &req) < 0) { - qError("tSerializeSSchedulerHbReq hbReq failed, size:%d", msgSize); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SMsgSendInfo *pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); - if (NULL == pMsgSendInfo) { - qError("calloc SMsgSendInfo failed"); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SSchTaskCallbackParam *param = taosMemoryCalloc(1, sizeof(SSchTaskCallbackParam)); - if (NULL == param) { - qError("calloc SSchTaskCallbackParam failed"); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - __async_send_cb_fn_t fp = NULL; - SCH_ERR_JRET(schGetCallbackFp(msgType, &fp)); - - param->transport = trans.transInst; - - pMsgSendInfo->param = param; - pMsgSendInfo->msgInfo.pData = msg; - pMsgSendInfo->msgInfo.len = msgSize; - pMsgSendInfo->msgInfo.handle = trans.transHandle; - pMsgSendInfo->msgType = msgType; - pMsgSendInfo->fp = fp; - - int64_t transporterId = 0; - SEpSet epSet = {.inUse = 0, .numOfEps = 1}; - memcpy(&epSet.eps[0], &nodeEpId->ep, sizeof(nodeEpId->ep)); - - qDebug("start to send hb msg, instance:%p, handle:%p, fqdn:%s, port:%d", trans.transInst, trans.transHandle, - nodeEpId->ep.fqdn, nodeEpId->ep.port); - - code = asyncSendMsgToServerExt(trans.transInst, &epSet, &transporterId, pMsgSendInfo, true, &rpcCtx); - if (code) { - qError("fail to send hb msg, instance:%p, handle:%p, fqdn:%s, port:%d, error:%x - %s", trans.transInst, - trans.transHandle, nodeEpId->ep.fqdn, nodeEpId->ep.port, code, tstrerror(code)); - SCH_ERR_JRET(code); - } - - qDebug("hb msg sent"); - return TSDB_CODE_SUCCESS; - -_return: - - taosMemoryFreeClear(msg); - taosMemoryFreeClear(param); - taosMemoryFreeClear(pMsgSendInfo); - schFreeRpcCtx(&rpcCtx); - SCH_RET(code); -} - -int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, int32_t msgType) { - uint32_t msgSize = 0; - void *msg = NULL; - int32_t code = 0; - bool isCandidateAddr = false; - bool persistHandle = false; - SRpcCtx rpcCtx = {0}; - - if (NULL == addr) { - addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); - isCandidateAddr = true; - } - - SEpSet epSet = addr->epSet; - - switch (msgType) { - case TDMT_VND_CREATE_TABLE: - case TDMT_VND_DROP_TABLE: - case TDMT_VND_ALTER_TABLE: - case TDMT_VND_SUBMIT: { - msgSize = pTask->msgLen; - msg = taosMemoryCalloc(1, msgSize); - if (NULL == msg) { - SCH_TASK_ELOG("calloc %d failed", msgSize); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - memcpy(msg, pTask->msg, msgSize); - break; - } - - case TDMT_VND_QUERY: { - SCH_ERR_RET(schMakeQueryRpcCtx(pJob, pTask, &rpcCtx)); - - uint32_t len = strlen(pJob->sql); - msgSize = sizeof(SSubQueryMsg) + pTask->msgLen + len; - msg = taosMemoryCalloc(1, msgSize); - if (NULL == msg) { - SCH_TASK_ELOG("calloc %d failed", msgSize); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SSubQueryMsg *pMsg = msg; - pMsg->header.vgId = htonl(addr->nodeId); - pMsg->sId = htobe64(schMgmt.sId); - pMsg->queryId = htobe64(pJob->queryId); - pMsg->taskId = htobe64(pTask->taskId); - pMsg->refId = htobe64(pJob->refId); - pMsg->taskType = TASK_TYPE_TEMP; - pMsg->explain = SCH_IS_EXPLAIN_JOB(pJob); - pMsg->phyLen = htonl(pTask->msgLen); - pMsg->sqlLen = htonl(len); - - memcpy(pMsg->msg, pJob->sql, len); - memcpy(pMsg->msg + len, pTask->msg, pTask->msgLen); - - persistHandle = true; - break; - } - - case TDMT_VND_RES_READY: { - msgSize = sizeof(SResReadyReq); - msg = taosMemoryCalloc(1, msgSize); - if (NULL == msg) { - SCH_TASK_ELOG("calloc %d failed", msgSize); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SResReadyReq *pMsg = msg; - - pMsg->header.vgId = htonl(addr->nodeId); - - pMsg->sId = htobe64(schMgmt.sId); - pMsg->queryId = htobe64(pJob->queryId); - pMsg->taskId = htobe64(pTask->taskId); - break; - } - case TDMT_VND_FETCH: { - msgSize = sizeof(SResFetchReq); - msg = taosMemoryCalloc(1, msgSize); - if (NULL == msg) { - SCH_TASK_ELOG("calloc %d failed", msgSize); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SResFetchReq *pMsg = msg; - - pMsg->header.vgId = htonl(addr->nodeId); - - pMsg->sId = htobe64(schMgmt.sId); - pMsg->queryId = htobe64(pJob->queryId); - pMsg->taskId = htobe64(pTask->taskId); - - break; - } - case TDMT_VND_DROP_TASK: { - msgSize = sizeof(STaskDropReq); - msg = taosMemoryCalloc(1, msgSize); - if (NULL == msg) { - SCH_TASK_ELOG("calloc %d failed", msgSize); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - STaskDropReq *pMsg = msg; - - pMsg->header.vgId = htonl(addr->nodeId); - - pMsg->sId = htobe64(schMgmt.sId); - pMsg->queryId = htobe64(pJob->queryId); - pMsg->taskId = htobe64(pTask->taskId); - pMsg->refId = htobe64(pJob->refId); - break; - } - case TDMT_VND_QUERY_HEARTBEAT: { - SCH_ERR_RET(schMakeHbRpcCtx(pJob, pTask, &rpcCtx)); - - SSchedulerHbReq req = {0}; - req.sId = schMgmt.sId; - req.header.vgId = addr->nodeId; - req.epId.nodeId = addr->nodeId; - memcpy(&req.epId.ep, SCH_GET_CUR_EP(addr), sizeof(SEp)); - - msgSize = tSerializeSSchedulerHbReq(NULL, 0, &req); - if (msgSize < 0) { - SCH_JOB_ELOG("tSerializeSSchedulerHbReq hbReq failed, size:%d", msgSize); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - msg = taosMemoryCalloc(1, msgSize); - if (NULL == msg) { - SCH_JOB_ELOG("calloc %d failed", msgSize); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - if (tSerializeSSchedulerHbReq(msg, msgSize, &req) < 0) { - SCH_JOB_ELOG("tSerializeSSchedulerHbReq hbReq failed, size:%d", msgSize); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - persistHandle = true; - break; - } - default: - SCH_TASK_ELOG("unknown msg type to send, msgType:%d", msgType); - SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR); - break; - } - - SCH_SET_TASK_LASTMSG_TYPE(pTask, msgType); - - SSchTrans trans = {.transInst = pJob->transport, .transHandle = SCH_GET_TASK_HANDLE(pTask)}; - SCH_ERR_JRET(schAsyncSendMsg(pJob, pTask, &trans, &epSet, msgType, msg, msgSize, persistHandle, - (rpcCtx.args ? &rpcCtx : NULL))); - - if (msgType == TDMT_VND_QUERY) { - SCH_ERR_RET(schRecordTaskExecNode(pJob, pTask, addr, trans.transHandle)); - } - - return TSDB_CODE_SUCCESS; - -_return: - - SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); - schFreeRpcCtx(&rpcCtx); - - taosMemoryFreeClear(msg); - SCH_RET(code); -} - -int32_t schEnsureHbConnection(SSchJob *pJob, SSchTask *pTask) { - SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); - SQueryNodeEpId epId = {0}; - - epId.nodeId = addr->nodeId; - memcpy(&epId.ep, SCH_GET_CUR_EP(addr), sizeof(SEp)); - -#if 1 - SSchHbTrans *hb = taosHashGet(schMgmt.hbConnections, &epId, sizeof(SQueryNodeEpId)); - if (NULL == hb) { - bool exist = false; - SCH_ERR_RET(schRegisterHbConnection(pJob, pTask, &epId, &exist)); - if (!exist) { - SCH_ERR_RET(schBuildAndSendHbMsg(&epId)); - } - } -#endif - - return TSDB_CODE_SUCCESS; -} - -int32_t schLaunchTaskImpl(SSchJob *pJob, SSchTask *pTask) { - int8_t status = 0; - int32_t code = 0; - - atomic_add_fetch_32(&pTask->level->taskLaunchedNum, 1); - - if (schJobNeedToStop(pJob, &status)) { - SCH_TASK_DLOG("no need to launch task cause of job status, job status:%s", jobTaskStatusStr(status)); - - SCH_RET(atomic_load_32(&pJob->errCode)); - } - - // NOTE: race condition: the task should be put into the hash table before send msg to server - if (SCH_GET_TASK_STATUS(pTask) != JOB_TASK_STATUS_EXECUTING) { - SCH_ERR_RET(schPushTaskToExecList(pJob, pTask)); - SCH_SET_TASK_STATUS(pTask, JOB_TASK_STATUS_EXECUTING); - } - - SSubplan *plan = pTask->plan; - - if (NULL == pTask->msg) { // TODO add more detailed reason for failure - code = qSubPlanToString(plan, &pTask->msg, &pTask->msgLen); - if (TSDB_CODE_SUCCESS != code) { - SCH_TASK_ELOG("failed to create physical plan, code:%s, msg:%p, len:%d", tstrerror(code), pTask->msg, - pTask->msgLen); - SCH_ERR_RET(code); - } else { - SCH_TASK_DLOGL("physical plan len:%d, %s", pTask->msgLen, pTask->msg); - } - } - - SCH_ERR_RET(schSetTaskCandidateAddrs(pJob, pTask)); - - if (SCH_IS_QUERY_JOB(pJob)) { - SCH_ERR_RET(schEnsureHbConnection(pJob, pTask)); - } - - SCH_ERR_RET(schBuildAndSendMsg(pJob, pTask, NULL, plan->msgType)); - - return TSDB_CODE_SUCCESS; -} - -// Note: no more error processing, handled in function internal -int32_t schLaunchTask(SSchJob *pJob, SSchTask *pTask) { - bool enough = false; - int32_t code = 0; - - SCH_SET_TASK_HANDLE(pTask, NULL); - - if (SCH_TASK_NEED_FLOW_CTRL(pJob, pTask)) { - SCH_ERR_JRET(schCheckIncTaskFlowQuota(pJob, pTask, &enough)); - - if (enough) { - SCH_ERR_JRET(schLaunchTaskImpl(pJob, pTask)); - } - } else { - SCH_ERR_JRET(schLaunchTaskImpl(pJob, pTask)); - } - - return TSDB_CODE_SUCCESS; - -_return: - - SCH_RET(schProcessOnTaskFailure(pJob, pTask, code)); -} - -int32_t schLaunchLevelTasks(SSchJob *pJob, SSchLevel *level) { - for (int32_t i = 0; i < level->taskNum; ++i) { - SSchTask *pTask = taosArrayGet(level->subTasks, i); - - SCH_ERR_RET(schLaunchTask(pJob, pTask)); - } - - return TSDB_CODE_SUCCESS; -} - -int32_t schLaunchJob(SSchJob *pJob) { - SSchLevel *level = taosArrayGet(pJob->levels, pJob->levelIdx); - - SCH_ERR_RET(schCheckAndUpdateJobStatus(pJob, JOB_TASK_STATUS_EXECUTING)); - - SCH_ERR_RET(schCheckJobNeedFlowCtrl(pJob, level)); - - SCH_ERR_RET(schLaunchLevelTasks(pJob, level)); - - return TSDB_CODE_SUCCESS; -} - -void schDropTaskOnExecutedNode(SSchJob *pJob, SSchTask *pTask) { - if (NULL == pTask->execNodes) { - SCH_TASK_DLOG("no exec address, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); - return; - } - - int32_t size = (int32_t)taosArrayGetSize(pTask->execNodes); - - if (size <= 0) { - SCH_TASK_DLOG("task has no execNodes, no need to drop it, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); - return; - } - - SSchNodeInfo *nodeInfo = NULL; - for (int32_t i = 0; i < size; ++i) { - nodeInfo = (SSchNodeInfo *)taosArrayGet(pTask->execNodes, i); - SCH_SET_TASK_HANDLE(pTask, nodeInfo->handle); - - schBuildAndSendMsg(pJob, pTask, &nodeInfo->addr, TDMT_VND_DROP_TASK); - } - - SCH_TASK_DLOG("task has %d exec address", size); -} - -void schDropTaskInHashList(SSchJob *pJob, SHashObj *list) { - if (!SCH_IS_NEED_DROP_JOB(pJob)) { - return; - } - - void *pIter = taosHashIterate(list, NULL); - while (pIter) { - SSchTask *pTask = *(SSchTask **)pIter; - - schDropTaskOnExecutedNode(pJob, pTask); - - pIter = taosHashIterate(list, pIter); - } -} - -void schDropJobAllTasks(SSchJob *pJob) { - schDropTaskInHashList(pJob, pJob->execTasks); - schDropTaskInHashList(pJob, pJob->succTasks); - schDropTaskInHashList(pJob, pJob->failTasks); -} - -int32_t schCancelJob(SSchJob *pJob) { - // TODO - return TSDB_CODE_SUCCESS; - // TODO MOVE ALL TASKS FROM EXEC LIST TO FAIL LIST -} - -void schCloseJobRef(void) { - if (!atomic_load_8((int8_t *)&schMgmt.exit)) { - return; - } - - SCH_LOCK(SCH_WRITE, &schMgmt.lock); - if (atomic_load_32(&schMgmt.jobNum) <= 0 && schMgmt.jobRef >= 0) { - taosCloseRef(schMgmt.jobRef); - schMgmt.jobRef = -1; - } - SCH_UNLOCK(SCH_WRITE, &schMgmt.lock); -} - -void schFreeJobImpl(void *job) { - if (NULL == job) { - return; - } - - SSchJob *pJob = job; - uint64_t queryId = pJob->queryId; - int64_t refId = pJob->refId; - - if (pJob->status == JOB_TASK_STATUS_EXECUTING) { - schCancelJob(pJob); - } - - schDropJobAllTasks(pJob); - - pJob->subPlans = NULL; // it is a reference to pDag->pSubplans - - int32_t numOfLevels = taosArrayGetSize(pJob->levels); - for (int32_t i = 0; i < numOfLevels; ++i) { - SSchLevel *pLevel = taosArrayGet(pJob->levels, i); - - int32_t numOfTasks = taosArrayGetSize(pLevel->subTasks); - for (int32_t j = 0; j < numOfTasks; ++j) { - SSchTask *pTask = taosArrayGet(pLevel->subTasks, j); - schFreeTask(pTask); - } - - taosArrayDestroy(pLevel->subTasks); - } - - schFreeFlowCtrl(pJob); - - taosHashCleanup(pJob->execTasks); - taosHashCleanup(pJob->failTasks); - taosHashCleanup(pJob->succTasks); - - taosArrayDestroy(pJob->levels); - taosArrayDestroy(pJob->nodeList); - taosArrayDestroy(pJob->dataSrcTasks); - - qExplainFreeCtx(pJob->explainCtx); - - if (SCH_IS_QUERY_JOB(pJob)) { - taosArrayDestroy((SArray *)pJob->queryRes); - } else { - tFreeSSubmitRsp((SSubmitRsp*)pJob->queryRes); - } - - taosMemoryFreeClear(pJob->resData); - taosMemoryFreeClear(pJob); - - qDebug("QID:0x%" PRIx64 " job freed, refId:%" PRIx64 ", pointer:%p", queryId, refId, pJob); - - atomic_sub_fetch_32(&schMgmt.jobNum, 1); - - schCloseJobRef(); -} - -static int32_t schExecJobImpl(void *transport, SArray *pNodeList, SQueryPlan *pDag, int64_t *job, const char *sql, - int64_t startTs, bool syncSchedule) { - qDebug("QID:0x%" PRIx64 " job started", pDag->queryId); - - if (pNodeList == NULL || taosArrayGetSize(pNodeList) <= 0) { - qDebug("QID:0x%" PRIx64 " input exec nodeList is empty", pDag->queryId); - } - - int32_t code = 0; - SSchJob *pJob = NULL; - SCH_ERR_JRET(schInitJob(&pJob, pDag, transport, pNodeList, sql, startTs, syncSchedule)); - - SCH_ERR_JRET(schLaunchJob(pJob)); - - *job = pJob->refId; - - if (syncSchedule) { - SCH_JOB_DLOG("will wait for rsp now, job status:%s", SCH_GET_JOB_STATUS_STR(pJob)); - tsem_wait(&pJob->rspSem); - } - - SCH_JOB_DLOG("job exec done, job status:%s", SCH_GET_JOB_STATUS_STR(pJob)); - - schReleaseJob(pJob->refId); - - return TSDB_CODE_SUCCESS; - -_return: - - schFreeJobImpl(pJob); - SCH_RET(code); -} - -int32_t schExecStaticExplain(void *transport, SArray *pNodeList, SQueryPlan *pDag, int64_t *job, const char *sql, - bool syncSchedule) { - qDebug("QID:0x%" PRIx64 " job started", pDag->queryId); - - int32_t code = 0; - SSchJob *pJob = taosMemoryCalloc(1, sizeof(SSchJob)); - if (NULL == pJob) { - qError("QID:%" PRIx64 " calloc %d failed", pDag->queryId, (int32_t)sizeof(SSchJob)); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - pJob->sql = sql; - pJob->attr.queryJob = true; - pJob->attr.explainMode = pDag->explainInfo.mode; - pJob->queryId = pDag->queryId; - pJob->subPlans = pDag->pSubplans; - - SCH_ERR_JRET(qExecStaticExplain(pDag, (SRetrieveTableRsp **)&pJob->resData)); - - int64_t refId = taosAddRef(schMgmt.jobRef, pJob); - if (refId < 0) { - SCH_JOB_ELOG("taosAddRef job failed, error:%s", tstrerror(terrno)); - SCH_ERR_JRET(terrno); - } - - if (NULL == schAcquireJob(refId)) { - SCH_JOB_ELOG("schAcquireJob job failed, refId:%" PRIx64, refId); - SCH_RET(TSDB_CODE_SCH_STATUS_ERROR); - } - - pJob->refId = refId; - - SCH_JOB_DLOG("job refId:%" PRIx64, pJob->refId); - - pJob->status = JOB_TASK_STATUS_PARTIAL_SUCCEED; - *job = pJob->refId; - SCH_JOB_DLOG("job exec done, job status:%s", SCH_GET_JOB_STATUS_STR(pJob)); - - schReleaseJob(pJob->refId); - - return TSDB_CODE_SUCCESS; - -_return: - - schFreeJobImpl(pJob); - SCH_RET(code); -} - int32_t schedulerInit(SSchedulerCfg *cfg) { if (schMgmt.jobRef >= 0) { qError("scheduler already initialized"); @@ -2670,129 +113,6 @@ int32_t schedulerAsyncExecJob(void *transport, SArray *pNodeList, SQueryPlan *pD return TSDB_CODE_SUCCESS; } -#if 0 -int32_t schedulerConvertDagToTaskList(SQueryPlan* pDag, SArray **pTasks) { - if (NULL == pDag || pDag->numOfSubplans <= 0 || LIST_LENGTH(pDag->pSubplans) == 0) { - SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); - } - - int32_t levelNum = LIST_LENGTH(pDag->pSubplans); - if (1 != levelNum) { - qError("invalid level num: %d", levelNum); - SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); - } - - SNodeListNode *plans = (SNodeListNode*)nodesListGetNode(pDag->pSubplans, 0); - int32_t taskNum = LIST_LENGTH(plans->pNodeList); - if (taskNum <= 0) { - qError("invalid task num: %d", taskNum); - SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); - } - - SArray *info = taosArrayInit(taskNum, sizeof(STaskInfo)); - if (NULL == info) { - qError("taosArrayInit %d taskInfo failed", taskNum); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - STaskInfo tInfo = {0}; - char *msg = NULL; - int32_t msgLen = 0; - int32_t code = 0; - - for (int32_t i = 0; i < taskNum; ++i) { - SSubplan *plan = (SSubplan*)nodesListGetNode(plans->pNodeList, i); - tInfo.addr = plan->execNode; - - code = qSubPlanToString(plan, &msg, &msgLen); - if (TSDB_CODE_SUCCESS != code) { - qError("subplanToString error, code:%x, msg:%p, len:%d", code, msg, msgLen); - SCH_ERR_JRET(code); - } - - int32_t msgSize = sizeof(SSubQueryMsg) + msgLen; - if (NULL == msg) { - qError("calloc %d failed", msgSize); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - SSubQueryMsg* pMsg = taosMemoryCalloc(1, msgSize); - - pMsg->header.vgId = tInfo.addr.nodeId; - - pMsg->sId = schMgmt.sId; - pMsg->queryId = plan->id.queryId; - pMsg->taskId = schGenUUID(); - pMsg->taskType = TASK_TYPE_PERSISTENT; - pMsg->phyLen = msgLen; - pMsg->sqlLen = 0; - memcpy(pMsg->msg, msg, msgLen); - /*memcpy(pMsg->msg, ((SSubQueryMsg*)msg)->msg, msgLen);*/ - - tInfo.msg = pMsg; - - if (NULL == taosArrayPush(info, &tInfo)) { - qError("taosArrayPush failed, idx:%d", i); - taosMemoryFree(msg); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - } - - *pTasks = info; - info = NULL; - -_return: - schedulerFreeTaskList(info); - SCH_RET(code); -} - -int32_t schedulerCopyTask(STaskInfo *src, SArray **dst, int32_t copyNum) { - if (NULL == src || NULL == dst || copyNum <= 0) { - SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); - } - - int32_t code = 0; - - *dst = taosArrayInit(copyNum, sizeof(STaskInfo)); - if (NULL == *dst) { - qError("taosArrayInit %d taskInfo failed", copyNum); - SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - int32_t msgSize = src->msg->phyLen + sizeof(*src->msg); - STaskInfo info = {0}; - - info.addr = src->addr; - - for (int32_t i = 0; i < copyNum; ++i) { - info.msg = taosMemoryMalloc(msgSize); - if (NULL == info.msg) { - qError("malloc %d failed", msgSize); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - - memcpy(info.msg, src->msg, msgSize); - - info.msg->taskId = schGenUUID(); - - if (NULL == taosArrayPush(*dst, &info)) { - qError("taosArrayPush failed, idx:%d", i); - taosMemoryFree(info.msg); - SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY); - } - } - - return TSDB_CODE_SUCCESS; - -_return: - - schedulerFreeTaskList(*dst); - *dst = NULL; - - SCH_RET(code); -} -#endif - int32_t schedulerFetchRows(int64_t job, void **pData) { if (NULL == pData) { SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT); @@ -2848,7 +168,7 @@ int32_t schedulerFetchRows(int64_t job, void **pData) { } if (pJob->resData && ((SRetrieveTableRsp *)pJob->resData)->completed) { - SCH_ERR_JRET(schCheckAndUpdateJobStatus(pJob, JOB_TASK_STATUS_SUCCEED)); + SCH_ERR_JRET(schChkUpdateJobStatus(pJob, JOB_TASK_STATUS_SUCCEED)); } while (true) { @@ -2942,20 +262,6 @@ void schedulerFreeJob(int64_t job) { schReleaseJob(job); } -void schedulerFreeTaskList(SArray *taskList) { - if (NULL == taskList) { - return; - } - - int32_t taskNum = taosArrayGetSize(taskList); - for (int32_t i = 0; i < taskNum; ++i) { - STaskInfo *info = taosArrayGet(taskList, i); - taosMemoryFreeClear(info->msg); - } - - taosArrayDestroy(taskList); -} - void schedulerDestroy(void) { atomic_store_8((int8_t *)&schMgmt.exit, 1); From f93c4d084617671da6c7e0d73a9a3163c6e92714 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 23 May 2022 21:53:10 +0800 Subject: [PATCH 26/32] fix: follower processes the trans but does not send the action msg --- source/dnode/mnode/impl/src/mndTrans.c | 4 ++++ source/dnode/mnode/impl/src/mnode.c | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index f567e3f3cf..2a500829e5 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -1086,6 +1086,8 @@ static bool mndTransPerformRedoLogStage(SMnode *pMnode, STrans *pTrans) { } static bool mndTransPerformRedoActionStage(SMnode *pMnode, STrans *pTrans) { + if (!mndIsMaster(pMnode)) return false; + bool continueExec = true; int32_t code = mndTransExecuteRedoActions(pMnode, pTrans); @@ -1175,6 +1177,8 @@ static bool mndTransPerformUndoLogStage(SMnode *pMnode, STrans *pTrans) { } static bool mndTransPerformUndoActionStage(SMnode *pMnode, STrans *pTrans) { + if (!mndIsMaster(pMnode)) return false; + bool continueExec = true; int32_t code = mndTransExecuteUndoActions(pMnode, pTrans); diff --git a/source/dnode/mnode/impl/src/mnode.c b/source/dnode/mnode/impl/src/mnode.c index 285ae030e1..23abd1503b 100644 --- a/source/dnode/mnode/impl/src/mnode.c +++ b/source/dnode/mnode/impl/src/mnode.c @@ -86,7 +86,6 @@ static void *mndThreadFp(void *param) { lastTime++; taosMsleep(100); if (pMnode->stopped) break; - if (!mndIsMaster(pMnode)) continue; if (lastTime % (tsTransPullupInterval * 10) == 0) { mndPullupTrans(pMnode); @@ -346,7 +345,8 @@ int32_t mndProcessMsg(SRpcMsg *pMsg) { mTrace("msg:%p, will be processed, type:%s app:%p", pMsg, TMSG_INFO(pMsg->msgType), ahandle); if (IsReq(pMsg)) { - if (!mndIsMaster(pMnode)) { + if (!mndIsMaster(pMnode) && pMsg->msgType != TDMT_MND_TRANS_TIMER && pMsg->msgType != TDMT_MND_MQ_TIMER && + pMsg->msgType != TDMT_MND_TELEM_TIMER) { terrno = TSDB_CODE_APP_NOT_READY; mDebug("msg:%p, failed to process since %s, app:%p", pMsg, terrstr(), ahandle); return -1; From f0da893bfb6fc5450cec971c6e8d9846935084fc Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 23 May 2022 21:59:47 +0800 Subject: [PATCH 27/32] fix: avoid multi thread read/write crash --- source/libs/index/CMakeLists.txt | 6 +-- source/libs/index/inc/indexTfile.h | 2 +- source/libs/index/src/index.c | 31 +++++++++-- source/libs/index/src/indexCache.c | 8 ++- .../libs/index/src/indexFstCountingWriter.c | 1 + source/libs/index/src/indexTfile.c | 13 +++-- source/libs/index/test/indexTests.cc | 54 +++++++++++-------- 7 files changed, 77 insertions(+), 38 deletions(-) diff --git a/source/libs/index/CMakeLists.txt b/source/libs/index/CMakeLists.txt index d5fd574aad..7dc66e4789 100644 --- a/source/libs/index/CMakeLists.txt +++ b/source/libs/index/CMakeLists.txt @@ -31,7 +31,7 @@ if (${BUILD_WITH_INVERTEDINDEX}) endif(${BUILD_WITH_INVERTEDINDEX}) -#if (${BUILD_TEST}) -# add_subdirectory(test) -#endif(${BUILD_TEST}) +if (${BUILD_TEST}) + add_subdirectory(test) +endif(${BUILD_TEST}) diff --git a/source/libs/index/inc/indexTfile.h b/source/libs/index/inc/indexTfile.h index 9712e4b30f..85ed397b0a 100644 --- a/source/libs/index/inc/indexTfile.h +++ b/source/libs/index/inc/indexTfile.h @@ -40,7 +40,7 @@ typedef struct TFileHeader { } TFileHeader; #pragma pack(pop) -#define TFILE_HEADER_SIZE (sizeof(TFileHeader)) +#define TFILE_HEADER_SIZE (sizeof(TFileHeader)) #define TFILE_HEADER_NO_FST (TFILE_HEADER_SIZE - sizeof(int32_t)) typedef struct TFileValue { diff --git a/source/libs/index/src/index.c b/source/libs/index/src/index.c index 2141e90bbd..6add788a89 100644 --- a/source/libs/index/src/index.c +++ b/source/libs/index/src/index.c @@ -29,7 +29,7 @@ #include "lucene++/Lucene_c.h" #endif -#define INDEX_NUM_OF_THREADS 4 +#define INDEX_NUM_OF_THREADS 1 #define INDEX_QUEUE_SIZE 200 #define INDEX_DATA_BOOL_NULL 0x02 @@ -117,7 +117,6 @@ int indexOpen(SIndexOpts* opts, const char* path, SIndex** index) { sIdx->path = tstrdup(path); taosThreadMutexInit(&sIdx->mtx, NULL); tsem_init(&sIdx->sem, 0, 0); - // taosThreadCondInit(&sIdx->finished, NULL); sIdx->refId = indexAddRef(sIdx); indexAcquireRef(sIdx->refId); @@ -143,13 +142,13 @@ void indexDestroy(void* handle) { return; } void indexClose(SIndex* sIdx) { - indexReleaseRef(sIdx->refId); bool ref = 0; if (sIdx->colObj != NULL) { void* iter = taosHashIterate(sIdx->colObj, NULL); while (iter) { IndexCache** pCache = iter; indexCacheForceToMerge((void*)(*pCache)); + indexInfo("%s wait to merge", (*pCache)->colName); indexWait((void*)(sIdx)); iter = taosHashIterate(sIdx->colObj, iter); indexCacheUnRef(*pCache); @@ -157,7 +156,7 @@ void indexClose(SIndex* sIdx) { taosHashCleanup(sIdx->colObj); sIdx->colObj = NULL; } - // taosMsleep(1000 * 5); + indexReleaseRef(sIdx->refId); indexRemoveRef(sIdx->refId); } int64_t indexAddRef(void* p) { @@ -554,8 +553,29 @@ void iterateValueDestroy(IterateValue* value, bool destroy) { taosMemoryFree(value->colVal); value->colVal = NULL; } + +static int64_t indexGetAvaialbleVer(SIndex* sIdx, IndexCache* cache) { + ICacheKey key = {.suid = cache->suid, .colName = cache->colName, .nColName = strlen(cache->colName)}; + int64_t ver = CACHE_VERSION(cache); + taosThreadMutexLock(&sIdx->mtx); + TFileReader* trd = tfileCacheGet(((IndexTFile*)sIdx->tindex)->cache, &key); + if (trd != NULL) { + if (ver < trd->header.version) { + ver = trd->header.version + 1; + } else { + ver += 1; + } + indexInfo("header: %d, ver: %" PRId64 "", trd->header.version, ver); + tfileReaderUnRef(trd); + } else { + indexInfo("not found reader base %p", trd); + } + taosThreadMutexUnlock(&sIdx->mtx); + return ver; +} static int indexGenTFile(SIndex* sIdx, IndexCache* cache, SArray* batch) { - int32_t version = CACHE_VERSION(cache); + int64_t version = indexGetAvaialbleVer(sIdx, cache); + indexInfo("file name version: %" PRId64 "", version); uint8_t colType = cache->type; TFileWriter* tw = tfileWriterOpen(sIdx->path, cache->suid, version, cache->colName, colType); @@ -575,6 +595,7 @@ static int indexGenTFile(SIndex* sIdx, IndexCache* cache, SArray* batch) { if (reader == NULL) { return -1; } + indexInfo("success to create tfile, reopen it, %s", reader->ctx->file.buf); TFileHeader* header = &reader->header; ICacheKey key = {.suid = cache->suid, .colName = header->colName, .nColName = strlen(header->colName)}; diff --git a/source/libs/index/src/indexCache.c b/source/libs/index/src/indexCache.c index 232eca9304..d704e3876e 100644 --- a/source/libs/index/src/indexCache.c +++ b/source/libs/index/src/indexCache.c @@ -335,6 +335,9 @@ IndexCache* indexCacheCreate(SIndex* idx, uint64_t suid, const char* colName, in taosThreadCondInit(&cache->finished, NULL); indexCacheRef(cache); + if (idx != NULL) { + indexAcquireRef(idx->refId); + } return cache; } void indexCacheDebug(IndexCache* cache) { @@ -426,13 +429,16 @@ void indexCacheDestroy(void* cache) { if (pCache == NULL) { return; } + indexMemUnRef(pCache->mem); indexMemUnRef(pCache->imm); taosMemoryFree(pCache->colName); taosThreadMutexDestroy(&pCache->mtx); taosThreadCondDestroy(&pCache->finished); - + if (pCache->index != NULL) { + indexReleaseRef(((SIndex*)pCache->index)->refId); + } taosMemoryFree(pCache); } diff --git a/source/libs/index/src/indexFstCountingWriter.c b/source/libs/index/src/indexFstCountingWriter.c index 1d4395aff6..8ba5173602 100644 --- a/source/libs/index/src/indexFstCountingWriter.c +++ b/source/libs/index/src/indexFstCountingWriter.c @@ -97,6 +97,7 @@ WriterCtx* writerCtxCreate(WriterType type, const char* path, bool readOnly, int int64_t file_size; taosStatFile(path, &file_size, NULL); ctx->file.size = (int)file_size; + } else { // ctx->file.pFile = open(path, O_RDONLY, S_IRWXU | S_IRWXG | S_IRWXO); ctx->file.pFile = taosOpenFile(path, TD_FILE_READ); diff --git a/source/libs/index/src/indexTfile.c b/source/libs/index/src/indexTfile.c index f5b3cbf227..3d85646bd2 100644 --- a/source/libs/index/src/indexTfile.c +++ b/source/libs/index/src/indexTfile.c @@ -1,6 +1,5 @@ /* * Copyright (c) 2019 TAOS Data, Inc. -p * * This program is free software: you can use, redistribute, and/or modify * it under the terms of the GNU Affero General Public License, version 3 * or later ("AGPL"), as published by the Free Software Foundation. @@ -152,10 +151,13 @@ TFileReader* tfileCacheGet(TFileCache* tcache, ICacheKey* key) { char buf[128] = {0}; int32_t sz = indexSerialCacheKey(key, buf); assert(sz < sizeof(buf)); + indexInfo("Try to get key: %s", buf); TFileReader** reader = taosHashGet(tcache->tableCache, buf, sz); - if (reader == NULL) { + if (reader == NULL || *reader == NULL) { + indexInfo("failed to get key: %s", buf); return NULL; } + indexInfo("Get key: %s file: %s", buf, (*reader)->ctx->file.buf); tfileReaderRef(*reader); return *reader; @@ -165,9 +167,10 @@ void tfileCachePut(TFileCache* tcache, ICacheKey* key, TFileReader* reader) { int32_t sz = indexSerialCacheKey(key, buf); // remove last version index reader TFileReader** p = taosHashGet(tcache->tableCache, buf, sz); - if (p != NULL) { + if (p != NULL && *p != NULL) { TFileReader* oldReader = *p; taosHashRemove(tcache->tableCache, buf, sz); + indexInfo("found %s, remove file %s", buf, oldReader->ctx->file.buf); oldReader->remove = true; tfileReaderUnRef(oldReader); } @@ -180,7 +183,6 @@ TFileReader* tfileReaderCreate(WriterCtx* ctx) { if (reader == NULL) { return NULL; } - reader->ctx = ctx; if (0 != tfileReaderVerify(reader)) { @@ -202,6 +204,7 @@ TFileReader* tfileReaderCreate(WriterCtx* ctx) { tfileReaderDestroy(reader); return NULL; } + reader->remove = false; return reader; } @@ -536,7 +539,7 @@ TFileReader* tfileReaderOpen(char* path, uint64_t suid, int32_t version, const c indexError("failed to open readonly file: %s, reason: %s", fullname, terrstr()); return NULL; } - indexInfo("open read file name:%s, file size: %d", wc->file.buf, wc->file.size); + indexTrace("open read file name:%s, file size: %d", wc->file.buf, wc->file.size); TFileReader* reader = tfileReaderCreate(wc); return reader; diff --git a/source/libs/index/test/indexTests.cc b/source/libs/index/test/indexTests.cc index 11a25a798f..f848cee86b 100644 --- a/source/libs/index/test/indexTests.cc +++ b/source/libs/index/test/indexTests.cc @@ -674,10 +674,13 @@ class IndexObj { // opt numOfWrite = 0; numOfRead = 0; - indexInit(); + // indexInit(); } - int Init(const std::string& dir) { - taosRemoveDir(dir.c_str()); + int Init(const std::string& dir, bool remove = true) { + if (remove) { + taosRemoveDir(dir.c_str()); + taosMkDir(dir.c_str()); + } taosMkDir(dir.c_str()); int ret = indexOpen(&opts, dir.c_str(), &idx); if (ret != 0) { @@ -838,8 +841,11 @@ class IndexEnv2 : public ::testing::Test { initLog(); index = new IndexObj(); } - virtual void TearDown() { delete index; } - IndexObj* index; + virtual void TearDown() { + // taosMsleep(500); + delete index; + } + IndexObj* index; }; TEST_F(IndexEnv2, testIndexOpen) { std::string path = TD_TMP_DIR_PATH "test"; @@ -951,6 +957,8 @@ static void single_write_and_search(IndexObj* idx) { target = idx->SearchOne("tag2", "Test"); } static void multi_write_and_search(IndexObj* idx) { + idx->PutOne("tag1", "Hello"); + idx->PutOne("tag2", "Test"); int target = idx->SearchOne("tag1", "Hello"); target = idx->SearchOne("tag2", "Test"); idx->WriteMultiMillonData("tag1", "hello world test", 100 * 100); @@ -992,16 +1000,16 @@ TEST_F(IndexEnv2, testIndex_MultiWrite_and_MultiRead) { } } -// TEST_F(IndexEnv2, testIndex_restart) { -// std::string path = TD_TMP_DIR_PATH "cache_and_tfile"; -// if (index->Init(path) != 0) { -// } -// index->SearchOneTarget("tag1", "Hello", 10); -// index->SearchOneTarget("tag2", "Test", 10); -//} +TEST_F(IndexEnv2, testIndex_restart) { + std::string path = TD_TMP_DIR_PATH "cache_and_tfile"; + if (index->Init(path, false) != 0) { + } + index->SearchOneTarget("tag1", "Hello", 10); + index->SearchOneTarget("tag2", "Test", 10); +} // TEST_F(IndexEnv2, testIndex_restart1) { // std::string path = TD_TMP_DIR_PATH "cache_and_tfile"; -// if (index->Init(path) != 0) { +// if (index->Init(path, false) != 0) { // } // index->ReadMultiMillonData("tag1", "coding"); // index->SearchOneTarget("tag1", "Hello", 10); @@ -1018,16 +1026,16 @@ TEST_F(IndexEnv2, testIndex_MultiWrite_and_MultiRead) { // std::cout << "reader sz: " << index->SearchOne("tag1", "Hello") << std::endl; // assert(3 == index->SearchOne("tag1", "Hello")); //} -// TEST_F(IndexEnv2, testIndexMultiTag) { -// std::string path = TD_TMP_DIR_PATH "multi_tag"; -// if (index->Init(path) != 0) { -// } -// int64_t st = taosGetTimestampUs(); -// int32_t num = 1000 * 10000; -// index->WriteMultiMillonData("tag1", "xxxxxxxxxxxxxxx", num); -// std::cout << "numOfRow: " << num << "\ttime cost:" << taosGetTimestampUs() - st << std::endl; -// // index->WriteMultiMillonData("tag2", "xxxxxxxxxxxxxxxxxxxxxxxxx", 100 * 10000); -//} +TEST_F(IndexEnv2, testIndexMultiTag) { + std::string path = TD_TMP_DIR_PATH "multi_tag"; + if (index->Init(path) != 0) { + } + int64_t st = taosGetTimestampUs(); + int32_t num = 100 * 100; + index->WriteMultiMillonData("tag1", "xxxxxxxxxxxxxxx", num); + std::cout << "numOfRow: " << num << "\ttime cost:" << taosGetTimestampUs() - st << std::endl; + // index->WriteMultiMillonData("tag2", "xxxxxxxxxxxxxxxxxxxxxxxxx", 100 * 10000); +} TEST_F(IndexEnv2, testLongComVal1) { std::string path = TD_TMP_DIR_PATH "long_colVal"; if (index->Init(path) != 0) { From 43f2a51e4b01033182b5dc3b9ff0940fdb390d16 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Mon, 23 May 2022 22:08:14 +0800 Subject: [PATCH 28/32] fix(sync) sync/mnode eq msg --- source/dnode/mnode/impl/src/mndSync.c | 8 +++++++- source/dnode/vnode/src/vnd/vnodeSync.c | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index b7cae58b12..63809a3d76 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -17,7 +17,13 @@ #include "mndSync.h" #include "mndTrans.h" -int32_t mndSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { return tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); } +int32_t mndSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { + int32_t code = tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); + if (code != 0) { + rpcFreeCont(pMsg->pCont); + } + return code; +} int32_t mndSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { return tmsgSendReq(pEpSet, pMsg); } diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 538d3f7f87..882ee912cd 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -56,7 +56,13 @@ void vnodeSyncStart(SVnode *pVnode) { void vnodeSyncClose(SVnode *pVnode) { syncStop(pVnode->sync); } -int32_t vnodeSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { return tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); } +int32_t vnodeSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { + int32_t code = tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); + if (code != 0) { + rpcFreeCont(pMsg->pCont); + } + return code; +} int32_t vnodeSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { return tmsgSendReq(pEpSet, pMsg); } From 0dfbfc7cc28071647c7f480eb56da7cf7ec9c0fb Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Mon, 23 May 2022 23:55:15 +0800 Subject: [PATCH 29/32] enh: add debug log of tsdb load block data cols --- include/common/taosdef.h | 1 + source/dnode/vnode/src/tsdb/tsdbReadImpl.c | 131 +++++++++++++++++++-- 2 files changed, 121 insertions(+), 11 deletions(-) diff --git a/include/common/taosdef.h b/include/common/taosdef.h index 1d32c9825f..1f3a4d3323 100644 --- a/include/common/taosdef.h +++ b/include/common/taosdef.h @@ -96,6 +96,7 @@ extern char *qtypeStr[]; #define TSDB_PORT_HTTP 11 #undef TD_DEBUG_PRINT_ROW +#undef TD_DEBUG_PRINT_TSDB_READ_DCOLS #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/tsdb/tsdbReadImpl.c b/source/dnode/vnode/src/tsdb/tsdbReadImpl.c index 0773805684..5426b99730 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadImpl.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadImpl.c @@ -252,6 +252,45 @@ static FORCE_INLINE void tsdbSwapDataCols(SDataCols *pDest, SDataCols *pSrc) { pSrc->cols = pCols; } +static void printTsdbLoadBlkData(SReadH *readh, SDataCols *pDCols, SBlock *pBlock, const char *tag, int32_t ln) { + printf("%s:%d:%" PRIi64 " ================\n", tag, ln, taosGetSelfPthreadId()); + if (pBlock) { + SDFile *pHeadf = TSDB_READ_HEAD_FILE(readh); + printf("%s:%d:%" PRIi64 ":%p:%d %s\n", tag, ln, taosGetSelfPthreadId(), pBlock, (int32_t)pBlock->len, + pHeadf->f.aname); + SDFile *pDFile = pBlock->last ? TSDB_READ_LAST_FILE(readh) : TSDB_READ_DATA_FILE(readh); + printf("%s:%d:%" PRIi64 ":%p:%d %s\n", tag, ln, taosGetSelfPthreadId(), pBlock, (int32_t)pBlock->len, + pDFile->f.aname); + } + SDataCol *pDCol = pDCols->cols + 0; + if (TSKEY_MIN == *(int64_t *)pDCol->pData) { + ASSERT(0); + } + + int rows = pDCols->numOfRows; + for (int r = 0; r < rows; ++r) { + if (pBlock) { + printf("%s:%d:%" PRIi64 ":%p:%d rows[%d][%d] ", tag, ln, taosGetSelfPthreadId(), pBlock, (int32_t)pBlock->len, + rows, r); + } else { + printf("%s:%d:%" PRIi64 ":%s rows[%d][%d] ", tag, ln, taosGetSelfPthreadId(), "=== merge === ", rows, r); + } + + int nDataCols = pDCols->numOfCols; + int j = 0; + SCellVal sVal = {0}; + while (j < nDataCols) { + SDataCol *pDataCol = pDCols->cols + j; + tdGetColDataOfRow(&sVal, pDataCol, r, pDCols->bitmapMode); + tdSCellValPrint(&sVal, pDataCol->type); + ++j; + } + printf("\n"); + } + + fflush(stdout); +} + int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo) { ASSERT(pBlock->numOfSubBlocks > 0); STsdbCfg *pCfg = REPO_CFG(pReadh->pRepo); @@ -266,15 +305,23 @@ int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo) { } } - if (tsdbLoadBlockDataImpl(pReadh, iBlock, pReadh->pDCols[0], TSDB_BITMODE_ONE_BIT) < 0) return -1; +#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS + printTsdbLoadBlkData(pReadh, pReadh->pDCols[0], iBlock, __func__, __LINE__); +#endif for (int i = 1; i < pBlock->numOfSubBlocks; i++) { iBlock++; if (tsdbLoadBlockDataImpl(pReadh, iBlock, pReadh->pDCols[1], TSDB_BITMODE_DEFAULT) < 0) return -1; +#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS + printTsdbLoadBlkData(pReadh, pReadh->pDCols[1], iBlock, __func__, __LINE__); +#endif // TODO: use the real maxVersion to replace the UINT64_MAX to support Multi-Version if (tdMergeDataCols(pReadh->pDCols[0], pReadh->pDCols[1], pReadh->pDCols[1]->numOfRows, NULL, TD_SUPPORT_UPDATE(update), TD_VER_MAX) < 0) return -1; +#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS + printTsdbLoadBlkData(pReadh, pReadh->pDCols[0], iBlock, " === MERGE === ", __LINE__); +#endif } // if ((pBlock->numOfSubBlocks == 1) && (iBlock->hasDupKey)) { // TODO: use this line if (pBlock->numOfSubBlocks == 1) { @@ -286,6 +333,9 @@ int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo) { } tsdbSwapDataCols(pReadh->pDCols[0], pReadh->pDCols[1]); ASSERT(pReadh->pDCols[0]->bitmapMode != 0); +#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS + printTsdbLoadBlkData(pReadh, pReadh->pDCols[0], iBlock, " === UPDATE FILTER === ", __LINE__); +#endif } ASSERT(pReadh->pDCols[0]->numOfRows <= pBlock->numOfRows); @@ -295,6 +345,53 @@ int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo) { return 0; } +static void printTsdbLoadBlkDataCols(SReadH *readh, SDataCols *pDCols, SBlock *pBlock, const int16_t *colIds, + int numOfColsIds, const char *tag, int32_t ln) { + printf("ausp:%s:%d:%" PRIi64 " ================\n", tag, ln, taosGetSelfPthreadId()); + if (pBlock) { + SDFile *pHeadf = TSDB_READ_HEAD_FILE(readh); + printf("ausp:%s:%d:%" PRIi64 ":%p:%d %s\n", tag, ln, taosGetSelfPthreadId(), pBlock, (int32_t)pBlock->len, + pHeadf->f.aname); + SDFile *pDFile = pBlock->last ? TSDB_READ_LAST_FILE(readh) : TSDB_READ_DATA_FILE(readh); + printf("ausp:%s:%d:%" PRIi64 ":%p:%d %s\n", tag, ln, taosGetSelfPthreadId(), pBlock, (int32_t)pBlock->len, + pDFile->f.aname); + } + + int rows = pDCols->numOfRows; + for (int r = 0; r < rows; ++r) { + if (pBlock) { + printf("ausp:%s:%d:%" PRIi64 ":%p:%d rows[%d][%d] ", tag, ln, taosGetSelfPthreadId(), pBlock, + (int32_t)pBlock->len, rows, r); + } else { + printf("ausp:%s:%d:%" PRIi64 ":%s rows[%d][%d] ", tag, ln, taosGetSelfPthreadId(), "=== merge === ", rows, r); + } + + int nDataCols = pDCols->numOfCols; + int j = 0, k = 0; + SCellVal sVal = {0}; + while (j < nDataCols) { + if (k >= numOfColsIds) break; + SDataCol *pDataCol = pDCols->cols + j; + int16_t colId1 = pDataCol->colId; + int16_t colId2 = *(colIds + k); + if (colId1 < colId2) { + ++j; + } else if (colId1 > colId2) { + ++k; // colId2 not exists in SDataCols + printf("NotExists "); + } else { + tdGetColDataOfRow(&sVal, pDataCol, r, pDCols->bitmapMode); + tdSCellValPrint(&sVal, pDataCol->type); + ++j; + ++k; + } + } + printf("\n"); + } + + fflush(stdout); +} + // TODO: filter by Multi-Version int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, const int16_t *colIds, int numOfColsIds, bool mergeBitmap) { @@ -310,14 +407,25 @@ int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, } } - if (tsdbLoadBlockDataColsImpl(pReadh, iBlock, pReadh->pDCols[0], colIds, numOfColsIds, TSDB_BITMODE_ONE_BIT) < 0) return -1; + if (tsdbLoadBlockDataColsImpl(pReadh, iBlock, pReadh->pDCols[0], colIds, numOfColsIds, TSDB_BITMODE_ONE_BIT) < 0) + return -1; +#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS + printTsdbLoadBlkDataCols(pReadh, pReadh->pDCols[0], iBlock, colIds, numOfColsIds, __func__, __LINE__); +#endif for (int i = 1; i < pBlock->numOfSubBlocks; i++) { iBlock++; - if (tsdbLoadBlockDataColsImpl(pReadh, iBlock, pReadh->pDCols[1], colIds, numOfColsIds, TSDB_BITMODE_DEFAULT) < 0) return -1; + if (tsdbLoadBlockDataColsImpl(pReadh, iBlock, pReadh->pDCols[1], colIds, numOfColsIds, TSDB_BITMODE_DEFAULT) < 0) + return -1; +#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS + printTsdbLoadBlkDataCols(pReadh, pReadh->pDCols[1], iBlock, colIds, numOfColsIds, __func__, __LINE__); +#endif // TODO: use the real maxVersion to replace the UINT64_MAX to support Multi-Version if (tdMergeDataCols(pReadh->pDCols[0], pReadh->pDCols[1], pReadh->pDCols[1]->numOfRows, NULL, TD_SUPPORT_UPDATE(update), TD_VER_MAX) < 0) return -1; +#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS + printTsdbLoadBlkDataCols(pReadh, pReadh->pDCols[0], NULL, colIds, numOfColsIds, __func__, __LINE__); +#endif } // if ((pBlock->numOfSubBlocks == 1) && (iBlock->hasDupKey)) { // TODO: use this line if (pBlock->numOfSubBlocks == 1) { @@ -329,18 +437,23 @@ int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, } tsdbSwapDataCols(pReadh->pDCols[0], pReadh->pDCols[1]); ASSERT(pReadh->pDCols[0]->bitmapMode != 0); +#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS + printTsdbLoadBlkDataCols(pReadh, pReadh->pDCols[0], NULL, colIds, numOfColsIds, + " === update filter === ", __LINE__); +#endif } if (mergeBitmap && !tdDataColsIsBitmapI(pReadh->pDCols[0])) { for (int i = 0; i < numOfColsIds; ++i) { SDataCol *pDataCol = pReadh->pDCols[0]->cols + i; if (pDataCol->len > 0 && pDataCol->bitmap) { - ASSERT(pDataCol->colId != PRIMARYKEY_TIMESTAMP_COL_ID); - ASSERT(pDataCol->pBitmap); tdMergeBitmap(pDataCol->pBitmap, pReadh->pDCols[0]->numOfRows, pDataCol->pBitmap); tdDataColsSetBitmapI(pReadh->pDCols[0]); } } +#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS + printTsdbLoadBlkDataCols(pReadh, pReadh->pDCols[0], NULL, colIds, numOfColsIds, " === merge bitmap === ", __LINE__); +#endif } ASSERT(pReadh->pDCols[0]->numOfRows <= pBlock->numOfRows); @@ -551,9 +664,7 @@ static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDat tdResetDataCols(pDataCols); - if (tdIsBitmapModeI(bitmapMode)) { - tdDataColsSetBitmapI(pDataCols); - } + pDataCols->bitmapMode = bitmapMode; if (tsdbMakeRoom((void **)(&TSDB_READ_BUF(pReadh)), pBlock->len) < 0) return -1; @@ -740,9 +851,7 @@ static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols * tdResetDataCols(pDataCols); - if (tdIsBitmapModeI(bitmapMode)) { - tdDataColsSetBitmapI(pDataCols); - } + pDataCols->bitmapMode = bitmapMode; // If only load timestamp column, no need to load SBlockData part if (numOfColIds > 1 && tsdbLoadBlockOffset(pReadh, pBlock) < 0) return -1; From 4a110b0170f2544f759ab31752077a430d9c842d Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Mon, 23 May 2022 23:59:16 +0800 Subject: [PATCH 30/32] enh: code optimization --- include/common/taosdef.h | 2 +- source/dnode/vnode/src/tsdb/tsdbReadImpl.c | 30 +++++++++++----------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/common/taosdef.h b/include/common/taosdef.h index 1f3a4d3323..d39c7a1215 100644 --- a/include/common/taosdef.h +++ b/include/common/taosdef.h @@ -96,7 +96,7 @@ extern char *qtypeStr[]; #define TSDB_PORT_HTTP 11 #undef TD_DEBUG_PRINT_ROW -#undef TD_DEBUG_PRINT_TSDB_READ_DCOLS +#undef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/tsdb/tsdbReadImpl.c b/source/dnode/vnode/src/tsdb/tsdbReadImpl.c index 5426b99730..f66037b16d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadImpl.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadImpl.c @@ -306,20 +306,20 @@ int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo) { } if (tsdbLoadBlockDataImpl(pReadh, iBlock, pReadh->pDCols[0], TSDB_BITMODE_ONE_BIT) < 0) return -1; -#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS +#ifdef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS printTsdbLoadBlkData(pReadh, pReadh->pDCols[0], iBlock, __func__, __LINE__); #endif for (int i = 1; i < pBlock->numOfSubBlocks; i++) { iBlock++; if (tsdbLoadBlockDataImpl(pReadh, iBlock, pReadh->pDCols[1], TSDB_BITMODE_DEFAULT) < 0) return -1; -#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS +#ifdef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS printTsdbLoadBlkData(pReadh, pReadh->pDCols[1], iBlock, __func__, __LINE__); #endif // TODO: use the real maxVersion to replace the UINT64_MAX to support Multi-Version if (tdMergeDataCols(pReadh->pDCols[0], pReadh->pDCols[1], pReadh->pDCols[1]->numOfRows, NULL, TD_SUPPORT_UPDATE(update), TD_VER_MAX) < 0) return -1; -#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS +#ifdef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS printTsdbLoadBlkData(pReadh, pReadh->pDCols[0], iBlock, " === MERGE === ", __LINE__); #endif } @@ -333,7 +333,7 @@ int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo) { } tsdbSwapDataCols(pReadh->pDCols[0], pReadh->pDCols[1]); ASSERT(pReadh->pDCols[0]->bitmapMode != 0); -#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS +#ifdef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS printTsdbLoadBlkData(pReadh, pReadh->pDCols[0], iBlock, " === UPDATE FILTER === ", __LINE__); #endif } @@ -347,23 +347,23 @@ int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo) { static void printTsdbLoadBlkDataCols(SReadH *readh, SDataCols *pDCols, SBlock *pBlock, const int16_t *colIds, int numOfColsIds, const char *tag, int32_t ln) { - printf("ausp:%s:%d:%" PRIi64 " ================\n", tag, ln, taosGetSelfPthreadId()); + printf("%s:%d:%" PRIi64 " ================\n", tag, ln, taosGetSelfPthreadId()); if (pBlock) { SDFile *pHeadf = TSDB_READ_HEAD_FILE(readh); - printf("ausp:%s:%d:%" PRIi64 ":%p:%d %s\n", tag, ln, taosGetSelfPthreadId(), pBlock, (int32_t)pBlock->len, + printf("%s:%d:%" PRIi64 ":%p:%d %s\n", tag, ln, taosGetSelfPthreadId(), pBlock, (int32_t)pBlock->len, pHeadf->f.aname); SDFile *pDFile = pBlock->last ? TSDB_READ_LAST_FILE(readh) : TSDB_READ_DATA_FILE(readh); - printf("ausp:%s:%d:%" PRIi64 ":%p:%d %s\n", tag, ln, taosGetSelfPthreadId(), pBlock, (int32_t)pBlock->len, + printf("%s:%d:%" PRIi64 ":%p:%d %s\n", tag, ln, taosGetSelfPthreadId(), pBlock, (int32_t)pBlock->len, pDFile->f.aname); } int rows = pDCols->numOfRows; for (int r = 0; r < rows; ++r) { if (pBlock) { - printf("ausp:%s:%d:%" PRIi64 ":%p:%d rows[%d][%d] ", tag, ln, taosGetSelfPthreadId(), pBlock, - (int32_t)pBlock->len, rows, r); + printf("%s:%d:%" PRIi64 ":%p:%d rows[%d][%d] ", tag, ln, taosGetSelfPthreadId(), pBlock, (int32_t)pBlock->len, + rows, r); } else { - printf("ausp:%s:%d:%" PRIi64 ":%s rows[%d][%d] ", tag, ln, taosGetSelfPthreadId(), "=== merge === ", rows, r); + printf("%s:%d:%" PRIi64 ":%s rows[%d][%d] ", tag, ln, taosGetSelfPthreadId(), "=== merge === ", rows, r); } int nDataCols = pDCols->numOfCols; @@ -409,21 +409,21 @@ int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, if (tsdbLoadBlockDataColsImpl(pReadh, iBlock, pReadh->pDCols[0], colIds, numOfColsIds, TSDB_BITMODE_ONE_BIT) < 0) return -1; -#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS +#ifdef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS printTsdbLoadBlkDataCols(pReadh, pReadh->pDCols[0], iBlock, colIds, numOfColsIds, __func__, __LINE__); #endif for (int i = 1; i < pBlock->numOfSubBlocks; i++) { iBlock++; if (tsdbLoadBlockDataColsImpl(pReadh, iBlock, pReadh->pDCols[1], colIds, numOfColsIds, TSDB_BITMODE_DEFAULT) < 0) return -1; -#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS +#ifdef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS printTsdbLoadBlkDataCols(pReadh, pReadh->pDCols[1], iBlock, colIds, numOfColsIds, __func__, __LINE__); #endif // TODO: use the real maxVersion to replace the UINT64_MAX to support Multi-Version if (tdMergeDataCols(pReadh->pDCols[0], pReadh->pDCols[1], pReadh->pDCols[1]->numOfRows, NULL, TD_SUPPORT_UPDATE(update), TD_VER_MAX) < 0) return -1; -#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS +#ifdef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS printTsdbLoadBlkDataCols(pReadh, pReadh->pDCols[0], NULL, colIds, numOfColsIds, __func__, __LINE__); #endif } @@ -437,7 +437,7 @@ int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, } tsdbSwapDataCols(pReadh->pDCols[0], pReadh->pDCols[1]); ASSERT(pReadh->pDCols[0]->bitmapMode != 0); -#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS +#ifdef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS printTsdbLoadBlkDataCols(pReadh, pReadh->pDCols[0], NULL, colIds, numOfColsIds, " === update filter === ", __LINE__); #endif @@ -451,7 +451,7 @@ int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, tdDataColsSetBitmapI(pReadh->pDCols[0]); } } -#ifdef TD_DEBUG_PRINT_TSDB_READ_DCOLS +#ifdef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS printTsdbLoadBlkDataCols(pReadh, pReadh->pDCols[0], NULL, colIds, numOfColsIds, " === merge bitmap === ", __LINE__); #endif } From 924b334b00169fa4faaf339cb5191f1df1829370 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 24 May 2022 00:07:09 +0800 Subject: [PATCH 31/32] fix: avoid memory leak whilel stop sync --- source/dnode/mgmt/mgmt_mnode/inc/mmInt.h | 31 ++++++++++++--------- source/dnode/mgmt/mgmt_mnode/src/mmInt.c | 21 ++++++++++++++ source/dnode/mgmt/mgmt_mnode/src/mmWorker.c | 10 ++++++- source/dnode/mnode/impl/src/mndSync.c | 7 +++-- source/dnode/mnode/impl/src/mnode.c | 2 -- 5 files changed, 52 insertions(+), 19 deletions(-) diff --git a/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h b/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h index 75e83d6547..030d4b309e 100644 --- a/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h +++ b/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h @@ -24,19 +24,22 @@ extern "C" { #endif typedef struct SMnodeMgmt { - SDnodeData *pData; - SMnode *pMnode; - SMsgCb msgCb; - const char *path; - const char *name; - SSingleWorker queryWorker; - SSingleWorker readWorker; - SSingleWorker writeWorker; - SSingleWorker syncWorker; - SSingleWorker monitorWorker; - SReplica replicas[TSDB_MAX_REPLICA]; - int8_t replica; - int8_t selfIndex; + SDnodeData *pData; + SMnode *pMnode; + SMsgCb msgCb; + const char *path; + const char *name; + SSingleWorker queryWorker; + SSingleWorker readWorker; + SSingleWorker writeWorker; + SSingleWorker syncWorker; + SSingleWorker monitorWorker; + SReplica replicas[TSDB_MAX_REPLICA]; + int8_t replica; + int8_t selfIndex; + bool stopped; + int32_t refCount; + TdThreadRwlock lock; } SMnodeMgmt; // mmFile.c @@ -45,6 +48,8 @@ int32_t mmWriteFile(SMnodeMgmt *pMgmt, SDCreateMnodeReq *pMsg, bool deployed); // mmInt.c int32_t mmAlter(SMnodeMgmt *pMgmt, SDAlterMnodeReq *pMsg); +int32_t mmAcquire(SMnodeMgmt *pMgmt); +void mmRelease(SMnodeMgmt *pMgmt); // mmHandle.c SArray *mmGetMsgHandles(); diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmInt.c b/source/dnode/mgmt/mgmt_mnode/src/mmInt.c index 68b2889278..43113d05af 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmInt.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmInt.c @@ -110,6 +110,7 @@ static void mmClose(SMnodeMgmt *pMgmt) { if (pMgmt->pMnode != NULL) { mmStopWorker(pMgmt); mndClose(pMgmt->pMnode); + taosThreadRwlockDestroy(&pMgmt->lock); pMgmt->pMnode = NULL; } @@ -142,6 +143,7 @@ static int32_t mmOpen(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) { pMgmt->msgCb.queueFps[WRITE_QUEUE] = (PutToQueueFp)mmPutRpcMsgToWriteQueue; pMgmt->msgCb.queueFps[SYNC_QUEUE] = (PutToQueueFp)mmPutRpcMsgToSyncQueue; pMgmt->msgCb.mgmt = pMgmt; + taosThreadRwlockInit(&pMgmt->lock, NULL); bool deployed = false; if (mmReadFile(pMgmt, &deployed) != 0) { @@ -211,3 +213,22 @@ SMgmtFunc mmGetMgmtFunc() { return mgmtFunc; } + +int32_t mmAcquire(SMnodeMgmt *pMgmt) { + int32_t code = 0; + + taosThreadRwlockRdlock(&pMgmt->lock); + if (pMgmt->stopped) { + code = -1; + } else { + atomic_add_fetch_32(&pMgmt->refCount, 1); + } + taosThreadRwlockUnlock(&pMgmt->lock); + return code; +} + +void mmRelease(SMnodeMgmt *pMgmt) { + taosThreadRwlockRdlock(&pMgmt->lock); + atomic_sub_fetch_32(&pMgmt->refCount, 1); + taosThreadRwlockUnlock(&pMgmt->lock); +} \ No newline at end of file diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c index f408992aa6..45dec1153f 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c @@ -111,7 +111,10 @@ int32_t mmPutRpcMsgToReadQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) { } int32_t mmPutRpcMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) { - return mmPutRpcMsgToWorker(&pMgmt->syncWorker, pMsg); + if (mmAcquire(pMgmt) != 0) return -1; + int32_t code = mmPutRpcMsgToWorker(&pMgmt->syncWorker, pMsg); + mmRelease(pMgmt); + return code; } int32_t mmStartWorker(SMnodeMgmt *pMgmt) { @@ -180,6 +183,11 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt) { } void mmStopWorker(SMnodeMgmt *pMgmt) { + taosThreadRwlockWrlock(&pMgmt->lock); + pMgmt->stopped = 1; + taosThreadRwlockUnlock(&pMgmt->lock); + while (pMgmt->refCount > 0) taosMsleep(10); + tSingleWorkerCleanup(&pMgmt->monitorWorker); tSingleWorkerCleanup(&pMgmt->queryWorker); tSingleWorkerCleanup(&pMgmt->readWorker); diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 84da778300..e3f7b40526 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -152,9 +152,10 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw) { } void mndSyncStart(SMnode *pMnode) { - syncSetMsgCb(pMnode->syncMgmt.sync, &pMnode->msgCb); - syncStart(pMnode->syncMgmt.sync); - mDebug("sync:%" PRId64 " is started", pMnode->syncMgmt.sync); + SSyncMgmt *pMgmt = &pMnode->syncMgmt; + syncSetMsgCb(pMgmt->sync, &pMnode->msgCb); + syncStart(pMgmt->sync); + mDebug("sync:%" PRId64 " is started", pMgmt->sync); } void mndSyncStop(SMnode *pMnode) {} diff --git a/source/dnode/mnode/impl/src/mnode.c b/source/dnode/mnode/impl/src/mnode.c index 535d04e565..5b8c1a3101 100644 --- a/source/dnode/mnode/impl/src/mnode.c +++ b/source/dnode/mnode/impl/src/mnode.c @@ -435,8 +435,6 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { } return ret; - - return 0; } int32_t mndProcessMsg(SRpcMsg *pMsg) { From 262d28bef0ff9e9ade65658d1a57c9c3a8c5d5aa Mon Sep 17 00:00:00 2001 From: Chait Diwadkar <94201190+cdiwadkar16@users.noreply.github.com> Date: Mon, 23 May 2022 13:54:47 -0700 Subject: [PATCH 32/32] docs: stylistic and phrasing changes Seems like previous PR was overwritten and reverted to original. --- docs-en/01-index.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs-en/01-index.md b/docs-en/01-index.md index 9574323fe6..f5b7f3e0f6 100644 --- a/docs-en/01-index.md +++ b/docs-en/01-index.md @@ -4,24 +4,24 @@ sidebar_label: Documentation Home slug: / --- -TDengine is a [high-performance](https://tdengine.com/fast), [scalable](https://tdengine.com/scalable) time series database with [SQL support](https://tdengine.com/sql-support). This document is the TDengine user manual. It introduces the basic concepts, installation, features, SQL, APIs, operation, maintenance, kernel design, etc. It’s written mainly for architects, developers and system administrators. +TDengine is a [high-performance](https://tdengine.com/fast), [scalable](https://tdengine.com/scalable) time series database with [SQL support](https://tdengine.com/sql-support). This document is the TDengine user manual. It introduces the basic, as well as novel concepts, in TDengine, and also talks in detail about installation, features, SQL, APIs, operation, maintenance, kernel design and other topics. It’s written mainly for architects, developers and system administrators. -To get a global view about TDengine, like feature list, benchmarks, and competitive advantages, please browse through section [Introduction](./intro). +To get an overview of TDengine, such as a feature list, benchmarks, and competitive advantages, please browse through the [Introduction](./intro) section. -TDengine makes full use of the characteristics of time series data, proposes the concepts of "one table for one data collection point" and "super table", and designs an innovative storage engine, which greatly improves the efficiency of data ingestion, querying and storage. To understand the new concepts and use TDengine in the right way, please read [“Concepts”](./concept) thoroughly. +TDengine greatly improves the efficiency of data ingestion, querying and storage by exploiting the characteristics of time series data, introducing the novel concepts of "one table for one data collection point" and "super table", and designing an innovative storage engine. To understand the new concepts in TDengine and make full use of the features and capabilities of TDengine, please read [“Concepts”](./concept) thoroughly. -If you are a developer, please read the [“Developer Guide”](./develop) carefully. This section introduces the database connection, data modeling, data ingestion, query, continuous query, cache, data subscription, user-defined function, etc. in detail. Sample code is provided for a variety of programming languages. In most cases, you can just copy and paste the sample code, make a few changes to accommodate your application, and it will work. +If you are a developer, please read the [“Developer Guide”](./develop) carefully. This section introduces the database connection, data modeling, data ingestion, query, continuous query, cache, data subscription, user-defined functions, and other functionality in detail. Sample code is provided for a variety of programming languages. In most cases, you can just copy and paste the sample code, make a few changes to accommodate your application, and it will work. -We live in the era of big data, and scale-up is unable to meet the growing business needs. Any modern data system must have the ability to scale out, and clustering has become an indispensable feature of big data systems. The TDengine team has not only developed the cluster feature, they also decided to open source this important feature. To learn how to deploy, manage and maintain a TDengine cluster please refer to ["Cluster"](./cluster). +We live in the era of big data, and scale-up is unable to meet the growing needs of business. Any modern data system must have the ability to scale out, and clustering has become an indispensable feature of big data systems. Not only did the TDengine team develop the cluster feature, but also decided to open source this important feature. To learn how to deploy, manage and maintain a TDengine cluster please refer to ["cluster"](./cluster). -TDengine uses SQL as its query language, which greatly reduces learning costs and migration costs. In addition to the standard SQL, TDengine has extensions to support time series data scenarios better, such as roll up, interpolation, time weighted average, etc. The ["SQL Reference"](./taos-sql) chapter describes the SQL syntax in detail, and lists the various supported commands and functions. +TDengine uses ubiquitious SQL as its query language, which greatly reduces learning costs and migration costs. In addition to the standard SQL, TDengine has extensions to better support time series data analysis. These extensions include functions such as roll up, interpolation and time weighted average, among many others. The ["SQL Reference"](./taos-sql) chapter describes the SQL syntax in detail, and lists the various supported commands and functions. -If you are a system administrator who cares about installation, upgrade, fault tolerance, disaster recovery, data import, data export, system configuration, how to monitor whether TDengine is running healthily, and how to improve system performance, please refer to the ["Administration"](./operation) thoroughly. +If you are a system administrator who cares about installation, upgrade, fault tolerance, disaster recovery, data import, data export, system configuration, how to monitor whether TDengine is running healthily, and how to improve system performance, please refer to, and thoroughly read the ["Administration"](./operation) section. -If you want to know more about TDengine tools, REST API, and connectors for various programming languages, please see the ["Reference"](./reference) chapter. +If you want to know more about TDengine tools, the REST API, and connectors for various programming languages, please see the ["Reference"](./reference) chapter. If you are very interested in the internal design of TDengine, please read the chapter ["Inside TDengine”](./tdinternal), which introduces the cluster design, data partitioning, sharding, writing, and reading processes in detail. If you want to study TDengine code or even contribute code, please read this chapter carefully. -TDengine is an open source database, you are welcome to be a part of TDengine. If you find any errors in the documentation, or the description is not clear, please click "Edit this page" at the bottom of each page to edit it directly. +TDengine is an open source database, and we would love for you to be a part of TDengine. If you find any errors in the documentation, or see parts where more clarity or elaboration is needed, please click "Edit this page" at the bottom of each page to edit it directly. Together, we make a difference.