From 02dbf353547896a8120bd8f5d685b370fd97f618 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 20 Dec 2022 16:57:16 +0800 Subject: [PATCH 01/53] fix(query): return TSDB_CODE_RPC_VGROUP_NOT_CONNECTED if all nodes in vgroups is offline --- include/util/taoserror.h | 1 + source/libs/transport/src/transCli.c | 9 +++++++++ source/util/src/terror.c | 1 + 3 files changed, 11 insertions(+) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 52221bdd44..f1f35a67a4 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -65,6 +65,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RPC_PORT_EADDRINUSE TAOS_DEF_ERROR_CODE(0, 0x0017) // #define TSDB_CODE_RPC_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0018) // #define TSDB_CODE_RPC_TIMEOUT TAOS_DEF_ERROR_CODE(0, 0x0019) // +#define TSDB_CODE_RPC_VGROUP_NOT_CONNECTED TAOS_DEF_ERROR_CODE(0, 0x0020) // "Vgroup could not be connected" //common & util #define TSDB_CODE_OPS_NOT_SUPPORT TAOS_DEF_ERROR_CODE(0, 0x0100) // diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index d144a76eb0..04783605f1 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1670,6 +1670,15 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { } } + // check whole vnodes is offline on this vgroup + if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pResp->code == TSDB_CODE_RPC_BROKEN_LINK) { + if (pCtx->epsetRetryCnt >= pCtx->epSet.numOfEps || pCtx->retryStep > 0) { + if (pMsg->msg.msgType == TDMT_VND_SUBMIT || pMsg->msg.msgType == TDMT_SCH_QUERY) { + pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + } + } + } + STraceId* trace = &pResp->info.traceId; bool hasEpSet = cliTryExtractEpSet(pResp, &pCtx->epSet); if (hasEpSet) { diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 4b9dde5059..1871697ddc 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -51,6 +51,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_RPC_FQDN_ERROR, "Unable to resolve FQD TAOS_DEFINE_ERROR(TSDB_CODE_RPC_PORT_EADDRINUSE, "Port already in use") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_BROKEN_LINK, "Conn is broken") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_TIMEOUT, "Conn read timeout") +TAOS_DEFINE_ERROR(TSDB_CODE_RPC_VGROUP_NOT_CONNECTED, "Vgroup could not be connected") //common & util TAOS_DEFINE_ERROR(TSDB_CODE_TIME_UNSYNCED, "Client and server's time is not synchronized") From 7d9e5924fa1203ca570fcfa7e7c77fa791c6133c Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 20 Dec 2022 18:06:04 +0800 Subject: [PATCH 02/53] fix(query): remove check msgType --- source/libs/transport/src/transCli.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 04783605f1..62355b710b 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1673,9 +1673,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { // check whole vnodes is offline on this vgroup if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pResp->code == TSDB_CODE_RPC_BROKEN_LINK) { if (pCtx->epsetRetryCnt >= pCtx->epSet.numOfEps || pCtx->retryStep > 0) { - if (pMsg->msg.msgType == TDMT_VND_SUBMIT || pMsg->msg.msgType == TDMT_SCH_QUERY) { pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; - } } } From df6102f66e588c5d60748aedcb037a58fdfaa078 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 20 Dec 2022 18:14:24 +0800 Subject: [PATCH 03/53] fix(query): TSDB_CODE_RPC_VGROUP_NOT_CONNECTED same condition check with TSDB_CODE_RPC_NETWORK_UNAVAIL --- include/libs/qcom/query.h | 2 +- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 1 + source/dnode/mnode/impl/src/mndTrans.c | 2 +- source/libs/function/src/udfd.c | 1 + source/libs/scheduler/inc/schInt.h | 2 +- source/libs/transport/src/transCli.c | 2 +- 6 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index 3f11d2a218..efc86d302d 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -268,7 +268,7 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t ((_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_SYN_RESTORING || (_code) == TSDB_CODE_SYN_INTERNAL_ERROR) #define SYNC_OTHER_LEADER_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_MNODE_NOT_FOUND) -#define NO_RET_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL) +#define NO_RET_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) #define NEED_REDIRECT_ERROR(_code) \ (NO_RET_REDIRECT_ERROR(_code) || SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(_code) || \ diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 5e1dcc6353..fe676c769c 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -233,6 +233,7 @@ static inline void dmReleaseHandle(SRpcHandleInfo *pHandle, int8_t type) { rpcRe static bool rpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_MNODE_NOT_FOUND || + code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED || code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_SYN_RESTORING || code == TSDB_CODE_VND_STOPPED || code == TSDB_CODE_APP_IS_STARTING || code == TSDB_CODE_APP_IS_STOPPING) { if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index b92be19741..35a094cb65 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -957,7 +957,7 @@ static void mndTransSendRpcRsp(SMnode *pMnode, STrans *pTrans) { for (int32_t i = 0; i < size; ++i) { SRpcHandleInfo *pInfo = taosArrayGet(pTrans->pRpcArray, i); if (pInfo->handle != NULL) { - if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { code = TSDB_CODE_MND_TRANS_NETWORK_UNAVAILL; } if (i != 0 && code == 0) { diff --git a/source/libs/function/src/udfd.c b/source/libs/function/src/udfd.c index 40c75ce6ba..d6753a55fc 100644 --- a/source/libs/function/src/udfd.c +++ b/source/libs/function/src/udfd.c @@ -606,6 +606,7 @@ int32_t udfdLoadUdf(char *udfName, SUdf *udf) { } static bool udfdRpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_SYN_NOT_LEADER || + code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED || code == TSDB_CODE_SYN_RESTORING || code == TSDB_CODE_MNODE_NOT_FOUND || code == TSDB_CODE_APP_IS_STARTING || code == TSDB_CODE_APP_IS_STOPPING) { if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || diff --git a/source/libs/scheduler/inc/schInt.h b/source/libs/scheduler/inc/schInt.h index 48df7e36a3..2ab37c1715 100644 --- a/source/libs/scheduler/inc/schInt.h +++ b/source/libs/scheduler/inc/schInt.h @@ -375,7 +375,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_JOB_NEED_WAIT(_job) (!SCH_IS_QUERY_JOB(_job)) #define SCH_JOB_NEED_DROP(_job) (SCH_IS_QUERY_JOB(_job)) #define SCH_IS_EXPLAIN_JOB(_job) (EXPLAIN_MODE_ANALYZE == (_job)->attr.explainMode) -#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL) +#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) #define SCH_MERGE_TASK_NETWORK_ERR(_task, _code, _len) \ (SCH_NETWORK_ERR(_code) && (((_len) > 0) || (!SCH_IS_DATA_BIND_TASK(_task)) || (_task)->redirectCtx.inRedirect)) #define SCH_REDIRECT_MSGTYPE(_msgType) \ diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 62355b710b..e8b8c95eb1 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1665,7 +1665,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { if (pCtx->retryCode != TSDB_CODE_SUCCESS) { int32_t code = pResp->code; // return internal code app - if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK) { + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { pResp->code = pCtx->retryCode; } } From c66b7df5d717d26acace1e2d6d915348922d0506 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 20 Dec 2022 19:13:00 +0800 Subject: [PATCH 04/53] feat(rpc): fixed build error --- include/libs/qcom/query.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index efc86d302d..98cc95facf 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -268,7 +268,7 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t ((_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_SYN_RESTORING || (_code) == TSDB_CODE_SYN_INTERNAL_ERROR) #define SYNC_OTHER_LEADER_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_MNODE_NOT_FOUND) -#define NO_RET_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) +#define NO_RET_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) #define NEED_REDIRECT_ERROR(_code) \ (NO_RET_REDIRECT_ERROR(_code) || SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(_code) || \ From 8774d2c17b8e6b81dda6c18b0977b358964ec479 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 20 Dec 2022 19:14:57 +0800 Subject: [PATCH 05/53] feat(rpc): fixed build error --- source/libs/scheduler/inc/schInt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/scheduler/inc/schInt.h b/source/libs/scheduler/inc/schInt.h index 2ab37c1715..7730a722ab 100644 --- a/source/libs/scheduler/inc/schInt.h +++ b/source/libs/scheduler/inc/schInt.h @@ -375,7 +375,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_JOB_NEED_WAIT(_job) (!SCH_IS_QUERY_JOB(_job)) #define SCH_JOB_NEED_DROP(_job) (SCH_IS_QUERY_JOB(_job)) #define SCH_IS_EXPLAIN_JOB(_job) (EXPLAIN_MODE_ANALYZE == (_job)->attr.explainMode) -#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) +#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) #define SCH_MERGE_TASK_NETWORK_ERR(_task, _code, _len) \ (SCH_NETWORK_ERR(_code) && (((_len) > 0) || (!SCH_IS_DATA_BIND_TASK(_task)) || (_task)->redirectCtx.inRedirect)) #define SCH_REDIRECT_MSGTYPE(_msgType) \ From 37ea86f8ed0238dd32e0c3394a60775d1e93d392 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 20 Dec 2022 21:13:33 +0800 Subject: [PATCH 06/53] fix(rpc): add msgType condition --- source/libs/transport/src/transCli.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index e8b8c95eb1..bf43b0c0d0 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1673,7 +1673,14 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { // check whole vnodes is offline on this vgroup if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pResp->code == TSDB_CODE_RPC_BROKEN_LINK) { if (pCtx->epsetRetryCnt >= pCtx->epSet.numOfEps || pCtx->retryStep > 0) { - pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + switch (pMsg->msg.msgType) { + case TDMT_VND_BATCH_META: + case TDMT_VND_SUBMIT: + case TDMT_SCH_QUERY: + case TDMT_SCH_MERGE_QUERY: + pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + break; + } } } From 15160544c5fec4521cfc7f1f72b083a524eca235 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Wed, 21 Dec 2022 11:48:24 +0800 Subject: [PATCH 07/53] feat(rpc): move the check msgType to client --- include/util/taoserror.h | 1 + source/client/src/clientImpl.c | 20 ++++++++++++++++++++ source/libs/transport/src/transCli.c | 15 +++++---------- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index f1f35a67a4..8d79946633 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -66,6 +66,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RPC_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0018) // #define TSDB_CODE_RPC_TIMEOUT TAOS_DEF_ERROR_CODE(0, 0x0019) // #define TSDB_CODE_RPC_VGROUP_NOT_CONNECTED TAOS_DEF_ERROR_CODE(0, 0x0020) // "Vgroup could not be connected" +#define TSDB_CODE_RPC_VGROUP_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0021) // //common & util #define TSDB_CODE_OPS_NOT_SUPPORT TAOS_DEF_ERROR_CODE(0, 0x0100) // diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index d792896b2d..1e22498b50 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1424,6 +1424,26 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { memcpy((void*)tEpSet, (void*)pEpSet, sizeof(SEpSet)); } + switch (pMsg->msg.msgType) { + case TDMT_VND_BATCH_META: + case TDMT_VND_SUBMIT: + case TDMT_SCH_QUERY: + case TDMT_SCH_MERGE_QUERY: + // uniform to one error code: TSDB_CODE_RPC_VGROUP_NOT_CONNECTED + if (pMsg->code == TSDB_CODE_RPC_VGROUP_BROKEN_LINK) { + pMsg->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + } + break; + default: + // restore origin code + if (pMsg->code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { + pMsg->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; + } else if (pMsg->code == TSDB_CODE_RPC_VGROUP_BROKEN_LINK) { + pMsg->code = TSDB_CODE_RPC_BROKEN_LINK; + } + break; + } + AsyncArg* arg = taosMemoryCalloc(1, sizeof(AsyncArg)); arg->msg = *pMsg; arg->pEpset = tEpSet; diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index bf43b0c0d0..21e66870e0 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1671,16 +1671,11 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { } // check whole vnodes is offline on this vgroup - if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pResp->code == TSDB_CODE_RPC_BROKEN_LINK) { - if (pCtx->epsetRetryCnt >= pCtx->epSet.numOfEps || pCtx->retryStep > 0) { - switch (pMsg->msg.msgType) { - case TDMT_VND_BATCH_META: - case TDMT_VND_SUBMIT: - case TDMT_SCH_QUERY: - case TDMT_SCH_MERGE_QUERY: - pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; - break; - } + if (pCtx->epsetRetryCnt >= pCtx->epSet.numOfEps || pCtx->retryStep > 0) { + if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + } else if (pResp->code == TSDB_CODE_RPC_BROKEN_LINK) { + pResp->code = TSDB_CODE_RPC_VGROUP_BROKEN_LINK; } } From 5ca52595aedba3a85f9d94d93803750a941857f4 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Wed, 21 Dec 2022 12:20:26 +0800 Subject: [PATCH 08/53] feat(rpc): move the check msgType to client build error --- source/client/src/clientImpl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 1e22498b50..0bf91c6db8 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1424,7 +1424,7 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { memcpy((void*)tEpSet, (void*)pEpSet, sizeof(SEpSet)); } - switch (pMsg->msg.msgType) { + switch (pMsg->msgType) { case TDMT_VND_BATCH_META: case TDMT_VND_SUBMIT: case TDMT_SCH_QUERY: From ced76efde0ec2b8d40eca54a23cf6b7e63df99b7 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Wed, 21 Dec 2022 12:44:29 +0800 Subject: [PATCH 09/53] feat(rpc): move the check msgType to cliet --- source/client/src/clientImpl.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 0bf91c6db8..2e76d50be5 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1424,11 +1424,12 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { memcpy((void*)tEpSet, (void*)pEpSet, sizeof(SEpSet)); } + // pMsg is response msg switch (pMsg->msgType) { - case TDMT_VND_BATCH_META: - case TDMT_VND_SUBMIT: - case TDMT_SCH_QUERY: - case TDMT_SCH_MERGE_QUERY: + case TDMT_VND_BATCH_META + 1: + case TDMT_VND_SUBMIT + 1: + case TDMT_SCH_QUERY + 1: + case TDMT_SCH_MERGE_QUERY + 1: // uniform to one error code: TSDB_CODE_RPC_VGROUP_NOT_CONNECTED if (pMsg->code == TSDB_CODE_RPC_VGROUP_BROKEN_LINK) { pMsg->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; From cd1b87031a35c78bd450ca97accd9f8cc3fb3ead Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Thu, 22 Dec 2022 10:22:51 +0800 Subject: [PATCH 10/53] fix(query): modify error describe --- include/libs/qcom/query.h | 2 +- include/util/taoserror.h | 4 ++-- source/client/src/clientImpl.c | 10 +++++----- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 2 +- source/dnode/mnode/impl/src/mndTrans.c | 2 +- source/libs/function/src/udfd.c | 2 +- source/libs/scheduler/inc/schInt.h | 2 +- source/libs/transport/src/transCli.c | 6 +++--- source/util/src/terror.c | 2 +- 9 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index 98cc95facf..5b640dce92 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -268,7 +268,7 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t ((_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_SYN_RESTORING || (_code) == TSDB_CODE_SYN_INTERNAL_ERROR) #define SYNC_OTHER_LEADER_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_MNODE_NOT_FOUND) -#define NO_RET_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) +#define NO_RET_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) #define NEED_REDIRECT_ERROR(_code) \ (NO_RET_REDIRECT_ERROR(_code) || SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(_code) || \ diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 8d79946633..6dcf244823 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -65,8 +65,8 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RPC_PORT_EADDRINUSE TAOS_DEF_ERROR_CODE(0, 0x0017) // #define TSDB_CODE_RPC_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0018) // #define TSDB_CODE_RPC_TIMEOUT TAOS_DEF_ERROR_CODE(0, 0x0019) // -#define TSDB_CODE_RPC_VGROUP_NOT_CONNECTED TAOS_DEF_ERROR_CODE(0, 0x0020) // "Vgroup could not be connected" -#define TSDB_CODE_RPC_VGROUP_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0021) // +#define TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED TAOS_DEF_ERROR_CODE(0, 0x0020) // "Vgroup could not be connected" +#define TSDB_CODE_RPC_SOMENODE_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0021) // //common & util #define TSDB_CODE_OPS_NOT_SUPPORT TAOS_DEF_ERROR_CODE(0, 0x0100) // diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 2e76d50be5..b480b0b230 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1430,16 +1430,16 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { case TDMT_VND_SUBMIT + 1: case TDMT_SCH_QUERY + 1: case TDMT_SCH_MERGE_QUERY + 1: - // uniform to one error code: TSDB_CODE_RPC_VGROUP_NOT_CONNECTED - if (pMsg->code == TSDB_CODE_RPC_VGROUP_BROKEN_LINK) { - pMsg->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + // uniform to one error code: TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED + if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { + pMsg->code = TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED; } break; default: // restore origin code - if (pMsg->code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { + if (pMsg->code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) { pMsg->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; - } else if (pMsg->code == TSDB_CODE_RPC_VGROUP_BROKEN_LINK) { + } else if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { pMsg->code = TSDB_CODE_RPC_BROKEN_LINK; } break; diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index fe676c769c..2dd98f8cd1 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -233,7 +233,7 @@ static inline void dmReleaseHandle(SRpcHandleInfo *pHandle, int8_t type) { rpcRe static bool rpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_MNODE_NOT_FOUND || - code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED || + code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED || code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_SYN_RESTORING || code == TSDB_CODE_VND_STOPPED || code == TSDB_CODE_APP_IS_STARTING || code == TSDB_CODE_APP_IS_STOPPING) { if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 35a094cb65..c8d22d13ea 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -957,7 +957,7 @@ static void mndTransSendRpcRsp(SMnode *pMnode, STrans *pTrans) { for (int32_t i = 0; i < size; ++i) { SRpcHandleInfo *pInfo = taosArrayGet(pTrans->pRpcArray, i); if (pInfo->handle != NULL) { - if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) { code = TSDB_CODE_MND_TRANS_NETWORK_UNAVAILL; } if (i != 0 && code == 0) { diff --git a/source/libs/function/src/udfd.c b/source/libs/function/src/udfd.c index d6753a55fc..6c88e4d5c8 100644 --- a/source/libs/function/src/udfd.c +++ b/source/libs/function/src/udfd.c @@ -606,7 +606,7 @@ int32_t udfdLoadUdf(char *udfName, SUdf *udf) { } static bool udfdRpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_SYN_NOT_LEADER || - code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED || + code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED || code == TSDB_CODE_SYN_RESTORING || code == TSDB_CODE_MNODE_NOT_FOUND || code == TSDB_CODE_APP_IS_STARTING || code == TSDB_CODE_APP_IS_STOPPING) { if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || diff --git a/source/libs/scheduler/inc/schInt.h b/source/libs/scheduler/inc/schInt.h index 7730a722ab..e8216fcd7c 100644 --- a/source/libs/scheduler/inc/schInt.h +++ b/source/libs/scheduler/inc/schInt.h @@ -375,7 +375,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_JOB_NEED_WAIT(_job) (!SCH_IS_QUERY_JOB(_job)) #define SCH_JOB_NEED_DROP(_job) (SCH_IS_QUERY_JOB(_job)) #define SCH_IS_EXPLAIN_JOB(_job) (EXPLAIN_MODE_ANALYZE == (_job)->attr.explainMode) -#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) +#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) #define SCH_MERGE_TASK_NETWORK_ERR(_task, _code, _len) \ (SCH_NETWORK_ERR(_code) && (((_len) > 0) || (!SCH_IS_DATA_BIND_TASK(_task)) || (_task)->redirectCtx.inRedirect)) #define SCH_REDIRECT_MSGTYPE(_msgType) \ diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 21e66870e0..1dc79e0cfb 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1665,7 +1665,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { if (pCtx->retryCode != TSDB_CODE_SUCCESS) { int32_t code = pResp->code; // return internal code app - if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) { pResp->code = pCtx->retryCode; } } @@ -1673,9 +1673,9 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { // check whole vnodes is offline on this vgroup if (pCtx->epsetRetryCnt >= pCtx->epSet.numOfEps || pCtx->retryStep > 0) { if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { - pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + pResp->code = TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED; } else if (pResp->code == TSDB_CODE_RPC_BROKEN_LINK) { - pResp->code = TSDB_CODE_RPC_VGROUP_BROKEN_LINK; + pResp->code = TSDB_CODE_RPC_SOMENODE_BROKEN_LINK; } } diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 1871697ddc..ec469e3c62 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -51,7 +51,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_RPC_FQDN_ERROR, "Unable to resolve FQD TAOS_DEFINE_ERROR(TSDB_CODE_RPC_PORT_EADDRINUSE, "Port already in use") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_BROKEN_LINK, "Conn is broken") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_TIMEOUT, "Conn read timeout") -TAOS_DEFINE_ERROR(TSDB_CODE_RPC_VGROUP_NOT_CONNECTED, "Vgroup could not be connected") +TAOS_DEFINE_ERROR(TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED, "some vnode/qnode/mnode(s) out of service") //common & util TAOS_DEFINE_ERROR(TSDB_CODE_TIME_UNSYNCED, "Client and server's time is not synchronized") From 48b6bd438d5d10529a97f73b0656ad55f567a275 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Thu, 22 Dec 2022 17:30:02 +0800 Subject: [PATCH 11/53] fix: restart snapshot sender on receiver is restart --- include/util/taoserror.h | 1 + source/dnode/vnode/src/vnd/vnodeCommit.c | 7 +- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 2 +- source/dnode/vnode/src/vnd/vnodeSync.c | 4 +- source/libs/sync/src/syncSnapshot.c | 137 ++++++++++++--------- source/libs/sync/test/syncRaftLogTest2.cpp | 2 +- source/libs/sync/test/syncRaftLogTest3.cpp | 2 +- source/util/src/terror.c | 1 + 8 files changed, 90 insertions(+), 66 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 7cc1a47404..2d7b15ebda 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -517,6 +517,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_SYN_STANDBY_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x0912) #define TSDB_CODE_SYN_BATCH_ERROR TAOS_DEF_ERROR_CODE(0, 0x0913) #define TSDB_CODE_SYN_RESTORING TAOS_DEF_ERROR_CODE(0, 0x0914) +#define TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG TAOS_DEF_ERROR_CODE(0, 0x0915) // internal #define TSDB_CODE_SYN_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x09FF) // tq diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index be977e7cbd..f78956a431 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -234,10 +234,11 @@ int vnodeAsyncCommit(SVnode *pVnode) { _exit: if (code) { - vError("vgId:%d, %s failed since %s, commit id:%" PRId64, TD_VID(pVnode), __func__, tstrerror(code), + vError("vgId:%d, vnode async commit failed since %s, commitId:%" PRId64, TD_VID(pVnode), tstrerror(code), pVnode->state.commitID); } else { - vDebug("vgId:%d, %s done", TD_VID(pVnode), __func__); + vInfo("vgId:%d, vnode async commit done, commitId:%" PRId64 " term:%" PRId64 " applied:%" PRId64, TD_VID(pVnode), + pVnode->state.commitID, pVnode->state.applyTerm, pVnode->state.applied); } return code; } @@ -256,7 +257,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { char dir[TSDB_FILENAME_LEN] = {0}; SVnode *pVnode = pInfo->pVnode; - vInfo("vgId:%d, start to commit, commit ID:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode), + vInfo("vgId:%d, start to commit, commitId:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode), pVnode->state.commitID, pVnode->state.applied, pVnode->state.applyTerm); // persist wal before starting diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index fcfacd1ca9..dbd06d6ec0 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -423,7 +423,7 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { ASSERT(pHdr->index == pWriter->index + 1); pWriter->index = pHdr->index; - vInfo("vgId:%d, vnode snapshot write data, index:%" PRId64 " type:%d nData:%d", TD_VID(pVnode), pHdr->index, + vInfo("vgId:%d, vnode snapshot write data, index:%" PRId64 " type:%d blockLen:%d", TD_VID(pVnode), pHdr->index, pHdr->type, nData); switch (pHdr->type) { diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 5caaae502f..0437703c92 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -465,9 +465,9 @@ static int32_t vnodeSnapshotStopWrite(const SSyncFSM *pFsm, void *pWriter, bool static int32_t vnodeSnapshotDoWrite(const SSyncFSM *pFsm, void *pWriter, void *pBuf, int32_t len) { SVnode *pVnode = pFsm->data; - vDebug("vgId:%d, continue write vnode snapshot, len:%d", pVnode->config.vgId, len); + vDebug("vgId:%d, continue write vnode snapshot, blockLen:%d", pVnode->config.vgId, len); int32_t code = vnodeSnapWrite(pWriter, pBuf, len); - vDebug("vgId:%d, continue write vnode snapshot finished, len:%d", pVnode->config.vgId, len); + vDebug("vgId:%d, continue write vnode snapshot finished, blockLen:%d", pVnode->config.vgId, len); return code; } diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 30324c1113..7a35f90165 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -294,7 +294,7 @@ int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) { } if (snapshotSenderIsStart(pSender)) { - sSError(pSender, "snapshot sender already start, ignore"); + sSInfo(pSender, "snapshot sender already start, ignore"); return 0; } @@ -523,7 +523,7 @@ static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnap static int32_t snapshotReceiverGotData(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg) { if (pMsg->seq != pReceiver->ack + 1) { sRError(pReceiver, "snapshot receiver invalid seq, ack:%d seq:%d", pReceiver->ack, pMsg->seq); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + terrno = TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG; return -1; } @@ -721,8 +721,12 @@ static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotS timeNow = taosGetTimestampMs(); } + int32_t code = 0; if (snapshotReceiverGotData(pReceiver, pMsg) != 0) { - return -1; + code = terrno; + if (code >= SYNC_SNAPSHOT_SEQ_INVALID) { + code = TSDB_CODE_SYN_INTERNAL_ERROR; + } } // build msg @@ -740,7 +744,7 @@ static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotS pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pReceiver->startTime; pRspMsg->ack = pReceiver->ack; // receiver maybe already closed - pRspMsg->code = 0; + pRspMsg->code = code; pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; // send msg @@ -861,7 +865,7 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process force stop"); snapshotReceiverForceStop(pReceiver); } else if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) { - syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq"); + syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq data"); syncNodeOnSnapshotTransfering(pSyncNode, pMsg); } else { // error log @@ -982,68 +986,85 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { } // state, term, seq/ack - if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { - if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { - // prepare , send begin msg - if (pMsg->ack == SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq pre-snapshot"); - syncNodeOnSnapshotReplyPre(pSyncNode, pMsg); - return 0; - } + if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { + sSError(pSender, "snapshot sender not leader"); + return -1; + } - if (pMsg->ack == SYNC_SNAPSHOT_SEQ_BEGIN) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq begin"); - if (snapshotSenderUpdateProgress(pSender, pMsg) != 0) { - return -1; - } + if (pMsg->term != pSyncNode->pRaftStore->currentTerm) { + sSError(pSender, "snapshot sender term not equal"); + return -1; + } - if (snapshotSend(pSender) != 0) { - return -1; - } - return 0; - } + if (pMsg->code != 0) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error code"); + sSError(pSender, "snapshot sender receive error code:0x%x and stop sender", pMsg->code); + snapshotSenderStop(pSender, true); + SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); + if (pMgr) { + syncLogReplMgrReset(pMgr); + } - // receive ack is finish, close sender - if (pMsg->ack == SYNC_SNAPSHOT_SEQ_END) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq end"); - snapshotSenderStop(pSender, true); - SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); - if (pMgr) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "reset repl mgr"); - syncLogReplMgrReset(pMgr); - } - return 0; - } + return -1; + } - // send next msg - if (pMsg->ack == pSender->seq) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq"); - // update sender ack - if (snapshotSenderUpdateProgress(pSender, pMsg) != 0) { - return -1; - } - if (snapshotSend(pSender) != 0) { - return -1; - } + // prepare , send begin msg + if (pMsg->ack == SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq pre-snapshot"); + syncNodeOnSnapshotReplyPre(pSyncNode, pMsg); + return 0; + } - } else if (pMsg->ack == pSender->seq - 1) { - // maybe resend - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq and resend"); - snapshotReSend(pSender); - - } else { - // error log - sSError(pSender, "snapshot sender recv error ack:%d, my seq:%d", pMsg->ack, pSender->seq); - return -1; - } - } else { - // error log - sSError(pSender, "snapshot sender term not equal"); + if (pMsg->ack == SYNC_SNAPSHOT_SEQ_BEGIN) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq begin"); + if (snapshotSenderUpdateProgress(pSender, pMsg) != 0) { return -1; } + + if (snapshotSend(pSender) != 0) { + return -1; + } + return 0; + } + + // receive ack is finish, close sender + if (pMsg->ack == SYNC_SNAPSHOT_SEQ_END) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq end"); + snapshotSenderStop(pSender, true); + SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); + if (pMgr) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "reset repl mgr"); + syncLogReplMgrReset(pMgr); + } + return 0; + } + + // send next msg + if (pMsg->ack == pSender->seq) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq data"); + // update sender ack + if (snapshotSenderUpdateProgress(pSender, pMsg) != 0) { + return -1; + } + if (snapshotSend(pSender) != 0) { + return -1; + } + + } else if (pMsg->ack == pSender->seq - 1) { + // maybe resend + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq and resend"); + snapshotReSend(pSender); + } else { // error log - sSError(pSender, "snapshot sender not leader"); + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error ack"); + sSError(pSender, "snapshot sender receive error ack:%d, my seq:%d", pMsg->ack, pSender->seq); + snapshotSenderStop(pSender, true); + SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); + if (pMgr) { + syncLogReplMgrReset(pMgr); + } + return -1; } diff --git a/source/libs/sync/test/syncRaftLogTest2.cpp b/source/libs/sync/test/syncRaftLogTest2.cpp index a7752dcb8b..de9137fbe1 100644 --- a/source/libs/sync/test/syncRaftLogTest2.cpp +++ b/source/libs/sync/test/syncRaftLogTest2.cpp @@ -47,7 +47,7 @@ void init() { pSyncNode->pWal = pWal; pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); - pSyncNode->pFsm->FpGetSnapshotInfo = GetSnapshotCb; + // pSyncNode->pFsm->FpGetSnapshotInfo = GetSnapshotCb; } void cleanup() { diff --git a/source/libs/sync/test/syncRaftLogTest3.cpp b/source/libs/sync/test/syncRaftLogTest3.cpp index 31c06625aa..1eed08102f 100644 --- a/source/libs/sync/test/syncRaftLogTest3.cpp +++ b/source/libs/sync/test/syncRaftLogTest3.cpp @@ -47,7 +47,7 @@ void init() { pSyncNode->pWal = pWal; pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); - pSyncNode->pFsm->FpGetSnapshotInfo = GetSnapshotCb; + // pSyncNode->pFsm->FpGetSnapshotInfo = GetSnapshotCb; } void cleanup() { diff --git a/source/util/src/terror.c b/source/util/src/terror.c index d73c8661fc..e59b1daa05 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -405,6 +405,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SYN_PROPOSE_NOT_READY, "Sync not ready for pr TAOS_DEFINE_ERROR(TSDB_CODE_SYN_STANDBY_NOT_READY, "Sync not ready for standby") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_BATCH_ERROR, "Sync batch error") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_RESTORING, "Sync is restoring") +TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG, "Sync invalid snapshot msg") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INTERNAL_ERROR, "Sync internal error") //tq From 336ee146e618d4f81c42c3b4eba128ca66113d0d Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 22 Dec 2022 17:56:56 +0800 Subject: [PATCH 12/53] fix(tdb/restore): seek jfd to begin --- source/libs/tdb/src/db/tdbPager.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 8d9933b160..2a2a6f8bbd 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -869,6 +869,12 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + if (tdbOsLSeek(jfd, 0L, SEEK_SET) < 0) { + tdbError("failed to lseek jfd due to %s. file:%s, offset:0", strerror(errno), pPager->dbFileName); + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + pageBuf = tdbOsCalloc(1, pPager->pageSize); if (pageBuf == NULL) { return -1; From 99bc54dfd9dc66ffd952a74da6c68883ca9c1221 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 22 Dec 2022 20:29:42 +0800 Subject: [PATCH 13/53] tdb/pager: debug logs for pager restore --- source/libs/tdb/src/db/tdbPager.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 2a2a6f8bbd..e0c3397d63 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -880,6 +880,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbDebug("pager/restore: %p, %d/%d, txnId:%" PRId64, pPager, pPager->dbOrigSize, pPager->dbFileSize, pTxn->txnId); + for (int pgIndex = 0; pgIndex < journalSize; ++pgIndex) { // read pgno & the page from journal SPgno pgno; @@ -890,6 +892,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbTrace("pager/restore: restore pgno:%d,", pgno); + ret = tdbOsRead(jfd, pageBuf, pPager->pageSize); if (ret < 0) { tdbOsFree(pageBuf); From 78af4f54a3108246b397d5859a6f3f4795b0e440 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 22 Dec 2022 20:29:42 +0800 Subject: [PATCH 14/53] tdb/pager: debug logs for pager restore --- source/libs/tdb/src/db/tdbPager.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 2a2a6f8bbd..eb7bcf9385 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -880,6 +880,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbDebug("pager/restore: %p, %d/%d, txnId:%s", pPager, pPager->dbOrigSize, pPager->dbFileSize, jFileName); + for (int pgIndex = 0; pgIndex < journalSize; ++pgIndex) { // read pgno & the page from journal SPgno pgno; @@ -890,6 +892,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbTrace("pager/restore: restore pgno:%d,", pgno); + ret = tdbOsRead(jfd, pageBuf, pPager->pageSize); if (ret < 0) { tdbOsFree(pageBuf); @@ -949,7 +953,12 @@ int tdbPagerRestoreJournals(SPager *pPager) { while ((pDirEntry = tdbReadDir(pDir)) != NULL) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { - if (tdbPagerRestore(pPager, name) < 0) { + char jname[TD_PATH_MAX] = {0}; + int dirLen = strlen(pPager->pEnv->dbName); + memcpy(jname, pPager->pEnv->dbName, dirLen); + jname[dirLen] = '/'; + memcpy(jname + dirLen + 1, name, strlen(name)); + if (tdbPagerRestore(pPager, jname) < 0) { tdbCloseDir(&pDir); tdbError("failed to restore file due to %s. jFileName:%s", strerror(errno), name); From 7a38465c760eedb9f0893c8579c33cab3676879a Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 22 Dec 2022 20:29:42 +0800 Subject: [PATCH 15/53] tdb/pager: debug logs for pager restore --- source/libs/tdb/src/db/tdbPager.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 2a2a6f8bbd..ce20f6808f 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -880,6 +880,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbDebug("pager/restore: %p, %d/%d, txnId:%s", pPager, pPager->dbOrigSize, pPager->dbFileSize, jFileName); + for (int pgIndex = 0; pgIndex < journalSize; ++pgIndex) { // read pgno & the page from journal SPgno pgno; @@ -890,6 +892,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbTrace("pager/restore: restore pgno:%d,", pgno); + ret = tdbOsRead(jfd, pageBuf, pPager->pageSize); if (ret < 0) { tdbOsFree(pageBuf); @@ -929,7 +933,7 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } - if (tdbOsRemove(pPager->jFileName) < 0 && errno != ENOENT) { + if (tdbOsRemove(jFileName) < 0 && errno != ENOENT) { tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), pPager->jFileName); terrno = TAOS_SYSTEM_ERROR(errno); return -1; @@ -949,7 +953,12 @@ int tdbPagerRestoreJournals(SPager *pPager) { while ((pDirEntry = tdbReadDir(pDir)) != NULL) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { - if (tdbPagerRestore(pPager, name) < 0) { + char jname[TD_PATH_MAX] = {0}; + int dirLen = strlen(pPager->pEnv->dbName); + memcpy(jname, pPager->pEnv->dbName, dirLen); + jname[dirLen] = '/'; + memcpy(jname + dirLen + 1, name, strlen(name)); + if (tdbPagerRestore(pPager, jname) < 0) { tdbCloseDir(&pDir); tdbError("failed to restore file due to %s. jFileName:%s", strerror(errno), name); From 773423a6b64e8f5951affa2f7fe7b156ad7cbf1f Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 23 Dec 2022 09:11:45 +0800 Subject: [PATCH 16/53] fix: remove some logs --- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 4 ++-- source/libs/sync/src/syncSnapshot.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index dbd06d6ec0..0362d4af2a 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -423,8 +423,8 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { ASSERT(pHdr->index == pWriter->index + 1); pWriter->index = pHdr->index; - vInfo("vgId:%d, vnode snapshot write data, index:%" PRId64 " type:%d blockLen:%d", TD_VID(pVnode), pHdr->index, - pHdr->type, nData); + vDebug("vgId:%d, vnode snapshot write data, index:%" PRId64 " type:%d blockLen:%d", TD_VID(pVnode), pHdr->index, + pHdr->type, nData); switch (pHdr->type) { case SNAP_DATA_CFG: { diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 7a35f90165..9e1c6b36f1 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -49,7 +49,6 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI pSender->pSyncNode->pFsm->FpGetSnapshotInfo(pSender->pSyncNode->pFsm, &pSender->snapshot); pSender->finish = false; - sDebug("vgId:%d, snapshot sender create", pSender->pSyncNode->vgId); return pSender; } From 0c64cfc309e41de75bb10ef2ae7c126413ad949b Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 22 Dec 2022 20:29:42 +0800 Subject: [PATCH 17/53] tdb/pager: debug logs for pager restore --- source/libs/tdb/src/db/tdbPager.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 2a2a6f8bbd..62d82edeb1 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -880,6 +880,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbDebug("pager/restore: %p, %d/%d, txnId:%s", pPager, pPager->dbOrigSize, pPager->dbFileSize, jFileName); + for (int pgIndex = 0; pgIndex < journalSize; ++pgIndex) { // read pgno & the page from journal SPgno pgno; @@ -890,6 +892,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbTrace("pager/restore: restore pgno:%d,", pgno); + ret = tdbOsRead(jfd, pageBuf, pPager->pageSize); if (ret < 0) { tdbOsFree(pageBuf); @@ -929,7 +933,7 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } - if (tdbOsRemove(pPager->jFileName) < 0 && errno != ENOENT) { + if (tdbOsRemove(jFileName) < 0 && errno != ENOENT) { tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), pPager->jFileName); terrno = TAOS_SYSTEM_ERROR(errno); return -1; @@ -949,7 +953,12 @@ int tdbPagerRestoreJournals(SPager *pPager) { while ((pDirEntry = tdbReadDir(pDir)) != NULL) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { - if (tdbPagerRestore(pPager, name) < 0) { + char jname[TD_PATH_MAX] = {0}; + int dirLen = strlen(pPager->pEnv->dbName); + memcpy(jname, pPager->pEnv->dbName, dirLen); + jname[dirLen] = '/'; + memcpy(jname + dirLen + 1, name, strlen(name)); + if (tdbPagerRestore(pPager, jname) < 0) { tdbCloseDir(&pDir); tdbError("failed to restore file due to %s. jFileName:%s", strerror(errno), name); @@ -975,7 +984,12 @@ int tdbPagerRollback(SPager *pPager) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { - if (tdbOsRemove(name) < 0 && errno != ENOENT) { + char jname[TD_PATH_MAX] = {0}; + int dirLen = strlen(pPager->pEnv->dbName); + memcpy(jname, pPager->pEnv->dbName, dirLen); + jname[dirLen] = '/'; + memcpy(jname + dirLen + 1, name, strlen(name)); + if (tdbOsRemove(jname) < 0 && errno != ENOENT) { tdbCloseDir(&pDir); tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), name); From b59bee6696b159ad6bb8fed620de3f70057174f1 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 23 Dec 2022 11:03:50 +0800 Subject: [PATCH 18/53] fix: restart snapshot sender on receiver is restart --- source/libs/sync/src/syncPipeline.c | 2 +- source/libs/sync/src/syncRaftLog.c | 24 ++++++++--------- source/libs/sync/src/syncSnapshot.c | 41 ++++++++++++++++++----------- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index f438856ace..225de30755 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -112,7 +112,7 @@ SyncTerm syncLogReplMgrGetPrevLogTerm(SSyncLogReplMgr* pMgr, SSyncNode* pNode, S return prevLogTerm; } - sError("vgId:%d, failed to get log term since %s. index: %" PRId64 "", pNode->vgId, terrstr(), prevIndex); + sInfo("vgId:%d, failed to get log term since %s. index:%" PRId64, pNode->vgId, terrstr(), prevIndex); terrno = TSDB_CODE_WAL_LOG_NOT_EXIST; return -1; } diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 018ac5bb7d..3f9f397ef5 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -115,8 +115,8 @@ static int32_t raftLogRestoreFromSnapshot(struct SSyncLogStore* pLogStore, SyncI const char* sysErrStr = strerror(errno); sNError(pData->pSyncNode, - "wal restore from snapshot error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", - snapshotIndex, err, err, errStr, sysErr, sysErrStr); + "wal restore from snapshot error, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", snapshotIndex, + err, errStr, sysErr, sysErrStr); return -1; } @@ -212,8 +212,8 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr int32_t sysErr = errno; const char* sysErrStr = strerror(errno); - sNError(pData->pSyncNode, "wal write error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", - pEntry->index, err, err, errStr, sysErr, sysErrStr); + sNError(pData->pSyncNode, "wal write error, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", + pEntry->index, err, errStr, sysErr, sysErrStr); return -1; } @@ -257,11 +257,11 @@ int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncR const char* sysErrStr = strerror(errno); if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { - sNTrace(pData->pSyncNode, "wal read not exist, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", index, - err, err, errStr, sysErr, sysErrStr); + sNTrace(pData->pSyncNode, "wal read not exist, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", index, + err, errStr, sysErr, sysErrStr); } else { - sNTrace(pData->pSyncNode, "wal read error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", index, - err, err, errStr, sysErr, sysErrStr); + sNTrace(pData->pSyncNode, "wal read error, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", index, err, + errStr, sysErr, sysErrStr); } /* @@ -341,8 +341,8 @@ static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIn const char* errStr = tstrerror(err); int32_t sysErr = errno; const char* sysErrStr = strerror(errno); - sError("vgId:%d, wal truncate error, from-index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", - pData->pSyncNode->vgId, fromIndex, err, err, errStr, sysErr, sysErrStr); + sError("vgId:%d, wal truncate error, from-index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", + pData->pSyncNode->vgId, fromIndex, err, errStr, sysErr, sysErrStr); } // event log @@ -392,8 +392,8 @@ int32_t raftLogUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) { const char* errStr = tstrerror(err); int32_t sysErr = errno; const char* sysErrStr = strerror(errno); - sError("vgId:%d, wal update commit index error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", - pData->pSyncNode->vgId, index, err, err, errStr, sysErr, sysErrStr); + sError("vgId:%d, wal update commit index error, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", + pData->pSyncNode->vgId, index, err, errStr, sysErr, sysErrStr); return -1; } return 0; diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 9e1c6b36f1..54c11a503b 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -747,7 +747,7 @@ static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotS pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; // send msg - syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver receiving"); + syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver received"); if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { sRError(pReceiver, "snapshot receiver send resp failed since %s", terrstr()); return -1; @@ -979,32 +979,31 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { return -1; } - if (pMsg->startTime != pSender->startTime) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "sender:% " PRId64 " receiver:%" PRId64 " time not match"); - return -1; - } - // state, term, seq/ack if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender not leader"); sSError(pSender, "snapshot sender not leader"); - return -1; + goto _ERROR; + } + + if (pMsg->startTime != pSender->startTime) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver time not match"); + sSError(pSender, "sender:%" PRId64 " receiver:%" PRId64 " time not match, code:0x%x", pMsg->startTime, + pSender->startTime, pMsg->code); + goto _ERROR; } if (pMsg->term != pSyncNode->pRaftStore->currentTerm) { - sSError(pSender, "snapshot sender term not equal"); - return -1; + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver term not match"); + sSError(pSender, "snapshot sender term not equal, msg term:%" PRId64 " currentTerm:%" PRId64, pMsg->term, + pSyncNode->pRaftStore->currentTerm); + goto _ERROR; } if (pMsg->code != 0) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error code"); sSError(pSender, "snapshot sender receive error code:0x%x and stop sender", pMsg->code); - snapshotSenderStop(pSender, true); - SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); - if (pMgr) { - syncLogReplMgrReset(pMgr); - } - - return -1; + goto _ERROR; } // prepare , send begin msg @@ -1068,4 +1067,14 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { } return 0; + +_ERROR: + snapshotSenderStop(pSender, true); + SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); + if (pMgr) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "reset repl mgr"); + syncLogReplMgrReset(pMgr); + } + + return -1; } From 90e44ced3eba30d57a6700a4b23da1e5f2841bf8 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 23 Dec 2022 11:15:45 +0800 Subject: [PATCH 19/53] test: add logs --- tests/script/sh/deploy.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/script/sh/deploy.sh b/tests/script/sh/deploy.sh index 662c4a1a6c..217bd66ef6 100755 --- a/tests/script/sh/deploy.sh +++ b/tests/script/sh/deploy.sh @@ -134,6 +134,7 @@ echo "mDebugFlag 143" >> $TAOS_CFG echo "wDebugFlag 143" >> $TAOS_CFG echo "sDebugFlag 143" >> $TAOS_CFG echo "tsdbDebugFlag 143" >> $TAOS_CFG +echo "tdbDebugFlag 143" >> $TAOS_CFG echo "tqDebugFlag 143" >> $TAOS_CFG echo "fsDebugFlag 143" >> $TAOS_CFG echo "idxDebugFlag 143" >> $TAOS_CFG From 4f33119d461b0f9a8fbdc2fa1eda92555be062ab Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 23 Dec 2022 13:07:10 +0800 Subject: [PATCH 20/53] test: adjust test.sh --- tests/script/test.sh | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/script/test.sh b/tests/script/test.sh index a7a5d34fbe..19180382fd 100755 --- a/tests/script/test.sh +++ b/tests/script/test.sh @@ -10,13 +10,11 @@ set +e #set -x FILE_NAME= -RELEASE=0 -ASYNC=0 VALGRIND=0 -UNIQUE=0 +TEST=0 UNAME_BIN=`which uname` OS_TYPE=`$UNAME_BIN` -while getopts "f:agvum" arg +while getopts "f:tgv" arg do case $arg in f) @@ -25,8 +23,8 @@ do v) VALGRIND=1 ;; - u) - UNIQUE=1 + t) + TEST=1 ;; g) VALGRIND=2 @@ -140,6 +138,11 @@ if [ -n "$FILE_NAME" ]; then result=$? echo "Execute result:" $result + if [ $TEST -eq 1 ]; then + echo "Exit without check asan errors" + exit 1 + fi + if [ $result -eq 0 ]; then $CODE_DIR/sh/sigint_stop_dnodes.sh $CODE_DIR/sh/checkAsan.sh From 99ef55c7d456756549235ca172d2fbaebbf15937 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Fri, 23 Dec 2022 16:02:08 +0800 Subject: [PATCH 21/53] query(fix): return new error code except connect msg --- source/client/src/clientImpl.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index b480b0b230..4b4a1c368f 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1425,24 +1425,18 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { } // pMsg is response msg - switch (pMsg->msgType) { - case TDMT_VND_BATCH_META + 1: - case TDMT_VND_SUBMIT + 1: - case TDMT_SCH_QUERY + 1: - case TDMT_SCH_MERGE_QUERY + 1: - // uniform to one error code: TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED - if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { - pMsg->code = TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED; - } - break; - default: - // restore origin code - if (pMsg->code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) { - pMsg->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; - } else if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { - pMsg->code = TSDB_CODE_RPC_BROKEN_LINK; - } - break; + if (pMsg->msgType != TDMT_MND_CONNECT + 1) { + // uniform to one error code: TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED + if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { + pMsg->code = TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED; + } + } else { + // restore origin code + if (pMsg->code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) { + pMsg->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; + } else if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { + pMsg->code = TSDB_CODE_RPC_BROKEN_LINK; + } } AsyncArg* arg = taosMemoryCalloc(1, sizeof(AsyncArg)); From d0f789744b6521f0ce3bf82cf870f47fe6bf9aac Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Fri, 23 Dec 2022 16:04:48 +0800 Subject: [PATCH 22/53] query(fix): return new error code except connect msg --- source/client/src/clientImpl.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 4b4a1c368f..456a059191 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1425,18 +1425,18 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { } // pMsg is response msg - if (pMsg->msgType != TDMT_MND_CONNECT + 1) { - // uniform to one error code: TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED - if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { - pMsg->code = TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED; - } - } else { + if (pMsg->msgType == TDMT_MND_CONNECT + 1) { // restore origin code if (pMsg->code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) { pMsg->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; } else if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { pMsg->code = TSDB_CODE_RPC_BROKEN_LINK; } + } else { + // uniform to one error code: TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED + if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { + pMsg->code = TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED; + } } AsyncArg* arg = taosMemoryCalloc(1, sizeof(AsyncArg)); From 4a97f25725843afa08aacdb3ade7c113ffd2a524 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Sat, 24 Dec 2022 11:16:45 +0800 Subject: [PATCH 23/53] fix(query): show sql len on systable length modify from 1024 to 2048 --- include/util/tdef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/util/tdef.h b/include/util/tdef.h index e1d421a399..d585912259 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -254,7 +254,7 @@ typedef enum ELogicConditionType { #define TSDB_EP_LEN (TSDB_FQDN_LEN + 6) #define TSDB_IPv4ADDR_LEN 16 #define TSDB_FILENAME_LEN 128 -#define TSDB_SHOW_SQL_LEN 1024 +#define TSDB_SHOW_SQL_LEN 2048 #define TSDB_SLOW_QUERY_SQL_LEN 512 #define TSDB_SHOW_SUBQUERY_LEN 1000 From c4f40874c11c87db89281abd58b21f9af493c2f0 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 22 Dec 2022 20:29:42 +0800 Subject: [PATCH 24/53] tdb/pager: debug logs for pager restore --- source/libs/tdb/src/db/tdbPager.c | 20 +++++++++++++++++--- source/libs/tdb/src/db/tdbTable.c | 4 ++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 2a2a6f8bbd..62d82edeb1 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -880,6 +880,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbDebug("pager/restore: %p, %d/%d, txnId:%s", pPager, pPager->dbOrigSize, pPager->dbFileSize, jFileName); + for (int pgIndex = 0; pgIndex < journalSize; ++pgIndex) { // read pgno & the page from journal SPgno pgno; @@ -890,6 +892,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbTrace("pager/restore: restore pgno:%d,", pgno); + ret = tdbOsRead(jfd, pageBuf, pPager->pageSize); if (ret < 0) { tdbOsFree(pageBuf); @@ -929,7 +933,7 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } - if (tdbOsRemove(pPager->jFileName) < 0 && errno != ENOENT) { + if (tdbOsRemove(jFileName) < 0 && errno != ENOENT) { tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), pPager->jFileName); terrno = TAOS_SYSTEM_ERROR(errno); return -1; @@ -949,7 +953,12 @@ int tdbPagerRestoreJournals(SPager *pPager) { while ((pDirEntry = tdbReadDir(pDir)) != NULL) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { - if (tdbPagerRestore(pPager, name) < 0) { + char jname[TD_PATH_MAX] = {0}; + int dirLen = strlen(pPager->pEnv->dbName); + memcpy(jname, pPager->pEnv->dbName, dirLen); + jname[dirLen] = '/'; + memcpy(jname + dirLen + 1, name, strlen(name)); + if (tdbPagerRestore(pPager, jname) < 0) { tdbCloseDir(&pDir); tdbError("failed to restore file due to %s. jFileName:%s", strerror(errno), name); @@ -975,7 +984,12 @@ int tdbPagerRollback(SPager *pPager) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { - if (tdbOsRemove(name) < 0 && errno != ENOENT) { + char jname[TD_PATH_MAX] = {0}; + int dirLen = strlen(pPager->pEnv->dbName); + memcpy(jname, pPager->pEnv->dbName, dirLen); + jname[dirLen] = '/'; + memcpy(jname + dirLen + 1, name, strlen(name)); + if (tdbOsRemove(jname) < 0 && errno != ENOENT) { tdbCloseDir(&pDir); tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), name); diff --git a/source/libs/tdb/src/db/tdbTable.c b/source/libs/tdb/src/db/tdbTable.c index 2950169979..972e2f29e5 100644 --- a/source/libs/tdb/src/db/tdbTable.c +++ b/source/libs/tdb/src/db/tdbTable.c @@ -108,13 +108,13 @@ int tdbTbOpen(const char *tbname, int keyLen, int valLen, tdb_cmpr_fn_t keyCmprF ASSERT(pPager != NULL); if (rollback) { - tdbPagerRollback(pPager); - } else { ret = tdbPagerRestoreJournals(pPager); if (ret < 0) { tdbOsFree(pTb); return -1; } + } else { + tdbPagerRollback(pPager); } // pTb->pBt From 3ea9fadfc8c146784dd116914e1aed876716a95b Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Mon, 26 Dec 2022 10:10:49 +0800 Subject: [PATCH 25/53] fix(vnd): save vnode info at the begining of vnode prepare commit --- source/dnode/vnode/src/vnd/vnodeCommit.c | 67 +++++++++++++++++------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index be977e7cbd..1aefe19476 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -184,16 +184,51 @@ _err: return -1; } -static void vnodePrepareCommit(SVnode *pVnode) { +static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { + int32_t code = 0; + int32_t lino = 0; + char dir[TSDB_FILENAME_LEN] = {0}; + tsem_wait(&pVnode->canCommit); + pVnode->state.commitTerm = pVnode->state.applyTerm; + + pInfo->info.config = pVnode->config; + pInfo->info.state.committed = pVnode->state.applied; + pInfo->info.state.commitTerm = pVnode->state.applyTerm; + pInfo->info.state.commitID = pVnode->state.commitID; + pInfo->pVnode = pVnode; + pInfo->txn = metaGetTxn(pVnode->pMeta); + + // save info + if (pVnode->pTfs) { + snprintf(dir, TSDB_FILENAME_LEN, "%s%s%s", tfsGetPrimaryPath(pVnode->pTfs), TD_DIRSEP, pVnode->path); + } else { + snprintf(dir, TSDB_FILENAME_LEN, "%s", pVnode->path); + } + if (vnodeSaveInfo(dir, &pInfo->info) < 0) { + code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + } + tsdbPrepareCommit(pVnode->pTsdb); - metaPrepareAsyncCommit(pVnode->pMeta); smaPrepareAsyncCommit(pVnode->pSma); + metaPrepareAsyncCommit(pVnode->pMeta); + vnodeBufPoolUnRef(pVnode->inUse); pVnode->inUse = NULL; + +_exit: + if (code) { + vError("vgId:%d, %s failed at line %d since %s, commit id:%" PRId64, TD_VID(pVnode), __func__, lino, + tstrerror(code), pVnode->state.commitID); + } else { + vDebug("vgId:%d, %s done", TD_VID(pVnode), __func__); + } + return code; } + static int32_t vnodeCommitTask(void *arg) { int32_t code = 0; @@ -213,27 +248,26 @@ _exit: int vnodeAsyncCommit(SVnode *pVnode) { int32_t code = 0; - // prepare to commit - vnodePrepareCommit(pVnode); - - // schedule the task - pVnode->state.commitTerm = pVnode->state.applyTerm; - SCommitInfo *pInfo = (SCommitInfo *)taosMemoryCalloc(1, sizeof(*pInfo)); if (NULL == pInfo) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } - pInfo->info.config = pVnode->config; - pInfo->info.state.committed = pVnode->state.applied; - pInfo->info.state.commitTerm = pVnode->state.applyTerm; - pInfo->info.state.commitID = pVnode->state.commitID; - pInfo->pVnode = pVnode; - pInfo->txn = metaGetTxn(pVnode->pMeta); + + // prepare to commit + code = vnodePrepareCommit(pVnode, pInfo); + if (TSDB_CODE_SUCCESS != code) { + goto _exit; + } + + // schedule the task vnodeScheduleTask(vnodeCommitTask, pInfo); _exit: if (code) { + if (NULL != pInfo) { + taosMemoryFree(pInfo); + } vError("vgId:%d, %s failed since %s, commit id:%" PRId64, TD_VID(pVnode), __func__, tstrerror(code), pVnode->state.commitID); } else { @@ -265,16 +299,11 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { return -1; } - // save info if (pVnode->pTfs) { snprintf(dir, TSDB_FILENAME_LEN, "%s%s%s", tfsGetPrimaryPath(pVnode->pTfs), TD_DIRSEP, pVnode->path); } else { snprintf(dir, TSDB_FILENAME_LEN, "%s", pVnode->path); } - if (vnodeSaveInfo(dir, &pInfo->info) < 0) { - code = terrno; - TSDB_CHECK_CODE(code, lino, _exit); - } // walBeginSnapshot(pVnode->pWal, pVnode->state.applied); syncBeginSnapshot(pVnode->sync, pVnode->state.applied); From 70ffbabcbf5fa8d98b18835cd436b1100be4e722 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao@163.com> Date: Mon, 26 Dec 2022 10:15:14 +0800 Subject: [PATCH 26/53] fix:remove assert --- source/libs/stream/src/streamState.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index af1d738de0..6670bf463e 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -656,8 +656,7 @@ int32_t streamStateSessionClear(SStreamState* pState) { void* buf = NULL; int32_t size = 0; int32_t code = streamStateSessionGetKVByCur(pCur, &delKey, &buf, &size); - if (code == 0) { - ASSERT(size > 0); + if (code == 0 && size > 0) { memset(buf, 0, size); streamStateSessionPut(pState, &delKey, buf, size); } else { From a5165ab5e7dcff44dc050f2bbcfe57d879f187d3 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 26 Dec 2022 10:58:17 +0800 Subject: [PATCH 27/53] refactor: do some internal refactor. --- source/dnode/vnode/src/tsdb/tsdbMergeTree.c | 1 - source/dnode/vnode/src/tsdb/tsdbRead.c | 143 ++++++-------------- source/libs/executor/src/dataDeleter.c | 5 +- source/libs/executor/src/dataDispatcher.c | 13 +- source/libs/executor/src/exchangeoperator.c | 1 - source/libs/executor/src/executorimpl.c | 38 +----- source/libs/executor/src/joinoperator.c | 58 ++++---- source/libs/executor/src/sortoperator.c | 4 - source/libs/executor/src/tsimplehash.c | 5 +- source/libs/executor/src/tsort.c | 1 + 10 files changed, 85 insertions(+), 184 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c index 41bf823095..beb4303156 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c +++ b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c @@ -567,7 +567,6 @@ int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, SDataFReader *pFRead pMTree->pLoadInfo = pBlockLoadInfo; pMTree->destroyLoadInfo = destroyLoadInfo; - ASSERT(pMTree->pLoadInfo != NULL); for (int32_t i = 0; i < pFReader->pSet->nSttF; ++i) { // open all last file struct SLDataIter *pIter = NULL; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 050d03cf73..346ad854df 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -243,7 +243,7 @@ static int32_t setColumnIdSlotList(SBlockLoadSuppInfo* pSupInfo, SColumnInfo* pC return TSDB_CODE_SUCCESS; } -static void updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInfo) { +static int32_t updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInfo) { int32_t i = 0, j = 0; while(i < pSchema->numOfCols && j < pSupInfo->numOfCols) { @@ -251,7 +251,7 @@ static void updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInfo) if (pTCol->colId == pSupInfo->colId[j]) { if (!IS_BSMA_ON(pTCol)) { pSupInfo->smaValid = false; - return; + return TSDB_CODE_SUCCESS; } i += 1; @@ -260,9 +260,11 @@ static void updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInfo) // do nothing i += 1; } else { - ASSERT(0); + return TSDB_CODE_INVALID_PARA; } } + + return TSDB_CODE_SUCCESS; } static int32_t initBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) { @@ -579,7 +581,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd } if (VND_IS_TSMA(pVnode)) { - tsdbDebug("vgId:%d, tsma is selected to query", TD_VID(pVnode)); + tsdbDebug("vgId:%d, tsma is selected to query, %s", TD_VID(pVnode), idstr); } initReaderStatus(&pReader->status); @@ -594,7 +596,6 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd pReader->type = pCond->type; pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); pReader->blockInfoBuf.numPerBucket = 1000; // 1000 tables per bucket - ASSERT(pCond->numOfCols > 0); if (pReader->pResBlock == NULL) { pReader->freeBlock = true; @@ -605,6 +606,12 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd } } + if (pCond->numOfCols <= 0) { + tsdbError("vgId:%d, invalid column number %d in query cond, %s", TD_VID(pVnode), pCond->numOfCols, idstr); + code = TSDB_CODE_INVALID_PARA; + goto _end; + } + // todo refactor. limitOutputBufferSize(pCond, &pReader->capacity); @@ -794,8 +801,9 @@ static void doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int32_ } static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) { - if (taosArrayGetSize(pBlockIter->blockList) == 0) { - ASSERT(pBlockIter->numOfBlocks == taosArrayGetSize(pBlockIter->blockList)); + size_t num = taosArrayGetSize(pBlockIter->blockList); + if (num == 0) { + ASSERT(pBlockIter->numOfBlocks == num); return NULL; } @@ -805,73 +813,6 @@ static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) { static SDataBlk* getCurrentBlock(SDataBlockIter* pBlockIter) { return &pBlockIter->block; } -int32_t binarySearchForTs(char* pValue, int num, TSKEY key, int order) { - int32_t midPos = -1; - int32_t numOfRows; - - ASSERT(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC); - - TSKEY* keyList = (TSKEY*)pValue; - int32_t firstPos = 0; - int32_t lastPos = num - 1; - - if (order == TSDB_ORDER_DESC) { - // find the first position which is smaller than the key - while (1) { - if (key >= keyList[firstPos]) return firstPos; - if (key == keyList[lastPos]) return lastPos; - - if (key < keyList[lastPos]) { - lastPos += 1; - if (lastPos >= num) { - return -1; - } else { - return lastPos; - } - } - - numOfRows = lastPos - firstPos + 1; - midPos = (numOfRows >> 1) + firstPos; - - if (key < keyList[midPos]) { - firstPos = midPos + 1; - } else if (key > keyList[midPos]) { - lastPos = midPos - 1; - } else { - break; - } - } - - } else { - // find the first position which is bigger than the key - while (1) { - if (key <= keyList[firstPos]) return firstPos; - if (key == keyList[lastPos]) return lastPos; - - if (key > keyList[lastPos]) { - lastPos = lastPos + 1; - if (lastPos >= num) - return -1; - else - return lastPos; - } - - numOfRows = lastPos - firstPos + 1; - midPos = (numOfRows >> 1u) + firstPos; - - if (key < keyList[midPos]) { - lastPos = midPos - 1; - } else if (key > keyList[midPos]) { - firstPos = midPos + 1; - } else { - break; - } - } - } - - return midPos; -} - static int doBinarySearchKey(TSKEY* keyList, int num, int pos, TSKEY key, int order) { // start end position int s, e; @@ -972,8 +913,8 @@ static void copyNumericCols(const SColData* pData, SFileBlockDumpInfo* pDumpInfo int32_t step = asc? 1:-1; - // make sure it is aligned to 8bit - ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); + // make sure it is aligned to 8bit, the allocated memory address is aligned to 256bit +// ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); // 1. copy data in a batch model memcpy(pColData->pData, p, dumpedRows * tDataTypes[pData->type].bytes); @@ -1183,7 +1124,6 @@ static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockI SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - ASSERT(pBlockInfo != NULL); SDataBlk* pBlock = getCurrentBlock(pBlockIter); code = tsdbReadDataBlock(pReader->pFileReader, pBlock, pBlockData); @@ -1221,8 +1161,6 @@ static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) { } static int32_t initBlockOrderSupporter(SBlockOrderSupporter* pSup, int32_t numOfTables) { - ASSERT(numOfTables >= 1); - pSup->numOfBlocksPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables); pSup->indexPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables); pSup->pDataBlockInfo = taosMemoryCalloc(1, POINTER_BYTES * numOfTables); @@ -1329,7 +1267,10 @@ static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIte sup.numOfTables += 1; } - ASSERT(numOfBlocks == cnt); + if (numOfBlocks != cnt && sup.numOfTables != numOfTables) { + cleanupBlockOrderSupporter(&sup); + return TSDB_CODE_INVALID_PARA; + } // since there is only one table qualified, blocks are not sorted if (sup.numOfTables == 1) { @@ -1351,10 +1292,9 @@ static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIte tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pReader, cnt, sup.numOfTables, pReader->idStr); - ASSERT(cnt <= numOfBlocks && sup.numOfTables <= numOfTables); - SMultiwayMergeTreeInfo* pTree = NULL; - uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar); + + uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar); if (ret != TSDB_CODE_SUCCESS) { cleanupBlockOrderSupporter(&sup); return TSDB_CODE_OUT_OF_MEMORY; @@ -1432,8 +1372,6 @@ static bool getNeighborBlockOfSameTable(SFileDataBlockInfo* pBlockInfo, STableBl } static int32_t findFileBlockInfoIndex(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pFBlockInfo) { - ASSERT(pBlockIter != NULL && pFBlockInfo != NULL); - int32_t step = ASCENDING_TRAVERSE(pBlockIter->order) ? 1 : -1; int32_t index = pBlockIter->index; @@ -1924,7 +1862,6 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, } doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, &merge, &pReader->verRange); - ASSERT(mergeBlockData); // merge with block data if ts == key if (tsLastBlock == pBlockData->aTSKEY[pDumpInfo->rowIndex]) { @@ -1990,7 +1927,6 @@ static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader tRowMergerClear(&merge); return code; } else { - ASSERT(0); return TSDB_CODE_SUCCESS; } } else { // desc order @@ -2011,7 +1947,6 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* TSDBROW* pRow = getValidMemRow(&pBlockScanInfo->iter, pDelList, pReader); TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pDelList, pReader); - ASSERT(pRow != NULL && piRow != NULL); int64_t tsLast = INT64_MIN; if (hasDataInLastBlock(pLastBlockReader)) { @@ -2235,7 +2170,6 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea if (pReader->pReadSnap->pMem != NULL) { d = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid); if (d != NULL) { - ASSERT(pBlockScanInfo->iter.iter == NULL); code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter); if (code == TSDB_CODE_SUCCESS) { pBlockScanInfo->iter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iter.iter) != NULL); @@ -2349,10 +2283,9 @@ static int64_t getCurrentKeyInLastBlock(SLastBlockReader* pLastBlockReader) { static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader) { return pLastBlockReader->mergeTree.pIter != NULL; } bool hasDataInFileBlock(const SBlockData* pBlockData, const SFileBlockDumpInfo* pDumpInfo) { - if (pBlockData->nRow > 0) { - ASSERT(pBlockData->nRow == pDumpInfo->totalRows); + if ((pBlockData->nRow > 0) && (pBlockData->nRow != pDumpInfo->totalRows)) { + return false; // this is an invalid result. } - return pBlockData->nRow > 0 && (!pDumpInfo->allDumped); } @@ -2583,7 +2516,6 @@ int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* int32_t code = 0; SArray* pDelData = taosArrayInit(4, sizeof(SDelData)); - ASSERT(pReader->pReadSnap != NULL); SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile; if (pDelFile && taosArrayGetSize(pReader->pDelIdx) > 0) { @@ -2868,7 +2800,6 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { TSDBKEY keyInBuf = getCurrentKeyInBuf(pScanInfo, pReader); if (pBlockInfo == NULL) { // build data block from last data file - ASSERT(pBlockIter->numOfBlocks == 0); code = buildComposedDataBlock(pReader); } else if (fileBlockShouldLoad(pReader, pBlockInfo, pBlock, pScanInfo, keyInBuf, pLastBlockReader)) { code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData, pScanInfo->uid); @@ -3837,7 +3768,10 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, void* pTableL } if (pReader->pSchema != NULL) { - updateBlockSMAInfo(pReader->pSchema, &pReader->suppInfo); + code = updateBlockSMAInfo(pReader->pSchema, &pReader->suppInfo); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } } STsdbReader* p = (pReader->innerReader[0] != NULL) ? pReader->innerReader[0] : pReader; @@ -4113,25 +4047,27 @@ static void doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_ } int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, bool* allHave) { + SColumnDataAgg*** pBlockSMA = &pDataBlock->pBlockAgg; + int32_t code = 0; - SColumnDataAgg ***pBlockSMA = &pDataBlock->pBlockAgg; *allHave = false; + *pBlockSMA = NULL; if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { - *pBlockSMA = NULL; return TSDB_CODE_SUCCESS; } // there is no statistics data for composed block if (pReader->status.composedDataBlock || (!pReader->suppInfo.smaValid)) { - *pBlockSMA = NULL; return TSDB_CODE_SUCCESS; } SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter); SBlockLoadSuppInfo* pSup = &pReader->suppInfo; - ASSERT(pReader->pResBlock->info.id.uid == pFBlock->uid); + if (pReader->pResBlock->info.id.uid != pFBlock->uid) { + return TSDB_CODE_SUCCESS; + } SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); if (tDataBlkHasSma(pBlock)) { @@ -4187,7 +4123,7 @@ int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, } else if (pAgg->colId < pSup->colId[j]) { i += 1; } else if (pSup->colId[j] < pAgg->colId) { - ASSERT(pSup->colId[j] == PRIMARYKEY_TIMESTAMP_COL_ID); + // ASSERT(pSup->colId[j] == PRIMARYKEY_TIMESTAMP_COL_ID); pResBlock->pBlockAgg[pSup->slotId[j]] = &pSup->tsColAgg; j += 1; } @@ -4418,9 +4354,12 @@ int32_t tsdbGetTableSchema(SVnode* pVnode, int64_t uid, STSchema** pSchema, int6 return terrno; } sversion = mr.me.stbEntry.schemaRow.version; - } else { - ASSERT(mr.me.type == TSDB_NORMAL_TABLE); + } else if (mr.me.type == TSDB_NORMAL_TABLE) { sversion = mr.me.ntbEntry.schemaRow.version; + } else { + terrno = TSDB_CODE_INVALID_PARA; + metaReaderClear(&mr); + return terrno; } metaReaderClear(&mr); diff --git a/source/libs/executor/src/dataDeleter.c b/source/libs/executor/src/dataDeleter.c index eff7a5ef93..a8051ea7c3 100644 --- a/source/libs/executor/src/dataDeleter.c +++ b/source/libs/executor/src/dataDeleter.c @@ -62,8 +62,8 @@ static void toDataCacheEntry(SDataDeleterHandle* pHandle, const SInputData* pInp pEntry->numOfCols = taosArrayGetSize(pInput->pData->pDataBlock); pEntry->dataLen = sizeof(SDeleterRes); - ASSERT(1 == pEntry->numOfRows); - ASSERT(3 == pEntry->numOfCols); +// ASSERT(1 == pEntry->numOfRows); +// ASSERT(3 == pEntry->numOfCols); pBuf->useSize = sizeof(SDataCacheEntry); @@ -167,7 +167,6 @@ static void getDataLength(SDataSinkHandle* pHandle, int64_t* pLen, bool* pQueryE SDataDeleterBuf* pBuf = NULL; taosReadQitem(pDeleter->pDataBlocks, (void**)&pBuf); - ASSERT(NULL != pBuf); memcpy(&pDeleter->nextOutput, pBuf, sizeof(SDataDeleterBuf)); taosFreeQitem(pBuf); diff --git a/source/libs/executor/src/dataDispatcher.c b/source/libs/executor/src/dataDispatcher.c index c2fa438c80..a603bffba5 100644 --- a/source/libs/executor/src/dataDispatcher.c +++ b/source/libs/executor/src/dataDispatcher.c @@ -77,8 +77,8 @@ static void toDataCacheEntry(SDataDispatchHandle* pHandle, const SInputData* pIn pBuf->useSize = sizeof(SDataCacheEntry); pEntry->dataLen = blockEncode(pInput->pData, pEntry->data, numOfCols); - ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data + 8)); - ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data + 8 + 4)); +// ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data + 8)); +// ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data + 8 + 4)); pBuf->useSize += pEntry->dataLen; @@ -162,15 +162,14 @@ static void getDataLength(SDataSinkHandle* pHandle, int64_t* pLen, bool* pQueryE SDataDispatchBuf* pBuf = NULL; taosReadQitem(pDispatcher->pDataBlocks, (void**)&pBuf); - ASSERT(NULL != pBuf); memcpy(&pDispatcher->nextOutput, pBuf, sizeof(SDataDispatchBuf)); taosFreeQitem(pBuf); SDataCacheEntry* pEntry = (SDataCacheEntry*)pDispatcher->nextOutput.pData; *pLen = pEntry->dataLen; - ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data + 8)); - ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data + 8 + 4)); +// ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data + 8)); +// ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data + 8 + 4)); *pQueryEnd = pDispatcher->queryEnd; qDebug("got data len %" PRId64 ", row num %d in sink", *pLen, @@ -193,8 +192,8 @@ static int32_t getDataBlock(SDataSinkHandle* pHandle, SOutputData* pOutput) { pOutput->numOfCols = pEntry->numOfCols; pOutput->compressed = pEntry->compressed; - ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data + 8)); - ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data + 8 + 4)); +// ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data + 8)); +// ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data + 8 + 4)); atomic_sub_fetch_64(&pDispatcher->cachedSize, pEntry->dataLen); atomic_sub_fetch_64(&gDataSinkStat.cachedSize, pEntry->dataLen); diff --git a/source/libs/executor/src/exchangeoperator.c b/source/libs/executor/src/exchangeoperator.c index 4103ca82dc..9873c52006 100644 --- a/source/libs/executor/src/exchangeoperator.c +++ b/source/libs/executor/src/exchangeoperator.c @@ -373,7 +373,6 @@ int32_t loadRemoteDataCallback(void* param, SDataBuf* pMsg, int32_t code) { pRsp->useconds = htobe64(pRsp->useconds); pRsp->numOfBlocks = htonl(pRsp->numOfBlocks); - ASSERT(pRsp != NULL); qDebug("%s fetch rsp received, index:%d, blocks:%d, rows:%" PRId64 ", %p", pSourceDataInfo->taskId, index, pRsp->numOfBlocks, pRsp->numOfRows, pExchangeInfo); } else { diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 668a93740d..fde13498ea 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -104,8 +104,6 @@ static int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, void setOperatorCompleted(SOperatorInfo* pOperator) { pOperator->status = OP_EXEC_DONE; - ASSERT(pOperator->pTaskInfo != NULL); - pOperator->cost.totalCost = (taosGetTimestampUs() - pOperator->pTaskInfo->cost.start) / 1000.0; setTaskStatus(pOperator->pTaskInfo, TASK_COMPLETED); } @@ -524,7 +522,7 @@ bool functionNeedToExecute(SqlFunctionCtx* pCtx) { return true; } -static int32_t doCreateConstantValColumnAggInfo(SInputColumnInfoData* pInput, SFunctParam* pFuncParam, int32_t type, +static int32_t doCreateConstantValColumnSMAInfo(SInputColumnInfoData* pInput, SFunctParam* pFuncParam, int32_t type, int32_t paramIndex, int32_t numOfRows) { if (pInput->pData[paramIndex] == NULL) { pInput->pData[paramIndex] = taosMemoryCalloc(1, sizeof(SColumnInfoData)); @@ -548,8 +546,6 @@ static int32_t doCreateConstantValColumnAggInfo(SInputColumnInfoData* pInput, SF da = pInput->pColumnDataAgg[paramIndex]; } - ASSERT(!IS_VAR_DATA_TYPE(type)); - if (type == TSDB_DATA_TYPE_BIGINT) { int64_t v = pFuncParam->param.i; *da = (SColumnDataAgg){.numOfNull = 0, .min = v, .max = v, .sum = v * numOfRows}; @@ -570,7 +566,7 @@ static int32_t doCreateConstantValColumnAggInfo(SInputColumnInfoData* pInput, SF } else if (type == TSDB_DATA_TYPE_TIMESTAMP) { // do nothing } else { - ASSERT(0); + qError("invalid constant type for sma info"); } return TSDB_CODE_SUCCESS; @@ -600,7 +596,7 @@ void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, SSDataBlock* pB // the data in the corresponding SColumnInfoData will not be used. pInput->pData[j] = taosArrayGet(pBlock->pDataBlock, slotId); } else if (pFuncParam->type == FUNC_PARAM_TYPE_VALUE) { - doCreateConstantValColumnAggInfo(pInput, pFuncParam, pFuncParam->param.nType, j, pBlock->info.rows); + doCreateConstantValColumnSMAInfo(pInput, pFuncParam, pFuncParam->param.nType, j, pBlock->info.rows); } } } else { @@ -2217,13 +2213,11 @@ int32_t extractTableScanNode(SPhysiNode* pNode, STableScanPhysiNode** ppNode) { *ppNode = (STableScanPhysiNode*)pNode; return 0; } else { - ASSERT(0); terrno = TSDB_CODE_APP_ERROR; return -1; } } else { if (LIST_LENGTH(pNode->pChildren) != 1) { - ASSERT(0); terrno = TSDB_CODE_APP_ERROR; return -1; } @@ -2233,32 +2227,6 @@ int32_t extractTableScanNode(SPhysiNode* pNode, STableScanPhysiNode** ppNode) { return -1; } -#if 0 -int32_t rebuildReader(SOperatorInfo* pOperator, SSubplan* plan, SReadHandle* pHandle, int64_t uid, int64_t ts) { - STableScanInfo* pTableScanInfo = NULL; - if (extractTbscanInStreamOpTree(pOperator, &pTableScanInfo) < 0) { - return -1; - } - - STableScanPhysiNode* pNode = NULL; - if (extractTableScanNode(plan->pNode, &pNode) < 0) { - ASSERT(0); - } - - tsdbReaderClose(pTableScanInfo->dataReader); - - STableListInfo info = {0}; - pTableScanInfo->dataReader = doCreateDataReader(pNode, pHandle, &info, NULL); - if (pTableScanInfo->dataReader == NULL) { - ASSERT(0); - qError("failed to create data reader"); - return TSDB_CODE_APP_ERROR; - } - // TODO: set uid and ts to data reader - return 0; -} -#endif - int32_t createDataSinkParam(SDataSinkNode* pNode, void** pParam, qTaskInfo_t* pTaskInfo, SReadHandle* readHandle) { SExecTaskInfo* pTask = *(SExecTaskInfo**)pTaskInfo; diff --git a/source/libs/executor/src/joinoperator.c b/source/libs/executor/src/joinoperator.c index 8a097a23ce..88ed9eccb3 100644 --- a/source/libs/executor/src/joinoperator.c +++ b/source/libs/executor/src/joinoperator.c @@ -42,38 +42,40 @@ typedef struct SJoinOperatorInfo { static void setJoinColumnInfo(SColumnInfo* pColumn, const SColumnNode* pColumnNode); static SSDataBlock* doMergeJoin(struct SOperatorInfo* pOperator); static void destroyMergeJoinOperator(void* param); -static void extractTimeCondition(SJoinOperatorInfo* pInfo, SOperatorInfo** pDownstream, int32_t numOfDownstream, - SSortMergeJoinPhysiNode* pJoinNode); +static void extractTimeCondition(SJoinOperatorInfo* pInfo, SOperatorInfo** pDownstream, int32_t num, + SSortMergeJoinPhysiNode* pJoinNode, const char* idStr); -static void extractTimeCondition(SJoinOperatorInfo* pInfo, SOperatorInfo** pDownstream, int32_t numOfDownstream, - SSortMergeJoinPhysiNode* pJoinNode) { +static void extractTimeCondition(SJoinOperatorInfo* pInfo, SOperatorInfo** pDownstream, int32_t num, + SSortMergeJoinPhysiNode* pJoinNode, const char* idStr) { SNode* pMergeCondition = pJoinNode->pMergeCondition; - if (nodeType(pMergeCondition) == QUERY_NODE_OPERATOR) { - SOperatorNode* pNode = (SOperatorNode*)pMergeCondition; - SColumnNode* col1 = (SColumnNode*)pNode->pLeft; - SColumnNode* col2 = (SColumnNode*)pNode->pRight; - SColumnNode* leftTsCol = NULL; - SColumnNode* rightTsCol = NULL; - if (col1->dataBlockId == col2->dataBlockId ) { + if (nodeType(pMergeCondition) != QUERY_NODE_OPERATOR) { + qError("not support this in join operator, %s", idStr); + return; // do not handle this + } + + SOperatorNode* pNode = (SOperatorNode*)pMergeCondition; + SColumnNode* col1 = (SColumnNode*)pNode->pLeft; + SColumnNode* col2 = (SColumnNode*)pNode->pRight; + SColumnNode* leftTsCol = NULL; + SColumnNode* rightTsCol = NULL; + if (col1->dataBlockId == col2->dataBlockId) { + leftTsCol = col1; + rightTsCol = col2; + } else { + if (col1->dataBlockId == pDownstream[0]->resultDataBlockId) { + ASSERT(col2->dataBlockId == pDownstream[1]->resultDataBlockId); leftTsCol = col1; rightTsCol = col2; } else { - if (col1->dataBlockId == pDownstream[0]->resultDataBlockId) { - ASSERT(col2->dataBlockId == pDownstream[1]->resultDataBlockId); - leftTsCol = col1; - rightTsCol = col2; - } else { - ASSERT(col1->dataBlockId == pDownstream[1]->resultDataBlockId); - ASSERT(col2->dataBlockId == pDownstream[0]->resultDataBlockId); - leftTsCol = col2; - rightTsCol = col1; - } + ASSERT(col1->dataBlockId == pDownstream[1]->resultDataBlockId); + ASSERT(col2->dataBlockId == pDownstream[0]->resultDataBlockId); + leftTsCol = col2; + rightTsCol = col1; } - setJoinColumnInfo(&pInfo->leftCol, leftTsCol); - setJoinColumnInfo(&pInfo->rightCol, rightTsCol); - } else { - ASSERT(false); - }} + } + setJoinColumnInfo(&pInfo->leftCol, leftTsCol); + setJoinColumnInfo(&pInfo->rightCol, rightTsCol); +} SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, SSortMergeJoinPhysiNode* pJoinNode, SExecTaskInfo* pTaskInfo) { @@ -97,7 +99,7 @@ SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t pOperator->exprSupp.pExprInfo = pExprInfo; pOperator->exprSupp.numOfExprs = numOfCols; - extractTimeCondition(pInfo, pDownstream, numOfDownstream, pJoinNode); + extractTimeCondition(pInfo, pDownstream, numOfDownstream, pJoinNode, GET_TASKID(pTaskInfo)); if (pJoinNode->pOnConditions != NULL && pJoinNode->node.pConditions != NULL) { pInfo->pCondAfterMerge = nodesMakeNode(QUERY_NODE_LOGIC_CONDITION); @@ -364,8 +366,6 @@ static bool mergeJoinGetNextTimestamp(SOperatorInfo* pOperator, int64_t* pLeftTs char* pRightVal = colDataGetData(pRightCol, pJoinInfo->rightPos); *pRightTs = *(int64_t*)pRightVal; - ASSERT(pLeftCol->info.type == TSDB_DATA_TYPE_TIMESTAMP); - ASSERT(pRightCol->info.type == TSDB_DATA_TYPE_TIMESTAMP); return true; } diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index 7ac007b7cb..ee7f88e813 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -139,7 +139,6 @@ SSDataBlock* getSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, i int32_t numOfCols = taosArrayGetSize(pColMatchInfo); for (int32_t i = 0; i < numOfCols; ++i) { SColMatchItem* pmInfo = taosArrayGet(pColMatchInfo, i); - // ASSERT(pmInfo->matchType == COL_MATCH_FROM_SLOT_ID); SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId); SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->dstSlotId); @@ -272,7 +271,6 @@ void destroySortOperatorInfo(void* param) { } int32_t getExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { - ASSERT(pOptr != NULL); SSortExecInfo* pInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo)); SSortOperatorInfo* pOperatorInfo = (SSortOperatorInfo*)pOptr->info; @@ -329,7 +327,6 @@ SSDataBlock* getGroupSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlo int32_t numOfCols = taosArrayGetSize(pColMatchInfo); for (int32_t i = 0; i < numOfCols; ++i) { SColMatchItem* pmInfo = taosArrayGet(pColMatchInfo, i); - // ASSERT(pmInfo->matchType == COL_MATCH_FROM_SLOT_ID); SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId); SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->dstSlotId); @@ -746,7 +743,6 @@ void destroyMultiwayMergeOperatorInfo(void* param) { } int32_t getMultiwayMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { - ASSERT(pOptr != NULL); SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo)); SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info; diff --git a/source/libs/executor/src/tsimplehash.c b/source/libs/executor/src/tsimplehash.c index 484d917069..fd6215e3a1 100644 --- a/source/libs/executor/src/tsimplehash.c +++ b/source/libs/executor/src/tsimplehash.c @@ -49,7 +49,9 @@ static FORCE_INLINE int32_t taosHashCapacity(int32_t length) { } SSHashObj *tSimpleHashInit(size_t capacity, _hash_fn_t fn) { - ASSERT(fn != NULL); + if (fn == NULL) { + return NULL; + } if (capacity == 0) { capacity = 4; @@ -66,7 +68,6 @@ SSHashObj *tSimpleHashInit(size_t capacity, _hash_fn_t fn) { pHashObj->equalFp = memcmp; pHashObj->hashFp = fn; - ASSERT((pHashObj->capacity & (pHashObj->capacity - 1)) == 0); pHashObj->hashList = (SHNode **)taosMemoryCalloc(pHashObj->capacity, sizeof(void *)); if (!pHashObj->hashList) { diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 30911887bb..fa0cdb3943 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -800,6 +800,7 @@ STupleHandle* tsortNextTuple(SSortHandle* pHandle) { } } + // all sources are completed. if (pHandle->cmpParam.numOfSources == pHandle->numOfCompletedSources) { return NULL; } From 1a07451bc3cd629ac62b38b774ca54f6407c6dbc Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 26 Dec 2022 13:45:25 +0800 Subject: [PATCH 28/53] refactor: do some internal refactor. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 12 +++++------- source/libs/executor/src/executorimpl.c | 4 +--- source/libs/executor/src/scanoperator.c | 24 ------------------------ 3 files changed, 6 insertions(+), 34 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 346ad854df..d69b8da667 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -4095,13 +4095,10 @@ int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, // update the number of NULL data rows size_t numOfCols = pSup->numOfCols; - int32_t i = 0, j = 0; - size_t size = taosArrayGetSize(pSup->pColAgg); - // ensure capacity - if(pDataBlock->pDataBlock) { - size_t colsNum = taosArrayGetSize(pDataBlock->pDataBlock); - taosArrayEnsureCap(pSup->pColAgg, colsNum); + if (pDataBlock->pDataBlock) { + size_t colsNum = taosArrayGetSize(pDataBlock->pDataBlock); + taosArrayEnsureCap(pSup->pColAgg, colsNum); } SSDataBlock* pResBlock = pReader->pResBlock; @@ -4112,8 +4109,9 @@ int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, // do fill all null column value SMA info doFillNullColSMA(pSup, pBlock->nRow, numOfCols, pTsAgg); + size_t size = taosArrayGetSize(pSup->pColAgg); - i = 0, j = 0; + int32_t i = 0, j = 0; while (j < numOfCols && i < size) { SColumnDataAgg* pAgg = taosArrayGet(pSup->pColAgg, i); if (pAgg->colId == pSup->colId[j]) { diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index fde13498ea..9a5729e161 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1573,8 +1573,7 @@ void destroyOperatorInfo(SOperatorInfo* pOperator) { // each operator should be set their own function to return total cost buffer int32_t optrDefaultBufFn(SOperatorInfo* pOperator) { if (pOperator->blocking) { - ASSERT(0); - return 0; + return -1; } else { return 0; } @@ -2201,7 +2200,6 @@ static int32_t extractTbscanInStreamOpTree(SOperatorInfo* pOperator, STableScanI return extractTbscanInStreamOpTree(pOperator->pDownstream[0], ppInfo); } else { SStreamScanInfo* pInfo = pOperator->info; - ASSERT(pInfo->pTableScanOp->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN); *ppInfo = pInfo->pTableScanOp->info; return 0; } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index d074ceede8..b23f5c4b6e 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -232,30 +232,6 @@ static bool doLoadBlockSMA(STableScanBase* pTableScanInfo, SSDataBlock* pBlock, if (!allColumnsHaveAgg) { return false; } - -#if 0 - // if (allColumnsHaveAgg == true) { - int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); - - // todo create this buffer during creating operator - if (pBlock->pBlockAgg == NULL) { - pBlock->pBlockAgg = taosMemoryCalloc(numOfCols, POINTER_BYTES); - if (pBlock->pBlockAgg == NULL) { - T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); - } - } - - size_t num = taosArrayGetSize(pTableScanInfo->matchInfo.pList); - for (int32_t i = 0; i < num; ++i) { - SColMatchItem* pColMatchInfo = taosArrayGet(pTableScanInfo->matchInfo.pList, i); - if (!pColMatchInfo->needOutput) { - continue; - } - - pBlock->pBlockAgg[pColMatchInfo->dstSlotId] = pColAgg[i]; - } -#endif - return true; } From f54f6201be005cb489910326ab2821d88523dd90 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 26 Dec 2022 14:07:22 +0800 Subject: [PATCH 29/53] fix(query): fix coverity issues. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 2 -- source/libs/executor/src/scanoperator.c | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index d69b8da667..4f9e419f78 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -3777,9 +3777,7 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, void* pTableL STsdbReader* p = (pReader->innerReader[0] != NULL) ? pReader->innerReader[0] : pReader; pReader->status.pTableMap = createDataBlockScanInfo(p, &pReader->blockInfoBuf, pTableList, numOfTables); if (pReader->status.pTableMap == NULL) { - tsdbReaderClose(p); *ppReader = NULL; - code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index b23f5c4b6e..0d7098a8ea 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2182,6 +2182,7 @@ static void destroyStreamScanOperatorInfo(void* param) { SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhysiNode* pTableScanNode, SNode* pTagCond, SExecTaskInfo* pTaskInfo) { + SArray* pColIds = NULL; SStreamScanInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamScanInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); @@ -2204,7 +2205,7 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys } int32_t numOfOutput = taosArrayGetSize(pInfo->matchInfo.pList); - SArray* pColIds = taosArrayInit(numOfOutput, sizeof(int16_t)); + pColIds = taosArrayInit(numOfOutput, sizeof(int16_t)); for (int32_t i = 0; i < numOfOutput; ++i) { SColMatchItem* id = taosArrayGet(pInfo->matchInfo.pList, i); @@ -2300,6 +2301,7 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys memcpy(&pTaskInfo->streamInfo.tableCond, &pTSInfo->base.cond, sizeof(SQueryTableDataCond)); } else { taosArrayDestroy(pColIds); + pColIds = NULL; } // create the pseduo columns info From 8a4fb244fbbb2f7fd47bd6513b8fc1ccd714b239 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 26 Dec 2022 14:20:05 +0800 Subject: [PATCH 30/53] fix(query): fix coverity issues. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 11 ++++++++++- source/libs/executor/src/sortoperator.c | 15 ++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 4f9e419f78..a18533cf56 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -1013,11 +1013,20 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn // pDumpInfo->rowIndex = 0; } else if (!asc && pReader->window.ekey >= pBlock->maxKey.ts) { // pDumpInfo->rowIndex = pBlock->nRow - 1; - } else { + } else { // find the appropriate the start position in current block, and set it to be the current rowIndex int32_t pos = asc ? pBlock->nRow - 1 : 0; int32_t order = asc ? TSDB_ORDER_DESC : TSDB_ORDER_ASC; int64_t key = asc ? pReader->window.skey : pReader->window.ekey; pDumpInfo->rowIndex = doBinarySearchKey(pBlockData->aTSKEY, pBlock->nRow, pos, key, order); + + if (pDumpInfo->rowIndex < 0) { + tsdbError( + "%p failed to locate the start position in current block, global index:%d, table index:%d, brange:%" PRId64 + "-%" PRId64 ", minVer:%" PRId64 ", maxVer:%" PRId64 " %s", + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->minVer, + pBlock->maxVer, pReader->idStr); + return TSDB_CODE_INVALID_PARA; + } } } diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index ee7f88e813..f5dc6cc623 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -46,13 +46,15 @@ SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSortPhysiNode* pOperator->pTaskInfo = pTaskInfo; SDataBlockDescNode* pDescNode = pSortNode->node.pOutputDataBlockDesc; - int32_t numOfCols = 0; - SSDataBlock* pResBlock = createDataBlockFromDescNode(pDescNode); - SExprInfo* pExprInfo = createExprInfo(pSortNode->pExprs, NULL, &numOfCols); + int32_t numOfCols = 0; + SExprInfo* pExprInfo = createExprInfo(pSortNode->pExprs, NULL, &numOfCols); int32_t numOfOutputCols = 0; int32_t code = extractColMatchInfo(pSortNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID, &pInfo->matchInfo); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } pOperator->exprSupp.pCtx = createSqlFunctionCtx(pExprInfo, numOfCols, &pOperator->exprSupp.rowEntryInfoOffset); initResultSizeInfo(&pOperator->resultInfo, 1024); @@ -61,7 +63,7 @@ SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSortPhysiNode* goto _error; } - pInfo->binfo.pRes = pResBlock; + pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode); pInfo->pSortInfo = createSortInfo(pSortNode->pSortKeys); initLimitInfo(pSortNode->node.pLimit, pSortNode->node.pSlimit, &pInfo->limitInfo); @@ -86,7 +88,10 @@ SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSortPhysiNode* _error: pTaskInfo->code = TSDB_CODE_OUT_OF_MEMORY; - taosMemoryFree(pInfo); + if (pInfo != NULL) { + destroySortOperatorInfo(pInfo); + } + taosMemoryFree(pOperator); return NULL; } From 668a26c1b0bd83f421af0f2b0f4a60db638abd04 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 26 Dec 2022 15:03:18 +0800 Subject: [PATCH 31/53] enh: handle error while transfer snapshot --- source/libs/sync/inc/syncInt.h | 2 +- source/libs/sync/inc/syncPipeline.h | 2 +- source/libs/sync/inc/syncSnapshot.h | 6 +- source/libs/sync/src/syncMain.c | 90 +++++++------- source/libs/sync/src/syncPipeline.c | 5 +- source/libs/sync/src/syncSnapshot.c | 179 ++++++++++++---------------- 6 files changed, 130 insertions(+), 154 deletions(-) diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 6af60af43d..b5227152df 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -238,7 +238,7 @@ int32_t syncNodeStopPingTimer(SSyncNode* pSyncNode); int32_t syncNodeStartElectTimer(SSyncNode* pSyncNode, int32_t ms); int32_t syncNodeStopElectTimer(SSyncNode* pSyncNode); int32_t syncNodeRestartElectTimer(SSyncNode* pSyncNode, int32_t ms); -int32_t syncNodeResetElectTimer(SSyncNode* pSyncNode); +void syncNodeResetElectTimer(SSyncNode* pSyncNode); int32_t syncNodeStartHeartbeatTimer(SSyncNode* pSyncNode); int32_t syncNodeStopHeartbeatTimer(SSyncNode* pSyncNode); int32_t syncNodeRestartHeartbeatTimer(SSyncNode* pSyncNode); diff --git a/source/libs/sync/inc/syncPipeline.h b/source/libs/sync/inc/syncPipeline.h index 8c7edf85ff..9188be2b42 100644 --- a/source/libs/sync/inc/syncPipeline.h +++ b/source/libs/sync/inc/syncPipeline.h @@ -61,7 +61,7 @@ typedef struct SSyncLogBuffer { // SSyncLogRepMgr SSyncLogReplMgr* syncLogReplMgrCreate(); void syncLogReplMgrDestroy(SSyncLogReplMgr* pMgr); -int32_t syncLogReplMgrReset(SSyncLogReplMgr* pMgr); +void syncLogReplMgrReset(SSyncLogReplMgr* pMgr); int32_t syncNodeLogReplMgrInit(SSyncNode* pNode); void syncNodeLogReplMgrDestroy(SSyncNode* pNode); diff --git a/source/libs/sync/inc/syncSnapshot.h b/source/libs/sync/inc/syncSnapshot.h index 2b6e14a457..974a8f968e 100644 --- a/source/libs/sync/inc/syncSnapshot.h +++ b/source/libs/sync/inc/syncSnapshot.h @@ -56,7 +56,7 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI void snapshotSenderDestroy(SSyncSnapshotSender *pSender); bool snapshotSenderIsStart(SSyncSnapshotSender *pSender); int32_t snapshotSenderStart(SSyncSnapshotSender *pSender); -int32_t snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish); +void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish); int32_t snapshotSend(SSyncSnapshotSender *pSender); int32_t snapshotReSend(SSyncSnapshotSender *pSender); @@ -79,8 +79,8 @@ typedef struct SSyncSnapshotReceiver { SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId fromId); void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver); -int32_t snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg); -int32_t snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver); +void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg); +void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver); bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver); void snapshotReceiverForceStop(SSyncSnapshotReceiver *pReceiver); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index f1aa9312c6..152e16bd2e 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -200,12 +200,15 @@ int32_t syncProcessMsg(int64_t rid, SRpcMsg* pMsg) { code = syncNodeOnLocalCmd(pSyncNode, pMsg); break; default: - sError("vgId:%d, failed to process msg:%p since invalid type:%s", pSyncNode->vgId, pMsg, - TMSG_INFO(pMsg->msgType)); + terrno = TSDB_CODE_MSG_NOT_PROCESSED; code = -1; } syncNodeRelease(pSyncNode); + if (code != 0) { + sDebug("vgId:%d, failed to process sync msg:%p type:%s since 0x%x", pSyncNode->vgId, pMsg, TMSG_INFO(pMsg->msgType), + terrno); + } return code; } @@ -228,8 +231,7 @@ int32_t syncSendTimeoutRsp(int64_t rid, int64_t seq) { syncNodeRelease(pNode); if (ret == 1) { - sInfo("send timeout response, seq:%" PRId64 " handle:%p ahandle:%p", seq, rpcMsg.info.handle, - rpcMsg.info.ahandle); + sInfo("send timeout response, seq:%" PRId64 " handle:%p ahandle:%p", seq, rpcMsg.info.handle, rpcMsg.info.ahandle); rpcSendResponse(&rpcMsg); return 0; } else { @@ -1084,13 +1086,17 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { // snapshot senders for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { SSyncSnapshotSender* pSender = snapshotSenderCreate(pSyncNode, i); - // ASSERT(pSender != NULL); - (pSyncNode->senders)[i] = pSender; - sSDebug(pSender, "snapshot sender create new while open, data:%p", pSender); + if (pSender == NULL) return NULL; + + pSyncNode->senders[i] = pSender; + sSDebug(pSender, "snapshot sender create while open sync node, data:%p", pSender); } // snapshot receivers pSyncNode->pNewNodeReceiver = snapshotReceiverCreate(pSyncNode, EMPTY_RAFT_ID); + if (pSyncNode->pNewNodeReceiver == NULL) return NULL; + sRDebug(pSyncNode->pNewNodeReceiver, "snapshot receiver create while open sync node, data:%p", + pSyncNode->pNewNodeReceiver); // is config changing pSyncNode->changing = false; @@ -1131,10 +1137,8 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { pSyncNode->hbrSlowNum = 0; pSyncNode->tmrRoutineNum = 0; - sNInfo(pSyncNode, "sync open, node:%p", pSyncNode); - sTrace("vgId:%d, tsElectInterval:%d, tsHeartbeatInterval:%d, tsHeartbeatTimeout:%d", pSyncNode->vgId, tsElectInterval, - tsHeartbeatInterval, tsHeartbeatTimeout); - + sNInfo(pSyncNode, "sync open, node:%p electInterval:%d heartbeatInterval:%d heartbeatTimeout:%d", pSyncNode, + tsElectInterval, tsHeartbeatInterval, tsHeartbeatTimeout); return pSyncNode; _error: @@ -1251,6 +1255,8 @@ void syncNodePreClose(SSyncNode* pSyncNode) { snapshotReceiverForceStop(pSyncNode->pNewNodeReceiver); } + sDebug("vgId:%d, snapshot receiver destroy while preclose sync node, data:%p", pSyncNode->vgId, + pSyncNode->pNewNodeReceiver); snapshotReceiverDestroy(pSyncNode->pNewNodeReceiver); pSyncNode->pNewNodeReceiver = NULL; } @@ -1295,15 +1301,15 @@ void syncNodeClose(SSyncNode* pSyncNode) { syncNodeStopHeartbeatTimer(pSyncNode); for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { - if ((pSyncNode->senders)[i] != NULL) { - sSTrace((pSyncNode->senders)[i], "snapshot sender destroy while close, data:%p", (pSyncNode->senders)[i]); + if (pSyncNode->senders[i] != NULL) { + sDebug("vgId:%d, snapshot sender destroy while close, data:%p", pSyncNode->vgId, pSyncNode->senders[i]); - if (snapshotSenderIsStart((pSyncNode->senders)[i])) { - snapshotSenderStop((pSyncNode->senders)[i], false); + if (snapshotSenderIsStart(pSyncNode->senders[i])) { + snapshotSenderStop(pSyncNode->senders[i], false); } - snapshotSenderDestroy((pSyncNode->senders)[i]); - (pSyncNode->senders)[i] = NULL; + snapshotSenderDestroy(pSyncNode->senders[i]); + pSyncNode->senders[i] = NULL; } } @@ -1312,6 +1318,7 @@ void syncNodeClose(SSyncNode* pSyncNode) { snapshotReceiverForceStop(pSyncNode->pNewNodeReceiver); } + sDebug("vgId:%d, snapshot receiver destroy while close, data:%p", pSyncNode->vgId, pSyncNode->pNewNodeReceiver); snapshotReceiverDestroy(pSyncNode->pNewNodeReceiver); pSyncNode->pNewNodeReceiver = NULL; } @@ -1382,8 +1389,7 @@ int32_t syncNodeRestartElectTimer(SSyncNode* pSyncNode, int32_t ms) { return ret; } -int32_t syncNodeResetElectTimer(SSyncNode* pSyncNode) { - int32_t ret = 0; +void syncNodeResetElectTimer(SSyncNode* pSyncNode) { int32_t electMS; if (pSyncNode->pRaftCfg->isStandBy) { @@ -1391,11 +1397,11 @@ int32_t syncNodeResetElectTimer(SSyncNode* pSyncNode) { } else { electMS = syncUtilElectRandomMS(pSyncNode->electBaseLine, 2 * pSyncNode->electBaseLine); } - ret = syncNodeRestartElectTimer(pSyncNode, electMS); + + (void)syncNodeRestartElectTimer(pSyncNode, electMS); sNTrace(pSyncNode, "reset elect timer, min:%d, max:%d, ms:%d", pSyncNode->electBaseLine, 2 * pSyncNode->electBaseLine, electMS); - return ret; } static int32_t syncNodeDoStartHeartbeatTimer(SSyncNode* pSyncNode) { @@ -1455,23 +1461,20 @@ int32_t syncNodeRestartHeartbeatTimer(SSyncNode* pSyncNode) { return 0; } -// utils -------------- int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pSyncNode, SRpcMsg* pMsg) { SEpSet epSet; syncUtilRaftId2EpSet(destRaftId, &epSet); - if (pSyncNode->syncSendMSg != NULL) { - // htonl - syncUtilMsgHtoN(pMsg->pCont); + if (pSyncNode->syncSendMSg != NULL) { + syncUtilMsgHtoN(pMsg->pCont); pMsg->info.noResp = 1; - pSyncNode->syncSendMSg(&epSet, pMsg); + return pSyncNode->syncSendMSg(&epSet, pMsg); } else { sError("vgId:%d, sync send msg by id error, fp-send-msg is null", pSyncNode->vgId); rpcFreeCont(pMsg->pCont); + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; return -1; } - - return 0; } int32_t syncNodeSendMsgByInfo(const SNodeInfo* nodeInfo, SSyncNode* pSyncNode, SRpcMsg* pMsg) { @@ -1586,7 +1589,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde memcpy(oldReplicasId, pSyncNode->replicasId, sizeof(oldReplicasId)); SSyncSnapshotSender* oldSenders[TSDB_MAX_REPLICA]; for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { - oldSenders[i] = (pSyncNode->senders)[i]; + oldSenders[i] = pSyncNode->senders[i]; sSTrace(oldSenders[i], "snapshot sender save old"); } @@ -1625,7 +1628,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde // clear new for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { - (pSyncNode->senders)[i] = NULL; + pSyncNode->senders[i] = NULL; } // reset new @@ -1640,16 +1643,16 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde sNTrace(pSyncNode, "snapshot sender reset for: %" PRId64 ", newIndex:%d, %s:%d, %p", (pSyncNode->replicasId)[i].addr, i, host, port, oldSenders[j]); - (pSyncNode->senders)[i] = oldSenders[j]; + pSyncNode->senders[i] = oldSenders[j]; oldSenders[j] = NULL; reset = true; // reset replicaIndex - int32_t oldreplicaIndex = (pSyncNode->senders)[i]->replicaIndex; - (pSyncNode->senders)[i]->replicaIndex = i; + int32_t oldreplicaIndex = pSyncNode->senders[i]->replicaIndex; + pSyncNode->senders[i]->replicaIndex = i; sNTrace(pSyncNode, "snapshot sender udpate replicaIndex from %d to %d, %s:%d, %p, reset:%d", oldreplicaIndex, - i, host, port, (pSyncNode->senders)[i], reset); + i, host, port, pSyncNode->senders[i], reset); break; } @@ -1658,18 +1661,23 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde // create new for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { - if ((pSyncNode->senders)[i] == NULL) { - (pSyncNode->senders)[i] = snapshotSenderCreate(pSyncNode, i); - sSTrace((pSyncNode->senders)[i], "snapshot sender create new while reconfig, data:%p", (pSyncNode->senders)[i]); + if (pSyncNode->senders[i] == NULL) { + pSyncNode->senders[i] = snapshotSenderCreate(pSyncNode, i); + if (pSyncNode->senders[i] == NULL) { + // will be created later while send snapshot + sSError(pSyncNode->senders[i], "snapshot sender create failed while reconfig"); + } else { + sSDebug(pSyncNode->senders[i], "snapshot sender create while reconfig, data:%p", pSyncNode->senders[i]); + } } else { - sSTrace((pSyncNode->senders)[i], "snapshot sender already exist, data:%p", (pSyncNode->senders)[i]); + sSDebug(pSyncNode->senders[i], "snapshot sender already exist, data:%p", pSyncNode->senders[i]); } } // free old for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { if (oldSenders[i] != NULL) { - sNTrace(pSyncNode, "snapshot sender destroy old, data:%p replica-index:%d", oldSenders[i], i); + sSDebug(oldSenders[i], "snapshot sender destroy old, data:%p replica-index:%d", oldSenders[i], i); snapshotSenderDestroy(oldSenders[i]); oldSenders[i] = NULL; } @@ -1844,8 +1852,8 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { SSyncSnapshotSender* pMySender = syncNodeGetSnapshotSender(pSyncNode, &(pSyncNode->myRaftId)); if (pMySender != NULL) { for (int32_t i = 0; i < pSyncNode->pMatchIndex->replicaNum; ++i) { - if ((pSyncNode->senders)[i]->privateTerm > pMySender->privateTerm) { - pMySender->privateTerm = (pSyncNode->senders)[i]->privateTerm; + if (pSyncNode->senders[i]->privateTerm > pMySender->privateTerm) { + pMySender->privateTerm = pSyncNode->senders[i]->privateTerm; } } (pMySender->privateTerm) += 100; diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 225de30755..f2c86cef19 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -566,7 +566,9 @@ _out: return ret; } -int32_t syncLogReplMgrReset(SSyncLogReplMgr* pMgr) { +void syncLogReplMgrReset(SSyncLogReplMgr* pMgr) { + if (pMgr == NULL) return; + ASSERT(pMgr->startIndex >= 0); for (SyncIndex index = pMgr->startIndex; index < pMgr->endIndex; index++) { memset(&pMgr->states[index % pMgr->size], 0, sizeof(pMgr->states[0])); @@ -576,7 +578,6 @@ int32_t syncLogReplMgrReset(SSyncLogReplMgr* pMgr) { pMgr->endIndex = 0; pMgr->restored = false; pMgr->retryBackoff = 0; - return 0; } int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 54c11a503b..f2da2fb2ce 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -54,7 +54,6 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI void snapshotSenderDestroy(SSyncSnapshotSender *pSender) { if (pSender == NULL) return; - sDebug("vgId:%d, snapshot sender destroy", pSender->pSyncNode->vgId); // free current block if (pSender->pCurrentBlock != NULL) { @@ -75,12 +74,6 @@ void snapshotSenderDestroy(SSyncSnapshotSender *pSender) { bool snapshotSenderIsStart(SSyncSnapshotSender *pSender) { return pSender->start; } int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { - if (snapshotSenderIsStart(pSender)) { - sSError(pSender, "vgId:%d, snapshot sender is already start"); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - pSender->start = true; pSender->seq = SYNC_SNAPSHOT_SEQ_BEGIN; pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID; @@ -95,7 +88,7 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { pSender->snapshot.lastApplyTerm = SYNC_TERM_INVALID; pSender->snapshot.lastConfigIndex = SYNC_INDEX_INVALID; - memset(&(pSender->lastConfig), 0, sizeof(pSender->lastConfig)); + memset(&pSender->lastConfig, 0, sizeof(pSender->lastConfig)); pSender->sendingMS = 0; pSender->term = pSender->pSyncNode->pRaftStore->currentTerm; pSender->startTime = taosGetTimestampMs(); @@ -111,7 +104,7 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { SyncSnapshotSend *pMsg = rpcMsg.pCont; pMsg->srcId = pSender->pSyncNode->myRaftId; - pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex]; pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; pMsg->beginIndex = pSender->snapshotParam.start; pMsg->lastIndex = pSender->snapshot.lastApplyIndex; @@ -122,7 +115,6 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { pMsg->seq = SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT; // event log - sSDebug(pSender, "snapshot sender start"); syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender start"); // send msg @@ -134,7 +126,7 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { return 0; } -int32_t snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) { +void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) { sSDebug(pSender, "snapshot sender stop, finish:%d reader:%p", finish, pSender->pReader); // update flag @@ -154,8 +146,6 @@ int32_t snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) { pSender->pCurrentBlock = NULL; pSender->blockLen = 0; } - - return 0; } // when sender receive ack, call this function to send msg from seq @@ -177,8 +167,8 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { } if (pSender->blockLen > 0) { - sSDebug(pSender, "snapshot sender continue to read, blockLen:%d seq:%d", pSender->blockLen, pSender->seq); // has read data + sSDebug(pSender, "snapshot sender continue to read, blockLen:%d seq:%d", pSender->blockLen, pSender->seq); } else { // read finish, update seq to end pSender->seq = SYNC_SNAPSHOT_SEQ_END; @@ -194,7 +184,7 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { SyncSnapshotSend *pMsg = rpcMsg.pCont; pMsg->srcId = pSender->pSyncNode->myRaftId; - pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex]; pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; pMsg->beginIndex = pSender->snapshotParam.start; pMsg->lastIndex = pSender->snapshot.lastApplyIndex; @@ -202,7 +192,6 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; pMsg->lastConfig = pSender->lastConfig; pMsg->seq = pSender->seq; - // pMsg->privateTerm = pSender->privateTerm; if (pSender->pCurrentBlock != NULL) { memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen); @@ -210,10 +199,8 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { // event log if (pSender->seq == SYNC_SNAPSHOT_SEQ_END) { - sSDebug(pSender, "snapshot sender finish, seq:%d", pSender->seq); syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender finish"); } else { - sSDebug(pSender, "snapshot sender sending, seq:%d", pSender->seq); syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender sending"); } @@ -238,7 +225,7 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { SyncSnapshotSend *pMsg = rpcMsg.pCont; pMsg->srcId = pSender->pSyncNode->myRaftId; - pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex]; pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; pMsg->beginIndex = pSender->snapshotParam.start; pMsg->lastIndex = pSender->snapshot.lastApplyIndex; @@ -248,12 +235,10 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { pMsg->seq = pSender->seq; if (pSender->pCurrentBlock != NULL && pSender->blockLen > 0) { - // pMsg->privateTerm = pSender->privateTerm; memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen); } // event log - sSDebug(pSender, "snapshot sender resend, seq:%d", pSender->seq); syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender resend"); // send msg @@ -299,13 +284,10 @@ int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) { if (pSender->finish && taosGetTimestampMs() - pSender->endTime < SNAPSHOT_WAIT_MS) { sSInfo(pSender, "snapshot sender start too frequently, ignore"); - return 1; + return 0; } - char host[64]; - uint16_t port; - syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port); - sSInfo(pSender, "snapshot sender start for peer:%s:%u", host, port); + sSInfo(pSender, "snapshot sender start"); int32_t code = snapshotSenderStart(pSender); if (code != 0) { @@ -338,13 +320,11 @@ SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId from pReceiver->snapshot.lastApplyTerm = 0; pReceiver->snapshot.lastConfigIndex = SYNC_INDEX_INVALID; - sDebug("vgId:%d, snapshot receiver create", pSyncNode->vgId); return pReceiver; } void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) { if (pReceiver == NULL) return; - sDebug("vgId:%d, snapshot receiver destroy", pReceiver->pSyncNode->vgId); // close writer if (pReceiver->pWriter != NULL) { @@ -368,7 +348,6 @@ void snapshotReceiverForceStop(SSyncSnapshotReceiver *pReceiver) { // force close, abandon incomplete data if (pReceiver->pWriter != NULL) { - // event log int32_t ret = pReceiver->pSyncNode->pFsm->FpSnapshotStopWrite(pReceiver->pSyncNode->pFsm, pReceiver->pWriter, false, &pReceiver->snapshot); if (ret != 0) { @@ -380,13 +359,7 @@ void snapshotReceiverForceStop(SSyncSnapshotReceiver *pReceiver) { pReceiver->start = false; } -int32_t snapshotReceiverStartWriter(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg) { - if (!snapshotReceiverIsStart(pReceiver)) { - sRError(pReceiver, "snapshot receiver is not start"); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - +static int32_t snapshotReceiverStartWriter(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg) { if (pReceiver->pWriter != NULL) { sRError(pReceiver, "vgId:%d, snapshot receiver writer is not null"); terrno = TSDB_CODE_SYN_INTERNAL_ERROR; @@ -416,10 +389,10 @@ int32_t snapshotReceiverStartWriter(SSyncSnapshotReceiver *pReceiver, SyncSnapsh return 0; } -int32_t snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pPreMsg) { +void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pPreMsg) { if (snapshotReceiverIsStart(pReceiver)) { sRInfo(pReceiver, "snapshot receiver has started"); - return 0; + return; } pReceiver->start = true; @@ -430,12 +403,11 @@ int32_t snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend // event log sRInfo(pReceiver, "snapshot receiver is start"); - return 0; } // just set start = false // FpSnapshotStopWrite should not be called, assert writer == NULL -int32_t snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) { +void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) { sRInfo(pReceiver, "snapshot receiver stop, not apply, writer:%p", pReceiver->pWriter); if (pReceiver->pWriter != NULL) { @@ -450,17 +422,10 @@ int32_t snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) { } pReceiver->start = false; - return 0; } // when recv last snapshot block, apply data into snapshot static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg) { - if (pMsg->seq != SYNC_SNAPSHOT_SEQ_END) { - sRError(pReceiver, "snapshot receiver seq:%d is invalid", pMsg->seq); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - int32_t code = 0; if (pReceiver->pWriter != NULL) { // write data @@ -582,6 +547,7 @@ SyncIndex syncNodeGetSnapBeginIndex(SSyncNode *ths) { static int32_t syncNodeOnSnapshotPre(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; int64_t timeNow = taosGetTimestampMs(); + int32_t code = 0; if (snapshotReceiverIsStart(pReceiver)) { // already start @@ -593,14 +559,14 @@ static int32_t syncNodeOnSnapshotPre(SSyncNode *pSyncNode, SyncSnapshotSend *pMs sRInfo(pReceiver, "snapshot receiver startTime:%" PRId64 " == msg startTime:%" PRId64 " send reply", pReceiver->startTime, pMsg->startTime); goto _SEND_REPLY; - } else { // ignore - sRInfo(pReceiver, "snapshot receiver startTime:%" PRId64 " < msg startTime:%" PRId64 " ignore", - pReceiver->startTime, pMsg->startTime); - return 0; + sRError(pReceiver, "snapshot receiver startTime:%" PRId64 " < msg startTime:%" PRId64 " ignore", + pReceiver->startTime, pMsg->startTime); + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + code = terrno; + goto _SEND_REPLY; } - } else { // start new sRInfo(pReceiver, "snapshot receiver not start yet so start new one"); @@ -611,7 +577,8 @@ _START_RECEIVER: if (timeNow - pMsg->startTime > SNAPSHOT_MAX_CLOCK_SKEW_MS) { sRError(pReceiver, "snapshot receiver time skew too much, now:%" PRId64 " msg startTime:%" PRId64, timeNow, pMsg->startTime); - return -1; + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + code = terrno; } else { // waiting for clock match while (timeNow < pMsg->startTime) { @@ -647,7 +614,7 @@ _SEND_REPLY: pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pReceiver->startTime; pRspMsg->ack = pMsg->seq; // receiver maybe already closed - pRspMsg->code = 0; + pRspMsg->code = code; pRspMsg->snapBeginIndex = syncNodeGetSnapBeginIndex(pSyncNode); // send msg @@ -657,26 +624,36 @@ _SEND_REPLY: return -1; } - return 0; + return code; } static int32_t syncNodeOnSnapshotBegin(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { // condition 1 SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; + int32_t code = TSDB_CODE_SYN_INTERNAL_ERROR; if (!snapshotReceiverIsStart(pReceiver)) { - sRError(pReceiver, "snapshot receiver not start"); - return -1; + sRError(pReceiver, "snapshot receiver begin failed since not start"); + goto _SEND_REPLY; } if (pReceiver->startTime != pMsg->startTime) { - sRError(pReceiver, "snapshot receiver startTime:%" PRId64 " not equal to msg startTime:%" PRId64, + sRError(pReceiver, "snapshot receiver begin failed since startTime:%" PRId64 " not equal to msg startTime:%" PRId64, pReceiver->startTime, pMsg->startTime); - return -1; + goto _SEND_REPLY; } // start writer - snapshotReceiverStartWriter(pReceiver, pMsg); + if (snapshotReceiverStartWriter(pReceiver, pMsg) != 0) { + sRError(pReceiver, "snapshot receiver begin failed since start writer failed"); + goto _SEND_REPLY; + } + + code = 0; +_SEND_REPLY: + if (code != 0 && terrno != 0) { + code = terrno; + } // build msg SRpcMsg rpcMsg = {0}; @@ -693,7 +670,7 @@ static int32_t syncNodeOnSnapshotBegin(SSyncNode *pSyncNode, SyncSnapshotSend *p pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pReceiver->startTime; pRspMsg->ack = pReceiver->ack; // receiver maybe already closed - pRspMsg->code = 0; + pRspMsg->code = code; pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; // send msg @@ -703,10 +680,10 @@ static int32_t syncNodeOnSnapshotBegin(SSyncNode *pSyncNode, SyncSnapshotSend *p return -1; } - return 0; + return code; } -static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { +static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { // condition 4 // transfering SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; @@ -753,7 +730,7 @@ static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotS return -1; } - return 0; + return code; } static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { @@ -790,7 +767,7 @@ static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMs pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pReceiver->startTime; pRspMsg->ack = pReceiver->ack; // receiver maybe already closed - pRspMsg->code = 0; + pRspMsg->code = code; pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; // send msg @@ -800,7 +777,7 @@ static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMs return -1; } - return 0; + return code; } // receiver on message @@ -830,12 +807,14 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { // if already drop replica, do not process if (!syncNodeInRaftGroup(pSyncNode, &pMsg->srcId)) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "not in my config"); - return 0; + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + return -1; } if (pMsg->term < pSyncNode->pRaftStore->currentTerm) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "reject since small term"); - return 0; + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + return -1; } if (pMsg->term > pSyncNode->pRaftStore->currentTerm) { @@ -844,20 +823,21 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { syncNodeResetElectTimer(pSyncNode); // state, term, seq/ack + int32_t code = 0; if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) { if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { if (pMsg->seq == SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq pre-snapshot"); - syncNodeOnSnapshotPre(pSyncNode, pMsg); + code = syncNodeOnSnapshotPre(pSyncNode, pMsg); } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_BEGIN) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq begin"); - syncNodeOnSnapshotBegin(pSyncNode, pMsg); + code = syncNodeOnSnapshotBegin(pSyncNode, pMsg); } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_END) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq end"); - syncNodeOnSnapshotEnd(pSyncNode, pMsg); + code = syncNodeOnSnapshotEnd(pSyncNode, pMsg); if (syncLogBufferReInit(pSyncNode->pLogBuf, pSyncNode) != 0) { sRError(pReceiver, "failed to reinit log buffer since %s", terrstr()); - return -1; + code = -1; } } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_FORCE_CLOSE) { // force close, no response @@ -865,35 +845,27 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { snapshotReceiverForceStop(pReceiver); } else if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq data"); - syncNodeOnSnapshotTransfering(pSyncNode, pMsg); + code = syncNodeOnSnapshotReceive(pSyncNode, pMsg); } else { // error log sRError(pReceiver, "snapshot receiver recv error seq:%d, my ack:%d", pMsg->seq, pReceiver->ack); - return -1; + code = -1; } } else { // error log sRError(pReceiver, "snapshot receiver term not equal"); - return -1; + code = -1; } } else { // error log sRError(pReceiver, "snapshot receiver not follower"); - return -1; + code = -1; } - return 0; + return code; } -int32_t syncNodeOnSnapshotReplyPre(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) { - // get sender - SSyncSnapshotSender *pSender = syncNodeGetSnapshotSender(pSyncNode, &(pMsg->srcId)); - if (pSender == NULL) { - sNError(pSyncNode, "prepare snapshot error since sender is null"); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - +static int32_t syncNodeOnSnapshotPreRsp(SSyncNode *pSyncNode, SSyncSnapshotSender *pSender, SyncSnapshotRsp *pMsg) { SSnapshot snapshot = {0}; pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); @@ -915,7 +887,7 @@ int32_t syncNodeOnSnapshotReplyPre(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) pSender->snapshot = snapshot; // start reader - int32_t code = pSyncNode->pFsm->FpSnapshotStartRead(pSyncNode->pFsm, &(pSender->snapshotParam), &(pSender->pReader)); + int32_t code = pSyncNode->pFsm->FpSnapshotStartRead(pSyncNode->pFsm, &pSender->snapshotParam, &pSender->pReader); if (code != 0) { sSError(pSender, "prepare snapshot failed since %s", terrstr()); return -1; @@ -936,7 +908,7 @@ int32_t syncNodeOnSnapshotReplyPre(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) SyncSnapshotSend *pSendMsg = rpcMsg.pCont; pSendMsg->srcId = pSender->pSyncNode->myRaftId; - pSendMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pSendMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex]; pSendMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; pSendMsg->beginIndex = pSender->snapshotParam.start; pSendMsg->lastIndex = pSender->snapshot.lastApplyIndex; @@ -966,8 +938,9 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { SyncSnapshotRsp *pMsg = pRpcMsg->pCont; // if already drop replica, do not process - if (!syncNodeInRaftGroup(pSyncNode, &(pMsg->srcId))) { + if (!syncNodeInRaftGroup(pSyncNode, &pMsg->srcId)) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "maybe replica already dropped"); + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; return -1; } @@ -983,6 +956,7 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender not leader"); sSError(pSender, "snapshot sender not leader"); + terrno = TSDB_CODE_SYN_NOT_LEADER; goto _ERROR; } @@ -990,6 +964,7 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver time not match"); sSError(pSender, "sender:%" PRId64 " receiver:%" PRId64 " time not match, code:0x%x", pMsg->startTime, pSender->startTime, pMsg->code); + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; goto _ERROR; } @@ -997,20 +972,21 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver term not match"); sSError(pSender, "snapshot sender term not equal, msg term:%" PRId64 " currentTerm:%" PRId64, pMsg->term, pSyncNode->pRaftStore->currentTerm); + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; goto _ERROR; } if (pMsg->code != 0) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error code"); sSError(pSender, "snapshot sender receive error code:0x%x and stop sender", pMsg->code); + terrno = pMsg->code; goto _ERROR; } // prepare , send begin msg if (pMsg->ack == SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq pre-snapshot"); - syncNodeOnSnapshotReplyPre(pSyncNode, pMsg); - return 0; + return syncNodeOnSnapshotPreRsp(pSyncNode, pSender, pMsg); } if (pMsg->ack == SYNC_SNAPSHOT_SEQ_BEGIN) { @@ -1030,10 +1006,7 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq end"); snapshotSenderStop(pSender, true); SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); - if (pMgr) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "reset repl mgr"); - syncLogReplMgrReset(pMgr); - } + syncLogReplMgrReset(pMgr); return 0; } @@ -1047,22 +1020,19 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { if (snapshotSend(pSender) != 0) { return -1; } - } else if (pMsg->ack == pSender->seq - 1) { // maybe resend syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq and resend"); - snapshotReSend(pSender); - + if (snapshotReSend(pSender) != 0) { + return -1; + } } else { // error log syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error ack"); sSError(pSender, "snapshot sender receive error ack:%d, my seq:%d", pMsg->ack, pSender->seq); snapshotSenderStop(pSender, true); SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); - if (pMgr) { - syncLogReplMgrReset(pMgr); - } - + syncLogReplMgrReset(pMgr); return -1; } @@ -1071,10 +1041,7 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { _ERROR: snapshotSenderStop(pSender, true); SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); - if (pMgr) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "reset repl mgr"); - syncLogReplMgrReset(pMgr); - } + syncLogReplMgrReset(pMgr); return -1; } From b381c42c9de6f0335117ebb7334a9828e7e785ff Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 26 Dec 2022 15:59:45 +0800 Subject: [PATCH 32/53] enh: adjust log while transfer snapshot --- source/dnode/vnode/src/meta/metaSnapshot.c | 4 ++-- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/meta/metaSnapshot.c b/source/dnode/vnode/src/meta/metaSnapshot.c index 054e785980..63d044df6e 100644 --- a/source/dnode/vnode/src/meta/metaSnapshot.c +++ b/source/dnode/vnode/src/meta/metaSnapshot.c @@ -113,8 +113,8 @@ int32_t metaSnapRead(SMetaSnapReader* pReader, uint8_t** ppData) { pHdr->size = nData; memcpy(pHdr->data, pData, nData); - metaInfo("vgId:%d, vnode snapshot meta read data, version:%" PRId64 " uid:%" PRId64 " nData:%d", - TD_VID(pReader->pMeta->pVnode), key.version, key.uid, nData); + metaDebug("vgId:%d, vnode snapshot meta read data, version:%" PRId64 " uid:%" PRId64 " blockLen:%d", + TD_VID(pReader->pMeta->pVnode), key.version, key.uid, nData); _exit: return code; diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 0362d4af2a..2fc06fba86 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -257,8 +257,8 @@ _exit: pReader->index++; *nData = sizeof(SSnapDataHdr) + pHdr->size; pHdr->index = pReader->index; - vInfo("vgId:%d, vnode snapshot read data,index:%" PRId64 " type:%d nData:%d ", TD_VID(pReader->pVnode), - pReader->index, pHdr->type, *nData); + vDebug("vgId:%d, vnode snapshot read data, index:%" PRId64 " type:%d blockLen:%d ", TD_VID(pReader->pVnode), + pReader->index, pHdr->type, *nData); } else { vInfo("vgId:%d, vnode snapshot read data end, index:%" PRId64, TD_VID(pReader->pVnode), pReader->index); } From 962fab4ff3c84fc65e2f150a227be5c5b5f3c288 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao@163.com> Date: Mon, 26 Dec 2022 17:32:41 +0800 Subject: [PATCH 33/53] fix:remove assert --- source/libs/executor/src/timewindowoperator.c | 34 ++++--------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 8107cea4a0..e5dcc43797 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -907,7 +907,7 @@ static void removeDeleteResults(SHashObj* pUpdatedMap, SArray* pDelWins) { } bool isOverdue(TSKEY ekey, STimeWindowAggSupp* pTwSup) { - ASSERT(pTwSup->maxTs == INT64_MIN || pTwSup->maxTs > 0); + ASSERTS(pTwSup->maxTs == INT64_MIN || pTwSup->maxTs > 0, "maxts should greater than 0"); return pTwSup->maxTs != INT64_MIN && ekey < pTwSup->maxTs - pTwSup->waterMark; } @@ -1396,7 +1396,6 @@ static int32_t getAllIntervalWindow(SSHashObj* pHashMap, SHashObj* resWins) { while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { void* key = tSimpleHashGetKey(pIte, &keyLen); uint64_t groupId = *(uint64_t*)key; - ASSERT(keyLen == GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))); TSKEY ts = *(int64_t*)((char*)key + sizeof(uint64_t)); SResultRowPosition* pPos = (SResultRowPosition*)pIte; int32_t code = saveWinResult(ts, pPos->pageId, pPos->offset, groupId, resWins); @@ -1547,7 +1546,7 @@ static void closeChildIntervalWindow(SOperatorInfo* pOperator, SArray* pChildren for (int32_t i = 0; i < size; i++) { SOperatorInfo* pChildOp = taosArrayGetP(pChildren, i); SStreamIntervalOperatorInfo* pChInfo = pChildOp->info; - ASSERT(pChInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE); + ASSERTS(pChInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE, "children trigger type should be at once"); pChInfo->twAggSup.maxTs = TMAX(pChInfo->twAggSup.maxTs, maxTs); closeStreamIntervalWindow(pChInfo->aggSup.pResultRowHashTable, &pChInfo->twAggSup, &pChInfo->interval, NULL, NULL, NULL, pOperator); @@ -1767,8 +1766,6 @@ SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SIntervalPh .maxTs = INT64_MIN, }; - ASSERT(as.calTrigger != STREAM_TRIGGER_MAX_DELAY); - pInfo->win = pTaskInfo->window; pInfo->inputOrder = (pPhyNode->window.inputTsOrder == ORDER_ASC) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC; pInfo->resultTsOrder = (pPhyNode->window.outputTsOrder == ORDER_ASC) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC; @@ -2252,7 +2249,6 @@ static void doBuildPullDataBlock(SArray* array, int32_t* pIndex, SSDataBlock* pB return; } blockDataEnsureCapacity(pBlock, size - (*pIndex)); - ASSERT(3 <= taosArrayGetSize(pBlock->pDataBlock)); SColumnInfoData* pStartTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); SColumnInfoData* pEndTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); SColumnInfoData* pGroupId = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); @@ -2359,7 +2355,6 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* p SResultRow* pResult = NULL; int32_t forwardRows = 0; - ASSERT(pSDataBlock->pDataBlock != NULL); SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); tsCols = (int64_t*)pColDataInfo->pData; @@ -2482,7 +2477,6 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { doBuildPullDataBlock(pInfo->pPullWins, &pInfo->pullIndex, pInfo->pPullDataRes); if (pInfo->pPullDataRes->info.rows != 0) { // process the rest of the data - ASSERT(IS_FINAL_OP(pInfo)); printDataBlock(pInfo->pPullDataRes, IS_FINAL_OP(pInfo) ? "interval final" : "interval semi"); return pInfo->pPullDataRes; } @@ -2543,7 +2537,6 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { pInfo->numOfDatapack++; printDataBlock(pBlock, IS_FINAL_OP(pInfo) ? "interval final recv" : "interval semi recv"); - ASSERT(pBlock->info.type != STREAM_INVERT); if (pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_PULL_DATA) { pInfo->binfo.pRes->info.type = pBlock->info.type; } else if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || @@ -2633,7 +2626,6 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { doBuildPullDataBlock(pInfo->pPullWins, &pInfo->pullIndex, pInfo->pPullDataRes); if (pInfo->pPullDataRes->info.rows != 0) { // process the rest of the data - ASSERT(IS_FINAL_OP(pInfo)); printDataBlock(pInfo->pPullDataRes, IS_FINAL_OP(pInfo) ? "interval final" : "interval semi"); return pInfo->pPullDataRes; } @@ -2688,7 +2680,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, .deleteMarkSaved = 0, .calTriggerSaved = 0, }; - ASSERT(pInfo->twAggSup.calTrigger != STREAM_TRIGGER_MAX_DELAY); + ASSERTS(pInfo->twAggSup.calTrigger != STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); pInfo->primaryTsIndex = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId; size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; initResultSizeInfo(&pOperator->resultInfo, 4096); @@ -2713,7 +2705,6 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, initStreamFunciton(pOperator->exprSupp.pCtx, pOperator->exprSupp.numOfExprs); - ASSERT(numOfCols > 0); initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); pInfo->pState = taosMemoryCalloc(1, sizeof(SStreamState)); @@ -2724,6 +2715,9 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, pInfo->pChildren = NULL; if (numOfChild > 0) { pInfo->pChildren = taosArrayInit(numOfChild, sizeof(void*)); + if (!pInfo->pChildren) { + goto _error; + } for (int32_t i = 0; i < numOfChild; i++) { SOperatorInfo* pChildOp = createStreamFinalIntervalOperatorInfo(NULL, pPhyNode, pTaskInfo, 0); if (pChildOp) { @@ -2746,7 +2740,6 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, // semi interval operator does not catch result pInfo->isFinal = false; pOperator->name = "StreamSemiIntervalOperator"; - ASSERT(pInfo->aggSup.currentPageId == -1); } if (!IS_FINAL_OP(pInfo) || numOfChild == 0) { @@ -3162,15 +3155,6 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData } } -void deleteWindow(SArray* pWinInfos, int32_t index, FDelete fp) { - ASSERT(index >= 0 && index < taosArrayGetSize(pWinInfos)); - if (fp) { - void* ptr = taosArrayGet(pWinInfos, index); - fp(ptr); - } - taosArrayRemove(pWinInfos, index); -} - static void doDeleteTimeWindows(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SArray* result) { SColumnInfoData* pStartTsCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); TSKEY* startDatas = (TSKEY*)pStartTsCol->pData; @@ -3218,7 +3202,6 @@ static int32_t copyUpdateResult(SSHashObj* pStUpdated, SArray* pUpdated) { int32_t iter = 0; while ((pIte = tSimpleHashIterate(pStUpdated, pIte, &iter)) != NULL) { void* key = tSimpleHashGetKey(pIte, &keyLen); - ASSERT(keyLen == sizeof(SSessionKey)); taosArrayPush(pUpdated, key); } taosArraySort(pUpdated, sessionKeyCompareAsc); @@ -3279,7 +3262,6 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; int32_t numOfOutput = pSup->numOfExprs; int32_t numOfChildren = taosArrayGetSize(pInfo->pChildren); - ASSERT(pInfo->pChildren); for (int32_t i = 0; i < size; i++) { SSessionKey* pWinKey = taosArrayGet(pWinArray, i); @@ -3380,7 +3362,6 @@ static void copyDeleteWindowInfo(SArray* pResWins, SSHashObj* pStDeleted) { void initGroupResInfoFromArrayList(SGroupResInfo* pGroupResInfo, SArray* pArrayList) { pGroupResInfo->pRows = pArrayList; pGroupResInfo->index = 0; - ASSERT(pGroupResInfo->index <= getNumOfTotalRes(pGroupResInfo)); } void doBuildSessionResult(SOperatorInfo* pOperator, SStreamState* pState, SGroupResInfo* pGroupResInfo, @@ -4811,7 +4792,6 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys int32_t code = TSDB_CODE_SUCCESS; int32_t numOfCols = 0; SExprInfo* pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &numOfCols); - ASSERT(numOfCols > 0); SSDataBlock* pResBlock = createDataBlockFromDescNode(pPhyNode->pOutputDataBlockDesc); SInterval interval = { @@ -4831,7 +4811,7 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys .deleteMark = getDeleteMark(pIntervalPhyNode), }; - ASSERT(twAggSupp.calTrigger != STREAM_TRIGGER_MAX_DELAY); + ASSERTS(twAggSupp.calTrigger != STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); pOperator->pTaskInfo = pTaskInfo; pInfo->interval = interval; From e08797ac0a6274b393ac09f6765b98b73de70990 Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 26 Dec 2022 19:43:37 +0800 Subject: [PATCH 34/53] fix: ctg lock/unlock logic --- source/libs/catalog/src/ctgAsync.c | 7 +++++-- source/libs/catalog/src/ctgCache.c | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index acd18fcca5..e9273c5b1f 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -471,16 +471,19 @@ int32_t ctgHandleForceUpdate(SCatalog* pCtg, int32_t taskNum, SCtgJob* pJob, con } int32_t ctgInitTask(SCtgJob* pJob, CTG_TASK_TYPE type, void* param, int32_t* taskId) { + int32_t code = 0; int32_t tid = atomic_fetch_add_32(&pJob->taskIdx, 1); CTG_LOCK(CTG_WRITE, &pJob->taskLock); - CTG_ERR_RET((*gCtgAsyncFps[type].initFp)(pJob, tid, param)); - CTG_UNLOCK(CTG_WRITE, &pJob->taskLock); + CTG_ERR_JRET((*gCtgAsyncFps[type].initFp)(pJob, tid, param)); if (taskId) { *taskId = tid; } +_return: + CTG_UNLOCK(CTG_WRITE, &pJob->taskLock); + return TSDB_CODE_SUCCESS; } diff --git a/source/libs/catalog/src/ctgCache.c b/source/libs/catalog/src/ctgCache.c index fe83854a91..c266cc1df9 100644 --- a/source/libs/catalog/src/ctgCache.c +++ b/source/libs/catalog/src/ctgCache.c @@ -2500,6 +2500,7 @@ int32_t ctgGetTbMetasFromCache(SCatalog *pCtg, SRequestConnInfo *pConn, SCtgTbMe CTG_LOCK(CTG_READ, &pCache->metaLock); if (NULL == pCache->pMeta) { + CTG_UNLOCK(CTG_READ, &pCache->metaLock); ctgDebug("tb %s meta not in cache, dbFName:%s", pName->tname, dbFName); ctgAddFetch(&ctx->pFetchs, dbIdx, i, fetchIdx, baseResIdx + i, flag); taosArraySetSize(ctx->pResList, taosArrayGetSize(ctx->pResList) + 1); From 5e6230fac6ca40bde9c1a1f82d67d11caa69dc95 Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 26 Dec 2022 19:46:39 +0800 Subject: [PATCH 35/53] fix: ctg lock/unlock logic --- source/libs/catalog/src/ctgAsync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index e9273c5b1f..b8590c9255 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -484,7 +484,7 @@ int32_t ctgInitTask(SCtgJob* pJob, CTG_TASK_TYPE type, void* param, int32_t* tas _return: CTG_UNLOCK(CTG_WRITE, &pJob->taskLock); - return TSDB_CODE_SUCCESS; + return code; } int32_t ctgInitJob(SCatalog* pCtg, SRequestConnInfo* pConn, SCtgJob** job, const SCatalogReq* pReq, catalogCallback fp, From d8922990788c401eb6e9eaa282f5675a73ee81b6 Mon Sep 17 00:00:00 2001 From: Xuefeng Tan <1172915550@qq.com> Date: Mon, 26 Dec 2022 20:02:26 +0800 Subject: [PATCH 36/53] fix(taosAdapter): invalid pointer on stack (#19156) --- cmake/taosadapter_CMakeLists.txt.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/taosadapter_CMakeLists.txt.in b/cmake/taosadapter_CMakeLists.txt.in index a47b3b0feb..31ca6b30fa 100644 --- a/cmake/taosadapter_CMakeLists.txt.in +++ b/cmake/taosadapter_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taosadapter ExternalProject_Add(taosadapter GIT_REPOSITORY https://github.com/taosdata/taosadapter.git - GIT_TAG f0c1753 + GIT_TAG 5662a6d SOURCE_DIR "${TD_SOURCE_DIR}/tools/taosadapter" BINARY_DIR "" #BUILD_IN_SOURCE TRUE From 794fb5d1b516800e5b6bde26367c851fcc0057b9 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 26 Dec 2022 20:40:33 +0800 Subject: [PATCH 37/53] fix: handle error if sync buffer is full --- include/util/taoserror.h | 1 + source/dnode/vnode/src/vnd/vnodeSync.c | 4 ++-- source/libs/sync/inc/syncPipeline.h | 2 ++ source/libs/sync/src/syncMain.c | 10 +++++++--- source/libs/sync/src/syncPipeline.c | 23 +++++++++++++---------- source/util/src/terror.c | 1 + 6 files changed, 26 insertions(+), 15 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 3823252de6..b315432be1 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -520,6 +520,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_SYN_BATCH_ERROR TAOS_DEF_ERROR_CODE(0, 0x0913) #define TSDB_CODE_SYN_RESTORING TAOS_DEF_ERROR_CODE(0, 0x0914) #define TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG TAOS_DEF_ERROR_CODE(0, 0x0915) // internal +#define TSDB_CODE_SYN_BUFFER_FULL TAOS_DEF_ERROR_CODE(0, 0x0916) // #define TSDB_CODE_SYN_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x09FF) // tq diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 0437703c92..5697487743 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -391,9 +391,9 @@ static int32_t vnodeSyncApplyMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, const SFsm const STraceId *trace = &pMsg->info.traceId; vGTrace("vgId:%d, commit-cb is excuted, fsm:%p, index:%" PRId64 ", term:%" PRIu64 ", msg-index:%" PRId64 - ", weak:%d, code:%d, state:%d %s, type:%s", + ", weak:%d, code:%d, state:%d %s, type:%s code:0x%x", pVnode->config.vgId, pFsm, pMeta->index, pMeta->term, pMsg->info.conn.applyIndex, pMeta->isWeak, pMeta->code, - pMeta->state, syncStr(pMeta->state), TMSG_INFO(pMsg->msgType)); + pMeta->state, syncStr(pMeta->state), TMSG_INFO(pMsg->msgType), pMsg->code); return tmsgPutToQueue(&pVnode->msgCb, APPLY_QUEUE, pMsg); } diff --git a/source/libs/sync/inc/syncPipeline.h b/source/libs/sync/inc/syncPipeline.h index 9188be2b42..a0a0691694 100644 --- a/source/libs/sync/inc/syncPipeline.h +++ b/source/libs/sync/inc/syncPipeline.h @@ -109,6 +109,8 @@ SSyncRaftEntry* syncLogBufferGetOneEntry(SSyncLogBuffer* pBuf, SSyncNode* pNode, int32_t syncLogBufferValidate(SSyncLogBuffer* pBuf); int32_t syncLogBufferRollback(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncIndex toIndex); +int32_t syncLogFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, SyncTerm term, SSyncRaftEntry* pEntry, + int32_t applyCode); #ifdef __cplusplus } #endif diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 152e16bd2e..636890b5aa 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -2386,7 +2386,11 @@ int32_t syncCacheEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry, LRUHand int32_t syncNodeAppend(SSyncNode* ths, SSyncRaftEntry* pEntry) { // append to log buffer if (syncLogBufferAppend(ths->pLogBuf, ths, pEntry) < 0) { - sError("vgId:%d, failed to enqueue sync log buffer. index:%" PRId64 "", ths->vgId, pEntry->index); + sError("vgId:%d, failed to enqueue sync log buffer, index:%" PRId64, ths->vgId, pEntry->index); + terrno = TSDB_CODE_SYN_BUFFER_FULL; + (void)syncLogFsmExecute(ths, ths->pFsm, ths->state, ths->pRaftStore->currentTerm, pEntry, + TSDB_CODE_SYN_BUFFER_FULL); + syncEntryDestroy(pEntry); return -1; } @@ -2685,8 +2689,8 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIn } int32_t code = syncNodeAppend(ths, pEntry); - if (code < 0 && ths->vgId != 1 && vnodeIsMsgBlock(pEntry->originalRpcType)) { - ASSERTS(false, "failed to append blocking msg"); + if (code < 0) { + sNError(ths, "failed to append blocking msg"); } return code; } diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index f2c86cef19..ef37600e98 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -26,6 +26,11 @@ #include "syncSnapshot.h" #include "syncUtil.h" +static bool syncIsMsgBlock(tmsg_t type) { + return (type == TDMT_VND_CREATE_TABLE) || (type == TDMT_VND_ALTER_TABLE) || (type == TDMT_VND_DROP_TABLE) || + (type == TDMT_VND_UPDATE_TAG_VAL) || (type == TDMT_VND_ALTER_CONFIRM); +} + int64_t syncLogBufferGetEndIndex(SSyncLogBuffer* pBuf) { taosThreadMutexLock(&pBuf->mutex); int64_t index = pBuf->endIndex; @@ -441,26 +446,25 @@ _out: return matchIndex; } -int32_t syncLogFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, SyncTerm term, SSyncRaftEntry* pEntry) { - ASSERTS(pFsm->FpCommitCb != NULL, "No commit cb registered for the FSM"); - +int32_t syncLogFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, SyncTerm term, SSyncRaftEntry* pEntry, + int32_t applyCode) { if ((pNode->replicaNum == 1) && pNode->restoreFinish && pNode->vgId != 1) { return 0; } - if (pNode->vgId != 1 && vnodeIsMsgBlock(pEntry->originalRpcType)) { - sTrace("vgId:%d, blocking msg ready to execute. index:%" PRId64 ", term: %" PRId64 ", type: %s", pNode->vgId, - pEntry->index, pEntry->term, TMSG_INFO(pEntry->originalRpcType)); + if (pNode->vgId != 1 && syncIsMsgBlock(pEntry->originalRpcType)) { + sTrace("vgId:%d, blocking msg ready to execute, index:%" PRId64 ", term:%" PRId64 ", type:%s code:0x%x", + pNode->vgId, pEntry->index, pEntry->term, TMSG_INFO(pEntry->originalRpcType), applyCode); } - SRpcMsg rpcMsg = {0}; + SRpcMsg rpcMsg = {.code = applyCode}; syncEntry2OriginalRpc(pEntry, &rpcMsg); SFsmCbMeta cbMeta = {0}; cbMeta.index = pEntry->index; cbMeta.lastConfigIndex = syncNodeGetSnapshotConfigIndex(pNode, pEntry->index); cbMeta.isWeak = pEntry->isWeak; - cbMeta.code = 0; + cbMeta.code = applyCode; cbMeta.state = role; cbMeta.seqNum = pEntry->seqNum; cbMeta.term = pEntry->term; @@ -469,7 +473,6 @@ int32_t syncLogFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, Syn (void)syncRespMgrGetAndDel(pNode->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info); int32_t code = pFsm->FpCommitCb(pFsm, &rpcMsg, &cbMeta); - ASSERT(rpcMsg.pCont == NULL); return code; } @@ -520,7 +523,7 @@ int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t comm pEntry->term, TMSG_INFO(pEntry->originalRpcType)); } - if (syncLogFsmExecute(pNode, pFsm, role, term, pEntry) != 0) { + if (syncLogFsmExecute(pNode, pFsm, role, term, pEntry, 0) != 0) { sError("vgId:%d, failed to execute sync log entry. index:%" PRId64 ", term:%" PRId64 ", role: %d, current term: %" PRId64, vgId, pEntry->index, pEntry->term, role, term); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 2d7121e2ae..ff61c7cdc4 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -407,6 +407,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SYN_STANDBY_NOT_READY, "Sync not ready for st TAOS_DEFINE_ERROR(TSDB_CODE_SYN_BATCH_ERROR, "Sync batch error") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_RESTORING, "Sync is restoring") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG, "Sync invalid snapshot msg") +TAOS_DEFINE_ERROR(TSDB_CODE_SYN_BUFFER_FULL, "Sync buffer is full") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INTERNAL_ERROR, "Sync internal error") //tq From a7bfeb5fbe1492bb9c486601cedd980ba16044ef Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 26 Dec 2022 23:10:46 +0800 Subject: [PATCH 38/53] fix: check existence of SMsgHead in SSyncRaftEntry data or SRpcMsg pCont --- source/dnode/mgmt/mgmt_vnode/src/vmWorker.c | 8 ++++++++ source/libs/sync/src/syncMain.c | 18 +++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index 7e3915f3d1..faa94a335d 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -233,6 +233,14 @@ int32_t vmPutMsgToMgmtQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { } int32_t vmPutRpcMsgToQueue(SVnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) { + if (pRpc->contLen < sizeof(SMsgHead)) { + dError("invalid rpc msg since no msg head at pCont. pRpc:%p, type:%s, len:%d", pRpc, TMSG_INFO(pRpc->msgType), + pRpc->contLen); + rpcFreeCont(pRpc->pCont); + pRpc->pCont = NULL; + return -1; + } + SRpcMsg *pMsg = taosAllocateQitem(sizeof(SRpcMsg), RPC_QITEM, pRpc->contLen); if (pMsg == NULL) { rpcFreeCont(pRpc->pCont); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 152e16bd2e..b071aec2f4 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -2384,6 +2384,13 @@ int32_t syncCacheEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry, LRUHand } int32_t syncNodeAppend(SSyncNode* ths, SSyncRaftEntry* pEntry) { + if (pEntry->dataLen < sizeof(SMsgHead)) { + sError("vgId:%d, cannot append an invalid client request with no msg head. type:%s, dataLen:%d", ths->vgId, + TMSG_INFO(pEntry->originalRpcType), pEntry->dataLen); + syncEntryDestroy(pEntry); + return -1; + } + // append to log buffer if (syncLogBufferAppend(ths->pLogBuf, ths, pEntry) < 0) { sError("vgId:%d, failed to enqueue sync log buffer. index:%" PRId64 "", ths->vgId, pEntry->index); @@ -2679,16 +2686,21 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIn pEntry = syncEntryBuildFromRpcMsg(pMsg, term, index); } + if (pEntry == NULL) { + sError("vgId:%d, failed to process client request since %s.", ths->vgId, terrstr()); + return -1; + } + if (ths->state == TAOS_SYNC_STATE_LEADER) { if (pRetIndex) { (*pRetIndex) = index; } int32_t code = syncNodeAppend(ths, pEntry); - if (code < 0 && ths->vgId != 1 && vnodeIsMsgBlock(pEntry->originalRpcType)) { - ASSERTS(false, "failed to append blocking msg"); - } return code; + } else { + syncEntryDestroy(pEntry); + pEntry = NULL; } return -1; From 58b5d2e0aafc4a0cfbc86c2b917cb4d630beba64 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 27 Dec 2022 11:02:07 +0800 Subject: [PATCH 39/53] fix(query): fix race condition. --- source/dnode/vnode/src/meta/metaCache.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/source/dnode/vnode/src/meta/metaCache.c b/source/dnode/vnode/src/meta/metaCache.c index 513ee5a1c2..d68658b0d9 100644 --- a/source/dnode/vnode/src/meta/metaCache.c +++ b/source/dnode/vnode/src/meta/metaCache.c @@ -57,7 +57,6 @@ struct SMetaCache { TdThreadMutex lock; SHashObj* pTableEntry; SLRUCache* pUidResCache; - uint64_t keyBuf[3]; } sTagFilterResCache; }; @@ -429,20 +428,20 @@ int32_t metaGetCachedTableUidList(SMeta* pMeta, tb_uid_t suid, const uint8_t* pK bool* acquireRes) { // generate the composed key for LRU cache SLRUCache* pCache = pMeta->pCache->sTagFilterResCache.pUidResCache; - uint64_t* pBuf = pMeta->pCache->sTagFilterResCache.keyBuf; SHashObj* pTableMap = pMeta->pCache->sTagFilterResCache.pTableEntry; TdThreadMutex* pLock = &pMeta->pCache->sTagFilterResCache.lock; + uint64_t buf[3] = {0}; uint32_t times = 0; *acquireRes = 0; - pBuf[0] = suid; - memcpy(&pBuf[1], pKey, keyLen); + buf[0] = suid; + memcpy(&buf[1], pKey, keyLen); taosThreadMutexLock(pLock); int32_t len = keyLen + sizeof(uint64_t); - LRUHandle* pHandle = taosLRUCacheLookup(pCache, pBuf, len); + LRUHandle* pHandle = taosLRUCacheLookup(pCache, buf, len); if (pHandle == NULL) { taosThreadMutexUnlock(pLock); return TSDB_CODE_SUCCESS; @@ -476,10 +475,10 @@ int32_t metaGetCachedTableUidList(SMeta* pMeta, tb_uid_t suid, const uint8_t* pK SListNode* pNode = NULL; while ((pNode = tdListNext(&iter)) != NULL) { - memcpy(&pBuf[1], pNode->data, keyLen); + memcpy(&buf[1], pNode->data, keyLen); // check whether it is existed in LRU cache, and remove it from linked list if not. - LRUHandle* pRes = taosLRUCacheLookup(pCache, pBuf, len); + LRUHandle* pRes = taosLRUCacheLookup(pCache, buf, len); if (pRes == NULL) { // remove the item in the linked list taosArrayPush(pInvalidRes, &pNode); } else { @@ -547,14 +546,14 @@ int32_t metaUidFilterCachePut(SMeta* pMeta, uint64_t suid, const void* pKey, int tdListAppend(&(*pEntry)->list, pKey); } - uint64_t* pBuf = pMeta->pCache->sTagFilterResCache.keyBuf; - pBuf[0] = suid; + uint64_t buf[3] = {0}; + buf[0] = suid; - memcpy(&pBuf[1], pKey, keyLen); + memcpy(&buf[1], pKey, keyLen); ASSERT(sizeof(uint64_t) + keyLen == 24); // add to cache. - taosLRUCacheInsert(pCache, pBuf, sizeof(uint64_t) + keyLen, pPayload, payloadLen, freePayload, NULL, + taosLRUCacheInsert(pCache, buf, sizeof(uint64_t) + keyLen, pPayload, payloadLen, freePayload, NULL, TAOS_LRU_PRIORITY_LOW); taosThreadMutexUnlock(pLock); From 5826e40aa7f228c31187c839456022fc483e2f44 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 27 Dec 2022 11:19:36 +0800 Subject: [PATCH 40/53] enh: drop stream eagerly --- source/libs/stream/src/streamExec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 20608a6cf3..8e746c6738 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -48,6 +48,10 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* // exec while (1) { + if (pTask->taskStatus == TASK_STATUS__DROPPING) { + return 0; + } + SSDataBlock* output = NULL; uint64_t ts = 0; if ((code = qExecTask(exec, &output, &ts)) < 0) { From bb9f2651f1a8b34ec843e10ebb4fd5f8929cc458 Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Tue, 27 Dec 2022 13:03:26 +0800 Subject: [PATCH 41/53] chore: add comp postfix for taos-tools (#19169) --- packaging/tools/makepkg.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packaging/tools/makepkg.sh b/packaging/tools/makepkg.sh index f30a8a637e..0ee548242f 100755 --- a/packaging/tools/makepkg.sh +++ b/packaging/tools/makepkg.sh @@ -348,7 +348,8 @@ cd ${release_dir} # install_dir has been distinguishes cluster from edege, so comments this code pkg_name=${install_dir}-${osType}-${cpuType} -taostools_pkg_name=${taostools_install_dir}-${osType}-${cpuType} +versionCompFirst=$(echo ${versionComp} | awk -F '.' '{print $1}') +taostools_pkg_name=${taostools_install_dir}-${osType}-${cpuType}-comp${versionCompFirst} # if [ "$verMode" == "cluster" ]; then # pkg_name=${install_dir}-${osType}-${cpuType} From 5ae6829a8379cfb1191697d381fb305d3343d01a Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 27 Dec 2022 13:28:28 +0800 Subject: [PATCH 42/53] fix: add sem free and init log --- source/libs/scheduler/src/schJob.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/libs/scheduler/src/schJob.c b/source/libs/scheduler/src/schJob.c index d422f0e88f..6a8f81f8c7 100644 --- a/source/libs/scheduler/src/schJob.c +++ b/source/libs/scheduler/src/schJob.c @@ -668,6 +668,7 @@ void schFreeJobImpl(void *job) { taosMemoryFreeClear(pJob->userRes.execRes); taosMemoryFreeClear(pJob->fetchRes); taosMemoryFreeClear(pJob->sql); + tsem_destroy(&pJob->rspSem); taosMemoryFree(pJob); int32_t jobNum = atomic_sub_fetch_32(&schMgmt.jobNum, 1); @@ -748,7 +749,10 @@ int32_t schInitJob(int64_t *pJobId, SSchedulerReq *pReq) { SCH_ERR_JRET(TSDB_CODE_OUT_OF_MEMORY); } - tsem_init(&pJob->rspSem, 0, 0); + if (tsem_init(&pJob->rspSem, 0, 0)) { + SCH_JOB_ELOG("tsem_init failed, errno:%d", errno); + SCH_ERR_JRET(TSDB_CODE_OUT_OF_MEMORY); + } pJob->refId = taosAddRef(schMgmt.jobRef, pJob); if (pJob->refId < 0) { From b2cec0f1e9c3c4f69fbb156513f22170dea2835e Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 27 Dec 2022 13:57:00 +0800 Subject: [PATCH 43/53] fix: skip msg --- source/dnode/vnode/src/tq/tq.c | 4 +++- source/dnode/vnode/src/vnd/vnodeSvr.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 045b497371..e366795fd3 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1004,8 +1004,10 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { int32_t len; tEncodeSize(tEncodeSStreamTaskCheckRsp, &rsp, len, code); if (code < 0) { - ASSERT(0); + tqDebug("tq encode stream check rsp error"); + return -1; } + void* buf = rpcMallocCont(sizeof(SMsgHead) + len); ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId); diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index fe9aad4a20..8d53579483 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -197,7 +197,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp if (!syncUtilUserCommit(pMsg->msgType)) goto _exit; - if (pMsg->msgType == TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE) { + if (pMsg->msgType == TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE || pMsg->msgType == TDMT_STREAM_TASK_CHECK_RSP) { if (tqCheckLogInWal(pVnode->pTq, version)) return 0; } From b7f6d152f86431da4f1a0841778a463783bbedd1 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 27 Dec 2022 14:14:18 +0800 Subject: [PATCH 44/53] fix: display the correct log information --- source/common/src/tglobal.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 9caf0cc33e..ad89428d07 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -1316,12 +1316,17 @@ void taosSetDebugFlag(int32_t *pFlagPtr, const char *flagName, int32_t flagVal, if (pItem != NULL && (rewrite || pItem->i32 == 0)) { pItem->i32 = flagVal; } - *pFlagPtr = flagVal; + if (pFlagPtr != NULL) { + *pFlagPtr = flagVal; + } } void taosSetAllDebugFlag(int32_t flag, bool rewrite) { if (flag <= 0) return; + taosSetDebugFlag(NULL, "debugFlag", flag, rewrite); + taosSetDebugFlag(NULL, "simDebugFlag", flag, rewrite); + taosSetDebugFlag(NULL, "tmrDebugFlag", flag, rewrite); taosSetDebugFlag(&uDebugFlag, "uDebugFlag", flag, rewrite); taosSetDebugFlag(&rpcDebugFlag, "rpcDebugFlag", flag, rewrite); taosSetDebugFlag(&jniDebugFlag, "jniDebugFlag", flag, rewrite); @@ -1333,6 +1338,7 @@ void taosSetAllDebugFlag(int32_t flag, bool rewrite) { taosSetDebugFlag(&wDebugFlag, "wDebugFlag", flag, rewrite); taosSetDebugFlag(&sDebugFlag, "sDebugFlag", flag, rewrite); taosSetDebugFlag(&tsdbDebugFlag, "tsdbDebugFlag", flag, rewrite); + taosSetDebugFlag(&tsdbDebugFlag, "tsdbDebugFlag", flag, rewrite); taosSetDebugFlag(&tqDebugFlag, "tqDebugFlag", flag, rewrite); taosSetDebugFlag(&fsDebugFlag, "fsDebugFlag", flag, rewrite); taosSetDebugFlag(&udfDebugFlag, "udfDebugFlag", flag, rewrite); @@ -1340,6 +1346,5 @@ void taosSetAllDebugFlag(int32_t flag, bool rewrite) { taosSetDebugFlag(&idxDebugFlag, "idxDebugFlag", flag, rewrite); taosSetDebugFlag(&tdbDebugFlag, "tdbDebugFlag", flag, rewrite); taosSetDebugFlag(&metaDebugFlag, "metaDebugFlag", flag, rewrite); - taosSetDebugFlag(&metaDebugFlag, "tmrDebugFlag", flag, rewrite); uInfo("all debug flag are set to %d", flag); } From 861493010ea689b790defe795b376c619f2f00f1 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 27 Dec 2022 14:15:20 +0800 Subject: [PATCH 45/53] fix: display the correct log information --- source/common/src/tglobal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index ad89428d07..d3fd625a91 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -1338,7 +1338,6 @@ void taosSetAllDebugFlag(int32_t flag, bool rewrite) { taosSetDebugFlag(&wDebugFlag, "wDebugFlag", flag, rewrite); taosSetDebugFlag(&sDebugFlag, "sDebugFlag", flag, rewrite); taosSetDebugFlag(&tsdbDebugFlag, "tsdbDebugFlag", flag, rewrite); - taosSetDebugFlag(&tsdbDebugFlag, "tsdbDebugFlag", flag, rewrite); taosSetDebugFlag(&tqDebugFlag, "tqDebugFlag", flag, rewrite); taosSetDebugFlag(&fsDebugFlag, "fsDebugFlag", flag, rewrite); taosSetDebugFlag(&udfDebugFlag, "udfDebugFlag", flag, rewrite); From ab8efc828d4837fb5447355663772617e656669d Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 27 Dec 2022 14:23:09 +0800 Subject: [PATCH 46/53] enh: check existence of msg head at SRpcMsg pCont in vmPutMsgToQueue --- source/dnode/mgmt/mgmt_vnode/src/vmWorker.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index faa94a335d..5afb9a0512 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -140,6 +140,12 @@ static void vmProcessSyncQueue(SQueueInfo *pInfo, STaosQall *qall, int32_t numOf static int32_t vmPutMsgToQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg, EQueueType qtype) { const STraceId *trace = &pMsg->info.traceId; + if (pMsg->contLen < sizeof(SMsgHead)) { + dGError("invalid rpc msg with no msg head at pCont. pMsg:%p, type:%s, contLen:%d", pMsg, TMSG_INFO(pMsg->msgType), + pMsg->contLen); + return -1; + } + SMsgHead *pHead = pMsg->pCont; int32_t code = 0; @@ -234,7 +240,7 @@ int32_t vmPutMsgToMgmtQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { int32_t vmPutRpcMsgToQueue(SVnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) { if (pRpc->contLen < sizeof(SMsgHead)) { - dError("invalid rpc msg since no msg head at pCont. pRpc:%p, type:%s, len:%d", pRpc, TMSG_INFO(pRpc->msgType), + dError("invalid rpc msg with no msg head at pCont. pRpc:%p, type:%s, len:%d", pRpc, TMSG_INFO(pRpc->msgType), pRpc->contLen); rpcFreeCont(pRpc->pCont); pRpc->pCont = NULL; From 39eb62d80d3fb20a37e9319b856f01a3770ae7c1 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 27 Dec 2022 14:35:55 +0800 Subject: [PATCH 47/53] enh: set scan limit --- include/libs/executor/executor.h | 1 + source/libs/executor/inc/executorimpl.h | 128 +++++++++++++----------- source/libs/executor/src/executor.c | 4 + source/libs/executor/src/scanoperator.c | 44 +++++--- source/libs/stream/src/streamExec.c | 6 +- 5 files changed, 105 insertions(+), 78 deletions(-) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 412b4b4cf6..cfd5bd1ed7 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -213,6 +213,7 @@ int32_t qStreamSourceRecoverStep1(qTaskInfo_t tinfo, int64_t ver); int32_t qStreamSourceRecoverStep2(qTaskInfo_t tinfo, int64_t ver); int32_t qStreamRecoverFinish(qTaskInfo_t tinfo); int32_t qStreamRestoreParam(qTaskInfo_t tinfo); +bool qStreamRecoverScanFinished(qTaskInfo_t tinfo); #ifdef __cplusplus } diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 647da78a78..e3b4b57052 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -136,6 +136,7 @@ typedef struct { SSchemaWrapper* schema; char tbName[TSDB_TABLE_NAME_LEN]; int8_t recoverStep; + int8_t recoverScanFinished; SQueryTableDataCond tableCond; int64_t fillHistoryVer1; int64_t fillHistoryVer2; @@ -182,7 +183,7 @@ struct SExecTaskInfo { SSubplan* pSubplan; struct SOperatorInfo* pRoot; SLocalFetch localFetch; - SArray* pResultBlockList;// result block list + SArray* pResultBlockList; // result block list STaskStopInfo stopInfo; }; @@ -199,7 +200,7 @@ typedef struct SOperatorFpSet { __optr_fn_t getNextFn; __optr_fn_t cleanupFn; // call this function to release the allocated resources ASAP __optr_close_fn_t closeFn; - __optr_reqBuf_fn_t reqBufFn; // total used buffer for blocking operator + __optr_reqBuf_fn_t reqBufFn; // total used buffer for blocking operator __optr_encode_fn_t encodeResultRow; __optr_decode_fn_t decodeResultRow; __optr_explain_fn_t getExplainFn; @@ -255,22 +256,22 @@ typedef struct SLimitInfo { } SLimitInfo; typedef struct SExchangeInfo { - SArray* pSources; - SArray* pSourceDataInfo; - tsem_t ready; - void* pTransporter; + SArray* pSources; + SArray* pSourceDataInfo; + tsem_t ready; + void* pTransporter; // SArray, result block list, used to keep the multi-block that // passed by downstream operator - SArray* pResultBlockList; - SArray* pRecycledBlocks;// build a pool for small data block to avoid to repeatly create and then destroy. - SSDataBlock* pDummyBlock; // dummy block, not keep data - bool seqLoadData; // sequential load data or not, false by default - int32_t current; + SArray* pResultBlockList; + SArray* pRecycledBlocks; // build a pool for small data block to avoid to repeatly create and then destroy. + SSDataBlock* pDummyBlock; // dummy block, not keep data + bool seqLoadData; // sequential load data or not, false by default + int32_t current; SLoadRemoteDataInfo loadInfo; uint64_t self; SLimitInfo limitInfo; - int64_t openedTs; // start exec time stamp, todo: move to SLoadRemoteDataInfo + int64_t openedTs; // start exec time stamp, todo: move to SLoadRemoteDataInfo } SExchangeInfo; typedef struct SScanInfo { @@ -305,9 +306,9 @@ typedef struct { } SAggOptrPushDownInfo; typedef struct STableMetaCacheInfo { - SLRUCache* pTableMetaEntryCache; // 100 by default - uint64_t metaFetch; - uint64_t cacheHit; + SLRUCache* pTableMetaEntryCache; // 100 by default + uint64_t metaFetch; + uint64_t cacheHit; } STableMetaCacheInfo; typedef struct STableScanBase { @@ -325,46 +326,46 @@ typedef struct STableScanBase { } STableScanBase; typedef struct STableScanInfo { - STableScanBase base; - SScanInfo scanInfo; - int32_t scanTimes; - SSDataBlock* pResBlock; - SSampleExecInfo sample; // sample execution info - int32_t currentGroupId; - int32_t currentTable; - int8_t scanMode; - int8_t assignBlockUid; - bool hasGroupByTag; + STableScanBase base; + SScanInfo scanInfo; + int32_t scanTimes; + SSDataBlock* pResBlock; + SSampleExecInfo sample; // sample execution info + int32_t currentGroupId; + int32_t currentTable; + int8_t scanMode; + int8_t assignBlockUid; + bool hasGroupByTag; } STableScanInfo; typedef struct STableMergeScanInfo { - int32_t tableStartIndex; - int32_t tableEndIndex; - bool hasGroupId; - uint64_t groupId; - SArray* queryConds; // array of queryTableDataCond - STableScanBase base; - int32_t bufPageSize; - uint32_t sortBufSize; // max buffer size for in-memory sort - SArray* pSortInfo; - SSortHandle* pSortHandle; - SSDataBlock* pSortInputBlock; - int64_t startTs; // sort start time - SArray* sortSourceParams; - SLimitInfo limitInfo; - int64_t numOfRows; - SScanInfo scanInfo; - SSDataBlock* pResBlock; - SSampleExecInfo sample; // sample execution info - SSortExecInfo sortExecInfo; + int32_t tableStartIndex; + int32_t tableEndIndex; + bool hasGroupId; + uint64_t groupId; + SArray* queryConds; // array of queryTableDataCond + STableScanBase base; + int32_t bufPageSize; + uint32_t sortBufSize; // max buffer size for in-memory sort + SArray* pSortInfo; + SSortHandle* pSortHandle; + SSDataBlock* pSortInputBlock; + int64_t startTs; // sort start time + SArray* sortSourceParams; + SLimitInfo limitInfo; + int64_t numOfRows; + SScanInfo scanInfo; + SSDataBlock* pResBlock; + SSampleExecInfo sample; // sample execution info + SSortExecInfo sortExecInfo; } STableMergeScanInfo; typedef struct STagScanInfo { - SColumnInfo* pCols; - SSDataBlock* pRes; - SColMatchInfo matchInfo; - int32_t curPos; - SReadHandle readHandle; + SColumnInfo* pCols; + SSDataBlock* pRes; + SColMatchInfo matchInfo; + int32_t curPos; + SReadHandle readHandle; } STagScanInfo; typedef enum EStreamScanMode { @@ -468,6 +469,11 @@ typedef struct SStreamScanInfo { SNodeList* pGroupTags; SNode* pTagCond; SNode* pTagIndexCond; + + // recover + int32_t blockRecoverContiCnt; + int32_t blockRecoverTotCnt; + } SStreamScanInfo; typedef struct { @@ -499,8 +505,8 @@ typedef struct STableCountScanOperatorInfo { STableCountScanSupp supp; - int32_t currGrpIdx; - SArray* stbUidList; // when group by db_name and/or stable_name + int32_t currGrpIdx; + SArray* stbUidList; // when group by db_name and/or stable_name } STableCountScanOperatorInfo; typedef struct SOptrBasicInfo { @@ -678,19 +684,19 @@ void setOperatorInfo(SOperatorInfo* pOperator, const char* name, int32 void destroyOperatorInfo(SOperatorInfo* pOperator); int32_t optrDefaultBufFn(SOperatorInfo* pOperator); -void initBasicInfo(SOptrBasicInfo* pInfo, SSDataBlock* pBlock); -void cleanupBasicInfo(SOptrBasicInfo* pInfo); +void initBasicInfo(SOptrBasicInfo* pInfo, SSDataBlock* pBlock); +void cleanupBasicInfo(SOptrBasicInfo* pInfo); int32_t initExprSupp(SExprSupp* pSup, SExprInfo* pExprInfo, int32_t numOfExpr); void cleanupExprSupp(SExprSupp* pSup); -void destroyExprInfo(SExprInfo* pExpr, int32_t numOfExprs); +void destroyExprInfo(SExprInfo* pExpr, int32_t numOfExprs); int32_t initAggSup(SExprSupp* pSup, SAggSupporter* pAggSup, SExprInfo* pExprInfo, int32_t numOfCols, size_t keyBufSize, const char* pkey); void cleanupAggSup(SAggSupporter* pAggSup); -void initResultSizeInfo(SResultInfo* pResultInfo, int32_t numOfRows); +void initResultSizeInfo(SResultInfo* pResultInfo, int32_t numOfRows); void doBuildStreamResBlock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo, SDiskbasedBuf* pBuf); @@ -803,10 +809,10 @@ void setInputDataBlock(SExprSupp* pExprSupp, SSDataBlock* pBlock, int32_t order, int32_t checkForQueryBuf(size_t numOfTables); -bool isTaskKilled(SExecTaskInfo* pTaskInfo); -void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode); -void doDestroyTask(SExecTaskInfo* pTaskInfo); -void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status); +bool isTaskKilled(SExecTaskInfo* pTaskInfo); +void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode); +void doDestroyTask(SExecTaskInfo* pTaskInfo); +void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status); int32_t createExecTaskInfoImpl(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SReadHandle* pHandle, uint64_t taskId, char* sql, EOPTR_EXEC_MODEL model); @@ -828,8 +834,8 @@ bool isDeletedWindow(STimeWindow* pWin, uint64_t groupId, SAggSupporter* pSup); bool isDeletedStreamWindow(STimeWindow* pWin, uint64_t groupId, SStreamState* pState, STimeWindowAggSupp* pTwSup); void appendOneRowToStreamSpecialBlock(SSDataBlock* pBlock, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t* pUid, uint64_t* pGp, void* pTbName); -uint64_t calGroupIdByData(SPartitionBySupporter* pParSup, SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t rowId); -void calBlockTbName(SStreamScanInfo* pInfo, SSDataBlock* pBlock); +uint64_t calGroupIdByData(SPartitionBySupporter* pParSup, SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t rowId); +void calBlockTbName(SStreamScanInfo* pInfo, SSDataBlock* pBlock); int32_t finalizeResultRows(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition, SExprSupp* pSup, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo); diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 75de012947..e5ff104d5c 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -936,6 +936,10 @@ int32_t qStreamRestoreParam(qTaskInfo_t tinfo) { } return 0; } +bool qStreamRecoverScanFinished(qTaskInfo_t tinfo) { + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; + return pTaskInfo->streamInfo.recoverScanFinished; +} void* qExtractReaderFromStreamScanner(void* scanner) { SStreamScanInfo* pInfo = scanner; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index d074ceede8..79687a5ff8 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -768,8 +768,8 @@ static SSDataBlock* doTableScan(SOperatorInfo* pOperator) { tableListGetGroupList(pTaskInfo->pTableInfoList, pInfo->currentGroupId, &pList, &num); ASSERT(pInfo->base.dataReader == NULL); - int32_t code = tsdbReaderOpen(pInfo->base.readHandle.vnode, &pInfo->base.cond, pList, num, - pInfo->pResBlock, (STsdbReader**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo)); + int32_t code = tsdbReaderOpen(pInfo->base.readHandle.vnode, &pInfo->base.cond, pList, num, pInfo->pResBlock, + (STsdbReader**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo)); if (code != TSDB_CODE_SUCCESS) { T_LONG_JMP(pTaskInfo->env, code); } @@ -986,8 +986,8 @@ static SSDataBlock* readPreVersionData(SOperatorInfo* pTableScanOp, uint64_t tbU SSDataBlock* pBlock = pTableScanInfo->pResBlock; STsdbReader* pReader = NULL; - int32_t code = tsdbReaderOpen(pTableScanInfo->base.readHandle.vnode, &cond, &tblInfo, 1, pBlock, (STsdbReader**)&pReader, - GET_TASKID(pTaskInfo)); + int32_t code = tsdbReaderOpen(pTableScanInfo->base.readHandle.vnode, &cond, &tblInfo, 1, pBlock, + (STsdbReader**)&pReader, GET_TASKID(pTaskInfo)); if (code != TSDB_CODE_SUCCESS) { terrno = code; T_LONG_JMP(pTaskInfo->env, code); @@ -995,7 +995,7 @@ static SSDataBlock* readPreVersionData(SOperatorInfo* pTableScanOp, uint64_t tbU } if (tsdbNextDataBlock(pReader)) { - /*SSDataBlock* p = */tsdbRetrieveDataBlock(pReader, NULL); + /*SSDataBlock* p = */ tsdbRetrieveDataBlock(pReader, NULL); doSetTagColumnData(&pTableScanInfo->base, pBlock, pTaskInfo, pBlock->info.rows); pBlock->info.id.groupId = getTableGroupId(pTaskInfo->pTableInfoList, pBlock->info.id.uid); } @@ -1224,7 +1224,7 @@ static int32_t generateIntervalScanRange(SStreamScanInfo* pInfo, SSDataBlock* pS SColumnInfoData* pSrcUidCol = taosArrayGet(pSrcBlock->pDataBlock, UID_COLUMN_INDEX); SColumnInfoData* pSrcGpCol = taosArrayGet(pSrcBlock->pDataBlock, GROUPID_COLUMN_INDEX); - uint64_t* srcUidData = (uint64_t*)pSrcUidCol->pData; + uint64_t* srcUidData = (uint64_t*)pSrcUidCol->pData; ASSERT(pSrcStartTsCol->info.type == TSDB_DATA_TYPE_TIMESTAMP); TSKEY* srcStartTsCol = (TSKEY*)pSrcStartTsCol->pData; TSKEY* srcEndTsCol = (TSKEY*)pSrcEndTsCol->pData; @@ -1753,11 +1753,18 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pTSInfo->scanTimes = 0; pTSInfo->currentGroupId = -1; pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__SCAN; + pTaskInfo->streamInfo.recoverScanFinished = false; } if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__SCAN) { + if (pInfo->blockRecoverContiCnt > 100) { + pInfo->blockRecoverTotCnt += pInfo->blockRecoverContiCnt; + pInfo->blockRecoverContiCnt = 0; + return NULL; + } SSDataBlock* pBlock = doTableScan(pInfo->pTableScanOp); if (pBlock != NULL) { + pInfo->blockRecoverContiCnt++; calBlockTbName(pInfo, pBlock); if (pInfo->pUpdateInfo) { TSKEY maxTs = updateInfoFillBlockData(pInfo->pUpdateInfo, pBlock, pInfo->primaryTsIndex); @@ -1775,6 +1782,7 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pTSInfo->base.cond.startVersion = -1; pTSInfo->base.cond.endVersion = -1; + pTaskInfo->streamInfo.recoverScanFinished = true; return NULL; } @@ -2285,7 +2293,8 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys if (pHandle->initTableReader) { pTSInfo->scanMode = TABLE_SCAN__TABLE_ORDER; pTSInfo->base.dataReader = NULL; - code = tsdbReaderOpen(pHandle->vnode, &pTSInfo->base.cond, pList, num, pTSInfo->pResBlock, &pTSInfo->base.dataReader, NULL); + code = tsdbReaderOpen(pHandle->vnode, &pTSInfo->base.cond, pList, num, pTSInfo->pResBlock, + &pTSInfo->base.dataReader, NULL); if (code != 0) { terrno = code; destroyTableScanOperatorInfo(pTableScanOp); @@ -2355,7 +2364,8 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys pOperator->exprSupp.numOfExprs = taosArrayGetSize(pInfo->pRes->pDataBlock); __optr_fn_t nextFn = pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM ? doStreamScan : doQueueScan; - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, nextFn, NULL, destroyStreamScanOperatorInfo, optrDefaultBufFn, NULL); + pOperator->fpSet = + createOperatorFpSet(optrDummyOpenFn, nextFn, NULL, destroyStreamScanOperatorInfo, optrDefaultBufFn, NULL); return pOperator; @@ -2492,7 +2502,8 @@ SOperatorInfo* createTagScanOperatorInfo(SReadHandle* pReadHandle, STagScanPhysi initResultSizeInfo(&pOperator->resultInfo, 4096); blockDataEnsureCapacity(pInfo->pRes, pOperator->resultInfo.capacity); - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doTagScan, NULL, destroyTagScanOperatorInfo, optrDefaultBufFn, NULL); + pOperator->fpSet = + createOperatorFpSet(optrDummyOpenFn, doTagScan, NULL, destroyTagScanOperatorInfo, optrDefaultBufFn, NULL); return pOperator; @@ -2513,11 +2524,12 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { SQueryTableDataCond* pQueryCond = taosArrayGet(pInfo->queryConds, readIdx); - int64_t st = taosGetTimestampUs(); - void* p = tableListGetInfo(pTaskInfo->pTableInfoList, readIdx + pInfo->tableStartIndex); + int64_t st = taosGetTimestampUs(); + void* p = tableListGetInfo(pTaskInfo->pTableInfoList, readIdx + pInfo->tableStartIndex); SReadHandle* pHandle = &pInfo->base.readHandle; - int32_t code = tsdbReaderOpen(pHandle->vnode, pQueryCond, p, 1, pBlock, &pInfo->base.dataReader, GET_TASKID(pTaskInfo)); + int32_t code = + tsdbReaderOpen(pHandle->vnode, pQueryCond, p, 1, pBlock, &pInfo->base.dataReader, GET_TASKID(pTaskInfo)); if (code != 0) { T_LONG_JMP(pTaskInfo->env, code); } @@ -2915,8 +2927,8 @@ static void buildVnodeGroupedNtbTableCount(STableCountScanOperatorInfo* SSDataBlock* pRes, char* dbName); static void buildVnodeFilteredTbCount(SOperatorInfo* pOperator, STableCountScanOperatorInfo* pInfo, STableCountScanSupp* pSupp, SSDataBlock* pRes, char* dbName); -static void buildVnodeGroupedTableCount(SOperatorInfo* pOperator, STableCountScanOperatorInfo* pInfo, - STableCountScanSupp* pSupp, SSDataBlock* pRes, int32_t vgId, char* dbName); +static void buildVnodeGroupedTableCount(SOperatorInfo* pOperator, STableCountScanOperatorInfo* pInfo, + STableCountScanSupp* pSupp, SSDataBlock* pRes, int32_t vgId, char* dbName); static SSDataBlock* buildVnodeDbTableCount(SOperatorInfo* pOperator, STableCountScanOperatorInfo* pInfo, STableCountScanSupp* pSupp, SSDataBlock* pRes); static void buildSysDbGroupedTableCount(SOperatorInfo* pOperator, STableCountScanOperatorInfo* pInfo, @@ -3041,8 +3053,8 @@ SOperatorInfo* createTableCountScanOperatorInfo(SReadHandle* readHandle, STableC setOperatorInfo(pOperator, "TableCountScanOperator", QUERY_NODE_PHYSICAL_PLAN_TABLE_COUNT_SCAN, false, OP_NOT_OPENED, pInfo, pTaskInfo); - pOperator->fpSet = - createOperatorFpSet(optrDummyOpenFn, doTableCountScan, NULL, destoryTableCountScanOperator, optrDefaultBufFn, NULL); + pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doTableCountScan, NULL, destoryTableCountScanOperator, + optrDefaultBufFn, NULL); return pOperator; _error: diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 8e746c6738..3d42f759cb 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -116,7 +116,11 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { ASSERT(0); } if (output == NULL) { - finished = true; + if (qStreamRecoverScanFinished(exec)) { + finished = true; + } else { + qSetStreamOpOpen(exec); + } break; } From e5c4196c98324ebfa2c6caa233fe85948b7165e4 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao@163.com> Date: Tue, 27 Dec 2022 17:19:47 +0800 Subject: [PATCH 48/53] fix:calculate next sliding window error --- source/libs/executor/src/timewindowoperator.c | 19 ++- tests/script/tsim/stream/basic3.sim | 4 +- tests/script/tsim/stream/sliding.sim | 117 ++++++++++++++++++ 3 files changed, 136 insertions(+), 4 deletions(-) diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 2af551b832..813da3c436 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2347,6 +2347,17 @@ void doBuildResult(SOperatorInfo* pOperator, SStreamState* pState, SSDataBlock* buildDataBlockFromGroupRes(pOperator, pState, pBlock, &pOperator->exprSupp, pGroupResInfo); } +static int32_t getNextQualifiedFinalWindow(SInterval* pInterval, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, + TSKEY* primaryKeys, int32_t prevPosition) { + int32_t startPos = prevPosition + 1; + if (startPos == pDataBlockInfo->rows) { + startPos = -1; + } else { + *pNext = getFinalTimeWindow(primaryKeys[startPos], pInterval); + } + return startPos; +} + static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBlock, uint64_t groupId, SHashObj* pUpdatedMap) { SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)pOperatorInfo->info; @@ -2457,8 +2468,12 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* p } int32_t prevEndPos = (forwardRows - 1) * step + startPos; ASSERT(pSDataBlock->info.window.skey > 0 && pSDataBlock->info.window.ekey > 0); - startPos = - getNextQualifiedWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos, TSDB_ORDER_ASC); + if (IS_FINAL_OP(pInfo)) { + startPos = getNextQualifiedFinalWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos); + } else { + startPos = + getNextQualifiedWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos, TSDB_ORDER_ASC); + } if (startPos < 0) { break; } diff --git a/tests/script/tsim/stream/basic3.sim b/tests/script/tsim/stream/basic3.sim index 48fb860a72..41e19b19af 100644 --- a/tests/script/tsim/stream/basic3.sim +++ b/tests/script/tsim/stream/basic3.sim @@ -1,7 +1,7 @@ system sh/stop_dnodes.sh system sh/deploy.sh -n dnode1 -i 1 -system sh/cfg.sh -n dnode1 -c debugflag -v 131 -system sh/exec.sh -n dnode1 -s start -v +system sh/cfg.sh -n dnode1 -c debugflag 131 +system sh/exec.sh -n dnode1 -s start sleep 5000 diff --git a/tests/script/tsim/stream/sliding.sim b/tests/script/tsim/stream/sliding.sim index c9a1ddd922..8287274cd2 100644 --- a/tests/script/tsim/stream/sliding.sim +++ b/tests/script/tsim/stream/sliding.sim @@ -672,6 +672,123 @@ if $data61 != 1 then goto loop5 endi +print step 8 + +sql drop stream IF EXISTS streams4; +sql drop database IF EXISTS test4; + +sql create database test4 vgroups 6; +sql use test4; +sql create stable st(ts timestamp, a int, b int, c int, d double) tags(ta int,tb int,tc int); +sql create table t1 using st tags(1,1,1); +sql create table t2 using st tags(2,2,2); + +sql create stream streams4 trigger at_once into streamt4 as select _wstart as ts, count(*),min(a) c1 from st interval(10s) sliding(5s); + +sql insert into t1 values(1648791213000,1,1,1,1.0); +sql insert into t1 values(1648791243000,2,1,1,1.0); + +sql insert into t2 values(1648791273000,3,1,1,1.0); +sql insert into t2 values(1648791313000,4,1,1,1.0); + +$loop_count = 0 + +loop6: +sleep 200 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +sql select * from streamt4 order by 1; + +# row 0 +if $rows != 8 then + print ====loop6=rows=$rows + goto loop6 +endi + +if $data01 != 1 then + print ====loop6=data01=$data01 + goto loop6 +endi + +if $data02 != 1 then + print ====loop6=data02=$data02 + return -1 +endi + +if $data11 != 1 then + print ====loop6=data11=$data11 + goto loop6 +endi + +if $data12 != 1 then + print ====loop6=data12=$data12 + return -1 +endi + +if $data21 != 1 then + print ====loop6=data21=$data21 + goto loop6 +endi + +if $data22 != 2 then + print ====loop6=data22=$data22 + return -1 +endi + +if $data31 != 1 then + print ====loop6=data31=$data31 + goto loop6 +endi + +if $data32 != 2 then + print ====loop6=data32=$data32 + return -1 +endi + +if $data41 != 1 then + print ====loop6=data41=$data41 + goto loop6 +endi + +if $data42 != 3 then + print ====loop6=data42=$data42 + return -1 +endi + +if $data51 != 1 then + print ====loop6=data51=$data51 + goto loop6 +endi + +if $data52 != 3 then + print ====loop6=data52=$data52 + return -1 +endi + +if $data61 != 1 then + print ====loop6=data61=$data61 + return -1 +endi + +if $data62 != 4 then + print ====loop6=data62=$data62 + return -1 +endi + +if $data71 != 1 then + print ====loop6=data71=$data71 + return -1 +endi + +if $data72 != 4 then + print ====loop6=data72=$data72 + return -1 +endi + $loop_all = $loop_all + 1 print ============loop_all=$loop_all From f7b17a43a77dd63375545b1e53e663dbd908141f Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Tue, 27 Dec 2022 17:23:10 +0800 Subject: [PATCH 49/53] fix: taosbenchmark no vgroup if host specified for main (#19180) --- cmake/taostools_CMakeLists.txt.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/taostools_CMakeLists.txt.in b/cmake/taostools_CMakeLists.txt.in index 0cc57d1246..e23ebb104b 100644 --- a/cmake/taostools_CMakeLists.txt.in +++ b/cmake/taostools_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taos-tools ExternalProject_Add(taos-tools GIT_REPOSITORY https://github.com/taosdata/taos-tools.git - GIT_TAG 261fcca + GIT_TAG 11b60a4 SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools" BINARY_DIR "" #BUILD_IN_SOURCE TRUE From 71a28483762ed733c6d15be7c47bcbb919fa9625 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 27 Dec 2022 17:51:52 +0800 Subject: [PATCH 50/53] fix: double free on append log failed --- source/libs/sync/src/syncPipeline.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index ef37600e98..ee649c268c 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -45,7 +45,7 @@ int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt if (index - pBuf->startIndex >= pBuf->size) { sError("vgId:%d, failed to append due to sync log buffer full. index:%" PRId64 "", pNode->vgId, index); - goto _out; + goto _err; } ASSERT(index == pBuf->endIndex); @@ -66,9 +66,8 @@ int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt taosThreadMutexUnlock(&pBuf->mutex); return 0; -_out: +_err: syncLogBufferValidate(pBuf); - syncEntryDestroy(pEntry); taosThreadMutexUnlock(&pBuf->mutex); return -1; } From d949008543973100510e6066305091521de1300d Mon Sep 17 00:00:00 2001 From: Ping Xiao Date: Tue, 27 Dec 2022 18:53:06 +0800 Subject: [PATCH 51/53] test: add test case for tmq replica 3 --- tests/parallel_test/cases.task | 4 ++-- tests/system-test/7-tmq/subscribeDb.py | 12 ++++++++---- tests/system-test/7-tmq/subscribeDb0.py | 7 +++++-- tests/system-test/7-tmq/tmqCommon.py | 2 +- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 57fff095e8..f5f04ff1e9 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -677,8 +677,8 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/create_wrong_topic.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/dropDbR3ConflictTransaction.py -N 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/basic5.py -,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb.py -,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb0.py +,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb.py -N 3 -n 3 +,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb0.py -N 3 -n 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb1.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb2.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb3.py diff --git a/tests/system-test/7-tmq/subscribeDb.py b/tests/system-test/7-tmq/subscribeDb.py index fd06eedefd..0fa9bcfbd4 100644 --- a/tests/system-test/7-tmq/subscribeDb.py +++ b/tests/system-test/7-tmq/subscribeDb.py @@ -61,7 +61,7 @@ class TDTestCase: def insertConsumerInfo(self,consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifmanualcommit,cdbName='cdb'): sql = "insert into %s.consumeinfo values "%cdbName - sql += "(now, %d, '%s', '%s', %d, %d, %d)"%(consumerId, topicList, keyList, expectrowcnt, ifcheckdata, ifmanualcommit) + sql += "(now + %ds, %d, '%s', '%s', %d, %d, %d)"%(consumerId, consumerId, topicList, keyList, expectrowcnt, ifcheckdata, ifmanualcommit) tdLog.info("consume info sql: %s"%sql) tdSql.query(sql) @@ -174,12 +174,13 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath self.initConsumerTable() - tdSql.execute("create database if not exists %s vgroups %d" %(parameterDict['dbName'], parameterDict['vgroups'])) + tdSql.execute("create database if not exists %s vgroups %d replica %d" %(parameterDict['dbName'], parameterDict['vgroups'], parameterDict['replica'])) prepareEnvThread = threading.Thread(target=self.prepareEnv, kwargs=parameterDict) prepareEnvThread.start() @@ -271,12 +272,13 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath self.initConsumerTable() - tdSql.execute("create database if not exists %s vgroups %d" %(parameterDict['dbName'], parameterDict['vgroups'])) + tdSql.execute("create database if not exists %s vgroups %d replica %d" %(parameterDict['dbName'], parameterDict['vgroups'], parameterDict['replica'])) prepareEnvThread = threading.Thread(target=self.prepareEnv, kwargs=parameterDict) prepareEnvThread.start() @@ -337,6 +339,7 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath @@ -406,12 +409,13 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath self.initConsumerTable() - tdSql.execute("create database if not exists %s vgroups %d" %(parameterDict['dbName'], parameterDict['vgroups'])) + tdSql.execute("create database if not exists %s vgroups %d replica %d" %(parameterDict['dbName'], parameterDict['vgroups'], parameterDict['replica'])) prepareEnvThread = threading.Thread(target=self.prepareEnv, kwargs=parameterDict) prepareEnvThread.start() diff --git a/tests/system-test/7-tmq/subscribeDb0.py b/tests/system-test/7-tmq/subscribeDb0.py index d4c5e2f87f..50ef52cb15 100644 --- a/tests/system-test/7-tmq/subscribeDb0.py +++ b/tests/system-test/7-tmq/subscribeDb0.py @@ -174,12 +174,13 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath self.initConsumerTable() - tdSql.execute("create database if not exists %s vgroups %d" %(parameterDict['dbName'], parameterDict['vgroups'])) + tdSql.execute("create database if not exists %s vgroups %d replica %d" %(parameterDict['dbName'], parameterDict['vgroups'], parameterDict['replica'])) prepareEnvThread = threading.Thread(target=self.prepareEnv, kwargs=parameterDict) prepareEnvThread.start() @@ -191,6 +192,7 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath @@ -254,12 +256,13 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath self.initConsumerTable() - tdSql.execute("create database if not exists %s vgroups %d" %(parameterDict['dbName'], parameterDict['vgroups'])) + tdSql.execute("create database if not exists %s vgroups %d replica %d" %(parameterDict['dbName'], parameterDict['vgroups'], parameterDict['replica'])) prepareEnvThread = threading.Thread(target=self.prepareEnv, kwargs=parameterDict) prepareEnvThread.start() diff --git a/tests/system-test/7-tmq/tmqCommon.py b/tests/system-test/7-tmq/tmqCommon.py index 141d013270..4cda062401 100644 --- a/tests/system-test/7-tmq/tmqCommon.py +++ b/tests/system-test/7-tmq/tmqCommon.py @@ -60,7 +60,7 @@ class TMQCom: def insertConsumerInfo(self,consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifmanualcommit,cdbName='cdb'): sql = "insert into %s.consumeinfo values "%cdbName - sql += "(now, %d, '%s', '%s', %d, %d, %d)"%(consumerId, topicList, keyList, expectrowcnt, ifcheckdata, ifmanualcommit) + sql += "(now + %ds, %d, '%s', '%s', %d, %d, %d)"%(consumerId, consumerId, topicList, keyList, expectrowcnt, ifcheckdata, ifmanualcommit) tdLog.info("consume info sql: %s"%sql) tdSql.query(sql) From 09681e37ca2ba7851585bb701712478b5659a937 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 28 Dec 2022 10:20:18 +0800 Subject: [PATCH 52/53] change systerm code and avoid mem leak --- source/dnode/mgmt/exe/dmMain.c | 5 +++++ source/libs/transport/src/transSvr.c | 15 ++++++++------- source/util/src/terror.c | 9 +++------ 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index a8103351b4..a432201413 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -268,6 +268,11 @@ int mainWindows(int argc, char **argv) { if (dmInit() != 0) { dError("failed to init dnode since %s", terrstr()); + + taosCleanupCfg(); + taosCloseLog(); + taosCleanupArgs(); + taosConvDestroy(); return -1; } diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 2b1f68d5f6..7384877313 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -1001,6 +1001,13 @@ void* transInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, uv_loop_init(srv->loop); char pipeName[PATH_MAX]; + + if (false == taosValidIpAndPort(srv->ip, srv->port)) { + terrno = TAOS_SYSTEM_ERROR(errno); + tError("invalid ip/port, %d:%d, reason:%s", srv->ip, srv->port, terrstr()); + goto End; + } + #if defined(WINDOWS) || defined(DARWIN) int ret = uv_pipe_init(srv->loop, &srv->pipeListen, 0); if (ret != 0) { @@ -1087,12 +1094,6 @@ void* transInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, } #endif - if (false == taosValidIpAndPort(srv->ip, srv->port)) { - terrno = TAOS_SYSTEM_ERROR(errno); - tError("invalid ip/port, %d:%d, reason:%s", srv->ip, srv->port, terrstr()); - goto End; - } - if (false == addHandleToAcceptloop(srv)) { goto End; } @@ -1185,8 +1186,8 @@ void transCloseServer(void* arg) { // impl later SServerObj* srv = arg; - tDebug("send quit msg to accept thread"); if (srv->inited) { + tDebug("send quit msg to accept thread"); uv_async_send(srv->pAcceptAsync); taosThreadJoin(srv->thread, NULL); SRV_RELEASE_UV(srv->loop); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index ff61c7cdc4..6f8b0d8e04 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -643,13 +643,10 @@ const char* tstrerror(int32_t err) { // this is a system errno if ((err & 0x00ff0000) == 0x00ff0000) { int32_t code = err & 0x0000ffff; - if (code >= 0 && code < 36) { - return strerror(code); - } else { - return "unknown err"; - } + // strerror can handle any invalid code + // invalid code return Unknown error + return strerror(code); } - int32_t s = 0; int32_t e = sizeof(errors) / sizeof(errors[0]); From eddaaf24e47b1606796cad4bea3802d0017d684b Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Wed, 28 Dec 2022 10:22:13 +0800 Subject: [PATCH 53/53] fix: escape error in create topic --- source/libs/parser/inc/parAst.h | 2 +- source/libs/parser/src/parAstCreater.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/source/libs/parser/inc/parAst.h b/source/libs/parser/inc/parAst.h index ef67c7536f..20a2520a79 100644 --- a/source/libs/parser/inc/parAst.h +++ b/source/libs/parser/inc/parAst.h @@ -196,7 +196,7 @@ SNode* createDropComponentNodeStmt(SAstCreateContext* pCxt, ENodeType type, cons SNode* createCreateTopicStmtUseQuery(SAstCreateContext* pCxt, bool ignoreExists, const SToken* pTopicName, SNode* pQuery); SNode* createCreateTopicStmtUseDb(SAstCreateContext* pCxt, bool ignoreExists, const SToken* pTopicName, - const SToken* pSubDbName, bool withMeta); + SToken* pSubDbName, bool withMeta); SNode* createCreateTopicStmtUseTable(SAstCreateContext* pCxt, bool ignoreExists, const SToken* pTopicName, SNode* pRealTable, bool withMeta); SNode* createDropTopicStmt(SAstCreateContext* pCxt, bool ignoreNotExists, const SToken* pTopicName); diff --git a/source/libs/parser/src/parAstCreater.c b/source/libs/parser/src/parAstCreater.c index 4077c27840..17d8297cf0 100644 --- a/source/libs/parser/src/parAstCreater.c +++ b/source/libs/parser/src/parAstCreater.c @@ -1579,8 +1579,11 @@ SNode* createCreateTopicStmtUseQuery(SAstCreateContext* pCxt, bool ignoreExists, } SNode* createCreateTopicStmtUseDb(SAstCreateContext* pCxt, bool ignoreExists, const SToken* pTopicName, - const SToken* pSubDbName, bool withMeta) { + SToken* pSubDbName, bool withMeta) { CHECK_PARSER_STATUS(pCxt); + if (!checkDbName(pCxt, pSubDbName, true)) { + return NULL; + } SCreateTopicStmt* pStmt = (SCreateTopicStmt*)nodesMakeNode(QUERY_NODE_CREATE_TOPIC_STMT); CHECK_OUT_OF_MEM(pStmt); COPY_STRING_FORM_ID_TOKEN(pStmt->topicName, pTopicName);