From 02dbf353547896a8120bd8f5d685b370fd97f618 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 20 Dec 2022 16:57:16 +0800 Subject: [PATCH 01/82] fix(query): return TSDB_CODE_RPC_VGROUP_NOT_CONNECTED if all nodes in vgroups is offline --- include/util/taoserror.h | 1 + source/libs/transport/src/transCli.c | 9 +++++++++ source/util/src/terror.c | 1 + 3 files changed, 11 insertions(+) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 52221bdd44..f1f35a67a4 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -65,6 +65,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RPC_PORT_EADDRINUSE TAOS_DEF_ERROR_CODE(0, 0x0017) // #define TSDB_CODE_RPC_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0018) // #define TSDB_CODE_RPC_TIMEOUT TAOS_DEF_ERROR_CODE(0, 0x0019) // +#define TSDB_CODE_RPC_VGROUP_NOT_CONNECTED TAOS_DEF_ERROR_CODE(0, 0x0020) // "Vgroup could not be connected" //common & util #define TSDB_CODE_OPS_NOT_SUPPORT TAOS_DEF_ERROR_CODE(0, 0x0100) // diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index d144a76eb0..04783605f1 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1670,6 +1670,15 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { } } + // check whole vnodes is offline on this vgroup + if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pResp->code == TSDB_CODE_RPC_BROKEN_LINK) { + if (pCtx->epsetRetryCnt >= pCtx->epSet.numOfEps || pCtx->retryStep > 0) { + if (pMsg->msg.msgType == TDMT_VND_SUBMIT || pMsg->msg.msgType == TDMT_SCH_QUERY) { + pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + } + } + } + STraceId* trace = &pResp->info.traceId; bool hasEpSet = cliTryExtractEpSet(pResp, &pCtx->epSet); if (hasEpSet) { diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 4b9dde5059..1871697ddc 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -51,6 +51,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_RPC_FQDN_ERROR, "Unable to resolve FQD TAOS_DEFINE_ERROR(TSDB_CODE_RPC_PORT_EADDRINUSE, "Port already in use") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_BROKEN_LINK, "Conn is broken") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_TIMEOUT, "Conn read timeout") +TAOS_DEFINE_ERROR(TSDB_CODE_RPC_VGROUP_NOT_CONNECTED, "Vgroup could not be connected") //common & util TAOS_DEFINE_ERROR(TSDB_CODE_TIME_UNSYNCED, "Client and server's time is not synchronized") From 7d9e5924fa1203ca570fcfa7e7c77fa791c6133c Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 20 Dec 2022 18:06:04 +0800 Subject: [PATCH 02/82] fix(query): remove check msgType --- source/libs/transport/src/transCli.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 04783605f1..62355b710b 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1673,9 +1673,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { // check whole vnodes is offline on this vgroup if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pResp->code == TSDB_CODE_RPC_BROKEN_LINK) { if (pCtx->epsetRetryCnt >= pCtx->epSet.numOfEps || pCtx->retryStep > 0) { - if (pMsg->msg.msgType == TDMT_VND_SUBMIT || pMsg->msg.msgType == TDMT_SCH_QUERY) { pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; - } } } From df6102f66e588c5d60748aedcb037a58fdfaa078 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 20 Dec 2022 18:14:24 +0800 Subject: [PATCH 03/82] fix(query): TSDB_CODE_RPC_VGROUP_NOT_CONNECTED same condition check with TSDB_CODE_RPC_NETWORK_UNAVAIL --- include/libs/qcom/query.h | 2 +- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 1 + source/dnode/mnode/impl/src/mndTrans.c | 2 +- source/libs/function/src/udfd.c | 1 + source/libs/scheduler/inc/schInt.h | 2 +- source/libs/transport/src/transCli.c | 2 +- 6 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index 3f11d2a218..efc86d302d 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -268,7 +268,7 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t ((_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_SYN_RESTORING || (_code) == TSDB_CODE_SYN_INTERNAL_ERROR) #define SYNC_OTHER_LEADER_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_MNODE_NOT_FOUND) -#define NO_RET_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL) +#define NO_RET_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) #define NEED_REDIRECT_ERROR(_code) \ (NO_RET_REDIRECT_ERROR(_code) || SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(_code) || \ diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 5e1dcc6353..fe676c769c 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -233,6 +233,7 @@ static inline void dmReleaseHandle(SRpcHandleInfo *pHandle, int8_t type) { rpcRe static bool rpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_MNODE_NOT_FOUND || + code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED || code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_SYN_RESTORING || code == TSDB_CODE_VND_STOPPED || code == TSDB_CODE_APP_IS_STARTING || code == TSDB_CODE_APP_IS_STOPPING) { if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index b92be19741..35a094cb65 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -957,7 +957,7 @@ static void mndTransSendRpcRsp(SMnode *pMnode, STrans *pTrans) { for (int32_t i = 0; i < size; ++i) { SRpcHandleInfo *pInfo = taosArrayGet(pTrans->pRpcArray, i); if (pInfo->handle != NULL) { - if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { code = TSDB_CODE_MND_TRANS_NETWORK_UNAVAILL; } if (i != 0 && code == 0) { diff --git a/source/libs/function/src/udfd.c b/source/libs/function/src/udfd.c index 40c75ce6ba..d6753a55fc 100644 --- a/source/libs/function/src/udfd.c +++ b/source/libs/function/src/udfd.c @@ -606,6 +606,7 @@ int32_t udfdLoadUdf(char *udfName, SUdf *udf) { } static bool udfdRpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_SYN_NOT_LEADER || + code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED || code == TSDB_CODE_SYN_RESTORING || code == TSDB_CODE_MNODE_NOT_FOUND || code == TSDB_CODE_APP_IS_STARTING || code == TSDB_CODE_APP_IS_STOPPING) { if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || diff --git a/source/libs/scheduler/inc/schInt.h b/source/libs/scheduler/inc/schInt.h index 48df7e36a3..2ab37c1715 100644 --- a/source/libs/scheduler/inc/schInt.h +++ b/source/libs/scheduler/inc/schInt.h @@ -375,7 +375,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_JOB_NEED_WAIT(_job) (!SCH_IS_QUERY_JOB(_job)) #define SCH_JOB_NEED_DROP(_job) (SCH_IS_QUERY_JOB(_job)) #define SCH_IS_EXPLAIN_JOB(_job) (EXPLAIN_MODE_ANALYZE == (_job)->attr.explainMode) -#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL) +#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) #define SCH_MERGE_TASK_NETWORK_ERR(_task, _code, _len) \ (SCH_NETWORK_ERR(_code) && (((_len) > 0) || (!SCH_IS_DATA_BIND_TASK(_task)) || (_task)->redirectCtx.inRedirect)) #define SCH_REDIRECT_MSGTYPE(_msgType) \ diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 62355b710b..e8b8c95eb1 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1665,7 +1665,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { if (pCtx->retryCode != TSDB_CODE_SUCCESS) { int32_t code = pResp->code; // return internal code app - if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK) { + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { pResp->code = pCtx->retryCode; } } From c66b7df5d717d26acace1e2d6d915348922d0506 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 20 Dec 2022 19:13:00 +0800 Subject: [PATCH 04/82] feat(rpc): fixed build error --- include/libs/qcom/query.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index efc86d302d..98cc95facf 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -268,7 +268,7 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t ((_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_SYN_RESTORING || (_code) == TSDB_CODE_SYN_INTERNAL_ERROR) #define SYNC_OTHER_LEADER_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_MNODE_NOT_FOUND) -#define NO_RET_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) +#define NO_RET_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) #define NEED_REDIRECT_ERROR(_code) \ (NO_RET_REDIRECT_ERROR(_code) || SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(_code) || \ From 8774d2c17b8e6b81dda6c18b0977b358964ec479 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 20 Dec 2022 19:14:57 +0800 Subject: [PATCH 05/82] feat(rpc): fixed build error --- source/libs/scheduler/inc/schInt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/scheduler/inc/schInt.h b/source/libs/scheduler/inc/schInt.h index 2ab37c1715..7730a722ab 100644 --- a/source/libs/scheduler/inc/schInt.h +++ b/source/libs/scheduler/inc/schInt.h @@ -375,7 +375,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_JOB_NEED_WAIT(_job) (!SCH_IS_QUERY_JOB(_job)) #define SCH_JOB_NEED_DROP(_job) (SCH_IS_QUERY_JOB(_job)) #define SCH_IS_EXPLAIN_JOB(_job) (EXPLAIN_MODE_ANALYZE == (_job)->attr.explainMode) -#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) +#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) #define SCH_MERGE_TASK_NETWORK_ERR(_task, _code, _len) \ (SCH_NETWORK_ERR(_code) && (((_len) > 0) || (!SCH_IS_DATA_BIND_TASK(_task)) || (_task)->redirectCtx.inRedirect)) #define SCH_REDIRECT_MSGTYPE(_msgType) \ From 37ea86f8ed0238dd32e0c3394a60775d1e93d392 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 20 Dec 2022 21:13:33 +0800 Subject: [PATCH 06/82] fix(rpc): add msgType condition --- source/libs/transport/src/transCli.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index e8b8c95eb1..bf43b0c0d0 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1673,7 +1673,14 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { // check whole vnodes is offline on this vgroup if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pResp->code == TSDB_CODE_RPC_BROKEN_LINK) { if (pCtx->epsetRetryCnt >= pCtx->epSet.numOfEps || pCtx->retryStep > 0) { - pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + switch (pMsg->msg.msgType) { + case TDMT_VND_BATCH_META: + case TDMT_VND_SUBMIT: + case TDMT_SCH_QUERY: + case TDMT_SCH_MERGE_QUERY: + pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + break; + } } } From 15160544c5fec4521cfc7f1f72b083a524eca235 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Wed, 21 Dec 2022 11:48:24 +0800 Subject: [PATCH 07/82] feat(rpc): move the check msgType to client --- include/util/taoserror.h | 1 + source/client/src/clientImpl.c | 20 ++++++++++++++++++++ source/libs/transport/src/transCli.c | 15 +++++---------- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index f1f35a67a4..8d79946633 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -66,6 +66,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RPC_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0018) // #define TSDB_CODE_RPC_TIMEOUT TAOS_DEF_ERROR_CODE(0, 0x0019) // #define TSDB_CODE_RPC_VGROUP_NOT_CONNECTED TAOS_DEF_ERROR_CODE(0, 0x0020) // "Vgroup could not be connected" +#define TSDB_CODE_RPC_VGROUP_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0021) // //common & util #define TSDB_CODE_OPS_NOT_SUPPORT TAOS_DEF_ERROR_CODE(0, 0x0100) // diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index d792896b2d..1e22498b50 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1424,6 +1424,26 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { memcpy((void*)tEpSet, (void*)pEpSet, sizeof(SEpSet)); } + switch (pMsg->msg.msgType) { + case TDMT_VND_BATCH_META: + case TDMT_VND_SUBMIT: + case TDMT_SCH_QUERY: + case TDMT_SCH_MERGE_QUERY: + // uniform to one error code: TSDB_CODE_RPC_VGROUP_NOT_CONNECTED + if (pMsg->code == TSDB_CODE_RPC_VGROUP_BROKEN_LINK) { + pMsg->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + } + break; + default: + // restore origin code + if (pMsg->code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { + pMsg->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; + } else if (pMsg->code == TSDB_CODE_RPC_VGROUP_BROKEN_LINK) { + pMsg->code = TSDB_CODE_RPC_BROKEN_LINK; + } + break; + } + AsyncArg* arg = taosMemoryCalloc(1, sizeof(AsyncArg)); arg->msg = *pMsg; arg->pEpset = tEpSet; diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index bf43b0c0d0..21e66870e0 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1671,16 +1671,11 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { } // check whole vnodes is offline on this vgroup - if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pResp->code == TSDB_CODE_RPC_BROKEN_LINK) { - if (pCtx->epsetRetryCnt >= pCtx->epSet.numOfEps || pCtx->retryStep > 0) { - switch (pMsg->msg.msgType) { - case TDMT_VND_BATCH_META: - case TDMT_VND_SUBMIT: - case TDMT_SCH_QUERY: - case TDMT_SCH_MERGE_QUERY: - pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; - break; - } + if (pCtx->epsetRetryCnt >= pCtx->epSet.numOfEps || pCtx->retryStep > 0) { + if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + } else if (pResp->code == TSDB_CODE_RPC_BROKEN_LINK) { + pResp->code = TSDB_CODE_RPC_VGROUP_BROKEN_LINK; } } From 5ca52595aedba3a85f9d94d93803750a941857f4 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Wed, 21 Dec 2022 12:20:26 +0800 Subject: [PATCH 08/82] feat(rpc): move the check msgType to client build error --- source/client/src/clientImpl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 1e22498b50..0bf91c6db8 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1424,7 +1424,7 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { memcpy((void*)tEpSet, (void*)pEpSet, sizeof(SEpSet)); } - switch (pMsg->msg.msgType) { + switch (pMsg->msgType) { case TDMT_VND_BATCH_META: case TDMT_VND_SUBMIT: case TDMT_SCH_QUERY: From ced76efde0ec2b8d40eca54a23cf6b7e63df99b7 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Wed, 21 Dec 2022 12:44:29 +0800 Subject: [PATCH 09/82] feat(rpc): move the check msgType to cliet --- source/client/src/clientImpl.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 0bf91c6db8..2e76d50be5 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1424,11 +1424,12 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { memcpy((void*)tEpSet, (void*)pEpSet, sizeof(SEpSet)); } + // pMsg is response msg switch (pMsg->msgType) { - case TDMT_VND_BATCH_META: - case TDMT_VND_SUBMIT: - case TDMT_SCH_QUERY: - case TDMT_SCH_MERGE_QUERY: + case TDMT_VND_BATCH_META + 1: + case TDMT_VND_SUBMIT + 1: + case TDMT_SCH_QUERY + 1: + case TDMT_SCH_MERGE_QUERY + 1: // uniform to one error code: TSDB_CODE_RPC_VGROUP_NOT_CONNECTED if (pMsg->code == TSDB_CODE_RPC_VGROUP_BROKEN_LINK) { pMsg->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; From cd1b87031a35c78bd450ca97accd9f8cc3fb3ead Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Thu, 22 Dec 2022 10:22:51 +0800 Subject: [PATCH 10/82] fix(query): modify error describe --- include/libs/qcom/query.h | 2 +- include/util/taoserror.h | 4 ++-- source/client/src/clientImpl.c | 10 +++++----- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 2 +- source/dnode/mnode/impl/src/mndTrans.c | 2 +- source/libs/function/src/udfd.c | 2 +- source/libs/scheduler/inc/schInt.h | 2 +- source/libs/transport/src/transCli.c | 6 +++--- source/util/src/terror.c | 2 +- 9 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index 98cc95facf..5b640dce92 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -268,7 +268,7 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t ((_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_SYN_RESTORING || (_code) == TSDB_CODE_SYN_INTERNAL_ERROR) #define SYNC_OTHER_LEADER_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_MNODE_NOT_FOUND) -#define NO_RET_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) +#define NO_RET_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) #define NEED_REDIRECT_ERROR(_code) \ (NO_RET_REDIRECT_ERROR(_code) || SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(_code) || \ diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 8d79946633..6dcf244823 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -65,8 +65,8 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RPC_PORT_EADDRINUSE TAOS_DEF_ERROR_CODE(0, 0x0017) // #define TSDB_CODE_RPC_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0018) // #define TSDB_CODE_RPC_TIMEOUT TAOS_DEF_ERROR_CODE(0, 0x0019) // -#define TSDB_CODE_RPC_VGROUP_NOT_CONNECTED TAOS_DEF_ERROR_CODE(0, 0x0020) // "Vgroup could not be connected" -#define TSDB_CODE_RPC_VGROUP_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0021) // +#define TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED TAOS_DEF_ERROR_CODE(0, 0x0020) // "Vgroup could not be connected" +#define TSDB_CODE_RPC_SOMENODE_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0021) // //common & util #define TSDB_CODE_OPS_NOT_SUPPORT TAOS_DEF_ERROR_CODE(0, 0x0100) // diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 2e76d50be5..b480b0b230 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1430,16 +1430,16 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { case TDMT_VND_SUBMIT + 1: case TDMT_SCH_QUERY + 1: case TDMT_SCH_MERGE_QUERY + 1: - // uniform to one error code: TSDB_CODE_RPC_VGROUP_NOT_CONNECTED - if (pMsg->code == TSDB_CODE_RPC_VGROUP_BROKEN_LINK) { - pMsg->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + // uniform to one error code: TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED + if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { + pMsg->code = TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED; } break; default: // restore origin code - if (pMsg->code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { + if (pMsg->code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) { pMsg->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; - } else if (pMsg->code == TSDB_CODE_RPC_VGROUP_BROKEN_LINK) { + } else if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { pMsg->code = TSDB_CODE_RPC_BROKEN_LINK; } break; diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index fe676c769c..2dd98f8cd1 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -233,7 +233,7 @@ static inline void dmReleaseHandle(SRpcHandleInfo *pHandle, int8_t type) { rpcRe static bool rpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_MNODE_NOT_FOUND || - code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED || + code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED || code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_SYN_RESTORING || code == TSDB_CODE_VND_STOPPED || code == TSDB_CODE_APP_IS_STARTING || code == TSDB_CODE_APP_IS_STOPPING) { if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 35a094cb65..c8d22d13ea 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -957,7 +957,7 @@ static void mndTransSendRpcRsp(SMnode *pMnode, STrans *pTrans) { for (int32_t i = 0; i < size; ++i) { SRpcHandleInfo *pInfo = taosArrayGet(pTrans->pRpcArray, i); if (pInfo->handle != NULL) { - if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) { code = TSDB_CODE_MND_TRANS_NETWORK_UNAVAILL; } if (i != 0 && code == 0) { diff --git a/source/libs/function/src/udfd.c b/source/libs/function/src/udfd.c index d6753a55fc..6c88e4d5c8 100644 --- a/source/libs/function/src/udfd.c +++ b/source/libs/function/src/udfd.c @@ -606,7 +606,7 @@ int32_t udfdLoadUdf(char *udfName, SUdf *udf) { } static bool udfdRpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_SYN_NOT_LEADER || - code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED || + code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED || code == TSDB_CODE_SYN_RESTORING || code == TSDB_CODE_MNODE_NOT_FOUND || code == TSDB_CODE_APP_IS_STARTING || code == TSDB_CODE_APP_IS_STOPPING) { if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || diff --git a/source/libs/scheduler/inc/schInt.h b/source/libs/scheduler/inc/schInt.h index 7730a722ab..e8216fcd7c 100644 --- a/source/libs/scheduler/inc/schInt.h +++ b/source/libs/scheduler/inc/schInt.h @@ -375,7 +375,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_JOB_NEED_WAIT(_job) (!SCH_IS_QUERY_JOB(_job)) #define SCH_JOB_NEED_DROP(_job) (SCH_IS_QUERY_JOB(_job)) #define SCH_IS_EXPLAIN_JOB(_job) (EXPLAIN_MODE_ANALYZE == (_job)->attr.explainMode) -#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) +#define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) #define SCH_MERGE_TASK_NETWORK_ERR(_task, _code, _len) \ (SCH_NETWORK_ERR(_code) && (((_len) > 0) || (!SCH_IS_DATA_BIND_TASK(_task)) || (_task)->redirectCtx.inRedirect)) #define SCH_REDIRECT_MSGTYPE(_msgType) \ diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 21e66870e0..1dc79e0cfb 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1665,7 +1665,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { if (pCtx->retryCode != TSDB_CODE_SUCCESS) { int32_t code = pResp->code; // return internal code app - if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) { pResp->code = pCtx->retryCode; } } @@ -1673,9 +1673,9 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { // check whole vnodes is offline on this vgroup if (pCtx->epsetRetryCnt >= pCtx->epSet.numOfEps || pCtx->retryStep > 0) { if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { - pResp->code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; + pResp->code = TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED; } else if (pResp->code == TSDB_CODE_RPC_BROKEN_LINK) { - pResp->code = TSDB_CODE_RPC_VGROUP_BROKEN_LINK; + pResp->code = TSDB_CODE_RPC_SOMENODE_BROKEN_LINK; } } diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 1871697ddc..ec469e3c62 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -51,7 +51,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_RPC_FQDN_ERROR, "Unable to resolve FQD TAOS_DEFINE_ERROR(TSDB_CODE_RPC_PORT_EADDRINUSE, "Port already in use") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_BROKEN_LINK, "Conn is broken") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_TIMEOUT, "Conn read timeout") -TAOS_DEFINE_ERROR(TSDB_CODE_RPC_VGROUP_NOT_CONNECTED, "Vgroup could not be connected") +TAOS_DEFINE_ERROR(TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED, "some vnode/qnode/mnode(s) out of service") //common & util TAOS_DEFINE_ERROR(TSDB_CODE_TIME_UNSYNCED, "Client and server's time is not synchronized") From 48b6bd438d5d10529a97f73b0656ad55f567a275 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Thu, 22 Dec 2022 17:30:02 +0800 Subject: [PATCH 11/82] fix: restart snapshot sender on receiver is restart --- include/util/taoserror.h | 1 + source/dnode/vnode/src/vnd/vnodeCommit.c | 7 +- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 2 +- source/dnode/vnode/src/vnd/vnodeSync.c | 4 +- source/libs/sync/src/syncSnapshot.c | 137 ++++++++++++--------- source/libs/sync/test/syncRaftLogTest2.cpp | 2 +- source/libs/sync/test/syncRaftLogTest3.cpp | 2 +- source/util/src/terror.c | 1 + 8 files changed, 90 insertions(+), 66 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 7cc1a47404..2d7b15ebda 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -517,6 +517,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_SYN_STANDBY_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x0912) #define TSDB_CODE_SYN_BATCH_ERROR TAOS_DEF_ERROR_CODE(0, 0x0913) #define TSDB_CODE_SYN_RESTORING TAOS_DEF_ERROR_CODE(0, 0x0914) +#define TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG TAOS_DEF_ERROR_CODE(0, 0x0915) // internal #define TSDB_CODE_SYN_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x09FF) // tq diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index be977e7cbd..f78956a431 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -234,10 +234,11 @@ int vnodeAsyncCommit(SVnode *pVnode) { _exit: if (code) { - vError("vgId:%d, %s failed since %s, commit id:%" PRId64, TD_VID(pVnode), __func__, tstrerror(code), + vError("vgId:%d, vnode async commit failed since %s, commitId:%" PRId64, TD_VID(pVnode), tstrerror(code), pVnode->state.commitID); } else { - vDebug("vgId:%d, %s done", TD_VID(pVnode), __func__); + vInfo("vgId:%d, vnode async commit done, commitId:%" PRId64 " term:%" PRId64 " applied:%" PRId64, TD_VID(pVnode), + pVnode->state.commitID, pVnode->state.applyTerm, pVnode->state.applied); } return code; } @@ -256,7 +257,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { char dir[TSDB_FILENAME_LEN] = {0}; SVnode *pVnode = pInfo->pVnode; - vInfo("vgId:%d, start to commit, commit ID:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode), + vInfo("vgId:%d, start to commit, commitId:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode), pVnode->state.commitID, pVnode->state.applied, pVnode->state.applyTerm); // persist wal before starting diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index fcfacd1ca9..dbd06d6ec0 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -423,7 +423,7 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { ASSERT(pHdr->index == pWriter->index + 1); pWriter->index = pHdr->index; - vInfo("vgId:%d, vnode snapshot write data, index:%" PRId64 " type:%d nData:%d", TD_VID(pVnode), pHdr->index, + vInfo("vgId:%d, vnode snapshot write data, index:%" PRId64 " type:%d blockLen:%d", TD_VID(pVnode), pHdr->index, pHdr->type, nData); switch (pHdr->type) { diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 5caaae502f..0437703c92 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -465,9 +465,9 @@ static int32_t vnodeSnapshotStopWrite(const SSyncFSM *pFsm, void *pWriter, bool static int32_t vnodeSnapshotDoWrite(const SSyncFSM *pFsm, void *pWriter, void *pBuf, int32_t len) { SVnode *pVnode = pFsm->data; - vDebug("vgId:%d, continue write vnode snapshot, len:%d", pVnode->config.vgId, len); + vDebug("vgId:%d, continue write vnode snapshot, blockLen:%d", pVnode->config.vgId, len); int32_t code = vnodeSnapWrite(pWriter, pBuf, len); - vDebug("vgId:%d, continue write vnode snapshot finished, len:%d", pVnode->config.vgId, len); + vDebug("vgId:%d, continue write vnode snapshot finished, blockLen:%d", pVnode->config.vgId, len); return code; } diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 30324c1113..7a35f90165 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -294,7 +294,7 @@ int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) { } if (snapshotSenderIsStart(pSender)) { - sSError(pSender, "snapshot sender already start, ignore"); + sSInfo(pSender, "snapshot sender already start, ignore"); return 0; } @@ -523,7 +523,7 @@ static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnap static int32_t snapshotReceiverGotData(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg) { if (pMsg->seq != pReceiver->ack + 1) { sRError(pReceiver, "snapshot receiver invalid seq, ack:%d seq:%d", pReceiver->ack, pMsg->seq); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + terrno = TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG; return -1; } @@ -721,8 +721,12 @@ static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotS timeNow = taosGetTimestampMs(); } + int32_t code = 0; if (snapshotReceiverGotData(pReceiver, pMsg) != 0) { - return -1; + code = terrno; + if (code >= SYNC_SNAPSHOT_SEQ_INVALID) { + code = TSDB_CODE_SYN_INTERNAL_ERROR; + } } // build msg @@ -740,7 +744,7 @@ static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotS pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pReceiver->startTime; pRspMsg->ack = pReceiver->ack; // receiver maybe already closed - pRspMsg->code = 0; + pRspMsg->code = code; pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; // send msg @@ -861,7 +865,7 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process force stop"); snapshotReceiverForceStop(pReceiver); } else if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) { - syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq"); + syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq data"); syncNodeOnSnapshotTransfering(pSyncNode, pMsg); } else { // error log @@ -982,68 +986,85 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { } // state, term, seq/ack - if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { - if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { - // prepare , send begin msg - if (pMsg->ack == SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq pre-snapshot"); - syncNodeOnSnapshotReplyPre(pSyncNode, pMsg); - return 0; - } + if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { + sSError(pSender, "snapshot sender not leader"); + return -1; + } - if (pMsg->ack == SYNC_SNAPSHOT_SEQ_BEGIN) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq begin"); - if (snapshotSenderUpdateProgress(pSender, pMsg) != 0) { - return -1; - } + if (pMsg->term != pSyncNode->pRaftStore->currentTerm) { + sSError(pSender, "snapshot sender term not equal"); + return -1; + } - if (snapshotSend(pSender) != 0) { - return -1; - } - return 0; - } + if (pMsg->code != 0) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error code"); + sSError(pSender, "snapshot sender receive error code:0x%x and stop sender", pMsg->code); + snapshotSenderStop(pSender, true); + SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); + if (pMgr) { + syncLogReplMgrReset(pMgr); + } - // receive ack is finish, close sender - if (pMsg->ack == SYNC_SNAPSHOT_SEQ_END) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq end"); - snapshotSenderStop(pSender, true); - SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); - if (pMgr) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "reset repl mgr"); - syncLogReplMgrReset(pMgr); - } - return 0; - } + return -1; + } - // send next msg - if (pMsg->ack == pSender->seq) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq"); - // update sender ack - if (snapshotSenderUpdateProgress(pSender, pMsg) != 0) { - return -1; - } - if (snapshotSend(pSender) != 0) { - return -1; - } + // prepare , send begin msg + if (pMsg->ack == SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq pre-snapshot"); + syncNodeOnSnapshotReplyPre(pSyncNode, pMsg); + return 0; + } - } else if (pMsg->ack == pSender->seq - 1) { - // maybe resend - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq and resend"); - snapshotReSend(pSender); - - } else { - // error log - sSError(pSender, "snapshot sender recv error ack:%d, my seq:%d", pMsg->ack, pSender->seq); - return -1; - } - } else { - // error log - sSError(pSender, "snapshot sender term not equal"); + if (pMsg->ack == SYNC_SNAPSHOT_SEQ_BEGIN) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq begin"); + if (snapshotSenderUpdateProgress(pSender, pMsg) != 0) { return -1; } + + if (snapshotSend(pSender) != 0) { + return -1; + } + return 0; + } + + // receive ack is finish, close sender + if (pMsg->ack == SYNC_SNAPSHOT_SEQ_END) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq end"); + snapshotSenderStop(pSender, true); + SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); + if (pMgr) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "reset repl mgr"); + syncLogReplMgrReset(pMgr); + } + return 0; + } + + // send next msg + if (pMsg->ack == pSender->seq) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq data"); + // update sender ack + if (snapshotSenderUpdateProgress(pSender, pMsg) != 0) { + return -1; + } + if (snapshotSend(pSender) != 0) { + return -1; + } + + } else if (pMsg->ack == pSender->seq - 1) { + // maybe resend + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq and resend"); + snapshotReSend(pSender); + } else { // error log - sSError(pSender, "snapshot sender not leader"); + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error ack"); + sSError(pSender, "snapshot sender receive error ack:%d, my seq:%d", pMsg->ack, pSender->seq); + snapshotSenderStop(pSender, true); + SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); + if (pMgr) { + syncLogReplMgrReset(pMgr); + } + return -1; } diff --git a/source/libs/sync/test/syncRaftLogTest2.cpp b/source/libs/sync/test/syncRaftLogTest2.cpp index a7752dcb8b..de9137fbe1 100644 --- a/source/libs/sync/test/syncRaftLogTest2.cpp +++ b/source/libs/sync/test/syncRaftLogTest2.cpp @@ -47,7 +47,7 @@ void init() { pSyncNode->pWal = pWal; pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); - pSyncNode->pFsm->FpGetSnapshotInfo = GetSnapshotCb; + // pSyncNode->pFsm->FpGetSnapshotInfo = GetSnapshotCb; } void cleanup() { diff --git a/source/libs/sync/test/syncRaftLogTest3.cpp b/source/libs/sync/test/syncRaftLogTest3.cpp index 31c06625aa..1eed08102f 100644 --- a/source/libs/sync/test/syncRaftLogTest3.cpp +++ b/source/libs/sync/test/syncRaftLogTest3.cpp @@ -47,7 +47,7 @@ void init() { pSyncNode->pWal = pWal; pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); - pSyncNode->pFsm->FpGetSnapshotInfo = GetSnapshotCb; + // pSyncNode->pFsm->FpGetSnapshotInfo = GetSnapshotCb; } void cleanup() { diff --git a/source/util/src/terror.c b/source/util/src/terror.c index d73c8661fc..e59b1daa05 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -405,6 +405,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SYN_PROPOSE_NOT_READY, "Sync not ready for pr TAOS_DEFINE_ERROR(TSDB_CODE_SYN_STANDBY_NOT_READY, "Sync not ready for standby") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_BATCH_ERROR, "Sync batch error") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_RESTORING, "Sync is restoring") +TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG, "Sync invalid snapshot msg") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INTERNAL_ERROR, "Sync internal error") //tq From 336ee146e618d4f81c42c3b4eba128ca66113d0d Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 22 Dec 2022 17:56:56 +0800 Subject: [PATCH 12/82] fix(tdb/restore): seek jfd to begin --- source/libs/tdb/src/db/tdbPager.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 8d9933b160..2a2a6f8bbd 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -869,6 +869,12 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + if (tdbOsLSeek(jfd, 0L, SEEK_SET) < 0) { + tdbError("failed to lseek jfd due to %s. file:%s, offset:0", strerror(errno), pPager->dbFileName); + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + pageBuf = tdbOsCalloc(1, pPager->pageSize); if (pageBuf == NULL) { return -1; From 99bc54dfd9dc66ffd952a74da6c68883ca9c1221 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 22 Dec 2022 20:29:42 +0800 Subject: [PATCH 13/82] tdb/pager: debug logs for pager restore --- source/libs/tdb/src/db/tdbPager.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 2a2a6f8bbd..e0c3397d63 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -880,6 +880,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbDebug("pager/restore: %p, %d/%d, txnId:%" PRId64, pPager, pPager->dbOrigSize, pPager->dbFileSize, pTxn->txnId); + for (int pgIndex = 0; pgIndex < journalSize; ++pgIndex) { // read pgno & the page from journal SPgno pgno; @@ -890,6 +892,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbTrace("pager/restore: restore pgno:%d,", pgno); + ret = tdbOsRead(jfd, pageBuf, pPager->pageSize); if (ret < 0) { tdbOsFree(pageBuf); From 78af4f54a3108246b397d5859a6f3f4795b0e440 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 22 Dec 2022 20:29:42 +0800 Subject: [PATCH 14/82] tdb/pager: debug logs for pager restore --- source/libs/tdb/src/db/tdbPager.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 2a2a6f8bbd..eb7bcf9385 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -880,6 +880,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbDebug("pager/restore: %p, %d/%d, txnId:%s", pPager, pPager->dbOrigSize, pPager->dbFileSize, jFileName); + for (int pgIndex = 0; pgIndex < journalSize; ++pgIndex) { // read pgno & the page from journal SPgno pgno; @@ -890,6 +892,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbTrace("pager/restore: restore pgno:%d,", pgno); + ret = tdbOsRead(jfd, pageBuf, pPager->pageSize); if (ret < 0) { tdbOsFree(pageBuf); @@ -949,7 +953,12 @@ int tdbPagerRestoreJournals(SPager *pPager) { while ((pDirEntry = tdbReadDir(pDir)) != NULL) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { - if (tdbPagerRestore(pPager, name) < 0) { + char jname[TD_PATH_MAX] = {0}; + int dirLen = strlen(pPager->pEnv->dbName); + memcpy(jname, pPager->pEnv->dbName, dirLen); + jname[dirLen] = '/'; + memcpy(jname + dirLen + 1, name, strlen(name)); + if (tdbPagerRestore(pPager, jname) < 0) { tdbCloseDir(&pDir); tdbError("failed to restore file due to %s. jFileName:%s", strerror(errno), name); From 7a38465c760eedb9f0893c8579c33cab3676879a Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 22 Dec 2022 20:29:42 +0800 Subject: [PATCH 15/82] tdb/pager: debug logs for pager restore --- source/libs/tdb/src/db/tdbPager.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 2a2a6f8bbd..ce20f6808f 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -880,6 +880,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbDebug("pager/restore: %p, %d/%d, txnId:%s", pPager, pPager->dbOrigSize, pPager->dbFileSize, jFileName); + for (int pgIndex = 0; pgIndex < journalSize; ++pgIndex) { // read pgno & the page from journal SPgno pgno; @@ -890,6 +892,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbTrace("pager/restore: restore pgno:%d,", pgno); + ret = tdbOsRead(jfd, pageBuf, pPager->pageSize); if (ret < 0) { tdbOsFree(pageBuf); @@ -929,7 +933,7 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } - if (tdbOsRemove(pPager->jFileName) < 0 && errno != ENOENT) { + if (tdbOsRemove(jFileName) < 0 && errno != ENOENT) { tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), pPager->jFileName); terrno = TAOS_SYSTEM_ERROR(errno); return -1; @@ -949,7 +953,12 @@ int tdbPagerRestoreJournals(SPager *pPager) { while ((pDirEntry = tdbReadDir(pDir)) != NULL) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { - if (tdbPagerRestore(pPager, name) < 0) { + char jname[TD_PATH_MAX] = {0}; + int dirLen = strlen(pPager->pEnv->dbName); + memcpy(jname, pPager->pEnv->dbName, dirLen); + jname[dirLen] = '/'; + memcpy(jname + dirLen + 1, name, strlen(name)); + if (tdbPagerRestore(pPager, jname) < 0) { tdbCloseDir(&pDir); tdbError("failed to restore file due to %s. jFileName:%s", strerror(errno), name); From 773423a6b64e8f5951affa2f7fe7b156ad7cbf1f Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 23 Dec 2022 09:11:45 +0800 Subject: [PATCH 16/82] fix: remove some logs --- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 4 ++-- source/libs/sync/src/syncSnapshot.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index dbd06d6ec0..0362d4af2a 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -423,8 +423,8 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { ASSERT(pHdr->index == pWriter->index + 1); pWriter->index = pHdr->index; - vInfo("vgId:%d, vnode snapshot write data, index:%" PRId64 " type:%d blockLen:%d", TD_VID(pVnode), pHdr->index, - pHdr->type, nData); + vDebug("vgId:%d, vnode snapshot write data, index:%" PRId64 " type:%d blockLen:%d", TD_VID(pVnode), pHdr->index, + pHdr->type, nData); switch (pHdr->type) { case SNAP_DATA_CFG: { diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 7a35f90165..9e1c6b36f1 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -49,7 +49,6 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI pSender->pSyncNode->pFsm->FpGetSnapshotInfo(pSender->pSyncNode->pFsm, &pSender->snapshot); pSender->finish = false; - sDebug("vgId:%d, snapshot sender create", pSender->pSyncNode->vgId); return pSender; } From 0c64cfc309e41de75bb10ef2ae7c126413ad949b Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 22 Dec 2022 20:29:42 +0800 Subject: [PATCH 17/82] tdb/pager: debug logs for pager restore --- source/libs/tdb/src/db/tdbPager.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 2a2a6f8bbd..62d82edeb1 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -880,6 +880,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbDebug("pager/restore: %p, %d/%d, txnId:%s", pPager, pPager->dbOrigSize, pPager->dbFileSize, jFileName); + for (int pgIndex = 0; pgIndex < journalSize; ++pgIndex) { // read pgno & the page from journal SPgno pgno; @@ -890,6 +892,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbTrace("pager/restore: restore pgno:%d,", pgno); + ret = tdbOsRead(jfd, pageBuf, pPager->pageSize); if (ret < 0) { tdbOsFree(pageBuf); @@ -929,7 +933,7 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } - if (tdbOsRemove(pPager->jFileName) < 0 && errno != ENOENT) { + if (tdbOsRemove(jFileName) < 0 && errno != ENOENT) { tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), pPager->jFileName); terrno = TAOS_SYSTEM_ERROR(errno); return -1; @@ -949,7 +953,12 @@ int tdbPagerRestoreJournals(SPager *pPager) { while ((pDirEntry = tdbReadDir(pDir)) != NULL) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { - if (tdbPagerRestore(pPager, name) < 0) { + char jname[TD_PATH_MAX] = {0}; + int dirLen = strlen(pPager->pEnv->dbName); + memcpy(jname, pPager->pEnv->dbName, dirLen); + jname[dirLen] = '/'; + memcpy(jname + dirLen + 1, name, strlen(name)); + if (tdbPagerRestore(pPager, jname) < 0) { tdbCloseDir(&pDir); tdbError("failed to restore file due to %s. jFileName:%s", strerror(errno), name); @@ -975,7 +984,12 @@ int tdbPagerRollback(SPager *pPager) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { - if (tdbOsRemove(name) < 0 && errno != ENOENT) { + char jname[TD_PATH_MAX] = {0}; + int dirLen = strlen(pPager->pEnv->dbName); + memcpy(jname, pPager->pEnv->dbName, dirLen); + jname[dirLen] = '/'; + memcpy(jname + dirLen + 1, name, strlen(name)); + if (tdbOsRemove(jname) < 0 && errno != ENOENT) { tdbCloseDir(&pDir); tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), name); From b59bee6696b159ad6bb8fed620de3f70057174f1 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 23 Dec 2022 11:03:50 +0800 Subject: [PATCH 18/82] fix: restart snapshot sender on receiver is restart --- source/libs/sync/src/syncPipeline.c | 2 +- source/libs/sync/src/syncRaftLog.c | 24 ++++++++--------- source/libs/sync/src/syncSnapshot.c | 41 ++++++++++++++++++----------- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index f438856ace..225de30755 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -112,7 +112,7 @@ SyncTerm syncLogReplMgrGetPrevLogTerm(SSyncLogReplMgr* pMgr, SSyncNode* pNode, S return prevLogTerm; } - sError("vgId:%d, failed to get log term since %s. index: %" PRId64 "", pNode->vgId, terrstr(), prevIndex); + sInfo("vgId:%d, failed to get log term since %s. index:%" PRId64, pNode->vgId, terrstr(), prevIndex); terrno = TSDB_CODE_WAL_LOG_NOT_EXIST; return -1; } diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 018ac5bb7d..3f9f397ef5 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -115,8 +115,8 @@ static int32_t raftLogRestoreFromSnapshot(struct SSyncLogStore* pLogStore, SyncI const char* sysErrStr = strerror(errno); sNError(pData->pSyncNode, - "wal restore from snapshot error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", - snapshotIndex, err, err, errStr, sysErr, sysErrStr); + "wal restore from snapshot error, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", snapshotIndex, + err, errStr, sysErr, sysErrStr); return -1; } @@ -212,8 +212,8 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr int32_t sysErr = errno; const char* sysErrStr = strerror(errno); - sNError(pData->pSyncNode, "wal write error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", - pEntry->index, err, err, errStr, sysErr, sysErrStr); + sNError(pData->pSyncNode, "wal write error, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", + pEntry->index, err, errStr, sysErr, sysErrStr); return -1; } @@ -257,11 +257,11 @@ int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncR const char* sysErrStr = strerror(errno); if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { - sNTrace(pData->pSyncNode, "wal read not exist, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", index, - err, err, errStr, sysErr, sysErrStr); + sNTrace(pData->pSyncNode, "wal read not exist, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", index, + err, errStr, sysErr, sysErrStr); } else { - sNTrace(pData->pSyncNode, "wal read error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", index, - err, err, errStr, sysErr, sysErrStr); + sNTrace(pData->pSyncNode, "wal read error, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", index, err, + errStr, sysErr, sysErrStr); } /* @@ -341,8 +341,8 @@ static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIn const char* errStr = tstrerror(err); int32_t sysErr = errno; const char* sysErrStr = strerror(errno); - sError("vgId:%d, wal truncate error, from-index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", - pData->pSyncNode->vgId, fromIndex, err, err, errStr, sysErr, sysErrStr); + sError("vgId:%d, wal truncate error, from-index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", + pData->pSyncNode->vgId, fromIndex, err, errStr, sysErr, sysErrStr); } // event log @@ -392,8 +392,8 @@ int32_t raftLogUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) { const char* errStr = tstrerror(err); int32_t sysErr = errno; const char* sysErrStr = strerror(errno); - sError("vgId:%d, wal update commit index error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", - pData->pSyncNode->vgId, index, err, err, errStr, sysErr, sysErrStr); + sError("vgId:%d, wal update commit index error, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", + pData->pSyncNode->vgId, index, err, errStr, sysErr, sysErrStr); return -1; } return 0; diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 9e1c6b36f1..54c11a503b 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -747,7 +747,7 @@ static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotS pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; // send msg - syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver receiving"); + syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver received"); if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { sRError(pReceiver, "snapshot receiver send resp failed since %s", terrstr()); return -1; @@ -979,32 +979,31 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { return -1; } - if (pMsg->startTime != pSender->startTime) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "sender:% " PRId64 " receiver:%" PRId64 " time not match"); - return -1; - } - // state, term, seq/ack if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender not leader"); sSError(pSender, "snapshot sender not leader"); - return -1; + goto _ERROR; + } + + if (pMsg->startTime != pSender->startTime) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver time not match"); + sSError(pSender, "sender:%" PRId64 " receiver:%" PRId64 " time not match, code:0x%x", pMsg->startTime, + pSender->startTime, pMsg->code); + goto _ERROR; } if (pMsg->term != pSyncNode->pRaftStore->currentTerm) { - sSError(pSender, "snapshot sender term not equal"); - return -1; + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver term not match"); + sSError(pSender, "snapshot sender term not equal, msg term:%" PRId64 " currentTerm:%" PRId64, pMsg->term, + pSyncNode->pRaftStore->currentTerm); + goto _ERROR; } if (pMsg->code != 0) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error code"); sSError(pSender, "snapshot sender receive error code:0x%x and stop sender", pMsg->code); - snapshotSenderStop(pSender, true); - SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); - if (pMgr) { - syncLogReplMgrReset(pMgr); - } - - return -1; + goto _ERROR; } // prepare , send begin msg @@ -1068,4 +1067,14 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { } return 0; + +_ERROR: + snapshotSenderStop(pSender, true); + SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); + if (pMgr) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "reset repl mgr"); + syncLogReplMgrReset(pMgr); + } + + return -1; } From 90e44ced3eba30d57a6700a4b23da1e5f2841bf8 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 23 Dec 2022 11:15:45 +0800 Subject: [PATCH 19/82] test: add logs --- tests/script/sh/deploy.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/script/sh/deploy.sh b/tests/script/sh/deploy.sh index 662c4a1a6c..217bd66ef6 100755 --- a/tests/script/sh/deploy.sh +++ b/tests/script/sh/deploy.sh @@ -134,6 +134,7 @@ echo "mDebugFlag 143" >> $TAOS_CFG echo "wDebugFlag 143" >> $TAOS_CFG echo "sDebugFlag 143" >> $TAOS_CFG echo "tsdbDebugFlag 143" >> $TAOS_CFG +echo "tdbDebugFlag 143" >> $TAOS_CFG echo "tqDebugFlag 143" >> $TAOS_CFG echo "fsDebugFlag 143" >> $TAOS_CFG echo "idxDebugFlag 143" >> $TAOS_CFG From 4f33119d461b0f9a8fbdc2fa1eda92555be062ab Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 23 Dec 2022 13:07:10 +0800 Subject: [PATCH 20/82] test: adjust test.sh --- tests/script/test.sh | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/script/test.sh b/tests/script/test.sh index a7a5d34fbe..19180382fd 100755 --- a/tests/script/test.sh +++ b/tests/script/test.sh @@ -10,13 +10,11 @@ set +e #set -x FILE_NAME= -RELEASE=0 -ASYNC=0 VALGRIND=0 -UNIQUE=0 +TEST=0 UNAME_BIN=`which uname` OS_TYPE=`$UNAME_BIN` -while getopts "f:agvum" arg +while getopts "f:tgv" arg do case $arg in f) @@ -25,8 +23,8 @@ do v) VALGRIND=1 ;; - u) - UNIQUE=1 + t) + TEST=1 ;; g) VALGRIND=2 @@ -140,6 +138,11 @@ if [ -n "$FILE_NAME" ]; then result=$? echo "Execute result:" $result + if [ $TEST -eq 1 ]; then + echo "Exit without check asan errors" + exit 1 + fi + if [ $result -eq 0 ]; then $CODE_DIR/sh/sigint_stop_dnodes.sh $CODE_DIR/sh/checkAsan.sh From 99ef55c7d456756549235ca172d2fbaebbf15937 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Fri, 23 Dec 2022 16:02:08 +0800 Subject: [PATCH 21/82] query(fix): return new error code except connect msg --- source/client/src/clientImpl.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index b480b0b230..4b4a1c368f 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1425,24 +1425,18 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { } // pMsg is response msg - switch (pMsg->msgType) { - case TDMT_VND_BATCH_META + 1: - case TDMT_VND_SUBMIT + 1: - case TDMT_SCH_QUERY + 1: - case TDMT_SCH_MERGE_QUERY + 1: - // uniform to one error code: TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED - if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { - pMsg->code = TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED; - } - break; - default: - // restore origin code - if (pMsg->code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) { - pMsg->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; - } else if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { - pMsg->code = TSDB_CODE_RPC_BROKEN_LINK; - } - break; + if (pMsg->msgType != TDMT_MND_CONNECT + 1) { + // uniform to one error code: TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED + if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { + pMsg->code = TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED; + } + } else { + // restore origin code + if (pMsg->code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) { + pMsg->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; + } else if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { + pMsg->code = TSDB_CODE_RPC_BROKEN_LINK; + } } AsyncArg* arg = taosMemoryCalloc(1, sizeof(AsyncArg)); From d0f789744b6521f0ce3bf82cf870f47fe6bf9aac Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Fri, 23 Dec 2022 16:04:48 +0800 Subject: [PATCH 22/82] query(fix): return new error code except connect msg --- source/client/src/clientImpl.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 4b4a1c368f..456a059191 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1425,18 +1425,18 @@ void processMsgFromServer(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { } // pMsg is response msg - if (pMsg->msgType != TDMT_MND_CONNECT + 1) { - // uniform to one error code: TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED - if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { - pMsg->code = TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED; - } - } else { + if (pMsg->msgType == TDMT_MND_CONNECT + 1) { // restore origin code if (pMsg->code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED) { pMsg->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; } else if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { pMsg->code = TSDB_CODE_RPC_BROKEN_LINK; } + } else { + // uniform to one error code: TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED + if (pMsg->code == TSDB_CODE_RPC_SOMENODE_BROKEN_LINK) { + pMsg->code = TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED; + } } AsyncArg* arg = taosMemoryCalloc(1, sizeof(AsyncArg)); From 4a97f25725843afa08aacdb3ade7c113ffd2a524 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Sat, 24 Dec 2022 11:16:45 +0800 Subject: [PATCH 23/82] fix(query): show sql len on systable length modify from 1024 to 2048 --- include/util/tdef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/util/tdef.h b/include/util/tdef.h index e1d421a399..d585912259 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -254,7 +254,7 @@ typedef enum ELogicConditionType { #define TSDB_EP_LEN (TSDB_FQDN_LEN + 6) #define TSDB_IPv4ADDR_LEN 16 #define TSDB_FILENAME_LEN 128 -#define TSDB_SHOW_SQL_LEN 1024 +#define TSDB_SHOW_SQL_LEN 2048 #define TSDB_SLOW_QUERY_SQL_LEN 512 #define TSDB_SHOW_SUBQUERY_LEN 1000 From 05997a3dcf581bfb067851e340fd26a1c58b92f5 Mon Sep 17 00:00:00 2001 From: xleili Date: Sat, 24 Dec 2022 11:16:12 -0500 Subject: [PATCH 24/82] docs:release 3.0.2.2 --- docs/en/28-releases/01-tdengine.md | 4 ++++ docs/en/28-releases/02-tools.md | 4 ++++ docs/zh/28-releases/01-tdengine.md | 4 ++++ docs/zh/28-releases/02-tools.md | 4 ++++ 4 files changed, 16 insertions(+) diff --git a/docs/en/28-releases/01-tdengine.md b/docs/en/28-releases/01-tdengine.md index bcfcaf9ffb..9b83c5fd65 100644 --- a/docs/en/28-releases/01-tdengine.md +++ b/docs/en/28-releases/01-tdengine.md @@ -10,6 +10,10 @@ For TDengine 2.x installation packages by version, please visit [here](https://w import Release from "/components/ReleaseV3"; +## 3.0.2.2 + + + ## 3.0.2.1 diff --git a/docs/en/28-releases/02-tools.md b/docs/en/28-releases/02-tools.md index 6013aacc35..dd44e43ab6 100644 --- a/docs/en/28-releases/02-tools.md +++ b/docs/en/28-releases/02-tools.md @@ -10,6 +10,10 @@ For other historical version installers, please visit [here](https://www.taosdat import Release from "/components/ReleaseV3"; +## 2.4.0 + + + ## 2.3.3 diff --git a/docs/zh/28-releases/01-tdengine.md b/docs/zh/28-releases/01-tdengine.md index 0fe6555162..e19b2133e4 100644 --- a/docs/zh/28-releases/01-tdengine.md +++ b/docs/zh/28-releases/01-tdengine.md @@ -10,6 +10,10 @@ TDengine 2.x 各版本安装包请访问[这里](https://www.taosdata.com/all-do import Release from "/components/ReleaseV3"; +## 3.0.2.2 + + + ## 3.0.2.1 diff --git a/docs/zh/28-releases/02-tools.md b/docs/zh/28-releases/02-tools.md index 331f6832c0..b2ead5b264 100644 --- a/docs/zh/28-releases/02-tools.md +++ b/docs/zh/28-releases/02-tools.md @@ -10,6 +10,10 @@ taosTools 各版本安装包下载链接如下: import Release from "/components/ReleaseV3"; +## 2.4.0 + + + ## 2.3.3 From c4f40874c11c87db89281abd58b21f9af493c2f0 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 22 Dec 2022 20:29:42 +0800 Subject: [PATCH 25/82] tdb/pager: debug logs for pager restore --- source/libs/tdb/src/db/tdbPager.c | 20 +++++++++++++++++--- source/libs/tdb/src/db/tdbTable.c | 4 ++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 2a2a6f8bbd..62d82edeb1 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -880,6 +880,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbDebug("pager/restore: %p, %d/%d, txnId:%s", pPager, pPager->dbOrigSize, pPager->dbFileSize, jFileName); + for (int pgIndex = 0; pgIndex < journalSize; ++pgIndex) { // read pgno & the page from journal SPgno pgno; @@ -890,6 +892,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } + tdbTrace("pager/restore: restore pgno:%d,", pgno); + ret = tdbOsRead(jfd, pageBuf, pPager->pageSize); if (ret < 0) { tdbOsFree(pageBuf); @@ -929,7 +933,7 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { return -1; } - if (tdbOsRemove(pPager->jFileName) < 0 && errno != ENOENT) { + if (tdbOsRemove(jFileName) < 0 && errno != ENOENT) { tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), pPager->jFileName); terrno = TAOS_SYSTEM_ERROR(errno); return -1; @@ -949,7 +953,12 @@ int tdbPagerRestoreJournals(SPager *pPager) { while ((pDirEntry = tdbReadDir(pDir)) != NULL) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { - if (tdbPagerRestore(pPager, name) < 0) { + char jname[TD_PATH_MAX] = {0}; + int dirLen = strlen(pPager->pEnv->dbName); + memcpy(jname, pPager->pEnv->dbName, dirLen); + jname[dirLen] = '/'; + memcpy(jname + dirLen + 1, name, strlen(name)); + if (tdbPagerRestore(pPager, jname) < 0) { tdbCloseDir(&pDir); tdbError("failed to restore file due to %s. jFileName:%s", strerror(errno), name); @@ -975,7 +984,12 @@ int tdbPagerRollback(SPager *pPager) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { - if (tdbOsRemove(name) < 0 && errno != ENOENT) { + char jname[TD_PATH_MAX] = {0}; + int dirLen = strlen(pPager->pEnv->dbName); + memcpy(jname, pPager->pEnv->dbName, dirLen); + jname[dirLen] = '/'; + memcpy(jname + dirLen + 1, name, strlen(name)); + if (tdbOsRemove(jname) < 0 && errno != ENOENT) { tdbCloseDir(&pDir); tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), name); diff --git a/source/libs/tdb/src/db/tdbTable.c b/source/libs/tdb/src/db/tdbTable.c index 2950169979..972e2f29e5 100644 --- a/source/libs/tdb/src/db/tdbTable.c +++ b/source/libs/tdb/src/db/tdbTable.c @@ -108,13 +108,13 @@ int tdbTbOpen(const char *tbname, int keyLen, int valLen, tdb_cmpr_fn_t keyCmprF ASSERT(pPager != NULL); if (rollback) { - tdbPagerRollback(pPager); - } else { ret = tdbPagerRestoreJournals(pPager); if (ret < 0) { tdbOsFree(pTb); return -1; } + } else { + tdbPagerRollback(pPager); } // pTb->pBt From 3ea9fadfc8c146784dd116914e1aed876716a95b Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Mon, 26 Dec 2022 10:10:49 +0800 Subject: [PATCH 26/82] fix(vnd): save vnode info at the begining of vnode prepare commit --- source/dnode/vnode/src/vnd/vnodeCommit.c | 67 +++++++++++++++++------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index be977e7cbd..1aefe19476 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -184,16 +184,51 @@ _err: return -1; } -static void vnodePrepareCommit(SVnode *pVnode) { +static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { + int32_t code = 0; + int32_t lino = 0; + char dir[TSDB_FILENAME_LEN] = {0}; + tsem_wait(&pVnode->canCommit); + pVnode->state.commitTerm = pVnode->state.applyTerm; + + pInfo->info.config = pVnode->config; + pInfo->info.state.committed = pVnode->state.applied; + pInfo->info.state.commitTerm = pVnode->state.applyTerm; + pInfo->info.state.commitID = pVnode->state.commitID; + pInfo->pVnode = pVnode; + pInfo->txn = metaGetTxn(pVnode->pMeta); + + // save info + if (pVnode->pTfs) { + snprintf(dir, TSDB_FILENAME_LEN, "%s%s%s", tfsGetPrimaryPath(pVnode->pTfs), TD_DIRSEP, pVnode->path); + } else { + snprintf(dir, TSDB_FILENAME_LEN, "%s", pVnode->path); + } + if (vnodeSaveInfo(dir, &pInfo->info) < 0) { + code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + } + tsdbPrepareCommit(pVnode->pTsdb); - metaPrepareAsyncCommit(pVnode->pMeta); smaPrepareAsyncCommit(pVnode->pSma); + metaPrepareAsyncCommit(pVnode->pMeta); + vnodeBufPoolUnRef(pVnode->inUse); pVnode->inUse = NULL; + +_exit: + if (code) { + vError("vgId:%d, %s failed at line %d since %s, commit id:%" PRId64, TD_VID(pVnode), __func__, lino, + tstrerror(code), pVnode->state.commitID); + } else { + vDebug("vgId:%d, %s done", TD_VID(pVnode), __func__); + } + return code; } + static int32_t vnodeCommitTask(void *arg) { int32_t code = 0; @@ -213,27 +248,26 @@ _exit: int vnodeAsyncCommit(SVnode *pVnode) { int32_t code = 0; - // prepare to commit - vnodePrepareCommit(pVnode); - - // schedule the task - pVnode->state.commitTerm = pVnode->state.applyTerm; - SCommitInfo *pInfo = (SCommitInfo *)taosMemoryCalloc(1, sizeof(*pInfo)); if (NULL == pInfo) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } - pInfo->info.config = pVnode->config; - pInfo->info.state.committed = pVnode->state.applied; - pInfo->info.state.commitTerm = pVnode->state.applyTerm; - pInfo->info.state.commitID = pVnode->state.commitID; - pInfo->pVnode = pVnode; - pInfo->txn = metaGetTxn(pVnode->pMeta); + + // prepare to commit + code = vnodePrepareCommit(pVnode, pInfo); + if (TSDB_CODE_SUCCESS != code) { + goto _exit; + } + + // schedule the task vnodeScheduleTask(vnodeCommitTask, pInfo); _exit: if (code) { + if (NULL != pInfo) { + taosMemoryFree(pInfo); + } vError("vgId:%d, %s failed since %s, commit id:%" PRId64, TD_VID(pVnode), __func__, tstrerror(code), pVnode->state.commitID); } else { @@ -265,16 +299,11 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { return -1; } - // save info if (pVnode->pTfs) { snprintf(dir, TSDB_FILENAME_LEN, "%s%s%s", tfsGetPrimaryPath(pVnode->pTfs), TD_DIRSEP, pVnode->path); } else { snprintf(dir, TSDB_FILENAME_LEN, "%s", pVnode->path); } - if (vnodeSaveInfo(dir, &pInfo->info) < 0) { - code = terrno; - TSDB_CHECK_CODE(code, lino, _exit); - } // walBeginSnapshot(pVnode->pWal, pVnode->state.applied); syncBeginSnapshot(pVnode->sync, pVnode->state.applied); From 70ffbabcbf5fa8d98b18835cd436b1100be4e722 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao@163.com> Date: Mon, 26 Dec 2022 10:15:14 +0800 Subject: [PATCH 27/82] fix:remove assert --- source/libs/stream/src/streamState.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index af1d738de0..6670bf463e 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -656,8 +656,7 @@ int32_t streamStateSessionClear(SStreamState* pState) { void* buf = NULL; int32_t size = 0; int32_t code = streamStateSessionGetKVByCur(pCur, &delKey, &buf, &size); - if (code == 0) { - ASSERT(size > 0); + if (code == 0 && size > 0) { memset(buf, 0, size); streamStateSessionPut(pState, &delKey, buf, size); } else { From a5165ab5e7dcff44dc050f2bbcfe57d879f187d3 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 26 Dec 2022 10:58:17 +0800 Subject: [PATCH 28/82] refactor: do some internal refactor. --- source/dnode/vnode/src/tsdb/tsdbMergeTree.c | 1 - source/dnode/vnode/src/tsdb/tsdbRead.c | 143 ++++++-------------- source/libs/executor/src/dataDeleter.c | 5 +- source/libs/executor/src/dataDispatcher.c | 13 +- source/libs/executor/src/exchangeoperator.c | 1 - source/libs/executor/src/executorimpl.c | 38 +----- source/libs/executor/src/joinoperator.c | 58 ++++---- source/libs/executor/src/sortoperator.c | 4 - source/libs/executor/src/tsimplehash.c | 5 +- source/libs/executor/src/tsort.c | 1 + 10 files changed, 85 insertions(+), 184 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c index 41bf823095..beb4303156 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c +++ b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c @@ -567,7 +567,6 @@ int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, SDataFReader *pFRead pMTree->pLoadInfo = pBlockLoadInfo; pMTree->destroyLoadInfo = destroyLoadInfo; - ASSERT(pMTree->pLoadInfo != NULL); for (int32_t i = 0; i < pFReader->pSet->nSttF; ++i) { // open all last file struct SLDataIter *pIter = NULL; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 050d03cf73..346ad854df 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -243,7 +243,7 @@ static int32_t setColumnIdSlotList(SBlockLoadSuppInfo* pSupInfo, SColumnInfo* pC return TSDB_CODE_SUCCESS; } -static void updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInfo) { +static int32_t updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInfo) { int32_t i = 0, j = 0; while(i < pSchema->numOfCols && j < pSupInfo->numOfCols) { @@ -251,7 +251,7 @@ static void updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInfo) if (pTCol->colId == pSupInfo->colId[j]) { if (!IS_BSMA_ON(pTCol)) { pSupInfo->smaValid = false; - return; + return TSDB_CODE_SUCCESS; } i += 1; @@ -260,9 +260,11 @@ static void updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInfo) // do nothing i += 1; } else { - ASSERT(0); + return TSDB_CODE_INVALID_PARA; } } + + return TSDB_CODE_SUCCESS; } static int32_t initBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) { @@ -579,7 +581,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd } if (VND_IS_TSMA(pVnode)) { - tsdbDebug("vgId:%d, tsma is selected to query", TD_VID(pVnode)); + tsdbDebug("vgId:%d, tsma is selected to query, %s", TD_VID(pVnode), idstr); } initReaderStatus(&pReader->status); @@ -594,7 +596,6 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd pReader->type = pCond->type; pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); pReader->blockInfoBuf.numPerBucket = 1000; // 1000 tables per bucket - ASSERT(pCond->numOfCols > 0); if (pReader->pResBlock == NULL) { pReader->freeBlock = true; @@ -605,6 +606,12 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd } } + if (pCond->numOfCols <= 0) { + tsdbError("vgId:%d, invalid column number %d in query cond, %s", TD_VID(pVnode), pCond->numOfCols, idstr); + code = TSDB_CODE_INVALID_PARA; + goto _end; + } + // todo refactor. limitOutputBufferSize(pCond, &pReader->capacity); @@ -794,8 +801,9 @@ static void doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int32_ } static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) { - if (taosArrayGetSize(pBlockIter->blockList) == 0) { - ASSERT(pBlockIter->numOfBlocks == taosArrayGetSize(pBlockIter->blockList)); + size_t num = taosArrayGetSize(pBlockIter->blockList); + if (num == 0) { + ASSERT(pBlockIter->numOfBlocks == num); return NULL; } @@ -805,73 +813,6 @@ static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) { static SDataBlk* getCurrentBlock(SDataBlockIter* pBlockIter) { return &pBlockIter->block; } -int32_t binarySearchForTs(char* pValue, int num, TSKEY key, int order) { - int32_t midPos = -1; - int32_t numOfRows; - - ASSERT(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC); - - TSKEY* keyList = (TSKEY*)pValue; - int32_t firstPos = 0; - int32_t lastPos = num - 1; - - if (order == TSDB_ORDER_DESC) { - // find the first position which is smaller than the key - while (1) { - if (key >= keyList[firstPos]) return firstPos; - if (key == keyList[lastPos]) return lastPos; - - if (key < keyList[lastPos]) { - lastPos += 1; - if (lastPos >= num) { - return -1; - } else { - return lastPos; - } - } - - numOfRows = lastPos - firstPos + 1; - midPos = (numOfRows >> 1) + firstPos; - - if (key < keyList[midPos]) { - firstPos = midPos + 1; - } else if (key > keyList[midPos]) { - lastPos = midPos - 1; - } else { - break; - } - } - - } else { - // find the first position which is bigger than the key - while (1) { - if (key <= keyList[firstPos]) return firstPos; - if (key == keyList[lastPos]) return lastPos; - - if (key > keyList[lastPos]) { - lastPos = lastPos + 1; - if (lastPos >= num) - return -1; - else - return lastPos; - } - - numOfRows = lastPos - firstPos + 1; - midPos = (numOfRows >> 1u) + firstPos; - - if (key < keyList[midPos]) { - lastPos = midPos - 1; - } else if (key > keyList[midPos]) { - firstPos = midPos + 1; - } else { - break; - } - } - } - - return midPos; -} - static int doBinarySearchKey(TSKEY* keyList, int num, int pos, TSKEY key, int order) { // start end position int s, e; @@ -972,8 +913,8 @@ static void copyNumericCols(const SColData* pData, SFileBlockDumpInfo* pDumpInfo int32_t step = asc? 1:-1; - // make sure it is aligned to 8bit - ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); + // make sure it is aligned to 8bit, the allocated memory address is aligned to 256bit +// ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); // 1. copy data in a batch model memcpy(pColData->pData, p, dumpedRows * tDataTypes[pData->type].bytes); @@ -1183,7 +1124,6 @@ static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockI SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - ASSERT(pBlockInfo != NULL); SDataBlk* pBlock = getCurrentBlock(pBlockIter); code = tsdbReadDataBlock(pReader->pFileReader, pBlock, pBlockData); @@ -1221,8 +1161,6 @@ static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) { } static int32_t initBlockOrderSupporter(SBlockOrderSupporter* pSup, int32_t numOfTables) { - ASSERT(numOfTables >= 1); - pSup->numOfBlocksPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables); pSup->indexPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables); pSup->pDataBlockInfo = taosMemoryCalloc(1, POINTER_BYTES * numOfTables); @@ -1329,7 +1267,10 @@ static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIte sup.numOfTables += 1; } - ASSERT(numOfBlocks == cnt); + if (numOfBlocks != cnt && sup.numOfTables != numOfTables) { + cleanupBlockOrderSupporter(&sup); + return TSDB_CODE_INVALID_PARA; + } // since there is only one table qualified, blocks are not sorted if (sup.numOfTables == 1) { @@ -1351,10 +1292,9 @@ static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIte tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pReader, cnt, sup.numOfTables, pReader->idStr); - ASSERT(cnt <= numOfBlocks && sup.numOfTables <= numOfTables); - SMultiwayMergeTreeInfo* pTree = NULL; - uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar); + + uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar); if (ret != TSDB_CODE_SUCCESS) { cleanupBlockOrderSupporter(&sup); return TSDB_CODE_OUT_OF_MEMORY; @@ -1432,8 +1372,6 @@ static bool getNeighborBlockOfSameTable(SFileDataBlockInfo* pBlockInfo, STableBl } static int32_t findFileBlockInfoIndex(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pFBlockInfo) { - ASSERT(pBlockIter != NULL && pFBlockInfo != NULL); - int32_t step = ASCENDING_TRAVERSE(pBlockIter->order) ? 1 : -1; int32_t index = pBlockIter->index; @@ -1924,7 +1862,6 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, } doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, &merge, &pReader->verRange); - ASSERT(mergeBlockData); // merge with block data if ts == key if (tsLastBlock == pBlockData->aTSKEY[pDumpInfo->rowIndex]) { @@ -1990,7 +1927,6 @@ static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader tRowMergerClear(&merge); return code; } else { - ASSERT(0); return TSDB_CODE_SUCCESS; } } else { // desc order @@ -2011,7 +1947,6 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* TSDBROW* pRow = getValidMemRow(&pBlockScanInfo->iter, pDelList, pReader); TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pDelList, pReader); - ASSERT(pRow != NULL && piRow != NULL); int64_t tsLast = INT64_MIN; if (hasDataInLastBlock(pLastBlockReader)) { @@ -2235,7 +2170,6 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea if (pReader->pReadSnap->pMem != NULL) { d = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid); if (d != NULL) { - ASSERT(pBlockScanInfo->iter.iter == NULL); code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter); if (code == TSDB_CODE_SUCCESS) { pBlockScanInfo->iter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iter.iter) != NULL); @@ -2349,10 +2283,9 @@ static int64_t getCurrentKeyInLastBlock(SLastBlockReader* pLastBlockReader) { static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader) { return pLastBlockReader->mergeTree.pIter != NULL; } bool hasDataInFileBlock(const SBlockData* pBlockData, const SFileBlockDumpInfo* pDumpInfo) { - if (pBlockData->nRow > 0) { - ASSERT(pBlockData->nRow == pDumpInfo->totalRows); + if ((pBlockData->nRow > 0) && (pBlockData->nRow != pDumpInfo->totalRows)) { + return false; // this is an invalid result. } - return pBlockData->nRow > 0 && (!pDumpInfo->allDumped); } @@ -2583,7 +2516,6 @@ int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* int32_t code = 0; SArray* pDelData = taosArrayInit(4, sizeof(SDelData)); - ASSERT(pReader->pReadSnap != NULL); SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile; if (pDelFile && taosArrayGetSize(pReader->pDelIdx) > 0) { @@ -2868,7 +2800,6 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { TSDBKEY keyInBuf = getCurrentKeyInBuf(pScanInfo, pReader); if (pBlockInfo == NULL) { // build data block from last data file - ASSERT(pBlockIter->numOfBlocks == 0); code = buildComposedDataBlock(pReader); } else if (fileBlockShouldLoad(pReader, pBlockInfo, pBlock, pScanInfo, keyInBuf, pLastBlockReader)) { code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData, pScanInfo->uid); @@ -3837,7 +3768,10 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, void* pTableL } if (pReader->pSchema != NULL) { - updateBlockSMAInfo(pReader->pSchema, &pReader->suppInfo); + code = updateBlockSMAInfo(pReader->pSchema, &pReader->suppInfo); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } } STsdbReader* p = (pReader->innerReader[0] != NULL) ? pReader->innerReader[0] : pReader; @@ -4113,25 +4047,27 @@ static void doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_ } int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, bool* allHave) { + SColumnDataAgg*** pBlockSMA = &pDataBlock->pBlockAgg; + int32_t code = 0; - SColumnDataAgg ***pBlockSMA = &pDataBlock->pBlockAgg; *allHave = false; + *pBlockSMA = NULL; if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { - *pBlockSMA = NULL; return TSDB_CODE_SUCCESS; } // there is no statistics data for composed block if (pReader->status.composedDataBlock || (!pReader->suppInfo.smaValid)) { - *pBlockSMA = NULL; return TSDB_CODE_SUCCESS; } SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter); SBlockLoadSuppInfo* pSup = &pReader->suppInfo; - ASSERT(pReader->pResBlock->info.id.uid == pFBlock->uid); + if (pReader->pResBlock->info.id.uid != pFBlock->uid) { + return TSDB_CODE_SUCCESS; + } SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); if (tDataBlkHasSma(pBlock)) { @@ -4187,7 +4123,7 @@ int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, } else if (pAgg->colId < pSup->colId[j]) { i += 1; } else if (pSup->colId[j] < pAgg->colId) { - ASSERT(pSup->colId[j] == PRIMARYKEY_TIMESTAMP_COL_ID); + // ASSERT(pSup->colId[j] == PRIMARYKEY_TIMESTAMP_COL_ID); pResBlock->pBlockAgg[pSup->slotId[j]] = &pSup->tsColAgg; j += 1; } @@ -4418,9 +4354,12 @@ int32_t tsdbGetTableSchema(SVnode* pVnode, int64_t uid, STSchema** pSchema, int6 return terrno; } sversion = mr.me.stbEntry.schemaRow.version; - } else { - ASSERT(mr.me.type == TSDB_NORMAL_TABLE); + } else if (mr.me.type == TSDB_NORMAL_TABLE) { sversion = mr.me.ntbEntry.schemaRow.version; + } else { + terrno = TSDB_CODE_INVALID_PARA; + metaReaderClear(&mr); + return terrno; } metaReaderClear(&mr); diff --git a/source/libs/executor/src/dataDeleter.c b/source/libs/executor/src/dataDeleter.c index eff7a5ef93..a8051ea7c3 100644 --- a/source/libs/executor/src/dataDeleter.c +++ b/source/libs/executor/src/dataDeleter.c @@ -62,8 +62,8 @@ static void toDataCacheEntry(SDataDeleterHandle* pHandle, const SInputData* pInp pEntry->numOfCols = taosArrayGetSize(pInput->pData->pDataBlock); pEntry->dataLen = sizeof(SDeleterRes); - ASSERT(1 == pEntry->numOfRows); - ASSERT(3 == pEntry->numOfCols); +// ASSERT(1 == pEntry->numOfRows); +// ASSERT(3 == pEntry->numOfCols); pBuf->useSize = sizeof(SDataCacheEntry); @@ -167,7 +167,6 @@ static void getDataLength(SDataSinkHandle* pHandle, int64_t* pLen, bool* pQueryE SDataDeleterBuf* pBuf = NULL; taosReadQitem(pDeleter->pDataBlocks, (void**)&pBuf); - ASSERT(NULL != pBuf); memcpy(&pDeleter->nextOutput, pBuf, sizeof(SDataDeleterBuf)); taosFreeQitem(pBuf); diff --git a/source/libs/executor/src/dataDispatcher.c b/source/libs/executor/src/dataDispatcher.c index c2fa438c80..a603bffba5 100644 --- a/source/libs/executor/src/dataDispatcher.c +++ b/source/libs/executor/src/dataDispatcher.c @@ -77,8 +77,8 @@ static void toDataCacheEntry(SDataDispatchHandle* pHandle, const SInputData* pIn pBuf->useSize = sizeof(SDataCacheEntry); pEntry->dataLen = blockEncode(pInput->pData, pEntry->data, numOfCols); - ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data + 8)); - ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data + 8 + 4)); +// ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data + 8)); +// ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data + 8 + 4)); pBuf->useSize += pEntry->dataLen; @@ -162,15 +162,14 @@ static void getDataLength(SDataSinkHandle* pHandle, int64_t* pLen, bool* pQueryE SDataDispatchBuf* pBuf = NULL; taosReadQitem(pDispatcher->pDataBlocks, (void**)&pBuf); - ASSERT(NULL != pBuf); memcpy(&pDispatcher->nextOutput, pBuf, sizeof(SDataDispatchBuf)); taosFreeQitem(pBuf); SDataCacheEntry* pEntry = (SDataCacheEntry*)pDispatcher->nextOutput.pData; *pLen = pEntry->dataLen; - ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data + 8)); - ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data + 8 + 4)); +// ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data + 8)); +// ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data + 8 + 4)); *pQueryEnd = pDispatcher->queryEnd; qDebug("got data len %" PRId64 ", row num %d in sink", *pLen, @@ -193,8 +192,8 @@ static int32_t getDataBlock(SDataSinkHandle* pHandle, SOutputData* pOutput) { pOutput->numOfCols = pEntry->numOfCols; pOutput->compressed = pEntry->compressed; - ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data + 8)); - ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data + 8 + 4)); +// ASSERT(pEntry->numOfRows == *(int32_t*)(pEntry->data + 8)); +// ASSERT(pEntry->numOfCols == *(int32_t*)(pEntry->data + 8 + 4)); atomic_sub_fetch_64(&pDispatcher->cachedSize, pEntry->dataLen); atomic_sub_fetch_64(&gDataSinkStat.cachedSize, pEntry->dataLen); diff --git a/source/libs/executor/src/exchangeoperator.c b/source/libs/executor/src/exchangeoperator.c index 4103ca82dc..9873c52006 100644 --- a/source/libs/executor/src/exchangeoperator.c +++ b/source/libs/executor/src/exchangeoperator.c @@ -373,7 +373,6 @@ int32_t loadRemoteDataCallback(void* param, SDataBuf* pMsg, int32_t code) { pRsp->useconds = htobe64(pRsp->useconds); pRsp->numOfBlocks = htonl(pRsp->numOfBlocks); - ASSERT(pRsp != NULL); qDebug("%s fetch rsp received, index:%d, blocks:%d, rows:%" PRId64 ", %p", pSourceDataInfo->taskId, index, pRsp->numOfBlocks, pRsp->numOfRows, pExchangeInfo); } else { diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 668a93740d..fde13498ea 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -104,8 +104,6 @@ static int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, void setOperatorCompleted(SOperatorInfo* pOperator) { pOperator->status = OP_EXEC_DONE; - ASSERT(pOperator->pTaskInfo != NULL); - pOperator->cost.totalCost = (taosGetTimestampUs() - pOperator->pTaskInfo->cost.start) / 1000.0; setTaskStatus(pOperator->pTaskInfo, TASK_COMPLETED); } @@ -524,7 +522,7 @@ bool functionNeedToExecute(SqlFunctionCtx* pCtx) { return true; } -static int32_t doCreateConstantValColumnAggInfo(SInputColumnInfoData* pInput, SFunctParam* pFuncParam, int32_t type, +static int32_t doCreateConstantValColumnSMAInfo(SInputColumnInfoData* pInput, SFunctParam* pFuncParam, int32_t type, int32_t paramIndex, int32_t numOfRows) { if (pInput->pData[paramIndex] == NULL) { pInput->pData[paramIndex] = taosMemoryCalloc(1, sizeof(SColumnInfoData)); @@ -548,8 +546,6 @@ static int32_t doCreateConstantValColumnAggInfo(SInputColumnInfoData* pInput, SF da = pInput->pColumnDataAgg[paramIndex]; } - ASSERT(!IS_VAR_DATA_TYPE(type)); - if (type == TSDB_DATA_TYPE_BIGINT) { int64_t v = pFuncParam->param.i; *da = (SColumnDataAgg){.numOfNull = 0, .min = v, .max = v, .sum = v * numOfRows}; @@ -570,7 +566,7 @@ static int32_t doCreateConstantValColumnAggInfo(SInputColumnInfoData* pInput, SF } else if (type == TSDB_DATA_TYPE_TIMESTAMP) { // do nothing } else { - ASSERT(0); + qError("invalid constant type for sma info"); } return TSDB_CODE_SUCCESS; @@ -600,7 +596,7 @@ void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, SSDataBlock* pB // the data in the corresponding SColumnInfoData will not be used. pInput->pData[j] = taosArrayGet(pBlock->pDataBlock, slotId); } else if (pFuncParam->type == FUNC_PARAM_TYPE_VALUE) { - doCreateConstantValColumnAggInfo(pInput, pFuncParam, pFuncParam->param.nType, j, pBlock->info.rows); + doCreateConstantValColumnSMAInfo(pInput, pFuncParam, pFuncParam->param.nType, j, pBlock->info.rows); } } } else { @@ -2217,13 +2213,11 @@ int32_t extractTableScanNode(SPhysiNode* pNode, STableScanPhysiNode** ppNode) { *ppNode = (STableScanPhysiNode*)pNode; return 0; } else { - ASSERT(0); terrno = TSDB_CODE_APP_ERROR; return -1; } } else { if (LIST_LENGTH(pNode->pChildren) != 1) { - ASSERT(0); terrno = TSDB_CODE_APP_ERROR; return -1; } @@ -2233,32 +2227,6 @@ int32_t extractTableScanNode(SPhysiNode* pNode, STableScanPhysiNode** ppNode) { return -1; } -#if 0 -int32_t rebuildReader(SOperatorInfo* pOperator, SSubplan* plan, SReadHandle* pHandle, int64_t uid, int64_t ts) { - STableScanInfo* pTableScanInfo = NULL; - if (extractTbscanInStreamOpTree(pOperator, &pTableScanInfo) < 0) { - return -1; - } - - STableScanPhysiNode* pNode = NULL; - if (extractTableScanNode(plan->pNode, &pNode) < 0) { - ASSERT(0); - } - - tsdbReaderClose(pTableScanInfo->dataReader); - - STableListInfo info = {0}; - pTableScanInfo->dataReader = doCreateDataReader(pNode, pHandle, &info, NULL); - if (pTableScanInfo->dataReader == NULL) { - ASSERT(0); - qError("failed to create data reader"); - return TSDB_CODE_APP_ERROR; - } - // TODO: set uid and ts to data reader - return 0; -} -#endif - int32_t createDataSinkParam(SDataSinkNode* pNode, void** pParam, qTaskInfo_t* pTaskInfo, SReadHandle* readHandle) { SExecTaskInfo* pTask = *(SExecTaskInfo**)pTaskInfo; diff --git a/source/libs/executor/src/joinoperator.c b/source/libs/executor/src/joinoperator.c index 8a097a23ce..88ed9eccb3 100644 --- a/source/libs/executor/src/joinoperator.c +++ b/source/libs/executor/src/joinoperator.c @@ -42,38 +42,40 @@ typedef struct SJoinOperatorInfo { static void setJoinColumnInfo(SColumnInfo* pColumn, const SColumnNode* pColumnNode); static SSDataBlock* doMergeJoin(struct SOperatorInfo* pOperator); static void destroyMergeJoinOperator(void* param); -static void extractTimeCondition(SJoinOperatorInfo* pInfo, SOperatorInfo** pDownstream, int32_t numOfDownstream, - SSortMergeJoinPhysiNode* pJoinNode); +static void extractTimeCondition(SJoinOperatorInfo* pInfo, SOperatorInfo** pDownstream, int32_t num, + SSortMergeJoinPhysiNode* pJoinNode, const char* idStr); -static void extractTimeCondition(SJoinOperatorInfo* pInfo, SOperatorInfo** pDownstream, int32_t numOfDownstream, - SSortMergeJoinPhysiNode* pJoinNode) { +static void extractTimeCondition(SJoinOperatorInfo* pInfo, SOperatorInfo** pDownstream, int32_t num, + SSortMergeJoinPhysiNode* pJoinNode, const char* idStr) { SNode* pMergeCondition = pJoinNode->pMergeCondition; - if (nodeType(pMergeCondition) == QUERY_NODE_OPERATOR) { - SOperatorNode* pNode = (SOperatorNode*)pMergeCondition; - SColumnNode* col1 = (SColumnNode*)pNode->pLeft; - SColumnNode* col2 = (SColumnNode*)pNode->pRight; - SColumnNode* leftTsCol = NULL; - SColumnNode* rightTsCol = NULL; - if (col1->dataBlockId == col2->dataBlockId ) { + if (nodeType(pMergeCondition) != QUERY_NODE_OPERATOR) { + qError("not support this in join operator, %s", idStr); + return; // do not handle this + } + + SOperatorNode* pNode = (SOperatorNode*)pMergeCondition; + SColumnNode* col1 = (SColumnNode*)pNode->pLeft; + SColumnNode* col2 = (SColumnNode*)pNode->pRight; + SColumnNode* leftTsCol = NULL; + SColumnNode* rightTsCol = NULL; + if (col1->dataBlockId == col2->dataBlockId) { + leftTsCol = col1; + rightTsCol = col2; + } else { + if (col1->dataBlockId == pDownstream[0]->resultDataBlockId) { + ASSERT(col2->dataBlockId == pDownstream[1]->resultDataBlockId); leftTsCol = col1; rightTsCol = col2; } else { - if (col1->dataBlockId == pDownstream[0]->resultDataBlockId) { - ASSERT(col2->dataBlockId == pDownstream[1]->resultDataBlockId); - leftTsCol = col1; - rightTsCol = col2; - } else { - ASSERT(col1->dataBlockId == pDownstream[1]->resultDataBlockId); - ASSERT(col2->dataBlockId == pDownstream[0]->resultDataBlockId); - leftTsCol = col2; - rightTsCol = col1; - } + ASSERT(col1->dataBlockId == pDownstream[1]->resultDataBlockId); + ASSERT(col2->dataBlockId == pDownstream[0]->resultDataBlockId); + leftTsCol = col2; + rightTsCol = col1; } - setJoinColumnInfo(&pInfo->leftCol, leftTsCol); - setJoinColumnInfo(&pInfo->rightCol, rightTsCol); - } else { - ASSERT(false); - }} + } + setJoinColumnInfo(&pInfo->leftCol, leftTsCol); + setJoinColumnInfo(&pInfo->rightCol, rightTsCol); +} SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, SSortMergeJoinPhysiNode* pJoinNode, SExecTaskInfo* pTaskInfo) { @@ -97,7 +99,7 @@ SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t pOperator->exprSupp.pExprInfo = pExprInfo; pOperator->exprSupp.numOfExprs = numOfCols; - extractTimeCondition(pInfo, pDownstream, numOfDownstream, pJoinNode); + extractTimeCondition(pInfo, pDownstream, numOfDownstream, pJoinNode, GET_TASKID(pTaskInfo)); if (pJoinNode->pOnConditions != NULL && pJoinNode->node.pConditions != NULL) { pInfo->pCondAfterMerge = nodesMakeNode(QUERY_NODE_LOGIC_CONDITION); @@ -364,8 +366,6 @@ static bool mergeJoinGetNextTimestamp(SOperatorInfo* pOperator, int64_t* pLeftTs char* pRightVal = colDataGetData(pRightCol, pJoinInfo->rightPos); *pRightTs = *(int64_t*)pRightVal; - ASSERT(pLeftCol->info.type == TSDB_DATA_TYPE_TIMESTAMP); - ASSERT(pRightCol->info.type == TSDB_DATA_TYPE_TIMESTAMP); return true; } diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index 7ac007b7cb..ee7f88e813 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -139,7 +139,6 @@ SSDataBlock* getSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, i int32_t numOfCols = taosArrayGetSize(pColMatchInfo); for (int32_t i = 0; i < numOfCols; ++i) { SColMatchItem* pmInfo = taosArrayGet(pColMatchInfo, i); - // ASSERT(pmInfo->matchType == COL_MATCH_FROM_SLOT_ID); SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId); SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->dstSlotId); @@ -272,7 +271,6 @@ void destroySortOperatorInfo(void* param) { } int32_t getExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { - ASSERT(pOptr != NULL); SSortExecInfo* pInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo)); SSortOperatorInfo* pOperatorInfo = (SSortOperatorInfo*)pOptr->info; @@ -329,7 +327,6 @@ SSDataBlock* getGroupSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlo int32_t numOfCols = taosArrayGetSize(pColMatchInfo); for (int32_t i = 0; i < numOfCols; ++i) { SColMatchItem* pmInfo = taosArrayGet(pColMatchInfo, i); - // ASSERT(pmInfo->matchType == COL_MATCH_FROM_SLOT_ID); SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId); SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->dstSlotId); @@ -746,7 +743,6 @@ void destroyMultiwayMergeOperatorInfo(void* param) { } int32_t getMultiwayMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { - ASSERT(pOptr != NULL); SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo)); SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info; diff --git a/source/libs/executor/src/tsimplehash.c b/source/libs/executor/src/tsimplehash.c index 484d917069..fd6215e3a1 100644 --- a/source/libs/executor/src/tsimplehash.c +++ b/source/libs/executor/src/tsimplehash.c @@ -49,7 +49,9 @@ static FORCE_INLINE int32_t taosHashCapacity(int32_t length) { } SSHashObj *tSimpleHashInit(size_t capacity, _hash_fn_t fn) { - ASSERT(fn != NULL); + if (fn == NULL) { + return NULL; + } if (capacity == 0) { capacity = 4; @@ -66,7 +68,6 @@ SSHashObj *tSimpleHashInit(size_t capacity, _hash_fn_t fn) { pHashObj->equalFp = memcmp; pHashObj->hashFp = fn; - ASSERT((pHashObj->capacity & (pHashObj->capacity - 1)) == 0); pHashObj->hashList = (SHNode **)taosMemoryCalloc(pHashObj->capacity, sizeof(void *)); if (!pHashObj->hashList) { diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 30911887bb..fa0cdb3943 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -800,6 +800,7 @@ STupleHandle* tsortNextTuple(SSortHandle* pHandle) { } } + // all sources are completed. if (pHandle->cmpParam.numOfSources == pHandle->numOfCompletedSources) { return NULL; } From 1a07451bc3cd629ac62b38b774ca54f6407c6dbc Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 26 Dec 2022 13:45:25 +0800 Subject: [PATCH 29/82] refactor: do some internal refactor. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 12 +++++------- source/libs/executor/src/executorimpl.c | 4 +--- source/libs/executor/src/scanoperator.c | 24 ------------------------ 3 files changed, 6 insertions(+), 34 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 346ad854df..d69b8da667 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -4095,13 +4095,10 @@ int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, // update the number of NULL data rows size_t numOfCols = pSup->numOfCols; - int32_t i = 0, j = 0; - size_t size = taosArrayGetSize(pSup->pColAgg); - // ensure capacity - if(pDataBlock->pDataBlock) { - size_t colsNum = taosArrayGetSize(pDataBlock->pDataBlock); - taosArrayEnsureCap(pSup->pColAgg, colsNum); + if (pDataBlock->pDataBlock) { + size_t colsNum = taosArrayGetSize(pDataBlock->pDataBlock); + taosArrayEnsureCap(pSup->pColAgg, colsNum); } SSDataBlock* pResBlock = pReader->pResBlock; @@ -4112,8 +4109,9 @@ int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, // do fill all null column value SMA info doFillNullColSMA(pSup, pBlock->nRow, numOfCols, pTsAgg); + size_t size = taosArrayGetSize(pSup->pColAgg); - i = 0, j = 0; + int32_t i = 0, j = 0; while (j < numOfCols && i < size) { SColumnDataAgg* pAgg = taosArrayGet(pSup->pColAgg, i); if (pAgg->colId == pSup->colId[j]) { diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index fde13498ea..9a5729e161 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1573,8 +1573,7 @@ void destroyOperatorInfo(SOperatorInfo* pOperator) { // each operator should be set their own function to return total cost buffer int32_t optrDefaultBufFn(SOperatorInfo* pOperator) { if (pOperator->blocking) { - ASSERT(0); - return 0; + return -1; } else { return 0; } @@ -2201,7 +2200,6 @@ static int32_t extractTbscanInStreamOpTree(SOperatorInfo* pOperator, STableScanI return extractTbscanInStreamOpTree(pOperator->pDownstream[0], ppInfo); } else { SStreamScanInfo* pInfo = pOperator->info; - ASSERT(pInfo->pTableScanOp->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN); *ppInfo = pInfo->pTableScanOp->info; return 0; } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index d074ceede8..b23f5c4b6e 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -232,30 +232,6 @@ static bool doLoadBlockSMA(STableScanBase* pTableScanInfo, SSDataBlock* pBlock, if (!allColumnsHaveAgg) { return false; } - -#if 0 - // if (allColumnsHaveAgg == true) { - int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); - - // todo create this buffer during creating operator - if (pBlock->pBlockAgg == NULL) { - pBlock->pBlockAgg = taosMemoryCalloc(numOfCols, POINTER_BYTES); - if (pBlock->pBlockAgg == NULL) { - T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); - } - } - - size_t num = taosArrayGetSize(pTableScanInfo->matchInfo.pList); - for (int32_t i = 0; i < num; ++i) { - SColMatchItem* pColMatchInfo = taosArrayGet(pTableScanInfo->matchInfo.pList, i); - if (!pColMatchInfo->needOutput) { - continue; - } - - pBlock->pBlockAgg[pColMatchInfo->dstSlotId] = pColAgg[i]; - } -#endif - return true; } From f54f6201be005cb489910326ab2821d88523dd90 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 26 Dec 2022 14:07:22 +0800 Subject: [PATCH 30/82] fix(query): fix coverity issues. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 2 -- source/libs/executor/src/scanoperator.c | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index d69b8da667..4f9e419f78 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -3777,9 +3777,7 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, void* pTableL STsdbReader* p = (pReader->innerReader[0] != NULL) ? pReader->innerReader[0] : pReader; pReader->status.pTableMap = createDataBlockScanInfo(p, &pReader->blockInfoBuf, pTableList, numOfTables); if (pReader->status.pTableMap == NULL) { - tsdbReaderClose(p); *ppReader = NULL; - code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index b23f5c4b6e..0d7098a8ea 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2182,6 +2182,7 @@ static void destroyStreamScanOperatorInfo(void* param) { SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhysiNode* pTableScanNode, SNode* pTagCond, SExecTaskInfo* pTaskInfo) { + SArray* pColIds = NULL; SStreamScanInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamScanInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); @@ -2204,7 +2205,7 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys } int32_t numOfOutput = taosArrayGetSize(pInfo->matchInfo.pList); - SArray* pColIds = taosArrayInit(numOfOutput, sizeof(int16_t)); + pColIds = taosArrayInit(numOfOutput, sizeof(int16_t)); for (int32_t i = 0; i < numOfOutput; ++i) { SColMatchItem* id = taosArrayGet(pInfo->matchInfo.pList, i); @@ -2300,6 +2301,7 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys memcpy(&pTaskInfo->streamInfo.tableCond, &pTSInfo->base.cond, sizeof(SQueryTableDataCond)); } else { taosArrayDestroy(pColIds); + pColIds = NULL; } // create the pseduo columns info From 8a4fb244fbbb2f7fd47bd6513b8fc1ccd714b239 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 26 Dec 2022 14:20:05 +0800 Subject: [PATCH 31/82] fix(query): fix coverity issues. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 11 ++++++++++- source/libs/executor/src/sortoperator.c | 15 ++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 4f9e419f78..a18533cf56 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -1013,11 +1013,20 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn // pDumpInfo->rowIndex = 0; } else if (!asc && pReader->window.ekey >= pBlock->maxKey.ts) { // pDumpInfo->rowIndex = pBlock->nRow - 1; - } else { + } else { // find the appropriate the start position in current block, and set it to be the current rowIndex int32_t pos = asc ? pBlock->nRow - 1 : 0; int32_t order = asc ? TSDB_ORDER_DESC : TSDB_ORDER_ASC; int64_t key = asc ? pReader->window.skey : pReader->window.ekey; pDumpInfo->rowIndex = doBinarySearchKey(pBlockData->aTSKEY, pBlock->nRow, pos, key, order); + + if (pDumpInfo->rowIndex < 0) { + tsdbError( + "%p failed to locate the start position in current block, global index:%d, table index:%d, brange:%" PRId64 + "-%" PRId64 ", minVer:%" PRId64 ", maxVer:%" PRId64 " %s", + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->minVer, + pBlock->maxVer, pReader->idStr); + return TSDB_CODE_INVALID_PARA; + } } } diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index ee7f88e813..f5dc6cc623 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -46,13 +46,15 @@ SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSortPhysiNode* pOperator->pTaskInfo = pTaskInfo; SDataBlockDescNode* pDescNode = pSortNode->node.pOutputDataBlockDesc; - int32_t numOfCols = 0; - SSDataBlock* pResBlock = createDataBlockFromDescNode(pDescNode); - SExprInfo* pExprInfo = createExprInfo(pSortNode->pExprs, NULL, &numOfCols); + int32_t numOfCols = 0; + SExprInfo* pExprInfo = createExprInfo(pSortNode->pExprs, NULL, &numOfCols); int32_t numOfOutputCols = 0; int32_t code = extractColMatchInfo(pSortNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID, &pInfo->matchInfo); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } pOperator->exprSupp.pCtx = createSqlFunctionCtx(pExprInfo, numOfCols, &pOperator->exprSupp.rowEntryInfoOffset); initResultSizeInfo(&pOperator->resultInfo, 1024); @@ -61,7 +63,7 @@ SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSortPhysiNode* goto _error; } - pInfo->binfo.pRes = pResBlock; + pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode); pInfo->pSortInfo = createSortInfo(pSortNode->pSortKeys); initLimitInfo(pSortNode->node.pLimit, pSortNode->node.pSlimit, &pInfo->limitInfo); @@ -86,7 +88,10 @@ SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSortPhysiNode* _error: pTaskInfo->code = TSDB_CODE_OUT_OF_MEMORY; - taosMemoryFree(pInfo); + if (pInfo != NULL) { + destroySortOperatorInfo(pInfo); + } + taosMemoryFree(pOperator); return NULL; } From 668a26c1b0bd83f421af0f2b0f4a60db638abd04 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 26 Dec 2022 15:03:18 +0800 Subject: [PATCH 32/82] enh: handle error while transfer snapshot --- source/libs/sync/inc/syncInt.h | 2 +- source/libs/sync/inc/syncPipeline.h | 2 +- source/libs/sync/inc/syncSnapshot.h | 6 +- source/libs/sync/src/syncMain.c | 90 +++++++------- source/libs/sync/src/syncPipeline.c | 5 +- source/libs/sync/src/syncSnapshot.c | 179 ++++++++++++---------------- 6 files changed, 130 insertions(+), 154 deletions(-) diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 6af60af43d..b5227152df 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -238,7 +238,7 @@ int32_t syncNodeStopPingTimer(SSyncNode* pSyncNode); int32_t syncNodeStartElectTimer(SSyncNode* pSyncNode, int32_t ms); int32_t syncNodeStopElectTimer(SSyncNode* pSyncNode); int32_t syncNodeRestartElectTimer(SSyncNode* pSyncNode, int32_t ms); -int32_t syncNodeResetElectTimer(SSyncNode* pSyncNode); +void syncNodeResetElectTimer(SSyncNode* pSyncNode); int32_t syncNodeStartHeartbeatTimer(SSyncNode* pSyncNode); int32_t syncNodeStopHeartbeatTimer(SSyncNode* pSyncNode); int32_t syncNodeRestartHeartbeatTimer(SSyncNode* pSyncNode); diff --git a/source/libs/sync/inc/syncPipeline.h b/source/libs/sync/inc/syncPipeline.h index 8c7edf85ff..9188be2b42 100644 --- a/source/libs/sync/inc/syncPipeline.h +++ b/source/libs/sync/inc/syncPipeline.h @@ -61,7 +61,7 @@ typedef struct SSyncLogBuffer { // SSyncLogRepMgr SSyncLogReplMgr* syncLogReplMgrCreate(); void syncLogReplMgrDestroy(SSyncLogReplMgr* pMgr); -int32_t syncLogReplMgrReset(SSyncLogReplMgr* pMgr); +void syncLogReplMgrReset(SSyncLogReplMgr* pMgr); int32_t syncNodeLogReplMgrInit(SSyncNode* pNode); void syncNodeLogReplMgrDestroy(SSyncNode* pNode); diff --git a/source/libs/sync/inc/syncSnapshot.h b/source/libs/sync/inc/syncSnapshot.h index 2b6e14a457..974a8f968e 100644 --- a/source/libs/sync/inc/syncSnapshot.h +++ b/source/libs/sync/inc/syncSnapshot.h @@ -56,7 +56,7 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI void snapshotSenderDestroy(SSyncSnapshotSender *pSender); bool snapshotSenderIsStart(SSyncSnapshotSender *pSender); int32_t snapshotSenderStart(SSyncSnapshotSender *pSender); -int32_t snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish); +void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish); int32_t snapshotSend(SSyncSnapshotSender *pSender); int32_t snapshotReSend(SSyncSnapshotSender *pSender); @@ -79,8 +79,8 @@ typedef struct SSyncSnapshotReceiver { SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId fromId); void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver); -int32_t snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg); -int32_t snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver); +void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg); +void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver); bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver); void snapshotReceiverForceStop(SSyncSnapshotReceiver *pReceiver); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index f1aa9312c6..152e16bd2e 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -200,12 +200,15 @@ int32_t syncProcessMsg(int64_t rid, SRpcMsg* pMsg) { code = syncNodeOnLocalCmd(pSyncNode, pMsg); break; default: - sError("vgId:%d, failed to process msg:%p since invalid type:%s", pSyncNode->vgId, pMsg, - TMSG_INFO(pMsg->msgType)); + terrno = TSDB_CODE_MSG_NOT_PROCESSED; code = -1; } syncNodeRelease(pSyncNode); + if (code != 0) { + sDebug("vgId:%d, failed to process sync msg:%p type:%s since 0x%x", pSyncNode->vgId, pMsg, TMSG_INFO(pMsg->msgType), + terrno); + } return code; } @@ -228,8 +231,7 @@ int32_t syncSendTimeoutRsp(int64_t rid, int64_t seq) { syncNodeRelease(pNode); if (ret == 1) { - sInfo("send timeout response, seq:%" PRId64 " handle:%p ahandle:%p", seq, rpcMsg.info.handle, - rpcMsg.info.ahandle); + sInfo("send timeout response, seq:%" PRId64 " handle:%p ahandle:%p", seq, rpcMsg.info.handle, rpcMsg.info.ahandle); rpcSendResponse(&rpcMsg); return 0; } else { @@ -1084,13 +1086,17 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { // snapshot senders for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { SSyncSnapshotSender* pSender = snapshotSenderCreate(pSyncNode, i); - // ASSERT(pSender != NULL); - (pSyncNode->senders)[i] = pSender; - sSDebug(pSender, "snapshot sender create new while open, data:%p", pSender); + if (pSender == NULL) return NULL; + + pSyncNode->senders[i] = pSender; + sSDebug(pSender, "snapshot sender create while open sync node, data:%p", pSender); } // snapshot receivers pSyncNode->pNewNodeReceiver = snapshotReceiverCreate(pSyncNode, EMPTY_RAFT_ID); + if (pSyncNode->pNewNodeReceiver == NULL) return NULL; + sRDebug(pSyncNode->pNewNodeReceiver, "snapshot receiver create while open sync node, data:%p", + pSyncNode->pNewNodeReceiver); // is config changing pSyncNode->changing = false; @@ -1131,10 +1137,8 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { pSyncNode->hbrSlowNum = 0; pSyncNode->tmrRoutineNum = 0; - sNInfo(pSyncNode, "sync open, node:%p", pSyncNode); - sTrace("vgId:%d, tsElectInterval:%d, tsHeartbeatInterval:%d, tsHeartbeatTimeout:%d", pSyncNode->vgId, tsElectInterval, - tsHeartbeatInterval, tsHeartbeatTimeout); - + sNInfo(pSyncNode, "sync open, node:%p electInterval:%d heartbeatInterval:%d heartbeatTimeout:%d", pSyncNode, + tsElectInterval, tsHeartbeatInterval, tsHeartbeatTimeout); return pSyncNode; _error: @@ -1251,6 +1255,8 @@ void syncNodePreClose(SSyncNode* pSyncNode) { snapshotReceiverForceStop(pSyncNode->pNewNodeReceiver); } + sDebug("vgId:%d, snapshot receiver destroy while preclose sync node, data:%p", pSyncNode->vgId, + pSyncNode->pNewNodeReceiver); snapshotReceiverDestroy(pSyncNode->pNewNodeReceiver); pSyncNode->pNewNodeReceiver = NULL; } @@ -1295,15 +1301,15 @@ void syncNodeClose(SSyncNode* pSyncNode) { syncNodeStopHeartbeatTimer(pSyncNode); for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { - if ((pSyncNode->senders)[i] != NULL) { - sSTrace((pSyncNode->senders)[i], "snapshot sender destroy while close, data:%p", (pSyncNode->senders)[i]); + if (pSyncNode->senders[i] != NULL) { + sDebug("vgId:%d, snapshot sender destroy while close, data:%p", pSyncNode->vgId, pSyncNode->senders[i]); - if (snapshotSenderIsStart((pSyncNode->senders)[i])) { - snapshotSenderStop((pSyncNode->senders)[i], false); + if (snapshotSenderIsStart(pSyncNode->senders[i])) { + snapshotSenderStop(pSyncNode->senders[i], false); } - snapshotSenderDestroy((pSyncNode->senders)[i]); - (pSyncNode->senders)[i] = NULL; + snapshotSenderDestroy(pSyncNode->senders[i]); + pSyncNode->senders[i] = NULL; } } @@ -1312,6 +1318,7 @@ void syncNodeClose(SSyncNode* pSyncNode) { snapshotReceiverForceStop(pSyncNode->pNewNodeReceiver); } + sDebug("vgId:%d, snapshot receiver destroy while close, data:%p", pSyncNode->vgId, pSyncNode->pNewNodeReceiver); snapshotReceiverDestroy(pSyncNode->pNewNodeReceiver); pSyncNode->pNewNodeReceiver = NULL; } @@ -1382,8 +1389,7 @@ int32_t syncNodeRestartElectTimer(SSyncNode* pSyncNode, int32_t ms) { return ret; } -int32_t syncNodeResetElectTimer(SSyncNode* pSyncNode) { - int32_t ret = 0; +void syncNodeResetElectTimer(SSyncNode* pSyncNode) { int32_t electMS; if (pSyncNode->pRaftCfg->isStandBy) { @@ -1391,11 +1397,11 @@ int32_t syncNodeResetElectTimer(SSyncNode* pSyncNode) { } else { electMS = syncUtilElectRandomMS(pSyncNode->electBaseLine, 2 * pSyncNode->electBaseLine); } - ret = syncNodeRestartElectTimer(pSyncNode, electMS); + + (void)syncNodeRestartElectTimer(pSyncNode, electMS); sNTrace(pSyncNode, "reset elect timer, min:%d, max:%d, ms:%d", pSyncNode->electBaseLine, 2 * pSyncNode->electBaseLine, electMS); - return ret; } static int32_t syncNodeDoStartHeartbeatTimer(SSyncNode* pSyncNode) { @@ -1455,23 +1461,20 @@ int32_t syncNodeRestartHeartbeatTimer(SSyncNode* pSyncNode) { return 0; } -// utils -------------- int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pSyncNode, SRpcMsg* pMsg) { SEpSet epSet; syncUtilRaftId2EpSet(destRaftId, &epSet); - if (pSyncNode->syncSendMSg != NULL) { - // htonl - syncUtilMsgHtoN(pMsg->pCont); + if (pSyncNode->syncSendMSg != NULL) { + syncUtilMsgHtoN(pMsg->pCont); pMsg->info.noResp = 1; - pSyncNode->syncSendMSg(&epSet, pMsg); + return pSyncNode->syncSendMSg(&epSet, pMsg); } else { sError("vgId:%d, sync send msg by id error, fp-send-msg is null", pSyncNode->vgId); rpcFreeCont(pMsg->pCont); + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; return -1; } - - return 0; } int32_t syncNodeSendMsgByInfo(const SNodeInfo* nodeInfo, SSyncNode* pSyncNode, SRpcMsg* pMsg) { @@ -1586,7 +1589,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde memcpy(oldReplicasId, pSyncNode->replicasId, sizeof(oldReplicasId)); SSyncSnapshotSender* oldSenders[TSDB_MAX_REPLICA]; for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { - oldSenders[i] = (pSyncNode->senders)[i]; + oldSenders[i] = pSyncNode->senders[i]; sSTrace(oldSenders[i], "snapshot sender save old"); } @@ -1625,7 +1628,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde // clear new for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { - (pSyncNode->senders)[i] = NULL; + pSyncNode->senders[i] = NULL; } // reset new @@ -1640,16 +1643,16 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde sNTrace(pSyncNode, "snapshot sender reset for: %" PRId64 ", newIndex:%d, %s:%d, %p", (pSyncNode->replicasId)[i].addr, i, host, port, oldSenders[j]); - (pSyncNode->senders)[i] = oldSenders[j]; + pSyncNode->senders[i] = oldSenders[j]; oldSenders[j] = NULL; reset = true; // reset replicaIndex - int32_t oldreplicaIndex = (pSyncNode->senders)[i]->replicaIndex; - (pSyncNode->senders)[i]->replicaIndex = i; + int32_t oldreplicaIndex = pSyncNode->senders[i]->replicaIndex; + pSyncNode->senders[i]->replicaIndex = i; sNTrace(pSyncNode, "snapshot sender udpate replicaIndex from %d to %d, %s:%d, %p, reset:%d", oldreplicaIndex, - i, host, port, (pSyncNode->senders)[i], reset); + i, host, port, pSyncNode->senders[i], reset); break; } @@ -1658,18 +1661,23 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde // create new for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { - if ((pSyncNode->senders)[i] == NULL) { - (pSyncNode->senders)[i] = snapshotSenderCreate(pSyncNode, i); - sSTrace((pSyncNode->senders)[i], "snapshot sender create new while reconfig, data:%p", (pSyncNode->senders)[i]); + if (pSyncNode->senders[i] == NULL) { + pSyncNode->senders[i] = snapshotSenderCreate(pSyncNode, i); + if (pSyncNode->senders[i] == NULL) { + // will be created later while send snapshot + sSError(pSyncNode->senders[i], "snapshot sender create failed while reconfig"); + } else { + sSDebug(pSyncNode->senders[i], "snapshot sender create while reconfig, data:%p", pSyncNode->senders[i]); + } } else { - sSTrace((pSyncNode->senders)[i], "snapshot sender already exist, data:%p", (pSyncNode->senders)[i]); + sSDebug(pSyncNode->senders[i], "snapshot sender already exist, data:%p", pSyncNode->senders[i]); } } // free old for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { if (oldSenders[i] != NULL) { - sNTrace(pSyncNode, "snapshot sender destroy old, data:%p replica-index:%d", oldSenders[i], i); + sSDebug(oldSenders[i], "snapshot sender destroy old, data:%p replica-index:%d", oldSenders[i], i); snapshotSenderDestroy(oldSenders[i]); oldSenders[i] = NULL; } @@ -1844,8 +1852,8 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { SSyncSnapshotSender* pMySender = syncNodeGetSnapshotSender(pSyncNode, &(pSyncNode->myRaftId)); if (pMySender != NULL) { for (int32_t i = 0; i < pSyncNode->pMatchIndex->replicaNum; ++i) { - if ((pSyncNode->senders)[i]->privateTerm > pMySender->privateTerm) { - pMySender->privateTerm = (pSyncNode->senders)[i]->privateTerm; + if (pSyncNode->senders[i]->privateTerm > pMySender->privateTerm) { + pMySender->privateTerm = pSyncNode->senders[i]->privateTerm; } } (pMySender->privateTerm) += 100; diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 225de30755..f2c86cef19 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -566,7 +566,9 @@ _out: return ret; } -int32_t syncLogReplMgrReset(SSyncLogReplMgr* pMgr) { +void syncLogReplMgrReset(SSyncLogReplMgr* pMgr) { + if (pMgr == NULL) return; + ASSERT(pMgr->startIndex >= 0); for (SyncIndex index = pMgr->startIndex; index < pMgr->endIndex; index++) { memset(&pMgr->states[index % pMgr->size], 0, sizeof(pMgr->states[0])); @@ -576,7 +578,6 @@ int32_t syncLogReplMgrReset(SSyncLogReplMgr* pMgr) { pMgr->endIndex = 0; pMgr->restored = false; pMgr->retryBackoff = 0; - return 0; } int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 54c11a503b..f2da2fb2ce 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -54,7 +54,6 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI void snapshotSenderDestroy(SSyncSnapshotSender *pSender) { if (pSender == NULL) return; - sDebug("vgId:%d, snapshot sender destroy", pSender->pSyncNode->vgId); // free current block if (pSender->pCurrentBlock != NULL) { @@ -75,12 +74,6 @@ void snapshotSenderDestroy(SSyncSnapshotSender *pSender) { bool snapshotSenderIsStart(SSyncSnapshotSender *pSender) { return pSender->start; } int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { - if (snapshotSenderIsStart(pSender)) { - sSError(pSender, "vgId:%d, snapshot sender is already start"); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - pSender->start = true; pSender->seq = SYNC_SNAPSHOT_SEQ_BEGIN; pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID; @@ -95,7 +88,7 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { pSender->snapshot.lastApplyTerm = SYNC_TERM_INVALID; pSender->snapshot.lastConfigIndex = SYNC_INDEX_INVALID; - memset(&(pSender->lastConfig), 0, sizeof(pSender->lastConfig)); + memset(&pSender->lastConfig, 0, sizeof(pSender->lastConfig)); pSender->sendingMS = 0; pSender->term = pSender->pSyncNode->pRaftStore->currentTerm; pSender->startTime = taosGetTimestampMs(); @@ -111,7 +104,7 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { SyncSnapshotSend *pMsg = rpcMsg.pCont; pMsg->srcId = pSender->pSyncNode->myRaftId; - pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex]; pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; pMsg->beginIndex = pSender->snapshotParam.start; pMsg->lastIndex = pSender->snapshot.lastApplyIndex; @@ -122,7 +115,6 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { pMsg->seq = SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT; // event log - sSDebug(pSender, "snapshot sender start"); syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender start"); // send msg @@ -134,7 +126,7 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { return 0; } -int32_t snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) { +void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) { sSDebug(pSender, "snapshot sender stop, finish:%d reader:%p", finish, pSender->pReader); // update flag @@ -154,8 +146,6 @@ int32_t snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) { pSender->pCurrentBlock = NULL; pSender->blockLen = 0; } - - return 0; } // when sender receive ack, call this function to send msg from seq @@ -177,8 +167,8 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { } if (pSender->blockLen > 0) { - sSDebug(pSender, "snapshot sender continue to read, blockLen:%d seq:%d", pSender->blockLen, pSender->seq); // has read data + sSDebug(pSender, "snapshot sender continue to read, blockLen:%d seq:%d", pSender->blockLen, pSender->seq); } else { // read finish, update seq to end pSender->seq = SYNC_SNAPSHOT_SEQ_END; @@ -194,7 +184,7 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { SyncSnapshotSend *pMsg = rpcMsg.pCont; pMsg->srcId = pSender->pSyncNode->myRaftId; - pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex]; pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; pMsg->beginIndex = pSender->snapshotParam.start; pMsg->lastIndex = pSender->snapshot.lastApplyIndex; @@ -202,7 +192,6 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; pMsg->lastConfig = pSender->lastConfig; pMsg->seq = pSender->seq; - // pMsg->privateTerm = pSender->privateTerm; if (pSender->pCurrentBlock != NULL) { memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen); @@ -210,10 +199,8 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { // event log if (pSender->seq == SYNC_SNAPSHOT_SEQ_END) { - sSDebug(pSender, "snapshot sender finish, seq:%d", pSender->seq); syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender finish"); } else { - sSDebug(pSender, "snapshot sender sending, seq:%d", pSender->seq); syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender sending"); } @@ -238,7 +225,7 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { SyncSnapshotSend *pMsg = rpcMsg.pCont; pMsg->srcId = pSender->pSyncNode->myRaftId; - pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex]; pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; pMsg->beginIndex = pSender->snapshotParam.start; pMsg->lastIndex = pSender->snapshot.lastApplyIndex; @@ -248,12 +235,10 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { pMsg->seq = pSender->seq; if (pSender->pCurrentBlock != NULL && pSender->blockLen > 0) { - // pMsg->privateTerm = pSender->privateTerm; memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen); } // event log - sSDebug(pSender, "snapshot sender resend, seq:%d", pSender->seq); syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender resend"); // send msg @@ -299,13 +284,10 @@ int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) { if (pSender->finish && taosGetTimestampMs() - pSender->endTime < SNAPSHOT_WAIT_MS) { sSInfo(pSender, "snapshot sender start too frequently, ignore"); - return 1; + return 0; } - char host[64]; - uint16_t port; - syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port); - sSInfo(pSender, "snapshot sender start for peer:%s:%u", host, port); + sSInfo(pSender, "snapshot sender start"); int32_t code = snapshotSenderStart(pSender); if (code != 0) { @@ -338,13 +320,11 @@ SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId from pReceiver->snapshot.lastApplyTerm = 0; pReceiver->snapshot.lastConfigIndex = SYNC_INDEX_INVALID; - sDebug("vgId:%d, snapshot receiver create", pSyncNode->vgId); return pReceiver; } void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) { if (pReceiver == NULL) return; - sDebug("vgId:%d, snapshot receiver destroy", pReceiver->pSyncNode->vgId); // close writer if (pReceiver->pWriter != NULL) { @@ -368,7 +348,6 @@ void snapshotReceiverForceStop(SSyncSnapshotReceiver *pReceiver) { // force close, abandon incomplete data if (pReceiver->pWriter != NULL) { - // event log int32_t ret = pReceiver->pSyncNode->pFsm->FpSnapshotStopWrite(pReceiver->pSyncNode->pFsm, pReceiver->pWriter, false, &pReceiver->snapshot); if (ret != 0) { @@ -380,13 +359,7 @@ void snapshotReceiverForceStop(SSyncSnapshotReceiver *pReceiver) { pReceiver->start = false; } -int32_t snapshotReceiverStartWriter(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg) { - if (!snapshotReceiverIsStart(pReceiver)) { - sRError(pReceiver, "snapshot receiver is not start"); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - +static int32_t snapshotReceiverStartWriter(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg) { if (pReceiver->pWriter != NULL) { sRError(pReceiver, "vgId:%d, snapshot receiver writer is not null"); terrno = TSDB_CODE_SYN_INTERNAL_ERROR; @@ -416,10 +389,10 @@ int32_t snapshotReceiverStartWriter(SSyncSnapshotReceiver *pReceiver, SyncSnapsh return 0; } -int32_t snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pPreMsg) { +void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pPreMsg) { if (snapshotReceiverIsStart(pReceiver)) { sRInfo(pReceiver, "snapshot receiver has started"); - return 0; + return; } pReceiver->start = true; @@ -430,12 +403,11 @@ int32_t snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend // event log sRInfo(pReceiver, "snapshot receiver is start"); - return 0; } // just set start = false // FpSnapshotStopWrite should not be called, assert writer == NULL -int32_t snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) { +void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) { sRInfo(pReceiver, "snapshot receiver stop, not apply, writer:%p", pReceiver->pWriter); if (pReceiver->pWriter != NULL) { @@ -450,17 +422,10 @@ int32_t snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) { } pReceiver->start = false; - return 0; } // when recv last snapshot block, apply data into snapshot static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg) { - if (pMsg->seq != SYNC_SNAPSHOT_SEQ_END) { - sRError(pReceiver, "snapshot receiver seq:%d is invalid", pMsg->seq); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - int32_t code = 0; if (pReceiver->pWriter != NULL) { // write data @@ -582,6 +547,7 @@ SyncIndex syncNodeGetSnapBeginIndex(SSyncNode *ths) { static int32_t syncNodeOnSnapshotPre(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; int64_t timeNow = taosGetTimestampMs(); + int32_t code = 0; if (snapshotReceiverIsStart(pReceiver)) { // already start @@ -593,14 +559,14 @@ static int32_t syncNodeOnSnapshotPre(SSyncNode *pSyncNode, SyncSnapshotSend *pMs sRInfo(pReceiver, "snapshot receiver startTime:%" PRId64 " == msg startTime:%" PRId64 " send reply", pReceiver->startTime, pMsg->startTime); goto _SEND_REPLY; - } else { // ignore - sRInfo(pReceiver, "snapshot receiver startTime:%" PRId64 " < msg startTime:%" PRId64 " ignore", - pReceiver->startTime, pMsg->startTime); - return 0; + sRError(pReceiver, "snapshot receiver startTime:%" PRId64 " < msg startTime:%" PRId64 " ignore", + pReceiver->startTime, pMsg->startTime); + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + code = terrno; + goto _SEND_REPLY; } - } else { // start new sRInfo(pReceiver, "snapshot receiver not start yet so start new one"); @@ -611,7 +577,8 @@ _START_RECEIVER: if (timeNow - pMsg->startTime > SNAPSHOT_MAX_CLOCK_SKEW_MS) { sRError(pReceiver, "snapshot receiver time skew too much, now:%" PRId64 " msg startTime:%" PRId64, timeNow, pMsg->startTime); - return -1; + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + code = terrno; } else { // waiting for clock match while (timeNow < pMsg->startTime) { @@ -647,7 +614,7 @@ _SEND_REPLY: pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pReceiver->startTime; pRspMsg->ack = pMsg->seq; // receiver maybe already closed - pRspMsg->code = 0; + pRspMsg->code = code; pRspMsg->snapBeginIndex = syncNodeGetSnapBeginIndex(pSyncNode); // send msg @@ -657,26 +624,36 @@ _SEND_REPLY: return -1; } - return 0; + return code; } static int32_t syncNodeOnSnapshotBegin(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { // condition 1 SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; + int32_t code = TSDB_CODE_SYN_INTERNAL_ERROR; if (!snapshotReceiverIsStart(pReceiver)) { - sRError(pReceiver, "snapshot receiver not start"); - return -1; + sRError(pReceiver, "snapshot receiver begin failed since not start"); + goto _SEND_REPLY; } if (pReceiver->startTime != pMsg->startTime) { - sRError(pReceiver, "snapshot receiver startTime:%" PRId64 " not equal to msg startTime:%" PRId64, + sRError(pReceiver, "snapshot receiver begin failed since startTime:%" PRId64 " not equal to msg startTime:%" PRId64, pReceiver->startTime, pMsg->startTime); - return -1; + goto _SEND_REPLY; } // start writer - snapshotReceiverStartWriter(pReceiver, pMsg); + if (snapshotReceiverStartWriter(pReceiver, pMsg) != 0) { + sRError(pReceiver, "snapshot receiver begin failed since start writer failed"); + goto _SEND_REPLY; + } + + code = 0; +_SEND_REPLY: + if (code != 0 && terrno != 0) { + code = terrno; + } // build msg SRpcMsg rpcMsg = {0}; @@ -693,7 +670,7 @@ static int32_t syncNodeOnSnapshotBegin(SSyncNode *pSyncNode, SyncSnapshotSend *p pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pReceiver->startTime; pRspMsg->ack = pReceiver->ack; // receiver maybe already closed - pRspMsg->code = 0; + pRspMsg->code = code; pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; // send msg @@ -703,10 +680,10 @@ static int32_t syncNodeOnSnapshotBegin(SSyncNode *pSyncNode, SyncSnapshotSend *p return -1; } - return 0; + return code; } -static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { +static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { // condition 4 // transfering SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; @@ -753,7 +730,7 @@ static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotS return -1; } - return 0; + return code; } static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { @@ -790,7 +767,7 @@ static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMs pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pReceiver->startTime; pRspMsg->ack = pReceiver->ack; // receiver maybe already closed - pRspMsg->code = 0; + pRspMsg->code = code; pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; // send msg @@ -800,7 +777,7 @@ static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMs return -1; } - return 0; + return code; } // receiver on message @@ -830,12 +807,14 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { // if already drop replica, do not process if (!syncNodeInRaftGroup(pSyncNode, &pMsg->srcId)) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "not in my config"); - return 0; + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + return -1; } if (pMsg->term < pSyncNode->pRaftStore->currentTerm) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "reject since small term"); - return 0; + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + return -1; } if (pMsg->term > pSyncNode->pRaftStore->currentTerm) { @@ -844,20 +823,21 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { syncNodeResetElectTimer(pSyncNode); // state, term, seq/ack + int32_t code = 0; if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) { if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { if (pMsg->seq == SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq pre-snapshot"); - syncNodeOnSnapshotPre(pSyncNode, pMsg); + code = syncNodeOnSnapshotPre(pSyncNode, pMsg); } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_BEGIN) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq begin"); - syncNodeOnSnapshotBegin(pSyncNode, pMsg); + code = syncNodeOnSnapshotBegin(pSyncNode, pMsg); } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_END) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq end"); - syncNodeOnSnapshotEnd(pSyncNode, pMsg); + code = syncNodeOnSnapshotEnd(pSyncNode, pMsg); if (syncLogBufferReInit(pSyncNode->pLogBuf, pSyncNode) != 0) { sRError(pReceiver, "failed to reinit log buffer since %s", terrstr()); - return -1; + code = -1; } } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_FORCE_CLOSE) { // force close, no response @@ -865,35 +845,27 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { snapshotReceiverForceStop(pReceiver); } else if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq data"); - syncNodeOnSnapshotTransfering(pSyncNode, pMsg); + code = syncNodeOnSnapshotReceive(pSyncNode, pMsg); } else { // error log sRError(pReceiver, "snapshot receiver recv error seq:%d, my ack:%d", pMsg->seq, pReceiver->ack); - return -1; + code = -1; } } else { // error log sRError(pReceiver, "snapshot receiver term not equal"); - return -1; + code = -1; } } else { // error log sRError(pReceiver, "snapshot receiver not follower"); - return -1; + code = -1; } - return 0; + return code; } -int32_t syncNodeOnSnapshotReplyPre(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) { - // get sender - SSyncSnapshotSender *pSender = syncNodeGetSnapshotSender(pSyncNode, &(pMsg->srcId)); - if (pSender == NULL) { - sNError(pSyncNode, "prepare snapshot error since sender is null"); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - +static int32_t syncNodeOnSnapshotPreRsp(SSyncNode *pSyncNode, SSyncSnapshotSender *pSender, SyncSnapshotRsp *pMsg) { SSnapshot snapshot = {0}; pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); @@ -915,7 +887,7 @@ int32_t syncNodeOnSnapshotReplyPre(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) pSender->snapshot = snapshot; // start reader - int32_t code = pSyncNode->pFsm->FpSnapshotStartRead(pSyncNode->pFsm, &(pSender->snapshotParam), &(pSender->pReader)); + int32_t code = pSyncNode->pFsm->FpSnapshotStartRead(pSyncNode->pFsm, &pSender->snapshotParam, &pSender->pReader); if (code != 0) { sSError(pSender, "prepare snapshot failed since %s", terrstr()); return -1; @@ -936,7 +908,7 @@ int32_t syncNodeOnSnapshotReplyPre(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) SyncSnapshotSend *pSendMsg = rpcMsg.pCont; pSendMsg->srcId = pSender->pSyncNode->myRaftId; - pSendMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pSendMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex]; pSendMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; pSendMsg->beginIndex = pSender->snapshotParam.start; pSendMsg->lastIndex = pSender->snapshot.lastApplyIndex; @@ -966,8 +938,9 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { SyncSnapshotRsp *pMsg = pRpcMsg->pCont; // if already drop replica, do not process - if (!syncNodeInRaftGroup(pSyncNode, &(pMsg->srcId))) { + if (!syncNodeInRaftGroup(pSyncNode, &pMsg->srcId)) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "maybe replica already dropped"); + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; return -1; } @@ -983,6 +956,7 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender not leader"); sSError(pSender, "snapshot sender not leader"); + terrno = TSDB_CODE_SYN_NOT_LEADER; goto _ERROR; } @@ -990,6 +964,7 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver time not match"); sSError(pSender, "sender:%" PRId64 " receiver:%" PRId64 " time not match, code:0x%x", pMsg->startTime, pSender->startTime, pMsg->code); + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; goto _ERROR; } @@ -997,20 +972,21 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver term not match"); sSError(pSender, "snapshot sender term not equal, msg term:%" PRId64 " currentTerm:%" PRId64, pMsg->term, pSyncNode->pRaftStore->currentTerm); + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; goto _ERROR; } if (pMsg->code != 0) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error code"); sSError(pSender, "snapshot sender receive error code:0x%x and stop sender", pMsg->code); + terrno = pMsg->code; goto _ERROR; } // prepare , send begin msg if (pMsg->ack == SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq pre-snapshot"); - syncNodeOnSnapshotReplyPre(pSyncNode, pMsg); - return 0; + return syncNodeOnSnapshotPreRsp(pSyncNode, pSender, pMsg); } if (pMsg->ack == SYNC_SNAPSHOT_SEQ_BEGIN) { @@ -1030,10 +1006,7 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq end"); snapshotSenderStop(pSender, true); SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); - if (pMgr) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "reset repl mgr"); - syncLogReplMgrReset(pMgr); - } + syncLogReplMgrReset(pMgr); return 0; } @@ -1047,22 +1020,19 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { if (snapshotSend(pSender) != 0) { return -1; } - } else if (pMsg->ack == pSender->seq - 1) { // maybe resend syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq and resend"); - snapshotReSend(pSender); - + if (snapshotReSend(pSender) != 0) { + return -1; + } } else { // error log syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error ack"); sSError(pSender, "snapshot sender receive error ack:%d, my seq:%d", pMsg->ack, pSender->seq); snapshotSenderStop(pSender, true); SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); - if (pMgr) { - syncLogReplMgrReset(pMgr); - } - + syncLogReplMgrReset(pMgr); return -1; } @@ -1071,10 +1041,7 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { _ERROR: snapshotSenderStop(pSender, true); SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId); - if (pMgr) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "reset repl mgr"); - syncLogReplMgrReset(pMgr); - } + syncLogReplMgrReset(pMgr); return -1; } From b381c42c9de6f0335117ebb7334a9828e7e785ff Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 26 Dec 2022 15:59:45 +0800 Subject: [PATCH 33/82] enh: adjust log while transfer snapshot --- source/dnode/vnode/src/meta/metaSnapshot.c | 4 ++-- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/meta/metaSnapshot.c b/source/dnode/vnode/src/meta/metaSnapshot.c index 054e785980..63d044df6e 100644 --- a/source/dnode/vnode/src/meta/metaSnapshot.c +++ b/source/dnode/vnode/src/meta/metaSnapshot.c @@ -113,8 +113,8 @@ int32_t metaSnapRead(SMetaSnapReader* pReader, uint8_t** ppData) { pHdr->size = nData; memcpy(pHdr->data, pData, nData); - metaInfo("vgId:%d, vnode snapshot meta read data, version:%" PRId64 " uid:%" PRId64 " nData:%d", - TD_VID(pReader->pMeta->pVnode), key.version, key.uid, nData); + metaDebug("vgId:%d, vnode snapshot meta read data, version:%" PRId64 " uid:%" PRId64 " blockLen:%d", + TD_VID(pReader->pMeta->pVnode), key.version, key.uid, nData); _exit: return code; diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 0362d4af2a..2fc06fba86 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -257,8 +257,8 @@ _exit: pReader->index++; *nData = sizeof(SSnapDataHdr) + pHdr->size; pHdr->index = pReader->index; - vInfo("vgId:%d, vnode snapshot read data,index:%" PRId64 " type:%d nData:%d ", TD_VID(pReader->pVnode), - pReader->index, pHdr->type, *nData); + vDebug("vgId:%d, vnode snapshot read data, index:%" PRId64 " type:%d blockLen:%d ", TD_VID(pReader->pVnode), + pReader->index, pHdr->type, *nData); } else { vInfo("vgId:%d, vnode snapshot read data end, index:%" PRId64, TD_VID(pReader->pVnode), pReader->index); } From 962fab4ff3c84fc65e2f150a227be5c5b5f3c288 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao@163.com> Date: Mon, 26 Dec 2022 17:32:41 +0800 Subject: [PATCH 34/82] fix:remove assert --- source/libs/executor/src/timewindowoperator.c | 34 ++++--------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 8107cea4a0..e5dcc43797 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -907,7 +907,7 @@ static void removeDeleteResults(SHashObj* pUpdatedMap, SArray* pDelWins) { } bool isOverdue(TSKEY ekey, STimeWindowAggSupp* pTwSup) { - ASSERT(pTwSup->maxTs == INT64_MIN || pTwSup->maxTs > 0); + ASSERTS(pTwSup->maxTs == INT64_MIN || pTwSup->maxTs > 0, "maxts should greater than 0"); return pTwSup->maxTs != INT64_MIN && ekey < pTwSup->maxTs - pTwSup->waterMark; } @@ -1396,7 +1396,6 @@ static int32_t getAllIntervalWindow(SSHashObj* pHashMap, SHashObj* resWins) { while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { void* key = tSimpleHashGetKey(pIte, &keyLen); uint64_t groupId = *(uint64_t*)key; - ASSERT(keyLen == GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))); TSKEY ts = *(int64_t*)((char*)key + sizeof(uint64_t)); SResultRowPosition* pPos = (SResultRowPosition*)pIte; int32_t code = saveWinResult(ts, pPos->pageId, pPos->offset, groupId, resWins); @@ -1547,7 +1546,7 @@ static void closeChildIntervalWindow(SOperatorInfo* pOperator, SArray* pChildren for (int32_t i = 0; i < size; i++) { SOperatorInfo* pChildOp = taosArrayGetP(pChildren, i); SStreamIntervalOperatorInfo* pChInfo = pChildOp->info; - ASSERT(pChInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE); + ASSERTS(pChInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE, "children trigger type should be at once"); pChInfo->twAggSup.maxTs = TMAX(pChInfo->twAggSup.maxTs, maxTs); closeStreamIntervalWindow(pChInfo->aggSup.pResultRowHashTable, &pChInfo->twAggSup, &pChInfo->interval, NULL, NULL, NULL, pOperator); @@ -1767,8 +1766,6 @@ SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SIntervalPh .maxTs = INT64_MIN, }; - ASSERT(as.calTrigger != STREAM_TRIGGER_MAX_DELAY); - pInfo->win = pTaskInfo->window; pInfo->inputOrder = (pPhyNode->window.inputTsOrder == ORDER_ASC) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC; pInfo->resultTsOrder = (pPhyNode->window.outputTsOrder == ORDER_ASC) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC; @@ -2252,7 +2249,6 @@ static void doBuildPullDataBlock(SArray* array, int32_t* pIndex, SSDataBlock* pB return; } blockDataEnsureCapacity(pBlock, size - (*pIndex)); - ASSERT(3 <= taosArrayGetSize(pBlock->pDataBlock)); SColumnInfoData* pStartTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); SColumnInfoData* pEndTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); SColumnInfoData* pGroupId = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); @@ -2359,7 +2355,6 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* p SResultRow* pResult = NULL; int32_t forwardRows = 0; - ASSERT(pSDataBlock->pDataBlock != NULL); SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); tsCols = (int64_t*)pColDataInfo->pData; @@ -2482,7 +2477,6 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { doBuildPullDataBlock(pInfo->pPullWins, &pInfo->pullIndex, pInfo->pPullDataRes); if (pInfo->pPullDataRes->info.rows != 0) { // process the rest of the data - ASSERT(IS_FINAL_OP(pInfo)); printDataBlock(pInfo->pPullDataRes, IS_FINAL_OP(pInfo) ? "interval final" : "interval semi"); return pInfo->pPullDataRes; } @@ -2543,7 +2537,6 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { pInfo->numOfDatapack++; printDataBlock(pBlock, IS_FINAL_OP(pInfo) ? "interval final recv" : "interval semi recv"); - ASSERT(pBlock->info.type != STREAM_INVERT); if (pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_PULL_DATA) { pInfo->binfo.pRes->info.type = pBlock->info.type; } else if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || @@ -2633,7 +2626,6 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { doBuildPullDataBlock(pInfo->pPullWins, &pInfo->pullIndex, pInfo->pPullDataRes); if (pInfo->pPullDataRes->info.rows != 0) { // process the rest of the data - ASSERT(IS_FINAL_OP(pInfo)); printDataBlock(pInfo->pPullDataRes, IS_FINAL_OP(pInfo) ? "interval final" : "interval semi"); return pInfo->pPullDataRes; } @@ -2688,7 +2680,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, .deleteMarkSaved = 0, .calTriggerSaved = 0, }; - ASSERT(pInfo->twAggSup.calTrigger != STREAM_TRIGGER_MAX_DELAY); + ASSERTS(pInfo->twAggSup.calTrigger != STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); pInfo->primaryTsIndex = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId; size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; initResultSizeInfo(&pOperator->resultInfo, 4096); @@ -2713,7 +2705,6 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, initStreamFunciton(pOperator->exprSupp.pCtx, pOperator->exprSupp.numOfExprs); - ASSERT(numOfCols > 0); initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); pInfo->pState = taosMemoryCalloc(1, sizeof(SStreamState)); @@ -2724,6 +2715,9 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, pInfo->pChildren = NULL; if (numOfChild > 0) { pInfo->pChildren = taosArrayInit(numOfChild, sizeof(void*)); + if (!pInfo->pChildren) { + goto _error; + } for (int32_t i = 0; i < numOfChild; i++) { SOperatorInfo* pChildOp = createStreamFinalIntervalOperatorInfo(NULL, pPhyNode, pTaskInfo, 0); if (pChildOp) { @@ -2746,7 +2740,6 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, // semi interval operator does not catch result pInfo->isFinal = false; pOperator->name = "StreamSemiIntervalOperator"; - ASSERT(pInfo->aggSup.currentPageId == -1); } if (!IS_FINAL_OP(pInfo) || numOfChild == 0) { @@ -3162,15 +3155,6 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData } } -void deleteWindow(SArray* pWinInfos, int32_t index, FDelete fp) { - ASSERT(index >= 0 && index < taosArrayGetSize(pWinInfos)); - if (fp) { - void* ptr = taosArrayGet(pWinInfos, index); - fp(ptr); - } - taosArrayRemove(pWinInfos, index); -} - static void doDeleteTimeWindows(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SArray* result) { SColumnInfoData* pStartTsCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); TSKEY* startDatas = (TSKEY*)pStartTsCol->pData; @@ -3218,7 +3202,6 @@ static int32_t copyUpdateResult(SSHashObj* pStUpdated, SArray* pUpdated) { int32_t iter = 0; while ((pIte = tSimpleHashIterate(pStUpdated, pIte, &iter)) != NULL) { void* key = tSimpleHashGetKey(pIte, &keyLen); - ASSERT(keyLen == sizeof(SSessionKey)); taosArrayPush(pUpdated, key); } taosArraySort(pUpdated, sessionKeyCompareAsc); @@ -3279,7 +3262,6 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; int32_t numOfOutput = pSup->numOfExprs; int32_t numOfChildren = taosArrayGetSize(pInfo->pChildren); - ASSERT(pInfo->pChildren); for (int32_t i = 0; i < size; i++) { SSessionKey* pWinKey = taosArrayGet(pWinArray, i); @@ -3380,7 +3362,6 @@ static void copyDeleteWindowInfo(SArray* pResWins, SSHashObj* pStDeleted) { void initGroupResInfoFromArrayList(SGroupResInfo* pGroupResInfo, SArray* pArrayList) { pGroupResInfo->pRows = pArrayList; pGroupResInfo->index = 0; - ASSERT(pGroupResInfo->index <= getNumOfTotalRes(pGroupResInfo)); } void doBuildSessionResult(SOperatorInfo* pOperator, SStreamState* pState, SGroupResInfo* pGroupResInfo, @@ -4811,7 +4792,6 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys int32_t code = TSDB_CODE_SUCCESS; int32_t numOfCols = 0; SExprInfo* pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &numOfCols); - ASSERT(numOfCols > 0); SSDataBlock* pResBlock = createDataBlockFromDescNode(pPhyNode->pOutputDataBlockDesc); SInterval interval = { @@ -4831,7 +4811,7 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys .deleteMark = getDeleteMark(pIntervalPhyNode), }; - ASSERT(twAggSupp.calTrigger != STREAM_TRIGGER_MAX_DELAY); + ASSERTS(twAggSupp.calTrigger != STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); pOperator->pTaskInfo = pTaskInfo; pInfo->interval = interval; From e08797ac0a6274b393ac09f6765b98b73de70990 Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 26 Dec 2022 19:43:37 +0800 Subject: [PATCH 35/82] fix: ctg lock/unlock logic --- source/libs/catalog/src/ctgAsync.c | 7 +++++-- source/libs/catalog/src/ctgCache.c | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index acd18fcca5..e9273c5b1f 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -471,16 +471,19 @@ int32_t ctgHandleForceUpdate(SCatalog* pCtg, int32_t taskNum, SCtgJob* pJob, con } int32_t ctgInitTask(SCtgJob* pJob, CTG_TASK_TYPE type, void* param, int32_t* taskId) { + int32_t code = 0; int32_t tid = atomic_fetch_add_32(&pJob->taskIdx, 1); CTG_LOCK(CTG_WRITE, &pJob->taskLock); - CTG_ERR_RET((*gCtgAsyncFps[type].initFp)(pJob, tid, param)); - CTG_UNLOCK(CTG_WRITE, &pJob->taskLock); + CTG_ERR_JRET((*gCtgAsyncFps[type].initFp)(pJob, tid, param)); if (taskId) { *taskId = tid; } +_return: + CTG_UNLOCK(CTG_WRITE, &pJob->taskLock); + return TSDB_CODE_SUCCESS; } diff --git a/source/libs/catalog/src/ctgCache.c b/source/libs/catalog/src/ctgCache.c index fe83854a91..c266cc1df9 100644 --- a/source/libs/catalog/src/ctgCache.c +++ b/source/libs/catalog/src/ctgCache.c @@ -2500,6 +2500,7 @@ int32_t ctgGetTbMetasFromCache(SCatalog *pCtg, SRequestConnInfo *pConn, SCtgTbMe CTG_LOCK(CTG_READ, &pCache->metaLock); if (NULL == pCache->pMeta) { + CTG_UNLOCK(CTG_READ, &pCache->metaLock); ctgDebug("tb %s meta not in cache, dbFName:%s", pName->tname, dbFName); ctgAddFetch(&ctx->pFetchs, dbIdx, i, fetchIdx, baseResIdx + i, flag); taosArraySetSize(ctx->pResList, taosArrayGetSize(ctx->pResList) + 1); From 5e6230fac6ca40bde9c1a1f82d67d11caa69dc95 Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 26 Dec 2022 19:46:39 +0800 Subject: [PATCH 36/82] fix: ctg lock/unlock logic --- source/libs/catalog/src/ctgAsync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index e9273c5b1f..b8590c9255 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -484,7 +484,7 @@ int32_t ctgInitTask(SCtgJob* pJob, CTG_TASK_TYPE type, void* param, int32_t* tas _return: CTG_UNLOCK(CTG_WRITE, &pJob->taskLock); - return TSDB_CODE_SUCCESS; + return code; } int32_t ctgInitJob(SCatalog* pCtg, SRequestConnInfo* pConn, SCtgJob** job, const SCatalogReq* pReq, catalogCallback fp, From d8922990788c401eb6e9eaa282f5675a73ee81b6 Mon Sep 17 00:00:00 2001 From: Xuefeng Tan <1172915550@qq.com> Date: Mon, 26 Dec 2022 20:02:26 +0800 Subject: [PATCH 37/82] fix(taosAdapter): invalid pointer on stack (#19156) --- cmake/taosadapter_CMakeLists.txt.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/taosadapter_CMakeLists.txt.in b/cmake/taosadapter_CMakeLists.txt.in index a47b3b0feb..31ca6b30fa 100644 --- a/cmake/taosadapter_CMakeLists.txt.in +++ b/cmake/taosadapter_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taosadapter ExternalProject_Add(taosadapter GIT_REPOSITORY https://github.com/taosdata/taosadapter.git - GIT_TAG f0c1753 + GIT_TAG 5662a6d SOURCE_DIR "${TD_SOURCE_DIR}/tools/taosadapter" BINARY_DIR "" #BUILD_IN_SOURCE TRUE From 794fb5d1b516800e5b6bde26367c851fcc0057b9 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 26 Dec 2022 20:40:33 +0800 Subject: [PATCH 38/82] fix: handle error if sync buffer is full --- include/util/taoserror.h | 1 + source/dnode/vnode/src/vnd/vnodeSync.c | 4 ++-- source/libs/sync/inc/syncPipeline.h | 2 ++ source/libs/sync/src/syncMain.c | 10 +++++++--- source/libs/sync/src/syncPipeline.c | 23 +++++++++++++---------- source/util/src/terror.c | 1 + 6 files changed, 26 insertions(+), 15 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 3823252de6..b315432be1 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -520,6 +520,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_SYN_BATCH_ERROR TAOS_DEF_ERROR_CODE(0, 0x0913) #define TSDB_CODE_SYN_RESTORING TAOS_DEF_ERROR_CODE(0, 0x0914) #define TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG TAOS_DEF_ERROR_CODE(0, 0x0915) // internal +#define TSDB_CODE_SYN_BUFFER_FULL TAOS_DEF_ERROR_CODE(0, 0x0916) // #define TSDB_CODE_SYN_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x09FF) // tq diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 0437703c92..5697487743 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -391,9 +391,9 @@ static int32_t vnodeSyncApplyMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, const SFsm const STraceId *trace = &pMsg->info.traceId; vGTrace("vgId:%d, commit-cb is excuted, fsm:%p, index:%" PRId64 ", term:%" PRIu64 ", msg-index:%" PRId64 - ", weak:%d, code:%d, state:%d %s, type:%s", + ", weak:%d, code:%d, state:%d %s, type:%s code:0x%x", pVnode->config.vgId, pFsm, pMeta->index, pMeta->term, pMsg->info.conn.applyIndex, pMeta->isWeak, pMeta->code, - pMeta->state, syncStr(pMeta->state), TMSG_INFO(pMsg->msgType)); + pMeta->state, syncStr(pMeta->state), TMSG_INFO(pMsg->msgType), pMsg->code); return tmsgPutToQueue(&pVnode->msgCb, APPLY_QUEUE, pMsg); } diff --git a/source/libs/sync/inc/syncPipeline.h b/source/libs/sync/inc/syncPipeline.h index 9188be2b42..a0a0691694 100644 --- a/source/libs/sync/inc/syncPipeline.h +++ b/source/libs/sync/inc/syncPipeline.h @@ -109,6 +109,8 @@ SSyncRaftEntry* syncLogBufferGetOneEntry(SSyncLogBuffer* pBuf, SSyncNode* pNode, int32_t syncLogBufferValidate(SSyncLogBuffer* pBuf); int32_t syncLogBufferRollback(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncIndex toIndex); +int32_t syncLogFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, SyncTerm term, SSyncRaftEntry* pEntry, + int32_t applyCode); #ifdef __cplusplus } #endif diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 152e16bd2e..636890b5aa 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -2386,7 +2386,11 @@ int32_t syncCacheEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry, LRUHand int32_t syncNodeAppend(SSyncNode* ths, SSyncRaftEntry* pEntry) { // append to log buffer if (syncLogBufferAppend(ths->pLogBuf, ths, pEntry) < 0) { - sError("vgId:%d, failed to enqueue sync log buffer. index:%" PRId64 "", ths->vgId, pEntry->index); + sError("vgId:%d, failed to enqueue sync log buffer, index:%" PRId64, ths->vgId, pEntry->index); + terrno = TSDB_CODE_SYN_BUFFER_FULL; + (void)syncLogFsmExecute(ths, ths->pFsm, ths->state, ths->pRaftStore->currentTerm, pEntry, + TSDB_CODE_SYN_BUFFER_FULL); + syncEntryDestroy(pEntry); return -1; } @@ -2685,8 +2689,8 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIn } int32_t code = syncNodeAppend(ths, pEntry); - if (code < 0 && ths->vgId != 1 && vnodeIsMsgBlock(pEntry->originalRpcType)) { - ASSERTS(false, "failed to append blocking msg"); + if (code < 0) { + sNError(ths, "failed to append blocking msg"); } return code; } diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index f2c86cef19..ef37600e98 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -26,6 +26,11 @@ #include "syncSnapshot.h" #include "syncUtil.h" +static bool syncIsMsgBlock(tmsg_t type) { + return (type == TDMT_VND_CREATE_TABLE) || (type == TDMT_VND_ALTER_TABLE) || (type == TDMT_VND_DROP_TABLE) || + (type == TDMT_VND_UPDATE_TAG_VAL) || (type == TDMT_VND_ALTER_CONFIRM); +} + int64_t syncLogBufferGetEndIndex(SSyncLogBuffer* pBuf) { taosThreadMutexLock(&pBuf->mutex); int64_t index = pBuf->endIndex; @@ -441,26 +446,25 @@ _out: return matchIndex; } -int32_t syncLogFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, SyncTerm term, SSyncRaftEntry* pEntry) { - ASSERTS(pFsm->FpCommitCb != NULL, "No commit cb registered for the FSM"); - +int32_t syncLogFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, SyncTerm term, SSyncRaftEntry* pEntry, + int32_t applyCode) { if ((pNode->replicaNum == 1) && pNode->restoreFinish && pNode->vgId != 1) { return 0; } - if (pNode->vgId != 1 && vnodeIsMsgBlock(pEntry->originalRpcType)) { - sTrace("vgId:%d, blocking msg ready to execute. index:%" PRId64 ", term: %" PRId64 ", type: %s", pNode->vgId, - pEntry->index, pEntry->term, TMSG_INFO(pEntry->originalRpcType)); + if (pNode->vgId != 1 && syncIsMsgBlock(pEntry->originalRpcType)) { + sTrace("vgId:%d, blocking msg ready to execute, index:%" PRId64 ", term:%" PRId64 ", type:%s code:0x%x", + pNode->vgId, pEntry->index, pEntry->term, TMSG_INFO(pEntry->originalRpcType), applyCode); } - SRpcMsg rpcMsg = {0}; + SRpcMsg rpcMsg = {.code = applyCode}; syncEntry2OriginalRpc(pEntry, &rpcMsg); SFsmCbMeta cbMeta = {0}; cbMeta.index = pEntry->index; cbMeta.lastConfigIndex = syncNodeGetSnapshotConfigIndex(pNode, pEntry->index); cbMeta.isWeak = pEntry->isWeak; - cbMeta.code = 0; + cbMeta.code = applyCode; cbMeta.state = role; cbMeta.seqNum = pEntry->seqNum; cbMeta.term = pEntry->term; @@ -469,7 +473,6 @@ int32_t syncLogFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, Syn (void)syncRespMgrGetAndDel(pNode->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info); int32_t code = pFsm->FpCommitCb(pFsm, &rpcMsg, &cbMeta); - ASSERT(rpcMsg.pCont == NULL); return code; } @@ -520,7 +523,7 @@ int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t comm pEntry->term, TMSG_INFO(pEntry->originalRpcType)); } - if (syncLogFsmExecute(pNode, pFsm, role, term, pEntry) != 0) { + if (syncLogFsmExecute(pNode, pFsm, role, term, pEntry, 0) != 0) { sError("vgId:%d, failed to execute sync log entry. index:%" PRId64 ", term:%" PRId64 ", role: %d, current term: %" PRId64, vgId, pEntry->index, pEntry->term, role, term); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 2d7121e2ae..ff61c7cdc4 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -407,6 +407,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SYN_STANDBY_NOT_READY, "Sync not ready for st TAOS_DEFINE_ERROR(TSDB_CODE_SYN_BATCH_ERROR, "Sync batch error") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_RESTORING, "Sync is restoring") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG, "Sync invalid snapshot msg") +TAOS_DEFINE_ERROR(TSDB_CODE_SYN_BUFFER_FULL, "Sync buffer is full") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INTERNAL_ERROR, "Sync internal error") //tq From a7bfeb5fbe1492bb9c486601cedd980ba16044ef Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 26 Dec 2022 23:10:46 +0800 Subject: [PATCH 39/82] fix: check existence of SMsgHead in SSyncRaftEntry data or SRpcMsg pCont --- source/dnode/mgmt/mgmt_vnode/src/vmWorker.c | 8 ++++++++ source/libs/sync/src/syncMain.c | 18 +++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index 7e3915f3d1..faa94a335d 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -233,6 +233,14 @@ int32_t vmPutMsgToMgmtQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { } int32_t vmPutRpcMsgToQueue(SVnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) { + if (pRpc->contLen < sizeof(SMsgHead)) { + dError("invalid rpc msg since no msg head at pCont. pRpc:%p, type:%s, len:%d", pRpc, TMSG_INFO(pRpc->msgType), + pRpc->contLen); + rpcFreeCont(pRpc->pCont); + pRpc->pCont = NULL; + return -1; + } + SRpcMsg *pMsg = taosAllocateQitem(sizeof(SRpcMsg), RPC_QITEM, pRpc->contLen); if (pMsg == NULL) { rpcFreeCont(pRpc->pCont); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 152e16bd2e..b071aec2f4 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -2384,6 +2384,13 @@ int32_t syncCacheEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry, LRUHand } int32_t syncNodeAppend(SSyncNode* ths, SSyncRaftEntry* pEntry) { + if (pEntry->dataLen < sizeof(SMsgHead)) { + sError("vgId:%d, cannot append an invalid client request with no msg head. type:%s, dataLen:%d", ths->vgId, + TMSG_INFO(pEntry->originalRpcType), pEntry->dataLen); + syncEntryDestroy(pEntry); + return -1; + } + // append to log buffer if (syncLogBufferAppend(ths->pLogBuf, ths, pEntry) < 0) { sError("vgId:%d, failed to enqueue sync log buffer. index:%" PRId64 "", ths->vgId, pEntry->index); @@ -2679,16 +2686,21 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIn pEntry = syncEntryBuildFromRpcMsg(pMsg, term, index); } + if (pEntry == NULL) { + sError("vgId:%d, failed to process client request since %s.", ths->vgId, terrstr()); + return -1; + } + if (ths->state == TAOS_SYNC_STATE_LEADER) { if (pRetIndex) { (*pRetIndex) = index; } int32_t code = syncNodeAppend(ths, pEntry); - if (code < 0 && ths->vgId != 1 && vnodeIsMsgBlock(pEntry->originalRpcType)) { - ASSERTS(false, "failed to append blocking msg"); - } return code; + } else { + syncEntryDestroy(pEntry); + pEntry = NULL; } return -1; From 58b5d2e0aafc4a0cfbc86c2b917cb4d630beba64 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 27 Dec 2022 11:02:07 +0800 Subject: [PATCH 40/82] fix(query): fix race condition. --- source/dnode/vnode/src/meta/metaCache.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/source/dnode/vnode/src/meta/metaCache.c b/source/dnode/vnode/src/meta/metaCache.c index 513ee5a1c2..d68658b0d9 100644 --- a/source/dnode/vnode/src/meta/metaCache.c +++ b/source/dnode/vnode/src/meta/metaCache.c @@ -57,7 +57,6 @@ struct SMetaCache { TdThreadMutex lock; SHashObj* pTableEntry; SLRUCache* pUidResCache; - uint64_t keyBuf[3]; } sTagFilterResCache; }; @@ -429,20 +428,20 @@ int32_t metaGetCachedTableUidList(SMeta* pMeta, tb_uid_t suid, const uint8_t* pK bool* acquireRes) { // generate the composed key for LRU cache SLRUCache* pCache = pMeta->pCache->sTagFilterResCache.pUidResCache; - uint64_t* pBuf = pMeta->pCache->sTagFilterResCache.keyBuf; SHashObj* pTableMap = pMeta->pCache->sTagFilterResCache.pTableEntry; TdThreadMutex* pLock = &pMeta->pCache->sTagFilterResCache.lock; + uint64_t buf[3] = {0}; uint32_t times = 0; *acquireRes = 0; - pBuf[0] = suid; - memcpy(&pBuf[1], pKey, keyLen); + buf[0] = suid; + memcpy(&buf[1], pKey, keyLen); taosThreadMutexLock(pLock); int32_t len = keyLen + sizeof(uint64_t); - LRUHandle* pHandle = taosLRUCacheLookup(pCache, pBuf, len); + LRUHandle* pHandle = taosLRUCacheLookup(pCache, buf, len); if (pHandle == NULL) { taosThreadMutexUnlock(pLock); return TSDB_CODE_SUCCESS; @@ -476,10 +475,10 @@ int32_t metaGetCachedTableUidList(SMeta* pMeta, tb_uid_t suid, const uint8_t* pK SListNode* pNode = NULL; while ((pNode = tdListNext(&iter)) != NULL) { - memcpy(&pBuf[1], pNode->data, keyLen); + memcpy(&buf[1], pNode->data, keyLen); // check whether it is existed in LRU cache, and remove it from linked list if not. - LRUHandle* pRes = taosLRUCacheLookup(pCache, pBuf, len); + LRUHandle* pRes = taosLRUCacheLookup(pCache, buf, len); if (pRes == NULL) { // remove the item in the linked list taosArrayPush(pInvalidRes, &pNode); } else { @@ -547,14 +546,14 @@ int32_t metaUidFilterCachePut(SMeta* pMeta, uint64_t suid, const void* pKey, int tdListAppend(&(*pEntry)->list, pKey); } - uint64_t* pBuf = pMeta->pCache->sTagFilterResCache.keyBuf; - pBuf[0] = suid; + uint64_t buf[3] = {0}; + buf[0] = suid; - memcpy(&pBuf[1], pKey, keyLen); + memcpy(&buf[1], pKey, keyLen); ASSERT(sizeof(uint64_t) + keyLen == 24); // add to cache. - taosLRUCacheInsert(pCache, pBuf, sizeof(uint64_t) + keyLen, pPayload, payloadLen, freePayload, NULL, + taosLRUCacheInsert(pCache, buf, sizeof(uint64_t) + keyLen, pPayload, payloadLen, freePayload, NULL, TAOS_LRU_PRIORITY_LOW); taosThreadMutexUnlock(pLock); From 5826e40aa7f228c31187c839456022fc483e2f44 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 27 Dec 2022 11:19:36 +0800 Subject: [PATCH 41/82] enh: drop stream eagerly --- source/libs/stream/src/streamExec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 20608a6cf3..8e746c6738 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -48,6 +48,10 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* // exec while (1) { + if (pTask->taskStatus == TASK_STATUS__DROPPING) { + return 0; + } + SSDataBlock* output = NULL; uint64_t ts = 0; if ((code = qExecTask(exec, &output, &ts)) < 0) { From bb9f2651f1a8b34ec843e10ebb4fd5f8929cc458 Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Tue, 27 Dec 2022 13:03:26 +0800 Subject: [PATCH 42/82] chore: add comp postfix for taos-tools (#19169) --- packaging/tools/makepkg.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packaging/tools/makepkg.sh b/packaging/tools/makepkg.sh index f30a8a637e..0ee548242f 100755 --- a/packaging/tools/makepkg.sh +++ b/packaging/tools/makepkg.sh @@ -348,7 +348,8 @@ cd ${release_dir} # install_dir has been distinguishes cluster from edege, so comments this code pkg_name=${install_dir}-${osType}-${cpuType} -taostools_pkg_name=${taostools_install_dir}-${osType}-${cpuType} +versionCompFirst=$(echo ${versionComp} | awk -F '.' '{print $1}') +taostools_pkg_name=${taostools_install_dir}-${osType}-${cpuType}-comp${versionCompFirst} # if [ "$verMode" == "cluster" ]; then # pkg_name=${install_dir}-${osType}-${cpuType} From 5ae6829a8379cfb1191697d381fb305d3343d01a Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 27 Dec 2022 13:28:28 +0800 Subject: [PATCH 43/82] fix: add sem free and init log --- source/libs/scheduler/src/schJob.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/libs/scheduler/src/schJob.c b/source/libs/scheduler/src/schJob.c index d422f0e88f..6a8f81f8c7 100644 --- a/source/libs/scheduler/src/schJob.c +++ b/source/libs/scheduler/src/schJob.c @@ -668,6 +668,7 @@ void schFreeJobImpl(void *job) { taosMemoryFreeClear(pJob->userRes.execRes); taosMemoryFreeClear(pJob->fetchRes); taosMemoryFreeClear(pJob->sql); + tsem_destroy(&pJob->rspSem); taosMemoryFree(pJob); int32_t jobNum = atomic_sub_fetch_32(&schMgmt.jobNum, 1); @@ -748,7 +749,10 @@ int32_t schInitJob(int64_t *pJobId, SSchedulerReq *pReq) { SCH_ERR_JRET(TSDB_CODE_OUT_OF_MEMORY); } - tsem_init(&pJob->rspSem, 0, 0); + if (tsem_init(&pJob->rspSem, 0, 0)) { + SCH_JOB_ELOG("tsem_init failed, errno:%d", errno); + SCH_ERR_JRET(TSDB_CODE_OUT_OF_MEMORY); + } pJob->refId = taosAddRef(schMgmt.jobRef, pJob); if (pJob->refId < 0) { From b2cec0f1e9c3c4f69fbb156513f22170dea2835e Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 27 Dec 2022 13:57:00 +0800 Subject: [PATCH 44/82] fix: skip msg --- source/dnode/vnode/src/tq/tq.c | 4 +++- source/dnode/vnode/src/vnd/vnodeSvr.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 045b497371..e366795fd3 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1004,8 +1004,10 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { int32_t len; tEncodeSize(tEncodeSStreamTaskCheckRsp, &rsp, len, code); if (code < 0) { - ASSERT(0); + tqDebug("tq encode stream check rsp error"); + return -1; } + void* buf = rpcMallocCont(sizeof(SMsgHead) + len); ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId); diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index fe9aad4a20..8d53579483 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -197,7 +197,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp if (!syncUtilUserCommit(pMsg->msgType)) goto _exit; - if (pMsg->msgType == TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE) { + if (pMsg->msgType == TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE || pMsg->msgType == TDMT_STREAM_TASK_CHECK_RSP) { if (tqCheckLogInWal(pVnode->pTq, version)) return 0; } From b7f6d152f86431da4f1a0841778a463783bbedd1 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 27 Dec 2022 14:14:18 +0800 Subject: [PATCH 45/82] fix: display the correct log information --- source/common/src/tglobal.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 9caf0cc33e..ad89428d07 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -1316,12 +1316,17 @@ void taosSetDebugFlag(int32_t *pFlagPtr, const char *flagName, int32_t flagVal, if (pItem != NULL && (rewrite || pItem->i32 == 0)) { pItem->i32 = flagVal; } - *pFlagPtr = flagVal; + if (pFlagPtr != NULL) { + *pFlagPtr = flagVal; + } } void taosSetAllDebugFlag(int32_t flag, bool rewrite) { if (flag <= 0) return; + taosSetDebugFlag(NULL, "debugFlag", flag, rewrite); + taosSetDebugFlag(NULL, "simDebugFlag", flag, rewrite); + taosSetDebugFlag(NULL, "tmrDebugFlag", flag, rewrite); taosSetDebugFlag(&uDebugFlag, "uDebugFlag", flag, rewrite); taosSetDebugFlag(&rpcDebugFlag, "rpcDebugFlag", flag, rewrite); taosSetDebugFlag(&jniDebugFlag, "jniDebugFlag", flag, rewrite); @@ -1333,6 +1338,7 @@ void taosSetAllDebugFlag(int32_t flag, bool rewrite) { taosSetDebugFlag(&wDebugFlag, "wDebugFlag", flag, rewrite); taosSetDebugFlag(&sDebugFlag, "sDebugFlag", flag, rewrite); taosSetDebugFlag(&tsdbDebugFlag, "tsdbDebugFlag", flag, rewrite); + taosSetDebugFlag(&tsdbDebugFlag, "tsdbDebugFlag", flag, rewrite); taosSetDebugFlag(&tqDebugFlag, "tqDebugFlag", flag, rewrite); taosSetDebugFlag(&fsDebugFlag, "fsDebugFlag", flag, rewrite); taosSetDebugFlag(&udfDebugFlag, "udfDebugFlag", flag, rewrite); @@ -1340,6 +1346,5 @@ void taosSetAllDebugFlag(int32_t flag, bool rewrite) { taosSetDebugFlag(&idxDebugFlag, "idxDebugFlag", flag, rewrite); taosSetDebugFlag(&tdbDebugFlag, "tdbDebugFlag", flag, rewrite); taosSetDebugFlag(&metaDebugFlag, "metaDebugFlag", flag, rewrite); - taosSetDebugFlag(&metaDebugFlag, "tmrDebugFlag", flag, rewrite); uInfo("all debug flag are set to %d", flag); } From 861493010ea689b790defe795b376c619f2f00f1 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 27 Dec 2022 14:15:20 +0800 Subject: [PATCH 46/82] fix: display the correct log information --- source/common/src/tglobal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index ad89428d07..d3fd625a91 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -1338,7 +1338,6 @@ void taosSetAllDebugFlag(int32_t flag, bool rewrite) { taosSetDebugFlag(&wDebugFlag, "wDebugFlag", flag, rewrite); taosSetDebugFlag(&sDebugFlag, "sDebugFlag", flag, rewrite); taosSetDebugFlag(&tsdbDebugFlag, "tsdbDebugFlag", flag, rewrite); - taosSetDebugFlag(&tsdbDebugFlag, "tsdbDebugFlag", flag, rewrite); taosSetDebugFlag(&tqDebugFlag, "tqDebugFlag", flag, rewrite); taosSetDebugFlag(&fsDebugFlag, "fsDebugFlag", flag, rewrite); taosSetDebugFlag(&udfDebugFlag, "udfDebugFlag", flag, rewrite); From ab8efc828d4837fb5447355663772617e656669d Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 27 Dec 2022 14:23:09 +0800 Subject: [PATCH 47/82] enh: check existence of msg head at SRpcMsg pCont in vmPutMsgToQueue --- source/dnode/mgmt/mgmt_vnode/src/vmWorker.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index faa94a335d..5afb9a0512 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -140,6 +140,12 @@ static void vmProcessSyncQueue(SQueueInfo *pInfo, STaosQall *qall, int32_t numOf static int32_t vmPutMsgToQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg, EQueueType qtype) { const STraceId *trace = &pMsg->info.traceId; + if (pMsg->contLen < sizeof(SMsgHead)) { + dGError("invalid rpc msg with no msg head at pCont. pMsg:%p, type:%s, contLen:%d", pMsg, TMSG_INFO(pMsg->msgType), + pMsg->contLen); + return -1; + } + SMsgHead *pHead = pMsg->pCont; int32_t code = 0; @@ -234,7 +240,7 @@ int32_t vmPutMsgToMgmtQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { int32_t vmPutRpcMsgToQueue(SVnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) { if (pRpc->contLen < sizeof(SMsgHead)) { - dError("invalid rpc msg since no msg head at pCont. pRpc:%p, type:%s, len:%d", pRpc, TMSG_INFO(pRpc->msgType), + dError("invalid rpc msg with no msg head at pCont. pRpc:%p, type:%s, len:%d", pRpc, TMSG_INFO(pRpc->msgType), pRpc->contLen); rpcFreeCont(pRpc->pCont); pRpc->pCont = NULL; From 39eb62d80d3fb20a37e9319b856f01a3770ae7c1 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 27 Dec 2022 14:35:55 +0800 Subject: [PATCH 48/82] enh: set scan limit --- include/libs/executor/executor.h | 1 + source/libs/executor/inc/executorimpl.h | 128 +++++++++++++----------- source/libs/executor/src/executor.c | 4 + source/libs/executor/src/scanoperator.c | 44 +++++--- source/libs/stream/src/streamExec.c | 6 +- 5 files changed, 105 insertions(+), 78 deletions(-) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 412b4b4cf6..cfd5bd1ed7 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -213,6 +213,7 @@ int32_t qStreamSourceRecoverStep1(qTaskInfo_t tinfo, int64_t ver); int32_t qStreamSourceRecoverStep2(qTaskInfo_t tinfo, int64_t ver); int32_t qStreamRecoverFinish(qTaskInfo_t tinfo); int32_t qStreamRestoreParam(qTaskInfo_t tinfo); +bool qStreamRecoverScanFinished(qTaskInfo_t tinfo); #ifdef __cplusplus } diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 647da78a78..e3b4b57052 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -136,6 +136,7 @@ typedef struct { SSchemaWrapper* schema; char tbName[TSDB_TABLE_NAME_LEN]; int8_t recoverStep; + int8_t recoverScanFinished; SQueryTableDataCond tableCond; int64_t fillHistoryVer1; int64_t fillHistoryVer2; @@ -182,7 +183,7 @@ struct SExecTaskInfo { SSubplan* pSubplan; struct SOperatorInfo* pRoot; SLocalFetch localFetch; - SArray* pResultBlockList;// result block list + SArray* pResultBlockList; // result block list STaskStopInfo stopInfo; }; @@ -199,7 +200,7 @@ typedef struct SOperatorFpSet { __optr_fn_t getNextFn; __optr_fn_t cleanupFn; // call this function to release the allocated resources ASAP __optr_close_fn_t closeFn; - __optr_reqBuf_fn_t reqBufFn; // total used buffer for blocking operator + __optr_reqBuf_fn_t reqBufFn; // total used buffer for blocking operator __optr_encode_fn_t encodeResultRow; __optr_decode_fn_t decodeResultRow; __optr_explain_fn_t getExplainFn; @@ -255,22 +256,22 @@ typedef struct SLimitInfo { } SLimitInfo; typedef struct SExchangeInfo { - SArray* pSources; - SArray* pSourceDataInfo; - tsem_t ready; - void* pTransporter; + SArray* pSources; + SArray* pSourceDataInfo; + tsem_t ready; + void* pTransporter; // SArray, result block list, used to keep the multi-block that // passed by downstream operator - SArray* pResultBlockList; - SArray* pRecycledBlocks;// build a pool for small data block to avoid to repeatly create and then destroy. - SSDataBlock* pDummyBlock; // dummy block, not keep data - bool seqLoadData; // sequential load data or not, false by default - int32_t current; + SArray* pResultBlockList; + SArray* pRecycledBlocks; // build a pool for small data block to avoid to repeatly create and then destroy. + SSDataBlock* pDummyBlock; // dummy block, not keep data + bool seqLoadData; // sequential load data or not, false by default + int32_t current; SLoadRemoteDataInfo loadInfo; uint64_t self; SLimitInfo limitInfo; - int64_t openedTs; // start exec time stamp, todo: move to SLoadRemoteDataInfo + int64_t openedTs; // start exec time stamp, todo: move to SLoadRemoteDataInfo } SExchangeInfo; typedef struct SScanInfo { @@ -305,9 +306,9 @@ typedef struct { } SAggOptrPushDownInfo; typedef struct STableMetaCacheInfo { - SLRUCache* pTableMetaEntryCache; // 100 by default - uint64_t metaFetch; - uint64_t cacheHit; + SLRUCache* pTableMetaEntryCache; // 100 by default + uint64_t metaFetch; + uint64_t cacheHit; } STableMetaCacheInfo; typedef struct STableScanBase { @@ -325,46 +326,46 @@ typedef struct STableScanBase { } STableScanBase; typedef struct STableScanInfo { - STableScanBase base; - SScanInfo scanInfo; - int32_t scanTimes; - SSDataBlock* pResBlock; - SSampleExecInfo sample; // sample execution info - int32_t currentGroupId; - int32_t currentTable; - int8_t scanMode; - int8_t assignBlockUid; - bool hasGroupByTag; + STableScanBase base; + SScanInfo scanInfo; + int32_t scanTimes; + SSDataBlock* pResBlock; + SSampleExecInfo sample; // sample execution info + int32_t currentGroupId; + int32_t currentTable; + int8_t scanMode; + int8_t assignBlockUid; + bool hasGroupByTag; } STableScanInfo; typedef struct STableMergeScanInfo { - int32_t tableStartIndex; - int32_t tableEndIndex; - bool hasGroupId; - uint64_t groupId; - SArray* queryConds; // array of queryTableDataCond - STableScanBase base; - int32_t bufPageSize; - uint32_t sortBufSize; // max buffer size for in-memory sort - SArray* pSortInfo; - SSortHandle* pSortHandle; - SSDataBlock* pSortInputBlock; - int64_t startTs; // sort start time - SArray* sortSourceParams; - SLimitInfo limitInfo; - int64_t numOfRows; - SScanInfo scanInfo; - SSDataBlock* pResBlock; - SSampleExecInfo sample; // sample execution info - SSortExecInfo sortExecInfo; + int32_t tableStartIndex; + int32_t tableEndIndex; + bool hasGroupId; + uint64_t groupId; + SArray* queryConds; // array of queryTableDataCond + STableScanBase base; + int32_t bufPageSize; + uint32_t sortBufSize; // max buffer size for in-memory sort + SArray* pSortInfo; + SSortHandle* pSortHandle; + SSDataBlock* pSortInputBlock; + int64_t startTs; // sort start time + SArray* sortSourceParams; + SLimitInfo limitInfo; + int64_t numOfRows; + SScanInfo scanInfo; + SSDataBlock* pResBlock; + SSampleExecInfo sample; // sample execution info + SSortExecInfo sortExecInfo; } STableMergeScanInfo; typedef struct STagScanInfo { - SColumnInfo* pCols; - SSDataBlock* pRes; - SColMatchInfo matchInfo; - int32_t curPos; - SReadHandle readHandle; + SColumnInfo* pCols; + SSDataBlock* pRes; + SColMatchInfo matchInfo; + int32_t curPos; + SReadHandle readHandle; } STagScanInfo; typedef enum EStreamScanMode { @@ -468,6 +469,11 @@ typedef struct SStreamScanInfo { SNodeList* pGroupTags; SNode* pTagCond; SNode* pTagIndexCond; + + // recover + int32_t blockRecoverContiCnt; + int32_t blockRecoverTotCnt; + } SStreamScanInfo; typedef struct { @@ -499,8 +505,8 @@ typedef struct STableCountScanOperatorInfo { STableCountScanSupp supp; - int32_t currGrpIdx; - SArray* stbUidList; // when group by db_name and/or stable_name + int32_t currGrpIdx; + SArray* stbUidList; // when group by db_name and/or stable_name } STableCountScanOperatorInfo; typedef struct SOptrBasicInfo { @@ -678,19 +684,19 @@ void setOperatorInfo(SOperatorInfo* pOperator, const char* name, int32 void destroyOperatorInfo(SOperatorInfo* pOperator); int32_t optrDefaultBufFn(SOperatorInfo* pOperator); -void initBasicInfo(SOptrBasicInfo* pInfo, SSDataBlock* pBlock); -void cleanupBasicInfo(SOptrBasicInfo* pInfo); +void initBasicInfo(SOptrBasicInfo* pInfo, SSDataBlock* pBlock); +void cleanupBasicInfo(SOptrBasicInfo* pInfo); int32_t initExprSupp(SExprSupp* pSup, SExprInfo* pExprInfo, int32_t numOfExpr); void cleanupExprSupp(SExprSupp* pSup); -void destroyExprInfo(SExprInfo* pExpr, int32_t numOfExprs); +void destroyExprInfo(SExprInfo* pExpr, int32_t numOfExprs); int32_t initAggSup(SExprSupp* pSup, SAggSupporter* pAggSup, SExprInfo* pExprInfo, int32_t numOfCols, size_t keyBufSize, const char* pkey); void cleanupAggSup(SAggSupporter* pAggSup); -void initResultSizeInfo(SResultInfo* pResultInfo, int32_t numOfRows); +void initResultSizeInfo(SResultInfo* pResultInfo, int32_t numOfRows); void doBuildStreamResBlock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo, SDiskbasedBuf* pBuf); @@ -803,10 +809,10 @@ void setInputDataBlock(SExprSupp* pExprSupp, SSDataBlock* pBlock, int32_t order, int32_t checkForQueryBuf(size_t numOfTables); -bool isTaskKilled(SExecTaskInfo* pTaskInfo); -void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode); -void doDestroyTask(SExecTaskInfo* pTaskInfo); -void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status); +bool isTaskKilled(SExecTaskInfo* pTaskInfo); +void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode); +void doDestroyTask(SExecTaskInfo* pTaskInfo); +void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status); int32_t createExecTaskInfoImpl(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SReadHandle* pHandle, uint64_t taskId, char* sql, EOPTR_EXEC_MODEL model); @@ -828,8 +834,8 @@ bool isDeletedWindow(STimeWindow* pWin, uint64_t groupId, SAggSupporter* pSup); bool isDeletedStreamWindow(STimeWindow* pWin, uint64_t groupId, SStreamState* pState, STimeWindowAggSupp* pTwSup); void appendOneRowToStreamSpecialBlock(SSDataBlock* pBlock, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t* pUid, uint64_t* pGp, void* pTbName); -uint64_t calGroupIdByData(SPartitionBySupporter* pParSup, SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t rowId); -void calBlockTbName(SStreamScanInfo* pInfo, SSDataBlock* pBlock); +uint64_t calGroupIdByData(SPartitionBySupporter* pParSup, SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t rowId); +void calBlockTbName(SStreamScanInfo* pInfo, SSDataBlock* pBlock); int32_t finalizeResultRows(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPosition, SExprSupp* pSup, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo); diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 75de012947..e5ff104d5c 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -936,6 +936,10 @@ int32_t qStreamRestoreParam(qTaskInfo_t tinfo) { } return 0; } +bool qStreamRecoverScanFinished(qTaskInfo_t tinfo) { + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; + return pTaskInfo->streamInfo.recoverScanFinished; +} void* qExtractReaderFromStreamScanner(void* scanner) { SStreamScanInfo* pInfo = scanner; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index d074ceede8..79687a5ff8 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -768,8 +768,8 @@ static SSDataBlock* doTableScan(SOperatorInfo* pOperator) { tableListGetGroupList(pTaskInfo->pTableInfoList, pInfo->currentGroupId, &pList, &num); ASSERT(pInfo->base.dataReader == NULL); - int32_t code = tsdbReaderOpen(pInfo->base.readHandle.vnode, &pInfo->base.cond, pList, num, - pInfo->pResBlock, (STsdbReader**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo)); + int32_t code = tsdbReaderOpen(pInfo->base.readHandle.vnode, &pInfo->base.cond, pList, num, pInfo->pResBlock, + (STsdbReader**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo)); if (code != TSDB_CODE_SUCCESS) { T_LONG_JMP(pTaskInfo->env, code); } @@ -986,8 +986,8 @@ static SSDataBlock* readPreVersionData(SOperatorInfo* pTableScanOp, uint64_t tbU SSDataBlock* pBlock = pTableScanInfo->pResBlock; STsdbReader* pReader = NULL; - int32_t code = tsdbReaderOpen(pTableScanInfo->base.readHandle.vnode, &cond, &tblInfo, 1, pBlock, (STsdbReader**)&pReader, - GET_TASKID(pTaskInfo)); + int32_t code = tsdbReaderOpen(pTableScanInfo->base.readHandle.vnode, &cond, &tblInfo, 1, pBlock, + (STsdbReader**)&pReader, GET_TASKID(pTaskInfo)); if (code != TSDB_CODE_SUCCESS) { terrno = code; T_LONG_JMP(pTaskInfo->env, code); @@ -995,7 +995,7 @@ static SSDataBlock* readPreVersionData(SOperatorInfo* pTableScanOp, uint64_t tbU } if (tsdbNextDataBlock(pReader)) { - /*SSDataBlock* p = */tsdbRetrieveDataBlock(pReader, NULL); + /*SSDataBlock* p = */ tsdbRetrieveDataBlock(pReader, NULL); doSetTagColumnData(&pTableScanInfo->base, pBlock, pTaskInfo, pBlock->info.rows); pBlock->info.id.groupId = getTableGroupId(pTaskInfo->pTableInfoList, pBlock->info.id.uid); } @@ -1224,7 +1224,7 @@ static int32_t generateIntervalScanRange(SStreamScanInfo* pInfo, SSDataBlock* pS SColumnInfoData* pSrcUidCol = taosArrayGet(pSrcBlock->pDataBlock, UID_COLUMN_INDEX); SColumnInfoData* pSrcGpCol = taosArrayGet(pSrcBlock->pDataBlock, GROUPID_COLUMN_INDEX); - uint64_t* srcUidData = (uint64_t*)pSrcUidCol->pData; + uint64_t* srcUidData = (uint64_t*)pSrcUidCol->pData; ASSERT(pSrcStartTsCol->info.type == TSDB_DATA_TYPE_TIMESTAMP); TSKEY* srcStartTsCol = (TSKEY*)pSrcStartTsCol->pData; TSKEY* srcEndTsCol = (TSKEY*)pSrcEndTsCol->pData; @@ -1753,11 +1753,18 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pTSInfo->scanTimes = 0; pTSInfo->currentGroupId = -1; pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__SCAN; + pTaskInfo->streamInfo.recoverScanFinished = false; } if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__SCAN) { + if (pInfo->blockRecoverContiCnt > 100) { + pInfo->blockRecoverTotCnt += pInfo->blockRecoverContiCnt; + pInfo->blockRecoverContiCnt = 0; + return NULL; + } SSDataBlock* pBlock = doTableScan(pInfo->pTableScanOp); if (pBlock != NULL) { + pInfo->blockRecoverContiCnt++; calBlockTbName(pInfo, pBlock); if (pInfo->pUpdateInfo) { TSKEY maxTs = updateInfoFillBlockData(pInfo->pUpdateInfo, pBlock, pInfo->primaryTsIndex); @@ -1775,6 +1782,7 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pTSInfo->base.cond.startVersion = -1; pTSInfo->base.cond.endVersion = -1; + pTaskInfo->streamInfo.recoverScanFinished = true; return NULL; } @@ -2285,7 +2293,8 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys if (pHandle->initTableReader) { pTSInfo->scanMode = TABLE_SCAN__TABLE_ORDER; pTSInfo->base.dataReader = NULL; - code = tsdbReaderOpen(pHandle->vnode, &pTSInfo->base.cond, pList, num, pTSInfo->pResBlock, &pTSInfo->base.dataReader, NULL); + code = tsdbReaderOpen(pHandle->vnode, &pTSInfo->base.cond, pList, num, pTSInfo->pResBlock, + &pTSInfo->base.dataReader, NULL); if (code != 0) { terrno = code; destroyTableScanOperatorInfo(pTableScanOp); @@ -2355,7 +2364,8 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys pOperator->exprSupp.numOfExprs = taosArrayGetSize(pInfo->pRes->pDataBlock); __optr_fn_t nextFn = pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM ? doStreamScan : doQueueScan; - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, nextFn, NULL, destroyStreamScanOperatorInfo, optrDefaultBufFn, NULL); + pOperator->fpSet = + createOperatorFpSet(optrDummyOpenFn, nextFn, NULL, destroyStreamScanOperatorInfo, optrDefaultBufFn, NULL); return pOperator; @@ -2492,7 +2502,8 @@ SOperatorInfo* createTagScanOperatorInfo(SReadHandle* pReadHandle, STagScanPhysi initResultSizeInfo(&pOperator->resultInfo, 4096); blockDataEnsureCapacity(pInfo->pRes, pOperator->resultInfo.capacity); - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doTagScan, NULL, destroyTagScanOperatorInfo, optrDefaultBufFn, NULL); + pOperator->fpSet = + createOperatorFpSet(optrDummyOpenFn, doTagScan, NULL, destroyTagScanOperatorInfo, optrDefaultBufFn, NULL); return pOperator; @@ -2513,11 +2524,12 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { SQueryTableDataCond* pQueryCond = taosArrayGet(pInfo->queryConds, readIdx); - int64_t st = taosGetTimestampUs(); - void* p = tableListGetInfo(pTaskInfo->pTableInfoList, readIdx + pInfo->tableStartIndex); + int64_t st = taosGetTimestampUs(); + void* p = tableListGetInfo(pTaskInfo->pTableInfoList, readIdx + pInfo->tableStartIndex); SReadHandle* pHandle = &pInfo->base.readHandle; - int32_t code = tsdbReaderOpen(pHandle->vnode, pQueryCond, p, 1, pBlock, &pInfo->base.dataReader, GET_TASKID(pTaskInfo)); + int32_t code = + tsdbReaderOpen(pHandle->vnode, pQueryCond, p, 1, pBlock, &pInfo->base.dataReader, GET_TASKID(pTaskInfo)); if (code != 0) { T_LONG_JMP(pTaskInfo->env, code); } @@ -2915,8 +2927,8 @@ static void buildVnodeGroupedNtbTableCount(STableCountScanOperatorInfo* SSDataBlock* pRes, char* dbName); static void buildVnodeFilteredTbCount(SOperatorInfo* pOperator, STableCountScanOperatorInfo* pInfo, STableCountScanSupp* pSupp, SSDataBlock* pRes, char* dbName); -static void buildVnodeGroupedTableCount(SOperatorInfo* pOperator, STableCountScanOperatorInfo* pInfo, - STableCountScanSupp* pSupp, SSDataBlock* pRes, int32_t vgId, char* dbName); +static void buildVnodeGroupedTableCount(SOperatorInfo* pOperator, STableCountScanOperatorInfo* pInfo, + STableCountScanSupp* pSupp, SSDataBlock* pRes, int32_t vgId, char* dbName); static SSDataBlock* buildVnodeDbTableCount(SOperatorInfo* pOperator, STableCountScanOperatorInfo* pInfo, STableCountScanSupp* pSupp, SSDataBlock* pRes); static void buildSysDbGroupedTableCount(SOperatorInfo* pOperator, STableCountScanOperatorInfo* pInfo, @@ -3041,8 +3053,8 @@ SOperatorInfo* createTableCountScanOperatorInfo(SReadHandle* readHandle, STableC setOperatorInfo(pOperator, "TableCountScanOperator", QUERY_NODE_PHYSICAL_PLAN_TABLE_COUNT_SCAN, false, OP_NOT_OPENED, pInfo, pTaskInfo); - pOperator->fpSet = - createOperatorFpSet(optrDummyOpenFn, doTableCountScan, NULL, destoryTableCountScanOperator, optrDefaultBufFn, NULL); + pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doTableCountScan, NULL, destoryTableCountScanOperator, + optrDefaultBufFn, NULL); return pOperator; _error: diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 8e746c6738..3d42f759cb 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -116,7 +116,11 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { ASSERT(0); } if (output == NULL) { - finished = true; + if (qStreamRecoverScanFinished(exec)) { + finished = true; + } else { + qSetStreamOpOpen(exec); + } break; } From e5c4196c98324ebfa2c6caa233fe85948b7165e4 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao@163.com> Date: Tue, 27 Dec 2022 17:19:47 +0800 Subject: [PATCH 49/82] fix:calculate next sliding window error --- source/libs/executor/src/timewindowoperator.c | 19 ++- tests/script/tsim/stream/basic3.sim | 4 +- tests/script/tsim/stream/sliding.sim | 117 ++++++++++++++++++ 3 files changed, 136 insertions(+), 4 deletions(-) diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 2af551b832..813da3c436 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2347,6 +2347,17 @@ void doBuildResult(SOperatorInfo* pOperator, SStreamState* pState, SSDataBlock* buildDataBlockFromGroupRes(pOperator, pState, pBlock, &pOperator->exprSupp, pGroupResInfo); } +static int32_t getNextQualifiedFinalWindow(SInterval* pInterval, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, + TSKEY* primaryKeys, int32_t prevPosition) { + int32_t startPos = prevPosition + 1; + if (startPos == pDataBlockInfo->rows) { + startPos = -1; + } else { + *pNext = getFinalTimeWindow(primaryKeys[startPos], pInterval); + } + return startPos; +} + static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBlock, uint64_t groupId, SHashObj* pUpdatedMap) { SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)pOperatorInfo->info; @@ -2457,8 +2468,12 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* p } int32_t prevEndPos = (forwardRows - 1) * step + startPos; ASSERT(pSDataBlock->info.window.skey > 0 && pSDataBlock->info.window.ekey > 0); - startPos = - getNextQualifiedWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos, TSDB_ORDER_ASC); + if (IS_FINAL_OP(pInfo)) { + startPos = getNextQualifiedFinalWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos); + } else { + startPos = + getNextQualifiedWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos, TSDB_ORDER_ASC); + } if (startPos < 0) { break; } diff --git a/tests/script/tsim/stream/basic3.sim b/tests/script/tsim/stream/basic3.sim index 48fb860a72..41e19b19af 100644 --- a/tests/script/tsim/stream/basic3.sim +++ b/tests/script/tsim/stream/basic3.sim @@ -1,7 +1,7 @@ system sh/stop_dnodes.sh system sh/deploy.sh -n dnode1 -i 1 -system sh/cfg.sh -n dnode1 -c debugflag -v 131 -system sh/exec.sh -n dnode1 -s start -v +system sh/cfg.sh -n dnode1 -c debugflag 131 +system sh/exec.sh -n dnode1 -s start sleep 5000 diff --git a/tests/script/tsim/stream/sliding.sim b/tests/script/tsim/stream/sliding.sim index c9a1ddd922..8287274cd2 100644 --- a/tests/script/tsim/stream/sliding.sim +++ b/tests/script/tsim/stream/sliding.sim @@ -672,6 +672,123 @@ if $data61 != 1 then goto loop5 endi +print step 8 + +sql drop stream IF EXISTS streams4; +sql drop database IF EXISTS test4; + +sql create database test4 vgroups 6; +sql use test4; +sql create stable st(ts timestamp, a int, b int, c int, d double) tags(ta int,tb int,tc int); +sql create table t1 using st tags(1,1,1); +sql create table t2 using st tags(2,2,2); + +sql create stream streams4 trigger at_once into streamt4 as select _wstart as ts, count(*),min(a) c1 from st interval(10s) sliding(5s); + +sql insert into t1 values(1648791213000,1,1,1,1.0); +sql insert into t1 values(1648791243000,2,1,1,1.0); + +sql insert into t2 values(1648791273000,3,1,1,1.0); +sql insert into t2 values(1648791313000,4,1,1,1.0); + +$loop_count = 0 + +loop6: +sleep 200 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +sql select * from streamt4 order by 1; + +# row 0 +if $rows != 8 then + print ====loop6=rows=$rows + goto loop6 +endi + +if $data01 != 1 then + print ====loop6=data01=$data01 + goto loop6 +endi + +if $data02 != 1 then + print ====loop6=data02=$data02 + return -1 +endi + +if $data11 != 1 then + print ====loop6=data11=$data11 + goto loop6 +endi + +if $data12 != 1 then + print ====loop6=data12=$data12 + return -1 +endi + +if $data21 != 1 then + print ====loop6=data21=$data21 + goto loop6 +endi + +if $data22 != 2 then + print ====loop6=data22=$data22 + return -1 +endi + +if $data31 != 1 then + print ====loop6=data31=$data31 + goto loop6 +endi + +if $data32 != 2 then + print ====loop6=data32=$data32 + return -1 +endi + +if $data41 != 1 then + print ====loop6=data41=$data41 + goto loop6 +endi + +if $data42 != 3 then + print ====loop6=data42=$data42 + return -1 +endi + +if $data51 != 1 then + print ====loop6=data51=$data51 + goto loop6 +endi + +if $data52 != 3 then + print ====loop6=data52=$data52 + return -1 +endi + +if $data61 != 1 then + print ====loop6=data61=$data61 + return -1 +endi + +if $data62 != 4 then + print ====loop6=data62=$data62 + return -1 +endi + +if $data71 != 1 then + print ====loop6=data71=$data71 + return -1 +endi + +if $data72 != 4 then + print ====loop6=data72=$data72 + return -1 +endi + $loop_all = $loop_all + 1 print ============loop_all=$loop_all From f7b17a43a77dd63375545b1e53e663dbd908141f Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Tue, 27 Dec 2022 17:23:10 +0800 Subject: [PATCH 50/82] fix: taosbenchmark no vgroup if host specified for main (#19180) --- cmake/taostools_CMakeLists.txt.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/taostools_CMakeLists.txt.in b/cmake/taostools_CMakeLists.txt.in index 0cc57d1246..e23ebb104b 100644 --- a/cmake/taostools_CMakeLists.txt.in +++ b/cmake/taostools_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taos-tools ExternalProject_Add(taos-tools GIT_REPOSITORY https://github.com/taosdata/taos-tools.git - GIT_TAG 261fcca + GIT_TAG 11b60a4 SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools" BINARY_DIR "" #BUILD_IN_SOURCE TRUE From f608481ffb064e045f9a4704486f3e3b7954a37a Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Tue, 27 Dec 2022 16:49:23 +0800 Subject: [PATCH 51/82] fix(meta/assert/cleanup): remove asserts --- source/dnode/vnode/src/meta/metaCache.c | 28 +++++--- source/dnode/vnode/src/meta/metaEntry.c | 8 ++- source/dnode/vnode/src/meta/metaOpen.c | 5 +- source/dnode/vnode/src/meta/metaQuery.c | 12 +++- source/dnode/vnode/src/meta/metaSnapshot.c | 33 ++++++++-- source/dnode/vnode/src/meta/metaTable.c | 77 +++++++++++++++++----- 6 files changed, 127 insertions(+), 36 deletions(-) diff --git a/source/dnode/vnode/src/meta/metaCache.c b/source/dnode/vnode/src/meta/metaCache.c index 513ee5a1c2..bed34ea9be 100644 --- a/source/dnode/vnode/src/meta/metaCache.c +++ b/source/dnode/vnode/src/meta/metaCache.c @@ -55,9 +55,9 @@ struct SMetaCache { // query cache struct STagFilterResCache { TdThreadMutex lock; - SHashObj* pTableEntry; - SLRUCache* pUidResCache; - uint64_t keyBuf[3]; + SHashObj* pTableEntry; + SLRUCache* pUidResCache; + uint64_t keyBuf[3]; } sTagFilterResCache; }; @@ -211,7 +211,7 @@ _exit: int32_t metaCacheUpsert(SMeta* pMeta, SMetaInfo* pInfo) { int32_t code = 0; - // ASSERT(metaIsWLocked(pMeta)); + // meta is wlocked for calling this func. // search SMetaCache* pCache = pMeta->pCache; @@ -222,7 +222,10 @@ int32_t metaCacheUpsert(SMeta* pMeta, SMetaInfo* pInfo) { } if (*ppEntry) { // update - ASSERT(pInfo->suid == (*ppEntry)->info.suid); + if (pInfo->suid != (*ppEntry)->info.suid) { + metaError("meta/cache: suid should be same as the one in cache."); + return TSDB_CODE_FAILED; + } if (pInfo->version > (*ppEntry)->info.version) { (*ppEntry)->info.version = pInfo->version; (*ppEntry)->info.skmVer = pInfo->skmVer; @@ -341,7 +344,7 @@ _exit: int32_t metaStatsCacheUpsert(SMeta* pMeta, SMetaStbStats* pInfo) { int32_t code = 0; - // ASSERT(metaIsWLocked(pMeta)); + // meta is wlocked for calling this func. // search SMetaCache* pCache = pMeta->pCache; @@ -450,7 +453,11 @@ int32_t metaGetCachedTableUidList(SMeta* pMeta, tb_uid_t suid, const uint8_t* pK // do some book mark work after acquiring the filter result from cache STagFilterResEntry** pEntry = taosHashGet(pTableMap, &suid, sizeof(uint64_t)); - ASSERT(pEntry != NULL); + if (NULL == pEntry) { + metaError("meta/cache: pEntry should not be NULL."); + return TSDB_CODE_FAILED; + } + *acquireRes = 1; const char* p = taosLRUCacheValue(pCache, pHandle); @@ -495,7 +502,7 @@ int32_t metaGetCachedTableUidList(SMeta* pMeta, tb_uid_t suid, const uint8_t* pK taosMemoryFree(*p1); } - atomic_store_32(&(*pEntry)->qTimes, 0); // reset the query times + atomic_store_32(&(*pEntry)->qTimes, 0); // reset the query times taosArrayDestroy(pInvalidRes); taosThreadMutexUnlock(pLock); @@ -551,7 +558,10 @@ int32_t metaUidFilterCachePut(SMeta* pMeta, uint64_t suid, const void* pKey, int pBuf[0] = suid; memcpy(&pBuf[1], pKey, keyLen); - ASSERT(sizeof(uint64_t) + keyLen == 24); + if (sizeof(uint64_t) + keyLen != 24) { + metaError("meta/cache: incorrect keyLen:%" PRId32 " length.", keyLen); + return TSDB_CODE_FAILED; + } // add to cache. taosLRUCacheInsert(pCache, pBuf, sizeof(uint64_t) + keyLen, pPayload, payloadLen, freePayload, NULL, diff --git a/source/dnode/vnode/src/meta/metaEntry.c b/source/dnode/vnode/src/meta/metaEntry.c index 72f7365a1e..e50931ac06 100644 --- a/source/dnode/vnode/src/meta/metaEntry.c +++ b/source/dnode/vnode/src/meta/metaEntry.c @@ -51,7 +51,9 @@ int metaEncodeEntry(SEncoder *pCoder, const SMetaEntry *pME) { } else if (pME->type == TSDB_TSMA_TABLE) { if (tEncodeTSma(pCoder, pME->smaEntry.tsma) < 0) return -1; } else { - ASSERT(0); + metaError("meta/entry: invalide table type: %" PRId8 " encode failed.", pME->type); + + return -1; } tEndEncode(pCoder); @@ -99,7 +101,9 @@ int metaDecodeEntry(SDecoder *pCoder, SMetaEntry *pME) { } if (tDecodeTSma(pCoder, pME->smaEntry.tsma, true) < 0) return -1; } else { - ASSERT(0); + metaError("meta/entry: invalide table type: %" PRId8 " decode failed.", pME->type); + + return -1; } tEndDecode(pCoder); diff --git a/source/dnode/vnode/src/meta/metaOpen.c b/source/dnode/vnode/src/meta/metaOpen.c index 1b5f742559..a78239eb56 100644 --- a/source/dnode/vnode/src/meta/metaOpen.c +++ b/source/dnode/vnode/src/meta/metaOpen.c @@ -358,7 +358,10 @@ static int tagIdxKeyCmpr(const void *pKey1, int kLen1, const void *pKey2, int kL return -1; } - ASSERT(pTagIdxKey1->type == pTagIdxKey2->type); + if (pTagIdxKey1->type != pTagIdxKey2->type) { + metaError("meta/open: incorrect tag idx type."); + return TSDB_CODE_FAILED; + } // check NULL, NULL is always the smallest if (pTagIdxKey1->isNull && !pTagIdxKey2->isNull) { diff --git a/source/dnode/vnode/src/meta/metaQuery.c b/source/dnode/vnode/src/meta/metaQuery.c index 6c468b89f6..c6e518e0ca 100644 --- a/source/dnode/vnode/src/meta/metaQuery.c +++ b/source/dnode/vnode/src/meta/metaQuery.c @@ -661,7 +661,11 @@ int32_t metaGetTbTSchemaEx(SMeta *pMeta, tb_uid_t suid, tb_uid_t uid, int32_t sv goto _exit; } - ASSERT(c); + if (c == 0) { + metaError("meta/query: incorrect c: %" PRId32 ".", c); + code = TSDB_CODE_FAILED; + goto _exit; + } if (c < 0) { tdbTbcMoveToPrev(pSkmDbC); @@ -685,7 +689,11 @@ int32_t metaGetTbTSchemaEx(SMeta *pMeta, tb_uid_t suid, tb_uid_t uid, int32_t sv } } - ASSERT(sver > 0); + if (sver <= 0) { + metaError("meta/query: incorrect sver: %" PRId32 ".", sver); + code = TSDB_CODE_FAILED; + goto _exit; + } skmDbKey.uid = suid ? suid : uid; skmDbKey.sver = sver; diff --git a/source/dnode/vnode/src/meta/metaSnapshot.c b/source/dnode/vnode/src/meta/metaSnapshot.c index 054e785980..d37b0ea3fa 100644 --- a/source/dnode/vnode/src/meta/metaSnapshot.c +++ b/source/dnode/vnode/src/meta/metaSnapshot.c @@ -100,7 +100,10 @@ int32_t metaSnapRead(SMetaSnapReader* pReader, uint8_t** ppData) { break; } - ASSERT(pData && nData); + if (!pData || !nData) { + metaError("meta/snap: invalide nData: %" PRId32 " meta snap read failed.", nData); + goto _exit; + } *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + nData); if (*ppData == NULL) { @@ -356,7 +359,11 @@ int32_t buildSnapContext(SMeta* pMeta, int64_t snapVersion, int64_t suid, int8_t for (int i = 0; i < taosArrayGetSize(ctx->idList); i++) { int64_t* uid = taosArrayGet(ctx->idList, i); SIdInfo* idData = (SIdInfo*)taosHashGet(ctx->idVersion, uid, sizeof(int64_t)); - ASSERT(idData); + if (!idData) { + metaError("meta/snap: null idData"); + return TSDB_CODE_FAILED; + } + idData->index = i; metaDebug("tmqsnap init idVersion uid:%" PRIi64 " version:%" PRIi64 " index:%d", *uid, idData->version, idData->index); @@ -473,7 +480,10 @@ int32_t getMetafromSnapShot(SSnapContext* ctx, void** pBuf, int32_t* contLen, in int64_t* uidTmp = taosArrayGet(ctx->idList, ctx->index); ctx->index++; SIdInfo* idInfo = (SIdInfo*)taosHashGet(ctx->idVersion, uidTmp, sizeof(tb_uid_t)); - ASSERT(idInfo); + if (!idInfo) { + metaError("meta/snap: null idInfo"); + return TSDB_CODE_FAILED; + } *uid = *uidTmp; ret = MoveToPosition(ctx, idInfo->version, *uidTmp); @@ -507,7 +517,11 @@ int32_t getMetafromSnapShot(SSnapContext* ctx, void** pBuf, int32_t* contLen, in (ctx->subType == TOPIC_SUB_TYPE__TABLE && me.type == TSDB_CHILD_TABLE && me.ctbEntry.suid == ctx->suid)) { STableInfoForChildTable* data = (STableInfoForChildTable*)taosHashGet(ctx->suidInfo, &me.ctbEntry.suid, sizeof(tb_uid_t)); - ASSERT(data); + if (!data) { + metaError("meta/snap: null data"); + return TSDB_CODE_FAILED; + } + SVCreateTbReq req = {0}; req.type = TSDB_CHILD_TABLE; @@ -528,7 +542,8 @@ int32_t getMetafromSnapShot(SSnapContext* ctx, void** pBuf, int32_t* contLen, in } else { SArray* pTagVals = NULL; if (tTagToValArray((const STag*)p, &pTagVals) != 0) { - ASSERT(0); + metaError("meta/snap: tag to val array failed."); + return TSDB_CODE_FAILED; } int16_t nCols = taosArrayGetSize(pTagVals); for (int j = 0; j < nCols; ++j) { @@ -572,7 +587,8 @@ int32_t getMetafromSnapShot(SSnapContext* ctx, void** pBuf, int32_t* contLen, in ret = buildNormalChildTableInfo(&req, pBuf, contLen); *type = TDMT_VND_CREATE_TABLE; } else { - ASSERT(0); + metaError("meta/snap: invalid topic sub type: %" PRId8 " get meta from snap failed.", ctx->subType); + ret = -1; } tDecoderClear(&dc); @@ -593,7 +609,10 @@ SMetaTableInfo getUidfromSnapShot(SSnapContext* ctx) { int64_t* uidTmp = taosArrayGet(ctx->idList, ctx->index); ctx->index++; SIdInfo* idInfo = (SIdInfo*)taosHashGet(ctx->idVersion, uidTmp, sizeof(tb_uid_t)); - ASSERT(idInfo); + if (!idInfo) { + metaError("meta/snap: null idInfo"); + return result; + } int32_t ret = MoveToPosition(ctx, idInfo->version, *uidTmp); if (ret != 0) { diff --git a/source/dnode/vnode/src/meta/metaTable.c b/source/dnode/vnode/src/meta/metaTable.c index 60a27bec10..0aaf6417a8 100644 --- a/source/dnode/vnode/src/meta/metaTable.c +++ b/source/dnode/vnode/src/meta/metaTable.c @@ -46,7 +46,7 @@ static void metaGetEntryInfo(const SMetaEntry *pEntry, SMetaInfo *pInfo) { pInfo->suid = 0; pInfo->skmVer = pEntry->ntbEntry.schemaRow.version; } else { - ASSERT(0); + metaError("meta/table: invalide table type: %" PRId8 " get entry info failed.", pEntry->type); } } @@ -342,10 +342,18 @@ int metaAlterSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) { tdbTbcOpen(pMeta->pTbDb, &pTbDbc, NULL); ret = tdbTbcMoveTo(pTbDbc, &((STbDbKey){.uid = pReq->suid, .version = oversion}), sizeof(STbDbKey), &c); - ASSERT(ret == 0 && c == 0); + if (!(ret == 0 && c == 0)) { + metaError("meta/table: invalide ret: %" PRId32 " or c: %" PRId32 "alter stb failed.", ret, c); + return -1; + } ret = tdbTbcGet(pTbDbc, NULL, NULL, &pData, &nData); - ASSERT(ret == 0); + if (ret < 0) { + tdbTbcClose(pTbDbc); + + terrno = TSDB_CODE_TDB_STB_NOT_EXIST; + return -1; + } oStbEntry.pBuf = taosMemoryMalloc(nData); memcpy(oStbEntry.pBuf, pData, nData); @@ -558,7 +566,8 @@ static void metaBuildTtlIdxKey(STtlIdxKey *ttlKey, const SMetaEntry *pME) { ctime = pME->ntbEntry.ctime; ttlDays = pME->ntbEntry.ttlDays; } else { - ASSERT(0); + metaError("meta/table: invalide table type: %" PRId8 " build ttl idx key failed.", pME->type); + return; } if (ttlDays <= 0) return; @@ -773,7 +782,10 @@ static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAl tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, NULL); tdbTbcMoveTo(pUidIdxc, &uid, sizeof(uid), &c); - ASSERT(c == 0); + if (c != 0) { + metaError("meta/table: invalide c: %" PRId32 " alt tb column failed.", c); + return -1; + } tdbTbcGet(pUidIdxc, NULL, NULL, &pData, &nData); oversion = ((SUidIdxVal *)pData)[0].version; @@ -783,7 +795,11 @@ static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAl tdbTbcOpen(pMeta->pTbDb, &pTbDbc, NULL); tdbTbcMoveTo(pTbDbc, &((STbDbKey){.uid = uid, .version = oversion}), sizeof(STbDbKey), &c); - ASSERT(c == 0); + if (c != 0) { + metaError("meta/table: invalide c: %" PRId32 " alt tb column failed.", c); + return -1; + } + tdbTbcGet(pTbDbc, NULL, NULL, &pData, &nData); // get table entry @@ -792,7 +808,11 @@ static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAl memcpy(entry.pBuf, pData, nData); tDecoderInit(&dc, entry.pBuf, nData); ret = metaDecodeEntry(&dc, &entry); - ASSERT(ret == 0); + if (ret != 0) { + tDecoderClear(&dc); + metaError("meta/table: invalide ret: %" PRId32 " alt tb column failed.", ret); + return -1; + } if (entry.type != TSDB_NORMAL_TABLE) { terrno = TSDB_CODE_VND_INVALID_TABLE_ACTION; @@ -812,7 +832,11 @@ static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAl if (iCol >= pSchema->nCols) break; pColumn = &pSchema->pSchema[iCol]; - ASSERT(pAlterTbReq->colName); + if (NULL == pAlterTbReq->colName) { + metaError("meta/table: null pAlterTbReq->colName"); + return -1; + } + if (strcmp(pColumn->name, pAlterTbReq->colName) == 0) break; iCol++; } @@ -964,7 +988,10 @@ static int metaUpdateTableTagVal(SMeta *pMeta, int64_t version, SVAlterTbReq *pA tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, NULL); tdbTbcMoveTo(pUidIdxc, &uid, sizeof(uid), &c); - ASSERT(c == 0); + if (c != 0) { + metaError("meta/table: invalide c: %" PRId32 " update tb tag val failed.", c); + return -1; + } tdbTbcGet(pUidIdxc, NULL, NULL, &pData, &nData); oversion = ((SUidIdxVal *)pData)[0].version; @@ -977,7 +1004,11 @@ static int metaUpdateTableTagVal(SMeta *pMeta, int64_t version, SVAlterTbReq *pA /* get ctbEntry */ tdbTbcOpen(pMeta->pTbDb, &pTbDbc, NULL); tdbTbcMoveTo(pTbDbc, &((STbDbKey){.uid = uid, .version = oversion}), sizeof(STbDbKey), &c); - ASSERT(c == 0); + if (c != 0) { + metaError("meta/table: invalide c: %" PRId32 " update tb tag val failed.", c); + return -1; + } + tdbTbcGet(pTbDbc, NULL, NULL, &pData, &nData); ctbEntry.pBuf = taosMemoryMalloc(nData); @@ -1075,7 +1106,11 @@ static int metaUpdateTableTagVal(SMeta *pMeta, int64_t version, SVAlterTbReq *pA metaUpdateTagIdx(pMeta, &ctbEntry); } - ASSERT(ctbEntry.ctbEntry.pTags); + if (NULL == ctbEntry.ctbEntry.pTags) { + metaError("meta/table: null tags, update tag val failed."); + goto _err; + } + SCtbIdxKey ctbIdxKey = {.suid = ctbEntry.ctbEntry.suid, .uid = uid}; tdbTbUpsert(pMeta->pCtbIdx, &ctbIdxKey, sizeof(ctbIdxKey), ctbEntry.ctbEntry.pTags, ((STag *)(ctbEntry.ctbEntry.pTags))->len, pMeta->txn); @@ -1130,7 +1165,10 @@ static int metaUpdateTableOptions(SMeta *pMeta, int64_t version, SVAlterTbReq *p tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, NULL); tdbTbcMoveTo(pUidIdxc, &uid, sizeof(uid), &c); - ASSERT(c == 0); + if (c != 0) { + metaError("meta/table: invalide c: %" PRId32 " update tb options failed.", c); + return -1; + } tdbTbcGet(pUidIdxc, NULL, NULL, &pData, &nData); oversion = ((SUidIdxVal *)pData)[0].version; @@ -1140,7 +1178,11 @@ static int metaUpdateTableOptions(SMeta *pMeta, int64_t version, SVAlterTbReq *p tdbTbcOpen(pMeta->pTbDb, &pTbDbc, NULL); tdbTbcMoveTo(pTbDbc, &((STbDbKey){.uid = uid, .version = oversion}), sizeof(STbDbKey), &c); - ASSERT(c == 0); + if (c != 0) { + metaError("meta/table: invalide c: %" PRId32 " update tb options failed.", c); + return -1; + } + tdbTbcGet(pTbDbc, NULL, NULL, &pData, &nData); // get table entry @@ -1149,7 +1191,11 @@ static int metaUpdateTableOptions(SMeta *pMeta, int64_t version, SVAlterTbReq *p memcpy(entry.pBuf, pData, nData); tDecoderInit(&dc, entry.pBuf, nData); ret = metaDecodeEntry(&dc, &entry); - ASSERT(ret == 0); + if (ret != 0) { + tDecoderClear(&dc); + metaError("meta/table: invalide ret: %" PRId32 " alt tb options failed.", ret); + return -1; + } entry.version = version; metaWLock(pMeta); @@ -1408,7 +1454,8 @@ static int metaSaveToSkmDb(SMeta *pMeta, const SMetaEntry *pME) { } else if (pME->type == TSDB_NORMAL_TABLE) { pSW = &pME->ntbEntry.schemaRow; } else { - ASSERT(0); + metaError("meta/table: invalide table type: %" PRId8 " save skm db failed.", pME->type); + return TSDB_CODE_FAILED; } skmDbKey.uid = pME->uid; From 71a28483762ed733c6d15be7c47bcbb919fa9625 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 27 Dec 2022 17:51:52 +0800 Subject: [PATCH 52/82] fix: double free on append log failed --- source/libs/sync/src/syncPipeline.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index ef37600e98..ee649c268c 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -45,7 +45,7 @@ int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt if (index - pBuf->startIndex >= pBuf->size) { sError("vgId:%d, failed to append due to sync log buffer full. index:%" PRId64 "", pNode->vgId, index); - goto _out; + goto _err; } ASSERT(index == pBuf->endIndex); @@ -66,9 +66,8 @@ int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt taosThreadMutexUnlock(&pBuf->mutex); return 0; -_out: +_err: syncLogBufferValidate(pBuf); - syncEntryDestroy(pEntry); taosThreadMutexUnlock(&pBuf->mutex); return -1; } From bbab03274a903014910e3f29869bf51c569be87f Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 27 Dec 2022 18:02:43 +0800 Subject: [PATCH 53/82] fix memory leak --- source/libs/stream/src/streamDispatch.c | 3 --- source/libs/stream/src/streamRecover.c | 1 + source/libs/stream/src/streamTask.c | 2 ++ 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 4e0b0630bc..f2b1db19e8 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -487,8 +487,6 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat taosMemoryFree(pReqs); } return code; - } else { - ASSERT(0); } return 0; } @@ -514,7 +512,6 @@ int32_t streamDispatch(SStreamTask* pTask) { int32_t code = 0; if (streamDispatchAllBlocks(pTask, pBlock) < 0) { - ASSERT(0); code = -1; streamQueueProcessFail(pTask->outputQueue); atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 6889a870d1..52777fd834 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -143,6 +143,7 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* ASSERT(left >= 0); if (left == 0) { taosArrayDestroy(pTask->checkReqIds); + pTask->checkReqIds = NULL; streamTaskLaunchRecover(pTask, version); } } else if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index f7252ed8a0..e9aba0bc39 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -175,6 +175,8 @@ void tFreeSStreamTask(SStreamTask* pTask) { } if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { taosArrayDestroy(pTask->shuffleDispatcher.dbInfo.pVgroupInfos); + taosArrayDestroy(pTask->checkReqIds); + pTask->checkReqIds = NULL; } if (pTask->pState) streamStateClose(pTask->pState); From d949008543973100510e6066305091521de1300d Mon Sep 17 00:00:00 2001 From: Ping Xiao Date: Tue, 27 Dec 2022 18:53:06 +0800 Subject: [PATCH 54/82] test: add test case for tmq replica 3 --- tests/parallel_test/cases.task | 4 ++-- tests/system-test/7-tmq/subscribeDb.py | 12 ++++++++---- tests/system-test/7-tmq/subscribeDb0.py | 7 +++++-- tests/system-test/7-tmq/tmqCommon.py | 2 +- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 57fff095e8..f5f04ff1e9 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -677,8 +677,8 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/create_wrong_topic.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/dropDbR3ConflictTransaction.py -N 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/basic5.py -,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb.py -,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb0.py +,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb.py -N 3 -n 3 +,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb0.py -N 3 -n 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb1.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb2.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeDb3.py diff --git a/tests/system-test/7-tmq/subscribeDb.py b/tests/system-test/7-tmq/subscribeDb.py index fd06eedefd..0fa9bcfbd4 100644 --- a/tests/system-test/7-tmq/subscribeDb.py +++ b/tests/system-test/7-tmq/subscribeDb.py @@ -61,7 +61,7 @@ class TDTestCase: def insertConsumerInfo(self,consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifmanualcommit,cdbName='cdb'): sql = "insert into %s.consumeinfo values "%cdbName - sql += "(now, %d, '%s', '%s', %d, %d, %d)"%(consumerId, topicList, keyList, expectrowcnt, ifcheckdata, ifmanualcommit) + sql += "(now + %ds, %d, '%s', '%s', %d, %d, %d)"%(consumerId, consumerId, topicList, keyList, expectrowcnt, ifcheckdata, ifmanualcommit) tdLog.info("consume info sql: %s"%sql) tdSql.query(sql) @@ -174,12 +174,13 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath self.initConsumerTable() - tdSql.execute("create database if not exists %s vgroups %d" %(parameterDict['dbName'], parameterDict['vgroups'])) + tdSql.execute("create database if not exists %s vgroups %d replica %d" %(parameterDict['dbName'], parameterDict['vgroups'], parameterDict['replica'])) prepareEnvThread = threading.Thread(target=self.prepareEnv, kwargs=parameterDict) prepareEnvThread.start() @@ -271,12 +272,13 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath self.initConsumerTable() - tdSql.execute("create database if not exists %s vgroups %d" %(parameterDict['dbName'], parameterDict['vgroups'])) + tdSql.execute("create database if not exists %s vgroups %d replica %d" %(parameterDict['dbName'], parameterDict['vgroups'], parameterDict['replica'])) prepareEnvThread = threading.Thread(target=self.prepareEnv, kwargs=parameterDict) prepareEnvThread.start() @@ -337,6 +339,7 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath @@ -406,12 +409,13 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath self.initConsumerTable() - tdSql.execute("create database if not exists %s vgroups %d" %(parameterDict['dbName'], parameterDict['vgroups'])) + tdSql.execute("create database if not exists %s vgroups %d replica %d" %(parameterDict['dbName'], parameterDict['vgroups'], parameterDict['replica'])) prepareEnvThread = threading.Thread(target=self.prepareEnv, kwargs=parameterDict) prepareEnvThread.start() diff --git a/tests/system-test/7-tmq/subscribeDb0.py b/tests/system-test/7-tmq/subscribeDb0.py index d4c5e2f87f..50ef52cb15 100644 --- a/tests/system-test/7-tmq/subscribeDb0.py +++ b/tests/system-test/7-tmq/subscribeDb0.py @@ -174,12 +174,13 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath self.initConsumerTable() - tdSql.execute("create database if not exists %s vgroups %d" %(parameterDict['dbName'], parameterDict['vgroups'])) + tdSql.execute("create database if not exists %s vgroups %d replica %d" %(parameterDict['dbName'], parameterDict['vgroups'], parameterDict['replica'])) prepareEnvThread = threading.Thread(target=self.prepareEnv, kwargs=parameterDict) prepareEnvThread.start() @@ -191,6 +192,7 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath @@ -254,12 +256,13 @@ class TDTestCase: 'ctbNum': 10, \ 'rowsPerTbl': 5000, \ 'batchNum': 100, \ + 'replica': self.replicaVar, \ 'startTs': 1640966400000} # 2022-01-01 00:00:00.000 parameterDict['cfg'] = cfgPath self.initConsumerTable() - tdSql.execute("create database if not exists %s vgroups %d" %(parameterDict['dbName'], parameterDict['vgroups'])) + tdSql.execute("create database if not exists %s vgroups %d replica %d" %(parameterDict['dbName'], parameterDict['vgroups'], parameterDict['replica'])) prepareEnvThread = threading.Thread(target=self.prepareEnv, kwargs=parameterDict) prepareEnvThread.start() diff --git a/tests/system-test/7-tmq/tmqCommon.py b/tests/system-test/7-tmq/tmqCommon.py index 141d013270..4cda062401 100644 --- a/tests/system-test/7-tmq/tmqCommon.py +++ b/tests/system-test/7-tmq/tmqCommon.py @@ -60,7 +60,7 @@ class TMQCom: def insertConsumerInfo(self,consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifmanualcommit,cdbName='cdb'): sql = "insert into %s.consumeinfo values "%cdbName - sql += "(now, %d, '%s', '%s', %d, %d, %d)"%(consumerId, topicList, keyList, expectrowcnt, ifcheckdata, ifmanualcommit) + sql += "(now + %ds, %d, '%s', '%s', %d, %d, %d)"%(consumerId, consumerId, topicList, keyList, expectrowcnt, ifcheckdata, ifmanualcommit) tdLog.info("consume info sql: %s"%sql) tdSql.query(sql) From 09681e37ca2ba7851585bb701712478b5659a937 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 28 Dec 2022 10:20:18 +0800 Subject: [PATCH 55/82] change systerm code and avoid mem leak --- source/dnode/mgmt/exe/dmMain.c | 5 +++++ source/libs/transport/src/transSvr.c | 15 ++++++++------- source/util/src/terror.c | 9 +++------ 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index a8103351b4..a432201413 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -268,6 +268,11 @@ int mainWindows(int argc, char **argv) { if (dmInit() != 0) { dError("failed to init dnode since %s", terrstr()); + + taosCleanupCfg(); + taosCloseLog(); + taosCleanupArgs(); + taosConvDestroy(); return -1; } diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 2b1f68d5f6..7384877313 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -1001,6 +1001,13 @@ void* transInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, uv_loop_init(srv->loop); char pipeName[PATH_MAX]; + + if (false == taosValidIpAndPort(srv->ip, srv->port)) { + terrno = TAOS_SYSTEM_ERROR(errno); + tError("invalid ip/port, %d:%d, reason:%s", srv->ip, srv->port, terrstr()); + goto End; + } + #if defined(WINDOWS) || defined(DARWIN) int ret = uv_pipe_init(srv->loop, &srv->pipeListen, 0); if (ret != 0) { @@ -1087,12 +1094,6 @@ void* transInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, } #endif - if (false == taosValidIpAndPort(srv->ip, srv->port)) { - terrno = TAOS_SYSTEM_ERROR(errno); - tError("invalid ip/port, %d:%d, reason:%s", srv->ip, srv->port, terrstr()); - goto End; - } - if (false == addHandleToAcceptloop(srv)) { goto End; } @@ -1185,8 +1186,8 @@ void transCloseServer(void* arg) { // impl later SServerObj* srv = arg; - tDebug("send quit msg to accept thread"); if (srv->inited) { + tDebug("send quit msg to accept thread"); uv_async_send(srv->pAcceptAsync); taosThreadJoin(srv->thread, NULL); SRV_RELEASE_UV(srv->loop); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index ff61c7cdc4..6f8b0d8e04 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -643,13 +643,10 @@ const char* tstrerror(int32_t err) { // this is a system errno if ((err & 0x00ff0000) == 0x00ff0000) { int32_t code = err & 0x0000ffff; - if (code >= 0 && code < 36) { - return strerror(code); - } else { - return "unknown err"; - } + // strerror can handle any invalid code + // invalid code return Unknown error + return strerror(code); } - int32_t s = 0; int32_t e = sizeof(errors) / sizeof(errors[0]); From eddaaf24e47b1606796cad4bea3802d0017d684b Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Wed, 28 Dec 2022 10:22:13 +0800 Subject: [PATCH 56/82] fix: escape error in create topic --- source/libs/parser/inc/parAst.h | 2 +- source/libs/parser/src/parAstCreater.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/source/libs/parser/inc/parAst.h b/source/libs/parser/inc/parAst.h index ef67c7536f..20a2520a79 100644 --- a/source/libs/parser/inc/parAst.h +++ b/source/libs/parser/inc/parAst.h @@ -196,7 +196,7 @@ SNode* createDropComponentNodeStmt(SAstCreateContext* pCxt, ENodeType type, cons SNode* createCreateTopicStmtUseQuery(SAstCreateContext* pCxt, bool ignoreExists, const SToken* pTopicName, SNode* pQuery); SNode* createCreateTopicStmtUseDb(SAstCreateContext* pCxt, bool ignoreExists, const SToken* pTopicName, - const SToken* pSubDbName, bool withMeta); + SToken* pSubDbName, bool withMeta); SNode* createCreateTopicStmtUseTable(SAstCreateContext* pCxt, bool ignoreExists, const SToken* pTopicName, SNode* pRealTable, bool withMeta); SNode* createDropTopicStmt(SAstCreateContext* pCxt, bool ignoreNotExists, const SToken* pTopicName); diff --git a/source/libs/parser/src/parAstCreater.c b/source/libs/parser/src/parAstCreater.c index 4077c27840..17d8297cf0 100644 --- a/source/libs/parser/src/parAstCreater.c +++ b/source/libs/parser/src/parAstCreater.c @@ -1579,8 +1579,11 @@ SNode* createCreateTopicStmtUseQuery(SAstCreateContext* pCxt, bool ignoreExists, } SNode* createCreateTopicStmtUseDb(SAstCreateContext* pCxt, bool ignoreExists, const SToken* pTopicName, - const SToken* pSubDbName, bool withMeta) { + SToken* pSubDbName, bool withMeta) { CHECK_PARSER_STATUS(pCxt); + if (!checkDbName(pCxt, pSubDbName, true)) { + return NULL; + } SCreateTopicStmt* pStmt = (SCreateTopicStmt*)nodesMakeNode(QUERY_NODE_CREATE_TOPIC_STMT); CHECK_OUT_OF_MEM(pStmt); COPY_STRING_FORM_ID_TOKEN(pStmt->topicName, pTopicName); From a29dea91a5d815dfaa5bdee0997af24d27d403a1 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Wed, 28 Dec 2022 13:32:42 +0800 Subject: [PATCH 57/82] fix: covert scalar param to data block --- source/libs/function/src/tudf.c | 44 ++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/source/libs/function/src/tudf.c b/source/libs/function/src/tudf.c index 0b309bc8f5..5232801dc8 100644 --- a/source/libs/function/src/tudf.c +++ b/source/libs/function/src/tudf.c @@ -841,36 +841,40 @@ int32_t convertScalarParamToDataBlock(SScalarParam *input, int32_t numOfCols, SS for (int32_t i = 0; i < numOfCols; ++i) { numOfRows = (input[i].numOfRows > numOfRows) ? input[i].numOfRows : numOfRows; } - output->info.rows = numOfRows; - output->pDataBlock = taosArrayInit(numOfCols, sizeof(SColumnInfoData)); - for (int32_t i = 0; i < numOfCols; ++i) { - if ((input+i)->numOfRows < numOfRows) { - SColumnInfoData* pColInfoData = (input+i)->columnData; - int32_t startRow = (input+i)->numOfRows; - int32_t expandRows = numOfRows - startRow; - colInfoDataEnsureCapacity(pColInfoData, numOfRows, false); + + // create the basic block info structure + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pInfo = input[i].columnData; + SColumnInfoData d = {0}; + d.info = pInfo->info; + + blockDataAppendColInfo(output, &d); + } + + blockDataEnsureCapacity(output, numOfRows); + + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pDest = taosArrayGet(output->pDataBlock, i); + + SColumnInfoData* pColInfoData = input[i].columnData; + colDataAssign(pDest, pColInfoData, input[i].numOfRows, &output->info); + + if (input[i].numOfRows < numOfRows) { + int32_t startRow = input[i].numOfRows; + int expandRows = numOfRows - startRow; bool isNull = colDataIsNull_s(pColInfoData, (input+i)->numOfRows - 1); if (isNull) { - colDataAppendNNULL(pColInfoData, startRow, expandRows); + colDataAppendNNULL(pDest, startRow, expandRows); } else { char* src = colDataGetData(pColInfoData, (input + i)->numOfRows - 1); - int32_t bytes = pColInfoData->info.bytes; - char* data = taosMemoryMalloc(bytes); - memcpy(data, src, bytes); for (int j = 0; j < expandRows; ++j) { - colDataAppend(pColInfoData, startRow+j, data, false); + colDataAppend(pDest, startRow+j, src, false); } //colDataAppendNItems(pColInfoData, startRow, data, expandRows); - taosMemoryFree(data); } } - - taosArrayPush(output->pDataBlock, (input + i)->columnData); - - if (IS_VAR_DATA_TYPE((input + i)->columnData->info.type)) { - output->info.hasVarCol = true; - } } + return 0; } From ad1ba74646b705d6c7718ee3aa390adc29201470 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Wed, 28 Dec 2022 13:52:40 +0800 Subject: [PATCH 58/82] fix: add block data clean up for the udf input data block --- source/libs/function/src/tudf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/function/src/tudf.c b/source/libs/function/src/tudf.c index 5232801dc8..e8206399b2 100644 --- a/source/libs/function/src/tudf.c +++ b/source/libs/function/src/tudf.c @@ -1829,7 +1829,7 @@ int32_t doCallUdfScalarFunc(UdfcFuncHandle handle, SScalarParam *input, int32_t taosArrayDestroy(resultBlock.pDataBlock); } - taosArrayDestroy(inputBlock.pDataBlock); + blockDataCleanup(&inputBlock); return err; } From 47d6af81a6f0b893379d0d30f2f26a9dff8b9394 Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Mon, 26 Dec 2022 12:07:15 +0800 Subject: [PATCH 59/82] fix: disable fma by default for old cpu --- cmake/cmake.define | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/cmake.define b/cmake/cmake.define index 542b4b4489..d32200bb91 100644 --- a/cmake/cmake.define +++ b/cmake/cmake.define @@ -141,13 +141,13 @@ ELSE () SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") ENDIF() - IF (COMPILER_SUPPORT_FMA) - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma") - ENDIF() IF ("${SIMD_SUPPORT}" MATCHES "true") - IF (COMPILER_SUPPORT_AVX) + IF (COMPILER_SUPPORT_FMA) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma") + ENDIF() + IF (COMPILER_SUPPORT_AVX) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") ENDIF() From 2680ebff380b7ea2fcd81de091f18f21f85f1562 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Wed, 28 Dec 2022 14:31:12 +0800 Subject: [PATCH 60/82] fix: set row for data block --- source/libs/function/src/tudf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/libs/function/src/tudf.c b/source/libs/function/src/tudf.c index e8206399b2..cbfc0e360b 100644 --- a/source/libs/function/src/tudf.c +++ b/source/libs/function/src/tudf.c @@ -875,6 +875,8 @@ int32_t convertScalarParamToDataBlock(SScalarParam *input, int32_t numOfCols, SS } } + output->info.rows = numOfRows; + return 0; } From ed98fddf7497b40eddbfe6f4ac559cf0aef0ce05 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Wed, 28 Dec 2022 14:37:56 +0800 Subject: [PATCH 61/82] enh: adjusting the operation mode of the stream thread pool --- docs/en/14-reference/12-config/index.md | 2 +- docs/zh/14-reference/12-config/index.md | 2 +- include/common/tglobal.h | 2 +- include/util/tworker.h | 22 ++- source/common/src/tglobal.c | 14 +- source/dnode/mgmt/mgmt_vnode/inc/vmInt.h | 28 ++-- source/dnode/mgmt/mgmt_vnode/src/vmWorker.c | 13 +- source/util/src/tworker.c | 148 ++++++++++++++++++-- 8 files changed, 185 insertions(+), 46 deletions(-) diff --git a/docs/en/14-reference/12-config/index.md b/docs/en/14-reference/12-config/index.md index bb5516ae70..274c995098 100644 --- a/docs/en/14-reference/12-config/index.md +++ b/docs/en/14-reference/12-config/index.md @@ -733,7 +733,7 @@ To prevent system resource from being exhausted by multiple concurrent streams, | 42 | numOfCommitThreads | Yes | Yes | | 43 | numOfMnodeReadThreads | No | Yes | | 44 | numOfVnodeQueryThreads | No | Yes | -| 45 | numOfVnodeStreamThreads | No | Yes | +| 45 | ratioOfVnodeStreamThreads | No | Yes | | 46 | numOfVnodeFetchThreads | No | Yes | | 47 | numOfVnodeRsmaThreads | No | Yes | | 48 | numOfQnodeQueryThreads | No | Yes | diff --git a/docs/zh/14-reference/12-config/index.md b/docs/zh/14-reference/12-config/index.md index 145c5eed93..959438f48c 100644 --- a/docs/zh/14-reference/12-config/index.md +++ b/docs/zh/14-reference/12-config/index.md @@ -709,7 +709,7 @@ charset 的有效值是 UTF-8。 | 42 | numOfCommitThreads | 是 | 是 | | | 43 | numOfMnodeReadThreads | 否 | 是 | | | 44 | numOfVnodeQueryThreads | 否 | 是 | | -| 45 | numOfVnodeStreamThreads | 否 | 是 | | +| 45 | ratioOfVnodeStreamThreads | 否 | 是 | | | 46 | numOfVnodeFetchThreads | 否 | 是 | | | 47 | numOfVnodeRsmaThreads | 否 | 是 | | | 48 | numOfQnodeQueryThreads | 否 | 是 | | diff --git a/include/common/tglobal.h b/include/common/tglobal.h index 9e8a139b31..d445fc26e8 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -55,7 +55,7 @@ extern int32_t tsNumOfMnodeQueryThreads; extern int32_t tsNumOfMnodeFetchThreads; extern int32_t tsNumOfMnodeReadThreads; extern int32_t tsNumOfVnodeQueryThreads; -extern int32_t tsNumOfVnodeStreamThreads; +extern float tsRatioOfVnodeStreamThreads; extern int32_t tsNumOfVnodeFetchThreads; extern int32_t tsNumOfVnodeRsmaThreads; extern int32_t tsNumOfQnodeQueryThreads; diff --git a/include/util/tworker.h b/include/util/tworker.h index 8766f87a08..0636f16dbb 100644 --- a/include/util/tworker.h +++ b/include/util/tworker.h @@ -17,6 +17,7 @@ #define _TD_UTIL_WORKER_H_ #include "tqueue.h" +#include "tarray.h" #ifdef __cplusplus extern "C" { @@ -26,10 +27,10 @@ typedef struct SQWorkerPool SQWorkerPool; typedef struct SWWorkerPool SWWorkerPool; typedef struct SQWorker { - int32_t id; // worker id - int64_t pid; // thread pid - TdThread thread; // thread id - SQWorkerPool *pool; + int32_t id; // worker id + int64_t pid; // thread pid + TdThread thread; // thread id + void *pool; } SQWorker; typedef struct SQWorkerPool { @@ -42,6 +43,14 @@ typedef struct SQWorkerPool { TdThreadMutex mutex; } SQWorkerPool; +typedef struct SAutoQWorkerPool { + float ratio; + STaosQset *qset; + const char *name; + SArray *workers; + TdThreadMutex mutex; +} SAutoQWorkerPool; + typedef struct SWWorker { int32_t id; // worker id int64_t pid; // thread pid @@ -65,6 +74,11 @@ void tQWorkerCleanup(SQWorkerPool *pool); STaosQueue *tQWorkerAllocQueue(SQWorkerPool *pool, void *ahandle, FItem fp); void tQWorkerFreeQueue(SQWorkerPool *pool, STaosQueue *queue); +int32_t tAutoQWorkerInit(SAutoQWorkerPool *pool); +void tAutoQWorkerCleanup(SAutoQWorkerPool *pool); +STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem fp); +void tAutoQWorkerFreeQueue(SAutoQWorkerPool *pool, STaosQueue *queue); + int32_t tWWorkerInit(SWWorkerPool *pool); void tWWorkerCleanup(SWWorkerPool *pool); STaosQueue *tWWorkerAllocQueue(SWWorkerPool *pool, void *ahandle, FItems fp); diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index d3fd625a91..98b9b566ec 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -47,7 +47,7 @@ int32_t tsNumOfMnodeQueryThreads = 4; int32_t tsNumOfMnodeFetchThreads = 1; int32_t tsNumOfMnodeReadThreads = 1; int32_t tsNumOfVnodeQueryThreads = 4; -int32_t tsNumOfVnodeStreamThreads = 2; +float tsRatioOfVnodeStreamThreads = 1.0; int32_t tsNumOfVnodeFetchThreads = 4; int32_t tsNumOfVnodeRsmaThreads = 2; int32_t tsNumOfQnodeQueryThreads = 4; @@ -392,9 +392,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { tsNumOfVnodeQueryThreads = TMAX(tsNumOfVnodeQueryThreads, 4); if (cfgAddInt32(pCfg, "numOfVnodeQueryThreads", tsNumOfVnodeQueryThreads, 4, 1024, 0) != 0) return -1; - tsNumOfVnodeStreamThreads = tsNumOfCores / 4; - tsNumOfVnodeStreamThreads = TMAX(tsNumOfVnodeStreamThreads, 4); - if (cfgAddInt32(pCfg, "numOfVnodeStreamThreads", tsNumOfVnodeStreamThreads, 4, 1024, 0) != 0) return -1; + if (cfgAddFloat(pCfg, "ratioOfVnodeStreamThreads", tsRatioOfVnodeStreamThreads, 0.01, 100, 0) != 0) return -1; tsNumOfVnodeFetchThreads = tsNumOfCores / 4; tsNumOfVnodeFetchThreads = TMAX(tsNumOfVnodeFetchThreads, 4); @@ -513,11 +511,9 @@ static int32_t taosUpdateServerCfg(SConfig *pCfg) { pItem->stype = stype; } - pItem = cfgGetItem(tsCfg, "numOfVnodeStreamThreads"); + pItem = cfgGetItem(tsCfg, "ratioOfVnodeStreamThreads"); if (pItem != NULL && pItem->stype == CFG_STYPE_DEFAULT) { - tsNumOfVnodeStreamThreads = numOfCores / 4; - tsNumOfVnodeStreamThreads = TMAX(tsNumOfVnodeStreamThreads, 4); - pItem->i32 = tsNumOfVnodeStreamThreads; + pItem->fval = tsRatioOfVnodeStreamThreads; pItem->stype = stype; } @@ -710,7 +706,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsNumOfCommitThreads = cfgGetItem(pCfg, "numOfCommitThreads")->i32; tsNumOfMnodeReadThreads = cfgGetItem(pCfg, "numOfMnodeReadThreads")->i32; tsNumOfVnodeQueryThreads = cfgGetItem(pCfg, "numOfVnodeQueryThreads")->i32; - tsNumOfVnodeStreamThreads = cfgGetItem(pCfg, "numOfVnodeStreamThreads")->i32; + tsRatioOfVnodeStreamThreads = cfgGetItem(pCfg, "ratioOfVnodeStreamThreads")->fval; tsNumOfVnodeFetchThreads = cfgGetItem(pCfg, "numOfVnodeFetchThreads")->i32; tsNumOfVnodeRsmaThreads = cfgGetItem(pCfg, "numOfVnodeRsmaThreads")->i32; tsNumOfQnodeQueryThreads = cfgGetItem(pCfg, "numOfQnodeQueryThreads")->i32; diff --git a/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h b/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h index b5c554e0ca..6e724f4d43 100644 --- a/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h +++ b/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h @@ -26,20 +26,20 @@ extern "C" { #endif typedef struct SVnodeMgmt { - SDnodeData *pData; - SMsgCb msgCb; - const char *path; - const char *name; - SQWorkerPool queryPool; - SQWorkerPool streamPool; - SWWorkerPool fetchPool; - SSingleWorker mgmtWorker; - SHashObj *hash; - TdThreadRwlock lock; - SVnodesStat state; - STfs *pTfs; - TdThread thread; - bool stop; + SDnodeData *pData; + SMsgCb msgCb; + const char *path; + const char *name; + SQWorkerPool queryPool; + SAutoQWorkerPool streamPool; + SWWorkerPool fetchPool; + SSingleWorker mgmtWorker; + SHashObj *hash; + TdThreadRwlock lock; + SVnodesStat state; + STfs *pTfs; + TdThread thread; + bool stop; } SVnodeMgmt; typedef struct { diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index 5afb9a0512..202dc50ac6 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -318,7 +318,7 @@ int32_t vmAllocQueue(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { (void)tMultiWorkerInit(&pVnode->pApplyW, &acfg); pVnode->pQueryQ = tQWorkerAllocQueue(&pMgmt->queryPool, pVnode, (FItem)vmProcessQueryQueue); - pVnode->pStreamQ = tQWorkerAllocQueue(&pMgmt->streamPool, pVnode, (FItem)vmProcessStreamQueue); + pVnode->pStreamQ = tAutoQWorkerAllocQueue(&pMgmt->streamPool, pVnode, (FItem)vmProcessStreamQueue); pVnode->pFetchQ = tWWorkerAllocQueue(&pMgmt->fetchPool, pVnode, (FItems)vmProcessFetchQueue); if (pVnode->pWriteW.queue == NULL || pVnode->pSyncW.queue == NULL || pVnode->pSyncCtrlW.queue == NULL || @@ -344,7 +344,7 @@ int32_t vmAllocQueue(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { void vmFreeQueue(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { tQWorkerFreeQueue(&pMgmt->queryPool, pVnode->pQueryQ); - tQWorkerFreeQueue(&pMgmt->streamPool, pVnode->pStreamQ); + tAutoQWorkerFreeQueue(&pMgmt->streamPool, pVnode->pStreamQ); tWWorkerFreeQueue(&pMgmt->fetchPool, pVnode->pFetchQ); pVnode->pQueryQ = NULL; pVnode->pStreamQ = NULL; @@ -359,11 +359,10 @@ int32_t vmStartWorker(SVnodeMgmt *pMgmt) { pQPool->max = tsNumOfVnodeQueryThreads; if (tQWorkerInit(pQPool) != 0) return -1; - SQWorkerPool *pStreamPool = &pMgmt->streamPool; + SAutoQWorkerPool *pStreamPool = &pMgmt->streamPool; pStreamPool->name = "vnode-stream"; - pStreamPool->min = tsNumOfVnodeStreamThreads; - pStreamPool->max = tsNumOfVnodeStreamThreads; - if (tQWorkerInit(pStreamPool) != 0) return -1; + pStreamPool->ratio = tsRatioOfVnodeStreamThreads; + if (tAutoQWorkerInit(pStreamPool) != 0) return -1; SWWorkerPool *pFPool = &pMgmt->fetchPool; pFPool->name = "vnode-fetch"; @@ -385,7 +384,7 @@ int32_t vmStartWorker(SVnodeMgmt *pMgmt) { void vmStopWorker(SVnodeMgmt *pMgmt) { tQWorkerCleanup(&pMgmt->queryPool); - tQWorkerCleanup(&pMgmt->streamPool); + tAutoQWorkerCleanup(&pMgmt->streamPool); tWWorkerCleanup(&pMgmt->fetchPool); dDebug("vnode workers are closed"); } diff --git a/source/util/src/tworker.c b/source/util/src/tworker.c index 863cee9b08..bb9d1e44af 100644 --- a/source/util/src/tworker.c +++ b/source/util/src/tworker.c @@ -36,7 +36,7 @@ int32_t tQWorkerInit(SQWorkerPool *pool) { worker->pool = pool; } - uDebug("worker:%s is initialized, min:%d max:%d", pool->name, pool->min, pool->max); + uInfo("worker:%s is initialized, min:%d max:%d", pool->name, pool->min, pool->max); return 0; } @@ -51,8 +51,10 @@ void tQWorkerCleanup(SQWorkerPool *pool) { for (int32_t i = 0; i < pool->max; ++i) { SQWorker *worker = pool->workers + i; if (taosCheckPthreadValid(worker->thread)) { + uInfo("worker:%s:%d is stopping", pool->name, worker->id); taosThreadJoin(worker->thread, NULL); taosThreadClear(&worker->thread); + uInfo("worker:%s:%d is stopped", pool->name, worker->id); } } @@ -60,7 +62,7 @@ void tQWorkerCleanup(SQWorkerPool *pool) { taosCloseQset(pool->qset); taosThreadMutexDestroy(&pool->mutex); - uDebug("worker:%s is closed", pool->name); + uInfo("worker:%s is closed", pool->name); } static void *tQWorkerThreadFp(SQWorker *worker) { @@ -119,7 +121,7 @@ STaosQueue *tQWorkerAllocQueue(SQWorkerPool *pool, void *ahandle, FItem fp) { taosThreadAttrDestroy(&thAttr); pool->num++; - uDebug("worker:%s:%d is launched, total:%d", pool->name, worker->id, pool->num); + uInfo("worker:%s:%d is launched, total:%d", pool->name, worker->id, pool->num); } while (pool->num < pool->min); } @@ -130,7 +132,132 @@ STaosQueue *tQWorkerAllocQueue(SQWorkerPool *pool, void *ahandle, FItem fp) { } void tQWorkerFreeQueue(SQWorkerPool *pool, STaosQueue *queue) { - uDebug("worker:%s, queue:%p is freed", pool->name, queue); + uInfo("worker:%s, queue:%p is freed", pool->name, queue); + taosCloseQueue(queue); +} + +int32_t tAutoQWorkerInit(SAutoQWorkerPool *pool) { + pool->qset = taosOpenQset(); + pool->workers = taosArrayInit(2, sizeof(SQWorker)); + if (pool->workers == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + (void)taosThreadMutexInit(&pool->mutex, NULL); + + uInfo("worker:%s is initialized as auto", pool->name); + return 0; +} + +void tAutoQWorkerCleanup(SAutoQWorkerPool *pool) { + int32_t size = taosArrayGetSize(pool->workers); + for (int32_t i = 0; i < size; ++i) { + SQWorker *worker = taosArrayGet(pool->workers, i); + if (taosCheckPthreadValid(worker->thread)) { + taosQsetThreadResume(pool->qset); + } + } + + for (int32_t i = 0; i < size; ++i) { + SQWorker *worker = taosArrayGet(pool->workers, i); + if (taosCheckPthreadValid(worker->thread)) { + uInfo("worker:%s:%d is stopping", pool->name, worker->id); + taosThreadJoin(worker->thread, NULL); + taosThreadClear(&worker->thread); + uInfo("worker:%s:%d is stopped", pool->name, worker->id); + } + } + + taosArrayDestroy(pool->workers); + taosCloseQset(pool->qset); + taosThreadMutexDestroy(&pool->mutex); + + uInfo("worker:%s is closed", pool->name); +} + +static void *tAutoQWorkerThreadFp(SQWorker *worker) { + SAutoQWorkerPool *pool = worker->pool; + SQueueInfo qinfo = {0}; + void *msg = NULL; + int32_t code = 0; + + taosBlockSIGPIPE(); + setThreadName(pool->name); + worker->pid = taosGetSelfPthreadId(); + uInfo("worker:%s:%d is running, thread:%08" PRId64, pool->name, worker->id, worker->pid); + + while (1) { + if (taosReadQitemFromQset(pool->qset, (void **)&msg, &qinfo) == 0) { + uInfo("worker:%s:%d qset:%p, got no message and exiting, thread:%08" PRId64, pool->name, worker->id, pool->qset, + worker->pid); + break; + } + + if (qinfo.fp != NULL) { + qinfo.workerId = worker->id; + qinfo.threadNum = taosArrayGetSize(pool->workers); + (*((FItem)qinfo.fp))(&qinfo, msg); + } + + taosUpdateItemSize(qinfo.queue, 1); + } + + return NULL; +} + +STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem fp) { + STaosQueue *queue = taosOpenQueue(); + if (queue == NULL) return NULL; + + taosThreadMutexLock(&pool->mutex); + taosSetQueueFp(queue, fp, NULL); + taosAddIntoQset(pool->qset, queue, ahandle); + + int32_t queueNum = taosGetQueueNumber(pool->qset); + int32_t curWorkerNum = taosArrayGetSize(pool->workers); + int32_t dstWorkerNum = ceil(queueNum * pool->ratio); + if (dstWorkerNum < 1) dstWorkerNum = 1; + // spawn a thread to process queue + + while (curWorkerNum < dstWorkerNum) { + SQWorker wobj = { + .id = (int32_t)taosArrayGetSize(pool->workers), + .pool = pool, + }; + SQWorker *worker = taosArrayPush(pool->workers, &wobj); + if (worker == NULL) { + uError("worker:%s:%d failed to create, total:%d", pool->name, wobj.id, (int32_t)taosArrayGetSize(pool->workers)); + taosCloseQueue(queue); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + TdThreadAttr thAttr; + taosThreadAttrInit(&thAttr); + taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE); + + if (taosThreadCreate(&worker->thread, &thAttr, (ThreadFp)tAutoQWorkerThreadFp, worker) != 0) { + uError("worker:%s:%d failed to create thread, total:%d", pool->name, wobj.id, + (int32_t)taosArrayGetSize(pool->workers)); + taosCloseQueue(queue); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + taosThreadAttrDestroy(&thAttr); + uInfo("worker:%s:%d is launched, total:%d", pool->name, worker->id, (int32_t)taosArrayGetSize(pool->workers)); + + curWorkerNum++; + } + + taosThreadMutexUnlock(&pool->mutex); + uInfo("worker:%s, queue:%p is allocated, ahandle:%p", pool->name, queue, ahandle); + + return queue; +} + +void tAutoQWorkerFreeQueue(SAutoQWorkerPool *pool, STaosQueue *queue) { + uInfo("worker:%s, queue:%p is freed", pool->name, queue); taosCloseQueue(queue); } @@ -152,7 +279,7 @@ int32_t tWWorkerInit(SWWorkerPool *pool) { worker->pool = pool; } - uDebug("worker:%s is initialized, max:%d", pool->name, pool->max); + uInfo("worker:%s is initialized, max:%d", pool->name, pool->max); return 0; } @@ -169,17 +296,19 @@ void tWWorkerCleanup(SWWorkerPool *pool) { for (int32_t i = 0; i < pool->max; ++i) { SWWorker *worker = pool->workers + i; if (taosCheckPthreadValid(worker->thread)) { + uInfo("worker:%s:%d is stopping", pool->name, worker->id); taosThreadJoin(worker->thread, NULL); taosThreadClear(&worker->thread); taosFreeQall(worker->qall); taosCloseQset(worker->qset); + uInfo("worker:%s:%d is stopped", pool->name, worker->id); } } taosMemoryFreeClear(pool->workers); taosThreadMutexDestroy(&pool->mutex); - uDebug("worker:%s is closed", pool->name); + uInfo("worker:%s is closed", pool->name); } static void *tWWorkerThreadFp(SWWorker *worker) { @@ -235,7 +364,7 @@ STaosQueue *tWWorkerAllocQueue(SWWorkerPool *pool, void *ahandle, FItems fp) { taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE); if (taosThreadCreate(&worker->thread, &thAttr, (ThreadFp)tWWorkerThreadFp, worker) != 0) goto _OVER; - uDebug("worker:%s:%d is launched, max:%d", pool->name, worker->id, pool->max); + uInfo("worker:%s:%d is launched, max:%d", pool->name, worker->id, pool->max); pool->nextId = (pool->nextId + 1) % pool->max; taosThreadAttrDestroy(&thAttr); @@ -259,13 +388,14 @@ _OVER: } else { while (worker->pid <= 0) taosMsleep(10); queue->threadId = worker->pid; - uInfo("worker:%s, queue:%p is allocated, ahandle:%p thread:%08" PRId64, pool->name, queue, ahandle, queue->threadId); + uInfo("worker:%s, queue:%p is allocated, ahandle:%p thread:%08" PRId64, pool->name, queue, ahandle, + queue->threadId); return queue; } } void tWWorkerFreeQueue(SWWorkerPool *pool, STaosQueue *queue) { - uDebug("worker:%s, queue:%p is freed", pool->name, queue); + uInfo("worker:%s, queue:%p is freed", pool->name, queue); taosCloseQueue(queue); } From 34c454b13c04e89a9fdf3a5720a55dcecfc6c78f Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 28 Dec 2022 15:19:35 +0800 Subject: [PATCH 62/82] handle except on taosd rpc-client --- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 756f97d1d0..0ff41d429e 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -111,12 +111,12 @@ static void dmProcessRpcMsg(SDnode *pDnode, SRpcMsg *pRpc, SEpSet *pEpSet) { dGError("msg:%p, type:%s pCont is NULL", pRpc, TMSG_INFO(pRpc->msgType)); terrno = TSDB_CODE_INVALID_MSG_LEN; goto _OVER; - } /* else if ((pRpc->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pRpc->code == TSDB_CODE_RPC_BROKEN_LINK) && - (!IsReq(pRpc)) && (pRpc->pCont == NULL)) { - dGError("msg:%p, type:%s pCont is NULL, err: %s", pRpc, TMSG_INFO(pRpc->msgType), tstrerror(pRpc->code)); - terrno = pRpc->code; - goto _OVER; - }*/ + } else if ((pRpc->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pRpc->code == TSDB_CODE_RPC_BROKEN_LINK) && + (!IsReq(pRpc)) && (pRpc->pCont == NULL)) { + dGError("msg:%p, type:%s pCont is NULL, err: %s", pRpc, TMSG_INFO(pRpc->msgType), tstrerror(pRpc->code)); + terrno = pRpc->code; + goto _OVER; + } if (pHandle->defaultNtype == NODE_END) { dGError("msg:%p, type:%s not processed since no handle", pRpc, TMSG_INFO(pRpc->msgType)); @@ -248,9 +248,9 @@ static inline void dmReleaseHandle(SRpcHandleInfo *pHandle, int8_t type) { rpcRe static bool rpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_MNODE_NOT_FOUND || - code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED || - code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_SYN_RESTORING || code == TSDB_CODE_VND_STOPPED || - code == TSDB_CODE_APP_IS_STARTING || code == TSDB_CODE_APP_IS_STOPPING) { + code == TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED || code == TSDB_CODE_SYN_NOT_LEADER || + code == TSDB_CODE_SYN_RESTORING || code == TSDB_CODE_VND_STOPPED || code == TSDB_CODE_APP_IS_STARTING || + code == TSDB_CODE_APP_IS_STOPPING) { if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || msgType == TDMT_SCH_MERGE_FETCH) { return false; From 4aae5244444a14922dddfb22baade1ebbb9ea96d Mon Sep 17 00:00:00 2001 From: wade zhang <95411902+gccgdb1234@users.noreply.github.com> Date: Wed, 28 Dec 2022 15:42:45 +0800 Subject: [PATCH 63/82] Update 24-show.md --- docs/en/12-taos-sql/24-show.md | 62 +++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/docs/en/12-taos-sql/24-show.md b/docs/en/12-taos-sql/24-show.md index 08e8df6252..ae34f9e459 100644 --- a/docs/en/12-taos-sql/24-show.md +++ b/docs/en/12-taos-sql/24-show.md @@ -178,75 +178,135 @@ SHOW TABLE DISTRIBUTED table_name; Shows how table data is distributed. -Examples: show table distributed d0\G; Display the block distribution of table `d0` in detailed format. +Examples: Below is an example of this command to display the block distribution of table `d0` in detailed format. +```sql +show table distributed d0\G; +``` + +
+ Show Example +

 *************************** 1.row ***************************
 _block_dist: Total_Blocks=[5] Total_Size=[93.65 Kb] Average_size=[18.73 Kb] Compression_Ratio=[23.98 %]
 
 Total_Blocks :  Table `d0` contains total 5 blocks
+
 Total_Size:  The total size of all the data blocks in table `d0` is 93.65 KB 
+
 Average_size:  The average size of each block is 18.73 KB
+
 Compression_Ratio: The data compression rate is 23.98%
  
 *************************** 2.row ***************************
 _block_dist: Total_Rows=[20000] Inmem_Rows=[0] MinRows=[3616] MaxRows=[4096] Average_Rows=[4000]
 
 Total_Rows: Table `d0` contains 20,000 rows
+
 Inmem_Rows: The rows still in memory, i.e. not committed in disk, is 0, i.e. none such rows
+
 MinRows:  The minimum number of rows in a block is 3,616 
+
 MaxRows: The maximum number of rows in a block is 4,096B
+
 Average_Rows: The average number of rows in a block is 4,000
 
 *************************** 3.row ***************************
 _block_dist: Total_Tables=[1] Total_Files=[2]
 
 Total_Tables:  The number of child tables, 1 in this example
+
 Total_Files:   The number of files storing the table's data, 2 in this example
 
 *************************** 4.row ***************************
+
 _block_dist: --------------------------------------------------------------------------------
+
 *************************** 5.row ***************************
+
 _block_dist: 0100 |
+
 *************************** 6.row ***************************
+
 _block_dist: 0299 |
+
 *************************** 7.row ***************************
+
 _block_dist: 0498 |
+
 *************************** 8.row ***************************
+
 _block_dist: 0697 |
+
 *************************** 9.row ***************************
+
 _block_dist: 0896 |
+
 *************************** 10.row ***************************
+
 _block_dist: 1095 |
+
 *************************** 11.row ***************************
+
 _block_dist: 1294 |
+
 *************************** 12.row ***************************
+
 _block_dist: 1493 |
+
 *************************** 13.row ***************************
+
 _block_dist: 1692 |
+
 *************************** 14.row ***************************
+
 _block_dist: 1891 |
+
 *************************** 15.row ***************************
+
 _block_dist: 2090 |
+
 *************************** 16.row ***************************
+
 _block_dist: 2289 |
+
 *************************** 17.row ***************************
+
 _block_dist: 2488 |
+
 *************************** 18.row ***************************
+
 _block_dist: 2687 |
+
 *************************** 19.row ***************************
+
 _block_dist: 2886 |
+
 *************************** 20.row ***************************
+
 _block_dist: 3085 |
+
 *************************** 21.row ***************************
+
 _block_dist: 3284 |
+
 *************************** 22.row ***************************
+
 _block_dist: 3483 |||||||||||||||||  1 (20.00%)
+
 *************************** 23.row ***************************
+
 _block_dist: 3682 |
+
 *************************** 24.row ***************************
+
 _block_dist: 3881 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||  4 (80.00%)
+
 Query OK, 24 row(s) in set (0.002444s)
 
+
+
+ The above show the block distribution percentage according to the number of rows in each block. In the above example, `_block_dist: 3483 ||||||||||||||||| 1 (20.00%)` means there is one block whose rows is between 3,483 and 3,681. `_block_dist: 3881 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 4 (80.00%)` means there are 4 blocks whose rows is between 3,881 and 4,096. The number of blocks whose rows fall in other range is zero. ## SHOW TAGS From d1001dde7e184f4e43ff1c2056459f92f03f977b Mon Sep 17 00:00:00 2001 From: wade zhang <95411902+gccgdb1234@users.noreply.github.com> Date: Wed, 28 Dec 2022 15:44:09 +0800 Subject: [PATCH 64/82] Update 24-show.md --- docs/en/12-taos-sql/24-show.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/en/12-taos-sql/24-show.md b/docs/en/12-taos-sql/24-show.md index ae34f9e459..ace0e4b70c 100644 --- a/docs/en/12-taos-sql/24-show.md +++ b/docs/en/12-taos-sql/24-show.md @@ -307,7 +307,9 @@ Query OK, 24 row(s) in set (0.002444s) - The above show the block distribution percentage according to the number of rows in each block. In the above example, `_block_dist: 3483 ||||||||||||||||| 1 (20.00%)` means there is one block whose rows is between 3,483 and 3,681. `_block_dist: 3881 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 4 (80.00%)` means there are 4 blocks whose rows is between 3,881 and 4,096. The number of blocks whose rows fall in other range is zero. + The above show the block distribution percentage according to the number of rows in each block. In the above example, we can get below information: + - `_block_dist: 3483 ||||||||||||||||| 1 (20.00%)` means there is one block whose rows is between 3,483 and 3,681. + - `_block_dist: 3881 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 4 (80.00%)` means there are 4 blocks whose rows is between 3,881 and 4,096. - The number of blocks whose rows fall in other range is zero. ## SHOW TAGS From cb7c141660a516c6fd1c4436afcb46570f119323 Mon Sep 17 00:00:00 2001 From: wade zhang <95411902+gccgdb1234@users.noreply.github.com> Date: Wed, 28 Dec 2022 15:44:40 +0800 Subject: [PATCH 65/82] Update 24-show.md --- docs/en/12-taos-sql/24-show.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/12-taos-sql/24-show.md b/docs/en/12-taos-sql/24-show.md index ace0e4b70c..2db3e7cb31 100644 --- a/docs/en/12-taos-sql/24-show.md +++ b/docs/en/12-taos-sql/24-show.md @@ -309,7 +309,7 @@ Query OK, 24 row(s) in set (0.002444s) The above show the block distribution percentage according to the number of rows in each block. In the above example, we can get below information: - `_block_dist: 3483 ||||||||||||||||| 1 (20.00%)` means there is one block whose rows is between 3,483 and 3,681. - - `_block_dist: 3881 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 4 (80.00%)` means there are 4 blocks whose rows is between 3,881 and 4,096. - The number of blocks whose rows fall in other range is zero. + - `_block_dist: 3881 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 4 (80.00%)` means there are 4 blocks whose rows is between 3,881 and 4,096. - The number of blocks whose rows fall in other range is zero. ## SHOW TAGS From 0721ac3d560ab83d0cf307d23d13d3e6599605ad Mon Sep 17 00:00:00 2001 From: wade zhang <95411902+gccgdb1234@users.noreply.github.com> Date: Wed, 28 Dec 2022 15:46:46 +0800 Subject: [PATCH 66/82] Update 24-show.md --- docs/zh/12-taos-sql/24-show.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/zh/12-taos-sql/24-show.md b/docs/zh/12-taos-sql/24-show.md index a65746e7f9..2b875199b5 100644 --- a/docs/zh/12-taos-sql/24-show.md +++ b/docs/zh/12-taos-sql/24-show.md @@ -183,6 +183,10 @@ SHOW TABLE DISTRIBUTED table_name; 语句: show table distributed d0\G; 竖行显示表 d0 的 BLOCK 分布情况 +
+ 显示示例 +

+
 *************************** 1.row ***************************
 
 _block_dist: Total_Blocks=[5] Total_Size=[93.65 Kb] Average_size=[18.73 Kb] Compression_Ratio=[23.98 %]
@@ -244,6 +248,8 @@ _block_dist: 3881 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
 
 Query OK, 24 row(s) in set (0.002444s)
 
+
+
上面是块中包含数据行数的块儿分布情况图,这里的 0100 0299 0498 … 表示的是每个块中包含的数据行数,上面的意思就是这个表的 5 个块,分布在 3483 ~3681 行的块有 1 个,占整个块的 20%,分布在 3881 ~ 4096(最大行数)的块数为 4 个,占整个块的 80%, 其它区域内分布块数为 0。 From b1d141ecab7694640c921558585f33b840bca54b Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Wed, 28 Dec 2022 16:10:53 +0800 Subject: [PATCH 67/82] fix: prohibit triggering modes other than at_once in non window stream --- source/libs/parser/src/parTranslater.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 37703534d6..0a20484179 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -5640,7 +5640,8 @@ static int32_t addSubtableInfoToCreateStreamQuery(STranslateContext* pCxt, SCrea return code; } -static int32_t checkStreamQuery(STranslateContext* pCxt, SSelectStmt* pSelect) { +static int32_t checkStreamQuery(STranslateContext* pCxt, SCreateStreamStmt* pStmt) { + SSelectStmt* pSelect = (SSelectStmt*)pStmt->pQuery; if (TSDB_DATA_TYPE_TIMESTAMP != ((SExprNode*)nodesListGetNode(pSelect->pProjectionList, 0))->resType.type || !pSelect->isTimeLineResult || crossTableWithoutAggOper(pSelect) || NULL != pSelect->pOrderByList || crossTableWithUdaf(pSelect)) { @@ -5650,6 +5651,10 @@ static int32_t checkStreamQuery(STranslateContext* pCxt, SSelectStmt* pSelect) { return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, "SUBTABLE expression must be of VARCHAR type"); } + if (NULL == pSelect->pWindow && STREAM_TRIGGER_AT_ONCE != pStmt->pOptions->triggerType) { + return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, + "The trigger mode of non window query can only be AT_ONCE"); + } return TSDB_CODE_SUCCESS; } @@ -5663,7 +5668,7 @@ static int32_t buildCreateStreamQuery(STranslateContext* pCxt, SCreateStreamStmt code = translateQuery(pCxt, pStmt->pQuery); } if (TSDB_CODE_SUCCESS == code) { - code = checkStreamQuery(pCxt, (SSelectStmt*)pStmt->pQuery); + code = checkStreamQuery(pCxt, pStmt); } if (TSDB_CODE_SUCCESS == code) { getSourceDatabase(pStmt->pQuery, pCxt->pParseCxt->acctId, pReq->sourceDB); From 5c36d90bc27e7b3690d535c36bfc7ad158922da0 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Wed, 28 Dec 2022 16:18:24 +0800 Subject: [PATCH 68/82] fix: free the column info array --- source/libs/function/src/tudf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/function/src/tudf.c b/source/libs/function/src/tudf.c index cbfc0e360b..acfd5c07af 100644 --- a/source/libs/function/src/tudf.c +++ b/source/libs/function/src/tudf.c @@ -1830,8 +1830,8 @@ int32_t doCallUdfScalarFunc(UdfcFuncHandle handle, SScalarParam *input, int32_t convertDataBlockToScalarParm(&resultBlock, output); taosArrayDestroy(resultBlock.pDataBlock); } - - blockDataCleanup(&inputBlock); + + blockDataFreeRes(&inputBlock); return err; } From c64fbb59ea0fefecd6d6a4994bb02d59752bbb93 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Wed, 28 Dec 2022 16:39:02 +0800 Subject: [PATCH 69/82] fix: support_vnodes and the max supported vgroups mis-match --- source/dnode/mnode/impl/inc/mndDef.h | 1 + source/dnode/mnode/impl/src/mndVgroup.c | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 4e93a1d96e..2f824b48b4 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -193,6 +193,7 @@ typedef struct { int64_t lastAccessTime; int32_t accessTimes; int32_t numOfVnodes; + int32_t numOfOtherNodes; int32_t numOfSupportVnodes; float numOfCores; int64_t memTotal; diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index 31ab1f3259..c6b0fe49a0 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -425,6 +425,7 @@ void *mndBuildDropVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVgOb static bool mndResetDnodesArrayFp(SMnode *pMnode, void *pObj, void *p1, void *p2, void *p3) { SDnodeObj *pDnode = pObj; pDnode->numOfVnodes = 0; + pDnode->numOfOtherNodes = 0; return true; } @@ -447,7 +448,7 @@ static bool mndBuildDnodesArrayFp(SMnode *pMnode, void *pObj, void *p1, void *p2 pDnode->numOfVnodes, pDnode->numOfSupportVnodes, isMnode, online, pDnode->memAvail, pDnode->memUsed); if (isMnode) { - pDnode->numOfVnodes++; + pDnode->numOfOtherNodes++; } if (online && pDnode->numOfSupportVnodes > 0) { @@ -468,14 +469,24 @@ SArray *mndBuildDnodesArray(SMnode *pMnode, int32_t exceptDnodeId) { sdbTraverse(pSdb, SDB_DNODE, mndResetDnodesArrayFp, NULL, NULL, NULL); sdbTraverse(pSdb, SDB_DNODE, mndBuildDnodesArrayFp, pArray, &exceptDnodeId, NULL); + + mDebug("build %d dnodes array", (int32_t)taosArrayGetSize(pArray)); + for (int32_t i = 0; i < (int32_t)taosArrayGetSize(pArray); ++i) { + SDnodeObj *pDnode = taosArrayGet(pArray, i); + mDebug("dnode:%d, vnodes:%d others:%d", pDnode->id, pDnode->numOfVnodes, pDnode->numOfOtherNodes); + } return pArray; } static int32_t mndCompareDnodeId(int32_t *dnode1Id, int32_t *dnode2Id) { return *dnode1Id >= *dnode2Id ? 1 : 0; } +static float mndGetDnodeScore(SDnodeObj *pDnode) { + return ((float)pDnode->numOfVnodes + (float)pDnode->numOfOtherNodes * 0.9) / pDnode->numOfSupportVnodes; +} + static int32_t mndCompareDnodeVnodes(SDnodeObj *pDnode1, SDnodeObj *pDnode2) { - float d1Score = (float)pDnode1->numOfVnodes / pDnode1->numOfSupportVnodes; - float d2Score = (float)pDnode2->numOfVnodes / pDnode2->numOfSupportVnodes; + float d1Score = mndGetDnodeScore(pDnode1); + float d2Score = mndGetDnodeScore(pDnode2); return d1Score >= d2Score ? 1 : 0; } @@ -494,7 +505,12 @@ static int32_t mndGetAvailableDnode(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup int32_t allocedVnodes = 0; void *pIter = NULL; + mDebug("start to sort %d dnodes", (int32_t)taosArrayGetSize(pArray)); taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes); + for (int32_t i = 0; i < (int32_t)taosArrayGetSize(pArray); ++i) { + SDnodeObj *pDnode = taosArrayGet(pArray, i); + mDebug("dnode:%d, score:%f", pDnode->id, mndGetDnodeScore(pDnode)); + } int32_t size = taosArrayGetSize(pArray); if (size < pVgroup->replica) { From 367b6512e94cd2c7f5b66079534b87b4691bac5c Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Wed, 28 Dec 2022 17:02:50 +0800 Subject: [PATCH 70/82] fix: heap-buffer-overflow in auto qworker --- source/util/src/tworker.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/source/util/src/tworker.c b/source/util/src/tworker.c index bb9d1e44af..a9a84c1860 100644 --- a/source/util/src/tworker.c +++ b/source/util/src/tworker.c @@ -138,7 +138,7 @@ void tQWorkerFreeQueue(SQWorkerPool *pool, STaosQueue *queue) { int32_t tAutoQWorkerInit(SAutoQWorkerPool *pool) { pool->qset = taosOpenQset(); - pool->workers = taosArrayInit(2, sizeof(SQWorker)); + pool->workers = taosArrayInit(2, sizeof(SQWorker *)); if (pool->workers == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; @@ -153,20 +153,21 @@ int32_t tAutoQWorkerInit(SAutoQWorkerPool *pool) { void tAutoQWorkerCleanup(SAutoQWorkerPool *pool) { int32_t size = taosArrayGetSize(pool->workers); for (int32_t i = 0; i < size; ++i) { - SQWorker *worker = taosArrayGet(pool->workers, i); + SQWorker *worker = taosArrayGetP(pool->workers, i); if (taosCheckPthreadValid(worker->thread)) { taosQsetThreadResume(pool->qset); } } for (int32_t i = 0; i < size; ++i) { - SQWorker *worker = taosArrayGet(pool->workers, i); + SQWorker *worker = taosArrayGetP(pool->workers, i); if (taosCheckPthreadValid(worker->thread)) { uInfo("worker:%s:%d is stopping", pool->name, worker->id); taosThreadJoin(worker->thread, NULL); taosThreadClear(&worker->thread); uInfo("worker:%s:%d is stopped", pool->name, worker->id); } + taosMemoryFree(worker); } taosArrayDestroy(pool->workers); @@ -218,27 +219,28 @@ STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem int32_t curWorkerNum = taosArrayGetSize(pool->workers); int32_t dstWorkerNum = ceil(queueNum * pool->ratio); if (dstWorkerNum < 1) dstWorkerNum = 1; - // spawn a thread to process queue + // spawn a thread to process queue while (curWorkerNum < dstWorkerNum) { - SQWorker wobj = { - .id = (int32_t)taosArrayGetSize(pool->workers), - .pool = pool, - }; - SQWorker *worker = taosArrayPush(pool->workers, &wobj); - if (worker == NULL) { - uError("worker:%s:%d failed to create, total:%d", pool->name, wobj.id, (int32_t)taosArrayGetSize(pool->workers)); + SQWorker *worker = taosMemoryCalloc(1, sizeof(SQWorker)); + if (worker == NULL || taosArrayPush(pool->workers, &worker) == NULL) { + uError("worker:%s:%d failed to create", pool->name, curWorkerNum); + taosMemoryFree(worker); taosCloseQueue(queue); terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } + worker->id = curWorkerNum; + worker->pool = pool; + TdThreadAttr thAttr; taosThreadAttrInit(&thAttr); taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE); if (taosThreadCreate(&worker->thread, &thAttr, (ThreadFp)tAutoQWorkerThreadFp, worker) != 0) { - uError("worker:%s:%d failed to create thread, total:%d", pool->name, wobj.id, - (int32_t)taosArrayGetSize(pool->workers)); + uError("worker:%s:%d failed to create thread, total:%d", pool->name, worker->id, curWorkerNum); + (void)taosArrayPop(pool->workers); + taosMemoryFree(worker); taosCloseQueue(queue); terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; From 4734795fe14a29ee8b0eea7f387157e9ba7ac97c Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao@163.com> Date: Wed, 28 Dec 2022 15:05:37 +0800 Subject: [PATCH 71/82] feat(stream):stream insert data into an existing table --- include/common/tmsg.h | 4 ++++ source/common/src/tmsg.c | 2 ++ source/dnode/mnode/impl/src/mndStream.c | 2 +- source/libs/parser/src/parTranslater.c | 2 ++ 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 603d6cfd67..800f9e2eb7 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1751,6 +1751,8 @@ typedef struct { #define STREAM_FILL_HISTORY_ON 1 #define STREAM_FILL_HISTORY_OFF 0 #define STREAM_DEFAULT_FILL_HISTORY STREAM_FILL_HISTORY_OFF +#define STREAM_CREATE_STABLE_TRUE 1 +#define STREAM_CREATE_STABLE_FALSE 0 typedef struct { char name[TSDB_STREAM_FNAME_LEN]; @@ -1768,6 +1770,8 @@ typedef struct { SArray* pTags; // array of SField // 3.0.20 int64_t checkpointFreq; // ms + // 3.0.2.3 + int8_t createStb; } SCMCreateStreamReq; typedef struct { diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index bf5a76a41b..83f447fd0e 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -5424,6 +5424,7 @@ int32_t tSerializeSCMCreateStreamReq(void *buf, int32_t bufLen, const SCMCreateS if (tEncodeI32(&encoder, pField->bytes) < 0) return -1; if (tEncodeCStr(&encoder, pField->name) < 0) return -1; } + if (tEncodeI8(&encoder, pReq->createStb) < 0) return -1; tEndEncode(&encoder); @@ -5484,6 +5485,7 @@ int32_t tDeserializeSCMCreateStreamReq(void *buf, int32_t bufLen, SCMCreateStrea } } } + if (tDecodeI8(&decoder, &pReq->createStb) < 0) return -1; tEndDecode(&decoder); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 57f9c085ad..61374aa0bf 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -637,7 +637,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { if (mndTrancCheckConflict(pMnode, pTrans) != 0) goto _OVER; // create stb for stream - if (mndCreateStbForStream(pMnode, pTrans, &streamObj, pReq->info.conn.user) < 0) { + if (createStreamReq.createStb == STREAM_CREATE_STABLE_TRUE && mndCreateStbForStream(pMnode, pTrans, &streamObj, pReq->info.conn.user) < 0) { mError("trans:%d, failed to create stb for stream %s since %s", pTrans->id, createStreamReq.name, terrstr()); mndTransDrop(pTrans); goto _OVER; diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 3048d53738..a3fc4b27fd 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -5766,6 +5766,8 @@ static int32_t buildCreateStreamReq(STranslateContext* pCxt, SCreateStreamStmt* pReq->numOfTags = LIST_LENGTH(pStmt->pTags); } + pReq->createStb = STREAM_CREATE_STABLE_TRUE; + return code; } From 0facc8e0885007c9bd99612c2efcb0631ee3f43b Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Wed, 28 Dec 2022 14:44:46 +0800 Subject: [PATCH 72/82] fix(tdb/assert/cleanup): remove asserts --- source/libs/tdb/src/db/tdbBtree.c | 112 ++++++++--------- source/libs/tdb/src/db/tdbDb.c | 10 +- source/libs/tdb/src/db/tdbPCache.c | 47 ++++--- source/libs/tdb/src/db/tdbPage.c | 194 +++++++++++++++++++++++------ source/libs/tdb/src/db/tdbPager.c | 60 ++++----- source/libs/tdb/src/db/tdbTable.c | 2 - source/libs/tdb/src/db/tdbTxn.c | 5 +- 7 files changed, 280 insertions(+), 150 deletions(-) diff --git a/source/libs/tdb/src/db/tdbBtree.c b/source/libs/tdb/src/db/tdbBtree.c index 5c1f264460..8ce6777339 100644 --- a/source/libs/tdb/src/db/tdbBtree.c +++ b/source/libs/tdb/src/db/tdbBtree.c @@ -74,7 +74,10 @@ int tdbBtreeOpen(int keyLen, int valLen, SPager *pPager, char const *tbname, SPg SBTree *pBt; int ret; - ASSERT(keyLen != 0); + if (keyLen == 0) { + tdbError("tdb/btree-open: key len cannot be zero."); + return -1; + } *ppBt = NULL; @@ -152,7 +155,11 @@ int tdbBtreeOpen(int keyLen, int valLen, SPager *pPager, char const *tbname, SPg tdbPostCommit(pPager->pEnv, txn); } - ASSERT(pgno != 0); + if (pgno == 0) { + tdbError("tdb/btree-open: pgno cannot be zero."); + tdbOsFree(pBt); + return -1; + } pBt->root = pgno; /* // TODO: pBt->root @@ -192,7 +199,7 @@ int tdbBtreeInsert(SBTree *pBt, const void *pKey, int kLen, const void *pVal, in ret = tdbBtcMoveTo(&btc, pKey, kLen, &c); if (ret < 0) { tdbBtcClose(&btc); - ASSERT(0); + tdbError("tdb/btree-insert: btc move to failed with ret: %d.", ret); return -1; } @@ -202,17 +209,17 @@ int tdbBtreeInsert(SBTree *pBt, const void *pKey, int kLen, const void *pVal, in if (c > 0) { btc.idx++; } else if (c == 0) { - // dup key not allowed - tdbError("unable to insert dup key. pKey: %p, kLen: %d, btc: %p, pTxn: %p", pKey, kLen, &btc, pTxn); - // ASSERT(0); + // dup key not allowed with insert + tdbBtcClose(&btc); + tdbError("tdb/btree-insert: dup key. pKey: %p, kLen: %d, btc: %p, pTxn: %p", pKey, kLen, &btc, pTxn); return -1; } } ret = tdbBtcUpsert(&btc, pKey, kLen, pVal, vLen, 1); if (ret < 0) { - ASSERT(0); tdbBtcClose(&btc); + tdbError("tdb/btree-insert: btc upsert failed with ret: %d.", ret); return -1; } @@ -233,7 +240,7 @@ int tdbBtreeDelete(SBTree *pBt, const void *pKey, int kLen, TXN *pTxn) { ret = tdbBtcMoveTo(&btc, pKey, kLen, &c); if (ret < 0) { tdbBtcClose(&btc); - ASSERT(0); + tdbError("tdb/btree-delete: btc move to failed with ret: %d.", ret); return -1; } @@ -264,7 +271,7 @@ int tdbBtreeUpsert(SBTree *pBt, const void *pKey, int nKey, const void *pData, i // move the cursor ret = tdbBtcMoveTo(&btc, pKey, nKey, &c); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btree-upsert: btc move to failed with ret: %d.", ret); tdbBtcClose(&btc); return -1; } @@ -280,8 +287,8 @@ int tdbBtreeUpsert(SBTree *pBt, const void *pKey, int nKey, const void *pData, i ret = tdbBtcUpsert(&btc, pKey, nKey, pData, nData, c); if (ret < 0) { - ASSERT(0); tdbBtcClose(&btc); + tdbError("tdb/btree-upsert: btc upsert failed with ret: %d.", ret); return -1; } @@ -309,7 +316,8 @@ int tdbBtreePGet(SBTree *pBt, const void *pKey, int kLen, void **ppKey, int *pkL ret = tdbBtcMoveTo(&btc, pKey, kLen, &cret); if (ret < 0) { tdbBtcClose(&btc); - ASSERT(0); + tdbError("tdb/btree-pget: btc move to failed with ret: %d.", ret); + return -1; } if (btc.idx < 0 || cret) { @@ -325,7 +333,7 @@ int tdbBtreePGet(SBTree *pBt, const void *pKey, int kLen, void **ppKey, int *pkL pTKey = tdbRealloc(*ppKey, cd.kLen); if (pTKey == NULL) { tdbBtcClose(&btc); - ASSERT(0); + tdbError("tdb/btree-pget: realloc pTKey failed."); return -1; } *ppKey = pTKey; @@ -337,7 +345,7 @@ int tdbBtreePGet(SBTree *pBt, const void *pKey, int kLen, void **ppKey, int *pkL pTVal = tdbRealloc(*ppVal, cd.vLen); if (pTVal == NULL) { tdbBtcClose(&btc); - ASSERT(0); + tdbError("tdb/btree-pget: realloc pTVal failed."); return -1; } *ppVal = pTVal; @@ -350,7 +358,7 @@ int tdbBtreePGet(SBTree *pBt, const void *pKey, int kLen, void **ppKey, int *pkL } if (TDB_CELLDECODER_FREE_VAL(&cd)) { - tdbDebug("tdb btc/pget/2 decoder: %p pVal free: %p", &cd, cd.pVal); + tdbTrace("tdb btc/pget/2 decoder: %p pVal free: %p", &cd, cd.pVal); tdbFree(cd.pVal); } @@ -381,36 +389,7 @@ static int tdbDefaultKeyCmprFn(const void *pKey1, int keyLen1, const void *pKey2 } return cret; } -/* -static int tdbBtreeOpenImpl(SBTree *pBt) { - // Try to get the root page of the an existing btree - SPgno pgno; - SPage *pPage; - int ret; - { - // 1. TODO: Search the main DB to check if the DB exists - ret = tdbPagerOpenDB(pBt->pPager, &pgno, true, pBt); - ASSERT(ret == 0); - } - - if (pgno != 0) { - pBt->root = pgno; - return 0; - } - - // Try to create a new database - ret = tdbPagerAllocPage(pBt->pPager, &pgno); - if (ret < 0) { - ASSERT(0); - return -1; - } - - ASSERT(pgno != 0); - pBt->root = pgno; - return 0; -} -*/ int tdbBtreeInitPage(SPage *pPage, void *arg, int init) { SBTree *pBt; u8 flags; @@ -560,7 +539,7 @@ static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTx ret = tdbPagerFetchPage(pBt->pPager, &pgno, pOlds + i, tdbBtreeInitPage, &((SBtreeInitPageArg){.pBt = pBt, .flags = 0}), pTxn); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btree-balance: fetch page failed with ret: %d.", ret); return -1; } @@ -722,7 +701,8 @@ static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTx iarg.flags = flags; ret = tdbPagerFetchPage(pBt->pPager, &pgno, pNews + iNew, tdbBtreeInitPage, &iarg, pTxn); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btree-balance: fetch page failed with ret: %d.", ret); + return -1; } ret = tdbPagerWrite(pBt->pPager, pNews[iNew]); @@ -1216,8 +1196,8 @@ static int tdbBtreeEncodeCell(SPage *pPage, const void *pKey, int kLen, const vo ret = tdbBtreeEncodePayload(pPage, pCell, nHeader, pKey, kLen, pVal, vLen, &nPayload, pTxn, pBt); if (ret < 0) { // TODO - ASSERT(0); - return 0; + tdbError("tdb/btree-encode-cell: encode payload failed with ret: %d.", ret); + return -1; } *szCell = nHeader + nPayload; @@ -1577,7 +1557,7 @@ int tdbBtcMoveToFirst(SBTC *pBtc) { ret = tdbPagerFetchPage(pPager, &pBt->root, &(pBtc->pPage), tdbBtreeInitPage, &((SBtreeInitPageArg){.pBt = pBt, .flags = TDB_BTREE_ROOT | TDB_BTREE_LEAF}), pBtc->pTxn); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btc-move-tofirst: fetch page failed with ret: %d.", ret); return -1; } @@ -1621,7 +1601,7 @@ int tdbBtcMoveToFirst(SBTC *pBtc) { ret = tdbBtcMoveDownward(pBtc); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btc-move-tofirst: btc move downward failed with ret: %d.", ret); return -1; } @@ -1646,7 +1626,7 @@ int tdbBtcMoveToLast(SBTC *pBtc) { ret = tdbPagerFetchPage(pPager, &pBt->root, &(pBtc->pPage), tdbBtreeInitPage, &((SBtreeInitPageArg){.pBt = pBt, .flags = TDB_BTREE_ROOT | TDB_BTREE_LEAF}), pBtc->pTxn); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btc-move-tolast: fetch page failed with ret: %d.", ret); return -1; } @@ -1694,7 +1674,7 @@ int tdbBtcMoveToLast(SBTC *pBtc) { ret = tdbBtcMoveDownward(pBtc); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btc-move-tolast: btc move downward failed with ret: %d.", ret); return -1; } @@ -1752,7 +1732,7 @@ int tdbBtreeNext(SBTC *pBtc, void **ppKey, int *kLen, void **ppVal, int *vLen) { ret = tdbBtcMoveToNext(pBtc); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btree-next: btc move to next failed with ret: %d.", ret); return -1; } @@ -1798,7 +1778,7 @@ int tdbBtreePrev(SBTC *pBtc, void **ppKey, int *kLen, void **ppVal, int *vLen) { ret = tdbBtcMoveToPrev(pBtc); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btree-prev: btc move to prev failed with ret: %d.", ret); return -1; } @@ -1841,7 +1821,7 @@ int tdbBtcMoveToNext(SBTC *pBtc) { ret = tdbBtcMoveDownward(pBtc); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btc-move-tonext: btc move downward failed with ret: %d.", ret); return -1; } @@ -1914,7 +1894,7 @@ static int tdbBtcMoveDownward(SBTC *pBtc) { ret = tdbPagerFetchPage(pBtc->pBt->pPager, &pgno, &pBtc->pPage, tdbBtreeInitPage, &((SBtreeInitPageArg){.pBt = pBtc->pBt, .flags = 0}), pBtc->pTxn); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btc-move-downward: fetch page failed with ret: %d.", ret); return -1; } @@ -1969,7 +1949,10 @@ int tdbBtcDelete(SBTC *pBtc) { int nKey; int ret; - ASSERT(idx >= 0 && idx < nCells); + if (idx < 0 || idx >= nCells) { + tdbError("tdb/btc-delete: idx: %d out of range[%d, %d).", idx, 0, nCells); + return -1; + } // drop the cell on the leaf ret = tdbPagerWrite(pPager, pBtc->pPage); @@ -2007,7 +1990,7 @@ int tdbBtcDelete(SBTC *pBtc) { ret = tdbPageUpdateCell(pPage, idx, pCell, szCell, pBtc->pTxn, pBtc->pBt); if (ret < 0) { tdbOsFree(pCell); - ASSERT(0); + tdbError("tdb/btc-delete: page update cell failed with ret: %d.", ret); return -1; } tdbOsFree(pCell); @@ -2022,7 +2005,7 @@ int tdbBtcDelete(SBTC *pBtc) { ret = tdbBtreeBalance(pBtc); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btc-delete: btree balance failed with ret: %d.", ret); return -1; } } @@ -2045,7 +2028,7 @@ int tdbBtcUpsert(SBTC *pBtc, const void *pKey, int kLen, const void *pData, int szBuf = kLen + nData + 14; pBuf = tdbRealloc(pBtc->pBt->pBuf, pBtc->pBt->pageSize > szBuf ? szBuf : pBtc->pBt->pageSize); if (pBuf == NULL) { - ASSERT(0); + tdbError("tdb/btc-upsert: realloc pBuf failed."); return -1; } pBtc->pBt->pBuf = pBuf; @@ -2054,7 +2037,7 @@ int tdbBtcUpsert(SBTC *pBtc, const void *pKey, int kLen, const void *pData, int // encode cell ret = tdbBtreeEncodeCell(pBtc->pPage, pKey, kLen, pData, nData, pCell, &szCell, pBtc->pTxn, pBtc->pBt); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btc-upsert: btree encode cell failed with ret: %d.", ret); return -1; } @@ -2076,7 +2059,7 @@ int tdbBtcUpsert(SBTC *pBtc, const void *pKey, int kLen, const void *pData, int ret = tdbPageUpdateCell(pBtc->pPage, pBtc->idx, pCell, szCell, pBtc->pTxn, pBtc->pBt); } if (ret < 0) { - ASSERT(0); + tdbError("tdb/btc-upsert: page insert/update cell failed with ret: %d.", ret); return -1; } @@ -2084,7 +2067,7 @@ int tdbBtcUpsert(SBTC *pBtc, const void *pKey, int kLen, const void *pData, int if (pBtc->pPage->nOverflow > 0) { ret = tdbBtreeBalance(pBtc); if (ret < 0) { - ASSERT(0); + tdbError("tdb/btc-upsert: btree balance failed with ret: %d.", ret); return -1; } } @@ -2233,7 +2216,10 @@ int tdbBtcClose(SBTC *pBtc) { if (pBtc->iPage < 0) return 0; for (;;) { - ASSERT(pBtc->pPage); + if (NULL == pBtc->pPage) { + tdbError("tdb/btc-close: null ptr pPage."); + return -1; + } tdbPagerReturnPage(pBtc->pBt->pPager, pBtc->pPage, pBtc->pTxn); diff --git a/source/libs/tdb/src/db/tdbDb.c b/source/libs/tdb/src/db/tdbDb.c index c79279c658..952c49db73 100644 --- a/source/libs/tdb/src/db/tdbDb.c +++ b/source/libs/tdb/src/db/tdbDb.c @@ -247,7 +247,10 @@ void tdbEnvRemovePager(TDB *pDb, SPager *pPager) { // remove from the list for (ppPager = &pDb->pgrList; *ppPager && (*ppPager != pPager); ppPager = &((*ppPager)->pNext)) { } - ASSERT(*ppPager == pPager); + if (*ppPager != pPager) { + tdbError("tdb/db: invalid pPager: %p, *ppPager: %p", pPager, *ppPager); + return; + } *ppPager = pPager->pNext; // remove from hash @@ -255,7 +258,10 @@ void tdbEnvRemovePager(TDB *pDb, SPager *pPager) { ppPager = &pDb->pgrHash[hash % pDb->nPgrHash]; for (; *ppPager && *ppPager != pPager; ppPager = &((*ppPager)->pHashNext)) { } - ASSERT(*ppPager == pPager); + if (*ppPager != pPager) { + tdbError("tdb/db: invalid pPager: %p, *ppPager: %p", pPager, *ppPager); + return; + } *ppPager = pPager->pNext; // decrease the counter diff --git a/source/libs/tdb/src/db/tdbPCache.c b/source/libs/tdb/src/db/tdbPCache.c index 4896568c7f..262f3d27e6 100644 --- a/source/libs/tdb/src/db/tdbPCache.c +++ b/source/libs/tdb/src/db/tdbPCache.c @@ -236,10 +236,10 @@ void tdbPCacheInvalidatePage(SPCache *pCache, SPager *pPager, SPgno pgno) { void tdbPCacheRelease(SPCache *pCache, SPage *pPage, TXN *pTxn) { i32 nRef; - ASSERT(pTxn); - - // nRef = tdbUnrefPage(pPage); - // ASSERT(nRef >= 0); + if (!pTxn) { + tdbError("tdb/pcache: null ptr pTxn, release failed."); + return; + } tdbPCacheLock(pCache); nRef = tdbUnrefPage(pPage); @@ -275,7 +275,10 @@ static SPage *tdbPCacheFetchImpl(SPCache *pCache, const SPgid *pPgid, TXN *pTxn) SPage *pPage = NULL; SPage *pPageH = NULL; - ASSERT(pTxn); + if (!pTxn) { + tdbError("tdb/pcache: null ptr pTxn, fetch impl failed."); + return NULL; + } // 1. Search the hash table pPage = pCache->pgHash[tdbPCachePageHash(pPgid) % pCache->nHash]; @@ -315,8 +318,8 @@ static SPage *tdbPCacheFetchImpl(SPCache *pCache, const SPgid *pPgid, TXN *pTxn) if (!pPage && pTxn->xMalloc != NULL) { ret = tdbPageCreate(pCache->szPage, &pPage, pTxn->xMalloc, pTxn->xArg); if (ret < 0 || pPage == NULL) { - // TODO - ASSERT(0); + tdbError("tdb/pcache: ret: %" PRId32 " pPage: %p, page create failed.", ret, pPage); + // TODO: recycle other backup pages return NULL; } @@ -370,7 +373,11 @@ static SPage *tdbPCacheFetchImpl(SPCache *pCache, const SPgid *pPgid, TXN *pTxn) static void tdbPCachePinPage(SPCache *pCache, SPage *pPage) { if (pPage->pLruNext != NULL) { - ASSERT(tdbGetPageRef(pPage) == 0); + int32_t nRef = tdbGetPageRef(pPage); + if (nRef != 0) { + tdbError("tdb/pcache: pin page's ref not zero: %" PRId32, nRef); + return; + } pPage->pLruPrev->pLruNext = pPage->pLruNext; pPage->pLruNext->pLruPrev = pPage->pLruPrev; @@ -383,13 +390,23 @@ static void tdbPCachePinPage(SPCache *pCache, SPage *pPage) { } static void tdbPCacheUnpinPage(SPCache *pCache, SPage *pPage) { - i32 nRef; - - ASSERT(pPage->isLocal); - ASSERT(!pPage->isDirty); - ASSERT(tdbGetPageRef(pPage) == 0); - - ASSERT(pPage->pLruNext == NULL); + i32 nRef = tdbGetPageRef(pPage); + if (nRef != 0) { + tdbError("tdb/pcache: unpin page's ref not zero: %" PRId32, nRef); + return; + } + if (!pPage->isLocal) { + tdbError("tdb/pcache: unpin page's not local: %" PRIu8, pPage->isLocal); + return; + } + if (pPage->isDirty) { + tdbError("tdb/pcache: unpin page's dirty: %" PRIu8, pPage->isDirty); + return; + } + if (NULL != pPage->pLruNext) { + tdbError("tdb/pcache: unpin page's pLruNext not null."); + return; + } tdbTrace("pCache:%p unpin page %p/%d, nPages:%d, pgno:%d, ", pCache, pPage, pPage->id, pCache->nPages, TDB_PAGE_PGNO(pPage)); diff --git a/source/libs/tdb/src/db/tdbPage.c b/source/libs/tdb/src/db/tdbPage.c index 50dc8e0a65..f19c3f28c0 100644 --- a/source/libs/tdb/src/db/tdbPage.c +++ b/source/libs/tdb/src/db/tdbPage.c @@ -43,9 +43,15 @@ int tdbPageCreate(int pageSize, SPage **ppPage, void *(*xMalloc)(void *, size_t) u8 *ptr; int size; - ASSERT(xMalloc); + if (!xMalloc) { + tdbError("tdb/page-create: null xMalloc."); + return -1; + } - ASSERT(TDB_IS_PGSIZE_VLD(pageSize)); + if (!TDB_IS_PGSIZE_VLD(pageSize)) { + tdbError("tdb/page-create: invalid pageSize: %d.", pageSize); + return -1; + } *ppPage = NULL; size = pageSize + sizeof(*pPage); @@ -69,16 +75,24 @@ int tdbPageCreate(int pageSize, SPage **ppPage, void *(*xMalloc)(void *, size_t) *ppPage = pPage; - tdbTrace("page/create: %p/%d %p", pPage, pPage->id, xMalloc); + tdbTrace("tdb/page-create: %p/%d %p", pPage, pPage->id, xMalloc); return 0; } int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg) { u8 *ptr; - tdbTrace("page/destroy: %p/%d %p", pPage, pPage->id, xFree); - ASSERT(!pPage->isDirty); - ASSERT(xFree); + tdbTrace("tdb/page-destroy: %p/%d %p", pPage, pPage->id, xFree); + + if (pPage->isDirty) { + tdbError("tdb/page-destroy: dirty page: %" PRIu8 ".", pPage->isDirty); + return -1; + } + + if (!xFree) { + tdbError("tdb/page-destroy: null xFree."); + return -1; + } for (int iOvfl = 0; iOvfl < pPage->nOverflow; iOvfl++) { tdbTrace("tdbPage/destroy/free ovfl cell: %p/%p", pPage->apOvfl[iOvfl], pPage); @@ -105,7 +119,10 @@ void tdbPageZero(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell pPage->nOverflow = 0; pPage->xCellSize = xCellSize; - ASSERT((u8 *)pPage->pPageFtr == pPage->pFreeEnd); + if ((u8 *)pPage->pPageFtr != pPage->pFreeEnd) { + tdbError("tdb/page-zero: invalid page, pFreeEnd: %p, pPageFtr: %p", pPage->pFreeEnd, pPage->pPageFtr); + return; + } } void tdbPageInit(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *, int, TXN *, SBTree *pBt)) { @@ -121,8 +138,15 @@ void tdbPageInit(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell pPage->nOverflow = 0; pPage->xCellSize = xCellSize; - ASSERT(pPage->pFreeEnd >= pPage->pFreeStart); - ASSERT(pPage->pFreeEnd - pPage->pFreeStart <= TDB_PAGE_NFREE(pPage)); + if (pPage->pFreeEnd < pPage->pFreeStart) { + tdbError("tdb/page-init: invalid page, pFreeEnd: %p, pFreeStart: %p", pPage->pFreeEnd, pPage->pFreeStart); + return; + } + if (pPage->pFreeEnd - pPage->pFreeStart > TDB_PAGE_NFREE(pPage)) { + tdbError("tdb/page-init: invalid page, pFreeEnd: %p, pFreeStart: %p, NFREE: %d", pPage->pFreeEnd, pPage->pFreeStart, + TDB_PAGE_NFREE(pPage)); + return; + } } int tdbPageInsertCell(SPage *pPage, int idx, SCell *pCell, int szCell, u8 asOvfl) { @@ -132,7 +156,11 @@ int tdbPageInsertCell(SPage *pPage, int idx, SCell *pCell, int szCell, u8 asOvfl int lidx; // local idx SCell *pNewCell; - ASSERT(szCell <= TDB_PAGE_MAX_FREE_BLOCK(pPage, pPage->pPageHdr - pPage->pData)); + if (szCell > TDB_PAGE_MAX_FREE_BLOCK(pPage, pPage->pPageHdr - pPage->pData)) { + tdbError("tdb/page-insert-cell: invalid page, szCell: %d, max free: %lu", szCell, + TDB_PAGE_MAX_FREE_BLOCK(pPage, pPage->pPageHdr - pPage->pData)); + return -1; + } nFree = TDB_PAGE_NFREE(pPage); nCells = TDB_PAGE_NCELLS(pPage); @@ -176,7 +204,11 @@ int tdbPageInsertCell(SPage *pPage, int idx, SCell *pCell, int szCell, u8 asOvfl TDB_PAGE_CELL_OFFSET_AT_SET(pPage, lidx, pNewCell - pPage->pData); TDB_PAGE_NCELLS_SET(pPage, nCells + 1); - ASSERT(pPage->pFreeStart == pPage->pCellIdx + TDB_PAGE_OFFSET_SIZE(pPage) * (nCells + 1)); + if (pPage->pFreeStart != pPage->pCellIdx + TDB_PAGE_OFFSET_SIZE(pPage) * (nCells + 1)) { + tdbError("tdb/page-insert-cell: invalid page, pFreeStart: %p, pCellIdx: %p, nCells: %d", pPage->pFreeStart, + pPage->pCellIdx, nCells); + return -1; + } } for (; iOvfl < pPage->nOverflow; iOvfl++) { @@ -200,7 +232,10 @@ int tdbPageDropCell(SPage *pPage, int idx, TXN *pTxn, SBTree *pBt) { nCells = TDB_PAGE_NCELLS(pPage); - ASSERT(idx >= 0 && idx < nCells + pPage->nOverflow); + if (idx < 0 || idx >= nCells + pPage->nOverflow) { + tdbError("tdb/page-drop-cell: idx: %d out of range, nCells: %d, nOvfl: %d.", idx, nCells, pPage->nOverflow); + return -1; + } iOvfl = 0; for (; iOvfl < pPage->nOverflow; iOvfl++) { @@ -228,7 +263,10 @@ int tdbPageDropCell(SPage *pPage, int idx, TXN *pTxn, SBTree *pBt) { for (; iOvfl < pPage->nOverflow; iOvfl++) { pPage->aiOvfl[iOvfl]--; - ASSERT(pPage->aiOvfl[iOvfl] > 0); + if (pPage->aiOvfl[iOvfl] <= 0) { + tdbError("tdb/page-drop-cell: invalid ai idx: %d", pPage->aiOvfl[iOvfl]); + return -1; + } } return 0; @@ -240,12 +278,19 @@ void tdbPageCopy(SPage *pFromPage, SPage *pToPage, int deepCopyOvfl) { pToPage->pFreeStart = pToPage->pPageHdr + (pFromPage->pFreeStart - pFromPage->pPageHdr); pToPage->pFreeEnd = (u8 *)(pToPage->pPageFtr) - ((u8 *)pFromPage->pPageFtr - pFromPage->pFreeEnd); - ASSERT(pToPage->pFreeEnd >= pToPage->pFreeStart); + if (pToPage->pFreeEnd < pToPage->pFreeStart) { + tdbError("tdb/page-copy: invalid to page, pFreeStart: %p, pFreeEnd: %p", pToPage->pFreeStart, pToPage->pFreeEnd); + return; + } memcpy(pToPage->pPageHdr, pFromPage->pPageHdr, pFromPage->pFreeStart - pFromPage->pPageHdr); memcpy(pToPage->pFreeEnd, pFromPage->pFreeEnd, (u8 *)pFromPage->pPageFtr - pFromPage->pFreeEnd); - ASSERT(TDB_PAGE_CCELLS(pToPage) == pToPage->pFreeEnd - pToPage->pData); + if (TDB_PAGE_CCELLS(pToPage) != pToPage->pFreeEnd - pToPage->pData) { + tdbError("tdb/page-copy: invalid to page, cell body: %d, range: %ld", TDB_PAGE_CCELLS(pToPage), + pToPage->pFreeEnd - pToPage->pData); + return; + } delta = (pToPage->pPageHdr - pToPage->pData) - (pFromPage->pPageHdr - pFromPage->pData); if (delta != 0) { @@ -295,8 +340,16 @@ static int tdbPageAllocate(SPage *pPage, int szCell, SCell **ppCell) { *ppCell = NULL; nFree = TDB_PAGE_NFREE(pPage); - ASSERT(nFree >= szCell + TDB_PAGE_OFFSET_SIZE(pPage)); - ASSERT(TDB_PAGE_CCELLS(pPage) == pPage->pFreeEnd - pPage->pData); + if (nFree < szCell + TDB_PAGE_OFFSET_SIZE(pPage)) { + tdbError("tdb/page-allocate: invalid cell size, nFree: %d, szCell: %d, szOffset: %d", nFree, szCell, + TDB_PAGE_OFFSET_SIZE(pPage)); + return -1; + } + if (TDB_PAGE_CCELLS(pPage) != pPage->pFreeEnd - pPage->pData) { + tdbError("tdb/page-allocate: invalid page, cell body: %d, range: %ld", TDB_PAGE_CCELLS(pPage), + pPage->pFreeEnd - pPage->pData); + return -1; + } // 1. Try to allocate from the free space block area if (pPage->pFreeEnd - pPage->pFreeStart >= szCell + TDB_PAGE_OFFSET_SIZE(pPage)) { @@ -308,7 +361,10 @@ static int tdbPageAllocate(SPage *pPage, int szCell, SCell **ppCell) { // 2. Try to allocate from the page free list cellFree = TDB_PAGE_FCELL(pPage); - ASSERT(cellFree == 0 || cellFree >= pPage->pFreeEnd - pPage->pData); + if (cellFree != 0 && cellFree < pPage->pFreeEnd - pPage->pData) { + tdbError("tdb/page-allocate: cellFree: %d, pFreeEnd: %p, pData: %p.", cellFree, pPage->pFreeEnd, pPage->pData); + return -1; + } if (cellFree && pPage->pFreeEnd - pPage->pFreeStart >= TDB_PAGE_OFFSET_SIZE(pPage)) { SCell *pPrevFreeCell = NULL; int szPrevFreeCell; @@ -353,16 +409,30 @@ static int tdbPageAllocate(SPage *pPage, int szCell, SCell **ppCell) { // 3. Try to dfragment and allocate again tdbPageDefragment(pPage); - ASSERT(pPage->pFreeEnd - pPage->pFreeStart == nFree); - ASSERT(nFree == TDB_PAGE_NFREE(pPage)); - ASSERT(pPage->pFreeEnd - pPage->pData == TDB_PAGE_CCELLS(pPage)); + if (pPage->pFreeEnd - pPage->pFreeStart != nFree) { + tdbError("tdb/page-allocate: nFree: %d, pFreeStart: %p, pFreeEnd: %p.", nFree, pPage->pFreeStart, pPage->pFreeEnd); + return -1; + } + if (TDB_PAGE_NFREE(pPage) != nFree) { + tdbError("tdb/page-allocate: nFree: %d, page free: %d.", nFree, TDB_PAGE_NFREE(pPage)); + return -1; + } + if (pPage->pFreeEnd - pPage->pData != TDB_PAGE_CCELLS(pPage)) { + tdbError("tdb/page-allocate: ccells: %d, pFreeStart: %p, pData: %p.", TDB_PAGE_CCELLS(pPage), pPage->pFreeStart, + pPage->pData); + return -1; + } pPage->pFreeEnd -= szCell; pCell = pPage->pFreeEnd; TDB_PAGE_CCELLS_SET(pPage, pPage->pFreeEnd - pPage->pData); _alloc_finish: - ASSERT(pCell); + if (NULL == pCell) { + tdbError("tdb/page-allocate: null ptr pCell."); + return -1; + } + pPage->pFreeStart += TDB_PAGE_OFFSET_SIZE(pPage); TDB_PAGE_NFREE_SET(pPage, nFree - szCell - TDB_PAGE_OFFSET_SIZE(pPage)); *ppCell = pCell; @@ -375,9 +445,18 @@ static int tdbPageFree(SPage *pPage, int idx, SCell *pCell, int szCell) { u8 *dest; u8 *src; - ASSERT(pCell >= pPage->pFreeEnd); - ASSERT(pCell + szCell <= (u8 *)(pPage->pPageFtr)); - ASSERT(pCell == TDB_PAGE_CELL_AT(pPage, idx)); + if (pCell < pPage->pFreeEnd) { + tdbError("tdb/page-free: invalid cell, cell: %p, free end: %p", pCell, pPage->pFreeEnd); + return -1; + } + if (pCell + szCell > (u8 *)(pPage->pPageFtr)) { + tdbError("tdb/page-free: cell crosses page footer, cell: %p, size: %d footer: %p", pCell, szCell, pPage->pFreeEnd); + return -1; + } + if (pCell != TDB_PAGE_CELL_AT(pPage, idx)) { + tdbError("tdb/page-free: cell pos incorrect, cell: %p, pos: %p", pCell, TDB_PAGE_CELL_AT(pPage, idx)); + return -1; + } nFree = TDB_PAGE_NFREE(pPage); @@ -390,7 +469,8 @@ static int tdbPageFree(SPage *pPage, int idx, SCell *pCell, int szCell) { pPage->pPageMethods->setFreeCellInfo(pCell, szCell, cellFree); TDB_PAGE_FCELL_SET(pPage, pCell - pPage->pData); } else { - ASSERT(0); + tdbError("tdb/page-free: invalid cell size: %d", szCell); + return -1; } } @@ -417,7 +497,10 @@ static int tdbPageDefragment(SPage *pPage) { nFree = TDB_PAGE_NFREE(pPage); nCells = TDB_PAGE_NCELLS(pPage); - ASSERT(pPage->pFreeEnd - pPage->pFreeStart < nFree); + if (pPage->pFreeEnd - pPage->pFreeStart >= nFree) { + tdbError("tdb/page-defragment: invalid free range, nFree: %d.", nFree); + return -1; + } // Loop to compact the page content // Here we use an O(n^2) algorithm to do the job since @@ -443,11 +526,19 @@ static int tdbPageDefragment(SPage *pPage) { } } - ASSERT(pCell != NULL); + if (NULL == pCell) { + tdbError("tdb/page-defragment: null ptr pCell."); + return -1; + } szCell = (*pPage->xCellSize)(pPage, pCell, 0, NULL, NULL); - ASSERT(pCell + szCell <= pNextCell); + if (pCell + szCell > pNextCell) { + tdbError("tdb/page-defragment: invalid cell range, pCell: %p, szCell: %d, pNextCell: %p.", pCell, szCell, + pNextCell); + return -1; + } + if (pCell + szCell < pNextCell) { memmove(pNextCell - szCell, pCell, szCell); } @@ -457,7 +548,11 @@ static int tdbPageDefragment(SPage *pPage) { TDB_PAGE_CELL_OFFSET_AT_SET(pPage, idx, pNextCell - pPage->pData); } - ASSERT(pPage->pFreeEnd - pPage->pFreeStart == nFree); + if (pPage->pFreeEnd - pPage->pFreeStart != nFree) { + tdbError("tdb/page-defragment: invalid free range, nFree: %d.", nFree); + return -1; + } + TDB_PAGE_CCELLS_SET(pPage, pPage->pFreeEnd - pPage->pData); TDB_PAGE_FCELL_SET(pPage, 0); @@ -483,39 +578,59 @@ typedef struct { // cellNum static inline int getPageCellNum(SPage *pPage) { return ((SPageHdr *)(pPage->pPageHdr))[0].cellNum; } static inline void setPageCellNum(SPage *pPage, int cellNum) { - ASSERT(cellNum < 65536); + if (cellNum >= 65536) { + tdbError("tdb/page-set-cell-num: invalid cellNum: %d.", cellNum); + return; + } ((SPageHdr *)(pPage->pPageHdr))[0].cellNum = (u16)cellNum; } // cellBody static inline int getPageCellBody(SPage *pPage) { return ((SPageHdr *)(pPage->pPageHdr))[0].cellBody; } static inline void setPageCellBody(SPage *pPage, int cellBody) { - ASSERT(cellBody < 65536); + if (cellBody >= 65536) { + tdbError("tdb/page-set-cell-body: invalid cellBody: %d.", cellBody); + return; + } ((SPageHdr *)(pPage->pPageHdr))[0].cellBody = (u16)cellBody; } // cellFree static inline int getPageCellFree(SPage *pPage) { return ((SPageHdr *)(pPage->pPageHdr))[0].cellFree; } static inline void setPageCellFree(SPage *pPage, int cellFree) { - ASSERT(cellFree < 65536); + if (cellFree >= 65536) { + tdbError("tdb/page-set-cell-free: invalid cellFree: %d.", cellFree); + return; + } ((SPageHdr *)(pPage->pPageHdr))[0].cellFree = (u16)cellFree; } // nFree static inline int getPageNFree(SPage *pPage) { return ((SPageHdr *)(pPage->pPageHdr))[0].nFree; } static inline void setPageNFree(SPage *pPage, int nFree) { - ASSERT(nFree < 65536); + if (nFree >= 65536) { + tdbError("tdb/page-set-nfree: invalid nFree: %d.", nFree); + return; + } ((SPageHdr *)(pPage->pPageHdr))[0].nFree = (u16)nFree; } // cell offset static inline int getPageCellOffset(SPage *pPage, int idx) { - ASSERT(idx >= 0 && idx < getPageCellNum(pPage)); + int cellNum = getPageCellNum(pPage); + if (idx < 0 || idx >= cellNum) { + tdbError("tdb/page-cell-offset: idx: %d out of range[%d, %d).", idx, 0, cellNum); + return -1; + } + return ((u16 *)pPage->pCellIdx)[idx]; } static inline void setPageCellOffset(SPage *pPage, int idx, int offset) { - ASSERT(offset < 65536); + if (offset >= 65536) { + tdbError("tdb/page-set-cell-offset: invalid offset: %d.", offset); + return; + } ((u16 *)pPage->pCellIdx)[idx] = (u16)offset; } @@ -590,7 +705,12 @@ static inline void setLPageNFree(SPage *pPage, int nFree) { // cell offset static inline int getLPageCellOffset(SPage *pPage, int idx) { - ASSERT(idx >= 0 && idx < getLPageCellNum(pPage)); + int cellNum = getLPageCellNum(pPage); + if (idx < 0 || idx >= cellNum) { + tdbError("tdb/lpage-cell-offset: idx: %d out of range[%d, %d).", idx, 0, cellNum); + return -1; + } + return TDB_GET_U24(pPage->pCellIdx + 3 * idx); } diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 8d9933b160..b09e3580a1 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -14,7 +14,7 @@ */ #include "tdbInt.h" - +/* #pragma pack(push, 1) typedef struct { u8 hdrString[16]; @@ -26,7 +26,7 @@ typedef struct { #pragma pack(pop) TDB_STATIC_ASSERT(sizeof(SFileHdr) == 128, "Size of file header is not correct"); - +*/ struct hashset_st { size_t nbits; size_t mask; @@ -234,7 +234,6 @@ int tdbPagerWrite(SPager *pPager, SPage *pPage) { int ret; SPage **ppPage; - // ASSERT(pPager->inTran); if (pPage->isDirty) return 0; // ref page one more time so the page will not be release @@ -243,23 +242,8 @@ int tdbPagerWrite(SPager *pPager, SPage *pPage) { // Set page as dirty pPage->isDirty = 1; - /* - // Add page to dirty list(TODO: NOT use O(n^2) algorithm) - for (ppPage = &pPager->pDirty; (*ppPage) && TDB_PAGE_PGNO(*ppPage) < TDB_PAGE_PGNO(pPage); - ppPage = &((*ppPage)->pDirtyNext)) { - } - if (*ppPage && TDB_PAGE_PGNO(*ppPage) == TDB_PAGE_PGNO(pPage)) { - tdbUnrefPage(pPage); - - return 0; - } - - ASSERT(*ppPage == NULL || TDB_PAGE_PGNO(*ppPage) > TDB_PAGE_PGNO(pPage)); - pPage->pDirtyNext = *ppPage; - *ppPage = pPage; - */ - tdbTrace("put page: %p %d to dirty tree: %p", pPage, TDB_PAGE_PGNO(pPage), &pPager->rbt); + tdbTrace("tdb/pager-write: put page: %p %d to dirty tree: %p", pPage, TDB_PAGE_PGNO(pPage), &pPager->rbt); tRBTreePut(&pPager->rbt, (SRBTreeNode *)pPage); // Write page to journal if neccessary @@ -327,7 +311,11 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) { while ((pNode = tRBTreeIterNext(&iter)) != NULL) { pPage = (SPage *)pNode; - ASSERT(pPage->nOverflow == 0); + if (pPage->nOverflow != 0) { + tdbError("tdb/pager-commit: %p, pPage: %p, ovfl: %d, commit page failed.", pPager, pPage, pPage->nOverflow); + return -1; + } + ret = tdbPagerPWritePageToDB(pPager, pPage); if (ret < 0) { tdbError("failed to write page to db since %s", tstrerror(terrno)); @@ -652,12 +640,15 @@ int tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initPa loadPage = 0; ret = tdbPagerAllocPage(pPager, &pgno); if (ret < 0) { - ASSERT(0); + tdbError("tdb/pager: %p, ret: %d pgno: %" PRIu32 ", alloc page failed.", pPager, ret, pgno); return -1; } } - ASSERT(pgno > 0); + if (pgno == 0) { + tdbError("tdb/pager: %p, ret: %d pgno: %" PRIu32 ", alloc page failed.", pPager, ret, pgno); + return -1; + } // fetch a page container memcpy(&pgid, pPager->fid, TDB_FILE_ID_LEN); @@ -671,7 +662,7 @@ int tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initPa if (!TDB_PAGE_INITIALIZED(pPage)) { ret = tdbPagerInitPage(pPager, pPage, initPage, arg, loadPage); if (ret < 0) { - ASSERT(0); + tdbError("tdb/pager: %p, pPage: %p, init page failed.", pPager, pPage); return -1; } } @@ -679,8 +670,14 @@ int tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initPa // printf("thread %" PRId64 " pager fetch page %d pgno %d ppage %p\n", taosGetSelfPthreadId(), pPage->id, // TDB_PAGE_PGNO(pPage), pPage); - ASSERT(TDB_PAGE_INITIALIZED(pPage)); - ASSERT(pPage->pPager == pPager); + if (!TDB_PAGE_INITIALIZED(pPage)) { + tdbError("tdb/pager: %p, pPage: %p, fetch page uninited.", pPager, pPage); + return -1; + } + if (pPage->pPager != pPager) { + tdbError("tdb/pager: %p/%p, fetch page failed.", pPager, pPage->pPager); + return -1; + } *ppgno = pgno; *ppPage = pPage; @@ -722,8 +719,10 @@ int tdbPagerAllocPage(SPager *pPager, SPgno *ppgno) { return -1; } - ASSERT(*ppgno != 0); - + if (*ppgno == 0) { + tdbError("tdb/pager:%p, alloc new page failed.", pPager); + return -1; + } return 0; } @@ -752,7 +751,6 @@ static int tdbPagerInitPage(SPager *pPager, SPage *pPage, int (*initPage)(SPage nRead = tdbOsPRead(pPager->fd, pPage->pData, pPage->pageSize, ((i64)pPage->pageSize) * (pgno - 1)); tdbTrace("tdb/pager:%p, pgno:%d, nRead:%" PRId64, pPager, pgno, nRead); if (nRead < pPage->pageSize) { - ASSERT(0); tdbError("tdb/pager:%p, pgno:%d, nRead:%" PRId64 "pgSize:%" PRId32, pPager, pgno, nRead, pPage->pageSize); TDB_UNLOCK_PAGE(pPage); return -1; @@ -763,7 +761,8 @@ static int tdbPagerInitPage(SPager *pPager, SPage *pPage, int (*initPage)(SPage ret = (*initPage)(pPage, arg, init); if (ret < 0) { - ASSERT(0); + tdbError("tdb/pager:%p, pgno:%d, nRead:%" PRId64 "pgSize:%" PRId32 " init page failed.", pPager, pgno, nRead, + pPage->pageSize); TDB_UNLOCK_PAGE(pPage); return -1; } @@ -782,7 +781,8 @@ static int tdbPagerInitPage(SPager *pPager, SPage *pPage, int (*initPage)(SPage } } } else { - ASSERT(0); + tdbError("tdb/pager:%p, pgno:%d, nRead:%" PRId64 "pgSize:%" PRId32 " lock page failed.", pPager, pgno, nRead, + pPage->pageSize); return -1; } diff --git a/source/libs/tdb/src/db/tdbTable.c b/source/libs/tdb/src/db/tdbTable.c index 2950169979..b6f712e585 100644 --- a/source/libs/tdb/src/db/tdbTable.c +++ b/source/libs/tdb/src/db/tdbTable.c @@ -105,8 +105,6 @@ int tdbTbOpen(const char *tbname, int keyLen, int valLen, tdb_cmpr_fn_t keyCmprF #endif - ASSERT(pPager != NULL); - if (rollback) { tdbPagerRollback(pPager); } else { diff --git a/source/libs/tdb/src/db/tdbTxn.c b/source/libs/tdb/src/db/tdbTxn.c index 055d9c7f98..bc23fdb759 100644 --- a/source/libs/tdb/src/db/tdbTxn.c +++ b/source/libs/tdb/src/db/tdbTxn.c @@ -18,7 +18,10 @@ int tdbTxnOpen(TXN *pTxn, int64_t txnid, void *(*xMalloc)(void *, size_t), void (*xFree)(void *, void *), void *xArg, int flags) { // not support read-committed version at the moment - ASSERT(flags == 0 || flags == (TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED)); + if (flags != 0 && flags != (TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED)) { + tdbError("tdb/txn: invalid txn flags: %" PRId32, flags); + return -1; + } pTxn->flags = flags; pTxn->txnId = txnid; From 4a0968e37242409013ec875c104d07ee6656e1d8 Mon Sep 17 00:00:00 2001 From: chenhaoran Date: Wed, 28 Dec 2022 19:05:33 +0800 Subject: [PATCH 73/82] test:add client-server compatibility test --- tests/system-test/compatibilityAllTest.sh | 46 +++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tests/system-test/compatibilityAllTest.sh diff --git a/tests/system-test/compatibilityAllTest.sh b/tests/system-test/compatibilityAllTest.sh new file mode 100644 index 0000000000..0e5d17504a --- /dev/null +++ b/tests/system-test/compatibilityAllTest.sh @@ -0,0 +1,46 @@ +#!/bin/bash +ulimit -c unlimited +#======================p1-insert=============== + +python3 ./test.py -f 0-others/taosShell.py +python3 ./test.py -f 0-others/taosShellError.py +python3 ./test.py -f 0-others/taosShellNetChk.py +python3 ./test.py -f 1-insert/alter_database.py +python3 ./test.py -f 1-insert/influxdb_line_taosc_insert.py +python3 ./test.py -f 1-insert/opentsdb_telnet_line_taosc_insert.py +python3 ./test.py -f 1-insert/opentsdb_json_taosc_insert.py +python3 ./test.py -f 1-insert/test_stmt_muti_insert_query.py +python3 ./test.py -f 1-insert/test_stmt_set_tbname_tag.py +python3 ./test.py -f 1-insert/alter_stable.py +python3 ./test.py -f 1-insert/alter_table.py +python3 ./test.py -f 1-insert/boundary.py +python3 ./test.py -f 2-query/top.py +python3 ./test.py -f 2-query/top.py -R +python3 ./test.py -f 2-query/tsbsQuery.py +python3 ./test.py -f 2-query/tsbsQuery.py -R +python3 ./test.py -f 2-query/ttl_comment.py +python3 ./test.py -f 2-query/ttl_comment.py -R +python3 ./test.py -f 2-query/twa.py +python3 ./test.py -f 2-query/twa.py -R +python3 ./test.py -f 2-query/union.py +python3 ./test.py -f 2-query/union.py -R +python3 ./test.py -f 6-cluster/5dnode1mnode.py +python3 ./test.py -f 6-cluster/5dnode2mnode.py -N 5 +python3 ./test.py -f 6-cluster/5dnode3mnodeStop.py -N 5 -M 3 +python3 ./test.py -f 6-cluster/5dnode3mnodeStop.py -N 5 -M 3 -i False +python3 ./test.py -f 6-cluster/5dnode3mnodeStop2Follower.py -N 5 -M 3 +python3 ./test.py -f 6-cluster/5dnode3mnodeStop2Follower.py -N 5 -M 3 -i False +python3 ./test.py -f 6-cluster/5dnode3mnodeStopLoop.py -N 5 -M 3 +python3 ./test.py -f 6-cluster/5dnode3mnodeSep1VnodeStopDnodeCreateDb.py -N 6 -M 3 +python3 ./test.py -f 6-cluster/5dnode3mnodeSep1VnodeStopDnodeCreateDb.py -N 6 -M 3 -n 3 +python3 ./test.py -f 7-tmq/subscribeStb4.py +python3 ./test.py -f 7-tmq/db.py +python3 ./test.py -f 7-tmq/tmqError.py +python3 ./test.py -f 7-tmq/schema.py +python3 ./test.py -f 7-tmq/stbFilter.py +python3 ./test.py -f 7-tmq/tmqCheckData.py +python3 ./test.py -f 7-tmq/tmqCheckData1.py +python3 ./test.py -f 7-tmq/tmqConsumerGroup.py +python3 ./test.py -f 7-tmq/tmqShow.py +python3 ./test.py -f 7-tmq/tmqAlterSchema.py +python3 ./test.py -f 99-TDcase/TD-20582.py \ No newline at end of file From 47930b02117f702dfcc6f7b713f5642a89a52055 Mon Sep 17 00:00:00 2001 From: chenhaoran Date: Wed, 28 Dec 2022 19:06:27 +0800 Subject: [PATCH 74/82] test:add client-server compatibility test --- tests/system-test/compatibilityAllTest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system-test/compatibilityAllTest.sh b/tests/system-test/compatibilityAllTest.sh index 0e5d17504a..8b599afd86 100644 --- a/tests/system-test/compatibilityAllTest.sh +++ b/tests/system-test/compatibilityAllTest.sh @@ -43,4 +43,4 @@ python3 ./test.py -f 7-tmq/tmqCheckData1.py python3 ./test.py -f 7-tmq/tmqConsumerGroup.py python3 ./test.py -f 7-tmq/tmqShow.py python3 ./test.py -f 7-tmq/tmqAlterSchema.py -python3 ./test.py -f 99-TDcase/TD-20582.py \ No newline at end of file +python3 ./test.py -f 99-TDcase/TD-20582.py From 9e989691c3046d1ac83580ebaa2d736cc948566d Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Wed, 28 Dec 2022 19:10:12 +0800 Subject: [PATCH 75/82] fix: set dnode online after status check success --- source/dnode/mnode/impl/src/mndDnode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 58ae85a628..d7b16c2c8e 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -397,8 +397,6 @@ static int32_t mndProcessStatusReq(SRpcMsg *pReq) { bool reboot = (pDnode->rebootTime != statusReq.rebootTime); bool needCheck = !online || dnodeChanged || reboot; - pDnode->accessTimes++; - pDnode->lastAccessTime = curMs; const STraceId *trace = &pReq->info.traceId; mGTrace("dnode:%d, status received, accessTimes:%d check:%d online:%d reboot:%d changed:%d statusSeq:%d", pDnode->id, pDnode->accessTimes, needCheck, online, reboot, dnodeChanged, statusReq.statusSeq); @@ -534,6 +532,8 @@ static int32_t mndProcessStatusReq(SRpcMsg *pReq) { pReq->info.rsp = pHead; } + pDnode->accessTimes++; + pDnode->lastAccessTime = curMs; code = 0; _OVER: From 6929848ed3a978464ec894d13b2e406ed8cf5e20 Mon Sep 17 00:00:00 2001 From: chenhaoran Date: Wed, 28 Dec 2022 19:18:56 +0800 Subject: [PATCH 76/82] test:add client-server compatibility test --- tests/system-test/compatibilityAllTest.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tests/system-test/compatibilityAllTest.sh diff --git a/tests/system-test/compatibilityAllTest.sh b/tests/system-test/compatibilityAllTest.sh old mode 100644 new mode 100755 From cc29662bf98fe701d43ec219353e71348fbe735e Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Wed, 28 Dec 2022 20:09:02 +0800 Subject: [PATCH 77/82] fix: adjust weight of mnode while balance --- source/dnode/mnode/impl/src/mndVgroup.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index c6b0fe49a0..2550c68cfb 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -480,13 +480,14 @@ SArray *mndBuildDnodesArray(SMnode *pMnode, int32_t exceptDnodeId) { static int32_t mndCompareDnodeId(int32_t *dnode1Id, int32_t *dnode2Id) { return *dnode1Id >= *dnode2Id ? 1 : 0; } -static float mndGetDnodeScore(SDnodeObj *pDnode) { - return ((float)pDnode->numOfVnodes + (float)pDnode->numOfOtherNodes * 0.9) / pDnode->numOfSupportVnodes; +static float mndGetDnodeScore(SDnodeObj *pDnode, int32_t additionDnodes, float ratio) { + float totalDnodes = pDnode->numOfVnodes + (float)pDnode->numOfOtherNodes * ratio + additionDnodes; + return totalDnodes / pDnode->numOfSupportVnodes; } static int32_t mndCompareDnodeVnodes(SDnodeObj *pDnode1, SDnodeObj *pDnode2) { - float d1Score = mndGetDnodeScore(pDnode1); - float d2Score = mndGetDnodeScore(pDnode2); + float d1Score = mndGetDnodeScore(pDnode1, 0, 0.9); + float d2Score = mndGetDnodeScore(pDnode2, 0, 0.9); return d1Score >= d2Score ? 1 : 0; } @@ -509,7 +510,7 @@ static int32_t mndGetAvailableDnode(SMnode *pMnode, SDbObj *pDb, SVgObj *pVgroup taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes); for (int32_t i = 0; i < (int32_t)taosArrayGetSize(pArray); ++i) { SDnodeObj *pDnode = taosArrayGet(pArray, i); - mDebug("dnode:%d, score:%f", pDnode->id, mndGetDnodeScore(pDnode)); + mDebug("dnode:%d, score:%f", pDnode->id, mndGetDnodeScore(pDnode, 0, 0.9)); } int32_t size = taosArrayGetSize(pArray); @@ -891,7 +892,7 @@ static int32_t mndAddVnodeToVgroup(SMnode *pMnode, STrans *pTrans, SVgObj *pVgro taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes); for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) { SDnodeObj *pDnode = taosArrayGet(pArray, i); - mInfo("dnode:%d, equivalent vnodes:%d", pDnode->id, pDnode->numOfVnodes); + mInfo("dnode:%d, equivalent vnodes:%d others:%d", pDnode->id, pDnode->numOfVnodes, pDnode->numOfOtherNodes); } SVnodeGid *pVgid = &pVgroup->vnodeGid[pVgroup->replica]; @@ -951,7 +952,7 @@ static int32_t mndRemoveVnodeFromVgroup(SMnode *pMnode, STrans *pTrans, SVgObj * taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes); for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) { SDnodeObj *pDnode = taosArrayGet(pArray, i); - mInfo("dnode:%d, equivalent vnodes:%d", pDnode->id, pDnode->numOfVnodes); + mInfo("dnode:%d, equivalent vnodes:%d others:%d", pDnode->id, pDnode->numOfVnodes, pDnode->numOfOtherNodes); } int32_t code = -1; @@ -1986,16 +1987,16 @@ static int32_t mndBalanceVgroup(SMnode *pMnode, SRpcMsg *pReq, SArray *pArray) { taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes); for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) { SDnodeObj *pDnode = taosArrayGet(pArray, i); - mInfo("dnode:%d, equivalent vnodes:%d support:%d, score:%f", pDnode->id, pDnode->numOfVnodes, - pDnode->numOfSupportVnodes, (float)pDnode->numOfVnodes / pDnode->numOfSupportVnodes); + mInfo("dnode:%d, equivalent vnodes:%d others:%d support:%d, score:%f", pDnode->id, pDnode->numOfVnodes, + pDnode->numOfSupportVnodes, pDnode->numOfOtherNodes, mndGetDnodeScore(pDnode, 0, 1)); } SDnodeObj *pSrc = taosArrayGet(pArray, taosArrayGetSize(pArray) - 1); SDnodeObj *pDst = taosArrayGet(pArray, 0); - float srcScore = (float)(pSrc->numOfVnodes - 1) / pSrc->numOfSupportVnodes; - float dstScore = (float)(pDst->numOfVnodes + 1) / pDst->numOfSupportVnodes; - mInfo("trans:%d, after balance, src dnode:%d score:%f, dst dnode:%d score:%f", pTrans->id, pSrc->id, srcScore, + float srcScore = mndGetDnodeScore(pSrc, -1, 1); + float dstScore = mndGetDnodeScore(pDst, 1, 1); + mInfo("trans:%d, after balance, src dnode:%d score:%f, dst dnode:%d score:%f", pTrans->id, pSrc->id, dstScore, pDst->id, dstScore); if (srcScore > dstScore - 0.000001) { From bffe45c79b690cd82b9a53fe3ff3630c4c4ae613 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Wed, 28 Dec 2022 20:14:52 +0800 Subject: [PATCH 78/82] test: adjust some balance case --- tests/script/tsim/dnode/balance1.sim | 4 ++-- tests/script/tsim/dnode/balance2.sim | 4 ++-- tests/script/tsim/dnode/balance3.sim | 8 ++++---- tests/script/tsim/dnode/balancex.sim | 4 ++-- tests/script/tsim/dnode/vnode_clean.sim | 16 ++++++++-------- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/script/tsim/dnode/balance1.sim b/tests/script/tsim/dnode/balance1.sim index 2b0154c8e5..3bb9c4f3eb 100644 --- a/tests/script/tsim/dnode/balance1.sim +++ b/tests/script/tsim/dnode/balance1.sim @@ -74,10 +74,10 @@ sql insert into d2.t2 values(now+5s, 21) sql select * from information_schema.ins_dnodes print dnode1 openVnodes $data(1)[2] print dnode2 openVnodes $data(2)[2] -if $data(1)[2] != 0 then +if $data(1)[2] != 1 then return -1 endi -if $data(2)[2] != 2 then +if $data(2)[2] != 1 then return -1 endi diff --git a/tests/script/tsim/dnode/balance2.sim b/tests/script/tsim/dnode/balance2.sim index 3f5a42d4d3..9eeb7e1251 100644 --- a/tests/script/tsim/dnode/balance2.sim +++ b/tests/script/tsim/dnode/balance2.sim @@ -161,13 +161,13 @@ print dnode1 openVnodes $data(1)[2] print dnode3 openVnodes $data(3)[2] print dnode4 openVnodes $data(4)[2] print dnode5 openVnodes $data(5)[2] -if $data(1)[2] != 2 then +if $data(1)[2] != 3 then return -1 endi if $data(3)[2] != 3 then return -1 endi -if $data(4)[2] != 4 then +if $data(4)[2] != 3 then return -1 endi if $data(5)[2] != 3 then diff --git a/tests/script/tsim/dnode/balance3.sim b/tests/script/tsim/dnode/balance3.sim index ce328f10bd..2fb284b466 100644 --- a/tests/script/tsim/dnode/balance3.sim +++ b/tests/script/tsim/dnode/balance3.sim @@ -127,10 +127,10 @@ print dnode1 openVnodes $data(1)[2] print dnode2 openVnodes $data(2)[2] print dnode3 openVnodes $data(3)[2] print dnode4 openVnodes $data(4)[2] -if $data(1)[2] != 0 then +if $data(1)[2] != 1 then return -1 endi -if $data(2)[2] != 2 then +if $data(2)[2] != 1 then return -1 endi if $data(3)[2] != 2 then @@ -228,10 +228,10 @@ print dnode1 openVnodes $data(1)[2] print dnode3 openVnodes $data(3)[2] print dnode4 openVnodes $data(4)[2] print dnode5 openVnodes $data(5)[2] -if $data(1)[2] != 1 then +if $data(1)[2] != 2 then return -1 endi -if $data(3)[2] != 3 then +if $data(3)[2] != 2 then return -1 endi if $data(4)[2] != 3 then diff --git a/tests/script/tsim/dnode/balancex.sim b/tests/script/tsim/dnode/balancex.sim index 6b16e8b569..0cfc64a954 100644 --- a/tests/script/tsim/dnode/balancex.sim +++ b/tests/script/tsim/dnode/balancex.sim @@ -142,10 +142,10 @@ print dnode1 openVnodes $data(1)[2] print dnode2 openVnodes $data(2)[2] print dnode2 openVnodes $data(3)[2] print dnode2 openVnodes $data(4)[2] -if $data(1)[2] != 0 then +if $data(1)[2] != 1 then return -1 endi -if $data(2)[2] != 2 then +if $data(2)[2] != 1 then return -1 endi if $data(3)[2] != 2 then diff --git a/tests/script/tsim/dnode/vnode_clean.sim b/tests/script/tsim/dnode/vnode_clean.sim index 112e5f28a4..ba1d083c68 100644 --- a/tests/script/tsim/dnode/vnode_clean.sim +++ b/tests/script/tsim/dnode/vnode_clean.sim @@ -71,10 +71,10 @@ sql insert into d2.t2 values(now+5s, 21) sql select * from information_schema.ins_dnodes print dnode1 openVnodes $data(1)[2] print dnode2 openVnodes $data(2)[2] -if $data(1)[2] != 0 then +if $data(1)[2] != 1 then return -1 endi -if $data(2)[2] != 2 then +if $data(2)[2] != 1 then return -1 endi @@ -181,10 +181,10 @@ sql select * from information_schema.ins_dnodes print dnode1 openVnodes $data(1)[2] print dnode2 openVnodes $data(3)[2] print dnode2 openVnodes $data(4)[2] -if $data(1)[2] != 0 then +if $data(1)[2] != 1 then return -1 endi -if $data(3)[2] != 2 then +if $data(3)[2] != 1 then return -1 endi if $data(4)[2] != 1 then @@ -204,10 +204,10 @@ sql select * from information_schema.ins_dnodes print dnode1 openVnodes $data(1)[2] print dnode2 openVnodes $data(3)[2] print dnode2 openVnodes $data(4)[2] -if $data(1)[2] != 0 then +if $data(1)[2] != 1 then return -1 endi -if $data(3)[2] != 2 then +if $data(3)[2] != 1 then return -1 endi if $data(4)[2] != 2 then @@ -220,13 +220,13 @@ sql select * from information_schema.ins_dnodes print dnode1 openVnodes $data(1)[2] print dnode2 openVnodes $data(3)[2] print dnode2 openVnodes $data(4)[2] -if $data(1)[2] != 1 then +if $data(1)[2] != 2 then return -1 endi if $data(3)[2] != null then return -1 endi -if $data(4)[2] != 3 then +if $data(4)[2] != 2 then return -1 endi From 853b93fbe12d8165fa10756bca006d5cfe7b2733 Mon Sep 17 00:00:00 2001 From: xinsheng Ren <285808407@qq.com> Date: Thu, 29 Dec 2022 09:21:20 +0800 Subject: [PATCH 79/82] Fix/xsren/td 20817 mac fqdn (#19078) * fix/TD-20817-mac-fqdn use localhostname * fix/TD-20817-mac-fqdn redundant codes * fix/TD-20817-mac-fqdn,replace command with api Co-authored-by: facetosea <25808407@qq.com> Co-authored-by: Shuduo Sang --- include/os/osSysinfo.h | 1 + source/os/CMakeLists.txt | 4 +++ source/os/src/osSocket.c | 20 +++++++-------- source/os/src/osSysinfo.c | 51 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 11 deletions(-) diff --git a/include/os/osSysinfo.h b/include/os/osSysinfo.h index 7765a60f88..dbe4d6801e 100644 --- a/include/os/osSysinfo.h +++ b/include/os/osSysinfo.h @@ -70,6 +70,7 @@ typedef struct { SysNameInfo taosGetSysNameInfo(); bool taosCheckCurrentInDll(); +int taosGetlocalhostname(char *hostname, size_t maxLen); #ifdef __cplusplus } diff --git a/source/os/CMakeLists.txt b/source/os/CMakeLists.txt index b7cb20896b..0d85a8bec4 100644 --- a/source/os/CMakeLists.txt +++ b/source/os/CMakeLists.txt @@ -44,6 +44,10 @@ if(TD_WINDOWS) os PUBLIC ws2_32 iconv msvcregex wcwidth winmm crashdump ) elseif(TD_DARWIN_64) + find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation) + target_link_libraries(os PUBLIC ${CORE_FOUNDATION_FRAMEWORK}) + find_library(SYSTEM_CONFIGURATION_FRAMEWORK SystemConfiguration) + target_link_libraries(os PUBLIC ${SYSTEM_CONFIGURATION_FRAMEWORK}) target_link_libraries( os PUBLIC dl m iconv ) diff --git a/source/os/src/osSocket.c b/source/os/src/osSocket.c index db2a9937b5..6611a937f2 100644 --- a/source/os/src/osSocket.c +++ b/source/os/src/osSocket.c @@ -988,7 +988,7 @@ int32_t taosGetFqdn(char *fqdn) { #endif char hostname[1024]; hostname[1023] = '\0'; - if (gethostname(hostname, 1023) == -1) { + if (taosGetlocalhostname(hostname, 1023) == -1) { #ifdef WINDOWS printf("failed to get hostname, reason:%s\n", strerror(WSAGetLastError())); #else @@ -998,30 +998,28 @@ int32_t taosGetFqdn(char *fqdn) { return -1; } - struct addrinfo hints = {0}; - struct addrinfo *result = NULL; #ifdef __APPLE__ // on macosx, hostname -f has the form of xxx.local // which will block getaddrinfo for a few seconds if AI_CANONNAME is set // thus, we choose AF_INET (ipv4 for the moment) to make getaddrinfo return // immediately - hints.ai_family = AF_INET; + // hints.ai_family = AF_INET; + strcpy(fqdn, hostname); + strcpy(fqdn+strlen(hostname), ".local"); #else // __APPLE__ + struct addrinfo hints = {0}; + struct addrinfo *result = NULL; hints.ai_flags = AI_CANONNAME; -#endif // __APPLE__ + int32_t ret = getaddrinfo(hostname, NULL, &hints, &result); if (!result) { fprintf(stderr, "failed to get fqdn, code:%d, reason:%s\n", ret, gai_strerror(ret)); return -1; } - -#ifdef __APPLE__ - // refer to comments above - strcpy(fqdn, hostname); -#else // __APPLE__ strcpy(fqdn, result->ai_canonname); -#endif // __APPLE__ freeaddrinfo(result); +#endif // __APPLE__ + return 0; } diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index e1abe84841..6c9bf40e4d 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -98,6 +98,9 @@ LONG WINAPI exceptionHandler(LPEXCEPTION_POINTERS exception); #include #include #include +#include +#include +#include #else @@ -1007,6 +1010,11 @@ SysNameInfo taosGetSysNameInfo() { tstrncpy(info.machine, uts.machine, sizeof(info.machine)); } + char localHostName[512]; + taosGetlocalhostname(localHostName, 512); + TdCmdPtr pCmd = taosOpenCmd("scutil --get LocalHostName"); + tstrncpy(info.nodename, localHostName, sizeof(info.nodename)); + return info; #else SysNameInfo info = {0}; @@ -1042,3 +1050,46 @@ bool taosCheckCurrentInDll() { return false; #endif } + +#ifdef _TD_DARWIN_64 +int taosGetMaclocalhostnameByCommand(char *hostname, size_t maxLen) { + TdCmdPtr pCmd = taosOpenCmd("scutil --get LocalHostName"); + if (pCmd != NULL) { + if (taosGetsCmd(pCmd, maxLen - 1, hostname) > 0) { + int len = strlen(hostname); + if (hostname[len - 1] == '\n') { + hostname[len - 1] = '\0'; + } + return 0; + } + taosCloseCmd(&pCmd); + } + return -1; +} + +int getMacLocalHostNameBySCD(char *hostname, size_t maxLen) { + SCDynamicStoreRef store = SCDynamicStoreCreate(NULL, CFSTR(""), NULL, NULL); + CFStringRef hostname_cfstr = SCDynamicStoreCopyLocalHostName(store); + if (hostname_cfstr != NULL) { + CFStringGetCString(hostname_cfstr, hostname, maxLen - 1, kCFStringEncodingMacRoman); + CFRelease(hostname_cfstr); + } else { + return -1; + } + CFRelease(store); + return 0; +} +#endif + +int taosGetlocalhostname(char *hostname, size_t maxLen) { +#ifdef _TD_DARWIN_64 + int res = getMacLocalHostNameBySCD(hostname, maxLen); + if (res != 0) { + return taosGetMaclocalhostnameByCommand(hostname, maxLen); + } else { + return 0; + } +#else + return gethostname(hostname, maxLen); +#endif +} From bd38f600c09ca3873b76d5561caa800200dc3187 Mon Sep 17 00:00:00 2001 From: xinsheng Ren <285808407@qq.com> Date: Thu, 29 Dec 2022 09:21:54 +0800 Subject: [PATCH 80/82] chore/enterprise condition fix (#19226) Co-authored-by: facetosea <25808407@qq.com> --- cmake/cmake.define | 1 + packaging/tools/make_install.bat | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/cmake.define b/cmake/cmake.define index d32200bb91..a739a77d21 100644 --- a/cmake/cmake.define +++ b/cmake/cmake.define @@ -1,6 +1,7 @@ cmake_minimum_required(VERSION 3.0) set(CMAKE_VERBOSE_MAKEFILE OFF) +set(TD_BUILD_TAOSA_INTERNAL FALSE) #set output directory SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/build/lib) diff --git a/packaging/tools/make_install.bat b/packaging/tools/make_install.bat index 8853a014f9..bf7418ad79 100644 --- a/packaging/tools/make_install.bat +++ b/packaging/tools/make_install.bat @@ -58,7 +58,7 @@ if exist %binary_dir%\\build\\lib\\taosws.dll ( if exist %binary_dir%\\build\\bin\\taosdump.exe ( copy %binary_dir%\\build\\bin\\taosdump.exe %target_dir% > nul ) -if %Enterprise% ( +if %Enterprise% == TRUE ( if exist %binary_dir%\\build\\bin\\taosx.exe ( copy %binary_dir%\\build\\bin\\taosx.exe %target_dir% > nul ) From 1989fb8fadf475026eb804540459f254a48b670c Mon Sep 17 00:00:00 2001 From: Alex Duan <51781608+DuanKuanJun@users.noreply.github.com> Date: Thu, 29 Dec 2022 10:05:44 +0800 Subject: [PATCH 81/82] Update 20-keywords.md add alive to keywords --- docs/zh/12-taos-sql/20-keywords.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/zh/12-taos-sql/20-keywords.md b/docs/zh/12-taos-sql/20-keywords.md index 8013698fce..8fd704ef55 100644 --- a/docs/zh/12-taos-sql/20-keywords.md +++ b/docs/zh/12-taos-sql/20-keywords.md @@ -18,6 +18,7 @@ description: TDengine 保留关键字的详细列表 - ADD - AFTER - AGGREGATE +- ALIVE - ALL - ALTER - ANALYZE From 70b5a10ab0fa44a962ced515bbc5f491b1838cc5 Mon Sep 17 00:00:00 2001 From: Alex Duan <51781608+DuanKuanJun@users.noreply.github.com> Date: Thu, 29 Dec 2022 10:08:47 +0800 Subject: [PATCH 82/82] Update 20-keywords.md ADD ALIVE TO RESERVED KEYWORDS --- docs/en/12-taos-sql/20-keywords.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/12-taos-sql/20-keywords.md b/docs/en/12-taos-sql/20-keywords.md index 4b479b866b..23f85947e3 100644 --- a/docs/en/12-taos-sql/20-keywords.md +++ b/docs/en/12-taos-sql/20-keywords.md @@ -17,6 +17,7 @@ The following list shows all reserved keywords: - ADD - AFTER - AGGREGATE +- ALIVE - ALL - ALTER - ANALYZE