From 09e5ca7a0f8a3574efc0dc9fbabddf039003fac1 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 15 Feb 2023 15:34:55 +0800 Subject: [PATCH 01/35] handle fastfail --- source/libs/transport/src/transCli.c | 85 +++++++++++++--------------- source/libs/transport/src/transSvr.c | 4 +- 2 files changed, 42 insertions(+), 47 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 111742a6f4..d8ea21c335 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -579,7 +579,7 @@ static void addConnToPool(void* pool, SCliConn* conn) { QUEUE_PUSH(&conn->list->conns, &conn->q); conn->list->size += 1; - if (conn->list->size >= 50) { + if (conn->list->size >= 250) { STaskArg* arg = taosMemoryCalloc(1, sizeof(STaskArg)); arg->param1 = conn; arg->param2 = thrd; @@ -882,47 +882,50 @@ void cliSend(SCliConn* pConn) { _RETURN: return; } +static void cliHandleFastFail(SCliConn* pConn, int status) { + SCliThrd* pThrd = pConn->hostThrd; + STrans* pTransInst = pThrd->pTransInst; + SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); + STraceId* trace = &pMsg->msg.info.traceId; + + tGError("%s msg %s failed to send, conn %p failed to connect to %s:%d, reason: %s", CONN_GET_INST_LABEL(pConn), + pMsg ? TMSG_INFO(pMsg->msg.msgType) : 0, pConn, pConn->ip, pConn->port, uv_strerror(status)); + uv_timer_stop(pConn->timer); + pConn->timer->data = NULL; + taosArrayPush(pThrd->timerList, &pConn->timer); + pConn->timer = NULL; + + if (pMsg != NULL && REQUEST_NO_RESP(&pMsg->msg) && + (pTransInst->failFastFp != NULL && pTransInst->failFastFp(pMsg->msg.msgType))) { + char* ip = pConn->ip; + uint32_t port = pConn->port; + char key[TSDB_FQDN_LEN + 64] = {0}; + CONN_CONSTRUCT_HASH_KEY(key, ip, port); + + SFailFastItem* item = taosHashGet(pThrd->failFastCache, key, strlen(key)); + int64_t cTimestamp = taosGetTimestampMs(); + if (item != NULL) { + int32_t elapse = cTimestamp - item->timestamp; + if (elapse >= 0 && elapse <= pTransInst->failFastInterval) { + item->count++; + } else { + item->count = 1; + item->timestamp = cTimestamp; + } + } else { + SFailFastItem item = {.count = 1, .timestamp = cTimestamp}; + taosHashPut(pThrd->failFastCache, key, strlen(key), &item, sizeof(SFailFastItem)); + } + } + cliHandleExcept(pConn); +} void cliConnCb(uv_connect_t* req, int status) { SCliConn* pConn = req->data; SCliThrd* pThrd = pConn->hostThrd; - if (pConn->timer != NULL) { - uv_timer_stop(pConn->timer); - pConn->timer->data = NULL; - taosArrayPush(pThrd->timerList, &pConn->timer); - pConn->timer = NULL; - } - if (status != 0) { - SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); - STrans* pTransInst = pThrd->pTransInst; - - tError("%s msg %s failed to send, conn %p failed to connect to %s:%d, reason: %s", CONN_GET_INST_LABEL(pConn), - pMsg ? TMSG_INFO(pMsg->msg.msgType) : 0, pConn, pConn->ip, pConn->port, uv_strerror(status)); - if (pMsg != NULL && REQUEST_NO_RESP(&pMsg->msg) && - (pTransInst->failFastFp != NULL && pTransInst->failFastFp(pMsg->msg.msgType))) { - char* ip = pConn->ip; - uint32_t port = pConn->port; - char key[TSDB_FQDN_LEN + 64] = {0}; - CONN_CONSTRUCT_HASH_KEY(key, ip, port); - - SFailFastItem* item = taosHashGet(pThrd->failFastCache, key, strlen(key)); - int64_t cTimestamp = taosGetTimestampMs(); - if (item != NULL) { - int32_t elapse = cTimestamp - item->timestamp; - if (elapse >= 0 && elapse <= pTransInst->failFastInterval) { - item->count++; - } else { - item->count = 1; - item->timestamp = cTimestamp; - } - } else { - SFailFastItem item = {.count = 1, .timestamp = cTimestamp}; - taosHashPut(pThrd->failFastCache, key, strlen(key), &item, sizeof(SFailFastItem)); - } - } - cliHandleExcept(pConn); + cliHandleFastFail(pConn, status); return; } struct sockaddr peername, sockname; @@ -1163,15 +1166,7 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { ret = uv_tcp_connect(&conn->connReq, (uv_tcp_t*)(conn->stream), (const struct sockaddr*)&addr, cliConnCb); if (ret != 0) { - tGError("%s conn %p failed to connect to %s:%d, reason:%s", pTransInst->label, conn, conn->ip, conn->port, - uv_err_name(ret)); - - uv_timer_stop(conn->timer); - conn->timer->data = NULL; - taosArrayPush(pThrd->timerList, &conn->timer); - conn->timer = NULL; - - cliHandleExcept(conn); + cliHandleFastFail(conn, ret); return; } uv_timer_start(conn->timer, cliConnTimeout, TRANS_CONN_TIMEOUT, 0); diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index fa8929f7d9..eecd260d35 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -246,11 +246,11 @@ static bool uvHandleReq(SSvrConn* pConn) { } } else { if (cost >= EXCEPTION_LIMIT_US) { - tGWarn("%s conn %p %s received from %s, local info:%s, len:%d, resp:%d, code:%d, cost:%dus, recv exception", + tGWarn("%s conn %p %s received from %s, local info:%s, len:%d, noResp:%d, code:%d, cost:%dus, recv exception", transLabel(pTransInst), pConn, TMSG_INFO(transMsg.msgType), pConn->dst, pConn->src, msgLen, pHead->noResp, transMsg.code, (int)(cost)); } else { - tGDebug("%s conn %p %s received from %s, local info:%s, len:%d, resp:%d, code:%d, cost:%dus", + tGDebug("%s conn %p %s received from %s, local info:%s, len:%d, noResp:%d, code:%d, cost:%dus", transLabel(pTransInst), pConn, TMSG_INFO(transMsg.msgType), pConn->dst, pConn->src, msgLen, pHead->noResp, transMsg.code, (int)(cost)); } From 009b6a61fa7d252d2cdc8a2c8bdba12ca776f376 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 15 Feb 2023 17:25:37 +0800 Subject: [PATCH 02/35] enh: limit tcp session between cluster --- include/util/taoserror.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index d8eecdfc64..75bdf81a27 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -67,6 +67,10 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RPC_TIMEOUT TAOS_DEF_ERROR_CODE(0, 0x0019) // #define TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED TAOS_DEF_ERROR_CODE(0, 0x0020) // "Vgroup could not be connected" #define TSDB_CODE_RPC_SOMENODE_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0021) // +#define TSDB_CODE_RPC_MAX_SESSIONS TAOS_DEF_ERROR_CODE(0, 0x0022) // + + + //common & util #define TSDB_CODE_OPS_NOT_SUPPORT TAOS_DEF_ERROR_CODE(0, 0x0100) // From e44704b20ed534c2c204029fb3b5f8bc151489d2 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 15 Feb 2023 17:26:11 +0800 Subject: [PATCH 03/35] opt: opt tag index --- include/libs/transport/trpc.h | 2 ++ source/dnode/mgmt/node_mgmt/src/dmTransport.c | 2 ++ source/libs/transport/inc/transportInt.h | 1 + source/libs/transport/src/trans.c | 1 + source/libs/transport/src/transCli.c | 19 ++++++++++++++++++- source/util/src/terror.c | 1 + 6 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/libs/transport/trpc.h b/include/libs/transport/trpc.h index de3c2a9f52..ff68b72fc2 100644 --- a/include/libs/transport/trpc.h +++ b/include/libs/transport/trpc.h @@ -112,6 +112,8 @@ typedef struct SRpcInit { // fail fast fp RpcFFfp ffp; + int32_t connLimit; + void *parent; } SRpcInit; diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index dcb63f6524..dc539ac15e 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -284,6 +284,8 @@ int32_t dmInitClient(SDnode *pDnode) { rpcInit.failFastThreshold = 3; // failed threshold rpcInit.ffp = dmFailFastFp; + rpcInit.connLimit = 7500; + pTrans->clientRpc = rpcOpen(&rpcInit); if (pTrans->clientRpc == NULL) { dError("failed to init dnode rpc client"); diff --git a/source/libs/transport/inc/transportInt.h b/source/libs/transport/inc/transportInt.h index 2db4a72795..92477bb514 100644 --- a/source/libs/transport/inc/transportInt.h +++ b/source/libs/transport/inc/transportInt.h @@ -64,6 +64,7 @@ typedef struct { void (*destroyFp)(void* ahandle); bool (*failFastFp)(tmsg_t msgType); + int32_t connLimit; int index; void* parent; void* tcphandle; // returned handle from TCP initialization diff --git a/source/libs/transport/src/trans.c b/source/libs/transport/src/trans.c index 47b1ac5ca7..61ca9743b3 100644 --- a/source/libs/transport/src/trans.c +++ b/source/libs/transport/src/trans.c @@ -67,6 +67,7 @@ void* rpcOpen(const SRpcInit* pInit) { pRpc->startTimer = pInit->tfp; pRpc->destroyFp = pInit->dfp; pRpc->failFastFp = pInit->ffp; + pRpc->connLimit = pInit->connLimit; pRpc->numOfThreads = pInit->numOfThreads > TSDB_MAX_RPC_THREADS ? TSDB_MAX_RPC_THREADS : pInit->numOfThreads; if (pRpc->numOfThreads <= 0) { diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index d8ea21c335..dfbc8a5af2 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -80,6 +80,7 @@ typedef struct SCliThrd { uint64_t nextTimeout; // next timeout void* pTransInst; // + int connCount; void (*destroyAhandleFp)(void* ahandle); SHashObj* fqdn2ipCache; SCvtAddr cvtAddr; @@ -671,7 +672,6 @@ static SCliConn* cliCreateConn(SCliThrd* pThrd) { conn->stream = (uv_stream_t*)taosMemoryMalloc(sizeof(uv_tcp_t)); uv_tcp_init(pThrd->loop, (uv_tcp_t*)(conn->stream)); conn->stream->data = conn; - // transSetConnOption((uv_tcp_t*)conn->stream); uv_timer_t* timer = taosArrayGetSize(pThrd->timerList) > 0 ? *(uv_timer_t**)taosArrayPop(pThrd->timerList) : NULL; if (timer == NULL) { @@ -694,6 +694,7 @@ static SCliConn* cliCreateConn(SCliThrd* pThrd) { conn->broken = 0; transRefCliHandle(conn); + atomic_add_fetch_32(&pThrd->connCount, 1); allocConnRef(conn, false); return conn; @@ -738,6 +739,8 @@ static void cliDestroy(uv_handle_t* handle) { conn->timer = NULL; } + atomic_sub_fetch_32(&pThrd->connCount, 1); + transReleaseExHandle(transGetRefMgt(), conn->refId); transRemoveExHandle(transGetRefMgt(), conn->refId); taosMemoryFree(conn->ip); @@ -1861,6 +1864,13 @@ int transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STran return TSDB_CODE_RPC_BROKEN_LINK; } + // read only + if (pTransInst->connLimit != 0 && atomic_load_32(&pThrd->connCount) >= pTransInst->connLimit) { + transFreeMsg(pReq->pCont); + transReleaseExHandle(transGetInstMgt(), (int64_t)shandle); + return TSDB_CODE_RPC_MAX_SESSIONS; + } + TRACE_SET_MSGID(&pReq->info.traceId, tGenIdPI64()); STransConnCtx* pCtx = taosMemoryCalloc(1, sizeof(STransConnCtx)); @@ -1902,6 +1912,13 @@ int transSendRecv(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransMs transReleaseExHandle(transGetInstMgt(), (int64_t)shandle); return TSDB_CODE_RPC_BROKEN_LINK; } + // not limit sync req + // read only + // if (pTransInst->connLimit != 0 && atomic_load_32(&pThrd->connCount) >= pTransInst->connLimit) { + // transFreeMsg(pReq->pCont); + // transReleaseExHandle(transGetInstMgt(), (int64_t)shandle); + // return TSDB_CODE_RPC_MAX_SESSIONS; + //} tsem_t* sem = taosMemoryCalloc(1, sizeof(tsem_t)); tsem_init(sem, 0, 0); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 57b1998155..c07fa88af5 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -52,6 +52,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_RPC_PORT_EADDRINUSE, "Port already in use") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_BROKEN_LINK, "Conn is broken") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_TIMEOUT, "Conn read timeout") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_SOMENODE_NOT_CONNECTED, "some vnode/qnode/mnode(s) out of service") +TAOS_DEFINE_ERROR(TSDB_CODE_RPC_MAX_SESSIONS, "rpc open too many session") //common & util TAOS_DEFINE_ERROR(TSDB_CODE_TIME_UNSYNCED, "Client and server's time is not synchronized") From b8dfc7144624fcfbbea1ae8f0c3c35a2bafc4bd7 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 15 Feb 2023 19:24:48 +0800 Subject: [PATCH 04/35] handle too many session --- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 4 +- source/libs/transport/src/transCli.c | 93 +++++++++++-------- 2 files changed, 57 insertions(+), 40 deletions(-) diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index dc539ac15e..bd08eda954 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -280,11 +280,11 @@ int32_t dmInitClient(SDnode *pDnode) { rpcInit.retryMaxInterval = tsRedirectMaxPeriod; rpcInit.retryMaxTimouet = tsMaxRetryWaitTime; - rpcInit.failFastInterval = 1000; // interval threshold(ms) + rpcInit.failFastInterval = 5000; // interval threshold(ms) rpcInit.failFastThreshold = 3; // failed threshold rpcInit.ffp = dmFailFastFp; - rpcInit.connLimit = 7500; + rpcInit.connLimit = 3000; pTrans->clientRpc = rpcOpen(&rpcInit); if (pTrans->clientRpc == NULL) { diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index dfbc8a5af2..4d7b6b5b2b 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -40,9 +40,8 @@ typedef struct SCliConn { bool broken; // link broken or not ConnStatus status; // - int64_t refId; - char* ip; - uint32_t port; + int64_t refId; + char* ip; SDelayTask* task; @@ -86,6 +85,7 @@ typedef struct SCliThrd { SCvtAddr cvtAddr; SHashObj* failFastCache; + SHashObj* connLimit; SCliMsg* stopMsg; @@ -570,10 +570,8 @@ static void addConnToPool(void* pool, SCliConn* conn) { conn->status = ConnInPool; if (conn->list == NULL) { - char key[TSDB_FQDN_LEN + 64] = {0}; - CONN_CONSTRUCT_HASH_KEY(key, conn->ip, conn->port); tTrace("%s conn %p added to conn pool, read buf cap:%d", CONN_GET_INST_LABEL(conn), conn, conn->readBuf.cap); - conn->list = taosHashGet((SHashObj*)pool, key, strlen(key)); + conn->list = taosHashGet((SHashObj*)pool, conn->ip, strlen(conn->ip)); } else { tTrace("%s conn %p added to conn pool, read buf cap:%d", CONN_GET_INST_LABEL(conn), conn, conn->readBuf.cap); } @@ -751,6 +749,11 @@ static void cliDestroy(uv_handle_t* handle) { tTrace("%s conn %p destroy successfully", CONN_GET_INST_LABEL(conn), conn); transReqQueueClear(&conn->wreqQueue); transDestroyBuffer(&conn->readBuf); + + int32_t* oVal = taosHashGet(pThrd->connLimit, conn->ip, strlen(conn->ip)); + int32_t nVal = oVal == NULL ? 0 : (*oVal) - 1; + taosHashPut(pThrd->connLimit, conn->ip, strlen(conn->ip), &nVal, sizeof(nVal)); + taosMemoryFree(conn); } static bool cliHandleNoResp(SCliConn* conn) { @@ -892,8 +895,8 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); STraceId* trace = &pMsg->msg.info.traceId; - tGError("%s msg %s failed to send, conn %p failed to connect to %s:%d, reason: %s", CONN_GET_INST_LABEL(pConn), - pMsg ? TMSG_INFO(pMsg->msg.msgType) : 0, pConn, pConn->ip, pConn->port, uv_strerror(status)); + tGError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), + pMsg ? TMSG_INFO(pMsg->msg.msgType) : 0, pConn, pConn->ip, uv_strerror(status)); uv_timer_stop(pConn->timer); pConn->timer->data = NULL; taosArrayPush(pThrd->timerList, &pConn->timer); @@ -901,12 +904,7 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { if (pMsg != NULL && REQUEST_NO_RESP(&pMsg->msg) && (pTransInst->failFastFp != NULL && pTransInst->failFastFp(pMsg->msg.msgType))) { - char* ip = pConn->ip; - uint32_t port = pConn->port; - char key[TSDB_FQDN_LEN + 64] = {0}; - CONN_CONSTRUCT_HASH_KEY(key, ip, port); - - SFailFastItem* item = taosHashGet(pThrd->failFastCache, key, strlen(key)); + SFailFastItem* item = taosHashGet(pThrd->failFastCache, pConn->ip, strlen(pConn->ip)); int64_t cTimestamp = taosGetTimestampMs(); if (item != NULL) { int32_t elapse = cTimestamp - item->timestamp; @@ -918,7 +916,7 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { } } else { SFailFastItem item = {.count = 1, .timestamp = cTimestamp}; - taosHashPut(pThrd->failFastCache, key, strlen(key), &item, sizeof(SFailFastItem)); + taosHashPut(pThrd->failFastCache, pConn->ip, strlen(pConn->ip), &item, sizeof(SFailFastItem)); } } cliHandleExcept(pConn); @@ -931,9 +929,13 @@ void cliConnCb(uv_connect_t* req, int status) { cliHandleFastFail(pConn, status); return; } - struct sockaddr peername, sockname; - int addrlen = sizeof(peername); + int32_t* oVal = taosHashGet(pThrd->connLimit, pConn->ip, strlen(pConn->ip)); + int32_t nVal = oVal == NULL ? 0 : (*oVal) + 1; + taosHashPut(pThrd->connLimit, pConn->ip, strlen(pConn->ip), &nVal, sizeof(nVal)); + + struct sockaddr peername, sockname; + int addrlen = sizeof(peername); uv_tcp_getpeername((uv_tcp_t*)pConn->stream, &peername, &addrlen); transSockInfo2Str(&peername, pConn->dst); @@ -1068,6 +1070,24 @@ static FORCE_INLINE void cliUpdateFqdnCache(SHashObj* cache, char* fqdn) { return; } +static int32_t cliPreCheckSessionLimit(SCliThrd* pThrd, SCliMsg* pMsg) { + STrans* pTransInst = pThrd->pTransInst; + + STransConnCtx* pCtx = pMsg->ctx; + char* ip = EPSET_GET_INUSE_IP(&pCtx->epSet); + int32_t port = EPSET_GET_INUSE_PORT(&pCtx->epSet); + + char key[TSDB_FQDN_LEN + 64] = {0}; + CONN_CONSTRUCT_HASH_KEY(key, ip, port); + + int32_t* val = taosHashGet(pThrd->connLimit, key, strlen(key)); + if (val == NULL) return 0; + + if (*val >= pTransInst->connLimit) { + return -1; + } + return 0; +} void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { STrans* pTransInst = pThrd->pTransInst; STransConnCtx* pCtx = pMsg->ctx; @@ -1091,7 +1111,6 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { if (item != NULL) { int32_t elapse = (int32_t)(taosGetTimestampMs() - item->timestamp); if (item->count >= pTransInst->failFastThreshold && (elapse >= 0 && elapse <= pTransInst->failFastInterval)) { - STraceId* trace = &(pMsg->msg.info.traceId); tGTrace("%s, msg %s cancel to send, reason: failed to connect %s:%d: count: %d, at %d", pTransInst->label, TMSG_INFO(pMsg->msg.msgType), ip, port, item->count, elapse); destroyCmsg(pMsg); @@ -1113,6 +1132,13 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { return; } + if (conn == NULL && REQUEST_NO_RESP(&pMsg->msg) && 0 != cliPreCheckSessionLimit(pThrd, pMsg)) { + tGTrace("%s, msg %s cancel to send, reason: %s", pTransInst->label, TMSG_INFO(pMsg->msg.msgType), + tstrerror(TSDB_CODE_RPC_MAX_SESSIONS)); + destroyCmsg(pMsg); + return; + } + if (conn != NULL) { transCtxMerge(&conn->ctx, &pCtx->appCtx); transQueuePush(&conn->cliMsgs, pMsg); @@ -1126,10 +1152,14 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { transCtxMerge(&conn->ctx, &pCtx->appCtx); transQueuePush(&conn->cliMsgs, pMsg); - conn->ip = strdup(EPSET_GET_INUSE_IP(&pCtx->epSet)); - conn->port = EPSET_GET_INUSE_PORT(&pCtx->epSet); + char key[TSDB_FQDN_LEN + 64] = {0}; + char* ip = EPSET_GET_INUSE_IP(&pCtx->epSet); + uint16_t port = EPSET_GET_INUSE_PORT(&pCtx->epSet); + CONN_CONSTRUCT_HASH_KEY(key, ip, port); - uint32_t ipaddr = cliGetIpFromFqdnCache(pThrd->fqdn2ipCache, conn->ip); + conn->ip = strdup(key); + + uint32_t ipaddr = cliGetIpFromFqdnCache(pThrd->fqdn2ipCache, EPSET_GET_INUSE_IP(&pCtx->epSet)); if (ipaddr == 0xffffffff) { uv_timer_stop(conn->timer); conn->timer->data = NULL; @@ -1143,9 +1173,9 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { struct sockaddr_in addr; addr.sin_family = AF_INET; addr.sin_addr.s_addr = ipaddr; - addr.sin_port = (uint16_t)htons((uint16_t)conn->port); + addr.sin_port = (uint16_t)htons(port); - tGTrace("%s conn %p try to connect to %s:%d", pTransInst->label, conn, conn->ip, conn->port); + tGTrace("%s conn %p try to connect to %s", pTransInst->label, conn, conn->ip); int32_t fd = taosCreateSocketWithTimeout(TRANS_CONN_TIMEOUT * 4); if (fd == -1) { tGError("%s conn %p failed to create socket, reason:%s", transLabel(pTransInst), conn, @@ -1199,7 +1229,6 @@ static void cliAsyncCb(uv_async_t* handle) { if (count >= 2) { tTrace("cli process batch size:%d", count); } - // if (!uv_is_active((uv_handle_t*)pThrd->prepare)) uv_prepare_start(pThrd->prepare, cliPrepareCb); if (pThrd->stopMsg != NULL) cliHandleQuit(pThrd->stopMsg, pThrd); } @@ -1412,6 +1441,7 @@ static SCliThrd* createThrdObj(void* trans) { pThrd->destroyAhandleFp = pTransInst->destroyFp; pThrd->fqdn2ipCache = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); pThrd->failFastCache = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); + pThrd->connLimit = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); pThrd->quit = false; return pThrd; @@ -1440,6 +1470,7 @@ static void destroyThrdObj(SCliThrd* pThrd) { taosMemoryFree(pThrd->loop); taosHashCleanup(pThrd->fqdn2ipCache); taosHashCleanup(pThrd->failFastCache); + taosHashCleanup(pThrd->connLimit); taosMemoryFree(pThrd); } @@ -1864,13 +1895,6 @@ int transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STran return TSDB_CODE_RPC_BROKEN_LINK; } - // read only - if (pTransInst->connLimit != 0 && atomic_load_32(&pThrd->connCount) >= pTransInst->connLimit) { - transFreeMsg(pReq->pCont); - transReleaseExHandle(transGetInstMgt(), (int64_t)shandle); - return TSDB_CODE_RPC_MAX_SESSIONS; - } - TRACE_SET_MSGID(&pReq->info.traceId, tGenIdPI64()); STransConnCtx* pCtx = taosMemoryCalloc(1, sizeof(STransConnCtx)); @@ -1912,13 +1936,6 @@ int transSendRecv(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransMs transReleaseExHandle(transGetInstMgt(), (int64_t)shandle); return TSDB_CODE_RPC_BROKEN_LINK; } - // not limit sync req - // read only - // if (pTransInst->connLimit != 0 && atomic_load_32(&pThrd->connCount) >= pTransInst->connLimit) { - // transFreeMsg(pReq->pCont); - // transReleaseExHandle(transGetInstMgt(), (int64_t)shandle); - // return TSDB_CODE_RPC_MAX_SESSIONS; - //} tsem_t* sem = taosMemoryCalloc(1, sizeof(tsem_t)); tsem_init(sem, 0, 0); From f22d07319f36669143489e5d5e0d20e0a21d25c0 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 15 Feb 2023 20:06:04 +0800 Subject: [PATCH 05/35] handle too many session --- include/libs/transport/trpc.h | 3 +- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 3 +- source/libs/transport/inc/transportInt.h | 4 ++- source/libs/transport/src/trans.c | 3 +- source/libs/transport/src/transCli.c | 32 +++++++++++++------ 5 files changed, 32 insertions(+), 13 deletions(-) diff --git a/include/libs/transport/trpc.h b/include/libs/transport/trpc.h index ff68b72fc2..5787f41772 100644 --- a/include/libs/transport/trpc.h +++ b/include/libs/transport/trpc.h @@ -112,7 +112,8 @@ typedef struct SRpcInit { // fail fast fp RpcFFfp ffp; - int32_t connLimit; + int32_t connLimitNum; + int32_t connLimitLock; void *parent; } SRpcInit; diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index bd08eda954..d23e67b195 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -284,7 +284,8 @@ int32_t dmInitClient(SDnode *pDnode) { rpcInit.failFastThreshold = 3; // failed threshold rpcInit.ffp = dmFailFastFp; - rpcInit.connLimit = 3000; + rpcInit.connLimitNum = 3000; + rpcInit.connLimitLock = 1; pTrans->clientRpc = rpcOpen(&rpcInit); if (pTrans->clientRpc == NULL) { diff --git a/source/libs/transport/inc/transportInt.h b/source/libs/transport/inc/transportInt.h index 92477bb514..1fe32955b9 100644 --- a/source/libs/transport/inc/transportInt.h +++ b/source/libs/transport/inc/transportInt.h @@ -64,7 +64,9 @@ typedef struct { void (*destroyFp)(void* ahandle); bool (*failFastFp)(tmsg_t msgType); - int32_t connLimit; + int32_t connLimitNum; + int8_t connLimitLock; // 0: no lock. 1. lock + int index; void* parent; void* tcphandle; // returned handle from TCP initialization diff --git a/source/libs/transport/src/trans.c b/source/libs/transport/src/trans.c index 61ca9743b3..6eec54b370 100644 --- a/source/libs/transport/src/trans.c +++ b/source/libs/transport/src/trans.c @@ -67,7 +67,8 @@ void* rpcOpen(const SRpcInit* pInit) { pRpc->startTimer = pInit->tfp; pRpc->destroyFp = pInit->dfp; pRpc->failFastFp = pInit->ffp; - pRpc->connLimit = pInit->connLimit; + pRpc->connLimitNum = pInit->connLimitNum; + pRpc->connLimitLock = pInit->connLimitLock; pRpc->numOfThreads = pInit->numOfThreads > TSDB_MAX_RPC_THREADS ? TSDB_MAX_RPC_THREADS : pInit->numOfThreads; if (pRpc->numOfThreads <= 0) { diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 4d7b6b5b2b..a1b4766e80 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -85,7 +85,7 @@ typedef struct SCliThrd { SCvtAddr cvtAddr; SHashObj* failFastCache; - SHashObj* connLimit; + SHashObj* connLimitCache; SCliMsg* stopMsg; @@ -750,9 +750,9 @@ static void cliDestroy(uv_handle_t* handle) { transReqQueueClear(&conn->wreqQueue); transDestroyBuffer(&conn->readBuf); - int32_t* oVal = taosHashGet(pThrd->connLimit, conn->ip, strlen(conn->ip)); + int32_t* oVal = taosHashGet(pThrd->connLimitCache, conn->ip, strlen(conn->ip)); int32_t nVal = oVal == NULL ? 0 : (*oVal) - 1; - taosHashPut(pThrd->connLimit, conn->ip, strlen(conn->ip), &nVal, sizeof(nVal)); + taosHashPut(pThrd->connLimitCache, conn->ip, strlen(conn->ip), &nVal, sizeof(nVal)); taosMemoryFree(conn); } @@ -930,9 +930,9 @@ void cliConnCb(uv_connect_t* req, int status) { return; } - int32_t* oVal = taosHashGet(pThrd->connLimit, pConn->ip, strlen(pConn->ip)); + int32_t* oVal = taosHashGet(pThrd->connLimitCache, pConn->ip, strlen(pConn->ip)); int32_t nVal = oVal == NULL ? 0 : (*oVal) + 1; - taosHashPut(pThrd->connLimit, pConn->ip, strlen(pConn->ip), &nVal, sizeof(nVal)); + taosHashPut(pThrd->connLimitCache, pConn->ip, strlen(pConn->ip), &nVal, sizeof(nVal)); struct sockaddr peername, sockname; int addrlen = sizeof(peername); @@ -1080,10 +1080,10 @@ static int32_t cliPreCheckSessionLimit(SCliThrd* pThrd, SCliMsg* pMsg) { char key[TSDB_FQDN_LEN + 64] = {0}; CONN_CONSTRUCT_HASH_KEY(key, ip, port); - int32_t* val = taosHashGet(pThrd->connLimit, key, strlen(key)); + int32_t* val = taosHashGet(pThrd->connLimitCache, key, strlen(key)); if (val == NULL) return 0; - if (*val >= pTransInst->connLimit) { + if (*val >= pTransInst->connLimitNum) { return -1; } return 0; @@ -1441,7 +1441,8 @@ static SCliThrd* createThrdObj(void* trans) { pThrd->destroyAhandleFp = pTransInst->destroyFp; pThrd->fqdn2ipCache = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); pThrd->failFastCache = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); - pThrd->connLimit = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); + pThrd->connLimitCache = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, + pTransInst->connLimitLock == 0 ? HASH_NO_LOCK : HASH_ENTRY_LOCK); pThrd->quit = false; return pThrd; @@ -1470,7 +1471,7 @@ static void destroyThrdObj(SCliThrd* pThrd) { taosMemoryFree(pThrd->loop); taosHashCleanup(pThrd->fqdn2ipCache); taosHashCleanup(pThrd->failFastCache); - taosHashCleanup(pThrd->connLimit); + taosHashCleanup(pThrd->connLimitCache); taosMemoryFree(pThrd); } @@ -1894,6 +1895,19 @@ int transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STran transReleaseExHandle(transGetInstMgt(), (int64_t)shandle); return TSDB_CODE_RPC_BROKEN_LINK; } + if (pTransInst->connLimitNum > 0 && REQUEST_NO_RESP(pReq)) { + char key[TSDB_FQDN_LEN + 64] = {0}; + char* ip = EPSET_GET_INUSE_IP((SEpSet*)pEpSet); + uint16_t port = EPSET_GET_INUSE_PORT((SEpSet*)pEpSet); + CONN_CONSTRUCT_HASH_KEY(key, ip, port); + + int32_t* val = taosHashGet(pThrd->connLimitCache, key, strlen(key)); + if (val != NULL && *val >= pTransInst->connLimitNum) { + transFreeMsg(pReq->pCont); + transReleaseExHandle(transGetInstMgt(), (int64_t)shandle); + return TSDB_CODE_RPC_BROKEN_LINK; + } + } TRACE_SET_MSGID(&pReq->info.traceId, tGenIdPI64()); From 9c050c0ea626254e4bd45f8a202ee655383c59a0 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 15 Feb 2023 20:15:34 +0800 Subject: [PATCH 06/35] handle too many session --- source/libs/transport/src/transCli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index a1b4766e80..2330b871d7 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1905,7 +1905,7 @@ int transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STran if (val != NULL && *val >= pTransInst->connLimitNum) { transFreeMsg(pReq->pCont); transReleaseExHandle(transGetInstMgt(), (int64_t)shandle); - return TSDB_CODE_RPC_BROKEN_LINK; + return TSDB_CODE_RPC_MAX_SESSIONS; } } From 015dd658e67c4141051a15070456d68e382ce822 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 15 Feb 2023 21:16:08 +0800 Subject: [PATCH 07/35] fix: invalid read --- source/libs/transport/src/transCli.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 2330b871d7..22bcb93218 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -736,6 +736,9 @@ static void cliDestroy(uv_handle_t* handle) { conn->timer->data = NULL; conn->timer = NULL; } + int32_t* oVal = taosHashGet(pThrd->connLimitCache, conn->ip, strlen(conn->ip)); + int32_t nVal = oVal == NULL ? 0 : (*oVal) - 1; + taosHashPut(pThrd->connLimitCache, conn->ip, strlen(conn->ip), &nVal, sizeof(nVal)); atomic_sub_fetch_32(&pThrd->connCount, 1); @@ -750,10 +753,6 @@ static void cliDestroy(uv_handle_t* handle) { transReqQueueClear(&conn->wreqQueue); transDestroyBuffer(&conn->readBuf); - int32_t* oVal = taosHashGet(pThrd->connLimitCache, conn->ip, strlen(conn->ip)); - int32_t nVal = oVal == NULL ? 0 : (*oVal) - 1; - taosHashPut(pThrd->connLimitCache, conn->ip, strlen(conn->ip), &nVal, sizeof(nVal)); - taosMemoryFree(conn); } static bool cliHandleNoResp(SCliConn* conn) { From 5c6b5f0d0fd3b27312e65c38c4cd8e36ebcddf83 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 15 Feb 2023 21:58:56 +0800 Subject: [PATCH 08/35] fix: invalid read --- source/libs/transport/src/transCli.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 22bcb93218..681644d3bf 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -891,11 +891,17 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { SCliThrd* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; - SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); - STraceId* trace = &pMsg->msg.info.traceId; + SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); + + if (pMsg) { + STraceId* trace = &pMsg->msg.info.traceId; + tGError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), + pMsg ? TMSG_INFO(pMsg->msg.msgType) : 0, pConn, pConn->ip, uv_strerror(status)); + } else { + tError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), 0, + pConn, pConn->ip, uv_strerror(status)); + } - tGError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), - pMsg ? TMSG_INFO(pMsg->msg.msgType) : 0, pConn, pConn->ip, uv_strerror(status)); uv_timer_stop(pConn->timer); pConn->timer->data = NULL; taosArrayPush(pThrd->timerList, &pConn->timer); From 3fc8f6b73225a3988a499cab0928d2312c4fe513 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 15 Feb 2023 22:29:17 +0800 Subject: [PATCH 09/35] fix: invalid read --- source/libs/transport/src/transCli.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 681644d3bf..5170a68dbc 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -896,10 +896,10 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { if (pMsg) { STraceId* trace = &pMsg->msg.info.traceId; tGError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), - pMsg ? TMSG_INFO(pMsg->msg.msgType) : 0, pConn, pConn->ip, uv_strerror(status)); + TMSG_INFO(pMsg->msg.msgType), pConn, pConn->ip, uv_strerror(status)); } else { - tError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), 0, - pConn, pConn->ip, uv_strerror(status)); + tError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), + TMSG_INFO(0), pConn, pConn->ip, uv_strerror(status)); } uv_timer_stop(pConn->timer); From 05e61bac89d6c757f9f3b20a50025ab236fbaeb5 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 00:12:26 +0800 Subject: [PATCH 10/35] fix: invalid read --- source/libs/transport/src/transCli.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 5170a68dbc..e91fe337fc 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -893,14 +893,14 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); - if (pMsg) { - STraceId* trace = &pMsg->msg.info.traceId; - tGError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), - TMSG_INFO(pMsg->msg.msgType), pConn, pConn->ip, uv_strerror(status)); - } else { - tError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), - TMSG_INFO(0), pConn, pConn->ip, uv_strerror(status)); - } + // if (pMsg) { + STraceId* trace = &pMsg->msg.info.traceId; + tGError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), + TMSG_INFO(pMsg->msg.msgType), pConn, pConn->ip, uv_strerror(status)); + //} else { + // tError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), + // TMSG_INFO(0), pConn, pConn->ip, uv_strerror(status)); + //} uv_timer_stop(pConn->timer); pConn->timer->data = NULL; From ebcbdcfddc27b684d160f4fbfb0e644c1a8b1c8e Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 00:30:26 +0800 Subject: [PATCH 11/35] fix: change parameter --- source/common/src/tglobal.c | 14 ++++++-------- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 6 +++++- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index d4849650e6..ce4f2aa334 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -76,11 +76,11 @@ bool tsEnableTelem = true; int32_t tsTelemInterval = 43200; char tsTelemServer[TSDB_FQDN_LEN] = "telemetry.taosdata.com"; uint16_t tsTelemPort = 80; -char* tsTelemUri = "/report"; +char *tsTelemUri = "/report"; -bool tsEnableCrashReport = true; -char* tsClientCrashReportUri = "/ccrashreport"; -char* tsSvrCrashReportUri = "/dcrashreport"; +bool tsEnableCrashReport = true; +char *tsClientCrashReportUri = "/ccrashreport"; +char *tsSvrCrashReportUri = "/dcrashreport"; // schemaless char tsSmlTagName[TSDB_COL_NAME_LEN] = "_tag_null"; @@ -211,9 +211,7 @@ int32_t taosSetTfsCfg(SConfig *pCfg) { int32_t taosSetTfsCfg(SConfig *pCfg); #endif -struct SConfig *taosGetCfg() { - return tsCfg; -} +struct SConfig *taosGetCfg() { return tsCfg; } static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *inputCfgDir, const char *envFile, char *apolloUrl) { @@ -498,7 +496,7 @@ static int32_t taosUpdateServerCfg(SConfig *pCfg) { pItem = cfgGetItem(tsCfg, "numOfRpcThreads"); if (pItem != NULL && pItem->stype == CFG_STYPE_DEFAULT) { tsNumOfRpcThreads = numOfCores / 2; - tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 1, 4); + tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 1, TSDB_MAX_RPC_THREADS); pItem->i32 = tsNumOfRpcThreads; pItem->stype = stype; } diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index d23e67b195..b16ff6efac 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -284,7 +284,11 @@ int32_t dmInitClient(SDnode *pDnode) { rpcInit.failFastThreshold = 3; // failed threshold rpcInit.ffp = dmFailFastFp; - rpcInit.connLimitNum = 3000; + int32_t connLimitNum = 30000 / (tsNumOfRpcThreads * 3); + connLimitNum = TMAX(connLimitNum, 500); + connLimitNum = TMIN(connLimitNum, 3000); + + rpcInit.connLimitNum = connLimitNum; rpcInit.connLimitLock = 1; pTrans->clientRpc = rpcOpen(&rpcInit); From 792e16befce5563751c41d67465d728ead159dd0 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 10:36:49 +0800 Subject: [PATCH 12/35] fix invalid read --- source/libs/transport/src/transCli.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index e91fe337fc..415005a1e1 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -893,14 +893,9 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); - // if (pMsg) { STraceId* trace = &pMsg->msg.info.traceId; tGError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), TMSG_INFO(pMsg->msg.msgType), pConn, pConn->ip, uv_strerror(status)); - //} else { - // tError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), - // TMSG_INFO(0), pConn, pConn->ip, uv_strerror(status)); - //} uv_timer_stop(pConn->timer); pConn->timer->data = NULL; @@ -1158,13 +1153,13 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { transQueuePush(&conn->cliMsgs, pMsg); char key[TSDB_FQDN_LEN + 64] = {0}; - char* ip = EPSET_GET_INUSE_IP(&pCtx->epSet); + char* fqdn = EPSET_GET_INUSE_IP(&pCtx->epSet); uint16_t port = EPSET_GET_INUSE_PORT(&pCtx->epSet); - CONN_CONSTRUCT_HASH_KEY(key, ip, port); + CONN_CONSTRUCT_HASH_KEY(key, fqdn, port); conn->ip = strdup(key); - uint32_t ipaddr = cliGetIpFromFqdnCache(pThrd->fqdn2ipCache, EPSET_GET_INUSE_IP(&pCtx->epSet)); + uint32_t ipaddr = cliGetIpFromFqdnCache(pThrd->fqdn2ipCache, fqdn); if (ipaddr == 0xffffffff) { uv_timer_stop(conn->timer); conn->timer->data = NULL; From 27643f7c21ee4402c546ced4cd866a0c47bcc442 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 11:04:38 +0800 Subject: [PATCH 13/35] fix invalid read --- source/libs/transport/src/transCli.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 415005a1e1..2d09822dc7 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -158,6 +158,7 @@ static void cliHandleResp(SCliConn* conn); // handle except about conn static void cliHandleExcept(SCliConn* conn); static void cliReleaseUnfinishedMsg(SCliConn* conn); +static void cliHandleFastFail(SCliConn* pConn, int status); // handle req from app static void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd); @@ -488,9 +489,9 @@ void cliConnTimeout(uv_timer_t* handle) { uv_timer_stop(handle); handle->data = NULL; taosArrayPush(pThrd->timerList, &conn->timer); - conn->timer = NULL; - cliHandleExceptImpl(conn, -1); + + cliHandleFastFail(conn, UV_ECANCELED); } void cliReadTimeoutCb(uv_timer_t* handle) { // set up timeout cb @@ -897,11 +898,6 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { tGError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), TMSG_INFO(pMsg->msg.msgType), pConn, pConn->ip, uv_strerror(status)); - uv_timer_stop(pConn->timer); - pConn->timer->data = NULL; - taosArrayPush(pThrd->timerList, &pConn->timer); - pConn->timer = NULL; - if (pMsg != NULL && REQUEST_NO_RESP(&pMsg->msg) && (pTransInst->failFastFp != NULL && pTransInst->failFastFp(pMsg->msg.msgType))) { SFailFastItem* item = taosHashGet(pThrd->failFastCache, pConn->ip, strlen(pConn->ip)); @@ -921,12 +917,23 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { } cliHandleExcept(pConn); } + void cliConnCb(uv_connect_t* req, int status) { SCliConn* pConn = req->data; SCliThrd* pThrd = pConn->hostThrd; + bool timeout = false; + + if (pConn->timer == NULL) { + timeout = true; + } else { + uv_timer_stop(pConn->timer); + pConn->timer->data = NULL; + taosArrayPush(pThrd->timerList, &pConn->timer); + pConn->timer = NULL; + } if (status != 0) { - cliHandleFastFail(pConn, status); + if (timeout == false) cliHandleFastFail(pConn, status); return; } @@ -1199,6 +1206,11 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { ret = uv_tcp_connect(&conn->connReq, (uv_tcp_t*)(conn->stream), (const struct sockaddr*)&addr, cliConnCb); if (ret != 0) { + uv_timer_stop(conn->timer); + conn->timer->data = NULL; + taosArrayPush(pThrd->timerList, &conn->timer); + conn->timer = NULL; + cliHandleFastFail(conn, ret); return; } From a6a8daec23f83c5553c688ab15ba3db44fbd7ab7 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 15:09:39 +0800 Subject: [PATCH 14/35] fix: batch write --- include/libs/transport/trpc.h | 3 +- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 1 + source/libs/transport/inc/transportInt.h | 1 + source/libs/transport/src/trans.c | 1 + source/libs/transport/src/transCli.c | 271 +++++++++++++++++- 5 files changed, 266 insertions(+), 11 deletions(-) diff --git a/include/libs/transport/trpc.h b/include/libs/transport/trpc.h index 5787f41772..acfd5dfb51 100644 --- a/include/libs/transport/trpc.h +++ b/include/libs/transport/trpc.h @@ -115,7 +115,8 @@ typedef struct SRpcInit { int32_t connLimitNum; int32_t connLimitLock; - void *parent; + int8_t supportBatch; // 0: no batch, 1. batch + void *parent; } SRpcInit; typedef struct { diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index b16ff6efac..8751b575f3 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -290,6 +290,7 @@ int32_t dmInitClient(SDnode *pDnode) { rpcInit.connLimitNum = connLimitNum; rpcInit.connLimitLock = 1; + rpcInit.supportBatch = 1; pTrans->clientRpc = rpcOpen(&rpcInit); if (pTrans->clientRpc == NULL) { diff --git a/source/libs/transport/inc/transportInt.h b/source/libs/transport/inc/transportInt.h index 1fe32955b9..13adb4d2b4 100644 --- a/source/libs/transport/inc/transportInt.h +++ b/source/libs/transport/inc/transportInt.h @@ -66,6 +66,7 @@ typedef struct { int32_t connLimitNum; int8_t connLimitLock; // 0: no lock. 1. lock + int8_t supportBatch; // 0: no batch, 1: support batch int index; void* parent; diff --git a/source/libs/transport/src/trans.c b/source/libs/transport/src/trans.c index 6eec54b370..38ec1c7fdc 100644 --- a/source/libs/transport/src/trans.c +++ b/source/libs/transport/src/trans.c @@ -69,6 +69,7 @@ void* rpcOpen(const SRpcInit* pInit) { pRpc->failFastFp = pInit->ffp; pRpc->connLimitNum = pInit->connLimitNum; pRpc->connLimitLock = pInit->connLimitLock; + pRpc->supportBatch = pInit->supportBatch; pRpc->numOfThreads = pInit->numOfThreads > TSDB_MAX_RPC_THREADS ? TSDB_MAX_RPC_THREADS : pInit->numOfThreads; if (pRpc->numOfThreads <= 0) { diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 2d09822dc7..8063ac838b 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -40,6 +40,8 @@ typedef struct SCliConn { bool broken; // link broken or not ConnStatus status; // + SCliBatch* pBatch; + int64_t refId; char* ip; @@ -62,6 +64,16 @@ typedef struct SCliMsg { int sent; //(0: no send, 1: alread sent) } SCliMsg; +typedef struct { + queue wq; + int32_t wLen; + int32_t batchSize; // + int32_t batch; + char* dst; + char* ip; + uint16_t port; +} SCliBatch; + typedef struct SCliThrd { TdThread thread; // tid int64_t pid; // pid @@ -86,6 +98,7 @@ typedef struct SCliThrd { SHashObj* failFastCache; SHashObj* connLimitCache; + SHashObj* batchCache; SCliMsg* stopMsg; @@ -132,6 +145,11 @@ static void cliAsyncCb(uv_async_t* handle); static void cliIdleCb(uv_idle_t* handle); static void cliPrepareCb(uv_prepare_t* handle); +static void cliSendBatch(const SCliBatch* pBatch, SCliThrd* pThrd); +static void cliSendBatchCb(uv_write_t* req, int status); +// callback after conn to server +static void cliConnBatchCb(uv_connect_t* req, int status); + static bool cliRecvReleaseReq(SCliConn* conn, STransMsgHead* pHead); static int32_t allocConnRef(SCliConn* conn, bool update); @@ -167,6 +185,8 @@ static void cliHandleRelease(SCliMsg* pMsg, SCliThrd* pThrd); static void cliHandleUpdate(SCliMsg* pMsg, SCliThrd* pThrd); static void (*cliAsyncHandle[])(SCliMsg* pMsg, SCliThrd* pThrd) = {cliHandleReq, cliHandleQuit, cliHandleRelease, NULL, cliHandleUpdate}; +/// static void (*cliAsyncHandle[])(SCliMsg* pMsg, SCliThrd* pThrd) = {cliHandleReq, cliHandleQuit, cliHandleRelease, +/// NULL,cliHandleUpdate}; static FORCE_INLINE void destroyUserdata(STransMsg* userdata); static FORCE_INLINE void destroyCmsg(void* cmsg); @@ -287,6 +307,7 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { } destroyCmsg(msg); } + transQueueClear(&conn->cliMsgs); memset(&conn->ctx, 0, sizeof(conn->ctx)); } bool cliMaySendCachedMsg(SCliConn* conn) { @@ -888,6 +909,169 @@ void cliSend(SCliConn* pConn) { _RETURN: return; } + +static SCliBatch* cliDumpBatch(SCliBatch* pBatch) { + SCliBatch* pNewBatch = taosMemCalloc(1, sizeof(SClicBatch)); + pNewBatch->wq = pBatch->wq; + + pNewBatch->batchSize = pBatch->batchSize; + pNewBatch->batch = pBatch->batch; + pNewBatch->wLen = pBatch->wLen; + pNewBatch->dst = strdup(pBatch->dst); + pNewBatch->ip = strdup(pBatch->ip); + pNewBatch->port = pBatch->port; + + QUEUE_INIT(&pBatch->wq); + pBatch->batchSize = 0; + pBatch->batch = 0; + pBatch->wLen = 0; + + return pNewBatch; +} +static void cliDestroyBatch(SCliBatch* pBatch) { + while (!EMPTY_IS_EMPTY(&pBatch->wq)) { + queue* h = QUEUE_HEAD(&pBatch->wq); + SCliMsg* pMsg = QUEUE_DATA(h, SCliMsg, q); + + QUEUE_REMOVE(&pMsg->q); + destroyCmsg(p); + } + taosMemoryFree(pBatch->ip); + taosMemoryFree(pBatch->dst); + taosMemoryFree(pBatch); +} +static void cliSendBatch(SCliBatch* pBatch, SCliThrd* pThrd) { + if (pBatch->wLen == 0 || EMPTY_IS_EMPTY(&pBatch->wq)) { + return; + } + STrans* pTransInst = pThrd->pTransInst; + + SCliBatch* pNewBatch = cliDumpBatch(pBatch); + + SCliConn* conn = getConnFromPool(pThrd->pool, pBatch->ip, pBatch->port); + if (conn == NULL) { + conn = cliCreateConn(pThrd); + conn->pBatch = pNewBatch; + conn->ip = strdup(conn->pBatch->ip); + + uint32_t ipaddr = cliGetIpFromFqdnCache(pThrd->fqdn2ipCache, conn->ip); + if (ipaddr == 0xffffffff) { + uv_timer_stop(conn->timer); + conn->timer->data = NULL; + taosArrayPush(pThrd->timerList, &conn->timer); + conn->timer = NULL; + + cliHandleExcept(conn); + return; + } + struct sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = ipaddr; + addr.sin_port = (uint16_t)htons(port); + + tGTrace("%s conn %p try to connect to %s", pTransInst->label, conn, conn->ip); + int32_t fd = taosCreateSocketWithTimeout(TRANS_CONN_TIMEOUT * 4); + if (fd == -1) { + tGError("%s conn %p failed to create socket, reason:%s", transLabel(pTransInst), conn, + tstrerror(TAOS_SYSTEM_ERROR(errno))); + cliHandleExcept(conn); + errno = 0; + return; + } + int ret = uv_tcp_open((uv_tcp_t*)conn->stream, fd); + if (ret != 0) { + tGError("%s conn %p failed to set stream, reason:%s", transLabel(pTransInst), conn, uv_err_name(ret)); + cliHandleExcept(conn); + return; + } + ret = transSetConnOption((uv_tcp_t*)conn->stream); + if (ret != 0) { + tGError("%s conn %p failed to set socket opt, reason:%s", transLabel(pTransInst), conn, uv_err_name(ret)); + cliHandleExcept(conn); + return; + } + + ret = uv_tcp_connect(&conn->connReq, (uv_tcp_t*)(conn->stream), (const struct sockaddr*)&addr, cliConnCb); + if (ret != 0) { + uv_timer_stop(conn->timer); + conn->timer->data = NULL; + taosArrayPush(pThrd->timerList, &conn->timer); + conn->timer = NULL; + + cliHandleFastFail(conn, ret); + return; + } + uv_timer_start(conn->timer, cliConnTimeout, TRANS_CONN_TIMEOUT, 0); + return; + } + + conn->pBatch = pNewBatch; + + int32_t wLen = pBatch->wLen; + uv_buf_t* wb = taosMemoryCalloc(wLen, sizeof(uv_buf_t)); + int i = 0; + + while (!EMPTY_IS_EMPTY(&pBatch->wq)) { + queue* h = QUEUE_HEAD(&pBatch->wq); + SCliMsg* pMsg = QUEUE_DATA(h, SCliMsg, q); + QUEUE_REMOVE(&pMsg->q); + + transQueuePush(conn->cliMsgs, pMsg); + + STransConnCtx* pCtx = pCliMsg->ctx; + + STransMsg* pMsg = (STransMsg*)(&pCliMsg->msg); + if (pMsg->pCont == 0) { + pMsg->pCont = (void*)rpcMallocCont(0); + pMsg->contLen = 0; + } + + int msgLen = transMsgLenFromCont(pMsg->contLen); + STransMsgHead* pHead = transHeadFromCont(pMsg->pCont); + + if (pHead->comp == 0) { + pHead->ahandle = pCtx != NULL ? (uint64_t)pCtx->ahandle : 0; + pHead->noResp = REQUEST_NO_RESP(pMsg) ? 1 : 0; + pHead->persist = REQUEST_PERSIS_HANDLE(pMsg) ? 1 : 0; + pHead->msgType = pMsg->msgType; + pHead->msgLen = (int32_t)htonl((uint32_t)msgLen); + pHead->release = REQUEST_RELEASE_HANDLE(pCliMsg) ? 1 : 0; + memcpy(pHead->user, pTransInst->user, strlen(pTransInst->user)); + pHead->traceId = pMsg->info.traceId; + pHead->magicNum = htonl(TRANS_MAGIC_NUM); + } + pHead->timestamp = taosHton64(taosGetTimestampUs()); + + if (pHead->comp == 0) { + if (pTransInst->compressSize != -1 && pTransInst->compressSize < pMsg->contLen) { + msgLen = transCompressMsg(pMsg->pCont, pMsg->contLen) + sizeof(STransMsgHead); + pHead->msgLen = (int32_t)htonl((uint32_t)msgLen); + } + } else { + msgLen = (int32_t)ntohl((uint32_t)(pHead->msgLen)); + } + + wb[i++] = uv_buf_init((char*)pHead, msgLen); + } + + pBatch->wLen = 0; + uv_write_t* req = taosMemCalloc(1, sizeof(uv_write_t)); + req->data = pConn; + uv_write(req, (uv_stream_t*)conn->stream, wb, wLen, cliSendBatchCb); + taosMemoryFree(wb); +} +static void cliSendBatchCb(uv_write_t* req, int status) { + SCliConn* conn = req->data; + SCliThrd* thrd = conn->hostThrd; + cliDestroyBatch(conn->pBatch); + conn->pBatch = NULL; + + if (status != 0) { + cliHandleExcept(conn); + } else { + addConnToPool(thrd->pool, conn); + } +} static void cliHandleFastFail(SCliConn* pConn, int status) { SCliThrd* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; @@ -1218,29 +1402,93 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { } tGTrace("%s conn %p ready", pTransInst->label, conn); } -static void cliAsyncCb(uv_async_t* handle) { - SAsyncItem* item = handle->data; - SCliThrd* pThrd = item->pThrd; - SCliMsg* pMsg = NULL; - - // batch process to avoid to lock/unlock frequently - queue wq; - taosThreadMutexLock(&item->mtx); - QUEUE_MOVE(&item->qmsg, &wq); - taosThreadMutexUnlock(&item->mtx); +static void cliNoBatchDealReq(queue wq, SCliThrd* pThrd) { int count = 0; + while (!QUEUE_IS_EMPTY(&wq)) { queue* h = QUEUE_HEAD(&wq); QUEUE_REMOVE(h); SCliMsg* pMsg = QUEUE_DATA(h, SCliMsg, q); (*cliAsyncHandle[pMsg->type])(pMsg, pThrd); + count++; } if (count >= 2) { tTrace("cli process batch size:%d", count); } +} + +static void cliHandleBatch() static void cliBatchDealReq(queue wq, SCliThrd* pThrd) { + int count = 0; + while (!QUEUE_IS_EMPTY(&wq)) { + queue* h = QUEUE_HEAD(&wq); + QUEUE_REMOVE(h); + + SCliMsg* pMsg = QUEUE_DATA(h, SCliMsg, q); + if (REQUEST_NO_RESP(&pMsg->msg)) { + STransConnCtx* pCtx = pMsg->ctx; + + char* ip = EPSET_GET_INUSE_IP(&pCtx->epSet); + uint32_t port = EPSET_GET_INUSE_PORT(&pCtx->epSet); + char key[TSDB_FQDN_LEN + 64] = {0}; + CONN_CONSTRUCT_HASH_KEY(key, ip, port); + + SCliBatch *ppBatch = taosHashGet(pThrd->batchCache, key, sizeof(key))); + if (*ppBatch == NULL) { + SCliBatch* pBatch = taosMemoryCalloc(1, sizeof(SCliBatch)); + QUEUE_INIT(&pBatch->wq); + QUEUE_PUSH(&pBatch->wq, h); + pBatch->wLen += 1; + pBatch->batchSize += pMsg->msg.contLen; + + pBatch->dst = strdup(key); + pBatch->ip = strdup(ip); + pBatch->port = (uint16_t)port; + + taosHashPut(pThrd->batchCache, key, sizeof(key), &pBatch, sizeof(void*)); + } else { + QUEUE_PUSH(&(*ppBatch)->wq, h); + (*pBatch)->wLen += 1; + (*pBatch)->batchSize += pMsg->msg.contLen; + } + } + (*cliAsyncHandle[pMsg->type])(pMsg, pThrd); + count++; + } + + void** pIter = taoskHashIterate(pThrd->batchCache, NULL); + while (pIter != NULL) { + SCliBatch* batch = (SCliBatch*)(*pIter); + + cliSendBatch(batch, pThrd); + pIter = (void**)taosHashIterate(info, pIter); + } + + if (count >= 2) { + tTrace("cli process batch size:%d", count); + } +} + +static void cliAsyncCb(uv_async_t* handle) { + SAsyncItem* item = handle->data; + SCliThrd* pThrd = item->pThrd; + STrans* pTransInst = pThrd->pTransInst; + + SCliMsg* pMsg = NULL; + // batch process to avoid to lock/unlock frequently + queue wq; + taosThreadMutexLock(&item->mtx); + QUEUE_MOVE(&item->qmsg, &wq); + taosThreadMutexUnlock(&item->mtx); + + int8_t supportBatch = pTransInst->supprtBatch; + if (supportBatch == 0) { + cliNotBatchDealReq(wq, pThrd); + } else if (supportBatch == 1) { + cliBatchDealReq(wq, pThrd); + } if (pThrd->stopMsg != NULL) cliHandleQuit(pThrd->stopMsg, pThrd); } @@ -1456,6 +1704,8 @@ static SCliThrd* createThrdObj(void* trans) { pThrd->connLimitCache = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, pTransInst->connLimitLock == 0 ? HASH_NO_LOCK : HASH_ENTRY_LOCK); + pThrd->batchCache = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, hash_no_lock); + pThrd->quit = false; return pThrd; } @@ -1484,6 +1734,7 @@ static void destroyThrdObj(SCliThrd* pThrd) { taosHashCleanup(pThrd->fqdn2ipCache); taosHashCleanup(pThrd->failFastCache); taosHashCleanup(pThrd->connLimitCache); + taosHashCleanup(pThrd->batchCache); taosMemoryFree(pThrd); } From c35b938698f913073b7ce52fc6cc8dd6354508c6 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 17:01:09 +0800 Subject: [PATCH 15/35] fix invalid read/write --- source/libs/transport/src/transCli.c | 265 ++++++++++++++------------- 1 file changed, 141 insertions(+), 124 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 8063ac838b..54203e8d7c 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -20,6 +20,15 @@ typedef struct SConnList { int32_t size; } SConnList; +typedef struct { + queue wq; + int32_t wLen; + int32_t batchSize; // + int32_t batch; + char* dst; + char* ip; + uint16_t port; +} SCliBatch; typedef struct SCliConn { T_REF_DECLARE() uv_connect_t connReq; @@ -64,16 +73,6 @@ typedef struct SCliMsg { int sent; //(0: no send, 1: alread sent) } SCliMsg; -typedef struct { - queue wq; - int32_t wLen; - int32_t batchSize; // - int32_t batch; - char* dst; - char* ip; - uint16_t port; -} SCliBatch; - typedef struct SCliThrd { TdThread thread; // tid int64_t pid; // pid @@ -145,10 +144,12 @@ static void cliAsyncCb(uv_async_t* handle); static void cliIdleCb(uv_idle_t* handle); static void cliPrepareCb(uv_prepare_t* handle); -static void cliSendBatch(const SCliBatch* pBatch, SCliThrd* pThrd); +static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd); +// static void cliConnBatchCb(uv_connect_t* req, int status); static void cliSendBatchCb(uv_write_t* req, int status); -// callback after conn to server -static void cliConnBatchCb(uv_connect_t* req, int status); +// static void cliConnBatchCb(uv_connect_t* req, int status); +// callback after conn to server +// static void cliConnBatchCb(uv_connect_t* req, int status); static bool cliRecvReleaseReq(SCliConn* conn, STransMsgHead* pHead); @@ -160,6 +161,7 @@ static SCliConn* cliCreateConn(SCliThrd* thrd); static void cliDestroyConn(SCliConn* pConn, bool clear /*clear tcp handle or not*/); static void cliDestroy(uv_handle_t* handle); static void cliSend(SCliConn* pConn); +static void cliSendBatch(SCliConn* pConn); static void cliDestroyConnMsgs(SCliConn* conn, bool destroy); // cli util func @@ -825,7 +827,63 @@ static void cliSendCb(uv_write_t* req, int status) { } uv_read_start((uv_stream_t*)pConn->stream, cliAllocRecvBufferCb, cliRecvCb); } +void cliSendBatch(SCliConn* pConn) { + SCliThrd* pThrd = pConn->hostThrd; + STrans* pTransInst = pThrd->pTransInst; + SCliBatch* pBatch = pConn->pBatch; + int32_t wLen = pBatch->wLen; + + uv_buf_t* wb = taosMemoryCalloc(wLen, sizeof(uv_buf_t)); + int i = 0; + + while (!QUEUE_IS_EMPTY(&pBatch->wq)) { + queue* h = QUEUE_HEAD(&pBatch->wq); + SCliMsg* pCliMsg = QUEUE_DATA(h, SCliMsg, q); + QUEUE_REMOVE(&pCliMsg->q); + + STransConnCtx* pCtx = pCliMsg->ctx; + + STransMsg* pMsg = (STransMsg*)(&pCliMsg->msg); + if (pMsg->pCont == 0) { + pMsg->pCont = (void*)rpcMallocCont(0); + pMsg->contLen = 0; + } + + int msgLen = transMsgLenFromCont(pMsg->contLen); + STransMsgHead* pHead = transHeadFromCont(pMsg->pCont); + + if (pHead->comp == 0) { + pHead->ahandle = pCtx != NULL ? (uint64_t)pCtx->ahandle : 0; + pHead->noResp = REQUEST_NO_RESP(pMsg) ? 1 : 0; + pHead->persist = REQUEST_PERSIS_HANDLE(pMsg) ? 1 : 0; + pHead->msgType = pMsg->msgType; + pHead->msgLen = (int32_t)htonl((uint32_t)msgLen); + pHead->release = REQUEST_RELEASE_HANDLE(pCliMsg) ? 1 : 0; + memcpy(pHead->user, pTransInst->user, strlen(pTransInst->user)); + pHead->traceId = pMsg->info.traceId; + pHead->magicNum = htonl(TRANS_MAGIC_NUM); + } + pHead->timestamp = taosHton64(taosGetTimestampUs()); + + if (pHead->comp == 0) { + if (pTransInst->compressSize != -1 && pTransInst->compressSize < pMsg->contLen) { + msgLen = transCompressMsg(pMsg->pCont, pMsg->contLen) + sizeof(STransMsgHead); + pHead->msgLen = (int32_t)htonl((uint32_t)msgLen); + } + } else { + msgLen = (int32_t)ntohl((uint32_t)(pHead->msgLen)); + } + + wb[i++] = uv_buf_init((char*)pHead, msgLen); + } + + pBatch->wLen = 0; + uv_write_t* req = taosMemoryCalloc(1, sizeof(uv_write_t)); + req->data = pConn; + uv_write(req, (uv_stream_t*)pConn->stream, wb, wLen, cliSendBatchCb); + taosMemoryFree(wb); +} void cliSend(SCliConn* pConn) { SCliThrd* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; @@ -911,8 +969,8 @@ _RETURN: } static SCliBatch* cliDumpBatch(SCliBatch* pBatch) { - SCliBatch* pNewBatch = taosMemCalloc(1, sizeof(SClicBatch)); - pNewBatch->wq = pBatch->wq; + SCliBatch* pNewBatch = taosMemoryCalloc(1, sizeof(SCliBatch)); + memcpy(pNewBatch->wq, pBatch->wq, sizeof(pBatch->wq)); pNewBatch->batchSize = pBatch->batchSize; pNewBatch->batch = pBatch->batch; @@ -929,19 +987,19 @@ static SCliBatch* cliDumpBatch(SCliBatch* pBatch) { return pNewBatch; } static void cliDestroyBatch(SCliBatch* pBatch) { - while (!EMPTY_IS_EMPTY(&pBatch->wq)) { + while (!QUEUE_IS_EMPTY(&pBatch->wq)) { queue* h = QUEUE_HEAD(&pBatch->wq); - SCliMsg* pMsg = QUEUE_DATA(h, SCliMsg, q); + SCliMsg* p = QUEUE_DATA(h, SCliMsg, q); - QUEUE_REMOVE(&pMsg->q); + QUEUE_REMOVE(&p->q); destroyCmsg(p); } taosMemoryFree(pBatch->ip); taosMemoryFree(pBatch->dst); taosMemoryFree(pBatch); } -static void cliSendBatch(SCliBatch* pBatch, SCliThrd* pThrd) { - if (pBatch->wLen == 0 || EMPTY_IS_EMPTY(&pBatch->wq)) { +static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { + if (pBatch->wLen == 0 || QUEUE_IS_EMPTY(&pBatch->wq)) { return; } STrans* pTransInst = pThrd->pTransInst; @@ -961,33 +1019,32 @@ static void cliSendBatch(SCliBatch* pBatch, SCliThrd* pThrd) { taosArrayPush(pThrd->timerList, &conn->timer); conn->timer = NULL; - cliHandleExcept(conn); + cliHandleFastFail(conn, -1); return; } struct sockaddr_in addr; addr.sin_family = AF_INET; addr.sin_addr.s_addr = ipaddr; - addr.sin_port = (uint16_t)htons(port); + addr.sin_port = (uint16_t)htons(pBatch->port); - tGTrace("%s conn %p try to connect to %s", pTransInst->label, conn, conn->ip); + tTrace("%s conn %p try to connect to %s", pTransInst->label, conn, pBatch->ip); int32_t fd = taosCreateSocketWithTimeout(TRANS_CONN_TIMEOUT * 4); if (fd == -1) { - tGError("%s conn %p failed to create socket, reason:%s", transLabel(pTransInst), conn, - tstrerror(TAOS_SYSTEM_ERROR(errno))); - cliHandleExcept(conn); - errno = 0; + tError("%s conn %p failed to create socket, reason:%s", transLabel(pTransInst), conn, + tstrerror(TAOS_SYSTEM_ERROR(errno))); + cliHandleFastFail(conn, -1); return; } int ret = uv_tcp_open((uv_tcp_t*)conn->stream, fd); if (ret != 0) { - tGError("%s conn %p failed to set stream, reason:%s", transLabel(pTransInst), conn, uv_err_name(ret)); - cliHandleExcept(conn); + tError("%s conn %p failed to set stream, reason:%s", transLabel(pTransInst), conn, uv_err_name(ret)); + cliHandleFastFail(conn, -1); return; } ret = transSetConnOption((uv_tcp_t*)conn->stream); if (ret != 0) { - tGError("%s conn %p failed to set socket opt, reason:%s", transLabel(pTransInst), conn, uv_err_name(ret)); - cliHandleExcept(conn); + tError("%s conn %p failed to set socket opt, reason:%s", transLabel(pTransInst), conn, uv_err_name(ret)); + cliHandleFastFail(conn, -1); return; } @@ -997,8 +1054,7 @@ static void cliSendBatch(SCliBatch* pBatch, SCliThrd* pThrd) { conn->timer->data = NULL; taosArrayPush(pThrd->timerList, &conn->timer); conn->timer = NULL; - - cliHandleFastFail(conn, ret); + cliHandleFastFail(conn, -1); return; } uv_timer_start(conn->timer, cliConnTimeout, TRANS_CONN_TIMEOUT, 0); @@ -1006,59 +1062,7 @@ static void cliSendBatch(SCliBatch* pBatch, SCliThrd* pThrd) { } conn->pBatch = pNewBatch; - - int32_t wLen = pBatch->wLen; - uv_buf_t* wb = taosMemoryCalloc(wLen, sizeof(uv_buf_t)); - int i = 0; - - while (!EMPTY_IS_EMPTY(&pBatch->wq)) { - queue* h = QUEUE_HEAD(&pBatch->wq); - SCliMsg* pMsg = QUEUE_DATA(h, SCliMsg, q); - QUEUE_REMOVE(&pMsg->q); - - transQueuePush(conn->cliMsgs, pMsg); - - STransConnCtx* pCtx = pCliMsg->ctx; - - STransMsg* pMsg = (STransMsg*)(&pCliMsg->msg); - if (pMsg->pCont == 0) { - pMsg->pCont = (void*)rpcMallocCont(0); - pMsg->contLen = 0; - } - - int msgLen = transMsgLenFromCont(pMsg->contLen); - STransMsgHead* pHead = transHeadFromCont(pMsg->pCont); - - if (pHead->comp == 0) { - pHead->ahandle = pCtx != NULL ? (uint64_t)pCtx->ahandle : 0; - pHead->noResp = REQUEST_NO_RESP(pMsg) ? 1 : 0; - pHead->persist = REQUEST_PERSIS_HANDLE(pMsg) ? 1 : 0; - pHead->msgType = pMsg->msgType; - pHead->msgLen = (int32_t)htonl((uint32_t)msgLen); - pHead->release = REQUEST_RELEASE_HANDLE(pCliMsg) ? 1 : 0; - memcpy(pHead->user, pTransInst->user, strlen(pTransInst->user)); - pHead->traceId = pMsg->info.traceId; - pHead->magicNum = htonl(TRANS_MAGIC_NUM); - } - pHead->timestamp = taosHton64(taosGetTimestampUs()); - - if (pHead->comp == 0) { - if (pTransInst->compressSize != -1 && pTransInst->compressSize < pMsg->contLen) { - msgLen = transCompressMsg(pMsg->pCont, pMsg->contLen) + sizeof(STransMsgHead); - pHead->msgLen = (int32_t)htonl((uint32_t)msgLen); - } - } else { - msgLen = (int32_t)ntohl((uint32_t)(pHead->msgLen)); - } - - wb[i++] = uv_buf_init((char*)pHead, msgLen); - } - - pBatch->wLen = 0; - uv_write_t* req = taosMemCalloc(1, sizeof(uv_write_t)); - req->data = pConn; - uv_write(req, (uv_stream_t*)conn->stream, wb, wLen, cliSendBatchCb); - taosMemoryFree(wb); + cliSendBatch(conn); } static void cliSendBatchCb(uv_write_t* req, int status) { SCliConn* conn = req->data; @@ -1075,29 +1079,34 @@ static void cliSendBatchCb(uv_write_t* req, int status) { static void cliHandleFastFail(SCliConn* pConn, int status) { SCliThrd* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; + tError("conn %p free twice", pConn); + if (pConn->pBatch == NULL) { + SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); - SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); + STraceId* trace = &pMsg->msg.info.traceId; + tGError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), + TMSG_INFO(pMsg->msg.msgType), pConn, pConn->ip, uv_strerror(status)); - STraceId* trace = &pMsg->msg.info.traceId; - tGError("%s msg %s failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), - TMSG_INFO(pMsg->msg.msgType), pConn, pConn->ip, uv_strerror(status)); - - if (pMsg != NULL && REQUEST_NO_RESP(&pMsg->msg) && - (pTransInst->failFastFp != NULL && pTransInst->failFastFp(pMsg->msg.msgType))) { - SFailFastItem* item = taosHashGet(pThrd->failFastCache, pConn->ip, strlen(pConn->ip)); - int64_t cTimestamp = taosGetTimestampMs(); - if (item != NULL) { - int32_t elapse = cTimestamp - item->timestamp; - if (elapse >= 0 && elapse <= pTransInst->failFastInterval) { - item->count++; + if (pMsg != NULL && REQUEST_NO_RESP(&pMsg->msg) && + (pTransInst->failFastFp != NULL && pTransInst->failFastFp(pMsg->msg.msgType))) { + SFailFastItem* item = taosHashGet(pThrd->failFastCache, pConn->ip, strlen(pConn->ip)); + int64_t cTimestamp = taosGetTimestampMs(); + if (item != NULL) { + int32_t elapse = cTimestamp - item->timestamp; + if (elapse >= 0 && elapse <= pTransInst->failFastInterval) { + item->count++; + } else { + item->count = 1; + item->timestamp = cTimestamp; + } } else { - item->count = 1; - item->timestamp = cTimestamp; + SFailFastItem item = {.count = 1, .timestamp = cTimestamp}; + taosHashPut(pThrd->failFastCache, pConn->ip, strlen(pConn->ip), &item, sizeof(SFailFastItem)); } - } else { - SFailFastItem item = {.count = 1, .timestamp = cTimestamp}; - taosHashPut(pThrd->failFastCache, pConn->ip, strlen(pConn->ip), &item, sizeof(SFailFastItem)); } + } else { + cliDestroyBatch(pConn->pBatch); + pConn->pBatch = NULL; } cliHandleExcept(pConn); } @@ -1117,7 +1126,11 @@ void cliConnCb(uv_connect_t* req, int status) { } if (status != 0) { - if (timeout == false) cliHandleFastFail(pConn, status); + if (timeout == false) { + cliHandleFastFail(pConn, status); + } else if (timeout == true) { + // already deal by timeout + } return; } @@ -1135,8 +1148,11 @@ void cliConnCb(uv_connect_t* req, int status) { transSockInfo2Str(&sockname, pConn->src); tTrace("%s conn %p connect to server successfully", CONN_GET_INST_LABEL(pConn), pConn); - - cliSend(pConn); + if (pConn->pBatch != NULL) { + cliSendBatch(pConn); + } else { + cliSend(pConn); + } } static void cliHandleQuit(SCliMsg* pMsg, SCliThrd* pThrd) { @@ -1403,11 +1419,11 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { tGTrace("%s conn %p ready", pTransInst->label, conn); } -static void cliNoBatchDealReq(queue wq, SCliThrd* pThrd) { +static void cliNoBatchDealReq(queue* wq, SCliThrd* pThrd) { int count = 0; - while (!QUEUE_IS_EMPTY(&wq)) { - queue* h = QUEUE_HEAD(&wq); + while (!QUEUE_IS_EMPTY(wq)) { + queue* h = QUEUE_HEAD(wq); QUEUE_REMOVE(h); SCliMsg* pMsg = QUEUE_DATA(h, SCliMsg, q); @@ -1420,10 +1436,10 @@ static void cliNoBatchDealReq(queue wq, SCliThrd* pThrd) { } } -static void cliHandleBatch() static void cliBatchDealReq(queue wq, SCliThrd* pThrd) { +static void cliBatchDealReq(queue* wq, SCliThrd* pThrd) { int count = 0; - while (!QUEUE_IS_EMPTY(&wq)) { - queue* h = QUEUE_HEAD(&wq); + while (!QUEUE_IS_EMPTY(wq)) { + queue* h = QUEUE_HEAD(wq); QUEUE_REMOVE(h); SCliMsg* pMsg = QUEUE_DATA(h, SCliMsg, q); @@ -1435,8 +1451,8 @@ static void cliHandleBatch() static void cliBatchDealReq(queue wq, SCliThrd* pTh char key[TSDB_FQDN_LEN + 64] = {0}; CONN_CONSTRUCT_HASH_KEY(key, ip, port); - SCliBatch *ppBatch = taosHashGet(pThrd->batchCache, key, sizeof(key))); - if (*ppBatch == NULL) { + SCliBatch** ppBatch = taosHashGet(pThrd->batchCache, key, sizeof(key)); + if (ppBatch == NULL || *ppBatch == NULL) { SCliBatch* pBatch = taosMemoryCalloc(1, sizeof(SCliBatch)); QUEUE_INIT(&pBatch->wq); QUEUE_PUSH(&pBatch->wq, h); @@ -1450,20 +1466,21 @@ static void cliHandleBatch() static void cliBatchDealReq(queue wq, SCliThrd* pTh taosHashPut(pThrd->batchCache, key, sizeof(key), &pBatch, sizeof(void*)); } else { QUEUE_PUSH(&(*ppBatch)->wq, h); - (*pBatch)->wLen += 1; - (*pBatch)->batchSize += pMsg->msg.contLen; + (*ppBatch)->wLen += 1; + (*ppBatch)->batchSize += pMsg->msg.contLen; } + return; } (*cliAsyncHandle[pMsg->type])(pMsg, pThrd); count++; } - void** pIter = taoskHashIterate(pThrd->batchCache, NULL); + void** pIter = taosHashIterate(pThrd->batchCache, NULL); while (pIter != NULL) { SCliBatch* batch = (SCliBatch*)(*pIter); - cliSendBatch(batch, pThrd); - pIter = (void**)taosHashIterate(info, pIter); + cliHandleBatchReq(batch, pThrd); + pIter = (void**)taosHashIterate(pThrd->batchCache, pIter); } if (count >= 2) { @@ -1483,11 +1500,11 @@ static void cliAsyncCb(uv_async_t* handle) { QUEUE_MOVE(&item->qmsg, &wq); taosThreadMutexUnlock(&item->mtx); - int8_t supportBatch = pTransInst->supprtBatch; + int8_t supportBatch = pTransInst->supportBatch; if (supportBatch == 0) { - cliNotBatchDealReq(wq, pThrd); + cliNoBatchDealReq(&wq, pThrd); } else if (supportBatch == 1) { - cliBatchDealReq(wq, pThrd); + cliBatchDealReq(&wq, pThrd); } if (pThrd->stopMsg != NULL) cliHandleQuit(pThrd->stopMsg, pThrd); @@ -1704,7 +1721,7 @@ static SCliThrd* createThrdObj(void* trans) { pThrd->connLimitCache = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, pTransInst->connLimitLock == 0 ? HASH_NO_LOCK : HASH_ENTRY_LOCK); - pThrd->batchCache = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, hash_no_lock); + pThrd->batchCache = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); pThrd->quit = false; return pThrd; From ee28ab3bf7271aee41c634d9ab44714c276df8fe Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 18:06:12 +0800 Subject: [PATCH 16/35] enh: batch send --- source/libs/transport/src/transCli.c | 29 +++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 54203e8d7c..47845b6336 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -837,10 +837,9 @@ void cliSendBatch(SCliConn* pConn) { uv_buf_t* wb = taosMemoryCalloc(wLen, sizeof(uv_buf_t)); int i = 0; - while (!QUEUE_IS_EMPTY(&pBatch->wq)) { - queue* h = QUEUE_HEAD(&pBatch->wq); + queue* h = NULL; + QUEUE_FOREACH(h, &pBatch->wq) { SCliMsg* pCliMsg = QUEUE_DATA(h, SCliMsg, q); - QUEUE_REMOVE(&pCliMsg->q); STransConnCtx* pCtx = pCliMsg->ctx; @@ -878,7 +877,6 @@ void cliSendBatch(SCliConn* pConn) { wb[i++] = uv_buf_init((char*)pHead, msgLen); } - pBatch->wLen = 0; uv_write_t* req = taosMemoryCalloc(1, sizeof(uv_write_t)); req->data = pConn; uv_write(req, (uv_stream_t*)pConn->stream, wb, wLen, cliSendBatchCb); @@ -970,7 +968,13 @@ _RETURN: static SCliBatch* cliDumpBatch(SCliBatch* pBatch) { SCliBatch* pNewBatch = taosMemoryCalloc(1, sizeof(SCliBatch)); - memcpy(pNewBatch->wq, pBatch->wq, sizeof(pBatch->wq)); + + QUEUE_INIT(&pNewBatch->wq); + while (!QUEUE_IS_EMPTY(&pBatch->wq)) { + queue* h = QUEUE_HEAD(&pBatch->wq); + QUEUE_REMOVE(h); + QUEUE_PUSH(&pNewBatch->wq, h); + } pNewBatch->batchSize = pBatch->batchSize; pNewBatch->batch = pBatch->batch; @@ -1027,7 +1031,7 @@ static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { addr.sin_addr.s_addr = ipaddr; addr.sin_port = (uint16_t)htons(pBatch->port); - tTrace("%s conn %p try to connect to %s", pTransInst->label, conn, pBatch->ip); + tTrace("%s conn %p try to connect to %s", pTransInst->label, conn, pBatch->dst); int32_t fd = taosCreateSocketWithTimeout(TRANS_CONN_TIMEOUT * 4); if (fd == -1) { tError("%s conn %p failed to create socket, reason:%s", transLabel(pTransInst), conn, @@ -1079,7 +1083,7 @@ static void cliSendBatchCb(uv_write_t* req, int status) { static void cliHandleFastFail(SCliConn* pConn, int status) { SCliThrd* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; - tError("conn %p free twice", pConn); + tError("conn %p free twice, reason:%s", pConn, uv_err_name(status)); if (pConn->pBatch == NULL) { SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); @@ -1443,7 +1447,7 @@ static void cliBatchDealReq(queue* wq, SCliThrd* pThrd) { QUEUE_REMOVE(h); SCliMsg* pMsg = QUEUE_DATA(h, SCliMsg, q); - if (REQUEST_NO_RESP(&pMsg->msg)) { + if (pMsg->type == Normal && REQUEST_NO_RESP(&pMsg->msg)) { STransConnCtx* pCtx = pMsg->ctx; char* ip = EPSET_GET_INUSE_IP(&pCtx->epSet); @@ -1469,7 +1473,7 @@ static void cliBatchDealReq(queue* wq, SCliThrd* pThrd) { (*ppBatch)->wLen += 1; (*ppBatch)->batchSize += pMsg->msg.contLen; } - return; + continue; } (*cliAsyncHandle[pMsg->type])(pMsg, pThrd); count++; @@ -1751,6 +1755,13 @@ static void destroyThrdObj(SCliThrd* pThrd) { taosHashCleanup(pThrd->fqdn2ipCache); taosHashCleanup(pThrd->failFastCache); taosHashCleanup(pThrd->connLimitCache); + + void** pIter = taosHashIterate(pThrd->batchCache, NULL); + while (pIter != NULL) { + SCliBatch* batch = (SCliBatch*)(*pIter); + cliDestroyBatch(batch); + pIter = (void**)taosHashIterate(pThrd->batchCache, pIter); + } taosHashCleanup(pThrd->batchCache); taosMemoryFree(pThrd); } From c20fb3878da9ecc09e46b58382f01897fe4eb131 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 19:07:49 +0800 Subject: [PATCH 17/35] fix crash --- source/libs/transport/src/transCli.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 47845b6336..9324e7b96f 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1014,9 +1014,11 @@ static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { if (conn == NULL) { conn = cliCreateConn(pThrd); conn->pBatch = pNewBatch; - conn->ip = strdup(conn->pBatch->ip); + conn->ip = strdup(pNewBatch->dst); - uint32_t ipaddr = cliGetIpFromFqdnCache(pThrd->fqdn2ipCache, conn->ip); + char* ip = pNewBatch->ip; + uint16_t port = pNewBatch->port; + uint32_t ipaddr = cliGetIpFromFqdnCache(pThrd->fqdn2ipCache, ip); if (ipaddr == 0xffffffff) { uv_timer_stop(conn->timer); conn->timer->data = NULL; @@ -1029,7 +1031,7 @@ static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { struct sockaddr_in addr; addr.sin_family = AF_INET; addr.sin_addr.s_addr = ipaddr; - addr.sin_port = (uint16_t)htons(pBatch->port); + addr.sin_port = (uint16_t)htons(port); tTrace("%s conn %p try to connect to %s", pTransInst->label, conn, pBatch->dst); int32_t fd = taosCreateSocketWithTimeout(TRANS_CONN_TIMEOUT * 4); From 23e1cb8fd5fc939a8564382a751a699dc1140b92 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 19:17:39 +0800 Subject: [PATCH 18/35] fix mem leak --- source/libs/transport/src/transCli.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 9324e7b96f..316b312095 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1072,6 +1072,8 @@ static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { } static void cliSendBatchCb(uv_write_t* req, int status) { SCliConn* conn = req->data; + taosMemoryFree(req); + SCliThrd* thrd = conn->hostThrd; cliDestroyBatch(conn->pBatch); conn->pBatch = NULL; From 666cfd870103b88fe36e64271db66c3768df3700 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 19:29:06 +0800 Subject: [PATCH 19/35] add debug info --- source/libs/transport/src/transCli.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 316b312095..991d4e85c5 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -879,6 +879,8 @@ void cliSendBatch(SCliConn* pConn) { uv_write_t* req = taosMemoryCalloc(1, sizeof(uv_write_t)); req->data = pConn; + tDebug("%p conn %p start to send batch msg, batch size:%d, msgLen:%d", CONN_GET_INST_LABEL(pConn), pConn, + pBatch->wLen, pBatch->batchSize); uv_write(req, (uv_stream_t*)pConn->stream, wb, wLen, cliSendBatchCb); taosMemoryFree(wb); } @@ -1074,6 +1076,9 @@ static void cliSendBatchCb(uv_write_t* req, int status) { SCliConn* conn = req->data; taosMemoryFree(req); + tDebug("%p conn %p send batch msg out, batch size:%d, msgLen:%d", CONN_GET_INST_LABEL(conn), conn, conn->pBatch->wLen, + conn->pBatch->batchSize); + SCliThrd* thrd = conn->hostThrd; cliDestroyBatch(conn->pBatch); conn->pBatch = NULL; From 1f643f301df09590a3e941c681b3376a804dd9ed Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 19:31:40 +0800 Subject: [PATCH 20/35] change default value --- source/common/src/tglobal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index ce4f2aa334..7853458a63 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -388,7 +388,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "queryRspPolicy", tsQueryRspPolicy, 0, 1, 0) != 0) return -1; tsNumOfRpcThreads = tsNumOfCores / 2; - tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 1, TSDB_MAX_RPC_THREADS); + tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 2, 4); if (cfgAddInt32(pCfg, "numOfRpcThreads", tsNumOfRpcThreads, 1, 1024, 0) != 0) return -1; tsNumOfCommitThreads = tsNumOfCores / 2; From 60e148d52460c93b04127788601209cdd283de2d Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 19:31:46 +0800 Subject: [PATCH 21/35] change default value --- source/common/src/tglobal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 7853458a63..37e356da01 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -496,7 +496,7 @@ static int32_t taosUpdateServerCfg(SConfig *pCfg) { pItem = cfgGetItem(tsCfg, "numOfRpcThreads"); if (pItem != NULL && pItem->stype == CFG_STYPE_DEFAULT) { tsNumOfRpcThreads = numOfCores / 2; - tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 1, TSDB_MAX_RPC_THREADS); + tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 2, 4); pItem->i32 = tsNumOfRpcThreads; pItem->stype = stype; } From 2220b7e9436cf40f51b8ba9aa925527947a104ce Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 20:26:41 +0800 Subject: [PATCH 22/35] update benchmark case --- source/libs/transport/test/cliBench.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/source/libs/transport/test/cliBench.c b/source/libs/transport/test/cliBench.c index 01e88b9988..8db1001995 100644 --- a/source/libs/transport/test/cliBench.c +++ b/source/libs/transport/test/cliBench.c @@ -32,22 +32,21 @@ typedef struct { void *pRpc; } SInfo; - void initLogEnv() { - const char *logDir = "/tmp/trans_cli"; - const char* defaultLogFileNamePrefix = "taoslog"; + const char *logDir = "/tmp/trans_cli"; + const char *defaultLogFileNamePrefix = "taoslog"; const int32_t maxLogFileNum = 10000; tsAsyncLog = 0; - //idxDebugFlag = 143; + // idxDebugFlag = 143; strcpy(tsLogDir, (char *)logDir); taosRemoveDir(tsLogDir); - taosMkDir(tsLogDir); - + taosMkDir(tsLogDir); + if (taosInitLog(defaultLogFileNamePrefix, maxLogFileNum) < 0) { - printf("failed to open log file in directory:%s\n", tsLogDir); + printf("failed to open log file in directory:%s\n", tsLogDir); } } - + static void processResponse(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet) { SInfo *pInfo = (SInfo *)pMsg->info.ahandle; tDebug("thread:%d, response is received, type:%d contLen:%d code:0x%x", pInfo->index, pMsg->msgType, pMsg->contLen, @@ -72,11 +71,12 @@ static void *sendRequest(void *param) { rpcMsg.pCont = rpcMallocCont(pInfo->msgSize); rpcMsg.contLen = pInfo->msgSize; rpcMsg.info.ahandle = pInfo; + rpcMsg.info.noResp = 1; rpcMsg.msgType = 1; tDebug("thread:%d, send request, contLen:%d num:%d", pInfo->index, pInfo->msgSize, pInfo->num); rpcSendRequest(pInfo->pRpc, &pInfo->epSet, &rpcMsg, NULL); if (pInfo->num % 20000 == 0) tInfo("thread:%d, %d requests have been sent", pInfo->index, pInfo->num); - tsem_wait(&pInfo->rspSem); + // tsem_wait(&pInfo->rspSem); } tDebug("thread:%d, it is over", pInfo->index); @@ -112,7 +112,11 @@ int main(int argc, char *argv[]) { rpcInit.sessions = 100; rpcInit.idleTime = tsShellActivityTimer * 1000; rpcInit.user = "michael"; + rpcInit.connType = TAOS_CONN_CLIENT; + rpcInit.connLimitNum = 300; + rpcInit.connLimitLock = 1; + rpcInit.supportBatch = 0; rpcDebugFlag = 135; for (int i = 1; i < argc; ++i) { @@ -148,7 +152,6 @@ int main(int argc, char *argv[]) { exit(0); } } - initLogEnv(); From f975d8e0d921339f83a9bc74c5db11716811c357 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 16 Feb 2023 21:58:32 +0800 Subject: [PATCH 23/35] change default value --- include/util/tdef.h | 6 +++--- source/common/src/tglobal.c | 4 ++-- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 2 +- source/libs/transport/test/cliBench.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/util/tdef.h b/include/util/tdef.h index e03352d98b..aeb8d08936 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -281,8 +281,8 @@ typedef enum ELogicConditionType { #define TSDB_DNODE_ROLE_MGMT 1 #define TSDB_DNODE_ROLE_VNODE 2 -#define TSDB_MAX_REPLICA 5 -#define TSDB_SYNC_LOG_BUFFER_SIZE 4096 +#define TSDB_MAX_REPLICA 5 +#define TSDB_SYNC_LOG_BUFFER_SIZE 4096 #define TSDB_SYNC_LOG_BUFFER_RETENTION (TSDB_SYNC_LOG_BUFFER_SIZE >> 4) #define TSDB_TBNAME_COLUMN_INDEX (-1) @@ -413,7 +413,7 @@ typedef enum ELogicConditionType { #ifdef WINDOWS #define TSDB_MAX_RPC_THREADS 4 // windows pipe only support 4 connections. #else -#define TSDB_MAX_RPC_THREADS 20 +#define TSDB_MAX_RPC_THREADS 10 #endif #define TSDB_QUERY_TYPE_NON_TYPE 0x00u // none type diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 37e356da01..e0b89aa32b 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -388,7 +388,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "queryRspPolicy", tsQueryRspPolicy, 0, 1, 0) != 0) return -1; tsNumOfRpcThreads = tsNumOfCores / 2; - tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 2, 4); + tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 2, TSDB_MAX_RPC_THREADS); if (cfgAddInt32(pCfg, "numOfRpcThreads", tsNumOfRpcThreads, 1, 1024, 0) != 0) return -1; tsNumOfCommitThreads = tsNumOfCores / 2; @@ -496,7 +496,7 @@ static int32_t taosUpdateServerCfg(SConfig *pCfg) { pItem = cfgGetItem(tsCfg, "numOfRpcThreads"); if (pItem != NULL && pItem->stype == CFG_STYPE_DEFAULT) { tsNumOfRpcThreads = numOfCores / 2; - tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 2, 4); + tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 2, TSDB_MAX_RPC_THREADS); pItem->i32 = tsNumOfRpcThreads; pItem->stype = stype; } diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 8751b575f3..1ddab769d8 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -286,7 +286,7 @@ int32_t dmInitClient(SDnode *pDnode) { int32_t connLimitNum = 30000 / (tsNumOfRpcThreads * 3); connLimitNum = TMAX(connLimitNum, 500); - connLimitNum = TMIN(connLimitNum, 3000); + connLimitNum = TMIN(connLimitNum, 1000); rpcInit.connLimitNum = connLimitNum; rpcInit.connLimitLock = 1; diff --git a/source/libs/transport/test/cliBench.c b/source/libs/transport/test/cliBench.c index 8db1001995..5901a71929 100644 --- a/source/libs/transport/test/cliBench.c +++ b/source/libs/transport/test/cliBench.c @@ -116,7 +116,7 @@ int main(int argc, char *argv[]) { rpcInit.connType = TAOS_CONN_CLIENT; rpcInit.connLimitNum = 300; rpcInit.connLimitLock = 1; - rpcInit.supportBatch = 0; + rpcInit.supportBatch = 1; rpcDebugFlag = 135; for (int i = 1; i < argc; ++i) { From 27e6a0c344c608fbe4473cd2d75c0c61e32d80eb Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 17 Feb 2023 08:45:10 +0800 Subject: [PATCH 24/35] change default value --- source/libs/transport/src/transCli.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 991d4e85c5..9cd9b4f7b5 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1697,7 +1697,11 @@ static SCliThrd* createThrdObj(void* trans) { taosMemoryFree(pThrd); return NULL; } - pThrd->asyncPool = transAsyncPoolCreate(pThrd->loop, 8, pThrd, cliAsyncCb); + if (pTransInst->supportBatch) { + pThrd->asyncPool = transAsyncPoolCreate(pThrd->loop, 4, pThrd, cliAsyncCb); + } else { + pThrd->asyncPool = transAsyncPoolCreate(pThrd->loop, 8, pThrd, cliAsyncCb); + } if (pThrd->asyncPool == NULL) { tError("failed to init async pool"); uv_loop_close(pThrd->loop); From 76e5213a233dfe68e527d9300ed43a9dd302f8c9 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 17 Feb 2023 12:01:12 +0800 Subject: [PATCH 25/35] opt queue --- source/libs/transport/inc/transComm.h | 4 ++-- source/libs/transport/src/transCli.c | 16 ++-------------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index 5f964f6b1a..a41cc0068c 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -94,8 +94,8 @@ typedef void* queue[2]; /* Return the structure holding the given element. */ #define QUEUE_DATA(e, type, field) ((type*)((void*)((char*)(e)-offsetof(type, field)))) -//#define TRANS_RETRY_COUNT_LIMIT 100 // retry count limit -//#define TRANS_RETRY_INTERVAL 15 // retry interval (ms) +// #define TRANS_RETRY_COUNT_LIMIT 100 // retry count limit +// #define TRANS_RETRY_INTERVAL 15 // retry interval (ms) #define TRANS_CONN_TIMEOUT 3000 // connect timeout (ms) #define TRANS_READ_TIMEOUT 3000 // read timeout (ms) #define TRANS_PACKET_LIMIT 1024 * 1024 * 512 diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 9cd9b4f7b5..fbdc17040f 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -864,15 +864,7 @@ void cliSendBatch(SCliConn* pConn) { pHead->magicNum = htonl(TRANS_MAGIC_NUM); } pHead->timestamp = taosHton64(taosGetTimestampUs()); - - if (pHead->comp == 0) { - if (pTransInst->compressSize != -1 && pTransInst->compressSize < pMsg->contLen) { - msgLen = transCompressMsg(pMsg->pCont, pMsg->contLen) + sizeof(STransMsgHead); - pHead->msgLen = (int32_t)htonl((uint32_t)msgLen); - } - } else { - msgLen = (int32_t)ntohl((uint32_t)(pHead->msgLen)); - } + msgLen = (int32_t)ntohl((uint32_t)(pHead->msgLen)); wb[i++] = uv_buf_init((char*)pHead, msgLen); } @@ -972,11 +964,7 @@ static SCliBatch* cliDumpBatch(SCliBatch* pBatch) { SCliBatch* pNewBatch = taosMemoryCalloc(1, sizeof(SCliBatch)); QUEUE_INIT(&pNewBatch->wq); - while (!QUEUE_IS_EMPTY(&pBatch->wq)) { - queue* h = QUEUE_HEAD(&pBatch->wq); - QUEUE_REMOVE(h); - QUEUE_PUSH(&pNewBatch->wq, h); - } + QUEUE_MOVE(&pBatch->wq, &pNewBatch->wq); pNewBatch->batchSize = pBatch->batchSize; pNewBatch->batch = pBatch->batch; From a75e170151fc914938bbc262bdb011109e33be44 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 17 Feb 2023 12:07:25 +0800 Subject: [PATCH 26/35] fix err code --- source/libs/transport/src/transCli.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index fbdc17040f..8309a41abd 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1080,7 +1080,9 @@ static void cliSendBatchCb(uv_write_t* req, int status) { static void cliHandleFastFail(SCliConn* pConn, int status) { SCliThrd* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; - tError("conn %p free twice, reason:%s", pConn, uv_err_name(status)); + + if (status == -1) status = ENETUNREACH; + if (pConn->pBatch == NULL) { SCliMsg* pMsg = transQueueGet(&pConn->cliMsgs, 0); @@ -1106,6 +1108,8 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { } } } else { + tError("%s batch msg failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), + pConn, pConn->ip, uv_strerror(status)); cliDestroyBatch(pConn->pBatch); pConn->pBatch = NULL; } From 3de71e39b421ff126a47be7cffe685a490de4a20 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 17 Feb 2023 13:27:36 +0800 Subject: [PATCH 27/35] opt code --- source/libs/transport/src/transCli.c | 34 ++++++++++++++++++---------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 8309a41abd..473467d753 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -164,6 +164,8 @@ static void cliSend(SCliConn* pConn); static void cliSendBatch(SCliConn* pConn); static void cliDestroyConnMsgs(SCliConn* conn, bool destroy); +static int32_t cliPreCheckSessionLimit(SCliThrd* pThrd, char* ip, uint16_t port); + // cli util func static FORCE_INLINE bool cliIsEpsetUpdated(int32_t code, STransConnCtx* pCtx); static FORCE_INLINE void cliMayCvtFqdnToIp(SEpSet* pEpSet, SCvtAddr* pCvtAddr); @@ -1000,7 +1002,14 @@ static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { SCliBatch* pNewBatch = cliDumpBatch(pBatch); - SCliConn* conn = getConnFromPool(pThrd->pool, pBatch->ip, pBatch->port); + SCliConn* conn = getConnFromPool(pThrd->pool, pNewBatch->ip, pNewBatch->port); + + if (conn == NULL && 0 != cliPreCheckSessionLimit(pThrd, pNewBatch->ip, pNewBatch->port)) { + tError("%s failed to send batch msg, batch size:%d, msgLen: %d", pTransInst->label, pNewBatch->wLen, + pNewBatch->batchSize); + cliDestroyBatch(pNewBatch); + return; + } if (conn == NULL) { conn = cliCreateConn(pThrd); conn->pBatch = pNewBatch; @@ -1064,16 +1073,17 @@ static void cliSendBatchCb(uv_write_t* req, int status) { SCliConn* conn = req->data; taosMemoryFree(req); - tDebug("%p conn %p send batch msg out, batch size:%d, msgLen:%d", CONN_GET_INST_LABEL(conn), conn, conn->pBatch->wLen, - conn->pBatch->batchSize); - SCliThrd* thrd = conn->hostThrd; cliDestroyBatch(conn->pBatch); conn->pBatch = NULL; if (status != 0) { + tDebug("%p conn %p failed to send batch msg, batch size:%d, msgLen:%d, reason:%s", CONN_GET_INST_LABEL(conn), conn, + conn->pBatch->wLen, conn->pBatch->batchSize, uv_err_name(status)); cliHandleExcept(conn); } else { + tDebug("%p conn %p succ to send batch msg, batch size:%d, msgLen:%d", CONN_GET_INST_LABEL(conn), conn, + conn->pBatch->wLen, conn->pBatch->batchSize); addConnToPool(thrd->pool, conn); } } @@ -1282,12 +1292,12 @@ static FORCE_INLINE void cliUpdateFqdnCache(SHashObj* cache, char* fqdn) { return; } -static int32_t cliPreCheckSessionLimit(SCliThrd* pThrd, SCliMsg* pMsg) { +static int32_t cliPreCheckSessionLimit(SCliThrd* pThrd, char* ip, uint16_t port) { STrans* pTransInst = pThrd->pTransInst; - STransConnCtx* pCtx = pMsg->ctx; - char* ip = EPSET_GET_INUSE_IP(&pCtx->epSet); - int32_t port = EPSET_GET_INUSE_PORT(&pCtx->epSet); + // STransConnCtx* pCtx = pMsg->ctx; + // char* ip = EPSET_GET_INUSE_IP(&pCtx->epSet); + // int32_t port = EPSET_GET_INUSE_PORT(&pCtx->epSet); char key[TSDB_FQDN_LEN + 64] = {0}; CONN_CONSTRUCT_HASH_KEY(key, ip, port); @@ -1306,6 +1316,8 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { cliMayCvtFqdnToIp(&pCtx->epSet, &pThrd->cvtAddr); STraceId* trace = &pMsg->msg.info.traceId; + char* ip = EPSET_GET_INUSE_IP(&pCtx->epSet); + uint16_t port = EPSET_GET_INUSE_PORT(&pCtx->epSet); if (!EPSET_IS_VALID(&pCtx->epSet)) { tGError("%s, msg %s sent with invalid epset", pTransInst->label, TMSG_INFO(pMsg->msg.msgType)); @@ -1314,9 +1326,7 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { } if (REQUEST_NO_RESP(&pMsg->msg) && (pTransInst->failFastFp != NULL && pTransInst->failFastFp(pMsg->msg.msgType))) { - char* ip = EPSET_GET_INUSE_IP(&pCtx->epSet); - uint32_t port = EPSET_GET_INUSE_PORT(&pCtx->epSet); - char key[TSDB_FQDN_LEN + 64] = {0}; + char key[TSDB_FQDN_LEN + 64] = {0}; CONN_CONSTRUCT_HASH_KEY(key, ip, port); SFailFastItem* item = taosHashGet(pThrd->failFastCache, key, strlen(key)); @@ -1344,7 +1354,7 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { return; } - if (conn == NULL && REQUEST_NO_RESP(&pMsg->msg) && 0 != cliPreCheckSessionLimit(pThrd, pMsg)) { + if (conn == NULL && REQUEST_NO_RESP(&pMsg->msg) && 0 != cliPreCheckSessionLimit(pThrd, ip, port)) { tGTrace("%s, msg %s cancel to send, reason: %s", pTransInst->label, TMSG_INFO(pMsg->msg.msgType), tstrerror(TSDB_CODE_RPC_MAX_SESSIONS)); destroyCmsg(pMsg); From d22e97d256debf168b6e38542a546c2cbdab71d5 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 17 Feb 2023 13:56:42 +0800 Subject: [PATCH 28/35] fix invalid code --- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 6 +++--- source/libs/transport/src/transCli.c | 16 +++++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 1ddab769d8..f35352268f 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -284,9 +284,9 @@ int32_t dmInitClient(SDnode *pDnode) { rpcInit.failFastThreshold = 3; // failed threshold rpcInit.ffp = dmFailFastFp; - int32_t connLimitNum = 30000 / (tsNumOfRpcThreads * 3); - connLimitNum = TMAX(connLimitNum, 500); - connLimitNum = TMIN(connLimitNum, 1000); + int32_t connLimitNum = 10000 / (tsNumOfRpcThreads * 3); + connLimitNum = TMAX(connLimitNum, 100); + connLimitNum = TMIN(connLimitNum, 600); rpcInit.connLimitNum = connLimitNum; rpcInit.connLimitLock = 1; diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 473467d753..e4fa91170d 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1070,22 +1070,24 @@ static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { cliSendBatch(conn); } static void cliSendBatchCb(uv_write_t* req, int status) { - SCliConn* conn = req->data; - taosMemoryFree(req); + SCliConn* conn = req->data; + SCliThrd* thrd = conn->hostThrd; + SCliBatch* p = conn->pBatch; - SCliThrd* thrd = conn->hostThrd; - cliDestroyBatch(conn->pBatch); conn->pBatch = NULL; if (status != 0) { tDebug("%p conn %p failed to send batch msg, batch size:%d, msgLen:%d, reason:%s", CONN_GET_INST_LABEL(conn), conn, - conn->pBatch->wLen, conn->pBatch->batchSize, uv_err_name(status)); + p->wLen, p->batchSize, uv_err_name(status)); cliHandleExcept(conn); } else { - tDebug("%p conn %p succ to send batch msg, batch size:%d, msgLen:%d", CONN_GET_INST_LABEL(conn), conn, - conn->pBatch->wLen, conn->pBatch->batchSize); + tDebug("%p conn %p succ to send batch msg, batch size:%d, msgLen:%d", CONN_GET_INST_LABEL(conn), conn, p->wLen, + p->batchSize); addConnToPool(thrd->pool, conn); } + + cliDestroyBatch(p); + taosMemoryFree(req); } static void cliHandleFastFail(SCliConn* pConn, int status) { SCliThrd* pThrd = pConn->hostThrd; From 597d7d3d9b4f8e2676e2ba7adafbbee4333854a3 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 17 Feb 2023 20:47:38 +0800 Subject: [PATCH 29/35] opt transport --- source/libs/transport/src/transCli.c | 194 ++++++++++++++++++--------- 1 file changed, 133 insertions(+), 61 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index e4fa91170d..3bd764ff8d 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -21,14 +21,28 @@ typedef struct SConnList { } SConnList; typedef struct { - queue wq; - int32_t wLen; - int32_t batchSize; // - int32_t batch; + queue wq; + int32_t len; + + int connMax; + int connCnt; + int batchLenLimit; + char* dst; char* ip; uint16_t port; + +} SCliBatchList; + +typedef struct { + queue wq; + queue listq; + int32_t wLen; + int32_t batchSize; // + int32_t batch; + SCliBatchList* pList; } SCliBatch; + typedef struct SCliConn { T_REF_DECLARE() uv_connect_t connReq; @@ -866,14 +880,21 @@ void cliSendBatch(SCliConn* pConn) { pHead->magicNum = htonl(TRANS_MAGIC_NUM); } pHead->timestamp = taosHton64(taosGetTimestampUs()); - msgLen = (int32_t)ntohl((uint32_t)(pHead->msgLen)); + if (pHead->comp == 0) { + if (pTransInst->compressSize != -1 && pTransInst->compressSize < pMsg->contLen) { + msgLen = transCompressMsg(pMsg->pCont, pMsg->contLen) + sizeof(STransMsgHead); + pHead->msgLen = (int32_t)htonl((uint32_t)msgLen); + } + } else { + msgLen = (int32_t)ntohl((uint32_t)(pHead->msgLen)); + } wb[i++] = uv_buf_init((char*)pHead, msgLen); } uv_write_t* req = taosMemoryCalloc(1, sizeof(uv_write_t)); req->data = pConn; - tDebug("%p conn %p start to send batch msg, batch size:%d, msgLen:%d", CONN_GET_INST_LABEL(pConn), pConn, + tDebug("%s conn %p start to send batch msg, batch size:%d, msgLen:%d", CONN_GET_INST_LABEL(pConn), pConn, pBatch->wLen, pBatch->batchSize); uv_write(req, (uv_stream_t*)pConn->stream, wb, wLen, cliSendBatchCb); taosMemoryFree(wb); @@ -962,62 +983,37 @@ _RETURN: return; } -static SCliBatch* cliDumpBatch(SCliBatch* pBatch) { - SCliBatch* pNewBatch = taosMemoryCalloc(1, sizeof(SCliBatch)); - - QUEUE_INIT(&pNewBatch->wq); - QUEUE_MOVE(&pBatch->wq, &pNewBatch->wq); - - pNewBatch->batchSize = pBatch->batchSize; - pNewBatch->batch = pBatch->batch; - pNewBatch->wLen = pBatch->wLen; - pNewBatch->dst = strdup(pBatch->dst); - pNewBatch->ip = strdup(pBatch->ip); - pNewBatch->port = pBatch->port; - - QUEUE_INIT(&pBatch->wq); - pBatch->batchSize = 0; - pBatch->batch = 0; - pBatch->wLen = 0; - - return pNewBatch; -} static void cliDestroyBatch(SCliBatch* pBatch) { while (!QUEUE_IS_EMPTY(&pBatch->wq)) { - queue* h = QUEUE_HEAD(&pBatch->wq); - SCliMsg* p = QUEUE_DATA(h, SCliMsg, q); + queue* h = QUEUE_HEAD(&pBatch->wq); + QUEUE_REMOVE(h); - QUEUE_REMOVE(&p->q); + SCliMsg* p = QUEUE_DATA(h, SCliMsg, q); destroyCmsg(p); } - taosMemoryFree(pBatch->ip); - taosMemoryFree(pBatch->dst); taosMemoryFree(pBatch); } static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { if (pBatch->wLen == 0 || QUEUE_IS_EMPTY(&pBatch->wq)) { return; } - STrans* pTransInst = pThrd->pTransInst; + STrans* pTransInst = pThrd->pTransInst; + SCliBatchList* pList = pBatch->pList; - SCliBatch* pNewBatch = cliDumpBatch(pBatch); + SCliConn* conn = getConnFromPool(pThrd->pool, pList->ip, pList->port); - SCliConn* conn = getConnFromPool(pThrd->pool, pNewBatch->ip, pNewBatch->port); - - if (conn == NULL && 0 != cliPreCheckSessionLimit(pThrd, pNewBatch->ip, pNewBatch->port)) { - tError("%s failed to send batch msg, batch size:%d, msgLen: %d", pTransInst->label, pNewBatch->wLen, - pNewBatch->batchSize); - cliDestroyBatch(pNewBatch); + if (conn == NULL && 0 != cliPreCheckSessionLimit(pThrd, pList->ip, pList->port)) { + tError("%s failed to send batch msg, batch size:%d, msgLen: %d", pTransInst->label, pBatch->wLen, + pBatch->batchSize); + cliDestroyBatch(pBatch); return; } if (conn == NULL) { conn = cliCreateConn(pThrd); - conn->pBatch = pNewBatch; - conn->ip = strdup(pNewBatch->dst); + conn->pBatch = pBatch; + conn->ip = strdup(pList->dst); - char* ip = pNewBatch->ip; - uint16_t port = pNewBatch->port; - uint32_t ipaddr = cliGetIpFromFqdnCache(pThrd->fqdn2ipCache, ip); + uint32_t ipaddr = cliGetIpFromFqdnCache(pThrd->fqdn2ipCache, pList->ip); if (ipaddr == 0xffffffff) { uv_timer_stop(conn->timer); conn->timer->data = NULL; @@ -1030,9 +1026,9 @@ static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { struct sockaddr_in addr; addr.sin_family = AF_INET; addr.sin_addr.s_addr = ipaddr; - addr.sin_port = (uint16_t)htons(port); + addr.sin_port = (uint16_t)htons(pList->port); - tTrace("%s conn %p try to connect to %s", pTransInst->label, conn, pBatch->dst); + tTrace("%s conn %p try to connect to %s", pTransInst->label, conn, pList->dst); int32_t fd = taosCreateSocketWithTimeout(TRANS_CONN_TIMEOUT * 4); if (fd == -1) { tError("%s conn %p failed to create socket, reason:%s", transLabel(pTransInst), conn, @@ -1066,7 +1062,7 @@ static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { return; } - conn->pBatch = pNewBatch; + conn->pBatch = pBatch; cliSendBatch(conn); } static void cliSendBatchCb(uv_write_t* req, int status) { @@ -1074,15 +1070,33 @@ static void cliSendBatchCb(uv_write_t* req, int status) { SCliThrd* thrd = conn->hostThrd; SCliBatch* p = conn->pBatch; + SCliBatchList* pBatchList = p->pList; + + int32_t empty = QUEUE_IS_EMPTY(&pBatchList->wq); + pBatchList->connCnt -= 1; + conn->pBatch = NULL; if (status != 0) { - tDebug("%p conn %p failed to send batch msg, batch size:%d, msgLen:%d, reason:%s", CONN_GET_INST_LABEL(conn), conn, + tDebug("%s conn %p failed to send batch msg, batch size:%d, msgLen:%d, reason:%s", CONN_GET_INST_LABEL(conn), conn, p->wLen, p->batchSize, uv_err_name(status)); cliHandleExcept(conn); + } else { - tDebug("%p conn %p succ to send batch msg, batch size:%d, msgLen:%d", CONN_GET_INST_LABEL(conn), conn, p->wLen, + tDebug("%s conn %p succ to send batch msg, batch size:%d, msgLen:%d", CONN_GET_INST_LABEL(conn), conn, p->wLen, p->batchSize); + + if (empty == false) { + queue* h = QUEUE_HEAD(&pBatchList->wq); + QUEUE_REMOVE(h); + conn->pBatch = QUEUE_DATA(h, SCliBatch, listq); + + pBatchList->connCnt += 1; + pBatchList->len -= 1; + cliSendBatch(conn); + return; + } + addConnToPool(thrd->pool, conn); } @@ -1468,23 +1482,65 @@ static void cliBatchDealReq(queue* wq, SCliThrd* pThrd) { char key[TSDB_FQDN_LEN + 64] = {0}; CONN_CONSTRUCT_HASH_KEY(key, ip, port); - SCliBatch** ppBatch = taosHashGet(pThrd->batchCache, key, sizeof(key)); - if (ppBatch == NULL || *ppBatch == NULL) { + // SCliBatch** ppBatch = taosHashGet(pThrd->batchCache, key, sizeof(key)); + SCliBatchList** ppBatchList = taosHashGet(pThrd->batchCache, key, sizeof(key)); + if (ppBatchList == NULL || *ppBatchList == NULL) { + SCliBatchList* pBatchList = taosMemoryCalloc(1, sizeof(SCliBatchList)); + QUEUE_INIT(&pBatchList->wq); + pBatchList->connMax = 200; + pBatchList->connCnt = 0; + pBatchList->batchLenLimit = 16 * 1024; + pBatchList->ip = strdup(ip); + pBatchList->dst = strdup(key); + pBatchList->port = port; + SCliBatch* pBatch = taosMemoryCalloc(1, sizeof(SCliBatch)); QUEUE_INIT(&pBatch->wq); + QUEUE_INIT(&pBatch->listq); + QUEUE_PUSH(&pBatch->wq, h); pBatch->wLen += 1; pBatch->batchSize += pMsg->msg.contLen; + pBatch->pList = pBatchList; - pBatch->dst = strdup(key); - pBatch->ip = strdup(ip); - pBatch->port = (uint16_t)port; + QUEUE_PUSH(&pBatchList->wq, &pBatch->listq); - taosHashPut(pThrd->batchCache, key, sizeof(key), &pBatch, sizeof(void*)); + taosHashPut(pThrd->batchCache, key, sizeof(key), &pBatchList, sizeof(void*)); } else { - QUEUE_PUSH(&(*ppBatch)->wq, h); - (*ppBatch)->wLen += 1; - (*ppBatch)->batchSize += pMsg->msg.contLen; + if (QUEUE_IS_EMPTY(&(*ppBatchList)->wq)) { + SCliBatch* pBatch = taosMemoryCalloc(1, sizeof(SCliBatch)); + QUEUE_INIT(&pBatch->wq); + QUEUE_INIT(&pBatch->listq); + + QUEUE_PUSH(&pBatch->wq, h); + pBatch->wLen += 1; + pBatch->batchSize = pMsg->msg.contLen; + pBatch->pList = *ppBatchList; + + QUEUE_PUSH(&((*ppBatchList)->wq), &pBatch->listq); + (*ppBatchList)->len += 1; + + continue; + } + + queue* hdr = QUEUE_TAIL(&((*ppBatchList)->wq)); + SCliBatch* pBatch = QUEUE_DATA(hdr, SCliBatch, listq); + if ((pBatch->batchSize + pMsg->msg.contLen) < (*ppBatchList)->batchLenLimit) { + QUEUE_PUSH(&pBatch->wq, h); + pBatch->batchSize += pMsg->msg.contLen; + } else { + SCliBatch* pBatch = taosMemoryCalloc(1, sizeof(SCliBatch)); + QUEUE_INIT(&pBatch->wq); + QUEUE_INIT(&pBatch->listq); + + QUEUE_PUSH(&pBatch->wq, h); + pBatch->wLen += 1; + pBatch->batchSize += pMsg->msg.contLen; + pBatch->pList = *ppBatchList; + + QUEUE_PUSH(&((*ppBatchList)->wq), &pBatch->listq); + (*ppBatchList)->len += 1; + } } continue; } @@ -1494,7 +1550,16 @@ static void cliBatchDealReq(queue* wq, SCliThrd* pThrd) { void** pIter = taosHashIterate(pThrd->batchCache, NULL); while (pIter != NULL) { - SCliBatch* batch = (SCliBatch*)(*pIter); + SCliBatchList* batchList = (SCliBatchList*)(*pIter); + if (QUEUE_IS_EMPTY(&batchList->wq) || batchList->connCnt >= batchList->connMax) { + continue; + } + queue* hr = QUEUE_HEAD(&batchList->wq); + QUEUE_REMOVE(hr); + + batchList->len -= 1; + + SCliBatch* batch = QUEUE_DATA(hr, SCliBatch, listq); cliHandleBatchReq(batch, pThrd); pIter = (void**)taosHashIterate(pThrd->batchCache, pIter); @@ -1775,8 +1840,15 @@ static void destroyThrdObj(SCliThrd* pThrd) { void** pIter = taosHashIterate(pThrd->batchCache, NULL); while (pIter != NULL) { - SCliBatch* batch = (SCliBatch*)(*pIter); - cliDestroyBatch(batch); + SCliBatchList* pBatchList = (SCliBatchList*)(*pIter); + while (!QUEUE_IS_EMPTY(&pBatchList->wq)) { + queue* h = QUEUE_HEAD(&pBatchList->wq); + QUEUE_REMOVE(h); + + SCliBatch* pBatch = QUEUE_DATA(h, SCliBatch, listq); + cliDestroyBatch(pBatch); + } + taosMemoryFree(pBatchList); pIter = (void**)taosHashIterate(pThrd->batchCache, pIter); } taosHashCleanup(pThrd->batchCache); From b894ba6f37bde7c803976fca9ee71bf7ce5397e0 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 17 Feb 2023 21:59:23 +0800 Subject: [PATCH 30/35] opt trans --- source/libs/transport/src/transCli.c | 63 +++++++++++++++------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 3bd764ff8d..f106e07e37 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -159,11 +159,9 @@ static void cliIdleCb(uv_idle_t* handle); static void cliPrepareCb(uv_prepare_t* handle); static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd); -// static void cliConnBatchCb(uv_connect_t* req, int status); static void cliSendBatchCb(uv_write_t* req, int status); -// static void cliConnBatchCb(uv_connect_t* req, int status); -// callback after conn to server -// static void cliConnBatchCb(uv_connect_t* req, int status); + +SCliBatch* cliGetHeadFromList(SCliBatchList* pList); static bool cliRecvReleaseReq(SCliConn* conn, STransMsgHead* pHead); @@ -847,8 +845,11 @@ void cliSendBatch(SCliConn* pConn) { SCliThrd* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; - SCliBatch* pBatch = pConn->pBatch; - int32_t wLen = pBatch->wLen; + SCliBatch* pBatch = pConn->pBatch; + SCliBatchList* pList = pBatch->pList; + pList->connCnt += 1; + + int32_t wLen = pBatch->wLen; uv_buf_t* wb = taosMemoryCalloc(wLen, sizeof(uv_buf_t)); int i = 0; @@ -994,7 +995,7 @@ static void cliDestroyBatch(SCliBatch* pBatch) { taosMemoryFree(pBatch); } static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { - if (pBatch->wLen == 0 || QUEUE_IS_EMPTY(&pBatch->wq)) { + if (pBatch == NULL || pBatch->wLen == 0 || QUEUE_IS_EMPTY(&pBatch->wq)) { return; } STrans* pTransInst = pThrd->pTransInst; @@ -1071,8 +1072,7 @@ static void cliSendBatchCb(uv_write_t* req, int status) { SCliBatch* p = conn->pBatch; SCliBatchList* pBatchList = p->pList; - - int32_t empty = QUEUE_IS_EMPTY(&pBatchList->wq); + SCliBatch* nxtBatch = cliGetHeadFromList(pBatchList); pBatchList->connCnt -= 1; conn->pBatch = NULL; @@ -1081,23 +1081,17 @@ static void cliSendBatchCb(uv_write_t* req, int status) { tDebug("%s conn %p failed to send batch msg, batch size:%d, msgLen:%d, reason:%s", CONN_GET_INST_LABEL(conn), conn, p->wLen, p->batchSize, uv_err_name(status)); cliHandleExcept(conn); - + cliHandleBatchReq(nxtBatch, thrd); } else { tDebug("%s conn %p succ to send batch msg, batch size:%d, msgLen:%d", CONN_GET_INST_LABEL(conn), conn, p->wLen, p->batchSize); - if (empty == false) { - queue* h = QUEUE_HEAD(&pBatchList->wq); - QUEUE_REMOVE(h); - conn->pBatch = QUEUE_DATA(h, SCliBatch, listq); - - pBatchList->connCnt += 1; - pBatchList->len -= 1; + if (nxtBatch != NULL) { + conn->pBatch = nxtBatch; cliSendBatch(conn); - return; + } else { + addConnToPool(thrd->pool, conn); } - - addConnToPool(thrd->pool, conn); } cliDestroyBatch(p); @@ -1466,6 +1460,18 @@ static void cliNoBatchDealReq(queue* wq, SCliThrd* pThrd) { tTrace("cli process batch size:%d", count); } } +SCliBatch* cliGetHeadFromList(SCliBatchList* pList) { + if (QUEUE_IS_EMPTY(&pList->wq) || pList->connCnt >= pList->connMax) { + return NULL; + } + queue* hr = QUEUE_HEAD(&pList->wq); + QUEUE_REMOVE(hr); + + pList->len -= 1; + + SCliBatch* batch = QUEUE_DATA(hr, SCliBatch, listq); + return batch; +} static void cliBatchDealReq(queue* wq, SCliThrd* pThrd) { int count = 0; @@ -1528,6 +1534,7 @@ static void cliBatchDealReq(queue* wq, SCliThrd* pThrd) { if ((pBatch->batchSize + pMsg->msg.contLen) < (*ppBatchList)->batchLenLimit) { QUEUE_PUSH(&pBatch->wq, h); pBatch->batchSize += pMsg->msg.contLen; + pBatch->wLen += 1; } else { SCliBatch* pBatch = taosMemoryCalloc(1, sizeof(SCliBatch)); QUEUE_INIT(&pBatch->wq); @@ -1551,17 +1558,10 @@ static void cliBatchDealReq(queue* wq, SCliThrd* pThrd) { void** pIter = taosHashIterate(pThrd->batchCache, NULL); while (pIter != NULL) { SCliBatchList* batchList = (SCliBatchList*)(*pIter); - if (QUEUE_IS_EMPTY(&batchList->wq) || batchList->connCnt >= batchList->connMax) { - continue; + SCliBatch* batch = cliGetHeadFromList(batchList); + if (batch != NULL) { + cliHandleBatchReq(batch, pThrd); } - queue* hr = QUEUE_HEAD(&batchList->wq); - QUEUE_REMOVE(hr); - - batchList->len -= 1; - - SCliBatch* batch = QUEUE_DATA(hr, SCliBatch, listq); - - cliHandleBatchReq(batch, pThrd); pIter = (void**)taosHashIterate(pThrd->batchCache, pIter); } @@ -1848,7 +1848,10 @@ static void destroyThrdObj(SCliThrd* pThrd) { SCliBatch* pBatch = QUEUE_DATA(h, SCliBatch, listq); cliDestroyBatch(pBatch); } + taosMemoryFree(pBatchList->ip); + taosMemoryFree(pBatchList->dst); taosMemoryFree(pBatchList); + pIter = (void**)taosHashIterate(pThrd->batchCache, pIter); } taosHashCleanup(pThrd->batchCache); From f19fdaa142f70202854a9835ef1b02c9f1d50232 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 17 Feb 2023 22:40:27 +0800 Subject: [PATCH 31/35] opt trans --- include/libs/transport/trpc.h | 5 +++-- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 3 ++- source/libs/transport/inc/transportInt.h | 1 + source/libs/transport/src/trans.c | 1 + source/libs/transport/src/transCli.c | 13 ++++++++++--- source/libs/transport/test/cliBench.c | 3 ++- 6 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/libs/transport/trpc.h b/include/libs/transport/trpc.h index acfd5dfb51..0cc0ab64ef 100644 --- a/include/libs/transport/trpc.h +++ b/include/libs/transport/trpc.h @@ -115,8 +115,9 @@ typedef struct SRpcInit { int32_t connLimitNum; int32_t connLimitLock; - int8_t supportBatch; // 0: no batch, 1. batch - void *parent; + int8_t supportBatch; // 0: no batch, 1. batch + int32_t batchSize; + void *parent; } SRpcInit; typedef struct { diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index f35352268f..4e9b7149e4 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -286,11 +286,12 @@ int32_t dmInitClient(SDnode *pDnode) { int32_t connLimitNum = 10000 / (tsNumOfRpcThreads * 3); connLimitNum = TMAX(connLimitNum, 100); - connLimitNum = TMIN(connLimitNum, 600); + connLimitNum = TMIN(connLimitNum, 500); rpcInit.connLimitNum = connLimitNum; rpcInit.connLimitLock = 1; rpcInit.supportBatch = 1; + rpcInit.batchSize = 64 * 1024; pTrans->clientRpc = rpcOpen(&rpcInit); if (pTrans->clientRpc == NULL) { diff --git a/source/libs/transport/inc/transportInt.h b/source/libs/transport/inc/transportInt.h index 13adb4d2b4..1f3c98ad72 100644 --- a/source/libs/transport/inc/transportInt.h +++ b/source/libs/transport/inc/transportInt.h @@ -67,6 +67,7 @@ typedef struct { int32_t connLimitNum; int8_t connLimitLock; // 0: no lock. 1. lock int8_t supportBatch; // 0: no batch, 1: support batch + int32_t batchSize; int index; void* parent; diff --git a/source/libs/transport/src/trans.c b/source/libs/transport/src/trans.c index 38ec1c7fdc..16ea25a41a 100644 --- a/source/libs/transport/src/trans.c +++ b/source/libs/transport/src/trans.c @@ -70,6 +70,7 @@ void* rpcOpen(const SRpcInit* pInit) { pRpc->connLimitNum = pInit->connLimitNum; pRpc->connLimitLock = pInit->connLimitLock; pRpc->supportBatch = pInit->supportBatch; + pRpc->batchSize = pInit->batchSize; pRpc->numOfThreads = pInit->numOfThreads > TSDB_MAX_RPC_THREADS ? TSDB_MAX_RPC_THREADS : pInit->numOfThreads; if (pRpc->numOfThreads <= 0) { diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index f106e07e37..f0635d376c 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -27,6 +27,7 @@ typedef struct { int connMax; int connCnt; int batchLenLimit; + int sending; char* dst; char* ip; @@ -992,6 +993,8 @@ static void cliDestroyBatch(SCliBatch* pBatch) { SCliMsg* p = QUEUE_DATA(h, SCliMsg, q); destroyCmsg(p); } + SCliBatchList* p = pBatch->pList; + p->sending -= 1; taosMemoryFree(pBatch); } static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { @@ -1461,11 +1464,12 @@ static void cliNoBatchDealReq(queue* wq, SCliThrd* pThrd) { } } SCliBatch* cliGetHeadFromList(SCliBatchList* pList) { - if (QUEUE_IS_EMPTY(&pList->wq) || pList->connCnt >= pList->connMax) { + if (QUEUE_IS_EMPTY(&pList->wq) || pList->connCnt > pList->connMax || pList->sending > pList->connMax) { return NULL; } queue* hr = QUEUE_HEAD(&pList->wq); QUEUE_REMOVE(hr); + pList->sending += 1; pList->len -= 1; @@ -1474,6 +1478,8 @@ SCliBatch* cliGetHeadFromList(SCliBatchList* pList) { } static void cliBatchDealReq(queue* wq, SCliThrd* pThrd) { + STrans* pInst = pThrd->pTransInst; + int count = 0; while (!QUEUE_IS_EMPTY(wq)) { queue* h = QUEUE_HEAD(wq); @@ -1493,9 +1499,10 @@ static void cliBatchDealReq(queue* wq, SCliThrd* pThrd) { if (ppBatchList == NULL || *ppBatchList == NULL) { SCliBatchList* pBatchList = taosMemoryCalloc(1, sizeof(SCliBatchList)); QUEUE_INIT(&pBatchList->wq); - pBatchList->connMax = 200; + pBatchList->connMax = pInst->connLimitNum; pBatchList->connCnt = 0; - pBatchList->batchLenLimit = 16 * 1024; + pBatchList->batchLenLimit = pInst->batchSize; + pBatchList->ip = strdup(ip); pBatchList->dst = strdup(key); pBatchList->port = port; diff --git a/source/libs/transport/test/cliBench.c b/source/libs/transport/test/cliBench.c index 5901a71929..aaee162cd7 100644 --- a/source/libs/transport/test/cliBench.c +++ b/source/libs/transport/test/cliBench.c @@ -114,8 +114,9 @@ int main(int argc, char *argv[]) { rpcInit.user = "michael"; rpcInit.connType = TAOS_CONN_CLIENT; - rpcInit.connLimitNum = 300; + rpcInit.connLimitNum = 10; rpcInit.connLimitLock = 1; + rpcInit.batchSize = 16 * 1024; rpcInit.supportBatch = 1; rpcDebugFlag = 135; From 0a22b24ed31ac1d5688397fd11e057d75e0934fd Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 17 Feb 2023 23:03:09 +0800 Subject: [PATCH 32/35] opt trans --- source/libs/transport/src/transCli.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index f0635d376c..b4b1bf1f51 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1502,6 +1502,7 @@ static void cliBatchDealReq(queue* wq, SCliThrd* pThrd) { pBatchList->connMax = pInst->connLimitNum; pBatchList->connCnt = 0; pBatchList->batchLenLimit = pInst->batchSize; + pBatchList->len += 1; pBatchList->ip = strdup(ip); pBatchList->dst = strdup(key); From 9db11ae559a487c3d7987299760291cd2aff14ab Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 17 Feb 2023 23:05:06 +0800 Subject: [PATCH 33/35] opt trans --- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 4e9b7149e4..7f9a261cf2 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -291,7 +291,7 @@ int32_t dmInitClient(SDnode *pDnode) { rpcInit.connLimitNum = connLimitNum; rpcInit.connLimitLock = 1; rpcInit.supportBatch = 1; - rpcInit.batchSize = 64 * 1024; + rpcInit.batchSize = 16 * 1024; pTrans->clientRpc = rpcOpen(&rpcInit); if (pTrans->clientRpc == NULL) { From 2fced5c88b989ecf06b2275dab1018478318229d Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sat, 18 Feb 2023 08:13:21 +0800 Subject: [PATCH 34/35] del case --- tests/parallel_test/cases.task | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index ee647500cf..16751423b1 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -301,7 +301,7 @@ ,,y,script,./test.sh -f tsim/vnode/replica3_repeat.sim ,,y,script,./test.sh -f tsim/vnode/replica3_vgroup.sim ,,y,script,./test.sh -f tsim/vnode/replica3_many.sim -,,y,script,./test.sh -f tsim/vnode/replica3_import.sim +#,,y,script,./test.sh -f tsim/vnode/replica3_import.sim ,,y,script,./test.sh -f tsim/vnode/stable_balance_replica1.sim ,,y,script,./test.sh -f tsim/vnode/stable_dnode2_stop.sim ,,y,script,./test.sh -f tsim/vnode/stable_dnode2.sim From a9b6d564257a27f79ad7c100ed97568fd1aa994b Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sat, 18 Feb 2023 08:45:40 +0800 Subject: [PATCH 35/35] rm macro --- source/libs/transport/src/transCli.c | 2 -- source/libs/transport/src/transSvr.c | 4 ---- 2 files changed, 6 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index b4b1bf1f51..2c862ed45b 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -12,7 +12,6 @@ * along with this program. If not, see . */ -#ifdef USE_UV #include "transComm.h" typedef struct SConnList { @@ -2423,4 +2422,3 @@ int64_t transAllocHandle() { return exh->refId; } -#endif diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index eecd260d35..04e094ae9a 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -12,8 +12,6 @@ * along with this program. If not, see . */ -#ifdef USE_UV - #include "transComm.h" static TdThreadOnce transModuleInit = PTHREAD_ONCE_INIT; @@ -1347,5 +1345,3 @@ _return2: } int transGetConnInfo(void* thandle, STransHandleInfo* pConnInfo) { return -1; } - -#endif