From 3b1664d9e4542f903920a4af11f8b7c46bf19aae Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 22 Jun 2022 18:50:39 +0800 Subject: [PATCH 01/17] feat: refactor rpc code --- include/libs/qcom/query.h | 35 +++--- include/os/osSocket.h | 5 +- source/client/src/clientImpl.c | 104 ++++++++--------- source/libs/qcom/src/queryUtil.c | 19 +-- source/libs/transport/inc/transComm.h | 26 +++++ source/libs/transport/src/transCli.c | 159 +++++++++++++++++--------- source/libs/transport/src/transComm.c | 2 +- source/libs/transport/src/transSvr.c | 26 ----- source/os/src/osSocket.c | 8 +- 9 files changed, 222 insertions(+), 162 deletions(-) diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index d562d07d77..0780343c64 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -16,6 +16,7 @@ #ifndef _TD_QUERY_H_ #define _TD_QUERY_H_ +// clang-format off #ifdef __cplusplus extern "C" { #endif @@ -71,7 +72,7 @@ typedef struct SIndexMeta { } SIndexMeta; typedef struct STbVerInfo { - char tbFName[TSDB_TABLE_FNAME_LEN]; + char tbFName[TSDB_TABLE_FNAME_LEN]; int32_t sversion; int32_t tversion; } STbVerInfo; @@ -141,7 +142,7 @@ typedef struct SDataBuf { typedef struct STargetInfo { ETargetType type; - char* dbFName; // used to update db's vgroup epset + char* dbFName; // used to update db's vgroup epset int32_t vgId; } STargetInfo; @@ -149,15 +150,15 @@ typedef int32_t (*__async_send_cb_fn_t)(void* param, const SDataBuf* pMsg, int32 typedef int32_t (*__async_exec_fn_t)(void* param); typedef struct SRequestConnInfo { - void* pTrans; - uint64_t requestId; - int64_t requestObjRefId; - SEpSet mgmtEps; + void* pTrans; + uint64_t requestId; + int64_t requestObjRefId; + SEpSet mgmtEps; } SRequestConnInfo; typedef struct SMsgSendInfo { - __async_send_cb_fn_t fp; // async callback function - STargetInfo target; // for update epset + __async_send_cb_fn_t fp; // async callback function + STargetInfo target; // for update epset void* param; uint64_t requestId; 
uint64_t requestObjRefId; @@ -206,9 +207,10 @@ int32_t queryCreateTableMetaFromMsg(STableMetaRsp* msg, bool isSuperTable, STabl char* jobTaskStatusStr(int32_t status); SSchema createSchema(int8_t type, int32_t bytes, col_id_t colId, const char* name); -void destroyQueryExecRes(SQueryExecRes* pRes); +void destroyQueryExecRes(SQueryExecRes* pRes); -extern int32_t (*queryBuildMsg[TDMT_MAX])(void *input, char **msg, int32_t msgSize, int32_t *msgLen, void*(*mallocFp)(int32_t)); +extern int32_t (*queryBuildMsg[TDMT_MAX])(void* input, char** msg, int32_t msgSize, int32_t* msgLen, + void* (*mallocFp)(int32_t)); extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t msgSize); #define SET_META_TYPE_NULL(t) (t) = META_TYPE_NULL_TABLE @@ -219,7 +221,7 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t #define NEED_CLIENT_RM_TBLMETA_ERROR(_code) \ ((_code) == TSDB_CODE_PAR_TABLE_NOT_EXIST || (_code) == TSDB_CODE_VND_TB_NOT_EXIST || \ (_code) == TSDB_CODE_PAR_INVALID_COLUMNS_NUM || (_code) == TSDB_CODE_PAR_INVALID_COLUMN || \ - (_code) == TSDB_CODE_PAR_TAGS_NOT_MATCHED || (_code) == TSDB_CODE_PAR_VALUE_TOO_LONG || \ + (_code) == TSDB_CODE_PAR_TAGS_NOT_MATCHED || (_code) == TSDB_CODE_PAR_VALUE_TOO_LONG || \ (_code) == TSDB_CODE_PAR_INVALID_DROP_COL || ((_code) == TSDB_CODE_TDB_INVALID_TABLE_ID)) #define NEED_CLIENT_REFRESH_VG_ERROR(_code) \ ((_code) == TSDB_CODE_VND_HASH_MISMATCH || (_code) == TSDB_CODE_VND_INVALID_VGROUP_ID) @@ -227,11 +229,13 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t #define NEED_CLIENT_HANDLE_ERROR(_code) \ (NEED_CLIENT_RM_TBLMETA_ERROR(_code) || NEED_CLIENT_REFRESH_VG_ERROR(_code) || \ NEED_CLIENT_REFRESH_TBLMETA_ERROR(_code)) -#define NEED_CLIENT_RM_TBLMETA_REQ(_type) ((_type) == TDMT_VND_CREATE_TABLE || (_type) == TDMT_VND_CREATE_STB \ - || (_type) == TDMT_VND_DROP_TABLE || (_type) == TDMT_VND_DROP_STB) +#define NEED_CLIENT_RM_TBLMETA_REQ(_type) \ + ((_type) == 
TDMT_VND_CREATE_TABLE || (_type) == TDMT_VND_CREATE_STB || (_type) == TDMT_VND_DROP_TABLE || \ + (_type) == TDMT_VND_DROP_STB) -#define NEED_SCHEDULER_RETRY_ERROR(_code) \ - ((_code) == TSDB_CODE_RPC_REDIRECT || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || (_code) == TSDB_CODE_SCH_TIMEOUT_ERROR) +#define NEED_SCHEDULER_RETRY_ERROR(_code) \ + ((_code) == TSDB_CODE_RPC_REDIRECT || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || \ + (_code) == TSDB_CODE_SCH_TIMEOUT_ERROR) #define REQUEST_TOTAL_EXEC_TIMES 2 @@ -308,3 +312,4 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t #endif #endif /*_TD_QUERY_H_*/ + // clang-format on diff --git a/include/os/osSocket.h b/include/os/osSocket.h index 213a6930ee..9dd5b972fa 100644 --- a/include/os/osSocket.h +++ b/include/os/osSocket.h @@ -157,7 +157,10 @@ int32_t taosNonblockwrite(TdSocketPtr pSocket, char *ptr, int32_t nbytes); int64_t taosCopyFds(TdSocketPtr pSrcSocket, TdSocketPtr pDestSocket, int64_t len); void taosWinSocketInit(); -int taosCreateSocketWithTimeOutOpt(uint32_t conn_timeout_sec); +/* + * set timeout(sec) + */ +int32_t taosCreateSocketWithTimeout(uint32_t timeout); TdSocketPtr taosOpenUdpSocket(uint32_t localIp, uint16_t localPort); TdSocketPtr taosOpenTcpClientSocket(uint32_t ip, uint16_t port, uint32_t localIp); diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 8920922006..14649c9fd4 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -59,7 +59,7 @@ static STscObj* taosConnectImpl(const char* user, const char* auth, const char* SAppInstInfo* pAppInfo, int connType); STscObj* taos_connect_internal(const char* ip, const char* user, const char* pass, const char* auth, const char* db, - uint16_t port, int connType) { + uint16_t port, int connType) { if (taos_init() != TSDB_CODE_SUCCESS) { return NULL; } @@ -313,8 +313,8 @@ bool qnodeRequired(SRequestObj* pRequest) { } SAppInstInfo* pInfo = pRequest->pTscObj->pAppInfo; - bool 
required = false; - + bool required = false; + taosThreadMutexLock(&pInfo->qnodeMutex); required = (NULL == pInfo->pQnodeList); taosThreadMutexUnlock(&pInfo->qnodeMutex); @@ -419,11 +419,11 @@ int32_t buildVnodePolicyNodeList(SRequestObj* pRequest, SArray** pNodeList, SArr } for (int32_t j = 0; j < vgNum; ++j) { - SVgroupInfo* pInfo = taosArrayGet(pVg, j); + SVgroupInfo* pInfo = taosArrayGet(pVg, j); SQueryNodeLoad load = {0}; load.addr.nodeId = pInfo->vgId; load.addr.epSet = pInfo->epSet; - + taosArrayPush(nodeList, &load); } } @@ -481,17 +481,16 @@ _return: return TSDB_CODE_SUCCESS; } - -int32_t buildAsyncExecNodeList(SRequestObj* pRequest, SArray** pNodeList, SArray* pMnodeList, SMetaData *pResultMeta) { +int32_t buildAsyncExecNodeList(SRequestObj* pRequest, SArray** pNodeList, SArray* pMnodeList, SMetaData* pResultMeta) { SArray* pDbVgList = NULL; SArray* pQnodeList = NULL; int32_t code = 0; - + switch (tsQueryPolicy) { case QUERY_POLICY_VNODE: { if (pResultMeta) { pDbVgList = taosArrayInit(4, POINTER_BYTES); - + int32_t dbNum = taosArrayGetSize(pResultMeta->pDbVgroup); for (int32_t i = 0; i < dbNum; ++i) { SMetaRes* pRes = taosArrayGet(pResultMeta->pDbVgroup, i); @@ -500,9 +499,9 @@ int32_t buildAsyncExecNodeList(SRequestObj* pRequest, SArray** pNodeList, SArray } taosArrayPush(pDbVgList, &pRes->pRes); - } + } } - + code = buildVnodePolicyNodeList(pRequest, pNodeList, pMnodeList, pDbVgList); break; } @@ -523,7 +522,7 @@ int32_t buildAsyncExecNodeList(SRequestObj* pRequest, SArray** pNodeList, SArray } taosThreadMutexUnlock(&pInst->qnodeMutex); } - + code = buildQnodePolicyNodeList(pRequest, pNodeList, pMnodeList, pQnodeList); break; } @@ -534,7 +533,7 @@ int32_t buildAsyncExecNodeList(SRequestObj* pRequest, SArray** pNodeList, SArray taosArrayDestroy(pDbVgList); taosArrayDestroy(pQnodeList); - + return code; } @@ -542,43 +541,43 @@ int32_t buildSyncExecNodeList(SRequestObj* pRequest, SArray** pNodeList, SArray* SArray* pDbVgList = NULL; SArray* pQnodeList = 
NULL; int32_t code = 0; - + switch (tsQueryPolicy) { case QUERY_POLICY_VNODE: { int32_t dbNum = taosArrayGetSize(pRequest->dbList); if (dbNum > 0) { - SCatalog* pCtg = NULL; + SCatalog* pCtg = NULL; SAppInstInfo* pInst = pRequest->pTscObj->pAppInfo; code = catalogGetHandle(pInst->clusterId, &pCtg); if (code != TSDB_CODE_SUCCESS) { goto _return; } - pDbVgList = taosArrayInit(dbNum, POINTER_BYTES); + pDbVgList = taosArrayInit(dbNum, POINTER_BYTES); SArray* pVgList = NULL; for (int32_t i = 0; i < dbNum; ++i) { - char* dbFName = taosArrayGet(pRequest->dbList, i); + char* dbFName = taosArrayGet(pRequest->dbList, i); SRequestConnInfo conn = {.pTrans = pInst->pTransporter, .requestId = pRequest->requestId, .requestObjRefId = pRequest->self, - .mgmtEps = getEpSet_s(&pInst->mgmtEp)}; - + .mgmtEps = getEpSet_s(&pInst->mgmtEp)}; + code = catalogGetDBVgInfo(pCtg, &conn, dbFName, &pVgList); if (code) { goto _return; } - + taosArrayPush(pDbVgList, &pVgList); - } + } } - + code = buildVnodePolicyNodeList(pRequest, pNodeList, pMnodeList, pDbVgList); break; } case QUERY_POLICY_HYBRID: case QUERY_POLICY_QNODE: { getQnodeList(pRequest, &pQnodeList); - + code = buildQnodePolicyNodeList(pRequest, pNodeList, pMnodeList, pQnodeList); break; } @@ -591,11 +590,10 @@ _return: taosArrayDestroy(pDbVgList); taosArrayDestroy(pQnodeList); - + return code; } - int32_t scheduleAsyncQuery(SRequestObj* pRequest, SQueryPlan* pDag, SArray* pNodeList) { tsem_init(&schdRspSem, 0, 0); @@ -604,12 +602,12 @@ int32_t scheduleAsyncQuery(SRequestObj* pRequest, SQueryPlan* pDag, SArray* pNod .requestId = pRequest->requestId, .requestObjRefId = pRequest->self}; SSchedulerReq req = {.pConn = &conn, - .pNodeList = pNodeList, - .pDag = pDag, - .sql = pRequest->sqlstr, - .startTs = pRequest->metric.start, - .fp = schdExecCallback, - .cbParam = &res}; + .pNodeList = pNodeList, + .pDag = pDag, + .sql = pRequest->sqlstr, + .startTs = pRequest->metric.start, + .fp = schdExecCallback, + .cbParam = &res}; int32_t code = 
schedulerAsyncExecJob(&req, &pRequest->body.queryJob); @@ -656,13 +654,13 @@ int32_t scheduleQuery(SRequestObj* pRequest, SQueryPlan* pDag, SArray* pNodeList .requestId = pRequest->requestId, .requestObjRefId = pRequest->self}; SSchedulerReq req = {.pConn = &conn, - .pNodeList = pNodeList, - .pDag = pDag, - .sql = pRequest->sqlstr, - .startTs = pRequest->metric.start, - .fp = NULL, - .cbParam = NULL, - .reqKilled = &pRequest->killed}; + .pNodeList = pNodeList, + .pDag = pDag, + .sql = pRequest->sqlstr, + .startTs = pRequest->metric.start, + .fp = NULL, + .cbParam = NULL, + .reqKilled = &pRequest->killed}; int32_t code = schedulerExecJob(&req, &pRequest->body.queryJob, &res); pRequest->body.resInfo.execRes = res.res; @@ -819,8 +817,8 @@ void schedulerExecCb(SQueryResult* pResult, void* param, int32_t code) { } } - tscDebug("0x%" PRIx64 " enter scheduler exec cb, code:%d - %s, reqId:0x%" PRIx64, - pRequest->self, code, tstrerror(code), pRequest->requestId); + tscDebug("0x%" PRIx64 " enter scheduler exec cb, code:%d - %s, reqId:0x%" PRIx64, pRequest->self, code, + tstrerror(code), pRequest->requestId); STscObj* pTscObj = pRequest->pTscObj; if (code != TSDB_CODE_SUCCESS && NEED_CLIENT_HANDLE_ERROR(code)) { @@ -862,7 +860,7 @@ SRequestObj* launchQueryImpl(SRequestObj* pRequest, SQuery* pQuery, bool keepQue if (TSDB_CODE_SUCCESS == code) { SArray* pNodeList = NULL; buildSyncExecNodeList(pRequest, &pNodeList, pMnodeList); - + code = scheduleQuery(pRequest, pRequest->body.pDag, pNodeList); taosArrayDestroy(pNodeList); } @@ -915,7 +913,7 @@ SRequestObj* launchQuery(STscObj* pTscObj, const char* sql, int sqlLen) { return launchQueryImpl(pRequest, pQuery, false, NULL); } -void launchAsyncQuery(SRequestObj* pRequest, SQuery* pQuery, SMetaData *pResultMeta) { +void launchAsyncQuery(SRequestObj* pRequest, SQuery* pQuery, SMetaData* pResultMeta) { int32_t code = 0; switch (pQuery->execMode) { @@ -948,7 +946,7 @@ void launchAsyncQuery(SRequestObj* pRequest, SQuery* pQuery, 
SMetaData *pResultM if (TSDB_CODE_SUCCESS == code) { SArray* pNodeList = NULL; buildAsyncExecNodeList(pRequest, &pNodeList, pMnodeList, pResultMeta); - + SRequestConnInfo conn = { .pTrans = pAppInfo->pTransporter, .requestId = pRequest->requestId, .requestObjRefId = pRequest->self}; SSchedulerReq req = {.pConn = &conn, @@ -1308,7 +1306,7 @@ TAOS* taos_connect_auth(const char* ip, const char* user, const char* auth, cons if (pObj) { return pObj->id; } - + return NULL; } @@ -1554,10 +1552,10 @@ static int32_t doConvertUCS4(SReqResultInfo* pResultInfo, int32_t numOfRows, int return TSDB_CODE_SUCCESS; } -static int32_t estimateJsonLen(SReqResultInfo* pResultInfo, int32_t numOfCols, int32_t numOfRows){ +static int32_t estimateJsonLen(SReqResultInfo* pResultInfo, int32_t numOfCols, int32_t numOfRows) { char* p = (char*)pResultInfo->pData; - int32_t len = sizeof(int32_t) + sizeof(uint64_t) + numOfCols * (sizeof(int16_t) + sizeof(int32_t)); + int32_t len = sizeof(int32_t) + sizeof(uint64_t) + numOfCols * (sizeof(int16_t) + sizeof(int32_t)); int32_t* colLength = (int32_t*)(p + len); len += sizeof(int32_t) * numOfCols; @@ -1567,7 +1565,7 @@ static int32_t estimateJsonLen(SReqResultInfo* pResultInfo, int32_t numOfCols, i if (pResultInfo->fields[i].type == TSDB_DATA_TYPE_JSON) { int32_t* offset = (int32_t*)pStart; - int32_t lenTmp = numOfRows * sizeof(int32_t); + int32_t lenTmp = numOfRows * sizeof(int32_t); len += lenTmp; pStart += lenTmp; @@ -1592,7 +1590,6 @@ static int32_t estimateJsonLen(SReqResultInfo* pResultInfo, int32_t numOfCols, i } else { ASSERT(0); } - } } else if (IS_VAR_DATA_TYPE(pResultInfo->fields[i].type)) { int32_t lenTmp = numOfRows * sizeof(int32_t); @@ -1616,13 +1613,13 @@ static int32_t doConvertJson(SReqResultInfo* pResultInfo, int32_t numOfCols, int break; } } - if(!needConvert) return TSDB_CODE_SUCCESS; + if (!needConvert) return TSDB_CODE_SUCCESS; - char* p = (char*)pResultInfo->pData; + char* p = (char*)pResultInfo->pData; int32_t dataLen = 
estimateJsonLen(pResultInfo, numOfCols, numOfRows); pResultInfo->convertJson = taosMemoryCalloc(1, dataLen); - if(pResultInfo->convertJson == NULL) return TSDB_CODE_OUT_OF_MEMORY; + if (pResultInfo->convertJson == NULL) return TSDB_CODE_OUT_OF_MEMORY; char* p1 = pResultInfo->convertJson; int32_t len = sizeof(int32_t) + sizeof(uint64_t) + numOfCols * (sizeof(int16_t) + sizeof(int32_t)); @@ -1691,7 +1688,7 @@ static int32_t doConvertJson(SReqResultInfo* pResultInfo, int32_t numOfCols, int ASSERT(0); } - offset1[j]= len; + offset1[j] = len; memcpy(pStart1 + len, dst, varDataTLen(dst)); len += varDataTLen(dst); } @@ -1709,7 +1706,6 @@ static int32_t doConvertJson(SReqResultInfo* pResultInfo, int32_t numOfCols, int pStart += len; pStart1 += len; memcpy(pStart1, pStart, colLen); - } pStart += colLen; pStart1 += colLen1; @@ -1777,7 +1773,7 @@ int32_t setResultDataPtr(SReqResultInfo* pResultInfo, TAOS_FIELD* pFields, int32 pStart += colLength[i]; } - if(convertUcs4){ + if (convertUcs4) { code = doConvertUCS4(pResultInfo, numOfRows, numOfCols, colLength); } diff --git a/source/libs/qcom/src/queryUtil.c b/source/libs/qcom/src/queryUtil.c index 2120d24d26..c92ab9b008 100644 --- a/source/libs/qcom/src/queryUtil.c +++ b/source/libs/qcom/src/queryUtil.c @@ -19,7 +19,7 @@ #include "tmsg.h" #include "trpc.h" #include "tsched.h" - +// clang-format off #define VALIDNUMOFCOLS(x) ((x) >= TSDB_MIN_COLUMNS && (x) <= TSDB_MAX_COLUMNS) #define VALIDNUMOFTAGS(x) ((x) >= 0 && (x) <= TSDB_MAX_TAGS) @@ -146,13 +146,15 @@ int32_t asyncSendMsgToServerExt(void* pTransporter, SEpSet* epSet, int64_t* pTra } memcpy(pMsg, pInfo->msgInfo.pData, pInfo->msgInfo.len); - SRpcMsg rpcMsg = {.msgType = pInfo->msgType, - .pCont = pMsg, - .contLen = pInfo->msgInfo.len, - .info.ahandle = (void*)pInfo, - .info.handle = pInfo->msgInfo.handle, - .info.persistHandle = persistHandle, - .code = 0}; + SRpcMsg rpcMsg = { + .msgType = pInfo->msgType, + .pCont = pMsg, + .contLen = pInfo->msgInfo.len, + .info.ahandle = 
(void*)pInfo, + .info.handle = pInfo->msgInfo.handle, + .info.persistHandle = persistHandle, + .code = 0 + }; assert(pInfo->fp != NULL); TRACE_SET_ROOTID(&rpcMsg.info.traceId, pInfo->requestId); rpcSendRequestWithCtx(pTransporter, epSet, &rpcMsg, pTransporterId, rpcCtx); @@ -220,3 +222,4 @@ void destroyQueryExecRes(SQueryExecRes* pRes) { qError("invalid exec result for request type %d", pRes->msgType); } } +// clang-format on diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index 327fe50814..158926c520 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -238,6 +238,32 @@ int transSendAsync(SAsyncPool* pool, queue* mq); } \ } \ } while (0) + +#define ASYNC_CHECK_HANDLE(exh1, refId) \ + do { \ + if (refId > 0) { \ + tTrace("handle step1"); \ + SExHandle* exh2 = transAcquireExHandle(refMgt, refId); \ + if (exh2 == NULL || refId != exh2->refId) { \ + tTrace("handle %p except, may already freed, ignore msg, ref1: %" PRIu64 ", ref2 : %" PRIu64 "", exh1, \ + exh2 ? exh2->refId : 0, refId); \ + goto _return1; \ + } \ + } else if (refId == 0) { \ + tTrace("handle step2"); \ + SExHandle* exh2 = transAcquireExHandle(refMgt, refId); \ + if (exh2 == NULL || refId != exh2->refId) { \ + tTrace("handle %p except, may already freed, ignore msg, ref1: %" PRIu64 ", ref2 : %" PRIu64 "", exh1, refId, \ + exh2 ? 
exh2->refId : 0); \ + goto _return1; \ + } else { \ + refId = exh1->refId; \ + } \ + } else if (refId < 0) { \ + tTrace("handle step3"); \ + goto _return2; \ + } \ + } while (0) int transInitBuffer(SConnBuffer* buf); int transClearBuffer(SConnBuffer* buf); int transDestroyBuffer(SConnBuffer* buf); diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 852ffc9a0e..402a26247a 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -25,7 +25,6 @@ typedef struct SCliConn { uv_write_t writeReq; void* hostThrd; - int hThrdIdx; SConnBuffer readBuf; STransQueue cliMsgs; @@ -36,6 +35,7 @@ typedef struct SCliConn { bool broken; // link broken or not ConnStatus status; // + int64_t refId; char* ip; uint32_t port; @@ -168,16 +168,24 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { snprintf(key, sizeof(key), "%s:%d", ip, (int)port); \ } while (0) -#define CONN_HOST_THREAD_IDX(conn) (conn ? ((SCliConn*)conn)->hThrdIdx : -1) +#define CONN_HOST_THREAD_IDX1(idx, exh, refId, pThrd) \ + do { \ + if (exh == NULL) { \ + idx = -1; \ + } else { \ + ASYNC_CHECK_HANDLE(exh, refId); \ + pThrd = (SCliThrdObj*)exh->pThrd; \ + } \ + } while (0) #define CONN_PERSIST_TIME(para) (para * 1000 * 10) #define CONN_GET_HOST_THREAD(conn) (conn ? 
((SCliConn*)conn)->hostThrd : NULL) #define CONN_GET_INST_LABEL(conn) (((STrans*)(((SCliThrdObj*)(conn)->hostThrd)->pTransInst))->label) #define CONN_SHOULD_RELEASE(conn, head) \ do { \ if ((head)->release == 1 && (head->msgLen) == sizeof(*head)) { \ + int status = conn->status; \ uint64_t ahandle = head->ahandle; \ CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle); \ - conn->status = ConnRelease; \ transClearBuffer(&conn->readBuf); \ transFreeMsg(transContFromHead((char*)head)); \ tDebug("%s conn %p receive release request, ref: %d", CONN_GET_INST_LABEL(conn), conn, T_REF_VAL_GET(conn)); \ @@ -186,7 +194,9 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { } \ destroyCmsg(pMsg); \ cliReleaseUnfinishedMsg(conn); \ - addConnToPool(((SCliThrdObj*)conn->hostThrd)->pool, conn); \ + if (status != ConnInPool) { \ + addConnToPool(((SCliThrdObj*)conn->hostThrd)->pool, conn); \ + } \ return; \ } \ } while (0) @@ -323,23 +333,29 @@ void cliHandleResp(SCliConn* conn) { transClearBuffer(&conn->readBuf); if (!CONN_NO_PERSIST_BY_APP(conn)) { - transMsg.info.handle = conn; + SExHandle* exh = taosMemoryCalloc(1, sizeof(SExHandle)); + exh->handle = conn; + exh->pThrd = pThrd; + exh->refId = transAddExHandle(refMgt, exh); + + transMsg.info.handle = exh; + transMsg.info.refId = exh->refId; + conn->refId = exh->refId; tDebug("%s conn %p ref by app", CONN_GET_INST_LABEL(conn), conn); } - // char buf[64] = {0}; - // TRACE_TO_STR(&transMsg.info.traceId, buf); + STraceId* trace = &transMsg.info.traceId; - tGTrace("conn %p %s received from %s:%d, local info: %s:%d, msg size: %d, code: %d", conn, TMSG_INFO(pHead->msgType), - taosInetNtoa(conn->addr.sin_addr), ntohs(conn->addr.sin_port), taosInetNtoa(conn->localAddr.sin_addr), - ntohs(conn->localAddr.sin_port), transMsg.contLen, transMsg.code); + tGTrace("%s conn %p %s received from %s:%d, local info: %s:%d, msg size: %d, code: %d", CONN_GET_INST_LABEL(conn), + conn, TMSG_INFO(pHead->msgType), taosInetNtoa(conn->addr.sin_addr), 
ntohs(conn->addr.sin_port), + taosInetNtoa(conn->localAddr.sin_addr), ntohs(conn->localAddr.sin_port), transMsg.contLen, transMsg.code); if (pCtx == NULL && CONN_NO_PERSIST_BY_APP(conn)) { - tDebug("%s except, server continue send while cli ignore it", CONN_GET_INST_LABEL(conn)); + tDebug("%s except, conn %p read while cli ignore it", CONN_GET_INST_LABEL(conn), conn); // transUnrefCliHandle(conn); return; } if (CONN_RELEASE_BY_SERVER(conn) && transMsg.info.ahandle == NULL) { - tDebug("%s except, server continue send while cli ignore it", CONN_GET_INST_LABEL(conn)); + tDebug("%s except, conn %p read while cli ignore it", CONN_GET_INST_LABEL(conn), conn); // transUnrefCliHandle(conn); return; } @@ -476,10 +492,9 @@ static SCliConn* getConnFromPool(void* pool, char* ip, uint32_t port) { if (QUEUE_IS_EMPTY(&plist->conn)) { return NULL; } - queue* h = QUEUE_HEAD(&plist->conn); - // //QUEUE_REMOVE(h); + queue* h = QUEUE_HEAD(&plist->conn); SCliConn* conn = QUEUE_DATA(h, SCliConn, conn); - // conn->status = ConnNormal; + conn->status = ConnNormal; QUEUE_REMOVE(&conn->conn); QUEUE_INIT(&conn->conn); return conn; @@ -559,6 +574,13 @@ static SCliConn* cliCreateConn(SCliThrdObj* pThrd) { conn->status = ConnNormal; conn->broken = 0; transRefCliHandle(conn); + + SExHandle* exh = taosMemoryCalloc(1, sizeof(SExHandle)); + exh->handle = conn; + exh->pThrd = pThrd; + exh->refId = transAddExHandle(refMgt, exh); + conn->refId = exh->refId; + return conn; } static void cliDestroyConn(SCliConn* conn, bool clear) { @@ -566,6 +588,7 @@ static void cliDestroyConn(SCliConn* conn, bool clear) { QUEUE_REMOVE(&conn->conn); QUEUE_INIT(&conn->conn); + transRemoveExHandle(refMgt, conn->refId); if (clear) { uv_close((uv_handle_t*)conn->stream, cliDestroy); } @@ -650,12 +673,10 @@ void cliSend(SCliConn* pConn) { uv_buf_t wb = uv_buf_init((char*)pHead, msgLen); - // char buf[64] = {0}; - // TRACE_TO_STR(&pMsg->info.traceId, buf); STraceId* trace = &pMsg->info.traceId; - tGTrace("conn %p %s is sent 
to %s:%d, local info %s:%d", pConn, TMSG_INFO(pHead->msgType), - taosInetNtoa(pConn->addr.sin_addr), ntohs(pConn->addr.sin_port), taosInetNtoa(pConn->localAddr.sin_addr), - ntohs(pConn->localAddr.sin_port)); + tGTrace("%s conn %p %s is sent to %s:%d, local info %s:%d", CONN_GET_INST_LABEL(pConn), pConn, + TMSG_INFO(pHead->msgType), taosInetNtoa(pConn->addr.sin_addr), ntohs(pConn->addr.sin_port), + taosInetNtoa(pConn->localAddr.sin_addr), ntohs(pConn->localAddr.sin_port)); if (pHead->persist == 1) { CONN_SET_PERSIST_BY_APP(pConn); @@ -663,7 +684,6 @@ void cliSend(SCliConn* pConn) { pConn->writeReq.data = pConn; uv_write(&pConn->writeReq, (uv_stream_t*)pConn->stream, &wb, 1, cliSendCb); - return; _RETURN: return; @@ -723,20 +743,32 @@ static void cliHandleUpdate(SCliMsg* pMsg, SCliThrdObj* pThrd) { } SCliConn* cliGetConn(SCliMsg* pMsg, SCliThrdObj* pThrd) { - SCliConn* conn = NULL; - if (pMsg->msg.info.handle != NULL) { - conn = (SCliConn*)(pMsg->msg.info.handle); - if (conn != NULL) { - tTrace("%s conn %p reused", CONN_GET_INST_LABEL(conn), conn); + SCliConn* conn = NULL; + SRpcHandleInfo* pInfo = &pMsg->msg.info; + + SExHandle* exh = transAcquireExHandle(refMgt, pInfo->refId); + if (exh == NULL) { + if (pInfo->refId != 0) { + tTrace("%s conn %p ignore msg", CONN_GET_INST_LABEL(conn), conn); + assert(0); + return NULL; } } else { - STransConnCtx* pCtx = pMsg->ctx; - conn = getConnFromPool(pThrd->pool, EPSET_GET_INUSE_IP(&pCtx->epSet), EPSET_GET_INUSE_PORT(&pCtx->epSet)); - if (conn != NULL) { - tTrace("%s conn %p get from conn pool", CONN_GET_INST_LABEL(conn), conn); - } else { - tTrace("%s not found conn in conn pool %p", ((STrans*)pThrd->pTransInst)->label, pThrd->pool); - } + transReleaseExHandle(refMgt, pInfo->refId); + return exh->handle; + } + + STransConnCtx* pCtx = pMsg->ctx; + conn = getConnFromPool(pThrd->pool, EPSET_GET_INUSE_IP(&pCtx->epSet), EPSET_GET_INUSE_PORT(&pCtx->epSet)); + if (conn != NULL) { + exh = taosMemoryCalloc(1, sizeof(SExHandle)); + 
exh->handle = conn; + exh->pThrd = pThrd; + exh->refId = transAddExHandle(refMgt, exh); + conn->refId = exh->refId; + tTrace("%s conn %p get from conn pool", CONN_GET_INST_LABEL(conn), conn); + } else { + tTrace("%s not found conn in conn pool %p", ((STrans*)pThrd->pTransInst)->label, pThrd->pool); } return conn; } @@ -765,8 +797,6 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrdObj* pThrd) { SCliConn* conn = cliGetConn(pMsg, pThrd); if (conn != NULL) { - conn->hThrdIdx = pCtx->hThrdIdx; - transCtxMerge(&conn->ctx, &pCtx->appCtx); transQueuePush(&conn->cliMsgs, pMsg); cliSend(conn); @@ -775,7 +805,6 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrdObj* pThrd) { transCtxMerge(&conn->ctx, &pCtx->appCtx); transQueuePush(&conn->cliMsgs, pMsg); - conn->hThrdIdx = pCtx->hThrdIdx; conn->ip = strdup(EPSET_GET_INUSE_IP(&pCtx->epSet)); conn->port = EPSET_GET_INUSE_PORT(&pCtx->epSet); @@ -783,7 +812,7 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrdObj* pThrd) { if (ret) { tError("%s conn %p failed to set conn option, errmsg %s", transLabel(pTransInst), conn, uv_err_name(ret)); } - int fd = taosCreateSocketWithTimeOutOpt(TRANS_CONN_TIMEOUT); + int32_t fd = taosCreateSocketWithTimeout(TRANS_CONN_TIMEOUT); if (fd == -1) { tTrace("%s conn %p failed to create socket", transLabel(pTransInst), conn); cliHandleExcept(conn); @@ -1009,7 +1038,9 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { tTrace("%s use remote epset, inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, pCtx->retryCount + 1, TRANS_RETRY_COUNT_LIMIT); } - addConnToPool(pThrd->pool, pConn); + if (pConn->status != ConnInPool) { + addConnToPool(pThrd->pool, pConn); + } STaskArg* arg = taosMemoryMalloc(sizeof(STaskArg)); arg->param1 = pMsg; @@ -1086,10 +1117,21 @@ void transReleaseCliHandle(void* handle) { } void transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransCtx* ctx) { - STrans* pTransInst = (STrans*)shandle; - int idx = 
CONN_HOST_THREAD_IDX((SCliConn*)pReq->info.handle); + STrans* pTransInst = (STrans*)shandle; + SRpcHandleInfo* info = &pReq->info; + + int idx = -1; + SCliThrdObj* pThrd = NULL; + SExHandle* exh = info->handle; + int64_t refId = -1; + if (exh != NULL) { + refId = exh->refId; + } + + CONN_HOST_THREAD_IDX1(idx, exh, refId, pThrd); if (idx == -1) { idx = cliRBChoseIdx(pTransInst); + pThrd = ((SCliObj*)pTransInst->tcphandle)->pThreadObj[idx]; } TRACE_SET_MSGID(&pReq->info.traceId, tGenIdPI64()); @@ -1097,7 +1139,6 @@ void transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STra pCtx->epSet = *pEpSet; pCtx->ahandle = pReq->info.ahandle; pCtx->msgType = pReq->msgType; - pCtx->hThrdIdx = idx; if (ctx != NULL) { pCtx->appCtx = *ctx; @@ -1110,19 +1151,31 @@ void transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STra cliMsg->st = taosGetTimestampUs(); cliMsg->type = Normal; - SCliThrdObj* thrd = ((SCliObj*)pTransInst->tcphandle)->pThreadObj[idx]; - STraceId* trace = &pReq->info.traceId; - tGTrace("%s send request at thread:%08" PRId64 ", dst: %s:%d, app:%p", transLabel(pTransInst), thrd->pid, + tGTrace("%s send request at thread:%08" PRId64 ", dst: %s:%d, app:%p", transLabel(pTransInst), pThrd->pid, EPSET_GET_INUSE_IP(&pCtx->epSet), EPSET_GET_INUSE_PORT(&pCtx->epSet), pReq->info.ahandle); - ASSERT(transSendAsync(thrd->asyncPool, &(cliMsg->q)) == 0); + ASSERT(transSendAsync(pThrd->asyncPool, &(cliMsg->q)) == 0); +_return1: + return; +_return2: + return; } void transSendRecv(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransMsg* pRsp) { - STrans* pTransInst = (STrans*)shandle; - int idx = CONN_HOST_THREAD_IDX(pReq->info.handle); + STrans* pTransInst = (STrans*)shandle; + SRpcHandleInfo* info = &pReq->info; + SCliThrdObj* pThrd = NULL; + int idx = -1; + SExHandle* exh = info->handle; + int64_t refId = -1; + if (exh != NULL) { + refId = exh->refId; + } + + CONN_HOST_THREAD_IDX1(idx, exh, refId, pThrd); if (idx == -1) { idx = 
cliRBChoseIdx(pTransInst); + pThrd = ((SCliObj*)pTransInst->tcphandle)->pThreadObj[idx]; } tsem_t* sem = taosMemoryCalloc(1, sizeof(tsem_t)); tsem_init(sem, 0, 0); @@ -1133,7 +1186,6 @@ void transSendRecv(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransM pCtx->epSet = *pEpSet; pCtx->ahandle = pReq->info.ahandle; pCtx->msgType = pReq->msgType; - pCtx->hThrdIdx = idx; pCtx->pSem = sem; pCtx->pRsp = pRsp; @@ -1143,16 +1195,18 @@ void transSendRecv(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransM cliMsg->st = taosGetTimestampUs(); cliMsg->type = Normal; - SCliThrdObj* thrd = ((SCliObj*)pTransInst->tcphandle)->pThreadObj[idx]; - STraceId* trace = &pReq->info.traceId; - tGTrace("%s send request at thread:%08" PRId64 ", dst: %s:%d, app:%p", transLabel(pTransInst), thrd->pid, + tGTrace("%s send request at thread:%08" PRId64 ", dst: %s:%d, app:%p", transLabel(pTransInst), pThrd->pid, EPSET_GET_INUSE_IP(&pCtx->epSet), EPSET_GET_INUSE_PORT(&pCtx->epSet), pReq->info.ahandle); - transSendAsync(thrd->asyncPool, &(cliMsg->q)); + transSendAsync(pThrd->asyncPool, &(cliMsg->q)); tsem_wait(sem); tsem_destroy(sem); taosMemoryFree(sem); +_return1: + return; +_return2: + return; } /* * @@ -1168,7 +1222,6 @@ void transSetDefaultAddr(void* ahandle, const char* ip, const char* fqdn) { } for (int i = 0; i < pTransInst->numOfThreads; i++) { STransConnCtx* pCtx = taosMemoryCalloc(1, sizeof(STransConnCtx)); - pCtx->hThrdIdx = i; pCtx->cvtAddr = cvtAddr; SCliMsg* cliMsg = taosMemoryCalloc(1, sizeof(SCliMsg)); diff --git a/source/libs/transport/src/transComm.c b/source/libs/transport/src/transComm.c index 8cd7f9d827..5d342dd174 100644 --- a/source/libs/transport/src/transComm.c +++ b/source/libs/transport/src/transComm.c @@ -455,7 +455,7 @@ void transPrintEpSet(SEpSet* pEpSet) { return; } char buf[512] = {0}; - int len = snprintf(buf, sizeof(buf), "epset { "); + int len = snprintf(buf, sizeof(buf), "epset:{ "); for (int i = 0; i < pEpSet->numOfEps; i++) { if (i == 
pEpSet->numOfEps - 1) { len += snprintf(buf + len, sizeof(buf) - len, "%d. %s:%d ", i, pEpSet->eps[i].fqdn, pEpSet->eps[i].port); diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 593a790a21..121fddc99a 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -206,32 +206,6 @@ static bool addHandleToAcceptloop(void* arg); } \ } while (0) -#define ASYNC_CHECK_HANDLE(exh1, refId) \ - do { \ - if (refId > 0) { \ - tTrace("handle step1"); \ - SExHandle* exh2 = transAcquireExHandle(refMgt, refId); \ - if (exh2 == NULL || refId != exh2->refId) { \ - tTrace("handle %p except, may already freed, ignore msg, ref1: %" PRIu64 ", ref2 : %" PRIu64 "", exh1, \ - exh2 ? exh2->refId : 0, refId); \ - goto _return1; \ - } \ - } else if (refId == 0) { \ - tTrace("handle step2"); \ - SExHandle* exh2 = transAcquireExHandle(refMgt, refId); \ - if (exh2 == NULL || refId != exh2->refId) { \ - tTrace("handle %p except, may already freed, ignore msg, ref1: %" PRIu64 ", ref2 : %" PRIu64 "", exh1, refId, \ - exh2 ? 
exh2->refId : 0); \ - goto _return1; \ - } else { \ - refId = exh1->refId; \ - } \ - } else if (refId < 0) { \ - tTrace("handle step3"); \ - goto _return2; \ - } \ - } while (0) - void uvAllocRecvBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf) { SSvrConn* conn = handle->data; SConnBuffer* pBuf = &conn->readBuf; diff --git a/source/os/src/osSocket.c b/source/os/src/osSocket.c index 4d61e7036d..b0e07ff010 100644 --- a/source/os/src/osSocket.c +++ b/source/os/src/osSocket.c @@ -946,7 +946,7 @@ int32_t taosGetFqdn(char *fqdn) { #endif // __APPLE__ int32_t ret = getaddrinfo(hostname, NULL, &hints, &result); if (!result) { - fprintf(stderr,"failed to get fqdn, code:%d, reason:%s", ret, gai_strerror(ret)); + fprintf(stderr, "failed to get fqdn, code:%d, reason:%s", ret, gai_strerror(ret)); return -1; } @@ -1073,7 +1073,7 @@ int32_t taosCloseEpoll(TdEpollPtr *ppEpoll) { * Set TCP connection timeout per-socket level. * ref [https://github.com/libuv/help/issues/54] */ -int taosCreateSocketWithTimeOutOpt(uint32_t conn_timeout_sec) { +int32_t taosCreateSocketWithTimeout(uint32_t timeout) { #if defined(WINDOWS) SOCKET fd; #else @@ -1083,11 +1083,11 @@ int taosCreateSocketWithTimeOutOpt(uint32_t conn_timeout_sec) { return -1; } #if defined(WINDOWS) - if (0 != setsockopt(fd, IPPROTO_TCP, TCP_MAXRT, (char *)&conn_timeout_sec, sizeof(conn_timeout_sec))) { + if (0 != setsockopt(fd, IPPROTO_TCP, TCP_MAXRT, (char *)&timeout, sizeof(timeout))) { return -1; } #else // Linux like systems - uint32_t conn_timeout_ms = conn_timeout_sec * 1000; + uint32_t conn_timeout_ms = timeout * 1000; if (0 != setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, (char *)&conn_timeout_ms, sizeof(conn_timeout_ms))) { return -1; } From bfe5ab779b556372d418cd78f846e2e867c8ec9e Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 22 Jun 2022 19:20:35 +0800 Subject: [PATCH 02/17] fix: handle except --- source/libs/transport/src/transCli.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 
deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 402a26247a..3542333561 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -18,6 +18,10 @@ static int32_t transSCliInst = 0; static int32_t refMgt = 0; +typedef struct SExHandleWrap { + void* exhandle; + int64_t refId; +} SExHandleWrap; typedef struct SCliConn { T_REF_DECLARE() uv_connect_t connReq; @@ -338,9 +342,11 @@ void cliHandleResp(SCliConn* conn) { exh->pThrd = pThrd; exh->refId = transAddExHandle(refMgt, exh); - transMsg.info.handle = exh; - transMsg.info.refId = exh->refId; + SExHandleWrap* wrap = taosMemoryCalloc(1, sizeof(SExHandleWrap)); + wrap->exhandle = exh; + wrap->refId = exh->refId; conn->refId = exh->refId; + transMsg.info.handle = wrap; tDebug("%s conn %p ref by app", CONN_GET_INST_LABEL(conn), conn); } @@ -1007,6 +1013,8 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { pMsg->sent = 0; tTrace("try to send req to next node"); pMsg->st = taosGetTimestampUs(); + + taosMemoryFree(pResp->info.handle); pCtx->retryCount += 1; if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { if (pCtx->retryCount < pEpSet->numOfEps * 3) { From d5f5c33c9c547c75dd9b3d031f9abb7840817d8e Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 23 Jun 2022 15:55:47 +0800 Subject: [PATCH 03/17] refactor code --- include/util/ttrace.h | 8 +- source/libs/transport/inc/transComm.h | 63 +++++----- source/libs/transport/src/transCli.c | 162 ++++++++++++-------------- source/libs/transport/src/transSvr.c | 10 +- 4 files changed, 116 insertions(+), 127 deletions(-) diff --git a/include/util/ttrace.h b/include/util/ttrace.h index 206cbbf28d..579768228a 100644 --- a/include/util/ttrace.h +++ b/include/util/ttrace.h @@ -45,9 +45,11 @@ typedef struct STraceId { #define TRACE_GET_MSGID(traceId) (traceId)->msgId -#define TRACE_TO_STR(traceId, buf) \ - do { \ - sprintf(buf, "0x%" PRIx64 ":0x%" PRIx64 "", traceId->rootId, 
traceId->msgId); \ +#define TRACE_TO_STR(traceId, buf) \ + do { \ + int64_t rootId = (traceId) != NULL ? (traceId)->rootId : 0; \ + int64_t msgId = (traceId) != NULL ? (traceId)->msgId : 0; \ + sprintf(buf, "0x%" PRIx64 ":0x%" PRIx64 "", rootId, msgId); \ } while (0) #ifdef __cplusplus diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index 158926c520..5367f6b49d 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -105,6 +105,13 @@ typedef SRpcCtxVal STransCtxVal; typedef SRpcInfo STrans; typedef SRpcConnInfo STransHandleInfo; +// ref mgt +// handle +typedef struct SExHandle { + void* handle; + int64_t refId; + void* pThrd; +} SExHandle; /*convet from fqdn to ip */ typedef struct SCvtAddr { char ip[TSDB_FQDN_LEN]; @@ -239,30 +246,30 @@ int transSendAsync(SAsyncPool* pool, queue* mq); } \ } while (0) -#define ASYNC_CHECK_HANDLE(exh1, refId) \ - do { \ - if (refId > 0) { \ - tTrace("handle step1"); \ - SExHandle* exh2 = transAcquireExHandle(refMgt, refId); \ - if (exh2 == NULL || refId != exh2->refId) { \ - tTrace("handle %p except, may already freed, ignore msg, ref1: %" PRIu64 ", ref2 : %" PRIu64 "", exh1, \ - exh2 ? exh2->refId : 0, refId); \ - goto _return1; \ - } \ - } else if (refId == 0) { \ - tTrace("handle step2"); \ - SExHandle* exh2 = transAcquireExHandle(refMgt, refId); \ - if (exh2 == NULL || refId != exh2->refId) { \ - tTrace("handle %p except, may already freed, ignore msg, ref1: %" PRIu64 ", ref2 : %" PRIu64 "", exh1, refId, \ - exh2 ? 
exh2->refId : 0); \ - goto _return1; \ - } else { \ - refId = exh1->refId; \ - } \ - } else if (refId < 0) { \ - tTrace("handle step3"); \ - goto _return2; \ - } \ +#define ASYNC_CHECK_HANDLE(exh1, id) \ + do { \ + if (id > 0) { \ + tTrace("handle step1"); \ + SExHandle* exh2 = transAcquireExHandle(refMgt, id); \ + if (exh2 == NULL || id != exh2->refId) { \ + tTrace("handle %p except, may already freed, ignore msg, ref1: %" PRIu64 ", ref2 : %" PRIu64 "", exh1, \ + exh2 ? exh2->refId : 0, id); \ + goto _return1; \ + } \ + } else if (id == 0) { \ + tTrace("handle step2"); \ + SExHandle* exh2 = transAcquireExHandle(refMgt, id); \ + if (exh2 == NULL || id == exh2->refId) { \ + tTrace("handle %p except, may already freed, ignore msg, ref1: %" PRIu64 ", ref2 : %" PRIu64 "", exh1, id, \ + exh2 ? exh2->refId : 0); \ + goto _return1; \ + } else { \ + id = exh1->refId; \ + } \ + } else if (id < 0) { \ + tTrace("handle step3"); \ + goto _return2; \ + } \ } while (0) int transInitBuffer(SConnBuffer* buf); int transClearBuffer(SConnBuffer* buf); @@ -381,14 +388,6 @@ bool transEpSetIsEqual(SEpSet* a, SEpSet* b); */ void transThreadOnce(); -// ref mgt -// handle -typedef struct SExHandle { - void* handle; - int64_t refId; - void* pThrd; -} SExHandle; - void transInitEnv(); int32_t transOpenExHandleMgt(int size); void transCloseExHandleMgt(int32_t mgt); diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 3542333561..e18723d976 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -18,10 +18,6 @@ static int32_t transSCliInst = 0; static int32_t refMgt = 0; -typedef struct SExHandleWrap { - void* exhandle; - int64_t refId; -} SExHandleWrap; typedef struct SCliConn { T_REF_DECLARE() uv_connect_t connReq; @@ -177,8 +173,8 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { if (exh == NULL) { \ idx = -1; \ } else { \ - ASYNC_CHECK_HANDLE(exh, refId); \ - pThrd = (SCliThrdObj*)exh->pThrd; \ + 
ASYNC_CHECK_HANDLE((exh), refId); \ + pThrd = (SCliThrdObj*)(exh)->pThrd; \ } \ } while (0) #define CONN_PERSIST_TIME(para) (para * 1000 * 10) @@ -201,6 +197,7 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { if (status != ConnInPool) { \ addConnToPool(((SCliThrdObj*)conn->hostThrd)->pool, conn); \ } \ + transRemoveExHandle(refMgt, conn->refId); \ return; \ } \ } while (0) @@ -335,18 +332,8 @@ void cliHandleResp(SCliConn* conn) { } // buf's mem alread translated to transMsg.pCont transClearBuffer(&conn->readBuf); - if (!CONN_NO_PERSIST_BY_APP(conn)) { - SExHandle* exh = taosMemoryCalloc(1, sizeof(SExHandle)); - exh->handle = conn; - exh->pThrd = pThrd; - exh->refId = transAddExHandle(refMgt, exh); - - SExHandleWrap* wrap = taosMemoryCalloc(1, sizeof(SExHandleWrap)); - wrap->exhandle = exh; - wrap->refId = exh->refId; - conn->refId = exh->refId; - transMsg.info.handle = wrap; + transMsg.info.handle = (void*)conn->refId; tDebug("%s conn %p ref by app", CONN_GET_INST_LABEL(conn), conn); } @@ -357,12 +344,10 @@ void cliHandleResp(SCliConn* conn) { if (pCtx == NULL && CONN_NO_PERSIST_BY_APP(conn)) { tDebug("%s except, conn %p read while cli ignore it", CONN_GET_INST_LABEL(conn), conn); - // transUnrefCliHandle(conn); return; } if (CONN_RELEASE_BY_SERVER(conn) && transMsg.info.ahandle == NULL) { tDebug("%s except, conn %p read while cli ignore it", CONN_GET_INST_LABEL(conn), conn); - // transUnrefCliHandle(conn); return; } @@ -433,7 +418,7 @@ void cliHandleExcept(SCliConn* pConn) { return; } destroyCmsg(pMsg); - tTrace("%s conn %p start to destroy", CONN_GET_INST_LABEL(pConn), pConn); + tTrace("%s conn %p start to destroy, ref:%d", CONN_GET_INST_LABEL(pConn), pConn, T_REF_VAL_GET(pConn)); } while (!transQueueEmpty(&pConn->cliMsgs)); transUnrefCliHandle(pConn); } @@ -505,10 +490,22 @@ static SCliConn* getConnFromPool(void* pool, char* ip, uint32_t port) { QUEUE_INIT(&conn->conn); return conn; } +static void allocConnRef(SCliConn* conn, bool update) { + if (update) 
{ + transRemoveExHandle(refMgt, conn->refId); + } + SExHandle* exh = taosMemoryCalloc(1, sizeof(SExHandle)); + exh->handle = conn; + exh->pThrd = conn->hostThrd; + exh->refId = transAddExHandle(refMgt, exh); + conn->refId = exh->refId; +} static void addConnToPool(void* pool, SCliConn* conn) { SCliThrdObj* thrd = conn->hostThrd; CONN_HANDLE_THREAD_QUIT(thrd); + allocConnRef(conn, true); + STrans* pTransInst = thrd->pTransInst; conn->expireTime = taosGetTimestampMs() + CONN_PERSIST_TIME(pTransInst->idleTime); transQueueClear(&conn->cliMsgs); @@ -558,7 +555,8 @@ static void cliRecvCb(uv_stream_t* handle, ssize_t nread, const uv_buf_t* buf) { return; } if (nread < 0) { - tError("%s conn %p read error: %s", CONN_GET_INST_LABEL(conn), conn, uv_err_name(nread)); + tError("%s conn %p read error: %s, ref: %d", CONN_GET_INST_LABEL(conn), conn, uv_err_name(nread), + T_REF_VAL_GET(conn)); conn->broken = true; cliHandleExcept(conn); } @@ -581,11 +579,7 @@ static SCliConn* cliCreateConn(SCliThrdObj* pThrd) { conn->broken = 0; transRefCliHandle(conn); - SExHandle* exh = taosMemoryCalloc(1, sizeof(SExHandle)); - exh->handle = conn; - exh->pThrd = pThrd; - exh->refId = transAddExHandle(refMgt, exh); - conn->refId = exh->refId; + allocConnRef(conn, false); return conn; } @@ -749,25 +743,27 @@ static void cliHandleUpdate(SCliMsg* pMsg, SCliThrdObj* pThrd) { } SCliConn* cliGetConn(SCliMsg* pMsg, SCliThrdObj* pThrd) { - SCliConn* conn = NULL; - SRpcHandleInfo* pInfo = &pMsg->msg.info; + SCliConn* conn = NULL; + // SExHandleWrap* exWrap = &pMsg->msg.info.handle; + // if (exWrap != NULL) { + //} - SExHandle* exh = transAcquireExHandle(refMgt, pInfo->refId); - if (exh == NULL) { - if (pInfo->refId != 0) { - tTrace("%s conn %p ignore msg", CONN_GET_INST_LABEL(conn), conn); - assert(0); - return NULL; - } - } else { - transReleaseExHandle(refMgt, pInfo->refId); - return exh->handle; - } + // SExHandle* exh = transAcquireExHandle(refMgt, exWrap->refId); + // if (exh == NULL) { + // if 
(pInfo->refId != 0) { + // tTrace("%s conn %p ignore msg", CONN_GET_INST_LABEL(conn), conn); + // assert(0); + // return NULL; + // } + //} else { + // transReleaseExHandle(refMgt, pInfo->refId); + // return exh->handle; + //} STransConnCtx* pCtx = pMsg->ctx; conn = getConnFromPool(pThrd->pool, EPSET_GET_INUSE_IP(&pCtx->epSet), EPSET_GET_INUSE_PORT(&pCtx->epSet)); if (conn != NULL) { - exh = taosMemoryCalloc(1, sizeof(SExHandle)); + SExHandle* exh = taosMemoryCalloc(1, sizeof(SExHandle)); exh->handle = conn; exh->pThrd = pThrd; exh->refId = transAddExHandle(refMgt, exh); @@ -790,10 +786,6 @@ void cliMayCvtFqdnToIp(SEpSet* pEpSet, SCvtAddr* pCvtAddr) { } } void cliHandleReq(SCliMsg* pMsg, SCliThrdObj* pThrd) { - uint64_t et = taosGetTimestampUs(); - uint64_t el = et - pMsg->st; - // tTrace("%s cli msg tran time cost: %" PRIu64 "us", ((STrans*)pThrd->pTransInst)->label, el); - STransConnCtx* pCtx = pMsg->ctx; STrans* pTransInst = pThrd->pTransInst; @@ -1014,7 +1006,6 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { tTrace("try to send req to next node"); pMsg->st = taosGetTimestampUs(); - taosMemoryFree(pResp->info.handle); pCtx->retryCount += 1; if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { if (pCtx->retryCount < pEpSet->numOfEps * 3) { @@ -1060,16 +1051,16 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { STraceId* trace = &pResp->info.traceId; if (pCtx->pSem != NULL) { - tGTrace("conn %p(sync) handle resp", pConn); + tGTrace("%s conn %p(sync) handle resp", CONN_GET_INST_LABEL(pConn), pConn); if (pCtx->pRsp == NULL) { - tGTrace("conn %p(sync) failed to resp, ignore", pConn); + tGTrace("%s conn %p(sync) failed to resp, ignore", CONN_GET_INST_LABEL(pConn), pConn); } else { memcpy((char*)pCtx->pRsp, (char*)pResp, sizeof(*pResp)); } tsem_post(pCtx->pSem); pCtx->pRsp = NULL; } else { - tGTrace("conn %p handle resp", pConn); + tGTrace("%s conn %p handle resp", CONN_GET_INST_LABEL(pConn), pConn); if (pResp->code != 0 || 
pCtx->retryCount == 0 || transEpSetIsEqual(&pCtx->epSet, &pCtx->origEpSet)) { pTransInst->cfp(pTransInst->parent, pResp, NULL); } else { @@ -1105,14 +1096,33 @@ void transUnrefCliHandle(void* handle) { return; } int ref = T_REF_DEC((SCliConn*)handle); - tTrace("%s conn %p ref %d", CONN_GET_INST_LABEL((SCliConn*)handle), handle, ref); + tTrace("%s conn %p ref:%d", CONN_GET_INST_LABEL((SCliConn*)handle), handle, ref); if (ref == 0) { cliDestroyConn((SCliConn*)handle, true); } } +SCliThrdObj* transGetWorkThrdFromHandle(int64_t handle) { + SCliThrdObj* pThrd = NULL; + SExHandle* exh = transAcquireExHandle(refMgt, handle); + if (exh == NULL) { + return NULL; + } + pThrd = exh->pThrd; + transReleaseExHandle(refMgt, handle); + return pThrd; +} +SCliThrdObj* transGetWorkThrd(STrans* trans, int64_t handle) { + int idx = -1; + if (handle == 0) { + idx = cliRBChoseIdx(trans); + return ((SCliObj*)trans->tcphandle)->pThreadObj[idx]; + } + return transGetWorkThrdFromHandle(handle); +} void transReleaseCliHandle(void* handle) { - SCliThrdObj* thrd = CONN_GET_HOST_THREAD(handle); - if (thrd == NULL) { + int idx = -1; + SCliThrdObj* pThrd = transGetWorkThrdFromHandle((int64_t)handle); + if (pThrd == NULL) { return; } @@ -1121,26 +1131,18 @@ void transReleaseCliHandle(void* handle) { cmsg->msg = tmsg; cmsg->type = Release; - transSendAsync(thrd->asyncPool, &cmsg->q); + transSendAsync(pThrd->asyncPool, &cmsg->q); + return; } void transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransCtx* ctx) { - STrans* pTransInst = (STrans*)shandle; - SRpcHandleInfo* info = &pReq->info; - - int idx = -1; - SCliThrdObj* pThrd = NULL; - SExHandle* exh = info->handle; - int64_t refId = -1; - if (exh != NULL) { - refId = exh->refId; + STrans* pTransInst = (STrans*)shandle; + SCliThrdObj* pThrd = transGetWorkThrd(pTransInst, (int64_t)pReq->info.handle); + if (pThrd == NULL) { + transFreeMsg(pReq->pCont); + return; } - CONN_HOST_THREAD_IDX1(idx, exh, refId, pThrd); - if (idx == -1) 
{ - idx = cliRBChoseIdx(pTransInst); - pThrd = ((SCliObj*)pTransInst->tcphandle)->pThreadObj[idx]; - } TRACE_SET_MSGID(&pReq->info.traceId, tGenIdPI64()); STransConnCtx* pCtx = taosMemoryCalloc(1, sizeof(STransConnCtx)); @@ -1163,28 +1165,17 @@ void transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STra tGTrace("%s send request at thread:%08" PRId64 ", dst: %s:%d, app:%p", transLabel(pTransInst), pThrd->pid, EPSET_GET_INUSE_IP(&pCtx->epSet), EPSET_GET_INUSE_PORT(&pCtx->epSet), pReq->info.ahandle); ASSERT(transSendAsync(pThrd->asyncPool, &(cliMsg->q)) == 0); -_return1: - return; -_return2: return; } void transSendRecv(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransMsg* pRsp) { - STrans* pTransInst = (STrans*)shandle; - SRpcHandleInfo* info = &pReq->info; - SCliThrdObj* pThrd = NULL; - int idx = -1; - SExHandle* exh = info->handle; - int64_t refId = -1; - if (exh != NULL) { - refId = exh->refId; + STrans* pTransInst = (STrans*)shandle; + SCliThrdObj* pThrd = transGetWorkThrd(pTransInst, (int64_t)pReq->info.handle); + if (pThrd == NULL) { + transFreeMsg(pReq->pCont); + return; } - CONN_HOST_THREAD_IDX1(idx, exh, refId, pThrd); - if (idx == -1) { - idx = cliRBChoseIdx(pTransInst); - pThrd = ((SCliObj*)pTransInst->tcphandle)->pThreadObj[idx]; - } tsem_t* sem = taosMemoryCalloc(1, sizeof(tsem_t)); tsem_init(sem, 0, 0); @@ -1211,16 +1202,13 @@ void transSendRecv(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransM tsem_wait(sem); tsem_destroy(sem); taosMemoryFree(sem); -_return1: - return; -_return2: return; } /* * **/ -void transSetDefaultAddr(void* ahandle, const char* ip, const char* fqdn) { - STrans* pTransInst = ahandle; +void transSetDefaultAddr(void* shandle, const char* ip, const char* fqdn) { + STrans* pTransInst = shandle; SCvtAddr cvtAddr = {0}; if (ip != NULL && fqdn != NULL) { diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 121fddc99a..4cc2a9c9b2 100644 --- 
a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -133,7 +133,7 @@ static SSvrConn* createConn(void* hThrd); static void destroyConn(SSvrConn* conn, bool clear /*clear handle or not*/); static void destroyConnRegArg(SSvrConn* conn); -static int reallocConnRefHandle(SSvrConn* conn); +static int reallocConnRef(SSvrConn* conn); static void uvHandleQuit(SSvrMsg* msg, SWorkThrdObj* thrd); static void uvHandleRelease(SSvrMsg* msg, SWorkThrdObj* thrd); @@ -176,7 +176,7 @@ static bool addHandleToAcceptloop(void* arg); srvMsg->msg = tmsg; \ srvMsg->type = Release; \ srvMsg->pConn = conn; \ - reallocConnRefHandle(conn); \ + reallocConnRef(conn); \ if (!transQueuePush(&conn->srvMsgs, srvMsg)) { \ return; \ } \ @@ -353,7 +353,7 @@ void uvOnSendCb(uv_write_t* req, int status) { // if (msg->type == Release && conn->status != ConnNormal) { // conn->status = ConnNormal; // transUnrefSrvHandle(conn); - // reallocConnRefHandle(conn); + // reallocConnRef(conn); // destroySmsg(msg); // transQueueClear(&conn->srvMsgs); // return; @@ -800,7 +800,7 @@ static void destroyConnRegArg(SSvrConn* conn) { conn->regArg.init = 0; } } -static int reallocConnRefHandle(SSvrConn* conn) { +static int reallocConnRef(SSvrConn* conn) { transReleaseExHandle(refMgt, conn->refId); transRemoveExHandle(refMgt, conn->refId); // avoid app continue to send msg on invalid handle @@ -945,7 +945,7 @@ void uvHandleQuit(SSvrMsg* msg, SWorkThrdObj* thrd) { void uvHandleRelease(SSvrMsg* msg, SWorkThrdObj* thrd) { SSvrConn* conn = msg->pConn; if (conn->status == ConnAcquire) { - reallocConnRefHandle(conn); + reallocConnRef(conn); if (!transQueuePush(&conn->srvMsgs, msg)) { return; } From 0d904d5aa3b27c69776572fbc04887d1db5ac9c5 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 23 Jun 2022 20:06:22 +0800 Subject: [PATCH 04/17] fix: handle except --- source/client/src/clientEnv.c | 31 ++-- source/libs/transport/src/transCli.c | 244 +++++++++++++-------------- 
source/libs/transport/src/transSvr.c | 72 ++++---- 3 files changed, 171 insertions(+), 176 deletions(-) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 9f04e89694..f1e4107e23 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -13,11 +13,11 @@ * along with this program. If not, see . */ -#include "os.h" #include "catalog.h" -#include "functionMgt.h" #include "clientInt.h" #include "clientLog.h" +#include "functionMgt.h" +#include "os.h" #include "query.h" #include "scheduler.h" #include "tcache.h" @@ -38,7 +38,7 @@ static TdThreadOnce tscinit = PTHREAD_ONCE_INIT; volatile int32_t tscInitRes = 0; static void registerRequest(SRequestObj *pRequest) { - STscObj *pTscObj = acquireTscObj(*(int64_t*)pRequest->pTscObj->id); + STscObj *pTscObj = acquireTscObj(*(int64_t *)pRequest->pTscObj->id); assert(pTscObj != NULL); @@ -54,14 +54,14 @@ static void registerRequest(SRequestObj *pRequest) { int32_t currentInst = atomic_add_fetch_64((int64_t *)&pSummary->currentRequests, 1); tscDebug("0x%" PRIx64 " new Request from connObj:0x%" PRIx64 ", current:%d, app current:%d, total:%d, reqId:0x%" PRIx64, - pRequest->self, *(int64_t*)pRequest->pTscObj->id, num, currentInst, total, pRequest->requestId); + pRequest->self, *(int64_t *)pRequest->pTscObj->id, num, currentInst, total, pRequest->requestId); } } static void deregisterRequest(SRequestObj *pRequest) { assert(pRequest != NULL); - STscObj *pTscObj = pRequest->pTscObj; + STscObj * pTscObj = pRequest->pTscObj; SAppClusterSummary *pActivity = &pTscObj->pAppInfo->summary; int32_t currentInst = atomic_sub_fetch_64((int64_t *)&pActivity->currentRequests, 1); @@ -70,8 +70,8 @@ static void deregisterRequest(SRequestObj *pRequest) { int64_t duration = taosGetTimestampUs() - pRequest->metric.start; tscDebug("0x%" PRIx64 " free Request from connObj: 0x%" PRIx64 ", reqId:0x%" PRIx64 " elapsed:%" PRIu64 " ms, current:%d, app current:%d", - pRequest->self, *(int64_t*)pTscObj->id, 
pRequest->requestId, duration / 1000, num, currentInst); - releaseTscObj(*(int64_t*)pTscObj->id); + pRequest->self, *(int64_t *)pTscObj->id, pRequest->requestId, duration / 1000, num, currentInst); + releaseTscObj(*(int64_t *)pTscObj->id); } // todo close the transporter properly @@ -80,7 +80,7 @@ void closeTransporter(STscObj *pTscObj) { return; } - tscDebug("free transporter:%p in connObj: 0x%" PRIx64, pTscObj->pAppInfo->pTransporter, *(int64_t*)pTscObj->id); + tscDebug("free transporter:%p in connObj: 0x%" PRIx64, pTscObj->pAppInfo->pTransporter, *(int64_t *)pTscObj->id); rpcClose(pTscObj->pAppInfo->pTransporter); } @@ -128,16 +128,17 @@ void closeAllRequests(SHashObj *pRequests) { void destroyTscObj(void *pObj) { STscObj *pTscObj = pObj; - SClientHbKey connKey = {.tscRid = *(int64_t*)pTscObj->id, .connType = pTscObj->connType}; + SClientHbKey connKey = {.tscRid = *(int64_t *)pTscObj->id, .connType = pTscObj->connType}; hbDeregisterConn(pTscObj->pAppInfo->pAppHbMgr, connKey); int64_t connNum = atomic_sub_fetch_64(&pTscObj->pAppInfo->numOfConns, 1); closeAllRequests(pTscObj->pRequests); schedulerStopQueryHb(pTscObj->pAppInfo->pTransporter); if (0 == connNum) { - // TODO - //closeTransporter(pTscObj); + // TODO + closeTransporter(pTscObj); } - tscDebug("connObj 0x%" PRIx64 " destroyed, totalConn:%" PRId64, *(int64_t*)pTscObj->id, pTscObj->pAppInfo->numOfConns); + tscDebug("connObj 0x%" PRIx64 " destroyed, totalConn:%" PRId64, *(int64_t *)pTscObj->id, + pTscObj->pAppInfo->numOfConns); taosThreadMutexDestroy(&pTscObj->mutex); taosMemoryFreeClear(pTscObj); } @@ -167,10 +168,10 @@ void *createTscObj(const char *user, const char *auth, const char *db, int32_t c taosThreadMutexInit(&pObj->mutex, NULL); pObj->id = taosMemoryMalloc(sizeof(int64_t)); - *(int64_t*)pObj->id = taosAddRef(clientConnRefPool, pObj); + *(int64_t *)pObj->id = taosAddRef(clientConnRefPool, pObj); pObj->schemalessType = 1; - tscDebug("connObj created, 0x%" PRIx64, *(int64_t*)pObj->id); + 
tscDebug("connObj created, 0x%" PRIx64, *(int64_t *)pObj->id); return pObj; } @@ -325,7 +326,7 @@ int taos_options_imp(TSDB_OPTION option, const char *str) { return 0; } - SConfig *pCfg = taosGetCfg(); + SConfig * pCfg = taosGetCfg(); SConfigItem *pItem = NULL; switch (option) { diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index e18723d976..6abd230f31 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1,4 +1,4 @@ -/* * Copyright (c) 2019 TAOS Data, Inc. +/** Copyright (c) 2019 TAOS Data, Inc. * * This program is free software: you can use, redistribute, and/or modify * it under the terms of the GNU Affero General Public License, version 3 @@ -54,7 +54,7 @@ typedef struct SCliMsg { int sent; //(0: no send, 1: alread sent) } SCliMsg; -typedef struct SCliThrdObj { +typedef struct SCliThrd { TdThread thread; // tid int64_t pid; // pid uv_loop_t* loop; @@ -72,13 +72,13 @@ typedef struct SCliThrdObj { SCvtAddr cvtAddr; bool quit; -} SCliThrdObj; +} SCliThrd; typedef struct SCliObj { - char label[TSDB_LABEL_LEN]; - int32_t index; - int numOfThreads; - SCliThrdObj** pThreadObj; + char label[TSDB_LABEL_LEN]; + int32_t index; + int numOfThreads; + SCliThrd** pThreadObj; } SCliObj; typedef struct SConnList { @@ -106,7 +106,7 @@ static void cliAsyncCb(uv_async_t* handle); static int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg); -static SCliConn* cliCreateConn(SCliThrdObj* thrd); +static SCliConn* cliCreateConn(SCliThrd* thrd); static void cliDestroyConn(SCliConn* pConn, bool clear /*clear tcp handle or not*/); static void cliDestroy(uv_handle_t* handle); static void cliSend(SCliConn* pConn); @@ -122,14 +122,14 @@ static void cliHandleResp(SCliConn* conn); static void cliHandleExcept(SCliConn* conn); // handle req from app -static void cliHandleReq(SCliMsg* pMsg, SCliThrdObj* pThrd); -static void cliHandleQuit(SCliMsg* pMsg, SCliThrdObj* pThrd); -static void 
cliHandleRelease(SCliMsg* pMsg, SCliThrdObj* pThrd); -static void cliHandleUpdate(SCliMsg* pMsg, SCliThrdObj* pThrd); -static void (*cliAsyncHandle[])(SCliMsg* pMsg, SCliThrdObj* pThrd) = {cliHandleReq, cliHandleQuit, cliHandleRelease, - NULL, cliHandleUpdate}; +static void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd); +static void cliHandleQuit(SCliMsg* pMsg, SCliThrd* pThrd); +static void cliHandleRelease(SCliMsg* pMsg, SCliThrd* pThrd); +static void cliHandleUpdate(SCliMsg* pMsg, SCliThrd* pThrd); +static void (*cliAsyncHandle[])(SCliMsg* pMsg, SCliThrd* pThrd) = {cliHandleReq, cliHandleQuit, cliHandleRelease, NULL, + cliHandleUpdate}; -static void cliSendQuit(SCliThrdObj* thrd); +static void cliSendQuit(SCliThrd* thrd); static void destroyUserdata(STransMsg* userdata); static int cliRBChoseIdx(STrans* pTransInst); @@ -137,8 +137,8 @@ static int cliRBChoseIdx(STrans* pTransInst); static void destroyCmsg(SCliMsg* cmsg); static void transDestroyConnCtx(STransConnCtx* ctx); // thread obj -static SCliThrdObj* createThrdObj(); -static void destroyThrdObj(SCliThrdObj* pThrd); +static SCliThrd* createThrdObj(); +static void destroyThrdObj(SCliThrd* pThrd); static void cliWalkCb(uv_handle_t* handle, void* arg); @@ -174,12 +174,12 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { idx = -1; \ } else { \ ASYNC_CHECK_HANDLE((exh), refId); \ - pThrd = (SCliThrdObj*)(exh)->pThrd; \ + pThrd = (SCliThrd*)(exh)->pThrd; \ } \ } while (0) #define CONN_PERSIST_TIME(para) (para * 1000 * 10) #define CONN_GET_HOST_THREAD(conn) (conn ? 
((SCliConn*)conn)->hostThrd : NULL) -#define CONN_GET_INST_LABEL(conn) (((STrans*)(((SCliThrdObj*)(conn)->hostThrd)->pTransInst))->label) +#define CONN_GET_INST_LABEL(conn) (((STrans*)(((SCliThrd*)(conn)->hostThrd)->pTransInst))->label) #define CONN_SHOULD_RELEASE(conn, head) \ do { \ if ((head)->release == 1 && (head->msgLen) == sizeof(*head)) { \ @@ -195,7 +195,7 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { destroyCmsg(pMsg); \ cliReleaseUnfinishedMsg(conn); \ if (status != ConnInPool) { \ - addConnToPool(((SCliThrdObj*)conn->hostThrd)->pool, conn); \ + addConnToPool(((SCliThrd*)conn->hostThrd)->pool, conn); \ } \ transRemoveExHandle(refMgt, conn->refId); \ return; \ @@ -279,8 +279,8 @@ _RETURN: return false; } void cliHandleResp(SCliConn* conn) { - SCliThrdObj* pThrd = conn->hostThrd; - STrans* pTransInst = pThrd->pTransInst; + SCliThrd* pThrd = conn->hostThrd; + STrans* pTransInst = pThrd->pTransInst; STransMsgHead* pHead = (STransMsgHead*)(conn->readBuf.buf); pHead->code = htonl(pHead->code); @@ -379,9 +379,9 @@ void cliHandleExcept(SCliConn* pConn) { return; } } - SCliThrdObj* pThrd = pConn->hostThrd; - STrans* pTransInst = pThrd->pTransInst; - bool once = false; + SCliThrd* pThrd = pConn->hostThrd; + STrans* pTransInst = pThrd->pTransInst; + bool once = false; do { SCliMsg* pMsg = transQueuePop(&pConn->cliMsgs); if (pMsg == NULL && once) { @@ -424,9 +424,9 @@ void cliHandleExcept(SCliConn* pConn) { } void cliTimeoutCb(uv_timer_t* handle) { - SCliThrdObj* pThrd = handle->data; - STrans* pTransInst = pThrd->pTransInst; - int64_t currentTime = pThrd->nextTimeout; + SCliThrd* pThrd = handle->data; + STrans* pTransInst = pThrd->pTransInst; + int64_t currentTime = pThrd->nextTimeout; tTrace("%s conn timeout, try to remove expire conn from conn pool", pTransInst->label); SConnList* p = taosHashIterate((SHashObj*)pThrd->pool, NULL); @@ -501,7 +501,7 @@ static void allocConnRef(SCliConn* conn, bool update) { conn->refId = exh->refId; } static void 
addConnToPool(void* pool, SCliConn* conn) { - SCliThrdObj* thrd = conn->hostThrd; + SCliThrd* thrd = conn->hostThrd; CONN_HANDLE_THREAD_QUIT(thrd); allocConnRef(conn, true); @@ -562,7 +562,7 @@ static void cliRecvCb(uv_stream_t* handle, ssize_t nread, const uv_buf_t* buf) { } } -static SCliConn* cliCreateConn(SCliThrdObj* pThrd) { +static SCliConn* cliCreateConn(SCliThrd* pThrd) { SCliConn* conn = taosMemoryCalloc(1, sizeof(SCliConn)); // read/write stream handle conn->stream = (uv_stream_t*)taosMemoryMalloc(sizeof(uv_tcp_t)); @@ -615,7 +615,7 @@ static bool cliHandleNoResp(SCliConn* conn) { } if (res == true) { if (cliMaySendCachedMsg(conn) == false) { - SCliThrdObj* thrd = conn->hostThrd; + SCliThrd* thrd = conn->hostThrd; addConnToPool(thrd->pool, conn); } } @@ -651,8 +651,8 @@ void cliSend(SCliConn* pConn) { STransConnCtx* pCtx = pCliMsg->ctx; - SCliThrdObj* pThrd = pConn->hostThrd; - STrans* pTransInst = pThrd->pTransInst; + SCliThrd* pThrd = pConn->hostThrd; + STrans* pTransInst = pThrd->pTransInst; STransMsg* pMsg = (STransMsg*)(&pCliMsg->msg); if (pMsg->pCont == 0) { @@ -709,7 +709,7 @@ void cliConnCb(uv_connect_t* req, int status) { cliSend(pConn); } -static void cliHandleQuit(SCliMsg* pMsg, SCliThrdObj* pThrd) { +static void cliHandleQuit(SCliMsg* pMsg, SCliThrd* pThrd) { tDebug("cli work thread %p start to quit", pThrd); destroyCmsg(pMsg); destroyConnPool(pThrd->pool); @@ -720,7 +720,7 @@ static void cliHandleQuit(SCliMsg* pMsg, SCliThrdObj* pThrd) { // uv_stop(pThrd->loop); } -static void cliHandleRelease(SCliMsg* pMsg, SCliThrdObj* pThrd) { +static void cliHandleRelease(SCliMsg* pMsg, SCliThrd* pThrd) { SCliConn* conn = pMsg->msg.info.handle; tDebug("%s conn %p start to release to inst", CONN_GET_INST_LABEL(conn), conn); @@ -735,39 +735,30 @@ static void cliHandleRelease(SCliMsg* pMsg, SCliThrdObj* pThrd) { transUnrefCliHandle(conn); } } -static void cliHandleUpdate(SCliMsg* pMsg, SCliThrdObj* pThrd) { +static void cliHandleUpdate(SCliMsg* pMsg, 
SCliThrd* pThrd) { STransConnCtx* pCtx = pMsg->ctx; pThrd->cvtAddr = pCtx->cvtAddr; destroyCmsg(pMsg); } -SCliConn* cliGetConn(SCliMsg* pMsg, SCliThrdObj* pThrd) { +SCliConn* cliGetConn(SCliMsg* pMsg, SCliThrd* pThrd) { SCliConn* conn = NULL; - // SExHandleWrap* exWrap = &pMsg->msg.info.handle; - // if (exWrap != NULL) { - //} - - // SExHandle* exh = transAcquireExHandle(refMgt, exWrap->refId); - // if (exh == NULL) { - // if (pInfo->refId != 0) { - // tTrace("%s conn %p ignore msg", CONN_GET_INST_LABEL(conn), conn); - // assert(0); - // return NULL; - // } - //} else { - // transReleaseExHandle(refMgt, pInfo->refId); - // return exh->handle; - //} + int64_t refId = (int64_t)(pMsg->msg.info.handle); + if (refId != 0) { + SExHandle* exh = transAcquireExHandle(refMgt, refId); + if (exh == NULL) { + assert(0); + } else { + conn = exh->handle; + transReleaseExHandle(refMgt, refId); + } + return conn; + }; STransConnCtx* pCtx = pMsg->ctx; conn = getConnFromPool(pThrd->pool, EPSET_GET_INUSE_IP(&pCtx->epSet), EPSET_GET_INUSE_PORT(&pCtx->epSet)); if (conn != NULL) { - SExHandle* exh = taosMemoryCalloc(1, sizeof(SExHandle)); - exh->handle = conn; - exh->pThrd = pThrd; - exh->refId = transAddExHandle(refMgt, exh); - conn->refId = exh->refId; tTrace("%s conn %p get from conn pool", CONN_GET_INST_LABEL(conn), conn); } else { tTrace("%s not found conn in conn pool %p", ((STrans*)pThrd->pTransInst)->label, pThrd->pool); @@ -785,7 +776,7 @@ void cliMayCvtFqdnToIp(SEpSet* pEpSet, SCvtAddr* pCvtAddr) { } } } -void cliHandleReq(SCliMsg* pMsg, SCliThrdObj* pThrd) { +void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { STransConnCtx* pCtx = pMsg->ctx; STrans* pTransInst = pThrd->pTransInst; @@ -833,9 +824,9 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrdObj* pThrd) { } } static void cliAsyncCb(uv_async_t* handle) { - SAsyncItem* item = handle->data; - SCliThrdObj* pThrd = item->pThrd; - SCliMsg* pMsg = NULL; + SAsyncItem* item = handle->data; + SCliThrd* pThrd = item->pThrd; + SCliMsg* 
pMsg = NULL; // batch process to avoid to lock/unlock frequently queue wq; @@ -861,7 +852,7 @@ static void cliAsyncCb(uv_async_t* handle) { } static void* cliWorkThread(void* arg) { - SCliThrdObj* pThrd = (SCliThrdObj*)arg; + SCliThrd* pThrd = (SCliThrd*)arg; pThrd->pid = taosGetSelfPthreadId(); setThreadName("trans-cli-work"); uv_run(pThrd->loop, UV_RUN_DEFAULT); @@ -874,10 +865,10 @@ void* transInitClient(uint32_t ip, uint32_t port, char* label, int numOfThreads, STrans* pTransInst = shandle; memcpy(cli->label, label, strlen(label)); cli->numOfThreads = numOfThreads; - cli->pThreadObj = (SCliThrdObj**)taosMemoryCalloc(cli->numOfThreads, sizeof(SCliThrdObj*)); + cli->pThreadObj = (SCliThrd**)taosMemoryCalloc(cli->numOfThreads, sizeof(SCliThrd*)); for (int i = 0; i < cli->numOfThreads; i++) { - SCliThrdObj* pThrd = createThrdObj(); + SCliThrd* pThrd = createThrdObj(); pThrd->nextTimeout = taosGetTimestampMs() + CONN_PERSIST_TIME(pTransInst->idleTime); pThrd->pTransInst = shandle; @@ -911,8 +902,8 @@ static void destroyCmsg(SCliMsg* pMsg) { taosMemoryFree(pMsg); } -static SCliThrdObj* createThrdObj() { - SCliThrdObj* pThrd = (SCliThrdObj*)taosMemoryCalloc(1, sizeof(SCliThrdObj)); +static SCliThrd* createThrdObj() { + SCliThrd* pThrd = (SCliThrd*)taosMemoryCalloc(1, sizeof(SCliThrd)); QUEUE_INIT(&pThrd->msg); taosThreadMutexInit(&pThrd->msgMtx, NULL); @@ -930,7 +921,7 @@ static SCliThrdObj* createThrdObj() { pThrd->quit = false; return pThrd; } -static void destroyThrdObj(SCliThrdObj* pThrd) { +static void destroyThrdObj(SCliThrd* pThrd) { if (pThrd == NULL) { return; } @@ -951,7 +942,7 @@ static void transDestroyConnCtx(STransConnCtx* ctx) { taosMemoryFree(ctx); } -void cliSendQuit(SCliThrdObj* thrd) { +void cliSendQuit(SCliThrd* thrd) { // cli can stop gracefully SCliMsg* msg = taosMemoryCalloc(1, sizeof(SCliMsg)); msg->type = Quit; @@ -973,15 +964,16 @@ int cliRBChoseIdx(STrans* pTransInst) { static void doDelayTask(void* param) { STaskArg* arg = param; - SCliMsg* 
pMsg = arg->param1; - SCliThrdObj* pThrd = arg->param2; + SCliMsg* pMsg = arg->param1; + SCliThrd* pThrd = arg->param2; cliHandleReq(pMsg, pThrd); taosMemoryFree(arg); } + int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { - SCliThrdObj* pThrd = pConn->hostThrd; - STrans* pTransInst = pThrd->pTransInst; + SCliThrd* pThrd = pConn->hostThrd; + STrans* pTransInst = pThrd->pTransInst; if (pMsg == NULL || pMsg->ctx == NULL) { tTrace("%s conn %p handle resp", pTransInst->label, pConn); @@ -995,57 +987,60 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { if (pCtx->retryCount == 0) { pCtx->origEpSet = pCtx->epSet; } + /* * upper layer handle retry if code equal TSDB_CODE_RPC_NETWORK_UNAVAIL */ - tmsg_t msgType = pCtx->msgType; - if ((pTransInst->retry != NULL && pEpSet->numOfEps > 1 && (pTransInst->retry(pResp->code))) || - (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pResp->code == TSDB_CODE_APP_NOT_READY || - pResp->code == TSDB_CODE_NODE_NOT_DEPLOYED || pResp->code == TSDB_CODE_SYN_NOT_LEADER)) { - pMsg->sent = 0; - tTrace("try to send req to next node"); - pMsg->st = taosGetTimestampUs(); + if (CONN_NO_PERSIST_BY_APP(pConn)) { + tmsg_t msgType = pCtx->msgType; + if ((pTransInst->retry != NULL && pEpSet->numOfEps > 1 && (pTransInst->retry(pResp->code))) || + (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pResp->code == TSDB_CODE_APP_NOT_READY || + pResp->code == TSDB_CODE_NODE_NOT_DEPLOYED || pResp->code == TSDB_CODE_SYN_NOT_LEADER)) { + pMsg->sent = 0; + tTrace("try to send req to next node"); + pMsg->st = taosGetTimestampUs(); - pCtx->retryCount += 1; - if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { - if (pCtx->retryCount < pEpSet->numOfEps * 3) { - pEpSet->inUse = (++pEpSet->inUse) % pEpSet->numOfEps; + pCtx->retryCount += 1; + if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + if (pCtx->retryCount < pEpSet->numOfEps * 3) { + pEpSet->inUse = (++pEpSet->inUse) % pEpSet->numOfEps; + + STaskArg* arg = 
taosMemoryMalloc(sizeof(STaskArg)); + arg->param1 = pMsg; + arg->param2 = pThrd; + transDQSched(pThrd->delayQueue, doDelayTask, arg, TRANS_RETRY_INTERVAL); + transPrintEpSet(pEpSet); + tTrace("%s use local epset, inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, + pCtx->retryCount + 1, pEpSet->numOfEps * 3); + + transUnrefCliHandle(pConn); + return -1; + } + } else if (pCtx->retryCount < TRANS_RETRY_COUNT_LIMIT) { + if (pResp->contLen == 0) { + pEpSet->inUse = (++pEpSet->inUse) % pEpSet->numOfEps; + transPrintEpSet(&pCtx->epSet); + tTrace("%s use local epset, inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, + pCtx->retryCount + 1, TRANS_RETRY_COUNT_LIMIT); + } else { + SEpSet epSet = {0}; + tDeserializeSEpSet(pResp->pCont, pResp->contLen, &epSet); + pCtx->epSet = epSet; + + transPrintEpSet(&pCtx->epSet); + tTrace("%s use remote epset, inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, + pCtx->retryCount + 1, TRANS_RETRY_COUNT_LIMIT); + } + if (pConn->status != ConnInPool) { + addConnToPool(pThrd->pool, pConn); + } STaskArg* arg = taosMemoryMalloc(sizeof(STaskArg)); arg->param1 = pMsg; arg->param2 = pThrd; transDQSched(pThrd->delayQueue, doDelayTask, arg, TRANS_RETRY_INTERVAL); - transPrintEpSet(pEpSet); - tTrace("%s use local epset, inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, - pCtx->retryCount + 1, pEpSet->numOfEps * 3); - - transUnrefCliHandle(pConn); return -1; } - } else if (pCtx->retryCount < TRANS_RETRY_COUNT_LIMIT) { - if (pResp->contLen == 0) { - pEpSet->inUse = (++pEpSet->inUse) % pEpSet->numOfEps; - transPrintEpSet(&pCtx->epSet); - tTrace("%s use local epset, inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, - pCtx->retryCount + 1, TRANS_RETRY_COUNT_LIMIT); - } else { - SEpSet epSet = {0}; - tDeserializeSEpSet(pResp->pCont, pResp->contLen, &epSet); - pCtx->epSet = epSet; - - transPrintEpSet(&pCtx->epSet); - tTrace("%s use remote epset, 
inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, - pCtx->retryCount + 1, TRANS_RETRY_COUNT_LIMIT); - } - if (pConn->status != ConnInPool) { - addConnToPool(pThrd->pool, pConn); - } - - STaskArg* arg = taosMemoryMalloc(sizeof(STaskArg)); - arg->param1 = pMsg; - arg->param2 = pThrd; - transDQSched(pThrd->delayQueue, doDelayTask, arg, TRANS_RETRY_INTERVAL); - return -1; } } @@ -1101,9 +1096,9 @@ void transUnrefCliHandle(void* handle) { cliDestroyConn((SCliConn*)handle, true); } } -SCliThrdObj* transGetWorkThrdFromHandle(int64_t handle) { - SCliThrdObj* pThrd = NULL; - SExHandle* exh = transAcquireExHandle(refMgt, handle); +SCliThrd* transGetWorkThrdFromHandle(int64_t handle) { + SCliThrd* pThrd = NULL; + SExHandle* exh = transAcquireExHandle(refMgt, handle); if (exh == NULL) { return NULL; } @@ -1111,17 +1106,16 @@ SCliThrdObj* transGetWorkThrdFromHandle(int64_t handle) { transReleaseExHandle(refMgt, handle); return pThrd; } -SCliThrdObj* transGetWorkThrd(STrans* trans, int64_t handle) { - int idx = -1; +SCliThrd* transGetWorkThrd(STrans* trans, int64_t handle) { if (handle == 0) { - idx = cliRBChoseIdx(trans); + int idx = cliRBChoseIdx(trans); return ((SCliObj*)trans->tcphandle)->pThreadObj[idx]; } return transGetWorkThrdFromHandle(handle); } void transReleaseCliHandle(void* handle) { - int idx = -1; - SCliThrdObj* pThrd = transGetWorkThrdFromHandle((int64_t)handle); + int idx = -1; + SCliThrd* pThrd = transGetWorkThrdFromHandle((int64_t)handle); if (pThrd == NULL) { return; } @@ -1136,8 +1130,8 @@ void transReleaseCliHandle(void* handle) { } void transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransCtx* ctx) { - STrans* pTransInst = (STrans*)shandle; - SCliThrdObj* pThrd = transGetWorkThrd(pTransInst, (int64_t)pReq->info.handle); + STrans* pTransInst = (STrans*)shandle; + SCliThrd* pThrd = transGetWorkThrd(pTransInst, (int64_t)pReq->info.handle); if (pThrd == NULL) { transFreeMsg(pReq->pCont); return; @@ -1169,8 
+1163,8 @@ void transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STra } void transSendRecv(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransMsg* pRsp) { - STrans* pTransInst = (STrans*)shandle; - SCliThrdObj* pThrd = transGetWorkThrd(pTransInst, (int64_t)pReq->info.handle); + STrans* pTransInst = (STrans*)shandle; + SCliThrd* pThrd = transGetWorkThrd(pTransInst, (int64_t)pReq->info.handle); if (pThrd == NULL) { transFreeMsg(pReq->pCont); return; @@ -1224,7 +1218,7 @@ void transSetDefaultAddr(void* shandle, const char* ip, const char* fqdn) { cliMsg->ctx = pCtx; cliMsg->type = Update; - SCliThrdObj* thrd = ((SCliObj*)pTransInst->tcphandle)->pThreadObj[i]; + SCliThrd* thrd = ((SCliObj*)pTransInst->tcphandle)->pThreadObj[i]; tDebug("%s update epset at thread:%08" PRId64 "", pTransInst->label, thrd->pid); transSendAsync(thrd->asyncPool, &(cliMsg->q)); diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 4cc2a9c9b2..599d98a3e9 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -65,7 +65,7 @@ typedef struct SSvrMsg { STransMsgType type; } SSvrMsg; -typedef struct SWorkThrdObj { +typedef struct SWorkThrd { TdThread thread; uv_connect_t connect_req; uv_pipe_t* pipe; @@ -78,7 +78,7 @@ typedef struct SWorkThrdObj { queue conn; void* pTransInst; bool quit; -} SWorkThrdObj; +} SWorkThrd; typedef struct SServerObj { TdThread thread; @@ -86,10 +86,10 @@ typedef struct SServerObj { uv_loop_t* loop; // work thread info - int workerIdx; - int numOfThreads; - int numOfWorkerReady; - SWorkThrdObj** pThreadObj; + int workerIdx; + int numOfThreads; + int numOfWorkerReady; + SWorkThrd** pThreadObj; uv_pipe_t pipeListen; uv_pipe_t** pipe; @@ -135,12 +135,12 @@ static void destroyConnRegArg(SSvrConn* conn); static int reallocConnRef(SSvrConn* conn); -static void uvHandleQuit(SSvrMsg* msg, SWorkThrdObj* thrd); -static void uvHandleRelease(SSvrMsg* msg, SWorkThrdObj* 
thrd); -static void uvHandleResp(SSvrMsg* msg, SWorkThrdObj* thrd); -static void uvHandleRegister(SSvrMsg* msg, SWorkThrdObj* thrd); -static void (*transAsyncHandle[])(SSvrMsg* msg, SWorkThrdObj* thrd) = {uvHandleResp, uvHandleQuit, uvHandleRelease, - uvHandleRegister, NULL}; +static void uvHandleQuit(SSvrMsg* msg, SWorkThrd* thrd); +static void uvHandleRelease(SSvrMsg* msg, SWorkThrd* thrd); +static void uvHandleResp(SSvrMsg* msg, SWorkThrd* thrd); +static void uvHandleRegister(SSvrMsg* msg, SWorkThrd* thrd); +static void (*transAsyncHandle[])(SSvrMsg* msg, SWorkThrd* thrd) = {uvHandleResp, uvHandleQuit, uvHandleRelease, + uvHandleRegister, NULL}; static int32_t exHandlesMgt; @@ -160,7 +160,7 @@ static void* transWorkerThread(void* arg); static void* transAcceptThread(void* arg); // add handle loop -static bool addHandleToWorkloop(SWorkThrdObj* pThrd, char* pipeName); +static bool addHandleToWorkloop(SWorkThrd* pThrd, char* pipeName); static bool addHandleToAcceptloop(void* arg); #define CONN_SHOULD_RELEASE(conn, head) \ @@ -233,7 +233,7 @@ static void uvHandleReq(SSvrConn* pConn) { // wreq->data = pConn; // uv_read_stop((uv_stream_t*)pConn->pTcp); // transRefSrvHandle(pConn); - // uv_queue_work(((SWorkThrdObj*)pConn->hostThrd)->loop, wreq, uvWorkDoTask, uvWorkAfterTask); + // uv_queue_work(((SWorkThrd*)pConn->hostThrd)->loop, wreq, uvWorkDoTask, uvWorkAfterTask); CONN_SHOULD_RELEASE(pConn, pHead); @@ -478,7 +478,7 @@ static void destroySmsg(SSvrMsg* smsg) { transFreeMsg(smsg->msg.pCont); taosMemoryFree(smsg); } -static void destroyAllConn(SWorkThrdObj* pThrd) { +static void destroyAllConn(SWorkThrd* pThrd) { tTrace("thread %p destroy all conn ", pThrd); while (!QUEUE_IS_EMPTY(&pThrd->conn)) { queue* h = QUEUE_HEAD(&pThrd->conn); @@ -493,10 +493,10 @@ static void destroyAllConn(SWorkThrdObj* pThrd) { } } void uvWorkerAsyncCb(uv_async_t* handle) { - SAsyncItem* item = handle->data; - SWorkThrdObj* pThrd = item->pThrd; - SSvrConn* conn = NULL; - queue wq; + 
SAsyncItem* item = handle->data; + SWorkThrd* pThrd = item->pThrd; + SSvrConn* conn = NULL; + queue wq; // batch process to avoid to lock/unlock frequently taosThreadMutexLock(&item->mtx); @@ -624,7 +624,7 @@ void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) { assert(buf->base[0] == notify[0]); taosMemoryFree(buf->base); - SWorkThrdObj* pThrd = q->data; + SWorkThrd* pThrd = q->data; uv_pipe_t* pipe = (uv_pipe_t*)q; if (!uv_pipe_pending_count(pipe)) { @@ -692,10 +692,10 @@ void uvOnPipeConnectionCb(uv_connect_t* connect, int status) { if (status != 0) { return; } - SWorkThrdObj* pThrd = container_of(connect, SWorkThrdObj, connect_req); + SWorkThrd* pThrd = container_of(connect, SWorkThrd, connect_req); uv_read_start((uv_stream_t*)pThrd->pipe, uvAllocConnBufferCb, uvOnConnectionCb); } -static bool addHandleToWorkloop(SWorkThrdObj* pThrd, char* pipeName) { +static bool addHandleToWorkloop(SWorkThrd* pThrd, char* pipeName) { pThrd->loop = (uv_loop_t*)taosMemoryMalloc(sizeof(uv_loop_t)); if (0 != uv_loop_init(pThrd->loop)) { return false; @@ -748,14 +748,14 @@ static bool addHandleToAcceptloop(void* arg) { } void* transWorkerThread(void* arg) { setThreadName("trans-worker"); - SWorkThrdObj* pThrd = (SWorkThrdObj*)arg; + SWorkThrd* pThrd = (SWorkThrd*)arg; uv_run(pThrd->loop, UV_RUN_DEFAULT); return NULL; } static SSvrConn* createConn(void* hThrd) { - SWorkThrdObj* pThrd = hThrd; + SWorkThrd* pThrd = hThrd; SSvrConn* pConn = (SSvrConn*)taosMemoryCalloc(1, sizeof(SSvrConn)); QUEUE_INIT(&pConn->queue); @@ -818,7 +818,7 @@ static void uvDestroyConn(uv_handle_t* handle) { if (conn == NULL) { return; } - SWorkThrdObj* thrd = conn->hostThrd; + SWorkThrd* thrd = conn->hostThrd; transReleaseExHandle(refMgt, conn->refId); transRemoveExHandle(refMgt, conn->refId); @@ -863,7 +863,7 @@ void* transInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, srv->numOfThreads = numOfThreads; srv->workerIdx = 0; srv->numOfWorkerReady = 0; - 
srv->pThreadObj = (SWorkThrdObj**)taosMemoryCalloc(srv->numOfThreads, sizeof(SWorkThrdObj*)); + srv->pThreadObj = (SWorkThrd**)taosMemoryCalloc(srv->numOfThreads, sizeof(SWorkThrd*)); srv->pipe = (uv_pipe_t**)taosMemoryCalloc(srv->numOfThreads, sizeof(uv_pipe_t*)); srv->ip = ip; srv->port = port; @@ -888,7 +888,7 @@ void* transInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, assert(0 == uv_listen((uv_stream_t*)&srv->pipeListen, SOMAXCONN, uvPipeListenCb)); for (int i = 0; i < srv->numOfThreads; i++) { - SWorkThrdObj* thrd = (SWorkThrdObj*)taosMemoryCalloc(1, sizeof(SWorkThrdObj)); + SWorkThrd* thrd = (SWorkThrd*)taosMemoryCalloc(1, sizeof(SWorkThrd)); thrd->pTransInst = shandle; thrd->quit = false; srv->pThreadObj[i] = thrd; @@ -933,7 +933,7 @@ End: return NULL; } -void uvHandleQuit(SSvrMsg* msg, SWorkThrdObj* thrd) { +void uvHandleQuit(SSvrMsg* msg, SWorkThrd* thrd) { thrd->quit = true; if (QUEUE_IS_EMPTY(&thrd->conn)) { uv_walk(thrd->loop, uvWalkCb, NULL); @@ -942,7 +942,7 @@ void uvHandleQuit(SSvrMsg* msg, SWorkThrdObj* thrd) { } taosMemoryFree(msg); } -void uvHandleRelease(SSvrMsg* msg, SWorkThrdObj* thrd) { +void uvHandleRelease(SSvrMsg* msg, SWorkThrd* thrd) { SSvrConn* conn = msg->pConn; if (conn->status == ConnAcquire) { reallocConnRef(conn); @@ -956,12 +956,12 @@ void uvHandleRelease(SSvrMsg* msg, SWorkThrdObj* thrd) { } destroySmsg(msg); } -void uvHandleResp(SSvrMsg* msg, SWorkThrdObj* thrd) { +void uvHandleResp(SSvrMsg* msg, SWorkThrd* thrd) { // send msg to client tDebug("%s conn %p start to send resp (2/2)", transLabel(thrd->pTransInst), msg->pConn); uvStartSendResp(msg); } -void uvHandleRegister(SSvrMsg* msg, SWorkThrdObj* thrd) { +void uvHandleRegister(SSvrMsg* msg, SWorkThrd* thrd) { SSvrConn* conn = msg->pConn; tDebug("%s conn %p register brokenlink callback", transLabel(thrd->pTransInst), conn); if (conn->status == ConnAcquire) { @@ -982,7 +982,7 @@ void uvHandleRegister(SSvrMsg* msg, SWorkThrdObj* thrd) { taosMemoryFree(msg); 
} } -void destroyWorkThrd(SWorkThrdObj* pThrd) { +void destroyWorkThrd(SWorkThrd* pThrd) { if (pThrd == NULL) { return; } @@ -993,7 +993,7 @@ void destroyWorkThrd(SWorkThrdObj* pThrd) { taosMemoryFree(pThrd->loop); taosMemoryFree(pThrd); } -void sendQuitToWorkThrd(SWorkThrdObj* pThrd) { +void sendQuitToWorkThrd(SWorkThrd* pThrd) { SSvrMsg* msg = taosMemoryCalloc(1, sizeof(SSvrMsg)); msg->type = Quit; tDebug("server send quit msg to work thread"); @@ -1060,7 +1060,7 @@ void transReleaseSrvHandle(void* handle) { ASYNC_CHECK_HANDLE(exh, refId); - SWorkThrdObj* pThrd = exh->pThrd; + SWorkThrd* pThrd = exh->pThrd; ASYNC_ERR_JRET(pThrd); STransMsg tmsg = {.code = 0, .info.handle = exh, .info.ahandle = NULL, .info.refId = refId}; @@ -1090,7 +1090,7 @@ void transSendResponse(const STransMsg* msg) { STransMsg tmsg = *msg; tmsg.info.refId = refId; - SWorkThrdObj* pThrd = exh->pThrd; + SWorkThrd* pThrd = exh->pThrd; ASYNC_ERR_JRET(pThrd); SSvrMsg* m = taosMemoryCalloc(1, sizeof(SSvrMsg)); @@ -1120,7 +1120,7 @@ void transRegisterMsg(const STransMsg* msg) { STransMsg tmsg = *msg; tmsg.info.refId = refId; - SWorkThrdObj* pThrd = exh->pThrd; + SWorkThrd* pThrd = exh->pThrd; ASYNC_ERR_JRET(pThrd); SSvrMsg* m = taosMemoryCalloc(1, sizeof(SSvrMsg)); From ffd105d0e037c612c632be406a962b042bc58f14 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 23 Jun 2022 20:51:09 +0800 Subject: [PATCH 05/17] handle except --- source/libs/transport/src/transCli.c | 55 ++++++++++++++------------- source/libs/transport/src/transComm.c | 6 +-- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 5b100e3d56..0bf6b2778b 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -393,7 +393,6 @@ void cliHandleExcept(SCliConn* pConn) { transMsg.code = TSDB_CODE_RPC_NETWORK_UNAVAIL; transMsg.msgType = pMsg ? 
pMsg->msg.msgType + 1 : 0; transMsg.info.ahandle = NULL; - transMsg.info.handle = pConn; if (pMsg == NULL && !CONN_NO_PERSIST_BY_APP(pConn)) { transMsg.info.ahandle = transCtxDumpVal(&pConn->ctx, transMsg.msgType); @@ -987,10 +986,14 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { if (pCtx->retryCount == 0) { pCtx->origEpSet = pCtx->epSet; } - /* * upper layer handle retry if code equal TSDB_CODE_RPC_NETWORK_UNAVAIL */ + /* + * no retry + * 1. query conn 2. rpc thread already receive quit msg + * + */ if (CONN_NO_PERSIST_BY_APP(pConn) && pThrd->quit == false) { tmsg_t msgType = pCtx->msgType; if ((pTransInst->retry != NULL && pEpSet->numOfEps > 1 && (pTransInst->retry(pResp->code))) || @@ -1014,31 +1017,31 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { transUnrefCliHandle(pConn); return -1; - } else if (pCtx->retryCount < TRANS_RETRY_COUNT_LIMIT) { - if (pResp->contLen == 0) { - pEpSet->inUse = (++pEpSet->inUse) % pEpSet->numOfEps; - transPrintEpSet(&pCtx->epSet); - tTrace("%s use local epset, inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, - pCtx->retryCount + 1, TRANS_RETRY_COUNT_LIMIT); - } else { - SEpSet epSet = {0}; - tDeserializeSEpSet(pResp->pCont, pResp->contLen, &epSet); - pCtx->epSet = epSet; - - transPrintEpSet(&pCtx->epSet); - tTrace("%s use remote epset, inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, - pCtx->retryCount + 1, TRANS_RETRY_COUNT_LIMIT); - } - if (pConn->status != ConnInPool) { - addConnToPool(pThrd->pool, pConn); - } - - STaskArg* arg = taosMemoryMalloc(sizeof(STaskArg)); - arg->param1 = pMsg; - arg->param2 = pThrd; - transDQSched(pThrd->delayQueue, doDelayTask, arg, TRANS_RETRY_INTERVAL); - return -1; } + } else if (pCtx->retryCount < TRANS_RETRY_COUNT_LIMIT) { + if (pResp->contLen == 0) { + pEpSet->inUse = (++pEpSet->inUse) % pEpSet->numOfEps; + transPrintEpSet(&pCtx->epSet); + tTrace("%s use local epset, inUse: %d, retry count:%d, limit: %d", 
pTransInst->label, pEpSet->inUse, + pCtx->retryCount + 1, TRANS_RETRY_COUNT_LIMIT); + } else { + SEpSet epSet = {0}; + tDeserializeSEpSet(pResp->pCont, pResp->contLen, &epSet); + pCtx->epSet = epSet; + + transPrintEpSet(&pCtx->epSet); + tTrace("%s use remote epset, inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, + pCtx->retryCount + 1, TRANS_RETRY_COUNT_LIMIT); + } + if (pConn->status != ConnInPool) { + addConnToPool(pThrd->pool, pConn); + } + + STaskArg* arg = taosMemoryMalloc(sizeof(STaskArg)); + arg->param1 = pMsg; + arg->param2 = pThrd; + transDQSched(pThrd->delayQueue, doDelayTask, arg, TRANS_RETRY_INTERVAL); + return -1; } } } diff --git a/source/libs/transport/src/transComm.c b/source/libs/transport/src/transComm.c index 5d342dd174..bff7d79bd3 100644 --- a/source/libs/transport/src/transComm.c +++ b/source/libs/transport/src/transComm.c @@ -455,16 +455,16 @@ void transPrintEpSet(SEpSet* pEpSet) { return; } char buf[512] = {0}; - int len = snprintf(buf, sizeof(buf), "epset:{ "); + int len = snprintf(buf, sizeof(buf), "epset:{"); for (int i = 0; i < pEpSet->numOfEps; i++) { if (i == pEpSet->numOfEps - 1) { - len += snprintf(buf + len, sizeof(buf) - len, "%d. %s:%d ", i, pEpSet->eps[i].fqdn, pEpSet->eps[i].port); + len += snprintf(buf + len, sizeof(buf) - len, "%d. %s:%d", i, pEpSet->eps[i].fqdn, pEpSet->eps[i].port); } else { len += snprintf(buf + len, sizeof(buf) - len, "%d. 
%s:%d, ", i, pEpSet->eps[i].fqdn, pEpSet->eps[i].port); } } len += snprintf(buf + len, sizeof(buf) - len, "}"); - tTrace("%s, inUse: %d", buf, pEpSet->inUse); + tTrace("%s, inUse:%d", buf, pEpSet->inUse); } bool transEpSetIsEqual(SEpSet* a, SEpSet* b) { if (a->numOfEps != b->numOfEps || a->inUse != b->inUse) { From 1d169ce22af04f958b5b635fa715118dcfe0e0fe Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 23 Jun 2022 21:16:32 +0800 Subject: [PATCH 06/17] handle except --- source/libs/transport/src/transCli.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 0bf6b2778b..96fbe3f6b7 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -300,17 +300,9 @@ void cliHandleResp(SCliConn* conn) { if (CONN_NO_PERSIST_BY_APP(conn)) { pMsg = transQueuePop(&conn->cliMsgs); - pCtx = pMsg ? pMsg->ctx : NULL; - if (pMsg == NULL && !CONN_NO_PERSIST_BY_APP(conn)) { - transMsg.info.ahandle = transCtxDumpVal(&conn->ctx, transMsg.msgType); - if (transMsg.info.ahandle == NULL) { - transMsg.info.ahandle = transCtxDumpBrokenlinkVal(&conn->ctx, (int32_t*)&(transMsg.msgType)); - } - tDebug("%s conn %p construct ahandle %p, persist: 0", CONN_GET_INST_LABEL(conn), conn, transMsg.info.ahandle); - } else { - transMsg.info.ahandle = pCtx ? 
pCtx->ahandle : NULL; - tDebug("%s conn %p get ahandle %p, persist: 0", CONN_GET_INST_LABEL(conn), conn, transMsg.info.ahandle); - } + pCtx = pMsg->ctx; + transMsg.info.ahandle = pCtx->ahandle; + tDebug("%s conn %p get ahandle %p, persist: 0", CONN_GET_INST_LABEL(conn), conn, transMsg.info.ahandle); } else { uint64_t ahandle = (uint64_t)pHead->ahandle; CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle); From d63eb11b70b60aa379e0c0ed011fc9851fdae6c5 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 23 Jun 2022 21:35:35 +0800 Subject: [PATCH 07/17] handle except --- source/libs/transport/src/transCli.c | 17 ++++++++--------- source/libs/transport/src/transSvr.c | 2 -- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 96fbe3f6b7..9ccbf3eaa0 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -712,7 +712,14 @@ static void cliHandleQuit(SCliMsg* pMsg, SCliThrd* pThrd) { // uv_stop(pThrd->loop); } static void cliHandleRelease(SCliMsg* pMsg, SCliThrd* pThrd) { - SCliConn* conn = pMsg->msg.info.handle; + int64_t refId = (int64_t)(pMsg->msg.info.handle); + SExHandle* exh = transAcquireExHandle(refMgt, refId); + if (exh == NULL) { + tDebug("%" PRid64 " already release", refId); + return NULL; + } + + SCliConn* conn = exh->handle; tDebug("%s conn %p start to release to inst", CONN_GET_INST_LABEL(conn), conn); if (T_REF_VAL_GET(conn) == 2) { @@ -721,14 +728,10 @@ static void cliHandleRelease(SCliMsg* pMsg, SCliThrd* pThrd) { return; } cliSend(conn); - } else { - // conn already broken down - transUnrefCliHandle(conn); } } static void cliHandleUpdate(SCliMsg* pMsg, SCliThrd* pThrd) { STransConnCtx* pCtx = pMsg->ctx; - pThrd->cvtAddr = pCtx->cvtAddr; destroyCmsg(pMsg); } @@ -772,9 +775,7 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { STrans* pTransInst = pThrd->pTransInst; cliMayCvtFqdnToIp(&pCtx->epSet, &pThrd->cvtAddr); - 
transPrintEpSet(&pCtx->epSet); - SCliConn* conn = cliGetConn(pMsg, pThrd); if (conn != NULL) { transCtxMerge(&conn->ctx, &pCtx->appCtx); @@ -1112,7 +1113,6 @@ void transReleaseCliHandle(void* handle) { if (pThrd == NULL) { return; } - STransMsg tmsg = {.info.handle = handle}; SCliMsg* cmsg = taosMemoryCalloc(1, sizeof(SCliMsg)); cmsg->msg = tmsg; @@ -1162,7 +1162,6 @@ void transSendRecv(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransM transFreeMsg(pReq->pCont); return; } - tsem_t* sem = taosMemoryCalloc(1, sizeof(tsem_t)); tsem_init(sem, 0, 0); diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 599d98a3e9..215323f69d 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -1029,8 +1029,6 @@ void transCloseServer(void* arg) { int ref = atomic_sub_fetch_32(&tranSSvrInst, 1); if (ref == 0) { - // TdThreadOnce tmpInit = PTHREAD_ONCE_INIT; - // memcpy(&transModuleInit, &tmpInit, sizeof(TdThreadOnce)); transCloseExHandleMgt(refMgt); } } From 78be460ac31669b0373b76cc246f8684eec3800e Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 23 Jun 2022 21:40:12 +0800 Subject: [PATCH 08/17] handle except --- source/libs/transport/src/transCli.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 9ccbf3eaa0..a95afa8a27 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -715,8 +715,7 @@ static void cliHandleRelease(SCliMsg* pMsg, SCliThrd* pThrd) { int64_t refId = (int64_t)(pMsg->msg.info.handle); SExHandle* exh = transAcquireExHandle(refMgt, refId); if (exh == NULL) { - tDebug("%" PRid64 " already release", refId); - return NULL; + tDebug("%" PRId64 " already release", refId); } SCliConn* conn = exh->handle; From 3795ff37ad4955c5f7d5078e2ce028f34996a772 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 24 Jun 2022 10:38:18 +0800 Subject: 
[PATCH 09/17] handle except --- source/libs/transport/src/transCli.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index a95afa8a27..ce6c85bb57 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -197,7 +197,6 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { if (status != ConnInPool) { \ addConnToPool(((SCliThrd*)conn->hostThrd)->pool, conn); \ } \ - transRemoveExHandle(refMgt, conn->refId); \ return; \ } \ } while (0) From 87f4f5364f9df0d93ca5d40bda4d649f4cede862 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 24 Jun 2022 11:09:34 +0800 Subject: [PATCH 10/17] handle except --- source/libs/transport/src/transCli.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index ce6c85bb57..cb78d7f0df 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -953,7 +953,6 @@ int cliRBChoseIdx(STrans* pTransInst) { } static void doDelayTask(void* param) { STaskArg* arg = param; - SCliMsg* pMsg = arg->param1; SCliThrd* pThrd = arg->param2; cliHandleReq(pMsg, pThrd); @@ -977,13 +976,11 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { if (pCtx->retryCount == 0) { pCtx->origEpSet = pCtx->epSet; } + /* - * upper layer handle retry if code equal TSDB_CODE_RPC_NETWORK_UNAVAIL - */ - /* - * no retry - * 1. query conn 2. rpc thread already receive quit msg - * + * no retry + * 1. query conn + * 2. 
rpc thread already receive quit msg */ if (CONN_NO_PERSIST_BY_APP(pConn) && pThrd->quit == false) { tmsg_t msgType = pCtx->msgType; From 445d7f2d90bf61b5efdd0e29ffc936ed016f9caa Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 24 Jun 2022 16:21:01 +0800 Subject: [PATCH 11/17] feat: refactor rpc quit --- source/client/src/clientEnv.c | 3 +- source/libs/transport/src/transCli.c | 139 +++++++++++++++------------ 2 files changed, 82 insertions(+), 60 deletions(-) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index f1e4107e23..ff9003b8fc 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -85,7 +85,8 @@ void closeTransporter(STscObj *pTscObj) { } static bool clientRpcRfp(int32_t code) { - if (code == TSDB_CODE_RPC_REDIRECT) { + if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || + code == TSDB_CODE_SYN_NOT_LEADER) { return true; } else { return false; diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index cb78d7f0df..1cacc84d79 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -111,6 +111,13 @@ static void cliDestroyConn(SCliConn* pConn, bool clear /*clear tcp handle o static void cliDestroy(uv_handle_t* handle); static void cliSend(SCliConn* pConn); +static bool cliIsEpsetUpdated(int32_t code, STransConnCtx* pCtx) { + if (code != 0) return false; + if (pCtx->retryCnt == 0) return false; + if (transEpSetIsEqual(&pCtx->epSet, &pCtx->origEpSet)) return false; + return true; +} + void cliMayCvtFqdnToIp(SEpSet* pEpSet, SCvtAddr* pCvtAddr); /* * set TCP connection timeout per-socket level @@ -154,7 +161,6 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { destroyCmsg(pMsg); } } - #define CLI_RELEASE_UV(loop) \ do { \ uv_walk(loop, cliWalkCb, NULL); \ @@ -183,7 +189,6 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { #define CONN_SHOULD_RELEASE(conn, 
head) \ do { \ if ((head)->release == 1 && (head->msgLen) == sizeof(*head)) { \ - int status = conn->status; \ uint64_t ahandle = head->ahandle; \ CONN_GET_MSGCTX_BY_AHANDLE(conn, ahandle); \ transClearBuffer(&conn->readBuf); \ @@ -194,9 +199,7 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { } \ destroyCmsg(pMsg); \ cliReleaseUnfinishedMsg(conn); \ - if (status != ConnInPool) { \ - addConnToPool(((SCliThrd*)conn->hostThrd)->pool, conn); \ - } \ + addConnToPool(((SCliThrd*)conn->hostThrd)->pool, conn); \ return; \ } \ } while (0) @@ -262,8 +265,25 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { #define REQUEST_PERSIS_HANDLE(msg) ((msg)->info.persistHandle == 1) #define REQUEST_RELEASE_HANDLE(cmsg) ((cmsg)->type == Release) +#define EPSET_GET_SIZE(epSet) (epSet)->numOfEps #define EPSET_GET_INUSE_IP(epSet) ((epSet)->eps[(epSet)->inUse].fqdn) #define EPSET_GET_INUSE_PORT(epSet) ((epSet)->eps[(epSet)->inUse].port) +#define EPSET_FORWARD_INUSE(epSet) \ + do { \ + (epSet)->inUse = (++((epSet)->inUse)) % ((epSet)->numOfEps); \ + } while (0) +#define EPSET_DEBUG_STR(epSet, buf) \ + do { \ + int len = snprintf(buf, sizeof(buf), "epset:{"); \ + for (int i = 0; i < (epSet)->numOfEps; i++) { \ + if (i == (epSet)->numOfEps - 1) { \ + len += snprintf(buf + len, sizeof(buf) - len, "%d. %s:%d", i, (epSet)->eps[i].fqdn, (epSet)->eps[i].port); \ + } else { \ + len += snprintf(buf + len, sizeof(buf) - len, "%d. 
%s:%d, ", i, (epSet)->eps[i].fqdn, (epSet)->eps[i].port); \ + } \ + } \ + len += snprintf(buf + len, sizeof(buf) - len, "}"); \ + } while (0); static void* cliWorkThread(void* arg); @@ -492,6 +512,10 @@ static void allocConnRef(SCliConn* conn, bool update) { conn->refId = exh->refId; } static void addConnToPool(void* pool, SCliConn* conn) { + if (conn->status == ConnInPool) { + assert(0); + return; + } SCliThrd* thrd = conn->hostThrd; CONN_HANDLE_THREAD_QUIT(thrd); @@ -505,7 +529,7 @@ static void addConnToPool(void* pool, SCliConn* conn) { char key[128] = {0}; CONN_CONSTRUCT_HASH_KEY(key, conn->ip, conn->port); - tTrace("%s conn %p added to conn pool, read buf cap: %d", CONN_GET_INST_LABEL(conn), conn, conn->readBuf.cap); + tTrace("%s conn %p added to conn pool, read buf cap:%d", CONN_GET_INST_LABEL(conn), conn, conn->readBuf.cap); SConnList* plist = taosHashGet((SHashObj*)pool, key, strlen(key)); // list already create before @@ -751,9 +775,9 @@ SCliConn* cliGetConn(SCliMsg* pMsg, SCliThrd* pThrd) { STransConnCtx* pCtx = pMsg->ctx; conn = getConnFromPool(pThrd->pool, EPSET_GET_INUSE_IP(&pCtx->epSet), EPSET_GET_INUSE_PORT(&pCtx->epSet)); if (conn != NULL) { - tTrace("%s conn %p get from conn pool", CONN_GET_INST_LABEL(conn), conn); + tTrace("%s conn %p get from conn pool:%p", CONN_GET_INST_LABEL(conn), conn, pThrd->pool); } else { - tTrace("%s not found conn in conn pool %p", ((STrans*)pThrd->pTransInst)->label, pThrd->pool); + tTrace("%s not found conn in conn pool:%p", ((STrans*)pThrd->pTransInst)->label, pThrd->pool); } return conn; } @@ -773,7 +797,8 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { STrans* pTransInst = pThrd->pTransInst; cliMayCvtFqdnToIp(&pCtx->epSet, &pThrd->cvtAddr); - transPrintEpSet(&pCtx->epSet); + + // transPrintEpSet(&pCtx->epSet); SCliConn* conn = cliGetConn(pMsg, pThrd); if (conn != NULL) { transCtxMerge(&conn->ctx, &pCtx->appCtx); @@ -955,11 +980,30 @@ static void doDelayTask(void* param) { STaskArg* arg = param; SCliMsg* pMsg 
= arg->param1; SCliThrd* pThrd = arg->param2; - cliHandleReq(pMsg, pThrd); - taosMemoryFree(arg); + + cliHandleReq(pMsg, pThrd); } +static void cliSchedMsgToNextNode(SCliMsg* pMsg, SCliThrd* pThrd) { + STraceId* trace = &pMsg->msg.info.traceId; + STransConnCtx* pCtx = pMsg->ctx; + + char buf[256] = {0}; + EPSET_DEBUG_STR(&pCtx->epSet, buf); + tGTrace("%s %s, retryCnt:%d, limit:%d", transLabel(pThrd), buf, pCtx->retryCnt + 1, pCtx->retryLimit); + + STaskArg* arg = taosMemoryMalloc(sizeof(STaskArg)); + arg->param1 = pMsg; + arg->param2 = pThrd; + transDQSched(pThrd->delayQueue, doDelayTask, arg, TRANS_RETRY_INTERVAL); +} + +void cliUpdateRetryLimit(int8_t* val, int8_t exp, int8_t newVal) { + if (*val != exp) { + *val = newVal; + } +} int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { SCliThrd* pThrd = pConn->hostThrd; STrans* pTransInst = pThrd->pTransInst; @@ -971,68 +1015,45 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { } STransConnCtx* pCtx = pMsg->ctx; - SEpSet* pEpSet = &pCtx->epSet; - - if (pCtx->retryCount == 0) { + if (pCtx->retryCnt == 0) { pCtx->origEpSet = pCtx->epSet; } - /* * no retry * 1. query conn * 2. 
rpc thread already receive quit msg */ - if (CONN_NO_PERSIST_BY_APP(pConn) && pThrd->quit == false) { - tmsg_t msgType = pCtx->msgType; - if ((pTransInst->retry != NULL && pEpSet->numOfEps > 1 && (pTransInst->retry(pResp->code))) || - (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pResp->code == TSDB_CODE_APP_NOT_READY || - pResp->code == TSDB_CODE_NODE_NOT_DEPLOYED || pResp->code == TSDB_CODE_SYN_NOT_LEADER)) { + int32_t code = pResp->code; + if (CONN_NO_PERSIST_BY_APP(pConn)) { + if (pTransInst->retry != NULL && pTransInst->retry(code)) { pMsg->sent = 0; - tTrace("try to send req to next node"); - pMsg->st = taosGetTimestampUs(); + pCtx->retryCnt += 1; + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + transUnrefCliHandle(pConn); - pCtx->retryCount += 1; - if (pResp->code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { - if (pCtx->retryCount < pEpSet->numOfEps * 3) { - pEpSet->inUse = (++pEpSet->inUse) % pEpSet->numOfEps; - STaskArg* arg = taosMemoryMalloc(sizeof(STaskArg)); - arg->param1 = pMsg; - arg->param2 = pThrd; - transDQSched(pThrd->delayQueue, doDelayTask, arg, TRANS_RETRY_INTERVAL); - transPrintEpSet(pEpSet); - tTrace("%s use local epset, inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, - pCtx->retryCount + 1, pEpSet->numOfEps * 3); - - transUnrefCliHandle(pConn); + cliUpdateRetryLimit(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, EPSET_GET_SIZE(&pCtx->epSet) * 3); + if (pCtx->retryCnt < pCtx->retryLimit) { + EPSET_FORWARD_INUSE(&pCtx->epSet); + cliSchedMsgToNextNode(pMsg, pThrd); return -1; } - } else if (pCtx->retryCount < TRANS_RETRY_COUNT_LIMIT) { - if (pResp->contLen == 0) { - pEpSet->inUse = (++pEpSet->inUse) % pEpSet->numOfEps; - transPrintEpSet(&pCtx->epSet); - tTrace("%s use local epset, inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, - pCtx->retryCount + 1, TRANS_RETRY_COUNT_LIMIT); - } else { - SEpSet epSet = {0}; - tDeserializeSEpSet(pResp->pCont, pResp->contLen, &epSet); - pCtx->epSet = epSet; + } else { + 
addConnToPool(pThrd->pool, pConn); - transPrintEpSet(&pCtx->epSet); - tTrace("%s use remote epset, inUse: %d, retry count:%d, limit: %d", pTransInst->label, pEpSet->inUse, - pCtx->retryCount + 1, TRANS_RETRY_COUNT_LIMIT); + cliUpdateRetryLimit(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, TRANS_RETRY_COUNT_LIMIT); + if (pCtx->retryCnt < pCtx->retryLimit) { + if (pResp->contLen == 0) { + EPSET_FORWARD_INUSE(&pCtx->epSet); + } else { + tDeserializeSEpSet(pResp->pCont, pResp->contLen, &pCtx->epSet); + } + cliSchedMsgToNextNode(pMsg, pThrd); + return -1; } - if (pConn->status != ConnInPool) { - addConnToPool(pThrd->pool, pConn); - } - - STaskArg* arg = taosMemoryMalloc(sizeof(STaskArg)); - arg->param1 = pMsg; - arg->param2 = pThrd; - transDQSched(pThrd->delayQueue, doDelayTask, arg, TRANS_RETRY_INTERVAL); - return -1; } } } + STraceId* trace = &pResp->info.traceId; if (pCtx->pSem != NULL) { tGTrace("%s conn %p(sync) handle resp", CONN_GET_INST_LABEL(pConn), pConn); @@ -1045,10 +1066,10 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { pCtx->pRsp = NULL; } else { tGTrace("%s conn %p handle resp", CONN_GET_INST_LABEL(pConn), pConn); - if (pResp->code != 0 || pCtx->retryCount == 0 || transEpSetIsEqual(&pCtx->epSet, &pCtx->origEpSet)) { + if (!cliIsEpsetUpdated(code, pCtx)) { pTransInst->cfp(pTransInst->parent, pResp, NULL); } else { - pTransInst->cfp(pTransInst->parent, pResp, pEpSet); + pTransInst->cfp(pTransInst->parent, pResp, &pCtx->epSet); } } return 0; From a215844467f8b5da581984aa01d04cda4c37ffc9 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 24 Jun 2022 17:02:53 +0800 Subject: [PATCH 12/17] handle except --- source/libs/transport/inc/transComm.h | 17 +++++++++-------- source/libs/transport/src/transCli.c | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index 5367f6b49d..db8de1c8c3 100644 --- a/source/libs/transport/inc/transComm.h +++ 
b/source/libs/transport/inc/transComm.h @@ -120,14 +120,15 @@ typedef struct SCvtAddr { } SCvtAddr; typedef struct { - SEpSet epSet; // ip list provided by app - SEpSet origEpSet; - void* ahandle; // handle provided by app - tmsg_t msgType; // message type - int8_t connType; // connection type cli/srv - int64_t rid; // refId returned by taosAddRef + SEpSet epSet; // ip list provided by app + SEpSet origEpSet; + void* ahandle; // handle provided by app + tmsg_t msgType; // message type + int8_t connType; // connection type cli/srv - int8_t retryCount; + int8_t retryCnt; + int8_t retryLimit; + // bool setMaxRetry; STransCtx appCtx; // STransMsg* pRsp; // for synchronous API tsem_t* pSem; // for synchronous API @@ -381,7 +382,7 @@ void transDQDestroy(SDelayQueue* queue); int transDQSched(SDelayQueue* queue, void (*func)(void* arg), void* arg, uint64_t timeoutMs); -void transPrintEpSet(SEpSet* pEpSet); +// void transPrintEpSet(SEpSet* pEpSet); bool transEpSetIsEqual(SEpSet* a, SEpSet* b); /* * init global func diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 1cacc84d79..b9eaae6cba 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1039,7 +1039,6 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { } } else { addConnToPool(pThrd->pool, pConn); - cliUpdateRetryLimit(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, TRANS_RETRY_COUNT_LIMIT); if (pCtx->retryCnt < pCtx->retryLimit) { if (pResp->contLen == 0) { @@ -1047,6 +1046,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { } else { tDeserializeSEpSet(pResp->pCont, pResp->contLen, &pCtx->epSet); } + transFreeMsg(pResp->pCont); cliSchedMsgToNextNode(pMsg, pThrd); return -1; } From e54044de732badcbb4eda63731bbe07e7d3bce45 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 24 Jun 2022 19:24:14 +0800 Subject: [PATCH 13/17] update retry --- source/libs/transport/src/transCli.c | 34 
++++++++++++++-------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index b9eaae6cba..653666ee70 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -272,17 +272,17 @@ static void cliReleaseUnfinishedMsg(SCliConn* conn) { do { \ (epSet)->inUse = (++((epSet)->inUse)) % ((epSet)->numOfEps); \ } while (0) -#define EPSET_DEBUG_STR(epSet, buf) \ - do { \ - int len = snprintf(buf, sizeof(buf), "epset:{"); \ - for (int i = 0; i < (epSet)->numOfEps; i++) { \ - if (i == (epSet)->numOfEps - 1) { \ - len += snprintf(buf + len, sizeof(buf) - len, "%d. %s:%d", i, (epSet)->eps[i].fqdn, (epSet)->eps[i].port); \ - } else { \ - len += snprintf(buf + len, sizeof(buf) - len, "%d. %s:%d, ", i, (epSet)->eps[i].fqdn, (epSet)->eps[i].port); \ - } \ - } \ - len += snprintf(buf + len, sizeof(buf) - len, "}"); \ +#define EPSET_DEBUG_STR(epSet, tbuf) \ + do { \ + int len = snprintf(tbuf, sizeof(tbuf), "epset:{"); \ + for (int i = 0; i < (epSet)->numOfEps; i++) { \ + if (i == (epSet)->numOfEps - 1) { \ + len += snprintf(tbuf + len, sizeof(tbuf) - len, "%d. %s:%d", i, (epSet)->eps[i].fqdn, (epSet)->eps[i].port); \ + } else { \ + len += snprintf(tbuf + len, sizeof(tbuf) - len, "%d. 
%s:%d, ", i, (epSet)->eps[i].fqdn, (epSet)->eps[i].port); \ + } \ + } \ + len += snprintf(tbuf + len, sizeof(tbuf) - len, "}, inUse:%d", (epSet)->inUse); \ } while (0); static void* cliWorkThread(void* arg); @@ -989,9 +989,10 @@ static void cliSchedMsgToNextNode(SCliMsg* pMsg, SCliThrd* pThrd) { STraceId* trace = &pMsg->msg.info.traceId; STransConnCtx* pCtx = pMsg->ctx; - char buf[256] = {0}; - EPSET_DEBUG_STR(&pCtx->epSet, buf); - tGTrace("%s %s, retryCnt:%d, limit:%d", transLabel(pThrd), buf, pCtx->retryCnt + 1, pCtx->retryLimit); + char tbuf[256] = {0}; + EPSET_DEBUG_STR(&pCtx->epSet, tbuf); + tGTrace("%s retry to send msg to next node %dms later , use %s, retryCnt:%d, limit:%d", transLabel(pThrd->pTransInst), + TRANS_RETRY_INTERVAL, tbuf, pCtx->retryCnt + 1, pCtx->retryLimit); STaskArg* arg = taosMemoryMalloc(sizeof(STaskArg)); arg->param1 = pMsg; @@ -1029,18 +1030,17 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { pMsg->sent = 0; pCtx->retryCnt += 1; if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { - transUnrefCliHandle(pConn); - cliUpdateRetryLimit(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, EPSET_GET_SIZE(&pCtx->epSet) * 3); if (pCtx->retryCnt < pCtx->retryLimit) { + transUnrefCliHandle(pConn); EPSET_FORWARD_INUSE(&pCtx->epSet); cliSchedMsgToNextNode(pMsg, pThrd); return -1; } } else { - addConnToPool(pThrd->pool, pConn); cliUpdateRetryLimit(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, TRANS_RETRY_COUNT_LIMIT); if (pCtx->retryCnt < pCtx->retryLimit) { + addConnToPool(pThrd->pool, pConn); if (pResp->contLen == 0) { EPSET_FORWARD_INUSE(&pCtx->epSet); } else { From 720645800c43ec1ef7385ae4f612aac87a9d0af1 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 24 Jun 2022 20:51:00 +0800 Subject: [PATCH 14/17] feat: refactor rpc quit --- source/libs/transport/src/transCli.c | 53 +++++++++++++--------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 
653666ee70..9ca5e8e73e 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1014,42 +1014,36 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { pTransInst->cfp(pTransInst->parent, pResp, NULL); return 0; } - - STransConnCtx* pCtx = pMsg->ctx; - if (pCtx->retryCnt == 0) { - pCtx->origEpSet = pCtx->epSet; - } /* * no retry * 1. query conn * 2. rpc thread already receive quit msg */ - int32_t code = pResp->code; - if (CONN_NO_PERSIST_BY_APP(pConn)) { - if (pTransInst->retry != NULL && pTransInst->retry(code)) { - pMsg->sent = 0; - pCtx->retryCnt += 1; - if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { - cliUpdateRetryLimit(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, EPSET_GET_SIZE(&pCtx->epSet) * 3); - if (pCtx->retryCnt < pCtx->retryLimit) { - transUnrefCliHandle(pConn); + STransConnCtx* pCtx = pMsg->ctx; + int32_t code = pResp->code; + if (pTransInst->retry != NULL && pTransInst->retry(code)) { + pMsg->sent = 0; + pCtx->retryCnt += 1; + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + cliUpdateRetryLimit(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, EPSET_GET_SIZE(&pCtx->epSet) * 3); + if (pCtx->retryCnt < pCtx->retryLimit) { + transUnrefCliHandle(pConn); + EPSET_FORWARD_INUSE(&pCtx->epSet); + cliSchedMsgToNextNode(pMsg, pThrd); + return -1; + } + } else { + cliUpdateRetryLimit(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, TRANS_RETRY_COUNT_LIMIT); + if (pCtx->retryCnt < pCtx->retryLimit) { + addConnToPool(pThrd->pool, pConn); + if (pResp->contLen == 0) { EPSET_FORWARD_INUSE(&pCtx->epSet); - cliSchedMsgToNextNode(pMsg, pThrd); - return -1; - } - } else { - cliUpdateRetryLimit(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, TRANS_RETRY_COUNT_LIMIT); - if (pCtx->retryCnt < pCtx->retryLimit) { - addConnToPool(pThrd->pool, pConn); - if (pResp->contLen == 0) { - EPSET_FORWARD_INUSE(&pCtx->epSet); - } else { - tDeserializeSEpSet(pResp->pCont, pResp->contLen, &pCtx->epSet); - } - transFreeMsg(pResp->pCont); - 
cliSchedMsgToNextNode(pMsg, pThrd); - return -1; + } else { + tDeserializeSEpSet(pResp->pCont, pResp->contLen, &pCtx->epSet); } + transFreeMsg(pResp->pCont); + cliSchedMsgToNextNode(pMsg, pThrd); + return -1; } } } @@ -1185,6 +1179,7 @@ void transSendRecv(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransM STransConnCtx* pCtx = taosMemoryCalloc(1, sizeof(STransConnCtx)); pCtx->epSet = *pEpSet; + pCtx->origEpSet = *pEpSet; pCtx->ahandle = pReq->info.ahandle; pCtx->msgType = pReq->msgType; pCtx->pSem = sem; From aa8957bcc54b3a2d775cea36ddf3f19cfdc15ae3 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sat, 25 Jun 2022 11:02:49 +0800 Subject: [PATCH 15/17] handle redirect --- source/libs/transport/src/transCli.c | 16 ++++++++-------- source/libs/transport/src/transSvr.c | 1 + 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 9ca5e8e73e..0671aa39d1 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -513,7 +513,7 @@ static void allocConnRef(SCliConn* conn, bool update) { } static void addConnToPool(void* pool, SCliConn* conn) { if (conn->status == ConnInPool) { - assert(0); + // assert(0); return; } SCliThrd* thrd = conn->hostThrd; @@ -986,13 +986,13 @@ static void doDelayTask(void* param) { } static void cliSchedMsgToNextNode(SCliMsg* pMsg, SCliThrd* pThrd) { - STraceId* trace = &pMsg->msg.info.traceId; STransConnCtx* pCtx = pMsg->ctx; - char tbuf[256] = {0}; + STraceId* trace = &pMsg->msg.info.traceId; + char tbuf[256] = {0}; EPSET_DEBUG_STR(&pCtx->epSet, tbuf); - tGTrace("%s retry to send msg to next node %dms later , use %s, retryCnt:%d, limit:%d", transLabel(pThrd->pTransInst), - TRANS_RETRY_INTERVAL, tbuf, pCtx->retryCnt + 1, pCtx->retryLimit); + tGTrace("%s retry on next node, use %s, retryCnt:%d, limit:%d", transLabel(pThrd->pTransInst), tbuf, + pCtx->retryCnt + 1, pCtx->retryLimit); STaskArg* arg = 
taosMemoryMalloc(sizeof(STaskArg)); arg->param1 = pMsg; @@ -1000,7 +1000,7 @@ static void cliSchedMsgToNextNode(SCliMsg* pMsg, SCliThrd* pThrd) { transDQSched(pThrd->delayQueue, doDelayTask, arg, TRANS_RETRY_INTERVAL); } -void cliUpdateRetryLimit(int8_t* val, int8_t exp, int8_t newVal) { +void cliCompareAndSwap(int8_t* val, int8_t exp, int8_t newVal) { if (*val != exp) { *val = newVal; } @@ -1025,7 +1025,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { pMsg->sent = 0; pCtx->retryCnt += 1; if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { - cliUpdateRetryLimit(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, EPSET_GET_SIZE(&pCtx->epSet) * 3); + cliCompareAndSwap(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, EPSET_GET_SIZE(&pCtx->epSet) * 3); if (pCtx->retryCnt < pCtx->retryLimit) { transUnrefCliHandle(pConn); EPSET_FORWARD_INUSE(&pCtx->epSet); @@ -1033,7 +1033,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { return -1; } } else { - cliUpdateRetryLimit(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, TRANS_RETRY_COUNT_LIMIT); + cliCompareAndSwap(&pCtx->retryLimit, TRANS_RETRY_COUNT_LIMIT, TRANS_RETRY_COUNT_LIMIT); if (pCtx->retryCnt < pCtx->retryLimit) { addConnToPool(pThrd->pool, pConn); if (pResp->contLen == 0) { diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 215323f69d..7651860224 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -422,6 +422,7 @@ static void uvPrepareSendData(SSvrMsg* smsg, uv_buf_t* wb) { transUnrefSrvHandle(pConn); } else { pHead->msgType = pMsg->msgType; + if (pHead->msgType == 0) pHead->msgType = pConn->inType + 1; } } From 7c57b03de71b99bdfa975d4b12e8c39998c29641 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sat, 25 Jun 2022 12:19:52 +0800 Subject: [PATCH 16/17] handle rpc retry --- source/client/src/clientEnv.c | 2 +- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 17 +++++--- source/libs/function/src/udfd.c | 40 
++++++++++--------- source/libs/transport/src/transCli.c | 13 ++++-- source/libs/transport/src/transSvr.c | 3 +- 5 files changed, 46 insertions(+), 29 deletions(-) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index ff9003b8fc..657c21c4f9 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -86,7 +86,7 @@ void closeTransporter(STscObj *pTscObj) { static bool clientRpcRfp(int32_t code) { if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || - code == TSDB_CODE_SYN_NOT_LEADER) { + code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY) { return true; } else { return false; diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 63d2a65df1..a4745abd5b 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -70,9 +70,9 @@ int32_t dmProcessNodeMsg(SMgmtWrapper *pWrapper, SRpcMsg *pMsg) { } static void dmProcessRpcMsg(SDnode *pDnode, SRpcMsg *pRpc, SEpSet *pEpSet) { - SDnodeTrans *pTrans = &pDnode->trans; + SDnodeTrans * pTrans = &pDnode->trans; int32_t code = -1; - SRpcMsg *pMsg = NULL; + SRpcMsg * pMsg = NULL; SMgmtWrapper *pWrapper = NULL; SDnodeHandle *pHandle = &pTrans->msgHandles[TMSG_INDEX(pRpc->msgType)]; @@ -194,11 +194,11 @@ int32_t dmInitMsgHandle(SDnode *pDnode) { for (EDndNodeType ntype = DNODE; ntype < NODE_END; ++ntype) { SMgmtWrapper *pWrapper = &pDnode->wrappers[ntype]; - SArray *pArray = (*pWrapper->func.getHandlesFp)(); + SArray * pArray = (*pWrapper->func.getHandlesFp)(); if (pArray == NULL) return -1; for (int32_t i = 0; i < taosArrayGetSize(pArray); ++i) { - SMgmtHandle *pMgmt = taosArrayGet(pArray, i); + SMgmtHandle * pMgmt = taosArrayGet(pArray, i); SDnodeHandle *pHandle = &pTrans->msgHandles[TMSG_INDEX(pMgmt->msgType)]; if (pMgmt->needCheckVgId) { pHandle->needCheckVgId = pMgmt->needCheckVgId; @@ 
-248,7 +248,14 @@ static inline void dmReleaseHandle(SRpcHandleInfo *pHandle, int8_t type) { } } -static bool rpcRfp(int32_t code) { return code == TSDB_CODE_RPC_REDIRECT; } +static bool rpcRfp(int32_t code) { + if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || + code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY) { + return true; + } else { + return false; + } +} int32_t dmInitClient(SDnode *pDnode) { SDnodeTrans *pTrans = &pDnode->trans; diff --git a/source/libs/function/src/udfd.c b/source/libs/function/src/udfd.c index 838071dbf1..983cffe9dc 100644 --- a/source/libs/function/src/udfd.c +++ b/source/libs/function/src/udfd.c @@ -12,6 +12,8 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ + +// clang-format off #include "uv.h" #include "os.h" #include "fnLog.h" @@ -25,6 +27,7 @@ #include "tglobal.h" #include "tmsg.h" #include "trpc.h" +// clang-foramt on typedef struct SUdfdContext { uv_loop_t * loop; @@ -103,12 +106,12 @@ typedef struct SUdfdRpcSendRecvInfo { uv_sem_t resultSem; } SUdfdRpcSendRecvInfo; -static void udfdProcessRpcRsp(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet); +static void udfdProcessRpcRsp(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet); static int32_t udfdFillUdfInfoFromMNode(void *clientRpc, char *udfName, SUdf *udf); static int32_t udfdConnectToMnode(); static int32_t udfdLoadUdf(char *udfName, SUdf *udf); -static bool udfdRpcRfp(int32_t code); -static int initEpSetFromCfg(const char *firstEp, const char *secondEp, SCorEpSet *pEpSet); +static bool udfdRpcRfp(int32_t code); +static int initEpSetFromCfg(const char *firstEp, const char *secondEp, SCorEpSet *pEpSet); static int32_t udfdOpenClientRpc(); static int32_t udfdCloseClientRpc(); @@ -126,19 +129,19 @@ static void udfdUvHandleError(SUdfdUvConn *conn) { uv_close((uv_handle_t *)conn- static void udfdPipeRead(uv_stream_t *client, ssize_t 
nread, const uv_buf_t *buf); static void udfdOnNewConnection(uv_stream_t *server, int status); -static void udfdIntrSignalHandler(uv_signal_t *handle, int signum); +static void udfdIntrSignalHandler(uv_signal_t *handle, int signum); static int32_t removeListeningPipe(); -static void udfdPrintVersion(); +static void udfdPrintVersion(); static int32_t udfdParseArgs(int32_t argc, char *argv[]); static int32_t udfdInitLog(); -static void udfdCtrlAllocBufCb(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf); -static void udfdCtrlReadCb(uv_stream_t *q, ssize_t nread, const uv_buf_t *buf); +static void udfdCtrlAllocBufCb(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf); +static void udfdCtrlReadCb(uv_stream_t *q, ssize_t nread, const uv_buf_t *buf); static int32_t udfdUvInit(); -static void udfdCloseWalkCb(uv_handle_t *handle, void *arg); +static void udfdCloseWalkCb(uv_handle_t *handle, void *arg); static int32_t udfdRun(); -static void udfdConnectMnodeThreadFunc(void* args); +static void udfdConnectMnodeThreadFunc(void *args); void udfdProcessRequest(uv_work_t *req) { SUvUdfWork *uvUdf = (SUvUdfWork *)(req->data); @@ -401,11 +404,11 @@ void udfdProcessRpcRsp(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet) { udf->bufSize = pFuncInfo->bufSize; char path[PATH_MAX] = {0}; - #ifdef WINDOWS +#ifdef WINDOWS snprintf(path, sizeof(path), "%s%s.dll", TD_TMP_DIR_PATH, pFuncInfo->name); - #else +#else snprintf(path, sizeof(path), "%s/lib%s.so", TD_TMP_DIR_PATH, pFuncInfo->name); - #endif +#endif TdFilePtr file = taosOpenFile(path, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_AUTO_DEL); if (file == NULL) { @@ -544,7 +547,8 @@ int32_t udfdLoadUdf(char *udfName, SUdf *udf) { return 0; } static bool udfdRpcRfp(int32_t code) { - if (code == TSDB_CODE_RPC_REDIRECT) { + if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || + code == TSDB_CODE_SYN_NOT_LEADER || code == 
TSDB_CODE_APP_NOT_READY) { return true; } else { return false; @@ -652,8 +656,7 @@ void udfdAllocBuffer(uv_handle_t *handle, size_t suggestedSize, uv_buf_t *buf) { buf->base = ctx->inputBuf; buf->len = ctx->inputCap; } else { - fnError("udfd can not allocate enough memory") - buf->base = NULL; + fnError("udfd can not allocate enough memory") buf->base = NULL; buf->len = 0; } } else { @@ -664,8 +667,7 @@ void udfdAllocBuffer(uv_handle_t *handle, size_t suggestedSize, uv_buf_t *buf) { buf->base = ctx->inputBuf + ctx->inputLen; buf->len = ctx->inputCap - ctx->inputLen; } else { - fnError("udfd can not allocate enough memory") - buf->base = NULL; + fnError("udfd can not allocate enough memory") buf->base = NULL; buf->len = 0; } } @@ -881,7 +883,7 @@ static int32_t udfdRun() { return 0; } -void udfdConnectMnodeThreadFunc(void* args) { +void udfdConnectMnodeThreadFunc(void *args) { int32_t retryMnodeTimes = 0; int32_t code = 0; while (retryMnodeTimes++ <= TSDB_MAX_REPLICA) { @@ -939,7 +941,7 @@ int main(int argc, char *argv[]) { uv_thread_create(&mnodeConnectThread, udfdConnectMnodeThreadFunc, NULL); udfdRun(); - + removeListeningPipe(); udfdCloseClientRpc(); diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 0671aa39d1..a21b753d6e 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -758,13 +758,16 @@ static void cliHandleUpdate(SCliMsg* pMsg, SCliThrd* pThrd) { destroyCmsg(pMsg); } -SCliConn* cliGetConn(SCliMsg* pMsg, SCliThrd* pThrd) { +SCliConn* cliGetConn(SCliMsg* pMsg, SCliThrd* pThrd, bool* ignore) { SCliConn* conn = NULL; int64_t refId = (int64_t)(pMsg->msg.info.handle); if (refId != 0) { SExHandle* exh = transAcquireExHandle(refMgt, refId); if (exh == NULL) { - assert(0); + *ignore = true; + destroyCmsg(pMsg); + return NULL; + // assert(0); } else { conn = exh->handle; transReleaseExHandle(refMgt, refId); @@ -799,7 +802,11 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { 
cliMayCvtFqdnToIp(&pCtx->epSet, &pThrd->cvtAddr); // transPrintEpSet(&pCtx->epSet); - SCliConn* conn = cliGetConn(pMsg, pThrd); + bool ignore = false; + SCliConn* conn = cliGetConn(pMsg, pThrd, &ignore); + if (ignore == true) { + return; + } if (conn != NULL) { transCtxMerge(&conn->ctx, &pCtx->appCtx); transQueuePush(&conn->cliMsgs, pMsg); diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 7651860224..892d32696e 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -422,7 +422,8 @@ static void uvPrepareSendData(SSvrMsg* smsg, uv_buf_t* wb) { transUnrefSrvHandle(pConn); } else { pHead->msgType = pMsg->msgType; - if (pHead->msgType == 0) pHead->msgType = pConn->inType + 1; + if (pHead->msgType == 0 && transMsgLenFromCont(pMsg->contLen) == sizeof(STransMsgHead)) + pHead->msgType = pConn->inType + 1; } } From 25a12d96b05f6234c069de516b3c632d2a700d68 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sat, 25 Jun 2022 14:06:33 +0800 Subject: [PATCH 17/17] handle rpc retry --- source/client/src/clientEnv.c | 2 +- source/libs/transport/src/trans.c | 1 + source/libs/transport/src/transCli.c | 9 ++++++--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 657c21c4f9..d7bf4b60f1 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -136,7 +136,7 @@ void destroyTscObj(void *pObj) { schedulerStopQueryHb(pTscObj->pAppInfo->pTransporter); if (0 == connNum) { // TODO - closeTransporter(pTscObj); + // closeTransporter(pTscObj); } tscDebug("connObj 0x%" PRIx64 " destroyed, totalConn:%" PRId64, *(int64_t *)pTscObj->id, pTscObj->pAppInfo->numOfConns); diff --git a/source/libs/transport/src/trans.c b/source/libs/transport/src/trans.c index 1ec96f4a7a..4f7b19b539 100644 --- a/source/libs/transport/src/trans.c +++ b/source/libs/transport/src/trans.c @@ -79,6 +79,7 @@ void* rpcOpen(const SRpcInit* 
pInit) { return pRpc; } void rpcClose(void* arg) { + tInfo("start to close rpc"); SRpcInfo* pRpc = (SRpcInfo*)arg; (*taosCloseHandle[pRpc->connType])(pRpc->tcphandle); transCloseExHandleMgt(pRpc->refMgt); diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index a21b753d6e..7374d1fffc 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -724,14 +724,13 @@ void cliConnCb(uv_connect_t* req, int status) { } static void cliHandleQuit(SCliMsg* pMsg, SCliThrd* pThrd) { + pThrd->quit = true; tDebug("cli work thread %p start to quit", pThrd); destroyCmsg(pMsg); destroyConnPool(pThrd->pool); uv_timer_stop(&pThrd->timer); uv_walk(pThrd->loop, cliWalkCb, NULL); - pThrd->quit = true; - // uv_stop(pThrd->loop); } static void cliHandleRelease(SCliMsg* pMsg, SCliThrd* pThrd) { @@ -977,7 +976,10 @@ void cliWalkCb(uv_handle_t* handle, void* arg) { } int cliRBChoseIdx(STrans* pTransInst) { - int64_t index = pTransInst->index; + int8_t index = pTransInst->index; + if (pTransInst->numOfThreads == 0) { + return -1; + } if (pTransInst->index++ >= pTransInst->numOfThreads) { pTransInst->index = 0; } @@ -1120,6 +1122,7 @@ SCliThrd* transGetWorkThrdFromHandle(int64_t handle) { SCliThrd* transGetWorkThrd(STrans* trans, int64_t handle) { if (handle == 0) { int idx = cliRBChoseIdx(trans); + if (idx < 0) return NULL; return ((SCliObj*)trans->tcphandle)->pThreadObj[idx]; } return transGetWorkThrdFromHandle(handle);