feat: add time unsynced check

This commit is contained in:
yihaoDeng 2022-07-15 14:51:20 +08:00
parent d7a463da46
commit 35e8ad2811
14 changed files with 89 additions and 55 deletions

View File

@ -525,6 +525,7 @@ typedef struct {
int8_t superUser;
int8_t connType;
SEpSet epSet;
int32_t svrTimestamp;
char sVer[TSDB_VERSION_LEN];
char sDetailVer[128];
} SConnectRsp;
@ -2224,6 +2225,7 @@ typedef struct {
typedef struct {
int64_t reqId;
int64_t rspId;
int32_t svrTimestamp;
SArray* rsps; // SArray<SClientHbRsp>
} SClientHbBatchRsp;

View File

@ -73,6 +73,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_MSG_DECODE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0031)
#define TSDB_CODE_NO_AVAIL_DISK TAOS_DEF_ERROR_CODE(0, 0x0032)
#define TSDB_CODE_NOT_FOUND TAOS_DEF_ERROR_CODE(0, 0x0033)
#define TSDB_CODE_TIME_UNSYNCED TAOS_DEF_ERROR_CODE(0, 0x0034)
#define TSDB_CODE_REF_NO_MEMORY TAOS_DEF_ERROR_CODE(0, 0x0040)
#define TSDB_CODE_REF_FULL TAOS_DEF_ERROR_CODE(0, 0x0041)

View File

@ -285,7 +285,7 @@ static FORCE_INLINE SReqResultInfo* tscGetCurResInfo(TAOS_RES* res) {
extern SAppInfo appInfo;
extern int32_t clientReqRefPool;
extern int32_t clientConnRefPool;
extern void* tscQhandle;
extern int32_t timestampDeltaLimit;
__async_send_cb_fn_t getMsgRspHandle(int32_t msgType);

View File

@ -35,6 +35,8 @@ SAppInfo appInfo;
int32_t clientReqRefPool = -1;
int32_t clientConnRefPool = -1;
int32_t timestampDeltaLimit = 900; // s
static TdThreadOnce tscinit = PTHREAD_ONCE_INIT;
volatile int32_t tscInitRes = 0;

View File

@ -161,9 +161,9 @@ static int32_t hbQueryHbRspHandle(SAppHbMgr *pAppHbMgr, SClientHbRsp *pRsp) {
SEpSet *pOrig = &pTscObj->pAppInfo->mgmtEp.epSet;
SEp *pOrigEp = &pOrig->eps[pOrig->inUse];
SEp *pNewEp = &pRsp->query->epSet.eps[pRsp->query->epSet.inUse];
tscDebug("mnode epset updated from %d/%d=>%s:%d to %d/%d=>%s:%d in hb",
pOrig->inUse, pOrig->numOfEps, pOrigEp->fqdn, pOrigEp->port,
pRsp->query->epSet.inUse, pRsp->query->epSet.numOfEps, pNewEp->fqdn, pNewEp->port);
tscDebug("mnode epset updated from %d/%d=>%s:%d to %d/%d=>%s:%d in hb", pOrig->inUse, pOrig->numOfEps,
pOrigEp->fqdn, pOrigEp->port, pRsp->query->epSet.inUse, pRsp->query->epSet.numOfEps, pNewEp->fqdn,
pNewEp->port);
updateEpSet_s(&pTscObj->pAppInfo->mgmtEp, &pRsp->query->epSet);
}
@ -270,6 +270,13 @@ static int32_t hbAsyncCallBack(void *param, SDataBuf *pMsg, int32_t code) {
tDeserializeSClientHbBatchRsp(pMsg->pData, pMsg->len, &pRsp);
}
int32_t now = taosGetTimestampSec();
int32_t delta = abs(now - pRsp.svrTimestamp);
if (delta > timestampDeltaLimit) {
code = TSDB_CODE_TIME_UNSYNCED;
tscError("time diff: %ds is too big", delta);
}
int32_t rspNum = taosArrayGetSize(pRsp.rsps);
taosThreadMutexLock(&appInfo.mutex);
@ -392,7 +399,6 @@ int32_t hbGetQueryBasicInfo(SClientHbKey *connKey, SClientHbReq *req) {
return TSDB_CODE_QRY_OUT_OF_MEMORY;
}
int32_t code = hbBuildQueryDesc(hbBasic, pTscObj);
if (code) {
releaseTscObj(connKey->tscRid);
@ -442,7 +448,6 @@ int32_t hbGetExpiredUserInfo(SClientHbKey *connKey, struct SCatalog *pCatalog, S
return TSDB_CODE_SUCCESS;
}
int32_t hbGetExpiredDBInfo(SClientHbKey *connKey, struct SCatalog *pCatalog, SClientHbReq *req) {
SDbVgVersion *dbs = NULL;
uint32_t dbNum = 0;
@ -534,7 +539,6 @@ int32_t hbGetAppInfo(int64_t clusterId, SClientHbReq *req) {
return TSDB_CODE_SUCCESS;
}
int32_t hbQueryHbReqHandle(SClientHbKey *connKey, void *param, SClientHbReq *req) {
int64_t *clusterId = (int64_t *)param;
struct SCatalog *pCatalog = NULL;
@ -615,9 +619,7 @@ SClientHbBatchReq *hbGatherAllInfo(SAppHbMgr *pAppHbMgr) {
return pBatchReq;
}
void hbThreadFuncUnexpectedStopped(void) {
atomic_store_8(&clientHbMgr.threadStop, 2);
}
void hbThreadFuncUnexpectedStopped(void) { atomic_store_8(&clientHbMgr.threadStop, 2); }
void hbMergeSummary(SAppClusterSummary *dst, SAppClusterSummary *src) {
dst->numOfInsertsReq += src->numOfInsertsReq;
@ -662,7 +664,6 @@ int32_t hbGatherAppInfo(void) {
return TSDB_CODE_SUCCESS;
}
static void *hbThreadFunc(void *param) {
setThreadName("hb");
#ifdef WINDOWS
@ -920,4 +921,3 @@ void hbDeregisterConn(SAppHbMgr *pAppHbMgr, SClientHbKey connKey) {
atomic_sub_fetch_32(&pAppHbMgr->connKeyCnt, 1);
}

View File

@ -52,6 +52,18 @@ int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) {
SConnectRsp connectRsp = {0};
tDeserializeSConnectRsp(pMsg->pData, pMsg->len, &connectRsp);
int32_t now = taosGetTimestampSec();
int32_t delta = abs(now - connectRsp.svrTimestamp);
if (delta > timestampDeltaLimit) {
code = TSDB_CODE_TIME_UNSYNCED;
tscError("time diff: %" PRId64 "ms is too big", delta);
taosMemoryFree(pMsg->pData);
setErrno(pRequest, code);
tsem_post(&pRequest->body.rspSem);
return code;
}
/*assert(connectRsp.epSet.numOfEps > 0);*/
if (connectRsp.epSet.numOfEps == 0) {
taosMemoryFree(pMsg->pData);

View File

@ -453,6 +453,7 @@ int32_t tSerializeSClientHbBatchRsp(void *buf, int32_t bufLen, const SClientHbBa
if (tStartEncode(&encoder) < 0) return -1;
if (tEncodeI64(&encoder, pBatchRsp->reqId) < 0) return -1;
if (tEncodeI64(&encoder, pBatchRsp->rspId) < 0) return -1;
if (tEncodeI32(&encoder, pBatchRsp->svrTimestamp) < 0) return -1;
int32_t rspNum = taosArrayGetSize(pBatchRsp->rsps);
if (tEncodeI32(&encoder, rspNum) < 0) return -1;
@ -474,6 +475,7 @@ int32_t tDeserializeSClientHbBatchRsp(void *buf, int32_t bufLen, SClientHbBatchR
if (tStartDecode(&decoder) < 0) return -1;
if (tDecodeI64(&decoder, &pBatchRsp->reqId) < 0) return -1;
if (tDecodeI64(&decoder, &pBatchRsp->rspId) < 0) return -1;
if (tDecodeI32(&decoder, &pBatchRsp->svrTimestamp) < 0) return -1;
int32_t rspNum = 0;
if (tDecodeI32(&decoder, &rspNum) < 0) return -1;
@ -3613,6 +3615,7 @@ int32_t tSerializeSConnectRsp(void *buf, int32_t bufLen, SConnectRsp *pRsp) {
if (tEncodeI8(&encoder, pRsp->superUser) < 0) return -1;
if (tEncodeI8(&encoder, pRsp->connType) < 0) return -1;
if (tEncodeSEpSet(&encoder, &pRsp->epSet) < 0) return -1;
if (tEncodeI32(&encoder, pRsp->svrTimestamp) < 0) return -1;
if (tEncodeCStr(&encoder, pRsp->sVer) < 0) return -1;
if (tEncodeCStr(&encoder, pRsp->sDetailVer) < 0) return -1;
tEndEncode(&encoder);
@ -3634,6 +3637,7 @@ int32_t tDeserializeSConnectRsp(void *buf, int32_t bufLen, SConnectRsp *pRsp) {
if (tDecodeI8(&decoder, &pRsp->superUser) < 0) return -1;
if (tDecodeI8(&decoder, &pRsp->connType) < 0) return -1;
if (tDecodeSEpSet(&decoder, &pRsp->epSet) < 0) return -1;
if (tDecodeI32(&decoder, &pRsp->svrTimestamp) < 0) return -1;
if (tDecodeCStrTo(&decoder, pRsp->sVer) < 0) return -1;
if (tDecodeCStrTo(&decoder, pRsp->sDetailVer) < 0) return -1;
tEndDecode(&decoder);

View File

@ -15,10 +15,10 @@
#define _DEFAULT_SOURCE
#include "mndProfile.h"
#include "mndPrivilege.h"
#include "mndDb.h"
#include "mndDnode.h"
#include "mndMnode.h"
#include "mndPrivilege.h"
#include "mndQnode.h"
#include "mndShow.h"
#include "mndStb.h"
@ -274,6 +274,7 @@ static int32_t mndProcessConnectReq(SRpcMsg *pReq) {
connectRsp.connId = pConn->id;
connectRsp.connType = connReq.connType;
connectRsp.dnodeNum = mndGetDnodeSize(pMnode);
connectRsp.svrTimestamp = taosGetTimestampSec();
strcpy(connectRsp.sVer, version);
snprintf(connectRsp.sDetailVer, sizeof(connectRsp.sDetailVer), "ver:%s\nbuild:%s\ngitinfo:%s", version, buildinfo,
@ -623,6 +624,7 @@ static int32_t mndProcessHeartBeatReq(SRpcMsg *pReq) {
}
SClientHbBatchRsp batchRsp = {0};
batchRsp.svrTimestamp = taosGetTimestampMs();
batchRsp.rsps = taosArrayInit(0, sizeof(SClientHbRsp));
int32_t sz = taosArrayGetSize(batchReq.reqs);

View File

@ -382,6 +382,15 @@ void udfdProcessRpcRsp(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet) {
if (msgInfo->rpcType == UDFD_RPC_MNODE_CONNECT) {
SConnectRsp connectRsp = {0};
tDeserializeSConnectRsp(pMsg->pCont, pMsg->contLen, &connectRsp);
int32_t now = taosGetTimestampSec();
int32_t delta = abs(now - connectRsp.svrTimestamp);
if (delta > 900) {
msgInfo->code = TSDB_CODE_TIME_UNSYNCED;
goto _return;
}
if (connectRsp.epSet.numOfEps == 0) {
msgInfo->code = TSDB_CODE_MND_APP_ERROR;
goto _return;

View File

@ -516,13 +516,14 @@ static void idxCacheMakeRoomForWrite(IndexCache* cache) {
idxCacheRef(cache);
cache->imm = cache->mem;
cache->mem = idxInternalCacheCreate(cache->type);
cache->mem->pCache = cache;
cache->occupiedMem = 0;
if (quit == false) {
atomic_store_32(&cache->merging, 1);
}
// sched to merge
// unref cache in bgwork
// 1. sched to merge
// 2. unref cache in bgwork
idxCacheSchedToMerge(cache, quit);
}
}

View File

@ -1041,7 +1041,7 @@ static void cliSchedMsgToNextNode(SCliMsg* pMsg, SCliThrd* pThrd) {
STraceId* trace = &pMsg->msg.info.traceId;
char tbuf[256] = {0};
EPSET_DEBUG_STR(&pCtx->epSet, tbuf);
tGTrace("%s retry on next node, use %s, retryCnt:%d, limit:%d", transLabel(pThrd->pTransInst), tbuf,
tGDebug("%s retry on next node, use %s, retryCnt:%d, limit:%d", transLabel(pThrd->pTransInst), tbuf,
pCtx->retryCnt + 1, pCtx->retryLimit);
STaskArg* arg = taosMemoryMalloc(sizeof(STaskArg));
@ -1133,11 +1133,11 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) {
if (hasEpSet) {
char tbuf[256] = {0};
EPSET_DEBUG_STR(&pCtx->epSet, tbuf);
tGTrace("%s conn %p extract epset from msg", CONN_GET_INST_LABEL(pConn), pConn);
tGDebug("%s conn %p extract epset from msg", CONN_GET_INST_LABEL(pConn), pConn);
}
if (pCtx->pSem != NULL) {
tGTrace("%s conn %p(sync) handle resp", CONN_GET_INST_LABEL(pConn), pConn);
tGDebug("%s conn %p(sync) handle resp", CONN_GET_INST_LABEL(pConn), pConn);
if (pCtx->pRsp == NULL) {
tGTrace("%s conn %p(sync) failed to resp, ignore", CONN_GET_INST_LABEL(pConn), pConn);
} else {
@ -1146,7 +1146,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) {
tsem_post(pCtx->pSem);
pCtx->pRsp = NULL;
} else {
tGTrace("%s conn %p handle resp", CONN_GET_INST_LABEL(pConn), pConn);
tGDebug("%s conn %p handle resp", CONN_GET_INST_LABEL(pConn), pConn);
if (retry == false && hasEpSet == true) {
pTransInst->cfp(pTransInst->parent, pResp, &pCtx->epSet);
} else {
@ -1256,7 +1256,7 @@ void transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STra
cliMsg->refId = (int64_t)shandle;
STraceId* trace = &pReq->info.traceId;
tGTrace("%s send request at thread:%08" PRId64 ", dst:%s:%d, app:%p", transLabel(pTransInst), pThrd->pid,
tGDebug("%s send request at thread:%08" PRId64 ", dst:%s:%d, app:%p", transLabel(pTransInst), pThrd->pid,
EPSET_GET_INUSE_IP(&pCtx->epSet), EPSET_GET_INUSE_PORT(&pCtx->epSet), pReq->info.ahandle);
ASSERT(transAsyncSend(pThrd->asyncPool, &(cliMsg->q)) == 0);
transReleaseExHandle(transGetInstMgt(), (int64_t)shandle);
@ -1296,7 +1296,7 @@ void transSendRecv(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransM
cliMsg->refId = (int64_t)shandle;
STraceId* trace = &pReq->info.traceId;
tGTrace("%s send request at thread:%08" PRId64 ", dst:%s:%d, app:%p", transLabel(pTransInst), pThrd->pid,
tGDebug("%s send request at thread:%08" PRId64 ", dst:%s:%d, app:%p", transLabel(pTransInst), pThrd->pid,
EPSET_GET_INUSE_IP(&pCtx->epSet), EPSET_GET_INUSE_PORT(&pCtx->epSet), pReq->info.ahandle);
transAsyncSend(pThrd->asyncPool, &(cliMsg->q));

View File

@ -1020,7 +1020,7 @@ void transRefSrvHandle(void* handle) {
return;
}
int ref = T_REF_INC((SSvrConn*)handle);
tDebug("conn %p ref count:%d", handle, ref);
tTrace("conn %p ref count:%d", handle, ref);
}
void transUnrefSrvHandle(void* handle) {
@ -1028,7 +1028,7 @@ void transUnrefSrvHandle(void* handle) {
return;
}
int ref = T_REF_DEC((SSvrConn*)handle);
tDebug("conn %p ref count:%d", handle, ref);
tTrace("conn %p ref count:%d", handle, ref);
if (ref == 0) {
destroyConn((SSvrConn*)handle, true);
}

View File

@ -78,6 +78,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_INVALID_TIMESTAMP, "Invalid timestamp for
TAOS_DEFINE_ERROR(TSDB_CODE_MSG_DECODE_ERROR, "Msg decode error")
TAOS_DEFINE_ERROR(TSDB_CODE_NO_AVAIL_DISK, "No available disk")
TAOS_DEFINE_ERROR(TSDB_CODE_NOT_FOUND, "Not found")
TAOS_DEFINE_ERROR(TSDB_CODE_TIME_UNSYNCED, "Unsynced time")
TAOS_DEFINE_ERROR(TSDB_CODE_REF_NO_MEMORY, "Ref out of memory")
TAOS_DEFINE_ERROR(TSDB_CODE_REF_FULL, "too many Ref Objs")