Merge branch '3.0' into fix/dnode

This commit is contained in:
Shengliang Guan 2022-07-15 17:05:11 +08:00
commit 9da55b39dd
17 changed files with 113 additions and 75 deletions

View File

@ -525,6 +525,7 @@ typedef struct {
int8_t superUser;
int8_t connType;
SEpSet epSet;
int32_t svrTimestamp;
char sVer[TSDB_VERSION_LEN];
char sDetailVer[128];
} SConnectRsp;
@ -2233,6 +2234,7 @@ typedef struct {
// Batched heartbeat response.
// tSerializeSClientHbBatchRsp / tDeserializeSClientHbBatchRsp encode and decode
// reqId, rspId and svrTimestamp in that order, followed by the number of
// entries in rsps.
typedef struct {
int64_t reqId;
int64_t rspId;
// Set from taosGetTimestampSec() when the heartbeat request is processed.
// The heartbeat callback compares it with its own taosGetTimestampSec() value
// and sets TSDB_CODE_TIME_UNSYNCED when the absolute difference exceeds
// timestampDeltaLimit.
int32_t svrTimestamp;
SArray* rsps; // SArray<SClientHbRsp>
} SClientHbBatchRsp;

View File

@ -73,6 +73,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_MSG_DECODE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0031)
#define TSDB_CODE_NO_AVAIL_DISK TAOS_DEF_ERROR_CODE(0, 0x0032)
#define TSDB_CODE_NOT_FOUND TAOS_DEF_ERROR_CODE(0, 0x0033)
#define TSDB_CODE_TIME_UNSYNCED TAOS_DEF_ERROR_CODE(0, 0x0034)
#define TSDB_CODE_REF_NO_MEMORY TAOS_DEF_ERROR_CODE(0, 0x0040)
#define TSDB_CODE_REF_FULL TAOS_DEF_ERROR_CODE(0, 0x0041)

View File

@ -286,7 +286,7 @@ static FORCE_INLINE SReqResultInfo* tscGetCurResInfo(TAOS_RES* res) {
extern SAppInfo appInfo;
extern int32_t clientReqRefPool;
extern int32_t clientConnRefPool;
extern void* tscQhandle;
extern int32_t timestampDeltaLimit;
__async_send_cb_fn_t getMsgRspHandle(int32_t msgType);

View File

@ -35,6 +35,8 @@ SAppInfo appInfo;
int32_t clientReqRefPool = -1;
int32_t clientConnRefPool = -1;
int32_t timestampDeltaLimit = 900; // s
static TdThreadOnce tscinit = PTHREAD_ONCE_INIT;
volatile int32_t tscInitRes = 0;

View File

@ -161,9 +161,9 @@ static int32_t hbQueryHbRspHandle(SAppHbMgr *pAppHbMgr, SClientHbRsp *pRsp) {
SEpSet *pOrig = &pTscObj->pAppInfo->mgmtEp.epSet;
SEp *pOrigEp = &pOrig->eps[pOrig->inUse];
SEp *pNewEp = &pRsp->query->epSet.eps[pRsp->query->epSet.inUse];
tscDebug("mnode epset updated from %d/%d=>%s:%d to %d/%d=>%s:%d in hb",
pOrig->inUse, pOrig->numOfEps, pOrigEp->fqdn, pOrigEp->port,
pRsp->query->epSet.inUse, pRsp->query->epSet.numOfEps, pNewEp->fqdn, pNewEp->port);
tscDebug("mnode epset updated from %d/%d=>%s:%d to %d/%d=>%s:%d in hb", pOrig->inUse, pOrig->numOfEps,
pOrigEp->fqdn, pOrigEp->port, pRsp->query->epSet.inUse, pRsp->query->epSet.numOfEps, pNewEp->fqdn,
pNewEp->port);
updateEpSet_s(&pTscObj->pAppInfo->mgmtEp, &pRsp->query->epSet);
}
@ -270,6 +270,13 @@ static int32_t hbAsyncCallBack(void *param, SDataBuf *pMsg, int32_t code) {
tDeserializeSClientHbBatchRsp(pMsg->pData, pMsg->len, &pRsp);
}
int32_t now = taosGetTimestampSec();
int32_t delta = abs(now - pRsp.svrTimestamp);
if (delta > timestampDeltaLimit) {
code = TSDB_CODE_TIME_UNSYNCED;
tscError("time diff: %ds is too big", delta);
}
int32_t rspNum = taosArrayGetSize(pRsp.rsps);
taosThreadMutexLock(&appInfo.mutex);
@ -286,7 +293,7 @@ static int32_t hbAsyncCallBack(void *param, SDataBuf *pMsg, int32_t code) {
taosMemoryFreeClear(param);
if (code != 0) {
(*pInst)->onlineDnodes = 0;
(*pInst)->onlineDnodes = ((*pInst)->totalDnodes ? 0 : -1);
}
if (rspNum) {
@ -392,7 +399,6 @@ int32_t hbGetQueryBasicInfo(SClientHbKey *connKey, SClientHbReq *req) {
return TSDB_CODE_QRY_OUT_OF_MEMORY;
}
int32_t code = hbBuildQueryDesc(hbBasic, pTscObj);
if (code) {
releaseTscObj(connKey->tscRid);
@ -442,7 +448,6 @@ int32_t hbGetExpiredUserInfo(SClientHbKey *connKey, struct SCatalog *pCatalog, S
return TSDB_CODE_SUCCESS;
}
int32_t hbGetExpiredDBInfo(SClientHbKey *connKey, struct SCatalog *pCatalog, SClientHbReq *req) {
SDbVgVersion *dbs = NULL;
uint32_t dbNum = 0;
@ -534,7 +539,6 @@ int32_t hbGetAppInfo(int64_t clusterId, SClientHbReq *req) {
return TSDB_CODE_SUCCESS;
}
int32_t hbQueryHbReqHandle(SClientHbKey *connKey, void *param, SClientHbReq *req) {
int64_t *clusterId = (int64_t *)param;
struct SCatalog *pCatalog = NULL;
@ -615,9 +619,7 @@ SClientHbBatchReq *hbGatherAllInfo(SAppHbMgr *pAppHbMgr) {
return pBatchReq;
}
void hbThreadFuncUnexpectedStopped(void) {
atomic_store_8(&clientHbMgr.threadStop, 2);
}
void hbThreadFuncUnexpectedStopped(void) { atomic_store_8(&clientHbMgr.threadStop, 2); }
void hbMergeSummary(SAppClusterSummary *dst, SAppClusterSummary *src) {
dst->numOfInsertsReq += src->numOfInsertsReq;
@ -662,7 +664,6 @@ int32_t hbGatherAppInfo(void) {
return TSDB_CODE_SUCCESS;
}
static void *hbThreadFunc(void *param) {
setThreadName("hb");
#ifdef WINDOWS
@ -920,4 +921,3 @@ void hbDeregisterConn(SAppHbMgr *pAppHbMgr, SClientHbKey connKey) {
atomic_sub_fetch_32(&pAppHbMgr->connKeyCnt, 1);
}

View File

@ -52,6 +52,18 @@ int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) {
SConnectRsp connectRsp = {0};
tDeserializeSConnectRsp(pMsg->pData, pMsg->len, &connectRsp);
int32_t now = taosGetTimestampSec();
int32_t delta = abs(now - connectRsp.svrTimestamp);
if (delta > timestampDeltaLimit) {
code = TSDB_CODE_TIME_UNSYNCED;
tscError("time diff:%ds is too big", delta);
taosMemoryFree(pMsg->pData);
setErrno(pRequest, code);
tsem_post(&pRequest->body.rspSem);
return code;
}
/*assert(connectRsp.epSet.numOfEps > 0);*/
if (connectRsp.epSet.numOfEps == 0) {
taosMemoryFree(pMsg->pData);

View File

@ -453,6 +453,7 @@ int32_t tSerializeSClientHbBatchRsp(void *buf, int32_t bufLen, const SClientHbBa
if (tStartEncode(&encoder) < 0) return -1;
if (tEncodeI64(&encoder, pBatchRsp->reqId) < 0) return -1;
if (tEncodeI64(&encoder, pBatchRsp->rspId) < 0) return -1;
if (tEncodeI32(&encoder, pBatchRsp->svrTimestamp) < 0) return -1;
int32_t rspNum = taosArrayGetSize(pBatchRsp->rsps);
if (tEncodeI32(&encoder, rspNum) < 0) return -1;
@ -474,6 +475,7 @@ int32_t tDeserializeSClientHbBatchRsp(void *buf, int32_t bufLen, SClientHbBatchR
if (tStartDecode(&decoder) < 0) return -1;
if (tDecodeI64(&decoder, &pBatchRsp->reqId) < 0) return -1;
if (tDecodeI64(&decoder, &pBatchRsp->rspId) < 0) return -1;
if (tDecodeI32(&decoder, &pBatchRsp->svrTimestamp) < 0) return -1;
int32_t rspNum = 0;
if (tDecodeI32(&decoder, &rspNum) < 0) return -1;
@ -3613,6 +3615,7 @@ int32_t tSerializeSConnectRsp(void *buf, int32_t bufLen, SConnectRsp *pRsp) {
if (tEncodeI8(&encoder, pRsp->superUser) < 0) return -1;
if (tEncodeI8(&encoder, pRsp->connType) < 0) return -1;
if (tEncodeSEpSet(&encoder, &pRsp->epSet) < 0) return -1;
if (tEncodeI32(&encoder, pRsp->svrTimestamp) < 0) return -1;
if (tEncodeCStr(&encoder, pRsp->sVer) < 0) return -1;
if (tEncodeCStr(&encoder, pRsp->sDetailVer) < 0) return -1;
tEndEncode(&encoder);
@ -3634,6 +3637,7 @@ int32_t tDeserializeSConnectRsp(void *buf, int32_t bufLen, SConnectRsp *pRsp) {
if (tDecodeI8(&decoder, &pRsp->superUser) < 0) return -1;
if (tDecodeI8(&decoder, &pRsp->connType) < 0) return -1;
if (tDecodeSEpSet(&decoder, &pRsp->epSet) < 0) return -1;
if (tDecodeI32(&decoder, &pRsp->svrTimestamp) < 0) return -1;
if (tDecodeCStrTo(&decoder, pRsp->sVer) < 0) return -1;
if (tDecodeCStrTo(&decoder, pRsp->sDetailVer) < 0) return -1;
tEndDecode(&decoder);

View File

@ -148,9 +148,9 @@ static int32_t mmStart(SMnodeMgmt *pMgmt) {
static void mmStop(SMnodeMgmt *pMgmt) {
dDebug("mnode-mgmt start to stop");
mndPreClose(pMgmt->pMnode);
taosThreadRwlockWrlock(&pMgmt->lock);
pMgmt->stopped = 1;
mndPreClose(pMgmt->pMnode);
taosThreadRwlockUnlock(&pMgmt->lock);
mndStop(pMgmt->pMnode);

View File

@ -221,11 +221,11 @@ int32_t dmInitMsgHandle(SDnode *pDnode) {
static inline int32_t dmSendReq(const SEpSet *pEpSet, SRpcMsg *pMsg) {
SDnode *pDnode = dmInstance();
if (pDnode->status != DND_STAT_RUNNING) {
if (pDnode->status != DND_STAT_RUNNING && pMsg->msgType < TDMT_SYNC_MSG) {
rpcFreeCont(pMsg->pCont);
pMsg->pCont = NULL;
terrno = TSDB_CODE_NODE_OFFLINE;
dError("failed to send rpc msg since %s, handle:%p", terrstr(), pMsg->info.handle);
dError("failed to send rpc msg:%s since %s, handle:%p", TMSG_INFO(pMsg->msgType), terrstr(), pMsg->info.handle);
return -1;
} else {
rpcSendRequest(pDnode->trans.clientRpc, pEpSet, pMsg, NULL);

View File

@ -15,10 +15,10 @@
#define _DEFAULT_SOURCE
#include "mndProfile.h"
#include "mndPrivilege.h"
#include "mndDb.h"
#include "mndDnode.h"
#include "mndMnode.h"
#include "mndPrivilege.h"
#include "mndQnode.h"
#include "mndShow.h"
#include "mndStb.h"
@ -274,6 +274,7 @@ static int32_t mndProcessConnectReq(SRpcMsg *pReq) {
connectRsp.connId = pConn->id;
connectRsp.connType = connReq.connType;
connectRsp.dnodeNum = mndGetDnodeSize(pMnode);
connectRsp.svrTimestamp = taosGetTimestampSec();
strcpy(connectRsp.sVer, version);
snprintf(connectRsp.sDetailVer, sizeof(connectRsp.sDetailVer), "ver:%s\nbuild:%s\ngitinfo:%s", version, buildinfo,
@ -623,6 +624,7 @@ static int32_t mndProcessHeartBeatReq(SRpcMsg *pReq) {
}
SClientHbBatchRsp batchRsp = {0};
batchRsp.svrTimestamp = taosGetTimestampSec();
batchRsp.rsps = taosArrayInit(0, sizeof(SClientHbRsp));
int32_t sz = taosArrayGetSize(batchReq.reqs);

View File

@ -382,6 +382,15 @@ void udfdProcessRpcRsp(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet) {
if (msgInfo->rpcType == UDFD_RPC_MNODE_CONNECT) {
SConnectRsp connectRsp = {0};
tDeserializeSConnectRsp(pMsg->pCont, pMsg->contLen, &connectRsp);
int32_t now = taosGetTimestampSec();
int32_t delta = abs(now - connectRsp.svrTimestamp);
if (delta > 900) {
msgInfo->code = TSDB_CODE_TIME_UNSYNCED;
goto _return;
}
if (connectRsp.epSet.numOfEps == 0) {
msgInfo->code = TSDB_CODE_MND_APP_ERROR;
goto _return;

View File

@ -516,13 +516,14 @@ static void idxCacheMakeRoomForWrite(IndexCache* cache) {
idxCacheRef(cache);
cache->imm = cache->mem;
cache->mem = idxInternalCacheCreate(cache->type);
cache->mem->pCache = cache;
cache->occupiedMem = 0;
if (quit == false) {
atomic_store_32(&cache->merging, 1);
}
// sched to merge
// unref cache in bgwork
// 1. sched to merge
// 2. unref cache in bgwork
idxCacheSchedToMerge(cache, quit);
}
}

View File

@ -1042,7 +1042,7 @@ static void cliSchedMsgToNextNode(SCliMsg* pMsg, SCliThrd* pThrd) {
STraceId* trace = &pMsg->msg.info.traceId;
char tbuf[256] = {0};
EPSET_DEBUG_STR(&pCtx->epSet, tbuf);
tGTrace("%s retry on next node, use %s, retryCnt:%d, limit:%d", transLabel(pThrd->pTransInst), tbuf,
tGDebug("%s retry on next node, use %s, retryCnt:%d, limit:%d", transLabel(pThrd->pTransInst), tbuf,
pCtx->retryCnt + 1, pCtx->retryLimit);
STaskArg* arg = taosMemoryMalloc(sizeof(STaskArg));
@ -1134,11 +1134,11 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) {
if (hasEpSet) {
char tbuf[256] = {0};
EPSET_DEBUG_STR(&pCtx->epSet, tbuf);
tGTrace("%s conn %p extract epset from msg", CONN_GET_INST_LABEL(pConn), pConn);
tGDebug("%s conn %p extract epset from msg", CONN_GET_INST_LABEL(pConn), pConn);
}
if (pCtx->pSem != NULL) {
tGTrace("%s conn %p(sync) handle resp", CONN_GET_INST_LABEL(pConn), pConn);
tGDebug("%s conn %p(sync) handle resp", CONN_GET_INST_LABEL(pConn), pConn);
if (pCtx->pRsp == NULL) {
tGTrace("%s conn %p(sync) failed to resp, ignore", CONN_GET_INST_LABEL(pConn), pConn);
} else {
@ -1147,7 +1147,7 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) {
tsem_post(pCtx->pSem);
pCtx->pRsp = NULL;
} else {
tGTrace("%s conn %p handle resp", CONN_GET_INST_LABEL(pConn), pConn);
tGDebug("%s conn %p handle resp", CONN_GET_INST_LABEL(pConn), pConn);
if (retry == false && hasEpSet == true) {
pTransInst->cfp(pTransInst->parent, pResp, &pCtx->epSet);
} else {
@ -1257,7 +1257,7 @@ void transSendRequest(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STra
cliMsg->refId = (int64_t)shandle;
STraceId* trace = &pReq->info.traceId;
tGTrace("%s send request at thread:%08" PRId64 ", dst:%s:%d, app:%p", transLabel(pTransInst), pThrd->pid,
tGDebug("%s send request at thread:%08" PRId64 ", dst:%s:%d, app:%p", transLabel(pTransInst), pThrd->pid,
EPSET_GET_INUSE_IP(&pCtx->epSet), EPSET_GET_INUSE_PORT(&pCtx->epSet), pReq->info.ahandle);
ASSERT(transAsyncSend(pThrd->asyncPool, &(cliMsg->q)) == 0);
transReleaseExHandle(transGetInstMgt(), (int64_t)shandle);
@ -1297,7 +1297,7 @@ void transSendRecv(void* shandle, const SEpSet* pEpSet, STransMsg* pReq, STransM
cliMsg->refId = (int64_t)shandle;
STraceId* trace = &pReq->info.traceId;
tGTrace("%s send request at thread:%08" PRId64 ", dst:%s:%d, app:%p", transLabel(pTransInst), pThrd->pid,
tGDebug("%s send request at thread:%08" PRId64 ", dst:%s:%d, app:%p", transLabel(pTransInst), pThrd->pid,
EPSET_GET_INUSE_IP(&pCtx->epSet), EPSET_GET_INUSE_PORT(&pCtx->epSet), pReq->info.ahandle);
transAsyncSend(pThrd->asyncPool, &(cliMsg->q));

View File

@ -1020,7 +1020,7 @@ void transRefSrvHandle(void* handle) {
return;
}
int ref = T_REF_INC((SSvrConn*)handle);
tDebug("conn %p ref count:%d", handle, ref);
tTrace("conn %p ref count:%d", handle, ref);
}
void transUnrefSrvHandle(void* handle) {
@ -1028,7 +1028,7 @@ void transUnrefSrvHandle(void* handle) {
return;
}
int ref = T_REF_DEC((SSvrConn*)handle);
tDebug("conn %p ref count:%d", handle, ref);
tTrace("conn %p ref count:%d", handle, ref);
if (ref == 0) {
destroyConn((SSvrConn*)handle, true);
}

View File

@ -78,6 +78,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_INVALID_TIMESTAMP, "Invalid timestamp for
TAOS_DEFINE_ERROR(TSDB_CODE_MSG_DECODE_ERROR, "Msg decode error")
TAOS_DEFINE_ERROR(TSDB_CODE_NO_AVAIL_DISK, "No available disk")
TAOS_DEFINE_ERROR(TSDB_CODE_NOT_FOUND, "Not found")
TAOS_DEFINE_ERROR(TSDB_CODE_TIME_UNSYNCED, "Unsynced time")
TAOS_DEFINE_ERROR(TSDB_CODE_REF_NO_MEMORY, "Ref out of memory")
TAOS_DEFINE_ERROR(TSDB_CODE_REF_FULL, "too many Ref Objs")

View File

@ -62,26 +62,26 @@ system sh/exec.sh -n dnode1 -s start -v
print =============== stepa: query data
sql select * from c1
#sql select * from stb
#sql select * from stb_1
#sql select ts, c1, c2, c3 from c1
#sql select ts, c1, c2, c3 from stb
#sql select ts, c1 from stb_2
#sql select ts, c1, t1 from c1
#sql select ts, c1, t1 from stb
#sql select ts, c1, t1 from stb_2
sql select * from stb
sql select * from stb_1
sql select ts, c1, c2, c3 from c1
sql select ts, c1, c2, c3 from stb
sql select ts, c1 from stb_2
sql select ts, c1, t1 from c1
sql select ts, c1, t1 from stb
sql select ts, c1, t1 from stb_2
print =============== stepb: count
#sql select count(*) from c1;
#sql select count(*) from stb;
#sql select count(ts), count(c1), count(c2), count(c3) from c1
#sql select count(ts), count(c1), count(c2), count(c3) from stb
sql select count(*) from c1;
sql select count(*) from stb;
sql select count(ts), count(c1), count(c2), count(c3) from c1
sql select count(ts), count(c1), count(c2), count(c3) from stb
print =============== stepc: func
#sql select first(ts), first(c1), first(c2), first(c3) from c1
#sql select min(c2), min(c3), min(c4) from c1
#sql select max(c2), max(c3), max(c4) from c1
#sql select sum(c2), sum(c3), sum(c4) from c1
sql select first(ts), first(c1), first(c2), first(c3) from c1
sql select min(c2), min(c3), min(c4) from c1
sql select max(c2), max(c3), max(c4) from c1
sql select sum(c2), sum(c3), sum(c4) from c1
_OVER:
system sh/exec.sh -n dnode1 -s stop -x SIGINT

View File

@ -1,7 +1,7 @@
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1
system sh/cfg.sh -n dnode1 -c debugflag -v 131
system sh/exec.sh -n dnode1 -s start -v
system sh/exec.sh -n dnode1 -s start
sql connect
print =============== step1: create drop show dnodes
@ -56,6 +56,10 @@ print =============== step6: alter insert
sql insert into c3 using stb tags(true, -1, -2, -3, -4, -6.0, -7.0, 'child tbl 1', 'child tbl 1', '2022-02-25 18:00:00.000', 10, 20, 30, 40) values(now-1s, true, -1, -2, -3, -4, -6.0, -7.0, 'child tbl 1', 'child tbl 1', '2022-02-25 18:00:00.000', 10, 20, 30, 40)
sql insert into c3 using stb tags(true, -1, -2, -3, -4, -6.0, -7.0, 'child tbl 1', 'child tbl 1', '2022-02-25 18:00:00.000', 10, 20, 30, 40) values(now+0s, true, -1, -2, -3, -4, -6.0, -7.0, 'child tbl 1', 'child tbl 1', '2022-02-25 18:00:00.000', 10, 20, 30, 40)
print =============== restart
system sh/exec.sh -n dnode1 -s stop -x SIGINT
system sh/exec.sh -n dnode1 -s start -v
print =============== stepa: query data
sql select * from c1
sql select * from stb