From 7782318d1fc26f7820520a47968b57de5dac385b Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Thu, 20 Aug 2020 05:37:07 +0000 Subject: [PATCH 1/3] TD-1184 add error code --- src/client/src/tscServer.c | 1 + src/dnode/src/dnodePeer.c | 2 +- src/dnode/src/dnodeShell.c | 4 ++-- src/inc/taoserror.h | 3 ++- src/mnode/src/mnodeUser.c | 4 ++-- src/rpc/src/rpcMain.c | 2 +- src/vnode/src/vnodeMain.c | 4 ++-- src/vnode/src/vnodeRead.c | 8 ++++---- src/vnode/src/vnodeWrite.c | 8 ++++---- 9 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index 0785dd8b53..8f2f60887a 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -275,6 +275,7 @@ void tscProcessMsgFromServer(SRpcMsg *rpcMsg, SRpcEpSet *pEpSet) { (rpcMsg->code == TSDB_CODE_TDB_INVALID_TABLE_ID || rpcMsg->code == TSDB_CODE_VND_INVALID_VGROUP_ID || rpcMsg->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || + rpcMsg->code == TSDB_CODE_APP_NOT_READY || rpcMsg->code == TSDB_CODE_TDB_TABLE_RECONFIGURE)) { tscWarn("%p it shall renew table meta, code:%s, retry:%d", pSql, tstrerror(rpcMsg->code), ++pSql->retry); diff --git a/src/dnode/src/dnodePeer.c b/src/dnode/src/dnodePeer.c index b27f56a871..c09d742239 100644 --- a/src/dnode/src/dnodePeer.c +++ b/src/dnode/src/dnodePeer.c @@ -93,7 +93,7 @@ static void dnodeProcessReqMsgFromDnode(SRpcMsg *pMsg, SRpcEpSet *pEpSet) { if (pMsg->pCont == NULL) return; if (dnodeGetRunStatus() != TSDB_DNODE_RUN_STATUS_RUNING) { - rspMsg.code = TSDB_CODE_RPC_NOT_READY; + rspMsg.code = TSDB_CODE_APP_NOT_READY; rpcSendResponse(&rspMsg); rpcFreeCont(pMsg->pCont); dDebug("RPC %p, msg:%s is ignored since dnode not running", pMsg->handle, taosMsg[pMsg->msgType]); diff --git a/src/dnode/src/dnodeShell.c b/src/dnode/src/dnodeShell.c index f9d137bb99..5daf616706 100644 --- a/src/dnode/src/dnodeShell.c +++ b/src/dnode/src/dnodeShell.c @@ -119,7 +119,7 @@ void dnodeProcessMsgFromShell(SRpcMsg *pMsg, SRpcEpSet *pEpSet) { if (dnodeGetRunStatus() != TSDB_DNODE_RUN_STATUS_RUNING) { dError("RPC %p, shell msg:%s is ignored since dnode not running", pMsg->handle, taosMsg[pMsg->msgType]); - rpcMsg.code = TSDB_CODE_RPC_NOT_READY; + rpcMsg.code = TSDB_CODE_APP_NOT_READY; rpcSendResponse(&rpcMsg); rpcFreeCont(pMsg->pCont); return; @@ -144,7 +144,7 @@ void dnodeProcessMsgFromShell(SRpcMsg *pMsg, SRpcEpSet *pEpSet) { static int dnodeRetrieveUserAuthInfo(char *user, char *spi, char *encrypt, char *secret, char *ckey) { int code = mnodeRetriveAuth(user, spi, encrypt, secret, ckey); - if (code != TSDB_CODE_RPC_NOT_READY) return code; + if (code != TSDB_CODE_APP_NOT_READY) return code; SDMAuthMsg *pMsg = rpcMallocCont(sizeof(SDMAuthMsg)); tstrncpy(pMsg->user, user, sizeof(pMsg->user)); diff --git a/src/inc/taoserror.h b/src/inc/taoserror.h index 9af4cee28a..fe440c05e5 100644 --- a/src/inc/taoserror.h +++ b/src/inc/taoserror.h @@ -65,6 +65,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_RPC_INVALID_SESSION_ID, 0, 0x0010, "Invalid se TAOS_DEFINE_ERROR(TSDB_CODE_RPC_INVALID_MSG_TYPE, 0, 0x0011, "Invalid message type") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_INVALID_RESPONSE_TYPE, 0, 0x0012, "Invalid response type") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_INVALID_TIME_STAMP, 0, 0x0013, "Invalid timestamp") +TAOS_DEFINE_ERROR(TSDB_CODE_APP_NOT_READY, 0, 0x0014, "Database not ready") //common & util TAOS_DEFINE_ERROR(TSDB_CODE_COM_OPS_NOT_SUPPORT, 0, 0x0100, "Operation not supported") @@ -184,7 +185,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_VND_NO_DISK_PERMISSIONS, 0, 0x0506, "No write p TAOS_DEFINE_ERROR(TSDB_CODE_VND_NO_SUCH_FILE_OR_DIR, 0, 0x0507, "Missing data file") TAOS_DEFINE_ERROR(TSDB_CODE_VND_OUT_OF_MEMORY, 0, 0x0508, "Out of memory") TAOS_DEFINE_ERROR(TSDB_CODE_VND_APP_ERROR, 0, 0x0509, "Unexpected generic error in vnode") -TAOS_DEFINE_ERROR(TSDB_CODE_VND_INVALID_STATUS, 0, 0x0510, "Database not ready") +TAOS_DEFINE_ERROR(TSDB_CODE_VND_INVALID_STATUS, 0, 0x0510, "Invalid Vgroup status") TAOS_DEFINE_ERROR(TSDB_CODE_VND_NOT_SYNCED, 0, 0x0511, "Database suspended") TAOS_DEFINE_ERROR(TSDB_CODE_VND_NO_WRITE_AUTH, 0, 0x0512, "Write operation denied") diff --git a/src/mnode/src/mnodeUser.c b/src/mnode/src/mnodeUser.c index 6e63a8f2de..f4cb1a9ef3 100644 --- a/src/mnode/src/mnodeUser.c +++ b/src/mnode/src/mnodeUser.c @@ -581,8 +581,8 @@ void mnodeDropAllUsers(SAcctObj *pAcct) { int32_t mnodeRetriveAuth(char *user, char *spi, char *encrypt, char *secret, char *ckey) { if (!sdbIsMaster()) { *secret = 0; - mDebug("user:%s, failed to auth user, reason:%s", user, tstrerror(TSDB_CODE_RPC_NOT_READY)); - return TSDB_CODE_RPC_NOT_READY; + mDebug("user:%s, failed to auth user, reason:%s", user, tstrerror(TSDB_CODE_APP_NOT_READY)); + return TSDB_CODE_APP_NOT_READY; } SUserObj *pUser = mnodeGetUser(user); diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index d0c57a34d0..777e4f8240 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -1120,7 +1120,7 @@ static void rpcProcessIncomingMsg(SRpcConn *pConn, SRpcHead *pHead) { pContext->epSet.port[i] = htons(pContext->epSet.port[i]); rpcSendReqToServer(pRpc, pContext); rpcFreeCont(rpcMsg.pCont); - } else if (pHead->code == TSDB_CODE_RPC_NOT_READY) { + } else if (pHead->code == TSDB_CODE_RPC_NOT_READY || pHead->code == TSDB_CODE_APP_NOT_READY) { pContext->code = pHead->code; rpcProcessConnError(pContext, NULL); rpcFreeCont(rpcMsg.pCont); diff --git a/src/vnode/src/vnodeMain.c b/src/vnode/src/vnodeMain.c index 0a5e292f6d..7b5a1713af 100644 --- a/src/vnode/src/vnodeMain.c +++ b/src/vnode/src/vnodeMain.c @@ -390,7 +390,7 @@ void *vnodeAcquireRqueue(int32_t vgId) { if (pVnode == NULL) return NULL; if (pVnode->status == TAOS_VN_STATUS_RESET) { - terrno = TSDB_CODE_VND_INVALID_STATUS; + terrno = TSDB_CODE_APP_NOT_READY; vInfo("vgId:%d, status is in reset", vgId); vnodeRelease(pVnode); return NULL; @@ -404,7 +404,7 @@ void *vnodeAcquireWqueue(int32_t vgId) { if (pVnode == NULL) return NULL; if (pVnode->status == TAOS_VN_STATUS_RESET) { - terrno = TSDB_CODE_VND_INVALID_STATUS; + terrno = TSDB_CODE_APP_NOT_READY; vInfo("vgId:%d, status is in reset", vgId); vnodeRelease(pVnode); return NULL; diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index c41b245794..11315f5321 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -49,18 +49,18 @@ int32_t vnodeProcessRead(void *param, SReadMsg *pReadMsg) { if (pVnode->status != TAOS_VN_STATUS_READY) { vDebug("vgId:%d, msgType:%s not processed, vnode status is %d", pVnode->vgId, taosMsg[msgType], pVnode->status); - return TSDB_CODE_VND_INVALID_STATUS; + return TSDB_CODE_APP_NOT_READY; } // tsdb may be in reset state - if (pVnode->tsdb == NULL) return TSDB_CODE_RPC_NOT_READY; + if (pVnode->tsdb == NULL) return TSDB_CODE_APP_NOT_READY; if (pVnode->status == TAOS_VN_STATUS_CLOSING) - return TSDB_CODE_RPC_NOT_READY; + return TSDB_CODE_APP_NOT_READY; // TODO: Later, let slave to support query if (pVnode->syncCfg.replica > 1 && pVnode->role != TAOS_SYNC_ROLE_MASTER) { vDebug("vgId:%d, msgType:%s not processed, replica:%d role:%d", pVnode->vgId, taosMsg[msgType], pVnode->syncCfg.replica, pVnode->role); - return TSDB_CODE_RPC_NOT_READY; + return TSDB_CODE_APP_NOT_READY; } return (*vnodeProcessReadMsgFp[msgType])(pVnode, pReadMsg); diff --git a/src/vnode/src/vnodeWrite.c b/src/vnode/src/vnodeWrite.c index 900ff1fbba..1a9b05ed34 100644 --- a/src/vnode/src/vnodeWrite.c +++ b/src/vnode/src/vnodeWrite.c @@ -60,19 +60,19 @@ int32_t vnodeProcessWrite(void *param1, int qtype, void *param2, void *item) { } // tsdb may be in reset state - if (pVnode->tsdb == NULL) return TSDB_CODE_RPC_NOT_READY; + if (pVnode->tsdb == NULL) return TSDB_CODE_APP_NOT_READY; if (pVnode->status == TAOS_VN_STATUS_CLOSING) - return TSDB_CODE_RPC_NOT_READY; + return TSDB_CODE_APP_NOT_READY; if (pHead->version == 0) { // from client or CQ if (pVnode->status != TAOS_VN_STATUS_READY) { vDebug("vgId:%d, msgType:%s not processed, vnode status is %d", pVnode->vgId, taosMsg[pHead->msgType], pVnode->status); - return TSDB_CODE_VND_INVALID_STATUS; // it may be in deleting or closing state + return TSDB_CODE_APP_NOT_READY; // it may be in deleting or closing state } if (pVnode->role != TAOS_SYNC_ROLE_MASTER) { vDebug("vgId:%d, msgType:%s not processed, replica:%d role:%d", pVnode->vgId, taosMsg[pHead->msgType], pVnode->syncCfg.replica, pVnode->role); - return TSDB_CODE_RPC_NOT_READY; + return TSDB_CODE_APP_NOT_READY; } // assign version From 8960081486aa386d9e27d15d9781101c4005ed30 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Thu, 20 Aug 2020 05:52:06 +0000 Subject: [PATCH 2/3] TD-1184 send status msg to mnode on vnode changed --- src/dnode/src/dnodeMgmt.c | 7 +++++++ src/inc/dnode.h | 2 ++ src/inc/taoserror.h | 1 - src/vnode/src/vnodeMain.c | 3 +++ 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/dnode/src/dnodeMgmt.c b/src/dnode/src/dnodeMgmt.c index 4b06626753..c1071f9698 100644 --- a/src/dnode/src/dnodeMgmt.c +++ b/src/dnode/src/dnodeMgmt.c @@ -165,6 +165,13 @@ int32_t dnodeInitMgmtTimer() { return TSDB_CODE_SUCCESS; } +void dnodeSendStatusMsgToMnode() { + if (tsDnodeTmr != NULL && tsStatusTimer != NULL) { + dInfo("force send status msg to mnode"); + taosTmrReset(dnodeSendStatusMsg, 3, NULL, tsDnodeTmr, &tsStatusTimer); + } +} + void dnodeCleanupMgmtTimer() { if (tsStatusTimer != NULL) { taosTmrStopA(&tsStatusTimer); diff --git a/src/inc/dnode.h b/src/inc/dnode.h index 5a059c93a6..093ce93205 100644 --- a/src/inc/dnode.h +++ b/src/inc/dnode.h @@ -65,6 +65,8 @@ void dnodeSendRpcMnodeWriteRsp(void *pMsg, int32_t code); void dnodeReprocessMnodeWriteMsg(void *pMsg); void dnodeDelayReprocessMnodeWriteMsg(void *pMsg); +void dnodeSendStatusMsgToMnode(); + #ifdef __cplusplus } #endif diff --git a/src/inc/taoserror.h b/src/inc/taoserror.h index fe440c05e5..57c2b322fa 100644 --- a/src/inc/taoserror.h +++ b/src/inc/taoserror.h @@ -185,7 +185,6 @@ TAOS_DEFINE_ERROR(TSDB_CODE_VND_NO_DISK_PERMISSIONS, 0, 0x0506, "No write p TAOS_DEFINE_ERROR(TSDB_CODE_VND_NO_SUCH_FILE_OR_DIR, 0, 0x0507, "Missing data file") TAOS_DEFINE_ERROR(TSDB_CODE_VND_OUT_OF_MEMORY, 0, 0x0508, "Out of memory") TAOS_DEFINE_ERROR(TSDB_CODE_VND_APP_ERROR, 0, 0x0509, "Unexpected generic error in vnode") -TAOS_DEFINE_ERROR(TSDB_CODE_VND_INVALID_STATUS, 0, 0x0510, "Invalid Vgroup status") TAOS_DEFINE_ERROR(TSDB_CODE_VND_NOT_SYNCED, 0, 0x0511, "Database suspended") TAOS_DEFINE_ERROR(TSDB_CODE_VND_NO_WRITE_AUTH, 0, 0x0512, "Write operation denied") diff --git a/src/vnode/src/vnodeMain.c b/src/vnode/src/vnodeMain.c index 7b5a1713af..e5536bfaaf 100644 --- a/src/vnode/src/vnodeMain.c +++ b/src/vnode/src/vnodeMain.c @@ -30,6 +30,7 @@ #include "vnode.h" #include "vnodeInt.h" #include "query.h" +#include "dnode.h" #define TSDB_VNODE_VERSION_CONTENT_LEN 31 @@ -361,6 +362,7 @@ void vnodeRelease(void *pVnodeRaw) { sprintf(rootDir, "%s/vnode%d", tsVnodeDir, vgId); taosMvDir(tsVnodeBakDir, rootDir); taosRemoveDir(rootDir); + dnodeSendStatusMsgToMnode(); } tsem_destroy(&pVnode->sem); @@ -547,6 +549,7 @@ static void vnodeNotifyRole(void *ahandle, int8_t role) { SVnodeObj *pVnode = ahandle; vInfo("vgId:%d, sync role changed from %d to %d", pVnode->vgId, pVnode->role, role); pVnode->role = role; + dnodeSendStatusMsgToMnode(); if (pVnode->role == TAOS_SYNC_ROLE_MASTER) cqStart(pVnode->cq); From 760a8cd5be36b669d3704c184f258e8aeb240a53 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 20 Aug 2020 15:15:25 +0800 Subject: [PATCH 3/3] [td-255] sleep before retry --- src/client/src/tscServer.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index 8f2f60887a..b5fa1ea159 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -43,6 +43,14 @@ void tscUpdateSubscriptionProgress(void* sub, int64_t uid, TSKEY ts); void tscSaveSubscriptionProgress(void* sub); static int32_t minMsgSize() { return tsRpcHeadSize + 100; } +static int32_t getWaitingTimeInterval(int32_t count) { + int32_t initial = 100; // 100 ms by default + if (count <= 1) { + return 0; + } + + return initial * (2<<(count - 2)); +} static void tscSetDnodeEpSet(SSqlObj* pSql, SCMVgroupInfo* pVgroupInfo) { assert(pSql != NULL && pVgroupInfo != NULL && pVgroupInfo->numOfEps > 0); @@ -288,6 +296,12 @@ void tscProcessMsgFromServer(SRpcMsg *rpcMsg, SRpcEpSet *pEpSet) { if (pSql->retry > pSql->maxRetry) { tscError("%p max retry %d reached, give up", pSql, pSql->maxRetry); } else { + // wait for a little bit moment and then retry + if (rpcMsg->code == TSDB_CODE_APP_NOT_READY || rpcMsg->code == TSDB_CODE_VND_INVALID_VGROUP_ID) { + int32_t duration = getWaitingTimeInterval(pSql->retry); + taosMsleep(duration); + } + rpcMsg->code = tscRenewTableMeta(pSql, pTableMetaInfo->name); // if there is an error occurring, proceed to the following error handling procedure.