From 08e6d9a264ea020c66b2db645c7e78e2ed1ab63c Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 23 Dec 2022 09:22:14 +0800 Subject: [PATCH 01/89] enh: write coredump bt to log file --- include/util/tlog.h | 2 ++ source/dnode/mgmt/exe/dmMain.c | 6 +++++- source/dnode/vnode/src/vnd/vnodeSvr.c | 3 +++ source/util/src/tlog.c | 30 ++++++++++++++++++++++++++- 4 files changed, 39 insertions(+), 2 deletions(-) diff --git a/include/util/tlog.h b/include/util/tlog.h index e6ef7f388f..e256d2a6cc 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -87,6 +87,8 @@ bool taosAssert(bool condition, const char *file, int32_t line, const char *form #define ASSERTS(condition, ...) taosAssert(condition, __FILE__, __LINE__, __VA_ARGS__) #define ASSERT(condition) ASSERTS(condition, "assert info not provided") +void taosCrash(int signum, void *sigInfo, void *context); + // clang-format off #define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", DEBUG_FATAL, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} #define uError(...) { if (uDebugFlag & DEBUG_ERROR) { taosPrintLog("UTL ERROR ", DEBUG_ERROR, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index a8103351b4..00db22771a 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -73,12 +73,16 @@ static void dmSetSignalHandle() { taosSetSignal(SIGTERM, dmStopDnode); taosSetSignal(SIGHUP, dmStopDnode); taosSetSignal(SIGINT, dmStopDnode); - taosSetSignal(SIGABRT, dmStopDnode); taosSetSignal(SIGBREAK, dmStopDnode); #ifndef WINDOWS taosSetSignal(SIGTSTP, dmStopDnode); taosSetSignal(SIGQUIT, dmStopDnode); #endif + + taosSetSignal(SIGBUS, taosCrash); + taosSetSignal(SIGABRT, taosCrash); + taosSetSignal(SIGFPE, taosCrash); + taosSetSignal(SIGSEGV, taosCrash); } static int32_t dmParseArgs(int32_t argc, char const *argv[]) { diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 0668a01e32..2431d134ae 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -880,6 +880,9 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t version, void *pReq bool tbCreated = false; terrno = TSDB_CODE_SUCCESS; + int32_t tta = 0; + int32_t ttt = 1/tta; + pRsp->code = 0; pSubmitReq->version = version; statis.nBatchInsert = 1; diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index f6f814d82b..f01d3042f7 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -822,4 +822,32 @@ bool taosAssert(bool condition, const char *file, int32_t line, const char *form } return true; -} \ No newline at end of file +} + +void taosCrash(int signum, void *sigInfo, void *context) { + taosIgnSignal(SIGTERM); + taosIgnSignal(SIGHUP); + taosIgnSignal(SIGINT); + taosIgnSignal(SIGBREAK); + + taosIgnSignal(SIGBUS); + taosIgnSignal(SIGABRT); + taosIgnSignal(SIGFPE); + taosIgnSignal(SIGSEGV); + + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + + taosPrintLog(flags, level, dflag, "crash signal is %d", signum); + +#ifndef WINDOWS + taosPrintLog(flags, level, dflag, "sender PID:%d cmdline:%s", ((siginfo_t *)sigInfo)->si_pid, + taosGetCmdlineByPID(((siginfo_t *)sigInfo)->si_pid)); +#endif + + + taosPrintTrace(flags, level, dflag); + +} + From 5159d60f56407a59dec8742c355ec54aedfe1baa Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 23 Dec 2022 20:16:23 +0800 Subject: [PATCH 02/89] enh: vnodeCommit on consensus only --- include/libs/sync/sync.h | 5 +- source/dnode/mgmt/mgmt_vnode/src/vmInt.c | 2 + source/dnode/vnode/inc/vnode.h | 1 + source/dnode/vnode/src/inc/vnd.h | 1 + source/dnode/vnode/src/inc/vnodeInt.h | 1 + source/dnode/vnode/src/meta/metaOpen.c | 1 + source/dnode/vnode/src/vnd/vnodeCommit.c | 17 ++++- source/dnode/vnode/src/vnd/vnodeOpen.c | 5 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 8 +-- source/dnode/vnode/src/vnd/vnodeSync.c | 90 ++++++++++++++++-------- source/libs/sync/src/syncPipeline.c | 7 ++ source/libs/sync/src/syncRaftLog.c | 4 ++ source/libs/sync/src/syncRespMgr.c | 2 +- source/libs/tdb/inc/tdb.h | 7 +- source/libs/tdb/src/db/tdbPage.c | 2 +- source/libs/tdb/src/db/tdbTxn.c | 7 +- source/libs/transport/src/transCli.c | 2 +- 17 files changed, 122 insertions(+), 40 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index a13d203889..27aadee96a 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -49,10 +49,13 @@ extern "C" { #define SYNC_HEARTBEAT_REPLY_SLOW_MS 1500 #define SYNC_SNAP_RESEND_MS 1000 * 60 +#define SYNC_VND_COMMIT_MIN_MS 200 +#define SYNC_VND_COMMIT_MAX_MS 60000 + #define SYNC_MAX_BATCH_SIZE 1 #define SYNC_INDEX_BEGIN 0 #define SYNC_INDEX_INVALID -1 -#define SYNC_TERM_INVALID -1 // 0xFFFFFFFFFFFFFFFF +#define SYNC_TERM_INVALID -1 typedef enum { SYNC_STRATEGY_NO_SNAPSHOT = 0, diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index 313a88fc5c..4469e0afe6 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -79,6 +79,8 @@ int32_t vmOpenVnode(SVnodeMgmt *pMgmt, SWrapperCfg *pCfg, SVnode *pImpl) { void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { char path[TSDB_FILENAME_LEN] = {0}; + vnodeProposeCommitOnNeed(pVnode->pImpl); + taosThreadRwlockWrlock(&pMgmt->lock); taosHashRemove(pMgmt->hash, &pVnode->vgId, sizeof(int32_t)); taosThreadRwlockUnlock(&pMgmt->lock); diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 540f0c3127..7a3e3cb4a5 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -88,6 +88,7 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg); int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); +void vnodeProposeCommitOnNeed(SVnode *pVnode); // meta typedef struct SMeta SMeta; // todo: remove diff --git a/source/dnode/vnode/src/inc/vnd.h b/source/dnode/vnode/src/inc/vnd.h index 28797c5361..d8c4b001b1 100644 --- a/source/dnode/vnode/src/inc/vnd.h +++ b/source/dnode/vnode/src/inc/vnd.h @@ -102,6 +102,7 @@ void vnodeSyncClose(SVnode* pVnode); void vnodeRedirectRpcMsg(SVnode* pVnode, SRpcMsg* pMsg, int32_t code); bool vnodeIsLeader(SVnode* pVnode); bool vnodeIsRoleLeader(SVnode* pVnode); +int vnodeShouldCommit(SVnode* pVnode); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index e56f130c2c..b9080fd6c6 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -348,6 +348,7 @@ struct SVnode { STQ* pTq; SSink* pSink; tsem_t canCommit; + int64_t commitMs; int64_t sync; TdThreadMutex lock; bool blocked; diff --git a/source/dnode/vnode/src/meta/metaOpen.c b/source/dnode/vnode/src/meta/metaOpen.c index 1b5f742559..8974d93678 100644 --- a/source/dnode/vnode/src/meta/metaOpen.c +++ b/source/dnode/vnode/src/meta/metaOpen.c @@ -203,6 +203,7 @@ _err: int metaClose(SMeta *pMeta) { if (pMeta) { + if (pMeta->txn) tdbTxnClose(pMeta->txn); if (pMeta->pCache) metaCacheClose(pMeta); if (pMeta->pIdx) metaCloseIdx(pMeta); if (pMeta->pStreamDb) tdbTbClose(pMeta->pStreamDb); diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 4daab074b5..f7ec18e50e 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -59,6 +59,17 @@ int vnodeBegin(SVnode *pVnode) { } int vnodeShouldCommit(SVnode *pVnode) { + if (!pVnode->inUse || !osDataSpaceAvailable()) { + return false; + } + + int64_t nowMs = taosGetMonoTimestampMs(); + + return (((pVnode->inUse->size > pVnode->inUse->node.size) && (pVnode->commitMs + SYNC_VND_COMMIT_MIN_MS < nowMs)) || + (pVnode->inUse->size > 0 && pVnode->commitMs + SYNC_VND_COMMIT_MAX_MS < nowMs)); +} + +int vnodeShouldCommitOld(SVnode *pVnode) { if (pVnode->inUse) { return osDataSpaceAvailable() && (pVnode->inUse->size > pVnode->inUse->node.size); } @@ -194,6 +205,7 @@ static void vnodePrepareCommit(SVnode *pVnode) { vnodeBufPoolUnRef(pVnode->inUse); pVnode->inUse = NULL; } + static int32_t vnodeCommitTask(void *arg) { int32_t code = 0; @@ -210,6 +222,7 @@ _exit: taosMemoryFree(pInfo); return code; } + int vnodeAsyncCommit(SVnode *pVnode) { int32_t code = 0; @@ -257,7 +270,9 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { SVnode *pVnode = pInfo->pVnode; vInfo("vgId:%d, start to commit, commit ID:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode), - pVnode->state.commitID, pVnode->state.applied, pVnode->state.applyTerm); + pInfo->info.state.commitID, pInfo->info.state.committed, pInfo->info.state.commitTerm); + + pVnode->commitMs = taosGetMonoTimestampMs(); // persist wal before starting if (walPersist(pVnode->pWal) < 0) { diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index e09fafb756..58b73d806f 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -249,15 +249,18 @@ void vnodePreClose(SVnode *pVnode) { void vnodeClose(SVnode *pVnode) { if (pVnode) { - vnodeSyncCommit(pVnode); vnodeSyncClose(pVnode); vnodeQueryClose(pVnode); + + tsem_wait(&pVnode->canCommit); walClose(pVnode->pWal); tqClose(pVnode->pTq); if (pVnode->pTsdb) tsdbClose(&pVnode->pTsdb); smaClose(pVnode->pSma); metaClose(pVnode->pMeta); vnodeCloseBufPool(pVnode); + tsem_post(&pVnode->canCommit); + // destroy handle tsem_destroy(&(pVnode->canCommit)); tsem_destroy(&pVnode->syncSem); diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 6092888136..49bdfec269 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -200,6 +200,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp // skip header pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); len = pMsg->contLen - sizeof(SMsgHead); + bool needCommit = false; switch (pMsg->msgType) { /* META */ @@ -296,9 +297,8 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp vnodeProcessAlterConfigReq(pVnode, version, pReq, len, pRsp); break; case TDMT_VND_COMMIT: - vnodeSyncCommit(pVnode); - vnodeBegin(pVnode); - goto _exit; + needCommit = true; + break; default: vError("vgId:%d, unprocessed msg, %d", TD_VID(pVnode), pMsg->msgType); return -1; @@ -315,7 +315,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp } // commit if need - if (vnodeShouldCommit(pVnode)) { + if (needCommit) { vInfo("vgId:%d, commit at version %" PRId64, TD_VID(pVnode), version); vnodeAsyncCommit(pVnode); diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 2c23646db1..e000b26c6b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -101,6 +101,64 @@ static void vnodeHandleProposeError(SVnode *pVnode, SRpcMsg *pMsg, int32_t code) } } +static int32_t inline vnodeProposeMsg(SVnode *pVnode, SRpcMsg *pMsg, bool isWeak) { + int64_t seq = 0; + + taosThreadMutexLock(&pVnode->lock); + int32_t code = syncPropose(pVnode->sync, pMsg, isWeak, &seq); + bool wait = (code == 0 && vnodeIsMsgBlock(pMsg->msgType)); + if (wait) { + ASSERT(!pVnode->blocked); + pVnode->blocked = true; + pVnode->blockSec = taosGetTimestampSec(); + pVnode->blockSeq = seq; +#if 0 + pVnode->blockInfo = pMsg->info; +#endif + } + taosThreadMutexUnlock(&pVnode->lock); + + if (code > 0) { + vnodeHandleWriteMsg(pVnode, pMsg); + } else if (code < 0) { + if (terrno != 0) code = terrno; + vnodeHandleProposeError(pVnode, pMsg, code); + } + + if (wait) vnodeWaitBlockMsg(pVnode, pMsg); + return code; +} + +void vnodeProposeCommitOnNeed(SVnode *pVnode) { + if (!vnodeShouldCommit(pVnode)) { + return; + } + + int32_t contLen = sizeof(SMsgHead); + SMsgHead *pHead = rpcMallocCont(contLen); + pHead->contLen = contLen; + pHead->vgId = pVnode->config.vgId; + + SRpcMsg rpcMsg = {0}; + rpcMsg.msgType = TDMT_VND_COMMIT; + rpcMsg.contLen = contLen; + rpcMsg.pCont = pHead; + rpcMsg.info.noResp = 1; + + bool isWeak = false; + if (vnodeProposeMsg(pVnode, &rpcMsg, isWeak) < 0) { + vTrace("vgId:%d, failed to propose vnode commit since %s", pVnode->config.vgId, terrstr()); + goto _out; + } + + vInfo("vgId:%d, proposed vnode commit", pVnode->config.vgId); + +_out: + pVnode->commitMs = taosGetMonoTimestampMs(); + rpcFreeCont(rpcMsg.pCont); + rpcMsg.pCont = NULL; +} + #if BATCH_ENABLE static void inline vnodeProposeBatchMsg(SVnode *pVnode, SRpcMsg **pMsgArr, bool *pIsWeakArr, int32_t *arrSize) { @@ -178,6 +236,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) continue; } + vnodeProposeCommitOnNeed(pVnode); + code = vnodePreProcessWriteMsg(pVnode, pMsg); if (code != 0) { vGError("vgId:%d, msg:%p failed to pre-process since %s", vgId, pMsg, terrstr()); @@ -205,34 +265,6 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) #else -static int32_t inline vnodeProposeMsg(SVnode *pVnode, SRpcMsg *pMsg, bool isWeak) { - int64_t seq = 0; - - taosThreadMutexLock(&pVnode->lock); - int32_t code = syncPropose(pVnode->sync, pMsg, isWeak, &seq); - bool wait = (code == 0 && vnodeIsMsgBlock(pMsg->msgType)); - if (wait) { - ASSERT(!pVnode->blocked); - pVnode->blocked = true; - pVnode->blockSec = taosGetTimestampSec(); - pVnode->blockSeq = seq; -#if 0 - pVnode->blockInfo = pMsg->info; -#endif - } - taosThreadMutexUnlock(&pVnode->lock); - - if (code > 0) { - vnodeHandleWriteMsg(pVnode, pMsg); - } else if (code < 0) { - if (terrno != 0) code = terrno; - vnodeHandleProposeError(pVnode, pMsg, code); - } - - if (wait) vnodeWaitBlockMsg(pVnode, pMsg); - return code; -} - void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { SVnode *pVnode = pInfo->ahandle; int32_t vgId = pVnode->config.vgId; @@ -256,6 +288,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) continue; } + vnodeProposeCommitOnNeed(pVnode); + code = vnodePreProcessWriteMsg(pVnode, pMsg); if (code != 0) { vGError("vgId:%d, msg:%p failed to pre-process since %s", vgId, pMsg, terrstr()); diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index d875d3ca09..34fbebdc39 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -326,6 +326,8 @@ int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt } // update + ASSERT(pBuf->startIndex < index); + ASSERT(index - pBuf->startIndex < pBuf->size); ASSERT(pBuf->entries[index % pBuf->size].pItem == NULL); SSyncLogBufEntry tmp = {.pItem = pEntry, .prevLogIndex = prevIndex, .prevLogTerm = prevTerm}; pEntry = NULL; @@ -454,6 +456,11 @@ int32_t syncLogFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, Syn pEntry->index, pEntry->term, TMSG_INFO(pEntry->originalRpcType)); } + if (pEntry->originalRpcType == TDMT_VND_COMMIT) { + sInfo("vgId:%d, fsm execute vnode commit. index: %" PRId64 ", term: %" PRId64 "", pNode->vgId, pEntry->index, + pEntry->term); + } + SRpcMsg rpcMsg = {0}; syncEntry2OriginalRpc(pEntry, &rpcMsg); diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 018ac5bb7d..d86ff847f9 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -219,6 +219,10 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr ASSERT(pEntry->index == index); + if (pEntry->originalRpcType == TDMT_VND_COMMIT) { + walFsync(pWal, true); + } + sNTrace(pData->pSyncNode, "write index:%" PRId64 ", type:%s, origin type:%s, elapsed:%" PRId64, pEntry->index, TMSG_INFO(pEntry->msgType), TMSG_INFO(pEntry->originalRpcType), tsElapsed); return 0; diff --git a/source/libs/sync/src/syncRespMgr.c b/source/libs/sync/src/syncRespMgr.c index 79a38cad7a..6e945b591e 100644 --- a/source/libs/sync/src/syncRespMgr.c +++ b/source/libs/sync/src/syncRespMgr.c @@ -107,7 +107,7 @@ int32_t syncRespMgrGetAndDel(SSyncRespMgr *pObj, uint64_t seq, SRpcHandleInfo *p taosThreadMutexUnlock(&pObj->mutex); return 1; // get one object } else { - sNError(pObj->data, "get-and-del message handle, no object of seq:%" PRIu64, seq); + sNTrace(pObj->data, "get-and-del message handle, no object of seq:%" PRIu64, seq); } taosThreadMutexUnlock(&pObj->mutex); diff --git a/source/libs/tdb/inc/tdb.h b/source/libs/tdb/inc/tdb.h index 10a99bb1fa..0e20941b3a 100644 --- a/source/libs/tdb/inc/tdb.h +++ b/source/libs/tdb/inc/tdb.h @@ -74,7 +74,12 @@ int32_t tdbTbcUpsert(TBC *pTbc, const void *pKey, int nKey, const void *pData, i int32_t tdbTxnOpen(TXN *pTxn, int64_t txnid, void *(*xMalloc)(void *, size_t), void (*xFree)(void *, void *), void *xArg, int flags); -int32_t tdbTxnClose(TXN *pTxn); +int32_t tdbTxnCloseImpl(TXN *pTxn); +#define tdbTxnClose(pTxn) \ + do { \ + tdbTxnCloseImpl(pTxn); \ + (pTxn) = NULL; \ + } while (0) // other void tdbFree(void *); diff --git a/source/libs/tdb/src/db/tdbPage.c b/source/libs/tdb/src/db/tdbPage.c index 50dc8e0a65..d35f05461d 100644 --- a/source/libs/tdb/src/db/tdbPage.c +++ b/source/libs/tdb/src/db/tdbPage.c @@ -77,7 +77,7 @@ int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg) u8 *ptr; tdbTrace("page/destroy: %p/%d %p", pPage, pPage->id, xFree); - ASSERT(!pPage->isDirty); + // ASSERT(!pPage->isDirty); ASSERT(xFree); for (int iOvfl = 0; iOvfl < pPage->nOverflow; iOvfl++) { diff --git a/source/libs/tdb/src/db/tdbTxn.c b/source/libs/tdb/src/db/tdbTxn.c index 055d9c7f98..24f955fe2f 100644 --- a/source/libs/tdb/src/db/tdbTxn.c +++ b/source/libs/tdb/src/db/tdbTxn.c @@ -28,13 +28,18 @@ int tdbTxnOpen(TXN *pTxn, int64_t txnid, void *(*xMalloc)(void *, size_t), void return 0; } -int tdbTxnClose(TXN *pTxn) { +int tdbTxnCloseImpl(TXN *pTxn) { if (pTxn) { if (pTxn->jPageSet) { hashset_destroy(pTxn->jPageSet); pTxn->jPageSet = NULL; } + if (pTxn->jfd) { + tdbOsClose(pTxn->jfd); + ASSERT(pTxn->jfd == NULL); + } + tdbOsFree(pTxn); } diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index d144a76eb0..d37dff4d01 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1124,7 +1124,7 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { int ret = uv_tcp_connect(&conn->connReq, (uv_tcp_t*)(conn->stream), (const struct sockaddr*)&addr, cliConnCb); if (ret != 0) { - tGTrace("%s conn %p failed to connect to %s:%d, reason:%s", pTransInst->label, conn, conn->ip, conn->port, + tGError("%s conn %p failed to connect to %s:%d, reason:%s", pTransInst->label, conn, conn->ip, conn->port, uv_err_name(ret)); uv_timer_stop(conn->timer); From 7227e53b98a3e86447ff05e056c133f77646985b Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 26 Dec 2022 14:22:54 +0800 Subject: [PATCH 03/89] fix: set restore finish only after reaching the current term --- source/libs/sync/src/syncPipeline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 34fbebdc39..96a1674027 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -558,7 +558,8 @@ int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t comm ret = 0; _out: // mark as restored if needed - if (!pNode->restoreFinish && pBuf->commitIndex >= pNode->commitIndex) { + if (!pNode->restoreFinish && pBuf->commitIndex >= pNode->commitIndex && pEntry != NULL && + pNode->pRaftStore->currentTerm <= pEntry->term) { pNode->pFsm->FpRestoreFinishCb(pNode->pFsm); pNode->restoreFinish = true; sInfo("vgId:%d, restore finished. log buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId, From 137e7d009df468bf0ab6c5890263c71ef5c4992f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 26 Dec 2022 20:51:57 +0800 Subject: [PATCH 04/89] fix: update nextRowIterGet of tsdbCache --- source/dnode/vnode/src/tsdb/tsdbCache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index 6a82517067..0fc5b617bb 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -1080,6 +1080,8 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow) { iMax[nMax] = i; max[nMax++] = pIter->input[i].pRow; + } else { + pIter->input[i].next = false; } } } From c1c7f2593f6db5f0992ef40b1d574f9508ea3ee3 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 27 Dec 2022 10:38:56 +0800 Subject: [PATCH 05/89] enh: initialize commitMs as the time when vnodeOpen --- source/dnode/vnode/src/vnd/vnodeOpen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 58b73d806f..f2973c188e 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -142,6 +142,7 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) { pVnode->path = (char *)&pVnode[1]; strcpy(pVnode->path, path); pVnode->config = info.config; + pVnode->commitMs = taosGetMonoTimestampMs(); pVnode->state.committed = info.state.committed; pVnode->state.commitTerm = info.state.commitTerm; pVnode->state.commitID = info.state.commitID; From 691b75adf5559f3f8d4548736c6c20ed1c946372 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 27 Dec 2022 15:42:00 +0800 Subject: [PATCH 06/89] enh: flush database for test cases of rsma --- tests/script/tsim/sma/rsmaCreateInsertQuery.sim | 2 +- tests/script/tsim/sma/rsmaPersistenceRecovery.sim | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim index 04cf09715c..508e6f88c1 100644 --- a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim +++ b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim @@ -82,8 +82,8 @@ endi #=================================================================== - #==================== reboot to trigger commit data to file +sql flush database d0; system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start diff --git a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim index faff48b61c..4117a2403d 100644 --- a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim +++ b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim @@ -85,6 +85,7 @@ endi #==================== reboot to trigger commit data to file +sql flush database d0; system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start From 92e05b8ece2075e907c935d81b9a8958d7401a1f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 27 Dec 2022 16:28:44 +0800 Subject: [PATCH 07/89] enh: schedule vnodeCommit uniformly distributed --- source/dnode/vnode/src/inc/vnd.h | 1 + source/dnode/vnode/src/inc/vnodeInt.h | 7 ++++++- source/dnode/vnode/src/vnd/vnodeCommit.c | 13 ++++++++++--- source/dnode/vnode/src/vnd/vnodeOpen.c | 3 ++- source/dnode/vnode/src/vnd/vnodeSync.c | 2 +- 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/src/inc/vnd.h b/source/dnode/vnode/src/inc/vnd.h index d8c4b001b1..24821a3a61 100644 --- a/source/dnode/vnode/src/inc/vnd.h +++ b/source/dnode/vnode/src/inc/vnd.h @@ -86,6 +86,7 @@ int32_t vnodeGetBatchMeta(SVnode* pVnode, SRpcMsg* pMsg); // vnodeCommit.c int32_t vnodeBegin(SVnode* pVnode); int32_t vnodeShouldCommit(SVnode* pVnode); +void vnodeUpdCommitSched(SVnode* pVnode); void vnodeRollback(SVnode* pVnode); int32_t vnodeSaveInfo(const char* dir, const SVnodeInfo* pCfg); int32_t vnodeCommitInfo(const char* dir, const SVnodeInfo* pInfo); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 1b84fb0578..75367883f1 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -332,6 +332,11 @@ struct STsdbKeepCfg { int32_t keep2; }; +typedef struct SVCommitSched { + int64_t commitMs; + int64_t maxWaitMs; +} SVCommitSched; + struct SVnode { char* path; SVnodeCfg config; @@ -350,7 +355,7 @@ struct SVnode { STQ* pTq; SSink* pSink; tsem_t canCommit; - int64_t commitMs; + SVCommitSched commitSched; int64_t sync; TdThreadMutex lock; bool blocked; diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 4f63d6e043..3738966122 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -58,15 +58,22 @@ int vnodeBegin(SVnode *pVnode) { return 0; } +void vnodeUpdCommitSched(SVnode *pVnode) { + int64_t randNum = taosRand(); + pVnode->commitSched.commitMs = taosGetMonoTimestampMs(); + pVnode->commitSched.maxWaitMs = SYNC_VND_COMMIT_MAX_MS + (randNum % SYNC_VND_COMMIT_MAX_MS); +} + int vnodeShouldCommit(SVnode *pVnode) { if (!pVnode->inUse || !osDataSpaceAvailable()) { return false; } + SVCommitSched *pSched = &pVnode->commitSched; int64_t nowMs = taosGetMonoTimestampMs(); - return (((pVnode->inUse->size > pVnode->inUse->node.size) && (pVnode->commitMs + SYNC_VND_COMMIT_MIN_MS < nowMs)) || - (pVnode->inUse->size > 0 && pVnode->commitMs + SYNC_VND_COMMIT_MAX_MS < nowMs)); + return (((pVnode->inUse->size > pVnode->inUse->node.size) && (pSched->commitMs + SYNC_VND_COMMIT_MIN_MS < nowMs)) || + (pVnode->inUse->size > 0 && pSched->commitMs + pSched->maxWaitMs < nowMs)); } int vnodeShouldCommitOld(SVnode *pVnode) { @@ -306,7 +313,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { vInfo("vgId:%d, start to commit, commitId:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode), pInfo->info.state.commitID, pInfo->info.state.committed, pInfo->info.state.commitTerm); - pVnode->commitMs = taosGetMonoTimestampMs(); + vnodeUpdCommitSched(pVnode); // persist wal before starting if (walPersist(pVnode->pWal) < 0) { diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index f2973c188e..edbec0d044 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -142,7 +142,6 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) { pVnode->path = (char *)&pVnode[1]; strcpy(pVnode->path, path); pVnode->config = info.config; - pVnode->commitMs = taosGetMonoTimestampMs(); pVnode->state.committed = info.state.committed; pVnode->state.commitTerm = info.state.commitTerm; pVnode->state.commitID = info.state.commitID; @@ -158,6 +157,8 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) { taosThreadMutexInit(&pVnode->mutex, NULL); taosThreadCondInit(&pVnode->poolNotEmpty, NULL); + vnodeUpdCommitSched(pVnode); + int8_t rollback = vnodeShouldRollback(pVnode); // open buffer pool diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index eaea4102a2..a1dfeb9728 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -154,7 +154,7 @@ void vnodeProposeCommitOnNeed(SVnode *pVnode) { vInfo("vgId:%d, proposed vnode commit", pVnode->config.vgId); _out: - pVnode->commitMs = taosGetMonoTimestampMs(); + vnodeUpdCommitSched(pVnode); rpcFreeCont(rpcMsg.pCont); rpcMsg.pCont = NULL; } From 6fc47beb71f111a140ba8742b27d31da7d702344 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 28 Dec 2022 14:34:23 +0800 Subject: [PATCH 08/89] enh: streamMetaCommit in sync with vnodeCommit --- source/dnode/snode/src/snode.c | 1 + source/dnode/vnode/src/tq/tqCommit.c | 9 ++++++++- source/dnode/vnode/src/vnd/vnodeOpen.c | 3 +-- source/libs/stream/src/streamMeta.c | 4 ++-- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index b133226ed3..860db20fa8 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -124,6 +124,7 @@ FAIL: } void sndClose(SSnode *pSnode) { + streamMetaCommit(pSnode->pMeta); streamMetaClose(pSnode->pMeta); taosMemoryFree(pSnode->path); taosMemoryFree(pSnode); diff --git a/source/dnode/vnode/src/tq/tqCommit.c b/source/dnode/vnode/src/tq/tqCommit.c index dabd97a345..7fc66c4919 100644 --- a/source/dnode/vnode/src/tq/tqCommit.c +++ b/source/dnode/vnode/src/tq/tqCommit.c @@ -15,4 +15,11 @@ #include "tq.h" -int tqCommit(STQ* pTq) { return tqOffsetCommitFile(pTq->pOffsetStore); } +int tqCommit(STQ* pTq) { + if (streamMetaCommit(pTq->pStreamMeta) < 0) { + tqError("vgId:%d, failed to commit stream meta since %s", TD_VID(pTq->pVnode), terrstr()); + return -1; + } + + return tqOffsetCommitFile(pTq->pOffsetStore); +} diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index edbec0d044..96c8956b58 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -251,10 +251,9 @@ void vnodePreClose(SVnode *pVnode) { void vnodeClose(SVnode *pVnode) { if (pVnode) { + tsem_wait(&pVnode->canCommit); vnodeSyncClose(pVnode); vnodeQueryClose(pVnode); - - tsem_wait(&pVnode->canCommit); walClose(pVnode->pWal); tqClose(pVnode->pTq); if (pVnode->pTsdb) tsdbClose(&pVnode->pTsdb); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index afad78c5e5..56da86654c 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -69,8 +69,7 @@ _err: } void streamMetaClose(SStreamMeta* pMeta) { - tdbCommit(pMeta->db, pMeta->txn); - tdbPostCommit(pMeta->db, pMeta->txn); + tdbTxnClose(pMeta->txn); tdbTbClose(pMeta->pTaskDb); tdbTbClose(pMeta->pCheckpointDb); tdbClose(pMeta->db); @@ -88,6 +87,7 @@ void streamMetaClose(SStreamMeta* pMeta) { /*streamMetaReleaseTask(pMeta, pTask);*/ } taosHashCleanup(pMeta->pTasks); + taosHashCleanup(pMeta->pRecoverStatus); taosMemoryFree(pMeta->path); taosMemoryFree(pMeta); } From 724cf98dc33a7c2f223deefb6d18a419a19ccf7d Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 28 Dec 2022 17:47:33 +0800 Subject: [PATCH 09/89] fix: be conservative on commit progress in appendEntries --- source/libs/sync/src/syncAppendEntries.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 1dc6905b88..1e5adb4bed 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -211,7 +211,7 @@ _SEND_RESPONSE: if (accepted && matched) { pReply->success = true; // update commit index only after matching - (void)syncNodeUpdateCommitIndex(ths, pMsg->commitIndex); + (void)syncNodeUpdateCommitIndex(ths, TMIN(pMsg->commitIndex, pEntry->index)); } // ack, i.e. send response From c4fad84c7c69718b3bc2d01e3fb8836ea479dd57 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 28 Dec 2022 18:24:41 +0800 Subject: [PATCH 10/89] enh: reset commitVer in WAL on restore --- source/libs/wal/src/walMeta.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index 8e6628bb21..aeb0fe9fe9 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -292,19 +292,9 @@ void walAlignVersions(SWal* pWal) { } pWal->vers.lastVer = pWal->vers.snapshotVer; } - if (pWal->vers.commitVer < pWal->vers.snapshotVer) { - wWarn("vgId:%d, commitVer:%" PRId64 " is less than snapshotVer:%" PRId64 ". align with it.", pWal->cfg.vgId, - pWal->vers.commitVer, pWal->vers.snapshotVer); - pWal->vers.commitVer = pWal->vers.snapshotVer; - } - if (pWal->vers.appliedVer < pWal->vers.snapshotVer) { - wWarn("vgId:%d, appliedVer:%" PRId64 " is less than snapshotVer:%" PRId64 ". align with it.", pWal->cfg.vgId, - pWal->vers.appliedVer, pWal->vers.snapshotVer); - pWal->vers.appliedVer = pWal->vers.snapshotVer; - } - - pWal->vers.commitVer = TMIN(pWal->vers.lastVer, pWal->vers.commitVer); - pWal->vers.appliedVer = TMIN(pWal->vers.commitVer, pWal->vers.appliedVer); + // reset commitVer and appliedVer + pWal->vers.commitVer = pWal->vers.snapshotVer; + pWal->vers.appliedVer = pWal->vers.snapshotVer; } bool walLogEntriesComplete(const SWal* pWal) { From 0c4ade9373fe8a76a9cc23c7c53e56916b935231 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 28 Dec 2022 20:20:41 +0800 Subject: [PATCH 11/89] fix: update sync node commitIndex only if matchTerm equals currentTerm upon heartbeat --- source/libs/sync/inc/syncMessage.h | 4 ++-- source/libs/sync/inc/syncPipeline.h | 1 + source/libs/sync/src/syncAppendEntries.c | 1 + source/libs/sync/src/syncCommit.c | 1 + source/libs/sync/src/syncMain.c | 28 +++++++++++++++--------- source/libs/sync/src/syncPipeline.c | 11 ++++++++-- source/libs/sync/src/syncUtil.c | 2 +- source/libs/wal/src/walMeta.c | 1 + 8 files changed, 34 insertions(+), 15 deletions(-) diff --git a/source/libs/sync/inc/syncMessage.h b/source/libs/sync/inc/syncMessage.h index 3bd94dbab5..49486bc12d 100644 --- a/source/libs/sync/inc/syncMessage.h +++ b/source/libs/sync/inc/syncMessage.h @@ -247,8 +247,8 @@ typedef struct SyncLocalCmd { SRaftId destId; int32_t cmd; - SyncTerm sdNewTerm; // step down new term - SyncIndex fcIndex; // follower commit index + SyncTerm currentTerm; // step down new term + SyncIndex commitIndex; // follower commit index } SyncLocalCmd; int32_t syncBuildTimeout(SRpcMsg* pMsg, ESyncTimeoutType ttype, uint64_t logicClock, int32_t ms, SSyncNode* pNode); diff --git a/source/libs/sync/inc/syncPipeline.h b/source/libs/sync/inc/syncPipeline.h index a0a0691694..55cb0d7db6 100644 --- a/source/libs/sync/inc/syncPipeline.h +++ b/source/libs/sync/inc/syncPipeline.h @@ -98,6 +98,7 @@ int32_t syncLogBufferReInit(SSyncLogBuffer* pBuf, SSyncNode* pNode); // access int64_t syncLogBufferGetEndIndex(SSyncLogBuffer* pBuf); +SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf); int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry); int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm); int64_t syncLogBufferProceed(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncTerm* pMatchTerm); diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 1e5adb4bed..66ff28d07d 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -90,6 +90,7 @@ // int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) { + ASSERT(false && "deprecated"); if (ths->state != TAOS_SYNC_STATE_FOLLOWER) { sNTrace(ths, "can not do follower commit"); return -1; diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 5fdcbeb91c..152fddb7e6 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -44,6 +44,7 @@ // /\ UNCHANGED <> // void syncOneReplicaAdvance(SSyncNode* pSyncNode) { + ASSERT(false && "deprecated"); if (pSyncNode == NULL) { sError("pSyncNode is NULL"); return; diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 1a481a7e14..7a6c0f734f 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1036,6 +1036,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { } } pSyncNode->commitIndex = commitIndex; + sInfo("vgId:%d, sync node commitIndex initialized as %" PRId64, pSyncNode->vgId, pSyncNode->commitIndex); if (syncNodeLogStoreRestoreOnNeed(pSyncNode) < 0) { goto _error; @@ -1176,9 +1177,10 @@ int32_t syncNodeRestore(SSyncNode* pSyncNode) { } ASSERT(endIndex == lastVer + 1); - commitIndex = TMAX(pSyncNode->commitIndex, commitIndex); + pSyncNode->commitIndex = TMAX(pSyncNode->commitIndex, commitIndex); + sInfo("vgId:%d, restore sync until commitIndex:%" PRId64, pSyncNode->vgId, pSyncNode->commitIndex); - if (syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, commitIndex) < 0) { + if (syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, pSyncNode->commitIndex) < 0) { return -1; } @@ -2545,8 +2547,9 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) { SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont; pSyncMsg->cmd = SYNC_LOCAL_CMD_FOLLOWER_CMT; - pSyncMsg->fcIndex = pMsg->commitIndex; - SyncIndex fcIndex = pSyncMsg->fcIndex; + pSyncMsg->commitIndex = pMsg->commitIndex; + pSyncMsg->currentTerm = pMsg->term; + SyncIndex fcIndex = pSyncMsg->commitIndex; if (ths->syncEqMsg != NULL && ths->msgcb != NULL) { int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd); @@ -2567,7 +2570,8 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) { SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont; pSyncMsg->cmd = SYNC_LOCAL_CMD_STEP_DOWN; - pSyncMsg->sdNewTerm = pMsg->term; + pSyncMsg->currentTerm = pMsg->term; + pSyncMsg->commitIndex = pMsg->commitIndex; if (ths->syncEqMsg != NULL && ths->msgcb != NULL) { int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd); @@ -2575,7 +2579,7 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) { sError("vgId:%d, sync enqueue step-down msg error, code:%d", ths->vgId, code); rpcFreeCont(rpcMsgLocalCmd.pCont); } else { - sTrace("vgId:%d, sync enqueue step-down msg, new-term: %" PRId64, ths->vgId, pSyncMsg->sdNewTerm); + sTrace("vgId:%d, sync enqueue step-down msg, new-term: %" PRId64, ths->vgId, pSyncMsg->currentTerm); } } } @@ -2633,10 +2637,13 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) { syncLogRecvLocalCmd(ths, pMsg, ""); if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) { - syncNodeStepDown(ths, pMsg->sdNewTerm); + syncNodeStepDown(ths, pMsg->currentTerm); } else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) { - (void)syncNodeUpdateCommitIndex(ths, pMsg->fcIndex); + SyncTerm matchTerm = syncLogBufferGetLastMatchTerm(ths->pLogBuf); + if (pMsg->currentTerm == matchTerm) { + (void)syncNodeUpdateCommitIndex(ths, pMsg->commitIndex); + } if (syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) { sError("vgId:%d, failed to commit raft log since %s. commit index: %" PRId64 "", ths->vgId, terrstr(), ths->commitIndex); @@ -2649,14 +2656,15 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) { } int32_t syncNodeOnLocalCmdOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) { + ASSERT(false && "deprecated"); SyncLocalCmd* pMsg = pRpcMsg->pCont; syncLogRecvLocalCmd(ths, pMsg, ""); if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) { - syncNodeStepDown(ths, pMsg->sdNewTerm); + syncNodeStepDown(ths, pMsg->currentTerm); } else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) { - syncNodeFollowerCommit(ths, pMsg->fcIndex); + syncNodeFollowerCommit(ths, pMsg->commitIndex); } else { sError("error local cmd"); diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index d88e610372..de9cd6e1a6 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -265,20 +265,27 @@ int32_t syncLogBufferReInit(SSyncLogBuffer* pBuf, SSyncNode* pNode) { return ret; } -FORCE_INLINE SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf) { +FORCE_INLINE SyncTerm syncLogBufferGetLastMatchTermWithoutLock(SSyncLogBuffer* pBuf) { SyncIndex index = pBuf->matchIndex; SSyncRaftEntry* pEntry = pBuf->entries[(index + pBuf->size) % pBuf->size].pItem; ASSERT(pEntry != NULL); return pEntry->term; } +SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf) { + taosThreadMutexLock(&pBuf->mutex); + SyncTerm term = syncLogBufferGetLastMatchTermWithoutLock(pBuf); + taosThreadMutexUnlock(&pBuf->mutex); + return term; +} + int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm) { taosThreadMutexLock(&pBuf->mutex); syncLogBufferValidate(pBuf); int32_t ret = -1; SyncIndex index = pEntry->index; SyncIndex prevIndex = pEntry->index - 1; - SyncTerm lastMatchTerm = syncLogBufferGetLastMatchTerm(pBuf); + SyncTerm lastMatchTerm = syncLogBufferGetLastMatchTermWithoutLock(pBuf); SSyncRaftEntry* pExist = NULL; bool inBuf = true; diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index 49a24bebde..525681e53e 100644 --- a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -411,7 +411,7 @@ void syncLogRecvLocalCmd(SSyncNode* pSyncNode, const SyncLocalCmd* pMsg, const c if (!(sDebugFlag & DEBUG_TRACE)) return; sNTrace(pSyncNode, "recv sync-local-cmd {cmd:%d-%s, sd-new-term:%" PRId64 ", fc-index:%" PRId64 "}, %s", pMsg->cmd, - syncLocalCmdGetStr(pMsg->cmd), pMsg->sdNewTerm, pMsg->fcIndex, s); + syncLocalCmdGetStr(pMsg->cmd), pMsg->currentTerm, pMsg->commitIndex, s); } void syncLogSendAppendEntriesReply(SSyncNode* pSyncNode, const SyncAppendEntriesReply* pMsg, const char* s) { diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index aeb0fe9fe9..44e88a4dcc 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -295,6 +295,7 @@ void walAlignVersions(SWal* pWal) { // reset commitVer and appliedVer pWal->vers.commitVer = pWal->vers.snapshotVer; pWal->vers.appliedVer = pWal->vers.snapshotVer; + wInfo("vgId:%d, reset commitVer to %" PRId64, pWal->cfg.vgId, pWal->vers.commitVer); } bool walLogEntriesComplete(const SWal* pWal) { From 2890a8cb96abfa15a96f9558aab8877d5a8f576b Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 28 Dec 2022 20:34:40 +0800 Subject: [PATCH 12/89] fix: return error on failing to truncate raft log --- source/libs/sync/src/syncRaftLog.c | 23 ----------------------- source/libs/wal/src/walWrite.c | 2 +- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 701b61355d..03c3fe154d 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -316,29 +316,6 @@ static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIn SSyncLogStoreData* pData = pLogStore->data; SWal* pWal = pData->pWal; - // need not truncate - SyncIndex wallastVer = walGetLastVer(pWal); - if (fromIndex > wallastVer) { - return 0; - } - - // need not truncate - SyncIndex walCommitVer = walGetCommittedVer(pWal); - if (fromIndex <= walCommitVer) { - return 0; - } - - // delete from cache - for (SyncIndex index = fromIndex; index <= wallastVer; ++index) { - SLRUCache* pCache = pData->pSyncNode->pLogStore->pCache; - LRUHandle* h = taosLRUCacheLookup(pCache, &index, sizeof(index)); - if (h) { - sNTrace(pData->pSyncNode, "cache delete index:%" PRId64, index); - - taosLRUCacheRelease(pData->pSyncNode->pLogStore->pCache, h, true); - } - } - int32_t code = walRollback(pWal, fromIndex); if (code != 0) { int32_t err = terrno; diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index a5c7bf1abd..51307dc17d 100644 --- a/source/libs/wal/src/walWrite.c +++ b/source/libs/wal/src/walWrite.c @@ -105,7 +105,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) { wInfo("vgId:%d, wal rollback for version %" PRId64, pWal->cfg.vgId, ver); int64_t code; char fnameStr[WAL_FILE_LEN]; - if (ver > pWal->vers.lastVer || ver < pWal->vers.commitVer || ver <= pWal->vers.snapshotVer) { + if (ver > pWal->vers.lastVer || ver <= pWal->vers.commitVer || ver <= pWal->vers.snapshotVer) { terrno = TSDB_CODE_WAL_INVALID_VER; taosThreadMutexUnlock(&pWal->mutex); return -1; From 044e58b7122e4d53a786b404d2273105dcd4bab2 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 29 Dec 2022 10:00:35 +0800 Subject: [PATCH 13/89] enh: adjust logging msgs for sync probe and rollback --- source/libs/sync/src/syncPipeline.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index de9cd6e1a6..0443be3f5e 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -818,11 +818,10 @@ int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode pMgr->endIndex = index + 1; SSyncLogBuffer* pBuf = pNode->pLogBuf; - sTrace("vgId:%d, attempted to probe the %d'th peer with msg of index:%" PRId64 " term: %" PRId64 - ". pMgr(rs:%d): [%" PRId64 " %" PRId64 ", %" PRId64 "), pBuf: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 - ")", - pNode->vgId, pMgr->peerId, index, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, - pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); + sInfo("vgId:%d, probe peer:%" PRIx64 " with msg of index:%" PRId64 " term: %" PRId64 ". mgr (rs:%d): [%" PRId64 + " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", + pNode->vgId, pDestId->addr, index, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, + pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); return 0; } @@ -1004,6 +1003,10 @@ void syncLogBufferDestroy(SSyncLogBuffer* pBuf) { int32_t syncLogBufferRollback(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncIndex toIndex) { ASSERT(pBuf->commitIndex < toIndex && toIndex <= pBuf->endIndex); + if (toIndex == pBuf->endIndex) { + return 0; + } + sInfo("vgId:%d, rollback sync log buffer. toindex: %" PRId64 ", buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId, toIndex, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); From e22ce2df87ac07ba9b8384d59e0e54021624d856 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 29 Dec 2022 10:45:25 +0800 Subject: [PATCH 14/89] fix: use pReply->lastSendIndex instead while updating commitIndex --- source/libs/sync/src/syncAppendEntries.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 66ff28d07d..026ebdb37c 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -207,12 +207,13 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) { accepted = true; _SEND_RESPONSE: + pEntry = NULL; pReply->matchIndex = syncLogBufferProceed(ths->pLogBuf, ths, &pReply->lastMatchTerm); bool matched = (pReply->matchIndex >= pReply->lastSendIndex); if (accepted && matched) { pReply->success = true; // update commit index only after matching - (void)syncNodeUpdateCommitIndex(ths, TMIN(pMsg->commitIndex, pEntry->index)); + (void)syncNodeUpdateCommitIndex(ths, TMIN(pMsg->commitIndex, pReply->lastSendIndex)); } // ack, i.e. send response From e8e189cbbb6247a1b9b784fa8582e18c81baa6d8 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 29 Dec 2022 15:48:59 +0800 Subject: [PATCH 15/89] enh: add sync log buffer info in logging msg in syncLogReplMgrRetryOnNeed --- source/libs/sync/src/syncPipeline.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index a458882da5..a9b5aadaa5 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -654,10 +654,12 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { _out: if (retried) { pMgr->retryBackoff = syncLogGetNextRetryBackoff(pMgr); - sInfo("vgId:%d, resent %d sync log entries. dest: %" PRIx64 ", indexes: %" PRId64 " ..., terms: ... %" PRId64 - ", retryWaitMs: %" PRId64 ", repl mgr: [%" PRId64 " %" PRId64 ", %" PRId64 ")", + SSyncLogBuffer* pBuf = pNode->pLogBuf; + sInfo("vgId:%d, resend %d sync log entries. dest: %" PRIx64 ", indexes: %" PRId64 " ..., terms: ... %" PRId64 + ", retryWaitMs: %" PRId64 ", mgr: [%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 + " %" PRId64 ", %" PRId64 ")", pNode->vgId, count, pDestId->addr, firstIndex, term, retryWaitMs, pMgr->startIndex, pMgr->matchIndex, - pMgr->endIndex); + pMgr->endIndex, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); } return ret; } From ed43aeaa9b2fe3f16480949d4184e4aee7df8a6d Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 29 Dec 2022 19:47:28 +0800 Subject: [PATCH 16/89] enh: add vndCommitMaxInterval cfg param --- include/common/tglobal.h | 3 +++ include/libs/sync/sync.h | 3 +-- source/common/src/tglobal.c | 7 +++++++ source/dnode/vnode/src/vnd/vnodeCommit.c | 2 +- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/common/tglobal.h b/include/common/tglobal.h index d445fc26e8..f58c9fe055 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -69,6 +69,9 @@ extern int32_t tsElectInterval; extern int32_t tsHeartbeatInterval; extern int32_t tsHeartbeatTimeout; +// vnode +extern int64_t tsVndCommitMaxIntervalMs; + // monitor extern bool tsEnableMonitor; extern int32_t tsMonitorInterval; diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 02287deb73..b134e79442 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -49,8 +49,7 @@ extern "C" { #define SYNC_HEARTBEAT_REPLY_SLOW_MS 1500 #define SYNC_SNAP_RESEND_MS 1000 * 60 -#define SYNC_VND_COMMIT_MIN_MS 200 -#define SYNC_VND_COMMIT_MAX_MS 60000 +#define SYNC_VND_COMMIT_MIN_MS 1000 #define SYNC_MAX_BATCH_SIZE 1 #define SYNC_INDEX_BEGIN 0 diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 98b9b566ec..4de79060a7 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -60,6 +60,9 @@ int32_t tsElectInterval = 25 * 1000; int32_t tsHeartbeatInterval = 1000; int32_t tsHeartbeatTimeout = 20 * 1000; +// vnode +int64_t tsVndCommitMaxIntervalMs = 60 * 1000; + // monitor bool tsEnableMonitor = true; int32_t tsMonitorInterval = 30; @@ -427,6 +430,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "syncHeartbeatInterval", tsHeartbeatInterval, 10, 1000 * 60 * 24 * 2, 0) != 0) return -1; if (cfgAddInt32(pCfg, "syncHeartbeatTimeout", tsHeartbeatTimeout, 10, 1000 * 60 * 24 * 2, 0) != 0) return -1; + if (cfgAddInt64(pCfg, "vndCommitMaxInterval", tsVndCommitMaxIntervalMs, 1000, 1000 * 60 * 60, 0) != 0) return -1; + if (cfgAddBool(pCfg, "monitor", tsEnableMonitor, 0) != 0) return -1; if (cfgAddInt32(pCfg, "monitorInterval", tsMonitorInterval, 1, 200000, 0) != 0) return -1; if (cfgAddString(pCfg, "monitorFqdn", tsMonitorFqdn, 0) != 0) return -1; @@ -741,6 +746,8 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsHeartbeatInterval = cfgGetItem(pCfg, "syncHeartbeatInterval")->i32; tsHeartbeatTimeout = cfgGetItem(pCfg, "syncHeartbeatTimeout")->i32; + tsVndCommitMaxIntervalMs = cfgGetItem(pCfg, "vndCommitMaxInterval")->i64; + tsStartUdfd = cfgGetItem(pCfg, "udf")->bval; tstrncpy(tsUdfdResFuncs, cfgGetItem(pCfg, "udfdResFuncs")->str, sizeof(tsUdfdResFuncs)); tstrncpy(tsUdfdLdLibPath, cfgGetItem(pCfg, "udfdLdLibPath")->str, sizeof(tsUdfdLdLibPath)); diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 3738966122..47ca9eb1b6 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -61,7 +61,7 @@ int vnodeBegin(SVnode *pVnode) { void vnodeUpdCommitSched(SVnode *pVnode) { int64_t randNum = taosRand(); pVnode->commitSched.commitMs = taosGetMonoTimestampMs(); - pVnode->commitSched.maxWaitMs = SYNC_VND_COMMIT_MAX_MS + (randNum % SYNC_VND_COMMIT_MAX_MS); + pVnode->commitSched.maxWaitMs = tsVndCommitMaxIntervalMs + (randNum % tsVndCommitMaxIntervalMs); } int vnodeShouldCommit(SVnode *pVnode) { From dfefb68aa6911724145ac00720ff2b7435a26aff Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 30 Dec 2022 13:30:54 +0800 Subject: [PATCH 17/89] feat: support crash report --- include/common/tglobal.h | 4 + include/libs/transport/thttp.h | 2 +- include/os/osSystem.h | 56 ++++-- include/util/tlog.h | 5 +- packaging/cfg/taos.cfg | 3 + source/client/inc/clientInt.h | 3 + source/client/src/clientEnv.c | 136 ++++++++++++- source/client/src/clientMain.c | 4 +- source/client/src/clientMsgHandler.c | 1 + source/common/src/tglobal.c | 11 ++ source/dnode/mgmt/exe/dmMain.c | 43 ++++- source/dnode/mgmt/mgmt_dnode/inc/dmInt.h | 3 + source/dnode/mgmt/mgmt_dnode/src/dmInt.c | 4 + source/dnode/mgmt/mgmt_dnode/src/dmWorker.c | 88 +++++++++ source/dnode/mgmt/node_mgmt/inc/dmMgmt.h | 1 + source/dnode/mgmt/node_mgmt/src/dmEnv.c | 5 + source/dnode/mgmt/node_mgmt/src/dmNodes.c | 1 + source/dnode/mnode/impl/src/mndTelem.c | 2 +- source/libs/monitor/src/monMain.c | 3 +- source/libs/transport/src/thttp.c | 32 ++-- source/util/src/tlog.c | 201 ++++++++++++++++++-- 21 files changed, 550 insertions(+), 58 deletions(-) diff --git a/include/common/tglobal.h b/include/common/tglobal.h index 9e8a139b31..4cc998bb39 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -82,6 +82,10 @@ extern bool tsEnableTelem; extern int32_t tsTelemInterval; extern char tsTelemServer[]; extern uint16_t tsTelemPort; +extern bool tsEnableCrashReport; +extern char* tsTelemUri; +extern char* tsClientCrashReportUri; +extern char* tsSvrCrashReportUri; // query buffer management extern int32_t tsQueryBufferSize; // maximum allowed usage buffer size in MB for each data node during query processing diff --git a/include/libs/transport/thttp.h b/include/libs/transport/thttp.h index 7d8c588bfc..9a6aee4187 100644 --- a/include/libs/transport/thttp.h +++ b/include/libs/transport/thttp.h @@ -24,7 +24,7 @@ extern "C" { typedef enum { HTTP_GZIP, HTTP_FLAT } EHttpCompFlag; -int32_t taosSendHttpReport(const char* server, uint16_t port, char* pCont, int32_t contLen, EHttpCompFlag flag); +int32_t taosSendHttpReport(const char* server, const char* uri, uint16_t port, char* pCont, int32_t contLen, EHttpCompFlag flag); #ifdef __cplusplus } diff --git a/include/os/osSystem.h b/include/os/osSystem.h index 8f1f5c58d5..62bcc92fe1 100644 --- a/include/os/osSystem.h +++ b/include/os/osSystem.h @@ -47,26 +47,46 @@ int32_t taosGetOldTerminalMode(); void taosResetTerminalMode(); #if !defined(WINDOWS) -#define taosPrintTrace(flags, level, dflag) \ - { \ - void* array[100]; \ - int32_t size = backtrace(array, 100); \ - char** strings = backtrace_symbols(array, size); \ - if (strings != NULL) { \ - taosPrintLog(flags, level, dflag, "obtained %d stack frames", size); \ - for (int32_t i = 0; i < size; i++) { \ - taosPrintLog(flags, level, dflag, "frame:%d, %s", i, strings[i]); \ - } \ - } \ - \ - taosMemoryFree(strings); \ +#define taosLogTraceToBuf(buf, bufSize, ignoreNum) { \ + void* array[100]; \ + int32_t size = backtrace(array, 100); \ + char** strings = backtrace_symbols(array, size); \ + int32_t offset = 0; \ + if (strings != NULL) { \ + offset = snprintf(buf, bufSize - 1, "obtained %d stack frames\n", (ignoreNum > 0) ? size - ignoreNum : size); \ + for (int32_t i = (ignoreNum > 0) ? ignoreNum : 0; i < size; i++) { \ + offset += snprintf(buf + offset, bufSize - 1 - offset, "frame:%d, %s\n", (ignoreNum > 0) ? i - ignoreNum : i, strings[i]); \ + } \ + } \ + \ + taosMemoryFree(strings); \ +} + +#define taosPrintTrace(flags, level, dflag, ignoreNum) \ + { \ + void* array[100]; \ + int32_t size = backtrace(array, 100); \ + char** strings = backtrace_symbols(array, size); \ + if (strings != NULL) { \ + taosPrintLog(flags, level, dflag, "obtained %d stack frames", (ignoreNum > 0) ? size - ignoreNum : size); \ + for (int32_t i = (ignoreNum > 0) ? ignoreNum : 0; i < size; i++) { \ + taosPrintLog(flags, level, dflag, "frame:%d, %s", (ignoreNum > 0) ? i - ignoreNum : i, strings[i]); \ + } \ + } \ + \ + taosMemoryFree(strings); \ } #else -#define taosPrintTrace(flags, level, dflag) \ - { \ - taosPrintLog(flags, level, dflag, \ - "backtrace not implemented on windows, so detailed stack information cannot be printed"); \ - } +#define taosLogTraceToBuf(buf, bufSize, ignoreNum) { \ + snprintf(buf, bufSize - 1, \ + "backtrace not implemented on windows, so detailed stack information cannot be printed"); \ +} + +#define taosPrintTrace(flags, level, dflag, ignoreNum) \ + { \ + taosPrintLog(flags, level, dflag, \ + "backtrace not implemented on windows, so detailed stack information cannot be printed"); \ + } #endif #ifdef __cplusplus diff --git a/include/util/tlog.h b/include/util/tlog.h index e256d2a6cc..fb3e16f96d 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -87,7 +87,10 @@ bool taosAssert(bool condition, const char *file, int32_t line, const char *form #define ASSERTS(condition, ...) taosAssert(condition, __FILE__, __LINE__, __VA_ARGS__) #define ASSERT(condition) ASSERTS(condition, "assert info not provided") -void taosCrash(int signum, void *sigInfo, void *context); +void taosLogCrashInfo(char* nodeType, char* pMsg, int64_t msgLen, int signum, void *sigInfo); +void taosReadCrashInfo(char* filepath, char** pMsg, int64_t* pMsgLen, TdFilePtr* pFd); +void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile); +int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t startTime); // clang-format off #define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", DEBUG_FATAL, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} diff --git a/packaging/cfg/taos.cfg b/packaging/cfg/taos.cfg index e22aa85c97..3d3dfc8e73 100644 --- a/packaging/cfg/taos.cfg +++ b/packaging/cfg/taos.cfg @@ -43,6 +43,9 @@ # Switch for allowing TDengine to collect and report service usage information # telemetryReporting 1 +# Switch for allowing TDengine to collect and report crash information +# crashReporting 1 + # The maximum number of vnodes supported by this dnode # supportVnodes 0 diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h index ea76f726ea..903a6a22ca 100644 --- a/source/client/inc/clientInt.h +++ b/source/client/inc/clientInt.h @@ -313,6 +313,8 @@ extern SAppInfo appInfo; extern int32_t clientReqRefPool; extern int32_t clientConnRefPool; extern int32_t timestampDeltaLimit; +extern int64_t lastClusterId; + __async_send_cb_fn_t getMsgRspHandle(int32_t msgType); @@ -340,6 +342,7 @@ void resetConnectDB(STscObj* pTscObj); int taos_options_imp(TSDB_OPTION option, const char* str); void* openTransporter(const char* user, const char* auth, int32_t numOfThreads); +void tscStopCrashReport(); typedef struct AsyncArg { SRpcMsg msg; diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 64e1fd908a..b1e74b8089 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -28,13 +28,16 @@ #include "trpc.h" #include "tsched.h" #include "ttime.h" +#include "thttp.h" #define TSC_VAR_NOT_RELEASE 1 #define TSC_VAR_RELEASED 0 SAppInfo appInfo; +int64_t lastClusterId = 0; int32_t clientReqRefPool = -1; int32_t clientConnRefPool = -1; +int32_t clientStop = 0; int32_t timestampDeltaLimit = 900; // s @@ -385,6 +388,128 @@ void destroyRequest(SRequestObj *pRequest) { removeRequest(pRequest->self); } +void taosClientCrash(int signum, void *sigInfo, void *context) { + taosIgnSignal(SIGTERM); + taosIgnSignal(SIGHUP); + taosIgnSignal(SIGINT); + taosIgnSignal(SIGBREAK); + + taosIgnSignal(SIGBUS); + taosIgnSignal(SIGABRT); + taosIgnSignal(SIGFPE); + taosIgnSignal(SIGSEGV); + + char *pMsg = NULL; + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + int64_t msgLen= -1; + + if (tsEnableCrashReport) { + if (taosGenCrashJsonMsg(signum, &pMsg, lastClusterId, appInfo.startTime)) { + taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); + goto _return; + } else { + msgLen = strlen(pMsg); + } + } + +_return: + + taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo); +} + +static void *tscCrashReportThreadFp(void *param) { + setThreadName("client-crashReport"); + char filepath[PATH_MAX] = {0}; + snprintf(filepath, sizeof(filepath), "%s%s.taosCrashLog", tsLogDir, TD_DIRSEP); + char *pMsg = NULL; + int64_t msgLen = 0; + TdFilePtr pFile = NULL; + bool truncateFile = false; + int32_t sleepTime = 200; + int32_t reportPeriodNum = 3600 * 1000 / sleepTime; + int32_t loopTimes = reportPeriodNum; + + while (1) { + if (clientStop) break; + if (loopTimes++ < reportPeriodNum) { + taosMsleep(sleepTime); + continue; + } + + taosReadCrashInfo(filepath, &pMsg, &msgLen, &pFile); + if (pMsg && msgLen > 0) { + if (taosSendHttpReport(tsTelemServer, tsClientCrashReportUri, tsTelemPort, pMsg, msgLen, HTTP_FLAT) != 0) { + tscError("failed to send crash report"); + if (pFile) { + taosReleaseCrashLogFile(pFile, false); + continue; + } + } else { + tscInfo("succeed to send crash report"); + truncateFile = true; + } + } else { + tscDebug("no crash info"); + } + + taosMemoryFree(pMsg); + + if (pMsg && msgLen > 0) { + pMsg = NULL; + continue; + } + + if (pFile) { + taosReleaseCrashLogFile(pFile, truncateFile); + truncateFile = false; + } + + taosMsleep(sleepTime); + loopTimes = 0; + } + + clientStop = -1; + return NULL; +} + +int32_t tscCrashReportInit() { + if (!tsEnableCrashReport) { + return 0; + } + + TdThreadAttr thAttr; + taosThreadAttrInit(&thAttr); + taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE); + TdThread crashReportThread; + if (taosThreadCreate(&crashReportThread, &thAttr, tscCrashReportThreadFp, NULL) != 0) { + tscError("failed to create crashReport thread since %s", strerror(errno)); + return -1; + } + + taosThreadAttrDestroy(&thAttr); + return 0; +} + +void tscStopCrashReport() { + if (!tsEnableCrashReport) { + return; + } + + clientStop = 1; + while (atomic_load_32(&clientStop) > 0) { + taosMsleep(100); + } +} + +static void tscSetSignalHandle() { + taosSetSignal(SIGBUS, taosClientCrash); + taosSetSignal(SIGABRT, taosClientCrash); + taosSetSignal(SIGFPE, taosClientCrash); + taosSetSignal(SIGSEGV, taosClientCrash); +} + void taos_init_imp(void) { // In the APIs of other program language, taos_cleanup is not available yet. // So, to make sure taos_cleanup will be invoked to clean up the allocated resource to suppress the valgrind warning. @@ -392,6 +517,10 @@ void taos_init_imp(void) { errno = TSDB_CODE_SUCCESS; taosSeedRand(taosGetTimestampSec()); + appInfo.pid = taosGetPId(); + appInfo.startTime = taosGetTimestampMs(); + appInfo.pInstMap = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + deltaToUtcInitOnce(); if (taosCreateLog("taoslog", 10, configDir, NULL, NULL, NULL, NULL, 1) != 0) { @@ -404,6 +533,8 @@ void taos_init_imp(void) { return; } + tscSetSignalHandle(); + initQueryModuleMsgHandle(); if (taosConvInit() != 0) { @@ -433,9 +564,8 @@ void taos_init_imp(void) { taosGetAppName(appInfo.appName, NULL); taosThreadMutexInit(&appInfo.mutex, NULL); - appInfo.pid = taosGetPId(); - appInfo.startTime = taosGetTimestampMs(); - appInfo.pInstMap = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + tscCrashReportInit(); + tscDebug("client is initialized successfully"); } diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c index 87f5e5fa40..a151b113c1 100644 --- a/source/client/src/clientMain.c +++ b/source/client/src/clientMain.c @@ -55,6 +55,8 @@ void taos_cleanup(void) { return; } + tscStopCrashReport(); + int32_t id = clientReqRefPool; clientReqRefPool = -1; taosCloseRef(id); @@ -106,7 +108,7 @@ TAOS *taos_connect(const char *ip, const char *user, const char *pass, const cha if (pass == NULL) { pass = TSDB_DEFAULT_PASS; } - + STscObj *pObj = taos_connect_internal(ip, user, pass, NULL, db, port, CONN_TYPE__QUERY); if (pObj) { int64_t *rid = taosMemoryCalloc(1, sizeof(int64_t)); diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index 85027ff371..f414c7e92f 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -119,6 +119,7 @@ int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) { // update the appInstInfo pTscObj->pAppInfo->clusterId = connectRsp.clusterId; + lastClusterId = connectRsp.clusterId; pTscObj->connType = connectRsp.connType; diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 9caf0cc33e..e911a7cafe 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -73,6 +73,11 @@ bool tsEnableTelem = true; int32_t tsTelemInterval = 43200; char tsTelemServer[TSDB_FQDN_LEN] = "telemetry.taosdata.com"; uint16_t tsTelemPort = 80; +char* tsTelemUri = "/report"; + +bool tsEnableCrashReport = true; +char* tsClientCrashReportUri = "/ccrashreport"; +char* tsSvrCrashReportUri = "/dcrashreport"; // schemaless char tsSmlTagName[TSDB_COL_NAME_LEN] = "_tag_null"; @@ -314,6 +319,7 @@ static int32_t taosAddClientCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "maxMemUsedByInsert", tsMaxMemUsedByInsert, 1, INT32_MAX, true) != 0) return -1; if (cfgAddInt32(pCfg, "maxRetryWaitTime", tsMaxRetryWaitTime, 0, 86400000, 0) != 0) return -1; if (cfgAddBool(pCfg, "useAdapter", tsUseAdapter, true) != 0) return -1; + if (cfgAddBool(pCfg, "crashReporting", tsEnableCrashReport, true) != 0) return -1; tsNumOfTaskQueueThreads = tsNumOfCores / 2; tsNumOfTaskQueueThreads = TMAX(tsNumOfTaskQueueThreads, 4); @@ -436,6 +442,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "monitorMaxLogs", tsMonitorMaxLogs, 1, 1000000, 0) != 0) return -1; if (cfgAddBool(pCfg, "monitorComp", tsMonitorComp, 0) != 0) return -1; + if (cfgAddBool(pCfg, "crashReporting", tsEnableCrashReport, 0) != 0) return -1; if (cfgAddBool(pCfg, "telemetryReporting", tsEnableTelem, 0) != 0) return -1; if (cfgAddInt32(pCfg, "telemetryInterval", tsTelemInterval, 1, 200000, 0) != 0) return -1; if (cfgAddString(pCfg, "telemetryServer", tsTelemServer, 0) != 0) return -1; @@ -669,6 +676,7 @@ static int32_t taosSetClientCfg(SConfig *pCfg) { tsQueryUseNodeAllocator = cfgGetItem(pCfg, "queryUseNodeAllocator")->bval; tsKeepColumnName = cfgGetItem(pCfg, "keepColumnName")->bval; tsUseAdapter = cfgGetItem(pCfg, "useAdapter")->bval; + tsEnableCrashReport = cfgGetItem(pCfg, "crashReporting")->bval; tsMaxRetryWaitTime = cfgGetItem(pCfg, "maxRetryWaitTime")->i32; return 0; @@ -728,6 +736,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsQueryRspPolicy = cfgGetItem(pCfg, "queryRspPolicy")->i32; tsEnableTelem = cfgGetItem(pCfg, "telemetryReporting")->bval; + tsEnableCrashReport = cfgGetItem(pCfg, "crashReporting")->bval; tsTelemInterval = cfgGetItem(pCfg, "telemetryInterval")->i32; tstrncpy(tsTelemServer, cfgGetItem(pCfg, "telemetryServer")->str, TSDB_FQDN_LEN); tsTelemPort = (uint16_t)cfgGetItem(pCfg, "telemetryPort")->i32; @@ -797,6 +806,8 @@ int32_t taosSetCfg(SConfig *pCfg, char *name) { tsCountAlwaysReturnValue = cfgGetItem(pCfg, "countAlwaysReturnValue")->i32; } else if (strcasecmp("cDebugFlag", name) == 0) { cDebugFlag = cfgGetItem(pCfg, "cDebugFlag")->i32; + } else if (strcasecmp("crashReporting", name) == 0) { + tsEnableCrashReport = cfgGetItem(pCfg, "crashReporting")->bval; } break; } diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index 00db22771a..7a4eaf403c 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -44,6 +44,7 @@ static struct { char apolloUrl[PATH_MAX]; const char **envCmd; SArray *pArgs; // SConfigPair + int64_t startTime; } global = {0}; static void dmSetDebugFlag(int32_t signum, void *sigInfo, void *context) { taosSetAllDebugFlag(143, true); } @@ -67,6 +68,37 @@ static void dmStopDnode(int signum, void *sigInfo, void *context) { dmStop(); } +void dmLogCrash(int signum, void *sigInfo, void *context) { + taosIgnSignal(SIGTERM); + taosIgnSignal(SIGHUP); + taosIgnSignal(SIGINT); + taosIgnSignal(SIGBREAK); + + taosIgnSignal(SIGBUS); + taosIgnSignal(SIGABRT); + taosIgnSignal(SIGFPE); + taosIgnSignal(SIGSEGV); + + char *pMsg = NULL; + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + int64_t msgLen= -1; + + if (tsEnableCrashReport) { + if (taosGenCrashJsonMsg(signum, &pMsg, dmGetClusterId(), global.startTime)) { + taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); + goto _return; + } else { + msgLen = strlen(pMsg); + } + } + +_return: + + taosLogCrashInfo("taosd", pMsg, msgLen, signum, sigInfo); +} + static void dmSetSignalHandle() { taosSetSignal(SIGUSR1, dmSetDebugFlag); taosSetSignal(SIGUSR2, dmSetAssert); @@ -79,15 +111,18 @@ static void dmSetSignalHandle() { taosSetSignal(SIGQUIT, dmStopDnode); #endif - taosSetSignal(SIGBUS, taosCrash); - taosSetSignal(SIGABRT, taosCrash); - taosSetSignal(SIGFPE, taosCrash); - taosSetSignal(SIGSEGV, taosCrash); + taosSetSignal(SIGBUS, dmLogCrash); + taosSetSignal(SIGABRT, dmLogCrash); + taosSetSignal(SIGFPE, dmLogCrash); + taosSetSignal(SIGSEGV, dmLogCrash); } static int32_t dmParseArgs(int32_t argc, char const *argv[]) { + global.startTime = taosGetTimestampMs(); + int32_t cmdEnvIndex = 0; if (argc < 2) return 0; + global.envCmd = taosMemoryMalloc((argc - 1) * sizeof(char *)); memset(global.envCmd, 0, (argc - 1) * sizeof(char *)); for (int32_t i = 1; i < argc; ++i) { diff --git a/source/dnode/mgmt/mgmt_dnode/inc/dmInt.h b/source/dnode/mgmt/mgmt_dnode/inc/dmInt.h index c776beb3f0..ff32cbcb08 100644 --- a/source/dnode/mgmt/mgmt_dnode/inc/dmInt.h +++ b/source/dnode/mgmt/mgmt_dnode/inc/dmInt.h @@ -29,6 +29,7 @@ typedef struct SDnodeMgmt { const char *name; TdThread statusThread; TdThread monitorThread; + TdThread crashReportThread; SSingleWorker mgmtWorker; ProcessCreateNodeFp processCreateNodeFp; ProcessDropNodeFp processDropNodeFp; @@ -55,6 +56,8 @@ int32_t dmStartStatusThread(SDnodeMgmt *pMgmt); void dmStopStatusThread(SDnodeMgmt *pMgmt); int32_t dmStartMonitorThread(SDnodeMgmt *pMgmt); void dmStopMonitorThread(SDnodeMgmt *pMgmt); +int32_t dmStartCrashReportThread(SDnodeMgmt *pMgmt); +void dmStopCrashReportThread(SDnodeMgmt *pMgmt); int32_t dmStartWorker(SDnodeMgmt *pMgmt); void dmStopWorker(SDnodeMgmt *pMgmt); diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmInt.c b/source/dnode/mgmt/mgmt_dnode/src/dmInt.c index d2db1a4a62..51df293ba7 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmInt.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmInt.c @@ -23,6 +23,9 @@ static int32_t dmStartMgmt(SDnodeMgmt *pMgmt) { if (dmStartMonitorThread(pMgmt) != 0) { return -1; } + if (dmStartCrashReportThread(pMgmt) != 0) { + return -1; + } return 0; } @@ -30,6 +33,7 @@ static void dmStopMgmt(SDnodeMgmt *pMgmt) { pMgmt->pData->stopped = true; dmStopMonitorThread(pMgmt); dmStopStatusThread(pMgmt); + dmStopCrashReportThread(pMgmt); } static int32_t dmOpenMgmt(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) { diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index 80c040a5e8..76c8e09b70 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "dmInt.h" +#include "thttp.h" static void *dmStatusThreadFp(void *param) { SDnodeMgmt *pMgmt = param; @@ -63,6 +64,63 @@ static void *dmMonitorThreadFp(void *param) { return NULL; } +static void *dmCrashReportThreadFp(void *param) { + SDnodeMgmt *pMgmt = param; + int64_t lastTime = taosGetTimestampMs(); + setThreadName("dnode-crashReport"); + char filepath[PATH_MAX] = {0}; + snprintf(filepath, sizeof(filepath), "%s%s.taosdCrashLog", tsLogDir, TD_DIRSEP); + char *pMsg = NULL; + int64_t msgLen = 0; + TdFilePtr pFile = NULL; + bool truncateFile = false; + int32_t sleepTime = 200; + int32_t reportPeriodNum = 3600 * 1000 / sleepTime;; + int32_t loopTimes = reportPeriodNum; + + while (1) { + if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; + if (loopTimes++ < reportPeriodNum) { + taosMsleep(sleepTime); + continue; + } + + taosReadCrashInfo(filepath, &pMsg, &msgLen, &pFile); + if (pMsg && msgLen > 0) { + if (taosSendHttpReport(tsTelemServer, tsSvrCrashReportUri, tsTelemPort, pMsg, msgLen, HTTP_FLAT) != 0) { + dError("failed to send crash report"); + if (pFile) { + taosReleaseCrashLogFile(pFile, false); + continue; + } + } else { + dInfo("succeed to send crash report"); + truncateFile = true; + } + } else { + dDebug("no crash info"); + } + + taosMemoryFree(pMsg); + + if (pMsg && msgLen > 0) { + pMsg = NULL; + continue; + } + + if (pFile) { + taosReleaseCrashLogFile(pFile, truncateFile); + truncateFile = false; + } + + taosMsleep(sleepTime); + loopTimes = 0; + } + + return NULL; +} + + int32_t dmStartStatusThread(SDnodeMgmt *pMgmt) { TdThreadAttr thAttr; taosThreadAttrInit(&thAttr); @@ -105,6 +163,36 @@ void dmStopMonitorThread(SDnodeMgmt *pMgmt) { } } +int32_t dmStartCrashReportThread(SDnodeMgmt *pMgmt) { + if (!tsEnableCrashReport) { + return 0; + } + + TdThreadAttr thAttr; + taosThreadAttrInit(&thAttr); + taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE); + if (taosThreadCreate(&pMgmt->crashReportThread, &thAttr, dmCrashReportThreadFp, pMgmt) != 0) { + dError("failed to create crashReport thread since %s", strerror(errno)); + return -1; + } + + taosThreadAttrDestroy(&thAttr); + tmsgReportStartup("dnode-crashReport", "initialized"); + return 0; +} + +void dmStopCrashReportThread(SDnodeMgmt *pMgmt) { + if (!tsEnableCrashReport) { + return; + } + + if (taosCheckPthreadValid(pMgmt->crashReportThread)) { + taosThreadJoin(pMgmt->crashReportThread, NULL); + taosThreadClear(&pMgmt->crashReportThread); + } +} + + static void dmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { SDnodeMgmt *pMgmt = pInfo->ahandle; int32_t code = -1; diff --git a/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h b/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h index 7e85e6b722..02cd678433 100644 --- a/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h +++ b/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h @@ -85,6 +85,7 @@ typedef struct SDnode { // dmEnv.c SDnode *dmInstance(); void dmReportStartup(const char *pName, const char *pDesc); +int64_t dmGetClusterId(); // dmMgmt.c int32_t dmInitDnode(SDnode *pDnode); diff --git a/source/dnode/mgmt/node_mgmt/src/dmEnv.c b/source/dnode/mgmt/node_mgmt/src/dmEnv.c index e3bda5a3f0..1d0236c0c5 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmEnv.c +++ b/source/dnode/mgmt/node_mgmt/src/dmEnv.c @@ -268,3 +268,8 @@ void dmReportStartup(const char *pName, const char *pDesc) { tstrncpy(pStartup->desc, pDesc, TSDB_STEP_DESC_LEN); dDebug("step:%s, %s", pStartup->name, pStartup->desc); } + +int64_t dmGetClusterId() { + return global.data.clusterId; +} + diff --git a/source/dnode/mgmt/node_mgmt/src/dmNodes.c b/source/dnode/mgmt/node_mgmt/src/dmNodes.c index 981797834a..08330e025f 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmNodes.c +++ b/source/dnode/mgmt/node_mgmt/src/dmNodes.c @@ -111,6 +111,7 @@ static int32_t dmStartNodes(SDnode *pDnode) { dInfo("TDengine initialized successfully"); dmReportStartup("TDengine", "initialized successfully"); + return 0; } diff --git a/source/dnode/mnode/impl/src/mndTelem.c b/source/dnode/mnode/impl/src/mndTelem.c index 1d3209691a..9a4e4e08ec 100644 --- a/source/dnode/mnode/impl/src/mndTelem.c +++ b/source/dnode/mnode/impl/src/mndTelem.c @@ -132,7 +132,7 @@ static int32_t mndProcessTelemTimer(SRpcMsg* pReq) { taosThreadMutexUnlock(&pMgmt->lock); if (pCont != NULL) { - if (taosSendHttpReport(tsTelemServer, tsTelemPort, pCont, strlen(pCont), HTTP_FLAT) != 0) { + if (taosSendHttpReport(tsTelemServer, tsTelemUri, tsTelemPort, pCont, strlen(pCont), HTTP_FLAT) != 0) { mError("failed to send telemetry report"); } else { mInfo("succeed to send telemetry report"); diff --git a/source/libs/monitor/src/monMain.c b/source/libs/monitor/src/monMain.c index b3ca0fa452..b23a36d4df 100644 --- a/source/libs/monitor/src/monMain.c +++ b/source/libs/monitor/src/monMain.c @@ -20,6 +20,7 @@ #include "ttime.h" static SMonitor tsMonitor = {0}; +static char* tsMonUri = "/report"; void monRecordLog(int64_t ts, ELogLevel level, const char *content) { taosThreadMutexLock(&tsMonitor.lock); @@ -550,7 +551,7 @@ void monSendReport() { // uDebugL("report cont:%s\n", pCont); if (pCont != NULL) { EHttpCompFlag flag = tsMonitor.cfg.comp ? HTTP_GZIP : HTTP_FLAT; - if (taosSendHttpReport(tsMonitor.cfg.server, tsMonitor.cfg.port, pCont, strlen(pCont), flag) != 0) { + if (taosSendHttpReport(tsMonitor.cfg.server, tsMonUri, tsMonitor.cfg.port, pCont, strlen(pCont), flag) != 0) { uError("failed to send monitor msg"); } taosMemoryFree(pCont); diff --git a/source/libs/transport/src/thttp.c b/source/libs/transport/src/thttp.c index 00854b5ee5..cd508f6fe9 100644 --- a/source/libs/transport/src/thttp.c +++ b/source/libs/transport/src/thttp.c @@ -35,6 +35,7 @@ typedef struct SHttpModule { typedef struct SHttpMsg { queue q; char* server; + char* uri; int32_t port; char* cont; int32_t len; @@ -63,26 +64,26 @@ static void httpHandleReq(SHttpMsg* msg); static void httpHandleQuit(SHttpMsg* msg); static int32_t httpSendQuit(); -static int32_t taosSendHttpReportImpl(const char* server, uint16_t port, char* pCont, int32_t contLen, +static int32_t taosSendHttpReportImpl(const char* server, const char* uri, uint16_t port, char* pCont, int32_t contLen, EHttpCompFlag flag); -static int32_t taosBuildHttpHeader(const char* server, int32_t contLen, char* pHead, int32_t headLen, +static int32_t taosBuildHttpHeader(const char* server, const char* uri, int32_t contLen, char* pHead, int32_t headLen, EHttpCompFlag flag) { if (flag == HTTP_FLAT) { return snprintf(pHead, headLen, - "POST /report HTTP/1.1\n" + "POST %s HTTP/1.1\n" "Host: %s\n" "Content-Type: application/json\n" "Content-Length: %d\n\n", - server, contLen); + uri, server, contLen); } else if (flag == HTTP_GZIP) { return snprintf(pHead, headLen, - "POST /report HTTP/1.1\n" + "POST %s HTTP/1.1\n" "Host: %s\n" "Content-Type: application/json\n" "Content-Encoding: gzip\n" "Content-Length: %d\n\n", - server, contLen); + uri, server, contLen); } else { terrno = TSDB_CODE_INVALID_CFG; return -1; @@ -181,6 +182,7 @@ static void httpDestroyMsg(SHttpMsg* msg) { if (msg == NULL) return; taosMemoryFree(msg->server); + taosMemoryFree(msg->uri); taosMemoryFree(msg->cont); taosMemoryFree(msg); } @@ -293,10 +295,11 @@ int32_t httpSendQuit() { return 0; } -static int32_t taosSendHttpReportImpl(const char* server, uint16_t port, char* pCont, int32_t contLen, +static int32_t taosSendHttpReportImpl(const char* server, const char* uri, uint16_t port, char* pCont, int32_t contLen, EHttpCompFlag flag) { SHttpMsg* msg = taosMemoryMalloc(sizeof(SHttpMsg)); msg->server = strdup(server); + msg->uri = strdup(uri); msg->port = port; msg->cont = taosMemoryMalloc(contLen); memcpy(msg->cont, pCont, contLen); @@ -309,12 +312,10 @@ static int32_t taosSendHttpReportImpl(const char* server, uint16_t port, char* p httpDestroyMsg(msg); tError("http-report already released"); return -1; - } else { - msg->http = load; - transAsyncSend(load->asyncPool, &(msg->q)); } - - return 0; + + msg->http = load; + return transAsyncSend(load->asyncPool, &(msg->q)); } static void httpDestroyClientCb(uv_handle_t* handle) { @@ -360,7 +361,7 @@ static void httpHandleReq(SHttpMsg* msg) { int32_t len = 2048; char* header = taosMemoryCalloc(1, len); - int32_t headLen = taosBuildHttpHeader(msg->server, msg->len, header, len, msg->flag); + int32_t headLen = taosBuildHttpHeader(msg->server, msg->uri, msg->len, header, len, msg->flag); if (headLen < 0) { taosMemoryFree(header); goto END; @@ -380,6 +381,7 @@ static void httpHandleReq(SHttpMsg* msg) { cli->port = msg->port; cli->dest = dest; + taosMemoryFree(msg->uri); taosMemoryFree(msg); uv_tcp_init(http->loop, &cli->tcp); @@ -406,9 +408,9 @@ END: httpDestroyMsg(msg); } -int32_t taosSendHttpReport(const char* server, uint16_t port, char* pCont, int32_t contLen, EHttpCompFlag flag) { +int32_t taosSendHttpReport(const char* server, const char* uri, uint16_t port, char* pCont, int32_t contLen, EHttpCompFlag flag) { taosThreadOnce(&transHttpInit, transHttpEnvInit); - return taosSendHttpReportImpl(server, port, pCont, contLen, flag); + return taosSendHttpReportImpl(server, uri, port, pCont, contLen, flag); } static void transHttpEnvInit() { diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index f01d3042f7..663451a89c 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -18,6 +18,8 @@ #include "os.h" #include "tconfig.h" #include "tutil.h" +#include "tjson.h" +#include "tglobal.h" #define LOG_MAX_LINE_SIZE (1024) #define LOG_MAX_LINE_BUFFER_SIZE (LOG_MAX_LINE_SIZE + 3) @@ -808,7 +810,7 @@ bool taosAssert(bool condition, const char *file, int32_t line, const char *form taosPrintLogImp(1, 255, buffer, len); taosPrintLog(flags, level, dflag, "tAssert at file %s:%d exit:%d", file, line, tsAssert); - taosPrintTrace(flags, level, dflag); + taosPrintTrace(flags, level, dflag, -1); if (tsAssert) { // taosCloseLog(); @@ -824,21 +826,100 @@ bool taosAssert(bool condition, const char *file, int32_t line, const char *form return true; } -void taosCrash(int signum, void *sigInfo, void *context) { - taosIgnSignal(SIGTERM); - taosIgnSignal(SIGHUP); - taosIgnSignal(SIGINT); - taosIgnSignal(SIGBREAK); +int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t startTime) { + SJson* pJson = tjsonCreateObject(); + if (pJson == NULL) return -1; + char tmp[4096] = {0}; + + tjsonAddDoubleToObject(pJson, "reportVersion", 1); + + tjsonAddIntegerToObject(pJson, "clusterId", clusterId); + tjsonAddIntegerToObject(pJson, "startTime", startTime); + + taosGetFqdn(tmp); + tjsonAddStringToObject(pJson, "fqdn", tmp); - taosIgnSignal(SIGBUS); - taosIgnSignal(SIGABRT); - taosIgnSignal(SIGFPE); - taosIgnSignal(SIGSEGV); + tjsonAddIntegerToObject(pJson, "pid", taosGetPId()); + + taosGetAppName(tmp, NULL); + tjsonAddStringToObject(pJson, "appName", tmp); + + if (taosGetOsReleaseName(tmp, sizeof(tmp)) == 0) { + tjsonAddStringToObject(pJson, "os", tmp); + } + + float numOfCores = 0; + if (taosGetCpuInfo(tmp, sizeof(tmp), &numOfCores) == 0) { + tjsonAddStringToObject(pJson, "cpuModel", tmp); + tjsonAddDoubleToObject(pJson, "numOfCpu", numOfCores); + } else { + tjsonAddDoubleToObject(pJson, "numOfCpu", tsNumOfCores); + } + + snprintf(tmp, sizeof(tmp), "%" PRId64 " kB", tsTotalMemoryKB); + tjsonAddStringToObject(pJson, "memory", tmp); + + tjsonAddStringToObject(pJson, "version", version); + tjsonAddStringToObject(pJson, "buildInfo", buildinfo); + tjsonAddStringToObject(pJson, "gitInfo", gitinfo); + + tjsonAddIntegerToObject(pJson, "crashSig", signum); + tjsonAddIntegerToObject(pJson, "crashTs", taosGetTimestampUs()); + + taosLogTraceToBuf(tmp, sizeof(tmp), 3); + tjsonAddStringToObject(pJson, "stackInfo", tmp); + char* pCont = tjsonToString(pJson); + tjsonDelete(pJson); + + *pMsg = pCont; + + return TSDB_CODE_SUCCESS; +} + + +void taosLogCrashInfo(char* nodeType, char* pMsg, int64_t msgLen, int signum, void *sigInfo) { const char *flags = "UTL FATAL "; ELogLevel level = DEBUG_FATAL; int32_t dflag = 255; - + char filepath[PATH_MAX] = {0}; + TdFilePtr pFile = NULL; + + if (pMsg && msgLen > 0) { + snprintf(filepath, sizeof(filepath), "%s%s.%sCrashLog", tsLogDir, TD_DIRSEP, nodeType); + + pFile = taosOpenFile(filepath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + if (pFile == NULL) { + taosPrintLog(flags, level, dflag, "failed to open file:%s since %s", filepath, terrstr()); + goto _return; + } + + taosLockFile(pFile); + + int64_t writeSize = taosWriteFile(pFile, &msgLen, sizeof(msgLen)); + if (sizeof(msgLen) != writeSize) { + taosUnLockFile(pFile); + taosPrintLog(flags, level, dflag, "failed to write len to file:%s,%p wlen:%" PRId64 " tlen:%" PRId64 " since %s", + filepath, pFile, writeSize, sizeof(msgLen), terrstr()); + goto _return; + } + + writeSize = taosWriteFile(pFile, pMsg, msgLen); + if (msgLen != writeSize) { + taosUnLockFile(pFile); + taosPrintLog(flags, level, dflag, "failed to write file:%s,%p wlen:%" PRId64 " tlen:%" PRId64 " since %s", + filepath, pFile, writeSize, msgLen, terrstr()); + goto _return; + } + + taosUnLockFile(pFile); + } + +_return: + + if (pFile) taosCloseFile(&pFile); + + terrno = TAOS_SYSTEM_ERROR(errno); taosPrintLog(flags, level, dflag, "crash signal is %d", signum); #ifndef WINDOWS @@ -846,8 +927,102 @@ void taosCrash(int signum, void *sigInfo, void *context) { taosGetCmdlineByPID(((siginfo_t *)sigInfo)->si_pid)); #endif + taosPrintTrace(flags, level, dflag, 3); - taosPrintTrace(flags, level, dflag); - + taosMemoryFree(pMsg); +} + +void taosReadCrashInfo(char* filepath, char** pMsg, int64_t* pMsgLen, TdFilePtr* pFd) { + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + TdFilePtr pFile = NULL; + bool truncateFile = false; + char* buf = NULL; + + if (NULL == *pFd) { + int64_t filesize = 0; + if (taosStatFile(filepath, &filesize, NULL) < 0) { + if (ENOENT == errno) { + return; + } + + terrno = TAOS_SYSTEM_ERROR(errno); + taosPrintLog(flags, level, dflag, "failed to stat file:%s since %s", filepath, terrstr()); + return; + } + + if (filesize <= 0) { + return; + } + + pFile = taosOpenFile(filepath, TD_FILE_READ|TD_FILE_WRITE); + if (pFile == NULL) { + if (ENOENT == errno) { + return; + } + + terrno = TAOS_SYSTEM_ERROR(errno); + taosPrintLog(flags, level, dflag, "failed to open file:%s since %s", filepath, terrstr()); + return; + } + + taosLockFile(pFile); + } else { + pFile = *pFd; + } + + int64_t msgLen = 0; + int64_t readSize = taosReadFile(pFile, &msgLen, sizeof(msgLen)); + if (sizeof(msgLen) != readSize) { + truncateFile = true; + if (readSize < 0) { + taosPrintLog(flags, level, dflag, "failed to read len from file:%s,%p wlen:%" PRId64 " tlen:%" PRId64 " since %s", + filepath, pFile, readSize, sizeof(msgLen), terrstr()); + } + goto _return; + } + + buf = taosMemoryMalloc(msgLen); + if (NULL == buf) { + taosPrintLog(flags, level, dflag, "failed to malloc buf, size:%" PRId64, msgLen); + goto _return; + } + + readSize = taosReadFile(pFile, buf, msgLen); + if (msgLen != readSize) { + truncateFile = true; + taosPrintLog(flags, level, dflag, "failed to read file:%s,%p wlen:%" PRId64 " tlen:%" PRId64 " since %s", + filepath, pFile, readSize, msgLen, terrstr()); + goto _return; + } + + *pMsg = buf; + *pMsgLen = msgLen; + *pFd = pFile; + + return; + +_return: + + if (truncateFile) { + taosFtruncateFile(pFile, 0); + } + taosUnLockFile(pFile); + taosCloseFile(&pFile); + taosMemoryFree(buf); + + *pMsg = NULL; + *pMsgLen = 0; + *pFd = NULL; +} + +void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile) { + if (truncateFile) { + taosFtruncateFile(pFile, 0); + } + + taosUnLockFile(pFile); + taosCloseFile(&pFile); } From 586b73bef123fb671a62987f6214e7bd4288ba8b Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 30 Dec 2022 15:56:37 +0800 Subject: [PATCH 18/89] fix: windows compile issue --- source/client/src/clientEnv.c | 6 +++++- source/dnode/mgmt/exe/dmMain.c | 6 +++++- source/util/src/tlog.c | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index b1e74b8089..d88ca4ce19 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -393,8 +393,10 @@ void taosClientCrash(int signum, void *sigInfo, void *context) { taosIgnSignal(SIGHUP); taosIgnSignal(SIGINT); taosIgnSignal(SIGBREAK); - + +#if !defined(WINDOWS) taosIgnSignal(SIGBUS); +#endif taosIgnSignal(SIGABRT); taosIgnSignal(SIGFPE); taosIgnSignal(SIGSEGV); @@ -504,7 +506,9 @@ void tscStopCrashReport() { } static void tscSetSignalHandle() { +#if !defined(WINDOWS) taosSetSignal(SIGBUS, taosClientCrash); +#endif taosSetSignal(SIGABRT, taosClientCrash); taosSetSignal(SIGFPE, taosClientCrash); taosSetSignal(SIGSEGV, taosClientCrash); diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index 61ca3f6b6e..006610fb69 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -73,8 +73,10 @@ void dmLogCrash(int signum, void *sigInfo, void *context) { taosIgnSignal(SIGHUP); taosIgnSignal(SIGINT); taosIgnSignal(SIGBREAK); - + +#ifndef WINDOWS taosIgnSignal(SIGBUS); +#endif taosIgnSignal(SIGABRT); taosIgnSignal(SIGFPE); taosIgnSignal(SIGSEGV); @@ -111,7 +113,9 @@ static void dmSetSignalHandle() { taosSetSignal(SIGQUIT, dmStopDnode); #endif +#ifndef WINDOWS taosSetSignal(SIGBUS, dmLogCrash); +#endif taosSetSignal(SIGABRT, dmLogCrash); taosSetSignal(SIGFPE, dmLogCrash); taosSetSignal(SIGSEGV, dmLogCrash); diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 663451a89c..9374a39ef8 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -899,7 +899,7 @@ void taosLogCrashInfo(char* nodeType, char* pMsg, int64_t msgLen, int signum, vo int64_t writeSize = taosWriteFile(pFile, &msgLen, sizeof(msgLen)); if (sizeof(msgLen) != writeSize) { taosUnLockFile(pFile); - taosPrintLog(flags, level, dflag, "failed to write len to file:%s,%p wlen:%" PRId64 " tlen:%" PRId64 " since %s", + taosPrintLog(flags, level, dflag, "failed to write len to file:%s,%p wlen:%" PRId64 " tlen:%lu since %s", filepath, pFile, writeSize, sizeof(msgLen), terrstr()); goto _return; } @@ -977,7 +977,7 @@ void taosReadCrashInfo(char* filepath, char** pMsg, int64_t* pMsgLen, TdFilePtr* if (sizeof(msgLen) != readSize) { truncateFile = true; if (readSize < 0) { - taosPrintLog(flags, level, dflag, "failed to read len from file:%s,%p wlen:%" PRId64 " tlen:%" PRId64 " since %s", + taosPrintLog(flags, level, dflag, "failed to read len from file:%s,%p wlen:%" PRId64 " tlen:%lu since %s", filepath, pFile, readSize, sizeof(msgLen), terrstr()); } goto _return; From 4718f4d1b6a4e2ff587f1c5d0560a43e2bfa5edb Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 30 Dec 2022 17:24:44 +0800 Subject: [PATCH 19/89] fix: resend acked msgs on exceeding maximum retryWaitMs --- source/libs/sync/src/syncPipeline.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index a9b5aadaa5..e176735d56 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -31,6 +31,10 @@ static bool syncIsMsgBlock(tmsg_t type) { (type == TDMT_VND_UPDATE_TAG_VAL) || (type == TDMT_VND_ALTER_CONFIRM); } +FORCE_INLINE static int64_t syncGetRetryMaxWaitMs() { + return SYNC_LOG_REPL_RETRY_WAIT_MS * (1 << SYNC_MAX_RETRY_BACKOFF); +} + int64_t syncLogBufferGetEndIndex(SSyncLogBuffer* pBuf) { taosThreadMutexLock(&pBuf->mutex); int64_t index = pBuf->endIndex; @@ -627,7 +631,7 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { break; } - if (pMgr->states[pos].acked) { + if (pMgr->states[pos].acked && nowMs < pMgr->states[pos].timeMs + syncGetRetryMaxWaitMs()) { continue; } @@ -791,7 +795,7 @@ int32_t syncLogReplMgrReplicateOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index) { ASSERT(!pMgr->restored); ASSERT(pMgr->startIndex >= 0); - int64_t retryMaxWaitMs = SYNC_LOG_REPL_RETRY_WAIT_MS * (1 << SYNC_MAX_RETRY_BACKOFF); + int64_t retryMaxWaitMs = syncGetRetryMaxWaitMs(); int64_t nowMs = taosGetMonoTimestampMs(); if (pMgr->endIndex > pMgr->startIndex && @@ -834,9 +838,11 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p int32_t count = 0; int64_t nowMs = taosGetMonoTimestampMs(); int64_t limit = pMgr->size >> 1; + SyncTerm term = -1; + SyncIndex firstIndex = -1; for (SyncIndex index = pMgr->endIndex; index <= pNode->pLogBuf->matchIndex; index++) { - if (batchSize < count++ || limit <= index - pMgr->startIndex) { + if (batchSize < count || limit <= index - pMgr->startIndex) { break; } if (pMgr->startIndex + 1 < index && pMgr->states[(index - 1) % pMgr->size].barrier) { @@ -845,7 +851,6 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p int64_t pos = index % pMgr->size; SRaftId* pDestId = &pNode->replicasId[pMgr->peerId]; bool barrier = false; - SyncTerm term = -1; if (syncLogBufferReplicateOneTo(pMgr, pNode, index, &term, pDestId, &barrier) < 0) { sError("vgId:%d, failed to replicate log entry since %s. index: %" PRId64 ", dest: 0x%016" PRIx64 "", pNode->vgId, terrstr(), index, pDestId->addr); @@ -856,6 +861,9 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p pMgr->states[pos].term = term; pMgr->states[pos].acked = false; + if (firstIndex == -1) firstIndex = index; + count++; + pMgr->endIndex = index + 1; if (barrier) { sInfo("vgId:%d, replicated sync barrier to dest: %" PRIx64 ". index: %" PRId64 ", term: %" PRId64 @@ -869,10 +877,11 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p syncLogReplMgrRetryOnNeed(pMgr, pNode); SSyncLogBuffer* pBuf = pNode->pLogBuf; - sTrace("vgId:%d, attempted to replicate %d msgs to the %d'th peer. pMgr(rs:%d): [%" PRId64 " %" PRId64 ", %" PRId64 - "), pBuf: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", - pNode->vgId, count, pMgr->peerId, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, - pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); + sTrace("vgId:%d, replicated %d msgs to peer: %" PRId64 ". indexes: %" PRId64 "..., terms: ...%" PRId64 + ", mgr: (rs:%d) [%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 + ")", + pNode->vgId, count, pDestId->addr, firstIndex, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, + pMgr->endIndex, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); return 0; } From eb524e610a84ab368854aa0ec757577c1cdeca68 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 30 Dec 2022 19:02:46 +0800 Subject: [PATCH 20/89] enh: reset sync log replication on stagnation for eight times maxRetryWaitMs --- source/libs/sync/src/syncPipeline.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index e176735d56..0eff028cad 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -631,7 +631,12 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { break; } - if (pMgr->states[pos].acked && nowMs < pMgr->states[pos].timeMs + syncGetRetryMaxWaitMs()) { + if (pMgr->states[pos].acked) { + if (pMgr->states[pos].timeMs + (syncGetRetryMaxWaitMs() << 3) < nowMs) { + syncLogReplMgrReset(pMgr); + sWarn("vgId:%d, reset sync log repl mgr since stagnation. peer: %" PRIx64, pNode->vgId, pDestId->addr); + goto _out; + } continue; } From 03f4b12386b4a76c8a4860fb641dd3bc1b6147eb Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 30 Dec 2022 19:35:57 +0800 Subject: [PATCH 21/89] enh: print peer addr in hex format --- source/libs/sync/src/syncPipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 0eff028cad..7b8d5bed28 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -882,7 +882,7 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p syncLogReplMgrRetryOnNeed(pMgr, pNode); SSyncLogBuffer* pBuf = pNode->pLogBuf; - sTrace("vgId:%d, replicated %d msgs to peer: %" PRId64 ". indexes: %" PRId64 "..., terms: ...%" PRId64 + sTrace("vgId:%d, replicated %d msgs to peer: %" PRIx64 ". indexes: %" PRId64 "..., terms: ...%" PRId64 ", mgr: (rs:%d) [%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId, count, pDestId->addr, firstIndex, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, From ac989908a2e1a8df5b945ad7e61f900656e0b8a2 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sat, 31 Dec 2022 14:13:33 +0800 Subject: [PATCH 22/89] fix invalid vgroup id --- source/libs/catalog/src/ctgAsync.c | 38 ++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index b8590c9255..9ced1a99fd 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -483,7 +483,7 @@ int32_t ctgInitTask(SCtgJob* pJob, CTG_TASK_TYPE type, void* param, int32_t* tas _return: CTG_UNLOCK(CTG_WRITE, &pJob->taskLock); - + return code; } @@ -905,6 +905,31 @@ int32_t ctgCallUserCb(void* param) { return TSDB_CODE_SUCCESS; } +void ctgUpdateJobErrCode(SCtgJob* pJob, int32_t errCode) { + if (TSDB_CODE_SUCCESS == errCode) return; + + int32_t origCode = atomic_load_32(&pJob->jobResCode); + if (TSDB_CODE_SUCCESS == origCode) { + if (origCode == atomic_val_compare_exchange_32(&pJob->jobResCode, origCode, errCode)) { + goto _return; + } + origCode = atomic_load_32(&pJob->jobResCode); + } + + if (NEED_CLIENT_HANDLE_ERROR(origCode)) { + return; + } + + if (NEED_CLIENT_HANDLE_ERROR(errCode)) { + atomic_store_32(&pJob->jobResCode, errCode); + goto _return; + } + return; + +_return: + qDebug("QID:0x%" PRIx64 " ctg job errCode updated to %s", pJob->queryId, tstrerror(errCode)); +} + int32_t ctgHandleTaskEnd(SCtgTask* pTask, int32_t rspCode) { SCtgJob* pJob = pTask->pJob; int32_t code = 0; @@ -924,6 +949,8 @@ int32_t ctgHandleTaskEnd(SCtgTask* pTask, int32_t rspCode) { if (taskDone < taosArrayGetSize(pJob->pTasks)) { qDebug("QID:0x%" PRIx64 " task done: %d, total: %d", pJob->queryId, taskDone, (int32_t)taosArrayGetSize(pJob->pTasks)); + + ctgUpdateJobErrCode(pJob, rspCode); return TSDB_CODE_SUCCESS; } @@ -931,7 +958,8 @@ int32_t ctgHandleTaskEnd(SCtgTask* pTask, int32_t rspCode) { _return: - pJob->jobResCode = code; + ctgUpdateJobErrCode(pJob, rspCode); + // pJob->jobResCode = code; // taosSsleep(2); // qDebug("QID:0x%" PRIx64 " ctg after sleep", pJob->queryId); @@ -1098,7 +1126,8 @@ _return: } if (code) { - ctgTaskError("Get table %d.%s.%s meta failed with error %s", pName->acctId, pName->dbname, pName->tname, tstrerror(code)); + ctgTaskError("Get table %d.%s.%s meta failed with error %s", pName->acctId, pName->dbname, pName->tname, + tstrerror(code)); } if (pTask->res || code) { ctgHandleTaskEnd(pTask, code); @@ -1286,7 +1315,8 @@ _return: TSWAP(pTask->res, ctx->pResList); taskDone = true; } - ctgTaskError("Get table %d.%s.%s meta failed with error %s", pName->acctId, pName->dbname, pName->tname, tstrerror(code)); + ctgTaskError("Get table %d.%s.%s meta failed with error %s", pName->acctId, pName->dbname, pName->tname, + tstrerror(code)); } if (pTask->res && taskDone) { From 69fab69932660134f75790a05274995b79abbd4b Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sat, 31 Dec 2022 15:24:35 +0800 Subject: [PATCH 23/89] fix invalid vgroup id --- source/libs/catalog/src/ctgAsync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index 9ced1a99fd..5d38b75c08 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -906,7 +906,7 @@ int32_t ctgCallUserCb(void* param) { } void ctgUpdateJobErrCode(SCtgJob* pJob, int32_t errCode) { - if (TSDB_CODE_SUCCESS == errCode) return; + if (!NEED_CLIENT_HANDLE_ERROR(errCode) || errCode == TSDB_CODE_SUCCESS) return; int32_t origCode = atomic_load_32(&pJob->jobResCode); if (TSDB_CODE_SUCCESS == origCode) { From 17b5df0018bde1bb740dd36202f39f459f68b79a Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sat, 31 Dec 2022 16:27:50 +0800 Subject: [PATCH 24/89] fix invalid vgroup id --- source/libs/catalog/src/ctgAsync.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index 5d38b75c08..2242bdb994 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -906,7 +906,7 @@ int32_t ctgCallUserCb(void* param) { } void ctgUpdateJobErrCode(SCtgJob* pJob, int32_t errCode) { - if (!NEED_CLIENT_HANDLE_ERROR(errCode) || errCode == TSDB_CODE_SUCCESS) return; + if (!NEED_CLIENT_REFRESH_VG_ERROR(errCode) || errCode == TSDB_CODE_SUCCESS) return; int32_t origCode = atomic_load_32(&pJob->jobResCode); if (TSDB_CODE_SUCCESS == origCode) { @@ -916,11 +916,11 @@ void ctgUpdateJobErrCode(SCtgJob* pJob, int32_t errCode) { origCode = atomic_load_32(&pJob->jobResCode); } - if (NEED_CLIENT_HANDLE_ERROR(origCode)) { + if (NEED_CLIENT_REFRESH_VG_ERROR(origCode)) { return; } - if (NEED_CLIENT_HANDLE_ERROR(errCode)) { + if (NEED_CLIENT_REFRESH_VG_ERROR(errCode)) { atomic_store_32(&pJob->jobResCode, errCode); goto _return; } From 3edde0eadfec0576cf0b38d1ec8706b95903656d Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Sun, 1 Jan 2023 21:05:57 +0800 Subject: [PATCH 25/89] fix: tdbAbort on metaClose or streamMetaClose instead of tdbTxnClose --- source/dnode/vnode/src/meta/metaOpen.c | 2 +- source/libs/stream/src/streamMeta.c | 2 +- source/libs/tdb/src/db/tdbPage.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/meta/metaOpen.c b/source/dnode/vnode/src/meta/metaOpen.c index 8974d93678..867b481bcc 100644 --- a/source/dnode/vnode/src/meta/metaOpen.c +++ b/source/dnode/vnode/src/meta/metaOpen.c @@ -203,7 +203,7 @@ _err: int metaClose(SMeta *pMeta) { if (pMeta) { - if (pMeta->txn) tdbTxnClose(pMeta->txn); + if (pMeta->pEnv) tdbAbort(pMeta->pEnv, pMeta->txn); if (pMeta->pCache) metaCacheClose(pMeta); if (pMeta->pIdx) metaCloseIdx(pMeta); if (pMeta->pStreamDb) tdbTbClose(pMeta->pStreamDb); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 56da86654c..7c415053e1 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -69,7 +69,7 @@ _err: } void streamMetaClose(SStreamMeta* pMeta) { - tdbTxnClose(pMeta->txn); + tdbAbort(pMeta->db, pMeta->txn); tdbTbClose(pMeta->pTaskDb); tdbTbClose(pMeta->pCheckpointDb); tdbClose(pMeta->db); diff --git a/source/libs/tdb/src/db/tdbPage.c b/source/libs/tdb/src/db/tdbPage.c index d35f05461d..50dc8e0a65 100644 --- a/source/libs/tdb/src/db/tdbPage.c +++ b/source/libs/tdb/src/db/tdbPage.c @@ -77,7 +77,7 @@ int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg) u8 *ptr; tdbTrace("page/destroy: %p/%d %p", pPage, pPage->id, xFree); - // ASSERT(!pPage->isDirty); + ASSERT(!pPage->isDirty); ASSERT(xFree); for (int iOvfl = 0; iOvfl < pPage->nOverflow; iOvfl++) { From cb433c191bc6a7b6f617885b3e492ac5f297385e Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 2 Jan 2023 08:58:13 +0800 Subject: [PATCH 26/89] enh: reset as stagnation only when not matched in syncLogReplMgrRetryOnNeed --- source/libs/sync/src/syncPipeline.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 7b8d5bed28..04a9f9728f 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -632,9 +632,10 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { } if (pMgr->states[pos].acked) { - if (pMgr->states[pos].timeMs + (syncGetRetryMaxWaitMs() << 3) < nowMs) { + if (pMgr->matchIndex < index && pMgr->states[pos].timeMs + (syncGetRetryMaxWaitMs() << 3) < nowMs) { syncLogReplMgrReset(pMgr); - sWarn("vgId:%d, reset sync log repl mgr since stagnation. peer: %" PRIx64, pNode->vgId, pDestId->addr); + sWarn("vgId:%d, reset sync log repl mgr since stagnation. index: %" PRId64 ", peer: %" PRIx64, pNode->vgId, + index, pDestId->addr); goto _out; } continue; From 2baa71883d3f84c5de15d2c4a634b39f0468ed61 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 2 Jan 2023 10:06:20 +0800 Subject: [PATCH 27/89] enh: logging states of sync log repl mgrs and the ring buffer in syncPrintNodeLog --- source/libs/sync/src/syncUtil.c | 84 +++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index 525681e53e..9db04ce698 100644 --- a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -17,6 +17,7 @@ #include "syncUtil.h" #include "syncIndexMgr.h" #include "syncMessage.h" +#include "syncPipeline.h" #include "syncRaftCfg.h" #include "syncRaftStore.h" #include "syncSnapshot.h" @@ -163,45 +164,67 @@ bool syncUtilUserPreCommit(tmsg_t msgType) { return msgType != TDMT_SYNC_NOOP && bool syncUtilUserRollback(tmsg_t msgType) { return msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_LEADER_TRANSFER; } void syncCfg2SimpleStr(const SSyncCfg* pCfg, char* buf, int32_t bufLen) { - int32_t len = snprintf(buf, bufLen, "{r-num:%d, my:%d, ", pCfg->replicaNum, pCfg->myIndex); - + int32_t len = snprintf(buf, bufLen, "{num:%d, idx:%d, [", pCfg->replicaNum, pCfg->myIndex); for (int32_t i = 0; i < pCfg->replicaNum; ++i) { + len += snprintf(buf + len, bufLen - len, "%s:%d", pCfg->nodeInfo[i].nodeFqdn, pCfg->nodeInfo[i].nodePort); if (i < pCfg->replicaNum - 1) { - len += snprintf(buf + len, bufLen - len, "%s:%d, ", pCfg->nodeInfo[i].nodeFqdn, pCfg->nodeInfo[i].nodePort); - } else { - len += snprintf(buf + len, bufLen - len, "%s:%d}", pCfg->nodeInfo[i].nodeFqdn, pCfg->nodeInfo[i].nodePort); + len += snprintf(buf + len, bufLen - len, "%s", ", "); } } + len += snprintf(buf + len, bufLen - len, "%s", "]}"); } // for leader static void syncHearbeatReplyTime2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { - int32_t len = 5; - + int32_t len = 0; + len += snprintf(buf + len, bufLen - len, "%s", "{"); for (int32_t i = 0; i < pSyncNode->replicaNum; ++i) { int64_t tsMs = syncIndexMgrGetRecvTime(pSyncNode->pMatchIndex, &(pSyncNode->replicasId[i])); - + len += snprintf(buf + len, bufLen - len, "%d:%" PRId64, i, tsMs); if (i < pSyncNode->replicaNum - 1) { - len += snprintf(buf + len, bufLen - len, "%d:%" PRId64 ",", i, tsMs); - } else { - len += snprintf(buf + len, bufLen - len, "%d:%" PRId64 "}", i, tsMs); + len += snprintf(buf + len, bufLen - len, "%s", ","); } } + len += snprintf(buf + len, bufLen - len, "%s", "}"); } // for follower static void syncHearbeatTime2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { - int32_t len = 4; - + int32_t len = 0; + len += snprintf(buf + len, bufLen - len, "%s", "{"); for (int32_t i = 0; i < pSyncNode->replicaNum; ++i) { int64_t tsMs = syncIndexMgrGetRecvTime(pSyncNode->pNextIndex, &(pSyncNode->replicasId[i])); - + len += snprintf(buf + len, bufLen - len, "%d:%" PRId64, i, tsMs); if (i < pSyncNode->replicaNum - 1) { - len += snprintf(buf + len, bufLen - len, "%d:%" PRId64 ",", i, tsMs); - } else { - len += snprintf(buf + len, bufLen - len, "%d:%" PRId64 "}", i, tsMs); + len += snprintf(buf + len, bufLen - len, "%s", ","); } } + len += snprintf(buf + len, bufLen - len, "%s", "}"); +} + +static void syncLogBufferStates2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { + SSyncLogBuffer* pBuf = pSyncNode->pLogBuf; + if (pBuf == NULL) { + return; + } + int len = 0; + len += snprintf(buf + len, bufLen - len, "[%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pBuf->startIndex, + pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); +} + +static void syncLogReplMgrStates2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { + int len = 0; + len += snprintf(buf + len, bufLen - len, "%s", "{"); + for (int32_t i = 0; i < pSyncNode->replicaNum; i++) { + SSyncLogReplMgr* pMgr = pSyncNode->logReplMgrs[i]; + if (pMgr == NULL) break; + len += snprintf(buf + len, bufLen - len, "%d:%d [%" PRId64 " %" PRId64 ", %" PRId64 ")", i, pMgr->restored, + pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex); + if (i + 1 < pSyncNode->replicaNum) { + len += snprintf(buf + len, bufLen - len, "%s", ", "); + } + } + len += snprintf(buf + len, bufLen - len, "%s", "}"); } static void syncPeerState2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { @@ -243,20 +266,23 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNo int32_t cacheHit = pNode->pLogStore->cacheHit; int32_t cacheMiss = pNode->pLogStore->cacheMiss; - char cfgStr[1024]; + char cfgStr[1024] = ""; if (pNode->pRaftCfg != NULL) { syncCfg2SimpleStr(&(pNode->pRaftCfg->cfg), cfgStr, sizeof(cfgStr)); } else { return; } - char peerStr[1024] = "{"; - syncPeerState2Str(pNode, peerStr, sizeof(peerStr)); + char replMgrStatesStr[1024] = ""; + syncLogReplMgrStates2Str(pNode, replMgrStatesStr, sizeof(replMgrStatesStr)); - char hbrTimeStr[256] = "hbr:{"; + char bufferStatesStr[256] = ""; + syncLogBufferStates2Str(pNode, bufferStatesStr, sizeof(bufferStatesStr)); + + char hbrTimeStr[256] = ""; syncHearbeatReplyTime2Str(pNode, hbrTimeStr, sizeof(hbrTimeStr)); - char hbTimeStr[256] = "hb:{"; + char hbTimeStr[256] = ""; syncHearbeatTime2Str(pNode, hbTimeStr, sizeof(hbTimeStr)); int32_t quorum = syncNodeDynamicQuorum(pNode); @@ -279,16 +305,16 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNo taosPrintLog(flags, level, dflag, "vgId:%d, %s, sync:%s, term:%" PRIu64 ", commit-index:%" PRId64 ", first-ver:%" PRId64 ", last-ver:%" PRId64 ", min:%" PRId64 ", snap:%" PRId64 ", snap-term:%" PRIu64 - ", elect-times:%d, as-leader-times:%d, cfg-ch-times:%d, hit:%d, mis:%d, hb-slow:%d, hbr-slow:%d, " + ", elect-times:%d, as-leader-times:%d, cfg-ch-times:%d, hb-slow:%d, hbr-slow:%d, " "aq-items:%d, snaping:%" PRId64 ", replicas:%d, last-cfg:%" PRId64 - ", chging:%d, restore:%d, quorum:%d, elect-lc-timer:%" PRId64 ", hb:%" PRId64 ", %s, %s, %s, %s", + ", chging:%d, restore:%d, quorum:%d, elect-lc-timer:%" PRId64 ", hb:%" PRId64 + ", buffer:%s, log-repl-mgrs:%s, members:%s, hb:%s, hb-reply:%s", pNode->vgId, eventLog, syncStr(pNode->state), currentTerm, pNode->commitIndex, logBeginIndex, logLastIndex, pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, pNode->electNum, - pNode->becomeLeaderNum, pNode->configChangeNum, cacheHit, cacheMiss, pNode->hbSlowNum, - pNode->hbrSlowNum, aqItems, pNode->snapshottingIndex, pNode->replicaNum, - pNode->pRaftCfg->lastConfigIndex, pNode->changing, pNode->restoreFinish, quorum, - pNode->electTimerLogicClock, pNode->heartbeatTimerLogicClockUser, peerStr, cfgStr, hbTimeStr, - hbrTimeStr); + pNode->becomeLeaderNum, pNode->configChangeNum, pNode->hbSlowNum, pNode->hbrSlowNum, aqItems, + pNode->snapshottingIndex, pNode->replicaNum, pNode->pRaftCfg->lastConfigIndex, pNode->changing, + pNode->restoreFinish, quorum, pNode->electTimerLogicClock, pNode->heartbeatTimerLogicClockUser, + bufferStatesStr, replMgrStatesStr, cfgStr, hbTimeStr, hbrTimeStr); } } From 38d1e8688e8c9b26b335121ed9726fc3fe700cc7 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 3 Jan 2023 09:27:41 +0800 Subject: [PATCH 28/89] fix: remove debug code --- source/dnode/vnode/src/vnd/vnodeSvr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 84d033249c..8d53579483 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -884,9 +884,6 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t version, void *pReq bool tbCreated = false; terrno = TSDB_CODE_SUCCESS; - int32_t tta = 0; - int32_t ttt = 1/tta; - pRsp->code = 0; pSubmitReq->version = version; statis.nBatchInsert = 1; From 2fbede75a354466f6c540d15b62793e9c7504299 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 3 Jan 2023 17:46:17 +0800 Subject: [PATCH 29/89] fix mem leak --- source/libs/executor/src/tsort.c | 14 +++++++++----- source/libs/transport/inc/transComm.h | 9 +-------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index fa0cdb3943..4ca15aa600 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -108,7 +108,7 @@ static int32_t sortComparCleanup(SMsortComparParam* cmpParam) { return TSDB_CODE_SUCCESS; } -void tsortClearOrderdSource(SArray *pOrderedSource) { +void tsortClearOrderdSource(SArray* pOrderedSource) { for (size_t i = 0; i < taosArrayGetSize(pOrderedSource); i++) { SSortSource** pSource = taosArrayGet(pOrderedSource, i); if (NULL == *pSource) { @@ -121,6 +121,10 @@ void tsortClearOrderdSource(SArray *pOrderedSource) { if ((*pSource)->param && !(*pSource)->onlyRef) { taosMemoryFree((*pSource)->param); } + if (pSource->src.pBlock) { + blockDataDestroy(pSource->src.pBlock); + pSource->src.pBlock = NULL; + } taosMemoryFreeClear(*pSource); } @@ -620,9 +624,9 @@ static int32_t createInitialSources(SSortHandle* pHandle) { if (pHandle->type == SORT_SINGLESOURCE_SORT) { SSortSource** pSource = taosArrayGet(pHandle->pOrderedSource, 0); - SSortSource* source = *pSource; + SSortSource* source = *pSource; *pSource = NULL; - + tsortClearOrderdSource(pHandle->pOrderedSource); while (1) { @@ -840,8 +844,8 @@ SSortExecInfo tsortGetSortExecInfo(SSortHandle* pHandle) { SSortExecInfo info = {0}; if (pHandle == NULL) { - info.sortMethod = SORT_QSORT_T; // by default - info.sortBuffer = 2 * 1048576; // 2mb by default + info.sortMethod = SORT_QSORT_T; // by default + info.sortBuffer = 2 * 1048576; // 2mb by default } else { info.sortBuffer = pHandle->pageSize * pHandle->numOfPages; info.sortMethod = pHandle->inMemSort ? SORT_QSORT_T : SORT_SPILLED_MERGE_SORT_T; diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index bf9a6c0051..5f964f6b1a 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -100,14 +100,7 @@ typedef void* queue[2]; #define TRANS_READ_TIMEOUT 3000 // read timeout (ms) #define TRANS_PACKET_LIMIT 1024 * 1024 * 512 -#define TRANS_MAGIC_NUM 0x5f375a86 - -#define TRANS_NOVALID_PACKET(src) ((src) != TRANS_MAGIC_NUM ? 1 : 0) - -#define TRANS_PACKET_LIMIT 1024 * 1024 * 512 - -#define TRANS_MAGIC_NUM 0x5f375a86 - +#define TRANS_MAGIC_NUM 0x5f375a86 #define TRANS_NOVALID_PACKET(src) ((src) != TRANS_MAGIC_NUM ? 1 : 0) typedef SRpcMsg STransMsg; From 68d84a1c37be342e5dbc2a5d24f21a864c2b631d Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 3 Jan 2023 17:49:07 +0800 Subject: [PATCH 30/89] fix mem leak --- source/libs/executor/src/tsort.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 4ca15aa600..b3f342c694 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -121,9 +121,9 @@ void tsortClearOrderdSource(SArray* pOrderedSource) { if ((*pSource)->param && !(*pSource)->onlyRef) { taosMemoryFree((*pSource)->param); } - if (pSource->src.pBlock) { - blockDataDestroy(pSource->src.pBlock); - pSource->src.pBlock = NULL; + if ((*pSource)->src.pBlock) { + blockDataDestroy((*pSource)->src.pBlock); + (*pSource)->src.pBlock = NULL; } taosMemoryFreeClear(*pSource); } From b5dbc8439fd865e082665481839c56949eb40818 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 3 Jan 2023 20:35:26 +0800 Subject: [PATCH 31/89] fix mem leak --- source/libs/executor/src/tsort.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index b3f342c694..261ab2b2e8 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -121,10 +121,12 @@ void tsortClearOrderdSource(SArray* pOrderedSource) { if ((*pSource)->param && !(*pSource)->onlyRef) { taosMemoryFree((*pSource)->param); } - if ((*pSource)->src.pBlock) { + + if (!(*pSource)->onlyRef && (*pSource)->src.pBlock) { blockDataDestroy((*pSource)->src.pBlock); (*pSource)->src.pBlock = NULL; } + taosMemoryFreeClear(*pSource); } From d383dcd59df89f80106695aa3bbafbb1dd91f5be Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 3 Jan 2023 21:57:00 +0800 Subject: [PATCH 32/89] fix mem leak --- source/libs/executor/src/tsort.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 261ab2b2e8..c7fccc0104 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -656,6 +656,10 @@ static int32_t createInitialSources(SSortHandle* pHandle) { if (source->param && !source->onlyRef) { taosMemoryFree(source->param); } + if (!source->onlyRef && source->src.pBlock) { + blockDataDestroy(source->src.pBlock); + source->src.pBlock = NULL; + } taosMemoryFree(source); return code; } @@ -669,6 +673,10 @@ static int32_t createInitialSources(SSortHandle* pHandle) { if (source->param && !source->onlyRef) { taosMemoryFree(source->param); } + if (!source->onlyRef && source->src.pBlock) { + blockDataDestroy(source->src.pBlock); + source->src.pBlock = NULL; + } taosMemoryFree(source); return code; } From 5b7b8a97b428777fdc7f83dc4761c212c17e5607 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 4 Jan 2023 10:48:38 +0800 Subject: [PATCH 33/89] fix invalid vgroup id --- source/libs/catalog/src/ctgAsync.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index 2242bdb994..438128203e 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -908,26 +908,9 @@ int32_t ctgCallUserCb(void* param) { void ctgUpdateJobErrCode(SCtgJob* pJob, int32_t errCode) { if (!NEED_CLIENT_REFRESH_VG_ERROR(errCode) || errCode == TSDB_CODE_SUCCESS) return; - int32_t origCode = atomic_load_32(&pJob->jobResCode); - if (TSDB_CODE_SUCCESS == origCode) { - if (origCode == atomic_val_compare_exchange_32(&pJob->jobResCode, origCode, errCode)) { - goto _return; - } - origCode = atomic_load_32(&pJob->jobResCode); - } - - if (NEED_CLIENT_REFRESH_VG_ERROR(origCode)) { - return; - } - - if (NEED_CLIENT_REFRESH_VG_ERROR(errCode)) { - atomic_store_32(&pJob->jobResCode, errCode); - goto _return; - } - return; - -_return: + atomic_store_32(&pJob->jobResCode, errCode); qDebug("QID:0x%" PRIx64 " ctg job errCode updated to %s", pJob->queryId, tstrerror(errCode)); + return; } int32_t ctgHandleTaskEnd(SCtgTask* pTask, int32_t rspCode) { From bf6dc99461c74b30ae1ab3154e88cbc2e1480d9d Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Wed, 4 Jan 2023 11:48:30 +0800 Subject: [PATCH 34/89] fix: adjust sync logs --- source/dnode/mnode/impl/src/mndMnode.c | 4 ++-- source/dnode/mnode/sdb/src/sdbFile.c | 2 +- source/libs/sync/src/syncEnv.c | 2 +- source/libs/sync/src/syncMain.c | 13 +++++++++---- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndMnode.c b/source/dnode/mnode/impl/src/mndMnode.c index c8c8e06c5e..9b3934c40c 100644 --- a/source/dnode/mnode/impl/src/mndMnode.c +++ b/source/dnode/mnode/impl/src/mndMnode.c @@ -785,9 +785,9 @@ static void mndReloadSyncConfig(SMnode *pMnode) { int32_t code = syncReconfig(pMnode->syncMgmt.sync, &cfg); if (code != 0) { - mError("vgId:1, failed to reconfig mnode sync since %s", terrstr()); + mError("vgId:1, mnode sync reconfig failed since %s", terrstr()); } else { - mInfo("vgId:1, reconfig mnode sync success"); + mInfo("vgId:1, mnode sync reconfig success"); } } } diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index f43b6bdb25..b797e07e13 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -243,7 +243,7 @@ static int32_t sdbReadFileImp(SSdb *pSdb) { if (pFile == NULL) { taosMemoryFree(pRaw); terrno = TAOS_SYSTEM_ERROR(errno); - mDebug("failed to read sdb file:%s since %s", file, terrstr()); + mInfo("read sdb file:%s finished since %s", file, terrstr()); return 0; } diff --git a/source/libs/sync/src/syncEnv.c b/source/libs/sync/src/syncEnv.c index 0d6d0f93f1..1fa67cfa4d 100644 --- a/source/libs/sync/src/syncEnv.c +++ b/source/libs/sync/src/syncEnv.c @@ -114,7 +114,7 @@ void syncHbTimerDataRemove(int64_t rid) { taosRemoveRef(gHbDataRefId, rid); } SSyncHbTimerData *syncHbTimerDataAcquire(int64_t rid) { SSyncHbTimerData *pData = taosAcquireRef(gHbDataRefId, rid); - if (pData == NULL) { + if (pData == NULL && rid > 0) { sInfo("failed to acquire hb-timer-data from refId:%" PRId64, rid); terrno = TSDB_CODE_SYN_INTERNAL_ERROR; } diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 017806015b..6dcc7d3742 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1456,16 +1456,21 @@ int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pNode, SRpcMsg } } + int32_t code = -1; if (pNode->syncSendMSg != NULL && epSet != NULL) { syncUtilMsgHtoN(pMsg->pCont); pMsg->info.noResp = 1; - return pNode->syncSendMSg(epSet, pMsg); - } else { - sError("vgId:%d, sync send msg by id error, fp:%p epset:%p", pNode->vgId, pNode->syncSendMSg, epSet); + code = pNode->syncSendMSg(epSet, pMsg); + } + + if (code < 0) { + sError("vgId:%d, sync send msg by id error, epset:%p dnode:%d addr:%" PRId64 " err:0x%x", pNode->vgId, epSet, + DID(destRaftId), destRaftId->addr, terrno); rpcFreeCont(pMsg->pCont); terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; } + + return code; } inline bool syncNodeInConfig(SSyncNode* pNode, const SSyncCfg* pCfg) { From 47125b5d9d9c2b3105e1d7e5e3f6ffbf8b3682da Mon Sep 17 00:00:00 2001 From: dapan <89396746@qq.com> Date: Wed, 4 Jan 2023 15:11:01 +0800 Subject: [PATCH 35/89] fix: windows crash report issues --- include/os/osSystem.h | 12 ++++++------ source/client/src/clientEnv.c | 18 +++++++++++++++++- source/util/src/tlog.c | 10 ++++++++-- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/include/os/osSystem.h b/include/os/osSystem.h index a72caa823e..ccff09089b 100644 --- a/include/os/osSystem.h +++ b/include/os/osSystem.h @@ -102,10 +102,10 @@ void taosResetTerminalMode(); symbol->SizeOfStruct = sizeof(SYMBOL_INFO); \ \ if (frames > 0) { \ - offset = snprintf(buf, bufSize - 1, "obtained %d stack frames", frames); \ - for (i = 0; i < frames; i++) { \ + offset = snprintf(buf, bufSize - 1, "obtained %d stack frames", (ignoreNum > 0) ? frames - ignoreNum : frames); \ + for (i = (ignoreNum > 0) ? ignoreNum : 0; i < frames; i++) { \ SymFromAddr(process, (DWORD64)(stack[i]), 0, symbol); \ - offset += snprintf(buf + offset, bufSize - 1 - offset, "frame:%i, %s - 0x%0X", frames - i - 1, symbol->Name, symbol->Address); \ + offset += snprintf(buf + offset, bufSize - 1 - offset, "frame:%i, %s - 0x%0X", (ignoreNum > 0) ? i - ignoreNum : i, symbol->Name, symbol->Address); \ } \ } \ free(symbol); \ @@ -131,10 +131,10 @@ void taosResetTerminalMode(); symbol->SizeOfStruct = sizeof(SYMBOL_INFO); \ \ if (frames > 0) { \ - taosPrintLog(flags, level, dflag, "obtained %d stack frames", frames); \ - for (i = 0; i < frames; i++) { \ + taosPrintLog(flags, level, dflag, "obtained %d stack frames", (ignoreNum > 0) ? frames - ignoreNum : frames); \ + for (i = (ignoreNum > 0) ? ignoreNum : 0; i < frames; i++) { \ SymFromAddr(process, (DWORD64)(stack[i]), 0, symbol); \ - taosPrintLog(flags, level, dflag, "frame:%i, %s - 0x%0X", frames - i - 1, symbol->Name, symbol->Address); \ + taosPrintLog(flags, level, dflag, "frame:%i, %s - 0x%0X", (ignoreNum > 0) ? i - ignoreNum : i, symbol->Name, symbol->Address); \ } \ } \ free(symbol); \ diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index d88ca4ce19..e44053c26c 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -419,8 +419,14 @@ void taosClientCrash(int signum, void *sigInfo, void *context) { _return: taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo); + +#if defined(WINDOWS) + exit(signum); +#endif } +void crashReportThreadFuncUnexpectedStopped(void) { atomic_store_32(&clientStop, -1); } + static void *tscCrashReportThreadFp(void *param) { setThreadName("client-crashReport"); char filepath[PATH_MAX] = {0}; @@ -432,6 +438,12 @@ static void *tscCrashReportThreadFp(void *param) { int32_t sleepTime = 200; int32_t reportPeriodNum = 3600 * 1000 / sleepTime; int32_t loopTimes = reportPeriodNum; + +#ifdef WINDOWS + if (taosCheckCurrentInDll()) { + atexit(crashReportThreadFuncUnexpectedStopped); + } +#endif while (1) { if (clientStop) break; @@ -499,7 +511,11 @@ void tscStopCrashReport() { return; } - clientStop = 1; + if (atomic_val_compare_exchange_32(&clientStop, 0, 1)) { + tscDebug("hb thread already stopped"); + return; + } + while (atomic_load_32(&clientStop) > 0) { taosMsleep(100); } diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 6c79f9ae95..771fa08fcf 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -866,7 +866,12 @@ int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t tjsonAddIntegerToObject(pJson, "crashSig", signum); tjsonAddIntegerToObject(pJson, "crashTs", taosGetTimestampUs()); +#if !defined(WINDOWS) taosLogTraceToBuf(tmp, sizeof(tmp), 3); +#else + taosLogTraceToBuf(tmp, sizeof(tmp), 8); +#endif + tjsonAddStringToObject(pJson, "stackInfo", tmp); char* pCont = tjsonToString(pJson); @@ -925,9 +930,10 @@ _return: #ifndef WINDOWS taosPrintLog(flags, level, dflag, "sender PID:%d cmdline:%s", ((siginfo_t *)sigInfo)->si_pid, taosGetCmdlineByPID(((siginfo_t *)sigInfo)->si_pid)); -#endif - taosPrintTrace(flags, level, dflag, 3); +#else + taosPrintTrace(flags, level, dflag, 8); +#endif taosMemoryFree(pMsg); } From be23890976543bbc03f80f9e5d33b8aa53314d42 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 4 Jan 2023 16:07:30 +0800 Subject: [PATCH 36/89] fix: ut test issue --- source/os/test/osTests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/os/test/osTests.cpp b/source/os/test/osTests.cpp index f831f457f9..2e24bb0526 100644 --- a/source/os/test/osTests.cpp +++ b/source/os/test/osTests.cpp @@ -33,7 +33,7 @@ TEST(osTest, osSystem) { const char *flags = "UTL FATAL "; ELogLevel level = DEBUG_FATAL; int32_t dflag = 255; // tsLogEmbedded ? 255 : uDebugFlag - taosPrintTrace(flags, level, dflag); + taosPrintTrace(flags, level, dflag, 0); } void fileOperateOnFree(void *param) { From 3bccd891bb61cae854599bba1ba72e211c3f714e Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 4 Jan 2023 16:51:46 +0800 Subject: [PATCH 37/89] fix: add debug code --- source/client/src/clientImpl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index f36036fd0a..bed4d9c20c 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1254,6 +1254,8 @@ STscObj* taosConnectImpl(const char* user, const char* auth, const char* db, __t int64_t transporterId = 0; asyncSendMsgToServer(pTscObj->pAppInfo->pTransporter, &pTscObj->pAppInfo->mgmtEp.epSet, &transporterId, body); + *(int32_t*)0 = 1; + tsem_wait(&pRequest->body.rspSem); if (pRequest->code != TSDB_CODE_SUCCESS) { const char* errorMsg = From 86382731dfe8fc5d1eb5f29b55500d4541dccc6c Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 4 Jan 2023 18:58:18 +0800 Subject: [PATCH 38/89] fix: mac crash stack ignore number --- source/util/src/tlog.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 771fa08fcf..eac35cf1f4 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -866,7 +866,9 @@ int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t tjsonAddIntegerToObject(pJson, "crashSig", signum); tjsonAddIntegerToObject(pJson, "crashTs", taosGetTimestampUs()); -#if !defined(WINDOWS) +#ifdef _TD_DARWIN_64 + taosLogTraceToBuf(tmp, sizeof(tmp), 4); +#elseif !defined(WINDOWS) taosLogTraceToBuf(tmp, sizeof(tmp), 3); #else taosLogTraceToBuf(tmp, sizeof(tmp), 8); @@ -926,8 +928,10 @@ _return: terrno = TAOS_SYSTEM_ERROR(errno); taosPrintLog(flags, level, dflag, "crash signal is %d", signum); - -#ifndef WINDOWS + +#ifdef _TD_DARWIN_64 + taosPrintTrace(flags, level, dflag, 4); +#elseif !defined(WINDOWS) taosPrintLog(flags, level, dflag, "sender PID:%d cmdline:%s", ((siginfo_t *)sigInfo)->si_pid, taosGetCmdlineByPID(((siginfo_t *)sigInfo)->si_pid)); taosPrintTrace(flags, level, dflag, 3); From a551641b6c2f31cf09074a505440de0625e29f2c Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 4 Jan 2023 19:12:16 +0800 Subject: [PATCH 39/89] fix: add mac crash report processing --- source/client/src/clientEnv.c | 2 -- source/client/src/clientImpl.c | 2 -- source/dnode/mgmt/exe/dmMain.c | 2 ++ source/util/src/tlog.c | 4 ++-- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index e44053c26c..2ecade58f9 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -420,9 +420,7 @@ _return: taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo); -#if defined(WINDOWS) exit(signum); -#endif } void crashReportThreadFuncUnexpectedStopped(void) { atomic_store_32(&clientStop, -1); } diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index bed4d9c20c..53acafeeaa 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1253,8 +1253,6 @@ STscObj* taosConnectImpl(const char* user, const char* auth, const char* db, __t int64_t transporterId = 0; asyncSendMsgToServer(pTscObj->pAppInfo->pTransporter, &pTscObj->pAppInfo->mgmtEp.epSet, &transporterId, body); - - *(int32_t*)0 = 1; tsem_wait(&pRequest->body.rspSem); if (pRequest->code != TSDB_CODE_SUCCESS) { diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index 006610fb69..711280ea58 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -99,6 +99,8 @@ void dmLogCrash(int signum, void *sigInfo, void *context) { _return: taosLogCrashInfo("taosd", pMsg, msgLen, signum, sigInfo); + + exit(signum); } static void dmSetSignalHandle() { diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index eac35cf1f4..d9cbde5714 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -868,7 +868,7 @@ int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t #ifdef _TD_DARWIN_64 taosLogTraceToBuf(tmp, sizeof(tmp), 4); -#elseif !defined(WINDOWS) +#elif !defined(WINDOWS) taosLogTraceToBuf(tmp, sizeof(tmp), 3); #else taosLogTraceToBuf(tmp, sizeof(tmp), 8); @@ -931,7 +931,7 @@ _return: #ifdef _TD_DARWIN_64 taosPrintTrace(flags, level, dflag, 4); -#elseif !defined(WINDOWS) +#elif !defined(WINDOWS) taosPrintLog(flags, level, dflag, "sender PID:%d cmdline:%s", ((siginfo_t *)sigInfo)->si_pid, taosGetCmdlineByPID(((siginfo_t *)sigInfo)->si_pid)); taosPrintTrace(flags, level, dflag, 3); From 47ace000903ac18e1c9ef87735a3846b6a9e56b6 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 5 Jan 2023 15:40:43 +0800 Subject: [PATCH 40/89] enh: refactor some sync func names for pipelining --- source/libs/sync/inc/syncMessage.h | 4 +-- source/libs/sync/inc/syncPipeline.h | 12 ++++---- source/libs/sync/src/syncAppendEntries.c | 4 +-- source/libs/sync/src/syncMessage.c | 4 +-- source/libs/sync/src/syncPipeline.c | 35 +++++++++++------------- 5 files changed, 28 insertions(+), 31 deletions(-) diff --git a/source/libs/sync/inc/syncMessage.h b/source/libs/sync/inc/syncMessage.h index 3bd94dbab5..bd89f6af3a 100644 --- a/source/libs/sync/inc/syncMessage.h +++ b/source/libs/sync/inc/syncMessage.h @@ -258,8 +258,8 @@ int32_t syncBuildRequestVote(SRpcMsg* pMsg, int32_t vgId); int32_t syncBuildRequestVoteReply(SRpcMsg* pMsg, int32_t vgId); int32_t syncBuildAppendEntries(SRpcMsg* pMsg, int32_t dataLen, int32_t vgId); int32_t syncBuildAppendEntriesReply(SRpcMsg* pMsg, int32_t vgId); -int32_t syncBuildAppendEntriesFromRaftLog(SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevLogTerm, - SRpcMsg* pRpcMsg); +int32_t syncBuildAppendEntriesFromRaftEntry(SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevLogTerm, + SRpcMsg* pRpcMsg); int32_t syncBuildHeartbeat(SRpcMsg* pMsg, int32_t vgId); int32_t syncBuildHeartbeatReply(SRpcMsg* pMsg, int32_t vgId); int32_t syncBuildPreSnapshot(SRpcMsg* pMsg, int32_t vgId); diff --git a/source/libs/sync/inc/syncPipeline.h b/source/libs/sync/inc/syncPipeline.h index a0a0691694..fb5541f916 100644 --- a/source/libs/sync/inc/syncPipeline.h +++ b/source/libs/sync/inc/syncPipeline.h @@ -78,14 +78,14 @@ static FORCE_INLINE int32_t syncLogGetNextRetryBackoff(SSyncLogReplMgr* pMgr) { SyncTerm syncLogReplMgrGetPrevLogTerm(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index); int32_t syncLogReplMgrReplicateOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode); -int32_t syncLogBufferReplicateOneTo(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index, SyncTerm* pTerm, - SRaftId* pDestId, bool* pBarrier); -int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode); -int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index); +int32_t syncLogReplMgrReplicateOneTo(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index, SyncTerm* pTerm, + SRaftId* pDestId, bool* pBarrier); +int32_t syncLogReplMgrReplicateAttempt(SSyncLogReplMgr* pMgr, SSyncNode* pNode); +int32_t syncLogReplMgrReplicateProbe(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index); int32_t syncLogReplMgrProcessReply(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg); -int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg); -int32_t syncLogReplMgrProcessReplyInNormalMode(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg); +int32_t syncLogReplMgrProcessReplyAsRecovery(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg); +int32_t syncLogReplMgrProcessReplyAsNormal(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg); int32_t syncLogReplMgrProcessHeartbeatReply(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncHeartbeatReply* pMsg); int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode); diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 1dc6905b88..d9b13610e3 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -127,7 +127,7 @@ int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) { return 0; } -SSyncRaftEntry* syncLogAppendEntriesToRaftEntry(const SyncAppendEntries* pMsg) { +SSyncRaftEntry* syncBuildRaftEntryFromAppendEntries(const SyncAppendEntries* pMsg) { SSyncRaftEntry* pEntry = taosMemoryMalloc(pMsg->dataLen); if (pEntry == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -181,7 +181,7 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) { goto _IGNORE; } - SSyncRaftEntry* pEntry = syncLogAppendEntriesToRaftEntry(pMsg); + SSyncRaftEntry* pEntry = syncBuildRaftEntryFromAppendEntries(pMsg); if (pEntry == NULL) { sError("vgId:%d, failed to get raft entry from append entries since %s", ths->vgId, terrstr()); diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index 467b4e2219..af2555153b 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -154,8 +154,8 @@ int32_t syncBuildAppendEntriesReply(SRpcMsg* pMsg, int32_t vgId) { return 0; } -int32_t syncBuildAppendEntriesFromRaftLog(SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevLogTerm, - SRpcMsg* pRpcMsg) { +int32_t syncBuildAppendEntriesFromRaftEntry(SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevLogTerm, + SRpcMsg* pRpcMsg) { uint32_t dataLen = pEntry->bytes; uint32_t bytes = sizeof(SyncAppendEntries) + dataLen; pRpcMsg->contLen = bytes; diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 410986b87a..4a5dc46c76 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -617,7 +617,7 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { } bool barrier = false; - if (syncLogBufferReplicateOneTo(pMgr, pNode, index, &term, pDestId, &barrier) < 0) { + if (syncLogReplMgrReplicateOneTo(pMgr, pNode, index, &term, pDestId, &barrier) < 0) { sError("vgId:%d, failed to replicate sync log entry since %s. index: %" PRId64 ", dest: %" PRIx64 "", pNode->vgId, terrstr(), index, pDestId->addr); goto _out; @@ -647,8 +647,7 @@ _out: return ret; } -int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNode* pNode, - SyncAppendEntriesReply* pMsg) { +int32_t syncLogReplMgrProcessReplyAsRecovery(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg) { SSyncLogBuffer* pBuf = pNode->pLogBuf; SRaftId destId = pMsg->srcId; ASSERT(pMgr->restored == false); @@ -723,7 +722,7 @@ int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNod // attempt to replicate the raft log at index (void)syncLogReplMgrReset(pMgr); - return syncLogReplMgrReplicateProbeOnce(pMgr, pNode, index); + return syncLogReplMgrReplicateProbe(pMgr, pNode, index); } int32_t syncLogReplMgrProcessHeartbeatReply(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncHeartbeatReply* pMsg) { @@ -751,9 +750,9 @@ int32_t syncLogReplMgrProcessReply(SSyncLogReplMgr* pMgr, SSyncNode* pNode, Sync } if (pMgr->restored) { - (void)syncLogReplMgrProcessReplyInNormalMode(pMgr, pNode, pMsg); + (void)syncLogReplMgrProcessReplyAsNormal(pMgr, pNode, pMsg); } else { - (void)syncLogReplMgrProcessReplyInRecoveryMode(pMgr, pNode, pMsg); + (void)syncLogReplMgrProcessReplyAsRecovery(pMgr, pNode, pMsg); } taosThreadMutexUnlock(&pBuf->mutex); return 0; @@ -761,14 +760,14 @@ int32_t syncLogReplMgrProcessReply(SSyncLogReplMgr* pMgr, SSyncNode* pNode, Sync int32_t syncLogReplMgrReplicateOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { if (pMgr->restored) { - (void)syncLogReplMgrReplicateAttemptedOnce(pMgr, pNode); + (void)syncLogReplMgrReplicateAttempt(pMgr, pNode); } else { - (void)syncLogReplMgrReplicateProbeOnce(pMgr, pNode, pNode->pLogBuf->matchIndex); + (void)syncLogReplMgrReplicateProbe(pMgr, pNode, pNode->pLogBuf->matchIndex); } return 0; } -int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index) { +int32_t syncLogReplMgrReplicateProbe(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index) { ASSERT(!pMgr->restored); ASSERT(pMgr->startIndex >= 0); int64_t retryMaxWaitMs = SYNC_LOG_REPL_RETRY_WAIT_MS * (1 << SYNC_MAX_RETRY_BACKOFF); @@ -783,7 +782,7 @@ int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode SRaftId* pDestId = &pNode->replicasId[pMgr->peerId]; bool barrier = false; SyncTerm term = -1; - if (syncLogBufferReplicateOneTo(pMgr, pNode, index, &term, pDestId, &barrier) < 0) { + if (syncLogReplMgrReplicateOneTo(pMgr, pNode, index, &term, pDestId, &barrier) < 0) { sError("vgId:%d, failed to replicate log entry since %s. index: %" PRId64 ", dest: 0x%016" PRIx64 "", pNode->vgId, terrstr(), index, pDestId->addr); return -1; @@ -807,7 +806,7 @@ int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode return 0; } -int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { +int32_t syncLogReplMgrReplicateAttempt(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { ASSERT(pMgr->restored); SRaftId* pDestId = &pNode->replicasId[pMgr->peerId]; @@ -827,7 +826,7 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p SRaftId* pDestId = &pNode->replicasId[pMgr->peerId]; bool barrier = false; SyncTerm term = -1; - if (syncLogBufferReplicateOneTo(pMgr, pNode, index, &term, pDestId, &barrier) < 0) { + if (syncLogReplMgrReplicateOneTo(pMgr, pNode, index, &term, pDestId, &barrier) < 0) { sError("vgId:%d, failed to replicate log entry since %s. index: %" PRId64 ", dest: 0x%016" PRIx64 "", pNode->vgId, terrstr(), index, pDestId->addr); return -1; @@ -857,7 +856,7 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p return 0; } -int32_t syncLogReplMgrProcessReplyInNormalMode(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg) { +int32_t syncLogReplMgrProcessReplyAsNormal(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg) { ASSERT(pMgr->restored == true); if (pMgr->startIndex <= pMsg->lastSendIndex && pMsg->lastSendIndex < pMgr->endIndex) { if (pMgr->startIndex < pMgr->matchIndex && pMgr->retryBackoff > 0) { @@ -876,7 +875,7 @@ int32_t syncLogReplMgrProcessReplyInNormalMode(SSyncLogReplMgr* pMgr, SSyncNode* pMgr->startIndex = pMgr->matchIndex; } - return syncLogReplMgrReplicateAttemptedOnce(pMgr, pNode); + return syncLogReplMgrReplicateAttempt(pMgr, pNode); } SSyncLogReplMgr* syncLogReplMgrCreate() { @@ -1066,12 +1065,11 @@ SSyncRaftEntry* syncLogBufferGetOneEntry(SSyncLogBuffer* pBuf, SSyncNode* pNode, return pEntry; } -int32_t syncLogBufferReplicateOneTo(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index, SyncTerm* pTerm, - SRaftId* pDestId, bool* pBarrier) { +int32_t syncLogReplMgrReplicateOneTo(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index, SyncTerm* pTerm, + SRaftId* pDestId, bool* pBarrier) { SSyncRaftEntry* pEntry = NULL; SRpcMsg msgOut = {0}; bool inBuf = false; - int32_t ret = -1; SyncTerm prevLogTerm = -1; SSyncLogBuffer* pBuf = pNode->pLogBuf; @@ -1097,14 +1095,13 @@ int32_t syncLogBufferReplicateOneTo(SSyncLogReplMgr* pMgr, SSyncNode* pNode, Syn } if (pTerm) *pTerm = pEntry->term; - int32_t code = syncBuildAppendEntriesFromRaftLog(pNode, pEntry, prevLogTerm, &msgOut); + int32_t code = syncBuildAppendEntriesFromRaftEntry(pNode, pEntry, prevLogTerm, &msgOut); if (code < 0) { sError("vgId:%d, failed to get append entries for index:%" PRId64 "", pNode->vgId, index); goto _err; } (void)syncNodeSendAppendEntries(pNode, pDestId, &msgOut); - ret = 0; sTrace("vgId:%d, replicate one msg index: %" PRId64 " term: %" PRId64 " prevterm: %" PRId64 " to dest: 0x%016" PRIx64, pNode->vgId, pEntry->index, pEntry->term, prevLogTerm, pDestId->addr); From e66f19ab4647584b59253856ae7722afe86af601 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 5 Jan 2023 16:59:16 +0800 Subject: [PATCH 41/89] enh: vote for higher lastLogTerm despite commitIndex --- source/libs/sync/src/syncRequestVote.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/source/libs/sync/src/syncRequestVote.c b/source/libs/sync/src/syncRequestVote.c index 773befe1e4..bdcc749516 100644 --- a/source/libs/sync/src/syncRequestVote.c +++ b/source/libs/sync/src/syncRequestVote.c @@ -48,15 +48,6 @@ static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pM SyncTerm myLastTerm = syncNodeGetLastTerm(pSyncNode); SyncIndex myLastIndex = syncNodeGetLastIndex(pSyncNode); - if (pMsg->lastLogIndex < pSyncNode->commitIndex) { - sNTrace(pSyncNode, - "logok:0, {my-lterm:%" PRIu64 ", my-lindex:%" PRId64 ", recv-lterm:%" PRIu64 ", recv-lindex:%" PRId64 - ", recv-term:%" PRIu64 "}", - myLastTerm, myLastIndex, pMsg->lastLogTerm, pMsg->lastLogIndex, pMsg->term); - - return false; - } - if (myLastTerm == SYNC_TERM_INVALID) { sNTrace(pSyncNode, "logok:0, {my-lterm:%" PRIu64 ", my-lindex:%" PRId64 ", recv-lterm:%" PRIu64 ", recv-lindex:%" PRId64 @@ -70,6 +61,13 @@ static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pM "logok:1, {my-lterm:%" PRIu64 ", my-lindex:%" PRId64 ", recv-lterm:%" PRIu64 ", recv-lindex:%" PRId64 ", recv-term:%" PRIu64 "}", myLastTerm, myLastIndex, pMsg->lastLogTerm, pMsg->lastLogIndex, pMsg->term); + + if (pMsg->lastLogIndex < pSyncNode->commitIndex) { + sNWarn(pSyncNode, + "logok:1, commit rollback required. {my-lterm:%" PRIu64 ", my-lindex:%" PRId64 ", recv-lterm:%" PRIu64 + ", recv-lindex:%" PRId64 ", recv-term:%" PRIu64 "}", + myLastTerm, myLastIndex, pMsg->lastLogTerm, pMsg->lastLogIndex, pMsg->term); + } return true; } @@ -137,4 +135,4 @@ int32_t syncNodeOnRequestVote(SSyncNode* ths, const SRpcMsg* pRpcMsg) { syncLogSendRequestVoteReply(ths, pReply, ""); syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); return 0; -} \ No newline at end of file +} From d9d4074377973791eacc71a13fe141cda51f6923 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 5 Jan 2023 17:32:39 +0800 Subject: [PATCH 42/89] fix: tsdb snap wrong suid --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 7b5020d395..08d52554c6 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -192,6 +192,7 @@ static int32_t tsdbSnapNextRow(STsdbSnapReader* pReader) { int64_t rowVer = pIter->bData.aVersion[pIter->iRow]; if (rowVer >= pReader->sver && rowVer <= pReader->ever) { + pIter->rInfo.suid = pIter->bData.suid; pIter->rInfo.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[pIter->iRow]; pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow); goto _out; From d1b4dc94d88ea4d18aee113ae47e2538fe234baf Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 5 Jan 2023 19:26:45 +0800 Subject: [PATCH 43/89] fix:table name error in schemaless --- include/common/tname.h | 2 +- source/client/src/clientSml.c | 13 ++++----- source/common/src/tname.c | 1 - utils/test/c/sml_test.c | 54 +++++++++++++++++++++++++++++++++-- 4 files changed, 58 insertions(+), 12 deletions(-) diff --git a/include/common/tname.h b/include/common/tname.h index 666a25303e..6a89d2a6be 100644 --- a/include/common/tname.h +++ b/include/common/tname.h @@ -78,7 +78,7 @@ typedef struct { // output char* ctbShortName; // must have size of TSDB_TABLE_NAME_LEN; - uint64_t uid; // child table uid, may be useful +// uint64_t uid; // child table uid, may be useful } RandTableName; void buildChildTableName(RandTableName* rName); diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index a4e943da32..c24aa536c2 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -182,6 +182,7 @@ typedef struct { SSmlMsgBuf msgBuf; SHashObj *dumplicateKey; // for dumplicate key SArray *colsContainer; // for cols parse, if dataFormat == false + int32_t uid; // used for automatic create child table cJSON *root; // for parse json } SSmlHandle; @@ -2155,13 +2156,11 @@ static int32_t smlParseInfluxLine(SSmlHandle *info, const char *sql, const int l (*oneTable)->sTableNameLen = elements.measureLen; if (strlen((*oneTable)->childTableName) == 0) { RandTableName rName = {(*oneTable)->tags, (*oneTable)->sTableName, (uint8_t)(*oneTable)->sTableNameLen, - (*oneTable)->childTableName, 0}; + (*oneTable)->childTableName}; buildChildTableName(&rName); - (*oneTable)->uid = rName.uid; - } else { - (*oneTable)->uid = *(uint64_t *)((*oneTable)->childTableName); } + (*oneTable)->uid = info->uid++; } SSmlSTableMeta **tableMeta = (SSmlSTableMeta **)taosHashGet(info->superTables, elements.measure, elements.measureLen); @@ -2226,11 +2225,8 @@ static int32_t smlParseTelnetLine(SSmlHandle *info, void *data, const int len) { taosHashClear(info->dumplicateKey); if (strlen(tinfo->childTableName) == 0) { - RandTableName rName = {tinfo->tags, tinfo->sTableName, (uint8_t)tinfo->sTableNameLen, tinfo->childTableName, 0}; + RandTableName rName = {tinfo->tags, tinfo->sTableName, (uint8_t)tinfo->sTableNameLen, tinfo->childTableName}; buildChildTableName(&rName); - tinfo->uid = rName.uid; - } else { - tinfo->uid = *(uint64_t *)(tinfo->childTableName); // generate uid by name simple } bool hasTable = true; @@ -2239,6 +2235,7 @@ static int32_t smlParseTelnetLine(SSmlHandle *info, void *data, const int len) { if (!oneTable) { taosHashPut(info->childTables, tinfo->childTableName, strlen(tinfo->childTableName), &tinfo, POINTER_BYTES); oneTable = &tinfo; + tinfo->uid = info->uid++; hasTable = false; } else { smlDestroyTableInfo(info, tinfo); diff --git a/source/common/src/tname.c b/source/common/src/tname.c index 5cb3fe4dc0..f21938ed29 100644 --- a/source/common/src/tname.c +++ b/source/common/src/tname.c @@ -330,5 +330,4 @@ void buildChildTableName(RandTableName* rName) { strcat(rName->ctbShortName, temp); } taosStringBuilderDestroy(&sb); - rName->uid = *(uint64_t*)(context.digest); } diff --git a/utils/test/c/sml_test.c b/utils/test/c/sml_test.c index df416b3822..315aabab3c 100644 --- a/utils/test/c/sml_test.c +++ b/utils/test/c/sml_test.c @@ -1131,8 +1131,10 @@ int sml_ttl_Test() { pRes = taos_query(taos, "select `ttl` from information_schema.ins_tables where table_name='t_be97833a0e1f523fcdaeb6291d6fdf27'"); printf("%s result2:%s\n", __FUNCTION__, taos_errstr(pRes)); TAOS_ROW row = taos_fetch_row(pRes); - int32_t ttl = *(int32_t*)row[0]; - ASSERT(ttl == 20); + if(row != NULL && row[0] != NULL){ + int32_t ttl = *(int32_t*)row[0]; + ASSERT(ttl == 20); + } int code = taos_errno(pRes); taos_free_result(pRes); @@ -1141,8 +1143,56 @@ int sml_ttl_Test() { return code; } +int sml_ts2385_Test() { + TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0); + + TAOS_RES *pRes = taos_query(taos, "CREATE DATABASE IF NOT EXISTS ts2385"); + taos_free_result(pRes); + + const char *sql[] ={ + "DataRTU,deviceId=2211230C94K0_1,dataModelName=DataRTU_2211230C94K0_1 s5=false,s18=false,k14=0,k2=0,k8=0,k10=0,s9=false,s19=false,k11=0,k13=0,s22=false,k15=0,m2=37.416671660000006,m8=600,m10=1532,m1=20.25,m13=0,s7=false,k7=0,m16=0,s17=false,k4=0,s11=false,s15=true,m7=600,m12=1490,s1=true,m14=0,s14=false,s16=true,k5=0,hex=\"7b3b00000001030301030200000000323231313233304339344b30002b01012a10028003000000070d05da025802580258025802580258045305fc05f505d200000000000000000afc7d\",k6=0,m3=600,s3=false,s24=false,k3=0,m6=600,m15=0,s12=false,k1=0,k16=0,s10=false,s21=false,k12=0,m5=600,s8=false,m4=600,m9=1107,s2=false,s13=false,s20=false,s23=false,k9=0,m11=1525,s4=false,s6=false 1672818929178749400", + "DataRTU,deviceId=2211230C94K0_1,dataModelName=DataRTU_2211230C94K0_1 k2=0,k11=0,m3=600,m12=1506,s17=false,m5=600,s11=false,s22=false,k6=0,m13=0,s16=true,k5=0,s21=false,m4=600,m7=600,s9=false,s10=false,s18=false,k7=0,m8=600,k1=0,hex=\"7b3a00000001030301030200000000323231313233304339344b30002b01012a10028003000000071105e8025802580258025802580258044905eb05ef05e200000000000000000afc7d\",m11=1519,m16=0,s19=false,s23=false,s24=false,s14=false,s6=false,k10=0,k15=0,k14=0,s2=false,s4=false,s8=false,s13=false,s15=true,s20=false,m2=38.000005040000005,s3=false,s7=false,k3=0,k8=0,k13=0,m6=600,m14=0,m15=0,k4=0,m1=20.450000000000003,m9=1097,s1=true,m10=1515,s5=false,s12=false,k9=0,k12=0,k16=0 1672818919126971000", + "DataRTU,deviceId=2211230C94K0_1,dataModelName=DataRTU_2211230C94K0_1 k7=0,k14=0,m3=600,m7=600,s5=false,k2=0,k3=0,k8=0,s3=false,s20=false,k15=0,m10=1482,s17=false,k1=0,k16=0,m15=0,s12=false,k9=0,m16=0,s11=false,m4=600,s10=false,s15=true,s24=false,m8=600,m13=0,s2=false,s18=false,k12=0,s14=false,s19=false,hex=\"7b3900000001030301030200000000323231313233304339344b30002b01012a10028003000000071505ef025802580258025802580258045005ca05b105d800000000000000000aa47d\",s1=true,s4=false,s7=false,s8=false,s13=false,m6=600,s6=false,s21=false,k11=0,m12=1496,m9=1104,s16=true,k5=0,s9=false,k10=0,k13=0,m2=38.291671730000004,s22=false,m5=600,m11=1457,m14=0,k4=0,m1=20.650000000000006,s23=false,k6=0 1672818909130866800", + "DataRTU,deviceId=2211230C94K0_1,dataModelName=DataRTU_2211230C94K0_1 m7=600,k4=0,k14=0,s22=false,k13=0,s2=false,m11=1510,m14=0,s4=false,s10=false,m1=21,m16=0,m13=0,s9=false,s13=false,s14=false,k10=0,m3=600,m9=1107,s18=false,s19=false,k2=0,hex=\"7b3600000001030301030200000000323231313233304339344b30002b01012a10028003000000071c0619025802580258025802580258045305dc05e6058d00000000000000000ad27d\",m2=40.04167187,m8=600,k7=0,k8=0,m10=1500,s23=false,k5=0,s11=false,s21=false,k9=0,m15=0,m12=1421,s1=true,s5=false,s8=false,m5=600,k16=0,k15=0,m6=600,s3=false,s6=false,s7=false,s15=true,s20=false,s24=false,k11=0,k1=0,k6=0,k12=0,m4=600,s16=true,s17=false,k3=0,s12=false 1672818879189483200", + "DataRTU,deviceId=2106070C11M0_2,dataModelName=DataRTU_2106070C11M0_2 m1=5691,k14=0,m6=0,s14=false,k8=0,s19=false,s20=false,k12=0,s17=false,k3=0,m8=0,s8=false,m7=0,s9=false,s4=false,s11=false,s13=false,s16=false,k5=0,k15=0,k16=0,s10=false,s23=false,s1=false,s2=false,s3=false,s12=false,s24=false,k2=0,k10=0,hex=\"7b1400000001030301030200000000323130363037304331314d30002b01022a080400000000000008af0c000000000000000000000000000000000000000000000000000000000ad47d\",m2=0,s7=false,s18=false,s21=false,m3=0,m5=0,k4=0,k11=0,m4=0,k1=0,k6=0,k13=0,s6=false,s15=false,s5=false,s22=false,k7=0,k9=0 1672818779549848800" + }; + pRes = taos_query(taos, "use ts2385"); + taos_free_result(pRes); + + pRes = taos_schemaless_insert(taos, (char **)sql, sizeof(sql) / sizeof(sql[0]), TSDB_SML_LINE_PROTOCOL, TSDB_SML_TIMESTAMP_NANO_SECONDS); + + printf("%s result:%s\n", __FUNCTION__, taos_errstr(pRes)); + int code = taos_errno(pRes); + ASSERT(!code); + taos_free_result(pRes); + + pRes = taos_query(taos, "select distinct tbname from `DataRTU` order by tbname"); + printf("%s result2:%s\n", __FUNCTION__, taos_errstr(pRes)); + int num = 0; + TAOS_ROW row = NULL; + while((row = taos_fetch_row(pRes))){ + if(row[0] != NULL && num == 0){ + ASSERT(strncmp((char *)row[0], "DataRTU_2106070C11M0_2", sizeof("DataRTU_2106070C11M0_2") - 1) == 0); + } + + if(row[0] != NULL && num == 1){ + ASSERT(strncmp((char *)row[0], "DataRTU_2211230C94K0_1", sizeof("DataRTU_2211230C94K0_1") - 1) == 0); + } + num++; + } + ASSERT(num == 2); + + code = taos_errno(pRes); + taos_free_result(pRes); + taos_close(taos); + + return code; +} + int main(int argc, char *argv[]) { int ret = 0; + ret = sml_ts2385_Test(); + ASSERT(!ret); ret = sml_ttl_Test(); ASSERT(!ret); ret = sml_ts2164_Test(); From 867b71a19c78597a95e548724b16937d5759776b Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 5 Jan 2023 21:32:40 +0800 Subject: [PATCH 44/89] change default config value --- source/common/src/tglobal.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index deefa65595..2f39bb7b42 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -202,7 +202,9 @@ int32_t taosSetTfsCfg(SConfig *pCfg) { int32_t taosSetTfsCfg(SConfig *pCfg); #endif -struct SConfig *taosGetCfg() { return tsCfg; } +struct SConfig *taosGetCfg() { + return tsCfg; +} static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *inputCfgDir, const char *envFile, char *apolloUrl) { @@ -377,11 +379,11 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "queryRspPolicy", tsQueryRspPolicy, 0, 1, 0) != 0) return -1; tsNumOfRpcThreads = tsNumOfCores / 2; - tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 1, 4); + tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 1, TSDB_MAX_RPC_THREADS); if (cfgAddInt32(pCfg, "numOfRpcThreads", tsNumOfRpcThreads, 1, 1024, 0) != 0) return -1; tsNumOfCommitThreads = tsNumOfCores / 2; - tsNumOfCommitThreads = TRANGE(tsNumOfCommitThreads, 2, 4); + tsNumOfCommitThreads = TRANGE(tsNumOfCommitThreads, 2, tsNumOfCommitThreads); if (cfgAddInt32(pCfg, "numOfCommitThreads", tsNumOfCommitThreads, 1, 1024, 0) != 0) return -1; tsNumOfMnodeReadThreads = tsNumOfCores / 8; @@ -715,7 +717,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsNumOfSnodeWriteThreads = cfgGetItem(pCfg, "numOfSnodeUniqueThreads")->i32; tsRpcQueueMemoryAllowed = cfgGetItem(pCfg, "rpcQueueMemoryAllowed")->i64; - tsSIMDBuiltins = (bool) cfgGetItem(pCfg, "SIMD-builtins")->bval; + tsSIMDBuiltins = (bool)cfgGetItem(pCfg, "SIMD-builtins")->bval; tsEnableMonitor = cfgGetItem(pCfg, "monitor")->bval; tsMonitorInterval = cfgGetItem(pCfg, "monitorInterval")->i32; From f3831b54db3b8a0c7fffbf3de21f0cb1da8cd245 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 6 Jan 2023 07:41:07 +0800 Subject: [PATCH 45/89] fix: rsma commit without table --- source/dnode/vnode/src/sma/smaCommit.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 9748963722..d9a3205a41 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -298,9 +298,13 @@ static int32_t tdProcessRSmaSyncPostCommitImpl(SSma *pSma) { * @return int32_t */ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { + int32_t code = 0; + int32_t lino = 0; + STsdb *pTsdb = NULL; SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); + if (!pEnv) { - return TSDB_CODE_SUCCESS; + goto _exit; } SSmaStat *pStat = SMA_ENV_STAT(pEnv); @@ -351,9 +355,8 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { } } smaInfo("vgId:%d, rsma commit, all items are consumed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); - if (tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)) < 0) { - return TSDB_CODE_FAILED; - } + code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); + TSDB_CHECK_CODE(code, lino, _exit); smaInfo("vgId:%d, rsma commit, operator state committed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); #if 0 // consuming task of qTaskInfo clone @@ -378,12 +381,16 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); #endif - // all rsma results are written completely - STsdb *pTsdb = NULL; + // all rsma results are written completely, start to tsdbPrepareCommit + +_exit: if ((pTsdb = VND_RSMA1(pSma->pVnode))) tsdbPrepareCommit(pTsdb); if ((pTsdb = VND_RSMA2(pSma->pVnode))) tsdbPrepareCommit(pTsdb); - return TSDB_CODE_SUCCESS; + if (code) { + smaError("vgId:%d, %s failed at line %d since %s", SMA_VID(pSma), __func__, lino, tstrerror(code)); + } + return code; } /** From 88abc4fde22a42a5147de9d59c430aa538dae2b4 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 6 Jan 2023 09:36:57 +0800 Subject: [PATCH 46/89] change default config value --- source/common/src/tglobal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 2f39bb7b42..a58f0f681b 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -383,7 +383,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "numOfRpcThreads", tsNumOfRpcThreads, 1, 1024, 0) != 0) return -1; tsNumOfCommitThreads = tsNumOfCores / 2; - tsNumOfCommitThreads = TRANGE(tsNumOfCommitThreads, 2, tsNumOfCommitThreads); + tsNumOfCommitThreads = TRANGE(tsNumOfCommitThreads, 2, 4); if (cfgAddInt32(pCfg, "numOfCommitThreads", tsNumOfCommitThreads, 1, 1024, 0) != 0) return -1; tsNumOfMnodeReadThreads = tsNumOfCores / 8; From 9c33f014b18ac97d730ee17bb6bf856cbf21f065 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 6 Jan 2023 10:57:04 +0800 Subject: [PATCH 47/89] fix: rsma commit without table --- source/dnode/vnode/src/sma/smaCommit.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index d9a3205a41..20db35e5b5 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -298,13 +298,9 @@ static int32_t tdProcessRSmaSyncPostCommitImpl(SSma *pSma) { * @return int32_t */ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { - int32_t code = 0; - int32_t lino = 0; - STsdb *pTsdb = NULL; SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); - if (!pEnv) { - goto _exit; + return TSDB_CODE_SUCCESS; } SSmaStat *pStat = SMA_ENV_STAT(pEnv); @@ -355,8 +351,9 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { } } smaInfo("vgId:%d, rsma commit, all items are consumed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); - code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); - TSDB_CHECK_CODE(code, lino, _exit); + if (tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)) < 0) { + return TSDB_CODE_FAILED; + } smaInfo("vgId:%d, rsma commit, operator state committed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); #if 0 // consuming task of qTaskInfo clone @@ -381,16 +378,12 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); #endif - // all rsma results are written completely, start to tsdbPrepareCommit - -_exit: + // all rsma results are written completely + STsdb *pTsdb = NULL; if ((pTsdb = VND_RSMA1(pSma->pVnode))) tsdbPrepareCommit(pTsdb); if ((pTsdb = VND_RSMA2(pSma->pVnode))) tsdbPrepareCommit(pTsdb); - if (code) { - smaError("vgId:%d, %s failed at line %d since %s", SMA_VID(pSma), __func__, lino, tstrerror(code)); - } - return code; + return TSDB_CODE_SUCCESS; } /** @@ -402,6 +395,10 @@ _exit: static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) { int32_t code = 0; SVnode *pVnode = pSma->pVnode; + SSmaEnv *pSmaEnv = SMA_RSMA_ENV(pSma); + if (!pSmaEnv) { + goto _exit; + } #if 0 SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pSmaEnv); From 5e9a90ef5c766e507479120ff08c8b8113d9dd74 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 6 Jan 2023 11:53:55 +0800 Subject: [PATCH 48/89] fix: remove privilege info on drop db / topic --- source/dnode/mnode/impl/inc/mndUser.h | 2 + source/dnode/mnode/impl/src/mndDb.c | 12 +-- source/dnode/mnode/impl/src/mndTopic.c | 21 +++-- source/dnode/mnode/impl/src/mndUser.c | 114 +++++++++++++++++++++---- 4 files changed, 109 insertions(+), 40 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndUser.h b/source/dnode/mnode/impl/inc/mndUser.h index cf7deba397..8943ba703e 100644 --- a/source/dnode/mnode/impl/inc/mndUser.h +++ b/source/dnode/mnode/impl/inc/mndUser.h @@ -34,6 +34,8 @@ SHashObj *mndDupDbHash(SHashObj *pOld); SHashObj *mndDupTopicHash(SHashObj *pOld); int32_t mndValidateUserAuthInfo(SMnode *pMnode, SUserAuthVersion *pUsers, int32_t numOfUses, void **ppRsp, int32_t *pRspLen); +int32_t mndUserRemoveDb(SMnode *pMnode, STrans *pTrans, char *db); +int32_t mndUserRemoveTopic(SMnode *pMnode, STrans *pTrans, char *topic); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 87b5a5c42d..7e5c29d56f 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -1051,17 +1051,7 @@ static int32_t mndDropDb(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb) { if (mndDropStreamByDb(pMnode, pTrans, pDb) != 0) goto _OVER; if (mndDropSmasByDb(pMnode, pTrans, pDb) != 0) goto _OVER; if (mndSetDropDbRedoActions(pMnode, pTrans, pDb) != 0) goto _OVER; - - SUserObj *pUser = mndAcquireUser(pMnode, pDb->createUser); - if (pUser != NULL) { - pUser->authVersion++; - SSdbRaw *pCommitRaw = mndUserActionEncode(pUser); - if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { - mError("trans:%d, failed to append redo log since %s", pTrans->id, terrstr()); - goto _OVER; - } - (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); - } + if (mndUserRemoveDb(pMnode, pTrans, pDb->name) != 0) goto _OVER; int32_t rspLen = 0; void *pRsp = NULL; diff --git a/source/dnode/mnode/impl/src/mndTopic.c b/source/dnode/mnode/impl/src/mndTopic.c index bf3827c090..48c35f3f07 100644 --- a/source/dnode/mnode/impl/src/mndTopic.c +++ b/source/dnode/mnode/impl/src/mndTopic.c @@ -604,22 +604,19 @@ _OVER: } static int32_t mndDropTopic(SMnode *pMnode, STrans *pTrans, SRpcMsg *pReq, SMqTopicObj *pTopic) { + int32_t code = -1; + if (mndUserRemoveTopic(pMnode, pTrans, pTopic->name) != 0) goto _OVER; + SSdbRaw *pCommitRaw = mndTopicActionEncode(pTopic); - if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { - mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); - mndTransDrop(pTrans); - return -1; - } + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) goto _OVER; (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_DROPPED); - if (mndTransPrepare(pMnode, pTrans) != 0) { - mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); - mndTransDrop(pTrans); - return -1; - } + if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; + code = 0; +_OVER: mndTransDrop(pTrans); - return 0; + return code; } static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { @@ -890,6 +887,7 @@ int32_t mndCheckTopicExist(SMnode *pMnode, SDbObj *pDb) { return 0; } +#if 0 int32_t mndDropTopicByDB(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { int32_t code = 0; SSdb *pSdb = pMnode->pSdb; @@ -917,3 +915,4 @@ int32_t mndDropTopicByDB(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { return code; } +#endif \ No newline at end of file diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c index 85a92c7aef..b965e13316 100644 --- a/source/dnode/mnode/impl/src/mndUser.c +++ b/source/dnode/mnode/impl/src/mndUser.c @@ -285,14 +285,35 @@ static int32_t mndUserActionInsert(SSdb *pSdb, SUserObj *pUser) { return 0; } -static int32_t mndUserActionDelete(SSdb *pSdb, SUserObj *pUser) { - mTrace("user:%s, perform delete action, row:%p", pUser->user, pUser); +static int32_t mndUserDupObj(SUserObj *pUser, SUserObj *pNew) { + memcpy(pNew, pUser, sizeof(SUserObj)); + pNew->authVersion++; + pNew->updateTime = taosGetTimestampMs(); + + taosRLockLatch(&pUser->lock); + pNew->readDbs = mndDupDbHash(pUser->readDbs); + pNew->writeDbs = mndDupDbHash(pUser->writeDbs); + pNew->topics = mndDupTopicHash(pUser->topics); + taosRUnLockLatch(&pUser->lock); + + if (pNew->readDbs == NULL || pNew->writeDbs == NULL || pNew->topics == NULL) { + return -1; + } + return 0; +} + +static void mndUserFreeObj(SUserObj *pUser) { taosHashCleanup(pUser->readDbs); taosHashCleanup(pUser->writeDbs); taosHashCleanup(pUser->topics); pUser->readDbs = NULL; pUser->writeDbs = NULL; pUser->topics = NULL; +} + +static int32_t mndUserActionDelete(SSdb *pSdb, SUserObj *pUser) { + mTrace("user:%s, perform delete action, row:%p", pUser->user, pUser); + mndUserFreeObj(pUser); return 0; } @@ -516,19 +537,7 @@ static int32_t mndProcessAlterUserReq(SRpcMsg *pReq) { goto _OVER; } - memcpy(&newUser, pUser, sizeof(SUserObj)); - newUser.authVersion++; - newUser.updateTime = taosGetTimestampMs(); - - taosRLockLatch(&pUser->lock); - newUser.readDbs = mndDupDbHash(pUser->readDbs); - newUser.writeDbs = mndDupDbHash(pUser->writeDbs); - newUser.topics = mndDupTopicHash(pUser->topics); - taosRUnLockLatch(&pUser->lock); - - if (newUser.readDbs == NULL || newUser.writeDbs == NULL || newUser.topics == NULL) { - goto _OVER; - } + if (mndUserDupObj(pUser, &newUser) != 0) goto _OVER; if (alterReq.alterType == TSDB_ALTER_USER_PASSWD) { char pass[TSDB_PASSWORD_LEN + 1] = {0}; @@ -654,9 +663,7 @@ _OVER: mndReleaseUser(pMnode, pOperUser); mndReleaseUser(pMnode, pUser); - taosHashCleanup(newUser.writeDbs); - taosHashCleanup(newUser.readDbs); - taosHashCleanup(newUser.topics); + mndUserFreeObj(&newUser); return code; } @@ -1007,3 +1014,74 @@ _OVER: tFreeSUserAuthBatchRsp(&batchRsp); return code; } + +int32_t mndUserRemoveDb(SMnode *pMnode, STrans *pTrans, char *db) { + int32_t code = 0; + SSdb *pSdb = pMnode->pSdb; + int32_t len = strlen(db) + 1; + void *pIter = NULL; + SUserObj *pUser = NULL; + SUserObj newUser = {0}; + + while (1) { + pIter = sdbFetch(pSdb, SDB_USER, pIter, (void **)&pUser); + if (pIter == NULL) break; + + code = -1; + if (mndUserDupObj(pUser, &newUser) != 0) break; + + bool inRead = (taosHashGet(newUser.readDbs, db, len) != NULL); + bool inWrite = (taosHashGet(newUser.writeDbs, db, len) != NULL); + if (inRead || inWrite) { + (void)taosHashRemove(newUser.readDbs, db, len); + (void)taosHashRemove(newUser.writeDbs, db, len); + + SSdbRaw *pCommitRaw = mndUserActionEncode(&newUser); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) break; + (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); + } + + mndUserFreeObj(&newUser); + sdbRelease(pSdb, pUser); + code = 0; + } + + if (pUser != NULL) sdbRelease(pSdb, pUser); + if (pIter != NULL) sdbCancelFetch(pSdb, pIter); + mndUserFreeObj(&newUser); + return code; +} + +int32_t mndUserRemoveTopic(SMnode *pMnode, STrans *pTrans, char *topic) { + int32_t code = 0; + SSdb *pSdb = pMnode->pSdb; + int32_t len = strlen(topic) + 1; + void *pIter = NULL; + SUserObj *pUser = NULL; + SUserObj newUser = {0}; + + while (1) { + pIter = sdbFetch(pSdb, SDB_USER, pIter, (void **)&pUser); + if (pIter == NULL) break; + + code = -1; + if (mndUserDupObj(pUser, &newUser) != 0) break; + + bool inTopic = (taosHashGet(newUser.topics, topic, len) != NULL); + if (inTopic) { + (void)taosHashRemove(newUser.topics, topic, len); + SSdbRaw *pCommitRaw = mndUserActionEncode(&newUser); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) break; + (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); + } + + mndUserFreeObj(&newUser); + sdbRelease(pSdb, pUser); + code = 0; + } + + if (pUser != NULL) sdbRelease(pSdb, pUser); + if (pIter != NULL) sdbCancelFetch(pSdb, pIter); + mndUserFreeObj(&newUser); + return code; +} From aa8eea0d659a289b7c9c182084cdc5e59372479e Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Fri, 6 Jan 2023 13:35:07 +0800 Subject: [PATCH 49/89] fix: set operator precision miss --- source/libs/planner/src/planLogicCreater.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 55e8dc7b49..084d99cae5 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -1334,6 +1334,7 @@ static int32_t createSetOpLogicNode(SLogicPlanContext* pCxt, SSetOperator* pSetO } if (TSDB_CODE_SUCCESS == code) { + pSetOp->precision = pSetOperator->precision; *pLogicNode = (SLogicNode*)pSetOp; } else { nodesDestroyNode((SNode*)pSetOp); From 5a98690a631a62e093a2735203098e8ae77b6523 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 6 Jan 2023 13:39:36 +0800 Subject: [PATCH 50/89] fix: windows stack display issue --- include/os/osSystem.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/os/osSystem.h b/include/os/osSystem.h index ccff09089b..5154c56e4b 100644 --- a/include/os/osSystem.h +++ b/include/os/osSystem.h @@ -102,10 +102,10 @@ void taosResetTerminalMode(); symbol->SizeOfStruct = sizeof(SYMBOL_INFO); \ \ if (frames > 0) { \ - offset = snprintf(buf, bufSize - 1, "obtained %d stack frames", (ignoreNum > 0) ? frames - ignoreNum : frames); \ + offset = snprintf(buf, bufSize - 1, "obtained %d stack frames\n", (ignoreNum > 0) ? frames - ignoreNum : frames); \ for (i = (ignoreNum > 0) ? ignoreNum : 0; i < frames; i++) { \ SymFromAddr(process, (DWORD64)(stack[i]), 0, symbol); \ - offset += snprintf(buf + offset, bufSize - 1 - offset, "frame:%i, %s - 0x%0X", (ignoreNum > 0) ? i - ignoreNum : i, symbol->Name, symbol->Address); \ + offset += snprintf(buf + offset, bufSize - 1 - offset, "frame:%i, %s - 0x%0X\n", (ignoreNum > 0) ? i - ignoreNum : i, symbol->Name, symbol->Address); \ } \ } \ free(symbol); \ @@ -131,10 +131,10 @@ void taosResetTerminalMode(); symbol->SizeOfStruct = sizeof(SYMBOL_INFO); \ \ if (frames > 0) { \ - taosPrintLog(flags, level, dflag, "obtained %d stack frames", (ignoreNum > 0) ? frames - ignoreNum : frames); \ + taosPrintLog(flags, level, dflag, "obtained %d stack frames\n", (ignoreNum > 0) ? frames - ignoreNum : frames); \ for (i = (ignoreNum > 0) ? ignoreNum : 0; i < frames; i++) { \ SymFromAddr(process, (DWORD64)(stack[i]), 0, symbol); \ - taosPrintLog(flags, level, dflag, "frame:%i, %s - 0x%0X", (ignoreNum > 0) ? i - ignoreNum : i, symbol->Name, symbol->Address); \ + taosPrintLog(flags, level, dflag, "frame:%i, %s - 0x%0X\n", (ignoreNum > 0) ? i - ignoreNum : i, symbol->Name, symbol->Address); \ } \ } \ free(symbol); \ From 129380703fc8b9cac236c492a2cfc6fd721a52ac Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 6 Jan 2023 14:25:38 +0800 Subject: [PATCH 51/89] fix: handle error while write vnodes.json --- source/dnode/mgmt/mgmt_vnode/src/vmFile.c | 64 +++++++++-------------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c index dc32054fd7..2f81313037 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c @@ -145,37 +145,26 @@ _OVER: } int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { - int32_t code = 0; - char file[PATH_MAX] = {0}; - char realfile[PATH_MAX] = {0}; + int32_t code = -1; + char *content = NULL; + TdFilePtr pFile = NULL; + SVnodeObj **ppVnodes = NULL; + char file[PATH_MAX] = {0}; + char realfile[PATH_MAX] = {0}; snprintf(file, sizeof(file), "%s%svnodes.json.bak", pMgmt->path, TD_DIRSEP); - snprintf(realfile, sizeof(file), "%s%svnodes.json", pMgmt->path, TD_DIRSEP); + snprintf(realfile, sizeof(realfile), "%s%svnodes.json", pMgmt->path, TD_DIRSEP); - TdFilePtr pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to write %s since %s", file, terrstr()); - return -1; - } + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) goto _OVER; - int32_t numOfVnodes = 0; - SVnodeObj **ppVnodes = vmGetVnodeListFromHash(pMgmt, &numOfVnodes); - if (ppVnodes == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = -1; - dError("failed to write %s while get vnodelist", file); - goto _OVER; - } + int32_t numOfVnodes = 0; + ppVnodes = vmGetVnodeListFromHash(pMgmt, &numOfVnodes); + if (ppVnodes == NULL) goto _OVER; int32_t len = 0; int32_t maxLen = MAX_CONTENT_LEN; - char *content = taosMemoryCalloc(1, maxLen + 1); - if (content == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = -1; - dError("failed to write %s while malloc content", file); - goto _OVER; - } + content = taosMemoryCalloc(1, maxLen + 1); + if (content == NULL) goto _OVER; len += snprintf(content + len, maxLen - len, "{\n"); len += snprintf(content + len, maxLen - len, " \"vnodes\": [\n"); @@ -195,14 +184,19 @@ int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { } len += snprintf(content + len, maxLen - len, " ]\n"); len += snprintf(content + len, maxLen - len, "}\n"); - terrno = 0; + + if (taosWriteFile(pFile, content, len) <= 0) goto _OVER; + if (taosFsyncFile(pFile) < 0) goto _OVER; + taosCloseFile(&pFile); + + if (taosRenameFile(file, realfile) != 0) goto _OVER; + + code = 0; + dInfo("succeed to write vnodes file:%s, vnodes:%d", realfile, numOfVnodes); _OVER: - taosWriteFile(pFile, content, len); - taosFsyncFile(pFile); - taosCloseFile(&pFile); - taosMemoryFree(content); - + if (content != NULL) taosMemoryFree(content); + if (pFile != NULL) taosCloseFile(&pFile); if (ppVnodes != NULL) { for (int32_t i = 0; i < numOfVnodes; ++i) { SVnodeObj *pVnode = ppVnodes[i]; @@ -213,14 +207,8 @@ _OVER: taosMemoryFree(ppVnodes); } - if (code != 0) return -1; - - dInfo("succeed to write %s, numOfVnodes:%d", realfile, numOfVnodes); - code = taosRenameFile(file, realfile); - if (code != 0) { - dError("failed to rename %s to %s", file, realfile); + dError("failed to write vnodes file:%s since %s, vnodes:%d", realfile, terrstr(), numOfVnodes); } - return code; } \ No newline at end of file From 81dcb23caec6fc854aa46694dfe14ba5ec92bd7f Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 6 Jan 2023 14:26:43 +0800 Subject: [PATCH 52/89] fix: handle error while write raft config file --- source/libs/sync/src/syncRaftCfg.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/source/libs/sync/src/syncRaftCfg.c b/source/libs/sync/src/syncRaftCfg.c index 86ea1f48cc..b4bacd2451 100644 --- a/source/libs/sync/src/syncRaftCfg.c +++ b/source/libs/sync/src/syncRaftCfg.c @@ -72,11 +72,7 @@ int32_t syncWriteCfgFile(SSyncNode *pNode) { snprintf(file, sizeof(file), "%s.bak", realfile); pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); - sError("vgId:%d, failed to open sync cfg file:%s since %s", pNode->vgId, realfile, terrstr()); - goto _OVER; - } + if (pFile == NULL) goto _OVER; terrno = TSDB_CODE_OUT_OF_MEMORY; pJson = tjsonCreateObject(); @@ -91,11 +87,7 @@ int32_t syncWriteCfgFile(SSyncNode *pNode) { if (taosFsyncFile(pFile) < 0) goto _OVER; taosCloseFile(&pFile); - if (taosRenameFile(file, realfile) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - sError("vgId:%d, failed to rename sync cfg file:%s to %s since %s", pNode->vgId, file, realfile, terrstr()); - goto _OVER; - } + if (taosRenameFile(file, realfile) != 0) goto _OVER; code = 0; sInfo("vgId:%d, succeed to write sync cfg file:%s, len:%d", pNode->vgId, realfile, len); From 4203a9e1845bde6088483cd04b37eaed7ee921ae Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Fri, 6 Jan 2023 14:33:48 +0800 Subject: [PATCH 53/89] fix: fix no available disk error --- source/libs/function/inc/tpercentile.h | 4 +- source/libs/function/src/builtinsimpl.c | 11 +++-- source/libs/function/src/tpercentile.c | 57 +++++++++++++------------ 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/source/libs/function/inc/tpercentile.h b/source/libs/function/inc/tpercentile.h index 873dc46a08..80159460f5 100644 --- a/source/libs/function/inc/tpercentile.h +++ b/source/libs/function/inc/tpercentile.h @@ -73,10 +73,10 @@ void tMemBucketDestroy(tMemBucket *pBucket); int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size); -double getPercentile(tMemBucket *pMemBucket, double percent); +int32_t getPercentile(tMemBucket *pMemBucket, double percent, double *result); #endif // TDENGINE_TPERCENTILE_H #ifdef __cplusplus } -#endif \ No newline at end of file +#endif diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 8fde27e046..cd224b716e 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -1670,15 +1670,14 @@ int32_t percentileFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { tMemBucket* pMemBucket = ppInfo->pMemBucket; if (pMemBucket != NULL && pMemBucket->total > 0) { // check for null - SET_DOUBLE_VAL(&ppInfo->result, getPercentile(pMemBucket, v)); + int32_t code = getPercentile(pMemBucket, v, &ppInfo->result); + if (code != TSDB_CODE_SUCCESS) { + tMemBucketDestroy(pMemBucket); + return code; + } } tMemBucketDestroy(pMemBucket); - - if (ppInfo->result < 0) { - return TSDB_CODE_NO_AVAIL_DISK; - } - return functionFinalize(pCtx, pBlock); } diff --git a/source/libs/function/src/tpercentile.c b/source/libs/function/src/tpercentile.c index 04472c42ec..acadb9de1b 100644 --- a/source/libs/function/src/tpercentile.c +++ b/source/libs/function/src/tpercentile.c @@ -90,7 +90,7 @@ static void resetPosInfo(SSlotInfo *pInfo) { pInfo->data = NULL; } -double findOnlyResult(tMemBucket *pMemBucket) { +int32_t findOnlyResult(tMemBucket *pMemBucket, double *result) { ASSERT(pMemBucket->total == 1); for (int32_t i = 0; i < pMemBucket->numOfSlots; ++i) { @@ -108,17 +108,17 @@ double findOnlyResult(tMemBucket *pMemBucket) { int32_t *pageId = taosArrayGet(list, 0); SFilePage *pPage = getBufPage(pMemBucket->pBuffer, *pageId); if (pPage == NULL) { - return -1; + return TSDB_CODE_NO_AVAIL_DISK; } ASSERT(pPage->num == 1); - double v = 0; - GET_TYPED_DATA(v, double, pMemBucket->type, pPage->data); - return v; + GET_TYPED_DATA(*result, double, pMemBucket->type, pPage->data); + return TSDB_CODE_SUCCESS; } } - return 0; + *result = 0.0; + return TSDB_CODE_SUCCESS; } int32_t tBucketIntHash(tMemBucket *pBucket, const void *value) { @@ -440,7 +440,7 @@ static double getIdenticalDataVal(tMemBucket *pMemBucket, int32_t slotIndex) { return finalResult; } -double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) { +int32_t getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction, double *result) { int32_t num = 0; for (int32_t i = 0; i < pMemBucket->numOfSlots; ++i) { @@ -473,15 +473,15 @@ double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) ASSERT(minOfNextSlot > maxOfThisSlot); - double val = (1 - fraction) * maxOfThisSlot + fraction * minOfNextSlot; - return val; + *result = (1 - fraction) * maxOfThisSlot + fraction * minOfNextSlot; + return TSDB_CODE_SUCCESS; } if (pSlot->info.size <= pMemBucket->maxCapacity) { // data in buffer and file are merged together to be processed. SFilePage *buffer = loadDataFromFilePage(pMemBucket, i); if (buffer == NULL) { - return -1; + return TSDB_CODE_NO_AVAIL_DISK; } int32_t currentIdx = count - num; @@ -492,13 +492,14 @@ double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) GET_TYPED_DATA(td, double, pMemBucket->type, thisVal); GET_TYPED_DATA(nd, double, pMemBucket->type, nextVal); - double val = (1 - fraction) * td + fraction * nd; + *result = (1 - fraction) * td + fraction * nd; taosMemoryFreeClear(buffer); - return val; + return TSDB_CODE_SUCCESS; } else { // incur a second round bucket split if (isIdenticalData(pMemBucket, i)) { - return getIdenticalDataVal(pMemBucket, i); + *result = getIdenticalDataVal(pMemBucket, i); + return TSDB_CODE_SUCCESS; } // try next round @@ -518,37 +519,37 @@ double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) int32_t *pageId = taosArrayGet(list, f); SFilePage *pg = getBufPage(pMemBucket->pBuffer, *pageId); if (pg == NULL) { - return -1; + return TSDB_CODE_NO_AVAIL_DISK; } int32_t code = tMemBucketPut(pMemBucket, pg->data, (int32_t)pg->num); if (code != TSDB_CODE_SUCCESS) { - return -1; + return code; } setBufPageDirty(pg, true); releaseBufPage(pMemBucket->pBuffer, pg); } - return getPercentileImpl(pMemBucket, count - num, fraction); + return getPercentileImpl(pMemBucket, count - num, fraction, result); } } else { num += pSlot->info.size; } } - return 0; + *result = 0; + return TSDB_CODE_SUCCESS; } -double getPercentile(tMemBucket *pMemBucket, double percent) { +int32_t getPercentile(tMemBucket *pMemBucket, double percent, double *result) { if (pMemBucket->total == 0) { - return 0.0; + *result = 0.0; + return TSDB_CODE_SUCCESS; } // if only one elements exists, return it if (pMemBucket->total == 1) { - if (findOnlyResult(pMemBucket) < 0) { - return -1; - } + return findOnlyResult(pMemBucket, result); } percent = fabs(percent); @@ -558,21 +559,21 @@ double getPercentile(tMemBucket *pMemBucket, double percent) { MinMaxEntry *pRange = &pMemBucket->range; if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) { - double v = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->i64MaxVal : pRange->i64MinVal); - return v; + *result = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->i64MaxVal : pRange->i64MinVal); } else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) { - double v = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->u64MaxVal : pRange->u64MinVal); - return v; + *result = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->u64MaxVal : pRange->u64MinVal); } else { - return fabs(percent - 100) < DBL_EPSILON ? pRange->dMaxVal : pRange->dMinVal; + *result = fabs(percent - 100) < DBL_EPSILON ? pRange->dMaxVal : pRange->dMinVal; } + + return TSDB_CODE_SUCCESS; } double percentVal = (percent * (pMemBucket->total - 1)) / ((double)100.0); // do put data by using buckets int32_t orderIdx = (int32_t)percentVal; - return getPercentileImpl(pMemBucket, orderIdx, percentVal - orderIdx); + return getPercentileImpl(pMemBucket, orderIdx, percentVal - orderIdx, result); } /* From 03db839d4a4156012cbd232fdab111d0b9000c04 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 6 Jan 2023 14:43:32 +0800 Subject: [PATCH 54/89] fix: handle error while write vnodes.json --- source/dnode/mgmt/mgmt_vnode/src/vmFile.c | 58 +++++++++++++---------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c index 2f81313037..623e6d37e2 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "vmInt.h" +#include "tjson.h" #define MAX_CONTENT_LEN 2 * 1024 * 1024 @@ -144,9 +145,30 @@ _OVER: return code; } +static int32_t vmEncodeVnodeList(SJson *pJson, SVnodeObj **ppVnodes, int32_t numOfVnodes) { + SJson *vnodes = tjsonCreateArray(); + if (vnodes == NULL) return -1; + if (tjsonAddItemToObject(pJson, "vnodes", vnodes) < 0) return -1; + + for (int32_t i = 0; i < numOfVnodes; ++i) { + SVnodeObj *pVnode = ppVnodes[i]; + if (pVnode == NULL) continue; + + SJson *vnode = tjsonCreateObject(); + if (vnode == NULL) return -1; + if (tjsonAddDoubleToObject(vnode, "vgId", pVnode->vgId) < 0) return -1; + if (tjsonAddDoubleToObject(vnode, "dropped", pVnode->dropped) < 0) return -1; + if (tjsonAddDoubleToObject(vnode, "vgVersion", pVnode->vgVersion) < 0) return -1; + if (tjsonAddItemToArray(vnodes, vnode) < 0) return -1; + } + + return 0; +} + int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { int32_t code = -1; - char *content = NULL; + char *buffer = NULL; + SJson *pJson = NULL; TdFilePtr pFile = NULL; SVnodeObj **ppVnodes = NULL; char file[PATH_MAX] = {0}; @@ -161,31 +183,16 @@ int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { ppVnodes = vmGetVnodeListFromHash(pMgmt, &numOfVnodes); if (ppVnodes == NULL) goto _OVER; - int32_t len = 0; - int32_t maxLen = MAX_CONTENT_LEN; - content = taosMemoryCalloc(1, maxLen + 1); - if (content == NULL) goto _OVER; + terrno = TSDB_CODE_OUT_OF_MEMORY; + pJson = tjsonCreateObject(); + if (pJson == NULL) goto _OVER; + if (vmEncodeVnodeList(pJson, ppVnodes, numOfVnodes) != 0) goto _OVER; - len += snprintf(content + len, maxLen - len, "{\n"); - len += snprintf(content + len, maxLen - len, " \"vnodes\": [\n"); - for (int32_t i = 0; i < numOfVnodes; ++i) { - SVnodeObj *pVnode = ppVnodes[i]; - if (pVnode == NULL) continue; + buffer = tjsonToString(pJson); + if (buffer == NULL) goto _OVER; - len += snprintf(content + len, maxLen - len, " {\n"); - len += snprintf(content + len, maxLen - len, " \"vgId\": %d,\n", pVnode->vgId); - len += snprintf(content + len, maxLen - len, " \"dropped\": %d,\n", pVnode->dropped); - len += snprintf(content + len, maxLen - len, " \"vgVersion\": %d\n", pVnode->vgVersion); - if (i < numOfVnodes - 1) { - len += snprintf(content + len, maxLen - len, " },\n"); - } else { - len += snprintf(content + len, maxLen - len, " }\n"); - } - } - len += snprintf(content + len, maxLen - len, " ]\n"); - len += snprintf(content + len, maxLen - len, "}\n"); - - if (taosWriteFile(pFile, content, len) <= 0) goto _OVER; + int32_t len = strlen(buffer); + if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; if (taosFsyncFile(pFile) < 0) goto _OVER; taosCloseFile(&pFile); @@ -195,7 +202,8 @@ int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { dInfo("succeed to write vnodes file:%s, vnodes:%d", realfile, numOfVnodes); _OVER: - if (content != NULL) taosMemoryFree(content); + if (pJson != NULL) tjsonDelete(pJson); + if (buffer != NULL) taosMemoryFree(buffer); if (pFile != NULL) taosCloseFile(&pFile); if (ppVnodes != NULL) { for (int32_t i = 0; i < numOfVnodes; ++i) { From 3c619925f4645e5154f12f4994bff50f0c5db75e Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 6 Jan 2023 15:16:07 +0800 Subject: [PATCH 55/89] fix: handle error while write smnode.json --- source/dnode/mgmt/mgmt_mnode/src/mmFile.c | 98 +++++++++++++---------- 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmFile.c b/source/dnode/mgmt/mgmt_mnode/src/mmFile.c index f736ffd0c8..ff459353f4 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmFile.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmFile.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "mmInt.h" +#include "tjson.h" int32_t mmReadFile(const char *path, SMnodeOpt *pOption) { int32_t code = TSDB_CODE_INVALID_JSON_FORMAT; @@ -130,56 +131,69 @@ _OVER: return code; } +static int32_t mmEncodeOption(SJson *pJson, const SMnodeOpt *pOption) { + if (pOption->deploy && pOption->numOfReplicas > 0) { + if (tjsonAddDoubleToObject(pJson, "selfIndex", pOption->selfIndex) < 0) return -1; + + SJson *replicas = tjsonCreateArray(); + if (replicas == NULL) return -1; + if (tjsonAddItemToObject(pJson, "replicas", replicas) < 0) return -1; + + for (int32_t i = 0; i < pOption->numOfReplicas; ++i) { + SJson *replica = tjsonCreateObject(); + if (replica == NULL) return -1; + + const SReplica *pReplica = pOption->replicas + i; + if (tjsonAddDoubleToObject(replica, "id", pReplica->id) < 0) return -1; + if (tjsonAddStringToObject(replica, "fqdn", pReplica->fqdn) < 0) return -1; + if (tjsonAddDoubleToObject(replica, "port", pReplica->port) < 0) return -1; + if (tjsonAddItemToArray(replicas, replica) < 0) return -1; + } + } + + if (tjsonAddDoubleToObject(pJson, "deployed", pOption->deploy) < 0) return -1; + + return 0; +} + int32_t mmWriteFile(const char *path, const SMnodeOpt *pOption) { - char file[PATH_MAX] = {0}; - char realfile[PATH_MAX] = {0}; + int32_t code = -1; + char *buffer = NULL; + SJson *pJson = NULL; + TdFilePtr pFile = NULL; + char file[PATH_MAX] = {0}; + char realfile[PATH_MAX] = {0}; snprintf(file, sizeof(file), "%s%smnode.json.bak", path, TD_DIRSEP); snprintf(realfile, sizeof(realfile), "%s%smnode.json", path, TD_DIRSEP); - TdFilePtr pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to write %s since %s", file, terrstr()); - return -1; - } + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) goto _OVER; - int32_t len = 0; - int32_t maxLen = 4096; - char *content = taosMemoryCalloc(1, maxLen + 1); + terrno = TSDB_CODE_OUT_OF_MEMORY; + pJson = tjsonCreateObject(); + if (pJson == NULL) goto _OVER; + if (mmEncodeOption(pJson, pOption) != 0) goto _OVER; - len += snprintf(content + len, maxLen - len, "{\n"); - if (pOption->deploy && pOption->numOfReplicas > 0) { - len += snprintf(content + len, maxLen - len, " \"selfIndex\": %d,\n", pOption->selfIndex); - len += snprintf(content + len, maxLen - len, " \"replicas\": [{\n"); + buffer = tjsonToString(pJson); + if (buffer == NULL) goto _OVER; - for (int32_t i = 0; i < pOption->numOfReplicas; ++i) { - const SReplica *pReplica = pOption->replicas + i; - if (pReplica != NULL && pReplica->id > 0) { - len += snprintf(content + len, maxLen - len, " \"id\": %d,\n", pReplica->id); - len += snprintf(content + len, maxLen - len, " \"fqdn\": \"%s\",\n", pReplica->fqdn); - len += snprintf(content + len, maxLen - len, " \"port\": %u\n", pReplica->port); - } - if (i < pOption->numOfReplicas - 1) { - len += snprintf(content + len, maxLen - len, " },{\n"); - } else { - len += snprintf(content + len, maxLen - len, " }],\n"); - } - } - } - len += snprintf(content + len, maxLen - len, " \"deployed\": %d\n", pOption->deploy); - len += snprintf(content + len, maxLen - len, "}\n"); - - taosWriteFile(pFile, content, len); - taosFsyncFile(pFile); + int32_t len = strlen(buffer); + if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; + if (taosFsyncFile(pFile) < 0) goto _OVER; taosCloseFile(&pFile); - taosMemoryFree(content); - if (taosRenameFile(file, realfile) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to rename %s since %s", file, terrstr()); - return -1; + if (taosRenameFile(file, realfile) != 0) goto _OVER; + + code = 0; + dInfo("succeed to write mnode file:%s, deloyed:%d", realfile, pOption->deploy); + +_OVER: + if (pJson != NULL) tjsonDelete(pJson); + if (buffer != NULL) taosMemoryFree(buffer); + if (pFile != NULL) taosCloseFile(&pFile); + + if (code != 0) { + dError("failed to write mnode file:%s since %s, deloyed:%d", realfile, terrstr(), pOption->deploy); } - - dDebug("succeed to write %s, deployed:%d", realfile, pOption->deploy); - return 0; + return code; } From 675f0057cfed84e296e5e98f4a2e57df0d63f38b Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 6 Jan 2023 15:24:24 +0800 Subject: [PATCH 56/89] fix: handle error while write dmEps.json --- source/dnode/mgmt/node_util/src/dmEps.c | 106 +++++++++++------------- 1 file changed, 49 insertions(+), 57 deletions(-) diff --git a/source/dnode/mgmt/node_util/src/dmEps.c b/source/dnode/mgmt/node_util/src/dmEps.c index a7a63fbaca..0a3e158c7d 100644 --- a/source/dnode/mgmt/node_util/src/dmEps.c +++ b/source/dnode/mgmt/node_util/src/dmEps.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "dmUtil.h" +#include "tjson.h" #include "tmisce.h" static void dmPrintEps(SDnodeData *pData); @@ -181,81 +182,72 @@ _OVER: return code; } -int32_t dmWriteEps(SDnodeData *pData) { - int32_t code = -1; - char *content = NULL; - TdFilePtr pFile = NULL; +static int32_t dmEncodeEps(SJson *pJson, SDnodeData *pData) { + if (tjsonAddDoubleToObject(pJson, "dnodeId", pData->dnodeId) < 0) return -1; + if (tjsonAddIntegerToObject(pJson, "dnodeVer", pData->dnodeVer) < 0) return -1; + if (tjsonAddIntegerToObject(pJson, "clusterId", pData->clusterId) < 0) return -1; + if (tjsonAddDoubleToObject(pJson, "dropped", pData->dropped) < 0) return -1; - char file[PATH_MAX] = {0}; - char realfile[PATH_MAX] = {0}; - snprintf(file, sizeof(file), "%s%sdnode%sdnode.json.bak", tsDataDir, TD_DIRSEP, TD_DIRSEP); - snprintf(realfile, sizeof(realfile), "%s%sdnode%sdnode.json", tsDataDir, TD_DIRSEP, TD_DIRSEP); - - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) { - dError("failed to open %s since %s", file, strerror(errno)); - terrno = TAOS_SYSTEM_ERROR(errno); - goto _OVER; - } - - int32_t len = 0; - int32_t maxLen = 256 * 1024; - content = taosMemoryCalloc(1, maxLen + 1); - - len += snprintf(content + len, maxLen - len, "{\n"); - len += snprintf(content + len, maxLen - len, " \"dnodeId\": %d,\n", pData->dnodeId); - len += snprintf(content + len, maxLen - len, " \"dnodeVer\": \"%" PRId64 "\",\n", pData->dnodeVer); - len += snprintf(content + len, maxLen - len, " \"clusterId\": \"%" PRId64 "\",\n", pData->clusterId); - len += snprintf(content + len, maxLen - len, " \"dropped\": %d,\n", pData->dropped); - len += snprintf(content + len, maxLen - len, " \"dnodes\": [{\n"); + SJson *dnodes = tjsonCreateArray(); + if (dnodes == NULL) return -1; + if (tjsonAddItemToObject(pJson, "dnodes", dnodes) < 0) return -1; int32_t numOfEps = (int32_t)taosArrayGetSize(pData->dnodeEps); for (int32_t i = 0; i < numOfEps; ++i) { SDnodeEp *pDnodeEp = taosArrayGet(pData->dnodeEps, i); - len += snprintf(content + len, maxLen - len, " \"id\": %d,\n", pDnodeEp->id); - len += snprintf(content + len, maxLen - len, " \"fqdn\": \"%s\",\n", pDnodeEp->ep.fqdn); - len += snprintf(content + len, maxLen - len, " \"port\": %u,\n", pDnodeEp->ep.port); - len += snprintf(content + len, maxLen - len, " \"isMnode\": %d\n", pDnodeEp->isMnode); - if (i < numOfEps - 1) { - len += snprintf(content + len, maxLen - len, " },{\n"); - } else { - len += snprintf(content + len, maxLen - len, " }]\n"); - } - } - len += snprintf(content + len, maxLen - len, "}\n"); + SJson *dnode = tjsonCreateObject(); + if (dnode == NULL) return -1; - if (taosWriteFile(pFile, content, len) != len) { - dError("failed to write %s since %s", file, strerror(errno)); - terrno = TAOS_SYSTEM_ERROR(errno); - goto _OVER; + if (tjsonAddDoubleToObject(dnode, "id", pDnodeEp->id) < 0) return -1; + if (tjsonAddStringToObject(dnode, "fqdn", pDnodeEp->ep.fqdn) < 0) return -1; + if (tjsonAddDoubleToObject(dnode, "port", pDnodeEp->ep.port) < 0) return -1; + if (tjsonAddDoubleToObject(dnode, "isMnode", pDnodeEp->isMnode) < 0) return -1; + if (tjsonAddItemToArray(dnodes, dnode) < 0) return -1; } - if (taosFsyncFile(pFile) < 0) { - dError("failed to fsync %s since %s", file, strerror(errno)); - terrno = TAOS_SYSTEM_ERROR(errno); - goto _OVER; - } + return 0; +} +int32_t dmWriteEps(SDnodeData *pData) { + int32_t code = -1; + char *buffer = NULL; + SJson *pJson = NULL; + TdFilePtr pFile = NULL; + char file[PATH_MAX] = {0}; + char realfile[PATH_MAX] = {0}; + snprintf(file, sizeof(file), "%s%sdnode%sdnode.json.bak", tsDataDir, TD_DIRSEP, TD_DIRSEP); + snprintf(realfile, sizeof(realfile), "%s%sdnode%sdnode.json", tsDataDir, TD_DIRSEP, TD_DIRSEP); + + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) goto _OVER; + + terrno = TSDB_CODE_OUT_OF_MEMORY; + pJson = tjsonCreateObject(); + if (pJson == NULL) goto _OVER; + if (dmEncodeEps(pJson, pData) != 0) goto _OVER; + + buffer = tjsonToString(pJson); + if (buffer == NULL) goto _OVER; + + int32_t len = strlen(buffer); + if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; + if (taosFsyncFile(pFile) < 0) goto _OVER; taosCloseFile(&pFile); - taosMemoryFreeClear(content); - if (taosRenameFile(file, realfile) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to rename %s since %s", file, terrstr()); - goto _OVER; - } + if (taosRenameFile(file, realfile) != 0) goto _OVER; code = 0; pData->updateTime = taosGetTimestampMs(); - dInfo("succeed to write %s, dnodeVer:%" PRId64, realfile, pData->dnodeVer); + dInfo("succeed to write dnode file:%s, dnodeVer:%" PRId64, realfile, pData->dnodeVer); _OVER: - if (content != NULL) taosMemoryFreeClear(content); + if (pJson != NULL) tjsonDelete(pJson); + if (buffer != NULL) taosMemoryFree(buffer); if (pFile != NULL) taosCloseFile(&pFile); - if (code != 0) { - dError("failed to write file %s since %s", realfile, terrstr()); - } + if (code != 0) { + dInfo("succeed to write dnode file:%s since %s, dnodeVer:%" PRId64, realfile, terrstr(), pData->dnodeVer); + } return code; } From 7607a9788fc56299043ab22bc6bc925bc7704e9d Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 6 Jan 2023 15:39:56 +0800 Subject: [PATCH 57/89] fix: handle error while write json file --- source/dnode/mgmt/mgmt_mnode/src/mmFile.c | 11 ++-- source/dnode/mgmt/mgmt_vnode/src/vmFile.c | 11 ++-- source/dnode/mgmt/node_util/src/dmEps.c | 5 +- source/dnode/mgmt/node_util/src/dmFile.c | 64 +++++++++++------------ 4 files changed, 45 insertions(+), 46 deletions(-) diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmFile.c b/source/dnode/mgmt/mgmt_mnode/src/mmFile.c index ff459353f4..dd05fe673a 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmFile.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmFile.c @@ -166,22 +166,22 @@ int32_t mmWriteFile(const char *path, const SMnodeOpt *pOption) { snprintf(file, sizeof(file), "%s%smnode.json.bak", path, TD_DIRSEP); snprintf(realfile, sizeof(realfile), "%s%smnode.json", path, TD_DIRSEP); - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) goto _OVER; - terrno = TSDB_CODE_OUT_OF_MEMORY; pJson = tjsonCreateObject(); if (pJson == NULL) goto _OVER; if (mmEncodeOption(pJson, pOption) != 0) goto _OVER; - buffer = tjsonToString(pJson); if (buffer == NULL) goto _OVER; + terrno = 0; + + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; if (taosFsyncFile(pFile) < 0) goto _OVER; - taosCloseFile(&pFile); + taosCloseFile(&pFile); if (taosRenameFile(file, realfile) != 0) goto _OVER; code = 0; @@ -193,6 +193,7 @@ _OVER: if (pFile != NULL) taosCloseFile(&pFile); if (code != 0) { + if (terrno == 0) terrno = TAOS_SYSTEM_ERROR(errno); dError("failed to write mnode file:%s since %s, deloyed:%d", realfile, terrstr(), pOption->deploy); } return code; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c index 623e6d37e2..8337fb5d10 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c @@ -176,9 +176,6 @@ int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { snprintf(file, sizeof(file), "%s%svnodes.json.bak", pMgmt->path, TD_DIRSEP); snprintf(realfile, sizeof(realfile), "%s%svnodes.json", pMgmt->path, TD_DIRSEP); - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) goto _OVER; - int32_t numOfVnodes = 0; ppVnodes = vmGetVnodeListFromHash(pMgmt, &numOfVnodes); if (ppVnodes == NULL) goto _OVER; @@ -187,15 +184,18 @@ int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { pJson = tjsonCreateObject(); if (pJson == NULL) goto _OVER; if (vmEncodeVnodeList(pJson, ppVnodes, numOfVnodes) != 0) goto _OVER; - buffer = tjsonToString(pJson); if (buffer == NULL) goto _OVER; + terrno = 0; + + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; if (taosFsyncFile(pFile) < 0) goto _OVER; - taosCloseFile(&pFile); + taosCloseFile(&pFile); if (taosRenameFile(file, realfile) != 0) goto _OVER; code = 0; @@ -216,6 +216,7 @@ _OVER: } if (code != 0) { + if (terrno == 0) terrno = TAOS_SYSTEM_ERROR(errno); dError("failed to write vnodes file:%s since %s, vnodes:%d", realfile, terrstr(), numOfVnodes); } return code; diff --git a/source/dnode/mgmt/node_util/src/dmEps.c b/source/dnode/mgmt/node_util/src/dmEps.c index 0a3e158c7d..6882d25d32 100644 --- a/source/dnode/mgmt/node_util/src/dmEps.c +++ b/source/dnode/mgmt/node_util/src/dmEps.c @@ -225,15 +225,15 @@ int32_t dmWriteEps(SDnodeData *pData) { pJson = tjsonCreateObject(); if (pJson == NULL) goto _OVER; if (dmEncodeEps(pJson, pData) != 0) goto _OVER; - buffer = tjsonToString(pJson); if (buffer == NULL) goto _OVER; + terrno = 0; int32_t len = strlen(buffer); if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; if (taosFsyncFile(pFile) < 0) goto _OVER; - taosCloseFile(&pFile); + taosCloseFile(&pFile); if (taosRenameFile(file, realfile) != 0) goto _OVER; code = 0; @@ -246,6 +246,7 @@ _OVER: if (pFile != NULL) taosCloseFile(&pFile); if (code != 0) { + if (terrno == 0) terrno = TAOS_SYSTEM_ERROR(errno); dInfo("succeed to write dnode file:%s since %s, dnodeVer:%" PRId64, realfile, terrstr(), pData->dnodeVer); } return code; diff --git a/source/dnode/mgmt/node_util/src/dmFile.c b/source/dnode/mgmt/node_util/src/dmFile.c index 2eb1462efc..4dcc962a20 100644 --- a/source/dnode/mgmt/node_util/src/dmFile.c +++ b/source/dnode/mgmt/node_util/src/dmFile.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "dmUtil.h" +#include "tjson.h" #define MAXLEN 1024 @@ -63,56 +64,51 @@ _OVER: return code; } +static int32_t dmEncodeFile(SJson *pJson, bool deployed) { + if (tjsonAddDoubleToObject(pJson, "deployed", deployed) < 0) return -1; + return 0; +} + int32_t dmWriteFile(const char *path, const char *name, bool deployed) { int32_t code = -1; - int32_t len = 0; - char content[MAXLEN + 1] = {0}; + char *buffer = NULL; + SJson *pJson = NULL; + TdFilePtr pFile = NULL; char file[PATH_MAX] = {0}; char realfile[PATH_MAX] = {0}; - TdFilePtr pFile = NULL; - snprintf(file, sizeof(file), "%s%s%s.json", path, TD_DIRSEP, name); snprintf(realfile, sizeof(realfile), "%s%s%s.json", path, TD_DIRSEP, name); + terrno = TSDB_CODE_OUT_OF_MEMORY; + pJson = tjsonCreateObject(); + if (pJson == NULL) goto _OVER; + if (dmEncodeFile(pJson, deployed) != 0) goto _OVER; + buffer = tjsonToString(pJson); + if (buffer == NULL) goto _OVER; + terrno = 0; + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to write %s since %s", file, terrstr()); - goto _OVER; - } + if (pFile == NULL) goto _OVER; - len += snprintf(content + len, MAXLEN - len, "{\n"); - len += snprintf(content + len, MAXLEN - len, " \"deployed\": %d\n", deployed); - len += snprintf(content + len, MAXLEN - len, "}\n"); - - if (taosWriteFile(pFile, content, len) != len) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to write file:%s since %s", file, terrstr()); - goto _OVER; - } - - if (taosFsyncFile(pFile) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to fsync file:%s since %s", file, terrstr()); - goto _OVER; - } + int32_t len = strlen(buffer); + if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; + if (taosFsyncFile(pFile) < 0) goto _OVER; taosCloseFile(&pFile); + if (taosRenameFile(file, realfile) != 0) goto _OVER; - if (taosRenameFile(file, realfile) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to rename %s since %s", file, terrstr()); - return -1; - } - - dInfo("succeed to write %s, deployed:%d", realfile, deployed); code = 0; + dInfo("succeed to write file:%s, deloyed:%d", realfile, deployed); _OVER: - if (pFile != NULL) { - taosCloseFile(&pFile); - } + if (pJson != NULL) tjsonDelete(pJson); + if (buffer != NULL) taosMemoryFree(buffer); + if (pFile != NULL) taosCloseFile(&pFile); + if (code != 0) { + if (terrno == 0) terrno = TAOS_SYSTEM_ERROR(errno); + dError("failed to write file:%s since %s, deloyed:%d", realfile, terrstr(), deployed); + } return code; } From e2ac984f610262602467f0ee763b09d65e044f2b Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 6 Jan 2023 15:40:09 +0800 Subject: [PATCH 58/89] fix: close file on sdbStopWrite --- source/dnode/mnode/sdb/src/sdbFile.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index f43b6bdb25..19899e63bb 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -636,31 +636,31 @@ int32_t sdbStartWrite(SSdb *pSdb, SSdbIter **ppIter) { } int32_t sdbStopWrite(SSdb *pSdb, SSdbIter *pIter, bool isApply, int64_t index, int64_t term, int64_t config) { - int32_t code = 0; + int32_t code = -1; if (!isApply) { mInfo("sdbiter:%p, not apply to sdb", pIter); - sdbCloseIter(pIter); - return 0; + code = 0; + goto _OVER; } - taosFsyncFile(pIter->file); - taosCloseFile(&pIter->file); - pIter->file = NULL; + if (taosFsyncFile(pIter->file) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + mError("sdbiter:%p, failed to fasync file %s since %s", pIter, pIter->name, terrstr()); + goto _OVER; + } char datafile[PATH_MAX] = {0}; snprintf(datafile, sizeof(datafile), "%s%ssdb.data", pSdb->currDir, TD_DIRSEP); if (taosRenameFile(pIter->name, datafile) != 0) { terrno = TAOS_SYSTEM_ERROR(errno); mError("sdbiter:%p, failed to rename file %s to %s since %s", pIter, pIter->name, datafile, terrstr()); - sdbCloseIter(pIter); - return -1; + goto _OVER; } if (sdbReadFile(pSdb) != 0) { mError("sdbiter:%p, failed to read from %s since %s", pIter, datafile, terrstr()); - sdbCloseIter(pIter); - return -1; + goto _OVER; } if (config > 0) { @@ -675,7 +675,13 @@ int32_t sdbStopWrite(SSdb *pSdb, SSdbIter *pIter, bool isApply, int64_t index, i mInfo("sdbiter:%p, success applyed to sdb", pIter); sdbCloseIter(pIter); - return 0; + code = 0; + +_OVER: + taosCloseFile(&pIter->file); + pIter->file = NULL; + sdbCloseIter(pIter); + return code; } int32_t sdbDoWrite(SSdb *pSdb, SSdbIter *pIter, void *pBuf, int32_t len) { From 71be00a8115a14b84c1b6fb150819849d446f9dd Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 6 Jan 2023 15:42:11 +0800 Subject: [PATCH 59/89] fix: minor changes --- source/libs/sync/src/syncRaftCfg.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/source/libs/sync/src/syncRaftCfg.c b/source/libs/sync/src/syncRaftCfg.c index b4bacd2451..806949c81e 100644 --- a/source/libs/sync/src/syncRaftCfg.c +++ b/source/libs/sync/src/syncRaftCfg.c @@ -71,22 +71,22 @@ int32_t syncWriteCfgFile(SSyncNode *pNode) { char file[PATH_MAX] = {0}; snprintf(file, sizeof(file), "%s.bak", realfile); - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) goto _OVER; - terrno = TSDB_CODE_OUT_OF_MEMORY; pJson = tjsonCreateObject(); if (pJson == NULL) goto _OVER; if (tjsonAddObject(pJson, "RaftCfg", syncEncodeRaftCfg, pCfg) < 0) goto _OVER; - buffer = tjsonToString(pJson); if (buffer == NULL) goto _OVER; + terrno = 0; + + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; if (taosFsyncFile(pFile) < 0) goto _OVER; - taosCloseFile(&pFile); + taosCloseFile(&pFile); if (taosRenameFile(file, realfile) != 0) goto _OVER; code = 0; @@ -98,6 +98,7 @@ _OVER: if (pFile != NULL) taosCloseFile(&pFile); if (code != 0) { + if (terrno == 0) terrno = TAOS_SYSTEM_ERROR(errno); sError("vgId:%d, failed to write sync cfg file:%s since %s", pNode->vgId, realfile, terrstr()); } return code; From 4a90f84e26744a41bdb917e5f650bf08b82cc1a7 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 6 Jan 2023 15:50:27 +0800 Subject: [PATCH 60/89] fix: minor changes --- source/dnode/mgmt/node_util/src/dmEps.c | 6 +++--- source/dnode/mnode/sdb/src/sdbFile.c | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/source/dnode/mgmt/node_util/src/dmEps.c b/source/dnode/mgmt/node_util/src/dmEps.c index 6882d25d32..3e2d8b53aa 100644 --- a/source/dnode/mgmt/node_util/src/dmEps.c +++ b/source/dnode/mgmt/node_util/src/dmEps.c @@ -218,9 +218,6 @@ int32_t dmWriteEps(SDnodeData *pData) { snprintf(file, sizeof(file), "%s%sdnode%sdnode.json.bak", tsDataDir, TD_DIRSEP, TD_DIRSEP); snprintf(realfile, sizeof(realfile), "%s%sdnode%sdnode.json", tsDataDir, TD_DIRSEP, TD_DIRSEP); - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) goto _OVER; - terrno = TSDB_CODE_OUT_OF_MEMORY; pJson = tjsonCreateObject(); if (pJson == NULL) goto _OVER; @@ -229,6 +226,9 @@ int32_t dmWriteEps(SDnodeData *pData) { if (buffer == NULL) goto _OVER; terrno = 0; + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) goto _OVER; + int32_t len = strlen(buffer); if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; if (taosFsyncFile(pFile) < 0) goto _OVER; diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index 19899e63bb..8caad74ce0 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -674,7 +674,6 @@ int32_t sdbStopWrite(SSdb *pSdb, SSdbIter *pIter, bool isApply, int64_t index, i } mInfo("sdbiter:%p, success applyed to sdb", pIter); - sdbCloseIter(pIter); code = 0; _OVER: From f5a6108ca0b6c5c0cfbbcd5ae6c7311b5879d4f3 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 6 Jan 2023 15:55:16 +0800 Subject: [PATCH 61/89] fix: minor changes --- source/dnode/mnode/sdb/src/sdbFile.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index 8caad74ce0..339701019c 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -650,6 +650,9 @@ int32_t sdbStopWrite(SSdb *pSdb, SSdbIter *pIter, bool isApply, int64_t index, i goto _OVER; } + taosCloseFile(&pIter->file); + pIter->file = NULL; + char datafile[PATH_MAX] = {0}; snprintf(datafile, sizeof(datafile), "%s%ssdb.data", pSdb->currDir, TD_DIRSEP); if (taosRenameFile(pIter->name, datafile) != 0) { @@ -677,8 +680,10 @@ int32_t sdbStopWrite(SSdb *pSdb, SSdbIter *pIter, bool isApply, int64_t index, i code = 0; _OVER: - taosCloseFile(&pIter->file); - pIter->file = NULL; + if (pIter->file != NULL) { + taosCloseFile(&pIter->file); + pIter->file = NULL; + } sdbCloseIter(pIter); return code; } From ef6c273c11d9f91536b730e6b2040bbaba5f8ea0 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 6 Jan 2023 15:56:27 +0800 Subject: [PATCH 62/89] fix: minor changes --- source/dnode/mnode/sdb/src/sdbFile.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index 339701019c..2a63e3faf3 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -680,10 +680,6 @@ int32_t sdbStopWrite(SSdb *pSdb, SSdbIter *pIter, bool isApply, int64_t index, i code = 0; _OVER: - if (pIter->file != NULL) { - taosCloseFile(&pIter->file); - pIter->file = NULL; - } sdbCloseIter(pIter); return code; } From 610b5631aaef5484c344de74136234c5aba0e28f Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Fri, 6 Jan 2023 18:00:01 +0800 Subject: [PATCH 63/89] fix(vnd/snap): keep vnode stats transfered from leader --- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index cc22668b29..e75dc24329 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -406,8 +406,10 @@ static int32_t vnodeSnapWriteInfo(SVSnapWriter *pWriter, uint8_t *pData, uint32_ snprintf(dir, TSDB_FILENAME_LEN, "%s", pWriter->pVnode->path); } - SVnode *pVnode = pWriter->pVnode; + SVnodeStats vndStats = pWriter->info.config.vndStats; + SVnode *pVnode = pWriter->pVnode; pWriter->info.config = pVnode->config; + pWriter->info.config.vndStats = vndStats; vDebug("vgId:%d, save config while write snapshot", pWriter->pVnode->config.vgId); if (vnodeSaveInfo(dir, &pWriter->info) < 0) { code = terrno; From 66e62bb010281357f6c58dc2431697fd53fa6943 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 6 Jan 2023 19:19:31 +0800 Subject: [PATCH 64/89] fix: tsdb read invalid memory read issue --- source/dnode/vnode/src/tsdb/tsdbUtil.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index 55703002b8..f30308845b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -775,7 +775,16 @@ _exit: return code; } -void tRowMergerClear(SRowMerger *pMerger) { taosArrayDestroy(pMerger->pArray); } +void tRowMergerClear(SRowMerger *pMerger) { + for (int32_t iCol = 1; iCol < pMerger->pTSchema->numOfCols; iCol++) { + SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); + if (IS_VAR_DATA_TYPE(pTColVal->type)) { + tFree(pTColVal->value.pData); + } + } + + taosArrayDestroy(pMerger->pArray); +} int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { int32_t code = 0; @@ -789,7 +798,17 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { if (key.version > pMerger->version) { if (!COL_VAL_IS_NONE(pColVal)) { - taosArraySet(pMerger->pArray, iCol, pColVal); + if (IS_VAR_DATA_TYPE(pColVal->type)) { + SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); + + code = tRealloc(pTColVal->value.pData, pColVal->value.nData); + if (code) goto _exit; + + pTColVal->value.nData = pColVal->value.nData; + memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal.value.nData); + } else { + taosArraySet(pMerger->pArray, iCol, pColVal); + } } } else if (key.version < pMerger->version) { SColVal *tColVal = (SColVal *)taosArrayGet(pMerger->pArray, iCol); From d70e32e7d36b14a5f7497d33ef310a68580bdef9 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Sat, 7 Jan 2023 14:56:43 +0800 Subject: [PATCH 65/89] fix: compile issue --- source/dnode/vnode/src/tsdb/tsdbUtil.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index f30308845b..112fbb61c6 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -801,11 +801,11 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { if (IS_VAR_DATA_TYPE(pColVal->type)) { SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); - code = tRealloc(pTColVal->value.pData, pColVal->value.nData); + code = tRealloc(&pTColVal->value.pData, pColVal->value.nData); if (code) goto _exit; pTColVal->value.nData = pColVal->value.nData; - memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal.value.nData); + memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); } else { taosArraySet(pMerger->pArray, iCol, pColVal); } From 45cfcdc04753c11fc8a3736cc2658d564f6ea2ee Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Sun, 8 Jan 2023 21:26:05 +0800 Subject: [PATCH 66/89] fix: taosbenchmark ctrl-c handle for main (#19433) --- cmake/taostools_CMakeLists.txt.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/taostools_CMakeLists.txt.in b/cmake/taostools_CMakeLists.txt.in index 89d1066a43..599b508c93 100644 --- a/cmake/taostools_CMakeLists.txt.in +++ b/cmake/taostools_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taos-tools ExternalProject_Add(taos-tools GIT_REPOSITORY https://github.com/taosdata/taos-tools.git - GIT_TAG 4efbc10 + GIT_TAG 94d6895 SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools" BINARY_DIR "" #BUILD_IN_SOURCE TRUE From 90830e54104512882bbf5c9e9c88645e15b00777 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 9 Jan 2023 09:08:28 +0800 Subject: [PATCH 67/89] fix: use pInfo->info.state.committed instead of the current one for async vnodeCommit --- source/dnode/vnode/src/vnd/vnodeCommit.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 6c54c3cb5c..9a69299d9d 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -294,7 +294,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { SVnode *pVnode = pInfo->pVnode; vInfo("vgId:%d, start to commit, commitId:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode), - pVnode->state.commitID, pVnode->state.applied, pVnode->state.applyTerm); + pInfo->info.state.commitID, pInfo->info.state.committed, pVnode->state.commitTerm); // persist wal before starting if (walPersist(pVnode->pWal) < 0) { @@ -308,8 +308,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { snprintf(dir, TSDB_FILENAME_LEN, "%s", pVnode->path); } - // walBeginSnapshot(pVnode->pWal, pVnode->state.applied); - syncBeginSnapshot(pVnode->sync, pVnode->state.applied); + syncBeginSnapshot(pVnode->sync, pInfo->info.state.committed); // commit each sub-system code = tsdbCommit(pVnode->pTsdb, pInfo); @@ -351,7 +350,6 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { return -1; } - // walEndSnapshot(pVnode->pWal); syncEndSnapshot(pVnode->sync); _exit: From d4d329ecce0250a4002b0b8b504eaed5d7219e34 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 9 Jan 2023 10:18:12 +0800 Subject: [PATCH 68/89] fix: rename global variables --- source/dnode/mgmt/node_mgmt/src/dmEnv.c | 6 +++--- source/libs/sync/inc/syncRaftStore.h | 2 -- source/libs/sync/src/syncRaftStore.c | 2 ++ 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/dnode/mgmt/node_mgmt/src/dmEnv.c b/source/dnode/mgmt/node_mgmt/src/dmEnv.c index 1d0236c0c5..acf96ad397 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmEnv.c +++ b/source/dnode/mgmt/node_mgmt/src/dmEnv.c @@ -16,9 +16,9 @@ #define _DEFAULT_SOURCE #include "dmMgmt.h" -static SDnode global = {0}; +static SDnode globalDnode = {0}; -SDnode *dmInstance() { return &global; } +SDnode *dmInstance() { return &globalDnode; } static int32_t dmCheckRepeatInit(SDnode *pDnode) { if (atomic_val_compare_exchange_8(&pDnode->once, DND_ENV_INIT, DND_ENV_READY) != DND_ENV_INIT) { @@ -270,6 +270,6 @@ void dmReportStartup(const char *pName, const char *pDesc) { } int64_t dmGetClusterId() { - return global.data.clusterId; + return globalDnode.data.clusterId; } diff --git a/source/libs/sync/inc/syncRaftStore.h b/source/libs/sync/inc/syncRaftStore.h index bb6405f6b2..28faf8ea6d 100644 --- a/source/libs/sync/inc/syncRaftStore.h +++ b/source/libs/sync/inc/syncRaftStore.h @@ -37,8 +37,6 @@ typedef struct SRaftStore { SRaftStore *raftStoreOpen(const char *path); int32_t raftStoreClose(SRaftStore *pRaftStore); int32_t raftStorePersist(SRaftStore *pRaftStore); -int32_t raftStoreSerialize(SRaftStore *pRaftStore, char *buf, size_t len); -int32_t raftStoreDeserialize(SRaftStore *pRaftStore, char *buf, size_t len); bool raftStoreHasVoted(SRaftStore *pRaftStore); void raftStoreVote(SRaftStore *pRaftStore, SRaftId *pRaftId); diff --git a/source/libs/sync/src/syncRaftStore.c b/source/libs/sync/src/syncRaftStore.c index b19cda2a44..8ef3ceeae7 100644 --- a/source/libs/sync/src/syncRaftStore.c +++ b/source/libs/sync/src/syncRaftStore.c @@ -20,6 +20,8 @@ // private function static int32_t raftStoreInit(SRaftStore *pRaftStore); static bool raftStoreFileExist(char *path); +static int32_t raftStoreSerialize(SRaftStore *pRaftStore, char *buf, size_t len); +static int32_t raftStoreDeserialize(SRaftStore *pRaftStore, char *buf, size_t len); // public function SRaftStore *raftStoreOpen(const char *path) { From 80586ad997994924f77810c79850d39d14171d41 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Mon, 9 Jan 2023 11:33:26 +0800 Subject: [PATCH 69/89] fix: invalid free issue --- source/dnode/vnode/src/inc/tsdb.h | 1 + source/dnode/vnode/src/tsdb/tsdbUtil.c | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 5a2e462c8c..77a3bb7a2f 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -573,6 +573,7 @@ struct STSDBRowIter { struct SRowMerger { STSchema *pTSchema; int64_t version; + bool merged; SArray *pArray; // SArray }; diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index 112fbb61c6..a9c31c19cb 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -776,10 +776,12 @@ _exit: } void tRowMergerClear(SRowMerger *pMerger) { - for (int32_t iCol = 1; iCol < pMerger->pTSchema->numOfCols; iCol++) { - SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); - if (IS_VAR_DATA_TYPE(pTColVal->type)) { - tFree(pTColVal->value.pData); + if (pMerger->merged) { + for (int32_t iCol = 1; iCol < pMerger->pTSchema->numOfCols; iCol++) { + SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); + if (IS_VAR_DATA_TYPE(pTColVal->type)) { + tFree(pTColVal->value.pData); + } } } @@ -801,6 +803,7 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { if (IS_VAR_DATA_TYPE(pColVal->type)) { SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); + pTColVal->value.pData = NULL; code = tRealloc(&pTColVal->value.pData, pColVal->value.nData); if (code) goto _exit; @@ -821,6 +824,7 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { } pMerger->version = key.version; + pMerger->merged = true; _exit: return code; From 902ab5b12f9238ee1ea03d49654b73adc816148a Mon Sep 17 00:00:00 2001 From: Xuefeng Tan <1172915550@qq.com> Date: Mon, 9 Jan 2023 11:36:36 +0800 Subject: [PATCH 70/89] fix: get vgid in batch (#19437) --- cmake/taosadapter_CMakeLists.txt.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/taosadapter_CMakeLists.txt.in b/cmake/taosadapter_CMakeLists.txt.in index 3e2e879e38..ab1609f35f 100644 --- a/cmake/taosadapter_CMakeLists.txt.in +++ b/cmake/taosadapter_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taosadapter ExternalProject_Add(taosadapter GIT_REPOSITORY https://github.com/taosdata/taosadapter.git - GIT_TAG a2e9920 + GIT_TAG 69eee2e SOURCE_DIR "${TD_SOURCE_DIR}/tools/taosadapter" BINARY_DIR "" #BUILD_IN_SOURCE TRUE From 060fc941b55a013c9a0de2ff011c5b57c88a006b Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Mon, 9 Jan 2023 11:39:14 +0800 Subject: [PATCH 71/89] fix:add config dir for libtaos in sml_test --- tests/system-test/2-query/sml.py | 7 ++++++- utils/test/c/sml_test.c | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/system-test/2-query/sml.py b/tests/system-test/2-query/sml.py index b764edebd7..78b633cf94 100644 --- a/tests/system-test/2-query/sml.py +++ b/tests/system-test/2-query/sml.py @@ -15,6 +15,9 @@ sys.path.append("./7-tmq") from tmqCommon import * class TDTestCase: + updatecfgDict = {'clientCfg': {'smlChildTableName': 'dataModelName', 'fqdn': 'localhost'}, 'fqdn': 'localhost'} + print("===================: ", updatecfgDict) + def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) tdLog.debug(f"start to excute {__file__}") @@ -22,8 +25,10 @@ class TDTestCase: #tdSql.init(conn.cursor(), logSql) # output sql.txt file def checkFileContent(self, dbname="sml_db"): + simClientCfg="%s/taos.cfg"%tdDnodes.getSimCfgPath() buildPath = tdCom.getBuildPath() - cmdStr = '%s/build/bin/sml_test'%(buildPath) + cmdStr = '%s/build/bin/sml_test %s'%(buildPath, simClientCfg) + print("cmdStr:", cmdStr) tdLog.info(cmdStr) ret = os.system(cmdStr) if ret != 0: diff --git a/utils/test/c/sml_test.c b/utils/test/c/sml_test.c index 315aabab3c..c6073541fd 100644 --- a/utils/test/c/sml_test.c +++ b/utils/test/c/sml_test.c @@ -1190,6 +1190,10 @@ int sml_ts2385_Test() { } int main(int argc, char *argv[]) { + if(argc == 2){ + taos_options(TSDB_OPTION_CONFIGDIR, argv[1]); + } + int ret = 0; ret = sml_ts2385_Test(); ASSERT(!ret); From 2aeda3a94171340ab5b8fc3ca0b58e6db59b0813 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 9 Jan 2023 12:01:36 +0800 Subject: [PATCH 72/89] enh: refact raft store file --- source/libs/sync/inc/syncInt.h | 17 +- source/libs/sync/inc/syncRaftStore.h | 27 +- source/libs/sync/src/syncAppendEntries.c | 12 +- source/libs/sync/src/syncAppendEntriesReply.c | 12 +- source/libs/sync/src/syncCommit.c | 4 +- source/libs/sync/src/syncElection.c | 10 +- source/libs/sync/src/syncMain.c | 91 +++---- source/libs/sync/src/syncMessage.c | 2 +- source/libs/sync/src/syncPipeline.c | 46 ++-- source/libs/sync/src/syncRaftStore.c | 253 +++++++++--------- source/libs/sync/src/syncReplication.c | 4 +- source/libs/sync/src/syncRequestVote.c | 34 +-- source/libs/sync/src/syncRequestVoteReply.c | 10 +- source/libs/sync/src/syncRespMgr.c | 2 +- source/libs/sync/src/syncSnapshot.c | 40 +-- source/libs/sync/src/syncUtil.c | 12 +- .../test/sync_test_lib/src/syncMainDebug.c | 4 +- .../sync_test_lib/src/syncSnapshotDebug.c | 2 +- 18 files changed, 283 insertions(+), 299 deletions(-) diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 6793430923..7e08e195c1 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -32,11 +32,9 @@ typedef struct SyncRequestVoteReply SyncRequestVoteReply; typedef struct SyncAppendEntries SyncAppendEntries; typedef struct SyncAppendEntriesReply SyncAppendEntriesReply; typedef struct SSyncEnv SSyncEnv; -typedef struct SRaftStore SRaftStore; typedef struct SVotesGranted SVotesGranted; typedef struct SVotesRespond SVotesRespond; typedef struct SSyncIndexMgr SSyncIndexMgr; -typedef struct SRaftCfg SRaftCfg; typedef struct SSyncRespMgr SSyncRespMgr; typedef struct SSyncSnapshotSender SSyncSnapshotSender; typedef struct SSyncSnapshotReceiver SSyncSnapshotReceiver; @@ -70,6 +68,11 @@ typedef struct SRaftId { SyncGroupId vgId; } SRaftId; +typedef struct SRaftStore { + SyncTerm currentTerm; + SRaftId voteFor; +} SRaftStore; + typedef struct SSyncHbTimerData { int64_t syncNodeRid; SSyncTimer* pTimer; @@ -112,8 +115,8 @@ typedef struct SSyncNode { // sync io SSyncLogBuffer* pLogBuf; - SWal* pWal; - const SMsgCb* msgcb; + SWal* pWal; + const SMsgCb* msgcb; int32_t (*syncSendMSg)(const SEpSet* pEpSet, SRpcMsg* pMsg); int32_t (*syncEqMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg); int32_t (*syncEqCtrlMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg); @@ -139,8 +142,8 @@ typedef struct SSyncNode { int64_t rid; // tla+ server vars - ESyncState state; - SRaftStore* pRaftStore; + ESyncState state; + SRaftStore raftStore; // tla+ candidate vars SVotesGranted* pVotesGranted; @@ -229,7 +232,7 @@ int32_t syncNodeStartStandBy(SSyncNode* pSyncNode); void syncNodeClose(SSyncNode* pSyncNode); void syncNodePreClose(SSyncNode* pSyncNode); void syncNodePostClose(SSyncNode* pSyncNode); -int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak, int64_t *seq); +int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak, int64_t* seq); int32_t syncNodeRestore(SSyncNode* pSyncNode); void syncHbTimerDataFree(SSyncHbTimerData* pData); diff --git a/source/libs/sync/inc/syncRaftStore.h b/source/libs/sync/inc/syncRaftStore.h index bb6405f6b2..21a8fc64a8 100644 --- a/source/libs/sync/inc/syncRaftStore.h +++ b/source/libs/sync/inc/syncRaftStore.h @@ -24,27 +24,16 @@ extern "C" { #define RAFT_STORE_BLOCK_SIZE 512 #define RAFT_STORE_PATH_LEN (TSDB_FILENAME_LEN * 2) +#define EMPTY_RAFT_ID ((SRaftId){.addr = 0, .vgId = 0}) -#define EMPTY_RAFT_ID ((SRaftId){.addr = 0, .vgId = 0}) +int32_t raftStoreReadFile(SSyncNode *pNode); +int32_t raftStoreWriteFile(SSyncNode *pNode); -typedef struct SRaftStore { - SyncTerm currentTerm; - SRaftId voteFor; - TdFilePtr pFile; - char path[RAFT_STORE_PATH_LEN]; -} SRaftStore; - -SRaftStore *raftStoreOpen(const char *path); -int32_t raftStoreClose(SRaftStore *pRaftStore); -int32_t raftStorePersist(SRaftStore *pRaftStore); -int32_t raftStoreSerialize(SRaftStore *pRaftStore, char *buf, size_t len); -int32_t raftStoreDeserialize(SRaftStore *pRaftStore, char *buf, size_t len); - -bool raftStoreHasVoted(SRaftStore *pRaftStore); -void raftStoreVote(SRaftStore *pRaftStore, SRaftId *pRaftId); -void raftStoreClearVote(SRaftStore *pRaftStore); -void raftStoreNextTerm(SRaftStore *pRaftStore); -void raftStoreSetTerm(SRaftStore *pRaftStore, SyncTerm term); +bool raftStoreHasVoted(SSyncNode *pNode); +void raftStoreVote(SSyncNode *pNode, SRaftId *pRaftId); +void raftStoreClearVote(SSyncNode *pNode); +void raftStoreNextTerm(SSyncNode *pNode); +void raftStoreSetTerm(SSyncNode *pNode, SyncTerm term); #ifdef __cplusplus } diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 026ebdb37c..83d1777f44 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -159,17 +159,17 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) { // prepare response msg pReply->srcId = ths->myRaftId; pReply->destId = pMsg->srcId; - pReply->term = ths->pRaftStore->currentTerm; + pReply->term = ths->raftStore.currentTerm; pReply->success = false; pReply->matchIndex = SYNC_INDEX_INVALID; pReply->lastSendIndex = pMsg->prevLogIndex + 1; pReply->startTime = ths->startTime; - if (pMsg->term < ths->pRaftStore->currentTerm) { + if (pMsg->term < ths->raftStore.currentTerm) { goto _SEND_RESPONSE; } - if (pMsg->term > ths->pRaftStore->currentTerm) { + if (pMsg->term > ths->raftStore.currentTerm) { pReply->term = pMsg->term; } @@ -253,19 +253,19 @@ int32_t syncNodeOnAppendEntriesOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) { SyncAppendEntriesReply* pReply = rpcRsp.pCont; pReply->srcId = ths->myRaftId; pReply->destId = pMsg->srcId; - pReply->term = ths->pRaftStore->currentTerm; + pReply->term = ths->raftStore.currentTerm; pReply->success = false; // pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); pReply->matchIndex = SYNC_INDEX_INVALID; pReply->lastSendIndex = pMsg->prevLogIndex + 1; pReply->startTime = ths->startTime; - if (pMsg->term < ths->pRaftStore->currentTerm) { + if (pMsg->term < ths->raftStore.currentTerm) { syncLogRecvAppendEntries(ths, pMsg, "reject, small term"); goto _SEND_RESPONSE; } - if (pMsg->term > ths->pRaftStore->currentTerm) { + if (pMsg->term > ths->raftStore.currentTerm) { pReply->term = pMsg->term; } diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index b83be2bebb..8157a5a14f 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -50,19 +50,19 @@ int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) { } // drop stale response - if (pMsg->term < ths->pRaftStore->currentTerm) { + if (pMsg->term < ths->raftStore.currentTerm) { syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response"); return 0; } if (ths->state == TAOS_SYNC_STATE_LEADER) { - if (pMsg->term > ths->pRaftStore->currentTerm) { + if (pMsg->term > ths->raftStore.currentTerm) { syncLogRecvAppendEntriesReply(ths, pMsg, "error term"); syncNodeStepDown(ths, pMsg->term); return -1; } - ASSERT(pMsg->term == ths->pRaftStore->currentTerm); + ASSERT(pMsg->term == ths->raftStore.currentTerm); sTrace("vgId:%d, received append entries reply. srcId:0x%016" PRIx64 ", term:%" PRId64 ", matchIndex:%" PRId64 "", pMsg->vgId, pMsg->srcId.addr, pMsg->term, pMsg->matchIndex); @@ -100,19 +100,19 @@ int32_t syncNodeOnAppendEntriesReplyOld(SSyncNode* ths, SyncAppendEntriesReply* } // drop stale response - if (pMsg->term < ths->pRaftStore->currentTerm) { + if (pMsg->term < ths->raftStore.currentTerm) { syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response"); return 0; } if (ths->state == TAOS_SYNC_STATE_LEADER) { - if (pMsg->term > ths->pRaftStore->currentTerm) { + if (pMsg->term > ths->raftStore.currentTerm) { syncLogRecvAppendEntriesReply(ths, pMsg, "error term"); syncNodeStepDown(ths, pMsg->term); return -1; } - ASSERT(pMsg->term == ths->pRaftStore->currentTerm); + ASSERT(pMsg->term == ths->raftStore.currentTerm); if (pMsg->success) { SyncIndex oldMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 152fddb7e6..286cf4daf5 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -133,7 +133,7 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { } } // cannot commit, even if quorum agree. need check term! - if (pEntry->term <= pSyncNode->pRaftStore->currentTerm) { + if (pEntry->term <= pSyncNode->raftStore.currentTerm) { // update commit index newCommitIndex = index; @@ -329,7 +329,7 @@ int64_t syncNodeCheckCommitIndex(SSyncNode* ths, SyncIndex indexLikely) { SyncIndex commitIndex = indexLikely; syncNodeUpdateCommitIndex(ths, commitIndex); sTrace("vgId:%d, agreed upon. role:%d, term:%" PRId64 ", index: %" PRId64 "", ths->vgId, ths->state, - ths->pRaftStore->currentTerm, commitIndex); + ths->raftStore.currentTerm, commitIndex); } return ths->commitIndex; } diff --git a/source/libs/sync/src/syncElection.c b/source/libs/sync/src/syncElection.c index bcc95c5f10..cd3ffc33e3 100644 --- a/source/libs/sync/src/syncElection.c +++ b/source/libs/sync/src/syncElection.c @@ -48,7 +48,7 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) { SyncRequestVote* pMsg = rpcMsg.pCont; pMsg->srcId = pNode->myRaftId; pMsg->destId = pNode->peersId[i]; - pMsg->term = pNode->pRaftStore->currentTerm; + pMsg->term = pNode->raftStore.currentTerm; ret = syncNodeGetLastIndexTerm(pNode, &pMsg->lastLogIndex, &pMsg->lastLogTerm); ASSERT(ret == 0); @@ -75,10 +75,10 @@ int32_t syncNodeElect(SSyncNode* pSyncNode) { } // start election - raftStoreNextTerm(pSyncNode->pRaftStore); - raftStoreClearVote(pSyncNode->pRaftStore); - voteGrantedReset(pSyncNode->pVotesGranted, pSyncNode->pRaftStore->currentTerm); - votesRespondReset(pSyncNode->pVotesRespond, pSyncNode->pRaftStore->currentTerm); + raftStoreNextTerm(pSyncNode); + raftStoreClearVote(pSyncNode); + voteGrantedReset(pSyncNode->pVotesGranted, pSyncNode->raftStore.currentTerm); + votesRespondReset(pSyncNode->pVotesRespond, pSyncNode->raftStore.currentTerm); syncNodeVoteForSelf(pSyncNode); if (voteGrantedMajority(pSyncNode->pVotesGranted)) { diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index c2bf6dc837..a339cb9857 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -468,7 +468,7 @@ bool syncNodeIsReadyForRead(SSyncNode* pSyncNode) { } if (code == 0 && pEntry != NULL) { - if (pEntry->originalRpcType == TDMT_SYNC_NOOP && pEntry->term == pSyncNode->pRaftStore->currentTerm) { + if (pEntry->originalRpcType == TDMT_SYNC_NOOP && pEntry->term == pSyncNode->raftStore.currentTerm) { ready = true; } @@ -736,7 +736,7 @@ int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak, int64_ int32_t code = syncNodeOnClientRequest(pSyncNode, pMsg, &retIndex); if (code == 0) { pMsg->info.conn.applyIndex = retIndex; - pMsg->info.conn.applyTerm = pSyncNode->pRaftStore->currentTerm; + pMsg->info.conn.applyTerm = pSyncNode->raftStore.currentTerm; sTrace("vgId:%d, propose optimized msg, index:%" PRId64 " type:%s", pSyncNode->vgId, retIndex, TMSG_INFO(pMsg->msgType)); return 1; @@ -983,8 +983,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { // init TLA+ server vars pSyncNode->state = TAOS_SYNC_STATE_FOLLOWER; - pSyncNode->pRaftStore = raftStoreOpen(pSyncNode->raftStorePath); - if (pSyncNode->pRaftStore == NULL) { + if (raftStoreReadFile(pSyncNode) != 0) { sError("vgId:%d, failed to open raft store at path %s", pSyncNode->vgId, pSyncNode->raftStorePath); goto _error; } @@ -1184,7 +1183,7 @@ int32_t syncNodeRestore(SSyncNode* pSyncNode) { int32_t syncNodeStart(SSyncNode* pSyncNode) { // start raft if (pSyncNode->replicaNum == 1) { - raftStoreNextTerm(pSyncNode->pRaftStore); + raftStoreNextTerm(pSyncNode); syncNodeBecomeLeader(pSyncNode, "one replica start"); // Raft 3.6.2 Committing entries from previous terms @@ -1202,7 +1201,7 @@ int32_t syncNodeStart(SSyncNode* pSyncNode) { void syncNodeStartOld(SSyncNode* pSyncNode) { // start raft if (pSyncNode->replicaNum == 1) { - raftStoreNextTerm(pSyncNode->pRaftStore); + raftStoreNextTerm(pSyncNode); syncNodeBecomeLeader(pSyncNode, "one replica start"); // Raft 3.6.2 Committing entries from previous terms @@ -1288,10 +1287,6 @@ void syncNodeClose(SSyncNode* pSyncNode) { if (pSyncNode == NULL) return; sNInfo(pSyncNode, "sync close, node:%p", pSyncNode); - int32_t ret = raftStoreClose(pSyncNode->pRaftStore); - ASSERT(ret == 0); - pSyncNode->pRaftStore = NULL; - syncNodeLogReplMgrDestroy(pSyncNode); syncRespMgrDestroy(pSyncNode->pSyncRespMgr); pSyncNode->pSyncRespMgr = NULL; @@ -1714,39 +1709,39 @@ _END: // raft state change -------------- void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term) { - if (term > pSyncNode->pRaftStore->currentTerm) { - raftStoreSetTerm(pSyncNode->pRaftStore, term); + if (term > pSyncNode->raftStore.currentTerm) { + raftStoreSetTerm(pSyncNode, term); char tmpBuf[64]; snprintf(tmpBuf, sizeof(tmpBuf), "update term to %" PRId64, term); syncNodeBecomeFollower(pSyncNode, tmpBuf); - raftStoreClearVote(pSyncNode->pRaftStore); + raftStoreClearVote(pSyncNode); } } void syncNodeUpdateTermWithoutStepDown(SSyncNode* pSyncNode, SyncTerm term) { - if (term > pSyncNode->pRaftStore->currentTerm) { - raftStoreSetTerm(pSyncNode->pRaftStore, term); + if (term > pSyncNode->raftStore.currentTerm) { + raftStoreSetTerm(pSyncNode, term); } } void syncNodeStepDown(SSyncNode* pSyncNode, SyncTerm newTerm) { - if (pSyncNode->pRaftStore->currentTerm > newTerm) { + if (pSyncNode->raftStore.currentTerm > newTerm) { sNTrace(pSyncNode, "step down, ignore, new-term:%" PRId64 ", current-term:%" PRId64, newTerm, - pSyncNode->pRaftStore->currentTerm); + pSyncNode->raftStore.currentTerm); return; } do { sNTrace(pSyncNode, "step down, new-term:%" PRId64 ", current-term:%" PRId64, newTerm, - pSyncNode->pRaftStore->currentTerm); + pSyncNode->raftStore.currentTerm); } while (0); - if (pSyncNode->pRaftStore->currentTerm < newTerm) { - raftStoreSetTerm(pSyncNode->pRaftStore, newTerm); + if (pSyncNode->raftStore.currentTerm < newTerm) { + raftStoreSetTerm(pSyncNode, newTerm); char tmpBuf[64]; snprintf(tmpBuf, sizeof(tmpBuf), "step down, update term to %" PRId64, newTerm); syncNodeBecomeFollower(pSyncNode, tmpBuf); - raftStoreClearVote(pSyncNode->pRaftStore); + raftStoreClearVote(pSyncNode); } else { if (pSyncNode->state != TAOS_SYNC_STATE_FOLLOWER) { @@ -1904,7 +1899,7 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) { SyncIndex lastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore); ASSERT(lastIndex >= 0); sInfo("vgId:%d, become leader. term: %" PRId64 ", commit index: %" PRId64 ", last index: %" PRId64 "", - pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, pSyncNode->commitIndex, lastIndex); + pSyncNode->vgId, pSyncNode->raftStore.currentTerm, pSyncNode->commitIndex, lastIndex); } void syncNodeCandidate2LeaderOld(SSyncNode* pSyncNode) { @@ -1937,7 +1932,7 @@ void syncNodeFollower2Candidate(SSyncNode* pSyncNode) { pSyncNode->state = TAOS_SYNC_STATE_CANDIDATE; SyncIndex lastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore); sInfo("vgId:%d, become candidate from follower. term: %" PRId64 ", commit index: %" PRId64 ", last index: %" PRId64, - pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, pSyncNode->commitIndex, lastIndex); + pSyncNode->vgId, pSyncNode->raftStore.currentTerm, pSyncNode->commitIndex, lastIndex); sNTrace(pSyncNode, "follower to candidate"); } @@ -1947,7 +1942,7 @@ void syncNodeLeader2Follower(SSyncNode* pSyncNode) { syncNodeBecomeFollower(pSyncNode, "leader to follower"); SyncIndex lastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore); sInfo("vgId:%d, become follower from leader. term: %" PRId64 ", commit index: %" PRId64 ", last index: %" PRId64, - pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, pSyncNode->commitIndex, lastIndex); + pSyncNode->vgId, pSyncNode->raftStore.currentTerm, pSyncNode->commitIndex, lastIndex); sNTrace(pSyncNode, "leader to follower"); } @@ -1957,7 +1952,7 @@ void syncNodeCandidate2Follower(SSyncNode* pSyncNode) { syncNodeBecomeFollower(pSyncNode, "candidate to follower"); SyncIndex lastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore); sInfo("vgId:%d, become follower from candidate. term: %" PRId64 ", commit index: %" PRId64 ", last index: %" PRId64, - pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, pSyncNode->commitIndex, lastIndex); + pSyncNode->vgId, pSyncNode->raftStore.currentTerm, pSyncNode->commitIndex, lastIndex); sNTrace(pSyncNode, "candidate to follower"); } @@ -1965,15 +1960,15 @@ void syncNodeCandidate2Follower(SSyncNode* pSyncNode) { // just called by syncNodeVoteForSelf // need assert void syncNodeVoteForTerm(SSyncNode* pSyncNode, SyncTerm term, SRaftId* pRaftId) { - ASSERT(term == pSyncNode->pRaftStore->currentTerm); - ASSERT(!raftStoreHasVoted(pSyncNode->pRaftStore)); + ASSERT(term == pSyncNode->raftStore.currentTerm); + ASSERT(!raftStoreHasVoted(pSyncNode)); - raftStoreVote(pSyncNode->pRaftStore, pRaftId); + raftStoreVote(pSyncNode, pRaftId); } // simulate get vote from outside void syncNodeVoteForSelf(SSyncNode* pSyncNode) { - syncNodeVoteForTerm(pSyncNode, pSyncNode->pRaftStore->currentTerm, &pSyncNode->myRaftId); + syncNodeVoteForTerm(pSyncNode, pSyncNode->raftStore.currentTerm, &pSyncNode->myRaftId); SRpcMsg rpcMsg = {0}; int32_t ret = syncBuildRequestVoteReply(&rpcMsg, pSyncNode->vgId); @@ -1982,7 +1977,7 @@ void syncNodeVoteForSelf(SSyncNode* pSyncNode) { SyncRequestVoteReply* pMsg = rpcMsg.pCont; pMsg->srcId = pSyncNode->myRaftId; pMsg->destId = pSyncNode->myRaftId; - pMsg->term = pSyncNode->pRaftStore->currentTerm; + pMsg->term = pSyncNode->raftStore.currentTerm; pMsg->voteGranted = true; voteGrantedVote(pSyncNode->pVotesGranted, pMsg); @@ -2272,13 +2267,6 @@ static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) { return; } - if (pSyncNode->pRaftStore == NULL) { - syncNodeRelease(pSyncNode); - syncHbTimerDataRelease(pData); - sError("vgId:%d, hb timer raft store already stop", pSyncNode->vgId); - return; - } - // sTrace("vgId:%d, eq peer hb timer", pSyncNode->vgId); if (pSyncNode->replicaNum > 1) { @@ -2302,7 +2290,7 @@ static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) { SyncHeartbeat* pSyncMsg = rpcMsg.pCont; pSyncMsg->srcId = pSyncNode->myRaftId; pSyncMsg->destId = pData->destId; - pSyncMsg->term = pSyncNode->pRaftStore->currentTerm; + pSyncMsg->term = pSyncNode->raftStore.currentTerm; pSyncMsg->commitIndex = pSyncNode->commitIndex; pSyncMsg->minMatchIndex = syncMinMatchIndex(pSyncNode); pSyncMsg->privateTerm = 0; @@ -2348,7 +2336,7 @@ static int32_t syncNodeEqNoop(SSyncNode* pNode) { } SyncIndex index = pNode->pLogStore->syncLogWriteIndex(pNode->pLogStore); - SyncTerm term = pNode->pRaftStore->currentTerm; + SyncTerm term = pNode->raftStore.currentTerm; SSyncRaftEntry* pEntry = syncEntryBuildNoop(term, index, pNode->vgId); if (pEntry == NULL) return -1; @@ -2394,8 +2382,7 @@ int32_t syncNodeAppend(SSyncNode* ths, SSyncRaftEntry* pEntry) { if (syncLogBufferAppend(ths->pLogBuf, ths, pEntry) < 0) { sError("vgId:%d, failed to enqueue sync log buffer, index:%" PRId64, ths->vgId, pEntry->index); terrno = TSDB_CODE_SYN_BUFFER_FULL; - (void)syncLogFsmExecute(ths, ths->pFsm, ths->state, ths->pRaftStore->currentTerm, pEntry, - TSDB_CODE_SYN_BUFFER_FULL); + (void)syncLogFsmExecute(ths, ths->pFsm, ths->state, ths->raftStore.currentTerm, pEntry, TSDB_CODE_SYN_BUFFER_FULL); syncEntryDestroy(pEntry); return -1; } @@ -2468,7 +2455,7 @@ bool syncNodeSnapshotRecving(SSyncNode* pSyncNode) { static int32_t syncNodeAppendNoop(SSyncNode* ths) { SyncIndex index = syncLogBufferGetEndIndex(ths->pLogBuf); - SyncTerm term = ths->pRaftStore->currentTerm; + SyncTerm term = ths->raftStore.currentTerm; SSyncRaftEntry* pEntry = syncEntryBuildNoop(term, index, ths->vgId); if (pEntry == NULL) { @@ -2484,7 +2471,7 @@ static int32_t syncNodeAppendNoopOld(SSyncNode* ths) { int32_t ret = 0; SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore); - SyncTerm term = ths->pRaftStore->currentTerm; + SyncTerm term = ths->raftStore.currentTerm; SSyncRaftEntry* pEntry = syncEntryBuildNoop(term, index, ths->vgId); ASSERT(pEntry != NULL); @@ -2526,12 +2513,12 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) { SyncHeartbeatReply* pMsgReply = rpcMsg.pCont; pMsgReply->destId = pMsg->srcId; pMsgReply->srcId = ths->myRaftId; - pMsgReply->term = ths->pRaftStore->currentTerm; + pMsgReply->term = ths->raftStore.currentTerm; pMsgReply->privateTerm = 8864; // magic number pMsgReply->startTime = ths->startTime; pMsgReply->timeStamp = tsMs; - if (pMsg->term == ths->pRaftStore->currentTerm && ths->state != TAOS_SYNC_STATE_LEADER) { + if (pMsg->term == ths->raftStore.currentTerm && ths->state != TAOS_SYNC_STATE_LEADER) { syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), tsMs); syncNodeResetElectTimer(ths); @@ -2560,7 +2547,7 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) { } } - if (pMsg->term >= ths->pRaftStore->currentTerm && ths->state != TAOS_SYNC_STATE_FOLLOWER) { + if (pMsg->term >= ths->raftStore.currentTerm && ths->state != TAOS_SYNC_STATE_FOLLOWER) { // syncNodeStepDown(ths, pMsg->term); SRpcMsg rpcMsgLocalCmd = {0}; (void)syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId); @@ -2687,7 +2674,7 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIn int32_t code = 0; SyncIndex index = syncLogBufferGetEndIndex(ths->pLogBuf); - SyncTerm term = ths->pRaftStore->currentTerm; + SyncTerm term = ths->raftStore.currentTerm; SSyncRaftEntry* pEntry = NULL; if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { pEntry = syncEntryBuildFromClientRequest(pMsg->pCont, term, index); @@ -2721,7 +2708,7 @@ int32_t syncNodeOnClientRequestOld(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRe int32_t code = 0; SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore); - SyncTerm term = ths->pRaftStore->currentTerm; + SyncTerm term = ths->raftStore.currentTerm; SSyncRaftEntry* pEntry; if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { @@ -2755,7 +2742,7 @@ int32_t syncNodeOnClientRequestOld(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRe .state = ths->state, .seqNum = pEntry->seqNum, .term = pEntry->term, - .currentTerm = ths->pRaftStore->currentTerm, + .currentTerm = ths->raftStore.currentTerm, .flag = 0, }; ths->pFsm->FpCommitCb(ths->pFsm, pMsg, &cbMeta); @@ -2833,7 +2820,7 @@ int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* p return 0; } - if (pEntry->term < ths->pRaftStore->currentTerm) { + if (pEntry->term < ths->raftStore.currentTerm) { sNTrace(ths, "little term:%" PRId64 ", can not do leader transfer", pEntry->term); return 0; } @@ -2871,7 +2858,7 @@ int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* p if (ths->pFsm->FpLeaderTransferCb != NULL) { SFsmCbMeta cbMeta = { .code = 0, - .currentTerm = ths->pRaftStore->currentTerm, + .currentTerm = ths->raftStore.currentTerm, .flag = 0, .index = pEntry->index, .lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, pEntry->index), @@ -2987,7 +2974,7 @@ int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endInde .state = ths->state, .seqNum = pEntry->seqNum, .term = pEntry->term, - .currentTerm = ths->pRaftStore->currentTerm, + .currentTerm = ths->raftStore.currentTerm, .flag = flag, }; diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index 467b4e2219..29f327c35c 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -176,7 +176,7 @@ int32_t syncBuildAppendEntriesFromRaftLog(SSyncNode* pNode, SSyncRaftEntry* pEnt pMsg->prevLogTerm = prevLogTerm; pMsg->vgId = pNode->vgId; pMsg->srcId = pNode->myRaftId; - pMsg->term = pNode->pRaftStore->currentTerm; + pMsg->term = pNode->raftStore.currentTerm; pMsg->commitIndex = pNode->commitIndex; pMsg->privateTerm = 0; return 0; diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index f878044bca..b1f955b8df 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -285,9 +285,9 @@ SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf) { int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm) { taosThreadMutexLock(&pBuf->mutex); syncLogBufferValidate(pBuf); - int32_t ret = -1; - SyncIndex index = pEntry->index; - SyncIndex prevIndex = pEntry->index - 1; + int32_t ret = -1; + SyncIndex index = pEntry->index; + SyncIndex prevIndex = pEntry->index - 1; SyncTerm lastMatchTerm = syncLogBufferGetLastMatchTermWithoutLock(pBuf); SSyncRaftEntry* pExist = NULL; bool inBuf = true; @@ -509,7 +509,7 @@ int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t comm SSyncLogStore* pLogStore = pNode->pLogStore; SSyncFSM* pFsm = pNode->pFsm; ESyncState role = pNode->state; - SyncTerm term = pNode->pRaftStore->currentTerm; + SyncTerm term = pNode->raftStore.currentTerm; SyncGroupId vgId = pNode->vgId; int32_t ret = -1; int64_t upperIndex = TMIN(commitIndex, pBuf->matchIndex); @@ -571,7 +571,7 @@ int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t comm _out: // mark as restored if needed if (!pNode->restoreFinish && pBuf->commitIndex >= pNode->commitIndex && pEntry != NULL && - pNode->pRaftStore->currentTerm <= pEntry->term) { + pNode->raftStore.currentTerm <= pEntry->term) { pNode->pFsm->FpRestoreFinishCb(pNode->pFsm); pNode->restoreFinish = true; sInfo("vgId:%d, restore finished. log buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId, @@ -614,9 +614,9 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { return -1; } - int32_t ret = -1; - bool retried = false; - int64_t retryWaitMs = syncLogGetRetryBackoffTimeMs(pMgr); + int32_t ret = -1; + bool retried = false; + int64_t retryWaitMs = syncLogGetRetryBackoffTimeMs(pMgr); int64_t nowMs = taosGetMonoTimestampMs(); int count = 0; int64_t firstIndex = -1; @@ -807,9 +807,9 @@ int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode } (void)syncLogReplMgrReset(pMgr); - SRaftId* pDestId = &pNode->replicasId[pMgr->peerId]; - bool barrier = false; - SyncTerm term = -1; + SRaftId* pDestId = &pNode->replicasId[pMgr->peerId]; + bool barrier = false; + SyncTerm term = -1; if (syncLogBufferReplicateOneTo(pMgr, pNode, index, &term, pDestId, &barrier) < 0) { sError("vgId:%d, failed to replicate log entry since %s. index: %" PRId64 ", dest: 0x%016" PRIx64 "", pNode->vgId, terrstr(), index, pDestId->addr); @@ -836,11 +836,11 @@ int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { ASSERT(pMgr->restored); - SRaftId* pDestId = &pNode->replicasId[pMgr->peerId]; - int32_t batchSize = TMAX(1, pMgr->size >> (4 + pMgr->retryBackoff)); - int32_t count = 0; - int64_t nowMs = taosGetMonoTimestampMs(); - int64_t limit = pMgr->size >> 1; + SRaftId* pDestId = &pNode->replicasId[pMgr->peerId]; + int32_t batchSize = TMAX(1, pMgr->size >> (4 + pMgr->retryBackoff)); + int32_t count = 0; + int64_t nowMs = taosGetMonoTimestampMs(); + int64_t limit = pMgr->size >> 1; SyncTerm term = -1; SyncIndex firstIndex = -1; @@ -891,13 +891,13 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p int32_t syncLogReplMgrProcessReplyInNormalMode(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg) { ASSERT(pMgr->restored == true); if (pMgr->startIndex <= pMsg->lastSendIndex && pMsg->lastSendIndex < pMgr->endIndex) { - if (pMgr->startIndex < pMgr->matchIndex && pMgr->retryBackoff > 0) { - int64_t firstSentMs = pMgr->states[pMgr->startIndex % pMgr->size].timeMs; - int64_t lastSentMs = pMgr->states[(pMgr->endIndex - 1) % pMgr->size].timeMs; - int64_t timeDiffMs = lastSentMs - firstSentMs; - if (timeDiffMs > 0 && timeDiffMs < (SYNC_LOG_REPL_RETRY_WAIT_MS << (pMgr->retryBackoff - 1))) { - pMgr->retryBackoff -= 1; - } + if (pMgr->startIndex < pMgr->matchIndex && pMgr->retryBackoff > 0) { + int64_t firstSentMs = pMgr->states[pMgr->startIndex % pMgr->size].timeMs; + int64_t lastSentMs = pMgr->states[(pMgr->endIndex - 1) % pMgr->size].timeMs; + int64_t timeDiffMs = lastSentMs - firstSentMs; + if (timeDiffMs > 0 && timeDiffMs < (SYNC_LOG_REPL_RETRY_WAIT_MS << (pMgr->retryBackoff - 1))) { + pMgr->retryBackoff -= 1; + } } pMgr->states[pMsg->lastSendIndex % pMgr->size].acked = true; pMgr->matchIndex = TMAX(pMgr->matchIndex, pMsg->matchIndex); diff --git a/source/libs/sync/src/syncRaftStore.c b/source/libs/sync/src/syncRaftStore.c index b19cda2a44..197d1463fd 100644 --- a/source/libs/sync/src/syncRaftStore.c +++ b/source/libs/sync/src/syncRaftStore.c @@ -16,156 +16,161 @@ #define _DEFAULT_SOURCE #include "syncRaftStore.h" #include "syncUtil.h" +#include "tjson.h" -// private function -static int32_t raftStoreInit(SRaftStore *pRaftStore); -static bool raftStoreFileExist(char *path); +static int32_t raftStoreDecode(const SJson *pJson, SRaftStore *pStore) { + int32_t code = 0; -// public function -SRaftStore *raftStoreOpen(const char *path) { - int32_t ret; + tjsonGetNumberValue(pJson, "current_term", pStore->currentTerm, code); + if (code < 0) return -1; + tjsonGetNumberValue(pJson, "vote_for_addr", pStore->voteFor.addr, code); + if (code < 0) return -1; + tjsonGetInt32ValueFromDouble(pJson, "vote_for_vgid", pStore->voteFor.vgId, code); + if (code < 0) return -1; - SRaftStore *pRaftStore = taosMemoryCalloc(1, sizeof(SRaftStore)); - if (pRaftStore == NULL) { + return 0; +} + +int32_t raftStoreReadFile(SSyncNode *pNode) { + int32_t code = -1; + TdFilePtr pFile = NULL; + char *pData = NULL; + SJson *pJson = NULL; + const char *file = pNode->raftStorePath; + SRaftStore *pStore = &pNode->raftStore; + + if (taosStatFile(file, NULL, NULL) < 0) { + sInfo("vgId:%d, raft store file:%s not exist, use default value", pNode->vgId, file); + pStore->currentTerm = 0; + pStore->voteFor.addr = 0; + pStore->voteFor.vgId = 0; + return raftStoreWriteFile(pNode); + } + + pFile = taosOpenFile(file, TD_FILE_READ); + if (pFile == NULL) { + terrno = TAOS_SYSTEM_ERROR(errno); + sError("vgId:%d, failed to open raft store file:%s since %s", pNode->vgId, file, terrstr()); + goto _OVER; + } + + int64_t size = 0; + if (taosFStatFile(pFile, &size, NULL) < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + sError("vgId:%d, failed to fstat raft store file:%s since %s", pNode->vgId, file, terrstr()); + goto _OVER; + } + + pData = taosMemoryMalloc(size + 1); + if (pData == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; + goto _OVER; } - snprintf(pRaftStore->path, sizeof(pRaftStore->path), "%s", path); - if (!raftStoreFileExist(pRaftStore->path)) { - ret = raftStoreInit(pRaftStore); - ASSERT(ret == 0); + if (taosReadFile(pFile, pData, size) != size) { + terrno = TAOS_SYSTEM_ERROR(errno); + sError("vgId:%d, failed to read raft store file:%s since %s", pNode->vgId, file, terrstr()); + goto _OVER; } - char storeBuf[RAFT_STORE_BLOCK_SIZE] = {0}; - pRaftStore->pFile = taosOpenFile(path, TD_FILE_READ | TD_FILE_WRITE); - ASSERT(pRaftStore->pFile != NULL); + pData[size] = '\0'; - int len = taosReadFile(pRaftStore->pFile, storeBuf, RAFT_STORE_BLOCK_SIZE); - ASSERT(len > 0); + pJson = tjsonParse(pData); + if (pJson == NULL) { + terrno = TSDB_CODE_INVALID_JSON_FORMAT; + goto _OVER; + } - ret = raftStoreDeserialize(pRaftStore, storeBuf, len); - ASSERT(ret == 0); + if (raftStoreDecode(pJson, pStore) < 0) { + terrno = TSDB_CODE_INVALID_JSON_FORMAT; + goto _OVER; + } - return pRaftStore; + code = 0; + sInfo("vgId:%d, succceed to read raft store file %s", pNode->vgId, file); + +_OVER: + if (pData != NULL) taosMemoryFree(pData); + if (pJson != NULL) cJSON_Delete(pJson); + if (pFile != NULL) taosCloseFile(&pFile); + + if (code != 0) { + sError("vgId:%d, failed to read raft store file:%s since %s", pNode->vgId, file, terrstr()); + } + return code; } -static int32_t raftStoreInit(SRaftStore *pRaftStore) { - ASSERT(pRaftStore != NULL); - - pRaftStore->pFile = taosOpenFile(pRaftStore->path, TD_FILE_CREATE | TD_FILE_WRITE); - ASSERT(pRaftStore->pFile != NULL); - - pRaftStore->currentTerm = 0; - pRaftStore->voteFor.addr = 0; - pRaftStore->voteFor.vgId = 0; - - int32_t ret = raftStorePersist(pRaftStore); - ASSERT(ret == 0); - - taosCloseFile(&pRaftStore->pFile); +static int32_t raftStoreEncode(SJson *pJson, SRaftStore *pStore) { + if (tjsonAddIntegerToObject(pJson, "current_term", pStore->currentTerm) < 0) return -1; + if (tjsonAddIntegerToObject(pJson, "vote_for_addr", pStore->voteFor.addr) < 0) return -1; + if (tjsonAddDoubleToObject(pJson, "vote_for_vgid", pStore->voteFor.vgId) < 0) return -1; return 0; } -int32_t raftStoreClose(SRaftStore *pRaftStore) { - if (pRaftStore == NULL) return 0; +int32_t raftStoreWriteFile(SSyncNode *pNode) { + int32_t code = -1; + char *buffer = NULL; + SJson *pJson = NULL; + TdFilePtr pFile = NULL; + const char *realfile = pNode->raftStorePath; + SRaftStore *pStore = &pNode->raftStore; + char file[PATH_MAX] = {0}; + snprintf(file, sizeof(file), "%s.bak", realfile); - taosCloseFile(&pRaftStore->pFile); - taosMemoryFree(pRaftStore); - pRaftStore = NULL; - return 0; + terrno = TSDB_CODE_OUT_OF_MEMORY; + pJson = tjsonCreateObject(); + if (pJson == NULL) goto _OVER; + if (raftStoreEncode(pJson, pStore) != 0) goto _OVER; + buffer = tjsonToString(pJson); + if (buffer == NULL) goto _OVER; + terrno = 0; + + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) goto _OVER; + + int32_t len = strlen(buffer); + if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; + if (taosFsyncFile(pFile) < 0) goto _OVER; + + taosCloseFile(&pFile); + if (taosRenameFile(file, realfile) != 0) goto _OVER; + + code = 0; + sInfo("vgId:%d, succeed to write raft store file:%s, len:%d", pNode->vgId, realfile, len); + +_OVER: + if (pJson != NULL) tjsonDelete(pJson); + if (buffer != NULL) taosMemoryFree(buffer); + if (pFile != NULL) taosCloseFile(&pFile); + + if (code != 0) { + if (terrno == 0) terrno = TAOS_SYSTEM_ERROR(errno); + sError("vgId:%d, failed to write raft store file:%s since %s", pNode->vgId, realfile, terrstr()); + } + return code; } -int32_t raftStorePersist(SRaftStore *pRaftStore) { - ASSERT(pRaftStore != NULL); - - int32_t ret; - char storeBuf[RAFT_STORE_BLOCK_SIZE] = {0}; - ret = raftStoreSerialize(pRaftStore, storeBuf, sizeof(storeBuf)); - ASSERT(ret == 0); - - taosLSeekFile(pRaftStore->pFile, 0, SEEK_SET); - - ret = taosWriteFile(pRaftStore->pFile, storeBuf, sizeof(storeBuf)); - ASSERT(ret == RAFT_STORE_BLOCK_SIZE); - - taosFsyncFile(pRaftStore->pFile); - return 0; -} - -static bool raftStoreFileExist(char *path) { - bool b = taosStatFile(path, NULL, NULL) >= 0; - return b; -} - -int32_t raftStoreSerialize(SRaftStore *pRaftStore, char *buf, size_t len) { - ASSERT(pRaftStore != NULL); - - cJSON *pRoot = cJSON_CreateObject(); - - char u64Buf[128] = {0}; - snprintf(u64Buf, sizeof(u64Buf), "%" PRIu64 "", pRaftStore->currentTerm); - cJSON_AddStringToObject(pRoot, "current_term", u64Buf); - - snprintf(u64Buf, sizeof(u64Buf), "%" PRIu64 "", pRaftStore->voteFor.addr); - cJSON_AddStringToObject(pRoot, "vote_for_addr", u64Buf); - - cJSON_AddNumberToObject(pRoot, "vote_for_vgid", pRaftStore->voteFor.vgId); - - char *serialized = cJSON_Print(pRoot); - int len2 = strlen(serialized); - ASSERT(len2 < len); - memset(buf, 0, len); - snprintf(buf, len, "%s", serialized); - taosMemoryFree(serialized); - - cJSON_Delete(pRoot); - return 0; -} - -int32_t raftStoreDeserialize(SRaftStore *pRaftStore, char *buf, size_t len) { - ASSERT(pRaftStore != NULL); - - ASSERT(len > 0 && len <= RAFT_STORE_BLOCK_SIZE); - cJSON *pRoot = cJSON_Parse(buf); - - cJSON *pCurrentTerm = cJSON_GetObjectItem(pRoot, "current_term"); - ASSERT(cJSON_IsString(pCurrentTerm)); - sscanf(pCurrentTerm->valuestring, "%" PRIu64 "", &(pRaftStore->currentTerm)); - - cJSON *pVoteForAddr = cJSON_GetObjectItem(pRoot, "vote_for_addr"); - ASSERT(cJSON_IsString(pVoteForAddr)); - sscanf(pVoteForAddr->valuestring, "%" PRIu64 "", &(pRaftStore->voteFor.addr)); - - cJSON *pVoteForVgid = cJSON_GetObjectItem(pRoot, "vote_for_vgid"); - pRaftStore->voteFor.vgId = pVoteForVgid->valueint; - - cJSON_Delete(pRoot); - return 0; -} - -bool raftStoreHasVoted(SRaftStore *pRaftStore) { - bool b = syncUtilEmptyId(&(pRaftStore->voteFor)); +bool raftStoreHasVoted(SSyncNode *pNode) { + bool b = syncUtilEmptyId(&pNode->raftStore.voteFor); return (!b); } -void raftStoreVote(SRaftStore *pRaftStore, SRaftId *pRaftId) { - ASSERT(!syncUtilEmptyId(pRaftId)); - pRaftStore->voteFor = *pRaftId; - raftStorePersist(pRaftStore); +void raftStoreVote(SSyncNode *pNode, SRaftId *pRaftId) { + pNode->raftStore.voteFor = *pRaftId; + (void)raftStoreWriteFile(pNode); } -void raftStoreClearVote(SRaftStore *pRaftStore) { - pRaftStore->voteFor = EMPTY_RAFT_ID; - raftStorePersist(pRaftStore); +void raftStoreClearVote(SSyncNode *pNode) { + pNode->raftStore.voteFor = EMPTY_RAFT_ID; + (void)raftStoreWriteFile(pNode); } -void raftStoreNextTerm(SRaftStore *pRaftStore) { - ++(pRaftStore->currentTerm); - raftStorePersist(pRaftStore); +void raftStoreNextTerm(SSyncNode *pNode) { + pNode->raftStore.currentTerm++; + (void)raftStoreWriteFile(pNode); } -void raftStoreSetTerm(SRaftStore *pRaftStore, SyncTerm term) { - pRaftStore->currentTerm = term; - raftStorePersist(pRaftStore); +void raftStoreSetTerm(SSyncNode *pNode, SyncTerm term) { + pNode->raftStore.currentTerm = term; + (void)raftStoreWriteFile(pNode); } diff --git a/source/libs/sync/src/syncReplication.c b/source/libs/sync/src/syncReplication.c index e3058768f8..1aa476e84e 100644 --- a/source/libs/sync/src/syncReplication.c +++ b/source/libs/sync/src/syncReplication.c @@ -122,7 +122,7 @@ int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId, bool snapsh ASSERT(pMsg != NULL); pMsg->srcId = pSyncNode->myRaftId; pMsg->destId = *pDestId; - pMsg->term = pSyncNode->pRaftStore->currentTerm; + pMsg->term = pSyncNode->raftStore.currentTerm; pMsg->prevLogIndex = preLogIndex; pMsg->prevLogTerm = preLogTerm; pMsg->commitIndex = pSyncNode->commitIndex; @@ -245,7 +245,7 @@ int32_t syncNodeHeartbeatPeers(SSyncNode* pSyncNode) { SyncHeartbeat* pSyncMsg = rpcMsg.pCont; pSyncMsg->srcId = pSyncNode->myRaftId; pSyncMsg->destId = pSyncNode->peersId[i]; - pSyncMsg->term = pSyncNode->pRaftStore->currentTerm; + pSyncMsg->term = pSyncNode->raftStore.currentTerm; pSyncMsg->commitIndex = pSyncNode->commitIndex; pSyncMsg->minMatchIndex = syncMinMatchIndex(pSyncNode); pSyncMsg->privateTerm = 0; diff --git a/source/libs/sync/src/syncRequestVote.c b/source/libs/sync/src/syncRequestVote.c index 773befe1e4..e9a18dfe86 100644 --- a/source/libs/sync/src/syncRequestVote.c +++ b/source/libs/sync/src/syncRequestVote.c @@ -44,12 +44,12 @@ // /\ UNCHANGED <> // -static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pMsg) { - SyncTerm myLastTerm = syncNodeGetLastTerm(pSyncNode); - SyncIndex myLastIndex = syncNodeGetLastIndex(pSyncNode); +static bool syncNodeOnRequestVoteLogOK(SSyncNode* ths, SyncRequestVote* pMsg) { + SyncTerm myLastTerm = syncNodeGetLastTerm(ths); + SyncIndex myLastIndex = syncNodeGetLastIndex(ths); - if (pMsg->lastLogIndex < pSyncNode->commitIndex) { - sNTrace(pSyncNode, + if (pMsg->lastLogIndex < ths->commitIndex) { + sNTrace(ths, "logok:0, {my-lterm:%" PRIu64 ", my-lindex:%" PRId64 ", recv-lterm:%" PRIu64 ", recv-lindex:%" PRId64 ", recv-term:%" PRIu64 "}", myLastTerm, myLastIndex, pMsg->lastLogTerm, pMsg->lastLogIndex, pMsg->term); @@ -58,7 +58,7 @@ static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pM } if (myLastTerm == SYNC_TERM_INVALID) { - sNTrace(pSyncNode, + sNTrace(ths, "logok:0, {my-lterm:%" PRIu64 ", my-lindex:%" PRId64 ", recv-lterm:%" PRIu64 ", recv-lindex:%" PRId64 ", recv-term:%" PRIu64 "}", myLastTerm, myLastIndex, pMsg->lastLogTerm, pMsg->lastLogIndex, pMsg->term); @@ -66,7 +66,7 @@ static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pM } if (pMsg->lastLogTerm > myLastTerm) { - sNTrace(pSyncNode, + sNTrace(ths, "logok:1, {my-lterm:%" PRIu64 ", my-lindex:%" PRId64 ", recv-lterm:%" PRIu64 ", recv-lindex:%" PRId64 ", recv-term:%" PRIu64 "}", myLastTerm, myLastIndex, pMsg->lastLogTerm, pMsg->lastLogIndex, pMsg->term); @@ -74,14 +74,14 @@ static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pM } if (pMsg->lastLogTerm == myLastTerm && pMsg->lastLogIndex >= myLastIndex) { - sNTrace(pSyncNode, + sNTrace(ths, "logok:1, {my-lterm:%" PRIu64 ", my-lindex:%" PRId64 ", recv-lterm:%" PRIu64 ", recv-lindex:%" PRId64 ", recv-term:%" PRIu64 "}", myLastTerm, myLastIndex, pMsg->lastLogTerm, pMsg->lastLogIndex, pMsg->term); return true; } - sNTrace(pSyncNode, + sNTrace(ths, "logok:0, {my-lterm:%" PRIu64 ", my-lindex:%" PRId64 ", recv-lterm:%" PRIu64 ", recv-lindex:%" PRId64 ", recv-term:%" PRIu64 "}", myLastTerm, myLastIndex, pMsg->lastLogTerm, pMsg->lastLogIndex, pMsg->term); @@ -93,7 +93,7 @@ int32_t syncNodeOnRequestVote(SSyncNode* ths, const SRpcMsg* pRpcMsg) { SyncRequestVote* pMsg = pRpcMsg->pCont; // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) { + if (!syncNodeInRaftGroup(ths, &pMsg->srcId)) { syncLogRecvRequestVote(ths, pMsg, -1, "not in my config"); return -1; } @@ -101,21 +101,21 @@ int32_t syncNodeOnRequestVote(SSyncNode* ths, const SRpcMsg* pRpcMsg) { bool logOK = syncNodeOnRequestVoteLogOK(ths, pMsg); // maybe update term - if (pMsg->term > ths->pRaftStore->currentTerm) { + if (pMsg->term > ths->raftStore.currentTerm) { syncNodeStepDown(ths, pMsg->term); // syncNodeUpdateTerm(ths, pMsg->term); } - ASSERT(pMsg->term <= ths->pRaftStore->currentTerm); + ASSERT(pMsg->term <= ths->raftStore.currentTerm); - bool grant = (pMsg->term == ths->pRaftStore->currentTerm) && logOK && - ((!raftStoreHasVoted(ths->pRaftStore)) || (syncUtilSameId(&(ths->pRaftStore->voteFor), &(pMsg->srcId)))); + bool grant = (pMsg->term == ths->raftStore.currentTerm) && logOK && + ((!raftStoreHasVoted(ths)) || (syncUtilSameId(&ths->raftStore.voteFor, &pMsg->srcId))); if (grant) { // maybe has already voted for pMsg->srcId // vote again, no harm - raftStoreVote(ths->pRaftStore, &(pMsg->srcId)); + raftStoreVote(ths, &(pMsg->srcId)); // candidate ? - syncNodeStepDown(ths, ths->pRaftStore->currentTerm); + syncNodeStepDown(ths, ths->raftStore.currentTerm); // forbid elect for this round syncNodeResetElectTimer(ths); @@ -129,7 +129,7 @@ int32_t syncNodeOnRequestVote(SSyncNode* ths, const SRpcMsg* pRpcMsg) { SyncRequestVoteReply* pReply = rpcMsg.pCont; pReply->srcId = ths->myRaftId; pReply->destId = pMsg->srcId; - pReply->term = ths->pRaftStore->currentTerm; + pReply->term = ths->raftStore.currentTerm; pReply->voteGranted = grant; // trace log diff --git a/source/libs/sync/src/syncRequestVoteReply.c b/source/libs/sync/src/syncRequestVoteReply.c index 563f475070..a0d6cbf597 100644 --- a/source/libs/sync/src/syncRequestVoteReply.c +++ b/source/libs/sync/src/syncRequestVoteReply.c @@ -49,25 +49,25 @@ int32_t syncNodeOnRequestVoteReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) { } // drop stale response - if (pMsg->term < ths->pRaftStore->currentTerm) { + if (pMsg->term < ths->raftStore.currentTerm) { syncLogRecvRequestVoteReply(ths, pMsg, "drop stale response"); return -1; } - // ASSERT(!(pMsg->term > ths->pRaftStore->currentTerm)); + // ASSERT(!(pMsg->term > ths->raftStore.currentTerm)); // no need this code, because if I receive reply.term, then I must have sent for that term. - // if (pMsg->term > ths->pRaftStore->currentTerm) { + // if (pMsg->term > ths->raftStore.currentTerm) { // syncNodeUpdateTerm(ths, pMsg->term); // } - if (pMsg->term > ths->pRaftStore->currentTerm) { + if (pMsg->term > ths->raftStore.currentTerm) { syncLogRecvRequestVoteReply(ths, pMsg, "error term"); syncNodeStepDown(ths, pMsg->term); return -1; } syncLogRecvRequestVoteReply(ths, pMsg, ""); - ASSERT(pMsg->term == ths->pRaftStore->currentTerm); + ASSERT(pMsg->term == ths->raftStore.currentTerm); // This tallies votes even when the current state is not Candidate, // but they won't be looked at, so it doesn't matter. diff --git a/source/libs/sync/src/syncRespMgr.c b/source/libs/sync/src/syncRespMgr.c index b55aae4c76..9373eccaef 100644 --- a/source/libs/sync/src/syncRespMgr.c +++ b/source/libs/sync/src/syncRespMgr.c @@ -143,7 +143,7 @@ static void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp) { .state = pNode->state, .seqNum = *pSeqNum, .term = SYNC_TERM_INVALID, - .currentTerm = pNode->pRaftStore->currentTerm, + .currentTerm = pNode->raftStore.currentTerm, .flag = 0, }; diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index defb7402f4..880c76e4dd 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -43,7 +43,7 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI pSender->sendingMS = SYNC_SNAPSHOT_RETRY_MS; pSender->pSyncNode = pSyncNode; pSender->replicaIndex = replicaIndex; - pSender->term = pSyncNode->pRaftStore->currentTerm; + pSender->term = pSyncNode->raftStore.currentTerm; pSender->startTime = 0; pSender->endTime = 0; pSender->pSyncNode->pFsm->FpGetSnapshotInfo(pSender->pSyncNode->pFsm, &pSender->snapshot); @@ -90,7 +90,7 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { memset(&pSender->lastConfig, 0, sizeof(pSender->lastConfig)); pSender->sendingMS = 0; - pSender->term = pSender->pSyncNode->pRaftStore->currentTerm; + pSender->term = pSender->pSyncNode->raftStore.currentTerm; pSender->startTime = taosGetTimestampMs(); pSender->lastSendTime = pSender->startTime; pSender->finish = false; @@ -105,7 +105,7 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { SyncSnapshotSend *pMsg = rpcMsg.pCont; pMsg->srcId = pSender->pSyncNode->myRaftId; pMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex]; - pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pMsg->term = pSender->pSyncNode->raftStore.currentTerm; pMsg->beginIndex = pSender->snapshotParam.start; pMsg->lastIndex = pSender->snapshot.lastApplyIndex; pMsg->lastTerm = pSender->snapshot.lastApplyTerm; @@ -185,7 +185,7 @@ static int32_t snapshotSend(SSyncSnapshotSender *pSender) { SyncSnapshotSend *pMsg = rpcMsg.pCont; pMsg->srcId = pSender->pSyncNode->myRaftId; pMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex]; - pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pMsg->term = pSender->pSyncNode->raftStore.currentTerm; pMsg->beginIndex = pSender->snapshotParam.start; pMsg->lastIndex = pSender->snapshot.lastApplyIndex; pMsg->lastTerm = pSender->snapshot.lastApplyTerm; @@ -226,7 +226,7 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { SyncSnapshotSend *pMsg = rpcMsg.pCont; pMsg->srcId = pSender->pSyncNode->myRaftId; pMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex]; - pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pMsg->term = pSender->pSyncNode->raftStore.currentTerm; pMsg->beginIndex = pSender->snapshotParam.start; pMsg->lastIndex = pSender->snapshot.lastApplyIndex; pMsg->lastTerm = pSender->snapshot.lastApplyTerm; @@ -314,7 +314,7 @@ SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId from pReceiver->pWriter = NULL; pReceiver->pSyncNode = pSyncNode; pReceiver->fromId = fromId; - pReceiver->term = pSyncNode->pRaftStore->currentTerm; + pReceiver->term = pSyncNode->raftStore.currentTerm; pReceiver->snapshot.data = NULL; pReceiver->snapshot.lastApplyIndex = SYNC_INDEX_INVALID; pReceiver->snapshot.lastApplyTerm = 0; @@ -380,7 +380,7 @@ void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *p pReceiver->start = true; pReceiver->ack = SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT; - pReceiver->term = pReceiver->pSyncNode->pRaftStore->currentTerm; + pReceiver->term = pReceiver->pSyncNode->raftStore.currentTerm; pReceiver->fromId = pPreMsg->srcId; pReceiver->startTime = pPreMsg->startTime; @@ -437,9 +437,9 @@ static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnap } // maybe update term - if (pReceiver->snapshot.lastApplyTerm > pReceiver->pSyncNode->pRaftStore->currentTerm) { - pReceiver->pSyncNode->pRaftStore->currentTerm = pReceiver->snapshot.lastApplyTerm; - raftStorePersist(pReceiver->pSyncNode->pRaftStore); + if (pReceiver->snapshot.lastApplyTerm > pReceiver->pSyncNode->raftStore.currentTerm) { + pReceiver->pSyncNode->raftStore.currentTerm = pReceiver->snapshot.lastApplyTerm; + (void)raftStoreWriteFile(pReceiver->pSyncNode); } // stop writer, apply data @@ -592,7 +592,7 @@ _SEND_REPLY: SyncSnapshotRsp *pRspMsg = rpcMsg.pCont; pRspMsg->srcId = pSyncNode->myRaftId; pRspMsg->destId = pMsg->srcId; - pRspMsg->term = pSyncNode->pRaftStore->currentTerm; + pRspMsg->term = pSyncNode->raftStore.currentTerm; pRspMsg->lastIndex = pMsg->lastIndex; pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pReceiver->startTime; @@ -648,7 +648,7 @@ _SEND_REPLY: SyncSnapshotRsp *pRspMsg = rpcMsg.pCont; pRspMsg->srcId = pSyncNode->myRaftId; pRspMsg->destId = pMsg->srcId; - pRspMsg->term = pSyncNode->pRaftStore->currentTerm; + pRspMsg->term = pSyncNode->raftStore.currentTerm; pRspMsg->lastIndex = pMsg->lastIndex; pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pReceiver->startTime; @@ -698,7 +698,7 @@ static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend SyncSnapshotRsp *pRspMsg = rpcMsg.pCont; pRspMsg->srcId = pSyncNode->myRaftId; pRspMsg->destId = pMsg->srcId; - pRspMsg->term = pSyncNode->pRaftStore->currentTerm; + pRspMsg->term = pSyncNode->raftStore.currentTerm; pRspMsg->lastIndex = pMsg->lastIndex; pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pReceiver->startTime; @@ -745,7 +745,7 @@ static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMs SyncSnapshotRsp *pRspMsg = rpcMsg.pCont; pRspMsg->srcId = pSyncNode->myRaftId; pRspMsg->destId = pMsg->srcId; - pRspMsg->term = pSyncNode->pRaftStore->currentTerm; + pRspMsg->term = pSyncNode->raftStore.currentTerm; pRspMsg->lastIndex = pMsg->lastIndex; pRspMsg->lastTerm = pMsg->lastTerm; pRspMsg->startTime = pReceiver->startTime; @@ -794,13 +794,13 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { return -1; } - if (pMsg->term < pSyncNode->pRaftStore->currentTerm) { + if (pMsg->term < pSyncNode->raftStore.currentTerm) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "reject since small term"); terrno = TSDB_CODE_SYN_INTERNAL_ERROR; return -1; } - if (pMsg->term > pSyncNode->pRaftStore->currentTerm) { + if (pMsg->term > pSyncNode->raftStore.currentTerm) { syncNodeStepDown(pSyncNode, pMsg->term); } syncNodeResetElectTimer(pSyncNode); @@ -808,7 +808,7 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { // state, term, seq/ack int32_t code = 0; if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) { - if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { + if (pMsg->term == pSyncNode->raftStore.currentTerm) { if (pMsg->seq == SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq pre-snapshot"); code = syncNodeOnSnapshotPre(pSyncNode, pMsg); @@ -892,7 +892,7 @@ static int32_t syncNodeOnSnapshotPreRsp(SSyncNode *pSyncNode, SSyncSnapshotSende SyncSnapshotSend *pSendMsg = rpcMsg.pCont; pSendMsg->srcId = pSender->pSyncNode->myRaftId; pSendMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex]; - pSendMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pSendMsg->term = pSender->pSyncNode->raftStore.currentTerm; pSendMsg->beginIndex = pSender->snapshotParam.start; pSendMsg->lastIndex = pSender->snapshot.lastApplyIndex; pSendMsg->lastTerm = pSender->snapshot.lastApplyTerm; @@ -951,10 +951,10 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { goto _ERROR; } - if (pMsg->term != pSyncNode->pRaftStore->currentTerm) { + if (pMsg->term != pSyncNode->raftStore.currentTerm) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver term not match"); sSError(pSender, "snapshot sender term not equal, msg term:%" PRId64 " currentTerm:%" PRId64, pMsg->term, - pSyncNode->pRaftStore->currentTerm); + pSyncNode->raftStore.currentTerm); terrno = TSDB_CODE_SYN_INTERNAL_ERROR; goto _ERROR; } diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index e4a65837f7..b246d9a79d 100644 --- a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -158,8 +158,8 @@ static void syncPeerState2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { } void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNode* pNode, const char* format, ...) { - if (pNode == NULL || pNode->pRaftStore == NULL || pNode->pLogStore == NULL) return; - int64_t currentTerm = pNode->pRaftStore->currentTerm; + if (pNode == NULL || pNode->pLogStore == NULL) return; + int64_t currentTerm = pNode->raftStore.currentTerm; // save error code, otherwise it will be overwritten int32_t errCode = terrno; @@ -228,7 +228,7 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNo void syncPrintSnapshotSenderLog(const char* flags, ELogLevel level, int32_t dflag, SSyncSnapshotSender* pSender, const char* format, ...) { SSyncNode* pNode = pSender->pSyncNode; - if (pNode == NULL || pNode->pRaftStore == NULL || pNode->pLogStore == NULL) return; + if (pNode == NULL || pNode->pLogStore == NULL) return; SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; if (pNode->pFsm != NULL && pNode->pFsm->FpGetSnapshotInfo != NULL) { @@ -264,7 +264,7 @@ void syncPrintSnapshotSenderLog(const char* flags, ELogLevel level, int32_t dfla pNode->vgId, eventLog, syncStr(pNode->state), pSender, pSender->snapshotParam.start, pSender->snapshotParam.end, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->seq, pSender->ack, pSender->finish, pSender->replicaIndex, - DID(&pNode->replicasId[pSender->replicaIndex]), pNode->pRaftStore->currentTerm, pNode->commitIndex, + DID(&pNode->replicasId[pSender->replicaIndex]), pNode->raftStore.currentTerm, pNode->commitIndex, logBeginIndex, logLastIndex, pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, pNode->raftCfg.isStandBy, pNode->raftCfg.snapshotStrategy, pNode->raftCfg.batchSize, pNode->replicaNum, pNode->raftCfg.lastConfigIndex, pNode->changing, pNode->restoreFinish, syncNodeDynamicQuorum(pNode), @@ -274,7 +274,7 @@ void syncPrintSnapshotSenderLog(const char* flags, ELogLevel level, int32_t dfla void syncPrintSnapshotReceiverLog(const char* flags, ELogLevel level, int32_t dflag, SSyncSnapshotReceiver* pReceiver, const char* format, ...) { SSyncNode* pNode = pReceiver->pSyncNode; - if (pNode == NULL || pNode->pRaftStore == NULL || pNode->pLogStore == NULL) return; + if (pNode == NULL || pNode->pLogStore == NULL) return; SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; if (pNode->pFsm != NULL && pNode->pFsm->FpGetSnapshotInfo != NULL) { @@ -311,7 +311,7 @@ void syncPrintSnapshotReceiverLog(const char* flags, ELogLevel level, int32_t df pNode->vgId, eventLog, syncStr(pNode->state), pReceiver, pReceiver->start, pReceiver->ack, pReceiver->term, pReceiver->startTime, DID(&pReceiver->fromId), pReceiver->snapshotParam.start, pReceiver->snapshotParam.end, pReceiver->snapshot.lastApplyIndex, pReceiver->snapshot.lastApplyTerm, - pReceiver->snapshot.lastConfigIndex, pNode->pRaftStore->currentTerm, pNode->commitIndex, logBeginIndex, + pReceiver->snapshot.lastConfigIndex, pNode->raftStore.currentTerm, pNode->commitIndex, logBeginIndex, logLastIndex, pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, pNode->raftCfg.isStandBy, pNode->raftCfg.snapshotStrategy, pNode->raftCfg.batchSize, pNode->replicaNum, pNode->raftCfg.lastConfigIndex, pNode->changing, pNode->restoreFinish, syncNodeDynamicQuorum(pNode), diff --git a/source/libs/sync/test/sync_test_lib/src/syncMainDebug.c b/source/libs/sync/test/sync_test_lib/src/syncMainDebug.c index f1db2f0204..a3e76eabcc 100644 --- a/source/libs/sync/test/sync_test_lib/src/syncMainDebug.c +++ b/source/libs/sync/test/sync_test_lib/src/syncMainDebug.c @@ -80,7 +80,7 @@ cJSON* syncNode2Json(const SSyncNode* pSyncNode) { // tla+ server vars cJSON_AddNumberToObject(pRoot, "state", pSyncNode->state); cJSON_AddStringToObject(pRoot, "state_str", syncStr(pSyncNode->state)); - cJSON_AddItemToObject(pRoot, "pRaftStore", raftStore2Json(pSyncNode->pRaftStore)); + cJSON_AddItemToObject(pRoot, "pRaftStore", raftStore2Json(&pSyncNode.raftStore)); // tla+ candidate vars cJSON_AddItemToObject(pRoot, "pVotesGranted", voteGranted2Json(pSyncNode->pVotesGranted)); @@ -199,7 +199,7 @@ inline char* syncNode2SimpleStr(const SSyncNode* pSyncNode) { ", sby:%d, " "r-num:%d, " "lcfg:%" PRId64 ", chging:%d, rsto:%d", - pSyncNode->vgId, syncStr(pSyncNode->state), pSyncNode->pRaftStore->currentTerm, pSyncNode->commitIndex, + pSyncNode->vgId, syncStr(pSyncNode->state), pSyncNode->raftStore.currentTerm, pSyncNode->commitIndex, logBeginIndex, logLastIndex, snapshot.lastApplyIndex, pSyncNode->raftCfg.isStandBy, pSyncNode->replicaNum, pSyncNode->raftCfg.lastConfigIndex, pSyncNode->changing, pSyncNode->restoreFinish); diff --git a/source/libs/sync/test/sync_test_lib/src/syncSnapshotDebug.c b/source/libs/sync/test/sync_test_lib/src/syncSnapshotDebug.c index f1237e5282..d8740de16a 100644 --- a/source/libs/sync/test/sync_test_lib/src/syncSnapshotDebug.c +++ b/source/libs/sync/test/sync_test_lib/src/syncSnapshotDebug.c @@ -137,7 +137,7 @@ int32_t syncNodeOnPreSnapshot(SSyncNode *ths, SyncPreSnapshot *pMsg) { SyncPreSnapshotReply *pMsgReply = syncPreSnapshotReplyBuild(ths->vgId); pMsgReply->srcId = ths->myRaftId; pMsgReply->destId = pMsg->srcId; - pMsgReply->term = ths->pRaftStore->currentTerm; + pMsgReply->term = ths->raftStore.currentTerm; SSyncLogStoreData *pData = ths->pLogStore->data; SWal *pWal = pData->pWal; From 9a9e93b6feb0ae4a4540e34d53241940f04601b4 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 9 Jan 2023 12:06:20 +0800 Subject: [PATCH 73/89] fix: compile error in mac --- source/libs/sync/test/syncLocalCmdTest.cpp | 4 +- source/libs/sync/test/syncRaftStoreTest.cpp | 40 +++++++++---------- .../sync/test/syncSnapshotReceiverTest.cpp | 2 +- .../libs/sync/test/syncSnapshotSenderTest.cpp | 2 +- .../test/sync_test_lib/src/syncMainDebug.c | 2 +- .../test/sync_test_lib/src/syncMessageDebug.c | 8 ++-- .../sync_test_lib/src/syncRaftStoreDebug.c | 4 +- source/util/src/tlog.c | 2 +- 8 files changed, 32 insertions(+), 32 deletions(-) diff --git a/source/libs/sync/test/syncLocalCmdTest.cpp b/source/libs/sync/test/syncLocalCmdTest.cpp index 8003cce7cc..2c839d0acb 100644 --- a/source/libs/sync/test/syncLocalCmdTest.cpp +++ b/source/libs/sync/test/syncLocalCmdTest.cpp @@ -16,8 +16,8 @@ SyncLocalCmd *createMsg() { pMsg->srcId.vgId = 100; pMsg->destId.addr = syncUtilAddr2U64("127.0.0.1", 5678); pMsg->destId.vgId = 100; - pMsg->sdNewTerm = 123; - pMsg->fcIndex = 456; + // pMsg->sdNewTerm = 123; + // pMsg->fcIndex = 456; pMsg->cmd = SYNC_LOCAL_CMD_STEP_DOWN; return pMsg; diff --git a/source/libs/sync/test/syncRaftStoreTest.cpp b/source/libs/sync/test/syncRaftStoreTest.cpp index 87798a7d80..a8022184ef 100644 --- a/source/libs/sync/test/syncRaftStoreTest.cpp +++ b/source/libs/sync/test/syncRaftStoreTest.cpp @@ -33,35 +33,35 @@ int main() { initRaftId(); - SRaftStore* pRaftStore = raftStoreOpen("./test_raft_store.json"); - assert(pRaftStore != NULL); - raftStoreLog2((char*)"==raftStoreOpen==", pRaftStore); + // SRaftStore* pRaftStore = raftStoreOpen("./test_raft_store.json"); + // assert(pRaftStore != NULL); + // raftStoreLog2((char*)"==raftStoreOpen==", pRaftStore); - raftStoreSetTerm(pRaftStore, 100); - raftStoreLog2((char*)"==raftStoreSetTerm==", pRaftStore); + // raftStoreSetTerm(pRaftStore, 100); + // raftStoreLog2((char*)"==raftStoreSetTerm==", pRaftStore); - raftStoreVote(pRaftStore, &ids[0]); - raftStoreLog2((char*)"==raftStoreVote==", pRaftStore); + // raftStoreVote(pRaftStore, &ids[0]); + // raftStoreLog2((char*)"==raftStoreVote==", pRaftStore); - raftStoreClearVote(pRaftStore); - raftStoreLog2((char*)"==raftStoreClearVote==", pRaftStore); + // raftStoreClearVote(pRaftStore); + // raftStoreLog2((char*)"==raftStoreClearVote==", pRaftStore); - raftStoreVote(pRaftStore, &ids[1]); - raftStoreLog2((char*)"==raftStoreVote==", pRaftStore); + // raftStoreVote(pRaftStore, &ids[1]); + // raftStoreLog2((char*)"==raftStoreVote==", pRaftStore); - raftStoreNextTerm(pRaftStore); - raftStoreLog2((char*)"==raftStoreNextTerm==", pRaftStore); + // raftStoreNextTerm(pRaftStore); + // raftStoreLog2((char*)"==raftStoreNextTerm==", pRaftStore); - raftStoreNextTerm(pRaftStore); - raftStoreLog2((char*)"==raftStoreNextTerm==", pRaftStore); + // raftStoreNextTerm(pRaftStore); + // raftStoreLog2((char*)"==raftStoreNextTerm==", pRaftStore); - raftStoreNextTerm(pRaftStore); - raftStoreLog2((char*)"==raftStoreNextTerm==", pRaftStore); + // raftStoreNextTerm(pRaftStore); + // raftStoreLog2((char*)"==raftStoreNextTerm==", pRaftStore); - raftStoreNextTerm(pRaftStore); - raftStoreLog2((char*)"==raftStoreNextTerm==", pRaftStore); + // raftStoreNextTerm(pRaftStore); + // raftStoreLog2((char*)"==raftStoreNextTerm==", pRaftStore); - raftStoreClose(pRaftStore); + // raftStoreClose(pRaftStore); return 0; } diff --git a/source/libs/sync/test/syncSnapshotReceiverTest.cpp b/source/libs/sync/test/syncSnapshotReceiverTest.cpp index 49b06a7d1b..1fca04a1ad 100644 --- a/source/libs/sync/test/syncSnapshotReceiverTest.cpp +++ b/source/libs/sync/test/syncSnapshotReceiverTest.cpp @@ -29,7 +29,7 @@ int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_ SSyncSnapshotReceiver* createReceiver() { SSyncNode* pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(*pSyncNode)); - pSyncNode->pRaftStore = (SRaftStore*)taosMemoryMalloc(sizeof(*(pSyncNode->pRaftStore))); + // pSyncNode->pRaftStore = (SRaftStore*)taosMemoryMalloc(sizeof(*(pSyncNode->pRaftStore))); pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(*(pSyncNode->pFsm))); #if 0 diff --git a/source/libs/sync/test/syncSnapshotSenderTest.cpp b/source/libs/sync/test/syncSnapshotSenderTest.cpp index bb697d541a..a1768c2ce5 100644 --- a/source/libs/sync/test/syncSnapshotSenderTest.cpp +++ b/source/libs/sync/test/syncSnapshotSenderTest.cpp @@ -29,7 +29,7 @@ int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_ SSyncSnapshotSender* createSender() { SSyncNode* pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(*pSyncNode)); - pSyncNode->pRaftStore = (SRaftStore*)taosMemoryMalloc(sizeof(*(pSyncNode->pRaftStore))); + // pSyncNode->pRaftStore = (SRaftStore*)taosMemoryMalloc(sizeof(*(pSyncNode->pRaftStore))); pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(*(pSyncNode->pFsm))); #if 0 diff --git a/source/libs/sync/test/sync_test_lib/src/syncMainDebug.c b/source/libs/sync/test/sync_test_lib/src/syncMainDebug.c index a3e76eabcc..1dbf4fb4fb 100644 --- a/source/libs/sync/test/sync_test_lib/src/syncMainDebug.c +++ b/source/libs/sync/test/sync_test_lib/src/syncMainDebug.c @@ -80,7 +80,7 @@ cJSON* syncNode2Json(const SSyncNode* pSyncNode) { // tla+ server vars cJSON_AddNumberToObject(pRoot, "state", pSyncNode->state); cJSON_AddStringToObject(pRoot, "state_str", syncStr(pSyncNode->state)); - cJSON_AddItemToObject(pRoot, "pRaftStore", raftStore2Json(&pSyncNode.raftStore)); + // cJSON_AddItemToObject(pRoot, "pRaftStore", raftStore2Json(&pSyncNode.raftStore)); // tla+ candidate vars cJSON_AddItemToObject(pRoot, "pVotesGranted", voteGranted2Json(pSyncNode->pVotesGranted)); diff --git a/source/libs/sync/test/sync_test_lib/src/syncMessageDebug.c b/source/libs/sync/test/sync_test_lib/src/syncMessageDebug.c index ae83bf9ead..5f011ffe69 100644 --- a/source/libs/sync/test/sync_test_lib/src/syncMessageDebug.c +++ b/source/libs/sync/test/sync_test_lib/src/syncMessageDebug.c @@ -2858,11 +2858,11 @@ cJSON* syncLocalCmd2Json(const SyncLocalCmd* pMsg) { cJSON_AddNumberToObject(pRoot, "cmd", pMsg->cmd); - snprintf(u64buf, sizeof(u64buf), "%" PRIu64, pMsg->sdNewTerm); - cJSON_AddStringToObject(pRoot, "sd-new-term", u64buf); + // snprintf(u64buf, sizeof(u64buf), "%" PRIu64, pMsg->sdNewTerm); + // cJSON_AddStringToObject(pRoot, "sd-new-term", u64buf); - snprintf(u64buf, sizeof(u64buf), "%" PRId64, pMsg->fcIndex); - cJSON_AddStringToObject(pRoot, "fc-index", u64buf); + // snprintf(u64buf, sizeof(u64buf), "%" PRId64, pMsg->fcIndex); + // cJSON_AddStringToObject(pRoot, "fc-index", u64buf); } cJSON* pJson = cJSON_CreateObject(); diff --git a/source/libs/sync/test/sync_test_lib/src/syncRaftStoreDebug.c b/source/libs/sync/test/sync_test_lib/src/syncRaftStoreDebug.c index c462b3275d..f6cd381e54 100644 --- a/source/libs/sync/test/sync_test_lib/src/syncRaftStoreDebug.c +++ b/source/libs/sync/test/sync_test_lib/src/syncRaftStoreDebug.c @@ -41,8 +41,8 @@ cJSON *raftStore2Json(SRaftStore *pRaftStore) { cJSON_AddNumberToObject(pVoteFor, "vgId", pRaftStore->voteFor.vgId); cJSON_AddItemToObject(pRoot, "voteFor", pVoteFor); - int hasVoted = raftStoreHasVoted(pRaftStore); - cJSON_AddNumberToObject(pRoot, "hasVoted", hasVoted); + // int hasVoted = raftStoreHasVoted(pRaftStore); + // cJSON_AddNumberToObject(pRoot, "hasVoted", hasVoted); } cJSON *pJson = cJSON_CreateObject(); diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index d9cbde5714..34ad9ae6bc 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -1045,7 +1045,7 @@ bool taosAssertRelease(bool condition) { int32_t dflag = 255; // tsLogEmbedded ? 255 : uDebugFlag taosPrintLog(flags, level, dflag, "tAssert called in release mode, exit:%d", tsAssert); - taosPrintTrace(flags, level, dflag); + taosPrintTrace(flags, level, dflag, 0); if (tsAssert) { taosMsleep(300); From 71d59160307101773e05c80b827f59a0b5d47567 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Mon, 9 Jan 2023 13:22:11 +0800 Subject: [PATCH 74/89] fix: error code not returned issue --- source/libs/executor/src/tsort.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 03be1ee6f2..661e9f97b7 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -251,7 +251,8 @@ static int32_t sortComparInit(SMsortComparParam* pParam, SArray* pSources, int32 if (pHandle->pBuf == NULL) { if (!osTempSpaceAvailable()) { code = TSDB_CODE_NO_AVAIL_DISK; - qError("Sort compare init failed since %s, %s", terrstr(code), pHandle->idStr); + terrno = code; + qError("Sort compare init failed since %s, %s", tstrerror(code), pHandle->idStr); return code; } @@ -259,6 +260,7 @@ static int32_t sortComparInit(SMsortComparParam* pParam, SArray* pSources, int32 "sortComparInit", tsTempDir); dBufSetPrintInfo(pHandle->pBuf); if (code != TSDB_CODE_SUCCESS) { + terrno = code; return code; } } @@ -282,6 +284,7 @@ static int32_t sortComparInit(SMsortComparParam* pParam, SArray* pSources, int32 code = blockDataFromBuf(pSource->src.pBlock, pPage); if (code != TSDB_CODE_SUCCESS) { + terrno = code; return code; } From 907cb73243807fd6846658bf43bde0daea3b29e4 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 9 Jan 2023 16:31:16 +0800 Subject: [PATCH 75/89] fix: return dropping dnode in status resp --- source/dnode/mnode/impl/inc/mndDnode.h | 1 - source/dnode/mnode/impl/src/mndDnode.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndDnode.h b/source/dnode/mnode/impl/inc/mndDnode.h index ebbabdfa33..cf1e7422be 100644 --- a/source/dnode/mnode/impl/inc/mndDnode.h +++ b/source/dnode/mnode/impl/inc/mndDnode.h @@ -29,7 +29,6 @@ void mndReleaseDnode(SMnode *pMnode, SDnodeObj *pDnode); SEpSet mndGetDnodeEpset(SDnodeObj *pDnode); int32_t mndGetDnodeSize(SMnode *pMnode); bool mndIsDnodeOnline(SDnodeObj *pDnode, int64_t curMs); -void mndGetDnodeData(SMnode *pMnode, SArray *pDnodeEps); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index c7a416d444..ddb54a95ea 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -308,7 +308,8 @@ void mndGetDnodeData(SMnode *pMnode, SArray *pDnodeEps) { void *pIter = NULL; while (1) { SDnodeObj *pDnode = NULL; - pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pDnode); + ESdbStatus objStatus = 0; + pIter = sdbFetchAll(pSdb, SDB_DNODE, pIter, (void **)&pDnode, &objStatus, true); if (pIter == NULL) break; SDnodeEp dnodeEp = {0}; From 284dd88b6f17ea89fba4f30aef4592b935ba6803 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 9 Jan 2023 16:49:58 +0800 Subject: [PATCH 76/89] enh: add version for show cluster --- source/common/src/systable.c | 2 ++ source/dnode/mnode/impl/src/mndCluster.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/source/common/src/systable.c b/source/common/src/systable.c index 60a673ef9c..6c86743b69 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -67,6 +67,8 @@ static const SSysDbTableSchema clusterSchema[] = { {.name = "name", .bytes = TSDB_CLUSTER_ID_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, {.name = "uptime", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = true}, {.name = "create_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = true}, + {.name = "version", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, + {.name = "expire_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = true}, }; static const SSysDbTableSchema userDBSchema[] = { diff --git a/source/dnode/mnode/impl/src/mndCluster.c b/source/dnode/mnode/impl/src/mndCluster.c index ca03207d2b..e0d8ecb3eb 100644 --- a/source/dnode/mnode/impl/src/mndCluster.c +++ b/source/dnode/mnode/impl/src/mndCluster.c @@ -20,6 +20,8 @@ #define CLUSTER_VER_NUMBE 1 #define CLUSTER_RESERVE_SIZE 60 +char tsVersionName[16] = "community"; +int64_t tsExpireTime = 0; static SSdbRaw *mndClusterActionEncode(SClusterObj *pCluster); static SSdbRow *mndClusterActionDecode(SSdbRaw *pRaw); @@ -291,6 +293,18 @@ static int32_t mndRetrieveClusters(SRpcMsg *pMsg, SShowObj *pShow, SSDataBlock * pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataAppend(pColInfo, numOfRows, (const char *)&pCluster->createdTime, false); + char ver[12] = {0}; + STR_WITH_MAXSIZE_TO_VARSTR(ver, tsVersionName, pShow->pMeta->pSchemas[cols].bytes); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataAppend(pColInfo, numOfRows, (const char *)ver, false); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + if (tsExpireTime <= 0) { + colDataAppendNULL(pColInfo, numOfRows); + } else { + colDataAppend(pColInfo, numOfRows, (const char *)&tsExpireTime, false); + } + sdbRelease(pSdb, pCluster); numOfRows++; } From 6ba5b6a287879076599eac8acaa2bfb9bf159d2c Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 9 Jan 2023 17:28:27 +0800 Subject: [PATCH 77/89] fix: compile error --- source/dnode/mnode/impl/inc/mndDnode.h | 1 + 1 file changed, 1 insertion(+) diff --git a/source/dnode/mnode/impl/inc/mndDnode.h b/source/dnode/mnode/impl/inc/mndDnode.h index cf1e7422be..ebbabdfa33 100644 --- a/source/dnode/mnode/impl/inc/mndDnode.h +++ b/source/dnode/mnode/impl/inc/mndDnode.h @@ -29,6 +29,7 @@ void mndReleaseDnode(SMnode *pMnode, SDnodeObj *pDnode); SEpSet mndGetDnodeEpset(SDnodeObj *pDnode); int32_t mndGetDnodeSize(SMnode *pMnode); bool mndIsDnodeOnline(SDnodeObj *pDnode, int64_t curMs); +void mndGetDnodeData(SMnode *pMnode, SArray *pDnodeEps); #ifdef __cplusplus } From 012dbf3176e9023605a2cb637aa1072a01a6b2b5 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 9 Jan 2023 18:47:27 +0800 Subject: [PATCH 78/89] enh: read mnode file --- source/dnode/mgmt/mgmt_mnode/src/mmFile.c | 152 ++++++++++------------ 1 file changed, 66 insertions(+), 86 deletions(-) diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmFile.c b/source/dnode/mgmt/mgmt_mnode/src/mmFile.c index dd05fe673a..f06669a610 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmFile.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmFile.c @@ -17,117 +17,97 @@ #include "mmInt.h" #include "tjson.h" -int32_t mmReadFile(const char *path, SMnodeOpt *pOption) { - int32_t code = TSDB_CODE_INVALID_JSON_FORMAT; - int32_t len = 0; - int32_t maxLen = 4096; - char *content = taosMemoryCalloc(1, maxLen + 1); - cJSON *root = NULL; - char file[PATH_MAX] = {0}; - TdFilePtr pFile = NULL; +static int32_t mmDecodeOption(SJson *pJson, SMnodeOpt *pOption) { + int32_t code = 0; + tjsonGetInt32ValueFromDouble(pJson, "deployed", pOption->deploy, code); + if (code < 0) return -1; + tjsonGetInt32ValueFromDouble(pJson, "selfIndex", pOption->selfIndex, code); + if (code < 0) return 0; + + SJson *replicas = tjsonGetObjectItem(pJson, "replicas"); + if (replicas == NULL) return 0; + pOption->numOfReplicas = tjsonGetArraySize(replicas); + + for (int32_t i = 0; i < pOption->numOfReplicas; ++i) { + SJson *replica = tjsonGetArrayItem(replicas, i); + if (replica == NULL) return -1; + + SReplica *pReplica = pOption->replicas + i; + tjsonGetInt32ValueFromDouble(replica, "id", pReplica->id, code); + if (code < 0) return -1; + code = tjsonGetStringValue(replica, "fqdn", pReplica->fqdn); + if (code < 0) return -1; + tjsonGetUInt16ValueFromDouble(replica, "port", pReplica->port, code); + if (code < 0) return -1; + } + + return 0; +} + +int32_t mmReadFile(const char *path, SMnodeOpt *pOption) { + int32_t code = -1; + TdFilePtr pFile = NULL; + char *pData = NULL; + SJson *pJson = NULL; + char file[PATH_MAX] = {0}; snprintf(file, sizeof(file), "%s%smnode.json", path, TD_DIRSEP); + + if (taosStatFile(file, NULL, NULL) < 0) { + dInfo("mnode file:%s not exist", file); + return 0; + } + pFile = taosOpenFile(file, TD_FILE_READ); if (pFile == NULL) { - code = 0; + terrno = TAOS_SYSTEM_ERROR(errno); + dError("failed to open mnode file:%s since %s", file, terrstr()); goto _OVER; } - len = (int32_t)taosReadFile(pFile, content, maxLen); - if (len <= 0) { - dError("failed to read %s since content is null", file); + int64_t size = 0; + if (taosFStatFile(pFile, &size, NULL) < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + dError("failed to fstat mnode file:%s since %s", file, terrstr()); goto _OVER; } - content[len] = 0; - root = cJSON_Parse(content); - if (root == NULL) { - dError("failed to read %s since invalid json format", file); + pData = taosMemoryMalloc(size + 1); + if (pData == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; goto _OVER; } - cJSON *deployed = cJSON_GetObjectItem(root, "deployed"); - if (!deployed || deployed->type != cJSON_Number) { - dError("failed to read %s since deployed not found", file); + if (taosReadFile(pFile, pData, size) != size) { + terrno = TAOS_SYSTEM_ERROR(errno); + dError("failed to read mnode file:%s since %s", file, terrstr()); goto _OVER; } - pOption->deploy = deployed->valueint; - cJSON *selfIndex = cJSON_GetObjectItem(root, "selfIndex"); - if (selfIndex) { - if (selfIndex->type != cJSON_Number) { - dError("failed to read %s since selfIndex not found", file); - goto _OVER; - } - pOption->selfIndex = selfIndex->valueint; + pData[size] = '\0'; + + pJson = tjsonParse(pData); + if (pJson == NULL) { + terrno = TSDB_CODE_INVALID_JSON_FORMAT; + goto _OVER; } - cJSON *replicas = cJSON_GetObjectItem(root, "replicas"); - if (replicas) { - if (replicas->type != cJSON_Array) { - dError("failed to read %s since replicas not found", file); - goto _OVER; - } - - int32_t numOfReplicas = cJSON_GetArraySize(replicas); - if (numOfReplicas <= 0) { - dError("failed to read %s since numOfReplicas:%d invalid", file, numOfReplicas); - goto _OVER; - } - pOption->numOfReplicas = numOfReplicas; - - for (int32_t i = 0; i < numOfReplicas; ++i) { - SReplica *pReplica = pOption->replicas + i; - - cJSON *replica = cJSON_GetArrayItem(replicas, i); - if (replica == NULL) break; - - cJSON *id = cJSON_GetObjectItem(replica, "id"); - if (id) { - if (id->type != cJSON_Number) { - dError("failed to read %s since id not found", file); - goto _OVER; - } - if (pReplica) { - pReplica->id = id->valueint; - } - } - - cJSON *fqdn = cJSON_GetObjectItem(replica, "fqdn"); - if (fqdn) { - if (fqdn->type != cJSON_String || fqdn->valuestring == NULL) { - dError("failed to read %s since fqdn not found", file); - goto _OVER; - } - if (pReplica) { - tstrncpy(pReplica->fqdn, fqdn->valuestring, TSDB_FQDN_LEN); - } - } - - cJSON *port = cJSON_GetObjectItem(replica, "port"); - if (port) { - if (port->type != cJSON_Number) { - dError("failed to read %s since port not found", file); - goto _OVER; - } - if (pReplica) { - pReplica->port = (uint16_t)port->valueint; - } - } - } + if (mmDecodeOption(pJson, pOption) < 0) { + terrno = TSDB_CODE_INVALID_JSON_FORMAT; + goto _OVER; } code = 0; + dInfo("succceed to read mnode file %s", file); _OVER: - if (content != NULL) taosMemoryFree(content); - if (root != NULL) cJSON_Delete(root); + if (pData != NULL) taosMemoryFree(pData); + if (pJson != NULL) cJSON_Delete(pJson); if (pFile != NULL) taosCloseFile(&pFile); - if (code == 0) { - dDebug("succcessed to read file %s, deployed:%d", file, pOption->deploy); - } - terrno = code; + if (code != 0) { + dError("failed to read mnode file:%s since %s", file, terrstr()); + } return code; } From 1d83a8ff01df46c7544cb860b2d2fe820553b978 Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Mon, 9 Jan 2023 19:06:22 +0800 Subject: [PATCH 79/89] fix: install script don't install new cfg on fresh new system (#19451) --- packaging/tools/install.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packaging/tools/install.sh b/packaging/tools/install.sh index 63009e5421..2a078b5eab 100755 --- a/packaging/tools/install.sh +++ b/packaging/tools/install.sh @@ -481,11 +481,11 @@ function install_adapter_config() { ${csudo}mkdir -p ${cfg_install_dir} [ -f ${script_dir}/cfg/${adapterName}.toml ] && ${csudo}cp ${script_dir}/cfg/${adapterName}.toml ${cfg_install_dir} [ -f ${cfg_install_dir}/${adapterName}.toml ] && ${csudo}chmod 644 ${cfg_install_dir}/${adapterName}.toml + else + [ -f ${script_dir}/cfg/${adapterName}.toml ] && + ${csudo}cp -f ${script_dir}/cfg/${adapterName}.toml ${cfg_install_dir}/${adapterName}.toml.new fi - [ -f ${script_dir}/cfg/${adapterName}.toml ] && - ${csudo}cp -f ${script_dir}/cfg/${adapterName}.toml ${cfg_install_dir}/${adapterName}.toml.new - [ -f ${cfg_install_dir}/${adapterName}.toml ] && ${csudo}ln -s ${cfg_install_dir}/${adapterName}.toml ${install_main_dir}/cfg/${adapterName}.toml @@ -499,9 +499,10 @@ function install_config() { ${csudo}mkdir -p ${cfg_install_dir} [ -f ${script_dir}/cfg/${configFile} ] && ${csudo}cp ${script_dir}/cfg/${configFile} ${cfg_install_dir} ${csudo}chmod 644 ${cfg_install_dir}/* + else + ${csudo}cp -f ${script_dir}/cfg/${configFile} ${cfg_install_dir}/${configFile}.new fi - ${csudo}cp -f ${script_dir}/cfg/${configFile} ${cfg_install_dir}/${configFile}.new ${csudo}ln -s ${cfg_install_dir}/${configFile} ${install_main_dir}/cfg [ ! -z $1 ] && return 0 || : # only install client From df2175087b9c04c073d7b258b6fec037002a1676 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Mon, 9 Jan 2023 19:47:17 +0800 Subject: [PATCH 80/89] fix: memory leak and invalid read issue --- source/dnode/vnode/src/tsdb/tsdbUtil.c | 83 +++++++++++++++++++++----- 1 file changed, 67 insertions(+), 16 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index a9c31c19cb..9bc903a0ba 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -682,6 +682,16 @@ int32_t tRowMergerInit2(SRowMerger *pMerger, STSchema *pResTSchema, TSDBROW *pRo } tsdbRowGetColVal(pRow, pTSchema, jCol++, pColVal); + if ((!COL_VAL_IS_NONE(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { + uint8_t *pVal = pColVal->value.pData; + + pColVal->value.pData = NULL; + code = tRealloc(&pColVal->value.pData, pColVal->value.nData); + if (code) goto _exit; + + memcpy(pColVal->value.pData, pVal, pColVal->value.nData); + } + if (taosArrayPush(pMerger->pArray, pColVal) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; @@ -720,12 +730,28 @@ int32_t tRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { if (key.version > pMerger->version) { if (!COL_VAL_IS_NONE(pColVal)) { - taosArraySet(pMerger->pArray, iCol, pColVal); + if (IS_VAR_DATA_TYPE(pColVal->type)) { + SColVal *tColVal = taosArrayGet(pMerger->pArray, iCol); + code = tRealloc(&tColVal->value.pData, pColVal->value.nData); + if (code) return code; + + memcpy(tColVal->value.pData, pColVal->value.pData, pColVal->value.nData); + } else { + taosArraySet(pMerger->pArray, iCol, pColVal); + } } } else if (key.version < pMerger->version) { SColVal *tColVal = (SColVal *)taosArrayGet(pMerger->pArray, iCol); if (COL_VAL_IS_NONE(tColVal) && !COL_VAL_IS_NONE(pColVal)) { - taosArraySet(pMerger->pArray, iCol, pColVal); + if (IS_VAR_DATA_TYPE(pColVal->type)) { + SColVal *tColVal = taosArrayGet(pMerger->pArray, iCol); + code = tRealloc(&tColVal->value.pData, pColVal->value.nData); + if (code) return code; + + memcpy(tColVal->value.pData, pColVal->value.pData, pColVal->value.nData); + } else { + taosArraySet(pMerger->pArray, iCol, pColVal); + } } } else { ASSERT(0 && "dup versions not allowed"); @@ -765,6 +791,16 @@ int32_t tRowMergerInit(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { // other for (int16_t iCol = 1; iCol < pTSchema->numOfCols; iCol++) { tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); + if ((!COL_VAL_IS_NONE(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { + uint8_t *pVal = pColVal->value.pData; + + pColVal->value.pData = NULL; + code = tRealloc(&pColVal->value.pData, pColVal->value.nData); + if (code) goto _exit; + + memcpy(pColVal->value.pData, pVal, pColVal->value.nData); + } + if (taosArrayPush(pMerger->pArray, pColVal) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; @@ -776,12 +812,10 @@ _exit: } void tRowMergerClear(SRowMerger *pMerger) { - if (pMerger->merged) { - for (int32_t iCol = 1; iCol < pMerger->pTSchema->numOfCols; iCol++) { - SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); - if (IS_VAR_DATA_TYPE(pTColVal->type)) { - tFree(pTColVal->value.pData); - } + for (int32_t iCol = 1; iCol < pMerger->pTSchema->numOfCols; iCol++) { + SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); + if (IS_VAR_DATA_TYPE(pTColVal->type)) { + tFree(pTColVal->value.pData); } } @@ -802,13 +836,17 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { if (!COL_VAL_IS_NONE(pColVal)) { if (IS_VAR_DATA_TYPE(pColVal->type)) { SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); + if (!COL_VAL_IS_NULL(pColVal)) { + code = tRealloc(&pTColVal->value.pData, pColVal->value.nData); + if (code) goto _exit; - pTColVal->value.pData = NULL; - code = tRealloc(&pTColVal->value.pData, pColVal->value.nData); - if (code) goto _exit; - - pTColVal->value.nData = pColVal->value.nData; - memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); + pTColVal->value.nData = pColVal->value.nData; + memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); + } else { + tFree(pTColVal->value.pData); + pTColVal->value.pData = NULL; + taosArraySet(pMerger->pArray, iCol, pColVal); + } } else { taosArraySet(pMerger->pArray, iCol, pColVal); } @@ -816,7 +854,21 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { } else if (key.version < pMerger->version) { SColVal *tColVal = (SColVal *)taosArrayGet(pMerger->pArray, iCol); if (COL_VAL_IS_NONE(tColVal) && !COL_VAL_IS_NONE(pColVal)) { - taosArraySet(pMerger->pArray, iCol, pColVal); + if (IS_VAR_DATA_TYPE(pColVal->type)) { + if (!COL_VAL_IS_NULL(pColVal)) { + code = tRealloc(&tColVal->value.pData, pColVal->value.nData); + if (code) goto _exit; + + tColVal->value.nData = pColVal->value.nData; + memcpy(tColVal->value.pData, pColVal->value.pData, tColVal->value.nData); + } else { + tFree(tColVal->value.pData); + tColVal->value.pData = NULL; + taosArraySet(pMerger->pArray, iCol, pColVal); + } + } else { + taosArraySet(pMerger->pArray, iCol, pColVal); + } } } else { ASSERT(0); @@ -824,7 +876,6 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { } pMerger->version = key.version; - pMerger->merged = true; _exit: return code; From 6a6d53b89623cf07ef051e7fdb670a913f357b9f Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 9 Jan 2023 20:55:34 +0800 Subject: [PATCH 81/89] fix(query): fix error for retrieve data only in last files. (#19457) * fix: add test cases; * fix(query): fix error for retrieve data only in last files. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 64 ++++++++++++++++++++- tests/script/tsim/parser/regressiontest.sim | 5 ++ 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index dcfc78fd1a..91690af4c8 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -2834,7 +2834,37 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { TSDBKEY keyInBuf = getCurrentKeyInBuf(pScanInfo, pReader); if (pBlockInfo == NULL) { // build data block from last data file - code = buildComposedDataBlock(pReader); + SBlockData* pBData = &pReader->status.fileBlockData; + tBlockDataReset(pBData); + + SSDataBlock* pResBlock = pReader->pResBlock; + tsdbDebug("load data in last block firstly, due to desc scan data, %s", pReader->idStr); + + int64_t st = taosGetTimestampUs(); + + while (1) { + bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); + + // no data in last block and block, no need to proceed. + if (hasBlockLData == false) { + break; + } + + buildComposedDataBlockImpl(pReader, pScanInfo, &pReader->status.fileBlockData, pLastBlockReader); + if (pResBlock->info.rows >= pReader->capacity) { + break; + } + } + + double el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pScanInfo); + + if (pResBlock->info.rows > 0) { + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 + " rows:%d, elapsed time:%.2f ms %s", + pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, el, pReader->idStr); + } } else if (fileBlockShouldLoad(pReader, pBlockInfo, pBlock, pScanInfo, keyInBuf, pLastBlockReader)) { code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData, pScanInfo->uid); if (code != TSDB_CODE_SUCCESS) { @@ -2853,10 +2883,38 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { // only return the rows in last block int64_t tsLast = getCurrentKeyInLastBlock(pLastBlockReader); ASSERT(tsLast >= pBlock->maxKey.ts); - tBlockDataReset(&pReader->status.fileBlockData); + SBlockData* pBData = &pReader->status.fileBlockData; + tBlockDataReset(pBData); + + SSDataBlock* pResBlock = pReader->pResBlock; tsdbDebug("load data in last block firstly, due to desc scan data, %s", pReader->idStr); - code = buildComposedDataBlock(pReader); + + int64_t st = taosGetTimestampUs(); + + while (1) { + bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); + + // no data in last block and block, no need to proceed. + if (hasBlockLData == false) { + break; + } + + buildComposedDataBlockImpl(pReader, pScanInfo, &pReader->status.fileBlockData, pLastBlockReader); + if (pResBlock->info.rows >= pReader->capacity) { + break; + } + } + + double el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pScanInfo); + + if (pResBlock->info.rows > 0) { + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 + " rows:%d, elapsed time:%.2f ms %s", + pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, el, pReader->idStr); + } } else { // whole block is required, return it directly SDataBlockInfo* pInfo = &pReader->pResBlock->info; pInfo->rows = pBlock->nRow; diff --git a/tests/script/tsim/parser/regressiontest.sim b/tests/script/tsim/parser/regressiontest.sim index 98cb0248a1..1b127155cb 100644 --- a/tests/script/tsim/parser/regressiontest.sim +++ b/tests/script/tsim/parser/regressiontest.sim @@ -58,4 +58,9 @@ if $data40 != @18-09-17 09:06:49.600@ then return -1 endi +sql select * from $tb order by ts desc; +if $rows != 8198 then + return -1 +endi + system sh/exec.sh -n dnode1 -s stop -x SIGINT From a0d2da630e913d3d1acef62bf24823770ab24c73 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 10 Jan 2023 09:28:04 +0800 Subject: [PATCH 82/89] fix: no core file on linux --- source/client/src/clientEnv.c | 4 ++++ source/dnode/mgmt/exe/dmMain.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 2ecade58f9..495c2cca9a 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -420,7 +420,11 @@ _return: taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo); +#ifdef _TD_DARWIN_64 exit(signum); +#elif defined(WINDOWS) + exit(signum); +#endif } void crashReportThreadFuncUnexpectedStopped(void) { atomic_store_32(&clientStop, -1); } diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index 711280ea58..4910b0ac3f 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -100,7 +100,11 @@ _return: taosLogCrashInfo("taosd", pMsg, msgLen, signum, sigInfo); +#ifdef _TD_DARWIN_64 exit(signum); +#elif defined(WINDOWS) + exit(signum); +#endif } static void dmSetSignalHandle() { From 91821c9bb45e21ce80d1db1d6190cf0feb7dfca2 Mon Sep 17 00:00:00 2001 From: jiajingbin Date: Tue, 10 Jan 2023 09:41:20 +0800 Subject: [PATCH 83/89] test: add cases for ts-2440 --- tests/system-test/1-insert/time_range_wise.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/system-test/1-insert/time_range_wise.py b/tests/system-test/1-insert/time_range_wise.py index 3d5c9197d1..df1cc516c5 100644 --- a/tests/system-test/1-insert/time_range_wise.py +++ b/tests/system-test/1-insert/time_range_wise.py @@ -600,6 +600,11 @@ class TDTestCase: tdLog.printNoPrefix("==========step4:after wal, all check again ") self.all_test() + # add for TS-2440 + for i in range(self.rows): + tdSql.execute("drop database if exists db3 ") + tdSql.execute("create database db3 retentions 1s:4m,2s:8m,3s:12m") + def stop(self): tdSql.close() tdLog.success(f"{__file__} successfully executed") From df6e9631e162ec82be3374e298c456a64e301499 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 10 Jan 2023 11:23:52 +0800 Subject: [PATCH 84/89] fix: synchronize access within walFsync --- source/libs/wal/src/walWrite.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index 51307dc17d..db31692da9 100644 --- a/source/libs/wal/src/walWrite.c +++ b/source/libs/wal/src/walWrite.c @@ -635,6 +635,7 @@ int32_t walWrite(SWal *pWal, int64_t index, tmsg_t msgType, const void *body, in } void walFsync(SWal *pWal, bool forceFsync) { + taosThreadMutexLock(&pWal->mutex); if (forceFsync || (pWal->cfg.level == TAOS_WAL_FSYNC && pWal->cfg.fsyncPeriod == 0)) { wTrace("vgId:%d, fileId:%" PRId64 ".idx, do fsync", pWal->cfg.vgId, walGetCurFileFirstVer(pWal)); if (taosFsyncFile(pWal->pIdxFile) < 0) { @@ -647,4 +648,5 @@ void walFsync(SWal *pWal, bool forceFsync) { strerror(errno)); } } + taosThreadMutexUnlock(&pWal->mutex); } From bfc483aa30ef156fba14eb4c8d8593371249d534 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 10 Jan 2023 11:32:13 +0800 Subject: [PATCH 85/89] fix: row merge flag issue --- source/dnode/vnode/src/tsdb/tsdbUtil.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index 9bc903a0ba..ede1e9a424 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -682,13 +682,13 @@ int32_t tRowMergerInit2(SRowMerger *pMerger, STSchema *pResTSchema, TSDBROW *pRo } tsdbRowGetColVal(pRow, pTSchema, jCol++, pColVal); - if ((!COL_VAL_IS_NONE(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { + if ((!COL_VAL_IS_NONE(pColVal)) && (!COL_VAL_IS_NULL(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { uint8_t *pVal = pColVal->value.pData; pColVal->value.pData = NULL; code = tRealloc(&pColVal->value.pData, pColVal->value.nData); if (code) goto _exit; - + memcpy(pColVal->value.pData, pVal, pColVal->value.nData); } @@ -730,12 +730,14 @@ int32_t tRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { if (key.version > pMerger->version) { if (!COL_VAL_IS_NONE(pColVal)) { - if (IS_VAR_DATA_TYPE(pColVal->type)) { + if ((!COL_VAL_IS_NULL(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { SColVal *tColVal = taosArrayGet(pMerger->pArray, iCol); code = tRealloc(&tColVal->value.pData, pColVal->value.nData); if (code) return code; - + + tColVal->value.nData = pColVal->value.nData; memcpy(tColVal->value.pData, pColVal->value.pData, pColVal->value.nData); + tColVal->flag = 0; } else { taosArraySet(pMerger->pArray, iCol, pColVal); } @@ -743,12 +745,13 @@ int32_t tRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { } else if (key.version < pMerger->version) { SColVal *tColVal = (SColVal *)taosArrayGet(pMerger->pArray, iCol); if (COL_VAL_IS_NONE(tColVal) && !COL_VAL_IS_NONE(pColVal)) { - if (IS_VAR_DATA_TYPE(pColVal->type)) { - SColVal *tColVal = taosArrayGet(pMerger->pArray, iCol); + if ((!COL_VAL_IS_NULL(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { code = tRealloc(&tColVal->value.pData, pColVal->value.nData); if (code) return code; - + + tColVal->value.nData = pColVal->value.nData; memcpy(tColVal->value.pData, pColVal->value.pData, pColVal->value.nData); + tColVal->flag = 0; } else { taosArraySet(pMerger->pArray, iCol, pColVal); } @@ -791,7 +794,7 @@ int32_t tRowMergerInit(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { // other for (int16_t iCol = 1; iCol < pTSchema->numOfCols; iCol++) { tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); - if ((!COL_VAL_IS_NONE(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { + if ((!COL_VAL_IS_NONE(pColVal)) && (!COL_VAL_IS_NULL(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { uint8_t *pVal = pColVal->value.pData; pColVal->value.pData = NULL; @@ -842,6 +845,7 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { pTColVal->value.nData = pColVal->value.nData; memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); + pTColVal->flag = 0; } else { tFree(pTColVal->value.pData); pTColVal->value.pData = NULL; @@ -861,6 +865,7 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { tColVal->value.nData = pColVal->value.nData; memcpy(tColVal->value.pData, pColVal->value.pData, tColVal->value.nData); + tColVal->flag = 0; } else { tFree(tColVal->value.pData); tColVal->value.pData = NULL; From 459d2932b1727fc3eee0cb529ae25ab2a01914f5 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 10 Jan 2023 13:44:24 +0800 Subject: [PATCH 86/89] fix: memcpy empty value issue --- source/dnode/vnode/src/tsdb/tsdbUtil.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index ede1e9a424..86adc1dc80 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -689,7 +689,9 @@ int32_t tRowMergerInit2(SRowMerger *pMerger, STSchema *pResTSchema, TSDBROW *pRo code = tRealloc(&pColVal->value.pData, pColVal->value.nData); if (code) goto _exit; - memcpy(pColVal->value.pData, pVal, pColVal->value.nData); + if (pColVal->value.nData) { + memcpy(pColVal->value.pData, pVal, pColVal->value.nData); + } } if (taosArrayPush(pMerger->pArray, pColVal) == NULL) { @@ -736,7 +738,9 @@ int32_t tRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { if (code) return code; tColVal->value.nData = pColVal->value.nData; - memcpy(tColVal->value.pData, pColVal->value.pData, pColVal->value.nData); + if (pColVal->value.nData) { + memcpy(tColVal->value.pData, pColVal->value.pData, pColVal->value.nData); + } tColVal->flag = 0; } else { taosArraySet(pMerger->pArray, iCol, pColVal); @@ -750,7 +754,9 @@ int32_t tRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { if (code) return code; tColVal->value.nData = pColVal->value.nData; - memcpy(tColVal->value.pData, pColVal->value.pData, pColVal->value.nData); + if (pColVal->value.nData) { + memcpy(tColVal->value.pData, pColVal->value.pData, pColVal->value.nData); + } tColVal->flag = 0; } else { taosArraySet(pMerger->pArray, iCol, pColVal); @@ -800,8 +806,10 @@ int32_t tRowMergerInit(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { pColVal->value.pData = NULL; code = tRealloc(&pColVal->value.pData, pColVal->value.nData); if (code) goto _exit; - - memcpy(pColVal->value.pData, pVal, pColVal->value.nData); + + if (pColVal->value.nData) { + memcpy(pColVal->value.pData, pVal, pColVal->value.nData); + } } if (taosArrayPush(pMerger->pArray, pColVal) == NULL) { @@ -844,7 +852,9 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { if (code) goto _exit; pTColVal->value.nData = pColVal->value.nData; - memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); + if (pTColVal->value.nData) { + memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); + } pTColVal->flag = 0; } else { tFree(pTColVal->value.pData); @@ -864,7 +874,9 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { if (code) goto _exit; tColVal->value.nData = pColVal->value.nData; - memcpy(tColVal->value.pData, pColVal->value.pData, tColVal->value.nData); + if (tColVal->value.nData) { + memcpy(tColVal->value.pData, pColVal->value.pData, tColVal->value.nData); + } tColVal->flag = 0; } else { tFree(tColVal->value.pData); From fcbac7236e3b9effb813d2183259b1eb76929a50 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 10 Jan 2023 13:47:18 +0800 Subject: [PATCH 87/89] fix: reset table scan status --- source/libs/executor/src/scanoperator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 1d7f27d0cf..eb38299938 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -173,7 +173,7 @@ static SResultRow* getTableGroupOutputBuf(SOperatorInfo* pOperator, uint64_t gro if (NULL == *pPage) { return NULL; } - + return (SResultRow*)((char*)(*pPage) + p1->offset); } @@ -1729,6 +1729,7 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { /*resetTableScanInfo(pTSInfo, pWin);*/ tsdbReaderClose(pTSInfo->base.dataReader); pTSInfo->base.dataReader = NULL; + pInfo->pTableScanOp->status = OP_OPENED; pTSInfo->scanTimes = 0; pTSInfo->currentGroupId = -1; From fd0d4bb83031d99807974616d413c367b0317625 Mon Sep 17 00:00:00 2001 From: dapan1121 <72057773+dapan1121@users.noreply.github.com> Date: Tue, 10 Jan 2023 15:39:16 +0800 Subject: [PATCH 88/89] Update tsdb.h fix: remove tmp field --- source/dnode/vnode/src/inc/tsdb.h | 1 - 1 file changed, 1 deletion(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 77a3bb7a2f..5a2e462c8c 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -573,7 +573,6 @@ struct STSDBRowIter { struct SRowMerger { STSchema *pTSchema; int64_t version; - bool merged; SArray *pArray; // SArray }; From dfed4e7650dd079b1a18922e10847b9e41f8bd8b Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Wed, 11 Jan 2023 11:32:46 +0800 Subject: [PATCH 89/89] fix: taos-tools deb rpm compn for main (#19489) * fix: taos-tools deb/rpm compn for main * fix: update taos-tools 5aa25e9 --- cmake/taostools_CMakeLists.txt.in | 2 +- packaging/release.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/taostools_CMakeLists.txt.in b/cmake/taostools_CMakeLists.txt.in index 599b508c93..d01928cfe8 100644 --- a/cmake/taostools_CMakeLists.txt.in +++ b/cmake/taostools_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taos-tools ExternalProject_Add(taos-tools GIT_REPOSITORY https://github.com/taosdata/taos-tools.git - GIT_TAG 94d6895 + GIT_TAG 5aa25e9 SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools" BINARY_DIR "" #BUILD_IN_SOURCE TRUE diff --git a/packaging/release.sh b/packaging/release.sh index 7a8a08352f..1dfbf2b112 100755 --- a/packaging/release.sh +++ b/packaging/release.sh @@ -273,7 +273,7 @@ if [ "$osType" != "Darwin" ]; then [ -z "$taos_tools_ver" ] && taos_tools_ver="0.1.0" ${csudo}./make-taos-tools-deb.sh ${top_dir} \ - ${compile_dir} ${output_dir} ${taos_tools_ver} ${cpuType} ${osType} ${verMode} ${verType} + ${compile_dir} ${output_dir} ${taos_tools_ver} ${cpuType} ${osType} ${verMode} ${verType} ${verNumberComp} fi fi else @@ -298,7 +298,7 @@ if [ "$osType" != "Darwin" ]; then [ -z "$taos_tools_ver" ] && taos_tools_ver="0.1.0" ${csudo}./make-taos-tools-rpm.sh ${top_dir} \ - ${compile_dir} ${output_dir} ${taos_tools_ver} ${cpuType} ${osType} ${verMode} ${verType} + ${compile_dir} ${output_dir} ${taos_tools_ver} ${cpuType} ${osType} ${verMode} ${verType} ${verNumberComp} fi fi else