From 5159d60f56407a59dec8742c355ec54aedfe1baa Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 23 Dec 2022 20:16:23 +0800 Subject: [PATCH 01/24] enh: vnodeCommit on consensus only --- include/libs/sync/sync.h | 5 +- source/dnode/mgmt/mgmt_vnode/src/vmInt.c | 2 + source/dnode/vnode/inc/vnode.h | 1 + source/dnode/vnode/src/inc/vnd.h | 1 + source/dnode/vnode/src/inc/vnodeInt.h | 1 + source/dnode/vnode/src/meta/metaOpen.c | 1 + source/dnode/vnode/src/vnd/vnodeCommit.c | 17 ++++- source/dnode/vnode/src/vnd/vnodeOpen.c | 5 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 8 +-- source/dnode/vnode/src/vnd/vnodeSync.c | 90 ++++++++++++++++-------- source/libs/sync/src/syncPipeline.c | 7 ++ source/libs/sync/src/syncRaftLog.c | 4 ++ source/libs/sync/src/syncRespMgr.c | 2 +- source/libs/tdb/inc/tdb.h | 7 +- source/libs/tdb/src/db/tdbPage.c | 2 +- source/libs/tdb/src/db/tdbTxn.c | 7 +- source/libs/transport/src/transCli.c | 2 +- 17 files changed, 122 insertions(+), 40 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index a13d203889..27aadee96a 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -49,10 +49,13 @@ extern "C" { #define SYNC_HEARTBEAT_REPLY_SLOW_MS 1500 #define SYNC_SNAP_RESEND_MS 1000 * 60 +#define SYNC_VND_COMMIT_MIN_MS 200 +#define SYNC_VND_COMMIT_MAX_MS 60000 + #define SYNC_MAX_BATCH_SIZE 1 #define SYNC_INDEX_BEGIN 0 #define SYNC_INDEX_INVALID -1 -#define SYNC_TERM_INVALID -1 // 0xFFFFFFFFFFFFFFFF +#define SYNC_TERM_INVALID -1 typedef enum { SYNC_STRATEGY_NO_SNAPSHOT = 0, diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index 313a88fc5c..4469e0afe6 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -79,6 +79,8 @@ int32_t vmOpenVnode(SVnodeMgmt *pMgmt, SWrapperCfg *pCfg, SVnode *pImpl) { void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { char path[TSDB_FILENAME_LEN] = {0}; + vnodeProposeCommitOnNeed(pVnode->pImpl); + taosThreadRwlockWrlock(&pMgmt->lock); taosHashRemove(pMgmt->hash, &pVnode->vgId, sizeof(int32_t)); taosThreadRwlockUnlock(&pMgmt->lock); diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 540f0c3127..7a3e3cb4a5 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -88,6 +88,7 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg); int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); +void vnodeProposeCommitOnNeed(SVnode *pVnode); // meta typedef struct SMeta SMeta; // todo: remove diff --git a/source/dnode/vnode/src/inc/vnd.h b/source/dnode/vnode/src/inc/vnd.h index 28797c5361..d8c4b001b1 100644 --- a/source/dnode/vnode/src/inc/vnd.h +++ b/source/dnode/vnode/src/inc/vnd.h @@ -102,6 +102,7 @@ void vnodeSyncClose(SVnode* pVnode); void vnodeRedirectRpcMsg(SVnode* pVnode, SRpcMsg* pMsg, int32_t code); bool vnodeIsLeader(SVnode* pVnode); bool vnodeIsRoleLeader(SVnode* pVnode); +int vnodeShouldCommit(SVnode* pVnode); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index e56f130c2c..b9080fd6c6 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -348,6 +348,7 @@ struct SVnode { STQ* pTq; SSink* pSink; tsem_t canCommit; + int64_t commitMs; int64_t sync; TdThreadMutex lock; bool blocked; diff --git a/source/dnode/vnode/src/meta/metaOpen.c b/source/dnode/vnode/src/meta/metaOpen.c index 1b5f742559..8974d93678 100644 --- a/source/dnode/vnode/src/meta/metaOpen.c +++ b/source/dnode/vnode/src/meta/metaOpen.c @@ -203,6 +203,7 @@ _err: int metaClose(SMeta *pMeta) { if (pMeta) { + if (pMeta->txn) tdbTxnClose(pMeta->txn); if (pMeta->pCache) metaCacheClose(pMeta); if (pMeta->pIdx) metaCloseIdx(pMeta); if (pMeta->pStreamDb) tdbTbClose(pMeta->pStreamDb); diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 4daab074b5..f7ec18e50e 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -59,6 +59,17 @@ int vnodeBegin(SVnode *pVnode) { } int vnodeShouldCommit(SVnode *pVnode) { + if (!pVnode->inUse || !osDataSpaceAvailable()) { + return false; + } + + int64_t nowMs = taosGetMonoTimestampMs(); + + return (((pVnode->inUse->size > pVnode->inUse->node.size) && (pVnode->commitMs + SYNC_VND_COMMIT_MIN_MS < nowMs)) || + (pVnode->inUse->size > 0 && pVnode->commitMs + SYNC_VND_COMMIT_MAX_MS < nowMs)); +} + +int vnodeShouldCommitOld(SVnode *pVnode) { if (pVnode->inUse) { return osDataSpaceAvailable() && (pVnode->inUse->size > pVnode->inUse->node.size); } @@ -194,6 +205,7 @@ static void vnodePrepareCommit(SVnode *pVnode) { vnodeBufPoolUnRef(pVnode->inUse); pVnode->inUse = NULL; } + static int32_t vnodeCommitTask(void *arg) { int32_t code = 0; @@ -210,6 +222,7 @@ _exit: taosMemoryFree(pInfo); return code; } + int vnodeAsyncCommit(SVnode *pVnode) { int32_t code = 0; @@ -257,7 +270,9 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { SVnode *pVnode = pInfo->pVnode; vInfo("vgId:%d, start to commit, commit ID:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode), - pVnode->state.commitID, pVnode->state.applied, pVnode->state.applyTerm); + pInfo->info.state.commitID, pInfo->info.state.committed, pInfo->info.state.commitTerm); + + pVnode->commitMs = taosGetMonoTimestampMs(); // persist wal before starting if (walPersist(pVnode->pWal) < 0) { diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index e09fafb756..58b73d806f 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -249,15 +249,18 @@ void vnodePreClose(SVnode *pVnode) { void vnodeClose(SVnode *pVnode) { if (pVnode) { - vnodeSyncCommit(pVnode); vnodeSyncClose(pVnode); vnodeQueryClose(pVnode); + + tsem_wait(&pVnode->canCommit); walClose(pVnode->pWal); tqClose(pVnode->pTq); if (pVnode->pTsdb) tsdbClose(&pVnode->pTsdb); smaClose(pVnode->pSma); metaClose(pVnode->pMeta); vnodeCloseBufPool(pVnode); + tsem_post(&pVnode->canCommit); + // destroy handle tsem_destroy(&(pVnode->canCommit)); tsem_destroy(&pVnode->syncSem); diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 6092888136..49bdfec269 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -200,6 +200,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp // skip header pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); len = pMsg->contLen - sizeof(SMsgHead); + bool needCommit = false; switch (pMsg->msgType) { /* META */ @@ -296,9 +297,8 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp vnodeProcessAlterConfigReq(pVnode, version, pReq, len, pRsp); break; case TDMT_VND_COMMIT: - vnodeSyncCommit(pVnode); - vnodeBegin(pVnode); - goto _exit; + needCommit = true; + break; default: vError("vgId:%d, unprocessed msg, %d", TD_VID(pVnode), pMsg->msgType); return -1; @@ -315,7 +315,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp } // commit if need - if (vnodeShouldCommit(pVnode)) { + if (needCommit) { vInfo("vgId:%d, commit at version %" PRId64, TD_VID(pVnode), version); vnodeAsyncCommit(pVnode); diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 2c23646db1..e000b26c6b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -101,6 +101,64 @@ static void vnodeHandleProposeError(SVnode *pVnode, SRpcMsg *pMsg, int32_t code) } } +static int32_t inline vnodeProposeMsg(SVnode *pVnode, SRpcMsg *pMsg, bool isWeak) { + int64_t seq = 0; + + taosThreadMutexLock(&pVnode->lock); + int32_t code = syncPropose(pVnode->sync, pMsg, isWeak, &seq); + bool wait = (code == 0 && vnodeIsMsgBlock(pMsg->msgType)); + if (wait) { + ASSERT(!pVnode->blocked); + pVnode->blocked = true; + pVnode->blockSec = taosGetTimestampSec(); + pVnode->blockSeq = seq; +#if 0 + pVnode->blockInfo = pMsg->info; +#endif + } + taosThreadMutexUnlock(&pVnode->lock); + + if (code > 0) { + vnodeHandleWriteMsg(pVnode, pMsg); + } else if (code < 0) { + if (terrno != 0) code = terrno; + vnodeHandleProposeError(pVnode, pMsg, code); + } + + if (wait) vnodeWaitBlockMsg(pVnode, pMsg); + return code; +} + +void vnodeProposeCommitOnNeed(SVnode *pVnode) { + if (!vnodeShouldCommit(pVnode)) { + return; + } + + int32_t contLen = sizeof(SMsgHead); + SMsgHead *pHead = rpcMallocCont(contLen); + pHead->contLen = contLen; + pHead->vgId = pVnode->config.vgId; + + SRpcMsg rpcMsg = {0}; + rpcMsg.msgType = TDMT_VND_COMMIT; + rpcMsg.contLen = contLen; + rpcMsg.pCont = pHead; + rpcMsg.info.noResp = 1; + + bool isWeak = false; + if (vnodeProposeMsg(pVnode, &rpcMsg, isWeak) < 0) { + vTrace("vgId:%d, failed to propose vnode commit since %s", pVnode->config.vgId, terrstr()); + goto _out; + } + + vInfo("vgId:%d, proposed vnode commit", pVnode->config.vgId); + +_out: + pVnode->commitMs = taosGetMonoTimestampMs(); + rpcFreeCont(rpcMsg.pCont); + rpcMsg.pCont = NULL; +} + #if BATCH_ENABLE static void inline vnodeProposeBatchMsg(SVnode *pVnode, SRpcMsg **pMsgArr, bool *pIsWeakArr, int32_t *arrSize) { @@ -178,6 +236,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) continue; } + vnodeProposeCommitOnNeed(pVnode); + code = vnodePreProcessWriteMsg(pVnode, pMsg); if (code != 0) { vGError("vgId:%d, msg:%p failed to pre-process since %s", vgId, pMsg, terrstr()); @@ -205,34 +265,6 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) #else -static int32_t inline vnodeProposeMsg(SVnode *pVnode, SRpcMsg *pMsg, bool isWeak) { - int64_t seq = 0; - - taosThreadMutexLock(&pVnode->lock); - int32_t code = syncPropose(pVnode->sync, pMsg, isWeak, &seq); - bool wait = (code == 0 && vnodeIsMsgBlock(pMsg->msgType)); - if (wait) { - ASSERT(!pVnode->blocked); - pVnode->blocked = true; - pVnode->blockSec = taosGetTimestampSec(); - pVnode->blockSeq = seq; -#if 0 - pVnode->blockInfo = pMsg->info; -#endif - } - taosThreadMutexUnlock(&pVnode->lock); - - if (code > 0) { - vnodeHandleWriteMsg(pVnode, pMsg); - } else if (code < 0) { - if (terrno != 0) code = terrno; - vnodeHandleProposeError(pVnode, pMsg, code); - } - - if (wait) vnodeWaitBlockMsg(pVnode, pMsg); - return code; -} - void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { SVnode *pVnode = pInfo->ahandle; int32_t vgId = pVnode->config.vgId; @@ -256,6 +288,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) continue; } + vnodeProposeCommitOnNeed(pVnode); + code = vnodePreProcessWriteMsg(pVnode, pMsg); if (code != 0) { vGError("vgId:%d, msg:%p failed to pre-process since %s", vgId, pMsg, terrstr()); diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index d875d3ca09..34fbebdc39 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -326,6 +326,8 @@ int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt } // update + ASSERT(pBuf->startIndex < index); + ASSERT(index - pBuf->startIndex < pBuf->size); ASSERT(pBuf->entries[index % pBuf->size].pItem == NULL); SSyncLogBufEntry tmp = {.pItem = pEntry, .prevLogIndex = prevIndex, .prevLogTerm = prevTerm}; pEntry = NULL; @@ -454,6 +456,11 @@ int32_t syncLogFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, Syn pEntry->index, pEntry->term, TMSG_INFO(pEntry->originalRpcType)); } + if (pEntry->originalRpcType == TDMT_VND_COMMIT) { + sInfo("vgId:%d, fsm execute vnode commit. index: %" PRId64 ", term: %" PRId64 "", pNode->vgId, pEntry->index, + pEntry->term); + } + SRpcMsg rpcMsg = {0}; syncEntry2OriginalRpc(pEntry, &rpcMsg); diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 018ac5bb7d..d86ff847f9 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -219,6 +219,10 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr ASSERT(pEntry->index == index); + if (pEntry->originalRpcType == TDMT_VND_COMMIT) { + walFsync(pWal, true); + } + sNTrace(pData->pSyncNode, "write index:%" PRId64 ", type:%s, origin type:%s, elapsed:%" PRId64, pEntry->index, TMSG_INFO(pEntry->msgType), TMSG_INFO(pEntry->originalRpcType), tsElapsed); return 0; diff --git a/source/libs/sync/src/syncRespMgr.c b/source/libs/sync/src/syncRespMgr.c index 79a38cad7a..6e945b591e 100644 --- a/source/libs/sync/src/syncRespMgr.c +++ b/source/libs/sync/src/syncRespMgr.c @@ -107,7 +107,7 @@ int32_t syncRespMgrGetAndDel(SSyncRespMgr *pObj, uint64_t seq, SRpcHandleInfo *p taosThreadMutexUnlock(&pObj->mutex); return 1; // get one object } else { - sNError(pObj->data, "get-and-del message handle, no object of seq:%" PRIu64, seq); + sNTrace(pObj->data, "get-and-del message handle, no object of seq:%" PRIu64, seq); } taosThreadMutexUnlock(&pObj->mutex); diff --git a/source/libs/tdb/inc/tdb.h b/source/libs/tdb/inc/tdb.h index 10a99bb1fa..0e20941b3a 100644 --- a/source/libs/tdb/inc/tdb.h +++ b/source/libs/tdb/inc/tdb.h @@ -74,7 +74,12 @@ int32_t tdbTbcUpsert(TBC *pTbc, const void *pKey, int nKey, const void *pData, i int32_t tdbTxnOpen(TXN *pTxn, int64_t txnid, void *(*xMalloc)(void *, size_t), void (*xFree)(void *, void *), void *xArg, int flags); -int32_t tdbTxnClose(TXN *pTxn); +int32_t tdbTxnCloseImpl(TXN *pTxn); +#define tdbTxnClose(pTxn) \ + do { \ + tdbTxnCloseImpl(pTxn); \ + (pTxn) = NULL; \ + } while (0) // other void tdbFree(void *); diff --git a/source/libs/tdb/src/db/tdbPage.c b/source/libs/tdb/src/db/tdbPage.c index 50dc8e0a65..d35f05461d 100644 --- a/source/libs/tdb/src/db/tdbPage.c +++ b/source/libs/tdb/src/db/tdbPage.c @@ -77,7 +77,7 @@ int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg) u8 *ptr; tdbTrace("page/destroy: %p/%d %p", pPage, pPage->id, xFree); - ASSERT(!pPage->isDirty); + // ASSERT(!pPage->isDirty); ASSERT(xFree); for (int iOvfl = 0; iOvfl < pPage->nOverflow; iOvfl++) { diff --git a/source/libs/tdb/src/db/tdbTxn.c b/source/libs/tdb/src/db/tdbTxn.c index 055d9c7f98..24f955fe2f 100644 --- a/source/libs/tdb/src/db/tdbTxn.c +++ b/source/libs/tdb/src/db/tdbTxn.c @@ -28,13 +28,18 @@ int tdbTxnOpen(TXN *pTxn, int64_t txnid, void *(*xMalloc)(void *, size_t), void return 0; } -int tdbTxnClose(TXN *pTxn) { +int tdbTxnCloseImpl(TXN *pTxn) { if (pTxn) { if (pTxn->jPageSet) { hashset_destroy(pTxn->jPageSet); pTxn->jPageSet = NULL; } + if (pTxn->jfd) { + tdbOsClose(pTxn->jfd); + ASSERT(pTxn->jfd == NULL); + } + tdbOsFree(pTxn); } diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index d144a76eb0..d37dff4d01 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1124,7 +1124,7 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { int ret = uv_tcp_connect(&conn->connReq, (uv_tcp_t*)(conn->stream), (const struct sockaddr*)&addr, cliConnCb); if (ret != 0) { - tGTrace("%s conn %p failed to connect to %s:%d, reason:%s", pTransInst->label, conn, conn->ip, conn->port, + tGError("%s conn %p failed to connect to %s:%d, reason:%s", pTransInst->label, conn, conn->ip, conn->port, uv_err_name(ret)); uv_timer_stop(conn->timer); From 7227e53b98a3e86447ff05e056c133f77646985b Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 26 Dec 2022 14:22:54 +0800 Subject: [PATCH 02/24] fix: set restore finish only after reaching the current term --- source/libs/sync/src/syncPipeline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 34fbebdc39..96a1674027 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -558,7 +558,8 @@ int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t comm ret = 0; _out: // mark as restored if needed - if (!pNode->restoreFinish && pBuf->commitIndex >= pNode->commitIndex) { + if (!pNode->restoreFinish && pBuf->commitIndex >= pNode->commitIndex && pEntry != NULL && + pNode->pRaftStore->currentTerm <= pEntry->term) { pNode->pFsm->FpRestoreFinishCb(pNode->pFsm); pNode->restoreFinish = true; sInfo("vgId:%d, restore finished. log buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId, From 137e7d009df468bf0ab6c5890263c71ef5c4992f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 26 Dec 2022 20:51:57 +0800 Subject: [PATCH 03/24] fix: update nextRowIterGet of tsdbCache --- source/dnode/vnode/src/tsdb/tsdbCache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index 6a82517067..0fc5b617bb 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -1080,6 +1080,8 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow) { iMax[nMax] = i; max[nMax++] = pIter->input[i].pRow; + } else { + pIter->input[i].next = false; } } } From c1c7f2593f6db5f0992ef40b1d574f9508ea3ee3 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 27 Dec 2022 10:38:56 +0800 Subject: [PATCH 04/24] enh: initialize commitMs as the time when vnodeOpen --- source/dnode/vnode/src/vnd/vnodeOpen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 58b73d806f..f2973c188e 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -142,6 +142,7 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) { pVnode->path = (char *)&pVnode[1]; strcpy(pVnode->path, path); pVnode->config = info.config; + pVnode->commitMs = taosGetMonoTimestampMs(); pVnode->state.committed = info.state.committed; pVnode->state.commitTerm = info.state.commitTerm; pVnode->state.commitID = info.state.commitID; From 691b75adf5559f3f8d4548736c6c20ed1c946372 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 27 Dec 2022 15:42:00 +0800 Subject: [PATCH 05/24] enh: flush database for test cases of rsma --- tests/script/tsim/sma/rsmaCreateInsertQuery.sim | 2 +- tests/script/tsim/sma/rsmaPersistenceRecovery.sim | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim index 04cf09715c..508e6f88c1 100644 --- a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim +++ b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim @@ -82,8 +82,8 @@ endi #=================================================================== - #==================== reboot to trigger commit data to file +sql flush database d0; system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start diff --git a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim index faff48b61c..4117a2403d 100644 --- a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim +++ b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim @@ -85,6 +85,7 @@ endi #==================== reboot to trigger commit data to file +sql flush database d0; system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start From 92e05b8ece2075e907c935d81b9a8958d7401a1f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 27 Dec 2022 16:28:44 +0800 Subject: [PATCH 06/24] enh: schedule vnodeCommit uniformly distributed --- source/dnode/vnode/src/inc/vnd.h | 1 + source/dnode/vnode/src/inc/vnodeInt.h | 7 ++++++- source/dnode/vnode/src/vnd/vnodeCommit.c | 13 ++++++++++--- source/dnode/vnode/src/vnd/vnodeOpen.c | 3 ++- source/dnode/vnode/src/vnd/vnodeSync.c | 2 +- 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/src/inc/vnd.h b/source/dnode/vnode/src/inc/vnd.h index d8c4b001b1..24821a3a61 100644 --- a/source/dnode/vnode/src/inc/vnd.h +++ b/source/dnode/vnode/src/inc/vnd.h @@ -86,6 +86,7 @@ int32_t vnodeGetBatchMeta(SVnode* pVnode, SRpcMsg* pMsg); // vnodeCommit.c int32_t vnodeBegin(SVnode* pVnode); int32_t vnodeShouldCommit(SVnode* pVnode); +void vnodeUpdCommitSched(SVnode* pVnode); void vnodeRollback(SVnode* pVnode); int32_t vnodeSaveInfo(const char* dir, const SVnodeInfo* pCfg); int32_t vnodeCommitInfo(const char* dir, const SVnodeInfo* pInfo); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 1b84fb0578..75367883f1 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -332,6 +332,11 @@ struct STsdbKeepCfg { int32_t keep2; }; +typedef struct SVCommitSched { + int64_t commitMs; + int64_t maxWaitMs; +} SVCommitSched; + struct SVnode { char* path; SVnodeCfg config; @@ -350,7 +355,7 @@ struct SVnode { STQ* pTq; SSink* pSink; tsem_t canCommit; - int64_t commitMs; + SVCommitSched commitSched; int64_t sync; TdThreadMutex lock; bool blocked; diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 4f63d6e043..3738966122 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -58,15 +58,22 @@ int vnodeBegin(SVnode *pVnode) { return 0; } +void vnodeUpdCommitSched(SVnode *pVnode) { + int64_t randNum = taosRand(); + pVnode->commitSched.commitMs = taosGetMonoTimestampMs(); + pVnode->commitSched.maxWaitMs = SYNC_VND_COMMIT_MAX_MS + (randNum % SYNC_VND_COMMIT_MAX_MS); +} + int vnodeShouldCommit(SVnode *pVnode) { if (!pVnode->inUse || !osDataSpaceAvailable()) { return false; } + SVCommitSched *pSched = &pVnode->commitSched; int64_t nowMs = taosGetMonoTimestampMs(); - return (((pVnode->inUse->size > pVnode->inUse->node.size) && (pVnode->commitMs + SYNC_VND_COMMIT_MIN_MS < nowMs)) || - (pVnode->inUse->size > 0 && pVnode->commitMs + SYNC_VND_COMMIT_MAX_MS < nowMs)); + return (((pVnode->inUse->size > pVnode->inUse->node.size) && (pSched->commitMs + SYNC_VND_COMMIT_MIN_MS < nowMs)) || + (pVnode->inUse->size > 0 && pSched->commitMs + pSched->maxWaitMs < nowMs)); } int vnodeShouldCommitOld(SVnode *pVnode) { @@ -306,7 +313,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { vInfo("vgId:%d, start to commit, commitId:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode), pInfo->info.state.commitID, pInfo->info.state.committed, pInfo->info.state.commitTerm); - pVnode->commitMs = taosGetMonoTimestampMs(); + vnodeUpdCommitSched(pVnode); // persist wal before starting if (walPersist(pVnode->pWal) < 0) { diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index f2973c188e..edbec0d044 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -142,7 +142,6 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) { pVnode->path = (char *)&pVnode[1]; strcpy(pVnode->path, path); pVnode->config = info.config; - pVnode->commitMs = taosGetMonoTimestampMs(); pVnode->state.committed = info.state.committed; pVnode->state.commitTerm = info.state.commitTerm; pVnode->state.commitID = info.state.commitID; @@ -158,6 +157,8 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) { taosThreadMutexInit(&pVnode->mutex, NULL); taosThreadCondInit(&pVnode->poolNotEmpty, NULL); + vnodeUpdCommitSched(pVnode); + int8_t rollback = vnodeShouldRollback(pVnode); // open buffer pool diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index eaea4102a2..a1dfeb9728 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -154,7 +154,7 @@ void vnodeProposeCommitOnNeed(SVnode *pVnode) { vInfo("vgId:%d, proposed vnode commit", pVnode->config.vgId); _out: - pVnode->commitMs = taosGetMonoTimestampMs(); + vnodeUpdCommitSched(pVnode); rpcFreeCont(rpcMsg.pCont); rpcMsg.pCont = NULL; } From 6fc47beb71f111a140ba8742b27d31da7d702344 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 28 Dec 2022 14:34:23 +0800 Subject: [PATCH 07/24] enh: streamMetaCommit in sync with vnodeCommit --- source/dnode/snode/src/snode.c | 1 + source/dnode/vnode/src/tq/tqCommit.c | 9 ++++++++- source/dnode/vnode/src/vnd/vnodeOpen.c | 3 +-- source/libs/stream/src/streamMeta.c | 4 ++-- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index b133226ed3..860db20fa8 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -124,6 +124,7 @@ FAIL: } void sndClose(SSnode *pSnode) { + streamMetaCommit(pSnode->pMeta); streamMetaClose(pSnode->pMeta); taosMemoryFree(pSnode->path); taosMemoryFree(pSnode); diff --git a/source/dnode/vnode/src/tq/tqCommit.c b/source/dnode/vnode/src/tq/tqCommit.c index dabd97a345..7fc66c4919 100644 --- a/source/dnode/vnode/src/tq/tqCommit.c +++ b/source/dnode/vnode/src/tq/tqCommit.c @@ -15,4 +15,11 @@ #include "tq.h" -int tqCommit(STQ* pTq) { return tqOffsetCommitFile(pTq->pOffsetStore); } +int tqCommit(STQ* pTq) { + if (streamMetaCommit(pTq->pStreamMeta) < 0) { + tqError("vgId:%d, failed to commit stream meta since %s", TD_VID(pTq->pVnode), terrstr()); + return -1; + } + + return tqOffsetCommitFile(pTq->pOffsetStore); +} diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index edbec0d044..96c8956b58 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -251,10 +251,9 @@ void vnodePreClose(SVnode *pVnode) { void vnodeClose(SVnode *pVnode) { if (pVnode) { + tsem_wait(&pVnode->canCommit); vnodeSyncClose(pVnode); vnodeQueryClose(pVnode); - - tsem_wait(&pVnode->canCommit); walClose(pVnode->pWal); tqClose(pVnode->pTq); if (pVnode->pTsdb) tsdbClose(&pVnode->pTsdb); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index afad78c5e5..56da86654c 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -69,8 +69,7 @@ _err: } void streamMetaClose(SStreamMeta* pMeta) { - tdbCommit(pMeta->db, pMeta->txn); - tdbPostCommit(pMeta->db, pMeta->txn); + tdbTxnClose(pMeta->txn); tdbTbClose(pMeta->pTaskDb); tdbTbClose(pMeta->pCheckpointDb); tdbClose(pMeta->db); @@ -88,6 +87,7 @@ void streamMetaClose(SStreamMeta* pMeta) { /*streamMetaReleaseTask(pMeta, pTask);*/ } taosHashCleanup(pMeta->pTasks); + taosHashCleanup(pMeta->pRecoverStatus); taosMemoryFree(pMeta->path); taosMemoryFree(pMeta); } From 724cf98dc33a7c2f223deefb6d18a419a19ccf7d Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 28 Dec 2022 17:47:33 +0800 Subject: [PATCH 08/24] fix: be conservative on commit progress in appendEntries --- source/libs/sync/src/syncAppendEntries.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 1dc6905b88..1e5adb4bed 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -211,7 +211,7 @@ _SEND_RESPONSE: if (accepted && matched) { pReply->success = true; // update commit index only after matching - (void)syncNodeUpdateCommitIndex(ths, pMsg->commitIndex); + (void)syncNodeUpdateCommitIndex(ths, TMIN(pMsg->commitIndex, pEntry->index)); } // ack, i.e. send response From c4fad84c7c69718b3bc2d01e3fb8836ea479dd57 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 28 Dec 2022 18:24:41 +0800 Subject: [PATCH 09/24] enh: reset commitVer in WAL on restore --- source/libs/wal/src/walMeta.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index 8e6628bb21..aeb0fe9fe9 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -292,19 +292,9 @@ void walAlignVersions(SWal* pWal) { } pWal->vers.lastVer = pWal->vers.snapshotVer; } - if (pWal->vers.commitVer < pWal->vers.snapshotVer) { - wWarn("vgId:%d, commitVer:%" PRId64 " is less than snapshotVer:%" PRId64 ". align with it.", pWal->cfg.vgId, - pWal->vers.commitVer, pWal->vers.snapshotVer); - pWal->vers.commitVer = pWal->vers.snapshotVer; - } - if (pWal->vers.appliedVer < pWal->vers.snapshotVer) { - wWarn("vgId:%d, appliedVer:%" PRId64 " is less than snapshotVer:%" PRId64 ". align with it.", pWal->cfg.vgId, - pWal->vers.appliedVer, pWal->vers.snapshotVer); - pWal->vers.appliedVer = pWal->vers.snapshotVer; - } - - pWal->vers.commitVer = TMIN(pWal->vers.lastVer, pWal->vers.commitVer); - pWal->vers.appliedVer = TMIN(pWal->vers.commitVer, pWal->vers.appliedVer); + // reset commitVer and appliedVer + pWal->vers.commitVer = pWal->vers.snapshotVer; + pWal->vers.appliedVer = pWal->vers.snapshotVer; } bool walLogEntriesComplete(const SWal* pWal) { From 0c4ade9373fe8a76a9cc23c7c53e56916b935231 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 28 Dec 2022 20:20:41 +0800 Subject: [PATCH 10/24] fix: update sync node commitIndex only if matchTerm equals currentTerm upon heartbeat --- source/libs/sync/inc/syncMessage.h | 4 ++-- source/libs/sync/inc/syncPipeline.h | 1 + source/libs/sync/src/syncAppendEntries.c | 1 + source/libs/sync/src/syncCommit.c | 1 + source/libs/sync/src/syncMain.c | 28 +++++++++++++++--------- source/libs/sync/src/syncPipeline.c | 11 ++++++++-- source/libs/sync/src/syncUtil.c | 2 +- source/libs/wal/src/walMeta.c | 1 + 8 files changed, 34 insertions(+), 15 deletions(-) diff --git a/source/libs/sync/inc/syncMessage.h b/source/libs/sync/inc/syncMessage.h index 3bd94dbab5..49486bc12d 100644 --- a/source/libs/sync/inc/syncMessage.h +++ b/source/libs/sync/inc/syncMessage.h @@ -247,8 +247,8 @@ typedef struct SyncLocalCmd { SRaftId destId; int32_t cmd; - SyncTerm sdNewTerm; // step down new term - SyncIndex fcIndex; // follower commit index + SyncTerm currentTerm; // step down new term + SyncIndex commitIndex; // follower commit index } SyncLocalCmd; int32_t syncBuildTimeout(SRpcMsg* pMsg, ESyncTimeoutType ttype, uint64_t logicClock, int32_t ms, SSyncNode* pNode); diff --git a/source/libs/sync/inc/syncPipeline.h b/source/libs/sync/inc/syncPipeline.h index a0a0691694..55cb0d7db6 100644 --- a/source/libs/sync/inc/syncPipeline.h +++ b/source/libs/sync/inc/syncPipeline.h @@ -98,6 +98,7 @@ int32_t syncLogBufferReInit(SSyncLogBuffer* pBuf, SSyncNode* pNode); // access int64_t syncLogBufferGetEndIndex(SSyncLogBuffer* pBuf); +SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf); int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry); int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm); int64_t syncLogBufferProceed(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncTerm* pMatchTerm); diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 1e5adb4bed..66ff28d07d 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -90,6 +90,7 @@ // int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) { + ASSERT(false && "deprecated"); if (ths->state != TAOS_SYNC_STATE_FOLLOWER) { sNTrace(ths, "can not do follower commit"); return -1; diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 5fdcbeb91c..152fddb7e6 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -44,6 +44,7 @@ // /\ UNCHANGED <> // void syncOneReplicaAdvance(SSyncNode* pSyncNode) { + ASSERT(false && "deprecated"); if (pSyncNode == NULL) { sError("pSyncNode is NULL"); return; diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 1a481a7e14..7a6c0f734f 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1036,6 +1036,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { } } pSyncNode->commitIndex = commitIndex; + sInfo("vgId:%d, sync node commitIndex initialized as %" PRId64, pSyncNode->vgId, pSyncNode->commitIndex); if (syncNodeLogStoreRestoreOnNeed(pSyncNode) < 0) { goto _error; @@ -1176,9 +1177,10 @@ int32_t syncNodeRestore(SSyncNode* pSyncNode) { } ASSERT(endIndex == lastVer + 1); - commitIndex = TMAX(pSyncNode->commitIndex, commitIndex); + pSyncNode->commitIndex = TMAX(pSyncNode->commitIndex, commitIndex); + sInfo("vgId:%d, restore sync until commitIndex:%" PRId64, pSyncNode->vgId, pSyncNode->commitIndex); - if (syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, commitIndex) < 0) { + if (syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, pSyncNode->commitIndex) < 0) { return -1; } @@ -2545,8 +2547,9 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) { SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont; pSyncMsg->cmd = SYNC_LOCAL_CMD_FOLLOWER_CMT; - pSyncMsg->fcIndex = pMsg->commitIndex; - SyncIndex fcIndex = pSyncMsg->fcIndex; + pSyncMsg->commitIndex = pMsg->commitIndex; + pSyncMsg->currentTerm = pMsg->term; + SyncIndex fcIndex = pSyncMsg->commitIndex; if (ths->syncEqMsg != NULL && ths->msgcb != NULL) { int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd); @@ -2567,7 +2570,8 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) { SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont; pSyncMsg->cmd = SYNC_LOCAL_CMD_STEP_DOWN; - pSyncMsg->sdNewTerm = pMsg->term; + pSyncMsg->currentTerm = pMsg->term; + pSyncMsg->commitIndex = pMsg->commitIndex; if (ths->syncEqMsg != NULL && ths->msgcb != NULL) { int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd); @@ -2575,7 +2579,7 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) { sError("vgId:%d, sync enqueue step-down msg error, code:%d", ths->vgId, code); rpcFreeCont(rpcMsgLocalCmd.pCont); } else { - sTrace("vgId:%d, sync enqueue step-down msg, new-term: %" PRId64, ths->vgId, pSyncMsg->sdNewTerm); + sTrace("vgId:%d, sync enqueue step-down msg, new-term: %" PRId64, ths->vgId, pSyncMsg->currentTerm); } } } @@ -2633,10 +2637,13 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) { syncLogRecvLocalCmd(ths, pMsg, ""); if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) { - syncNodeStepDown(ths, pMsg->sdNewTerm); + syncNodeStepDown(ths, pMsg->currentTerm); } else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) { - (void)syncNodeUpdateCommitIndex(ths, pMsg->fcIndex); + SyncTerm matchTerm = syncLogBufferGetLastMatchTerm(ths->pLogBuf); + if (pMsg->currentTerm == matchTerm) { + (void)syncNodeUpdateCommitIndex(ths, pMsg->commitIndex); + } if (syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) { sError("vgId:%d, failed to commit raft log since %s. commit index: %" PRId64 "", ths->vgId, terrstr(), ths->commitIndex); @@ -2649,14 +2656,15 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) { } int32_t syncNodeOnLocalCmdOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) { + ASSERT(false && "deprecated"); SyncLocalCmd* pMsg = pRpcMsg->pCont; syncLogRecvLocalCmd(ths, pMsg, ""); if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) { - syncNodeStepDown(ths, pMsg->sdNewTerm); + syncNodeStepDown(ths, pMsg->currentTerm); } else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) { - syncNodeFollowerCommit(ths, pMsg->fcIndex); + syncNodeFollowerCommit(ths, pMsg->commitIndex); } else { sError("error local cmd"); diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index d88e610372..de9cd6e1a6 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -265,20 +265,27 @@ int32_t syncLogBufferReInit(SSyncLogBuffer* pBuf, SSyncNode* pNode) { return ret; } -FORCE_INLINE SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf) { +FORCE_INLINE SyncTerm syncLogBufferGetLastMatchTermWithoutLock(SSyncLogBuffer* pBuf) { SyncIndex index = pBuf->matchIndex; SSyncRaftEntry* pEntry = pBuf->entries[(index + pBuf->size) % pBuf->size].pItem; ASSERT(pEntry != NULL); return pEntry->term; } +SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf) { + taosThreadMutexLock(&pBuf->mutex); + SyncTerm term = syncLogBufferGetLastMatchTermWithoutLock(pBuf); + taosThreadMutexUnlock(&pBuf->mutex); + return term; +} + int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm) { taosThreadMutexLock(&pBuf->mutex); syncLogBufferValidate(pBuf); int32_t ret = -1; SyncIndex index = pEntry->index; SyncIndex prevIndex = pEntry->index - 1; - SyncTerm lastMatchTerm = syncLogBufferGetLastMatchTerm(pBuf); + SyncTerm lastMatchTerm = syncLogBufferGetLastMatchTermWithoutLock(pBuf); SSyncRaftEntry* pExist = NULL; bool inBuf = true; diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index 49a24bebde..525681e53e 100644 --- a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -411,7 +411,7 @@ void syncLogRecvLocalCmd(SSyncNode* pSyncNode, const SyncLocalCmd* pMsg, const c if (!(sDebugFlag & DEBUG_TRACE)) return; sNTrace(pSyncNode, "recv sync-local-cmd {cmd:%d-%s, sd-new-term:%" PRId64 ", fc-index:%" PRId64 "}, %s", pMsg->cmd, - syncLocalCmdGetStr(pMsg->cmd), pMsg->sdNewTerm, pMsg->fcIndex, s); + syncLocalCmdGetStr(pMsg->cmd), pMsg->currentTerm, pMsg->commitIndex, s); } void syncLogSendAppendEntriesReply(SSyncNode* pSyncNode, const SyncAppendEntriesReply* pMsg, const char* s) { diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index aeb0fe9fe9..44e88a4dcc 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -295,6 +295,7 @@ void walAlignVersions(SWal* pWal) { // reset commitVer and appliedVer pWal->vers.commitVer = pWal->vers.snapshotVer; pWal->vers.appliedVer = pWal->vers.snapshotVer; + wInfo("vgId:%d, reset commitVer to %" PRId64, pWal->cfg.vgId, pWal->vers.commitVer); } bool walLogEntriesComplete(const SWal* pWal) { From 2890a8cb96abfa15a96f9558aab8877d5a8f576b Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 28 Dec 2022 20:34:40 +0800 Subject: [PATCH 11/24] fix: return error on failing to truncate raft log --- source/libs/sync/src/syncRaftLog.c | 23 ----------------------- source/libs/wal/src/walWrite.c | 2 +- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 701b61355d..03c3fe154d 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -316,29 +316,6 @@ static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIn SSyncLogStoreData* pData = pLogStore->data; SWal* pWal = pData->pWal; - // need not truncate - SyncIndex wallastVer = walGetLastVer(pWal); - if (fromIndex > wallastVer) { - return 0; - } - - // need not truncate - SyncIndex walCommitVer = walGetCommittedVer(pWal); - if (fromIndex <= walCommitVer) { - return 0; - } - - // delete from cache - for (SyncIndex index = fromIndex; index <= wallastVer; ++index) { - SLRUCache* pCache = pData->pSyncNode->pLogStore->pCache; - LRUHandle* h = taosLRUCacheLookup(pCache, &index, sizeof(index)); - if (h) { - sNTrace(pData->pSyncNode, "cache delete index:%" PRId64, index); - - taosLRUCacheRelease(pData->pSyncNode->pLogStore->pCache, h, true); - } - } - int32_t code = walRollback(pWal, fromIndex); if (code != 0) { int32_t err = terrno; diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index a5c7bf1abd..51307dc17d 100644 --- a/source/libs/wal/src/walWrite.c +++ b/source/libs/wal/src/walWrite.c @@ -105,7 +105,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) { wInfo("vgId:%d, wal rollback for version %" PRId64, pWal->cfg.vgId, ver); int64_t code; char fnameStr[WAL_FILE_LEN]; - if (ver > pWal->vers.lastVer || ver < pWal->vers.commitVer || ver <= pWal->vers.snapshotVer) { + if (ver > pWal->vers.lastVer || ver <= pWal->vers.commitVer || ver <= pWal->vers.snapshotVer) { terrno = TSDB_CODE_WAL_INVALID_VER; taosThreadMutexUnlock(&pWal->mutex); return -1; From 044e58b7122e4d53a786b404d2273105dcd4bab2 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 29 Dec 2022 10:00:35 +0800 Subject: [PATCH 12/24] enh: adjust logging msgs for sync probe and rollback --- source/libs/sync/src/syncPipeline.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index de9cd6e1a6..0443be3f5e 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -818,11 +818,10 @@ int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode pMgr->endIndex = index + 1; SSyncLogBuffer* pBuf = pNode->pLogBuf; - sTrace("vgId:%d, attempted to probe the %d'th peer with msg of index:%" PRId64 " term: %" PRId64 - ". pMgr(rs:%d): [%" PRId64 " %" PRId64 ", %" PRId64 "), pBuf: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 - ")", - pNode->vgId, pMgr->peerId, index, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, - pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); + sInfo("vgId:%d, probe peer:%" PRIx64 " with msg of index:%" PRId64 " term: %" PRId64 ". mgr (rs:%d): [%" PRId64 + " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", + pNode->vgId, pDestId->addr, index, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, + pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); return 0; } @@ -1004,6 +1003,10 @@ void syncLogBufferDestroy(SSyncLogBuffer* pBuf) { int32_t syncLogBufferRollback(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncIndex toIndex) { ASSERT(pBuf->commitIndex < toIndex && toIndex <= pBuf->endIndex); + if (toIndex == pBuf->endIndex) { + return 0; + } + sInfo("vgId:%d, rollback sync log buffer. toindex: %" PRId64 ", buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId, toIndex, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); From e22ce2df87ac07ba9b8384d59e0e54021624d856 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 29 Dec 2022 10:45:25 +0800 Subject: [PATCH 13/24] fix: use pReply->lastSendIndex instead while updating commitIndex --- source/libs/sync/src/syncAppendEntries.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 66ff28d07d..026ebdb37c 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -207,12 +207,13 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) { accepted = true; _SEND_RESPONSE: + pEntry = NULL; pReply->matchIndex = syncLogBufferProceed(ths->pLogBuf, ths, &pReply->lastMatchTerm); bool matched = (pReply->matchIndex >= pReply->lastSendIndex); if (accepted && matched) { pReply->success = true; // update commit index only after matching - (void)syncNodeUpdateCommitIndex(ths, TMIN(pMsg->commitIndex, pEntry->index)); + (void)syncNodeUpdateCommitIndex(ths, TMIN(pMsg->commitIndex, pReply->lastSendIndex)); } // ack, i.e. send response From e8e189cbbb6247a1b9b784fa8582e18c81baa6d8 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 29 Dec 2022 15:48:59 +0800 Subject: [PATCH 14/24] enh: add sync log buffer info in logging msg in syncLogReplMgrRetryOnNeed --- source/libs/sync/src/syncPipeline.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index a458882da5..a9b5aadaa5 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -654,10 +654,12 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { _out: if (retried) { pMgr->retryBackoff = syncLogGetNextRetryBackoff(pMgr); - sInfo("vgId:%d, resent %d sync log entries. dest: %" PRIx64 ", indexes: %" PRId64 " ..., terms: ... %" PRId64 - ", retryWaitMs: %" PRId64 ", repl mgr: [%" PRId64 " %" PRId64 ", %" PRId64 ")", + SSyncLogBuffer* pBuf = pNode->pLogBuf; + sInfo("vgId:%d, resend %d sync log entries. dest: %" PRIx64 ", indexes: %" PRId64 " ..., terms: ... %" PRId64 + ", retryWaitMs: %" PRId64 ", mgr: [%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 + " %" PRId64 ", %" PRId64 ")", pNode->vgId, count, pDestId->addr, firstIndex, term, retryWaitMs, pMgr->startIndex, pMgr->matchIndex, - pMgr->endIndex); + pMgr->endIndex, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); } return ret; } From ed43aeaa9b2fe3f16480949d4184e4aee7df8a6d Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 29 Dec 2022 19:47:28 +0800 Subject: [PATCH 15/24] enh: add vndCommitMaxInterval cfg param --- include/common/tglobal.h | 3 +++ include/libs/sync/sync.h | 3 +-- source/common/src/tglobal.c | 7 +++++++ source/dnode/vnode/src/vnd/vnodeCommit.c | 2 +- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/common/tglobal.h b/include/common/tglobal.h index d445fc26e8..f58c9fe055 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -69,6 +69,9 @@ extern int32_t tsElectInterval; extern int32_t tsHeartbeatInterval; extern int32_t tsHeartbeatTimeout; +// vnode +extern int64_t tsVndCommitMaxIntervalMs; + // monitor extern bool tsEnableMonitor; extern int32_t tsMonitorInterval; diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 02287deb73..b134e79442 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -49,8 +49,7 @@ extern "C" { #define SYNC_HEARTBEAT_REPLY_SLOW_MS 1500 #define SYNC_SNAP_RESEND_MS 1000 * 60 -#define SYNC_VND_COMMIT_MIN_MS 200 -#define SYNC_VND_COMMIT_MAX_MS 60000 +#define SYNC_VND_COMMIT_MIN_MS 1000 #define SYNC_MAX_BATCH_SIZE 1 #define SYNC_INDEX_BEGIN 0 diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 98b9b566ec..4de79060a7 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -60,6 +60,9 @@ int32_t tsElectInterval = 25 * 1000; int32_t tsHeartbeatInterval = 1000; int32_t tsHeartbeatTimeout = 20 * 1000; +// vnode +int64_t tsVndCommitMaxIntervalMs = 60 * 1000; + // monitor bool tsEnableMonitor = true; int32_t tsMonitorInterval = 30; @@ -427,6 +430,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "syncHeartbeatInterval", tsHeartbeatInterval, 10, 1000 * 60 * 24 * 2, 0) != 0) return -1; if (cfgAddInt32(pCfg, "syncHeartbeatTimeout", tsHeartbeatTimeout, 10, 1000 * 60 * 24 * 2, 0) != 0) return -1; + if (cfgAddInt64(pCfg, "vndCommitMaxInterval", tsVndCommitMaxIntervalMs, 1000, 1000 * 60 * 60, 0) != 0) return -1; + if (cfgAddBool(pCfg, "monitor", tsEnableMonitor, 0) != 0) return -1; if (cfgAddInt32(pCfg, "monitorInterval", tsMonitorInterval, 1, 200000, 0) != 0) return -1; if (cfgAddString(pCfg, "monitorFqdn", tsMonitorFqdn, 0) != 0) return -1; @@ -741,6 +746,8 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsHeartbeatInterval = cfgGetItem(pCfg, "syncHeartbeatInterval")->i32; tsHeartbeatTimeout = cfgGetItem(pCfg, "syncHeartbeatTimeout")->i32; + tsVndCommitMaxIntervalMs = cfgGetItem(pCfg, "vndCommitMaxInterval")->i64; + tsStartUdfd = cfgGetItem(pCfg, "udf")->bval; tstrncpy(tsUdfdResFuncs, cfgGetItem(pCfg, "udfdResFuncs")->str, sizeof(tsUdfdResFuncs)); tstrncpy(tsUdfdLdLibPath, cfgGetItem(pCfg, "udfdLdLibPath")->str, sizeof(tsUdfdLdLibPath)); diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 3738966122..47ca9eb1b6 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -61,7 +61,7 @@ int vnodeBegin(SVnode *pVnode) { void vnodeUpdCommitSched(SVnode *pVnode) { int64_t randNum = taosRand(); pVnode->commitSched.commitMs = taosGetMonoTimestampMs(); - pVnode->commitSched.maxWaitMs = SYNC_VND_COMMIT_MAX_MS + (randNum % SYNC_VND_COMMIT_MAX_MS); + pVnode->commitSched.maxWaitMs = tsVndCommitMaxIntervalMs + (randNum % tsVndCommitMaxIntervalMs); } int vnodeShouldCommit(SVnode *pVnode) { From 4718f4d1b6a4e2ff587f1c5d0560a43e2bfa5edb Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 30 Dec 2022 17:24:44 +0800 Subject: [PATCH 16/24] fix: resend acked msgs on exceeding maximum retryWaitMs --- source/libs/sync/src/syncPipeline.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index a9b5aadaa5..e176735d56 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -31,6 +31,10 @@ static bool syncIsMsgBlock(tmsg_t type) { (type == TDMT_VND_UPDATE_TAG_VAL) || (type == TDMT_VND_ALTER_CONFIRM); } +FORCE_INLINE static int64_t syncGetRetryMaxWaitMs() { + return SYNC_LOG_REPL_RETRY_WAIT_MS * (1 << SYNC_MAX_RETRY_BACKOFF); +} + int64_t syncLogBufferGetEndIndex(SSyncLogBuffer* pBuf) { taosThreadMutexLock(&pBuf->mutex); int64_t index = pBuf->endIndex; @@ -627,7 +631,7 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { break; } - if (pMgr->states[pos].acked) { + if (pMgr->states[pos].acked && nowMs < pMgr->states[pos].timeMs + syncGetRetryMaxWaitMs()) { continue; } @@ -791,7 +795,7 @@ int32_t syncLogReplMgrReplicateOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index) { ASSERT(!pMgr->restored); ASSERT(pMgr->startIndex >= 0); - int64_t retryMaxWaitMs = SYNC_LOG_REPL_RETRY_WAIT_MS * (1 << SYNC_MAX_RETRY_BACKOFF); + int64_t retryMaxWaitMs = syncGetRetryMaxWaitMs(); int64_t nowMs = taosGetMonoTimestampMs(); if (pMgr->endIndex > pMgr->startIndex && @@ -834,9 +838,11 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p int32_t count = 0; int64_t nowMs = taosGetMonoTimestampMs(); int64_t limit = pMgr->size >> 1; + SyncTerm term = -1; + SyncIndex firstIndex = -1; for (SyncIndex index = pMgr->endIndex; index <= pNode->pLogBuf->matchIndex; index++) { - if (batchSize < count++ || limit <= index - pMgr->startIndex) { + if (batchSize < count || limit <= index - pMgr->startIndex) { break; } if (pMgr->startIndex + 1 < index && pMgr->states[(index - 1) % pMgr->size].barrier) { @@ -845,7 +851,6 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p int64_t pos = index % pMgr->size; SRaftId* pDestId = &pNode->replicasId[pMgr->peerId]; bool barrier = false; - SyncTerm term = -1; if (syncLogBufferReplicateOneTo(pMgr, pNode, index, &term, pDestId, &barrier) < 0) { sError("vgId:%d, failed to replicate log entry since %s. index: %" PRId64 ", dest: 0x%016" PRIx64 "", pNode->vgId, terrstr(), index, pDestId->addr); @@ -856,6 +861,9 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p pMgr->states[pos].term = term; pMgr->states[pos].acked = false; + if (firstIndex == -1) firstIndex = index; + count++; + pMgr->endIndex = index + 1; if (barrier) { sInfo("vgId:%d, replicated sync barrier to dest: %" PRIx64 ". index: %" PRId64 ", term: %" PRId64 @@ -869,10 +877,11 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p syncLogReplMgrRetryOnNeed(pMgr, pNode); SSyncLogBuffer* pBuf = pNode->pLogBuf; - sTrace("vgId:%d, attempted to replicate %d msgs to the %d'th peer. pMgr(rs:%d): [%" PRId64 " %" PRId64 ", %" PRId64 - "), pBuf: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", - pNode->vgId, count, pMgr->peerId, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, - pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); + sTrace("vgId:%d, replicated %d msgs to peer: %" PRId64 ". indexes: %" PRId64 "..., terms: ...%" PRId64 + ", mgr: (rs:%d) [%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 + ")", + pNode->vgId, count, pDestId->addr, firstIndex, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, + pMgr->endIndex, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); return 0; } From eb524e610a84ab368854aa0ec757577c1cdeca68 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 30 Dec 2022 19:02:46 +0800 Subject: [PATCH 17/24] enh: reset sync log replication on stagnation for eight times maxRetryWaitMs --- source/libs/sync/src/syncPipeline.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index e176735d56..0eff028cad 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -631,7 +631,12 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { break; } - if (pMgr->states[pos].acked && nowMs < pMgr->states[pos].timeMs + syncGetRetryMaxWaitMs()) { + if (pMgr->states[pos].acked) { + if (pMgr->states[pos].timeMs + (syncGetRetryMaxWaitMs() << 3) < nowMs) { + syncLogReplMgrReset(pMgr); + sWarn("vgId:%d, reset sync log repl mgr since stagnation. peer: %" PRIx64, pNode->vgId, pDestId->addr); + goto _out; + } continue; } From 03f4b12386b4a76c8a4860fb641dd3bc1b6147eb Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 30 Dec 2022 19:35:57 +0800 Subject: [PATCH 18/24] enh: print peer addr in hex format --- source/libs/sync/src/syncPipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 0eff028cad..7b8d5bed28 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -882,7 +882,7 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p syncLogReplMgrRetryOnNeed(pMgr, pNode); SSyncLogBuffer* pBuf = pNode->pLogBuf; - sTrace("vgId:%d, replicated %d msgs to peer: %" PRId64 ". indexes: %" PRId64 "..., terms: ...%" PRId64 + sTrace("vgId:%d, replicated %d msgs to peer: %" PRIx64 ". indexes: %" PRId64 "..., terms: ...%" PRId64 ", mgr: (rs:%d) [%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId, count, pDestId->addr, firstIndex, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, From 3edde0eadfec0576cf0b38d1ec8706b95903656d Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Sun, 1 Jan 2023 21:05:57 +0800 Subject: [PATCH 19/24] fix: tdbAbort on metaClose or streamMetaClose instead of tdbTxnClose --- source/dnode/vnode/src/meta/metaOpen.c | 2 +- source/libs/stream/src/streamMeta.c | 2 +- source/libs/tdb/src/db/tdbPage.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/meta/metaOpen.c b/source/dnode/vnode/src/meta/metaOpen.c index 8974d93678..867b481bcc 100644 --- a/source/dnode/vnode/src/meta/metaOpen.c +++ b/source/dnode/vnode/src/meta/metaOpen.c @@ -203,7 +203,7 @@ _err: int metaClose(SMeta *pMeta) { if (pMeta) { - if (pMeta->txn) tdbTxnClose(pMeta->txn); + if (pMeta->pEnv) tdbAbort(pMeta->pEnv, pMeta->txn); if (pMeta->pCache) metaCacheClose(pMeta); if (pMeta->pIdx) metaCloseIdx(pMeta); if (pMeta->pStreamDb) tdbTbClose(pMeta->pStreamDb); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 56da86654c..7c415053e1 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -69,7 +69,7 @@ _err: } void streamMetaClose(SStreamMeta* pMeta) { - tdbTxnClose(pMeta->txn); + tdbAbort(pMeta->db, pMeta->txn); tdbTbClose(pMeta->pTaskDb); tdbTbClose(pMeta->pCheckpointDb); tdbClose(pMeta->db); diff --git a/source/libs/tdb/src/db/tdbPage.c b/source/libs/tdb/src/db/tdbPage.c index d35f05461d..50dc8e0a65 100644 --- a/source/libs/tdb/src/db/tdbPage.c +++ b/source/libs/tdb/src/db/tdbPage.c @@ -77,7 +77,7 @@ int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg) u8 *ptr; tdbTrace("page/destroy: %p/%d %p", pPage, pPage->id, xFree); - // ASSERT(!pPage->isDirty); + ASSERT(!pPage->isDirty); ASSERT(xFree); for (int iOvfl = 0; iOvfl < pPage->nOverflow; iOvfl++) { From cb433c191bc6a7b6f617885b3e492ac5f297385e Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 2 Jan 2023 08:58:13 +0800 Subject: [PATCH 20/24] enh: reset as stagnation only when not matched in syncLogReplMgrRetryOnNeed --- source/libs/sync/src/syncPipeline.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 7b8d5bed28..04a9f9728f 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -632,9 +632,10 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { } if (pMgr->states[pos].acked) { - if (pMgr->states[pos].timeMs + (syncGetRetryMaxWaitMs() << 3) < nowMs) { + if (pMgr->matchIndex < index && pMgr->states[pos].timeMs + (syncGetRetryMaxWaitMs() << 3) < nowMs) { syncLogReplMgrReset(pMgr); - sWarn("vgId:%d, reset sync log repl mgr since stagnation. peer: %" PRIx64, pNode->vgId, pDestId->addr); + sWarn("vgId:%d, reset sync log repl mgr since stagnation. index: %" PRId64 ", peer: %" PRIx64, pNode->vgId, + index, pDestId->addr); goto _out; } continue; From 2baa71883d3f84c5de15d2c4a634b39f0468ed61 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 2 Jan 2023 10:06:20 +0800 Subject: [PATCH 21/24] enh: logging states of sync log repl mgrs and the ring buffer in syncPrintNodeLog --- source/libs/sync/src/syncUtil.c | 84 +++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index 525681e53e..9db04ce698 100644 --- a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -17,6 +17,7 @@ #include "syncUtil.h" #include "syncIndexMgr.h" #include "syncMessage.h" +#include "syncPipeline.h" #include "syncRaftCfg.h" #include "syncRaftStore.h" #include "syncSnapshot.h" @@ -163,45 +164,67 @@ bool syncUtilUserPreCommit(tmsg_t msgType) { return msgType != TDMT_SYNC_NOOP && bool syncUtilUserRollback(tmsg_t msgType) { return msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_LEADER_TRANSFER; } void syncCfg2SimpleStr(const SSyncCfg* pCfg, char* buf, int32_t bufLen) { - int32_t len = snprintf(buf, bufLen, "{r-num:%d, my:%d, ", pCfg->replicaNum, pCfg->myIndex); - + int32_t len = snprintf(buf, bufLen, "{num:%d, idx:%d, [", pCfg->replicaNum, pCfg->myIndex); for (int32_t i = 0; i < pCfg->replicaNum; ++i) { + len += snprintf(buf + len, bufLen - len, "%s:%d", pCfg->nodeInfo[i].nodeFqdn, pCfg->nodeInfo[i].nodePort); if (i < pCfg->replicaNum - 1) { - len += snprintf(buf + len, bufLen - len, "%s:%d, ", pCfg->nodeInfo[i].nodeFqdn, pCfg->nodeInfo[i].nodePort); - } else { - len += snprintf(buf + len, bufLen - len, "%s:%d}", pCfg->nodeInfo[i].nodeFqdn, pCfg->nodeInfo[i].nodePort); + len += snprintf(buf + len, bufLen - len, "%s", ", "); } } + len += snprintf(buf + len, bufLen - len, "%s", "]}"); } // for leader static void syncHearbeatReplyTime2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { - int32_t len = 5; - + int32_t len = 0; + len += snprintf(buf + len, bufLen - len, "%s", "{"); for (int32_t i = 0; i < pSyncNode->replicaNum; ++i) { int64_t tsMs = syncIndexMgrGetRecvTime(pSyncNode->pMatchIndex, &(pSyncNode->replicasId[i])); - + len += snprintf(buf + len, bufLen - len, "%d:%" PRId64, i, tsMs); if (i < pSyncNode->replicaNum - 1) { - len += snprintf(buf + len, bufLen - len, "%d:%" PRId64 ",", i, tsMs); - } else { - len += snprintf(buf + len, bufLen - len, "%d:%" PRId64 "}", i, tsMs); + len += snprintf(buf + len, bufLen - len, "%s", ","); } } + len += snprintf(buf + len, bufLen - len, "%s", "}"); } // for follower static void syncHearbeatTime2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { - int32_t len = 4; - + int32_t len = 0; + len += snprintf(buf + len, bufLen - len, "%s", "{"); for (int32_t i = 0; i < pSyncNode->replicaNum; ++i) { int64_t tsMs = syncIndexMgrGetRecvTime(pSyncNode->pNextIndex, &(pSyncNode->replicasId[i])); - + len += snprintf(buf + len, bufLen - len, "%d:%" PRId64, i, tsMs); if (i < pSyncNode->replicaNum - 1) { - len += snprintf(buf + len, bufLen - len, "%d:%" PRId64 ",", i, tsMs); - } else { - len += snprintf(buf + len, bufLen - len, "%d:%" PRId64 "}", i, tsMs); + len += snprintf(buf + len, bufLen - len, "%s", ","); } } + len += snprintf(buf + len, bufLen - len, "%s", "}"); +} + +static void syncLogBufferStates2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { + SSyncLogBuffer* pBuf = pSyncNode->pLogBuf; + if (pBuf == NULL) { + return; + } + int len = 0; + len += snprintf(buf + len, bufLen - len, "[%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pBuf->startIndex, + pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); +} + +static void syncLogReplMgrStates2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { + int len = 0; + len += snprintf(buf + len, bufLen - len, "%s", "{"); + for (int32_t i = 0; i < pSyncNode->replicaNum; i++) { + SSyncLogReplMgr* pMgr = pSyncNode->logReplMgrs[i]; + if (pMgr == NULL) break; + len += snprintf(buf + len, bufLen - len, "%d:%d [%" PRId64 " %" PRId64 ", %" PRId64 ")", i, pMgr->restored, + pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex); + if (i + 1 < pSyncNode->replicaNum) { + len += snprintf(buf + len, bufLen - len, "%s", ", "); + } + } + len += snprintf(buf + len, bufLen - len, "%s", "}"); } static void syncPeerState2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { @@ -243,20 +266,23 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNo int32_t cacheHit = pNode->pLogStore->cacheHit; int32_t cacheMiss = pNode->pLogStore->cacheMiss; - char cfgStr[1024]; + char cfgStr[1024] = ""; if (pNode->pRaftCfg != NULL) { syncCfg2SimpleStr(&(pNode->pRaftCfg->cfg), cfgStr, sizeof(cfgStr)); } else { return; } - char peerStr[1024] = "{"; - syncPeerState2Str(pNode, peerStr, sizeof(peerStr)); + char replMgrStatesStr[1024] = ""; + syncLogReplMgrStates2Str(pNode, replMgrStatesStr, sizeof(replMgrStatesStr)); - char hbrTimeStr[256] = "hbr:{"; + char bufferStatesStr[256] = ""; + syncLogBufferStates2Str(pNode, bufferStatesStr, sizeof(bufferStatesStr)); + + char hbrTimeStr[256] = ""; syncHearbeatReplyTime2Str(pNode, hbrTimeStr, sizeof(hbrTimeStr)); - char hbTimeStr[256] = "hb:{"; + char hbTimeStr[256] = ""; syncHearbeatTime2Str(pNode, hbTimeStr, sizeof(hbTimeStr)); int32_t quorum = syncNodeDynamicQuorum(pNode); @@ -279,16 +305,16 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNo taosPrintLog(flags, level, dflag, "vgId:%d, %s, sync:%s, term:%" PRIu64 ", commit-index:%" PRId64 ", first-ver:%" PRId64 ", last-ver:%" PRId64 ", min:%" PRId64 ", snap:%" PRId64 ", snap-term:%" PRIu64 - ", elect-times:%d, as-leader-times:%d, cfg-ch-times:%d, hit:%d, mis:%d, hb-slow:%d, hbr-slow:%d, " + ", elect-times:%d, as-leader-times:%d, cfg-ch-times:%d, hb-slow:%d, hbr-slow:%d, " "aq-items:%d, snaping:%" PRId64 ", replicas:%d, last-cfg:%" PRId64 - ", chging:%d, restore:%d, quorum:%d, elect-lc-timer:%" PRId64 ", hb:%" PRId64 ", %s, %s, %s, %s", + ", chging:%d, restore:%d, quorum:%d, elect-lc-timer:%" PRId64 ", hb:%" PRId64 + ", buffer:%s, log-repl-mgrs:%s, members:%s, hb:%s, hb-reply:%s", pNode->vgId, eventLog, syncStr(pNode->state), currentTerm, pNode->commitIndex, logBeginIndex, logLastIndex, pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, pNode->electNum, - pNode->becomeLeaderNum, pNode->configChangeNum, cacheHit, cacheMiss, pNode->hbSlowNum, - pNode->hbrSlowNum, aqItems, pNode->snapshottingIndex, pNode->replicaNum, - pNode->pRaftCfg->lastConfigIndex, pNode->changing, pNode->restoreFinish, quorum, - pNode->electTimerLogicClock, pNode->heartbeatTimerLogicClockUser, peerStr, cfgStr, hbTimeStr, - hbrTimeStr); + pNode->becomeLeaderNum, pNode->configChangeNum, pNode->hbSlowNum, pNode->hbrSlowNum, aqItems, + pNode->snapshottingIndex, pNode->replicaNum, pNode->pRaftCfg->lastConfigIndex, pNode->changing, + pNode->restoreFinish, quorum, pNode->electTimerLogicClock, pNode->heartbeatTimerLogicClockUser, + bufferStatesStr, replMgrStatesStr, cfgStr, hbTimeStr, hbrTimeStr); } } From bf6dc99461c74b30ae1ab3154e88cbc2e1480d9d Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Wed, 4 Jan 2023 11:48:30 +0800 Subject: [PATCH 22/24] fix: adjust sync logs --- source/dnode/mnode/impl/src/mndMnode.c | 4 ++-- source/dnode/mnode/sdb/src/sdbFile.c | 2 +- source/libs/sync/src/syncEnv.c | 2 +- source/libs/sync/src/syncMain.c | 13 +++++++++---- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndMnode.c b/source/dnode/mnode/impl/src/mndMnode.c index c8c8e06c5e..9b3934c40c 100644 --- a/source/dnode/mnode/impl/src/mndMnode.c +++ b/source/dnode/mnode/impl/src/mndMnode.c @@ -785,9 +785,9 @@ static void mndReloadSyncConfig(SMnode *pMnode) { int32_t code = syncReconfig(pMnode->syncMgmt.sync, &cfg); if (code != 0) { - mError("vgId:1, failed to reconfig mnode sync since %s", terrstr()); + mError("vgId:1, mnode sync reconfig failed since %s", terrstr()); } else { - mInfo("vgId:1, reconfig mnode sync success"); + mInfo("vgId:1, mnode sync reconfig success"); } } } diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index f43b6bdb25..b797e07e13 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -243,7 +243,7 @@ static int32_t sdbReadFileImp(SSdb *pSdb) { if (pFile == NULL) { taosMemoryFree(pRaw); terrno = TAOS_SYSTEM_ERROR(errno); - mDebug("failed to read sdb file:%s since %s", file, terrstr()); + mInfo("read sdb file:%s finished since %s", file, terrstr()); return 0; } diff --git a/source/libs/sync/src/syncEnv.c b/source/libs/sync/src/syncEnv.c index 0d6d0f93f1..1fa67cfa4d 100644 --- a/source/libs/sync/src/syncEnv.c +++ b/source/libs/sync/src/syncEnv.c @@ -114,7 +114,7 @@ void syncHbTimerDataRemove(int64_t rid) { taosRemoveRef(gHbDataRefId, rid); } SSyncHbTimerData *syncHbTimerDataAcquire(int64_t rid) { SSyncHbTimerData *pData = taosAcquireRef(gHbDataRefId, rid); - if (pData == NULL) { + if (pData == NULL && rid > 0) { sInfo("failed to acquire hb-timer-data from refId:%" PRId64, rid); terrno = TSDB_CODE_SYN_INTERNAL_ERROR; } diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 017806015b..6dcc7d3742 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1456,16 +1456,21 @@ int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pNode, SRpcMsg } } + int32_t code = -1; if (pNode->syncSendMSg != NULL && epSet != NULL) { syncUtilMsgHtoN(pMsg->pCont); pMsg->info.noResp = 1; - return pNode->syncSendMSg(epSet, pMsg); - } else { - sError("vgId:%d, sync send msg by id error, fp:%p epset:%p", pNode->vgId, pNode->syncSendMSg, epSet); + code = pNode->syncSendMSg(epSet, pMsg); + } + + if (code < 0) { + sError("vgId:%d, sync send msg by id error, epset:%p dnode:%d addr:%" PRId64 " err:0x%x", pNode->vgId, epSet, + DID(destRaftId), destRaftId->addr, terrno); rpcFreeCont(pMsg->pCont); terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; } + + return code; } inline bool syncNodeInConfig(SSyncNode* pNode, const SSyncCfg* pCfg) { From 95a6720b2a1bb39b607d918589bf149736ffd1f9 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 8 Jan 2023 23:02:16 +0800 Subject: [PATCH 23/24] fix(query): do not merge rows in last file when no data blocks exist. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 57 +++++++++++++------ tests/script/tsim/parser/regressiontest.sim | 61 +++++++++++++++++++++ 2 files changed, 102 insertions(+), 16 deletions(-) create mode 100644 tests/script/tsim/parser/regressiontest.sim diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index cb31890573..dcfc78fd1a 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -2401,6 +2401,19 @@ static int32_t loadNeighborIfOverlap(SFileDataBlockInfo* pBlockInfo, STableBlock return code; } +static void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo) { + SSDataBlock* pResBlock = pReader->pResBlock; + + pResBlock->info.id.uid = (pBlockScanInfo != NULL) ? pBlockScanInfo->uid : 0; + pResBlock->info.dataLoad = 1; + blockDataUpdateTsWindow(pResBlock, pReader->suppInfo.slotId[0]); + + setComposedBlockFlag(pReader, true); + + pReader->cost.composedBlocks += 1; + pReader->cost.buildComposedBlockTime += el; +} + static int32_t buildComposedDataBlock(STsdbReader* pReader) { int32_t code = TSDB_CODE_SUCCESS; @@ -2412,6 +2425,7 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) { bool asc = ASCENDING_TRAVERSE(pReader->order); int64_t st = taosGetTimestampUs(); int32_t step = asc ? 1 : -1; + double el = 0; STableBlockScanInfo* pBlockScanInfo = NULL; if (pBlockInfo != NULL) { @@ -2473,10 +2487,8 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) { } } - bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); - // no data in last block and block, no need to proceed. - if ((hasBlockData == false) && (hasBlockLData == false)) { + if (hasBlockData == false) { break; } @@ -2495,15 +2507,8 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) { } _end: - pResBlock->info.id.uid = (pBlockScanInfo != NULL) ? pBlockScanInfo->uid : 0; - pResBlock->info.dataLoad = 1; - blockDataUpdateTsWindow(pResBlock, pReader->suppInfo.slotId[0]); - - setComposedBlockFlag(pReader, true); - double el = (taosGetTimestampUs() - st) / 1000.0; - - pReader->cost.composedBlocks += 1; - pReader->cost.buildComposedBlockTime += el; + el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pBlockScanInfo); if (pResBlock->info.rows > 0) { tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 @@ -2748,6 +2753,8 @@ static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { return code; } + SSDataBlock* pResBlock = pReader->pResBlock; + while (1) { // load the last data block of current table STableBlockScanInfo* pScanInfo = *(STableBlockScanInfo**)pStatus->pTableIter; @@ -2758,15 +2765,33 @@ static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { if (!hasNexTable) { return TSDB_CODE_SUCCESS; } + continue; } - code = doBuildDataBlock(pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; + int64_t st = taosGetTimestampUs(); + while (1) { + bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); + + // no data in last block and block, no need to proceed. + if (hasBlockLData == false) { + break; + } + + buildComposedDataBlockImpl(pReader, pScanInfo, &pReader->status.fileBlockData, pLastBlockReader); + if (pResBlock->info.rows >= pReader->capacity) { + break; + } } - if (pReader->pResBlock->info.rows > 0) { + double el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pScanInfo); + + if (pResBlock->info.rows > 0) { + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 + " rows:%d, elapsed time:%.2f ms %s", + pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, el, pReader->idStr); return TSDB_CODE_SUCCESS; } diff --git a/tests/script/tsim/parser/regressiontest.sim b/tests/script/tsim/parser/regressiontest.sim new file mode 100644 index 0000000000..440b8f6129 --- /dev/null +++ b/tests/script/tsim/parser/regressiontest.sim @@ -0,0 +1,61 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +$dbPrefix = reg_db +$tb = tb +$rowNum = 8200 + +$ts0 = 1537146000000 +$delta = 100 +print ========== reg.sim +$i = 0 +$db = $dbPrefix . $i + +sql drop database if exists $db -x step1 +step1: +sql create database $db vgroups 1; + +sql use $db +sql create table $tb (ts timestamp, c1 int) + +$i = 0 +$ts = $ts0 + +$x = 0 +while $x < $rowNum +$xs = $x * $delta +$ts = $ts0 + $xs +sql insert into $tb values ( $ts , $x ) +$x = $x + 1 +endw + +print ================== restart server to commit data into disk +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start +print ================== server restart completed +sql connect + +sql use $db +sql delete from $tb where ts=1537146000000 +sql delete from $tb where ts=1537146409500 + +print =========================> TS-2410 +sql select * from $tb limit 20 offset 4090 +print $data00 +print $data10 +print $data20 +print $data30 +print $data40 +print $data50 +print $data60 +print $data70 +print $data80 +print $data90 + +if $data40 != @18-09-17 09:06:49.500@ then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT From 14c80364fb0998902f78cd2e9a14c1b19e1b6f33 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 8 Jan 2023 23:06:53 +0800 Subject: [PATCH 24/24] test:update sim script. --- tests/script/tsim/parser/regressiontest.sim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/script/tsim/parser/regressiontest.sim b/tests/script/tsim/parser/regressiontest.sim index 440b8f6129..98cb0248a1 100644 --- a/tests/script/tsim/parser/regressiontest.sim +++ b/tests/script/tsim/parser/regressiontest.sim @@ -54,7 +54,7 @@ print $data70 print $data80 print $data90 -if $data40 != @18-09-17 09:06:49.500@ then +if $data40 != @18-09-17 09:06:49.600@ then return -1 endi