From d6117935371f1a0cc0d29a42ed5e772b3e65d576 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 18 Jan 2023 10:07:05 +0800 Subject: [PATCH 01/20] enh: remove unused old routines in sync --- source/libs/sync/src/syncAppendEntries.c | 292 -------------- source/libs/sync/src/syncAppendEntriesReply.c | 60 --- source/libs/sync/src/syncCommit.c | 220 +--------- source/libs/sync/src/syncElection.c | 18 +- source/libs/sync/src/syncMain.c | 379 ++---------------- source/libs/sync/src/syncPipeline.c | 5 +- source/libs/sync/src/syncReplication.c | 138 ------- 7 files changed, 47 insertions(+), 1065 deletions(-) diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index e77a8d4be3..835e5d248e 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -89,45 +89,6 @@ // /\ UNCHANGED <> // -int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) { - ASSERT(false && "deprecated"); - if (ths->state != TAOS_SYNC_STATE_FOLLOWER) { - sNTrace(ths, "can not do follower commit"); - return -1; - } - - // maybe update commit index, leader notice me - if (newCommitIndex > ths->commitIndex) { - // has commit entry in local - if (newCommitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { - // advance commit index to sanpshot first - SSnapshot snapshot; - ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot); - if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex > ths->commitIndex) { - SyncIndex commitBegin = ths->commitIndex; - SyncIndex commitEnd = snapshot.lastApplyIndex; - ths->commitIndex = snapshot.lastApplyIndex; - sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd); - } - - SyncIndex beginIndex = ths->commitIndex + 1; - SyncIndex endIndex = newCommitIndex; - - // update commit index - ths->commitIndex = newCommitIndex; - - // call back Wal - int32_t code = ths->pLogStore->syncLogUpdateCommitIndex(ths->pLogStore, 
ths->commitIndex); - ASSERT(code == 0); - - code = syncNodeDoCommit(ths, beginIndex, endIndex, ths->state); - ASSERT(code == 0); - } - } - - return 0; -} - SSyncRaftEntry* syncBuildRaftEntryFromAppendEntries(const SyncAppendEntries* pMsg) { SSyncRaftEntry* pEntry = taosMemoryMalloc(pMsg->dataLen); if (pEntry == NULL) { @@ -232,256 +193,3 @@ _IGNORE: rpcFreeCont(rpcRsp.pCont); return 0; } - -int32_t syncNodeOnAppendEntriesOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) { - SyncAppendEntries* pMsg = pRpcMsg->pCont; - SRpcMsg rpcRsp = {0}; - - // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) { - syncLogRecvAppendEntries(ths, pMsg, "not in my config"); - goto _IGNORE; - } - - // prepare response msg - int32_t code = syncBuildAppendEntriesReply(&rpcRsp, ths->vgId); - if (code != 0) { - syncLogRecvAppendEntries(ths, pMsg, "build rsp error"); - goto _IGNORE; - } - - SyncAppendEntriesReply* pReply = rpcRsp.pCont; - pReply->srcId = ths->myRaftId; - pReply->destId = pMsg->srcId; - pReply->term = ths->raftStore.currentTerm; - pReply->success = false; - // pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); - pReply->matchIndex = SYNC_INDEX_INVALID; - pReply->lastSendIndex = pMsg->prevLogIndex + 1; - pReply->startTime = ths->startTime; - - if (pMsg->term < ths->raftStore.currentTerm) { - syncLogRecvAppendEntries(ths, pMsg, "reject, small term"); - goto _SEND_RESPONSE; - } - - if (pMsg->term > ths->raftStore.currentTerm) { - pReply->term = pMsg->term; - } - - syncNodeStepDown(ths, pMsg->term); - syncNodeResetElectTimer(ths); - - SyncIndex startIndex = ths->pLogStore->syncLogBeginIndex(ths->pLogStore); - SyncIndex lastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); - - if (pMsg->prevLogIndex > lastIndex) { - syncLogRecvAppendEntries(ths, pMsg, "reject, index not match"); - goto _SEND_RESPONSE; - } - - if (pMsg->prevLogIndex >= startIndex) { - SyncTerm myPreLogTerm = syncNodeGetPreTerm(ths, pMsg->prevLogIndex + 
1); - // ASSERT(myPreLogTerm != SYNC_TERM_INVALID); - if (myPreLogTerm == SYNC_TERM_INVALID) { - syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term invalid"); - goto _SEND_RESPONSE; - } - - if (myPreLogTerm != pMsg->prevLogTerm) { - syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term not match"); - goto _SEND_RESPONSE; - } - } - - // accept - pReply->success = true; - bool hasAppendEntries = pMsg->dataLen > 0; - if (hasAppendEntries) { - SSyncRaftEntry* pAppendEntry = syncEntryBuildFromAppendEntries(pMsg); - ASSERT(pAppendEntry != NULL); - - SyncIndex appendIndex = pMsg->prevLogIndex + 1; - - LRUHandle* hLocal = NULL; - LRUHandle* hAppend = NULL; - - int32_t code = 0; - SSyncRaftEntry* pLocalEntry = NULL; - SLRUCache* pCache = ths->pLogStore->pCache; - hLocal = taosLRUCacheLookup(pCache, &appendIndex, sizeof(appendIndex)); - if (hLocal) { - pLocalEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, hLocal); - code = 0; - - ths->pLogStore->cacheHit++; - sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", appendIndex, pLocalEntry->bytes, pLocalEntry); - - } else { - ths->pLogStore->cacheMiss++; - sNTrace(ths, "miss cache index:%" PRId64, appendIndex); - - code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, appendIndex, &pLocalEntry); - } - - if (code == 0) { - // get local entry success - - if (pLocalEntry->term == pAppendEntry->term) { - // do nothing - sNTrace(ths, "log match, do nothing, index:%" PRId64, appendIndex); - - } else { - // truncate - code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex); - if (code != 0) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, truncate error, append-index:%" PRId64, appendIndex); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - 
goto _IGNORE; - } - - ASSERT(pAppendEntry->index == appendIndex); - - // append - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); - if (code != 0) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, append error, append-index:%" PRId64, appendIndex); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - goto _IGNORE; - } - - syncCacheEntry(ths->pLogStore, pAppendEntry, &hAppend); - } - - } else { - if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { - // log not exist - - // truncate - code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex); - if (code != 0) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, truncate error, append-index:%" PRId64, appendIndex); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - syncEntryDestroy(pLocalEntry); - syncEntryDestroy(pAppendEntry); - goto _IGNORE; - } - - // append - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); - if (code != 0) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, append error, append-index:%" PRId64, appendIndex); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - goto _IGNORE; - } - - syncCacheEntry(ths->pLogStore, pAppendEntry, &hAppend); - - } else { - // get local entry success - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, get local entry error, append-index:%" PRId64 " err:%d", appendIndex, - terrno); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - 
- if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - goto _IGNORE; - } - } - - // update match index - pReply->matchIndex = pAppendEntry->index; - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - } else { - // no append entries, do nothing - // maybe has extra entries, no harm - - // update match index - pReply->matchIndex = pMsg->prevLogIndex; - } - - // maybe update commit index, leader notice me - syncNodeFollowerCommit(ths, pMsg->commitIndex); - - syncLogRecvAppendEntries(ths, pMsg, "accept"); - goto _SEND_RESPONSE; - -_IGNORE: - rpcFreeCont(rpcRsp.pCont); - return 0; - -_SEND_RESPONSE: - // msg event log - syncLogSendAppendEntriesReply(ths, pReply, ""); - - // send response - syncNodeSendMsgById(&pReply->destId, ths, &rpcRsp); - return 0; -} diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index 8157a5a14f..44a29da3ea 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -89,63 +89,3 @@ int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) { } return 0; } - -int32_t syncNodeOnAppendEntriesReplyOld(SSyncNode* ths, SyncAppendEntriesReply* pMsg) { - int32_t ret = 0; - - // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) { - syncLogRecvAppendEntriesReply(ths, pMsg, "not in my config"); - return 0; - } - - // drop stale response - if (pMsg->term < ths->raftStore.currentTerm) { - syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response"); - return 0; - } - - if 
(ths->state == TAOS_SYNC_STATE_LEADER) { - if (pMsg->term > ths->raftStore.currentTerm) { - syncLogRecvAppendEntriesReply(ths, pMsg, "error term"); - syncNodeStepDown(ths, pMsg->term); - return -1; - } - - ASSERT(pMsg->term == ths->raftStore.currentTerm); - - if (pMsg->success) { - SyncIndex oldMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); - if (pMsg->matchIndex > oldMatchIndex) { - syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex); - syncMaybeAdvanceCommitIndex(ths); - - // maybe update minMatchIndex - ths->minMatchIndex = syncMinMatchIndex(ths); - } - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1); - - } else { - SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - if (nextIndex > SYNC_INDEX_BEGIN) { - --nextIndex; - } - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex); - } - - // send next append entries - SPeerState* pState = syncNodeGetPeerState(ths, &(pMsg->srcId)); - ASSERT(pState != NULL); - - if (pMsg->lastSendIndex == pState->lastSendIndex) { - int64_t timeNow = taosGetTimestampMs(); - int64_t elapsed = timeNow - pState->lastSendTime; - sNTrace(ths, "sync-append-entries rtt elapsed:%" PRId64 ", index:%" PRId64, elapsed, pState->lastSendIndex); - - syncNodeReplicateOne(ths, &(pMsg->srcId), true); - } - } - - syncLogRecvAppendEntriesReply(ths, pMsg, "process"); - return 0; -} diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 286cf4daf5..67ed1e0701 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -43,148 +43,6 @@ // IN commitIndex' = [commitIndex EXCEPT ![i] = newCommitIndex] // /\ UNCHANGED <> // -void syncOneReplicaAdvance(SSyncNode* pSyncNode) { - ASSERT(false && "deprecated"); - if (pSyncNode == NULL) { - sError("pSyncNode is NULL"); - return; - } - - if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { - sNError(pSyncNode, "not leader, can not advance commit 
index"); - return; - } - - if (pSyncNode->replicaNum != 1) { - sNError(pSyncNode, "not one replica, can not advance commit index"); - return; - } - - // advance commit index to snapshot first - SSnapshot snapshot; - pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); - if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) { - SyncIndex commitBegin = pSyncNode->commitIndex; - SyncIndex commitEnd = snapshot.lastApplyIndex; - pSyncNode->commitIndex = snapshot.lastApplyIndex; - sNTrace(pSyncNode, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd); - } - - // advance commit index as large as possible - SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); - if (lastIndex > pSyncNode->commitIndex) { - sNTrace(pSyncNode, "commit by wal from index:%" PRId64 " to index:%" PRId64, pSyncNode->commitIndex + 1, lastIndex); - pSyncNode->commitIndex = lastIndex; - } - - // call back Wal - SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore); - if (pSyncNode->commitIndex > walCommitVer) { - pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex); - } -} - -void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { - ASSERTS(false, "deprecated"); - if (pSyncNode == NULL) { - sError("pSyncNode is NULL"); - return; - } - - if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { - sNError(pSyncNode, "not leader, can not advance commit index"); - return; - } - - // advance commit index to sanpshot first - SSnapshot snapshot; - pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); - if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) { - SyncIndex commitBegin = pSyncNode->commitIndex; - SyncIndex commitEnd = snapshot.lastApplyIndex; - pSyncNode->commitIndex = snapshot.lastApplyIndex; - sNTrace(pSyncNode, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd); - } - - // update commit index - 
SyncIndex newCommitIndex = pSyncNode->commitIndex; - for (SyncIndex index = syncNodeGetLastIndex(pSyncNode); index > pSyncNode->commitIndex; --index) { - bool agree = syncAgree(pSyncNode, index); - - if (agree) { - // term - SSyncRaftEntry* pEntry = NULL; - SLRUCache* pCache = pSyncNode->pLogStore->pCache; - LRUHandle* h = taosLRUCacheLookup(pCache, &index, sizeof(index)); - if (h) { - pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h); - - pSyncNode->pLogStore->cacheHit++; - sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", index, pEntry->bytes, pEntry); - - } else { - pSyncNode->pLogStore->cacheMiss++; - sNTrace(pSyncNode, "miss cache index:%" PRId64, index); - - int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry); - if (code != 0) { - sNError(pSyncNode, "advance commit index error, read wal index:%" PRId64, index); - return; - } - } - // cannot commit, even if quorum agree. need check term! - if (pEntry->term <= pSyncNode->raftStore.currentTerm) { - // update commit index - newCommitIndex = index; - - if (h) { - taosLRUCacheRelease(pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - break; - } else { - sNTrace(pSyncNode, "can not commit due to term not equal, index:%" PRId64 ", term:%" PRIu64, pEntry->index, - pEntry->term); - } - - if (h) { - taosLRUCacheRelease(pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - } - } - - // advance commit index as large as possible - SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore); - if (walCommitVer > newCommitIndex) { - newCommitIndex = walCommitVer; - } - - // maybe execute fsm - if (newCommitIndex > pSyncNode->commitIndex) { - SyncIndex beginIndex = pSyncNode->commitIndex + 1; - SyncIndex endIndex = newCommitIndex; - - // update commit index - pSyncNode->commitIndex = newCommitIndex; - - // call back Wal - pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex); - - // execute fsm - 
if (pSyncNode != NULL && pSyncNode->pFsm != NULL) { - int32_t code = syncNodeDoCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state); - if (code != 0) { - sNError(pSyncNode, "advance commit index error, do commit begin:%" PRId64 ", end:%" PRId64, beginIndex, - endIndex); - return; - } - } - } -} bool syncAgreeIndex(SSyncNode* pSyncNode, SRaftId* pRaftId, SyncIndex index) { // I am leader, I agree @@ -210,83 +68,7 @@ static inline int64_t syncNodeAbs64(int64_t a, int64_t b) { return c; } -int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) { - return pSyncNode->quorum; - -#if 0 - int32_t quorum = 1; // self - - int64_t timeNow = taosGetTimestampMs(); - for (int i = 0; i < pSyncNode->peersNum; ++i) { - int64_t peerStartTime = syncIndexMgrGetStartTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]); - int64_t peerRecvTime = syncIndexMgrGetRecvTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]); - SyncIndex peerMatchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId)[i]); - - int64_t recvTimeDiff = TABS(peerRecvTime - timeNow); - int64_t startTimeDiff = TABS(peerStartTime - pSyncNode->startTime); - int64_t logDiff = TABS(peerMatchIndex - syncNodeGetLastIndex(pSyncNode)); - - /* - int64_t recvTimeDiff = syncNodeAbs64(peerRecvTime, timeNow); - int64_t startTimeDiff = syncNodeAbs64(peerStartTime, pSyncNode->startTime); - int64_t logDiff = syncNodeAbs64(peerMatchIndex, syncNodeGetLastIndex(pSyncNode)); - */ - - int32_t addQuorum = 0; - - if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) { - if (startTimeDiff < SYNC_MAX_START_TIME_RANGE_MS) { - addQuorum = 1; - } else { - if (logDiff < SYNC_ADD_QUORUM_COUNT) { - addQuorum = 1; - } else { - addQuorum = 0; - } - } - } else { - addQuorum = 0; - } - - /* - if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) { - addQuorum = 1; - } else { - addQuorum = 0; - } - - if (startTimeDiff > SYNC_MAX_START_TIME_RANGE_MS) { - addQuorum = 0; - } - */ - - quorum += addQuorum; - } - - ASSERT(quorum <= 
pSyncNode->replicaNum); - - if (quorum < pSyncNode->quorum) { - quorum = pSyncNode->quorum; - } - - return quorum; -#endif -} - -/* -bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) { - int agreeCount = 0; - for (int i = 0; i < pSyncNode->replicaNum; ++i) { - if (syncAgreeIndex(pSyncNode, &(pSyncNode->replicasId[i]), index)) { - ++agreeCount; - } - if (agreeCount >= syncNodeDynamicQuorum(pSyncNode)) { - return true; - } - } - return false; -} -*/ +int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) { return pSyncNode->quorum; } bool syncNodeAgreedUpon(SSyncNode* pNode, SyncIndex index) { int count = 0; diff --git a/source/libs/sync/src/syncElection.c b/source/libs/sync/src/syncElection.c index cd3ffc33e3..682ace83ec 100644 --- a/source/libs/sync/src/syncElection.c +++ b/source/libs/sync/src/syncElection.c @@ -43,7 +43,10 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) { for (int i = 0; i < pNode->peersNum; ++i) { SRpcMsg rpcMsg = {0}; ret = syncBuildRequestVote(&rpcMsg, pNode->vgId); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to build request-vote msg since %s", pNode->vgId, terrstr()); + continue; + } SyncRequestVote* pMsg = rpcMsg.pCont; pMsg->srcId = pNode->myRaftId; @@ -51,13 +54,18 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) { pMsg->term = pNode->raftStore.currentTerm; ret = syncNodeGetLastIndexTerm(pNode, &pMsg->lastLogIndex, &pMsg->lastLogTerm); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to get index and term of last log since %s", pNode->vgId, terrstr()); + continue; + } ret = syncNodeSendMsgById(&pNode->peersId[i], pNode, &rpcMsg); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to send msg to peerId:%" PRId64, pNode->vgId, pNode->peersId[i].addr); + continue; + } } - - return ret; + return 0; } int32_t syncNodeElect(SSyncNode* pSyncNode) { diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 02f9795cad..07e781f420 100644 --- 
a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -586,78 +586,6 @@ SSyncState syncGetState(int64_t rid) { return state; } -#if 0 -int32_t syncGetSnapshotByIndex(int64_t rid, SyncIndex index, SSnapshot* pSnapshot) { - if (index < SYNC_INDEX_BEGIN) { - return -1; - } - - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return -1; - } - ASSERT(rid == pSyncNode->rid); - - SSyncRaftEntry* pEntry = NULL; - int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry); - if (code != 0) { - if (pEntry != NULL) { - syncEntryDestroy(pEntry); - } - syncNodeRelease(pSyncNode); - return -1; - } - ASSERT(pEntry != NULL); - - pSnapshot->data = NULL; - pSnapshot->lastApplyIndex = index; - pSnapshot->lastApplyTerm = pEntry->term; - pSnapshot->lastConfigIndex = syncNodeGetSnapshotConfigIndex(pSyncNode, index); - - syncEntryDestroy(pEntry); - syncNodeRelease(pSyncNode); - return 0; -} - -int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta) { - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return -1; - } - ASSERT(rid == pSyncNode->rid); - sMeta->lastConfigIndex = pSyncNode->raftCfg.lastConfigIndex; - - sTrace("vgId:%d, get snapshot meta, lastConfigIndex:%" PRId64, pSyncNode->vgId, pSyncNode->raftCfg.lastConfigIndex); - - syncNodeRelease(pSyncNode); - return 0; -} - -int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct SSnapshotMeta* sMeta) { - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return -1; - } - ASSERT(rid == pSyncNode->rid); - - ASSERT(pSyncNode->raftCfg.configIndexCount >= 1); - SyncIndex lastIndex = (pSyncNode->raftCfg.configIndexArr)[0]; - - for (int32_t i = 0; i < pSyncNode->raftCfg.configIndexCount; ++i) { - if ((pSyncNode->raftCfg.configIndexArr)[i] > lastIndex && - (pSyncNode->raftCfg.configIndexArr)[i] <= snapshotIndex) { - lastIndex = (pSyncNode->raftCfg.configIndexArr)[i]; - } - } - 
sMeta->lastConfigIndex = lastIndex; - sTrace("vgId:%d, get snapshot meta by index:%" PRId64 " lcindex:%" PRId64, pSyncNode->vgId, snapshotIndex, - sMeta->lastConfigIndex); - - syncNodeRelease(pSyncNode); - return 0; -} -#endif - SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode* pSyncNode, SyncIndex snapshotLastApplyIndex) { ASSERT(pSyncNode->raftCfg.configIndexCount >= 1); SyncIndex lastIndex = (pSyncNode->raftCfg.configIndexArr)[0]; @@ -1031,9 +959,12 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { pSyncNode->commitIndex = commitIndex; sInfo("vgId:%d, sync node commitIndex initialized as %" PRId64, pSyncNode->vgId, pSyncNode->commitIndex); + // restore log store on need if (syncNodeLogStoreRestoreOnNeed(pSyncNode) < 0) { + sError("vgId:%d, failed to restore log store since %s.", pSyncNode->vgId, terrstr()); goto _error; } + // timer ms init pSyncNode->pingBaseLine = PING_TIMER_MS; pSyncNode->electBaseLine = tsElectInterval; @@ -1096,10 +1027,16 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { pSyncNode->changing = false; // replication mgr - syncNodeLogReplMgrInit(pSyncNode); + if (syncNodeLogReplMgrInit(pSyncNode) < 0) { + sError("vgId:%d, failed to init repl mgr since %s.", pSyncNode->vgId, terrstr()); + goto _error; + } // peer state - syncNodePeerStateInit(pSyncNode); + if (syncNodePeerStateInit(pSyncNode) < 0) { + sError("vgId:%d, failed to init peer stat since %s.", pSyncNode->vgId, terrstr()); + goto _error; + } // // min match index @@ -1194,27 +1131,10 @@ int32_t syncNodeStart(SSyncNode* pSyncNode) { int32_t ret = 0; ret = syncNodeStartPingTimer(pSyncNode); - ASSERT(ret == 0); - return ret; -} - -void syncNodeStartOld(SSyncNode* pSyncNode) { - // start raft - if (pSyncNode->replicaNum == 1) { - raftStoreNextTerm(pSyncNode); - syncNodeBecomeLeader(pSyncNode, "one replica start"); - - // Raft 3.6.2 Committing entries from previous terms - syncNodeAppendNoop(pSyncNode); - syncMaybeAdvanceCommitIndex(pSyncNode); - - } else { - 
syncNodeBecomeFollower(pSyncNode, "first start"); + if (ret != 0) { + sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr()); } - - int32_t ret = 0; - ret = syncNodeStartPingTimer(pSyncNode); - ASSERT(ret == 0); + return ret; } int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) { @@ -1225,11 +1145,16 @@ int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) { // reset elect timer, long enough int32_t electMS = TIMER_MAX_MS; int32_t ret = syncNodeRestartElectTimer(pSyncNode, electMS); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to restart elect timer since %s", pSyncNode->vgId, terrstr()); + return -1; + } - ret = 0; ret = syncNodeStartPingTimer(pSyncNode); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr()); + return -1; + } return ret; } @@ -1818,12 +1743,6 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { pSyncNode->leaderCache = pSyncNode->myRaftId; for (int32_t i = 0; i < pSyncNode->pNextIndex->replicaNum; ++i) { - // maybe overwrite myself, no harm - // just do it! 
- - // pSyncNode->pNextIndex->index[i] = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore) + 1; - - // maybe wal is deleted SyncIndex lastIndex; SyncTerm lastTerm; int32_t code = syncNodeGetLastIndexTerm(pSyncNode, &lastIndex, &lastTerm); @@ -1885,7 +1804,11 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { void syncNodeCandidate2Leader(SSyncNode* pSyncNode) { ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); - ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted)); + bool granted = voteGrantedMajority(pSyncNode->pVotesGranted); + if (!granted) { + sError("vgId:%d, not granted by majority.", pSyncNode->vgId); + return; + } syncNodeBecomeLeader(pSyncNode, "candidate to leader"); sNTrace(pSyncNode, "state change syncNodeCandidate2Leader"); @@ -1901,20 +1824,6 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) { pSyncNode->vgId, pSyncNode->raftStore.currentTerm, pSyncNode->commitIndex, lastIndex); } -void syncNodeCandidate2LeaderOld(SSyncNode* pSyncNode) { - ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); - ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted)); - syncNodeBecomeLeader(pSyncNode, "candidate to leader"); - - // Raft 3.6.2 Committing entries from previous terms - syncNodeAppendNoop(pSyncNode); - syncMaybeAdvanceCommitIndex(pSyncNode); - - if (pSyncNode->replicaNum > 1) { - syncNodeReplicate(pSyncNode); - } -} - bool syncNodeIsMnode(SSyncNode* pSyncNode) { return (pSyncNode->vgId == 1); } int32_t syncNodePeerStateInit(SSyncNode* pSyncNode) { @@ -1960,7 +1869,8 @@ void syncNodeCandidate2Follower(SSyncNode* pSyncNode) { // need assert void syncNodeVoteForTerm(SSyncNode* pSyncNode, SyncTerm term, SRaftId* pRaftId) { ASSERT(term == pSyncNode->raftStore.currentTerm); - ASSERT(!raftStoreHasVoted(pSyncNode)); + bool voted = raftStoreHasVoted(pSyncNode); + ASSERT(!voted); raftStoreVote(pSyncNode, pRaftId); } @@ -2638,24 +2548,6 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) { return 0; } 
-int32_t syncNodeOnLocalCmdOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) { - ASSERT(false && "deprecated"); - SyncLocalCmd* pMsg = pRpcMsg->pCont; - syncLogRecvLocalCmd(ths, pMsg, ""); - - if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) { - syncNodeStepDown(ths, pMsg->currentTerm); - - } else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) { - syncNodeFollowerCommit(ths, pMsg->commitIndex); - - } else { - sError("error local cmd"); - } - - return 0; -} - // TLA+ Spec // ClientRequest(i, v) == // /\ state[i] = Leader @@ -2700,96 +2592,6 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIn } } -int32_t syncNodeOnClientRequestOld(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIndex) { - sNTrace(ths, "on client request"); - - int32_t ret = 0; - int32_t code = 0; - - SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore); - SyncTerm term = ths->raftStore.currentTerm; - SSyncRaftEntry* pEntry; - - if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { - pEntry = syncEntryBuildFromClientRequest(pMsg->pCont, term, index); - } else { - pEntry = syncEntryBuildFromRpcMsg(pMsg, term, index); - } - - LRUHandle* h = NULL; - - if (ths->state == TAOS_SYNC_STATE_LEADER) { - // append entry - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry); - if (code != 0) { - if (ths->replicaNum == 1) { - if (h) { - taosLRUCacheRelease(ths->pLogStore->pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - return -1; - - } else { - // del resp mgr, call FpCommitCb - SFsmCbMeta cbMeta = { - .index = pEntry->index, - .lastConfigIndex = SYNC_INDEX_INVALID, - .isWeak = pEntry->isWeak, - .code = -1, - .state = ths->state, - .seqNum = pEntry->seqNum, - .term = pEntry->term, - .currentTerm = ths->raftStore.currentTerm, - .flag = 0, - }; - ths->pFsm->FpCommitCb(ths->pFsm, pMsg, &cbMeta); - - if (h) { - taosLRUCacheRelease(ths->pLogStore->pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - return -1; - } - } - - 
syncCacheEntry(ths->pLogStore, pEntry, &h); - - // if mulit replica, start replicate right now - if (ths->replicaNum > 1) { - syncNodeReplicate(ths); - } - - // if only myself, maybe commit right now - if (ths->replicaNum == 1) { - if (syncNodeIsMnode(ths)) { - syncMaybeAdvanceCommitIndex(ths); - } else { - syncOneReplicaAdvance(ths); - } - } - } - - if (pRetIndex != NULL) { - if (ret == 0 && pEntry != NULL) { - *pRetIndex = pEntry->index; - } else { - *pRetIndex = SYNC_INDEX_INVALID; - } - } - - if (h) { - taosLRUCacheRelease(ths->pLogStore->pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - return ret; -} - const char* syncStr(ESyncState state) { switch (state) { case TAOS_SYNC_STATE_FOLLOWER: @@ -2894,129 +2696,6 @@ bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg) { return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType) && ths->vgId != 1); } -int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) { - ASSERT(false); - if (beginIndex > endIndex) { - return 0; - } - - if (ths == NULL) { - return -1; - } - - if (ths->pFsm != NULL && ths->pFsm->FpGetSnapshotInfo != NULL) { - // advance commit index to sanpshot first - SSnapshot snapshot = {0}; - ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot); - if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex >= beginIndex) { - sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, beginIndex, snapshot.lastApplyIndex); - - // update begin index - beginIndex = snapshot.lastApplyIndex + 1; - } - } - - int32_t code = 0; - ESyncState state = flag; - - sNTrace(ths, "commit by wal from index:%" PRId64 " to index:%" PRId64, beginIndex, endIndex); - - // execute fsm - if (ths->pFsm != NULL) { - for (SyncIndex i = beginIndex; i <= endIndex; ++i) { - if (i != SYNC_INDEX_INVALID) { - SSyncRaftEntry* pEntry; - SLRUCache* pCache = ths->pLogStore->pCache; - LRUHandle* h = taosLRUCacheLookup(pCache, &i, sizeof(i)); - if (h) { - 
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h); - - ths->pLogStore->cacheHit++; - sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", i, pEntry->bytes, pEntry); - - } else { - ths->pLogStore->cacheMiss++; - sNTrace(ths, "miss cache index:%" PRId64, i); - - code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, i, &pEntry); - // ASSERT(code == 0); - // ASSERT(pEntry != NULL); - if (code != 0 || pEntry == NULL) { - sNError(ths, "get log entry error"); - sFatal("vgId:%d, get log entry %" PRId64 " error when commit since %s", ths->vgId, i, terrstr()); - continue; - } - } - - SRpcMsg rpcMsg = {0}; - syncEntry2OriginalRpc(pEntry, &rpcMsg); - - sTrace("do commit index:%" PRId64 ", type:%s", i, TMSG_INFO(pEntry->msgType)); - - // user commit - if ((ths->pFsm->FpCommitCb != NULL) && syncUtilUserCommit(pEntry->originalRpcType)) { - bool internalExecute = true; - if ((ths->replicaNum == 1) && ths->restoreFinish && ths->vgId != 1) { - internalExecute = false; - } - - sNTrace(ths, "user commit index:%" PRId64 ", internal:%d, type:%s", i, internalExecute, - TMSG_INFO(pEntry->msgType)); - - // execute fsm in apply thread, or execute outside syncPropose - if (internalExecute) { - SFsmCbMeta cbMeta = { - .index = pEntry->index, - .lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, pEntry->index), - .isWeak = pEntry->isWeak, - .code = 0, - .state = ths->state, - .seqNum = pEntry->seqNum, - .term = pEntry->term, - .currentTerm = ths->raftStore.currentTerm, - .flag = flag, - }; - - syncRespMgrGetAndDel(ths->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info); - ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, &cbMeta); - } - } - -#if 0 - // execute in pre-commit - // leader transfer - if (pEntry->originalRpcType == TDMT_SYNC_LEADER_TRANSFER) { - code = syncDoLeaderTransfer(ths, &rpcMsg, pEntry); - ASSERT(code == 0); - } -#endif - - // restore finish - // if only snapshot, a noop entry will be append, so syncLogLastIndex is always ok - if (pEntry->index == 
ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { - if (ths->restoreFinish == false) { - if (ths->pFsm->FpRestoreFinishCb != NULL) { - ths->pFsm->FpRestoreFinishCb(ths->pFsm); - } - ths->restoreFinish = true; - - int64_t restoreDelay = taosGetTimestampMs() - ths->leaderTime; - sNTrace(ths, "restore finish, index:%" PRId64 ", elapsed:%" PRId64 " ms", pEntry->index, restoreDelay); - } - } - - rpcFreeCont(rpcMsg.pCont); - if (h) { - taosLRUCacheRelease(pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - } - } - } - return 0; -} - bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) { for (int32_t i = 0; i < ths->replicaNum; ++i) { if (syncUtilSameId(&((ths->replicasId)[i]), pRaftId)) { diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index b61fc2e90d..b95aa04e40 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -940,8 +940,11 @@ int32_t syncNodeLogReplMgrInit(SSyncNode* pNode) { for (int i = 0; i < TSDB_MAX_REPLICA; i++) { ASSERT(pNode->logReplMgrs[i] == NULL); pNode->logReplMgrs[i] = syncLogReplMgrCreate(); + if (pNode->logReplMgrs[i] == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } pNode->logReplMgrs[i]->peerId = i; - ASSERTS(pNode->logReplMgrs[i] != NULL, "Out of memory."); } return 0; } diff --git a/source/libs/sync/src/syncReplication.c b/source/libs/sync/src/syncReplication.c index 1aa476e84e..3df203221b 100644 --- a/source/libs/sync/src/syncReplication.c +++ b/source/libs/sync/src/syncReplication.c @@ -48,92 +48,6 @@ int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg); -int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId, bool snapshot) { - ASSERT(false && "deprecated"); - // next index - SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId); - - if (snapshot) { - // maybe start snapshot - SyncIndex logStartIndex = 
pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore); - SyncIndex logEndIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore); - if (nextIndex < logStartIndex || nextIndex - 1 > logEndIndex) { - sNTrace(pSyncNode, "maybe start snapshot for next-index:%" PRId64 ", start:%" PRId64 ", end:%" PRId64, nextIndex, - logStartIndex, logEndIndex); - // start snapshot - int32_t code = syncNodeStartSnapshot(pSyncNode, pDestId); - } - } - - // pre index, pre term - SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex); - SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex); - - // prepare entry - SRpcMsg rpcMsg = {0}; - SyncAppendEntries* pMsg = NULL; - - SSyncRaftEntry* pEntry = NULL; - SLRUCache* pCache = pSyncNode->pLogStore->pCache; - LRUHandle* h = taosLRUCacheLookup(pCache, &nextIndex, sizeof(nextIndex)); - int32_t code = 0; - if (h) { - pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h); - code = 0; - - pSyncNode->pLogStore->cacheHit++; - sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", nextIndex, pEntry->bytes, pEntry); - - } else { - pSyncNode->pLogStore->cacheMiss++; - sNTrace(pSyncNode, "miss cache index:%" PRId64, nextIndex); - - code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry); - } - - if (code == 0) { - ASSERT(pEntry != NULL); - - code = syncBuildAppendEntries(&rpcMsg, (int32_t)(pEntry->bytes), pSyncNode->vgId); - ASSERT(code == 0); - - pMsg = rpcMsg.pCont; - memcpy(pMsg->data, pEntry, pEntry->bytes); - } else { - if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { - // no entry in log - code = syncBuildAppendEntries(&rpcMsg, 0, pSyncNode->vgId); - ASSERT(code == 0); - - pMsg = rpcMsg.pCont; - } else { - sNError(pSyncNode, "replicate to dnode:%d error, next-index:%" PRId64, DID(pDestId), nextIndex); - return -1; - } - } - - if (h) { - taosLRUCacheRelease(pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - // prepare msg - ASSERT(pMsg != NULL); - pMsg->srcId 
= pSyncNode->myRaftId; - pMsg->destId = *pDestId; - pMsg->term = pSyncNode->raftStore.currentTerm; - pMsg->prevLogIndex = preLogIndex; - pMsg->prevLogTerm = preLogTerm; - pMsg->commitIndex = pSyncNode->commitIndex; - pMsg->privateTerm = 0; - // pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId); - - // send msg - syncNodeMaybeSendAppendEntries(pSyncNode, pDestId, &rpcMsg); - return 0; -} - int32_t syncNodeReplicate(SSyncNode* pNode) { SSyncLogBuffer* pBuf = pNode->pLogBuf; taosThreadMutexLock(&pBuf->mutex); @@ -156,25 +70,6 @@ int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode) { return 0; } -int32_t syncNodeReplicateOld(SSyncNode* pSyncNode) { - if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { - return -1; - } - - sNTrace(pSyncNode, "do replicate"); - - int32_t ret = 0; - for (int i = 0; i < pSyncNode->peersNum; ++i) { - SRaftId* pDestId = &(pSyncNode->peersId[i]); - ret = syncNodeReplicateOne(pSyncNode, pDestId, true); - if (ret != 0) { - sError("vgId:%d, do append entries error for dnode:%d", pSyncNode->vgId, DID(pDestId)); - } - } - - return 0; -} - int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) { SyncAppendEntries* pMsg = pRpcMsg->pCont; pMsg->destId = *destRaftId; @@ -182,39 +77,6 @@ int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftI return 0; } -int32_t syncNodeSendAppendEntriesOld(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) { - int32_t ret = 0; - SyncAppendEntries* pMsg = pRpcMsg->pCont; - if (pMsg == NULL) { - sError("vgId:%d, sync-append-entries msg is NULL", pSyncNode->vgId); - return 0; - } - - SPeerState* pState = syncNodeGetPeerState(pSyncNode, destRaftId); - if (pState == NULL) { - sError("vgId:%d, replica maybe dropped", pSyncNode->vgId); - return 0; - } - - // save index, otherwise pMsg will be free by rpc - SyncIndex saveLastSendIndex = pState->lastSendIndex; - bool update = false; - if (pMsg->dataLen > 0) { - 
saveLastSendIndex = pMsg->prevLogIndex + 1; - update = true; - } - - syncLogSendAppendEntries(pSyncNode, pMsg, ""); - syncNodeSendMsgById(destRaftId, pSyncNode, pRpcMsg); - - if (update) { - pState->lastSendIndex = saveLastSendIndex; - pState->lastSendTime = taosGetTimestampMs(); - } - - return ret; -} - int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) { int32_t ret = 0; SyncAppendEntries* pMsg = pRpcMsg->pCont; From d6028bdde185e073b80d9ef7d9a886e8df9ec905 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Wed, 18 Jan 2023 14:59:02 +0800 Subject: [PATCH 02/20] some code --- source/dnode/vnode/src/inc/tsdb.h | 1 + source/dnode/vnode/src/inc/vnodeInt.h | 2 +- source/dnode/vnode/src/sma/smaSnapshot.c | 4 +- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 1089 +++++++++++++++----- source/dnode/vnode/src/tsdb/tsdbUtil.c | 16 +- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 2 +- 6 files changed, 831 insertions(+), 283 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 5a2e462c8c..2efb00ae32 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -202,6 +202,7 @@ int32_t tsdbCmprColData(SColData *pColData, int8_t cmprAlg, SBlockCol *pBlockCol uint8_t **ppBuf); int32_t tsdbDecmprColData(uint8_t *pIn, SBlockCol *pBlockCol, int8_t cmprAlg, int32_t nVal, SColData *pColData, uint8_t **ppBuf); +int32_t tRowInfoCmprFn(const void *p1, const void *p2); // tsdbMemTable ============================================================================================== // SMemTable int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 75367883f1..2501af7f04 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -247,7 +247,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader); int32_t 
tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData); // STsdbSnapWriter ======================================== int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter); -int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData); +int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr); int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter); int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback); // STqSnapshotReader == diff --git a/source/dnode/vnode/src/sma/smaSnapshot.c b/source/dnode/vnode/src/sma/smaSnapshot.c index 34f884f9f9..0a6fac0fe7 100644 --- a/source/dnode/vnode/src/sma/smaSnapshot.c +++ b/source/dnode/vnode/src/sma/smaSnapshot.c @@ -423,10 +423,10 @@ int32_t rsmaSnapWrite(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData) // rsma1/rsma2 if (pHdr->type == SNAP_DATA_RSMA1) { pHdr->type = SNAP_DATA_TSDB; - code = tsdbSnapWrite(pWriter->pDataWriter[0], pData, nData); + code = tsdbSnapWrite(pWriter->pDataWriter[0], pHdr); } else if (pHdr->type == SNAP_DATA_RSMA2) { pHdr->type = SNAP_DATA_TSDB; - code = tsdbSnapWrite(pWriter->pDataWriter[1], pData, nData); + code = tsdbSnapWrite(pWriter->pDataWriter[1], pHdr); } else if (pHdr->type == SNAP_DATA_QTASK) { code = rsmaSnapWriteQTaskInfo(pWriter, pData, nData); } else { diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 08d52554c6..b2574ddadd 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -15,6 +15,280 @@ #include "tsdb.h" +extern int32_t tsdbReadDataBlockEx(SDataFReader* pReader, SDataBlk* pDataBlk, SBlockData* pBlockData); + +// STsdbDataIter2 ======================================== +#define TSDB_MEM_TABLE_DATA_ITER 0 +#define TSDB_DATA_FILE_DATA_ITER 1 +#define TSDB_STT_FILE_DATA_ITER 2 + +typedef struct STsdbDataIter2 STsdbDataIter2; +struct STsdbDataIter2 { + STsdbDataIter2* next; + 
SRBTreeNode rbtn; + + int32_t type; + SRowInfo rowInfo; + union { + // TSDB_MEM_TABLE_DATA_ITER + struct { + SMemTable* pMemTable; + } mIter; + + // TSDB_DATA_FILE_DATA_ITER + struct { + SDataFReader* pReader; + SArray* aBlockIdx; // SArray + SMapData mDataBlk; + SBlockData bData; + int32_t iBlockIdx; + int32_t iDataBlk; + int32_t iRow; + + } dIter; + + // TSDB_STT_FILE_DATA_ITER + struct { + SDataFReader* pReader; + int32_t iStt; + SArray* aSttBlk; + SBlockData bData; + int32_t iSttBlk; + int32_t iRow; + } sIter; + }; +}; + +#define TSDB_RBTN_TO_DATA_ITER(pNode) ((STsdbDataIter2*)(((char*)pNode) - offsetof(STsdbDataIter2, rbtn))) + +/* open */ +static int32_t tsdbOpenDataFileDataIter(SDataFReader* pReader, STsdbDataIter2** ppIter) { + int32_t code = 0; + int32_t lino = 0; + + // create handle + STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter)); + if (pIter == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + pIter->type = TSDB_DATA_FILE_DATA_ITER; + pIter->dIter.pReader = pReader; + if ((pIter->dIter.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tBlockDataCreate(&pIter->dIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->dIter.iBlockIdx = -1; + pIter->dIter.iDataBlk = -1; + pIter->dIter.iRow = -1; + + // read data + code = tsdbReadBlockIdx(pReader, pIter->dIter.aBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(pIter->dIter.aBlockIdx) == 0) goto _clear; + +_exit: + if (code) { + if (pIter) { + _clear: + tBlockDataDestroy(&pIter->dIter.bData, 1); + taosArrayDestroy(pIter->dIter.aBlockIdx); + taosMemoryFree(pIter); + pIter = NULL; + } + } + *ppIter = pIter; + return code; +} + +static int32_t tsdbOpenSttFileDataIter(SDataFReader* pReader, int32_t iStt, STsdbDataIter2** ppIter) { + int32_t code = 0; + int32_t lino = 0; + + // create handle + STsdbDataIter2* pIter = 
(STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter)); + if (pIter == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + pIter->type = TSDB_STT_FILE_DATA_ITER; + pIter->sIter.pReader = pReader; + pIter->sIter.iStt = iStt; + pIter->sIter.aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); + if (pIter->sIter.aSttBlk == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tBlockDataCreate(&pIter->sIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->sIter.iSttBlk = -1; + pIter->sIter.iRow = -1; + + // read data + code = tsdbReadSttBlk(pReader, iStt, pIter->sIter.aSttBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(pIter->sIter.aSttBlk) == 0) goto _clear; + +_exit: + if (code) { + if (pIter) { + _clear: + taosArrayDestroy(pIter->sIter.aSttBlk); + tBlockDataDestroy(&pIter->sIter.bData, 1); + taosMemoryFree(pIter); + pIter = NULL; + } + } + *ppIter = pIter; + return code; +} + +/* close */ +static void tsdbCloseDataFileDataIter(STsdbDataIter2* pIter) { + tBlockDataDestroy(&pIter->dIter.bData, 1); + tMapDataClear(&pIter->dIter.mDataBlk); + taosArrayDestroy(pIter->dIter.aBlockIdx); + taosMemoryFree(pIter); +} + +static void tsdbCloseSttFileDataIter(STsdbDataIter2* pIter) { + tBlockDataDestroy(&pIter->sIter.bData, 1); + taosArrayDestroy(pIter->sIter.aSttBlk); + taosMemoryFree(pIter); +} + +static void tsdbCloseDataIter2(STsdbDataIter2* pIter) { + if (pIter->type == TSDB_MEM_TABLE_DATA_ITER) { + ASSERT(0); + } else if (pIter->type == TSDB_DATA_FILE_DATA_ITER) { + tsdbCloseDataFileDataIter(pIter); + } else if (pIter->type == TSDB_STT_FILE_DATA_ITER) { + tsdbCloseSttFileDataIter(pIter); + } else { + ASSERT(0); + } +} + +/* cmpr */ +static int32_t tsdbDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* pNode2) { + STsdbDataIter2* pIter1 = TSDB_RBTN_TO_DATA_ITER(pNode1); + STsdbDataIter2* pIter2 = TSDB_RBTN_TO_DATA_ITER(pNode2); + return tRowInfoCmprFn(&pIter1->rowInfo, 
&pIter2->rowInfo); +} + +/* seek */ + +/* iter next */ +static int32_t tsdbDataFileDataIterNext(STsdbDataIter2* pIter) { + int32_t code = 0; + int32_t lino = 0; + + for (;;) { + if (++pIter->dIter.iRow < pIter->dIter.bData.nRow) { + pIter->rowInfo.suid = pIter->dIter.bData.suid; + pIter->rowInfo.uid = pIter->dIter.bData.uid; + pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->dIter.bData, pIter->dIter.iRow); + break; + } + + for (;;) { + if (++pIter->dIter.iDataBlk < pIter->dIter.mDataBlk.nItem) { + SDataBlk dataBlk; + tMapDataGetItemByIdx(&pIter->dIter.mDataBlk, pIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); + + code = tsdbReadDataBlockEx(pIter->dIter.pReader, &dataBlk, &pIter->dIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->dIter.iRow = -1; + + break; + } + + for (;;) { + if (++pIter->dIter.iBlockIdx < taosArrayGetSize(pIter->dIter.aBlockIdx)) { + SBlockIdx* pBlockIdx = taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx); + + code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->dIter.iDataBlk = -1; + + break; + } else { + pIter->rowInfo = (SRowInfo){0}; + goto _exit; + } + } + } + } + +_exit: + if (code) { + tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSttFileDataIterNext(STsdbDataIter2* pIter) { + int32_t code = 0; + int32_t lino = 0; + + for (;;) { + if (++pIter->sIter.iRow < pIter->sIter.bData.nRow) { + pIter->rowInfo.suid = pIter->sIter.bData.suid; + pIter->rowInfo.uid = pIter->sIter.bData.uid ? 
pIter->sIter.bData.uid : pIter->sIter.bData.aUid[pIter->sIter.iRow]; + pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->sIter.bData, pIter->sIter.iRow); + break; + } + + if (++pIter->sIter.iSttBlk < taosArrayGetSize(pIter->sIter.aSttBlk)) { + SSttBlk* pSttBlk = taosArrayGet(pIter->sIter.aSttBlk, pIter->sIter.iSttBlk); + + code = tsdbReadSttBlockEx(pIter->sIter.pReader, pIter->sIter.iStt, pSttBlk, &pIter->sIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->sIter.iRow = -1; + } else { + pIter->rowInfo = (SRowInfo){0}; + break; + } + } + +_exit: + if (code) { + tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbDataIterNext2(STsdbDataIter2* pIter) { + int32_t code = 0; + + if (pIter->type == TSDB_MEM_TABLE_DATA_ITER) { + ASSERT(0); + return code; + } else if (pIter->type == TSDB_DATA_FILE_DATA_ITER) { + return tsdbDataFileDataIterNext(pIter); + } else if (pIter->type == TSDB_STT_FILE_DATA_ITER) { + return tsdbSttFileDataIterNext(pIter); + } else { + ASSERT(0); + return code; + } +} + +/* get */ + // STsdbSnapReader ======================================== typedef enum { SNAP_DATA_FILE_ITER = 0, SNAP_STT_FILE_ITER } EFIterT; typedef struct { @@ -63,8 +337,6 @@ struct STsdbSnapReader { uint8_t* aBuf[5]; }; -extern int32_t tRowInfoCmprFn(const void* p1, const void* p2); -extern int32_t tsdbReadDataBlockEx(SDataFReader* pReader, SDataBlk* pDataBlk, SBlockData* pBlockData); extern int32_t tsdbUpdateTableSchema(SMeta* pMeta, int64_t suid, int64_t uid, SSkmInfo* pSkmInfo); static int32_t tFDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* pNode2) { @@ -632,12 +904,9 @@ _exit: // STsdbSnapWriter ======================================== struct STsdbSnapWriter { - STsdb* pTsdb; - int64_t sver; - int64_t ever; - STsdbFS fs; - - // config + STsdb* pTsdb; + int64_t sver; + int64_t ever; int32_t minutes; int8_t precision; int32_t minRow; @@ -646,31 +915,31 @@ struct STsdbSnapWriter { 
int64_t commitID; uint8_t* aBuf[5]; - // for data file - SBlockData bData; - int32_t fid; - TABLEID id; - SSkmInfo skmTable; - struct { - SDataFReader* pReader; - SArray* aBlockIdx; - int32_t iBlockIdx; - SBlockIdx* pBlockIdx; - SMapData mDataBlk; - int32_t iDataBlk; - SBlockData bData; - int32_t iRow; - } dReader; - struct { - SDataFWriter* pWriter; - SArray* aBlockIdx; - SMapData mDataBlk; - SArray* aSttBlk; - SBlockData bData; - SBlockData sData; - } dWriter; + STsdbFS fs; - // for del file + // time-series data + SBlockData inData; + + int32_t fid; + TABLEID tbid; + SSkmInfo skmTable; + + /* reader */ + SDataFReader* pDataFReader; + STsdbDataIter2* iterList; + STsdbDataIter2* pDIter; + STsdbDataIter2* pIter; + SRBTree rbt; // SRBTree + + /* writer */ + SDataFWriter* pDataFWriter; + SArray* aBlockIdx; + SMapData mDataBlk; // SMapData + SArray* aSttBlk; // SArray + SBlockData bData; + SBlockData sData; + + // tombstone data SDelFReader* pDelFReader; SDelFWriter* pDelFWriter; int32_t iDelIdx; @@ -685,7 +954,9 @@ extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, static int32_t tsdbSnapNextTableData(STsdbSnapWriter* pWriter) { int32_t code = 0; + int32_t lino = 0; +#if 0 ASSERT(pWriter->dReader.iRow >= pWriter->dReader.bData.nRow); if (pWriter->dReader.iBlockIdx < taosArrayGetSize(pWriter->dReader.aBlockIdx)) { @@ -702,14 +973,20 @@ static int32_t tsdbSnapNextTableData(STsdbSnapWriter* pWriter) { pWriter->dReader.iDataBlk = 0; // point to the next one tBlockDataReset(&pWriter->dReader.bData); pWriter->dReader.iRow = 0; +#endif _exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } static int32_t tsdbSnapWriteCopyData(STsdbSnapWriter* pWriter, TABLEID* pId) { int32_t code = 0; + int32_t lino = 0; +#if 0 while (true) { if (pWriter->dReader.pBlockIdx == NULL) break; if (tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, pId) >= 0) break; @@ 
-726,37 +1003,93 @@ static int32_t tsdbSnapWriteCopyData(STsdbSnapWriter* pWriter, TABLEID* pId) { code = tsdbSnapNextTableData(pWriter); if (code) goto _exit; } +#endif _exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) { int32_t code = 0; + int32_t lino = 0; + if (pWriter->pDIter) { + STsdbDataIter2* pIter = pWriter->pDIter; + for (;;) { + if (pIter->dIter.iBlockIdx + 1 >= taosArrayGetSize(pIter->dIter.aBlockIdx)) { + pWriter->pDIter = NULL; + break; + } + + SBlockIdx* pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx + 1); + + int32_t c = tTABLEIDCmprFn(pBlockIdx, pId); + if (c < 0) { + ++pIter->dIter.iBlockIdx; + + code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + SBlockIdx* pNewBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1); + if (pNewBlockIdx == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; /* must assign (not compare) so the check below bails out before pNewBlockIdx is dereferenced */ + TSDB_CHECK_CODE(code, lino, _exit); + } + + pNewBlockIdx->suid = pBlockIdx->suid; + pNewBlockIdx->uid = pBlockIdx->uid; + + code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pIter->dIter.mDataBlk, pBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); + } else if (c == 0) { + ++pIter->dIter.iBlockIdx; + + code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->dIter.iDataBlk = -1; + + break; + } else { + break; + } + } + } + + pWriter->tbid = pId[0]; + + tMapDataReset(&pWriter->mDataBlk); + +#if 0 code = tsdbSnapWriteCopyData(pWriter, pId); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); pWriter->id.suid = pId->suid; pWriter->id.uid = pId->uid; code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, 
_exit); tMapDataReset(&pWriter->dWriter.mDataBlk); code = tBlockDataInit(&pWriter->dWriter.bData, pId, pWriter->skmTable.pTSchema, NULL, 0); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); +#endif - return code; - -_err: - tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { int32_t code = 0; + int32_t lino = 0; +#if 0 if (pWriter->id.suid == 0 && pWriter->id.uid == 0) return code; int32_t c = 1; @@ -807,123 +1140,174 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { pWriter->id.suid = 0; pWriter->id.uid = 0; - - return code; - -_err: - return code; -} - -static int32_t tsdbSnapWriteOpenFile(STsdbSnapWriter* pWriter, int32_t fid) { - int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; - - ASSERT(pWriter->dWriter.pWriter == NULL); - - pWriter->fid = fid; - pWriter->id = (TABLEID){0}; - SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); - - // Reader - if (pSet) { - code = tsdbDataFReaderOpen(&pWriter->dReader.pReader, pWriter->pTsdb, pSet); - if (code) goto _err; - - code = tsdbReadBlockIdx(pWriter->dReader.pReader, pWriter->dReader.aBlockIdx); - if (code) goto _err; - } else { - ASSERT(pWriter->dReader.pReader == NULL); - taosArrayClear(pWriter->dReader.aBlockIdx); - } - pWriter->dReader.iBlockIdx = 0; // point to the next one - code = tsdbSnapNextTableData(pWriter); - if (code) goto _err; - - // Writer - SHeadFile fHead = {.commitID = pWriter->commitID}; - SDataFile fData = {.commitID = pWriter->commitID}; - SSmaFile fSma = {.commitID = pWriter->commitID}; - SSttFile fStt = {.commitID = pWriter->commitID}; - SDFileSet wSet = {.fid = pWriter->fid, .pHeadF = &fHead, .pDataF = &fData, .pSmaF = &fSma}; - if 
(pSet) { - wSet.diskId = pSet->diskId; - fData = *pSet->pDataF; - fSma = *pSet->pSmaF; - for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { - wSet.aSttF[iStt] = pSet->aSttF[iStt]; - } - wSet.nSttF = pSet->nSttF + 1; // TODO: fix pSet->nSttF == pTsdb->maxFile - } else { - SDiskID did = {0}; - tfsAllocDisk(pTsdb->pVnode->pTfs, 0, &did); - tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did); - wSet.diskId = did; - wSet.nSttF = 1; - } - wSet.aSttF[wSet.nSttF - 1] = &fStt; - - code = tsdbDataFWriterOpen(&pWriter->dWriter.pWriter, pWriter->pTsdb, &wSet); - if (code) goto _err; - taosArrayClear(pWriter->dWriter.aBlockIdx); - tMapDataReset(&pWriter->dWriter.mDataBlk); - taosArrayClear(pWriter->dWriter.aSttBlk); - tBlockDataReset(&pWriter->dWriter.bData); - tBlockDataReset(&pWriter->dWriter.sData); - - return code; - -_err: - return code; -} - -static int32_t tsdbSnapWriteCloseFile(STsdbSnapWriter* pWriter) { - int32_t code = 0; - - ASSERT(pWriter->dWriter.pWriter); - - code = tsdbSnapWriteTableDataEnd(pWriter); - if (code) goto _err; - - // copy remain table data - TABLEID id = {.suid = INT64_MAX, .uid = INT64_MAX}; - code = tsdbSnapWriteCopyData(pWriter, &id); - if (code) goto _err; - - code = - tsdbWriteSttBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.sData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg); - if (code) goto _err; - - // Indices - code = tsdbWriteBlockIdx(pWriter->dWriter.pWriter, pWriter->dWriter.aBlockIdx); - if (code) goto _err; - - code = tsdbWriteSttBlk(pWriter->dWriter.pWriter, pWriter->dWriter.aSttBlk); - if (code) goto _err; - - code = tsdbUpdateDFileSetHeader(pWriter->dWriter.pWriter); - if (code) goto _err; - - code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->dWriter.pWriter->wSet); - if (code) goto _err; - - code = tsdbDataFWriterClose(&pWriter->dWriter.pWriter, 1); - if (code) goto _err; - - if (pWriter->dReader.pReader) { - code = tsdbDataFReaderClose(&pWriter->dReader.pReader); - if (code) goto _err; - } +#endif _exit: + if (code) { + 
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; +} -_err: +static int32_t tsdbSnapWriteOpenDataFile(STsdbSnapWriter* pWriter, int32_t fid) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(pWriter->pDataFWriter == NULL && pWriter->fid < fid); + + STsdb* pTsdb = pWriter->pTsdb; + + pWriter->fid = fid; + pWriter->tbid = (TABLEID){0}; + SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); + + // open reader + pWriter->pDataFReader = NULL; + pWriter->iterList = NULL; + pWriter->pDIter = NULL; + pWriter->pIter = NULL; + tRBTreeCreate(&pWriter->rbt, tsdbDataIterCmprFn); + if (pSet) { + code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbOpenDataFileDataIter(pWriter->pDataFReader, &pWriter->pDIter); + TSDB_CHECK_CODE(code, lino, _exit); + if (pWriter->pDIter) { + pWriter->pDIter->next = pWriter->iterList; + pWriter->iterList = pWriter->pDIter; + } + + for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { + code = tsdbOpenSttFileDataIter(pWriter->pDataFReader, iStt, &pWriter->pIter); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->pIter) { + code = tsdbSttFileDataIterNext(pWriter->pIter); + TSDB_CHECK_CODE(code, lino, _exit); + + // add to tree + tRBTreePut(&pWriter->rbt, &pWriter->pIter->rbtn); + + // add to list + pWriter->pIter->next = pWriter->iterList; + pWriter->iterList = pWriter->pIter; + } + } + + pWriter->pIter = NULL; + } + + // open writer + SDiskID diskId; + if (pSet) { + diskId = pSet->diskId; + } else { + tfsAllocDisk(pTsdb->pVnode->pTfs, 0 /*TODO*/, &diskId); + tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, diskId); + } + SDFileSet wSet = {.diskId = diskId, + .fid = fid, + .pHeadF = &(SHeadFile){.commitID = pWriter->commitID}, + .pDataF = (pSet) ? pSet->pDataF : &(SDataFile){.commitID = pWriter->commitID}, + .pSmaF = (pSet) ? 
pSet->pSmaF : &(SSmaFile){.commitID = pWriter->commitID}, + .nSttF = 1, + .aSttF = {&(SSttFile){.commitID = pWriter->commitID}}}; + code = tsdbDataFWriterOpen(&pWriter->pDataFWriter, pTsdb, &wSet); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->aBlockIdx) { + taosArrayClear(pWriter->aBlockIdx); + } else if ((pWriter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + tMapDataReset(&pWriter->mDataBlk); + + if (pWriter->aSttBlk) { + taosArrayClear(pWriter->aSttBlk); + } else if ((pWriter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + tBlockDataReset(&pWriter->bData); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s, fid:%d", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code), + fid); + } else { + tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(pTsdb->pVnode), __func__, fid); + } + return code; +} + +static int32_t tsdbSnapWriteCloseDataFile(STsdbSnapWriter* pWriter) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(pWriter->pDataFWriter); + +#if 0 + // loop write remain data + for (;;) { + SRowInfo* pRowInfo; + + code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pRowInfo == NULL) break; + + code = tsdbSnapWriteTableData(pWriter, pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapWriteNextRow(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // TODO: write remain data +#endif + + // do file-level updates + code = tsdbWriteSttBlk(pWriter->pDataFWriter, pWriter->aSttBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbUpdateDFileSetHeader(pWriter->pDataFWriter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet); + 
TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->pDataFReader) { + code = tsdbDataFReaderClose(&pWriter->pDataFReader); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // TODO: do clear sources + {} + +_exit: + if (code) { + tsdbError("vgId:%d %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s is done", TD_VID(pWriter->pTsdb->pVnode), __func__); + } return code; } static int32_t tsdbSnapWriteToDataFile(STsdbSnapWriter* pWriter, int32_t iRow, int8_t* done) { int32_t code = 0; + int32_t lino = 0; +#if 0 SBlockData* pBData = &pWriter->bData; TABLEID id = {.suid = pBData->suid, .uid = pBData->uid ? pBData->uid : pBData->aUid[iRow]}; TSDBROW row = tsdbRowFromBlockData(pBData, iRow); @@ -942,10 +1326,10 @@ static int32_t tsdbSnapWriteToDataFile(STsdbSnapWriter* pWriter, int32_t iRow, i int32_t c = tsdbKeyCmprFn(&key, &tKey); if (c < 0) { code = tBlockDataAppendRow(&pWriter->dWriter.bData, &row, NULL, id.uid); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); } else if (c > 0) { code = tBlockDataAppendRow(&pWriter->dWriter.bData, &trow, NULL, id.uid); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); } else { ASSERT(0); } @@ -953,7 +1337,7 @@ static int32_t tsdbSnapWriteToDataFile(STsdbSnapWriter* pWriter, int32_t iRow, i if (pWriter->dWriter.bData.nRow >= pWriter->maxRow) { code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk, pWriter->cmprAlg); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); } if (c < 0) { @@ -972,25 +1356,25 @@ static int32_t tsdbSnapWriteToDataFile(STsdbSnapWriter* pWriter, int32_t iRow, i if (c < 0) { code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk, pWriter->cmprAlg); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); code = 
tMapDataPutItem(&pWriter->dWriter.mDataBlk, &dataBlk, tPutDataBlk); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); } else if (c > 0) { code = tBlockDataAppendRow(&pWriter->dWriter.bData, &row, NULL, id.uid); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); if (pWriter->dWriter.bData.nRow >= pWriter->maxRow) { code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk, pWriter->cmprAlg); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); } *done = 1; goto _exit; } else { code = tsdbReadDataBlockEx(pWriter->dReader.pReader, &dataBlk, &pWriter->dReader.bData); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); pWriter->dReader.iRow = 0; pWriter->dReader.iDataBlk++; @@ -998,18 +1382,20 @@ static int32_t tsdbSnapWriteToDataFile(STsdbSnapWriter* pWriter, int32_t iRow, i } } } +#endif _exit: - return code; - -_err: - tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } static int32_t tsdbSnapWriteToSttFile(STsdbSnapWriter* pWriter, int32_t iRow) { int32_t code = 0; + int32_t lino = 0; +#if 0 TABLEID id = {.suid = pWriter->bData.suid, .uid = pWriter->bData.uid ? 
pWriter->bData.uid : pWriter->bData.aUid[iRow]}; TSDBROW row = tsdbRowFromBlockData(&pWriter->bData, iRow); @@ -1017,8 +1403,8 @@ static int32_t tsdbSnapWriteToSttFile(STsdbSnapWriter* pWriter, int32_t iRow) { if (pBData->suid || pBData->uid) { if (!TABLE_SAME_SCHEMA(pBData->suid, pBData->uid, id.suid, id.uid)) { - code = tsdbWriteSttBlock(pWriter->dWriter.pWriter, pBData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg); - if (code) goto _err; + code = tsdbWriteSttBlock(pWriter->pDataFWriter, pBData, pWriter->aSttBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); pBData->suid = 0; pBData->uid = 0; @@ -1027,105 +1413,263 @@ static int32_t tsdbSnapWriteToSttFile(STsdbSnapWriter* pWriter, int32_t iRow) { if (pBData->suid == 0 && pBData->uid == 0) { code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pWriter->id.suid, pWriter->id.uid, &pWriter->skmTable); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); TABLEID tid = {.suid = pWriter->id.suid, .uid = pWriter->id.suid ? 
0 : pWriter->id.uid}; code = tBlockDataInit(pBData, &tid, pWriter->skmTable.pTSchema, NULL, 0); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); } code = tBlockDataAppendRow(pBData, &row, NULL, id.uid); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); if (pBData->nRow >= pWriter->maxRow) { - code = tsdbWriteSttBlock(pWriter->dWriter.pWriter, pBData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg); - if (code) goto _err; + code = tsdbWriteSttBlock(pWriter->pDataFWriter, pBData, pWriter->aSttBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); } +#endif _exit: - return code; - -_err: + if (code) { + tsdbError("vgId:%d %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); + } return code; } -static int32_t tsdbSnapWriteRowData(STsdbSnapWriter* pWriter, int32_t iRow) { +static int32_t tsdbSnapWriteNextRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) { int32_t code = 0; + int32_t lino = 0; - SBlockData* pBlockData = &pWriter->bData; - TABLEID id = {.suid = pBlockData->suid, .uid = pBlockData->uid ? 
pBlockData->uid : pBlockData->aUid[iRow]}; + if (pWriter->pIter) { + code = tsdbDataIterNext2(pWriter->pIter); + TSDB_CHECK_CODE(code, lino, _exit); - // End last table data write if need - if (tTABLEIDCmprFn(&pWriter->id, &id) != 0) { - code = tsdbSnapWriteTableDataEnd(pWriter); - if (code) goto _err; + if (pWriter->pIter->rowInfo.suid == 0 && pWriter->pIter->rowInfo.uid == 0) { + pWriter->pIter = NULL; + } else { + SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt); + if (pNode) { + int32_t c = tsdbDataIterCmprFn(&pWriter->pIter->rbtn, pNode); + if (c > 0) { + tRBTreePut(&pWriter->rbt, &pWriter->pIter->rbtn); + pWriter->pIter = NULL; + } else if (c == 0) { + ASSERT(0); + } + } + } } - // Start new table data write if need - if (pWriter->id.suid == 0 && pWriter->id.uid == 0) { - code = tsdbSnapWriteTableDataStart(pWriter, &id); - if (code) goto _err; + if (pWriter->pIter == NULL) { + SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt); + if (pNode) { + tRBTreeDrop(&pWriter->rbt, pNode); + pWriter->pIter = TSDB_RBTN_TO_DATA_ITER(pNode); + } } - // Merge with .data file data - int8_t done = 0; - if (pWriter->dReader.pBlockIdx && tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, &id) == 0) { - code = tsdbSnapWriteToDataFile(pWriter, iRow, &done); - if (code) goto _err; - } - - // Append to the .stt data block (todo: check if need to set/reload sst block) - if (!done) { - code = tsdbSnapWriteToSttFile(pWriter, iRow); - if (code) goto _err; + if (ppRowInfo) { + if (pWriter->pIter) { + *ppRowInfo = &pWriter->pIter->rowInfo; + } else { + *ppRowInfo = NULL; + } } _exit: - return code; - -_err: - tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } -static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; 
- SBlockData* pBlockData = &pWriter->bData; +static int32_t tsdbSnapWriteGetRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) { + int32_t code = 0; + int32_t lino = 0; - // Decode data - SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; - code = tDecmprBlockData(pHdr->data, pHdr->size, pBlockData, pWriter->aBuf); - if (code) goto _err; + if (pWriter->pIter) { + *ppRowInfo = &pWriter->pIter->rowInfo; + goto _exit; + } - ASSERT(pBlockData->nRow > 0); + code = tsdbSnapWriteNextRow(pWriter, ppRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); - // Loop to handle each row - for (int32_t iRow = 0; iRow < pBlockData->nRow; iRow++) { - TSKEY ts = pBlockData->aTSKEY[iRow]; - int32_t fid = tsdbKeyFid(ts, pWriter->minutes, pWriter->precision); +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} - if (pWriter->dWriter.pWriter == NULL || pWriter->fid != fid) { - if (pWriter->dWriter.pWriter) { - // ASSERT(fid > pWriter->fid); +static int32_t tsdbSnapWriteRowImpl(STsdbSnapWriter* pWriter, SRowInfo* pRowInfo) { + int32_t code = 0; + int32_t lino = 0; - code = tsdbSnapWriteCloseFile(pWriter); - if (code) goto _err; - } + // TODO + ASSERT(0); - code = tsdbSnapWriteOpenFile(pWriter, fid); - if (code) goto _err; +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { + int32_t code = 0; + int32_t lino = 0; + + code = tBlockDataAppendRow(&pWriter->bData, pRow, NULL, pWriter->tbid.uid); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->bData.nRow >= pWriter->maxRow) { + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", 
TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowInfo) { + int32_t code = 0; + int32_t lino = 0; + + // switch to new table if need + if (pRowInfo->uid != pWriter->tbid.uid) { + if (pRowInfo->uid) { + code = tsdbSnapWriteTableDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); } - code = tsdbSnapWriteRowData(pWriter, iRow); - if (code) goto _err; + code = tsdbSnapWriteTableDataStart(pWriter, (TABLEID*)pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); } - return code; + // do write the row + if (pWriter->pDIter == NULL /* || false */) { + goto _write_incoming_row; + } else { + for (;;) { + while (pWriter->pDIter->dIter.iRow + 1 < pWriter->pDIter->dIter.bData.nRow) { + TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow + 1); -_err: - tsdbError("vgId:%d, vnode snapshot tsdb write data for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path, - tstrerror(code)); + int32_t c = tsdbRowCmprFn(&pRowInfo->row, &row); + if (c < 0) { + goto _write_incoming_row; + } else if (c > 0) { + ++pWriter->pDIter->dIter.iRow; + + code = tsdbSnapWriteTableRow(pWriter, &row); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + ASSERT(0); + } + } + + while (pWriter->pDIter->dIter.iDataBlk < pWriter->pDIter->dIter.mDataBlk.nItem) { + SDataBlk dataBlk; + + tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk + 1, &dataBlk, + tGetDataBlk); + + int32_t c = tDataBlkCmprFn( + &dataBlk, &(SDataBlk){.minKey = TSDBROW_KEY(&pRowInfo->row), .maxKey = TSDBROW_KEY(&pRowInfo->row)}); + if (c > 0) { + goto _write_incoming_row; + } else if (c < 0) { + ++pWriter->pDIter->dIter.iDataBlk; + tMapDataPutItem(&pWriter->pDIter->dIter.mDataBlk, &dataBlk, tPutDataBlk); + } else { + ++pWriter->pDIter->dIter.iDataBlk; + + code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData); + 
TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pDIter->dIter.iRow = -1; + break; + } + } + } + } + +_write_incoming_row: + code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { + int32_t code = 0; + int32_t lino = 0; + + code = tDecmprBlockData(pHdr->data, pHdr->size, &pWriter->inData, pWriter->aBuf); + TSDB_CHECK_CODE(code, lino, _exit); + + ASSERT(pWriter->inData.nRow > 0); + + // switch to new data file if need + int32_t fid = tsdbKeyFid(pWriter->inData.aTSKEY[0], pWriter->minutes, pWriter->precision); + if (pWriter->fid != fid) { + if (pWriter->pDataFWriter) { + code = tsdbSnapWriteCloseDataFile(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbSnapWriteOpenDataFile(pWriter, fid); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // loop write each row + SRowInfo* pRowInfo; + + code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + for (int32_t iRow = 0; iRow < pWriter->bData.nRow; ++iRow) { + SRowInfo rInfo = {.suid = pWriter->inData.suid, + .uid = pWriter->inData.uid ? 
pWriter->inData.uid : pWriter->inData.aUid[iRow], + .row = tsdbRowFromBlockData(&pWriter->inData, iRow)}; + + for (;;) { + if (pRowInfo == NULL) { + code = tsdbSnapWriteTableData(pWriter, &rInfo); + TSDB_CHECK_CODE(code, lino, _exit); + break; + } else { + int32_t c = tRowInfoCmprFn(&rInfo, pRowInfo); + if (c < 0) { + code = tsdbSnapWriteTableData(pWriter, &rInfo); + TSDB_CHECK_CODE(code, lino, _exit); + break; + } else if (c > 0) { + code = tsdbSnapWriteTableData(pWriter, pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapWriteNextRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + ASSERT(0); + } + } + } + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64 " nRow:%d", TD_VID(pWriter->pTsdb->pVnode), __func__, + pWriter->inData.suid, pWriter->inData.uid, pWriter->inData.nRow); + } return code; } @@ -1159,7 +1703,7 @@ _exit: return code; } -static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { +static int32_t tsdbSnapWriteDelData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { int32_t code = 0; STsdb* pTsdb = pWriter->pTsdb; @@ -1186,10 +1730,7 @@ static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32 taosArrayClear(pWriter->aDelIdxW); } - SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; - TABLEID id = *(TABLEID*)pHdr->data; - - ASSERT(pHdr->size + sizeof(SSnapDataHdr) == nData); + TABLEID id = *(TABLEID*)pHdr->data; // Move write data < id code = tsdbSnapMoveWriteDelData(pWriter, &id); @@ -1208,11 +1749,11 @@ static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32 taosArrayClear(pWriter->aDelData); } - int64_t n = sizeof(SSnapDataHdr) + sizeof(TABLEID); - while (n < nData) { + int64_t n = sizeof(TABLEID); + while (n < pHdr->size) { SDelData delData; - n += tGetDelData(pData + n, 
&delData); + n += tGetDelData(pHdr->data + n, &delData); if (taosArrayPush(pWriter->aDelData, &delData) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -1275,12 +1816,11 @@ _err: // APIs int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter) { - int32_t code = 0; - int32_t lino = 0; - STsdbSnapWriter* pWriter = NULL; + int32_t code = 0; + int32_t lino = 0; // alloc - pWriter = (STsdbSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); + STsdbSnapWriter* pWriter = (STsdbSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); if (pWriter == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); @@ -1288,11 +1828,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr pWriter->pTsdb = pTsdb; pWriter->sver = sver; pWriter->ever = ever; - - code = tsdbFSCopy(pTsdb, &pWriter->fs); - TSDB_CHECK_CODE(code, lino, _exit); - - // config pWriter->minutes = pTsdb->keepCfg.days; pWriter->precision = pTsdb->keepCfg.precision; pWriter->minRow = pTsdb->pVnode->config.tsdbCfg.minRows; @@ -1300,7 +1835,19 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr pWriter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; pWriter->commitID = pTsdb->pVnode->state.commitID; + code = tsdbFSCopy(pTsdb, &pWriter->fs); + TSDB_CHECK_CODE(code, lino, _exit); + // SNAP_DATA_TSDB +#if 1 + pWriter->fid = INT32_MIN; + + code = tBlockDataCreate(&pWriter->inData); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tBlockDataCreate(&pWriter->bData); + TSDB_CHECK_CODE(code, lino, _exit); +#else code = tBlockDataCreate(&pWriter->bData); TSDB_CHECK_CODE(code, lino, _exit); @@ -1330,6 +1877,7 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr TSDB_CHECK_CODE(code, lino, _exit); code = tBlockDataCreate(&pWriter->dWriter.sData); TSDB_CHECK_CODE(code, lino, _exit); +#endif // SNAP_DATA_DEL pWriter->aDelIdxR = taosArrayInit(0, sizeof(SDelIdx)); @@ -1354,6 +1902,7 @@ 
_exit: *ppWriter = NULL; if (pWriter) { +#if 0 if (pWriter->aDelIdxW) taosArrayDestroy(pWriter->aDelIdxW); if (pWriter->aDelData) taosArrayDestroy(pWriter->aDelData); if (pWriter->aDelIdxR) taosArrayDestroy(pWriter->aDelIdxR); @@ -1366,6 +1915,7 @@ _exit: tBlockDataDestroy(&pWriter->bData, 1); tsdbFSDestroy(&pWriter->fs); taosMemoryFree(pWriter); +#endif } } else { tsdbInfo("vgId:%d, %s done", TD_VID(pTsdb->pVnode), __func__); @@ -1376,8 +1926,8 @@ _exit: int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter) { int32_t code = 0; - if (pWriter->dWriter.pWriter) { - code = tsdbSnapWriteCloseFile(pWriter); + if (pWriter->pDataFWriter) { + code = tsdbSnapWriteCloseDataFile(pWriter); if (code) goto _exit; } @@ -1422,17 +1972,17 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { // SNAP_DATA_TSDB - // Writer - tBlockDataDestroy(&pWriter->dWriter.sData, 1); - tBlockDataDestroy(&pWriter->dWriter.bData, 1); - taosArrayDestroy(pWriter->dWriter.aSttBlk); - tMapDataClear(&pWriter->dWriter.mDataBlk); - taosArrayDestroy(pWriter->dWriter.aBlockIdx); + // // Writer + // tBlockDataDestroy(&pWriter->dWriter.sData, 1); + // tBlockDataDestroy(&pWriter->dWriter.bData, 1); + // taosArrayDestroy(pWriter->dWriter.aSttBlk); + // tMapDataClear(&pWriter->dWriter.mDataBlk); + // taosArrayDestroy(pWriter->dWriter.aBlockIdx); - // Reader - tBlockDataDestroy(&pWriter->dReader.bData, 1); - tMapDataClear(&pWriter->dReader.mDataBlk); - taosArrayDestroy(pWriter->dReader.aBlockIdx); + // // Reader + // tBlockDataDestroy(&pWriter->dReader.bData, 1); + // tMapDataClear(&pWriter->dReader.mDataBlk); + // taosArrayDestroy(pWriter->dReader.aBlockIdx); tBlockDataDestroy(&pWriter->bData, 1); tDestroyTSchema(pWriter->skmTable.pTSchema); @@ -1453,35 +2003,32 @@ _err: return code; } -int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; +int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, 
SSnapDataHdr* pHdr) { + int32_t code = 0; + int32_t lino = 0; - // ts data if (pHdr->type == SNAP_DATA_TSDB) { - code = tsdbSnapWriteData(pWriter, pData, nData); - if (code) goto _err; - + code = tsdbSnapWriteTimeSeriesData(pWriter, pHdr); + TSDB_CHECK_CODE(code, lino, _exit); goto _exit; - } else { - if (pWriter->dWriter.pWriter) { - code = tsdbSnapWriteCloseFile(pWriter); - if (code) goto _err; - } + } else if (pWriter->pDataFWriter) { + code = tsdbSnapWriteCloseDataFile(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); } - // del data if (pHdr->type == SNAP_DATA_DEL) { - code = tsdbSnapWriteDel(pWriter, pData, nData); - if (code) goto _err; + code = tsdbSnapWriteDelData(pWriter, pHdr); + TSDB_CHECK_CODE(code, lino, _exit); + goto _exit; } _exit: - tsdbDebug("vgId:%d, tsdb snapshot write for %s succeed", TD_VID(pWriter->pTsdb->pVnode), pWriter->pTsdb->path); - return code; - -_err: - tsdbError("vgId:%d, tsdb snapshot write for %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), pWriter->pTsdb->path, - tstrerror(code)); + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s, type:%d index:%" PRId64 " size:%" PRId64, + TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code), pHdr->type, pHdr->index, pHdr->size); + } else { + tsdbDebug("vgId:%d %s done, type:%d index:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, + pHdr->type, pHdr->index, pHdr->size); + } return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index 86adc1dc80..10b3ef8562 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -684,7 +684,7 @@ int32_t tRowMergerInit2(SRowMerger *pMerger, STSchema *pResTSchema, TSDBROW *pRo tsdbRowGetColVal(pRow, pTSchema, jCol++, pColVal); if ((!COL_VAL_IS_NONE(pColVal)) && (!COL_VAL_IS_NULL(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { uint8_t *pVal = pColVal->value.pData; - + pColVal->value.pData = NULL; code = 
tRealloc(&pColVal->value.pData, pColVal->value.nData); if (code) goto _exit; @@ -753,7 +753,7 @@ int32_t tRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { code = tRealloc(&tColVal->value.pData, pColVal->value.nData); if (code) return code; - tColVal->value.nData = pColVal->value.nData; + tColVal->value.nData = pColVal->value.nData; if (pColVal->value.nData) { memcpy(tColVal->value.pData, pColVal->value.pData, pColVal->value.nData); } @@ -802,7 +802,7 @@ int32_t tRowMergerInit(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); if ((!COL_VAL_IS_NONE(pColVal)) && (!COL_VAL_IS_NULL(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { uint8_t *pVal = pColVal->value.pData; - + pColVal->value.pData = NULL; code = tRealloc(&pColVal->value.pData, pColVal->value.nData); if (code) goto _exit; @@ -811,7 +811,7 @@ int32_t tRowMergerInit(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { memcpy(pColVal->value.pData, pVal, pColVal->value.nData); } } - + if (taosArrayPush(pMerger->pArray, pColVal) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; @@ -822,7 +822,7 @@ _exit: return code; } -void tRowMergerClear(SRowMerger *pMerger) { +void tRowMergerClear(SRowMerger *pMerger) { for (int32_t iCol = 1; iCol < pMerger->pTSchema->numOfCols; iCol++) { SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); if (IS_VAR_DATA_TYPE(pTColVal->type)) { @@ -830,7 +830,7 @@ void tRowMergerClear(SRowMerger *pMerger) { } } - taosArrayDestroy(pMerger->pArray); + taosArrayDestroy(pMerger->pArray); } int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { @@ -853,7 +853,7 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { pTColVal->value.nData = pColVal->value.nData; if (pTColVal->value.nData) { - memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); + memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); } pTColVal->flag = 0; } else { @@ -875,7 +875,7 @@ int32_t 
tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) { tColVal->value.nData = pColVal->value.nData; if (tColVal->value.nData) { - memcpy(tColVal->value.pData, pColVal->value.pData, tColVal->value.nData); + memcpy(tColVal->value.pData, pColVal->value.pData, tColVal->value.nData); } tColVal->flag = 0; } else { diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index e75dc24329..43f903dc48 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -455,7 +455,7 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { if (code) goto _err; } - code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pData, nData); + code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pHdr); if (code) goto _err; } break; case SNAP_DATA_TQ_HANDLE: { From 1f01ba7cc0882bad4b70720d7326d52f15cefbbb Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 18 Jan 2023 16:33:52 +0800 Subject: [PATCH 03/20] fix: not keep wal log vnodeCommit'ed for single replica db --- source/libs/sync/src/syncMain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index eeda612bcd..ebf06a5d8b 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -292,8 +292,6 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) { goto _DEL_WAL; } else { - lastApplyIndex -= SYNC_VNODE_LOG_RETENTION; - SyncIndex beginIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore); SyncIndex endIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore); bool isEmpty = pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore); @@ -308,6 +306,8 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) { if (pSyncNode->replicaNum > 1) { // multi replicas + lastApplyIndex -= SYNC_VNODE_LOG_RETENTION; + if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { pSyncNode->minMatchIndex = 
syncMinMatchIndex(pSyncNode); From 516306b598d1f963b647af8d752c3b228bd6311f Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Wed, 18 Jan 2023 17:28:18 +0800 Subject: [PATCH 04/20] more code --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 332 +++++---------------- 1 file changed, 78 insertions(+), 254 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index b2574ddadd..563f488f8e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -83,9 +83,9 @@ static int32_t tsdbOpenDataFileDataIter(SDataFReader* pReader, STsdbDataIter2** code = tBlockDataCreate(&pIter->dIter.bData); TSDB_CHECK_CODE(code, lino, _exit); - pIter->dIter.iBlockIdx = -1; - pIter->dIter.iDataBlk = -1; - pIter->dIter.iRow = -1; + pIter->dIter.iBlockIdx = 0; + pIter->dIter.iDataBlk = 0; + pIter->dIter.iRow = 0; // read data code = tsdbReadBlockIdx(pReader, pIter->dIter.aBlockIdx); @@ -130,8 +130,8 @@ static int32_t tsdbOpenSttFileDataIter(SDataFReader* pReader, int32_t iStt, STsd code = tBlockDataCreate(&pIter->sIter.bData); TSDB_CHECK_CODE(code, lino, _exit); - pIter->sIter.iSttBlk = -1; - pIter->sIter.iRow = -1; + pIter->sIter.iSttBlk = 0; + pIter->sIter.iRow = 0; // read data code = tsdbReadSttBlk(pReader, iStt, pIter->sIter.aSttBlk); @@ -194,34 +194,37 @@ static int32_t tsdbDataFileDataIterNext(STsdbDataIter2* pIter) { int32_t lino = 0; for (;;) { - if (++pIter->dIter.iRow < pIter->dIter.bData.nRow) { + if (pIter->dIter.iRow < pIter->dIter.bData.nRow) { pIter->rowInfo.suid = pIter->dIter.bData.suid; pIter->rowInfo.uid = pIter->dIter.bData.uid; pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->dIter.bData, pIter->dIter.iRow); + pIter->dIter.iRow++; break; } for (;;) { - if (++pIter->dIter.iDataBlk < pIter->dIter.mDataBlk.nItem) { + if (pIter->dIter.iDataBlk < pIter->dIter.mDataBlk.nItem) { SDataBlk dataBlk; tMapDataGetItemByIdx(&pIter->dIter.mDataBlk, pIter->dIter.iDataBlk, &dataBlk, 
tGetDataBlk); code = tsdbReadDataBlockEx(pIter->dIter.pReader, &dataBlk, &pIter->dIter.bData); TSDB_CHECK_CODE(code, lino, _exit); - pIter->dIter.iRow = -1; + pIter->dIter.iDataBlk++; + pIter->dIter.iRow = 0; break; } for (;;) { - if (++pIter->dIter.iBlockIdx < taosArrayGetSize(pIter->dIter.aBlockIdx)) { + if (pIter->dIter.iBlockIdx < taosArrayGetSize(pIter->dIter.aBlockIdx)) { SBlockIdx* pBlockIdx = taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx); code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); TSDB_CHECK_CODE(code, lino, _exit); - pIter->dIter.iDataBlk = -1; + pIter->dIter.iBlockIdx++; + pIter->dIter.iDataBlk = 0; break; } else { @@ -244,20 +247,23 @@ static int32_t tsdbSttFileDataIterNext(STsdbDataIter2* pIter) { int32_t lino = 0; for (;;) { - if (++pIter->sIter.iRow < pIter->sIter.bData.nRow) { + if (pIter->sIter.iRow < pIter->sIter.bData.nRow) { pIter->rowInfo.suid = pIter->sIter.bData.suid; pIter->rowInfo.uid = pIter->sIter.bData.uid ? 
pIter->sIter.bData.uid : pIter->sIter.bData.aUid[pIter->sIter.iRow]; pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->sIter.bData, pIter->sIter.iRow); + pIter->sIter.iRow++; break; } - if (++pIter->sIter.iSttBlk < taosArrayGetSize(pIter->sIter.aSttBlk)) { + if (pIter->sIter.iSttBlk < taosArrayGetSize(pIter->sIter.aSttBlk)) { SSttBlk* pSttBlk = taosArrayGet(pIter->sIter.aSttBlk, pIter->sIter.iSttBlk); code = tsdbReadSttBlockEx(pIter->sIter.pReader, pIter->sIter.iStt, pSttBlk, &pIter->sIter.bData); TSDB_CHECK_CODE(code, lino, _exit); - pIter->sIter.iRow = -1; + pIter->sIter.iSttBlk++; + + pIter->sIter.iRow = 0; } else { pIter->rowInfo = (SRowInfo){0}; break; @@ -1019,16 +1025,16 @@ static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pI if (pWriter->pDIter) { STsdbDataIter2* pIter = pWriter->pDIter; for (;;) { - if (pIter->dIter.iBlockIdx + 1 >= taosArrayGetSize(pIter->dIter.aBlockIdx)) { + if (pIter->dIter.iBlockIdx >= taosArrayGetSize(pIter->dIter.aBlockIdx)) { pWriter->pDIter = NULL; break; } - SBlockIdx* pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx + 1); + SBlockIdx* pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx); int32_t c = tTABLEIDCmprFn(pBlockIdx, pId); if (c < 0) { - ++pIter->dIter.iBlockIdx; + pIter->dIter.iBlockIdx++; code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); TSDB_CHECK_CODE(code, lino, _exit); @@ -1042,15 +1048,15 @@ static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pI pNewBlockIdx->suid = pBlockIdx->suid; pNewBlockIdx->uid = pBlockIdx->uid; - code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pIter->dIter.mDataBlk, pBlockIdx); + code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pIter->dIter.mDataBlk, pNewBlockIdx); TSDB_CHECK_CODE(code, lino, _exit); } else if (c == 0) { - ++pIter->dIter.iBlockIdx; + pIter->dIter.iBlockIdx++; code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, 
&pIter->dIter.mDataBlk); TSDB_CHECK_CODE(code, lino, _exit); - pIter->dIter.iDataBlk = -1; + pIter->dIter.iDataBlk = 0; break; } else { @@ -1089,58 +1095,38 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { int32_t code = 0; int32_t lino = 0; -#if 0 - if (pWriter->id.suid == 0 && pWriter->id.uid == 0) return code; + // TODO: do write the row, refer line 1593 - int32_t c = 1; - if (pWriter->dReader.pBlockIdx) { - c = tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, &pWriter->id); - ASSERT(c >= 0); - } + if (pWriter->bData.nRow > 0) { + if (pWriter->bData.nRow < pWriter->minRow) { + for (int32_t iRow = 0; iRow < pWriter->bData.nRow; iRow++) { + code = tBlockDataAppendRow(&pWriter->sData, &tsdbRowFromBlockData(&pWriter->bData, iRow), + pWriter->skmTable.pTSchema, pWriter->tbid.uid); + TSDB_CHECK_CODE(code, lino, _exit); - if (c == 0) { - SBlockData* pBData = &pWriter->dWriter.bData; - - for (; pWriter->dReader.iRow < pWriter->dReader.bData.nRow; pWriter->dReader.iRow++) { - TSDBROW row = tsdbRowFromBlockData(&pWriter->dReader.bData, pWriter->dReader.iRow); - - code = tBlockDataAppendRow(pBData, &row, NULL, pWriter->id.uid); - if (code) goto _err; - - if (pBData->nRow >= pWriter->maxRow) { - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, pBData, &pWriter->dWriter.mDataBlk, pWriter->cmprAlg); - if (code) goto _err; + if (pWriter->sData.nRow >= pWriter->maxRow) { + code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } } + + tBlockDataReset(&pWriter->bData); + } else { + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); } - - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, pBData, &pWriter->dWriter.mDataBlk, pWriter->cmprAlg); - if (code) goto _err; - - for (; pWriter->dReader.iDataBlk < pWriter->dReader.mDataBlk.nItem; pWriter->dReader.iDataBlk++) { - SDataBlk 
dataBlk; - tMapDataGetItemByIdx(&pWriter->dReader.mDataBlk, pWriter->dReader.iDataBlk, &dataBlk, tGetDataBlk); - - code = tMapDataPutItem(&pWriter->dWriter.mDataBlk, &dataBlk, tPutDataBlk); - if (code) goto _err; - } - - code = tsdbSnapNextTableData(pWriter); - if (code) goto _err; } - if (pWriter->dWriter.mDataBlk.nItem) { - SBlockIdx blockIdx = {.suid = pWriter->id.suid, .uid = pWriter->id.uid}; - code = tsdbWriteDataBlk(pWriter->dWriter.pWriter, &pWriter->dWriter.mDataBlk, &blockIdx); - - if (taosArrayPush(pWriter->dWriter.aBlockIdx, &blockIdx) == NULL) { + if (pWriter->mDataBlk.nItem) { + SBlockIdx* pBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1); + if (pBlockIdx == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; + TSDB_CHECK_CODE(code, lino, _exit); } - } - pWriter->id.suid = 0; - pWriter->id.uid = 0; -#endif + code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pWriter->mDataBlk, pBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); + } _exit: if (code) { @@ -1149,7 +1135,7 @@ _exit: return code; } -static int32_t tsdbSnapWriteOpenDataFile(STsdbSnapWriter* pWriter, int32_t fid) { +static int32_t tsdbSnapWriteFileDataStart(STsdbSnapWriter* pWriter, int32_t fid) { int32_t code = 0; int32_t lino = 0; @@ -1244,31 +1230,14 @@ _exit: return code; } -static int32_t tsdbSnapWriteCloseDataFile(STsdbSnapWriter* pWriter) { +static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) { int32_t code = 0; int32_t lino = 0; ASSERT(pWriter->pDataFWriter); -#if 0 - // loop write remain data - for (;;) { - SRowInfo* pRowInfo; - - code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pRowInfo == NULL) break; - - code = tsdbSnapWriteTableData(pWriter, pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbSnapWriteNextRow(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // TODO: write remain data -#endif + code = tsdbSnapWriteTableData(pWriter, NULL /* TODO */); + TSDB_CHECK_CODE(code, lino, _exit); // do file-level 
updates code = tsdbWriteSttBlk(pWriter->pDataFWriter, pWriter->aSttBlk); @@ -1303,139 +1272,6 @@ _exit: return code; } -static int32_t tsdbSnapWriteToDataFile(STsdbSnapWriter* pWriter, int32_t iRow, int8_t* done) { - int32_t code = 0; - int32_t lino = 0; - -#if 0 - SBlockData* pBData = &pWriter->bData; - TABLEID id = {.suid = pBData->suid, .uid = pBData->uid ? pBData->uid : pBData->aUid[iRow]}; - TSDBROW row = tsdbRowFromBlockData(pBData, iRow); - TSDBKEY key = TSDBROW_KEY(&row); - - *done = 0; - while (pWriter->dReader.iRow < pWriter->dReader.bData.nRow || - pWriter->dReader.iDataBlk < pWriter->dReader.mDataBlk.nItem) { - // Merge row by row - for (; pWriter->dReader.iRow < pWriter->dReader.bData.nRow; pWriter->dReader.iRow++) { - TSDBROW trow = tsdbRowFromBlockData(&pWriter->dReader.bData, pWriter->dReader.iRow); - TSDBKEY tKey = TSDBROW_KEY(&trow); - - ASSERT(pWriter->dReader.bData.suid == id.suid && pWriter->dReader.bData.uid == id.uid); - - int32_t c = tsdbKeyCmprFn(&key, &tKey); - if (c < 0) { - code = tBlockDataAppendRow(&pWriter->dWriter.bData, &row, NULL, id.uid); - TSDB_CHECK_CODE(code, lino, _exit); - } else if (c > 0) { - code = tBlockDataAppendRow(&pWriter->dWriter.bData, &trow, NULL, id.uid); - TSDB_CHECK_CODE(code, lino, _exit); - } else { - ASSERT(0); - } - - if (pWriter->dWriter.bData.nRow >= pWriter->maxRow) { - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk, - pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } - - if (c < 0) { - *done = 1; - goto _exit; - } - } - - // Merge row by block - SDataBlk tDataBlk = {.minKey = key, .maxKey = key}; - for (; pWriter->dReader.iDataBlk < pWriter->dReader.mDataBlk.nItem; pWriter->dReader.iDataBlk++) { - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pWriter->dReader.mDataBlk, pWriter->dReader.iDataBlk, &dataBlk, tGetDataBlk); - - int32_t c = tDataBlkCmprFn(&dataBlk, &tDataBlk); - if (c < 0) { - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, 
&pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk, - pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tMapDataPutItem(&pWriter->dWriter.mDataBlk, &dataBlk, tPutDataBlk); - TSDB_CHECK_CODE(code, lino, _exit); - } else if (c > 0) { - code = tBlockDataAppendRow(&pWriter->dWriter.bData, &row, NULL, id.uid); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->dWriter.bData.nRow >= pWriter->maxRow) { - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk, - pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } - - *done = 1; - goto _exit; - } else { - code = tsdbReadDataBlockEx(pWriter->dReader.pReader, &dataBlk, &pWriter->dReader.bData); - TSDB_CHECK_CODE(code, lino, _exit); - pWriter->dReader.iRow = 0; - - pWriter->dReader.iDataBlk++; - break; - } - } - } -#endif - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t tsdbSnapWriteToSttFile(STsdbSnapWriter* pWriter, int32_t iRow) { - int32_t code = 0; - int32_t lino = 0; - -#if 0 - TABLEID id = {.suid = pWriter->bData.suid, - .uid = pWriter->bData.uid ? pWriter->bData.uid : pWriter->bData.aUid[iRow]}; - TSDBROW row = tsdbRowFromBlockData(&pWriter->bData, iRow); - SBlockData* pBData = &pWriter->dWriter.sData; - - if (pBData->suid || pBData->uid) { - if (!TABLE_SAME_SCHEMA(pBData->suid, pBData->uid, id.suid, id.uid)) { - code = tsdbWriteSttBlock(pWriter->pDataFWriter, pBData, pWriter->aSttBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - - pBData->suid = 0; - pBData->uid = 0; - } - } - - if (pBData->suid == 0 && pBData->uid == 0) { - code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pWriter->id.suid, pWriter->id.uid, &pWriter->skmTable); - TSDB_CHECK_CODE(code, lino, _exit); - - TABLEID tid = {.suid = pWriter->id.suid, .uid = pWriter->id.suid ? 
0 : pWriter->id.uid}; - code = tBlockDataInit(pBData, &tid, pWriter->skmTable.pTSchema, NULL, 0); - TSDB_CHECK_CODE(code, lino, _exit); - } - - code = tBlockDataAppendRow(pBData, &row, NULL, id.uid); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pBData->nRow >= pWriter->maxRow) { - code = tsdbWriteSttBlock(pWriter->pDataFWriter, pBData, pWriter->aSttBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } -#endif - -_exit: - if (code) { - tsdbError("vgId:%d %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); - } - return code; -} - static int32_t tsdbSnapWriteNextRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) { int32_t code = 0; int32_t lino = 0; @@ -1502,20 +1338,6 @@ _exit: return code; } -static int32_t tsdbSnapWriteRowImpl(STsdbSnapWriter* pWriter, SRowInfo* pRowInfo) { - int32_t code = 0; - int32_t lino = 0; - - // TODO - ASSERT(0); - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { int32_t code = 0; int32_t lino = 0; @@ -1551,21 +1373,25 @@ static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowIn } // do write the row - if (pWriter->pDIter == NULL /* || false */) { - goto _write_incoming_row; + if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow && + pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) { + code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); + TSDB_CHECK_CODE(code, lino, _exit); } else { for (;;) { - while (pWriter->pDIter->dIter.iRow + 1 < pWriter->pDIter->dIter.bData.nRow) { - TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow + 1); + while (pWriter->pDIter->dIter.iRow < pWriter->pDIter->dIter.bData.nRow) { + TSDBROW row = 
tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow); int32_t c = tsdbRowCmprFn(&pRowInfo->row, &row); if (c < 0) { - goto _write_incoming_row; + code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); + TSDB_CHECK_CODE(code, lino, _exit); + goto _exit; } else if (c > 0) { - ++pWriter->pDIter->dIter.iRow; - code = tsdbSnapWriteTableRow(pWriter, &row); TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pDIter->dIter.iRow++; } else { ASSERT(0); } @@ -1573,34 +1399,32 @@ static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowIn while (pWriter->pDIter->dIter.iDataBlk < pWriter->pDIter->dIter.mDataBlk.nItem) { SDataBlk dataBlk; - - tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk + 1, &dataBlk, - tGetDataBlk); + tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); int32_t c = tDataBlkCmprFn( &dataBlk, &(SDataBlk){.minKey = TSDBROW_KEY(&pRowInfo->row), .maxKey = TSDBROW_KEY(&pRowInfo->row)}); if (c > 0) { - goto _write_incoming_row; + code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); + TSDB_CHECK_CODE(code, lino, _exit); + goto _exit; } else if (c < 0) { - ++pWriter->pDIter->dIter.iDataBlk; - tMapDataPutItem(&pWriter->pDIter->dIter.mDataBlk, &dataBlk, tPutDataBlk); - } else { - ++pWriter->pDIter->dIter.iDataBlk; + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + tMapDataPutItem(&pWriter->pDIter->dIter.mDataBlk, &dataBlk, tPutDataBlk); + pWriter->pDIter->dIter.iDataBlk++; + } else { code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData); TSDB_CHECK_CODE(code, lino, _exit); - pWriter->pDIter->dIter.iRow = -1; + pWriter->pDIter->dIter.iDataBlk++; + pWriter->pDIter->dIter.iRow = 0; break; } } } } -_write_incoming_row: - code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); - TSDB_CHECK_CODE(code, 
lino, _exit); - _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); @@ -1621,11 +1445,11 @@ static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* pWriter, SSnapDataHd int32_t fid = tsdbKeyFid(pWriter->inData.aTSKEY[0], pWriter->minutes, pWriter->precision); if (pWriter->fid != fid) { if (pWriter->pDataFWriter) { - code = tsdbSnapWriteCloseDataFile(pWriter); + code = tsdbSnapWriteFileDataEnd(pWriter); TSDB_CHECK_CODE(code, lino, _exit); } - code = tsdbSnapWriteOpenDataFile(pWriter, fid); + code = tsdbSnapWriteFileDataStart(pWriter, fid); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1927,7 +1751,7 @@ _exit: int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter) { int32_t code = 0; if (pWriter->pDataFWriter) { - code = tsdbSnapWriteCloseDataFile(pWriter); + code = tsdbSnapWriteFileDataEnd(pWriter); if (code) goto _exit; } @@ -2012,7 +1836,7 @@ int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { TSDB_CHECK_CODE(code, lino, _exit); goto _exit; } else if (pWriter->pDataFWriter) { - code = tsdbSnapWriteCloseDataFile(pWriter); + code = tsdbSnapWriteFileDataEnd(pWriter); TSDB_CHECK_CODE(code, lino, _exit); } From 720381988868bb9f5e6364ffe17d8602242b6864 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 19 Jan 2023 21:46:27 +0800 Subject: [PATCH 05/20] more code --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 80 ++++++++++++++-------- 1 file changed, 51 insertions(+), 29 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 563f488f8e..5747584d26 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -44,7 +44,6 @@ struct STsdbDataIter2 { int32_t iBlockIdx; int32_t iDataBlk; int32_t iRow; - } dIter; // TSDB_STT_FILE_DATA_ITER @@ -1022,8 +1021,18 @@ static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pI int32_t code = 
0; int32_t lino = 0; + if (pId) { + pWriter->tbid = *pId; + } else { + pWriter->tbid = (TABLEID){INT64_MAX, INT64_MAX}; + } + if (pWriter->pDIter) { STsdbDataIter2* pIter = pWriter->pDIter; + + ASSERT(pIter->dIter.iRow >= pIter->dIter.bData.nRow); + ASSERT(pIter->dIter.iDataBlk >= pIter->dIter.mDataBlk.nItem); + for (;;) { if (pIter->dIter.iBlockIdx >= taosArrayGetSize(pIter->dIter.aBlockIdx)) { pWriter->pDIter = NULL; @@ -1032,10 +1041,8 @@ static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pI SBlockIdx* pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx); - int32_t c = tTABLEIDCmprFn(pBlockIdx, pId); + int32_t c = tTABLEIDCmprFn(pBlockIdx, &pWriter->tbid); if (c < 0) { - pIter->dIter.iBlockIdx++; - code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); TSDB_CHECK_CODE(code, lino, _exit); @@ -1050,43 +1057,46 @@ static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pI code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pIter->dIter.mDataBlk, pNewBlockIdx); TSDB_CHECK_CODE(code, lino, _exit); - } else if (c == 0) { - pIter->dIter.iBlockIdx++; + pIter->dIter.iBlockIdx++; + } else if (c == 0) { code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); TSDB_CHECK_CODE(code, lino, _exit); pIter->dIter.iDataBlk = 0; + pIter->dIter.iBlockIdx++; break; } else { + pIter->dIter.iDataBlk = pIter->dIter.mDataBlk.nItem; break; } } } - pWriter->tbid = pId[0]; + if (pId == NULL) { + if (pWriter->sData.nRow) { + code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable); + TSDB_CHECK_CODE(code, lino, _exit); - tMapDataReset(&pWriter->mDataBlk); + tMapDataReset(&pWriter->mDataBlk); -#if 0 - code = tsdbSnapWriteCopyData(pWriter, pId); - TSDB_CHECK_CODE(code, 
lino, _exit); + code = tBlockDataInit(&pWriter->bData, pId, pWriter->skmTable.pTSchema, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); - pWriter->id.suid = pId->suid; - pWriter->id.uid = pId->uid; - - code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable); - TSDB_CHECK_CODE(code, lino, _exit); - - tMapDataReset(&pWriter->dWriter.mDataBlk); - code = tBlockDataInit(&pWriter->dWriter.bData, pId, pWriter->skmTable.pTSchema, NULL, 0); - TSDB_CHECK_CODE(code, lino, _exit); -#endif + // TODO: init pWriter->sData ?? + } _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, pId->suid, + pId->uid); } return code; } @@ -1219,6 +1229,7 @@ static int32_t tsdbSnapWriteFileDataStart(STsdbSnapWriter* pWriter, int32_t fid) } tBlockDataReset(&pWriter->bData); + tBlockDataReset(&pWriter->sData); _exit: if (code) { @@ -1342,7 +1353,7 @@ static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { int32_t code = 0; int32_t lino = 0; - code = tBlockDataAppendRow(&pWriter->bData, pRow, NULL, pWriter->tbid.uid); + code = tBlockDataAppendRow(&pWriter->bData, pRow, pWriter->skmTable.pTSchema, pWriter->tbid.uid); TSDB_CHECK_CODE(code, lino, _exit); if (pWriter->bData.nRow >= pWriter->maxRow) { @@ -1362,8 +1373,8 @@ static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowIn int32_t lino = 0; // switch to new table if need - if (pRowInfo->uid != pWriter->tbid.uid) { - if (pRowInfo->uid) { + if (pRowInfo == NULL || pRowInfo->uid != pWriter->tbid.uid) { + if (pWriter->tbid.uid != 0) { code = tsdbSnapWriteTableDataEnd(pWriter); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1372,6 +1383,9 @@ static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowIn TSDB_CHECK_CODE(code, lino, _exit); } + // end with 
a NULL row + if (pRowInfo == NULL) goto _exit; + // do write the row if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow && pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) { @@ -1397,7 +1411,14 @@ static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowIn } } - while (pWriter->pDIter->dIter.iDataBlk < pWriter->pDIter->dIter.mDataBlk.nItem) { + for (;;) { + if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) { + code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); + TSDB_CHECK_CODE(code, lino, _exit); + goto _exit; + } + + // FIXME: Here can be slow, use array instead SDataBlk dataBlk; tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); @@ -1408,8 +1429,10 @@ static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowIn TSDB_CHECK_CODE(code, lino, _exit); goto _exit; } else if (c < 0) { - code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); + if (pWriter->bData.nRow > 0) { + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } tMapDataPutItem(&pWriter->pDIter->dIter.mDataBlk, &dataBlk, tPutDataBlk); pWriter->pDIter->dIter.iDataBlk++; @@ -1417,8 +1440,8 @@ static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowIn code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData); TSDB_CHECK_CODE(code, lino, _exit); - pWriter->pDIter->dIter.iDataBlk++; pWriter->pDIter->dIter.iRow = 0; + pWriter->pDIter->dIter.iDataBlk++; break; } } @@ -1455,7 +1478,6 @@ static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* pWriter, SSnapDataHd // loop write each row SRowInfo* pRowInfo; - code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); 
TSDB_CHECK_CODE(code, lino, _exit); for (int32_t iRow = 0; iRow < pWriter->bData.nRow; ++iRow) { From fff4a96bdf94a0f8adfafca456d9c5a71b7a7286 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 19 Jan 2023 22:18:39 +0800 Subject: [PATCH 06/20] more code --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 160 ++++++++++++--------- 1 file changed, 94 insertions(+), 66 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 5747584d26..310c3de5aa 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1105,7 +1105,9 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { int32_t code = 0; int32_t lino = 0; - // TODO: do write the row, refer line 1593 + // write a NULL row to end current table data write + code = tsdbSnapWriteTableRow(pWriter, NULL); + TSDB_CHECK_CODE(code, lino, _exit); if (pWriter->bData.nRow > 0) { if (pWriter->bData.nRow < pWriter->minRow) { @@ -1120,7 +1122,7 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { } } - tBlockDataReset(&pWriter->bData); + tBlockDataClear(&pWriter->bData); } else { code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); TSDB_CHECK_CODE(code, lino, _exit); @@ -1134,6 +1136,9 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { TSDB_CHECK_CODE(code, lino, _exit); } + pBlockIdx->suid = pWriter->tbid.suid; + pBlockIdx->uid = pWriter->tbid.uid; + code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pWriter->mDataBlk, pBlockIdx); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1247,9 +1252,14 @@ static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) { ASSERT(pWriter->pDataFWriter); - code = tsdbSnapWriteTableData(pWriter, NULL /* TODO */); + code = tsdbSnapWriteTableData(pWriter, NULL); TSDB_CHECK_CODE(code, lino, _exit); + if (pWriter->sData.nRow) { + code = tsdbWriteSttBlock(pWriter->pDataFWriter, 
&pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + // do file-level updates code = tsdbWriteSttBlk(pWriter->pDataFWriter, pWriter->aSttBlk); TSDB_CHECK_CODE(code, lino, _exit); @@ -1271,8 +1281,12 @@ static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) { TSDB_CHECK_CODE(code, lino, _exit); } - // TODO: do clear sources - {} + // clear sources + while (pWriter->iterList) { + STsdbDataIter2* pIter = pWriter->iterList; + pWriter->iterList = pIter->next; + tsdbCloseDataIter2(pIter); + } _exit: if (code) { @@ -1349,7 +1363,7 @@ _exit: return code; } -static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { +static int32_t tsdbSnapWriteTableRowImpl(STsdbSnapWriter* pWriter, TSDBROW* pRow) { int32_t code = 0; int32_t lino = 0; @@ -1368,6 +1382,79 @@ _exit: return code; } +static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { + int32_t code = 0; + int32_t lino = 0; + + // TODO: take pRow == NULL into account + + if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow && + pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) { + code = tsdbSnapWriteTableRowImpl(pWriter, pRow); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + for (;;) { + while (pWriter->pDIter->dIter.iRow < pWriter->pDIter->dIter.bData.nRow) { + TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow); + + int32_t c = tsdbRowCmprFn(pRow, &row); + if (c < 0) { + code = tsdbSnapWriteTableRowImpl(pWriter, pRow); + TSDB_CHECK_CODE(code, lino, _exit); + goto _exit; + } else if (c > 0) { + code = tsdbSnapWriteTableRowImpl(pWriter, pRow); + TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pDIter->dIter.iRow++; + } else { + ASSERT(0); + } + } + + for (;;) { + if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) { + code = tsdbSnapWriteTableRowImpl(pWriter, pRow); + 
TSDB_CHECK_CODE(code, lino, _exit); + goto _exit; + } + + // FIXME: Here can be slow, use array instead + SDataBlk dataBlk; + tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); + + int32_t c = tDataBlkCmprFn(&dataBlk, &(SDataBlk){.minKey = TSDBROW_KEY(pRow), .maxKey = TSDBROW_KEY(pRow)}); + if (c > 0) { + code = tsdbSnapWriteTableRowImpl(pWriter, pRow); + TSDB_CHECK_CODE(code, lino, _exit); + goto _exit; + } else if (c < 0) { + if (pWriter->bData.nRow > 0) { + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + + tMapDataPutItem(&pWriter->pDIter->dIter.mDataBlk, &dataBlk, tPutDataBlk); + pWriter->pDIter->dIter.iDataBlk++; + } else { + code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pDIter->dIter.iRow = 0; + pWriter->pDIter->dIter.iDataBlk++; + break; + } + } + } + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowInfo) { int32_t code = 0; int32_t lino = 0; @@ -1384,68 +1471,9 @@ static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowIn } // end with a NULL row - if (pRowInfo == NULL) goto _exit; - - // do write the row - if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow && - pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) { + if (pRowInfo) { code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); TSDB_CHECK_CODE(code, lino, _exit); - } else { - for (;;) { - while (pWriter->pDIter->dIter.iRow < pWriter->pDIter->dIter.bData.nRow) { - TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow); - - 
int32_t c = tsdbRowCmprFn(&pRowInfo->row, &row); - if (c < 0) { - code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); - TSDB_CHECK_CODE(code, lino, _exit); - goto _exit; - } else if (c > 0) { - code = tsdbSnapWriteTableRow(pWriter, &row); - TSDB_CHECK_CODE(code, lino, _exit); - - pWriter->pDIter->dIter.iRow++; - } else { - ASSERT(0); - } - } - - for (;;) { - if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) { - code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); - TSDB_CHECK_CODE(code, lino, _exit); - goto _exit; - } - - // FIXME: Here can be slow, use array instead - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); - - int32_t c = tDataBlkCmprFn( - &dataBlk, &(SDataBlk){.minKey = TSDBROW_KEY(&pRowInfo->row), .maxKey = TSDBROW_KEY(&pRowInfo->row)}); - if (c > 0) { - code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); - TSDB_CHECK_CODE(code, lino, _exit); - goto _exit; - } else if (c < 0) { - if (pWriter->bData.nRow > 0) { - code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } - - tMapDataPutItem(&pWriter->pDIter->dIter.mDataBlk, &dataBlk, tPutDataBlk); - pWriter->pDIter->dIter.iDataBlk++; - } else { - code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData); - TSDB_CHECK_CODE(code, lino, _exit); - - pWriter->pDIter->dIter.iRow = 0; - pWriter->pDIter->dIter.iDataBlk++; - break; - } - } - } } _exit: From 4f0a605cc08ab3d50597e8b98b40a4a64f3dbe81 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Sun, 22 Jan 2023 17:20:26 +0800 Subject: [PATCH 07/20] more code --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 310c3de5aa..caf3463fdc 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c 
+++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1255,6 +1255,8 @@ static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) { code = tsdbSnapWriteTableData(pWriter, NULL); TSDB_CHECK_CODE(code, lino, _exit); + // TODO: ?? + if (pWriter->sData.nRow) { code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); TSDB_CHECK_CODE(code, lino, _exit); From a902d412eb002b43002a12f5d45bf4414a0f58c3 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 26 Jan 2023 20:20:39 +0800 Subject: [PATCH 08/20] more code --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 38 ++++++++++------------ source/dnode/vnode/src/tsdb/tsdbUtil.c | 2 +- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index caf3463fdc..387f5e1763 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1030,6 +1030,7 @@ static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pI if (pWriter->pDIter) { STsdbDataIter2* pIter = pWriter->pDIter; + // assert last table data end ASSERT(pIter->dIter.iRow >= pIter->dIter.bData.nRow); ASSERT(pIter->dIter.iDataBlk >= pIter->dIter.mDataBlk.nItem); @@ -1095,8 +1096,8 @@ _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); } else { - tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, pId->suid, - pId->uid); + tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, + pWriter->tbid.suid, pWriter->tbid.uid); } return code; } @@ -1388,24 +1389,21 @@ static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { int32_t code = 0; int32_t lino = 0; - // TODO: take pRow == NULL into account + TSDBKEY inKey = pRow ? 
TSDBROW_KEY(pRow) : TSDBKEY_MAX; if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow && pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) { - code = tsdbSnapWriteTableRowImpl(pWriter, pRow); - TSDB_CHECK_CODE(code, lino, _exit); + goto _write_row; } else { for (;;) { while (pWriter->pDIter->dIter.iRow < pWriter->pDIter->dIter.bData.nRow) { TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow); - int32_t c = tsdbRowCmprFn(pRow, &row); + int32_t c = tsdbKeyCmprFn(&inKey, &TSDBROW_KEY(&row)); if (c < 0) { - code = tsdbSnapWriteTableRowImpl(pWriter, pRow); - TSDB_CHECK_CODE(code, lino, _exit); - goto _exit; + goto _write_row; } else if (c > 0) { - code = tsdbSnapWriteTableRowImpl(pWriter, pRow); + code = tsdbSnapWriteTableRowImpl(pWriter, &row); TSDB_CHECK_CODE(code, lino, _exit); pWriter->pDIter->dIter.iRow++; @@ -1415,21 +1413,15 @@ static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { } for (;;) { - if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) { - code = tsdbSnapWriteTableRowImpl(pWriter, pRow); - TSDB_CHECK_CODE(code, lino, _exit); - goto _exit; - } + if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) goto _write_row; // FIXME: Here can be slow, use array instead SDataBlk dataBlk; tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); - int32_t c = tDataBlkCmprFn(&dataBlk, &(SDataBlk){.minKey = TSDBROW_KEY(pRow), .maxKey = TSDBROW_KEY(pRow)}); + int32_t c = tDataBlkCmprFn(&dataBlk, &(SDataBlk){.minKey = inKey, .maxKey = inKey}); if (c > 0) { - code = tsdbSnapWriteTableRowImpl(pWriter, pRow); - TSDB_CHECK_CODE(code, lino, _exit); - goto _exit; + goto _write_row; } else if (c < 0) { if (pWriter->bData.nRow > 0) { code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); 
@@ -1450,6 +1442,12 @@ static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { } } +_write_row: + if (pRow) { + code = tsdbSnapWriteTableRowImpl(pWriter, pRow); + TSDB_CHECK_CODE(code, lino, _exit); + } + _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); @@ -1510,7 +1508,7 @@ static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* pWriter, SSnapDataHd SRowInfo* pRowInfo; code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); TSDB_CHECK_CODE(code, lino, _exit); - for (int32_t iRow = 0; iRow < pWriter->bData.nRow; ++iRow) { + for (int32_t iRow = 0; iRow < pWriter->inData.nRow; ++iRow) { SRowInfo rInfo = {.suid = pWriter->inData.suid, .uid = pWriter->inData.uid ? pWriter->inData.uid : pWriter->inData.aUid[iRow], .row = tsdbRowFromBlockData(&pWriter->inData, iRow)}; diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index 4e6adb2dbf..e2d4b92836 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -757,7 +757,7 @@ int32_t tRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { pTColVal->value.nData = pColVal->value.nData; if (pTColVal->value.nData) { - memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); + memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); } pTColVal->flag = 0; } else { From ec886154a4d4f564fbfa34ecc8fa2643bcf35c24 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 26 Jan 2023 21:48:54 +0800 Subject: [PATCH 09/20] more code --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 387f5e1763..ebb4986e23 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1075,12 +1075,7 @@ 
static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pI } } - if (pId == NULL) { - if (pWriter->sData.nRow) { - code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } - } else { + if (pId) { code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable); TSDB_CHECK_CODE(code, lino, _exit); @@ -1088,8 +1083,17 @@ static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pI code = tBlockDataInit(&pWriter->bData, pId, pWriter->skmTable.pTSchema, NULL, 0); TSDB_CHECK_CODE(code, lino, _exit); + } - // TODO: init pWriter->sData ?? + if (!TABLE_SAME_SCHEMA(pWriter->tbid.suid, pWriter->tbid.uid, pWriter->sData.suid, pWriter->sData.uid)) { + if ((pWriter->sData.nRow > 0)) { + code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + + TABLEID id = {.suid = pWriter->tbid.suid, .uid = pWriter->tbid.suid ? 
0 : pWriter->tbid.uid}; + code = tBlockDataInit(&pWriter->sData, &id, pWriter->skmTable.pTSchema, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); } _exit: From 2f5eeb8000f4778d07537ac8b1e9837207692ade Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 26 Jan 2023 21:57:22 +0800 Subject: [PATCH 10/20] make it compile --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 252 ++++++++++----------- 1 file changed, 122 insertions(+), 130 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index ebb4986e23..46ffcefe08 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1106,6 +1106,95 @@ _exit: return code; } +static int32_t tsdbSnapWriteTableRowImpl(STsdbSnapWriter* pWriter, TSDBROW* pRow) { + int32_t code = 0; + int32_t lino = 0; + + code = tBlockDataAppendRow(&pWriter->bData, pRow, pWriter->skmTable.pTSchema, pWriter->tbid.uid); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->bData.nRow >= pWriter->maxRow) { + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { + int32_t code = 0; + int32_t lino = 0; + + TSDBKEY inKey = pRow ? 
TSDBROW_KEY(pRow) : TSDBKEY_MAX; + + if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow && + pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) { + goto _write_row; + } else { + for (;;) { + while (pWriter->pDIter->dIter.iRow < pWriter->pDIter->dIter.bData.nRow) { + TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow); + + int32_t c = tsdbKeyCmprFn(&inKey, &TSDBROW_KEY(&row)); + if (c < 0) { + goto _write_row; + } else if (c > 0) { + code = tsdbSnapWriteTableRowImpl(pWriter, &row); + TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pDIter->dIter.iRow++; + } else { + ASSERT(0); + } + } + + for (;;) { + if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) goto _write_row; + + // FIXME: Here can be slow, use array instead + SDataBlk dataBlk; + tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); + + int32_t c = tDataBlkCmprFn(&dataBlk, &(SDataBlk){.minKey = inKey, .maxKey = inKey}); + if (c > 0) { + goto _write_row; + } else if (c < 0) { + if (pWriter->bData.nRow > 0) { + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + + tMapDataPutItem(&pWriter->pDIter->dIter.mDataBlk, &dataBlk, tPutDataBlk); + pWriter->pDIter->dIter.iDataBlk++; + } else { + code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pDIter->dIter.iRow = 0; + pWriter->pDIter->dIter.iDataBlk++; + break; + } + } + } + } + +_write_row: + if (pRow) { + code = tsdbSnapWriteTableRowImpl(pWriter, pRow); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + static int32_t 
tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { int32_t code = 0; int32_t lino = 0; @@ -1251,6 +1340,34 @@ _exit: return code; } +static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowInfo) { + int32_t code = 0; + int32_t lino = 0; + + // switch to new table if need + if (pRowInfo == NULL || pRowInfo->uid != pWriter->tbid.uid) { + if (pWriter->tbid.uid != 0) { + code = tsdbSnapWriteTableDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbSnapWriteTableDataStart(pWriter, (TABLEID*)pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // end with a NULL row + if (pRowInfo) { + code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) { int32_t code = 0; int32_t lino = 0; @@ -1370,123 +1487,6 @@ _exit: return code; } -static int32_t tsdbSnapWriteTableRowImpl(STsdbSnapWriter* pWriter, TSDBROW* pRow) { - int32_t code = 0; - int32_t lino = 0; - - code = tBlockDataAppendRow(&pWriter->bData, pRow, pWriter->skmTable.pTSchema, pWriter->tbid.uid); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->bData.nRow >= pWriter->maxRow) { - code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { - int32_t code = 0; - int32_t lino = 0; - - TSDBKEY inKey = pRow ? 
TSDBROW_KEY(pRow) : TSDBKEY_MAX; - - if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow && - pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) { - goto _write_row; - } else { - for (;;) { - while (pWriter->pDIter->dIter.iRow < pWriter->pDIter->dIter.bData.nRow) { - TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow); - - int32_t c = tsdbKeyCmprFn(&inKey, &TSDBROW_KEY(&row)); - if (c < 0) { - goto _write_row; - } else if (c > 0) { - code = tsdbSnapWriteTableRowImpl(pWriter, &row); - TSDB_CHECK_CODE(code, lino, _exit); - - pWriter->pDIter->dIter.iRow++; - } else { - ASSERT(0); - } - } - - for (;;) { - if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) goto _write_row; - - // FIXME: Here can be slow, use array instead - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); - - int32_t c = tDataBlkCmprFn(&dataBlk, &(SDataBlk){.minKey = inKey, .maxKey = inKey}); - if (c > 0) { - goto _write_row; - } else if (c < 0) { - if (pWriter->bData.nRow > 0) { - code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } - - tMapDataPutItem(&pWriter->pDIter->dIter.mDataBlk, &dataBlk, tPutDataBlk); - pWriter->pDIter->dIter.iDataBlk++; - } else { - code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData); - TSDB_CHECK_CODE(code, lino, _exit); - - pWriter->pDIter->dIter.iRow = 0; - pWriter->pDIter->dIter.iDataBlk++; - break; - } - } - } - } - -_write_row: - if (pRow) { - code = tsdbSnapWriteTableRowImpl(pWriter, pRow); - TSDB_CHECK_CODE(code, lino, _exit); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t 
tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowInfo) { - int32_t code = 0; - int32_t lino = 0; - - // switch to new table if need - if (pRowInfo == NULL || pRowInfo->uid != pWriter->tbid.uid) { - if (pWriter->tbid.uid != 0) { - code = tsdbSnapWriteTableDataEnd(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); - } - - code = tsdbSnapWriteTableDataStart(pWriter, (TABLEID*)pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // end with a NULL row - if (pRowInfo) { - code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); - TSDB_CHECK_CODE(code, lino, _exit); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { int32_t code = 0; int32_t lino = 0; @@ -1849,21 +1849,13 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { taosArrayDestroy(pWriter->aDelIdxR); // SNAP_DATA_TSDB - - // // Writer - // tBlockDataDestroy(&pWriter->dWriter.sData, 1); - // tBlockDataDestroy(&pWriter->dWriter.bData, 1); - // taosArrayDestroy(pWriter->dWriter.aSttBlk); - // tMapDataClear(&pWriter->dWriter.mDataBlk); - // taosArrayDestroy(pWriter->dWriter.aBlockIdx); - - // // Reader - // tBlockDataDestroy(&pWriter->dReader.bData, 1); - // tMapDataClear(&pWriter->dReader.mDataBlk); - // taosArrayDestroy(pWriter->dReader.aBlockIdx); - + tBlockDataDestroy(&pWriter->sData, 1); tBlockDataDestroy(&pWriter->bData, 1); + taosArrayDestroy(pWriter->aSttBlk); + tMapDataClear(&pWriter->mDataBlk); + taosArrayDestroy(pWriter->aBlockIdx); tDestroyTSchema(pWriter->skmTable.pTSchema); + tBlockDataDestroy(&pWriter->inData, 1); for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t*); iBuf++) { tFree(pWriter->aBuf[iBuf]); From 8d5630651780891ef19b6be9762c1f2e8036bc86 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Sat, 28 Jan 2023 11:31:32 +0800 Subject: 
[PATCH 11/20] more code --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 97 ++++++---------------- 1 file changed, 24 insertions(+), 73 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 46ffcefe08..0a04d09a2c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1049,7 +1049,7 @@ static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pI SBlockIdx* pNewBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1); if (pNewBlockIdx == NULL) { - code == TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); } @@ -1091,9 +1091,11 @@ static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pI TSDB_CHECK_CODE(code, lino, _exit); } - TABLEID id = {.suid = pWriter->tbid.suid, .uid = pWriter->tbid.suid ? 0 : pWriter->tbid.uid}; - code = tBlockDataInit(&pWriter->sData, &id, pWriter->skmTable.pTSchema, NULL, 0); - TSDB_CHECK_CODE(code, lino, _exit); + if (pId) { + TABLEID id = {.suid = pWriter->tbid.suid, .uid = pWriter->tbid.suid ? 
0 : pWriter->tbid.uid}; + code = tBlockDataInit(&pWriter->sData, &id, pWriter->skmTable.pTSchema, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); + } } _exit: @@ -1206,8 +1208,8 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { if (pWriter->bData.nRow > 0) { if (pWriter->bData.nRow < pWriter->minRow) { for (int32_t iRow = 0; iRow < pWriter->bData.nRow; iRow++) { - code = tBlockDataAppendRow(&pWriter->sData, &tsdbRowFromBlockData(&pWriter->bData, iRow), - pWriter->skmTable.pTSchema, pWriter->tbid.uid); + code = + tBlockDataAppendRow(&pWriter->sData, &tsdbRowFromBlockData(&pWriter->bData, iRow), NULL, pWriter->tbid.uid); TSDB_CHECK_CODE(code, lino, _exit); if (pWriter->sData.nRow >= pWriter->maxRow) { @@ -1374,13 +1376,17 @@ static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) { ASSERT(pWriter->pDataFWriter); - code = tsdbSnapWriteTableData(pWriter, NULL); + // consume remain data and end with a NULL table row + SRowInfo* pRowInfo; + code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); TSDB_CHECK_CODE(code, lino, _exit); + for (;;) { + code = tsdbSnapWriteTableData(pWriter, pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); - // TODO: ?? 
+ if (pRowInfo == NULL) break; - if (pWriter->sData.nRow) { - code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); + code = tsdbSnapWriteNextRow(pWriter, &pRowInfo); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1717,88 +1723,33 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr TSDB_CHECK_CODE(code, lino, _exit); // SNAP_DATA_TSDB -#if 1 - pWriter->fid = INT32_MIN; - code = tBlockDataCreate(&pWriter->inData); TSDB_CHECK_CODE(code, lino, _exit); - code = tBlockDataCreate(&pWriter->bData); - TSDB_CHECK_CODE(code, lino, _exit); -#else - code = tBlockDataCreate(&pWriter->bData); - TSDB_CHECK_CODE(code, lino, _exit); - pWriter->fid = INT32_MIN; - pWriter->id = (TABLEID){0}; - // Reader - pWriter->dReader.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); - if (pWriter->dReader.aBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - code = tBlockDataCreate(&pWriter->dReader.bData); + + code = tBlockDataCreate(&pWriter->bData); TSDB_CHECK_CODE(code, lino, _exit); - // Writer - pWriter->dWriter.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); - if (pWriter->dWriter.aBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pWriter->dWriter.aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); - if (pWriter->dWriter.aSttBlk == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - code = tBlockDataCreate(&pWriter->dWriter.bData); + code = tBlockDataCreate(&pWriter->sData); TSDB_CHECK_CODE(code, lino, _exit); - code = tBlockDataCreate(&pWriter->dWriter.sData); - TSDB_CHECK_CODE(code, lino, _exit); -#endif // SNAP_DATA_DEL - pWriter->aDelIdxR = taosArrayInit(0, sizeof(SDelIdx)); - if (pWriter->aDelIdxR == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pWriter->aDelData = taosArrayInit(0, sizeof(SDelData)); - if (pWriter->aDelData == NULL) { - code = 
TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pWriter->aDelIdxW = taosArrayInit(0, sizeof(SDelIdx)); - if (pWriter->aDelIdxW == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } _exit: if (code) { tsdbError("vgId:%d, %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - *ppWriter = NULL; - if (pWriter) { -#if 0 - if (pWriter->aDelIdxW) taosArrayDestroy(pWriter->aDelIdxW); - if (pWriter->aDelData) taosArrayDestroy(pWriter->aDelData); - if (pWriter->aDelIdxR) taosArrayDestroy(pWriter->aDelIdxR); - tBlockDataDestroy(&pWriter->dWriter.sData, 1); - tBlockDataDestroy(&pWriter->dWriter.bData, 1); - if (pWriter->dWriter.aSttBlk) taosArrayDestroy(pWriter->dWriter.aSttBlk); - if (pWriter->dWriter.aBlockIdx) taosArrayDestroy(pWriter->dWriter.aBlockIdx); - tBlockDataDestroy(&pWriter->dReader.bData, 1); - if (pWriter->dReader.aBlockIdx) taosArrayDestroy(pWriter->dReader.aBlockIdx); + tBlockDataDestroy(&pWriter->sData, 1); tBlockDataDestroy(&pWriter->bData, 1); + tBlockDataDestroy(&pWriter->inData, 1); tsdbFSDestroy(&pWriter->fs); - taosMemoryFree(pWriter); -#endif + pWriter = NULL; } } else { - tsdbInfo("vgId:%d, %s done", TD_VID(pTsdb->pVnode), __func__); - *ppWriter = pWriter; + tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64, TD_VID(pTsdb->pVnode), __func__, sver, ever); } + *ppWriter = pWriter; return code; } From 545e4d7f02669d1db271db3771dab369df51f52c Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Sat, 28 Jan 2023 13:30:15 +0800 Subject: [PATCH 12/20] make it compile --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 114 ++++++++++----------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 0a04d09a2c..e602226457 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1370,63 +1370,6 @@ _exit: return code; } 
-static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) { - int32_t code = 0; - int32_t lino = 0; - - ASSERT(pWriter->pDataFWriter); - - // consume remain data and end with a NULL table row - SRowInfo* pRowInfo; - code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - for (;;) { - code = tsdbSnapWriteTableData(pWriter, pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pRowInfo == NULL) break; - - code = tsdbSnapWriteNextRow(pWriter, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // do file-level updates - code = tsdbWriteSttBlk(pWriter->pDataFWriter, pWriter->aSttBlk); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdx); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbUpdateDFileSetHeader(pWriter->pDataFWriter); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->pDataFReader) { - code = tsdbDataFReaderClose(&pWriter->pDataFReader); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // clear sources - while (pWriter->iterList) { - STsdbDataIter2* pIter = pWriter->iterList; - pWriter->iterList = pIter->next; - tsdbCloseDataIter2(pIter); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); - } else { - tsdbDebug("vgId:%d %s is done", TD_VID(pWriter->pTsdb->pVnode), __func__); - } - return code; -} - static int32_t tsdbSnapWriteNextRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) { int32_t code = 0; int32_t lino = 0; @@ -1493,6 +1436,63 @@ _exit: return code; } +static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(pWriter->pDataFWriter); + + // consume remain data and end with a NULL table row + 
SRowInfo* pRowInfo; + code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + for (;;) { + code = tsdbSnapWriteTableData(pWriter, pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pRowInfo == NULL) break; + + code = tsdbSnapWriteNextRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // do file-level updates + code = tsdbWriteSttBlk(pWriter->pDataFWriter, pWriter->aSttBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbUpdateDFileSetHeader(pWriter->pDataFWriter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->pDataFReader) { + code = tsdbDataFReaderClose(&pWriter->pDataFReader); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // clear sources + while (pWriter->iterList) { + STsdbDataIter2* pIter = pWriter->iterList; + pWriter->iterList = pIter->next; + tsdbCloseDataIter2(pIter); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s is done", TD_VID(pWriter->pTsdb->pVnode), __func__); + } + return code; +} + static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { int32_t code = 0; int32_t lino = 0; From 30a6488f0d757f312f9299eaece032823a512cd0 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Sat, 28 Jan 2023 13:38:39 +0800 Subject: [PATCH 13/20] fix: create topic before inserting data for two testcases of 7-tmq/tmqUpdate --- tests/system-test/7-tmq/tmqUpdate-1ctb.py | 31 ++++++++++------- .../7-tmq/tmqUpdate-multiCtb-snapshot0.py | 34 +++++++++++-------- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git 
a/tests/system-test/7-tmq/tmqUpdate-1ctb.py b/tests/system-test/7-tmq/tmqUpdate-1ctb.py index b974e4a41a..f03fa84335 100644 --- a/tests/system-test/7-tmq/tmqUpdate-1ctb.py +++ b/tests/system-test/7-tmq/tmqUpdate-1ctb.py @@ -100,6 +100,14 @@ class TDTestCase: # update to half tables paraDict['rowsPerTbl'] = int(self.rowsPerTbl / 2) + + tdLog.info("create topics from stb1") + topicFromStb1 = 'topic_stb1' + queryString = "select ts, c1, c2 from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicFromStb1, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + # tmqCom.insert_data_with_autoCreateTbl(tsql=tdSql,dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix="ctbx", # ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], # startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) @@ -107,12 +115,6 @@ class TDTestCase: ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) - tdLog.info("create topics from stb1") - topicFromStb1 = 'topic_stb1' - queryString = "select ts, c1, c2 from %s.%s"%(paraDict['dbName'], paraDict['stbName']) - sqlString = "create topic %s as %s" %(topicFromStb1, queryString) - tdLog.info("create topic sql: %s"%sqlString) - tdSql.execute(sqlString) # paraDict['ctbNum'] = self.ctbNum paraDict['rowsPerTbl'] = self.rowsPerTbl @@ -156,6 +158,7 @@ class TDTestCase: def tmqCase2(self): tdLog.printNoPrefix("======== test case 2: ") + paraDict = {'dbName': 'dbt', 'dropFlag': 1, 'event': '', @@ -175,7 +178,7 @@ class TDTestCase: 'showMsg': 1, 'showRow': 1, 'snapshot': 0} - + self.snapshot = 1 paraDict['snapshot'] = self.snapshot paraDict['vgroups'] = self.vgroups paraDict['ctbNum'] = self.ctbNum @@ -187,12 +190,6 @@ class TDTestCase: # update to half tables paraDict['startTs'] = paraDict['startTs'] + int(self.rowsPerTbl / 2) 
paraDict['rowsPerTbl'] = int(self.rowsPerTbl / 2) - tmqCom.insert_data_with_autoCreateTbl(tsql=tdSql,dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict["ctbPrefix"], - ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], - startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) - # tmqCom.insert_data_interlaceByMultiTbl(tsql=tdSql,dbName=paraDict["dbName"],ctbPrefix=paraDict["ctbPrefix"], - # ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], - # startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) tmqCom.initConsumerTable() tdLog.info("create topics from stb1") @@ -202,6 +199,14 @@ class TDTestCase: tdLog.info("create topic sql: %s"%sqlString) tdSql.execute(sqlString) + tmqCom.insert_data_with_autoCreateTbl(tsql=tdSql,dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict["ctbPrefix"], + ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], + startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) + # tmqCom.insert_data_interlaceByMultiTbl(tsql=tdSql,dbName=paraDict["dbName"],ctbPrefix=paraDict["ctbPrefix"], + # ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], + # startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) + + # paraDict['ctbNum'] = self.ctbNum paraDict['rowsPerTbl'] = self.rowsPerTbl consumerId = 1 diff --git a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py index d5df88cf43..7310466abe 100644 --- a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py +++ b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py @@ -101,19 +101,20 @@ class TDTestCase: # update to half tables paraDict['ctbNum'] = int(self.ctbNum/2) paraDict['rowsPerTbl'] = int(self.rowsPerTbl / 2) - 
tmqCom.insert_data_with_autoCreateTbl(tsql=tdSql,dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=self.autoCtbPrefix, - ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], - startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) - tmqCom.insert_data_interlaceByMultiTbl(tsql=tdSql,dbName=paraDict["dbName"],ctbPrefix=paraDict["ctbPrefix"], - ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], - startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) - + tdLog.info("create topics from stb1") topicFromStb1 = 'topic_stb1' queryString = "select ts, c1, c2 from %s.%s"%(paraDict['dbName'], paraDict['stbName']) sqlString = "create topic %s as %s" %(topicFromStb1, queryString) tdLog.info("create topic sql: %s"%sqlString) tdSql.execute(sqlString) + + tmqCom.insert_data_with_autoCreateTbl(tsql=tdSql,dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=self.autoCtbPrefix, + ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], + startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) + tmqCom.insert_data_interlaceByMultiTbl(tsql=tdSql,dbName=paraDict["dbName"],ctbPrefix=paraDict["ctbPrefix"], + ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], + startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) paraDict['ctbNum'] = self.ctbNum paraDict['rowsPerTbl'] = self.rowsPerTbl @@ -176,6 +177,7 @@ class TDTestCase: 'showRow': 1, 'snapshot': 0} + self.snapshot = 1 paraDict['snapshot'] = self.snapshot paraDict['vgroups'] = self.vgroups paraDict['ctbNum'] = self.ctbNum @@ -188,6 +190,16 @@ class TDTestCase: paraDict['ctbNum'] = int(self.ctbNum/2) paraDict['rowsPerTbl'] = int(self.rowsPerTbl / 2) paraDict['startTs'] = paraDict['startTs'] + int(self.rowsPerTbl / 2) + + tmqCom.initConsumerTable() + tdLog.info("create topics from stb1") + topicFromStb1 = 'topic_stb1' + 
queryString = "select ts, c1, c2 from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicFromStb1, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + + tmqCom.insert_data_with_autoCreateTbl(tsql=tdSql,dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=self.autoCtbPrefix, ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']+int(self.ctbNum/2)) @@ -200,14 +212,6 @@ class TDTestCase: ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']+int(self.ctbNum/2)) - tmqCom.initConsumerTable() - tdLog.info("create topics from stb1") - topicFromStb1 = 'topic_stb1' - queryString = "select ts, c1, c2 from %s.%s"%(paraDict['dbName'], paraDict['stbName']) - sqlString = "create topic %s as %s" %(topicFromStb1, queryString) - tdLog.info("create topic sql: %s"%sqlString) - tdSql.execute(sqlString) - # paraDict['ctbNum'] = self.ctbNum paraDict['ctbNum'] = self.ctbNum paraDict['rowsPerTbl'] = self.rowsPerTbl From 2a71d47668208ec814269fad359bf95a3c0d41a9 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Sat, 28 Jan 2023 16:11:17 +0800 Subject: [PATCH 14/20] fix: wal ref --- include/libs/wal/wal.h | 1 + source/dnode/vnode/src/tq/tq.c | 4 +++- source/libs/wal/src/walRef.c | 29 +++++++++++++++++++++++++++-- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index a1ae1e429d..bef7301a07 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -201,6 +201,7 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver, SWalCkHead *pHead); int32_t walFetchBody(SWalReader *pRead, SWalCkHead **ppHead); int32_t walSkipFetchBody(SWalReader *pRead, const SWalCkHead *pHead); +SWalRef *walRefFirstVer(SWal *); SWalRef *walRefCommittedVer(SWal *); 
SWalRef *walOpenRef(SWal *); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 1d5fae33eb..7649e8a006 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -521,7 +521,9 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { tqOffsetResetToData(&fetchOffsetNew, 0, 0); } } else { - tqOffsetResetToLog(&fetchOffsetNew, walGetFirstVer(pTq->pVnode->pWal)); + int64_t firstVer = walGetFirstVer(pTq->pVnode->pWal); + walRefVer(pHandle->pRef, firstVer); + tqOffsetResetToLog(&fetchOffsetNew, firstVer - 1); } } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { diff --git a/source/libs/wal/src/walRef.c b/source/libs/wal/src/walRef.c index e86111109c..f5cfe9abae 100644 --- a/source/libs/wal/src/walRef.c +++ b/source/libs/wal/src/walRef.c @@ -77,14 +77,39 @@ void walUnrefVer(SWalRef *pRef) { } #endif -SWalRef *walRefCommittedVer(SWal *pWal) { +SWalRef *walRefFirstVer(SWal *pWal) { SWalRef *pRef = walOpenRef(pWal); if (pRef == NULL) { return NULL; } taosThreadMutexLock(&pWal->mutex); - int64_t ver = walGetCommittedVer(pWal); + int64_t ver = walGetFirstVer(pWal); + + wDebug("vgId:%d, wal ref version %" PRId64 " for first", pWal->cfg.vgId, ver); + + pRef->refVer = ver; + // bsearch in fileSet + SWalFileInfo tmpInfo; + tmpInfo.firstVer = ver; + SWalFileInfo *pRet = taosArraySearch(pWal->fileInfoSet, &tmpInfo, compareWalFileInfo, TD_LE); + ASSERT(pRet != NULL); + pRef->refFile = pRet->firstVer; + + taosThreadMutexUnlock(&pWal->mutex); + return pRef; +} + +SWalRef *walRefCommittedVer(SWal *pWal) { + SWalRef *pRef = walOpenRef(pWal); + if (pRef == NULL) { + return NULL; + } + taosThreadMutexLock(&pWal->mutex); + + int64_t ver = walGetCommittedVer(pWal); + + wDebug("vgId:%d, wal ref version %" PRId64 " for committed", pWal->cfg.vgId, ver); pRef->refVer = ver; // bsearch in fileSet From fb2b42f968de39118eabe85f753e7d3f9d1547e0 Mon Sep 17 00:00:00 2001 From: 
Hongze Cheng Date: Sat, 28 Jan 2023 16:51:22 +0800 Subject: [PATCH 15/20] refact more code --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 460 ++++++++++++++------- 1 file changed, 307 insertions(+), 153 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index e602226457..6096936c29 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -21,6 +21,7 @@ extern int32_t tsdbReadDataBlockEx(SDataFReader* pReader, SDataBlk* pDataBlk, SB #define TSDB_MEM_TABLE_DATA_ITER 0 #define TSDB_DATA_FILE_DATA_ITER 1 #define TSDB_STT_FILE_DATA_ITER 2 +#define TSDB_TOMB_FILE_DATA_ITER 3 typedef struct STsdbDataIter2 STsdbDataIter2; struct STsdbDataIter2 { @@ -55,6 +56,14 @@ struct STsdbDataIter2 { int32_t iSttBlk; int32_t iRow; } sIter; + // TSDB_TOMB_FILE_DATA_ITER + struct { + SDelFReader* pReader; + SArray* aDelIdx; + SArray* aDelData; + int32_t iDelIdx; + int32_t iDelData; + } tIter; }; }; @@ -152,6 +161,49 @@ _exit: return code; } +static int32_t tsdbOpenTombFileDataIter(SDelFReader* pReader, STsdbDataIter2** ppIter) { + int32_t code = 0; + int32_t lino = 0; + + STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter)); + if (pIter == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + pIter->type = TSDB_TOMB_FILE_DATA_ITER; + + pIter->tIter.pReader = pReader; + if ((pIter->tIter.aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + if ((pIter->tIter.aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbReadDelIdx(pReader, pIter->tIter.aDelIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(pIter->tIter.aDelIdx) == 0) goto _clear; + + pIter->tIter.iDelIdx = 0; + pIter->tIter.iDelData = 0; + +_exit: + if (code) { + if 
(pIter) { + _clear: + taosArrayDestroy(pIter->tIter.aDelIdx); + taosArrayDestroy(pIter->tIter.aDelData); + taosMemoryFree(pIter); + pIter = NULL; + } + } + *ppIter = pIter; + return code; +} + /* close */ static void tsdbCloseDataFileDataIter(STsdbDataIter2* pIter) { tBlockDataDestroy(&pIter->dIter.bData, 1); @@ -921,19 +973,19 @@ struct STsdbSnapWriter { uint8_t* aBuf[5]; STsdbFS fs; + TABLEID tbid; // time-series data SBlockData inData; int32_t fid; - TABLEID tbid; SSkmInfo skmTable; /* reader */ SDataFReader* pDataFReader; STsdbDataIter2* iterList; STsdbDataIter2* pDIter; - STsdbDataIter2* pIter; + STsdbDataIter2* pSIter; SRBTree rbt; // SRBTree /* writer */ @@ -945,12 +997,14 @@ struct STsdbSnapWriter { SBlockData sData; // tombstone data - SDelFReader* pDelFReader; + /* reader */ + SDelFReader* pDelFReader; + STsdbDataIter2* pTIter; + + /* writer */ SDelFWriter* pDelFWriter; - int32_t iDelIdx; - SArray* aDelIdxR; + SArray* aDelIdx; SArray* aDelData; - SArray* aDelIdxW; }; // SNAP_DATA_TSDB @@ -1207,6 +1261,7 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { if (pWriter->bData.nRow > 0) { if (pWriter->bData.nRow < pWriter->minRow) { + ASSERT(TABLE_SAME_SCHEMA(pWriter->sData.suid, pWriter->sData.uid, pWriter->tbid.suid, pWriter->tbid.uid)); for (int32_t iRow = 0; iRow < pWriter->bData.nRow; iRow++) { code = tBlockDataAppendRow(&pWriter->sData, &tsdbRowFromBlockData(&pWriter->bData, iRow), NULL, pWriter->tbid.uid); @@ -1262,7 +1317,7 @@ static int32_t tsdbSnapWriteFileDataStart(STsdbSnapWriter* pWriter, int32_t fid) pWriter->pDataFReader = NULL; pWriter->iterList = NULL; pWriter->pDIter = NULL; - pWriter->pIter = NULL; + pWriter->pSIter = NULL; tRBTreeCreate(&pWriter->rbt, tsdbDataIterCmprFn); if (pSet) { code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet); @@ -1276,23 +1331,23 @@ static int32_t tsdbSnapWriteFileDataStart(STsdbSnapWriter* pWriter, int32_t fid) } for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { - code = 
tsdbOpenSttFileDataIter(pWriter->pDataFReader, iStt, &pWriter->pIter); + code = tsdbOpenSttFileDataIter(pWriter->pDataFReader, iStt, &pWriter->pSIter); TSDB_CHECK_CODE(code, lino, _exit); - if (pWriter->pIter) { - code = tsdbSttFileDataIterNext(pWriter->pIter); + if (pWriter->pSIter) { + code = tsdbSttFileDataIterNext(pWriter->pSIter); TSDB_CHECK_CODE(code, lino, _exit); // add to tree - tRBTreePut(&pWriter->rbt, &pWriter->pIter->rbtn); + tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn); // add to list - pWriter->pIter->next = pWriter->iterList; - pWriter->iterList = pWriter->pIter; + pWriter->pSIter->next = pWriter->iterList; + pWriter->iterList = pWriter->pSIter; } } - pWriter->pIter = NULL; + pWriter->pSIter = NULL; } // open writer @@ -1348,7 +1403,7 @@ static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowIn // switch to new table if need if (pRowInfo == NULL || pRowInfo->uid != pWriter->tbid.uid) { - if (pWriter->tbid.uid != 0) { + if (pWriter->tbid.uid) { code = tsdbSnapWriteTableDataEnd(pWriter); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1357,11 +1412,10 @@ static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowIn TSDB_CHECK_CODE(code, lino, _exit); } - // end with a NULL row - if (pRowInfo) { - code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); - TSDB_CHECK_CODE(code, lino, _exit); - } + if (pRowInfo == NULL) goto _exit; + + code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); + TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { @@ -1374,19 +1428,19 @@ static int32_t tsdbSnapWriteNextRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowIn int32_t code = 0; int32_t lino = 0; - if (pWriter->pIter) { - code = tsdbDataIterNext2(pWriter->pIter); + if (pWriter->pSIter) { + code = tsdbDataIterNext2(pWriter->pSIter); TSDB_CHECK_CODE(code, lino, _exit); - if (pWriter->pIter->rowInfo.suid == 0 && pWriter->pIter->rowInfo.uid == 0) { - pWriter->pIter = NULL; + if (pWriter->pSIter->rowInfo.suid == 0 && 
pWriter->pSIter->rowInfo.uid == 0) { + pWriter->pSIter = NULL; } else { SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt); if (pNode) { - int32_t c = tsdbDataIterCmprFn(&pWriter->pIter->rbtn, pNode); + int32_t c = tsdbDataIterCmprFn(&pWriter->pSIter->rbtn, pNode); if (c > 0) { - tRBTreePut(&pWriter->rbt, &pWriter->pIter->rbtn); - pWriter->pIter = NULL; + tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn); + pWriter->pSIter = NULL; } else if (c == 0) { ASSERT(0); } @@ -1394,17 +1448,17 @@ static int32_t tsdbSnapWriteNextRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowIn } } - if (pWriter->pIter == NULL) { + if (pWriter->pSIter == NULL) { SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt); if (pNode) { tRBTreeDrop(&pWriter->rbt, pNode); - pWriter->pIter = TSDB_RBTN_TO_DATA_ITER(pNode); + pWriter->pSIter = TSDB_RBTN_TO_DATA_ITER(pNode); } } if (ppRowInfo) { - if (pWriter->pIter) { - *ppRowInfo = &pWriter->pIter->rowInfo; + if (pWriter->pSIter) { + *ppRowInfo = &pWriter->pSIter->rowInfo; } else { *ppRowInfo = NULL; } @@ -1421,8 +1475,8 @@ static int32_t tsdbSnapWriteGetRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInf int32_t code = 0; int32_t lino = 0; - if (pWriter->pIter) { - *ppRowInfo = &pWriter->pIter->rowInfo; + if (pWriter->pSIter) { + *ppRowInfo = &pWriter->pSIter->rowInfo; goto _exit; } @@ -1558,143 +1612,238 @@ _exit: } // SNAP_DATA_DEL -static int32_t tsdbSnapMoveWriteDelData(STsdbSnapWriter* pWriter, TABLEID* pId) { +static int32_t tsdbSnapWriteDelTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) { int32_t code = 0; + int32_t lino = 0; - while (true) { - if (pWriter->iDelIdx >= taosArrayGetSize(pWriter->aDelIdxR)) break; + if (pId) { + pWriter->tbid = *pId; + } else { + pWriter->tbid = (TABLEID){.suid = INT64_MAX, .uid = INT64_MAX}; + } - SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx); + if (pWriter->pTIter) { + ASSERT(pWriter->pTIter->tIter.iDelData >= taosArrayGetSize(pWriter->pTIter->tIter.aDelData)); - if 
(tTABLEIDCmprFn(pDelIdx, pId) >= 0) break; + for (;;) { + if (pWriter->pTIter->tIter.iDelIdx >= taosArrayGetSize(pWriter->pTIter->tIter.aDelIdx)) { + break; + } - code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData); - if (code) goto _exit; + SDelIdx* pDelIdx = taosArrayGet(pWriter->pTIter->tIter.aDelIdx, pWriter->pTIter->tIter.iDelIdx); - SDelIdx delIdx = *pDelIdx; - code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx); - if (code) goto _exit; + int32_t c = tTABLEIDCmprFn(pDelIdx, &pWriter->tbid); + if (c < 0) { + code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->pTIter->tIter.aDelData); + TSDB_CHECK_CODE(code, lino, _exit); - if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) { + SDelIdx* pDelIdxNew = taosArrayReserve(pWriter->pTIter->tIter.aDelIdx, 1); + if (pDelIdxNew == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + pDelIdxNew->suid = pDelIdx->suid; + pDelIdxNew->uid = pDelIdx->uid; + + code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->pTIter->tIter.aDelData, pDelIdxNew); + TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pTIter->tIter.iDelIdx++; + } else if (c == 0) { + code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->pTIter->tIter.aDelData); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayAddBatch(pWriter->aDelData, pWriter->pTIter->tIter.aDelData->pData, + taosArrayGetSize(pWriter->pTIter->tIter.aDelData)) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + pWriter->pTIter->tIter.iDelData = taosArrayGetSize(pWriter->pTIter->tIter.aDelData); + pWriter->pTIter->tIter.iDelIdx++; + break; + } else { + pWriter->pTIter->tIter.iDelData = taosArrayGetSize(pWriter->pTIter->tIter.aDelData); + break; + } + } + } + + taosArrayClear(pWriter->aDelData); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + 
tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, pId->suid, + pId->uid); + } + return code; +} + +static int32_t tsdbSnapWriteDelTableDataEnd(STsdbSnapWriter* pWriter) { + int32_t code = 0; + int32_t lino = 0; + + if (taosArrayGetSize(pWriter->aDelData) > 0) { + SDelIdx* pDelIdx = taosArrayReserve(pWriter->aDelIdx, 1); + if (pDelIdx == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; + TSDB_CHECK_CODE(code, lino, _exit); } - pWriter->iDelIdx++; + pDelIdx->suid = pWriter->tbid.suid; + pDelIdx->uid = pWriter->tbid.uid; + + code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, pDelIdx); + TSDB_CHECK_CODE(code, lino, _exit); } _exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbTrace("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__); + } + return code; +} + +static int32_t tsdbSnapWriteDelTableData(STsdbSnapWriter* pWriter, TABLEID* pId, uint8_t* pData, int64_t size) { + int32_t code = 0; + int32_t lino = 0; + + if (pId == NULL || pId->uid != pWriter->tbid.uid) { + if (pWriter->tbid.uid) { + code = tsdbSnapWriteDelTableDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbSnapWriteDelTableDataStart(pWriter, pId); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (pId == NULL) goto _exit; + + int64_t n = 0; + while (n < size) { + SDelData delData; + n += tGetDelData(pData + n, &delData); + + if (taosArrayPush(pWriter->aDelData, &delData) < 0) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + ASSERT(n == size); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapWriteDelDataStart(STsdbSnapWriter* pWriter) { + int32_t code = 0; + int32_t lino = 0; + + STsdb* pTsdb = pWriter->pTsdb; + SDelFile* 
pDelFile = pWriter->fs.pDelFile; + + pWriter->tbid = (TABLEID){0}; + + // reader + if (pDelFile) { + code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbOpenTombFileDataIter(pWriter->pDelFReader, &pWriter->pTIter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // writer + code = tsdbDelFWriterOpen(&pWriter->pDelFWriter, &(SDelFile){.commitID = pWriter->commitID}, pTsdb); + TSDB_CHECK_CODE(code, lino, _exit); + + if ((pWriter->aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + if ((pWriter->aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } + return code; +} + +static int32_t tsdbSnapWriteDelDataEnd(STsdbSnapWriter* pWriter) { + int32_t code = 0; + int32_t lino = 0; + + STsdb* pTsdb = pWriter->pTsdb; + + // end remaining table (TODO) + code = tsdbSnapWriteDelTableData(pWriter, NULL, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); + + // update file-level info + code = tsdbWriteDelIdx(pWriter->pDelFWriter, pWriter->aDelIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->pDelFReader) { + code = tsdbDelFReaderClose(&pWriter->pDelFReader); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + 
tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } return code; } static int32_t tsdbSnapWriteDelData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; + int32_t lino = 0; - // Open del file if not opened yet + STsdb* pTsdb = pWriter->pTsdb; + + // start to write del data if need if (pWriter->pDelFWriter == NULL) { - SDelFile* pDelFile = pWriter->fs.pDelFile; - - // reader - if (pDelFile) { - code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb); - if (code) goto _err; - - code = tsdbReadDelIdx(pWriter->pDelFReader, pWriter->aDelIdxR); - if (code) goto _err; - } else { - taosArrayClear(pWriter->aDelIdxR); - } - pWriter->iDelIdx = 0; - - // writer - SDelFile delFile = {.commitID = pWriter->commitID}; - code = tsdbDelFWriterOpen(&pWriter->pDelFWriter, &delFile, pTsdb); - if (code) goto _err; - taosArrayClear(pWriter->aDelIdxW); + code = tsdbSnapWriteDelDataStart(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); } - TABLEID id = *(TABLEID*)pHdr->data; + code = tsdbSnapWriteDelTableData(pWriter, (TABLEID*)pHdr->data, pHdr->data + sizeof(TABLEID), + pHdr->size - sizeof(TABLEID)); + TSDB_CHECK_CODE(code, lino, _exit); - // Move write data < id - code = tsdbSnapMoveWriteDelData(pWriter, &id); - if (code) goto _err; - - // Merge incoming data with current - if (pWriter->iDelIdx < taosArrayGetSize(pWriter->aDelIdxR) && - tTABLEIDCmprFn(taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx), &id) == 0) { - SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx); - - code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData); - if (code) goto _err; - - pWriter->iDelIdx++; +_exit: + if (code) { + tsdbError("vgId:%d %s failed since %s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code)); } else { - taosArrayClear(pWriter->aDelData); + tsdbTrace("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); } - - int64_t n = sizeof(TABLEID); - while (n < pHdr->size) { - SDelData 
delData; - - n += tGetDelData(pHdr->data + n, &delData); - - if (taosArrayPush(pWriter->aDelData, &delData) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - } - - SDelIdx delIdx = {.suid = id.suid, .uid = id.uid}; - code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx); - if (code) goto _err; - - if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - return code; - -_err: - tsdbError("vgId:%d, vnode snapshot tsdb write del for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path, - tstrerror(code)); - return code; -} - -static int32_t tsdbSnapWriteDelEnd(STsdbSnapWriter* pWriter) { - int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; - - if (pWriter->pDelFWriter == NULL) return code; - - TABLEID id = {.suid = INT64_MAX, .uid = INT64_MAX}; - code = tsdbSnapMoveWriteDelData(pWriter, &id); - if (code) goto _err; - - code = tsdbWriteDelIdx(pWriter->pDelFWriter, pWriter->aDelIdxW); - if (code) goto _err; - - code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter); - if (code) goto _err; - - code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel); - if (code) goto _err; - - code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1); - if (code) goto _err; - - if (pWriter->pDelFReader) { - code = tsdbDelFReaderClose(&pWriter->pDelFReader); - if (code) goto _err; - } - - tsdbInfo("vgId:%d, vnode snapshot tsdb write del for %s end", TD_VID(pTsdb->pVnode), pTsdb->path); - return code; - -_err: - tsdbError("vgId:%d, vnode snapshot tsdb write del end for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path, - tstrerror(code)); return code; } @@ -1738,7 +1887,7 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr _exit: if (code) { - tsdbError("vgId:%d, %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); 
if (pWriter) { tBlockDataDestroy(&pWriter->sData, 1); tBlockDataDestroy(&pWriter->bData, 1); @@ -1755,20 +1904,26 @@ _exit: int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter) { int32_t code = 0; + int32_t lino = 0; + if (pWriter->pDataFWriter) { code = tsdbSnapWriteFileDataEnd(pWriter); - if (code) goto _exit; + TSDB_CHECK_CODE(code, lino, _exit); } - code = tsdbSnapWriteDelEnd(pWriter); - if (code) goto _exit; + if (pWriter->pDelFWriter) { + code = tsdbSnapWriteDelDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } code = tsdbFSPrepareCommit(pWriter->pTsdb, &pWriter->fs); - if (code) goto _exit; + TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__); } return code; } @@ -1795,9 +1950,8 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { } // SNAP_DATA_DEL - taosArrayDestroy(pWriter->aDelIdxW); taosArrayDestroy(pWriter->aDelData); - taosArrayDestroy(pWriter->aDelIdxR); + taosArrayDestroy(pWriter->aDelIdx); // SNAP_DATA_TSDB tBlockDataDestroy(&pWriter->sData, 1); From f5c4ca3380ba7361b8b2a89910896078303c6f8a Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Sat, 28 Jan 2023 17:17:10 +0800 Subject: [PATCH 16/20] fix: wal ref --- include/libs/wal/wal.h | 2 +- source/dnode/vnode/src/tq/tq.c | 11 ++++++++--- source/libs/wal/src/walRef.c | 8 +++++--- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index bef7301a07..a0f421212a 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -201,7 +201,7 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver, SWalCkHead *pHead); int32_t walFetchBody(SWalReader *pRead, SWalCkHead **ppHead); int32_t 
walSkipFetchBody(SWalReader *pRead, const SWalCkHead *pHead); -SWalRef *walRefFirstVer(SWal *); +SWalRef *walRefFirstVer(SWal *, SWalRef *); SWalRef *walRefCommittedVer(SWal *); SWalRef *walOpenRef(SWal *); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 7649e8a006..b195cfafb0 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -521,9 +521,12 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { tqOffsetResetToData(&fetchOffsetNew, 0, 0); } } else { - int64_t firstVer = walGetFirstVer(pTq->pVnode->pWal); - walRefVer(pHandle->pRef, firstVer); - tqOffsetResetToLog(&fetchOffsetNew, firstVer - 1); + pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef); + if (pHandle->pRef == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + tqOffsetResetToLog(&fetchOffsetNew, pHandle->pRef->refVer - 1); } } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { @@ -721,6 +724,8 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen) { SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg; + tqDebug("vgId:%d, delete sub: %s", pTq->pVnode->config.vgId, pReq->subKey); + taosWLockLatch(&pTq->pushLock); int32_t code = taosHashRemove(pTq->pPushMgr, pReq->subKey, strlen(pReq->subKey)); if (code != 0) { diff --git a/source/libs/wal/src/walRef.c b/source/libs/wal/src/walRef.c index f5cfe9abae..43470f4c82 100644 --- a/source/libs/wal/src/walRef.c +++ b/source/libs/wal/src/walRef.c @@ -77,10 +77,12 @@ void walUnrefVer(SWalRef *pRef) { } #endif -SWalRef *walRefFirstVer(SWal *pWal) { - SWalRef *pRef = walOpenRef(pWal); +SWalRef *walRefFirstVer(SWal *pWal, SWalRef *pRef) { if (pRef == NULL) { - return NULL; + pRef = walOpenRef(pWal); + if (pRef == NULL) { + return NULL; + } } taosThreadMutexLock(&pWal->mutex); From b996618e65abd0ca3aa881a92df2ae99c3b30961 Mon Sep 17 00:00:00 
2001 From: Hongze Cheng Date: Sat, 28 Jan 2023 18:03:13 +0800 Subject: [PATCH 17/20] more code --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 312 ++++++--------------- 1 file changed, 93 insertions(+), 219 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 6096936c29..c519e92727 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -16,6 +16,9 @@ #include "tsdb.h" extern int32_t tsdbReadDataBlockEx(SDataFReader* pReader, SDataBlk* pDataBlk, SBlockData* pBlockData); +extern int32_t tsdbUpdateTableSchema(SMeta* pMeta, int64_t suid, int64_t uid, SSkmInfo* pSkmInfo); +extern int32_t tsdbWriteDataBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SMapData* mDataBlk, int8_t cmprAlg); +extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SArray* aSttBlk, int8_t cmprAlg); // STsdbDataIter2 ======================================== #define TSDB_MEM_TABLE_DATA_ITER 0 @@ -218,6 +221,12 @@ static void tsdbCloseSttFileDataIter(STsdbDataIter2* pIter) { taosMemoryFree(pIter); } +static void tsdbCloseTombFileDataIter(STsdbDataIter2* pIter) { + taosArrayDestroy(pIter->tIter.aDelData); + taosArrayDestroy(pIter->tIter.aDelIdx); + taosMemoryFree(pIter); +} + static void tsdbCloseDataIter2(STsdbDataIter2* pIter) { if (pIter->type == TSDB_MEM_TABLE_DATA_ITER) { ASSERT(0); @@ -225,6 +234,8 @@ static void tsdbCloseDataIter2(STsdbDataIter2* pIter) { tsdbCloseDataFileDataIter(pIter); } else if (pIter->type == TSDB_STT_FILE_DATA_ITER) { tsdbCloseSttFileDataIter(pIter); + } else if (pIter->type == TSDB_TOMB_FILE_DATA_ITER) { + tsdbCloseTombFileDataIter(pIter); } else { ASSERT(0); } @@ -347,54 +358,31 @@ static int32_t tsdbDataIterNext2(STsdbDataIter2* pIter) { /* get */ // STsdbSnapReader ======================================== -typedef enum { SNAP_DATA_FILE_ITER = 0, SNAP_STT_FILE_ITER } EFIterT; -typedef struct { - SRBTreeNode n; - 
SRowInfo rInfo; - EFIterT type; - union { - struct { - SArray* aBlockIdx; - int32_t iBlockIdx; - SBlockIdx* pBlockIdx; - SMapData mBlock; - int32_t iBlock; - }; // .data file - struct { - int32_t iStt; - SArray* aSttBlk; - int32_t iSttBlk; - }; // .stt file - }; - SBlockData bData; - int32_t iRow; -} SFDataIter; - struct STsdbSnapReader { - STsdb* pTsdb; - int64_t sver; - int64_t ever; - STsdbFS fs; - int8_t type; - // for data file - int8_t dataDone; - int32_t fid; - SDataFReader* pDataFReader; - SFDataIter* pIter; - SRBTree rbt; - SFDataIter aFDataIter[TSDB_MAX_STT_TRIGGER + 1]; - SBlockData bData; - SSkmInfo skmTable; - // for del file - int8_t delDone; - SDelFReader* pDelFReader; - SArray* aDelIdx; // SArray - int32_t iDelIdx; - SArray* aDelData; // SArray - uint8_t* aBuf[5]; -}; + STsdb* pTsdb; + int64_t sver; + int64_t ever; + int8_t type; + uint8_t* aBuf[5]; -extern int32_t tsdbUpdateTableSchema(SMeta* pMeta, int64_t suid, int64_t uid, SSkmInfo* pSkmInfo); + STsdbFS fs; + SSkmInfo skmTable; + + // timeseries data + int8_t dataDone; + int32_t fid; + + SDataFReader* pDataFReader; + STsdbDataIter2* iterList; + STsdbDataIter2* pIter; + SRBTree rbt; + SBlockData bData; + + // tombstone data + int8_t delDone; + SDelFReader* pDelFReader; + STsdbDataIter2* pTIter; +}; static int32_t tFDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* pNode2) { SFDataIter* pIter1 = (SFDataIter*)(((uint8_t*)pNode1) - offsetof(SFDataIter, n)); @@ -647,7 +635,7 @@ _exit: return code; } -static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { +static int32_t tsdbSnapReadTimeSeriesData(STsdbSnapReader* pReader, uint8_t** ppData) { int32_t code = 0; int32_t lino = 0; @@ -705,7 +693,7 @@ _exit: return code; } -static int32_t tsdbSnapReadDel(STsdbSnapReader* pReader, uint8_t** ppData) { +static int32_t tsdbSnapReadTombstoneData(STsdbSnapReader* pReader, uint8_t** ppData) { int32_t code = 0; int32_t lino = 0; @@ -790,12 +778,11 @@ _exit: } int32_t 
tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, STsdbSnapReader** ppReader) { - int32_t code = 0; - int32_t lino = 0; - STsdbSnapReader* pReader = NULL; + int32_t code = 0; + int32_t lino = 0; // alloc - pReader = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*pReader)); + STsdbSnapReader* pReader = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*pReader)); if (pReader == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); @@ -805,118 +792,78 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type pReader->ever = ever; pReader->type = type; - code = taosThreadRwlockRdlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - TSDB_CHECK_CODE(code, lino, _exit); - } - + taosThreadRwlockRdlock(&pTsdb->rwLock); code = tsdbFSRef(pTsdb, &pReader->fs); if (code) { taosThreadRwlockUnlock(&pTsdb->rwLock); TSDB_CHECK_CODE(code, lino, _exit); } + taosThreadRwlockUnlock(&pTsdb->rwLock); - code = taosThreadRwlockUnlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // data + // init pReader->fid = INT32_MIN; - for (int32_t iIter = 0; iIter < sizeof(pReader->aFDataIter) / sizeof(pReader->aFDataIter[0]); iIter++) { - SFDataIter* pIter = &pReader->aFDataIter[iIter]; - - if (iIter == 0) { - pIter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); - if (pIter->aBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - } else { - pIter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); - if (pIter->aSttBlk == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - } - - code = tBlockDataCreate(&pIter->bData); - TSDB_CHECK_CODE(code, lino, _exit); - } code = tBlockDataCreate(&pReader->bData); TSDB_CHECK_CODE(code, lino, _exit); - // del - pReader->aDelIdx = taosArrayInit(0, sizeof(SDelIdx)); - if (pReader->aDelIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - 
TSDB_CHECK_CODE(code, lino, _exit); - } - pReader->aDelData = taosArrayInit(0, sizeof(SDelData)); - if (pReader->aDelData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - _exit: if (code) { - tsdbError("vgId:%d, %s failed at line %d since %s, TSDB path: %s", TD_VID(pTsdb->pVnode), __func__, lino, - tstrerror(code), pTsdb->path); - *ppReader = NULL; - + tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode), + __func__, lino, tstrerror(code), sver, ever, type); if (pReader) { - taosArrayDestroy(pReader->aDelData); - taosArrayDestroy(pReader->aDelIdx); tBlockDataDestroy(&pReader->bData, 1); - tsdbFSDestroy(&pReader->fs); + tsdbFSUnref(pTsdb, &pReader->fs); taosMemoryFree(pReader); + pReader = NULL; } } else { - *ppReader = pReader; - tsdbInfo("vgId:%d, vnode snapshot tsdb reader opened for %s", TD_VID(pTsdb->pVnode), pTsdb->path); + tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode), __func__, sver, ever, + type); } + *ppReader = pReader; return code; } int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { - int32_t code = 0; + int32_t code = 0; + int32_t lino = 0; + STsdbSnapReader* pReader = *ppReader; - // data - if (pReader->pDataFReader) tsdbDataFReaderClose(&pReader->pDataFReader); - for (int32_t iIter = 0; iIter < sizeof(pReader->aFDataIter) / sizeof(pReader->aFDataIter[0]); iIter++) { - SFDataIter* pIter = &pReader->aFDataIter[iIter]; - - if (iIter == 0) { - taosArrayDestroy(pIter->aBlockIdx); - tMapDataClear(&pIter->mBlock); - } else { - taosArrayDestroy(pIter->aSttBlk); - } - - tBlockDataDestroy(&pIter->bData, 1); + // tombstone + if (pReader->pTIter) { + tsdbCloseDataIter2(pReader->pTIter); + pReader->pTIter = NULL; + } + if (pReader->pDelFReader) { + tsdbDelFReaderClose(&pReader->pDelFReader); } + // timeseries tBlockDataDestroy(&pReader->bData, 1); + while (pReader->iterList) { + STsdbDataIter2* pIter = 
pReader->iterList; + pReader->iterList = pIter->next; + tsdbCloseDataIter2(pIter); + } + if (pReader->pDataFReader) { + tsdbDataFReaderClose(&pReader->pDataFReader); + } + + // other tDestroyTSchema(pReader->skmTable.pTSchema); - - // del - if (pReader->pDelFReader) tsdbDelFReaderClose(&pReader->pDelFReader); - taosArrayDestroy(pReader->aDelIdx); - taosArrayDestroy(pReader->aDelData); - tsdbFSUnref(pReader->pTsdb, &pReader->fs); - - tsdbInfo("vgId:%d, vnode snapshot tsdb reader closed for %s", TD_VID(pReader->pTsdb->pVnode), pReader->pTsdb->path); - for (int32_t iBuf = 0; iBuf < sizeof(pReader->aBuf) / sizeof(pReader->aBuf[0]); iBuf++) { tFree(pReader->aBuf[iBuf]); } - taosMemoryFree(pReader); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(pReader->pTsdb->pVnode), __func__); + } *ppReader = NULL; return code; } @@ -929,7 +876,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) { // read data file if (!pReader->dataDone) { - code = tsdbSnapReadData(pReader, ppData); + code = tsdbSnapReadTimeSeriesData(pReader, ppData); TSDB_CHECK_CODE(code, lino, _exit); if (*ppData) { goto _exit; @@ -940,7 +887,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) { // read del file if (!pReader->delDone) { - code = tsdbSnapReadDel(pReader, ppData); + code = tsdbSnapReadTombstoneData(pReader, ppData); TSDB_CHECK_CODE(code, lino, _exit); if (*ppData) { goto _exit; @@ -951,10 +898,9 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) { _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pReader->pTsdb->pVnode), __func__, tstrerror(code), - pReader->pTsdb->path); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); } else { - tsdbDebug("vgId:%d, %s done, path:%s", TD_VID(pReader->pTsdb->pVnode), 
__func__, pReader->pTsdb->path); + tsdbDebug("vgId:%d %s done", TD_VID(pReader->pTsdb->pVnode), __func__); } return code; } @@ -1008,69 +954,6 @@ struct STsdbSnapWriter { }; // SNAP_DATA_TSDB -extern int32_t tsdbWriteDataBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SMapData* mDataBlk, int8_t cmprAlg); -extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SArray* aSttBlk, int8_t cmprAlg); - -static int32_t tsdbSnapNextTableData(STsdbSnapWriter* pWriter) { - int32_t code = 0; - int32_t lino = 0; - -#if 0 - ASSERT(pWriter->dReader.iRow >= pWriter->dReader.bData.nRow); - - if (pWriter->dReader.iBlockIdx < taosArrayGetSize(pWriter->dReader.aBlockIdx)) { - pWriter->dReader.pBlockIdx = (SBlockIdx*)taosArrayGet(pWriter->dReader.aBlockIdx, pWriter->dReader.iBlockIdx); - - code = tsdbReadDataBlk(pWriter->dReader.pReader, pWriter->dReader.pBlockIdx, &pWriter->dReader.mDataBlk); - if (code) goto _exit; - - pWriter->dReader.iBlockIdx++; - } else { - pWriter->dReader.pBlockIdx = NULL; - tMapDataReset(&pWriter->dReader.mDataBlk); - } - pWriter->dReader.iDataBlk = 0; // point to the next one - tBlockDataReset(&pWriter->dReader.bData); - pWriter->dReader.iRow = 0; -#endif - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t tsdbSnapWriteCopyData(STsdbSnapWriter* pWriter, TABLEID* pId) { - int32_t code = 0; - int32_t lino = 0; - -#if 0 - while (true) { - if (pWriter->dReader.pBlockIdx == NULL) break; - if (tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, pId) >= 0) break; - - SBlockIdx blkIdx = *pWriter->dReader.pBlockIdx; - code = tsdbWriteDataBlk(pWriter->dWriter.pWriter, &pWriter->dReader.mDataBlk, &blkIdx); - if (code) goto _exit; - - if (taosArrayPush(pWriter->dWriter.aBlockIdx, &blkIdx) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; - } - - code = tsdbSnapNextTableData(pWriter); - if (code) goto _exit; - } 
-#endif - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) { int32_t code = 0; int32_t lino = 0; @@ -1622,14 +1505,10 @@ static int32_t tsdbSnapWriteDelTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pWriter->tbid = (TABLEID){.suid = INT64_MAX, .uid = INT64_MAX}; } + taosArrayClear(pWriter->aDelData); + if (pWriter->pTIter) { - ASSERT(pWriter->pTIter->tIter.iDelData >= taosArrayGetSize(pWriter->pTIter->tIter.aDelData)); - - for (;;) { - if (pWriter->pTIter->tIter.iDelIdx >= taosArrayGetSize(pWriter->pTIter->tIter.aDelIdx)) { - break; - } - + while (pWriter->pTIter->tIter.iDelIdx < taosArrayGetSize(pWriter->pTIter->tIter.aDelIdx)) { SDelIdx* pDelIdx = taosArrayGet(pWriter->pTIter->tIter.aDelIdx, pWriter->pTIter->tIter.iDelIdx); int32_t c = tTABLEIDCmprFn(pDelIdx, &pWriter->tbid); @@ -1637,7 +1516,7 @@ static int32_t tsdbSnapWriteDelTableDataStart(STsdbSnapWriter* pWriter, TABLEID* code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->pTIter->tIter.aDelData); TSDB_CHECK_CODE(code, lino, _exit); - SDelIdx* pDelIdxNew = taosArrayReserve(pWriter->pTIter->tIter.aDelIdx, 1); + SDelIdx* pDelIdxNew = taosArrayReserve(pWriter->aDelIdx, 1); if (pDelIdxNew == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); @@ -1651,27 +1530,17 @@ static int32_t tsdbSnapWriteDelTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pWriter->pTIter->tIter.iDelIdx++; } else if (c == 0) { - code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->pTIter->tIter.aDelData); + code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData); TSDB_CHECK_CODE(code, lino, _exit); - if (taosArrayAddBatch(pWriter->aDelData, pWriter->pTIter->tIter.aDelData->pData, - taosArrayGetSize(pWriter->pTIter->tIter.aDelData)) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - 
TSDB_CHECK_CODE(code, lino, _exit); - } - - pWriter->pTIter->tIter.iDelData = taosArrayGetSize(pWriter->pTIter->tIter.aDelData); pWriter->pTIter->tIter.iDelIdx++; break; } else { - pWriter->pTIter->tIter.iDelData = taosArrayGetSize(pWriter->pTIter->tIter.aDelData); break; } } } - taosArrayClear(pWriter->aDelData); - _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); @@ -1735,7 +1604,6 @@ static int32_t tsdbSnapWriteDelTableData(STsdbSnapWriter* pWriter, TABLEID* pId, TSDB_CHECK_CODE(code, lino, _exit); } } - ASSERT(n == size); _exit: @@ -1791,7 +1659,7 @@ static int32_t tsdbSnapWriteDelDataEnd(STsdbSnapWriter* pWriter) { STsdb* pTsdb = pWriter->pTsdb; - // end remaining table (TODO) + // end remaining table with NULL data code = tsdbSnapWriteDelTableData(pWriter, NULL, NULL, 0); TSDB_CHECK_CODE(code, lino, _exit); @@ -1813,6 +1681,11 @@ static int32_t tsdbSnapWriteDelDataEnd(STsdbSnapWriter* pWriter) { TSDB_CHECK_CODE(code, lino, _exit); } + if (pWriter->pTIter) { + tsdbCloseDataIter2(pWriter->pTIter); + pWriter->pTIter = NULL; + } + _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); @@ -1834,6 +1707,7 @@ static int32_t tsdbSnapWriteDelData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr TSDB_CHECK_CODE(code, lino, _exit); } + // do write del data code = tsdbSnapWriteDelTableData(pWriter, (TABLEID*)pHdr->data, pHdr->data + sizeof(TABLEID), pHdr->size - sizeof(TABLEID)); TSDB_CHECK_CODE(code, lino, _exit); From 9ec79d7ef84a8cb8a91db661e475e1a1828f3d6f Mon Sep 17 00:00:00 2001 From: plum-lihui Date: Sat, 28 Jan 2023 18:15:11 +0800 Subject: [PATCH 18/20] test:modify expect consume rows --- tests/system-test/7-tmq/tmqUpdate-1ctb.py | 33 +++++++--------- .../7-tmq/tmqUpdate-multiCtb-snapshot0.py | 38 +++++++++---------- 2 files changed, 31 insertions(+), 40 deletions(-) diff --git 
a/tests/system-test/7-tmq/tmqUpdate-1ctb.py b/tests/system-test/7-tmq/tmqUpdate-1ctb.py index f03fa84335..db2ec3285d 100644 --- a/tests/system-test/7-tmq/tmqUpdate-1ctb.py +++ b/tests/system-test/7-tmq/tmqUpdate-1ctb.py @@ -100,14 +100,6 @@ class TDTestCase: # update to half tables paraDict['rowsPerTbl'] = int(self.rowsPerTbl / 2) - - tdLog.info("create topics from stb1") - topicFromStb1 = 'topic_stb1' - queryString = "select ts, c1, c2 from %s.%s"%(paraDict['dbName'], paraDict['stbName']) - sqlString = "create topic %s as %s" %(topicFromStb1, queryString) - tdLog.info("create topic sql: %s"%sqlString) - tdSql.execute(sqlString) - # tmqCom.insert_data_with_autoCreateTbl(tsql=tdSql,dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix="ctbx", # ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], # startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) @@ -115,6 +107,12 @@ class TDTestCase: ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) + tdLog.info("create topics from stb1") + topicFromStb1 = 'topic_stb1' + queryString = "select ts, c1, c2 from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicFromStb1, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) # paraDict['ctbNum'] = self.ctbNum paraDict['rowsPerTbl'] = self.rowsPerTbl @@ -158,7 +156,6 @@ class TDTestCase: def tmqCase2(self): tdLog.printNoPrefix("======== test case 2: ") - paraDict = {'dbName': 'dbt', 'dropFlag': 1, 'event': '', @@ -178,7 +175,7 @@ class TDTestCase: 'showMsg': 1, 'showRow': 1, 'snapshot': 0} - self.snapshot = 1 + paraDict['snapshot'] = self.snapshot paraDict['vgroups'] = self.vgroups paraDict['ctbNum'] = self.ctbNum @@ -190,6 +187,12 @@ class TDTestCase: # update to half tables paraDict['startTs'] = paraDict['startTs'] + int(self.rowsPerTbl / 2) 
paraDict['rowsPerTbl'] = int(self.rowsPerTbl / 2) + tmqCom.insert_data_with_autoCreateTbl(tsql=tdSql,dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict["ctbPrefix"], + ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], + startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) + # tmqCom.insert_data_interlaceByMultiTbl(tsql=tdSql,dbName=paraDict["dbName"],ctbPrefix=paraDict["ctbPrefix"], + # ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], + # startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) tmqCom.initConsumerTable() tdLog.info("create topics from stb1") @@ -199,19 +202,11 @@ class TDTestCase: tdLog.info("create topic sql: %s"%sqlString) tdSql.execute(sqlString) - tmqCom.insert_data_with_autoCreateTbl(tsql=tdSql,dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict["ctbPrefix"], - ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], - startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) - # tmqCom.insert_data_interlaceByMultiTbl(tsql=tdSql,dbName=paraDict["dbName"],ctbPrefix=paraDict["ctbPrefix"], - # ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], - # startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) - - # paraDict['ctbNum'] = self.ctbNum paraDict['rowsPerTbl'] = self.rowsPerTbl consumerId = 1 if self.snapshot == 0: - expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2)) + expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1/2)) elif self.snapshot == 1: expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1)) diff --git a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py index 7310466abe..daffff44c1 100644 --- a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py +++ 
b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py @@ -101,14 +101,6 @@ class TDTestCase: # update to half tables paraDict['ctbNum'] = int(self.ctbNum/2) paraDict['rowsPerTbl'] = int(self.rowsPerTbl / 2) - - tdLog.info("create topics from stb1") - topicFromStb1 = 'topic_stb1' - queryString = "select ts, c1, c2 from %s.%s"%(paraDict['dbName'], paraDict['stbName']) - sqlString = "create topic %s as %s" %(topicFromStb1, queryString) - tdLog.info("create topic sql: %s"%sqlString) - tdSql.execute(sqlString) - tmqCom.insert_data_with_autoCreateTbl(tsql=tdSql,dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=self.autoCtbPrefix, ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) @@ -116,6 +108,13 @@ class TDTestCase: ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) + tdLog.info("create topics from stb1") + topicFromStb1 = 'topic_stb1' + queryString = "select ts, c1, c2 from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicFromStb1, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + paraDict['ctbNum'] = self.ctbNum paraDict['rowsPerTbl'] = self.rowsPerTbl consumerId = 0 @@ -177,7 +176,6 @@ class TDTestCase: 'showRow': 1, 'snapshot': 0} - self.snapshot = 1 paraDict['snapshot'] = self.snapshot paraDict['vgroups'] = self.vgroups paraDict['ctbNum'] = self.ctbNum @@ -190,16 +188,6 @@ class TDTestCase: paraDict['ctbNum'] = int(self.ctbNum/2) paraDict['rowsPerTbl'] = int(self.rowsPerTbl / 2) paraDict['startTs'] = paraDict['startTs'] + int(self.rowsPerTbl / 2) - - tmqCom.initConsumerTable() - tdLog.info("create topics from stb1") - topicFromStb1 = 'topic_stb1' - queryString = "select ts, c1, c2 from %s.%s"%(paraDict['dbName'], paraDict['stbName']) - sqlString = "create 
topic %s as %s" %(topicFromStb1, queryString) - tdLog.info("create topic sql: %s"%sqlString) - tdSql.execute(sqlString) - - tmqCom.insert_data_with_autoCreateTbl(tsql=tdSql,dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=self.autoCtbPrefix, ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']+int(self.ctbNum/2)) @@ -212,14 +200,22 @@ class TDTestCase: ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']+int(self.ctbNum/2)) + tmqCom.initConsumerTable() + tdLog.info("create topics from stb1") + topicFromStb1 = 'topic_stb1' + queryString = "select ts, c1, c2 from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicFromStb1, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + # paraDict['ctbNum'] = self.ctbNum paraDict['ctbNum'] = self.ctbNum paraDict['rowsPerTbl'] = self.rowsPerTbl consumerId = 1 if self.snapshot == 0: - expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2 + 1/2*1/2*2 + 1/2*1/2)) + expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1/2) * (1/2*3)) elif self.snapshot == 1: - expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2 + 1/2*1/2)) + expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1 + 1/2)) topicList = topicFromStb1 ifcheckdata = 1 From ded3d6e7c9e2bd80ac64520b6724e9446b3cbb3b Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Sun, 29 Jan 2023 09:08:02 +0800 Subject: [PATCH 19/20] more code --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 588 +++++++++++---------- 1 file changed, 310 insertions(+), 278 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index c519e92727..b14fbdf189 100644 --- 
a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -26,7 +26,9 @@ extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, #define TSDB_STT_FILE_DATA_ITER 2 #define TSDB_TOMB_FILE_DATA_ITER 3 -typedef struct STsdbDataIter2 STsdbDataIter2; +typedef struct STsdbDataIter2 STsdbDataIter2; +typedef struct STsdbFilterInfo STsdbFilterInfo; + struct STsdbDataIter2 { STsdbDataIter2* next; SRBTreeNode rbtn; @@ -70,6 +72,13 @@ struct STsdbDataIter2 { }; }; +#define TSDB_FILTER_FLAG_BY_VERSION 0x1 +struct STsdbFilterInfo { + int32_t flag; + int64_t sver; + int64_t ever; +}; + #define TSDB_RBTN_TO_DATA_ITER(pNode) ((STsdbDataIter2*)(((char*)pNode) - offsetof(STsdbDataIter2, rbtn))) /* open */ @@ -251,24 +260,44 @@ static int32_t tsdbDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* /* seek */ /* iter next */ -static int32_t tsdbDataFileDataIterNext(STsdbDataIter2* pIter) { +static int32_t tsdbDataFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) { int32_t code = 0; int32_t lino = 0; for (;;) { - if (pIter->dIter.iRow < pIter->dIter.bData.nRow) { + while (pIter->dIter.iRow < pIter->dIter.bData.nRow) { + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pIter->dIter.bData.aVersion[pIter->dIter.iRow] < pFilterInfo->sver || + pIter->dIter.bData.aVersion[pIter->dIter.iRow] > pFilterInfo->ever) { + pIter->dIter.iRow++; + continue; + } + } + } + pIter->rowInfo.suid = pIter->dIter.bData.suid; pIter->rowInfo.uid = pIter->dIter.bData.uid; pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->dIter.bData, pIter->dIter.iRow); pIter->dIter.iRow++; - break; + goto _exit; } for (;;) { - if (pIter->dIter.iDataBlk < pIter->dIter.mDataBlk.nItem) { + while (pIter->dIter.iDataBlk < pIter->dIter.mDataBlk.nItem) { SDataBlk dataBlk; tMapDataGetItemByIdx(&pIter->dIter.mDataBlk, pIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); + // filter + if (pFilterInfo) { + if 
(pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pFilterInfo->sver > dataBlk.maxVer || pFilterInfo->ever < dataBlk.minVer) { + pIter->dIter.iDataBlk++; + continue; + } + } + } + code = tsdbReadDataBlockEx(pIter->dIter.pReader, &dataBlk, &pIter->dIter.bData); TSDB_CHECK_CODE(code, lino, _exit); @@ -278,6 +307,8 @@ static int32_t tsdbDataFileDataIterNext(STsdbDataIter2* pIter) { break; } + if (pIter->dIter.iRow < pIter->dIter.bData.nRow) break; + for (;;) { if (pIter->dIter.iBlockIdx < taosArrayGetSize(pIter->dIter.aBlockIdx)) { SBlockIdx* pBlockIdx = taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx); @@ -304,31 +335,52 @@ _exit: return code; } -static int32_t tsdbSttFileDataIterNext(STsdbDataIter2* pIter) { +static int32_t tsdbSttFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) { int32_t code = 0; int32_t lino = 0; for (;;) { - if (pIter->sIter.iRow < pIter->sIter.bData.nRow) { + while (pIter->sIter.iRow < pIter->sIter.bData.nRow) { + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pFilterInfo->sver > pIter->sIter.bData.aVersion[pIter->sIter.iRow] || + pFilterInfo->ever < pIter->sIter.bData.aVersion[pIter->sIter.iRow]) { + pIter->sIter.iRow++; + continue; + } + } + } + pIter->rowInfo.suid = pIter->sIter.bData.suid; pIter->rowInfo.uid = pIter->sIter.bData.uid ? 
pIter->sIter.bData.uid : pIter->sIter.bData.aUid[pIter->sIter.iRow]; pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->sIter.bData, pIter->sIter.iRow); pIter->sIter.iRow++; - break; + goto _exit; } - if (pIter->sIter.iSttBlk < taosArrayGetSize(pIter->sIter.aSttBlk)) { - SSttBlk* pSttBlk = taosArrayGet(pIter->sIter.aSttBlk, pIter->sIter.iSttBlk); + for (;;) { + if (pIter->sIter.iSttBlk < taosArrayGetSize(pIter->sIter.aSttBlk)) { + SSttBlk* pSttBlk = taosArrayGet(pIter->sIter.aSttBlk, pIter->sIter.iSttBlk); - code = tsdbReadSttBlockEx(pIter->sIter.pReader, pIter->sIter.iStt, pSttBlk, &pIter->sIter.bData); - TSDB_CHECK_CODE(code, lino, _exit); + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pFilterInfo->sver > pSttBlk->maxVer || pFilterInfo->ever < pSttBlk->minVer) { + pIter->sIter.iSttBlk++; + continue; + } + } + } - pIter->sIter.iSttBlk++; + code = tsdbReadSttBlockEx(pIter->sIter.pReader, pIter->sIter.iStt, pSttBlk, &pIter->sIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); - pIter->sIter.iRow = 0; - } else { - pIter->rowInfo = (SRowInfo){0}; - break; + pIter->sIter.iRow = 0; + pIter->sIter.iSttBlk++; + break; + } else { + pIter->rowInfo = (SRowInfo){0}; + goto _exit; + } } } @@ -339,16 +391,16 @@ _exit: return code; } -static int32_t tsdbDataIterNext2(STsdbDataIter2* pIter) { +static int32_t tsdbDataIterNext2(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) { int32_t code = 0; if (pIter->type == TSDB_MEM_TABLE_DATA_ITER) { ASSERT(0); return code; } else if (pIter->type == TSDB_DATA_FILE_DATA_ITER) { - return tsdbDataFileDataIterNext(pIter); + return tsdbDataFileDataIterNext(pIter, pFilterInfo); } else if (pIter->type == TSDB_STT_FILE_DATA_ITER) { - return tsdbSttFileDataIterNext(pIter); + return tsdbSttFileDataIterNext(pIter, pFilterInfo); } else { ASSERT(0); return code; @@ -366,6 +418,7 @@ struct STsdbSnapReader { uint8_t* aBuf[5]; STsdbFS fs; + TABLEID tbid; SSkmInfo skmTable; // timeseries data @@ -382,224 +435,151 @@ 
struct STsdbSnapReader { int8_t delDone; SDelFReader* pDelFReader; STsdbDataIter2* pTIter; + SArray* aDelData; }; -static int32_t tFDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* pNode2) { - SFDataIter* pIter1 = (SFDataIter*)(((uint8_t*)pNode1) - offsetof(SFDataIter, n)); - SFDataIter* pIter2 = (SFDataIter*)(((uint8_t*)pNode2) - offsetof(SFDataIter, n)); - - return tRowInfoCmprFn(&pIter1->rInfo, &pIter2->rInfo); -} - -static int32_t tsdbSnapReadOpenFile(STsdbSnapReader* pReader) { +static int32_t tsdbSnapReadFileDataStart(STsdbSnapReader* pReader) { int32_t code = 0; int32_t lino = 0; - SDFileSet dFileSet = {.fid = pReader->fid}; - SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &dFileSet, tDFileSetCmprFn, TD_GT); - if (pSet == NULL) return code; + SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, tDFileSetCmprFn, TD_GT); + if (pSet == NULL) { + pReader->fid = INT32_MAX; + goto _exit; + } pReader->fid = pSet->fid; + + tRBTreeCreate(&pReader->rbt, tsdbDataIterCmprFn); + code = tsdbDataFReaderOpen(&pReader->pDataFReader, pReader->pTsdb, pSet); TSDB_CHECK_CODE(code, lino, _exit); - pReader->pIter = NULL; - tRBTreeCreate(&pReader->rbt, tFDataIterCmprFn); - - // .data file - SFDataIter* pIter = &pReader->aFDataIter[0]; - pIter->type = SNAP_DATA_FILE_ITER; - - code = tsdbReadBlockIdx(pReader->pDataFReader, pIter->aBlockIdx); + code = tsdbOpenDataFileDataIter(pReader->pDataFReader, &pReader->pIter); TSDB_CHECK_CODE(code, lino, _exit); - for (pIter->iBlockIdx = 0; pIter->iBlockIdx < taosArrayGetSize(pIter->aBlockIdx); pIter->iBlockIdx++) { - pIter->pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->aBlockIdx, pIter->iBlockIdx); - - code = tsdbReadDataBlk(pReader->pDataFReader, pIter->pBlockIdx, &pIter->mBlock); + if (pReader->pIter) { + // iter to next with filter info (sver, ever) + code = tsdbDataIterNext2(pReader->pIter, + &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag + .sver = pReader->sver, + 
.ever = pReader->ever}); TSDB_CHECK_CODE(code, lino, _exit); - for (pIter->iBlock = 0; pIter->iBlock < pIter->mBlock.nItem; pIter->iBlock++) { - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pIter->mBlock, pIter->iBlock, &dataBlk, tGetDataBlk); + if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) { + // add to rbtree + tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); - if (dataBlk.minVer > pReader->ever || dataBlk.maxVer < pReader->sver) continue; - - code = tsdbReadDataBlockEx(pReader->pDataFReader, &dataBlk, &pIter->bData); - TSDB_CHECK_CODE(code, lino, _exit); - - ASSERT(pIter->pBlockIdx->suid == pIter->bData.suid); - ASSERT(pIter->pBlockIdx->uid == pIter->bData.uid); - - for (pIter->iRow = 0; pIter->iRow < pIter->bData.nRow; pIter->iRow++) { - int64_t rowVer = pIter->bData.aVersion[pIter->iRow]; - - if (rowVer >= pReader->sver && rowVer <= pReader->ever) { - pIter->rInfo.suid = pIter->pBlockIdx->suid; - pIter->rInfo.uid = pIter->pBlockIdx->uid; - pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow); - goto _add_iter_and_break; - } - } + // add to iterList + pReader->pIter->next = pReader->iterList; + pReader->iterList = pReader->pIter; + } else { + tsdbCloseDataIter2(pReader->pIter); } - - continue; - - _add_iter_and_break: - tRBTreePut(&pReader->rbt, (SRBTreeNode*)pIter); - break; } - // .stt file - pIter = &pReader->aFDataIter[1]; - for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { - pIter->type = SNAP_STT_FILE_ITER; - pIter->iStt = iStt; - - code = tsdbReadSttBlk(pReader->pDataFReader, iStt, pIter->aSttBlk); + for (int32_t iStt = 0; iStt < pSet->nSttF; ++iStt) { + code = tsdbOpenSttFileDataIter(pReader->pDataFReader, iStt, &pReader->pIter); TSDB_CHECK_CODE(code, lino, _exit); - for (pIter->iSttBlk = 0; pIter->iSttBlk < taosArrayGetSize(pIter->aSttBlk); pIter->iSttBlk++) { - SSttBlk* pSttBlk = (SSttBlk*)taosArrayGet(pIter->aSttBlk, pIter->iSttBlk); - - if (pSttBlk->minVer > pReader->ever) continue; - if (pSttBlk->maxVer < 
pReader->sver) continue; - - code = tsdbReadSttBlockEx(pReader->pDataFReader, iStt, pSttBlk, &pIter->bData); + if (pReader->pIter) { + // iter to valid row + code = tsdbDataIterNext2(pReader->pIter, + &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag + .sver = pReader->sver, + .ever = pReader->ever}); TSDB_CHECK_CODE(code, lino, _exit); - for (pIter->iRow = 0; pIter->iRow < pIter->bData.nRow; pIter->iRow++) { - int64_t rowVer = pIter->bData.aVersion[pIter->iRow]; + if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) { + // add to rbtree + tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); - if (rowVer >= pReader->sver && rowVer <= pReader->ever) { - pIter->rInfo.suid = pIter->bData.suid; - pIter->rInfo.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[pIter->iRow]; - pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow); - goto _add_iter; - } + // add to iterList + pReader->pIter->next = pReader->iterList; + pReader->iterList = pReader->pIter; + } else { + tsdbCloseDataIter2(pReader->pIter); } } - - continue; - - _add_iter: - tRBTreePut(&pReader->rbt, (SRBTreeNode*)pIter); - pIter++; } + pReader->pIter = NULL; + _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s", TD_VID(pReader->pTsdb->pVnode), __func__, tstrerror(code)); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); } else { - tsdbInfo("vgId:%d, %s done, path:%s, fid:%d", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->pTsdb->path, - pReader->fid); + tsdbInfo("vgId:%d %s done, fid:%d", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->fid); } return code; } -static int32_t tsdbSnapNextRow(STsdbSnapReader* pReader) { +static void tsdbSnapReadFileDataEnd(STsdbSnapReader* pReader) { + while (pReader->iterList) { + STsdbDataIter2* pIter = pReader->iterList; + pReader->iterList = pIter->next; + tsdbCloseDataIter2(pIter); + } + + tsdbDataFReaderClose(&pReader->pDataFReader); +} + 
+static int32_t tsdbSnapReadNextRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) { int32_t code = 0; + int32_t lino = 0; if (pReader->pIter) { - SFDataIter* pIter = NULL; - while (true) { - _find_row: - pIter = pReader->pIter; - for (pIter->iRow++; pIter->iRow < pIter->bData.nRow; pIter->iRow++) { - int64_t rowVer = pIter->bData.aVersion[pIter->iRow]; + code = tsdbDataIterNext2(pReader->pIter, &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag + .sver = pReader->sver, + .ever = pReader->ever}); + TSDB_CHECK_CODE(code, lino, _exit); - if (rowVer >= pReader->sver && rowVer <= pReader->ever) { - pIter->rInfo.suid = pIter->bData.suid; - pIter->rInfo.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[pIter->iRow]; - pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow); - goto _out; + if (pReader->pIter->rowInfo.suid == 0 && pReader->pIter->rowInfo.uid == 0) { + pReader->pIter = NULL; + } else { + SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt); + if (pNode) { + int32_t c = tsdbDataIterCmprFn(&pReader->pIter->rbtn, pNode); + if (c > 0) { + tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); + pReader->pIter = NULL; + } else if (c == 0) { + ASSERT(0); } } - - if (pIter->type == SNAP_DATA_FILE_ITER) { - while (true) { - for (pIter->iBlock++; pIter->iBlock < pIter->mBlock.nItem; pIter->iBlock++) { - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pIter->mBlock, pIter->iBlock, &dataBlk, tGetDataBlk); - - if (dataBlk.minVer > pReader->ever || dataBlk.maxVer < pReader->sver) continue; - - code = tsdbReadDataBlockEx(pReader->pDataFReader, &dataBlk, &pIter->bData); - if (code) goto _err; - - pIter->iRow = -1; - goto _find_row; - } - - pIter->iBlockIdx++; - if (pIter->iBlockIdx >= taosArrayGetSize(pIter->aBlockIdx)) break; - - pIter->pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->aBlockIdx, pIter->iBlockIdx); - code = tsdbReadDataBlk(pReader->pDataFReader, pIter->pBlockIdx, &pIter->mBlock); - if (code) goto _err; - pIter->iBlock = -1; - } - - 
pReader->pIter = NULL; - break; - } else if (pIter->type == SNAP_STT_FILE_ITER) { - for (pIter->iSttBlk++; pIter->iSttBlk < taosArrayGetSize(pIter->aSttBlk); pIter->iSttBlk++) { - SSttBlk* pSttBlk = (SSttBlk*)taosArrayGet(pIter->aSttBlk, pIter->iSttBlk); - - if (pSttBlk->minVer > pReader->ever || pSttBlk->maxVer < pReader->sver) continue; - - code = tsdbReadSttBlockEx(pReader->pDataFReader, pIter->iStt, pSttBlk, &pIter->bData); - if (code) goto _err; - - pIter->iRow = -1; - goto _find_row; - } - - pReader->pIter = NULL; - break; - } else { - ASSERT(0); - } - } - - _out: - pIter = (SFDataIter*)tRBTreeMin(&pReader->rbt); - if (pReader->pIter && pIter) { - int32_t c = tRowInfoCmprFn(&pReader->pIter->rInfo, &pIter->rInfo); - if (c > 0) { - tRBTreePut(&pReader->rbt, (SRBTreeNode*)pReader->pIter); - pReader->pIter = NULL; - } else { - ASSERT(c); - } } } if (pReader->pIter == NULL) { - pReader->pIter = (SFDataIter*)tRBTreeMin(&pReader->rbt); - if (pReader->pIter) { - tRBTreeDrop(&pReader->rbt, (SRBTreeNode*)pReader->pIter); + SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt); + if (pNode) { + tRBTreeDrop(&pReader->rbt, pNode); + pReader->pIter = TSDB_RBTN_TO_DATA_ITER(pNode); } } - return code; + if (ppRowInfo) { + if (pReader->pIter) { + *ppRowInfo = &pReader->pIter->rowInfo; + } else { + *ppRowInfo = NULL; + } + } -_err: +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } -static SRowInfo* tsdbSnapGetRow(STsdbSnapReader* pReader) { +static int32_t tsdbSnapReadGetRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) { if (pReader->pIter) { - return &pReader->pIter->rInfo; - } else { - tsdbSnapNextRow(pReader); - - if (pReader->pIter) { - return &pReader->pIter->rInfo; - } else { - return NULL; - } + *ppRowInfo = &pReader->pIter->rowInfo; + return 0; } + + return tsdbSnapReadNextRow(pReader, ppRowInfo); } static int32_t tsdbSnapCmprData(STsdbSnapReader* pReader, 
uint8_t** ppData) { @@ -641,138 +621,190 @@ static int32_t tsdbSnapReadTimeSeriesData(STsdbSnapReader* pReader, uint8_t** pp STsdb* pTsdb = pReader->pTsdb; - while (true) { + tBlockDataClear(&pReader->bData); + + for (;;) { + // start a new file read if need if (pReader->pDataFReader == NULL) { - code = tsdbSnapReadOpenFile(pReader); + code = tsdbSnapReadFileDataStart(pReader); TSDB_CHECK_CODE(code, lino, _exit); } if (pReader->pDataFReader == NULL) break; - SRowInfo* pRowInfo = tsdbSnapGetRow(pReader); + SRowInfo* pRowInfo; + code = tsdbSnapReadGetRow(pReader, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + if (pRowInfo == NULL) { - tsdbDataFReaderClose(&pReader->pDataFReader); + tsdbSnapReadFileDataEnd(pReader); continue; } - TABLEID id = {.suid = pRowInfo->suid, .uid = pRowInfo->uid}; - SBlockData* pBlockData = &pReader->bData; - - code = tsdbUpdateTableSchema(pTsdb->pVnode->pMeta, id.suid, id.uid, &pReader->skmTable); + code = tsdbUpdateTableSchema(pTsdb->pVnode->pMeta, pRowInfo->suid, pRowInfo->uid, &pReader->skmTable); TSDB_CHECK_CODE(code, lino, _exit); - code = tBlockDataInit(pBlockData, &id, pReader->skmTable.pTSchema, NULL, 0); + code = tBlockDataInit(&pReader->bData, (TABLEID*)pRowInfo, pReader->skmTable.pTSchema, NULL, 0); TSDB_CHECK_CODE(code, lino, _exit); - while (pRowInfo->suid == id.suid && pRowInfo->uid == id.uid) { - code = tBlockDataAppendRow(pBlockData, &pRowInfo->row, NULL, pRowInfo->uid); - TSDB_CHECK_CODE(code, lino, _exit); + do { + if (!TABLE_SAME_SCHEMA(pReader->bData.suid, pReader->bData.uid, pRowInfo->suid, pRowInfo->uid)) break; - code = tsdbSnapNextRow(pReader); - TSDB_CHECK_CODE(code, lino, _exit); + if (pReader->bData.uid && pReader->bData.uid != pRowInfo->uid) { + code = tRealloc((uint8_t**)&pReader->bData.aUid, sizeof(int64_t) * (pReader->bData.nRow + 1)); + TSDB_CHECK_CODE(code, lino, _exit); - pRowInfo = tsdbSnapGetRow(pReader); - if (pRowInfo == NULL) { - tsdbDataFReaderClose(&pReader->pDataFReader); - break; + for (int32_t 
iRow = 0; iRow < pReader->bData.nRow; ++iRow) { + pReader->bData.aUid[iRow] = pReader->bData.uid; + } + pReader->bData.uid = 0; } - if (pBlockData->nRow >= 4096) break; - } + code = tBlockDataAppendRow(&pReader->bData, &pRowInfo->row, NULL, pRowInfo->uid); + TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbSnapCmprData(pReader, ppData); - TSDB_CHECK_CODE(code, lino, _exit); + code = tsdbSnapReadNextRow(pReader, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->bData.nRow >= 4096) break; + } while (pRowInfo); + + ASSERT(pReader->bData.nRow > 0); break; } + if (pReader->bData.nRow > 0) { + code = tsdbSnapCmprData(pReader, ppData); + TSDB_CHECK_CODE(code, lino, _exit); + } + _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code), pTsdb->path); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); } return code; } -static int32_t tsdbSnapReadTombstoneData(STsdbSnapReader* pReader, uint8_t** ppData) { +static int32_t tsdbSnapCmprTombData(STsdbSnapReader* pReader, uint8_t** ppData) { int32_t code = 0; int32_t lino = 0; - STsdb* pTsdb = pReader->pTsdb; - SDelFile* pDelFile = pReader->fs.pDelFile; - - if (pReader->pDelFReader == NULL) { - if (pDelFile == NULL) { - goto _exit; - } - - // open - code = tsdbDelFReaderOpen(&pReader->pDelFReader, pDelFile, pTsdb); - TSDB_CHECK_CODE(code, lino, _exit); - - // read index - code = tsdbReadDelIdx(pReader->pDelFReader, pReader->aDelIdx); - TSDB_CHECK_CODE(code, lino, _exit); - - pReader->iDelIdx = 0; + int64_t size = sizeof(TABLEID); + for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) { + size += tPutDelData(NULL, taosArrayGet(pReader->aDelData, iDelData)); } - while (true) { - if (pReader->iDelIdx >= taosArrayGetSize(pReader->aDelIdx)) { - tsdbDelFReaderClose(&pReader->pDelFReader); - break; - } - - SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pReader->aDelIdx, 
pReader->iDelIdx); - - pReader->iDelIdx++; - - code = tsdbReadDelData(pReader->pDelFReader, pDelIdx, pReader->aDelData); + uint8_t* pData = (uint8_t*)taosMemoryMalloc(sizeof(SSnapDataHdr) + size); + if (pData == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); + } - int32_t size = 0; - for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); iDelData++) { - SDelData* pDelData = (SDelData*)taosArrayGet(pReader->aDelData, iDelData); + SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; + pHdr->type = SNAP_DATA_DEL; + pHdr->size = size; - if (pDelData->version >= pReader->sver && pDelData->version <= pReader->ever) { - size += tPutDelData(NULL, pDelData); - } - } - if (size == 0) continue; + TABLEID* pId = (TABLEID*)(pData + sizeof(SSnapDataHdr)); + *pId = pReader->tbid; - // org data - size = sizeof(TABLEID) + size; - *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + size); - if (*ppData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - SSnapDataHdr* pHdr = (SSnapDataHdr*)(*ppData); - pHdr->type = SNAP_DATA_DEL; - pHdr->size = size; - - TABLEID* pId = (TABLEID*)(&pHdr[1]); - pId->suid = pDelIdx->suid; - pId->uid = pDelIdx->uid; - int32_t n = sizeof(SSnapDataHdr) + sizeof(TABLEID); - for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); iDelData++) { - SDelData* pDelData = (SDelData*)taosArrayGet(pReader->aDelData, iDelData); - - if (pDelData->version < pReader->sver) continue; - if (pDelData->version > pReader->ever) continue; - - n += tPutDelData((*ppData) + n, pDelData); - } - - tsdbInfo("vgId:%d, vnode snapshot tsdb read del data for %s, suid:%" PRId64 " uid:%" PRId64 " size:%d", - TD_VID(pTsdb->pVnode), pTsdb->path, pDelIdx->suid, pDelIdx->uid, size); - - break; + size = sizeof(SSnapDataHdr) + sizeof(TABLEID); + for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) { + size += tPutDelData(pData + size, 
taosArrayGet(pReader->aDelData, iDelData)); } _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code), pTsdb->path); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + if (pData) { + taosMemoryFree(pData); + pData = NULL; + } + } + *ppData = pData; + return code; +} + +static int32_t tsdbSnapReadGetTombData(STsdbSnapReader* pReader, void* pDelInfo) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(0); + // TODO +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapReadNextTombData(STsdbSnapReader* pReader, void* pDelInfo) { + int32_t code = 0; + int32_t lino = 0; + // TODO +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapReadTombData(STsdbSnapReader* pReader, uint8_t** ppData) { + int32_t code = 0; + int32_t lino = 0; + + STsdb* pTsdb = pReader->pTsdb; + + if (pReader->pDelFReader == NULL) { + if (pReader->fs.pDelFile == NULL) goto _exit; + + // open + code = tsdbDelFReaderOpen(&pReader->pDelFReader, pReader->fs.pDelFile, pTsdb); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbOpenTombFileDataIter(pReader->pDelFReader, &pReader->pTIter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + struct { + int64_t suid; + int64_t uid; + SDelData dData; + }* pDelInfo; + code = tsdbSnapReadGetTombData(pReader, &pDelInfo); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pDelInfo == NULL) goto _exit; + + pReader->tbid = *(TABLEID*)pDelInfo; + + if (pReader->aDelData) { + taosArrayClear(pReader->aDelData); + } else if ((pReader->aDelData = taosArrayInit(16, sizeof(SDelData))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + while 
(pDelInfo && pDelInfo->suid == pReader->tbid.suid && pDelInfo->uid == pReader->tbid.uid) { + if (taosArrayPush(pReader->aDelData, &pDelInfo->dData) < 0) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbSnapReadNextTombData(pReader, &pDelInfo); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (taosArrayGetSize(pReader->aDelData) > 0) { + code = tsdbSnapCmprTombData(pReader, ppData); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); } return code; } @@ -887,7 +919,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) { // read del file if (!pReader->delDone) { - code = tsdbSnapReadTombstoneData(pReader, ppData); + code = tsdbSnapReadTombData(pReader, ppData); TSDB_CHECK_CODE(code, lino, _exit); if (*ppData) { goto _exit; @@ -1218,7 +1250,7 @@ static int32_t tsdbSnapWriteFileDataStart(STsdbSnapWriter* pWriter, int32_t fid) TSDB_CHECK_CODE(code, lino, _exit); if (pWriter->pSIter) { - code = tsdbSttFileDataIterNext(pWriter->pSIter); + code = tsdbSttFileDataIterNext(pWriter->pSIter, NULL); TSDB_CHECK_CODE(code, lino, _exit); // add to tree @@ -1312,7 +1344,7 @@ static int32_t tsdbSnapWriteNextRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowIn int32_t lino = 0; if (pWriter->pSIter) { - code = tsdbDataIterNext2(pWriter->pSIter); + code = tsdbDataIterNext2(pWriter->pSIter, NULL); TSDB_CHECK_CODE(code, lino, _exit); if (pWriter->pSIter->rowInfo.suid == 0 && pWriter->pSIter->rowInfo.uid == 0) { From e0b0ce23951d1aadf94ba409424ecb3aaece4bd3 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Sun, 29 Jan 2023 10:21:39 +0800 Subject: [PATCH 20/20] refact more code --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 107 +++++++++++++++++---- 1 file changed, 86 insertions(+), 21 deletions(-) diff --git 
a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index b14fbdf189..befaf4a3e0 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -29,12 +29,19 @@ extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, typedef struct STsdbDataIter2 STsdbDataIter2; typedef struct STsdbFilterInfo STsdbFilterInfo; +typedef struct { + int64_t suid; + int64_t uid; + SDelData delData; +} SDelInfo; + struct STsdbDataIter2 { STsdbDataIter2* next; SRBTreeNode rbtn; int32_t type; SRowInfo rowInfo; + SDelInfo delInfo; union { // TSDB_MEM_TABLE_DATA_ITER struct { @@ -391,6 +398,54 @@ _exit: return code; } +static int32_t tsdbTombFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) { + int32_t code = 0; + int32_t lino = 0; + + for (;;) { + while (pIter->tIter.iDelData < taosArrayGetSize(pIter->tIter.aDelData)) { + SDelData* pDelData = taosArrayGet(pIter->tIter.aDelData, pIter->tIter.iDelData); + + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pFilterInfo->sver > pDelData->version || pFilterInfo->ever < pDelData->version) { + pIter->tIter.iDelData++; + continue; + } + } + } + + pIter->delInfo.delData = *pDelData; + pIter->tIter.iDelData++; + goto _exit; + } + + for (;;) { + if (pIter->tIter.iDelIdx < taosArrayGetSize(pIter->tIter.aDelIdx)) { + SDelIdx* pDelIdx = taosArrayGet(pIter->tIter.aDelIdx, pIter->tIter.iDelIdx); + + code = tsdbReadDelData(pIter->tIter.pReader, pDelIdx, pIter->tIter.aDelData); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->delInfo.suid = pDelIdx->suid; + pIter->delInfo.uid = pDelIdx->uid; + pIter->tIter.iDelData = 0; + pIter->tIter.iDelIdx++; + break; + } else { + pIter->delInfo = (SDelInfo){0}; + goto _exit; + } + } + } + +_exit: + if (code) { + tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code)); + } + return code; +} + static int32_t tsdbDataIterNext2(STsdbDataIter2* 
pIter, STsdbFilterInfo* pFilterInfo) { int32_t code = 0; @@ -401,6 +456,8 @@ static int32_t tsdbDataIterNext2(STsdbDataIter2* pIter, STsdbFilterInfo* pFilter return tsdbDataFileDataIterNext(pIter, pFilterInfo); } else if (pIter->type == TSDB_STT_FILE_DATA_ITER) { return tsdbSttFileDataIterNext(pIter, pFilterInfo); + } else if (pIter->type == TSDB_TOMB_FILE_DATA_ITER) { + return tsdbTombFileDataIterNext(pIter, pFilterInfo); } else { ASSERT(0); return code; @@ -725,23 +782,27 @@ _exit: return code; } -static int32_t tsdbSnapReadGetTombData(STsdbSnapReader* pReader, void* pDelInfo) { - int32_t code = 0; - int32_t lino = 0; - - ASSERT(0); - // TODO -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); +static void tsdbSnapReadGetTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) { + if (pReader->pTIter == NULL || (pReader->pTIter->delInfo.suid == 0 && pReader->pTIter->delInfo.uid == 0)) { + *ppDelInfo = NULL; + } else { + *ppDelInfo = &pReader->pTIter->delInfo; } - return code; } -static int32_t tsdbSnapReadNextTombData(STsdbSnapReader* pReader, void* pDelInfo) { +static int32_t tsdbSnapReadNextTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) { int32_t code = 0; int32_t lino = 0; - // TODO + + code = tsdbDataIterNext2( + pReader->pTIter, + &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, .sver = pReader->sver, .ever = pReader->ever}); + TSDB_CHECK_CODE(code, lino, _exit); + + if (ppDelInfo) { + tsdbSnapReadGetTombData(pReader, ppDelInfo); + } + _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); @@ -755,6 +816,7 @@ static int32_t tsdbSnapReadTombData(STsdbSnapReader* pReader, uint8_t** ppData) STsdb* pTsdb = pReader->pTsdb; + // open tombstone data iter if need if (pReader->pDelFReader == NULL) { if (pReader->fs.pDelFile == NULL) goto _exit; @@ -764,15 +826,16 @@ static int32_t 
tsdbSnapReadTombData(STsdbSnapReader* pReader, uint8_t** ppData) code = tsdbOpenTombFileDataIter(pReader->pDelFReader, &pReader->pTIter); TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->pTIter) { + code = tsdbSnapReadNextTombData(pReader, NULL); + TSDB_CHECK_CODE(code, lino, _exit); + } } - struct { - int64_t suid; - int64_t uid; - SDelData dData; - }* pDelInfo; - code = tsdbSnapReadGetTombData(pReader, &pDelInfo); - TSDB_CHECK_CODE(code, lino, _exit); + // loop to get tombstone data + SDelInfo* pDelInfo; + tsdbSnapReadGetTombData(pReader, &pDelInfo); if (pDelInfo == NULL) goto _exit; @@ -786,7 +849,7 @@ static int32_t tsdbSnapReadTombData(STsdbSnapReader* pReader, uint8_t** ppData) } while (pDelInfo && pDelInfo->suid == pReader->tbid.suid && pDelInfo->uid == pReader->tbid.uid) { - if (taosArrayPush(pReader->aDelData, &pDelInfo->dData) < 0) { + if (taosArrayPush(pReader->aDelData, &pDelInfo->delData) < 0) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); } @@ -795,6 +858,7 @@ static int32_t tsdbSnapReadTombData(STsdbSnapReader* pReader, uint8_t** ppData) TSDB_CHECK_CODE(code, lino, _exit); } + // encode tombstone data if (taosArrayGetSize(pReader->aDelData) > 0) { code = tsdbSnapCmprTombData(pReader, ppData); TSDB_CHECK_CODE(code, lino, _exit); @@ -870,9 +934,9 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { if (pReader->pDelFReader) { tsdbDelFReaderClose(&pReader->pDelFReader); } + taosArrayDestroy(pReader->aDelData); // timeseries - tBlockDataDestroy(&pReader->bData, 1); while (pReader->iterList) { STsdbDataIter2* pIter = pReader->iterList; pReader->iterList = pIter->next; @@ -881,6 +945,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { if (pReader->pDataFReader) { tsdbDataFReaderClose(&pReader->pDataFReader); } + tBlockDataDestroy(&pReader->bData, 1); // other tDestroyTSchema(pReader->skmTable.pTSchema);