From e6684fa5de9fd95ed83359a3d8965f5b74cfd987 Mon Sep 17 00:00:00 2001 From: kailixu Date: Tue, 31 Oct 2023 20:26:42 +0800 Subject: [PATCH 01/32] enh: rsma retetion and stream state --- source/common/src/tglobal.c | 2 +- source/dnode/vnode/src/inc/sma.h | 3 +- source/dnode/vnode/src/inc/vnodeInt.h | 3 +- source/dnode/vnode/src/sma/smaCommit.c | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 34 ++++++++++++++++----- source/dnode/vnode/src/sma/smaUtil.c | 5 +++ source/dnode/vnode/src/vnd/vnodeRetention.c | 10 ++++-- 7 files changed, 45 insertions(+), 14 deletions(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index c6cff27011..cc485b16dc 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -590,7 +590,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { tsNumOfVnodeFetchThreads = TMAX(tsNumOfVnodeFetchThreads, 4); if (cfgAddInt32(pCfg, "numOfVnodeFetchThreads", tsNumOfVnodeFetchThreads, 4, 1024, CFG_SCOPE_SERVER) != 0) return -1; - tsNumOfVnodeRsmaThreads = tsNumOfCores; + tsNumOfVnodeRsmaThreads = tsNumOfCores / 4; tsNumOfVnodeRsmaThreads = TMAX(tsNumOfVnodeRsmaThreads, 4); if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER) != 0) return -1; diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index aaf0973b41..5dd7df0962 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -143,6 +143,7 @@ struct SRSmaInfoItem { int32_t maxDelay; // ms tmr_h tmrId; void *pStreamState; + void *pStreamTask; // SStreamTask }; struct SRSmaInfo { @@ -218,7 +219,7 @@ void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree); int32_t tdRSmaRestore(SSma *pSma, int8_t type, int64_t committedVer, int8_t rollback); int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName); int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type); -// int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash); +int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash); int32_t tdRSmaProcessRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer, int8_t rollback); void tdRSmaQTaskInfoGetFullPath(SVnode *pVnode, tb_uid_t suid, int8_t level, STfs *pTfs, char *outputName); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 12e273c32d..c7343b1b42 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -209,6 +209,7 @@ int32_t tsdbBegin(STsdb* pTsdb); // int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo); int32_t tsdbCacheCommit(STsdb* pTsdb); int32_t tsdbCompact(STsdb* pTsdb, SCompactInfo* pInfo); +int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync); // int32_t tsdbFinishCommit(STsdb* pTsdb); // int32_t tsdbRollbackCommit(STsdb* pTsdb); int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq2* pMsg); @@ -274,7 +275,7 @@ int32_t smaPrepareAsyncCommit(SSma* pSma); int32_t smaCommit(SSma* pSma, SCommitInfo* pInfo); int32_t smaFinishCommit(SSma* pSma); int32_t smaPostCommit(SSma* pSma); -int32_t smaDoRetention(SSma* pSma, int64_t now); +int32_t smaRetention(SSma* pSma, int64_t now); int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg); int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg); diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index c26157f4b7..652aab3c01 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -178,7 +178,7 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { if (!isCommit) goto _exit; - // code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); + code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); TSDB_CHECK_CODE(code, lino, _exit); smaInfo("vgId:%d, rsma commit, operator state committed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 14c5baa402..4ea7d4612a 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -90,6 +90,10 @@ void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) { streamStateClose(pItem->pStreamState, false); } + if(isDeepFree && pItem->pStreamTask) { + taosMemoryFreeClear(pItem->pStreamTask); + } + if (isDeepFree && pInfo->taskInfo[i]) { tdRSmaQTaskInfoFree(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1); } @@ -254,11 +258,19 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat taosMemoryFree(s); } - SStreamTask task = {.id.taskId = 0, .id.streamId = 0}; // TODO: assign value - task.pMeta = pVnode->pTq->pStreamMeta; - pStreamState = streamStateOpen(taskInfDir, &task, true, -1, -1); + SStreamTask *pStreamTask = taosMemoryCalloc(1, sizeof(SStreamTask)); + if (!pStreamTask) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return TSDB_CODE_FAILED; + } + pStreamTask->id.taskId = 0; + pStreamTask->id.streamId = pRSmaInfo->suid + idx; + pStreamTask->pMeta = pVnode->pTq->pStreamMeta; + + pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; + taosMemoryFreeClear(pStreamTask); return TSDB_CODE_FAILED; } @@ -268,11 +280,13 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pRSmaInfo->taskInfo[idx] = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle, TD_VID(pVnode), 0); if (!pRSmaInfo->taskInfo[idx]) { terrno = TSDB_CODE_RSMA_QTASKINFO_CREATE; + taosMemoryFreeClear(pStreamTask); return TSDB_CODE_FAILED; } SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]); pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; // fetch the data when reboot pItem->pStreamState = pStreamState; + pItem->pStreamTask = pStreamTask; if (param->maxdelay[idx] < TSDB_MIN_ROLLUP_MAX_DELAY) { int64_t msInterval = convertTimeFromPrecisionToUnit(pRetention[idx + 1].freq, pTsdbCfg->precision, TIME_UNIT_MILLISECOND); @@ -562,7 +576,7 @@ static int32_t tdFetchSubmitReqSuids(SSubmitReq2 *pMsg, STbUidStore *pStore) { * @param now * @return int32_t */ -int32_t smaDoRetention(SSma *pSma, int64_t now) { +int32_t smaRetention(SSma *pSma, int64_t now) { int32_t code = TSDB_CODE_SUCCESS; if (!VND_IS_RSMA(pSma->pVnode)) { return code; @@ -570,8 +584,8 @@ int32_t smaDoRetention(SSma *pSma, int64_t now) { for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pSma->pRSmaTsdb[i]) { - // code = tsdbDoRetention(pSma->pRSmaTsdb[i], now); - // if (code) goto _end; + code = tsdbRetention(pSma->pRSmaTsdb[i], now, pSma->pVnode->config.sttTrigger == 1); + if (code) goto _end; } } @@ -1050,7 +1064,7 @@ _err: return code; } -#if 0 +#if 1 int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { int32_t code = 0; int32_t lino = 0; @@ -1072,6 +1086,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i); +#if 0 if (pItem && pItem->pStreamState) { if (streamStateCommit(pItem->pStreamState) < 0) { code = TSDB_CODE_RSMA_STREAM_STATE_COMMIT; @@ -1080,6 +1095,11 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { smaDebug("vgId:%d, rsma persist, stream state commit success, table %" PRIi64 ", level %d", TD_VID(pVnode), pRSmaInfo->suid, i + 1); } +#endif + if(pItem && pItem->pStreamState) { + + } + } } diff --git a/source/dnode/vnode/src/sma/smaUtil.c b/source/dnode/vnode/src/sma/smaUtil.c index e45cbac329..479c57e65f 100644 --- a/source/dnode/vnode/src/sma/smaUtil.c +++ b/source/dnode/vnode/src/sma/smaUtil.c @@ -30,8 +30,13 @@ void tdRSmaGetDirName(SVnode *pVnode, STfs *pTfs, bool endWithSep, char *outputN offset = strlen(outputName); // rsma +#if 0 snprintf(outputName + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s%s", TD_DIRSEP, VNODE_RSMA_DIR, (endWithSep ? TD_DIRSEP : "")); +#else + snprintf(outputName + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s%s%s%s%s%s", TD_DIRSEP, "tq", TD_DIRSEP, "stream", + TD_DIRSEP, "state", (endWithSep ? TD_DIRSEP : "")); +#endif } // smaXXXUtil ================ diff --git a/source/dnode/vnode/src/vnd/vnodeRetention.c b/source/dnode/vnode/src/vnd/vnodeRetention.c index f3344d1d7d..c510c0fe92 100644 --- a/source/dnode/vnode/src/vnd/vnodeRetention.c +++ b/source/dnode/vnode/src/vnd/vnodeRetention.c @@ -15,8 +15,12 @@ #include "vnd.h" -extern int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync); - int32_t vnodeDoRetention(SVnode *pVnode, int64_t now) { - return tsdbRetention(pVnode->pTsdb, now, pVnode->config.sttTrigger == 1); + int32_t code = TSDB_CODE_SUCCESS; + + code = tsdbRetention(pVnode->pTsdb, now, pVnode->config.sttTrigger == 1); + + if (TSDB_CODE_SUCCESS == code) code = smaRetention(pVnode->pSma, now); + + return code; } \ No newline at end of file From ad1e6accd4f64cfa37ad186c8f93ae987dc7fa55 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 1 Nov 2023 09:45:58 +0800 Subject: [PATCH 02/32] chore: build checkpoint for rsma --- include/libs/stream/tstream.h | 1 + source/common/src/tglobal.c | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 14 ++++++++++---- source/libs/stream/inc/streamInt.h | 1 - 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 629efa00b3..b9c6c905c9 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -772,6 +772,7 @@ void streamMetaInitForSnode(SStreamMeta* pMeta); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask); +int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); void streamTaskClearCheckInfo(SStreamTask* pTask); int32_t streamAlignTransferState(SStreamTask* pTask); diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index cc485b16dc..a5459798fa 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -590,7 +590,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { tsNumOfVnodeFetchThreads = TMAX(tsNumOfVnodeFetchThreads, 4); if (cfgAddInt32(pCfg, "numOfVnodeFetchThreads", tsNumOfVnodeFetchThreads, 4, 1024, CFG_SCOPE_SERVER) != 0) return -1; - tsNumOfVnodeRsmaThreads = tsNumOfCores / 4; + tsNumOfVnodeRsmaThreads = tsNumOfCores / 2; tsNumOfVnodeRsmaThreads = TMAX(tsNumOfVnodeRsmaThreads, 4); if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER) != 0) return -1; diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 4ea7d4612a..980b23986e 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -15,6 +15,7 @@ #include "sma.h" #include "tq.h" +#include "tstream.h" #define RSMA_QTASKEXEC_SMOOTH_SIZE (100) // cnt #define RSMA_SUBMIT_BATCH_SIZE (1024) // cnt @@ -1096,10 +1097,15 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { pRSmaInfo->suid, i + 1); } #endif - if(pItem && pItem->pStreamState) { - - } - + if (pItem && pItem->pStreamState && pItem->pStreamTask) { + SStreamTask *pTask = pItem->pStreamTask; + atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); // adaption for API streamTaskBuildCheckpoint + pTask->checkpointingId = taosGetTimestampNs(); + code = streamTaskBuildCheckpoint(pTask); + TSDB_CHECK_CODE(code, lino, _exit); + smaInfo("vgId:%d, rsma persist, build stream checkpoint success, table:%" PRIi64 ", level:%d, id:%" PRIi64, + TD_VID(pVnode), pRSmaInfo->suid, i + 1, pTask->checkpointingId); + } } } diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 4cd8319a07..1f43e44dca 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -101,7 +101,6 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq); int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId); -int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId); From ab266c712f6fc5078879c1ee0850d87440eab341 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 1 Nov 2023 12:29:54 +0800 Subject: [PATCH 03/32] chore: checkpoint for rsma stream state --- include/common/tmsg.h | 5 ++++ source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 7 +++-- source/dnode/vnode/inc/vnode.h | 2 +- source/dnode/vnode/src/sma/smaOpen.c | 18 ++++++------ source/dnode/vnode/src/sma/smaRollup.c | 8 +++-- source/dnode/vnode/src/tsdb/tsdbRead2.c | 6 ++-- source/dnode/vnode/src/vnd/vnodeCfg.c | 29 ++++++++++--------- source/dnode/vnode/src/vnd/vnodeCommit.c | 6 ++-- .../tsim/sma/rsmaPersistenceRecovery.sim | 2 +- 9 files changed, 48 insertions(+), 35 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 07eb8a461a..aa39a9da30 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -451,6 +451,11 @@ typedef struct SRetention { int8_t keepUnit; } SRetention; +typedef struct SRetentionEx { + SRetention rtn; + int64_t checkpointId; +} SRetentionEx; + #define RETENTION_VALID(l, r) ((((l) == 0 && (r)->freq >= 0) || ((r)->freq > 0)) && ((r)->keep > 0)) #pragma pack(push, 1) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index c4d525a871..5a4b341662 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -134,10 +134,11 @@ static void vmGenerateVnodeCfg(SCreateVnodeReq *pCreate, SVnodeCfg *pCfg) { pCfg->tsdbCfg.minRows = pCreate->minRows; pCfg->tsdbCfg.maxRows = pCreate->maxRows; for (size_t i = 0; i < taosArrayGetSize(pCreate->pRetensions); ++i) { - SRetention *pRetention = &pCfg->tsdbCfg.retentions[i]; - memcpy(pRetention, taosArrayGet(pCreate->pRetensions, i), sizeof(SRetention)); + SRetentionEx *pRetention = &pCfg->tsdbCfg.retentions[i]; + memcpy(&pRetention->rtn, taosArrayGet(pCreate->pRetensions, i), sizeof(SRetention)); + pRetention->checkpointId = -1; if (i == 0) { - if ((pRetention->freq >= 0 && pRetention->keep > 0)) pCfg->isRsma = 1; + if ((pRetention->rtn.freq >= 0 && pRetention->rtn.keep > 0)) pCfg->isRsma = 1; } } diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 6a0c991be4..e92fc04f6e 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -287,7 +287,7 @@ struct STsdbCfg { int32_t keep1; // just for save config, don't use in tsdbRead/tsdbCommit/..., and use STsdbKeepCfg in STsdb instead int32_t keep2; // just for save config, don't use in tsdbRead/tsdbCommit/..., and use STsdbKeepCfg in STsdb instead int32_t keepTimeOffset; // just for save config, use STsdbKeepCfg in STsdb instead - SRetention retentions[TSDB_RETENTION_MAX]; + SRetentionEx retentions[TSDB_RETENTION_MAX]; }; typedef struct { diff --git a/source/dnode/vnode/src/sma/smaOpen.c b/source/dnode/vnode/src/sma/smaOpen.c index 633e096314..cea4ccb1b7 100644 --- a/source/dnode/vnode/src/sma/smaOpen.c +++ b/source/dnode/vnode/src/sma/smaOpen.c @@ -16,13 +16,13 @@ #include "sma.h" #include "tsdb.h" -static int32_t smaEvalDays(SVnode *pVnode, SRetention *r, int8_t level, int8_t precision, int32_t duration); +static int32_t smaEvalDays(SVnode *pVnode, SRetentionEx *r, int8_t level, int8_t precision, int32_t duration); static int32_t smaSetKeepCfg(SVnode *pVnode, STsdbKeepCfg *pKeepCfg, STsdbCfg *pCfg, int type); static int32_t rsmaRestore(SSma *pSma); #define SMA_SET_KEEP_CFG(v, l) \ do { \ - SRetention *r = &pCfg->retentions[l]; \ + SRetention *r = &(pCfg->retentions[l].rtn); \ pKeepCfg->keep2 = convertTimeFromPrecisionToUnit(r->keep, pCfg->precision, TIME_UNIT_MINUTE); \ pKeepCfg->keep0 = pKeepCfg->keep2; \ pKeepCfg->keep1 = pKeepCfg->keep2; \ @@ -32,7 +32,7 @@ static int32_t rsmaRestore(SSma *pSma); #define SMA_OPEN_RSMA_IMPL(v, l, force) \ do { \ - SRetention *r = (SRetention *)VND_RETENTIONS(v) + l; \ + SRetention *r = &(((SRetentionEx *)VND_RETENTIONS(v) + l)->rtn); \ if (!RETENTION_VALID(l, r)) { \ if (l == 0) { \ code = TSDB_CODE_INVALID_PARA; \ @@ -59,9 +59,9 @@ static int32_t rsmaRestore(SSma *pSma); * @param duration * @return int32_t */ -static int32_t smaEvalDays(SVnode *pVnode, SRetention *r, int8_t level, int8_t precision, int32_t duration) { - int32_t freqDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->freq, precision, TIME_UNIT_MINUTE); - int32_t keepDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->keep, precision, TIME_UNIT_MINUTE); +static int32_t smaEvalDays(SVnode *pVnode, SRetentionEx *r, int8_t level, int8_t precision, int32_t duration) { + int32_t freqDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->rtn.freq, precision, TIME_UNIT_MINUTE); + int32_t keepDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->rtn.keep, precision, TIME_UNIT_MINUTE); int32_t days = duration; // min if (days < freqDuration) { @@ -76,10 +76,10 @@ static int32_t smaEvalDays(SVnode *pVnode, SRetention *r, int8_t level, int8_t p goto _exit; } - freqDuration = convertTimeFromPrecisionToUnit((r + level)->freq, precision, TIME_UNIT_MINUTE); - keepDuration = convertTimeFromPrecisionToUnit((r + level)->keep, precision, TIME_UNIT_MINUTE); + freqDuration = convertTimeFromPrecisionToUnit((r + level)->rtn.freq, precision, TIME_UNIT_MINUTE); + keepDuration = convertTimeFromPrecisionToUnit((r + level)->rtn.keep, precision, TIME_UNIT_MINUTE); - int32_t nFreqTimes = (r + level)->freq / (60 * 1000); // use 60s for freq of 1st level + int32_t nFreqTimes = (r + level)->rtn.freq / (60 * 1000); // use 60s for freq of 1st level days *= (nFreqTimes > 1 ? nFreqTimes : 1); if (days < freqDuration) { diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 980b23986e..1b13f37141 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -266,7 +266,9 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat } pStreamTask->id.taskId = 0; pStreamTask->id.streamId = pRSmaInfo->suid + idx; + pStreamTask->chkInfo.startTs = taosGetTimestampMs(); pStreamTask->pMeta = pVnode->pTq->pStreamMeta; + pStreamTask->chkInfo.checkpointId = pTsdbCfg->retentions[idx + 1].checkpointId; pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { @@ -1096,16 +1098,18 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { smaDebug("vgId:%d, rsma persist, stream state commit success, table %" PRIi64 ", level %d", TD_VID(pVnode), pRSmaInfo->suid, i + 1); } -#endif - if (pItem && pItem->pStreamState && pItem->pStreamTask) { +#else + if (pItem) { SStreamTask *pTask = pItem->pStreamTask; atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); // adaption for API streamTaskBuildCheckpoint pTask->checkpointingId = taosGetTimestampNs(); code = streamTaskBuildCheckpoint(pTask); TSDB_CHECK_CODE(code, lino, _exit); + (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; smaInfo("vgId:%d, rsma persist, build stream checkpoint success, table:%" PRIi64 ", level:%d, id:%" PRIi64, TD_VID(pVnode), pRSmaInfo->suid, i + 1, pTask->checkpointingId); } +#endif } } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index d1919d95ba..be88a5a435 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -49,7 +49,7 @@ static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo STsdbReader* pReader); static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, +static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetentionEx* retentions, const char* idstr, int8_t* pLevel); static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); @@ -3140,7 +3140,7 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { } } -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr, +static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetentionEx* retentions, const char* idStr, int8_t* pLevel) { if (VND_IS_RSMA(pVnode)) { int8_t level = 0; @@ -3151,7 +3151,7 @@ static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* ret : 1000000L); for (int8_t i = 0; i < TSDB_RETENTION_MAX; ++i) { - SRetention* pRetention = retentions + level; + SRetention* pRetention = &((retentions + level)->rtn); if (pRetention->keep <= 0) { if (level > 0) { --level; diff --git a/source/dnode/vnode/src/vnd/vnodeCfg.c b/source/dnode/vnode/src/vnd/vnodeCfg.c index 07bfa6c719..d429eb2a94 100644 --- a/source/dnode/vnode/src/vnd/vnodeCfg.c +++ b/source/dnode/vnode/src/vnd/vnodeCfg.c @@ -106,23 +106,24 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) { if (tjsonAddIntegerToObject(pJson, "keep1", pCfg->tsdbCfg.keep1) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "keep2", pCfg->tsdbCfg.keep2) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "keepTimeOffset", pCfg->tsdbCfg.keepTimeOffset) < 0) return -1; - if (pCfg->tsdbCfg.retentions[0].keep > 0) { + if (pCfg->tsdbCfg.retentions[0].rtn.keep > 0) { int32_t nRetention = 1; - if (pCfg->tsdbCfg.retentions[1].freq > 0) { + if (pCfg->tsdbCfg.retentions[1].rtn.freq > 0) { ++nRetention; - if (pCfg->tsdbCfg.retentions[2].freq > 0) { + if (pCfg->tsdbCfg.retentions[2].rtn.freq > 0) { ++nRetention; } } SJson *pNodeRetentions = tjsonCreateArray(); tjsonAddItemToObject(pJson, "retentions", pNodeRetentions); for (int32_t i = 0; i < nRetention; ++i) { - SJson *pNodeRetention = tjsonCreateObject(); - const SRetention *pRetention = pCfg->tsdbCfg.retentions + i; - tjsonAddIntegerToObject(pNodeRetention, "freq", pRetention->freq); - tjsonAddIntegerToObject(pNodeRetention, "freqUnit", pRetention->freqUnit); - tjsonAddIntegerToObject(pNodeRetention, "keep", pRetention->keep); - tjsonAddIntegerToObject(pNodeRetention, "keepUnit", pRetention->keepUnit); + SJson *pNodeRetention = tjsonCreateObject(); + const SRetentionEx *pRetention = pCfg->tsdbCfg.retentions + i; + tjsonAddIntegerToObject(pNodeRetention, "freq", pRetention->rtn.freq); + tjsonAddIntegerToObject(pNodeRetention, "freqUnit", pRetention->rtn.freqUnit); + tjsonAddIntegerToObject(pNodeRetention, "keep", pRetention->rtn.keep); + tjsonAddIntegerToObject(pNodeRetention, "keepUnit", pRetention->rtn.keepUnit); + tjsonAddIntegerToObject(pNodeRetention, "checkpointId", pRetention->checkpointId); tjsonAddItemToArray(pNodeRetentions, pNodeRetention); } } @@ -231,10 +232,12 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) { for (int32_t i = 0; i < nRetention; ++i) { SJson *pNodeRetention = tjsonGetArrayItem(pNodeRetentions, i); ASSERT(pNodeRetention != NULL); - tjsonGetNumberValue(pNodeRetention, "freq", (pCfg->tsdbCfg.retentions)[i].freq, code); - tjsonGetNumberValue(pNodeRetention, "freqUnit", (pCfg->tsdbCfg.retentions)[i].freqUnit, code); - tjsonGetNumberValue(pNodeRetention, "keep", (pCfg->tsdbCfg.retentions)[i].keep, code); - tjsonGetNumberValue(pNodeRetention, "keepUnit", (pCfg->tsdbCfg.retentions)[i].keepUnit, code); + SRetentionEx *pRetention = &(pCfg->tsdbCfg.retentions[i]); + tjsonGetNumberValue(pNodeRetention, "freq", pRetention->rtn.freq, code); + tjsonGetNumberValue(pNodeRetention, "freqUnit", pRetention->rtn.freqUnit, code); + tjsonGetNumberValue(pNodeRetention, "keep", pRetention->rtn.keep, code); + tjsonGetNumberValue(pNodeRetention, "keepUnit", pRetention->rtn.keepUnit, code); + tjsonGetNumberValue(pNodeRetention, "checkpointId", pRetention->checkpointId, code); } tjsonGetNumberValue(pJson, "wal.vgId", pCfg->walCfg.vgId, code); if (code < 0) return -1; diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 50ca2f5d03..ca4335f391 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -290,6 +290,9 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { tsem_wait(&pVnode->canCommit); if(syncNodeGetConfig(pVnode->sync, &pVnode->config.syncCfg) != 0) goto _exit; + + code = smaPrepareAsyncCommit(pVnode->pSma); + if (code) goto _exit; pVnode->state.commitTerm = pVnode->state.applyTerm; @@ -313,9 +316,6 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { metaPrepareAsyncCommit(pVnode->pMeta); - code = smaPrepareAsyncCommit(pVnode->pSma); - if (code) goto _exit; - taosThreadMutexLock(&pVnode->mutex); ASSERT(pVnode->onCommit == NULL); pVnode->onCommit = pVnode->inUse; diff --git a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim index 6f78829db7..c70f2dc20a 100644 --- a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim +++ b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim @@ -5,7 +5,7 @@ sleep 50 sql connect #todo wait for streamState checkpoint -return 1 +#return 1 print =============== create database with retentions sql create database d0 retentions -:7d,5m:21d,15m:365d; From 59be62c96eb182873317ee2ef8319595d499c565 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 1 Nov 2023 15:34:17 +0800 Subject: [PATCH 04/32] chore: set stream input checkpoint --- include/common/tmsg.h | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 43 +++++++++++++------------- source/libs/executor/src/executor.c | 9 ++++++ 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index aa39a9da30..323772af95 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -453,7 +453,7 @@ typedef struct SRetention { typedef struct SRetentionEx { SRetention rtn; - int64_t checkpointId; + int64_t checkpointId; } SRetentionEx; #define RETENTION_VALID(l, r) ((((l) == 0 && (r)->freq >= 0) || ((r)->freq > 0)) && ((r)->keep > 0)) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 1b13f37141..7c7f9fad25 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -259,7 +259,7 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat taosMemoryFree(s); } - SStreamTask *pStreamTask = taosMemoryCalloc(1, sizeof(SStreamTask)); + SStreamTask *pStreamTask = taosMemoryCalloc(1, sizeof(*pStreamTask)); if (!pStreamTask) { terrno = TSDB_CODE_OUT_OF_MEMORY; return TSDB_CODE_FAILED; @@ -269,7 +269,6 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->chkInfo.startTs = taosGetTimestampMs(); pStreamTask->pMeta = pVnode->pTq->pStreamMeta; pStreamTask->chkInfo.checkpointId = pTsdbCfg->retentions[idx + 1].checkpointId; - pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; @@ -286,6 +285,14 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat taosMemoryFreeClear(pStreamTask); return TSDB_CODE_FAILED; } + + if (pStreamTask->chkInfo.checkpointId != -1) { + SSDataBlock dataBlock = {.info.type = STREAM_CHECKPOINT}; + if ((terrno = qSetSMAInput(pRSmaInfo->taskInfo[idx], &dataBlock, 1, STREAM_INPUT__CHECKPOINT)) < 0) { + return TSDB_CODE_FAILED; + } + } + SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]); pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; // fetch the data when reboot pItem->pStreamState = pStreamState; @@ -308,10 +315,10 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat taosTmrReset(tdRSmaFetchTrigger, RSMA_FETCH_INTERVAL, pItem, smaMgmt.tmrHandle, &pItem->tmrId); - smaInfo("vgId:%d, item:%p table:%" PRIi64 " level:%" PRIi8 " maxdelay:%" PRIi64 " watermark:%" PRIi64 - ", finally maxdelay:%" PRIi32, - TD_VID(pVnode), pItem, pRSmaInfo->suid, (int8_t)(idx + 1), param->maxdelay[idx], param->watermark[idx], - pItem->maxDelay); + smaInfo("vgId:%d, open task:%p table:%" PRIi64 " level:%" PRIi8 ", checkpointId:%" PRIi64 ", maxdelay:%" PRIi64 + " watermark:%" PRIi64 ", finally maxdelay:%" PRIi32, + TD_VID(pVnode), pItem->pStreamTask, pRSmaInfo->suid, (int8_t)(idx + 1), pStreamTask->chkInfo.checkpointId, + param->maxdelay[idx], param->watermark[idx], pItem->maxDelay); } return TSDB_CODE_SUCCESS; } @@ -1089,27 +1096,21 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i); -#if 0 - if (pItem && pItem->pStreamState) { - if (streamStateCommit(pItem->pStreamState) < 0) { - code = TSDB_CODE_RSMA_STREAM_STATE_COMMIT; - TSDB_CHECK_CODE(code, lino, _exit); - } - smaDebug("vgId:%d, rsma persist, stream state commit success, table %" PRIi64 ", level %d", TD_VID(pVnode), - pRSmaInfo->suid, i + 1); - } -#else - if (pItem) { + if (pItem && pItem->pStreamTask) { SStreamTask *pTask = pItem->pStreamTask; - atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); // adaption for API streamTaskBuildCheckpoint + // adaption for API streamTaskBuildCheckpoint + atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); pTask->checkpointingId = taosGetTimestampNs(); code = streamTaskBuildCheckpoint(pTask); TSDB_CHECK_CODE(code, lino, _exit); + + // save checkpointId to vnode.json (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; - smaInfo("vgId:%d, rsma persist, build stream checkpoint success, table:%" PRIi64 ", level:%d, id:%" PRIi64, - TD_VID(pVnode), pRSmaInfo->suid, i + 1, pTask->checkpointingId); + + smaInfo("vgId:%d, commit task:%p, build stream checkpoint success, table:%" PRIi64 + ", level:%d, checkpointId:%" PRIi64, + TD_VID(pVnode), pTask, pRSmaInfo->suid, i + 1, pTask->checkpointingId); } -#endif } } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 60dc6f0185..2eac04db88 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -75,6 +75,15 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf taosArrayPush(pInfo->pBlockLists, &tmp); } pInfo->blockType = STREAM_INPUT__DATA_BLOCK; + } else if (type == STREAM_INPUT__CHECKPOINT) { + for (int32_t i = 0; i < numOfBlocks; ++i) { + SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; + SPackedData tmp = { + .pDataBlock = pDataBlock, + }; + taosArrayPush(pInfo->pBlockLists, &tmp); + } + pInfo->blockType = STREAM_INPUT__CHECKPOINT; } return TSDB_CODE_SUCCESS; From c32e60d199fa99cf97398dfa508975f24a09b294 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 1 Nov 2023 16:27:41 +0800 Subject: [PATCH 05/32] chore: more code for rsma checkpoint --- source/dnode/vnode/src/inc/sma.h | 1 + source/dnode/vnode/src/sma/smaRollup.c | 5 +++-- source/libs/executor/src/executor.c | 8 ++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 5dd7df0962..48e9aed6c2 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -159,6 +159,7 @@ struct SRSmaInfo { void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t STaosQueue *queue; // buffer queue of SubmitReq STaosQall *qall; // buffer qall of SubmitReq + SSDataBlock dataBlock; }; #define RSMA_INFO_HEAD_LEN offsetof(SRSmaInfo, items) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 7c7f9fad25..303c222ae5 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -287,8 +287,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat } if (pStreamTask->chkInfo.checkpointId != -1) { - SSDataBlock dataBlock = {.info.type = STREAM_CHECKPOINT}; - if ((terrno = qSetSMAInput(pRSmaInfo->taskInfo[idx], &dataBlock, 1, STREAM_INPUT__CHECKPOINT)) < 0) { + SSDataBlock *pDataBlock = &pRSmaInfo->dataBlock; + if ((terrno = qSetSMAInput(pRSmaInfo->taskInfo[idx], pDataBlock, 1, STREAM_INPUT__CHECKPOINT)) < 0) { return TSDB_CODE_FAILED; } } @@ -370,6 +370,7 @@ int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con pRSmaInfo->pSma = pSma; pRSmaInfo->pTSchema = pTSchema; pRSmaInfo->suid = suid; + pRSmaInfo->dataBlock.info.type = STREAM_CHECKPOINT; T_REF_INIT_VAL(pRSmaInfo, 1); if (!(pRSmaInfo->queue = taosOpenQueue()) || !(pRSmaInfo->qall = taosAllocateQall()) || diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 2eac04db88..b46ae9e1c0 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -58,17 +58,20 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf SStreamScanInfo* pInfo = pOperator->info; if (type == STREAM_INPUT__MERGED_SUBMIT) { + qInfo("%s:%d type:%d, pDataBlock->info.type(N/A)", __func__, __LINE__, type); for (int32_t i = 0; i < numOfBlocks; i++) { SPackedData* pReq = POINTER_SHIFT(input, i * sizeof(SPackedData)); taosArrayPush(pInfo->pBlockLists, pReq); } pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; } else if (type == STREAM_INPUT__DATA_SUBMIT) { + qInfo("%s:%d type:%d, pDataBlock->info.type(N/A)", __func__, __LINE__, type); taosArrayPush(pInfo->pBlockLists, &input); pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; } else if (type == STREAM_INPUT__DATA_BLOCK) { for (int32_t i = 0; i < numOfBlocks; ++i) { SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; + qInfo("%s:%d type:%d, pDataBlock->info.type:%d", __func__, __LINE__, type, pDataBlock->info.type); SPackedData tmp = { .pDataBlock = pDataBlock, }; @@ -78,8 +81,9 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf } else if (type == STREAM_INPUT__CHECKPOINT) { for (int32_t i = 0; i < numOfBlocks; ++i) { SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; - SPackedData tmp = { - .pDataBlock = pDataBlock, + qInfo("%s:%d type:%d, pDataBlock->info.type:%d", __func__, __LINE__, type, pDataBlock->info.type); + SPackedData tmp = { + .pDataBlock = pDataBlock, }; taosArrayPush(pInfo->pBlockLists, &tmp); } From 3803f952f9d763f8b27b0e706fe39f9ad3306ebf Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 1 Nov 2023 20:05:00 +0800 Subject: [PATCH 06/32] chore: rsma checkpoint verify --- source/dnode/vnode/src/inc/sma.h | 5 +- source/dnode/vnode/src/sma/smaCommit.c | 2 + source/dnode/vnode/src/sma/smaEnv.c | 1 + source/dnode/vnode/src/sma/smaRollup.c | 181 +++++++++++++++++++----- source/libs/executor/src/executor.c | 2 +- source/libs/executor/src/scanoperator.c | 7 +- 6 files changed, 156 insertions(+), 42 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 48e9aed6c2..a8807483f6 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -115,6 +115,7 @@ struct SRSmaStat { SRSmaFS fs; // for recovery/snapshot r/w SHashObj *infoHash; // key: suid, value: SRSmaInfo tsem_t notEmpty; // has items in queue buffer + SSDataBlock dataBlock; }; struct SSmaStat { @@ -140,7 +141,8 @@ struct SRSmaInfoItem { int8_t fetchLevel : 4; int8_t triggerStat; uint16_t nScanned; - int32_t maxDelay; // ms + int32_t streamFlushed : 1; + int32_t maxDelay : 31; // ms tmr_h tmrId; void *pStreamState; void *pStreamTask; // SStreamTask @@ -159,7 +161,6 @@ struct SRSmaInfo { void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t STaosQueue *queue; // buffer queue of SubmitReq STaosQall *qall; // buffer qall of SubmitReq - SSDataBlock dataBlock; }; #define RSMA_INFO_HEAD_LEN offsetof(SRSmaInfo, items) diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 652aab3c01..5a6144b3fa 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -178,6 +178,8 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { if (!isCommit) goto _exit; + + code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index 04a254fc7a..94ecb46473 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -209,6 +209,7 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS pRSmaStat->pSma = (SSma *)pSma; atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_INIT); tsem_init(&pRSmaStat->notEmpty, 0, 0); + pRSmaStat->dataBlock.info.type = STREAM_CHECKPOINT; // init smaMgmt smaInit(); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 303c222ae5..88bdc2df1d 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -78,32 +78,26 @@ static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t l */ void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) { if (pInfo) { - for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - SRSmaInfoItem *pItem = &pInfo->items[i]; + if (isDeepFree) { + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + SRSmaInfoItem *pItem = &pInfo->items[i]; - if (isDeepFree && pItem->tmrId) { - smaDebug("vgId:%d, stop fetch timer %p for table %" PRIi64 " level %d", SMA_VID(pSma), pItem->tmrId, - pInfo->suid, i + 1); - taosTmrStopA(&pItem->tmrId); - } + if (pItem->tmrId) { + smaDebug("vgId:%d, stop fetch timer %p for table %" PRIi64 " level %d", SMA_VID(pSma), pItem->tmrId, + pInfo->suid, i + 1); + taosTmrStopA(&pItem->tmrId); + } - if (isDeepFree && pItem->pStreamState) { - streamStateClose(pItem->pStreamState, false); - } + if (pItem->pStreamState) { + streamStateClose(pItem->pStreamState, false); + } - if(isDeepFree && pItem->pStreamTask) { taosMemoryFreeClear(pItem->pStreamTask); - } - - if (isDeepFree && pInfo->taskInfo[i]) { tdRSmaQTaskInfoFree(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1); } - } - if (isDeepFree) { - taosMemoryFreeClear(pInfo->pTSchema); - } - if (isDeepFree) { + taosMemoryFreeClear(pInfo->pTSchema); + if (pInfo->queue) { taosCloseQueue(pInfo->queue); pInfo->queue = NULL; @@ -286,13 +280,6 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat return TSDB_CODE_FAILED; } - if (pStreamTask->chkInfo.checkpointId != -1) { - SSDataBlock *pDataBlock = &pRSmaInfo->dataBlock; - if ((terrno = qSetSMAInput(pRSmaInfo->taskInfo[idx], pDataBlock, 1, STREAM_INPUT__CHECKPOINT)) < 0) { - return TSDB_CODE_FAILED; - } - } - SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]); pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; // fetch the data when reboot pItem->pStreamState = pStreamState; @@ -370,7 +357,6 @@ int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con pRSmaInfo->pSma = pSma; pRSmaInfo->pTSchema = pTSchema; pRSmaInfo->suid = suid; - pRSmaInfo->dataBlock.info.type = STREAM_CHECKPOINT; T_REF_INIT_VAL(pRSmaInfo, 1); if (!(pRSmaInfo->queue = taosOpenQueue()) || !(pRSmaInfo->qall = taosAllocateQall()) || @@ -1075,22 +1061,145 @@ _err: return code; } -#if 1 + +static int32_t tdRSmaExecVerifyCheckPoint(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, + int64_t suid, SArray **ppResList, int8_t *streamFlushed) { + int32_t code = 0; + int32_t lino = 0; + SSDataBlock *output = NULL; + SArray *pResList = NULL; + + if (!(*ppResList)) { + pResList = taosArrayInit(1, POINTER_BYTES); + if (pResList == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + *ppResList = pResList; + } else { + pResList = *ppResList; + } + + while (1) { + uint64_t ts; + bool hasMore = false; + code = qExecTaskOpt(taskInfo, pResList, &ts, &hasMore, NULL); + if (code == TSDB_CODE_QRY_IN_EXEC) { + code = 0; + break; + } + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(pResList) == 0) { + break; + } +#if 0 + char flag[10] = {0}; + snprintf(flag, 10, "level %" PRIi8, pItem->level); + blockDebugShowDataBlocks(pResList, flag); +#endif + for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) { + output = taosArrayGetP(pResList, i); + if(output->info.type == STREAM_CHECKPOINT) { + if (streamFlushed) *streamFlushed = 1; + continue; + } + smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma), + output->info.id.uid, output->info.id.groupId, output->info.rows); + + STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); + SSubmitReq2 *pReq = NULL; + + // TODO: the schema update should be handled later(TD-17965) + if (buildSubmitReqFromDataBlock(&pReq, output, pTSchema, output->info.id.groupId, SMA_VID(pSma), suid) < 0) { + code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) { + code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; + tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); + taosMemoryFree(pReq); + TSDB_CHECK_CODE(code, lino, _exit); + } + + smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64, + SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, output->info.version); + + if (pReq) { + tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); + taosMemoryFree(pReq); + } + } + } +_exit: + if (code) { + smaError("vgId:%d, %s failed at line %d since %s, suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIi64 + ", ver:%" PRIi64, + SMA_VID(pSma), __func__, lino, tstrerror(code), suid, pItem->level, output ? output->info.id.uid : -1, + output ? output->info.version : -1); + } else { + smaDebug("vgId:%d, %s succeed, suid:%" PRIi64 ", level:%" PRIi8, SMA_VID(pSma), __func__, suid, pItem->level); + } + taosArrayDestroy(pResList); + qCleanExecTaskBlockBuf(taskInfo); + return code; +} + int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { - int32_t code = 0; - int32_t lino = 0; - SSma *pSma = pRSmaStat->pSma; - SVnode *pVnode = pSma->pVnode; - SRSmaFS fs = {0}; + int32_t code = 0; + int32_t lino = 0; + int32_t nTaskInfo = 0; + SSma *pSma = pRSmaStat->pSma; + SVnode *pVnode = pSma->pVnode; + SSDataBlock *pDataBlock = &pRSmaStat->dataBlock; + SArray *pResList = NULL; + SRSmaFS fs = {0}; if (taosHashGetSize(pInfoHash) <= 0) { return TSDB_CODE_SUCCESS; } void *infoHash = NULL; + // stream state: trigger checkpoint while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; + if (RSMA_INFO_IS_DEL(pRSmaInfo)) { + continue; + } + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + if (pRSmaInfo->taskInfo[i]) { + code = qSetSMAInput(pRSmaInfo->taskInfo[i], pDataBlock, 1, STREAM_INPUT__CHECKPOINT); + TSDB_CHECK_CODE(code, lino, _exit); + pRSmaInfo->items[i].streamFlushed = 0; + ++nTaskInfo; + } + } + } + // stream state: process checkpoint response in async mode + int32_t nStreamFlushed = 0; + while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; + if (RSMA_INFO_IS_DEL(pRSmaInfo)) { + continue; + } + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { + int8_t streamFlushed = 0; + code = tdRSmaExecVerifyCheckPoint(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo->pTSchema, + pRSmaInfo->suid, &pResList, &streamFlushed); + TSDB_CHECK_CODE(code, lino, _exit); + if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { + goto _checkpoint; + } + } + } + } + // stream state: build checkpoint in backend +_checkpoint: + while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { continue; } @@ -1100,11 +1209,11 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { if (pItem && pItem->pStreamTask) { SStreamTask *pTask = pItem->pStreamTask; // adaption for API streamTaskBuildCheckpoint - atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); + atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); pTask->checkpointingId = taosGetTimestampNs(); code = streamTaskBuildCheckpoint(pTask); TSDB_CHECK_CODE(code, lino, _exit); - + // save checkpointId to vnode.json (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; @@ -1123,7 +1232,7 @@ _exit: terrno = code; return code; } -#endif + /** * @brief trigger to get rsma result in async mode * diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index b46ae9e1c0..c08a2d38f9 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -646,7 +646,7 @@ int32_t qExecTaskOpt(qTaskInfo_t tinfo, SArray* pResList, uint64_t* useconds, bo blockIndex += 1; current += p->info.rows; - ASSERT(p->info.rows > 0); + ASSERT(p->info.rows > 0 || p->info.type == STREAM_CHECKPOINT); taosArrayPush(pResList, &p); if (current >= rowsThreshold) { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index efbc978323..b7071d3f52 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2331,11 +2331,12 @@ FETCH_NEXT_BLOCK: return NULL; } - int32_t current = pInfo->validBlockIndex++; - qDebug("process %d/%d input data blocks, %s", current, (int32_t) total, id); + int32_t current = pInfo->validBlockIndex++; + qDebug("process %d/%d input data blocks, %s", current, (int32_t)total, id); SPackedData* pData = taosArrayGet(pInfo->pBlockLists, current); - SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0); + // SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0); + SSDataBlock* pBlock = pData->pDataBlock; if (pBlock->info.type == STREAM_CHECKPOINT) { streamScanOperatorSaveCheckpoint(pInfo); From bacf771ada1544cc109bd7657a086a77815fb27e Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 08:55:01 +0800 Subject: [PATCH 07/32] chore: test case for rsma persist --- tests/script/tsim/sma/rsmaPersistenceRecovery.sim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim index c70f2dc20a..6f78829db7 100644 --- a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim +++ b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim @@ -5,7 +5,7 @@ sleep 50 sql connect #todo wait for streamState checkpoint -#return 1 +return 1 print =============== create database with retentions sql create database d0 retentions -:7d,5m:21d,15m:365d; From 9649e87cabf416c2a151178607116351886e65ae Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 09:44:43 +0800 Subject: [PATCH 08/32] fix: rsma checkpoint --- source/dnode/vnode/src/inc/sma.h | 2 +- source/dnode/vnode/src/sma/smaCommit.c | 2 -- source/dnode/vnode/src/sma/smaEnv.c | 8 +++++++- source/dnode/vnode/src/sma/smaRollup.c | 3 +-- source/libs/executor/src/executor.c | 17 +++-------------- source/libs/executor/src/scanoperator.c | 3 +-- 6 files changed, 13 insertions(+), 22 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index a8807483f6..bce5e1b0b2 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -115,7 +115,7 @@ struct SRSmaStat { SRSmaFS fs; // for recovery/snapshot r/w SHashObj *infoHash; // key: suid, value: SRSmaInfo tsem_t notEmpty; // has items in queue buffer - SSDataBlock dataBlock; + SArray *blocks; // SArray }; struct SSmaStat { diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 5a6144b3fa..652aab3c01 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -178,8 +178,6 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { if (!isCommit) goto _exit; - - code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index 94ecb46473..d47398bdff 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -209,7 +209,12 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS pRSmaStat->pSma = (SSma *)pSma; atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_INIT); tsem_init(&pRSmaStat->notEmpty, 0, 0); - pRSmaStat->dataBlock.info.type = STREAM_CHECKPOINT; + if (!(pRSmaStat->blocks = taosArrayInit(1, sizeof(SSDataBlock)))) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + SSDataBlock datablock = {.info.type = STREAM_CHECKPOINT}; + taosArrayPush(pRSmaStat->blocks, &datablock); // init smaMgmt smaInit(); @@ -291,6 +296,7 @@ static void tdDestroyRSmaStat(void *pRSmaStat) { // step 5: free pStat tsem_destroy(&(pStat->notEmpty)); + taosArrayDestroy(pStat->blocks); taosMemoryFreeClear(pStat); } } diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 88bdc2df1d..8e00297564 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1152,7 +1152,6 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { int32_t nTaskInfo = 0; SSma *pSma = pRSmaStat->pSma; SVnode *pVnode = pSma->pVnode; - SSDataBlock *pDataBlock = &pRSmaStat->dataBlock; SArray *pResList = NULL; SRSmaFS fs = {0}; @@ -1169,7 +1168,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { } for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pRSmaInfo->taskInfo[i]) { - code = qSetSMAInput(pRSmaInfo->taskInfo[i], pDataBlock, 1, STREAM_INPUT__CHECKPOINT); + code = qSetSMAInput(pRSmaInfo->taskInfo[i], pRSmaStat->blocks, 1, STREAM_INPUT__CHECKPOINT); TSDB_CHECK_CODE(code, lino, _exit); pRSmaInfo->items[i].streamFlushed = 0; ++nTaskInfo; diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index c08a2d38f9..8117ceb55c 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -58,35 +58,24 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf SStreamScanInfo* pInfo = pOperator->info; if (type == STREAM_INPUT__MERGED_SUBMIT) { - qInfo("%s:%d type:%d, pDataBlock->info.type(N/A)", __func__, __LINE__, type); for (int32_t i = 0; i < numOfBlocks; i++) { SPackedData* pReq = POINTER_SHIFT(input, i * sizeof(SPackedData)); taosArrayPush(pInfo->pBlockLists, pReq); } pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; } else if (type == STREAM_INPUT__DATA_SUBMIT) { - qInfo("%s:%d type:%d, pDataBlock->info.type(N/A)", __func__, __LINE__, type); taosArrayPush(pInfo->pBlockLists, &input); pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; } else if (type == STREAM_INPUT__DATA_BLOCK) { for (int32_t i = 0; i < numOfBlocks; ++i) { SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; - qInfo("%s:%d type:%d, pDataBlock->info.type:%d", __func__, __LINE__, type, pDataBlock->info.type); - SPackedData tmp = { - .pDataBlock = pDataBlock, - }; + SPackedData tmp = {.pDataBlock = pDataBlock}; taosArrayPush(pInfo->pBlockLists, &tmp); } pInfo->blockType = STREAM_INPUT__DATA_BLOCK; } else if (type == STREAM_INPUT__CHECKPOINT) { - for (int32_t i = 0; i < numOfBlocks; ++i) { - SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; - qInfo("%s:%d type:%d, pDataBlock->info.type:%d", __func__, __LINE__, type, pDataBlock->info.type); - SPackedData tmp = { - .pDataBlock = pDataBlock, - }; - taosArrayPush(pInfo->pBlockLists, &tmp); - } + SPackedData tmp = {.pDataBlock = input}; + taosArrayPush(pInfo->pBlockLists, &tmp); pInfo->blockType = STREAM_INPUT__CHECKPOINT; } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index b7071d3f52..247dde7fc3 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2335,8 +2335,7 @@ FETCH_NEXT_BLOCK: qDebug("process %d/%d input data blocks, %s", current, (int32_t)total, id); SPackedData* pData = taosArrayGet(pInfo->pBlockLists, current); - // SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0); - SSDataBlock* pBlock = pData->pDataBlock; + SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0); if (pBlock->info.type == STREAM_CHECKPOINT) { streamScanOperatorSaveCheckpoint(pInfo); From 76536e1c8290e8070103c29741cdf3a595b3114a Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 11:03:18 +0800 Subject: [PATCH 09/32] enh: rsma logic --- source/dnode/vnode/src/sma/smaRollup.c | 155 ++++++++----------------- source/libs/executor/src/executil.c | 10 ++ 2 files changed, 58 insertions(+), 107 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 8e00297564..d2694c860d 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -44,7 +44,7 @@ static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); static void tdFreeRSmaSubmitItems(SArray *pItems); static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo); static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, - int64_t suid); + int64_t suid, SArray **ppResList, int8_t *streamFlushed); static void tdRSmaFetchTrigger(void *param, void *tmrId); static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level); static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables); @@ -591,17 +591,25 @@ _end: } static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, - int64_t suid) { + int64_t suid, SArray **ppResList, int8_t *streamFlushed) { int32_t code = 0; int32_t lino = 0; SSDataBlock *output = NULL; + SArray *pResList = NULL; - SArray *pResList = taosArrayInit(1, POINTER_BYTES); - if (pResList == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); + if (!(*ppResList)) { + pResList = taosArrayInit(1, POINTER_BYTES); + if (pResList == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + *ppResList = pResList; + } else { + pResList = *ppResList; } + taosArrayClear(pResList); + while (1) { uint64_t ts; bool hasMore = false; @@ -622,6 +630,10 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma #endif for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) { output = taosArrayGetP(pResList, i); + if(output->info.type == STREAM_CHECKPOINT) { + if (streamFlushed) *streamFlushed = 1; + continue; + } smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma), output->info.id.uid, output->info.id.groupId, output->info.rows); @@ -659,7 +671,6 @@ _exit: } else { smaDebug("vgId:%d, %s succeed, suid:%" PRIi64 ", level:%" PRIi8, SMA_VID(pSma), __func__, suid, pItem->level); } - taosArrayDestroy(pResList); qCleanExecTaskBlockBuf(taskInfo); return code; } @@ -756,6 +767,7 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, ERsmaExecType type, int8_t level) { int32_t idx = level - 1; void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); + SArray *pResList = NULL; if (!qTaskInfo) { smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, @@ -784,8 +796,9 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, } SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); - tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo->pTSchema, pInfo->suid); + tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo->pTSchema, pInfo->suid, &pResList, NULL); + taosArrayDestroy(pResList); return TSDB_CODE_SUCCESS; } @@ -1062,90 +1075,6 @@ _err: return code; } -static int32_t tdRSmaExecVerifyCheckPoint(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, - int64_t suid, SArray **ppResList, int8_t *streamFlushed) { - int32_t code = 0; - int32_t lino = 0; - SSDataBlock *output = NULL; - SArray *pResList = NULL; - - if (!(*ppResList)) { - pResList = taosArrayInit(1, POINTER_BYTES); - if (pResList == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - *ppResList = pResList; - } else { - pResList = *ppResList; - } - - while (1) { - uint64_t ts; - bool hasMore = false; - code = qExecTaskOpt(taskInfo, pResList, &ts, &hasMore, NULL); - if (code == TSDB_CODE_QRY_IN_EXEC) { - code = 0; - break; - } - TSDB_CHECK_CODE(code, lino, _exit); - - if (taosArrayGetSize(pResList) == 0) { - break; - } -#if 0 - char flag[10] = {0}; - snprintf(flag, 10, "level %" PRIi8, pItem->level); - blockDebugShowDataBlocks(pResList, flag); -#endif - for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) { - output = taosArrayGetP(pResList, i); - if(output->info.type == STREAM_CHECKPOINT) { - if (streamFlushed) *streamFlushed = 1; - continue; - } - smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma), - output->info.id.uid, output->info.id.groupId, output->info.rows); - - STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); - SSubmitReq2 *pReq = NULL; - - // TODO: the schema update should be handled later(TD-17965) - if (buildSubmitReqFromDataBlock(&pReq, output, pTSchema, output->info.id.groupId, SMA_VID(pSma), suid) < 0) { - code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; - TSDB_CHECK_CODE(code, lino, _exit); - } - - if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) { - code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; - tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); - taosMemoryFree(pReq); - TSDB_CHECK_CODE(code, lino, _exit); - } - - smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64, - SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, output->info.version); - - if (pReq) { - tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); - taosMemoryFree(pReq); - } - } - } -_exit: - if (code) { - smaError("vgId:%d, %s failed at line %d since %s, suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIi64 - ", ver:%" PRIi64, - SMA_VID(pSma), __func__, lino, tstrerror(code), suid, pItem->level, output ? output->info.id.uid : -1, - output ? output->info.version : -1); - } else { - smaDebug("vgId:%d, %s succeed, suid:%" PRIi64 ", level:%" PRIi8, SMA_VID(pSma), __func__, suid, pItem->level); - } - taosArrayDestroy(pResList); - qCleanExecTaskBlockBuf(taskInfo); - return code; -} - int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { int32_t code = 0; int32_t lino = 0; @@ -1177,22 +1106,31 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { } // stream state: process checkpoint response in async mode int32_t nStreamFlushed = 0; - while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { - SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; - if (RSMA_INFO_IS_DEL(pRSmaInfo)) { - continue; - } - for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { - int8_t streamFlushed = 0; - code = tdRSmaExecVerifyCheckPoint(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo->pTSchema, - pRSmaInfo->suid, &pResList, &streamFlushed); - TSDB_CHECK_CODE(code, lino, _exit); - if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { - goto _checkpoint; + int32_t nMSleep = 0; + while (true) { + while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; + if (RSMA_INFO_IS_DEL(pRSmaInfo)) { + continue; + } + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { + int8_t streamFlushed = 0; + code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo->pTSchema, + pRSmaInfo->suid, &pResList, &streamFlushed); + TSDB_CHECK_CODE(code, lino, _exit); + if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { + smaInfo("%s:%d checkpoint ready, %d ms consumed, received/total: %d/%d", __func__, __LINE__, nMSleep, + nStreamFlushed, nTaskInfo); + goto _checkpoint; + } } } } + taosMsleep(1); + ++nMSleep; + smaInfo("%s:%d wait for checkpoint ready, %d ms elapsed, received/total: %d/%d", __func__, __LINE__, nMSleep, + nStreamFlushed, nTaskInfo); } // stream state: build checkpoint in backend @@ -1207,7 +1145,6 @@ _checkpoint: SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i); if (pItem && pItem->pStreamTask) { SStreamTask *pTask = pItem->pStreamTask; - // adaption for API streamTaskBuildCheckpoint atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); pTask->checkpointingId = taosGetTimestampNs(); code = streamTaskBuildCheckpoint(pTask); @@ -1224,6 +1161,7 @@ _checkpoint: } _exit: + taosArrayDestroy(pResList); if (code) { smaError("vgId:%d, %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code)); } @@ -1355,6 +1293,7 @@ static void tdFreeRSmaSubmitItems(SArray *pItems) { */ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { SSDataBlock dataBlock = {.info.type = STREAM_GET_ALL}; + SArray *pResList = NULL; for (int8_t i = 1; i <= TSDB_RETENTION_L2; ++i) { SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, i - 1); if (pItem->fetchLevel) { @@ -1385,7 +1324,7 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { if ((terrno = qSetSMAInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) { goto _err; } - if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo->pTSchema, pInfo->suid) < 0) { + if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo->pTSchema, pInfo->suid, &pResList, NULL) < 0) { goto _err; } @@ -1399,8 +1338,10 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { } _end: + taosArrayDestroy(pResList); return TSDB_CODE_SUCCESS; _err: + taosArrayDestroy(pResList); return TSDB_CODE_FAILED; } diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 753d3e680c..a1bd5a7483 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -1967,9 +1967,19 @@ int32_t tableListAddTableInfo(STableListInfo* pTableList, uint64_t uid, uint64_t pTableList->map = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); } + for(int32_t i=0; i< taosArrayGetSize(pTableList->pTableList); ++i) { + STableKeyInfo* pKeyInfo = taosArrayGet(pTableList->pTableList, i); + if(pKeyInfo->uid == uid) { + assert(0); + } + } + STableKeyInfo keyInfo = {.uid = uid, .groupId = gid}; taosArrayPush(pTableList->pTableList, &keyInfo); + if(taosHashGet(pTableList->map, &uid, sizeof(uid))) { + assert(0); + } int32_t slot = (int32_t)taosArrayGetSize(pTableList->pTableList) - 1; taosHashPut(pTableList->map, &uid, sizeof(uid), &slot, sizeof(slot)); From 698fb804f9e0b377fc7175a1099838571fa88307 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 12:54:23 +0800 Subject: [PATCH 10/32] enh: disable update tbUidList during reboot --- source/dnode/vnode/src/sma/smaRollup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index d2694c860d..89db465b2e 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1012,7 +1012,7 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) { code = terrno; TSDB_CHECK_CODE(code, lino, _exit); } - +#if 0 // reload all ctbUids for suid uidStore.suid = suid; if (vnodeGetCtbIdList(pVnode, suid, uidStore.tbUids) < 0) { @@ -1026,7 +1026,7 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) { } taosArrayClear(uidStore.tbUids); - +#endif smaDebug("vgId:%d, rsma restore env success for %" PRIi64, TD_VID(pVnode), suid); } } From 96b50243473c87f4adcaab071a58eead93b0aa24 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 13:05:36 +0800 Subject: [PATCH 11/32] enh: rsma checkpoint --- source/dnode/vnode/src/sma/smaRollup.c | 1 + tests/script/tsim/sma/rsmaPersistenceRecovery.sim | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 89db465b2e..4619086ccd 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1147,6 +1147,7 @@ _checkpoint: SStreamTask *pTask = pItem->pStreamTask; atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); pTask->checkpointingId = taosGetTimestampNs(); + pTask->chkInfo.checkpointId = pTask->checkpointingId; code = streamTaskBuildCheckpoint(pTask); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim index 6f78829db7..c70f2dc20a 100644 --- a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim +++ b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim @@ -5,7 +5,7 @@ sleep 50 sql connect #todo wait for streamState checkpoint -return 1 +#return 1 print =============== create database with retentions sql create database d0 retentions -:7d,5m:21d,15m:365d; From 722777f8c963ab4a89b95c937348b9713d742e9e Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 13:15:31 +0800 Subject: [PATCH 12/32] enh: rsma checkpoint --- source/dnode/vnode/src/sma/smaRollup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 4619086ccd..986d0da677 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1106,7 +1106,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { } // stream state: process checkpoint response in async mode int32_t nStreamFlushed = 0; - int32_t nMSleep = 0; + int32_t nSleep = 0; while (true) { while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; @@ -1120,16 +1120,16 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { pRSmaInfo->suid, &pResList, &streamFlushed); TSDB_CHECK_CODE(code, lino, _exit); if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { - smaInfo("%s:%d checkpoint ready, %d ms consumed, received/total: %d/%d", __func__, __LINE__, nMSleep, + smaInfo("%s:%d checkpoint ready, %d us consumed, received/total: %d/%d", __func__, __LINE__, nSleep * 10, nStreamFlushed, nTaskInfo); goto _checkpoint; } } } } - taosMsleep(1); - ++nMSleep; - smaInfo("%s:%d wait for checkpoint ready, %d ms elapsed, received/total: %d/%d", __func__, __LINE__, nMSleep, + taosUsleep(10); + ++nSleep; + smaInfo("%s:%d wait for checkpoint ready, %d us elapsed, received/total: %d/%d", __func__, __LINE__, nSleep * 10, nStreamFlushed, nTaskInfo); } From 2d597659bc2e4eaa55e4ba6a8bd3f7aaaff61af3 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 15:20:54 +0800 Subject: [PATCH 13/32] enh: rsma checkpoint --- source/dnode/vnode/src/sma/smaRollup.c | 69 ++++++++++++------------ source/dnode/vnode/src/vnd/vnodeCommit.c | 4 +- source/libs/executor/src/executil.c | 10 ---- source/libs/executor/src/scanoperator.c | 4 +- 4 files changed, 39 insertions(+), 48 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 986d0da677..f35eec786c 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -630,7 +630,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma #endif for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) { output = taosArrayGetP(pResList, i); - if(output->info.type == STREAM_CHECKPOINT) { + if (output->info.type == STREAM_CHECKPOINT) { if (streamFlushed) *streamFlushed = 1; continue; } @@ -1076,18 +1076,17 @@ _err: } int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { - int32_t code = 0; - int32_t lino = 0; - int32_t nTaskInfo = 0; - SSma *pSma = pRSmaStat->pSma; - SVnode *pVnode = pSma->pVnode; - SArray *pResList = NULL; - SRSmaFS fs = {0}; + int32_t code = 0; + int32_t lino = 0; + int32_t nTaskInfo = 0; + SSma *pSma = pRSmaStat->pSma; + SVnode *pVnode = pSma->pVnode; + SArray *pResList = NULL; + SRSmaFS fs = {0}; if (taosHashGetSize(pInfoHash) <= 0) { return TSDB_CODE_SUCCESS; } - void *infoHash = NULL; // stream state: trigger checkpoint while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { @@ -1120,7 +1119,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { pRSmaInfo->suid, &pResList, &streamFlushed); TSDB_CHECK_CODE(code, lino, _exit); if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { - smaInfo("%s:%d checkpoint ready, %d us consumed, received/total: %d/%d", __func__, __LINE__, nSleep * 10, + smaInfo("vgId:%d checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, nStreamFlushed, nTaskInfo); goto _checkpoint; } @@ -1129,38 +1128,40 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { } taosUsleep(10); ++nSleep; - smaInfo("%s:%d wait for checkpoint ready, %d us elapsed, received/total: %d/%d", __func__, __LINE__, nSleep * 10, - nStreamFlushed, nTaskInfo); + smaDebug("vgId:%d, wait for checkpoint ready, %d us elapsed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, + nStreamFlushed, nTaskInfo); } - // stream state: build checkpoint in backend + _checkpoint: - while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { - SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; - if (RSMA_INFO_IS_DEL(pRSmaInfo)) { - continue; - } + do { + void *infHash = NULL; + while ((infHash = taosHashIterate(pInfoHash, infHash))) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infHash; + if (RSMA_INFO_IS_DEL(pRSmaInfo)) { + continue; + } - for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i); - if (pItem && pItem->pStreamTask) { - SStreamTask *pTask = pItem->pStreamTask; - atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); - pTask->checkpointingId = taosGetTimestampNs(); - pTask->chkInfo.checkpointId = pTask->checkpointingId; - code = streamTaskBuildCheckpoint(pTask); - TSDB_CHECK_CODE(code, lino, _exit); + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i); + if (pItem && pItem->pStreamTask) { + SStreamTask *pTask = pItem->pStreamTask; + atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); + pTask->checkpointingId = taosGetTimestampNs(); + pTask->chkInfo.checkpointId = pTask->checkpointingId; + code = streamTaskBuildCheckpoint(pTask); + TSDB_CHECK_CODE(code, lino, _exit); - // save checkpointId to vnode.json - (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; + // save checkpointId to vnode.json + (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; - smaInfo("vgId:%d, commit task:%p, build stream checkpoint success, table:%" PRIi64 - ", level:%d, checkpointId:%" PRIi64, - TD_VID(pVnode), pTask, pRSmaInfo->suid, i + 1, pTask->checkpointingId); + smaInfo("vgId:%d, commit task:%p, build stream checkpoint success, table:%" PRIi64 + ", level:%d, checkpointId:%" PRIi64, + TD_VID(pVnode), pTask, pRSmaInfo->suid, i + 1, pTask->checkpointingId); + } } } - } - + } while (0); _exit: taosArrayDestroy(pResList); if (code) { diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index ca4335f391..9e0106dff4 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -290,8 +290,8 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { tsem_wait(&pVnode->canCommit); if(syncNodeGetConfig(pVnode->sync, &pVnode->config.syncCfg) != 0) goto _exit; - - code = smaPrepareAsyncCommit(pVnode->pSma); + + code = smaPrepareAsyncCommit(pVnode->pSma); // prepare checkpointId and save to vnode.json if (code) goto _exit; pVnode->state.commitTerm = pVnode->state.applyTerm; diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index a1bd5a7483..753d3e680c 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -1967,19 +1967,9 @@ int32_t tableListAddTableInfo(STableListInfo* pTableList, uint64_t uid, uint64_t pTableList->map = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); } - for(int32_t i=0; i< taosArrayGetSize(pTableList->pTableList); ++i) { - STableKeyInfo* pKeyInfo = taosArrayGet(pTableList->pTableList, i); - if(pKeyInfo->uid == uid) { - assert(0); - } - } - STableKeyInfo keyInfo = {.uid = uid, .groupId = gid}; taosArrayPush(pTableList->pTableList, &keyInfo); - if(taosHashGet(pTableList->map, &uid, sizeof(uid))) { - assert(0); - } int32_t slot = (int32_t)taosArrayGetSize(pTableList->pTableList) - 1; taosHashPut(pTableList->map, &uid, sizeof(uid), &slot, sizeof(slot)); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 247dde7fc3..efbc978323 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2331,8 +2331,8 @@ FETCH_NEXT_BLOCK: return NULL; } - int32_t current = pInfo->validBlockIndex++; - qDebug("process %d/%d input data blocks, %s", current, (int32_t)total, id); + int32_t current = pInfo->validBlockIndex++; + qDebug("process %d/%d input data blocks, %s", current, (int32_t) total, id); SPackedData* pData = taosArrayGet(pInfo->pBlockLists, current); SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0); From fa5d89678790c5bc229b55fa8a3fef1b280db7e7 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 18:56:36 +0800 Subject: [PATCH 14/32] enh: rsma checkpoint --- source/dnode/vnode/src/sma/smaRollup.c | 151 +++++++++++++++---------- 1 file changed, 93 insertions(+), 58 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index f35eec786c..8882dada9a 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -92,7 +92,9 @@ void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) { streamStateClose(pItem->pStreamState, false); } - taosMemoryFreeClear(pItem->pStreamTask); + if (pItem->pStreamTask) { + tFreeStreamTask(pItem->pStreamTask); + } tdRSmaQTaskInfoFree(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1); } @@ -173,8 +175,8 @@ int32_t tdUpdateTbUidList(SSma *pSma, STbUidStore *pStore, bool isAdd) { return TSDB_CODE_FAILED; } - void *pIter = taosHashIterate(pStore->uidHash, NULL); - while (pIter) { + void *pIter = NULL; + while ((pIter = taosHashIterate(pStore->uidHash, pIter))) { tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); SArray *pTbUids = *(SArray **)pIter; @@ -182,8 +184,6 @@ int32_t tdUpdateTbUidList(SSma *pSma, STbUidStore *pStore, bool isAdd) { taosHashCancelIterate(pStore->uidHash, pIter); return TSDB_CODE_FAILED; } - - pIter = taosHashIterate(pStore->uidHash, pIter); } return TSDB_CODE_SUCCESS; } @@ -234,11 +234,12 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo, int8_t idx) { if ((param->qmsgLen > 0) && param->qmsg[idx]) { - SRetention *pRetention = SMA_RETENTION(pSma); - STsdbCfg *pTsdbCfg = SMA_TSDB_CFG(pSma); - SVnode *pVnode = pSma->pVnode; - char taskInfDir[TSDB_FILENAME_LEN] = {0}; - void *pStreamState = NULL; + SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]); + SRetention *pRetention = SMA_RETENTION(pSma); + STsdbCfg *pTsdbCfg = SMA_TSDB_CFG(pSma); + SVnode *pVnode = pSma->pVnode; + char taskInfDir[TSDB_FILENAME_LEN] = {0}; + void *pStreamState = NULL; // set the backend of stream state tdRSmaQTaskInfoGetFullPath(pVnode, pRSmaInfo->suid, idx + 1, pVnode->pTfs, taskInfDir); @@ -258,32 +259,30 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat terrno = TSDB_CODE_OUT_OF_MEMORY; return TSDB_CODE_FAILED; } + pItem->pStreamTask = pStreamTask; pStreamTask->id.taskId = 0; pStreamTask->id.streamId = pRSmaInfo->suid + idx; pStreamTask->chkInfo.startTs = taosGetTimestampMs(); pStreamTask->pMeta = pVnode->pTq->pStreamMeta; + pStreamTask->exec.qmsg = taosMemoryMalloc(2); + sprintf(pStreamTask->exec.qmsg, "%d", idx); pStreamTask->chkInfo.checkpointId = pTsdbCfg->retentions[idx + 1].checkpointId; pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; - taosMemoryFreeClear(pStreamTask); return TSDB_CODE_FAILED; } + pItem->pStreamState = pStreamState; SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState}; initStorageAPI(&handle.api); - pRSmaInfo->taskInfo[idx] = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle, TD_VID(pVnode), 0); if (!pRSmaInfo->taskInfo[idx]) { terrno = TSDB_CODE_RSMA_QTASKINFO_CREATE; - taosMemoryFreeClear(pStreamTask); return TSDB_CODE_FAILED; } - SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]); pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; // fetch the data when reboot - pItem->pStreamState = pStreamState; - pItem->pStreamTask = pStreamTask; if (param->maxdelay[idx] < TSDB_MIN_ROLLUP_MAX_DELAY) { int64_t msInterval = convertTimeFromPrecisionToUnit(pRetention[idx + 1].freq, pTsdbCfg->precision, TIME_UNIT_MILLISECOND); @@ -509,11 +508,10 @@ static void tdUidStoreDestory(STbUidStore *pStore) { if (pStore->uidHash) { if (pStore->tbUids) { // When pStore->tbUids not NULL, the pStore->uidHash has k/v; otherwise pStore->uidHash only has keys. - void *pIter = taosHashIterate(pStore->uidHash, NULL); - while (pIter) { + void *pIter = NULL; + while ((pIter = taosHashIterate(pStore->uidHash, pIter))) { SArray *arr = *(SArray **)pIter; taosArrayDestroy(arr); - pIter = taosHashIterate(pStore->uidHash, pIter); } } taosHashCleanup(pStore->uidHash); @@ -1082,62 +1080,77 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { SSma *pSma = pRSmaStat->pSma; SVnode *pVnode = pSma->pVnode; SArray *pResList = NULL; - SRSmaFS fs = {0}; if (taosHashGetSize(pInfoHash) <= 0) { return TSDB_CODE_SUCCESS; } - void *infoHash = NULL; + // stream state: trigger checkpoint - while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { - SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; - if (RSMA_INFO_IS_DEL(pRSmaInfo)) { - continue; - } - for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - if (pRSmaInfo->taskInfo[i]) { - code = qSetSMAInput(pRSmaInfo->taskInfo[i], pRSmaStat->blocks, 1, STREAM_INPUT__CHECKPOINT); - TSDB_CHECK_CODE(code, lino, _exit); - pRSmaInfo->items[i].streamFlushed = 0; - ++nTaskInfo; - } - } - } - // stream state: process checkpoint response in async mode - int32_t nStreamFlushed = 0; - int32_t nSleep = 0; - while (true) { + do { + void *infoHash = NULL; while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { continue; } for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { - int8_t streamFlushed = 0; - code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo->pTSchema, - pRSmaInfo->suid, &pResList, &streamFlushed); - TSDB_CHECK_CODE(code, lino, _exit); - if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { - smaInfo("vgId:%d checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, - nStreamFlushed, nTaskInfo); - goto _checkpoint; + if (pRSmaInfo->taskInfo[i]) { + code = qSetSMAInput(pRSmaInfo->taskInfo[i], pRSmaStat->blocks, 1, STREAM_INPUT__CHECKPOINT); + if (code) { + taosHashCancelIterate(pInfoHash, infoHash); + TSDB_CHECK_CODE(code, lino, _exit); } + pRSmaInfo->items[i].streamFlushed = 0; + ++nTaskInfo; } } } - taosUsleep(10); - ++nSleep; - smaDebug("vgId:%d, wait for checkpoint ready, %d us elapsed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, - nStreamFlushed, nTaskInfo); - } - // stream state: build checkpoint in backend + } while (0); + + // stream state: wait checkpoint ready in async mode + do { + int32_t nStreamFlushed = 0; + int32_t nSleep = 0; + void *infoHash = NULL; + while (true) { + while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; + if (RSMA_INFO_IS_DEL(pRSmaInfo)) { + continue; + } + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { + int8_t streamFlushed = 0; + code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo->pTSchema, + pRSmaInfo->suid, &pResList, &streamFlushed); + if (code) { + taosHashCancelIterate(pInfoHash, infoHash); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { + smaInfo("vgId:%d checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, + nStreamFlushed, nTaskInfo); + taosHashCancelIterate(pInfoHash, infoHash); + goto _checkpoint; + } + } + } + } + taosUsleep(10); + ++nSleep; + smaDebug("vgId:%d, wait for checkpoint ready, %d us elapsed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, + nStreamFlushed, nTaskInfo); + } + } while (0); _checkpoint: + // stream state: build checkpoint in backend do { - void *infHash = NULL; - while ((infHash = taosHashIterate(pInfoHash, infHash))) { - SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infHash; + void *infoHash = NULL; + + while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { continue; } @@ -1150,7 +1163,26 @@ _checkpoint: pTask->checkpointingId = taosGetTimestampNs(); pTask->chkInfo.checkpointId = pTask->checkpointingId; code = streamTaskBuildCheckpoint(pTask); - TSDB_CHECK_CODE(code, lino, _exit); + if (code) { + taosHashCancelIterate(pInfoHash, infoHash); + TSDB_CHECK_CODE(code, lino, _exit); + } + + taosWLockLatch(&pTask->pMeta->lock); + if (streamMetaSaveTask(pTask->pMeta, pTask) != 0) { + taosWUnLockLatch(&pTask->pMeta->lock); + code = TSDB_CODE_OUT_OF_MEMORY; + taosHashCancelIterate(pInfoHash, infoHash); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (streamMetaCommit(pTask->pMeta) != 0) { + taosWUnLockLatch(&pTask->pMeta->lock); + code = TSDB_CODE_OUT_OF_MEMORY; + taosHashCancelIterate(pInfoHash, infoHash); + TSDB_CHECK_CODE(code, lino, _exit); + } + taosWUnLockLatch(&pTask->pMeta->lock); // save checkpointId to vnode.json (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; @@ -1158,6 +1190,8 @@ _checkpoint: smaInfo("vgId:%d, commit task:%p, build stream checkpoint success, table:%" PRIi64 ", level:%d, checkpointId:%" PRIi64, TD_VID(pVnode), pTask, pRSmaInfo->suid, i + 1, pTask->checkpointingId); + + } } } @@ -1452,6 +1486,7 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) { if (ASSERTS(oldVal >= 0, "oldVal of nFetchAll: %d < 0", oldVal)) { code = TSDB_CODE_APP_ERROR; + taosHashCancelIterate(infoHash, pIter); TSDB_CHECK_CODE(code, lino, _exit); } From 6c944bb1927fa157469e640c5a9cd018e7ebb5d7 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 20:32:54 +0800 Subject: [PATCH 15/32] enh: skip rsma_task during load stream tasks --- source/dnode/vnode/src/sma/smaRollup.c | 5 +++-- source/libs/stream/src/streamMeta.c | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 8882dada9a..cd93dda4fb 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -22,6 +22,7 @@ #define RSMA_FETCH_DELAY_MAX (120000) // ms #define RSMA_FETCH_ACTIVE_MAX (1000) // ms #define RSMA_FETCH_INTERVAL (5000) // ms +#define RSMA_TASK_FLAG "rsma_task" #define RSMA_NEED_FETCH(r) (RSMA_INFO_ITEM((r), 0)->fetchLevel || RSMA_INFO_ITEM((r), 1)->fetchLevel) @@ -264,8 +265,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->id.streamId = pRSmaInfo->suid + idx; pStreamTask->chkInfo.startTs = taosGetTimestampMs(); pStreamTask->pMeta = pVnode->pTq->pStreamMeta; - pStreamTask->exec.qmsg = taosMemoryMalloc(2); - sprintf(pStreamTask->exec.qmsg, "%d", idx); + pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_TASK_FLAG) + 1); + sprintf(pStreamTask->exec.qmsg, "%s", RSMA_TASK_FLAG); pStreamTask->chkInfo.checkpointId = pTsdbCfg->retentions[idx + 1].checkpointId; pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index f788e244cd..7f023f2451 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -687,7 +687,10 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { } tDecoderClear(&decoder); - if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { + if (0 == strcmp(pTask->exec.qmsg, "rsma_task")) { + tFreeStreamTask(pTask); + continue; + } else if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { int32_t taskId = pTask->id.taskId; tFreeStreamTask(pTask); From a48968e5e8f61c630e7bf4f41a204b51110c8fa8 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 3 Nov 2023 14:33:45 +0800 Subject: [PATCH 16/32] enh: rsma checkpoint --- include/common/tmsg.h | 5 -- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 7 +-- source/dnode/vnode/inc/vnode.h | 2 +- source/dnode/vnode/src/inc/sma.h | 12 ++-- source/dnode/vnode/src/sma/smaCommit.c | 7 ++- source/dnode/vnode/src/sma/smaOpen.c | 18 +++--- source/dnode/vnode/src/sma/smaRollup.c | 67 +++++++++++++-------- source/dnode/vnode/src/tsdb/tsdbRead2.c | 6 +- source/dnode/vnode/src/vnd/vnodeCfg.c | 29 ++++----- source/libs/stream/src/streamMeta.c | 5 +- 10 files changed, 80 insertions(+), 78 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 57906c1695..4ef4273631 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -445,11 +445,6 @@ typedef struct SRetention { int8_t keepUnit; } SRetention; -typedef struct SRetentionEx { - SRetention rtn; - int64_t checkpointId; -} SRetentionEx; - #define RETENTION_VALID(l, r) ((((l) == 0 && (r)->freq >= 0) || ((r)->freq > 0)) && ((r)->keep > 0)) #pragma pack(push, 1) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 5a4b341662..c4d525a871 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -134,11 +134,10 @@ static void vmGenerateVnodeCfg(SCreateVnodeReq *pCreate, SVnodeCfg *pCfg) { pCfg->tsdbCfg.minRows = pCreate->minRows; pCfg->tsdbCfg.maxRows = pCreate->maxRows; for (size_t i = 0; i < taosArrayGetSize(pCreate->pRetensions); ++i) { - SRetentionEx *pRetention = &pCfg->tsdbCfg.retentions[i]; - memcpy(&pRetention->rtn, taosArrayGet(pCreate->pRetensions, i), sizeof(SRetention)); - pRetention->checkpointId = -1; + SRetention *pRetention = &pCfg->tsdbCfg.retentions[i]; + memcpy(pRetention, taosArrayGet(pCreate->pRetensions, i), sizeof(SRetention)); if (i == 0) { - if ((pRetention->rtn.freq >= 0 && pRetention->rtn.keep > 0)) pCfg->isRsma = 1; + if ((pRetention->freq >= 0 && pRetention->keep > 0)) pCfg->isRsma = 1; } } diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index e92fc04f6e..6a0c991be4 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -287,7 +287,7 @@ struct STsdbCfg { int32_t keep1; // just for save config, don't use in tsdbRead/tsdbCommit/..., and use STsdbKeepCfg in STsdb instead int32_t keep2; // just for save config, don't use in tsdbRead/tsdbCommit/..., and use STsdbKeepCfg in STsdb instead int32_t keepTimeOffset; // just for save config, use STsdbKeepCfg in STsdb instead - SRetentionEx retentions[TSDB_RETENTION_MAX]; + SRetention retentions[TSDB_RETENTION_MAX]; }; typedef struct { diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index bce5e1b0b2..63d6e7e5c2 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -155,7 +155,7 @@ struct SRSmaInfo { int64_t lastRecv; // ms int8_t assigned; // 0 idle, 1 assgined for exec int8_t delFlag; - int16_t padding; + int8_t padding; T_REF_DECLARE() SRSmaInfoItem items[TSDB_RETENTION_L2]; void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t @@ -163,12 +163,10 @@ struct SRSmaInfo { STaosQall *qall; // buffer qall of SubmitReq }; -#define RSMA_INFO_HEAD_LEN offsetof(SRSmaInfo, items) -#define RSMA_INFO_IS_DEL(r) ((r)->delFlag == 1) -#define RSMA_INFO_SET_DEL(r) ((r)->delFlag = 1) -#define RSMA_INFO_QTASK(r, i) ((r)->taskInfo[i]) -#define RSMA_INFO_IQTASK(r, i) ((r)->iTaskInfo[i]) -#define RSMA_INFO_ITEM(r, i) (&(r)->items[i]) +#define RSMA_INFO_IS_DEL(r) ((r)->delFlag == 1) +#define RSMA_INFO_SET_DEL(r) ((r)->delFlag = 1) +#define RSMA_INFO_QTASK(r, i) ((r)->taskInfo[i]) +#define RSMA_INFO_ITEM(r, i) (&(r)->items[i]) enum { TASK_TRIGGER_STAT_INIT = 0, diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 652aab3c01..fad2e4d7e9 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -156,10 +156,10 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { nLoops = 0; while (1) { if (atomic_load_32(&pRSmaStat->nFetchAll) <= 0) { - smaDebug("vgId:%d, rsma commit:%d, fetch tasks are all finished", SMA_VID(pSma), isCommit); + smaDebug("vgId:%d, rsma commit, type:%d, fetch tasks are all finished", SMA_VID(pSma), isCommit); break; } else { - smaDebug("vgId:%d, rsma commit%d, fetch tasks are not all finished yet", SMA_VID(pSma), isCommit); + smaDebug("vgId:%d, rsma commit, type:%d, fetch tasks are not all finished yet", SMA_VID(pSma), isCommit); } TD_SMA_LOOPS_CHECK(nLoops, 1000); } @@ -175,6 +175,7 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { while (atomic_load_64(&pRSmaStat->nBufItems) > 0) { TD_SMA_LOOPS_CHECK(nLoops, 1000); } + smaInfo("vgId:%d, rsma commit, all items are consumed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); if (!isCommit) goto _exit; @@ -183,7 +184,7 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { smaInfo("vgId:%d, rsma commit, operator state committed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); - smaInfo("vgId:%d, rsma commit, all items are consumed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); + // all rsma results are written completely STsdb *pTsdb = NULL; diff --git a/source/dnode/vnode/src/sma/smaOpen.c b/source/dnode/vnode/src/sma/smaOpen.c index cea4ccb1b7..c0af670a17 100644 --- a/source/dnode/vnode/src/sma/smaOpen.c +++ b/source/dnode/vnode/src/sma/smaOpen.c @@ -16,13 +16,13 @@ #include "sma.h" #include "tsdb.h" -static int32_t smaEvalDays(SVnode *pVnode, SRetentionEx *r, int8_t level, int8_t precision, int32_t duration); +static int32_t smaEvalDays(SVnode *pVnode, SRetention *r, int8_t level, int8_t precision, int32_t duration); static int32_t smaSetKeepCfg(SVnode *pVnode, STsdbKeepCfg *pKeepCfg, STsdbCfg *pCfg, int type); static int32_t rsmaRestore(SSma *pSma); #define SMA_SET_KEEP_CFG(v, l) \ do { \ - SRetention *r = &(pCfg->retentions[l].rtn); \ + SRetention *r = &(pCfg->retentions[l]); \ pKeepCfg->keep2 = convertTimeFromPrecisionToUnit(r->keep, pCfg->precision, TIME_UNIT_MINUTE); \ pKeepCfg->keep0 = pKeepCfg->keep2; \ pKeepCfg->keep1 = pKeepCfg->keep2; \ @@ -32,7 +32,7 @@ static int32_t rsmaRestore(SSma *pSma); #define SMA_OPEN_RSMA_IMPL(v, l, force) \ do { \ - SRetention *r = &(((SRetentionEx *)VND_RETENTIONS(v) + l)->rtn); \ + SRetention *r = (SRetention *)VND_RETENTIONS(v) + l; \ if (!RETENTION_VALID(l, r)) { \ if (l == 0) { \ code = TSDB_CODE_INVALID_PARA; \ @@ -59,9 +59,9 @@ static int32_t rsmaRestore(SSma *pSma); * @param duration * @return int32_t */ -static int32_t smaEvalDays(SVnode *pVnode, SRetentionEx *r, int8_t level, int8_t precision, int32_t duration) { - int32_t freqDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->rtn.freq, precision, TIME_UNIT_MINUTE); - int32_t keepDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->rtn.keep, precision, TIME_UNIT_MINUTE); +static int32_t smaEvalDays(SVnode *pVnode, SRetention *r, int8_t level, int8_t precision, int32_t duration) { + int32_t freqDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->freq, precision, TIME_UNIT_MINUTE); + int32_t keepDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->keep, precision, TIME_UNIT_MINUTE); int32_t days = duration; // min if (days < freqDuration) { @@ -76,10 +76,10 @@ static int32_t smaEvalDays(SVnode *pVnode, SRetentionEx *r, int8_t level, int8_t goto _exit; } - freqDuration = convertTimeFromPrecisionToUnit((r + level)->rtn.freq, precision, TIME_UNIT_MINUTE); - keepDuration = convertTimeFromPrecisionToUnit((r + level)->rtn.keep, precision, TIME_UNIT_MINUTE); + freqDuration = convertTimeFromPrecisionToUnit((r + level)->freq, precision, TIME_UNIT_MINUTE); + keepDuration = convertTimeFromPrecisionToUnit((r + level)->keep, precision, TIME_UNIT_MINUTE); - int32_t nFreqTimes = (r + level)->rtn.freq / (60 * 1000); // use 60s for freq of 1st level + int32_t nFreqTimes = (r + level)->freq / (60 * 1000); // use 60s for freq of 1st level days *= (nFreqTimes > 1 ? nFreqTimes : 1); if (days < freqDuration) { diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index cd93dda4fb..8f81829dc2 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -232,6 +232,29 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui return TSDB_CODE_SUCCESS; } +static int64_t tdRSmaTaskGetCheckpointId(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { + int64_t checkpointId = -1; + STaskId id = {.streamId = streamId, .taskId = taskId}; + taosRLockLatch(&pMeta->lock); + SStreamTask **ppTask = (SStreamTask **)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + if (ppTask && *ppTask) { + checkpointId = (*ppTask)->chkInfo.checkpointId; + } + taosRUnLockLatch(&pMeta->lock); + return checkpointId; +} + +static void tdRSmaTaskRemove(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { + streamMetaUnregisterTask(pMeta, streamId, taskId); + taosWLockLatch(&pMeta->lock); + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + if (streamMetaCommit(pMeta) < 0) { + // persist to disk + } + taosWUnLockLatch(&pMeta->lock); + smaDebug("vgId:%d rsma task:%" PRIi64 ",%d dropped, remain tasks:%d", pMeta->vgId, streamId, taskId, numOfTasks); +} + static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo, int8_t idx) { if ((param->qmsgLen > 0) && param->qmsg[idx]) { @@ -267,7 +290,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->pMeta = pVnode->pTq->pStreamMeta; pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_TASK_FLAG) + 1); sprintf(pStreamTask->exec.qmsg, "%s", RSMA_TASK_FLAG); - pStreamTask->chkInfo.checkpointId = pTsdbCfg->retentions[idx + 1].checkpointId; + pStreamTask->chkInfo.checkpointId = + tdRSmaTaskGetCheckpointId(pStreamTask->pMeta, pStreamTask->id.streamId, pStreamTask->id.taskId); pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; @@ -275,6 +299,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat } pItem->pStreamState = pStreamState; + tdRSmaTaskRemove(pStreamTask->pMeta, pStreamTask->id.streamId, pStreamTask->id.taskId); + SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState}; initStorageAPI(&handle.api); pRSmaInfo->taskInfo[idx] = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle, TD_VID(pVnode), 0); @@ -1129,19 +1155,22 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { TSDB_CHECK_CODE(code, lino, _exit); } - if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { - smaInfo("vgId:%d checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, - nStreamFlushed, nTaskInfo); - taosHashCancelIterate(pInfoHash, infoHash); - goto _checkpoint; + if (streamFlushed) { + pRSmaInfo->items[i].streamFlushed = 1; + if (++nStreamFlushed >= nTaskInfo) { + smaInfo("vgId:%d rsma commit, checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), + nSleep * 10, nStreamFlushed, nTaskInfo); + taosHashCancelIterate(pInfoHash, infoHash); + goto _checkpoint; + } } } } } taosUsleep(10); ++nSleep; - smaDebug("vgId:%d, wait for checkpoint ready, %d us elapsed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, - nStreamFlushed, nTaskInfo); + smaDebug("vgId:%d, rsma commit, wait for checkpoint ready, %d us elapsed, received/total: %d/%d", TD_VID(pVnode), + nSleep * 10, nStreamFlushed, nTaskInfo); } } while (0); @@ -1149,7 +1178,6 @@ _checkpoint: // stream state: build checkpoint in backend do { void *infoHash = NULL; - while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { @@ -1170,29 +1198,16 @@ _checkpoint: } taosWLockLatch(&pTask->pMeta->lock); - if (streamMetaSaveTask(pTask->pMeta, pTask) != 0) { + if (0 != streamMetaSaveTask(pTask->pMeta, pTask) || 0 != streamMetaCommit(pTask->pMeta)) { taosWUnLockLatch(&pTask->pMeta->lock); - code = TSDB_CODE_OUT_OF_MEMORY; - taosHashCancelIterate(pInfoHash, infoHash); - TSDB_CHECK_CODE(code, lino, _exit); - } - - if (streamMetaCommit(pTask->pMeta) != 0) { - taosWUnLockLatch(&pTask->pMeta->lock); - code = TSDB_CODE_OUT_OF_MEMORY; + code = terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY; taosHashCancelIterate(pInfoHash, infoHash); TSDB_CHECK_CODE(code, lino, _exit); } taosWUnLockLatch(&pTask->pMeta->lock); - // save checkpointId to vnode.json - (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; - - smaInfo("vgId:%d, commit task:%p, build stream checkpoint success, table:%" PRIi64 - ", level:%d, checkpointId:%" PRIi64, - TD_VID(pVnode), pTask, pRSmaInfo->suid, i + 1, pTask->checkpointingId); - - + smaInfo("vgId:%d, rsma commit, succeed to commit checkpoint/task:%" PRIi64 "/%p, table:%" PRIi64 ", level:%d", + TD_VID(pVnode), pTask->checkpointingId, pTask, pRSmaInfo->suid, i + 1); } } } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index be88a5a435..d1919d95ba 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -49,7 +49,7 @@ static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo STsdbReader* pReader); static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetentionEx* retentions, const char* idstr, +static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, int8_t* pLevel); static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); @@ -3140,7 +3140,7 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { } } -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetentionEx* retentions, const char* idStr, +static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr, int8_t* pLevel) { if (VND_IS_RSMA(pVnode)) { int8_t level = 0; @@ -3151,7 +3151,7 @@ static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetentionEx* r : 1000000L); for (int8_t i = 0; i < TSDB_RETENTION_MAX; ++i) { - SRetention* pRetention = &((retentions + level)->rtn); + SRetention* pRetention = retentions + level; if (pRetention->keep <= 0) { if (level > 0) { --level; diff --git a/source/dnode/vnode/src/vnd/vnodeCfg.c b/source/dnode/vnode/src/vnd/vnodeCfg.c index d429eb2a94..07bfa6c719 100644 --- a/source/dnode/vnode/src/vnd/vnodeCfg.c +++ b/source/dnode/vnode/src/vnd/vnodeCfg.c @@ -106,24 +106,23 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) { if (tjsonAddIntegerToObject(pJson, "keep1", pCfg->tsdbCfg.keep1) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "keep2", pCfg->tsdbCfg.keep2) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "keepTimeOffset", pCfg->tsdbCfg.keepTimeOffset) < 0) return -1; - if (pCfg->tsdbCfg.retentions[0].rtn.keep > 0) { + if (pCfg->tsdbCfg.retentions[0].keep > 0) { int32_t nRetention = 1; - if (pCfg->tsdbCfg.retentions[1].rtn.freq > 0) { + if (pCfg->tsdbCfg.retentions[1].freq > 0) { ++nRetention; - if (pCfg->tsdbCfg.retentions[2].rtn.freq > 0) { + if (pCfg->tsdbCfg.retentions[2].freq > 0) { ++nRetention; } } SJson *pNodeRetentions = tjsonCreateArray(); tjsonAddItemToObject(pJson, "retentions", pNodeRetentions); for (int32_t i = 0; i < nRetention; ++i) { - SJson *pNodeRetention = tjsonCreateObject(); - const SRetentionEx *pRetention = pCfg->tsdbCfg.retentions + i; - tjsonAddIntegerToObject(pNodeRetention, "freq", pRetention->rtn.freq); - tjsonAddIntegerToObject(pNodeRetention, "freqUnit", pRetention->rtn.freqUnit); - tjsonAddIntegerToObject(pNodeRetention, "keep", pRetention->rtn.keep); - tjsonAddIntegerToObject(pNodeRetention, "keepUnit", pRetention->rtn.keepUnit); - tjsonAddIntegerToObject(pNodeRetention, "checkpointId", pRetention->checkpointId); + SJson *pNodeRetention = tjsonCreateObject(); + const SRetention *pRetention = pCfg->tsdbCfg.retentions + i; + tjsonAddIntegerToObject(pNodeRetention, "freq", pRetention->freq); + tjsonAddIntegerToObject(pNodeRetention, "freqUnit", pRetention->freqUnit); + tjsonAddIntegerToObject(pNodeRetention, "keep", pRetention->keep); + tjsonAddIntegerToObject(pNodeRetention, "keepUnit", pRetention->keepUnit); tjsonAddItemToArray(pNodeRetentions, pNodeRetention); } } @@ -232,12 +231,10 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) { for (int32_t i = 0; i < nRetention; ++i) { SJson *pNodeRetention = tjsonGetArrayItem(pNodeRetentions, i); ASSERT(pNodeRetention != NULL); - SRetentionEx *pRetention = &(pCfg->tsdbCfg.retentions[i]); - tjsonGetNumberValue(pNodeRetention, "freq", pRetention->rtn.freq, code); - tjsonGetNumberValue(pNodeRetention, "freqUnit", pRetention->rtn.freqUnit, code); - tjsonGetNumberValue(pNodeRetention, "keep", pRetention->rtn.keep, code); - tjsonGetNumberValue(pNodeRetention, "keepUnit", pRetention->rtn.keepUnit, code); - tjsonGetNumberValue(pNodeRetention, "checkpointId", pRetention->checkpointId, code); + tjsonGetNumberValue(pNodeRetention, "freq", (pCfg->tsdbCfg.retentions)[i].freq, code); + tjsonGetNumberValue(pNodeRetention, "freqUnit", (pCfg->tsdbCfg.retentions)[i].freqUnit, code); + tjsonGetNumberValue(pNodeRetention, "keep", (pCfg->tsdbCfg.retentions)[i].keep, code); + tjsonGetNumberValue(pNodeRetention, "keepUnit", (pCfg->tsdbCfg.retentions)[i].keepUnit, code); } tjsonGetNumberValue(pJson, "wal.vgId", pCfg->walCfg.vgId, code); if (code < 0) return -1; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 7f023f2451..f788e244cd 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -687,10 +687,7 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { } tDecoderClear(&decoder); - if (0 == strcmp(pTask->exec.qmsg, "rsma_task")) { - tFreeStreamTask(pTask); - continue; - } else if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { + if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { int32_t taskId = pTask->id.taskId; tFreeStreamTask(pTask); From c95fc014a8de592834e238bb4bed396a97e3ad2e Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 3 Nov 2023 14:57:36 +0800 Subject: [PATCH 17/32] enh: rsma checkpoint --- source/dnode/vnode/src/inc/sma.h | 2 +- source/dnode/vnode/src/sma/smaOpen.c | 2 +- source/dnode/vnode/src/vnd/vnodeCommit.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 63d6e7e5c2..e87b356be7 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -155,7 +155,7 @@ struct SRSmaInfo { int64_t lastRecv; // ms int8_t assigned; // 0 idle, 1 assgined for exec int8_t delFlag; - int8_t padding; + int16_t padding; T_REF_DECLARE() SRSmaInfoItem items[TSDB_RETENTION_L2]; void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t diff --git a/source/dnode/vnode/src/sma/smaOpen.c b/source/dnode/vnode/src/sma/smaOpen.c index c0af670a17..633e096314 100644 --- a/source/dnode/vnode/src/sma/smaOpen.c +++ b/source/dnode/vnode/src/sma/smaOpen.c @@ -22,7 +22,7 @@ static int32_t rsmaRestore(SSma *pSma); #define SMA_SET_KEEP_CFG(v, l) \ do { \ - SRetention *r = &(pCfg->retentions[l]); \ + SRetention *r = &pCfg->retentions[l]; \ pKeepCfg->keep2 = convertTimeFromPrecisionToUnit(r->keep, pCfg->precision, TIME_UNIT_MINUTE); \ pKeepCfg->keep0 = pKeepCfg->keep2; \ pKeepCfg->keep1 = pKeepCfg->keep2; \ diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 9e0106dff4..50ca2f5d03 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -291,9 +291,6 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { if(syncNodeGetConfig(pVnode->sync, &pVnode->config.syncCfg) != 0) goto _exit; - code = smaPrepareAsyncCommit(pVnode->pSma); // prepare checkpointId and save to vnode.json - if (code) goto _exit; - pVnode->state.commitTerm = pVnode->state.applyTerm; pInfo->info.config = pVnode->config; @@ -316,6 +313,9 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { metaPrepareAsyncCommit(pVnode->pMeta); + code = smaPrepareAsyncCommit(pVnode->pSma); + if (code) goto _exit; + taosThreadMutexLock(&pVnode->mutex); ASSERT(pVnode->onCommit == NULL); pVnode->onCommit = pVnode->inUse; From 411151d671bc7335e7c75db4a5cd93ba169cd8df Mon Sep 17 00:00:00 2001 From: kailixu Date: Sat, 4 Nov 2023 08:44:26 +0800 Subject: [PATCH 18/32] fix: buffer overflow/buffer use after free/memory leak --- source/dnode/vnode/src/tsdb/tsdbRetention.c | 5 ++++- source/libs/executor/src/streamtimewindowoperator.c | 2 +- source/libs/stream/src/streamMeta.c | 2 +- source/libs/stream/src/streamSnapshot.c | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index 0fc1e1b64b..86298db2c4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -387,6 +387,8 @@ _exit: return code; } +static void tsdbFreeRtnArg(void *arg) { taosMemoryFree(arg); } + static int32_t tsdbDoRetentionSync(void *arg) { int32_t code = 0; int32_t lino = 0; @@ -409,6 +411,7 @@ _exit: TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); } tsem_post(&((SRtnArg *)arg)->tsdb->pVnode->canCommit); + tsdbFreeRtnArg(arg); return code; } @@ -438,7 +441,7 @@ _exit: return code; } -static void tsdbFreeRtnArg(void *arg) { taosMemoryFree(arg); } + int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync) { int32_t code = 0; diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 8bfa8e1a5d..839f3324a3 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -386,7 +386,6 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)param; cleanupBasicInfo(&pInfo->binfo); cleanupAggSup(&pInfo->aggSup); - clearGroupResInfo(&pInfo->groupResInfo); // it should be empty. void* pIte = NULL; @@ -401,6 +400,7 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { blockDataDestroy(pInfo->pDelRes); pInfo->stateStore.streamFileStateDestroy(pInfo->pState->pFileState); taosMemoryFreeClear(pInfo->pState); + clearGroupResInfo(&pInfo->groupResInfo); nodesDestroyNode((SNode*)pInfo->pPhyNode); colDataDestroy(&pInfo->twAggSup.timeWindowData); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 76945f17a9..31f8647dd5 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -185,7 +185,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); pMeta->chkpSaved = taosArrayInit(4, sizeof(int64_t)); pMeta->chkpInUse = taosArrayInit(4, sizeof(int64_t)); - pMeta->chkpCap = 8; + pMeta->chkpCap = 2; taosInitRWLatch(&pMeta->chkpDirLock); pMeta->chkpId = streamGetLatestCheckpointId(pMeta); diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 3de5de9967..2fed21dfd5 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -194,7 +194,7 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chk } } { - char* buf = taosMemoryCalloc(1, 512); + char* buf = taosMemoryCalloc(1, 1024); sprintf(buf, "[current: %s,", pFile->pCurrent); sprintf(buf + strlen(buf), "MANIFEST: %s,", pFile->pMainfest); sprintf(buf + strlen(buf), "options: %s,", pFile->pOptions); From 7e8c123fcae335094445f15d79bc3cdd801f456a Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 6 Nov 2023 08:46:04 +0800 Subject: [PATCH 19/32] chore: rsma code optimization --- source/dnode/vnode/src/sma/smaRollup.c | 11 +++++++---- source/libs/executor/src/streamtimewindowoperator.c | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 8f81829dc2..ada7c9d6b7 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -137,7 +137,9 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, return TSDB_CODE_FAILED; } - if (!taosArrayGetSize(tbUids)) { + int32_t nTables = taosArrayGetSize(tbUids); + + if (0 == nTables) { smaDebug("vgId:%d, no need to update tbUidList for suid:%" PRIi64 " since Empty tbUids", SMA_VID(pSma), *suid); return TSDB_CODE_SUCCESS; } @@ -158,8 +160,9 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, terrstr()); return TSDB_CODE_FAILED; } - smaDebug("vgId:%d, update tbUidList succeed for qTaskInfo:%p with suid:%" PRIi64 " uid:%" PRIi64 " level %d", - SMA_VID(pSma), pRSmaInfo->taskInfo[i], *suid, *(int64_t *)taosArrayGet(tbUids, 0), i); + smaDebug("vgId:%d, update tbUidList succeed for qTaskInfo:%p. suid:%" PRIi64 " uid:%" PRIi64 + "nTables:%d level %d", + SMA_VID(pSma), pRSmaInfo->taskInfo[i], *suid, *(int64_t *)TARRAY_GET_ELEM(tbUids, 0), nTables, i); } } @@ -252,7 +255,7 @@ static void tdRSmaTaskRemove(SStreamMeta *pMeta, int64_t streamId, int32_t taskI // persist to disk } taosWUnLockLatch(&pMeta->lock); - smaDebug("vgId:%d rsma task:%" PRIi64 ",%d dropped, remain tasks:%d", pMeta->vgId, streamId, taskId, numOfTasks); + smaDebug("vgId:%d, rsma task:%" PRIi64 ",%d dropped, remain tasks:%d", pMeta->vgId, streamId, taskId, numOfTasks); } static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo, diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 839f3324a3..8bfa8e1a5d 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -386,6 +386,7 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)param; cleanupBasicInfo(&pInfo->binfo); cleanupAggSup(&pInfo->aggSup); + clearGroupResInfo(&pInfo->groupResInfo); // it should be empty. void* pIte = NULL; @@ -400,7 +401,6 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { blockDataDestroy(pInfo->pDelRes); pInfo->stateStore.streamFileStateDestroy(pInfo->pState->pFileState); taosMemoryFreeClear(pInfo->pState); - clearGroupResInfo(&pInfo->groupResInfo); nodesDestroyNode((SNode*)pInfo->pPhyNode); colDataDestroy(&pInfo->twAggSup.timeWindowData); From dc5284a19cc073b6e6145bec23ae308910a91264 Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 6 Nov 2023 17:27:36 +0800 Subject: [PATCH 20/32] chore: add debug info --- source/dnode/vnode/src/inc/sma.h | 6 ++--- source/dnode/vnode/src/sma/smaRollup.c | 29 ++++++++-------------- source/dnode/vnode/src/tsdb/tsdbMemTable.c | 12 +++++++++ 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index e87b356be7..198c93a937 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -137,10 +137,10 @@ struct SSmaStat { #define RSMA_FS_LOCK(r) (&(r)->lock) struct SRSmaInfoItem { - int8_t level : 4; - int8_t fetchLevel : 4; + int8_t level; + int8_t fetchLevel; int8_t triggerStat; - uint16_t nScanned; + uint32_t nScanned; int32_t streamFlushed : 1; int32_t maxDelay : 31; // ms tmr_h tmrId; diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index ada7c9d6b7..3884b1df7a 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -331,7 +331,7 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat taosTmrReset(tdRSmaFetchTrigger, RSMA_FETCH_INTERVAL, pItem, smaMgmt.tmrHandle, &pItem->tmrId); - smaInfo("vgId:%d, open task:%p table:%" PRIi64 " level:%" PRIi8 ", checkpointId:%" PRIi64 ", maxdelay:%" PRIi64 + smaInfo("vgId:%d, open rsma task:%p table:%" PRIi64 " level:%" PRIi8 ", checkpointId:%" PRIi64 ", maxdelay:%" PRIi64 " watermark:%" PRIi64 ", finally maxdelay:%" PRIi32, TD_VID(pVnode), pItem->pStreamTask, pRSmaInfo->suid, (int8_t)(idx + 1), pStreamTask->chkInfo.checkpointId, param->maxdelay[idx], param->watermark[idx], pItem->maxDelay); @@ -1161,7 +1161,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { if (streamFlushed) { pRSmaInfo->items[i].streamFlushed = 1; if (++nStreamFlushed >= nTaskInfo) { - smaInfo("vgId:%d rsma commit, checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), + smaInfo("vgId:%d, rsma commit, checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, nStreamFlushed, nTaskInfo); taosHashCancelIterate(pInfoHash, infoHash); goto _checkpoint; @@ -1292,20 +1292,14 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) { } int8_t fetchTriggerStat = - atomic_val_compare_exchange_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE, TASK_TRIGGER_STAT_INACTIVE); + atomic_val_compare_exchange_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE, TASK_TRIGGER_STAT_ACTIVE); switch (fetchTriggerStat) { case TASK_TRIGGER_STAT_ACTIVE: { smaDebug("vgId:%d, rsma fetch task planned for level:%" PRIi8 " suid:%" PRIi64 " since stat is active", SMA_VID(pSma), pItem->level, pRSmaInfo->suid); // async process - pItem->fetchLevel = pItem->level; -#if 0 - // debugging codes - SRSmaInfo *qInfo = tdAcquireRSmaInfoBySuid(pSma, pRSmaInfo->suid); - SRSmaInfoItem *qItem = RSMA_INFO_ITEM(qInfo, pItem->level - 1); - make sure(qItem->level == pItem->level); - make sure(qItem->fetchLevel == pItem->fetchLevel); -#endif + atomic_store_8(&pItem->fetchLevel, 1); + if (atomic_load_8(&pRSmaInfo->assigned) == 0) { tsem_post(&(pStat->notEmpty)); } @@ -1351,13 +1345,14 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { SArray *pResList = NULL; for (int8_t i = 1; i <= TSDB_RETENTION_L2; ++i) { SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, i - 1); - if (pItem->fetchLevel) { - pItem->fetchLevel = 0; + + if (1 == atomic_val_compare_exchange_8(&pItem->fetchLevel, 1, 0)) { qTaskInfo_t taskInfo = RSMA_INFO_QTASK(pInfo, i - 1); if (!taskInfo) { continue; } +#if 0 if ((++pItem->nScanned * pItem->maxDelay) > RSMA_FETCH_DELAY_MAX) { smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi16 " maxDelay:%d, fetch executed", SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay); @@ -1375,6 +1370,7 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { } pItem->nScanned = 0; +#endif if ((terrno = qSetSMAInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) { goto _err; @@ -1509,12 +1505,7 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) { TSDB_CHECK_CODE(code, lino, _exit); } - int8_t curStat = atomic_load_8(RSMA_COMMIT_STAT(pRSmaStat)); - if (curStat == 1) { - smaDebug("vgId:%d, fetch all not exec as commit stat is %" PRIi8, SMA_VID(pSma), curStat); - } else { - tdRSmaFetchAllResult(pSma, pInfo); - } + tdRSmaFetchAllResult(pSma, pInfo); if (0 == atomic_sub_fetch_32(&pRSmaStat->nFetchAll, 1)) { atomic_store_8(RSMA_COMMIT_STAT(pRSmaStat), 0); diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index cc77474e79..69b19f4bc5 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -661,6 +661,9 @@ static int32_t tsdbInsertColDataToTable(SMemTable *pMemTable, STbData *pTbData, if ((code = tbDataDoPut(pMemTable, pTbData, pos, &tRow, 0))) goto _exit; pTbData->minKey = TMIN(pTbData->minKey, key.ts); lRow = tRow; + tsdbDebug("vgId:%d, %s, insert col row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, + TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, tRow.iRow, tRow.pTSRow->ts, tRow.version, + pSubmitTbData->uid); // remain row ++tRow.iRow; @@ -680,6 +683,9 @@ static int32_t tsdbInsertColDataToTable(SMemTable *pMemTable, STbData *pTbData, lRow = tRow; ++tRow.iRow; + tsdbDebug("vgId:%d, %s, insert col row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, + TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, tRow.iRow, tRow.pTSRow->ts, tRow.version, + pSubmitTbData->uid); } } @@ -721,6 +727,9 @@ static int32_t tsdbInsertRowDataToTable(SMemTable *pMemTable, STbData *pTbData, code = tbDataDoPut(pMemTable, pTbData, pos, &tRow, 0); if (code) goto _exit; lRow = tRow; + tsdbDebug("vgId:%d, %s, insert row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, + TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, iRow, tRow.pTSRow->ts, tRow.version, + pSubmitTbData->uid); pTbData->minKey = TMIN(pTbData->minKey, key.ts); @@ -744,6 +753,9 @@ static int32_t tsdbInsertRowDataToTable(SMemTable *pMemTable, STbData *pTbData, lRow = tRow; iRow++; + tsdbDebug("vgId:%d, %s, insert row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, + TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, iRow, tRow.pTSRow->ts, tRow.version, + pSubmitTbData->uid); } } From f5d796a081bdcb73eb020cdb2cc0703112a5d8bd Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 6 Nov 2023 19:53:09 +0800 Subject: [PATCH 21/32] chore: print stream state --- source/dnode/vnode/src/sma/smaRollup.c | 7 ++++- source/libs/stream/src/tstreamFileState.c | 33 +++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 3884b1df7a..ac99dc9de3 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -678,7 +678,12 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); taosMemoryFree(pReq); - TSDB_CHECK_CODE(code, lino, _exit); + smaError("vgId:%d, %s failed at line %d since %s, suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIi64 + ", ver:%" PRIi64, + SMA_VID(pSma), __func__, lino, tstrerror(code), suid, pItem->level, output ? output->info.id.uid : -1, + output ? output->info.version : -1); + continue; + // TSDB_CHECK_CODE(code, lino, _exit); } smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64, diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index 584e81fafc..a597858e63 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -515,6 +515,17 @@ void streamFileStateEncode(TSKEY* pKey, void** pVal, int32_t* pLen) { taosEncodeFixedI64(&buff, *pKey); } +static void getDebugRowBuff(char* val, int32_t vlen, char* output) { + for (int32_t i = 0; i < vlen; ++i) { + if (*(val + i) == '\0') { + sprintf(output + i, "0"); + } else { + sprintf(output + i, "%c", *(val + i)); + } + } + output[vlen] = 0; +} + int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState) { int32_t code = TSDB_CODE_SUCCESS; SListIter iter = {0}; @@ -530,6 +541,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, int32_t len = pFileState->rowSize + sizeof(uint64_t) + sizeof(int32_t) + 1; char* buf = taosMemoryCalloc(1, len); + char output[1024]; void* batch = streamStateCreateBatch(); while ((pNode = tdListNext(&iter)) != NULL && code == TSDB_CODE_SUCCESS) { @@ -546,6 +558,15 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, } void* pSKey = pFileState->stateBuffCreateStateKeyFn(pPos, ((SStreamState*)pFileState->pFileStore)->number); +#if 1 + SStateKey* pStateKey = pSKey; + char* pStateVal = pPos->pRowBuff; + int32_t pStateVLen = pFileState->rowSize; + assert(pStateVLen < 1024); + getDebugRowBuff(pStateVal, pStateVLen, output); + qDebug("%s:%d key:[%" PRIu64 ",%" PRIi64 ",%" PRIi64 "] vlen:%d, val:%s", __func__, __LINE__, pStateKey->key.groupId, + pStateKey->key.ts, pStateKey->opNum, pStateVLen, output); +#endif code = streamStatePutBatchOptimize(pFileState->pFileStore, idx, batch, pSKey, pPos->pRowBuff, pFileState->rowSize, 0, buf); taosMemoryFreeClear(pSKey); @@ -691,6 +712,7 @@ int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { if (pCur == NULL) { return -1; } + char output[1024]; int32_t recoverNum = TMIN(MIN_NUM_OF_ROW_BUFF, pFileState->maxRowCount); while (code == TSDB_CODE_SUCCESS) { if (pFileState->curRowCount >= recoverNum) { @@ -710,6 +732,17 @@ int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { } ASSERT(vlen == pFileState->rowSize); memcpy(pNewPos->pRowBuff, pVal, vlen); + +#if 1 + SStateKey* pStateKey = pNewPos->pKey; + char* pStateVal = pVal; + int32_t pStateVLen = vlen; + assert(pStateVLen < 1024); + getDebugRowBuff(pStateVal, pStateVLen, output); + qDebug("%s:%d key:[%" PRIu64 ",%" PRIi64 ",%" PRIi64 "] vlen:%d, val:%s", __func__, __LINE__, pStateKey->key.groupId, + pStateKey->key.ts, pStateKey->opNum, pStateVLen, output); +#endif + taosMemoryFreeClear(pVal); pNewPos->beFlushed = true; code = tSimpleHashPut(pFileState->rowStateBuff, pNewPos->pKey, pFileState->keyLen, &pNewPos, POINTER_BYTES); From e972ab16fe566d6446e4d11c69dd6ecaaa468b09 Mon Sep 17 00:00:00 2001 From: liuyao <54liuyao@163.com> Date: Tue, 7 Nov 2023 14:11:08 +0800 Subject: [PATCH 22/32] recover flush mark --- source/libs/stream/src/tstreamFileState.c | 49 ++++++++++++++--------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index a597858e63..8a3e7ce892 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -27,6 +27,8 @@ #define DEFAULT_MAX_STREAM_BUFFER_SIZE (128 * 1024 * 1024) #define MIN_NUM_OF_ROW_BUFF 10240 +#define TASK_KEY "streamFileState" + struct SStreamFileState { SList* usedBuffs; SList* freeBuffs; @@ -113,6 +115,15 @@ void* sessionCreateStateKey(SRowBuffPos* pPos, int64_t num) { return pStateKey; } +static void streamFileStateDecode(TSKEY* pKey, void* pBuff, int32_t len) { pBuff = taosDecodeFixedI64(pBuff, pKey); } + +static void streamFileStateEncode(TSKEY* pKey, void** pVal, int32_t* pLen) { + *pLen = sizeof(TSKEY); + (*pVal) = taosMemoryCalloc(1, *pLen); + void* buff = *pVal; + taosEncodeFixedI64(&buff, *pKey); +} + SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId, int64_t checkpointId, int8_t type) { @@ -181,6 +192,17 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_ recoverSesssion(pFileState, checkpointId); } + char keyBuf[128] = {0}; + void* valBuf = NULL; + int32_t len = 0; + sprintf(keyBuf, "%s:%" PRId64 "", TASK_KEY, ((SStreamState*)pFileState->pFileStore)->checkPointId); + int32_t code = streamDefaultGet_rocksdb(pFileState->pFileStore, keyBuf, &valBuf, &len); + if (code == TSDB_CODE_SUCCESS) { + ASSERT(len == sizeof(TSKEY)); + streamFileStateDecode(&pFileState->flushMark, valBuf, len); + qDebug("===stream===flushMark read:%" PRId64 ",checkpointid:%" PRId64, pFileState->flushMark, ((SStreamState*)pFileState->pFileStore)->checkPointId); + } + return pFileState; _error: @@ -506,15 +528,6 @@ SStreamSnapshot* getSnapshot(SStreamFileState* pFileState) { return pFileState->usedBuffs; } -void streamFileStateDecode(TSKEY* pKey, void* pBuff, int32_t len) { pBuff = taosDecodeFixedI64(pBuff, pKey); } - -void streamFileStateEncode(TSKEY* pKey, void** pVal, int32_t* pLen) { - *pLen = sizeof(TSKEY); - (*pVal) = taosMemoryCalloc(1, *pLen); - void* buff = *pVal; - taosEncodeFixedI64(&buff, *pKey); -} - static void getDebugRowBuff(char* val, int32_t vlen, char* output) { for (int32_t i = 0; i < vlen; ++i) { if (*(val + i) == '\0') { @@ -550,6 +563,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, continue; } pPos->beFlushed = true; + pFileState->flushMark = TMAX(pFileState->flushMark, pFileState->getTs(pPos->pKey)); qDebug("===stream===flushed start:%" PRId64, pFileState->getTs(pPos->pKey)); if (streamStateGetBatchSize(batch) >= BATCH_LIMIT) { @@ -586,13 +600,13 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, pFileState->id, numOfElems, BATCH_LIMIT, elapsed); if (flushState) { - const char* taskKey = "streamFileState"; { char keyBuf[128] = {0}; void* valBuf = NULL; int32_t len = 0; - sprintf(keyBuf, "%s:%" PRId64 "", taskKey, ((SStreamState*)pFileState->pFileStore)->checkPointId); + sprintf(keyBuf, "%s:%" PRId64 "", TASK_KEY, ((SStreamState*)pFileState->pFileStore)->checkPointId); streamFileStateEncode(&pFileState->flushMark, &valBuf, &len); + qDebug("===stream===flushMark write:%" PRId64 ",checkpoint id:%" PRId64, pFileState->flushMark, ((SStreamState*)pFileState->pFileStore)->checkPointId); streamStatePutBatch(pFileState->pFileStore, "default", batch, keyBuf, valBuf, len, 0); taosMemoryFree(valBuf); } @@ -600,7 +614,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, char keyBuf[128] = {0}; char valBuf[64] = {0}; int32_t len = 0; - memcpy(keyBuf, taskKey, strlen(taskKey)); + memcpy(keyBuf, TASK_KEY, strlen(TASK_KEY)); len = sprintf(valBuf, "%" PRId64 "", ((SStreamState*)pFileState->pFileStore)->checkPointId); code = streamStatePutBatch(pFileState->pFileStore, "default", batch, keyBuf, valBuf, len, 0); } @@ -612,26 +626,23 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, } int32_t forceRemoveCheckpoint(SStreamFileState* pFileState, int64_t checkpointId) { - const char* taskKey = "streamFileState"; char keyBuf[128] = {0}; - sprintf(keyBuf, "%s:%" PRId64 "", taskKey, checkpointId); + sprintf(keyBuf, "%s:%" PRId64 "", TASK_KEY, checkpointId); return streamDefaultDel_rocksdb(pFileState->pFileStore, keyBuf); } int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list) { - const char* taskKey = "streamFileState"; - return streamDefaultIterGet_rocksdb(pFileState->pFileStore, taskKey, NULL, list); + return streamDefaultIterGet_rocksdb(pFileState->pFileStore, TASK_KEY, NULL, list); } int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) { int32_t code = TSDB_CODE_SUCCESS; - const char* taskKey = "streamFileState"; int64_t maxCheckPointId = 0; { char buf[128] = {0}; void* val = NULL; int32_t len = 0; - memcpy(buf, taskKey, strlen(taskKey)); + memcpy(buf, TASK_KEY, strlen(TASK_KEY)); code = streamDefaultGet_rocksdb(pFileState->pFileStore, buf, &val, &len); if (code != 0 || len == 0 || val == NULL) { return TSDB_CODE_FAILED; @@ -645,7 +656,7 @@ int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) { char buf[128] = {0}; void* val = 0; int32_t len = 0; - sprintf(buf, "%s:%" PRId64 "", taskKey, i); + sprintf(buf, "%s:%" PRId64 "", TASK_KEY, i); code = streamDefaultGet_rocksdb(pFileState->pFileStore, buf, &val, &len); if (code != 0) { return TSDB_CODE_FAILED; From 467c27c7585d183f03447dbca23e80346921cb55 Mon Sep 17 00:00:00 2001 From: liuyao <54liuyao@163.com> Date: Tue, 7 Nov 2023 15:45:38 +0800 Subject: [PATCH 23/32] recover flush mark --- include/libs/function/function.h | 1 - source/libs/stream/src/streamState.c | 3 --- source/libs/stream/src/tstreamFileState.c | 31 +++++++---------------- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/include/libs/function/function.h b/include/libs/function/function.h index 2e3cd670d7..49435a6317 100644 --- a/include/libs/function/function.h +++ b/include/libs/function/function.h @@ -168,7 +168,6 @@ typedef struct { struct SStreamFileState *pFileState; int32_t number; SSHashObj *parNameMap; - int64_t checkPointId; int32_t taskId; int64_t streamId; int64_t streamBackendRid; diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index fb0090ec6d..6ca7bc5e7b 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -221,7 +221,6 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz } pState->pTdbState->pOwner = pTask; - pState->checkPointId = 0; return pState; @@ -274,7 +273,6 @@ int32_t streamStateCommit(SStreamState* pState) { SStreamSnapshot* pShot = getSnapshot(pState->pFileState); flushSnapshot(pState->pFileState, pShot, true); } - pState->checkPointId++; return 0; #else if (tdbCommit(pState->pTdbState->db, pState->pTdbState->txn) < 0) { @@ -288,7 +286,6 @@ int32_t streamStateCommit(SStreamState* pState) { TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } - pState->checkPointId++; return 0; #endif } diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index 8a3e7ce892..0a3970adaa 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -28,6 +28,7 @@ #define MIN_NUM_OF_ROW_BUFF 10240 #define TASK_KEY "streamFileState" +#define STREAM_STATE_INFO_NAME "StreamStateCheckPoint" struct SStreamFileState { SList* usedBuffs; @@ -192,15 +193,13 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_ recoverSesssion(pFileState, checkpointId); } - char keyBuf[128] = {0}; void* valBuf = NULL; int32_t len = 0; - sprintf(keyBuf, "%s:%" PRId64 "", TASK_KEY, ((SStreamState*)pFileState->pFileStore)->checkPointId); - int32_t code = streamDefaultGet_rocksdb(pFileState->pFileStore, keyBuf, &valBuf, &len); + int32_t code = streamDefaultGet_rocksdb(pFileState->pFileStore, STREAM_STATE_INFO_NAME, &valBuf, &len); if (code == TSDB_CODE_SUCCESS) { ASSERT(len == sizeof(TSKEY)); streamFileStateDecode(&pFileState->flushMark, valBuf, len); - qDebug("===stream===flushMark read:%" PRId64 ",checkpointid:%" PRId64, pFileState->flushMark, ((SStreamState*)pFileState->pFileStore)->checkPointId); + qDebug("===stream===flushMark read:%" PRId64, pFileState->flushMark); } return pFileState; @@ -600,24 +599,12 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, pFileState->id, numOfElems, BATCH_LIMIT, elapsed); if (flushState) { - { - char keyBuf[128] = {0}; - void* valBuf = NULL; - int32_t len = 0; - sprintf(keyBuf, "%s:%" PRId64 "", TASK_KEY, ((SStreamState*)pFileState->pFileStore)->checkPointId); - streamFileStateEncode(&pFileState->flushMark, &valBuf, &len); - qDebug("===stream===flushMark write:%" PRId64 ",checkpoint id:%" PRId64, pFileState->flushMark, ((SStreamState*)pFileState->pFileStore)->checkPointId); - streamStatePutBatch(pFileState->pFileStore, "default", batch, keyBuf, valBuf, len, 0); - taosMemoryFree(valBuf); - } - { - char keyBuf[128] = {0}; - char valBuf[64] = {0}; - int32_t len = 0; - memcpy(keyBuf, TASK_KEY, strlen(TASK_KEY)); - len = sprintf(valBuf, "%" PRId64 "", ((SStreamState*)pFileState->pFileStore)->checkPointId); - code = streamStatePutBatch(pFileState->pFileStore, "default", batch, keyBuf, valBuf, len, 0); - } + void* valBuf = NULL; + int32_t len = 0; + streamFileStateEncode(&pFileState->flushMark, &valBuf, &len); + qDebug("===stream===flushMark write:%" PRId64, pFileState->flushMark); + streamStatePutBatch(pFileState->pFileStore, "default", batch, STREAM_STATE_INFO_NAME, valBuf, len, 0); + taosMemoryFree(valBuf); streamStatePutBatch_rocksdb(pFileState->pFileStore, batch); } From 6b738884a4be908a9dc95783776360ef311a4753 Mon Sep 17 00:00:00 2001 From: kailixu Date: Tue, 7 Nov 2023 19:59:05 +0800 Subject: [PATCH 24/32] chore: rsma checkpoint --- include/common/tcommon.h | 1 + include/libs/executor/executor.h | 1 + include/libs/stream/tstream.h | 1 + source/dnode/vnode/src/sma/smaRollup.c | 13 ++++++--- source/dnode/vnode/src/sma/smaUtil.c | 2 +- source/dnode/vnode/src/tsdb/tsdbRead2.c | 14 +++++----- source/libs/executor/inc/executil.h | 2 +- source/libs/executor/src/executil.c | 3 ++- source/libs/executor/src/scanoperator.c | 4 +-- source/libs/stream/src/streamMeta.c | 5 ++-- source/libs/stream/src/tstreamFileState.c | 33 ----------------------- 11 files changed, 27 insertions(+), 52 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 72aab9adf0..e072eaa831 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -249,6 +249,7 @@ typedef struct SQueryTableDataCond { SColumnInfo* colList; int32_t* pSlotList; // the column output destation slot, and it may be null int32_t type; // data block load type: + bool skipRollup; STimeWindow twindows; int64_t startVersion; int64_t endVersion; diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 5990ae1c9c..6005c13455 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -49,6 +49,7 @@ typedef struct { uint64_t checkpointId; bool initTableReader; bool initTqReader; + bool skipRollup; int32_t numOfVgroups; void* sContext; // SSnapContext* void* pStateBackend; diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 173c68a818..2e9eb884a0 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -801,6 +801,7 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); int32_t streamMetaReopen(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta); int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta); +int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta); void streamMetaNotifyClose(SStreamMeta* pMeta); void streamMetaStartHb(SStreamMeta* pMeta); void streamMetaInitForSnode(SStreamMeta* pMeta); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index ac99dc9de3..665610304c 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -235,6 +235,7 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui return TSDB_CODE_SUCCESS; } +#if 0 static int64_t tdRSmaTaskGetCheckpointId(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { int64_t checkpointId = -1; STaskId id = {.streamId = streamId, .taskId = taskId}; @@ -246,6 +247,7 @@ static int64_t tdRSmaTaskGetCheckpointId(SStreamMeta *pMeta, int64_t streamId, i taosRUnLockLatch(&pMeta->lock); return checkpointId; } +#endif static void tdRSmaTaskRemove(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { streamMetaUnregisterTask(pMeta, streamId, taskId); @@ -293,8 +295,12 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->pMeta = pVnode->pTq->pStreamMeta; pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_TASK_FLAG) + 1); sprintf(pStreamTask->exec.qmsg, "%s", RSMA_TASK_FLAG); +#if 0 pStreamTask->chkInfo.checkpointId = tdRSmaTaskGetCheckpointId(pStreamTask->pMeta, pStreamTask->id.streamId, pStreamTask->id.taskId); +#else + pStreamTask->chkInfo.checkpointId = streamMetaGetLatestCheckpointId(pStreamTask->pMeta); +#endif pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; @@ -304,7 +310,7 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat tdRSmaTaskRemove(pStreamTask->pMeta, pStreamTask->id.streamId, pStreamTask->id.taskId); - SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState}; + SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .skipRollup = 1, .pStateBackend = pStreamState}; initStorageAPI(&handle.api); pRSmaInfo->taskInfo[idx] = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle, TD_VID(pVnode), 0); if (!pRSmaInfo->taskInfo[idx]) { @@ -682,8 +688,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma ", ver:%" PRIi64, SMA_VID(pSma), __func__, lino, tstrerror(code), suid, pItem->level, output ? output->info.id.uid : -1, output ? output->info.version : -1); - continue; - // TSDB_CHECK_CODE(code, lino, _exit); + TSDB_CHECK_CODE(code, lino, _exit); } smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64, @@ -1297,7 +1302,7 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) { } int8_t fetchTriggerStat = - atomic_val_compare_exchange_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE, TASK_TRIGGER_STAT_ACTIVE); + atomic_val_compare_exchange_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE, TASK_TRIGGER_STAT_INACTIVE); switch (fetchTriggerStat) { case TASK_TRIGGER_STAT_ACTIVE: { smaDebug("vgId:%d, rsma fetch task planned for level:%" PRIi8 " suid:%" PRIi64 " since stat is active", diff --git a/source/dnode/vnode/src/sma/smaUtil.c b/source/dnode/vnode/src/sma/smaUtil.c index 479c57e65f..8c04306d0f 100644 --- a/source/dnode/vnode/src/sma/smaUtil.c +++ b/source/dnode/vnode/src/sma/smaUtil.c @@ -30,7 +30,7 @@ void tdRSmaGetDirName(SVnode *pVnode, STfs *pTfs, bool endWithSep, char *outputN offset = strlen(outputName); // rsma -#if 0 +#if 1 snprintf(outputName + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s%s", TD_DIRSEP, VNODE_RSMA_DIR, (endWithSep ? TD_DIRSEP : "")); #else diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index d1919d95ba..c56164ff9d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -48,9 +48,9 @@ static int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScan static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, STsdbReader* pReader); -static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, - int8_t* pLevel); +static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); +static STsdb* getTsdbByRetentions(SVnode* pVnode, SQueryTableDataCond* pCond, SRetention* retentions, const char* idstr, + int8_t* pLevel); static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); static int32_t doBuildDataBlock(STsdbReader* pReader); @@ -384,7 +384,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void initReaderStatus(&pReader->status); - pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level); + pReader->pTsdb = getTsdbByRetentions(pVnode, pCond, pVnode->config.tsdbCfg.retentions, idstr, &level); pReader->info.suid = pCond->suid; pReader->info.order = pCond->order; @@ -3140,9 +3140,9 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { } } -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr, +static STsdb* getTsdbByRetentions(SVnode* pVnode, SQueryTableDataCond* pCond, SRetention* retentions, const char* idStr, int8_t* pLevel) { - if (VND_IS_RSMA(pVnode)) { + if (VND_IS_RSMA(pVnode) && !pCond->skipRollup) { int8_t level = 0; int8_t precision = pVnode->config.tsdbCfg.precision; int64_t now = taosGetTimestamp(precision); @@ -3158,7 +3158,7 @@ static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* ret } break; } - if ((now - pRetention->keep) <= (winSKey + offset)) { + if ((now - pRetention->keep) <= (pCond->twindows.skey + offset)) { break; } ++level; diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index 740ff7b0dc..6387b3d0d6 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -178,7 +178,7 @@ void initExecTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pQueryWindow SInterval extractIntervalInfo(const STableScanPhysiNode* pTableScanNode); SColumn extractColumnFromColumnNode(SColumnNode* pColNode); -int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode); +int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode, const SReadHandle* readHandle); void cleanupQueryTableDataCond(SQueryTableDataCond* pCond); int32_t convertFillType(int32_t mode); diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 753d3e680c..39b47504c6 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -1713,7 +1713,7 @@ SColumn extractColumnFromColumnNode(SColumnNode* pColNode) { return c; } -int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode) { +int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode, const SReadHandle* readHandle) { pCond->order = pTableScanNode->scanSeq[0] > 0 ? TSDB_ORDER_ASC : TSDB_ORDER_DESC; pCond->numOfCols = LIST_LENGTH(pTableScanNode->scan.pScanCols); @@ -1732,6 +1732,7 @@ int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysi pCond->type = TIMEWINDOW_RANGE_CONTAINED; pCond->startVersion = -1; pCond->endVersion = -1; + pCond->skipRollup = readHandle->skipRollup; int32_t j = 0; for (int32_t i = 0; i < pCond->numOfCols; ++i) { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index efbc978323..c47e14ad0d 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1035,7 +1035,7 @@ SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode, } initLimitInfo(pScanNode->node.pLimit, pScanNode->node.pSlimit, &pInfo->base.limitInfo); - code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode); + code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode, readHandle); if (code != TSDB_CODE_SUCCESS) { goto _error; } @@ -3533,7 +3533,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN goto _error; } - code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode); + code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode, readHandle); if (code != TSDB_CODE_SUCCESS) { taosArrayDestroy(pInfo->base.matchInfo.pList); goto _error; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 31f8647dd5..6202753a87 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -28,7 +28,6 @@ int32_t streamBackendId = 0; int32_t streamBackendCfWrapperId = 0; int32_t streamMetaId = 0; -static int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta); static void metaHbToMnode(void* param, void* tmrId); static void streamMetaClear(SStreamMeta* pMeta); static int32_t streamMetaBegin(SStreamMeta* pMeta); @@ -188,7 +187,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF pMeta->chkpCap = 2; taosInitRWLatch(&pMeta->chkpDirLock); - pMeta->chkpId = streamGetLatestCheckpointId(pMeta); + pMeta->chkpId = streamMetaGetLatestCheckpointId(pMeta); pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); while (pMeta->streamBackend == NULL) { taosMsleep(100); @@ -595,7 +594,7 @@ int32_t streamMetaCommit(SStreamMeta* pMeta) { return 0; } -int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta) { +int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta) { int64_t chkpId = 0; TBC* pCur = NULL; diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index 0a3970adaa..e38ba85f62 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -527,17 +527,6 @@ SStreamSnapshot* getSnapshot(SStreamFileState* pFileState) { return pFileState->usedBuffs; } -static void getDebugRowBuff(char* val, int32_t vlen, char* output) { - for (int32_t i = 0; i < vlen; ++i) { - if (*(val + i) == '\0') { - sprintf(output + i, "0"); - } else { - sprintf(output + i, "%c", *(val + i)); - } - } - output[vlen] = 0; -} - int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState) { int32_t code = TSDB_CODE_SUCCESS; SListIter iter = {0}; @@ -553,7 +542,6 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, int32_t len = pFileState->rowSize + sizeof(uint64_t) + sizeof(int32_t) + 1; char* buf = taosMemoryCalloc(1, len); - char output[1024]; void* batch = streamStateCreateBatch(); while ((pNode = tdListNext(&iter)) != NULL && code == TSDB_CODE_SUCCESS) { @@ -571,15 +559,6 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, } void* pSKey = pFileState->stateBuffCreateStateKeyFn(pPos, ((SStreamState*)pFileState->pFileStore)->number); -#if 1 - SStateKey* pStateKey = pSKey; - char* pStateVal = pPos->pRowBuff; - int32_t pStateVLen = pFileState->rowSize; - assert(pStateVLen < 1024); - getDebugRowBuff(pStateVal, pStateVLen, output); - qDebug("%s:%d key:[%" PRIu64 ",%" PRIi64 ",%" PRIi64 "] vlen:%d, val:%s", __func__, __LINE__, pStateKey->key.groupId, - pStateKey->key.ts, pStateKey->opNum, pStateVLen, output); -#endif code = streamStatePutBatchOptimize(pFileState->pFileStore, idx, batch, pSKey, pPos->pRowBuff, pFileState->rowSize, 0, buf); taosMemoryFreeClear(pSKey); @@ -710,7 +689,6 @@ int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { if (pCur == NULL) { return -1; } - char output[1024]; int32_t recoverNum = TMIN(MIN_NUM_OF_ROW_BUFF, pFileState->maxRowCount); while (code == TSDB_CODE_SUCCESS) { if (pFileState->curRowCount >= recoverNum) { @@ -730,17 +708,6 @@ int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { } ASSERT(vlen == pFileState->rowSize); memcpy(pNewPos->pRowBuff, pVal, vlen); - -#if 1 - SStateKey* pStateKey = pNewPos->pKey; - char* pStateVal = pVal; - int32_t pStateVLen = vlen; - assert(pStateVLen < 1024); - getDebugRowBuff(pStateVal, pStateVLen, output); - qDebug("%s:%d key:[%" PRIu64 ",%" PRIi64 ",%" PRIi64 "] vlen:%d, val:%s", __func__, __LINE__, pStateKey->key.groupId, - pStateKey->key.ts, pStateKey->opNum, pStateVLen, output); -#endif - taosMemoryFreeClear(pVal); pNewPos->beFlushed = true; code = tSimpleHashPut(pFileState->rowStateBuff, pNewPos->pKey, pFileState->keyLen, &pNewPos, POINTER_BYTES); From f2d24306b1a05868a6c3362f482dc8b8172804e9 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 8 Nov 2023 08:35:52 +0800 Subject: [PATCH 25/32] enh: rsma tasks share one checkpoint and fix memory leak --- source/dnode/vnode/src/sma/smaRollup.c | 4 ++++ source/libs/stream/src/tstreamFileState.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 665610304c..424a9b1cf2 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1221,6 +1221,10 @@ _checkpoint: smaInfo("vgId:%d, rsma commit, succeed to commit checkpoint/task:%" PRIi64 "/%p, table:%" PRIi64 ", level:%d", TD_VID(pVnode), pTask->checkpointingId, pTask, pRSmaInfo->suid, i + 1); + + // the stream states share one checkpoint + taosHashCancelIterate(pInfoHash, infoHash); + goto _exit; } } } diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index e38ba85f62..fc47498a3c 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -201,7 +201,7 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_ streamFileStateDecode(&pFileState->flushMark, valBuf, len); qDebug("===stream===flushMark read:%" PRId64, pFileState->flushMark); } - + taosMemoryFreeClear(valBuf); return pFileState; _error: From d1106c51dcf19f7bc12e6dd8f9b1e290e5ee3831 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 8 Nov 2023 09:54:03 +0800 Subject: [PATCH 26/32] enh: rsma code optimization --- source/dnode/vnode/src/inc/sma.h | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 198c93a937..5e808c217c 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -140,7 +140,7 @@ struct SRSmaInfoItem { int8_t level; int8_t fetchLevel; int8_t triggerStat; - uint32_t nScanned; + int32_t nScanned; int32_t streamFlushed : 1; int32_t maxDelay : 31; // ms tmr_h tmrId; diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 424a9b1cf2..68d829bea4 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -22,7 +22,7 @@ #define RSMA_FETCH_DELAY_MAX (120000) // ms #define RSMA_FETCH_ACTIVE_MAX (1000) // ms #define RSMA_FETCH_INTERVAL (5000) // ms -#define RSMA_TASK_FLAG "rsma_task" +#define RSMA_TASK_FLAG "rsma" #define RSMA_NEED_FETCH(r) (RSMA_INFO_ITEM((r), 0)->fetchLevel || RSMA_INFO_ITEM((r), 1)->fetchLevel) @@ -1368,7 +1368,7 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { #if 0 if ((++pItem->nScanned * pItem->maxDelay) > RSMA_FETCH_DELAY_MAX) { - smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi16 " maxDelay:%d, fetch executed", + smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi32 " maxDelay:%d, fetch executed", SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay); } else { int64_t curMs = taosGetTimestampMs(); @@ -1393,10 +1393,10 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { goto _err; } - smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi16 " maxDelay:%d, fetch finished", + smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi32 " maxDelay:%d, fetch finished", SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay); } else { - smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi16 + smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi32 " maxDelay:%d, fetch not executed as fetch level is %" PRIi8, SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay, pItem->fetchLevel); } From 72ecb0431c6fc8d44531240354d5f6942691d562 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 8 Nov 2023 20:45:44 +0800 Subject: [PATCH 27/32] enh: rsma checkpoint --- source/dnode/vnode/src/inc/sma.h | 20 +- source/dnode/vnode/src/sma/smaCommit.c | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 133 ++++++---- source/dnode/vnode/src/sma/smaUtil.c | 5 - source/dnode/vnode/src/tsdb/tsdbMemTable.c | 12 - source/libs/stream/src/streamTask.c | 9 +- tests/parallel_test/cases.task | 1 + .../tsim/sync/vnodesnapshot-rsma-test.sim | 2 +- tests/script/win-test-file | 1 + tests/system-test/1-insert/rsma.py | 248 ++++++++++++++++++ 10 files changed, 348 insertions(+), 85 deletions(-) create mode 100644 tests/system-test/1-insert/rsma.py diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 5e808c217c..de6bb23f04 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -137,15 +137,17 @@ struct SSmaStat { #define RSMA_FS_LOCK(r) (&(r)->lock) struct SRSmaInfoItem { - int8_t level; - int8_t fetchLevel; - int8_t triggerStat; - int32_t nScanned; - int32_t streamFlushed : 1; - int32_t maxDelay : 31; // ms - tmr_h tmrId; - void *pStreamState; - void *pStreamTask; // SStreamTask + int8_t level; + int8_t fetchLevel; + int8_t triggerStat; + int32_t nScanned; + int32_t streamFlushed : 1; + int32_t maxDelay : 31; // ms + int64_t submitReqVer; + int64_t fetchResultVer; + tmr_h tmrId; + void *pStreamState; + void *pStreamTask; // SStreamTask }; struct SRSmaInfo { diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index fad2e4d7e9..92181f054d 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -169,7 +169,7 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { * 1) This is high cost task and should not put in asyncPreCommit originally. * 2) But, if put in asyncCommit, would trigger taskInfo cloning frequently. */ - smaInfo("vgId:%d, rsma commit:%d, wait for all items to be consumed, TID:%p", SMA_VID(pSma), isCommit, + smaInfo("vgId:%d, rsma commit, type:%d, wait for all items to be consumed, TID:%p", SMA_VID(pSma), isCommit, (void *)taosGetSelfPthreadId()); nLoops = 0; while (atomic_load_64(&pRSmaStat->nBufItems) > 0) { diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 68d829bea4..7296f3d468 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -44,8 +44,8 @@ static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid); static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); static void tdFreeRSmaSubmitItems(SArray *pItems); static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo); -static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, - int64_t suid, SArray **ppResList, int8_t *streamFlushed); +static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo, + int32_t execType, SArray **ppResList, int8_t *streamFlushed); static void tdRSmaFetchTrigger(void *param, void *tmrId); static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level); static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables); @@ -235,19 +235,16 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui return TSDB_CODE_SUCCESS; } -#if 0 -static int64_t tdRSmaTaskGetCheckpointId(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { - int64_t checkpointId = -1; - STaskId id = {.streamId = streamId, .taskId = taskId}; +static void tdRSmaTaskInit(SStreamMeta *pMeta, SRSmaInfoItem *pItem, SStreamTaskId *pId) { + STaskId id = {.streamId = pId->streamId, .taskId = pId->taskId}; taosRLockLatch(&pMeta->lock); SStreamTask **ppTask = (SStreamTask **)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask && *ppTask) { - checkpointId = (*ppTask)->chkInfo.checkpointId; + pItem->submitReqVer = (*ppTask)->chkInfo.checkpointVer; + pItem->fetchResultVer = (*ppTask)->info.triggerParam; } taosRUnLockLatch(&pMeta->lock); - return checkpointId; } -#endif static void tdRSmaTaskRemove(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { streamMetaUnregisterTask(pMeta, streamId, taskId); @@ -295,12 +292,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->pMeta = pVnode->pTq->pStreamMeta; pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_TASK_FLAG) + 1); sprintf(pStreamTask->exec.qmsg, "%s", RSMA_TASK_FLAG); -#if 0 - pStreamTask->chkInfo.checkpointId = - tdRSmaTaskGetCheckpointId(pStreamTask->pMeta, pStreamTask->id.streamId, pStreamTask->id.taskId); -#else pStreamTask->chkInfo.checkpointId = streamMetaGetLatestCheckpointId(pStreamTask->pMeta); -#endif + tdRSmaTaskInit(pStreamTask->pMeta, pItem, &pStreamTask->id); pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; @@ -318,7 +311,11 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat return TSDB_CODE_FAILED; } - pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; // fetch the data when reboot + if (pItem->fetchResultVer < pItem->submitReqVer) { + // fetch the data when reboot + pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; + } + if (param->maxdelay[idx] < TSDB_MIN_ROLLUP_MAX_DELAY) { int64_t msInterval = convertTimeFromPrecisionToUnit(pRetention[idx + 1].freq, pTsdbCfg->precision, TIME_UNIT_MILLISECOND); @@ -337,10 +334,11 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat taosTmrReset(tdRSmaFetchTrigger, RSMA_FETCH_INTERVAL, pItem, smaMgmt.tmrHandle, &pItem->tmrId); - smaInfo("vgId:%d, open rsma task:%p table:%" PRIi64 " level:%" PRIi8 ", checkpointId:%" PRIi64 ", maxdelay:%" PRIi64 - " watermark:%" PRIi64 ", finally maxdelay:%" PRIi32, + smaInfo("vgId:%d, open rsma task:%p table:%" PRIi64 " level:%" PRIi8 ", checkpointId:%" PRIi64 + ", submitReqVer:%" PRIi64 ", fetchResultVer:%" PRIi64 ", maxdelay:%" PRIi64 " watermark:%" PRIi64 + ", finally maxdelay:%" PRIi32, TD_VID(pVnode), pItem->pStreamTask, pRSmaInfo->suid, (int8_t)(idx + 1), pStreamTask->chkInfo.checkpointId, - param->maxdelay[idx], param->watermark[idx], pItem->maxDelay); + pItem->submitReqVer, pItem->fetchResultVer, param->maxdelay[idx], param->watermark[idx], pItem->maxDelay); } return TSDB_CODE_SUCCESS; } @@ -624,12 +622,14 @@ _end: return code; } -static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, - int64_t suid, SArray **ppResList, int8_t *streamFlushed) { +static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo, + int32_t execType, SArray **ppResList, int8_t *streamFlushed) { int32_t code = 0; int32_t lino = 0; SSDataBlock *output = NULL; SArray *pResList = NULL; + STSchema *pTSchema = pInfo->pTSchema; + int64_t suid = pInfo->suid; if (!(*ppResList)) { pResList = taosArrayInit(1, POINTER_BYTES); @@ -657,11 +657,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma if (taosArrayGetSize(pResList) == 0) { break; } -#if 0 - char flag[10] = {0}; - snprintf(flag, 10, "level %" PRIi8, pItem->level); - blockDebugShowDataBlocks(pResList, flag); -#endif + for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) { output = taosArrayGetP(pResList, i); if (output->info.type == STREAM_CHECKPOINT) { @@ -674,12 +670,17 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); SSubmitReq2 *pReq = NULL; - // TODO: the schema update should be handled later(TD-17965) if (buildSubmitReqFromDataBlock(&pReq, output, pTSchema, output->info.id.groupId, SMA_VID(pSma), suid) < 0) { code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; TSDB_CHECK_CODE(code, lino, _exit); } + // reset the output version to handle reboot + if (STREAM_GET_ALL == execType && output->info.version == 0) { + // the submitReqVer keeps unchanged since tdExecuteRSmaImpl and tdRSmaFetchAllResult are executed synchronously + output->info.version = pItem->submitReqVer; + } + if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) { code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); @@ -691,6 +692,10 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma TSDB_CHECK_CODE(code, lino, _exit); } + if (STREAM_GET_ALL == execType) { + atomic_store_64(&pItem->fetchResultVer, output->info.version); + } + smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64, SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, output->info.version); @@ -803,9 +808,10 @@ static int32_t tdRsmaPrintSubmitReq(SSma *pSma, SSubmitReq *pReq) { */ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, ERsmaExecType type, int8_t level) { - int32_t idx = level - 1; - void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); - SArray *pResList = NULL; + int32_t idx = level - 1; + void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); + SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); + SArray *pResList = NULL; if (!qTaskInfo) { smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, @@ -833,8 +839,12 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, return TSDB_CODE_FAILED; } - SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); - tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo->pTSchema, pInfo->suid, &pResList, NULL); + if (STREAM_INPUT__MERGED_SUBMIT == inputType) { + SPackedData *packData = POINTER_SHIFT(pMsg, sizeof(SPackedData) * (msgSize - 1)); + atomic_store_64(&pItem->submitReqVer, packData->ver); + } + + tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, &pResList, NULL); taosArrayDestroy(pResList); return TSDB_CODE_SUCCESS; @@ -1161,8 +1171,8 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { int8_t streamFlushed = 0; - code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo->pTSchema, - pRSmaInfo->suid, &pResList, &streamFlushed); + code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo, + STREAM_CHECKPOINT, &pResList, &streamFlushed); if (code) { taosHashCancelIterate(pInfoHash, infoHash); TSDB_CHECK_CODE(code, lino, _exit); @@ -1190,7 +1200,10 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { _checkpoint: // stream state: build checkpoint in backend do { - void *infoHash = NULL; + SStreamMeta *pMeta = NULL; + int64_t checkpointId = taosGetTimestampNs(); + bool checkpointBuilt = false; + void *infoHash = NULL; while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { @@ -1202,32 +1215,48 @@ _checkpoint: if (pItem && pItem->pStreamTask) { SStreamTask *pTask = pItem->pStreamTask; atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); - pTask->checkpointingId = taosGetTimestampNs(); + pTask->checkpointingId = checkpointId; pTask->chkInfo.checkpointId = pTask->checkpointingId; - code = streamTaskBuildCheckpoint(pTask); - if (code) { - taosHashCancelIterate(pInfoHash, infoHash); - TSDB_CHECK_CODE(code, lino, _exit); + pTask->chkInfo.checkpointVer = pItem->submitReqVer; + pTask->info.triggerParam = pItem->fetchResultVer; + + if (!checkpointBuilt) { + // the stream states share one checkpoint + code = streamTaskBuildCheckpoint(pTask); + if (code) { + taosHashCancelIterate(pInfoHash, infoHash); + TSDB_CHECK_CODE(code, lino, _exit); + } + pMeta = pTask->pMeta; + checkpointBuilt = true; } - taosWLockLatch(&pTask->pMeta->lock); - if (0 != streamMetaSaveTask(pTask->pMeta, pTask) || 0 != streamMetaCommit(pTask->pMeta)) { - taosWUnLockLatch(&pTask->pMeta->lock); + taosWLockLatch(&pMeta->lock); + if (0 != streamMetaSaveTask(pMeta, pTask)) { + taosWUnLockLatch(&pMeta->lock); code = terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY; taosHashCancelIterate(pInfoHash, infoHash); TSDB_CHECK_CODE(code, lino, _exit); } - taosWUnLockLatch(&pTask->pMeta->lock); - - smaInfo("vgId:%d, rsma commit, succeed to commit checkpoint/task:%" PRIi64 "/%p, table:%" PRIi64 ", level:%d", - TD_VID(pVnode), pTask->checkpointingId, pTask, pRSmaInfo->suid, i + 1); - - // the stream states share one checkpoint - taosHashCancelIterate(pInfoHash, infoHash); - goto _exit; + taosWUnLockLatch(&pMeta->lock); + smaDebug("vgId:%d, rsma commit, succeed to commit task:%p, submitReqVer:%" PRIi64 ", fetchResultVer:%" PRIi64 + ", table:%" PRIi64 ", level:%d", + TD_VID(pVnode), pTask, pItem->submitReqVer, pItem->fetchResultVer, pRSmaInfo->suid, i + 1); } } } + if (pMeta) { + taosWLockLatch(&pMeta->lock); + if (0 != streamMetaCommit(pMeta)) { + taosWUnLockLatch(&pMeta->lock); + code = terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + taosWUnLockLatch(&pMeta->lock); + } + if (checkpointBuilt) { + smaInfo("vgId:%d, rsma commit, succeed to commit checkpoint:%" PRIi64, TD_VID(pVnode), checkpointId); + } } while (0); _exit: taosArrayDestroy(pResList); @@ -1366,7 +1395,6 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { continue; } -#if 0 if ((++pItem->nScanned * pItem->maxDelay) > RSMA_FETCH_DELAY_MAX) { smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi32 " maxDelay:%d, fetch executed", SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay); @@ -1384,12 +1412,11 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { } pItem->nScanned = 0; -#endif if ((terrno = qSetSMAInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) { goto _err; } - if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo->pTSchema, pInfo->suid, &pResList, NULL) < 0) { + if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo, STREAM_GET_ALL, &pResList, NULL) < 0) { goto _err; } diff --git a/source/dnode/vnode/src/sma/smaUtil.c b/source/dnode/vnode/src/sma/smaUtil.c index 8c04306d0f..e45cbac329 100644 --- a/source/dnode/vnode/src/sma/smaUtil.c +++ b/source/dnode/vnode/src/sma/smaUtil.c @@ -30,13 +30,8 @@ void tdRSmaGetDirName(SVnode *pVnode, STfs *pTfs, bool endWithSep, char *outputN offset = strlen(outputName); // rsma -#if 1 snprintf(outputName + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s%s", TD_DIRSEP, VNODE_RSMA_DIR, (endWithSep ? TD_DIRSEP : "")); -#else - snprintf(outputName + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s%s%s%s%s%s", TD_DIRSEP, "tq", TD_DIRSEP, "stream", - TD_DIRSEP, "state", (endWithSep ? TD_DIRSEP : "")); -#endif } // smaXXXUtil ================ diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index 69b19f4bc5..cc77474e79 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -661,9 +661,6 @@ static int32_t tsdbInsertColDataToTable(SMemTable *pMemTable, STbData *pTbData, if ((code = tbDataDoPut(pMemTable, pTbData, pos, &tRow, 0))) goto _exit; pTbData->minKey = TMIN(pTbData->minKey, key.ts); lRow = tRow; - tsdbDebug("vgId:%d, %s, insert col row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, - TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, tRow.iRow, tRow.pTSRow->ts, tRow.version, - pSubmitTbData->uid); // remain row ++tRow.iRow; @@ -683,9 +680,6 @@ static int32_t tsdbInsertColDataToTable(SMemTable *pMemTable, STbData *pTbData, lRow = tRow; ++tRow.iRow; - tsdbDebug("vgId:%d, %s, insert col row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, - TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, tRow.iRow, tRow.pTSRow->ts, tRow.version, - pSubmitTbData->uid); } } @@ -727,9 +721,6 @@ static int32_t tsdbInsertRowDataToTable(SMemTable *pMemTable, STbData *pTbData, code = tbDataDoPut(pMemTable, pTbData, pos, &tRow, 0); if (code) goto _exit; lRow = tRow; - tsdbDebug("vgId:%d, %s, insert row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, - TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, iRow, tRow.pTSRow->ts, tRow.version, - pSubmitTbData->uid); pTbData->minKey = TMIN(pTbData->minKey, key.ts); @@ -753,9 +744,6 @@ static int32_t tsdbInsertRowDataToTable(SMemTable *pMemTable, STbData *pTbData, lRow = tRow; iRow++; - tsdbDebug("vgId:%d, %s, insert row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, - TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, iRow, tRow.pTSRow->ts, tRow.version, - pSubmitTbData->uid); } } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index a7fb590d1b..2f8de98039 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -120,8 +120,8 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { taskId = pTask->streamTaskId.taskId; if (tEncodeI32(pEncoder, taskId)) return -1; - if (tEncodeU64(pEncoder, pTask->dataRange.range.minVer)) return -1; - if (tEncodeU64(pEncoder, pTask->dataRange.range.maxVer)) return -1; + if (tEncodeI64(pEncoder, pTask->dataRange.range.minVer)) return -1; + if (tEncodeI64(pEncoder, pTask->dataRange.range.maxVer)) return -1; if (tEncodeI64(pEncoder, pTask->dataRange.window.skey)) return -1; if (tEncodeI64(pEncoder, pTask->dataRange.window.ekey)) return -1; @@ -193,8 +193,9 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeI32(pDecoder, &taskId)) return -1; pTask->streamTaskId.taskId = taskId; - if (tDecodeU64(pDecoder, &pTask->dataRange.range.minVer)) return -1; - if (tDecodeU64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; + if (tDecodeI64(pDecoder, &pTask->dataRange.range.minVer)) return -1; + if (tDecodeI64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; + if (tDecodeI64(pDecoder, &pTask->dataRange.window.skey)) return -1; if (tDecodeI64(pDecoder, &pTask->dataRange.window.ekey)) return -1; diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 21dcd16441..37d1c2aa59 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -1175,6 +1175,7 @@ e ,,y,script,./test.sh -f tsim/sma/tsmaCreateInsertQuery.sim ,,y,script,./test.sh -f tsim/sma/rsmaCreateInsertQuery.sim ,,y,script,./test.sh -f tsim/sma/rsmaPersistenceRecovery.sim +,,y,script,./test.sh -f tsim/sync/vnodesnapshot-rsma-test.sim ,,n,script,./test.sh -f tsim/valgrind/checkError1.sim ,,n,script,./test.sh -f tsim/valgrind/checkError2.sim ,,n,script,./test.sh -f tsim/valgrind/checkError3.sim diff --git a/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim b/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim index 3b3cd01521..b1e5ed200f 100644 --- a/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim +++ b/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim @@ -114,7 +114,7 @@ endi vg_ready: print ====> create stable/child table -sql create table stb (ts timestamp, c1 int, c2 float, c3 double) tags (t1 int) rollup(sum) watermark 3s,3s max_delay 3s,3s +sql create table stb (ts timestamp, c1 float, c2 float, c3 double) tags (t1 int) rollup(sum) watermark 3s,3s max_delay 3s,3s sql show stables if $rows != 1 then diff --git a/tests/script/win-test-file b/tests/script/win-test-file index 4ff4b52f7e..fe5f5c39e3 100644 --- a/tests/script/win-test-file +++ b/tests/script/win-test-file @@ -320,6 +320,7 @@ ./test.sh -f tsim/sma/tsmaCreateInsertQuery.sim ./test.sh -f tsim/sma/rsmaCreateInsertQuery.sim ./test.sh -f tsim/sma/rsmaPersistenceRecovery.sim +./test.sh -f tsim/sync/vnodesnapshot-rsma-test.sim ./test.sh -f tsim/valgrind/checkError1.sim ./test.sh -f tsim/valgrind/checkError2.sim ./test.sh -f tsim/valgrind/checkError3.sim diff --git a/tests/system-test/1-insert/rsma.py b/tests/system-test/1-insert/rsma.py new file mode 100644 index 0000000000..ab84185e87 --- /dev/null +++ b/tests/system-test/1-insert/rsma.py @@ -0,0 +1,248 @@ +from datetime import datetime +import time + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * + +PRIMARY_COL = "ts" + +INT_COL = "c_int" +BINT_COL = "c_bint" +SINT_COL = "c_sint" +TINT_COL = "c_tint" +FLOAT_COL = "c_float" +DOUBLE_COL = "c_double" +BOOL_COL = "c_bool" +TINT_UN_COL = "c_utint" +SINT_UN_COL = "c_usint" +BINT_UN_COL = "c_ubint" +INT_UN_COL = "c_uint" +BINARY_COL = "c_binary" +NCHAR_COL = "c_nchar" +TS_COL = "c_ts" + +INT_TAG = "t_int" + +TAG_COL = [INT_TAG] + +## insert data args: +TIME_STEP = 10000 +NOW = int(datetime.timestamp(datetime.now()) * 1000) + +# init db/table +DBNAME = "db" +DB1 = "db1" +DB2 = "db2" +DB3 = "db3" +DB4 = "db4" +STBNAME = "stb1" +CTBNAME = "ct1" +NTBNAME = "nt1" + +class TDTestCase: + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), True) + + @property + def create_databases_sql_err(self): + return [ + # check grammar + "create database db1 retentions", + "create database db1 retentions 1s:1d", + "create database db1 retentions 1s:1d,2s:2d", + "create database db1 retentions 1s:1d,2s:2d,3s:3d", + "create database db1 retentions 1s:1d,2s:2d,3s:3d,4s:4d", + "create database db1 retentions -:1d,2s:2d,3s:3d,4s:4d", + "create database db1 retentions --:1d", + "create database db1 retentions -:-:1d", + "create database db1 retentions 1d:-", + "create database db1 retentions -:-", + "create database db1 retentions +:1d", + "create database db1 retentions :1d", + "create database db1 retentions -:1d,-:2d", + "create database db1 retentions -:1d,-:2d,-:3d", + "create database db1 retentions -:1d,1s:-", + "create database db1 retentions -:1d,15s:2d,-:3d", + + # check unit + "create database db1 retentions -:1d,1b:1d", + "create database db1 retentions -:1d,1u:1d", + "create database db1 retentions -:1d,1a:1d", + "create database db1 retentions -:1d,1n:1d", + "create database db1 retentions -:1d,1y:1d", + "create database db1 retentions -:1d,1s:86400s", + "create database db1 retentions -:1d,1s:86400000a", + "create database db1 retentions -:1d,1s:86400000000u", + "create database db1 retentions -:1d,1s:86400000000000b", + "create database db1 retentions -:1s,1s:2s", + "create database db1 retentions -:1d,1s:1w", + "create database db1 retentions -:1d,1s:1n", + "create database db1 retentions -:1d,1s:1y", + + # check value range + "create database db3 retentions -:-1d", + "create database db3 retentions -:0d", + "create database db3 retentions -:1439m", + "create database db3 retentions -:365001d", + "create database db3 retentions -:8760001h", + "create database db3 retentions -:525600001m", + "create database db3 retentions -:106581d precision 'ns'", + "create database db3 retentions -:2557921h precision 'ns'", + "create database db3 retentions -:153475201m precision 'ns'", + # check relationships + "create database db5 retentions -:1440m,1441m:1440m,2d:3d", + "create database db5 retentions -:1d,2m:1d,1s:2d", + "create database db5 retentions -:1440m,1s:2880m,2s:2879m", + "create database db5 retentions -:1d,2s:2d,2s:3d", + "create database db5 retentions -:1d,3s:2d,2s:3d", + "create database db1 retentions -:1d,2s:3d,3s:2d", + "create database db1 retentions -:1d,2s:3d,1s:2d", + + ] + + @property + def create_databases_sql_current(self): + return [ + f"create database {DB1} retentions -:1d", + f"create database {DB2} retentions -:1d,2m:2d,3h:3d", + ] + + @property + def alter_database_sql(self): + return [ + "alter database db1 retentions -:99d", + "alter database db2 retentions -:97d,98h:98d,99h:99d,", + ] + + @property + def create_stable_sql_err(self, dbname=DB2): + return [ + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(ceil) watermark 1s max_delay 1m", + f"create stable {dbname}.stb12 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(count) watermark 1min", + f"create stable {dbname}.stb13 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay -1s", + f"create stable {dbname}.stb14 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark -1m", + f"create stable {dbname}.stb15 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) watermark 1m ", + f"create stable {dbname}.stb16 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) max_delay 1m ", + f"create stable {dbname}.stb21 ({PRIMARY_COL} timestamp, {INT_COL} int, {BINARY_COL} binary(16)) tags (tag1 int) rollup(avg) watermark 1s", + f"create stable {dbname}.stb22 ({PRIMARY_COL} timestamp, {INT_COL} int, {NCHAR_COL} nchar(16)) tags (tag1 int) rollup(avg) max_delay 1m", + f"create table {dbname}.ntb_1 ({PRIMARY_COL} timestamp, {INT_COL} int, {NCHAR_COL} nchar(16)) rollup(avg) watermark 1s max_delay 1s", + f"create table {dbname}.ntb_2 ({PRIMARY_COL} timestamp, {INT_COL} int) " , + f"create stable {dbname}.stb23 ({PRIMARY_COL} timestamp, {INT_COL} int, {NCHAR_COL} nchar(16)) tags (tag1 int) " , + f"create stable {dbname}.stb24 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) " , + f"create stable {dbname}.stb25 ({PRIMARY_COL} timestamp, {INT_COL} int) " , + f"create stable {dbname}.stb26 ({PRIMARY_COL} timestamp, {INT_COL} int, {BINARY_COL} nchar(16)) " , + # only float/double allowd for avg/sum + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(avg)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINT_COL} bigint) tags (tag1 int) rollup(avg)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BOOL_COL} bool) tags (tag1 int) rollup(avg)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINARY_COL} binary(10)) tags (tag1 int) rollup(avg)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(sum)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINT_COL} bigint) tags (tag1 int) rollup(sum)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BOOL_COL} bool) tags (tag1 int) rollup(sum)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINARY_COL} binary(10)) tags (tag1 int) rollup(sum)", + + + # watermark, max_delay: [0, 900000], [ms, s, m, ?] + f"create stable stb17 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 1u", + f"create stable stb18 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark 1b", + f"create stable stb19 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark 900001ms", + f"create stable stb20 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 16m", + f"create stable stb27 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 901s", + f"create stable stb28 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 1h", + f"create stable stb29 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 0.2h", + f"create stable stb30 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark 0.002d", + + ] + + @property + def create_tb(self, stb=STBNAME, ctb_num=20, ntbnum=1, rsma=False, dbname=DBNAME, rsma_type="sum"): + tdLog.printNoPrefix("==========step: create table") + if rsma: + if rsma_type.lower().strip() in ("last", "first"): + create_stb_sql = f'''create table {dbname}.{stb}( + ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, + {FLOAT_COL} float, {DOUBLE_COL} double, {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, + {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned, {BINARY_COL} binary(16) + ) tags ({INT_TAG} int) rollup({rsma_type}) watermark 5s,5s max_delay 5s,5s + ''' + elif rsma_type.lower().strip() in ("sum", "avg"): + create_stb_sql = f'''create table {dbname}.{stb}( + ts timestamp, {DOUBLE_COL} double, {DOUBLE_COL}_1 double, {DOUBLE_COL}_2 double, {DOUBLE_COL}_3 double, + {FLOAT_COL} float, {DOUBLE_COL}_4 double, {FLOAT_COL}_1 float, {FLOAT_COL}_2 float, {FLOAT_COL}_3 float, + {DOUBLE_COL}_5 double) tags ({INT_TAG} int) rollup({rsma_type}) watermark 5s,5s max_delay 5s,5s + ''' + else: + create_stb_sql = f'''create table {dbname}.{stb}( + ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, + {FLOAT_COL} float, {DOUBLE_COL} double, {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, + {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned + ) tags ({INT_TAG} int) rollup({rsma_type}) watermark 5s,5s max_delay 5s,5s + ''' + tdSql.execute(create_stb_sql) + else: + create_stb_sql = f'''create table {dbname}.{stb}( + ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, + {FLOAT_COL} float, {DOUBLE_COL} double, {BOOL_COL} bool, + {BINARY_COL} binary(16), {NCHAR_COL} nchar(32), {TS_COL} timestamp, + {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, + {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned + ) tags ({INT_TAG} int) + ''' + tdSql.execute(create_stb_sql) + + for i in range(ntbnum): + create_ntb_sql = f'''create table {dbname}.nt{i+1}( + ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, + {FLOAT_COL} float, {DOUBLE_COL} double, {BOOL_COL} bool, + {BINARY_COL} binary(16), {NCHAR_COL} nchar(32), {TS_COL} timestamp, + {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, + {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned + ) + ''' + tdSql.execute(create_ntb_sql) + + for i in range(ctb_num): + tdSql.execute(f'create table {dbname}.ct{i+1} using {dbname}.{stb} tags ( {i+1} )') + + def create_ctable(self,tsql=None, dbName='dbx',stbName='stb',ctbPrefix='ctb',ctbNum=1): + tsql.execute("use %s" %dbName) + pre_create = "create table" + sql = pre_create + #tdLog.debug("doing create one stable %s and %d child table in %s ..." %(stbname, count ,dbname)) + for i in range(ctbNum): + tagValue = 'beijing' + if (i % 2 == 0): + tagValue = 'shanghai' + sql += " %s%d using %s tags(%d, '%s')"%(ctbPrefix,i,stbName,i+1, tagValue) + if (i > 0) and (i%100 == 0): + tsql.execute(sql) + sql = pre_create + if sql != pre_create: + tsql.execute(sql) + + tdLog.debug("complete to create %d child tables in %s.%s" %(ctbNum, dbName, stbName)) + return + + + def run(self): + self.rows = 10 + tdLog.printNoPrefix("==========step0:all check") + dbname='d0' + tdSql.execute(f"create database {dbname} retentions -:10d,1m:15d,1h:30d STT_TRIGGER 1 vgroups 6;") + tdSql.execute(f"create stable if not exists {dbname}.st_min (ts timestamp, c1 int) tags (proid int,city binary(20)) rollup(min) watermark 0s,1s max_delay 1m,180s;;") + tdSql.execute(f"create stable if not exists {dbname}.st_avg (ts timestamp, c1 double) tags (city binary(20),district binary(20)) rollup(min) watermark 0s,1s max_delay 1m,180s;;") + self.create_ctable(tdSql, dbname, 'st_min', 'ct_min', 10000) + tdLog.printNoPrefix("==========step4:after wal, all check again ") + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) From f97cf96fd60dcebdbaa357c698f32a6ce64bc389 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 8 Nov 2023 20:47:41 +0800 Subject: [PATCH 28/32] chore: remove test case --- tests/system-test/1-insert/rsma.py | 248 ----------------------------- 1 file changed, 248 deletions(-) delete mode 100644 tests/system-test/1-insert/rsma.py diff --git a/tests/system-test/1-insert/rsma.py b/tests/system-test/1-insert/rsma.py deleted file mode 100644 index ab84185e87..0000000000 --- a/tests/system-test/1-insert/rsma.py +++ /dev/null @@ -1,248 +0,0 @@ -from datetime import datetime -import time - -from util.log import * -from util.sql import * -from util.cases import * -from util.dnodes import * -from util.common import * - -PRIMARY_COL = "ts" - -INT_COL = "c_int" -BINT_COL = "c_bint" -SINT_COL = "c_sint" -TINT_COL = "c_tint" -FLOAT_COL = "c_float" -DOUBLE_COL = "c_double" -BOOL_COL = "c_bool" -TINT_UN_COL = "c_utint" -SINT_UN_COL = "c_usint" -BINT_UN_COL = "c_ubint" -INT_UN_COL = "c_uint" -BINARY_COL = "c_binary" -NCHAR_COL = "c_nchar" -TS_COL = "c_ts" - -INT_TAG = "t_int" - -TAG_COL = [INT_TAG] - -## insert data args: -TIME_STEP = 10000 -NOW = int(datetime.timestamp(datetime.now()) * 1000) - -# init db/table -DBNAME = "db" -DB1 = "db1" -DB2 = "db2" -DB3 = "db3" -DB4 = "db4" -STBNAME = "stb1" -CTBNAME = "ct1" -NTBNAME = "nt1" - -class TDTestCase: - - def init(self, conn, logSql, replicaVar=1): - self.replicaVar = int(replicaVar) - tdLog.debug(f"start to excute {__file__}") - tdSql.init(conn.cursor(), True) - - @property - def create_databases_sql_err(self): - return [ - # check grammar - "create database db1 retentions", - "create database db1 retentions 1s:1d", - "create database db1 retentions 1s:1d,2s:2d", - "create database db1 retentions 1s:1d,2s:2d,3s:3d", - "create database db1 retentions 1s:1d,2s:2d,3s:3d,4s:4d", - "create database db1 retentions -:1d,2s:2d,3s:3d,4s:4d", - "create database db1 retentions --:1d", - "create database db1 retentions -:-:1d", - "create database db1 retentions 1d:-", - "create database db1 retentions -:-", - "create database db1 retentions +:1d", - "create database db1 retentions :1d", - "create database db1 retentions -:1d,-:2d", - "create database db1 retentions -:1d,-:2d,-:3d", - "create database db1 retentions -:1d,1s:-", - "create database db1 retentions -:1d,15s:2d,-:3d", - - # check unit - "create database db1 retentions -:1d,1b:1d", - "create database db1 retentions -:1d,1u:1d", - "create database db1 retentions -:1d,1a:1d", - "create database db1 retentions -:1d,1n:1d", - "create database db1 retentions -:1d,1y:1d", - "create database db1 retentions -:1d,1s:86400s", - "create database db1 retentions -:1d,1s:86400000a", - "create database db1 retentions -:1d,1s:86400000000u", - "create database db1 retentions -:1d,1s:86400000000000b", - "create database db1 retentions -:1s,1s:2s", - "create database db1 retentions -:1d,1s:1w", - "create database db1 retentions -:1d,1s:1n", - "create database db1 retentions -:1d,1s:1y", - - # check value range - "create database db3 retentions -:-1d", - "create database db3 retentions -:0d", - "create database db3 retentions -:1439m", - "create database db3 retentions -:365001d", - "create database db3 retentions -:8760001h", - "create database db3 retentions -:525600001m", - "create database db3 retentions -:106581d precision 'ns'", - "create database db3 retentions -:2557921h precision 'ns'", - "create database db3 retentions -:153475201m precision 'ns'", - # check relationships - "create database db5 retentions -:1440m,1441m:1440m,2d:3d", - "create database db5 retentions -:1d,2m:1d,1s:2d", - "create database db5 retentions -:1440m,1s:2880m,2s:2879m", - "create database db5 retentions -:1d,2s:2d,2s:3d", - "create database db5 retentions -:1d,3s:2d,2s:3d", - "create database db1 retentions -:1d,2s:3d,3s:2d", - "create database db1 retentions -:1d,2s:3d,1s:2d", - - ] - - @property - def create_databases_sql_current(self): - return [ - f"create database {DB1} retentions -:1d", - f"create database {DB2} retentions -:1d,2m:2d,3h:3d", - ] - - @property - def alter_database_sql(self): - return [ - "alter database db1 retentions -:99d", - "alter database db2 retentions -:97d,98h:98d,99h:99d,", - ] - - @property - def create_stable_sql_err(self, dbname=DB2): - return [ - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(ceil) watermark 1s max_delay 1m", - f"create stable {dbname}.stb12 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(count) watermark 1min", - f"create stable {dbname}.stb13 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay -1s", - f"create stable {dbname}.stb14 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark -1m", - f"create stable {dbname}.stb15 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) watermark 1m ", - f"create stable {dbname}.stb16 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) max_delay 1m ", - f"create stable {dbname}.stb21 ({PRIMARY_COL} timestamp, {INT_COL} int, {BINARY_COL} binary(16)) tags (tag1 int) rollup(avg) watermark 1s", - f"create stable {dbname}.stb22 ({PRIMARY_COL} timestamp, {INT_COL} int, {NCHAR_COL} nchar(16)) tags (tag1 int) rollup(avg) max_delay 1m", - f"create table {dbname}.ntb_1 ({PRIMARY_COL} timestamp, {INT_COL} int, {NCHAR_COL} nchar(16)) rollup(avg) watermark 1s max_delay 1s", - f"create table {dbname}.ntb_2 ({PRIMARY_COL} timestamp, {INT_COL} int) " , - f"create stable {dbname}.stb23 ({PRIMARY_COL} timestamp, {INT_COL} int, {NCHAR_COL} nchar(16)) tags (tag1 int) " , - f"create stable {dbname}.stb24 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) " , - f"create stable {dbname}.stb25 ({PRIMARY_COL} timestamp, {INT_COL} int) " , - f"create stable {dbname}.stb26 ({PRIMARY_COL} timestamp, {INT_COL} int, {BINARY_COL} nchar(16)) " , - # only float/double allowd for avg/sum - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(avg)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINT_COL} bigint) tags (tag1 int) rollup(avg)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BOOL_COL} bool) tags (tag1 int) rollup(avg)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINARY_COL} binary(10)) tags (tag1 int) rollup(avg)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(sum)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINT_COL} bigint) tags (tag1 int) rollup(sum)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BOOL_COL} bool) tags (tag1 int) rollup(sum)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINARY_COL} binary(10)) tags (tag1 int) rollup(sum)", - - - # watermark, max_delay: [0, 900000], [ms, s, m, ?] - f"create stable stb17 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 1u", - f"create stable stb18 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark 1b", - f"create stable stb19 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark 900001ms", - f"create stable stb20 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 16m", - f"create stable stb27 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 901s", - f"create stable stb28 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 1h", - f"create stable stb29 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 0.2h", - f"create stable stb30 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark 0.002d", - - ] - - @property - def create_tb(self, stb=STBNAME, ctb_num=20, ntbnum=1, rsma=False, dbname=DBNAME, rsma_type="sum"): - tdLog.printNoPrefix("==========step: create table") - if rsma: - if rsma_type.lower().strip() in ("last", "first"): - create_stb_sql = f'''create table {dbname}.{stb}( - ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, - {FLOAT_COL} float, {DOUBLE_COL} double, {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, - {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned, {BINARY_COL} binary(16) - ) tags ({INT_TAG} int) rollup({rsma_type}) watermark 5s,5s max_delay 5s,5s - ''' - elif rsma_type.lower().strip() in ("sum", "avg"): - create_stb_sql = f'''create table {dbname}.{stb}( - ts timestamp, {DOUBLE_COL} double, {DOUBLE_COL}_1 double, {DOUBLE_COL}_2 double, {DOUBLE_COL}_3 double, - {FLOAT_COL} float, {DOUBLE_COL}_4 double, {FLOAT_COL}_1 float, {FLOAT_COL}_2 float, {FLOAT_COL}_3 float, - {DOUBLE_COL}_5 double) tags ({INT_TAG} int) rollup({rsma_type}) watermark 5s,5s max_delay 5s,5s - ''' - else: - create_stb_sql = f'''create table {dbname}.{stb}( - ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, - {FLOAT_COL} float, {DOUBLE_COL} double, {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, - {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned - ) tags ({INT_TAG} int) rollup({rsma_type}) watermark 5s,5s max_delay 5s,5s - ''' - tdSql.execute(create_stb_sql) - else: - create_stb_sql = f'''create table {dbname}.{stb}( - ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, - {FLOAT_COL} float, {DOUBLE_COL} double, {BOOL_COL} bool, - {BINARY_COL} binary(16), {NCHAR_COL} nchar(32), {TS_COL} timestamp, - {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, - {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned - ) tags ({INT_TAG} int) - ''' - tdSql.execute(create_stb_sql) - - for i in range(ntbnum): - create_ntb_sql = f'''create table {dbname}.nt{i+1}( - ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, - {FLOAT_COL} float, {DOUBLE_COL} double, {BOOL_COL} bool, - {BINARY_COL} binary(16), {NCHAR_COL} nchar(32), {TS_COL} timestamp, - {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, - {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned - ) - ''' - tdSql.execute(create_ntb_sql) - - for i in range(ctb_num): - tdSql.execute(f'create table {dbname}.ct{i+1} using {dbname}.{stb} tags ( {i+1} )') - - def create_ctable(self,tsql=None, dbName='dbx',stbName='stb',ctbPrefix='ctb',ctbNum=1): - tsql.execute("use %s" %dbName) - pre_create = "create table" - sql = pre_create - #tdLog.debug("doing create one stable %s and %d child table in %s ..." %(stbname, count ,dbname)) - for i in range(ctbNum): - tagValue = 'beijing' - if (i % 2 == 0): - tagValue = 'shanghai' - sql += " %s%d using %s tags(%d, '%s')"%(ctbPrefix,i,stbName,i+1, tagValue) - if (i > 0) and (i%100 == 0): - tsql.execute(sql) - sql = pre_create - if sql != pre_create: - tsql.execute(sql) - - tdLog.debug("complete to create %d child tables in %s.%s" %(ctbNum, dbName, stbName)) - return - - - def run(self): - self.rows = 10 - tdLog.printNoPrefix("==========step0:all check") - dbname='d0' - tdSql.execute(f"create database {dbname} retentions -:10d,1m:15d,1h:30d STT_TRIGGER 1 vgroups 6;") - tdSql.execute(f"create stable if not exists {dbname}.st_min (ts timestamp, c1 int) tags (proid int,city binary(20)) rollup(min) watermark 0s,1s max_delay 1m,180s;;") - tdSql.execute(f"create stable if not exists {dbname}.st_avg (ts timestamp, c1 double) tags (city binary(20),district binary(20)) rollup(min) watermark 0s,1s max_delay 1m,180s;;") - self.create_ctable(tdSql, dbname, 'st_min', 'ct_min', 10000) - tdLog.printNoPrefix("==========step4:after wal, all check again ") - - def stop(self): - tdSql.close() - tdLog.success(f"{__file__} successfully executed") - -tdCases.addLinux(__file__, TDTestCase()) -tdCases.addWindows(__file__, TDTestCase()) From edef4de7b299b3b5a6ef1308c3ae49cb27754ffd Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 8 Nov 2023 20:57:57 +0800 Subject: [PATCH 29/32] chore: revert the code --- source/libs/stream/src/streamTask.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 2f8de98039..59002e456a 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -120,8 +120,8 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { taskId = pTask->streamTaskId.taskId; if (tEncodeI32(pEncoder, taskId)) return -1; - if (tEncodeI64(pEncoder, pTask->dataRange.range.minVer)) return -1; - if (tEncodeI64(pEncoder, pTask->dataRange.range.maxVer)) return -1; + if (tEncodeU64(pEncoder, pTask->dataRange.range.minVer)) return -1; + if (tEncodeU64(pEncoder, pTask->dataRange.range.maxVer)) return -1; if (tEncodeI64(pEncoder, pTask->dataRange.window.skey)) return -1; if (tEncodeI64(pEncoder, pTask->dataRange.window.ekey)) return -1; @@ -193,8 +193,8 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeI32(pDecoder, &taskId)) return -1; pTask->streamTaskId.taskId = taskId; - if (tDecodeI64(pDecoder, &pTask->dataRange.range.minVer)) return -1; - if (tDecodeI64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; + if (tDecodeU64(pDecoder, &pTask->dataRange.range.minVer)) return -1; + if (tDecodeU64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; if (tDecodeI64(pDecoder, &pTask->dataRange.window.skey)) return -1; if (tDecodeI64(pDecoder, &pTask->dataRange.window.ekey)) return -1; From 3495efaac7a807d07cdca0ad7a3dc57122f071e7 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 9 Nov 2023 10:49:40 +0800 Subject: [PATCH 30/32] enh: rsma exception process --- source/dnode/vnode/src/inc/sma.h | 1 + source/dnode/vnode/src/sma/smaCommit.c | 5 +++-- source/dnode/vnode/src/sma/smaRollup.c | 27 ++++++++++++++++--------- source/dnode/vnode/src/tsdb/tsdbWrite.c | 2 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 2 +- 5 files changed, 24 insertions(+), 13 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index de6bb23f04..f45050bfec 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -108,6 +108,7 @@ struct SRSmaStat { int64_t refId; // shared by fetch tasks volatile int64_t nBufItems; // number of items in queue buffer SRWLatch lock; // r/w lock for rsma fs(e.g. qtaskinfo) + volatile int32_t execStat; // 0 succeed, other failed volatile int32_t nFetchAll; // active number of fetch all volatile int8_t triggerStat; // shared by fetch tasks volatile int8_t commitStat; // 0 not in committing, 1 in committing diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 92181f054d..92b8c09fbc 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -179,13 +179,14 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { if (!isCommit) goto _exit; + code = atomic_load_32(&pRSmaStat->execStat); + TSDB_CHECK_CODE(code, lino, _exit); + code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); TSDB_CHECK_CODE(code, lino, _exit); smaInfo("vgId:%d, rsma commit, operator state committed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); - - // all rsma results are written completely STsdb *pTsdb = NULL; if ((pTsdb = VND_RSMA1(pSma->pVnode))) { diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 7296f3d468..92494553d0 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -682,13 +682,14 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma } if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) { - code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; + if (terrno == TSDB_CODE_TDB_TIMESTAMP_OUT_OF_RANGE) { + // TODO: reconfigure SSubmitReq2 + } else { + if (terrno == 0) terrno = TSDB_CODE_RSMA_RESULT; + code = terrno; + } tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); taosMemoryFree(pReq); - smaError("vgId:%d, %s failed at line %d since %s, suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIi64 - ", ver:%" PRIi64, - SMA_VID(pSma), __func__, lino, tstrerror(code), suid, pItem->level, output ? output->info.id.uid : -1, - output ? output->info.version : -1); TSDB_CHECK_CODE(code, lino, _exit); } @@ -844,10 +845,10 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, atomic_store_64(&pItem->submitReqVer, packData->ver); } - tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, &pResList, NULL); + terrno = tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, &pResList, NULL); taosArrayDestroy(pResList); - return TSDB_CODE_SUCCESS; + return terrno ? TSDB_CODE_FAILED : TDB_CODE_SUCCESS; } /** @@ -953,7 +954,12 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); if (!pEnv) { // only applicable when rsma env exists - return TSDB_CODE_SUCCESS; + return TDB_CODE_SUCCESS; + } + + if (0 != (terrno = atomic_load_32(&SMA_RSMA_STAT(pSma)->execStat))) { + smaError("vgId:%d, failed to process rsma submit since invalid exec code: %s", SMA_VID(pSma), terrstr()); + goto _err; } STbUidStore uidStore = {0}; @@ -985,7 +991,7 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, return TSDB_CODE_SUCCESS; _err: tdUidStoreDestory(&uidStore); - return TSDB_CODE_FAILED; + return terrno; } /** @@ -1417,6 +1423,7 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { goto _err; } if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo, STREAM_GET_ALL, &pResList, NULL) < 0) { + atomic_store_32(&SMA_RSMA_STAT(pSma)->execStat, terrno); goto _err; } @@ -1448,6 +1455,7 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA .msgStr = POINTER_SHIFT(msg, sizeof(int32_t) + sizeof(int64_t))}; if (!taosArrayPush(pSubmitArr, &packData)) { + terrno = TSDB_CODE_OUT_OF_MEMORY; tdFreeRSmaSubmitItems(pSubmitArr); goto _err; } @@ -1467,6 +1475,7 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA } return TSDB_CODE_SUCCESS; _err: + atomic_store_32(&SMA_RSMA_STAT(pSma)->execStat, terrno); smaError("vgId:%d, batch exec for suid:%" PRIi64 " execType:%d size:%d failed since %s", SMA_VID(pSma), pInfo->suid, type, (int32_t)taosArrayGetSize(pSubmitArr), terrstr()); tdFreeRSmaSubmitItems(pSubmitArr); diff --git a/source/dnode/vnode/src/tsdb/tsdbWrite.c b/source/dnode/vnode/src/tsdb/tsdbWrite.c index 1e6526da48..836fda9903 100644 --- a/source/dnode/vnode/src/tsdb/tsdbWrite.c +++ b/source/dnode/vnode/src/tsdb/tsdbWrite.c @@ -39,7 +39,7 @@ int tsdbInsertData(STsdb *pTsdb, int64_t version, SSubmitReq2 *pMsg, SSubmitRsp2 arrSize = taosArrayGetSize(pMsg->aSubmitTbData); // scan and convert - if (tsdbScanAndConvertSubmitMsg(pTsdb, pMsg) < 0) { + if ((terrno = tsdbScanAndConvertSubmitMsg(pTsdb, pMsg)) < 0) { if (terrno != TSDB_CODE_TDB_TABLE_RECONFIGURE) { tsdbError("vgId:%d, failed to insert data since %s", TD_VID(pTsdb->pVnode), tstrerror(terrno)); } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index b31463ac00..ed86f0c22b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -1669,7 +1669,7 @@ _exit: atomic_add_fetch_64(&pVnode->statis.nBatchInsert, 1); if (code == 0) { atomic_add_fetch_64(&pVnode->statis.nBatchInsertSuccess, 1); - tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len, STREAM_INPUT__DATA_SUBMIT); + code = tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len, STREAM_INPUT__DATA_SUBMIT); } // clear From 349e190120b736830b56b96514ee655b7e1e113b Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 9 Nov 2023 11:23:50 +0800 Subject: [PATCH 31/32] enh: rsma result --- source/dnode/vnode/src/inc/sma.h | 3 +- source/dnode/vnode/src/sma/smaEnv.c | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 90 +++++++++++--------------- 3 files changed, 39 insertions(+), 56 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index f45050bfec..29eaa0509a 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -149,6 +149,7 @@ struct SRSmaInfoItem { tmr_h tmrId; void *pStreamState; void *pStreamTask; // SStreamTask + SArray *pResList; }; struct SRSmaInfo { @@ -218,7 +219,7 @@ static FORCE_INLINE void tdUnRefSmaStat(SSma *pSma, SSmaStat *pStat) { int32_t smaPreClose(SSma *pSma); // rsma -void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree); +void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); int32_t tdRSmaRestore(SSma *pSma, int8_t type, int64_t committedVer, int8_t rollback); int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName); int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type); diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index d47398bdff..dd12f2bca2 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -179,7 +179,7 @@ static void tRSmaInfoHashFreeNode(void *data) { if ((pItem = RSMA_INFO_ITEM((SRSmaInfo *)pRSmaInfo, 1)) && pItem->level) { taosHashRemove(smaMgmt.refHash, &pItem, POINTER_BYTES); } - tdFreeRSmaInfo(pRSmaInfo->pSma, pRSmaInfo, true); + tdFreeRSmaInfo(pRSmaInfo->pSma, pRSmaInfo); } } diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 92494553d0..73a0849ab2 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -45,7 +45,7 @@ static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); static void tdFreeRSmaSubmitItems(SArray *pItems); static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo); static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo, - int32_t execType, SArray **ppResList, int8_t *streamFlushed); + int32_t execType, int8_t *streamFlushed); static void tdRSmaFetchTrigger(void *param, void *tmrId); static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level); static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables); @@ -74,41 +74,39 @@ static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t l * * @param pSma * @param pInfo - * @param isDeepFree Only stop tmrId and free pTSchema for deep free * @return void* */ -void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) { +void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo) { if (pInfo) { - if (isDeepFree) { - for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - SRSmaInfoItem *pItem = &pInfo->items[i]; + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + SRSmaInfoItem *pItem = &pInfo->items[i]; - if (pItem->tmrId) { - smaDebug("vgId:%d, stop fetch timer %p for table %" PRIi64 " level %d", SMA_VID(pSma), pItem->tmrId, - pInfo->suid, i + 1); - taosTmrStopA(&pItem->tmrId); - } - - if (pItem->pStreamState) { - streamStateClose(pItem->pStreamState, false); - } - - if (pItem->pStreamTask) { - tFreeStreamTask(pItem->pStreamTask); - } - tdRSmaQTaskInfoFree(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1); + if (pItem->tmrId) { + smaDebug("vgId:%d, stop fetch timer %p for table %" PRIi64 " level %d", SMA_VID(pSma), pItem->tmrId, + pInfo->suid, i + 1); + taosTmrStopA(&pItem->tmrId); } - taosMemoryFreeClear(pInfo->pTSchema); + if (pItem->pStreamState) { + streamStateClose(pItem->pStreamState, false); + } - if (pInfo->queue) { - taosCloseQueue(pInfo->queue); - pInfo->queue = NULL; - } - if (pInfo->qall) { - taosFreeQall(pInfo->qall); - pInfo->qall = NULL; + if (pItem->pStreamTask) { + tFreeStreamTask(pItem->pStreamTask); } + taosArrayDestroy(pItem->pResList); + tdRSmaQTaskInfoFree(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1); + } + + taosMemoryFreeClear(pInfo->pTSchema); + + if (pInfo->queue) { + taosCloseQueue(pInfo->queue); + pInfo->queue = NULL; + } + if (pInfo->qall) { + taosFreeQall(pInfo->qall); + pInfo->qall = NULL; } taosMemoryFree(pInfo); @@ -311,6 +309,10 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat return TSDB_CODE_FAILED; } + if (!(pItem->pResList = taosArrayInit(1, POINTER_BYTES))) { + return TSDB_CODE_FAILED; + } + if (pItem->fetchResultVer < pItem->submitReqVer) { // fetch the data when reboot pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; @@ -406,7 +408,7 @@ int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con return TSDB_CODE_SUCCESS; _err: - tdFreeRSmaInfo(pSma, pRSmaInfo, true); + tdFreeRSmaInfo(pSma, pRSmaInfo); return TSDB_CODE_FAILED; } @@ -623,27 +625,14 @@ _end: } static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo, - int32_t execType, SArray **ppResList, int8_t *streamFlushed) { + int32_t execType, int8_t *streamFlushed) { int32_t code = 0; int32_t lino = 0; SSDataBlock *output = NULL; - SArray *pResList = NULL; + SArray *pResList = pItem->pResList; STSchema *pTSchema = pInfo->pTSchema; int64_t suid = pInfo->suid; - if (!(*ppResList)) { - pResList = taosArrayInit(1, POINTER_BYTES); - if (pResList == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - *ppResList = pResList; - } else { - pResList = *ppResList; - } - - taosArrayClear(pResList); - while (1) { uint64_t ts; bool hasMore = false; @@ -812,7 +801,6 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t idx = level - 1; void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); - SArray *pResList = NULL; if (!qTaskInfo) { smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, @@ -845,9 +833,8 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, atomic_store_64(&pItem->submitReqVer, packData->ver); } - terrno = tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, &pResList, NULL); + terrno = tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, NULL); - taosArrayDestroy(pResList); return terrno ? TSDB_CODE_FAILED : TDB_CODE_SUCCESS; } @@ -1135,7 +1122,6 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { int32_t nTaskInfo = 0; SSma *pSma = pRSmaStat->pSma; SVnode *pVnode = pSma->pVnode; - SArray *pResList = NULL; if (taosHashGetSize(pInfoHash) <= 0) { return TSDB_CODE_SUCCESS; @@ -1178,7 +1164,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { int8_t streamFlushed = 0; code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo, - STREAM_CHECKPOINT, &pResList, &streamFlushed); + STREAM_CHECKPOINT, &streamFlushed); if (code) { taosHashCancelIterate(pInfoHash, infoHash); TSDB_CHECK_CODE(code, lino, _exit); @@ -1265,7 +1251,6 @@ _checkpoint: } } while (0); _exit: - taosArrayDestroy(pResList); if (code) { smaError("vgId:%d, %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code)); } @@ -1391,7 +1376,6 @@ static void tdFreeRSmaSubmitItems(SArray *pItems) { */ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { SSDataBlock dataBlock = {.info.type = STREAM_GET_ALL}; - SArray *pResList = NULL; for (int8_t i = 1; i <= TSDB_RETENTION_L2; ++i) { SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, i - 1); @@ -1422,7 +1406,7 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { if ((terrno = qSetSMAInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) { goto _err; } - if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo, STREAM_GET_ALL, &pResList, NULL) < 0) { + if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo, STREAM_GET_ALL, NULL) < 0) { atomic_store_32(&SMA_RSMA_STAT(pSma)->execStat, terrno); goto _err; } @@ -1437,10 +1421,8 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { } _end: - taosArrayDestroy(pResList); return TSDB_CODE_SUCCESS; _err: - taosArrayDestroy(pResList); return TSDB_CODE_FAILED; } From 1a950afdb85197dd23426ebb2b79462e8fefacff Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 9 Nov 2023 12:09:27 +0800 Subject: [PATCH 32/32] enh: test case for rsma snapshot --- source/dnode/vnode/src/sma/smaRollup.c | 10 +++++----- source/libs/stream/src/streamTask.c | 1 - .../script/tsim/sync/vnodesnapshot-rsma-test.sim | 16 +++++++++++----- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 73a0849ab2..252a3ade36 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -944,7 +944,7 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, return TDB_CODE_SUCCESS; } - if (0 != (terrno = atomic_load_32(&SMA_RSMA_STAT(pSma)->execStat))) { + if ((terrno = atomic_load_32(&SMA_RSMA_STAT(pSma)->execStat))) { smaError("vgId:%d, failed to process rsma submit since invalid exec code: %s", SMA_VID(pSma), terrstr()); goto _err; } @@ -1224,9 +1224,9 @@ _checkpoint: } taosWLockLatch(&pMeta->lock); - if (0 != streamMetaSaveTask(pMeta, pTask)) { + if (streamMetaSaveTask(pMeta, pTask)) { taosWUnLockLatch(&pMeta->lock); - code = terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY; + code = terrno ? terrno : TSDB_CODE_OUT_OF_MEMORY; taosHashCancelIterate(pInfoHash, infoHash); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1239,9 +1239,9 @@ _checkpoint: } if (pMeta) { taosWLockLatch(&pMeta->lock); - if (0 != streamMetaCommit(pMeta)) { + if (streamMetaCommit(pMeta)) { taosWUnLockLatch(&pMeta->lock); - code = terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY; + code = terrno ? terrno : TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); } taosWUnLockLatch(&pMeta->lock); diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 59002e456a..a7fb590d1b 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -195,7 +195,6 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeU64(pDecoder, &pTask->dataRange.range.minVer)) return -1; if (tDecodeU64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; - if (tDecodeI64(pDecoder, &pTask->dataRange.window.skey)) return -1; if (tDecodeI64(pDecoder, &pTask->dataRange.window.ekey)) return -1; diff --git a/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim b/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim index b1e5ed200f..8b1720d213 100644 --- a/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim +++ b/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim @@ -167,9 +167,6 @@ system sh/exec.sh -n dnode4 -s start sleep 3000 - - - print =============== query data of level 1 sql connect sql use db @@ -181,12 +178,21 @@ if $rows != 100 then return -1 endi +print =============== sleep 5s to wait the result +sleep 5000 + print =============== query data of level 2 sql select * from ct1 where ts > now - 10d -print rows of level 2: $rows print $data00 $data01 $data02 +print $data10 $data11 $data12 +if $rows != 100 then + print rows of level 2: $rows +endi print =============== query data of level 3 sql select * from ct1 -print rows of level 3: $rows print $data00 $data01 $data02 +print $data10 $data11 $data12 +if $rows != 100 then + print rows of level 3: $rows +endi \ No newline at end of file