From e6684fa5de9fd95ed83359a3d8965f5b74cfd987 Mon Sep 17 00:00:00 2001 From: kailixu Date: Tue, 31 Oct 2023 20:26:42 +0800 Subject: [PATCH 01/56] enh: rsma retetion and stream state --- source/common/src/tglobal.c | 2 +- source/dnode/vnode/src/inc/sma.h | 3 +- source/dnode/vnode/src/inc/vnodeInt.h | 3 +- source/dnode/vnode/src/sma/smaCommit.c | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 34 ++++++++++++++++----- source/dnode/vnode/src/sma/smaUtil.c | 5 +++ source/dnode/vnode/src/vnd/vnodeRetention.c | 10 ++++-- 7 files changed, 45 insertions(+), 14 deletions(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index c6cff27011..cc485b16dc 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -590,7 +590,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { tsNumOfVnodeFetchThreads = TMAX(tsNumOfVnodeFetchThreads, 4); if (cfgAddInt32(pCfg, "numOfVnodeFetchThreads", tsNumOfVnodeFetchThreads, 4, 1024, CFG_SCOPE_SERVER) != 0) return -1; - tsNumOfVnodeRsmaThreads = tsNumOfCores; + tsNumOfVnodeRsmaThreads = tsNumOfCores / 4; tsNumOfVnodeRsmaThreads = TMAX(tsNumOfVnodeRsmaThreads, 4); if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER) != 0) return -1; diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index aaf0973b41..5dd7df0962 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -143,6 +143,7 @@ struct SRSmaInfoItem { int32_t maxDelay; // ms tmr_h tmrId; void *pStreamState; + void *pStreamTask; // SStreamTask }; struct SRSmaInfo { @@ -218,7 +219,7 @@ void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree); int32_t tdRSmaRestore(SSma *pSma, int8_t type, int64_t committedVer, int8_t rollback); int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName); int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type); -// int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash); +int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash); int32_t tdRSmaProcessRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer, int8_t rollback); void tdRSmaQTaskInfoGetFullPath(SVnode *pVnode, tb_uid_t suid, int8_t level, STfs *pTfs, char *outputName); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 12e273c32d..c7343b1b42 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -209,6 +209,7 @@ int32_t tsdbBegin(STsdb* pTsdb); // int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo); int32_t tsdbCacheCommit(STsdb* pTsdb); int32_t tsdbCompact(STsdb* pTsdb, SCompactInfo* pInfo); +int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync); // int32_t tsdbFinishCommit(STsdb* pTsdb); // int32_t tsdbRollbackCommit(STsdb* pTsdb); int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq2* pMsg); @@ -274,7 +275,7 @@ int32_t smaPrepareAsyncCommit(SSma* pSma); int32_t smaCommit(SSma* pSma, SCommitInfo* pInfo); int32_t smaFinishCommit(SSma* pSma); int32_t smaPostCommit(SSma* pSma); -int32_t smaDoRetention(SSma* pSma, int64_t now); +int32_t smaRetention(SSma* pSma, int64_t now); int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg); int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg); diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index c26157f4b7..652aab3c01 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -178,7 +178,7 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { if (!isCommit) goto _exit; - // code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); + code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); TSDB_CHECK_CODE(code, lino, _exit); smaInfo("vgId:%d, rsma commit, operator state committed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 14c5baa402..4ea7d4612a 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -90,6 +90,10 @@ void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) { streamStateClose(pItem->pStreamState, false); } + if(isDeepFree && pItem->pStreamTask) { + taosMemoryFreeClear(pItem->pStreamTask); + } + if (isDeepFree && pInfo->taskInfo[i]) { tdRSmaQTaskInfoFree(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1); } @@ -254,11 +258,19 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat taosMemoryFree(s); } - SStreamTask task = {.id.taskId = 0, .id.streamId = 0}; // TODO: assign value - task.pMeta = pVnode->pTq->pStreamMeta; - pStreamState = streamStateOpen(taskInfDir, &task, true, -1, -1); + SStreamTask *pStreamTask = taosMemoryCalloc(1, sizeof(SStreamTask)); + if (!pStreamTask) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return TSDB_CODE_FAILED; + } + pStreamTask->id.taskId = 0; + pStreamTask->id.streamId = pRSmaInfo->suid + idx; + pStreamTask->pMeta = pVnode->pTq->pStreamMeta; + + pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; + taosMemoryFreeClear(pStreamTask); return TSDB_CODE_FAILED; } @@ -268,11 +280,13 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pRSmaInfo->taskInfo[idx] = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle, TD_VID(pVnode), 0); if (!pRSmaInfo->taskInfo[idx]) { terrno = TSDB_CODE_RSMA_QTASKINFO_CREATE; + taosMemoryFreeClear(pStreamTask); return TSDB_CODE_FAILED; } SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]); pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; // fetch the data when reboot pItem->pStreamState = pStreamState; + pItem->pStreamTask = pStreamTask; if (param->maxdelay[idx] < TSDB_MIN_ROLLUP_MAX_DELAY) { int64_t msInterval = convertTimeFromPrecisionToUnit(pRetention[idx + 1].freq, pTsdbCfg->precision, TIME_UNIT_MILLISECOND); @@ -562,7 +576,7 @@ static int32_t tdFetchSubmitReqSuids(SSubmitReq2 *pMsg, STbUidStore *pStore) { * @param now * @return int32_t */ -int32_t smaDoRetention(SSma *pSma, int64_t now) { +int32_t smaRetention(SSma *pSma, int64_t now) { int32_t code = TSDB_CODE_SUCCESS; if (!VND_IS_RSMA(pSma->pVnode)) { return code; @@ -570,8 +584,8 @@ int32_t smaDoRetention(SSma *pSma, int64_t now) { for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pSma->pRSmaTsdb[i]) { - // code = tsdbDoRetention(pSma->pRSmaTsdb[i], now); - // if (code) goto _end; + code = tsdbRetention(pSma->pRSmaTsdb[i], now, pSma->pVnode->config.sttTrigger == 1); + if (code) goto _end; } } @@ -1050,7 +1064,7 @@ _err: return code; } -#if 0 +#if 1 int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { int32_t code = 0; int32_t lino = 0; @@ -1072,6 +1086,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i); +#if 0 if (pItem && pItem->pStreamState) { if (streamStateCommit(pItem->pStreamState) < 0) { code = TSDB_CODE_RSMA_STREAM_STATE_COMMIT; @@ -1080,6 +1095,11 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { smaDebug("vgId:%d, rsma persist, stream state commit success, table %" PRIi64 ", level %d", TD_VID(pVnode), pRSmaInfo->suid, i + 1); } +#endif + if(pItem && pItem->pStreamState) { + + } + } } diff --git a/source/dnode/vnode/src/sma/smaUtil.c b/source/dnode/vnode/src/sma/smaUtil.c index e45cbac329..479c57e65f 100644 --- a/source/dnode/vnode/src/sma/smaUtil.c +++ b/source/dnode/vnode/src/sma/smaUtil.c @@ -30,8 +30,13 @@ void tdRSmaGetDirName(SVnode *pVnode, STfs *pTfs, bool endWithSep, char *outputN offset = strlen(outputName); // rsma +#if 0 snprintf(outputName + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s%s", TD_DIRSEP, VNODE_RSMA_DIR, (endWithSep ? TD_DIRSEP : "")); +#else + snprintf(outputName + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s%s%s%s%s%s", TD_DIRSEP, "tq", TD_DIRSEP, "stream", + TD_DIRSEP, "state", (endWithSep ? TD_DIRSEP : "")); +#endif } // smaXXXUtil ================ diff --git a/source/dnode/vnode/src/vnd/vnodeRetention.c b/source/dnode/vnode/src/vnd/vnodeRetention.c index f3344d1d7d..c510c0fe92 100644 --- a/source/dnode/vnode/src/vnd/vnodeRetention.c +++ b/source/dnode/vnode/src/vnd/vnodeRetention.c @@ -15,8 +15,12 @@ #include "vnd.h" -extern int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync); - int32_t vnodeDoRetention(SVnode *pVnode, int64_t now) { - return tsdbRetention(pVnode->pTsdb, now, pVnode->config.sttTrigger == 1); + int32_t code = TSDB_CODE_SUCCESS; + + code = tsdbRetention(pVnode->pTsdb, now, pVnode->config.sttTrigger == 1); + + if (TSDB_CODE_SUCCESS == code) code = smaRetention(pVnode->pSma, now); + + return code; } \ No newline at end of file From ad1e6accd4f64cfa37ad186c8f93ae987dc7fa55 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 1 Nov 2023 09:45:58 +0800 Subject: [PATCH 02/56] chore: build checkpoint for rsma --- include/libs/stream/tstream.h | 1 + source/common/src/tglobal.c | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 14 ++++++++++---- source/libs/stream/inc/streamInt.h | 1 - 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 629efa00b3..b9c6c905c9 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -772,6 +772,7 @@ void streamMetaInitForSnode(SStreamMeta* pMeta); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask); +int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); void streamTaskClearCheckInfo(SStreamTask* pTask); int32_t streamAlignTransferState(SStreamTask* pTask); diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index cc485b16dc..a5459798fa 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -590,7 +590,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { tsNumOfVnodeFetchThreads = TMAX(tsNumOfVnodeFetchThreads, 4); if (cfgAddInt32(pCfg, "numOfVnodeFetchThreads", tsNumOfVnodeFetchThreads, 4, 1024, CFG_SCOPE_SERVER) != 0) return -1; - tsNumOfVnodeRsmaThreads = tsNumOfCores / 4; + tsNumOfVnodeRsmaThreads = tsNumOfCores / 2; tsNumOfVnodeRsmaThreads = TMAX(tsNumOfVnodeRsmaThreads, 4); if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER) != 0) return -1; diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 4ea7d4612a..980b23986e 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -15,6 +15,7 @@ #include "sma.h" #include "tq.h" +#include "tstream.h" #define RSMA_QTASKEXEC_SMOOTH_SIZE (100) // cnt #define RSMA_SUBMIT_BATCH_SIZE (1024) // cnt @@ -1096,10 +1097,15 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { pRSmaInfo->suid, i + 1); } #endif - if(pItem && pItem->pStreamState) { - - } - + if (pItem && pItem->pStreamState && pItem->pStreamTask) { + SStreamTask *pTask = pItem->pStreamTask; + atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); // adaption for API streamTaskBuildCheckpoint + pTask->checkpointingId = taosGetTimestampNs(); + code = streamTaskBuildCheckpoint(pTask); + TSDB_CHECK_CODE(code, lino, _exit); + smaInfo("vgId:%d, rsma persist, build stream checkpoint success, table:%" PRIi64 ", level:%d, id:%" PRIi64, + TD_VID(pVnode), pRSmaInfo->suid, i + 1, pTask->checkpointingId); + } } } diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 4cd8319a07..1f43e44dca 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -101,7 +101,6 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq); int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId); -int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId); From ab266c712f6fc5078879c1ee0850d87440eab341 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 1 Nov 2023 12:29:54 +0800 Subject: [PATCH 03/56] chore: checkpoint for rsma stream state --- include/common/tmsg.h | 5 ++++ source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 7 +++-- source/dnode/vnode/inc/vnode.h | 2 +- source/dnode/vnode/src/sma/smaOpen.c | 18 ++++++------ source/dnode/vnode/src/sma/smaRollup.c | 8 +++-- source/dnode/vnode/src/tsdb/tsdbRead2.c | 6 ++-- source/dnode/vnode/src/vnd/vnodeCfg.c | 29 ++++++++++--------- source/dnode/vnode/src/vnd/vnodeCommit.c | 6 ++-- .../tsim/sma/rsmaPersistenceRecovery.sim | 2 +- 9 files changed, 48 insertions(+), 35 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 07eb8a461a..aa39a9da30 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -451,6 +451,11 @@ typedef struct SRetention { int8_t keepUnit; } SRetention; +typedef struct SRetentionEx { + SRetention rtn; + int64_t checkpointId; +} SRetentionEx; + #define RETENTION_VALID(l, r) ((((l) == 0 && (r)->freq >= 0) || ((r)->freq > 0)) && ((r)->keep > 0)) #pragma pack(push, 1) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index c4d525a871..5a4b341662 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -134,10 +134,11 @@ static void vmGenerateVnodeCfg(SCreateVnodeReq *pCreate, SVnodeCfg *pCfg) { pCfg->tsdbCfg.minRows = pCreate->minRows; pCfg->tsdbCfg.maxRows = pCreate->maxRows; for (size_t i = 0; i < taosArrayGetSize(pCreate->pRetensions); ++i) { - SRetention *pRetention = &pCfg->tsdbCfg.retentions[i]; - memcpy(pRetention, taosArrayGet(pCreate->pRetensions, i), sizeof(SRetention)); + SRetentionEx *pRetention = &pCfg->tsdbCfg.retentions[i]; + memcpy(&pRetention->rtn, taosArrayGet(pCreate->pRetensions, i), sizeof(SRetention)); + pRetention->checkpointId = -1; if (i == 0) { - if ((pRetention->freq >= 0 && pRetention->keep > 0)) pCfg->isRsma = 1; + if ((pRetention->rtn.freq >= 0 && pRetention->rtn.keep > 0)) pCfg->isRsma = 1; } } diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 6a0c991be4..e92fc04f6e 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -287,7 +287,7 @@ struct STsdbCfg { int32_t keep1; // just for save config, don't use in tsdbRead/tsdbCommit/..., and use STsdbKeepCfg in STsdb instead int32_t keep2; // just for save config, don't use in tsdbRead/tsdbCommit/..., and use STsdbKeepCfg in STsdb instead int32_t keepTimeOffset; // just for save config, use STsdbKeepCfg in STsdb instead - SRetention retentions[TSDB_RETENTION_MAX]; + SRetentionEx retentions[TSDB_RETENTION_MAX]; }; typedef struct { diff --git a/source/dnode/vnode/src/sma/smaOpen.c b/source/dnode/vnode/src/sma/smaOpen.c index 633e096314..cea4ccb1b7 100644 --- a/source/dnode/vnode/src/sma/smaOpen.c +++ b/source/dnode/vnode/src/sma/smaOpen.c @@ -16,13 +16,13 @@ #include "sma.h" #include "tsdb.h" -static int32_t smaEvalDays(SVnode *pVnode, SRetention *r, int8_t level, int8_t precision, int32_t duration); +static int32_t smaEvalDays(SVnode *pVnode, SRetentionEx *r, int8_t level, int8_t precision, int32_t duration); static int32_t smaSetKeepCfg(SVnode *pVnode, STsdbKeepCfg *pKeepCfg, STsdbCfg *pCfg, int type); static int32_t rsmaRestore(SSma *pSma); #define SMA_SET_KEEP_CFG(v, l) \ do { \ - SRetention *r = &pCfg->retentions[l]; \ + SRetention *r = &(pCfg->retentions[l].rtn); \ pKeepCfg->keep2 = convertTimeFromPrecisionToUnit(r->keep, pCfg->precision, TIME_UNIT_MINUTE); \ pKeepCfg->keep0 = pKeepCfg->keep2; \ pKeepCfg->keep1 = pKeepCfg->keep2; \ @@ -32,7 +32,7 @@ static int32_t rsmaRestore(SSma *pSma); #define SMA_OPEN_RSMA_IMPL(v, l, force) \ do { \ - SRetention *r = (SRetention *)VND_RETENTIONS(v) + l; \ + SRetention *r = &(((SRetentionEx *)VND_RETENTIONS(v) + l)->rtn); \ if (!RETENTION_VALID(l, r)) { \ if (l == 0) { \ code = TSDB_CODE_INVALID_PARA; \ @@ -59,9 +59,9 @@ static int32_t rsmaRestore(SSma *pSma); * @param duration * @return int32_t */ -static int32_t smaEvalDays(SVnode *pVnode, SRetention *r, int8_t level, int8_t precision, int32_t duration) { - int32_t freqDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->freq, precision, TIME_UNIT_MINUTE); - int32_t keepDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->keep, precision, TIME_UNIT_MINUTE); +static int32_t smaEvalDays(SVnode *pVnode, SRetentionEx *r, int8_t level, int8_t precision, int32_t duration) { + int32_t freqDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->rtn.freq, precision, TIME_UNIT_MINUTE); + int32_t keepDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->rtn.keep, precision, TIME_UNIT_MINUTE); int32_t days = duration; // min if (days < freqDuration) { @@ -76,10 +76,10 @@ static int32_t smaEvalDays(SVnode *pVnode, SRetention *r, int8_t level, int8_t p goto _exit; } - freqDuration = convertTimeFromPrecisionToUnit((r + level)->freq, precision, TIME_UNIT_MINUTE); - keepDuration = convertTimeFromPrecisionToUnit((r + level)->keep, precision, TIME_UNIT_MINUTE); + freqDuration = convertTimeFromPrecisionToUnit((r + level)->rtn.freq, precision, TIME_UNIT_MINUTE); + keepDuration = convertTimeFromPrecisionToUnit((r + level)->rtn.keep, precision, TIME_UNIT_MINUTE); - int32_t nFreqTimes = (r + level)->freq / (60 * 1000); // use 60s for freq of 1st level + int32_t nFreqTimes = (r + level)->rtn.freq / (60 * 1000); // use 60s for freq of 1st level days *= (nFreqTimes > 1 ? nFreqTimes : 1); if (days < freqDuration) { diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 980b23986e..1b13f37141 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -266,7 +266,9 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat } pStreamTask->id.taskId = 0; pStreamTask->id.streamId = pRSmaInfo->suid + idx; + pStreamTask->chkInfo.startTs = taosGetTimestampMs(); pStreamTask->pMeta = pVnode->pTq->pStreamMeta; + pStreamTask->chkInfo.checkpointId = pTsdbCfg->retentions[idx + 1].checkpointId; pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { @@ -1096,16 +1098,18 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { smaDebug("vgId:%d, rsma persist, stream state commit success, table %" PRIi64 ", level %d", TD_VID(pVnode), pRSmaInfo->suid, i + 1); } -#endif - if (pItem && pItem->pStreamState && pItem->pStreamTask) { +#else + if (pItem) { SStreamTask *pTask = pItem->pStreamTask; atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); // adaption for API streamTaskBuildCheckpoint pTask->checkpointingId = taosGetTimestampNs(); code = streamTaskBuildCheckpoint(pTask); TSDB_CHECK_CODE(code, lino, _exit); + (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; smaInfo("vgId:%d, rsma persist, build stream checkpoint success, table:%" PRIi64 ", level:%d, id:%" PRIi64, TD_VID(pVnode), pRSmaInfo->suid, i + 1, pTask->checkpointingId); } +#endif } } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index d1919d95ba..be88a5a435 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -49,7 +49,7 @@ static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo STsdbReader* pReader); static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, +static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetentionEx* retentions, const char* idstr, int8_t* pLevel); static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); @@ -3140,7 +3140,7 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { } } -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr, +static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetentionEx* retentions, const char* idStr, int8_t* pLevel) { if (VND_IS_RSMA(pVnode)) { int8_t level = 0; @@ -3151,7 +3151,7 @@ static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* ret : 1000000L); for (int8_t i = 0; i < TSDB_RETENTION_MAX; ++i) { - SRetention* pRetention = retentions + level; + SRetention* pRetention = &((retentions + level)->rtn); if (pRetention->keep <= 0) { if (level > 0) { --level; diff --git a/source/dnode/vnode/src/vnd/vnodeCfg.c b/source/dnode/vnode/src/vnd/vnodeCfg.c index 07bfa6c719..d429eb2a94 100644 --- a/source/dnode/vnode/src/vnd/vnodeCfg.c +++ b/source/dnode/vnode/src/vnd/vnodeCfg.c @@ -106,23 +106,24 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) { if (tjsonAddIntegerToObject(pJson, "keep1", pCfg->tsdbCfg.keep1) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "keep2", pCfg->tsdbCfg.keep2) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "keepTimeOffset", pCfg->tsdbCfg.keepTimeOffset) < 0) return -1; - if (pCfg->tsdbCfg.retentions[0].keep > 0) { + if (pCfg->tsdbCfg.retentions[0].rtn.keep > 0) { int32_t nRetention = 1; - if (pCfg->tsdbCfg.retentions[1].freq > 0) { + if (pCfg->tsdbCfg.retentions[1].rtn.freq > 0) { ++nRetention; - if (pCfg->tsdbCfg.retentions[2].freq > 0) { + if (pCfg->tsdbCfg.retentions[2].rtn.freq > 0) { ++nRetention; } } SJson *pNodeRetentions = tjsonCreateArray(); tjsonAddItemToObject(pJson, "retentions", pNodeRetentions); for (int32_t i = 0; i < nRetention; ++i) { - SJson *pNodeRetention = tjsonCreateObject(); - const SRetention *pRetention = pCfg->tsdbCfg.retentions + i; - tjsonAddIntegerToObject(pNodeRetention, "freq", pRetention->freq); - tjsonAddIntegerToObject(pNodeRetention, "freqUnit", pRetention->freqUnit); - tjsonAddIntegerToObject(pNodeRetention, "keep", pRetention->keep); - tjsonAddIntegerToObject(pNodeRetention, "keepUnit", pRetention->keepUnit); + SJson *pNodeRetention = tjsonCreateObject(); + const SRetentionEx *pRetention = pCfg->tsdbCfg.retentions + i; + tjsonAddIntegerToObject(pNodeRetention, "freq", pRetention->rtn.freq); + tjsonAddIntegerToObject(pNodeRetention, "freqUnit", pRetention->rtn.freqUnit); + tjsonAddIntegerToObject(pNodeRetention, "keep", pRetention->rtn.keep); + tjsonAddIntegerToObject(pNodeRetention, "keepUnit", pRetention->rtn.keepUnit); + tjsonAddIntegerToObject(pNodeRetention, "checkpointId", pRetention->checkpointId); tjsonAddItemToArray(pNodeRetentions, pNodeRetention); } } @@ -231,10 +232,12 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) { for (int32_t i = 0; i < nRetention; ++i) { SJson *pNodeRetention = tjsonGetArrayItem(pNodeRetentions, i); ASSERT(pNodeRetention != NULL); - tjsonGetNumberValue(pNodeRetention, "freq", (pCfg->tsdbCfg.retentions)[i].freq, code); - tjsonGetNumberValue(pNodeRetention, "freqUnit", (pCfg->tsdbCfg.retentions)[i].freqUnit, code); - tjsonGetNumberValue(pNodeRetention, "keep", (pCfg->tsdbCfg.retentions)[i].keep, code); - tjsonGetNumberValue(pNodeRetention, "keepUnit", (pCfg->tsdbCfg.retentions)[i].keepUnit, code); + SRetentionEx *pRetention = &(pCfg->tsdbCfg.retentions[i]); + tjsonGetNumberValue(pNodeRetention, "freq", pRetention->rtn.freq, code); + tjsonGetNumberValue(pNodeRetention, "freqUnit", pRetention->rtn.freqUnit, code); + tjsonGetNumberValue(pNodeRetention, "keep", pRetention->rtn.keep, code); + tjsonGetNumberValue(pNodeRetention, "keepUnit", pRetention->rtn.keepUnit, code); + tjsonGetNumberValue(pNodeRetention, "checkpointId", pRetention->checkpointId, code); } tjsonGetNumberValue(pJson, "wal.vgId", pCfg->walCfg.vgId, code); if (code < 0) return -1; diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 50ca2f5d03..ca4335f391 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -290,6 +290,9 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { tsem_wait(&pVnode->canCommit); if(syncNodeGetConfig(pVnode->sync, &pVnode->config.syncCfg) != 0) goto _exit; + + code = smaPrepareAsyncCommit(pVnode->pSma); + if (code) goto _exit; pVnode->state.commitTerm = pVnode->state.applyTerm; @@ -313,9 +316,6 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { metaPrepareAsyncCommit(pVnode->pMeta); - code = smaPrepareAsyncCommit(pVnode->pSma); - if (code) goto _exit; - taosThreadMutexLock(&pVnode->mutex); ASSERT(pVnode->onCommit == NULL); pVnode->onCommit = pVnode->inUse; diff --git a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim index 6f78829db7..c70f2dc20a 100644 --- a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim +++ b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim @@ -5,7 +5,7 @@ sleep 50 sql connect #todo wait for streamState checkpoint -return 1 +#return 1 print =============== create database with retentions sql create database d0 retentions -:7d,5m:21d,15m:365d; From 59be62c96eb182873317ee2ef8319595d499c565 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 1 Nov 2023 15:34:17 +0800 Subject: [PATCH 04/56] chore: set stream input checkpoint --- include/common/tmsg.h | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 43 +++++++++++++------------- source/libs/executor/src/executor.c | 9 ++++++ 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index aa39a9da30..323772af95 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -453,7 +453,7 @@ typedef struct SRetention { typedef struct SRetentionEx { SRetention rtn; - int64_t checkpointId; + int64_t checkpointId; } SRetentionEx; #define RETENTION_VALID(l, r) ((((l) == 0 && (r)->freq >= 0) || ((r)->freq > 0)) && ((r)->keep > 0)) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 1b13f37141..7c7f9fad25 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -259,7 +259,7 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat taosMemoryFree(s); } - SStreamTask *pStreamTask = taosMemoryCalloc(1, sizeof(SStreamTask)); + SStreamTask *pStreamTask = taosMemoryCalloc(1, sizeof(*pStreamTask)); if (!pStreamTask) { terrno = TSDB_CODE_OUT_OF_MEMORY; return TSDB_CODE_FAILED; @@ -269,7 +269,6 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->chkInfo.startTs = taosGetTimestampMs(); pStreamTask->pMeta = pVnode->pTq->pStreamMeta; pStreamTask->chkInfo.checkpointId = pTsdbCfg->retentions[idx + 1].checkpointId; - pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; @@ -286,6 +285,14 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat taosMemoryFreeClear(pStreamTask); return TSDB_CODE_FAILED; } + + if (pStreamTask->chkInfo.checkpointId != -1) { + SSDataBlock dataBlock = {.info.type = STREAM_CHECKPOINT}; + if ((terrno = qSetSMAInput(pRSmaInfo->taskInfo[idx], &dataBlock, 1, STREAM_INPUT__CHECKPOINT)) < 0) { + return TSDB_CODE_FAILED; + } + } + SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]); pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; // fetch the data when reboot pItem->pStreamState = pStreamState; @@ -308,10 +315,10 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat taosTmrReset(tdRSmaFetchTrigger, RSMA_FETCH_INTERVAL, pItem, smaMgmt.tmrHandle, &pItem->tmrId); - smaInfo("vgId:%d, item:%p table:%" PRIi64 " level:%" PRIi8 " maxdelay:%" PRIi64 " watermark:%" PRIi64 - ", finally maxdelay:%" PRIi32, - TD_VID(pVnode), pItem, pRSmaInfo->suid, (int8_t)(idx + 1), param->maxdelay[idx], param->watermark[idx], - pItem->maxDelay); + smaInfo("vgId:%d, open task:%p table:%" PRIi64 " level:%" PRIi8 ", checkpointId:%" PRIi64 ", maxdelay:%" PRIi64 + " watermark:%" PRIi64 ", finally maxdelay:%" PRIi32, + TD_VID(pVnode), pItem->pStreamTask, pRSmaInfo->suid, (int8_t)(idx + 1), pStreamTask->chkInfo.checkpointId, + param->maxdelay[idx], param->watermark[idx], pItem->maxDelay); } return TSDB_CODE_SUCCESS; } @@ -1089,27 +1096,21 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i); -#if 0 - if (pItem && pItem->pStreamState) { - if (streamStateCommit(pItem->pStreamState) < 0) { - code = TSDB_CODE_RSMA_STREAM_STATE_COMMIT; - TSDB_CHECK_CODE(code, lino, _exit); - } - smaDebug("vgId:%d, rsma persist, stream state commit success, table %" PRIi64 ", level %d", TD_VID(pVnode), - pRSmaInfo->suid, i + 1); - } -#else - if (pItem) { + if (pItem && pItem->pStreamTask) { SStreamTask *pTask = pItem->pStreamTask; - atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); // adaption for API streamTaskBuildCheckpoint + // adaption for API streamTaskBuildCheckpoint + atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); pTask->checkpointingId = taosGetTimestampNs(); code = streamTaskBuildCheckpoint(pTask); TSDB_CHECK_CODE(code, lino, _exit); + + // save checkpointId to vnode.json (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; - smaInfo("vgId:%d, rsma persist, build stream checkpoint success, table:%" PRIi64 ", level:%d, id:%" PRIi64, - TD_VID(pVnode), pRSmaInfo->suid, i + 1, pTask->checkpointingId); + + smaInfo("vgId:%d, commit task:%p, build stream checkpoint success, table:%" PRIi64 + ", level:%d, checkpointId:%" PRIi64, + TD_VID(pVnode), pTask, pRSmaInfo->suid, i + 1, pTask->checkpointingId); } -#endif } } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 60dc6f0185..2eac04db88 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -75,6 +75,15 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf taosArrayPush(pInfo->pBlockLists, &tmp); } pInfo->blockType = STREAM_INPUT__DATA_BLOCK; + } else if (type == STREAM_INPUT__CHECKPOINT) { + for (int32_t i = 0; i < numOfBlocks; ++i) { + SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; + SPackedData tmp = { + .pDataBlock = pDataBlock, + }; + taosArrayPush(pInfo->pBlockLists, &tmp); + } + pInfo->blockType = STREAM_INPUT__CHECKPOINT; } return TSDB_CODE_SUCCESS; From c32e60d199fa99cf97398dfa508975f24a09b294 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 1 Nov 2023 16:27:41 +0800 Subject: [PATCH 05/56] chore: more code for rsma checkpoint --- source/dnode/vnode/src/inc/sma.h | 1 + source/dnode/vnode/src/sma/smaRollup.c | 5 +++-- source/libs/executor/src/executor.c | 8 ++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 5dd7df0962..48e9aed6c2 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -159,6 +159,7 @@ struct SRSmaInfo { void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t STaosQueue *queue; // buffer queue of SubmitReq STaosQall *qall; // buffer qall of SubmitReq + SSDataBlock dataBlock; }; #define RSMA_INFO_HEAD_LEN offsetof(SRSmaInfo, items) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 7c7f9fad25..303c222ae5 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -287,8 +287,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat } if (pStreamTask->chkInfo.checkpointId != -1) { - SSDataBlock dataBlock = {.info.type = STREAM_CHECKPOINT}; - if ((terrno = qSetSMAInput(pRSmaInfo->taskInfo[idx], &dataBlock, 1, STREAM_INPUT__CHECKPOINT)) < 0) { + SSDataBlock *pDataBlock = &pRSmaInfo->dataBlock; + if ((terrno = qSetSMAInput(pRSmaInfo->taskInfo[idx], pDataBlock, 1, STREAM_INPUT__CHECKPOINT)) < 0) { return TSDB_CODE_FAILED; } } @@ -370,6 +370,7 @@ int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con pRSmaInfo->pSma = pSma; pRSmaInfo->pTSchema = pTSchema; pRSmaInfo->suid = suid; + pRSmaInfo->dataBlock.info.type = STREAM_CHECKPOINT; T_REF_INIT_VAL(pRSmaInfo, 1); if (!(pRSmaInfo->queue = taosOpenQueue()) || !(pRSmaInfo->qall = taosAllocateQall()) || diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 2eac04db88..b46ae9e1c0 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -58,17 +58,20 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf SStreamScanInfo* pInfo = pOperator->info; if (type == STREAM_INPUT__MERGED_SUBMIT) { + qInfo("%s:%d type:%d, pDataBlock->info.type(N/A)", __func__, __LINE__, type); for (int32_t i = 0; i < numOfBlocks; i++) { SPackedData* pReq = POINTER_SHIFT(input, i * sizeof(SPackedData)); taosArrayPush(pInfo->pBlockLists, pReq); } pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; } else if (type == STREAM_INPUT__DATA_SUBMIT) { + qInfo("%s:%d type:%d, pDataBlock->info.type(N/A)", __func__, __LINE__, type); taosArrayPush(pInfo->pBlockLists, &input); pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; } else if (type == STREAM_INPUT__DATA_BLOCK) { for (int32_t i = 0; i < numOfBlocks; ++i) { SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; + qInfo("%s:%d type:%d, pDataBlock->info.type:%d", __func__, __LINE__, type, pDataBlock->info.type); SPackedData tmp = { .pDataBlock = pDataBlock, }; @@ -78,8 +81,9 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf } else if (type == STREAM_INPUT__CHECKPOINT) { for (int32_t i = 0; i < numOfBlocks; ++i) { SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; - SPackedData tmp = { - .pDataBlock = pDataBlock, + qInfo("%s:%d type:%d, pDataBlock->info.type:%d", __func__, __LINE__, type, pDataBlock->info.type); + SPackedData tmp = { + .pDataBlock = pDataBlock, }; taosArrayPush(pInfo->pBlockLists, &tmp); } From 3803f952f9d763f8b27b0e706fe39f9ad3306ebf Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 1 Nov 2023 20:05:00 +0800 Subject: [PATCH 06/56] chore: rsma checkpoint verify --- source/dnode/vnode/src/inc/sma.h | 5 +- source/dnode/vnode/src/sma/smaCommit.c | 2 + source/dnode/vnode/src/sma/smaEnv.c | 1 + source/dnode/vnode/src/sma/smaRollup.c | 181 +++++++++++++++++++----- source/libs/executor/src/executor.c | 2 +- source/libs/executor/src/scanoperator.c | 7 +- 6 files changed, 156 insertions(+), 42 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 48e9aed6c2..a8807483f6 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -115,6 +115,7 @@ struct SRSmaStat { SRSmaFS fs; // for recovery/snapshot r/w SHashObj *infoHash; // key: suid, value: SRSmaInfo tsem_t notEmpty; // has items in queue buffer + SSDataBlock dataBlock; }; struct SSmaStat { @@ -140,7 +141,8 @@ struct SRSmaInfoItem { int8_t fetchLevel : 4; int8_t triggerStat; uint16_t nScanned; - int32_t maxDelay; // ms + int32_t streamFlushed : 1; + int32_t maxDelay : 31; // ms tmr_h tmrId; void *pStreamState; void *pStreamTask; // SStreamTask @@ -159,7 +161,6 @@ struct SRSmaInfo { void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t STaosQueue *queue; // buffer queue of SubmitReq STaosQall *qall; // buffer qall of SubmitReq - SSDataBlock dataBlock; }; #define RSMA_INFO_HEAD_LEN offsetof(SRSmaInfo, items) diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 652aab3c01..5a6144b3fa 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -178,6 +178,8 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { if (!isCommit) goto _exit; + + code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index 04a254fc7a..94ecb46473 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -209,6 +209,7 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS pRSmaStat->pSma = (SSma *)pSma; atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_INIT); tsem_init(&pRSmaStat->notEmpty, 0, 0); + pRSmaStat->dataBlock.info.type = STREAM_CHECKPOINT; // init smaMgmt smaInit(); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 303c222ae5..88bdc2df1d 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -78,32 +78,26 @@ static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t l */ void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) { if (pInfo) { - for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - SRSmaInfoItem *pItem = &pInfo->items[i]; + if (isDeepFree) { + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + SRSmaInfoItem *pItem = &pInfo->items[i]; - if (isDeepFree && pItem->tmrId) { - smaDebug("vgId:%d, stop fetch timer %p for table %" PRIi64 " level %d", SMA_VID(pSma), pItem->tmrId, - pInfo->suid, i + 1); - taosTmrStopA(&pItem->tmrId); - } + if (pItem->tmrId) { + smaDebug("vgId:%d, stop fetch timer %p for table %" PRIi64 " level %d", SMA_VID(pSma), pItem->tmrId, + pInfo->suid, i + 1); + taosTmrStopA(&pItem->tmrId); + } - if (isDeepFree && pItem->pStreamState) { - streamStateClose(pItem->pStreamState, false); - } + if (pItem->pStreamState) { + streamStateClose(pItem->pStreamState, false); + } - if(isDeepFree && pItem->pStreamTask) { taosMemoryFreeClear(pItem->pStreamTask); - } - - if (isDeepFree && pInfo->taskInfo[i]) { tdRSmaQTaskInfoFree(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1); } - } - if (isDeepFree) { - taosMemoryFreeClear(pInfo->pTSchema); - } - if (isDeepFree) { + taosMemoryFreeClear(pInfo->pTSchema); + if (pInfo->queue) { taosCloseQueue(pInfo->queue); pInfo->queue = NULL; @@ -286,13 +280,6 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat return TSDB_CODE_FAILED; } - if (pStreamTask->chkInfo.checkpointId != -1) { - SSDataBlock *pDataBlock = &pRSmaInfo->dataBlock; - if ((terrno = qSetSMAInput(pRSmaInfo->taskInfo[idx], pDataBlock, 1, STREAM_INPUT__CHECKPOINT)) < 0) { - return TSDB_CODE_FAILED; - } - } - SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]); pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; // fetch the data when reboot pItem->pStreamState = pStreamState; @@ -370,7 +357,6 @@ int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con pRSmaInfo->pSma = pSma; pRSmaInfo->pTSchema = pTSchema; pRSmaInfo->suid = suid; - pRSmaInfo->dataBlock.info.type = STREAM_CHECKPOINT; T_REF_INIT_VAL(pRSmaInfo, 1); if (!(pRSmaInfo->queue = taosOpenQueue()) || !(pRSmaInfo->qall = taosAllocateQall()) || @@ -1075,22 +1061,145 @@ _err: return code; } -#if 1 + +static int32_t tdRSmaExecVerifyCheckPoint(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, + int64_t suid, SArray **ppResList, int8_t *streamFlushed) { + int32_t code = 0; + int32_t lino = 0; + SSDataBlock *output = NULL; + SArray *pResList = NULL; + + if (!(*ppResList)) { + pResList = taosArrayInit(1, POINTER_BYTES); + if (pResList == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + *ppResList = pResList; + } else { + pResList = *ppResList; + } + + while (1) { + uint64_t ts; + bool hasMore = false; + code = qExecTaskOpt(taskInfo, pResList, &ts, &hasMore, NULL); + if (code == TSDB_CODE_QRY_IN_EXEC) { + code = 0; + break; + } + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(pResList) == 0) { + break; + } +#if 0 + char flag[10] = {0}; + snprintf(flag, 10, "level %" PRIi8, pItem->level); + blockDebugShowDataBlocks(pResList, flag); +#endif + for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) { + output = taosArrayGetP(pResList, i); + if(output->info.type == STREAM_CHECKPOINT) { + if (streamFlushed) *streamFlushed = 1; + continue; + } + smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma), + output->info.id.uid, output->info.id.groupId, output->info.rows); + + STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); + SSubmitReq2 *pReq = NULL; + + // TODO: the schema update should be handled later(TD-17965) + if (buildSubmitReqFromDataBlock(&pReq, output, pTSchema, output->info.id.groupId, SMA_VID(pSma), suid) < 0) { + code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) { + code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; + tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); + taosMemoryFree(pReq); + TSDB_CHECK_CODE(code, lino, _exit); + } + + smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64, + SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, output->info.version); + + if (pReq) { + tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); + taosMemoryFree(pReq); + } + } + } +_exit: + if (code) { + smaError("vgId:%d, %s failed at line %d since %s, suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIi64 + ", ver:%" PRIi64, + SMA_VID(pSma), __func__, lino, tstrerror(code), suid, pItem->level, output ? output->info.id.uid : -1, + output ? output->info.version : -1); + } else { + smaDebug("vgId:%d, %s succeed, suid:%" PRIi64 ", level:%" PRIi8, SMA_VID(pSma), __func__, suid, pItem->level); + } + taosArrayDestroy(pResList); + qCleanExecTaskBlockBuf(taskInfo); + return code; +} + int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { - int32_t code = 0; - int32_t lino = 0; - SSma *pSma = pRSmaStat->pSma; - SVnode *pVnode = pSma->pVnode; - SRSmaFS fs = {0}; + int32_t code = 0; + int32_t lino = 0; + int32_t nTaskInfo = 0; + SSma *pSma = pRSmaStat->pSma; + SVnode *pVnode = pSma->pVnode; + SSDataBlock *pDataBlock = &pRSmaStat->dataBlock; + SArray *pResList = NULL; + SRSmaFS fs = {0}; if (taosHashGetSize(pInfoHash) <= 0) { return TSDB_CODE_SUCCESS; } void *infoHash = NULL; + // stream state: trigger checkpoint while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; + if (RSMA_INFO_IS_DEL(pRSmaInfo)) { + continue; + } + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + if (pRSmaInfo->taskInfo[i]) { + code = qSetSMAInput(pRSmaInfo->taskInfo[i], pDataBlock, 1, STREAM_INPUT__CHECKPOINT); + TSDB_CHECK_CODE(code, lino, _exit); + pRSmaInfo->items[i].streamFlushed = 0; + ++nTaskInfo; + } + } + } + // stream state: process checkpoint response in async mode + int32_t nStreamFlushed = 0; + while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; + if (RSMA_INFO_IS_DEL(pRSmaInfo)) { + continue; + } + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { + int8_t streamFlushed = 0; + code = tdRSmaExecVerifyCheckPoint(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo->pTSchema, + pRSmaInfo->suid, &pResList, &streamFlushed); + TSDB_CHECK_CODE(code, lino, _exit); + if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { + goto _checkpoint; + } + } + } + } + // stream state: build checkpoint in backend +_checkpoint: + while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { continue; } @@ -1100,11 +1209,11 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { if (pItem && pItem->pStreamTask) { SStreamTask *pTask = pItem->pStreamTask; // adaption for API streamTaskBuildCheckpoint - atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); + atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); pTask->checkpointingId = taosGetTimestampNs(); code = streamTaskBuildCheckpoint(pTask); TSDB_CHECK_CODE(code, lino, _exit); - + // save checkpointId to vnode.json (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; @@ -1123,7 +1232,7 @@ _exit: terrno = code; return code; } -#endif + /** * @brief trigger to get rsma result in async mode * diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index b46ae9e1c0..c08a2d38f9 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -646,7 +646,7 @@ int32_t qExecTaskOpt(qTaskInfo_t tinfo, SArray* pResList, uint64_t* useconds, bo blockIndex += 1; current += p->info.rows; - ASSERT(p->info.rows > 0); + ASSERT(p->info.rows > 0 || p->info.type == STREAM_CHECKPOINT); taosArrayPush(pResList, &p); if (current >= rowsThreshold) { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index efbc978323..b7071d3f52 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2331,11 +2331,12 @@ FETCH_NEXT_BLOCK: return NULL; } - int32_t current = pInfo->validBlockIndex++; - qDebug("process %d/%d input data blocks, %s", current, (int32_t) total, id); + int32_t current = pInfo->validBlockIndex++; + qDebug("process %d/%d input data blocks, %s", current, (int32_t)total, id); SPackedData* pData = taosArrayGet(pInfo->pBlockLists, current); - SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0); + // SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0); + SSDataBlock* pBlock = pData->pDataBlock; if (pBlock->info.type == STREAM_CHECKPOINT) { streamScanOperatorSaveCheckpoint(pInfo); From bacf771ada1544cc109bd7657a086a77815fb27e Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 08:55:01 +0800 Subject: [PATCH 07/56] chore: test case for rsma persist --- tests/script/tsim/sma/rsmaPersistenceRecovery.sim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim index c70f2dc20a..6f78829db7 100644 --- a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim +++ b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim @@ -5,7 +5,7 @@ sleep 50 sql connect #todo wait for streamState checkpoint -#return 1 +return 1 print =============== create database with retentions sql create database d0 retentions -:7d,5m:21d,15m:365d; From 9649e87cabf416c2a151178607116351886e65ae Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 09:44:43 +0800 Subject: [PATCH 08/56] fix: rsma checkpoint --- source/dnode/vnode/src/inc/sma.h | 2 +- source/dnode/vnode/src/sma/smaCommit.c | 2 -- source/dnode/vnode/src/sma/smaEnv.c | 8 +++++++- source/dnode/vnode/src/sma/smaRollup.c | 3 +-- source/libs/executor/src/executor.c | 17 +++-------------- source/libs/executor/src/scanoperator.c | 3 +-- 6 files changed, 13 insertions(+), 22 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index a8807483f6..bce5e1b0b2 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -115,7 +115,7 @@ struct SRSmaStat { SRSmaFS fs; // for recovery/snapshot r/w SHashObj *infoHash; // key: suid, value: SRSmaInfo tsem_t notEmpty; // has items in queue buffer - SSDataBlock dataBlock; + SArray *blocks; // SArray }; struct SSmaStat { diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 5a6144b3fa..652aab3c01 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -178,8 +178,6 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { if (!isCommit) goto _exit; - - code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index 94ecb46473..d47398bdff 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -209,7 +209,12 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS pRSmaStat->pSma = (SSma *)pSma; atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_INIT); tsem_init(&pRSmaStat->notEmpty, 0, 0); - pRSmaStat->dataBlock.info.type = STREAM_CHECKPOINT; + if (!(pRSmaStat->blocks = taosArrayInit(1, sizeof(SSDataBlock)))) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + SSDataBlock datablock = {.info.type = STREAM_CHECKPOINT}; + taosArrayPush(pRSmaStat->blocks, &datablock); // init smaMgmt smaInit(); @@ -291,6 +296,7 @@ static void tdDestroyRSmaStat(void *pRSmaStat) { // step 5: free pStat tsem_destroy(&(pStat->notEmpty)); + taosArrayDestroy(pStat->blocks); taosMemoryFreeClear(pStat); } } diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 88bdc2df1d..8e00297564 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1152,7 +1152,6 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { int32_t nTaskInfo = 0; SSma *pSma = pRSmaStat->pSma; SVnode *pVnode = pSma->pVnode; - SSDataBlock *pDataBlock = &pRSmaStat->dataBlock; SArray *pResList = NULL; SRSmaFS fs = {0}; @@ -1169,7 +1168,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { } for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pRSmaInfo->taskInfo[i]) { - code = qSetSMAInput(pRSmaInfo->taskInfo[i], pDataBlock, 1, STREAM_INPUT__CHECKPOINT); + code = qSetSMAInput(pRSmaInfo->taskInfo[i], pRSmaStat->blocks, 1, STREAM_INPUT__CHECKPOINT); TSDB_CHECK_CODE(code, lino, _exit); pRSmaInfo->items[i].streamFlushed = 0; ++nTaskInfo; diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index c08a2d38f9..8117ceb55c 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -58,35 +58,24 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf SStreamScanInfo* pInfo = pOperator->info; if (type == STREAM_INPUT__MERGED_SUBMIT) { - qInfo("%s:%d type:%d, pDataBlock->info.type(N/A)", __func__, __LINE__, type); for (int32_t i = 0; i < numOfBlocks; i++) { SPackedData* pReq = POINTER_SHIFT(input, i * sizeof(SPackedData)); taosArrayPush(pInfo->pBlockLists, pReq); } pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; } else if (type == STREAM_INPUT__DATA_SUBMIT) { - qInfo("%s:%d type:%d, pDataBlock->info.type(N/A)", __func__, __LINE__, type); taosArrayPush(pInfo->pBlockLists, &input); pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; } else if (type == STREAM_INPUT__DATA_BLOCK) { for (int32_t i = 0; i < numOfBlocks; ++i) { SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; - qInfo("%s:%d type:%d, pDataBlock->info.type:%d", __func__, __LINE__, type, pDataBlock->info.type); - SPackedData tmp = { - .pDataBlock = pDataBlock, - }; + SPackedData tmp = {.pDataBlock = pDataBlock}; taosArrayPush(pInfo->pBlockLists, &tmp); } pInfo->blockType = STREAM_INPUT__DATA_BLOCK; } else if (type == STREAM_INPUT__CHECKPOINT) { - for (int32_t i = 0; i < numOfBlocks; ++i) { - SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; - qInfo("%s:%d type:%d, pDataBlock->info.type:%d", __func__, __LINE__, type, pDataBlock->info.type); - SPackedData tmp = { - .pDataBlock = pDataBlock, - }; - taosArrayPush(pInfo->pBlockLists, &tmp); - } + SPackedData tmp = {.pDataBlock = input}; + taosArrayPush(pInfo->pBlockLists, &tmp); pInfo->blockType = STREAM_INPUT__CHECKPOINT; } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index b7071d3f52..247dde7fc3 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2335,8 +2335,7 @@ FETCH_NEXT_BLOCK: qDebug("process %d/%d input data blocks, %s", current, (int32_t)total, id); SPackedData* pData = taosArrayGet(pInfo->pBlockLists, current); - // SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0); - SSDataBlock* pBlock = pData->pDataBlock; + SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0); if (pBlock->info.type == STREAM_CHECKPOINT) { streamScanOperatorSaveCheckpoint(pInfo); From 76536e1c8290e8070103c29741cdf3a595b3114a Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 11:03:18 +0800 Subject: [PATCH 09/56] enh: rsma logic --- source/dnode/vnode/src/sma/smaRollup.c | 155 ++++++++----------------- source/libs/executor/src/executil.c | 10 ++ 2 files changed, 58 insertions(+), 107 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 8e00297564..d2694c860d 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -44,7 +44,7 @@ static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); static void tdFreeRSmaSubmitItems(SArray *pItems); static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo); static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, - int64_t suid); + int64_t suid, SArray **ppResList, int8_t *streamFlushed); static void tdRSmaFetchTrigger(void *param, void *tmrId); static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level); static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables); @@ -591,17 +591,25 @@ _end: } static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, - int64_t suid) { + int64_t suid, SArray **ppResList, int8_t *streamFlushed) { int32_t code = 0; int32_t lino = 0; SSDataBlock *output = NULL; + SArray *pResList = NULL; - SArray *pResList = taosArrayInit(1, POINTER_BYTES); - if (pResList == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); + if (!(*ppResList)) { + pResList = taosArrayInit(1, POINTER_BYTES); + if (pResList == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + *ppResList = pResList; + } else { + pResList = *ppResList; } + taosArrayClear(pResList); + while (1) { uint64_t ts; bool hasMore = false; @@ -622,6 +630,10 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma #endif for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) { output = taosArrayGetP(pResList, i); + if(output->info.type == STREAM_CHECKPOINT) { + if (streamFlushed) *streamFlushed = 1; + continue; + } smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma), output->info.id.uid, output->info.id.groupId, output->info.rows); @@ -659,7 +671,6 @@ _exit: } else { smaDebug("vgId:%d, %s succeed, suid:%" PRIi64 ", level:%" PRIi8, SMA_VID(pSma), __func__, suid, pItem->level); } - taosArrayDestroy(pResList); qCleanExecTaskBlockBuf(taskInfo); return code; } @@ -756,6 +767,7 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, ERsmaExecType type, int8_t level) { int32_t idx = level - 1; void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); + SArray *pResList = NULL; if (!qTaskInfo) { smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, @@ -784,8 +796,9 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, } SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); - tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo->pTSchema, pInfo->suid); + tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo->pTSchema, pInfo->suid, &pResList, NULL); + taosArrayDestroy(pResList); return TSDB_CODE_SUCCESS; } @@ -1062,90 +1075,6 @@ _err: return code; } -static int32_t tdRSmaExecVerifyCheckPoint(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, - int64_t suid, SArray **ppResList, int8_t *streamFlushed) { - int32_t code = 0; - int32_t lino = 0; - SSDataBlock *output = NULL; - SArray *pResList = NULL; - - if (!(*ppResList)) { - pResList = taosArrayInit(1, POINTER_BYTES); - if (pResList == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - *ppResList = pResList; - } else { - pResList = *ppResList; - } - - while (1) { - uint64_t ts; - bool hasMore = false; - code = qExecTaskOpt(taskInfo, pResList, &ts, &hasMore, NULL); - if (code == TSDB_CODE_QRY_IN_EXEC) { - code = 0; - break; - } - TSDB_CHECK_CODE(code, lino, _exit); - - if (taosArrayGetSize(pResList) == 0) { - break; - } -#if 0 - char flag[10] = {0}; - snprintf(flag, 10, "level %" PRIi8, pItem->level); - blockDebugShowDataBlocks(pResList, flag); -#endif - for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) { - output = taosArrayGetP(pResList, i); - if(output->info.type == STREAM_CHECKPOINT) { - if (streamFlushed) *streamFlushed = 1; - continue; - } - smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma), - output->info.id.uid, output->info.id.groupId, output->info.rows); - - STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); - SSubmitReq2 *pReq = NULL; - - // TODO: the schema update should be handled later(TD-17965) - if (buildSubmitReqFromDataBlock(&pReq, output, pTSchema, output->info.id.groupId, SMA_VID(pSma), suid) < 0) { - code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; - TSDB_CHECK_CODE(code, lino, _exit); - } - - if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) { - code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; - tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); - taosMemoryFree(pReq); - TSDB_CHECK_CODE(code, lino, _exit); - } - - smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64, - SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, output->info.version); - - if (pReq) { - tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); - taosMemoryFree(pReq); - } - } - } -_exit: - if (code) { - smaError("vgId:%d, %s failed at line %d since %s, suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIi64 - ", ver:%" PRIi64, - SMA_VID(pSma), __func__, lino, tstrerror(code), suid, pItem->level, output ? output->info.id.uid : -1, - output ? output->info.version : -1); - } else { - smaDebug("vgId:%d, %s succeed, suid:%" PRIi64 ", level:%" PRIi8, SMA_VID(pSma), __func__, suid, pItem->level); - } - taosArrayDestroy(pResList); - qCleanExecTaskBlockBuf(taskInfo); - return code; -} - int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { int32_t code = 0; int32_t lino = 0; @@ -1177,22 +1106,31 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { } // stream state: process checkpoint response in async mode int32_t nStreamFlushed = 0; - while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { - SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; - if (RSMA_INFO_IS_DEL(pRSmaInfo)) { - continue; - } - for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { - int8_t streamFlushed = 0; - code = tdRSmaExecVerifyCheckPoint(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo->pTSchema, - pRSmaInfo->suid, &pResList, &streamFlushed); - TSDB_CHECK_CODE(code, lino, _exit); - if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { - goto _checkpoint; + int32_t nMSleep = 0; + while (true) { + while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; + if (RSMA_INFO_IS_DEL(pRSmaInfo)) { + continue; + } + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { + int8_t streamFlushed = 0; + code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo->pTSchema, + pRSmaInfo->suid, &pResList, &streamFlushed); + TSDB_CHECK_CODE(code, lino, _exit); + if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { + smaInfo("%s:%d checkpoint ready, %d ms consumed, received/total: %d/%d", __func__, __LINE__, nMSleep, + nStreamFlushed, nTaskInfo); + goto _checkpoint; + } } } } + taosMsleep(1); + ++nMSleep; + smaInfo("%s:%d wait for checkpoint ready, %d ms elapsed, received/total: %d/%d", __func__, __LINE__, nMSleep, + nStreamFlushed, nTaskInfo); } // stream state: build checkpoint in backend @@ -1207,7 +1145,6 @@ _checkpoint: SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i); if (pItem && pItem->pStreamTask) { SStreamTask *pTask = pItem->pStreamTask; - // adaption for API streamTaskBuildCheckpoint atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); pTask->checkpointingId = taosGetTimestampNs(); code = streamTaskBuildCheckpoint(pTask); @@ -1224,6 +1161,7 @@ _checkpoint: } _exit: + taosArrayDestroy(pResList); if (code) { smaError("vgId:%d, %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code)); } @@ -1355,6 +1293,7 @@ static void tdFreeRSmaSubmitItems(SArray *pItems) { */ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { SSDataBlock dataBlock = {.info.type = STREAM_GET_ALL}; + SArray *pResList = NULL; for (int8_t i = 1; i <= TSDB_RETENTION_L2; ++i) { SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, i - 1); if (pItem->fetchLevel) { @@ -1385,7 +1324,7 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { if ((terrno = qSetSMAInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) { goto _err; } - if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo->pTSchema, pInfo->suid) < 0) { + if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo->pTSchema, pInfo->suid, &pResList, NULL) < 0) { goto _err; } @@ -1399,8 +1338,10 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { } _end: + taosArrayDestroy(pResList); return TSDB_CODE_SUCCESS; _err: + taosArrayDestroy(pResList); return TSDB_CODE_FAILED; } diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 753d3e680c..a1bd5a7483 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -1967,9 +1967,19 @@ int32_t tableListAddTableInfo(STableListInfo* pTableList, uint64_t uid, uint64_t pTableList->map = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); } + for(int32_t i=0; i< taosArrayGetSize(pTableList->pTableList); ++i) { + STableKeyInfo* pKeyInfo = taosArrayGet(pTableList->pTableList, i); + if(pKeyInfo->uid == uid) { + assert(0); + } + } + STableKeyInfo keyInfo = {.uid = uid, .groupId = gid}; taosArrayPush(pTableList->pTableList, &keyInfo); + if(taosHashGet(pTableList->map, &uid, sizeof(uid))) { + assert(0); + } int32_t slot = (int32_t)taosArrayGetSize(pTableList->pTableList) - 1; taosHashPut(pTableList->map, &uid, sizeof(uid), &slot, sizeof(slot)); From 698fb804f9e0b377fc7175a1099838571fa88307 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 12:54:23 +0800 Subject: [PATCH 10/56] enh: disable update tbUidList during reboot --- source/dnode/vnode/src/sma/smaRollup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index d2694c860d..89db465b2e 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1012,7 +1012,7 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) { code = terrno; TSDB_CHECK_CODE(code, lino, _exit); } - +#if 0 // reload all ctbUids for suid uidStore.suid = suid; if (vnodeGetCtbIdList(pVnode, suid, uidStore.tbUids) < 0) { @@ -1026,7 +1026,7 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) { } taosArrayClear(uidStore.tbUids); - +#endif smaDebug("vgId:%d, rsma restore env success for %" PRIi64, TD_VID(pVnode), suid); } } From 96b50243473c87f4adcaab071a58eead93b0aa24 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 13:05:36 +0800 Subject: [PATCH 11/56] enh: rsma checkpoint --- source/dnode/vnode/src/sma/smaRollup.c | 1 + tests/script/tsim/sma/rsmaPersistenceRecovery.sim | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 89db465b2e..4619086ccd 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1147,6 +1147,7 @@ _checkpoint: SStreamTask *pTask = pItem->pStreamTask; atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); pTask->checkpointingId = taosGetTimestampNs(); + pTask->chkInfo.checkpointId = pTask->checkpointingId; code = streamTaskBuildCheckpoint(pTask); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim index 6f78829db7..c70f2dc20a 100644 --- a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim +++ b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim @@ -5,7 +5,7 @@ sleep 50 sql connect #todo wait for streamState checkpoint -return 1 +#return 1 print =============== create database with retentions sql create database d0 retentions -:7d,5m:21d,15m:365d; From 722777f8c963ab4a89b95c937348b9713d742e9e Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 13:15:31 +0800 Subject: [PATCH 12/56] enh: rsma checkpoint --- source/dnode/vnode/src/sma/smaRollup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 4619086ccd..986d0da677 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1106,7 +1106,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { } // stream state: process checkpoint response in async mode int32_t nStreamFlushed = 0; - int32_t nMSleep = 0; + int32_t nSleep = 0; while (true) { while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; @@ -1120,16 +1120,16 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { pRSmaInfo->suid, &pResList, &streamFlushed); TSDB_CHECK_CODE(code, lino, _exit); if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { - smaInfo("%s:%d checkpoint ready, %d ms consumed, received/total: %d/%d", __func__, __LINE__, nMSleep, + smaInfo("%s:%d checkpoint ready, %d us consumed, received/total: %d/%d", __func__, __LINE__, nSleep * 10, nStreamFlushed, nTaskInfo); goto _checkpoint; } } } } - taosMsleep(1); - ++nMSleep; - smaInfo("%s:%d wait for checkpoint ready, %d ms elapsed, received/total: %d/%d", __func__, __LINE__, nMSleep, + taosUsleep(10); + ++nSleep; + smaInfo("%s:%d wait for checkpoint ready, %d us elapsed, received/total: %d/%d", __func__, __LINE__, nSleep * 10, nStreamFlushed, nTaskInfo); } From 2d597659bc2e4eaa55e4ba6a8bd3f7aaaff61af3 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 15:20:54 +0800 Subject: [PATCH 13/56] enh: rsma checkpoint --- source/dnode/vnode/src/sma/smaRollup.c | 69 ++++++++++++------------ source/dnode/vnode/src/vnd/vnodeCommit.c | 4 +- source/libs/executor/src/executil.c | 10 ---- source/libs/executor/src/scanoperator.c | 4 +- 4 files changed, 39 insertions(+), 48 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 986d0da677..f35eec786c 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -630,7 +630,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma #endif for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) { output = taosArrayGetP(pResList, i); - if(output->info.type == STREAM_CHECKPOINT) { + if (output->info.type == STREAM_CHECKPOINT) { if (streamFlushed) *streamFlushed = 1; continue; } @@ -1076,18 +1076,17 @@ _err: } int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { - int32_t code = 0; - int32_t lino = 0; - int32_t nTaskInfo = 0; - SSma *pSma = pRSmaStat->pSma; - SVnode *pVnode = pSma->pVnode; - SArray *pResList = NULL; - SRSmaFS fs = {0}; + int32_t code = 0; + int32_t lino = 0; + int32_t nTaskInfo = 0; + SSma *pSma = pRSmaStat->pSma; + SVnode *pVnode = pSma->pVnode; + SArray *pResList = NULL; + SRSmaFS fs = {0}; if (taosHashGetSize(pInfoHash) <= 0) { return TSDB_CODE_SUCCESS; } - void *infoHash = NULL; // stream state: trigger checkpoint while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { @@ -1120,7 +1119,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { pRSmaInfo->suid, &pResList, &streamFlushed); TSDB_CHECK_CODE(code, lino, _exit); if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { - smaInfo("%s:%d checkpoint ready, %d us consumed, received/total: %d/%d", __func__, __LINE__, nSleep * 10, + smaInfo("vgId:%d checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, nStreamFlushed, nTaskInfo); goto _checkpoint; } @@ -1129,38 +1128,40 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { } taosUsleep(10); ++nSleep; - smaInfo("%s:%d wait for checkpoint ready, %d us elapsed, received/total: %d/%d", __func__, __LINE__, nSleep * 10, - nStreamFlushed, nTaskInfo); + smaDebug("vgId:%d, wait for checkpoint ready, %d us elapsed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, + nStreamFlushed, nTaskInfo); } - // stream state: build checkpoint in backend + _checkpoint: - while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { - SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; - if (RSMA_INFO_IS_DEL(pRSmaInfo)) { - continue; - } + do { + void *infHash = NULL; + while ((infHash = taosHashIterate(pInfoHash, infHash))) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infHash; + if (RSMA_INFO_IS_DEL(pRSmaInfo)) { + continue; + } - for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i); - if (pItem && pItem->pStreamTask) { - SStreamTask *pTask = pItem->pStreamTask; - atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); - pTask->checkpointingId = taosGetTimestampNs(); - pTask->chkInfo.checkpointId = pTask->checkpointingId; - code = streamTaskBuildCheckpoint(pTask); - TSDB_CHECK_CODE(code, lino, _exit); + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i); + if (pItem && pItem->pStreamTask) { + SStreamTask *pTask = pItem->pStreamTask; + atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); + pTask->checkpointingId = taosGetTimestampNs(); + pTask->chkInfo.checkpointId = pTask->checkpointingId; + code = streamTaskBuildCheckpoint(pTask); + TSDB_CHECK_CODE(code, lino, _exit); - // save checkpointId to vnode.json - (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; + // save checkpointId to vnode.json + (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; - smaInfo("vgId:%d, commit task:%p, build stream checkpoint success, table:%" PRIi64 - ", level:%d, checkpointId:%" PRIi64, - TD_VID(pVnode), pTask, pRSmaInfo->suid, i + 1, pTask->checkpointingId); + smaInfo("vgId:%d, commit task:%p, build stream checkpoint success, table:%" PRIi64 + ", level:%d, checkpointId:%" PRIi64, + TD_VID(pVnode), pTask, pRSmaInfo->suid, i + 1, pTask->checkpointingId); + } } } - } - + } while (0); _exit: taosArrayDestroy(pResList); if (code) { diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index ca4335f391..9e0106dff4 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -290,8 +290,8 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { tsem_wait(&pVnode->canCommit); if(syncNodeGetConfig(pVnode->sync, &pVnode->config.syncCfg) != 0) goto _exit; - - code = smaPrepareAsyncCommit(pVnode->pSma); + + code = smaPrepareAsyncCommit(pVnode->pSma); // prepare checkpointId and save to vnode.json if (code) goto _exit; pVnode->state.commitTerm = pVnode->state.applyTerm; diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index a1bd5a7483..753d3e680c 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -1967,19 +1967,9 @@ int32_t tableListAddTableInfo(STableListInfo* pTableList, uint64_t uid, uint64_t pTableList->map = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); } - for(int32_t i=0; i< taosArrayGetSize(pTableList->pTableList); ++i) { - STableKeyInfo* pKeyInfo = taosArrayGet(pTableList->pTableList, i); - if(pKeyInfo->uid == uid) { - assert(0); - } - } - STableKeyInfo keyInfo = {.uid = uid, .groupId = gid}; taosArrayPush(pTableList->pTableList, &keyInfo); - if(taosHashGet(pTableList->map, &uid, sizeof(uid))) { - assert(0); - } int32_t slot = (int32_t)taosArrayGetSize(pTableList->pTableList) - 1; taosHashPut(pTableList->map, &uid, sizeof(uid), &slot, sizeof(slot)); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 247dde7fc3..efbc978323 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2331,8 +2331,8 @@ FETCH_NEXT_BLOCK: return NULL; } - int32_t current = pInfo->validBlockIndex++; - qDebug("process %d/%d input data blocks, %s", current, (int32_t)total, id); + int32_t current = pInfo->validBlockIndex++; + qDebug("process %d/%d input data blocks, %s", current, (int32_t) total, id); SPackedData* pData = taosArrayGet(pInfo->pBlockLists, current); SSDataBlock* pBlock = taosArrayGet(pData->pDataBlock, 0); From fa5d89678790c5bc229b55fa8a3fef1b280db7e7 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 18:56:36 +0800 Subject: [PATCH 14/56] enh: rsma checkpoint --- source/dnode/vnode/src/sma/smaRollup.c | 151 +++++++++++++++---------- 1 file changed, 93 insertions(+), 58 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index f35eec786c..8882dada9a 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -92,7 +92,9 @@ void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) { streamStateClose(pItem->pStreamState, false); } - taosMemoryFreeClear(pItem->pStreamTask); + if (pItem->pStreamTask) { + tFreeStreamTask(pItem->pStreamTask); + } tdRSmaQTaskInfoFree(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1); } @@ -173,8 +175,8 @@ int32_t tdUpdateTbUidList(SSma *pSma, STbUidStore *pStore, bool isAdd) { return TSDB_CODE_FAILED; } - void *pIter = taosHashIterate(pStore->uidHash, NULL); - while (pIter) { + void *pIter = NULL; + while ((pIter = taosHashIterate(pStore->uidHash, pIter))) { tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); SArray *pTbUids = *(SArray **)pIter; @@ -182,8 +184,6 @@ int32_t tdUpdateTbUidList(SSma *pSma, STbUidStore *pStore, bool isAdd) { taosHashCancelIterate(pStore->uidHash, pIter); return TSDB_CODE_FAILED; } - - pIter = taosHashIterate(pStore->uidHash, pIter); } return TSDB_CODE_SUCCESS; } @@ -234,11 +234,12 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo, int8_t idx) { if ((param->qmsgLen > 0) && param->qmsg[idx]) { - SRetention *pRetention = SMA_RETENTION(pSma); - STsdbCfg *pTsdbCfg = SMA_TSDB_CFG(pSma); - SVnode *pVnode = pSma->pVnode; - char taskInfDir[TSDB_FILENAME_LEN] = {0}; - void *pStreamState = NULL; + SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]); + SRetention *pRetention = SMA_RETENTION(pSma); + STsdbCfg *pTsdbCfg = SMA_TSDB_CFG(pSma); + SVnode *pVnode = pSma->pVnode; + char taskInfDir[TSDB_FILENAME_LEN] = {0}; + void *pStreamState = NULL; // set the backend of stream state tdRSmaQTaskInfoGetFullPath(pVnode, pRSmaInfo->suid, idx + 1, pVnode->pTfs, taskInfDir); @@ -258,32 +259,30 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat terrno = TSDB_CODE_OUT_OF_MEMORY; return TSDB_CODE_FAILED; } + pItem->pStreamTask = pStreamTask; pStreamTask->id.taskId = 0; pStreamTask->id.streamId = pRSmaInfo->suid + idx; pStreamTask->chkInfo.startTs = taosGetTimestampMs(); pStreamTask->pMeta = pVnode->pTq->pStreamMeta; + pStreamTask->exec.qmsg = taosMemoryMalloc(2); + sprintf(pStreamTask->exec.qmsg, "%d", idx); pStreamTask->chkInfo.checkpointId = pTsdbCfg->retentions[idx + 1].checkpointId; pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; - taosMemoryFreeClear(pStreamTask); return TSDB_CODE_FAILED; } + pItem->pStreamState = pStreamState; SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState}; initStorageAPI(&handle.api); - pRSmaInfo->taskInfo[idx] = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle, TD_VID(pVnode), 0); if (!pRSmaInfo->taskInfo[idx]) { terrno = TSDB_CODE_RSMA_QTASKINFO_CREATE; - taosMemoryFreeClear(pStreamTask); return TSDB_CODE_FAILED; } - SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]); pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; // fetch the data when reboot - pItem->pStreamState = pStreamState; - pItem->pStreamTask = pStreamTask; if (param->maxdelay[idx] < TSDB_MIN_ROLLUP_MAX_DELAY) { int64_t msInterval = convertTimeFromPrecisionToUnit(pRetention[idx + 1].freq, pTsdbCfg->precision, TIME_UNIT_MILLISECOND); @@ -509,11 +508,10 @@ static void tdUidStoreDestory(STbUidStore *pStore) { if (pStore->uidHash) { if (pStore->tbUids) { // When pStore->tbUids not NULL, the pStore->uidHash has k/v; otherwise pStore->uidHash only has keys. - void *pIter = taosHashIterate(pStore->uidHash, NULL); - while (pIter) { + void *pIter = NULL; + while ((pIter = taosHashIterate(pStore->uidHash, pIter))) { SArray *arr = *(SArray **)pIter; taosArrayDestroy(arr); - pIter = taosHashIterate(pStore->uidHash, pIter); } } taosHashCleanup(pStore->uidHash); @@ -1082,62 +1080,77 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { SSma *pSma = pRSmaStat->pSma; SVnode *pVnode = pSma->pVnode; SArray *pResList = NULL; - SRSmaFS fs = {0}; if (taosHashGetSize(pInfoHash) <= 0) { return TSDB_CODE_SUCCESS; } - void *infoHash = NULL; + // stream state: trigger checkpoint - while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { - SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; - if (RSMA_INFO_IS_DEL(pRSmaInfo)) { - continue; - } - for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - if (pRSmaInfo->taskInfo[i]) { - code = qSetSMAInput(pRSmaInfo->taskInfo[i], pRSmaStat->blocks, 1, STREAM_INPUT__CHECKPOINT); - TSDB_CHECK_CODE(code, lino, _exit); - pRSmaInfo->items[i].streamFlushed = 0; - ++nTaskInfo; - } - } - } - // stream state: process checkpoint response in async mode - int32_t nStreamFlushed = 0; - int32_t nSleep = 0; - while (true) { + do { + void *infoHash = NULL; while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { continue; } for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { - int8_t streamFlushed = 0; - code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo->pTSchema, - pRSmaInfo->suid, &pResList, &streamFlushed); - TSDB_CHECK_CODE(code, lino, _exit); - if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { - smaInfo("vgId:%d checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, - nStreamFlushed, nTaskInfo); - goto _checkpoint; + if (pRSmaInfo->taskInfo[i]) { + code = qSetSMAInput(pRSmaInfo->taskInfo[i], pRSmaStat->blocks, 1, STREAM_INPUT__CHECKPOINT); + if (code) { + taosHashCancelIterate(pInfoHash, infoHash); + TSDB_CHECK_CODE(code, lino, _exit); } + pRSmaInfo->items[i].streamFlushed = 0; + ++nTaskInfo; } } } - taosUsleep(10); - ++nSleep; - smaDebug("vgId:%d, wait for checkpoint ready, %d us elapsed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, - nStreamFlushed, nTaskInfo); - } - // stream state: build checkpoint in backend + } while (0); + + // stream state: wait checkpoint ready in async mode + do { + int32_t nStreamFlushed = 0; + int32_t nSleep = 0; + void *infoHash = NULL; + while (true) { + while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; + if (RSMA_INFO_IS_DEL(pRSmaInfo)) { + continue; + } + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { + int8_t streamFlushed = 0; + code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo->pTSchema, + pRSmaInfo->suid, &pResList, &streamFlushed); + if (code) { + taosHashCancelIterate(pInfoHash, infoHash); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { + smaInfo("vgId:%d checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, + nStreamFlushed, nTaskInfo); + taosHashCancelIterate(pInfoHash, infoHash); + goto _checkpoint; + } + } + } + } + taosUsleep(10); + ++nSleep; + smaDebug("vgId:%d, wait for checkpoint ready, %d us elapsed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, + nStreamFlushed, nTaskInfo); + } + } while (0); _checkpoint: + // stream state: build checkpoint in backend do { - void *infHash = NULL; - while ((infHash = taosHashIterate(pInfoHash, infHash))) { - SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infHash; + void *infoHash = NULL; + + while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { + SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { continue; } @@ -1150,7 +1163,26 @@ _checkpoint: pTask->checkpointingId = taosGetTimestampNs(); pTask->chkInfo.checkpointId = pTask->checkpointingId; code = streamTaskBuildCheckpoint(pTask); - TSDB_CHECK_CODE(code, lino, _exit); + if (code) { + taosHashCancelIterate(pInfoHash, infoHash); + TSDB_CHECK_CODE(code, lino, _exit); + } + + taosWLockLatch(&pTask->pMeta->lock); + if (streamMetaSaveTask(pTask->pMeta, pTask) != 0) { + taosWUnLockLatch(&pTask->pMeta->lock); + code = TSDB_CODE_OUT_OF_MEMORY; + taosHashCancelIterate(pInfoHash, infoHash); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (streamMetaCommit(pTask->pMeta) != 0) { + taosWUnLockLatch(&pTask->pMeta->lock); + code = TSDB_CODE_OUT_OF_MEMORY; + taosHashCancelIterate(pInfoHash, infoHash); + TSDB_CHECK_CODE(code, lino, _exit); + } + taosWUnLockLatch(&pTask->pMeta->lock); // save checkpointId to vnode.json (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; @@ -1158,6 +1190,8 @@ _checkpoint: smaInfo("vgId:%d, commit task:%p, build stream checkpoint success, table:%" PRIi64 ", level:%d, checkpointId:%" PRIi64, TD_VID(pVnode), pTask, pRSmaInfo->suid, i + 1, pTask->checkpointingId); + + } } } @@ -1452,6 +1486,7 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) { if (ASSERTS(oldVal >= 0, "oldVal of nFetchAll: %d < 0", oldVal)) { code = TSDB_CODE_APP_ERROR; + taosHashCancelIterate(infoHash, pIter); TSDB_CHECK_CODE(code, lino, _exit); } From 6c944bb1927fa157469e640c5a9cd018e7ebb5d7 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 2 Nov 2023 20:32:54 +0800 Subject: [PATCH 15/56] enh: skip rsma_task during load stream tasks --- source/dnode/vnode/src/sma/smaRollup.c | 5 +++-- source/libs/stream/src/streamMeta.c | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 8882dada9a..cd93dda4fb 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -22,6 +22,7 @@ #define RSMA_FETCH_DELAY_MAX (120000) // ms #define RSMA_FETCH_ACTIVE_MAX (1000) // ms #define RSMA_FETCH_INTERVAL (5000) // ms +#define RSMA_TASK_FLAG "rsma_task" #define RSMA_NEED_FETCH(r) (RSMA_INFO_ITEM((r), 0)->fetchLevel || RSMA_INFO_ITEM((r), 1)->fetchLevel) @@ -264,8 +265,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->id.streamId = pRSmaInfo->suid + idx; pStreamTask->chkInfo.startTs = taosGetTimestampMs(); pStreamTask->pMeta = pVnode->pTq->pStreamMeta; - pStreamTask->exec.qmsg = taosMemoryMalloc(2); - sprintf(pStreamTask->exec.qmsg, "%d", idx); + pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_TASK_FLAG) + 1); + sprintf(pStreamTask->exec.qmsg, "%s", RSMA_TASK_FLAG); pStreamTask->chkInfo.checkpointId = pTsdbCfg->retentions[idx + 1].checkpointId; pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index f788e244cd..7f023f2451 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -687,7 +687,10 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { } tDecoderClear(&decoder); - if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { + if (0 == strcmp(pTask->exec.qmsg, "rsma_task")) { + tFreeStreamTask(pTask); + continue; + } else if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { int32_t taskId = pTask->id.taskId; tFreeStreamTask(pTask); From a48968e5e8f61c630e7bf4f41a204b51110c8fa8 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 3 Nov 2023 14:33:45 +0800 Subject: [PATCH 16/56] enh: rsma checkpoint --- include/common/tmsg.h | 5 -- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 7 +-- source/dnode/vnode/inc/vnode.h | 2 +- source/dnode/vnode/src/inc/sma.h | 12 ++-- source/dnode/vnode/src/sma/smaCommit.c | 7 ++- source/dnode/vnode/src/sma/smaOpen.c | 18 +++--- source/dnode/vnode/src/sma/smaRollup.c | 67 +++++++++++++-------- source/dnode/vnode/src/tsdb/tsdbRead2.c | 6 +- source/dnode/vnode/src/vnd/vnodeCfg.c | 29 ++++----- source/libs/stream/src/streamMeta.c | 5 +- 10 files changed, 80 insertions(+), 78 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 57906c1695..4ef4273631 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -445,11 +445,6 @@ typedef struct SRetention { int8_t keepUnit; } SRetention; -typedef struct SRetentionEx { - SRetention rtn; - int64_t checkpointId; -} SRetentionEx; - #define RETENTION_VALID(l, r) ((((l) == 0 && (r)->freq >= 0) || ((r)->freq > 0)) && ((r)->keep > 0)) #pragma pack(push, 1) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 5a4b341662..c4d525a871 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -134,11 +134,10 @@ static void vmGenerateVnodeCfg(SCreateVnodeReq *pCreate, SVnodeCfg *pCfg) { pCfg->tsdbCfg.minRows = pCreate->minRows; pCfg->tsdbCfg.maxRows = pCreate->maxRows; for (size_t i = 0; i < taosArrayGetSize(pCreate->pRetensions); ++i) { - SRetentionEx *pRetention = &pCfg->tsdbCfg.retentions[i]; - memcpy(&pRetention->rtn, taosArrayGet(pCreate->pRetensions, i), sizeof(SRetention)); - pRetention->checkpointId = -1; + SRetention *pRetention = &pCfg->tsdbCfg.retentions[i]; + memcpy(pRetention, taosArrayGet(pCreate->pRetensions, i), sizeof(SRetention)); if (i == 0) { - if ((pRetention->rtn.freq >= 0 && pRetention->rtn.keep > 0)) pCfg->isRsma = 1; + if ((pRetention->freq >= 0 && pRetention->keep > 0)) pCfg->isRsma = 1; } } diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index e92fc04f6e..6a0c991be4 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -287,7 +287,7 @@ struct STsdbCfg { int32_t keep1; // just for save config, don't use in tsdbRead/tsdbCommit/..., and use STsdbKeepCfg in STsdb instead int32_t keep2; // just for save config, don't use in tsdbRead/tsdbCommit/..., and use STsdbKeepCfg in STsdb instead int32_t keepTimeOffset; // just for save config, use STsdbKeepCfg in STsdb instead - SRetentionEx retentions[TSDB_RETENTION_MAX]; + SRetention retentions[TSDB_RETENTION_MAX]; }; typedef struct { diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index bce5e1b0b2..63d6e7e5c2 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -155,7 +155,7 @@ struct SRSmaInfo { int64_t lastRecv; // ms int8_t assigned; // 0 idle, 1 assgined for exec int8_t delFlag; - int16_t padding; + int8_t padding; T_REF_DECLARE() SRSmaInfoItem items[TSDB_RETENTION_L2]; void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t @@ -163,12 +163,10 @@ struct SRSmaInfo { STaosQall *qall; // buffer qall of SubmitReq }; -#define RSMA_INFO_HEAD_LEN offsetof(SRSmaInfo, items) -#define RSMA_INFO_IS_DEL(r) ((r)->delFlag == 1) -#define RSMA_INFO_SET_DEL(r) ((r)->delFlag = 1) -#define RSMA_INFO_QTASK(r, i) ((r)->taskInfo[i]) -#define RSMA_INFO_IQTASK(r, i) ((r)->iTaskInfo[i]) -#define RSMA_INFO_ITEM(r, i) (&(r)->items[i]) +#define RSMA_INFO_IS_DEL(r) ((r)->delFlag == 1) +#define RSMA_INFO_SET_DEL(r) ((r)->delFlag = 1) +#define RSMA_INFO_QTASK(r, i) ((r)->taskInfo[i]) +#define RSMA_INFO_ITEM(r, i) (&(r)->items[i]) enum { TASK_TRIGGER_STAT_INIT = 0, diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 652aab3c01..fad2e4d7e9 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -156,10 +156,10 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { nLoops = 0; while (1) { if (atomic_load_32(&pRSmaStat->nFetchAll) <= 0) { - smaDebug("vgId:%d, rsma commit:%d, fetch tasks are all finished", SMA_VID(pSma), isCommit); + smaDebug("vgId:%d, rsma commit, type:%d, fetch tasks are all finished", SMA_VID(pSma), isCommit); break; } else { - smaDebug("vgId:%d, rsma commit%d, fetch tasks are not all finished yet", SMA_VID(pSma), isCommit); + smaDebug("vgId:%d, rsma commit, type:%d, fetch tasks are not all finished yet", SMA_VID(pSma), isCommit); } TD_SMA_LOOPS_CHECK(nLoops, 1000); } @@ -175,6 +175,7 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { while (atomic_load_64(&pRSmaStat->nBufItems) > 0) { TD_SMA_LOOPS_CHECK(nLoops, 1000); } + smaInfo("vgId:%d, rsma commit, all items are consumed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); if (!isCommit) goto _exit; @@ -183,7 +184,7 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { smaInfo("vgId:%d, rsma commit, operator state committed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); - smaInfo("vgId:%d, rsma commit, all items are consumed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); + // all rsma results are written completely STsdb *pTsdb = NULL; diff --git a/source/dnode/vnode/src/sma/smaOpen.c b/source/dnode/vnode/src/sma/smaOpen.c index cea4ccb1b7..c0af670a17 100644 --- a/source/dnode/vnode/src/sma/smaOpen.c +++ b/source/dnode/vnode/src/sma/smaOpen.c @@ -16,13 +16,13 @@ #include "sma.h" #include "tsdb.h" -static int32_t smaEvalDays(SVnode *pVnode, SRetentionEx *r, int8_t level, int8_t precision, int32_t duration); +static int32_t smaEvalDays(SVnode *pVnode, SRetention *r, int8_t level, int8_t precision, int32_t duration); static int32_t smaSetKeepCfg(SVnode *pVnode, STsdbKeepCfg *pKeepCfg, STsdbCfg *pCfg, int type); static int32_t rsmaRestore(SSma *pSma); #define SMA_SET_KEEP_CFG(v, l) \ do { \ - SRetention *r = &(pCfg->retentions[l].rtn); \ + SRetention *r = &(pCfg->retentions[l]); \ pKeepCfg->keep2 = convertTimeFromPrecisionToUnit(r->keep, pCfg->precision, TIME_UNIT_MINUTE); \ pKeepCfg->keep0 = pKeepCfg->keep2; \ pKeepCfg->keep1 = pKeepCfg->keep2; \ @@ -32,7 +32,7 @@ static int32_t rsmaRestore(SSma *pSma); #define SMA_OPEN_RSMA_IMPL(v, l, force) \ do { \ - SRetention *r = &(((SRetentionEx *)VND_RETENTIONS(v) + l)->rtn); \ + SRetention *r = (SRetention *)VND_RETENTIONS(v) + l; \ if (!RETENTION_VALID(l, r)) { \ if (l == 0) { \ code = TSDB_CODE_INVALID_PARA; \ @@ -59,9 +59,9 @@ static int32_t rsmaRestore(SSma *pSma); * @param duration * @return int32_t */ -static int32_t smaEvalDays(SVnode *pVnode, SRetentionEx *r, int8_t level, int8_t precision, int32_t duration) { - int32_t freqDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->rtn.freq, precision, TIME_UNIT_MINUTE); - int32_t keepDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->rtn.keep, precision, TIME_UNIT_MINUTE); +static int32_t smaEvalDays(SVnode *pVnode, SRetention *r, int8_t level, int8_t precision, int32_t duration) { + int32_t freqDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->freq, precision, TIME_UNIT_MINUTE); + int32_t keepDuration = convertTimeFromPrecisionToUnit((r + TSDB_RETENTION_L0)->keep, precision, TIME_UNIT_MINUTE); int32_t days = duration; // min if (days < freqDuration) { @@ -76,10 +76,10 @@ static int32_t smaEvalDays(SVnode *pVnode, SRetentionEx *r, int8_t level, int8_t goto _exit; } - freqDuration = convertTimeFromPrecisionToUnit((r + level)->rtn.freq, precision, TIME_UNIT_MINUTE); - keepDuration = convertTimeFromPrecisionToUnit((r + level)->rtn.keep, precision, TIME_UNIT_MINUTE); + freqDuration = convertTimeFromPrecisionToUnit((r + level)->freq, precision, TIME_UNIT_MINUTE); + keepDuration = convertTimeFromPrecisionToUnit((r + level)->keep, precision, TIME_UNIT_MINUTE); - int32_t nFreqTimes = (r + level)->rtn.freq / (60 * 1000); // use 60s for freq of 1st level + int32_t nFreqTimes = (r + level)->freq / (60 * 1000); // use 60s for freq of 1st level days *= (nFreqTimes > 1 ? nFreqTimes : 1); if (days < freqDuration) { diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index cd93dda4fb..8f81829dc2 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -232,6 +232,29 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui return TSDB_CODE_SUCCESS; } +static int64_t tdRSmaTaskGetCheckpointId(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { + int64_t checkpointId = -1; + STaskId id = {.streamId = streamId, .taskId = taskId}; + taosRLockLatch(&pMeta->lock); + SStreamTask **ppTask = (SStreamTask **)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + if (ppTask && *ppTask) { + checkpointId = (*ppTask)->chkInfo.checkpointId; + } + taosRUnLockLatch(&pMeta->lock); + return checkpointId; +} + +static void tdRSmaTaskRemove(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { + streamMetaUnregisterTask(pMeta, streamId, taskId); + taosWLockLatch(&pMeta->lock); + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + if (streamMetaCommit(pMeta) < 0) { + // persist to disk + } + taosWUnLockLatch(&pMeta->lock); + smaDebug("vgId:%d rsma task:%" PRIi64 ",%d dropped, remain tasks:%d", pMeta->vgId, streamId, taskId, numOfTasks); +} + static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo, int8_t idx) { if ((param->qmsgLen > 0) && param->qmsg[idx]) { @@ -267,7 +290,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->pMeta = pVnode->pTq->pStreamMeta; pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_TASK_FLAG) + 1); sprintf(pStreamTask->exec.qmsg, "%s", RSMA_TASK_FLAG); - pStreamTask->chkInfo.checkpointId = pTsdbCfg->retentions[idx + 1].checkpointId; + pStreamTask->chkInfo.checkpointId = + tdRSmaTaskGetCheckpointId(pStreamTask->pMeta, pStreamTask->id.streamId, pStreamTask->id.taskId); pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; @@ -275,6 +299,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat } pItem->pStreamState = pStreamState; + tdRSmaTaskRemove(pStreamTask->pMeta, pStreamTask->id.streamId, pStreamTask->id.taskId); + SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState}; initStorageAPI(&handle.api); pRSmaInfo->taskInfo[idx] = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle, TD_VID(pVnode), 0); @@ -1129,19 +1155,22 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { TSDB_CHECK_CODE(code, lino, _exit); } - if (streamFlushed && (++nStreamFlushed >= nTaskInfo)) { - smaInfo("vgId:%d checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, - nStreamFlushed, nTaskInfo); - taosHashCancelIterate(pInfoHash, infoHash); - goto _checkpoint; + if (streamFlushed) { + pRSmaInfo->items[i].streamFlushed = 1; + if (++nStreamFlushed >= nTaskInfo) { + smaInfo("vgId:%d rsma commit, checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), + nSleep * 10, nStreamFlushed, nTaskInfo); + taosHashCancelIterate(pInfoHash, infoHash); + goto _checkpoint; + } } } } } taosUsleep(10); ++nSleep; - smaDebug("vgId:%d, wait for checkpoint ready, %d us elapsed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, - nStreamFlushed, nTaskInfo); + smaDebug("vgId:%d, rsma commit, wait for checkpoint ready, %d us elapsed, received/total: %d/%d", TD_VID(pVnode), + nSleep * 10, nStreamFlushed, nTaskInfo); } } while (0); @@ -1149,7 +1178,6 @@ _checkpoint: // stream state: build checkpoint in backend do { void *infoHash = NULL; - while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { @@ -1170,29 +1198,16 @@ _checkpoint: } taosWLockLatch(&pTask->pMeta->lock); - if (streamMetaSaveTask(pTask->pMeta, pTask) != 0) { + if (0 != streamMetaSaveTask(pTask->pMeta, pTask) || 0 != streamMetaCommit(pTask->pMeta)) { taosWUnLockLatch(&pTask->pMeta->lock); - code = TSDB_CODE_OUT_OF_MEMORY; - taosHashCancelIterate(pInfoHash, infoHash); - TSDB_CHECK_CODE(code, lino, _exit); - } - - if (streamMetaCommit(pTask->pMeta) != 0) { - taosWUnLockLatch(&pTask->pMeta->lock); - code = TSDB_CODE_OUT_OF_MEMORY; + code = terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY; taosHashCancelIterate(pInfoHash, infoHash); TSDB_CHECK_CODE(code, lino, _exit); } taosWUnLockLatch(&pTask->pMeta->lock); - // save checkpointId to vnode.json - (pVnode->config.tsdbCfg.retentions + i + 1)->checkpointId = pTask->checkpointingId; - - smaInfo("vgId:%d, commit task:%p, build stream checkpoint success, table:%" PRIi64 - ", level:%d, checkpointId:%" PRIi64, - TD_VID(pVnode), pTask, pRSmaInfo->suid, i + 1, pTask->checkpointingId); - - + smaInfo("vgId:%d, rsma commit, succeed to commit checkpoint/task:%" PRIi64 "/%p, table:%" PRIi64 ", level:%d", + TD_VID(pVnode), pTask->checkpointingId, pTask, pRSmaInfo->suid, i + 1); } } } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index be88a5a435..d1919d95ba 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -49,7 +49,7 @@ static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo STsdbReader* pReader); static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetentionEx* retentions, const char* idstr, +static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, int8_t* pLevel); static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); @@ -3140,7 +3140,7 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { } } -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetentionEx* retentions, const char* idStr, +static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr, int8_t* pLevel) { if (VND_IS_RSMA(pVnode)) { int8_t level = 0; @@ -3151,7 +3151,7 @@ static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetentionEx* r : 1000000L); for (int8_t i = 0; i < TSDB_RETENTION_MAX; ++i) { - SRetention* pRetention = &((retentions + level)->rtn); + SRetention* pRetention = retentions + level; if (pRetention->keep <= 0) { if (level > 0) { --level; diff --git a/source/dnode/vnode/src/vnd/vnodeCfg.c b/source/dnode/vnode/src/vnd/vnodeCfg.c index d429eb2a94..07bfa6c719 100644 --- a/source/dnode/vnode/src/vnd/vnodeCfg.c +++ b/source/dnode/vnode/src/vnd/vnodeCfg.c @@ -106,24 +106,23 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) { if (tjsonAddIntegerToObject(pJson, "keep1", pCfg->tsdbCfg.keep1) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "keep2", pCfg->tsdbCfg.keep2) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "keepTimeOffset", pCfg->tsdbCfg.keepTimeOffset) < 0) return -1; - if (pCfg->tsdbCfg.retentions[0].rtn.keep > 0) { + if (pCfg->tsdbCfg.retentions[0].keep > 0) { int32_t nRetention = 1; - if (pCfg->tsdbCfg.retentions[1].rtn.freq > 0) { + if (pCfg->tsdbCfg.retentions[1].freq > 0) { ++nRetention; - if (pCfg->tsdbCfg.retentions[2].rtn.freq > 0) { + if (pCfg->tsdbCfg.retentions[2].freq > 0) { ++nRetention; } } SJson *pNodeRetentions = tjsonCreateArray(); tjsonAddItemToObject(pJson, "retentions", pNodeRetentions); for (int32_t i = 0; i < nRetention; ++i) { - SJson *pNodeRetention = tjsonCreateObject(); - const SRetentionEx *pRetention = pCfg->tsdbCfg.retentions + i; - tjsonAddIntegerToObject(pNodeRetention, "freq", pRetention->rtn.freq); - tjsonAddIntegerToObject(pNodeRetention, "freqUnit", pRetention->rtn.freqUnit); - tjsonAddIntegerToObject(pNodeRetention, "keep", pRetention->rtn.keep); - tjsonAddIntegerToObject(pNodeRetention, "keepUnit", pRetention->rtn.keepUnit); - tjsonAddIntegerToObject(pNodeRetention, "checkpointId", pRetention->checkpointId); + SJson *pNodeRetention = tjsonCreateObject(); + const SRetention *pRetention = pCfg->tsdbCfg.retentions + i; + tjsonAddIntegerToObject(pNodeRetention, "freq", pRetention->freq); + tjsonAddIntegerToObject(pNodeRetention, "freqUnit", pRetention->freqUnit); + tjsonAddIntegerToObject(pNodeRetention, "keep", pRetention->keep); + tjsonAddIntegerToObject(pNodeRetention, "keepUnit", pRetention->keepUnit); tjsonAddItemToArray(pNodeRetentions, pNodeRetention); } } @@ -232,12 +231,10 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) { for (int32_t i = 0; i < nRetention; ++i) { SJson *pNodeRetention = tjsonGetArrayItem(pNodeRetentions, i); ASSERT(pNodeRetention != NULL); - SRetentionEx *pRetention = &(pCfg->tsdbCfg.retentions[i]); - tjsonGetNumberValue(pNodeRetention, "freq", pRetention->rtn.freq, code); - tjsonGetNumberValue(pNodeRetention, "freqUnit", pRetention->rtn.freqUnit, code); - tjsonGetNumberValue(pNodeRetention, "keep", pRetention->rtn.keep, code); - tjsonGetNumberValue(pNodeRetention, "keepUnit", pRetention->rtn.keepUnit, code); - tjsonGetNumberValue(pNodeRetention, "checkpointId", pRetention->checkpointId, code); + tjsonGetNumberValue(pNodeRetention, "freq", (pCfg->tsdbCfg.retentions)[i].freq, code); + tjsonGetNumberValue(pNodeRetention, "freqUnit", (pCfg->tsdbCfg.retentions)[i].freqUnit, code); + tjsonGetNumberValue(pNodeRetention, "keep", (pCfg->tsdbCfg.retentions)[i].keep, code); + tjsonGetNumberValue(pNodeRetention, "keepUnit", (pCfg->tsdbCfg.retentions)[i].keepUnit, code); } tjsonGetNumberValue(pJson, "wal.vgId", pCfg->walCfg.vgId, code); if (code < 0) return -1; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 7f023f2451..f788e244cd 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -687,10 +687,7 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { } tDecoderClear(&decoder); - if (0 == strcmp(pTask->exec.qmsg, "rsma_task")) { - tFreeStreamTask(pTask); - continue; - } else if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { + if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { int32_t taskId = pTask->id.taskId; tFreeStreamTask(pTask); From c95fc014a8de592834e238bb4bed396a97e3ad2e Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 3 Nov 2023 14:57:36 +0800 Subject: [PATCH 17/56] enh: rsma checkpoint --- source/dnode/vnode/src/inc/sma.h | 2 +- source/dnode/vnode/src/sma/smaOpen.c | 2 +- source/dnode/vnode/src/vnd/vnodeCommit.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 63d6e7e5c2..e87b356be7 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -155,7 +155,7 @@ struct SRSmaInfo { int64_t lastRecv; // ms int8_t assigned; // 0 idle, 1 assgined for exec int8_t delFlag; - int8_t padding; + int16_t padding; T_REF_DECLARE() SRSmaInfoItem items[TSDB_RETENTION_L2]; void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t diff --git a/source/dnode/vnode/src/sma/smaOpen.c b/source/dnode/vnode/src/sma/smaOpen.c index c0af670a17..633e096314 100644 --- a/source/dnode/vnode/src/sma/smaOpen.c +++ b/source/dnode/vnode/src/sma/smaOpen.c @@ -22,7 +22,7 @@ static int32_t rsmaRestore(SSma *pSma); #define SMA_SET_KEEP_CFG(v, l) \ do { \ - SRetention *r = &(pCfg->retentions[l]); \ + SRetention *r = &pCfg->retentions[l]; \ pKeepCfg->keep2 = convertTimeFromPrecisionToUnit(r->keep, pCfg->precision, TIME_UNIT_MINUTE); \ pKeepCfg->keep0 = pKeepCfg->keep2; \ pKeepCfg->keep1 = pKeepCfg->keep2; \ diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 9e0106dff4..50ca2f5d03 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -291,9 +291,6 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { if(syncNodeGetConfig(pVnode->sync, &pVnode->config.syncCfg) != 0) goto _exit; - code = smaPrepareAsyncCommit(pVnode->pSma); // prepare checkpointId and save to vnode.json - if (code) goto _exit; - pVnode->state.commitTerm = pVnode->state.applyTerm; pInfo->info.config = pVnode->config; @@ -316,6 +313,9 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { metaPrepareAsyncCommit(pVnode->pMeta); + code = smaPrepareAsyncCommit(pVnode->pSma); + if (code) goto _exit; + taosThreadMutexLock(&pVnode->mutex); ASSERT(pVnode->onCommit == NULL); pVnode->onCommit = pVnode->inUse; From 411151d671bc7335e7c75db4a5cd93ba169cd8df Mon Sep 17 00:00:00 2001 From: kailixu Date: Sat, 4 Nov 2023 08:44:26 +0800 Subject: [PATCH 18/56] fix: buffer overflow/buffer use after free/memory leak --- source/dnode/vnode/src/tsdb/tsdbRetention.c | 5 ++++- source/libs/executor/src/streamtimewindowoperator.c | 2 +- source/libs/stream/src/streamMeta.c | 2 +- source/libs/stream/src/streamSnapshot.c | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index 0fc1e1b64b..86298db2c4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -387,6 +387,8 @@ _exit: return code; } +static void tsdbFreeRtnArg(void *arg) { taosMemoryFree(arg); } + static int32_t tsdbDoRetentionSync(void *arg) { int32_t code = 0; int32_t lino = 0; @@ -409,6 +411,7 @@ _exit: TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); } tsem_post(&((SRtnArg *)arg)->tsdb->pVnode->canCommit); + tsdbFreeRtnArg(arg); return code; } @@ -438,7 +441,7 @@ _exit: return code; } -static void tsdbFreeRtnArg(void *arg) { taosMemoryFree(arg); } + int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync) { int32_t code = 0; diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 8bfa8e1a5d..839f3324a3 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -386,7 +386,6 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)param; cleanupBasicInfo(&pInfo->binfo); cleanupAggSup(&pInfo->aggSup); - clearGroupResInfo(&pInfo->groupResInfo); // it should be empty. void* pIte = NULL; @@ -401,6 +400,7 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { blockDataDestroy(pInfo->pDelRes); pInfo->stateStore.streamFileStateDestroy(pInfo->pState->pFileState); taosMemoryFreeClear(pInfo->pState); + clearGroupResInfo(&pInfo->groupResInfo); nodesDestroyNode((SNode*)pInfo->pPhyNode); colDataDestroy(&pInfo->twAggSup.timeWindowData); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 76945f17a9..31f8647dd5 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -185,7 +185,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); pMeta->chkpSaved = taosArrayInit(4, sizeof(int64_t)); pMeta->chkpInUse = taosArrayInit(4, sizeof(int64_t)); - pMeta->chkpCap = 8; + pMeta->chkpCap = 2; taosInitRWLatch(&pMeta->chkpDirLock); pMeta->chkpId = streamGetLatestCheckpointId(pMeta); diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 3de5de9967..2fed21dfd5 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -194,7 +194,7 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chk } } { - char* buf = taosMemoryCalloc(1, 512); + char* buf = taosMemoryCalloc(1, 1024); sprintf(buf, "[current: %s,", pFile->pCurrent); sprintf(buf + strlen(buf), "MANIFEST: %s,", pFile->pMainfest); sprintf(buf + strlen(buf), "options: %s,", pFile->pOptions); From 7e8c123fcae335094445f15d79bc3cdd801f456a Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 6 Nov 2023 08:46:04 +0800 Subject: [PATCH 19/56] chore: rsma code optimization --- source/dnode/vnode/src/sma/smaRollup.c | 11 +++++++---- source/libs/executor/src/streamtimewindowoperator.c | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 8f81829dc2..ada7c9d6b7 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -137,7 +137,9 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, return TSDB_CODE_FAILED; } - if (!taosArrayGetSize(tbUids)) { + int32_t nTables = taosArrayGetSize(tbUids); + + if (0 == nTables) { smaDebug("vgId:%d, no need to update tbUidList for suid:%" PRIi64 " since Empty tbUids", SMA_VID(pSma), *suid); return TSDB_CODE_SUCCESS; } @@ -158,8 +160,9 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, terrstr()); return TSDB_CODE_FAILED; } - smaDebug("vgId:%d, update tbUidList succeed for qTaskInfo:%p with suid:%" PRIi64 " uid:%" PRIi64 " level %d", - SMA_VID(pSma), pRSmaInfo->taskInfo[i], *suid, *(int64_t *)taosArrayGet(tbUids, 0), i); + smaDebug("vgId:%d, update tbUidList succeed for qTaskInfo:%p. suid:%" PRIi64 " uid:%" PRIi64 + "nTables:%d level %d", + SMA_VID(pSma), pRSmaInfo->taskInfo[i], *suid, *(int64_t *)TARRAY_GET_ELEM(tbUids, 0), nTables, i); } } @@ -252,7 +255,7 @@ static void tdRSmaTaskRemove(SStreamMeta *pMeta, int64_t streamId, int32_t taskI // persist to disk } taosWUnLockLatch(&pMeta->lock); - smaDebug("vgId:%d rsma task:%" PRIi64 ",%d dropped, remain tasks:%d", pMeta->vgId, streamId, taskId, numOfTasks); + smaDebug("vgId:%d, rsma task:%" PRIi64 ",%d dropped, remain tasks:%d", pMeta->vgId, streamId, taskId, numOfTasks); } static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo, diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 839f3324a3..8bfa8e1a5d 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -386,6 +386,7 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)param; cleanupBasicInfo(&pInfo->binfo); cleanupAggSup(&pInfo->aggSup); + clearGroupResInfo(&pInfo->groupResInfo); // it should be empty. void* pIte = NULL; @@ -400,7 +401,6 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { blockDataDestroy(pInfo->pDelRes); pInfo->stateStore.streamFileStateDestroy(pInfo->pState->pFileState); taosMemoryFreeClear(pInfo->pState); - clearGroupResInfo(&pInfo->groupResInfo); nodesDestroyNode((SNode*)pInfo->pPhyNode); colDataDestroy(&pInfo->twAggSup.timeWindowData); From dc5284a19cc073b6e6145bec23ae308910a91264 Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 6 Nov 2023 17:27:36 +0800 Subject: [PATCH 20/56] chore: add debug info --- source/dnode/vnode/src/inc/sma.h | 6 ++--- source/dnode/vnode/src/sma/smaRollup.c | 29 ++++++++-------------- source/dnode/vnode/src/tsdb/tsdbMemTable.c | 12 +++++++++ 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index e87b356be7..198c93a937 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -137,10 +137,10 @@ struct SSmaStat { #define RSMA_FS_LOCK(r) (&(r)->lock) struct SRSmaInfoItem { - int8_t level : 4; - int8_t fetchLevel : 4; + int8_t level; + int8_t fetchLevel; int8_t triggerStat; - uint16_t nScanned; + uint32_t nScanned; int32_t streamFlushed : 1; int32_t maxDelay : 31; // ms tmr_h tmrId; diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index ada7c9d6b7..3884b1df7a 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -331,7 +331,7 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat taosTmrReset(tdRSmaFetchTrigger, RSMA_FETCH_INTERVAL, pItem, smaMgmt.tmrHandle, &pItem->tmrId); - smaInfo("vgId:%d, open task:%p table:%" PRIi64 " level:%" PRIi8 ", checkpointId:%" PRIi64 ", maxdelay:%" PRIi64 + smaInfo("vgId:%d, open rsma task:%p table:%" PRIi64 " level:%" PRIi8 ", checkpointId:%" PRIi64 ", maxdelay:%" PRIi64 " watermark:%" PRIi64 ", finally maxdelay:%" PRIi32, TD_VID(pVnode), pItem->pStreamTask, pRSmaInfo->suid, (int8_t)(idx + 1), pStreamTask->chkInfo.checkpointId, param->maxdelay[idx], param->watermark[idx], pItem->maxDelay); @@ -1161,7 +1161,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { if (streamFlushed) { pRSmaInfo->items[i].streamFlushed = 1; if (++nStreamFlushed >= nTaskInfo) { - smaInfo("vgId:%d rsma commit, checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), + smaInfo("vgId:%d, rsma commit, checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode), nSleep * 10, nStreamFlushed, nTaskInfo); taosHashCancelIterate(pInfoHash, infoHash); goto _checkpoint; @@ -1292,20 +1292,14 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) { } int8_t fetchTriggerStat = - atomic_val_compare_exchange_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE, TASK_TRIGGER_STAT_INACTIVE); + atomic_val_compare_exchange_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE, TASK_TRIGGER_STAT_ACTIVE); switch (fetchTriggerStat) { case TASK_TRIGGER_STAT_ACTIVE: { smaDebug("vgId:%d, rsma fetch task planned for level:%" PRIi8 " suid:%" PRIi64 " since stat is active", SMA_VID(pSma), pItem->level, pRSmaInfo->suid); // async process - pItem->fetchLevel = pItem->level; -#if 0 - // debugging codes - SRSmaInfo *qInfo = tdAcquireRSmaInfoBySuid(pSma, pRSmaInfo->suid); - SRSmaInfoItem *qItem = RSMA_INFO_ITEM(qInfo, pItem->level - 1); - make sure(qItem->level == pItem->level); - make sure(qItem->fetchLevel == pItem->fetchLevel); -#endif + atomic_store_8(&pItem->fetchLevel, 1); + if (atomic_load_8(&pRSmaInfo->assigned) == 0) { tsem_post(&(pStat->notEmpty)); } @@ -1351,13 +1345,14 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { SArray *pResList = NULL; for (int8_t i = 1; i <= TSDB_RETENTION_L2; ++i) { SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, i - 1); - if (pItem->fetchLevel) { - pItem->fetchLevel = 0; + + if (1 == atomic_val_compare_exchange_8(&pItem->fetchLevel, 1, 0)) { qTaskInfo_t taskInfo = RSMA_INFO_QTASK(pInfo, i - 1); if (!taskInfo) { continue; } +#if 0 if ((++pItem->nScanned * pItem->maxDelay) > RSMA_FETCH_DELAY_MAX) { smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi16 " maxDelay:%d, fetch executed", SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay); @@ -1375,6 +1370,7 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { } pItem->nScanned = 0; +#endif if ((terrno = qSetSMAInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) { goto _err; @@ -1509,12 +1505,7 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) { TSDB_CHECK_CODE(code, lino, _exit); } - int8_t curStat = atomic_load_8(RSMA_COMMIT_STAT(pRSmaStat)); - if (curStat == 1) { - smaDebug("vgId:%d, fetch all not exec as commit stat is %" PRIi8, SMA_VID(pSma), curStat); - } else { - tdRSmaFetchAllResult(pSma, pInfo); - } + tdRSmaFetchAllResult(pSma, pInfo); if (0 == atomic_sub_fetch_32(&pRSmaStat->nFetchAll, 1)) { atomic_store_8(RSMA_COMMIT_STAT(pRSmaStat), 0); diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index cc77474e79..69b19f4bc5 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -661,6 +661,9 @@ static int32_t tsdbInsertColDataToTable(SMemTable *pMemTable, STbData *pTbData, if ((code = tbDataDoPut(pMemTable, pTbData, pos, &tRow, 0))) goto _exit; pTbData->minKey = TMIN(pTbData->minKey, key.ts); lRow = tRow; + tsdbDebug("vgId:%d, %s, insert col row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, + TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, tRow.iRow, tRow.pTSRow->ts, tRow.version, + pSubmitTbData->uid); // remain row ++tRow.iRow; @@ -680,6 +683,9 @@ static int32_t tsdbInsertColDataToTable(SMemTable *pMemTable, STbData *pTbData, lRow = tRow; ++tRow.iRow; + tsdbDebug("vgId:%d, %s, insert col row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, + TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, tRow.iRow, tRow.pTSRow->ts, tRow.version, + pSubmitTbData->uid); } } @@ -721,6 +727,9 @@ static int32_t tsdbInsertRowDataToTable(SMemTable *pMemTable, STbData *pTbData, code = tbDataDoPut(pMemTable, pTbData, pos, &tRow, 0); if (code) goto _exit; lRow = tRow; + tsdbDebug("vgId:%d, %s, insert row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, + TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, iRow, tRow.pTSRow->ts, tRow.version, + pSubmitTbData->uid); pTbData->minKey = TMIN(pTbData->minKey, key.ts); @@ -744,6 +753,9 @@ static int32_t tsdbInsertRowDataToTable(SMemTable *pMemTable, STbData *pTbData, lRow = tRow; iRow++; + tsdbDebug("vgId:%d, %s, insert row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, + TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, iRow, tRow.pTSRow->ts, tRow.version, + pSubmitTbData->uid); } } From a72d87b3ff16e34ce0c469b71148b15f1cef7f95 Mon Sep 17 00:00:00 2001 From: charles Date: Mon, 6 Nov 2023 18:59:34 +0800 Subject: [PATCH 21/56] add non marterial test cases and update sql.error function by charles --- tests/parallel_test/cases.task | 1 + tests/pytest/util/sql.py | 4 +- .../view/non_marterial_view/test_view.py | 591 ++++++++++++++++++ 3 files changed, 594 insertions(+), 2 deletions(-) create mode 100644 tests/system-test/0-others/view/non_marterial_view/test_view.py diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 21dcd16441..22f6199ee9 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -225,6 +225,7 @@ e ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/ttlChangeOnWrite.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/compress_tsz1.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/compress_tsz2.py +,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/view/non_marterial_view/test_view.py ,,n,system-test,python3 ./test.py -f 0-others/compatibility.py ,,n,system-test,python3 ./test.py -f 0-others/tag_index_basic.py ,,n,system-test,python3 ./test.py -f 0-others/udfpy_main.py diff --git a/tests/pytest/util/sql.py b/tests/pytest/util/sql.py index 7dcf6bc3f2..c05df0a852 100644 --- a/tests/pytest/util/sql.py +++ b/tests/pytest/util/sql.py @@ -88,7 +88,7 @@ class TDSql: expectErrNotOccured = False self.errno = e.errno error_info = repr(e) - self.error_info = error_info[error_info.index('(')+1:-1].split(",")[0].replace("'","") + self.error_info = ','.join(error_info[error_info.index('(')+1:-1].split(",")[:-1]).replace("'","") # self.error_info = (','.join(error_info.split(",")[:-1]).split("(",1)[1:][0]).replace("'","") if expectErrNotOccured: tdLog.exit("%s(%d) failed: sql:%s, expect error not occured" % (caller.filename, caller.lineno, sql)) @@ -106,7 +106,7 @@ class TDSql: tdLog.info("sql:%s, expect error occured" % (sql)) if expectErrInfo != None: - if expectErrInfo == self.error_info: + if expectErrInfo == self.error_info or expectErrInfo in self.error_info: tdLog.info("sql:%s, expected expectErrInfo %s occured" % (sql, expectErrInfo)) else: tdLog.exit("%s(%d) failed: sql:%s, expectErrInfo %s occured, but not expected errno %s" % (caller.filename, caller.lineno, sql, self.error_info, expectErrInfo)) diff --git a/tests/system-test/0-others/view/non_marterial_view/test_view.py b/tests/system-test/0-others/view/non_marterial_view/test_view.py new file mode 100644 index 0000000000..afb2476305 --- /dev/null +++ b/tests/system-test/0-others/view/non_marterial_view/test_view.py @@ -0,0 +1,591 @@ + +import taos +import os +import sys +import time +from pathlib import Path +sys.path.append(os.path.dirname(Path(__file__).resolve().parent.parent.parent) + "/7-tmq") + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +from util.sqlset import * +from tmqCommon import * + +class TDTestCase: + """This test case is used to veirfy the tmq consume data from non marterial view + """ + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + self.setsql = TDSetSql() + + # db info + self.dbname = "view_db" + self.stbname = 'stb' + self.ctbname_list = ["ct1", "ct2"] + self.stable_column_dict = { + 'ts': 'timestamp', + 'col1': 'float', + 'col2': 'int', + } + self.tag_dict = { + 'ctbname': 'binary(10)' + } + + def prepare_data(self, conn=None): + """Create the db and data for test + """ + tdLog.debug("Start to prepare the data") + if not conn: + conn = tdSql + # create datebase + conn.execute(f"create database {self.dbname}") + conn.execute(f"use {self.dbname}") + time.sleep(2) + + # create stable + conn.execute(self.setsql.set_create_stable_sql(self.stbname, self.stable_column_dict, self.tag_dict)) + tdLog.debug("Create stable {} successfully".format(self.stbname)) + + # create child tables + for ctname in self.ctbname_list: + conn.execute(f"create table {ctname} using {self.stbname} tags('{ctname}');") + tdLog.debug("Create child table {} successfully".format(ctname)) + + # insert data into child tables + conn.execute(f"insert into {ctname} values(now, 1.1, 1)(now+1s, 2.2, 2)(now+2s, 3.3, 3)(now+3s, 4.4, 4)(now+4s, 5.5, 5)(now+5s, 6.6, 6)(now+6s, 7.7, 7)(now+7s, 8.8, 8)(now+8s, 9.9, 9)(now+9s, 10.1, 10);)") + tdLog.debug(f"Insert into data to {ctname} successfully") + + def prepare_tmq_data(self, para_dic): + tdLog.debug("Start to prepare the tmq data") + tmqCom.initConsumerTable() + tdCom.create_database(tdSql, para_dic["dbName"], para_dic["dropFlag"], vgroups=para_dic["vgroups"], replica=1) + tdLog.info("create stb") + tdCom.create_stable(tdSql, dbname=para_dic["dbName"], stbname=para_dic["stbName"], column_elm_list=para_dic['colSchema'], tag_elm_list=para_dic['tagSchema']) + tdLog.info("create ctb") + tdCom.create_ctable(tdSql, dbname=para_dic["dbName"], stbname=para_dic["stbName"],tag_elm_list=para_dic['tagSchema'], count=para_dic["ctbNum"], default_ctbname_prefix=para_dic['ctbPrefix']) + tdLog.info("insert data") + tmqCom.insert_data(tdSql, para_dic["dbName"], para_dic["ctbPrefix"], para_dic["ctbNum"], para_dic["rowsPerTbl"], para_dic["batchNum"], para_dic["startTs"]) + tdLog.debug("Finish to prepare the tmq data") + + def check_view_num(self, num): + tdSql.query("show views;") + rows = tdSql.queryRows + assert(rows == num) + tdLog.debug(f"Verify the view number successfully") + + def create_user(self, username, password): + tdSql.execute(f"create user {username} pass '{password}';") + tdLog.debug("Create user {} with password {} successfully".format(username, password)) + + def check_permissions(self, username, db_name, permission_dict, view_name=None): + """ + :param permission_dict: {'db': ["read", "write], 'view': ["read", "write", "alter"]} + """ + tdSql.query("select * from information_schema.ins_user_privileges;") + for item in permission_dict.keys(): + if item == "db": + for permission in permission_dict[item]: + assert((username, permission, db_name, "", "", "") in tdSql.queryResult) + tdLog.debug(f"Verify the {item} {db_name} {permission} permission successfully") + elif item == "view": + for permission in permission_dict[item]: + assert((username, permission, db_name, view_name, "", "view") in tdSql.queryResult) + tdLog.debug(f"Verify the {item} {db_name} {view_name} {permission} permission successfully") + else: + raise Exception(f"Invalid permission type: {item}") + + def test_create_view_from_one_database(self): + """This test case is used to verify the create view from one database + """ + self.prepare_data() + tdSql.execute(f"create view v1 as select * from {self.stbname};") + self.check_view_num(1) + tdSql.error(f'create view v1 as select * from {self.stbname};', expectErrInfo='view already exists in db') + tdSql.error(f'create view db2.v2 as select * from {self.stbname};', expectErrInfo='Fail to get table info, error: Database not exist') + tdSql.error(f'create view v2 as select c2 from {self.stbname};', expectErrInfo='Invalid column name: c2') + tdSql.error(f'create view v2 as select ts, col1 from tt1;', expectErrInfo='Fail to get table info, error: Table does not exist') + + tdSql.execute(f"drop database {self.dbname}") + tdLog.debug("Finish test case 'test_create_view_from_one_database'") + + def test_create_view_from_multi_database(self): + """This test case is used to verify the create view from multi database + """ + self.prepare_data() + tdSql.execute(f"create view v1 as select * from view_db.{self.stbname};") + self.check_view_num(1) + + self.dbname = "view_db2" + self.prepare_data() + tdSql.execute(f"create view v1 as select * from view_db2.{self.stbname};") + tdSql.execute(f"create view v2 as select * from view_db.v1;") + self.check_view_num(2) + + self.dbname = "view_db" + tdSql.execute(f"drop database view_db;") + tdSql.execute(f"drop database view_db2;") + tdLog.debug("Finish test case 'test_create_view_from_multi_database'") + + def test_create_view_name_params(self): + """This test case is used to verify the create view with different view name params + """ + self.prepare_data() + tdSql.execute(f"create view v1 as select * from {self.stbname};") + self.check_view_num(1) + tdSql.error(f"create view v/2 as select * from {self.stbname};", expectErrInfo='syntax error near "/2 as select * from stb;"') + tdSql.execute(f"create view v2 as select ts, col1 from {self.stbname};") + self.check_view_num(2) + view_name_192_characters = "rzuoxoIXilAGgzNjYActiQwgzZK7PZYpDuaOe1lSJMFMVYXaexh1OfMmk3LvJcQbTeXXW7uGJY8IHuweHF73VHgoZgf0waO33YpZiTKfDQbdWtN4YmR2eWjL84ZtkfjM4huCP6lCysbDMj8YNwWksTdUq70LIyNhHp2V8HhhxyYSkREYFLJ1kOE78v61MQT6" + tdSql.execute(f"create view {view_name_192_characters} as select * from {self.stbname};") + self.check_view_num(3) + tdSql.error(f"create view {view_name_192_characters}1 as select * from {self.stbname};", expectErrInfo='Invalid identifier name: rzuoxoixilaggznjyactiqwgzzk7pzypduaoe1lsjmfmvyxaexh1ofmmk3lvjcqbtexxw7ugjy8ihuwehf73vhgozgf0wao33ypzitkfdqbdwtn4ymr2ewjl84ztkfjm4hucp6lcysbdmj8ynwwkstduq70liynhhp2v8hhhxyyskreyflj1koe78v61mqt61 as select * from stb;') + tdSql.execute(f"drop database {self.dbname}") + tdLog.debug("Finish test case 'test_create_view_name_params'") + + def test_create_view_query(self): + """This test case is used to verify the create view with different data type in query + """ + self.prepare_data() + # add different data type table + tdSql.execute(f"create table tb (ts timestamp, c1 int, c2 int unsigned, c3 bigint, c4 bigint unsigned, c5 float, c6 double, c7 binary(16), c8 smallint, c9 smallint unsigned, c10 tinyint, c11 tinyint unsigned, c12 bool, c13 varchar(16), c14 nchar(8), c15 geometry(21), c16 varbinary(16));") + tdSql.execute(f"create view v1 as select ts, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 from tb;") + # check data type in create view sql + tdSql.query("desc v1;") + res = tdSql.queryResult + data_type_list = [res[index][1] for index in range(len(res))] + tdLog.debug(data_type_list) + assert('TIMESTAMP' in data_type_list and 'INT' in data_type_list and 'INT UNSIGNED' in data_type_list and 'BIGINT' in data_type_list and 'BIGINT UNSIGNED' in data_type_list and 'FLOAT' in data_type_list and 'DOUBLE' in data_type_list and 'VARCHAR' in data_type_list and 'SMALLINT' in data_type_list and 'SMALLINT UNSIGNED' in data_type_list and 'TINYINT' in data_type_list and 'TINYINT UNSIGNED' in data_type_list and 'BOOL' in data_type_list and 'VARCHAR' in data_type_list and 'NCHAR' in data_type_list and 'GEOMETRY' in data_type_list and 'VARBINARY' in data_type_list) + tdSql.execute("create view v2 as select * from tb where c1 >5 and c7 like '%ab%';") + self.check_view_num(2) + tdSql.error("create view v3 as select * from tb where c1 like '%ab%';", expectErrInfo='Invalid value type') + tdSql.execute("create view v3 as select first(ts), sum(c1) from tb group by c2 having avg(c4) > 0;") + tdSql.execute("create view v4 as select _wstart,sum(c6) from tb interval(10s);") + tdSql.execute("create view v5 as select * from tb join v2 on tb.ts = v2.ts;") + tdSql.execute("create view v6 as select * from (select ts, c1, c2 from (select * from v2));") + self.check_view_num(6) + for v in ['v1', 'v2', 'v3', 'v4', 'v5', 'v6']: + tdSql.execute(f"drop view {v};") + tdSql.execute(f"drop database {self.dbname}") + tdLog.debug("Finish test case 'test_create_view_query'") + + def test_show_view(self): + """This test case is used to verify the show view + """ + self.prepare_data() + tdSql.execute(f"create view v1 as select * from {self.ctbname_list[0]};") + + # query from show sql + tdSql.query("show views;") + res = tdSql.queryResult + assert(res[0][0] == 'v1' and res[0][1] == 'view_db' and res[0][2] == 'root' and res[0][4] == 'NORMAL' and res[0][5] == 'select * from ct1;') + + # show create sql + tdSql.query("show create view v1;") + res = tdSql.queryResult + assert(res[0][1] == 'CREATE VIEW `view_db`.`v1` AS select * from ct1;') + + # query from desc results + tdSql.query("desc view_db.v1;") + res = tdSql.queryResult + assert(res[0][1] == 'TIMESTAMP' and res[1][1] == 'FLOAT' and res[2][1] == 'INT') + + # query from system table + tdSql.query("select * from information_schema.ins_views;") + res = tdSql.queryResult + assert(res[0][0] == 'v1' and res[0][1] == 'view_db' and res[0][2] == 'root' and res[0][4] == 'NORMAL' and res[0][5] == 'select * from ct1;') + tdSql.error("show db3.views;", expectErrInfo='Database not exist') + tdSql.error("desc viewx;", expectErrInfo='Table does not exist') + tdSql.error(f"show create view {self.dbname}.viewx;", expectErrInfo='view not exists in db') + tdSql.execute(f"drop database {self.dbname}") + tdSql.error("show views;", expectErrInfo='Database not exist') + tdLog.debug("Finish test case 'test_show_view'") + + def test_drop_view(self): + """This test case is used to verify the drop view + """ + self.prepare_data() + self.dbname = "view_db2" + self.prepare_data() + tdSql.execute("create view view_db.v1 as select * from view_db.stb;") + tdSql.execute("create view view_db2.v1 as select * from view_db2.stb;") + # delete view without database name + tdSql.execute("drop view v1;") + # delete view with database name + tdSql.execute("drop view view_db.v1;") + # delete non exist view + tdSql.error("drop view view_db.v11;", expectErrInfo='view not exists in db') + tdSql.execute("drop database view_db") + tdSql.execute("drop database view_db2;") + self.dbname = "view_db" + tdLog.debug("Finish test case 'test_drop_view'") + + def test_view_permission_db_all_view_all(self): + """This test case is used to verify the view permission with db all and view all, + the time sleep to wait the permission take effect + """ + self.prepare_data() + username = "view_test" + password = "test" + self.create_user(username, password) + # grant all db permission to user + tdSql.execute("grant all on view_db.* to view_test;") + + conn = taos.connect(user=username, password=password) + conn.execute(f"use {self.dbname};") + conn.execute("create view v1 as select * from stb;") + res = conn.query("show views;") + assert(len(res.fetch_all()) == 1) + tdLog.debug(f"Verify the show view permission of user '{username}' with db all and view all successfully") + self.check_permissions("view_test", "view_db", {"db": ["read", "write"], "view": ["read", "write", "alter"]}, "v1") + tdLog.debug(f"Verify the view permission from system table successfully") + time.sleep(2) + conn.execute("drop view v1;") + tdSql.execute("revoke all on view_db.* from view_test;") + tdSql.execute(f"drop database {self.dbname};") + time.sleep(1) + + # prepare data by user 'view_test' + self.prepare_data(conn) + + conn.execute("create view v1 as select * from stb;") + res = conn.query("show views;") + assert(len(res.fetch_all()) == 1) + tdLog.debug(f"Verify the view permission of user '{username}' with db all and view all successfully") + self.check_permissions("view_test", "view_db", {"db": ["read", "write"], "view": ["read", "write", "alter"]}, "v1") + tdLog.debug(f"Verify the view permission from system table successfully") + time.sleep(2) + conn.execute("drop view v1;") + tdSql.execute("revoke all on view_db.* from view_test;") + tdSql.execute("revoke all on view_db.v1 from view_test;") + tdSql.execute(f"drop database {self.dbname}") + tdSql.execute("drop user view_test;") + tdLog.debug("Finish test case 'test_view_permission_db_all_view_all'") + + def test_view_permission_db_write_view_all(self): + """This test case is used to verify the view permission with db write and view all + """ + username = "view_test" + password = "test" + self.create_user(username, password) + conn = taos.connect(user=username, password=password) + self.prepare_data(conn) + conn.execute("create view v1 as select * from stb;") + tdSql.execute("revoke read on view_db.* from view_test;") + self.check_permissions("view_test", "view_db", {"db": ["write"], "view": ["read", "write", "alter"]}, "v1") + # create view permission error + try: + conn.execute("create view v2 as select * from v1;") + except Exception as ex: + assert("[0x2644]: Permission denied or target object not exist" in str(ex)) + # query from view permission error + try: + conn.query("select * from v1;") + except Exception as ex: + assert("[0x2644]: Permission denied or target object not exist" in str(ex)) + # view query permission + res = conn.query("show views;") + assert(len(res.fetch_all()) == 1) + time.sleep(2) + conn.execute("drop view v1;") + tdSql.execute("revoke write on view_db.* from view_test;") + tdSql.execute(f"drop database {self.dbname}") + tdSql.execute("drop user view_test;") + tdLog.debug("Finish test case 'test_view_permission_db_write_view_all'") + + def test_view_permission_db_write_view_read(self): + """This test case is used to verify the view permission with db write and view read + """ + username = "view_test" + password = "test" + self.create_user(username, password) + conn = taos.connect(user=username, password=password) + self.prepare_data() + + tdSql.execute("create view v1 as select * from stb;") + tdSql.execute("grant write on view_db.* to view_test;") + tdSql.execute("grant read on view_db.v1 to view_test;") + + conn.execute(f"use {self.dbname};") + time.sleep(2) + res = conn.query("select * from v1;") + assert(len(res.fetch_all()) == 20) + + conn.execute("create view v2 as select * from v1;") + # create view from super table of database + try: + conn.execute("create view v3 as select * from stb;") + except Exception as ex: + assert("[0x2644]: Permission denied or target object not exist" in str(ex)) + time.sleep(2) + conn.execute("drop view v2;") + try: + conn.execute("drop view v1;") + except Exception as ex: + assert("[0x2644]: Permission denied or target object not exist" in str(ex)) + tdSql.execute("revoke read on view_db.v1 from view_test;") + tdSql.execute("revoke write on view_db.* from view_test;") + tdSql.execute(f"drop database {self.dbname}") + tdSql.execute("drop user view_test;") + tdLog.debug("Finish test case 'test_view_permission_db_write_view_read'") + + def test_view_permission_db_write_view_alter(self): + """This test case is used to verify the view permission with db write and view alter + """ + username = "view_test" + password = "test" + self.create_user(username, password) + conn = taos.connect(user=username, password=password) + self.prepare_data() + + tdSql.execute("create view v1 as select * from stb;") + tdSql.execute("grant write on view_db.* to view_test;") + tdSql.execute("grant alter on view_db.v1 to view_test;") + try: + conn.execute(f"use {self.dbname};") + conn.execute("select * from v1;") + except Exception as ex: + assert("[0x2644]: Permission denied or target object not exist" in str(ex)) + time.sleep(2) + conn.execute("drop view v1;") + tdSql.execute("revoke write on view_db.* from view_test;") + tdSql.execute(f"drop database {self.dbname}") + tdSql.execute("drop user view_test;") + tdLog.debug("Finish test case 'test_view_permission_db_write_view_alter'") + + def test_view_permission_db_read_view_all(self): + """This test case is used to verify the view permission with db read and view all + """ + username = "view_test" + password = "test" + self.create_user(username, password) + conn = taos.connect(user=username, password=password) + self.prepare_data() + + tdSql.execute("create view v1 as select * from stb;") + tdSql.execute("grant read on view_db.* to view_test;") + tdSql.execute("grant all on view_db.v1 to view_test;") + try: + conn.execute(f"use {self.dbname};") + conn.execute("create view v2 as select * from v1;") + except Exception as ex: + assert("[0x2644]: Permission denied or target object not exist" in str(ex)) + time.sleep(2) + res = conn.query("select * from v1;") + assert(len(res.fetch_all()) == 20) + conn.execute("drop view v1;") + tdSql.execute("revoke read on view_db.* from view_test;") + tdSql.execute(f"drop database {self.dbname}") + tdSql.execute("drop user view_test;") + tdLog.debug("Finish test case 'test_view_permission_db_read_view_all'") + + def test_view_permission_db_read_view_alter(self): + """This test case is used to verify the view permission with db read and view alter + """ + username = "view_test" + password = "test" + self.create_user(username, password) + conn = taos.connect(user=username, password=password) + self.prepare_data() + + tdSql.execute("create view v1 as select * from stb;") + tdSql.execute("grant read on view_db.* to view_test;") + tdSql.execute("grant alter on view_db.v1 to view_test;") + try: + conn.execute(f"use {self.dbname};") + conn.execute("select * from v1;") + except Exception as ex: + assert("[0x2644]: Permission denied or target object not exist" in str(ex)) + + time.sleep(2) + conn.execute("drop view v1;") + tdSql.execute("revoke read on view_db.* from view_test;") + tdSql.execute(f"drop database {self.dbname}") + tdSql.execute("drop user view_test;") + tdLog.debug("Finish test case 'test_view_permission_db_read_view_alter'") + + def test_view_permission_db_read_view_read(self): + """This test case is used to verify the view permission with db read and view read + """ + username = "view_test" + password = "test" + self.create_user(username, password) + conn = taos.connect(user=username, password=password) + self.prepare_data() + + tdSql.execute("create view v1 as select * from stb;") + tdSql.execute("grant read on view_db.* to view_test;") + tdSql.execute("grant read on view_db.v1 to view_test;") + conn.execute(f"use {self.dbname};") + time.sleep(2) + res = conn.query("select * from v1;") + assert(len(res.fetch_all()) == 20) + try: + conn.execute("drop view v1;") + except Exception as ex: + assert("[0x2644]: Permission denied or target object not exist" in str(ex)) + tdSql.execute("revoke read on view_db.* from view_test;") + tdSql.execute("revoke read on view_db.v1 from view_test;") + tdSql.execute(f"drop database {self.dbname}") + tdSql.execute("drop user view_test;") + tdLog.debug("Finish test case 'test_view_permission_db_read_view_read'") + + def test_query_from_view(self): + """This test case is used to verify the query from view + """ + self.prepare_data() + view_name_list = [] + + # common query from super table + tdSql.execute(f"create view v1 as select * from {self.stbname};") + tdSql.query(f"select * from v1;") + rows = tdSql.queryRows + assert(rows == 20) + view_name_list.append("v1") + tdLog.debug("Verify the query from super table successfully") + + # common query from child table + tdSql.execute(f"create view v2 as select * from {self.ctbname_list[0]};") + tdSql.query(f"select * from v2;") + rows = tdSql.queryRows + assert(rows == 10) + view_name_list.append("v2") + tdLog.debug("Verify the query from child table successfully") + + # join query + tdSql.execute(f"create view v3 as select * from {self.stbname} join {self.ctbname_list[1]} on {self.ctbname_list[1]}.ts = {self.stbname}.ts;") + tdSql.query(f"select * from v3;") + rows = tdSql.queryRows + assert(rows == 10) + view_name_list.append("v3") + tdLog.debug("Verify the join query successfully") + + # group by query + tdSql.execute(f"create view v4 as select count(*) from {self.stbname} group by tbname;") + tdSql.query(f"select * from v4;") + rows = tdSql.queryRows + assert(rows == 2) + res = tdSql.queryResult + assert(res[0][0] == 10) + view_name_list.append("v4") + tdLog.debug("Verify the group by query successfully") + + # partition by query + tdSql.execute(f"create view v5 as select sum(col1) from {self.stbname} where col2 > 4 partition by tbname interval(3s);") + tdSql.query(f"select * from v5;") + rows = tdSql.queryRows + assert(rows >= 4) + view_name_list.append("v5") + tdLog.debug("Verify the partition by query successfully") + + # query from nested view + tdSql.execute(f"create view v6 as select * from v5;") + tdSql.query(f"select * from v6;") + rows = tdSql.queryRows + assert(rows >= 4) + view_name_list.append("v6") + tdLog.debug("Verify the query from nested view successfully") + + # delete view + for view in view_name_list: + tdSql.execute(f"drop view {view};") + tdLog.debug(f"Drop view {view} successfully") + tdSql.execute(f"drop database {self.dbname}") + tdLog.debug("Finish test case 'test_query_from_view'") + + def test_tmq_from_view(self): + """This test case is used to verify the tmq consume data from view + """ + # params for db + paraDict = {'dbName': 'view_db', + 'dropFlag': 1, + 'event': '', + 'vgroups': 4, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbNum': 1, + 'rowsPerTbl': 10000, + 'batchNum': 10, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 10, + 'showMsg': 1, + 'showRow': 1} + # topic info + topic_name_list = ['topic1'] + view_name_list = ['view1'] + expectRowsList = [] + + self.prepare_tmq_data(paraDict) + + # init consume info, and start tmq_sim, then check consume result + tmqCom.initConsumerTable() + queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + tdSql.execute(f"create view {view_name_list[0]} as {queryString}") + sqlString = "create topic %s as %s" %(topic_name_list[0], "select * from %s"%view_name_list[0]) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + tdSql.query(queryString) + expectRowsList.append(tdSql.getRows()) + + consumerId = 1 + topicList = topic_name_list[0] + expectrowcnt = paraDict["rowsPerTbl"] * paraDict["ctbNum"] + keyList = 'group.id:cgrp1, enable.auto.commit:false, auto.commit.interval.ms:6000, auto.offset.reset:earliest' + ifcheckdata = 1 + ifManualCommit = 1 + tmqCom.insertConsumerInfo(consumerId, expectrowcnt, topicList, keyList, ifcheckdata, ifManualCommit) + + tdLog.info("start consume processor") + tmqCom.startTmqSimProcess(paraDict['pollDelay'], paraDict["dbName"], paraDict['showMsg'], paraDict['showRow']) + + tdLog.info("wait the consume result") + expectRows = 1 + resultList = tmqCom.selectConsumeResult(expectRows) + if expectRowsList[0] != resultList[0]: + tdLog.info("expect consume rows: %d, act consume rows: %d"%(expectRowsList[0], resultList[0])) + tdLog.exit("1 tmq consume rows error!") + + tmqCom.checkFileContent(consumerId, queryString) + + time.sleep(10) + for i in range(len(topic_name_list)): + tdSql.query("drop topic %s"%topic_name_list[i]) + for i in range(len(view_name_list)): + tdSql.query("drop view %s"%view_name_list[i]) + + # drop database + tdSql.execute(f"drop database {paraDict['dbName']}") + tdSql.execute("drop database cdb;") + tdLog.debug("Finish test case 'test_tmq_from_view'") + + def run(self): + self.test_create_view_from_one_database() + self.test_create_view_from_multi_database() + self.test_create_view_name_params() + self.test_create_view_query() + self.test_show_view() + self.test_drop_view() + self.test_view_permission_db_all_view_all() + self.test_view_permission_db_write_view_all() + self.test_view_permission_db_write_view_read() + self.test_view_permission_db_write_view_alter() + self.test_view_permission_db_read_view_all() + self.test_view_permission_db_read_view_alter() + self.test_view_permission_db_read_view_read() + self.test_query_from_view() + self.test_tmq_from_view() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) From f5d796a081bdcb73eb020cdb2cc0703112a5d8bd Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 6 Nov 2023 19:53:09 +0800 Subject: [PATCH 22/56] chore: print stream state --- source/dnode/vnode/src/sma/smaRollup.c | 7 ++++- source/libs/stream/src/tstreamFileState.c | 33 +++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 3884b1df7a..ac99dc9de3 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -678,7 +678,12 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); taosMemoryFree(pReq); - TSDB_CHECK_CODE(code, lino, _exit); + smaError("vgId:%d, %s failed at line %d since %s, suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIi64 + ", ver:%" PRIi64, + SMA_VID(pSma), __func__, lino, tstrerror(code), suid, pItem->level, output ? output->info.id.uid : -1, + output ? output->info.version : -1); + continue; + // TSDB_CHECK_CODE(code, lino, _exit); } smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64, diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index 584e81fafc..a597858e63 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -515,6 +515,17 @@ void streamFileStateEncode(TSKEY* pKey, void** pVal, int32_t* pLen) { taosEncodeFixedI64(&buff, *pKey); } +static void getDebugRowBuff(char* val, int32_t vlen, char* output) { + for (int32_t i = 0; i < vlen; ++i) { + if (*(val + i) == '\0') { + sprintf(output + i, "0"); + } else { + sprintf(output + i, "%c", *(val + i)); + } + } + output[vlen] = 0; +} + int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState) { int32_t code = TSDB_CODE_SUCCESS; SListIter iter = {0}; @@ -530,6 +541,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, int32_t len = pFileState->rowSize + sizeof(uint64_t) + sizeof(int32_t) + 1; char* buf = taosMemoryCalloc(1, len); + char output[1024]; void* batch = streamStateCreateBatch(); while ((pNode = tdListNext(&iter)) != NULL && code == TSDB_CODE_SUCCESS) { @@ -546,6 +558,15 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, } void* pSKey = pFileState->stateBuffCreateStateKeyFn(pPos, ((SStreamState*)pFileState->pFileStore)->number); +#if 1 + SStateKey* pStateKey = pSKey; + char* pStateVal = pPos->pRowBuff; + int32_t pStateVLen = pFileState->rowSize; + assert(pStateVLen < 1024); + getDebugRowBuff(pStateVal, pStateVLen, output); + qDebug("%s:%d key:[%" PRIu64 ",%" PRIi64 ",%" PRIi64 "] vlen:%d, val:%s", __func__, __LINE__, pStateKey->key.groupId, + pStateKey->key.ts, pStateKey->opNum, pStateVLen, output); +#endif code = streamStatePutBatchOptimize(pFileState->pFileStore, idx, batch, pSKey, pPos->pRowBuff, pFileState->rowSize, 0, buf); taosMemoryFreeClear(pSKey); @@ -691,6 +712,7 @@ int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { if (pCur == NULL) { return -1; } + char output[1024]; int32_t recoverNum = TMIN(MIN_NUM_OF_ROW_BUFF, pFileState->maxRowCount); while (code == TSDB_CODE_SUCCESS) { if (pFileState->curRowCount >= recoverNum) { @@ -710,6 +732,17 @@ int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { } ASSERT(vlen == pFileState->rowSize); memcpy(pNewPos->pRowBuff, pVal, vlen); + +#if 1 + SStateKey* pStateKey = pNewPos->pKey; + char* pStateVal = pVal; + int32_t pStateVLen = vlen; + assert(pStateVLen < 1024); + getDebugRowBuff(pStateVal, pStateVLen, output); + qDebug("%s:%d key:[%" PRIu64 ",%" PRIi64 ",%" PRIi64 "] vlen:%d, val:%s", __func__, __LINE__, pStateKey->key.groupId, + pStateKey->key.ts, pStateKey->opNum, pStateVLen, output); +#endif + taosMemoryFreeClear(pVal); pNewPos->beFlushed = true; code = tSimpleHashPut(pFileState->rowStateBuff, pNewPos->pKey, pFileState->keyLen, &pNewPos, POINTER_BYTES); From e972ab16fe566d6446e4d11c69dd6ecaaa468b09 Mon Sep 17 00:00:00 2001 From: liuyao <54liuyao@163.com> Date: Tue, 7 Nov 2023 14:11:08 +0800 Subject: [PATCH 23/56] recover flush mark --- source/libs/stream/src/tstreamFileState.c | 49 ++++++++++++++--------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index a597858e63..8a3e7ce892 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -27,6 +27,8 @@ #define DEFAULT_MAX_STREAM_BUFFER_SIZE (128 * 1024 * 1024) #define MIN_NUM_OF_ROW_BUFF 10240 +#define TASK_KEY "streamFileState" + struct SStreamFileState { SList* usedBuffs; SList* freeBuffs; @@ -113,6 +115,15 @@ void* sessionCreateStateKey(SRowBuffPos* pPos, int64_t num) { return pStateKey; } +static void streamFileStateDecode(TSKEY* pKey, void* pBuff, int32_t len) { pBuff = taosDecodeFixedI64(pBuff, pKey); } + +static void streamFileStateEncode(TSKEY* pKey, void** pVal, int32_t* pLen) { + *pLen = sizeof(TSKEY); + (*pVal) = taosMemoryCalloc(1, *pLen); + void* buff = *pVal; + taosEncodeFixedI64(&buff, *pKey); +} + SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId, int64_t checkpointId, int8_t type) { @@ -181,6 +192,17 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_ recoverSesssion(pFileState, checkpointId); } + char keyBuf[128] = {0}; + void* valBuf = NULL; + int32_t len = 0; + sprintf(keyBuf, "%s:%" PRId64 "", TASK_KEY, ((SStreamState*)pFileState->pFileStore)->checkPointId); + int32_t code = streamDefaultGet_rocksdb(pFileState->pFileStore, keyBuf, &valBuf, &len); + if (code == TSDB_CODE_SUCCESS) { + ASSERT(len == sizeof(TSKEY)); + streamFileStateDecode(&pFileState->flushMark, valBuf, len); + qDebug("===stream===flushMark read:%" PRId64 ",checkpointid:%" PRId64, pFileState->flushMark, ((SStreamState*)pFileState->pFileStore)->checkPointId); + } + return pFileState; _error: @@ -506,15 +528,6 @@ SStreamSnapshot* getSnapshot(SStreamFileState* pFileState) { return pFileState->usedBuffs; } -void streamFileStateDecode(TSKEY* pKey, void* pBuff, int32_t len) { pBuff = taosDecodeFixedI64(pBuff, pKey); } - -void streamFileStateEncode(TSKEY* pKey, void** pVal, int32_t* pLen) { - *pLen = sizeof(TSKEY); - (*pVal) = taosMemoryCalloc(1, *pLen); - void* buff = *pVal; - taosEncodeFixedI64(&buff, *pKey); -} - static void getDebugRowBuff(char* val, int32_t vlen, char* output) { for (int32_t i = 0; i < vlen; ++i) { if (*(val + i) == '\0') { @@ -550,6 +563,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, continue; } pPos->beFlushed = true; + pFileState->flushMark = TMAX(pFileState->flushMark, pFileState->getTs(pPos->pKey)); qDebug("===stream===flushed start:%" PRId64, pFileState->getTs(pPos->pKey)); if (streamStateGetBatchSize(batch) >= BATCH_LIMIT) { @@ -586,13 +600,13 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, pFileState->id, numOfElems, BATCH_LIMIT, elapsed); if (flushState) { - const char* taskKey = "streamFileState"; { char keyBuf[128] = {0}; void* valBuf = NULL; int32_t len = 0; - sprintf(keyBuf, "%s:%" PRId64 "", taskKey, ((SStreamState*)pFileState->pFileStore)->checkPointId); + sprintf(keyBuf, "%s:%" PRId64 "", TASK_KEY, ((SStreamState*)pFileState->pFileStore)->checkPointId); streamFileStateEncode(&pFileState->flushMark, &valBuf, &len); + qDebug("===stream===flushMark write:%" PRId64 ",checkpoint id:%" PRId64, pFileState->flushMark, ((SStreamState*)pFileState->pFileStore)->checkPointId); streamStatePutBatch(pFileState->pFileStore, "default", batch, keyBuf, valBuf, len, 0); taosMemoryFree(valBuf); } @@ -600,7 +614,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, char keyBuf[128] = {0}; char valBuf[64] = {0}; int32_t len = 0; - memcpy(keyBuf, taskKey, strlen(taskKey)); + memcpy(keyBuf, TASK_KEY, strlen(TASK_KEY)); len = sprintf(valBuf, "%" PRId64 "", ((SStreamState*)pFileState->pFileStore)->checkPointId); code = streamStatePutBatch(pFileState->pFileStore, "default", batch, keyBuf, valBuf, len, 0); } @@ -612,26 +626,23 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, } int32_t forceRemoveCheckpoint(SStreamFileState* pFileState, int64_t checkpointId) { - const char* taskKey = "streamFileState"; char keyBuf[128] = {0}; - sprintf(keyBuf, "%s:%" PRId64 "", taskKey, checkpointId); + sprintf(keyBuf, "%s:%" PRId64 "", TASK_KEY, checkpointId); return streamDefaultDel_rocksdb(pFileState->pFileStore, keyBuf); } int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list) { - const char* taskKey = "streamFileState"; - return streamDefaultIterGet_rocksdb(pFileState->pFileStore, taskKey, NULL, list); + return streamDefaultIterGet_rocksdb(pFileState->pFileStore, TASK_KEY, NULL, list); } int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) { int32_t code = TSDB_CODE_SUCCESS; - const char* taskKey = "streamFileState"; int64_t maxCheckPointId = 0; { char buf[128] = {0}; void* val = NULL; int32_t len = 0; - memcpy(buf, taskKey, strlen(taskKey)); + memcpy(buf, TASK_KEY, strlen(TASK_KEY)); code = streamDefaultGet_rocksdb(pFileState->pFileStore, buf, &val, &len); if (code != 0 || len == 0 || val == NULL) { return TSDB_CODE_FAILED; @@ -645,7 +656,7 @@ int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) { char buf[128] = {0}; void* val = 0; int32_t len = 0; - sprintf(buf, "%s:%" PRId64 "", taskKey, i); + sprintf(buf, "%s:%" PRId64 "", TASK_KEY, i); code = streamDefaultGet_rocksdb(pFileState->pFileStore, buf, &val, &len); if (code != 0) { return TSDB_CODE_FAILED; From 467c27c7585d183f03447dbca23e80346921cb55 Mon Sep 17 00:00:00 2001 From: liuyao <54liuyao@163.com> Date: Tue, 7 Nov 2023 15:45:38 +0800 Subject: [PATCH 24/56] recover flush mark --- include/libs/function/function.h | 1 - source/libs/stream/src/streamState.c | 3 --- source/libs/stream/src/tstreamFileState.c | 31 +++++++---------------- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/include/libs/function/function.h b/include/libs/function/function.h index 2e3cd670d7..49435a6317 100644 --- a/include/libs/function/function.h +++ b/include/libs/function/function.h @@ -168,7 +168,6 @@ typedef struct { struct SStreamFileState *pFileState; int32_t number; SSHashObj *parNameMap; - int64_t checkPointId; int32_t taskId; int64_t streamId; int64_t streamBackendRid; diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index fb0090ec6d..6ca7bc5e7b 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -221,7 +221,6 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz } pState->pTdbState->pOwner = pTask; - pState->checkPointId = 0; return pState; @@ -274,7 +273,6 @@ int32_t streamStateCommit(SStreamState* pState) { SStreamSnapshot* pShot = getSnapshot(pState->pFileState); flushSnapshot(pState->pFileState, pShot, true); } - pState->checkPointId++; return 0; #else if (tdbCommit(pState->pTdbState->db, pState->pTdbState->txn) < 0) { @@ -288,7 +286,6 @@ int32_t streamStateCommit(SStreamState* pState) { TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } - pState->checkPointId++; return 0; #endif } diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index 8a3e7ce892..0a3970adaa 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -28,6 +28,7 @@ #define MIN_NUM_OF_ROW_BUFF 10240 #define TASK_KEY "streamFileState" +#define STREAM_STATE_INFO_NAME "StreamStateCheckPoint" struct SStreamFileState { SList* usedBuffs; @@ -192,15 +193,13 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_ recoverSesssion(pFileState, checkpointId); } - char keyBuf[128] = {0}; void* valBuf = NULL; int32_t len = 0; - sprintf(keyBuf, "%s:%" PRId64 "", TASK_KEY, ((SStreamState*)pFileState->pFileStore)->checkPointId); - int32_t code = streamDefaultGet_rocksdb(pFileState->pFileStore, keyBuf, &valBuf, &len); + int32_t code = streamDefaultGet_rocksdb(pFileState->pFileStore, STREAM_STATE_INFO_NAME, &valBuf, &len); if (code == TSDB_CODE_SUCCESS) { ASSERT(len == sizeof(TSKEY)); streamFileStateDecode(&pFileState->flushMark, valBuf, len); - qDebug("===stream===flushMark read:%" PRId64 ",checkpointid:%" PRId64, pFileState->flushMark, ((SStreamState*)pFileState->pFileStore)->checkPointId); + qDebug("===stream===flushMark read:%" PRId64, pFileState->flushMark); } return pFileState; @@ -600,24 +599,12 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, pFileState->id, numOfElems, BATCH_LIMIT, elapsed); if (flushState) { - { - char keyBuf[128] = {0}; - void* valBuf = NULL; - int32_t len = 0; - sprintf(keyBuf, "%s:%" PRId64 "", TASK_KEY, ((SStreamState*)pFileState->pFileStore)->checkPointId); - streamFileStateEncode(&pFileState->flushMark, &valBuf, &len); - qDebug("===stream===flushMark write:%" PRId64 ",checkpoint id:%" PRId64, pFileState->flushMark, ((SStreamState*)pFileState->pFileStore)->checkPointId); - streamStatePutBatch(pFileState->pFileStore, "default", batch, keyBuf, valBuf, len, 0); - taosMemoryFree(valBuf); - } - { - char keyBuf[128] = {0}; - char valBuf[64] = {0}; - int32_t len = 0; - memcpy(keyBuf, TASK_KEY, strlen(TASK_KEY)); - len = sprintf(valBuf, "%" PRId64 "", ((SStreamState*)pFileState->pFileStore)->checkPointId); - code = streamStatePutBatch(pFileState->pFileStore, "default", batch, keyBuf, valBuf, len, 0); - } + void* valBuf = NULL; + int32_t len = 0; + streamFileStateEncode(&pFileState->flushMark, &valBuf, &len); + qDebug("===stream===flushMark write:%" PRId64, pFileState->flushMark); + streamStatePutBatch(pFileState->pFileStore, "default", batch, STREAM_STATE_INFO_NAME, valBuf, len, 0); + taosMemoryFree(valBuf); streamStatePutBatch_rocksdb(pFileState->pFileStore, batch); } From c1f2f0bb630cd319d6f05339711272325f5bffe8 Mon Sep 17 00:00:00 2001 From: charles Date: Tue, 7 Nov 2023 18:13:23 +0800 Subject: [PATCH 25/56] add test case for ts-4219 by charles --- tests/parallel_test/cases.task | 1 + tests/system-test/1-insert/test_ts4219.py | 27 +++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 tests/system-test/1-insert/test_ts4219.py diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 22f6199ee9..082aa13ddb 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -283,6 +283,7 @@ e ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/rowlength64k_4.py -Q 4 ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/precisionUS.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/precisionNS.py +,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/test_ts4219.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/show.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/show_tag_index.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/information_schema.py diff --git a/tests/system-test/1-insert/test_ts4219.py b/tests/system-test/1-insert/test_ts4219.py new file mode 100644 index 0000000000..e6447d77ae --- /dev/null +++ b/tests/system-test/1-insert/test_ts4219.py @@ -0,0 +1,27 @@ +import sys +from util.log import * +from util.cases import * +from util.sql import * +from util.dnodes import tdDnodes +from math import inf + + +class TDTestCase: + def init(self, conn, logSql, replicaVer=1): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), True) + + def prepare_data(self): + tdSql.execute("create database db;") + tdSql.execute("use db;") + tdSql.execute("create stable st(ts timestamp, c1 int, c2 float) tags(groupname binary(32));") + + def run(self): + tdSql.error("insert into ct1 using st tags('group name 1') values(now, 1, 1.1)(now+1s, 2, 2.2) ct1 using st tags('group 1) values(now+2s, 3, 3.3); ") + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) From 6b738884a4be908a9dc95783776360ef311a4753 Mon Sep 17 00:00:00 2001 From: kailixu Date: Tue, 7 Nov 2023 19:59:05 +0800 Subject: [PATCH 26/56] chore: rsma checkpoint --- include/common/tcommon.h | 1 + include/libs/executor/executor.h | 1 + include/libs/stream/tstream.h | 1 + source/dnode/vnode/src/sma/smaRollup.c | 13 ++++++--- source/dnode/vnode/src/sma/smaUtil.c | 2 +- source/dnode/vnode/src/tsdb/tsdbRead2.c | 14 +++++----- source/libs/executor/inc/executil.h | 2 +- source/libs/executor/src/executil.c | 3 ++- source/libs/executor/src/scanoperator.c | 4 +-- source/libs/stream/src/streamMeta.c | 5 ++-- source/libs/stream/src/tstreamFileState.c | 33 ----------------------- 11 files changed, 27 insertions(+), 52 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 72aab9adf0..e072eaa831 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -249,6 +249,7 @@ typedef struct SQueryTableDataCond { SColumnInfo* colList; int32_t* pSlotList; // the column output destation slot, and it may be null int32_t type; // data block load type: + bool skipRollup; STimeWindow twindows; int64_t startVersion; int64_t endVersion; diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 5990ae1c9c..6005c13455 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -49,6 +49,7 @@ typedef struct { uint64_t checkpointId; bool initTableReader; bool initTqReader; + bool skipRollup; int32_t numOfVgroups; void* sContext; // SSnapContext* void* pStateBackend; diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 173c68a818..2e9eb884a0 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -801,6 +801,7 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); int32_t streamMetaReopen(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta); int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta); +int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta); void streamMetaNotifyClose(SStreamMeta* pMeta); void streamMetaStartHb(SStreamMeta* pMeta); void streamMetaInitForSnode(SStreamMeta* pMeta); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index ac99dc9de3..665610304c 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -235,6 +235,7 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui return TSDB_CODE_SUCCESS; } +#if 0 static int64_t tdRSmaTaskGetCheckpointId(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { int64_t checkpointId = -1; STaskId id = {.streamId = streamId, .taskId = taskId}; @@ -246,6 +247,7 @@ static int64_t tdRSmaTaskGetCheckpointId(SStreamMeta *pMeta, int64_t streamId, i taosRUnLockLatch(&pMeta->lock); return checkpointId; } +#endif static void tdRSmaTaskRemove(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { streamMetaUnregisterTask(pMeta, streamId, taskId); @@ -293,8 +295,12 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->pMeta = pVnode->pTq->pStreamMeta; pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_TASK_FLAG) + 1); sprintf(pStreamTask->exec.qmsg, "%s", RSMA_TASK_FLAG); +#if 0 pStreamTask->chkInfo.checkpointId = tdRSmaTaskGetCheckpointId(pStreamTask->pMeta, pStreamTask->id.streamId, pStreamTask->id.taskId); +#else + pStreamTask->chkInfo.checkpointId = streamMetaGetLatestCheckpointId(pStreamTask->pMeta); +#endif pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; @@ -304,7 +310,7 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat tdRSmaTaskRemove(pStreamTask->pMeta, pStreamTask->id.streamId, pStreamTask->id.taskId); - SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState}; + SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .skipRollup = 1, .pStateBackend = pStreamState}; initStorageAPI(&handle.api); pRSmaInfo->taskInfo[idx] = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle, TD_VID(pVnode), 0); if (!pRSmaInfo->taskInfo[idx]) { @@ -682,8 +688,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma ", ver:%" PRIi64, SMA_VID(pSma), __func__, lino, tstrerror(code), suid, pItem->level, output ? output->info.id.uid : -1, output ? output->info.version : -1); - continue; - // TSDB_CHECK_CODE(code, lino, _exit); + TSDB_CHECK_CODE(code, lino, _exit); } smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64, @@ -1297,7 +1302,7 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) { } int8_t fetchTriggerStat = - atomic_val_compare_exchange_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE, TASK_TRIGGER_STAT_ACTIVE); + atomic_val_compare_exchange_8(&pItem->triggerStat, TASK_TRIGGER_STAT_ACTIVE, TASK_TRIGGER_STAT_INACTIVE); switch (fetchTriggerStat) { case TASK_TRIGGER_STAT_ACTIVE: { smaDebug("vgId:%d, rsma fetch task planned for level:%" PRIi8 " suid:%" PRIi64 " since stat is active", diff --git a/source/dnode/vnode/src/sma/smaUtil.c b/source/dnode/vnode/src/sma/smaUtil.c index 479c57e65f..8c04306d0f 100644 --- a/source/dnode/vnode/src/sma/smaUtil.c +++ b/source/dnode/vnode/src/sma/smaUtil.c @@ -30,7 +30,7 @@ void tdRSmaGetDirName(SVnode *pVnode, STfs *pTfs, bool endWithSep, char *outputN offset = strlen(outputName); // rsma -#if 0 +#if 1 snprintf(outputName + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s%s", TD_DIRSEP, VNODE_RSMA_DIR, (endWithSep ? TD_DIRSEP : "")); #else diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index d1919d95ba..c56164ff9d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -48,9 +48,9 @@ static int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScan static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, STsdbReader* pReader); -static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, - int8_t* pLevel); +static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); +static STsdb* getTsdbByRetentions(SVnode* pVnode, SQueryTableDataCond* pCond, SRetention* retentions, const char* idstr, + int8_t* pLevel); static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); static int32_t doBuildDataBlock(STsdbReader* pReader); @@ -384,7 +384,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void initReaderStatus(&pReader->status); - pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level); + pReader->pTsdb = getTsdbByRetentions(pVnode, pCond, pVnode->config.tsdbCfg.retentions, idstr, &level); pReader->info.suid = pCond->suid; pReader->info.order = pCond->order; @@ -3140,9 +3140,9 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { } } -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr, +static STsdb* getTsdbByRetentions(SVnode* pVnode, SQueryTableDataCond* pCond, SRetention* retentions, const char* idStr, int8_t* pLevel) { - if (VND_IS_RSMA(pVnode)) { + if (VND_IS_RSMA(pVnode) && !pCond->skipRollup) { int8_t level = 0; int8_t precision = pVnode->config.tsdbCfg.precision; int64_t now = taosGetTimestamp(precision); @@ -3158,7 +3158,7 @@ static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* ret } break; } - if ((now - pRetention->keep) <= (winSKey + offset)) { + if ((now - pRetention->keep) <= (pCond->twindows.skey + offset)) { break; } ++level; diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index 740ff7b0dc..6387b3d0d6 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -178,7 +178,7 @@ void initExecTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pQueryWindow SInterval extractIntervalInfo(const STableScanPhysiNode* pTableScanNode); SColumn extractColumnFromColumnNode(SColumnNode* pColNode); -int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode); +int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode, const SReadHandle* readHandle); void cleanupQueryTableDataCond(SQueryTableDataCond* pCond); int32_t convertFillType(int32_t mode); diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 753d3e680c..39b47504c6 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -1713,7 +1713,7 @@ SColumn extractColumnFromColumnNode(SColumnNode* pColNode) { return c; } -int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode) { +int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode, const SReadHandle* readHandle) { pCond->order = pTableScanNode->scanSeq[0] > 0 ? TSDB_ORDER_ASC : TSDB_ORDER_DESC; pCond->numOfCols = LIST_LENGTH(pTableScanNode->scan.pScanCols); @@ -1732,6 +1732,7 @@ int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysi pCond->type = TIMEWINDOW_RANGE_CONTAINED; pCond->startVersion = -1; pCond->endVersion = -1; + pCond->skipRollup = readHandle->skipRollup; int32_t j = 0; for (int32_t i = 0; i < pCond->numOfCols; ++i) { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index efbc978323..c47e14ad0d 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1035,7 +1035,7 @@ SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode, } initLimitInfo(pScanNode->node.pLimit, pScanNode->node.pSlimit, &pInfo->base.limitInfo); - code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode); + code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode, readHandle); if (code != TSDB_CODE_SUCCESS) { goto _error; } @@ -3533,7 +3533,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN goto _error; } - code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode); + code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode, readHandle); if (code != TSDB_CODE_SUCCESS) { taosArrayDestroy(pInfo->base.matchInfo.pList); goto _error; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 31f8647dd5..6202753a87 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -28,7 +28,6 @@ int32_t streamBackendId = 0; int32_t streamBackendCfWrapperId = 0; int32_t streamMetaId = 0; -static int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta); static void metaHbToMnode(void* param, void* tmrId); static void streamMetaClear(SStreamMeta* pMeta); static int32_t streamMetaBegin(SStreamMeta* pMeta); @@ -188,7 +187,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF pMeta->chkpCap = 2; taosInitRWLatch(&pMeta->chkpDirLock); - pMeta->chkpId = streamGetLatestCheckpointId(pMeta); + pMeta->chkpId = streamMetaGetLatestCheckpointId(pMeta); pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); while (pMeta->streamBackend == NULL) { taosMsleep(100); @@ -595,7 +594,7 @@ int32_t streamMetaCommit(SStreamMeta* pMeta) { return 0; } -int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta) { +int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta) { int64_t chkpId = 0; TBC* pCur = NULL; diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index 0a3970adaa..e38ba85f62 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -527,17 +527,6 @@ SStreamSnapshot* getSnapshot(SStreamFileState* pFileState) { return pFileState->usedBuffs; } -static void getDebugRowBuff(char* val, int32_t vlen, char* output) { - for (int32_t i = 0; i < vlen; ++i) { - if (*(val + i) == '\0') { - sprintf(output + i, "0"); - } else { - sprintf(output + i, "%c", *(val + i)); - } - } - output[vlen] = 0; -} - int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState) { int32_t code = TSDB_CODE_SUCCESS; SListIter iter = {0}; @@ -553,7 +542,6 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, int32_t len = pFileState->rowSize + sizeof(uint64_t) + sizeof(int32_t) + 1; char* buf = taosMemoryCalloc(1, len); - char output[1024]; void* batch = streamStateCreateBatch(); while ((pNode = tdListNext(&iter)) != NULL && code == TSDB_CODE_SUCCESS) { @@ -571,15 +559,6 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, } void* pSKey = pFileState->stateBuffCreateStateKeyFn(pPos, ((SStreamState*)pFileState->pFileStore)->number); -#if 1 - SStateKey* pStateKey = pSKey; - char* pStateVal = pPos->pRowBuff; - int32_t pStateVLen = pFileState->rowSize; - assert(pStateVLen < 1024); - getDebugRowBuff(pStateVal, pStateVLen, output); - qDebug("%s:%d key:[%" PRIu64 ",%" PRIi64 ",%" PRIi64 "] vlen:%d, val:%s", __func__, __LINE__, pStateKey->key.groupId, - pStateKey->key.ts, pStateKey->opNum, pStateVLen, output); -#endif code = streamStatePutBatchOptimize(pFileState->pFileStore, idx, batch, pSKey, pPos->pRowBuff, pFileState->rowSize, 0, buf); taosMemoryFreeClear(pSKey); @@ -710,7 +689,6 @@ int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { if (pCur == NULL) { return -1; } - char output[1024]; int32_t recoverNum = TMIN(MIN_NUM_OF_ROW_BUFF, pFileState->maxRowCount); while (code == TSDB_CODE_SUCCESS) { if (pFileState->curRowCount >= recoverNum) { @@ -730,17 +708,6 @@ int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { } ASSERT(vlen == pFileState->rowSize); memcpy(pNewPos->pRowBuff, pVal, vlen); - -#if 1 - SStateKey* pStateKey = pNewPos->pKey; - char* pStateVal = pVal; - int32_t pStateVLen = vlen; - assert(pStateVLen < 1024); - getDebugRowBuff(pStateVal, pStateVLen, output); - qDebug("%s:%d key:[%" PRIu64 ",%" PRIi64 ",%" PRIi64 "] vlen:%d, val:%s", __func__, __LINE__, pStateKey->key.groupId, - pStateKey->key.ts, pStateKey->opNum, pStateVLen, output); -#endif - taosMemoryFreeClear(pVal); pNewPos->beFlushed = true; code = tSimpleHashPut(pFileState->rowStateBuff, pNewPos->pKey, pFileState->keyLen, &pNewPos, POINTER_BYTES); From f2d24306b1a05868a6c3362f482dc8b8172804e9 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 8 Nov 2023 08:35:52 +0800 Subject: [PATCH 27/56] enh: rsma tasks share one checkpoint and fix memory leak --- source/dnode/vnode/src/sma/smaRollup.c | 4 ++++ source/libs/stream/src/tstreamFileState.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 665610304c..424a9b1cf2 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1221,6 +1221,10 @@ _checkpoint: smaInfo("vgId:%d, rsma commit, succeed to commit checkpoint/task:%" PRIi64 "/%p, table:%" PRIi64 ", level:%d", TD_VID(pVnode), pTask->checkpointingId, pTask, pRSmaInfo->suid, i + 1); + + // the stream states share one checkpoint + taosHashCancelIterate(pInfoHash, infoHash); + goto _exit; } } } diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index e38ba85f62..fc47498a3c 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -201,7 +201,7 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_ streamFileStateDecode(&pFileState->flushMark, valBuf, len); qDebug("===stream===flushMark read:%" PRId64, pFileState->flushMark); } - + taosMemoryFreeClear(valBuf); return pFileState; _error: From 764f8d2e456a00357dae70ea846dc3926f8cbf9c Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Wed, 8 Nov 2023 09:03:19 +0800 Subject: [PATCH 28/56] fix(vnode/s3): move init & cleanup to dnode --- source/dnode/mgmt/node_mgmt/src/dmEnv.c | 36 +++++++++++++------------ source/dnode/vnode/src/vnd/vnodeCos.c | 10 ++++--- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/source/dnode/mgmt/node_mgmt/src/dmEnv.c b/source/dnode/mgmt/node_mgmt/src/dmEnv.c index d560ba1644..f79c9d97b8 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmEnv.c +++ b/source/dnode/mgmt/node_mgmt/src/dmEnv.c @@ -18,17 +18,17 @@ #include "audit.h" #include "libs/function/tudf.h" -#define DM_INIT_AUDIT() \ - do { \ - auditCfg.port = tsMonitorPort; \ - auditCfg.server = tsMonitorFqdn; \ - auditCfg.comp = tsMonitorComp; \ - if (auditInit(&auditCfg) != 0) { \ - return -1; \ - } \ +#define DM_INIT_AUDIT() \ + do { \ + auditCfg.port = tsMonitorPort; \ + auditCfg.server = tsMonitorFqdn; \ + auditCfg.comp = tsMonitorComp; \ + if (auditInit(&auditCfg) != 0) { \ + return -1; \ + } \ } while (0) -static SDnode globalDnode = {0}; +static SDnode globalDnode = {0}; SDnode *dmInstance() { return &globalDnode; } @@ -146,6 +146,9 @@ static bool dmCheckDataDirVersion() { return true; } +extern int32_t s3Begin(); +extern void s3End(); + int32_t dmInit() { dInfo("start to init dnode env"); if (dmDiskInit() != 0) return -1; @@ -156,6 +159,7 @@ int32_t dmInit() { if (dmInitMonitor() != 0) return -1; if (dmInitAudit() != 0) return -1; if (dmInitDnode(dmInstance()) != 0) return -1; + if (s3Begin() != 0) return -1; dInfo("dnode env is initialized"); return 0; @@ -181,6 +185,7 @@ void dmCleanup() { udfStopUdfd(); taosStopCacheRefreshWorker(); dmDiskClose(); + s3End(); dInfo("dnode env is cleaned up"); taosCleanupCfg(); @@ -265,19 +270,19 @@ static int32_t dmProcessAlterNodeTypeReq(EDndNodeType ntype, SRpcMsg *pMsg) { pWrapper = &pDnode->wrappers[ntype]; - if(pWrapper->func.nodeRoleFp != NULL){ + if (pWrapper->func.nodeRoleFp != NULL) { ESyncRole role = (*pWrapper->func.nodeRoleFp)(pWrapper->pMgmt); dInfo("node:%s, checking node role:%d", pWrapper->name, role); - if(role == TAOS_SYNC_ROLE_VOTER){ + if (role == TAOS_SYNC_ROLE_VOTER) { dError("node:%s, failed to alter node type since node already is role:%d", pWrapper->name, role); terrno = TSDB_CODE_MNODE_ALREADY_IS_VOTER; return -1; } } - if(pWrapper->func.isCatchUpFp != NULL){ + if (pWrapper->func.isCatchUpFp != NULL) { dInfo("node:%s, checking node catch up", pWrapper->name); - if((*pWrapper->func.isCatchUpFp)(pWrapper->pMgmt) != 1){ + if ((*pWrapper->func.isCatchUpFp)(pWrapper->pMgmt) != 1) { terrno = TSDB_CODE_MNODE_NOT_CATCH_UP; return -1; } @@ -394,7 +399,4 @@ void dmReportStartup(const char *pName, const char *pDesc) { dDebug("step:%s, %s", pStartup->name, pStartup->desc); } -int64_t dmGetClusterId() { - return globalDnode.data.clusterId; -} - +int64_t dmGetClusterId() { return globalDnode.data.clusterId; } diff --git a/source/dnode/vnode/src/vnd/vnodeCos.c b/source/dnode/vnode/src/vnd/vnodeCos.c index 6e36739f5a..a16f926f0f 100644 --- a/source/dnode/vnode/src/vnd/vnodeCos.c +++ b/source/dnode/vnode/src/vnd/vnodeCos.c @@ -24,7 +24,7 @@ static S3UriStyle uriStyleG = S3UriStylePath; static int retriesG = 5; static int timeoutMsG = 0; -static int32_t s3Begin() { +int32_t s3Begin() { S3Status status; const char *hostname = tsS3Hostname; const char *env_hn = getenv("S3_HOSTNAME"); @@ -43,10 +43,12 @@ static int32_t s3Begin() { return 0; } -static void s3End() { S3_deinitialize(); } -int32_t s3Init() { return s3Begin(); } +void s3End() { S3_deinitialize(); } -void s3CleanUp() { s3End(); } +int32_t s3Init() { return 0; /*s3Begin();*/ } + +void s3CleanUp() { /*s3End();*/ +} static int should_retry() { /* From d1106c51dcf19f7bc12e6dd8f9b1e290e5ee3831 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 8 Nov 2023 09:54:03 +0800 Subject: [PATCH 29/56] enh: rsma code optimization --- source/dnode/vnode/src/inc/sma.h | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 198c93a937..5e808c217c 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -140,7 +140,7 @@ struct SRSmaInfoItem { int8_t level; int8_t fetchLevel; int8_t triggerStat; - uint32_t nScanned; + int32_t nScanned; int32_t streamFlushed : 1; int32_t maxDelay : 31; // ms tmr_h tmrId; diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 424a9b1cf2..68d829bea4 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -22,7 +22,7 @@ #define RSMA_FETCH_DELAY_MAX (120000) // ms #define RSMA_FETCH_ACTIVE_MAX (1000) // ms #define RSMA_FETCH_INTERVAL (5000) // ms -#define RSMA_TASK_FLAG "rsma_task" +#define RSMA_TASK_FLAG "rsma" #define RSMA_NEED_FETCH(r) (RSMA_INFO_ITEM((r), 0)->fetchLevel || RSMA_INFO_ITEM((r), 1)->fetchLevel) @@ -1368,7 +1368,7 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { #if 0 if ((++pItem->nScanned * pItem->maxDelay) > RSMA_FETCH_DELAY_MAX) { - smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi16 " maxDelay:%d, fetch executed", + smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi32 " maxDelay:%d, fetch executed", SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay); } else { int64_t curMs = taosGetTimestampMs(); @@ -1393,10 +1393,10 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { goto _err; } - smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi16 " maxDelay:%d, fetch finished", + smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi32 " maxDelay:%d, fetch finished", SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay); } else { - smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi16 + smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi32 " maxDelay:%d, fetch not executed as fetch level is %" PRIi8, SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay, pItem->fetchLevel); } From 8bb65119a4c77f4ab9d309a7bbe9bbe51e0e7dfe Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Wed, 8 Nov 2023 10:52:23 +0800 Subject: [PATCH 30/56] dnode: fix dnode s3 init --- source/dnode/mgmt/node_mgmt/src/dmEnv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/source/dnode/mgmt/node_mgmt/src/dmEnv.c b/source/dnode/mgmt/node_mgmt/src/dmEnv.c index f79c9d97b8..6f13abcebc 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmEnv.c +++ b/source/dnode/mgmt/node_mgmt/src/dmEnv.c @@ -146,9 +146,13 @@ static bool dmCheckDataDirVersion() { return true; } +#if defined(USE_S3) + extern int32_t s3Begin(); extern void s3End(); +#endif + int32_t dmInit() { dInfo("start to init dnode env"); if (dmDiskInit() != 0) return -1; @@ -159,7 +163,9 @@ int32_t dmInit() { if (dmInitMonitor() != 0) return -1; if (dmInitAudit() != 0) return -1; if (dmInitDnode(dmInstance()) != 0) return -1; +#if defined(USE_S3) if (s3Begin() != 0) return -1; +#endif dInfo("dnode env is initialized"); return 0; @@ -185,7 +191,9 @@ void dmCleanup() { udfStopUdfd(); taosStopCacheRefreshWorker(); dmDiskClose(); +#if defined(USE_S3) s3End(); +#endif dInfo("dnode env is cleaned up"); taosCleanupCfg(); From 15e1e4cd2c1ccc2729651f7a492616d57469641a Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Wed, 8 Nov 2023 14:00:10 +0800 Subject: [PATCH 31/56] dnode/node_mgmt: cmake define for use_s3 --- source/dnode/mgmt/node_mgmt/CMakeLists.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/source/dnode/mgmt/node_mgmt/CMakeLists.txt b/source/dnode/mgmt/node_mgmt/CMakeLists.txt index f1be20289a..6b875db860 100644 --- a/source/dnode/mgmt/node_mgmt/CMakeLists.txt +++ b/source/dnode/mgmt/node_mgmt/CMakeLists.txt @@ -3,6 +3,17 @@ add_library(dnode STATIC ${IMPLEMENT_SRC}) target_link_libraries( dnode mgmt_mnode mgmt_qnode mgmt_snode mgmt_vnode mgmt_dnode ) + +IF (TD_STORAGE) + + IF(${BUILD_WITH_S3}) + add_definitions(-DUSE_S3) + ELSEIF(${BUILD_WITH_COS}) + add_definitions(-DUSE_COS) + ENDIF() + +ENDIF () + target_include_directories( dnode PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc" From 3f125bc6bf45c878a41bebdaa79251a7c282b2ac Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Wed, 8 Nov 2023 14:30:30 +0800 Subject: [PATCH 32/56] vnode/cos: make err msg buffer big enough for detailed msg --- source/dnode/vnode/src/vnd/vnodeCos.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeCos.c b/source/dnode/vnode/src/vnd/vnodeCos.c index a16f926f0f..d2db8be026 100644 --- a/source/dnode/vnode/src/vnd/vnodeCos.c +++ b/source/dnode/vnode/src/vnd/vnodeCos.c @@ -74,7 +74,7 @@ static void s3PrintError(const char *func, S3Status status, char error_details[] } typedef struct { - char err_msg[128]; + char err_msg[512]; S3Status status; uint64_t content_length; char *buf; @@ -203,7 +203,7 @@ static void growbuffer_destroy(growbuffer *gb) { } typedef struct put_object_callback_data { - char err_msg[128]; + char err_msg[512]; S3Status status; // FILE *infile; TdFilePtr infileFD; @@ -216,7 +216,7 @@ typedef struct put_object_callback_data { #define MULTIPART_CHUNK_SIZE (768 << 20) // multipart is 768M typedef struct UploadManager { - char err_msg[128]; + char err_msg[512]; S3Status status; // used for initial multipart char *upload_id; @@ -231,7 +231,7 @@ typedef struct UploadManager { } UploadManager; typedef struct list_parts_callback_data { - char err_msg[128]; + char err_msg[512]; S3Status status; int isTruncated; char nextPartNumberMarker[24]; @@ -248,7 +248,7 @@ typedef struct list_parts_callback_data { } list_parts_callback_data; typedef struct MultipartPartData { - char err_msg[128]; + char err_msg[512]; S3Status status; put_object_callback_data put_object_data; int seq; @@ -611,7 +611,7 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) { } typedef struct list_bucket_callback_data { - char err_msg[128]; + char err_msg[512]; S3Status status; int isTruncated; char nextMarker[1024]; From a1e692a796b64d0d3585cbade05f528b69907084 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Wed, 8 Nov 2023 15:18:55 +0800 Subject: [PATCH 33/56] fix(vnode/cos): fix error printing to avoid buffer overflow --- source/dnode/vnode/src/vnd/vnodeCos.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeCos.c b/source/dnode/vnode/src/vnd/vnodeCos.c index d2db8be026..d7eced78fb 100644 --- a/source/dnode/vnode/src/vnd/vnodeCos.c +++ b/source/dnode/vnode/src/vnd/vnodeCos.c @@ -99,20 +99,22 @@ static void responseCompleteCallback(S3Status status, const S3ErrorDetails *erro int len = 0; const int elen = sizeof(cbd->err_msg); if (error) { - if (error->message) { + if (error->message && elen - len > 0) { len += snprintf(&(cbd->err_msg[len]), elen - len, " Message: %s\n", error->message); } - if (error->resource) { + if (error->resource && elen - len > 0) { len += snprintf(&(cbd->err_msg[len]), elen - len, " Resource: %s\n", error->resource); } - if (error->furtherDetails) { + if (error->furtherDetails && elen - len > 0) { len += snprintf(&(cbd->err_msg[len]), elen - len, " Further Details: %s\n", error->furtherDetails); } - if (error->extraDetailsCount) { + if (error->extraDetailsCount && elen - len > 0) { len += snprintf(&(cbd->err_msg[len]), elen - len, "%s", " Extra Details:\n"); for (int i = 0; i < error->extraDetailsCount; i++) { - len += snprintf(&(cbd->err_msg[len]), elen - len, " %s: %s\n", error->extraDetails[i].name, - error->extraDetails[i].value); + if (elen - len > 0) { + len += snprintf(&(cbd->err_msg[len]), elen - len, " %s: %s\n", error->extraDetails[i].name, + error->extraDetails[i].value); + } } } } @@ -205,6 +207,7 @@ static void growbuffer_destroy(growbuffer *gb) { typedef struct put_object_callback_data { char err_msg[512]; S3Status status; + uint64_t content_length; // FILE *infile; TdFilePtr infileFD; growbuffer *gb; @@ -218,6 +221,7 @@ typedef struct put_object_callback_data { typedef struct UploadManager { char err_msg[512]; S3Status status; + uint64_t content_length; // used for initial multipart char *upload_id; @@ -233,6 +237,7 @@ typedef struct UploadManager { typedef struct list_parts_callback_data { char err_msg[512]; S3Status status; + uint64_t content_length; int isTruncated; char nextPartNumberMarker[24]; char initiatorId[256]; From e5bbcf76f1843a68fe478d5ecf22d80bab16f1f1 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Wed, 8 Nov 2023 15:57:56 +0800 Subject: [PATCH 34/56] vnode/cos: fix get object block callback --- source/dnode/vnode/src/vnd/vnodeCos.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeCos.c b/source/dnode/vnode/src/vnd/vnodeCos.c index d7eced78fb..f9d6ffe544 100644 --- a/source/dnode/vnode/src/vnd/vnodeCos.c +++ b/source/dnode/vnode/src/vnd/vnodeCos.c @@ -78,6 +78,7 @@ typedef struct { S3Status status; uint64_t content_length; char *buf; + int64_t buf_pos; } TS3SizeCBD; static S3Status responsePropertiesCallback(const S3ResponseProperties *properties, void *callbackData) { @@ -730,15 +731,19 @@ void s3DeleteObjects(const char *object_name[], int nobject) { static S3Status getObjectDataCallback(int bufferSize, const char *buffer, void *callbackData) { TS3SizeCBD *cbd = callbackData; + /* if (cbd->content_length != bufferSize) { cbd->status = S3StatusAbortedByCallback; return S3StatusAbortedByCallback; } + */ + if (!cbd->buf) { + cbd->buf = taosMemoryCalloc(1, bufferSize); + } - char *buf = taosMemoryCalloc(1, bufferSize); - if (buf) { - memcpy(buf, buffer, bufferSize); - cbd->buf = buf; + if (cbd->buf) { + memcpy(cbd->buf + cbd->buf_pos, buffer, bufferSize); + cbd->buf_pos += bufferSize; cbd->status = S3StatusOK; return S3StatusOK; } else { @@ -760,6 +765,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, TS3SizeCBD cbd = {0}; cbd.content_length = size; + cbd.buf_pos = 0; do { S3_get_object(&bucketContext, object_name, &getConditions, offset, size, 0, 0, &getObjectHandler, &cbd); } while (S3_status_is_retryable(cbd.status) && should_retry()); From 128353a861e0a18a116ff6ab44738d3716101a80 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Wed, 8 Nov 2023 16:04:24 +0800 Subject: [PATCH 35/56] vnode/cos: fix get object handler mem --- source/dnode/vnode/src/vnd/vnodeCos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/vnd/vnodeCos.c b/source/dnode/vnode/src/vnd/vnodeCos.c index f9d6ffe544..0bb16fcd9c 100644 --- a/source/dnode/vnode/src/vnd/vnodeCos.c +++ b/source/dnode/vnode/src/vnd/vnodeCos.c @@ -738,7 +738,7 @@ static S3Status getObjectDataCallback(int bufferSize, const char *buffer, void * } */ if (!cbd->buf) { - cbd->buf = taosMemoryCalloc(1, bufferSize); + cbd->buf = taosMemoryCalloc(1, cbd->content_length); } if (cbd->buf) { From 5ccdde4495df36350ec342236c1755da8603b16c Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Wed, 8 Nov 2023 20:43:52 +0800 Subject: [PATCH 36/56] vnode/cos: error on incomplete fetching --- source/dnode/vnode/src/vnd/vnodeCos.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/dnode/vnode/src/vnd/vnodeCos.c b/source/dnode/vnode/src/vnd/vnodeCos.c index 0bb16fcd9c..9941c53750 100644 --- a/source/dnode/vnode/src/vnd/vnodeCos.c +++ b/source/dnode/vnode/src/vnd/vnodeCos.c @@ -775,6 +775,11 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, return TAOS_SYSTEM_ERROR(EIO); } + if (cbd.buf_pos != size) { + vError("%s: %d(%s)", __func__, cbd.status, cbd.err_msg); + return TAOS_SYSTEM_ERROR(EIO); + } + *ppBlock = cbd.buf; return 0; From 72ecb0431c6fc8d44531240354d5f6942691d562 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 8 Nov 2023 20:45:44 +0800 Subject: [PATCH 37/56] enh: rsma checkpoint --- source/dnode/vnode/src/inc/sma.h | 20 +- source/dnode/vnode/src/sma/smaCommit.c | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 133 ++++++---- source/dnode/vnode/src/sma/smaUtil.c | 5 - source/dnode/vnode/src/tsdb/tsdbMemTable.c | 12 - source/libs/stream/src/streamTask.c | 9 +- tests/parallel_test/cases.task | 1 + .../tsim/sync/vnodesnapshot-rsma-test.sim | 2 +- tests/script/win-test-file | 1 + tests/system-test/1-insert/rsma.py | 248 ++++++++++++++++++ 10 files changed, 348 insertions(+), 85 deletions(-) create mode 100644 tests/system-test/1-insert/rsma.py diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 5e808c217c..de6bb23f04 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -137,15 +137,17 @@ struct SSmaStat { #define RSMA_FS_LOCK(r) (&(r)->lock) struct SRSmaInfoItem { - int8_t level; - int8_t fetchLevel; - int8_t triggerStat; - int32_t nScanned; - int32_t streamFlushed : 1; - int32_t maxDelay : 31; // ms - tmr_h tmrId; - void *pStreamState; - void *pStreamTask; // SStreamTask + int8_t level; + int8_t fetchLevel; + int8_t triggerStat; + int32_t nScanned; + int32_t streamFlushed : 1; + int32_t maxDelay : 31; // ms + int64_t submitReqVer; + int64_t fetchResultVer; + tmr_h tmrId; + void *pStreamState; + void *pStreamTask; // SStreamTask }; struct SRSmaInfo { diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index fad2e4d7e9..92181f054d 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -169,7 +169,7 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { * 1) This is high cost task and should not put in asyncPreCommit originally. * 2) But, if put in asyncCommit, would trigger taskInfo cloning frequently. */ - smaInfo("vgId:%d, rsma commit:%d, wait for all items to be consumed, TID:%p", SMA_VID(pSma), isCommit, + smaInfo("vgId:%d, rsma commit, type:%d, wait for all items to be consumed, TID:%p", SMA_VID(pSma), isCommit, (void *)taosGetSelfPthreadId()); nLoops = 0; while (atomic_load_64(&pRSmaStat->nBufItems) > 0) { diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 68d829bea4..7296f3d468 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -44,8 +44,8 @@ static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid); static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); static void tdFreeRSmaSubmitItems(SArray *pItems); static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo); -static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, - int64_t suid, SArray **ppResList, int8_t *streamFlushed); +static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo, + int32_t execType, SArray **ppResList, int8_t *streamFlushed); static void tdRSmaFetchTrigger(void *param, void *tmrId); static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level); static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables); @@ -235,19 +235,16 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui return TSDB_CODE_SUCCESS; } -#if 0 -static int64_t tdRSmaTaskGetCheckpointId(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { - int64_t checkpointId = -1; - STaskId id = {.streamId = streamId, .taskId = taskId}; +static void tdRSmaTaskInit(SStreamMeta *pMeta, SRSmaInfoItem *pItem, SStreamTaskId *pId) { + STaskId id = {.streamId = pId->streamId, .taskId = pId->taskId}; taosRLockLatch(&pMeta->lock); SStreamTask **ppTask = (SStreamTask **)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask && *ppTask) { - checkpointId = (*ppTask)->chkInfo.checkpointId; + pItem->submitReqVer = (*ppTask)->chkInfo.checkpointVer; + pItem->fetchResultVer = (*ppTask)->info.triggerParam; } taosRUnLockLatch(&pMeta->lock); - return checkpointId; } -#endif static void tdRSmaTaskRemove(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) { streamMetaUnregisterTask(pMeta, streamId, taskId); @@ -295,12 +292,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->pMeta = pVnode->pTq->pStreamMeta; pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_TASK_FLAG) + 1); sprintf(pStreamTask->exec.qmsg, "%s", RSMA_TASK_FLAG); -#if 0 - pStreamTask->chkInfo.checkpointId = - tdRSmaTaskGetCheckpointId(pStreamTask->pMeta, pStreamTask->id.streamId, pStreamTask->id.taskId); -#else pStreamTask->chkInfo.checkpointId = streamMetaGetLatestCheckpointId(pStreamTask->pMeta); -#endif + tdRSmaTaskInit(pStreamTask->pMeta, pItem, &pStreamTask->id); pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; @@ -318,7 +311,11 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat return TSDB_CODE_FAILED; } - pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; // fetch the data when reboot + if (pItem->fetchResultVer < pItem->submitReqVer) { + // fetch the data when reboot + pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; + } + if (param->maxdelay[idx] < TSDB_MIN_ROLLUP_MAX_DELAY) { int64_t msInterval = convertTimeFromPrecisionToUnit(pRetention[idx + 1].freq, pTsdbCfg->precision, TIME_UNIT_MILLISECOND); @@ -337,10 +334,11 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat taosTmrReset(tdRSmaFetchTrigger, RSMA_FETCH_INTERVAL, pItem, smaMgmt.tmrHandle, &pItem->tmrId); - smaInfo("vgId:%d, open rsma task:%p table:%" PRIi64 " level:%" PRIi8 ", checkpointId:%" PRIi64 ", maxdelay:%" PRIi64 - " watermark:%" PRIi64 ", finally maxdelay:%" PRIi32, + smaInfo("vgId:%d, open rsma task:%p table:%" PRIi64 " level:%" PRIi8 ", checkpointId:%" PRIi64 + ", submitReqVer:%" PRIi64 ", fetchResultVer:%" PRIi64 ", maxdelay:%" PRIi64 " watermark:%" PRIi64 + ", finally maxdelay:%" PRIi32, TD_VID(pVnode), pItem->pStreamTask, pRSmaInfo->suid, (int8_t)(idx + 1), pStreamTask->chkInfo.checkpointId, - param->maxdelay[idx], param->watermark[idx], pItem->maxDelay); + pItem->submitReqVer, pItem->fetchResultVer, param->maxdelay[idx], param->watermark[idx], pItem->maxDelay); } return TSDB_CODE_SUCCESS; } @@ -624,12 +622,14 @@ _end: return code; } -static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema, - int64_t suid, SArray **ppResList, int8_t *streamFlushed) { +static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo, + int32_t execType, SArray **ppResList, int8_t *streamFlushed) { int32_t code = 0; int32_t lino = 0; SSDataBlock *output = NULL; SArray *pResList = NULL; + STSchema *pTSchema = pInfo->pTSchema; + int64_t suid = pInfo->suid; if (!(*ppResList)) { pResList = taosArrayInit(1, POINTER_BYTES); @@ -657,11 +657,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma if (taosArrayGetSize(pResList) == 0) { break; } -#if 0 - char flag[10] = {0}; - snprintf(flag, 10, "level %" PRIi8, pItem->level); - blockDebugShowDataBlocks(pResList, flag); -#endif + for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) { output = taosArrayGetP(pResList, i); if (output->info.type == STREAM_CHECKPOINT) { @@ -674,12 +670,17 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); SSubmitReq2 *pReq = NULL; - // TODO: the schema update should be handled later(TD-17965) if (buildSubmitReqFromDataBlock(&pReq, output, pTSchema, output->info.id.groupId, SMA_VID(pSma), suid) < 0) { code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; TSDB_CHECK_CODE(code, lino, _exit); } + // reset the output version to handle reboot + if (STREAM_GET_ALL == execType && output->info.version == 0) { + // the submitReqVer keeps unchanged since tdExecuteRSmaImpl and tdRSmaFetchAllResult are executed synchronously + output->info.version = pItem->submitReqVer; + } + if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) { code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); @@ -691,6 +692,10 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma TSDB_CHECK_CODE(code, lino, _exit); } + if (STREAM_GET_ALL == execType) { + atomic_store_64(&pItem->fetchResultVer, output->info.version); + } + smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64, SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, output->info.version); @@ -803,9 +808,10 @@ static int32_t tdRsmaPrintSubmitReq(SSma *pSma, SSubmitReq *pReq) { */ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, ERsmaExecType type, int8_t level) { - int32_t idx = level - 1; - void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); - SArray *pResList = NULL; + int32_t idx = level - 1; + void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); + SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); + SArray *pResList = NULL; if (!qTaskInfo) { smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, @@ -833,8 +839,12 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, return TSDB_CODE_FAILED; } - SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); - tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo->pTSchema, pInfo->suid, &pResList, NULL); + if (STREAM_INPUT__MERGED_SUBMIT == inputType) { + SPackedData *packData = POINTER_SHIFT(pMsg, sizeof(SPackedData) * (msgSize - 1)); + atomic_store_64(&pItem->submitReqVer, packData->ver); + } + + tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, &pResList, NULL); taosArrayDestroy(pResList); return TSDB_CODE_SUCCESS; @@ -1161,8 +1171,8 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { int8_t streamFlushed = 0; - code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo->pTSchema, - pRSmaInfo->suid, &pResList, &streamFlushed); + code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo, + STREAM_CHECKPOINT, &pResList, &streamFlushed); if (code) { taosHashCancelIterate(pInfoHash, infoHash); TSDB_CHECK_CODE(code, lino, _exit); @@ -1190,7 +1200,10 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { _checkpoint: // stream state: build checkpoint in backend do { - void *infoHash = NULL; + SStreamMeta *pMeta = NULL; + int64_t checkpointId = taosGetTimestampNs(); + bool checkpointBuilt = false; + void *infoHash = NULL; while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { @@ -1202,32 +1215,48 @@ _checkpoint: if (pItem && pItem->pStreamTask) { SStreamTask *pTask = pItem->pStreamTask; atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); - pTask->checkpointingId = taosGetTimestampNs(); + pTask->checkpointingId = checkpointId; pTask->chkInfo.checkpointId = pTask->checkpointingId; - code = streamTaskBuildCheckpoint(pTask); - if (code) { - taosHashCancelIterate(pInfoHash, infoHash); - TSDB_CHECK_CODE(code, lino, _exit); + pTask->chkInfo.checkpointVer = pItem->submitReqVer; + pTask->info.triggerParam = pItem->fetchResultVer; + + if (!checkpointBuilt) { + // the stream states share one checkpoint + code = streamTaskBuildCheckpoint(pTask); + if (code) { + taosHashCancelIterate(pInfoHash, infoHash); + TSDB_CHECK_CODE(code, lino, _exit); + } + pMeta = pTask->pMeta; + checkpointBuilt = true; } - taosWLockLatch(&pTask->pMeta->lock); - if (0 != streamMetaSaveTask(pTask->pMeta, pTask) || 0 != streamMetaCommit(pTask->pMeta)) { - taosWUnLockLatch(&pTask->pMeta->lock); + taosWLockLatch(&pMeta->lock); + if (0 != streamMetaSaveTask(pMeta, pTask)) { + taosWUnLockLatch(&pMeta->lock); code = terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY; taosHashCancelIterate(pInfoHash, infoHash); TSDB_CHECK_CODE(code, lino, _exit); } - taosWUnLockLatch(&pTask->pMeta->lock); - - smaInfo("vgId:%d, rsma commit, succeed to commit checkpoint/task:%" PRIi64 "/%p, table:%" PRIi64 ", level:%d", - TD_VID(pVnode), pTask->checkpointingId, pTask, pRSmaInfo->suid, i + 1); - - // the stream states share one checkpoint - taosHashCancelIterate(pInfoHash, infoHash); - goto _exit; + taosWUnLockLatch(&pMeta->lock); + smaDebug("vgId:%d, rsma commit, succeed to commit task:%p, submitReqVer:%" PRIi64 ", fetchResultVer:%" PRIi64 + ", table:%" PRIi64 ", level:%d", + TD_VID(pVnode), pTask, pItem->submitReqVer, pItem->fetchResultVer, pRSmaInfo->suid, i + 1); } } } + if (pMeta) { + taosWLockLatch(&pMeta->lock); + if (0 != streamMetaCommit(pMeta)) { + taosWUnLockLatch(&pMeta->lock); + code = terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + taosWUnLockLatch(&pMeta->lock); + } + if (checkpointBuilt) { + smaInfo("vgId:%d, rsma commit, succeed to commit checkpoint:%" PRIi64, TD_VID(pVnode), checkpointId); + } } while (0); _exit: taosArrayDestroy(pResList); @@ -1366,7 +1395,6 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { continue; } -#if 0 if ((++pItem->nScanned * pItem->maxDelay) > RSMA_FETCH_DELAY_MAX) { smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi32 " maxDelay:%d, fetch executed", SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay); @@ -1384,12 +1412,11 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { } pItem->nScanned = 0; -#endif if ((terrno = qSetSMAInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) { goto _err; } - if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo->pTSchema, pInfo->suid, &pResList, NULL) < 0) { + if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo, STREAM_GET_ALL, &pResList, NULL) < 0) { goto _err; } diff --git a/source/dnode/vnode/src/sma/smaUtil.c b/source/dnode/vnode/src/sma/smaUtil.c index 8c04306d0f..e45cbac329 100644 --- a/source/dnode/vnode/src/sma/smaUtil.c +++ b/source/dnode/vnode/src/sma/smaUtil.c @@ -30,13 +30,8 @@ void tdRSmaGetDirName(SVnode *pVnode, STfs *pTfs, bool endWithSep, char *outputN offset = strlen(outputName); // rsma -#if 1 snprintf(outputName + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s%s", TD_DIRSEP, VNODE_RSMA_DIR, (endWithSep ? TD_DIRSEP : "")); -#else - snprintf(outputName + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s%s%s%s%s%s", TD_DIRSEP, "tq", TD_DIRSEP, "stream", - TD_DIRSEP, "state", (endWithSep ? TD_DIRSEP : "")); -#endif } // smaXXXUtil ================ diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index 69b19f4bc5..cc77474e79 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -661,9 +661,6 @@ static int32_t tsdbInsertColDataToTable(SMemTable *pMemTable, STbData *pTbData, if ((code = tbDataDoPut(pMemTable, pTbData, pos, &tRow, 0))) goto _exit; pTbData->minKey = TMIN(pTbData->minKey, key.ts); lRow = tRow; - tsdbDebug("vgId:%d, %s, insert col row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, - TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, tRow.iRow, tRow.pTSRow->ts, tRow.version, - pSubmitTbData->uid); // remain row ++tRow.iRow; @@ -683,9 +680,6 @@ static int32_t tsdbInsertColDataToTable(SMemTable *pMemTable, STbData *pTbData, lRow = tRow; ++tRow.iRow; - tsdbDebug("vgId:%d, %s, insert col row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, - TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, tRow.iRow, tRow.pTSRow->ts, tRow.version, - pSubmitTbData->uid); } } @@ -727,9 +721,6 @@ static int32_t tsdbInsertRowDataToTable(SMemTable *pMemTable, STbData *pTbData, code = tbDataDoPut(pMemTable, pTbData, pos, &tRow, 0); if (code) goto _exit; lRow = tRow; - tsdbDebug("vgId:%d, %s, insert row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, - TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, iRow, tRow.pTSRow->ts, tRow.version, - pSubmitTbData->uid); pTbData->minKey = TMIN(pTbData->minKey, key.ts); @@ -753,9 +744,6 @@ static int32_t tsdbInsertRowDataToTable(SMemTable *pMemTable, STbData *pTbData, lRow = tRow; iRow++; - tsdbDebug("vgId:%d, %s, insert row[%d] with ts:%" PRIi64 ", ver:%" PRIi64 ", uid:%" PRIi64, - TD_VID(pMemTable->pTsdb->pVnode), pMemTable->pTsdb->path, iRow, tRow.pTSRow->ts, tRow.version, - pSubmitTbData->uid); } } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index a7fb590d1b..2f8de98039 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -120,8 +120,8 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { taskId = pTask->streamTaskId.taskId; if (tEncodeI32(pEncoder, taskId)) return -1; - if (tEncodeU64(pEncoder, pTask->dataRange.range.minVer)) return -1; - if (tEncodeU64(pEncoder, pTask->dataRange.range.maxVer)) return -1; + if (tEncodeI64(pEncoder, pTask->dataRange.range.minVer)) return -1; + if (tEncodeI64(pEncoder, pTask->dataRange.range.maxVer)) return -1; if (tEncodeI64(pEncoder, pTask->dataRange.window.skey)) return -1; if (tEncodeI64(pEncoder, pTask->dataRange.window.ekey)) return -1; @@ -193,8 +193,9 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeI32(pDecoder, &taskId)) return -1; pTask->streamTaskId.taskId = taskId; - if (tDecodeU64(pDecoder, &pTask->dataRange.range.minVer)) return -1; - if (tDecodeU64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; + if (tDecodeI64(pDecoder, &pTask->dataRange.range.minVer)) return -1; + if (tDecodeI64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; + if (tDecodeI64(pDecoder, &pTask->dataRange.window.skey)) return -1; if (tDecodeI64(pDecoder, &pTask->dataRange.window.ekey)) return -1; diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 21dcd16441..37d1c2aa59 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -1175,6 +1175,7 @@ e ,,y,script,./test.sh -f tsim/sma/tsmaCreateInsertQuery.sim ,,y,script,./test.sh -f tsim/sma/rsmaCreateInsertQuery.sim ,,y,script,./test.sh -f tsim/sma/rsmaPersistenceRecovery.sim +,,y,script,./test.sh -f tsim/sync/vnodesnapshot-rsma-test.sim ,,n,script,./test.sh -f tsim/valgrind/checkError1.sim ,,n,script,./test.sh -f tsim/valgrind/checkError2.sim ,,n,script,./test.sh -f tsim/valgrind/checkError3.sim diff --git a/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim b/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim index 3b3cd01521..b1e5ed200f 100644 --- a/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim +++ b/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim @@ -114,7 +114,7 @@ endi vg_ready: print ====> create stable/child table -sql create table stb (ts timestamp, c1 int, c2 float, c3 double) tags (t1 int) rollup(sum) watermark 3s,3s max_delay 3s,3s +sql create table stb (ts timestamp, c1 float, c2 float, c3 double) tags (t1 int) rollup(sum) watermark 3s,3s max_delay 3s,3s sql show stables if $rows != 1 then diff --git a/tests/script/win-test-file b/tests/script/win-test-file index 4ff4b52f7e..fe5f5c39e3 100644 --- a/tests/script/win-test-file +++ b/tests/script/win-test-file @@ -320,6 +320,7 @@ ./test.sh -f tsim/sma/tsmaCreateInsertQuery.sim ./test.sh -f tsim/sma/rsmaCreateInsertQuery.sim ./test.sh -f tsim/sma/rsmaPersistenceRecovery.sim +./test.sh -f tsim/sync/vnodesnapshot-rsma-test.sim ./test.sh -f tsim/valgrind/checkError1.sim ./test.sh -f tsim/valgrind/checkError2.sim ./test.sh -f tsim/valgrind/checkError3.sim diff --git a/tests/system-test/1-insert/rsma.py b/tests/system-test/1-insert/rsma.py new file mode 100644 index 0000000000..ab84185e87 --- /dev/null +++ b/tests/system-test/1-insert/rsma.py @@ -0,0 +1,248 @@ +from datetime import datetime +import time + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * + +PRIMARY_COL = "ts" + +INT_COL = "c_int" +BINT_COL = "c_bint" +SINT_COL = "c_sint" +TINT_COL = "c_tint" +FLOAT_COL = "c_float" +DOUBLE_COL = "c_double" +BOOL_COL = "c_bool" +TINT_UN_COL = "c_utint" +SINT_UN_COL = "c_usint" +BINT_UN_COL = "c_ubint" +INT_UN_COL = "c_uint" +BINARY_COL = "c_binary" +NCHAR_COL = "c_nchar" +TS_COL = "c_ts" + +INT_TAG = "t_int" + +TAG_COL = [INT_TAG] + +## insert data args: +TIME_STEP = 10000 +NOW = int(datetime.timestamp(datetime.now()) * 1000) + +# init db/table +DBNAME = "db" +DB1 = "db1" +DB2 = "db2" +DB3 = "db3" +DB4 = "db4" +STBNAME = "stb1" +CTBNAME = "ct1" +NTBNAME = "nt1" + +class TDTestCase: + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), True) + + @property + def create_databases_sql_err(self): + return [ + # check grammar + "create database db1 retentions", + "create database db1 retentions 1s:1d", + "create database db1 retentions 1s:1d,2s:2d", + "create database db1 retentions 1s:1d,2s:2d,3s:3d", + "create database db1 retentions 1s:1d,2s:2d,3s:3d,4s:4d", + "create database db1 retentions -:1d,2s:2d,3s:3d,4s:4d", + "create database db1 retentions --:1d", + "create database db1 retentions -:-:1d", + "create database db1 retentions 1d:-", + "create database db1 retentions -:-", + "create database db1 retentions +:1d", + "create database db1 retentions :1d", + "create database db1 retentions -:1d,-:2d", + "create database db1 retentions -:1d,-:2d,-:3d", + "create database db1 retentions -:1d,1s:-", + "create database db1 retentions -:1d,15s:2d,-:3d", + + # check unit + "create database db1 retentions -:1d,1b:1d", + "create database db1 retentions -:1d,1u:1d", + "create database db1 retentions -:1d,1a:1d", + "create database db1 retentions -:1d,1n:1d", + "create database db1 retentions -:1d,1y:1d", + "create database db1 retentions -:1d,1s:86400s", + "create database db1 retentions -:1d,1s:86400000a", + "create database db1 retentions -:1d,1s:86400000000u", + "create database db1 retentions -:1d,1s:86400000000000b", + "create database db1 retentions -:1s,1s:2s", + "create database db1 retentions -:1d,1s:1w", + "create database db1 retentions -:1d,1s:1n", + "create database db1 retentions -:1d,1s:1y", + + # check value range + "create database db3 retentions -:-1d", + "create database db3 retentions -:0d", + "create database db3 retentions -:1439m", + "create database db3 retentions -:365001d", + "create database db3 retentions -:8760001h", + "create database db3 retentions -:525600001m", + "create database db3 retentions -:106581d precision 'ns'", + "create database db3 retentions -:2557921h precision 'ns'", + "create database db3 retentions -:153475201m precision 'ns'", + # check relationships + "create database db5 retentions -:1440m,1441m:1440m,2d:3d", + "create database db5 retentions -:1d,2m:1d,1s:2d", + "create database db5 retentions -:1440m,1s:2880m,2s:2879m", + "create database db5 retentions -:1d,2s:2d,2s:3d", + "create database db5 retentions -:1d,3s:2d,2s:3d", + "create database db1 retentions -:1d,2s:3d,3s:2d", + "create database db1 retentions -:1d,2s:3d,1s:2d", + + ] + + @property + def create_databases_sql_current(self): + return [ + f"create database {DB1} retentions -:1d", + f"create database {DB2} retentions -:1d,2m:2d,3h:3d", + ] + + @property + def alter_database_sql(self): + return [ + "alter database db1 retentions -:99d", + "alter database db2 retentions -:97d,98h:98d,99h:99d,", + ] + + @property + def create_stable_sql_err(self, dbname=DB2): + return [ + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(ceil) watermark 1s max_delay 1m", + f"create stable {dbname}.stb12 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(count) watermark 1min", + f"create stable {dbname}.stb13 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay -1s", + f"create stable {dbname}.stb14 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark -1m", + f"create stable {dbname}.stb15 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) watermark 1m ", + f"create stable {dbname}.stb16 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) max_delay 1m ", + f"create stable {dbname}.stb21 ({PRIMARY_COL} timestamp, {INT_COL} int, {BINARY_COL} binary(16)) tags (tag1 int) rollup(avg) watermark 1s", + f"create stable {dbname}.stb22 ({PRIMARY_COL} timestamp, {INT_COL} int, {NCHAR_COL} nchar(16)) tags (tag1 int) rollup(avg) max_delay 1m", + f"create table {dbname}.ntb_1 ({PRIMARY_COL} timestamp, {INT_COL} int, {NCHAR_COL} nchar(16)) rollup(avg) watermark 1s max_delay 1s", + f"create table {dbname}.ntb_2 ({PRIMARY_COL} timestamp, {INT_COL} int) " , + f"create stable {dbname}.stb23 ({PRIMARY_COL} timestamp, {INT_COL} int, {NCHAR_COL} nchar(16)) tags (tag1 int) " , + f"create stable {dbname}.stb24 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) " , + f"create stable {dbname}.stb25 ({PRIMARY_COL} timestamp, {INT_COL} int) " , + f"create stable {dbname}.stb26 ({PRIMARY_COL} timestamp, {INT_COL} int, {BINARY_COL} nchar(16)) " , + # only float/double allowd for avg/sum + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(avg)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINT_COL} bigint) tags (tag1 int) rollup(avg)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BOOL_COL} bool) tags (tag1 int) rollup(avg)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINARY_COL} binary(10)) tags (tag1 int) rollup(avg)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(sum)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINT_COL} bigint) tags (tag1 int) rollup(sum)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BOOL_COL} bool) tags (tag1 int) rollup(sum)", + f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINARY_COL} binary(10)) tags (tag1 int) rollup(sum)", + + + # watermark, max_delay: [0, 900000], [ms, s, m, ?] + f"create stable stb17 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 1u", + f"create stable stb18 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark 1b", + f"create stable stb19 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark 900001ms", + f"create stable stb20 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 16m", + f"create stable stb27 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 901s", + f"create stable stb28 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 1h", + f"create stable stb29 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 0.2h", + f"create stable stb30 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark 0.002d", + + ] + + @property + def create_tb(self, stb=STBNAME, ctb_num=20, ntbnum=1, rsma=False, dbname=DBNAME, rsma_type="sum"): + tdLog.printNoPrefix("==========step: create table") + if rsma: + if rsma_type.lower().strip() in ("last", "first"): + create_stb_sql = f'''create table {dbname}.{stb}( + ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, + {FLOAT_COL} float, {DOUBLE_COL} double, {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, + {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned, {BINARY_COL} binary(16) + ) tags ({INT_TAG} int) rollup({rsma_type}) watermark 5s,5s max_delay 5s,5s + ''' + elif rsma_type.lower().strip() in ("sum", "avg"): + create_stb_sql = f'''create table {dbname}.{stb}( + ts timestamp, {DOUBLE_COL} double, {DOUBLE_COL}_1 double, {DOUBLE_COL}_2 double, {DOUBLE_COL}_3 double, + {FLOAT_COL} float, {DOUBLE_COL}_4 double, {FLOAT_COL}_1 float, {FLOAT_COL}_2 float, {FLOAT_COL}_3 float, + {DOUBLE_COL}_5 double) tags ({INT_TAG} int) rollup({rsma_type}) watermark 5s,5s max_delay 5s,5s + ''' + else: + create_stb_sql = f'''create table {dbname}.{stb}( + ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, + {FLOAT_COL} float, {DOUBLE_COL} double, {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, + {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned + ) tags ({INT_TAG} int) rollup({rsma_type}) watermark 5s,5s max_delay 5s,5s + ''' + tdSql.execute(create_stb_sql) + else: + create_stb_sql = f'''create table {dbname}.{stb}( + ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, + {FLOAT_COL} float, {DOUBLE_COL} double, {BOOL_COL} bool, + {BINARY_COL} binary(16), {NCHAR_COL} nchar(32), {TS_COL} timestamp, + {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, + {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned + ) tags ({INT_TAG} int) + ''' + tdSql.execute(create_stb_sql) + + for i in range(ntbnum): + create_ntb_sql = f'''create table {dbname}.nt{i+1}( + ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, + {FLOAT_COL} float, {DOUBLE_COL} double, {BOOL_COL} bool, + {BINARY_COL} binary(16), {NCHAR_COL} nchar(32), {TS_COL} timestamp, + {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, + {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned + ) + ''' + tdSql.execute(create_ntb_sql) + + for i in range(ctb_num): + tdSql.execute(f'create table {dbname}.ct{i+1} using {dbname}.{stb} tags ( {i+1} )') + + def create_ctable(self,tsql=None, dbName='dbx',stbName='stb',ctbPrefix='ctb',ctbNum=1): + tsql.execute("use %s" %dbName) + pre_create = "create table" + sql = pre_create + #tdLog.debug("doing create one stable %s and %d child table in %s ..." %(stbname, count ,dbname)) + for i in range(ctbNum): + tagValue = 'beijing' + if (i % 2 == 0): + tagValue = 'shanghai' + sql += " %s%d using %s tags(%d, '%s')"%(ctbPrefix,i,stbName,i+1, tagValue) + if (i > 0) and (i%100 == 0): + tsql.execute(sql) + sql = pre_create + if sql != pre_create: + tsql.execute(sql) + + tdLog.debug("complete to create %d child tables in %s.%s" %(ctbNum, dbName, stbName)) + return + + + def run(self): + self.rows = 10 + tdLog.printNoPrefix("==========step0:all check") + dbname='d0' + tdSql.execute(f"create database {dbname} retentions -:10d,1m:15d,1h:30d STT_TRIGGER 1 vgroups 6;") + tdSql.execute(f"create stable if not exists {dbname}.st_min (ts timestamp, c1 int) tags (proid int,city binary(20)) rollup(min) watermark 0s,1s max_delay 1m,180s;;") + tdSql.execute(f"create stable if not exists {dbname}.st_avg (ts timestamp, c1 double) tags (city binary(20),district binary(20)) rollup(min) watermark 0s,1s max_delay 1m,180s;;") + self.create_ctable(tdSql, dbname, 'st_min', 'ct_min', 10000) + tdLog.printNoPrefix("==========step4:after wal, all check again ") + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) From f97cf96fd60dcebdbaa357c698f32a6ce64bc389 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 8 Nov 2023 20:47:41 +0800 Subject: [PATCH 38/56] chore: remove test case --- tests/system-test/1-insert/rsma.py | 248 ----------------------------- 1 file changed, 248 deletions(-) delete mode 100644 tests/system-test/1-insert/rsma.py diff --git a/tests/system-test/1-insert/rsma.py b/tests/system-test/1-insert/rsma.py deleted file mode 100644 index ab84185e87..0000000000 --- a/tests/system-test/1-insert/rsma.py +++ /dev/null @@ -1,248 +0,0 @@ -from datetime import datetime -import time - -from util.log import * -from util.sql import * -from util.cases import * -from util.dnodes import * -from util.common import * - -PRIMARY_COL = "ts" - -INT_COL = "c_int" -BINT_COL = "c_bint" -SINT_COL = "c_sint" -TINT_COL = "c_tint" -FLOAT_COL = "c_float" -DOUBLE_COL = "c_double" -BOOL_COL = "c_bool" -TINT_UN_COL = "c_utint" -SINT_UN_COL = "c_usint" -BINT_UN_COL = "c_ubint" -INT_UN_COL = "c_uint" -BINARY_COL = "c_binary" -NCHAR_COL = "c_nchar" -TS_COL = "c_ts" - -INT_TAG = "t_int" - -TAG_COL = [INT_TAG] - -## insert data args: -TIME_STEP = 10000 -NOW = int(datetime.timestamp(datetime.now()) * 1000) - -# init db/table -DBNAME = "db" -DB1 = "db1" -DB2 = "db2" -DB3 = "db3" -DB4 = "db4" -STBNAME = "stb1" -CTBNAME = "ct1" -NTBNAME = "nt1" - -class TDTestCase: - - def init(self, conn, logSql, replicaVar=1): - self.replicaVar = int(replicaVar) - tdLog.debug(f"start to excute {__file__}") - tdSql.init(conn.cursor(), True) - - @property - def create_databases_sql_err(self): - return [ - # check grammar - "create database db1 retentions", - "create database db1 retentions 1s:1d", - "create database db1 retentions 1s:1d,2s:2d", - "create database db1 retentions 1s:1d,2s:2d,3s:3d", - "create database db1 retentions 1s:1d,2s:2d,3s:3d,4s:4d", - "create database db1 retentions -:1d,2s:2d,3s:3d,4s:4d", - "create database db1 retentions --:1d", - "create database db1 retentions -:-:1d", - "create database db1 retentions 1d:-", - "create database db1 retentions -:-", - "create database db1 retentions +:1d", - "create database db1 retentions :1d", - "create database db1 retentions -:1d,-:2d", - "create database db1 retentions -:1d,-:2d,-:3d", - "create database db1 retentions -:1d,1s:-", - "create database db1 retentions -:1d,15s:2d,-:3d", - - # check unit - "create database db1 retentions -:1d,1b:1d", - "create database db1 retentions -:1d,1u:1d", - "create database db1 retentions -:1d,1a:1d", - "create database db1 retentions -:1d,1n:1d", - "create database db1 retentions -:1d,1y:1d", - "create database db1 retentions -:1d,1s:86400s", - "create database db1 retentions -:1d,1s:86400000a", - "create database db1 retentions -:1d,1s:86400000000u", - "create database db1 retentions -:1d,1s:86400000000000b", - "create database db1 retentions -:1s,1s:2s", - "create database db1 retentions -:1d,1s:1w", - "create database db1 retentions -:1d,1s:1n", - "create database db1 retentions -:1d,1s:1y", - - # check value range - "create database db3 retentions -:-1d", - "create database db3 retentions -:0d", - "create database db3 retentions -:1439m", - "create database db3 retentions -:365001d", - "create database db3 retentions -:8760001h", - "create database db3 retentions -:525600001m", - "create database db3 retentions -:106581d precision 'ns'", - "create database db3 retentions -:2557921h precision 'ns'", - "create database db3 retentions -:153475201m precision 'ns'", - # check relationships - "create database db5 retentions -:1440m,1441m:1440m,2d:3d", - "create database db5 retentions -:1d,2m:1d,1s:2d", - "create database db5 retentions -:1440m,1s:2880m,2s:2879m", - "create database db5 retentions -:1d,2s:2d,2s:3d", - "create database db5 retentions -:1d,3s:2d,2s:3d", - "create database db1 retentions -:1d,2s:3d,3s:2d", - "create database db1 retentions -:1d,2s:3d,1s:2d", - - ] - - @property - def create_databases_sql_current(self): - return [ - f"create database {DB1} retentions -:1d", - f"create database {DB2} retentions -:1d,2m:2d,3h:3d", - ] - - @property - def alter_database_sql(self): - return [ - "alter database db1 retentions -:99d", - "alter database db2 retentions -:97d,98h:98d,99h:99d,", - ] - - @property - def create_stable_sql_err(self, dbname=DB2): - return [ - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(ceil) watermark 1s max_delay 1m", - f"create stable {dbname}.stb12 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(count) watermark 1min", - f"create stable {dbname}.stb13 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay -1s", - f"create stable {dbname}.stb14 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark -1m", - f"create stable {dbname}.stb15 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) watermark 1m ", - f"create stable {dbname}.stb16 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) max_delay 1m ", - f"create stable {dbname}.stb21 ({PRIMARY_COL} timestamp, {INT_COL} int, {BINARY_COL} binary(16)) tags (tag1 int) rollup(avg) watermark 1s", - f"create stable {dbname}.stb22 ({PRIMARY_COL} timestamp, {INT_COL} int, {NCHAR_COL} nchar(16)) tags (tag1 int) rollup(avg) max_delay 1m", - f"create table {dbname}.ntb_1 ({PRIMARY_COL} timestamp, {INT_COL} int, {NCHAR_COL} nchar(16)) rollup(avg) watermark 1s max_delay 1s", - f"create table {dbname}.ntb_2 ({PRIMARY_COL} timestamp, {INT_COL} int) " , - f"create stable {dbname}.stb23 ({PRIMARY_COL} timestamp, {INT_COL} int, {NCHAR_COL} nchar(16)) tags (tag1 int) " , - f"create stable {dbname}.stb24 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) " , - f"create stable {dbname}.stb25 ({PRIMARY_COL} timestamp, {INT_COL} int) " , - f"create stable {dbname}.stb26 ({PRIMARY_COL} timestamp, {INT_COL} int, {BINARY_COL} nchar(16)) " , - # only float/double allowd for avg/sum - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(avg)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINT_COL} bigint) tags (tag1 int) rollup(avg)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BOOL_COL} bool) tags (tag1 int) rollup(avg)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINARY_COL} binary(10)) tags (tag1 int) rollup(avg)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(sum)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINT_COL} bigint) tags (tag1 int) rollup(sum)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BOOL_COL} bool) tags (tag1 int) rollup(sum)", - f"create stable {dbname}.stb11 ({PRIMARY_COL} timestamp, {BINARY_COL} binary(10)) tags (tag1 int) rollup(sum)", - - - # watermark, max_delay: [0, 900000], [ms, s, m, ?] - f"create stable stb17 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 1u", - f"create stable stb18 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark 1b", - f"create stable stb19 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark 900001ms", - f"create stable stb20 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 16m", - f"create stable stb27 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 901s", - f"create stable stb28 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 1h", - f"create stable stb29 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) max_delay 0.2h", - f"create stable stb30 ({PRIMARY_COL} timestamp, {INT_COL} int) tags (tag1 int) rollup(min) watermark 0.002d", - - ] - - @property - def create_tb(self, stb=STBNAME, ctb_num=20, ntbnum=1, rsma=False, dbname=DBNAME, rsma_type="sum"): - tdLog.printNoPrefix("==========step: create table") - if rsma: - if rsma_type.lower().strip() in ("last", "first"): - create_stb_sql = f'''create table {dbname}.{stb}( - ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, - {FLOAT_COL} float, {DOUBLE_COL} double, {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, - {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned, {BINARY_COL} binary(16) - ) tags ({INT_TAG} int) rollup({rsma_type}) watermark 5s,5s max_delay 5s,5s - ''' - elif rsma_type.lower().strip() in ("sum", "avg"): - create_stb_sql = f'''create table {dbname}.{stb}( - ts timestamp, {DOUBLE_COL} double, {DOUBLE_COL}_1 double, {DOUBLE_COL}_2 double, {DOUBLE_COL}_3 double, - {FLOAT_COL} float, {DOUBLE_COL}_4 double, {FLOAT_COL}_1 float, {FLOAT_COL}_2 float, {FLOAT_COL}_3 float, - {DOUBLE_COL}_5 double) tags ({INT_TAG} int) rollup({rsma_type}) watermark 5s,5s max_delay 5s,5s - ''' - else: - create_stb_sql = f'''create table {dbname}.{stb}( - ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, - {FLOAT_COL} float, {DOUBLE_COL} double, {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, - {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned - ) tags ({INT_TAG} int) rollup({rsma_type}) watermark 5s,5s max_delay 5s,5s - ''' - tdSql.execute(create_stb_sql) - else: - create_stb_sql = f'''create table {dbname}.{stb}( - ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, - {FLOAT_COL} float, {DOUBLE_COL} double, {BOOL_COL} bool, - {BINARY_COL} binary(16), {NCHAR_COL} nchar(32), {TS_COL} timestamp, - {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, - {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned - ) tags ({INT_TAG} int) - ''' - tdSql.execute(create_stb_sql) - - for i in range(ntbnum): - create_ntb_sql = f'''create table {dbname}.nt{i+1}( - ts timestamp, {INT_COL} int, {BINT_COL} bigint, {SINT_COL} smallint, {TINT_COL} tinyint, - {FLOAT_COL} float, {DOUBLE_COL} double, {BOOL_COL} bool, - {BINARY_COL} binary(16), {NCHAR_COL} nchar(32), {TS_COL} timestamp, - {TINT_UN_COL} tinyint unsigned, {SINT_UN_COL} smallint unsigned, - {INT_UN_COL} int unsigned, {BINT_UN_COL} bigint unsigned - ) - ''' - tdSql.execute(create_ntb_sql) - - for i in range(ctb_num): - tdSql.execute(f'create table {dbname}.ct{i+1} using {dbname}.{stb} tags ( {i+1} )') - - def create_ctable(self,tsql=None, dbName='dbx',stbName='stb',ctbPrefix='ctb',ctbNum=1): - tsql.execute("use %s" %dbName) - pre_create = "create table" - sql = pre_create - #tdLog.debug("doing create one stable %s and %d child table in %s ..." %(stbname, count ,dbname)) - for i in range(ctbNum): - tagValue = 'beijing' - if (i % 2 == 0): - tagValue = 'shanghai' - sql += " %s%d using %s tags(%d, '%s')"%(ctbPrefix,i,stbName,i+1, tagValue) - if (i > 0) and (i%100 == 0): - tsql.execute(sql) - sql = pre_create - if sql != pre_create: - tsql.execute(sql) - - tdLog.debug("complete to create %d child tables in %s.%s" %(ctbNum, dbName, stbName)) - return - - - def run(self): - self.rows = 10 - tdLog.printNoPrefix("==========step0:all check") - dbname='d0' - tdSql.execute(f"create database {dbname} retentions -:10d,1m:15d,1h:30d STT_TRIGGER 1 vgroups 6;") - tdSql.execute(f"create stable if not exists {dbname}.st_min (ts timestamp, c1 int) tags (proid int,city binary(20)) rollup(min) watermark 0s,1s max_delay 1m,180s;;") - tdSql.execute(f"create stable if not exists {dbname}.st_avg (ts timestamp, c1 double) tags (city binary(20),district binary(20)) rollup(min) watermark 0s,1s max_delay 1m,180s;;") - self.create_ctable(tdSql, dbname, 'st_min', 'ct_min', 10000) - tdLog.printNoPrefix("==========step4:after wal, all check again ") - - def stop(self): - tdSql.close() - tdLog.success(f"{__file__} successfully executed") - -tdCases.addLinux(__file__, TDTestCase()) -tdCases.addWindows(__file__, TDTestCase()) From edef4de7b299b3b5a6ef1308c3ae49cb27754ffd Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 8 Nov 2023 20:57:57 +0800 Subject: [PATCH 39/56] chore: revert the code --- source/libs/stream/src/streamTask.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 2f8de98039..59002e456a 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -120,8 +120,8 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { taskId = pTask->streamTaskId.taskId; if (tEncodeI32(pEncoder, taskId)) return -1; - if (tEncodeI64(pEncoder, pTask->dataRange.range.minVer)) return -1; - if (tEncodeI64(pEncoder, pTask->dataRange.range.maxVer)) return -1; + if (tEncodeU64(pEncoder, pTask->dataRange.range.minVer)) return -1; + if (tEncodeU64(pEncoder, pTask->dataRange.range.maxVer)) return -1; if (tEncodeI64(pEncoder, pTask->dataRange.window.skey)) return -1; if (tEncodeI64(pEncoder, pTask->dataRange.window.ekey)) return -1; @@ -193,8 +193,8 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeI32(pDecoder, &taskId)) return -1; pTask->streamTaskId.taskId = taskId; - if (tDecodeI64(pDecoder, &pTask->dataRange.range.minVer)) return -1; - if (tDecodeI64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; + if (tDecodeU64(pDecoder, &pTask->dataRange.range.minVer)) return -1; + if (tDecodeU64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; if (tDecodeI64(pDecoder, &pTask->dataRange.window.skey)) return -1; if (tDecodeI64(pDecoder, &pTask->dataRange.window.ekey)) return -1; From c58ec72031866186a1fe495d6ca3e12f5d3b6c3a Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 9 Nov 2023 09:27:43 +0800 Subject: [PATCH 40/56] config/s3blocksize: enable alter for debugging --- source/common/src/tglobal.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index cb67fc1ba3..ead9a5926b 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -94,8 +94,8 @@ int32_t tsMonitorMaxLogs = 100; bool tsMonitorComp = false; // audit -bool tsEnableAudit = true; -bool tsEnableAuditCreateTable = true; +bool tsEnableAudit = true; +bool tsEnableAuditCreateTable = true; // telem #ifdef TD_ENTERPRISE @@ -222,7 +222,7 @@ char tsCompressor[32] = "ZSTD_COMPRESSOR"; // ZSTD_COMPRESSOR or GZIP_COMPR #ifdef WINDOWS bool tsStartUdfd = false; #else -bool tsStartUdfd = true; +bool tsStartUdfd = true; #endif // wal @@ -332,7 +332,9 @@ int32_t taosSetS3Cfg(SConfig *pCfg) { return 0; } -struct SConfig *taosGetCfg() { return tsCfg; } +struct SConfig *taosGetCfg() { + return tsCfg; +} static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *inputCfgDir, const char *envFile, char *apolloUrl) { @@ -442,8 +444,8 @@ static int32_t taosAddClientCfg(SConfig *pCfg) { if (cfgAddBool(pCfg, "enableScience", tsEnableScience, CFG_SCOPE_CLIENT, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt32(pCfg, "querySmaOptimize", tsQuerySmaOptimize, 0, 1, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1; if (cfgAddBool(pCfg, "queryPlannerTrace", tsQueryPlannerTrace, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1; - if (cfgAddInt32(pCfg, "queryNodeChunkSize", tsQueryNodeChunkSize, 1024, 128 * 1024, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != - 0) + if (cfgAddInt32(pCfg, "queryNodeChunkSize", tsQueryNodeChunkSize, 1024, 128 * 1024, CFG_SCOPE_CLIENT, + CFG_DYN_CLIENT) != 0) return -1; if (cfgAddBool(pCfg, "queryUseNodeAllocator", tsQueryUseNodeAllocator, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1; @@ -459,7 +461,8 @@ static int32_t taosAddClientCfg(SConfig *pCfg) { // if (cfgAddInt32(pCfg, "smlBatchSize", tsSmlBatchSize, 1, INT32_MAX, CFG_SCOPE_CLIENT, CFG_DYN_NONE) != 0) // return -1; if (cfgAddInt32(pCfg, "maxShellConns", tsMaxShellConns, 10, 50000000, CFG_SCOPE_CLIENT, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "maxInsertBatchRows", tsMaxInsertBatchRows, 1, INT32_MAX, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) + if (cfgAddInt32(pCfg, "maxInsertBatchRows", tsMaxInsertBatchRows, 1, INT32_MAX, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != + 0) return -1; if (cfgAddInt32(pCfg, "maxRetryWaitTime", tsMaxRetryWaitTime, 0, 86400000, CFG_SCOPE_BOTH, CFG_DYN_CLIENT) != 0) return -1; @@ -546,7 +549,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "supportVnodes", tsNumOfSupportVnodes, 0, 4096, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt32(pCfg, "statusInterval", tsStatusInterval, 1, 30, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "minSlidingTime", tsMinSlidingTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1; + if (cfgAddInt32(pCfg, "minSlidingTime", tsMinSlidingTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) + return -1; if (cfgAddInt32(pCfg, "minIntervalTime", tsMinIntervalTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1; @@ -685,7 +689,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "uptimeInterval", tsUptimeInterval, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt32(pCfg, "queryRsmaTolerance", tsQueryRsmaTolerance, 0, 900000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "timeseriesThreshold", tsTimeSeriesThreshold, 0, 2000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) + if (cfgAddInt32(pCfg, "timeseriesThreshold", tsTimeSeriesThreshold, 0, 2000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != + 0) return -1; if (cfgAddInt64(pCfg, "walFsyncDataSizeLimit", tsWalFsyncDataSizeLimit, 100 * 1024 * 1024, INT64_MAX, @@ -728,7 +733,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddString(pCfg, "s3Accesskey", tsS3AccessKey, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3Endpoint", tsS3Endpoint, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3BucketName", tsS3BucketName, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, -100, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) + if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, -100, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "s3BlockCacheSize", tsS3BlockCacheSize, 4, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) @@ -1669,6 +1674,7 @@ void taosCfgDynamicOptions(const char *option, const char *value) { {"ttlBatchDropNum", &tsTtlBatchDropNum}, {"ttlFlushThreshold", &tsTtlFlushThreshold}, {"ttlPushInterval", &tsTtlPushIntervalSec}, + {"s3BlockSize", &tsS3BlockSize}, {"s3BlockCacheSize", &tsS3BlockCacheSize}, {"s3PageCacheSize", &tsS3PageCacheSize}, {"s3UploadDelaySec", &tsS3UploadDelaySec}, @@ -1692,8 +1698,8 @@ void taosCfgDynamicOptions(const char *option, const char *value) { switch (pItem->dtype) { case CFG_DTYPE_BOOL: { - int32_t flag = atoi(value); - bool *pVar = options[d].optionVar; + int32_t flag = atoi(value); + bool *pVar = options[d].optionVar; uInfo("%s set from %d to %d", optName, *pVar, flag); *pVar = flag; } break; From 1bb10bb8625a9c30ad651ad9f8d1d15d3a792670 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 9 Nov 2023 09:32:13 +0800 Subject: [PATCH 41/56] vnode/cos: check get object block size optionally --- source/dnode/vnode/src/inc/vndCos.h | 2 +- source/dnode/vnode/src/tsdb/tsdbCache.c | 2 +- source/dnode/vnode/src/tsdb/tsdbReaderWriter.c | 2 +- source/dnode/vnode/src/vnd/vnodeCos.c | 15 +++++++++------ 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/source/dnode/vnode/src/inc/vndCos.h b/source/dnode/vnode/src/inc/vndCos.h index 8581b039f8..0a055ed32a 100644 --- a/source/dnode/vnode/src/inc/vndCos.h +++ b/source/dnode/vnode/src/inc/vndCos.h @@ -38,7 +38,7 @@ void s3DeleteObjectsByPrefix(const char *prefix); void s3DeleteObjects(const char *object_name[], int nobject); bool s3Exists(const char *object_name); bool s3Get(const char *object_name, const char *path); -int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, uint8_t **ppBlock); +int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, bool check, uint8_t **ppBlock); void s3EvictCache(const char *path, long object_size); long s3Size(const char *object_name); diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index bf6f0cf4d6..249479d6bd 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -3099,7 +3099,7 @@ static int32_t tsdbCacheLoadBlockS3(STsdbFD *pFD, uint8_t **ppBlock) { } */ int64_t block_offset = (pFD->blkno - 1) * tsS3BlockSize * pFD->szPage; - code = s3GetObjectBlock(pFD->objName, block_offset, tsS3BlockSize * pFD->szPage, ppBlock); + code = s3GetObjectBlock(pFD->objName, block_offset, tsS3BlockSize * pFD->szPage, 0, ppBlock); if (code != TSDB_CODE_SUCCESS) { // taosMemoryFree(pBlock); // code = TSDB_CODE_OUT_OF_MEMORY; diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index def9a73d10..ba9507530c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -340,7 +340,7 @@ static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64 int64_t retrieve_offset = PAGE_OFFSET(pgno, pFD->szPage); int64_t pgnoEnd = pgno - 1 + (size - n + szPgCont - 1) / szPgCont; int64_t retrieve_size = (pgnoEnd - pgno + 1) * pFD->szPage; - code = s3GetObjectBlock(pFD->objName, retrieve_offset, retrieve_size, &pBlock); + code = s3GetObjectBlock(pFD->objName, retrieve_offset, retrieve_size, 1, &pBlock); if (code != TSDB_CODE_SUCCESS) { goto _exit; } diff --git a/source/dnode/vnode/src/vnd/vnodeCos.c b/source/dnode/vnode/src/vnd/vnodeCos.c index 9941c53750..b7a13c1664 100644 --- a/source/dnode/vnode/src/vnd/vnodeCos.c +++ b/source/dnode/vnode/src/vnd/vnodeCos.c @@ -752,7 +752,7 @@ static S3Status getObjectDataCallback(int bufferSize, const char *buffer, void * } } -int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, uint8_t **ppBlock) { +int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, bool check, uint8_t **ppBlock) { int status = 0; int64_t ifModifiedSince = -1, ifNotModifiedSince = -1; const char *ifMatch = 0, *ifNotMatch = 0; @@ -775,7 +775,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, return TAOS_SYSTEM_ERROR(EIO); } - if (cbd.buf_pos != size) { + if (check && cbd.buf_pos != size) { vError("%s: %d(%s)", __func__, cbd.status, cbd.err_msg); return TAOS_SYSTEM_ERROR(EIO); } @@ -1063,7 +1063,8 @@ bool s3Get(const char *object_name, const char *path) { return ret; } -int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t block_size, uint8_t **ppBlock) { +int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t block_size, bool check, uint8_t **ppBlock) { + (void)check; int32_t code = 0; cos_pool_t *p = NULL; int is_cname = 0; @@ -1255,8 +1256,10 @@ void s3DeleteObjectsByPrefix(const char *prefix) {} void s3DeleteObjects(const char *object_name[], int nobject) {} bool s3Exists(const char *object_name) { return false; } bool s3Get(const char *object_name, const char *path) { return false; } -int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, uint8_t **ppBlock) { return 0; } -void s3EvictCache(const char *path, long object_size) {} -long s3Size(const char *object_name) { return 0; } +int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, bool check, uint8_t **ppBlock) { + return 0; +} +void s3EvictCache(const char *path, long object_size) {} +long s3Size(const char *object_name) { return 0; } #endif From 98a2ca6bfdb6688706839ad9419adc5cb8f37180 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 9 Nov 2023 10:07:24 +0800 Subject: [PATCH 42/56] change chkpid gen way --- source/dnode/mnode/impl/src/mndStream.c | 261 +++++++++++++----------- 1 file changed, 138 insertions(+), 123 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index fd0c349dd2..eaeed579e5 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -37,23 +37,23 @@ typedef struct SNodeEntry { int32_t nodeId; - bool stageUpdated; // the stage has been updated due to the leader/follower change or node reboot. - SEpSet epset; // compare the epset to identify the vgroup tranferring between different dnodes. - int64_t hbTimestamp; // second + bool stageUpdated; // the stage has been updated due to the leader/follower change or node reboot. + SEpSet epset; // compare the epset to identify the vgroup tranferring between different dnodes. + int64_t hbTimestamp; // second } SNodeEntry; typedef struct SStreamExecInfo { - SArray *pNodeEntryList; + SArray * pNodeEntryList; int64_t ts; // snapshot ts int64_t activeCheckpoint; // active check point id - SHashObj *pTaskMap; - SArray *pTaskList; + SHashObj * pTaskMap; + SArray * pTaskList; TdThreadMutex lock; } SStreamExecInfo; typedef struct SVgroupChangeInfo { SHashObj *pDBMap; - SArray *pUpdateNodeList; // SArray + SArray * pUpdateNodeList; // SArray } SVgroupChangeInfo; static int32_t mndNodeCheckSentinel = 0; @@ -78,7 +78,7 @@ static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, in static int32_t mndProcessNodeCheck(SRpcMsg *pReq); static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg); static SArray *extractNodeListFromStream(SMnode *pMnode); -static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool* allReady); +static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady); static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); @@ -91,7 +91,7 @@ static void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExe static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot); static int32_t doKillActiveCheckpointTrans(SMnode *pMnode); -static int32_t setNodeEpsetExpiredFlag(const SArray* pNodeList); +static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList); int32_t mndInitStream(SMnode *pMnode) { SSdbTable table = { @@ -193,9 +193,9 @@ STREAM_ENCODE_OVER: SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw) { terrno = TSDB_CODE_OUT_OF_MEMORY; - SSdbRow *pRow = NULL; + SSdbRow * pRow = NULL; SStreamObj *pStream = NULL; - void *buf = NULL; + void * buf = NULL; int8_t sver = 0; if (sdbGetRawSoftVer(pRaw, &sver) != 0) { @@ -272,7 +272,7 @@ static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pOldStream, SStream } SStreamObj *mndAcquireStream(SMnode *pMnode, char *streamName) { - SSdb *pSdb = pMnode->pSdb; + SSdb * pSdb = pMnode->pSdb; SStreamObj *pStream = sdbAcquire(pSdb, SDB_STREAM, streamName); if (pStream == NULL && terrno == TSDB_CODE_SDB_OBJ_NOT_THERE) { terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; @@ -325,7 +325,7 @@ static int32_t mndStreamGetPlanString(const char *ast, int8_t triggerType, int64 return TSDB_CODE_SUCCESS; } - SNode *pAst = NULL; + SNode * pAst = NULL; int32_t code = nodesStringToNode(ast, &pAst); SQueryPlan *pPlan = NULL; @@ -350,7 +350,7 @@ static int32_t mndStreamGetPlanString(const char *ast, int8_t triggerType, int64 } static int32_t mndBuildStreamObjFromCreateReq(SMnode *pMnode, SStreamObj *pObj, SCMCreateStreamReq *pCreate) { - SNode *pAst = NULL; + SNode * pAst = NULL; SQueryPlan *pPlan = NULL; mInfo("stream:%s to create", pCreate->name); @@ -589,7 +589,7 @@ int32_t mndPersistDropStreamLog(SMnode *pMnode, STrans *pTrans, SStreamObj *pStr static int32_t mndCreateStbForStream(SMnode *pMnode, STrans *pTrans, const SStreamObj *pStream, const char *user) { SStbObj *pStb = NULL; - SDbObj *pDb = NULL; + SDbObj * pDb = NULL; SMCreateStbReq createReq = {0}; tstrncpy(createReq.name, pStream->targetSTbName, TSDB_TABLE_FNAME_LEN); @@ -715,10 +715,12 @@ int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) } static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - int32_t code = -1; - SStreamObj *pStream = NULL; - SDbObj *pDb = NULL; + int32_t code = -1; + + SMnode * pMnode = pReq->info.node; + SStreamObj *pStream = NULL; + SDbObj * pDb = NULL; + SCMCreateStreamReq createStreamReq = {0}; SStreamObj streamObj = {0}; @@ -761,7 +763,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { int32_t numOfStream = 0; SStreamObj *pStream = NULL; - void *pIter = NULL; + void * pIter = NULL; while (1) { pIter = sdbFetch(pMnode->pSdb, SDB_STREAM, pIter, (void **)&pStream); @@ -858,12 +860,12 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { SName name = {0}; tNameFromString(&name, createStreamReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - //reuse this function for stream + // reuse this function for stream - //TODO + // TODO if (createStreamReq.sql != NULL) { - auditRecord(pReq, pMnode->clusterId, "createStream", name.dbname, name.tname, - createStreamReq.sql, strlen(createStreamReq.sql)); + auditRecord(pReq, pMnode->clusterId, "createStream", name.dbname, name.tname, createStreamReq.sql, + strlen(createStreamReq.sql)); } _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -877,15 +879,31 @@ _OVER: return code; } +int64_t mndStreamGenChkpId(SMnode *pMnode) { + SStreamObj *pStream = NULL; + void * pIter = NULL; + SSdb * pSdb = pMnode->pSdb; + + int64_t maxChkpId = 0; + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) break; + + maxChkpId = MAX(maxChkpId, pStream->checkpointId); + sdbRelease(pSdb, pStream); + } + return maxChkpId + 1; +} + static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; - SSdb *pSdb = pMnode->pSdb; + SSdb * pSdb = pMnode->pSdb; if (sdbGetSize(pSdb, SDB_STREAM) <= 0) { return 0; } SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); - pMsg->checkpointId = taosGetTimestampMs(); + pMsg->checkpointId = mndStreamGenChkpId(pMnode); int32_t size = sizeof(SMStreamDoCheckpointMsg); SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = size}; @@ -919,7 +937,7 @@ static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, in return -1; } - void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + void * abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); SEncoder encoder; tEncoderInit(&encoder, abuf, tlen); tEncodeStreamCheckpointSourceReq(&encoder, &req); @@ -1042,7 +1060,7 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream int32_t totLevel = taosArrayGetSize(pStream->tasks); for (int32_t i = 0; i < totLevel; i++) { - SArray *pLevel = taosArrayGetP(pStream->tasks, i); + SArray * pLevel = taosArrayGetP(pStream->tasks, i); SStreamTask *pTask = taosArrayGetP(pLevel, 0); if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { @@ -1059,7 +1077,7 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream return -1; } - void *buf; + void * buf; int32_t tlen; if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, chkptId, pTask->id.streamId, pTask->id.taskId) < 0) { @@ -1070,7 +1088,8 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream STransAction action = {0}; SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); - initTransAction(&action, buf, tlen, TDMT_VND_STREAM_CHECK_POINT_SOURCE, &epset, TSDB_CODE_SYN_PROPOSE_NOT_READY); + initTransAction(&action, buf, tlen, TDMT_VND_STREAM_CHECK_POINT_SOURCE, &epset, + TSDB_CODE_SYN_PROPOSE_NOT_READY); mndReleaseVgroup(pMnode, pVgObj); if (mndTransAppendRedoAction(pTrans, &action) != 0) { @@ -1110,9 +1129,9 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream } static const char *mndGetStreamDB(SMnode *pMnode) { - SSdb *pSdb = pMnode->pSdb; + SSdb * pSdb = pMnode->pSdb; SStreamObj *pStream = NULL; - void *pIter = NULL; + void * pIter = NULL; pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); if (pIter == NULL) { @@ -1126,9 +1145,9 @@ static const char *mndGetStreamDB(SMnode *pMnode) { } static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - SSdb *pSdb = pMnode->pSdb; - void *pIter = NULL; + SMnode * pMnode = pReq->info.node; + SSdb * pSdb = pMnode->pSdb; + void * pIter = NULL; SStreamObj *pStream = NULL; int32_t code = 0; @@ -1149,19 +1168,18 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { return 0; } - for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeEntryList); ++i) { - SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i); + for (int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeEntryList); ++i) { + SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i); if (pNodeEntry->stageUpdated) { mDebug("stream task not ready due to node update detected, checkpoint not issued"); return 0; } } - bool allReady = true; + bool allReady = true; SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allReady); if (!allReady) { - mWarn("not all vnodes are ready, ignore the checkpoint") - taosArrayDestroy(pNodeSnapshot); + mWarn("not all vnodes are ready, ignore the checkpoint") taosArrayDestroy(pNodeSnapshot); return 0; } @@ -1182,15 +1200,15 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { taosThreadMutexLock(&execInfo.lock); for (int32_t i = 0; i < taosArrayGetSize(execInfo.pTaskList); ++i) { - STaskId *p = taosArrayGet(execInfo.pTaskList, i); - STaskStatusEntry* pEntry = taosHashGet(execInfo.pTaskMap, p, sizeof(*p)); + STaskId * p = taosArrayGet(execInfo.pTaskList, i); + STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, p, sizeof(*p)); if (pEntry == NULL) { continue; } if (pEntry->status != TASK_STATUS__READY) { mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, checkpoint msg not issued", - pEntry->id.streamId, (int32_t)pEntry->id.taskId, 0, streamTaskGetStatusStr(pEntry->status)); + pEntry->id.streamId, (int32_t)pEntry->id.taskId, 0, streamTaskGetStatusStr(pEntry->status)); ready = false; break; } @@ -1250,7 +1268,7 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { } static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; + SMnode * pMnode = pReq->info.node; SStreamObj *pStream = NULL; SMDropStreamReq dropReq = {0}; @@ -1327,7 +1345,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { SName name = {0}; tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - //reuse this function for stream + // reuse this function for stream auditRecord(pReq, pMnode->clusterId, "dropStream", name.dbname, name.tname, dropReq.sql, dropReq.sqlLen); @@ -1379,7 +1397,7 @@ int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { } int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams) { - SSdb *pSdb = pMnode->pSdb; + SSdb * pSdb = pMnode->pSdb; SDbObj *pDb = mndAcquireDb(pMnode, dbName); if (pDb == NULL) { terrno = TSDB_CODE_MND_DB_NOT_SELECTED; @@ -1387,7 +1405,7 @@ int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams) } int32_t numOfStreams = 0; - void *pIter = NULL; + void * pIter = NULL; while (1) { SStreamObj *pStream = NULL; pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); @@ -1406,8 +1424,8 @@ int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams) } static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows) { - SMnode *pMnode = pReq->info.node; - SSdb *pSdb = pMnode->pSdb; + SMnode * pMnode = pReq->info.node; + SSdb * pSdb = pMnode->pSdb; int32_t numOfRows = 0; SStreamObj *pStream = NULL; @@ -1483,8 +1501,8 @@ static void mndCancelGetNextStream(SMnode *pMnode, void *pIter) { } static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rowsCapacity) { - SMnode *pMnode = pReq->info.node; - SSdb *pSdb = pMnode->pSdb; + SMnode * pMnode = pReq->info.node; + SSdb * pSdb = pMnode->pSdb; int32_t numOfRows = 0; SStreamObj *pStream = NULL; @@ -1573,13 +1591,13 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock // status char status[20 + VARSTR_HEADER_SIZE] = {0}; - STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; - STaskStatusEntry* pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id)); + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id)); if (pe == NULL) { continue; } - const char* pStatus = streamTaskGetStatusStr(pe->status); + const char *pStatus = streamTaskGetStatusStr(pe->status); STR_TO_VARSTR(status, pStatus); // status @@ -1591,24 +1609,24 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, false); // input queue - char vbuf[30] = {0}; - char buf[25] = {0}; - const char* queueInfoStr = "%4.2fMiB (%5.2f%)"; + char vbuf[30] = {0}; + char buf[25] = {0}; + const char *queueInfoStr = "%4.2fMiB (%5.2f%)"; sprintf(buf, queueInfoStr, pe->inputQUsed, pe->inputRate); STR_TO_VARSTR(vbuf, buf); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); + colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); // output queue -// sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); -// STR_TO_VARSTR(vbuf, buf); + // sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); + // STR_TO_VARSTR(vbuf, buf); -// pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); -// colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); + // pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + // colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - const char* sinkStr = "%.2fMiB"; + const char *sinkStr = "%.2fMiB"; sprintf(buf, sinkStr, pe->sinkDataSize); } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { // offset info @@ -1619,7 +1637,7 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock STR_TO_VARSTR(vbuf, buf); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); + colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); numOfRows++; } @@ -1663,7 +1681,7 @@ static int32_t mndPauseStreamTask(STrans *pTrans, SStreamTask *pTask) { } int32_t mndPauseAllStreamTasks(STrans *pTrans, SStreamObj *pStream) { - SArray* tasks = pStream->tasks; + SArray *tasks = pStream->tasks; int32_t size = taosArrayGetSize(tasks); for (int32_t i = 0; i < size; i++) { @@ -1700,7 +1718,7 @@ static int32_t mndPersistStreamLog(STrans *pTrans, const SStreamObj *pStream, in } static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; + SMnode * pMnode = pReq->info.node; SStreamObj *pStream = NULL; SMPauseStreamReq pauseReq = {0}; @@ -1816,7 +1834,7 @@ int32_t mndResumeAllStreamTasks(STrans *pTrans, SStreamObj *pStream, int8_t igUn } static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; + SMnode * pMnode = pReq->info.node; SStreamObj *pStream = NULL; SMResumeStreamReq pauseReq = {0}; @@ -1901,7 +1919,7 @@ static void initNodeUpdateMsg(SStreamTaskNodeUpdateMsg *pMsg, const SVgroupChang } static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupChangeInfo *pInfo, int32_t nodeId, - SStreamTaskId* pId, int32_t transId) { + SStreamTaskId *pId, int32_t transId) { SStreamTaskNodeUpdateMsg req = {0}; initNodeUpdateMsg(&req, pInfo, pId, transId); @@ -1924,7 +1942,7 @@ static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupCha return -1; } - void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + void * abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); SEncoder encoder; tEncoderInit(&encoder, abuf, tlen); tEncodeStreamTaskUpdateMsg(&encoder, &req); @@ -1991,7 +2009,7 @@ static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *p for (int32_t k = 0; k < numOfTasks; ++k) { SStreamTask *pTask = taosArrayGetP(pLevel, k); - void *pBuf = NULL; + void * pBuf = NULL; int32_t len = 0; streamTaskUpdateEpsetInfo(pTask, pInfo->pUpdateNodeList); doBuildStreamTaskUpdateMsg(&pBuf, &len, pInfo, pTask->info.nodeId, &pTask->id, pTrans->id); @@ -2012,7 +2030,7 @@ static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *p static bool isNodeEpsetChanged(const SEpSet *pPrevEpset, const SEpSet *pCurrent) { const SEp *pEp = GET_ACTIVE_EP(pPrevEpset); - const SEp* p = GET_ACTIVE_EP(pCurrent); + const SEp *p = GET_ACTIVE_EP(pCurrent); if (pEp->port == p->port && strncmp(pEp->fqdn, p->fqdn, TSDB_FQDN_LEN) == 0) { return false; @@ -2066,9 +2084,9 @@ static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pP return info; } -static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool* allReady) { - SSdb *pSdb = pMnode->pSdb; - void *pIter = NULL; +static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { + SSdb * pSdb = pMnode->pSdb; + void * pIter = NULL; SVgObj *pVgroup = NULL; *allReady = true; @@ -2115,8 +2133,8 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange // check all streams that involved this vnode should update the epset info SStreamObj *pStream = NULL; - void *pIter = NULL; - STrans *pTrans = NULL; + void * pIter = NULL; + STrans * pTrans = NULL; while (1) { pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); @@ -2177,9 +2195,9 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange } static SArray *extractNodeListFromStream(SMnode *pMnode) { - SSdb *pSdb = pMnode->pSdb; + SSdb * pSdb = pMnode->pSdb; SStreamObj *pStream = NULL; - void *pIter = NULL; + void * pIter = NULL; SHashObj *pHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK); while (1) { @@ -2226,9 +2244,9 @@ static SArray *extractNodeListFromStream(SMnode *pMnode) { } static void doExtractTasksFromStream(SMnode *pMnode) { - SSdb *pSdb = pMnode->pSdb; + SSdb * pSdb = pMnode->pSdb; SStreamObj *pStream = NULL; - void *pIter = NULL; + void * pIter = NULL; while (1) { pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); @@ -2263,11 +2281,11 @@ static int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId) { return TSDB_CODE_SUCCESS; } -static bool taskNodeExists(SArray* pList, int32_t nodeId) { +static bool taskNodeExists(SArray *pList, int32_t nodeId) { size_t num = taosArrayGetSize(pList); - for(int32_t i = 0; i < num; ++i) { - SNodeEntry* pEntry = taosArrayGet(pList, i); + for (int32_t i = 0; i < num; ++i) { + SNodeEntry *pEntry = taosArrayGet(pList, i); if (pEntry->nodeId == nodeId) { return true; } @@ -2277,12 +2295,12 @@ static bool taskNodeExists(SArray* pList, int32_t nodeId) { } int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { - SArray* pRemovedTasks = taosArrayInit(4, sizeof(STaskId)); + SArray *pRemovedTasks = taosArrayInit(4, sizeof(STaskId)); int32_t numOfTask = taosArrayGetSize(execInfo.pTaskList); - for(int32_t i = 0; i < numOfTask; ++i) { - STaskId* pId = taosArrayGet(execInfo.pTaskList, i); - STaskStatusEntry* pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); + for (int32_t i = 0; i < numOfTask; ++i) { + STaskId * pId = taosArrayGet(execInfo.pTaskList, i); + STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); bool existed = taskNodeExists(pNodeSnapshot, pEntry->nodeId); if (!existed) { @@ -2290,21 +2308,21 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { } } - for(int32_t i = 0; i < taosArrayGetSize(pRemovedTasks); ++i) { - STaskId* pId = taosArrayGet(pRemovedTasks, i); + for (int32_t i = 0; i < taosArrayGetSize(pRemovedTasks); ++i) { + STaskId *pId = taosArrayGet(pRemovedTasks, i); doRemoveTasks(&execInfo, pId); } mDebug("remove invalid stream tasks:%d, remain:%d", (int32_t)taosArrayGetSize(pRemovedTasks), - (int32_t) taosArrayGetSize(execInfo.pTaskList)); + (int32_t)taosArrayGetSize(execInfo.pTaskList)); int32_t size = taosArrayGetSize(pNodeSnapshot); - SArray* pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); - for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeEntryList); ++i) { - SNodeEntry* p = taosArrayGet(execInfo.pNodeEntryList, i); + SArray *pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); + for (int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeEntryList); ++i) { + SNodeEntry *p = taosArrayGet(execInfo.pNodeEntryList, i); - for(int32_t j = 0; j < size; ++j) { - SNodeEntry* pEntry = taosArrayGet(pNodeSnapshot, j); + for (int32_t j = 0; j < size; ++j) { + SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j); if (pEntry->nodeId == p->nodeId) { taosArrayPush(pValidNodeEntryList, p); break; @@ -2315,7 +2333,7 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList); execInfo.pNodeEntryList = pValidNodeEntryList; - mDebug("remain %d valid node entries", (int32_t) taosArrayGetSize(pValidNodeEntryList)); + mDebug("remain %d valid node entries", (int32_t)taosArrayGetSize(pValidNodeEntryList)); taosArrayDestroy(pRemovedTasks); return 0; } @@ -2347,7 +2365,7 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { return 0; } - bool allVnodeReady = true; + bool allVnodeReady = true; SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allVnodeReady); if (!allVnodeReady) { taosArrayDestroy(pNodeSnapshot); @@ -2361,7 +2379,6 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeEntryList, pNodeSnapshot); if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) { - // kill current active checkpoint transaction, since the transaction is vnode wide. doKillActiveCheckpointTrans(pMnode); code = mndProcessVgroupChange(pMnode, &changeInfo); @@ -2396,7 +2413,7 @@ typedef struct SMStreamNodeCheckMsg { static int32_t mndProcessNodeCheck(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; - SSdb *pSdb = pMnode->pSdb; + SSdb * pSdb = pMnode->pSdb; if (sdbGetSize(pSdb, SDB_STREAM) <= 0) { return 0; } @@ -2420,7 +2437,7 @@ void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { SStreamTask *pTask = taosArrayGetP(pLevel, j); STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; - void *p = taosHashGet(pExecNode->pTaskMap, &id, sizeof(id)); + void * p = taosHashGet(pExecNode->pTaskMap, &id, sizeof(id)); if (p == NULL) { STaskStatusEntry entry = {0}; streamTaskStatusInit(&entry, pTask); @@ -2434,7 +2451,7 @@ void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { } } -void removeStreamTasksInBuf(SStreamObj* pStream, SStreamExecInfo * pExecNode) { +void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { int32_t level = taosArrayGetSize(pStream->tasks); for (int32_t i = 0; i < level; i++) { SArray *pLevel = taosArrayGetP(pStream->tasks, i); @@ -2444,12 +2461,12 @@ void removeStreamTasksInBuf(SStreamObj* pStream, SStreamExecInfo * pExecNode) { SStreamTask *pTask = taosArrayGetP(pLevel, j); STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; - void *p = taosHashGet(pExecNode->pTaskMap, &id, sizeof(id)); + void * p = taosHashGet(pExecNode->pTaskMap, &id, sizeof(id)); if (p != NULL) { taosHashRemove(pExecNode->pTaskMap, &id, sizeof(id)); - for(int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) { - STaskId* pId = taosArrayGet(pExecNode->pTaskList, k); + for (int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) { + STaskId *pId = taosArrayGet(pExecNode->pTaskList, k); if (pId->taskId == id.taskId && pId->streamId == id.streamId) { taosArrayRemove(pExecNode->pTaskList, k); mInfo("s-task:0x%x removed from buffer, remain:%d", (int32_t)id.taskId, @@ -2457,7 +2474,6 @@ void removeStreamTasksInBuf(SStreamObj* pStream, SStreamExecInfo * pExecNode) { break; } } - } } } @@ -2487,7 +2503,7 @@ STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name) { return pTrans; } -int32_t createStreamResetStatusTrans(SMnode* pMnode, SStreamObj* pStream) { +int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { STrans *pTrans = doCreateTrans(pMnode, pStream, "stream-task-reset"); if (pTrans == NULL) { return terrno; @@ -2504,7 +2520,7 @@ int32_t createStreamResetStatusTrans(SMnode* pMnode, SStreamObj* pStream) { SStreamTask *pTask = taosArrayGetP(pLevel, k); // todo extract method, with pause stream task - SVResetStreamTaskReq* pReq = taosMemoryCalloc(1, sizeof(SVResetStreamTaskReq)); + SVResetStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVResetStreamTaskReq)); if (pReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; mError("failed to malloc in reset stream, size:%" PRIzu ", code:%s", sizeof(SVResetStreamTaskReq), @@ -2550,9 +2566,9 @@ int32_t createStreamResetStatusTrans(SMnode* pMnode, SStreamObj* pStream) { int32_t doKillActiveCheckpointTrans(SMnode *pMnode) { int32_t transId = 0; - SSdb *pSdb = pMnode->pSdb; + SSdb * pSdb = pMnode->pSdb; STrans *pTrans = NULL; - void *pIter = NULL; + void * pIter = NULL; while (1) { pIter = sdbFetch(pSdb, SDB_TRANS, pIter, (void **)&pTrans); @@ -2583,13 +2599,13 @@ int32_t doKillActiveCheckpointTrans(SMnode *pMnode) { return TSDB_CODE_SUCCESS; } -int32_t mndResetFromCheckpoint(SMnode* pMnode) { +int32_t mndResetFromCheckpoint(SMnode *pMnode) { doKillActiveCheckpointTrans(pMnode); // set all tasks status to be normal, refactor later to be stream level, instead of vnode level. - SSdb *pSdb = pMnode->pSdb; + SSdb * pSdb = pMnode->pSdb; SStreamObj *pStream = NULL; - void *pIter = NULL; + void * pIter = NULL; while (1) { pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); if (pIter == NULL) { @@ -2608,15 +2624,15 @@ int32_t mndResetFromCheckpoint(SMnode* pMnode) { return 0; } -int32_t setNodeEpsetExpiredFlag(const SArray* pNodeList) { +int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { int32_t num = taosArrayGetSize(pNodeList); for (int k = 0; k < num; ++k) { - int32_t* pVgId = taosArrayGet(pNodeList, k); + int32_t *pVgId = taosArrayGet(pNodeList, k); int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeEntryList); for (int i = 0; i < numOfNodes; ++i) { - SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i); + SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i); if (pNodeEntry->nodeId == *pVgId) { mInfo("vgId:%d expired in stream task, needs update nodeEp", *pVgId); @@ -2629,12 +2645,11 @@ int32_t setNodeEpsetExpiredFlag(const SArray* pNodeList) { return TSDB_CODE_SUCCESS; } -static void updateStageInfo(STaskStatusEntry* pTaskEntry, int32_t stage) { +static void updateStageInfo(STaskStatusEntry *pTaskEntry, int32_t stage) { int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeEntryList); - for(int32_t j = 0; j < numOfNodes; ++j) { - SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, j); + for (int32_t j = 0; j < numOfNodes; ++j) { + SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, j); if (pNodeEntry->nodeId == pTaskEntry->nodeId) { - mInfo("vgId:%d stage updated from %d to %d, nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId, pTaskEntry->stage, stage, pTaskEntry->id.taskId); @@ -2646,7 +2661,7 @@ static void updateStageInfo(STaskStatusEntry* pTaskEntry, int32_t stage) { } int32_t mndProcessStreamHb(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; + SMnode * pMnode = pReq->info.node; SStreamHbMsg req = {0}; bool checkpointFailed = false; @@ -2699,15 +2714,15 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { pTaskEntry->status = p->status; if (p->status != TASK_STATUS__READY) { - mDebug("received s-task:0x%"PRIx64" not in ready status:%s", p->id.taskId, streamTaskGetStatusStr(p->status)); + mDebug("received s-task:0x%" PRIx64 " not in ready status:%s", p->id.taskId, streamTaskGetStatusStr(p->status)); } } // current checkpoint is failed, rollback from the checkpoint trans // kill the checkpoint trans and then set all tasks status to be normal if (checkpointFailed && activeCheckpointId != 0) { - bool allReady = true; - SArray* p = mndTakeVgroupSnapshot(pMnode, &allReady); + bool allReady = true; + SArray *p = mndTakeVgroupSnapshot(pMnode, &allReady); taosArrayDestroy(p); if (allReady) { From 5ac66679db18c91006eda9741a3f73d31b5d4cf1 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 9 Nov 2023 10:17:46 +0800 Subject: [PATCH 43/56] config/s3blocksize: move range check from global to mnode --- source/common/src/tglobal.c | 2 +- source/dnode/mnode/impl/src/mndDnode.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index ead9a5926b..8bb2fa3ab7 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -1674,7 +1674,7 @@ void taosCfgDynamicOptions(const char *option, const char *value) { {"ttlBatchDropNum", &tsTtlBatchDropNum}, {"ttlFlushThreshold", &tsTtlFlushThreshold}, {"ttlPushInterval", &tsTtlPushIntervalSec}, - {"s3BlockSize", &tsS3BlockSize}, + //{"s3BlockSize", &tsS3BlockSize}, {"s3BlockCacheSize", &tsS3BlockCacheSize}, {"s3PageCacheSize", &tsS3PageCacheSize}, {"s3UploadDelaySec", &tsS3UploadDelaySec}, diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 85e4ef0fc2..f4108b52c6 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -1310,6 +1310,22 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { } tFreeSMCfgDnodeReq(&cfgReq); return 0; + } else if (strncasecmp(cfgReq.config, "s3blocksize", 11) == 0) { + int32_t optLen = strlen("s3blocksize"); + int32_t flag = -1; + int32_t code = mndMCfgGetValInt32(&cfgReq, optLen, &flag); + if (code < 0) return code; + + if (flag > 1024 * 1024 || (flag > -1 && flag < 4) || flag < -1) { + mError("dnode:%d, failed to config s3blocksize since value:%d. Valid range: -1 or [4, 1024 * 1024]", + cfgReq.dnodeId, flag); + terrno = TSDB_CODE_INVALID_CFG; + tFreeSMCfgDnodeReq(&cfgReq); + return -1; + } + + strcpy(dcfgReq.config, "s3blocksize"); + snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag); #endif } else { mndMCfg2DCfg(&cfgReq, &dcfgReq); From 3495efaac7a807d07cdca0ad7a3dc57122f071e7 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 9 Nov 2023 10:49:40 +0800 Subject: [PATCH 44/56] enh: rsma exception process --- source/dnode/vnode/src/inc/sma.h | 1 + source/dnode/vnode/src/sma/smaCommit.c | 5 +++-- source/dnode/vnode/src/sma/smaRollup.c | 27 ++++++++++++++++--------- source/dnode/vnode/src/tsdb/tsdbWrite.c | 2 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 2 +- 5 files changed, 24 insertions(+), 13 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index de6bb23f04..f45050bfec 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -108,6 +108,7 @@ struct SRSmaStat { int64_t refId; // shared by fetch tasks volatile int64_t nBufItems; // number of items in queue buffer SRWLatch lock; // r/w lock for rsma fs(e.g. qtaskinfo) + volatile int32_t execStat; // 0 succeed, other failed volatile int32_t nFetchAll; // active number of fetch all volatile int8_t triggerStat; // shared by fetch tasks volatile int8_t commitStat; // 0 not in committing, 1 in committing diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 92181f054d..92b8c09fbc 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -179,13 +179,14 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { if (!isCommit) goto _exit; + code = atomic_load_32(&pRSmaStat->execStat); + TSDB_CHECK_CODE(code, lino, _exit); + code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); TSDB_CHECK_CODE(code, lino, _exit); smaInfo("vgId:%d, rsma commit, operator state committed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); - - // all rsma results are written completely STsdb *pTsdb = NULL; if ((pTsdb = VND_RSMA1(pSma->pVnode))) { diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 7296f3d468..92494553d0 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -682,13 +682,14 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma } if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) { - code = terrno ? terrno : TSDB_CODE_RSMA_RESULT; + if (terrno == TSDB_CODE_TDB_TIMESTAMP_OUT_OF_RANGE) { + // TODO: reconfigure SSubmitReq2 + } else { + if (terrno == 0) terrno = TSDB_CODE_RSMA_RESULT; + code = terrno; + } tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); taosMemoryFree(pReq); - smaError("vgId:%d, %s failed at line %d since %s, suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIi64 - ", ver:%" PRIi64, - SMA_VID(pSma), __func__, lino, tstrerror(code), suid, pItem->level, output ? output->info.id.uid : -1, - output ? output->info.version : -1); TSDB_CHECK_CODE(code, lino, _exit); } @@ -844,10 +845,10 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, atomic_store_64(&pItem->submitReqVer, packData->ver); } - tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, &pResList, NULL); + terrno = tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, &pResList, NULL); taosArrayDestroy(pResList); - return TSDB_CODE_SUCCESS; + return terrno ? TSDB_CODE_FAILED : TDB_CODE_SUCCESS; } /** @@ -953,7 +954,12 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); if (!pEnv) { // only applicable when rsma env exists - return TSDB_CODE_SUCCESS; + return TDB_CODE_SUCCESS; + } + + if (0 != (terrno = atomic_load_32(&SMA_RSMA_STAT(pSma)->execStat))) { + smaError("vgId:%d, failed to process rsma submit since invalid exec code: %s", SMA_VID(pSma), terrstr()); + goto _err; } STbUidStore uidStore = {0}; @@ -985,7 +991,7 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, return TSDB_CODE_SUCCESS; _err: tdUidStoreDestory(&uidStore); - return TSDB_CODE_FAILED; + return terrno; } /** @@ -1417,6 +1423,7 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { goto _err; } if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo, STREAM_GET_ALL, &pResList, NULL) < 0) { + atomic_store_32(&SMA_RSMA_STAT(pSma)->execStat, terrno); goto _err; } @@ -1448,6 +1455,7 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA .msgStr = POINTER_SHIFT(msg, sizeof(int32_t) + sizeof(int64_t))}; if (!taosArrayPush(pSubmitArr, &packData)) { + terrno = TSDB_CODE_OUT_OF_MEMORY; tdFreeRSmaSubmitItems(pSubmitArr); goto _err; } @@ -1467,6 +1475,7 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA } return TSDB_CODE_SUCCESS; _err: + atomic_store_32(&SMA_RSMA_STAT(pSma)->execStat, terrno); smaError("vgId:%d, batch exec for suid:%" PRIi64 " execType:%d size:%d failed since %s", SMA_VID(pSma), pInfo->suid, type, (int32_t)taosArrayGetSize(pSubmitArr), terrstr()); tdFreeRSmaSubmitItems(pSubmitArr); diff --git a/source/dnode/vnode/src/tsdb/tsdbWrite.c b/source/dnode/vnode/src/tsdb/tsdbWrite.c index 1e6526da48..836fda9903 100644 --- a/source/dnode/vnode/src/tsdb/tsdbWrite.c +++ b/source/dnode/vnode/src/tsdb/tsdbWrite.c @@ -39,7 +39,7 @@ int tsdbInsertData(STsdb *pTsdb, int64_t version, SSubmitReq2 *pMsg, SSubmitRsp2 arrSize = taosArrayGetSize(pMsg->aSubmitTbData); // scan and convert - if (tsdbScanAndConvertSubmitMsg(pTsdb, pMsg) < 0) { + if ((terrno = tsdbScanAndConvertSubmitMsg(pTsdb, pMsg)) < 0) { if (terrno != TSDB_CODE_TDB_TABLE_RECONFIGURE) { tsdbError("vgId:%d, failed to insert data since %s", TD_VID(pTsdb->pVnode), tstrerror(terrno)); } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index b31463ac00..ed86f0c22b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -1669,7 +1669,7 @@ _exit: atomic_add_fetch_64(&pVnode->statis.nBatchInsert, 1); if (code == 0) { atomic_add_fetch_64(&pVnode->statis.nBatchInsertSuccess, 1); - tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len, STREAM_INPUT__DATA_SUBMIT); + code = tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len, STREAM_INPUT__DATA_SUBMIT); } // clear From 50666987f00526b2815896a4b9b7349c589890e4 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 9 Nov 2023 11:23:17 +0800 Subject: [PATCH 45/56] change chkpid gen way --- source/libs/stream/src/streamSnapshot.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 3de5de9967..5f72129ebd 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -19,7 +19,6 @@ #include "streamBackendRocksdb.h" #include "streamInt.h" #include "tcommon.h" -#include "streamInt.h" enum SBackendFileType { ROCKSDB_OPTIONS_TYPE = 1, @@ -193,8 +192,8 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chk taosArrayPush(pFile->pSst, &sst); } } - { - char* buf = taosMemoryCalloc(1, 512); + if (qDebugFlag & DEBUG_TRACE) { + char* buf = taosMemoryCalloc(1, 128 + taosArrayGetSize(pFile->pSst) * 16); sprintf(buf, "[current: %s,", pFile->pCurrent); sprintf(buf + strlen(buf), "MANIFEST: %s,", pFile->pMainfest); sprintf(buf + strlen(buf), "options: %s,", pFile->pOptions); @@ -344,10 +343,10 @@ int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* si stDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 ", file no.%d", STREAM_STATE_TRANSFER, item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); uint8_t* buf = taosMemoryCalloc(1, sizeof(SStreamSnapBlockHdr) + kBlockSize); - if(buf == NULL){ + if (buf == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } - int64_t nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); + int64_t nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); if (nread == -1) { taosMemoryFree(buf); code = TAOS_SYSTEM_ERROR(terrno); @@ -480,8 +479,8 @@ int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nDa } int32_t streamSnapWriterClose(SStreamSnapWriter* pWriter, int8_t rollback) { SStreamSnapHandle* handle = &pWriter->handle; - if (qDebugFlag & DEBUG_DEBUG) { - char* buf = (char*)taosMemoryMalloc(1024); + if (qDebugFlag & DEBUG_TRACE) { + char* buf = (char*)taosMemoryMalloc(128 + taosArrayGetSize(handle->pFileList) * 16); int n = sprintf(buf, "["); for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) { SBackendFileItem* item = taosArrayGet(handle->pFileList, i); From 349e190120b736830b56b96514ee655b7e1e113b Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 9 Nov 2023 11:23:50 +0800 Subject: [PATCH 46/56] enh: rsma result --- source/dnode/vnode/src/inc/sma.h | 3 +- source/dnode/vnode/src/sma/smaEnv.c | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 90 +++++++++++--------------- 3 files changed, 39 insertions(+), 56 deletions(-) diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index f45050bfec..29eaa0509a 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -149,6 +149,7 @@ struct SRSmaInfoItem { tmr_h tmrId; void *pStreamState; void *pStreamTask; // SStreamTask + SArray *pResList; }; struct SRSmaInfo { @@ -218,7 +219,7 @@ static FORCE_INLINE void tdUnRefSmaStat(SSma *pSma, SSmaStat *pStat) { int32_t smaPreClose(SSma *pSma); // rsma -void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree); +void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); int32_t tdRSmaRestore(SSma *pSma, int8_t type, int64_t committedVer, int8_t rollback); int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName); int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type); diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index d47398bdff..dd12f2bca2 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -179,7 +179,7 @@ static void tRSmaInfoHashFreeNode(void *data) { if ((pItem = RSMA_INFO_ITEM((SRSmaInfo *)pRSmaInfo, 1)) && pItem->level) { taosHashRemove(smaMgmt.refHash, &pItem, POINTER_BYTES); } - tdFreeRSmaInfo(pRSmaInfo->pSma, pRSmaInfo, true); + tdFreeRSmaInfo(pRSmaInfo->pSma, pRSmaInfo); } } diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 92494553d0..73a0849ab2 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -45,7 +45,7 @@ static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); static void tdFreeRSmaSubmitItems(SArray *pItems); static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo); static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo, - int32_t execType, SArray **ppResList, int8_t *streamFlushed); + int32_t execType, int8_t *streamFlushed); static void tdRSmaFetchTrigger(void *param, void *tmrId); static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level); static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables); @@ -74,41 +74,39 @@ static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t l * * @param pSma * @param pInfo - * @param isDeepFree Only stop tmrId and free pTSchema for deep free * @return void* */ -void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) { +void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo) { if (pInfo) { - if (isDeepFree) { - for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { - SRSmaInfoItem *pItem = &pInfo->items[i]; + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + SRSmaInfoItem *pItem = &pInfo->items[i]; - if (pItem->tmrId) { - smaDebug("vgId:%d, stop fetch timer %p for table %" PRIi64 " level %d", SMA_VID(pSma), pItem->tmrId, - pInfo->suid, i + 1); - taosTmrStopA(&pItem->tmrId); - } - - if (pItem->pStreamState) { - streamStateClose(pItem->pStreamState, false); - } - - if (pItem->pStreamTask) { - tFreeStreamTask(pItem->pStreamTask); - } - tdRSmaQTaskInfoFree(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1); + if (pItem->tmrId) { + smaDebug("vgId:%d, stop fetch timer %p for table %" PRIi64 " level %d", SMA_VID(pSma), pItem->tmrId, + pInfo->suid, i + 1); + taosTmrStopA(&pItem->tmrId); } - taosMemoryFreeClear(pInfo->pTSchema); + if (pItem->pStreamState) { + streamStateClose(pItem->pStreamState, false); + } - if (pInfo->queue) { - taosCloseQueue(pInfo->queue); - pInfo->queue = NULL; - } - if (pInfo->qall) { - taosFreeQall(pInfo->qall); - pInfo->qall = NULL; + if (pItem->pStreamTask) { + tFreeStreamTask(pItem->pStreamTask); } + taosArrayDestroy(pItem->pResList); + tdRSmaQTaskInfoFree(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1); + } + + taosMemoryFreeClear(pInfo->pTSchema); + + if (pInfo->queue) { + taosCloseQueue(pInfo->queue); + pInfo->queue = NULL; + } + if (pInfo->qall) { + taosFreeQall(pInfo->qall); + pInfo->qall = NULL; } taosMemoryFree(pInfo); @@ -311,6 +309,10 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat return TSDB_CODE_FAILED; } + if (!(pItem->pResList = taosArrayInit(1, POINTER_BYTES))) { + return TSDB_CODE_FAILED; + } + if (pItem->fetchResultVer < pItem->submitReqVer) { // fetch the data when reboot pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; @@ -406,7 +408,7 @@ int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con return TSDB_CODE_SUCCESS; _err: - tdFreeRSmaInfo(pSma, pRSmaInfo, true); + tdFreeRSmaInfo(pSma, pRSmaInfo); return TSDB_CODE_FAILED; } @@ -623,27 +625,14 @@ _end: } static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo, - int32_t execType, SArray **ppResList, int8_t *streamFlushed) { + int32_t execType, int8_t *streamFlushed) { int32_t code = 0; int32_t lino = 0; SSDataBlock *output = NULL; - SArray *pResList = NULL; + SArray *pResList = pItem->pResList; STSchema *pTSchema = pInfo->pTSchema; int64_t suid = pInfo->suid; - if (!(*ppResList)) { - pResList = taosArrayInit(1, POINTER_BYTES); - if (pResList == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - *ppResList = pResList; - } else { - pResList = *ppResList; - } - - taosArrayClear(pResList); - while (1) { uint64_t ts; bool hasMore = false; @@ -812,7 +801,6 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t idx = level - 1; void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); - SArray *pResList = NULL; if (!qTaskInfo) { smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, @@ -845,9 +833,8 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, atomic_store_64(&pItem->submitReqVer, packData->ver); } - terrno = tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, &pResList, NULL); + terrno = tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, NULL); - taosArrayDestroy(pResList); return terrno ? TSDB_CODE_FAILED : TDB_CODE_SUCCESS; } @@ -1135,7 +1122,6 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { int32_t nTaskInfo = 0; SSma *pSma = pRSmaStat->pSma; SVnode *pVnode = pSma->pVnode; - SArray *pResList = NULL; if (taosHashGetSize(pInfoHash) <= 0) { return TSDB_CODE_SUCCESS; @@ -1178,7 +1164,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) { int8_t streamFlushed = 0; code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo, - STREAM_CHECKPOINT, &pResList, &streamFlushed); + STREAM_CHECKPOINT, &streamFlushed); if (code) { taosHashCancelIterate(pInfoHash, infoHash); TSDB_CHECK_CODE(code, lino, _exit); @@ -1265,7 +1251,6 @@ _checkpoint: } } while (0); _exit: - taosArrayDestroy(pResList); if (code) { smaError("vgId:%d, %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code)); } @@ -1391,7 +1376,6 @@ static void tdFreeRSmaSubmitItems(SArray *pItems) { */ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { SSDataBlock dataBlock = {.info.type = STREAM_GET_ALL}; - SArray *pResList = NULL; for (int8_t i = 1; i <= TSDB_RETENTION_L2; ++i) { SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, i - 1); @@ -1422,7 +1406,7 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { if ((terrno = qSetSMAInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) { goto _err; } - if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo, STREAM_GET_ALL, &pResList, NULL) < 0) { + if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo, STREAM_GET_ALL, NULL) < 0) { atomic_store_32(&SMA_RSMA_STAT(pSma)->execStat, terrno); goto _err; } @@ -1437,10 +1421,8 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) { } _end: - taosArrayDestroy(pResList); return TSDB_CODE_SUCCESS; _err: - taosArrayDestroy(pResList); return TSDB_CODE_FAILED; } From 1a950afdb85197dd23426ebb2b79462e8fefacff Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 9 Nov 2023 12:09:27 +0800 Subject: [PATCH 47/56] enh: test case for rsma snapshot --- source/dnode/vnode/src/sma/smaRollup.c | 10 +++++----- source/libs/stream/src/streamTask.c | 1 - .../script/tsim/sync/vnodesnapshot-rsma-test.sim | 16 +++++++++++----- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 73a0849ab2..252a3ade36 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -944,7 +944,7 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, return TDB_CODE_SUCCESS; } - if (0 != (terrno = atomic_load_32(&SMA_RSMA_STAT(pSma)->execStat))) { + if ((terrno = atomic_load_32(&SMA_RSMA_STAT(pSma)->execStat))) { smaError("vgId:%d, failed to process rsma submit since invalid exec code: %s", SMA_VID(pSma), terrstr()); goto _err; } @@ -1224,9 +1224,9 @@ _checkpoint: } taosWLockLatch(&pMeta->lock); - if (0 != streamMetaSaveTask(pMeta, pTask)) { + if (streamMetaSaveTask(pMeta, pTask)) { taosWUnLockLatch(&pMeta->lock); - code = terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY; + code = terrno ? terrno : TSDB_CODE_OUT_OF_MEMORY; taosHashCancelIterate(pInfoHash, infoHash); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1239,9 +1239,9 @@ _checkpoint: } if (pMeta) { taosWLockLatch(&pMeta->lock); - if (0 != streamMetaCommit(pMeta)) { + if (streamMetaCommit(pMeta)) { taosWUnLockLatch(&pMeta->lock); - code = terrno != 0 ? terrno : TSDB_CODE_OUT_OF_MEMORY; + code = terrno ? terrno : TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); } taosWUnLockLatch(&pMeta->lock); diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 59002e456a..a7fb590d1b 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -195,7 +195,6 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeU64(pDecoder, &pTask->dataRange.range.minVer)) return -1; if (tDecodeU64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; - if (tDecodeI64(pDecoder, &pTask->dataRange.window.skey)) return -1; if (tDecodeI64(pDecoder, &pTask->dataRange.window.ekey)) return -1; diff --git a/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim b/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim index b1e5ed200f..8b1720d213 100644 --- a/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim +++ b/tests/script/tsim/sync/vnodesnapshot-rsma-test.sim @@ -167,9 +167,6 @@ system sh/exec.sh -n dnode4 -s start sleep 3000 - - - print =============== query data of level 1 sql connect sql use db @@ -181,12 +178,21 @@ if $rows != 100 then return -1 endi +print =============== sleep 5s to wait the result +sleep 5000 + print =============== query data of level 2 sql select * from ct1 where ts > now - 10d -print rows of level 2: $rows print $data00 $data01 $data02 +print $data10 $data11 $data12 +if $rows != 100 then + print rows of level 2: $rows +endi print =============== query data of level 3 sql select * from ct1 -print rows of level 3: $rows print $data00 $data01 $data02 +print $data10 $data11 $data12 +if $rows != 100 then + print rows of level 3: $rows +endi \ No newline at end of file From b80770dea8a83ffdc0d0c89ac12e5dcf53d3eea4 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 9 Nov 2023 11:46:45 +0800 Subject: [PATCH 48/56] fix: close vnode in the failed mode properly in vmCloseVnode --- source/dnode/mgmt/mgmt_vnode/src/vmInt.c | 7 ++++++- source/dnode/vnode/src/tsdb/tsdbOpen.c | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index d2093ff77c..7a2bd0f847 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -144,6 +144,10 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal) char path[TSDB_FILENAME_LEN] = {0}; bool atExit = true; + if (pVnode->failed) { + ASSERT(pVnode->pImpl == NULL); + goto _closed; + } if (vnodeIsLeader(pVnode->pImpl)) { vnodeProposeCommitOnNeed(pVnode->pImpl, atExit); } @@ -202,6 +206,8 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal) vnodeClose(pVnode->pImpl); pVnode->pImpl = NULL; + +_closed: dInfo("vgId:%d, vnode is closed", pVnode->vgId); if (commitAndRemoveWal) { @@ -386,7 +392,6 @@ static void *vmCloseVnodeInThread(void *param) { for (int32_t v = 0; v < pThread->vnodeNum; ++v) { SVnodeObj *pVnode = pThread->ppVnodes[v]; - if (pVnode->failed) continue; char stepDesc[TSDB_STEP_DESC_LEN] = {0}; snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to close, %d of %d have been closed", pVnode->vgId, diff --git a/source/dnode/vnode/src/tsdb/tsdbOpen.c b/source/dnode/vnode/src/tsdb/tsdbOpen.c index c32b2eedd7..a1f864814f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbOpen.c +++ b/source/dnode/vnode/src/tsdb/tsdbOpen.c @@ -89,6 +89,8 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee return 0; _err: + tsdbCloseFS(&pTsdb->pFS); + taosThreadMutexDestroy(&pTsdb->mutex); taosMemoryFree(pTsdb); return -1; } From 15b73354f8fb1dd5578d3172e76f1fd2e7d7e8c3 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 9 Nov 2023 13:59:21 +0800 Subject: [PATCH 49/56] change chkpid gen way --- source/dnode/mnode/impl/src/mndStream.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index eaeed579e5..135aab285b 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -715,12 +715,10 @@ int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) } static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { - int32_t code = -1; - - SMnode * pMnode = pReq->info.node; - SStreamObj *pStream = NULL; - SDbObj * pDb = NULL; - + SMnode * pMnode = pReq->info.node; + int32_t code = -1; + SStreamObj * pStream = NULL; + SDbObj * pDb = NULL; SCMCreateStreamReq createStreamReq = {0}; SStreamObj streamObj = {0}; @@ -883,13 +881,12 @@ int64_t mndStreamGenChkpId(SMnode *pMnode) { SStreamObj *pStream = NULL; void * pIter = NULL; SSdb * pSdb = pMnode->pSdb; - - int64_t maxChkpId = 0; + int64_t maxChkpId = 0; while (1) { pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); if (pIter == NULL) break; - maxChkpId = MAX(maxChkpId, pStream->checkpointId); + maxChkpId = TMAX(maxChkpId, pStream->checkpointId); sdbRelease(pSdb, pStream); } return maxChkpId + 1; From 8b6fc10bbd6313d40fe94bb443135197b16c01ab Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 9 Nov 2023 13:59:39 +0800 Subject: [PATCH 50/56] config/block-size: make range > 0 --- source/common/src/tglobal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 8bb2fa3ab7..eb0059676e 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -733,7 +733,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddString(pCfg, "s3Accesskey", tsS3AccessKey, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3Endpoint", tsS3Endpoint, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3BucketName", tsS3BucketName, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, -100, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) + if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, 1024, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "s3BlockCacheSize", tsS3BlockCacheSize, 4, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) From 17bddf5ff49128cb30fd0737708990c722444bad Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 9 Nov 2023 14:09:58 +0800 Subject: [PATCH 51/56] cos: use uError instead of vError --- source/common/src/cos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/src/cos.c b/source/common/src/cos.c index 7d8175637b..0b6b0db885 100644 --- a/source/common/src/cos.c +++ b/source/common/src/cos.c @@ -794,7 +794,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, } if (check && cbd.buf_pos != size) { - vError("%s: %d(%s)", __func__, cbd.status, cbd.err_msg); + uError("%s: %d(%s)", __func__, cbd.status, cbd.err_msg); return TAOS_SYSTEM_ERROR(EIO); } From e88fb845088bdfcfebd4cab04f7251f9f9c51742 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 9 Nov 2023 14:15:01 +0800 Subject: [PATCH 52/56] config/block-size fix --- source/common/src/tglobal.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index eab4c2ea77..b66d811d10 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -128,12 +128,12 @@ char tsSmlAutoChildTableNameDelimiter[TSDB_TABLE_NAME_LEN] = ""; // int32_t tsSmlBatchSize = 10000; // checkpoint backup -char tsSnodeAddress[TSDB_FQDN_LEN] = {0}; +char tsSnodeAddress[TSDB_FQDN_LEN] = {0}; int32_t tsRsyncPort = 873; #ifdef WINDOWS char tsCheckpointBackupDir[PATH_MAX] = "C:\\TDengine\\data\\backup\\checkpoint\\"; #else -char tsCheckpointBackupDir[PATH_MAX] = "/var/lib/taos/backup/checkpoint/"; +char tsCheckpointBackupDir[PATH_MAX] = "/var/lib/taos/backup/checkpoint/"; #endif // tmq @@ -335,7 +335,7 @@ int32_t taosSetS3Cfg(SConfig *pCfg) { } if (tsS3BucketName[0] != '<') { #if defined(USE_COS) || defined(USE_S3) - if(tsDiskCfgNum > 1) tsS3Enabled = true; + if (tsDiskCfgNum > 1) tsS3Enabled = true; tsS3StreamEnabled = true; #endif } @@ -678,7 +678,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "rsyncPort", tsRsyncPort, 1, 65535, CFG_SCOPE_BOTH, CFG_DYN_SERVER) != 0) return -1; if (cfgAddString(pCfg, "snodeAddress", tsSnodeAddress, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; - if (cfgAddString(pCfg, "checkpointBackupDir", tsCheckpointBackupDir, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; + if (cfgAddString(pCfg, "checkpointBackupDir", tsCheckpointBackupDir, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) + return -1; if (cfgAddInt32(pCfg, "tmqMaxTopicNum", tmqMaxTopicNum, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; @@ -748,7 +749,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddString(pCfg, "s3Accesskey", tsS3AccessKey, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3Endpoint", tsS3Endpoint, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3BucketName", tsS3BucketName, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, 1024, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) + if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, -1, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "s3BlockCacheSize", tsS3BlockCacheSize, 4, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) From e60f69a7ed314800df0805a42f4c88106c79a4e1 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 9 Nov 2023 14:20:27 +0800 Subject: [PATCH 53/56] fix: code typo --- source/dnode/vnode/src/tsdb/tsdbCommit2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.c b/source/dnode/vnode/src/tsdb/tsdbCommit2.c index 46e3aff0d4..22fb3b84ec 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit2.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.c @@ -192,7 +192,7 @@ static int32_t tsdbCommitTombData(SCommitter2 *committer) { committer->ctx->tbid->uid = record->uid; if (metaGetInfo(committer->tsdb->pVnode->pMeta, record->uid, &info, NULL) != 0) { - code = tsdbIterMergerSkipTableData(committer->dataIterMerger, committer->ctx->tbid); + code = tsdbIterMergerSkipTableData(committer->tombIterMerger, committer->ctx->tbid); TSDB_CHECK_CODE(code, lino, _exit); continue; } From 386c25a99a71092b8c7dd641e5ba2286415408d9 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 9 Nov 2023 15:24:19 +0800 Subject: [PATCH 54/56] tsdb/retention: remove file when ref's clear --- source/dnode/vnode/src/tsdb/tsdbFile2.c | 13 ++++++++++++- source/dnode/vnode/src/tsdb/tsdbRetention.c | 12 ++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFile2.c b/source/dnode/vnode/src/tsdb/tsdbFile2.c index 9edb03d35b..bf3357dabb 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile2.c @@ -14,6 +14,7 @@ */ #include "tsdbFile2.h" +#include "cos.h" // to_json static int32_t head_to_json(const STFile *file, cJSON *json); @@ -44,7 +45,17 @@ static const struct { void remove_file(const char *fname) { int32_t code = taosRemoveFile(fname); if (code) { - tsdbError("file:%s remove failed", fname); + if (tsS3Enabled) { + const char *object_name = taosDirEntryBaseName((char *)fname); + long s3_size = tsS3Enabled ? s3Size(object_name) : 0; + if (!strncmp(fname + strlen(fname) - 5, ".data", 5) && s3_size > 0) { + s3DeleteObjects(&object_name, 1); + } else { + tsdbError("file:%s remove failed", fname); + } + } else { + tsdbError("file:%s remove failed", fname); + } } else { tsdbInfo("file:%s is removed", fname); } diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index 1908f16529..0a41ac3cc8 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -13,9 +13,9 @@ * along with this program. If not, see . */ +#include "cos.h" #include "tsdb.h" #include "tsdbFS2.h" -#include "cos.h" #include "vnd.h" typedef struct { @@ -292,15 +292,15 @@ static int32_t tsdbDoRetentionOnFileSet(SRTNer *rtner, STFileSet *fset) { if (expLevel < 0) { // remove the fileset for (int32_t ftype = 0; (ftype < TSDB_FTYPE_MAX) && (fobj = fset->farr[ftype], 1); ++ftype) { if (fobj == NULL) continue; - + /* int32_t nlevel = tfsGetLevel(rtner->tsdb->pVnode->pTfs); if (tsS3Enabled && nlevel > 1 && TSDB_FTYPE_DATA == ftype && fobj->f->did.level == nlevel - 1) { code = tsdbRemoveFileObjectS3(rtner, fobj); TSDB_CHECK_CODE(code, lino, _exit); - } else { - code = tsdbDoRemoveFileObject(rtner, fobj); - TSDB_CHECK_CODE(code, lino, _exit); - } + } else {*/ + code = tsdbDoRemoveFileObject(rtner, fobj); + TSDB_CHECK_CODE(code, lino, _exit); + //} } SSttLvl *lvl; From a6600ab23a1e3f794deab6a84da6ff31cf5c0052 Mon Sep 17 00:00:00 2001 From: wangjiaming0909 <604227650@qq.com> Date: Thu, 9 Nov 2023 14:23:42 +0800 Subject: [PATCH 55/56] correct colid in blockinfo --- include/libs/nodes/plannodes.h | 1 + source/dnode/vnode/src/tsdb/tsdbCacheRead.c | 5 +-- source/libs/executor/src/cachescanoperator.c | 17 ++++++++- source/libs/nodes/src/nodesCodeFuncs.c | 7 ++++ source/libs/nodes/src/nodesMsgFuncs.c | 9 ++++- source/libs/nodes/src/nodesUtilFuncs.c | 1 + source/libs/planner/src/planPhysiCreater.c | 1 + tests/system-test/2-query/last_cache_scan.py | 36 ++++++++++++++++++++ 8 files changed, 73 insertions(+), 4 deletions(-) diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index dbbe1d92dc..4ffcb616dd 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -388,6 +388,7 @@ typedef struct SLastRowScanPhysiNode { SNodeList* pGroupTags; bool groupSort; bool ignoreNull; + SNodeList* pTargets; } SLastRowScanPhysiNode; typedef SLastRowScanPhysiNode STableCountScanPhysiNode; diff --git a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c index 2909b550d7..b6aa791cf0 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c @@ -30,10 +30,12 @@ static int32_t saveOneRow(SArray* pRow, SSDataBlock* pBlock, SCacheRowsReader* p if (HASTYPE(pReader->type, CACHESCAN_RETRIEVE_LAST)) { uint64_t ts = 0; SFirstLastRes* p; + col_id_t colId; for (int32_t i = 0; i < pReader->numOfCols; ++i) { SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, dstSlotIds[i]); int32_t slotId = slotIds[i]; SLastCol* pColVal = (SLastCol*)taosArrayGet(pRow, i); + colId = pColVal->colVal.cid; p = (SFirstLastRes*)varDataVal(pRes[i]); p->ts = pColVal->ts; @@ -63,8 +65,7 @@ static int32_t saveOneRow(SArray* pRow, SSDataBlock* pBlock, SCacheRowsReader* p if (pCol->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID && pCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) { colDataSetVal(pCol, numOfRows, (const char*)&ts, false); continue; - } - if (pReader->numOfCols == 1 && dstSlotIds[0] != idx) { + } else if (pReader->numOfCols == 1 && idx != dstSlotIds[0] && pCol->info.colId == colId) { if (!p->isNull) { colDataSetVal(pCol, numOfRows, p->buf, false); } else { diff --git a/source/libs/executor/src/cachescanoperator.c b/source/libs/executor/src/cachescanoperator.c index a7b4fe02f6..6d59698855 100644 --- a/source/libs/executor/src/cachescanoperator.c +++ b/source/libs/executor/src/cachescanoperator.c @@ -54,6 +54,19 @@ static int32_t removeRedundantTsCol(SLastRowScanPhysiNode* pScanNode, SColM #define SCAN_ROW_TYPE(_t) ((_t) ? CACHESCAN_RETRIEVE_LAST : CACHESCAN_RETRIEVE_LAST_ROW) +static void setColIdForCacheReadBlock(SSDataBlock* pBlock, SNodeList* pTargets) { + SNode* pNode; + int32_t idx = 0; + FOREACH(pNode, pTargets) { + if (nodeType(pNode) == QUERY_NODE_COLUMN) { + SColumnNode* pCol = (SColumnNode*)pNode; + SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, idx); + pColInfo->info.colId = pCol->colId; + } + idx++; + } +} + SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SReadHandle* readHandle, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo) { int32_t code = TSDB_CODE_SUCCESS; @@ -114,10 +127,12 @@ SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SRe capacity = TMIN(totalTables, 4096); pInfo->pBufferredRes = createOneDataBlock(pInfo->pRes, false); + setColIdForCacheReadBlock(pInfo->pBufferredRes, pScanNode->pTargets); blockDataEnsureCapacity(pInfo->pBufferredRes, capacity); } else { // by tags pInfo->retrieveType = CACHESCAN_RETRIEVE_TYPE_SINGLE | SCAN_ROW_TYPE(pScanNode->ignoreNull); capacity = 1; // only one row output + setColIdForCacheReadBlock(pInfo->pRes, pScanNode->pTargets); } initResultSizeInfo(&pOperator->resultInfo, capacity); @@ -192,7 +207,7 @@ SSDataBlock* doScanCache(SOperatorInfo* pOperator) { if (pInfo->indexOfBufferedRes < pInfo->pBufferredRes->info.rows) { for (int32_t i = 0; i < taosArrayGetSize(pInfo->pBufferredRes->pDataBlock); ++i) { - SColumnInfoData* pCol = taosArrayGet(pInfo->pBufferredRes->pDataBlock, i); + SColumnInfoData* pCol = taosArrayGet(pRes->pDataBlock, i); int32_t slotId = pCol->info.slotId; SColumnInfoData* pSrc = taosArrayGet(pInfo->pBufferredRes->pDataBlock, slotId); diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index c97c920a3b..c9b49ee30f 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -1773,6 +1773,7 @@ static int32_t jsonToPhysiTagScanNode(const SJson* pJson, void* pObj) { static const char* jkLastRowScanPhysiPlanGroupTags = "GroupTags"; static const char* jkLastRowScanPhysiPlanGroupSort = "GroupSort"; +static const char* jkLastRowScanPhysiPlanTargets = "Targets"; static int32_t physiLastRowScanNodeToJson(const void* pObj, SJson* pJson) { const SLastRowScanPhysiNode* pNode = (const SLastRowScanPhysiNode*)pObj; @@ -1784,6 +1785,9 @@ static int32_t physiLastRowScanNodeToJson(const void* pObj, SJson* pJson) { if (TSDB_CODE_SUCCESS == code) { code = tjsonAddBoolToObject(pJson, jkLastRowScanPhysiPlanGroupSort, pNode->groupSort); } + if (TSDB_CODE_SUCCESS == code) { + code = nodeListToJson(pJson, jkLastRowScanPhysiPlanTargets, pNode->pTargets); + } return code; } @@ -1798,6 +1802,9 @@ static int32_t jsonToPhysiLastRowScanNode(const SJson* pJson, void* pObj) { if (TSDB_CODE_SUCCESS == code) { code = tjsonGetBoolValue(pJson, jkLastRowScanPhysiPlanGroupSort, &pNode->groupSort); } + if (TSDB_CODE_SUCCESS == code) { + code = jsonToNodeList(pJson, jkLastRowScanPhysiPlanTargets, &pNode->pTargets); + } return code; } diff --git a/source/libs/nodes/src/nodesMsgFuncs.c b/source/libs/nodes/src/nodesMsgFuncs.c index 99100b2a1d..ea59d93d7f 100644 --- a/source/libs/nodes/src/nodesMsgFuncs.c +++ b/source/libs/nodes/src/nodesMsgFuncs.c @@ -2052,7 +2052,8 @@ enum { PHY_LAST_ROW_SCAN_CODE_SCAN = 1, PHY_LAST_ROW_SCAN_CODE_GROUP_TAGS, PHY_LAST_ROW_SCAN_CODE_GROUP_SORT, - PHY_LAST_ROW_SCAN_CODE_IGNULL + PHY_LAST_ROW_SCAN_CODE_IGNULL, + PHY_LAST_ROW_SCAN_CODE_TARGETS }; static int32_t physiLastRowScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { @@ -2068,6 +2069,9 @@ static int32_t physiLastRowScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder if (TSDB_CODE_SUCCESS == code) { code = tlvEncodeBool(pEncoder, PHY_LAST_ROW_SCAN_CODE_IGNULL, pNode->ignoreNull); } + if (TSDB_CODE_SUCCESS == code) { + code = tlvEncodeObj(pEncoder, PHY_LAST_ROW_SCAN_CODE_TARGETS, nodeListToMsg, pNode->pTargets); + } return code; } @@ -2091,6 +2095,9 @@ static int32_t msgToPhysiLastRowScanNode(STlvDecoder* pDecoder, void* pObj) { case PHY_LAST_ROW_SCAN_CODE_IGNULL: code = tlvDecodeBool(pTlv, &pNode->ignoreNull); break; + case PHY_LAST_ROW_SCAN_CODE_TARGETS: + code = msgToNodeListFromTlv(pTlv, (void**)&pNode->pTargets); + break; default: break; } diff --git a/source/libs/nodes/src/nodesUtilFuncs.c b/source/libs/nodes/src/nodesUtilFuncs.c index 4f6d3d95e1..ee22caf574 100644 --- a/source/libs/nodes/src/nodesUtilFuncs.c +++ b/source/libs/nodes/src/nodesUtilFuncs.c @@ -1285,6 +1285,7 @@ void nodesDestroyNode(SNode* pNode) { SLastRowScanPhysiNode* pPhyNode = (SLastRowScanPhysiNode*)pNode; destroyScanPhysiNode((SScanPhysiNode*)pNode); nodesDestroyList(pPhyNode->pGroupTags); + nodesDestroyList(pPhyNode->pTargets); break; } case QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN: diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index d6799a25a7..5cf3426e6f 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -552,6 +552,7 @@ static int32_t createLastRowScanPhysiNode(SPhysiPlanContext* pCxt, SSubplan* pSu if (NULL == pScan) { return TSDB_CODE_OUT_OF_MEMORY; } + pScan->pTargets = nodesCloneList(pScanLogicNode->node.pTargets); pScan->pGroupTags = nodesCloneList(pScanLogicNode->pGroupTags); if (NULL != pScanLogicNode->pGroupTags && NULL == pScan->pGroupTags) { diff --git a/tests/system-test/2-query/last_cache_scan.py b/tests/system-test/2-query/last_cache_scan.py index e75729f960..0f0936ebab 100644 --- a/tests/system-test/2-query/last_cache_scan.py +++ b/tests/system-test/2-query/last_cache_scan.py @@ -283,6 +283,42 @@ class TDTestCase: tdSql.checkData(0, 3, 1001) tdSql.checkData(0, 4, "2018-11-25 19:30:00.000") + sql_template = 'select %s from meters partition by tbname' + select_items = ["ts, last(c10), c10, ts", "ts, ts, last(c10), c10, tbname", "last(c10), c10, ts"] + has_last_row_scan_res = [1,1,1] + sqls = self.format_sqls(sql_template, select_items) + self.explain_and_check_res(sqls, has_last_row_scan_res) + tdSql.query(sqls[0], queryTimes=1) + tdSql.checkRows(10) + tdSql.checkData(0,0, '2018-11-25 19:30:00.000') + tdSql.checkData(0,1, '2018-11-25 19:30:01.000') + tdSql.checkData(0,2, '2018-11-25 19:30:01.000') + tdSql.checkData(0,3, '2018-11-25 19:30:00.000') + + tdSql.query(sqls[1], queryTimes=1) + tdSql.checkRows(10) + tdSql.checkData(0,0, '2018-11-25 19:30:00.000') + tdSql.checkData(0,1, '2018-11-25 19:30:00.000') + tdSql.checkData(0,2, '2018-11-25 19:30:01.000') + tdSql.checkData(0,3, '2018-11-25 19:30:01.000') + + sql_template = 'select %s from meters partition by t1' + select_items = ["ts, last(c10), c10, ts", "ts, ts, last(c10), c10, t1", "last(c10), c10, ts"] + has_last_row_scan_res = [1,1,1] + sqls = self.format_sqls(sql_template, select_items) + self.explain_and_check_res(sqls, has_last_row_scan_res) + tdSql.query(sqls[0], queryTimes=1) + tdSql.checkRows(5) + tdSql.checkData(0,0, '2018-11-25 19:30:00.000') + tdSql.checkData(0,1, '2018-11-25 19:30:01.000') + tdSql.checkData(0,2, '2018-11-25 19:30:01.000') + tdSql.checkData(0,3, '2018-11-25 19:30:00.000') + + tdSql.query("select ts, last(c10), t1, t2 from meters partition by t1, t2") + tdSql.checkRows(10) + tdSql.checkData(0, 0, '2018-11-25 19:30:00.000') + tdSql.checkData(0, 1, '2018-11-25 19:30:01.000') + def run(self): self.prepareTestEnv() #time.sleep(99999999) From d3cf6a4340b7f919c2a99f15cd9949f3655811cf Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 9 Nov 2023 17:44:24 +0800 Subject: [PATCH 56/56] tsdb/file: new nlevel field for remove --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 7 ++++--- source/dnode/vnode/src/tsdb/tsdbFile2.c | 9 +++++---- source/dnode/vnode/src/tsdb/tsdbFile2.h | 1 + 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 348397272d..02ef75ae86 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -22,7 +22,7 @@ extern int vnodeScheduleTask(int (*execute)(void *), void *arg); extern int vnodeScheduleTaskEx(int tpid, int (*execute)(void *), void *arg); -extern void remove_file(const char *fname); +extern void remove_file(const char *fname, bool last_level); #define TSDB_FS_EDIT_MIN TSDB_FEDIT_COMMIT #define TSDB_FS_EDIT_MAX (TSDB_FEDIT_MERGE + 1) @@ -532,7 +532,8 @@ static int32_t tsdbFSDoSanAndFix(STFileSystem *fs) { if (taosIsDir(file->aname)) continue; if (tsdbFSGetFileObjHashEntry(&fobjHash, file->aname) == NULL) { - remove_file(file->aname); + int32_t nlevel = tfsGetLevel(fs->tsdb->pVnode->pTfs); + remove_file(file->aname, nlevel > 1 && file->did.level == nlevel - 1); } } @@ -1282,4 +1283,4 @@ int32_t tsdbFSEnableBgTask(STFileSystem *fs) { fs->stop = false; taosThreadMutexUnlock(&fs->tsdb->mutex); return 0; -} \ No newline at end of file +} diff --git a/source/dnode/vnode/src/tsdb/tsdbFile2.c b/source/dnode/vnode/src/tsdb/tsdbFile2.c index bf3357dabb..cc05b8ee18 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile2.c @@ -42,10 +42,10 @@ static const struct { [TSDB_FTYPE_STT] = {"stt", stt_to_json, stt_from_json}, }; -void remove_file(const char *fname) { +void remove_file(const char *fname, bool last_level) { int32_t code = taosRemoveFile(fname); if (code) { - if (tsS3Enabled) { + if (tsS3Enabled && last_level) { const char *object_name = taosDirEntryBaseName((char *)fname); long s3_size = tsS3Enabled ? s3Size(object_name) : 0; if (!strncmp(fname + strlen(fname) - 5, ".data", 5) && s3_size > 0) { @@ -235,6 +235,7 @@ int32_t tsdbTFileObjInit(STsdb *pTsdb, const STFile *f, STFileObj **fobj) { fobj[0]->state = TSDB_FSTATE_LIVE; fobj[0]->ref = 1; tsdbTFileName(pTsdb, f, fobj[0]->fname); + fobj[0]->nlevel = tfsGetLevel(pTsdb->pVnode->pTfs); return 0; } @@ -256,7 +257,7 @@ int32_t tsdbTFileObjUnref(STFileObj *fobj) { tsdbTrace("unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); if (nRef == 0) { if (fobj->state == TSDB_FSTATE_DEAD) { - remove_file(fobj->fname); + remove_file(fobj->fname, fobj->nlevel > 1 && fobj->f->did.level == fobj->nlevel - 1); } taosMemoryFree(fobj); } @@ -272,7 +273,7 @@ int32_t tsdbTFileObjRemove(STFileObj *fobj) { taosThreadMutexUnlock(&fobj->mutex); tsdbTrace("remove unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); if (nRef == 0) { - remove_file(fobj->fname); + remove_file(fobj->fname, fobj->nlevel > 1 && fobj->f->did.level == fobj->nlevel - 1); taosMemoryFree(fobj); } return 0; diff --git a/source/dnode/vnode/src/tsdb/tsdbFile2.h b/source/dnode/vnode/src/tsdb/tsdbFile2.h index 9da198c1f0..b94f7a9fd0 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFile2.h @@ -76,6 +76,7 @@ struct STFileObj { STFile f[1]; int32_t state; int32_t ref; + int32_t nlevel; char fname[TSDB_FILENAME_LEN]; };