Merge pull request #14471 from taosdata/feature/TD-11274-3.0
refactor: rsma commit and recovery
This commit is contained in:
commit
a9161a5c7b
|
@ -62,12 +62,10 @@ struct STSmaStat {
|
|||
|
||||
struct SRSmaStat {
|
||||
SSma *pSma;
|
||||
int64_t refId; // shared by persistence/fetch tasks
|
||||
void *tmrHandle; // for persistence task
|
||||
tmr_h tmrId; // for persistence task
|
||||
int32_t tmrSeconds; // for persistence task
|
||||
int8_t triggerStat; // for persistence task
|
||||
int8_t runningStat; // for persistence task
|
||||
int64_t refId; // shared by fetch tasks
|
||||
void *tmrHandle; // shared by fetch tasks
|
||||
int8_t triggerStat; // shared by fetch tasks
|
||||
int8_t runningStat; // for persistence task
|
||||
SHashObj *rsmaInfoHash; // key: stbUid, value: SRSmaInfo;
|
||||
};
|
||||
|
||||
|
@ -82,7 +80,6 @@ struct SSmaStat {
|
|||
#define SMA_TSMA_STAT(s) (&(s)->tsmaStat)
|
||||
#define SMA_RSMA_STAT(s) (&(s)->rsmaStat)
|
||||
#define RSMA_INFO_HASH(r) ((r)->rsmaInfoHash)
|
||||
#define RSMA_TMR_ID(r) ((r)->tmrId)
|
||||
#define RSMA_TMR_HANDLE(r) ((r)->tmrHandle)
|
||||
#define RSMA_TRIGGER_STAT(r) (&(r)->triggerStat)
|
||||
#define RSMA_RUNNING_STAT(r) (&(r)->runningStat)
|
||||
|
@ -185,9 +182,11 @@ static FORCE_INLINE void tdSmaStatSetDropped(STSmaStat *pTStat) {
|
|||
static int32_t tdDestroySmaState(SSmaStat *pSmaStat, int8_t smaType);
|
||||
void *tdFreeSmaState(SSmaStat *pSmaStat, int8_t smaType);
|
||||
void *tdFreeRSmaInfo(SRSmaInfo *pInfo);
|
||||
int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat);
|
||||
|
||||
int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName);
|
||||
int32_t tdProcessRSmaRestoreImpl(SSma *pSma);
|
||||
|
||||
int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg);
|
||||
int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg);
|
||||
int32_t tdProcessTSmaGetDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days);
|
||||
|
@ -244,8 +243,8 @@ void tdUpdateTFileMagic(STFile *pTFile, void *pCksm);
|
|||
void tdCloseTFile(STFile *pTFile);
|
||||
void tdDestroyTFile(STFile *pTFile);
|
||||
|
||||
void tdGetVndFileName(int32_t vgId, const char *dname, const char *fname, int64_t version, char *outputName);
|
||||
void tdGetVndDirName(int32_t vgId, const char *dname, char *outputName);
|
||||
void tdGetVndFileName(int32_t vgId, const char *pdname, const char *dname, const char *fname, int64_t version, char *outputName);
|
||||
void tdGetVndDirName(int32_t vgId,const char *pdname, const char *dname, bool endWithSep, char *outputName);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -163,8 +163,8 @@ SSubmitReq* tdBlockToSubmit(const SArray* pBlocks, const STSchema* pSchema, bool
|
|||
|
||||
// sma
|
||||
int32_t smaOpen(SVnode* pVnode);
|
||||
int32_t smaCloseEnv(SSma* pSma);
|
||||
int32_t smaCloseEx(SSma* pSma);
|
||||
int32_t smaClose(SSma* pSma);
|
||||
int32_t smaBegin(SSma* pSma);
|
||||
int32_t smaPreCommit(SSma* pSma);
|
||||
int32_t smaCommit(SSma* pSma);
|
||||
int32_t smaPostCommit(SSma* pSma);
|
||||
|
|
|
@ -43,13 +43,48 @@ int32_t smaCommit(SSma *pSma) { return tdProcessRSmaCommitImpl(pSma); }
|
|||
*/
|
||||
int32_t smaPostCommit(SSma *pSma) { return tdProcessRSmaPostCommitImpl(pSma); }
|
||||
|
||||
/**
|
||||
* @brief set rsma trigger stat active
|
||||
*
|
||||
* @param pSma
|
||||
* @return int32_t
|
||||
*/
|
||||
int32_t smaBegin(SSma *pSma) {
|
||||
SSmaEnv *pSmaEnv = SMA_RSMA_ENV(pSma);
|
||||
if (!pSmaEnv) {
|
||||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
SSmaStat *pStat = SMA_ENV_STAT(pSmaEnv);
|
||||
SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pStat);
|
||||
|
||||
int8_t rsmaTriggerStat =
|
||||
atomic_val_compare_exchange_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_PAUSED, TASK_TRIGGER_STAT_ACTIVE);
|
||||
switch (rsmaTriggerStat) {
|
||||
case TASK_TRIGGER_STAT_PAUSED: {
|
||||
smaDebug("vgId:%d rsma trigger stat from paused to active", SMA_VID(pSma));
|
||||
break;
|
||||
}
|
||||
case TASK_TRIGGER_STAT_INIT: {
|
||||
atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_ACTIVE);
|
||||
smaDebug("vgId:%d rsma trigger stat from init to active", SMA_VID(pSma));
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_ACTIVE);
|
||||
smaWarn("vgId:%d rsma trigger stat %" PRIi8 " is unexpected", SMA_VID(pSma), rsmaTriggerStat);
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief pre-commit for rollup sma.
|
||||
* 1) set trigger stat of rsma timer TASK_TRIGGER_STAT_PAUSED.
|
||||
* 2) perform persist task for qTaskInfo
|
||||
* 3) wait all triggered fetch tasks finished
|
||||
* 4) set trigger stat of rsma timer TASK_TRIGGER_STAT_ACTIVE.
|
||||
* 5) finish
|
||||
*
|
||||
* @param pSma
|
||||
* @return int32_t
|
||||
|
@ -63,10 +98,30 @@ static int32_t tdProcessRSmaPreCommitImpl(SSma *pSma) {
|
|||
SSmaStat *pStat = SMA_ENV_STAT(pSmaEnv);
|
||||
SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pStat);
|
||||
|
||||
// step 1
|
||||
|
||||
// step 1: set persistence task paused
|
||||
atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_PAUSED);
|
||||
|
||||
// step 2
|
||||
// step 2: perform persist task for qTaskInfo
|
||||
tdRSmaPersistExecImpl(pRSmaStat);
|
||||
|
||||
// step 3: wait all triggered fetch tasks finished
|
||||
int32_t nLoops = 0;
|
||||
while (1) {
|
||||
if (T_REF_VAL_GET(pStat) == 0) {
|
||||
smaDebug("vgId:%d, rsma fetch tasks all finished", SMA_VID(pSma));
|
||||
break;
|
||||
} else {
|
||||
smaDebug("vgId:%d, rsma fetch tasks not all finished yet", SMA_VID(pSma));
|
||||
}
|
||||
++nLoops;
|
||||
if (nLoops > 1000) {
|
||||
sched_yield();
|
||||
nLoops = 0;
|
||||
}
|
||||
}
|
||||
|
||||
smaDebug("vgId:%d, rsma pre commit succeess", SMA_VID(pSma));
|
||||
|
||||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
@ -103,48 +158,68 @@ static int32_t tdProcessRSmaPostCommitImpl(SSma *pSma) {
|
|||
TdDirPtr pDir = NULL;
|
||||
TdDirEntryPtr pDirEntry = NULL;
|
||||
char dir[TSDB_FILENAME_LEN];
|
||||
char bname[TSDB_FILENAME_LEN];
|
||||
const char *pattern = "^v[0-9]+qtaskinfo\\.ver([0-9]+)?$";
|
||||
const char *pattern = "v[0-9]+qtaskinfo\\.ver([0-9]+)?$";
|
||||
regex_t regex;
|
||||
int code = 0;
|
||||
|
||||
tdGetVndDirName(TD_VID(pVnode), VNODE_RSMA_DIR, dir);
|
||||
tdGetVndDirName(TD_VID(pVnode), tfsGetPrimaryPath(pVnode->pTfs), VNODE_RSMA_DIR, true, dir);
|
||||
|
||||
// Resource allocation and init
|
||||
regcomp(&regex, pattern, REG_EXTENDED);
|
||||
|
||||
if ((pDir = taosOpenDir(dir)) == NULL) {
|
||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||
smaWarn("rsma post-commit open dir %s failed since %s", dir, terrstr());
|
||||
if ((code = regcomp(&regex, pattern, REG_EXTENDED)) != 0) {
|
||||
char errbuf[128];
|
||||
regerror(code, &regex, errbuf, sizeof(errbuf));
|
||||
smaWarn("vgId:%d, rsma post commit, regcomp for %s failed since %s", TD_VID(pVnode), dir, errbuf);
|
||||
return TSDB_CODE_FAILED;
|
||||
}
|
||||
|
||||
if ((pDir = taosOpenDir(dir)) == NULL) {
|
||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||
smaWarn("vgId:%d, rsma post commit, open dir %s failed since %s", TD_VID(pVnode), dir, terrstr());
|
||||
return TSDB_CODE_FAILED;
|
||||
}
|
||||
|
||||
int32_t dirLen = strlen(dir);
|
||||
char *dirEnd = POINTER_SHIFT(dir, dirLen);
|
||||
regmatch_t regMatch[2];
|
||||
while ((pDirEntry = taosReadDir(pDir)) != NULL) {
|
||||
char *entryName = taosGetDirEntryName(pDirEntry);
|
||||
if (!entryName) {
|
||||
continue;
|
||||
}
|
||||
char *fileName = taosDirEntryBaseName(entryName);
|
||||
int code = regexec(&regex, bname, 2, regMatch, 0);
|
||||
|
||||
code = regexec(&regex, entryName, 2, regMatch, 0);
|
||||
|
||||
if (code == 0) {
|
||||
// match
|
||||
printf("match 0 = %s\n", (char *)POINTER_SHIFT(fileName, regMatch[0].rm_so));
|
||||
printf("match 1 = %s\n", (char *)POINTER_SHIFT(fileName, regMatch[1].rm_so));
|
||||
int64_t version = -1;
|
||||
sscanf((const char *)POINTER_SHIFT(entryName, regMatch[1].rm_so), "%" PRIi64, &version);
|
||||
if ((version < committed) && (version > -1)) {
|
||||
strncpy(dirEnd, entryName, TSDB_FILENAME_LEN - dirLen);
|
||||
if (taosRemoveFile(dir) != 0) {
|
||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||
smaWarn("vgId:%d, committed version:%" PRIi64 ", failed to remove %s since %s", TD_VID(pVnode), committed,
|
||||
dir, terrstr());
|
||||
} else {
|
||||
smaDebug("vgId:%d, committed version:%" PRIi64 ", success to remove %s", TD_VID(pVnode), committed, dir);
|
||||
}
|
||||
}
|
||||
} else if (code == REG_NOMATCH) {
|
||||
// not match
|
||||
smaInfo("rsma post-commit not match %s", fileName);
|
||||
smaTrace("vgId:%d, rsma post commit, not match %s", TD_VID(pVnode), entryName);
|
||||
continue;
|
||||
} else {
|
||||
// has other error
|
||||
terrno = TAOS_SYSTEM_ERROR(code);
|
||||
smaWarn("rsma post-commit regexec failed since %s", terrstr());
|
||||
char errbuf[128];
|
||||
regerror(code, &regex, errbuf, sizeof(errbuf));
|
||||
smaWarn("vgId:%d, rsma post commit, regexec failed since %s", TD_VID(pVnode), errbuf);
|
||||
|
||||
taosCloseDir(&pDir);
|
||||
regfree(&regex);
|
||||
return TSDB_CODE_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
taosCloseDir(&pDir);
|
||||
regfree(&regex);
|
||||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -132,6 +132,7 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS
|
|||
if (smaType == TSDB_SMA_TYPE_ROLLUP) {
|
||||
SRSmaStat *pRSmaStat = (SRSmaStat *)(*pSmaStat);
|
||||
pRSmaStat->pSma = (SSma *)pSma;
|
||||
atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_INIT);
|
||||
|
||||
// init smaMgmt
|
||||
smaMgmt.smaRef = taosOpenRef(SMA_MGMT_REF_NUM, tdDestroyRSmaStat);
|
||||
|
@ -192,22 +193,20 @@ static void *tdFreeTSmaStat(STSmaStat *pStat) {
|
|||
static void tdDestroyRSmaStat(void *pRSmaStat) {
|
||||
if (pRSmaStat) {
|
||||
SRSmaStat *pStat = (SRSmaStat *)pRSmaStat;
|
||||
smaDebug("vgId:%d %s:%d destroy rsma stat %p", SMA_VID(pStat->pSma), __func__, __LINE__, pRSmaStat);
|
||||
// step 1: set persistence task cancelled
|
||||
SSma *pSma = pStat->pSma;
|
||||
smaDebug("vgId:%d, destroy rsma stat %p", SMA_VID(pSma), pRSmaStat);
|
||||
// step 1: set rsma trigger stat cancelled
|
||||
atomic_store_8(RSMA_TRIGGER_STAT(pStat), TASK_TRIGGER_STAT_CANCELLED);
|
||||
|
||||
// step 2: stop the persistence timer
|
||||
taosTmrStopA(&RSMA_TMR_ID(pStat));
|
||||
|
||||
// step 3: wait the persistence thread to finish
|
||||
// step 2: wait the persistence thread to finish
|
||||
int32_t nLoops = 0;
|
||||
if (atomic_load_8(RSMA_RUNNING_STAT(pStat)) == 1) {
|
||||
while (1) {
|
||||
if (atomic_load_8(RSMA_TRIGGER_STAT(pStat)) == TASK_TRIGGER_STAT_FINISHED) {
|
||||
smaDebug("rsma, persist task finished already");
|
||||
smaDebug("vgId:%d, rsma persist task finished already", SMA_VID(pSma));
|
||||
break;
|
||||
} else {
|
||||
smaDebug("rsma, persist task not finished yet since rsma stat in %" PRIi8,
|
||||
smaDebug("vgId:%d, rsma persist task not finished yet since rsma stat in %" PRIi8, SMA_VID(pSma),
|
||||
atomic_load_8(RSMA_TRIGGER_STAT(pStat)));
|
||||
}
|
||||
++nLoops;
|
||||
|
@ -218,13 +217,15 @@ static void tdDestroyRSmaStat(void *pRSmaStat) {
|
|||
}
|
||||
}
|
||||
|
||||
// step 4: destroy the rsma info and associated fetch tasks
|
||||
// step 3: destroy the rsma info and associated fetch tasks
|
||||
// TODO: use taosHashSetFreeFp when taosHashSetFreeFp is ready.
|
||||
void *infoHash = taosHashIterate(RSMA_INFO_HASH(pStat), NULL);
|
||||
while (infoHash) {
|
||||
SRSmaInfo *pSmaInfo = *(SRSmaInfo **)infoHash;
|
||||
tdFreeRSmaInfo(pSmaInfo);
|
||||
infoHash = taosHashIterate(RSMA_INFO_HASH(pStat), infoHash);
|
||||
if (taosHashGetSize(RSMA_INFO_HASH(pStat)) > 0) {
|
||||
void *infoHash = taosHashIterate(RSMA_INFO_HASH(pStat), NULL);
|
||||
while (infoHash) {
|
||||
SRSmaInfo *pSmaInfo = *(SRSmaInfo **)infoHash;
|
||||
tdFreeRSmaInfo(pSmaInfo);
|
||||
infoHash = taosHashIterate(RSMA_INFO_HASH(pStat), infoHash);
|
||||
}
|
||||
}
|
||||
taosHashCleanup(RSMA_INFO_HASH(pStat));
|
||||
|
||||
|
@ -232,10 +233,10 @@ static void tdDestroyRSmaStat(void *pRSmaStat) {
|
|||
nLoops = 0;
|
||||
while (1) {
|
||||
if (T_REF_VAL_GET((SSmaStat *)pStat) == 0) {
|
||||
smaDebug("rsma, all fetch task finished already");
|
||||
smaDebug("vgId:%d, rsma fetch tasks all finished", SMA_VID(pSma));
|
||||
break;
|
||||
} else {
|
||||
smaDebug("rsma, fetch tasks not all finished yet");
|
||||
smaDebug("vgId:%d, rsma fetch tasks not all finished yet", SMA_VID(pSma));
|
||||
}
|
||||
++nLoops;
|
||||
if (nLoops > 1000) {
|
||||
|
@ -275,7 +276,7 @@ int32_t tdDestroySmaState(SSmaStat *pSmaStat, int8_t smaType) {
|
|||
} else if (smaType == TSDB_SMA_TYPE_ROLLUP) {
|
||||
SRSmaStat *pRSmaStat = SMA_RSMA_STAT(pSmaStat);
|
||||
if (taosRemoveRef(smaMgmt.smaRef, RSMA_REF_ID(pRSmaStat)) < 0) {
|
||||
smaError("remove refId from smaRef failed, refId:0x%" PRIx64, RSMA_REF_ID(pRSmaStat));
|
||||
smaError("remove refId from rsmaRef:0x%" PRIx64 " failed since %s", RSMA_REF_ID(pRSmaStat), terrstr());
|
||||
}
|
||||
} else {
|
||||
ASSERT(0);
|
||||
|
|
|
@ -135,17 +135,11 @@ _err:
|
|||
return -1;
|
||||
}
|
||||
|
||||
int32_t smaCloseEnv(SSma *pSma) {
|
||||
if (pSma) {
|
||||
SMA_TSMA_ENV(pSma) = tdFreeSmaEnv(SMA_TSMA_ENV(pSma));
|
||||
SMA_RSMA_ENV(pSma) = tdFreeSmaEnv(SMA_RSMA_ENV(pSma));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t smaCloseEx(SSma *pSma) {
|
||||
int32_t smaClose(SSma *pSma) {
|
||||
if (pSma) {
|
||||
taosThreadMutexDestroy(&pSma->mutex);
|
||||
SMA_TSMA_ENV(pSma) = tdFreeSmaEnv(SMA_TSMA_ENV(pSma));
|
||||
SMA_RSMA_ENV(pSma) = tdFreeSmaEnv(SMA_RSMA_ENV(pSma));
|
||||
if SMA_RSMA_TSDB0 (pSma) tsdbClose(&SMA_RSMA_TSDB0(pSma));
|
||||
if SMA_RSMA_TSDB1 (pSma) tsdbClose(&SMA_RSMA_TSDB1(pSma));
|
||||
if SMA_RSMA_TSDB2 (pSma) tsdbClose(&SMA_RSMA_TSDB2(pSma));
|
||||
|
|
|
@ -15,9 +15,8 @@
|
|||
|
||||
#include "sma.h"
|
||||
|
||||
#define RSMA_QTASKINFO_PERSIST_MS 7200000
|
||||
#define RSMA_QTASKINFO_BUFSIZE 32768
|
||||
#define RSMA_QTASKINFO_HEAD_LEN (sizeof(int32_t) + sizeof(int8_t) + sizeof(int64_t)) // len + type + suid
|
||||
#define RSMA_QTASKINFO_BUFSIZE 32768
|
||||
#define RSMA_QTASKINFO_HEAD_LEN (sizeof(int32_t) + sizeof(int8_t) + sizeof(int64_t)) // len + type + suid
|
||||
|
||||
SSmaMgmt smaMgmt = {
|
||||
.smaRef = -1,
|
||||
|
@ -43,9 +42,9 @@ static int32_t tdRSmaQTaskInfoIterNextBlock(SRSmaQTaskInfoIter *pIter, bool *isF
|
|||
static int32_t tdRSmaQTaskInfoRestore(SSma *pSma, SRSmaQTaskInfoIter *pIter);
|
||||
static int32_t tdRSmaQTaskInfoItemRestore(SSma *pSma, const SRSmaQTaskInfoItem *infoItem);
|
||||
|
||||
static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma);
|
||||
static int32_t tdRSmaRestoreQTaskInfoReload(SSma *pSma);
|
||||
static int32_t tdRSmaRestoreTSDataReload(SSma *pSma);
|
||||
static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables);
|
||||
static int32_t tdRSmaRestoreQTaskInfoReload(SSma *pSma, int64_t *committed);
|
||||
static int32_t tdRSmaRestoreTSDataReload(SSma *pSma, int64_t committed);
|
||||
|
||||
struct SRSmaInfoItem {
|
||||
SRSmaInfo *pRsmaInfo;
|
||||
|
@ -88,7 +87,7 @@ struct SRSmaQTaskInfoIter {
|
|||
};
|
||||
|
||||
static void tdRSmaQTaskInfoGetFName(int32_t vgId, int64_t version, char *outputName) {
|
||||
tdGetVndFileName(vgId, VNODE_RSMA_DIR, TD_QTASKINFO_FNAME_PREFIX, version, outputName);
|
||||
tdGetVndFileName(vgId, NULL, VNODE_RSMA_DIR, TD_QTASKINFO_FNAME_PREFIX, version, outputName);
|
||||
}
|
||||
|
||||
static FORCE_INLINE int32_t tdRSmaQTaskInfoContLen(int32_t lenWithHead) {
|
||||
|
@ -114,12 +113,14 @@ void *tdFreeRSmaInfo(SRSmaInfo *pInfo) {
|
|||
for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) {
|
||||
SRSmaInfoItem *pItem = &pInfo->items[i];
|
||||
if (pItem->taskInfo) {
|
||||
smaDebug("vgId:%d, stb %" PRIi64 " stop fetch-timer %p level %d", SMA_VID(pSma), pInfo->suid, pItem->tmrId,
|
||||
i + 1);
|
||||
taosTmrStopA(&pItem->tmrId);
|
||||
if (pItem->tmrId) {
|
||||
smaDebug("vgId:%d, table %" PRIi64 " stop fetch timer %p level %d", SMA_VID(pSma), pInfo->suid, pItem->tmrId,
|
||||
i + 1);
|
||||
taosTmrStopA(&pItem->tmrId);
|
||||
}
|
||||
tdFreeTaskHandle(&pItem->taskInfo, SMA_VID(pSma), i + 1);
|
||||
} else {
|
||||
smaDebug("vgId:%d, stb %" PRIi64 " no need to destroy rsma info level %d since empty taskInfo", SMA_VID(pSma),
|
||||
smaDebug("vgId:%d, table %" PRIi64 " no need to destroy rsma info level %d since empty taskInfo", SMA_VID(pSma),
|
||||
pInfo->suid, i + 1);
|
||||
}
|
||||
}
|
||||
|
@ -358,13 +359,7 @@ int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con
|
|||
goto _err;
|
||||
}
|
||||
|
||||
smaDebug("vgId:%d, register rsma info succeed for suid:%" PRIi64, SMA_VID(pSma), suid);
|
||||
|
||||
// start the persist timer
|
||||
if (TASK_TRIGGER_STAT_INIT ==
|
||||
atomic_val_compare_exchange_8(RSMA_TRIGGER_STAT(pStat), TASK_TRIGGER_STAT_INIT, TASK_TRIGGER_STAT_ACTIVE)) {
|
||||
taosTmrStart(tdRSmaPersistTrigger, RSMA_QTASKINFO_PERSIST_MS, pStat, RSMA_TMR_HANDLE(pStat));
|
||||
}
|
||||
smaDebug("vgId:%d, register rsma info succeed for table %" PRIi64, SMA_VID(pSma), suid);
|
||||
|
||||
return TSDB_CODE_SUCCESS;
|
||||
_err:
|
||||
|
@ -748,7 +743,7 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, void *pMsg, int32_t inputType) {
|
|||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma) {
|
||||
static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) {
|
||||
SVnode *pVnode = pSma->pVnode;
|
||||
|
||||
SArray *suidList = taosArrayInit(1, sizeof(tb_uid_t));
|
||||
|
@ -758,7 +753,12 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma) {
|
|||
return TSDB_CODE_FAILED;
|
||||
}
|
||||
|
||||
int32_t arrSize = taosArrayGetSize(suidList);
|
||||
int64_t arrSize = taosArrayGetSize(suidList);
|
||||
|
||||
if (nTables) {
|
||||
*nTables = arrSize;
|
||||
}
|
||||
|
||||
if (arrSize == 0) {
|
||||
taosArrayDestroy(suidList);
|
||||
smaDebug("vgId:%d, no need to restore rsma env since empty stb id list", TD_VID(pVnode));
|
||||
|
@ -767,9 +767,9 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma) {
|
|||
|
||||
SMetaReader mr = {0};
|
||||
metaReaderInit(&mr, SMA_META(pSma), 0);
|
||||
for (int32_t i = 0; i < arrSize; ++i) {
|
||||
for (int64_t i = 0; i < arrSize; ++i) {
|
||||
tb_uid_t suid = *(tb_uid_t *)taosArrayGet(suidList, i);
|
||||
smaDebug("vgId:%d, rsma restore, suid[%d] is %" PRIi64, TD_VID(pVnode), i, suid);
|
||||
smaDebug("vgId:%d, rsma restore, suid is %" PRIi64, TD_VID(pVnode), suid);
|
||||
if (metaGetTableEntryByUid(&mr, suid) < 0) {
|
||||
smaError("vgId:%d, rsma restore, failed to get table meta for %" PRIi64 " since %s", TD_VID(pVnode), suid,
|
||||
terrstr());
|
||||
|
@ -803,7 +803,7 @@ _err:
|
|||
return TSDB_CODE_FAILED;
|
||||
}
|
||||
|
||||
static int32_t tdRSmaRestoreQTaskInfoReload(SSma *pSma) {
|
||||
static int32_t tdRSmaRestoreQTaskInfoReload(SSma *pSma, int64_t *committed) {
|
||||
SVnode *pVnode = pSma->pVnode;
|
||||
STFile tFile = {0};
|
||||
char qTaskInfoFName[TSDB_FILENAME_LEN] = {0};
|
||||
|
@ -814,6 +814,13 @@ static int32_t tdRSmaRestoreQTaskInfoReload(SSma *pSma) {
|
|||
}
|
||||
|
||||
if (!taosCheckExistFile(TD_TFILE_FULL_NAME(&tFile))) {
|
||||
if (pVnode->state.committed > 0) {
|
||||
smaWarn("vgId:%d, rsma restore for version %" PRIi64 ", not start as %s not exist", TD_VID(pVnode),
|
||||
pVnode->state.committed, TD_TFILE_FULL_NAME(&tFile));
|
||||
} else {
|
||||
smaDebug("vgId:%d, rsma restore for version %" PRIi64 ", no need as %s not exist", TD_VID(pVnode),
|
||||
pVnode->state.committed, TD_TFILE_FULL_NAME(&tFile));
|
||||
}
|
||||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -839,9 +846,14 @@ static int32_t tdRSmaRestoreQTaskInfoReload(SSma *pSma) {
|
|||
tdRSmaQTaskInfoIterDestroy(&fIter);
|
||||
tdCloseTFile(&tFile);
|
||||
tdDestroyTFile(&tFile);
|
||||
|
||||
// restored successfully from committed
|
||||
*committed = pVnode->state.committed;
|
||||
|
||||
return TSDB_CODE_SUCCESS;
|
||||
_err:
|
||||
smaError("rsma restore, qtaskinfo reload failed since %s", terrstr());
|
||||
smaError("vgId:%d, rsma restore for version %" PRIi64 ", qtaskinfo reload failed since %s", TD_VID(pVnode),
|
||||
pVnode->state.committed, terrstr());
|
||||
return TSDB_CODE_FAILED;
|
||||
}
|
||||
|
||||
|
@ -849,35 +861,45 @@ _err:
|
|||
* @brief reload ts data from checkpoint
|
||||
*
|
||||
* @param pSma
|
||||
* @param committed restore from committed version
|
||||
* @return int32_t
|
||||
*/
|
||||
static int32_t tdRSmaRestoreTSDataReload(SSma *pSma) {
|
||||
static int32_t tdRSmaRestoreTSDataReload(SSma *pSma, int64_t committed) {
|
||||
// TODO
|
||||
smaDebug("vgId:%d, rsma restore from %" PRIi64 ", ts data reload success", SMA_VID(pSma), committed);
|
||||
return TSDB_CODE_SUCCESS;
|
||||
_err:
|
||||
smaError("rsma restore, ts data reload failed since %s", terrstr());
|
||||
smaError("vgId:%d, rsma restore from %" PRIi64 ", ts data reload failed since %s", SMA_VID(pSma), committed,
|
||||
terrstr());
|
||||
return TSDB_CODE_FAILED;
|
||||
}
|
||||
|
||||
int32_t tdProcessRSmaRestoreImpl(SSma *pSma) {
|
||||
// step 1: iterate all stables to restore the rsma env
|
||||
if (tdRSmaRestoreQTaskInfoInit(pSma) < 0) {
|
||||
int64_t nTables = 0;
|
||||
if (tdRSmaRestoreQTaskInfoInit(pSma, &nTables) < 0) {
|
||||
goto _err;
|
||||
}
|
||||
|
||||
if (nTables <= 0) {
|
||||
smaDebug("vgId:%d, no need to restore rsma task since no tables", SMA_VID(pSma));
|
||||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
// step 2: retrieve qtaskinfo items from the persistence file(rsma/qtaskinfo) and restore
|
||||
if (tdRSmaRestoreQTaskInfoReload(pSma) < 0) {
|
||||
int64_t committed = -1;
|
||||
if (tdRSmaRestoreQTaskInfoReload(pSma, &committed) < 0) {
|
||||
goto _err;
|
||||
}
|
||||
|
||||
// step 3: reload ts data from checkpoint
|
||||
if (tdRSmaRestoreTSDataReload(pSma) < 0) {
|
||||
if (tdRSmaRestoreTSDataReload(pSma, committed) < 0) {
|
||||
goto _err;
|
||||
}
|
||||
|
||||
return TSDB_CODE_SUCCESS;
|
||||
_err:
|
||||
smaError("failed to restore rsma task since %s", terrstr());
|
||||
smaError("vgId:%d failed to restore rsma task since %s", SMA_VID(pSma), terrstr());
|
||||
return TSDB_CODE_FAILED;
|
||||
}
|
||||
|
||||
|
@ -1017,7 +1039,8 @@ static int32_t tdRSmaQTaskInfoRestore(SSma *pSma, SRSmaQTaskInfoIter *pIter) {
|
|||
pIter->qBuf = taosDecodeFixedI32(pIter->qBuf, &qTaskInfoLenWithHead);
|
||||
if (qTaskInfoLenWithHead < RSMA_QTASKINFO_HEAD_LEN) {
|
||||
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
|
||||
smaError("restore rsma qtaskinfo file %s failed since %s", TD_TFILE_FULL_NAME(pIter->pTFile), terrstr());
|
||||
smaError("vgId:%d, restore rsma qtaskinfo file %s failed since %s", SMA_VID(pSma),
|
||||
TD_TFILE_FULL_NAME(pIter->pTFile), terrstr());
|
||||
return TSDB_CODE_FAILED;
|
||||
}
|
||||
|
||||
|
@ -1054,13 +1077,17 @@ static int32_t tdRSmaQTaskInfoRestore(SSma *pSma, SRSmaQTaskInfoIter *pIter) {
|
|||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
static int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat) {
|
||||
int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat) {
|
||||
SSma *pSma = pRSmaStat->pSma;
|
||||
SVnode *pVnode = pSma->pVnode;
|
||||
int32_t vid = SMA_VID(pSma);
|
||||
int64_t toffset = 0;
|
||||
bool isFileCreated = false;
|
||||
|
||||
if (taosHashGetSize(RSMA_INFO_HASH(pRSmaStat)) <= 0) {
|
||||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
void *infoHash = taosHashIterate(RSMA_INFO_HASH(pRSmaStat), NULL);
|
||||
if (!infoHash) {
|
||||
return TSDB_CODE_SUCCESS;
|
||||
|
@ -1099,11 +1126,15 @@ static int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat) {
|
|||
char qTaskInfoFName[TSDB_FILENAME_LEN];
|
||||
tdRSmaQTaskInfoGetFName(vid, pSma->pVnode->state.applied, qTaskInfoFName);
|
||||
if (tdInitTFile(&tFile, tfsGetPrimaryPath(pVnode->pTfs), qTaskInfoFName) < 0) {
|
||||
smaError("vgId:%d, rsma persit, init %s failed since %s", vid, qTaskInfoFName, terrstr());
|
||||
goto _err;
|
||||
}
|
||||
if (tdCreateTFile(&tFile, true, -1) < 0) {
|
||||
smaError("vgId:%d, rsma persit, create %s failed since %s", vid, TD_TFILE_FULL_NAME(&tFile), terrstr());
|
||||
goto _err;
|
||||
}
|
||||
smaDebug("vgId:%d, rsma, table %" PRIi64 " level %d serialize qTaskInfo, file %s created", vid, pRSmaInfo->suid,
|
||||
i + 1, TD_TFILE_FULL_NAME(&tFile));
|
||||
|
||||
isFileCreated = true;
|
||||
}
|
||||
|
@ -1143,6 +1174,7 @@ static int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat) {
|
|||
}
|
||||
return TSDB_CODE_SUCCESS;
|
||||
_err:
|
||||
smaError("vgId:%d, rsma persit failed since %s", vid, terrstr());
|
||||
if (isFileCreated) {
|
||||
tdRemoveTFile(&tFile);
|
||||
tdDestroyTFile(&tFile);
|
||||
|
@ -1238,8 +1270,8 @@ static void tdRSmaPersistTrigger(void *param, void *tmrId) {
|
|||
// start persist task
|
||||
tdRSmaPersistTask(pRSmaStat);
|
||||
|
||||
taosTmrReset(tdRSmaPersistTrigger, RSMA_QTASKINFO_PERSIST_MS, pRSmaStat, pRSmaStat->tmrHandle,
|
||||
&pRSmaStat->tmrId);
|
||||
// taosTmrReset(tdRSmaPersistTrigger, 5000, pRSmaStat, pRSmaStat->tmrHandle,
|
||||
// RSMA_TMR_ID(pRSmaStat));
|
||||
} else {
|
||||
atomic_store_8(RSMA_RUNNING_STAT(pRSmaStat), 0);
|
||||
}
|
||||
|
|
|
@ -140,7 +140,7 @@ int64_t tdAppendTFile(STFile *pTFile, void *buf, int64_t nbyte, int64_t *offset)
|
|||
return -1;
|
||||
}
|
||||
|
||||
#if 1
|
||||
#if 0
|
||||
smaDebug("append to file %s, offset:%" PRIi64 " nbyte:%" PRIi64 " fsize:%" PRIi64, TD_TFILE_FULL_NAME(pTFile),
|
||||
toffset, nbyte, toffset + nbyte);
|
||||
#endif
|
||||
|
@ -181,16 +181,43 @@ void tdCloseTFile(STFile *pTFile) {
|
|||
|
||||
void tdDestroyTFile(STFile *pTFile) { taosMemoryFreeClear(TD_TFILE_FULL_NAME(pTFile)); }
|
||||
|
||||
void tdGetVndFileName(int32_t vgId, const char *dname, const char *fname, int64_t version, char *outputName) {
|
||||
void tdGetVndFileName(int32_t vgId, const char *pdname, const char *dname, const char *fname, int64_t version,
|
||||
char *outputName) {
|
||||
if (version < 0) {
|
||||
snprintf(outputName, TSDB_FILENAME_LEN, "vnode/vnode%d/%s/v%d%s", vgId, dname, vgId, fname);
|
||||
if (pdname) {
|
||||
snprintf(outputName, TSDB_FILENAME_LEN, "%s%svnode%svnode%d%s%s%sv%d%s", pdname, TD_DIRSEP, TD_DIRSEP, vgId,
|
||||
TD_DIRSEP, dname, TD_DIRSEP, vgId, fname);
|
||||
} else {
|
||||
snprintf(outputName, TSDB_FILENAME_LEN, "vnode%svnode%d%s%s%sv%d%s", TD_DIRSEP, vgId, TD_DIRSEP, dname, TD_DIRSEP,
|
||||
vgId, fname);
|
||||
}
|
||||
} else {
|
||||
snprintf(outputName, TSDB_FILENAME_LEN, "vnode/vnode%d/%s/v%d%s%" PRIi64, vgId, dname, vgId, fname, version);
|
||||
if (pdname) {
|
||||
snprintf(outputName, TSDB_FILENAME_LEN, "%s%svnode%svnode%d%s%s%sv%d%s%" PRIi64, pdname, TD_DIRSEP, TD_DIRSEP,
|
||||
vgId, TD_DIRSEP, dname, TD_DIRSEP, vgId, fname, version);
|
||||
} else {
|
||||
snprintf(outputName, TSDB_FILENAME_LEN, "vnode%svnode%d%s%s%sv%d%s%" PRIi64, TD_DIRSEP, vgId, TD_DIRSEP, dname,
|
||||
TD_DIRSEP, vgId, fname, version);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void tdGetVndDirName(int32_t vgId, const char *dname, char *outputName) {
|
||||
snprintf(outputName, TSDB_FILENAME_LEN, "vnode/vnode%d/%s", vgId, dname);
|
||||
void tdGetVndDirName(int32_t vgId, const char *pdname, const char *dname, bool endWithSep, char *outputName) {
|
||||
if (pdname) {
|
||||
if (endWithSep) {
|
||||
snprintf(outputName, TSDB_FILENAME_LEN, "%s%svnode%svnode%d%s%s%s", pdname, TD_DIRSEP, TD_DIRSEP, vgId, TD_DIRSEP,
|
||||
dname, TD_DIRSEP);
|
||||
} else {
|
||||
snprintf(outputName, TSDB_FILENAME_LEN, "%s%svnode%svnode%d%s%s", pdname, TD_DIRSEP, TD_DIRSEP, vgId, TD_DIRSEP,
|
||||
dname);
|
||||
}
|
||||
} else {
|
||||
if (endWithSep) {
|
||||
snprintf(outputName, TSDB_FILENAME_LEN, "vnode%svnode%d%s%s%s", TD_DIRSEP, vgId, TD_DIRSEP, dname, TD_DIRSEP);
|
||||
} else {
|
||||
snprintf(outputName, TSDB_FILENAME_LEN, "vnode%svnode%d%s%s", TD_DIRSEP, vgId, TD_DIRSEP, dname);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int32_t tdInitTFile(STFile *pTFile, const char *dname, const char *fname) {
|
||||
|
@ -215,35 +242,36 @@ int32_t tdInitTFile(STFile *pTFile, const char *dname, const char *fname) {
|
|||
|
||||
int32_t tdCreateTFile(STFile *pTFile, bool updateHeader, int8_t fType) {
|
||||
ASSERT(pTFile->info.fsize == 0 && pTFile->info.magic == TD_FILE_INIT_MAGIC);
|
||||
|
||||
pTFile->pFile = taosOpenFile(TD_TFILE_FULL_NAME(pTFile), TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC);
|
||||
if (pTFile->pFile == NULL) {
|
||||
if (errno == ENOENT) {
|
||||
// Try to create directory recursively
|
||||
if (taosMulMkDir(taosDirName(TD_TFILE_FULL_NAME(pTFile))) != 0) {
|
||||
char *s = strdup(TD_TFILE_FULL_NAME(pTFile));
|
||||
if (taosMulMkDir(taosDirName(s)) != 0) {
|
||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||
taosMemoryFree(s);
|
||||
return -1;
|
||||
}
|
||||
taosMemoryFree(s);
|
||||
pTFile->pFile = taosOpenFile(TD_TFILE_FULL_NAME(pTFile), TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC);
|
||||
if (pTFile->pFile == NULL) {
|
||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||
return -1;
|
||||
} else {
|
||||
pTFile->pFile = taosOpenFile(TD_TFILE_FULL_NAME(pTFile), TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC);
|
||||
if (pTFile->pFile == NULL) {
|
||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!updateHeader) {
|
||||
return 0;
|
||||
}
|
||||
if (!updateHeader) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
pTFile->info.fsize += TD_FILE_HEAD_SIZE;
|
||||
pTFile->info.fver = 0;
|
||||
pTFile->info.fsize += TD_FILE_HEAD_SIZE;
|
||||
pTFile->info.fver = 0;
|
||||
|
||||
if (tdUpdateTFileHeader(pTFile) < 0) {
|
||||
tdCloseTFile(pTFile);
|
||||
tdRemoveTFile(pTFile);
|
||||
return -1;
|
||||
}
|
||||
if (tdUpdateTFileHeader(pTFile) < 0) {
|
||||
tdCloseTFile(pTFile);
|
||||
tdRemoveTFile(pTFile);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -69,6 +69,9 @@ int vnodeBegin(SVnode *pVnode) {
|
|||
}
|
||||
}
|
||||
|
||||
// begin sma
|
||||
smaBegin(pVnode->pSma); // TODO: refactor to include the rsma1/rsma2 tsdbBegin() after tsdb_refact branch merged
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -230,8 +233,8 @@ int vnodeCommit(SVnode *pVnode) {
|
|||
}
|
||||
|
||||
// preCommit
|
||||
// TODO
|
||||
|
||||
smaPreCommit(pVnode->pSma);
|
||||
|
||||
// commit each sub-system
|
||||
if (metaCommit(pVnode->pMeta) < 0) {
|
||||
ASSERT(0);
|
||||
|
|
|
@ -152,12 +152,11 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) {
|
|||
return pVnode;
|
||||
|
||||
_err:
|
||||
if (pVnode->pSma) smaCloseEnv(pVnode->pSma);
|
||||
if (pVnode->pQuery) vnodeQueryClose(pVnode);
|
||||
if (pVnode->pTq) tqClose(pVnode->pTq);
|
||||
if (pVnode->pWal) walClose(pVnode->pWal);
|
||||
if (pVnode->pTsdb) tsdbClose(&pVnode->pTsdb);
|
||||
if (pVnode->pSma) smaCloseEx(pVnode->pSma);
|
||||
if (pVnode->pSma) smaClose(pVnode->pSma);
|
||||
if (pVnode->pMeta) metaClose(pVnode->pMeta);
|
||||
|
||||
tsem_destroy(&(pVnode->canCommit));
|
||||
|
@ -167,14 +166,13 @@ _err:
|
|||
|
||||
void vnodeClose(SVnode *pVnode) {
|
||||
if (pVnode) {
|
||||
smaCloseEnv(pVnode->pSma);
|
||||
vnodeCommit(pVnode);
|
||||
vnodeSyncClose(pVnode);
|
||||
vnodeQueryClose(pVnode);
|
||||
walClose(pVnode->pWal);
|
||||
tqClose(pVnode->pTq);
|
||||
if (pVnode->pTsdb) tsdbClose(&pVnode->pTsdb);
|
||||
smaCloseEx(pVnode->pSma);
|
||||
smaClose(pVnode->pSma);
|
||||
metaClose(pVnode->pMeta);
|
||||
vnodeCloseBufPool(pVnode);
|
||||
// destroy handle
|
||||
|
|
Loading…
Reference in New Issue