Merge branch '3.0' into enh/TD-23769-3.0x

This commit is contained in:
kailixu 2023-10-31 16:15:51 +08:00
commit 012d86d9a8
40 changed files with 1253 additions and 856 deletions

View File

@ -46,6 +46,7 @@ extern "C" {
#define SYNC_HEARTBEAT_SLOW_MS 1500
#define SYNC_HEARTBEAT_REPLY_SLOW_MS 1500
#define SYNC_SNAP_RESEND_MS 1000 * 60
#define SYNC_SNAP_TIMEOUT_MS 1000 * 600
#define SYNC_VND_COMMIT_MIN_MS 3000

View File

@ -657,6 +657,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_PAR_CORRESPONDING_STABLE_ERR TAOS_DEF_ERROR_CODE(0, 0x2618)
#define TSDB_CODE_PAR_INVALID_DB_OPTION TAOS_DEF_ERROR_CODE(0, 0x2619)
#define TSDB_CODE_PAR_INVALID_TABLE_OPTION TAOS_DEF_ERROR_CODE(0, 0x261A)
#define TSDB_CODE_PAR_INTER_VALUE_TOO_BIG TAOS_DEF_ERROR_CODE(0, 0x261B)
#define TSDB_CODE_PAR_GROUPBY_WINDOW_COEXIST TAOS_DEF_ERROR_CODE(0, 0x2624)
#define TSDB_CODE_PAR_AGG_FUNC_NESTING TAOS_DEF_ERROR_CODE(0, 0x2627)
#define TSDB_CODE_PAR_INVALID_STATE_WIN_TYPE TAOS_DEF_ERROR_CODE(0, 0x2628)

View File

@ -303,6 +303,8 @@ typedef enum ELogicConditionType {
#define TSDB_SYNC_APPLYQ_SIZE_LIMIT 512
#define TSDB_SYNC_NEGOTIATION_WIN 512
#define TSDB_SYNC_SNAP_BUFFER_SIZE 2048
#define TSDB_TBNAME_COLUMN_INDEX (-1)
#define TSDB_MULTI_TABLEMETA_MAX_NUM 100000 // maximum batch size allowed to load table meta

View File

@ -104,13 +104,21 @@ uint16_t tsAuditPort = 6043;
bool tsEnableAuditCreateTable = true;
// telem
#ifdef TD_ENTERPRISE
bool tsEnableTelem = false;
#else
bool tsEnableTelem = true;
#endif
int32_t tsTelemInterval = 43200;
char tsTelemServer[TSDB_FQDN_LEN] = "telemetry.tdengine.com";
uint16_t tsTelemPort = 80;
char *tsTelemUri = "/report";
#ifdef TD_ENTERPRISE
bool tsEnableCrashReport = false;
#else
bool tsEnableCrashReport = true;
#endif
char *tsClientCrashReportUri = "/ccrashreport";
char *tsSvrCrashReportUri = "/dcrashreport";

View File

@ -309,7 +309,7 @@ int32_t tsdbTakeReadSnap2(STsdbReader *pReader, _query_reseek_func_t reseek, STs
void tsdbUntakeReadSnap2(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive);
// tsdbMerge.c ==============================================================================================
int32_t tsdbMerge(void *arg);
int32_t tsdbSchedMerge(STsdb *tsdb, int32_t fid);
// tsdbDiskData ==============================================================================================
int32_t tDiskDataBuilderCreate(SDiskDataBuilder **ppBuilder);
@ -371,7 +371,7 @@ struct STsdb {
char *path;
SVnode *pVnode;
STsdbKeepCfg keepCfg;
TdThreadRwlock rwLock;
TdThreadMutex mutex;
SMemTable *mem;
SMemTable *imem;
STsdbFS fs; // old
@ -668,7 +668,7 @@ struct SDelFWriter {
};
#include "tarray2.h"
//#include "tsdbFS2.h"
// #include "tsdbFS2.h"
// struct STFileSet;
typedef struct STFileSet STFileSet;
typedef TARRAY2(STFileSet *) TFileSetArray;
@ -873,8 +873,8 @@ int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf);
void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter);
bool tMergeTreeNext(SMergeTree *pMTree);
void tMergeTreePinSttBlock(SMergeTree* pMTree);
void tMergeTreeUnpinSttBlock(SMergeTree* pMTree);
void tMergeTreePinSttBlock(SMergeTree *pMTree);
void tMergeTreeUnpinSttBlock(SMergeTree *pMTree);
bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree);
void tMergeTreeClose(SMergeTree *pMTree);

View File

@ -31,8 +31,6 @@ SSmaMgmt smaMgmt = {
typedef struct SRSmaQTaskInfoItem SRSmaQTaskInfoItem;
extern int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now);
static int32_t tdUidStorePut(STbUidStore *pStore, tb_uid_t suid, tb_uid_t *uid);
static void tdUidStoreDestory(STbUidStore *pStore);
static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, bool isAdd);

View File

@ -131,7 +131,7 @@ int32_t tsdbBegin(STsdb *pTsdb) {
TSDB_CHECK_CODE(code, lino, _exit);
// lock
if ((code = taosThreadRwlockWrlock(&pTsdb->rwLock))) {
if ((code = taosThreadMutexLock(&pTsdb->mutex))) {
code = TAOS_SYSTEM_ERROR(code);
TSDB_CHECK_CODE(code, lino, _exit);
}
@ -139,7 +139,7 @@ int32_t tsdbBegin(STsdb *pTsdb) {
pTsdb->mem = pMemTable;
// unlock
if ((code = taosThreadRwlockUnlock(&pTsdb->rwLock))) {
if ((code = taosThreadMutexUnlock(&pTsdb->mutex))) {
code = TAOS_SYSTEM_ERROR(code);
TSDB_CHECK_CODE(code, lino, _exit);
}
@ -152,11 +152,11 @@ _exit:
}
int32_t tsdbPrepareCommit(STsdb *pTsdb) {
taosThreadRwlockWrlock(&pTsdb->rwLock);
taosThreadMutexLock(&pTsdb->mutex);
ASSERT(pTsdb->imem == NULL);
pTsdb->imem = pTsdb->mem;
pTsdb->mem = NULL;
taosThreadRwlockUnlock(&pTsdb->rwLock);
taosThreadMutexUnlock(&pTsdb->mutex);
return 0;
}
@ -171,9 +171,9 @@ int32_t tsdbCommit(STsdb *pTsdb, SCommitInfo *pInfo) {
// check
if (pMemTable->nRow == 0 && pMemTable->nDel == 0) {
taosThreadRwlockWrlock(&pTsdb->rwLock);
taosThreadMutexLock(&pTsdb->mutex);
pTsdb->imem = NULL;
taosThreadRwlockUnlock(&pTsdb->rwLock);
taosThreadMutexUnlock(&pTsdb->mutex);
tsdbUnrefMemTable(pMemTable, NULL, true);
goto _exit;
@ -501,6 +501,7 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) {
int32_t lino = 0;
STsdb *pTsdb = pCommitter->pTsdb;
SDFileSet *pRSet = NULL;
// memory
pCommitter->commitFid = tsdbKeyFid(pCommitter->nextKey, pCommitter->minutes, pCommitter->precision);
pCommitter->expLevel = tsdbFidLevel(pCommitter->commitFid, &pCommitter->pTsdb->keepCfg, taosGetTimestampSec());
@ -798,6 +799,7 @@ static int32_t tsdbCommitFileData(SCommitter *pCommitter) {
int32_t lino = 0;
STsdb *pTsdb = pCommitter->pTsdb;
SMemTable *pMemTable = pTsdb->imem;
// commit file data start
code = tsdbCommitFileDataStart(pCommitter);
TSDB_CHECK_CODE(code, lino, _exit);
@ -1650,18 +1652,18 @@ int32_t tsdbFinishCommit(STsdb *pTsdb) {
SMemTable *pMemTable = pTsdb->imem;
// lock
taosThreadRwlockWrlock(&pTsdb->rwLock);
taosThreadMutexLock(&pTsdb->mutex);
code = tsdbFSCommit(pTsdb);
if (code) {
taosThreadRwlockUnlock(&pTsdb->rwLock);
taosThreadMutexUnlock(&pTsdb->mutex);
TSDB_CHECK_CODE(code, lino, _exit);
}
pTsdb->imem = NULL;
// unlock
taosThreadRwlockUnlock(&pTsdb->rwLock);
taosThreadMutexUnlock(&pTsdb->mutex);
if (pMemTable) {
tsdbUnrefMemTable(pMemTable, NULL, true);
}

View File

@ -367,7 +367,12 @@ static int32_t tsdbCommitFileSetBegin(SCommitter2 *committer) {
int32_t lino = 0;
STsdb *tsdb = committer->tsdb;
committer->ctx->fid = tsdbKeyFid(committer->ctx->nextKey, committer->minutes, committer->precision);
int32_t fid = tsdbKeyFid(committer->ctx->nextKey, committer->minutes, committer->precision);
// check if can commit
tsdbFSCheckCommit(tsdb, fid);
committer->ctx->fid = fid;
committer->ctx->expLevel = tsdbFidLevel(committer->ctx->fid, &tsdb->keepCfg, committer->ctx->now);
tsdbFidKeyRange(committer->ctx->fid, committer->minutes, committer->precision, &committer->ctx->minKey,
&committer->ctx->maxKey);
@ -549,11 +554,11 @@ _exit:
}
int32_t tsdbPreCommit(STsdb *tsdb) {
taosThreadRwlockWrlock(&tsdb->rwLock);
taosThreadMutexLock(&tsdb->mutex);
ASSERT(tsdb->imem == NULL);
tsdb->imem = tsdb->mem;
tsdb->mem = NULL;
taosThreadRwlockUnlock(&tsdb->rwLock);
taosThreadMutexUnlock(&tsdb->mutex);
return 0;
}
@ -568,15 +573,13 @@ int32_t tsdbCommitBegin(STsdb *tsdb, SCommitInfo *info) {
int64_t nDel = imem->nDel;
if (nRow == 0 && nDel == 0) {
taosThreadRwlockWrlock(&tsdb->rwLock);
taosThreadMutexLock(&tsdb->mutex);
tsdb->imem = NULL;
taosThreadRwlockUnlock(&tsdb->rwLock);
taosThreadMutexUnlock(&tsdb->mutex);
tsdbUnrefMemTable(imem, NULL, true);
} else {
SCommitter2 committer[1];
tsdbFSCheckCommit(tsdb->pFS);
code = tsdbOpenCommitter(tsdb, info, committer);
TSDB_CHECK_CODE(code, lino, _exit);
@ -605,14 +608,14 @@ int32_t tsdbCommitCommit(STsdb *tsdb) {
if (tsdb->imem == NULL) goto _exit;
SMemTable *pMemTable = tsdb->imem;
taosThreadRwlockWrlock(&tsdb->rwLock);
taosThreadMutexLock(&tsdb->mutex);
code = tsdbFSEditCommit(tsdb->pFS);
if (code) {
taosThreadRwlockUnlock(&tsdb->rwLock);
taosThreadMutexUnlock(&tsdb->mutex);
TSDB_CHECK_CODE(code, lino, _exit);
}
tsdb->imem = NULL;
taosThreadRwlockUnlock(&tsdb->rwLock);
taosThreadMutexUnlock(&tsdb->mutex);
tsdbUnrefMemTable(pMemTable, NULL, true);
_exit:

View File

@ -55,25 +55,11 @@ static int32_t create_fs(STsdb *pTsdb, STFileSystem **fs) {
TARRAY2_INIT(fs[0]->fSetArr);
TARRAY2_INIT(fs[0]->fSetArrTmp);
// background task queue
taosThreadMutexInit(fs[0]->mutex, NULL);
fs[0]->bgTaskQueue->next = fs[0]->bgTaskQueue;
fs[0]->bgTaskQueue->prev = fs[0]->bgTaskQueue;
taosThreadMutexInit(&fs[0]->commitMutex, NULL);
taosThreadCondInit(&fs[0]->canCommit, NULL);
fs[0]->blockCommit = false;
return 0;
}
static int32_t destroy_fs(STFileSystem **fs) {
if (fs[0] == NULL) return 0;
taosThreadMutexDestroy(&fs[0]->commitMutex);
taosThreadCondDestroy(&fs[0]->canCommit);
taosThreadMutexDestroy(fs[0]->mutex);
ASSERT(fs[0]->bgTaskNum == 0);
TARRAY2_DESTROY(fs[0]->fSetArr, NULL);
TARRAY2_DESTROY(fs[0]->fSetArrTmp, NULL);
@ -264,10 +250,11 @@ static int32_t apply_commit(STFileSystem *fs) {
if (fset1 && fset2) {
if (fset1->fid < fset2->fid) {
// delete fset1
TARRAY2_REMOVE(fsetArray1, i1, tsdbTFileSetRemove);
tsdbTFileSetRemove(fset1);
i1++;
} else if (fset1->fid > fset2->fid) {
// create new file set with fid of fset2->fid
code = tsdbTFileSetInitDup(fs->tsdb, fset2, &fset1);
code = tsdbTFileSetInitCopy(fs->tsdb, fset2, &fset1);
if (code) return code;
code = TARRAY2_SORT_INSERT(fsetArray1, fset1, tsdbTFileSetCmprFn);
if (code) return code;
@ -282,10 +269,11 @@ static int32_t apply_commit(STFileSystem *fs) {
}
} else if (fset1) {
// delete fset1
TARRAY2_REMOVE(fsetArray1, i1, tsdbTFileSetRemove);
tsdbTFileSetRemove(fset1);
i1++;
} else {
// create new file set with fid of fset2->fid
code = tsdbTFileSetInitDup(fs->tsdb, fset2, &fset1);
code = tsdbTFileSetInitCopy(fs->tsdb, fset2, &fset1);
if (code) return code;
code = TARRAY2_SORT_INSERT(fsetArray1, fset1, tsdbTFileSetCmprFn);
if (code) return code;
@ -512,7 +500,8 @@ static int32_t tsdbFSDoSanAndFix(STFileSystem *fs) {
TARRAY2_FOREACH(lvl->fobjArr, fobj) {
code = tsdbFSDoScanAndFixFile(fs, fobj);
if (code) {
fset->maxVerValid = (fobj->f->minVer <= fobj->f->maxVer) ? TMIN(fset->maxVerValid, fobj->f->minVer - 1) : -1;
fset->maxVerValid =
(fobj->f->minVer <= fobj->f->maxVer) ? TMIN(fset->maxVerValid, fobj->f->minVer - 1) : -1;
corrupt = true;
}
}
@ -592,7 +581,7 @@ static int32_t tsdbFSDupState(STFileSystem *fs) {
const STFileSet *fset1;
TARRAY2_FOREACH(src, fset1) {
STFileSet *fset2;
code = tsdbTFileSetInitDup(fs->tsdb, fset1, &fset2);
code = tsdbTFileSetInitCopy(fs->tsdb, fset1, &fset2);
if (code) return code;
code = TARRAY2_APPEND(dst, fset2);
if (code) return code;
@ -665,12 +654,6 @@ static int32_t close_file_system(STFileSystem *fs) {
return 0;
}
static int32_t apply_edit(STFileSystem *pFS) {
int32_t code = 0;
ASSERTS(0, "TODO: Not implemented yet");
return code;
}
static int32_t fset_cmpr_fn(const struct STFileSet *pSet1, const struct STFileSet *pSet2) {
if (pSet1->fid < pSet2->fid) {
return -1;
@ -710,10 +693,23 @@ static int32_t edit_fs(STFileSystem *fs, const TFileOpArray *opArray) {
TSDB_CHECK_CODE(code, lino, _exit);
}
// remove empty file set
// remove empty empty stt level and empty file set
int32_t i = 0;
while (i < TARRAY2_SIZE(fsetArray)) {
fset = TARRAY2_GET(fsetArray, i);
SSttLvl *lvl;
int32_t j = 0;
while (j < TARRAY2_SIZE(fset->lvlArr)) {
lvl = TARRAY2_GET(fset->lvlArr, j);
if (TARRAY2_SIZE(lvl->fobjArr) == 0) {
TARRAY2_REMOVE(fset->lvlArr, j, tsdbSttLvlClear);
} else {
j++;
}
}
if (tsdbTFileSetIsEmpty(fset)) {
TARRAY2_REMOVE(fsetArray, i, tsdbTFileSetClear);
} else {
@ -753,13 +749,13 @@ _exit:
static void tsdbDoWaitBgTask(STFileSystem *fs, STFSBgTask *task) {
task->numWait++;
taosThreadCondWait(task->done, fs->mutex);
taosThreadCondWait(task->done, &fs->tsdb->mutex);
task->numWait--;
if (task->numWait == 0) {
taosThreadCondDestroy(task->done);
if (task->free) {
task->free(task->arg);
if (task->destroy) {
task->destroy(task->arg);
}
taosMemoryFree(task);
}
@ -770,8 +766,8 @@ static void tsdbDoDoneBgTask(STFileSystem *fs, STFSBgTask *task) {
taosThreadCondBroadcast(task->done);
} else {
taosThreadCondDestroy(task->done);
if (task->free) {
task->free(task->arg);
if (task->destroy) {
task->destroy(task->arg);
}
taosMemoryFree(task);
}
@ -780,23 +776,16 @@ static void tsdbDoDoneBgTask(STFileSystem *fs, STFSBgTask *task) {
int32_t tsdbCloseFS(STFileSystem **fs) {
if (fs[0] == NULL) return 0;
taosThreadMutexLock(fs[0]->mutex);
fs[0]->stop = true;
if (fs[0]->bgTaskRunning) {
tsdbDoWaitBgTask(fs[0], fs[0]->bgTaskRunning);
}
taosThreadMutexUnlock(fs[0]->mutex);
tsdbFSDisableBgTask(fs[0]);
close_file_system(fs[0]);
destroy_fs(fs);
return 0;
}
int64_t tsdbFSAllocEid(STFileSystem *fs) {
taosThreadRwlockRdlock(&fs->tsdb->rwLock);
taosThreadMutexLock(&fs->tsdb->mutex);
int64_t cid = ++fs->neid;
taosThreadRwlockUnlock(&fs->tsdb->rwLock);
taosThreadMutexUnlock(&fs->tsdb->mutex);
return cid;
}
@ -837,27 +826,34 @@ _exit:
return code;
}
static int32_t tsdbFSSetBlockCommit(STFileSystem *fs, bool block) {
taosThreadMutexLock(&fs->commitMutex);
static int32_t tsdbFSSetBlockCommit(STFileSet *fset, bool block) {
if (block) {
fs->blockCommit = true;
fset->blockCommit = true;
} else {
fs->blockCommit = false;
taosThreadCondSignal(&fs->canCommit);
fset->blockCommit = false;
if (fset->numWaitCommit > 0) {
taosThreadCondSignal(&fset->canCommit);
}
}
taosThreadMutexUnlock(&fs->commitMutex);
return 0;
}
int32_t tsdbFSCheckCommit(STFileSystem *fs) {
taosThreadMutexLock(&fs->commitMutex);
while (fs->blockCommit) {
taosThreadCondWait(&fs->canCommit, &fs->commitMutex);
int32_t tsdbFSCheckCommit(STsdb *tsdb, int32_t fid) {
taosThreadMutexLock(&tsdb->mutex);
STFileSet *fset;
tsdbFSGetFSet(tsdb->pFS, fid, &fset);
if (fset) {
while (fset->blockCommit) {
fset->numWaitCommit++;
taosThreadCondWait(&fset->canCommit, &tsdb->mutex);
fset->numWaitCommit--;
}
taosThreadMutexUnlock(&fs->commitMutex);
}
taosThreadMutexUnlock(&tsdb->mutex);
return 0;
}
// IMPORTANT: the caller must hold fs->tsdb->mutex
int32_t tsdbFSEditCommit(STFileSystem *fs) {
int32_t code = 0;
int32_t lino = 0;
@ -867,36 +863,57 @@ int32_t tsdbFSEditCommit(STFileSystem *fs) {
TSDB_CHECK_CODE(code, lino, _exit);
// schedule merge
if (fs->tsdb->pVnode->config.sttTrigger > 1) {
STFileSet *fset;
int32_t sttTrigger = fs->tsdb->pVnode->config.sttTrigger;
bool schedMerge = false;
bool blockCommit = false;
if (sttTrigger > 1) {
STFileSet *fset;
TARRAY2_FOREACH_REVERSE(fs->fSetArr, fset) {
if (TARRAY2_SIZE(fset->lvlArr) == 0) continue;
if (TARRAY2_SIZE(fset->lvlArr) == 0) {
tsdbFSSetBlockCommit(fset, false);
continue;
}
SSttLvl *lvl = TARRAY2_FIRST(fset->lvlArr);
if (lvl->level != 0) continue;
if (lvl->level != 0) {
tsdbFSSetBlockCommit(fset, false);
continue;
}
int32_t numFile = TARRAY2_SIZE(lvl->fobjArr);
if (numFile >= sttTrigger) {
schedMerge = true;
}
if (numFile >= sttTrigger * BLOCK_COMMIT_FACTOR) {
blockCommit = true;
}
if (schedMerge && blockCommit) break;
}
if (schedMerge) {
code = tsdbFSScheduleBgTask(fs, TSDB_BG_TASK_MERGER, tsdbMerge, NULL, fs->tsdb, NULL);
// launch merge
code = tsdbSchedMerge(fs->tsdb, fset->fid);
TSDB_CHECK_CODE(code, lino, _exit);
}
tsdbFSSetBlockCommit(fs, blockCommit);
if (numFile >= sttTrigger * BLOCK_COMMIT_FACTOR) {
tsdbFSSetBlockCommit(fset, true);
} else {
tsdbFSSetBlockCommit(fset, false);
}
}
}
// clear empty level and fset
int32_t i = 0;
while (i < TARRAY2_SIZE(fs->fSetArr)) {
STFileSet *fset = TARRAY2_GET(fs->fSetArr, i);
int32_t j = 0;
while (j < TARRAY2_SIZE(fset->lvlArr)) {
SSttLvl *lvl = TARRAY2_GET(fset->lvlArr, j);
if (TARRAY2_SIZE(lvl->fobjArr) == 0) {
TARRAY2_REMOVE(fset->lvlArr, j, tsdbSttLvlClear);
} else {
j++;
}
}
if (tsdbTFileSetIsEmpty(fset) && fset->bgTaskRunning == NULL) {
TARRAY2_REMOVE(fs->fSetArr, i, tsdbTFileSetClear);
} else {
i++;
}
}
_exit:
@ -933,15 +950,15 @@ int32_t tsdbFSCreateCopySnapshot(STFileSystem *fs, TFileSetArray **fsetArr) {
TARRAY2_INIT(fsetArr[0]);
taosThreadRwlockRdlock(&fs->tsdb->rwLock);
taosThreadMutexLock(&fs->tsdb->mutex);
TARRAY2_FOREACH(fs->fSetArr, fset) {
code = tsdbTFileSetInitDup(fs->tsdb, fset, &fset1);
code = tsdbTFileSetInitCopy(fs->tsdb, fset, &fset1);
if (code) break;
code = TARRAY2_APPEND(fsetArr[0], fset1);
if (code) break;
}
taosThreadRwlockUnlock(&fs->tsdb->rwLock);
taosThreadMutexUnlock(&fs->tsdb->mutex);
if (code) {
TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear);
@ -961,9 +978,9 @@ int32_t tsdbFSDestroyCopySnapshot(TFileSetArray **fsetArr) {
}
int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr) {
taosThreadRwlockRdlock(&fs->tsdb->rwLock);
taosThreadMutexLock(&fs->tsdb->mutex);
int32_t code = tsdbFSCreateRefSnapshotWithoutLock(fs, fsetArr);
taosThreadRwlockUnlock(&fs->tsdb->rwLock);
taosThreadMutexUnlock(&fs->tsdb->mutex);
return code;
}
@ -1017,7 +1034,7 @@ int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pRange
}
}
taosThreadRwlockRdlock(&fs->tsdb->rwLock);
taosThreadMutexLock(&fs->tsdb->mutex);
TARRAY2_FOREACH(fs->fSetArr, fset) {
int64_t ever = VERSION_MAX;
if (pHash) {
@ -1034,7 +1051,7 @@ int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pRange
code = TARRAY2_APPEND(fsetArr[0], fset1);
if (code) break;
}
taosThreadRwlockUnlock(&fs->tsdb->rwLock);
taosThreadMutexUnlock(&fs->tsdb->mutex);
_out:
if (code) {
@ -1089,7 +1106,7 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev
}
}
taosThreadRwlockRdlock(&fs->tsdb->rwLock);
taosThreadMutexLock(&fs->tsdb->mutex);
TARRAY2_FOREACH(fs->fSetArr, fset) {
int64_t sver1 = sver;
int64_t ever1 = ever;
@ -1118,7 +1135,7 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev
fsr1 = NULL;
}
taosThreadRwlockUnlock(&fs->tsdb->rwLock);
taosThreadMutexUnlock(&fs->tsdb->mutex);
if (code) {
tsdbTSnapRangeClear(&fsr1);
@ -1137,59 +1154,69 @@ _out:
const char *gFSBgTaskName[] = {NULL, "MERGE", "RETENTION", "COMPACT"};
static int32_t tsdbFSRunBgTask(void *arg) {
STFileSystem *fs = (STFileSystem *)arg;
STFSBgTask *task = (STFSBgTask *)arg;
STFileSystem *fs = task->fs;
STFileSet *fset;
ASSERT(fs->bgTaskRunning != NULL);
tsdbFSGetFSet(fs, task->fid, &fset);
fs->bgTaskRunning->launchTime = taosGetTimestampMs();
fs->bgTaskRunning->run(fs->bgTaskRunning->arg);
fs->bgTaskRunning->finishTime = taosGetTimestampMs();
ASSERT(fset != NULL && fset->bgTaskRunning == task);
task->launchTime = taosGetTimestampMs();
task->run(task->arg);
task->finishTime = taosGetTimestampMs();
tsdbDebug("vgId:%d bg task:%s task id:%" PRId64 " finished, schedule time:%" PRId64 " launch time:%" PRId64
" finish time:%" PRId64,
TD_VID(fs->tsdb->pVnode), gFSBgTaskName[fs->bgTaskRunning->type], fs->bgTaskRunning->taskid,
fs->bgTaskRunning->scheduleTime, fs->bgTaskRunning->launchTime, fs->bgTaskRunning->finishTime);
TD_VID(fs->tsdb->pVnode), gFSBgTaskName[task->type], task->taskid, task->scheduleTime, task->launchTime,
task->finishTime);
taosThreadMutexLock(fs->mutex);
taosThreadMutexLock(&fs->tsdb->mutex);
// free last
tsdbDoDoneBgTask(fs, fs->bgTaskRunning);
fs->bgTaskRunning = NULL;
tsdbDoDoneBgTask(fs, task);
fset->bgTaskRunning = NULL;
// schedule next
if (fs->bgTaskNum > 0) {
if (fset->bgTaskNum > 0) {
if (fs->stop) {
while (fs->bgTaskNum > 0) {
STFSBgTask *task = fs->bgTaskQueue->next;
task->prev->next = task->next;
task->next->prev = task->prev;
fs->bgTaskNum--;
tsdbDoDoneBgTask(fs, task);
while (fset->bgTaskNum > 0) {
STFSBgTask *nextTask = fset->bgTaskQueue->next;
nextTask->prev->next = nextTask->next;
nextTask->next->prev = nextTask->prev;
fset->bgTaskNum--;
tsdbDoDoneBgTask(fs, nextTask);
}
} else {
// pop task from head
fs->bgTaskRunning = fs->bgTaskQueue->next;
fs->bgTaskRunning->prev->next = fs->bgTaskRunning->next;
fs->bgTaskRunning->next->prev = fs->bgTaskRunning->prev;
fs->bgTaskNum--;
vnodeScheduleTaskEx(1, tsdbFSRunBgTask, arg);
fset->bgTaskRunning = fset->bgTaskQueue->next;
fset->bgTaskRunning->prev->next = fset->bgTaskRunning->next;
fset->bgTaskRunning->next->prev = fset->bgTaskRunning->prev;
fset->bgTaskNum--;
vnodeScheduleTaskEx(1, tsdbFSRunBgTask, fset->bgTaskRunning);
}
}
taosThreadMutexUnlock(fs->mutex);
taosThreadMutexUnlock(&fs->tsdb->mutex);
return 0;
}
static int32_t tsdbFSScheduleBgTaskImpl(STFileSystem *fs, EFSBgTaskT type, int32_t (*run)(void *),
// IMPORTANT: the caller must hold the fs->tsdb->mutex
int32_t tsdbFSScheduleBgTask(STFileSystem *fs, int32_t fid, EFSBgTaskT type, int32_t (*run)(void *),
void (*destroy)(void *), void *arg, int64_t *taskid) {
if (fs->stop) {
if (destroy) {
destroy(arg);
}
return 0; // TODO: use a better error code
return 0;
}
for (STFSBgTask *task = fs->bgTaskQueue->next; task != fs->bgTaskQueue; task = task->next) {
STFileSet *fset;
tsdbFSGetFSet(fs, fid, &fset);
ASSERT(fset != NULL);
for (STFSBgTask *task = fset->bgTaskQueue->next; task != fset->bgTaskQueue; task = task->next) {
if (task->type == type) {
if (destroy) {
destroy(arg);
@ -1203,22 +1230,24 @@ static int32_t tsdbFSScheduleBgTaskImpl(STFileSystem *fs, EFSBgTaskT type, int
if (task == NULL) return TSDB_CODE_OUT_OF_MEMORY;
taosThreadCondInit(task->done, NULL);
task->fs = fs;
task->fid = fid;
task->type = type;
task->run = run;
task->free = destroy;
task->destroy = destroy;
task->arg = arg;
task->scheduleTime = taosGetTimestampMs();
task->taskid = ++fs->taskid;
if (fs->bgTaskRunning == NULL && fs->bgTaskNum == 0) {
if (fset->bgTaskRunning == NULL && fset->bgTaskNum == 0) {
// launch task directly
fs->bgTaskRunning = task;
vnodeScheduleTaskEx(1, tsdbFSRunBgTask, fs);
fset->bgTaskRunning = task;
vnodeScheduleTaskEx(1, tsdbFSRunBgTask, task);
} else {
// add to the queue tail
fs->bgTaskNum++;
task->next = fs->bgTaskQueue;
task->prev = fs->bgTaskQueue->prev;
fset->bgTaskNum++;
task->next = fset->bgTaskQueue;
task->prev = fset->bgTaskQueue->prev;
task->prev->next = task;
task->next->prev = task;
}
@ -1227,68 +1256,30 @@ static int32_t tsdbFSScheduleBgTaskImpl(STFileSystem *fs, EFSBgTaskT type, int
return 0;
}
int32_t tsdbFSScheduleBgTask(STFileSystem *fs, EFSBgTaskT type, int32_t (*run)(void *), void (*free)(void *), void *arg,
int64_t *taskid) {
taosThreadMutexLock(fs->mutex);
int32_t code = tsdbFSScheduleBgTaskImpl(fs, type, run, free, arg, taskid);
taosThreadMutexUnlock(fs->mutex);
return code;
}
int32_t tsdbFSDisableBgTask(STFileSystem *fs) {
taosThreadMutexLock(&fs->tsdb->mutex);
for (;;) {
fs->stop = true;
bool done = true;
int32_t tsdbFSWaitBgTask(STFileSystem *fs, int64_t taskid) {
STFSBgTask *task = NULL;
taosThreadMutexLock(fs->mutex);
if (fs->bgTaskRunning && fs->bgTaskRunning->taskid == taskid) {
task = fs->bgTaskRunning;
} else {
for (STFSBgTask *taskt = fs->bgTaskQueue->next; taskt != fs->bgTaskQueue; taskt = taskt->next) {
if (taskt->taskid == taskid) {
task = taskt;
STFileSet *fset;
TARRAY2_FOREACH(fs->fSetArr, fset) {
if (fset->bgTaskRunning) {
tsdbDoWaitBgTask(fs, fset->bgTaskRunning);
done = false;
break;
}
}
}
if (task) {
tsdbDoWaitBgTask(fs, task);
if (done) break;
}
taosThreadMutexUnlock(fs->mutex);
taosThreadMutexUnlock(&fs->tsdb->mutex);
return 0;
}
int32_t tsdbFSWaitAllBgTask(STFileSystem *fs) {
taosThreadMutexLock(fs->mutex);
while (fs->bgTaskRunning) {
taosThreadCondWait(fs->bgTaskRunning->done, fs->mutex);
}
taosThreadMutexUnlock(fs->mutex);
return 0;
}
static int32_t tsdbFSDoDisableBgTask(STFileSystem *fs) {
fs->stop = true;
if (fs->bgTaskRunning) {
tsdbDoWaitBgTask(fs, fs->bgTaskRunning);
}
return 0;
}
int32_t tsdbFSDisableBgTask(STFileSystem *fs) {
taosThreadMutexLock(fs->mutex);
int32_t code = tsdbFSDoDisableBgTask(fs);
taosThreadMutexUnlock(fs->mutex);
return code;
}
int32_t tsdbFSEnableBgTask(STFileSystem *fs) {
taosThreadMutexLock(fs->mutex);
taosThreadMutexLock(&fs->tsdb->mutex);
fs->stop = false;
taosThreadMutexUnlock(fs->mutex);
taosThreadMutexUnlock(&fs->tsdb->mutex);
return 0;
}

View File

@ -22,22 +22,11 @@
extern "C" {
#endif
/* Exposed Handle */
typedef struct STFileSystem STFileSystem;
typedef struct STFSBgTask STFSBgTask;
// typedef TARRAY2(STFileSet *) TFileSetArray;
typedef enum {
TSDB_FEDIT_COMMIT = 1, //
TSDB_FEDIT_MERGE
} EFEditT;
typedef enum {
TSDB_BG_TASK_MERGER = 1,
TSDB_BG_TASK_RETENTION,
TSDB_BG_TASK_COMPACT,
} EFSBgTaskT;
typedef enum {
TSDB_FCURRENT = 1,
TSDB_FCURRENT_C, // for commit
@ -67,37 +56,17 @@ int32_t tsdbFSEditBegin(STFileSystem *fs, const TFileOpArray *opArray, EFEditT e
int32_t tsdbFSEditCommit(STFileSystem *fs);
int32_t tsdbFSEditAbort(STFileSystem *fs);
// background task
int32_t tsdbFSScheduleBgTask(STFileSystem *fs, EFSBgTaskT type, int32_t (*run)(void *), void (*free)(void *), void *arg,
int64_t *taskid);
int32_t tsdbFSWaitBgTask(STFileSystem *fs, int64_t taskid);
int32_t tsdbFSWaitAllBgTask(STFileSystem *fs);
int32_t tsdbFSScheduleBgTask(STFileSystem *fs, int32_t fid, EFSBgTaskT type, int32_t (*run)(void *),
void (*destroy)(void *), void *arg, int64_t *taskid);
int32_t tsdbFSDisableBgTask(STFileSystem *fs);
int32_t tsdbFSEnableBgTask(STFileSystem *fs);
// other
int32_t tsdbFSGetFSet(STFileSystem *fs, int32_t fid, STFileSet **fset);
int32_t tsdbFSCheckCommit(STFileSystem *fs);
int32_t tsdbFSCheckCommit(STsdb *tsdb, int32_t fid);
// utils
int32_t save_fs(const TFileSetArray *arr, const char *fname);
int32_t current_fname(STsdb *pTsdb, char *fname, EFCurrentT ftype);
struct STFSBgTask {
EFSBgTaskT type;
int32_t (*run)(void *arg);
void (*free)(void *arg);
void *arg;
TdThreadCond done[1];
int32_t numWait;
int64_t taskid;
int64_t scheduleTime;
int64_t launchTime;
int64_t finishTime;
struct STFSBgTask *prev;
struct STFSBgTask *next;
};
/* Exposed Structs */
struct STFileSystem {
STsdb *tsdb;
@ -109,17 +78,8 @@ struct STFileSystem {
TFileSetArray fSetArrTmp[1];
// background task queue
TdThreadMutex mutex[1];
bool stop;
int64_t taskid;
int32_t bgTaskNum;
STFSBgTask bgTaskQueue[1];
STFSBgTask *bgTaskRunning;
// block commit variables
TdThreadMutex commitMutex;
TdThreadCond canCommit;
bool blockCommit;
};
#ifdef __cplusplus

View File

@ -342,11 +342,6 @@ int32_t tsdbTFileSetEdit(STsdb *pTsdb, STFileSet *fset, const STFileOp *op) {
int32_t idx = TARRAY2_SEARCH_IDX(lvl->fobjArr, &tfobjp, tsdbTFileObjCmpr, TD_EQ);
ASSERT(idx >= 0);
TARRAY2_REMOVE(lvl->fobjArr, idx, tsdbSttLvlClearFObj);
if (TARRAY2_SIZE(lvl->fobjArr) == 0) {
// TODO: remove the stt level if no file exists anymore
// TARRAY2_REMOVE(&fset->lvlArr, lvl - fset->lvlArr.data, tsdbSttLvlClear);
}
} else {
ASSERT(tsdbIsSameTFile(&op->of, fset->farr[op->of.type]->f));
tsdbTFileObjUnref(fset->farr[op->of.type]);
@ -454,10 +449,22 @@ int32_t tsdbTFileSetInit(int32_t fid, STFileSet **fset) {
fset[0]->fid = fid;
fset[0]->maxVerValid = VERSION_MAX;
TARRAY2_INIT(fset[0]->lvlArr);
// background task queue
fset[0]->bgTaskNum = 0;
fset[0]->bgTaskQueue->next = fset[0]->bgTaskQueue;
fset[0]->bgTaskQueue->prev = fset[0]->bgTaskQueue;
fset[0]->bgTaskRunning = NULL;
// block commit variables
taosThreadCondInit(&fset[0]->canCommit, NULL);
fset[0]->numWaitCommit = 0;
fset[0]->blockCommit = false;
return 0;
}
int32_t tsdbTFileSetInitDup(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset) {
int32_t tsdbTFileSetInitCopy(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset) {
int32_t code = tsdbTFileSetInit(fset1->fid, fset);
if (code) return code;
@ -588,21 +595,23 @@ int32_t tsdbTFileSetClear(STFileSet **fset) {
TARRAY2_DESTROY(fset[0]->lvlArr, tsdbSttLvlClear);
taosThreadCondDestroy(&fset[0]->canCommit);
taosMemoryFree(fset[0]);
fset[0] = NULL;
return 0;
}
int32_t tsdbTFileSetRemove(STFileSet **fset) {
int32_t tsdbTFileSetRemove(STFileSet *fset) {
if (fset == NULL) return 0;
for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
if (fset[0]->farr[ftype] == NULL) continue;
tsdbTFileObjRemove(fset[0]->farr[ftype]);
if (fset->farr[ftype] == NULL) continue;
tsdbTFileObjRemove(fset->farr[ftype]);
}
TARRAY2_DESTROY(fset[0]->lvlArr, tsdbSttLvlRemove);
taosMemoryFree(fset[0]);
fset[0] = NULL;
TARRAY2_DESTROY(fset->lvlArr, tsdbSttLvlRemove);
return 0;
}

View File

@ -28,6 +28,8 @@ typedef struct SSttLvl SSttLvl;
typedef TARRAY2(STFileObj *) TFileObjArray;
typedef TARRAY2(SSttLvl *) TSttLvlArray;
typedef TARRAY2(STFileOp) TFileOpArray;
typedef struct STFileSystem STFileSystem;
typedef struct STFSBgTask STFSBgTask;
typedef enum {
TSDB_FOP_NONE = 0,
@ -41,10 +43,10 @@ typedef enum {
// init/clear
int32_t tsdbTFileSetInit(int32_t fid, STFileSet **fset);
int32_t tsdbTFileSetInitDup(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset);
int32_t tsdbTFileSetInitCopy(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset);
int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset);
int32_t tsdbTFileSetClear(STFileSet **fset);
int32_t tsdbTFileSetRemove(STFileSet **fset);
int32_t tsdbTFileSetRemove(STFileSet *fset);
int32_t tsdbTFileSetFilteredInitDup(STsdb *pTsdb, const STFileSet *fset1, int64_t ever, STFileSet **fset,
TFileOpArray *fopArr);
@ -58,6 +60,7 @@ int32_t tsdbJsonToTFileSet(STsdb *pTsdb, const cJSON *json, STFileSet **fset);
// cmpr
int32_t tsdbTFileSetCmprFn(const STFileSet **fset1, const STFileSet **fset2);
// edit
int32_t tsdbSttLvlClear(SSttLvl **lvl);
int32_t tsdbTFileSetEdit(STsdb *pTsdb, STFileSet *fset, const STFileOp *op);
int32_t tsdbTFileSetApplyEdit(STsdb *pTsdb, const STFileSet *fset1, STFileSet *fset);
// max commit id
@ -70,6 +73,33 @@ bool tsdbTFileSetIsEmpty(const STFileSet *fset);
int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl);
int32_t tsdbSttLvlClear(SSttLvl **lvl);
typedef enum {
TSDB_BG_TASK_MERGER = 1,
TSDB_BG_TASK_RETENTION,
TSDB_BG_TASK_COMPACT,
} EFSBgTaskT;
struct STFSBgTask {
STFileSystem *fs;
int32_t fid;
EFSBgTaskT type;
int32_t (*run)(void *arg);
void (*destroy)(void *arg);
void *arg;
TdThreadCond done[1];
int32_t numWait;
int64_t taskid;
int64_t scheduleTime;
int64_t launchTime;
int64_t finishTime;
struct STFSBgTask *prev;
struct STFSBgTask *next;
};
struct STFileOp {
tsdb_fop_t optype;
int32_t fid;
@ -87,6 +117,16 @@ struct STFileSet {
int64_t maxVerValid;
STFileObj *farr[TSDB_FTYPE_MAX]; // file array
TSttLvlArray lvlArr[1]; // level array
// background task queue
int32_t bgTaskNum;
STFSBgTask bgTaskQueue[1];
STFSBgTask *bgTaskRunning;
// block commit variables
TdThreadCond canCommit;
int32_t numWaitCommit;
bool blockCommit;
};
struct STSnapRange {

View File

@ -191,7 +191,7 @@ int32_t tsdbDeleteTableData(STsdb *pTsdb, int64_t version, tb_uid_t suid, tb_uid
pMemTable->nDel++;
pMemTable->minVer = TMIN(pMemTable->minVer, version);
pMemTable->maxVer = TMIN(pMemTable->maxVer, version);
pMemTable->maxVer = TMAX(pMemTable->maxVer, version);
/*
if (TSDB_CACHE_LAST_ROW(pMemTable->pTsdb->pVnode->config) && tsdbKeyCmprFn(&lastKey, &pTbData->maxKey) >= 0) {
tsdbCacheDeleteLastrow(pTsdb->lruCache, pTbData->uid, eKey);

View File

@ -15,11 +15,17 @@
#include "tsdbMerge.h"
#define TSDB_MAX_LEVEL 6 // means max level is 7
#define TSDB_MAX_LEVEL 2 // means max level is 3
typedef struct {
STsdb *tsdb;
TFileSetArray *fsetArr;
int32_t fid;
} SMergeArg;
typedef struct {
STsdb *tsdb;
int32_t fid;
STFileSet *fset;
int32_t sttTrigger;
int32_t maxRow;
@ -313,7 +319,6 @@ static int32_t tsdbMergeFileSetBeginOpenWriter(SMerger *merger) {
if (merger->ctx->fset->farr[ftype]) {
config.files[ftype].exist = true;
config.files[ftype].file = merger->ctx->fset->farr[ftype]->f[0];
} else {
config.files[ftype].exist = false;
}
@ -397,13 +402,13 @@ static int32_t tsdbMergeFileSetEnd(SMerger *merger) {
code = tsdbFSEditBegin(merger->tsdb->pFS, merger->fopArr, TSDB_FEDIT_MERGE);
TSDB_CHECK_CODE(code, lino, _exit);
taosThreadRwlockWrlock(&merger->tsdb->rwLock);
taosThreadMutexLock(&merger->tsdb->mutex);
code = tsdbFSEditCommit(merger->tsdb->pFS);
if (code) {
taosThreadRwlockUnlock(&merger->tsdb->rwLock);
taosThreadMutexUnlock(&merger->tsdb->mutex);
TSDB_CHECK_CODE(code, lino, _exit);
}
taosThreadRwlockUnlock(&merger->tsdb->rwLock);
taosThreadMutexUnlock(&merger->tsdb->mutex);
_exit:
if (code) {
@ -480,28 +485,19 @@ _exit:
static int32_t tsdbDoMerge(SMerger *merger) {
int32_t code = 0;
int32_t lino = 0;
SSttLvl *lvl = TARRAY2_FIRST(merger->fset->lvlArr);
STFileSet *fset;
TARRAY2_FOREACH(merger->fsetArr, fset) {
if (TARRAY2_SIZE(fset->lvlArr) == 0) continue;
if (TARRAY2_SIZE(merger->fset->lvlArr) == 0) return 0;
if (lvl->level != 0 || TARRAY2_SIZE(lvl->fobjArr) < merger->sttTrigger) return 0;
SSttLvl *lvl = TARRAY2_FIRST(fset->lvlArr);
if (lvl->level != 0 || TARRAY2_SIZE(lvl->fobjArr) < merger->sttTrigger) continue;
if (!merger->ctx->opened) {
code = tsdbMergerOpen(merger);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbMergeFileSet(merger, fset);
code = tsdbMergeFileSet(merger, merger->fset);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (merger->ctx->opened) {
code = tsdbMergerClose(merger);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
@ -512,36 +508,73 @@ _exit:
return code;
}
int32_t tsdbMerge(void *arg) {
int32_t code = 0;
int32_t lino = 0;
STsdb *tsdb = (STsdb *)arg;
static int32_t tsdbMergeGetFSet(SMerger *merger) {
STFileSet *fset;
SMerger merger[1] = {{
.tsdb = tsdb,
.sttTrigger = tsdb->pVnode->config.sttTrigger,
}};
if (merger->sttTrigger <= 1) {
taosThreadMutexLock(&merger->tsdb->mutex);
tsdbFSGetFSet(merger->tsdb->pFS, merger->fid, &fset);
if (fset == NULL) {
taosThreadMutexUnlock(&merger->tsdb->mutex);
return 0;
}
code = tsdbFSCreateCopySnapshot(tsdb->pFS, &merger->fsetArr);
int32_t code = tsdbTFileSetInitCopy(merger->tsdb, fset, &merger->fset);
if (code) {
taosThreadMutexUnlock(&merger->tsdb->mutex);
return code;
}
taosThreadMutexUnlock(&merger->tsdb->mutex);
return 0;
}
static int32_t tsdbMerge(void *arg) {
int32_t code = 0;
int32_t lino = 0;
SMergeArg *mergeArg = (SMergeArg *)arg;
STsdb *tsdb = mergeArg->tsdb;
SMerger merger[1] = {{
.tsdb = tsdb,
.fid = mergeArg->fid,
.sttTrigger = tsdb->pVnode->config.sttTrigger,
}};
if (merger->sttTrigger <= 1) return 0;
// copy snapshot
code = tsdbMergeGetFSet(merger);
TSDB_CHECK_CODE(code, lino, _exit);
if (merger->fset == NULL) return 0;
// do merge
tsdbDebug("vgId:%d merge begin, fid:%d", TD_VID(tsdb->pVnode), merger->fid);
code = tsdbDoMerge(merger);
tsdbDebug("vgId:%d merge done, fid:%d", TD_VID(tsdb->pVnode), mergeArg->fid);
TSDB_CHECK_CODE(code, lino, _exit);
tsdbFSDestroyCopySnapshot(&merger->fsetArr);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
tsdbFatal("vgId:%d, failed to merge stt files since %s. code:%d", TD_VID(tsdb->pVnode), terrstr(), code);
taosMsleep(100);
exit(EXIT_FAILURE);
} else if (merger->ctx->opened) {
tsdbDebug("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__);
}
tsdbTFileSetClear(&merger->fset);
return code;
}
int32_t tsdbSchedMerge(STsdb *tsdb, int32_t fid) {
SMergeArg *arg = taosMemoryMalloc(sizeof(*arg));
if (arg == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
arg->tsdb = tsdb;
arg->fid = fid;
int32_t code = tsdbFSScheduleBgTask(tsdb->pFS, fid, TSDB_BG_TASK_MERGER, tsdbMerge, taosMemoryFree, arg, NULL);
if (code) taosMemoryFree(arg);
return code;
}

View File

@ -53,7 +53,7 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee
snprintf(pTsdb->path, TD_PATH_MAX, "%s%s%s", pVnode->path, TD_DIRSEP, dir);
// taosRealPath(pTsdb->path, NULL, slen);
pTsdb->pVnode = pVnode;
taosThreadRwlockInit(&pTsdb->rwLock, NULL);
taosThreadMutexInit(&pTsdb->mutex, NULL);
if (!pKeepCfg) {
tsdbSetKeepCfg(pTsdb, &pVnode->config.tsdbCfg);
} else {
@ -99,15 +99,14 @@ int tsdbClose(STsdb **pTsdb) {
tsdbDebug("vgId:%d, tsdb is close at %s, days:%d, keep:%d,%d,%d, keepTimeOffset:%d", TD_VID(pdb->pVnode), pdb->path,
pdb->keepCfg.days, pdb->keepCfg.keep0, pdb->keepCfg.keep1, pdb->keepCfg.keep2,
pdb->keepCfg.keepTimeOffset);
taosThreadRwlockWrlock(&(*pTsdb)->rwLock);
taosThreadMutexLock(&(*pTsdb)->mutex);
tsdbMemTableDestroy((*pTsdb)->mem, true);
(*pTsdb)->mem = NULL;
taosThreadRwlockUnlock(&(*pTsdb)->rwLock);
taosThreadRwlockDestroy(&(*pTsdb)->rwLock);
taosThreadMutexUnlock(&(*pTsdb)->mutex);
tsdbCloseFS(&(*pTsdb)->pFS);
tsdbCloseCache(*pTsdb);
taosThreadMutexDestroy(&(*pTsdb)->mutex);
taosMemoryFreeClear(*pTsdb);
}
return 0;

View File

@ -1105,8 +1105,9 @@ static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* p
(pVerRange->maxVer < pBlock->record.maxVer && pVerRange->maxVer >= pBlock->record.minVer);
}
static bool getNeighborBlockOfSameTable(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pTableBlockScanInfo,
int32_t* nextIndex, int32_t order, SBrinRecord* pRecord) {
static bool getNeighborBlockOfSameTable(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pBlockInfo,
STableBlockScanInfo* pTableBlockScanInfo, int32_t* nextIndex, int32_t order,
SBrinRecord* pRecord) {
bool asc = ASCENDING_TRAVERSE(order);
if (asc && pBlockInfo->tbBlockIdx >= taosArrayGetSize(pTableBlockScanInfo->pBlockIdxList) - 1) {
return false;
@ -1119,7 +1120,8 @@ static bool getNeighborBlockOfSameTable(SDataBlockIter* pBlockIter, SFileDataBlo
int32_t step = asc ? 1 : -1;
// *nextIndex = pBlockInfo->tbBlockIdx + step;
// *pBlockIndex = *(SBlockIndex*)taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex);
STableDataBlockIdx* pTableDataBlockIdx = taosArrayGet(pTableBlockScanInfo->pBlockIdxList, pBlockInfo->tbBlockIdx + step);
STableDataBlockIdx* pTableDataBlockIdx =
taosArrayGet(pTableBlockScanInfo->pBlockIdxList, pBlockInfo->tbBlockIdx + step);
SFileDataBlockInfo* p = taosArrayGet(pBlockIter->blockList, pTableDataBlockIdx->globalIndex);
memcpy(pRecord, &p->record, sizeof(SBrinRecord));
@ -1145,7 +1147,8 @@ static int32_t findFileBlockInfoIndex(SDataBlockIter* pBlockIter, SFileDataBlock
return -1;
}
static int32_t setFileBlockActiveInBlockIter(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t index, int32_t step) {
static int32_t setFileBlockActiveInBlockIter(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t index,
int32_t step) {
if (index < 0 || index >= pBlockIter->numOfBlocks) {
return -1;
}
@ -1158,7 +1161,8 @@ static int32_t setFileBlockActiveInBlockIter(STsdbReader* pReader, SDataBlockIte
for (int32_t i = index - 1; i >= pBlockIter->index; --i) {
SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, i);
STableBlockScanInfo* pBlockScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr);
STableBlockScanInfo* pBlockScanInfo =
getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr);
STableDataBlockIdx* pTableDataBlockIdx = taosArrayGet(pBlockScanInfo->pBlockIdxList, pBlockInfo->tbBlockIdx);
pTableDataBlockIdx->globalIndex = i + 1;
@ -1168,13 +1172,13 @@ static int32_t setFileBlockActiveInBlockIter(STsdbReader* pReader, SDataBlockIte
for (int32_t i = index + 1; i <= pBlockIter->index; ++i) {
SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, i);
STableBlockScanInfo* pBlockScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr);
STableBlockScanInfo* pBlockScanInfo =
getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr);
STableDataBlockIdx* pTableDataBlockIdx = taosArrayGet(pBlockScanInfo->pBlockIdxList, pBlockInfo->tbBlockIdx);
pTableDataBlockIdx->globalIndex = i - 1;
taosArraySet(pBlockIter->blockList, i - 1, pBlockInfo);
}
}
taosArraySet(pBlockIter->blockList, pBlockIter->index, &fblock);
@ -1286,7 +1290,8 @@ static void getBlockToLoadInfo(SDataBlockToLoadInfo* pInfo, SFileDataBlockInfo*
int32_t neighborIndex = 0;
SBrinRecord rec = {0};
bool hasNeighbor = getNeighborBlockOfSameTable(&pReader->status.blockIter, pBlockInfo, pScanInfo, &neighborIndex, pReader->info.order, &rec);
bool hasNeighbor = getNeighborBlockOfSameTable(&pReader->status.blockIter, pBlockInfo, pScanInfo, &neighborIndex,
pReader->info.order, &rec);
// overlap with neighbor
if (hasNeighbor) {
@ -1420,9 +1425,7 @@ static bool nextRowFromLastBlocks(SLastBlockReader* pLastBlockReader, STableBloc
}
}
static void doPinSttBlock(SLastBlockReader* pLastBlockReader) {
tMergeTreePinSttBlock(&pLastBlockReader->mergeTree);
}
static void doPinSttBlock(SLastBlockReader* pLastBlockReader) { tMergeTreePinSttBlock(&pLastBlockReader->mergeTree); }
static void doUnpinSttBlock(SLastBlockReader* pLastBlockReader) {
tMergeTreeUnpinSttBlock(&pLastBlockReader->mergeTree);
@ -2219,8 +2222,9 @@ static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanI
SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
TSDBROW *pRow = NULL, *piRow = NULL;
int64_t key = (pBlockData->nRow > 0 && (!pDumpInfo->allDumped)) ? pBlockData->aTSKEY[pDumpInfo->rowIndex] :
(ASCENDING_TRAVERSE(pReader->info.order) ? INT64_MAX : INT64_MIN);
int64_t key = (pBlockData->nRow > 0 && (!pDumpInfo->allDumped))
? pBlockData->aTSKEY[pDumpInfo->rowIndex]
: (ASCENDING_TRAVERSE(pReader->info.order) ? INT64_MAX : INT64_MIN);
if (pBlockScanInfo->iter.hasVal) {
pRow = getValidMemRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader);
}
@ -2257,7 +2261,8 @@ static int32_t loadNeighborIfOverlap(SFileDataBlockInfo* pBlockInfo, STableBlock
*loadNeighbor = false;
SBrinRecord rec = {0};
bool hasNeighbor = getNeighborBlockOfSameTable(&pReader->status.blockIter, pBlockInfo, pBlockScanInfo, &nextIndex, pReader->info.order, &rec);
bool hasNeighbor = getNeighborBlockOfSameTable(&pReader->status.blockIter, pBlockInfo, pBlockScanInfo, &nextIndex,
pReader->info.order, &rec);
if (!hasNeighbor) { // do nothing
return code;
}
@ -4907,12 +4912,12 @@ int32_t tsdbTakeReadSnap2(STsdbReader* pReader, _query_reseek_func_t reseek, STs
SVersionRange* pRange = &pReader->info.verRange;
// lock
taosThreadRwlockRdlock(&pTsdb->rwLock);
taosThreadMutexLock(&pTsdb->mutex);
// alloc
STsdbReadSnap* pSnap = (STsdbReadSnap*)taosMemoryCalloc(1, sizeof(STsdbReadSnap));
if (pSnap == NULL) {
taosThreadRwlockUnlock(&pTsdb->rwLock);
taosThreadMutexUnlock(&pTsdb->mutex);
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
@ -4922,7 +4927,7 @@ int32_t tsdbTakeReadSnap2(STsdbReader* pReader, _query_reseek_func_t reseek, STs
pSnap->pMem = pTsdb->mem;
pSnap->pNode = taosMemoryMalloc(sizeof(*pSnap->pNode));
if (pSnap->pNode == NULL) {
taosThreadRwlockUnlock(&pTsdb->rwLock);
taosThreadMutexUnlock(&pTsdb->mutex);
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
@ -4937,7 +4942,7 @@ int32_t tsdbTakeReadSnap2(STsdbReader* pReader, _query_reseek_func_t reseek, STs
pSnap->pIMem = pTsdb->imem;
pSnap->pINode = taosMemoryMalloc(sizeof(*pSnap->pINode));
if (pSnap->pINode == NULL) {
taosThreadRwlockUnlock(&pTsdb->rwLock);
taosThreadMutexUnlock(&pTsdb->mutex);
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
@ -4952,7 +4957,7 @@ int32_t tsdbTakeReadSnap2(STsdbReader* pReader, _query_reseek_func_t reseek, STs
code = tsdbFSCreateRefSnapshotWithoutLock(pTsdb->pFS, &pSnap->pfSetArray);
// unlock
taosThreadRwlockUnlock(&pTsdb->rwLock);
taosThreadMutexUnlock(&pTsdb->mutex);
if (code == TSDB_CODE_SUCCESS) {
tsdbTrace("vgId:%d, take read snapshot", TD_VID(pTsdb->pVnode));
@ -5005,4 +5010,5 @@ void tsdbReaderSetId2(STsdbReader* pReader, const char* idstr) {
pReader->status.fileIter.pLastBlockReader->mergeTree.idStr = pReader->idStr;
}
void tsdbReaderSetCloseFlag(STsdbReader* pReader) { /*pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED;*/ }
void tsdbReaderSetCloseFlag(STsdbReader* pReader) { /*pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED;*/
}

View File

@ -25,11 +25,6 @@ typedef struct {
TFileSetArray *fsetArr;
TFileOpArray fopArr[1];
struct {
int32_t fsetArrIdx;
STFileSet *fset;
} ctx[1];
} SRTNer;
static int32_t tsdbDoRemoveFileObject(SRTNer *rtner, const STFileObj *fobj) {
@ -227,8 +222,8 @@ _exit:
typedef struct {
STsdb *tsdb;
int32_t sync;
int64_t now;
int32_t fid;
} SRtnArg;
static int32_t tsdbDoRetentionBegin(SRtnArg *arg, SRTNer *rtner) {
@ -263,15 +258,15 @@ static int32_t tsdbDoRetentionEnd(SRTNer *rtner) {
code = tsdbFSEditBegin(rtner->tsdb->pFS, rtner->fopArr, TSDB_FEDIT_MERGE);
TSDB_CHECK_CODE(code, lino, _exit);
taosThreadRwlockWrlock(&rtner->tsdb->rwLock);
taosThreadMutexLock(&rtner->tsdb->mutex);
code = tsdbFSEditCommit(rtner->tsdb->pFS);
if (code) {
taosThreadRwlockUnlock(&rtner->tsdb->rwLock);
taosThreadMutexUnlock(&rtner->tsdb->mutex);
TSDB_CHECK_CODE(code, lino, _exit);
}
taosThreadRwlockUnlock(&rtner->tsdb->rwLock);
taosThreadMutexUnlock(&rtner->tsdb->mutex);
TARRAY2_DESTROY(rtner->fopArr, NULL);
@ -285,22 +280,14 @@ _exit:
return code;
}
static int32_t tsdbDoRetention2(void *arg) {
static int32_t tsdbDoRetentionOnFileSet(SRTNer *rtner, STFileSet *fset) {
int32_t code = 0;
int32_t lino = 0;
SRTNer rtner[1] = {0};
STFileObj *fobj = NULL;
int32_t expLevel = tsdbFidLevel(fset->fid, &rtner->tsdb->keepCfg, rtner->now);
code = tsdbDoRetentionBegin(arg, rtner);
TSDB_CHECK_CODE(code, lino, _exit);
for (rtner->ctx->fsetArrIdx = 0; rtner->ctx->fsetArrIdx < TARRAY2_SIZE(rtner->fsetArr); rtner->ctx->fsetArrIdx++) {
rtner->ctx->fset = TARRAY2_GET(rtner->fsetArr, rtner->ctx->fsetArrIdx);
STFileObj *fobj;
int32_t expLevel = tsdbFidLevel(rtner->ctx->fset->fid, &rtner->tsdb->keepCfg, rtner->now);
if (expLevel < 0) { // remove the file set
for (int32_t ftype = 0; (ftype < TSDB_FTYPE_MAX) && (fobj = rtner->ctx->fset->farr[ftype], 1); ++ftype) {
if (expLevel < 0) { // remove the fileset
for (int32_t ftype = 0; (ftype < TSDB_FTYPE_MAX) && (fobj = fset->farr[ftype], 1); ++ftype) {
if (fobj == NULL) continue;
int32_t nlevel = tfsGetLevel(rtner->tsdb->pVnode->pTfs);
@ -314,15 +301,15 @@ static int32_t tsdbDoRetention2(void *arg) {
}
SSttLvl *lvl;
TARRAY2_FOREACH(rtner->ctx->fset->lvlArr, lvl) {
TARRAY2_FOREACH(fset->lvlArr, lvl) {
TARRAY2_FOREACH(lvl->fobjArr, fobj) {
code = tsdbDoRemoveFileObject(rtner, fobj);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
} else if (expLevel == 0) {
continue;
} else {
} else if (expLevel == 0) { // only migrate to upper level
return 0;
} else { // migrate
SDiskID did;
if (tfsAllocDisk(rtner->tsdb->pVnode->pTfs, expLevel, &did) < 0) {
@ -332,7 +319,7 @@ static int32_t tsdbDoRetention2(void *arg) {
tfsMkdirRecurAt(rtner->tsdb->pVnode->pTfs, rtner->tsdb->path, did);
// data
for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX && (fobj = rtner->ctx->fset->farr[ftype], 1); ++ftype) {
for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX && (fobj = fset->farr[ftype], 1); ++ftype) {
if (fobj == NULL) continue;
if (fobj->f->did.level == did.level) continue;
@ -360,7 +347,7 @@ static int32_t tsdbDoRetention2(void *arg) {
// stt
SSttLvl *lvl;
TARRAY2_FOREACH(rtner->ctx->fset->lvlArr, lvl) {
TARRAY2_FOREACH(fset->lvlArr, lvl) {
TARRAY2_FOREACH(lvl->fobjArr, fobj) {
if (fobj->f->did.level == did.level) continue;
@ -369,10 +356,6 @@ static int32_t tsdbDoRetention2(void *arg) {
}
}
}
}
code = tsdbDoRetentionEnd(rtner);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
@ -389,30 +372,105 @@ _exit:
return code;
}
static void tsdbFreeRtnArg(void *arg) {
SRtnArg *rArg = (SRtnArg *)arg;
if (rArg->sync) {
tsem_post(&rArg->tsdb->pVnode->canCommit);
static int32_t tsdbDoRetentionSync(void *arg) {
int32_t code = 0;
int32_t lino = 0;
SRTNer rtner[1] = {0};
code = tsdbDoRetentionBegin(arg, rtner);
TSDB_CHECK_CODE(code, lino, _exit);
STFileSet *fset;
TARRAY2_FOREACH(rtner->fsetArr, fset) {
code = tsdbDoRetentionOnFileSet(rtner, fset);
TSDB_CHECK_CODE(code, lino, _exit);
}
taosMemoryFree(arg);
code = tsdbDoRetentionEnd(rtner);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code);
}
tsem_post(&((SRtnArg *)arg)->tsdb->pVnode->canCommit);
return code;
}
int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync) {
SRtnArg *arg = taosMemoryMalloc(sizeof(*arg));
if (arg == NULL) return TSDB_CODE_OUT_OF_MEMORY;
arg->tsdb = tsdb;
arg->sync = sync;
arg->now = now;
static int32_t tsdbDoRetentionAsync(void *arg) {
int32_t code = 0;
int32_t lino = 0;
SRTNer rtner[1] = {0};
if (sync) {
tsem_wait(&tsdb->pVnode->canCommit);
code = tsdbDoRetentionBegin(arg, rtner);
TSDB_CHECK_CODE(code, lino, _exit);
STFileSet *fset;
TARRAY2_FOREACH(rtner->fsetArr, fset) {
if (fset->fid != ((SRtnArg *)arg)->fid) continue;
code = tsdbDoRetentionOnFileSet(rtner, fset);
TSDB_CHECK_CODE(code, lino, _exit);
}
int64_t taskid;
int32_t code =
tsdbFSScheduleBgTask(tsdb->pFS, TSDB_BG_TASK_RETENTION, tsdbDoRetention2, tsdbFreeRtnArg, arg, &taskid);
code = tsdbDoRetentionEnd(rtner);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbFreeRtnArg(arg);
TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code);
}
return code;
}
static void tsdbFreeRtnArg(void *arg) { taosMemoryFree(arg); }
int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync) {
int32_t code = 0;
if (sync) { // sync retention
SRtnArg *arg = taosMemoryMalloc(sizeof(*arg));
if (arg == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
arg->tsdb = tsdb;
arg->now = now;
arg->fid = INT32_MAX;
tsem_wait(&tsdb->pVnode->canCommit);
code = vnodeScheduleTask(tsdbDoRetentionSync, arg);
if (code) {
tsem_post(&tsdb->pVnode->canCommit);
taosMemoryFree(arg);
return code;
}
} else { // async retention
taosThreadMutexLock(&tsdb->mutex);
STFileSet *fset;
TARRAY2_FOREACH(tsdb->pFS->fSetArr, fset) {
SRtnArg *arg = taosMemoryMalloc(sizeof(*arg));
if (arg == NULL) {
taosThreadMutexUnlock(&tsdb->mutex);
return TSDB_CODE_OUT_OF_MEMORY;
}
arg->tsdb = tsdb;
arg->now = now;
arg->fid = fset->fid;
code = tsdbFSScheduleBgTask(tsdb->pFS, fset->fid, TSDB_BG_TASK_RETENTION, tsdbDoRetentionAsync, tsdbFreeRtnArg,
arg, NULL);
if (code) {
tsdbFreeRtnArg(arg);
taosThreadMutexUnlock(&tsdb->mutex);
return code;
}
}
taosThreadMutexUnlock(&tsdb->mutex);
}
return code;
}

View File

@ -1095,17 +1095,17 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** writer, int8_t rollback) {
code = tsdbFSEditAbort(writer[0]->tsdb->pFS);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
taosThreadRwlockWrlock(&writer[0]->tsdb->rwLock);
taosThreadMutexLock(&writer[0]->tsdb->mutex);
code = tsdbFSEditCommit(writer[0]->tsdb->pFS);
if (code) {
taosThreadRwlockUnlock(&writer[0]->tsdb->rwLock);
taosThreadMutexUnlock(&writer[0]->tsdb->mutex);
TSDB_CHECK_CODE(code, lino, _exit);
}
writer[0]->tsdb->pFS->fsstate = TSDB_FS_STATE_NORMAL;
taosThreadRwlockUnlock(&writer[0]->tsdb->rwLock);
taosThreadMutexUnlock(&writer[0]->tsdb->mutex);
}
tsdbFSEnableBgTask(tsdb->pFS);
@ -1299,7 +1299,7 @@ static STsdbSnapPartList* tsdbGetSnapPartList(STFileSystem* fs) {
}
int32_t code = 0;
taosThreadRwlockRdlock(&fs->tsdb->rwLock);
taosThreadMutexLock(&fs->tsdb->mutex);
STFileSet* fset;
TARRAY2_FOREACH(fs->fSetArr, fset) {
STsdbSnapPartition* pItem = NULL;
@ -1311,7 +1311,7 @@ static STsdbSnapPartList* tsdbGetSnapPartList(STFileSystem* fs) {
code = TARRAY2_SORT_INSERT(pList, pItem, tsdbSnapPartCmprFn);
ASSERT(code == 0);
}
taosThreadRwlockUnlock(&fs->tsdb->rwLock);
taosThreadMutexUnlock(&fs->tsdb->mutex);
if (code) {
TARRAY2_DESTROY(pList, tsdbSnapPartitionClear);

View File

@ -584,6 +584,7 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot *
// commit json
if (!rollback) {
ASSERT(pVnode->config.vgId == pWriter->info.config.vgId);
pWriter->info.state.committed = pWriter->ever;
pVnode->config = pWriter->info.config;
pVnode->state = (SVState){.committed = pWriter->info.state.committed,

View File

@ -20,6 +20,7 @@
#include "vnode.h"
#include "vnodeInt.h"
#include "audit.h"
#include "tstrbuild.h"
static int32_t vnodeProcessCreateStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessAlterStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
@ -886,6 +887,7 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
char tbName[TSDB_TABLE_FNAME_LEN];
STbUidStore *pStore = NULL;
SArray *tbUids = NULL;
SArray *tbNames = NULL;
pRsp->msgType = TDMT_VND_CREATE_TABLE_RSP;
pRsp->code = TSDB_CODE_SUCCESS;
@ -902,7 +904,8 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
rsp.pArray = taosArrayInit(req.nReqs, sizeof(cRsp));
tbUids = taosArrayInit(req.nReqs, sizeof(int64_t));
if (rsp.pArray == NULL || tbUids == NULL) {
tbNames = taosArrayInit(req.nReqs, sizeof(char*));
if (rsp.pArray == NULL || tbUids == NULL || tbNames == NULL) {
rcode = -1;
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
@ -948,12 +951,9 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
taosArrayPush(rsp.pArray, &cRsp);
if(tsEnableAuditCreateTable){
int64_t clusterId = pVnode->config.syncCfg.nodeInfo[0].clusterId;
SName name = {0};
tNameFromString(&name, pVnode->config.dbname, T_NAME_ACCT | T_NAME_DB);
auditRecord(NULL, clusterId, "createTable", name.dbname, "", pCreateReq->name, strlen(pCreateReq->name));
char* str = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN);
strcpy(str, pCreateReq->name);
taosArrayPush(tbNames, &str);
}
}
@ -976,6 +976,30 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
tEncoderInit(&encoder, pRsp->pCont, pRsp->contLen);
tEncodeSVCreateTbBatchRsp(&encoder, &rsp);
if(tsEnableAuditCreateTable){
int64_t clusterId = pVnode->config.syncCfg.nodeInfo[0].clusterId;
SName name = {0};
tNameFromString(&name, pVnode->config.dbname, T_NAME_ACCT | T_NAME_DB);
SStringBuilder sb = {0};
for(int32_t iReq = 0; iReq < req.nReqs; iReq++){
char** key = (char**)taosArrayGet(tbNames, iReq);
taosStringBuilderAppendStringLen(&sb, *key, strlen(*key));
if(iReq < req.nReqs - 1){
taosStringBuilderAppendChar(&sb, ',');
}
taosMemoryFreeClear(*key);
}
size_t len = 0;
char* keyJoined = taosStringBuilderGetResult(&sb, &len);
auditRecord(NULL, clusterId, "createTable", name.dbname, "", keyJoined, len);
taosStringBuilderDestroy(&sb);
}
_exit:
for (int32_t iReq = 0; iReq < req.nReqs; iReq++) {
pCreateReq = req.pReqs + iReq;
@ -987,6 +1011,7 @@ _exit:
taosArrayDestroy(tbUids);
tDecoderClear(&decoder);
tEncoderClear(&encoder);
taosArrayDestroy(tbNames);
return rcode;
}
@ -1120,6 +1145,7 @@ static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, in
int32_t ret;
SArray *tbUids = NULL;
STbUidStore *pStore = NULL;
SArray *tbNames = NULL;
pRsp->msgType = TDMT_VND_DROP_TABLE_RSP;
pRsp->pCont = NULL;
@ -1138,7 +1164,8 @@ static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, in
// process req
tbUids = taosArrayInit(req.nReqs, sizeof(int64_t));
rsp.pArray = taosArrayInit(req.nReqs, sizeof(SVDropTbRsp));
if (tbUids == NULL || rsp.pArray == NULL) goto _exit;
tbNames = taosArrayInit(req.nReqs, sizeof(char*));
if (tbUids == NULL || rsp.pArray == NULL || tbNames == NULL) goto _exit;
for (int32_t iReq = 0; iReq < req.nReqs; iReq++) {
SVDropTbReq *pDropTbReq = req.pReqs + iReq;
@ -1159,11 +1186,41 @@ static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, in
}
taosArrayPush(rsp.pArray, &dropTbRsp);
if(tsEnableAuditCreateTable){
char* str = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN);
strcpy(str, pDropTbReq->name);
taosArrayPush(tbNames, &str);
}
}
tqUpdateTbUidList(pVnode->pTq, tbUids, false);
tdUpdateTbUidList(pVnode->pSma, pStore, false);
if(tsEnableAuditCreateTable){
int64_t clusterId = pVnode->config.syncCfg.nodeInfo[0].clusterId;
SName name = {0};
tNameFromString(&name, pVnode->config.dbname, T_NAME_ACCT | T_NAME_DB);
SStringBuilder sb = {0};
for(int32_t iReq = 0; iReq < req.nReqs; iReq++){
char** key = (char**)taosArrayGet(tbNames, iReq);
taosStringBuilderAppendStringLen(&sb, *key, strlen(*key));
if(iReq < req.nReqs - 1){
taosStringBuilderAppendChar(&sb, ',');
}
taosMemoryFreeClear(*key);
}
size_t len = 0;
char* keyJoined = taosStringBuilderGetResult(&sb, &len);
auditRecord(NULL, clusterId, "dropTable", name.dbname, "", keyJoined, len);
taosStringBuilderDestroy(&sb);
}
_exit:
taosArrayDestroy(tbUids);
tdUidStoreFree(pStore);
@ -1174,6 +1231,7 @@ _exit:
tEncodeSVDropTbBatchRsp(&encoder, &rsp);
tEncoderClear(&encoder);
taosArrayDestroy(rsp.pArray);
taosArrayDestroy(tbNames);
return 0;
}

View File

@ -516,7 +516,10 @@ static int32_t vnodeSnapshotStopWrite(const SSyncFSM *pFsm, void *pWriter, bool
pVnode->config.vgId, isApply, pSnapshot->lastApplyIndex, pSnapshot->lastApplyTerm, pSnapshot->lastConfigIndex);
int32_t code = vnodeSnapWriterClose(pWriter, !isApply, pSnapshot);
vInfo("vgId:%d, apply vnode snapshot finished, code:0x%x", pVnode->config.vgId, code);
if (code != 0) {
vError("vgId:%d, failed to finish applying vnode snapshot since %s, code:0x%x", pVnode->config.vgId, terrstr(),
code);
}
return code;
}

View File

@ -1030,8 +1030,12 @@ SNode* createSelectStmt(SAstCreateContext* pCxt, bool isDistinct, SNodeList* pPr
SNode* setSelectStmtTagMode(SAstCreateContext* pCxt, SNode* pStmt, bool bSelectTags) {
if (pStmt && QUERY_NODE_SELECT_STMT == nodeType(pStmt)) {
if (pCxt->pQueryCxt->biMode) {
((SSelectStmt*)pStmt)->tagScan = true;
} else {
((SSelectStmt*)pStmt)->tagScan = bSelectTags;
}
}
return pStmt;
}

View File

@ -3499,6 +3499,20 @@ static const char* getPrecisionStr(uint8_t precision) {
return "unknown";
}
static int64_t getPrecisionMultiple(uint8_t precision) {
switch (precision) {
case TSDB_TIME_PRECISION_MILLI:
return 1;
case TSDB_TIME_PRECISION_MICRO:
return 1000;
case TSDB_TIME_PRECISION_NANO:
return 1000000;
default:
break;
}
return 1;
}
static void convertVarDuration(SValueNode* pOffset, uint8_t precision) {
const int64_t factors[3] = {NANOSECOND_PER_MSEC, NANOSECOND_PER_USEC, 1};
const int8_t units[3] = {TIME_UNIT_MILLISECOND, TIME_UNIT_MICROSECOND, TIME_UNIT_NANOSECOND};
@ -3512,6 +3526,7 @@ static void convertVarDuration(SValueNode* pOffset, uint8_t precision) {
pOffset->unit = units[precision];
}
static const int64_t tsdbMaxKeepMS = (int64_t)60 * 1000 * TSDB_MAX_KEEP;
static int32_t checkIntervalWindow(STranslateContext* pCxt, SIntervalWindowNode* pInterval) {
uint8_t precision = ((SColumnNode*)pInterval->pCol)->node.resType.precision;
@ -3520,6 +3535,8 @@ static int32_t checkIntervalWindow(STranslateContext* pCxt, SIntervalWindowNode*
if (pInter->datum.i <= 0 || (!valInter && pInter->datum.i < tsMinIntervalTime)) {
return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INTER_VALUE_TOO_SMALL, tsMinIntervalTime,
getPrecisionStr(precision));
} else if (pInter->datum.i / getPrecisionMultiple(precision) > tsdbMaxKeepMS) {
return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INTER_VALUE_TOO_BIG, 1000, "years");
}
if (NULL != pInterval->pOffset) {

View File

@ -65,6 +65,8 @@ static char* getSyntaxErrFormat(int32_t errCode) {
return "This statement is no longer supported";
case TSDB_CODE_PAR_INTER_VALUE_TOO_SMALL:
return "Interval cannot be less than %d %s";
case TSDB_CODE_PAR_INTER_VALUE_TOO_BIG:
return "Interval cannot be more than %d %s";
case TSDB_CODE_PAR_DB_NOT_SPECIFIED:
return "Database not specified";
case TSDB_CODE_PAR_INVALID_IDENTIFIER_NAME:

View File

@ -249,8 +249,8 @@ int32_t syncNodeOnRequestVote(SSyncNode* pNode, const SRpcMsg* pMsg);
int32_t syncNodeOnRequestVoteReply(SSyncNode* pNode, const SRpcMsg* pMsg);
int32_t syncNodeOnAppendEntries(SSyncNode* pNode, const SRpcMsg* pMsg);
int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, const SRpcMsg* pMsg);
int32_t syncNodeOnSnapshot(SSyncNode* ths, const SRpcMsg* pMsg);
int32_t syncNodeOnSnapshotRsp(SSyncNode* ths, const SRpcMsg* pMsg);
int32_t syncNodeOnSnapshot(SSyncNode* ths, SRpcMsg* pMsg);
int32_t syncNodeOnSnapshotRsp(SSyncNode* ths, SRpcMsg* pMsg);
int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pMsg);
int32_t syncNodeOnHeartbeatReply(SSyncNode* ths, const SRpcMsg* pMsg);
int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pMsg);

View File

@ -22,21 +22,41 @@ extern "C" {
#include "syncInt.h"
#define SYNC_SNAPSHOT_SEQ_INVALID -2
#define SYNC_SNAPSHOT_SEQ_FORCE_CLOSE -3
#define SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT -1
#define SYNC_SNAPSHOT_SEQ_INVALID -2
#define SYNC_SNAPSHOT_SEQ_PREP -1
#define SYNC_SNAPSHOT_SEQ_BEGIN 0
#define SYNC_SNAPSHOT_SEQ_END 0x7FFFFFFF
#define SYNC_SNAPSHOT_RETRY_MS 5000
typedef struct SSyncSnapBuffer {
void *entries[TSDB_SYNC_SNAP_BUFFER_SIZE];
int64_t start;
int64_t cursor;
int64_t end;
int64_t size;
TdThreadMutex mutex;
void (*entryDeleteCb)(void *ptr);
} SSyncSnapBuffer;
typedef struct SyncSnapBlock {
int32_t seq;
int8_t acked;
int64_t sendTimeMs;
int16_t blockType;
void *pBlock;
int32_t blockLen;
} SyncSnapBlock;
void syncSnapBlockDestroy(void *ptr);
typedef struct SSyncSnapshotSender {
int8_t start;
int32_t seq;
int32_t ack;
void *pReader;
void *pCurrentBlock;
int32_t blockLen;
SSnapshotParam snapshotParam;
SSnapshot snapshot;
SSyncCfg lastConfig;
@ -47,6 +67,9 @@ typedef struct SSyncSnapshotSender {
int64_t lastSendTime;
bool finish;
// ring buffer for ack
SSyncSnapBuffer *pSndBuf;
// init when create
SSyncNode *pSyncNode;
int32_t replicaIndex;
@ -72,6 +95,9 @@ typedef struct SSyncSnapshotReceiver {
SSnapshotParam snapshotParam;
SSnapshot snapshot;
// buffer
SSyncSnapBuffer *pRcvBuf;
// init when create
SSyncNode *pSyncNode;
} SSyncSnapshotReceiver;
@ -83,8 +109,8 @@ void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver);
bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver);
// on message
int32_t syncNodeOnSnapshot(SSyncNode *ths, const SRpcMsg *pMsg);
int32_t syncNodeOnSnapshotRsp(SSyncNode *ths, const SRpcMsg *pMsg);
// int32_t syncNodeOnSnapshot(SSyncNode *ths, const SRpcMsg *pMsg);
// int32_t syncNodeOnSnapshotRsp(SSyncNode *ths, const SRpcMsg *pMsg);
SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode *pSyncNode, SyncIndex snapshotLastApplyIndex);

View File

@ -818,7 +818,8 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo, int32_t vnodeVersion) {
if (!taosCheckExistFile(pSyncNode->configPath)) {
// create a new raft config file
sInfo("vgId:%d, create a new raft config file", pSyncNode->vgId);
sInfo("vgId:%d, create a new raft config file", pSyncInfo->vgId);
pSyncNode->vgId = pSyncInfo->vgId;
pSyncNode->raftCfg.isStandBy = pSyncInfo->isStandBy;
pSyncNode->raftCfg.snapshotStrategy = pSyncInfo->snapshotStrategy;
pSyncNode->raftCfg.lastConfigIndex = pSyncInfo->syncCfg.lastIndex;

View File

@ -797,7 +797,7 @@ _out:
pMgr->retryBackoff = syncLogReplGetNextRetryBackoff(pMgr);
SSyncLogBuffer* pBuf = pNode->pLogBuf;
sInfo("vgId:%d, resend %d sync log entries. dest:%" PRIx64 ", indexes:%" PRId64 " ..., terms: ... %" PRId64
", retryWaitMs:%" PRId64 ", mgr: [%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64
", retryWaitMs:%" PRId64 ", repl-mgr:[%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64
" %" PRId64 ", %" PRId64 ")",
pNode->vgId, count, pDestId->addr, firstIndex, term, retryWaitMs, pMgr->startIndex, pMgr->matchIndex,
pMgr->endIndex, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
@ -815,9 +815,9 @@ int32_t syncLogReplRecover(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEn
ASSERT(pMgr->matchIndex == 0);
if (pMsg->matchIndex < 0) {
pMgr->restored = true;
sInfo("vgId:%d, sync log repl restored. peer: dnode:%d (%" PRIx64 "), mgr: rs(%d) [%" PRId64 " %" PRId64
", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")",
pNode->vgId, DID(&destId), destId.addr, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex,
sInfo("vgId:%d, sync log repl restored. peer: dnode:%d (%" PRIx64 "), repl-mgr:[%" PRId64 " %" PRId64 ", %" PRId64
"), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")",
pNode->vgId, DID(&destId), destId.addr, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex,
pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
return 0;
}
@ -832,9 +832,9 @@ int32_t syncLogReplRecover(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEn
if (pMsg->success && pMsg->matchIndex == pMsg->lastSendIndex) {
pMgr->matchIndex = pMsg->matchIndex;
pMgr->restored = true;
sInfo("vgId:%d, sync log repl restored. peer: dnode:%d (%" PRIx64 "), mgr: rs(%d) [%" PRId64 " %" PRId64
", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")",
pNode->vgId, DID(&destId), destId.addr, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex,
sInfo("vgId:%d, sync log repl restored. peer: dnode:%d (%" PRIx64 "), repl-mgr:[%" PRId64 " %" PRId64 ", %" PRId64
"), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")",
pNode->vgId, DID(&destId), destId.addr, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex,
pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
return 0;
}
@ -958,10 +958,10 @@ int32_t syncLogReplProbe(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex inde
pMgr->endIndex = index + 1;
SSyncLogBuffer* pBuf = pNode->pLogBuf;
sTrace("vgId:%d, probe peer:%" PRIx64 " with msg of index:%" PRId64 " term:%" PRId64 ". mgr (rs:%d): [%" PRId64
sTrace("vgId:%d, probe peer:%" PRIx64 " with msg of index:%" PRId64 " term:%" PRId64 ". repl-mgr:[%" PRId64
" %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")",
pNode->vgId, pDestId->addr, index, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex,
pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
pNode->vgId, pDestId->addr, index, term, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, pBuf->startIndex,
pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
return 0;
}
@ -1002,9 +1002,9 @@ int32_t syncLogReplAttempt(SSyncLogReplMgr* pMgr, SSyncNode* pNode) {
pMgr->endIndex = index + 1;
if (barrier) {
sInfo("vgId:%d, replicated sync barrier to dnode:%d. index:%" PRId64 ", term:%" PRId64
", repl mgr: rs(%d) [%" PRId64 " %" PRId64 ", %" PRId64 ")",
pNode->vgId, DID(pDestId), index, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex);
sInfo("vgId:%d, replicated sync barrier to dnode:%d. index:%" PRId64 ", term:%" PRId64 ", repl-mgr:[%" PRId64
" %" PRId64 ", %" PRId64 ")",
pNode->vgId, DID(pDestId), index, term, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex);
break;
}
}
@ -1013,10 +1013,10 @@ int32_t syncLogReplAttempt(SSyncLogReplMgr* pMgr, SSyncNode* pNode) {
SSyncLogBuffer* pBuf = pNode->pLogBuf;
sTrace("vgId:%d, replicated %d msgs to peer:%" PRIx64 ". indexes:%" PRId64 "..., terms: ...%" PRId64
", mgr: (rs:%d) [%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64
", repl-mgr:[%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64
")",
pNode->vgId, count, pDestId->addr, firstIndex, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex,
pMgr->endIndex, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
pNode->vgId, count, pDestId->addr, firstIndex, term, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex,
pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
return 0;
}

View File

@ -23,6 +23,44 @@
#include "syncReplication.h"
#include "syncUtil.h"
static void syncSnapBufferReset(SSyncSnapBuffer *pBuf) {
taosThreadMutexLock(&pBuf->mutex);
for (int64_t i = pBuf->start; i < pBuf->end; ++i) {
if (pBuf->entryDeleteCb) {
pBuf->entryDeleteCb(pBuf->entries[i % pBuf->size]);
}
pBuf->entries[i % pBuf->size] = NULL;
}
pBuf->start = SYNC_SNAPSHOT_SEQ_BEGIN + 1;
pBuf->end = pBuf->start;
pBuf->cursor = pBuf->start - 1;
taosThreadMutexUnlock(&pBuf->mutex);
}
static void syncSnapBufferDestroy(SSyncSnapBuffer **ppBuf) {
if (ppBuf == NULL || ppBuf[0] == NULL) return;
SSyncSnapBuffer *pBuf = ppBuf[0];
syncSnapBufferReset(pBuf);
taosThreadMutexDestroy(&pBuf->mutex);
taosMemoryFree(ppBuf[0]);
ppBuf[0] = NULL;
return;
}
static SSyncSnapBuffer *syncSnapBufferCreate() {
SSyncSnapBuffer *pBuf = taosMemoryCalloc(1, sizeof(SSyncSnapBuffer));
if (pBuf == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
pBuf->size = sizeof(pBuf->entries) / sizeof(void *);
ASSERT(pBuf->size == TSDB_SYNC_SNAP_BUFFER_SIZE);
taosThreadMutexInit(&pBuf->mutex, NULL);
return pBuf;
}
SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaIndex) {
bool condition = (pSyncNode->pFsm->FpSnapshotStartRead != NULL) && (pSyncNode->pFsm->FpSnapshotStopRead != NULL) &&
(pSyncNode->pFsm->FpSnapshotDoRead != NULL);
@ -38,8 +76,6 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI
pSender->seq = SYNC_SNAPSHOT_SEQ_INVALID;
pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID;
pSender->pReader = NULL;
pSender->pCurrentBlock = NULL;
pSender->blockLen = 0;
pSender->sendingMS = SYNC_SNAPSHOT_RETRY_MS;
pSender->pSyncNode = pSyncNode;
pSender->replicaIndex = replicaIndex;
@ -49,24 +85,42 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI
pSender->pSyncNode->pFsm->FpGetSnapshotInfo(pSender->pSyncNode->pFsm, &pSender->snapshot);
pSender->finish = false;
SSyncSnapBuffer *pSndBuf = syncSnapBufferCreate();
if (pSndBuf == NULL) {
taosMemoryFree(pSender);
pSender = NULL;
return NULL;
}
pSndBuf->entryDeleteCb = syncSnapBlockDestroy;
pSender->pSndBuf = pSndBuf;
syncSnapBufferReset(pSender->pSndBuf);
return pSender;
}
void syncSnapBlockDestroy(void *ptr) {
SyncSnapBlock *pBlk = ptr;
if (pBlk->pBlock != NULL) {
taosMemoryFree(pBlk->pBlock);
pBlk->pBlock = NULL;
pBlk->blockLen = 0;
}
taosMemoryFree(pBlk);
}
void snapshotSenderDestroy(SSyncSnapshotSender *pSender) {
if (pSender == NULL) return;
// free current block
if (pSender->pCurrentBlock != NULL) {
taosMemoryFree(pSender->pCurrentBlock);
pSender->pCurrentBlock = NULL;
}
// close reader
if (pSender->pReader != NULL) {
pSender->pSyncNode->pFsm->FpSnapshotStopRead(pSender->pSyncNode->pFsm, pSender->pReader);
pSender->pReader = NULL;
}
// free snap buffer
if (pSender->pSndBuf) {
syncSnapBufferDestroy(&pSender->pSndBuf);
}
// free sender
taosMemoryFree(pSender);
}
@ -79,11 +133,9 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) {
int8_t started = atomic_val_compare_exchange_8(&pSender->start, false, true);
if (started) return 0;
pSender->seq = SYNC_SNAPSHOT_SEQ_BEGIN;
pSender->seq = SYNC_SNAPSHOT_SEQ_PREP;
pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID;
pSender->pReader = NULL;
pSender->pCurrentBlock = NULL;
pSender->blockLen = 0;
pSender->snapshotParam.start = SYNC_INDEX_INVALID;
pSender->snapshotParam.end = SYNC_INDEX_INVALID;
pSender->snapshot.data = NULL;
@ -127,29 +179,28 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) {
SyncSnapshotSend *pMsg = rpcMsg.pCont;
pMsg->srcId = pSender->pSyncNode->myRaftId;
pMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex];
pMsg->term = raftStoreGetTerm(pSender->pSyncNode);
pMsg->term = pSender->term;
pMsg->beginIndex = pSender->snapshotParam.start;
pMsg->lastIndex = pSender->snapshot.lastApplyIndex;
pMsg->lastTerm = pSender->snapshot.lastApplyTerm;
pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex;
pMsg->lastConfig = pSender->lastConfig;
pMsg->startTime = pSender->startTime;
pMsg->seq = SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT;
pMsg->seq = pSender->seq;
if (dataLen > 0) {
pMsg->payloadType = snapInfo.type;
memcpy(pMsg->data, snapInfo.data, dataLen);
}
// event log
syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender start");
// send msg
if (syncNodeSendMsgById(&pMsg->destId, pSender->pSyncNode, &rpcMsg) != 0) {
sSError(pSender, "snapshot sender send msg failed since %s", terrstr());
goto _out;
}
sSInfo(pSender, "snapshot sender start, to dnode:%d.", DID(&pMsg->destId));
code = 0;
_out:
if (snapInfo.data) {
@ -175,48 +226,59 @@ void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) {
pSender->pReader = NULL;
}
// free current block
if (pSender->pCurrentBlock != NULL) {
taosMemoryFree(pSender->pCurrentBlock);
pSender->pCurrentBlock = NULL;
pSender->blockLen = 0;
}
syncSnapBufferReset(pSender->pSndBuf);
SRaftId destId = pSender->pSyncNode->replicasId[pSender->replicaIndex];
sSInfo(pSender, "snapshot sender stop, to dnode:%d, finish:%d", DID(&destId), finish);
}
// when sender receive ack, call this function to send msg from seq
// seq = ack + 1, already updated
static int32_t snapshotSend(SSyncSnapshotSender *pSender) {
// free memory last time (current seq - 1)
if (pSender->pCurrentBlock != NULL) {
taosMemoryFree(pSender->pCurrentBlock);
pSender->pCurrentBlock = NULL;
pSender->blockLen = 0;
int32_t code = -1;
SyncSnapBlock *pBlk = NULL;
if (pSender->seq < SYNC_SNAPSHOT_SEQ_END) {
pSender->seq++;
if (pSender->seq > SYNC_SNAPSHOT_SEQ_BEGIN) {
pBlk = taosMemoryCalloc(1, sizeof(SyncSnapBlock));
if (pBlk == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto _OUT;
}
pBlk->seq = pSender->seq;
// read data
int32_t ret = pSender->pSyncNode->pFsm->FpSnapshotDoRead(pSender->pSyncNode->pFsm, pSender->pReader,
&pSender->pCurrentBlock, &pSender->blockLen);
&pBlk->pBlock, &pBlk->blockLen);
if (ret != 0) {
sSError(pSender, "snapshot sender read failed since %s", terrstr());
return -1;
goto _OUT;
}
if (pSender->blockLen > 0) {
if (pBlk->blockLen > 0) {
// has read data
sSDebug(pSender, "vgId:%d, snapshot sender continue to read, blockLen:%d seq:%d", pSender->pSyncNode->vgId,
pSender->blockLen, pSender->seq);
sSDebug(pSender, "snapshot sender continue to read, blockLen:%d seq:%d", pBlk->blockLen, pBlk->seq);
} else {
// read finish, update seq to end
pSender->seq = SYNC_SNAPSHOT_SEQ_END;
sSInfo(pSender, "vgId:%d, snapshot sender read to the end, blockLen:%d seq:%d", pSender->pSyncNode->vgId,
pSender->blockLen, pSender->seq);
sSInfo(pSender, "snapshot sender read to the end");
code = 0;
goto _OUT;
}
}
}
ASSERT(pSender->seq >= SYNC_SNAPSHOT_SEQ_BEGIN && pSender->seq <= SYNC_SNAPSHOT_SEQ_END);
int32_t blockLen = (pBlk != NULL) ? pBlk->blockLen : 0;
// build msg
SRpcMsg rpcMsg = {0};
if (syncBuildSnapshotSend(&rpcMsg, pSender->blockLen, pSender->pSyncNode->vgId) != 0) {
if (syncBuildSnapshotSend(&rpcMsg, blockLen, pSender->pSyncNode->vgId) != 0) {
sSError(pSender, "vgId:%d, snapshot sender build msg failed since %s", pSender->pSyncNode->vgId, terrstr());
return -1;
goto _OUT;
}
SyncSnapshotSend *pMsg = rpcMsg.pCont;
@ -231,77 +293,83 @@ static int32_t snapshotSend(SSyncSnapshotSender *pSender) {
pMsg->startTime = pSender->startTime;
pMsg->seq = pSender->seq;
if (pSender->pCurrentBlock != NULL) {
memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen);
}
// event log
if (pSender->seq == SYNC_SNAPSHOT_SEQ_END) {
syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender finish");
} else {
syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender sending");
if (pBlk != NULL && pBlk->pBlock != NULL && pBlk->blockLen > 0) {
memcpy(pMsg->data, pBlk->pBlock, pBlk->blockLen);
}
// send msg
if (syncNodeSendMsgById(&pMsg->destId, pSender->pSyncNode, &rpcMsg) != 0) {
sSError(pSender, "snapshot sender send msg failed since %s", terrstr());
return -1;
goto _OUT;
}
pSender->lastSendTime = taosGetTimestampMs();
return 0;
// put in buffer
int64_t nowMs = taosGetTimestampMs();
if (pBlk) {
ASSERT(pBlk->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pBlk->seq < SYNC_SNAPSHOT_SEQ_END);
pBlk->sendTimeMs = nowMs;
pSender->pSndBuf->entries[pSender->seq % pSender->pSndBuf->size] = pBlk;
pBlk = NULL;
pSender->pSndBuf->end = TMAX(pSender->seq + 1, pSender->pSndBuf->end);
}
pSender->lastSendTime = nowMs;
code = 0;
_OUT:;
if (pBlk != NULL) {
syncSnapBlockDestroy(pBlk);
pBlk = NULL;
}
return code;
}
// send snapshot data from cache
int32_t snapshotReSend(SSyncSnapshotSender *pSender) {
SSyncSnapBuffer *pSndBuf = pSender->pSndBuf;
int32_t code = -1;
taosThreadMutexLock(&pSndBuf->mutex);
for (int32_t seq = pSndBuf->cursor + 1; seq < pSndBuf->end; ++seq) {
SyncSnapBlock *pBlk = pSndBuf->entries[seq % pSndBuf->size];
ASSERT(pBlk && !pBlk->acked);
int64_t nowMs = taosGetTimestampMs();
if (nowMs < pBlk->sendTimeMs + SYNC_SNAP_RESEND_MS) {
continue;
}
// build msg
SRpcMsg rpcMsg = {0};
if (syncBuildSnapshotSend(&rpcMsg, pSender->blockLen, pSender->pSyncNode->vgId) != 0) {
if (syncBuildSnapshotSend(&rpcMsg, pBlk->blockLen, pSender->pSyncNode->vgId) != 0) {
sSError(pSender, "snapshot sender build msg failed since %s", terrstr());
return -1;
goto _out;
}
SyncSnapshotSend *pMsg = rpcMsg.pCont;
pMsg->srcId = pSender->pSyncNode->myRaftId;
pMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex];
pMsg->term = raftStoreGetTerm(pSender->pSyncNode);
pMsg->term = pSender->term;
pMsg->beginIndex = pSender->snapshotParam.start;
pMsg->lastIndex = pSender->snapshot.lastApplyIndex;
pMsg->lastTerm = pSender->snapshot.lastApplyTerm;
pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex;
pMsg->lastConfig = pSender->lastConfig;
pMsg->startTime = pSender->startTime;
pMsg->seq = pSender->seq;
pMsg->seq = pBlk->seq;
if (pSender->pCurrentBlock != NULL && pSender->blockLen > 0) {
memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen);
if (pBlk->pBlock != NULL && pBlk->blockLen > 0) {
memcpy(pMsg->data, pBlk->pBlock, pBlk->blockLen);
}
// event log
syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender resend");
// send msg
if (syncNodeSendMsgById(&pMsg->destId, pSender->pSyncNode, &rpcMsg) != 0) {
sSError(pSender, "snapshot sender resend msg failed since %s", terrstr());
return -1;
goto _out;
}
pSender->lastSendTime = taosGetTimestampMs();
return 0;
}
static int32_t snapshotSenderUpdateProgress(SSyncSnapshotSender *pSender, SyncSnapshotRsp *pMsg) {
if (pMsg->ack != pSender->seq) {
sSError(pSender, "snapshot sender update seq failed, ack:%d seq:%d", pMsg->ack, pSender->seq);
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
return -1;
pBlk->sendTimeMs = nowMs;
}
pSender->ack = pMsg->ack;
pSender->seq++;
sSDebug(pSender, "snapshot sender update seq:%d", pSender->seq);
return 0;
code = 0;
_out:;
taosThreadMutexUnlock(&pSndBuf->mutex);
return code;
}
// return 0, start ok
@ -328,8 +396,6 @@ int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) {
return 0;
}
sSInfo(pSender, "snapshot sender start");
int32_t code = snapshotSenderStart(pSender);
if (code != 0) {
sSError(pSender, "snapshot sender start error since %s", terrstr());
@ -362,6 +428,16 @@ SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId from
pReceiver->snapshot.lastApplyTerm = 0;
pReceiver->snapshot.lastConfigIndex = SYNC_INDEX_INVALID;
SSyncSnapBuffer *pRcvBuf = syncSnapBufferCreate();
if (pRcvBuf == NULL) {
taosMemoryFree(pReceiver);
pReceiver = NULL;
return NULL;
}
pRcvBuf->entryDeleteCb = rpcFreeCont;
pReceiver->pRcvBuf = pRcvBuf;
syncSnapBufferReset(pReceiver->pRcvBuf);
return pReceiver;
}
@ -389,6 +465,11 @@ void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) {
pReceiver->snapshot.data = NULL;
}
// free snap buf
if (pReceiver->pRcvBuf) {
syncSnapBufferDestroy(&pReceiver->pRcvBuf);
}
// free receiver
taosMemoryFree(pReceiver);
}
@ -444,20 +525,19 @@ void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *p
int8_t started = atomic_val_compare_exchange_8(&pReceiver->start, false, true);
if (started) return;
pReceiver->ack = SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT;
pReceiver->ack = SYNC_SNAPSHOT_SEQ_PREP;
pReceiver->term = pPreMsg->term;
pReceiver->fromId = pPreMsg->srcId;
pReceiver->startTime = pPreMsg->startTime;
ASSERT(pReceiver->startTime);
// event log
sRInfo(pReceiver, "snapshot receiver is start");
sRInfo(pReceiver, "snapshot receiver start, from dnode:%d.", DID(&pReceiver->fromId));
}
// just set start = false
// FpSnapshotStopWrite should not be called
void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) {
sRInfo(pReceiver, "snapshot receiver stop, not apply, writer:%p", pReceiver->pWriter);
sRDebug(pReceiver, "snapshot receiver stop, not apply, writer:%p", pReceiver->pWriter);
int8_t stopped = !atomic_val_compare_exchange_8(&pReceiver->start, true, false);
if (stopped) return;
@ -472,6 +552,8 @@ void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) {
} else {
sRInfo(pReceiver, "snapshot receiver stop, writer is null");
}
syncSnapBufferReset(pReceiver->pRcvBuf);
}
// when recv last snapshot block, apply data into snapshot
@ -479,7 +561,7 @@ static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnap
int32_t code = 0;
if (pReceiver->pWriter != NULL) {
// write data
sRInfo(pReceiver, "snapshot receiver write finish, blockLen:%d seq:%d", pMsg->dataLen, pMsg->seq);
sRInfo(pReceiver, "snapshot receiver write about to finish, blockLen:%d seq:%d", pMsg->dataLen, pMsg->seq);
if (pMsg->dataLen > 0) {
code = pReceiver->pSyncNode->pFsm->FpSnapshotDoWrite(pReceiver->pSyncNode->pFsm, pReceiver->pWriter, pMsg->data,
pMsg->dataLen);
@ -489,15 +571,6 @@ static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnap
}
}
// reset wal
sRInfo(pReceiver, "snapshot receiver log restore");
code =
pReceiver->pSyncNode->pLogStore->syncLogRestoreFromSnapshot(pReceiver->pSyncNode->pLogStore, pMsg->lastIndex);
if (code != 0) {
sRError(pReceiver, "failed to snapshot receiver log restore since %s", terrstr());
return -1;
}
// update commit index
if (pReceiver->snapshot.lastApplyIndex > pReceiver->pSyncNode->commitIndex) {
pReceiver->pSyncNode->commitIndex = pReceiver->snapshot.lastApplyIndex;
@ -509,7 +582,6 @@ static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnap
}
// stop writer, apply data
sRInfo(pReceiver, "snapshot receiver apply write");
code = pReceiver->pSyncNode->pFsm->FpSnapshotStopWrite(pReceiver->pSyncNode->pFsm, pReceiver->pWriter, true,
&pReceiver->snapshot);
if (code != 0) {
@ -517,21 +589,29 @@ static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnap
return -1;
}
pReceiver->pWriter = NULL;
sRInfo(pReceiver, "snapshot receiver write stopped");
// update progress
pReceiver->ack = SYNC_SNAPSHOT_SEQ_END;
// get fsmState
SSnapshot snapshot = {0};
pReceiver->pSyncNode->pFsm->FpGetSnapshotInfo(pReceiver->pSyncNode->pFsm, &snapshot);
pReceiver->pSyncNode->fsmState = snapshot.state;
// reset wal
code =
pReceiver->pSyncNode->pLogStore->syncLogRestoreFromSnapshot(pReceiver->pSyncNode->pLogStore, pMsg->lastIndex);
if (code != 0) {
sRError(pReceiver, "failed to snapshot receiver log restore since %s", terrstr());
return -1;
}
sRInfo(pReceiver, "wal log restored from snapshot");
} else {
sRError(pReceiver, "snapshot receiver finish error since writer is null");
return -1;
}
// event log
sRInfo(pReceiver, "snapshot receiver got last data and apply snapshot finished");
return 0;
}
@ -599,22 +679,22 @@ static int32_t syncNodeOnSnapshotPrep(SSyncNode *pSyncNode, SyncSnapshotSend *pM
int32_t order = 0;
if ((order = snapshotReceiverSignatureCmp(pReceiver, pMsg)) < 0) {
sRInfo(pReceiver,
"received a new snapshot preparation. restart receiver"
"receiver signature: (%" PRId64 ", %" PRId64 "), msg signature:(%" PRId64 ", %" PRId64 ")",
pReceiver->term, pReceiver->startTime, pMsg->term, pMsg->startTime);
"received a new snapshot preparation. restart receiver."
" msg signature:(%" PRId64 ", %" PRId64 ")",
pMsg->term, pMsg->startTime);
goto _START_RECEIVER;
} else if (order == 0) {
sRInfo(pReceiver,
"received a duplicate snapshot preparation. send reply"
"receiver signature: (%" PRId64 ", %" PRId64 "), msg signature:(%" PRId64 ", %" PRId64 ")",
pReceiver->term, pReceiver->startTime, pMsg->term, pMsg->startTime);
"received a duplicate snapshot preparation. send reply."
" msg signature:(%" PRId64 ", %" PRId64 ")",
pMsg->term, pMsg->startTime);
goto _SEND_REPLY;
} else {
// ignore
sRError(pReceiver,
"received a stale snapshot preparation. ignore"
"receiver signature: (%" PRId64 ", %" PRId64 "), msg signature:(%" PRId64 ", %" PRId64 ")",
pReceiver->term, pReceiver->startTime, pMsg->term, pMsg->startTime);
"received a stale snapshot preparation. ignore."
" msg signature:(%" PRId64 ", %" PRId64 ")",
pMsg->term, pMsg->startTime);
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
code = terrno;
goto _SEND_REPLY;
@ -765,29 +845,8 @@ _SEND_REPLY:
return code;
}
static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) {
// condition 4
// transfering
SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver;
int64_t timeNow = taosGetTimestampMs();
int32_t code = 0;
if (snapshotReceiverSignatureCmp(pReceiver, pMsg) != 0) {
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
sRError(pReceiver, "failed to receive snapshot data since %s.", terrstr());
code = terrno;
goto _SEND_REPLY;
}
if (snapshotReceiverGotData(pReceiver, pMsg) != 0) {
code = terrno;
if (code >= SYNC_SNAPSHOT_SEQ_INVALID) {
code = TSDB_CODE_SYN_INTERNAL_ERROR;
}
}
_SEND_REPLY:;
static int32_t syncSnapSendRsp(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg, int32_t code) {
SSyncNode *pSyncNode = pReceiver->pSyncNode;
// build msg
SRpcMsg rpcMsg = {0};
if (syncBuildSnapshotSendRsp(&rpcMsg, 0, pSyncNode->vgId)) {
@ -811,10 +870,79 @@ _SEND_REPLY:;
sRError(pReceiver, "failed to send snapshot receiver resp since %s", terrstr());
return -1;
}
return 0;
}
static int32_t syncSnapBufferRecv(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend **ppMsg) {
int32_t code = 0;
SSyncSnapBuffer *pRcvBuf = pReceiver->pRcvBuf;
SyncSnapshotSend *pMsg = ppMsg[0];
terrno = TSDB_CODE_SUCCESS;
taosThreadMutexLock(&pRcvBuf->mutex);
if (pMsg->seq - pRcvBuf->start >= pRcvBuf->size) {
terrno = TSDB_CODE_SYN_BUFFER_FULL;
code = terrno;
goto _out;
}
ASSERT(pRcvBuf->start <= pRcvBuf->cursor + 1 && pRcvBuf->cursor < pRcvBuf->end);
if (pMsg->seq > pRcvBuf->cursor) {
pRcvBuf->entries[pMsg->seq % pRcvBuf->size] = pMsg;
ppMsg[0] = NULL;
pRcvBuf->end = TMAX(pMsg->seq + 1, pRcvBuf->end);
} else {
syncSnapSendRsp(pReceiver, pMsg, code);
goto _out;
}
for (int64_t seq = pRcvBuf->cursor + 1; seq < pRcvBuf->end; ++seq) {
if (pRcvBuf->entries[seq]) {
pRcvBuf->cursor = seq;
} else {
break;
}
}
for (int64_t seq = pRcvBuf->start; seq <= pRcvBuf->cursor; ++seq) {
if (snapshotReceiverGotData(pReceiver, pRcvBuf->entries[seq % pRcvBuf->size]) != 0) {
code = terrno;
if (code >= SYNC_SNAPSHOT_SEQ_INVALID) {
code = TSDB_CODE_SYN_INTERNAL_ERROR;
}
}
pRcvBuf->start = seq + 1;
syncSnapSendRsp(pReceiver, pRcvBuf->entries[seq % pRcvBuf->size], code);
pRcvBuf->entryDeleteCb(pRcvBuf->entries[seq % pRcvBuf->size]);
pRcvBuf->entries[seq % pRcvBuf->size] = NULL;
if (code) goto _out;
}
_out:
taosThreadMutexUnlock(&pRcvBuf->mutex);
return code;
}
static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend **ppMsg) {
// condition 4
// transfering
SyncSnapshotSend *pMsg = ppMsg[0];
ASSERT(pMsg);
SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver;
int64_t timeNow = taosGetTimestampMs();
int32_t code = 0;
if (snapshotReceiverSignatureCmp(pReceiver, pMsg) != 0) {
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
sRError(pReceiver, "failed to receive snapshot data since %s.", terrstr());
return syncSnapSendRsp(pReceiver, pMsg, terrno);
}
return syncSnapBufferRecv(pReceiver, ppMsg);
}
static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) {
// condition 2
// end, finish FSM
@ -867,7 +995,7 @@ _SEND_REPLY:;
// receiver on message
//
// condition 1, recv SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT
// condition 1, recv SYNC_SNAPSHOT_SEQ_PREP
// if receiver already start
// if sender.start-time > receiver.start-time, restart receiver(reply snapshot start)
// if sender.start-time = receiver.start-time, maybe duplicate msg
@ -885,9 +1013,11 @@ _SEND_REPLY:;
//
// condition 5, got data, update ack
//
int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
SyncSnapshotSend *pMsg = pRpcMsg->pCont;
int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SRpcMsg *pRpcMsg) {
SyncSnapshotSend **ppMsg = (SyncSnapshotSend **)&pRpcMsg->pCont;
SyncSnapshotSend *pMsg = ppMsg[0];
SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver;
int32_t code = 0;
// if already drop replica, do not process
if (!syncNodeInRaftGroup(pSyncNode, &pMsg->srcId)) {
@ -911,49 +1041,56 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
syncNodeUpdateTermWithoutStepDown(pSyncNode, pMsg->term);
}
// state, term, seq/ack
int32_t code = 0;
if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER || pSyncNode->state == TAOS_SYNC_STATE_LEARNER) {
if (pMsg->term == raftStoreGetTerm(pSyncNode)) {
if (pMsg->seq == SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT) {
sInfo("vgId:%d, receive pre-snapshot msg of snapshot replication. signature:(%" PRId64 ", %" PRId64 ")",
pSyncNode->vgId, pMsg->term, pMsg->startTime);
code = syncNodeOnSnapshotPrep(pSyncNode, pMsg);
} else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_BEGIN) {
sInfo("vgId:%d, receive begin msg of snapshot replication. signature:(%" PRId64 ", %" PRId64 ")",
pSyncNode->vgId, pMsg->term, pMsg->startTime);
code = syncNodeOnSnapshotBegin(pSyncNode, pMsg);
} else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_END) {
sInfo("vgId:%d, receive end msg of snapshot replication. signature: (%" PRId64 ", %" PRId64 ")",
pSyncNode->vgId, pMsg->term, pMsg->startTime);
code = syncNodeOnSnapshotEnd(pSyncNode, pMsg);
if (syncLogBufferReInit(pSyncNode->pLogBuf, pSyncNode) != 0) {
sRError(pReceiver, "failed to reinit log buffer since %s", terrstr());
code = -1;
}
} else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_FORCE_CLOSE) {
// force close, no response
syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process force stop");
snapshotReceiverStop(pReceiver);
} else if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) {
syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq data");
code = syncNodeOnSnapshotReceive(pSyncNode, pMsg);
} else {
// error log
sRError(pReceiver, "snapshot receiver recv error seq:%d, my ack:%d", pMsg->seq, pReceiver->ack);
code = -1;
}
} else {
// error log
sRError(pReceiver, "snapshot receiver term not equal");
code = -1;
}
} else {
// error log
sRError(pReceiver, "snapshot receiver not follower");
code = -1;
if (pSyncNode->state != TAOS_SYNC_STATE_FOLLOWER && pSyncNode->state != TAOS_SYNC_STATE_LEARNER) {
sRError(pReceiver, "snapshot receiver not a follower or learner");
return -1;
}
if (pMsg->seq < SYNC_SNAPSHOT_SEQ_PREP || pMsg->seq > SYNC_SNAPSHOT_SEQ_END) {
sRError(pReceiver, "snap replication msg with invalid seq:%d", pMsg->seq);
return -1;
}
// prepare
if (pMsg->seq == SYNC_SNAPSHOT_SEQ_PREP) {
sInfo("vgId:%d, prepare snap replication. msg signature:(%" PRId64 ", %" PRId64 ")", pSyncNode->vgId, pMsg->term,
pMsg->startTime);
code = syncNodeOnSnapshotPrep(pSyncNode, pMsg);
goto _out;
}
// begin
if (pMsg->seq == SYNC_SNAPSHOT_SEQ_BEGIN) {
sInfo("vgId:%d, begin snap replication. msg signature:(%" PRId64 ", %" PRId64 ")", pSyncNode->vgId, pMsg->term,
pMsg->startTime);
code = syncNodeOnSnapshotBegin(pSyncNode, pMsg);
goto _out;
}
// data
if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) {
code = syncNodeOnSnapshotReceive(pSyncNode, ppMsg);
goto _out;
}
// end
if (pMsg->seq == SYNC_SNAPSHOT_SEQ_END) {
sInfo("vgId:%d, end snap replication. msg signature:(%" PRId64 ", %" PRId64 ")", pSyncNode->vgId, pMsg->term,
pMsg->startTime);
code = syncNodeOnSnapshotEnd(pSyncNode, pMsg);
if (code != 0) {
sRError(pReceiver, "failed to end snapshot.");
goto _out;
}
code = syncLogBufferReInit(pSyncNode->pLogBuf, pSyncNode);
if (code != 0) {
sRError(pReceiver, "failed to reinit log buffer since %s", terrstr());
}
goto _out;
}
_out:;
syncNodeResetElectTimer(pSyncNode);
return code;
}
@ -993,41 +1130,7 @@ static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSend
// update next index
syncIndexMgrSetIndex(pSyncNode->pNextIndex, &pMsg->srcId, snapshot.lastApplyIndex + 1);
// update seq
pSender->seq = SYNC_SNAPSHOT_SEQ_BEGIN;
// build begin msg
SRpcMsg rpcMsg = {0};
if (syncBuildSnapshotSend(&rpcMsg, 0, pSender->pSyncNode->vgId) != 0) {
sSError(pSender, "prepare snapshot failed since build msg error");
return -1;
}
SyncSnapshotSend *pSendMsg = rpcMsg.pCont;
pSendMsg->srcId = pSender->pSyncNode->myRaftId;
pSendMsg->destId = pSender->pSyncNode->replicasId[pSender->replicaIndex];
pSendMsg->term = raftStoreGetTerm(pSender->pSyncNode);
pSendMsg->beginIndex = pSender->snapshotParam.start;
pSendMsg->lastIndex = pSender->snapshot.lastApplyIndex;
pSendMsg->lastTerm = pSender->snapshot.lastApplyTerm;
pSendMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex;
pSendMsg->lastConfig = pSender->lastConfig;
pSendMsg->startTime = pSender->startTime;
pSendMsg->seq = SYNC_SNAPSHOT_SEQ_BEGIN;
ASSERT(pSendMsg->startTime);
sSInfo(pSender, "begin snapshot replication to dnode %d. startTime:%" PRId64, DID(&pSendMsg->destId),
pSendMsg->startTime);
// send msg
syncLogSendSyncSnapshotSend(pSyncNode, pSendMsg, "snapshot sender reply pre");
if (syncNodeSendMsgById(&pSendMsg->destId, pSender->pSyncNode, &rpcMsg) != 0) {
sSError(pSender, "prepare snapshot failed since send msg error");
return -1;
}
return 0;
return snapshotSend(pSender);
}
static int32_t snapshotSenderSignatureCmp(SSyncSnapshotSender *pSender, SyncSnapshotRsp *pMsg) {
@ -1038,14 +1141,77 @@ static int32_t snapshotSenderSignatureCmp(SSyncSnapshotSender *pSender, SyncSnap
return 0;
}
static int32_t syncSnapBufferSend(SSyncSnapshotSender *pSender, SyncSnapshotRsp **ppMsg) {
int32_t code = 0;
SSyncSnapBuffer *pSndBuf = pSender->pSndBuf;
SyncSnapshotRsp *pMsg = ppMsg[0];
taosThreadMutexLock(&pSndBuf->mutex);
if (snapshotSenderSignatureCmp(pSender, pMsg) != 0) {
code = terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
goto _out;
}
if (pSender->pReader == NULL || pSender->finish) {
code = terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
goto _out;
}
if (pMsg->ack - pSndBuf->start >= pSndBuf->size) {
code = terrno = TSDB_CODE_SYN_BUFFER_FULL;
goto _out;
}
ASSERT(pSndBuf->start <= pSndBuf->cursor + 1 && pSndBuf->cursor < pSndBuf->end);
if (pMsg->ack > pSndBuf->cursor && pMsg->ack < pSndBuf->end) {
SyncSnapBlock *pBlk = pSndBuf->entries[pMsg->ack % pSndBuf->size];
ASSERT(pBlk);
pBlk->acked = 1;
}
for (int64_t ack = pSndBuf->cursor + 1; ack < pSndBuf->end; ++ack) {
SyncSnapBlock *pBlk = pSndBuf->entries[ack % pSndBuf->size];
if (pBlk->acked) {
pSndBuf->cursor = ack;
} else {
break;
}
}
for (int64_t ack = pSndBuf->start; ack <= pSndBuf->cursor; ++ack) {
pSndBuf->entryDeleteCb(pSndBuf->entries[ack % pSndBuf->size]);
pSndBuf->entries[ack % pSndBuf->size] = NULL;
pSndBuf->start = ack + 1;
}
while (pSender->seq != SYNC_SNAPSHOT_SEQ_END && pSender->seq - pSndBuf->start < (pSndBuf->size >> 2)) {
if (snapshotSend(pSender) != 0) {
code = terrno;
goto _out;
}
}
if (pSender->seq == SYNC_SNAPSHOT_SEQ_END && pSndBuf->end <= pSndBuf->start) {
if (snapshotSend(pSender) != 0) {
code = terrno;
goto _out;
}
}
_out:
taosThreadMutexUnlock(&pSndBuf->mutex);
return code;
}
// sender on message
//
// condition 1 sender receives SYNC_SNAPSHOT_SEQ_END, close sender
// condition 2 sender receives ack, set seq = ack + 1, send msg from seq
// condition 3 sender receives error msg, just print error log
//
int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
SyncSnapshotRsp *pMsg = pRpcMsg->pCont;
int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, SRpcMsg *pRpcMsg) {
SyncSnapshotRsp **ppMsg = (SyncSnapshotRsp **)&pRpcMsg->pCont;
SyncSnapshotRsp *pMsg = ppMsg[0];
// if already drop replica, do not process
if (!syncNodeInRaftGroup(pSyncNode, &pMsg->srcId)) {
@ -1071,10 +1237,7 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
// check signature
int32_t order = 0;
if ((order = snapshotSenderSignatureCmp(pSender, pMsg)) > 0) {
sSError(pSender,
"received a stale snapshot rsp. ignore it"
"sender signature: (%" PRId64 ", %" PRId64 "), msg signature:(%" PRId64 ", %" PRId64 ")",
pSender->term, pSender->startTime, pMsg->term, pMsg->startTime);
sSWarn(pSender, "ignore a stale snap rsp, msg signature:(%" PRId64 ", %" PRId64 ").", pMsg->term, pMsg->startTime);
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
return -1;
} else if (order < 0) {
@ -1083,9 +1246,7 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
goto _ERROR;
}
// state, term, seq/ack
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender not leader");
sSError(pSender, "snapshot sender not leader");
terrno = TSDB_CODE_SYN_NOT_LEADER;
goto _ERROR;
@ -1093,83 +1254,46 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
SyncTerm currentTerm = raftStoreGetTerm(pSyncNode);
if (pMsg->term != currentTerm) {
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver term not match");
sSError(pSender, "snapshot sender term not equal, msg term:%" PRId64 " currentTerm:%" PRId64, pMsg->term,
sSError(pSender, "snapshot sender term mismatch, msg term:%" PRId64 " currentTerm:%" PRId64, pMsg->term,
currentTerm);
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
goto _ERROR;
}
if (pMsg->code != 0) {
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error code");
sSError(pSender, "snapshot sender receive error:%s 0x%x and stop sender", tstrerror(pMsg->code), pMsg->code);
terrno = pMsg->code;
goto _ERROR;
}
// prepare <begin, end>, send begin msg
if (pMsg->ack == SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT) {
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq pre-snapshot");
return syncNodeOnSnapshotPrepRsp(pSyncNode, pSender, pMsg);
}
if (pSender->pReader == NULL || pSender->finish) {
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender invalid");
sSError(pSender, "snapshot sender invalid error:%s 0x%x, pReader:%p finish:%d", tstrerror(pMsg->code), pMsg->code,
pSender->pReader, pSender->finish);
terrno = pMsg->code;
// send begin
if (pMsg->ack == SYNC_SNAPSHOT_SEQ_PREP) {
sSInfo(pSender, "process prepare rsp");
if (syncNodeOnSnapshotPrepRsp(pSyncNode, pSender, pMsg) != 0) {
goto _ERROR;
}
if (pMsg->ack == SYNC_SNAPSHOT_SEQ_BEGIN) {
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq begin");
if (snapshotSenderUpdateProgress(pSender, pMsg) != 0) {
return -1;
}
if (snapshotSend(pSender) != 0) {
return -1;
// send msg of data or end
if (pMsg->ack >= SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->ack < SYNC_SNAPSHOT_SEQ_END) {
if (syncSnapBufferSend(pSender, ppMsg) != 0) {
sSError(pSender, "failed to replicate snap since %s. seq:%d, pReader:%p, finish:%d", terrstr(), pSender->seq,
pSender->pReader, pSender->finish);
goto _ERROR;
}
return 0;
}
// receive ack is finish, close sender
// end
if (pMsg->ack == SYNC_SNAPSHOT_SEQ_END) {
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq end");
sSInfo(pSender, "process end rsp");
snapshotSenderStop(pSender, true);
syncNodeReplicateReset(pSyncNode, &pMsg->srcId);
return 0;
}
// send next msg
if (pMsg->ack == pSender->seq) {
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq data");
// update sender ack
if (snapshotSenderUpdateProgress(pSender, pMsg) != 0) {
return -1;
}
if (snapshotSend(pSender) != 0) {
return -1;
}
} else if (pMsg->ack == pSender->seq - 1) {
// maybe resend
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq and resend");
if (snapshotReSend(pSender) != 0) {
return -1;
}
} else {
// error log
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error ack");
sSError(pSender, "snapshot sender receive error ack:%d, my seq:%d", pMsg->ack, pSender->seq);
snapshotSenderStop(pSender, true);
syncNodeReplicateReset(pSyncNode, &pMsg->srcId);
return -1;
}
return 0;
_ERROR:
snapshotSenderStop(pSender, true);
snapshotSenderStop(pSender, false);
syncNodeReplicateReset(pSyncNode, &pMsg->srcId);
return -1;
}

View File

@ -77,12 +77,19 @@ static int32_t syncNodeTimerRoutine(SSyncNode* ths) {
for (int i = 0; i < ths->peersNum; ++i) {
SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(ths, &(ths->peersId[i]));
if (pSender != NULL) {
if (ths->isStart && ths->state == TAOS_SYNC_STATE_LEADER && pSender->start &&
timeNow - pSender->lastSendTime > SYNC_SNAP_RESEND_MS) {
snapshotReSend(pSender);
if (ths->isStart && ths->state == TAOS_SYNC_STATE_LEADER && pSender->start) {
int64_t elapsedMs = timeNow - pSender->lastSendTime;
if (elapsedMs < SYNC_SNAP_RESEND_MS) {
continue;
}
if (elapsedMs > SYNC_SNAP_TIMEOUT_MS) {
sSError(pSender, "snap replication timeout, terminate.");
snapshotSenderStop(pSender, false);
} else {
sTrace("vgId:%d, do not resend: nstart%d, now:%" PRId64 ", lstsend:%" PRId64 ", diff:%" PRId64, ths->vgId,
ths->isStart, timeNow, pSender->lastSendTime, timeNow - pSender->lastSendTime);
sSWarn(pSender, "snap replication resend.");
snapshotReSend(pSender);
}
}
}
}

View File

@ -267,21 +267,23 @@ void syncPrintSnapshotSenderLog(const char* flags, ELogLevel level, int32_t dfla
va_end(argpointer);
taosPrintLog(flags, level, dflag,
"vgId:%d, %s, sync:%s, snap-sender:{%p start:%" PRId64 " end:%" PRId64 " last-index:%" PRId64
" last-term:%" PRIu64 " last-cfg:%" PRId64
", seq:%d ack:%d finish:%d, as:%d dnode:%d}"
"vgId:%d, %s, sync:%s, snap-sender:%p signature:(%" PRId64 ", %" PRId64 "), {start:%" PRId64
" end:%" PRId64 " last-index:%" PRId64 " last-term:%" PRIu64 " last-cfg:%" PRId64
", seq:%d, ack:%d, "
" buf:[%" PRId64 " %" PRId64 ", %" PRId64
"), finish:%d, as:%d, to-dnode:%d}"
", term:%" PRIu64 ", commit-index:%" PRId64 ", firstver:%" PRId64 ", lastver:%" PRId64
", min-match:%" PRId64 ", snap:{last-index:%" PRId64 ", term:%" PRIu64
"}, standby:%d, batch-sz:%d, replicas:%d, last-cfg:%" PRId64
", chging:%d, restore:%d, quorum:%d, lc-timer:{elect:%" PRId64 ", hb:%" PRId64 "}, peer:%s, cfg:%s",
pNode->vgId, eventLog, syncStr(pNode->state), pSender, pSender->snapshotParam.start,
pSender->snapshotParam.end, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm,
pSender->snapshot.lastConfigIndex, pSender->seq, pSender->ack, pSender->finish, pSender->replicaIndex,
DID(&pNode->replicasId[pSender->replicaIndex]), raftStoreGetTerm(pNode), pNode->commitIndex,
logBeginIndex, logLastIndex, pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm,
pNode->raftCfg.isStandBy, pNode->raftCfg.batchSize, pNode->replicaNum, pNode->raftCfg.lastConfigIndex,
pNode->changing, pNode->restoreFinish, syncNodeDynamicQuorum(pNode), pNode->electTimerLogicClock,
pNode->heartbeatTimerLogicClockUser, peerStr, cfgStr);
", chging:%d, restore:%d, quorum:%d, peer:%s, cfg:%s",
pNode->vgId, eventLog, syncStr(pNode->state), pSender, pSender->term, pSender->startTime,
pSender->snapshotParam.start, pSender->snapshotParam.end, pSender->snapshot.lastApplyIndex,
pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->seq, pSender->ack,
pSender->pSndBuf->start, pSender->pSndBuf->cursor, pSender->pSndBuf->end, pSender->finish,
pSender->replicaIndex, DID(&pNode->replicasId[pSender->replicaIndex]), raftStoreGetTerm(pNode),
pNode->commitIndex, logBeginIndex, logLastIndex, pNode->minMatchIndex, snapshot.lastApplyIndex,
snapshot.lastApplyTerm, pNode->raftCfg.isStandBy, pNode->raftCfg.batchSize, pNode->replicaNum,
pNode->raftCfg.lastConfigIndex, pNode->changing, pNode->restoreFinish, pNode->quorum, peerStr, cfgStr);
}
void syncPrintSnapshotReceiverLog(const char* flags, ELogLevel level, int32_t dflag, SSyncSnapshotReceiver* pReceiver,
@ -316,19 +318,21 @@ void syncPrintSnapshotReceiverLog(const char* flags, ELogLevel level, int32_t df
taosPrintLog(
flags, level, dflag,
"vgId:%d, %s, sync:%s,"
" snap-receiver:{%p started:%d acked:%d term:%" PRIu64 " start-time:%" PRId64 " from-dnode:%d, start:%" PRId64
" end:%" PRId64 " last-index:%" PRId64 " last-term:%" PRIu64 " last-cfg:%" PRId64
" snap-receiver:%p signature:(%" PRId64 ", %" PRId64 "), {start:%d ack:%d buf:[%" PRId64 " %" PRId64 ", %" PRId64
")"
" from-dnode:%d, start:%" PRId64 " end:%" PRId64 " last-index:%" PRId64 " last-term:%" PRIu64 " last-cfg:%" PRId64
"}"
", term:%" PRIu64 ", commit-index:%" PRId64 ", firstver:%" PRId64 ", lastver:%" PRId64 ", min-match:%" PRId64
", snap:{last-index:%" PRId64 ", last-term:%" PRIu64 "}, standby:%d, batch-sz:%d, replicas:%d, last-cfg:%" PRId64
", chging:%d, restore:%d, quorum:%d, lc-timers:{elect:%" PRId64 ", hb:%" PRId64 "}, peer:%s, cfg:%s",
pNode->vgId, eventLog, syncStr(pNode->state), pReceiver, pReceiver->start, pReceiver->ack, pReceiver->term,
pReceiver->startTime, DID(&pReceiver->fromId), pReceiver->snapshotParam.start, pReceiver->snapshotParam.end,
", chging:%d, restore:%d, quorum:%d, peer:%s, cfg:%s",
pNode->vgId, eventLog, syncStr(pNode->state), pReceiver, pReceiver->term, pReceiver->startTime, pReceiver->start,
pReceiver->ack, pReceiver->pRcvBuf->start, pReceiver->pRcvBuf->cursor, pReceiver->pRcvBuf->end,
DID(&pReceiver->fromId), pReceiver->snapshotParam.start, pReceiver->snapshotParam.end,
pReceiver->snapshot.lastApplyIndex, pReceiver->snapshot.lastApplyTerm, pReceiver->snapshot.lastConfigIndex,
raftStoreGetTerm(pNode), pNode->commitIndex, logBeginIndex, logLastIndex, pNode->minMatchIndex,
snapshot.lastApplyIndex, snapshot.lastApplyTerm, pNode->raftCfg.isStandBy, pNode->raftCfg.batchSize,
pNode->replicaNum, pNode->raftCfg.lastConfigIndex, pNode->changing, pNode->restoreFinish,
syncNodeDynamicQuorum(pNode), pNode->electTimerLogicClock, pNode->heartbeatTimerLogicClockUser, peerStr, cfgStr);
pNode->replicaNum, pNode->raftCfg.lastConfigIndex, pNode->changing, pNode->restoreFinish, pNode->quorum, peerStr,
cfgStr);
}
void syncLogRecvTimer(SSyncNode* pSyncNode, const SyncTimeout* pMsg, const char* s) {

View File

@ -110,7 +110,9 @@ int32_t tfsAllocDiskOnTier(STfsTier *pTier) {
}
int32_t retId = -1;
int64_t avail = 0;
for (int32_t id = 0; id < TFS_MAX_DISKS_PER_TIER; ++id) {
#if 0 // round-robin
int32_t diskId = (pTier->nextid + id) % pTier->ndisk;
STfsDisk *pDisk = pTier->disks[diskId];
@ -126,6 +128,18 @@ int32_t tfsAllocDiskOnTier(STfsTier *pTier) {
terrno = 0;
pTier->nextid = (diskId + 1) % pTier->ndisk;
break;
#else // select the disk with the most available space
STfsDisk *pDisk = pTier->disks[id];
if (pDisk == NULL) continue;
if (pDisk->size.avail < tsMinDiskFreeSize) continue;
if (pDisk->size.avail > avail) {
avail = pDisk->size.avail;
retId = id;
terrno = 0;
}
#endif
}
tfsUnLockTier(pTier);

View File

@ -8,7 +8,7 @@ target_link_libraries(
PUBLIC gtest_main
)
add_test(
NAME tfs_test
COMMAND tfs_test
)
# add_test(
# NAME tfs_test
# COMMAND tfs_test
# )

View File

@ -477,7 +477,6 @@ struct tm *taosLocalTime(const time_t *timep, struct tm *result, char *buf) {
return res;
}
#ifdef WINDOWS
if (*timep < 0) {
if (*timep < -2208988800LL) {
if (buf != NULL) {
sprintf(buf, "NaN");
@ -490,7 +489,7 @@ struct tm *taosLocalTime(const time_t *timep, struct tm *result, char *buf) {
LARGE_INTEGER offset;
struct tm tm1;
time_t tt = 0;
if (localtime_s(&tm1, &tt) != 0 ) {
if (localtime_s(&tm1, &tt) != 0) {
if (buf != NULL) {
sprintf(buf, "NaN");
}
@ -510,14 +509,6 @@ struct tm *taosLocalTime(const time_t *timep, struct tm *result, char *buf) {
result->tm_wday = s.wDayOfWeek;
result->tm_yday = 0;
result->tm_isdst = 0;
} else {
if (localtime_s(result, timep) != 0) {
if (buf != NULL) {
sprintf(buf, "NaN");
}
return NULL;
}
}
#else
res = localtime_r(timep, result);
if (res == NULL && buf != NULL) {

View File

@ -114,10 +114,6 @@ TEST(osTimeTests, taosLocalTime) {
ASSERT_EQ(local_time->tm_min, 0);
ASSERT_EQ(local_time->tm_sec, 0);
time_t over_timep = 6406301441633558;
local_time = taosLocalTime(&over_timep, &result, NULL);
ASSERT_EQ(local_time, nullptr);
time_t neg_timep3 = -78115158887;
local_time = taosLocalTime(&neg_timep3, &result, NULL);
ASSERT_EQ(local_time, nullptr);

View File

@ -518,6 +518,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_PORT, "Port should be an in
TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_ENDPOINT, "Endpoint should be in the format of 'fqdn:port'")
TAOS_DEFINE_ERROR(TSDB_CODE_PAR_EXPRIE_STATEMENT, "This statement is no longer supported")
TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INTER_VALUE_TOO_SMALL, "Interval too small")
TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INTER_VALUE_TOO_BIG, "Interval too big")
TAOS_DEFINE_ERROR(TSDB_CODE_PAR_DB_NOT_SPECIFIED, "Database not specified")
TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_IDENTIFIER_NAME, "Invalid identifier name")
TAOS_DEFINE_ERROR(TSDB_CODE_PAR_CORRESPONDING_STABLE_ERR, "Corresponding super table not in this db")

View File

@ -1048,6 +1048,7 @@ e
,,y,script,./test.sh -f tsim/query/tableCount.sim
,,y,script,./test.sh -f tsim/query/show_db_table_kind.sim
,,y,script,./test.sh -f tsim/query/bi_star_table.sim
,,y,script,./test.sh -f tsim/query/bi_tag_scan.sim
,,y,script,./test.sh -f tsim/query/tag_scan.sim
,,y,script,./test.sh -f tsim/query/nullColSma.sim
,,y,script,./test.sh -f tsim/query/bug3398.sim

View File

@ -69,7 +69,7 @@ docker run \
-v ${REP_REAL_PATH}/community/contrib/libuv/:${REP_DIR}/community/contrib/libuv \
-v ${REP_REAL_PATH}/community/contrib/lz4/:${REP_DIR}/community/contrib/lz4 \
-v ${REP_REAL_PATH}/community/contrib/zlib/:${REP_DIR}/community/contrib/zlib \
--rm --ulimit core=-1 taos_test:v1.0 sh -c "pip uninstall taospy -y;pip3 install taospy==2.7.2;cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_TAOSX=true -DJEMALLOC_ENABLED=0;make -j 10|| exit 1"
--rm --ulimit core=-1 taos_test:v1.0 sh -c "pip uninstall taospy -y;pip3 install taospy==2.7.2;cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_TAOSX=false -DJEMALLOC_ENABLED=0;make -j 10|| exit 1"
# -v ${REP_REAL_PATH}/community/contrib/jemalloc/:${REP_DIR}/community/contrib/jemalloc \
if [[ -d ${WORKDIR}/debugNoSan ]] ;then
@ -99,7 +99,7 @@ docker run \
-v ${REP_REAL_PATH}/community/contrib/lz4/:${REP_DIR}/community/contrib/lz4 \
-v ${REP_REAL_PATH}/community/contrib/zlib/:${REP_DIR}/community/contrib/zlib \
-v ${REP_REAL_PATH}/community/contrib/jemalloc/:${REP_DIR}/community/contrib/jemalloc \
--rm --ulimit core=-1 taos_test:v1.0 sh -c "pip uninstall taospy -y;pip3 install taospy==2.7.2;cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_SANITIZER=1 -DTOOLS_SANITIZE=true -DTOOLS_BUILD_TYPE=Debug -DBUILD_TAOSX=true -DJEMALLOC_ENABLED=0;make -j 10|| exit 1 "
--rm --ulimit core=-1 taos_test:v1.0 sh -c "pip uninstall taospy -y;pip3 install taospy==2.7.2;cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_SANITIZER=1 -DTOOLS_SANITIZE=true -DTOOLS_BUILD_TYPE=Debug -DBUILD_TAOSX=false -DJEMALLOC_ENABLED=0;make -j 10|| exit 1 "
mv ${REP_REAL_PATH}/debug ${WORKDIR}/debugSan

View File

@ -0,0 +1,31 @@
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1
system sh/exec.sh -n dnode1 -s start
sql connect
sql drop database if exists db1;
sql create database db1 vgroups 3;
sql create database db1;
sql use db1;
sql create stable sta (ts timestamp, f1 int, f2 binary(200)) tags(t1 int, t2 int, t3 int);
sql create stable stb (ts timestamp, f1 int, f2 binary(200)) tags(t1 int, t2 int, t3 int);
sql create table tba1 using sta tags(1, 1, 1);
sql create table tba2 using sta tags(2, 2, 2);
sql insert into tba1 values(now, 1, "1")(now+3s, 3, "3")(now+5s, 5, "5");
sql insert into tba2 values(now + 1s, 2, "2")(now+2s, 2, "2")(now+4s, 4, "4");
sql create table tbn1 (ts timestamp, f1 int);
set_bi_mode 1
sql select t1,t2,t3 from db1.sta order by t1;
print $rows $data00 $data10
if $rows != 2 then
return -1
endi
if $data00 != 1 then
return -1
endi
if $data10 != 2 then
return -1
endi
system sh/exec.sh -n dnode1 -s stop -x SIGINT

View File

@ -21,6 +21,7 @@ sql create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 in
sql create stream streams2 trigger at_once watermark 1d IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt2 as select _wstart, count(*) c1, sum(a) c3 , max(b) c4, min(c) c5 from t1 interval(10s) sliding (5s);
sql create stream stream_t1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamtST as select _wstart, count(*) c1, sum(a) c3 , max(b) c4, min(c) c5 from st interval(10s) sliding (5s);
sql create stream stream_t2 trigger at_once watermark 1d IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamtST2 as select _wstart, count(*) c1, sum(a) c3 , max(b) c4, min(c) c5 from st interval(10s) sliding (5s);
sleep 1000
sql insert into t1 values(1648791210000,1,2,3,1.0);
sql insert into t1 values(1648791216000,2,2,3,1.1);
@ -311,6 +312,7 @@ sql create table t2 using st tags(2,2,2);
sql create stream streams11 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, count(*) c1, sum(a) c3 , max(b) c4, min(c) c5 from t1 interval(10s, 5s);
sql create stream streams12 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt2 as select _wstart, count(*) c1, sum(a) c3 , max(b) c4, min(c) c5 from st interval(10s, 5s);
sleep 1000
sql insert into t1 values(1648791213000,1,2,3,1.0);
sql insert into t1 values(1648791223001,2,2,3,1.1);
@ -444,6 +446,7 @@ sql create table t2 using st tags(2,2,2);
sql create stream streams21 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt21 as select _wstart, count(*) c1, sum(a) c3 , max(b) c4, min(c) c5 from t1 interval(10s, 5s);
sql create stream streams22 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt22 as select _wstart, count(*) c1, sum(a) c3 , max(b) c4, min(c) c5 from st interval(10s, 5s);
sleep 1000
sql insert into t1 values(1648791213000,1,1,1,1.0);
sql insert into t1 values(1648791223001,2,2,2,1.1);
@ -582,6 +585,7 @@ sql create table t1 using st tags(1,1,1);
sql create table t2 using st tags(2,2,2);
sql create stream streams23 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt23 as select _wstart, count(*) c1, sum(a) c3 , max(b) c4, min(c) c5 from st interval(20s) sliding(10s);
sleep 1000
sql insert into t1 values(1648791213000,1,1,1,1.0);
sql insert into t1 values(1648791223001,2,2,2,1.1);
@ -706,6 +710,7 @@ sql create table t1 using st tags(1,1,1);
sql create table t2 using st tags(2,2,2);
sql create stream streams4 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt4 as select _wstart as ts, count(*),min(a) c1 from st interval(10s) sliding(5s);
sleep 1000
sql insert into t1 values(1648791213000,1,1,1,1.0);
sql insert into t1 values(1648791243000,2,1,1,1.0);