refactor: vnode

This commit is contained in:
Hongze Cheng 2022-04-27 07:04:56 +00:00
parent 0b0bdf2f7a
commit 09ff7f6aa0
11 changed files with 129 additions and 3203 deletions

View File

@ -27,10 +27,9 @@ target_sources(
"src/tsdb/tsdbTDBImpl.c"
"src/tsdb/tsdbCommit.c"
"src/tsdb/tsdbCommit2.c"
"src/tsdb/tsdbCompact.c"
"src/tsdb/tsdbFile.c"
"src/tsdb/tsdbFS.c"
"src/tsdb/tsdbMain.c"
"src/tsdb/tsdbOpen.c"
"src/tsdb/tsdbMemTable.c"
"src/tsdb/tsdbRead.c"
"src/tsdb/tsdbReadImpl.c"

View File

@ -76,6 +76,7 @@ typedef struct SMetaEntry SMetaEntry;
void metaReaderInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags);
void metaReaderClear(SMetaReader *pReader);
int metaGetTableEntryByUid(SMetaReader *pReader, tb_uid_t uid);
int metaReadNext(SMetaReader *pReader);
#if 1 // refact APIs below (TODO)

View File

@ -46,11 +46,44 @@ int tsdbLoadDataFromCache(STable *pTable, SSkipListIterator *pIter, TSKEY maxKe
// tsdbCommit ================
// tsdbFS ================
typedef struct STsdbFS STsdbFS;
// tsdbSma ================
typedef struct SSmaEnv SSmaEnv;
typedef struct SSmaEnvs SSmaEnvs;
// structs
typedef struct {
int minFid;
int midFid;
int maxFid;
TSKEY minKey;
} SRtn;
struct SSmaEnvs {
int16_t nTSma;
int16_t nRSma;
SSmaEnv *pTSmaEnv;
SSmaEnv *pRSmaEnv;
};
struct STsdb {
char *path;
SVnode *pVnode;
bool repoLocked;
TdThreadMutex mutex;
STsdbCfg config;
STsdbMemTable *mem;
STsdbMemTable *imem;
SRtn rtn;
STsdbFS *fs;
SSmaEnvs smaEnvs;
};
#if 1 // ======================================
typedef struct SSmaStat SSmaStat;
typedef struct SSmaEnv SSmaEnv;
typedef struct SSmaEnvs SSmaEnvs;
struct STable {
uint64_t tid;
@ -97,13 +130,6 @@ typedef struct {
uint8_t state;
} SDFile;
struct SSmaEnvs {
int16_t nTSma;
int16_t nRSma;
SSmaEnv *pTSmaEnv;
SSmaEnv *pRSmaEnv;
};
typedef struct {
int fid;
int8_t state; // -128~127
@ -112,13 +138,6 @@ typedef struct {
SDFile files[TSDB_FILE_MAX];
} SDFileSet;
typedef struct {
int minFid;
int midFid;
int maxFid;
TSKEY minKey;
} SRtn;
struct STbData {
tb_uid_t uid;
TSKEY keyMin;
@ -155,7 +174,7 @@ typedef struct {
SArray *sf; // sma data file array v2f1900.index_name_1
} SFSStatus;
typedef struct {
struct STsdbFS {
TdThreadRwlock lock;
SFSStatus *cstatus; // current status
@ -163,19 +182,6 @@ typedef struct {
SHashObj *metaCacheComp; // meta cache for compact
bool intxn;
SFSStatus *nstatus; // new status
} STsdbFS;
struct STsdb {
char *path;
SVnode *pVnode;
bool repoLocked;
TdThreadMutex mutex;
STsdbCfg config;
STsdbMemTable *mem;
STsdbMemTable *imem;
SRtn rtn;
STsdbFS *fs;
SSmaEnvs smaEnvs;
};
#define REPO_ID(r) TD_VID((r)->pVnode)
@ -506,12 +512,6 @@ static FORCE_INLINE void *taosTZfree(void *ptr) {
// tsdbCommit
typedef struct {
uint64_t uid;
int64_t offset;
int64_t size;
} SKVRecord;
void tsdbGetRtnSnap(STsdb *pRepo, SRtn *pRtn);
static FORCE_INLINE int TSDB_KEY_FID(TSKEY key, int32_t days, int8_t precision) {

View File

@ -75,7 +75,6 @@ int metaCreateSTable(SMeta* pMeta, int64_t version, SVCreateStbReq*
int metaCreateTable(SMeta* pMeta, int64_t version, SVCreateTbReq* pReq);
SSchemaWrapper* metaGetTableSchema(SMeta* pMeta, tb_uid_t uid, int32_t sver, bool isinline);
STSchema* metaGetTbTSchema(SMeta* pMeta, tb_uid_t uid, int32_t sver);
int metaGetTableEntryByUid(SMetaReader* pReader, tb_uid_t uid);
int metaGetTableEntryByName(SMetaReader* pReader, const char* name);
int metaGetTbNum(SMeta* pMeta);
SMCtbCursor* metaOpenCtbCursor(SMeta* pMeta, tb_uid_t uid);

View File

@ -752,334 +752,7 @@ int tsdbWriteBlockIdx(SDFile *pHeadf, SArray *pIdxA, void **ppBuf) {
return 0;
}
// // =================== Commit Meta Data
// static int tsdbInitCommitMetaFile(STsdbRepo *pRepo, SMFile* pMf, bool open) {
// STsdbFS * pfs = REPO_FS(pRepo);
// SMFile * pOMFile = pfs->cstatus->pmf;
// SDiskID did;
// // Create/Open a meta file or open the existing file
// if (pOMFile == NULL) {
// // Create a new meta file
// did.level = TFS_PRIMARY_LEVEL;
// did.id = TFS_PRIMARY_ID;
// tsdbInitMFile(pMf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)));
// if (open && tsdbCreateMFile(pMf, true) < 0) {
// tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
// return -1;
// }
// tsdbInfo("vgId:%d meta file %s is created to commit", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMf));
// } else {
// tsdbInitMFileEx(pMf, pOMFile);
// if (open && tsdbOpenMFile(pMf, O_WRONLY) < 0) {
// tsdbError("vgId:%d failed to open META file since %s", REPO_ID(pRepo), tstrerror(terrno));
// return -1;
// }
// }
// return 0;
// }
// static int tsdbCommitMeta(STsdbRepo *pRepo) {
// STsdbFS * pfs = REPO_FS(pRepo);
// SMemTable *pMem = pRepo->imem;
// SMFile * pOMFile = pfs->cstatus->pmf;
// SMFile mf;
// SActObj * pAct = NULL;
// SActCont * pCont = NULL;
// SListNode *pNode = NULL;
// ASSERT(pOMFile != NULL || listNEles(pMem->actList) > 0);
// if (listNEles(pMem->actList) <= 0) {
// // no meta data to commit, just keep the old meta file
// tsdbUpdateMFile(pfs, pOMFile);
// if (tsTsdbMetaCompactRatio > 0) {
// if (tsdbInitCommitMetaFile(pRepo, &mf, false) < 0) {
// return -1;
// }
// int ret = tsdbCompactMetaFile(pRepo, pfs, &mf);
// if (ret < 0) tsdbError("compact meta file error");
// return ret;
// }
// return 0;
// } else {
// if (tsdbInitCommitMetaFile(pRepo, &mf, true) < 0) {
// return -1;
// }
// }
// // Loop to write
// while ((pNode = tdListPopHead(pMem->actList)) != NULL) {
// pAct = (SActObj *)pNode->data;
// if (pAct->act == TSDB_UPDATE_META) {
// pCont = (SActCont *)POINTER_SHIFT(pAct, sizeof(SActObj));
// if (tsdbUpdateMetaRecord(pfs, &mf, pAct->uid, (void *)(pCont->cont), pCont->len, false) < 0) {
// tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid,
// tstrerror(terrno));
// tsdbCloseMFile(&mf);
// (void)tsdbApplyMFileChange(&mf, pOMFile);
// // TODO: need to reload metaCache
// return -1;
// }
// } else if (pAct->act == TSDB_DROP_META) {
// if (tsdbDropMetaRecord(pfs, &mf, pAct->uid) < 0) {
// tsdbError("vgId:%d failed to drop META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid,
// tstrerror(terrno));
// tsdbCloseMFile(&mf);
// tsdbApplyMFileChange(&mf, pOMFile);
// // TODO: need to reload metaCache
// return -1;
// }
// } else {
// ASSERT(false);
// }
// }
// if (tsdbUpdateMFileHeader(&mf) < 0) {
// tsdbError("vgId:%d failed to update META file header since %s, revert it", REPO_ID(pRepo), tstrerror(terrno));
// tsdbApplyMFileChange(&mf, pOMFile);
// // TODO: need to reload metaCache
// return -1;
// }
// TSDB_FILE_FSYNC(&mf);
// tsdbCloseMFile(&mf);
// tsdbUpdateMFile(pfs, &mf);
// if (tsTsdbMetaCompactRatio > 0 && tsdbCompactMetaFile(pRepo, pfs, &mf) < 0) {
// tsdbError("compact meta file error");
// }
// return 0;
// }
// int tsdbEncodeKVRecord(void **buf, SKVRecord *pRecord) {
// int tlen = 0;
// tlen += taosEncodeFixedU64(buf, pRecord->uid);
// tlen += taosEncodeFixedI64(buf, pRecord->offset);
// tlen += taosEncodeFixedI64(buf, pRecord->size);
// return tlen;
// }
// void *tsdbDecodeKVRecord(void *buf, SKVRecord *pRecord) {
// buf = taosDecodeFixedU64(buf, &(pRecord->uid));
// buf = taosDecodeFixedI64(buf, &(pRecord->offset));
// buf = taosDecodeFixedI64(buf, &(pRecord->size));
// return buf;
// }
// static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact) {
// char buf[64] = "\0";
// void * pBuf = buf;
// SKVRecord rInfo;
// int64_t offset;
// // Seek to end of meta file
// offset = tsdbSeekMFile(pMFile, 0, SEEK_END);
// if (offset < 0) {
// return -1;
// }
// rInfo.offset = offset;
// rInfo.uid = uid;
// rInfo.size = contLen;
// int tlen = tsdbEncodeKVRecord((void **)(&pBuf), &rInfo);
// if (tsdbAppendMFile(pMFile, buf, tlen, NULL) < tlen) {
// return -1;
// }
// if (tsdbAppendMFile(pMFile, cont, contLen, NULL) < contLen) {
// return -1;
// }
// tsdbUpdateMFileMagic(pMFile, POINTER_SHIFT(cont, contLen - sizeof(TSCKSUM)));
// SHashObj* cache = compact ? pfs->metaCacheComp : pfs->metaCache;
// pMFile->info.nRecords++;
// SKVRecord *pRecord = taosHashGet(cache, (void *)&uid, sizeof(uid));
// if (pRecord != NULL) {
// pMFile->info.tombSize += (pRecord->size + sizeof(SKVRecord));
// } else {
// pMFile->info.nRecords++;
// }
// taosHashPut(cache, (void *)(&uid), sizeof(uid), (void *)(&rInfo), sizeof(rInfo));
// return 0;
// }
// static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid) {
// SKVRecord rInfo = {0};
// char buf[128] = "\0";
// SKVRecord *pRecord = taosHashGet(pfs->metaCache, (void *)(&uid), sizeof(uid));
// if (pRecord == NULL) {
// tsdbError("failed to drop META record with key %" PRIu64 " since not find", uid);
// return -1;
// }
// rInfo.offset = -pRecord->offset;
// rInfo.uid = pRecord->uid;
// rInfo.size = pRecord->size;
// void *pBuf = buf;
// tsdbEncodeKVRecord(&pBuf, &rInfo);
// if (tsdbAppendMFile(pMFile, buf, sizeof(SKVRecord), NULL) < 0) {
// return -1;
// }
// pMFile->info.magic = taosCalcChecksum(pMFile->info.magic, (uint8_t *)buf, sizeof(SKVRecord));
// pMFile->info.nDels++;
// pMFile->info.nRecords--;
// pMFile->info.tombSize += (rInfo.size + sizeof(SKVRecord) * 2);
// taosHashRemove(pfs->metaCache, (void *)(&uid), sizeof(uid));
// return 0;
// }
// static int tsdbCompactMetaFile(STsdbRepo *pRepo, STsdbFS *pfs, SMFile *pMFile) {
// float delPercent = (float)(pMFile->info.nDels) / (float)(pMFile->info.nRecords);
// float tombPercent = (float)(pMFile->info.tombSize) / (float)(pMFile->info.size);
// float compactRatio = (float)(tsTsdbMetaCompactRatio)/100;
// if (delPercent < compactRatio && tombPercent < compactRatio) {
// return 0;
// }
// if (tsdbOpenMFile(pMFile, O_RDONLY) < 0) {
// tsdbError("open meta file %s compact fail", pMFile->f.rname);
// return -1;
// }
// tsdbInfo("begin compact tsdb meta file, ratio:%d, nDels:%" PRId64 ",nRecords:%" PRId64 ",tombSize:%" PRId64
// ",size:%" PRId64,
// tsTsdbMetaCompactRatio, pMFile->info.nDels,pMFile->info.nRecords,pMFile->info.tombSize,pMFile->info.size);
// SMFile mf;
// SDiskID did;
// // first create tmp meta file
// did.level = TFS_PRIMARY_LEVEL;
// did.id = TFS_PRIMARY_ID;
// tsdbInitMFile(&mf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)) + 1);
// if (tsdbCreateMFile(&mf, true) < 0) {
// tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
// return -1;
// }
// tsdbInfo("vgId:%d meta file %s is created to compact meta data", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf));
// // second iterator metaCache
// int code = -1;
// int64_t maxBufSize = 1024;
// SKVRecord *pRecord;
// void *pBuf = NULL;
// pBuf = taosMemoryMalloc((size_t)maxBufSize);
// if (pBuf == NULL) {
// goto _err;
// }
// // init Comp
// assert(pfs->metaCacheComp == NULL);
// pfs->metaCacheComp = taosHashInit(4096, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK);
// if (pfs->metaCacheComp == NULL) {
// goto _err;
// }
// pRecord = taosHashIterate(pfs->metaCache, NULL);
// while (pRecord) {
// if (tsdbSeekMFile(pMFile, pRecord->offset + sizeof(SKVRecord), SEEK_SET) < 0) {
// tsdbError("vgId:%d failed to seek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
// tstrerror(terrno));
// goto _err;
// }
// if (pRecord->size > maxBufSize) {
// maxBufSize = pRecord->size;
// void* tmp = taosMemoryRealloc(pBuf, (size_t)maxBufSize);
// if (tmp == NULL) {
// goto _err;
// }
// pBuf = tmp;
// }
// int nread = (int)tsdbReadMFile(pMFile, pBuf, pRecord->size);
// if (nread < 0) {
// tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
// tstrerror(terrno));
// goto _err;
// }
// if (nread < pRecord->size) {
// tsdbError("vgId:%d failed to read file %s since file corrupted, expected read:%" PRId64 " actual read:%d",
// REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), pRecord->size, nread);
// goto _err;
// }
// if (tsdbUpdateMetaRecord(pfs, &mf, pRecord->uid, pBuf, (int)pRecord->size, true) < 0) {
// tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pRecord->uid,
// tstrerror(terrno));
// goto _err;
// }
// pRecord = taosHashIterate(pfs->metaCache, pRecord);
// }
// code = 0;
// _err:
// if (code == 0) TSDB_FILE_FSYNC(&mf);
// tsdbCloseMFile(&mf);
// tsdbCloseMFile(pMFile);
// if (code == 0) {
// // rename meta.tmp -> meta
// tsdbInfo("vgId:%d meta file rename %s -> %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf),
// TSDB_FILE_FULL_NAME(pMFile)); taosRename(mf.f.aname,pMFile->f.aname); tstrncpy(mf.f.aname, pMFile->f.aname,
// TSDB_FILENAME_LEN); tstrncpy(mf.f.rname, pMFile->f.rname, TSDB_FILENAME_LEN);
// // update current meta file info
// pfs->nstatus->pmf = NULL;
// tsdbUpdateMFile(pfs, &mf);
// taosHashCleanup(pfs->metaCache);
// pfs->metaCache = pfs->metaCacheComp;
// pfs->metaCacheComp = NULL;
// } else {
// // remove meta.tmp file
// taosRemoveFile(mf.f.aname);
// taosHashCleanup(pfs->metaCacheComp);
// pfs->metaCacheComp = NULL;
// }
// taosMemoryFreeClear(pBuf);
// ASSERT(mf.info.nDels == 0);
// ASSERT(mf.info.tombSize == 0);
// tsdbInfo("end compact tsdb meta file,code:%d,nRecords:%" PRId64 ",size:%" PRId64,
// code,mf.info.nRecords,mf.info.size);
// return code;
// }
// // =================== Commit Time-Series Data
// #if 0
// static bool tsdbHasDataToCommit(SCommitIter *iters, int nIters, TSKEY minKey, TSKEY maxKey) {
// for (int i = 0; i < nIters; i++) {
// TSKEY nextKey = tsdbNextIterKey((iters + i)->pIter);
// if (nextKey != TSDB_DATA_TIMESTAMP_NULL && (nextKey >= minKey && nextKey <= maxKey)) return true;
// }
// return false;
// }
// #endif
// =================== Commit Time-Series Data
static int tsdbCommitToTable(SCommitH *pCommith, int tid) {
SCommitIter *pIter = pCommith->iters + tid;
TSKEY nextKey = tsdbNextIterKey(pIter->pIter);

View File

@ -1,533 +0,0 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#if 0
#include "tsdb.h"
typedef struct {
STable * pTable;
SBlockIdx * pBlkIdx;
SBlockIdx bindex;
SBlockInfo *pInfo;
} STableCompactH;
typedef struct {
SRtn rtn;
SFSIter fsIter;
SArray * tbArray; // table array to cache table obj and block indexes
SReadH readh;
SDFileSet wSet;
SArray * aBlkIdx;
SArray * aSupBlk;
SDataCols *pDataCols;
} SCompactH;
#define TSDB_COMPACT_WSET(pComph) (&((pComph)->wSet))
#define TSDB_COMPACT_REPO(pComph) TSDB_READ_REPO(&((pComph)->readh))
#define TSDB_COMPACT_HEAD_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_HEAD)
#define TSDB_COMPACT_DATA_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_DATA)
#define TSDB_COMPACT_LAST_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_LAST)
#define TSDB_COMPACT_SMAD_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_SMAD)
#define TSDB_COMPACT_SMAL_FILE(pComph) TSDB_DFILE_IN_SET(TSDB_COMPACT_WSET(pComph), TSDB_FILE_SMAL)
#define TSDB_COMPACT_BUF(pComph) TSDB_READ_BUF(&((pComph)->readh))
#define TSDB_COMPACT_COMP_BUF(pComph) TSDB_READ_COMP_BUF(&((pComph)->readh))
static int tsdbAsyncCompact(STsdbRepo *pRepo);
static void tsdbStartCompact(STsdbRepo *pRepo);
static void tsdbEndCompact(STsdbRepo *pRepo, int eno);
static int tsdbCompactMeta(STsdbRepo *pRepo);
static int tsdbCompactTSData(STsdbRepo *pRepo);
static int tsdbCompactFSet(SCompactH *pComph, SDFileSet *pSet);
static bool tsdbShouldCompact(SCompactH *pComph);
static int tsdbInitCompactH(SCompactH *pComph, STsdbRepo *pRepo);
static void tsdbDestroyCompactH(SCompactH *pComph);
static int tsdbInitCompTbArray(SCompactH *pComph);
static void tsdbDestroyCompTbArray(SCompactH *pComph);
static int tsdbCacheFSetIndex(SCompactH *pComph);
static int tsdbCompactFSetInit(SCompactH *pComph, SDFileSet *pSet);
static void tsdbCompactFSetEnd(SCompactH *pComph);
static int tsdbCompactFSetImpl(SCompactH *pComph);
static int tsdbWriteBlockToRightFile(SCompactH *pComph, STable *pTable, SDataCols *pDataCols, void **ppBuf,
void **ppCBuf);
enum { TSDB_NO_COMPACT, TSDB_IN_COMPACT, TSDB_WAITING_COMPACT};
int tsdbCompact(STsdbRepo *pRepo) { return tsdbAsyncCompact(pRepo); }
void *tsdbCompactImpl(STsdbRepo *pRepo) {
// Check if there are files in TSDB FS to compact
if (REPO_FS(pRepo)->cstatus->pmf == NULL) {
tsdbInfo("vgId:%d no file to compact in FS", REPO_ID(pRepo));
return NULL;
}
tsdbStartCompact(pRepo);
if (tsdbCompactMeta(pRepo) < 0) {
tsdbError("vgId:%d failed to compact META data since %s", REPO_ID(pRepo), tstrerror(terrno));
goto _err;
}
if (tsdbCompactTSData(pRepo) < 0) {
tsdbError("vgId:%d failed to compact TS data since %s", REPO_ID(pRepo), tstrerror(terrno));
goto _err;
}
tsdbEndCompact(pRepo, TSDB_CODE_SUCCESS);
return NULL;
_err:
pRepo->code = terrno;
tsdbEndCompact(pRepo, terrno);
return NULL;
}
static int tsdbAsyncCompact(STsdbRepo *pRepo) {
if (pRepo->compactState != TSDB_NO_COMPACT) {
tsdbInfo("vgId:%d not compact tsdb again ", REPO_ID(pRepo));
return 0;
}
pRepo->compactState = TSDB_WAITING_COMPACT;
tsem_wait(&(pRepo->readyToCommit));
return tsdbScheduleCommit(pRepo, COMPACT_REQ);
}
static void tsdbStartCompact(STsdbRepo *pRepo) {
assert(pRepo->compactState != TSDB_IN_COMPACT);
tsdbInfo("vgId:%d start to compact!", REPO_ID(pRepo));
tsdbStartFSTxn(pRepo, 0, 0);
pRepo->code = TSDB_CODE_SUCCESS;
pRepo->compactState = TSDB_IN_COMPACT;
}
static void tsdbEndCompact(STsdbRepo *pRepo, int eno) {
if (eno != TSDB_CODE_SUCCESS) {
tsdbEndFSTxnWithError(REPO_FS(pRepo));
} else {
tsdbEndFSTxn(pRepo);
}
pRepo->compactState = TSDB_NO_COMPACT;
tsdbInfo("vgId:%d compact over, %s", REPO_ID(pRepo), (eno == TSDB_CODE_SUCCESS) ? "succeed" : "failed");
tsem_post(&(pRepo->readyToCommit));
}
static int tsdbCompactMeta(STsdbRepo *pRepo) {
STsdbFS *pfs = REPO_FS(pRepo);
tsdbUpdateMFile(pfs, pfs->cstatus->pmf);
return 0;
}
static int tsdbCompactTSData(STsdbRepo *pRepo) {
SCompactH compactH;
SDFileSet *pSet = NULL;
tsdbDebug("vgId:%d start to compact TS data", REPO_ID(pRepo));
// If no file, just return 0;
if (taosArrayGetSize(REPO_FS(pRepo)->cstatus->df) <= 0) {
tsdbDebug("vgId:%d no TS data file to compact, compact over", REPO_ID(pRepo));
return 0;
}
if (tsdbInitCompactH(&compactH, pRepo) < 0) {
return -1;
}
while ((pSet = tsdbFSIterNext(&(compactH.fsIter)))) {
// Remove those expired files
if (pSet->fid < compactH.rtn.minFid) {
tsdbInfo("vgId:%d FSET %d on level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
continue;
}
if (TSDB_FSET_LEVEL(pSet) == TFS_MAX_LEVEL) {
tsdbDebug("vgId:%d FSET %d on level %d, should not compact", REPO_ID(pRepo), pSet->fid, TFS_MAX_LEVEL);
tsdbUpdateDFileSet(REPO_FS(pRepo), pSet);
continue;
}
if (tsdbCompactFSet(&compactH, pSet) < 0) {
tsdbDestroyCompactH(&compactH);
tsdbError("vgId:%d failed to compact FSET %d since %s", REPO_ID(pRepo), pSet->fid, tstrerror(terrno));
return -1;
}
}
tsdbDestroyCompactH(&compactH);
tsdbDebug("vgId:%d compact TS data over", REPO_ID(pRepo));
return 0;
}
static int tsdbCompactFSet(SCompactH *pComph, SDFileSet *pSet) {
STsdbRepo *pRepo = TSDB_COMPACT_REPO(pComph);
SDiskID did;
tsdbDebug("vgId:%d start to compact FSET %d on level %d id %d", REPO_ID(pRepo), pSet->fid, TSDB_FSET_LEVEL(pSet),
TSDB_FSET_ID(pSet));
if (tsdbCompactFSetInit(pComph, pSet) < 0) {
return -1;
}
if (!tsdbShouldCompact(pComph)) {
tsdbDebug("vgId:%d no need to compact FSET %d", REPO_ID(pRepo), pSet->fid);
if (tsdbApplyRtnOnFSet(TSDB_COMPACT_REPO(pComph), pSet, &(pComph->rtn)) < 0) {
tsdbCompactFSetEnd(pComph);
return -1;
}
} else {
// Create new fset as compacted fset
if (tfsAllocDisk(pRepo->pTfs, tsdbGetFidLevel(pSet->fid, &(pComph->rtn)), &did) < 0) {
terrno = TSDB_CODE_TDB_NO_AVAIL_DISK;
tsdbError("vgId:%d failed to compact FSET %d since %s", REPO_ID(pRepo), pSet->fid, tstrerror(terrno));
tsdbCompactFSetEnd(pComph);
return -1;
}
tsdbInitDFileSet(TSDB_COMPACT_WSET(pComph), did, REPO_ID(pRepo), TSDB_FSET_FID(pSet),
FS_TXN_VERSION(REPO_FS(pRepo)));
if (tsdbCreateDFileSet(TSDB_COMPACT_WSET(pComph), true) < 0) {
tsdbError("vgId:%d failed to compact FSET %d since %s", REPO_ID(pRepo), pSet->fid, tstrerror(terrno));
tsdbCompactFSetEnd(pComph);
return -1;
}
if (tsdbCompactFSetImpl(pComph) < 0) {
tsdbCloseDFileSet(TSDB_COMPACT_WSET(pComph));
tsdbRemoveDFileSet(TSDB_COMPACT_WSET(pComph));
tsdbCompactFSetEnd(pComph);
return -1;
}
tsdbCloseDFileSet(TSDB_COMPACT_WSET(pComph));
tsdbUpdateDFileSet(REPO_FS(pRepo), TSDB_COMPACT_WSET(pComph));
tsdbDebug("vgId:%d FSET %d compact over", REPO_ID(pRepo), pSet->fid);
}
tsdbCompactFSetEnd(pComph);
return 0;
}
static bool tsdbShouldCompact(SCompactH *pComph) {
STsdbRepo * pRepo = TSDB_COMPACT_REPO(pComph);
STsdbCfg * pCfg = REPO_CFG(pRepo);
SReadH * pReadh = &(pComph->readh);
STableCompactH *pTh;
SBlock * pBlock;
int defaultRows = TSDB_DEFAULT_BLOCK_ROWS(pCfg->maxRowsPerFileBlock);
SDFile * pDataF = TSDB_READ_DATA_FILE(pReadh);
SDFile * pLastF = TSDB_READ_LAST_FILE(pReadh);
int tblocks = 0; // total blocks
int nSubBlocks = 0; // # of blocks with sub-blocks
int nSmallBlocks = 0; // # of blocks with rows < defaultRows
int64_t tsize = 0;
for (size_t i = 0; i < taosArrayGetSize(pComph->tbArray); i++) {
pTh = (STableCompactH *)taosArrayGet(pComph->tbArray, i);
if (pTh->pTable == NULL || pTh->pBlkIdx == NULL) continue;
for (size_t bidx = 0; bidx < pTh->pBlkIdx->numOfBlocks; bidx++) {
tblocks++;
pBlock = pTh->pInfo->blocks + bidx;
if (pBlock->numOfRows < defaultRows) {
nSmallBlocks++;
}
if (pBlock->numOfSubBlocks > 1) {
nSubBlocks++;
for (int k = 0; k < pBlock->numOfSubBlocks; k++) {
SBlock *iBlock = ((SBlock *)POINTER_SHIFT(pTh->pInfo, pBlock->offset)) + k;
tsize = tsize + iBlock->len;
}
} else if (pBlock->numOfSubBlocks == 1) {
tsize += pBlock->len;
} else {
ASSERT(0);
}
}
}
return (((nSubBlocks * 1.0 / tblocks) > 0.33) || ((nSmallBlocks * 1.0 / tblocks) > 0.33) ||
(tsize * 1.0 / (pDataF->info.size + pLastF->info.size - 2 * TSDB_FILE_HEAD_SIZE) < 0.85));
}
static int tsdbInitCompactH(SCompactH *pComph, STsdbRepo *pRepo) {
STsdbCfg *pCfg = REPO_CFG(pRepo);
memset(pComph, 0, sizeof(*pComph));
TSDB_FSET_SET_CLOSED(TSDB_COMPACT_WSET(pComph));
tsdbGetRtnSnap(pRepo, &(pComph->rtn));
tsdbFSIterInit(&(pComph->fsIter), REPO_FS(pRepo), TSDB_FS_ITER_FORWARD);
if (tsdbInitReadH(&(pComph->readh), pRepo) < 0) {
return -1;
}
if (tsdbInitCompTbArray(pComph) < 0) {
tsdbDestroyCompactH(pComph);
return -1;
}
pComph->aBlkIdx = taosArrayInit(1024, sizeof(SBlockIdx));
if (pComph->aBlkIdx == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCompactH(pComph);
return -1;
}
pComph->aSupBlk = taosArrayInit(1024, sizeof(SBlock));
if (pComph->aSupBlk == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCompactH(pComph);
return -1;
}
pComph->pDataCols = tdNewDataCols(0, pCfg->maxRowsPerFileBlock);
if (pComph->pDataCols == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCompactH(pComph);
return -1;
}
return 0;
}
static void tsdbDestroyCompactH(SCompactH *pComph) {
pComph->pDataCols = tdFreeDataCols(pComph->pDataCols);
pComph->aSupBlk = taosArrayDestroy(pComph->aSupBlk);
pComph->aBlkIdx = taosArrayDestroy(pComph->aBlkIdx);
tsdbDestroyCompTbArray(pComph);
tsdbDestroyReadH(&(pComph->readh));
tsdbCloseDFileSet(TSDB_COMPACT_WSET(pComph));
}
static int tsdbInitCompTbArray(SCompactH *pComph) { // Init pComp->tbArray
STsdbRepo *pRepo = TSDB_COMPACT_REPO(pComph);
STsdbMeta *pMeta = pRepo->tsdbMeta;
if (tsdbRLockRepoMeta(pRepo) < 0) return -1;
pComph->tbArray = taosArrayInit(pMeta->maxTables, sizeof(STableCompactH));
if (pComph->tbArray == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbUnlockRepoMeta(pRepo);
return -1;
}
// Note here must start from 0
for (int i = 0; i < pMeta->maxTables; i++) {
STableCompactH ch = {0};
if (pMeta->tables[i] != NULL) {
tsdbRefTable(pMeta->tables[i]);
ch.pTable = pMeta->tables[i];
}
if (taosArrayPush(pComph->tbArray, &ch) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbUnlockRepoMeta(pRepo);
return -1;
}
}
if (tsdbUnlockRepoMeta(pRepo) < 0) return -1;
return 0;
}
static void tsdbDestroyCompTbArray(SCompactH *pComph) {
STableCompactH *pTh;
if (pComph->tbArray == NULL) return;
for (size_t i = 0; i < taosArrayGetSize(pComph->tbArray); i++) {
pTh = (STableCompactH *)taosArrayGet(pComph->tbArray, i);
if (pTh->pTable) {
tsdbUnRefTable(pTh->pTable);
}
pTh->pInfo = taosTZfree(pTh->pInfo);
}
pComph->tbArray = taosArrayDestroy(pComph->tbArray);
}
static int tsdbCacheFSetIndex(SCompactH *pComph) {
SReadH *pReadH = &(pComph->readh);
if (tsdbLoadBlockIdx(pReadH) < 0) {
return -1;
}
for (int tid = 1; tid < taosArrayGetSize(pComph->tbArray); tid++) {
STableCompactH *pTh = (STableCompactH *)taosArrayGet(pComph->tbArray, tid);
pTh->pBlkIdx = NULL;
if (pTh->pTable == NULL) continue;
if (tsdbSetReadTable(pReadH, pTh->pTable) < 0) {
return -1;
}
if (pReadH->pBlkIdx == NULL) continue;
pTh->bindex = *(pReadH->pBlkIdx);
pTh->pBlkIdx = &(pTh->bindex);
if (tsdbMakeRoom((void **)(&(pTh->pInfo)), pTh->pBlkIdx->len) < 0) {
return -1;
}
if (tsdbLoadBlockInfo(pReadH, (void *)(pTh->pInfo)) < 0) {
return -1;
}
}
return 0;
}
static int tsdbCompactFSetInit(SCompactH *pComph, SDFileSet *pSet) {
taosArrayClear(pComph->aBlkIdx);
taosArrayClear(pComph->aSupBlk);
if (tsdbSetAndOpenReadFSet(&(pComph->readh), pSet) < 0) {
return -1;
}
if (tsdbCacheFSetIndex(pComph) < 0) {
tsdbCloseAndUnsetFSet(&(pComph->readh));
return -1;
}
return 0;
}
static void tsdbCompactFSetEnd(SCompactH *pComph) { tsdbCloseAndUnsetFSet(&(pComph->readh)); }
static int tsdbCompactFSetImpl(SCompactH *pComph) {
STsdbRepo *pRepo = TSDB_COMPACT_REPO(pComph);
STsdbCfg * pCfg = REPO_CFG(pRepo);
SReadH * pReadh = &(pComph->readh);
SBlockIdx blkIdx;
void ** ppBuf = &(TSDB_COMPACT_BUF(pComph));
void ** ppCBuf = &(TSDB_COMPACT_COMP_BUF(pComph));
int defaultRows = TSDB_DEFAULT_BLOCK_ROWS(pCfg->maxRowsPerFileBlock);
taosArrayClear(pComph->aBlkIdx);
for (int tid = 1; tid < taosArrayGetSize(pComph->tbArray); tid++) {
STableCompactH *pTh = (STableCompactH *)taosArrayGet(pComph->tbArray, tid);
STSchema * pSchema;
if (pTh->pTable == NULL || pTh->pBlkIdx == NULL) continue;
pSchema = tsdbGetTableSchemaImpl(pTh->pTable, true, true, -1);
taosArrayClear(pComph->aSupBlk);
if ((tdInitDataCols(pComph->pDataCols, pSchema) < 0) || (tdInitDataCols(pReadh->pDCols[0], pSchema) < 0) ||
(tdInitDataCols(pReadh->pDCols[1], pSchema) < 0)) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
tdFreeSchema(pSchema);
// Loop to compact each block data
for (int i = 0; i < pTh->pBlkIdx->numOfBlocks; i++) {
SBlock *pBlock = pTh->pInfo->blocks + i;
// Load the block data
if (tsdbLoadBlockData(pReadh, pBlock, pTh->pInfo) < 0) {
return -1;
}
// Merge pComph->pDataCols and pReadh->pDCols[0] and write data to file
if (pComph->pDataCols->numOfRows == 0 && pBlock->numOfRows >= defaultRows) {
if (tsdbWriteBlockToRightFile(pComph, pTh->pTable, pReadh->pDCols[0], ppBuf, ppCBuf) < 0) {
return -1;
}
} else {
int ridx = 0;
while (true) {
if (pReadh->pDCols[0]->numOfRows - ridx == 0) break;
int rowsToMerge = TMIN(pReadh->pDCols[0]->numOfRows - ridx, defaultRows - pComph->pDataCols->numOfRows);
tdMergeDataCols(pComph->pDataCols, pReadh->pDCols[0], rowsToMerge, &ridx, pCfg->update != TD_ROW_PARTIAL_UPDATE);
if (pComph->pDataCols->numOfRows < defaultRows) {
break;
}
if (tsdbWriteBlockToRightFile(pComph, pTh->pTable, pComph->pDataCols, ppBuf, ppCBuf) < 0) {
return -1;
}
tdResetDataCols(pComph->pDataCols);
}
}
}
if (pComph->pDataCols->numOfRows > 0 &&
tsdbWriteBlockToRightFile(pComph, pTh->pTable, pComph->pDataCols, ppBuf, ppCBuf) < 0) {
return -1;
}
if (tsdbWriteBlockInfoImpl(TSDB_COMPACT_HEAD_FILE(pComph), pTh->pTable, pComph->aSupBlk, NULL, ppBuf, &blkIdx) <
0) {
return -1;
}
if ((blkIdx.numOfBlocks > 0) && (taosArrayPush(pComph->aBlkIdx, (void *)(&blkIdx)) == NULL)) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
}
if (tsdbWriteBlockIdx(TSDB_COMPACT_HEAD_FILE(pComph), pComph->aBlkIdx, ppBuf) < 0) {
return -1;
}
return 0;
}
static int tsdbWriteBlockToRightFile(SCompactH *pComph, STable *pTable, SDataCols *pDataCols, void **ppBuf,
void **ppCBuf) {
STsdbRepo *pRepo = TSDB_COMPACT_REPO(pComph);
STsdbCfg * pCfg = REPO_CFG(pRepo);
SDFile * pDFile;
bool isLast;
SBlock block;
ASSERT(pDataCols->numOfRows > 0);
if (pDataCols->numOfRows < pCfg->minRowsPerFileBlock) {
pDFile = TSDB_COMPACT_LAST_FILE(pComph);
isLast = true;
} else {
pDFile = TSDB_COMPACT_DATA_FILE(pComph);
isLast = false;
}
if (tsdbWriteBlockImpl(pRepo, pTable, pDFile, pDataCols, &block, isLast, true, ppBuf, ppCBuf) < 0) {
return -1;
}
if (taosArrayPush(pComph->aSupBlk, (void *)(&block)) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
return 0;
}
#endif

View File

@ -246,62 +246,6 @@ void *tsdbFreeFS(STsdbFS *pfs) {
return NULL;
}
// static int tsdbProcessExpiredFS(STsdb *pRepo) {
// tsdbStartFSTxn(pRepo, 0, 0);
// // if (tsdbCreateMeta(pRepo) < 0) {
// // tsdbError("vgId:%d failed to create meta since %s", REPO_ID(pRepo), tstrerror(terrno));
// // return -1;
// // }
// if (tsdbApplyRtn(pRepo) < 0) {
// tsdbEndFSTxnWithError(REPO_FS(pRepo));
// tsdbError("vgId:%d failed to apply rtn since %s", REPO_ID(pRepo), tstrerror(terrno));
// return -1;
// }
// if (tsdbEndFSTxn(pRepo) < 0) {
// tsdbError("vgId:%d failed to end fs txn since %s", REPO_ID(pRepo), tstrerror(terrno));
// return -1;
// }
// return 0;
// }
// static int tsdbCreateMeta(STsdb *pRepo) {
// STsdbFS *pfs = REPO_FS(pRepo);
// SMFile * pOMFile = pfs->cstatus->pmf;
// SMFile mf;
// SDiskID did;
// if (pOMFile != NULL) {
// // keep the old meta file
// tsdbUpdateMFile(pfs, pOMFile);
// return 0;
// }
// // Create a new meta file
// did.level = TFS_PRIMARY_LEVEL;
// did.id = TFS_PRIMARY_ID;
// tsdbInitMFile(&mf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)));
// if (tsdbCreateMFile(&mf, true) < 0) {
// tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
// return -1;
// }
// tsdbInfo("vgId:%d meta file %s is created", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf));
// if (tsdbUpdateMFileHeader(&mf) < 0) {
// tsdbError("vgId:%d failed to update META file header since %s, revert it", REPO_ID(pRepo), tstrerror(terrno));
// tsdbApplyMFileChange(&mf, pOMFile);
// return -1;
// }
// TSDB_FILE_FSYNC(&mf);
// tsdbCloseMFile(&mf);
// tsdbUpdateMFile(pfs, &mf);
// return 0;
// }
int tsdbOpenFS(STsdb *pRepo) {
STsdbFS *pfs = REPO_FS(pRepo);
char current[TSDB_FILENAME_LEN] = "\0";
@ -769,142 +713,6 @@ static int tsdbScanAndTryFixFS(STsdb *pRepo) {
return 0;
}
// int tsdbLoadMetaCache(STsdb *pRepo, bool recoverMeta) {
// char tbuf[128];
// STsdbFS * pfs = REPO_FS(pRepo);
// SMFile mf;
// SMFile * pMFile = &mf;
// void * pBuf = NULL;
// SKVRecord rInfo;
// int64_t maxBufSize = 0;
// SMFInfo minfo;
// taosHashClear(pfs->metaCache);
// // No meta file, just return
// if (pfs->cstatus->pmf == NULL) return 0;
// mf = pfs->cstatus->mf;
// // Load cache first
// if (tsdbOpenMFile(pMFile, O_RDONLY) < 0) {
// return -1;
// }
// if (tsdbLoadMFileHeader(pMFile, &minfo) < 0) {
// tsdbCloseMFile(pMFile);
// return -1;
// }
// while (true) {
// int64_t tsize = tsdbReadMFile(pMFile, tbuf, sizeof(SKVRecord));
// if (tsize == 0) break;
// if (tsize < 0) {
// tsdbError("vgId:%d failed to read META file since %s", REPO_ID(pRepo), tstrerror(terrno));
// return -1;
// }
// if (tsize < sizeof(SKVRecord)) {
// tsdbError("vgId:%d failed to read %" PRIzu " bytes from file %s", REPO_ID(pRepo), sizeof(SKVRecord),
// TSDB_FILE_FULL_NAME(pMFile));
// terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
// tsdbCloseMFile(pMFile);
// return -1;
// }
// void *ptr = tsdbDecodeKVRecord(tbuf, &rInfo);
// ASSERT(POINTER_DISTANCE(ptr, tbuf) == sizeof(SKVRecord));
// // ASSERT((rInfo.offset > 0) ? (pStore->info.size == rInfo.offset) : true);
// if (rInfo.offset < 0) {
// taosHashRemove(pfs->metaCache, (void *)(&rInfo.uid), sizeof(rInfo.uid));
// #if 0
// pStore->info.size += sizeof(SKVRecord);
// pStore->info.nRecords--;
// pStore->info.nDels++;
// pStore->info.tombSize += (rInfo.size + sizeof(SKVRecord) * 2);
// #endif
// } else {
// ASSERT(rInfo.offset > 0 && rInfo.size > 0);
// if (taosHashPut(pfs->metaCache, (void *)(&rInfo.uid), sizeof(rInfo.uid), &rInfo, sizeof(rInfo)) < 0) {
// tsdbError("vgId:%d failed to load meta cache from file %s since OOM", REPO_ID(pRepo),
// TSDB_FILE_FULL_NAME(pMFile));
// terrno = TSDB_CODE_COM_OUT_OF_MEMORY;
// tsdbCloseMFile(pMFile);
// return -1;
// }
// maxBufSize = TMAX(maxBufSize, rInfo.size);
// if (tsdbSeekMFile(pMFile, rInfo.size, SEEK_CUR) < 0) {
// tsdbError("vgId:%d failed to lseek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
// tstrerror(terrno));
// tsdbCloseMFile(pMFile);
// return -1;
// }
// #if 0
// pStore->info.size += (sizeof(SKVRecord) + rInfo.size);
// pStore->info.nRecords++;
// #endif
// }
// }
// if (recoverMeta) {
// pBuf = taosMemoryMalloc((size_t)maxBufSize);
// if (pBuf == NULL) {
// terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
// tsdbCloseMFile(pMFile);
// return -1;
// }
// SKVRecord *pRecord = taosHashIterate(pfs->metaCache, NULL);
// while (pRecord) {
// if (tsdbSeekMFile(pMFile, pRecord->offset + sizeof(SKVRecord), SEEK_SET) < 0) {
// tsdbError("vgId:%d failed to seek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
// tstrerror(terrno));
// taosMemoryFreeClear(pBuf);
// tsdbCloseMFile(pMFile);
// return -1;
// }
// int nread = (int)tsdbReadMFile(pMFile, pBuf, pRecord->size);
// if (nread < 0) {
// tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
// tstrerror(terrno));
// taosMemoryFreeClear(pBuf);
// tsdbCloseMFile(pMFile);
// return -1;
// }
// if (nread < pRecord->size) {
// tsdbError("vgId:%d failed to read file %s since file corrupted, expected read:%" PRId64 " actual read:%d",
// REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), pRecord->size, nread);
// terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
// taosMemoryFreeClear(pBuf);
// tsdbCloseMFile(pMFile);
// return -1;
// }
// if (tsdbRestoreTable(pRepo, pBuf, (int)pRecord->size) < 0) {
// tsdbError("vgId:%d failed to restore table, uid %" PRId64 ", since %s" PRIu64, REPO_ID(pRepo), pRecord->uid,
// tstrerror(terrno));
// taosMemoryFreeClear(pBuf);
// tsdbCloseMFile(pMFile);
// return -1;
// }
// pRecord = taosHashIterate(pfs->metaCache, pRecord);
// }
// tsdbOrgMeta(pRepo);
// }
// tsdbCloseMFile(pMFile);
// taosMemoryFreeClear(pBuf);
// return 0;
// }
static int tsdbScanRootDir(STsdb *pRepo) {
char rootDir[TSDB_FILENAME_LEN];
char bname[TSDB_FILENAME_LEN];
@ -983,127 +791,6 @@ static bool tsdbIsTFileInFS(STsdbFS *pfs, const STfsFile *pf) {
return false;
}
// static int tsdbRestoreMeta(STsdb *pRepo) {
// char rootDir[TSDB_FILENAME_LEN];
// char bname[TSDB_FILENAME_LEN];
// STfsDir * tdir = NULL;
// const STfsFile *pf = NULL;
// const char * pattern = "^meta(-ver[0-9]+)?$";
// regex_t regex;
// STsdbFS * pfs = REPO_FS(pRepo);
// regcomp(&regex, pattern, REG_EXTENDED);
// tsdbInfo("vgId:%d try to restore meta", REPO_ID(pRepo));
// tsdbGetRootDir(REPO_ID(pRepo), rootDir);
// tdir = tfsOpendir(rootDir);
// if (tdir == NULL) {
// tsdbError("vgId:%d failed to open dir %s since %s", REPO_ID(pRepo), rootDir, tstrerror(terrno));
// regfree(&regex);
// return -1;
// }
// while ((pf = tfsReaddir(tdir))) {
// tfsBasename(pf, bname);
// if (strcmp(bname, "data") == 0) {
// // Skip the data/ directory
// continue;
// }
// if (strcmp(bname, tsdbTxnFname[TSDB_TXN_TEMP_FILE]) == 0) {
// // Skip current.t file
// tsdbInfo("vgId:%d file %s exists, remove it", REPO_ID(pRepo), pf->aname);
// (void)tfsremove(pf);
// continue;
// }
// int code = regexec(&regex, bname, 0, NULL, 0);
// if (code == 0) {
// // Match
// if (pfs->cstatus->pmf != NULL) {
// tsdbError("vgId:%d failed to restore meta since two file exists, file1 %s and file2 %s", REPO_ID(pRepo),
// TSDB_FILE_FULL_NAME(pfs->cstatus->pmf), pf->aname);
// terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
// tfsClosedir(tdir);
// regfree(&regex);
// return -1;
// } else {
// uint32_t _version = 0;
// if (strcmp(bname, "meta") != 0) {
// sscanf(bname, "meta-ver%" PRIu32, &_version);
// pfs->cstatus->meta.version = _version;
// }
// pfs->cstatus->pmf = &(pfs->cstatus->mf);
// pfs->cstatus->pmf->f = *pf;
// TSDB_FILE_SET_CLOSED(pfs->cstatus->pmf);
// if (tsdbOpenMFile(pfs->cstatus->pmf, O_RDONLY) < 0) {
// tsdbError("vgId:%d failed to restore meta since %s", REPO_ID(pRepo), tstrerror(terrno));
// tfsClosedir(tdir);
// regfree(&regex);
// return -1;
// }
// if (tsdbLoadMFileHeader(pfs->cstatus->pmf, &(pfs->cstatus->pmf->info)) < 0) {
// tsdbError("vgId:%d failed to restore meta since %s", REPO_ID(pRepo), tstrerror(terrno));
// tsdbCloseMFile(pfs->cstatus->pmf);
// tfsClosedir(tdir);
// regfree(&regex);
// return -1;
// }
// if (tsdbForceKeepFile) {
// struct stat tfstat;
// // Get real file size
// if (fstat(pfs->cstatus->pmf->fd, &tfstat) < 0) {
// terrno = TAOS_SYSTEM_ERROR(errno);
// tsdbCloseMFile(pfs->cstatus->pmf);
// tfsClosedir(tdir);
// regfree(&regex);
// return -1;
// }
// if (pfs->cstatus->pmf->info.size != tfstat.st_size) {
// int64_t tfsize = pfs->cstatus->pmf->info.size;
// pfs->cstatus->pmf->info.size = tfstat.st_size;
// tsdbInfo("vgId:%d file %s header size is changed from %" PRId64 " to %" PRId64, REPO_ID(pRepo),
// TSDB_FILE_FULL_NAME(pfs->cstatus->pmf), tfsize, pfs->cstatus->pmf->info.size);
// }
// }
// tsdbCloseMFile(pfs->cstatus->pmf);
// }
// } else if (code == REG_NOMATCH) {
// // Not match
// tsdbInfo("vgId:%d invalid file %s exists, remove it", REPO_ID(pRepo), pf->aname);
// tfsremove(pf);
// continue;
// } else {
// // Has other error
// tsdbError("vgId:%d failed to restore meta file while run regexec since %s", REPO_ID(pRepo), strerror(code));
// terrno = TAOS_SYSTEM_ERROR(code);
// tfsClosedir(tdir);
// regfree(&regex);
// return -1;
// }
// }
// if (pfs->cstatus->pmf) {
// tsdbInfo("vgId:%d meta file %s is restored", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pfs->cstatus->pmf));
// } else {
// tsdbInfo("vgId:%d no meta file is restored", REPO_ID(pRepo));
// }
// tfsClosedir(tdir);
// regfree(&regex);
// return 0;
// }
static int tsdbRestoreDFileSet(STsdb *pRepo) {
char dataDir[TSDB_FILENAME_LEN];
char bname[TSDB_FILENAME_LEN];

View File

@ -33,271 +33,6 @@ static int tsdbEncodeDFInfo(void **buf, SDFInfo *pInfo);
static void *tsdbDecodeDFInfo(void *buf, SDFInfo *pInfo);
static int tsdbRollBackDFile(SDFile *pDFile);
#if 0
// ============== SMFile
void tsdbInitMFile(SMFile *pMFile, SDiskID did, int vid, uint32_t ver) {
char fname[TSDB_FILENAME_LEN];
TSDB_FILE_SET_STATE(pMFile, TSDB_FILE_STATE_OK);
memset(&(pMFile->info), 0, sizeof(pMFile->info));
pMFile->info.magic = TSDB_FILE_INIT_MAGIC;
tsdbGetFilename(vid, 0, ver, TSDB_FILE_META, fname);
tfsInitFile(TSDB_FILE_F(pMFile), did.level, did.id, fname);
}
void tsdbInitMFileEx(SMFile *pMFile, const SMFile *pOMFile) {
*pMFile = *pOMFile;
TSDB_FILE_SET_CLOSED(pMFile);
}
int tsdbEncodeSMFile(void **buf, SMFile *pMFile) {
int tlen = 0;
tlen += tsdbEncodeMFInfo(buf, &(pMFile->info));
tlen += tfsEncodeFile(buf, &(pMFile->f));
return tlen;
}
void *tsdbDecodeSMFile(void *buf, SMFile *pMFile) {
buf = tsdbDecodeMFInfo(buf, &(pMFile->info));
buf = tfsDecodeFile(buf, &(pMFile->f));
TSDB_FILE_SET_CLOSED(pMFile);
return buf;
}
int tsdbEncodeSMFileEx(void **buf, SMFile *pMFile) {
int tlen = 0;
tlen += tsdbEncodeMFInfo(buf, &(pMFile->info));
tlen += taosEncodeString(buf, TSDB_FILE_FULL_NAME(pMFile));
return tlen;
}
void *tsdbDecodeSMFileEx(void *buf, SMFile *pMFile) {
char *aname;
buf = tsdbDecodeMFInfo(buf, &(pMFile->info));
buf = taosDecodeString(buf, &aname);
strncpy(TSDB_FILE_FULL_NAME(pMFile), aname, TSDB_FILENAME_LEN);
TSDB_FILE_SET_CLOSED(pMFile);
taosMemoryFreeClear(aname);
return buf;
}
int tsdbApplyMFileChange(SMFile *from, SMFile *to) {
if (from == NULL && to == NULL) return 0;
if (from != NULL) {
if (to == NULL) {
return tsdbRemoveMFile(from);
} else {
if (tfsIsSameFile(TSDB_FILE_F(from), TSDB_FILE_F(to))) {
if (from->info.size > to->info.size) {
tsdbRollBackMFile(to);
}
} else {
return tsdbRemoveMFile(from);
}
}
}
return 0;
}
int tsdbCreateMFile(SMFile *pMFile, bool updateHeader) {
ASSERT(pMFile->info.size == 0 && pMFile->info.magic == TSDB_FILE_INIT_MAGIC);
pMFile->fd = open(TSDB_FILE_FULL_NAME(pMFile), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0755);
if (pMFile->fd < 0) {
if (errno == ENOENT) {
// Try to create directory recursively
char *s = strdup(TFILE_REL_NAME(&(pMFile->f)));
if (tfsMkdirRecurAt(dirname(s), TSDB_FILE_LEVEL(pMFile), TSDB_FILE_ID(pMFile)) < 0) {
taosMemoryFreeClear(s);
return -1;
}
taosMemoryFreeClear(s);
pMFile->fd = open(TSDB_FILE_FULL_NAME(pMFile), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0755);
if (pMFile->fd < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
} else {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
}
if (!updateHeader) {
return 0;
}
pMFile->info.size += TSDB_FILE_HEAD_SIZE;
if (tsdbUpdateMFileHeader(pMFile) < 0) {
tsdbCloseMFile(pMFile);
tsdbRemoveMFile(pMFile);
return -1;
}
return 0;
}
int tsdbUpdateMFileHeader(SMFile *pMFile) {
char buf[TSDB_FILE_HEAD_SIZE] = "\0";
if (tsdbSeekMFile(pMFile, 0, SEEK_SET) < 0) {
return -1;
}
void *ptr = buf;
tsdbEncodeMFInfo(&ptr, TSDB_FILE_INFO(pMFile));
taosCalcChecksumAppend(0, (uint8_t *)buf, TSDB_FILE_HEAD_SIZE);
if (tsdbWriteMFile(pMFile, buf, TSDB_FILE_HEAD_SIZE) < 0) {
return -1;
}
return 0;
}
int tsdbLoadMFileHeader(SMFile *pMFile, SMFInfo *pInfo) {
char buf[TSDB_FILE_HEAD_SIZE] = "\0";
ASSERT(TSDB_FILE_OPENED(pMFile));
if (tsdbSeekMFile(pMFile, 0, SEEK_SET) < 0) {
return -1;
}
if (tsdbReadMFile(pMFile, buf, TSDB_FILE_HEAD_SIZE) < 0) {
return -1;
}
if (!taosCheckChecksumWhole((uint8_t *)buf, TSDB_FILE_HEAD_SIZE)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return -1;
}
tsdbDecodeMFInfo(buf, pInfo);
return 0;
}
int tsdbScanAndTryFixMFile(STsdb *pRepo) {
SMFile * pMFile = pRepo->fs->cstatus->pmf;
struct stat mfstat;
SMFile mf;
if (pMFile == NULL) {
// No meta file, no need to scan
return 0;
}
tsdbInitMFileEx(&mf, pMFile);
if (access(TSDB_FILE_FULL_NAME(pMFile), F_OK) != 0) {
tsdbError("vgId:%d meta file %s not exit, report to upper layer to fix it", REPO_ID(pRepo),
TSDB_FILE_FULL_NAME(pMFile));
pRepo->state |= TSDB_STATE_BAD_META;
TSDB_FILE_SET_STATE(pMFile, TSDB_FILE_STATE_BAD);
return 0;
}
if (stat(TSDB_FILE_FULL_NAME(&mf), &mfstat) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
if (pMFile->info.size < mfstat.st_size) {
if (tsdbOpenMFile(&mf, O_WRONLY) < 0) {
return -1;
}
if (taosFtruncate(mf.fd, mf.info.size) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
tsdbCloseMFile(&mf);
return -1;
}
if (tsdbUpdateMFileHeader(&mf) < 0) {
tsdbCloseMFile(&mf);
return -1;
}
tsdbCloseMFile(&mf);
tsdbInfo("vgId:%d file %s is truncated from %" PRId64 " to %" PRId64, REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
mfstat.st_size, pMFile->info.size);
} else if (pMFile->info.size > mfstat.st_size) {
tsdbError("vgId:%d meta file %s has wrong size %" PRId64 " expected %" PRId64 ", report to upper layer to fix it",
REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), mfstat.st_size, pMFile->info.size);
pRepo->state |= TSDB_STATE_BAD_META;
TSDB_FILE_SET_STATE(pMFile, TSDB_FILE_STATE_BAD);
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return 0;
} else {
tsdbDebug("vgId:%d meta file %s passes the scan", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile));
}
return 0;
}
int tsdbEncodeMFInfo(void **buf, SMFInfo *pInfo) {
int tlen = 0;
tlen += taosEncodeVariantI64(buf, pInfo->size);
tlen += taosEncodeVariantI64(buf, pInfo->tombSize);
tlen += taosEncodeVariantI64(buf, pInfo->nRecords);
tlen += taosEncodeVariantI64(buf, pInfo->nDels);
tlen += taosEncodeFixedU32(buf, pInfo->magic);
return tlen;
}
void *tsdbDecodeMFInfo(void *buf, SMFInfo *pInfo) {
buf = taosDecodeVariantI64(buf, &(pInfo->size));
buf = taosDecodeVariantI64(buf, &(pInfo->tombSize));
buf = taosDecodeVariantI64(buf, &(pInfo->nRecords));
buf = taosDecodeVariantI64(buf, &(pInfo->nDels));
buf = taosDecodeFixedU32(buf, &(pInfo->magic));
return buf;
}
static int tsdbRollBackMFile(SMFile *pMFile) {
SMFile mf;
tsdbInitMFileEx(&mf, pMFile);
if (tsdbOpenMFile(&mf, O_WRONLY) < 0) {
return -1;
}
if (taosFtruncate(TSDB_FILE_FD(&mf), pMFile->info.size) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
tsdbCloseMFile(&mf);
return -1;
}
if (tsdbUpdateMFileHeader(&mf) < 0) {
tsdbCloseMFile(&mf);
return -1;
}
TSDB_FILE_FSYNC(&mf);
tsdbCloseMFile(&mf);
return 0;
}
#endif
// ============== Operations on SDFile
void tsdbInitDFile(STsdb *pRepo, SDFile *pDFile, SDiskID did, int fid, uint32_t ver, TSDB_FILE_T ftype) {
char fname[TSDB_FILENAME_LEN];

File diff suppressed because it is too large Load Diff

View File

@ -285,21 +285,6 @@ static STbData *tsdbNewTbData(tb_uid_t uid) {
pTbData->keyMax = TSKEY_MIN;
pTbData->nrows = 0;
// uint8_t skipListCreateFlags;
// if (pCfg->update == TD_ROW_DISCARD_UPDATE)
// skipListCreateFlags = SL_DISCARD_DUP_KEY;
// else
// skipListCreateFlags = SL_UPDATE_DUP_KEY;
// pTableData->pData =
// tSkipListCreate(TSDB_DATA_SKIPLIST_LEVEL, TSDB_DATA_TYPE_TIMESTAMP, TYPE_BYTES[TSDB_DATA_TYPE_TIMESTAMP],
// tkeyComparFn, skipListCreateFlags, tsdbGetTsTupleKey);
// if (pTableData->pData == NULL) {
// terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
// taosMemoryFree(pTableData);
// return NULL;
// }
pTbData->pData = tSkipListCreate(5, TSDB_DATA_TYPE_TIMESTAMP, sizeof(int64_t), tkeyComparFn, SL_DISCARD_DUP_KEY,
tsdbGetTsTupleKey);
if (pTbData->pData == NULL) {
@ -351,608 +336,3 @@ static int tsdbAppendTableRowToCols(STable *pTable, SDataCols *pCols, STSchema *
return 0;
}
/* ------------------------ REFACTORING ------------------------ */
#if 0
int tsdbInsertDataToMemTable(STsdbMemTable *pMemTable, SSubmitReq *pMsg) {
SMemAllocator *pMA = pMemTable->pMA;
STbData * pTbData = (STbData *)TD_MA_MALLOC(pMA, sizeof(*pTbData));
if (pTbData == NULL) {
// TODO
}
TD_SLIST_PUSH(&(pMemTable->list), pTbData);
return 0;
}
#include "tdataformat.h"
#include "tfunctional.h"
#include "tsdbRowMergeBuf.h"
#include "tsdbint.h"
#include "tskiplist.h"
#define TSDB_DATA_SKIPLIST_LEVEL 5
#define TSDB_MAX_INSERT_BATCH 512
typedef struct {
int32_t totalLen;
int32_t len;
STSRow* row;
} SSubmitBlkIter;
typedef struct {
int32_t totalLen;
int32_t len;
void * pMsg;
} SSubmitMsgIter;
static SMemTable * tsdbNewMemTable(STsdbRepo *pRepo);
static void tsdbFreeMemTable(SMemTable *pMemTable);
static STableData* tsdbNewTableData(STsdbCfg *pCfg, STable *pTable);
static void tsdbFreeTableData(STableData *pTableData);
static int tsdbAdjustMemMaxTables(SMemTable *pMemTable, int maxTables);
static int tsdbAppendTableRowToCols(STable *pTable, SDataCols *pCols, STSchema **ppSchema, STSRow* row);
static int tsdbInitSubmitBlkIter(SSubmitBlk *pBlock, SSubmitBlkIter *pIter);
static STSRow* tsdbGetSubmitBlkNext(SSubmitBlkIter *pIter);
static int tsdbInsertDataToTable(STsdbRepo *pRepo, SSubmitBlk *pBlock, int32_t *affectedrows);
static int tsdbInitSubmitMsgIter(SSubmitReq *pMsg, SSubmitMsgIter *pIter);
static int tsdbGetSubmitMsgNext(SSubmitMsgIter *pIter, SSubmitBlk **pPBlock);
static int tsdbCheckTableSchema(STsdbRepo *pRepo, SSubmitBlk *pBlock, STable *pTable);
static int tsdbUpdateTableLatestInfo(STsdbRepo *pRepo, STable *pTable, STSRow* row);
static FORCE_INLINE int tsdbCheckRowRange(STsdbRepo *pRepo, STable *pTable, STSRow* row, TSKEY minKey, TSKEY maxKey,
TSKEY now);
// ---------------- INTERNAL FUNCTIONS ----------------
int tsdbRefMemTable(STsdbRepo *pRepo, SMemTable *pMemTable) {
if (pMemTable == NULL) return 0;
int ref = T_REF_INC(pMemTable);
tsdbDebug("vgId:%d ref memtable %p ref %d", REPO_ID(pRepo), pMemTable, ref);
return 0;
}
// Need to lock the repository
int tsdbUnRefMemTable(STsdbRepo *pRepo, SMemTable *pMemTable) {
if (pMemTable == NULL) return 0;
int ref = T_REF_DEC(pMemTable);
tsdbDebug("vgId:%d unref memtable %p ref %d", REPO_ID(pRepo), pMemTable, ref);
if (ref == 0) {
STsdbBufPool *pBufPool = pRepo->pPool;
SListNode *pNode = NULL;
bool addNew = false;
if (tsdbLockRepo(pRepo) < 0) return -1;
while ((pNode = tdListPopHead(pMemTable->bufBlockList)) != NULL) {
if (pBufPool->nRecycleBlocks > 0) {
tsdbRecycleBufferBlock(pBufPool, pNode, false);
pBufPool->nRecycleBlocks -= 1;
} else {
if(pBufPool->nElasticBlocks > 0 && listNEles(pBufPool->bufBlockList) > 2) {
tsdbRecycleBufferBlock(pBufPool, pNode, true);
} else {
tdListAppendNode(pBufPool->bufBlockList, pNode);
addNew = true;
}
}
}
if (addNew) {
int code = taosThreadCondSignal(&pBufPool->poolNotEmpty);
if (code != 0) {
if (tsdbUnlockRepo(pRepo) < 0) return -1;
tsdbError("vgId:%d failed to signal pool not empty since %s", REPO_ID(pRepo), strerror(code));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
}
if (tsdbUnlockRepo(pRepo) < 0) return -1;
for (int i = 0; i < pMemTable->maxTables; i++) {
if (pMemTable->tData[i] != NULL) {
tsdbFreeTableData(pMemTable->tData[i]);
}
}
tdListDiscard(pMemTable->actList);
tdListDiscard(pMemTable->bufBlockList);
tsdbFreeMemTable(pMemTable);
}
return 0;
}
int tsdbTakeMemSnapshot(STsdbRepo *pRepo, SMemSnapshot *pSnapshot, SArray *pATable) {
memset(pSnapshot, 0, sizeof(*pSnapshot));
if (tsdbLockRepo(pRepo) < 0) return -1;
pSnapshot->omem = pRepo->mem;
pSnapshot->imem = pRepo->imem;
tsdbRefMemTable(pRepo, pRepo->mem);
tsdbRefMemTable(pRepo, pRepo->imem);
if (tsdbUnlockRepo(pRepo) < 0) return -1;
if (pSnapshot->omem) {
taosRLockLatch(&(pSnapshot->omem->latch));
pSnapshot->mem = &(pSnapshot->mtable);
pSnapshot->mem->tData = (STableData **)taosMemoryCalloc(pSnapshot->omem->maxTables, sizeof(STableData *));
if (pSnapshot->mem->tData == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
taosRUnLockLatch(&(pSnapshot->omem->latch));
tsdbUnRefMemTable(pRepo, pSnapshot->omem);
tsdbUnRefMemTable(pRepo, pSnapshot->imem);
pSnapshot->mem = NULL;
pSnapshot->imem = NULL;
pSnapshot->omem = NULL;
return -1;
}
pSnapshot->mem->keyFirst = pSnapshot->omem->keyFirst;
pSnapshot->mem->keyLast = pSnapshot->omem->keyLast;
pSnapshot->mem->numOfRows = pSnapshot->omem->numOfRows;
pSnapshot->mem->maxTables = pSnapshot->omem->maxTables;
for (size_t i = 0; i < taosArrayGetSize(pATable); i++) {
STable * pTable = *(STable **)taosArrayGet(pATable, i);
int32_t tid = TABLE_TID(pTable);
STableData *pTableData = (tid < pSnapshot->omem->maxTables) ? pSnapshot->omem->tData[tid] : NULL;
if ((pTableData == NULL) || (TABLE_UID(pTable) != pTableData->uid)) continue;
pSnapshot->mem->tData[tid] = pTableData;
T_REF_INC(pTableData);
}
taosRUnLockLatch(&(pSnapshot->omem->latch));
}
tsdbDebug("vgId:%d take memory snapshot, pMem %p pIMem %p", REPO_ID(pRepo), pSnapshot->omem, pSnapshot->imem);
return 0;
}
void tsdbUnTakeMemSnapShot(STsdbRepo *pRepo, SMemSnapshot *pSnapshot) {
tsdbDebug("vgId:%d untake memory snapshot, pMem %p pIMem %p", REPO_ID(pRepo), pSnapshot->omem, pSnapshot->imem);
if (pSnapshot->mem) {
ASSERT(pSnapshot->omem != NULL);
for (size_t i = 0; i < pSnapshot->mem->maxTables; i++) {
STableData *pTableData = pSnapshot->mem->tData[i];
if (pTableData) {
tsdbFreeTableData(pTableData);
}
}
taosMemoryFreeClear(pSnapshot->mem->tData);
tsdbUnRefMemTable(pRepo, pSnapshot->omem);
}
tsdbUnRefMemTable(pRepo, pSnapshot->imem);
pSnapshot->mem = NULL;
pSnapshot->imem = NULL;
pSnapshot->omem = NULL;
}
int tsdbSyncCommitConfig(STsdbRepo* pRepo) {
ASSERT(pRepo->config_changed == true);
tsem_wait(&(pRepo->readyToCommit));
if (pRepo->code != TSDB_CODE_SUCCESS) {
tsdbWarn("vgId:%d try to commit config when TSDB not in good state: %s", REPO_ID(pRepo), tstrerror(terrno));
}
if (tsdbLockRepo(pRepo) < 0) return -1;
tsdbScheduleCommit(pRepo, COMMIT_CONFIG_REQ);
if (tsdbUnlockRepo(pRepo) < 0) return -1;
tsem_wait(&(pRepo->readyToCommit));
tsem_post(&(pRepo->readyToCommit));
if (pRepo->code != TSDB_CODE_SUCCESS) {
terrno = pRepo->code;
return -1;
}
terrno = TSDB_CODE_SUCCESS;
return 0;
}
/**
* This is an important function to load data or try to load data from memory skiplist iterator.
*
* This function load memory data until:
* 1. iterator ends
* 2. data key exceeds maxKey
* 3. rowsIncreased = rowsInserted - rowsDeleteSucceed >= maxRowsToRead
* 4. operations in pCols not exceeds its max capacity if pCols is given
*
* The function tries to procceed AS MUCH AS POSSIBLE.
*/
int tsdbLoadDataFromCache(STable *pTable, SSkipListIterator *pIter, TSKEY maxKey, int maxRowsToRead, SDataCols *pCols,
TKEY *filterKeys, int nFilterKeys, bool keepDup, SMergeInfo *pMergeInfo) {
ASSERT(maxRowsToRead > 0 && nFilterKeys >= 0);
if (pIter == NULL) return 0;
STSchema * pSchema = NULL;
TSKEY rowKey = 0;
TSKEY fKey = 0;
bool isRowDel = false;
int filterIter = 0;
STSRow* row = NULL;
SMergeInfo mInfo;
if (pMergeInfo == NULL) pMergeInfo = &mInfo;
memset(pMergeInfo, 0, sizeof(*pMergeInfo));
pMergeInfo->keyFirst = INT64_MAX;
pMergeInfo->keyLast = INT64_MIN;
if (pCols) tdResetDataCols(pCols);
row = tsdbNextIterRow(pIter);
if (row == NULL || TD_ROW_KEY(row) > maxKey) {
rowKey = INT64_MAX;
isRowDel = false;
} else {
rowKey = TD_ROW_KEY(row);
isRowDel = memRowDeleted(row);
}
if (filterIter >= nFilterKeys) {
fKey = INT64_MAX;
} else {
fKey = tdGetKey(filterKeys[filterIter]);
}
while (true) {
if (fKey == INT64_MAX && rowKey == INT64_MAX) break;
if (fKey < rowKey) {
pMergeInfo->keyFirst = TMIN(pMergeInfo->keyFirst, fKey);
pMergeInfo->keyLast = TMAX(pMergeInfo->keyLast, fKey);
filterIter++;
if (filterIter >= nFilterKeys) {
fKey = INT64_MAX;
} else {
fKey = tdGetKey(filterKeys[filterIter]);
}
} else if (fKey > rowKey) {
if (isRowDel) {
pMergeInfo->rowsDeleteFailed++;
} else {
if (pMergeInfo->rowsInserted - pMergeInfo->rowsDeleteSucceed >= maxRowsToRead) break;
if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break;
pMergeInfo->rowsInserted++;
pMergeInfo->nOperations++;
pMergeInfo->keyFirst = TMIN(pMergeInfo->keyFirst, rowKey);
pMergeInfo->keyLast = TMAX(pMergeInfo->keyLast, rowKey);
tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row);
}
tSkipListIterNext(pIter);
row = tsdbNextIterRow(pIter);
if (row == NULL || TD_ROW_KEY(row) > maxKey) {
rowKey = INT64_MAX;
isRowDel = false;
} else {
rowKey = TD_ROW_KEY(row);
isRowDel = memRowDeleted(row);
}
} else {
if (isRowDel) {
ASSERT(!keepDup);
if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break;
pMergeInfo->rowsDeleteSucceed++;
pMergeInfo->nOperations++;
tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row);
} else {
if (keepDup) {
if (pCols && pMergeInfo->nOperations >= pCols->maxPoints) break;
pMergeInfo->rowsUpdated++;
pMergeInfo->nOperations++;
pMergeInfo->keyFirst = TMIN(pMergeInfo->keyFirst, rowKey);
pMergeInfo->keyLast = TMAX(pMergeInfo->keyLast, rowKey);
tsdbAppendTableRowToCols(pTable, pCols, &pSchema, row);
} else {
pMergeInfo->keyFirst = TMIN(pMergeInfo->keyFirst, fKey);
pMergeInfo->keyLast = TMAX(pMergeInfo->keyLast, fKey);
}
}
tSkipListIterNext(pIter);
row = tsdbNextIterRow(pIter);
if (row == NULL || TD_ROW_KEY(row) > maxKey) {
rowKey = INT64_MAX;
isRowDel = false;
} else {
rowKey = TD_ROW_KEY(row);
isRowDel = memRowDeleted(row);
}
filterIter++;
if (filterIter >= nFilterKeys) {
fKey = INT64_MAX;
} else {
fKey = tdGetKey(filterKeys[filterIter]);
}
}
}
return 0;
}
// ---------------- LOCAL FUNCTIONS ----------------
static FORCE_INLINE int tsdbCheckRowRange(STsdbRepo *pRepo, STable *pTable, STSRow* row, TSKEY minKey, TSKEY maxKey,
TSKEY now) {
TSKEY rowKey = TD_ROW_KEY(row);
if (rowKey < minKey || rowKey > maxKey) {
tsdbError("vgId:%d table %s tid %d uid %" PRIu64 " timestamp is out of range! now %" PRId64 " minKey %" PRId64
" maxKey %" PRId64 " row key %" PRId64,
REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), now, minKey, maxKey,
rowKey);
terrno = TSDB_CODE_TDB_TIMESTAMP_OUT_OF_RANGE;
return -1;
}
return 0;
}
//row1 has higher priority
static STSRow* tsdbInsertDupKeyMerge(STSRow* row1, STSRow* row2, STsdbRepo* pRepo,
STSchema **ppSchema1, STSchema **ppSchema2,
STable* pTable, int32_t* pPoints, STSRow** pLastRow) {
//for compatiblity, duplicate key inserted when update=0 should be also calculated as affected rows!
if(row1 == NULL && row2 == NULL && pRepo->config.update == TD_ROW_DISCARD_UPDATE) {
(*pPoints)++;
return NULL;
}
tsdbTrace("vgId:%d a row is %s table %s tid %d uid %" PRIu64 " key %" PRIu64, REPO_ID(pRepo),
"updated in", TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable),
TD_ROW_KEY(row1));
if(row2 == NULL || pRepo->config.update != TD_ROW_PARTIAL_UPDATE) {
void* pMem = tsdbAllocBytes(pRepo, TD_ROW_LEN(row1));
if(pMem == NULL) return NULL;
memRowCpy(pMem, row1);
(*pPoints)++;
*pLastRow = pMem;
return pMem;
}
STSchema *pSchema1 = *ppSchema1;
STSchema *pSchema2 = *ppSchema2;
SMergeBuf * pBuf = &pRepo->mergeBuf;
int dv1 = memRowVersion(row1);
int dv2 = memRowVersion(row2);
if(pSchema1 == NULL || schemaVersion(pSchema1) != dv1) {
if(pSchema2 != NULL && schemaVersion(pSchema2) == dv1) {
*ppSchema1 = pSchema2;
} else {
*ppSchema1 = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row1), (int8_t)memRowType(row1));
}
pSchema1 = *ppSchema1;
}
if(pSchema2 == NULL || schemaVersion(pSchema2) != dv2) {
if(schemaVersion(pSchema1) == dv2) {
pSchema2 = pSchema1;
} else {
*ppSchema2 = tsdbGetTableSchemaImpl(pTable, false, false, memRowVersion(row2), (int8_t)memRowType(row2));
pSchema2 = *ppSchema2;
}
}
STSRow* tmp = tsdbMergeTwoRows(pBuf, row1, row2, pSchema1, pSchema2);
void* pMem = tsdbAllocBytes(pRepo, TD_ROW_LEN(tmp));
if(pMem == NULL) return NULL;
memRowCpy(pMem, tmp);
(*pPoints)++;
*pLastRow = pMem;
return pMem;
}
static void* tsdbInsertDupKeyMergePacked(void** args) {
return tsdbInsertDupKeyMerge(args[0], args[1], args[2], (STSchema**)&args[3], (STSchema**)&args[4], args[5], args[6], args[7]);
}
static void tsdbSetupSkipListHookFns(SSkipList* pSkipList, STsdbRepo *pRepo, STable *pTable, int32_t* pPoints, STSRow** pLastRow) {
if(pSkipList->insertHandleFn == NULL) {
tGenericSavedFunc *dupHandleSavedFunc = genericSavedFuncInit((GenericVaFunc)&tsdbInsertDupKeyMergePacked, 9);
dupHandleSavedFunc->args[2] = pRepo;
dupHandleSavedFunc->args[3] = NULL;
dupHandleSavedFunc->args[4] = NULL;
dupHandleSavedFunc->args[5] = pTable;
pSkipList->insertHandleFn = dupHandleSavedFunc;
}
pSkipList->insertHandleFn->args[6] = pPoints;
pSkipList->insertHandleFn->args[7] = pLastRow;
}
static int tsdbCheckTableSchema(STsdbRepo *pRepo, SSubmitBlk *pBlock, STable *pTable) {
ASSERT(pTable != NULL);
STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1, -1);
int sversion = schemaVersion(pSchema);
if (pBlock->sversion == sversion) {
return 0;
} else {
if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) { // stream table is not allowed to change schema
terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION;
return -1;
}
}
if (pBlock->sversion > sversion) { // may need to update table schema
if (pBlock->schemaLen > 0) {
tsdbDebug(
"vgId:%d table %s tid %d uid %" PRIu64 " schema version %d is out of data, client version %d, update...",
REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), sversion, pBlock->sversion);
ASSERT(pBlock->schemaLen % sizeof(STColumn) == 0);
int numOfCols = pBlock->schemaLen / sizeof(STColumn);
STColumn *pTCol = (STColumn *)pBlock->data;
STSchemaBuilder schemaBuilder = {0};
if (tdInitTSchemaBuilder(&schemaBuilder, pBlock->sversion) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbError("vgId:%d failed to update schema of table %s since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
tstrerror(terrno));
return -1;
}
for (int i = 0; i < numOfCols; i++) {
if (tdAddColToSchema(&schemaBuilder, pTCol[i].type, htons(pTCol[i].colId), htons(pTCol[i].bytes)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbError("vgId:%d failed to update schema of table %s since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
tstrerror(terrno));
tdDestroyTSchemaBuilder(&schemaBuilder);
return -1;
}
}
STSchema *pNSchema = tdGetSchemaFromBuilder(&schemaBuilder);
if (pNSchema == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tdDestroyTSchemaBuilder(&schemaBuilder);
return -1;
}
tdDestroyTSchemaBuilder(&schemaBuilder);
tsdbUpdateTableSchema(pRepo, pTable, pNSchema, true);
} else {
tsdbDebug(
"vgId:%d table %s tid %d uid %" PRIu64 " schema version %d is out of data, client version %d, reconfigure...",
REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable), sversion, pBlock->sversion);
terrno = TSDB_CODE_TDB_TABLE_RECONFIGURE;
return -1;
}
} else {
ASSERT(pBlock->sversion >= 0);
if (tsdbGetTableSchemaImpl(pTable, false, false, pBlock->sversion, -1) == NULL) {
tsdbError("vgId:%d invalid submit schema version %d to table %s tid %d from client", REPO_ID(pRepo),
pBlock->sversion, TABLE_CHAR_NAME(pTable), TABLE_TID(pTable));
terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION;
return -1;
}
}
return 0;
}
static void updateTableLatestColumn(STsdbRepo *pRepo, STable *pTable, STSRow* row) {
tsdbDebug("vgId:%d updateTableLatestColumn, %s row version:%d", REPO_ID(pRepo), pTable->name->data,
memRowVersion(row));
STSchema* pSchema = tsdbGetTableLatestSchema(pTable);
if (tsdbUpdateLastColSchema(pTable, pSchema) < 0) {
return;
}
pSchema = tsdbGetTableSchemaByVersion(pTable, memRowVersion(row), (int8_t)memRowType(row));
if (pSchema == NULL) {
return;
}
SDataCol *pLatestCols = pTable->lastCols;
int32_t kvIdx = 0;
for (int16_t j = 0; j < schemaNCols(pSchema); j++) {
STColumn *pTCol = schemaColAt(pSchema, j);
// ignore not exist colId
int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pTCol->colId);
if (idx == -1) {
continue;
}
void *value = NULL;
value = tdGetMemRowDataOfColEx(row, pTCol->colId, (int8_t)pTCol->type,
TD_DATA_ROW_HEAD_SIZE + pSchema->columns[j].offset, &kvIdx);
if ((value == NULL) || isNull(value, pTCol->type)) {
continue;
}
// lock
TSDB_WLOCK_TABLE(pTable);
SDataCol *pDataCol = &(pLatestCols[idx]);
if (pDataCol->pData == NULL) {
pDataCol->pData = taosMemoryMalloc(pTCol->bytes);
pDataCol->bytes = pTCol->bytes;
} else if (pDataCol->bytes < pTCol->bytes) {
pDataCol->pData = taosMemoryRealloc(pDataCol->pData, pTCol->bytes);
pDataCol->bytes = pTCol->bytes;
}
// the actual value size
uint16_t bytes = IS_VAR_DATA_TYPE(pTCol->type) ? varDataTLen(value) : pTCol->bytes;
// the actual data size CANNOT larger than column size
assert(pTCol->bytes >= bytes);
memcpy(pDataCol->pData, value, bytes);
//tsdbInfo("updateTableLatestColumn vgId:%d cache column %d for %d,%s", REPO_ID(pRepo), j, pDataCol->bytes, (char*)pDataCol->pData);
pDataCol->ts = TD_ROW_KEY(row);
// unlock
TSDB_WUNLOCK_TABLE(pTable);
}
}
static int tsdbUpdateTableLatestInfo(STsdbRepo *pRepo, STable *pTable, STSRow* row) {
STsdbCfg *pCfg = &pRepo->config;
// if cacheLastRow config has been reset, free the lastRow
if (!pCfg->cacheLastRow && pTable->lastRow != NULL) {
STSRow* cachedLastRow = pTable->lastRow;
TSDB_WLOCK_TABLE(pTable);
pTable->lastRow = NULL;
TSDB_WUNLOCK_TABLE(pTable);
taosTZfree(cachedLastRow);
}
if (tsdbGetTableLastKeyImpl(pTable) <= TD_ROW_KEY(row)) {
if (CACHE_LAST_ROW(pCfg) || pTable->lastRow != NULL) {
STSRow* nrow = pTable->lastRow;
if (taosTSizeof(nrow) < TD_ROW_LEN(row)) {
STSRow* orow = nrow;
nrow = taosTMalloc(TD_ROW_LEN(row));
if (nrow == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
memRowCpy(nrow, row);
TSDB_WLOCK_TABLE(pTable);
pTable->lastKey = TD_ROW_KEY(row);
pTable->lastRow = nrow;
TSDB_WUNLOCK_TABLE(pTable);
taosTZfree(orow);
} else {
TSDB_WLOCK_TABLE(pTable);
pTable->lastKey = TD_ROW_KEY(row);
memRowCpy(nrow, row);
TSDB_WUNLOCK_TABLE(pTable);
}
} else {
pTable->lastKey = TD_ROW_KEY(row);
}
if (CACHE_LAST_NULL_COLUMN(pCfg)) {
updateTableLatestColumn(pRepo, pTable, row);
}
}
pTable->cacheLastConfigVersion = pRepo->cacheLastConfigVersion;
return 0;
}
#endif

View File

@ -0,0 +1,89 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdb.h"
int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb) {
STsdb *pTsdb = NULL;
int slen = 0;
*ppTsdb = NULL;
slen = strlen(tfsGetPrimaryPath(pVnode->pTfs)) + strlen(pVnode->path) + strlen(VNODE_TSDB_DIR) + 3;
// create handle
pTsdb = (STsdb *)taosMemoryCalloc(1, sizeof(*pTsdb) + slen);
if (pTsdb == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
pTsdb->path = (char *)&pTsdb[1];
sprintf(pTsdb->path, "%s%s%s%s%s", tfsGetPrimaryPath(pVnode->pTfs), TD_DIRSEP, pVnode->path, TD_DIRSEP,
VNODE_TSDB_DIR);
pTsdb->pVnode = pVnode;
pTsdb->repoLocked = false;
tdbMutexInit(&pTsdb->mutex, NULL);
pTsdb->config = pVnode->config.tsdbCfg;
pTsdb->fs = tsdbNewFS(&pTsdb->config);
// create dir (TODO: use tfsMkdir)
taosMkDir(pTsdb->path);
// open tsdb
if (tsdbOpenFS(pTsdb) < 0) {
goto _err;
}
tsdbDebug("vgId: %d tsdb is opened", TD_VID(pVnode));
*ppTsdb = pTsdb;
return 0;
_err:
taosMemoryFree(pTsdb);
return -1;
}
int tsdbClose(STsdb *pTsdb) {
if (pTsdb) {
tsdbCloseFS(pTsdb);
tsdbFreeFS(pTsdb->fs);
taosMemoryFree(pTsdb);
}
return 0;
}
int tsdbLockRepo(STsdb *pTsdb) {
int code = taosThreadMutexLock(&pTsdb->mutex);
if (code != 0) {
tsdbError("vgId:%d failed to lock tsdb since %s", REPO_ID(pTsdb), strerror(errno));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
pTsdb->repoLocked = true;
return 0;
}
int tsdbUnlockRepo(STsdb *pTsdb) {
ASSERT(IS_REPO_LOCKED(pTsdb));
pTsdb->repoLocked = false;
int code = taosThreadMutexUnlock(&pTsdb->mutex);
if (code != 0) {
tsdbError("vgId:%d failed to unlock tsdb since %s", REPO_ID(pTsdb), strerror(errno));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}