From 684dd82358bf3863aec4a5a28b22ebdec5956ef6 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 21 Jul 2022 11:42:42 +0000 Subject: [PATCH] fix read concurrency --- source/dnode/vnode/src/inc/tsdb.h | 47 +- source/dnode/vnode/src/tsdb/tsdbCache.c | 804 +++++------ source/dnode/vnode/src/tsdb/tsdbCommit.c | 42 +- source/dnode/vnode/src/tsdb/tsdbFS.c | 1357 ++++++++++++------- source/dnode/vnode/src/tsdb/tsdbFile.c | 17 +- source/dnode/vnode/src/tsdb/tsdbMemTable.c | 45 - source/dnode/vnode/src/tsdb/tsdbOpen.c | 4 +- source/dnode/vnode/src/tsdb/tsdbRead.c | 37 +- source/dnode/vnode/src/tsdb/tsdbRetention.c | 8 +- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 51 +- 10 files changed, 1391 insertions(+), 1021 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index d465ba4d9b..cfadc91d89 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -62,7 +62,6 @@ typedef struct SDelFReader SDelFReader; typedef struct SRowIter SRowIter; typedef struct STsdbFS STsdbFS; typedef struct SRowMerger SRowMerger; -typedef struct STsdbFSState STsdbFSState; typedef struct STsdbSnapHdr STsdbSnapHdr; typedef struct STsdbReadSnap STsdbReadSnap; @@ -177,8 +176,6 @@ void tsdbMemTableDestroy(SMemTable *pMemTable); void tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData); void tsdbRefMemTable(SMemTable *pMemTable); void tsdbUnrefMemTable(SMemTable *pMemTable); -int32_t tsdbTakeMemSnapshot(STsdb *pTsdb, SMemTable **ppMem, SMemTable **ppIMem); -void tsdbUntakeMemSnapshot(STsdb *pTsdb, SMemTable *pMem, SMemTable *pIMem); // STbDataIter int32_t tsdbTbDataIterCreate(STbData *pTbData, TSDBKEY *pFrom, int8_t backward, STbDataIter **ppIter); void *tsdbTbDataIterDestroy(STbDataIter *pIter); @@ -208,17 +205,20 @@ void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, ch // SDelFile void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]); // tsdbFS.c ============================================================================================== -int32_t tsdbFSOpen(STsdb *pTsdb, STsdbFS **ppFS); -int32_t tsdbFSClose(STsdbFS *pFS); -int32_t tsdbFSBegin(STsdbFS *pFS); -int32_t tsdbFSCommit(STsdbFS *pFS); +int32_t tsdbFSOpen(STsdb *pTsdb); +int32_t tsdbFSClose(STsdb *pTsdb); +int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS); +void tsdbFSDestroy(STsdbFS *pFS); +int32_t tDFileSetCmprFn(const void *p1, const void *p2); +int32_t tsdbFSCommit1(STsdb *pTsdb, STsdbFS *pFS); +int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFS); +int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS); +void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS); + int32_t tsdbFSRollback(STsdbFS *pFS); -int32_t tsdbFSStateUpsertDelFile(STsdbFSState *pState, SDelFile *pDelFile); -int32_t tsdbFSStateUpsertDFileSet(STsdbFSState *pState, SDFileSet *pSet); -void tsdbFSStateDeleteDFileSet(STsdbFSState *pState, int32_t fid); -SDelFile *tsdbFSStateGetDelFile(STsdbFSState *pState); -SDFileSet *tsdbFSStateGetDFileSet(STsdbFSState *pState, int32_t fid, int32_t flag); +int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet); +int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile); // tsdbReaderWriter.c ============================================================================================== // SDataFWriter int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet); @@ -285,6 +285,11 @@ typedef struct { TSKEY minKey; } SRtn; +struct STsdbFS { + SDelFile *pDelFile; + SArray *aDFileSet; // SArray +}; + struct STsdb { char *path; SVnode *pVnode; @@ -292,7 +297,7 @@ struct STsdb { TdThreadRwlock rwLock; SMemTable *mem; SMemTable *imem; - STsdbFS *pFS; + STsdbFS fs; SLRUCache *lruCache; }; @@ -540,22 +545,6 @@ struct SRowMerger { SArray *pArray; // SArray }; -struct STsdbFSState { - SDelFile *pDelFile; - SArray *aDFileSet; // SArray - SDelFile delFile; -}; - -struct STsdbFS { - STsdb *pTsdb; - STsdbFSState *cState; - STsdbFSState *nState; - - // new - SDelFile *pDelFile; - SArray aDFileSetP; // SArray -}; - struct SDelFWriter { STsdb *pTsdb; SDelFile fDel; diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index 484020e6e1..e259dde29c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -464,7 +464,7 @@ static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow) { switch (state->state) { case SFSNEXTROW_FS: - state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet; + // state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet; state->nFileSet = taosArrayGetSize(state->aDFileSet); state->iFileSet = state->nFileSet; @@ -793,9 +793,10 @@ typedef struct { TSDBROW memRow, imemRow, fsRow; TsdbNextRowState input[3]; - SMemTable *pMemTable; - SMemTable *pIMemTable; - STsdb *pTsdb; + // SMemTable *pMemTable; + // SMemTable *pIMemTable; + STsdbReadSnap *pReadSnap; + STsdb *pTsdb; } CacheNextRowIter; static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTsdb) { @@ -803,16 +804,16 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs tb_uid_t suid = getTableSuidByUid(uid, pTsdb); - tsdbTakeMemSnapshot(pTsdb, &pIter->pMemTable, &pIter->pIMemTable); + tsdbTakeReadSnap(pTsdb, &pIter->pReadSnap); STbData *pMem = NULL; - if (pIter->pMemTable) { - tsdbGetTbDataFromMemTable(pIter->pMemTable, suid, uid, &pMem); + if (pIter->pReadSnap->pMem) { + tsdbGetTbDataFromMemTable(pIter->pReadSnap->pMem, suid, uid, &pMem); } STbData *pIMem = NULL; - if (pIter->pIMemTable) { - tsdbGetTbDataFromMemTable(pIter->pIMemTable, suid, uid, &pIMem); + if (pIter->pReadSnap->pIMem) { + tsdbGetTbDataFromMemTable(pIter->pReadSnap->pIMem, suid, uid, &pIMem); } pIter->pTsdb = pTsdb; @@ -821,7 +822,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs SDelIdx delIdx; - SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); + SDelFile *pDelFile = pIter->pReadSnap->fs.pDelFile; if (pDelFile) { SDelFReader *pDelFReader; @@ -846,6 +847,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs pIter->fsState.state = SFSNEXTROW_FS; pIter->fsState.pTsdb = pTsdb; + pIter->fsState.aDFileSet = pIter->pReadSnap->fs.aDFileSet; pIter->fsState.pBlockIdxExp = &pIter->idx; pIter->input[0] = (TsdbNextRowState){&pIter->memRow, true, false, &pIter->memState, getNextRowFromMem, NULL}; @@ -885,7 +887,7 @@ static int32_t nextRowIterClose(CacheNextRowIter *pIter) { taosArrayDestroy(pIter->pSkyline); } - tsdbUntakeMemSnapshot(pIter->pTsdb, pIter->pMemTable, pIter->pIMemTable); + tsdbUntakeReadSnap(pIter->pTsdb, pIter->pReadSnap); return code; _err: @@ -1172,480 +1174,480 @@ _err: return code; } -static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, STSRow **ppRow) { - int32_t code = 0; - SArray *pSkyline = NULL; +// static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, STSRow **ppRow) { +// int32_t code = 0; +// SArray *pSkyline = NULL; - STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); - int16_t nCol = pTSchema->numOfCols; - SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); +// STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); +// int16_t nCol = pTSchema->numOfCols; +// SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); - tb_uid_t suid = getTableSuidByUid(uid, pTsdb); +// tb_uid_t suid = getTableSuidByUid(uid, pTsdb); - STbData *pMem = NULL; - if (pTsdb->mem) { - tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); - } +// STbData *pMem = NULL; +// if (pTsdb->mem) { +// tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); +// } - STbData *pIMem = NULL; - if (pTsdb->imem) { - tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); - } +// STbData *pIMem = NULL; +// if (pTsdb->imem) { +// tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); +// } - *ppRow = NULL; +// *ppRow = NULL; - pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); +// pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); - SDelIdx delIdx; +// SDelIdx delIdx; - SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); - if (pDelFile) { - SDelFReader *pDelFReader; +// SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); +// if (pDelFile) { +// SDelFReader *pDelFReader; - code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); - if (code) goto _err; +// code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); +// if (code) goto _err; - code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); - if (code) goto _err; +// code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); +// if (code) goto _err; - code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); - if (code) goto _err; +// code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); +// if (code) goto _err; - tsdbDelFReaderClose(&pDelFReader); - } else { - code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); - if (code) goto _err; - } +// tsdbDelFReaderClose(&pDelFReader); +// } else { +// code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); +// if (code) goto _err; +// } - int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; +// int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; - SBlockIdx idx = {.suid = suid, .uid = uid}; +// SBlockIdx idx = {.suid = suid, .uid = uid}; - SFSNextRowIter fsState = {0}; - fsState.state = SFSNEXTROW_FS; - fsState.pTsdb = pTsdb; - fsState.pBlockIdxExp = &idx; +// SFSNextRowIter fsState = {0}; +// fsState.state = SFSNEXTROW_FS; +// fsState.pTsdb = pTsdb; +// fsState.pBlockIdxExp = &idx; - SMemNextRowIter memState = {0}; - SMemNextRowIter imemState = {0}; - TSDBROW memRow, imemRow, fsRow; +// SMemNextRowIter memState = {0}; +// SMemNextRowIter imemState = {0}; +// TSDBROW memRow, imemRow, fsRow; - TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, - {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, - {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; +// TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, +// {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, +// {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; - if (pMem) { - memState.pMem = pMem; - memState.state = SMEMNEXTROW_ENTER; - input[0].stop = false; - input[0].next = true; - } - if (pIMem) { - imemState.pMem = pIMem; - imemState.state = SMEMNEXTROW_ENTER; - input[1].stop = false; - input[1].next = true; - } +// if (pMem) { +// memState.pMem = pMem; +// memState.state = SMEMNEXTROW_ENTER; +// input[0].stop = false; +// input[0].next = true; +// } +// if (pIMem) { +// imemState.pMem = pIMem; +// imemState.state = SMEMNEXTROW_ENTER; +// input[1].stop = false; +// input[1].next = true; +// } - int16_t nilColCount = nCol - 1; // count of null & none cols - int iCol = 0; // index of first nil col index from left to right - bool setICol = false; +// int16_t nilColCount = nCol - 1; // count of null & none cols +// int iCol = 0; // index of first nil col index from left to right +// bool setICol = false; - do { - for (int i = 0; i < 3; ++i) { - if (input[i].next && !input[i].stop) { - if (input[i].pRow == NULL) { - code = input[i].nextRowFn(input[i].iter, &input[i].pRow); - if (code) goto _err; +// do { +// for (int i = 0; i < 3; ++i) { +// if (input[i].next && !input[i].stop) { +// if (input[i].pRow == NULL) { +// code = input[i].nextRowFn(input[i].iter, &input[i].pRow); +// if (code) goto _err; - if (input[i].pRow == NULL) { - input[i].stop = true; - input[i].next = false; - } - } - } - } +// if (input[i].pRow == NULL) { +// input[i].stop = true; +// input[i].next = false; +// } +// } +// } +// } - if (input[0].stop && input[1].stop && input[2].stop) { - break; - } +// if (input[0].stop && input[1].stop && input[2].stop) { +// break; +// } - // select maxpoint(s) from mem, imem, fs - TSDBROW *max[3] = {0}; - int iMax[3] = {-1, -1, -1}; - int nMax = 0; - TSKEY maxKey = TSKEY_MIN; +// // select maxpoint(s) from mem, imem, fs +// TSDBROW *max[3] = {0}; +// int iMax[3] = {-1, -1, -1}; +// int nMax = 0; +// TSKEY maxKey = TSKEY_MIN; - for (int i = 0; i < 3; ++i) { - if (!input[i].stop && input[i].pRow != NULL) { - TSDBKEY key = TSDBROW_KEY(input[i].pRow); +// for (int i = 0; i < 3; ++i) { +// if (!input[i].stop && input[i].pRow != NULL) { +// TSDBKEY key = TSDBROW_KEY(input[i].pRow); - // merging & deduplicating on client side - if (maxKey <= key.ts) { - if (maxKey < key.ts) { - nMax = 0; - maxKey = key.ts; - } +// // merging & deduplicating on client side +// if (maxKey <= key.ts) { +// if (maxKey < key.ts) { +// nMax = 0; +// maxKey = key.ts; +// } - iMax[nMax] = i; - max[nMax++] = input[i].pRow; - } - } - } +// iMax[nMax] = i; +// max[nMax++] = input[i].pRow; +// } +// } +// } - // delete detection - TSDBROW *merge[3] = {0}; - int iMerge[3] = {-1, -1, -1}; - int nMerge = 0; - for (int i = 0; i < nMax; ++i) { - TSDBKEY maxKey = TSDBROW_KEY(max[i]); +// // delete detection +// TSDBROW *merge[3] = {0}; +// int iMerge[3] = {-1, -1, -1}; +// int nMerge = 0; +// for (int i = 0; i < nMax; ++i) { +// TSDBKEY maxKey = TSDBROW_KEY(max[i]); - bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); - if (!deleted) { - iMerge[nMerge] = i; - merge[nMerge++] = max[i]; - } +// bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); +// if (!deleted) { +// iMerge[nMerge] = i; +// merge[nMerge++] = max[i]; +// } - input[iMax[i]].next = deleted; - } +// input[iMax[i]].next = deleted; +// } - // merge if nMerge > 1 - if (nMerge > 0) { - *dup = false; +// // merge if nMerge > 1 +// if (nMerge > 0) { +// *dup = false; - if (nMerge == 1) { - code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); - if (code) goto _err; - } else { - // merge 2 or 3 rows - SRowMerger merger = {0}; +// if (nMerge == 1) { +// code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); +// if (code) goto _err; +// } else { +// // merge 2 or 3 rows +// SRowMerger merger = {0}; - tRowMergerInit(&merger, merge[0], pTSchema); - for (int i = 1; i < nMerge; ++i) { - tRowMerge(&merger, merge[i]); - } - tRowMergerGetRow(&merger, ppRow); - tRowMergerClear(&merger); - } - } +// tRowMergerInit(&merger, merge[0], pTSchema); +// for (int i = 1; i < nMerge; ++i) { +// tRowMerge(&merger, merge[i]); +// } +// tRowMergerGetRow(&merger, ppRow); +// tRowMergerClear(&merger); +// } +// } - } while (1); +// } while (1); - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); - return code; -_err: - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); - tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} +// return code; +// _err: +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); +// tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); +// return code; +// } // static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, STSRow **ppRow) { -static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray) { - int32_t code = 0; - SArray *pSkyline = NULL; - STSRow *pRow = NULL; - STSRow **ppRow = &pRow; +// static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray) { +// int32_t code = 0; +// SArray *pSkyline = NULL; +// STSRow *pRow = NULL; +// STSRow **ppRow = &pRow; - STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); - int16_t nCol = pTSchema->numOfCols; - // SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); - SArray *pColArray = taosArrayInit(nCol, sizeof(SLastCol)); +// STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); +// int16_t nCol = pTSchema->numOfCols; +// // SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); +// SArray *pColArray = taosArrayInit(nCol, sizeof(SLastCol)); - tb_uid_t suid = getTableSuidByUid(uid, pTsdb); +// tb_uid_t suid = getTableSuidByUid(uid, pTsdb); - STbData *pMem = NULL; - if (pTsdb->mem) { - tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); - } +// STbData *pMem = NULL; +// if (pTsdb->mem) { +// tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); +// } - STbData *pIMem = NULL; - if (pTsdb->imem) { - tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); - } +// STbData *pIMem = NULL; +// if (pTsdb->imem) { +// tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); +// } - *ppLastArray = NULL; +// *ppLastArray = NULL; - pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); +// pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); - SDelIdx delIdx; +// SDelIdx delIdx; - SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); - if (pDelFile) { - SDelFReader *pDelFReader; +// SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); +// if (pDelFile) { +// SDelFReader *pDelFReader; - code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); - if (code) goto _err; +// code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); +// if (code) goto _err; - code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); - if (code) goto _err; +// code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); +// if (code) goto _err; - code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); - if (code) goto _err; +// code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); +// if (code) goto _err; - tsdbDelFReaderClose(&pDelFReader); - } else { - code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); - if (code) goto _err; - } +// tsdbDelFReaderClose(&pDelFReader); +// } else { +// code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); +// if (code) goto _err; +// } - int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; +// int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; - SBlockIdx idx = {.suid = suid, .uid = uid}; +// SBlockIdx idx = {.suid = suid, .uid = uid}; - SFSNextRowIter fsState = {0}; - fsState.state = SFSNEXTROW_FS; - fsState.pTsdb = pTsdb; - fsState.pBlockIdxExp = &idx; +// SFSNextRowIter fsState = {0}; +// fsState.state = SFSNEXTROW_FS; +// fsState.pTsdb = pTsdb; +// fsState.pBlockIdxExp = &idx; - SMemNextRowIter memState = {0}; - SMemNextRowIter imemState = {0}; - TSDBROW memRow, imemRow, fsRow; +// SMemNextRowIter memState = {0}; +// SMemNextRowIter imemState = {0}; +// TSDBROW memRow, imemRow, fsRow; - TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, - {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, - {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; +// TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, +// {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, +// {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; - if (pMem) { - memState.pMem = pMem; - memState.state = SMEMNEXTROW_ENTER; - input[0].stop = false; - input[0].next = true; - } - if (pIMem) { - imemState.pMem = pIMem; - imemState.state = SMEMNEXTROW_ENTER; - input[1].stop = false; - input[1].next = true; - } +// if (pMem) { +// memState.pMem = pMem; +// memState.state = SMEMNEXTROW_ENTER; +// input[0].stop = false; +// input[0].next = true; +// } +// if (pIMem) { +// imemState.pMem = pIMem; +// imemState.state = SMEMNEXTROW_ENTER; +// input[1].stop = false; +// input[1].next = true; +// } - int16_t nilColCount = nCol - 1; // count of null & none cols - int iCol = 0; // index of first nil col index from left to right - bool setICol = false; +// int16_t nilColCount = nCol - 1; // count of null & none cols +// int iCol = 0; // index of first nil col index from left to right +// bool setICol = false; - do { - for (int i = 0; i < 3; ++i) { - if (input[i].next && !input[i].stop) { - code = input[i].nextRowFn(input[i].iter, &input[i].pRow); - if (code) goto _err; +// do { +// for (int i = 0; i < 3; ++i) { +// if (input[i].next && !input[i].stop) { +// code = input[i].nextRowFn(input[i].iter, &input[i].pRow); +// if (code) goto _err; - if (input[i].pRow == NULL) { - input[i].stop = true; - input[i].next = false; - } - } - } +// if (input[i].pRow == NULL) { +// input[i].stop = true; +// input[i].next = false; +// } +// } +// } - if (input[0].stop && input[1].stop && input[2].stop) { - break; - } +// if (input[0].stop && input[1].stop && input[2].stop) { +// break; +// } - // select maxpoint(s) from mem, imem, fs - TSDBROW *max[3] = {0}; - int iMax[3] = {-1, -1, -1}; - int nMax = 0; - TSKEY maxKey = TSKEY_MIN; +// // select maxpoint(s) from mem, imem, fs +// TSDBROW *max[3] = {0}; +// int iMax[3] = {-1, -1, -1}; +// int nMax = 0; +// TSKEY maxKey = TSKEY_MIN; - for (int i = 0; i < 3; ++i) { - if (!input[i].stop && input[i].pRow != NULL) { - TSDBKEY key = TSDBROW_KEY(input[i].pRow); +// for (int i = 0; i < 3; ++i) { +// if (!input[i].stop && input[i].pRow != NULL) { +// TSDBKEY key = TSDBROW_KEY(input[i].pRow); - // merging & deduplicating on client side - if (maxKey <= key.ts) { - if (maxKey < key.ts) { - nMax = 0; - maxKey = key.ts; - } +// // merging & deduplicating on client side +// if (maxKey <= key.ts) { +// if (maxKey < key.ts) { +// nMax = 0; +// maxKey = key.ts; +// } - iMax[nMax] = i; - max[nMax++] = input[i].pRow; - } - } - } +// iMax[nMax] = i; +// max[nMax++] = input[i].pRow; +// } +// } +// } - // delete detection - TSDBROW *merge[3] = {0}; - int iMerge[3] = {-1, -1, -1}; - int nMerge = 0; - for (int i = 0; i < nMax; ++i) { - TSDBKEY maxKey = TSDBROW_KEY(max[i]); +// // delete detection +// TSDBROW *merge[3] = {0}; +// int iMerge[3] = {-1, -1, -1}; +// int nMerge = 0; +// for (int i = 0; i < nMax; ++i) { +// TSDBKEY maxKey = TSDBROW_KEY(max[i]); - bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); - if (!deleted) { - iMerge[nMerge] = iMax[i]; - merge[nMerge++] = max[i]; - } +// bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); +// if (!deleted) { +// iMerge[nMerge] = iMax[i]; +// merge[nMerge++] = max[i]; +// } - input[iMax[i]].next = deleted; - } +// input[iMax[i]].next = deleted; +// } - // merge if nMerge > 1 - if (nMerge > 0) { - if (nMerge == 1) { - code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); - if (code) goto _err; - } else { - // merge 2 or 3 rows - SRowMerger merger = {0}; +// // merge if nMerge > 1 +// if (nMerge > 0) { +// if (nMerge == 1) { +// code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); +// if (code) goto _err; +// } else { +// // merge 2 or 3 rows +// SRowMerger merger = {0}; - tRowMergerInit(&merger, merge[0], pTSchema); - for (int i = 1; i < nMerge; ++i) { - tRowMerge(&merger, merge[i]); - } - tRowMergerGetRow(&merger, ppRow); - tRowMergerClear(&merger); - } - } else { - /* *ppRow = NULL; */ - /* return code; */ - continue; - } +// tRowMergerInit(&merger, merge[0], pTSchema); +// for (int i = 1; i < nMerge; ++i) { +// tRowMerge(&merger, merge[i]); +// } +// tRowMergerGetRow(&merger, ppRow); +// tRowMergerClear(&merger); +// } +// } else { +// /* *ppRow = NULL; */ +// /* return code; */ +// continue; +// } - if (iCol == 0) { - STColumn *pTColumn = &pTSchema->columns[0]; - SColVal *pColVal = &(SColVal){0}; +// if (iCol == 0) { +// STColumn *pTColumn = &pTSchema->columns[0]; +// SColVal *pColVal = &(SColVal){0}; - *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.ts = maxKey}); +// *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.ts = maxKey}); - // if (taosArrayPush(pColArray, pColVal) == NULL) { - if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } +// // if (taosArrayPush(pColArray, pColVal) == NULL) { +// if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { +// code = TSDB_CODE_OUT_OF_MEMORY; +// goto _err; +// } - ++iCol; +// ++iCol; - setICol = false; - for (int16_t i = iCol; i < nCol; ++i) { - // tsdbRowGetColVal(*ppRow, pTSchema, i, pColVal); - tTSRowGetVal(*ppRow, pTSchema, i, pColVal); - // if (taosArrayPush(pColArray, pColVal) == NULL) { - if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } +// setICol = false; +// for (int16_t i = iCol; i < nCol; ++i) { +// // tsdbRowGetColVal(*ppRow, pTSchema, i, pColVal); +// tTSRowGetVal(*ppRow, pTSchema, i, pColVal); +// // if (taosArrayPush(pColArray, pColVal) == NULL) { +// if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { +// code = TSDB_CODE_OUT_OF_MEMORY; +// goto _err; +// } - if (pColVal->isNull || pColVal->isNone) { - for (int j = 0; j < nMerge; ++j) { - SColVal jColVal = {0}; - tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); - if (jColVal.isNull || jColVal.isNone) { - input[iMerge[j]].next = true; - } - } - if (!setICol) { - iCol = i; - setICol = true; - } - } else { - --nilColCount; - } - } +// if (pColVal->isNull || pColVal->isNone) { +// for (int j = 0; j < nMerge; ++j) { +// SColVal jColVal = {0}; +// tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); +// if (jColVal.isNull || jColVal.isNone) { +// input[iMerge[j]].next = true; +// } +// } +// if (!setICol) { +// iCol = i; +// setICol = true; +// } +// } else { +// --nilColCount; +// } +// } - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } - continue; - } +// continue; +// } - setICol = false; - for (int16_t i = iCol; i < nCol; ++i) { - SColVal colVal = {0}; - tTSRowGetVal(*ppRow, pTSchema, i, &colVal); - TSKEY rowTs = (*ppRow)->ts; +// setICol = false; +// for (int16_t i = iCol; i < nCol; ++i) { +// SColVal colVal = {0}; +// tTSRowGetVal(*ppRow, pTSchema, i, &colVal); +// TSKEY rowTs = (*ppRow)->ts; - // SColVal *tColVal = (SColVal *)taosArrayGet(pColArray, i); - SLastCol *tTsVal = (SLastCol *)taosArrayGet(pColArray, i); - SColVal *tColVal = &tTsVal->colVal; +// // SColVal *tColVal = (SColVal *)taosArrayGet(pColArray, i); +// SLastCol *tTsVal = (SLastCol *)taosArrayGet(pColArray, i); +// SColVal *tColVal = &tTsVal->colVal; - if (!colVal.isNone && !colVal.isNull) { - if (tColVal->isNull || tColVal->isNone) { - // taosArraySet(pColArray, i, &colVal); - taosArraySet(pColArray, i, &(SLastCol){.ts = rowTs, .colVal = colVal}); - --nilColCount; - } - } else { - if ((tColVal->isNull || tColVal->isNone) && !setICol) { - iCol = i; - setICol = true; +// if (!colVal.isNone && !colVal.isNull) { +// if (tColVal->isNull || tColVal->isNone) { +// // taosArraySet(pColArray, i, &colVal); +// taosArraySet(pColArray, i, &(SLastCol){.ts = rowTs, .colVal = colVal}); +// --nilColCount; +// } +// } else { +// if ((tColVal->isNull || tColVal->isNone) && !setICol) { +// iCol = i; +// setICol = true; - for (int j = 0; j < nMerge; ++j) { - SColVal jColVal = {0}; - tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); - if (jColVal.isNull || jColVal.isNone) { - input[iMerge[j]].next = true; - } - } - } - } - } +// for (int j = 0; j < nMerge; ++j) { +// SColVal jColVal = {0}; +// tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); +// if (jColVal.isNull || jColVal.isNone) { +// input[iMerge[j]].next = true; +// } +// } +// } +// } +// } - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } - } while (nilColCount > 0); +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } +// } while (nilColCount > 0); - // if () new ts row from pColArray if non empty - /* if (taosArrayGetSize(pColArray) == nCol) { */ - /* code = tdSTSRowNew(pColArray, pTSchema, ppRow); */ - /* if (code) goto _err; */ - /* } */ - /* taosArrayDestroy(pColArray); */ - if (taosArrayGetSize(pColArray) <= 0) { - *ppLastArray = NULL; - taosArrayDestroy(pColArray); - } else { - *ppLastArray = pColArray; - } - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } +// // if () new ts row from pColArray if non empty +// /* if (taosArrayGetSize(pColArray) == nCol) { */ +// /* code = tdSTSRowNew(pColArray, pTSchema, ppRow); */ +// /* if (code) goto _err; */ +// /* } */ +// /* taosArrayDestroy(pColArray); */ +// if (taosArrayGetSize(pColArray) <= 0) { +// *ppLastArray = NULL; +// taosArrayDestroy(pColArray); +// } else { +// *ppLastArray = pColArray; +// } +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); - return code; -_err: - taosArrayDestroy(pColArray); - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); - tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} +// return code; +// _err: +// taosArrayDestroy(pColArray); +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); +// tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); +// return code; +// } int32_t tsdbCacheGetLastrowH(SLRUCache *pCache, tb_uid_t uid, STsdb *pTsdb, LRUHandle **handle) { int32_t code = 0; diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit.c b/source/dnode/vnode/src/tsdb/tsdbCommit.c index 3c496918e8..c4dc341a63 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit.c @@ -29,6 +29,7 @@ typedef struct { int32_t minRow; int32_t maxRow; int8_t cmprAlg; + STsdbFS fs; // -------------- TSKEY nextKey; // reset by each table commit int32_t commitFid; @@ -119,9 +120,6 @@ int32_t tsdbCommit(STsdb *pTsdb) { code = tsdbCommitDel(&commith); if (code) goto _err; - code = tsdbCommitCache(&commith); - if (code) goto _err; - // end commit code = tsdbEndCommit(&commith, 0); if (code) goto _err; @@ -158,7 +156,7 @@ static int32_t tsdbCommitDelStart(SCommitter *pCommitter) { goto _err; } - SDelFile *pDelFileR = pTsdb->pFS->nState->pDelFile; + SDelFile *pDelFileR = pCommitter->fs.pDelFile; if (pDelFileR) { code = tsdbDelFReaderOpen(&pCommitter->pDelFReader, pDelFileR, pTsdb, NULL); if (code) goto _err; @@ -247,7 +245,7 @@ static int32_t tsdbCommitDelEnd(SCommitter *pCommitter) { code = tsdbUpdateDelFileHdr(pCommitter->pDelFWriter); if (code) goto _err; - code = tsdbFSStateUpsertDelFile(pTsdb->pFS->nState, &pCommitter->pDelFWriter->fDel); + code = tsdbFSUpsertDelFile(&pCommitter->fs, &pCommitter->pDelFWriter->fDel); if (code) goto _err; code = tsdbDelFWriterClose(&pCommitter->pDelFWriter, 1); @@ -281,7 +279,8 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { taosArrayClear(pCommitter->aBlockIdx); tMapDataReset(&pCommitter->oBlockMap); tBlockDataReset(&pCommitter->oBlockData); - pRSet = tsdbFSStateGetDFileSet(pTsdb->pFS->nState, pCommitter->commitFid, TD_EQ); + pRSet = (SDFileSet *)taosArraySearch(pCommitter->fs.aDFileSet, &(SDFileSet){.fid = pCommitter->commitFid}, + tDFileSetCmprFn, TD_EQ); if (pRSet) { code = tsdbDataFReaderOpen(&pCommitter->pReader, pTsdb, pRSet); if (code) goto _err; @@ -860,7 +859,7 @@ static int32_t tsdbCommitFileDataEnd(SCommitter *pCommitter) { if (code) goto _err; // upsert SDFileSet - code = tsdbFSStateUpsertDFileSet(pCommitter->pTsdb->pFS->nState, &pCommitter->pWriter->wSet); + code = tsdbFSUpsertFSet(&pCommitter->fs, &pCommitter->pWriter->wSet); if (code) goto _err; // close and sync @@ -978,7 +977,7 @@ static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter) { pCommitter->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; pCommitter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; - code = tsdbFSBegin(pTsdb->pFS); + code = tsdbFSCopy(pTsdb, &pCommitter->fs); if (code) goto _err; return code; @@ -1147,28 +1146,33 @@ _err: return code; } -static int32_t tsdbCommitCache(SCommitter *pCommitter) { - int32_t code = 0; - // TODO - return code; -} - static int32_t tsdbEndCommit(SCommitter *pCommitter, int32_t eno) { int32_t code = 0; STsdb *pTsdb = pCommitter->pTsdb; SMemTable *pMemTable = pTsdb->imem; - if (eno == 0) { - code = tsdbFSCommit(pTsdb->pFS); - } else { - code = tsdbFSRollback(pTsdb->pFS); + ASSERT(eno); + + code = tsdbFSCommit1(pTsdb, &pCommitter->fs); + if (code) goto _err; + + // lock + taosThreadRwlockWrlock(&pTsdb->rwLock); + + // commit or rollback + code = tsdbFSCommit2(pTsdb, &pCommitter->fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _err; } - taosThreadRwlockWrlock(&pTsdb->rwLock); pTsdb->imem = NULL; + + // unlock taosThreadRwlockUnlock(&pTsdb->rwLock); tsdbUnrefMemTable(pMemTable); + tsdbFSDestroy(&pCommitter->fs); tsdbInfo("vgId:%d tsdb end commit", TD_VID(pTsdb->pVnode)); return code; diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index f5e6e9744e..5c95e6cfec 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -16,67 +16,41 @@ #include "tsdb.h" // ================================================================================================= -static int32_t tPutFSState(uint8_t *p, STsdbFSState *pState) { +static int32_t tsdbEncodeFS(uint8_t *p, STsdbFS *pFS) { int32_t n = 0; - int8_t hasDel = pState->pDelFile ? 1 : 0; - uint32_t nDFileSet = taosArrayGetSize(pState->aDFileSet); + int8_t hasDel = pFS->pDelFile ? 1 : 0; + uint32_t nSet = taosArrayGetSize(pFS->aDFileSet); // SDelFile n += tPutI8(p ? p + n : p, hasDel); if (hasDel) { - n += tPutDelFile(p ? p + n : p, pState->pDelFile); + n += tPutDelFile(p ? p + n : p, pFS->pDelFile); } // SArray - n += tPutU32v(p ? p + n : p, nDFileSet); - for (uint32_t iDFileSet = 0; iDFileSet < nDFileSet; iDFileSet++) { - n += tPutDFileSet(p ? p + n : p, (SDFileSet *)taosArrayGet(pState->aDFileSet, iDFileSet)); + n += tPutU32v(p ? p + n : p, nSet); + for (uint32_t iSet = 0; iSet < nSet; iSet++) { + n += tPutDFileSet(p ? p + n : p, (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet)); } return n; } -static int32_t tGetFSState(uint8_t *p, STsdbFSState *pState) { - int32_t n = 0; - int8_t hasDel; - uint32_t nDFileSet; - SDFileSet *pSet = &(SDFileSet){0}; - - // SDelFile - n += tGetI8(p + n, &hasDel); - if (hasDel) { - pState->pDelFile = &pState->delFile; - n += tGetDelFile(p + n, pState->pDelFile); - } else { - pState->pDelFile = NULL; - } - - // SArray - taosArrayClear(pState->aDFileSet); - n += tGetU32v(p + n, &nDFileSet); - for (uint32_t iDFileSet = 0; iDFileSet < nDFileSet; iDFileSet++) { - n += tGetDFileSet(p + n, pSet); - taosArrayPush(pState->aDFileSet, pSet); - } - - return n; -} - -static int32_t tsdbGnrtCurrent(const char *fname, STsdbFSState *pState) { +static int32_t tsdbGnrtCurrent(STsdb *pTsdb, STsdbFS *pFS, char *fname) { int32_t code = 0; int64_t n; int64_t size; - uint8_t *pData; + uint8_t *pData = NULL; TdFilePtr pFD = NULL; // to binary - size = tPutFSState(NULL, pState) + sizeof(TSCKSUM); + size = tsdbEncodeFS(NULL, pFS) + sizeof(TSCKSUM); pData = taosMemoryMalloc(size); if (pData == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - n = tPutFSState(pData, pState); + n = tsdbEncodeFS(pData, pFS); ASSERT(n + sizeof(TSCKSUM) == size); taosCalcChecksumAppend(0, pData, size); @@ -104,411 +78,267 @@ static int32_t tsdbGnrtCurrent(const char *fname, STsdbFSState *pState) { return code; _err: - tsdbError("tsdb gnrt current failed since %s", tstrerror(code)); + tsdbError("vgId:%d tsdb gnrt current failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); if (pData) taosMemoryFree(pData); return code; } -static int32_t tsdbLoadCurrentState(STsdbFS *pFS, STsdbFSState *pState) { - int32_t code = 0; - int64_t size; - int64_t n; - char fname[TSDB_FILENAME_LEN]; - uint8_t *pData = NULL; - TdFilePtr pFD; +// static int32_t tsdbApplyDFileSetChange(STsdbFS *pFS, SDFileSet *pFrom, SDFileSet *pTo) { +// int32_t code = 0; +// char fname[TSDB_FILENAME_LEN]; - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP, - pFS->pTsdb->path, TD_DIRSEP); +// if (pFrom && pTo) { +// bool isSameDisk = (pFrom->diskId.level == pTo->diskId.level) && (pFrom->diskId.id == pTo->diskId.id); - if (!taosCheckExistFile(fname)) { - // create an empry CURRENT file if not exists - code = tsdbGnrtCurrent(fname, pState); - if (code) goto _err; - } else { - // open the file and load - pFD = taosOpenFile(fname, TD_FILE_READ); - if (pFD == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } +// // head +// if (isSameDisk && pFrom->pHeadF->commitID == pTo->pHeadF->commitID) { +// ASSERT(pFrom->pHeadF->size == pTo->pHeadF->size); +// ASSERT(pFrom->pHeadF->offset == pTo->pHeadF->offset); +// } else { +// tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); +// taosRemoveFile(fname); +// } - if (taosFStatFile(pFD, &size, NULL) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } +// // data +// if (isSameDisk && pFrom->pDataF->commitID == pTo->pDataF->commitID) { +// if (pFrom->pDataF->size > pTo->pDataF->size) { +// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_DATA_FILE); +// if (code) goto _err; +// } +// } else { +// tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); +// taosRemoveFile(fname); +// } - pData = taosMemoryMalloc(size); - if (pData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } +// // last +// if (isSameDisk && pFrom->pLastF->commitID == pTo->pLastF->commitID) { +// if (pFrom->pLastF->size > pTo->pLastF->size) { +// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_LAST_FILE); +// if (code) goto _err; +// } +// } else { +// tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); +// taosRemoveFile(fname); +// } - n = taosReadFile(pFD, pData, size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } +// // sma +// if (isSameDisk && pFrom->pSmaF->commitID == pTo->pSmaF->commitID) { +// if (pFrom->pSmaF->size > pTo->pSmaF->size) { +// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_SMA_FILE); +// if (code) goto _err; +// } +// } else { +// tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); +// taosRemoveFile(fname); +// } +// } else if (pFrom) { +// // head +// tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); +// taosRemoveFile(fname); - if (!taosCheckChecksumWhole(pData, size)) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } +// // data +// tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); +// taosRemoveFile(fname); - taosCloseFile(&pFD); +// // last +// tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); +// taosRemoveFile(fname); - // decode - tGetFSState(pData, pState); +// // fsm +// tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); +// taosRemoveFile(fname); +// } + +// return code; + +// _err: +// tsdbError("vgId:%d tsdb apply disk file set change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); +// return code; +// } + +// static int32_t tsdbApplyDelFileChange(STsdbFS *pFS, SDelFile *pFrom, SDelFile *pTo) { +// int32_t code = 0; +// char fname[TSDB_FILENAME_LEN]; + +// if (pFrom && pTo) { +// if (!tsdbDelFileIsSame(pFrom, pTo)) { +// tsdbDelFileName(pFS->pTsdb, pFrom, fname); +// if (taosRemoveFile(fname) < 0) { +// code = TAOS_SYSTEM_ERROR(errno); +// goto _err; +// } +// } +// } else if (pFrom) { +// tsdbDelFileName(pFS->pTsdb, pFrom, fname); +// if (taosRemoveFile(fname) < 0) { +// code = TAOS_SYSTEM_ERROR(errno); +// goto _err; +// } +// } else { +// // do nothing +// } + +// return code; + +// _err: +// tsdbError("vgId:%d tsdb apply del file change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); +// return code; +// } + +// static int32_t tsdbFSApplyDiskChange(STsdbFS *pFS, STsdbFSState *pFrom, STsdbFSState *pTo) { +// int32_t code = 0; +// int32_t iFrom = 0; +// int32_t nFrom = taosArrayGetSize(pFrom->aDFileSet); +// int32_t iTo = 0; +// int32_t nTo = taosArrayGetSize(pTo->aDFileSet); +// SDFileSet *pDFileSetFrom; +// SDFileSet *pDFileSetTo; + +// // SDelFile +// code = tsdbApplyDelFileChange(pFS, pFrom->pDelFile, pTo->pDelFile); +// if (code) goto _err; + +// // SDFileSet +// while (iFrom < nFrom && iTo < nTo) { +// pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); +// pDFileSetTo = (SDFileSet *)taosArrayGet(pTo->aDFileSet, iTo); + +// if (pDFileSetFrom->fid == pDFileSetTo->fid) { +// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, pDFileSetTo); +// if (code) goto _err; + +// iFrom++; +// iTo++; +// } else if (pDFileSetFrom->fid < pDFileSetTo->fid) { +// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); +// if (code) goto _err; + +// iFrom++; +// } else { +// iTo++; +// } +// } + +// while (iFrom < nFrom) { +// pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); +// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); +// if (code) goto _err; + +// iFrom++; +// } + +// #if 0 +// // do noting +// while (iTo < nTo) { +// pDFileSetTo = (SDFileSet *)taosArrayGetP(pTo->aDFileSet, iTo); +// code = tsdbApplyDFileSetChange(pFS, NULL, pDFileSetTo); +// if (code) goto _err; + +// iTo++; +// } +// #endif + +// return code; + +// _err: +// tsdbError("vgId:%d tsdb fs apply disk change failed sicne %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); +// return code; +// } + +void tsdbFSDestroy(STsdbFS *pFS) { + if (pFS->pDelFile) { + taosMemoryFree(pFS->pDelFile); } - if (pData) taosMemoryFree(pData); - return code; + for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet); + taosMemoryFree(pSet->pHeadF); + taosMemoryFree(pSet->pDataF); + taosMemoryFree(pSet->pLastF); + taosMemoryFree(pSet->pSmaF); + } -_err: - tsdbError("vgId:%d tsdb load current state failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - if (pData) taosMemoryFree(pData); - return code; + taosArrayDestroy(pFS->aDFileSet); } -static int32_t tsdbApplyDFileSetChange(STsdbFS *pFS, SDFileSet *pFrom, SDFileSet *pTo) { +static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { int32_t code = 0; + int64_t size; char fname[TSDB_FILENAME_LEN]; - if (pFrom && pTo) { - bool isSameDisk = (pFrom->diskId.level == pTo->diskId.level) && (pFrom->diskId.id == pTo->diskId.id); - - // head - if (isSameDisk && pFrom->pHeadF->commitID == pTo->pHeadF->commitID) { - ASSERT(pFrom->pHeadF->size == pTo->pHeadF->size); - ASSERT(pFrom->pHeadF->offset == pTo->pHeadF->offset); - } else { - tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); - taosRemoveFile(fname); - } - - // data - if (isSameDisk && pFrom->pDataF->commitID == pTo->pDataF->commitID) { - if (pFrom->pDataF->size > pTo->pDataF->size) { - code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_DATA_FILE); - if (code) goto _err; - } - } else { - tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); - taosRemoveFile(fname); - } - - // last - if (isSameDisk && pFrom->pLastF->commitID == pTo->pLastF->commitID) { - if (pFrom->pLastF->size > pTo->pLastF->size) { - code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_LAST_FILE); - if (code) goto _err; - } - } else { - tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); - taosRemoveFile(fname); - } - - // sma - if (isSameDisk && pFrom->pSmaF->commitID == pTo->pSmaF->commitID) { - if (pFrom->pSmaF->size > pTo->pSmaF->size) { - code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_SMA_FILE); - if (code) goto _err; - } - } else { - tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); - taosRemoveFile(fname); - } - } else if (pFrom) { - // head - tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); - taosRemoveFile(fname); - - // data - tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); - taosRemoveFile(fname); - - // last - tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); - taosRemoveFile(fname); - - // fsm - tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); - taosRemoveFile(fname); - } - - return code; - -_err: - tsdbError("vgId:%d tsdb apply disk file set change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbApplyDelFileChange(STsdbFS *pFS, SDelFile *pFrom, SDelFile *pTo) { - int32_t code = 0; - char fname[TSDB_FILENAME_LEN]; - - if (pFrom && pTo) { - if (!tsdbDelFileIsSame(pFrom, pTo)) { - tsdbDelFileName(pFS->pTsdb, pFrom, fname); - if (taosRemoveFile(fname) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - } - } else if (pFrom) { - tsdbDelFileName(pFS->pTsdb, pFrom, fname); - if (taosRemoveFile(fname) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - } else { - // do nothing - } - - return code; - -_err: - tsdbError("vgId:%d tsdb apply del file change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbFSApplyDiskChange(STsdbFS *pFS, STsdbFSState *pFrom, STsdbFSState *pTo) { - int32_t code = 0; - int32_t iFrom = 0; - int32_t nFrom = taosArrayGetSize(pFrom->aDFileSet); - int32_t iTo = 0; - int32_t nTo = taosArrayGetSize(pTo->aDFileSet); - SDFileSet *pDFileSetFrom; - SDFileSet *pDFileSetTo; - // SDelFile - code = tsdbApplyDelFileChange(pFS, pFrom->pDelFile, pTo->pDelFile); - if (code) goto _err; - - // SDFileSet - while (iFrom < nFrom && iTo < nTo) { - pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); - pDFileSetTo = (SDFileSet *)taosArrayGet(pTo->aDFileSet, iTo); - - if (pDFileSetFrom->fid == pDFileSetTo->fid) { - code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, pDFileSetTo); - if (code) goto _err; - - iFrom++; - iTo++; - } else if (pDFileSetFrom->fid < pDFileSetTo->fid) { - code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); - if (code) goto _err; - - iFrom++; - } else { - iTo++; - } - } - - while (iFrom < nFrom) { - pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); - code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); - if (code) goto _err; - - iFrom++; - } - -#if 0 - // do noting - while (iTo < nTo) { - pDFileSetTo = (SDFileSet *)taosArrayGetP(pTo->aDFileSet, iTo); - code = tsdbApplyDFileSetChange(pFS, NULL, pDFileSetTo); - if (code) goto _err; - - iTo++; - } -#endif - - return code; - -_err: - tsdbError("vgId:%d tsdb fs apply disk change failed sicne %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static void tsdbFSDestroy(STsdbFS *pFS) { - if (pFS) { - if (pFS->nState) { - taosArrayDestroy(pFS->nState->aDFileSet); - taosMemoryFree(pFS->nState); - } - - if (pFS->cState) { - taosArrayDestroy(pFS->cState->aDFileSet); - taosMemoryFree(pFS->cState); - } - - taosMemoryFree(pFS); - } - // TODO -} - -static int32_t tsdbFSCreate(STsdb *pTsdb, STsdbFS **ppFS) { - int32_t code = 0; - STsdbFS *pFS = NULL; - - pFS = (STsdbFS *)taosMemoryCalloc(1, sizeof(*pFS)); - if (pFS == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pFS->pTsdb = pTsdb; - - pFS->cState = (STsdbFSState *)taosMemoryCalloc(1, sizeof(STsdbFSState)); - if (pFS->cState == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pFS->cState->aDFileSet = taosArrayInit(0, sizeof(SDFileSet)); - if (pFS->cState->aDFileSet == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - pFS->nState = (STsdbFSState *)taosMemoryCalloc(1, sizeof(STsdbFSState)); - if (pFS->nState == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pFS->nState->aDFileSet = taosArrayInit(0, sizeof(SDFileSet)); - if (pFS->nState->aDFileSet == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - *ppFS = pFS; - return code; - -_err: - tsdbError("vgId:%d tsdb fs create failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - tsdbFSDestroy(pFS); - *ppFS = NULL; - return code; -} - -static int32_t tsdbScanAndTryFixFS(STsdbFS *pFS, int8_t deepScan) { - int32_t code = 0; - STsdb *pTsdb = pFS->pTsdb; - STfs *pTfs = pTsdb->pVnode->pTfs; - int64_t size; - char fname[TSDB_FILENAME_LEN]; - char pHdr[TSDB_FHDR_SIZE]; - TdFilePtr pFD; - - // SDelFile - if (pFS->cState->pDelFile) { - tsdbDelFileName(pTsdb, pFS->cState->pDelFile, fname); + if (pTsdb->fs.pDelFile) { + tsdbDelFileName(pTsdb, pTsdb->fs.pDelFile, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (size != pFS->cState->pDelFile->size) { + if (size != pTsdb->fs.pDelFile->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } - - if (deepScan) { - // TODO - } } // SArray - for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->cState->aDFileSet); iSet++) { - SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->cState->aDFileSet, iSet); + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); // head ========= - tsdbHeadFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pHeadF, fname); + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - - if (deepScan) { - // TODO + if (size != pSet->pHeadF->size) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _err; } // data ========= - tsdbDataFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pDataF, fname); + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - - if (size < pDFileSet->pDataF->size) { + if (size < pSet->pDataF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pDFileSet->pDataF->size) { - ASSERT(0); - // need to rollback the file - } - - if (deepScan) { - // TODO + } else if (size > pSet->pDataF->size) { + code = tsdbDFileRollback(pTsdb, pSet, TSDB_DATA_FILE); + if (code) goto _err; } // last =========== - tsdbLastFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pLastF, fname); + tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - - if (size < pDFileSet->pLastF->size) { + if (size != pSet->pLastF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pDFileSet->pLastF->size) { - ASSERT(0); - // need to rollback the file - } - - if (deepScan) { - // TODO } // sma ============= - tsdbSmaFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pSmaF, fname); + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - - if (size < pDFileSet->pSmaF->size) { + if (size < pSet->pSmaF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; - } else if (size > pDFileSet->pSmaF->size) { - ASSERT(0); - // need to rollback the file - } - - if (deepScan) { - // TODO + } else if (size > pSet->pSmaF->size) { + code = tsdbDFileRollback(pTsdb, pSet, TSDB_SMA_FILE); + if (code) goto _err; } } - // remove those invalid files (todo) -#if 0 - STfsDir *tdir; - const STfsFile *pf; - - tdir = tfsOpendir(pTfs, pTsdb->path); - if (tdir == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; + { + // remove those invalid files (todo) } - while ((pf = tfsReaddir(tdir))) { - tfsBasename(pf, fname); - } - - tfsClosedir(tdir); -#endif - return code; _err: @@ -516,7 +346,7 @@ _err: return code; } -static int32_t tDFileSetCmprFn(const void *p1, const void *p2) { +int32_t tDFileSetCmprFn(const void *p1, const void *p2) { if (((SDFileSet *)p1)->fid < ((SDFileSet *)p2)->fid) { return -1; } else if (((SDFileSet *)p1)->fid > ((SDFileSet *)p2)->fid) { @@ -526,87 +356,372 @@ static int32_t tDFileSetCmprFn(const void *p1, const void *p2) { return 0; } -// EXPOSED APIS ==================================================================================== -int32_t tsdbFSOpen(STsdb *pTsdb, STsdbFS **ppFS) { - int32_t code = 0; - - // create handle - code = tsdbFSCreate(pTsdb, ppFS); - if (code) goto _err; - - // load current state - code = tsdbLoadCurrentState(*ppFS, (*ppFS)->cState); - if (code) { - tsdbFSDestroy(*ppFS); - goto _err; - } - - // scan and fix FS - code = tsdbScanAndTryFixFS(*ppFS, 0); - if (code) { - tsdbFSDestroy(*ppFS); - goto _err; - } - - return code; - -_err: - *ppFS = NULL; - tsdbError("vgId:%d tsdb fs open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} - -int32_t tsdbFSClose(STsdbFS *pFS) { - int32_t code = 0; - tsdbFSDestroy(pFS); - return code; -} - -int32_t tsdbFSBegin(STsdbFS *pFS) { - int32_t code = 0; +static int32_t tsdbRecoverFS(STsdb *pTsdb, uint8_t *pData, int64_t nData) { + int32_t code = 0; + int8_t hasDel; + uint32_t nSet; + int32_t n; // SDelFile - pFS->nState->pDelFile = NULL; - if (pFS->cState->pDelFile) { - pFS->nState->delFile = pFS->cState->delFile; - pFS->nState->pDelFile = &pFS->nState->delFile; + n = 0; + n += tGetI8(pData + n, &hasDel); + if (hasDel) { + pTsdb->fs.pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pTsdb->fs.pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + pTsdb->fs.pDelFile->nRef = 1; + n += tGetDelFile(pData + n, pTsdb->fs.pDelFile); + } else { + pTsdb->fs.pDelFile = NULL; } - // SArray - taosArrayClear(pFS->nState->aDFileSet); - for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->cState->aDFileSet); iSet++) { - SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->cState->aDFileSet, iSet); + // SArray + taosArrayClear(pTsdb->fs.aDFileSet); + n += tGetU32v(pData + n, &nSet); + for (uint32_t iSet = 0; iSet < nSet; iSet++) { + SDFileSet fSet; - if (taosArrayPush(pFS->nState->aDFileSet, pDFileSet) == NULL) { + // head + fSet.pHeadF = (SHeadFile *)taosMemoryCalloc(1, sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + fSet.pHeadF->nRef = 1; + + // data + fSet.pDataF = (SDataFile *)taosMemoryCalloc(1, sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + fSet.pDataF->nRef = 1; + + // last + fSet.pLastF = (SLastFile *)taosMemoryCalloc(1, sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + fSet.pLastF->nRef = 1; + + // sma + fSet.pSmaF = (SSmaFile *)taosMemoryCalloc(1, sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + fSet.pSmaF->nRef = 1; + + n += tGetDFileSet(pData + n, &fSet); + + if (taosArrayPush(pTsdb->fs.aDFileSet, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } } + ASSERT(n + sizeof(TSCKSUM) == nData); return code; _err: - tsdbError("vgId:%d tsdb fs begin failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); return code; } -int32_t tsdbFSCommit(STsdbFS *pFS) { - int32_t code = 0; - STsdbFSState *pState = pFS->nState; - char tfname[TSDB_FILENAME_LEN]; - char fname[TSDB_FILENAME_LEN]; +// EXPOSED APIS ==================================================================================== +int32_t tsdbFSOpen(STsdb *pTsdb) { + int32_t code = 0; - // need lock (todo) - pFS->nState = pFS->cState; - pFS->cState = pState; + // open handle + pTsdb->fs.pDelFile = NULL; + pTsdb->fs.aDFileSet = taosArrayInit(0, sizeof(SDFileSet)); + if (pTsdb->fs.aDFileSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } - snprintf(tfname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT.t", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP, - pFS->pTsdb->path, TD_DIRSEP); - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP, - pFS->pTsdb->path, TD_DIRSEP); + // load fs or keep empty + char fname[TSDB_FILENAME_LEN]; + + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP, + pTsdb->path, TD_DIRSEP); + + if (!taosCheckExistFile(fname)) { + // empty one + code = tsdbGnrtCurrent(pTsdb, &pTsdb->fs, fname); + if (code) goto _err; + } else { + // read + TdFilePtr pFD = taosOpenFile(fname, TD_FILE_READ); + if (pFD == NULL) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + int64_t size; + if (taosFStatFile(pFD, &size, NULL) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + taosCloseFile(&pFD); + goto _err; + } + + uint8_t *pData = taosMemoryMalloc(size); + if (pData == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + taosCloseFile(&pFD); + goto _err; + } + + int64_t n = taosReadFile(pFD, pData, size); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + taosMemoryFree(pData); + taosCloseFile(&pFD); + goto _err; + } + + if (!taosCheckChecksumWhole(pData, size)) { + code = TSDB_CODE_FILE_CORRUPTED; + taosMemoryFree(pData); + taosCloseFile(&pFD); + goto _err; + } + + taosCloseFile(&pFD); + + // recover fs + code = tsdbRecoverFS(pTsdb, pData, size); + if (code) { + taosMemoryFree(pData); + goto _err; + } + + taosMemoryFree(pData); + } + + // scan and fix FS + code = tsdbScanAndTryFixFS(pTsdb); + if (code) goto _err; + + return code; + +_err: + tsdbError("vgId:%d tsdb fs open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbFSClose(STsdb *pTsdb) { + int32_t code = 0; + + if (pTsdb->fs.pDelFile) { + ASSERT(pTsdb->fs.pDelFile->nRef == 1); + taosMemoryFree(pTsdb->fs.pDelFile); + } + + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + + // head + ASSERT(pSet->pHeadF->nRef == 1); + taosMemoryFree(pSet->pHeadF); + + // data + ASSERT(pSet->pDataF->nRef == 1); + taosMemoryFree(pSet->pDataF); + + // last + ASSERT(pSet->pLastF->nRef == 1); + taosMemoryFree(pSet->pLastF); + + // sma + ASSERT(pSet->pSmaF->nRef == 1); + taosMemoryFree(pSet->pSmaF); + } + + taosArrayClear(pTsdb->fs.aDFileSet); + + return code; +} + +int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS) { + int32_t code = 0; + + pFS->pDelFile = NULL; + pFS->aDFileSet = taosArrayInit(taosArrayGetSize(pTsdb->fs.aDFileSet), sizeof(SDFileSet)); + if (pFS->aDFileSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + if (pTsdb->fs.pDelFile) { + pFS->pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pFS->pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + *pFS->pDelFile = *pTsdb->fs.pDelFile; + } + + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid}; + + // head + fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + fSet.pHeadF->nRef = 0; + fSet.pHeadF->commitID = pSet->pHeadF->commitID; + fSet.pHeadF->size = pSet->pHeadF->size; + fSet.pHeadF->offset = pSet->pHeadF->offset; + + // data + fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + fSet.pDataF->nRef = 0; + fSet.pDataF->commitID = pSet->pDataF->commitID; + fSet.pDataF->size = pSet->pDataF->size; + + // data + fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + fSet.pLastF->nRef = 0; + fSet.pLastF->commitID = pSet->pLastF->commitID; + fSet.pLastF->size = pSet->pLastF->size; + + // last + fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + fSet.pSmaF->nRef = 0; + fSet.pSmaF->commitID = pSet->pSmaF->commitID; + fSet.pSmaF->size = pSet->pSmaF->size; + + if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + } + +_exit: + return code; +} + +int32_t tsdbFSRollback(STsdbFS *pFS) { + int32_t code = 0; + + ASSERT(0); + + return code; + +_err: + return code; +} + +int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile) { + int32_t code = 0; + + if (pFS->pDelFile == NULL) { + pFS->pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pFS->pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + } + *pFS->pDelFile = *pDelFile; + +_exit: + return code; +} + +int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet) { + int32_t code = 0; + int32_t idx = taosArraySearchIdx(pFS->aDFileSet, pSet, tDFileSetCmprFn, TD_GE); + + if (idx < 0) { + idx = taosArrayGetSize(pFS->aDFileSet); + } else { + SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, idx); + int32_t c = tDFileSetCmprFn(pSet, pDFileSet); + if (c == 0) { + *pDFileSet->pHeadF = *pSet->pHeadF; + *pDFileSet->pDataF = *pSet->pDataF; + *pDFileSet->pLastF = *pSet->pLastF; + *pDFileSet->pSmaF = *pSet->pSmaF; + + goto _exit; + } + } + + SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid}; + + // head + fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pHeadF = *pSet->pHeadF; + + // data + fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pDataF = *pSet->pDataF; + + // data + fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pLastF = *pSet->pLastF; + + // last + fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pSmaF = *pSet->pSmaF; + + if (taosArrayInsert(pFS->aDFileSet, idx, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + +_exit: + return code; +} + +int32_t tsdbFSCommit1(STsdb *pTsdb, STsdbFS *pFSNew) { + int32_t code = 0; + char tfname[TSDB_FILENAME_LEN]; + char fname[TSDB_FILENAME_LEN]; + + snprintf(tfname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT.t", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP, + pTsdb->path, TD_DIRSEP); + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP, + pTsdb->path, TD_DIRSEP); // gnrt CURRENT.t - code = tsdbGnrtCurrent(tfname, pFS->cState); + code = tsdbGnrtCurrent(pTsdb, pFSNew, tfname); if (code) goto _err; // rename @@ -616,56 +731,295 @@ int32_t tsdbFSCommit(STsdbFS *pFS) { goto _err; } - // apply commit on disk - code = tsdbFSApplyDiskChange(pFS, pFS->nState, pFS->cState); - if (code) goto _err; - return code; _err: - tsdbError("vgId:%d tsdb fs commit failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); + tsdbError("vgId:%d tsdb fs commit phase 1 failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } -int32_t tsdbFSRollback(STsdbFS *pFS) { +int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFSNew) { int32_t code = 0; + int32_t nRef; + char fname[TSDB_FILENAME_LEN]; - code = tsdbFSApplyDiskChange(pFS, pFS->nState, pFS->cState); - if (code) goto _err; + // del + if (pFSNew->pDelFile) { + SDelFile *pDelFile = pTsdb->fs.pDelFile; - return code; + if (pDelFile == NULL || (pDelFile->commitID != pFSNew->pDelFile->commitID)) { + pTsdb->fs.pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pTsdb->fs.pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } -_err: - tsdbError("vgId:%d tsdb fs rollback failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} + *pTsdb->fs.pDelFile = *pFSNew->pDelFile; + pTsdb->fs.pDelFile->nRef = 1; -int32_t tsdbFSStateUpsertDelFile(STsdbFSState *pState, SDelFile *pDelFile) { - int32_t code = 0; - pState->delFile = *pDelFile; - pState->pDelFile = &pState->delFile; - return code; -} - -int32_t tsdbFSStateUpsertDFileSet(STsdbFSState *pState, SDFileSet *pSet) { - int32_t code = 0; - int32_t idx = taosArraySearchIdx(pState->aDFileSet, pSet, tDFileSetCmprFn, TD_GE); - - if (idx < 0) { - if (taosArrayPush(pState->aDFileSet, pSet) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; + if (pDelFile) { + nRef = atomic_sub_fetch_32(&pDelFile->nRef, 1); + if (nRef == 0) { + tsdbDelFileName(pTsdb, pDelFile, fname); + taosRemoveFile(fname); + taosMemoryFree(pDelFile); + } + } } } else { - SDFileSet *tDFileSet = (SDFileSet *)taosArrayGet(pState->aDFileSet, idx); - int32_t c = tDFileSetCmprFn(pSet, tDFileSet); - if (c == 0) { - taosArraySet(pState->aDFileSet, idx, pSet); - } else { - if (taosArrayInsert(pState->aDFileSet, idx, pSet) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; + ASSERT(pTsdb->fs.pDelFile == NULL); + } + + // data + int32_t iOld = 0; + int32_t iNew = 0; + while (true) { + int32_t nOld = taosArrayGetSize(pTsdb->fs.aDFileSet); + int32_t nNew = taosArrayGetSize(pFSNew->aDFileSet); + SDFileSet fSet; + int8_t sameDisk; + + if (iOld >= nOld && iNew >= nNew) break; + + SDFileSet *pSetOld = (iOld < nOld) ? taosArrayGet(pTsdb->fs.aDFileSet, iOld) : NULL; + SDFileSet *pSetNew = (iNew < nNew) ? taosArrayGet(pFSNew->aDFileSet, iNew) : NULL; + + if (pSetOld && pSetNew) { + if (pSetOld->fid == pSetNew->fid) { + goto _merge_old_and_new; + } else if (pSetOld->fid < pSetNew->fid) { + goto _remove_old; + } else { + goto _add_new; } + } else if (pSetOld) { + goto _remove_old; + } else { + goto _add_new; + } + + _merge_old_and_new: + sameDisk = ((pSetOld->diskId.level == pSetNew->diskId.level) && (pSetOld->diskId.id == pSetNew->diskId.id)); + + // head + fSet.pHeadF = pSetOld->pHeadF; + if ((!sameDisk) || (pSetOld->pHeadF->commitID != pSetNew->pHeadF->commitID)) { + pSetOld->pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (pSetOld->pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pHeadF = *pSetNew->pHeadF; + pSetOld->pHeadF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pHeadF->nRef, 1); + if (nRef == 0) { + tsdbHeadFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pHeadF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pHeadF); + } + } else { + ASSERT(fSet.pHeadF->size == pSetNew->pHeadF->size); + ASSERT(fSet.pHeadF->offset == pSetNew->pHeadF->offset); + } + + // data + fSet.pDataF = pSetOld->pDataF; + if ((!sameDisk) || (pSetOld->pDataF->commitID != pSetNew->pDataF->commitID)) { + pSetOld->pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (pSetOld->pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pDataF = *pSetNew->pDataF; + pSetOld->pDataF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pDataF->nRef, 1); + if (nRef == 0) { + tsdbDataFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pDataF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pDataF); + } + } else { + ASSERT(pSetOld->pDataF->size <= pSetNew->pDataF->size); + pSetOld->pDataF->size = pSetNew->pDataF->size; + } + + // last + fSet.pLastF = pSetOld->pLastF; + if ((!sameDisk) || (pSetOld->pLastF->commitID != pSetNew->pLastF->commitID)) { + pSetOld->pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (pSetOld->pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pLastF = *pSetNew->pLastF; + pSetOld->pLastF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pLastF->nRef, 1); + if (nRef == 0) { + tsdbLastFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pLastF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pLastF); + } + } else { + ASSERT(pSetOld->pLastF->size == pSetNew->pLastF->size); + } + + // sma + fSet.pSmaF = pSetOld->pSmaF; + if ((!sameDisk) || (pSetOld->pSmaF->commitID != pSetNew->pSmaF->commitID)) { + pSetOld->pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (pSetOld->pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pSmaF = *pSetNew->pSmaF; + pSetOld->pSmaF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pSmaF->nRef, 1); + if (nRef == 0) { + tsdbSmaFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pSmaF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pSmaF); + } + } else { + ASSERT(pSetOld->pSmaF->size <= pSetNew->pSmaF->size); + pSetOld->pSmaF->size = pSetNew->pSmaF->size; + } + + if (!sameDisk) { + pSetOld->diskId = pSetNew->diskId; + } + + iOld++; + iNew++; + continue; + + _remove_old: + nRef = atomic_sub_fetch_32(&pSetOld->pHeadF->nRef, 1); + if (nRef == 0) { + tsdbHeadFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pHeadF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pHeadF); + } + + nRef = atomic_sub_fetch_32(&pSetOld->pDataF->nRef, 1); + if (nRef == 0) { + tsdbDataFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pDataF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pDataF); + } + + nRef = atomic_sub_fetch_32(&pSetOld->pLastF->nRef, 1); + if (nRef == 0) { + tsdbLastFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pLastF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pLastF); + } + + nRef = atomic_sub_fetch_32(&pSetOld->pSmaF->nRef, 1); + if (nRef == 0) { + tsdbSmaFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pSmaF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pSmaF); + } + + taosArrayRemove(pTsdb->fs.aDFileSet, iOld); + continue; + + _add_new: + fSet.diskId = pSetNew->diskId; + fSet.fid = pSetNew->fid; + + // head + fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pHeadF = *pSetNew->pHeadF; + fSet.pHeadF->nRef = 1; + + // data + fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pDataF = *pSetNew->pDataF; + fSet.pDataF->nRef = 1; + + // last + fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pLastF = *pSetNew->pLastF; + fSet.pLastF->nRef = 1; + + // sma + fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pSmaF = *pSetNew->pSmaF; + fSet.pSmaF->nRef = 1; + + if (taosArrayInsert(pTsdb->fs.aDFileSet, iOld, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + iOld++; + iNew++; + continue; + } + + return code; + +_err: + tsdbError("vgId:%d tsdb fs commit phase 2 failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS) { + int32_t code = 0; + int32_t nRef; + + pFS->aDFileSet = taosArrayInit(taosArrayGetSize(pTsdb->fs.aDFileSet), sizeof(SDFileSet)); + if (pFS->aDFileSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + pFS->pDelFile = pTsdb->fs.pDelFile; + if (pFS->pDelFile) { + nRef = atomic_fetch_add_32(&pFS->pDelFile->nRef, 1); + ASSERT(nRef > 0); + } + + SDFileSet fSet; + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + fSet = *pSet; + + nRef = atomic_fetch_add_32(&pSet->pHeadF->nRef, 1); + ASSERT(nRef > 0); + + nRef = atomic_fetch_add_32(&pSet->pDataF->nRef, 1); + ASSERT(nRef > 0); + + nRef = atomic_fetch_add_32(&pSet->pLastF->nRef, 1); + ASSERT(nRef > 0); + + nRef = atomic_fetch_add_32(&pSet->pSmaF->nRef, 1); + ASSERT(nRef > 0); + + if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; } } @@ -673,16 +1027,59 @@ _exit: return code; } -void tsdbFSStateDeleteDFileSet(STsdbFSState *pState, int32_t fid) { - int32_t idx; +void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS) { + int32_t nRef; + char fname[TSDB_FILENAME_LEN]; - idx = taosArraySearchIdx(pState->aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); - ASSERT(idx >= 0); - taosArrayRemove(pState->aDFileSet, idx); -} + if (pFS->pDelFile) { + nRef = atomic_sub_fetch_32(&pFS->pDelFile->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbDelFileName(pTsdb, pFS->pDelFile, fname); + taosRemoveFile(fname); + taosMemoryFree(pFS->pDelFile); + } + } -SDelFile *tsdbFSStateGetDelFile(STsdbFSState *pState) { return pState->pDelFile; } + for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet); -SDFileSet *tsdbFSStateGetDFileSet(STsdbFSState *pState, int32_t fid, int32_t flag) { - return (SDFileSet *)taosArraySearch(pState->aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, flag); -} + // head + nRef = atomic_sub_fetch_32(&pSet->pHeadF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pHeadF); + } + + // data + nRef = atomic_sub_fetch_32(&pSet->pDataF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pDataF); + } + + // last + nRef = atomic_sub_fetch_32(&pSet->pLastF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pLastF); + } + + // sma + nRef = atomic_sub_fetch_32(&pSet->pSmaF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pSmaF); + } + } + + taosArrayDestroy(pFS->aDFileSet); +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFile.c b/source/dnode/vnode/src/tsdb/tsdbFile.c index 4a41e9fb41..135ee23d44 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile.c @@ -122,21 +122,11 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { // truncate switch (ftype) { - case TSDB_HEAD_FILE: - size = pSet->pHeadF->size; - tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); - tPutHeadFile(hdr, pSet->pHeadF); - break; case TSDB_DATA_FILE: size = pSet->pDataF->size; tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); tPutDataFile(hdr, pSet->pDataF); break; - case TSDB_LAST_FILE: - size = pSet->pLastF->size; - tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); - tPutLastFile(hdr, pSet->pLastF); - break; case TSDB_SMA_FILE: size = pSet->pSmaF->size; tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); @@ -186,6 +176,7 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { return code; _err: + tsdbError("vgId:%d tsdb rollback file failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } @@ -219,10 +210,8 @@ int32_t tGetDFileSet(uint8_t *p, SDFileSet *pSet) { // SDelFile =============================================== void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]) { - STfs *pTfs = pTsdb->pVnode->pTfs; - - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%dver%" PRId64 "%s", tfsGetPrimaryPath(pTfs), TD_DIRSEP, pTsdb->path, - TD_DIRSEP, TD_VID(pTsdb->pVnode), pFile->commitID, ".del"); + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%dver%" PRId64 "%s", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pFile->commitID, ".del"); } int32_t tPutDelFile(uint8_t *p, SDelFile *pDelFile) { diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index 80ba5f0363..52b6e07903 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -605,48 +605,3 @@ void tsdbUnrefMemTable(SMemTable *pMemTable) { tsdbMemTableDestroy(pMemTable); } } - -int32_t tsdbTakeMemSnapshot(STsdb *pTsdb, SMemTable **ppMem, SMemTable **ppIMem) { - ASSERT(0); - int32_t code = 0; - - // lock - code = taosThreadRwlockRdlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - goto _exit; - } - - // take snapshot - *ppMem = pTsdb->mem; - *ppIMem = pTsdb->imem; - - if (*ppMem) { - tsdbRefMemTable(*ppMem); - } - - if (*ppIMem) { - tsdbRefMemTable(*ppIMem); - } - - // unlock - code = taosThreadRwlockUnlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - goto _exit; - } - -_exit: - return code; -} - -void tsdbUntakeMemSnapshot(STsdb *pTsdb, SMemTable *pMem, SMemTable *pIMem) { - ASSERT(0); - if (pMem) { - tsdbUnrefMemTable(pMem); - } - - if (pIMem) { - tsdbUnrefMemTable(pIMem); - } -} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbOpen.c b/source/dnode/vnode/src/tsdb/tsdbOpen.c index 064c7adf4b..0b355d91b4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbOpen.c +++ b/source/dnode/vnode/src/tsdb/tsdbOpen.c @@ -66,7 +66,7 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee tfsMkdir(pVnode->pTfs, pTsdb->path); // open tsdb - if (tsdbFSOpen(pTsdb, &pTsdb->pFS) < 0) { + if (tsdbFSOpen(pTsdb) < 0) { goto _err; } @@ -88,7 +88,7 @@ _err: int tsdbClose(STsdb **pTsdb) { if (*pTsdb) { taosThreadRwlockDestroy(&(*pTsdb)->rwLock); - tsdbFSClose((*pTsdb)->pFS); + tsdbFSClose(*pTsdb); tsdbCloseCache((*pTsdb)->lruCache); taosMemoryFreeClear(*pTsdb); } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 3375dd69ba..27afd7d0c5 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -118,8 +118,7 @@ struct STsdbReader { char* idStr; // query info handle, for debug purpose int32_t type; // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows SBlockLoadSuppInfo suppInfo; - SMemTable* pMem; - SMemTable* pIMem; + STsdbReadSnap* pReadSnap; SIOCostSummary cost; STSchema* pSchema; @@ -275,12 +274,12 @@ static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* cap } // init file iterator -static int32_t initFilesetIterator(SFilesetIter* pIter, const STsdbFSState* pFState, int32_t order, const char* idstr) { - size_t numOfFileset = taosArrayGetSize(pFState->aDFileSet); +static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, int32_t order, const char* idstr) { + size_t numOfFileset = taosArrayGetSize(aDFileSet); pIter->index = ASCENDING_TRAVERSE(order) ? -1 : numOfFileset; pIter->order = order; - pIter->pFileList = taosArrayDup(pFState->aDFileSet); + pIter->pFileList = aDFileSet; pIter->numOfFiles = numOfFileset; tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, idstr); @@ -1881,8 +1880,8 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea int32_t backward = (!ASCENDING_TRAVERSE(pReader->order)); STbData* d = NULL; - if (pReader->pMem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pMem, pReader->suid, pBlockScanInfo->uid, &d); + if (pReader->pReadSnap->pMem != NULL) { + tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid, &d); if (d != NULL) { code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter); if (code == TSDB_CODE_SUCCESS) { @@ -1902,8 +1901,8 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea } STbData* di = NULL; - if (pReader->pIMem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pIMem, pReader->suid, pBlockScanInfo->uid, &di); + if (pReader->pReadSnap->pIMem != NULL) { + tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid, &di); if (di != NULL) { code = tsdbTbDataIterCreate(di, &startKey, backward, &pBlockScanInfo->iiter.iter); if (code == TSDB_CODE_SUCCESS) { @@ -1939,7 +1938,7 @@ int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* SArray* pDelData = taosArrayInit(4, sizeof(SDelData)); - SDelFile* pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); + SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile; if (pDelFile) { SDelFReader* pDelFReader = NULL; code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); @@ -2830,8 +2829,7 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl SDataBlockIter* pBlockIter = &pReader->status.blockIter; - STsdbFSState* pFState = pReader->pTsdb->pFS->cState; - initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr); + initFilesetIterator(&pReader->status.fileIter, (*ppReader)->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); resetDataBlockIterator(&pReader->status.blockIter, pReader->order); // no data in files, let's try buffer in memory @@ -2844,7 +2842,8 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl } } - tsdbTakeMemSnapshot(pReader->pTsdb, &pReader->pMem, &pReader->pIMem); + code = tsdbTakeReadSnap(pVnode->pTsdb, &pReader->pReadSnap); + if (code) goto _err; tsdbDebug("%p total numOfTable:%d in this query %s", pReader, numOfTables, pReader->idStr); return code; @@ -2861,7 +2860,7 @@ void tsdbReaderClose(STsdbReader* pReader) { SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - tsdbUntakeMemSnapshot(pReader->pTsdb, pReader->pMem, pReader->pIMem); + tsdbUntakeReadSnap(pReader->pTsdb, pReader->pReadSnap); taosMemoryFreeClear(pSupInfo->plist); taosMemoryFree(pSupInfo->colIds); @@ -3081,8 +3080,7 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { tsdbDataFReaderClose(&pReader->pFileReader); - STsdbFSState* pFState = pReader->pTsdb->pFS->cState; - initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr); + initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); resetDataBlockIterator(&pReader->status.blockIter, pReader->order); resetDataBlockScanInfo(pReader->status.pTableMap); @@ -3275,6 +3273,11 @@ int32_t tsdbTakeReadSnap(STsdb* pTsdb, STsdbReadSnap** ppSnap) { } // fs (todo) + code = tsdbFSRef(pTsdb, &(*ppSnap)->fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _exit; + } // unlock code = taosThreadRwlockUnlock(&pTsdb->rwLock); @@ -3297,6 +3300,6 @@ void tsdbUntakeReadSnap(STsdb* pTsdb, STsdbReadSnap* pSnap) { tsdbUnrefMemTable(pSnap->pIMem); } - // fs (todo) + tsdbFSUnref(pTsdb, &pSnap->fs); } } diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index 137ef9a4a6..77ca49e33e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -16,7 +16,8 @@ #include "tsdb.h" static int32_t tsdbDoRetentionImpl(STsdb *pTsdb, int64_t now, int8_t try, int8_t *canDo) { - int32_t code = 0; + int32_t code = 0; +#if 0 STsdbFSState *pState; if (try) { @@ -64,18 +65,20 @@ static int32_t tsdbDoRetentionImpl(STsdb *pTsdb, int64_t now, int8_t try, int8_t code = tsdbDFileSetCopy(pTsdb, pDFileSet, &nDFileSet); if (code) goto _exit; - code = tsdbFSStateUpsertDFileSet(pState, &nDFileSet); + code = tsdbFSUpsertFSet(pState, &nDFileSet); if (code) goto _exit; } } } +#endif _exit: return code; } int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now) { int32_t code = 0; +#if 0 int8_t canDo; // try @@ -100,5 +103,6 @@ _exit: _err: tsdbError("vgId:%d tsdb do retention failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); tsdbFSRollback(pTsdb->pFS); +#endif return code; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index dfb01f2ded..43537c9a8d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -20,6 +20,7 @@ struct STsdbSnapReader { STsdb* pTsdb; int64_t sver; int64_t ever; + STsdbFS fs; // for data file int8_t dataDone; int32_t fid; @@ -45,7 +46,8 @@ static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { while (true) { if (pReader->pDataFReader == NULL) { - SDFileSet* pSet = tsdbFSStateGetDFileSet(pTsdb->pFS->cState, pReader->fid, TD_GT); + SDFileSet* pSet = + taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, tDFileSetCmprFn, TD_GT); if (pSet == NULL) goto _exit; @@ -159,7 +161,7 @@ _err: static int32_t tsdbSnapReadDel(STsdbSnapReader* pReader, uint8_t** ppData) { int32_t code = 0; STsdb* pTsdb = pReader->pTsdb; - SDelFile* pDelFile = pTsdb->pFS->cState->pDelFile; + SDelFile* pDelFile = pReader->fs.pDelFile; if (pReader->pDelFReader == NULL) { if (pDelFile == NULL) { @@ -254,6 +256,24 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapRe pReader->sver = sver; pReader->ever = ever; + code = taosThreadRwlockRdlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _err; + } + + code = tsdbFSRef(pTsdb, &pReader->fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _err; + } + + code = taosThreadRwlockUnlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _err; + } + pReader->fid = INT32_MIN; pReader->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); if (pReader->aBlockIdx == NULL) { @@ -305,6 +325,8 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { taosArrayDestroy(pReader->aDelIdx); taosArrayDestroy(pReader->aDelData); + tsdbFSUnref(pReader->pTsdb, &pReader->fs); + tsdbInfo("vgId:%d vnode snapshot tsdb reader closed", TD_VID(pReader->pTsdb->pVnode)); taosMemoryFree(pReader); @@ -358,6 +380,7 @@ struct STsdbSnapWriter { STsdb* pTsdb; int64_t sver; int64_t ever; + STsdbFS fs; // config int32_t minutes; @@ -798,7 +821,7 @@ static int32_t tsdbSnapWriteDataEnd(STsdbSnapWriter* pWriter) { code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdxW, NULL); if (code) goto _err; - code = tsdbFSStateUpsertDFileSet(pTsdb->pFS->nState, &pWriter->pDataFWriter->wSet); + code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet); if (code) goto _err; code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1); @@ -843,7 +866,7 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 pWriter->fid = fid; // read - SDFileSet* pSet = tsdbFSStateGetDFileSet(pTsdb->pFS->nState, fid, TD_EQ); + SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); if (pSet) { code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet); if (code) goto _err; @@ -911,7 +934,7 @@ static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32 STsdb* pTsdb = pWriter->pTsdb; if (pWriter->pDelFWriter == NULL) { - SDelFile* pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->nState); + SDelFile* pDelFile = pWriter->fs.pDelFile; // reader if (pDelFile) { @@ -1021,7 +1044,7 @@ static int32_t tsdbSnapWriteDelEnd(STsdbSnapWriter* pWriter) { code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter); if (code) goto _err; - code = tsdbFSStateUpsertDelFile(pTsdb->pFS->nState, &pWriter->pDelFWriter->fDel); + code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel); if (code) goto _err; code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1); @@ -1055,6 +1078,9 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr pWriter->sver = sver; pWriter->ever = ever; + code = tsdbFSCopy(pTsdb, &pWriter->fs); + if (code) goto _err; + // config pWriter->minutes = pTsdb->keepCfg.days; pWriter->precision = pTsdb->keepCfg.precision; @@ -1100,9 +1126,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr goto _err; } - code = tsdbFSBegin(pTsdb->pFS); - if (code) goto _err; - *ppWriter = pWriter; return code; @@ -1117,8 +1140,9 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { STsdbSnapWriter* pWriter = *ppWriter; if (rollback) { - code = tsdbFSRollback(pWriter->pTsdb->pFS); - if (code) goto _err; + ASSERT(0); + // code = tsdbFSRollback(pWriter->pTsdb->pFS); + // if (code) goto _err; } else { code = tsdbSnapWriteDataEnd(pWriter); if (code) goto _err; @@ -1126,7 +1150,10 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { code = tsdbSnapWriteDelEnd(pWriter); if (code) goto _err; - code = tsdbFSCommit(pWriter->pTsdb->pFS); + code = tsdbFSCommit1(pWriter->pTsdb, &pWriter->fs); + if (code) goto _err; + + code = tsdbFSCommit2(pWriter->pTsdb, &pWriter->fs); if (code) goto _err; }