1210 lines
34 KiB
C
1210 lines
34 KiB
C
/*
|
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
|
*
|
|
* This program is free software: you can use, redistribute, and/or modify
|
|
* it under the terms of the GNU Affero General Public License, version 3
|
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
|
*
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "crypt.h"
|
|
#include "tdbInt.h"
|
|
#include "tglobal.h"
|
|
|
|
struct hashset_st {
|
|
size_t nbits;
|
|
size_t mask;
|
|
size_t capacity;
|
|
size_t *items;
|
|
size_t nitems;
|
|
double load_factor;
|
|
};
|
|
|
|
static const unsigned int prime = 39;
|
|
static const unsigned int prime2 = 5009;
|
|
|
|
static hashset_t hashset_create(void) {
|
|
hashset_t set = tdbOsCalloc(1, sizeof(struct hashset_st));
|
|
if (!set) {
|
|
terrno = TSDB_CODE_OUT_OF_MEMORY;
|
|
return NULL;
|
|
}
|
|
|
|
set->nbits = 4;
|
|
set->capacity = (size_t)(1 << set->nbits);
|
|
set->items = tdbOsCalloc(set->capacity, sizeof(size_t));
|
|
if (!set->items) {
|
|
tdbOsFree(set);
|
|
terrno = TSDB_CODE_OUT_OF_MEMORY;
|
|
return NULL;
|
|
}
|
|
set->mask = set->capacity - 1;
|
|
set->nitems = 0;
|
|
|
|
set->load_factor = 0.75;
|
|
|
|
return set;
|
|
}
|
|
|
|
void hashset_destroy(hashset_t set) {
|
|
if (set) {
|
|
tdbOsFree(set->items);
|
|
tdbOsFree(set);
|
|
}
|
|
}
|
|
|
|
static int hashset_add_member(hashset_t set, void *item) {
|
|
size_t value = (size_t)item;
|
|
size_t h;
|
|
|
|
if (value == 0) {
|
|
return -1;
|
|
}
|
|
|
|
for (h = set->mask & (prime * value); set->items[h] != 0; h = set->mask & (h + prime2)) {
|
|
if (set->items[h] == value) {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
set->items[h] = value;
|
|
++set->nitems;
|
|
return 1;
|
|
}
|
|
|
|
static int hashset_add(hashset_t set, void *item) {
|
|
int ret = hashset_add_member(set, item);
|
|
|
|
size_t old_capacity = set->capacity;
|
|
if (set->nitems >= (double)old_capacity * set->load_factor) {
|
|
size_t *old_items = set->items;
|
|
++set->nbits;
|
|
set->capacity = (size_t)(1 << set->nbits);
|
|
set->mask = set->capacity - 1;
|
|
|
|
set->items = tdbOsCalloc(set->capacity, sizeof(size_t));
|
|
if (!set->items) {
|
|
return -1;
|
|
}
|
|
|
|
set->nitems = 0;
|
|
for (size_t i = 0; i < old_capacity; ++i) {
|
|
(void)hashset_add_member(set, (void *)old_items[i]);
|
|
}
|
|
tdbOsFree(old_items);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int hashset_remove(hashset_t set, void *item) {
|
|
size_t value = (size_t)item;
|
|
|
|
for (size_t h = set->mask & (prime * value); set->items[h] != 0; h = set->mask & (h + prime2)) {
|
|
if (set->items[h] == value) {
|
|
set->items[h] = 0;
|
|
--set->nitems;
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int hashset_contains(hashset_t set, void *item) {
|
|
size_t value = (size_t)item;
|
|
|
|
for (size_t h = set->mask & (prime * value); set->items[h] != 0; h = set->mask & (h + prime2)) {
|
|
if (set->items[h] == value) {
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#define TDB_PAGE_INITIALIZED(pPage) ((pPage)->pPager != NULL)
|
|
|
|
static int tdbPagerInitPage(SPager *pPager, SPage *pPage, int (*initPage)(SPage *, void *, int), void *arg,
|
|
u8 loadPage);
|
|
static int tdbPagerWritePageToJournal(SPager *pPager, SPage *pPage);
|
|
static int tdbPagerPWritePageToDB(SPager *pPager, SPage *pPage);
|
|
|
|
static FORCE_INLINE int32_t pageCmpFn(const SRBTreeNode *lhs, const SRBTreeNode *rhs) {
|
|
SPage *pPageL = (SPage *)(((uint8_t *)lhs) - offsetof(SPage, node));
|
|
SPage *pPageR = (SPage *)(((uint8_t *)rhs) - offsetof(SPage, node));
|
|
|
|
SPgno pgnoL = TDB_PAGE_PGNO(pPageL);
|
|
SPgno pgnoR = TDB_PAGE_PGNO(pPageR);
|
|
|
|
if (pgnoL < pgnoR) {
|
|
return -1;
|
|
} else if (pgnoL > pgnoR) {
|
|
return 1;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
int tdbPagerOpen(SPCache *pCache, const char *fileName, SPager **ppPager) {
|
|
uint8_t *pPtr;
|
|
SPager *pPager;
|
|
int fsize;
|
|
int zsize;
|
|
int ret;
|
|
|
|
*ppPager = NULL;
|
|
|
|
fsize = strlen(fileName);
|
|
zsize = sizeof(*pPager) /* SPager */
|
|
+ fsize + 1 /* dbFileName */
|
|
+ fsize + 8 + 1; /* jFileName */
|
|
pPtr = (uint8_t *)tdbOsCalloc(1, zsize);
|
|
if (pPtr == NULL) {
|
|
return TSDB_CODE_OUT_OF_MEMORY;
|
|
}
|
|
|
|
pPager = (SPager *)pPtr;
|
|
pPtr += sizeof(*pPager);
|
|
// pPager->dbFileName
|
|
pPager->dbFileName = (char *)pPtr;
|
|
memcpy(pPager->dbFileName, fileName, fsize);
|
|
pPager->dbFileName[fsize] = '\0';
|
|
pPtr += fsize + 1;
|
|
// pPager->jFileName
|
|
pPager->jFileName = (char *)pPtr;
|
|
memcpy(pPager->jFileName, fileName, fsize);
|
|
memcpy(pPager->jFileName + fsize, "-journal", 8);
|
|
pPager->jFileName[fsize + 8] = '\0';
|
|
// pPager->pCache
|
|
pPager->pCache = pCache;
|
|
|
|
pPager->fd = tdbOsOpen(pPager->dbFileName, TDB_O_CREAT | TDB_O_RDWR, 0755);
|
|
if (TDB_FD_INVALID(pPager->fd)) {
|
|
// if (pPager->fd < 0) {
|
|
return TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
ret = tdbGnrtFileID(pPager->fd, pPager->fid, false);
|
|
if (ret < 0) {
|
|
return TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
// pPager->jfd = -1;
|
|
pPager->pageSize = tdbPCacheGetPageSize(pCache);
|
|
// pPager->dbOrigSize
|
|
ret = tdbGetFileSize(pPager->fd, pPager->pageSize, &(pPager->dbOrigSize));
|
|
pPager->dbFileSize = pPager->dbOrigSize;
|
|
|
|
tdbTrace("pager/open reset dirty tree: %p", &pPager->rbt);
|
|
tRBTreeCreate(&pPager->rbt, pageCmpFn);
|
|
|
|
*ppPager = pPager;
|
|
return 0;
|
|
}
|
|
|
|
int tdbPagerClose(SPager *pPager) {
|
|
if (pPager) {
|
|
(void)tdbOsClose(pPager->fd);
|
|
tdbOsFree(pPager);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int tdbPagerWrite(SPager *pPager, SPage *pPage) {
|
|
int ret;
|
|
SPage **ppPage;
|
|
|
|
if (pPage->isDirty) return 0;
|
|
|
|
// ref page one more time so the page will not be release
|
|
(void)tdbRefPage(pPage);
|
|
tdbTrace("pager/mdirty page %p/%d/%d", pPage, TDB_PAGE_PGNO(pPage), pPage->id);
|
|
|
|
// Set page as dirty
|
|
pPage->isDirty = 1;
|
|
|
|
tdbTrace("tdb/pager-write: put page: %p %d to dirty tree: %p", pPage, TDB_PAGE_PGNO(pPage), &pPager->rbt);
|
|
(void)tRBTreePut(&pPager->rbt, (SRBTreeNode *)pPage);
|
|
|
|
// Write page to journal if neccessary
|
|
if (TDB_PAGE_PGNO(pPage) <= pPager->dbOrigSize &&
|
|
(pPager->pActiveTxn->jPageSet == NULL ||
|
|
!hashset_contains(pPager->pActiveTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage))))) {
|
|
ret = tdbPagerWritePageToJournal(pPager, pPage);
|
|
if (ret < 0) {
|
|
tdbError("failed to write page to journal since %s", tstrerror(ret));
|
|
return ret;
|
|
}
|
|
|
|
if (pPager->pActiveTxn->jPageSet) {
|
|
(void)hashset_add(pPager->pActiveTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage)));
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int tdbPagerBegin(SPager *pPager, TXN *pTxn) {
|
|
/*
|
|
if (pPager->inTran) {
|
|
return 0;
|
|
}
|
|
*/
|
|
// Open the journal
|
|
char jTxnFileName[TDB_FILENAME_LEN];
|
|
sprintf(jTxnFileName, "%s.%" PRId64, pPager->jFileName, pTxn->txnId);
|
|
pTxn->jfd = tdbOsOpen(jTxnFileName, TDB_O_CREAT | TDB_O_RDWR, 0755);
|
|
if (TDB_FD_INVALID(pTxn->jfd)) {
|
|
tdbError("failed to open file due to %s. jFileName:%s", strerror(errno), pPager->jFileName);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
pTxn->jPageSet = hashset_create();
|
|
if (pTxn->jPageSet == NULL) {
|
|
return terrno;
|
|
}
|
|
|
|
pPager->pActiveTxn = pTxn;
|
|
|
|
tdbDebug("pager/begin: %p, %d/%d, txnId:%" PRId64, pPager, pPager->dbOrigSize, pPager->dbFileSize, pTxn->txnId);
|
|
|
|
// TODO: write the size of the file
|
|
/*
|
|
pPager->inTran = 1;
|
|
*/
|
|
return 0;
|
|
}
|
|
/*
|
|
int tdbPagerCancelDirty(SPager *pPager, SPage *pPage, TXN *pTxn) {
|
|
SRBTreeNode *pNode = tRBTreeGet(&pPager->rbt, (SRBTreeNode *)pPage);
|
|
if (pNode) {
|
|
pPage->isDirty = 0;
|
|
|
|
tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage);
|
|
if (pTxn->jPageSet) {
|
|
hashset_remove(pTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage)));
|
|
}
|
|
|
|
tdbPCacheRelease(pPager->pCache, pPage, pTxn);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
*/
|
|
int tdbPagerCommit(SPager *pPager, TXN *pTxn) {
|
|
SPage *pPage;
|
|
int ret;
|
|
|
|
// sync the journal file
|
|
ret = tdbOsFSync(pTxn->jfd);
|
|
if (ret < 0) {
|
|
tdbError("failed to fsync: %s. jFileName:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
// loop to write the dirty pages to file
|
|
SRBTreeIter iter = tRBTreeIterCreate(&pPager->rbt, 1);
|
|
SRBTreeNode *pNode = NULL;
|
|
while ((pNode = tRBTreeIterNext(&iter)) != NULL) {
|
|
pPage = (SPage *)pNode;
|
|
|
|
if (pPage->nOverflow != 0) {
|
|
tdbError("tdb/pager-commit: %p, pPage: %p, ovfl: %d, commit page failed.", pPager, pPage, pPage->nOverflow);
|
|
return TSDB_CODE_INVALID_DATA_FMT;
|
|
}
|
|
|
|
ret = tdbPagerPWritePageToDB(pPager, pPage);
|
|
if (ret < 0) {
|
|
tdbError("failed to write page to db since %s", tstrerror(terrno));
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
tdbDebug("pager/commit: %p, %d/%d, txnId:%" PRId64, pPager, pPager->dbOrigSize, pPager->dbFileSize, pTxn->txnId);
|
|
|
|
pPager->dbOrigSize = pPager->dbFileSize;
|
|
|
|
// release the page
|
|
iter = tRBTreeIterCreate(&pPager->rbt, 1);
|
|
while ((pNode = tRBTreeIterNext(&iter)) != NULL) {
|
|
pPage = (SPage *)pNode;
|
|
|
|
pPage->isDirty = 0;
|
|
|
|
tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage);
|
|
if (pTxn->jPageSet) {
|
|
(void)hashset_remove(pTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage)));
|
|
}
|
|
|
|
tdbTrace("tdb/pager-commit: remove page: %p %d from dirty tree: %p", pPage, TDB_PAGE_PGNO(pPage), &pPager->rbt);
|
|
|
|
tdbPCacheRelease(pPager->pCache, pPage, pTxn);
|
|
}
|
|
|
|
tdbTrace("tdb/pager-commit reset dirty tree: %p", &pPager->rbt);
|
|
tRBTreeCreate(&pPager->rbt, pageCmpFn);
|
|
|
|
// sync the db file
|
|
if (tdbOsFSync(pPager->fd) < 0) {
|
|
tdbError("failed to fsync fd due to %s. file:%s", strerror(errno), pPager->dbFileName);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int tdbPagerPostCommit(SPager *pPager, TXN *pTxn) {
|
|
char jTxnFileName[TDB_FILENAME_LEN];
|
|
sprintf(jTxnFileName, "%s.%" PRId64, pPager->jFileName, pTxn->txnId);
|
|
|
|
// remove the journal file
|
|
if (tdbOsClose(pTxn->jfd) < 0) {
|
|
tdbError("failed to close jfd: %s. file:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
if (tdbOsRemove(jTxnFileName) < 0 && errno != ENOENT) {
|
|
tdbError("failed to remove file due to %s. file:%s", strerror(errno), jTxnFileName);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
// pPager->inTran = 0;
|
|
|
|
tdbDebug("pager/post-commit:%p, %d/%d", pPager, pPager->dbOrigSize, pPager->dbFileSize);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int tdbPagerPrepareAsyncCommit(SPager *pPager, TXN *pTxn) {
|
|
SPage *pPage;
|
|
SPgno maxPgno = pPager->dbOrigSize;
|
|
int ret;
|
|
|
|
// sync the journal file
|
|
ret = tdbOsFSync(pTxn->jfd);
|
|
if (ret < 0) {
|
|
tdbError("failed to fsync jfd: %s. jfile:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
// loop to write the dirty pages to file
|
|
SRBTreeIter iter = tRBTreeIterCreate(&pPager->rbt, 1);
|
|
SRBTreeNode *pNode = NULL;
|
|
while ((pNode = tRBTreeIterNext(&iter)) != NULL) {
|
|
pPage = (SPage *)pNode;
|
|
if (pPage->isLocal) continue;
|
|
|
|
SPgno pgno = TDB_PAGE_PGNO(pPage);
|
|
if (pgno > maxPgno) {
|
|
maxPgno = pgno;
|
|
}
|
|
ret = tdbPagerPWritePageToDB(pPager, pPage);
|
|
if (ret < 0) {
|
|
tdbError("failed to write page to db since %s", tstrerror(terrno));
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
tdbTrace("tdbttl commit:%p, %d/%d", pPager, pPager->dbOrigSize, pPager->dbFileSize);
|
|
pPager->dbOrigSize = maxPgno;
|
|
// pPager->dbOrigSize = pPager->dbFileSize;
|
|
|
|
// release the page
|
|
iter = tRBTreeIterCreate(&pPager->rbt, 1);
|
|
while ((pNode = tRBTreeIterNext(&iter)) != NULL) {
|
|
pPage = (SPage *)pNode;
|
|
if (pPage->isLocal) continue;
|
|
pPage->isDirty = 0;
|
|
|
|
tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage);
|
|
tdbPCacheRelease(pPager->pCache, pPage, pTxn);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static char *tdbEncryptPage(SPager *pPager, char *pPageData, int32_t pageSize, const char *function, int64_t offset) {
|
|
int32_t encryptAlgorithm = pPager->pEnv->encryptAlgorithm;
|
|
char *encryptKey = pPager->pEnv->encryptKey;
|
|
|
|
char *buf = pPageData;
|
|
|
|
if (encryptAlgorithm == DND_CA_SM4) {
|
|
// tdbInfo("CBC_Encrypt key:%d %s %s", encryptAlgorithm, encryptKey, __FUNCTION__);
|
|
|
|
// tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d before Encrypt", offset, pPage->pData[0]);
|
|
|
|
buf = taosMemoryMalloc(pageSize);
|
|
if (buf == NULL) {
|
|
terrno = TSDB_CODE_OUT_OF_MEMORY;
|
|
return NULL;
|
|
}
|
|
|
|
unsigned char packetData[128];
|
|
|
|
int32_t count = 0;
|
|
while (count < pageSize) {
|
|
SCryptOpts opts = {0};
|
|
opts.len = 128;
|
|
opts.source = pPageData + count;
|
|
opts.result = packetData;
|
|
opts.unitLen = 128;
|
|
strncpy(opts.key, encryptKey, ENCRYPT_KEY_LEN);
|
|
|
|
int32_t newLen = CBC_Encrypt(&opts);
|
|
|
|
memcpy(buf + count, packetData, newLen);
|
|
count += newLen;
|
|
}
|
|
// tdbInfo("CBC tdb offset:%" PRId64 ", Encrypt count:%d %s", offset, count, function);
|
|
|
|
// tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d after Encrypt", offset, (uint8_t)buf[0]);
|
|
}
|
|
|
|
return buf;
|
|
}
|
|
|
|
void tdbFreeEncryptBuf(SPager *pPager, char *buf) {
|
|
int32_t encryptAlgorithm = pPager->pEnv->encryptAlgorithm;
|
|
if (encryptAlgorithm == DND_CA_SM4) taosMemoryFreeClear(buf);
|
|
}
|
|
// recovery dirty pages
|
|
int tdbPagerAbort(SPager *pPager, TXN *pTxn) {
|
|
SPage *pPage;
|
|
int pgIdx;
|
|
SPgno journalSize = 0;
|
|
int ret;
|
|
|
|
if (pTxn->jfd == 0) {
|
|
// txn is commited
|
|
return 0;
|
|
}
|
|
|
|
// sync the journal file
|
|
ret = tdbOsFSync(pTxn->jfd);
|
|
if (ret < 0) {
|
|
tdbError("failed to fsync jfd: %s. jfile:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
tdb_fd_t jfd = pTxn->jfd;
|
|
|
|
ret = tdbGetFileSize(jfd, pPager->pageSize, &journalSize);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
|
|
if (tdbOsLSeek(jfd, 0L, SEEK_SET) < 0) {
|
|
tdbError("failed to lseek jfd due to %s. file:%s, offset:0", strerror(errno), pPager->dbFileName);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
u8 *pageBuf = tdbOsCalloc(1, pPager->pageSize);
|
|
if (pageBuf == NULL) {
|
|
return terrno = TSDB_CODE_OUT_OF_MEMORY;
|
|
}
|
|
|
|
tdbDebug("pager/abort: %p, %d/%d, txnId:%" PRId64, pPager, pPager->dbOrigSize, pPager->dbFileSize, pTxn->txnId);
|
|
|
|
for (int pgIndex = 0; pgIndex < journalSize; ++pgIndex) {
|
|
// read pgno & the page from journal
|
|
SPgno pgno;
|
|
|
|
int ret = tdbOsRead(jfd, &pgno, sizeof(pgno));
|
|
if (ret < 0) {
|
|
tdbOsFree(pageBuf);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
tdbTrace("pager/abort: restore pgno:%d,", pgno);
|
|
|
|
tdbPCacheInvalidatePage(pPager->pCache, pPager, pgno);
|
|
|
|
ret = tdbOsRead(jfd, pageBuf, pPager->pageSize);
|
|
if (ret < 0) {
|
|
tdbOsFree(pageBuf);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
i64 offset = pPager->pageSize * (pgno - 1);
|
|
if (tdbOsLSeek(pPager->fd, offset, SEEK_SET) < 0) {
|
|
tdbError("failed to lseek fd due to %s. file:%s, offset:%" PRId64, strerror(errno), pPager->dbFileName, offset);
|
|
tdbOsFree(pageBuf);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
char *buf = tdbEncryptPage(pPager, pageBuf, pPager->pageSize, __FUNCTION__, offset);
|
|
if (buf == NULL) {
|
|
return terrno;
|
|
}
|
|
|
|
ret = tdbOsWrite(pPager->fd, buf, pPager->pageSize);
|
|
if (ret < 0) {
|
|
tdbError("failed to write buf due to %s. file: %s, bufsize:%d", strerror(errno), pPager->dbFileName,
|
|
pPager->pageSize);
|
|
tdbFreeEncryptBuf(pPager, buf);
|
|
tdbOsFree(pageBuf);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
tdbFreeEncryptBuf(pPager, buf);
|
|
}
|
|
|
|
if (tdbOsFSync(pPager->fd) < 0) {
|
|
tdbError("failed to fsync fd due to %s. dbfile:%s", strerror(errno), pPager->dbFileName);
|
|
tdbOsFree(pageBuf);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
tdbOsFree(pageBuf);
|
|
|
|
// 3, release the dirty pages
|
|
SRBTreeIter iter = tRBTreeIterCreate(&pPager->rbt, 1);
|
|
SRBTreeNode *pNode = NULL;
|
|
while ((pNode = tRBTreeIterNext(&iter)) != NULL) {
|
|
pPage = (SPage *)pNode;
|
|
SPgno pgno = TDB_PAGE_PGNO(pPage);
|
|
|
|
tdbTrace("pager/abort: drop dirty pgno:%d,", pgno);
|
|
|
|
pPage->isDirty = 0;
|
|
|
|
tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage);
|
|
(void)hashset_remove(pTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage)));
|
|
tdbPCacheMarkFree(pPager->pCache, pPage);
|
|
tdbPCacheRelease(pPager->pCache, pPage, pTxn);
|
|
}
|
|
|
|
tdbTrace("pager/abort: reset dirty tree: %p", &pPager->rbt);
|
|
tRBTreeCreate(&pPager->rbt, pageCmpFn);
|
|
|
|
// 4, remove the journal file
|
|
if (tdbOsClose(pTxn->jfd) < 0) {
|
|
tdbError("failed to close jfd: %s. file:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
char jTxnFileName[TDB_FILENAME_LEN];
|
|
sprintf(jTxnFileName, "%s.%" PRId64, pPager->jFileName, pTxn->txnId);
|
|
|
|
if (tdbOsRemove(jTxnFileName) < 0 && errno != ENOENT) {
|
|
tdbError("failed to remove file due to %s. file:%s", strerror(errno), jTxnFileName);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
// pPager->inTran = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int tdbPagerFlushPage(SPager *pPager, TXN *pTxn) {
|
|
SPage *pPage;
|
|
i32 nRef;
|
|
SPgno maxPgno = pPager->dbOrigSize;
|
|
int ret;
|
|
|
|
// loop to write the dirty pages to file
|
|
SRBTreeIter iter = tRBTreeIterCreate(&pPager->rbt, 1);
|
|
SRBTreeNode *pNode = NULL;
|
|
while ((pNode = tRBTreeIterNext(&iter)) != NULL) {
|
|
pPage = (SPage *)pNode;
|
|
nRef = tdbGetPageRef(pPage);
|
|
if (nRef > 1) {
|
|
continue;
|
|
}
|
|
|
|
SPgno pgno = TDB_PAGE_PGNO(pPage);
|
|
if (pgno > maxPgno) {
|
|
maxPgno = pgno;
|
|
}
|
|
ret = tdbPagerPWritePageToDB(pPager, pPage);
|
|
if (ret < 0) {
|
|
tdbError("failed to write page to db since %s", tstrerror(terrno));
|
|
return ret;
|
|
}
|
|
|
|
tdbTrace("tdb/flush:%p, pgno:%d, %d/%d/%d", pPager, pgno, pPager->dbOrigSize, pPager->dbFileSize, maxPgno);
|
|
pPager->dbOrigSize = maxPgno;
|
|
|
|
pPage->isDirty = 0;
|
|
|
|
tdbTrace("pager/flush drop page: %p, pgno:%d, from dirty tree: %p", pPage, TDB_PAGE_PGNO(pPage), &pPager->rbt);
|
|
tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage);
|
|
tdbPCacheRelease(pPager->pCache, pPage, pTxn);
|
|
|
|
break;
|
|
}
|
|
|
|
tdbDebug("pager/flush: %p, %d/%d, txnId:%" PRId64, pPager, pPager->dbOrigSize, pPager->dbFileSize, pTxn->txnId);
|
|
|
|
/*
|
|
tdbTrace("tdb/flush:%p, %d/%d/%d", pPager, pPager->dbOrigSize, pPager->dbFileSize, maxPgno);
|
|
pPager->dbOrigSize = maxPgno;
|
|
|
|
// release the page
|
|
iter = tRBTreeIterCreate(&pPager->rbt, 1);
|
|
while ((pNode = tRBTreeIterNext(&iter)) != NULL) {
|
|
pPage = (SPage *)pNode;
|
|
nRef = tdbGetPageRef(pPage);
|
|
if (nRef > 1) {
|
|
continue;
|
|
}
|
|
|
|
pPage->isDirty = 0;
|
|
|
|
tdbTrace("pager/flush drop page: %p %d from dirty tree: %p", pPage, TDB_PAGE_PGNO(pPage), &pPager->rbt);
|
|
tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage);
|
|
tdbPCacheRelease(pPager->pCache, pPage, pTxn);
|
|
}
|
|
*/
|
|
return 0;
|
|
}
|
|
|
|
static int tdbPagerAllocPage(SPager *pPager, SPgno *ppgno, TXN *pTxn);
|
|
|
|
int tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initPage)(SPage *, void *, int), void *arg,
|
|
TXN *pTxn) {
|
|
SPage *pPage;
|
|
SPgid pgid;
|
|
int ret;
|
|
SPgno pgno;
|
|
u8 loadPage;
|
|
|
|
pgno = *ppgno;
|
|
loadPage = 1;
|
|
|
|
// alloc new page
|
|
if (pgno == 0) {
|
|
loadPage = 0;
|
|
ret = tdbPagerAllocPage(pPager, &pgno, pTxn);
|
|
if (ret < 0) {
|
|
tdbError("tdb/pager: %p, ret: %d pgno: %" PRIu32 ", alloc page failed.", pPager, ret, pgno);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
if (pgno == 0) {
|
|
tdbError("tdb/pager: %p, ret: %d pgno: %" PRIu32 ", alloc page failed.", pPager, ret, pgno);
|
|
return TSDB_CODE_INVALID_DATA_FMT;
|
|
}
|
|
|
|
// fetch a page container
|
|
memcpy(&pgid, pPager->fid, TDB_FILE_ID_LEN);
|
|
pgid.pgno = pgno;
|
|
while ((pPage = tdbPCacheFetch(pPager->pCache, &pgid, pTxn)) == NULL) {
|
|
(void)tdbPagerFlushPage(pPager, pTxn);
|
|
}
|
|
|
|
tdbTrace("tdbttl fetch pager:%p", pPage->pPager);
|
|
// init page if need
|
|
if (!TDB_PAGE_INITIALIZED(pPage)) {
|
|
ret = tdbPagerInitPage(pPager, pPage, initPage, arg, loadPage);
|
|
if (ret < 0) {
|
|
tdbError("tdb/pager: %p, pPage: %p, init page failed.", pPager, pPage);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
// printf("thread %" PRId64 " pager fetch page %d pgno %d ppage %p\n", taosGetSelfPthreadId(), pPage->id,
|
|
// TDB_PAGE_PGNO(pPage), pPage);
|
|
|
|
if (!TDB_PAGE_INITIALIZED(pPage)) {
|
|
tdbError("tdb/pager: %p, pPage: %p, fetch page uninited.", pPager, pPage);
|
|
return TSDB_CODE_INVALID_DATA_FMT;
|
|
}
|
|
if (pPage->pPager != pPager) {
|
|
tdbError("tdb/pager: %p/%p, fetch page failed.", pPager, pPage->pPager);
|
|
return TSDB_CODE_INVALID_DATA_FMT;
|
|
}
|
|
|
|
*ppgno = pgno;
|
|
*ppPage = pPage;
|
|
return 0;
|
|
}
|
|
|
|
void tdbPagerReturnPage(SPager *pPager, SPage *pPage, TXN *pTxn) {
|
|
tdbPCacheRelease(pPager->pCache, pPage, pTxn);
|
|
// printf("thread %" PRId64 " pager retun page %d pgno %d ppage %p\n", taosGetSelfPthreadId(), pPage->id,
|
|
// TDB_PAGE_PGNO(pPage), pPage);
|
|
}
|
|
|
|
int tdbPagerInsertFreePage(SPager *pPager, SPage *pPage, TXN *pTxn) {
|
|
int code = 0;
|
|
SPgno pgno = TDB_PAGE_PGNO(pPage);
|
|
|
|
if (pPager->frps) {
|
|
if (taosArrayPush(pPager->frps, &pgno) == NULL) {
|
|
return TSDB_CODE_OUT_OF_MEMORY;
|
|
}
|
|
pPage->pPager = NULL;
|
|
return code;
|
|
}
|
|
|
|
pPager->frps = taosArrayInit(8, sizeof(SPgno));
|
|
if (pPager->frps == NULL) {
|
|
return TSDB_CODE_OUT_OF_MEMORY;
|
|
}
|
|
// memset(pPage->pData, 0, pPage->pageSize);
|
|
tdbTrace("tdb/insert-free-page: tbc recycle page: %d.", pgno);
|
|
// printf("tdb/insert-free-page: tbc recycle page: %d.\n", pgno);
|
|
code = tdbTbInsert(pPager->pEnv->pFreeDb, &pgno, sizeof(pgno), NULL, 0, pTxn);
|
|
if (code < 0) {
|
|
tdbError("tdb/insert-free-page: tb insert failed with ret: %d.", code);
|
|
taosArrayDestroy(pPager->frps);
|
|
pPager->frps = NULL;
|
|
return code;
|
|
}
|
|
|
|
while (TARRAY_SIZE(pPager->frps) > 0) {
|
|
pgno = *(SPgno *)taosArrayPop(pPager->frps);
|
|
|
|
code = tdbTbInsert(pPager->pEnv->pFreeDb, &pgno, sizeof(pgno), NULL, 0, pTxn);
|
|
if (code < 0) {
|
|
tdbError("tdb/insert-free-page: tb insert failed with ret: %d.", code);
|
|
taosArrayDestroy(pPager->frps);
|
|
pPager->frps = NULL;
|
|
return code;
|
|
}
|
|
}
|
|
|
|
taosArrayDestroy(pPager->frps);
|
|
pPager->frps = NULL;
|
|
|
|
pPage->pPager = NULL;
|
|
|
|
return code;
|
|
}
|
|
|
|
static int tdbPagerRemoveFreePage(SPager *pPager, SPgno *pPgno, TXN *pTxn) {
|
|
int code = 0;
|
|
TBC *pCur;
|
|
|
|
if (!pPager->pEnv->pFreeDb) {
|
|
return code;
|
|
}
|
|
|
|
if (pPager->frps) {
|
|
return code;
|
|
}
|
|
|
|
code = tdbTbcOpen(pPager->pEnv->pFreeDb, &pCur, pTxn);
|
|
if (code < 0) {
|
|
return code;
|
|
}
|
|
|
|
code = tdbTbcMoveToFirst(pCur);
|
|
if (code) {
|
|
tdbError("tdb/remove-free-page: moveto first failed with ret: %d.", code);
|
|
(void)tdbTbcClose(pCur);
|
|
return 0;
|
|
}
|
|
|
|
void *pKey = NULL;
|
|
int nKey = 0;
|
|
|
|
code = tdbTbcGet(pCur, (const void **)&pKey, &nKey, NULL, NULL);
|
|
if (code < 0) {
|
|
// tdbError("tdb/remove-free-page: tbc get failed with ret: %d.", code);
|
|
(void)tdbTbcClose(pCur);
|
|
return 0;
|
|
}
|
|
|
|
*pPgno = *(SPgno *)pKey;
|
|
tdbTrace("tdb/remove-free-page: tbc get page: %d.", *pPgno);
|
|
// printf("tdb/remove-free-page: tbc get page: %d.\n", *pPgno);
|
|
|
|
code = tdbTbcDelete(pCur);
|
|
if (code < 0) {
|
|
tdbError("tdb/remove-free-page: tbc delete failed with ret: %d.", code);
|
|
(void)tdbTbcClose(pCur);
|
|
return 0;
|
|
}
|
|
(void)tdbTbcClose(pCur);
|
|
return 0;
|
|
}
|
|
|
|
static int tdbPagerAllocFreePage(SPager *pPager, SPgno *ppgno, TXN *pTxn) {
|
|
// Allocate a page from the free list
|
|
return tdbPagerRemoveFreePage(pPager, ppgno, pTxn);
|
|
}
|
|
|
|
static int tdbPagerAllocNewPage(SPager *pPager, SPgno *ppgno) {
|
|
*ppgno = ++pPager->dbFileSize;
|
|
// tdbError("tdb/alloc-new-page: %d.", *ppgno);
|
|
return 0;
|
|
}
|
|
|
|
static int tdbPagerAllocPage(SPager *pPager, SPgno *ppgno, TXN *pTxn) {
|
|
int ret;
|
|
|
|
*ppgno = 0;
|
|
|
|
// Try to allocate from the free list of the pager
|
|
ret = tdbPagerAllocFreePage(pPager, ppgno, pTxn);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
|
|
if (*ppgno != 0) return 0;
|
|
|
|
// Allocate the page by extending the pager
|
|
ret = tdbPagerAllocNewPage(pPager, ppgno);
|
|
if (ret < 0) {
|
|
return -1;
|
|
}
|
|
|
|
if (*ppgno == 0) {
|
|
tdbError("tdb/pager:%p, alloc new page failed.", pPager);
|
|
return TSDB_CODE_FAILED;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int tdbPagerInitPage(SPager *pPager, SPage *pPage, int (*initPage)(SPage *, void *, int), void *arg,
|
|
u8 loadPage) {
|
|
int ret;
|
|
int lcode;
|
|
int nLoops;
|
|
i64 nRead = 0;
|
|
SPgno pgno = 0;
|
|
int init = 0;
|
|
|
|
lcode = TDB_TRY_LOCK_PAGE(pPage);
|
|
if (lcode == P_LOCK_SUCC) {
|
|
if (TDB_PAGE_INITIALIZED(pPage)) {
|
|
(void)TDB_UNLOCK_PAGE(pPage);
|
|
return 0;
|
|
}
|
|
|
|
pgno = TDB_PAGE_PGNO(pPage);
|
|
|
|
tdbTrace("tdb/pager:%p, pgno:%d, loadPage:%d, size:%d", pPager, pgno, loadPage, pPager->dbOrigSize);
|
|
if (loadPage && pgno <= pPager->dbOrigSize) {
|
|
init = 1;
|
|
|
|
nRead = tdbOsPRead(pPager->fd, pPage->pData, pPage->pageSize, ((i64)pPage->pageSize) * (pgno - 1));
|
|
tdbTrace("tdb/pager:%p, pgno:%d, nRead:%" PRId64, pPager, pgno, nRead);
|
|
if (nRead < pPage->pageSize) {
|
|
tdbError("tdb/pager:%p, pgno:%d, nRead:%" PRId64 "pgSize:%" PRId32, pPager, pgno, nRead, pPage->pageSize);
|
|
(void)TDB_UNLOCK_PAGE(pPage);
|
|
return TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
int32_t encryptAlgorithm = pPager->pEnv->encryptAlgorithm;
|
|
char *encryptKey = pPager->pEnv->encryptKey;
|
|
|
|
if (encryptAlgorithm == DND_CA_SM4) {
|
|
// tdbInfo("CBC_Decrypt key:%d %s %s", encryptAlgorithm, encryptKey, __FUNCTION__);
|
|
|
|
// uint8_t flags = pPage->pData[0];
|
|
// tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d before Decrypt", ((i64)pPage->pageSize) * (pgno - 1), flags);
|
|
|
|
unsigned char packetData[128];
|
|
|
|
int32_t count = 0;
|
|
while (count < pPage->pageSize) {
|
|
SCryptOpts opts = {0};
|
|
opts.len = 128;
|
|
opts.source = pPage->pData + count;
|
|
opts.result = packetData;
|
|
opts.unitLen = 128;
|
|
strncpy(opts.key, encryptKey, ENCRYPT_KEY_LEN);
|
|
|
|
int newLen = CBC_Decrypt(&opts);
|
|
|
|
memcpy(pPage->pData + count, packetData, newLen);
|
|
count += newLen;
|
|
}
|
|
// tdbInfo("CBC tdb offset:%" PRId64 ", Decrypt count:%d %s", ((i64)pPage->pageSize) * (pgno - 1), count,
|
|
// __FUNCTION__);
|
|
|
|
// tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d after Decrypt %s", ((i64)pPage->pageSize) * (pgno - 1),
|
|
// pPage->pData[0], __FUNCTION__);
|
|
}
|
|
} else {
|
|
init = 0;
|
|
}
|
|
|
|
// tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d initPage %s", ((i64)pPage->pageSize) * (pgno - 1), pPage->pData[0],
|
|
// __FUNCTION__);
|
|
|
|
ret = (*initPage)(pPage, arg, init);
|
|
if (ret < 0) {
|
|
tdbError("tdb/pager:%p, pgno:%d, nRead:%" PRId64 "pgSize:%" PRId32 " init page failed.", pPager, pgno, nRead,
|
|
pPage->pageSize);
|
|
(void)TDB_UNLOCK_PAGE(pPage);
|
|
return ret;
|
|
}
|
|
|
|
tmemory_barrier();
|
|
|
|
pPage->pPager = pPager;
|
|
|
|
(void)TDB_UNLOCK_PAGE(pPage);
|
|
} else if (lcode == P_LOCK_BUSY) {
|
|
nLoops = 0;
|
|
for (;;) {
|
|
if (TDB_PAGE_INITIALIZED(pPage)) break;
|
|
nLoops++;
|
|
if (nLoops > 1000) {
|
|
(void)sched_yield();
|
|
nLoops = 0;
|
|
}
|
|
}
|
|
} else {
|
|
tdbError("tdb/pager:%p, pgno:%d, nRead:%" PRId64 "pgSize:%" PRId32 " lock page failed.", pPager, pgno, nRead,
|
|
pPage->pageSize);
|
|
return TSDB_CODE_FAILED;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// ---------------------------- Journal manipulation
|
|
static int tdbPagerWritePageToJournal(SPager *pPager, SPage *pPage) {
|
|
int ret;
|
|
SPgno pgno;
|
|
|
|
pgno = TDB_PAGE_PGNO(pPage);
|
|
|
|
ret = tdbOsWrite(pPager->pActiveTxn->jfd, &pgno, sizeof(pgno));
|
|
if (ret < 0) {
|
|
tdbError("failed to write pgno due to %s. file:%s, pgno:%u, txnId:%" PRId64, strerror(errno), pPager->jFileName,
|
|
pgno, pPager->pActiveTxn->txnId);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
ret = tdbOsWrite(pPager->pActiveTxn->jfd, pPage->pData, pPage->pageSize);
|
|
if (ret < 0) {
|
|
tdbError("failed to write page data due to %s. file:%s, pageSize:%d, txnId:%" PRId64, strerror(errno),
|
|
pPager->jFileName, pPage->pageSize, pPager->pActiveTxn->txnId);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
/*
|
|
static int tdbPagerWritePageToDB(SPager *pPager, SPage *pPage) {
|
|
i64 offset;
|
|
int ret;
|
|
|
|
offset = (i64)pPage->pageSize * (TDB_PAGE_PGNO(pPage) - 1);
|
|
if (tdbOsLSeek(pPager->fd, offset, SEEK_SET) < 0) {
|
|
tdbError("failed to lseek due to %s. file:%s, offset:%" PRId64, strerror(errno), pPager->dbFileName, offset);
|
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
|
return -1;
|
|
}
|
|
|
|
ret = tdbOsWrite(pPager->fd, pPage->pData, pPage->pageSize);
|
|
if (ret < 0) {
|
|
tdbError("failed to write page data due to %s. file:%s, pageSize:%d", strerror(errno), pPager->dbFileName,
|
|
pPage->pageSize);
|
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
*/
|
|
static int tdbPagerPWritePageToDB(SPager *pPager, SPage *pPage) {
|
|
i64 offset;
|
|
int ret;
|
|
|
|
offset = (i64)pPage->pageSize * (TDB_PAGE_PGNO(pPage) - 1);
|
|
|
|
char *buf = tdbEncryptPage(pPager, pPage->pData, pPage->pageSize, __FUNCTION__, offset);
|
|
|
|
ret = tdbOsPWrite(pPager->fd, buf, pPage->pageSize, offset);
|
|
if (ret < 0) {
|
|
tdbFreeEncryptBuf(pPager, buf);
|
|
tdbError("failed to pwrite page data due to %s. file:%s, pageSize:%d", strerror(errno), pPager->dbFileName,
|
|
pPage->pageSize);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
tdbFreeEncryptBuf(pPager, buf);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int tdbPagerRestore(SPager *pPager, const char *jFileName) {
|
|
int ret = 0;
|
|
SPgno journalSize = 0;
|
|
u8 *pageBuf = NULL;
|
|
|
|
tdb_fd_t jfd = tdbOsOpen(jFileName, TDB_O_RDWR, 0755);
|
|
if (jfd == NULL) {
|
|
return 0;
|
|
}
|
|
|
|
ret = tdbGetFileSize(jfd, pPager->pageSize, &journalSize);
|
|
if (ret < 0) {
|
|
return TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
if (tdbOsLSeek(jfd, 0L, SEEK_SET) < 0) {
|
|
tdbError("failed to lseek jfd due to %s. file:%s, offset:0", strerror(errno), pPager->dbFileName);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
pageBuf = tdbOsCalloc(1, pPager->pageSize);
|
|
if (pageBuf == NULL) {
|
|
return TSDB_CODE_OUT_OF_MEMORY;
|
|
}
|
|
|
|
tdbDebug("pager/restore: %p, %d/%d, txnId:%s", pPager, pPager->dbOrigSize, pPager->dbFileSize, jFileName);
|
|
|
|
for (int pgIndex = 0; pgIndex < journalSize; ++pgIndex) {
|
|
// read pgno & the page from journal
|
|
SPgno pgno;
|
|
|
|
int ret = tdbOsRead(jfd, &pgno, sizeof(pgno));
|
|
if (ret < 0) {
|
|
tdbOsFree(pageBuf);
|
|
return TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
tdbTrace("pager/restore: restore pgno:%d,", pgno);
|
|
|
|
ret = tdbOsRead(jfd, pageBuf, pPager->pageSize);
|
|
if (ret < 0) {
|
|
tdbOsFree(pageBuf);
|
|
return TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
i64 offset = pPager->pageSize * (pgno - 1);
|
|
if (tdbOsLSeek(pPager->fd, offset, SEEK_SET) < 0) {
|
|
tdbError("failed to lseek fd due to %s. file:%s, offset:%" PRId64, strerror(errno), pPager->dbFileName, offset);
|
|
tdbOsFree(pageBuf);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
char *buf = tdbEncryptPage(pPager, pageBuf, pPager->pageSize, __FUNCTION__, offset);
|
|
if (buf == NULL) {
|
|
return terrno;
|
|
}
|
|
|
|
ret = tdbOsWrite(pPager->fd, buf, pPager->pageSize);
|
|
if (ret < 0) {
|
|
tdbError("failed to write buf due to %s. file: %s, bufsize:%d", strerror(errno), pPager->dbFileName,
|
|
pPager->pageSize);
|
|
tdbFreeEncryptBuf(pPager, buf);
|
|
tdbOsFree(pageBuf);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
tdbFreeEncryptBuf(pPager, buf);
|
|
}
|
|
|
|
if (tdbOsFSync(pPager->fd) < 0) {
|
|
tdbError("failed to fsync fd due to %s. dbfile:%s", strerror(errno), pPager->dbFileName);
|
|
tdbOsFree(pageBuf);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
tdbOsFree(pageBuf);
|
|
|
|
if (tdbOsClose(jfd) < 0) {
|
|
tdbError("failed to close jfd due to %s. jFileName:%s", strerror(errno), pPager->jFileName);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
if (tdbOsRemove(jFileName) < 0 && errno != ENOENT) {
|
|
tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), pPager->jFileName);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int32_t txnIdCompareDesc(const void *pLeft, const void *pRight) {
|
|
int64_t lhs = *(int64_t *)pLeft;
|
|
int64_t rhs = *(int64_t *)pRight;
|
|
return lhs > rhs ? -1 : 1;
|
|
}
|
|
|
|
int tdbPagerRestoreJournals(SPager *pPager) {
|
|
int32_t code = 0;
|
|
tdbDirEntryPtr pDirEntry;
|
|
tdbDirPtr pDir = taosOpenDir(pPager->pEnv->dbName);
|
|
if (pDir == NULL) {
|
|
tdbError("failed to open %s since %s", pPager->pEnv->dbName, strerror(errno));
|
|
return TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
SArray *pTxnList = taosArrayInit(16, sizeof(int64_t));
|
|
if (pTxnList == NULL) {
|
|
return TSDB_CODE_OUT_OF_MEMORY;
|
|
}
|
|
|
|
while ((pDirEntry = tdbReadDir(pDir)) != NULL) {
|
|
char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry));
|
|
if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) {
|
|
int64_t txnId = -1;
|
|
(void)sscanf(name, TDB_MAINDB_NAME "-journal.%" PRId64, &txnId);
|
|
if (taosArrayPush(pTxnList, &txnId) == NULL) {
|
|
return TSDB_CODE_OUT_OF_MEMORY;
|
|
}
|
|
}
|
|
}
|
|
taosArraySort(pTxnList, txnIdCompareDesc);
|
|
for (int i = 0; i < TARRAY_SIZE(pTxnList); ++i) {
|
|
int64_t *pTxnId = taosArrayGet(pTxnList, i);
|
|
char jname[TD_PATH_MAX] = {0};
|
|
int dirLen = strlen(pPager->pEnv->dbName);
|
|
memcpy(jname, pPager->pEnv->dbName, dirLen);
|
|
jname[dirLen] = '/';
|
|
sprintf(jname + dirLen + 1, TDB_MAINDB_NAME "-journal.%" PRId64, *pTxnId);
|
|
code = tdbPagerRestore(pPager, jname);
|
|
if (code) {
|
|
taosArrayDestroy(pTxnList);
|
|
(void)tdbCloseDir(&pDir);
|
|
tdbError("failed to restore file due to %s. jFileName:%s", tstrerror(code), jname);
|
|
return code;
|
|
}
|
|
}
|
|
|
|
taosArrayDestroy(pTxnList);
|
|
(void)tdbCloseDir(&pDir);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int tdbPagerRollback(SPager *pPager) {
|
|
tdbDirEntryPtr pDirEntry;
|
|
tdbDirPtr pDir = taosOpenDir(pPager->pEnv->dbName);
|
|
if (pDir == NULL) {
|
|
tdbError("failed to open %s since %s", pPager->pEnv->dbName, strerror(errno));
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
|
|
while ((pDirEntry = tdbReadDir(pDir)) != NULL) {
|
|
char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry));
|
|
|
|
if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) {
|
|
char jname[TD_PATH_MAX] = {0};
|
|
int dirLen = strlen(pPager->pEnv->dbName);
|
|
memcpy(jname, pPager->pEnv->dbName, dirLen);
|
|
jname[dirLen] = '/';
|
|
memcpy(jname + dirLen + 1, name, strlen(name));
|
|
if (tdbOsRemove(jname) < 0 && errno != ENOENT) {
|
|
(void)tdbCloseDir(&pDir);
|
|
|
|
tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), name);
|
|
return terrno = TAOS_SYSTEM_ERROR(errno);
|
|
}
|
|
}
|
|
}
|
|
|
|
(void)tdbCloseDir(&pDir);
|
|
|
|
return 0;
|
|
}
|