diff --git a/include/libs/stream/streamState.h b/include/libs/stream/streamState.h index 59f030a60c..8fdac0da7f 100644 --- a/include/libs/stream/streamState.h +++ b/include/libs/stream/streamState.h @@ -35,7 +35,7 @@ typedef struct STdbState { TTB* pFillStateDb; // todo refactor TTB* pSessionStateDb; TTB* pParNameDb; - TXN txn; + TXN* txn; } STdbState; // incremental state storage diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 0196dcb0a8..16cf960724 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -620,7 +620,7 @@ typedef struct SStreamMeta { SHashObj* pTasks; SHashObj* pRecoverStatus; void* ahandle; - TXN txn; + TXN* txn; FTaskExpand* expandFunc; int32_t vgId; SRWLatch lock; diff --git a/include/os/osFile.h b/include/os/osFile.h index f6759d19a7..ae77e0f27a 100644 --- a/include/os/osFile.h +++ b/include/os/osFile.h @@ -88,6 +88,7 @@ int32_t taosFsyncFile(TdFilePtr pFile); int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count); int64_t taosPReadFile(TdFilePtr pFile, void *buf, int64_t count, int64_t offset); int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count); +int64_t taosPWriteFile(TdFilePtr pFile, const void *buf, int64_t count, int64_t offset); void taosFprintfFile(TdFilePtr pFile, const char *format, ...); int64_t taosGetLineFile(TdFilePtr pFile, char **__restrict ptrBuf); diff --git a/source/dnode/vnode/src/inc/meta.h b/source/dnode/vnode/src/inc/meta.h index dbfe9e1671..3999aa0b7f 100644 --- a/source/dnode/vnode/src/inc/meta.h +++ b/source/dnode/vnode/src/inc/meta.h @@ -70,7 +70,7 @@ int32_t metaCacheDrop(SMeta* pMeta, int64_t uid); int32_t metaStatsCacheUpsert(SMeta* pMeta, SMetaStbStats* pInfo); int32_t metaStatsCacheDrop(SMeta* pMeta, int64_t uid); int32_t metaStatsCacheGet(SMeta* pMeta, int64_t uid, SMetaStbStats* pInfo); -void metaUpdateStbStats(SMeta *pMeta, int64_t uid, int64_t delta); +void metaUpdateStbStats(SMeta* pMeta, int64_t uid, int64_t delta); int32_t metaUidFilterCacheGet(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, LRUHandle** pHandle); struct SMeta { @@ -79,7 +79,7 @@ struct SMeta { char* path; SVnode* pVnode; TDB* pEnv; - TXN txn; + TXN* txn; TTB* pTbDb; TTB* pSkmDb; TTB* pUidIdx; diff --git a/source/dnode/vnode/src/inc/vnd.h b/source/dnode/vnode/src/inc/vnd.h index 15e64ffaed..28797c5361 100644 --- a/source/dnode/vnode/src/inc/vnd.h +++ b/source/dnode/vnode/src/inc/vnd.h @@ -77,7 +77,7 @@ void vnodeBufPoolReset(SVBufPool* pPool); // vnodeQuery.c int32_t vnodeQueryOpen(SVnode* pVnode); -void vnodeQueryPreClose(SVnode *pVnode); +void vnodeQueryPreClose(SVnode* pVnode); void vnodeQueryClose(SVnode* pVnode); int32_t vnodeGetTableMeta(SVnode* pVnode, SRpcMsg* pMsg, bool direct); int vnodeGetTableCfg(SVnode* pVnode, SRpcMsg* pMsg, bool direct); @@ -86,7 +86,6 @@ int32_t vnodeGetBatchMeta(SVnode* pVnode, SRpcMsg* pMsg); // vnodeCommit.c int32_t vnodeBegin(SVnode* pVnode); int32_t vnodeShouldCommit(SVnode* pVnode); -int32_t vnodeCommit(SVnode* pVnode); void vnodeRollback(SVnode* pVnode); int32_t vnodeSaveInfo(const char* dir, const SVnodeInfo* pCfg); int32_t vnodeCommitInfo(const char* dir, const SVnodeInfo* pInfo); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 16941c523d..124b0b1215 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -75,6 +75,7 @@ typedef struct SStreamStateWriter SStreamStateWriter; typedef struct SRSmaSnapReader SRSmaSnapReader; typedef struct SRSmaSnapWriter SRSmaSnapWriter; typedef struct SSnapDataHdr SSnapDataHdr; +typedef struct SCommitInfo SCommitInfo; #define VNODE_META_DIR "meta" #define VNODE_TSDB_DIR "tsdb" @@ -100,8 +101,9 @@ typedef struct STbUidStore STbUidStore; int metaOpen(SVnode* pVnode, SMeta** ppMeta, int8_t rollback); int metaClose(SMeta* pMeta); int metaBegin(SMeta* pMeta, int8_t fromSys); -int metaCommit(SMeta* pMeta); -int metaFinishCommit(SMeta* pMeta); +TXN* metaGetTxn(SMeta* pMeta); +int metaCommit(SMeta* pMeta, TXN* txn); +int metaFinishCommit(SMeta* pMeta, TXN* txn); int metaPrepareAsyncCommit(SMeta* pMeta); int metaCreateSTable(SMeta* pMeta, int64_t version, SVCreateStbReq* pReq); int metaAlterSTable(SMeta* pMeta, int64_t version, SVCreateStbReq* pReq); @@ -146,7 +148,8 @@ int32_t metaGetInfo(SMeta* pMeta, int64_t uid, SMetaInfo* pInfo, SMetaReader* pR int tsdbOpen(SVnode* pVnode, STsdb** ppTsdb, const char* dir, STsdbKeepCfg* pKeepCfg, int8_t rollback); int tsdbClose(STsdb** pTsdb); int32_t tsdbBegin(STsdb* pTsdb); -int32_t tsdbCommit(STsdb* pTsdb); +int32_t tsdbPrepareCommit(STsdb* pTsdb); +int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo); int32_t tsdbFinishCommit(STsdb* pTsdb); int32_t tsdbRollbackCommit(STsdb* pTsdb); int32_t tsdbDoRetention(STsdb* pTsdb, int64_t now); @@ -203,8 +206,8 @@ int32_t smaBegin(SSma* pSma); int32_t smaSyncPreCommit(SSma* pSma); int32_t smaSyncCommit(SSma* pSma); int32_t smaSyncPostCommit(SSma* pSma); -int32_t smaPreCommit(SSma* pSma); -int32_t smaCommit(SSma* pSma); +int32_t smaPrepareAsyncCommit(SSma* pSma); +int32_t smaCommit(SSma* pSma, SCommitInfo* pInfo); int32_t smaFinishCommit(SSma* pSma); int32_t smaPostCommit(SSma* pSma); int32_t smaDoRetention(SSma* pSma, int64_t now); @@ -405,6 +408,12 @@ struct SSnapDataHdr { uint8_t data[]; }; +struct SCommitInfo { + SVnodeInfo info; + SVnode* pVnode; + TXN* txn; +}; + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/src/meta/metaCommit.c b/source/dnode/vnode/src/meta/metaCommit.c index 0be0c3e407..b2cd85e9fa 100644 --- a/source/dnode/vnode/src/meta/metaCommit.c +++ b/source/dnode/vnode/src/meta/metaCommit.c @@ -20,12 +20,19 @@ static FORCE_INLINE void metaFree(void *pPool, void *p) { vnodeBufPoolFree((SVB // begin a meta txn int metaBegin(SMeta *pMeta, int8_t fromSys) { + void *(*xMalloc)(void *, size_t); + void (*xFree)(void *, void *); + void *xArg = NULL; + if (fromSys) { - tdbTxnOpen(&pMeta->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); + xMalloc = tdbDefaultMalloc; + xFree = tdbDefaultFree; } else { - tdbTxnOpen(&pMeta->txn, 0, metaMalloc, metaFree, pMeta->pVnode->inUse, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); + xMalloc = metaMalloc; + xFree = metaFree; + xArg = pMeta->pVnode->inUse; } - if (tdbBegin(pMeta->pEnv, &pMeta->txn) < 0) { + if (tdbBegin(pMeta->pEnv, &pMeta->txn, xMalloc, xFree, xArg, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } @@ -33,9 +40,10 @@ int metaBegin(SMeta *pMeta, int8_t fromSys) { } // commit the meta txn -int metaCommit(SMeta *pMeta) { return tdbCommit(pMeta->pEnv, &pMeta->txn); } -int metaFinishCommit(SMeta *pMeta) { return tdbPostCommit(pMeta->pEnv, &pMeta->txn); } -int metaPrepareAsyncCommit(SMeta *pMeta) { return tdbPrepareAsyncCommit(pMeta->pEnv, &pMeta->txn); } +TXN *metaGetTxn(SMeta *pMeta) { return pMeta->txn; } +int metaCommit(SMeta *pMeta, TXN *txn) { return tdbCommit(pMeta->pEnv, txn); } +int metaFinishCommit(SMeta *pMeta, TXN *txn) { return tdbPostCommit(pMeta->pEnv, txn); } +int metaPrepareAsyncCommit(SMeta *pMeta) { return tdbPrepareAsyncCommit(pMeta->pEnv, pMeta->txn); } // abort the meta txn -int metaAbort(SMeta *pMeta) { return tdbAbort(pMeta->pEnv, &pMeta->txn); } +int metaAbort(SMeta *pMeta) { return tdbAbort(pMeta->pEnv, pMeta->txn); } diff --git a/source/dnode/vnode/src/meta/metaSma.c b/source/dnode/vnode/src/meta/metaSma.c index 52452bf710..8d5821e28b 100644 --- a/source/dnode/vnode/src/meta/metaSma.c +++ b/source/dnode/vnode/src/meta/metaSma.c @@ -117,7 +117,7 @@ static int metaSaveSmaToDB(SMeta *pMeta, const SMetaEntry *pME) { tEncoderClear(&coder); // write to table.db - if (tdbTbInsert(pMeta->pTbDb, pKey, kLen, pVal, vLen, &pMeta->txn) < 0) { + if (tdbTbInsert(pMeta->pTbDb, pKey, kLen, pVal, vLen, pMeta->txn) < 0) { goto _err; } @@ -131,17 +131,17 @@ _err: static int metaUpdateUidIdx(SMeta *pMeta, const SMetaEntry *pME) { SUidIdxVal uidIdxVal = {.suid = pME->smaEntry.tsma->indexUid, .version = pME->version, .skmVer = 0}; - return tdbTbInsert(pMeta->pUidIdx, &pME->uid, sizeof(tb_uid_t), &uidIdxVal, sizeof(uidIdxVal), &pMeta->txn); + return tdbTbInsert(pMeta->pUidIdx, &pME->uid, sizeof(tb_uid_t), &uidIdxVal, sizeof(uidIdxVal), pMeta->txn); } static int metaUpdateNameIdx(SMeta *pMeta, const SMetaEntry *pME) { - return tdbTbInsert(pMeta->pNameIdx, pME->name, strlen(pME->name) + 1, &pME->uid, sizeof(tb_uid_t), &pMeta->txn); + return tdbTbInsert(pMeta->pNameIdx, pME->name, strlen(pME->name) + 1, &pME->uid, sizeof(tb_uid_t), pMeta->txn); } static int metaUpdateSmaIdx(SMeta *pMeta, const SMetaEntry *pME) { SSmaIdxKey smaIdxKey = {.uid = pME->smaEntry.tsma->tableUid, .smaUid = pME->smaEntry.tsma->indexUid}; - return tdbTbInsert(pMeta->pSmaIdx, &smaIdxKey, sizeof(smaIdxKey), NULL, 0, &pMeta->txn); + return tdbTbInsert(pMeta->pSmaIdx, &smaIdxKey, sizeof(smaIdxKey), NULL, 0, pMeta->txn); } static int metaHandleSmaEntry(SMeta *pMeta, const SMetaEntry *pME) { diff --git a/source/dnode/vnode/src/meta/metaSnapshot.c b/source/dnode/vnode/src/meta/metaSnapshot.c index 5c5b49ece5..6a4dcf6ead 100644 --- a/source/dnode/vnode/src/meta/metaSnapshot.c +++ b/source/dnode/vnode/src/meta/metaSnapshot.c @@ -163,9 +163,9 @@ int32_t metaSnapWriterClose(SMetaSnapWriter** ppWriter, int8_t rollback) { if (rollback) { ASSERT(0); } else { - code = metaCommit(pWriter->pMeta); + code = metaCommit(pWriter->pMeta, pWriter->pMeta->txn); if (code) goto _err; - code = metaFinishCommit(pWriter->pMeta); + code = metaFinishCommit(pWriter->pMeta, pWriter->pMeta->txn); if (code) goto _err; } taosMemoryFree(pWriter); diff --git a/source/dnode/vnode/src/meta/metaTable.c b/source/dnode/vnode/src/meta/metaTable.c index 5c97ee5633..0b00f036de 100644 --- a/source/dnode/vnode/src/meta/metaTable.c +++ b/source/dnode/vnode/src/meta/metaTable.c @@ -261,7 +261,7 @@ int metaDropSTable(SMeta *pMeta, int64_t verison, SVDropStbReq *pReq, SArray *tb // drop all child tables TBC *pCtbIdxc = NULL; - tdbTbcOpen(pMeta->pCtbIdx, &pCtbIdxc, &pMeta->txn); + tdbTbcOpen(pMeta->pCtbIdx, &pCtbIdxc, NULL); rc = tdbTbcMoveTo(pCtbIdxc, &(SCtbIdxKey){.suid = pReq->suid, .uid = INT64_MIN}, sizeof(SCtbIdxKey), &c); if (rc < 0) { tdbTbcClose(pCtbIdxc); @@ -295,10 +295,10 @@ int metaDropSTable(SMeta *pMeta, int64_t verison, SVDropStbReq *pReq, SArray *tb _drop_super_table: tdbTbGet(pMeta->pUidIdx, &pReq->suid, sizeof(tb_uid_t), &pData, &nData); tdbTbDelete(pMeta->pTbDb, &(STbDbKey){.version = ((SUidIdxVal *)pData)[0].version, .uid = pReq->suid}, - sizeof(STbDbKey), &pMeta->txn); - tdbTbDelete(pMeta->pNameIdx, pReq->name, strlen(pReq->name) + 1, &pMeta->txn); - tdbTbDelete(pMeta->pUidIdx, &pReq->suid, sizeof(tb_uid_t), &pMeta->txn); - tdbTbDelete(pMeta->pSuidIdx, &pReq->suid, sizeof(tb_uid_t), &pMeta->txn); + sizeof(STbDbKey), pMeta->txn); + tdbTbDelete(pMeta->pNameIdx, pReq->name, strlen(pReq->name) + 1, pMeta->txn); + tdbTbDelete(pMeta->pUidIdx, &pReq->suid, sizeof(tb_uid_t), pMeta->txn); + tdbTbDelete(pMeta->pSuidIdx, &pReq->suid, sizeof(tb_uid_t), pMeta->txn); metaULock(pMeta); @@ -321,7 +321,7 @@ int metaAlterSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) { int32_t ret; int32_t c = -2; - tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, &pMeta->txn); + tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, NULL); ret = tdbTbcMoveTo(pUidIdxc, &pReq->suid, sizeof(tb_uid_t), &c); if (ret < 0 || c) { tdbTbcClose(pUidIdxc); @@ -340,7 +340,7 @@ int metaAlterSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) { oversion = ((SUidIdxVal *)pData)[0].version; - tdbTbcOpen(pMeta->pTbDb, &pTbDbc, &pMeta->txn); + tdbTbcOpen(pMeta->pTbDb, &pTbDbc, NULL); ret = tdbTbcMoveTo(pTbDbc, &((STbDbKey){.uid = pReq->suid, .version = oversion}), sizeof(STbDbKey), &c); ASSERT(ret == 0 && c == 0); @@ -595,7 +595,7 @@ static int metaDeleteTtlIdx(SMeta *pMeta, const SMetaEntry *pME) { STtlIdxKey ttlKey = {0}; metaBuildTtlIdxKey(&ttlKey, pME); if (ttlKey.dtime == 0) return 0; - return tdbTbDelete(pMeta->pTtlIdx, &ttlKey, sizeof(ttlKey), &pMeta->txn); + return tdbTbDelete(pMeta->pTtlIdx, &ttlKey, sizeof(ttlKey), pMeta->txn); } static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) { @@ -657,7 +657,7 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) { if (metaCreateTagIdxKey(e.ctbEntry.suid, pTagColumn->colId, pTagData, nTagData, pTagColumn->type, uid, &pTagIdxKey, &nTagIdxKey) == 0) { - tdbTbDelete(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, &pMeta->txn); + tdbTbDelete(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, pMeta->txn); } metaDestroyTagIdxKey(pTagIdxKey); } @@ -667,9 +667,9 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) { } } - tdbTbDelete(pMeta->pTbDb, &(STbDbKey){.version = version, .uid = uid}, sizeof(STbDbKey), &pMeta->txn); - tdbTbDelete(pMeta->pNameIdx, e.name, strlen(e.name) + 1, &pMeta->txn); - tdbTbDelete(pMeta->pUidIdx, &uid, sizeof(uid), &pMeta->txn); + tdbTbDelete(pMeta->pTbDb, &(STbDbKey){.version = version, .uid = uid}, sizeof(STbDbKey), pMeta->txn); + tdbTbDelete(pMeta->pNameIdx, e.name, strlen(e.name) + 1, pMeta->txn); + tdbTbDelete(pMeta->pUidIdx, &uid, sizeof(uid), pMeta->txn); if (e.type == TSDB_CHILD_TABLE || e.type == TSDB_NORMAL_TABLE) metaDeleteCtimeIdx(pMeta, &e); if (e.type == TSDB_NORMAL_TABLE) metaDeleteNcolIdx(pMeta, &e); @@ -677,7 +677,7 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) { if (e.type != TSDB_SUPER_TABLE) metaDeleteTtlIdx(pMeta, &e); if (e.type == TSDB_CHILD_TABLE) { - tdbTbDelete(pMeta->pCtbIdx, &(SCtbIdxKey){.suid = e.ctbEntry.suid, .uid = uid}, sizeof(SCtbIdxKey), &pMeta->txn); + tdbTbDelete(pMeta->pCtbIdx, &(SCtbIdxKey){.suid = e.ctbEntry.suid, .uid = uid}, sizeof(SCtbIdxKey), pMeta->txn); --pMeta->pVnode->config.vndStats.numOfCTables; @@ -689,7 +689,7 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) { --pMeta->pVnode->config.vndStats.numOfNTables; pMeta->pVnode->config.vndStats.numOfNTimeSeries -= e.ntbEntry.schemaRow.nCols - 1; } else if (e.type == TSDB_SUPER_TABLE) { - tdbTbDelete(pMeta->pSuidIdx, &e.uid, sizeof(tb_uid_t), &pMeta->txn); + tdbTbDelete(pMeta->pSuidIdx, &e.uid, sizeof(tb_uid_t), pMeta->txn); // drop schema.db (todo) metaStatsCacheDrop(pMeta, uid); @@ -710,7 +710,7 @@ int metaUpdateCtimeIdx(SMeta *pMeta, const SMetaEntry *pME) { if (metaBuildCtimeIdxKey(&ctimeKey, pME) < 0) { return 0; } - return tdbTbInsert(pMeta->pCtimeIdx, &ctimeKey, sizeof(ctimeKey), NULL, 0, &pMeta->txn); + return tdbTbInsert(pMeta->pCtimeIdx, &ctimeKey, sizeof(ctimeKey), NULL, 0, pMeta->txn); } int metaDeleteCtimeIdx(SMeta *pMeta, const SMetaEntry *pME) { @@ -718,14 +718,14 @@ int metaDeleteCtimeIdx(SMeta *pMeta, const SMetaEntry *pME) { if (metaBuildCtimeIdxKey(&ctimeKey, pME) < 0) { return 0; } - return tdbTbDelete(pMeta->pCtimeIdx, &ctimeKey, sizeof(ctimeKey), &pMeta->txn); + return tdbTbDelete(pMeta->pCtimeIdx, &ctimeKey, sizeof(ctimeKey), pMeta->txn); } int metaUpdateNcolIdx(SMeta *pMeta, const SMetaEntry *pME) { SNcolIdxKey ncolKey = {0}; if (metaBuildNColIdxKey(&ncolKey, pME) < 0) { return 0; } - return tdbTbInsert(pMeta->pNcolIdx, &ncolKey, sizeof(ncolKey), NULL, 0, &pMeta->txn); + return tdbTbInsert(pMeta->pNcolIdx, &ncolKey, sizeof(ncolKey), NULL, 0, pMeta->txn); } int metaDeleteNcolIdx(SMeta *pMeta, const SMetaEntry *pME) { @@ -733,7 +733,7 @@ int metaDeleteNcolIdx(SMeta *pMeta, const SMetaEntry *pME) { if (metaBuildNColIdxKey(&ncolKey, pME) < 0) { return 0; } - return tdbTbDelete(pMeta->pNcolIdx, &ncolKey, sizeof(ncolKey), &pMeta->txn); + return tdbTbDelete(pMeta->pNcolIdx, &ncolKey, sizeof(ncolKey), pMeta->txn); } static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAlterTbReq, STableMetaRsp *pMetaRsp) { @@ -768,7 +768,7 @@ static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAl // search uid index TBC *pUidIdxc = NULL; - tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, &pMeta->txn); + tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, NULL); tdbTbcMoveTo(pUidIdxc, &uid, sizeof(uid), &c); ASSERT(c == 0); @@ -778,7 +778,7 @@ static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAl // search table.db TBC *pTbDbc = NULL; - tdbTbcOpen(pMeta->pTbDb, &pTbDbc, &pMeta->txn); + tdbTbcOpen(pMeta->pTbDb, &pTbDbc, NULL); tdbTbcMoveTo(pTbDbc, &((STbDbKey){.uid = uid, .version = oversion}), sizeof(STbDbKey), &c); ASSERT(c == 0); tdbTbcGet(pTbDbc, NULL, NULL, &pData, &nData); @@ -959,7 +959,7 @@ static int metaUpdateTableTagVal(SMeta *pMeta, int64_t version, SVAlterTbReq *pA // search uid index TBC *pUidIdxc = NULL; - tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, &pMeta->txn); + tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, NULL); tdbTbcMoveTo(pUidIdxc, &uid, sizeof(uid), &c); ASSERT(c == 0); @@ -972,7 +972,7 @@ static int metaUpdateTableTagVal(SMeta *pMeta, int64_t version, SVAlterTbReq *pA SDecoder dc2 = {0}; /* get ctbEntry */ - tdbTbcOpen(pMeta->pTbDb, &pTbDbc, &pMeta->txn); + tdbTbcOpen(pMeta->pTbDb, &pTbDbc, NULL); tdbTbcMoveTo(pTbDbc, &((STbDbKey){.uid = uid, .version = oversion}), sizeof(STbDbKey), &c); ASSERT(c == 0); tdbTbcGet(pTbDbc, NULL, NULL, &pData, &nData); @@ -1075,7 +1075,7 @@ static int metaUpdateTableTagVal(SMeta *pMeta, int64_t version, SVAlterTbReq *pA ASSERT(ctbEntry.ctbEntry.pTags); SCtbIdxKey ctbIdxKey = {.suid = ctbEntry.ctbEntry.suid, .uid = uid}; tdbTbUpsert(pMeta->pCtbIdx, &ctbIdxKey, sizeof(ctbIdxKey), ctbEntry.ctbEntry.pTags, - ((STag *)(ctbEntry.ctbEntry.pTags))->len, &pMeta->txn); + ((STag *)(ctbEntry.ctbEntry.pTags))->len, pMeta->txn); metaUidCacheClear(pMeta, ctbEntry.ctbEntry.suid); @@ -1125,7 +1125,7 @@ static int metaUpdateTableOptions(SMeta *pMeta, int64_t version, SVAlterTbReq *p // search uid index TBC *pUidIdxc = NULL; - tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, &pMeta->txn); + tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, NULL); tdbTbcMoveTo(pUidIdxc, &uid, sizeof(uid), &c); ASSERT(c == 0); @@ -1135,7 +1135,7 @@ static int metaUpdateTableOptions(SMeta *pMeta, int64_t version, SVAlterTbReq *p // search table.db TBC *pTbDbc = NULL; - tdbTbcOpen(pMeta->pTbDb, &pTbDbc, &pMeta->txn); + tdbTbcOpen(pMeta->pTbDb, &pTbDbc, NULL); tdbTbcMoveTo(pTbDbc, &((STbDbKey){.uid = uid, .version = oversion}), sizeof(STbDbKey), &c); ASSERT(c == 0); tdbTbcGet(pTbDbc, NULL, NULL, &pData, &nData); @@ -1242,7 +1242,7 @@ static int metaSaveToTbDb(SMeta *pMeta, const SMetaEntry *pME) { tEncoderClear(&coder); // write to table.db - if (tdbTbInsert(pMeta->pTbDb, pKey, kLen, pVal, vLen, &pMeta->txn) < 0) { + if (tdbTbInsert(pMeta->pTbDb, pKey, kLen, pVal, vLen, pMeta->txn) < 0) { goto _err; } @@ -1265,29 +1265,29 @@ static int metaUpdateUidIdx(SMeta *pMeta, const SMetaEntry *pME) { SUidIdxVal uidIdxVal = {.suid = info.suid, .version = info.version, .skmVer = info.skmVer}; - return tdbTbUpsert(pMeta->pUidIdx, &pME->uid, sizeof(tb_uid_t), &uidIdxVal, sizeof(uidIdxVal), &pMeta->txn); + return tdbTbUpsert(pMeta->pUidIdx, &pME->uid, sizeof(tb_uid_t), &uidIdxVal, sizeof(uidIdxVal), pMeta->txn); } static int metaUpdateSuidIdx(SMeta *pMeta, const SMetaEntry *pME) { - return tdbTbInsert(pMeta->pSuidIdx, &pME->uid, sizeof(tb_uid_t), NULL, 0, &pMeta->txn); + return tdbTbInsert(pMeta->pSuidIdx, &pME->uid, sizeof(tb_uid_t), NULL, 0, pMeta->txn); } static int metaUpdateNameIdx(SMeta *pMeta, const SMetaEntry *pME) { - return tdbTbInsert(pMeta->pNameIdx, pME->name, strlen(pME->name) + 1, &pME->uid, sizeof(tb_uid_t), &pMeta->txn); + return tdbTbInsert(pMeta->pNameIdx, pME->name, strlen(pME->name) + 1, &pME->uid, sizeof(tb_uid_t), pMeta->txn); } static int metaUpdateTtlIdx(SMeta *pMeta, const SMetaEntry *pME) { STtlIdxKey ttlKey = {0}; metaBuildTtlIdxKey(&ttlKey, pME); if (ttlKey.dtime == 0) return 0; - return tdbTbInsert(pMeta->pTtlIdx, &ttlKey, sizeof(ttlKey), NULL, 0, &pMeta->txn); + return tdbTbInsert(pMeta->pTtlIdx, &ttlKey, sizeof(ttlKey), NULL, 0, pMeta->txn); } static int metaUpdateCtbIdx(SMeta *pMeta, const SMetaEntry *pME) { SCtbIdxKey ctbIdxKey = {.suid = pME->ctbEntry.suid, .uid = pME->uid}; return tdbTbInsert(pMeta->pCtbIdx, &ctbIdxKey, sizeof(ctbIdxKey), pME->ctbEntry.pTags, - ((STag *)(pME->ctbEntry.pTags))->len, &pMeta->txn); + ((STag *)(pME->ctbEntry.pTags))->len, pMeta->txn); } int metaCreateTagIdxKey(tb_uid_t suid, int32_t cid, const void *pTagData, int32_t nTagData, int8_t type, tb_uid_t uid, @@ -1379,7 +1379,7 @@ static int metaUpdateTagIdx(SMeta *pMeta, const SMetaEntry *pCtbEntry) { ret = -1; goto end; } - tdbTbUpsert(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, NULL, 0, &pMeta->txn); + tdbTbUpsert(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, NULL, 0, pMeta->txn); } end: metaDestroyTagIdxKey(pTagIdxKey); @@ -1426,7 +1426,7 @@ static int metaSaveToSkmDb(SMeta *pMeta, const SMetaEntry *pME) { tEncoderInit(&coder, pVal, vLen); tEncodeSSchemaWrapper(&coder, pSW); - if (tdbTbInsert(pMeta->pSkmDb, &skmDbKey, sizeof(skmDbKey), pVal, vLen, &pMeta->txn) < 0) { + if (tdbTbInsert(pMeta->pSkmDb, &skmDbKey, sizeof(skmDbKey), pVal, vLen, pMeta->txn) < 0) { rcode = -1; goto _exit; } diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index a79ae35d79..9748963722 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -23,7 +23,7 @@ static int32_t tdProcessRSmaSyncCommitImpl(SSma *pSma); static int32_t tdProcessRSmaSyncPostCommitImpl(SSma *pSma); #endif static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma); -static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma); +static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo); static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma); static int32_t tdUpdateQTaskInfoFiles(SSma *pSma, SRSmaStat *pRSmaStat); @@ -59,7 +59,7 @@ int32_t smaSyncPostCommit(SSma *pSma) { return tdProcessRSmaSyncPostCommitImpl(p * @param pSma * @return int32_t */ -int32_t smaPreCommit(SSma *pSma) { return tdProcessRSmaAsyncPreCommitImpl(pSma); } +int32_t smaPrepareAsyncCommit(SSma *pSma) { return tdProcessRSmaAsyncPreCommitImpl(pSma); } /** * @brief async commit, only applicable to Rollup SMA @@ -67,7 +67,7 @@ int32_t smaPreCommit(SSma *pSma) { return tdProcessRSmaAsyncPreCommitImpl(pSma); * @param pSma * @return int32_t */ -int32_t smaCommit(SSma *pSma) { return tdProcessRSmaAsyncCommitImpl(pSma); } +int32_t smaCommit(SSma *pSma, SCommitInfo *pInfo) { return tdProcessRSmaAsyncCommitImpl(pSma, pInfo); } /** * @brief async commit, only applicable to Rollup SMA @@ -127,8 +127,8 @@ _exit: } int32_t smaFinishCommit(SSma *pSma) { - int32_t code = 0; - SVnode *pVnode = pSma->pVnode; + int32_t code = 0; + SVnode *pVnode = pSma->pVnode; if (VND_RSMA1(pVnode) && (code = tsdbFinishCommit(VND_RSMA1(pVnode))) < 0) { smaError("vgId:%d, failed to finish commit tsdb rsma1 since %s", TD_VID(pVnode), tstrerror(code)); @@ -378,6 +378,11 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); #endif + // all rsma results are written completely + STsdb *pTsdb = NULL; + if ((pTsdb = VND_RSMA1(pSma->pVnode))) tsdbPrepareCommit(pTsdb); + if ((pTsdb = VND_RSMA2(pSma->pVnode))) tsdbPrepareCommit(pTsdb); + return TSDB_CODE_SUCCESS; } @@ -387,9 +392,9 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { * @param pSma * @return int32_t */ -static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma) { - int32_t code = 0; - SVnode *pVnode = pSma->pVnode; +static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) { + int32_t code = 0; + SVnode *pVnode = pSma->pVnode; #if 0 SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pSmaEnv); @@ -399,11 +404,11 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma) { } #endif - if ((code = tsdbCommit(VND_RSMA1(pVnode))) < 0) { + if ((code = tsdbCommit(VND_RSMA1(pVnode), pInfo)) < 0) { smaError("vgId:%d, failed to commit tsdb rsma1 since %s", TD_VID(pVnode), tstrerror(code)); goto _exit; } - if ((code = tsdbCommit(VND_RSMA2(pVnode))) < 0) { + if ((code = tsdbCommit(VND_RSMA2(pVnode), pInfo)) < 0) { smaError("vgId:%d, failed to commit tsdb rsma2 since %s", TD_VID(pVnode), tstrerror(code)); goto _exit; } diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index 612e465295..f476f58b56 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -108,24 +108,22 @@ int32_t tqMetaClose(STQ* pTq) { } int32_t tqMetaSaveCheckInfo(STQ* pTq, const char* key, const void* value, int32_t vLen) { - TXN txn; - if (tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { + TXN* txn; + + if (tdbBegin(pTq->pMetaDB, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < + 0) { return -1; } - if (tdbBegin(pTq->pMetaDB, &txn) < 0) { + if (tdbTbUpsert(pTq->pCheckStore, key, strlen(key), value, vLen, txn) < 0) { return -1; } - if (tdbTbUpsert(pTq->pCheckStore, key, strlen(key), value, vLen, &txn) < 0) { + if (tdbCommit(pTq->pMetaDB, txn) < 0) { return -1; } - if (tdbCommit(pTq->pMetaDB, &txn) < 0) { - return -1; - } - - if (tdbPostCommit(pTq->pMetaDB, &txn) < 0) { + if (tdbPostCommit(pTq->pMetaDB, txn) < 0) { return -1; } @@ -133,25 +131,22 @@ int32_t tqMetaSaveCheckInfo(STQ* pTq, const char* key, const void* value, int32_ } int32_t tqMetaDeleteCheckInfo(STQ* pTq, const char* key) { - TXN txn; + TXN* txn; - if (tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { + if (tdbBegin(pTq->pMetaDB, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < + 0) { ASSERT(0); } - if (tdbBegin(pTq->pMetaDB, &txn) < 0) { - ASSERT(0); - } - - if (tdbTbDelete(pTq->pCheckStore, key, (int)strlen(key), &txn) < 0) { + if (tdbTbDelete(pTq->pCheckStore, key, (int)strlen(key), txn) < 0) { /*ASSERT(0);*/ } - if (tdbCommit(pTq->pMetaDB, &txn) < 0) { + if (tdbCommit(pTq->pMetaDB, txn) < 0) { ASSERT(0); } - if (tdbPostCommit(pTq->pMetaDB, &txn) < 0) { + if (tdbPostCommit(pTq->pMetaDB, txn) < 0) { ASSERT(0); } @@ -219,25 +214,22 @@ int32_t tqMetaSaveHandle(STQ* pTq, const char* key, const STqHandle* pHandle) { ASSERT(0); } - TXN txn; + TXN* txn; - if (tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { + if (tdbBegin(pTq->pMetaDB, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < + 0) { ASSERT(0); } - if (tdbBegin(pTq->pMetaDB, &txn) < 0) { + if (tdbTbUpsert(pTq->pExecStore, key, (int)strlen(key), buf, vlen, txn) < 0) { ASSERT(0); } - if (tdbTbUpsert(pTq->pExecStore, key, (int)strlen(key), buf, vlen, &txn) < 0) { + if (tdbCommit(pTq->pMetaDB, txn) < 0) { ASSERT(0); } - if (tdbCommit(pTq->pMetaDB, &txn) < 0) { - ASSERT(0); - } - - if (tdbPostCommit(pTq->pMetaDB, &txn) < 0) { + if (tdbPostCommit(pTq->pMetaDB, txn) < 0) { ASSERT(0); } @@ -247,25 +239,22 @@ int32_t tqMetaSaveHandle(STQ* pTq, const char* key, const STqHandle* pHandle) { } int32_t tqMetaDeleteHandle(STQ* pTq, const char* key) { - TXN txn; + TXN* txn; - if (tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { + if (tdbBegin(pTq->pMetaDB, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < + 0) { ASSERT(0); } - if (tdbBegin(pTq->pMetaDB, &txn) < 0) { - ASSERT(0); - } - - if (tdbTbDelete(pTq->pExecStore, key, (int)strlen(key), &txn) < 0) { + if (tdbTbDelete(pTq->pExecStore, key, (int)strlen(key), txn) < 0) { /*ASSERT(0);*/ } - if (tdbCommit(pTq->pMetaDB, &txn) < 0) { + if (tdbCommit(pTq->pMetaDB, txn) < 0) { ASSERT(0); } - if (tdbPostCommit(pTq->pMetaDB, &txn) < 0) { + if (tdbPostCommit(pTq->pMetaDB, txn) < 0) { ASSERT(0); } diff --git a/source/dnode/vnode/src/tq/tqSnapshot.c b/source/dnode/vnode/src/tq/tqSnapshot.c index b68763867e..d811d943ed 100644 --- a/source/dnode/vnode/src/tq/tqSnapshot.c +++ b/source/dnode/vnode/src/tq/tqSnapshot.c @@ -129,7 +129,7 @@ struct STqSnapWriter { STQ* pTq; int64_t sver; int64_t ever; - TXN txn; + TXN* txn; }; int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** ppWriter) { @@ -146,8 +146,10 @@ int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** p pWriter->sver = sver; pWriter->ever = ever; - if (tdbTxnOpen(&pWriter->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { - ASSERT(0); + if (tdbBegin(pTq->pMetaDB, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { + code = -1; + taosMemoryFree(pWriter); + goto _err; } *ppWriter = pWriter; @@ -165,11 +167,11 @@ int32_t tqSnapWriterClose(STqSnapWriter** ppWriter, int8_t rollback) { STQ* pTq = pWriter->pTq; if (rollback) { - tdbAbort(pWriter->pTq->pMetaDB, &pWriter->txn); + tdbAbort(pWriter->pTq->pMetaDB, pWriter->txn); } else { - code = tdbCommit(pWriter->pTq->pMetaDB, &pWriter->txn); + code = tdbCommit(pWriter->pTq->pMetaDB, pWriter->txn); if (code) goto _err; - code = tdbPostCommit(pWriter->pTq->pMetaDB, &pWriter->txn); + code = tdbPostCommit(pWriter->pTq->pMetaDB, pWriter->txn); if (code) goto _err; } diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index 08d5931bc3..b1f00bdf74 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -129,7 +129,7 @@ struct STqSnapWriter { STQ* pTq; int64_t sver; int64_t ever; - TXN txn; + TXN* txn; }; int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** ppWriter) { @@ -146,8 +146,10 @@ int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** p pWriter->sver = sver; pWriter->ever = ever; - if (tdbTxnOpen(&pWriter->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { - ASSERT(0); + if (tdbBegin(pTq->pMetaDB, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { + code = -1; + taosMemoryFree(pWriter); + goto _err; } *ppWriter = pWriter; @@ -165,11 +167,12 @@ int32_t tqSnapWriterClose(STqSnapWriter** ppWriter, int8_t rollback) { STQ* pTq = pWriter->pTq; if (rollback) { + tdbAbort(pWriter->pTq->pMetaDB, pWriter->txn); ASSERT(0); } else { - code = tdbCommit(pWriter->pTq->pMetaDB, &pWriter->txn); + code = tdbCommit(pWriter->pTq->pMetaDB, pWriter->txn); if (code) goto _err; - code = tdbPostCommit(pWriter->pTq->pMetaDB, &pWriter->txn); + code = tdbPostCommit(pWriter->pTq->pMetaDB, pWriter->txn); if (code) goto _err; } diff --git a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c index 31e44a5b6d..305378bc93 100644 --- a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c @@ -129,7 +129,7 @@ struct STqSnapWriter { STQ* pTq; int64_t sver; int64_t ever; - TXN txn; + TXN* txn; }; int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** ppWriter) { @@ -146,8 +146,10 @@ int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** p pWriter->sver = sver; pWriter->ever = ever; - if (tdbTxnOpen(&pWriter->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { - ASSERT(0); + if (tdbBegin(pTq->pMetaStore, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { + code = -1; + taosMemoryFree(pWriter); + goto _err; } *ppWriter = pWriter; @@ -165,11 +167,12 @@ int32_t tqSnapWriterClose(STqSnapWriter** ppWriter, int8_t rollback) { STQ* pTq = pWriter->pTq; if (rollback) { + tdbAbort(pWriter->pTq->pMetaStore, pWriter->txn); ASSERT(0); } else { - code = tdbCommit(pWriter->pTq->pMetaStore, &pWriter->txn); + code = tdbCommit(pWriter->pTq->pMetaStore, pWriter->txn); if (code) goto _err; - code = tdbPostCommit(pWriter->pTq->pMetaStore, &pWriter->txn); + code = tdbPostCommit(pWriter->pTq->pMetaStore, pWriter->txn); if (code) goto _err; } diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit.c b/source/dnode/vnode/src/tsdb/tsdbCommit.c index 391e10e223..906e3b2638 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit.c @@ -93,7 +93,7 @@ typedef struct { SArray *aDelData; // SArray } SCommitter; -static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter); +static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter, SCommitInfo *pInfo); static int32_t tsdbCommitData(SCommitter *pCommitter); static int32_t tsdbCommitDel(SCommitter *pCommitter); static int32_t tsdbCommitCache(SCommitter *pCommitter); @@ -150,18 +150,28 @@ _exit: return code; } -int32_t tsdbCommit(STsdb *pTsdb) { +int32_t tsdbPrepareCommit(STsdb *pTsdb) { + taosThreadRwlockWrlock(&pTsdb->rwLock); + ASSERT(pTsdb->imem == NULL); + pTsdb->imem = pTsdb->mem; + pTsdb->mem = NULL; + taosThreadRwlockUnlock(&pTsdb->rwLock); + + return 0; +} + +int32_t tsdbCommit(STsdb *pTsdb, SCommitInfo *pInfo) { if (!pTsdb) return 0; int32_t code = 0; int32_t lino = 0; SCommitter commith; - SMemTable *pMemTable = pTsdb->mem; + SMemTable *pMemTable = pTsdb->imem; // check if (pMemTable->nRow == 0 && pMemTable->nDel == 0) { taosThreadRwlockWrlock(&pTsdb->rwLock); - pTsdb->mem = NULL; + pTsdb->imem = NULL; taosThreadRwlockUnlock(&pTsdb->rwLock); tsdbUnrefMemTable(pMemTable); @@ -169,7 +179,7 @@ int32_t tsdbCommit(STsdb *pTsdb) { } // start commit - code = tsdbStartCommit(pTsdb, &commith); + code = tsdbStartCommit(pTsdb, &commith, pInfo); TSDB_CHECK_CODE(code, lino, _exit); // commit impl @@ -806,26 +816,21 @@ _exit: } // ---------------------------------------------------------------------------- -static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter) { +static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter, SCommitInfo *pInfo) { int32_t code = 0; int32_t lino = 0; memset(pCommitter, 0, sizeof(*pCommitter)); - ASSERT(pTsdb->mem && pTsdb->imem == NULL && "last tsdb commit incomplete"); - - taosThreadRwlockWrlock(&pTsdb->rwLock); - pTsdb->imem = pTsdb->mem; - pTsdb->mem = NULL; - taosThreadRwlockUnlock(&pTsdb->rwLock); + ASSERT(pTsdb->imem && "last tsdb commit incomplete"); pCommitter->pTsdb = pTsdb; - pCommitter->commitID = pTsdb->pVnode->state.commitID; + pCommitter->commitID = pInfo->info.state.commitID; pCommitter->minutes = pTsdb->keepCfg.days; pCommitter->precision = pTsdb->keepCfg.precision; - pCommitter->minRow = pTsdb->pVnode->config.tsdbCfg.minRows; - pCommitter->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; - pCommitter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; - pCommitter->sttTrigger = pTsdb->pVnode->config.sttTrigger; + pCommitter->minRow = pInfo->info.config.tsdbCfg.minRows; + pCommitter->maxRow = pInfo->info.config.tsdbCfg.maxRows; + pCommitter->cmprAlg = pInfo->info.config.tsdbCfg.compression; + pCommitter->sttTrigger = pInfo->info.config.sttTrigger; pCommitter->aTbDataP = tsdbMemTableGetTbDataArray(pTsdb->imem); if (pCommitter->aTbDataP == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 4bdaf8d353..f9a598fec6 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -14,14 +14,14 @@ */ #include "vnd.h" +#include "vnodeInt.h" -#define VND_INFO_FNAME "vnode.json" +#define VND_INFO_FNAME "vnode.json" #define VND_INFO_FNAME_TMP "vnode_tmp.json" -static int vnodeEncodeInfo(const SVnodeInfo *pInfo, char **ppData); -static int vnodeDecodeInfo(uint8_t *pData, SVnodeInfo *pInfo); -static int vnodeCommitImpl(void *arg); -static void vnodeWaitCommit(SVnode *pVnode); +static int vnodeEncodeInfo(const SVnodeInfo *pInfo, char **ppData); +static int vnodeDecodeInfo(uint8_t *pData, SVnodeInfo *pInfo); +static int vnodeCommitImpl(SCommitInfo *pInfo); int vnodeBegin(SVnode *pVnode) { // alloc buffer pool @@ -107,7 +107,8 @@ int vnodeSaveInfo(const char *dir, const SVnodeInfo *pInfo) { // free info binary taosMemoryFree(data); - vInfo("vgId:%d, vnode info is saved, fname:%s replica:%d", pInfo->config.vgId, fname, pInfo->config.syncCfg.replicaNum); + vInfo("vgId:%d, vnode info is saved, fname:%s replica:%d", pInfo->config.vgId, fname, + pInfo->config.syncCfg.replicaNum); return 0; @@ -185,29 +186,78 @@ _err: return -1; } +static void vnodePrepareCommit(SVnode *pVnode) { + tsem_wait(&pVnode->canCommit); + + tsdbPrepareCommit(pVnode->pTsdb); + metaPrepareAsyncCommit(pVnode->pMeta); + smaPrepareAsyncCommit(pVnode->pSma); + + + vnodeBufPoolUnRef(pVnode->inUse); + pVnode->inUse = NULL; +} +static int32_t vnodeCommitTask(void *arg) { + int32_t code = 0; + + SCommitInfo *pInfo = (SCommitInfo *)arg; + + // commit + code = vnodeCommitImpl(pInfo); + if (code) goto _exit; + + // end commit + tsem_post(&pInfo->pVnode->canCommit); + +_exit: + taosMemoryFree(pInfo); + return code; +} int vnodeAsyncCommit(SVnode *pVnode) { - vnodeWaitCommit(pVnode); + int32_t code = 0; - // vnodeBufPoolSwitch(pVnode); - // tsdbPrepareCommit(pVnode->pTsdb); + // prepare to commit + vnodePrepareCommit(pVnode); - vnodeScheduleTask(vnodeCommitImpl, pVnode); + // schedule the task + pVnode->state.commitTerm = pVnode->state.applyTerm; - return 0; + SCommitInfo *pInfo = (SCommitInfo *)taosMemoryCalloc(1, sizeof(*pInfo)); + if (NULL == pInfo) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + pInfo->info.config = pVnode->config; + pInfo->info.state.committed = pVnode->state.applied; + pInfo->info.state.commitTerm = pVnode->state.applyTerm; + pInfo->info.state.commitID = pVnode->state.commitID; + pInfo->pVnode = pVnode; + pInfo->txn = metaGetTxn(pVnode->pMeta); + vnodeScheduleTask(vnodeCommitTask, pInfo); + +_exit: + if (code) { + vError("vgId:%d %s failed since %s, commit id:%" PRId64, TD_VID(pVnode), __func__, tstrerror(code), + pVnode->state.commitID); + } else { + vDebug("vgId:%d %s done", TD_VID(pVnode), __func__); + } + return code; } int vnodeSyncCommit(SVnode *pVnode) { vnodeAsyncCommit(pVnode); - vnodeWaitCommit(pVnode); - tsem_post(&(pVnode->canCommit)); + tsem_wait(&pVnode->canCommit); + tsem_post(&pVnode->canCommit); return 0; } -int vnodeCommit(SVnode *pVnode) { - int32_t code = 0; - int32_t lino = 0; - SVnodeInfo info = {0}; - char dir[TSDB_FILENAME_LEN]; +static int vnodeCommitImpl(SCommitInfo *pInfo) { + int32_t code = 0; + int32_t lino = 0; + + char dir[TSDB_FILENAME_LEN] = {0}; + SVnode *pVnode = pInfo->pVnode; vInfo("vgId:%d, start to commit, commit ID:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode), pVnode->state.commitID, pVnode->state.applied, pVnode->state.applyTerm); @@ -218,19 +268,13 @@ int vnodeCommit(SVnode *pVnode) { return -1; } - pVnode->state.commitTerm = pVnode->state.applyTerm; - // save info - info.config = pVnode->config; - info.state.committed = pVnode->state.applied; - info.state.commitTerm = pVnode->state.applyTerm; - info.state.commitID = pVnode->state.commitID; if (pVnode->pTfs) { snprintf(dir, TSDB_FILENAME_LEN, "%s%s%s", tfsGetPrimaryPath(pVnode->pTfs), TD_DIRSEP, pVnode->path); } else { snprintf(dir, TSDB_FILENAME_LEN, "%s", pVnode->path); } - if (vnodeSaveInfo(dir, &info) < 0) { + if (vnodeSaveInfo(dir, &pInfo->info) < 0) { code = terrno; TSDB_CHECK_CODE(code, lino, _exit); } @@ -238,23 +282,17 @@ int vnodeCommit(SVnode *pVnode) { // walBeginSnapshot(pVnode->pWal, pVnode->state.applied); syncBeginSnapshot(pVnode->sync, pVnode->state.applied); - code = smaPreCommit(pVnode->pSma); - TSDB_CHECK_CODE(code, lino, _exit); - - vnodeBufPoolUnRef(pVnode->inUse); - pVnode->inUse = NULL; - // commit each sub-system - if (metaCommit(pVnode->pMeta) < 0) { + if (metaCommit(pVnode->pMeta, pInfo->txn) < 0) { code = TSDB_CODE_FAILED; TSDB_CHECK_CODE(code, lino, _exit); } - code = tsdbCommit(pVnode->pTsdb); + code = tsdbCommit(pVnode->pTsdb, pInfo); TSDB_CHECK_CODE(code, lino, _exit); if (VND_IS_RSMA(pVnode)) { - code = smaCommit(pVnode->pSma); + code = smaCommit(pVnode->pSma, pInfo); TSDB_CHECK_CODE(code, lino, _exit); } @@ -264,7 +302,7 @@ int vnodeCommit(SVnode *pVnode) { } // commit info - if (vnodeCommitInfo(dir, &info) < 0) { + if (vnodeCommitInfo(dir, &pInfo->info) < 0) { code = terrno; TSDB_CHECK_CODE(code, lino, _exit); } @@ -277,19 +315,18 @@ int vnodeCommit(SVnode *pVnode) { TSDB_CHECK_CODE(code, lino, _exit); } - if (metaFinishCommit(pVnode->pMeta) < 0) { + if (metaFinishCommit(pVnode->pMeta, pInfo->txn) < 0) { code = terrno; TSDB_CHECK_CODE(code, lino, _exit); } - pVnode->state.committed = info.state.committed; + pVnode->state.committed = pInfo->info.state.committed; if (smaPostCommit(pVnode->pSma) < 0) { vError("vgId:%d, failed to post-commit sma since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; } - // apply the commit (TODO) // walEndSnapshot(pVnode->pWal); syncEndSnapshot(pVnode->sync); @@ -318,20 +355,6 @@ void vnodeRollback(SVnode *pVnode) { (void)taosRemoveFile(tFName); } -static int vnodeCommitImpl(void *arg) { - SVnode *pVnode = (SVnode *)arg; - - // metaCommit(pVnode->pMeta); - tqCommit(pVnode->pTq); - // tsdbCommit(pVnode->pTsdb, ); - - // vnodeBufPoolRecycle(pVnode); - tsem_post(&(pVnode->canCommit)); - return 0; -} - -static FORCE_INLINE void vnodeWaitCommit(SVnode *pVnode) { tsem_wait(&pVnode->canCommit); } - static int vnodeEncodeState(const void *pObj, SJson *pJson) { const SVState *pState = (SVState *)pObj; diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 0ff4a46d44..e09fafb756 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -242,14 +242,14 @@ _err: return NULL; } -void vnodePreClose(SVnode *pVnode) { +void vnodePreClose(SVnode *pVnode) { vnodeQueryPreClose(pVnode); - vnodeSyncPreClose(pVnode); + vnodeSyncPreClose(pVnode); } void vnodeClose(SVnode *pVnode) { if (pVnode) { - vnodeCommit(pVnode); + vnodeSyncCommit(pVnode); vnodeSyncClose(pVnode); vnodeQueryClose(pVnode); walClose(pVnode->pWal); diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index e8cdf9513f..a34744a1da 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -259,7 +259,7 @@ int32_t vnodeSnapWriterOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapWr pWriter->ever = ever; // commit it - code = vnodeCommit(pVnode); + code = vnodeSyncCommit(pVnode); if (code) { taosMemoryFree(pWriter); goto _err; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index ee2e8f6f6d..d8c8a3e1b2 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -292,7 +292,9 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp vnodeProcessAlterConfigReq(pVnode, version, pReq, len, pRsp); break; case TDMT_VND_COMMIT: - goto _do_commit; + vnodeSyncCommit(pVnode); + vnodeBegin(pVnode); + goto _exit; default: vError("vgId:%d, unprocessed msg, %d", TD_VID(pVnode), pMsg->msgType); return -1; @@ -310,13 +312,12 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp // commit if need if (vnodeShouldCommit(pVnode)) { - _do_commit: vInfo("vgId:%d, commit at version %" PRId64, TD_VID(pVnode), version); - // commit current change - if (vnodeCommit(pVnode) < 0) { - vError("vgId:%d, failed to commit vnode since %s.", TD_VID(pVnode), tstrerror(terrno)); - goto _err; - } +#if 0 + vnodeSyncCommit(pVnode); +#else + vnodeAsyncCommit(pVnode); +#endif // start a new one if (vnodeBegin(pVnode) < 0) { @@ -325,6 +326,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp } } +_exit: return 0; _err: diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index fc6e1668ba..afad78c5e5 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -69,8 +69,8 @@ _err: } void streamMetaClose(SStreamMeta* pMeta) { - tdbCommit(pMeta->db, &pMeta->txn); - tdbPostCommit(pMeta->db, &pMeta->txn); + tdbCommit(pMeta->db, pMeta->txn); + tdbPostCommit(pMeta->db, pMeta->txn); tdbTbClose(pMeta->pTaskDb); tdbTbClose(pMeta->pCheckpointDb); tdbClose(pMeta->db); @@ -115,7 +115,7 @@ int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t ver, char* msg, goto FAIL; } - if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), msg, msgLen, &pMeta->txn) < 0) { + if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), msg, msgLen, pMeta->txn) < 0) { taosHashRemove(pMeta->pTasks, &pTask->taskId, sizeof(int32_t)); ASSERT(0); goto FAIL; @@ -152,7 +152,7 @@ int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) { tEncodeSStreamTask(&encoder, pTask); tEncoderClear(&encoder); - if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), buf, len, &pMeta->txn) < 0) { + if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) { ASSERT(0); return -1; } @@ -220,42 +220,35 @@ void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { } int32_t streamMetaBegin(SStreamMeta* pMeta) { - if (tdbTxnOpen(&pMeta->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < - 0) { - return -1; - } - - if (tdbBegin(pMeta->db, &pMeta->txn) < 0) { + if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, + TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } return 0; } int32_t streamMetaCommit(SStreamMeta* pMeta) { - if (tdbCommit(pMeta->db, &pMeta->txn) < 0) { + if (tdbCommit(pMeta->db, pMeta->txn) < 0) { return -1; } - memset(&pMeta->txn, 0, sizeof(TXN)); - if (tdbTxnOpen(&pMeta->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < - 0) { + if (tdbPostCommit(pMeta->db, pMeta->txn) < 0) { return -1; } - if (tdbBegin(pMeta->db, &pMeta->txn) < 0) { + + if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, + TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } return 0; } int32_t streamMetaAbort(SStreamMeta* pMeta) { - if (tdbAbort(pMeta->db, &pMeta->txn) < 0) { + if (tdbAbort(pMeta->db, pMeta->txn) < 0) { return -1; } - memset(&pMeta->txn, 0, sizeof(TXN)); - if (tdbTxnOpen(&pMeta->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < - 0) { - return -1; - } - if (tdbBegin(pMeta->db, &pMeta->txn) < 0) { + + if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, + TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } return 0; diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index ea31347db5..af1d738de0 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -179,8 +179,8 @@ _err: } void streamStateClose(SStreamState* pState) { - tdbCommit(pState->pTdbState->db, &pState->pTdbState->txn); - tdbPostCommit(pState->pTdbState->db, &pState->pTdbState->txn); + tdbCommit(pState->pTdbState->db, pState->pTdbState->txn); + tdbPostCommit(pState->pTdbState->db, pState->pTdbState->txn); tdbTbClose(pState->pTdbState->pStateDb); tdbTbClose(pState->pTdbState->pFuncStateDb); tdbTbClose(pState->pTdbState->pFillStateDb); @@ -192,71 +192,61 @@ void streamStateClose(SStreamState* pState) { } int32_t streamStateBegin(SStreamState* pState) { - if (tdbTxnOpen(&pState->pTdbState->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, - TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { - return -1; - } - - if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn) < 0) { - tdbTxnClose(&pState->pTdbState->txn); + if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, + TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { + tdbAbort(pState->pTdbState->db, pState->pTdbState->txn); return -1; } return 0; } int32_t streamStateCommit(SStreamState* pState) { - if (tdbCommit(pState->pTdbState->db, &pState->pTdbState->txn) < 0) { + if (tdbCommit(pState->pTdbState->db, pState->pTdbState->txn) < 0) { return -1; } - if (tdbPostCommit(pState->pTdbState->db, &pState->pTdbState->txn) < 0) { + if (tdbPostCommit(pState->pTdbState->db, pState->pTdbState->txn) < 0) { return -1; } - memset(&pState->pTdbState->txn, 0, sizeof(TXN)); - if (tdbTxnOpen(&pState->pTdbState->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, - TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { - return -1; - } - if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn) < 0) { + + if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, + TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } return 0; } int32_t streamStateAbort(SStreamState* pState) { - if (tdbAbort(pState->pTdbState->db, &pState->pTdbState->txn) < 0) { + if (tdbAbort(pState->pTdbState->db, pState->pTdbState->txn) < 0) { return -1; } - memset(&pState->pTdbState->txn, 0, sizeof(TXN)); - if (tdbTxnOpen(&pState->pTdbState->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, - TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { - return -1; - } - if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn) < 0) { + + if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, + TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } return 0; } int32_t streamStateFuncPut(SStreamState* pState, const STupleKey* key, const void* value, int32_t vLen) { - return tdbTbUpsert(pState->pTdbState->pFuncStateDb, key, sizeof(STupleKey), value, vLen, &pState->pTdbState->txn); + return tdbTbUpsert(pState->pTdbState->pFuncStateDb, key, sizeof(STupleKey), value, vLen, pState->pTdbState->txn); } int32_t streamStateFuncGet(SStreamState* pState, const STupleKey* key, void** pVal, int32_t* pVLen) { return tdbTbGet(pState->pTdbState->pFuncStateDb, key, sizeof(STupleKey), pVal, pVLen); } int32_t streamStateFuncDel(SStreamState* pState, const STupleKey* key) { - return tdbTbDelete(pState->pTdbState->pFuncStateDb, key, sizeof(STupleKey), &pState->pTdbState->txn); + return tdbTbDelete(pState->pTdbState->pFuncStateDb, key, sizeof(STupleKey), pState->pTdbState->txn); } // todo refactor int32_t streamStatePut(SStreamState* pState, const SWinKey* key, const void* value, int32_t vLen) { SStateKey sKey = {.key = *key, .opNum = pState->number}; - return tdbTbUpsert(pState->pTdbState->pStateDb, &sKey, sizeof(SStateKey), value, vLen, &pState->pTdbState->txn); + return tdbTbUpsert(pState->pTdbState->pStateDb, &sKey, sizeof(SStateKey), value, vLen, pState->pTdbState->txn); } // todo refactor int32_t streamStateFillPut(SStreamState* pState, const SWinKey* key, const void* value, int32_t vLen) { - return tdbTbUpsert(pState->pTdbState->pFillStateDb, key, sizeof(SWinKey), value, vLen, &pState->pTdbState->txn); + return tdbTbUpsert(pState->pTdbState->pFillStateDb, key, sizeof(SWinKey), value, vLen, pState->pTdbState->txn); } // todo refactor @@ -273,7 +263,7 @@ int32_t streamStateFillGet(SStreamState* pState, const SWinKey* key, void** pVal // todo refactor int32_t streamStateDel(SStreamState* pState, const SWinKey* key) { SStateKey sKey = {.key = *key, .opNum = pState->number}; - return tdbTbDelete(pState->pTdbState->pStateDb, &sKey, sizeof(SStateKey), &pState->pTdbState->txn); + return tdbTbDelete(pState->pTdbState->pStateDb, &sKey, sizeof(SStateKey), pState->pTdbState->txn); } int32_t streamStateClear(SStreamState* pState) { @@ -297,7 +287,7 @@ void streamStateSetNumber(SStreamState* pState, int32_t number) { pState->number // todo refactor int32_t streamStateFillDel(SStreamState* pState, const SWinKey* key) { - return tdbTbDelete(pState->pTdbState->pFillStateDb, key, sizeof(SWinKey), &pState->pTdbState->txn); + return tdbTbDelete(pState->pTdbState->pFillStateDb, key, sizeof(SWinKey), pState->pTdbState->txn); } int32_t streamStateAddIfNotExist(SStreamState* pState, const SWinKey* key, void** pVal, int32_t* pVLen) { @@ -531,7 +521,7 @@ void streamFreeVal(void* val) { tdbFree(val); } int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen) { SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; return tdbTbUpsert(pState->pTdbState->pSessionStateDb, &sKey, sizeof(SStateSessionKey), value, vLen, - &pState->pTdbState->txn); + pState->pTdbState->txn); } int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVal, int32_t* pVLen) { @@ -554,7 +544,7 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) { SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; - return tdbTbDelete(pState->pTdbState->pSessionStateDb, &sKey, sizeof(SStateSessionKey), &pState->pTdbState->txn); + return tdbTbDelete(pState->pTdbState->pSessionStateDb, &sKey, sizeof(SStateSessionKey), pState->pTdbState->txn); } SStreamStateCur* streamStateSessionSeekKeyCurrentPrev(SStreamState* pState, const SSessionKey* key) { @@ -833,7 +823,7 @@ _end: int32_t streamStatePutParName(SStreamState* pState, int64_t groupId, const char tbname[TSDB_TABLE_NAME_LEN]) { tdbTbUpsert(pState->pTdbState->pParNameDb, &groupId, sizeof(int64_t), tbname, TSDB_TABLE_NAME_LEN, - &pState->pTdbState->txn); + pState->pTdbState->txn); return 0; } diff --git a/source/libs/tdb/inc/tdb.h b/source/libs/tdb/inc/tdb.h index 7ab2bc3995..c728e29641 100644 --- a/source/libs/tdb/inc/tdb.h +++ b/source/libs/tdb/inc/tdb.h @@ -17,6 +17,7 @@ #define _TD_TDB_H_ #include "os.h" +#include "tdbOs.h" #ifdef __cplusplus extern "C" { @@ -33,7 +34,8 @@ typedef struct STxn TXN; // TDB int32_t tdbOpen(const char *dbname, int szPage, int pages, TDB **ppDb, int8_t rollback); int32_t tdbClose(TDB *pDb); -int32_t tdbBegin(TDB *pDb, TXN *pTxn); +int32_t tdbBegin(TDB *pDb, TXN **pTxn, void *(*xMalloc)(void *, size_t), void (*xFree)(void *, void *), void *xArg, + int flags); int32_t tdbCommit(TDB *pDb, TXN *pTxn); int32_t tdbPostCommit(TDB *pDb, TXN *pTxn); int32_t tdbPrepareAsyncCommit(TDB *pDb, TXN *pTxn); @@ -77,12 +79,16 @@ int32_t tdbTxnClose(TXN *pTxn); // other void tdbFree(void *); +typedef struct hashset_st *hashset_t; + struct STxn { int flags; int64_t txnId; void *(*xMalloc)(void *, size_t); void (*xFree)(void *, void *); - void *xArg; + void *xArg; + tdb_fd_t jfd; + hashset_t jPageSet; }; // error code diff --git a/source/libs/tdb/src/db/tdbBtree.c b/source/libs/tdb/src/db/tdbBtree.c index 6a60073426..8c62f89b64 100644 --- a/source/libs/tdb/src/db/tdbBtree.c +++ b/source/libs/tdb/src/db/tdbBtree.c @@ -69,7 +69,7 @@ static int tdbBtreeCellSize(const SPage *pPage, SCell *pCell, int dropOfp, TXN * static int tdbBtcMoveDownward(SBTC *pBtc); static int tdbBtcMoveUpward(SBTC *pBtc); -int tdbBtreeOpen(int keyLen, int valLen, SPager *pPager, char const *tbname, SPgno pgno, tdb_cmpr_fn_t kcmpr, +int tdbBtreeOpen(int keyLen, int valLen, SPager *pPager, char const *tbname, SPgno pgno, tdb_cmpr_fn_t kcmpr, TDB *pEnv, SBTree **ppBt) { SBTree *pBt; int ret; @@ -106,22 +106,26 @@ int tdbBtreeOpen(int keyLen, int valLen, SPager *pPager, char const *tbname, SPg if (pgno == 0) { // fetch page & insert into main db SPage *pPage; - TXN txn; - tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); + TXN *txn; - pPager->inTran = 1; + ret = tdbBegin(pEnv, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); + if (ret < 0) { + return -1; + } SBtreeInitPageArg zArg; zArg.flags = 0x1 | 0x2; // root leaf node; zArg.pBt = pBt; - ret = tdbPagerFetchPage(pPager, &pgno, &pPage, tdbBtreeInitPage, &zArg, &txn); + ret = tdbPagerFetchPage(pPager, &pgno, &pPage, tdbBtreeInitPage, &zArg, txn); if (ret < 0) { + tdbAbort(pEnv, txn); return -1; } ret = tdbPagerWrite(pPager, pPage); if (ret < 0) { tdbError("failed to write page since %s", terrstr()); + tdbAbort(pEnv, txn); return -1; } @@ -130,18 +134,18 @@ int tdbBtreeOpen(int keyLen, int valLen, SPager *pPager, char const *tbname, SPg pBt->info.nLevel = 1; pBt->info.nData = 0; pBt->tbname = (char *)tbname; - // ret = tdbTbInsert(pPager->pEnv->pMainDb, tbname, strlen(tbname) + 1, &pgno, sizeof(SPgno), &txn); - ret = tdbTbInsert(pPager->pEnv->pMainDb, tbname, strlen(tbname) + 1, &pBt->info, sizeof(pBt->info), &txn); + + ret = tdbTbInsert(pPager->pEnv->pMainDb, tbname, strlen(tbname) + 1, &pBt->info, sizeof(pBt->info), txn); if (ret < 0) { + tdbAbort(pEnv, txn); return -1; } } - // tdbUnrefPage(pPage); - tdbPCacheRelease(pPager->pCache, pPage, &txn); - tdbCommit(pPager->pEnv, &txn); - tdbPostCommit(pPager->pEnv, &txn); - tdbTxnClose(&txn); + tdbPCacheRelease(pPager->pCache, pPage, txn); + + tdbCommit(pPager->pEnv, txn); + tdbPostCommit(pPager->pEnv, txn); } ASSERT(pgno != 0); @@ -1534,10 +1538,21 @@ int tdbBtcOpen(SBTC *pBtc, SBTree *pBt, TXN *pTxn) { memset(&pBtc->coder, 0, sizeof(SCellDecoder)); if (pTxn == NULL) { - pBtc->pTxn = &pBtc->txn; - tdbTxnOpen(pBtc->pTxn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0); + TXN *pTxn = tdbOsCalloc(1, sizeof(*pTxn)); + if (!pTxn) { + return -1; + } + + if (tdbTxnOpen(pTxn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { + tdbOsFree(pTxn); + return -1; + } + + pBtc->pTxn = pTxn; + pBtc->freeTxn = 1; } else { pBtc->pTxn = pTxn; + pBtc->freeTxn = 0; } return 0; @@ -2231,6 +2246,10 @@ int tdbBtcClose(SBTC *pBtc) { tdbFree(pBtc->coder.pVal); } + if (pBtc->freeTxn) { + tdbTxnClose(pBtc->pTxn); + } + return 0; } diff --git a/source/libs/tdb/src/db/tdbDb.c b/source/libs/tdb/src/db/tdbDb.c index 5aff5b7bb2..c79279c658 100644 --- a/source/libs/tdb/src/db/tdbDb.c +++ b/source/libs/tdb/src/db/tdbDb.c @@ -99,19 +99,37 @@ int tdbClose(TDB *pDb) { int32_t tdbAlter(TDB *pDb, int pages) { return tdbPCacheAlter(pDb->pCache, pages); } -int32_t tdbBegin(TDB *pDb, TXN *pTxn) { +int32_t tdbBegin(TDB *pDb, TXN **ppTxn, void *(*xMalloc)(void *, size_t), void (*xFree)(void *, void *), void *xArg, + int flags) { SPager *pPager; int ret; + int64_t txnId = ++pDb->txnId; + if (txnId == INT64_MAX) { + pDb->txnId = 0; + } + + TXN *pTxn = tdbOsCalloc(1, sizeof(*pTxn)); + if (!pTxn) { + return -1; + } + + if (tdbTxnOpen(pTxn, txnId, xMalloc, xFree, xArg, flags) < 0) { + tdbOsFree(pTxn); + return -1; + } for (pPager = pDb->pgrList; pPager; pPager = pPager->pNext) { ret = tdbPagerBegin(pPager, pTxn); if (ret < 0) { tdbError("failed to begin pager since %s. dbName:%s, txnId:%" PRId64, tstrerror(terrno), pDb->dbName, pTxn->txnId); + tdbTxnClose(pTxn); return -1; } } + *ppTxn = pTxn; + return 0; } @@ -144,6 +162,8 @@ int32_t tdbPostCommit(TDB *pDb, TXN *pTxn) { } } + tdbTxnClose(pTxn); + return 0; } @@ -176,6 +196,8 @@ int32_t tdbAbort(TDB *pDb, TXN *pTxn) { } } + tdbTxnClose(pTxn); + return 0; } diff --git a/source/libs/tdb/src/db/tdbPage.c b/source/libs/tdb/src/db/tdbPage.c index 016ad65cf3..0653b6fc36 100644 --- a/source/libs/tdb/src/db/tdbPage.c +++ b/source/libs/tdb/src/db/tdbPage.c @@ -69,14 +69,15 @@ int tdbPageCreate(int pageSize, SPage **ppPage, void *(*xMalloc)(void *, size_t) *ppPage = pPage; - tdbDebug("page/create: %p %p", pPage, xMalloc); + tdbTrace("page/create: %p/%d %p", pPage, pPage->id, xMalloc); return 0; } int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg) { u8 *ptr; - tdbDebug("page/destroy: %p %p", pPage, xFree); + tdbTrace("page/destroy: %p/%d %p", pPage, pPage->id, xFree); + ASSERT(!pPage->isDirty); ASSERT(xFree); for (int iOvfl = 0; iOvfl < pPage->nOverflow; iOvfl++) { diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index ea67986da2..7f0fdf9efc 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -142,7 +142,7 @@ int hashset_contains(hashset_t set, void *item) { static int tdbPagerInitPage(SPager *pPager, SPage *pPage, int (*initPage)(SPage *, void *, int), void *arg, u8 loadPage); static int tdbPagerWritePageToJournal(SPager *pPager, SPage *pPage); -static int tdbPagerWritePageToDB(SPager *pPager, SPage *pPage); +static int tdbPagerPWritePageToDB(SPager *pPager, SPage *pPage); static FORCE_INLINE int32_t pageCmpFn(const SRBTreeNode *lhs, const SRBTreeNode *rhs) { SPage *pPageL = (SPage *)(((uint8_t *)lhs) - offsetof(SPage, node)); @@ -219,80 +219,22 @@ int tdbPagerOpen(SPCache *pCache, const char *fileName, SPager **ppPager) { int tdbPagerClose(SPager *pPager) { if (pPager) { + /* if (pPager->inTran) { tdbOsClose(pPager->jfd); } + */ tdbOsClose(pPager->fd); tdbOsFree(pPager); } return 0; } -/* -int tdbPagerOpenDB(SPager *pPager, SPgno *ppgno, bool toCreate, SBTree *pBt) { - SPgno pgno; - SPage *pPage; - int ret; - if (pPager->dbOrigSize > 0) { - pgno = 1; - } else { - pgno = 0; - } - - { - // TODO: try to search the main DB to get the page number - // pgno = 0; - } - - if (pgno == 0 && toCreate) { - // allocate a new child page - TXN txn; - tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0); - - pPager->inTran = 1; - - SBtreeInitPageArg zArg; - zArg.flags = 0x1 | 0x2; // root leaf node; - zArg.pBt = pBt; - ret = tdbPagerFetchPage(pPager, &pgno, &pPage, tdbBtreeInitPage, &zArg, &txn); - if (ret < 0) { - return -1; - } - - // ret = tdbPagerAllocPage(pPager, &pPage, &pgno); - // if (ret < 0) { - // return -1; - //} - - // TODO: Need to zero the page - - ret = tdbPagerWrite(pPager, pPage); - if (ret < 0) { - tdbError("failed to write page since %s", terrstr()); - return -1; - } - - tdbTxnClose(&txn); - } - - *ppgno = pgno; - return 0; -} -*/ int tdbPagerWrite(SPager *pPager, SPage *pPage) { int ret; SPage **ppPage; - ASSERT(pPager->inTran); -#if 0 - if (pPager->inTran == 0) { - ret = tdbPagerBegin(pPager); - if (ret < 0) { - return -1; - } - } -#endif - + // ASSERT(pPager->inTran); if (pPage->isDirty) return 0; // ref page one more time so the page will not be release @@ -322,15 +264,16 @@ int tdbPagerWrite(SPager *pPager, SPage *pPage) { // Write page to journal if neccessary if (TDB_PAGE_PGNO(pPage) <= pPager->dbOrigSize && - (pPager->jPageSet == NULL || !hashset_contains(pPager->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage))))) { + (pPager->pActiveTxn->jPageSet == NULL || + !hashset_contains(pPager->pActiveTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage))))) { ret = tdbPagerWritePageToJournal(pPager, pPage); if (ret < 0) { tdbError("failed to write page to journal since %s", tstrerror(terrno)); return -1; } - if (pPager->jPageSet) { - hashset_add(pPager->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage))); + if (pPager->pActiveTxn->jPageSet) { + hashset_add(pPager->pActiveTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage))); } } @@ -338,23 +281,28 @@ int tdbPagerWrite(SPager *pPager, SPage *pPage) { } int tdbPagerBegin(SPager *pPager, TXN *pTxn) { + /* if (pPager->inTran) { return 0; } - + */ // Open the journal - pPager->jfd = tdbOsOpen(pPager->jFileName, TDB_O_CREAT | TDB_O_RDWR, 0755); - if (TDB_FD_INVALID(pPager->jfd)) { + char jTxnFileName[TDB_FILENAME_LEN]; + sprintf(jTxnFileName, "%s.%" PRId64, pPager->jFileName, pTxn->txnId); + pTxn->jfd = tdbOsOpen(jTxnFileName, TDB_O_CREAT | TDB_O_RDWR, 0755); + if (TDB_FD_INVALID(pTxn->jfd)) { tdbError("failed to open file due to %s. jFileName:%s", strerror(errno), pPager->jFileName); terrno = TAOS_SYSTEM_ERROR(errno); return -1; } - pPager->jPageSet = hashset_create(); + pTxn->jPageSet = hashset_create(); + + pPager->pActiveTxn = pTxn; // TODO: write the size of the file - + /* pPager->inTran = 1; - + */ return 0; } @@ -363,9 +311,9 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) { int ret; // sync the journal file - ret = tdbOsFSync(pPager->jfd); + ret = tdbOsFSync(pTxn->jfd); if (ret < 0) { - tdbError("failed to fsync jfd due to %s. jFileName:%s", strerror(errno), pPager->jFileName); + tdbError("failed to fsync: %s. jFileName:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId); terrno = TAOS_SYSTEM_ERROR(errno); return -1; } @@ -377,7 +325,7 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) { pPage = (SPage *)pNode; ASSERT(pPage->nOverflow == 0); - ret = tdbPagerWritePageToDB(pPager, pPage); + ret = tdbPagerPWritePageToDB(pPager, pPage); if (ret < 0) { tdbError("failed to write page to db since %s", tstrerror(terrno)); return -1; @@ -395,8 +343,8 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) { pPage->isDirty = 0; tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage); - if (pPager->jPageSet) { - hashset_remove(pPager->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage))); + if (pTxn->jPageSet) { + hashset_remove(pTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage))); } tdbPCacheRelease(pPager->pCache, pPage, pTxn); } @@ -415,35 +363,39 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) { } int tdbPagerPostCommit(SPager *pPager, TXN *pTxn) { + char jTxnFileName[TDB_FILENAME_LEN]; + sprintf(jTxnFileName, "%s.%" PRId64, pPager->jFileName, pTxn->txnId); + // remove the journal file - if (tdbOsClose(pPager->jfd) < 0) { - tdbError("failed to close jfd due to %s. file:%s", strerror(errno), pPager->jFileName); + if (tdbOsClose(pTxn->jfd) < 0) { + tdbError("failed to close jfd: %s. file:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId); terrno = TAOS_SYSTEM_ERROR(errno); return -1; } - if (tdbOsRemove(pPager->jFileName) < 0 && errno != ENOENT) { - tdbError("failed to remove file due to %s. file:%s", strerror(errno), pPager->jFileName); + if (tdbOsRemove(jTxnFileName) < 0 && errno != ENOENT) { + tdbError("failed to remove file due to %s. file:%s", strerror(errno), jTxnFileName); terrno = TAOS_SYSTEM_ERROR(errno); return -1; } - if (pPager->jPageSet) { - hashset_destroy(pPager->jPageSet); + if (pTxn->jPageSet) { + hashset_destroy(pTxn->jPageSet); } - pPager->inTran = 0; + // pPager->inTran = 0; return 0; } int tdbPagerPrepareAsyncCommit(SPager *pPager, TXN *pTxn) { SPage *pPage; + SPgno maxPgno = pPager->dbOrigSize; int ret; // sync the journal file - ret = tdbOsFSync(pPager->jfd); + ret = tdbOsFSync(pTxn->jfd); if (ret < 0) { - tdbError("failed to fsync jfd due to %s. jFileName:%s", strerror(errno), pPager->jFileName); + tdbError("failed to fsync jfd: %s. jfile:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId); terrno = TAOS_SYSTEM_ERROR(errno); return -1; } @@ -454,7 +406,12 @@ int tdbPagerPrepareAsyncCommit(SPager *pPager, TXN *pTxn) { while ((pNode = tRBTreeIterNext(&iter)) != NULL) { pPage = (SPage *)pNode; if (pPage->isLocal) continue; - ret = tdbPagerWritePageToDB(pPager, pPage); + + SPgno pgno = TDB_PAGE_PGNO(pPage); + if (pgno > maxPgno) { + maxPgno = pgno; + } + ret = tdbPagerPWritePageToDB(pPager, pPage); if (ret < 0) { tdbError("failed to write page to db since %s", tstrerror(terrno)); return -1; @@ -462,7 +419,8 @@ int tdbPagerPrepareAsyncCommit(SPager *pPager, TXN *pTxn) { } tdbTrace("tdbttl commit:%p, %d/%d", pPager, pPager->dbOrigSize, pPager->dbFileSize); - pPager->dbOrigSize = pPager->dbFileSize; + pPager->dbOrigSize = maxPgno; + // pPager->dbOrigSize = pPager->dbFileSize; // release the page iter = tRBTreeIterCreate(&pPager->rbt, 1); @@ -474,6 +432,7 @@ int tdbPagerPrepareAsyncCommit(SPager *pPager, TXN *pTxn) { tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage); tdbPCacheRelease(pPager->pCache, pPage, pTxn); } + /* tdbTrace("reset dirty tree: %p", &pPager->rbt); tRBTreeCreate(&pPager->rbt, pageCmpFn); @@ -495,15 +454,15 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn) { SPgno journalSize = 0; int ret; - // 0, sync the journal file - ret = tdbOsFSync(pPager->jfd); + // sync the journal file + ret = tdbOsFSync(pTxn->jfd); if (ret < 0) { - tdbError("failed to fsync jfd due to %s. file:%s", strerror(errno), pPager->jFileName); + tdbError("failed to fsync jfd: %s. jfile:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId); terrno = TAOS_SYSTEM_ERROR(errno); return -1; } - tdb_fd_t jfd = pPager->jfd; + tdb_fd_t jfd = pTxn->jfd; ret = tdbGetFileSize(jfd, pPager->pageSize, &journalSize); if (ret < 0) { @@ -567,7 +526,7 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn) { pPage->isDirty = 0; tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage); - hashset_remove(pPager->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage))); + hashset_remove(pTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage))); tdbPCacheRelease(pPager->pCache, pPage, pTxn); } @@ -575,11 +534,24 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn) { tRBTreeCreate(&pPager->rbt, pageCmpFn); // 4, remove the journal file - tdbOsClose(pPager->jfd); - (void)tdbOsRemove(pPager->jFileName); - hashset_destroy(pPager->jPageSet); + if (tdbOsClose(pTxn->jfd) < 0) { + tdbError("failed to close jfd: %s. file:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId); + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } - pPager->inTran = 0; + char jTxnFileName[TDB_FILENAME_LEN]; + sprintf(jTxnFileName, "%s.%" PRId64, pPager->jFileName, pTxn->txnId); + + if (tdbOsRemove(jTxnFileName) < 0 && errno != ENOENT) { + tdbError("failed to remove file due to %s. file:%s", strerror(errno), jTxnFileName); + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + hashset_destroy(pTxn->jPageSet); + + // pPager->inTran = 0; return 0; } @@ -604,7 +576,7 @@ int tdbPagerFlushPage(SPager *pPager, TXN *pTxn) { if (pgno > maxPgno) { maxPgno = pgno; } - ret = tdbPagerWritePageToDB(pPager, pPage); + ret = tdbPagerPWritePageToDB(pPager, pPage); if (ret < 0) { tdbError("failed to write page to db since %s", tstrerror(terrno)); return -1; @@ -802,17 +774,18 @@ static int tdbPagerWritePageToJournal(SPager *pPager, SPage *pPage) { pgno = TDB_PAGE_PGNO(pPage); - ret = tdbOsWrite(pPager->jfd, &pgno, sizeof(pgno)); + ret = tdbOsWrite(pPager->pActiveTxn->jfd, &pgno, sizeof(pgno)); if (ret < 0) { - tdbError("failed to write pgno due to %s. file:%s, pgno:%u", strerror(errno), pPager->jFileName, pgno); + tdbError("failed to write pgno due to %s. file:%s, pgno:%u, txnId:%" PRId64, strerror(errno), pPager->jFileName, + pgno, pPager->pActiveTxn->txnId); terrno = TAOS_SYSTEM_ERROR(errno); return -1; } - ret = tdbOsWrite(pPager->jfd, pPage->pData, pPage->pageSize); + ret = tdbOsWrite(pPager->pActiveTxn->jfd, pPage->pData, pPage->pageSize); if (ret < 0) { - tdbError("failed to write page data due to %s. file:%s, pageSize:%ld", strerror(errno), pPager->jFileName, - (long)pPage->pageSize); + tdbError("failed to write page data due to %s. file:%s, pageSize:%d, txnId:%" PRId64, strerror(errno), + pPager->jFileName, pPage->pageSize, pPager->pActiveTxn->txnId); terrno = TAOS_SYSTEM_ERROR(errno); return -1; } @@ -820,13 +793,6 @@ static int tdbPagerWritePageToJournal(SPager *pPager, SPage *pPage) { return 0; } /* -struct TdFile { - TdThreadRwlock rwlock; - int refId; - int fd; - FILE *fp; -} TdFile; -*/ static int tdbPagerWritePageToDB(SPager *pPager, SPage *pPage) { i64 offset; int ret; @@ -846,16 +812,32 @@ static int tdbPagerWritePageToDB(SPager *pPager, SPage *pPage) { return -1; } - // pwrite(pPager->fd->fd, pPage->pData, pPage->pageSize, offset); + return 0; +} +*/ +static int tdbPagerPWritePageToDB(SPager *pPager, SPage *pPage) { + i64 offset; + int ret; + + offset = (i64)pPage->pageSize * (TDB_PAGE_PGNO(pPage) - 1); + + ret = tdbOsPWrite(pPager->fd, pPage->pData, pPage->pageSize, offset); + if (ret < 0) { + tdbError("failed to pwrite page data due to %s. file:%s, pageSize:%d", strerror(errno), pPager->dbFileName, + pPage->pageSize); + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + return 0; } -int tdbPagerRestore(SPager *pPager, SBTree *pBt) { +static int tdbPagerRestore(SPager *pPager, SBTree *pBt, const char *jFileName) { int ret = 0; SPgno journalSize = 0; u8 *pageBuf = NULL; - tdb_fd_t jfd = tdbOsOpen(pPager->jFileName, TDB_O_RDWR, 0755); + tdb_fd_t jfd = tdbOsOpen(jFileName, TDB_O_RDWR, 0755); if (jfd == NULL) { return 0; } @@ -928,12 +910,50 @@ int tdbPagerRestore(SPager *pPager, SBTree *pBt) { return 0; } -int tdbPagerRollback(SPager *pPager) { - if (tdbOsRemove(pPager->jFileName) < 0 && errno != ENOENT) { - tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), pPager->jFileName); - terrno = TAOS_SYSTEM_ERROR(errno); +int tdbPagerRestoreJournals(SPager *pPager, SBTree *pBt) { + tdbDirEntryPtr pDirEntry; + tdbDirPtr pDir = taosOpenDir(pPager->pEnv->dbName); + if (pDir == NULL) { + tdbError("failed to open %s since %s", pPager->pEnv->dbName, strerror(errno)); return -1; } + while ((pDirEntry = tdbReadDir(pDir)) != NULL) { + char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); + if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { + if (tdbPagerRestore(pPager, pBt, name) < 0) { + tdbError("failed to restore file due to %s. jFileName:%s", strerror(errno), name); + return -1; + } + } + } + + tdbCloseDir(&pDir); + + return 0; +} + +int tdbPagerRollback(SPager *pPager) { + tdbDirEntryPtr pDirEntry; + tdbDirPtr pDir = taosOpenDir(pPager->pEnv->dbName); + if (pDir == NULL) { + tdbError("failed to open %s since %s", pPager->pEnv->dbName, strerror(errno)); + return -1; + } + + while ((pDirEntry = tdbReadDir(pDir)) != NULL) { + char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); + + if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { + if (tdbOsRemove(name) < 0 && errno != ENOENT) { + tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), name); + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + } + } + + tdbCloseDir(&pDir); + return 0; } diff --git a/source/libs/tdb/src/db/tdbTable.c b/source/libs/tdb/src/db/tdbTable.c index 8b029b06d6..c5c2d6aebe 100644 --- a/source/libs/tdb/src/db/tdbTable.c +++ b/source/libs/tdb/src/db/tdbTable.c @@ -108,7 +108,7 @@ int tdbTbOpen(const char *tbname, int keyLen, int valLen, tdb_cmpr_fn_t keyCmprF ASSERT(pPager != NULL); // pTb->pBt - ret = tdbBtreeOpen(keyLen, valLen, pPager, tbname, pgno, keyCmprFn, &(pTb->pBt)); + ret = tdbBtreeOpen(keyLen, valLen, pPager, tbname, pgno, keyCmprFn, pEnv, &(pTb->pBt)); if (ret < 0) { tdbOsFree(pTb); return -1; @@ -117,7 +117,7 @@ int tdbTbOpen(const char *tbname, int keyLen, int valLen, tdb_cmpr_fn_t keyCmprF if (rollback) { tdbPagerRollback(pPager); } else { - ret = tdbPagerRestore(pPager, pTb->pBt); + ret = tdbPagerRestoreJournals(pPager, pTb->pBt); if (ret < 0) { tdbOsFree(pTb); return -1; diff --git a/source/libs/tdb/src/db/tdbTxn.c b/source/libs/tdb/src/db/tdbTxn.c index f173d89779..77c87d18f2 100644 --- a/source/libs/tdb/src/db/tdbTxn.c +++ b/source/libs/tdb/src/db/tdbTxn.c @@ -28,4 +28,10 @@ int tdbTxnOpen(TXN *pTxn, int64_t txnid, void *(*xMalloc)(void *, size_t), void return 0; } -int tdbTxnClose(TXN *pTxn) { return 0; } \ No newline at end of file +int tdbTxnClose(TXN *pTxn) { + if (pTxn) { + tdbOsFree(pTxn); + } + + return 0; +} diff --git a/source/libs/tdb/src/inc/tdbInt.h b/source/libs/tdb/src/inc/tdbInt.h index 731b1927e7..055a8a1062 100644 --- a/source/libs/tdb/src/inc/tdbInt.h +++ b/source/libs/tdb/src/inc/tdbInt.h @@ -147,11 +147,11 @@ struct SBTC { SPage *pgStack[BTREE_MAX_DEPTH + 1]; SCellDecoder coder; TXN *pTxn; - TXN txn; + i8 freeTxn; }; // SBTree -int tdbBtreeOpen(int keyLen, int valLen, SPager *pFile, char const *tbname, SPgno pgno, tdb_cmpr_fn_t kcmpr, +int tdbBtreeOpen(int keyLen, int valLen, SPager *pFile, char const *tbname, SPgno pgno, tdb_cmpr_fn_t kcmpr, TDB *pEnv, SBTree **ppBt); int tdbBtreeClose(SBTree *pBt); int tdbBtreeInsert(SBTree *pBt, const void *pKey, int kLen, const void *pVal, int vLen, TXN *pTxn); @@ -197,7 +197,7 @@ int tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initP TXN *pTxn); void tdbPagerReturnPage(SPager *pPager, SPage *pPage, TXN *pTxn); int tdbPagerAllocPage(SPager *pPager, SPgno *ppgno); -int tdbPagerRestore(SPager *pPager, SBTree *pBt); +int tdbPagerRestoreJournals(SPager *pPager, SBTree *pBt); int tdbPagerRollback(SPager *pPager); // tdbPCache.c ==================================== @@ -382,26 +382,24 @@ struct STDB { #ifdef USE_MAINDB TTB *pMainDb; #endif + int64_t txnId; }; -typedef struct hashset_st *hashset_t; - struct SPager { char *dbFileName; char *jFileName; int pageSize; uint8_t fid[TDB_FILE_ID_LEN]; tdb_fd_t fd; - tdb_fd_t jfd; SPCache *pCache; SPgno dbFileSize; SPgno dbOrigSize; - //SPage *pDirty; - hashset_t jPageSet; - SRBTree rbt; - u8 inTran; - SPager *pNext; // used by TDB - SPager *pHashNext; // used by TDB + // SPage *pDirty; + SRBTree rbt; + // u8 inTran; + TXN *pActiveTxn; + SPager *pNext; // used by TDB + SPager *pHashNext; // used by TDB #ifdef USE_MAINDB TDB *pEnv; #endif diff --git a/source/libs/tdb/src/inc/tdbOs.h b/source/libs/tdb/src/inc/tdbOs.h index b5dd27052c..db4283e267 100644 --- a/source/libs/tdb/src/inc/tdbOs.h +++ b/source/libs/tdb/src/inc/tdbOs.h @@ -47,15 +47,21 @@ typedef TdFilePtr tdb_fd_t; #define TDB_O_RDWR (TD_FILE_WRITE) | (TD_FILE_READ) #define tdbOsOpen(PATH, OPTION, MODE) taosOpenFile((PATH), (OPTION)) - -#define tdbOsClose(FD) taosCloseFile(&(FD)) -#define tdbOsRead taosReadFile -#define tdbOsPRead taosPReadFile -#define tdbOsWrite taosWriteFile -#define tdbOsFSync taosFsyncFile -#define tdbOsLSeek taosLSeekFile -#define tdbOsRemove remove -#define tdbOsFileSize(FD, PSIZE) taosFStatFile(FD, PSIZE, NULL) +#define tdbOsClose(FD) taosCloseFile(&(FD)) +#define tdbOsRead taosReadFile +#define tdbOsPRead taosPReadFile +#define tdbOsWrite taosWriteFile +#define tdbOsPWrite taosPWriteFile +#define tdbOsFSync taosFsyncFile +#define tdbOsLSeek taosLSeekFile +#define tdbDirPtr TdDirPtr +#define tdbDirEntryPtr TdDirEntryPtr +#define tdbReadDir taosReadDir +#define tdbGetDirEntryName taosGetDirEntryName +#define tdbDirEntryBaseName taosDirEntryBaseName +#define tdbCloseDir taosCloseDir +#define tdbOsRemove remove +#define tdbOsFileSize(FD, PSIZE) taosFStatFile(FD, PSIZE, NULL) /* directory */ #define tdbOsMkdir taosMkDir diff --git a/source/libs/tdb/test/tdbExOVFLTest.cpp b/source/libs/tdb/test/tdbExOVFLTest.cpp index 305e91f62c..f4f09b20b8 100644 --- a/source/libs/tdb/test/tdbExOVFLTest.cpp +++ b/source/libs/tdb/test/tdbExOVFLTest.cpp @@ -170,11 +170,9 @@ static void insertOfp(void) { SPoolMem *pPool = openPool(); // start a transaction - TXN txn; - int64_t txnid = 0; - ++txnid; - tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); - tdbBegin(pEnv, &txn); + TXN *txn = NULL; + + tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); // generate value payload // char val[((4083 - 4 - 3 - 2) + 1) * 100]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4) @@ -185,12 +183,12 @@ static void insertOfp(void) { // insert the generated big data // char const *key = "key1"; char const *key = "key123456789"; - ret = tdbTbInsert(pDb, key, strlen(key), val, valLen, &txn); + ret = tdbTbInsert(pDb, key, strlen(key), val, valLen, txn); GTEST_ASSERT_EQ(ret, 0); // commit current transaction - tdbCommit(pEnv, &txn); - tdbTxnClose(&txn); + tdbCommit(pEnv, txn); + tdbPostCommit(pEnv, txn); } // TEST(TdbOVFLPagesTest, DISABLED_TbInsertTest) { @@ -258,11 +256,9 @@ TEST(TdbOVFLPagesTest, TbDeleteTest) { SPoolMem *pPool = openPool(); // start a transaction - TXN txn; - int64_t txnid = 0; - ++txnid; - tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); - tdbBegin(pEnv, &txn); + TXN *txn; + + tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); // generate value payload // char val[((4083 - 4 - 3 - 2) + 1) * 100]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4) @@ -271,7 +267,7 @@ TEST(TdbOVFLPagesTest, TbDeleteTest) { generateBigVal(val, valLen); { // insert the generated big data - ret = tdbTbInsert(pDb, "key1", strlen("key1"), val, valLen, &txn); + ret = tdbTbInsert(pDb, "key1", strlen("key1"), val, valLen, txn); GTEST_ASSERT_EQ(ret, 0); } @@ -297,7 +293,7 @@ tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_REA tdbBegin(pEnv, &txn); */ { // upsert the data - ret = tdbTbUpsert(pDb, "key1", strlen("key1"), "value1", strlen("value1"), &txn); + ret = tdbTbUpsert(pDb, "key1", strlen("key1"), "value1", strlen("value1"), txn); GTEST_ASSERT_EQ(ret, 0); } @@ -316,7 +312,7 @@ tdbBegin(pEnv, &txn); } { // delete the data - ret = tdbTbDelete(pDb, "key1", strlen("key1"), &txn); + ret = tdbTbDelete(pDb, "key1", strlen("key1"), txn); GTEST_ASSERT_EQ(ret, 0); } @@ -335,8 +331,8 @@ tdbBegin(pEnv, &txn); } // commit current transaction - tdbCommit(pEnv, &txn); - tdbTxnClose(&txn); + tdbCommit(pEnv, txn); + tdbPostCommit(pEnv, txn); } // TEST(tdb_test, DISABLED_simple_insert1) { @@ -346,7 +342,7 @@ TEST(tdb_test, simple_insert1) { TTB *pDb; tdb_cmpr_fn_t compFunc; int nData = 1; - TXN txn; + TXN *txn; int const pageSize = 4096; taosRemoveDir("tdb"); @@ -365,16 +361,13 @@ TEST(tdb_test, simple_insert1) { // char val[(4083 - 4 - 3 - 2)]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4) char val[(4083 - 4 - 3 - 2) + 1]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4) int64_t poolLimit = 4096; // 1M pool limit - int64_t txnid = 0; SPoolMem *pPool; // open the pool pPool = openPool(); // start a transaction - txnid++; - tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); - tdbBegin(pEnv, &txn); + tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); for (int iData = 1; iData <= nData; iData++) { sprintf(key, "key0"); @@ -393,26 +386,25 @@ TEST(tdb_test, simple_insert1) { val[i] = c; } - ret = tdbTbInsert(pDb, "key1", strlen("key1"), val, valLen, &txn); + ret = tdbTbInsert(pDb, "key1", strlen("key1"), val, valLen, txn); GTEST_ASSERT_EQ(ret, 0); // if pool is full, commit the transaction and start a new one if (pPool->size >= poolLimit) { // commit current transaction - tdbCommit(pEnv, &txn); - tdbTxnClose(&txn); + tdbCommit(pEnv, txn); + tdbPostCommit(pEnv, txn); // start a new transaction clearPool(pPool); - txnid++; - tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); - tdbBegin(pEnv, &txn); + + tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); } } // commit the transaction - tdbCommit(pEnv, &txn); - tdbTxnClose(&txn); + tdbCommit(pEnv, txn); + tdbPostCommit(pEnv, txn); { // Query the data void *pVal = NULL; diff --git a/source/libs/tdb/test/tdbTest.cpp b/source/libs/tdb/test/tdbTest.cpp index f3a301cf5b..54e80a0009 100644 --- a/source/libs/tdb/test/tdbTest.cpp +++ b/source/libs/tdb/test/tdbTest.cpp @@ -125,7 +125,7 @@ TEST(tdb_test, DISABLED_simple_insert1) { TTB *pDb; tdb_cmpr_fn_t compFunc; int nData = 1000000; - TXN txn; + TXN *txn; taosRemoveDir("tdb"); @@ -142,40 +142,35 @@ TEST(tdb_test, DISABLED_simple_insert1) { char key[64]; char val[64]; int64_t poolLimit = 4096; // 1M pool limit - int64_t txnid = 0; SPoolMem *pPool; // open the pool pPool = openPool(); // start a transaction - txnid++; - tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); - tdbBegin(pEnv, &txn); + tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); for (int iData = 1; iData <= nData; iData++) { sprintf(key, "key%d", iData); sprintf(val, "value%d", iData); - ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), &txn); + ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), txn); GTEST_ASSERT_EQ(ret, 0); // if pool is full, commit the transaction and start a new one if (pPool->size >= poolLimit) { // commit current transaction - tdbCommit(pEnv, &txn); - tdbTxnClose(&txn); + tdbCommit(pEnv, txn); + tdbPostCommit(pEnv, txn); // start a new transaction clearPool(pPool); - txnid++; - tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); - tdbBegin(pEnv, &txn); + tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); } } // commit the transaction - tdbCommit(pEnv, &txn); - tdbTxnClose(&txn); + tdbCommit(pEnv, txn); + tdbPostCommit(pEnv, txn); { // Query the data void *pVal = NULL; @@ -245,7 +240,7 @@ TEST(tdb_test, DISABLED_simple_insert2) { TTB *pDb; tdb_cmpr_fn_t compFunc; int nData = 1000000; - TXN txn; + TXN *txn; taosRemoveDir("tdb"); @@ -261,21 +256,18 @@ TEST(tdb_test, DISABLED_simple_insert2) { { char key[64]; char val[64]; - int64_t txnid = 0; SPoolMem *pPool; // open the pool pPool = openPool(); // start a transaction - txnid++; - tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); - tdbBegin(pEnv, &txn); + tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); for (int iData = 1; iData <= nData; iData++) { sprintf(key, "key%d", iData); sprintf(val, "value%d", iData); - ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), &txn); + ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), txn); GTEST_ASSERT_EQ(ret, 0); } @@ -312,8 +304,8 @@ TEST(tdb_test, DISABLED_simple_insert2) { } // commit the transaction - tdbCommit(pEnv, &txn); - tdbTxnClose(&txn); + tdbCommit(pEnv, txn); + tdbPostCommit(pEnv, txn); ret = tdbTbDrop(pDb); GTEST_ASSERT_EQ(ret, 0); @@ -331,7 +323,7 @@ TEST(tdb_test, DISABLED_simple_delete1) { TTB *pDb; char key[128]; char data[128]; - TXN txn; + TXN *txn; TDB *pEnv; SPoolMem *pPool; void *pKey = NULL; @@ -353,14 +345,13 @@ TEST(tdb_test, DISABLED_simple_delete1) { ret = tdbTbOpen("db.db", -1, -1, tKeyCmpr, pEnv, &pDb, 0); GTEST_ASSERT_EQ(ret, 0); - tdbTxnOpen(&txn, 0, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); - tdbBegin(pEnv, &txn); + tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); // loop to insert batch data for (int iData = 0; iData < nKV; iData++) { sprintf(key, "key%d", iData); sprintf(data, "data%d", iData); - ret = tdbTbInsert(pDb, key, strlen(key), data, strlen(data), &txn); + ret = tdbTbInsert(pDb, key, strlen(key), data, strlen(data), txn); GTEST_ASSERT_EQ(ret, 0); } @@ -378,7 +369,7 @@ TEST(tdb_test, DISABLED_simple_delete1) { for (int iData = nKV - 1; iData > 30; iData--) { sprintf(key, "key%d", iData); - ret = tdbTbDelete(pDb, key, strlen(key), &txn); + ret = tdbTbDelete(pDb, key, strlen(key), txn); GTEST_ASSERT_EQ(ret, 0); } @@ -413,7 +404,8 @@ TEST(tdb_test, DISABLED_simple_delete1) { tdbTbcClose(pDbc); - tdbCommit(pEnv, &txn); + tdbCommit(pEnv, txn); + tdbPostCommit(pEnv, txn); closePool(pPool); @@ -430,7 +422,7 @@ TEST(tdb_test, DISABLED_simple_upsert1) { char data[64]; void *pData = NULL; SPoolMem *pPool; - TXN txn; + TXN *txn; taosRemoveDir("tdb"); @@ -444,13 +436,12 @@ TEST(tdb_test, DISABLED_simple_upsert1) { pPool = openPool(); // insert some data - tdbTxnOpen(&txn, 0, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); - tdbBegin(pEnv, &txn); + tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); for (int iData = 0; iData < nData; iData++) { sprintf(key, "key%d", iData); sprintf(data, "data%d", iData); - ret = tdbTbInsert(pDb, key, strlen(key), data, strlen(data), &txn); + ret = tdbTbInsert(pDb, key, strlen(key), data, strlen(data), txn); GTEST_ASSERT_EQ(ret, 0); } @@ -467,11 +458,12 @@ TEST(tdb_test, DISABLED_simple_upsert1) { for (int iData = 0; iData < nData; iData++) { sprintf(key, "key%d", iData); sprintf(data, "data%d-u", iData); - ret = tdbTbUpsert(pDb, key, strlen(key), data, strlen(data), &txn); + ret = tdbTbUpsert(pDb, key, strlen(key), data, strlen(data), txn); GTEST_ASSERT_EQ(ret, 0); } - tdbCommit(pEnv, &txn); + tdbCommit(pEnv, txn); + tdbPostCommit(pEnv, txn); // query the data for (int iData = 0; iData < nData; iData++) { @@ -492,7 +484,7 @@ TEST(tdb_test, multi_thread_query) { TTB *pDb; tdb_cmpr_fn_t compFunc; int nData = 1000000; - TXN txn; + TXN *txn; taosRemoveDir("tdb"); @@ -508,26 +500,18 @@ TEST(tdb_test, multi_thread_query) { char key[64]; char val[64]; int64_t poolLimit = 4096 * 20; // 1M pool limit - int64_t txnid = 0; SPoolMem *pPool; // open the pool pPool = openPool(); // start a transaction - txnid++; - txn.flags = TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED; - txn.txnId = -1; - txn.xMalloc = poolMalloc; - txn.xFree = poolFree; - txn.xArg = pPool; - // tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, ); - tdbBegin(pEnv, &txn); + tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); for (int iData = 1; iData <= nData; iData++) { sprintf(key, "key%d", iData); sprintf(val, "value%d", iData); - ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), &txn); + ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), txn); GTEST_ASSERT_EQ(ret, 0); } @@ -578,7 +562,7 @@ TEST(tdb_test, multi_thread_query) { std::vector threads; for (int i = 0; i < nThreads; i++) { if (i == 0) { - threads.push_back(std::thread(tdbCommit, pEnv, &txn)); + threads.push_back(std::thread(tdbCommit, pEnv, txn)); } else { threads.push_back(std::thread(f, pDb, nData)); } @@ -589,8 +573,8 @@ TEST(tdb_test, multi_thread_query) { } // commit the transaction - tdbCommit(pEnv, &txn); - tdbTxnClose(&txn); + tdbCommit(pEnv, txn); + tdbPostCommit(pEnv, txn); // Close a database tdbTbClose(pDb); @@ -621,17 +605,12 @@ TEST(tdb_test, DISABLED_multi_thread1) { GTEST_ASSERT_EQ(ret, 0); auto insert = [](TDB *pDb, TTB *pTb, int nData, int *stop, std::shared_timed_mutex *mu) { - TXN txn = {0}; + TXN *txn = NULL; char key[128]; char val[128]; SPoolMem *pPool = openPool(); - txn.flags = TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED; - txn.txnId = -1; - txn.xMalloc = poolMalloc; - txn.xFree = poolFree; - txn.xArg = pPool; - tdbBegin(pDb, &txn); + tdbBegin(pDb, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); for (int iData = 1; iData <= nData; iData++) { sprintf(key, "key%d", iData); sprintf(val, "value%d", iData); @@ -644,14 +623,17 @@ TEST(tdb_test, DISABLED_multi_thread1) { } if (pPool->size > 1024 * 1024) { - tdbCommit(pDb, &txn); + tdbCommit(pDb, txn); + tdbPostCommit(pDb, txn); clearPool(pPool); - tdbBegin(pDb, &txn); + tdbBegin(pDb, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); } } - tdbCommit(pDb, &txn); + tdbCommit(pDb, txn); + tdbPostCommit(pDb, txn); + closePool(pPool); *stop = 1; diff --git a/source/os/src/osFile.c b/source/os/src/osFile.c index c3283ffe84..68365be481 100644 --- a/source/os/src/osFile.c +++ b/source/os/src/osFile.c @@ -491,6 +491,28 @@ int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count) { return count; } +int64_t taosPWriteFile(TdFilePtr pFile, const void *buf, int64_t count, int64_t offset) { + if (pFile == NULL) { + return 0; + } +#if FILE_WITH_LOCK + taosThreadRwlockWrlock(&(pFile->rwlock)); +#endif + assert(pFile->fd >= 0); // Please check if you have closed the file. +#ifdef WINDOWS + size_t pos = _lseeki64(pFile->fd, 0, SEEK_CUR); + _lseeki64(pFile->fd, offset, SEEK_SET); + int64_t ret = _write(pFile->fd, buf, count); + _lseeki64(pFile->fd, pos, SEEK_SET); +#else + int64_t ret = pwrite(pFile->fd, buf, count, offset); +#endif +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return ret; +} + int64_t taosLSeekFile(TdFilePtr pFile, int64_t offset, int32_t whence) { #if FILE_WITH_LOCK taosThreadRwlockRdlock(&(pFile->rwlock));