From 3beef665c86fb6f01395455024ddd02379e21fb9 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 18 Mar 2022 06:59:37 +0000 Subject: [PATCH] more TDB --- source/libs/tdb/CMakeLists.txt | 10 - source/libs/tdb/src/sqlite/btmutex.c | 308 - source/libs/tdb/src/sqlite/btree.c | 10869 -------------------- source/libs/tdb/src/sqlite/pager.c | 6851 ------------ source/libs/tdb/src/sqlite/pcache.c | 851 -- source/libs/tdb/src/sqlite/pcache1.c | 1211 --- source/libs/tdb/src/sqlite/wal.c | 4153 -------- source/libs/tdb/src/sqliteinc/btree.h | 412 - source/libs/tdb/src/sqliteinc/btreeInt.h | 729 -- source/libs/tdb/src/sqliteinc/pager.h | 241 - source/libs/tdb/src/sqliteinc/pcache.h | 210 - source/libs/tdb/src/sqliteinc/sqlite3.h | 95 - source/libs/tdb/src/sqliteinc/sqliteInt.h | 58 - source/libs/tdb/src/sqliteinc/wal.h | 155 - 14 files changed, 26153 deletions(-) delete mode 100644 source/libs/tdb/src/sqlite/btmutex.c delete mode 100644 source/libs/tdb/src/sqlite/btree.c delete mode 100644 source/libs/tdb/src/sqlite/pager.c delete mode 100644 source/libs/tdb/src/sqlite/pcache.c delete mode 100644 source/libs/tdb/src/sqlite/pcache1.c delete mode 100644 source/libs/tdb/src/sqlite/wal.c delete mode 100644 source/libs/tdb/src/sqliteinc/btree.h delete mode 100644 source/libs/tdb/src/sqliteinc/btreeInt.h delete mode 100644 source/libs/tdb/src/sqliteinc/pager.h delete mode 100644 source/libs/tdb/src/sqliteinc/pcache.h delete mode 100644 source/libs/tdb/src/sqliteinc/sqlite3.h delete mode 100644 source/libs/tdb/src/sqliteinc/sqliteInt.h delete mode 100644 source/libs/tdb/src/sqliteinc/wal.h diff --git a/source/libs/tdb/CMakeLists.txt b/source/libs/tdb/CMakeLists.txt index e2d5e13add..aebe944630 100644 --- a/source/libs/tdb/CMakeLists.txt +++ b/source/libs/tdb/CMakeLists.txt @@ -22,16 +22,6 @@ target_link_libraries( PUBLIC util ) -# for tdb_sqlite -add_library(tdb_sqlite "") -target_sources(tdb_sqlite - PRIVATE - "src/sqlite/pcache.c" - "src/sqlite/pcache1.c" - "src/sqlite/pager.c" 
-) -target_include_directories(tdb_sqlite PUBLIC "src/sqliteinc") - # for test if(${BUILD_TEST}) add_subdirectory(test) diff --git a/source/libs/tdb/src/sqlite/btmutex.c b/source/libs/tdb/src/sqlite/btmutex.c deleted file mode 100644 index 275a93ff21..0000000000 --- a/source/libs/tdb/src/sqlite/btmutex.c +++ /dev/null @@ -1,308 +0,0 @@ -/* -** 2007 August 27 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** -** This file contains code used to implement mutexes on Btree objects. -** This code really belongs in btree.c. But btree.c is getting too -** big and we want to break it down some. This packaged seemed like -** a good breakout. -*/ -#include "btreeInt.h" -#ifndef SQLITE_OMIT_SHARED_CACHE -#if SQLITE_THREADSAFE - -/* -** Obtain the BtShared mutex associated with B-Tree handle p. Also, -** set BtShared.db to the database handle associated with p and the -** p->locked boolean to true. -*/ -static void lockBtreeMutex(Btree *p){ - assert( p->locked==0 ); - assert( sqlite3_mutex_notheld(p->pBt->mutex) ); - assert( sqlite3_mutex_held(p->db->mutex) ); - - sqlite3_mutex_enter(p->pBt->mutex); - p->pBt->db = p->db; - p->locked = 1; -} - -/* -** Release the BtShared mutex associated with B-Tree handle p and -** clear the p->locked boolean. -*/ -static void SQLITE_NOINLINE unlockBtreeMutex(Btree *p){ - BtShared *pBt = p->pBt; - assert( p->locked==1 ); - assert( sqlite3_mutex_held(pBt->mutex) ); - assert( sqlite3_mutex_held(p->db->mutex) ); - assert( p->db==pBt->db ); - - sqlite3_mutex_leave(pBt->mutex); - p->locked = 0; -} - -/* Forward reference */ -static void SQLITE_NOINLINE btreeLockCarefully(Btree *p); - -/* -** Enter a mutex on the given BTree object. 
-** -** If the object is not sharable, then no mutex is ever required -** and this routine is a no-op. The underlying mutex is non-recursive. -** But we keep a reference count in Btree.wantToLock so the behavior -** of this interface is recursive. -** -** To avoid deadlocks, multiple Btrees are locked in the same order -** by all database connections. The p->pNext is a list of other -** Btrees belonging to the same database connection as the p Btree -** which need to be locked after p. If we cannot get a lock on -** p, then first unlock all of the others on p->pNext, then wait -** for the lock to become available on p, then relock all of the -** subsequent Btrees that desire a lock. -*/ -void sqlite3BtreeEnter(Btree *p){ - /* Some basic sanity checking on the Btree. The list of Btrees - ** connected by pNext and pPrev should be in sorted order by - ** Btree.pBt value. All elements of the list should belong to - ** the same connection. Only shared Btrees are on the list. */ - assert( p->pNext==0 || p->pNext->pBt>p->pBt ); - assert( p->pPrev==0 || p->pPrev->pBtpBt ); - assert( p->pNext==0 || p->pNext->db==p->db ); - assert( p->pPrev==0 || p->pPrev->db==p->db ); - assert( p->sharable || (p->pNext==0 && p->pPrev==0) ); - - /* Check for locking consistency */ - assert( !p->locked || p->wantToLock>0 ); - assert( p->sharable || p->wantToLock==0 ); - - /* We should already hold a lock on the database connection */ - assert( sqlite3_mutex_held(p->db->mutex) ); - - /* Unless the database is sharable and unlocked, then BtShared.db - ** should already be set correctly. */ - assert( (p->locked==0 && p->sharable) || p->pBt->db==p->db ); - - if( !p->sharable ) return; - p->wantToLock++; - if( p->locked ) return; - btreeLockCarefully(p); -} - -/* This is a helper function for sqlite3BtreeLock(). 
By moving -** complex, but seldom used logic, out of sqlite3BtreeLock() and -** into this routine, we avoid unnecessary stack pointer changes -** and thus help the sqlite3BtreeLock() routine to run much faster -** in the common case. -*/ -static void SQLITE_NOINLINE btreeLockCarefully(Btree *p){ - Btree *pLater; - - /* In most cases, we should be able to acquire the lock we - ** want without having to go through the ascending lock - ** procedure that follows. Just be sure not to block. - */ - if( sqlite3_mutex_try(p->pBt->mutex)==SQLITE_OK ){ - p->pBt->db = p->db; - p->locked = 1; - return; - } - - /* To avoid deadlock, first release all locks with a larger - ** BtShared address. Then acquire our lock. Then reacquire - ** the other BtShared locks that we used to hold in ascending - ** order. - */ - for(pLater=p->pNext; pLater; pLater=pLater->pNext){ - assert( pLater->sharable ); - assert( pLater->pNext==0 || pLater->pNext->pBt>pLater->pBt ); - assert( !pLater->locked || pLater->wantToLock>0 ); - if( pLater->locked ){ - unlockBtreeMutex(pLater); - } - } - lockBtreeMutex(p); - for(pLater=p->pNext; pLater; pLater=pLater->pNext){ - if( pLater->wantToLock ){ - lockBtreeMutex(pLater); - } - } -} - - -/* -** Exit the recursive mutex on a Btree. -*/ -void sqlite3BtreeLeave(Btree *p){ - assert( sqlite3_mutex_held(p->db->mutex) ); - if( p->sharable ){ - assert( p->wantToLock>0 ); - p->wantToLock--; - if( p->wantToLock==0 ){ - unlockBtreeMutex(p); - } - } -} - -#ifndef NDEBUG -/* -** Return true if the BtShared mutex is held on the btree, or if the -** B-Tree is not marked as sharable. -** -** This routine is used only from within assert() statements. 
-*/ -int sqlite3BtreeHoldsMutex(Btree *p){ - assert( p->sharable==0 || p->locked==0 || p->wantToLock>0 ); - assert( p->sharable==0 || p->locked==0 || p->db==p->pBt->db ); - assert( p->sharable==0 || p->locked==0 || sqlite3_mutex_held(p->pBt->mutex) ); - assert( p->sharable==0 || p->locked==0 || sqlite3_mutex_held(p->db->mutex) ); - - return (p->sharable==0 || p->locked); -} -#endif - - -/* -** Enter the mutex on every Btree associated with a database -** connection. This is needed (for example) prior to parsing -** a statement since we will be comparing table and column names -** against all schemas and we do not want those schemas being -** reset out from under us. -** -** There is a corresponding leave-all procedures. -** -** Enter the mutexes in accending order by BtShared pointer address -** to avoid the possibility of deadlock when two threads with -** two or more btrees in common both try to lock all their btrees -** at the same instant. -*/ -static void SQLITE_NOINLINE btreeEnterAll(sqlite3 *db){ - int i; - int skipOk = 1; - Btree *p; - assert( sqlite3_mutex_held(db->mutex) ); - for(i=0; inDb; i++){ - p = db->aDb[i].pBt; - if( p && p->sharable ){ - sqlite3BtreeEnter(p); - skipOk = 0; - } - } - db->noSharedCache = skipOk; -} -void sqlite3BtreeEnterAll(sqlite3 *db){ - if( db->noSharedCache==0 ) btreeEnterAll(db); -} -static void SQLITE_NOINLINE btreeLeaveAll(sqlite3 *db){ - int i; - Btree *p; - assert( sqlite3_mutex_held(db->mutex) ); - for(i=0; inDb; i++){ - p = db->aDb[i].pBt; - if( p ) sqlite3BtreeLeave(p); - } -} -void sqlite3BtreeLeaveAll(sqlite3 *db){ - if( db->noSharedCache==0 ) btreeLeaveAll(db); -} - -#ifndef NDEBUG -/* -** Return true if the current thread holds the database connection -** mutex and all required BtShared mutexes. -** -** This routine is used inside assert() statements only. 
-*/ -int sqlite3BtreeHoldsAllMutexes(sqlite3 *db){ - int i; - if( !sqlite3_mutex_held(db->mutex) ){ - return 0; - } - for(i=0; inDb; i++){ - Btree *p; - p = db->aDb[i].pBt; - if( p && p->sharable && - (p->wantToLock==0 || !sqlite3_mutex_held(p->pBt->mutex)) ){ - return 0; - } - } - return 1; -} -#endif /* NDEBUG */ - -#ifndef NDEBUG -/* -** Return true if the correct mutexes are held for accessing the -** db->aDb[iDb].pSchema structure. The mutexes required for schema -** access are: -** -** (1) The mutex on db -** (2) if iDb!=1, then the mutex on db->aDb[iDb].pBt. -** -** If pSchema is not NULL, then iDb is computed from pSchema and -** db using sqlite3SchemaToIndex(). -*/ -int sqlite3SchemaMutexHeld(sqlite3 *db, int iDb, Schema *pSchema){ - Btree *p; - assert( db!=0 ); - if( pSchema ) iDb = sqlite3SchemaToIndex(db, pSchema); - assert( iDb>=0 && iDbnDb ); - if( !sqlite3_mutex_held(db->mutex) ) return 0; - if( iDb==1 ) return 1; - p = db->aDb[iDb].pBt; - assert( p!=0 ); - return p->sharable==0 || p->locked==1; -} -#endif /* NDEBUG */ - -#else /* SQLITE_THREADSAFE>0 above. SQLITE_THREADSAFE==0 below */ -/* -** The following are special cases for mutex enter routines for use -** in single threaded applications that use shared cache. Except for -** these two routines, all mutex operations are no-ops in that case and -** are null #defines in btree.h. -** -** If shared cache is disabled, then all btree mutex routines, including -** the ones below, are no-ops and are null #defines in btree.h. -*/ - -void sqlite3BtreeEnter(Btree *p){ - p->pBt->db = p->db; -} -void sqlite3BtreeEnterAll(sqlite3 *db){ - int i; - for(i=0; inDb; i++){ - Btree *p = db->aDb[i].pBt; - if( p ){ - p->pBt->db = p->db; - } - } -} -#endif /* if SQLITE_THREADSAFE */ - -#ifndef SQLITE_OMIT_INCRBLOB -/* -** Enter a mutex on a Btree given a cursor owned by that Btree. -** -** These entry points are used by incremental I/O only. 
Enter() is required -** any time OMIT_SHARED_CACHE is not defined, regardless of whether or not -** the build is threadsafe. Leave() is only required by threadsafe builds. -*/ -void sqlite3BtreeEnterCursor(BtCursor *pCur){ - sqlite3BtreeEnter(pCur->pBtree); -} -# if SQLITE_THREADSAFE -void sqlite3BtreeLeaveCursor(BtCursor *pCur){ - sqlite3BtreeLeave(pCur->pBtree); -} -# endif -#endif /* ifndef SQLITE_OMIT_INCRBLOB */ - -#endif /* ifndef SQLITE_OMIT_SHARED_CACHE */ diff --git a/source/libs/tdb/src/sqlite/btree.c b/source/libs/tdb/src/sqlite/btree.c deleted file mode 100644 index 1169b2b821..0000000000 --- a/source/libs/tdb/src/sqlite/btree.c +++ /dev/null @@ -1,10869 +0,0 @@ -/* -** 2004 April 6 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This file implements an external (disk-based) database using BTrees. -** See the header comment on "btreeInt.h" for additional information. -** Including a description of file format and an overview of operation. -*/ -#include "btreeInt.h" - -/* -** The header string that appears at the beginning of every -** SQLite database. -*/ -static const char zMagicHeader[] = SQLITE_FILE_HEADER; - -/* -** Set this global variable to 1 to enable tracing using the TRACE -** macro. -*/ -#if 0 -int sqlite3BtreeTrace=1; /* True to enable tracing */ -# define TRACE(X) if(sqlite3BtreeTrace){printf X;fflush(stdout);} -#else -# define TRACE(X) -#endif - -/* -** Extract a 2-byte big-endian integer from an array of unsigned bytes. -** But if the value is zero, make it 65536. -** -** This routine is used to extract the "offset to cell content area" value -** from the header of a btree page. 
If the page size is 65536 and the page -** is empty, the offset should be 65536, but the 2-byte value stores zero. -** This routine makes the necessary adjustment to 65536. -*/ -#define get2byteNotZero(X) (((((int)get2byte(X))-1)&0xffff)+1) - -/* -** Values passed as the 5th argument to allocateBtreePage() -*/ -#define BTALLOC_ANY 0 /* Allocate any page */ -#define BTALLOC_EXACT 1 /* Allocate exact page if possible */ -#define BTALLOC_LE 2 /* Allocate any page <= the parameter */ - -/* -** Macro IfNotOmitAV(x) returns (x) if SQLITE_OMIT_AUTOVACUUM is not -** defined, or 0 if it is. For example: -** -** bIncrVacuum = IfNotOmitAV(pBtShared->incrVacuum); -*/ -#ifndef SQLITE_OMIT_AUTOVACUUM -#define IfNotOmitAV(expr) (expr) -#else -#define IfNotOmitAV(expr) 0 -#endif - -#ifndef SQLITE_OMIT_SHARED_CACHE -/* -** A list of BtShared objects that are eligible for participation -** in shared cache. This variable has file scope during normal builds, -** but the test harness needs to access it so we make it global for -** test builds. -** -** Access to this variable is protected by SQLITE_MUTEX_STATIC_MAIN. -*/ -#ifdef SQLITE_TEST -BtShared *SQLITE_WSD sqlite3SharedCacheList = 0; -#else -static BtShared *SQLITE_WSD sqlite3SharedCacheList = 0; -#endif -#endif /* SQLITE_OMIT_SHARED_CACHE */ - -#ifndef SQLITE_OMIT_SHARED_CACHE -/* -** Enable or disable the shared pager and schema features. -** -** This routine has no effect on existing database connections. -** The shared cache setting effects only future calls to -** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2(). -*/ -int sqlite3_enable_shared_cache(int enable){ - sqlite3GlobalConfig.sharedCacheEnabled = enable; - return SQLITE_OK; -} -#endif - - - -#ifdef SQLITE_OMIT_SHARED_CACHE - /* - ** The functions querySharedCacheTableLock(), setSharedCacheTableLock(), - ** and clearAllSharedCacheTableLocks() - ** manipulate entries in the BtShared.pLock linked list used to store - ** shared-cache table level locks. 
If the library is compiled with the - ** shared-cache feature disabled, then there is only ever one user - ** of each BtShared structure and so this locking is not necessary. - ** So define the lock related functions as no-ops. - */ - #define querySharedCacheTableLock(a,b,c) SQLITE_OK - #define setSharedCacheTableLock(a,b,c) SQLITE_OK - #define clearAllSharedCacheTableLocks(a) - #define downgradeAllSharedCacheTableLocks(a) - #define hasSharedCacheTableLock(a,b,c,d) 1 - #define hasReadConflicts(a, b) 0 -#endif - -#ifdef SQLITE_DEBUG -/* -** Return and reset the seek counter for a Btree object. -*/ -sqlite3_uint64 sqlite3BtreeSeekCount(Btree *pBt){ - u64 n = pBt->nSeek; - pBt->nSeek = 0; - return n; -} -#endif - -/* -** Implementation of the SQLITE_CORRUPT_PAGE() macro. Takes a single -** (MemPage*) as an argument. The (MemPage*) must not be NULL. -** -** If SQLITE_DEBUG is not defined, then this macro is equivalent to -** SQLITE_CORRUPT_BKPT. Or, if SQLITE_DEBUG is set, then the log message -** normally produced as a side-effect of SQLITE_CORRUPT_BKPT is augmented -** with the page number and filename associated with the (MemPage*). -*/ -#ifdef SQLITE_DEBUG -int corruptPageError(int lineno, MemPage *p){ - char *zMsg; - sqlite3BeginBenignMalloc(); - zMsg = sqlite3_mprintf("database corruption page %d of %s", - (int)p->pgno, sqlite3PagerFilename(p->pBt->pPager, 0) - ); - sqlite3EndBenignMalloc(); - if( zMsg ){ - sqlite3ReportError(SQLITE_CORRUPT, lineno, zMsg); - } - sqlite3_free(zMsg); - return SQLITE_CORRUPT_BKPT; -} -# define SQLITE_CORRUPT_PAGE(pMemPage) corruptPageError(__LINE__, pMemPage) -#else -# define SQLITE_CORRUPT_PAGE(pMemPage) SQLITE_CORRUPT_PGNO(pMemPage->pgno) -#endif - -#ifndef SQLITE_OMIT_SHARED_CACHE - -#ifdef SQLITE_DEBUG -/* -**** This function is only used as part of an assert() statement. *** -** -** Check to see if pBtree holds the required locks to read or write to the -** table with root page iRoot. Return 1 if it does and 0 if not. 
-** -** For example, when writing to a table with root-page iRoot via -** Btree connection pBtree: -** -** assert( hasSharedCacheTableLock(pBtree, iRoot, 0, WRITE_LOCK) ); -** -** When writing to an index that resides in a sharable database, the -** caller should have first obtained a lock specifying the root page of -** the corresponding table. This makes things a bit more complicated, -** as this module treats each table as a separate structure. To determine -** the table corresponding to the index being written, this -** function has to search through the database schema. -** -** Instead of a lock on the table/index rooted at page iRoot, the caller may -** hold a write-lock on the schema table (root page 1). This is also -** acceptable. -*/ -static int hasSharedCacheTableLock( - Btree *pBtree, /* Handle that must hold lock */ - Pgno iRoot, /* Root page of b-tree */ - int isIndex, /* True if iRoot is the root of an index b-tree */ - int eLockType /* Required lock type (READ_LOCK or WRITE_LOCK) */ -){ - Schema *pSchema = (Schema *)pBtree->pBt->pSchema; - Pgno iTab = 0; - BtLock *pLock; - - /* If this database is not shareable, or if the client is reading - ** and has the read-uncommitted flag set, then no lock is required. - ** Return true immediately. - */ - if( (pBtree->sharable==0) - || (eLockType==READ_LOCK && (pBtree->db->flags & SQLITE_ReadUncommit)) - ){ - return 1; - } - - /* If the client is reading or writing an index and the schema is - ** not loaded, then it is too difficult to actually check to see if - ** the correct locks are held. So do not bother - just return true. - ** This case does not come up very often anyhow. - */ - if( isIndex && (!pSchema || (pSchema->schemaFlags&DB_SchemaLoaded)==0) ){ - return 1; - } - - /* Figure out the root-page that the lock should be held on. For table - ** b-trees, this is just the root page of the b-tree being read or - ** written. For index b-trees, it is the root page of the associated - ** table. 
*/ - if( isIndex ){ - HashElem *p; - int bSeen = 0; - for(p=sqliteHashFirst(&pSchema->idxHash); p; p=sqliteHashNext(p)){ - Index *pIdx = (Index *)sqliteHashData(p); - if( pIdx->tnum==(int)iRoot ){ - if( bSeen ){ - /* Two or more indexes share the same root page. There must - ** be imposter tables. So just return true. The assert is not - ** useful in that case. */ - return 1; - } - iTab = pIdx->pTable->tnum; - bSeen = 1; - } - } - }else{ - iTab = iRoot; - } - - /* Search for the required lock. Either a write-lock on root-page iTab, a - ** write-lock on the schema table, or (if the client is reading) a - ** read-lock on iTab will suffice. Return 1 if any of these are found. */ - for(pLock=pBtree->pBt->pLock; pLock; pLock=pLock->pNext){ - if( pLock->pBtree==pBtree - && (pLock->iTable==iTab || (pLock->eLock==WRITE_LOCK && pLock->iTable==1)) - && pLock->eLock>=eLockType - ){ - return 1; - } - } - - /* Failed to find the required lock. */ - return 0; -} -#endif /* SQLITE_DEBUG */ - -#ifdef SQLITE_DEBUG -/* -**** This function may be used as part of assert() statements only. **** -** -** Return true if it would be illegal for pBtree to write into the -** table or index rooted at iRoot because other shared connections are -** simultaneously reading that same table or index. -** -** It is illegal for pBtree to write if some other Btree object that -** shares the same BtShared object is currently reading or writing -** the iRoot table. Except, if the other Btree object has the -** read-uncommitted flag set, then it is OK for the other object to -** have a read cursor. 
-** -** For example, before writing to any part of the table or index -** rooted at page iRoot, one should call: -** -** assert( !hasReadConflicts(pBtree, iRoot) ); -*/ -static int hasReadConflicts(Btree *pBtree, Pgno iRoot){ - BtCursor *p; - for(p=pBtree->pBt->pCursor; p; p=p->pNext){ - if( p->pgnoRoot==iRoot - && p->pBtree!=pBtree - && 0==(p->pBtree->db->flags & SQLITE_ReadUncommit) - ){ - return 1; - } - } - return 0; -} -#endif /* #ifdef SQLITE_DEBUG */ - -/* -** Query to see if Btree handle p may obtain a lock of type eLock -** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return -** SQLITE_OK if the lock may be obtained (by calling -** setSharedCacheTableLock()), or SQLITE_LOCKED if not. -*/ -static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){ - BtShared *pBt = p->pBt; - BtLock *pIter; - - assert( sqlite3BtreeHoldsMutex(p) ); - assert( eLock==READ_LOCK || eLock==WRITE_LOCK ); - assert( p->db!=0 ); - assert( !(p->db->flags&SQLITE_ReadUncommit)||eLock==WRITE_LOCK||iTab==1 ); - - /* If requesting a write-lock, then the Btree must have an open write - ** transaction on this file. And, obviously, for this to be so there - ** must be an open write transaction on the file itself. - */ - assert( eLock==READ_LOCK || (p==pBt->pWriter && p->inTrans==TRANS_WRITE) ); - assert( eLock==READ_LOCK || pBt->inTransaction==TRANS_WRITE ); - - /* This routine is a no-op if the shared-cache is not enabled */ - if( !p->sharable ){ - return SQLITE_OK; - } - - /* If some other connection is holding an exclusive lock, the - ** requested lock may not be obtained. - */ - if( pBt->pWriter!=p && (pBt->btsFlags & BTS_EXCLUSIVE)!=0 ){ - sqlite3ConnectionBlocked(p->db, pBt->pWriter->db); - return SQLITE_LOCKED_SHAREDCACHE; - } - - for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ - /* The condition (pIter->eLock!=eLock) in the following if(...) 
- ** statement is a simplification of: - ** - ** (eLock==WRITE_LOCK || pIter->eLock==WRITE_LOCK) - ** - ** since we know that if eLock==WRITE_LOCK, then no other connection - ** may hold a WRITE_LOCK on any table in this file (since there can - ** only be a single writer). - */ - assert( pIter->eLock==READ_LOCK || pIter->eLock==WRITE_LOCK ); - assert( eLock==READ_LOCK || pIter->pBtree==p || pIter->eLock==READ_LOCK); - if( pIter->pBtree!=p && pIter->iTable==iTab && pIter->eLock!=eLock ){ - sqlite3ConnectionBlocked(p->db, pIter->pBtree->db); - if( eLock==WRITE_LOCK ){ - assert( p==pBt->pWriter ); - pBt->btsFlags |= BTS_PENDING; - } - return SQLITE_LOCKED_SHAREDCACHE; - } - } - return SQLITE_OK; -} -#endif /* !SQLITE_OMIT_SHARED_CACHE */ - -#ifndef SQLITE_OMIT_SHARED_CACHE -/* -** Add a lock on the table with root-page iTable to the shared-btree used -** by Btree handle p. Parameter eLock must be either READ_LOCK or -** WRITE_LOCK. -** -** This function assumes the following: -** -** (a) The specified Btree object p is connected to a sharable -** database (one with the BtShared.sharable flag set), and -** -** (b) No other Btree objects hold a lock that conflicts -** with the requested lock (i.e. querySharedCacheTableLock() has -** already been called and returned SQLITE_OK). -** -** SQLITE_OK is returned if the lock is added successfully. SQLITE_NOMEM -** is returned if a malloc attempt fails. -*/ -static int setSharedCacheTableLock(Btree *p, Pgno iTable, u8 eLock){ - BtShared *pBt = p->pBt; - BtLock *pLock = 0; - BtLock *pIter; - - assert( sqlite3BtreeHoldsMutex(p) ); - assert( eLock==READ_LOCK || eLock==WRITE_LOCK ); - assert( p->db!=0 ); - - /* A connection with the read-uncommitted flag set will never try to - ** obtain a read-lock using this function. The only read-lock obtained - ** by a connection in read-uncommitted mode is on the sqlite_schema - ** table, and that lock is obtained in BtreeBeginTrans(). 
*/ - assert( 0==(p->db->flags&SQLITE_ReadUncommit) || eLock==WRITE_LOCK ); - - /* This function should only be called on a sharable b-tree after it - ** has been determined that no other b-tree holds a conflicting lock. */ - assert( p->sharable ); - assert( SQLITE_OK==querySharedCacheTableLock(p, iTable, eLock) ); - - /* First search the list for an existing lock on this table. */ - for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ - if( pIter->iTable==iTable && pIter->pBtree==p ){ - pLock = pIter; - break; - } - } - - /* If the above search did not find a BtLock struct associating Btree p - ** with table iTable, allocate one and link it into the list. - */ - if( !pLock ){ - pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock)); - if( !pLock ){ - return SQLITE_NOMEM; - } - pLock->iTable = iTable; - pLock->pBtree = p; - pLock->pNext = pBt->pLock; - pBt->pLock = pLock; - } - - /* Set the BtLock.eLock variable to the maximum of the current lock - ** and the requested lock. This means if a write-lock was already held - ** and a read-lock requested, we don't incorrectly downgrade the lock. - */ - assert( WRITE_LOCK>READ_LOCK ); - if( eLock>pLock->eLock ){ - pLock->eLock = eLock; - } - - return SQLITE_OK; -} -#endif /* !SQLITE_OMIT_SHARED_CACHE */ - -#ifndef SQLITE_OMIT_SHARED_CACHE -/* -** Release all the table locks (locks obtained via calls to -** the setSharedCacheTableLock() procedure) held by Btree object p. -** -** This function assumes that Btree p has an open read or write -** transaction. If it does not, then the BTS_PENDING flag -** may be incorrectly cleared. 
-*/ -static void clearAllSharedCacheTableLocks(Btree *p){ - BtShared *pBt = p->pBt; - BtLock **ppIter = &pBt->pLock; - - assert( sqlite3BtreeHoldsMutex(p) ); - assert( p->sharable || 0==*ppIter ); - assert( p->inTrans>0 ); - - while( *ppIter ){ - BtLock *pLock = *ppIter; - assert( (pBt->btsFlags & BTS_EXCLUSIVE)==0 || pBt->pWriter==pLock->pBtree ); - assert( pLock->pBtree->inTrans>=pLock->eLock ); - if( pLock->pBtree==p ){ - *ppIter = pLock->pNext; - assert( pLock->iTable!=1 || pLock==&p->lock ); - if( pLock->iTable!=1 ){ - sqlite3_free(pLock); - } - }else{ - ppIter = &pLock->pNext; - } - } - - assert( (pBt->btsFlags & BTS_PENDING)==0 || pBt->pWriter ); - if( pBt->pWriter==p ){ - pBt->pWriter = 0; - pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING); - }else if( pBt->nTransaction==2 ){ - /* This function is called when Btree p is concluding its - ** transaction. If there currently exists a writer, and p is not - ** that writer, then the number of locks held by connections other - ** than the writer must be about to drop to zero. In this case - ** set the BTS_PENDING flag to 0. - ** - ** If there is not currently a writer, then BTS_PENDING must - ** be zero already. So this next line is harmless in that case. - */ - pBt->btsFlags &= ~BTS_PENDING; - } -} - -/* -** This function changes all write-locks held by Btree p into read-locks. 
-*/ -static void downgradeAllSharedCacheTableLocks(Btree *p){ - BtShared *pBt = p->pBt; - if( pBt->pWriter==p ){ - BtLock *pLock; - pBt->pWriter = 0; - pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING); - for(pLock=pBt->pLock; pLock; pLock=pLock->pNext){ - assert( pLock->eLock==READ_LOCK || pLock->pBtree==p ); - pLock->eLock = READ_LOCK; - } - } -} - -#endif /* SQLITE_OMIT_SHARED_CACHE */ - -static void releasePage(MemPage *pPage); /* Forward reference */ -static void releasePageOne(MemPage *pPage); /* Forward reference */ -static void releasePageNotNull(MemPage *pPage); /* Forward reference */ - -/* -***** This routine is used inside of assert() only **** -** -** Verify that the cursor holds the mutex on its BtShared -*/ -#ifdef SQLITE_DEBUG -static int cursorHoldsMutex(BtCursor *p){ - return sqlite3_mutex_held(p->pBt->mutex); -} - -/* Verify that the cursor and the BtShared agree about what is the current -** database connetion. This is important in shared-cache mode. If the database -** connection pointers get out-of-sync, it is possible for routines like -** btreeInitPage() to reference an stale connection pointer that references a -** a connection that has already closed. This routine is used inside assert() -** statements only and for the purpose of double-checking that the btree code -** does keep the database connection pointers up-to-date. -*/ -static int cursorOwnsBtShared(BtCursor *p){ - assert( cursorHoldsMutex(p) ); - return (p->pBtree->db==p->pBt->db); -} -#endif - -/* -** Invalidate the overflow cache of the cursor passed as the first argument. -** on the shared btree structure pBt. -*/ -#define invalidateOverflowCache(pCur) (pCur->curFlags &= ~BTCF_ValidOvfl) - -/* -** Invalidate the overflow page-list cache for all cursors opened -** on the shared btree structure pBt. 
-*/ -static void invalidateAllOverflowCache(BtShared *pBt){ - BtCursor *p; - assert( sqlite3_mutex_held(pBt->mutex) ); - for(p=pBt->pCursor; p; p=p->pNext){ - invalidateOverflowCache(p); - } -} - -#ifndef SQLITE_OMIT_INCRBLOB -/* -** This function is called before modifying the contents of a table -** to invalidate any incrblob cursors that are open on the -** row or one of the rows being modified. -** -** If argument isClearTable is true, then the entire contents of the -** table is about to be deleted. In this case invalidate all incrblob -** cursors open on any row within the table with root-page pgnoRoot. -** -** Otherwise, if argument isClearTable is false, then the row with -** rowid iRow is being replaced or deleted. In this case invalidate -** only those incrblob cursors open on that specific row. -*/ -static void invalidateIncrblobCursors( - Btree *pBtree, /* The database file to check */ - Pgno pgnoRoot, /* The table that might be changing */ - i64 iRow, /* The rowid that might be changing */ - int isClearTable /* True if all rows are being deleted */ -){ - BtCursor *p; - assert( pBtree->hasIncrblobCur ); - assert( sqlite3BtreeHoldsMutex(pBtree) ); - pBtree->hasIncrblobCur = 0; - for(p=pBtree->pBt->pCursor; p; p=p->pNext){ - if( (p->curFlags & BTCF_Incrblob)!=0 ){ - pBtree->hasIncrblobCur = 1; - if( p->pgnoRoot==pgnoRoot && (isClearTable || p->info.nKey==iRow) ){ - p->eState = CURSOR_INVALID; - } - } - } -} - -#else - /* Stub function when INCRBLOB is omitted */ - #define invalidateIncrblobCursors(w,x,y,z) -#endif /* SQLITE_OMIT_INCRBLOB */ - -/* -** Set bit pgno of the BtShared.pHasContent bitvec. This is called -** when a page that previously contained data becomes a free-list leaf -** page. 
-** -** The BtShared.pHasContent bitvec exists to work around an obscure -** bug caused by the interaction of two useful IO optimizations surrounding -** free-list leaf pages: -** -** 1) When all data is deleted from a page and the page becomes -** a free-list leaf page, the page is not written to the database -** (as free-list leaf pages contain no meaningful data). Sometimes -** such a page is not even journalled (as it will not be modified, -** why bother journalling it?). -** -** 2) When a free-list leaf page is reused, its content is not read -** from the database or written to the journal file (why should it -** be, if it is not at all meaningful?). -** -** By themselves, these optimizations work fine and provide a handy -** performance boost to bulk delete or insert operations. However, if -** a page is moved to the free-list and then reused within the same -** transaction, a problem comes up. If the page is not journalled when -** it is moved to the free-list and it is also not journalled when it -** is extracted from the free-list and reused, then the original data -** may be lost. In the event of a rollback, it may not be possible -** to restore the database to its original configuration. -** -** The solution is the BtShared.pHasContent bitvec. Whenever a page is -** moved to become a free-list leaf page, the corresponding bit is -** set in the bitvec. Whenever a leaf page is extracted from the free-list, -** optimization 2 above is omitted if the corresponding bit is already -** set in BtShared.pHasContent. The contents of the bitvec are cleared -** at the end of every transaction. 
-*/ -static int btreeSetHasContent(BtShared *pBt, Pgno pgno){ - int rc = SQLITE_OK; - if( !pBt->pHasContent ){ - assert( pgno<=pBt->nPage ); - pBt->pHasContent = sqlite3BitvecCreate(pBt->nPage); - if( !pBt->pHasContent ){ - rc = SQLITE_NOMEM; - } - } - if( rc==SQLITE_OK && pgno<=sqlite3BitvecSize(pBt->pHasContent) ){ - rc = sqlite3BitvecSet(pBt->pHasContent, pgno); - } - return rc; -} - -/* -** Query the BtShared.pHasContent vector. -** -** This function is called when a free-list leaf page is removed from the -** free-list for reuse. It returns false if it is safe to retrieve the -** page from the pager layer with the 'no-content' flag set. True otherwise. -*/ -static int btreeGetHasContent(BtShared *pBt, Pgno pgno){ - Bitvec *p = pBt->pHasContent; - return p && (pgno>sqlite3BitvecSize(p) || sqlite3BitvecTestNotNull(p, pgno)); -} - -/* -** Clear (destroy) the BtShared.pHasContent bitvec. This should be -** invoked at the conclusion of each write-transaction. -*/ -static void btreeClearHasContent(BtShared *pBt){ - sqlite3BitvecDestroy(pBt->pHasContent); - pBt->pHasContent = 0; -} - -/* -** Release all of the apPage[] pages for a cursor. -*/ -static void btreeReleaseAllCursorPages(BtCursor *pCur){ - int i; - if( pCur->iPage>=0 ){ - for(i=0; iiPage; i++){ - releasePageNotNull(pCur->apPage[i]); - } - releasePageNotNull(pCur->pPage); - pCur->iPage = -1; - } -} - -/* -** The cursor passed as the only argument must point to a valid entry -** when this function is called (i.e. have eState==CURSOR_VALID). This -** function saves the current cursor key in variables pCur->nKey and -** pCur->pKey. SQLITE_OK is returned if successful or an SQLite error -** code otherwise. -** -** If the cursor is open on an intkey table, then the integer key -** (the rowid) is stored in pCur->nKey and pCur->pKey is left set to -** NULL. If the cursor is open on a non-intkey table, then pCur->pKey is -** set to point to a malloced buffer pCur->nKey bytes in size containing -** the key. 
-*/ -static int saveCursorKey(BtCursor *pCur){ - int rc = SQLITE_OK; - assert( CURSOR_VALID==pCur->eState ); - assert( 0==pCur->pKey ); - assert( cursorHoldsMutex(pCur) ); - - if( pCur->curIntKey ){ - /* Only the rowid is required for a table btree */ - pCur->nKey = sqlite3BtreeIntegerKey(pCur); - }else{ - /* For an index btree, save the complete key content. It is possible - ** that the current key is corrupt. In that case, it is possible that - ** the sqlite3VdbeRecordUnpack() function may overread the buffer by - ** up to the size of 1 varint plus 1 8-byte value when the cursor - ** position is restored. Hence the 17 bytes of padding allocated - ** below. */ - void *pKey; - pCur->nKey = sqlite3BtreePayloadSize(pCur); - pKey = sqlite3Malloc( pCur->nKey + 9 + 8 ); - if( pKey ){ - rc = sqlite3BtreePayload(pCur, 0, (int)pCur->nKey, pKey); - if( rc==SQLITE_OK ){ - memset(((u8*)pKey)+pCur->nKey, 0, 9+8); - pCur->pKey = pKey; - }else{ - sqlite3_free(pKey); - } - }else{ - rc = SQLITE_NOMEM; - } - } - assert( !pCur->curIntKey || !pCur->pKey ); - return rc; -} - -/* -** Save the current cursor position in the variables BtCursor.nKey -** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK. -** -** The caller must ensure that the cursor is valid (has eState==CURSOR_VALID) -** prior to calling this routine. 
-*/ -static int saveCursorPosition(BtCursor *pCur){ - int rc; - - assert( CURSOR_VALID==pCur->eState || CURSOR_SKIPNEXT==pCur->eState ); - assert( 0==pCur->pKey ); - assert( cursorHoldsMutex(pCur) ); - - if( pCur->curFlags & BTCF_Pinned ){ - return SQLITE_CONSTRAINT_PINNED; - } - if( pCur->eState==CURSOR_SKIPNEXT ){ - pCur->eState = CURSOR_VALID; - }else{ - pCur->skipNext = 0; - } - - rc = saveCursorKey(pCur); - if( rc==SQLITE_OK ){ - btreeReleaseAllCursorPages(pCur); - pCur->eState = CURSOR_REQUIRESEEK; - } - - pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl|BTCF_AtLast); - return rc; -} - -/* Forward reference */ -static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*); - -/* -** Save the positions of all cursors (except pExcept) that are open on -** the table with root-page iRoot. "Saving the cursor position" means that -** the location in the btree is remembered in such a way that it can be -** moved back to the same spot after the btree has been modified. This -** routine is called just before cursor pExcept is used to modify the -** table, for example in BtreeDelete() or BtreeInsert(). -** -** If there are two or more cursors on the same btree, then all such -** cursors should have their BTCF_Multiple flag set. The btreeCursor() -** routine enforces that rule. This routine only needs to be called in -** the uncommon case when pExpect has the BTCF_Multiple flag set. -** -** If pExpect!=NULL and if no other cursors are found on the same root-page, -** then the BTCF_Multiple flag on pExpect is cleared, to avoid another -** pointless call to this routine. -** -** Implementation note: This routine merely checks to see if any cursors -** need to be saved. It calls out to saveCursorsOnList() in the (unusual) -** event that cursors are in need to being saved. 
-*/ -static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ - BtCursor *p; - assert( sqlite3_mutex_held(pBt->mutex) ); - assert( pExcept==0 || pExcept->pBt==pBt ); - for(p=pBt->pCursor; p; p=p->pNext){ - if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break; - } - if( p ) return saveCursorsOnList(p, iRoot, pExcept); - if( pExcept ) pExcept->curFlags &= ~BTCF_Multiple; - return SQLITE_OK; -} - -/* This helper routine to saveAllCursors does the actual work of saving -** the cursors if and when a cursor is found that actually requires saving. -** The common case is that no cursors need to be saved, so this routine is -** broken out from its caller to avoid unnecessary stack pointer movement. -*/ -static int SQLITE_NOINLINE saveCursorsOnList( - BtCursor *p, /* The first cursor that needs saving */ - Pgno iRoot, /* Only save cursor with this iRoot. Save all if zero */ - BtCursor *pExcept /* Do not save this cursor */ -){ - do{ - if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){ - if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){ - int rc = saveCursorPosition(p); - if( SQLITE_OK!=rc ){ - return rc; - } - }else{ - testcase( p->iPage>=0 ); - btreeReleaseAllCursorPages(p); - } - } - p = p->pNext; - }while( p ); - return SQLITE_OK; -} - -/* -** Clear the current cursor position. -*/ -void sqlite3BtreeClearCursor(BtCursor *pCur){ - assert( cursorHoldsMutex(pCur) ); - sqlite3_free(pCur->pKey); - pCur->pKey = 0; - pCur->eState = CURSOR_INVALID; -} - -/* -** In this version of BtreeMoveto, pKey is a packed index record -** such as is generated by the OP_MakeRecord opcode. Unpack the -** record and then call BtreeMovetoUnpacked() to do the work. -*/ -static int btreeMoveto( - BtCursor *pCur, /* Cursor open on the btree to be searched */ - const void *pKey, /* Packed key if the btree is an index */ - i64 nKey, /* Integer key for tables. 
Size of pKey for indices */ - int bias, /* Bias search to the high end */ - int *pRes /* Write search results here */ -){ - int rc; /* Status code */ - UnpackedRecord *pIdxKey; /* Unpacked index key */ - - if( pKey ){ - KeyInfo *pKeyInfo = pCur->pKeyInfo; - assert( nKey==(i64)(int)nKey ); - pIdxKey = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); - if( pIdxKey==0 ) return SQLITE_NOMEM; - sqlite3VdbeRecordUnpack(pKeyInfo, (int)nKey, pKey, pIdxKey); - if( pIdxKey->nField==0 || pIdxKey->nField>pKeyInfo->nAllField ){ - rc = SQLITE_CORRUPT_BKPT; - }else{ - rc = sqlite3BtreeIndexMoveto(pCur, pIdxKey, pRes); - } - sqlite3DbFree(pCur->pKeyInfo->db, pIdxKey); - }else{ - pIdxKey = 0; - rc = sqlite3BtreeTableMoveto(pCur, nKey, bias, pRes); - } - return rc; -} - -/* -** Restore the cursor to the position it was in (or as close to as possible) -** when saveCursorPosition() was called. Note that this call deletes the -** saved position info stored by saveCursorPosition(), so there can be -** at most one effective restoreCursorPosition() call after each -** saveCursorPosition(). -*/ -static int btreeRestoreCursorPosition(BtCursor *pCur){ - int rc; - int skipNext = 0; - assert( cursorOwnsBtShared(pCur) ); - assert( pCur->eState>=CURSOR_REQUIRESEEK ); - if( pCur->eState==CURSOR_FAULT ){ - return pCur->skipNext; - } - pCur->eState = CURSOR_INVALID; - if( sqlite3FaultSim(410) ){ - rc = SQLITE_IOERR; - }else{ - rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &skipNext); - } - if( rc==SQLITE_OK ){ - sqlite3_free(pCur->pKey); - pCur->pKey = 0; - assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID ); - if( skipNext ) pCur->skipNext = skipNext; - if( pCur->skipNext && pCur->eState==CURSOR_VALID ){ - pCur->eState = CURSOR_SKIPNEXT; - } - } - return rc; -} - -#define restoreCursorPosition(p) \ - (p->eState>=CURSOR_REQUIRESEEK ? 
\ - btreeRestoreCursorPosition(p) : \ - SQLITE_OK) - -/* -** Determine whether or not a cursor has moved from the position where -** it was last placed, or has been invalidated for any other reason. -** Cursors can move when the row they are pointing at is deleted out -** from under them, for example. Cursor might also move if a btree -** is rebalanced. -** -** Calling this routine with a NULL cursor pointer returns false. -** -** Use the separate sqlite3BtreeCursorRestore() routine to restore a cursor -** back to where it ought to be if this routine returns true. -*/ -int sqlite3BtreeCursorHasMoved(BtCursor *pCur){ - assert( EIGHT_BYTE_ALIGNMENT(pCur) - || pCur==sqlite3BtreeFakeValidCursor() ); - assert( offsetof(BtCursor, eState)==0 ); - assert( sizeof(pCur->eState)==1 ); - return CURSOR_VALID != *(u8*)pCur; -} - -/* -** Return a pointer to a fake BtCursor object that will always answer -** false to the sqlite3BtreeCursorHasMoved() routine above. The fake -** cursor returned must not be used with any other Btree interface. -*/ -BtCursor *sqlite3BtreeFakeValidCursor(void){ - static u8 fakeCursor = CURSOR_VALID; - assert( offsetof(BtCursor, eState)==0 ); - return (BtCursor*)&fakeCursor; -} - -/* -** This routine restores a cursor back to its original position after it -** has been moved by some outside activity (such as a btree rebalance or -** a row having been deleted out from under the cursor). -** -** On success, the *pDifferentRow parameter is false if the cursor is left -** pointing at exactly the same row. *pDifferntRow is the row the cursor -** was pointing to has been deleted, forcing the cursor to point to some -** nearby row. -** -** This routine should only be called for a cursor that just returned -** TRUE from sqlite3BtreeCursorHasMoved(). 
-*/ -int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){ - int rc; - - assert( pCur!=0 ); - assert( pCur->eState!=CURSOR_VALID ); - rc = restoreCursorPosition(pCur); - if( rc ){ - *pDifferentRow = 1; - return rc; - } - if( pCur->eState!=CURSOR_VALID ){ - *pDifferentRow = 1; - }else{ - *pDifferentRow = 0; - } - return SQLITE_OK; -} - -#ifdef SQLITE_ENABLE_CURSOR_HINTS -/* -** Provide hints to the cursor. The particular hint given (and the type -** and number of the varargs parameters) is determined by the eHintType -** parameter. See the definitions of the BTREE_HINT_* macros for details. -*/ -void sqlite3BtreeCursorHint(BtCursor *pCur, int eHintType, ...){ - /* Used only by system that substitute their own storage engine */ -} -#endif - -/* -** Provide flag hints to the cursor. -*/ -void sqlite3BtreeCursorHintFlags(BtCursor *pCur, unsigned x){ - assert( x==BTREE_SEEK_EQ || x==BTREE_BULKLOAD || x==0 ); - pCur->hints = x; -} - - -#ifndef SQLITE_OMIT_AUTOVACUUM -/* -** Given a page number of a regular database page, return the page -** number for the pointer-map page that contains the entry for the -** input page number. -** -** Return 0 (not a valid page) for pgno==1 since there is -** no pointer map associated with page 1. The integrity_check logic -** requires that ptrmapPageno(*,1)!=1. -*/ -static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){ - int nPagesPerMapPage; - Pgno iPtrMap, ret; - assert( sqlite3_mutex_held(pBt->mutex) ); - if( pgno<2 ) return 0; - nPagesPerMapPage = (pBt->usableSize/5)+1; - iPtrMap = (pgno-2)/nPagesPerMapPage; - ret = (iPtrMap*nPagesPerMapPage) + 2; - if( ret==PENDING_BYTE_PAGE(pBt) ){ - ret++; - } - return ret; -} - -/* -** Write an entry into the pointer map. -** -** This routine updates the pointer map entry for page number 'key' -** so that it maps to type 'eType' and parent page number 'pgno'. -** -** If *pRC is initially non-zero (non-SQLITE_OK) then this routine is -** a no-op. 
If an error occurs, the appropriate error code is written -** into *pRC. -*/ -static void ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent, int *pRC){ - DbPage *pDbPage; /* The pointer map page */ - u8 *pPtrmap; /* The pointer map data */ - Pgno iPtrmap; /* The pointer map page number */ - int offset; /* Offset in pointer map page */ - int rc; /* Return code from subfunctions */ - - if( *pRC ) return; - - assert( sqlite3_mutex_held(pBt->mutex) ); - /* The super-journal page number must never be used as a pointer map page */ - assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) ); - - assert( pBt->autoVacuum ); - if( key==0 ){ - *pRC = SQLITE_CORRUPT_BKPT; - return; - } - iPtrmap = PTRMAP_PAGENO(pBt, key); - rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage, 0); - if( rc!=SQLITE_OK ){ - *pRC = rc; - return; - } - if( ((char*)sqlite3PagerGetExtra(pDbPage))[0]!=0 ){ - /* The first byte of the extra data is the MemPage.isInit byte. - ** If that byte is set, it means this page is also being used - ** as a btree page. */ - *pRC = SQLITE_CORRUPT_BKPT; - goto ptrmap_exit; - } - offset = PTRMAP_PTROFFSET(iPtrmap, key); - if( offset<0 ){ - *pRC = SQLITE_CORRUPT_BKPT; - goto ptrmap_exit; - } - assert( offset <= (int)pBt->usableSize-5 ); - pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); - - if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){ - TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent)); - *pRC= rc = sqlite3PagerWrite(pDbPage); - if( rc==SQLITE_OK ){ - pPtrmap[offset] = eType; - put4byte(&pPtrmap[offset+1], parent); - } - } - -ptrmap_exit: - sqlite3PagerUnref(pDbPage); -} - -/* -** Read an entry from the pointer map. -** -** This routine retrieves the pointer map entry for page 'key', writing -** the type and parent page number to *pEType and *pPgno respectively. -** An error code is returned if something goes wrong, otherwise SQLITE_OK. 
-*/ -static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ - DbPage *pDbPage; /* The pointer map page */ - int iPtrmap; /* Pointer map page index */ - u8 *pPtrmap; /* Pointer map page data */ - int offset; /* Offset of entry in pointer map */ - int rc; - - assert( sqlite3_mutex_held(pBt->mutex) ); - - iPtrmap = PTRMAP_PAGENO(pBt, key); - rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage, 0); - if( rc!=0 ){ - return rc; - } - pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); - - offset = PTRMAP_PTROFFSET(iPtrmap, key); - if( offset<0 ){ - sqlite3PagerUnref(pDbPage); - return SQLITE_CORRUPT_BKPT; - } - assert( offset <= (int)pBt->usableSize-5 ); - assert( pEType!=0 ); - *pEType = pPtrmap[offset]; - if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]); - - sqlite3PagerUnref(pDbPage); - if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_PGNO(iPtrmap); - return SQLITE_OK; -} - -#else /* if defined SQLITE_OMIT_AUTOVACUUM */ - #define ptrmapPut(w,x,y,z,rc) - #define ptrmapGet(w,x,y,z) SQLITE_OK - #define ptrmapPutOvflPtr(x, y, z, rc) -#endif - -/* -** Given a btree page and a cell index (0 means the first cell on -** the page, 1 means the second cell, and so forth) return a pointer -** to the cell content. -** -** findCellPastPtr() does the same except it skips past the initial -** 4-byte child pointer found on interior pages, if there is one. -** -** This routine works only for pages that do not contain overflow cells. -*/ -#define findCell(P,I) \ - ((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) -#define findCellPastPtr(P,I) \ - ((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) - - -/* -** This is common tail processing for btreeParseCellPtr() and -** btreeParseCellPtrIndex() for the case when the cell does not fit entirely -** on a single B-tree page. Make necessary adjustments to the CellInfo -** structure. 
-*/ -static SQLITE_NOINLINE void btreeParseCellAdjustSizeForOverflow( - MemPage *pPage, /* Page containing the cell */ - u8 *pCell, /* Pointer to the cell text. */ - CellInfo *pInfo /* Fill in this structure */ -){ - /* If the payload will not fit completely on the local page, we have - ** to decide how much to store locally and how much to spill onto - ** overflow pages. The strategy is to minimize the amount of unused - ** space on overflow pages while keeping the amount of local storage - ** in between minLocal and maxLocal. - ** - ** Warning: changing the way overflow payload is distributed in any - ** way will result in an incompatible file format. - */ - int minLocal; /* Minimum amount of payload held locally */ - int maxLocal; /* Maximum amount of payload held locally */ - int surplus; /* Overflow payload available for local storage */ - - minLocal = pPage->minLocal; - maxLocal = pPage->maxLocal; - surplus = minLocal + (pInfo->nPayload - minLocal)%(pPage->pBt->usableSize-4); - testcase( surplus==maxLocal ); - testcase( surplus==maxLocal+1 ); - if( surplus <= maxLocal ){ - pInfo->nLocal = (u16)surplus; - }else{ - pInfo->nLocal = (u16)minLocal; - } - pInfo->nSize = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell) + 4; -} - -/* -** Given a record with nPayload bytes of payload stored within btree -** page pPage, return the number of bytes of payload stored locally. -*/ -static int btreePayloadToLocal(MemPage *pPage, i64 nPayload){ - int maxLocal; /* Maximum amount of payload held locally */ - maxLocal = pPage->maxLocal; - if( nPayload<=maxLocal ){ - return nPayload; - }else{ - int minLocal; /* Minimum amount of payload held locally */ - int surplus; /* Overflow payload available for local storage */ - minLocal = pPage->minLocal; - surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize-4); - return ( surplus <= maxLocal ) ? surplus : minLocal; - } -} - -/* -** The following routines are implementations of the MemPage.xParseCell() -** method. 
-** -** Parse a cell content block and fill in the CellInfo structure. -** -** btreeParseCellPtr() => table btree leaf nodes -** btreeParseCellNoPayload() => table btree internal nodes -** btreeParseCellPtrIndex() => index btree nodes -** -** There is also a wrapper function btreeParseCell() that works for -** all MemPage types and that references the cell by index rather than -** by pointer. -*/ -static void btreeParseCellPtrNoPayload( - MemPage *pPage, /* Page containing the cell */ - u8 *pCell, /* Pointer to the cell text. */ - CellInfo *pInfo /* Fill in this structure */ -){ - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( pPage->leaf==0 ); - assert( pPage->childPtrSize==4 ); -#ifndef SQLITE_DEBUG - UNUSED_PARAMETER(pPage); -#endif - pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey); - pInfo->nPayload = 0; - pInfo->nLocal = 0; - pInfo->pPayload = 0; - return; -} -static void btreeParseCellPtr( - MemPage *pPage, /* Page containing the cell */ - u8 *pCell, /* Pointer to the cell text. */ - CellInfo *pInfo /* Fill in this structure */ -){ - u8 *pIter; /* For scanning through pCell */ - u32 nPayload; /* Number of bytes of cell payload */ - u64 iKey; /* Extracted Key value */ - - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( pPage->leaf==0 || pPage->leaf==1 ); - assert( pPage->intKeyLeaf ); - assert( pPage->childPtrSize==0 ); - pIter = pCell; - - /* The next block of code is equivalent to: - ** - ** pIter += getVarint32(pIter, nPayload); - ** - ** The code is inlined to avoid a function call. - */ - nPayload = *pIter; - if( nPayload>=0x80 ){ - u8 *pEnd = &pIter[8]; - nPayload &= 0x7f; - do{ - nPayload = (nPayload<<7) | (*++pIter & 0x7f); - }while( (*pIter)>=0x80 && pIternKey); - ** - ** The code is inlined to avoid a function call. 
- */ - iKey = *pIter; - if( iKey>=0x80 ){ - u8 *pEnd = &pIter[7]; - iKey &= 0x7f; - while(1){ - iKey = (iKey<<7) | (*++pIter & 0x7f); - if( (*pIter)<0x80 ) break; - if( pIter>=pEnd ){ - iKey = (iKey<<8) | *++pIter; - break; - } - } - } - pIter++; - - pInfo->nKey = *(i64*)&iKey; - pInfo->nPayload = nPayload; - pInfo->pPayload = pIter; - testcase( nPayload==pPage->maxLocal ); - testcase( nPayload==pPage->maxLocal+1 ); - if( nPayload<=pPage->maxLocal ){ - /* This is the (easy) common case where the entire payload fits - ** on the local page. No overflow is required. - */ - pInfo->nSize = nPayload + (u16)(pIter - pCell); - if( pInfo->nSize<4 ) pInfo->nSize = 4; - pInfo->nLocal = (u16)nPayload; - }else{ - btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo); - } -} -static void btreeParseCellPtrIndex( - MemPage *pPage, /* Page containing the cell */ - u8 *pCell, /* Pointer to the cell text. */ - CellInfo *pInfo /* Fill in this structure */ -){ - u8 *pIter; /* For scanning through pCell */ - u32 nPayload; /* Number of bytes of cell payload */ - - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( pPage->leaf==0 || pPage->leaf==1 ); - assert( pPage->intKeyLeaf==0 ); - pIter = pCell + pPage->childPtrSize; - nPayload = *pIter; - if( nPayload>=0x80 ){ - u8 *pEnd = &pIter[8]; - nPayload &= 0x7f; - do{ - nPayload = (nPayload<<7) | (*++pIter & 0x7f); - }while( *(pIter)>=0x80 && pIternKey = nPayload; - pInfo->nPayload = nPayload; - pInfo->pPayload = pIter; - testcase( nPayload==pPage->maxLocal ); - testcase( nPayload==pPage->maxLocal+1 ); - if( nPayload<=pPage->maxLocal ){ - /* This is the (easy) common case where the entire payload fits - ** on the local page. No overflow is required. 
- */ - pInfo->nSize = nPayload + (u16)(pIter - pCell); - if( pInfo->nSize<4 ) pInfo->nSize = 4; - pInfo->nLocal = (u16)nPayload; - }else{ - btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo); - } -} -static void btreeParseCell( - MemPage *pPage, /* Page containing the cell */ - int iCell, /* The cell index. First cell is 0 */ - CellInfo *pInfo /* Fill in this structure */ -){ - pPage->xParseCell(pPage, findCell(pPage, iCell), pInfo); -} - -/* -** The following routines are implementations of the MemPage.xCellSize -** method. -** -** Compute the total number of bytes that a Cell needs in the cell -** data area of the btree-page. The return number includes the cell -** data header and the local payload, but not any overflow page or -** the space used by the cell pointer. -** -** cellSizePtrNoPayload() => table internal nodes -** cellSizePtr() => all index nodes & table leaf nodes -*/ -static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ - u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */ - u8 *pEnd; /* End mark for a varint */ - u32 nSize; /* Size value to return */ - -#ifdef SQLITE_DEBUG - /* The value returned by this function should always be the same as - ** the (CellInfo.nSize) value found by doing a full parse of the - ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of - ** this function verifies that this invariant is not violated. */ - CellInfo debuginfo; - pPage->xParseCell(pPage, pCell, &debuginfo); -#endif - - nSize = *pIter; - if( nSize>=0x80 ){ - pEnd = &pIter[8]; - nSize &= 0x7f; - do{ - nSize = (nSize<<7) | (*++pIter & 0x7f); - }while( *(pIter)>=0x80 && pIterintKey ){ - /* pIter now points at the 64-bit integer key value, a variable length - ** integer. The following block moves pIter to point at the first byte - ** past the end of the key value. 
*/ - pEnd = &pIter[9]; - while( (*pIter++)&0x80 && pItermaxLocal ); - testcase( nSize==pPage->maxLocal+1 ); - if( nSize<=pPage->maxLocal ){ - nSize += (u32)(pIter - pCell); - if( nSize<4 ) nSize = 4; - }else{ - int minLocal = pPage->minLocal; - nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4); - testcase( nSize==pPage->maxLocal ); - testcase( nSize==pPage->maxLocal+1 ); - if( nSize>pPage->maxLocal ){ - nSize = minLocal; - } - nSize += 4 + (u16)(pIter - pCell); - } - assert( nSize==debuginfo.nSize || CORRUPT_DB ); - return (u16)nSize; -} -static u16 cellSizePtrNoPayload(MemPage *pPage, u8 *pCell){ - u8 *pIter = pCell + 4; /* For looping over bytes of pCell */ - u8 *pEnd; /* End mark for a varint */ - -#ifdef SQLITE_DEBUG - /* The value returned by this function should always be the same as - ** the (CellInfo.nSize) value found by doing a full parse of the - ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of - ** this function verifies that this invariant is not violated. */ - CellInfo debuginfo; - pPage->xParseCell(pPage, pCell, &debuginfo); -#else - UNUSED_PARAMETER(pPage); -#endif - - assert( pPage->childPtrSize==4 ); - pEnd = pIter + 9; - while( (*pIter++)&0x80 && pIterxCellSize(pPage, findCell(pPage, iCell)); -} -#endif - -#ifndef SQLITE_OMIT_AUTOVACUUM -/* -** The cell pCell is currently part of page pSrc but will ultimately be part -** of pPage. (pSrc and pPager are often the same.) If pCell contains a -** pointer to an overflow page, insert an entry into the pointer-map for -** the overflow page that will be valid after pCell has been moved to pPage. 
-*/ -static void ptrmapPutOvflPtr(MemPage *pPage, MemPage *pSrc, u8 *pCell,int *pRC){ - CellInfo info; - if( *pRC ) return; - assert( pCell!=0 ); - pPage->xParseCell(pPage, pCell, &info); - if( info.nLocalaDataEnd, pCell, pCell+info.nLocal) ){ - testcase( pSrc!=pPage ); - *pRC = SQLITE_CORRUPT_BKPT; - return; - } - ovfl = get4byte(&pCell[info.nSize-4]); - ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC); - } -} -#endif - - -/* -** Defragment the page given. This routine reorganizes cells within the -** page so that there are no free-blocks on the free-block list. -** -** Parameter nMaxFrag is the maximum amount of fragmented space that may be -** present in the page after this routine returns. -** -** EVIDENCE-OF: R-44582-60138 SQLite may from time to time reorganize a -** b-tree page so that there are no freeblocks or fragment bytes, all -** unused bytes are contained in the unallocated space region, and all -** cells are packed tightly at the end of the page. -*/ -static int defragmentPage(MemPage *pPage, int nMaxFrag){ - int i; /* Loop counter */ - int pc; /* Address of the i-th cell */ - int hdr; /* Offset to the page header */ - int size; /* Size of a cell */ - int usableSize; /* Number of usable bytes on a page */ - int cellOffset; /* Offset to the cell pointer array */ - int cbrk; /* Offset to the cell content area */ - int nCell; /* Number of cells on the page */ - unsigned char *data; /* The page data */ - unsigned char *temp; /* Temp area for cell content */ - unsigned char *src; /* Source of content */ - int iCellFirst; /* First allowable cell index */ - int iCellLast; /* Last possible cell index */ - int iCellStart; /* First cell offset in input */ - - assert( sqlite3PagerIswriteable(pPage->pDbPage) ); - assert( pPage->pBt!=0 ); - assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE ); - assert( pPage->nOverflow==0 ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - temp = 0; - src = data = pPage->aData; - hdr = pPage->hdrOffset; - 
cellOffset = pPage->cellOffset; - nCell = pPage->nCell; - assert( nCell==get2byte(&data[hdr+3]) || CORRUPT_DB ); - iCellFirst = cellOffset + 2*nCell; - usableSize = pPage->pBt->usableSize; - - /* This block handles pages with two or fewer free blocks and nMaxFrag - ** or fewer fragmented bytes. In this case it is faster to move the - ** two (or one) blocks of cells using memmove() and add the required - ** offsets to each pointer in the cell-pointer array than it is to - ** reconstruct the entire page. */ - if( (int)data[hdr+7]<=nMaxFrag ){ - int iFree = get2byte(&data[hdr+1]); - if( iFree>usableSize-4 ) return SQLITE_CORRUPT_PAGE(pPage); - if( iFree ){ - int iFree2 = get2byte(&data[iFree]); - if( iFree2>usableSize-4 ) return SQLITE_CORRUPT_PAGE(pPage); - if( 0==iFree2 || (data[iFree2]==0 && data[iFree2+1]==0) ){ - u8 *pEnd = &data[cellOffset + nCell*2]; - u8 *pAddr; - int sz2 = 0; - int sz = get2byte(&data[iFree+2]); - int top = get2byte(&data[hdr+5]); - if( top>=iFree ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - if( iFree2 ){ - if( iFree+sz>iFree2 ) return SQLITE_CORRUPT_PAGE(pPage); - sz2 = get2byte(&data[iFree2+2]); - if( iFree2+sz2 > usableSize ) return SQLITE_CORRUPT_PAGE(pPage); - memmove(&data[iFree+sz+sz2], &data[iFree+sz], iFree2-(iFree+sz)); - sz += sz2; - }else if( NEVER(iFree+sz>usableSize) ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - - cbrk = top+sz; - assert( cbrk+(iFree-top) <= usableSize ); - memmove(&data[cbrk], &data[top], iFree-top); - for(pAddr=&data[cellOffset]; pAddriCellLast ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - assert( pc>=iCellStart && pc<=iCellLast ); - size = pPage->xCellSize(pPage, &src[pc]); - cbrk -= size; - if( cbrkusableSize ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - assert( cbrk+size<=usableSize && cbrk>=iCellStart ); - testcase( cbrk+size==usableSize ); - testcase( pc+size==usableSize ); - put2byte(pAddr, cbrk); - if( temp==0 ){ - if( cbrk==pc ) continue; - temp = sqlite3PagerTempSpace(pPage->pBt->pPager); - 
memcpy(&temp[iCellStart], &data[iCellStart], usableSize - iCellStart); - src = temp; - } - memcpy(&data[cbrk], &src[pc], size); - } - data[hdr+7] = 0; - - defragment_out: - assert( pPage->nFree>=0 ); - if( data[hdr+7]+cbrk-iCellFirst!=pPage->nFree ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - assert( cbrk>=iCellFirst ); - put2byte(&data[hdr+5], cbrk); - data[hdr+1] = 0; - data[hdr+2] = 0; - memset(&data[iCellFirst], 0, cbrk-iCellFirst); - assert( sqlite3PagerIswriteable(pPage->pDbPage) ); - return SQLITE_OK; -} - -/* -** Search the free-list on page pPg for space to store a cell nByte bytes in -** size. If one can be found, return a pointer to the space and remove it -** from the free-list. -** -** If no suitable space can be found on the free-list, return NULL. -** -** This function may detect corruption within pPg. If corruption is -** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned. -** -** Slots on the free list that are between 1 and 3 bytes larger than nByte -** will be ignored if adding the extra space to the fragmentation count -** causes the fragmentation count to exceed 60. -*/ -static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){ - const int hdr = pPg->hdrOffset; /* Offset to page header */ - u8 * const aData = pPg->aData; /* Page data */ - int iAddr = hdr + 1; /* Address of ptr to pc */ - int pc = get2byte(&aData[iAddr]); /* Address of a free slot */ - int x; /* Excess size of the slot */ - int maxPC = pPg->pBt->usableSize - nByte; /* Max address for a usable slot */ - int size; /* Size of the free slot */ - - assert( pc>0 ); - while( pc<=maxPC ){ - /* EVIDENCE-OF: R-22710-53328 The third and fourth bytes of each - ** freeblock form a big-endian integer which is the size of the freeblock - ** in bytes, including the 4-byte header. 
*/ - size = get2byte(&aData[pc+2]); - if( (x = size - nByte)>=0 ){ - testcase( x==4 ); - testcase( x==3 ); - if( x<4 ){ - /* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total - ** number of bytes in fragments may not exceed 60. */ - if( aData[hdr+7]>57 ) return 0; - - /* Remove the slot from the free-list. Update the number of - ** fragmented bytes within the page. */ - memcpy(&aData[iAddr], &aData[pc], 2); - aData[hdr+7] += (u8)x; - }else if( x+pc > maxPC ){ - /* This slot extends off the end of the usable part of the page */ - *pRc = SQLITE_CORRUPT_PAGE(pPg); - return 0; - }else{ - /* The slot remains on the free-list. Reduce its size to account - ** for the portion used by the new allocation. */ - put2byte(&aData[pc+2], x); - } - return &aData[pc + x]; - } - iAddr = pc; - pc = get2byte(&aData[pc]); - if( pc<=iAddr+size ){ - if( pc ){ - /* The next slot in the chain is not past the end of the current slot */ - *pRc = SQLITE_CORRUPT_PAGE(pPg); - } - return 0; - } - } - if( pc>maxPC+nByte-4 ){ - /* The free slot chain extends off the end of the page */ - *pRc = SQLITE_CORRUPT_PAGE(pPg); - } - return 0; -} - -/* -** Allocate nByte bytes of space from within the B-Tree page passed -** as the first argument. Write into *pIdx the index into pPage->aData[] -** of the first byte of allocated space. Return either SQLITE_OK or -** an error code (usually SQLITE_CORRUPT). -** -** The caller guarantees that there is sufficient space to make the -** allocation. This routine might need to defragment in order to bring -** all the space together, however. This routine will avoid using -** the first two bytes past the cell pointer area since presumably this -** allocation is being made in order to insert a new cell, so we will -** also end up needing a new cell pointer. 
-*/ -static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ - const int hdr = pPage->hdrOffset; /* Local cache of pPage->hdrOffset */ - u8 * const data = pPage->aData; /* Local cache of pPage->aData */ - int top; /* First byte of cell content area */ - int rc = SQLITE_OK; /* Integer return code */ - int gap; /* First byte of gap between cell pointers and cell content */ - - assert( sqlite3PagerIswriteable(pPage->pDbPage) ); - assert( pPage->pBt ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( nByte>=0 ); /* Minimum cell size is 4 */ - assert( pPage->nFree>=nByte ); - assert( pPage->nOverflow==0 ); - assert( nByte < (int)(pPage->pBt->usableSize-8) ); - - assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf ); - gap = pPage->cellOffset + 2*pPage->nCell; - assert( gap<=65536 ); - /* EVIDENCE-OF: R-29356-02391 If the database uses a 65536-byte page size - ** and the reserved space is zero (the usual value for reserved space) - ** then the cell content offset of an empty page wants to be 65536. - ** However, that integer is too large to be stored in a 2-byte unsigned - ** integer, so a value of 0 is used in its place. */ - top = get2byte(&data[hdr+5]); - assert( top<=(int)pPage->pBt->usableSize ); /* by btreeComputeFreeSpace() */ - if( gap>top ){ - if( top==0 && pPage->pBt->usableSize==65536 ){ - top = 65536; - }else{ - return SQLITE_CORRUPT_PAGE(pPage); - } - } - - /* If there is enough space between gap and top for one more cell pointer, - ** and if the freelist is not empty, then search the - ** freelist looking for a slot big enough to satisfy the request. 
- */ - testcase( gap+2==top ); - testcase( gap+1==top ); - testcase( gap==top ); - if( (data[hdr+2] || data[hdr+1]) && gap+2<=top ){ - u8 *pSpace = pageFindSlot(pPage, nByte, &rc); - if( pSpace ){ - int g2; - assert( pSpace+nByte<=data+pPage->pBt->usableSize ); - *pIdx = g2 = (int)(pSpace-data); - if( g2<=gap ){ - return SQLITE_CORRUPT_PAGE(pPage); - }else{ - return SQLITE_OK; - } - }else if( rc ){ - return rc; - } - } - - /* The request could not be fulfilled using a freelist slot. Check - ** to see if defragmentation is necessary. - */ - testcase( gap+2+nByte==top ); - if( gap+2+nByte>top ){ - assert( pPage->nCell>0 || CORRUPT_DB ); - assert( pPage->nFree>=0 ); - rc = defragmentPage(pPage, MIN(4, pPage->nFree - (2+nByte))); - if( rc ) return rc; - top = get2byteNotZero(&data[hdr+5]); - assert( gap+2+nByte<=top ); - } - - - /* Allocate memory from the gap in between the cell pointer array - ** and the cell content area. The btreeComputeFreeSpace() call has already - ** validated the freelist. Given that the freelist is valid, there - ** is no way that the allocation can extend off the end of the page. - ** The assert() below verifies the previous sentence. - */ - top -= nByte; - put2byte(&data[hdr+5], top); - assert( top+nByte <= (int)pPage->pBt->usableSize ); - *pIdx = top; - return SQLITE_OK; -} - -/* -** Return a section of the pPage->aData to the freelist. -** The first byte of the new free block is pPage->aData[iStart] -** and the size of the block is iSize bytes. -** -** Adjacent freeblocks are coalesced. -** -** Even though the freeblock list was checked by btreeComputeFreeSpace(), -** that routine will not detect overlap between cells or freeblocks. Nor -** does it detect cells or freeblocks that encrouch into the reserved bytes -** at the end of the page. So do additional corruption checks inside this -** routine and return SQLITE_CORRUPT if any problems are found. 
-*/ -static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ - u16 iPtr; /* Address of ptr to next freeblock */ - u16 iFreeBlk; /* Address of the next freeblock */ - u8 hdr; /* Page header size. 0 or 100 */ - u8 nFrag = 0; /* Reduction in fragmentation */ - u16 iOrigSize = iSize; /* Original value of iSize */ - u16 x; /* Offset to cell content area */ - u32 iEnd = iStart + iSize; /* First byte past the iStart buffer */ - unsigned char *data = pPage->aData; /* Page content */ - - assert( pPage->pBt!=0 ); - assert( sqlite3PagerIswriteable(pPage->pDbPage) ); - assert( CORRUPT_DB || iStart>=pPage->hdrOffset+6+pPage->childPtrSize ); - assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( iSize>=4 ); /* Minimum cell size is 4 */ - assert( iStart<=pPage->pBt->usableSize-4 ); - - /* The list of freeblocks must be in ascending order. Find the - ** spot on the list where iStart should be inserted. - */ - hdr = pPage->hdrOffset; - iPtr = hdr + 1; - if( data[iPtr+1]==0 && data[iPtr]==0 ){ - iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */ - }else{ - while( (iFreeBlk = get2byte(&data[iPtr]))pPage->pBt->usableSize-4 ){ /* TH3: corrupt081.100 */ - return SQLITE_CORRUPT_PAGE(pPage); - } - assert( iFreeBlk>iPtr || iFreeBlk==0 ); - - /* At this point: - ** iFreeBlk: First freeblock after iStart, or zero if none - ** iPtr: The address of a pointer to iFreeBlk - ** - ** Check to see if iFreeBlk should be coalesced onto the end of iStart. 
- */ - if( iFreeBlk && iEnd+3>=iFreeBlk ){ - nFrag = iFreeBlk - iEnd; - if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_PAGE(pPage); - iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]); - if( iEnd > pPage->pBt->usableSize ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - iSize = iEnd - iStart; - iFreeBlk = get2byte(&data[iFreeBlk]); - } - - /* If iPtr is another freeblock (that is, if iPtr is not the freelist - ** pointer in the page header) then check to see if iStart should be - ** coalesced onto the end of iPtr. - */ - if( iPtr>hdr+1 ){ - int iPtrEnd = iPtr + get2byte(&data[iPtr+2]); - if( iPtrEnd+3>=iStart ){ - if( iPtrEnd>iStart ) return SQLITE_CORRUPT_PAGE(pPage); - nFrag += iStart - iPtrEnd; - iSize = iEnd - iPtr; - iStart = iPtr; - } - } - if( nFrag>data[hdr+7] ) return SQLITE_CORRUPT_PAGE(pPage); - data[hdr+7] -= nFrag; - } - x = get2byte(&data[hdr+5]); - if( iStart<=x ){ - /* The new freeblock is at the beginning of the cell content area, - ** so just extend the cell content area rather than create another - ** freelist entry */ - if( iStartpBt->btsFlags & BTS_FAST_SECURE ){ - /* Overwrite deleted information with zeros when the secure_delete - ** option is enabled */ - memset(&data[iStart], 0, iSize); - } - put2byte(&data[iStart], iFreeBlk); - put2byte(&data[iStart+2], iSize); - pPage->nFree += iOrigSize; - return SQLITE_OK; -} - -/* -** Decode the flags byte (the first byte of the header) for a page -** and initialize fields of the MemPage structure accordingly. -** -** Only the following combinations are supported. Anything different -** indicates a corrupt database files: -** -** PTF_ZERODATA -** PTF_ZERODATA | PTF_LEAF -** PTF_LEAFDATA | PTF_INTKEY -** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF -*/ -static int decodeFlags(MemPage *pPage, int flagByte){ - BtShared *pBt; /* A copy of pPage->pBt */ - - assert( pPage->hdrOffset==(pPage->pgno==1 ? 
100 : 0) ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 ); - flagByte &= ~PTF_LEAF; - pPage->childPtrSize = 4-4*pPage->leaf; - pPage->xCellSize = cellSizePtr; - pBt = pPage->pBt; - if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ - /* EVIDENCE-OF: R-07291-35328 A value of 5 (0x05) means the page is an - ** interior table b-tree page. */ - assert( (PTF_LEAFDATA|PTF_INTKEY)==5 ); - /* EVIDENCE-OF: R-26900-09176 A value of 13 (0x0d) means the page is a - ** leaf table b-tree page. */ - assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 ); - pPage->intKey = 1; - if( pPage->leaf ){ - pPage->intKeyLeaf = 1; - pPage->xParseCell = btreeParseCellPtr; - }else{ - pPage->intKeyLeaf = 0; - pPage->xCellSize = cellSizePtrNoPayload; - pPage->xParseCell = btreeParseCellPtrNoPayload; - } - pPage->maxLocal = pBt->maxLeaf; - pPage->minLocal = pBt->minLeaf; - }else if( flagByte==PTF_ZERODATA ){ - /* EVIDENCE-OF: R-43316-37308 A value of 2 (0x02) means the page is an - ** interior index b-tree page. */ - assert( (PTF_ZERODATA)==2 ); - /* EVIDENCE-OF: R-59615-42828 A value of 10 (0x0a) means the page is a - ** leaf index b-tree page. */ - assert( (PTF_ZERODATA|PTF_LEAF)==10 ); - pPage->intKey = 0; - pPage->intKeyLeaf = 0; - pPage->xParseCell = btreeParseCellPtrIndex; - pPage->maxLocal = pBt->maxLocal; - pPage->minLocal = pBt->minLocal; - }else{ - /* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is - ** an error. */ - return SQLITE_CORRUPT_PAGE(pPage); - } - pPage->max1bytePayload = pBt->max1bytePayload; - return SQLITE_OK; -} - -/* -** Compute the amount of freespace on the page. In other words, fill -** in the pPage->nFree field. 
-*/ -static int btreeComputeFreeSpace(MemPage *pPage){ - int pc; /* Address of a freeblock within pPage->aData[] */ - u8 hdr; /* Offset to beginning of page header */ - u8 *data; /* Equal to pPage->aData */ - int usableSize; /* Amount of usable space on each page */ - int nFree; /* Number of unused bytes on the page */ - int top; /* First byte of the cell content area */ - int iCellFirst; /* First allowable cell or freeblock offset */ - int iCellLast; /* Last possible cell or freeblock offset */ - - assert( pPage->pBt!=0 ); - assert( pPage->pBt->db!=0 ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); - assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); - assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) ); - assert( pPage->isInit==1 ); - assert( pPage->nFree<0 ); - - usableSize = pPage->pBt->usableSize; - hdr = pPage->hdrOffset; - data = pPage->aData; - /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates - ** the start of the cell content area. A zero value for this integer is - ** interpreted as 65536. */ - top = get2byteNotZero(&data[hdr+5]); - iCellFirst = hdr + 8 + pPage->childPtrSize + 2*pPage->nCell; - iCellLast = usableSize - 4; - - /* Compute the total free space on the page - ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the - ** start of the first freeblock on the page, or is zero if there are no - ** freeblocks. 
*/ - pc = get2byte(&data[hdr+1]); - nFree = data[hdr+7] + top; /* Init nFree to non-freeblock free space */ - if( pc>0 ){ - u32 next, size; - if( pciCellLast ){ - /* Freeblock off the end of the page */ - return SQLITE_CORRUPT_PAGE(pPage); - } - next = get2byte(&data[pc]); - size = get2byte(&data[pc+2]); - nFree = nFree + size; - if( next<=pc+size+3 ) break; - pc = next; - } - if( next>0 ){ - /* Freeblock not in ascending order */ - return SQLITE_CORRUPT_PAGE(pPage); - } - if( pc+size>(unsigned int)usableSize ){ - /* Last freeblock extends past page end */ - return SQLITE_CORRUPT_PAGE(pPage); - } - } - - /* At this point, nFree contains the sum of the offset to the start - ** of the cell-content area plus the number of free bytes within - ** the cell-content area. If this is greater than the usable-size - ** of the page, then the page must be corrupted. This check also - ** serves to verify that the offset to the start of the cell-content - ** area, according to the page header, lies within the page. 
- */ - if( nFree>usableSize || nFreenFree = (u16)(nFree - iCellFirst); - return SQLITE_OK; -} - -/* -** Do additional sanity check after btreeInitPage() if -** PRAGMA cell_size_check=ON -*/ -static SQLITE_NOINLINE int btreeCellSizeCheck(MemPage *pPage){ - int iCellFirst; /* First allowable cell or freeblock offset */ - int iCellLast; /* Last possible cell or freeblock offset */ - int i; /* Index into the cell pointer array */ - int sz; /* Size of a cell */ - int pc; /* Address of a freeblock within pPage->aData[] */ - u8 *data; /* Equal to pPage->aData */ - int usableSize; /* Maximum usable space on the page */ - int cellOffset; /* Start of cell content area */ - - iCellFirst = pPage->cellOffset + 2*pPage->nCell; - usableSize = pPage->pBt->usableSize; - iCellLast = usableSize - 4; - data = pPage->aData; - cellOffset = pPage->cellOffset; - if( !pPage->leaf ) iCellLast--; - for(i=0; inCell; i++){ - pc = get2byteAligned(&data[cellOffset+i*2]); - testcase( pc==iCellFirst ); - testcase( pc==iCellLast ); - if( pciCellLast ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - sz = pPage->xCellSize(pPage, &data[pc]); - testcase( pc+sz==usableSize ); - if( pc+sz>usableSize ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - } - return SQLITE_OK; -} - -/* -** Initialize the auxiliary information for a disk block. -** -** Return SQLITE_OK on success. If we see that the page does -** not contain a well-formed database page, then return -** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not -** guarantee that the page is well-formed. It only shows that -** we failed to detect any corruption. 
-*/ -static int btreeInitPage(MemPage *pPage){ - u8 *data; /* Equal to pPage->aData */ - BtShared *pBt; /* The main btree structure */ - - assert( pPage->pBt!=0 ); - assert( pPage->pBt->db!=0 ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); - assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); - assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) ); - assert( pPage->isInit==0 ); - - pBt = pPage->pBt; - data = pPage->aData + pPage->hdrOffset; - /* EVIDENCE-OF: R-28594-02890 The one-byte flag at offset 0 indicating - ** the b-tree page type. */ - if( decodeFlags(pPage, data[0]) ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); - pPage->maskPage = (u16)(pBt->pageSize - 1); - pPage->nOverflow = 0; - pPage->cellOffset = pPage->hdrOffset + 8 + pPage->childPtrSize; - pPage->aCellIdx = data + pPage->childPtrSize + 8; - pPage->aDataEnd = pPage->aData + pBt->usableSize; - pPage->aDataOfst = pPage->aData + pPage->childPtrSize; - /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the - ** number of cells on the page. */ - pPage->nCell = get2byte(&data[3]); - if( pPage->nCell>MX_CELL(pBt) ){ - /* To many cells for a single page. The page must be corrupt */ - return SQLITE_CORRUPT_PAGE(pPage); - } - testcase( pPage->nCell==MX_CELL(pBt) ); - /* EVIDENCE-OF: R-24089-57979 If a page contains no cells (which is only - ** possible for a root page of a table that contains no rows) then the - ** offset to the cell content area will equal the page size minus the - ** bytes of reserved space. 
*/ - assert( pPage->nCell>0 - || get2byteNotZero(&data[5])==(int)pBt->usableSize - || CORRUPT_DB ); - pPage->nFree = -1; /* Indicate that this value is yet uncomputed */ - pPage->isInit = 1; - if( pBt->db->flags & SQLITE_CellSizeCk ){ - return btreeCellSizeCheck(pPage); - } - return SQLITE_OK; -} - -/* -** Set up a raw page so that it looks like a database page holding -** no entries. -*/ -static void zeroPage(MemPage *pPage, int flags){ - unsigned char *data = pPage->aData; - BtShared *pBt = pPage->pBt; - u8 hdr = pPage->hdrOffset; - u16 first; - - assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno ); - assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); - assert( sqlite3PagerGetData(pPage->pDbPage) == data ); - assert( sqlite3PagerIswriteable(pPage->pDbPage) ); - assert( sqlite3_mutex_held(pBt->mutex) ); - if( pBt->btsFlags & BTS_FAST_SECURE ){ - memset(&data[hdr], 0, pBt->usableSize - hdr); - } - data[hdr] = (char)flags; - first = hdr + ((flags&PTF_LEAF)==0 ? 12 : 8); - memset(&data[hdr+1], 0, 4); - data[hdr+7] = 0; - put2byte(&data[hdr+5], pBt->usableSize); - pPage->nFree = (u16)(pBt->usableSize - first); - decodeFlags(pPage, flags); - pPage->cellOffset = first; - pPage->aDataEnd = &data[pBt->usableSize]; - pPage->aCellIdx = &data[first]; - pPage->aDataOfst = &data[pPage->childPtrSize]; - pPage->nOverflow = 0; - assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); - pPage->maskPage = (u16)(pBt->pageSize - 1); - pPage->nCell = 0; - pPage->isInit = 1; -} - - -/* -** Convert a DbPage obtained from the pager into a MemPage used by -** the btree layer. -*/ -static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){ - MemPage *pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage); - if( pgno!=pPage->pgno ){ - pPage->aData = sqlite3PagerGetData(pDbPage); - pPage->pDbPage = pDbPage; - pPage->pBt = pBt; - pPage->pgno = pgno; - pPage->hdrOffset = pgno==1 ? 
100 : 0; - } - assert( pPage->aData==sqlite3PagerGetData(pDbPage) ); - return pPage; -} - -/* -** Get a page from the pager. Initialize the MemPage.pBt and -** MemPage.aData elements if needed. See also: btreeGetUnusedPage(). -** -** If the PAGER_GET_NOCONTENT flag is set, it means that we do not care -** about the content of the page at this time. So do not go to the disk -** to fetch the content. Just fill in the content with zeros for now. -** If in the future we call sqlite3PagerWrite() on this page, that -** means we have started to be concerned about content and the disk -** read should occur at that point. -*/ -static int btreeGetPage( - BtShared *pBt, /* The btree */ - Pgno pgno, /* Number of the page to fetch */ - MemPage **ppPage, /* Return the page in this parameter */ - int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ -){ - int rc; - DbPage *pDbPage; - - assert( flags==0 || flags==PAGER_GET_NOCONTENT || flags==PAGER_GET_READONLY ); - assert( sqlite3_mutex_held(pBt->mutex) ); - rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, flags); - if( rc ) return rc; - *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); - return SQLITE_OK; -} - -/* -** Retrieve a page from the pager cache. If the requested page is not -** already in the pager cache return NULL. Initialize the MemPage.pBt and -** MemPage.aData elements if needed. -*/ -static MemPage *btreePageLookup(BtShared *pBt, Pgno pgno){ - DbPage *pDbPage; - assert( sqlite3_mutex_held(pBt->mutex) ); - pDbPage = sqlite3PagerLookup(pBt->pPager, pgno); - if( pDbPage ){ - return btreePageFromDbPage(pDbPage, pgno, pBt); - } - return 0; -} - -/* -** Return the size of the database file in pages. If there is any kind of -** error, return ((unsigned int)-1). -*/ -static Pgno btreePagecount(BtShared *pBt){ - return pBt->nPage; -} -Pgno sqlite3BtreeLastPage(Btree *p){ - assert( sqlite3BtreeHoldsMutex(p) ); - return btreePagecount(p->pBt); -} - -/* -** Get a page from the pager and initialize it. 
-** -** If pCur!=0 then the page is being fetched as part of a moveToChild() -** call. Do additional sanity checking on the page in this case. -** And if the fetch fails, this routine must decrement pCur->iPage. -** -** The page is fetched as read-write unless pCur is not NULL and is -** a read-only cursor. -** -** If an error occurs, then *ppPage is undefined. It -** may remain unchanged, or it may be set to an invalid value. -*/ -static int getAndInitPage( - BtShared *pBt, /* The database file */ - Pgno pgno, /* Number of the page to get */ - MemPage **ppPage, /* Write the page pointer here */ - BtCursor *pCur, /* Cursor to receive the page, or NULL */ - int bReadOnly /* True for a read-only page */ -){ - int rc; - DbPage *pDbPage; - assert( sqlite3_mutex_held(pBt->mutex) ); - assert( pCur==0 || ppPage==&pCur->pPage ); - assert( pCur==0 || bReadOnly==pCur->curPagerFlags ); - assert( pCur==0 || pCur->iPage>0 ); - - if( pgno>btreePagecount(pBt) ){ - rc = SQLITE_CORRUPT_BKPT; - goto getAndInitPage_error1; - } - rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly); - if( rc ){ - goto getAndInitPage_error1; - } - *ppPage = (MemPage*)sqlite3PagerGetExtra(pDbPage); - if( (*ppPage)->isInit==0 ){ - btreePageFromDbPage(pDbPage, pgno, pBt); - rc = btreeInitPage(*ppPage); - if( rc!=SQLITE_OK ){ - goto getAndInitPage_error2; - } - } - assert( (*ppPage)->pgno==pgno ); - assert( (*ppPage)->aData==sqlite3PagerGetData(pDbPage) ); - - /* If obtaining a child page for a cursor, we must verify that the page is - ** compatible with the root page. 
*/ - if( pCur && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) ){ - rc = SQLITE_CORRUPT_PGNO(pgno); - goto getAndInitPage_error2; - } - return SQLITE_OK; - -getAndInitPage_error2: - releasePage(*ppPage); -getAndInitPage_error1: - if( pCur ){ - pCur->iPage--; - pCur->pPage = pCur->apPage[pCur->iPage]; - } - testcase( pgno==0 ); - assert( pgno!=0 || rc==SQLITE_CORRUPT ); - return rc; -} - -/* -** Release a MemPage. This should be called once for each prior -** call to btreeGetPage. -** -** Page1 is a special case and must be released using releasePageOne(). -*/ -static void releasePageNotNull(MemPage *pPage){ - assert( pPage->aData ); - assert( pPage->pBt ); - assert( pPage->pDbPage!=0 ); - assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); - assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - sqlite3PagerUnrefNotNull(pPage->pDbPage); -} -static void releasePage(MemPage *pPage){ - if( pPage ) releasePageNotNull(pPage); -} -static void releasePageOne(MemPage *pPage){ - assert( pPage!=0 ); - assert( pPage->aData ); - assert( pPage->pBt ); - assert( pPage->pDbPage!=0 ); - assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); - assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - sqlite3PagerUnrefPageOne(pPage->pDbPage); -} - -/* -** Get an unused page. -** -** This works just like btreeGetPage() with the addition: -** -** * If the page is already in use for some other purpose, immediately -** release it and return an SQLITE_CURRUPT error. 
-** * Make sure the isInit flag is clear -*/ -static int btreeGetUnusedPage( - BtShared *pBt, /* The btree */ - Pgno pgno, /* Number of the page to fetch */ - MemPage **ppPage, /* Return the page in this parameter */ - int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ -){ - int rc = btreeGetPage(pBt, pgno, ppPage, flags); - if( rc==SQLITE_OK ){ - if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){ - releasePage(*ppPage); - *ppPage = 0; - return SQLITE_CORRUPT_BKPT; - } - (*ppPage)->isInit = 0; - }else{ - *ppPage = 0; - } - return rc; -} - - -/* -** During a rollback, when the pager reloads information into the cache -** so that the cache is restored to its original state at the start of -** the transaction, for each page restored this routine is called. -** -** This routine needs to reset the extra data section at the end of the -** page to agree with the restored data. -*/ -static void pageReinit(DbPage *pData){ - MemPage *pPage; - pPage = (MemPage *)sqlite3PagerGetExtra(pData); - assert( sqlite3PagerPageRefcount(pData)>0 ); - if( pPage->isInit ){ - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - pPage->isInit = 0; - if( sqlite3PagerPageRefcount(pData)>1 ){ - /* pPage might not be a btree page; it might be an overflow page - ** or ptrmap page or a free page. In those cases, the following - ** call to btreeInitPage() will likely return SQLITE_CORRUPT. - ** But no harm is done by this. And it is very important that - ** btreeInitPage() be called on every btree page so we make - ** the call for every page that comes in for re-initing. */ - btreeInitPage(pPage); - } - } -} - -/* -** Invoke the busy handler for a btree. -*/ -static int btreeInvokeBusyHandler(void *pArg){ - BtShared *pBt = (BtShared*)pArg; - assert( pBt->db ); - assert( sqlite3_mutex_held(pBt->db->mutex) ); - return sqlite3InvokeBusyHandler(&pBt->db->busyHandler); -} - -/* -** Open a database file. -** -** zFilename is the name of the database file. 
If zFilename is NULL -** then an ephemeral database is created. The ephemeral database might -** be exclusively in memory, or it might use a disk-based memory cache. -** Either way, the ephemeral database will be automatically deleted -** when sqlite3BtreeClose() is called. -** -** If zFilename is ":memory:" then an in-memory database is created -** that is automatically destroyed when it is closed. -** -** The "flags" parameter is a bitmask that might contain bits like -** BTREE_OMIT_JOURNAL and/or BTREE_MEMORY. -** -** If the database is already opened in the same database connection -** and we are in shared cache mode, then the open will fail with an -** SQLITE_CONSTRAINT error. We cannot allow two or more BtShared -** objects in the same database connection since doing so will lead -** to problems with locking. -*/ -int sqlite3BtreeOpen( - sqlite3_vfs *pVfs, /* VFS to use for this b-tree */ - const char *zFilename, /* Name of the file containing the BTree database */ - sqlite3 *db, /* Associated database handle */ - Btree **ppBtree, /* Pointer to new Btree object written here */ - int flags, /* Options */ - int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */ -){ - BtShared *pBt = 0; /* Shared part of btree structure */ - Btree *p; /* Handle to return */ - sqlite3_mutex *mutexOpen = 0; /* Prevents a race condition. Ticket #3537 */ - int rc = SQLITE_OK; /* Result code from this function */ - u8 nReserve; /* Byte of unused space on each page */ - unsigned char zDbHeader[100]; /* Database header content */ - - /* True if opening an ephemeral, temporary database */ - const int isTempDb = zFilename==0 || zFilename[0]==0; - - /* Set the variable isMemdb to true for an in-memory database, or - ** false for a file-based database. 
- */ -#ifdef SQLITE_OMIT_MEMORYDB - const int isMemdb = 0; -#else - const int isMemdb = (zFilename && strcmp(zFilename, ":memory:")==0) - || (isTempDb && sqlite3TempInMemory(db)) - || (vfsFlags & SQLITE_OPEN_MEMORY)!=0; -#endif - - assert( db!=0 ); - assert( pVfs!=0 ); - assert( sqlite3_mutex_held(db->mutex) ); - assert( (flags&0xff)==flags ); /* flags fit in 8 bits */ - - /* Only a BTREE_SINGLE database can be BTREE_UNORDERED */ - assert( (flags & BTREE_UNORDERED)==0 || (flags & BTREE_SINGLE)!=0 ); - - /* A BTREE_SINGLE database is always a temporary and/or ephemeral */ - assert( (flags & BTREE_SINGLE)==0 || isTempDb ); - - if( isMemdb ){ - flags |= BTREE_MEMORY; - } - if( (vfsFlags & SQLITE_OPEN_MAIN_DB)!=0 && (isMemdb || isTempDb) ){ - vfsFlags = (vfsFlags & ~SQLITE_OPEN_MAIN_DB) | SQLITE_OPEN_TEMP_DB; - } - p = sqlite3MallocZero(sizeof(Btree)); - if( !p ){ - return SQLITE_NOMEM; - } - p->inTrans = TRANS_NONE; - p->db = db; -#ifndef SQLITE_OMIT_SHARED_CACHE - p->lock.pBtree = p; - p->lock.iTable = 1; -#endif - -#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) - /* - ** If this Btree is a candidate for shared cache, try to find an - ** existing BtShared object that we can share with - */ - if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){ - if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){ - int nFilename = sqlite3Strlen30(zFilename)+1; - int nFullPathname = pVfs->mxPathname+1; - char *zFullPathname = sqlite3Malloc(MAX(nFullPathname,nFilename)); - MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) - - p->sharable = 1; - if( !zFullPathname ){ - sqlite3_free(p); - return SQLITE_NOMEM; - } - if( isMemdb ){ - memcpy(zFullPathname, zFilename, nFilename); - }else{ - rc = sqlite3OsFullPathname(pVfs, zFilename, - nFullPathname, zFullPathname); - if( rc ){ - if( rc==SQLITE_OK_SYMLINK ){ - rc = SQLITE_OK; - }else{ - sqlite3_free(zFullPathname); - sqlite3_free(p); - return rc; - } - } - } -#if SQLITE_THREADSAFE - mutexOpen = 
sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_OPEN); - sqlite3_mutex_enter(mutexOpen); - mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); - sqlite3_mutex_enter(mutexShared); -#endif - for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){ - assert( pBt->nRef>0 ); - if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager, 0)) - && sqlite3PagerVfs(pBt->pPager)==pVfs ){ - int iDb; - for(iDb=db->nDb-1; iDb>=0; iDb--){ - Btree *pExisting = db->aDb[iDb].pBt; - if( pExisting && pExisting->pBt==pBt ){ - sqlite3_mutex_leave(mutexShared); - sqlite3_mutex_leave(mutexOpen); - sqlite3_free(zFullPathname); - sqlite3_free(p); - return SQLITE_CONSTRAINT; - } - } - p->pBt = pBt; - pBt->nRef++; - break; - } - } - sqlite3_mutex_leave(mutexShared); - sqlite3_free(zFullPathname); - } -#ifdef SQLITE_DEBUG - else{ - /* In debug mode, we mark all persistent databases as sharable - ** even when they are not. This exercises the locking code and - ** gives more opportunity for asserts(sqlite3_mutex_held()) - ** statements to find locking problems. - */ - p->sharable = 1; - } -#endif - } -#endif - if( pBt==0 ){ - /* - ** The following asserts make sure that structures used by the btree are - ** the right size. This is to guard against size changes that result - ** when compiling on a different architecture. - */ - assert( sizeof(i64)==8 ); - assert( sizeof(u64)==8 ); - assert( sizeof(u32)==4 ); - assert( sizeof(u16)==2 ); - assert( sizeof(Pgno)==4 ); - - pBt = sqlite3MallocZero( sizeof(*pBt) ); - if( pBt==0 ){ - rc = SQLITE_NOMEM; - goto btree_open_out; - } - rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, - sizeof(MemPage), flags, vfsFlags, pageReinit); - if( rc==SQLITE_OK ){ - sqlite3PagerSetMmapLimit(pBt->pPager, db ? 
db->szMmap : 0); - rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader); - } - if( rc!=SQLITE_OK ){ - goto btree_open_out; - } - pBt->openFlags = (u8)flags; - pBt->db = db; - sqlite3PagerSetBusyHandler(pBt->pPager, btreeInvokeBusyHandler, pBt); - p->pBt = pBt; - - pBt->pCursor = 0; - pBt->pPage1 = 0; - if( sqlite3PagerIsreadonly(pBt->pPager) ) pBt->btsFlags |= BTS_READ_ONLY; -#if defined(SQLITE_SECURE_DELETE) - pBt->btsFlags |= BTS_SECURE_DELETE; -#elif defined(SQLITE_FAST_SECURE_DELETE) - pBt->btsFlags |= BTS_OVERWRITE; -#endif - /* EVIDENCE-OF: R-51873-39618 The page size for a database file is - ** determined by the 2-byte integer located at an offset of 16 bytes from - ** the beginning of the database file. */ - pBt->pageSize = (zDbHeader[16]<<8) | (zDbHeader[17]<<16); - if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE - || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){ - pBt->pageSize = 0; -#ifndef SQLITE_OMIT_AUTOVACUUM - /* If the magic name ":memory:" will create an in-memory database, then - ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if - ** SQLITE_DEFAULT_AUTOVACUUM is true. On the other hand, if - ** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a - ** regular file-name. In this case the auto-vacuum applies as per normal. - */ - if( zFilename && !isMemdb ){ - pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0); - pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0); - } -#endif - nReserve = 0; - }else{ - /* EVIDENCE-OF: R-37497-42412 The size of the reserved region is - ** determined by the one-byte unsigned integer found at an offset of 20 - ** into the database file header. 
*/ - nReserve = zDbHeader[20]; - pBt->btsFlags |= BTS_PAGESIZE_FIXED; -#ifndef SQLITE_OMIT_AUTOVACUUM - pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0); - pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0); -#endif - } - rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve); - if( rc ) goto btree_open_out; - pBt->usableSize = pBt->pageSize - nReserve; - assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */ - -#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) - /* Add the new BtShared object to the linked list sharable BtShareds. - */ - pBt->nRef = 1; - if( p->sharable ){ - MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) - MUTEX_LOGIC( mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN);) - if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){ - pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST); - if( pBt->mutex==0 ){ - rc = SQLITE_NOMEM; - goto btree_open_out; - } - } - sqlite3_mutex_enter(mutexShared); - pBt->pNext = GLOBAL(BtShared*,sqlite3SharedCacheList); - GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt; - sqlite3_mutex_leave(mutexShared); - } -#endif - } - -#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) - /* If the new Btree uses a sharable pBtShared, then link the new - ** Btree into the list of all sharable Btrees for the same connection. - ** The list is kept in ascending order by pBt address. 
- */ - if( p->sharable ){ - int i; - Btree *pSib; - for(i=0; inDb; i++){ - if( (pSib = db->aDb[i].pBt)!=0 && pSib->sharable ){ - while( pSib->pPrev ){ pSib = pSib->pPrev; } - if( (uptr)p->pBt<(uptr)pSib->pBt ){ - p->pNext = pSib; - p->pPrev = 0; - pSib->pPrev = p; - }else{ - while( pSib->pNext && (uptr)pSib->pNext->pBt<(uptr)p->pBt ){ - pSib = pSib->pNext; - } - p->pNext = pSib->pNext; - p->pPrev = pSib; - if( p->pNext ){ - p->pNext->pPrev = p; - } - pSib->pNext = p; - } - break; - } - } - } -#endif - *ppBtree = p; - -btree_open_out: - if( rc!=SQLITE_OK ){ - if( pBt && pBt->pPager ){ - sqlite3PagerClose(pBt->pPager, 0); - } - sqlite3_free(pBt); - sqlite3_free(p); - *ppBtree = 0; - }else{ - sqlite3_file *pFile; - - /* If the B-Tree was successfully opened, set the pager-cache size to the - ** default value. Except, when opening on an existing shared pager-cache, - ** do not change the pager-cache size. - */ - if( sqlite3BtreeSchema(p, 0, 0)==0 ){ - sqlite3BtreeSetCacheSize(p, SQLITE_DEFAULT_CACHE_SIZE); - } - - pFile = sqlite3PagerFile(pBt->pPager); - if( pFile->pMethods ){ - sqlite3OsFileControlHint(pFile, SQLITE_FCNTL_PDB, (void*)&pBt->db); - } - } - if( mutexOpen ){ - assert( sqlite3_mutex_held(mutexOpen) ); - sqlite3_mutex_leave(mutexOpen); - } - assert( rc!=SQLITE_OK || sqlite3BtreeConnectionCount(*ppBtree)>0 ); - return rc; -} - -/* -** Decrement the BtShared.nRef counter. When it reaches zero, -** remove the BtShared structure from the sharing list. Return -** true if the BtShared.nRef counter reaches zero and return -** false if it is still positive. 
-*/ -static int removeFromSharingList(BtShared *pBt){ -#ifndef SQLITE_OMIT_SHARED_CACHE - MUTEX_LOGIC( sqlite3_mutex *pMainMtx; ) - BtShared *pList; - int removed = 0; - - assert( sqlite3_mutex_notheld(pBt->mutex) ); - MUTEX_LOGIC( pMainMtx = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); ) - sqlite3_mutex_enter(pMainMtx); - pBt->nRef--; - if( pBt->nRef<=0 ){ - if( GLOBAL(BtShared*,sqlite3SharedCacheList)==pBt ){ - GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt->pNext; - }else{ - pList = GLOBAL(BtShared*,sqlite3SharedCacheList); - while( ALWAYS(pList) && pList->pNext!=pBt ){ - pList=pList->pNext; - } - if( ALWAYS(pList) ){ - pList->pNext = pBt->pNext; - } - } - if( SQLITE_THREADSAFE ){ - sqlite3_mutex_free(pBt->mutex); - } - removed = 1; - } - sqlite3_mutex_leave(pMainMtx); - return removed; -#else - return 1; -#endif -} - -/* -** Make sure pBt->pTmpSpace points to an allocation of -** MX_CELL_SIZE(pBt) bytes with a 4-byte prefix for a left-child -** pointer. -*/ -static void allocateTempSpace(BtShared *pBt){ - if( !pBt->pTmpSpace ){ - pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize ); - - /* One of the uses of pBt->pTmpSpace is to format cells before - ** inserting them into a leaf page (function fillInCell()). If - ** a cell is less than 4 bytes in size, it is rounded up to 4 bytes - ** by the various routines that manipulate binary cells. Which - ** can mean that fillInCell() only initializes the first 2 or 3 - ** bytes of pTmpSpace, but that the first 4 bytes are copied from - ** it into a database page. This is not actually a problem, but it - ** does cause a valgrind error when the 1 or 2 bytes of unitialized - ** data is passed to system call write(). So to avoid this error, - ** zero the first 4 bytes of temp space here. - ** - ** Also: Provide four bytes of initialized space before the - ** beginning of pTmpSpace as an area available to prepend the - ** left-child pointer to the beginning of a cell. 
- */ - if( pBt->pTmpSpace ){ - memset(pBt->pTmpSpace, 0, 8); - pBt->pTmpSpace += 4; - } - } -} - -/* -** Free the pBt->pTmpSpace allocation -*/ -static void freeTempSpace(BtShared *pBt){ - if( pBt->pTmpSpace ){ - pBt->pTmpSpace -= 4; - sqlite3PageFree(pBt->pTmpSpace); - pBt->pTmpSpace = 0; - } -} - -/* -** Close an open database and invalidate all cursors. -*/ -int sqlite3BtreeClose(Btree *p){ - BtShared *pBt = p->pBt; - - /* Close all cursors opened via this handle. */ - assert( sqlite3_mutex_held(p->db->mutex) ); - sqlite3BtreeEnter(p); - - /* Verify that no other cursors have this Btree open */ -#ifdef SQLITE_DEBUG - { - BtCursor *pCur = pBt->pCursor; - while( pCur ){ - BtCursor *pTmp = pCur; - pCur = pCur->pNext; - assert( pTmp->pBtree!=p ); - - } - } -#endif - - /* Rollback any active transaction and free the handle structure. - ** The call to sqlite3BtreeRollback() drops any table-locks held by - ** this handle. - */ - sqlite3BtreeRollback(p, SQLITE_OK, 0); - sqlite3BtreeLeave(p); - - /* If there are still other outstanding references to the shared-btree - ** structure, return now. The remainder of this procedure cleans - ** up the shared-btree. - */ - assert( p->wantToLock==0 && p->locked==0 ); - if( !p->sharable || removeFromSharingList(pBt) ){ - /* The pBt is no longer on the sharing list, so we can access - ** it without having to hold the mutex. - ** - ** Clean out and delete the BtShared object. - */ - assert( !pBt->pCursor ); - sqlite3PagerClose(pBt->pPager, p->db); - if( pBt->xFreeSchema && pBt->pSchema ){ - pBt->xFreeSchema(pBt->pSchema); - } - sqlite3DbFree(0, pBt->pSchema); - freeTempSpace(pBt); - sqlite3_free(pBt); - } - -#ifndef SQLITE_OMIT_SHARED_CACHE - assert( p->wantToLock==0 ); - assert( p->locked==0 ); - if( p->pPrev ) p->pPrev->pNext = p->pNext; - if( p->pNext ) p->pNext->pPrev = p->pPrev; -#endif - - sqlite3_free(p); - return SQLITE_OK; -} - -/* -** Change the "soft" limit on the number of pages in the cache. 
-** Unused and unmodified pages will be recycled when the number of -** pages in the cache exceeds this soft limit. But the size of the -** cache is allowed to grow larger than this limit if it contains -** dirty pages or pages still in active use. -*/ -int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ - BtShared *pBt = p->pBt; - assert( sqlite3_mutex_held(p->db->mutex) ); - sqlite3BtreeEnter(p); - sqlite3PagerSetCachesize(pBt->pPager, mxPage); - sqlite3BtreeLeave(p); - return SQLITE_OK; -} - -/* -** Change the "spill" limit on the number of pages in the cache. -** If the number of pages exceeds this limit during a write transaction, -** the pager might attempt to "spill" pages to the journal early in -** order to free up memory. -** -** The value returned is the current spill size. If zero is passed -** as an argument, no changes are made to the spill size setting, so -** using mxPage of 0 is a way to query the current spill size. -*/ -int sqlite3BtreeSetSpillSize(Btree *p, int mxPage){ - BtShared *pBt = p->pBt; - int res; - assert( sqlite3_mutex_held(p->db->mutex) ); - sqlite3BtreeEnter(p); - res = sqlite3PagerSetSpillsize(pBt->pPager, mxPage); - sqlite3BtreeLeave(p); - return res; -} - -#if SQLITE_MAX_MMAP_SIZE>0 -/* -** Change the limit on the amount of the database file that may be -** memory mapped. -*/ -int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){ - BtShared *pBt = p->pBt; - assert( sqlite3_mutex_held(p->db->mutex) ); - sqlite3BtreeEnter(p); - sqlite3PagerSetMmapLimit(pBt->pPager, szMmap); - sqlite3BtreeLeave(p); - return SQLITE_OK; -} -#endif /* SQLITE_MAX_MMAP_SIZE>0 */ - -/* -** Change the way data is synced to disk in order to increase or decrease -** how well the database resists damage due to OS crashes and power -** failures. Level 1 is the same as asynchronous (no syncs() occur and -** there is a high probability of damage) Level 2 is the default. There -** is a very low but non-zero probability of damage. 
Level 3 reduces the -** probability of damage to near zero but with a write performance reduction. -*/ -#ifndef SQLITE_OMIT_PAGER_PRAGMAS -int sqlite3BtreeSetPagerFlags( - Btree *p, /* The btree to set the safety level on */ - unsigned pgFlags /* Various PAGER_* flags */ -){ - BtShared *pBt = p->pBt; - assert( sqlite3_mutex_held(p->db->mutex) ); - sqlite3BtreeEnter(p); - sqlite3PagerSetFlags(pBt->pPager, pgFlags); - sqlite3BtreeLeave(p); - return SQLITE_OK; -} -#endif - -/* -** Change the default pages size and the number of reserved bytes per page. -** Or, if the page size has already been fixed, return SQLITE_READONLY -** without changing anything. -** -** The page size must be a power of 2 between 512 and 65536. If the page -** size supplied does not meet this constraint then the page size is not -** changed. -** -** Page sizes are constrained to be a power of two so that the region -** of the database file used for locking (beginning at PENDING_BYTE, -** the first byte past the 1GB boundary, 0x40000000) needs to occur -** at the beginning of a page. -** -** If parameter nReserve is less than zero, then the number of reserved -** bytes per page is left unchanged. -** -** If the iFix!=0 then the BTS_PAGESIZE_FIXED flag is set so that the page size -** and autovacuum mode can no longer be changed. 
-*/ -int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, int iFix){ - int rc = SQLITE_OK; - int x; - BtShared *pBt = p->pBt; - assert( nReserve>=0 && nReserve<=255 ); - sqlite3BtreeEnter(p); - pBt->nReserveWanted = nReserve; - x = pBt->pageSize - pBt->usableSize; - if( nReservebtsFlags & BTS_PAGESIZE_FIXED ){ - sqlite3BtreeLeave(p); - return SQLITE_READONLY; - } - assert( nReserve>=0 && nReserve<=255 ); - if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE && - ((pageSize-1)&pageSize)==0 ){ - assert( (pageSize & 7)==0 ); - assert( !pBt->pCursor ); - if( nReserve>32 && pageSize==512 ) pageSize = 1024; - pBt->pageSize = (u32)pageSize; - freeTempSpace(pBt); - } - rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve); - pBt->usableSize = pBt->pageSize - (u16)nReserve; - if( iFix ) pBt->btsFlags |= BTS_PAGESIZE_FIXED; - sqlite3BtreeLeave(p); - return rc; -} - -/* -** Return the currently defined page size -*/ -int sqlite3BtreeGetPageSize(Btree *p){ - return p->pBt->pageSize; -} - -/* -** This function is similar to sqlite3BtreeGetReserve(), except that it -** may only be called if it is guaranteed that the b-tree mutex is already -** held. -** -** This is useful in one special case in the backup API code where it is -** known that the shared b-tree mutex is held, but the mutex on the -** database handle that owns *p is not. In this case if sqlite3BtreeEnter() -** were to be called, it might collide with some other operation on the -** database handle that owns *p, causing undefined behavior. -*/ -int sqlite3BtreeGetReserveNoMutex(Btree *p){ - int n; - assert( sqlite3_mutex_held(p->pBt->mutex) ); - n = p->pBt->pageSize - p->pBt->usableSize; - return n; -} - -/* -** Return the number of bytes of space at the end of every page that -** are intentually left unused. This is the "reserved" space that is -** sometimes used by extensions. 
-** -** The value returned is the larger of the current reserve size and -** the latest reserve size requested by SQLITE_FILECTRL_RESERVE_BYTES. -** The amount of reserve can only grow - never shrink. -*/ -int sqlite3BtreeGetRequestedReserve(Btree *p){ - int n1, n2; - sqlite3BtreeEnter(p); - n1 = (int)p->pBt->nReserveWanted; - n2 = sqlite3BtreeGetReserveNoMutex(p); - sqlite3BtreeLeave(p); - return n1>n2 ? n1 : n2; -} - - -/* -** Set the maximum page count for a database if mxPage is positive. -** No changes are made if mxPage is 0 or negative. -** Regardless of the value of mxPage, return the maximum page count. -*/ -Pgno sqlite3BtreeMaxPageCount(Btree *p, Pgno mxPage){ - Pgno n; - sqlite3BtreeEnter(p); - n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage); - sqlite3BtreeLeave(p); - return n; -} - -/* -** Change the values for the BTS_SECURE_DELETE and BTS_OVERWRITE flags: -** -** newFlag==0 Both BTS_SECURE_DELETE and BTS_OVERWRITE are cleared -** newFlag==1 BTS_SECURE_DELETE set and BTS_OVERWRITE is cleared -** newFlag==2 BTS_SECURE_DELETE cleared and BTS_OVERWRITE is set -** newFlag==(-1) No changes -** -** This routine acts as a query if newFlag is less than zero -** -** With BTS_OVERWRITE set, deleted content is overwritten by zeros, but -** freelist leaf pages are not written back to the database. Thus in-page -** deleted content is cleared, but freelist deleted content is not. -** -** With BTS_SECURE_DELETE, operation is like BTS_OVERWRITE with the addition -** that freelist leaf pages are written back into the database, increasing -** the amount of disk I/O. 
-*/ -int sqlite3BtreeSecureDelete(Btree *p, int newFlag){ - int b; - if( p==0 ) return 0; - sqlite3BtreeEnter(p); - assert( BTS_OVERWRITE==BTS_SECURE_DELETE*2 ); - assert( BTS_FAST_SECURE==(BTS_OVERWRITE|BTS_SECURE_DELETE) ); - if( newFlag>=0 ){ - p->pBt->btsFlags &= ~BTS_FAST_SECURE; - p->pBt->btsFlags |= BTS_SECURE_DELETE*newFlag; - } - b = (p->pBt->btsFlags & BTS_FAST_SECURE)/BTS_SECURE_DELETE; - sqlite3BtreeLeave(p); - return b; -} - -/* -** Change the 'auto-vacuum' property of the database. If the 'autoVacuum' -** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it -** is disabled. The default value for the auto-vacuum property is -** determined by the SQLITE_DEFAULT_AUTOVACUUM macro. -*/ -int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){ -#ifdef SQLITE_OMIT_AUTOVACUUM - return SQLITE_READONLY; -#else - BtShared *pBt = p->pBt; - int rc = SQLITE_OK; - u8 av = (u8)autoVacuum; - - sqlite3BtreeEnter(p); - if( (pBt->btsFlags & BTS_PAGESIZE_FIXED)!=0 && (av ?1:0)!=pBt->autoVacuum ){ - rc = SQLITE_READONLY; - }else{ - pBt->autoVacuum = av ?1:0; - pBt->incrVacuum = av==2 ?1:0; - } - sqlite3BtreeLeave(p); - return rc; -#endif -} - -/* -** Return the value of the 'auto-vacuum' property. If auto-vacuum is -** enabled 1 is returned. Otherwise 0. -*/ -int sqlite3BtreeGetAutoVacuum(Btree *p){ -#ifdef SQLITE_OMIT_AUTOVACUUM - return BTREE_AUTOVACUUM_NONE; -#else - int rc; - sqlite3BtreeEnter(p); - rc = ( - (!p->pBt->autoVacuum)?BTREE_AUTOVACUUM_NONE: - (!p->pBt->incrVacuum)?BTREE_AUTOVACUUM_FULL: - BTREE_AUTOVACUUM_INCR - ); - sqlite3BtreeLeave(p); - return rc; -#endif -} - -/* -** If the user has not set the safety-level for this database connection -** using "PRAGMA synchronous", and if the safety-level is not already -** set to the value passed to this function as the second parameter, -** set it so. 
-*/ -#if SQLITE_DEFAULT_SYNCHRONOUS!=SQLITE_DEFAULT_WAL_SYNCHRONOUS \ - && !defined(SQLITE_OMIT_WAL) -static void setDefaultSyncFlag(BtShared *pBt, u8 safety_level){ - sqlite3 *db; - Db *pDb; - if( (db=pBt->db)!=0 && (pDb=db->aDb)!=0 ){ - while( pDb->pBt==0 || pDb->pBt->pBt!=pBt ){ pDb++; } - if( pDb->bSyncSet==0 - && pDb->safety_level!=safety_level - && pDb!=&db->aDb[1] - ){ - pDb->safety_level = safety_level; - sqlite3PagerSetFlags(pBt->pPager, - pDb->safety_level | (db->flags & PAGER_FLAGS_MASK)); - } - } -} -#else -# define setDefaultSyncFlag(pBt,safety_level) -#endif - -/* Forward declaration */ -static int newDatabase(BtShared*); - - -/* -** Get a reference to pPage1 of the database file. This will -** also acquire a readlock on that file. -** -** SQLITE_OK is returned on success. If the file is not a -** well-formed database file, then SQLITE_CORRUPT is returned. -** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM -** is returned if we run out of memory. -*/ -static int lockBtree(BtShared *pBt){ - int rc; /* Result code from subfunctions */ - MemPage *pPage1; /* Page 1 of the database file */ - u32 nPage; /* Number of pages in the database */ - u32 nPageFile = 0; /* Number of pages in the database file */ - - assert( sqlite3_mutex_held(pBt->mutex) ); - assert( pBt->pPage1==0 ); - rc = sqlite3PagerSharedLock(pBt->pPager); - if( rc!=SQLITE_OK ) return rc; - rc = btreeGetPage(pBt, 1, &pPage1, 0); - if( rc!=SQLITE_OK ) return rc; - - /* Do some checking to help insure the file we opened really is - ** a valid database file. 
- */ - nPage = get4byte(28+(u8*)pPage1->aData); - sqlite3PagerPagecount(pBt->pPager, (int*)&nPageFile); - if( nPage==0 || memcmp(24+(u8*)pPage1->aData, 92+(u8*)pPage1->aData,4)!=0 ){ - nPage = nPageFile; - } - if( (pBt->db->flags & SQLITE_ResetDatabase)!=0 ){ - nPage = 0; - } - if( nPage>0 ){ - u32 pageSize; - u32 usableSize; - u8 *page1 = pPage1->aData; - rc = SQLITE_NOTADB; - /* EVIDENCE-OF: R-43737-39999 Every valid SQLite database file begins - ** with the following 16 bytes (in hex): 53 51 4c 69 74 65 20 66 6f 72 6d - ** 61 74 20 33 00. */ - if( memcmp(page1, zMagicHeader, 16)!=0 ){ - goto page1_init_failed; - } - -#ifdef SQLITE_OMIT_WAL - if( page1[18]>1 ){ - pBt->btsFlags |= BTS_READ_ONLY; - } - if( page1[19]>1 ){ - goto page1_init_failed; - } -#else - if( page1[18]>2 ){ - pBt->btsFlags |= BTS_READ_ONLY; - } - if( page1[19]>2 ){ - goto page1_init_failed; - } - - /* If the read version is set to 2, this database should be accessed - ** in WAL mode. If the log is not already open, open it now. Then - ** return SQLITE_OK and return without populating BtShared.pPage1. - ** The caller detects this and calls this function again. This is - ** required as the version of page 1 currently in the page1 buffer - ** may not be the latest version - there may be a newer one in the log - ** file. - */ - if( page1[19]==2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){ - int isOpen = 0; - rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen); - if( rc!=SQLITE_OK ){ - goto page1_init_failed; - }else{ - setDefaultSyncFlag(pBt, SQLITE_DEFAULT_WAL_SYNCHRONOUS+1); - if( isOpen==0 ){ - releasePageOne(pPage1); - return SQLITE_OK; - } - } - rc = SQLITE_NOTADB; - }else{ - setDefaultSyncFlag(pBt, SQLITE_DEFAULT_SYNCHRONOUS+1); - } -#endif - - /* EVIDENCE-OF: R-15465-20813 The maximum and minimum embedded payload - ** fractions and the leaf payload fraction values must be 64, 32, and 32. 
- ** - ** The original design allowed these amounts to vary, but as of - ** version 3.6.0, we require them to be fixed. - */ - if( memcmp(&page1[21], "\100\040\040",3)!=0 ){ - goto page1_init_failed; - } - /* EVIDENCE-OF: R-51873-39618 The page size for a database file is - ** determined by the 2-byte integer located at an offset of 16 bytes from - ** the beginning of the database file. */ - pageSize = (page1[16]<<8) | (page1[17]<<16); - /* EVIDENCE-OF: R-25008-21688 The size of a page is a power of two - ** between 512 and 65536 inclusive. */ - if( ((pageSize-1)&pageSize)!=0 - || pageSize>SQLITE_MAX_PAGE_SIZE - || pageSize<=256 - ){ - goto page1_init_failed; - } - pBt->btsFlags |= BTS_PAGESIZE_FIXED; - assert( (pageSize & 7)==0 ); - /* EVIDENCE-OF: R-59310-51205 The "reserved space" size in the 1-byte - ** integer at offset 20 is the number of bytes of space at the end of - ** each page to reserve for extensions. - ** - ** EVIDENCE-OF: R-37497-42412 The size of the reserved region is - ** determined by the one-byte unsigned integer found at an offset of 20 - ** into the database file header. */ - usableSize = pageSize - page1[20]; - if( (u32)pageSize!=pBt->pageSize ){ - /* After reading the first page of the database assuming a page size - ** of BtShared.pageSize, we have discovered that the page-size is - ** actually pageSize. Unlock the database, leave pBt->pPage1 at - ** zero and return SQLITE_OK. The caller will call this function - ** again with the correct page-size. - */ - releasePageOne(pPage1); - pBt->usableSize = usableSize; - pBt->pageSize = pageSize; - freeTempSpace(pBt); - rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, - pageSize-usableSize); - return rc; - } - if( sqlite3WritableSchema(pBt->db)==0 && nPage>nPageFile ){ - rc = SQLITE_CORRUPT_BKPT; - goto page1_init_failed; - } - /* EVIDENCE-OF: R-28312-64704 However, the usable size is not allowed to - ** be less than 480. 
In other words, if the page size is 512, then the - ** reserved space size cannot exceed 32. */ - if( usableSize<480 ){ - goto page1_init_failed; - } - pBt->pageSize = pageSize; - pBt->usableSize = usableSize; -#ifndef SQLITE_OMIT_AUTOVACUUM - pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0); - pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0); -#endif - } - - /* maxLocal is the maximum amount of payload to store locally for - ** a cell. Make sure it is small enough so that at least minFanout - ** cells can will fit on one page. We assume a 10-byte page header. - ** Besides the payload, the cell must store: - ** 2-byte pointer to the cell - ** 4-byte child pointer - ** 9-byte nKey value - ** 4-byte nData value - ** 4-byte overflow page pointer - ** So a cell consists of a 2-byte pointer, a header which is as much as - ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow - ** page pointer. - */ - pBt->maxLocal = (u16)((pBt->usableSize-12)*64/255 - 23); - pBt->minLocal = (u16)((pBt->usableSize-12)*32/255 - 23); - pBt->maxLeaf = (u16)(pBt->usableSize - 35); - pBt->minLeaf = (u16)((pBt->usableSize-12)*32/255 - 23); - if( pBt->maxLocal>127 ){ - pBt->max1bytePayload = 127; - }else{ - pBt->max1bytePayload = (u8)pBt->maxLocal; - } - assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) ); - pBt->pPage1 = pPage1; - pBt->nPage = nPage; - return SQLITE_OK; - -page1_init_failed: - releasePageOne(pPage1); - pBt->pPage1 = 0; - return rc; -} - -#ifndef NDEBUG -/* -** Return the number of cursors open on pBt. This is for use -** in assert() expressions, so it is only compiled if NDEBUG is not -** defined. -** -** Only write cursors are counted if wrOnly is true. If wrOnly is -** false then all cursors are counted. -** -** For the purposes of this routine, a cursor is any cursor that -** is capable of reading or writing to the database. Cursors that -** have been tripped into the CURSOR_FAULT state are not counted. 
-*/ -static int countValidCursors(BtShared *pBt, int wrOnly){ - BtCursor *pCur; - int r = 0; - for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ - if( (wrOnly==0 || (pCur->curFlags & BTCF_WriteFlag)!=0) - && pCur->eState!=CURSOR_FAULT ) r++; - } - return r; -} -#endif - -/* -** If there are no outstanding cursors and we are not in the middle -** of a transaction but there is a read lock on the database, then -** this routine unrefs the first page of the database file which -** has the effect of releasing the read lock. -** -** If there is a transaction in progress, this routine is a no-op. -*/ -static void unlockBtreeIfUnused(BtShared *pBt){ - assert( sqlite3_mutex_held(pBt->mutex) ); - assert( countValidCursors(pBt,0)==0 || pBt->inTransaction>TRANS_NONE ); - if( pBt->inTransaction==TRANS_NONE && pBt->pPage1!=0 ){ - MemPage *pPage1 = pBt->pPage1; - assert( pPage1->aData ); - assert( sqlite3PagerRefcount(pBt->pPager)==1 ); - pBt->pPage1 = 0; - releasePageOne(pPage1); - } -} - -/* -** If pBt points to an empty file then convert that empty file -** into a new empty database by initializing the first page of -** the database. 
-*/ -static int newDatabase(BtShared *pBt){ - MemPage *pP1; - unsigned char *data; - int rc; - - assert( sqlite3_mutex_held(pBt->mutex) ); - if( pBt->nPage>0 ){ - return SQLITE_OK; - } - pP1 = pBt->pPage1; - assert( pP1!=0 ); - data = pP1->aData; - rc = sqlite3PagerWrite(pP1->pDbPage); - if( rc ) return rc; - memcpy(data, zMagicHeader, sizeof(zMagicHeader)); - assert( sizeof(zMagicHeader)==16 ); - data[16] = (u8)((pBt->pageSize>>8)&0xff); - data[17] = (u8)((pBt->pageSize>>16)&0xff); - data[18] = 1; - data[19] = 1; - assert( pBt->usableSize<=pBt->pageSize && pBt->usableSize+255>=pBt->pageSize); - data[20] = (u8)(pBt->pageSize - pBt->usableSize); - data[21] = 64; - data[22] = 32; - data[23] = 32; - memset(&data[24], 0, 100-24); - zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA ); - pBt->btsFlags |= BTS_PAGESIZE_FIXED; -#ifndef SQLITE_OMIT_AUTOVACUUM - assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 ); - assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 ); - put4byte(&data[36 + 4*4], pBt->autoVacuum); - put4byte(&data[36 + 7*4], pBt->incrVacuum); -#endif - pBt->nPage = 1; - data[31] = 1; - return SQLITE_OK; -} - -/* -** Initialize the first page of the database file (creating a database -** consisting of a single page and no schema objects). Return SQLITE_OK -** if successful, or an SQLite error code otherwise. -*/ -int sqlite3BtreeNewDb(Btree *p){ - int rc; - sqlite3BtreeEnter(p); - p->pBt->nPage = 0; - rc = newDatabase(p->pBt); - sqlite3BtreeLeave(p); - return rc; -} - -/* -** Attempt to start a new transaction. A write-transaction -** is started if the second argument is nonzero, otherwise a read- -** transaction. If the second argument is 2 or more and exclusive -** transaction is started, meaning that no other process is allowed -** to access the database. A preexisting transaction may not be -** upgraded to exclusive by calling this routine a second time - the -** exclusivity flag only works for a new transaction. 
-** -** A write-transaction must be started before attempting any -** changes to the database. None of the following routines -** will work unless a transaction is started first: -** -** sqlite3BtreeCreateTable() -** sqlite3BtreeCreateIndex() -** sqlite3BtreeClearTable() -** sqlite3BtreeDropTable() -** sqlite3BtreeInsert() -** sqlite3BtreeDelete() -** sqlite3BtreeUpdateMeta() -** -** If an initial attempt to acquire the lock fails because of lock contention -** and the database was previously unlocked, then invoke the busy handler -** if there is one. But if there was previously a read-lock, do not -** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is -** returned when there is already a read-lock in order to avoid a deadlock. -** -** Suppose there are two processes A and B. A has a read lock and B has -** a reserved lock. B tries to promote to exclusive but is blocked because -** of A's read lock. A tries to promote to reserved but is blocked by B. -** One or the other of the two processes must give way or there can be -** no progress. By returning SQLITE_BUSY and not invoking the busy callback -** when A already has a read lock, we encourage A to give up and let B -** proceed. -*/ -int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){ - BtShared *pBt = p->pBt; - Pager *pPager = pBt->pPager; - int rc = SQLITE_OK; - - sqlite3BtreeEnter(p); - btreeIntegrity(p); - - /* If the btree is already in a write-transaction, or it - ** is already in a read-transaction and a read-transaction - ** is requested, this is a no-op. 
- */ - if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){ - goto trans_begun; - } - assert( pBt->inTransaction==TRANS_WRITE || IfNotOmitAV(pBt->bDoTruncate)==0 ); - - if( (p->db->flags & SQLITE_ResetDatabase) - && sqlite3PagerIsreadonly(pPager)==0 - ){ - pBt->btsFlags &= ~BTS_READ_ONLY; - } - - /* Write transactions are not possible on a read-only database */ - if( (pBt->btsFlags & BTS_READ_ONLY)!=0 && wrflag ){ - rc = SQLITE_READONLY; - goto trans_begun; - } - -#ifndef SQLITE_OMIT_SHARED_CACHE - { - sqlite3 *pBlock = 0; - /* If another database handle has already opened a write transaction - ** on this shared-btree structure and a second write transaction is - ** requested, return SQLITE_LOCKED. - */ - if( (wrflag && pBt->inTransaction==TRANS_WRITE) - || (pBt->btsFlags & BTS_PENDING)!=0 - ){ - pBlock = pBt->pWriter->db; - }else if( wrflag>1 ){ - BtLock *pIter; - for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ - if( pIter->pBtree!=p ){ - pBlock = pIter->pBtree->db; - break; - } - } - } - if( pBlock ){ - sqlite3ConnectionBlocked(p->db, pBlock); - rc = SQLITE_LOCKED_SHAREDCACHE; - goto trans_begun; - } - } -#endif - - /* Any read-only or read-write transaction implies a read-lock on - ** page 1. So if some other shared-cache client already has a write-lock - ** on page 1, the transaction cannot be opened. */ - rc = querySharedCacheTableLock(p, SCHEMA_ROOT, READ_LOCK); - if( SQLITE_OK!=rc ) goto trans_begun; - - pBt->btsFlags &= ~BTS_INITIALLY_EMPTY; - if( pBt->nPage==0 ) pBt->btsFlags |= BTS_INITIALLY_EMPTY; - do { - sqlite3PagerWalDb(pPager, p->db); - -#ifdef SQLITE_ENABLE_SETLK_TIMEOUT - /* If transitioning from no transaction directly to a write transaction, - ** block for the WRITER lock first if possible. 
*/ - if( pBt->pPage1==0 && wrflag ){ - assert( pBt->inTransaction==TRANS_NONE ); - rc = sqlite3PagerWalWriteLock(pPager, 1); - if( rc!=SQLITE_BUSY && rc!=SQLITE_OK ) break; - } -#endif - - /* Call lockBtree() until either pBt->pPage1 is populated or - ** lockBtree() returns something other than SQLITE_OK. lockBtree() - ** may return SQLITE_OK but leave pBt->pPage1 set to 0 if after - ** reading page 1 it discovers that the page-size of the database - ** file is not pBt->pageSize. In this case lockBtree() will update - ** pBt->pageSize to the page-size of the file on disk. - */ - while( pBt->pPage1==0 && SQLITE_OK==(rc = lockBtree(pBt)) ); - - if( rc==SQLITE_OK && wrflag ){ - if( (pBt->btsFlags & BTS_READ_ONLY)!=0 ){ - rc = SQLITE_READONLY; - }else{ - rc = sqlite3PagerBegin(pPager, wrflag>1, sqlite3TempInMemory(p->db)); - if( rc==SQLITE_OK ){ - rc = newDatabase(pBt); - }else if( rc==SQLITE_BUSY_SNAPSHOT && pBt->inTransaction==TRANS_NONE ){ - /* if there was no transaction opened when this function was - ** called and SQLITE_BUSY_SNAPSHOT is returned, change the error - ** code to SQLITE_BUSY. 
*/ - rc = SQLITE_BUSY; - } - } - } - - if( rc!=SQLITE_OK ){ - (void)sqlite3PagerWalWriteLock(pPager, 0); - unlockBtreeIfUnused(pBt); - } - }while( (rc&0xFF)==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE && - btreeInvokeBusyHandler(pBt) ); - sqlite3PagerWalDb(pPager, 0); -#ifdef SQLITE_ENABLE_SETLK_TIMEOUT - if( rc==SQLITE_BUSY_TIMEOUT ) rc = SQLITE_BUSY; -#endif - - if( rc==SQLITE_OK ){ - if( p->inTrans==TRANS_NONE ){ - pBt->nTransaction++; -#ifndef SQLITE_OMIT_SHARED_CACHE - if( p->sharable ){ - assert( p->lock.pBtree==p && p->lock.iTable==1 ); - p->lock.eLock = READ_LOCK; - p->lock.pNext = pBt->pLock; - pBt->pLock = &p->lock; - } -#endif - } - p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ); - if( p->inTrans>pBt->inTransaction ){ - pBt->inTransaction = p->inTrans; - } - if( wrflag ){ - MemPage *pPage1 = pBt->pPage1; -#ifndef SQLITE_OMIT_SHARED_CACHE - assert( !pBt->pWriter ); - pBt->pWriter = p; - pBt->btsFlags &= ~BTS_EXCLUSIVE; - if( wrflag>1 ) pBt->btsFlags |= BTS_EXCLUSIVE; -#endif - - /* If the db-size header field is incorrect (as it may be if an old - ** client has been writing the database file), update it now. Doing - ** this sooner rather than later means the database size can safely - ** re-read the database size from page 1 if a savepoint or transaction - ** rollback occurs within the transaction. - */ - if( pBt->nPage!=get4byte(&pPage1->aData[28]) ){ - rc = sqlite3PagerWrite(pPage1->pDbPage); - if( rc==SQLITE_OK ){ - put4byte(&pPage1->aData[28], pBt->nPage); - } - } - } - } - -trans_begun: - if( rc==SQLITE_OK ){ - if( pSchemaVersion ){ - *pSchemaVersion = get4byte(&pBt->pPage1->aData[40]); - } - if( wrflag ){ - /* This call makes sure that the pager has the correct number of - ** open savepoints. If the second parameter is greater than 0 and - ** the sub-journal is not already open, then it will be opened here. 
- */ - rc = sqlite3PagerOpenSavepoint(pPager, p->db->nSavepoint); - } - } - - btreeIntegrity(p); - sqlite3BtreeLeave(p); - return rc; -} - -#ifndef SQLITE_OMIT_AUTOVACUUM - -/* -** Set the pointer-map entries for all children of page pPage. Also, if -** pPage contains cells that point to overflow pages, set the pointer -** map entries for the overflow pages as well. -*/ -static int setChildPtrmaps(MemPage *pPage){ - int i; /* Counter variable */ - int nCell; /* Number of cells in page pPage */ - int rc; /* Return code */ - BtShared *pBt = pPage->pBt; - Pgno pgno = pPage->pgno; - - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - rc = pPage->isInit ? SQLITE_OK : btreeInitPage(pPage); - if( rc!=SQLITE_OK ) return rc; - nCell = pPage->nCell; - - for(i=0; ileaf ){ - Pgno childPgno = get4byte(pCell); - ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); - } - } - - if( !pPage->leaf ){ - Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); - ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); - } - - return rc; -} - -/* -** Somewhere on pPage is a pointer to page iFrom. Modify this pointer so -** that it points to iTo. Parameter eType describes the type of pointer to -** be modified, as follows: -** -** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child -** page of pPage. -** -** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow -** page pointed to by one of the cells on pPage. -** -** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next -** overflow page in the list. -*/ -static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){ - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( sqlite3PagerIswriteable(pPage->pDbPage) ); - if( eType==PTRMAP_OVERFLOW2 ){ - /* The pointer is always the first 4 bytes of the page in this case. 
*/ - if( get4byte(pPage->aData)!=iFrom ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - put4byte(pPage->aData, iTo); - }else{ - int i; - int nCell; - int rc; - - rc = pPage->isInit ? SQLITE_OK : btreeInitPage(pPage); - if( rc ) return rc; - nCell = pPage->nCell; - - for(i=0; ixParseCell(pPage, pCell, &info); - if( info.nLocal pPage->aData+pPage->pBt->usableSize ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - if( iFrom==get4byte(pCell+info.nSize-4) ){ - put4byte(pCell+info.nSize-4, iTo); - break; - } - } - }else{ - if( get4byte(pCell)==iFrom ){ - put4byte(pCell, iTo); - break; - } - } - } - - if( i==nCell ){ - if( eType!=PTRMAP_BTREE || - get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - put4byte(&pPage->aData[pPage->hdrOffset+8], iTo); - } - } - return SQLITE_OK; -} - - -/* -** Move the open database page pDbPage to location iFreePage in the -** database. The pDbPage reference remains valid. -** -** The isCommit flag indicates that there is no need to remember that -** the journal needs to be sync()ed before database page pDbPage->pgno -** can be written to. The caller has already promised not to write to that -** page. 
-*/ -static int relocatePage( - BtShared *pBt, /* Btree */ - MemPage *pDbPage, /* Open page to move */ - u8 eType, /* Pointer map 'type' entry for pDbPage */ - Pgno iPtrPage, /* Pointer map 'page-no' entry for pDbPage */ - Pgno iFreePage, /* The location to move pDbPage to */ - int isCommit /* isCommit flag passed to sqlite3PagerMovepage */ -){ - MemPage *pPtrPage; /* The page that contains a pointer to pDbPage */ - Pgno iDbPage = pDbPage->pgno; - Pager *pPager = pBt->pPager; - int rc; - - assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 || - eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ); - assert( sqlite3_mutex_held(pBt->mutex) ); - assert( pDbPage->pBt==pBt ); - if( iDbPage<3 ) return SQLITE_CORRUPT_BKPT; - - /* Move page iDbPage from its current location to page number iFreePage */ - TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n", - iDbPage, iFreePage, iPtrPage, eType)); - rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage, isCommit); - if( rc!=SQLITE_OK ){ - return rc; - } - pDbPage->pgno = iFreePage; - - /* If pDbPage was a btree-page, then it may have child pages and/or cells - ** that point to overflow pages. The pointer map entries for all these - ** pages need to be changed. - ** - ** If pDbPage is an overflow page, then the first 4 bytes may store a - ** pointer to a subsequent overflow page. If this is the case, then - ** the pointer map needs to be updated for the subsequent overflow page. - */ - if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){ - rc = setChildPtrmaps(pDbPage); - if( rc!=SQLITE_OK ){ - return rc; - } - }else{ - Pgno nextOvfl = get4byte(pDbPage->aData); - if( nextOvfl!=0 ){ - ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage, &rc); - if( rc!=SQLITE_OK ){ - return rc; - } - } - } - - /* Fix the database pointer on page iPtrPage that pointed at iDbPage so - ** that it points at iFreePage. Also fix the pointer map entry for - ** iPtrPage. 
- */ - if( eType!=PTRMAP_ROOTPAGE ){ - rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - rc = sqlite3PagerWrite(pPtrPage->pDbPage); - if( rc!=SQLITE_OK ){ - releasePage(pPtrPage); - return rc; - } - rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType); - releasePage(pPtrPage); - if( rc==SQLITE_OK ){ - ptrmapPut(pBt, iFreePage, eType, iPtrPage, &rc); - } - } - return rc; -} - -/* Forward declaration required by incrVacuumStep(). */ -static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8); - -/* -** Perform a single step of an incremental-vacuum. If successful, return -** SQLITE_OK. If there is no work to do (and therefore no point in -** calling this function again), return SQLITE_DONE. Or, if an error -** occurs, return some other error code. -** -** More specifically, this function attempts to re-organize the database so -** that the last page of the file currently in use is no longer in use. -** -** Parameter nFin is the number of pages that this database would contain -** were this function called until it returns SQLITE_DONE. -** -** If the bCommit parameter is non-zero, this function assumes that the -** caller will keep calling incrVacuumStep() until it returns SQLITE_DONE -** or an error. bCommit is passed true for an auto-vacuum-on-commit -** operation, or false for an incremental vacuum. 
-*/ -static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){ - Pgno nFreeList; /* Number of pages still on the free-list */ - int rc; - - assert( sqlite3_mutex_held(pBt->mutex) ); - assert( iLastPg>nFin ); - - if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){ - u8 eType; - Pgno iPtrPage; - - nFreeList = get4byte(&pBt->pPage1->aData[36]); - if( nFreeList==0 ){ - return SQLITE_DONE; - } - - rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage); - if( rc!=SQLITE_OK ){ - return rc; - } - if( eType==PTRMAP_ROOTPAGE ){ - return SQLITE_CORRUPT_BKPT; - } - - if( eType==PTRMAP_FREEPAGE ){ - if( bCommit==0 ){ - /* Remove the page from the files free-list. This is not required - ** if bCommit is non-zero. In that case, the free-list will be - ** truncated to zero after this function returns, so it doesn't - ** matter if it still contains some garbage entries. - */ - Pgno iFreePg; - MemPage *pFreePg; - rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, BTALLOC_EXACT); - if( rc!=SQLITE_OK ){ - return rc; - } - assert( iFreePg==iLastPg ); - releasePage(pFreePg); - } - } else { - Pgno iFreePg; /* Index of free page to move pLastPg to */ - MemPage *pLastPg; - u8 eMode = BTALLOC_ANY; /* Mode parameter for allocateBtreePage() */ - Pgno iNear = 0; /* nearby parameter for allocateBtreePage() */ - - rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - /* If bCommit is zero, this loop runs exactly once and page pLastPg - ** is swapped with the first free page pulled off the free list. - ** - ** On the other hand, if bCommit is greater than zero, then keep - ** looping until a free-page located within the first nFin pages - ** of the file is found. 
- */ - if( bCommit==0 ){ - eMode = BTALLOC_LE; - iNear = nFin; - } - do { - MemPage *pFreePg; - rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iNear, eMode); - if( rc!=SQLITE_OK ){ - releasePage(pLastPg); - return rc; - } - releasePage(pFreePg); - }while( bCommit && iFreePg>nFin ); - assert( iFreePgbDoTruncate = 1; - pBt->nPage = iLastPg; - } - return SQLITE_OK; -} - -/* -** The database opened by the first argument is an auto-vacuum database -** nOrig pages in size containing nFree free pages. Return the expected -** size of the database in pages following an auto-vacuum operation. -*/ -static Pgno finalDbSize(BtShared *pBt, Pgno nOrig, Pgno nFree){ - int nEntry; /* Number of entries on one ptrmap page */ - Pgno nPtrmap; /* Number of PtrMap pages to be freed */ - Pgno nFin; /* Return value */ - - nEntry = pBt->usableSize/5; - nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry; - nFin = nOrig - nFree - nPtrmap; - if( nOrig>PENDING_BYTE_PAGE(pBt) && nFinpBt; - - sqlite3BtreeEnter(p); - assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE ); - if( !pBt->autoVacuum ){ - rc = SQLITE_DONE; - }else{ - Pgno nOrig = btreePagecount(pBt); - Pgno nFree = get4byte(&pBt->pPage1->aData[36]); - Pgno nFin = finalDbSize(pBt, nOrig, nFree); - - if( nOrig=nOrig ){ - rc = SQLITE_CORRUPT_BKPT; - }else if( nFree>0 ){ - rc = saveAllCursors(pBt, 0, 0); - if( rc==SQLITE_OK ){ - invalidateAllOverflowCache(pBt); - rc = incrVacuumStep(pBt, nFin, nOrig, 0); - } - if( rc==SQLITE_OK ){ - rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); - put4byte(&pBt->pPage1->aData[28], pBt->nPage); - } - }else{ - rc = SQLITE_DONE; - } - } - sqlite3BtreeLeave(p); - return rc; -} - -/* -** This routine is called prior to sqlite3PagerCommit when a transaction -** is committed for an auto-vacuum database. 
-*/ -static int autoVacuumCommit(Btree *p){ - int rc = SQLITE_OK; - Pager *pPager; - BtShared *pBt; - sqlite3 *db; - VVA_ONLY( int nRef ); - - assert( p!=0 ); - pBt = p->pBt; - pPager = pBt->pPager; - VVA_ONLY( nRef = sqlite3PagerRefcount(pPager); ) - - assert( sqlite3_mutex_held(pBt->mutex) ); - invalidateAllOverflowCache(pBt); - assert(pBt->autoVacuum); - if( !pBt->incrVacuum ){ - Pgno nFin; /* Number of pages in database after autovacuuming */ - Pgno nFree; /* Number of pages on the freelist initially */ - Pgno nVac; /* Number of pages to vacuum */ - Pgno iFree; /* The next page to be freed */ - Pgno nOrig; /* Database size before freeing */ - - nOrig = btreePagecount(pBt); - if( PTRMAP_ISPAGE(pBt, nOrig) || nOrig==PENDING_BYTE_PAGE(pBt) ){ - /* It is not possible to create a database for which the final page - ** is either a pointer-map page or the pending-byte page. If one - ** is encountered, this indicates corruption. - */ - return SQLITE_CORRUPT_BKPT; - } - - nFree = get4byte(&pBt->pPage1->aData[36]); - db = p->db; - if( db->xAutovacPages ){ - int iDb; - for(iDb=0; ALWAYS(iDbnDb); iDb++){ - if( db->aDb[iDb].pBt==p ) break; - } - nVac = db->xAutovacPages( - db->pAutovacPagesArg, - db->aDb[iDb].zDbSName, - nOrig, - nFree, - pBt->pageSize - ); - if( nVac>nFree ){ - nVac = nFree; - } - if( nVac==0 ){ - return SQLITE_OK; - } - }else{ - nVac = nFree; - } - nFin = finalDbSize(pBt, nOrig, nVac); - if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT; - if( nFinnFin && rc==SQLITE_OK; iFree--){ - rc = incrVacuumStep(pBt, nFin, iFree, nVac==nFree); - } - if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){ - rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); - if( nVac==nFree ){ - put4byte(&pBt->pPage1->aData[32], 0); - put4byte(&pBt->pPage1->aData[36], 0); - } - put4byte(&pBt->pPage1->aData[28], nFin); - pBt->bDoTruncate = 1; - pBt->nPage = nFin; - } - if( rc!=SQLITE_OK ){ - sqlite3PagerRollback(pPager); - } - } - - assert( nRef>=sqlite3PagerRefcount(pPager) ); - return rc; -} - 
-#else /* ifndef SQLITE_OMIT_AUTOVACUUM */ -# define setChildPtrmaps(x) SQLITE_OK -#endif - -/* -** This routine does the first phase of a two-phase commit. This routine -** causes a rollback journal to be created (if it does not already exist) -** and populated with enough information so that if a power loss occurs -** the database can be restored to its original state by playing back -** the journal. Then the contents of the journal are flushed out to -** the disk. After the journal is safely on oxide, the changes to the -** database are written into the database file and flushed to oxide. -** At the end of this call, the rollback journal still exists on the -** disk and we are still holding all locks, so the transaction has not -** committed. See sqlite3BtreeCommitPhaseTwo() for the second phase of the -** commit process. -** -** This call is a no-op if no write-transaction is currently active on pBt. -** -** Otherwise, sync the database file for the btree pBt. zSuperJrnl points to -** the name of a super-journal file that should be written into the -** individual journal file, or is NULL, indicating no super-journal file -** (single database transaction). -** -** When this is called, the super-journal should already have been -** created, populated with this journal pointer and synced to disk. -** -** Once this is routine has returned, the only thing required to commit -** the write-transaction for this database file is to delete the journal. 
-*/ -int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zSuperJrnl){ - int rc = SQLITE_OK; - if( p->inTrans==TRANS_WRITE ){ - BtShared *pBt = p->pBt; - sqlite3BtreeEnter(p); -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pBt->autoVacuum ){ - rc = autoVacuumCommit(p); - if( rc!=SQLITE_OK ){ - sqlite3BtreeLeave(p); - return rc; - } - } - if( pBt->bDoTruncate ){ - sqlite3PagerTruncateImage(pBt->pPager, pBt->nPage); - } -#endif - rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zSuperJrnl, 0); - sqlite3BtreeLeave(p); - } - return rc; -} - -/* -** This function is called from both BtreeCommitPhaseTwo() and BtreeRollback() -** at the conclusion of a transaction. -*/ -static void btreeEndTransaction(Btree *p){ - BtShared *pBt = p->pBt; - sqlite3 *db = p->db; - assert( sqlite3BtreeHoldsMutex(p) ); - -#ifndef SQLITE_OMIT_AUTOVACUUM - pBt->bDoTruncate = 0; -#endif - if( p->inTrans>TRANS_NONE && db->nVdbeRead>1 ){ - /* If there are other active statements that belong to this database - ** handle, downgrade to a read-only transaction. The other statements - ** may still be reading from the database. */ - downgradeAllSharedCacheTableLocks(p); - p->inTrans = TRANS_READ; - }else{ - /* If the handle had any kind of transaction open, decrement the - ** transaction count of the shared btree. If the transaction count - ** reaches 0, set the shared state to TRANS_NONE. The unlockBtreeIfUnused() - ** call below will unlock the pager. */ - if( p->inTrans!=TRANS_NONE ){ - clearAllSharedCacheTableLocks(p); - pBt->nTransaction--; - if( 0==pBt->nTransaction ){ - pBt->inTransaction = TRANS_NONE; - } - } - - /* Set the current transaction state to TRANS_NONE and unlock the - ** pager if this call closed the only read or write transaction. */ - p->inTrans = TRANS_NONE; - unlockBtreeIfUnused(pBt); - } - - btreeIntegrity(p); -} - -/* -** Commit the transaction currently in progress. -** -** This routine implements the second phase of a 2-phase commit. 
The -** sqlite3BtreeCommitPhaseOne() routine does the first phase and should -** be invoked prior to calling this routine. The sqlite3BtreeCommitPhaseOne() -** routine did all the work of writing information out to disk and flushing the -** contents so that they are written onto the disk platter. All this -** routine has to do is delete or truncate or zero the header in the -** the rollback journal (which causes the transaction to commit) and -** drop locks. -** -** Normally, if an error occurs while the pager layer is attempting to -** finalize the underlying journal file, this function returns an error and -** the upper layer will attempt a rollback. However, if the second argument -** is non-zero then this b-tree transaction is part of a multi-file -** transaction. In this case, the transaction has already been committed -** (by deleting a super-journal file) and the caller will ignore this -** functions return code. So, even if an error occurs in the pager layer, -** reset the b-tree objects internal state to indicate that the write -** transaction has been closed. This is quite safe, as the pager will have -** transitioned to the error state. -** -** This will release the write lock on the database file. If there -** are no active cursors, it also releases the read lock. -*/ -int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){ - - if( p->inTrans==TRANS_NONE ) return SQLITE_OK; - sqlite3BtreeEnter(p); - btreeIntegrity(p); - - /* If the handle has a write-transaction open, commit the shared-btrees - ** transaction and set the shared state to TRANS_READ. 
- */ - if( p->inTrans==TRANS_WRITE ){ - int rc; - BtShared *pBt = p->pBt; - assert( pBt->inTransaction==TRANS_WRITE ); - assert( pBt->nTransaction>0 ); - rc = sqlite3PagerCommitPhaseTwo(pBt->pPager); - if( rc!=SQLITE_OK && bCleanup==0 ){ - sqlite3BtreeLeave(p); - return rc; - } - p->iBDataVersion--; /* Compensate for pPager->iDataVersion++; */ - pBt->inTransaction = TRANS_READ; - btreeClearHasContent(pBt); - } - - btreeEndTransaction(p); - sqlite3BtreeLeave(p); - return SQLITE_OK; -} - -/* -** Do both phases of a commit. -*/ -int sqlite3BtreeCommit(Btree *p){ - int rc; - sqlite3BtreeEnter(p); - rc = sqlite3BtreeCommitPhaseOne(p, 0); - if( rc==SQLITE_OK ){ - rc = sqlite3BtreeCommitPhaseTwo(p, 0); - } - sqlite3BtreeLeave(p); - return rc; -} - -/* -** This routine sets the state to CURSOR_FAULT and the error -** code to errCode for every cursor on any BtShared that pBtree -** references. Or if the writeOnly flag is set to 1, then only -** trip write cursors and leave read cursors unchanged. -** -** Every cursor is a candidate to be tripped, including cursors -** that belong to other database connections that happen to be -** sharing the cache with pBtree. -** -** This routine gets called when a rollback occurs. If the writeOnly -** flag is true, then only write-cursors need be tripped - read-only -** cursors save their current positions so that they may continue -** following the rollback. Or, if writeOnly is false, all cursors are -** tripped. In general, writeOnly is false if the transaction being -** rolled back modified the database schema. In this case b-tree root -** pages may be moved or deleted from the database altogether, making -** it unsafe for read cursors to continue. -** -** If the writeOnly flag is true and an error is encountered while -** saving the current position of a read-only cursor, all cursors, -** including all read-cursors are tripped. 
-** -** SQLITE_OK is returned if successful, or if an error occurs while -** saving a cursor position, an SQLite error code. -*/ -int sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode, int writeOnly){ - BtCursor *p; - int rc = SQLITE_OK; - - assert( (writeOnly==0 || writeOnly==1) && BTCF_WriteFlag==1 ); - if( pBtree ){ - sqlite3BtreeEnter(pBtree); - for(p=pBtree->pBt->pCursor; p; p=p->pNext){ - if( writeOnly && (p->curFlags & BTCF_WriteFlag)==0 ){ - if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){ - rc = saveCursorPosition(p); - if( rc!=SQLITE_OK ){ - (void)sqlite3BtreeTripAllCursors(pBtree, rc, 0); - break; - } - } - }else{ - sqlite3BtreeClearCursor(p); - p->eState = CURSOR_FAULT; - p->skipNext = errCode; - } - btreeReleaseAllCursorPages(p); - } - sqlite3BtreeLeave(pBtree); - } - return rc; -} - -/* -** Set the pBt->nPage field correctly, according to the current -** state of the database. Assume pBt->pPage1 is valid. -*/ -static void btreeSetNPage(BtShared *pBt, MemPage *pPage1){ - int nPage = get4byte(&pPage1->aData[28]); - testcase( nPage==0 ); - if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage); - testcase( pBt->nPage!=nPage ); - pBt->nPage = nPage; -} - -/* -** Rollback the transaction in progress. -** -** If tripCode is not SQLITE_OK then cursors will be invalidated (tripped). -** Only write cursors are tripped if writeOnly is true but all cursors are -** tripped if writeOnly is false. Any attempt to use -** a tripped cursor will result in an error. -** -** This will release the write lock on the database file. If there -** are no active cursors, it also releases the read lock. 
-*/ -int sqlite3BtreeRollback(Btree *p, int tripCode, int writeOnly){ - int rc; - BtShared *pBt = p->pBt; - MemPage *pPage1; - - assert( writeOnly==1 || writeOnly==0 ); - assert( tripCode==SQLITE_ABORT_ROLLBACK || tripCode==SQLITE_OK ); - sqlite3BtreeEnter(p); - if( tripCode==SQLITE_OK ){ - rc = tripCode = saveAllCursors(pBt, 0, 0); - if( rc ) writeOnly = 0; - }else{ - rc = SQLITE_OK; - } - if( tripCode ){ - int rc2 = sqlite3BtreeTripAllCursors(p, tripCode, writeOnly); - assert( rc==SQLITE_OK || (writeOnly==0 && rc2==SQLITE_OK) ); - if( rc2!=SQLITE_OK ) rc = rc2; - } - btreeIntegrity(p); - - if( p->inTrans==TRANS_WRITE ){ - int rc2; - - assert( TRANS_WRITE==pBt->inTransaction ); - rc2 = sqlite3PagerRollback(pBt->pPager); - if( rc2!=SQLITE_OK ){ - rc = rc2; - } - - /* The rollback may have destroyed the pPage1->aData value. So - ** call btreeGetPage() on page 1 again to make - ** sure pPage1->aData is set correctly. */ - if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){ - btreeSetNPage(pBt, pPage1); - releasePageOne(pPage1); - } - assert( countValidCursors(pBt, 1)==0 ); - pBt->inTransaction = TRANS_READ; - btreeClearHasContent(pBt); - } - - btreeEndTransaction(p); - sqlite3BtreeLeave(p); - return rc; -} - -/* -** Start a statement subtransaction. The subtransaction can be rolled -** back independently of the main transaction. You must start a transaction -** before starting a subtransaction. The subtransaction is ended automatically -** if the main transaction commits or rolls back. -** -** Statement subtransactions are used around individual SQL statements -** that are contained within a BEGIN...COMMIT block. If a constraint -** error occurs within the statement, the effect of that one statement -** can be rolled back without having to rollback the entire transaction. -** -** A statement sub-transaction is implemented as an anonymous savepoint. 
The -** value passed as the second parameter is the total number of savepoints, -** including the new anonymous savepoint, open on the B-Tree. i.e. if there -** are no active savepoints and no other statement-transactions open, -** iStatement is 1. This anonymous savepoint can be released or rolled back -** using the sqlite3BtreeSavepoint() function. -*/ -int sqlite3BtreeBeginStmt(Btree *p, int iStatement){ - int rc; - BtShared *pBt = p->pBt; - sqlite3BtreeEnter(p); - assert( p->inTrans==TRANS_WRITE ); - assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); - assert( iStatement>0 ); - assert( iStatement>p->db->nSavepoint ); - assert( pBt->inTransaction==TRANS_WRITE ); - /* At the pager level, a statement transaction is a savepoint with - ** an index greater than all savepoints created explicitly using - ** SQL statements. It is illegal to open, release or rollback any - ** such savepoints while the statement transaction savepoint is active. - */ - rc = sqlite3PagerOpenSavepoint(pBt->pPager, iStatement); - sqlite3BtreeLeave(p); - return rc; -} - -/* -** The second argument to this function, op, is always SAVEPOINT_ROLLBACK -** or SAVEPOINT_RELEASE. This function either releases or rolls back the -** savepoint identified by parameter iSavepoint, depending on the value -** of op. -** -** Normally, iSavepoint is greater than or equal to zero. However, if op is -** SAVEPOINT_ROLLBACK, then iSavepoint may also be -1. In this case the -** contents of the entire transaction are rolled back. This is different -** from a normal transaction rollback, as no locks are released and the -** transaction remains open. 
-*/ -int sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){ - int rc = SQLITE_OK; - if( p && p->inTrans==TRANS_WRITE ){ - BtShared *pBt = p->pBt; - assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK ); - assert( iSavepoint>=0 || (iSavepoint==-1 && op==SAVEPOINT_ROLLBACK) ); - sqlite3BtreeEnter(p); - if( op==SAVEPOINT_ROLLBACK ){ - rc = saveAllCursors(pBt, 0, 0); - } - if( rc==SQLITE_OK ){ - rc = sqlite3PagerSavepoint(pBt->pPager, op, iSavepoint); - } - if( rc==SQLITE_OK ){ - if( iSavepoint<0 && (pBt->btsFlags & BTS_INITIALLY_EMPTY)!=0 ){ - pBt->nPage = 0; - } - rc = newDatabase(pBt); - btreeSetNPage(pBt, pBt->pPage1); - - /* pBt->nPage might be zero if the database was corrupt when - ** the transaction was started. Otherwise, it must be at least 1. */ - assert( CORRUPT_DB || pBt->nPage>0 ); - } - sqlite3BtreeLeave(p); - } - return rc; -} - -/* -** Create a new cursor for the BTree whose root is on the page -** iTable. If a read-only cursor is requested, it is assumed that -** the caller already has at least a read-only transaction open -** on the database already. If a write-cursor is requested, then -** the caller is assumed to have an open write transaction. -** -** If the BTREE_WRCSR bit of wrFlag is clear, then the cursor can only -** be used for reading. If the BTREE_WRCSR bit is set, then the cursor -** can be used for reading or for writing if other conditions for writing -** are also met. These are the conditions that must be met in order -** for writing to be allowed: -** -** 1: The cursor must have been opened with wrFlag containing BTREE_WRCSR -** -** 2: Other database connections that share the same pager cache -** but which are not in the READ_UNCOMMITTED state may not have -** cursors open with wrFlag==0 on the same table. Otherwise -** the changes made by this write cursor would be visible to -** the read cursors in the other database connection. 
-** -** 3: The database must be writable (not on read-only media) -** -** 4: There must be an active transaction. -** -** The BTREE_FORDELETE bit of wrFlag may optionally be set if BTREE_WRCSR -** is set. If FORDELETE is set, that is a hint to the implementation that -** this cursor will only be used to seek to and delete entries of an index -** as part of a larger DELETE statement. The FORDELETE hint is not used by -** this implementation. But in a hypothetical alternative storage engine -** in which index entries are automatically deleted when corresponding table -** rows are deleted, the FORDELETE flag is a hint that all SEEK and DELETE -** operations on this cursor can be no-ops and all READ operations can -** return a null row (2-bytes: 0x01 0x00). -** -** No checking is done to make sure that page iTable really is the -** root page of a b-tree. If it is not, then the cursor acquired -** will not work correctly. -** -** It is assumed that the sqlite3BtreeCursorZero() has been called -** on pCur to initialize the memory space prior to invoking this routine. -*/ -static int btreeCursor( - Btree *p, /* The btree */ - Pgno iTable, /* Root page of table to open */ - int wrFlag, /* 1 to write. 0 read-only */ - struct KeyInfo *pKeyInfo, /* First arg to comparison function */ - BtCursor *pCur /* Space for new cursor */ -){ - BtShared *pBt = p->pBt; /* Shared b-tree handle */ - BtCursor *pX; /* Looping over other all cursors */ - - assert( sqlite3BtreeHoldsMutex(p) ); - assert( wrFlag==0 - || wrFlag==BTREE_WRCSR - || wrFlag==(BTREE_WRCSR|BTREE_FORDELETE) - ); - - /* The following assert statements verify that if this is a sharable - ** b-tree database, the connection is holding the required table locks, - ** and that no other connection has any open cursor that conflicts with - ** this lock. The iTable<1 term disables the check for corrupt schemas. 
*/ - assert( hasSharedCacheTableLock(p, iTable, pKeyInfo!=0, (wrFlag?2:1)) - || iTable<1 ); - assert( wrFlag==0 || !hasReadConflicts(p, iTable) ); - - /* Assert that the caller has opened the required transaction. */ - assert( p->inTrans>TRANS_NONE ); - assert( wrFlag==0 || p->inTrans==TRANS_WRITE ); - assert( pBt->pPage1 && pBt->pPage1->aData ); - assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 ); - - if( wrFlag ){ - allocateTempSpace(pBt); - if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM; - } - if( iTable<=1 ){ - if( iTable<1 ){ - return SQLITE_CORRUPT_BKPT; - }else if( btreePagecount(pBt)==0 ){ - assert( wrFlag==0 ); - iTable = 0; - } - } - - /* Now that no other errors can occur, finish filling in the BtCursor - ** variables and link the cursor into the BtShared list. */ - pCur->pgnoRoot = iTable; - pCur->iPage = -1; - pCur->pKeyInfo = pKeyInfo; - pCur->pBtree = p; - pCur->pBt = pBt; - pCur->curFlags = wrFlag ? BTCF_WriteFlag : 0; - pCur->curPagerFlags = wrFlag ? 0 : PAGER_GET_READONLY; - /* If there are two or more cursors on the same btree, then all such - ** cursors *must* have the BTCF_Multiple flag set. */ - for(pX=pBt->pCursor; pX; pX=pX->pNext){ - if( pX->pgnoRoot==iTable ){ - pX->curFlags |= BTCF_Multiple; - pCur->curFlags |= BTCF_Multiple; - } - } - pCur->pNext = pBt->pCursor; - pBt->pCursor = pCur; - pCur->eState = CURSOR_INVALID; - return SQLITE_OK; -} -static int btreeCursorWithLock( - Btree *p, /* The btree */ - Pgno iTable, /* Root page of table to open */ - int wrFlag, /* 1 to write. 0 read-only */ - struct KeyInfo *pKeyInfo, /* First arg to comparison function */ - BtCursor *pCur /* Space for new cursor */ -){ - int rc; - sqlite3BtreeEnter(p); - rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); - sqlite3BtreeLeave(p); - return rc; -} -int sqlite3BtreeCursor( - Btree *p, /* The btree */ - Pgno iTable, /* Root page of table to open */ - int wrFlag, /* 1 to write. 
0 read-only */ - struct KeyInfo *pKeyInfo, /* First arg to xCompare() */ - BtCursor *pCur /* Write new cursor here */ -){ - if( p->sharable ){ - return btreeCursorWithLock(p, iTable, wrFlag, pKeyInfo, pCur); - }else{ - return btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); - } -} - -/* -** Return the size of a BtCursor object in bytes. -** -** This interfaces is needed so that users of cursors can preallocate -** sufficient storage to hold a cursor. The BtCursor object is opaque -** to users so they cannot do the sizeof() themselves - they must call -** this routine. -*/ -int sqlite3BtreeCursorSize(void){ - return ROUND8(sizeof(BtCursor)); -} - -/* -** Initialize memory that will be converted into a BtCursor object. -** -** The simple approach here would be to memset() the entire object -** to zero. But it turns out that the apPage[] and aiIdx[] arrays -** do not need to be zeroed and they are large, so we can save a lot -** of run-time by skipping the initialization of those elements. -*/ -void sqlite3BtreeCursorZero(BtCursor *p){ - memset(p, 0, offsetof(BtCursor, BTCURSOR_FIRST_UNINIT)); -} - -/* -** Close a cursor. The read lock on the database file is released -** when the last cursor is closed. -*/ -int sqlite3BtreeCloseCursor(BtCursor *pCur){ - Btree *pBtree = pCur->pBtree; - if( pBtree ){ - BtShared *pBt = pCur->pBt; - sqlite3BtreeEnter(pBtree); - assert( pBt->pCursor!=0 ); - if( pBt->pCursor==pCur ){ - pBt->pCursor = pCur->pNext; - }else{ - BtCursor *pPrev = pBt->pCursor; - do{ - if( pPrev->pNext==pCur ){ - pPrev->pNext = pCur->pNext; - break; - } - pPrev = pPrev->pNext; - }while( ALWAYS(pPrev) ); - } - btreeReleaseAllCursorPages(pCur); - unlockBtreeIfUnused(pBt); - sqlite3_free(pCur->aOverflow); - sqlite3_free(pCur->pKey); - if( (pBt->openFlags & BTREE_SINGLE) && pBt->pCursor==0 ){ - /* Since the BtShared is not sharable, there is no need to - ** worry about the missing sqlite3BtreeLeave() call here. 
*/ - assert( pBtree->sharable==0 ); - sqlite3BtreeClose(pBtree); - }else{ - sqlite3BtreeLeave(pBtree); - } - pCur->pBtree = 0; - } - return SQLITE_OK; -} - -/* -** Make sure the BtCursor* given in the argument has a valid -** BtCursor.info structure. If it is not already valid, call -** btreeParseCell() to fill it in. -** -** BtCursor.info is a cache of the information in the current cell. -** Using this cache reduces the number of calls to btreeParseCell(). -*/ -#ifndef NDEBUG - static int cellInfoEqual(CellInfo *a, CellInfo *b){ - if( a->nKey!=b->nKey ) return 0; - if( a->pPayload!=b->pPayload ) return 0; - if( a->nPayload!=b->nPayload ) return 0; - if( a->nLocal!=b->nLocal ) return 0; - if( a->nSize!=b->nSize ) return 0; - return 1; - } - static void assertCellInfo(BtCursor *pCur){ - CellInfo info; - memset(&info, 0, sizeof(info)); - btreeParseCell(pCur->pPage, pCur->ix, &info); - assert( CORRUPT_DB || cellInfoEqual(&info, &pCur->info) ); - } -#else - #define assertCellInfo(x) -#endif -static SQLITE_NOINLINE void getCellInfo(BtCursor *pCur){ - if( pCur->info.nSize==0 ){ - pCur->curFlags |= BTCF_ValidNKey; - btreeParseCell(pCur->pPage,pCur->ix,&pCur->info); - }else{ - assertCellInfo(pCur); - } -} - -#ifndef NDEBUG /* The next routine used only within assert() statements */ -/* -** Return true if the given BtCursor is valid. A valid cursor is one -** that is currently pointing to a row in a (non-empty) table. -** This is a verification routine is used only within assert() statements. -*/ -int sqlite3BtreeCursorIsValid(BtCursor *pCur){ - return pCur && pCur->eState==CURSOR_VALID; -} -#endif /* NDEBUG */ -int sqlite3BtreeCursorIsValidNN(BtCursor *pCur){ - assert( pCur!=0 ); - return pCur->eState==CURSOR_VALID; -} - -/* -** Return the value of the integer key or "rowid" for a table btree. -** This routine is only valid for a cursor that is pointing into a -** ordinary table btree. 
If the cursor points to an index btree or -** is invalid, the result of this routine is undefined. -*/ -i64 sqlite3BtreeIntegerKey(BtCursor *pCur){ - assert( cursorHoldsMutex(pCur) ); - assert( pCur->eState==CURSOR_VALID ); - assert( pCur->curIntKey ); - getCellInfo(pCur); - return pCur->info.nKey; -} - -/* -** Pin or unpin a cursor. -*/ -void sqlite3BtreeCursorPin(BtCursor *pCur){ - assert( (pCur->curFlags & BTCF_Pinned)==0 ); - pCur->curFlags |= BTCF_Pinned; -} -void sqlite3BtreeCursorUnpin(BtCursor *pCur){ - assert( (pCur->curFlags & BTCF_Pinned)!=0 ); - pCur->curFlags &= ~BTCF_Pinned; -} - -#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC -/* -** Return the offset into the database file for the start of the -** payload to which the cursor is pointing. -*/ -i64 sqlite3BtreeOffset(BtCursor *pCur){ - assert( cursorHoldsMutex(pCur) ); - assert( pCur->eState==CURSOR_VALID ); - getCellInfo(pCur); - return (i64)pCur->pBt->pageSize*((i64)pCur->pPage->pgno - 1) + - (i64)(pCur->info.pPayload - pCur->pPage->aData); -} -#endif /* SQLITE_ENABLE_OFFSET_SQL_FUNC */ - -/* -** Return the number of bytes of payload for the entry that pCur is -** currently pointing to. For table btrees, this will be the amount -** of data. For index btrees, this will be the size of the key. -** -** The caller must guarantee that the cursor is pointing to a non-NULL -** valid entry. In other words, the calling procedure must guarantee -** that the cursor has Cursor.eState==CURSOR_VALID. -*/ -u32 sqlite3BtreePayloadSize(BtCursor *pCur){ - assert( cursorHoldsMutex(pCur) ); - assert( pCur->eState==CURSOR_VALID ); - getCellInfo(pCur); - return pCur->info.nPayload; -} - -/* -** Return an upper bound on the size of any record for the table -** that the cursor is pointing into. -** -** This is an optimization. Everything will still work if this -** routine always returns 2147483647 (which is the largest record -** that SQLite can handle) or more. 
But returning a smaller value might -** prevent large memory allocations when trying to interpret a -** corrupt datrabase. -** -** The current implementation merely returns the size of the underlying -** database file. -*/ -sqlite3_int64 sqlite3BtreeMaxRecordSize(BtCursor *pCur){ - assert( cursorHoldsMutex(pCur) ); - assert( pCur->eState==CURSOR_VALID ); - return pCur->pBt->pageSize * (sqlite3_int64)pCur->pBt->nPage; -} - -/* -** Given the page number of an overflow page in the database (parameter -** ovfl), this function finds the page number of the next page in the -** linked list of overflow pages. If possible, it uses the auto-vacuum -** pointer-map data instead of reading the content of page ovfl to do so. -** -** If an error occurs an SQLite error code is returned. Otherwise: -** -** The page number of the next overflow page in the linked list is -** written to *pPgnoNext. If page ovfl is the last page in its linked -** list, *pPgnoNext is set to zero. -** -** If ppPage is not NULL, and a reference to the MemPage object corresponding -** to page number pOvfl was obtained, then *ppPage is set to point to that -** reference. It is the responsibility of the caller to call releasePage() -** on *ppPage to free the reference. In no reference was obtained (because -** the pointer-map was used to obtain the value for *pPgnoNext), then -** *ppPage is set to zero. -*/ -static int getOverflowPage( - BtShared *pBt, /* The database file */ - Pgno ovfl, /* Current overflow page number */ - MemPage **ppPage, /* OUT: MemPage handle (may be NULL) */ - Pgno *pPgnoNext /* OUT: Next overflow page number */ -){ - Pgno next = 0; - MemPage *pPage = 0; - int rc = SQLITE_OK; - - assert( sqlite3_mutex_held(pBt->mutex) ); - assert(pPgnoNext); - -#ifndef SQLITE_OMIT_AUTOVACUUM - /* Try to find the next page in the overflow list using the - ** autovacuum pointer-map pages. Guess that the next page in - ** the overflow list is page number (ovfl+1). 
If that guess turns - ** out to be wrong, fall back to loading the data of page - ** number ovfl to determine the next page number. - */ - if( pBt->autoVacuum ){ - Pgno pgno; - Pgno iGuess = ovfl+1; - u8 eType; - - while( PTRMAP_ISPAGE(pBt, iGuess) || iGuess==PENDING_BYTE_PAGE(pBt) ){ - iGuess++; - } - - if( iGuess<=btreePagecount(pBt) ){ - rc = ptrmapGet(pBt, iGuess, &eType, &pgno); - if( rc==SQLITE_OK && eType==PTRMAP_OVERFLOW2 && pgno==ovfl ){ - next = iGuess; - rc = SQLITE_DONE; - } - } - } -#endif - - assert( next==0 || rc==SQLITE_DONE ); - if( rc==SQLITE_OK ){ - rc = btreeGetPage(pBt, ovfl, &pPage, (ppPage==0) ? PAGER_GET_READONLY : 0); - assert( rc==SQLITE_OK || pPage==0 ); - if( rc==SQLITE_OK ){ - next = get4byte(pPage->aData); - } - } - - *pPgnoNext = next; - if( ppPage ){ - *ppPage = pPage; - }else{ - releasePage(pPage); - } - return (rc==SQLITE_DONE ? SQLITE_OK : rc); -} - -/* -** Copy data from a buffer to a page, or from a page to a buffer. -** -** pPayload is a pointer to data stored on database page pDbPage. -** If argument eOp is false, then nByte bytes of data are copied -** from pPayload to the buffer pointed at by pBuf. If eOp is true, -** then sqlite3PagerWrite() is called on pDbPage and nByte bytes -** of data are copied from the buffer pBuf to pPayload. -** -** SQLITE_OK is returned on success, otherwise an error code. 
-*/ -static int copyPayload( - void *pPayload, /* Pointer to page data */ - void *pBuf, /* Pointer to buffer */ - int nByte, /* Number of bytes to copy */ - int eOp, /* 0 -> copy from page, 1 -> copy to page */ - DbPage *pDbPage /* Page containing pPayload */ -){ - if( eOp ){ - /* Copy data from buffer to page (a write operation) */ - int rc = sqlite3PagerWrite(pDbPage); - if( rc!=SQLITE_OK ){ - return rc; - } - memcpy(pPayload, pBuf, nByte); - }else{ - /* Copy data from page to buffer (a read operation) */ - memcpy(pBuf, pPayload, nByte); - } - return SQLITE_OK; -} - -/* -** This function is used to read or overwrite payload information -** for the entry that the pCur cursor is pointing to. The eOp -** argument is interpreted as follows: -** -** 0: The operation is a read. Populate the overflow cache. -** 1: The operation is a write. Populate the overflow cache. -** -** A total of "amt" bytes are read or written beginning at "offset". -** Data is read to or from the buffer pBuf. -** -** The content being read or written might appear on the main page -** or be scattered out on multiple overflow pages. -** -** If the current cursor entry uses one or more overflow pages -** this function may allocate space for and lazily populate -** the overflow page-list cache array (BtCursor.aOverflow). -** Subsequent calls use this cache to make seeking to the supplied offset -** more efficient. -** -** Once an overflow page-list cache has been allocated, it must be -** invalidated if some other cursor writes to the same table, or if -** the cursor is moved to a different row. Additionally, in auto-vacuum -** mode, the following events may invalidate an overflow page-list cache. -** -** * An incremental vacuum, -** * A commit in auto_vacuum="full" mode, -** * Creating a table (may require moving an overflow page). 
-*/ -static int accessPayload( - BtCursor *pCur, /* Cursor pointing to entry to read from */ - u32 offset, /* Begin reading this far into payload */ - u32 amt, /* Read this many bytes */ - unsigned char *pBuf, /* Write the bytes into this buffer */ - int eOp /* zero to read. non-zero to write. */ -){ - unsigned char *aPayload; - int rc = SQLITE_OK; - int iIdx = 0; - MemPage *pPage = pCur->pPage; /* Btree page of current entry */ - BtShared *pBt = pCur->pBt; /* Btree this cursor belongs to */ -#ifdef SQLITE_DIRECT_OVERFLOW_READ - unsigned char * const pBufStart = pBuf; /* Start of original out buffer */ -#endif - - assert( pPage ); - assert( eOp==0 || eOp==1 ); - assert( pCur->eState==CURSOR_VALID ); - if( pCur->ix>=pPage->nCell ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - assert( cursorHoldsMutex(pCur) ); - - getCellInfo(pCur); - aPayload = pCur->info.pPayload; - assert( offset+amt <= pCur->info.nPayload ); - - assert( aPayload > pPage->aData ); - if( (uptr)(aPayload - pPage->aData) > (pBt->usableSize - pCur->info.nLocal) ){ - /* Trying to read or write past the end of the data is an error. The - ** conditional above is really: - ** &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize] - ** but is recast into its current form to avoid integer overflow problems - */ - return SQLITE_CORRUPT_PAGE(pPage); - } - - /* Check if data must be read/written to/from the btree page itself. */ - if( offsetinfo.nLocal ){ - int a = amt; - if( a+offset>pCur->info.nLocal ){ - a = pCur->info.nLocal - offset; - } - rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage); - offset = 0; - pBuf += a; - amt -= a; - }else{ - offset -= pCur->info.nLocal; - } - - - if( rc==SQLITE_OK && amt>0 ){ - const u32 ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */ - Pgno nextPage; - - nextPage = get4byte(&aPayload[pCur->info.nLocal]); - - /* If the BtCursor.aOverflow[] has not been allocated, allocate it now. 
- ** - ** The aOverflow[] array is sized at one entry for each overflow page - ** in the overflow chain. The page number of the first overflow page is - ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array - ** means "not yet known" (the cache is lazily populated). - */ - if( (pCur->curFlags & BTCF_ValidOvfl)==0 ){ - int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize; - if( pCur->aOverflow==0 - || nOvfl*(int)sizeof(Pgno) > sqlite3MallocSize(pCur->aOverflow) - ){ - Pgno *aNew = (Pgno*)sqlite3Realloc( - pCur->aOverflow, nOvfl*2*sizeof(Pgno) - ); - if( aNew==0 ){ - return SQLITE_NOMEM; - }else{ - pCur->aOverflow = aNew; - } - } - memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno)); - pCur->curFlags |= BTCF_ValidOvfl; - }else{ - /* If the overflow page-list cache has been allocated and the - ** entry for the first required overflow page is valid, skip - ** directly to it. - */ - if( pCur->aOverflow[offset/ovflSize] ){ - iIdx = (offset/ovflSize); - nextPage = pCur->aOverflow[iIdx]; - offset = (offset%ovflSize); - } - } - - assert( rc==SQLITE_OK && amt>0 ); - while( nextPage ){ - /* If required, populate the overflow page-list cache. */ - if( nextPage > pBt->nPage ) return SQLITE_CORRUPT_BKPT; - assert( pCur->aOverflow[iIdx]==0 - || pCur->aOverflow[iIdx]==nextPage - || CORRUPT_DB ); - pCur->aOverflow[iIdx] = nextPage; - - if( offset>=ovflSize ){ - /* The only reason to read this page is to obtain the page - ** number for the next page in the overflow chain. The page - ** data is not required. So first try to lookup the overflow - ** page-list cache, if any, then fall back to the getOverflowPage() - ** function. - */ - assert( pCur->curFlags & BTCF_ValidOvfl ); - assert( pCur->pBtree->db==pBt->db ); - if( pCur->aOverflow[iIdx+1] ){ - nextPage = pCur->aOverflow[iIdx+1]; - }else{ - rc = getOverflowPage(pBt, nextPage, 0, &nextPage); - } - offset -= ovflSize; - }else{ - /* Need to read this page properly. 
It contains some of the - ** range of data that is being read (eOp==0) or written (eOp!=0). - */ - int a = amt; - if( a + offset > ovflSize ){ - a = ovflSize - offset; - } - -#ifdef SQLITE_DIRECT_OVERFLOW_READ - /* If all the following are true: - ** - ** 1) this is a read operation, and - ** 2) data is required from the start of this overflow page, and - ** 3) there are no dirty pages in the page-cache - ** 4) the database is file-backed, and - ** 5) the page is not in the WAL file - ** 6) at least 4 bytes have already been read into the output buffer - ** - ** then data can be read directly from the database file into the - ** output buffer, bypassing the page-cache altogether. This speeds - ** up loading large records that span many overflow pages. - */ - if( eOp==0 /* (1) */ - && offset==0 /* (2) */ - && sqlite3PagerDirectReadOk(pBt->pPager, nextPage) /* (3,4,5) */ - && &pBuf[-4]>=pBufStart /* (6) */ - ){ - sqlite3_file *fd = sqlite3PagerFile(pBt->pPager); - u8 aSave[4]; - u8 *aWrite = &pBuf[-4]; - assert( aWrite>=pBufStart ); /* due to (6) */ - memcpy(aSave, aWrite, 4); - rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1)); - if( rc && nextPage>pBt->nPage ) rc = SQLITE_CORRUPT_BKPT; - nextPage = get4byte(aWrite); - memcpy(aWrite, aSave, 4); - }else -#endif - - { - DbPage *pDbPage; - rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage, - (eOp==0 ? PAGER_GET_READONLY : 0) - ); - if( rc==SQLITE_OK ){ - aPayload = sqlite3PagerGetData(pDbPage); - nextPage = get4byte(aPayload); - rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage); - sqlite3PagerUnref(pDbPage); - offset = 0; - } - } - amt -= a; - if( amt==0 ) return rc; - pBuf += a; - } - if( rc ) break; - iIdx++; - } - } - - if( rc==SQLITE_OK && amt>0 ){ - /* Overflow chain ends prematurely */ - return SQLITE_CORRUPT_PAGE(pPage); - } - return rc; -} - -/* -** Read part of the payload for the row at which that cursor pCur is currently -** pointing. 
"amt" bytes will be transferred into pBuf[]. The transfer -** begins at "offset". -** -** pCur can be pointing to either a table or an index b-tree. -** If pointing to a table btree, then the content section is read. If -** pCur is pointing to an index b-tree then the key section is read. -** -** For sqlite3BtreePayload(), the caller must ensure that pCur is pointing -** to a valid row in the table. For sqlite3BtreePayloadChecked(), the -** cursor might be invalid or might need to be restored before being read. -** -** Return SQLITE_OK on success or an error code if anything goes -** wrong. An error is returned if "offset+amt" is larger than -** the available payload. -*/ -int sqlite3BtreePayload(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ - assert( cursorHoldsMutex(pCur) ); - assert( pCur->eState==CURSOR_VALID ); - assert( pCur->iPage>=0 && pCur->pPage ); - return accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0); -} - -/* -** This variant of sqlite3BtreePayload() works even if the cursor has not -** in the CURSOR_VALID state. It is only used by the sqlite3_blob_read() -** interface. -*/ -#ifndef SQLITE_OMIT_INCRBLOB -static SQLITE_NOINLINE int accessPayloadChecked( - BtCursor *pCur, - u32 offset, - u32 amt, - void *pBuf -){ - int rc; - if ( pCur->eState==CURSOR_INVALID ){ - return SQLITE_ABORT; - } - assert( cursorOwnsBtShared(pCur) ); - rc = btreeRestoreCursorPosition(pCur); - return rc ? rc : accessPayload(pCur, offset, amt, pBuf, 0); -} -int sqlite3BtreePayloadChecked(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ - if( pCur->eState==CURSOR_VALID ){ - assert( cursorOwnsBtShared(pCur) ); - return accessPayload(pCur, offset, amt, pBuf, 0); - }else{ - return accessPayloadChecked(pCur, offset, amt, pBuf); - } -} -#endif /* SQLITE_OMIT_INCRBLOB */ - -/* -** Return a pointer to payload information from the entry that the -** pCur cursor is pointing to. 
The pointer is to the beginning of -** the key if index btrees (pPage->intKey==0) and is the data for -** table btrees (pPage->intKey==1). The number of bytes of available -** key/data is written into *pAmt. If *pAmt==0, then the value -** returned will not be a valid pointer. -** -** This routine is an optimization. It is common for the entire key -** and data to fit on the local page and for there to be no overflow -** pages. When that is so, this routine can be used to access the -** key and data without making a copy. If the key and/or data spills -** onto overflow pages, then accessPayload() must be used to reassemble -** the key/data and copy it into a preallocated buffer. -** -** The pointer returned by this routine looks directly into the cached -** page of the database. The data might change or move the next time -** any btree routine is called. -*/ -static const void *fetchPayload( - BtCursor *pCur, /* Cursor pointing to entry to read from */ - u32 *pAmt /* Write the number of available bytes here */ -){ - int amt; - assert( pCur!=0 && pCur->iPage>=0 && pCur->pPage); - assert( pCur->eState==CURSOR_VALID ); - assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); - assert( cursorOwnsBtShared(pCur) ); - assert( pCur->ixpPage->nCell || CORRUPT_DB ); - assert( pCur->info.nSize>0 ); - assert( pCur->info.pPayload>pCur->pPage->aData || CORRUPT_DB ); - assert( pCur->info.pPayloadpPage->aDataEnd ||CORRUPT_DB); - amt = pCur->info.nLocal; - if( amt>(int)(pCur->pPage->aDataEnd - pCur->info.pPayload) ){ - /* There is too little space on the page for the expected amount - ** of local content. Database must be corrupt. */ - assert( CORRUPT_DB ); - amt = MAX(0, (int)(pCur->pPage->aDataEnd - pCur->info.pPayload)); - } - *pAmt = (u32)amt; - return (void*)pCur->info.pPayload; -} - - -/* -** For the entry that cursor pCur is point to, return as -** many bytes of the key or data as are available on the local -** b-tree page. Write the number of available bytes into *pAmt. 
-** -** The pointer returned is ephemeral. The key/data may move -** or be destroyed on the next call to any Btree routine, -** including calls from other threads against the same cache. -** Hence, a mutex on the BtShared should be held prior to calling -** this routine. -** -** These routines is used to get quick access to key and data -** in the common case where no overflow pages are used. -*/ -const void *sqlite3BtreePayloadFetch(BtCursor *pCur, u32 *pAmt){ - return fetchPayload(pCur, pAmt); -} - - -/* -** Move the cursor down to a new child page. The newPgno argument is the -** page number of the child page to move to. -** -** This function returns SQLITE_CORRUPT if the page-header flags field of -** the new child page does not match the flags field of the parent (i.e. -** if an intkey page appears to be the parent of a non-intkey page, or -** vice-versa). -*/ -static int moveToChild(BtCursor *pCur, u32 newPgno){ - BtShared *pBt = pCur->pBt; - - assert( cursorOwnsBtShared(pCur) ); - assert( pCur->eState==CURSOR_VALID ); - assert( pCur->iPageiPage>=0 ); - if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ - return SQLITE_CORRUPT_BKPT; - } - pCur->info.nSize = 0; - pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); - pCur->aiIdx[pCur->iPage] = pCur->ix; - pCur->apPage[pCur->iPage] = pCur->pPage; - pCur->ix = 0; - pCur->iPage++; - return getAndInitPage(pBt, newPgno, &pCur->pPage, pCur, pCur->curPagerFlags); -} - -#ifdef SQLITE_DEBUG -/* -** Page pParent is an internal (non-leaf) tree page. This function -** asserts that page number iChild is the left-child if the iIdx'th -** cell in page pParent. Or, if iIdx is equal to the total number of -** cells in pParent, that page number iChild is the right-child of -** the page. 
-*/ -static void assertParentIndex(MemPage *pParent, int iIdx, Pgno iChild){ - if( CORRUPT_DB ) return; /* The conditions tested below might not be true - ** in a corrupt database */ - assert( iIdx<=pParent->nCell ); - if( iIdx==pParent->nCell ){ - assert( get4byte(&pParent->aData[pParent->hdrOffset+8])==iChild ); - }else{ - assert( get4byte(findCell(pParent, iIdx))==iChild ); - } -} -#else -# define assertParentIndex(x,y,z) -#endif - -/* -** Move the cursor up to the parent page. -** -** pCur->idx is set to the cell index that contains the pointer -** to the page we are coming from. If we are coming from the -** right-most child page then pCur->idx is set to one more than -** the largest cell index. -*/ -static void moveToParent(BtCursor *pCur){ - MemPage *pLeaf; - assert( cursorOwnsBtShared(pCur) ); - assert( pCur->eState==CURSOR_VALID ); - assert( pCur->iPage>0 ); - assert( pCur->pPage ); - assertParentIndex( - pCur->apPage[pCur->iPage-1], - pCur->aiIdx[pCur->iPage-1], - pCur->pPage->pgno - ); - testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell ); - pCur->info.nSize = 0; - pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); - pCur->ix = pCur->aiIdx[pCur->iPage-1]; - pLeaf = pCur->pPage; - pCur->pPage = pCur->apPage[--pCur->iPage]; - releasePageNotNull(pLeaf); -} - -/* -** Move the cursor to point to the root page of its b-tree structure. -** -** If the table has a virtual root page, then the cursor is moved to point -** to the virtual root page instead of the actual root page. A table has a -** virtual root page when the actual root page contains no cells and a -** single child page. This can only happen with the table rooted at page 1. -** -** If the b-tree structure is empty, the cursor state is set to -** CURSOR_INVALID and this routine returns SQLITE_EMPTY. Otherwise, -** the cursor is set to point to the first cell located on the root -** (or virtual root) page and the cursor state is set to CURSOR_VALID. 
-** -** If this function returns successfully, it may be assumed that the -** page-header flags indicate that the [virtual] root-page is the expected -** kind of b-tree page (i.e. if when opening the cursor the caller did not -** specify a KeyInfo structure the flags byte is set to 0x05 or 0x0D, -** indicating a table b-tree, or if the caller did specify a KeyInfo -** structure the flags byte is set to 0x02 or 0x0A, indicating an index -** b-tree). -*/ -static int moveToRoot(BtCursor *pCur){ - MemPage *pRoot; - int rc = SQLITE_OK; - - assert( cursorOwnsBtShared(pCur) ); - assert( CURSOR_INVALID < CURSOR_REQUIRESEEK ); - assert( CURSOR_VALID < CURSOR_REQUIRESEEK ); - assert( CURSOR_FAULT > CURSOR_REQUIRESEEK ); - assert( pCur->eState < CURSOR_REQUIRESEEK || pCur->iPage<0 ); - assert( pCur->pgnoRoot>0 || pCur->iPage<0 ); - - if( pCur->iPage>=0 ){ - if( pCur->iPage ){ - releasePageNotNull(pCur->pPage); - while( --pCur->iPage ){ - releasePageNotNull(pCur->apPage[pCur->iPage]); - } - pCur->pPage = pCur->apPage[0]; - goto skip_init; - } - }else if( pCur->pgnoRoot==0 ){ - pCur->eState = CURSOR_INVALID; - return SQLITE_EMPTY; - }else{ - assert( pCur->iPage==(-1) ); - if( pCur->eState>=CURSOR_REQUIRESEEK ){ - if( pCur->eState==CURSOR_FAULT ){ - assert( pCur->skipNext!=SQLITE_OK ); - return pCur->skipNext; - } - sqlite3BtreeClearCursor(pCur); - } - rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->pPage, - 0, pCur->curPagerFlags); - if( rc!=SQLITE_OK ){ - pCur->eState = CURSOR_INVALID; - return rc; - } - pCur->iPage = 0; - pCur->curIntKey = pCur->pPage->intKey; - } - pRoot = pCur->pPage; - assert( pRoot->pgno==pCur->pgnoRoot ); - - /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor - ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is - ** NULL, the caller expects a table b-tree. If this is not the case, - ** return an SQLITE_CORRUPT error. 
- ** - ** Earlier versions of SQLite assumed that this test could not fail - ** if the root page was already loaded when this function was called (i.e. - ** if pCur->iPage>=0). But this is not so if the database is corrupted - ** in such a way that page pRoot is linked into a second b-tree table - ** (or the freelist). */ - assert( pRoot->intKey==1 || pRoot->intKey==0 ); - if( pRoot->isInit==0 || (pCur->pKeyInfo==0)!=pRoot->intKey ){ - return SQLITE_CORRUPT_PAGE(pCur->pPage); - } - -skip_init: - pCur->ix = 0; - pCur->info.nSize = 0; - pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidNKey|BTCF_ValidOvfl); - - pRoot = pCur->pPage; - if( pRoot->nCell>0 ){ - pCur->eState = CURSOR_VALID; - }else if( !pRoot->leaf ){ - Pgno subpage; - if( pRoot->pgno!=1 ) return SQLITE_CORRUPT_BKPT; - subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]); - pCur->eState = CURSOR_VALID; - rc = moveToChild(pCur, subpage); - }else{ - pCur->eState = CURSOR_INVALID; - rc = SQLITE_EMPTY; - } - return rc; -} - -/* -** Move the cursor down to the left-most leaf entry beneath the -** entry to which it is currently pointing. -** -** The left-most leaf is the one with the smallest key - the first -** in ascending order. -*/ -static int moveToLeftmost(BtCursor *pCur){ - Pgno pgno; - int rc = SQLITE_OK; - MemPage *pPage; - - assert( cursorOwnsBtShared(pCur) ); - assert( pCur->eState==CURSOR_VALID ); - while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){ - assert( pCur->ixnCell ); - pgno = get4byte(findCell(pPage, pCur->ix)); - rc = moveToChild(pCur, pgno); - } - return rc; -} - -/* -** Move the cursor down to the right-most leaf entry beneath the -** page to which it is currently pointing. Notice the difference -** between moveToLeftmost() and moveToRightmost(). moveToLeftmost() -** finds the left-most entry beneath the *entry* whereas moveToRightmost() -** finds the right-most entry beneath the *page*. -** -** The right-most entry is the one with the largest key - the last -** key in ascending order. 
-*/ -static int moveToRightmost(BtCursor *pCur){ - Pgno pgno; - int rc = SQLITE_OK; - MemPage *pPage = 0; - - assert( cursorOwnsBtShared(pCur) ); - assert( pCur->eState==CURSOR_VALID ); - while( !(pPage = pCur->pPage)->leaf ){ - pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); - pCur->ix = pPage->nCell; - rc = moveToChild(pCur, pgno); - if( rc ) return rc; - } - pCur->ix = pPage->nCell-1; - assert( pCur->info.nSize==0 ); - assert( (pCur->curFlags & BTCF_ValidNKey)==0 ); - return SQLITE_OK; -} - -/* Move the cursor to the first entry in the table. Return SQLITE_OK -** on success. Set *pRes to 0 if the cursor actually points to something -** or set *pRes to 1 if the table is empty. -*/ -int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){ - int rc; - - assert( cursorOwnsBtShared(pCur) ); - assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); - rc = moveToRoot(pCur); - if( rc==SQLITE_OK ){ - assert( pCur->pPage->nCell>0 ); - *pRes = 0; - rc = moveToLeftmost(pCur); - }else if( rc==SQLITE_EMPTY ){ - assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 ); - *pRes = 1; - rc = SQLITE_OK; - } - return rc; -} - -/* Move the cursor to the last entry in the table. Return SQLITE_OK -** on success. Set *pRes to 0 if the cursor actually points to something -** or set *pRes to 1 if the table is empty. -*/ -int sqlite3BtreeLast(BtCursor *pCur, int *pRes){ - int rc; - - assert( cursorOwnsBtShared(pCur) ); - assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); - - /* If the cursor already points to the last entry, this is a no-op. */ - if( CURSOR_VALID==pCur->eState && (pCur->curFlags & BTCF_AtLast)!=0 ){ -#ifdef SQLITE_DEBUG - /* This block serves to assert() that the cursor really does point - ** to the last entry in the b-tree. 
*/ - int ii; - for(ii=0; iiiPage; ii++){ - assert( pCur->aiIdx[ii]==pCur->apPage[ii]->nCell ); - } - assert( pCur->ix==pCur->pPage->nCell-1 || CORRUPT_DB ); - testcase( pCur->ix!=pCur->pPage->nCell-1 ); - /* ^-- dbsqlfuzz b92b72e4de80b5140c30ab71372ca719b8feb618 */ - assert( pCur->pPage->leaf ); -#endif - *pRes = 0; - return SQLITE_OK; - } - - rc = moveToRoot(pCur); - if( rc==SQLITE_OK ){ - assert( pCur->eState==CURSOR_VALID ); - *pRes = 0; - rc = moveToRightmost(pCur); - if( rc==SQLITE_OK ){ - pCur->curFlags |= BTCF_AtLast; - }else{ - pCur->curFlags &= ~BTCF_AtLast; - } - }else if( rc==SQLITE_EMPTY ){ - assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 ); - *pRes = 1; - rc = SQLITE_OK; - } - return rc; -} - -/* Move the cursor so that it points to an entry in a table (a.k.a INTKEY) -** table near the key intKey. Return a success code. -** -** If an exact match is not found, then the cursor is always -** left pointing at a leaf page which would hold the entry if it -** were present. The cursor might point to an entry that comes -** before or after the key. -** -** An integer is written into *pRes which is the result of -** comparing the key with the entry to which the cursor is -** pointing. The meaning of the integer written into -** *pRes is as follows: -** -** *pRes<0 The cursor is left pointing at an entry that -** is smaller than intKey or if the table is empty -** and the cursor is therefore left point to nothing. -** -** *pRes==0 The cursor is left pointing at an entry that -** exactly matches intKey. -** -** *pRes>0 The cursor is left pointing at an entry that -** is larger than intKey. 
-*/ -int sqlite3BtreeTableMoveto( - BtCursor *pCur, /* The cursor to be moved */ - i64 intKey, /* The table key */ - int biasRight, /* If true, bias the search to the high end */ - int *pRes /* Write search results here */ -){ - int rc; - - assert( cursorOwnsBtShared(pCur) ); - assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); - assert( pRes ); - assert( pCur->pKeyInfo==0 ); - assert( pCur->eState!=CURSOR_VALID || pCur->curIntKey!=0 ); - - /* If the cursor is already positioned at the point we are trying - ** to move to, then just return without doing any work */ - if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 ){ - if( pCur->info.nKey==intKey ){ - *pRes = 0; - return SQLITE_OK; - } - if( pCur->info.nKeycurFlags & BTCF_AtLast)!=0 ){ - *pRes = -1; - return SQLITE_OK; - } - /* If the requested key is one more than the previous key, then - ** try to get there using sqlite3BtreeNext() rather than a full - ** binary search. This is an optimization only. The correct answer - ** is still obtained without this case, only a little more slowely */ - if( pCur->info.nKey+1==intKey ){ - *pRes = 0; - rc = sqlite3BtreeNext(pCur, 0); - if( rc==SQLITE_OK ){ - getCellInfo(pCur); - if( pCur->info.nKey==intKey ){ - return SQLITE_OK; - } - }else if( rc!=SQLITE_DONE ){ - return rc; - } - } - } - } - -#ifdef SQLITE_DEBUG - pCur->pBtree->nSeek++; /* Performance measurement during testing */ -#endif - - rc = moveToRoot(pCur); - if( rc ){ - if( rc==SQLITE_EMPTY ){ - assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 ); - *pRes = -1; - return SQLITE_OK; - } - return rc; - } - assert( pCur->pPage ); - assert( pCur->pPage->isInit ); - assert( pCur->eState==CURSOR_VALID ); - assert( pCur->pPage->nCell > 0 ); - assert( pCur->iPage==0 || pCur->apPage[0]->intKey==pCur->curIntKey ); - assert( pCur->curIntKey ); - - for(;;){ - int lwr, upr, idx, c; - Pgno chldPg; - MemPage *pPage = pCur->pPage; - u8 *pCell; /* Pointer to current cell in pPage */ - - /* pPage->nCell must 
be greater than zero. If this is the root-page - ** the cursor would have been INVALID above and this for(;;) loop - ** not run. If this is not the root-page, then the moveToChild() routine - ** would have already detected db corruption. Similarly, pPage must - ** be the right kind (index or table) of b-tree page. Otherwise - ** a moveToChild() or moveToRoot() call would have detected corruption. */ - assert( pPage->nCell>0 ); - assert( pPage->intKey ); - lwr = 0; - upr = pPage->nCell-1; - assert( biasRight==0 || biasRight==1 ); - idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */ - pCur->ix = (u16)idx; - for(;;){ - i64 nCellKey; - pCell = findCellPastPtr(pPage, idx); - if( pPage->intKeyLeaf ){ - while( 0x80 <= *(pCell++) ){ - if( pCell>=pPage->aDataEnd ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - } - } - getVarint(pCell, (u64*)&nCellKey); - if( nCellKeyupr ){ c = -1; break; } - }else if( nCellKey>intKey ){ - upr = idx-1; - if( lwr>upr ){ c = +1; break; } - }else{ - assert( nCellKey==intKey ); - pCur->ix = (u16)idx; - if( !pPage->leaf ){ - lwr = idx; - goto moveto_table_next_layer; - }else{ - pCur->curFlags |= BTCF_ValidNKey; - pCur->info.nKey = nCellKey; - pCur->info.nSize = 0; - *pRes = 0; - return SQLITE_OK; - } - } - assert( lwr+upr>=0 ); - idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */ - } - assert( lwr==upr+1 || !pPage->leaf ); - assert( pPage->isInit ); - if( pPage->leaf ){ - assert( pCur->ixpPage->nCell ); - pCur->ix = (u16)idx; - *pRes = c; - rc = SQLITE_OK; - goto moveto_table_finish; - } -moveto_table_next_layer: - if( lwr>=pPage->nCell ){ - chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); - }else{ - chldPg = get4byte(findCell(pPage, lwr)); - } - pCur->ix = (u16)lwr; - rc = moveToChild(pCur, chldPg); - if( rc ) break; - } -moveto_table_finish: - pCur->info.nSize = 0; - assert( (pCur->curFlags & BTCF_ValidOvfl)==0 ); - return rc; -} - -/* Move the cursor so that it points to an entry in an index table -** near the key pIdxKey. 
Return a success code. -** -** If an exact match is not found, then the cursor is always -** left pointing at a leaf page which would hold the entry if it -** were present. The cursor might point to an entry that comes -** before or after the key. -** -** An integer is written into *pRes which is the result of -** comparing the key with the entry to which the cursor is -** pointing. The meaning of the integer written into -** *pRes is as follows: -** -** *pRes<0 The cursor is left pointing at an entry that -** is smaller than pIdxKey or if the table is empty -** and the cursor is therefore left point to nothing. -** -** *pRes==0 The cursor is left pointing at an entry that -** exactly matches pIdxKey. -** -** *pRes>0 The cursor is left pointing at an entry that -** is larger than pIdxKey. -** -** The pIdxKey->eqSeen field is set to 1 if there -** exists an entry in the table that exactly matches pIdxKey. -*/ -int sqlite3BtreeIndexMoveto( - BtCursor *pCur, /* The cursor to be moved */ - UnpackedRecord *pIdxKey, /* Unpacked index key */ - int *pRes /* Write search results here */ -){ - int rc; - RecordCompare xRecordCompare; - - assert( cursorOwnsBtShared(pCur) ); - assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); - assert( pRes ); - assert( pCur->pKeyInfo!=0 ); - -#ifdef SQLITE_DEBUG - pCur->pBtree->nSeek++; /* Performance measurement during testing */ -#endif - - xRecordCompare = sqlite3VdbeFindCompare(pIdxKey); - pIdxKey->errCode = 0; - assert( pIdxKey->default_rc==1 - || pIdxKey->default_rc==0 - || pIdxKey->default_rc==-1 - ); - - rc = moveToRoot(pCur); - if( rc ){ - if( rc==SQLITE_EMPTY ){ - assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 ); - *pRes = -1; - return SQLITE_OK; - } - return rc; - } - assert( pCur->pPage ); - assert( pCur->pPage->isInit ); - assert( pCur->eState==CURSOR_VALID ); - assert( pCur->pPage->nCell > 0 ); - assert( pCur->iPage==0 || pCur->apPage[0]->intKey==pCur->curIntKey ); - assert( pCur->curIntKey || pIdxKey ); - for(;;){ - int 
lwr, upr, idx, c; - Pgno chldPg; - MemPage *pPage = pCur->pPage; - u8 *pCell; /* Pointer to current cell in pPage */ - - /* pPage->nCell must be greater than zero. If this is the root-page - ** the cursor would have been INVALID above and this for(;;) loop - ** not run. If this is not the root-page, then the moveToChild() routine - ** would have already detected db corruption. Similarly, pPage must - ** be the right kind (index or table) of b-tree page. Otherwise - ** a moveToChild() or moveToRoot() call would have detected corruption. */ - assert( pPage->nCell>0 ); - assert( pPage->intKey==(pIdxKey==0) ); - lwr = 0; - upr = pPage->nCell-1; - idx = upr>>1; /* idx = (lwr+upr)/2; */ - pCur->ix = (u16)idx; - for(;;){ - int nCell; /* Size of the pCell cell in bytes */ - pCell = findCellPastPtr(pPage, idx); - - /* The maximum supported page-size is 65536 bytes. This means that - ** the maximum number of record bytes stored on an index B-Tree - ** page is less than 16384 bytes and may be stored as a 2-byte - ** varint. This information is used to attempt to avoid parsing - ** the entire cell by checking for the cases where the record is - ** stored entirely within the b-tree page by inspecting the first - ** 2 bytes of the cell. - */ - nCell = pCell[0]; - if( nCell<=pPage->max1bytePayload ){ - /* This branch runs if the record-size field of the cell is a - ** single byte varint and the record fits entirely on the main - ** b-tree page. */ - testcase( pCell+nCell+1==pPage->aDataEnd ); - c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey); - }else if( !(pCell[1] & 0x80) - && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal - ){ - /* The record-size field is a 2 byte varint and the record - ** fits entirely on the main b-tree page. */ - testcase( pCell+nCell+2==pPage->aDataEnd ); - c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey); - }else{ - /* The record flows over onto one or more overflow pages. 
In - ** this case the whole cell needs to be parsed, a buffer allocated - ** and accessPayload() used to retrieve the record into the - ** buffer before VdbeRecordCompare() can be called. - ** - ** If the record is corrupt, the xRecordCompare routine may read - ** up to two varints past the end of the buffer. An extra 18 - ** bytes of padding is allocated at the end of the buffer in - ** case this happens. */ - void *pCellKey; - u8 * const pCellBody = pCell - pPage->childPtrSize; - const int nOverrun = 18; /* Size of the overrun padding */ - pPage->xParseCell(pPage, pCellBody, &pCur->info); - nCell = (int)pCur->info.nKey; - testcase( nCell<0 ); /* True if key size is 2^32 or more */ - testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */ - testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */ - testcase( nCell==2 ); /* Minimum legal index key size */ - if( nCell<2 || nCell/pCur->pBt->usableSize>pCur->pBt->nPage ){ - rc = SQLITE_CORRUPT_PAGE(pPage); - goto moveto_index_finish; - } - pCellKey = sqlite3Malloc( nCell+nOverrun ); - if( pCellKey==0 ){ - rc = SQLITE_NOMEM; - goto moveto_index_finish; - } - pCur->ix = (u16)idx; - rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0); - memset(((u8*)pCellKey)+nCell,0,nOverrun); /* Fix uninit warnings */ - pCur->curFlags &= ~BTCF_ValidOvfl; - if( rc ){ - sqlite3_free(pCellKey); - goto moveto_index_finish; - } - c = sqlite3VdbeRecordCompare(nCell, pCellKey, pIdxKey); - sqlite3_free(pCellKey); - } - assert( - (pIdxKey->errCode!=SQLITE_CORRUPT || c==0) - && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed) - ); - if( c<0 ){ - lwr = idx+1; - }else if( c>0 ){ - upr = idx-1; - }else{ - assert( c==0 ); - *pRes = 0; - rc = SQLITE_OK; - pCur->ix = (u16)idx; - if( pIdxKey->errCode ) rc = SQLITE_CORRUPT_BKPT; - goto moveto_index_finish; - } - if( lwr>upr ) break; - assert( lwr+upr>=0 ); - idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2 */ - } - assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) ); - 
assert( pPage->isInit ); - if( pPage->leaf ){ - assert( pCur->ixpPage->nCell ); - pCur->ix = (u16)idx; - *pRes = c; - rc = SQLITE_OK; - goto moveto_index_finish; - } - if( lwr>=pPage->nCell ){ - chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); - }else{ - chldPg = get4byte(findCell(pPage, lwr)); - } - pCur->ix = (u16)lwr; - rc = moveToChild(pCur, chldPg); - if( rc ) break; - } -moveto_index_finish: - pCur->info.nSize = 0; - assert( (pCur->curFlags & BTCF_ValidOvfl)==0 ); - return rc; -} - - -/* -** Return TRUE if the cursor is not pointing at an entry of the table. -** -** TRUE will be returned after a call to sqlite3BtreeNext() moves -** past the last entry in the table or sqlite3BtreePrev() moves past -** the first entry. TRUE is also returned if the table is empty. -*/ -int sqlite3BtreeEof(BtCursor *pCur){ - /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries - ** have been deleted? This API will need to change to return an error code - ** as well as the boolean result value. - */ - return (CURSOR_VALID!=pCur->eState); -} - -/* -** Return an estimate for the number of rows in the table that pCur is -** pointing to. Return a negative number if no estimate is currently -** available. -*/ -i64 sqlite3BtreeRowCountEst(BtCursor *pCur){ - i64 n; - u8 i; - - assert( cursorOwnsBtShared(pCur) ); - assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); - - /* Currently this interface is only called by the OP_IfSmaller - ** opcode, and it that case the cursor will always be valid and - ** will always point to a leaf node. */ - if( NEVER(pCur->eState!=CURSOR_VALID) ) return -1; - if( NEVER(pCur->pPage->leaf==0) ) return -1; - - n = pCur->pPage->nCell; - for(i=0; iiPage; i++){ - n *= pCur->apPage[i]->nCell; - } - return n; -} - -/* -** Advance the cursor to the next entry in the database. 
-** Return value: -** -** SQLITE_OK success -** SQLITE_DONE cursor is already pointing at the last element -** otherwise some kind of error occurred -** -** The main entry point is sqlite3BtreeNext(). That routine is optimized -** for the common case of merely incrementing the cell counter BtCursor.aiIdx -** to the next cell on the current page. The (slower) btreeNext() helper -** routine is called when it is necessary to move to a different page or -** to restore the cursor. -** -** If bit 0x01 of the F argument in sqlite3BtreeNext(C,F) is 1, then the -** cursor corresponds to an SQL index and this routine could have been -** skipped if the SQL index had been a unique index. The F argument -** is a hint to the implement. SQLite btree implementation does not use -** this hint, but COMDB2 does. -*/ -static SQLITE_NOINLINE int btreeNext(BtCursor *pCur){ - int rc; - int idx; - MemPage *pPage; - - assert( cursorOwnsBtShared(pCur) ); - if( pCur->eState!=CURSOR_VALID ){ - assert( (pCur->curFlags & BTCF_ValidOvfl)==0 ); - rc = restoreCursorPosition(pCur); - if( rc!=SQLITE_OK ){ - return rc; - } - if( CURSOR_INVALID==pCur->eState ){ - return SQLITE_DONE; - } - if( pCur->eState==CURSOR_SKIPNEXT ){ - pCur->eState = CURSOR_VALID; - if( pCur->skipNext>0 ) return SQLITE_OK; - } - } - - pPage = pCur->pPage; - idx = ++pCur->ix; - if( !pPage->isInit || sqlite3FaultSim(412) ){ - /* The only known way for this to happen is for there to be a - ** recursive SQL function that does a DELETE operation as part of a - ** SELECT which deletes content out from under an active cursor - ** in a corrupt database file where the table being DELETE-ed from - ** has pages in common with the table being queried. See TH3 - ** module cov1/btree78.test testcase 220 (2018-06-08) for an - ** example. 
*/ - return SQLITE_CORRUPT_BKPT; - } - - if( idx>=pPage->nCell ){ - if( !pPage->leaf ){ - rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); - if( rc ) return rc; - return moveToLeftmost(pCur); - } - do{ - if( pCur->iPage==0 ){ - pCur->eState = CURSOR_INVALID; - return SQLITE_DONE; - } - moveToParent(pCur); - pPage = pCur->pPage; - }while( pCur->ix>=pPage->nCell ); - if( pPage->intKey ){ - return sqlite3BtreeNext(pCur, 0); - }else{ - return SQLITE_OK; - } - } - if( pPage->leaf ){ - return SQLITE_OK; - }else{ - return moveToLeftmost(pCur); - } -} -int sqlite3BtreeNext(BtCursor *pCur, int flags){ - MemPage *pPage; - UNUSED_PARAMETER( flags ); /* Used in COMDB2 but not native SQLite */ - assert( cursorOwnsBtShared(pCur) ); - assert( flags==0 || flags==1 ); - pCur->info.nSize = 0; - pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); - if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur); - pPage = pCur->pPage; - if( (++pCur->ix)>=pPage->nCell ){ - pCur->ix--; - return btreeNext(pCur); - } - if( pPage->leaf ){ - return SQLITE_OK; - }else{ - return moveToLeftmost(pCur); - } -} - -/* -** Step the cursor to the back to the previous entry in the database. -** Return values: -** -** SQLITE_OK success -** SQLITE_DONE the cursor is already on the first element of the table -** otherwise some kind of error occurred -** -** The main entry point is sqlite3BtreePrevious(). That routine is optimized -** for the common case of merely decrementing the cell counter BtCursor.aiIdx -** to the previous cell on the current page. The (slower) btreePrevious() -** helper routine is called when it is necessary to move to a different page -** or to restore the cursor. -** -** If bit 0x01 of the F argument to sqlite3BtreePrevious(C,F) is 1, then -** the cursor corresponds to an SQL index and this routine could have been -** skipped if the SQL index had been a unique index. The F argument is a -** hint to the implement. 
The native SQLite btree implementation does not -** use this hint, but COMDB2 does. -*/ -static SQLITE_NOINLINE int btreePrevious(BtCursor *pCur){ - int rc; - MemPage *pPage; - - assert( cursorOwnsBtShared(pCur) ); - assert( (pCur->curFlags & (BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey))==0 ); - assert( pCur->info.nSize==0 ); - if( pCur->eState!=CURSOR_VALID ){ - rc = restoreCursorPosition(pCur); - if( rc!=SQLITE_OK ){ - return rc; - } - if( CURSOR_INVALID==pCur->eState ){ - return SQLITE_DONE; - } - if( CURSOR_SKIPNEXT==pCur->eState ){ - pCur->eState = CURSOR_VALID; - if( pCur->skipNext<0 ) return SQLITE_OK; - } - } - - pPage = pCur->pPage; - assert( pPage->isInit ); - if( !pPage->leaf ){ - int idx = pCur->ix; - rc = moveToChild(pCur, get4byte(findCell(pPage, idx))); - if( rc ) return rc; - rc = moveToRightmost(pCur); - }else{ - while( pCur->ix==0 ){ - if( pCur->iPage==0 ){ - pCur->eState = CURSOR_INVALID; - return SQLITE_DONE; - } - moveToParent(pCur); - } - assert( pCur->info.nSize==0 ); - assert( (pCur->curFlags & (BTCF_ValidOvfl))==0 ); - - pCur->ix--; - pPage = pCur->pPage; - if( pPage->intKey && !pPage->leaf ){ - rc = sqlite3BtreePrevious(pCur, 0); - }else{ - rc = SQLITE_OK; - } - } - return rc; -} -int sqlite3BtreePrevious(BtCursor *pCur, int flags){ - assert( cursorOwnsBtShared(pCur) ); - assert( flags==0 || flags==1 ); - UNUSED_PARAMETER( flags ); /* Used in COMDB2 but not native SQLite */ - pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey); - pCur->info.nSize = 0; - if( pCur->eState!=CURSOR_VALID - || pCur->ix==0 - || pCur->pPage->leaf==0 - ){ - return btreePrevious(pCur); - } - pCur->ix--; - return SQLITE_OK; -} - -/* -** Allocate a new page from the database file. -** -** The new page is marked as dirty. (In other words, sqlite3PagerWrite() -** has already been called on the new page.) The new page has also -** been referenced and the calling routine is responsible for calling -** sqlite3PagerUnref() on the new page when it is done. 
-** -** SQLITE_OK is returned on success. Any other return value indicates -** an error. *ppPage is set to NULL in the event of an error. -** -** If the "nearby" parameter is not 0, then an effort is made to -** locate a page close to the page number "nearby". This can be used in an -** attempt to keep related pages close to each other in the database file, -** which in turn can make database access faster. -** -** If the eMode parameter is BTALLOC_EXACT and the nearby page exists -** anywhere on the free-list, then it is guaranteed to be returned. If -** eMode is BTALLOC_LT then the page returned will be less than or equal -** to nearby if any such page exists. If eMode is BTALLOC_ANY then there -** are no restrictions on which page is returned. -*/ -static int allocateBtreePage( - BtShared *pBt, /* The btree */ - MemPage **ppPage, /* Store pointer to the allocated page here */ - Pgno *pPgno, /* Store the page number here */ - Pgno nearby, /* Search for a page near this one */ - u8 eMode /* BTALLOC_EXACT, BTALLOC_LT, or BTALLOC_ANY */ -){ - MemPage *pPage1; - int rc; - u32 n; /* Number of pages on the freelist */ - u32 k; /* Number of leaves on the trunk of the freelist */ - MemPage *pTrunk = 0; - MemPage *pPrevTrunk = 0; - Pgno mxPage; /* Total size of the database file */ - - assert( sqlite3_mutex_held(pBt->mutex) ); - assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); - pPage1 = pBt->pPage1; - mxPage = btreePagecount(pBt); - /* EVIDENCE-OF: R-05119-02637 The 4-byte big-endian integer at offset 36 - ** stores stores the total number of pages on the freelist. */ - n = get4byte(&pPage1->aData[36]); - testcase( n==mxPage-1 ); - if( n>=mxPage ){ - return SQLITE_CORRUPT_BKPT; - } - if( n>0 ){ - /* There are pages on the freelist. Reuse one of those pages. 
*/ - Pgno iTrunk; - u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ - u32 nSearch = 0; /* Count of the number of search attempts */ - - /* If eMode==BTALLOC_EXACT and a query of the pointer-map - ** shows that the page 'nearby' is somewhere on the free-list, then - ** the entire-list will be searched for that page. - */ -#ifndef SQLITE_OMIT_AUTOVACUUM - if( eMode==BTALLOC_EXACT ){ - if( nearby<=mxPage ){ - u8 eType; - assert( nearby>0 ); - assert( pBt->autoVacuum ); - rc = ptrmapGet(pBt, nearby, &eType, 0); - if( rc ) return rc; - if( eType==PTRMAP_FREEPAGE ){ - searchList = 1; - } - } - }else if( eMode==BTALLOC_LE ){ - searchList = 1; - } -#endif - - /* Decrement the free-list count by 1. Set iTrunk to the index of the - ** first free-list trunk page. iPrevTrunk is initially 1. - */ - rc = sqlite3PagerWrite(pPage1->pDbPage); - if( rc ) return rc; - put4byte(&pPage1->aData[36], n-1); - - /* The code within this loop is run only once if the 'searchList' variable - ** is not true. Otherwise, it runs once for each trunk-page on the - ** free-list until the page 'nearby' is located (eMode==BTALLOC_EXACT) - ** or until a page less than 'nearby' is located (eMode==BTALLOC_LT) - */ - do { - pPrevTrunk = pTrunk; - if( pPrevTrunk ){ - /* EVIDENCE-OF: R-01506-11053 The first integer on a freelist trunk page - ** is the page number of the next freelist trunk page in the list or - ** zero if this is the last freelist trunk page. */ - iTrunk = get4byte(&pPrevTrunk->aData[0]); - }else{ - /* EVIDENCE-OF: R-59841-13798 The 4-byte big-endian integer at offset 32 - ** stores the page number of the first page of the freelist, or zero if - ** the freelist is empty. */ - iTrunk = get4byte(&pPage1->aData[32]); - } - testcase( iTrunk==mxPage ); - if( iTrunk>mxPage || nSearch++ > n ){ - rc = SQLITE_CORRUPT_PGNO(pPrevTrunk ? 
pPrevTrunk->pgno : 1); - }else{ - rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0); - } - if( rc ){ - pTrunk = 0; - goto end_allocate_page; - } - assert( pTrunk!=0 ); - assert( pTrunk->aData!=0 ); - /* EVIDENCE-OF: R-13523-04394 The second integer on a freelist trunk page - ** is the number of leaf page pointers to follow. */ - k = get4byte(&pTrunk->aData[4]); - if( k==0 && !searchList ){ - /* The trunk has no leaves and the list is not being searched. - ** So extract the trunk page itself and use it as the newly - ** allocated page */ - assert( pPrevTrunk==0 ); - rc = sqlite3PagerWrite(pTrunk->pDbPage); - if( rc ){ - goto end_allocate_page; - } - *pPgno = iTrunk; - memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); - *ppPage = pTrunk; - pTrunk = 0; - TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); - }else if( k>(u32)(pBt->usableSize/4 - 2) ){ - /* Value of k is out of range. Database corruption */ - rc = SQLITE_CORRUPT_PGNO(iTrunk); - goto end_allocate_page; -#ifndef SQLITE_OMIT_AUTOVACUUM - }else if( searchList - && (nearby==iTrunk || (iTrunkpDbPage); - if( rc ){ - goto end_allocate_page; - } - if( k==0 ){ - if( !pPrevTrunk ){ - memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); - }else{ - rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); - if( rc!=SQLITE_OK ){ - goto end_allocate_page; - } - memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4); - } - }else{ - /* The trunk page is required by the caller but it contains - ** pointers to free-list leaves. The first leaf becomes a trunk - ** page in this case. 
- */ - MemPage *pNewTrunk; - Pgno iNewTrunk = get4byte(&pTrunk->aData[8]); - if( iNewTrunk>mxPage ){ - rc = SQLITE_CORRUPT_PGNO(iTrunk); - goto end_allocate_page; - } - testcase( iNewTrunk==mxPage ); - rc = btreeGetUnusedPage(pBt, iNewTrunk, &pNewTrunk, 0); - if( rc!=SQLITE_OK ){ - goto end_allocate_page; - } - rc = sqlite3PagerWrite(pNewTrunk->pDbPage); - if( rc!=SQLITE_OK ){ - releasePage(pNewTrunk); - goto end_allocate_page; - } - memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4); - put4byte(&pNewTrunk->aData[4], k-1); - memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4); - releasePage(pNewTrunk); - if( !pPrevTrunk ){ - assert( sqlite3PagerIswriteable(pPage1->pDbPage) ); - put4byte(&pPage1->aData[32], iNewTrunk); - }else{ - rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); - if( rc ){ - goto end_allocate_page; - } - put4byte(&pPrevTrunk->aData[0], iNewTrunk); - } - } - pTrunk = 0; - TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); -#endif - }else if( k>0 ){ - /* Extract a leaf from the trunk */ - u32 closest; - Pgno iPage; - unsigned char *aData = pTrunk->aData; - if( nearby>0 ){ - u32 i; - closest = 0; - if( eMode==BTALLOC_LE ){ - for(i=0; imxPage || iPage<2 ){ - rc = SQLITE_CORRUPT_PGNO(iTrunk); - goto end_allocate_page; - } - testcase( iPage==mxPage ); - if( !searchList - || (iPage==nearby || (iPagepgno, n-1)); - rc = sqlite3PagerWrite(pTrunk->pDbPage); - if( rc ) goto end_allocate_page; - if( closestpDbPage); - if( rc!=SQLITE_OK ){ - releasePage(*ppPage); - *ppPage = 0; - } - } - searchList = 0; - } - } - releasePage(pPrevTrunk); - pPrevTrunk = 0; - }while( searchList ); - }else{ - /* There are no pages on the freelist, so append a new page to the - ** database image. - ** - ** Normally, new pages allocated by this block can be requested from the - ** pager layer with the 'no-content' flag set. This prevents the pager - ** from trying to read the pages content from disk. 
However, if the - ** current transaction has already run one or more incremental-vacuum - ** steps, then the page we are about to allocate may contain content - ** that is required in the event of a rollback. In this case, do - ** not set the no-content flag. This causes the pager to load and journal - ** the current page content before overwriting it. - ** - ** Note that the pager will not actually attempt to load or journal - ** content for any page that really does lie past the end of the database - ** file on disk. So the effects of disabling the no-content optimization - ** here are confined to those pages that lie between the end of the - ** database image and the end of the database file. - */ - int bNoContent = (0==IfNotOmitAV(pBt->bDoTruncate))? PAGER_GET_NOCONTENT:0; - - rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); - if( rc ) return rc; - pBt->nPage++; - if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++; - -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, pBt->nPage) ){ - /* If *pPgno refers to a pointer-map page, allocate two new pages - ** at the end of the file instead of one. The first allocated page - ** becomes a new pointer-map page, the second is used by the caller. 
- */ - MemPage *pPg = 0; - TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); - assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetUnusedPage(pBt, pBt->nPage, &pPg, bNoContent); - if( rc==SQLITE_OK ){ - rc = sqlite3PagerWrite(pPg->pDbPage); - releasePage(pPg); - } - if( rc ) return rc; - pBt->nPage++; - if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; } - } -#endif - put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage); - *pPgno = pBt->nPage; - - assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, bNoContent); - if( rc ) return rc; - rc = sqlite3PagerWrite((*ppPage)->pDbPage); - if( rc!=SQLITE_OK ){ - releasePage(*ppPage); - *ppPage = 0; - } - TRACE(("ALLOCATE: %d from end of file\n", *pPgno)); - } - - assert( CORRUPT_DB || *pPgno!=PENDING_BYTE_PAGE(pBt) ); - -end_allocate_page: - releasePage(pTrunk); - releasePage(pPrevTrunk); - assert( rc!=SQLITE_OK || sqlite3PagerPageRefcount((*ppPage)->pDbPage)<=1 ); - assert( rc!=SQLITE_OK || (*ppPage)->isInit==0 ); - return rc; -} - -/* -** This function is used to add page iPage to the database file free-list. -** It is assumed that the page is not already a part of the free-list. -** -** The value passed as the second argument to this function is optional. -** If the caller happens to have a pointer to the MemPage object -** corresponding to page iPage handy, it may pass it as the second value. -** Otherwise, it may pass NULL. -** -** If a pointer to a MemPage object is passed as the second argument, -** its reference count is not altered by this function. -*/ -static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ - MemPage *pTrunk = 0; /* Free-list trunk page */ - Pgno iTrunk = 0; /* Page number of free-list trunk page */ - MemPage *pPage1 = pBt->pPage1; /* Local reference to page 1 */ - MemPage *pPage; /* Page being freed. May be NULL. 
*/ - int rc; /* Return Code */ - u32 nFree; /* Initial number of pages on free-list */ - - assert( sqlite3_mutex_held(pBt->mutex) ); - assert( CORRUPT_DB || iPage>1 ); - assert( !pMemPage || pMemPage->pgno==iPage ); - - if( NEVER(iPage<2) || iPage>pBt->nPage ){ - return SQLITE_CORRUPT_BKPT; - } - if( pMemPage ){ - pPage = pMemPage; - sqlite3PagerRef(pPage->pDbPage); - }else{ - pPage = btreePageLookup(pBt, iPage); - } - - /* Increment the free page count on pPage1 */ - rc = sqlite3PagerWrite(pPage1->pDbPage); - if( rc ) goto freepage_out; - nFree = get4byte(&pPage1->aData[36]); - put4byte(&pPage1->aData[36], nFree+1); - - if( pBt->btsFlags & BTS_SECURE_DELETE ){ - /* If the secure_delete option is enabled, then - ** always fully overwrite deleted information with zeros. - */ - if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) ) - || ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0) - ){ - goto freepage_out; - } - memset(pPage->aData, 0, pPage->pBt->pageSize); - } - - /* If the database supports auto-vacuum, write an entry in the pointer-map - ** to indicate that the page is free. - */ - if( ISAUTOVACUUM ){ - ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc); - if( rc ) goto freepage_out; - } - - /* Now manipulate the actual database free-list structure. There are two - ** possibilities. If the free-list is currently empty, or if the first - ** trunk page in the free-list is full, then this page will become a - ** new free-list trunk page. Otherwise, it will become a leaf of the - ** first trunk page in the current free-list. This block tests if it - ** is possible to add the page as a new free-list leaf. 
- */ - if( nFree!=0 ){ - u32 nLeaf; /* Initial number of leaf cells on trunk page */ - - iTrunk = get4byte(&pPage1->aData[32]); - if( iTrunk>btreePagecount(pBt) ){ - rc = SQLITE_CORRUPT_BKPT; - goto freepage_out; - } - rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); - if( rc!=SQLITE_OK ){ - goto freepage_out; - } - - nLeaf = get4byte(&pTrunk->aData[4]); - assert( pBt->usableSize>32 ); - if( nLeaf > (u32)pBt->usableSize/4 - 2 ){ - rc = SQLITE_CORRUPT_BKPT; - goto freepage_out; - } - if( nLeaf < (u32)pBt->usableSize/4 - 8 ){ - /* In this case there is room on the trunk page to insert the page - ** being freed as a new leaf. - ** - ** Note that the trunk page is not really full until it contains - ** usableSize/4 - 2 entries, not usableSize/4 - 8 entries as we have - ** coded. But due to a coding error in versions of SQLite prior to - ** 3.6.0, databases with freelist trunk pages holding more than - ** usableSize/4 - 8 entries will be reported as corrupt. In order - ** to maintain backwards compatibility with older versions of SQLite, - ** we will continue to restrict the number of entries to usableSize/4 - 8 - ** for now. At some point in the future (once everyone has upgraded - ** to 3.6.0 or later) we should consider fixing the conditional above - ** to read "usableSize/4-2" instead of "usableSize/4-8". - ** - ** EVIDENCE-OF: R-19920-11576 However, newer versions of SQLite still - ** avoid using the last six entries in the freelist trunk page array in - ** order that database files created by newer versions of SQLite can be - ** read by older versions of SQLite. 
- */ - rc = sqlite3PagerWrite(pTrunk->pDbPage); - if( rc==SQLITE_OK ){ - put4byte(&pTrunk->aData[4], nLeaf+1); - put4byte(&pTrunk->aData[8+nLeaf*4], iPage); - if( pPage && (pBt->btsFlags & BTS_SECURE_DELETE)==0 ){ - sqlite3PagerDontWrite(pPage->pDbPage); - } - rc = btreeSetHasContent(pBt, iPage); - } - TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno)); - goto freepage_out; - } - } - - /* If control flows to this point, then it was not possible to add the - ** the page being freed as a leaf page of the first trunk in the free-list. - ** Possibly because the free-list is empty, or possibly because the - ** first trunk in the free-list is full. Either way, the page being freed - ** will become the new first trunk page in the free-list. - */ - if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){ - goto freepage_out; - } - rc = sqlite3PagerWrite(pPage->pDbPage); - if( rc!=SQLITE_OK ){ - goto freepage_out; - } - put4byte(pPage->aData, iTrunk); - put4byte(&pPage->aData[4], 0); - put4byte(&pPage1->aData[32], iPage); - TRACE(("FREE-PAGE: %d new trunk page replacing %d\n", pPage->pgno, iTrunk)); - -freepage_out: - if( pPage ){ - pPage->isInit = 0; - } - releasePage(pPage); - releasePage(pTrunk); - return rc; -} -static void freePage(MemPage *pPage, int *pRC){ - if( (*pRC)==SQLITE_OK ){ - *pRC = freePage2(pPage->pBt, pPage, pPage->pgno); - } -} - -/* -** Free the overflow pages associated with the given Cell. 
-*/ -static SQLITE_NOINLINE int clearCellOverflow( - MemPage *pPage, /* The page that contains the Cell */ - unsigned char *pCell, /* First byte of the Cell */ - CellInfo *pInfo /* Size information about the cell */ -){ - BtShared *pBt; - Pgno ovflPgno; - int rc; - int nOvfl; - u32 ovflPageSize; - - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( pInfo->nLocal!=pInfo->nPayload ); - testcase( pCell + pInfo->nSize == pPage->aDataEnd ); - testcase( pCell + (pInfo->nSize-1) == pPage->aDataEnd ); - if( pCell + pInfo->nSize > pPage->aDataEnd ){ - /* Cell extends past end of page */ - return SQLITE_CORRUPT_PAGE(pPage); - } - ovflPgno = get4byte(pCell + pInfo->nSize - 4); - pBt = pPage->pBt; - assert( pBt->usableSize > 4 ); - ovflPageSize = pBt->usableSize - 4; - nOvfl = (pInfo->nPayload - pInfo->nLocal + ovflPageSize - 1)/ovflPageSize; - assert( nOvfl>0 || - (CORRUPT_DB && (pInfo->nPayload + ovflPageSize)btreePagecount(pBt) ){ - /* 0 is not a legal page number and page 1 cannot be an - ** overflow page. Therefore if ovflPgno<2 or past the end of the - ** file the database must be corrupt. */ - return SQLITE_CORRUPT_BKPT; - } - if( nOvfl ){ - rc = getOverflowPage(pBt, ovflPgno, &pOvfl, &iNext); - if( rc ) return rc; - } - - if( ( pOvfl || ((pOvfl = btreePageLookup(pBt, ovflPgno))!=0) ) - && sqlite3PagerPageRefcount(pOvfl->pDbPage)!=1 - ){ - /* There is no reason any cursor should have an outstanding reference - ** to an overflow page belonging to a cell that is being deleted/updated. - ** So if there exists more than one reference to this page, then it - ** must not really be an overflow page and the database must be corrupt. - ** It is helpful to detect this before calling freePage2(), as - ** freePage2() may zero the page contents if secure-delete mode is - ** enabled. If this 'overflow' page happens to be a page that the - ** caller is iterating through or using in some other way, this - ** can be problematic. 
- */ - rc = SQLITE_CORRUPT_BKPT; - }else{ - rc = freePage2(pBt, pOvfl, ovflPgno); - } - - if( pOvfl ){ - sqlite3PagerUnref(pOvfl->pDbPage); - } - if( rc ) return rc; - ovflPgno = iNext; - } - return SQLITE_OK; -} - -/* Call xParseCell to compute the size of a cell. If the cell contains -** overflow, then invoke cellClearOverflow to clear out that overflow. -** STore the result code (SQLITE_OK or some error code) in rc. -** -** Implemented as macro to force inlining for performance. -*/ -#define BTREE_CLEAR_CELL(rc, pPage, pCell, sInfo) \ - pPage->xParseCell(pPage, pCell, &sInfo); \ - if( sInfo.nLocal!=sInfo.nPayload ){ \ - rc = clearCellOverflow(pPage, pCell, &sInfo); \ - }else{ \ - rc = SQLITE_OK; \ - } - - -/* -** Create the byte sequence used to represent a cell on page pPage -** and write that byte sequence into pCell[]. Overflow pages are -** allocated and filled in as necessary. The calling procedure -** is responsible for making sure sufficient space has been allocated -** for pCell[]. -** -** Note that pCell does not necessary need to point to the pPage->aData -** area. pCell might point to some temporary storage. The cell will -** be constructed in this temporary area then copied into pPage->aData -** later. 
-*/ -static int fillInCell( - MemPage *pPage, /* The page that contains the cell */ - unsigned char *pCell, /* Complete text of the cell */ - const BtreePayload *pX, /* Payload with which to construct the cell */ - int *pnSize /* Write cell size here */ -){ - int nPayload; - const u8 *pSrc; - int nSrc, n, rc, mn; - int spaceLeft; - MemPage *pToRelease; - unsigned char *pPrior; - unsigned char *pPayload; - BtShared *pBt; - Pgno pgnoOvfl; - int nHeader; - - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - - /* pPage is not necessarily writeable since pCell might be auxiliary - ** buffer space that is separate from the pPage buffer area */ - assert( pCellaData || pCell>=&pPage->aData[pPage->pBt->pageSize] - || sqlite3PagerIswriteable(pPage->pDbPage) ); - - /* Fill in the header. */ - nHeader = pPage->childPtrSize; - if( pPage->intKey ){ - nPayload = pX->nData + pX->nZero; - pSrc = pX->pData; - nSrc = pX->nData; - assert( pPage->intKeyLeaf ); /* fillInCell() only called for leaves */ - nHeader += putVarint32(&pCell[nHeader], nPayload); - nHeader += putVarint(&pCell[nHeader], *(u64*)&pX->nKey); - }else{ - assert( pX->nKey<=0x7fffffff && pX->pKey!=0 ); - nSrc = nPayload = (int)pX->nKey; - pSrc = pX->pKey; - nHeader += putVarint32(&pCell[nHeader], nPayload); - } - - /* Fill in the payload */ - pPayload = &pCell[nHeader]; - if( nPayload<=pPage->maxLocal ){ - /* This is the common case where everything fits on the btree page - ** and no overflow pages are required. 
*/ - n = nHeader + nPayload; - testcase( n==3 ); - testcase( n==4 ); - if( n<4 ) n = 4; - *pnSize = n; - assert( nSrc<=nPayload ); - testcase( nSrcminLocal; - n = mn + (nPayload - mn) % (pPage->pBt->usableSize - 4); - testcase( n==pPage->maxLocal ); - testcase( n==pPage->maxLocal+1 ); - if( n > pPage->maxLocal ) n = mn; - spaceLeft = n; - *pnSize = n + nHeader + 4; - pPrior = &pCell[nHeader+n]; - pToRelease = 0; - pgnoOvfl = 0; - pBt = pPage->pBt; - - /* At this point variables should be set as follows: - ** - ** nPayload Total payload size in bytes - ** pPayload Begin writing payload here - ** spaceLeft Space available at pPayload. If nPayload>spaceLeft, - ** that means content must spill into overflow pages. - ** *pnSize Size of the local cell (not counting overflow pages) - ** pPrior Where to write the pgno of the first overflow page - ** - ** Use a call to btreeParseCellPtr() to verify that the values above - ** were computed correctly. - */ -#ifdef SQLITE_DEBUG - { - CellInfo info; - pPage->xParseCell(pPage, pCell, &info); - assert( nHeader==(int)(info.pPayload - pCell) ); - assert( info.nKey==pX->nKey ); - assert( *pnSize == info.nSize ); - assert( spaceLeft == info.nLocal ); - } -#endif - - /* Write the payload into the local Cell and any extra into overflow pages */ - while( 1 ){ - n = nPayload; - if( n>spaceLeft ) n = spaceLeft; - - /* If pToRelease is not zero than pPayload points into the data area - ** of pToRelease. Make sure pToRelease is still writeable. 
*/ - assert( pToRelease==0 || sqlite3PagerIswriteable(pToRelease->pDbPage) ); - - /* If pPayload is part of the data area of pPage, then make sure pPage - ** is still writeable */ - assert( pPayloadaData || pPayload>=&pPage->aData[pBt->pageSize] - || sqlite3PagerIswriteable(pPage->pDbPage) ); - - if( nSrc>=n ){ - memcpy(pPayload, pSrc, n); - }else if( nSrc>0 ){ - n = nSrc; - memcpy(pPayload, pSrc, n); - }else{ - memset(pPayload, 0, n); - } - nPayload -= n; - if( nPayload<=0 ) break; - pPayload += n; - pSrc += n; - nSrc -= n; - spaceLeft -= n; - if( spaceLeft==0 ){ - MemPage *pOvfl = 0; -#ifndef SQLITE_OMIT_AUTOVACUUM - Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */ - if( pBt->autoVacuum ){ - do{ - pgnoOvfl++; - } while( - PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt) - ); - } -#endif - rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, 0); -#ifndef SQLITE_OMIT_AUTOVACUUM - /* If the database supports auto-vacuum, and the second or subsequent - ** overflow page is being allocated, add an entry to the pointer-map - ** for that page now. - ** - ** If this is the first overflow page, then write a partial entry - ** to the pointer-map. If we write nothing to this pointer-map slot, - ** then the optimistic overflow chain processing in clearCell() - ** may misinterpret the uninitialized values and delete the - ** wrong pages from the database. - */ - if( pBt->autoVacuum && rc==SQLITE_OK ){ - u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1); - ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap, &rc); - if( rc ){ - releasePage(pOvfl); - } - } -#endif - if( rc ){ - releasePage(pToRelease); - return rc; - } - - /* If pToRelease is not zero than pPrior points into the data area - ** of pToRelease. Make sure pToRelease is still writeable. 
*/ - assert( pToRelease==0 || sqlite3PagerIswriteable(pToRelease->pDbPage) ); - - /* If pPrior is part of the data area of pPage, then make sure pPage - ** is still writeable */ - assert( pPrioraData || pPrior>=&pPage->aData[pBt->pageSize] - || sqlite3PagerIswriteable(pPage->pDbPage) ); - - put4byte(pPrior, pgnoOvfl); - releasePage(pToRelease); - pToRelease = pOvfl; - pPrior = pOvfl->aData; - put4byte(pPrior, 0); - pPayload = &pOvfl->aData[4]; - spaceLeft = pBt->usableSize - 4; - } - } - releasePage(pToRelease); - return SQLITE_OK; -} - -/* -** Remove the i-th cell from pPage. This routine effects pPage only. -** The cell content is not freed or deallocated. It is assumed that -** the cell content has been copied someplace else. This routine just -** removes the reference to the cell from pPage. -** -** "sz" must be the number of bytes in the cell. -*/ -static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ - u32 pc; /* Offset to cell content of cell being deleted */ - u8 *data; /* pPage->aData */ - u8 *ptr; /* Used to move bytes around within data[] */ - int rc; /* The return code */ - int hdr; /* Beginning of the header. 0 most pages. 
100 page 1 */ - - if( *pRC ) return; - assert( idx>=0 && idxnCell ); - assert( CORRUPT_DB || sz==cellSize(pPage, idx) ); - assert( sqlite3PagerIswriteable(pPage->pDbPage) ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( pPage->nFree>=0 ); - data = pPage->aData; - ptr = &pPage->aCellIdx[2*idx]; - pc = get2byte(ptr); - hdr = pPage->hdrOffset; - testcase( pc==get2byte(&data[hdr+5]) ); - testcase( pc+sz==pPage->pBt->usableSize ); - if( pc+sz > pPage->pBt->usableSize ){ - *pRC = SQLITE_CORRUPT_BKPT; - return; - } - rc = freeSpace(pPage, pc, sz); - if( rc ){ - *pRC = rc; - return; - } - pPage->nCell--; - if( pPage->nCell==0 ){ - memset(&data[hdr+1], 0, 4); - data[hdr+7] = 0; - put2byte(&data[hdr+5], pPage->pBt->usableSize); - pPage->nFree = pPage->pBt->usableSize - pPage->hdrOffset - - pPage->childPtrSize - 8; - }else{ - memmove(ptr, ptr+2, 2*(pPage->nCell - idx)); - put2byte(&data[hdr+3], pPage->nCell); - pPage->nFree += 2; - } -} - -/* -** Insert a new cell on pPage at cell index "i". pCell points to the -** content of the cell. -** -** If the cell content will fit on the page, then put it there. If it -** will not fit, then make a copy of the cell content into pTemp if -** pTemp is not null. Regardless of pTemp, allocate a new entry -** in pPage->apOvfl[] and make it point to the cell content (either -** in pTemp or the original pCell) and also record its index. -** Allocating a new entry in pPage->aCell[] implies that -** pPage->nOverflow is incremented. -** -** *pRC must be SQLITE_OK when this routine is called. 
-*/ -static void insertCell( - MemPage *pPage, /* Page into which we are copying */ - int i, /* New cell becomes the i-th cell of the page */ - u8 *pCell, /* Content of the new cell */ - int sz, /* Bytes of content in pCell */ - u8 *pTemp, /* Temp storage space for pCell, if needed */ - Pgno iChild, /* If non-zero, replace first 4 bytes with this value */ - int *pRC /* Read and write return code from here */ -){ - int idx = 0; /* Where to write new cell content in data[] */ - int j; /* Loop counter */ - u8 *data; /* The content of the whole page */ - u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */ - - assert( *pRC==SQLITE_OK ); - assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); - assert( MX_CELL(pPage->pBt)<=10921 ); - assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB ); - assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) ); - assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( sz==pPage->xCellSize(pPage, pCell) || CORRUPT_DB ); - assert( pPage->nFree>=0 ); - if( pPage->nOverflow || sz+2>pPage->nFree ){ - if( pTemp ){ - memcpy(pTemp, pCell, sz); - pCell = pTemp; - } - if( iChild ){ - put4byte(pCell, iChild); - } - j = pPage->nOverflow++; - /* Comparison against ArraySize-1 since we hold back one extra slot - ** as a contingency. In other words, never need more than 3 overflow - ** slots but 4 are allocated, just to be safe. */ - assert( j < ArraySize(pPage->apOvfl)-1 ); - pPage->apOvfl[j] = pCell; - pPage->aiOvfl[j] = (u16)i; - - /* When multiple overflows occur, they are always sequential and in - ** sorted order. This invariants arise because multiple overflows can - ** only occur when inserting divider cells into the parent page during - ** balancing, and the dividers are adjacent and sorted. 
- */ - assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */ - assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */ - }else{ - int rc = sqlite3PagerWrite(pPage->pDbPage); - if( rc!=SQLITE_OK ){ - *pRC = rc; - return; - } - assert( sqlite3PagerIswriteable(pPage->pDbPage) ); - data = pPage->aData; - assert( &data[pPage->cellOffset]==pPage->aCellIdx ); - rc = allocateSpace(pPage, sz, &idx); - if( rc ){ *pRC = rc; return; } - /* The allocateSpace() routine guarantees the following properties - ** if it returns successfully */ - assert( idx >= 0 ); - assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB ); - assert( idx+sz <= (int)pPage->pBt->usableSize ); - pPage->nFree -= (u16)(2 + sz); - if( iChild ){ - /* In a corrupt database where an entry in the cell index section of - ** a btree page has a value of 3 or less, the pCell value might point - ** as many as 4 bytes in front of the start of the aData buffer for - ** the source page. Make sure this does not cause problems by not - ** reading the first 4 bytes */ - memcpy(&data[idx+4], pCell+4, sz-4); - put4byte(&data[idx], iChild); - }else{ - memcpy(&data[idx], pCell, sz); - } - pIns = pPage->aCellIdx + i*2; - memmove(pIns+2, pIns, 2*(pPage->nCell - i)); - put2byte(pIns, idx); - pPage->nCell++; - /* increment the cell count */ - if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++; - assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell || CORRUPT_DB ); -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pPage->pBt->autoVacuum ){ - /* The cell may contain a pointer to an overflow page. If so, write - ** the entry for the overflow page into the pointer map. - */ - ptrmapPutOvflPtr(pPage, pPage, pCell, pRC); - } -#endif - } -} - -/* -** The following parameters determine how many adjacent pages get involved -** in a balancing operation. NN is the number of neighbors on either side -** of the page that participate in the balancing operation. 
NB is the -** total number of pages that participate, including the target page and -** NN neighbors on either side. -** -** The minimum value of NN is 1 (of course). Increasing NN above 1 -** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance -** in exchange for a larger degradation in INSERT and UPDATE performance. -** The value of NN appears to give the best results overall. -** -** (Later:) The description above makes it seem as if these values are -** tunable - as if you could change them and recompile and it would all work. -** But that is unlikely. NB has been 3 since the inception of SQLite and -** we have never tested any other value. -*/ -#define NN 1 /* Number of neighbors on either side of pPage */ -#define NB 3 /* (NN*2+1): Total pages involved in the balance */ - -/* -** A CellArray object contains a cache of pointers and sizes for a -** consecutive sequence of cells that might be held on multiple pages. -** -** The cells in this array are the divider cell or cells from the pParent -** page plus up to three child pages. There are a total of nCell cells. -** -** pRef is a pointer to one of the pages that contributes cells. This is -** used to access information such as MemPage.intKey and MemPage.pBt->pageSize -** which should be common to all pages that contribute cells to this array. -** -** apCell[] and szCell[] hold, respectively, pointers to the start of each -** cell and the size of each cell. Some of the apCell[] pointers might refer -** to overflow cells. In other words, some apCel[] pointers might not point -** to content area of the pages. -** -** A szCell[] of zero means the size of that cell has not yet been computed. 
-** -** The cells come from as many as four different pages: -** -** ----------- -** | Parent | -** ----------- -** / | \ -** / | \ -** --------- --------- --------- -** |Child-1| |Child-2| |Child-3| -** --------- --------- --------- -** -** The order of cells is in the array is for an index btree is: -** -** 1. All cells from Child-1 in order -** 2. The first divider cell from Parent -** 3. All cells from Child-2 in order -** 4. The second divider cell from Parent -** 5. All cells from Child-3 in order -** -** For a table-btree (with rowids) the items 2 and 4 are empty because -** content exists only in leaves and there are no divider cells. -** -** For an index btree, the apEnd[] array holds pointer to the end of page -** for Child-1, the Parent, Child-2, the Parent (again), and Child-3, -** respectively. The ixNx[] array holds the number of cells contained in -** each of these 5 stages, and all stages to the left. Hence: -** -** ixNx[0] = Number of cells in Child-1. -** ixNx[1] = Number of cells in Child-1 plus 1 for first divider. -** ixNx[2] = Number of cells in Child-1 and Child-2 + 1 for 1st divider. -** ixNx[3] = Number of cells in Child-1 and Child-2 + both divider cells -** ixNx[4] = Total number of cells. -** -** For a table-btree, the concept is similar, except only apEnd[0]..apEnd[2] -** are used and they point to the leaf pages only, and the ixNx value are: -** -** ixNx[0] = Number of cells in Child-1. -** ixNx[1] = Number of cells in Child-1 and Child-2. -** ixNx[2] = Total number of cells. -** -** Sometimes when deleting, a child page can have zero cells. In those -** cases, ixNx[] entries with higher indexes, and the corresponding apEnd[] -** entries, shift down. 
The end result is that each ixNx[] entry should -** be larger than the previous -*/ -typedef struct CellArray CellArray; -struct CellArray { - int nCell; /* Number of cells in apCell[] */ - MemPage *pRef; /* Reference page */ - u8 **apCell; /* All cells begin balanced */ - u16 *szCell; /* Local size of all cells in apCell[] */ - u8 *apEnd[NB*2]; /* MemPage.aDataEnd values */ - int ixNx[NB*2]; /* Index of at which we move to the next apEnd[] */ -}; - -/* -** Make sure the cell sizes at idx, idx+1, ..., idx+N-1 have been -** computed. -*/ -static void populateCellCache(CellArray *p, int idx, int N){ - assert( idx>=0 && idx+N<=p->nCell ); - while( N>0 ){ - assert( p->apCell[idx]!=0 ); - if( p->szCell[idx]==0 ){ - p->szCell[idx] = p->pRef->xCellSize(p->pRef, p->apCell[idx]); - }else{ - assert( CORRUPT_DB || - p->szCell[idx]==p->pRef->xCellSize(p->pRef, p->apCell[idx]) ); - } - idx++; - N--; - } -} - -/* -** Return the size of the Nth element of the cell array -*/ -static SQLITE_NOINLINE u16 computeCellSize(CellArray *p, int N){ - assert( N>=0 && NnCell ); - assert( p->szCell[N]==0 ); - p->szCell[N] = p->pRef->xCellSize(p->pRef, p->apCell[N]); - return p->szCell[N]; -} -static u16 cachedCellSize(CellArray *p, int N){ - assert( N>=0 && NnCell ); - if( p->szCell[N] ) return p->szCell[N]; - return computeCellSize(p, N); -} - -/* -** Array apCell[] contains pointers to nCell b-tree page cells. The -** szCell[] array contains the size in bytes of each cell. This function -** replaces the current contents of page pPg with the contents of the cell -** array. -** -** Some of the cells in apCell[] may currently be stored in pPg. This -** function works around problems caused by this by making a copy of any -** such cells before overwriting the page data. -** -** The MemPage.nFree field is invalidated by this function. It is the -** responsibility of the caller to set it correctly. 
-*/ -static int rebuildPage( - CellArray *pCArray, /* Content to be added to page pPg */ - int iFirst, /* First cell in pCArray to use */ - int nCell, /* Final number of cells on page */ - MemPage *pPg /* The page to be reconstructed */ -){ - const int hdr = pPg->hdrOffset; /* Offset of header on pPg */ - u8 * const aData = pPg->aData; /* Pointer to data for pPg */ - const int usableSize = pPg->pBt->usableSize; - u8 * const pEnd = &aData[usableSize]; - int i = iFirst; /* Which cell to copy from pCArray*/ - u32 j; /* Start of cell content area */ - int iEnd = i+nCell; /* Loop terminator */ - u8 *pCellptr = pPg->aCellIdx; - u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager); - u8 *pData; - int k; /* Current slot in pCArray->apEnd[] */ - u8 *pSrcEnd; /* Current pCArray->apEnd[k] value */ - - assert( i(u32)usableSize) ){ j = 0; } - memcpy(&pTmp[j], &aData[j], usableSize - j); - - for(k=0; pCArray->ixNx[k]<=i && ALWAYS(kapEnd[k]; - - pData = pEnd; - while( 1/*exit by break*/ ){ - u8 *pCell = pCArray->apCell[i]; - u16 sz = pCArray->szCell[i]; - assert( sz>0 ); - if( SQLITE_WITHIN(pCell,aData+j,pEnd) ){ - if( ((uptr)(pCell+sz))>(uptr)pEnd ) return SQLITE_CORRUPT_BKPT; - pCell = &pTmp[pCell - aData]; - }else if( (uptr)(pCell+sz)>(uptr)pSrcEnd - && (uptr)(pCell)<(uptr)pSrcEnd - ){ - return SQLITE_CORRUPT_BKPT; - } - - pData -= sz; - put2byte(pCellptr, (pData - aData)); - pCellptr += 2; - if( pData < pCellptr ) return SQLITE_CORRUPT_BKPT; - memmove(pData, pCell, sz); - assert( sz==pPg->xCellSize(pPg, pCell) || CORRUPT_DB ); - i++; - if( i>=iEnd ) break; - if( pCArray->ixNx[k]<=i ){ - k++; - pSrcEnd = pCArray->apEnd[k]; - } - } - - /* The pPg->nFree field is now set incorrectly. The caller will fix it. 
*/ - pPg->nCell = nCell; - pPg->nOverflow = 0; - - put2byte(&aData[hdr+1], 0); - put2byte(&aData[hdr+3], pPg->nCell); - put2byte(&aData[hdr+5], pData - aData); - aData[hdr+7] = 0x00; - return SQLITE_OK; -} - -/* -** The pCArray objects contains pointers to b-tree cells and the cell sizes. -** This function attempts to add the cells stored in the array to page pPg. -** If it cannot (because the page needs to be defragmented before the cells -** will fit), non-zero is returned. Otherwise, if the cells are added -** successfully, zero is returned. -** -** Argument pCellptr points to the first entry in the cell-pointer array -** (part of page pPg) to populate. After cell apCell[0] is written to the -** page body, a 16-bit offset is written to pCellptr. And so on, for each -** cell in the array. It is the responsibility of the caller to ensure -** that it is safe to overwrite this part of the cell-pointer array. -** -** When this function is called, *ppData points to the start of the -** content area on page pPg. If the size of the content area is extended, -** *ppData is updated to point to the new start of the content area -** before returning. -** -** Finally, argument pBegin points to the byte immediately following the -** end of the space required by this page for the cell-pointer area (for -** all cells - not just those inserted by the current call). If the content -** area must be extended to before this point in order to accomodate all -** cells in apCell[], then the cells do not fit and non-zero is returned. 
-*/ -static int pageInsertArray( - MemPage *pPg, /* Page to add cells to */ - u8 *pBegin, /* End of cell-pointer array */ - u8 **ppData, /* IN/OUT: Page content-area pointer */ - u8 *pCellptr, /* Pointer to cell-pointer area */ - int iFirst, /* Index of first cell to add */ - int nCell, /* Number of cells to add to pPg */ - CellArray *pCArray /* Array of cells */ -){ - int i = iFirst; /* Loop counter - cell index to insert */ - u8 *aData = pPg->aData; /* Complete page */ - u8 *pData = *ppData; /* Content area. A subset of aData[] */ - int iEnd = iFirst + nCell; /* End of loop. One past last cell to ins */ - int k; /* Current slot in pCArray->apEnd[] */ - u8 *pEnd; /* Maximum extent of cell data */ - assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */ - if( iEnd<=iFirst ) return 0; - for(k=0; pCArray->ixNx[k]<=i && ALWAYS(kapEnd[k]; - while( 1 /*Exit by break*/ ){ - int sz, rc; - u8 *pSlot; - assert( pCArray->szCell[i]!=0 ); - sz = pCArray->szCell[i]; - if( (aData[1]==0 && aData[2]==0) || (pSlot = pageFindSlot(pPg,sz,&rc))==0 ){ - if( (pData - pBegin)apCell[i] will never overlap on a well-formed - ** database. But they might for a corrupt database. Hence use memmove() - ** since memcpy() sends SIGABORT with overlapping buffers on OpenBSD */ - assert( (pSlot+sz)<=pCArray->apCell[i] - || pSlot>=(pCArray->apCell[i]+sz) - || CORRUPT_DB ); - if( (uptr)(pCArray->apCell[i]+sz)>(uptr)pEnd - && (uptr)(pCArray->apCell[i])<(uptr)pEnd - ){ - assert( CORRUPT_DB ); - (void)SQLITE_CORRUPT_BKPT; - return 1; - } - memmove(pSlot, pCArray->apCell[i], sz); - put2byte(pCellptr, (pSlot - aData)); - pCellptr += 2; - i++; - if( i>=iEnd ) break; - if( pCArray->ixNx[k]<=i ){ - k++; - pEnd = pCArray->apEnd[k]; - } - } - *ppData = pData; - return 0; -} - -/* -** The pCArray object contains pointers to b-tree cells and their sizes. 
-** -** This function adds the space associated with each cell in the array -** that is currently stored within the body of pPg to the pPg free-list. -** The cell-pointers and other fields of the page are not updated. -** -** This function returns the total number of cells added to the free-list. -*/ -static int pageFreeArray( - MemPage *pPg, /* Page to edit */ - int iFirst, /* First cell to delete */ - int nCell, /* Cells to delete */ - CellArray *pCArray /* Array of cells */ -){ - u8 * const aData = pPg->aData; - u8 * const pEnd = &aData[pPg->pBt->usableSize]; - u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize]; - int nRet = 0; - int i; - int iEnd = iFirst + nCell; - u8 *pFree = 0; - int szFree = 0; - - for(i=iFirst; iapCell[i]; - if( SQLITE_WITHIN(pCell, pStart, pEnd) ){ - int sz; - /* No need to use cachedCellSize() here. The sizes of all cells that - ** are to be freed have already been computing while deciding which - ** cells need freeing */ - sz = pCArray->szCell[i]; assert( sz>0 ); - if( pFree!=(pCell + sz) ){ - if( pFree ){ - assert( pFree>aData && (pFree - aData)<65536 ); - freeSpace(pPg, (u16)(pFree - aData), szFree); - } - pFree = pCell; - szFree = sz; - if( pFree+sz>pEnd ){ - return 0; - } - }else{ - pFree = pCell; - szFree += sz; - } - nRet++; - } - } - if( pFree ){ - assert( pFree>aData && (pFree - aData)<65536 ); - freeSpace(pPg, (u16)(pFree - aData), szFree); - } - return nRet; -} - -/* -** pCArray contains pointers to and sizes of all cells in the page being -** balanced. The current page, pPg, has pPg->nCell cells starting with -** pCArray->apCell[iOld]. After balancing, this page should hold nNew cells -** starting at apCell[iNew]. -** -** This routine makes the necessary adjustments to pPg so that it contains -** the correct cells after being balanced. -** -** The pPg->nFree field is invalid when this function returns. It is the -** responsibility of the caller to set it correctly. 
-*/ -static int editPage( - MemPage *pPg, /* Edit this page */ - int iOld, /* Index of first cell currently on page */ - int iNew, /* Index of new first cell on page */ - int nNew, /* Final number of cells on page */ - CellArray *pCArray /* Array of cells and sizes */ -){ - u8 * const aData = pPg->aData; - const int hdr = pPg->hdrOffset; - u8 *pBegin = &pPg->aCellIdx[nNew * 2]; - int nCell = pPg->nCell; /* Cells stored on pPg */ - u8 *pData; - u8 *pCellptr; - int i; - int iOldEnd = iOld + pPg->nCell + pPg->nOverflow; - int iNewEnd = iNew + nNew; - -#ifdef SQLITE_DEBUG - u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager); - memcpy(pTmp, aData, pPg->pBt->usableSize); -#endif - - /* Remove cells from the start and end of the page */ - assert( nCell>=0 ); - if( iOldnCell) ) return SQLITE_CORRUPT_BKPT; - memmove(pPg->aCellIdx, &pPg->aCellIdx[nShift*2], nCell*2); - nCell -= nShift; - } - if( iNewEnd < iOldEnd ){ - int nTail = pageFreeArray(pPg, iNewEnd, iOldEnd - iNewEnd, pCArray); - assert( nCell>=nTail ); - nCell -= nTail; - } - - pData = &aData[get2byteNotZero(&aData[hdr+5])]; - if( pDatapPg->aDataEnd) ) goto editpage_fail; - - /* Add cells to the start of the page */ - if( iNew=0 ); - pCellptr = pPg->aCellIdx; - memmove(&pCellptr[nAdd*2], pCellptr, nCell*2); - if( pageInsertArray( - pPg, pBegin, &pData, pCellptr, - iNew, nAdd, pCArray - ) ) goto editpage_fail; - nCell += nAdd; - } - - /* Add any overflow cells */ - for(i=0; inOverflow; i++){ - int iCell = (iOld + pPg->aiOvfl[i]) - iNew; - if( iCell>=0 && iCellaCellIdx[iCell * 2]; - if( nCell>iCell ){ - memmove(&pCellptr[2], pCellptr, (nCell - iCell) * 2); - } - nCell++; - cachedCellSize(pCArray, iCell+iNew); - if( pageInsertArray( - pPg, pBegin, &pData, pCellptr, - iCell+iNew, 1, pCArray - ) ) goto editpage_fail; - } - } - - /* Append cells to the end of the page */ - assert( nCell>=0 ); - pCellptr = &pPg->aCellIdx[nCell*2]; - if( pageInsertArray( - pPg, pBegin, &pData, pCellptr, - iNew+nCell, nNew-nCell, pCArray - ) 
) goto editpage_fail; - - pPg->nCell = nNew; - pPg->nOverflow = 0; - - put2byte(&aData[hdr+3], pPg->nCell); - put2byte(&aData[hdr+5], pData - aData); - -#ifdef SQLITE_DEBUG - for(i=0; iapCell[i+iNew]; - int iOff = get2byteAligned(&pPg->aCellIdx[i*2]); - if( SQLITE_WITHIN(pCell, aData, &aData[pPg->pBt->usableSize]) ){ - pCell = &pTmp[pCell - aData]; - } - assert( 0==memcmp(pCell, &aData[iOff], - pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) ); - } -#endif - - return SQLITE_OK; - editpage_fail: - /* Unable to edit this page. Rebuild it from scratch instead. */ - populateCellCache(pCArray, iNew, nNew); - return rebuildPage(pCArray, iNew, nNew, pPg); -} - - -#ifndef SQLITE_OMIT_QUICKBALANCE -/* -** This version of balance() handles the common special case where -** a new entry is being inserted on the extreme right-end of the -** tree, in other words, when the new entry will become the largest -** entry in the tree. -** -** Instead of trying to balance the 3 right-most leaf pages, just add -** a new page to the right-hand side and put the one new entry in -** that page. This leaves the right side of the tree somewhat -** unbalanced. But odds are that we will be inserting new entries -** at the end soon afterwards so the nearly empty page will quickly -** fill up. On average. -** -** pPage is the leaf page which is the right-most page in the tree. -** pParent is its parent. pPage must have a single overflow entry -** which is also the right-most entry on the page. -** -** The pSpace buffer is used to store a temporary copy of the divider -** cell that will be inserted into pParent. Such a cell consists of a 4 -** byte page number followed by a variable length integer. In other -** words, at most 13 bytes. Hence the pSpace buffer must be at -** least 13 bytes in size. 
-*/ -static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){ - BtShared *const pBt = pPage->pBt; /* B-Tree Database */ - MemPage *pNew; /* Newly allocated page */ - int rc; /* Return Code */ - Pgno pgnoNew; /* Page number of pNew */ - - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( sqlite3PagerIswriteable(pParent->pDbPage) ); - assert( pPage->nOverflow==1 ); - - if( pPage->nCell==0 ) return SQLITE_CORRUPT_BKPT; /* dbfuzz001.test */ - assert( pPage->nFree>=0 ); - assert( pParent->nFree>=0 ); - - /* Allocate a new page. This page will become the right-sibling of - ** pPage. Make the parent page writable, so that the new divider cell - ** may be inserted. If both these operations are successful, proceed. - */ - rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0); - - if( rc==SQLITE_OK ){ - - u8 *pOut = &pSpace[4]; - u8 *pCell = pPage->apOvfl[0]; - u16 szCell = pPage->xCellSize(pPage, pCell); - u8 *pStop; - CellArray b; - - assert( sqlite3PagerIswriteable(pNew->pDbPage) ); - assert( CORRUPT_DB || pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) ); - zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF); - b.nCell = 1; - b.pRef = pPage; - b.apCell = &pCell; - b.szCell = &szCell; - b.apEnd[0] = pPage->aDataEnd; - b.ixNx[0] = 2; - rc = rebuildPage(&b, 0, 1, pNew); - if( NEVER(rc) ){ - releasePage(pNew); - return rc; - } - pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell; - - /* If this is an auto-vacuum database, update the pointer map - ** with entries for the new page, and any pointer from the - ** cell on the page to an overflow page. If either of these - ** operations fails, the return code is set, but the contents - ** of the parent page are still manipulated by thh code below. - ** That is Ok, at this point the parent page is guaranteed to - ** be marked as dirty. Returning an error code will cause a - ** rollback, undoing any changes made to the parent page. 
- */ - if( ISAUTOVACUUM ){ - ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno, &rc); - if( szCell>pNew->minLocal ){ - ptrmapPutOvflPtr(pNew, pNew, pCell, &rc); - } - } - - /* Create a divider cell to insert into pParent. The divider cell - ** consists of a 4-byte page number (the page number of pPage) and - ** a variable length key value (which must be the same value as the - ** largest key on pPage). - ** - ** To find the largest key value on pPage, first find the right-most - ** cell on pPage. The first two fields of this cell are the - ** record-length (a variable length integer at most 32-bits in size) - ** and the key value (a variable length integer, may have any value). - ** The first of the while(...) loops below skips over the record-length - ** field. The second while(...) loop copies the key value from the - ** cell on pPage into the pSpace buffer. - */ - pCell = findCell(pPage, pPage->nCell-1); - pStop = &pCell[9]; - while( (*(pCell++)&0x80) && pCellnCell, pSpace, (int)(pOut-pSpace), - 0, pPage->pgno, &rc); - } - - /* Set the right-child pointer of pParent to point to the new page. */ - put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew); - - /* Release the reference to the new page. */ - releasePage(pNew); - } - - return rc; -} -#endif /* SQLITE_OMIT_QUICKBALANCE */ - -#if 0 -/* -** This function does not contribute anything to the operation of SQLite. -** it is sometimes activated temporarily while debugging code responsible -** for setting pointer-map entries. 
-*/ -static int ptrmapCheckPages(MemPage **apPage, int nPage){ - int i, j; - for(i=0; ipBt; - assert( pPage->isInit ); - - for(j=0; jnCell; j++){ - CellInfo info; - u8 *z; - - z = findCell(pPage, j); - pPage->xParseCell(pPage, z, &info); - if( info.nLocalpgno && e==PTRMAP_OVERFLOW1 ); - } - if( !pPage->leaf ){ - Pgno child = get4byte(z); - ptrmapGet(pBt, child, &e, &n); - assert( n==pPage->pgno && e==PTRMAP_BTREE ); - } - } - if( !pPage->leaf ){ - Pgno child = get4byte(&pPage->aData[pPage->hdrOffset+8]); - ptrmapGet(pBt, child, &e, &n); - assert( n==pPage->pgno && e==PTRMAP_BTREE ); - } - } - return 1; -} -#endif - -/* -** This function is used to copy the contents of the b-tree node stored -** on page pFrom to page pTo. If page pFrom was not a leaf page, then -** the pointer-map entries for each child page are updated so that the -** parent page stored in the pointer map is page pTo. If pFrom contained -** any cells with overflow page pointers, then the corresponding pointer -** map entries are also updated so that the parent page is page pTo. -** -** If pFrom is currently carrying any overflow cells (entries in the -** MemPage.apOvfl[] array), they are not copied to pTo. -** -** Before returning, page pTo is reinitialized using btreeInitPage(). -** -** The performance of this function is not critical. It is only used by -** the balance_shallower() and balance_deeper() procedures, neither of -** which are called often under normal circumstances. -*/ -static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){ - if( (*pRC)==SQLITE_OK ){ - BtShared * const pBt = pFrom->pBt; - u8 * const aFrom = pFrom->aData; - u8 * const aTo = pTo->aData; - int const iFromHdr = pFrom->hdrOffset; - int const iToHdr = ((pTo->pgno==1) ? 100 : 0); - int rc; - int iData; - - - assert( pFrom->isInit ); - assert( pFrom->nFree>=iToHdr ); - assert( get2byte(&aFrom[iFromHdr+5]) <= (int)pBt->usableSize ); - - /* Copy the b-tree node content from page pFrom to page pTo. 
*/ - iData = get2byte(&aFrom[iFromHdr+5]); - memcpy(&aTo[iData], &aFrom[iData], pBt->usableSize-iData); - memcpy(&aTo[iToHdr], &aFrom[iFromHdr], pFrom->cellOffset + 2*pFrom->nCell); - - /* Reinitialize page pTo so that the contents of the MemPage structure - ** match the new data. The initialization of pTo can actually fail under - ** fairly obscure circumstances, even though it is a copy of initialized - ** page pFrom. - */ - pTo->isInit = 0; - rc = btreeInitPage(pTo); - if( rc==SQLITE_OK ) rc = btreeComputeFreeSpace(pTo); - if( rc!=SQLITE_OK ){ - *pRC = rc; - return; - } - - /* If this is an auto-vacuum database, update the pointer-map entries - ** for any b-tree or overflow pages that pTo now contains the pointers to. - */ - if( ISAUTOVACUUM ){ - *pRC = setChildPtrmaps(pTo); - } - } -} - -/* -** This routine redistributes cells on the iParentIdx'th child of pParent -** (hereafter "the page") and up to 2 siblings so that all pages have about the -** same amount of free space. Usually a single sibling on either side of the -** page are used in the balancing, though both siblings might come from one -** side if the page is the first or last child of its parent. If the page -** has fewer than 2 siblings (something which can only happen if the page -** is a root page or a child of a root page) then all available siblings -** participate in the balancing. -** -** The number of siblings of the page might be increased or decreased by -** one or two in an effort to keep pages nearly full but not over full. -** -** Note that when this routine is called, some of the cells on the page -** might not actually be stored in MemPage.aData[]. This can happen -** if the page is overfull. This routine ensures that all cells allocated -** to the page and its siblings fit into MemPage.aData[] before returning. -** -** In the course of balancing the page and its siblings, cells may be -** inserted into or removed from the parent page (pParent). 
Doing so -** may cause the parent page to become overfull or underfull. If this -** happens, it is the responsibility of the caller to invoke the correct -** balancing routine to fix this problem (see the balance() routine). -** -** If this routine fails for any reason, it might leave the database -** in a corrupted state. So if this routine fails, the database should -** be rolled back. -** -** The third argument to this function, aOvflSpace, is a pointer to a -** buffer big enough to hold one page. If while inserting cells into the parent -** page (pParent) the parent page becomes overfull, this buffer is -** used to store the parent's overflow cells. Because this function inserts -** a maximum of four divider cells into the parent page, and the maximum -** size of a cell stored within an internal node is always less than 1/4 -** of the page-size, the aOvflSpace[] buffer is guaranteed to be large -** enough for all overflow cells. -** -** If aOvflSpace is set to a null pointer, this function returns -** SQLITE_NOMEM. -*/ -static int balance_nonroot( - MemPage *pParent, /* Parent page of siblings being balanced */ - int iParentIdx, /* Index of "the page" in pParent */ - u8 *aOvflSpace, /* page-size bytes of space for parent ovfl */ - int isRoot, /* True if pParent is a root-page */ - int bBulk /* True if this call is part of a bulk load */ -){ - BtShared *pBt; /* The whole database */ - int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */ - int nNew = 0; /* Number of pages in apNew[] */ - int nOld; /* Number of pages in apOld[] */ - int i, j, k; /* Loop counters */ - int nxDiv; /* Next divider slot in pParent->aCell[] */ - int rc = SQLITE_OK; /* The return code */ - u16 leafCorrection; /* 4 if pPage is a leaf. 
0 if not */ - int leafData; /* True if pPage is a leaf of a LEAFDATA tree */ - int usableSpace; /* Bytes in pPage beyond the header */ - int pageFlags; /* Value of pPage->aData[0] */ - int iSpace1 = 0; /* First unused byte of aSpace1[] */ - int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */ - int szScratch; /* Size of scratch memory requested */ - MemPage *apOld[NB]; /* pPage and up to two siblings */ - MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */ - u8 *pRight; /* Location in parent of right-sibling pointer */ - u8 *apDiv[NB-1]; /* Divider cells in pParent */ - int cntNew[NB+2]; /* Index in b.paCell[] of cell after i-th page */ - int cntOld[NB+2]; /* Old index in b.apCell[] */ - int szNew[NB+2]; /* Combined size of cells placed on i-th page */ - u8 *aSpace1; /* Space for copies of dividers cells */ - Pgno pgno; /* Temp var to store a page number in */ - u8 abDone[NB+2]; /* True after i'th new page is populated */ - Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */ - Pgno aPgOrder[NB+2]; /* Copy of aPgno[] used for sorting pages */ - u16 aPgFlags[NB+2]; /* flags field of new pages before shuffling */ - CellArray b; /* Parsed information on cells being balanced */ - - memset(abDone, 0, sizeof(abDone)); - memset(&b, 0, sizeof(b)); - pBt = pParent->pBt; - assert( sqlite3_mutex_held(pBt->mutex) ); - assert( sqlite3PagerIswriteable(pParent->pDbPage) ); - - /* At this point pParent may have at most one overflow cell. And if - ** this overflow cell is present, it must be the cell with - ** index iParentIdx. This scenario comes about when this function - ** is called (indirectly) from sqlite3BtreeDelete(). - */ - assert( pParent->nOverflow==0 || pParent->nOverflow==1 ); - assert( pParent->nOverflow==0 || pParent->aiOvfl[0]==iParentIdx ); - - if( !aOvflSpace ){ - return SQLITE_NOMEM; - } - assert( pParent->nFree>=0 ); - - /* Find the sibling pages to balance. Also locate the cells in pParent - ** that divide the siblings. 
An attempt is made to find NN siblings on - ** either side of pPage. More siblings are taken from one side, however, - ** if there are fewer than NN siblings on the other side. If pParent - ** has NB or fewer children then all children of pParent are taken. - ** - ** This loop also drops the divider cells from the parent page. This - ** way, the remainder of the function does not have to deal with any - ** overflow cells in the parent page, since if any existed they will - ** have already been removed. - */ - i = pParent->nOverflow + pParent->nCell; - if( i<2 ){ - nxDiv = 0; - }else{ - assert( bBulk==0 || bBulk==1 ); - if( iParentIdx==0 ){ - nxDiv = 0; - }else if( iParentIdx==i ){ - nxDiv = i-2+bBulk; - }else{ - nxDiv = iParentIdx-1; - } - i = 2-bBulk; - } - nOld = i+1; - if( (i+nxDiv-pParent->nOverflow)==pParent->nCell ){ - pRight = &pParent->aData[pParent->hdrOffset+8]; - }else{ - pRight = findCell(pParent, i+nxDiv-pParent->nOverflow); - } - pgno = get4byte(pRight); - while( 1 ){ - if( rc==SQLITE_OK ){ - rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0); - } - if( rc ){ - memset(apOld, 0, (i+1)*sizeof(MemPage*)); - goto balance_cleanup; - } - if( apOld[i]->nFree<0 ){ - rc = btreeComputeFreeSpace(apOld[i]); - if( rc ){ - memset(apOld, 0, (i)*sizeof(MemPage*)); - goto balance_cleanup; - } - } - nMaxCells += apOld[i]->nCell + ArraySize(pParent->apOvfl); - if( (i--)==0 ) break; - - if( pParent->nOverflow && i+nxDiv==pParent->aiOvfl[0] ){ - apDiv[i] = pParent->apOvfl[0]; - pgno = get4byte(apDiv[i]); - szNew[i] = pParent->xCellSize(pParent, apDiv[i]); - pParent->nOverflow = 0; - }else{ - apDiv[i] = findCell(pParent, i+nxDiv-pParent->nOverflow); - pgno = get4byte(apDiv[i]); - szNew[i] = pParent->xCellSize(pParent, apDiv[i]); - - /* Drop the cell from the parent page. apDiv[i] still points to - ** the cell within the parent, even though it has been dropped. 
- ** This is safe because dropping a cell only overwrites the first - ** four bytes of it, and this function does not need the first - ** four bytes of the divider cell. So the pointer is safe to use - ** later on. - ** - ** But not if we are in secure-delete mode. In secure-delete mode, - ** the dropCell() routine will overwrite the entire cell with zeroes. - ** In this case, temporarily copy the cell into the aOvflSpace[] - ** buffer. It will be copied out again as soon as the aSpace[] buffer - ** is allocated. */ - if( pBt->btsFlags & BTS_FAST_SECURE ){ - int iOff; - - /* If the following if() condition is not true, the db is corrupted. - ** The call to dropCell() below will detect this. */ - iOff = SQLITE_PTR_TO_INT(apDiv[i]) - SQLITE_PTR_TO_INT(pParent->aData); - if( (iOff+szNew[i])<=(int)pBt->usableSize ){ - memcpy(&aOvflSpace[iOff], apDiv[i], szNew[i]); - apDiv[i] = &aOvflSpace[apDiv[i]-pParent->aData]; - } - } - dropCell(pParent, i+nxDiv-pParent->nOverflow, szNew[i], &rc); - } - } - - /* Make nMaxCells a multiple of 4 in order to preserve 8-byte - ** alignment */ - nMaxCells = (nMaxCells + 3)&~3; - - /* - ** Allocate space for memory structures - */ - szScratch = - nMaxCells*sizeof(u8*) /* b.apCell */ - + nMaxCells*sizeof(u16) /* b.szCell */ - + pBt->pageSize; /* aSpace1 */ - - assert( szScratch<=7*(int)pBt->pageSize ); - b.apCell = sqlite3StackAllocRaw(0, szScratch ); - if( b.apCell==0 ){ - rc = SQLITE_NOMEM; - goto balance_cleanup; - } - b.szCell = (u16*)&b.apCell[nMaxCells]; - aSpace1 = (u8*)&b.szCell[nMaxCells]; - assert( EIGHT_BYTE_ALIGNMENT(aSpace1) ); - - /* - ** Load pointers to all cells on sibling pages and the divider cells - ** into the local b.apCell[] array. Make copies of the divider cells - ** into space obtained from aSpace1[]. The divider cells have already - ** been removed from pParent. 
- ** - ** If the siblings are on leaf pages, then the child pointers of the - ** divider cells are stripped from the cells before they are copied - ** into aSpace1[]. In this way, all cells in b.apCell[] are without - ** child pointers. If siblings are not leaves, then all cell in - ** b.apCell[] include child pointers. Either way, all cells in b.apCell[] - ** are alike. - ** - ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf. - ** leafData: 1 if pPage holds key+data and pParent holds only keys. - */ - b.pRef = apOld[0]; - leafCorrection = b.pRef->leaf*4; - leafData = b.pRef->intKeyLeaf; - for(i=0; inCell; - u8 *aData = pOld->aData; - u16 maskPage = pOld->maskPage; - u8 *piCell = aData + pOld->cellOffset; - u8 *piEnd; - VVA_ONLY( int nCellAtStart = b.nCell; ) - - /* Verify that all sibling pages are of the same "type" (table-leaf, - ** table-interior, index-leaf, or index-interior). - */ - if( pOld->aData[0]!=apOld[0]->aData[0] ){ - rc = SQLITE_CORRUPT_BKPT; - goto balance_cleanup; - } - - /* Load b.apCell[] with pointers to all cells in pOld. If pOld - ** contains overflow cells, include them in the b.apCell[] array - ** in the correct spot. - ** - ** Note that when there are multiple overflow cells, it is always the - ** case that they are sequential and adjacent. This invariant arises - ** because multiple overflows can only occurs when inserting divider - ** cells into a parent on a prior balance, and divider cells are always - ** adjacent and are inserted in order. There is an assert() tagged - ** with "NOTE 1" in the overflow cell insertion loop to prove this - ** invariant. - ** - ** This must be done in advance. Once the balance starts, the cell - ** offset section of the btree page will be overwritten and we will no - ** long be able to find the cells if a pointer to each cell is not saved - ** first. 
- */ - memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*(limit+pOld->nOverflow)); - if( pOld->nOverflow>0 ){ - if( NEVER(limitaiOvfl[0]) ){ - rc = SQLITE_CORRUPT_BKPT; - goto balance_cleanup; - } - limit = pOld->aiOvfl[0]; - for(j=0; jnOverflow; k++){ - assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */ - b.apCell[b.nCell] = pOld->apOvfl[k]; - b.nCell++; - } - } - piEnd = aData + pOld->cellOffset + 2*pOld->nCell; - while( piCellnCell+pOld->nOverflow) ); - - cntOld[i] = b.nCell; - if( imaxLocal+23 ); - assert( iSpace1 <= (int)pBt->pageSize ); - memcpy(pTemp, apDiv[i], sz); - b.apCell[b.nCell] = pTemp+leafCorrection; - assert( leafCorrection==0 || leafCorrection==4 ); - b.szCell[b.nCell] = b.szCell[b.nCell] - leafCorrection; - if( !pOld->leaf ){ - assert( leafCorrection==0 ); - assert( pOld->hdrOffset==0 || CORRUPT_DB ); - /* The right pointer of the child page pOld becomes the left - ** pointer of the divider cell */ - memcpy(b.apCell[b.nCell], &pOld->aData[8], 4); - }else{ - assert( leafCorrection==4 ); - while( b.szCell[b.nCell]<4 ){ - /* Do not allow any cells smaller than 4 bytes. If a smaller cell - ** does exist, pad it with 0x00 bytes. */ - assert( b.szCell[b.nCell]==3 || CORRUPT_DB ); - assert( b.apCell[b.nCell]==&aSpace1[iSpace1-3] || CORRUPT_DB ); - aSpace1[iSpace1++] = 0x00; - b.szCell[b.nCell]++; - } - } - b.nCell++; - } - } - - /* - ** Figure out the number of pages needed to hold all b.nCell cells. - ** Store this number in "k". Also compute szNew[] which is the total - ** size of all cells on the i-th page and cntNew[] which is the index - ** in b.apCell[] of the cell that divides page i from page i+1. - ** cntNew[k] should equal b.nCell. - ** - ** Values computed by this block: - ** - ** k: The total number of sibling pages - ** szNew[i]: Spaced used on the i-th sibling page. - ** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to - ** the right of the i-th sibling page. 
- ** usableSpace: Number of bytes of space available on each sibling. - ** - */ - usableSpace = pBt->usableSize - 12 + leafCorrection; - for(i=k=0; iaDataEnd; - b.ixNx[k] = cntOld[i]; - if( k && b.ixNx[k]==b.ixNx[k-1] ){ - k--; /* Omit b.ixNx[] entry for child pages with no cells */ - } - if( !leafData ){ - k++; - b.apEnd[k] = pParent->aDataEnd; - b.ixNx[k] = cntOld[i]+1; - } - assert( p->nFree>=0 ); - szNew[i] = usableSpace - p->nFree; - for(j=0; jnOverflow; j++){ - szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]); - } - cntNew[i] = cntOld[i]; - } - k = nOld; - for(i=0; iusableSpace ){ - if( i+1>=k ){ - k = i+2; - if( k>NB+2 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } - szNew[k-1] = 0; - cntNew[k-1] = b.nCell; - } - sz = 2 + cachedCellSize(&b, cntNew[i]-1); - szNew[i] -= sz; - if( !leafData ){ - if( cntNew[i]usableSpace ) break; - szNew[i] += sz; - cntNew[i]++; - if( !leafData ){ - if( cntNew[i]=b.nCell ){ - k = i+1; - }else if( cntNew[i] <= (i>0 ? cntNew[i-1] : 0) ){ - rc = SQLITE_CORRUPT_BKPT; - goto balance_cleanup; - } - } - - /* - ** The packing computed by the previous block is biased toward the siblings - ** on the left side (siblings with smaller keys). The left siblings are - ** always nearly full, while the right-most sibling might be nearly empty. - ** The next block of code attempts to adjust the packing of siblings to - ** get a better balance. - ** - ** This adjustment is more than an optimization. The packing above might - ** be so out of balance as to be illegal. For example, the right-most - ** sibling might be completely empty. This adjustment is not optional. 
- */ - for(i=k-1; i>0; i--){ - int szRight = szNew[i]; /* Size of sibling on the right */ - int szLeft = szNew[i-1]; /* Size of sibling on the left */ - int r; /* Index of right-most cell in left sibling */ - int d; /* Index of first cell to the left of right sibling */ - - r = cntNew[i-1] - 1; - d = r + 1 - leafData; - (void)cachedCellSize(&b, d); - do{ - assert( d szLeft-(b.szCell[r]+(i==k-1?0:2)))){ - break; - } - szRight += b.szCell[d] + 2; - szLeft -= b.szCell[r] + 2; - cntNew[i-1] = r; - r--; - d--; - }while( r>=0 ); - szNew[i] = szRight; - szNew[i-1] = szLeft; - if( cntNew[i-1] <= (i>1 ? cntNew[i-2] : 0) ){ - rc = SQLITE_CORRUPT_BKPT; - goto balance_cleanup; - } - } - - /* Sanity check: For a non-corrupt database file one of the follwing - ** must be true: - ** (1) We found one or more cells (cntNew[0])>0), or - ** (2) pPage is a virtual root page. A virtual root page is when - ** the real root page is page 1 and we are the only child of - ** that page. - */ - assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) || CORRUPT_DB); - TRACE(("BALANCE: old: %d(nc=%d) %d(nc=%d) %d(nc=%d)\n", - apOld[0]->pgno, apOld[0]->nCell, - nOld>=2 ? apOld[1]->pgno : 0, nOld>=2 ? apOld[1]->nCell : 0, - nOld>=3 ? apOld[2]->pgno : 0, nOld>=3 ? apOld[2]->nCell : 0 - )); - - /* - ** Allocate k new pages. Reuse old pages where possible. - */ - pageFlags = apOld[0]->aData[0]; - for(i=0; ipDbPage); - nNew++; - if( sqlite3PagerPageRefcount(pNew->pDbPage)!=1+(i==(iParentIdx-nxDiv)) - && rc==SQLITE_OK - ){ - rc = SQLITE_CORRUPT_BKPT; - } - if( rc ) goto balance_cleanup; - }else{ - assert( i>0 ); - rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0); - if( rc ) goto balance_cleanup; - zeroPage(pNew, pageFlags); - apNew[i] = pNew; - nNew++; - cntOld[i] = b.nCell; - - /* Set the pointer-map entry for the new sibling page. 
*/ - if( ISAUTOVACUUM ){ - ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc); - if( rc!=SQLITE_OK ){ - goto balance_cleanup; - } - } - } - } - - /* - ** Reassign page numbers so that the new pages are in ascending order. - ** This helps to keep entries in the disk file in order so that a scan - ** of the table is closer to a linear scan through the file. That in turn - ** helps the operating system to deliver pages from the disk more rapidly. - ** - ** An O(n^2) insertion sort algorithm is used, but since n is never more - ** than (NB+2) (a small constant), that should not be a problem. - ** - ** When NB==3, this one optimization makes the database about 25% faster - ** for large insertions and deletions. - */ - for(i=0; ipgno; - aPgFlags[i] = apNew[i]->pDbPage->flags; - for(j=0; ji ){ - sqlite3PagerRekey(apNew[iBest]->pDbPage, pBt->nPage+iBest+1, 0); - } - sqlite3PagerRekey(apNew[i]->pDbPage, pgno, aPgFlags[iBest]); - apNew[i]->pgno = pgno; - } - } - - TRACE(("BALANCE: new: %d(%d nc=%d) %d(%d nc=%d) %d(%d nc=%d) " - "%d(%d nc=%d) %d(%d nc=%d)\n", - apNew[0]->pgno, szNew[0], cntNew[0], - nNew>=2 ? apNew[1]->pgno : 0, nNew>=2 ? szNew[1] : 0, - nNew>=2 ? cntNew[1] - cntNew[0] - !leafData : 0, - nNew>=3 ? apNew[2]->pgno : 0, nNew>=3 ? szNew[2] : 0, - nNew>=3 ? cntNew[2] - cntNew[1] - !leafData : 0, - nNew>=4 ? apNew[3]->pgno : 0, nNew>=4 ? szNew[3] : 0, - nNew>=4 ? cntNew[3] - cntNew[2] - !leafData : 0, - nNew>=5 ? apNew[4]->pgno : 0, nNew>=5 ? szNew[4] : 0, - nNew>=5 ? cntNew[4] - cntNew[3] - !leafData : 0 - )); - - assert( sqlite3PagerIswriteable(pParent->pDbPage) ); - assert( nNew>=1 && nNew<=ArraySize(apNew) ); - assert( apNew[nNew-1]!=0 ); - put4byte(pRight, apNew[nNew-1]->pgno); - - /* If the sibling pages are not leaves, ensure that the right-child pointer - ** of the right-most new sibling page is set to the value that was - ** originally in the same field of the right-most old sibling page. 
*/ - if( (pageFlags & PTF_LEAF)==0 && nOld!=nNew ){ - MemPage *pOld = (nNew>nOld ? apNew : apOld)[nOld-1]; - memcpy(&apNew[nNew-1]->aData[8], &pOld->aData[8], 4); - } - - /* Make any required updates to pointer map entries associated with - ** cells stored on sibling pages following the balance operation. Pointer - ** map entries associated with divider cells are set by the insertCell() - ** routine. The associated pointer map entries are: - ** - ** a) if the cell contains a reference to an overflow chain, the - ** entry associated with the first page in the overflow chain, and - ** - ** b) if the sibling pages are not leaves, the child page associated - ** with the cell. - ** - ** If the sibling pages are not leaves, then the pointer map entry - ** associated with the right-child of each sibling may also need to be - ** updated. This happens below, after the sibling pages have been - ** populated, not here. - */ - if( ISAUTOVACUUM ){ - MemPage *pOld; - MemPage *pNew = pOld = apNew[0]; - int cntOldNext = pNew->nCell + pNew->nOverflow; - int iNew = 0; - int iOld = 0; - - for(i=0; i=0 && iOldnCell + pOld->nOverflow + !leafData; - } - if( i==cntNew[iNew] ){ - pNew = apNew[++iNew]; - if( !leafData ) continue; - } - - /* Cell pCell is destined for new sibling page pNew. Originally, it - ** was either part of sibling page iOld (possibly an overflow cell), - ** or else the divider cell to the left of sibling page iOld. So, - ** if sibling page iOld had the same page number as pNew, and if - ** pCell really was a part of sibling page iOld (not a divider or - ** overflow cell), we can skip updating the pointer map entries. 
*/ - if( iOld>=nNew - || pNew->pgno!=aPgno[iOld] - || !SQLITE_WITHIN(pCell,pOld->aData,pOld->aDataEnd) - ){ - if( !leafCorrection ){ - ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc); - } - if( cachedCellSize(&b,i)>pNew->minLocal ){ - ptrmapPutOvflPtr(pNew, pOld, pCell, &rc); - } - if( rc ) goto balance_cleanup; - } - } - } - - /* Insert new divider cells into pParent. */ - for(i=0; ileaf ){ - memcpy(&pNew->aData[8], pCell, 4); - }else if( leafData ){ - /* If the tree is a leaf-data tree, and the siblings are leaves, - ** then there is no divider cell in b.apCell[]. Instead, the divider - ** cell consists of the integer key for the right-most cell of - ** the sibling-page assembled above only. - */ - CellInfo info; - j--; - pNew->xParseCell(pNew, b.apCell[j], &info); - pCell = pTemp; - sz = 4 + putVarint(&pCell[4], info.nKey); - pTemp = 0; - }else{ - pCell -= 4; - /* Obscure case for non-leaf-data trees: If the cell at pCell was - ** previously stored on a leaf node, and its reported size was 4 - ** bytes, then it may actually be smaller than this - ** (see btreeParseCellPtr(), 4 bytes is the minimum size of - ** any cell). But it is important to pass the correct size to - ** insertCell(), so reparse the cell now. - ** - ** This can only happen for b-trees used to evaluate "IN (SELECT ...)" - ** and WITHOUT ROWID tables with exactly one column which is the - ** primary key. - */ - if( b.szCell[j]==4 ){ - assert(leafCorrection==4); - sz = pParent->xCellSize(pParent, pCell); - } - } - iOvflSpace += sz; - assert( sz<=pBt->maxLocal+23 ); - assert( iOvflSpace <= (int)pBt->pageSize ); - for(k=0; b.ixNx[k]<=i && ALWAYS(kpgno, &rc); - if( rc!=SQLITE_OK ) goto balance_cleanup; - assert( sqlite3PagerIswriteable(pParent->pDbPage) ); - } - - /* Now update the actual sibling pages. The order in which they are updated - ** is important, as this code needs to avoid disrupting any page from which - ** cells may still to be read. 
In practice, this means: - ** - ** (1) If cells are moving left (from apNew[iPg] to apNew[iPg-1]) - ** then it is not safe to update page apNew[iPg] until after - ** the left-hand sibling apNew[iPg-1] has been updated. - ** - ** (2) If cells are moving right (from apNew[iPg] to apNew[iPg+1]) - ** then it is not safe to update page apNew[iPg] until after - ** the right-hand sibling apNew[iPg+1] has been updated. - ** - ** If neither of the above apply, the page is safe to update. - ** - ** The iPg value in the following loop starts at nNew-1 goes down - ** to 0, then back up to nNew-1 again, thus making two passes over - ** the pages. On the initial downward pass, only condition (1) above - ** needs to be tested because (2) will always be true from the previous - ** step. On the upward pass, both conditions are always true, so the - ** upwards pass simply processes pages that were missed on the downward - ** pass. - */ - for(i=1-nNew; i=0 && iPg=0 /* On the upwards pass, or... */ - || cntOld[iPg-1]>=cntNew[iPg-1] /* Condition (1) is true */ - ){ - int iNew; - int iOld; - int nNewCell; - - /* Verify condition (1): If cells are moving left, update iPg - ** only after iPg-1 has already been updated. */ - assert( iPg==0 || cntOld[iPg-1]>=cntNew[iPg-1] || abDone[iPg-1] ); - - /* Verify condition (2): If cells are moving right, update iPg - ** only after iPg+1 has already been updated. */ - assert( cntNew[iPg]>=cntOld[iPg] || abDone[iPg+1] ); - - if( iPg==0 ){ - iNew = iOld = 0; - nNewCell = cntNew[0]; - }else{ - iOld = iPgnFree = usableSpace-szNew[iPg]; - assert( apNew[iPg]->nOverflow==0 ); - assert( apNew[iPg]->nCell==nNewCell ); - } - } - - /* All pages have been processed exactly once */ - assert( memcmp(abDone, "\01\01\01\01\01", nNew)==0 ); - - assert( nOld>0 ); - assert( nNew>0 ); - - if( isRoot && pParent->nCell==0 && pParent->hdrOffset<=apNew[0]->nFree ){ - /* The root page of the b-tree now contains no cells. 
The only sibling - ** page is the right-child of the parent. Copy the contents of the - ** child page into the parent, decreasing the overall height of the - ** b-tree structure by one. This is described as the "balance-shallower" - ** sub-algorithm in some documentation. - ** - ** If this is an auto-vacuum database, the call to copyNodeContent() - ** sets all pointer-map entries corresponding to database image pages - ** for which the pointer is stored within the content being copied. - ** - ** It is critical that the child page be defragmented before being - ** copied into the parent, because if the parent is page 1 then it will - ** by smaller than the child due to the database header, and so all the - ** free space needs to be up front. - */ - assert( nNew==1 || CORRUPT_DB ); - rc = defragmentPage(apNew[0], -1); - testcase( rc!=SQLITE_OK ); - assert( apNew[0]->nFree == - (get2byteNotZero(&apNew[0]->aData[5]) - apNew[0]->cellOffset - - apNew[0]->nCell*2) - || rc!=SQLITE_OK - ); - copyNodeContent(apNew[0], pParent, &rc); - freePage(apNew[0], &rc); - }else if( ISAUTOVACUUM && !leafCorrection ){ - /* Fix the pointer map entries associated with the right-child of each - ** sibling page. All other pointer map entries have already been taken - ** care of. */ - for(i=0; iaData[8]); - ptrmapPut(pBt, key, PTRMAP_BTREE, apNew[i]->pgno, &rc); - } - } - - assert( pParent->isInit ); - TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n", - nOld, nNew, b.nCell)); - - /* Free any old pages that were not reused as new pages. - */ - for(i=nNew; iisInit ){ - /* The ptrmapCheckPages() contains assert() statements that verify that - ** all pointer map pages are set correctly. This is helpful while - ** debugging. This is usually disabled because a corrupt database may - ** cause an assert() statement to fail. */ - ptrmapCheckPages(apNew, nNew); - ptrmapCheckPages(&pParent, 1); - } -#endif - - /* - ** Cleanup before returning. 
- */ -balance_cleanup: - sqlite3StackFree(0, b.apCell); - for(i=0; ipBt; /* The BTree */ - - assert( pRoot->nOverflow>0 ); - assert( sqlite3_mutex_held(pBt->mutex) ); - - /* Make pRoot, the root page of the b-tree, writable. Allocate a new - ** page that will become the new right-child of pPage. Copy the contents - ** of the node stored on pRoot into the new child page. - */ - rc = sqlite3PagerWrite(pRoot->pDbPage); - if( rc==SQLITE_OK ){ - rc = allocateBtreePage(pBt,&pChild,&pgnoChild,pRoot->pgno,0); - copyNodeContent(pRoot, pChild, &rc); - if( ISAUTOVACUUM ){ - ptrmapPut(pBt, pgnoChild, PTRMAP_BTREE, pRoot->pgno, &rc); - } - } - if( rc ){ - *ppChild = 0; - releasePage(pChild); - return rc; - } - assert( sqlite3PagerIswriteable(pChild->pDbPage) ); - assert( sqlite3PagerIswriteable(pRoot->pDbPage) ); - assert( pChild->nCell==pRoot->nCell || CORRUPT_DB ); - - TRACE(("BALANCE: copy root %d into %d\n", pRoot->pgno, pChild->pgno)); - - /* Copy the overflow cells from pRoot to pChild */ - memcpy(pChild->aiOvfl, pRoot->aiOvfl, - pRoot->nOverflow*sizeof(pRoot->aiOvfl[0])); - memcpy(pChild->apOvfl, pRoot->apOvfl, - pRoot->nOverflow*sizeof(pRoot->apOvfl[0])); - pChild->nOverflow = pRoot->nOverflow; - - /* Zero the contents of pRoot. Then install pChild as the right-child. */ - zeroPage(pRoot, pChild->aData[0] & ~PTF_LEAF); - put4byte(&pRoot->aData[pRoot->hdrOffset+8], pgnoChild); - - *ppChild = pChild; - return SQLITE_OK; -} - -/* -** Return SQLITE_CORRUPT if any cursor other than pCur is currently valid -** on the same B-tree as pCur. -** -** This can occur if a database is corrupt with two or more SQL tables -** pointing to the same b-tree. If an insert occurs on one SQL table -** and causes a BEFORE TRIGGER to do a secondary insert on the other SQL -** table linked to the same b-tree. If the secondary insert causes a -** rebalance, that can change content out from under the cursor on the -** first SQL table, violating invariants on the first insert. 
-*/ -static int anotherValidCursor(BtCursor *pCur){ - BtCursor *pOther; - for(pOther=pCur->pBt->pCursor; pOther; pOther=pOther->pNext){ - if( pOther!=pCur - && pOther->eState==CURSOR_VALID - && pOther->pPage==pCur->pPage - ){ - return SQLITE_CORRUPT_BKPT; - } - } - return SQLITE_OK; -} - -/* -** The page that pCur currently points to has just been modified in -** some way. This function figures out if this modification means the -** tree needs to be balanced, and if so calls the appropriate balancing -** routine. Balancing routines are: -** -** balance_quick() -** balance_deeper() -** balance_nonroot() -*/ -static int balance(BtCursor *pCur){ - int rc = SQLITE_OK; - const int nMin = pCur->pBt->usableSize * 2 / 3; - u8 aBalanceQuickSpace[13]; - u8 *pFree = 0; - - VVA_ONLY( int balance_quick_called = 0 ); - VVA_ONLY( int balance_deeper_called = 0 ); - - do { - int iPage; - MemPage *pPage = pCur->pPage; - - if( NEVER(pPage->nFree<0) && btreeComputeFreeSpace(pPage) ) break; - if( pPage->nOverflow==0 && pPage->nFree<=nMin ){ - break; - }else if( (iPage = pCur->iPage)==0 ){ - if( pPage->nOverflow && (rc = anotherValidCursor(pCur))==SQLITE_OK ){ - /* The root page of the b-tree is overfull. In this case call the - ** balance_deeper() function to create a new child for the root-page - ** and copy the current contents of the root-page to it. The - ** next iteration of the do-loop will balance the child page. 
- */ - assert( balance_deeper_called==0 ); - VVA_ONLY( balance_deeper_called++ ); - rc = balance_deeper(pPage, &pCur->apPage[1]); - if( rc==SQLITE_OK ){ - pCur->iPage = 1; - pCur->ix = 0; - pCur->aiIdx[0] = 0; - pCur->apPage[0] = pPage; - pCur->pPage = pCur->apPage[1]; - assert( pCur->pPage->nOverflow ); - } - }else{ - break; - } - }else{ - MemPage * const pParent = pCur->apPage[iPage-1]; - int const iIdx = pCur->aiIdx[iPage-1]; - - rc = sqlite3PagerWrite(pParent->pDbPage); - if( rc==SQLITE_OK && pParent->nFree<0 ){ - rc = btreeComputeFreeSpace(pParent); - } - if( rc==SQLITE_OK ){ -#ifndef SQLITE_OMIT_QUICKBALANCE - if( pPage->intKeyLeaf - && pPage->nOverflow==1 - && pPage->aiOvfl[0]==pPage->nCell - && pParent->pgno!=1 - && pParent->nCell==iIdx - ){ - /* Call balance_quick() to create a new sibling of pPage on which - ** to store the overflow cell. balance_quick() inserts a new cell - ** into pParent, which may cause pParent overflow. If this - ** happens, the next iteration of the do-loop will balance pParent - ** use either balance_nonroot() or balance_deeper(). Until this - ** happens, the overflow cell is stored in the aBalanceQuickSpace[] - ** buffer. - ** - ** The purpose of the following assert() is to check that only a - ** single call to balance_quick() is made for each call to this - ** function. If this were not verified, a subtle bug involving reuse - ** of the aBalanceQuickSpace[] might sneak in. - */ - assert( balance_quick_called==0 ); - VVA_ONLY( balance_quick_called++ ); - rc = balance_quick(pParent, pPage, aBalanceQuickSpace); - }else -#endif - { - /* In this case, call balance_nonroot() to redistribute cells - ** between pPage and up to 2 of its sibling pages. This involves - ** modifying the contents of pParent, which may cause pParent to - ** become overfull or underfull. The next iteration of the do-loop - ** will balance the parent page to correct this. 
- ** - ** If the parent page becomes overfull, the overflow cell or cells - ** are stored in the pSpace buffer allocated immediately below. - ** A subsequent iteration of the do-loop will deal with this by - ** calling balance_nonroot() (balance_deeper() may be called first, - ** but it doesn't deal with overflow cells - just moves them to a - ** different page). Once this subsequent call to balance_nonroot() - ** has completed, it is safe to release the pSpace buffer used by - ** the previous call, as the overflow cell data will have been - ** copied either into the body of a database page or into the new - ** pSpace buffer passed to the latter call to balance_nonroot(). - */ - u8 *pSpace = sqlite3PageMalloc(pCur->pBt->pageSize); - rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1, - pCur->hints&BTREE_BULKLOAD); - if( pFree ){ - /* If pFree is not NULL, it points to the pSpace buffer used - ** by a previous call to balance_nonroot(). Its contents are - ** now stored either on real database pages or within the - ** new pSpace buffer, so it may be safely freed here. */ - sqlite3PageFree(pFree); - } - - /* The pSpace buffer will be freed after the next call to - ** balance_nonroot(), or just before this function returns, whichever - ** comes first. */ - pFree = pSpace; - } - } - - pPage->nOverflow = 0; - - /* The next iteration of the do-loop balances the parent page. */ - releasePage(pPage); - pCur->iPage--; - assert( pCur->iPage>=0 ); - pCur->pPage = pCur->apPage[pCur->iPage]; - } - }while( rc==SQLITE_OK ); - - if( pFree ){ - sqlite3PageFree(pFree); - } - return rc; -} - -/* Overwrite content from pX into pDest. Only do the write if the -** content is different from what is already there. 
-*/ -static int btreeOverwriteContent( - MemPage *pPage, /* MemPage on which writing will occur */ - u8 *pDest, /* Pointer to the place to start writing */ - const BtreePayload *pX, /* Source of data to write */ - int iOffset, /* Offset of first byte to write */ - int iAmt /* Number of bytes to be written */ -){ - int nData = pX->nData - iOffset; - if( nData<=0 ){ - /* Overwritting with zeros */ - int i; - for(i=0; ipDbPage); - if( rc ) return rc; - memset(pDest + i, 0, iAmt - i); - } - }else{ - if( nDatapData) + iOffset, iAmt)!=0 ){ - int rc = sqlite3PagerWrite(pPage->pDbPage); - if( rc ) return rc; - /* In a corrupt database, it is possible for the source and destination - ** buffers to overlap. This is harmless since the database is already - ** corrupt but it does cause valgrind and ASAN warnings. So use - ** memmove(). */ - memmove(pDest, ((u8*)pX->pData) + iOffset, iAmt); - } - } - return SQLITE_OK; -} - -/* -** Overwrite the cell that cursor pCur is pointing to with fresh content -** contained in pX. 
-*/ -static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){ - int iOffset; /* Next byte of pX->pData to write */ - int nTotal = pX->nData + pX->nZero; /* Total bytes of to write */ - int rc; /* Return code */ - MemPage *pPage = pCur->pPage; /* Page being written */ - BtShared *pBt; /* Btree */ - Pgno ovflPgno; /* Next overflow page to write */ - u32 ovflPageSize; /* Size to write on overflow page */ - - if( pCur->info.pPayload + pCur->info.nLocal > pPage->aDataEnd - || pCur->info.pPayload < pPage->aData + pPage->cellOffset - ){ - return SQLITE_CORRUPT_BKPT; - } - /* Overwrite the local portion first */ - rc = btreeOverwriteContent(pPage, pCur->info.pPayload, pX, - 0, pCur->info.nLocal); - if( rc ) return rc; - if( pCur->info.nLocal==nTotal ) return SQLITE_OK; - - /* Now overwrite the overflow pages */ - iOffset = pCur->info.nLocal; - assert( nTotal>=0 ); - assert( iOffset>=0 ); - ovflPgno = get4byte(pCur->info.pPayload + iOffset); - pBt = pPage->pBt; - ovflPageSize = pBt->usableSize - 4; - do{ - rc = btreeGetPage(pBt, ovflPgno, &pPage, 0); - if( rc ) return rc; - if( sqlite3PagerPageRefcount(pPage->pDbPage)!=1 || pPage->isInit ){ - rc = SQLITE_CORRUPT_BKPT; - }else{ - if( iOffset+ovflPageSize<(u32)nTotal ){ - ovflPgno = get4byte(pPage->aData); - }else{ - ovflPageSize = nTotal - iOffset; - } - rc = btreeOverwriteContent(pPage, pPage->aData+4, pX, - iOffset, ovflPageSize); - } - sqlite3PagerUnref(pPage->pDbPage); - if( rc ) return rc; - iOffset += ovflPageSize; - }while( iOffset0 then pCur points to a cell -** that is larger than (pKey,nKey). -** -** If seekResult==0, that means pCur is pointing at some unknown location. -** In that case, this routine must seek the cursor to the correct insertion -** point for (pKey,nKey) before doing the insertion. For index btrees, -** if pX->nMem is non-zero, then pX->aMem contains pointers to the unpacked -** key values and pX->aMem can be used instead of pX->pKey to avoid having -** to decode the key. 
-*/ -int sqlite3BtreeInsert( - BtCursor *pCur, /* Insert data into the table of this cursor */ - const BtreePayload *pX, /* Content of the row to be inserted */ - int flags, /* True if this is likely an append */ - int seekResult /* Result of prior MovetoUnpacked() call */ -){ - int rc; - int loc = seekResult; /* -1: before desired location +1: after */ - int szNew = 0; - int idx; - MemPage *pPage; - Btree *p = pCur->pBtree; - BtShared *pBt = p->pBt; - unsigned char *oldCell; - unsigned char *newCell = 0; - - assert( (flags & (BTREE_SAVEPOSITION|BTREE_APPEND|BTREE_PREFORMAT))==flags ); - assert( (flags & BTREE_PREFORMAT)==0 || seekResult || pCur->pKeyInfo==0 ); - - if( pCur->eState==CURSOR_FAULT ){ - assert( pCur->skipNext!=SQLITE_OK ); - return pCur->skipNext; - } - - assert( cursorOwnsBtShared(pCur) ); - assert( (pCur->curFlags & BTCF_WriteFlag)!=0 - && pBt->inTransaction==TRANS_WRITE - && (pBt->btsFlags & BTS_READ_ONLY)==0 ); - assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); - - /* Assert that the caller has been consistent. If this cursor was opened - ** expecting an index b-tree, then the caller should be inserting blob - ** keys with no associated data. If the cursor was opened expecting an - ** intkey table, the caller should be inserting integer keys with a - ** blob of associated data. */ - assert( (flags & BTREE_PREFORMAT) || (pX->pKey==0)==(pCur->pKeyInfo==0) ); - - /* Save the positions of any other cursors open on this table. - ** - ** In some cases, the call to btreeMoveto() below is a no-op. For - ** example, when inserting data into a table with auto-generated integer - ** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the - ** integer key to use. It then calls this function to actually insert the - ** data into the intkey B-Tree. In this case btreeMoveto() recognizes - ** that the cursor is already where it needs to be and returns without - ** doing any work. 
To avoid thwarting these optimizations, it is important - ** not to clear the cursor here. - */ - if( pCur->curFlags & BTCF_Multiple ){ - rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); - if( rc ) return rc; - if( loc && pCur->iPage<0 ){ - /* This can only happen if the schema is corrupt such that there is more - ** than one table or index with the same root page as used by the cursor. - ** Which can only happen if the SQLITE_NoSchemaError flag was set when - ** the schema was loaded. This cannot be asserted though, as a user might - ** set the flag, load the schema, and then unset the flag. */ - return SQLITE_CORRUPT_BKPT; - } - } - - if( pCur->pKeyInfo==0 ){ - assert( pX->pKey==0 ); - /* If this is an insert into a table b-tree, invalidate any incrblob - ** cursors open on the row being replaced */ - if( p->hasIncrblobCur ){ - invalidateIncrblobCursors(p, pCur->pgnoRoot, pX->nKey, 0); - } - - /* If BTREE_SAVEPOSITION is set, the cursor must already be pointing - ** to a row with the same key as the new entry being inserted. - */ -#ifdef SQLITE_DEBUG - if( flags & BTREE_SAVEPOSITION ){ - assert( pCur->curFlags & BTCF_ValidNKey ); - assert( pX->nKey==pCur->info.nKey ); - assert( loc==0 ); - } -#endif - - /* On the other hand, BTREE_SAVEPOSITION==0 does not imply - ** that the cursor is not pointing to a row to be overwritten. - ** So do a complete check. - */ - if( (pCur->curFlags&BTCF_ValidNKey)!=0 && pX->nKey==pCur->info.nKey ){ - /* The cursor is pointing to the entry that is to be - ** overwritten */ - assert( pX->nData>=0 && pX->nZero>=0 ); - if( pCur->info.nSize!=0 - && pCur->info.nPayload==(u32)pX->nData+pX->nZero - ){ - /* New entry is the same size as the old. Do an overwrite */ - return btreeOverwriteCell(pCur, pX); - } - assert( loc==0 ); - }else if( loc==0 ){ - /* The cursor is *not* pointing to the cell to be overwritten, nor - ** to an adjacent cell. Move the cursor so that it is pointing either - ** to the cell to be overwritten or an adjacent cell. 
- */ - rc = sqlite3BtreeTableMoveto(pCur, pX->nKey, - (flags & BTREE_APPEND)!=0, &loc); - if( rc ) return rc; - } - }else{ - /* This is an index or a WITHOUT ROWID table */ - - /* If BTREE_SAVEPOSITION is set, the cursor must already be pointing - ** to a row with the same key as the new entry being inserted. - */ - assert( (flags & BTREE_SAVEPOSITION)==0 || loc==0 ); - - /* If the cursor is not already pointing either to the cell to be - ** overwritten, or if a new cell is being inserted, if the cursor is - ** not pointing to an immediately adjacent cell, then move the cursor - ** so that it does. - */ - if( loc==0 && (flags & BTREE_SAVEPOSITION)==0 ){ - if( pX->nMem ){ - UnpackedRecord r; - r.pKeyInfo = pCur->pKeyInfo; - r.aMem = pX->aMem; - r.nField = pX->nMem; - r.default_rc = 0; - r.eqSeen = 0; - rc = sqlite3BtreeIndexMoveto(pCur, &r, &loc); - }else{ - rc = btreeMoveto(pCur, pX->pKey, pX->nKey, - (flags & BTREE_APPEND)!=0, &loc); - } - if( rc ) return rc; - } - - /* If the cursor is currently pointing to an entry to be overwritten - ** and the new content is the same as as the old, then use the - ** overwrite optimization. - */ - if( loc==0 ){ - getCellInfo(pCur); - if( pCur->info.nKey==pX->nKey ){ - BtreePayload x2; - x2.pData = pX->pKey; - x2.nData = pX->nKey; - x2.nZero = 0; - return btreeOverwriteCell(pCur, &x2); - } - } - } - assert( pCur->eState==CURSOR_VALID - || (pCur->eState==CURSOR_INVALID && loc) - || CORRUPT_DB ); - - pPage = pCur->pPage; - assert( pPage->intKey || pX->nKey>=0 || (flags & BTREE_PREFORMAT) ); - assert( pPage->leaf || !pPage->intKey ); - if( pPage->nFree<0 ){ - if( NEVER(pCur->eState>CURSOR_INVALID) ){ - rc = SQLITE_CORRUPT_BKPT; - }else{ - rc = btreeComputeFreeSpace(pPage); - } - if( rc ) return rc; - } - - TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n", - pCur->pgnoRoot, pX->nKey, pX->nData, pPage->pgno, - loc==0 ? 
"overwrite" : "new entry")); - assert( pPage->isInit ); - newCell = pBt->pTmpSpace; - assert( newCell!=0 ); - if( flags & BTREE_PREFORMAT ){ - rc = SQLITE_OK; - szNew = pBt->nPreformatSize; - if( szNew<4 ) szNew = 4; - if( ISAUTOVACUUM && szNew>pPage->maxLocal ){ - CellInfo info; - pPage->xParseCell(pPage, newCell, &info); - if( info.nPayload!=info.nLocal ){ - Pgno ovfl = get4byte(&newCell[szNew-4]); - ptrmapPut(pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, &rc); - } - } - }else{ - rc = fillInCell(pPage, newCell, pX, &szNew); - } - if( rc ) goto end_insert; - assert( szNew==pPage->xCellSize(pPage, newCell) ); - assert( szNew <= MX_CELL_SIZE(pBt) ); - idx = pCur->ix; - if( loc==0 ){ - CellInfo info; - assert( idx>=0 ); - if( idx>=pPage->nCell ){ - return SQLITE_CORRUPT_BKPT; - } - rc = sqlite3PagerWrite(pPage->pDbPage); - if( rc ){ - goto end_insert; - } - oldCell = findCell(pPage, idx); - if( !pPage->leaf ){ - memcpy(newCell, oldCell, 4); - } - BTREE_CLEAR_CELL(rc, pPage, oldCell, info); - testcase( pCur->curFlags & BTCF_ValidOvfl ); - invalidateOverflowCache(pCur); - if( info.nSize==szNew && info.nLocal==info.nPayload - && (!ISAUTOVACUUM || szNewminLocal) - ){ - /* Overwrite the old cell with the new if they are the same size. - ** We could also try to do this if the old cell is smaller, then add - ** the leftover space to the free list. But experiments show that - ** doing that is no faster then skipping this optimization and just - ** calling dropCell() and insertCell(). - ** - ** This optimization cannot be used on an autovacuum database if the - ** new entry uses overflow pages, as the insertCell() call below is - ** necessary to add the PTRMAP_OVERFLOW1 pointer-map entry. 
*/ - assert( rc==SQLITE_OK ); /* clearCell never fails when nLocal==nPayload */ - if( oldCell < pPage->aData+pPage->hdrOffset+10 ){ - return SQLITE_CORRUPT_BKPT; - } - if( oldCell+szNew > pPage->aDataEnd ){ - return SQLITE_CORRUPT_BKPT; - } - memcpy(oldCell, newCell, szNew); - return SQLITE_OK; - } - dropCell(pPage, idx, info.nSize, &rc); - if( rc ) goto end_insert; - }else if( loc<0 && pPage->nCell>0 ){ - assert( pPage->leaf ); - idx = ++pCur->ix; - pCur->curFlags &= ~BTCF_ValidNKey; - }else{ - assert( pPage->leaf ); - } - insertCell(pPage, idx, newCell, szNew, 0, 0, &rc); - assert( pPage->nOverflow==0 || rc==SQLITE_OK ); - assert( rc!=SQLITE_OK || pPage->nCell>0 || pPage->nOverflow>0 ); - - /* If no error has occurred and pPage has an overflow cell, call balance() - ** to redistribute the cells within the tree. Since balance() may move - ** the cursor, zero the BtCursor.info.nSize and BTCF_ValidNKey - ** variables. - ** - ** Previous versions of SQLite called moveToRoot() to move the cursor - ** back to the root page as balance() used to invalidate the contents - ** of BtCursor.apPage[] and BtCursor.aiIdx[]. Instead of doing that, - ** set the cursor state to "invalid". This makes common insert operations - ** slightly faster. - ** - ** There is a subtle but important optimization here too. When inserting - ** multiple records into an intkey b-tree using a single cursor (as can - ** happen while processing an "INSERT INTO ... SELECT" statement), it - ** is advantageous to leave the cursor pointing to the last entry in - ** the b-tree if possible. If the cursor is left pointing to the last - ** entry in the table, and the next row inserted has an integer key - ** larger than the largest existing key, it is possible to insert the - ** row without seeking the cursor. This can be a big performance boost. 
- */ - pCur->info.nSize = 0; - if( pPage->nOverflow ){ - assert( rc==SQLITE_OK ); - pCur->curFlags &= ~(BTCF_ValidNKey); - rc = balance(pCur); - - /* Must make sure nOverflow is reset to zero even if the balance() - ** fails. Internal data structure corruption will result otherwise. - ** Also, set the cursor state to invalid. This stops saveCursorPosition() - ** from trying to save the current position of the cursor. */ - pCur->pPage->nOverflow = 0; - pCur->eState = CURSOR_INVALID; - if( (flags & BTREE_SAVEPOSITION) && rc==SQLITE_OK ){ - btreeReleaseAllCursorPages(pCur); - if( pCur->pKeyInfo ){ - assert( pCur->pKey==0 ); - pCur->pKey = sqlite3Malloc( pX->nKey ); - if( pCur->pKey==0 ){ - rc = SQLITE_NOMEM; - }else{ - memcpy(pCur->pKey, pX->pKey, pX->nKey); - } - } - pCur->eState = CURSOR_REQUIRESEEK; - pCur->nKey = pX->nKey; - } - } - assert( pCur->iPage<0 || pCur->pPage->nOverflow==0 ); - -end_insert: - return rc; -} - -/* -** This function is used as part of copying the current row from cursor -** pSrc into cursor pDest. If the cursors are open on intkey tables, then -** parameter iKey is used as the rowid value when the record is copied -** into pDest. Otherwise, the record is copied verbatim. -** -** This function does not actually write the new value to cursor pDest. -** Instead, it creates and populates any required overflow pages and -** writes the data for the new cell into the BtShared.pTmpSpace buffer -** for the destination database. The size of the cell, in bytes, is left -** in BtShared.nPreformatSize. The caller completes the insertion by -** calling sqlite3BtreeInsert() with the BTREE_PREFORMAT flag specified. -** -** SQLITE_OK is returned if successful, or an SQLite error code otherwise. 
-*/ -int sqlite3BtreeTransferRow(BtCursor *pDest, BtCursor *pSrc, i64 iKey){ - int rc = SQLITE_OK; - BtShared *pBt = pDest->pBt; - u8 *aOut = pBt->pTmpSpace; /* Pointer to next output buffer */ - const u8 *aIn; /* Pointer to next input buffer */ - u32 nIn; /* Size of input buffer aIn[] */ - u32 nRem; /* Bytes of data still to copy */ - - getCellInfo(pSrc); - aOut += putVarint32(aOut, pSrc->info.nPayload); - if( pDest->pKeyInfo==0 ) aOut += putVarint(aOut, iKey); - nIn = pSrc->info.nLocal; - aIn = pSrc->info.pPayload; - if( aIn+nIn>pSrc->pPage->aDataEnd ){ - return SQLITE_CORRUPT_BKPT; - } - nRem = pSrc->info.nPayload; - if( nIn==nRem && nInpPage->maxLocal ){ - memcpy(aOut, aIn, nIn); - pBt->nPreformatSize = nIn + (aOut - pBt->pTmpSpace); - }else{ - Pager *pSrcPager = pSrc->pBt->pPager; - u8 *pPgnoOut = 0; - Pgno ovflIn = 0; - DbPage *pPageIn = 0; - MemPage *pPageOut = 0; - u32 nOut; /* Size of output buffer aOut[] */ - - nOut = btreePayloadToLocal(pDest->pPage, pSrc->info.nPayload); - pBt->nPreformatSize = nOut + (aOut - pBt->pTmpSpace); - if( nOutinfo.nPayload ){ - pPgnoOut = &aOut[nOut]; - pBt->nPreformatSize += 4; - } - - if( nRem>nIn ){ - if( aIn+nIn+4>pSrc->pPage->aDataEnd ){ - return SQLITE_CORRUPT_BKPT; - } - ovflIn = get4byte(&pSrc->info.pPayload[nIn]); - } - - do { - nRem -= nOut; - do{ - assert( nOut>0 ); - if( nIn>0 ){ - int nCopy = MIN(nOut, nIn); - memcpy(aOut, aIn, nCopy); - nOut -= nCopy; - nIn -= nCopy; - aOut += nCopy; - aIn += nCopy; - } - if( nOut>0 ){ - sqlite3PagerUnref(pPageIn); - pPageIn = 0; - rc = sqlite3PagerGet(pSrcPager, ovflIn, &pPageIn, PAGER_GET_READONLY); - if( rc==SQLITE_OK ){ - aIn = (const u8*)sqlite3PagerGetData(pPageIn); - ovflIn = get4byte(aIn); - aIn += 4; - nIn = pSrc->pBt->usableSize - 4; - } - } - }while( rc==SQLITE_OK && nOut>0 ); - - if( rc==SQLITE_OK && nRem>0 && ALWAYS(pPgnoOut) ){ - Pgno pgnoNew; - MemPage *pNew = 0; - rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0); - put4byte(pPgnoOut, pgnoNew); - if( ISAUTOVACUUM 
&& pPageOut ){ - ptrmapPut(pBt, pgnoNew, PTRMAP_OVERFLOW2, pPageOut->pgno, &rc); - } - releasePage(pPageOut); - pPageOut = pNew; - if( pPageOut ){ - pPgnoOut = pPageOut->aData; - put4byte(pPgnoOut, 0); - aOut = &pPgnoOut[4]; - nOut = MIN(pBt->usableSize - 4, nRem); - } - } - }while( nRem>0 && rc==SQLITE_OK ); - - releasePage(pPageOut); - sqlite3PagerUnref(pPageIn); - } - - return rc; -} - -/* -** Delete the entry that the cursor is pointing to. -** -** If the BTREE_SAVEPOSITION bit of the flags parameter is zero, then -** the cursor is left pointing at an arbitrary location after the delete. -** But if that bit is set, then the cursor is left in a state such that -** the next call to BtreeNext() or BtreePrev() moves it to the same row -** as it would have been on if the call to BtreeDelete() had been omitted. -** -** The BTREE_AUXDELETE bit of flags indicates that is one of several deletes -** associated with a single table entry and its indexes. Only one of those -** deletes is considered the "primary" delete. The primary delete occurs -** on a cursor that is not a BTREE_FORDELETE cursor. All but one delete -** operation on non-FORDELETE cursors is tagged with the AUXDELETE flag. -** The BTREE_AUXDELETE bit is a hint that is not used by this implementation, -** but which might be used by alternative storage engines. 
-*/ -int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){ - Btree *p = pCur->pBtree; - BtShared *pBt = p->pBt; - int rc; /* Return code */ - MemPage *pPage; /* Page to delete cell from */ - unsigned char *pCell; /* Pointer to cell to delete */ - int iCellIdx; /* Index of cell to delete */ - int iCellDepth; /* Depth of node containing pCell */ - CellInfo info; /* Size of the cell being deleted */ - int bSkipnext = 0; /* Leaf cursor in SKIPNEXT state */ - u8 bPreserve = flags & BTREE_SAVEPOSITION; /* Keep cursor valid */ - - assert( cursorOwnsBtShared(pCur) ); - assert( pBt->inTransaction==TRANS_WRITE ); - assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); - assert( pCur->curFlags & BTCF_WriteFlag ); - assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); - assert( !hasReadConflicts(p, pCur->pgnoRoot) ); - assert( (flags & ~(BTREE_SAVEPOSITION | BTREE_AUXDELETE))==0 ); - if( pCur->eState==CURSOR_REQUIRESEEK ){ - rc = btreeRestoreCursorPosition(pCur); - assert( rc!=SQLITE_OK || CORRUPT_DB || pCur->eState==CURSOR_VALID ); - if( rc || pCur->eState!=CURSOR_VALID ) return rc; - } - assert( CORRUPT_DB || pCur->eState==CURSOR_VALID ); - - iCellDepth = pCur->iPage; - iCellIdx = pCur->ix; - pPage = pCur->pPage; - pCell = findCell(pPage, iCellIdx); - if( pPage->nFree<0 && btreeComputeFreeSpace(pPage) ) return SQLITE_CORRUPT; - - /* If the bPreserve flag is set to true, then the cursor position must - ** be preserved following this delete operation. If the current delete - ** will cause a b-tree rebalance, then this is done by saving the cursor - ** key and leaving the cursor in CURSOR_REQUIRESEEK state before - ** returning. - ** - ** Or, if the current delete will not cause a rebalance, then the cursor - ** will be left in CURSOR_SKIPNEXT state pointing to the entry immediately - ** before or after the deleted entry. In this case set bSkipnext to true. 
*/ - if( bPreserve ){ - if( !pPage->leaf - || (pPage->nFree+cellSizePtr(pPage,pCell)+2)>(int)(pBt->usableSize*2/3) - || pPage->nCell==1 /* See dbfuzz001.test for a test case */ - ){ - /* A b-tree rebalance will be required after deleting this entry. - ** Save the cursor key. */ - rc = saveCursorKey(pCur); - if( rc ) return rc; - }else{ - bSkipnext = 1; - } - } - - /* If the page containing the entry to delete is not a leaf page, move - ** the cursor to the largest entry in the tree that is smaller than - ** the entry being deleted. This cell will replace the cell being deleted - ** from the internal node. The 'previous' entry is used for this instead - ** of the 'next' entry, as the previous entry is always a part of the - ** sub-tree headed by the child page of the cell being deleted. This makes - ** balancing the tree following the delete operation easier. */ - if( !pPage->leaf ){ - rc = sqlite3BtreePrevious(pCur, 0); - assert( rc!=SQLITE_DONE ); - if( rc ) return rc; - } - - /* Save the positions of any other cursors open on this table before - ** making any modifications. */ - if( pCur->curFlags & BTCF_Multiple ){ - rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); - if( rc ) return rc; - } - - /* If this is a delete operation to remove a row from a table b-tree, - ** invalidate any incrblob cursors open on the row being deleted. */ - if( pCur->pKeyInfo==0 && p->hasIncrblobCur ){ - invalidateIncrblobCursors(p, pCur->pgnoRoot, pCur->info.nKey, 0); - } - - /* Make the page containing the entry to be deleted writable. Then free any - ** overflow pages associated with the entry and finally remove the cell - ** itself from within the page. 
*/ - rc = sqlite3PagerWrite(pPage->pDbPage); - if( rc ) return rc; - BTREE_CLEAR_CELL(rc, pPage, pCell, info); - dropCell(pPage, iCellIdx, info.nSize, &rc); - if( rc ) return rc; - - /* If the cell deleted was not located on a leaf page, then the cursor - ** is currently pointing to the largest entry in the sub-tree headed - ** by the child-page of the cell that was just deleted from an internal - ** node. The cell from the leaf node needs to be moved to the internal - ** node to replace the deleted cell. */ - if( !pPage->leaf ){ - MemPage *pLeaf = pCur->pPage; - int nCell; - Pgno n; - unsigned char *pTmp; - - if( pLeaf->nFree<0 ){ - rc = btreeComputeFreeSpace(pLeaf); - if( rc ) return rc; - } - if( iCellDepthiPage-1 ){ - n = pCur->apPage[iCellDepth+1]->pgno; - }else{ - n = pCur->pPage->pgno; - } - pCell = findCell(pLeaf, pLeaf->nCell-1); - if( pCell<&pLeaf->aData[4] ) return SQLITE_CORRUPT_BKPT; - nCell = pLeaf->xCellSize(pLeaf, pCell); - assert( MX_CELL_SIZE(pBt) >= nCell ); - pTmp = pBt->pTmpSpace; - assert( pTmp!=0 ); - rc = sqlite3PagerWrite(pLeaf->pDbPage); - if( rc==SQLITE_OK ){ - insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc); - } - dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc); - if( rc ) return rc; - } - - /* Balance the tree. If the entry deleted was located on a leaf page, - ** then the cursor still points to that page. In this case the first - ** call to balance() repairs the tree, and the if(...) condition is - ** never true. - ** - ** Otherwise, if the entry deleted was on an internal node page, then - ** pCur is pointing to the leaf page from which a cell was removed to - ** replace the cell deleted from the internal node. This is slightly - ** tricky as the leaf node may be underfull, and the internal node may - ** be either under or overfull. In this case run the balancing algorithm - ** on the leaf node first. 
If the balance proceeds far enough up the - ** tree that we can be sure that any problem in the internal node has - ** been corrected, so be it. Otherwise, after balancing the leaf node, - ** walk the cursor up the tree to the internal node and balance it as - ** well. */ - rc = balance(pCur); - if( rc==SQLITE_OK && pCur->iPage>iCellDepth ){ - releasePageNotNull(pCur->pPage); - pCur->iPage--; - while( pCur->iPage>iCellDepth ){ - releasePage(pCur->apPage[pCur->iPage--]); - } - pCur->pPage = pCur->apPage[pCur->iPage]; - rc = balance(pCur); - } - - if( rc==SQLITE_OK ){ - if( bSkipnext ){ - assert( bPreserve && (pCur->iPage==iCellDepth || CORRUPT_DB) ); - assert( pPage==pCur->pPage || CORRUPT_DB ); - assert( (pPage->nCell>0 || CORRUPT_DB) && iCellIdx<=pPage->nCell ); - pCur->eState = CURSOR_SKIPNEXT; - if( iCellIdx>=pPage->nCell ){ - pCur->skipNext = -1; - pCur->ix = pPage->nCell-1; - }else{ - pCur->skipNext = 1; - } - }else{ - rc = moveToRoot(pCur); - if( bPreserve ){ - btreeReleaseAllCursorPages(pCur); - pCur->eState = CURSOR_REQUIRESEEK; - } - if( rc==SQLITE_EMPTY ) rc = SQLITE_OK; - } - } - return rc; -} - -/* -** Create a new BTree table. Write into *piTable the page -** number for the root page of the new table. -** -** The type of type is determined by the flags parameter. Only the -** following values of flags are currently in use. 
Other values for -** flags might not work: -** -** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys -** BTREE_ZERODATA Used for SQL indices -*/ -static int btreeCreateTable(Btree *p, Pgno *piTable, int createTabFlags){ - BtShared *pBt = p->pBt; - MemPage *pRoot; - Pgno pgnoRoot; - int rc; - int ptfFlags; /* Page-type flage for the root page of new table */ - - assert( sqlite3BtreeHoldsMutex(p) ); - assert( pBt->inTransaction==TRANS_WRITE ); - assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); - -#ifdef SQLITE_OMIT_AUTOVACUUM - rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); - if( rc ){ - return rc; - } -#else - if( pBt->autoVacuum ){ - Pgno pgnoMove; /* Move a page here to make room for the root-page */ - MemPage *pPageMove; /* The page to move to. */ - - /* Creating a new table may probably require moving an existing database - ** to make room for the new tables root page. In case this page turns - ** out to be an overflow page, delete all overflow page-map caches - ** held by open cursors. - */ - invalidateAllOverflowCache(pBt); - - /* Read the value of meta[3] from the database to determine where the - ** root page of the new table should go. meta[3] is the largest root-page - ** created so far, so the new root-page is (meta[3]+1). - */ - sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &pgnoRoot); - if( pgnoRoot>btreePagecount(pBt) ){ - return SQLITE_CORRUPT_BKPT; - } - pgnoRoot++; - - /* The new root-page may not be allocated on a pointer-map page, or the - ** PENDING_BYTE page. - */ - while( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) || - pgnoRoot==PENDING_BYTE_PAGE(pBt) ){ - pgnoRoot++; - } - assert( pgnoRoot>=3 ); - - /* Allocate a page. The page that currently resides at pgnoRoot will - ** be moved to the allocated page (unless the allocated page happens - ** to reside at pgnoRoot). 
- */ - rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, BTALLOC_EXACT); - if( rc!=SQLITE_OK ){ - return rc; - } - - if( pgnoMove!=pgnoRoot ){ - /* pgnoRoot is the page that will be used for the root-page of - ** the new table (assuming an error did not occur). But we were - ** allocated pgnoMove. If required (i.e. if it was not allocated - ** by extending the file), the current page at position pgnoMove - ** is already journaled. - */ - u8 eType = 0; - Pgno iPtrPage = 0; - - /* Save the positions of any open cursors. This is required in - ** case they are holding a reference to an xFetch reference - ** corresponding to page pgnoRoot. */ - rc = saveAllCursors(pBt, 0, 0); - releasePage(pPageMove); - if( rc!=SQLITE_OK ){ - return rc; - } - - /* Move the page currently at pgnoRoot to pgnoMove. */ - rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage); - if( eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){ - rc = SQLITE_CORRUPT_BKPT; - } - if( rc!=SQLITE_OK ){ - releasePage(pRoot); - return rc; - } - assert( eType!=PTRMAP_ROOTPAGE ); - assert( eType!=PTRMAP_FREEPAGE ); - rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove, 0); - releasePage(pRoot); - - /* Obtain the page at pgnoRoot */ - if( rc!=SQLITE_OK ){ - return rc; - } - rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - rc = sqlite3PagerWrite(pRoot->pDbPage); - if( rc!=SQLITE_OK ){ - releasePage(pRoot); - return rc; - } - }else{ - pRoot = pPageMove; - } - - /* Update the pointer-map and meta-data with the new root-page number. */ - ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0, &rc); - if( rc ){ - releasePage(pRoot); - return rc; - } - - /* When the new root page was allocated, page 1 was made writable in - ** order either to increase the database filesize, or to decrement the - ** freelist count. Hence, the sqlite3BtreeUpdateMeta() call cannot fail. 
- */ - assert( sqlite3PagerIswriteable(pBt->pPage1->pDbPage) ); - rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot); - if( NEVER(rc) ){ - releasePage(pRoot); - return rc; - } - - }else{ - rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); - if( rc ) return rc; - } -#endif - assert( sqlite3PagerIswriteable(pRoot->pDbPage) ); - if( createTabFlags & BTREE_INTKEY ){ - ptfFlags = PTF_INTKEY | PTF_LEAFDATA | PTF_LEAF; - }else{ - ptfFlags = PTF_ZERODATA | PTF_LEAF; - } - zeroPage(pRoot, ptfFlags); - sqlite3PagerUnref(pRoot->pDbPage); - assert( (pBt->openFlags & BTREE_SINGLE)==0 || pgnoRoot==2 ); - *piTable = pgnoRoot; - return SQLITE_OK; -} -int sqlite3BtreeCreateTable(Btree *p, Pgno *piTable, int flags){ - int rc; - sqlite3BtreeEnter(p); - rc = btreeCreateTable(p, piTable, flags); - sqlite3BtreeLeave(p); - return rc; -} - -/* -** Erase the given database page and all its children. Return -** the page to the freelist. -*/ -static int clearDatabasePage( - BtShared *pBt, /* The BTree that contains the table */ - Pgno pgno, /* Page number to clear */ - int freePageFlag, /* Deallocate page if true */ - i64 *pnChange /* Add number of Cells freed to this counter */ -){ - MemPage *pPage; - int rc; - unsigned char *pCell; - int i; - int hdr; - CellInfo info; - - assert( sqlite3_mutex_held(pBt->mutex) ); - if( pgno>btreePagecount(pBt) ){ - return SQLITE_CORRUPT_BKPT; - } - rc = getAndInitPage(pBt, pgno, &pPage, 0, 0); - if( rc ) return rc; - if( (pBt->openFlags & BTREE_SINGLE)==0 - && sqlite3PagerPageRefcount(pPage->pDbPage)!=1 - ){ - rc = SQLITE_CORRUPT_BKPT; - goto cleardatabasepage_out; - } - hdr = pPage->hdrOffset; - for(i=0; inCell; i++){ - pCell = findCell(pPage, i); - if( !pPage->leaf ){ - rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange); - if( rc ) goto cleardatabasepage_out; - } - BTREE_CLEAR_CELL(rc, pPage, pCell, info); - if( rc ) goto cleardatabasepage_out; - } - if( !pPage->leaf ){ - rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange); - if( 
rc ) goto cleardatabasepage_out; - if( pPage->intKey ) pnChange = 0; - } - if( pnChange ){ - testcase( !pPage->intKey ); - *pnChange += pPage->nCell; - } - if( freePageFlag ){ - freePage(pPage, &rc); - }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){ - zeroPage(pPage, pPage->aData[hdr] | PTF_LEAF); - } - -cleardatabasepage_out: - releasePage(pPage); - return rc; -} - -/* -** Delete all information from a single table in the database. iTable is -** the page number of the root of the table. After this routine returns, -** the root page is empty, but still exists. -** -** This routine will fail with SQLITE_LOCKED if there are any open -** read cursors on the table. Open write cursors are moved to the -** root of the table. -** -** If pnChange is not NULL, then the integer value pointed to by pnChange -** is incremented by the number of entries in the table. -*/ -int sqlite3BtreeClearTable(Btree *p, int iTable, i64 *pnChange){ - int rc; - BtShared *pBt = p->pBt; - sqlite3BtreeEnter(p); - assert( p->inTrans==TRANS_WRITE ); - - rc = saveAllCursors(pBt, (Pgno)iTable, 0); - - if( SQLITE_OK==rc ){ - /* Invalidate all incrblob cursors open on table iTable (assuming iTable - ** is the root of a table b-tree - if it is not, the following call is - ** a no-op). */ - if( p->hasIncrblobCur ){ - invalidateIncrblobCursors(p, (Pgno)iTable, 0, 1); - } - rc = clearDatabasePage(pBt, (Pgno)iTable, 0, pnChange); - } - sqlite3BtreeLeave(p); - return rc; -} - -/* -** Delete all information from the single table that pCur is open on. -** -** This routine only work for pCur on an ephemeral table. -*/ -int sqlite3BtreeClearTableOfCursor(BtCursor *pCur){ - return sqlite3BtreeClearTable(pCur->pBtree, pCur->pgnoRoot, 0); -} - -/* -** Erase all information in a table and add the root of the table to -** the freelist. Except, the root of the principle table (the one on -** page 1) is never added to the freelist. 
-** -** This routine will fail with SQLITE_LOCKED if there are any open -** cursors on the table. -** -** If AUTOVACUUM is enabled and the page at iTable is not the last -** root page in the database file, then the last root page -** in the database file is moved into the slot formerly occupied by -** iTable and that last slot formerly occupied by the last root page -** is added to the freelist instead of iTable. In this say, all -** root pages are kept at the beginning of the database file, which -** is necessary for AUTOVACUUM to work right. *piMoved is set to the -** page number that used to be the last root page in the file before -** the move. If no page gets moved, *piMoved is set to 0. -** The last root page is recorded in meta[3] and the value of -** meta[3] is updated by this procedure. -*/ -static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ - int rc; - MemPage *pPage = 0; - BtShared *pBt = p->pBt; - - assert( sqlite3BtreeHoldsMutex(p) ); - assert( p->inTrans==TRANS_WRITE ); - assert( iTable>=2 ); - if( iTable>btreePagecount(pBt) ){ - return SQLITE_CORRUPT_BKPT; - } - - rc = sqlite3BtreeClearTable(p, iTable, 0); - if( rc ) return rc; - rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0); - if( NEVER(rc) ){ - releasePage(pPage); - return rc; - } - - *piMoved = 0; - -#ifdef SQLITE_OMIT_AUTOVACUUM - freePage(pPage, &rc); - releasePage(pPage); -#else - if( pBt->autoVacuum ){ - Pgno maxRootPgno; - sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &maxRootPgno); - - if( iTable==maxRootPgno ){ - /* If the table being dropped is the table with the largest root-page - ** number in the database, put the root page on the free list. - */ - freePage(pPage, &rc); - releasePage(pPage); - if( rc!=SQLITE_OK ){ - return rc; - } - }else{ - /* The table being dropped does not have the largest root-page - ** number in the database. So move the page that does into the - ** gap left by the deleted root-page. 
- */ - MemPage *pMove; - releasePage(pPage); - rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0); - releasePage(pMove); - if( rc!=SQLITE_OK ){ - return rc; - } - pMove = 0; - rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); - freePage(pMove, &rc); - releasePage(pMove); - if( rc!=SQLITE_OK ){ - return rc; - } - *piMoved = maxRootPgno; - } - - /* Set the new 'max-root-page' value in the database header. This - ** is the old value less one, less one more if that happens to - ** be a root-page number, less one again if that is the - ** PENDING_BYTE_PAGE. - */ - maxRootPgno--; - while( maxRootPgno==PENDING_BYTE_PAGE(pBt) - || PTRMAP_ISPAGE(pBt, maxRootPgno) ){ - maxRootPgno--; - } - assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) ); - - rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno); - }else{ - freePage(pPage, &rc); - releasePage(pPage); - } -#endif - return rc; -} -int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){ - int rc; - sqlite3BtreeEnter(p); - rc = btreeDropTable(p, iTable, piMoved); - sqlite3BtreeLeave(p); - return rc; -} - - -/* -** This function may only be called if the b-tree connection already -** has a read or write transaction open on the database. -** -** Read the meta-information out of a database file. Meta[0] -** is the number of free pages currently in the database. Meta[1] -** through meta[15] are available for use by higher layers. Meta[0] -** is read-only, the others are read/write. -** -** The schema layer numbers meta values differently. At the schema -** layer (and the SetCookie and ReadCookie opcodes) the number of -** free pages is not visible. So Cookie[0] is the same as Meta[1]. -** -** This routine treats Meta[BTREE_DATA_VERSION] as a special case. Instead -** of reading the value out of the header, it instead loads the "DataVersion" -** from the pager. The BTREE_DATA_VERSION value is not actually stored in the -** database file. 
It is a number computed by the pager. But its access -** pattern is the same as header meta values, and so it is convenient to -** read it from this routine. -*/ -void sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){ - BtShared *pBt = p->pBt; - - sqlite3BtreeEnter(p); - assert( p->inTrans>TRANS_NONE ); - assert( SQLITE_OK==querySharedCacheTableLock(p, SCHEMA_ROOT, READ_LOCK) ); - assert( pBt->pPage1 ); - assert( idx>=0 && idx<=15 ); - - if( idx==BTREE_DATA_VERSION ){ - *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iBDataVersion; - }else{ - *pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]); - } - - /* If auto-vacuum is disabled in this build and this is an auto-vacuum - ** database, mark the database as read-only. */ -#ifdef SQLITE_OMIT_AUTOVACUUM - if( idx==BTREE_LARGEST_ROOT_PAGE && *pMeta>0 ){ - pBt->btsFlags |= BTS_READ_ONLY; - } -#endif - - sqlite3BtreeLeave(p); -} - -/* -** Write meta-information back into the database. Meta[0] is -** read-only and may not be written. -*/ -int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){ - BtShared *pBt = p->pBt; - unsigned char *pP1; - int rc; - assert( idx>=1 && idx<=15 ); - sqlite3BtreeEnter(p); - assert( p->inTrans==TRANS_WRITE ); - assert( pBt->pPage1!=0 ); - pP1 = pBt->pPage1->aData; - rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); - if( rc==SQLITE_OK ){ - put4byte(&pP1[36 + idx*4], iMeta); -#ifndef SQLITE_OMIT_AUTOVACUUM - if( idx==BTREE_INCR_VACUUM ){ - assert( pBt->autoVacuum || iMeta==0 ); - assert( iMeta==0 || iMeta==1 ); - pBt->incrVacuum = (u8)iMeta; - } -#endif - } - sqlite3BtreeLeave(p); - return rc; -} - -/* -** The first argument, pCur, is a cursor opened on some b-tree. Count the -** number of entries in the b-tree and write the result to *pnEntry. -** -** SQLITE_OK is returned if the operation is successfully executed. -** Otherwise, if an error is encountered (i.e. an IO error or database -** corruption) an SQLite error code is returned. 
-*/ -int sqlite3BtreeCount(sqlite3 *db, BtCursor *pCur, i64 *pnEntry){ - i64 nEntry = 0; /* Value to return in *pnEntry */ - int rc; /* Return code */ - - rc = moveToRoot(pCur); - if( rc==SQLITE_EMPTY ){ - *pnEntry = 0; - return SQLITE_OK; - } - - /* Unless an error occurs, the following loop runs one iteration for each - ** page in the B-Tree structure (not including overflow pages). - */ - while( rc==SQLITE_OK && !AtomicLoad(&db->u1.isInterrupted) ){ - int iIdx; /* Index of child node in parent */ - MemPage *pPage; /* Current page of the b-tree */ - - /* If this is a leaf page or the tree is not an int-key tree, then - ** this page contains countable entries. Increment the entry counter - ** accordingly. - */ - pPage = pCur->pPage; - if( pPage->leaf || !pPage->intKey ){ - nEntry += pPage->nCell; - } - - /* pPage is a leaf node. This loop navigates the cursor so that it - ** points to the first interior cell that it points to the parent of - ** the next page in the tree that has not yet been visited. The - ** pCur->aiIdx[pCur->iPage] value is set to the index of the parent cell - ** of the page, or to the number of cells in the page if the next page - ** to visit is the right-child of its parent. - ** - ** If all pages in the tree have been visited, return SQLITE_OK to the - ** caller. - */ - if( pPage->leaf ){ - do { - if( pCur->iPage==0 ){ - /* All pages of the b-tree have been visited. Return successfully. */ - *pnEntry = nEntry; - return moveToRoot(pCur); - } - moveToParent(pCur); - }while ( pCur->ix>=pCur->pPage->nCell ); - - pCur->ix++; - pPage = pCur->pPage; - } - - /* Descend to the child node of the cell that the cursor currently - ** points at. This is the right-child if (iIdx==pPage->nCell). - */ - iIdx = pCur->ix; - if( iIdx==pPage->nCell ){ - rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); - }else{ - rc = moveToChild(pCur, get4byte(findCell(pPage, iIdx))); - } - } - - /* An error has occurred. Return an error code. 
*/ - return rc; -} - -/* -** Return the pager associated with a BTree. This routine is used for -** testing and debugging only. -*/ -Pager *sqlite3BtreePager(Btree *p){ - return p->pBt->pPager; -} - -#ifndef SQLITE_OMIT_INTEGRITY_CHECK -/* -** Append a message to the error message string. -*/ -static void checkAppendMsg( - IntegrityCk *pCheck, - const char *zFormat, - ... -){ - va_list ap; - if( !pCheck->mxErr ) return; - pCheck->mxErr--; - pCheck->nErr++; - va_start(ap, zFormat); - if( pCheck->errMsg.nChar ){ - sqlite3_str_append(&pCheck->errMsg, "\n", 1); - } - if( pCheck->zPfx ){ - sqlite3_str_appendf(&pCheck->errMsg, pCheck->zPfx, pCheck->v1, pCheck->v2); - } - sqlite3_str_vappendf(&pCheck->errMsg, zFormat, ap); - va_end(ap); - if( pCheck->errMsg.accError==SQLITE_NOMEM ){ - pCheck->bOomFault = 1; - } -} -#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ - -#ifndef SQLITE_OMIT_INTEGRITY_CHECK - -/* -** Return non-zero if the bit in the IntegrityCk.aPgRef[] array that -** corresponds to page iPg is already set. -*/ -static int getPageReferenced(IntegrityCk *pCheck, Pgno iPg){ - assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 ); - return (pCheck->aPgRef[iPg/8] & (1 << (iPg & 0x07))); -} - -/* -** Set the bit in the IntegrityCk.aPgRef[] array that corresponds to page iPg. -*/ -static void setPageReferenced(IntegrityCk *pCheck, Pgno iPg){ - assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 ); - pCheck->aPgRef[iPg/8] |= (1 << (iPg & 0x07)); -} - - -/* -** Add 1 to the reference count for page iPage. If this is the second -** reference to the page, add an error message to pCheck->zErrMsg. -** Return 1 if there are 2 or more references to the page and 0 if -** if this is the first reference to the page. -** -** Also check that the page number is in bounds. 
-*/ -static int checkRef(IntegrityCk *pCheck, Pgno iPage){ - if( iPage>pCheck->nPage || iPage==0 ){ - checkAppendMsg(pCheck, "invalid page number %d", iPage); - return 1; - } - if( getPageReferenced(pCheck, iPage) ){ - checkAppendMsg(pCheck, "2nd reference to page %d", iPage); - return 1; - } - if( AtomicLoad(&pCheck->db->u1.isInterrupted) ) return 1; - setPageReferenced(pCheck, iPage); - return 0; -} - -#ifndef SQLITE_OMIT_AUTOVACUUM -/* -** Check that the entry in the pointer-map for page iChild maps to -** page iParent, pointer type ptrType. If not, append an error message -** to pCheck. -*/ -static void checkPtrmap( - IntegrityCk *pCheck, /* Integrity check context */ - Pgno iChild, /* Child page number */ - u8 eType, /* Expected pointer map type */ - Pgno iParent /* Expected pointer map parent page number */ -){ - int rc; - u8 ePtrmapType; - Pgno iPtrmapParent; - - rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent); - if( rc!=SQLITE_OK ){ - if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ) pCheck->bOomFault = 1; - checkAppendMsg(pCheck, "Failed to read ptrmap key=%d", iChild); - return; - } - - if( ePtrmapType!=eType || iPtrmapParent!=iParent ){ - checkAppendMsg(pCheck, - "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)", - iChild, eType, iParent, ePtrmapType, iPtrmapParent); - } -} -#endif - -/* -** Check the integrity of the freelist or of an overflow page list. -** Verify that the number of pages on the list is N. -*/ -static void checkList( - IntegrityCk *pCheck, /* Integrity checking context */ - int isFreeList, /* True for a freelist. 
False for overflow page list */ - Pgno iPage, /* Page number for first page in the list */ - u32 N /* Expected number of pages in the list */ -){ - int i; - u32 expected = N; - int nErrAtStart = pCheck->nErr; - while( iPage!=0 && pCheck->mxErr ){ - DbPage *pOvflPage; - unsigned char *pOvflData; - if( checkRef(pCheck, iPage) ) break; - N--; - if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage, 0) ){ - checkAppendMsg(pCheck, "failed to get page %d", iPage); - break; - } - pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage); - if( isFreeList ){ - u32 n = (u32)get4byte(&pOvflData[4]); -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pCheck->pBt->autoVacuum ){ - checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0); - } -#endif - if( n>pCheck->pBt->usableSize/4-2 ){ - checkAppendMsg(pCheck, - "freelist leaf count too big on page %d", iPage); - N--; - }else{ - for(i=0; i<(int)n; i++){ - Pgno iFreePage = get4byte(&pOvflData[8+i*4]); -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pCheck->pBt->autoVacuum ){ - checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0); - } -#endif - checkRef(pCheck, iFreePage); - } - N -= n; - } - } -#ifndef SQLITE_OMIT_AUTOVACUUM - else{ - /* If this database supports auto-vacuum and iPage is not the last - ** page in this overflow list, check that the pointer-map entry for - ** the following page matches iPage. - */ - if( pCheck->pBt->autoVacuum && N>0 ){ - i = get4byte(pOvflData); - checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage); - } - } -#endif - iPage = get4byte(pOvflData); - sqlite3PagerUnref(pOvflPage); - } - if( N && nErrAtStart==pCheck->nErr ){ - checkAppendMsg(pCheck, - "%s is %d but should be %d", - isFreeList ? "size" : "overflow list length", - expected-N, expected); - } -} -#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ - -/* -** An implementation of a min-heap. -** -** aHeap[0] is the number of elements on the heap. aHeap[1] is the -** root element. The daughter nodes of aHeap[N] are aHeap[N*2] -** and aHeap[N*2+1]. 
-** -** The heap property is this: Every node is less than or equal to both -** of its daughter nodes. A consequence of the heap property is that the -** root node aHeap[1] is always the minimum value currently in the heap. -** -** The btreeHeapInsert() routine inserts an unsigned 32-bit number onto -** the heap, preserving the heap property. The btreeHeapPull() routine -** removes the root element from the heap (the minimum value in the heap) -** and then moves other nodes around as necessary to preserve the heap -** property. -** -** This heap is used for cell overlap and coverage testing. Each u32 -** entry represents the span of a cell or freeblock on a btree page. -** The upper 16 bits are the index of the first byte of a range and the -** lower 16 bits are the index of the last byte of that range. -*/ -static void btreeHeapInsert(u32 *aHeap, u32 x){ - u32 j, i = ++aHeap[0]; - aHeap[i] = x; - while( (j = i/2)>0 && aHeap[j]>aHeap[i] ){ - x = aHeap[j]; - aHeap[j] = aHeap[i]; - aHeap[i] = x; - i = j; - } -} -static int btreeHeapPull(u32 *aHeap, u32 *pOut){ - u32 j, i, x; - if( (x = aHeap[0])==0 ) return 0; - *pOut = aHeap[1]; - aHeap[1] = aHeap[x]; - aHeap[x] = 0xffffffff; - aHeap[0]--; - i = 1; - while( (j = i*2)<=aHeap[0] ){ - if( aHeap[j]>aHeap[j+1] ) j++; - if( aHeap[i]zPfx; - int saved_v1 = pCheck->v1; - int saved_v2 = pCheck->v2; - u8 savedIsInit = 0; - - /* Check that the page exists - */ - pBt = pCheck->pBt; - usableSize = pBt->usableSize; - if( iPage==0 ) return 0; - if( checkRef(pCheck, iPage) ) return 0; - pCheck->zPfx = "Page %u: "; - pCheck->v1 = iPage; - if( (rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0 ){ - checkAppendMsg(pCheck, - "unable to get the page. error code=%d", rc); - goto end_of_check; - } - - /* Clear MemPage.isInit to make sure the corruption detection code in - ** btreeInitPage() is executed. 
*/ - savedIsInit = pPage->isInit; - pPage->isInit = 0; - if( (rc = btreeInitPage(pPage))!=0 ){ - assert( rc==SQLITE_CORRUPT ); /* The only possible error from InitPage */ - checkAppendMsg(pCheck, - "btreeInitPage() returns error code %d", rc); - goto end_of_check; - } - if( (rc = btreeComputeFreeSpace(pPage))!=0 ){ - assert( rc==SQLITE_CORRUPT ); - checkAppendMsg(pCheck, "free space corruption", rc); - goto end_of_check; - } - data = pPage->aData; - hdr = pPage->hdrOffset; - - /* Set up for cell analysis */ - pCheck->zPfx = "On tree page %u cell %d: "; - contentOffset = get2byteNotZero(&data[hdr+5]); - assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */ - - /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the - ** number of cells on the page. */ - nCell = get2byte(&data[hdr+3]); - assert( pPage->nCell==nCell ); - - /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page - ** immediately follows the b-tree page header. */ - cellStart = hdr + 12 - 4*pPage->leaf; - assert( pPage->aCellIdx==&data[cellStart] ); - pCellIdx = &data[cellStart + 2*(nCell-1)]; - - if( !pPage->leaf ){ - /* Analyze the right-child page of internal pages */ - pgno = get4byte(&data[hdr+8]); -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pBt->autoVacuum ){ - pCheck->zPfx = "On page %u at right child: "; - checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); - } -#endif - depth = checkTreePage(pCheck, pgno, &maxKey, maxKey); - keyCanBeEqual = 0; - }else{ - /* For leaf pages, the coverage check will occur in the same loop - ** as the other cell checks, so initialize the heap. */ - heap = pCheck->heap; - heap[0] = 0; - } - - /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte - ** integer offsets to the cell contents. 
*/ - for(i=nCell-1; i>=0 && pCheck->mxErr; i--){ - CellInfo info; - - /* Check cell size */ - pCheck->v2 = i; - assert( pCellIdx==&data[cellStart + i*2] ); - pc = get2byteAligned(pCellIdx); - pCellIdx -= 2; - if( pcusableSize-4 ){ - checkAppendMsg(pCheck, "Offset %d out of range %d..%d", - pc, contentOffset, usableSize-4); - doCoverageCheck = 0; - continue; - } - pCell = &data[pc]; - pPage->xParseCell(pPage, pCell, &info); - if( pc+info.nSize>usableSize ){ - checkAppendMsg(pCheck, "Extends off end of page"); - doCoverageCheck = 0; - continue; - } - - /* Check for integer primary key out of range */ - if( pPage->intKey ){ - if( keyCanBeEqual ? (info.nKey > maxKey) : (info.nKey >= maxKey) ){ - checkAppendMsg(pCheck, "Rowid %lld out of order", info.nKey); - } - maxKey = info.nKey; - keyCanBeEqual = 0; /* Only the first key on the page may ==maxKey */ - } - - /* Check the content overflow list */ - if( info.nPayload>info.nLocal ){ - u32 nPage; /* Number of pages on the overflow chain */ - Pgno pgnoOvfl; /* First page of the overflow chain */ - assert( pc + info.nSize - 4 <= usableSize ); - nPage = (info.nPayload - info.nLocal + usableSize - 5)/(usableSize - 4); - pgnoOvfl = get4byte(&pCell[info.nSize - 4]); -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pBt->autoVacuum ){ - checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage); - } -#endif - checkList(pCheck, 0, pgnoOvfl, nPage); - } - - if( !pPage->leaf ){ - /* Check sanity of left child page for internal pages */ - pgno = get4byte(pCell); -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pBt->autoVacuum ){ - checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); - } -#endif - d2 = checkTreePage(pCheck, pgno, &maxKey, maxKey); - keyCanBeEqual = 0; - if( d2!=depth ){ - checkAppendMsg(pCheck, "Child page depth differs"); - depth = d2; - } - }else{ - /* Populate the coverage-checking heap for leaf pages */ - btreeHeapInsert(heap, (pc<<16)|(pc+info.nSize-1)); - } - } - *piMinKey = maxKey; - - /* Check for complete coverage of the page - */ - 
pCheck->zPfx = 0; - if( doCoverageCheck && pCheck->mxErr>0 ){ - /* For leaf pages, the min-heap has already been initialized and the - ** cells have already been inserted. But for internal pages, that has - ** not yet been done, so do it now */ - if( !pPage->leaf ){ - heap = pCheck->heap; - heap[0] = 0; - for(i=nCell-1; i>=0; i--){ - u32 size; - pc = get2byteAligned(&data[cellStart+i*2]); - size = pPage->xCellSize(pPage, &data[pc]); - btreeHeapInsert(heap, (pc<<16)|(pc+size-1)); - } - } - /* Add the freeblocks to the min-heap - ** - ** EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header - ** is the offset of the first freeblock, or zero if there are no - ** freeblocks on the page. - */ - i = get2byte(&data[hdr+1]); - while( i>0 ){ - int size, j; - assert( (u32)i<=usableSize-4 ); /* Enforced by btreeComputeFreeSpace() */ - size = get2byte(&data[i+2]); - assert( (u32)(i+size)<=usableSize ); /* due to btreeComputeFreeSpace() */ - btreeHeapInsert(heap, (((u32)i)<<16)|(i+size-1)); - /* EVIDENCE-OF: R-58208-19414 The first 2 bytes of a freeblock are a - ** big-endian integer which is the offset in the b-tree page of the next - ** freeblock in the chain, or zero if the freeblock is the last on the - ** chain. */ - j = get2byte(&data[i]); - /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of - ** increasing offset. */ - assert( j==0 || j>i+size ); /* Enforced by btreeComputeFreeSpace() */ - assert( (u32)j<=usableSize-4 ); /* Enforced by btreeComputeFreeSpace() */ - i = j; - } - /* Analyze the min-heap looking for overlap between cells and/or - ** freeblocks, and counting the number of untracked bytes in nFrag. - ** - ** Each min-heap entry is of the form: (start_address<<16)|end_address. - ** There is an implied first entry the covers the page header, the cell - ** pointer index, and the gap between the cell pointer index and the start - ** of cell content. 
- ** - ** The loop below pulls entries from the min-heap in order and compares - ** the start_address against the previous end_address. If there is an - ** overlap, that means bytes are used multiple times. If there is a gap, - ** that gap is added to the fragmentation count. - */ - nFrag = 0; - prev = contentOffset - 1; /* Implied first min-heap entry */ - while( btreeHeapPull(heap,&x) ){ - if( (prev&0xffff)>=(x>>16) ){ - checkAppendMsg(pCheck, - "Multiple uses for byte %u of page %u", x>>16, iPage); - break; - }else{ - nFrag += (x>>16) - (prev&0xffff) - 1; - prev = x; - } - } - nFrag += usableSize - (prev&0xffff) - 1; - /* EVIDENCE-OF: R-43263-13491 The total number of bytes in all fragments - ** is stored in the fifth field of the b-tree page header. - ** EVIDENCE-OF: R-07161-27322 The one-byte integer at offset 7 gives the - ** number of fragmented free bytes within the cell content area. - */ - if( heap[0]==0 && nFrag!=data[hdr+7] ){ - checkAppendMsg(pCheck, - "Fragmentation of %d bytes reported as %d on page %u", - nFrag, data[hdr+7], iPage); - } - } - -end_of_check: - if( !doCoverageCheck ) pPage->isInit = savedIsInit; - releasePage(pPage); - pCheck->zPfx = saved_zPfx; - pCheck->v1 = saved_v1; - pCheck->v2 = saved_v2; - return depth+1; -} -#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ - -#ifndef SQLITE_OMIT_INTEGRITY_CHECK -/* -** This routine does a complete check of the given BTree file. aRoot[] is -** an array of pages numbers were each page number is the root page of -** a table. nRoot is the number of entries in aRoot. -** -** A read-only or read-write transaction must be opened before calling -** this function. -** -** Write the number of error seen in *pnErr. Except for some memory -** allocation errors, an error message held in memory obtained from -** malloc is returned if *pnErr is non-zero. If *pnErr==0 then NULL is -** returned. If a memory allocation error occurs, NULL is returned. 
-** -** If the first entry in aRoot[] is 0, that indicates that the list of -** root pages is incomplete. This is a "partial integrity-check". This -** happens when performing an integrity check on a single table. The -** zero is skipped, of course. But in addition, the freelist checks -** and the checks to make sure every page is referenced are also skipped, -** since obviously it is not possible to know which pages are covered by -** the unverified btrees. Except, if aRoot[1] is 1, then the freelist -** checks are still performed. -*/ -char *sqlite3BtreeIntegrityCheck( - sqlite3 *db, /* Database connection that is running the check */ - Btree *p, /* The btree to be checked */ - Pgno *aRoot, /* An array of root pages numbers for individual trees */ - int nRoot, /* Number of entries in aRoot[] */ - int mxErr, /* Stop reporting errors after this many */ - int *pnErr /* Write number of errors seen to this variable */ -){ - Pgno i; - IntegrityCk sCheck; - BtShared *pBt = p->pBt; - u64 savedDbFlags = pBt->db->flags; - char zErr[100]; - int bPartial = 0; /* True if not checking all btrees */ - int bCkFreelist = 1; /* True to scan the freelist */ - VVA_ONLY( int nRef ); - assert( nRoot>0 ); - - /* aRoot[0]==0 means this is a partial check */ - if( aRoot[0]==0 ){ - assert( nRoot>1 ); - bPartial = 1; - if( aRoot[1]!=1 ) bCkFreelist = 0; - } - - sqlite3BtreeEnter(p); - assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE ); - VVA_ONLY( nRef = sqlite3PagerRefcount(pBt->pPager) ); - assert( nRef>=0 ); - sCheck.db = db; - sCheck.pBt = pBt; - sCheck.pPager = pBt->pPager; - sCheck.nPage = btreePagecount(sCheck.pBt); - sCheck.mxErr = mxErr; - sCheck.nErr = 0; - sCheck.bOomFault = 0; - sCheck.zPfx = 0; - sCheck.v1 = 0; - sCheck.v2 = 0; - sCheck.aPgRef = 0; - sCheck.heap = 0; - sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH); - sCheck.errMsg.printfFlags = SQLITE_PRINTF_INTERNAL; - if( sCheck.nPage==0 ){ - goto integrity_ck_cleanup; - } - - 
sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1); - if( !sCheck.aPgRef ){ - sCheck.bOomFault = 1; - goto integrity_ck_cleanup; - } - sCheck.heap = (u32*)sqlite3PageMalloc( pBt->pageSize ); - if( sCheck.heap==0 ){ - sCheck.bOomFault = 1; - goto integrity_ck_cleanup; - } - - i = PENDING_BYTE_PAGE(pBt); - if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i); - - /* Check the integrity of the freelist - */ - if( bCkFreelist ){ - sCheck.zPfx = "Main freelist: "; - checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), - get4byte(&pBt->pPage1->aData[36])); - sCheck.zPfx = 0; - } - - /* Check all the tables. - */ -#ifndef SQLITE_OMIT_AUTOVACUUM - if( !bPartial ){ - if( pBt->autoVacuum ){ - Pgno mx = 0; - Pgno mxInHdr; - for(i=0; (int)ipPage1->aData[52]); - if( mx!=mxInHdr ){ - checkAppendMsg(&sCheck, - "max rootpage (%d) disagrees with header (%d)", - mx, mxInHdr - ); - } - }else if( get4byte(&pBt->pPage1->aData[64])!=0 ){ - checkAppendMsg(&sCheck, - "incremental_vacuum enabled with a max rootpage of zero" - ); - } - } -#endif - testcase( pBt->db->flags & SQLITE_CellSizeCk ); - pBt->db->flags &= ~(u64)SQLITE_CellSizeCk; - for(i=0; (int)iautoVacuum && aRoot[i]>1 && !bPartial ){ - checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0); - } -#endif - checkTreePage(&sCheck, aRoot[i], ¬Used, LARGEST_INT64); - } - pBt->db->flags = savedDbFlags; - - /* Make sure every page in the file is referenced - */ - if( !bPartial ){ - for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){ -#ifdef SQLITE_OMIT_AUTOVACUUM - if( getPageReferenced(&sCheck, i)==0 ){ - checkAppendMsg(&sCheck, "Page %d is never used", i); - } -#else - /* If the database supports auto-vacuum, make sure no tables contain - ** references to pointer-map pages. 
- */ - if( getPageReferenced(&sCheck, i)==0 && - (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){ - checkAppendMsg(&sCheck, "Page %d is never used", i); - } - if( getPageReferenced(&sCheck, i)!=0 && - (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){ - checkAppendMsg(&sCheck, "Pointer map page %d is referenced", i); - } -#endif - } - } - - /* Clean up and report errors. - */ -integrity_ck_cleanup: - sqlite3PageFree(sCheck.heap); - sqlite3_free(sCheck.aPgRef); - if( sCheck.bOomFault ){ - sqlite3_str_reset(&sCheck.errMsg); - sCheck.nErr++; - } - *pnErr = sCheck.nErr; - if( sCheck.nErr==0 ) sqlite3_str_reset(&sCheck.errMsg); - /* Make sure this analysis did not leave any unref() pages. */ - assert( nRef==sqlite3PagerRefcount(pBt->pPager) ); - sqlite3BtreeLeave(p); - return sqlite3StrAccumFinish(&sCheck.errMsg); -} -#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ - -/* -** Return the full pathname of the underlying database file. Return -** an empty string if the database is in-memory or a TEMP database. -** -** The pager filename is invariant as long as the pager is -** open so it is safe to access without the BtShared mutex. -*/ -const char *sqlite3BtreeGetFilename(Btree *p){ - assert( p->pBt->pPager!=0 ); - return sqlite3PagerFilename(p->pBt->pPager, 1); -} - -/* -** Return the pathname of the journal file for this database. The return -** value of this routine is the same regardless of whether the journal file -** has been created or not. -** -** The pager journal filename is invariant as long as the pager is -** open so it is safe to access without the BtShared mutex. -*/ -const char *sqlite3BtreeGetJournalname(Btree *p){ - assert( p->pBt->pPager!=0 ); - return sqlite3PagerJournalname(p->pBt->pPager); -} - -/* -** Return one of SQLITE_TXN_NONE, SQLITE_TXN_READ, or SQLITE_TXN_WRITE -** to describe the current transaction state of Btree p. -*/ -int sqlite3BtreeTxnState(Btree *p){ - assert( p==0 || sqlite3_mutex_held(p->db->mutex) ); - return p ? 
p->inTrans : 0; -} - -#ifndef SQLITE_OMIT_WAL -/* -** Run a checkpoint on the Btree passed as the first argument. -** -** Return SQLITE_LOCKED if this or any other connection has an open -** transaction on the shared-cache the argument Btree is connected to. -** -** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART. -*/ -int sqlite3BtreeCheckpoint(Btree *p, int eMode, int *pnLog, int *pnCkpt){ - int rc = SQLITE_OK; - if( p ){ - BtShared *pBt = p->pBt; - sqlite3BtreeEnter(p); - if( pBt->inTransaction!=TRANS_NONE ){ - rc = SQLITE_LOCKED; - }else{ - rc = sqlite3PagerCheckpoint(pBt->pPager, p->db, eMode, pnLog, pnCkpt); - } - sqlite3BtreeLeave(p); - } - return rc; -} -#endif - -/* -** Return true if there is currently a backup running on Btree p. -*/ -int sqlite3BtreeIsInBackup(Btree *p){ - assert( p ); - assert( sqlite3_mutex_held(p->db->mutex) ); - return p->nBackup!=0; -} - -/* -** This function returns a pointer to a blob of memory associated with -** a single shared-btree. The memory is used by client code for its own -** purposes (for example, to store a high-level schema associated with -** the shared-btree). The btree layer manages reference counting issues. -** -** The first time this is called on a shared-btree, nBytes bytes of memory -** are allocated, zeroed, and returned to the caller. For each subsequent -** call the nBytes parameter is ignored and a pointer to the same blob -** of memory returned. -** -** If the nBytes parameter is 0 and the blob of memory has not yet been -** allocated, a null pointer is returned. If the blob has already been -** allocated, it is returned as normal. -** -** Just before the shared-btree is closed, the function passed as the -** xFree argument when the memory allocation was made is invoked on the -** blob of allocated memory. The xFree function should not call sqlite3_free() -** on the memory, the btree layer does that. 
-*/ -void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){ - BtShared *pBt = p->pBt; - sqlite3BtreeEnter(p); - if( !pBt->pSchema && nBytes ){ - pBt->pSchema = sqlite3DbMallocZero(0, nBytes); - pBt->xFreeSchema = xFree; - } - sqlite3BtreeLeave(p); - return pBt->pSchema; -} - -/* -** Return SQLITE_LOCKED_SHAREDCACHE if another user of the same shared -** btree as the argument handle holds an exclusive lock on the -** sqlite_schema table. Otherwise SQLITE_OK. -*/ -int sqlite3BtreeSchemaLocked(Btree *p){ - int rc; - assert( sqlite3_mutex_held(p->db->mutex) ); - sqlite3BtreeEnter(p); - rc = querySharedCacheTableLock(p, SCHEMA_ROOT, READ_LOCK); - assert( rc==SQLITE_OK || rc==SQLITE_LOCKED_SHAREDCACHE ); - sqlite3BtreeLeave(p); - return rc; -} - - -#ifndef SQLITE_OMIT_SHARED_CACHE -/* -** Obtain a lock on the table whose root page is iTab. The -** lock is a write lock if isWritelock is true or a read lock -** if it is false. -*/ -int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){ - int rc = SQLITE_OK; - assert( p->inTrans!=TRANS_NONE ); - if( p->sharable ){ - u8 lockType = READ_LOCK + isWriteLock; - assert( READ_LOCK+1==WRITE_LOCK ); - assert( isWriteLock==0 || isWriteLock==1 ); - - sqlite3BtreeEnter(p); - rc = querySharedCacheTableLock(p, iTab, lockType); - if( rc==SQLITE_OK ){ - rc = setSharedCacheTableLock(p, iTab, lockType); - } - sqlite3BtreeLeave(p); - } - return rc; -} -#endif - -#ifndef SQLITE_OMIT_INCRBLOB -/* -** Argument pCsr must be a cursor opened for writing on an -** INTKEY table currently pointing at a valid table entry. -** This function modifies the data stored as part of that entry. -** -** Only the data content may only be modified, it is not possible to -** change the length of the data stored. If this function is called with -** parameters that attempt to write past the end of the existing data, -** no modifications are made and SQLITE_CORRUPT is returned. 
-*/ -int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ - int rc; - assert( cursorOwnsBtShared(pCsr) ); - assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) ); - assert( pCsr->curFlags & BTCF_Incrblob ); - - rc = restoreCursorPosition(pCsr); - if( rc!=SQLITE_OK ){ - return rc; - } - assert( pCsr->eState!=CURSOR_REQUIRESEEK ); - if( pCsr->eState!=CURSOR_VALID ){ - return SQLITE_ABORT; - } - - /* Save the positions of all other cursors open on this table. This is - ** required in case any of them are holding references to an xFetch - ** version of the b-tree page modified by the accessPayload call below. - ** - ** Note that pCsr must be open on a INTKEY table and saveCursorPosition() - ** and hence saveAllCursors() cannot fail on a BTREE_INTKEY table, hence - ** saveAllCursors can only return SQLITE_OK. - */ - VVA_ONLY(rc =) saveAllCursors(pCsr->pBt, pCsr->pgnoRoot, pCsr); - assert( rc==SQLITE_OK ); - - /* Check some assumptions: - ** (a) the cursor is open for writing, - ** (b) there is a read/write transaction open, - ** (c) the connection holds a write-lock on the table (if required), - ** (d) there are no conflicting read-locks, and - ** (e) the cursor points at a valid row of an intKey table. - */ - if( (pCsr->curFlags & BTCF_WriteFlag)==0 ){ - return SQLITE_READONLY; - } - assert( (pCsr->pBt->btsFlags & BTS_READ_ONLY)==0 - && pCsr->pBt->inTransaction==TRANS_WRITE ); - assert( hasSharedCacheTableLock(pCsr->pBtree, pCsr->pgnoRoot, 0, 2) ); - assert( !hasReadConflicts(pCsr->pBtree, pCsr->pgnoRoot) ); - assert( pCsr->pPage->intKey ); - - return accessPayload(pCsr, offset, amt, (unsigned char *)z, 1); -} - -/* -** Mark this cursor as an incremental blob cursor. 
-*/ -void sqlite3BtreeIncrblobCursor(BtCursor *pCur){ - pCur->curFlags |= BTCF_Incrblob; - pCur->pBtree->hasIncrblobCur = 1; -} -#endif - -/* -** Set both the "read version" (single byte at byte offset 18) and -** "write version" (single byte at byte offset 19) fields in the database -** header to iVersion. -*/ -int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){ - BtShared *pBt = pBtree->pBt; - int rc; /* Return code */ - - assert( iVersion==1 || iVersion==2 ); - - /* If setting the version fields to 1, do not automatically open the - ** WAL connection, even if the version fields are currently set to 2. - */ - pBt->btsFlags &= ~BTS_NO_WAL; - if( iVersion==1 ) pBt->btsFlags |= BTS_NO_WAL; - - rc = sqlite3BtreeBeginTrans(pBtree, 0, 0); - if( rc==SQLITE_OK ){ - u8 *aData = pBt->pPage1->aData; - if( aData[18]!=(u8)iVersion || aData[19]!=(u8)iVersion ){ - rc = sqlite3BtreeBeginTrans(pBtree, 2, 0); - if( rc==SQLITE_OK ){ - rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); - if( rc==SQLITE_OK ){ - aData[18] = (u8)iVersion; - aData[19] = (u8)iVersion; - } - } - } - } - - pBt->btsFlags &= ~BTS_NO_WAL; - return rc; -} - -/* -** Return true if the cursor has a hint specified. This routine is -** only used from within assert() statements -*/ -int sqlite3BtreeCursorHasHint(BtCursor *pCsr, unsigned int mask){ - return (pCsr->hints & mask)!=0; -} - -/* -** Return true if the given Btree is read-only. -*/ -int sqlite3BtreeIsReadonly(Btree *p){ - return (p->pBt->btsFlags & BTS_READ_ONLY)!=0; -} - -/* -** Return the size of the header added to each page by this module. -*/ -int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); } - -#if !defined(SQLITE_OMIT_SHARED_CACHE) -/* -** Return true if the Btree passed as the only argument is sharable. -*/ -int sqlite3BtreeSharable(Btree *p){ - return p->sharable; -} - -/* -** Return the number of connections to the BtShared object accessed by -** the Btree handle passed as the only argument. 
For private caches -** this is always 1. For shared caches it may be 1 or greater. -*/ -int sqlite3BtreeConnectionCount(Btree *p){ - testcase( p->sharable ); - return p->pBt->nRef; -} -#endif diff --git a/source/libs/tdb/src/sqlite/pager.c b/source/libs/tdb/src/sqlite/pager.c deleted file mode 100644 index 573e9ef316..0000000000 --- a/source/libs/tdb/src/sqlite/pager.c +++ /dev/null @@ -1,6851 +0,0 @@ -/* -** 2001 September 15 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This is the implementation of the page cache subsystem or "pager". -** -** The pager is used to access a database disk file. It implements -** atomic commit and rollback through the use of a journal file that -** is separate from the database file. The pager also implements file -** locking to prevent two processes from writing the same database -** file simultaneously, or one process from reading the database while -** another is writing. -*/ -#include "sqliteInt.h" -// #include "wal.h" - -// /* -// ** Macros for troubleshooting. Normally turned off -// */ -// #if 0 -// int sqlite3PagerTrace=1; /* True to enable tracing */ -// #define sqlite3DebugPrintf printf -// #define PAGERTRACE(X) if( sqlite3PagerTrace ){ sqlite3DebugPrintf X; } -// #else -// #define PAGERTRACE(X) -// #endif - -// /* -// ** The following two macros are used within the PAGERTRACE() macros above -// ** to print out file-descriptors. -// ** -// ** PAGERID() takes a pointer to a Pager struct as its argument. The -// ** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file -// ** struct as its argument. 
-// */ -// #define PAGERID(p) (SQLITE_PTR_TO_INT(p->fd)) -// #define FILEHANDLEID(fd) (SQLITE_PTR_TO_INT(fd)) - -#define PAGER_OPEN 0 -#define PAGER_READER 1 -#define PAGER_WRITER_LOCKED 2 -#define PAGER_WRITER_CACHEMOD 3 -#define PAGER_WRITER_DBMOD 4 -#define PAGER_WRITER_FINISHED 5 -#define PAGER_ERROR 6 - -// #define UNKNOWN_LOCK (EXCLUSIVE_LOCK+1) - -// /* -// ** The maximum allowed sector size. 64KiB. If the xSectorsize() method -// ** returns a value larger than this, then MAX_SECTOR_SIZE is used instead. -// ** This could conceivably cause corruption following a power failure on -// ** such a system. This is currently an undocumented limit. -// */ -// #define MAX_SECTOR_SIZE 0x10000 - -// typedef struct PagerSavepoint PagerSavepoint; -// struct PagerSavepoint { -// i64 iOffset; /* Starting offset in main journal */ -// i64 iHdrOffset; /* See above */ -// Bitvec *pInSavepoint; /* Set of pages in this savepoint */ -// Pgno nOrig; /* Original number of pages in file */ -// Pgno iSubRec; /* Index of first record in sub-journal */ -// int bTruncateOnRelease; /* If stmt journal may be truncated on RELEASE */ -// #ifndef SQLITE_OMIT_WAL -// u32 aWalData[WAL_SAVEPOINT_NDATA]; /* WAL savepoint context */ -// #endif -// }; - -// /* -// ** Bits of the Pager.doNotSpill flag. See further description below. -// */ -// #define SPILLFLAG_OFF 0x01 /* Never spill cache. Set via pragma */ -// #define SPILLFLAG_ROLLBACK 0x02 /* Current rolling back, so do not spill */ -// #define SPILLFLAG_NOSYNC 0x04 /* Spill is ok, but do not sync */ - -struct Pager { - u8 exclusiveMode; /* Boolean. 
True if locking_mode==EXCLUSIVE */ - u8 journalMode; /* One of the PAGER_JOURNALMODE_* values */ - u8 useJournal; /* Use a rollback journal on this file */ - u8 noSync; /* Do not sync the journal if true */ - u8 fullSync; /* Do extra syncs of the journal for robustness */ - u8 extraSync; /* sync directory after journal delete */ - u8 syncFlags; /* SYNC_NORMAL or SYNC_FULL otherwise */ - u8 walSyncFlags; /* See description above */ - u8 tempFile; /* zFilename is a temporary or immutable file */ - u8 noLock; /* Do not lock (except in WAL mode) */ - u8 readOnly; /* True for a read-only database */ - u8 memDb; /* True to inhibit all file I/O */ - u8 memVfs; /* VFS-implemented memory database */ - - /************************************************************************** - ** The following block contains those class members that change during - ** routine operation. Class members not in this block are either fixed - ** when the pager is first created or else only change when there is a - ** significant mode change (such as changing the page_size, locking_mode, - ** or the journal_mode). From another view, these class members describe - ** the "state" of the pager, while other class members describe the - ** "configuration" of the pager. - */ - u8 eState; /* Pager state (OPEN, READER, WRITER_LOCKED..) 
*/ - u8 eLock; /* Current lock held on database file */ - u8 changeCountDone; /* Set after incrementing the change-counter */ - u8 setSuper; /* Super-jrnl name is written into jrnl */ - u8 doNotSpill; /* Do not spill the cache when non-zero */ - u8 subjInMemory; /* True to use in-memory sub-journals */ - u8 bUseFetch; /* True to use xFetch() */ - u8 hasHeldSharedLock; /* True if a shared lock has ever been held */ - Pgno dbSize; /* Number of pages in the database */ - Pgno dbOrigSize; /* dbSize before the current transaction */ - Pgno dbFileSize; /* Number of pages in the database file */ - Pgno dbHintSize; /* Value passed to FCNTL_SIZE_HINT call */ - int errCode; /* One of several kinds of errors */ - int nRec; /* Pages journalled since last j-header written */ - u32 cksumInit; /* Quasi-random value added to every checksum */ - u32 nSubRec; /* Number of records written to sub-journal */ - // Bitvec *pInJournal; /* One bit for each page in the database file */ - int fd; /* File descriptor for database */ - int jfd; /* File descriptor for main journal */ - int sjfd; /* File descriptor for sub-journal */ - // i64 journalOff; /* Current write offset in the journal file */ - // i64 journalHdr; /* Byte offset to previous journal header */ - // sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */ - // PagerSavepoint *aSavepoint; /* Array of active savepoints */ - // int nSavepoint; /* Number of elements in aSavepoint[] */ - // u32 iDataVersion; /* Changes whenever database content changes */ - // char dbFileVers[16]; /* Changes whenever database file changes */ - - // int nMmapOut; /* Number of mmap pages currently outstanding */ - // sqlite3_int64 szMmap; /* Desired maximum mmap size */ - // PgHdr *pMmapFreelist; /* List of free mmap page headers (pDirty) */ - // /* - // ** End of the routinely-changing class members - // ***************************************************************************/ - - // u16 nExtra; /* Add this many bytes to each 
in-memory page */ - // i16 nReserve; /* Number of unused bytes at end of each page */ - u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */ - // u32 sectorSize; /* Assumed sector size during rollback */ - // Pgno mxPgno; /* Maximum allowed size of the database */ - // i64 pageSize; /* Number of bytes in a page */ - // i64 journalSizeLimit; /* Size limit for persistent journal files */ - char *zFilename; /* Name of the database file */ - char *zJournal; /* Name of the journal file */ - // int (*xBusyHandler)(void*); /* Function to call when busy */ - // void *pBusyHandlerArg; /* Context argument for xBusyHandler */ - // int aStat[4]; /* Total cache hits, misses, writes, spills */ - // #ifdef SQLITE_TEST - // int nRead; /* Database pages read */ - // #endif - void (*xReiniter)(DbPage *); /* Call this routine when reloading pages */ - // int (*xGet)(Pager*,Pgno,DbPage**,int); /* Routine to fetch a patch */ - // char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */ - PCache *pPCache; /* Pointer to page cache object */ - // Wal *pWal; /* Write-ahead log used by "journal_mode=wal" */ - char *zWal; /* File name for write-ahead log */ -}; - -// /* -// ** Indexes for use with Pager.aStat[]. The Pager.aStat[] array contains -// ** the values accessed by passing SQLITE_DBSTATUS_CACHE_HIT, CACHE_MISS -// ** or CACHE_WRITE to sqlite3_db_status(). -// */ -// #define PAGER_STAT_HIT 0 -// #define PAGER_STAT_MISS 1 -// #define PAGER_STAT_WRITE 2 -// #define PAGER_STAT_SPILL 3 - -// /* -// ** The following global variables hold counters used for -// ** testing purposes only. These variables do not exist in -// ** a non-testing build. These variables are not thread-safe. 
-// */ -// #ifdef SQLITE_TEST -// int sqlite3_pager_readdb_count = 0; /* Number of full pages read from DB */ -// int sqlite3_pager_writedb_count = 0; /* Number of full pages written to DB */ -// int sqlite3_pager_writej_count = 0; /* Number of pages written to journal */ -// # define PAGER_INCR(v) v++ -// #else -// # define PAGER_INCR(v) -// #endif - -static const unsigned char aJournalMagic[] = { - 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7, -}; - -// /* -// ** The size of the of each page record in the journal is given by -// ** the following macro. -// */ -// #define JOURNAL_PG_SZ(pPager) ((pPager->pageSize) + 8) - -// /* -// ** The journal header size for this pager. This is usually the same -// ** size as a single disk sector. See also setSectorSize(). -// */ -// #define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize) - -// /* -// ** The macro MEMDB is true if we are dealing with an in-memory database. -// ** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set, -// ** the value of MEMDB will be a constant and the compiler will optimize -// ** out code that would never execute. -// */ -// #ifdef SQLITE_OMIT_MEMORYDB -// # define MEMDB 0 -// #else -// # define MEMDB pPager->memDb -// #endif - -// /* -// ** The macro USEFETCH is true if we are allowed to use the xFetch and xUnfetch -// ** interfaces to access the database using memory-mapped I/O. -// */ -// #if SQLITE_MAX_MMAP_SIZE>0 -// # define USEFETCH(x) ((x)->bUseFetch) -// #else -// # define USEFETCH(x) 0 -// #endif - -// /* -// ** The argument to this macro is a file descriptor (type sqlite3_file*). -// ** Return 0 if it is not open, or non-zero (but not 1) if it is. -// ** -// ** This is so that expressions can be written as: -// ** -// ** if( isOpen(pPager->jfd) ){ ... -// ** -// ** instead of -// ** -// ** if( pPager->jfd->pMethods ){ ... 
-// */ -// #define isOpen(pFd) ((pFd)->pMethods!=0) - -// #ifdef SQLITE_DIRECT_OVERFLOW_READ -// /* -// ** Return true if page pgno can be read directly from the database file -// ** by the b-tree layer. This is the case if: -// ** -// ** * the database file is open, -// ** * there are no dirty pages in the cache, and -// ** * the desired page is not currently in the wal file. -// */ -// int sqlite3PagerDirectReadOk(Pager *pPager, Pgno pgno){ -// if( pPager->fd->pMethods==0 ) return 0; -// if( sqlite3PCacheIsDirty(pPager->pPCache) ) return 0; -// #ifndef SQLITE_OMIT_WAL -// if( pPager->pWal ){ -// u32 iRead = 0; -// int rc; -// rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iRead); -// return (rc==SQLITE_OK && iRead==0); -// } -// #endif -// return 1; -// } -// #endif - -// #ifndef SQLITE_OMIT_WAL -// # define pagerUseWal(x) ((x)->pWal!=0) -// #else -// # define pagerUseWal(x) 0 -// # define pagerRollbackWal(x) 0 -// # define pagerWalFrames(v,w,x,y) 0 -// # define pagerOpenWalIfPresent(z) SQLITE_OK -// # define pagerBeginReadTransaction(z) SQLITE_OK -// #endif - -// #ifndef NDEBUG -// /* -// ** Usage: -// ** -// ** assert( assert_pager_state(pPager) ); -// ** -// ** This function runs many asserts to try to find inconsistencies in -// ** the internal state of the Pager object. -// */ -// static int assert_pager_state(Pager *p){ -// Pager *pPager = p; - -// /* State must be valid. */ -// assert( p->eState==PAGER_OPEN -// || p->eState==PAGER_READER -// || p->eState==PAGER_WRITER_LOCKED -// || p->eState==PAGER_WRITER_CACHEMOD -// || p->eState==PAGER_WRITER_DBMOD -// || p->eState==PAGER_WRITER_FINISHED -// || p->eState==PAGER_ERROR -// ); - -// /* Regardless of the current state, a temp-file connection always behaves -// ** as if it has an exclusive lock on the database file. It never updates -// ** the change-counter field, so the changeCountDone flag is always set. 
-// */ -// assert( p->tempFile==0 || p->eLock==EXCLUSIVE_LOCK ); -// assert( p->tempFile==0 || pPager->changeCountDone ); - -// /* If the useJournal flag is clear, the journal-mode must be "OFF". -// ** And if the journal-mode is "OFF", the journal file must not be open. -// */ -// assert( p->journalMode==PAGER_JOURNALMODE_OFF || p->useJournal ); -// assert( p->journalMode!=PAGER_JOURNALMODE_OFF || !isOpen(p->jfd) ); - -// /* Check that MEMDB implies noSync. And an in-memory journal. Since -// ** this means an in-memory pager performs no IO at all, it cannot encounter -// ** either SQLITE_IOERR or SQLITE_FULL during rollback or while finalizing -// ** a journal file. (although the in-memory journal implementation may -// ** return SQLITE_IOERR_NOMEM while the journal file is being written). It -// ** is therefore not possible for an in-memory pager to enter the ERROR -// ** state. -// */ -// if( MEMDB ){ -// assert( !isOpen(p->fd) ); -// assert( p->noSync ); -// assert( p->journalMode==PAGER_JOURNALMODE_OFF -// || p->journalMode==PAGER_JOURNALMODE_MEMORY -// ); -// assert( p->eState!=PAGER_ERROR && p->eState!=PAGER_OPEN ); -// assert( pagerUseWal(p)==0 ); -// } - -// /* If changeCountDone is set, a RESERVED lock or greater must be held -// ** on the file. 
-// */ -// assert( pPager->changeCountDone==0 || pPager->eLock>=RESERVED_LOCK ); -// assert( p->eLock!=PENDING_LOCK ); - -// switch( p->eState ){ -// case PAGER_OPEN: -// assert( !MEMDB ); -// assert( pPager->errCode==SQLITE_OK ); -// assert( sqlite3PcacheRefCount(pPager->pPCache)==0 || pPager->tempFile ); -// break; - -// case PAGER_READER: -// assert( pPager->errCode==SQLITE_OK ); -// assert( p->eLock!=UNKNOWN_LOCK ); -// assert( p->eLock>=SHARED_LOCK ); -// break; - -// case PAGER_WRITER_LOCKED: -// assert( p->eLock!=UNKNOWN_LOCK ); -// assert( pPager->errCode==SQLITE_OK ); -// if( !pagerUseWal(pPager) ){ -// assert( p->eLock>=RESERVED_LOCK ); -// } -// assert( pPager->dbSize==pPager->dbOrigSize ); -// assert( pPager->dbOrigSize==pPager->dbFileSize ); -// assert( pPager->dbOrigSize==pPager->dbHintSize ); -// assert( pPager->setSuper==0 ); -// break; - -// case PAGER_WRITER_CACHEMOD: -// assert( p->eLock!=UNKNOWN_LOCK ); -// assert( pPager->errCode==SQLITE_OK ); -// if( !pagerUseWal(pPager) ){ -// /* It is possible that if journal_mode=wal here that neither the -// ** journal file nor the WAL file are open. This happens during -// ** a rollback transaction that switches from journal_mode=off -// ** to journal_mode=wal. 
-// */ -// assert( p->eLock>=RESERVED_LOCK ); -// assert( isOpen(p->jfd) -// || p->journalMode==PAGER_JOURNALMODE_OFF -// || p->journalMode==PAGER_JOURNALMODE_WAL -// ); -// } -// assert( pPager->dbOrigSize==pPager->dbFileSize ); -// assert( pPager->dbOrigSize==pPager->dbHintSize ); -// break; - -// case PAGER_WRITER_DBMOD: -// assert( p->eLock==EXCLUSIVE_LOCK ); -// assert( pPager->errCode==SQLITE_OK ); -// assert( !pagerUseWal(pPager) ); -// assert( p->eLock>=EXCLUSIVE_LOCK ); -// assert( isOpen(p->jfd) -// || p->journalMode==PAGER_JOURNALMODE_OFF -// || p->journalMode==PAGER_JOURNALMODE_WAL -// || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC) -// ); -// assert( pPager->dbOrigSize<=pPager->dbHintSize ); -// break; - -// case PAGER_WRITER_FINISHED: -// assert( p->eLock==EXCLUSIVE_LOCK ); -// assert( pPager->errCode==SQLITE_OK ); -// assert( !pagerUseWal(pPager) ); -// assert( isOpen(p->jfd) -// || p->journalMode==PAGER_JOURNALMODE_OFF -// || p->journalMode==PAGER_JOURNALMODE_WAL -// || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC) -// ); -// break; - -// case PAGER_ERROR: -// /* There must be at least one outstanding reference to the pager if -// ** in ERROR state. Otherwise the pager should have already dropped -// ** back to OPEN state. -// */ -// assert( pPager->errCode!=SQLITE_OK ); -// assert( sqlite3PcacheRefCount(pPager->pPCache)>0 || pPager->tempFile ); -// break; -// } - -// return 1; -// } -// #endif /* ifndef NDEBUG */ - -// #ifdef SQLITE_DEBUG -// /* -// ** Return a pointer to a human readable string in a static buffer -// ** containing the state of the Pager object passed as an argument. This -// ** is intended to be used within debuggers. For example, as an alternative -// ** to "print *pPager" in gdb: -// ** -// ** (gdb) printf "%s", print_pager_state(pPager) -// ** -// ** This routine has external linkage in order to suppress compiler warnings -// ** about an unused function. 
It is enclosed within SQLITE_DEBUG and so does -// ** not appear in normal builds. -// */ -// char *print_pager_state(Pager *p){ -// static char zRet[1024]; - -// sqlite3_snprintf(1024, zRet, -// "Filename: %s\n" -// "State: %s errCode=%d\n" -// "Lock: %s\n" -// "Locking mode: locking_mode=%s\n" -// "Journal mode: journal_mode=%s\n" -// "Backing store: tempFile=%d memDb=%d useJournal=%d\n" -// "Journal: journalOff=%lld journalHdr=%lld\n" -// "Size: dbsize=%d dbOrigSize=%d dbFileSize=%d\n" -// , p->zFilename -// , p->eState==PAGER_OPEN ? "OPEN" : -// p->eState==PAGER_READER ? "READER" : -// p->eState==PAGER_WRITER_LOCKED ? "WRITER_LOCKED" : -// p->eState==PAGER_WRITER_CACHEMOD ? "WRITER_CACHEMOD" : -// p->eState==PAGER_WRITER_DBMOD ? "WRITER_DBMOD" : -// p->eState==PAGER_WRITER_FINISHED ? "WRITER_FINISHED" : -// p->eState==PAGER_ERROR ? "ERROR" : "?error?" -// , (int)p->errCode -// , p->eLock==NO_LOCK ? "NO_LOCK" : -// p->eLock==RESERVED_LOCK ? "RESERVED" : -// p->eLock==EXCLUSIVE_LOCK ? "EXCLUSIVE" : -// p->eLock==SHARED_LOCK ? "SHARED" : -// p->eLock==UNKNOWN_LOCK ? "UNKNOWN" : "?error?" -// , p->exclusiveMode ? "exclusive" : "normal" -// , p->journalMode==PAGER_JOURNALMODE_MEMORY ? "memory" : -// p->journalMode==PAGER_JOURNALMODE_OFF ? "off" : -// p->journalMode==PAGER_JOURNALMODE_DELETE ? "delete" : -// p->journalMode==PAGER_JOURNALMODE_PERSIST ? "persist" : -// p->journalMode==PAGER_JOURNALMODE_TRUNCATE ? "truncate" : -// p->journalMode==PAGER_JOURNALMODE_WAL ? "wal" : "?error?" 
-// , (int)p->tempFile, (int)p->memDb, (int)p->useJournal -// , p->journalOff, p->journalHdr -// , (int)p->dbSize, (int)p->dbOrigSize, (int)p->dbFileSize -// ); - -// return zRet; -// } -// #endif - -// /* Forward references to the various page getters */ -// static int getPageNormal(Pager*,Pgno,DbPage**,int); -// static int getPageError(Pager*,Pgno,DbPage**,int); -// #if SQLITE_MAX_MMAP_SIZE>0 -// static int getPageMMap(Pager*,Pgno,DbPage**,int); -// #endif - -// /* -// ** Set the Pager.xGet method for the appropriate routine used to fetch -// ** content from the pager. -// */ -// static void setGetterMethod(Pager *pPager){ -// if( pPager->errCode ){ -// pPager->xGet = getPageError; -// #if SQLITE_MAX_MMAP_SIZE>0 -// }else if( USEFETCH(pPager) ){ -// pPager->xGet = getPageMMap; -// #endif /* SQLITE_MAX_MMAP_SIZE>0 */ -// }else{ -// pPager->xGet = getPageNormal; -// } -// } - -// /* -// ** Return true if it is necessary to write page *pPg into the sub-journal. -// ** A page needs to be written into the sub-journal if there exists one -// ** or more open savepoints for which: -// ** -// ** * The page-number is less than or equal to PagerSavepoint.nOrig, and -// ** * The bit corresponding to the page-number is not set in -// ** PagerSavepoint.pInSavepoint. -// */ -// static int subjRequiresPage(PgHdr *pPg){ -// Pager *pPager = pPg->pPager; -// PagerSavepoint *p; -// Pgno pgno = pPg->pgno; -// int i; -// for(i=0; inSavepoint; i++){ -// p = &pPager->aSavepoint[i]; -// if( p->nOrig>=pgno && 0==sqlite3BitvecTestNotNull(p->pInSavepoint, pgno) ){ -// for(i=i+1; inSavepoint; i++){ -// pPager->aSavepoint[i].bTruncateOnRelease = 0; -// } -// return 1; -// } -// } -// return 0; -// } - -// #ifdef SQLITE_DEBUG -// /* -// ** Return true if the page is already in the journal file. 
-// */ -// static int pageInJournal(Pager *pPager, PgHdr *pPg){ -// return sqlite3BitvecTest(pPager->pInJournal, pPg->pgno); -// } -// #endif - -// /* -// ** Read a 32-bit integer from the given file descriptor. Store the integer -// ** that is read in *pRes. Return SQLITE_OK if everything worked, or an -// ** error code is something goes wrong. -// ** -// ** All values are stored on disk as big-endian. -// */ -// static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){ -// unsigned char ac[4]; -// int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset); -// if( rc==SQLITE_OK ){ -// *pRes = sqlite3Get4byte(ac); -// } -// return rc; -// } - -// /* -// ** Write a 32-bit integer into a string buffer in big-endian byte order. -// */ -// #define put32bits(A,B) sqlite3Put4byte((u8*)A,B) - -// /* -// ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK -// ** on success or an error code is something goes wrong. -// */ -// static int write32bits(sqlite3_file *fd, i64 offset, u32 val){ -// char ac[4]; -// put32bits(ac, val); -// return sqlite3OsWrite(fd, ac, 4, offset); -// } - -// /* -// ** Unlock the database file to level eLock, which must be either NO_LOCK -// ** or SHARED_LOCK. Regardless of whether or not the call to xUnlock() -// ** succeeds, set the Pager.eLock variable to match the (attempted) new lock. -// ** -// ** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is -// ** called, do not modify it. See the comment above the #define of -// ** UNKNOWN_LOCK for an explanation of this. -// */ -// static int pagerUnlockDb(Pager *pPager, int eLock){ -// int rc = SQLITE_OK; - -// assert( !pPager->exclusiveMode || pPager->eLock==eLock ); -// assert( eLock==NO_LOCK || eLock==SHARED_LOCK ); -// assert( eLock!=NO_LOCK || pagerUseWal(pPager)==0 ); -// if( isOpen(pPager->fd) ){ -// assert( pPager->eLock>=eLock ); -// rc = pPager->noLock ? 
SQLITE_OK : sqlite3OsUnlock(pPager->fd, eLock); -// if( pPager->eLock!=UNKNOWN_LOCK ){ -// pPager->eLock = (u8)eLock; -// } -// IOTRACE(("UNLOCK %p %d\n", pPager, eLock)) -// } -// pPager->changeCountDone = pPager->tempFile; /* ticket fb3b3024ea238d5c */ -// return rc; -// } - -// /* -// ** Lock the database file to level eLock, which must be either SHARED_LOCK, -// ** RESERVED_LOCK or EXCLUSIVE_LOCK. If the caller is successful, set the -// ** Pager.eLock variable to the new locking state. -// ** -// ** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is -// ** called, do not modify it unless the new locking state is EXCLUSIVE_LOCK. -// ** See the comment above the #define of UNKNOWN_LOCK for an explanation -// ** of this. -// */ -// static int pagerLockDb(Pager *pPager, int eLock){ -// int rc = SQLITE_OK; - -// assert( eLock==SHARED_LOCK || eLock==RESERVED_LOCK || eLock==EXCLUSIVE_LOCK ); -// if( pPager->eLockeLock==UNKNOWN_LOCK ){ -// rc = pPager->noLock ? SQLITE_OK : sqlite3OsLock(pPager->fd, eLock); -// if( rc==SQLITE_OK && (pPager->eLock!=UNKNOWN_LOCK||eLock==EXCLUSIVE_LOCK) ){ -// pPager->eLock = (u8)eLock; -// IOTRACE(("LOCK %p %d\n", pPager, eLock)) -// } -// } -// return rc; -// } - -// /* -// ** This function determines whether or not the atomic-write or -// ** atomic-batch-write optimizations can be used with this pager. The -// ** atomic-write optimization can be used if: -// ** -// ** (a) the value returned by OsDeviceCharacteristics() indicates that -// ** a database page may be written atomically, and -// ** (b) the value returned by OsSectorSize() is less than or equal -// ** to the page size. -// ** -// ** If it can be used, then the value returned is the size of the journal -// ** file when it contains rollback data for exactly one page. -// ** -// ** The atomic-batch-write optimization can be used if OsDeviceCharacteristics() -// ** returns a value with the SQLITE_IOCAP_BATCH_ATOMIC bit set. -1 is -// ** returned in this case. 
-// ** -// ** If neither optimization can be used, 0 is returned. -// */ -// static int jrnlBufferSize(Pager *pPager){ -// assert( !MEMDB ); - -// #if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ -// || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) -// int dc; /* Device characteristics */ - -// assert( isOpen(pPager->fd) ); -// dc = sqlite3OsDeviceCharacteristics(pPager->fd); -// #else -// UNUSED_PARAMETER(pPager); -// #endif - -// #ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE -// if( pPager->dbSize>0 && (dc&SQLITE_IOCAP_BATCH_ATOMIC) ){ -// return -1; -// } -// #endif - -// #ifdef SQLITE_ENABLE_ATOMIC_WRITE -// { -// int nSector = pPager->sectorSize; -// int szPage = pPager->pageSize; - -// assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); -// assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); -// if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8)) || nSector>szPage) ){ -// return 0; -// } -// } - -// return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager); -// #endif - -// return 0; -// } - -// /* -// ** If SQLITE_CHECK_PAGES is defined then we do some sanity checking -// ** on the cache using a hash function. This is used for testing -// ** and debugging only. -// */ -// #ifdef SQLITE_CHECK_PAGES -// /* -// ** Return a 32-bit hash of the page data for pPage. -// */ -// static u32 pager_datahash(int nByte, unsigned char *pData){ -// u32 hash = 0; -// int i; -// for(i=0; ipPager->pageSize, (unsigned char *)pPage->pData); -// } -// static void pager_set_pagehash(PgHdr *pPage){ -// pPage->pageHash = pager_pagehash(pPage); -// } - -// /* -// ** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES -// ** is defined, and NDEBUG is not defined, an assert() statement checks -// ** that the page is either dirty or still matches the calculated page-hash. 
-// */ -// #define CHECK_PAGE(x) checkPage(x) -// static void checkPage(PgHdr *pPg){ -// Pager *pPager = pPg->pPager; -// assert( pPager->eState!=PAGER_ERROR ); -// assert( (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) ); -// } - -// #else -// #define pager_datahash(X,Y) 0 -// #define pager_pagehash(X) 0 -// #define pager_set_pagehash(X) -// #define CHECK_PAGE(x) -// #endif /* SQLITE_CHECK_PAGES */ - -// /* -// ** When this is called the journal file for pager pPager must be open. -// ** This function attempts to read a super-journal file name from the -// ** end of the file and, if successful, copies it into memory supplied -// ** by the caller. See comments above writeSuperJournal() for the format -// ** used to store a super-journal file name at the end of a journal file. -// ** -// ** zSuper must point to a buffer of at least nSuper bytes allocated by -// ** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is -// ** enough space to write the super-journal name). If the super-journal -// ** name in the journal is longer than nSuper bytes (including a -// ** nul-terminator), then this is handled as if no super-journal name -// ** were present in the journal. -// ** -// ** If a super-journal file name is present at the end of the journal -// ** file, then it is copied into the buffer pointed to by zSuper. A -// ** nul-terminator byte is appended to the buffer following the -// ** super-journal file name. -// ** -// ** If it is determined that no super-journal file name is present -// ** zSuper[0] is set to 0 and SQLITE_OK returned. -// ** -// ** If an error occurs while reading from the journal file, an SQLite -// ** error code is returned. 
-// */ -// static int readSuperJournal(sqlite3_file *pJrnl, char *zSuper, u32 nSuper){ -// int rc; /* Return code */ -// u32 len; /* Length in bytes of super-journal name */ -// i64 szJ; /* Total size in bytes of journal file pJrnl */ -// u32 cksum; /* MJ checksum value read from journal */ -// u32 u; /* Unsigned loop counter */ -// unsigned char aMagic[8]; /* A buffer to hold the magic header */ -// zSuper[0] = '\0'; - -// if( SQLITE_OK!=(rc = sqlite3OsFileSize(pJrnl, &szJ)) -// || szJ<16 -// || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-16, &len)) -// || len>=nSuper -// || len>szJ-16 -// || len==0 -// || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-12, &cksum)) -// || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8)) -// || memcmp(aMagic, aJournalMagic, 8) -// || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, zSuper, len, szJ-16-len)) -// ){ -// return rc; -// } - -// /* See if the checksum matches the super-journal name */ -// for(u=0; ujournalOff, assuming a sector -// ** size of pPager->sectorSize bytes. 
-// ** -// ** i.e for a sector size of 512: -// ** -// ** Pager.journalOff Return value -// ** --------------------------------------- -// ** 0 0 -// ** 512 512 -// ** 100 512 -// ** 2000 2048 -// ** -// */ -// static i64 journalHdrOffset(Pager *pPager){ -// i64 offset = 0; -// i64 c = pPager->journalOff; -// if( c ){ -// offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager); -// } -// assert( offset%JOURNAL_HDR_SZ(pPager)==0 ); -// assert( offset>=c ); -// assert( (offset-c)jfd) ); -// assert( !sqlite3JournalIsInMemory(pPager->jfd) ); -// if( pPager->journalOff ){ -// const i64 iLimit = pPager->journalSizeLimit; /* Local cache of jsl */ - -// IOTRACE(("JZEROHDR %p\n", pPager)) -// if( doTruncate || iLimit==0 ){ -// rc = sqlite3OsTruncate(pPager->jfd, 0); -// }else{ -// static const char zeroHdr[28] = {0}; -// rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0); -// } -// if( rc==SQLITE_OK && !pPager->noSync ){ -// rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->syncFlags); -// } - -// /* At this point the transaction is committed but the write lock -// ** is still held on the file. If there is a size limit configured for -// ** the persistent journal and the journal file currently consumes more -// ** space than that limit allows for, truncate it now. There is no need -// ** to sync the file following this operation. -// */ -// if( rc==SQLITE_OK && iLimit>0 ){ -// i64 sz; -// rc = sqlite3OsFileSize(pPager->jfd, &sz); -// if( rc==SQLITE_OK && sz>iLimit ){ -// rc = sqlite3OsTruncate(pPager->jfd, iLimit); -// } -// } -// } -// return rc; -// } - -// /* -// ** The journal file must be open when this routine is called. A journal -// ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the -// ** current location. -// ** -// ** The format for the journal header is as follows: -// ** - 8 bytes: Magic identifying journal format. -// ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on. 
-// ** - 4 bytes: Random number used for page hash. -// ** - 4 bytes: Initial database page count. -// ** - 4 bytes: Sector size used by the process that wrote this journal. -// ** - 4 bytes: Database page size. -// ** -// ** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space. -// */ -// static int writeJournalHdr(Pager *pPager){ -// int rc = SQLITE_OK; /* Return code */ -// char *zHeader = pPager->pTmpSpace; /* Temporary space used to build header */ -// u32 nHeader = (u32)pPager->pageSize;/* Size of buffer pointed to by zHeader */ -// u32 nWrite; /* Bytes of header sector written */ -// int ii; /* Loop counter */ - -// assert( isOpen(pPager->jfd) ); /* Journal file must be open. */ - -// if( nHeader>JOURNAL_HDR_SZ(pPager) ){ -// nHeader = JOURNAL_HDR_SZ(pPager); -// } - -// /* If there are active savepoints and any of them were created -// ** since the most recent journal header was written, update the -// ** PagerSavepoint.iHdrOffset fields now. -// */ -// for(ii=0; iinSavepoint; ii++){ -// if( pPager->aSavepoint[ii].iHdrOffset==0 ){ -// pPager->aSavepoint[ii].iHdrOffset = pPager->journalOff; -// } -// } - -// pPager->journalHdr = pPager->journalOff = journalHdrOffset(pPager); - -// /* -// ** Write the nRec Field - the number of page records that follow this -// ** journal header. Normally, zero is written to this value at this time. -// ** After the records are added to the journal (and the journal synced, -// ** if in full-sync mode), the zero is overwritten with the true number -// ** of records (see syncJournal()). -// ** -// ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When -// ** reading the journal this value tells SQLite to assume that the -// ** rest of the journal file contains valid page records. This assumption -// ** is dangerous, as if a failure occurred whilst writing to the journal -// ** file it may contain some garbage data. 
There are two scenarios -// ** where this risk can be ignored: -// ** -// ** * When the pager is in no-sync mode. Corruption can follow a -// ** power failure in this case anyway. -// ** -// ** * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees -// ** that garbage data is never appended to the journal file. -// */ -// assert( isOpen(pPager->fd) || pPager->noSync ); -// if( pPager->noSync || (pPager->journalMode==PAGER_JOURNALMODE_MEMORY) -// || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) -// ){ -// memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); -// put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff); -// }else{ -// memset(zHeader, 0, sizeof(aJournalMagic)+4); -// } - -// /* The random check-hash initializer */ -// sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit); -// put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit); -// /* The initial database size */ -// put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbOrigSize); -// /* The assumed sector size for this process */ -// put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize); - -// /* The page size */ -// put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize); - -// /* Initializing the tail of the buffer is not necessary. Everything -// ** works find if the following memset() is omitted. But initializing -// ** the memory prevents valgrind from complaining, so we are willing to -// ** take the performance hit. -// */ -// memset(&zHeader[sizeof(aJournalMagic)+20], 0, -// nHeader-(sizeof(aJournalMagic)+20)); - -// /* In theory, it is only necessary to write the 28 bytes that the -// ** journal header consumes to the journal file here. Then increment the -// ** Pager.journalOff variable by JOURNAL_HDR_SZ so that the next -// ** record is written to the following sector (leaving a gap in the file -// ** that will be implicitly filled in by the OS). 
-// ** -// ** However it has been discovered that on some systems this pattern can -// ** be significantly slower than contiguously writing data to the file, -// ** even if that means explicitly writing data to the block of -// ** (JOURNAL_HDR_SZ - 28) bytes that will not be used. So that is what -// ** is done. -// ** -// ** The loop is required here in case the sector-size is larger than the -// ** database page size. Since the zHeader buffer is only Pager.pageSize -// ** bytes in size, more than one call to sqlite3OsWrite() may be required -// ** to populate the entire journal header sector. -// */ -// for(nWrite=0; rc==SQLITE_OK&&nWritejournalHdr, nHeader)) -// rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff); -// assert( pPager->journalHdr <= pPager->journalOff ); -// pPager->journalOff += nHeader; -// } - -// return rc; -// } - -// /* -// ** The journal file must be open when this is called. A journal header file -// ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal -// ** file. The current location in the journal file is given by -// ** pPager->journalOff. See comments above function writeJournalHdr() for -// ** a description of the journal header format. -// ** -// ** If the header is read successfully, *pNRec is set to the number of -// ** page records following this header and *pDbSize is set to the size of the -// ** database before the transaction began, in pages. Also, pPager->cksumInit -// ** is set to the value read from the journal header. SQLITE_OK is returned -// ** in this case. -// ** -// ** If the journal header file appears to be corrupted, SQLITE_DONE is -// ** returned and *pNRec and *PDbSize are undefined. If JOURNAL_HDR_SZ bytes -// ** cannot be read from the journal file an error code is returned. 
-// */ -// static int readJournalHdr( -// Pager *pPager, /* Pager object */ -// int isHot, -// i64 journalSize, /* Size of the open journal file in bytes */ -// u32 *pNRec, /* OUT: Value read from the nRec field */ -// u32 *pDbSize /* OUT: Value of original database size field */ -// ){ -// int rc; /* Return code */ -// unsigned char aMagic[8]; /* A buffer to hold the magic header */ -// i64 iHdrOff; /* Offset of journal header being read */ - -// assert( isOpen(pPager->jfd) ); /* Journal file must be open. */ - -// /* Advance Pager.journalOff to the start of the next sector. If the -// ** journal file is too small for there to be a header stored at this -// ** point, return SQLITE_DONE. -// */ -// pPager->journalOff = journalHdrOffset(pPager); -// if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){ -// return SQLITE_DONE; -// } -// iHdrOff = pPager->journalOff; - -// /* Read in the first 8 bytes of the journal header. If they do not match -// ** the magic string found at the start of each journal header, return -// ** SQLITE_DONE. If an IO error occurs, return an error code. Otherwise, -// ** proceed. -// */ -// if( isHot || iHdrOff!=pPager->journalHdr ){ -// rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), iHdrOff); -// if( rc ){ -// return rc; -// } -// if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){ -// return SQLITE_DONE; -// } -// } - -// /* Read the first three 32-bit fields of the journal header: The nRec -// ** field, the checksum-initializer and the database size at the start -// ** of the transaction. Return an error code if anything goes wrong. 
-// */ -// if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+8, pNRec)) -// || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+12, &pPager->cksumInit)) -// || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+16, pDbSize)) -// ){ -// return rc; -// } - -// if( pPager->journalOff==0 ){ -// u32 iPageSize; /* Page-size field of journal header */ -// u32 iSectorSize; /* Sector-size field of journal header */ - -// /* Read the page-size and sector-size journal header fields. */ -// if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+20, &iSectorSize)) -// || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+24, &iPageSize)) -// ){ -// return rc; -// } - -// /* Versions of SQLite prior to 3.5.8 set the page-size field of the -// ** journal header to zero. In this case, assume that the Pager.pageSize -// ** variable is already set to the correct page size. -// */ -// if( iPageSize==0 ){ -// iPageSize = pPager->pageSize; -// } - -// /* Check that the values read from the page-size and sector-size fields -// ** are within range. To be 'in range', both values need to be a power -// ** of two greater than or equal to 512 or 32, and not greater than their -// ** respective compile time maximum limits. -// */ -// if( iPageSize<512 || iSectorSize<32 -// || iPageSize>SQLITE_MAX_PAGE_SIZE || iSectorSize>MAX_SECTOR_SIZE -// || ((iPageSize-1)&iPageSize)!=0 || ((iSectorSize-1)&iSectorSize)!=0 -// ){ -// /* If the either the page-size or sector-size in the journal-header is -// ** invalid, then the process that wrote the journal-header must have -// ** crashed before the header was synced. In this case stop reading -// ** the journal file here. -// */ -// return SQLITE_DONE; -// } - -// /* Update the page-size to match the value read from the journal. -// ** Use a testcase() macro to make sure that malloc failure within -// ** PagerSetPagesize() is tested. 
-// */ -// rc = sqlite3PagerSetPagesize(pPager, &iPageSize, -1); -// testcase( rc!=SQLITE_OK ); - -// /* Update the assumed sector-size to match the value used by -// ** the process that created this journal. If this journal was -// ** created by a process other than this one, then this routine -// ** is being called from within pager_playback(). The local value -// ** of Pager.sectorSize is restored at the end of that routine. -// */ -// pPager->sectorSize = iSectorSize; -// } - -// pPager->journalOff += JOURNAL_HDR_SZ(pPager); -// return rc; -// } - -// /* -// ** Write the supplied super-journal name into the journal file for pager -// ** pPager at the current location. The super-journal name must be the last -// ** thing written to a journal file. If the pager is in full-sync mode, the -// ** journal file descriptor is advanced to the next sector boundary before -// ** anything is written. The format is: -// ** -// ** + 4 bytes: PAGER_MJ_PGNO. -// ** + N bytes: super-journal filename in utf-8. -// ** + 4 bytes: N (length of super-journal name in bytes, no nul-terminator). -// ** + 4 bytes: super-journal name checksum. -// ** + 8 bytes: aJournalMagic[]. -// ** -// ** The super-journal page checksum is the sum of the bytes in thesuper-journal -// ** name, where each byte is interpreted as a signed 8-bit integer. -// ** -// ** If zSuper is a NULL pointer (occurs for a single database transaction), -// ** this call is a no-op. 
-// */ -// static int writeSuperJournal(Pager *pPager, const char *zSuper){ -// int rc; /* Return code */ -// int nSuper; /* Length of string zSuper */ -// i64 iHdrOff; /* Offset of header in journal file */ -// i64 jrnlSize; /* Size of journal file on disk */ -// u32 cksum = 0; /* Checksum of string zSuper */ - -// assert( pPager->setSuper==0 ); -// assert( !pagerUseWal(pPager) ); - -// if( !zSuper -// || pPager->journalMode==PAGER_JOURNALMODE_MEMORY -// || !isOpen(pPager->jfd) -// ){ -// return SQLITE_OK; -// } -// pPager->setSuper = 1; -// assert( pPager->journalHdr <= pPager->journalOff ); - -// /* Calculate the length in bytes and the checksum of zSuper */ -// for(nSuper=0; zSuper[nSuper]; nSuper++){ -// cksum += zSuper[nSuper]; -// } - -// /* If in full-sync mode, advance to the next disk sector before writing -// ** the super-journal name. This is in case the previous page written to -// ** the journal has already been synced. -// */ -// if( pPager->fullSync ){ -// pPager->journalOff = journalHdrOffset(pPager); -// } -// iHdrOff = pPager->journalOff; - -// /* Write the super-journal data to the end of the journal file. If -// ** an error occurs, return the error code to the caller. -// */ -// if( (0 != (rc = write32bits(pPager->jfd, iHdrOff, PAGER_MJ_PGNO(pPager)))) -// || (0 != (rc = sqlite3OsWrite(pPager->jfd, zSuper, nSuper, iHdrOff+4))) -// || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nSuper, nSuper))) -// || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nSuper+4, cksum))) -// || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, -// iHdrOff+4+nSuper+8))) -// ){ -// return rc; -// } -// pPager->journalOff += (nSuper+20); - -// /* If the pager is in peristent-journal mode, then the physical -// ** journal-file may extend past the end of the super-journal name -// ** and 8 bytes of magic data just written to the file. 
This is -// ** dangerous because the code to rollback a hot-journal file -// ** will not be able to find the super-journal name to determine -// ** whether or not the journal is hot. -// ** -// ** Easiest thing to do in this scenario is to truncate the journal -// ** file to the required size. -// */ -// if( SQLITE_OK==(rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize)) -// && jrnlSize>pPager->journalOff -// ){ -// rc = sqlite3OsTruncate(pPager->jfd, pPager->journalOff); -// } -// return rc; -// } - -// /* -// ** Discard the entire contents of the in-memory page-cache. -// */ -// static void pager_reset(Pager *pPager){ -// pPager->iDataVersion++; -// sqlite3BackupRestart(pPager->pBackup); -// sqlite3PcacheClear(pPager->pPCache); -// } - -// /* -// ** Return the pPager->iDataVersion value -// */ -// u32 sqlite3PagerDataVersion(Pager *pPager){ -// return pPager->iDataVersion; -// } - -// /* -// ** Free all structures in the Pager.aSavepoint[] array and set both -// ** Pager.aSavepoint and Pager.nSavepoint to zero. Close the sub-journal -// ** if it is open and the pager is not in exclusive mode. -// */ -// static void releaseAllSavepoints(Pager *pPager){ -// int ii; /* Iterator for looping through Pager.aSavepoint */ -// for(ii=0; iinSavepoint; ii++){ -// sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint); -// } -// if( !pPager->exclusiveMode || sqlite3JournalIsInMemory(pPager->sjfd) ){ -// sqlite3OsClose(pPager->sjfd); -// } -// sqlite3_free(pPager->aSavepoint); -// pPager->aSavepoint = 0; -// pPager->nSavepoint = 0; -// pPager->nSubRec = 0; -// } - -// /* -// ** Set the bit number pgno in the PagerSavepoint.pInSavepoint -// ** bitvecs of all open savepoints. Return SQLITE_OK if successful -// ** or SQLITE_NOMEM if a malloc failure occurs. 
-// */ -// static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){ -// int ii; /* Loop counter */ -// int rc = SQLITE_OK; /* Result code */ - -// for(ii=0; iinSavepoint; ii++){ -// PagerSavepoint *p = &pPager->aSavepoint[ii]; -// if( pgno<=p->nOrig ){ -// rc |= sqlite3BitvecSet(p->pInSavepoint, pgno); -// testcase( rc==SQLITE_NOMEM ); -// assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); -// } -// } -// return rc; -// } - -// /* -// ** This function is a no-op if the pager is in exclusive mode and not -// ** in the ERROR state. Otherwise, it switches the pager to PAGER_OPEN -// ** state. -// ** -// ** If the pager is not in exclusive-access mode, the database file is -// ** completely unlocked. If the file is unlocked and the file-system does -// ** not exhibit the UNDELETABLE_WHEN_OPEN property, the journal file is -// ** closed (if it is open). -// ** -// ** If the pager is in ERROR state when this function is called, the -// ** contents of the pager cache are discarded before switching back to -// ** the OPEN state. Regardless of whether the pager is in exclusive-mode -// ** or not, any journal file left in the file-system will be treated -// ** as a hot-journal and rolled back the next time a read-transaction -// ** is opened (by this or by any other connection). 
-// */ -// static void pager_unlock(Pager *pPager){ - -// assert( pPager->eState==PAGER_READER -// || pPager->eState==PAGER_OPEN -// || pPager->eState==PAGER_ERROR -// ); - -// sqlite3BitvecDestroy(pPager->pInJournal); -// pPager->pInJournal = 0; -// releaseAllSavepoints(pPager); - -// if( pagerUseWal(pPager) ){ -// assert( !isOpen(pPager->jfd) ); -// sqlite3WalEndReadTransaction(pPager->pWal); -// pPager->eState = PAGER_OPEN; -// }else if( !pPager->exclusiveMode ){ -// int rc; /* Error code returned by pagerUnlockDb() */ -// int iDc = isOpen(pPager->fd)?sqlite3OsDeviceCharacteristics(pPager->fd):0; - -// /* If the operating system support deletion of open files, then -// ** close the journal file when dropping the database lock. Otherwise -// ** another connection with journal_mode=delete might delete the file -// ** out from under us. -// */ -// assert( (PAGER_JOURNALMODE_MEMORY & 5)!=1 ); -// assert( (PAGER_JOURNALMODE_OFF & 5)!=1 ); -// assert( (PAGER_JOURNALMODE_WAL & 5)!=1 ); -// assert( (PAGER_JOURNALMODE_DELETE & 5)!=1 ); -// assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 ); -// assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 ); -// if( 0==(iDc & SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN) -// || 1!=(pPager->journalMode & 5) -// ){ -// sqlite3OsClose(pPager->jfd); -// } - -// /* If the pager is in the ERROR state and the call to unlock the database -// ** file fails, set the current lock to UNKNOWN_LOCK. See the comment -// ** above the #define for UNKNOWN_LOCK for an explanation of why this -// ** is necessary. -// */ -// rc = pagerUnlockDb(pPager, NO_LOCK); -// if( rc!=SQLITE_OK && pPager->eState==PAGER_ERROR ){ -// pPager->eLock = UNKNOWN_LOCK; -// } - -// /* The pager state may be changed from PAGER_ERROR to PAGER_OPEN here -// ** without clearing the error code. This is intentional - the error -// ** code is cleared and the cache reset in the block below. 
-// */ -// assert( pPager->errCode || pPager->eState!=PAGER_ERROR ); -// pPager->eState = PAGER_OPEN; -// } - -// /* If Pager.errCode is set, the contents of the pager cache cannot be -// ** trusted. Now that there are no outstanding references to the pager, -// ** it can safely move back to PAGER_OPEN state. This happens in both -// ** normal and exclusive-locking mode. -// */ -// assert( pPager->errCode==SQLITE_OK || !MEMDB ); -// if( pPager->errCode ){ -// if( pPager->tempFile==0 ){ -// pager_reset(pPager); -// pPager->changeCountDone = 0; -// pPager->eState = PAGER_OPEN; -// }else{ -// pPager->eState = (isOpen(pPager->jfd) ? PAGER_OPEN : PAGER_READER); -// } -// if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0); -// pPager->errCode = SQLITE_OK; -// setGetterMethod(pPager); -// } - -// pPager->journalOff = 0; -// pPager->journalHdr = 0; -// pPager->setSuper = 0; -// } - -// /* -// ** This function is called whenever an IOERR or FULL error that requires -// ** the pager to transition into the ERROR state may ahve occurred. -// ** The first argument is a pointer to the pager structure, the second -// ** the error-code about to be returned by a pager API function. The -// ** value returned is a copy of the second argument to this function. -// ** -// ** If the second argument is SQLITE_FULL, SQLITE_IOERR or one of the -// ** IOERR sub-codes, the pager enters the ERROR state and the error code -// ** is stored in Pager.errCode. While the pager remains in the ERROR state, -// ** all major API calls on the Pager will immediately return Pager.errCode. -// ** -// ** The ERROR state indicates that the contents of the pager-cache -// ** cannot be trusted. This state can be cleared by completely discarding -// ** the contents of the pager-cache. If a transaction was active when -// ** the persistent error occurred, then the rollback journal may need -// ** to be replayed to restore the contents of the database file (as if -// ** it were a hot-journal). 
-// */ -// static int pager_error(Pager *pPager, int rc){ -// int rc2 = rc & 0xff; -// assert( rc==SQLITE_OK || !MEMDB ); -// assert( -// pPager->errCode==SQLITE_FULL || -// pPager->errCode==SQLITE_OK || -// (pPager->errCode & 0xff)==SQLITE_IOERR -// ); -// if( rc2==SQLITE_FULL || rc2==SQLITE_IOERR ){ -// pPager->errCode = rc; -// pPager->eState = PAGER_ERROR; -// setGetterMethod(pPager); -// } -// return rc; -// } - -// static int pager_truncate(Pager *pPager, Pgno nPage); - -// /* -// ** The write transaction open on pPager is being committed (bCommit==1) -// ** or rolled back (bCommit==0). -// ** -// ** Return TRUE if and only if all dirty pages should be flushed to disk. -// ** -// ** Rules: -// ** -// ** * For non-TEMP databases, always sync to disk. This is necessary -// ** for transactions to be durable. -// ** -// ** * Sync TEMP database only on a COMMIT (not a ROLLBACK) when the backing -// ** file has been created already (via a spill on pagerStress()) and -// ** when the number of dirty pages in memory exceeds 25% of the total -// ** cache size. -// */ -// static int pagerFlushOnCommit(Pager *pPager, int bCommit){ -// if( pPager->tempFile==0 ) return 1; -// if( !bCommit ) return 0; -// if( !isOpen(pPager->fd) ) return 0; -// return (sqlite3PCachePercentDirty(pPager->pPCache)>=25); -// } - -// /* -// ** This routine ends a transaction. A transaction is usually ended by -// ** either a COMMIT or a ROLLBACK operation. This routine may be called -// ** after rollback of a hot-journal, or if an error occurs while opening -// ** the journal file or writing the very first journal-header of a -// ** database transaction. -// ** -// ** This routine is never called in PAGER_ERROR state. If it is called -// ** in PAGER_NONE or PAGER_SHARED state and the lock held is less -// ** exclusive than a RESERVED lock, it is a no-op. -// ** -// ** Otherwise, any active savepoints are released. -// ** -// ** If the journal file is open, then it is "finalized". 
Once a journal -// ** file has been finalized it is not possible to use it to roll back a -// ** transaction. Nor will it be considered to be a hot-journal by this -// ** or any other database connection. Exactly how a journal is finalized -// ** depends on whether or not the pager is running in exclusive mode and -// ** the current journal-mode (Pager.journalMode value), as follows: -// ** -// ** journalMode==MEMORY -// ** Journal file descriptor is simply closed. This destroys an -// ** in-memory journal. -// ** -// ** journalMode==TRUNCATE -// ** Journal file is truncated to zero bytes in size. -// ** -// ** journalMode==PERSIST -// ** The first 28 bytes of the journal file are zeroed. This invalidates -// ** the first journal header in the file, and hence the entire journal -// ** file. An invalid journal file cannot be rolled back. -// ** -// ** journalMode==DELETE -// ** The journal file is closed and deleted using sqlite3OsDelete(). -// ** -// ** If the pager is running in exclusive mode, this method of finalizing -// ** the journal file is never used. Instead, if the journalMode is -// ** DELETE and the pager is in exclusive mode, the method described under -// ** journalMode==PERSIST is used instead. -// ** -// ** After the journal is finalized, the pager moves to PAGER_READER state. -// ** If running in non-exclusive rollback mode, the lock on the file is -// ** downgraded to a SHARED_LOCK. -// ** -// ** SQLITE_OK is returned if no error occurs. If an error occurs during -// ** any of the IO operations to finalize the journal file or unlock the -// ** database then the IO error code is returned to the user. If the -// ** operation to finalize the journal file fails, then the code still -// ** tries to unlock the database file if not in exclusive mode. If the -// ** unlock operation fails as well, then the first error code related -// ** to the first error encountered (the journal finalization one) is -// ** returned. 
-// */ -// static int pager_end_transaction(Pager *pPager, int hasSuper, int bCommit){ -// int rc = SQLITE_OK; /* Error code from journal finalization operation */ -// int rc2 = SQLITE_OK; /* Error code from db file unlock operation */ - -// /* Do nothing if the pager does not have an open write transaction -// ** or at least a RESERVED lock. This function may be called when there -// ** is no write-transaction active but a RESERVED or greater lock is -// ** held under two circumstances: -// ** -// ** 1. After a successful hot-journal rollback, it is called with -// ** eState==PAGER_NONE and eLock==EXCLUSIVE_LOCK. -// ** -// ** 2. If a connection with locking_mode=exclusive holding an EXCLUSIVE -// ** lock switches back to locking_mode=normal and then executes a -// ** read-transaction, this function is called with eState==PAGER_READER -// ** and eLock==EXCLUSIVE_LOCK when the read-transaction is closed. -// */ -// assert( assert_pager_state(pPager) ); -// assert( pPager->eState!=PAGER_ERROR ); -// if( pPager->eStateeLockjfd) || pPager->pInJournal==0 -// || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_BATCH_ATOMIC) -// ); -// if( isOpen(pPager->jfd) ){ -// assert( !pagerUseWal(pPager) ); - -// /* Finalize the journal file. */ -// if( sqlite3JournalIsInMemory(pPager->jfd) ){ -// /* assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ); */ -// sqlite3OsClose(pPager->jfd); -// }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){ -// if( pPager->journalOff==0 ){ -// rc = SQLITE_OK; -// }else{ -// rc = sqlite3OsTruncate(pPager->jfd, 0); -// if( rc==SQLITE_OK && pPager->fullSync ){ -// /* Make sure the new file size is written into the inode right away. -// ** Otherwise the journal might resurrect following a power loss and -// ** cause the last transaction to roll back. 
See -// ** https://bugzilla.mozilla.org/show_bug.cgi?id=1072773 -// */ -// rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags); -// } -// } -// pPager->journalOff = 0; -// }else if( pPager->journalMode==PAGER_JOURNALMODE_PERSIST -// || (pPager->exclusiveMode && pPager->journalMode!=PAGER_JOURNALMODE_WAL) -// ){ -// rc = zeroJournalHdr(pPager, hasSuper||pPager->tempFile); -// pPager->journalOff = 0; -// }else{ -// /* This branch may be executed with Pager.journalMode==MEMORY if -// ** a hot-journal was just rolled back. In this case the journal -// ** file should be closed and deleted. If this connection writes to -// ** the database file, it will do so using an in-memory journal. -// */ -// int bDelete = !pPager->tempFile; -// assert( sqlite3JournalIsInMemory(pPager->jfd)==0 ); -// assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE -// || pPager->journalMode==PAGER_JOURNALMODE_MEMORY -// || pPager->journalMode==PAGER_JOURNALMODE_WAL -// ); -// sqlite3OsClose(pPager->jfd); -// if( bDelete ){ -// rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, pPager->extraSync); -// } -// } -// } - -// #ifdef SQLITE_CHECK_PAGES -// sqlite3PcacheIterateDirty(pPager->pPCache, pager_set_pagehash); -// if( pPager->dbSize==0 && sqlite3PcacheRefCount(pPager->pPCache)>0 ){ -// PgHdr *p = sqlite3PagerLookup(pPager, 1); -// if( p ){ -// p->pageHash = 0; -// sqlite3PagerUnrefNotNull(p); -// } -// } -// #endif - -// sqlite3BitvecDestroy(pPager->pInJournal); -// pPager->pInJournal = 0; -// pPager->nRec = 0; -// if( rc==SQLITE_OK ){ -// if( MEMDB || pagerFlushOnCommit(pPager, bCommit) ){ -// sqlite3PcacheCleanAll(pPager->pPCache); -// }else{ -// sqlite3PcacheClearWritable(pPager->pPCache); -// } -// sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize); -// } - -// if( pagerUseWal(pPager) ){ -// /* Drop the WAL write-lock, if any. Also, if the connection was in -// ** locking_mode=exclusive mode but is no longer, drop the EXCLUSIVE -// ** lock held on the database file. 
-// */ -// rc2 = sqlite3WalEndWriteTransaction(pPager->pWal); -// assert( rc2==SQLITE_OK ); -// }else if( rc==SQLITE_OK && bCommit && pPager->dbFileSize>pPager->dbSize ){ -// /* This branch is taken when committing a transaction in rollback-journal -// ** mode if the database file on disk is larger than the database image. -// ** At this point the journal has been finalized and the transaction -// ** successfully committed, but the EXCLUSIVE lock is still held on the -// ** file. So it is safe to truncate the database file to its minimum -// ** required size. */ -// assert( pPager->eLock==EXCLUSIVE_LOCK ); -// rc = pager_truncate(pPager, pPager->dbSize); -// } - -// if( rc==SQLITE_OK && bCommit ){ -// rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_COMMIT_PHASETWO, 0); -// if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; -// } - -// if( !pPager->exclusiveMode -// && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0)) -// ){ -// rc2 = pagerUnlockDb(pPager, SHARED_LOCK); -// } -// pPager->eState = PAGER_READER; -// pPager->setSuper = 0; - -// return (rc==SQLITE_OK?rc2:rc); -// } - -// /* -// ** Execute a rollback if a transaction is active and unlock the -// ** database file. -// ** -// ** If the pager has already entered the ERROR state, do not attempt -// ** the rollback at this time. Instead, pager_unlock() is called. The -// ** call to pager_unlock() will discard all in-memory pages, unlock -// ** the database file and move the pager back to OPEN state. If this -// ** means that there is a hot-journal left in the file-system, the next -// ** connection to obtain a shared lock on the pager (which may be this one) -// ** will roll it back. -// ** -// ** If the pager has not already entered the ERROR state, but an IO or -// ** malloc error occurs during a rollback, then this will itself cause -// ** the pager to enter the ERROR state. Which will be cleared by the -// ** call to pager_unlock(), as described above. 
-// */ -// static void pagerUnlockAndRollback(Pager *pPager){ -// if( pPager->eState!=PAGER_ERROR && pPager->eState!=PAGER_OPEN ){ -// assert( assert_pager_state(pPager) ); -// if( pPager->eState>=PAGER_WRITER_LOCKED ){ -// sqlite3BeginBenignMalloc(); -// sqlite3PagerRollback(pPager); -// sqlite3EndBenignMalloc(); -// }else if( !pPager->exclusiveMode ){ -// assert( pPager->eState==PAGER_READER ); -// pager_end_transaction(pPager, 0, 0); -// } -// } -// pager_unlock(pPager); -// } - -// /* -// ** Parameter aData must point to a buffer of pPager->pageSize bytes -// ** of data. Compute and return a checksum based ont the contents of the -// ** page of data and the current value of pPager->cksumInit. -// ** -// ** This is not a real checksum. It is really just the sum of the -// ** random initial value (pPager->cksumInit) and every 200th byte -// ** of the page data, starting with byte offset (pPager->pageSize%200). -// ** Each byte is interpreted as an 8-bit unsigned integer. -// ** -// ** Changing the formula used to compute this checksum results in an -// ** incompatible journal file format. -// ** -// ** If journal corruption occurs due to a power failure, the most likely -// ** scenario is that one end or the other of the record will be changed. -// ** It is much less likely that the two ends of the journal record will be -// ** correct and the middle be corrupt. Thus, this "checksum" scheme, -// ** though fast and simple, catches the mostly likely kind of corruption. -// */ -// static u32 pager_cksum(Pager *pPager, const u8 *aData){ -// u32 cksum = pPager->cksumInit; /* Checksum value to return */ -// int i = pPager->pageSize-200; /* Loop counter */ -// while( i>0 ){ -// cksum += aData[i]; -// i -= 200; -// } -// return cksum; -// } - -// /* -// ** Read a single page from either the journal file (if isMainJrnl==1) or -// ** from the sub-journal (if isMainJrnl==0) and playback that page. -// ** The page begins at offset *pOffset into the file. 
The *pOffset -// ** value is increased to the start of the next page in the journal. -// ** -// ** The main rollback journal uses checksums - the statement journal does -// ** not. -// ** -// ** If the page number of the page record read from the (sub-)journal file -// ** is greater than the current value of Pager.dbSize, then playback is -// ** skipped and SQLITE_OK is returned. -// ** -// ** If pDone is not NULL, then it is a record of pages that have already -// ** been played back. If the page at *pOffset has already been played back -// ** (if the corresponding pDone bit is set) then skip the playback. -// ** Make sure the pDone bit corresponding to the *pOffset page is set -// ** prior to returning. -// ** -// ** If the page record is successfully read from the (sub-)journal file -// ** and played back, then SQLITE_OK is returned. If an IO error occurs -// ** while reading the record from the (sub-)journal file or while writing -// ** to the database file, then the IO error code is returned. If data -// ** is successfully read from the (sub-)journal file but appears to be -// ** corrupted, SQLITE_DONE is returned. Data is considered corrupted in -// ** two circumstances: -// ** -// ** * If the record page-number is illegal (0 or PAGER_MJ_PGNO), or -// ** * If the record is being rolled back from the main journal file -// ** and the checksum field does not match the record content. -// ** -// ** Neither of these two scenarios are possible during a savepoint rollback. -// ** -// ** If this is a savepoint rollback, then memory may have to be dynamically -// ** allocated by this function. If this is the case and an allocation fails, -// ** SQLITE_NOMEM is returned. -// */ -// static int pager_playback_one_page( -// Pager *pPager, /* The pager being played back */ -// i64 *pOffset, /* Offset of record to playback */ -// Bitvec *pDone, /* Bitvec of pages already played back */ -// int isMainJrnl, /* 1 -> main journal. 0 -> sub-journal. 
*/ -// int isSavepnt /* True for a savepoint rollback */ -// ){ -// int rc; -// PgHdr *pPg; /* An existing page in the cache */ -// Pgno pgno; /* The page number of a page in journal */ -// u32 cksum; /* Checksum used for sanity checking */ -// char *aData; /* Temporary storage for the page */ -// sqlite3_file *jfd; /* The file descriptor for the journal file */ -// int isSynced; /* True if journal page is synced */ - -// assert( (isMainJrnl&~1)==0 ); /* isMainJrnl is 0 or 1 */ -// assert( (isSavepnt&~1)==0 ); /* isSavepnt is 0 or 1 */ -// assert( isMainJrnl || pDone ); /* pDone always used on sub-journals */ -// assert( isSavepnt || pDone==0 ); /* pDone never used on non-savepoint */ - -// aData = pPager->pTmpSpace; -// assert( aData ); /* Temp storage must have already been allocated */ -// assert( pagerUseWal(pPager)==0 || (!isMainJrnl && isSavepnt) ); - -// /* Either the state is greater than PAGER_WRITER_CACHEMOD (a transaction -// ** or savepoint rollback done at the request of the caller) or this is -// ** a hot-journal rollback. If it is a hot-journal rollback, the pager -// ** is in state OPEN and holds an EXCLUSIVE lock. Hot-journal rollback -// ** only reads from the main journal, not the sub-journal. -// */ -// assert( pPager->eState>=PAGER_WRITER_CACHEMOD -// || (pPager->eState==PAGER_OPEN && pPager->eLock==EXCLUSIVE_LOCK) -// ); -// assert( pPager->eState>=PAGER_WRITER_CACHEMOD || isMainJrnl ); - -// /* Read the page number and page data from the journal or sub-journal -// ** file. Return an error code to the caller if an IO error occurs. -// */ -// jfd = isMainJrnl ? pPager->jfd : pPager->sjfd; -// rc = read32bits(jfd, *pOffset, &pgno); -// if( rc!=SQLITE_OK ) return rc; -// rc = sqlite3OsRead(jfd, (u8*)aData, pPager->pageSize, (*pOffset)+4); -// if( rc!=SQLITE_OK ) return rc; -// *pOffset += pPager->pageSize + 4 + isMainJrnl*4; - -// /* Sanity checking on the page. This is more important that I originally -// ** thought. 
If a power failure occurs while the journal is being written, -// ** it could cause invalid data to be written into the journal. We need to -// ** detect this invalid data (with high probability) and ignore it. -// */ -// if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ -// assert( !isSavepnt ); -// return SQLITE_DONE; -// } -// if( pgno>(Pgno)pPager->dbSize || sqlite3BitvecTest(pDone, pgno) ){ -// return SQLITE_OK; -// } -// if( isMainJrnl ){ -// rc = read32bits(jfd, (*pOffset)-4, &cksum); -// if( rc ) return rc; -// if( !isSavepnt && pager_cksum(pPager, (u8*)aData)!=cksum ){ -// return SQLITE_DONE; -// } -// } - -// /* If this page has already been played back before during the current -// ** rollback, then don't bother to play it back again. -// */ -// if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){ -// return rc; -// } - -// /* When playing back page 1, restore the nReserve setting -// */ -// if( pgno==1 && pPager->nReserve!=((u8*)aData)[20] ){ -// pPager->nReserve = ((u8*)aData)[20]; -// } - -// /* If the pager is in CACHEMOD state, then there must be a copy of this -// ** page in the pager cache. In this case just update the pager cache, -// ** not the database file. The page is left marked dirty in this case. -// ** -// ** An exception to the above rule: If the database is in no-sync mode -// ** and a page is moved during an incremental vacuum then the page may -// ** not be in the pager cache. Later: if a malloc() or IO error occurs -// ** during a Movepage() call, then the page may not be in the cache -// ** either. So the condition described in the above paragraph is not -// ** assert()able. -// ** -// ** If in WRITER_DBMOD, WRITER_FINISHED or OPEN state, then we update the -// ** pager cache if it exists and the main file. The page is then marked -// ** not dirty. Since this code is only executed in PAGER_OPEN state for -// ** a hot-journal rollback, it is guaranteed that the page-cache is empty -// ** if the pager is in OPEN state. 
-// ** -// ** Ticket #1171: The statement journal might contain page content that is -// ** different from the page content at the start of the transaction. -// ** This occurs when a page is changed prior to the start of a statement -// ** then changed again within the statement. When rolling back such a -// ** statement we must not write to the original database unless we know -// ** for certain that original page contents are synced into the main rollback -// ** journal. Otherwise, a power loss might leave modified data in the -// ** database file without an entry in the rollback journal that can -// ** restore the database to its original form. Two conditions must be -// ** met before writing to the database files. (1) the database must be -// ** locked. (2) we know that the original page content is fully synced -// ** in the main journal either because the page is not in cache or else -// ** the page is marked as needSync==0. -// ** -// ** 2008-04-14: When attempting to vacuum a corrupt database file, it -// ** is possible to fail a statement on a database that does not yet exist. -// ** Do not attempt to write if database file has never been opened. 
-// */ -// if( pagerUseWal(pPager) ){ -// pPg = 0; -// }else{ -// pPg = sqlite3PagerLookup(pPager, pgno); -// } -// assert( pPg || !MEMDB ); -// assert( pPager->eState!=PAGER_OPEN || pPg==0 || pPager->tempFile ); -// PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n", -// PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, (u8*)aData), -// (isMainJrnl?"main-journal":"sub-journal") -// )); -// if( isMainJrnl ){ -// isSynced = pPager->noSync || (*pOffset <= pPager->journalHdr); -// }else{ -// isSynced = (pPg==0 || 0==(pPg->flags & PGHDR_NEED_SYNC)); -// } -// if( isOpen(pPager->fd) -// && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) -// && isSynced -// ){ -// i64 ofst = (pgno-1)*(i64)pPager->pageSize; -// testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 ); -// assert( !pagerUseWal(pPager) ); - -// /* Write the data read from the journal back into the database file. -// ** This is usually safe even for an encrypted database - as the data -// ** was encrypted before it was written to the journal file. The exception -// ** is if the data was just read from an in-memory sub-journal. In that -// ** case it must be encrypted here before it is copied into the database -// ** file. */ -// rc = sqlite3OsWrite(pPager->fd, (u8 *)aData, pPager->pageSize, ofst); - -// if( pgno>pPager->dbFileSize ){ -// pPager->dbFileSize = pgno; -// } -// if( pPager->pBackup ){ -// sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)aData); -// } -// }else if( !isMainJrnl && pPg==0 ){ -// /* If this is a rollback of a savepoint and data was not written to -// ** the database and the page is not in-memory, there is a potential -// ** problem. When the page is next fetched by the b-tree layer, it -// ** will be read from the database file, which may or may not be -// ** current. -// ** -// ** There are a couple of different ways this can happen. All are quite -// ** obscure. 
When running in synchronous mode, this can only happen -// ** if the page is on the free-list at the start of the transaction, then -// ** populated, then moved using sqlite3PagerMovepage(). -// ** -// ** The solution is to add an in-memory page to the cache containing -// ** the data just read from the sub-journal. Mark the page as dirty -// ** and if the pager requires a journal-sync, then mark the page as -// ** requiring a journal-sync before it is written. -// */ -// assert( isSavepnt ); -// assert( (pPager->doNotSpill & SPILLFLAG_ROLLBACK)==0 ); -// pPager->doNotSpill |= SPILLFLAG_ROLLBACK; -// rc = sqlite3PagerGet(pPager, pgno, &pPg, 1); -// assert( (pPager->doNotSpill & SPILLFLAG_ROLLBACK)!=0 ); -// pPager->doNotSpill &= ~SPILLFLAG_ROLLBACK; -// if( rc!=SQLITE_OK ) return rc; -// sqlite3PcacheMakeDirty(pPg); -// } -// if( pPg ){ -// /* No page should ever be explicitly rolled back that is in use, except -// ** for page 1 which is held in use in order to keep the lock on the -// ** database active. However such a page may be rolled back as a result -// ** of an internal error resulting in an automatic call to -// ** sqlite3PagerRollback(). -// */ -// void *pData; -// pData = pPg->pData; -// memcpy(pData, (u8*)aData, pPager->pageSize); -// pPager->xReiniter(pPg); -// /* It used to be that sqlite3PcacheMakeClean(pPg) was called here. But -// ** that call was dangerous and had no detectable benefit since the cache -// ** is normally cleaned by sqlite3PcacheCleanAll() after rollback and so -// ** has been removed. */ -// pager_set_pagehash(pPg); - -// /* If this was page 1, then restore the value of Pager.dbFileVers. -// ** Do this before any decoding. */ -// if( pgno==1 ){ -// memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers)); -// } -// sqlite3PcacheRelease(pPg); -// } -// return rc; -// } - -// /* -// ** Parameter zSuper is the name of a super-journal file. 
A single journal -// ** file that referred to the super-journal file has just been rolled back. -// ** This routine checks if it is possible to delete the super-journal file, -// ** and does so if it is. -// ** -// ** Argument zSuper may point to Pager.pTmpSpace. So that buffer is not -// ** available for use within this function. -// ** -// ** When a super-journal file is created, it is populated with the names -// ** of all of its child journals, one after another, formatted as utf-8 -// ** encoded text. The end of each child journal file is marked with a -// ** nul-terminator byte (0x00). i.e. the entire contents of a super-journal -// ** file for a transaction involving two databases might be: -// ** -// ** "/home/bill/a.db-journal\x00/home/bill/b.db-journal\x00" -// ** -// ** A super-journal file may only be deleted once all of its child -// ** journals have been rolled back. -// ** -// ** This function reads the contents of the super-journal file into -// ** memory and loops through each of the child journal names. For -// ** each child journal, it checks if: -// ** -// ** * if the child journal exists, and if so -// ** * if the child journal contains a reference to super-journal -// ** file zSuper -// ** -// ** If a child journal can be found that matches both of the criteria -// ** above, this function returns without doing anything. Otherwise, if -// ** no such child journal can be found, file zSuper is deleted from -// ** the file-system using sqlite3OsDelete(). -// ** -// ** If an IO error within this function, an error code is returned. This -// ** function allocates memory by calling sqlite3Malloc(). If an allocation -// ** fails, SQLITE_NOMEM is returned. Otherwise, if no IO or malloc errors -// ** occur, SQLITE_OK is returned. -// ** -// ** TODO: This function allocates a single block of memory to load -// ** the entire contents of the super-journal file. This could be -// ** a couple of kilobytes or so - potentially larger than the page -// ** size. 
-// */ -// static int pager_delsuper(Pager *pPager, const char *zSuper){ -// sqlite3_vfs *pVfs = pPager->pVfs; -// int rc; /* Return code */ -// sqlite3_file *pSuper; /* Malloc'd super-journal file descriptor */ -// sqlite3_file *pJournal; /* Malloc'd child-journal file descriptor */ -// char *zSuperJournal = 0; /* Contents of super-journal file */ -// i64 nSuperJournal; /* Size of super-journal file */ -// char *zJournal; /* Pointer to one journal within MJ file */ -// char *zSuperPtr; /* Space to hold super-journal filename */ -// char *zFree = 0; /* Free this buffer */ -// int nSuperPtr; /* Amount of space allocated to zSuperPtr[] */ - -// /* Allocate space for both the pJournal and pSuper file descriptors. -// ** If successful, open the super-journal file for reading. -// */ -// pSuper = (sqlite3_file *)sqlite3MallocZero(pVfs->szOsFile * 2); -// if( !pSuper ){ -// rc = SQLITE_NOMEM; -// pJournal = 0; -// }else{ -// const int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_SUPER_JOURNAL); -// rc = sqlite3OsOpen(pVfs, zSuper, pSuper, flags, 0); -// pJournal = (sqlite3_file *)(((u8 *)pSuper) + pVfs->szOsFile); -// } -// if( rc!=SQLITE_OK ) goto delsuper_out; - -// /* Load the entire super-journal file into space obtained from -// ** sqlite3_malloc() and pointed to by zSuperJournal. Also obtain -// ** sufficient space (in zSuperPtr) to hold the names of super-journal -// ** files extracted from regular rollback-journals. 
-// */ -// rc = sqlite3OsFileSize(pSuper, &nSuperJournal); -// if( rc!=SQLITE_OK ) goto delsuper_out; -// nSuperPtr = pVfs->mxPathname+1; -// zFree = sqlite3Malloc(4 + nSuperJournal + nSuperPtr + 2); -// if( !zFree ){ -// rc = SQLITE_NOMEM; -// goto delsuper_out; -// } -// zFree[0] = zFree[1] = zFree[2] = zFree[3] = 0; -// zSuperJournal = &zFree[4]; -// zSuperPtr = &zSuperJournal[nSuperJournal+2]; -// rc = sqlite3OsRead(pSuper, zSuperJournal, (int)nSuperJournal, 0); -// if( rc!=SQLITE_OK ) goto delsuper_out; -// zSuperJournal[nSuperJournal] = 0; -// zSuperJournal[nSuperJournal+1] = 0; - -// zJournal = zSuperJournal; -// while( (zJournal-zSuperJournal)pageSize bytes). -// ** If the file on disk is currently larger than nPage pages, then use the VFS -// ** xTruncate() method to truncate it. -// ** -// ** Or, it might be the case that the file on disk is smaller than -// ** nPage pages. Some operating system implementations can get confused if -// ** you try to truncate a file to some size that is larger than it -// ** currently is, so detect this case and write a single zero byte to -// ** the end of the new file instead. -// ** -// ** If successful, return SQLITE_OK. If an IO error occurs while modifying -// ** the database file, return the error code to the caller. -// */ -// static int pager_truncate(Pager *pPager, Pgno nPage){ -// int rc = SQLITE_OK; -// assert( pPager->eState!=PAGER_ERROR ); -// assert( pPager->eState!=PAGER_READER ); - -// if( isOpen(pPager->fd) -// && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) -// ){ -// i64 currentSize, newSize; -// int szPage = pPager->pageSize; -// assert( pPager->eLock==EXCLUSIVE_LOCK ); -// /* TODO: Is it safe to use Pager.dbFileSize here? 
*/ -// rc = sqlite3OsFileSize(pPager->fd, &currentSize); -// newSize = szPage*(i64)nPage; -// if( rc==SQLITE_OK && currentSize!=newSize ){ -// if( currentSize>newSize ){ -// rc = sqlite3OsTruncate(pPager->fd, newSize); -// }else if( (currentSize+szPage)<=newSize ){ -// char *pTmp = pPager->pTmpSpace; -// memset(pTmp, 0, szPage); -// testcase( (newSize-szPage) == currentSize ); -// testcase( (newSize-szPage) > currentSize ); -// rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, newSize-szPage); -// } -// if( rc==SQLITE_OK ){ -// pPager->dbFileSize = nPage; -// } -// } -// } -// return rc; -// } - -// /* -// ** Return a sanitized version of the sector-size of OS file pFile. The -// ** return value is guaranteed to lie between 32 and MAX_SECTOR_SIZE. -// */ -// int sqlite3SectorSize(sqlite3_file *pFile){ -// int iRet = sqlite3OsSectorSize(pFile); -// if( iRet<32 ){ -// iRet = 512; -// }else if( iRet>MAX_SECTOR_SIZE ){ -// assert( MAX_SECTOR_SIZE>=512 ); -// iRet = MAX_SECTOR_SIZE; -// } -// return iRet; -// } - -// /* -// ** Set the value of the Pager.sectorSize variable for the given -// ** pager based on the value returned by the xSectorSize method -// ** of the open database file. The sector size will be used -// ** to determine the size and alignment of journal header and -// ** super-journal pointers within created journal files. -// ** -// ** For temporary files the effective sector size is always 512 bytes. -// ** -// ** Otherwise, for non-temporary files, the effective sector size is -// ** the value returned by the xSectorSize() method rounded up to 32 if -// ** it is less than 32, or rounded down to MAX_SECTOR_SIZE if it -// ** is greater than MAX_SECTOR_SIZE. -// ** -// ** If the file has the SQLITE_IOCAP_POWERSAFE_OVERWRITE property, then set -// ** the effective sector size to its minimum value (512). 
The purpose of -// ** pPager->sectorSize is to define the "blast radius" of bytes that -// ** might change if a crash occurs while writing to a single byte in -// ** that range. But with POWERSAFE_OVERWRITE, the blast radius is zero -// ** (that is what POWERSAFE_OVERWRITE means), so we minimize the sector -// ** size. For backwards compatibility of the rollback journal file format, -// ** we cannot reduce the effective sector size below 512. -// */ -// static void setSectorSize(Pager *pPager){ -// assert( isOpen(pPager->fd) || pPager->tempFile ); - -// if( pPager->tempFile -// || (sqlite3OsDeviceCharacteristics(pPager->fd) & -// SQLITE_IOCAP_POWERSAFE_OVERWRITE)!=0 -// ){ -// /* Sector size doesn't matter for temporary files. Also, the file -// ** may not have been opened yet, in which case the OsSectorSize() -// ** call will segfault. */ -// pPager->sectorSize = 512; -// }else{ -// pPager->sectorSize = sqlite3SectorSize(pPager->fd); -// } -// } - -// /* -// ** Playback the journal and thus restore the database file to -// ** the state it was in before we started making changes. -// ** -// ** The journal file format is as follows: -// ** -// ** (1) 8 byte prefix. A copy of aJournalMagic[]. -// ** (2) 4 byte big-endian integer which is the number of valid page records -// ** in the journal. If this value is 0xffffffff, then compute the -// ** number of page records from the journal size. -// ** (3) 4 byte big-endian integer which is the initial value for the -// ** sanity checksum. -// ** (4) 4 byte integer which is the number of pages to truncate the -// ** database to during a rollback. -// ** (5) 4 byte big-endian integer which is the sector size. The header -// ** is this many bytes in size. -// ** (6) 4 byte big-endian integer which is the page size. -// ** (7) zero padding out to the next sector size. -// ** (8) Zero or more pages instances, each as follows: -// ** + 4 byte page number. -// ** + pPager->pageSize bytes of data. 
-// ** + 4 byte checksum -// ** -// ** When we speak of the journal header, we mean the first 7 items above. -// ** Each entry in the journal is an instance of the 8th item. -// ** -// ** Call the value from the second bullet "nRec". nRec is the number of -// ** valid page entries in the journal. In most cases, you can compute the -// ** value of nRec from the size of the journal file. But if a power -// ** failure occurred while the journal was being written, it could be the -// ** case that the size of the journal file had already been increased but -// ** the extra entries had not yet made it safely to disk. In such a case, -// ** the value of nRec computed from the file size would be too large. For -// ** that reason, we always use the nRec value in the header. -// ** -// ** If the nRec value is 0xffffffff it means that nRec should be computed -// ** from the file size. This value is used when the user selects the -// ** no-sync option for the journal. A power failure could lead to corruption -// ** in this case. But for things like temporary table (which will be -// ** deleted when the power is restored) we don't care. -// ** -// ** If the file opened as the journal file is not a well-formed -// ** journal file then all pages up to the first corrupted page are rolled -// ** back (or no pages if the journal header is corrupted). The journal file -// ** is then deleted and SQLITE_OK returned, just as if no corruption had -// ** been encountered. -// ** -// ** If an I/O or malloc() error occurs, the journal-file is not deleted -// ** and an error code is returned. -// ** -// ** The isHot parameter indicates that we are trying to rollback a journal -// ** that might be a hot journal. Or, it could be that the journal is -// ** preserved because of JOURNALMODE_PERSIST or JOURNALMODE_TRUNCATE. -// ** If the journal really is hot, reset the pager cache prior rolling -// ** back any content. If the journal is merely persistent, no reset is -// ** needed. 
-// */ -// static int pager_playback(Pager *pPager, int isHot){ -// sqlite3_vfs *pVfs = pPager->pVfs; -// i64 szJ; /* Size of the journal file in bytes */ -// u32 nRec; /* Number of Records in the journal */ -// u32 u; /* Unsigned loop counter */ -// Pgno mxPg = 0; /* Size of the original file in pages */ -// int rc; /* Result code of a subroutine */ -// int res = 1; /* Value returned by sqlite3OsAccess() */ -// char *zSuper = 0; /* Name of super-journal file if any */ -// int needPagerReset; /* True to reset page prior to first page rollback */ -// int nPlayback = 0; /* Total number of pages restored from journal */ -// u32 savedPageSize = pPager->pageSize; - -// /* Figure out how many records are in the journal. Abort early if -// ** the journal is empty. -// */ -// assert( isOpen(pPager->jfd) ); -// rc = sqlite3OsFileSize(pPager->jfd, &szJ); -// if( rc!=SQLITE_OK ){ -// goto end_playback; -// } - -// /* Read the super-journal name from the journal, if it is present. -// ** If a super-journal file name is specified, but the file is not -// ** present on disk, then the journal is not hot and does not need to be -// ** played back. -// ** -// ** TODO: Technically the following is an error because it assumes that -// ** buffer Pager.pTmpSpace is (mxPathname+1) bytes or larger. i.e. that -// ** (pPager->pageSize >= pPager->pVfs->mxPathname+1). Using os_unix.c, -// ** mxPathname is 512, which is the same as the minimum allowable value -// ** for pageSize. -// */ -// zSuper = pPager->pTmpSpace; -// rc = readSuperJournal(pPager->jfd, zSuper, pPager->pVfs->mxPathname+1); -// if( rc==SQLITE_OK && zSuper[0] ){ -// rc = sqlite3OsAccess(pVfs, zSuper, SQLITE_ACCESS_EXISTS, &res); -// } -// zSuper = 0; -// if( rc!=SQLITE_OK || !res ){ -// goto end_playback; -// } -// pPager->journalOff = 0; -// needPagerReset = isHot; - -// /* This loop terminates either when a readJournalHdr() or -// ** pager_playback_one_page() call returns SQLITE_DONE or an IO error -// ** occurs. 
-// */ -// while( 1 ){ -// /* Read the next journal header from the journal file. If there are -// ** not enough bytes left in the journal file for a complete header, or -// ** it is corrupted, then a process must have failed while writing it. -// ** This indicates nothing more needs to be rolled back. -// */ -// rc = readJournalHdr(pPager, isHot, szJ, &nRec, &mxPg); -// if( rc!=SQLITE_OK ){ -// if( rc==SQLITE_DONE ){ -// rc = SQLITE_OK; -// } -// goto end_playback; -// } - -// /* If nRec is 0xffffffff, then this journal was created by a process -// ** working in no-sync mode. This means that the rest of the journal -// ** file consists of pages, there are no more journal headers. Compute -// ** the value of nRec based on this assumption. -// */ -// if( nRec==0xffffffff ){ -// assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ); -// nRec = (int)((szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager)); -// } - -// /* If nRec is 0 and this rollback is of a transaction created by this -// ** process and if this is the final header in the journal, then it means -// ** that this part of the journal was being filled but has not yet been -// ** synced to disk. Compute the number of pages based on the remaining -// ** size of the file. -// ** -// ** The third term of the test was added to fix ticket #2565. -// ** When rolling back a hot journal, nRec==0 always means that the next -// ** chunk of the journal contains zero pages to be rolled back. But -// ** when doing a ROLLBACK and the nRec==0 chunk is the last chunk in -// ** the journal, it means that the journal might contain additional -// ** pages that need to be rolled back and that the number of pages -// ** should be computed based on the journal file size. 
-// */ -// if( nRec==0 && !isHot && -// pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){ -// nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager)); -// } - -// /* If this is the first header read from the journal, truncate the -// ** database file back to its original size. -// */ -// if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){ -// rc = pager_truncate(pPager, mxPg); -// if( rc!=SQLITE_OK ){ -// goto end_playback; -// } -// pPager->dbSize = mxPg; -// } - -// /* Copy original pages out of the journal and back into the -// ** database file and/or page cache. -// */ -// for(u=0; u<nRec; u++){ -// if( needPagerReset ){ -// pager_reset(pPager); -// needPagerReset = 0; -// } -// rc = pager_playback_one_page(pPager,&pPager->journalOff,0,1,0); -// if( rc==SQLITE_OK ){ -// nPlayback++; -// }else{ -// if( rc==SQLITE_DONE ){ -// pPager->journalOff = szJ; -// break; -// }else if( rc==SQLITE_IOERR_SHORT_READ ){ -// /* If the journal has been truncated, simply stop reading and -// ** processing the journal. This might happen if the journal was -// ** not completely written and synced prior to a crash. In that -// ** case, the database should have never been written in the -// ** first place so it is OK to simply abandon the rollback. */ -// rc = SQLITE_OK; -// goto end_playback; -// }else{ -// /* If we are unable to rollback, quit and return the error -// ** code. This will cause the pager to enter the error state -// ** so that no further harm will be done. Perhaps the next -// ** process to come along will be able to rollback the database. -// */ -// goto end_playback; -// } -// } -// } -// } -// /*NOTREACHED*/ -// assert( 0 ); - -// end_playback: -// if( rc==SQLITE_OK ){ -// rc = sqlite3PagerSetPagesize(pPager, &savedPageSize, -1); -// } -// /* Following a rollback, the database file should be back in its original -// ** state prior to the start of the transaction, so invoke the -// ** SQLITE_FCNTL_DB_UNCHANGED file-control method to disable the -// ** assertion that the transaction counter was modified. 
-// */ -// #ifdef SQLITE_DEBUG -// sqlite3OsFileControlHint(pPager->fd,SQLITE_FCNTL_DB_UNCHANGED,0); -// #endif - -// /* If this playback is happening automatically as a result of an IO or -// ** malloc error that occurred after the change-counter was updated but -// ** before the transaction was committed, then the change-counter -// ** modification may just have been reverted. If this happens in exclusive -// ** mode, then subsequent transactions performed by the connection will not -// ** update the change-counter at all. This may lead to cache inconsistency -// ** problems for other processes at some point in the future. So, just -// ** in case this has happened, clear the changeCountDone flag now. -// */ -// pPager->changeCountDone = pPager->tempFile; - -// if( rc==SQLITE_OK ){ -// /* Leave 4 bytes of space before the super-journal filename in memory. -// ** This is because it may end up being passed to sqlite3OsOpen(), in -// ** which case it requires 4 0x00 bytes in memory immediately before -// ** the filename. */ -// zSuper = &pPager->pTmpSpace[4]; -// rc = readSuperJournal(pPager->jfd, zSuper, pPager->pVfs->mxPathname+1); -// testcase( rc!=SQLITE_OK ); -// } -// if( rc==SQLITE_OK -// && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) -// ){ -// rc = sqlite3PagerSync(pPager, 0); -// } -// if( rc==SQLITE_OK ){ -// rc = pager_end_transaction(pPager, zSuper[0]!='\0', 0); -// testcase( rc!=SQLITE_OK ); -// } -// if( rc==SQLITE_OK && zSuper[0] && res ){ -// /* If there was a super-journal and this routine will return success, -// ** see if it is possible to delete the super-journal. 
-// */ -// assert( zSuper==&pPager->pTmpSpace[4] ); -// memset(&zSuper[-4], 0, 4); -// rc = pager_delsuper(pPager, zSuper); -// testcase( rc!=SQLITE_OK ); -// } -// if( isHot && nPlayback ){ -// sqlite3_log(SQLITE_NOTICE_RECOVER_ROLLBACK, "recovered %d pages from %s", -// nPlayback, pPager->zJournal); -// } - -// /* The Pager.sectorSize variable may have been updated while rolling -// ** back a journal created by a process with a different sector size -// ** value. Reset it to the correct value for this process. -// */ -// setSectorSize(pPager); -// return rc; -// } - -// /* -// ** Read the content for page pPg out of the database file (or out of -// ** the WAL if that is where the most recent copy if found) into -// ** pPg->pData. A shared lock or greater must be held on the database -// ** file before this function is called. -// ** -// ** If page 1 is read, then the value of Pager.dbFileVers[] is set to -// ** the value read from the database file. -// ** -// ** If an IO error occurs, then the IO error is returned to the caller. -// ** Otherwise, SQLITE_OK is returned. 
-// */ -// static int readDbPage(PgHdr *pPg){ -// Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ -// int rc = SQLITE_OK; /* Return code */ - -// #ifndef SQLITE_OMIT_WAL -// u32 iFrame = 0; /* Frame of WAL containing pgno */ - -// assert( pPager->eState>=PAGER_READER && !MEMDB ); -// assert( isOpen(pPager->fd) ); - -// if( pagerUseWal(pPager) ){ -// rc = sqlite3WalFindFrame(pPager->pWal, pPg->pgno, &iFrame); -// if( rc ) return rc; -// } -// if( iFrame ){ -// rc = sqlite3WalReadFrame(pPager->pWal, iFrame,pPager->pageSize,pPg->pData); -// }else -// #endif -// { -// i64 iOffset = (pPg->pgno-1)*(i64)pPager->pageSize; -// rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset); -// if( rc==SQLITE_IOERR_SHORT_READ ){ -// rc = SQLITE_OK; -// } -// } - -// if( pPg->pgno==1 ){ -// if( rc ){ -// /* If the read is unsuccessful, set the dbFileVers[] to something -// ** that will never be a valid file version. dbFileVers[] is a copy -// ** of bytes 24..39 of the database. Bytes 28..31 should always be -// ** zero or the size of the database in page. Bytes 32..35 and 35..39 -// ** should be page numbers which are never 0xffffffff. So filling -// ** pPager->dbFileVers[] with all 0xff bytes should suffice. -// ** -// ** For an encrypted database, the situation is more complex: bytes -// ** 24..39 of the database are white noise. But the probability of -// ** white noise equaling 16 bytes of 0xff is vanishingly small so -// ** we should still be ok. 
-// */ -// memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers)); -// }else{ -// u8 *dbFileVers = &((u8*)pPg->pData)[24]; -// memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers)); -// } -// } -// PAGER_INCR(sqlite3_pager_readdb_count); -// PAGER_INCR(pPager->nRead); -// IOTRACE(("PGIN %p %d\n", pPager, pPg->pgno)); -// PAGERTRACE(("FETCH %d page %d hash(%08x)\n", -// PAGERID(pPager), pPg->pgno, pager_pagehash(pPg))); - -// return rc; -// } - -// /* -// ** Update the value of the change-counter at offsets 24 and 92 in -// ** the header and the sqlite version number at offset 96. -// ** -// ** This is an unconditional update. See also the pager_incr_changecounter() -// ** routine which only updates the change-counter if the update is actually -// ** needed, as determined by the pPager->changeCountDone state variable. -// */ -// static void pager_write_changecounter(PgHdr *pPg){ -// u32 change_counter; -// if( NEVER(pPg==0) ) return; - -// /* Increment the value just read and write it back to byte 24. */ -// change_counter = sqlite3Get4byte((u8*)pPg->pPager->dbFileVers)+1; -// put32bits(((char*)pPg->pData)+24, change_counter); - -// /* Also store the SQLite version number in bytes 96..99 and in -// ** bytes 92..95 store the change counter for which the version number -// ** is valid. */ -// put32bits(((char*)pPg->pData)+92, change_counter); -// put32bits(((char*)pPg->pData)+96, SQLITE_VERSION_NUMBER); -// } - -// #ifndef SQLITE_OMIT_WAL -// /* -// ** This function is invoked once for each page that has already been -// ** written into the log file when a WAL transaction is rolled back. -// ** Parameter iPg is the page number of said page. The pCtx argument -// ** is actually a pointer to the Pager structure. -// ** -// ** If page iPg is present in the cache, and has no outstanding references, -// ** it is discarded. Otherwise, if there are one or more outstanding -// ** references, the page content is reloaded from the database. 
If the -// ** attempt to reload content from the database is required and fails, -// ** return an SQLite error code. Otherwise, SQLITE_OK. -// */ -// static int pagerUndoCallback(void *pCtx, Pgno iPg){ -// int rc = SQLITE_OK; -// Pager *pPager = (Pager *)pCtx; -// PgHdr *pPg; - -// assert( pagerUseWal(pPager) ); -// pPg = sqlite3PagerLookup(pPager, iPg); -// if( pPg ){ -// if( sqlite3PcachePageRefcount(pPg)==1 ){ -// sqlite3PcacheDrop(pPg); -// }else{ -// rc = readDbPage(pPg); -// if( rc==SQLITE_OK ){ -// pPager->xReiniter(pPg); -// } -// sqlite3PagerUnrefNotNull(pPg); -// } -// } - -// /* Normally, if a transaction is rolled back, any backup processes are -// ** updated as data is copied out of the rollback journal and into the -// ** database. This is not generally possible with a WAL database, as -// ** rollback involves simply truncating the log file. Therefore, if one -// ** or more frames have already been written to the log (and therefore -// ** also copied into the backup databases) as part of this transaction, -// ** the backups must be restarted. -// */ -// sqlite3BackupRestart(pPager->pBackup); - -// return rc; -// } - -// /* -// ** This function is called to rollback a transaction on a WAL database. -// */ -// static int pagerRollbackWal(Pager *pPager){ -// int rc; /* Return Code */ -// PgHdr *pList; /* List of dirty pages to revert */ - -// /* For all pages in the cache that are currently dirty or have already -// ** been written (but not committed) to the log file, do one of the -// ** following: -// ** -// ** + Discard the cached page (if refcount==0), or -// ** + Reload page content from the database (if refcount>0). 
-// */ -// pPager->dbSize = pPager->dbOrigSize; -// rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager); -// pList = sqlite3PcacheDirtyList(pPager->pPCache); -// while( pList && rc==SQLITE_OK ){ -// PgHdr *pNext = pList->pDirty; -// rc = pagerUndoCallback((void *)pPager, pList->pgno); -// pList = pNext; -// } - -// return rc; -// } - -// /* -// ** This function is a wrapper around sqlite3WalFrames(). As well as logging -// ** the contents of the list of pages headed by pList (connected by pDirty), -// ** this function notifies any active backup processes that the pages have -// ** changed. -// ** -// ** The list of pages passed into this routine is always sorted by page number. -// ** Hence, if page 1 appears anywhere on the list, it will be the first page. -// */ -// static int pagerWalFrames( -// Pager *pPager, /* Pager object */ -// PgHdr *pList, /* List of frames to log */ -// Pgno nTruncate, /* Database size after this commit */ -// int isCommit /* True if this is a commit */ -// ){ -// int rc; /* Return code */ -// int nList; /* Number of pages in pList */ -// PgHdr *p; /* For looping over pages */ - -// assert( pPager->pWal ); -// assert( pList ); -// #ifdef SQLITE_DEBUG -// /* Verify that the page list is in accending order */ -// for(p=pList; p && p->pDirty; p=p->pDirty){ -// assert( p->pgno < p->pDirty->pgno ); -// } -// #endif - -// assert( pList->pDirty==0 || isCommit ); -// if( isCommit ){ -// /* If a WAL transaction is being committed, there is no point in writing -// ** any pages with page numbers greater than nTruncate into the WAL file. -// ** They will never be read by any client. So remove them from the pDirty -// ** list here. 
*/ -// PgHdr **ppNext = &pList; -// nList = 0; -// for(p=pList; (*ppNext = p)!=0; p=p->pDirty){ -// if( p->pgno<=nTruncate ){ -// ppNext = &p->pDirty; -// nList++; -// } -// } -// assert( pList ); -// }else{ -// nList = 1; -// } -// pPager->aStat[PAGER_STAT_WRITE] += nList; - -// if( pList->pgno==1 ) pager_write_changecounter(pList); -// rc = sqlite3WalFrames(pPager->pWal, -// pPager->pageSize, pList, nTruncate, isCommit, pPager->walSyncFlags -// ); -// if( rc==SQLITE_OK && pPager->pBackup ){ -// for(p=pList; p; p=p->pDirty){ -// sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData); -// } -// } - -// #ifdef SQLITE_CHECK_PAGES -// pList = sqlite3PcacheDirtyList(pPager->pPCache); -// for(p=pList; p; p=p->pDirty){ -// pager_set_pagehash(p); -// } -// #endif - -// return rc; -// } - -// /* -// ** Begin a read transaction on the WAL. -// ** -// ** This routine used to be called "pagerOpenSnapshot()" because it essentially -// ** makes a snapshot of the database at the current point in time and preserves -// ** that snapshot for use by the reader in spite of concurrently changes by -// ** other writers or checkpointers. -// */ -// static int pagerBeginReadTransaction(Pager *pPager){ -// int rc; /* Return code */ -// int changed = 0; /* True if cache must be reset */ - -// assert( pagerUseWal(pPager) ); -// assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER ); - -// /* sqlite3WalEndReadTransaction() was not called for the previous -// ** transaction in locking_mode=EXCLUSIVE. So call it now. If we -// ** are in locking_mode=NORMAL and EndRead() was previously called, -// ** the duplicate call is harmless. 
-// */ -// sqlite3WalEndReadTransaction(pPager->pWal); - -// rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed); -// if( rc!=SQLITE_OK || changed ){ -// pager_reset(pPager); -// if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0); -// } - -// return rc; -// } -// #endif - -// /* -// ** This function is called as part of the transition from PAGER_OPEN -// ** to PAGER_READER state to determine the size of the database file -// ** in pages (assuming the page size currently stored in Pager.pageSize). -// ** -// ** If no error occurs, SQLITE_OK is returned and the size of the database -// ** in pages is stored in *pnPage. Otherwise, an error code (perhaps -// ** SQLITE_IOERR_FSTAT) is returned and *pnPage is left unmodified. -// */ -// static int pagerPagecount(Pager *pPager, Pgno *pnPage){ -// Pgno nPage; /* Value to return via *pnPage */ - -// /* Query the WAL sub-system for the database size. The WalDbsize() -// ** function returns zero if the WAL is not open (i.e. Pager.pWal==0), or -// ** if the database size is not available. The database size is not -// ** available from the WAL sub-system if the log file is empty or -// ** contains no valid committed transactions. -// */ -// assert( pPager->eState==PAGER_OPEN ); -// assert( pPager->eLock>=SHARED_LOCK ); -// assert( isOpen(pPager->fd) ); -// assert( pPager->tempFile==0 ); -// nPage = sqlite3WalDbsize(pPager->pWal); - -// /* If the number of pages in the database is not available from the -// ** WAL sub-system, determine the page count based on the size of -// ** the database file. If the size of the database file is not an -// ** integer multiple of the page-size, round up the result. 
-// */ -// if( nPage==0 && ALWAYS(isOpen(pPager->fd)) ){ -// i64 n = 0; /* Size of db file in bytes */ -// int rc = sqlite3OsFileSize(pPager->fd, &n); -// if( rc!=SQLITE_OK ){ -// return rc; -// } -// nPage = (Pgno)((n+pPager->pageSize-1) / pPager->pageSize); -// } - -// /* If the current number of pages in the file is greater than the -// ** configured maximum pager number, increase the allowed limit so -// ** that the file can be read. -// */ -// if( nPage>pPager->mxPgno ){ -// pPager->mxPgno = (Pgno)nPage; -// } - -// *pnPage = nPage; -// return SQLITE_OK; -// } - -// #ifndef SQLITE_OMIT_WAL -// /* -// ** Check if the *-wal file that corresponds to the database opened by pPager -// ** exists if the database is not empy, or verify that the *-wal file does -// ** not exist (by deleting it) if the database file is empty. -// ** -// ** If the database is not empty and the *-wal file exists, open the pager -// ** in WAL mode. If the database is empty or if no *-wal file exists and -// ** if no error occurs, make sure Pager.journalMode is not set to -// ** PAGER_JOURNALMODE_WAL. -// ** -// ** Return SQLITE_OK or an error code. -// ** -// ** The caller must hold a SHARED lock on the database file to call this -// ** function. Because an EXCLUSIVE lock on the db file is required to delete -// ** a WAL on a none-empty database, this ensures there is no race condition -// ** between the xAccess() below and an xDelete() being executed by some -// ** other connection. 
-// */ -// static int pagerOpenWalIfPresent(Pager *pPager){ -// int rc = SQLITE_OK; -// assert( pPager->eState==PAGER_OPEN ); -// assert( pPager->eLock>=SHARED_LOCK ); - -// if( !pPager->tempFile ){ -// int isWal; /* True if WAL file exists */ -// rc = sqlite3OsAccess( -// pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &isWal -// ); -// if( rc==SQLITE_OK ){ -// if( isWal ){ -// Pgno nPage; /* Size of the database file */ - -// rc = pagerPagecount(pPager, &nPage); -// if( rc ) return rc; -// if( nPage==0 ){ -// rc = sqlite3OsDelete(pPager->pVfs, pPager->zWal, 0); -// }else{ -// testcase( sqlite3PcachePagecount(pPager->pPCache)==0 ); -// rc = sqlite3PagerOpenWal(pPager, 0); -// } -// }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){ -// pPager->journalMode = PAGER_JOURNALMODE_DELETE; -// } -// } -// } -// return rc; -// } -// #endif - -// /* -// ** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback -// ** the entire super-journal file. The case pSavepoint==NULL occurs when -// ** a ROLLBACK TO command is invoked on a SAVEPOINT that is a transaction -// ** savepoint. -// ** -// ** When pSavepoint is not NULL (meaning a non-transaction savepoint is -// ** being rolled back), then the rollback consists of up to three stages, -// ** performed in the order specified: -// ** -// ** * Pages are played back from the main journal starting at byte -// ** offset PagerSavepoint.iOffset and continuing to -// ** PagerSavepoint.iHdrOffset, or to the end of the main journal -// ** file if PagerSavepoint.iHdrOffset is zero. -// ** -// ** * If PagerSavepoint.iHdrOffset is not zero, then pages are played -// ** back starting from the journal header immediately following -// ** PagerSavepoint.iHdrOffset to the end of the main journal file. -// ** -// ** * Pages are then played back from the sub-journal file, starting -// ** with the PagerSavepoint.iSubRec and continuing to the end of -// ** the journal file. 
-// ** -// ** Throughout the rollback process, each time a page is rolled back, the -// ** corresponding bit is set in a bitvec structure (variable pDone in the -// ** implementation below). This is used to ensure that a page is only -// ** rolled back the first time it is encountered in either journal. -// ** -// ** If pSavepoint is NULL, then pages are only played back from the main -// ** journal file. There is no need for a bitvec in this case. -// ** -// ** In either case, before playback commences the Pager.dbSize variable -// ** is reset to the value that it held at the start of the savepoint -// ** (or transaction). No page with a page-number greater than this value -// ** is played back. If one is encountered it is simply skipped. -// */ -// static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){ -// i64 szJ; /* Effective size of the main journal */ -// i64 iHdrOff; /* End of first segment of main-journal records */ -// int rc = SQLITE_OK; /* Return code */ -// Bitvec *pDone = 0; /* Bitvec to ensure pages played back only once */ - -// assert( pPager->eState!=PAGER_ERROR ); -// assert( pPager->eState>=PAGER_WRITER_LOCKED ); - -// /* Allocate a bitvec to use to store the set of pages rolled back */ -// if( pSavepoint ){ -// pDone = sqlite3BitvecCreate(pSavepoint->nOrig); -// if( !pDone ){ -// return SQLITE_NOMEM; -// } -// } - -// /* Set the database size back to the value it was before the savepoint -// ** being reverted was opened. -// */ -// pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize; -// pPager->changeCountDone = pPager->tempFile; - -// if( !pSavepoint && pagerUseWal(pPager) ){ -// return pagerRollbackWal(pPager); -// } - -// /* Use pPager->journalOff as the effective size of the main rollback -// ** journal. The actual file might be larger than this in -// ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST. But anything -// ** past pPager->journalOff is off-limits to us. 
-// */ -// szJ = pPager->journalOff; -// assert( pagerUseWal(pPager)==0 || szJ==0 ); - -// /* Begin by rolling back records from the main journal starting at -// ** PagerSavepoint.iOffset and continuing to the next journal header. -// ** There might be records in the main journal that have a page number -// ** greater than the current database size (pPager->dbSize) but those -// ** will be skipped automatically. Pages are added to pDone as they -// ** are played back. -// */ -// if( pSavepoint && !pagerUseWal(pPager) ){ -// iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ; -// pPager->journalOff = pSavepoint->iOffset; -// while( rc==SQLITE_OK && pPager->journalOffjournalOff, pDone, 1, 1); -// } -// assert( rc!=SQLITE_DONE ); -// }else{ -// pPager->journalOff = 0; -// } - -// /* Continue rolling back records out of the main journal starting at -// ** the first journal header seen and continuing until the effective end -// ** of the main journal file. Continue to skip out-of-range pages and -// ** continue adding pages rolled back to pDone. -// */ -// while( rc==SQLITE_OK && pPager->journalOffjournalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff" -// ** test is related to ticket #2565. See the discussion in the -// ** pager_playback() function for additional information. -// */ -// if( nJRec==0 -// && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff -// ){ -// nJRec = (u32)((szJ - pPager->journalOff)/JOURNAL_PG_SZ(pPager)); -// } -// for(ii=0; rc==SQLITE_OK && iijournalOffjournalOff, pDone, 1, 1); -// } -// assert( rc!=SQLITE_DONE ); -// } -// assert( rc!=SQLITE_OK || pPager->journalOff>=szJ ); - -// /* Finally, rollback pages from the sub-journal. Page that were -// ** previously rolled back out of the main journal (and are hence in pDone) -// ** will be skipped. Out-of-range pages are also skipped. 
-// */ -// if( pSavepoint ){ -// u32 ii; /* Loop counter */ -// i64 offset = (i64)pSavepoint->iSubRec*(4+pPager->pageSize); - -// if( pagerUseWal(pPager) ){ -// rc = sqlite3WalSavepointUndo(pPager->pWal, pSavepoint->aWalData); -// } -// for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && iinSubRec; ii++){ -// assert( offset==(i64)ii*(4+pPager->pageSize) ); -// rc = pager_playback_one_page(pPager, &offset, pDone, 0, 1); -// } -// assert( rc!=SQLITE_DONE ); -// } - -// sqlite3BitvecDestroy(pDone); -// if( rc==SQLITE_OK ){ -// pPager->journalOff = szJ; -// } - -// return rc; -// } - -// /* -// ** Change the maximum number of in-memory pages that are allowed -// ** before attempting to recycle clean and unused pages. -// */ -// void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){ -// sqlite3PcacheSetCachesize(pPager->pPCache, mxPage); -// } - -// /* -// ** Change the maximum number of in-memory pages that are allowed -// ** before attempting to spill pages to journal. -// */ -// int sqlite3PagerSetSpillsize(Pager *pPager, int mxPage){ -// return sqlite3PcacheSetSpillsize(pPager->pPCache, mxPage); -// } - -// /* -// ** Invoke SQLITE_FCNTL_MMAP_SIZE based on the current value of szMmap. -// */ -// static void pagerFixMaplimit(Pager *pPager){ -// #if SQLITE_MAX_MMAP_SIZE>0 -// sqlite3_file *fd = pPager->fd; -// if( isOpen(fd) && fd->pMethods->iVersion>=3 ){ -// sqlite3_int64 sz; -// sz = pPager->szMmap; -// pPager->bUseFetch = (sz>0); -// setGetterMethod(pPager); -// sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_MMAP_SIZE, &sz); -// } -// #endif -// } - -// /* -// ** Change the maximum size of any memory mapping made of the database file. -// */ -// void sqlite3PagerSetMmapLimit(Pager *pPager, sqlite3_int64 szMmap){ -// pPager->szMmap = szMmap; -// pagerFixMaplimit(pPager); -// } - -// /* -// ** Free as much memory as possible from the pager. 
-// */ -// void sqlite3PagerShrink(Pager *pPager){ -// sqlite3PcacheShrink(pPager->pPCache); -// } - -// /* -// ** Adjust settings of the pager to those specified in the pgFlags parameter. -// ** -// ** The "level" in pgFlags & PAGER_SYNCHRONOUS_MASK sets the robustness -// ** of the database to damage due to OS crashes or power failures by -// ** changing the number of syncs()s when writing the journals. -// ** There are four levels: -// ** -// ** OFF sqlite3OsSync() is never called. This is the default -// ** for temporary and transient files. -// ** -// ** NORMAL The journal is synced once before writes begin on the -// ** database. This is normally adequate protection, but -// ** it is theoretically possible, though very unlikely, -// ** that an inopertune power failure could leave the journal -// ** in a state which would cause damage to the database -// ** when it is rolled back. -// ** -// ** FULL The journal is synced twice before writes begin on the -// ** database (with some additional information - the nRec field -// ** of the journal header - being written in between the two -// ** syncs). If we assume that writing a -// ** single disk sector is atomic, then this mode provides -// ** assurance that the journal will not be corrupted to the -// ** point of causing damage to the database during rollback. -// ** -// ** EXTRA This is like FULL except that is also syncs the directory -// ** that contains the rollback journal after the rollback -// ** journal is unlinked. -// ** -// ** The above is for a rollback-journal mode. For WAL mode, OFF continues -// ** to mean that no syncs ever occur. NORMAL means that the WAL is synced -// ** prior to the start of checkpoint and that the database file is synced -// ** at the conclusion of the checkpoint if the entire content of the WAL -// ** was written back into the database. But no sync operations occur for -// ** an ordinary commit in NORMAL mode with WAL. 
FULL means that the WAL -// ** file is synced following each commit operation, in addition to the -// ** syncs associated with NORMAL. There is no difference between FULL -// ** and EXTRA for WAL mode. -// ** -// ** Do not confuse synchronous=FULL with SQLITE_SYNC_FULL. The -// ** SQLITE_SYNC_FULL macro means to use the MacOSX-style full-fsync -// ** using fcntl(F_FULLFSYNC). SQLITE_SYNC_NORMAL means to do an -// ** ordinary fsync() call. There is no difference between SQLITE_SYNC_FULL -// ** and SQLITE_SYNC_NORMAL on platforms other than MacOSX. But the -// ** synchronous=FULL versus synchronous=NORMAL setting determines when -// ** the xSync primitive is called and is relevant to all platforms. -// ** -// ** Numeric values associated with these states are OFF==1, NORMAL=2, -// ** and FULL=3. -// */ -// #ifndef SQLITE_OMIT_PAGER_PRAGMAS -// void sqlite3PagerSetFlags( -// Pager *pPager, /* The pager to set safety level for */ -// unsigned pgFlags /* Various flags */ -// ){ -// unsigned level = pgFlags & PAGER_SYNCHRONOUS_MASK; -// if( pPager->tempFile ){ -// pPager->noSync = 1; -// pPager->fullSync = 0; -// pPager->extraSync = 0; -// }else{ -// pPager->noSync = level==PAGER_SYNCHRONOUS_OFF ?1:0; -// pPager->fullSync = level>=PAGER_SYNCHRONOUS_FULL ?1:0; -// pPager->extraSync = level==PAGER_SYNCHRONOUS_EXTRA ?1:0; -// } -// if( pPager->noSync ){ -// pPager->syncFlags = 0; -// }else if( pgFlags & PAGER_FULLFSYNC ){ -// pPager->syncFlags = SQLITE_SYNC_FULL; -// }else{ -// pPager->syncFlags = SQLITE_SYNC_NORMAL; -// } -// pPager->walSyncFlags = (pPager->syncFlags<<2); -// if( pPager->fullSync ){ -// pPager->walSyncFlags |= pPager->syncFlags; -// } -// if( (pgFlags & PAGER_CKPT_FULLFSYNC) && !pPager->noSync ){ -// pPager->walSyncFlags |= (SQLITE_SYNC_FULL<<2); -// } -// if( pgFlags & PAGER_CACHESPILL ){ -// pPager->doNotSpill &= ~SPILLFLAG_OFF; -// }else{ -// pPager->doNotSpill |= SPILLFLAG_OFF; -// } -// } -// #endif - -// /* -// ** The following global variable is 
incremented whenever the library -// ** attempts to open a temporary file. This information is used for -// ** testing and analysis only. -// */ -// #ifdef SQLITE_TEST -// int sqlite3_opentemp_count = 0; -// #endif - -// /* -// ** Open a temporary file. -// ** -// ** Write the file descriptor into *pFile. Return SQLITE_OK on success -// ** or some other error code if we fail. The OS will automatically -// ** delete the temporary file when it is closed. -// ** -// ** The flags passed to the VFS layer xOpen() call are those specified -// ** by parameter vfsFlags ORed with the following: -// ** -// ** SQLITE_OPEN_READWRITE -// ** SQLITE_OPEN_CREATE -// ** SQLITE_OPEN_EXCLUSIVE -// ** SQLITE_OPEN_DELETEONCLOSE -// */ -// static int pagerOpentemp( -// Pager *pPager, /* The pager object */ -// sqlite3_file *pFile, /* Write the file descriptor here */ -// int vfsFlags /* Flags passed through to the VFS */ -// ){ -// int rc; /* Return code */ - -// #ifdef SQLITE_TEST -// sqlite3_opentemp_count++; /* Used for testing and analysis only */ -// #endif - -// vfsFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | -// SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE; -// rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0); -// assert( rc!=SQLITE_OK || isOpen(pFile) ); -// return rc; -// } - -// /* -// ** Set the busy handler function. -// ** -// ** The pager invokes the busy-handler if sqlite3OsLock() returns -// ** SQLITE_BUSY when trying to upgrade from no-lock to a SHARED lock, -// ** or when trying to upgrade from a RESERVED lock to an EXCLUSIVE -// ** lock. It does *not* invoke the busy handler when upgrading from -// ** SHARED to RESERVED, or when upgrading from SHARED to EXCLUSIVE -// ** (which occurs during hot-journal rollback). 
Summary: -// ** -// ** Transition | Invokes xBusyHandler -// ** -------------------------------------------------------- -// ** NO_LOCK -> SHARED_LOCK | Yes -// ** SHARED_LOCK -> RESERVED_LOCK | No -// ** SHARED_LOCK -> EXCLUSIVE_LOCK | No -// ** RESERVED_LOCK -> EXCLUSIVE_LOCK | Yes -// ** -// ** If the busy-handler callback returns non-zero, the lock is -// ** retried. If it returns zero, then the SQLITE_BUSY error is -// ** returned to the caller of the pager API function. -// */ -// void sqlite3PagerSetBusyHandler( -// Pager *pPager, /* Pager object */ -// int (*xBusyHandler)(void *), /* Pointer to busy-handler function */ -// void *pBusyHandlerArg /* Argument to pass to xBusyHandler */ -// ){ -// void **ap; -// pPager->xBusyHandler = xBusyHandler; -// pPager->pBusyHandlerArg = pBusyHandlerArg; -// ap = (void **)&pPager->xBusyHandler; -// assert( ((int(*)(void *))(ap[0]))==xBusyHandler ); -// assert( ap[1]==pBusyHandlerArg ); -// sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_BUSYHANDLER, (void *)ap); -// } - -int sqlite3PagerSetPagesize(Pager *pPager, u32 *pPageSize, int nReserve) { - // int rc = SQLITE_OK; - - // /* It is not possible to do a full assert_pager_state() here, as this - // ** function may be called from within PagerOpen(), before the state - // ** of the Pager object is internally consistent. - // ** - // ** At one point this function returned an error if the pager was in - // ** PAGER_ERROR state. But since PAGER_ERROR state guarantees that - // ** there is at least one outstanding page reference, this function - // ** is a no-op for that case anyhow. 
- // */ - - // u32 pageSize = *pPageSize; - // assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) ); - // if( (pPager->memDb==0 || pPager->dbSize==0) - // && sqlite3PcacheRefCount(pPager->pPCache)==0 - // && pageSize && pageSize!=(u32)pPager->pageSize - // ){ - // char *pNew = NULL; /* New temp space */ - // i64 nByte = 0; - - // if( pPager->eState>PAGER_OPEN && isOpen(pPager->fd) ){ - // rc = sqlite3OsFileSize(pPager->fd, &nByte); - // } - // if( rc==SQLITE_OK ){ - // /* 8 bytes of zeroed overrun space is sufficient so that the b-tree - // * cell header parser will never run off the end of the allocation */ - // pNew = (char *)sqlite3PageMalloc(pageSize+8); - // if( !pNew ){ - // rc = SQLITE_NOMEM; - // }else{ - // memset(pNew+pageSize, 0, 8); - // } - // } - - // if( rc==SQLITE_OK ){ - // pager_reset(pPager); - // rc = sqlite3PcacheSetPageSize(pPager->pPCache, pageSize); - // } - // if( rc==SQLITE_OK ){ - // sqlite3PageFree(pPager->pTmpSpace); - // pPager->pTmpSpace = pNew; - // pPager->dbSize = (Pgno)((nByte+pageSize-1)/pageSize); - // pPager->pageSize = pageSize; - // }else{ - // sqlite3PageFree(pNew); - // } - // } - - // *pPageSize = pPager->pageSize; - // if( rc==SQLITE_OK ){ - // if( nReserve<0 ) nReserve = pPager->nReserve; - // assert( nReserve>=0 && nReserve<1000 ); - // pPager->nReserve = (i16)nReserve; - // pagerFixMaplimit(pPager); - // } - // return rc; -} - -// /* -// ** Return a pointer to the "temporary page" buffer held internally -// ** by the pager. This is a buffer that is big enough to hold the -// ** entire content of a database page. This buffer is used internally -// ** during rollback and will be overwritten whenever a rollback -// ** occurs. But other modules are free to use it too, as long as -// ** no rollbacks are happening. -// */ -// void *sqlite3PagerTempSpace(Pager *pPager){ -// return pPager->pTmpSpace; -// } - -// /* -// ** Attempt to set the maximum database page count if mxPage is positive. 
-// ** Make no changes if mxPage is zero or negative. And never reduce the -// ** maximum page count below the current size of the database. -// ** -// ** Regardless of mxPage, return the current maximum page count. -// */ -// Pgno sqlite3PagerMaxPageCount(Pager *pPager, Pgno mxPage){ -// if( mxPage>0 ){ -// pPager->mxPgno = mxPage; -// } -// assert( pPager->eState!=PAGER_OPEN ); /* Called only by OP_MaxPgcnt */ -// /* assert( pPager->mxPgno>=pPager->dbSize ); */ -// /* OP_MaxPgcnt ensures that the parameter passed to this function is not -// ** less than the total number of valid pages in the database. But this -// ** may be less than Pager.dbSize, and so the assert() above is not valid */ -// return pPager->mxPgno; -// } - -// /* -// ** The following set of routines are used to disable the simulated -// ** I/O error mechanism. These routines are used to avoid simulated -// ** errors in places where we do not care about errors. -// ** -// ** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops -// ** and generate no code. -// */ -// #ifdef SQLITE_TEST -// extern int sqlite3_io_error_pending; -// extern int sqlite3_io_error_hit; -// static int saved_cnt; -// void disable_simulated_io_errors(void){ -// saved_cnt = sqlite3_io_error_pending; -// sqlite3_io_error_pending = -1; -// } -// void enable_simulated_io_errors(void){ -// sqlite3_io_error_pending = saved_cnt; -// } -// #else -// # define disable_simulated_io_errors() -// # define enable_simulated_io_errors() -// #endif - -// /* -// ** Read the first N bytes from the beginning of the file into memory -// ** that pDest points to. -// ** -// ** If the pager was opened on a transient file (zFilename==""), or -// ** opened on a file less than N bytes in size, the output buffer is -// ** zeroed and SQLITE_OK returned. The rationale for this is that this -// ** function is used to read database headers, and a new transient or -// ** zero sized database has a header than consists entirely of zeroes. 
-// ** -// ** If any IO error apart from SQLITE_IOERR_SHORT_READ is encountered, -// ** the error code is returned to the caller and the contents of the -// ** output buffer undefined. -// */ -// int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){ -// int rc = SQLITE_OK; -// memset(pDest, 0, N); -// assert( isOpen(pPager->fd) || pPager->tempFile ); - -// /* This routine is only called by btree immediately after creating -// ** the Pager object. There has not been an opportunity to transition -// ** to WAL mode yet. -// */ -// assert( !pagerUseWal(pPager) ); - -// if( isOpen(pPager->fd) ){ -// IOTRACE(("DBHDR %p 0 %d\n", pPager, N)) -// rc = sqlite3OsRead(pPager->fd, pDest, N, 0); -// if( rc==SQLITE_IOERR_SHORT_READ ){ -// rc = SQLITE_OK; -// } -// } -// return rc; -// } - -// /* -// ** This function may only be called when a read-transaction is open on -// ** the pager. It returns the total number of pages in the database. -// ** -// ** However, if the file is between 1 and bytes in size, then -// ** this is considered a 1 page file. -// */ -// void sqlite3PagerPagecount(Pager *pPager, int *pnPage){ -// assert( pPager->eState>=PAGER_READER ); -// assert( pPager->eState!=PAGER_WRITER_FINISHED ); -// *pnPage = (int)pPager->dbSize; -// } - -// /* -// ** Try to obtain a lock of type locktype on the database file. If -// ** a similar or greater lock is already held, this function is a no-op -// ** (returning SQLITE_OK immediately). -// ** -// ** Otherwise, attempt to obtain the lock using sqlite3OsLock(). Invoke -// ** the busy callback if the lock is currently not available. Repeat -// ** until the busy callback returns false or until the attempt to -// ** obtain the lock succeeds. -// ** -// ** Return SQLITE_OK on success and an error code if we cannot obtain -// ** the lock. If the lock is obtained successfully, set the Pager.state -// ** variable to locktype before returning. 
-// */ -// static int pager_wait_on_lock(Pager *pPager, int locktype){ -// int rc; /* Return code */ - -// /* Check that this is either a no-op (because the requested lock is -// ** already held), or one of the transitions that the busy-handler -// ** may be invoked during, according to the comment above -// ** sqlite3PagerSetBusyhandler(). -// */ -// assert( (pPager->eLock>=locktype) -// || (pPager->eLock==NO_LOCK && locktype==SHARED_LOCK) -// || (pPager->eLock==RESERVED_LOCK && locktype==EXCLUSIVE_LOCK) -// ); - -// do { -// rc = pagerLockDb(pPager, locktype); -// }while( rc==SQLITE_BUSY && pPager->xBusyHandler(pPager->pBusyHandlerArg) ); -// return rc; -// } - -// /* -// ** Function assertTruncateConstraint(pPager) checks that one of the -// ** following is true for all dirty pages currently in the page-cache: -// ** -// ** a) The page number is less than or equal to the size of the -// ** current database image, in pages, OR -// ** -// ** b) if the page content were written at this time, it would not -// ** be necessary to write the current content out to the sub-journal -// ** (as determined by function subjRequiresPage()). -// ** -// ** If the condition asserted by this function were not true, and the -// ** dirty page were to be discarded from the cache via the pagerStress() -// ** routine, pagerStress() would not write the current page content to -// ** the database file. If a savepoint transaction were rolled back after -// ** this happened, the correct behavior would be to restore the current -// ** content of the page. However, since this content is not present in either -// ** the database file or the portion of the rollback journal and -// ** sub-journal rolled back the content could not be restored and the -// ** database image would become corrupt. It is therefore fortunate that -// ** this circumstance cannot arise. 
-// */ -// #if defined(SQLITE_DEBUG) -// static void assertTruncateConstraintCb(PgHdr *pPg){ -// assert( pPg->flags&PGHDR_DIRTY ); -// assert( !subjRequiresPage(pPg) || pPg->pgno<=pPg->pPager->dbSize ); -// } -// static void assertTruncateConstraint(Pager *pPager){ -// sqlite3PcacheIterateDirty(pPager->pPCache, assertTruncateConstraintCb); -// } -// #else -// # define assertTruncateConstraint(pPager) -// #endif - -// /* -// ** Truncate the in-memory database file image to nPage pages. This -// ** function does not actually modify the database file on disk. It -// ** just sets the internal state of the pager object so that the -// ** truncation will be done when the current transaction is committed. -// ** -// ** This function is only called right before committing a transaction. -// ** Once this function has been called, the transaction must either be -// ** rolled back or committed. It is not safe to call this function and -// ** then continue writing to the database. -// */ -// void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){ -// assert( pPager->dbSize>=nPage || CORRUPT_DB ); -// testcase( pPager->dbSizeeState>=PAGER_WRITER_CACHEMOD ); -// pPager->dbSize = nPage; - -// /* At one point the code here called assertTruncateConstraint() to -// ** ensure that all pages being truncated away by this operation are, -// ** if one or more savepoints are open, present in the savepoint -// ** journal so that they can be restored if the savepoint is rolled -// ** back. This is no longer necessary as this function is now only -// ** called right before committing a transaction. So although the -// ** Pager object may still have open savepoints (Pager.nSavepoint!=0), -// ** they cannot be rolled back. So the assertTruncateConstraint() call -// ** is no longer correct. */ -// } - -// /* -// ** This function is called before attempting a hot-journal rollback. 
It -// ** syncs the journal file to disk, then sets pPager->journalHdr to the -// ** size of the journal file so that the pager_playback() routine knows -// ** that the entire journal file has been synced. -// ** -// ** Syncing a hot-journal to disk before attempting to roll it back ensures -// ** that if a power-failure occurs during the rollback, the process that -// ** attempts rollback following system recovery sees the same journal -// ** content as this process. -// ** -// ** If everything goes as planned, SQLITE_OK is returned. Otherwise, -// ** an SQLite error code. -// */ -// static int pagerSyncHotJournal(Pager *pPager){ -// int rc = SQLITE_OK; -// if( !pPager->noSync ){ -// rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_NORMAL); -// } -// if( rc==SQLITE_OK ){ -// rc = sqlite3OsFileSize(pPager->jfd, &pPager->journalHdr); -// } -// return rc; -// } - -// #if SQLITE_MAX_MMAP_SIZE>0 -// /* -// ** Obtain a reference to a memory mapped page object for page number pgno. -// ** The new object will use the pointer pData, obtained from xFetch(). -// ** If successful, set *ppPage to point to the new page reference -// ** and return SQLITE_OK. Otherwise, return an SQLite error code and set -// ** *ppPage to zero. -// ** -// ** Page references obtained by calling this function should be released -// ** by calling pagerReleaseMapPage(). 
-// */ -// static int pagerAcquireMapPage( -// Pager *pPager, /* Pager object */ -// Pgno pgno, /* Page number */ -// void *pData, /* xFetch()'d data for this page */ -// PgHdr **ppPage /* OUT: Acquired page object */ -// ){ -// PgHdr *p; /* Memory mapped page to return */ - -// if( pPager->pMmapFreelist ){ -// *ppPage = p = pPager->pMmapFreelist; -// pPager->pMmapFreelist = p->pDirty; -// p->pDirty = 0; -// assert( pPager->nExtra>=8 ); -// memset(p->pExtra, 0, 8); -// }else{ -// *ppPage = p = (PgHdr *)sqlite3MallocZero(sizeof(PgHdr) + pPager->nExtra); -// if( p==0 ){ -// sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1) * pPager->pageSize, pData); -// return SQLITE_NOMEM; -// } -// p->pExtra = (void *)&p[1]; -// p->flags = PGHDR_MMAP; -// p->nRef = 1; -// p->pPager = pPager; -// } - -// assert( p->pExtra==(void *)&p[1] ); -// assert( p->pPage==0 ); -// assert( p->flags==PGHDR_MMAP ); -// assert( p->pPager==pPager ); -// assert( p->nRef==1 ); - -// p->pgno = pgno; -// p->pData = pData; -// pPager->nMmapOut++; - -// return SQLITE_OK; -// } -// #endif - -// /* -// ** Release a reference to page pPg. pPg must have been returned by an -// ** earlier call to pagerAcquireMapPage(). -// */ -// static void pagerReleaseMapPage(PgHdr *pPg){ -// Pager *pPager = pPg->pPager; -// pPager->nMmapOut--; -// pPg->pDirty = pPager->pMmapFreelist; -// pPager->pMmapFreelist = pPg; - -// assert( pPager->fd->pMethods->iVersion>=3 ); -// sqlite3OsUnfetch(pPager->fd, (i64)(pPg->pgno-1)*pPager->pageSize, pPg->pData); -// } - -// /* -// ** Free all PgHdr objects stored in the Pager.pMmapFreelist list. -// */ -// static void pagerFreeMapHdrs(Pager *pPager){ -// PgHdr *p; -// PgHdr *pNext; -// for(p=pPager->pMmapFreelist; p; p=pNext){ -// pNext = p->pDirty; -// sqlite3_free(p); -// } -// } - -// /* Verify that the database file has not be deleted or renamed out from -// ** under the pager. Return SQLITE_OK if the database is still where it ought -// ** to be on disk. 
Return non-zero (SQLITE_READONLY_DBMOVED or some other error -// ** code from sqlite3OsAccess()) if the database has gone missing. -// */ -// static int databaseIsUnmoved(Pager *pPager){ -// int bHasMoved = 0; -// int rc; - -// if( pPager->tempFile ) return SQLITE_OK; -// if( pPager->dbSize==0 ) return SQLITE_OK; -// assert( pPager->zFilename && pPager->zFilename[0] ); -// rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_HAS_MOVED, &bHasMoved); -// if( rc==SQLITE_NOTFOUND ){ -// /* If the HAS_MOVED file-control is unimplemented, assume that the file -// ** has not been moved. That is the historical behavior of SQLite: prior to -// ** version 3.8.3, it never checked */ -// rc = SQLITE_OK; -// }else if( rc==SQLITE_OK && bHasMoved ){ -// rc = SQLITE_READONLY_DBMOVED; -// } -// return rc; -// } - -/* -** Shutdown the page cache. Free all memory and close all files. -** -** If a transaction was in progress when this routine is called, that -** transaction is rolled back. All outstanding pages are invalidated -** and their memory is freed. Any attempt to use a page associated -** with this page cache after this function returns will likely -** result in a coredump. -** -** This function always succeeds. If a transaction is active an attempt -** is made to roll it back. If an error occurs during the rollback -** a hot journal may be left in the filesystem but no error is returned -** to the caller. 
-*/ -int sqlite3PagerClose(Pager *pPager, sqlite3 *db) { - // u8 *pTmp = (u8*)pPager->pTmpSpace; - // assert( db || pagerUseWal(pPager)==0 ); - // assert( assert_pager_state(pPager) ); - // disable_simulated_io_errors(); - // sqlite3BeginBenignMalloc(); - // pagerFreeMapHdrs(pPager); - // /* pPager->errCode = 0; */ - // pPager->exclusiveMode = 0; - // #ifndef SQLITE_OMIT_WAL - // { - // u8 *a = 0; - // assert( db || pPager->pWal==0 ); - // if( db && 0==(db->flags & SQLITE_NoCkptOnClose) - // && SQLITE_OK==databaseIsUnmoved(pPager) - // ){ - // a = pTmp; - // } - // sqlite3WalClose(pPager->pWal, db, pPager->walSyncFlags, pPager->pageSize,a); - // pPager->pWal = 0; - // } - // #endif - // pager_reset(pPager); - // if( MEMDB ){ - // pager_unlock(pPager); - // }else{ - // /* If it is open, sync the journal file before calling UnlockAndRollback. - // ** If this is not done, then an unsynced portion of the open journal - // ** file may be played back into the database. If a power failure occurs - // ** while this is happening, the database could become corrupt. - // ** - // ** If an error occurs while trying to sync the journal, shift the pager - // ** into the ERROR state. This causes UnlockAndRollback to unlock the - // ** database and close the journal file without attempting to roll it - // ** back or finalize it. The next database user will have to do hot-journal - // ** rollback before accessing the database file. 
- // */ - // if( isOpen(pPager->jfd) ){ - // pager_error(pPager, pagerSyncHotJournal(pPager)); - // } - // pagerUnlockAndRollback(pPager); - // } - // sqlite3EndBenignMalloc(); - // enable_simulated_io_errors(); - // PAGERTRACE(("CLOSE %d\n", PAGERID(pPager))); - // IOTRACE(("CLOSE %p\n", pPager)) - // sqlite3OsClose(pPager->jfd); - // sqlite3OsClose(pPager->fd); - // sqlite3PageFree(pTmp); - // sqlite3PcacheClose(pPager->pPCache); - // assert( !pPager->aSavepoint && !pPager->pInJournal ); - // assert( !isOpen(pPager->jfd) && !isOpen(pPager->sjfd) ); - - // sqlite3_free(pPager); - return SQLITE_OK; -} - -// #if !defined(NDEBUG) || defined(SQLITE_TEST) -// /* -// ** Return the page number for page pPg. -// */ -// Pgno sqlite3PagerPagenumber(DbPage *pPg){ -// return pPg->pgno; -// } -// #endif - -// /* -// ** Increment the reference count for page pPg. -// */ -// void sqlite3PagerRef(DbPage *pPg){ -// sqlite3PcacheRef(pPg); -// } - -// /* -// ** Sync the journal. In other words, make sure all the pages that have -// ** been written to the journal have actually reached the surface of the -// ** disk and can be restored in the event of a hot-journal rollback. -// ** -// ** If the Pager.noSync flag is set, then this function is a no-op. -// ** Otherwise, the actions required depend on the journal-mode and the -// ** device characteristics of the file-system, as follows: -// ** -// ** * If the journal file is an in-memory journal file, no action need -// ** be taken. -// ** -// ** * Otherwise, if the device does not support the SAFE_APPEND property, -// ** then the nRec field of the most recently written journal header -// ** is updated to contain the number of journal records that have -// ** been written following it. If the pager is operating in full-sync -// ** mode, then the journal file is synced before this field is updated. -// ** -// ** * If the device does not support the SEQUENTIAL property, then -// ** journal file is synced. 
-// ** -// ** Or, in pseudo-code: -// ** -// ** if( NOT ){ -// ** if( NOT SAFE_APPEND ){ -// ** if( ) xSync(); -// ** -// ** } -// ** if( NOT SEQUENTIAL ) xSync(); -// ** } -// ** -// ** If successful, this routine clears the PGHDR_NEED_SYNC flag of every -// ** page currently held in memory before returning SQLITE_OK. If an IO -// ** error is encountered, then the IO error code is returned to the caller. -// */ -// static int syncJournal(Pager *pPager, int newHdr){ -// int rc; /* Return code */ - -// assert( pPager->eState==PAGER_WRITER_CACHEMOD -// || pPager->eState==PAGER_WRITER_DBMOD -// ); -// assert( assert_pager_state(pPager) ); -// assert( !pagerUseWal(pPager) ); - -// rc = sqlite3PagerExclusiveLock(pPager); -// if( rc!=SQLITE_OK ) return rc; - -// if( !pPager->noSync ){ -// assert( !pPager->tempFile ); -// if( isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){ -// const int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); -// assert( isOpen(pPager->jfd) ); - -// if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){ -// /* This block deals with an obscure problem. If the last connection -// ** that wrote to this database was operating in persistent-journal -// ** mode, then the journal file may at this point actually be larger -// ** than Pager.journalOff bytes. If the next thing in the journal -// ** file happens to be a journal-header (written as part of the -// ** previous connection's transaction), and a crash or power-failure -// ** occurs after nRec is updated but before this connection writes -// ** anything else to the journal file (or commits/rolls back its -// ** transaction), then SQLite may become confused when doing the -// ** hot-journal rollback following recovery. It may roll back all -// ** of this connections data, then proceed to rolling back the old, -// ** out-of-date data that follows it. Database corruption. 
-// ** -// ** To work around this, if the journal file does appear to contain -// ** a valid header following Pager.journalOff, then write a 0x00 -// ** byte to the start of it to prevent it from being recognized. -// ** -// ** Variable iNextHdrOffset is set to the offset at which this -// ** problematic header will occur, if it exists. aMagic is used -// ** as a temporary buffer to inspect the first couple of bytes of -// ** the potential journal header. -// */ -// i64 iNextHdrOffset; -// u8 aMagic[8]; -// u8 zHeader[sizeof(aJournalMagic)+4]; - -// memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); -// put32bits(&zHeader[sizeof(aJournalMagic)], pPager->nRec); - -// iNextHdrOffset = journalHdrOffset(pPager); -// rc = sqlite3OsRead(pPager->jfd, aMagic, 8, iNextHdrOffset); -// if( rc==SQLITE_OK && 0==memcmp(aMagic, aJournalMagic, 8) ){ -// static const u8 zerobyte = 0; -// rc = sqlite3OsWrite(pPager->jfd, &zerobyte, 1, iNextHdrOffset); -// } -// if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ -// return rc; -// } - -// /* Write the nRec value into the journal file header. If in -// ** full-synchronous mode, sync the journal first. This ensures that -// ** all data has really hit the disk before nRec is updated to mark -// ** it as a candidate for rollback. -// ** -// ** This is not required if the persistent media supports the -// ** SAFE_APPEND property. Because in this case it is not possible -// ** for garbage data to be appended to the file, the nRec field -// ** is populated with 0xFFFFFFFF when the journal header is written -// ** and never needs to be updated. 
-// */ -// if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ -// PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager))); -// IOTRACE(("JSYNC %p\n", pPager)) -// rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags); -// if( rc!=SQLITE_OK ) return rc; -// } -// IOTRACE(("JHDR %p %lld\n", pPager, pPager->journalHdr)); -// rc = sqlite3OsWrite( -// pPager->jfd, zHeader, sizeof(zHeader), pPager->journalHdr -// ); -// if( rc!=SQLITE_OK ) return rc; -// } -// if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ -// PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager))); -// IOTRACE(("JSYNC %p\n", pPager)) -// rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags| -// (pPager->syncFlags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0) -// ); -// if( rc!=SQLITE_OK ) return rc; -// } - -// pPager->journalHdr = pPager->journalOff; -// if( newHdr && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){ -// pPager->nRec = 0; -// rc = writeJournalHdr(pPager); -// if( rc!=SQLITE_OK ) return rc; -// } -// }else{ -// pPager->journalHdr = pPager->journalOff; -// } -// } - -// /* Unless the pager is in noSync mode, the journal file was just -// ** successfully synced. Either way, clear the PGHDR_NEED_SYNC flag on -// ** all pages. -// */ -// sqlite3PcacheClearSyncFlags(pPager->pPCache); -// pPager->eState = PAGER_WRITER_DBMOD; -// assert( assert_pager_state(pPager) ); -// return SQLITE_OK; -// } - -// /* -// ** The argument is the first in a linked list of dirty pages connected -// ** by the PgHdr.pDirty pointer. This function writes each one of the -// ** in-memory pages in the list to the database file. The argument may -// ** be NULL, representing an empty list. In this case this function is -// ** a no-op. -// ** -// ** The pager must hold at least a RESERVED lock when this function -// ** is called. Before writing anything to the database file, this lock -// ** is upgraded to an EXCLUSIVE lock. If the lock cannot be obtained, -// ** SQLITE_BUSY is returned and no data is written to the database file. 
-// ** -// ** If the pager is a temp-file pager and the actual file-system file -// ** is not yet open, it is created and opened before any data is -// ** written out. -// ** -// ** Once the lock has been upgraded and, if necessary, the file opened, -// ** the pages are written out to the database file in list order. Writing -// ** a page is skipped if it meets either of the following criteria: -// ** -// ** * The page number is greater than Pager.dbSize, or -// ** * The PGHDR_DONT_WRITE flag is set on the page. -// ** -// ** If writing out a page causes the database file to grow, Pager.dbFileSize -// ** is updated accordingly. If page 1 is written out, then the value cached -// ** in Pager.dbFileVers[] is updated to match the new value stored in -// ** the database file. -// ** -// ** If everything is successful, SQLITE_OK is returned. If an IO error -// ** occurs, an IO error code is returned. Or, if the EXCLUSIVE lock cannot -// ** be obtained, SQLITE_BUSY is returned. -// */ -// static int pager_write_pagelist(Pager *pPager, PgHdr *pList){ -// int rc = SQLITE_OK; /* Return code */ - -// /* This function is only called for rollback pagers in WRITER_DBMOD state. */ -// assert( !pagerUseWal(pPager) ); -// assert( pPager->tempFile || pPager->eState==PAGER_WRITER_DBMOD ); -// assert( pPager->eLock==EXCLUSIVE_LOCK ); -// assert( isOpen(pPager->fd) || pList->pDirty==0 ); - -// /* If the file is a temp-file has not yet been opened, open it now. It -// ** is not possible for rc to be other than SQLITE_OK if this branch -// ** is taken, as pager_wait_on_lock() is a no-op for temp-files. -// */ -// if( !isOpen(pPager->fd) ){ -// assert( pPager->tempFile && rc==SQLITE_OK ); -// rc = pagerOpentemp(pPager, pPager->fd, pPager->vfsFlags); -// } - -// /* Before the first write, give the VFS a hint of what the final -// ** file size will be. 
-// */ -// assert( rc!=SQLITE_OK || isOpen(pPager->fd) ); -// if( rc==SQLITE_OK -// && pPager->dbHintSizedbSize -// && (pList->pDirty || pList->pgno>pPager->dbHintSize) -// ){ -// sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize; -// sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile); -// pPager->dbHintSize = pPager->dbSize; -// } - -// while( rc==SQLITE_OK && pList ){ -// Pgno pgno = pList->pgno; - -// /* If there are dirty pages in the page cache with page numbers greater -// ** than Pager.dbSize, this means sqlite3PagerTruncateImage() was called to -// ** make the file smaller (presumably by auto-vacuum code). Do not write -// ** any such pages to the file. -// ** -// ** Also, do not write out any page that has the PGHDR_DONT_WRITE flag -// ** set (set by sqlite3PagerDontWrite()). -// */ -// if( pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){ -// i64 offset = (pgno-1)*(i64)pPager->pageSize; /* Offset to write */ -// char *pData; /* Data to write */ - -// assert( (pList->flags&PGHDR_NEED_SYNC)==0 ); -// if( pList->pgno==1 ) pager_write_changecounter(pList); - -// pData = pList->pData; - -// /* Write out the page data. */ -// rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset); - -// /* If page 1 was just written, update Pager.dbFileVers to match -// ** the value now stored in the database file. If writing this -// ** page caused the database file to grow, update dbFileSize. -// */ -// if( pgno==1 ){ -// memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers)); -// } -// if( pgno>pPager->dbFileSize ){ -// pPager->dbFileSize = pgno; -// } -// pPager->aStat[PAGER_STAT_WRITE]++; - -// /* Update any backup objects copying the contents of this pager. 
*/ -// sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)pList->pData); - -// PAGERTRACE(("STORE %d page %d hash(%08x)\n", -// PAGERID(pPager), pgno, pager_pagehash(pList))); -// IOTRACE(("PGOUT %p %d\n", pPager, pgno)); -// PAGER_INCR(sqlite3_pager_writedb_count); -// }else{ -// PAGERTRACE(("NOSTORE %d page %d\n", PAGERID(pPager), pgno)); -// } -// pager_set_pagehash(pList); -// pList = pList->pDirty; -// } - -// return rc; -// } - -// /* -// ** Ensure that the sub-journal file is open. If it is already open, this -// ** function is a no-op. -// ** -// ** SQLITE_OK is returned if everything goes according to plan. An -// ** SQLITE_IOERR_XXX error code is returned if a call to sqlite3OsOpen() -// ** fails. -// */ -// static int openSubJournal(Pager *pPager){ -// int rc = SQLITE_OK; -// if( !isOpen(pPager->sjfd) ){ -// const int flags = SQLITE_OPEN_SUBJOURNAL | SQLITE_OPEN_READWRITE -// | SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE -// | SQLITE_OPEN_DELETEONCLOSE; -// int nStmtSpill = sqlite3Config.nStmtSpill; -// if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){ -// nStmtSpill = -1; -// } -// rc = sqlite3JournalOpen(pPager->pVfs, 0, pPager->sjfd, flags, nStmtSpill); -// } -// return rc; -// } - -// /* -// ** Append a record of the current state of page pPg to the sub-journal. -// ** -// ** If successful, set the bit corresponding to pPg->pgno in the bitvecs -// ** for all open savepoints before returning. -// ** -// ** This function returns SQLITE_OK if everything is successful, an IO -// ** error code if the attempt to write to the sub-journal fails, or -// ** SQLITE_NOMEM if a malloc fails while setting a bit in a savepoint -// ** bitvec. 
-// */ -// static int subjournalPage(PgHdr *pPg){ -// int rc = SQLITE_OK; -// Pager *pPager = pPg->pPager; -// if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ - -// /* Open the sub-journal, if it has not already been opened */ -// assert( pPager->useJournal ); -// assert( isOpen(pPager->jfd) || pagerUseWal(pPager) ); -// assert( isOpen(pPager->sjfd) || pPager->nSubRec==0 ); -// assert( pagerUseWal(pPager) -// || pageInJournal(pPager, pPg) -// || pPg->pgno>pPager->dbOrigSize -// ); -// rc = openSubJournal(pPager); - -// /* If the sub-journal was opened successfully (or was already open), -// ** write the journal record into the file. */ -// if( rc==SQLITE_OK ){ -// void *pData = pPg->pData; -// i64 offset = (i64)pPager->nSubRec*(4+pPager->pageSize); -// char *pData2; -// pData2 = pData; -// PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno)); -// rc = write32bits(pPager->sjfd, offset, pPg->pgno); -// if( rc==SQLITE_OK ){ -// rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4); -// } -// } -// } -// if( rc==SQLITE_OK ){ -// pPager->nSubRec++; -// assert( pPager->nSavepoint>0 ); -// rc = addToSavepointBitvecs(pPager, pPg->pgno); -// } -// return rc; -// } -// static int subjournalPageIfRequired(PgHdr *pPg){ -// if( subjRequiresPage(pPg) ){ -// return subjournalPage(pPg); -// }else{ -// return SQLITE_OK; -// } -// } - -// /* -// ** This function is called by the pcache layer when it has reached some -// ** soft memory limit. The first argument is a pointer to a Pager object -// ** (cast as a void*). The pager is always 'purgeable' (not an in-memory -// ** database). The second argument is a reference to a page that is -// ** currently dirty but has no outstanding references. The page -// ** is always associated with the Pager object passed as the first -// ** argument. -// ** -// ** The job of this function is to make pPg clean by writing its contents -// ** out to the database file, if possible. 
This may involve syncing the -// ** journal file. -// ** -// ** If successful, sqlite3PcacheMakeClean() is called on the page and -// ** SQLITE_OK returned. If an IO error occurs while trying to make the -// ** page clean, the IO error code is returned. If the page cannot be -// ** made clean for some other reason, but no error occurs, then SQLITE_OK -// ** is returned by sqlite3PcacheMakeClean() is not called. -// */ -// static int pagerStress(void *p, PgHdr *pPg){ -// Pager *pPager = (Pager *)p; -// int rc = SQLITE_OK; - -// assert( pPg->pPager==pPager ); -// assert( pPg->flags&PGHDR_DIRTY ); - -// /* The doNotSpill NOSYNC bit is set during times when doing a sync of -// ** journal (and adding a new header) is not allowed. This occurs -// ** during calls to sqlite3PagerWrite() while trying to journal multiple -// ** pages belonging to the same sector. -// ** -// ** The doNotSpill ROLLBACK and OFF bits inhibits all cache spilling -// ** regardless of whether or not a sync is required. This is set during -// ** a rollback or by user request, respectively. -// ** -// ** Spilling is also prohibited when in an error state since that could -// ** lead to database corruption. In the current implementation it -// ** is impossible for sqlite3PcacheFetch() to be called with createFlag==3 -// ** while in the error state, hence it is impossible for this routine to -// ** be called in the error state. Nevertheless, we include a NEVER() -// ** test for the error state as a safeguard against future changes. 
-// */ -// if( NEVER(pPager->errCode) ) return SQLITE_OK; -// testcase( pPager->doNotSpill & SPILLFLAG_ROLLBACK ); -// testcase( pPager->doNotSpill & SPILLFLAG_OFF ); -// testcase( pPager->doNotSpill & SPILLFLAG_NOSYNC ); -// if( pPager->doNotSpill -// && ((pPager->doNotSpill & (SPILLFLAG_ROLLBACK|SPILLFLAG_OFF))!=0 -// || (pPg->flags & PGHDR_NEED_SYNC)!=0) -// ){ -// return SQLITE_OK; -// } - -// pPager->aStat[PAGER_STAT_SPILL]++; -// pPg->pDirty = 0; -// if( pagerUseWal(pPager) ){ -// /* Write a single frame for this page to the log. */ -// rc = subjournalPageIfRequired(pPg); -// if( rc==SQLITE_OK ){ -// rc = pagerWalFrames(pPager, pPg, 0, 0); -// } -// }else{ - -// #ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE -// if( pPager->tempFile==0 ){ -// rc = sqlite3JournalCreate(pPager->jfd); -// if( rc!=SQLITE_OK ) return pager_error(pPager, rc); -// } -// #endif - -// /* Sync the journal file if required. */ -// if( pPg->flags&PGHDR_NEED_SYNC -// || pPager->eState==PAGER_WRITER_CACHEMOD -// ){ -// rc = syncJournal(pPager, 1); -// } - -// /* Write the contents of the page out to the database file. */ -// if( rc==SQLITE_OK ){ -// assert( (pPg->flags&PGHDR_NEED_SYNC)==0 ); -// rc = pager_write_pagelist(pPager, pPg); -// } -// } - -// /* Mark the page as clean. */ -// if( rc==SQLITE_OK ){ -// PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno)); -// sqlite3PcacheMakeClean(pPg); -// } - -// return pager_error(pPager, rc); -// } - -// /* -// ** Flush all unreferenced dirty pages to disk. 
-// */ -// int sqlite3PagerFlush(Pager *pPager){ -// int rc = pPager->errCode; -// if( !MEMDB ){ -// PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache); -// assert( assert_pager_state(pPager) ); -// while( rc==SQLITE_OK && pList ){ -// PgHdr *pNext = pList->pDirty; -// if( pList->nRef==0 ){ -// rc = pagerStress((void*)pPager, pList); -// } -// pList = pNext; -// } -// } - -// return rc; -// } - -int sqlite3PagerOpen(Pager **ppPager, const char *zFilename, int nExtra, int flags, int vfsFlags, - void (*xReinit)(DbPage *)) { - u8 * pPtr; - Pager * pPager = 0; /* Pager object to allocate and return */ - int rc = SQLITE_OK; /* Return code */ - int tempFile = 0; /* True for temp files (incl. in-memory files) */ - int memDb = 0; /* True if this is an in-memory file */ - int memJM = 0; /* Memory journal mode */ - int readOnly = 0; /* True if this is a read-only file */ - int journalFileSize; /* Bytes to allocate for each journal fd */ - int nPathname = 120; /* Number of bytes in zPathname */ - int useJournal = (flags & PAGER_OMIT_JOURNAL) == 0; /* False to omit journal */ - int pcacheSize = sqlite3PcacheSize(); /* Bytes to allocate for PCache */ - u32 szPageDflt = SQLITE_DEFAULT_PAGE_SIZE; /* Default page size */ - const char *zUri = 0; /* URI args to copy */ - int nUriByte = 1; /* Number of bytes of URI args at *zUri */ - int nUri = 0; /* Number of URI parameters */ - int tsize; - - journalFileSize = 120; - - /* Set the output variable to NULL in case an error occurs. 
*/ - *ppPager = 0; - - tsize = ROUND8(sizeof(*pPager)) /* Pager */ - + ROUND8(pcacheSize) /* PCache */ - + sizeof(pPager) /* Self contained pager pointer */ - + 4 /* Database prefix (4 Bytes) */ - + nPathname + 1 /* Database file name */ - + nPathname + 8 + 1 /* Journal file name */ - + nPathname + 4 + 1 /* WAL file name */ - + 3; /* 3 bytes of terminalter */ - - pPtr = (u8 *)calloc(1, tsize); - if (!pPtr) { - return SQLITE_NOMEM; - } - pPager = (Pager *)pPtr; - pPtr += ROUND8(sizeof(*pPager)); - pPager->pPCache = (PCache *)pPtr; - pPtr += ROUND8(pcacheSize); - memcpy(pPtr, &pPager, sizeof(pPager)); - pPtr += sizeof(pPager); - pPtr += 4; - pPager->zFilename = (char *)pPtr; - memcpy(pPtr, zFilename, strlen(zFilename)); - pPtr += (nPathname + 1); - pPager->zJournal = (char *)pPtr; - memcpy(pPtr, zFilename, strlen(zFilename)); - pPtr += nPathname; - memcpy(pPtr, "-journal", 8); - pPtr += 8 + 1; - pPager->zWal = (char *)pPtr; - memcpy(pPtr, zFilename, nPathname); - pPtr += nPathname; - memcpy(pPtr, "-wal", 4); - pPtr += 4 + 1; - - pPager->vfsFlags = vfsFlags; - - /* Open the pager file. - */ - int fout = 0; /* VFS flags returned by xOpen() */ - pPager->fd = open(zFilename, O_RDWR | O_CREAT, 0755); - if (pPager->fd < 0) { - return -1; - } - readOnly = 0; - - /* If the file was successfully opened for read/write access, - ** choose a default page size in case we have to create the - ** database file. The default page size is the maximum of: - ** - ** + SQLITE_DEFAULT_PAGE_SIZE, - ** + The value returned by sqlite3OsSectorSize() - ** + The largest page size that can be written atomically. 
- */ - // if (rc == SQLITE_OK) { - // int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); - // if (!readOnly) { - // setSectorSize(pPager); - // if (szPageDflt < pPager->sectorSize) { - // if (pPager->sectorSize > SQLITE_MAX_DEFAULT_PAGE_SIZE) { - // szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE; - // } else { - // szPageDflt = (u32)pPager->sectorSize; - // } - // } - // } - // pPager->noLock = sqlite3_uri_boolean(pPager->zFilename, "nolock", 0); - // if ((iDc & SQLITE_IOCAP_IMMUTABLE) != 0 || sqlite3_uri_boolean(pPager->zFilename, "immutable", 0)) { - // vfsFlags |= SQLITE_OPEN_READONLY; - // goto act_like_temp_file; - // } - // } - // /* The following call to PagerSetPagesize() serves to set the value of - // ** Pager.pageSize and to allocate the Pager.pTmpSpace buffer. - // */ - // if (rc == SQLITE_OK) { - // assert(pPager->memDb == 0); - // rc = sqlite3PagerSetPagesize(pPager, &szPageDflt, -1); - // testcase(rc != SQLITE_OK); - // } - - // /* Initialize the PCache object. */ - // if (rc == SQLITE_OK) { - // nExtra = ROUND8(nExtra); - // assert(nExtra >= 8 && nExtra < 1000); - // rc = sqlite3PcacheOpen(szPageDflt, nExtra, !memDb, !memDb ? pagerStress : 0, (void *)pPager, pPager->pPCache); - // } - - // /* If an error occurred above, free the Pager structure and close the file. 
- // */ - // if (rc != SQLITE_OK) { - // sqlite3OsClose(pPager->fd); - // sqlite3PageFree(pPager->pTmpSpace); - // sqlite3_free(pPager); - // return rc; - // } - - // PAGERTRACE(("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename)); - // IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename)) - - // pPager->useJournal = (u8)useJournal; - // pPager->mxPgno = SQLITE_MAX_PAGE_COUNT; - // pPager->tempFile = (u8)tempFile; - // assert(tempFile == PAGER_LOCKINGMODE_NORMAL || tempFile == PAGER_LOCKINGMODE_EXCLUSIVE); - // assert(PAGER_LOCKINGMODE_EXCLUSIVE == 1); - // pPager->exclusiveMode = (u8)tempFile; - // pPager->changeCountDone = pPager->tempFile; - // pPager->memDb = (u8)memDb; - // pPager->readOnly = (u8)readOnly; - // assert(useJournal || pPager->tempFile); - // pPager->noSync = pPager->tempFile; - // if (pPager->noSync) { - // assert(pPager->fullSync == 0); - // assert(pPager->extraSync == 0); - // assert(pPager->syncFlags == 0); - // assert(pPager->walSyncFlags == 0); - // } else { - // pPager->fullSync = 1; - // pPager->extraSync = 0; - // pPager->syncFlags = SQLITE_SYNC_NORMAL; - // pPager->walSyncFlags = SQLITE_SYNC_NORMAL | (SQLITE_SYNC_NORMAL << 2); - // } - // pPager->nExtra = (u16)nExtra; - // pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT; - // assert(isOpen(pPager->fd) || tempFile); - // setSectorSize(pPager); - // if (!useJournal) { - // pPager->journalMode = PAGER_JOURNALMODE_OFF; - // } else if (memDb || memJM) { - // pPager->journalMode = PAGER_JOURNALMODE_MEMORY; - // } - // pPager->xReiniter = xReinit; - // setGetterMethod(pPager); - - *ppPager = pPager; - return SQLITE_OK; -} - -// /* -// ** Return the sqlite3_file for the main database given the name -// ** of the corresonding WAL or Journal name as passed into -// ** xOpen. 
-// */ -// sqlite3_file *sqlite3_database_file_object(const char *zName){ -// Pager *pPager; -// while( zName[-1]!=0 || zName[-2]!=0 || zName[-3]!=0 || zName[-4]!=0 ){ -// zName--; -// } -// pPager = *(Pager**)(zName - 4 - sizeof(Pager*)); -// return pPager->fd; -// } - -// /* -// ** This function is called after transitioning from PAGER_UNLOCK to -// ** PAGER_SHARED state. It tests if there is a hot journal present in -// ** the file-system for the given pager. A hot journal is one that -// ** needs to be played back. According to this function, a hot-journal -// ** file exists if the following criteria are met: -// ** -// ** * The journal file exists in the file system, and -// ** * No process holds a RESERVED or greater lock on the database file, and -// ** * The database file itself is greater than 0 bytes in size, and -// ** * The first byte of the journal file exists and is not 0x00. -// ** -// ** If the current size of the database file is 0 but a journal file -// ** exists, that is probably an old journal left over from a prior -// ** database with the same name. In this case the journal file is -// ** just deleted using OsDelete, *pExists is set to 0 and SQLITE_OK -// ** is returned. -// ** -// ** This routine does not check if there is a super-journal filename -// ** at the end of the file. If there is, and that super-journal file -// ** does not exist, then the journal file is not really hot. In this -// ** case this routine will return a false-positive. The pager_playback() -// ** routine will discover that the journal file is not really hot and -// ** will not roll it back. -// ** -// ** If a hot-journal file is found to exist, *pExists is set to 1 and -// ** SQLITE_OK returned. If no hot-journal file is present, *pExists is -// ** set to 0 and SQLITE_OK returned. If an IO error occurs while trying -// ** to determine whether or not a hot-journal file exists, the IO error -// ** code is returned and the value of *pExists is undefined. 
-// */ -// static int hasHotJournal(Pager *pPager, int *pExists){ -// sqlite3_vfs * const pVfs = pPager->pVfs; -// int rc = SQLITE_OK; /* Return code */ -// int exists = 1; /* True if a journal file is present */ -// int jrnlOpen = !!isOpen(pPager->jfd); - -// assert( pPager->useJournal ); -// assert( isOpen(pPager->fd) ); -// assert( pPager->eState==PAGER_OPEN ); - -// assert( jrnlOpen==0 || ( sqlite3OsDeviceCharacteristics(pPager->jfd) & -// SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN -// )); - -// *pExists = 0; -// if( !jrnlOpen ){ -// rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists); -// } -// if( rc==SQLITE_OK && exists ){ -// int locked = 0; /* True if some process holds a RESERVED lock */ - -// /* Race condition here: Another process might have been holding the -// ** the RESERVED lock and have a journal open at the sqlite3OsAccess() -// ** call above, but then delete the journal and drop the lock before -// ** we get to the following sqlite3OsCheckReservedLock() call. If that -// ** is the case, this routine might think there is a hot journal when -// ** in fact there is none. This results in a false-positive which will -// ** be dealt with by the playback routine. Ticket #3883. -// */ -// rc = sqlite3OsCheckReservedLock(pPager->fd, &locked); -// if( rc==SQLITE_OK && !locked ){ -// Pgno nPage; /* Number of pages in database file */ - -// assert( pPager->tempFile==0 ); -// rc = pagerPagecount(pPager, &nPage); -// if( rc==SQLITE_OK ){ -// /* If the database is zero pages in size, that means that either (1) the -// ** journal is a remnant from a prior database with the same name where -// ** the database file but not the journal was deleted, or (2) the initial -// ** transaction that populates a new database is being rolled back. -// ** In either case, the journal file can be deleted. However, take care -// ** not to delete the journal file if it is already open due to -// ** journal_mode=PERSIST. 
-// */ -// if( nPage==0 && !jrnlOpen ){ -// sqlite3BeginBenignMalloc(); -// if( pagerLockDb(pPager, RESERVED_LOCK)==SQLITE_OK ){ -// sqlite3OsDelete(pVfs, pPager->zJournal, 0); -// if( !pPager->exclusiveMode ) pagerUnlockDb(pPager, SHARED_LOCK); -// } -// sqlite3EndBenignMalloc(); -// }else{ -// /* The journal file exists and no other connection has a reserved -// ** or greater lock on the database file. Now check that there is -// ** at least one non-zero bytes at the start of the journal file. -// ** If there is, then we consider this journal to be hot. If not, -// ** it can be ignored. -// */ -// if( !jrnlOpen ){ -// int f = SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL; -// rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &f); -// } -// if( rc==SQLITE_OK ){ -// u8 first = 0; -// rc = sqlite3OsRead(pPager->jfd, (void *)&first, 1, 0); -// if( rc==SQLITE_IOERR_SHORT_READ ){ -// rc = SQLITE_OK; -// } -// if( !jrnlOpen ){ -// sqlite3OsClose(pPager->jfd); -// } -// *pExists = (first!=0); -// }else if( rc==SQLITE_CANTOPEN ){ -// /* If we cannot open the rollback journal file in order to see if -// ** it has a zero header, that might be due to an I/O error, or -// ** it might be due to the race condition described above and in -// ** ticket #3883. Either way, assume that the journal is hot. -// ** This might be a false positive. But if it is, then the -// ** automatic journal playback and recovery mechanism will deal -// ** with it under an EXCLUSIVE lock where we do not need to -// ** worry so much with race conditions. -// */ -// *pExists = 1; -// rc = SQLITE_OK; -// } -// } -// } -// } -// } - -// return rc; -// } - -// /* -// ** This function is called to obtain a shared lock on the database file. -// ** It is illegal to call sqlite3PagerGet() until after this function -// ** has been successfully called. If a shared-lock is already held when -// ** this function is called, it is a no-op. 
-// ** -// ** The following operations are also performed by this function. -// ** -// ** 1) If the pager is currently in PAGER_OPEN state (no lock held -// ** on the database file), then an attempt is made to obtain a -// ** SHARED lock on the database file. Immediately after obtaining -// ** the SHARED lock, the file-system is checked for a hot-journal, -// ** which is played back if present. Following any hot-journal -// ** rollback, the contents of the cache are validated by checking -// ** the 'change-counter' field of the database file header and -// ** discarded if they are found to be invalid. -// ** -// ** 2) If the pager is running in exclusive-mode, and there are currently -// ** no outstanding references to any pages, and is in the error state, -// ** then an attempt is made to clear the error state by discarding -// ** the contents of the page cache and rolling back any open journal -// ** file. -// ** -// ** If everything is successful, SQLITE_OK is returned. If an IO error -// ** occurs while locking the database, checking for a hot-journal file or -// ** rolling back a journal file, the IO error code is returned. -// */ -// int sqlite3PagerSharedLock(Pager *pPager){ -// int rc = SQLITE_OK; /* Return code */ - -// /* This routine is only called from b-tree and only when there are no -// ** outstanding pages. This implies that the pager state should either -// ** be OPEN or READER. READER is only possible if the pager is or was in -// ** exclusive access mode. 
*/ -// assert( sqlite3PcacheRefCount(pPager->pPCache)==0 ); -// assert( assert_pager_state(pPager) ); -// assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER ); -// assert( pPager->errCode==SQLITE_OK ); - -// if( !pagerUseWal(pPager) && pPager->eState==PAGER_OPEN ){ -// int bHotJournal = 1; /* True if there exists a hot journal-file */ - -// assert( !MEMDB ); -// assert( pPager->tempFile==0 || pPager->eLock==EXCLUSIVE_LOCK ); - -// rc = pager_wait_on_lock(pPager, SHARED_LOCK); -// if( rc!=SQLITE_OK ){ -// assert( pPager->eLock==NO_LOCK || pPager->eLock==UNKNOWN_LOCK ); -// goto failed; -// } - -// /* If a journal file exists, and there is no RESERVED lock on the -// ** database file, then it either needs to be played back or deleted. -// */ -// if( pPager->eLock<=SHARED_LOCK ){ -// rc = hasHotJournal(pPager, &bHotJournal); -// } -// if( rc!=SQLITE_OK ){ -// goto failed; -// } -// if( bHotJournal ){ -// if( pPager->readOnly ){ -// rc = SQLITE_READONLY_ROLLBACK; -// goto failed; -// } - -// /* Get an EXCLUSIVE lock on the database file. At this point it is -// ** important that a RESERVED lock is not obtained on the way to the -// ** EXCLUSIVE lock. If it were, another process might open the -// ** database file, detect the RESERVED lock, and conclude that the -// ** database is safe to read while this process is still rolling the -// ** hot-journal back. -// ** -// ** Because the intermediate RESERVED lock is not requested, any -// ** other process attempting to access the database file will get to -// ** this point in the code and fail to obtain its own EXCLUSIVE lock -// ** on the database file. -// ** -// ** Unless the pager is in locking_mode=exclusive mode, the lock is -// ** downgraded to SHARED_LOCK before this function returns. -// */ -// rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); -// if( rc!=SQLITE_OK ){ -// goto failed; -// } - -// /* If it is not already open and the file exists on disk, open the -// ** journal for read/write access. 
Write access is required because -// ** in exclusive-access mode the file descriptor will be kept open -// ** and possibly used for a transaction later on. Also, write-access -// ** is usually required to finalize the journal in journal_mode=persist -// ** mode (and also for journal_mode=truncate on some systems). -// ** -// ** If the journal does not exist, it usually means that some -// ** other connection managed to get in and roll it back before -// ** this connection obtained the exclusive lock above. Or, it -// ** may mean that the pager was in the error-state when this -// ** function was called and the journal file does not exist. -// */ -// if( !isOpen(pPager->jfd) ){ -// sqlite3_vfs * const pVfs = pPager->pVfs; -// int bExists; /* True if journal file exists */ -// rc = sqlite3OsAccess( -// pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &bExists); -// if( rc==SQLITE_OK && bExists ){ -// int fout = 0; -// int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL; -// assert( !pPager->tempFile ); -// rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout); -// assert( rc!=SQLITE_OK || isOpen(pPager->jfd) ); -// if( rc==SQLITE_OK && fout&SQLITE_OPEN_READONLY ){ -// rc = SQLITE_CANTOPEN_BKPT; -// sqlite3OsClose(pPager->jfd); -// } -// } -// } - -// /* Playback and delete the journal. Drop the database write -// ** lock and reacquire the read lock. Purge the cache before -// ** playing back the hot-journal so that we don't end up with -// ** an inconsistent cache. Sync the hot journal before playing -// ** it back since the process that crashed and left the hot journal -// ** probably did not sync it and we are required to always sync -// ** the journal before playing it back. 
-// */ -// if( isOpen(pPager->jfd) ){ -// assert( rc==SQLITE_OK ); -// rc = pagerSyncHotJournal(pPager); -// if( rc==SQLITE_OK ){ -// rc = pager_playback(pPager, !pPager->tempFile); -// pPager->eState = PAGER_OPEN; -// } -// }else if( !pPager->exclusiveMode ){ -// pagerUnlockDb(pPager, SHARED_LOCK); -// } - -// if( rc!=SQLITE_OK ){ -// /* This branch is taken if an error occurs while trying to open -// ** or roll back a hot-journal while holding an EXCLUSIVE lock. The -// ** pager_unlock() routine will be called before returning to unlock -// ** the file. If the unlock attempt fails, then Pager.eLock must be -// ** set to UNKNOWN_LOCK (see the comment above the #define for -// ** UNKNOWN_LOCK above for an explanation). -// ** -// ** In order to get pager_unlock() to do this, set Pager.eState to -// ** PAGER_ERROR now. This is not actually counted as a transition -// ** to ERROR state in the state diagram at the top of this file, -// ** since we know that the same call to pager_unlock() will very -// ** shortly transition the pager object to the OPEN state. Calling -// ** assert_pager_state() would fail now, as it should not be possible -// ** to be in ERROR state when there are zero outstanding page -// ** references. -// */ -// pager_error(pPager, rc); -// goto failed; -// } - -// assert( pPager->eState==PAGER_OPEN ); -// assert( (pPager->eLock==SHARED_LOCK) -// || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK) -// ); -// } - -// if( !pPager->tempFile && pPager->hasHeldSharedLock ){ -// /* The shared-lock has just been acquired then check to -// ** see if the database has been modified. If the database has changed, -// ** flush the cache. The hasHeldSharedLock flag prevents this from -// ** occurring on the very first access to a file, in order to save a -// ** single unnecessary sqlite3OsRead() call at the start-up. -// ** -// ** Database changes are detected by looking at 15 bytes beginning -// ** at offset 24 into the file. 
The first 4 of these 16 bytes are -// ** a 32-bit counter that is incremented with each change. The -// ** other bytes change randomly with each file change when -// ** a codec is in use. -// ** -// ** There is a vanishingly small chance that a change will not be -// ** detected. The chance of an undetected change is so small that -// ** it can be neglected. -// */ -// char dbFileVers[sizeof(pPager->dbFileVers)]; - -// IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers))); -// rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24); -// if( rc!=SQLITE_OK ){ -// if( rc!=SQLITE_IOERR_SHORT_READ ){ -// goto failed; -// } -// memset(dbFileVers, 0, sizeof(dbFileVers)); -// } - -// if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ -// pager_reset(pPager); - -// /* Unmap the database file. It is possible that external processes -// ** may have truncated the database file and then extended it back -// ** to its original size while this process was not holding a lock. -// ** In this case there may exist a Pager.pMap mapping that appears -// ** to be the right size but is not actually valid. Avoid this -// ** possibility by unmapping the db here. */ -// if( USEFETCH(pPager) ){ -// sqlite3OsUnfetch(pPager->fd, 0, 0); -// } -// } -// } - -// /* If there is a WAL file in the file-system, open this database in WAL -// ** mode. Otherwise, the following function call is a no-op. 
-// */ -// rc = pagerOpenWalIfPresent(pPager); -// #ifndef SQLITE_OMIT_WAL -// assert( pPager->pWal==0 || rc==SQLITE_OK ); -// #endif -// } - -// if( pagerUseWal(pPager) ){ -// assert( rc==SQLITE_OK ); -// rc = pagerBeginReadTransaction(pPager); -// } - -// if( pPager->tempFile==0 && pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){ -// rc = pagerPagecount(pPager, &pPager->dbSize); -// } - -// failed: -// if( rc!=SQLITE_OK ){ -// assert( !MEMDB ); -// pager_unlock(pPager); -// assert( pPager->eState==PAGER_OPEN ); -// }else{ -// pPager->eState = PAGER_READER; -// pPager->hasHeldSharedLock = 1; -// } -// return rc; -// } - -// /* -// ** If the reference count has reached zero, rollback any active -// ** transaction and unlock the pager. -// ** -// ** Except, in locking_mode=EXCLUSIVE when there is nothing to in -// ** the rollback journal, the unlock is not performed and there is -// ** nothing to rollback, so this routine is a no-op. -// */ -// static void pagerUnlockIfUnused(Pager *pPager){ -// if( sqlite3PcacheRefCount(pPager->pPCache)==0 ){ -// assert( pPager->nMmapOut==0 ); /* because page1 is never memory mapped */ -// pagerUnlockAndRollback(pPager); -// } -// } - -// /* -// ** The page getter methods each try to acquire a reference to a -// ** page with page number pgno. If the requested reference is -// ** successfully obtained, it is copied to *ppPage and SQLITE_OK returned. -// ** -// ** There are different implementations of the getter method depending -// ** on the current state of the pager. -// ** -// ** getPageNormal() -- The normal getter -// ** getPageError() -- Used if the pager is in an error state -// ** getPageMmap() -- Used if memory-mapped I/O is enabled -// ** -// ** If the requested page is already in the cache, it is returned. -// ** Otherwise, a new page object is allocated and populated with data -// ** read from the database file. 
In some cases, the pcache module may -// ** choose not to allocate a new page object and may reuse an existing -// ** object with no outstanding references. -// ** -// ** The extra data appended to a page is always initialized to zeros the -// ** first time a page is loaded into memory. If the page requested is -// ** already in the cache when this function is called, then the extra -// ** data is left as it was when the page object was last used. -// ** -// ** If the database image is smaller than the requested page or if -// ** the flags parameter contains the PAGER_GET_NOCONTENT bit and the -// ** requested page is not already stored in the cache, then no -// ** actual disk read occurs. In this case the memory image of the -// ** page is initialized to all zeros. -// ** -// ** If PAGER_GET_NOCONTENT is true, it means that we do not care about -// ** the contents of the page. This occurs in two scenarios: -// ** -// ** a) When reading a free-list leaf page from the database, and -// ** -// ** b) When a savepoint is being rolled back and we need to load -// ** a new page into the cache to be filled with the data read -// ** from the savepoint journal. -// ** -// ** If PAGER_GET_NOCONTENT is true, then the data returned is zeroed instead -// ** of being read from the database. Additionally, the bits corresponding -// ** to pgno in Pager.pInJournal (bitvec of pages already written to the -// ** journal file) and the PagerSavepoint.pInSavepoint bitvecs of any open -// ** savepoints are set. This means if the page is made writable at any -// ** point in the future, using a call to sqlite3PagerWrite(), its contents -// ** will not be journaled. This saves IO. -// ** -// ** The acquisition might fail for several reasons. In all cases, -// ** an appropriate error code is returned and *ppPage is set to NULL. -// ** -// ** See also sqlite3PagerLookup(). Both this routine and Lookup() attempt -// ** to find a page in the in-memory cache first. 
If the page is not already -// ** in memory, this routine goes to disk to read it in whereas Lookup() -// ** just returns 0. This routine acquires a read-lock the first time it -// ** has to go to disk, and could also playback an old journal if necessary. -// ** Since Lookup() never goes to disk, it never has to deal with locks -// ** or journal files. -// */ -// static int getPageNormal( -// Pager *pPager, /* The pager open on the database file */ -// Pgno pgno, /* Page number to fetch */ -// DbPage **ppPage, /* Write a pointer to the page here */ -// int flags /* PAGER_GET_XXX flags */ -// ){ -// int rc = SQLITE_OK; -// PgHdr *pPg; -// u8 noContent; /* True if PAGER_GET_NOCONTENT is set */ -// sqlite3_pcache_page *pBase; - -// assert( pPager->errCode==SQLITE_OK ); -// assert( pPager->eState>=PAGER_READER ); -// assert( assert_pager_state(pPager) ); -// assert( pPager->hasHeldSharedLock==1 ); - -// if( pgno==0 ) return SQLITE_CORRUPT_BKPT; -// pBase = sqlite3PcacheFetch(pPager->pPCache, pgno, 3); -// if( pBase==0 ){ -// pPg = 0; -// rc = sqlite3PcacheFetchStress(pPager->pPCache, pgno, &pBase); -// if( rc!=SQLITE_OK ) goto pager_acquire_err; -// if( pBase==0 ){ -// rc = SQLITE_NOMEM; -// goto pager_acquire_err; -// } -// } -// pPg = *ppPage = sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pBase); -// assert( pPg==(*ppPage) ); -// assert( pPg->pgno==pgno ); -// assert( pPg->pPager==pPager || pPg->pPager==0 ); - -// noContent = (flags & PAGER_GET_NOCONTENT)!=0; -// if( pPg->pPager && !noContent ){ -// /* In this case the pcache already contains an initialized copy of -// ** the page. Return without further ado. */ -// assert( pgno!=PAGER_MJ_PGNO(pPager) ); -// pPager->aStat[PAGER_STAT_HIT]++; -// return SQLITE_OK; - -// }else{ -// /* The pager cache has created a new page. Its content needs to -// ** be initialized. But first some error checks: -// ** -// ** (*) obsolete. 
Was: maximum page number is 2^31 -// ** (2) Never try to fetch the locking page -// */ -// if( pgno==PAGER_MJ_PGNO(pPager) ){ -// rc = SQLITE_CORRUPT_BKPT; -// goto pager_acquire_err; -// } - -// pPg->pPager = pPager; - -// assert( !isOpen(pPager->fd) || !MEMDB ); -// if( !isOpen(pPager->fd) || pPager->dbSizepPager->mxPgno ){ -// rc = SQLITE_FULL; -// goto pager_acquire_err; -// } -// if( noContent ){ -// /* Failure to set the bits in the InJournal bit-vectors is benign. -// ** It merely means that we might do some extra work to journal a -// ** page that does not need to be journaled. Nevertheless, be sure -// ** to test the case where a malloc error occurs while trying to set -// ** a bit in a bit vector. -// */ -// sqlite3BeginBenignMalloc(); -// if( pgno<=pPager->dbOrigSize ){ -// TESTONLY( rc = ) sqlite3BitvecSet(pPager->pInJournal, pgno); -// testcase( rc==SQLITE_NOMEM ); -// } -// TESTONLY( rc = ) addToSavepointBitvecs(pPager, pgno); -// testcase( rc==SQLITE_NOMEM ); -// sqlite3EndBenignMalloc(); -// } -// memset(pPg->pData, 0, pPager->pageSize); -// IOTRACE(("ZERO %p %d\n", pPager, pgno)); -// }else{ -// assert( pPg->pPager==pPager ); -// pPager->aStat[PAGER_STAT_MISS]++; -// rc = readDbPage(pPg); -// if( rc!=SQLITE_OK ){ -// goto pager_acquire_err; -// } -// } -// pager_set_pagehash(pPg); -// } -// return SQLITE_OK; - -// pager_acquire_err: -// assert( rc!=SQLITE_OK ); -// if( pPg ){ -// sqlite3PcacheDrop(pPg); -// } -// pagerUnlockIfUnused(pPager); -// *ppPage = 0; -// return rc; -// } - -// #if SQLITE_MAX_MMAP_SIZE>0 -// /* The page getter for when memory-mapped I/O is enabled */ -// static int getPageMMap( -// Pager *pPager, /* The pager open on the database file */ -// Pgno pgno, /* Page number to fetch */ -// DbPage **ppPage, /* Write a pointer to the page here */ -// int flags /* PAGER_GET_XXX flags */ -// ){ -// int rc = SQLITE_OK; -// PgHdr *pPg = 0; -// u32 iFrame = 0; /* Frame to read from WAL file */ - -// /* It is acceptable to use a read-only 
(mmap) page for any page except -// ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY -// ** flag was specified by the caller. And so long as the db is not a -// ** temporary or in-memory database. */ -// const int bMmapOk = (pgno>1 -// && (pPager->eState==PAGER_READER || (flags & PAGER_GET_READONLY)) -// ); - -// assert( USEFETCH(pPager) ); - -// /* Optimization note: Adding the "pgno<=1" term before "pgno==0" here -// ** allows the compiler optimizer to reuse the results of the "pgno>1" -// ** test in the previous statement, and avoid testing pgno==0 in the -// ** common case where pgno is large. */ -// if( pgno<=1 && pgno==0 ){ -// return SQLITE_CORRUPT_BKPT; -// } -// assert( pPager->eState>=PAGER_READER ); -// assert( assert_pager_state(pPager) ); -// assert( pPager->hasHeldSharedLock==1 ); -// assert( pPager->errCode==SQLITE_OK ); - -// if( bMmapOk && pagerUseWal(pPager) ){ -// rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame); -// if( rc!=SQLITE_OK ){ -// *ppPage = 0; -// return rc; -// } -// } -// if( bMmapOk && iFrame==0 ){ -// void *pData = 0; -// rc = sqlite3OsFetch(pPager->fd, -// (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData -// ); -// if( rc==SQLITE_OK && pData ){ -// if( pPager->eState>PAGER_READER || pPager->tempFile ){ -// pPg = sqlite3PagerLookup(pPager, pgno); -// } -// if( pPg==0 ){ -// rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg); -// }else{ -// sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1)*pPager->pageSize, pData); -// } -// if( pPg ){ -// assert( rc==SQLITE_OK ); -// *ppPage = pPg; -// return SQLITE_OK; -// } -// } -// if( rc!=SQLITE_OK ){ -// *ppPage = 0; -// return rc; -// } -// } -// return getPageNormal(pPager, pgno, ppPage, flags); -// } -// #endif /* SQLITE_MAX_MMAP_SIZE>0 */ - -// /* The page getter method for when the pager is an error state */ -// static int getPageError( -// Pager *pPager, /* The pager open on the database file */ -// Pgno pgno, /* Page number to fetch */ -// DbPage 
**ppPage, /* Write a pointer to the page here */ -// int flags /* PAGER_GET_XXX flags */ -// ){ -// UNUSED_PARAMETER(pgno); -// UNUSED_PARAMETER(flags); -// assert( pPager->errCode!=SQLITE_OK ); -// *ppPage = 0; -// return pPager->errCode; -// } - -// /* Dispatch all page fetch requests to the appropriate getter method. -// */ -// int sqlite3PagerGet( -// Pager *pPager, /* The pager open on the database file */ -// Pgno pgno, /* Page number to fetch */ -// DbPage **ppPage, /* Write a pointer to the page here */ -// int flags /* PAGER_GET_XXX flags */ -// ){ -// return pPager->xGet(pPager, pgno, ppPage, flags); -// } - -// /* -// ** Acquire a page if it is already in the in-memory cache. Do -// ** not read the page from disk. Return a pointer to the page, -// ** or 0 if the page is not in cache. -// ** -// ** See also sqlite3PagerGet(). The difference between this routine -// ** and sqlite3PagerGet() is that _get() will go to the disk and read -// ** in the page if the page is not already in cache. This routine -// ** returns NULL if the page is not in cache or if a disk I/O error -// ** has ever happened. -// */ -// DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ -// sqlite3_pcache_page *pPage; -// assert( pPager!=0 ); -// assert( pgno!=0 ); -// assert( pPager->pPCache!=0 ); -// pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0); -// assert( pPage==0 || pPager->hasHeldSharedLock ); -// if( pPage==0 ) return 0; -// return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage); -// } - -// /* -// ** Release a page reference. -// ** -// ** The sqlite3PagerUnref() and sqlite3PagerUnrefNotNull() may only be -// ** used if we know that the page being released is not the last page. -// ** The btree layer always holds page1 open until the end, so these first -// ** to routines can be used to release any page other than BtShared.pPage1. -// ** -// ** Use sqlite3PagerUnrefPageOne() to release page1. 
This latter routine -// ** checks the total number of outstanding pages and if the number of -// ** pages reaches zero it drops the database lock. -// */ -// void sqlite3PagerUnrefNotNull(DbPage *pPg){ -// TESTONLY( Pager *pPager = pPg->pPager; ) -// assert( pPg!=0 ); -// if( pPg->flags & PGHDR_MMAP ){ -// assert( pPg->pgno!=1 ); /* Page1 is never memory mapped */ -// pagerReleaseMapPage(pPg); -// }else{ -// sqlite3PcacheRelease(pPg); -// } -// /* Do not use this routine to release the last reference to page1 */ -// assert( sqlite3PcacheRefCount(pPager->pPCache)>0 ); -// } -// void sqlite3PagerUnref(DbPage *pPg){ -// if( pPg ) sqlite3PagerUnrefNotNull(pPg); -// } -// void sqlite3PagerUnrefPageOne(DbPage *pPg){ -// Pager *pPager; -// assert( pPg!=0 ); -// assert( pPg->pgno==1 ); -// assert( (pPg->flags & PGHDR_MMAP)==0 ); /* Page1 is never memory mapped */ -// pPager = pPg->pPager; -// sqlite3PcacheRelease(pPg); -// pagerUnlockIfUnused(pPager); -// } - -// /* -// ** This function is called at the start of every write transaction. -// ** There must already be a RESERVED or EXCLUSIVE lock on the database -// ** file when this routine is called. -// ** -// ** Open the journal file for pager pPager and write a journal header -// ** to the start of it. If there are active savepoints, open the sub-journal -// ** as well. This function is only used when the journal file is being -// ** opened to write a rollback log for a transaction. It is not used -// ** when opening a hot journal file to roll it back. -// ** -// ** If the journal file is already open (as it may be in exclusive mode), -// ** then this function just writes a journal header to the start of the -// ** already open file. -// ** -// ** Whether or not the journal file is opened by this function, the -// ** Pager.pInJournal bitvec structure is allocated. -// ** -// ** Return SQLITE_OK if everything is successful. 
Otherwise, return -// ** SQLITE_NOMEM if the attempt to allocate Pager.pInJournal fails, or -// ** an IO error code if opening or writing the journal file fails. -// */ -// static int pager_open_journal(Pager *pPager){ -// int rc = SQLITE_OK; /* Return code */ -// sqlite3_vfs * const pVfs = pPager->pVfs; /* Local cache of vfs pointer */ - -// assert( pPager->eState==PAGER_WRITER_LOCKED ); -// assert( assert_pager_state(pPager) ); -// assert( pPager->pInJournal==0 ); - -// /* If already in the error state, this function is a no-op. But on -// ** the other hand, this routine is never called if we are already in -// ** an error state. */ -// if( NEVER(pPager->errCode) ) return pPager->errCode; - -// if( !pagerUseWal(pPager) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ -// pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize); -// if( pPager->pInJournal==0 ){ -// return SQLITE_NOMEM; -// } - -// /* Open the journal file if it is not already open. */ -// if( !isOpen(pPager->jfd) ){ -// if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){ -// sqlite3MemJournalOpen(pPager->jfd); -// }else{ -// int flags = SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE; -// int nSpill; - -// if( pPager->tempFile ){ -// flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL); -// nSpill = sqlite3Config.nStmtSpill; -// }else{ -// flags |= SQLITE_OPEN_MAIN_JOURNAL; -// nSpill = jrnlBufferSize(pPager); -// } - -// /* Verify that the database still has the same name as it did when -// ** it was originally opened. */ -// rc = databaseIsUnmoved(pPager); -// if( rc==SQLITE_OK ){ -// rc = sqlite3JournalOpen ( -// pVfs, pPager->zJournal, pPager->jfd, flags, nSpill -// ); -// } -// } -// assert( rc!=SQLITE_OK || isOpen(pPager->jfd) ); -// } - -// /* Write the first journal header to the journal file and open -// ** the sub-journal if necessary. -// */ -// if( rc==SQLITE_OK ){ -// /* TODO: Check if all of these are really required. 
*/ -// pPager->nRec = 0; -// pPager->journalOff = 0; -// pPager->setSuper = 0; -// pPager->journalHdr = 0; -// rc = writeJournalHdr(pPager); -// } -// } - -// if( rc!=SQLITE_OK ){ -// sqlite3BitvecDestroy(pPager->pInJournal); -// pPager->pInJournal = 0; -// }else{ -// assert( pPager->eState==PAGER_WRITER_LOCKED ); -// pPager->eState = PAGER_WRITER_CACHEMOD; -// } - -// return rc; -// } - -// /* -// ** Begin a write-transaction on the specified pager object. If a -// ** write-transaction has already been opened, this function is a no-op. -// ** -// ** If the exFlag argument is false, then acquire at least a RESERVED -// ** lock on the database file. If exFlag is true, then acquire at least -// ** an EXCLUSIVE lock. If such a lock is already held, no locking -// ** functions need be called. -// ** -// ** If the subjInMemory argument is non-zero, then any sub-journal opened -// ** within this transaction will be opened as an in-memory file. This -// ** has no effect if the sub-journal is already opened (as it may be when -// ** running in exclusive mode) or if the transaction does not require a -// ** sub-journal. If the subjInMemory argument is zero, then any required -// ** sub-journal is implemented in-memory if pPager is an in-memory database, -// ** or using a temporary file otherwise. -// */ -// int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ -// int rc = SQLITE_OK; - -// if( pPager->errCode ) return pPager->errCode; -// assert( pPager->eState>=PAGER_READER && pPager->eStatesubjInMemory = (u8)subjInMemory; - -// if( pPager->eState==PAGER_READER ){ -// assert( pPager->pInJournal==0 ); - -// if( pagerUseWal(pPager) ){ -// /* If the pager is configured to use locking_mode=exclusive, and an -// ** exclusive lock on the database is not already held, obtain it now. 
-// */ -// if( pPager->exclusiveMode && sqlite3WalExclusiveMode(pPager->pWal, -1) ){ -// rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); -// if( rc!=SQLITE_OK ){ -// return rc; -// } -// (void)sqlite3WalExclusiveMode(pPager->pWal, 1); -// } - -// /* Grab the write lock on the log file. If successful, upgrade to -// ** PAGER_RESERVED state. Otherwise, return an error code to the caller. -// ** The busy-handler is not invoked if another connection already -// ** holds the write-lock. If possible, the upper layer will call it. -// */ -// rc = sqlite3WalBeginWriteTransaction(pPager->pWal); -// }else{ -// /* Obtain a RESERVED lock on the database file. If the exFlag parameter -// ** is true, then immediately upgrade this to an EXCLUSIVE lock. The -// ** busy-handler callback can be used when upgrading to the EXCLUSIVE -// ** lock, but not when obtaining the RESERVED lock. -// */ -// rc = pagerLockDb(pPager, RESERVED_LOCK); -// if( rc==SQLITE_OK && exFlag ){ -// rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); -// } -// } - -// if( rc==SQLITE_OK ){ -// /* Change to WRITER_LOCKED state. -// ** -// ** WAL mode sets Pager.eState to PAGER_WRITER_LOCKED or CACHEMOD -// ** when it has an open transaction, but never to DBMOD or FINISHED. -// ** This is because in those states the code to roll back savepoint -// ** transactions may copy data from the sub-journal into the database -// ** file as well as into the page cache. Which would be incorrect in -// ** WAL mode. 
-// */ -// pPager->eState = PAGER_WRITER_LOCKED; -// pPager->dbHintSize = pPager->dbSize; -// pPager->dbFileSize = pPager->dbSize; -// pPager->dbOrigSize = pPager->dbSize; -// pPager->journalOff = 0; -// } - -// assert( rc==SQLITE_OK || pPager->eState==PAGER_READER ); -// assert( rc!=SQLITE_OK || pPager->eState==PAGER_WRITER_LOCKED ); -// assert( assert_pager_state(pPager) ); -// } - -// PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager))); -// return rc; -// } - -// /* -// ** Write page pPg onto the end of the rollback journal. -// */ -// static SQLITE_NOINLINE int pagerAddPageToRollbackJournal(PgHdr *pPg){ -// Pager *pPager = pPg->pPager; -// int rc; -// u32 cksum; -// char *pData2; -// i64 iOff = pPager->journalOff; - -// /* We should never write to the journal file the page that -// ** contains the database locks. The following assert verifies -// ** that we do not. */ -// assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); - -// assert( pPager->journalHdr<=pPager->journalOff ); -// pData2 = pPg->pData; -// cksum = pager_cksum(pPager, (u8*)pData2); - -// /* Even if an IO or diskfull error occurs while journalling the -// ** page in the block above, set the need-sync flag for the page. -// ** Otherwise, when the transaction is rolled back, the logic in -// ** playback_one_page() will think that the page needs to be restored -// ** in the database file. And if an IO error occurs while doing so, -// ** then corruption may follow. 
-// */ -// pPg->flags |= PGHDR_NEED_SYNC; - -// rc = write32bits(pPager->jfd, iOff, pPg->pgno); -// if( rc!=SQLITE_OK ) return rc; -// rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4); -// if( rc!=SQLITE_OK ) return rc; -// rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum); -// if( rc!=SQLITE_OK ) return rc; - -// IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, -// pPager->journalOff, pPager->pageSize)); -// PAGER_INCR(sqlite3_pager_writej_count); -// PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n", -// PAGERID(pPager), pPg->pgno, -// ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg))); - -// pPager->journalOff += 8 + pPager->pageSize; -// pPager->nRec++; -// assert( pPager->pInJournal!=0 ); -// rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); -// testcase( rc==SQLITE_NOMEM ); -// assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); -// rc |= addToSavepointBitvecs(pPager, pPg->pgno); -// assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); -// return rc; -// } - -// /* -// ** Mark a single data page as writeable. The page is written into the -// ** main journal or sub-journal as required. If the page is written into -// ** one of the journals, the corresponding bit is set in the -// ** Pager.pInJournal bitvec and the PagerSavepoint.pInSavepoint bitvecs -// ** of any open savepoints as appropriate. -// */ -// static int pager_write(PgHdr *pPg){ -// Pager *pPager = pPg->pPager; -// int rc = SQLITE_OK; - -// /* This routine is not called unless a write-transaction has already -// ** been started. The journal file may or may not be open at this point. -// ** It is never called in the ERROR state. -// */ -// assert( pPager->eState==PAGER_WRITER_LOCKED -// || pPager->eState==PAGER_WRITER_CACHEMOD -// || pPager->eState==PAGER_WRITER_DBMOD -// ); -// assert( assert_pager_state(pPager) ); -// assert( pPager->errCode==0 ); -// assert( pPager->readOnly==0 ); -// CHECK_PAGE(pPg); - -// /* The journal file needs to be opened. 
Higher level routines have already -// ** obtained the necessary locks to begin the write-transaction, but the -// ** rollback journal might not yet be open. Open it now if this is the case. -// ** -// ** This is done before calling sqlite3PcacheMakeDirty() on the page. -// ** Otherwise, if it were done after calling sqlite3PcacheMakeDirty(), then -// ** an error might occur and the pager would end up in WRITER_LOCKED state -// ** with pages marked as dirty in the cache. -// */ -// if( pPager->eState==PAGER_WRITER_LOCKED ){ -// rc = pager_open_journal(pPager); -// if( rc!=SQLITE_OK ) return rc; -// } -// assert( pPager->eState>=PAGER_WRITER_CACHEMOD ); -// assert( assert_pager_state(pPager) ); - -// /* Mark the page that is about to be modified as dirty. */ -// sqlite3PcacheMakeDirty(pPg); - -// /* If a rollback journal is in use, them make sure the page that is about -// ** to change is in the rollback journal, or if the page is a new page off -// ** then end of the file, make sure it is marked as PGHDR_NEED_SYNC. -// */ -// assert( (pPager->pInJournal!=0) == isOpen(pPager->jfd) ); -// if( pPager->pInJournal!=0 -// && sqlite3BitvecTestNotNull(pPager->pInJournal, pPg->pgno)==0 -// ){ -// assert( pagerUseWal(pPager)==0 ); -// if( pPg->pgno<=pPager->dbOrigSize ){ -// rc = pagerAddPageToRollbackJournal(pPg); -// if( rc!=SQLITE_OK ){ -// return rc; -// } -// }else{ -// if( pPager->eState!=PAGER_WRITER_DBMOD ){ -// pPg->flags |= PGHDR_NEED_SYNC; -// } -// PAGERTRACE(("APPEND %d page %d needSync=%d\n", -// PAGERID(pPager), pPg->pgno, -// ((pPg->flags&PGHDR_NEED_SYNC)?1:0))); -// } -// } - -// /* The PGHDR_DIRTY bit is set above when the page was added to the dirty-list -// ** and before writing the page into the rollback journal. Wait until now, -// ** after the page has been successfully journalled, before setting the -// ** PGHDR_WRITEABLE bit that indicates that the page can be safely modified. 
-// */ -// pPg->flags |= PGHDR_WRITEABLE; - -// /* If the statement journal is open and the page is not in it, -// ** then write the page into the statement journal. -// */ -// if( pPager->nSavepoint>0 ){ -// rc = subjournalPageIfRequired(pPg); -// } - -// /* Update the database size and return. */ -// if( pPager->dbSizepgno ){ -// pPager->dbSize = pPg->pgno; -// } -// return rc; -// } - -// /* -// ** This is a variant of sqlite3PagerWrite() that runs when the sector size -// ** is larger than the page size. SQLite makes the (reasonable) assumption that -// ** all bytes of a sector are written together by hardware. Hence, all bytes of -// ** a sector need to be journalled in case of a power loss in the middle of -// ** a write. -// ** -// ** Usually, the sector size is less than or equal to the page size, in which -// ** case pages can be individually written. This routine only runs in the -// ** exceptional case where the page size is smaller than the sector size. -// */ -// static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){ -// int rc = SQLITE_OK; /* Return code */ -// Pgno nPageCount; /* Total number of pages in database file */ -// Pgno pg1; /* First page of the sector pPg is located on. */ -// int nPage = 0; /* Number of pages starting at pg1 to journal */ -// int ii; /* Loop counter */ -// int needSync = 0; /* True if any page has PGHDR_NEED_SYNC */ -// Pager *pPager = pPg->pPager; /* The pager that owns pPg */ -// Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); - -// /* Set the doNotSpill NOSYNC bit to 1. This is because we cannot allow -// ** a journal header to be written between the pages journaled by -// ** this function. -// */ -// assert( !MEMDB ); -// assert( (pPager->doNotSpill & SPILLFLAG_NOSYNC)==0 ); -// pPager->doNotSpill |= SPILLFLAG_NOSYNC; - -// /* This trick assumes that both the page-size and sector-size are -// ** an integer power of 2. 
It sets variable pg1 to the identifier -// ** of the first page of the sector pPg is located on. -// */ -// pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1; - -// nPageCount = pPager->dbSize; -// if( pPg->pgno>nPageCount ){ -// nPage = (pPg->pgno - pg1)+1; -// }else if( (pg1+nPagePerSector-1)>nPageCount ){ -// nPage = nPageCount+1-pg1; -// }else{ -// nPage = nPagePerSector; -// } -// assert(nPage>0); -// assert(pg1<=pPg->pgno); -// assert((pg1+nPage)>pPg->pgno); - -// for(ii=0; iipgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){ -// if( pg!=PAGER_MJ_PGNO(pPager) ){ -// rc = sqlite3PagerGet(pPager, pg, &pPage, 0); -// if( rc==SQLITE_OK ){ -// rc = pager_write(pPage); -// if( pPage->flags&PGHDR_NEED_SYNC ){ -// needSync = 1; -// } -// sqlite3PagerUnrefNotNull(pPage); -// } -// } -// }else if( (pPage = sqlite3PagerLookup(pPager, pg))!=0 ){ -// if( pPage->flags&PGHDR_NEED_SYNC ){ -// needSync = 1; -// } -// sqlite3PagerUnrefNotNull(pPage); -// } -// } - -// /* If the PGHDR_NEED_SYNC flag is set for any of the nPage pages -// ** starting at pg1, then it needs to be set for all of them. Because -// ** writing to any of these nPage pages may damage the others, the -// ** journal file must contain sync()ed copies of all of them -// ** before any of them can be written out to the database file. -// */ -// if( rc==SQLITE_OK && needSync ){ -// assert( !MEMDB ); -// for(ii=0; iiflags |= PGHDR_NEED_SYNC; -// sqlite3PagerUnrefNotNull(pPage); -// } -// } -// } - -// assert( (pPager->doNotSpill & SPILLFLAG_NOSYNC)!=0 ); -// pPager->doNotSpill &= ~SPILLFLAG_NOSYNC; -// return rc; -// } - -// /* -// ** Mark a data page as writeable. This routine must be called before -// ** making changes to a page. The caller must check the return value -// ** of this function and be careful not to change any page data unless -// ** this routine returns SQLITE_OK. 
-// ** -// ** The difference between this function and pager_write() is that this -// ** function also deals with the special case where 2 or more pages -// ** fit on a single disk sector. In this case all co-resident pages -// ** must have been written to the journal file before returning. -// ** -// ** If an error occurs, SQLITE_NOMEM or an IO error code is returned -// ** as appropriate. Otherwise, SQLITE_OK. -// */ -// int sqlite3PagerWrite(PgHdr *pPg){ -// Pager *pPager = pPg->pPager; -// assert( (pPg->flags & PGHDR_MMAP)==0 ); -// assert( pPager->eState>=PAGER_WRITER_LOCKED ); -// assert( assert_pager_state(pPager) ); -// if( (pPg->flags & PGHDR_WRITEABLE)!=0 && pPager->dbSize>=pPg->pgno ){ -// if( pPager->nSavepoint ) return subjournalPageIfRequired(pPg); -// return SQLITE_OK; -// }else if( pPager->errCode ){ -// return pPager->errCode; -// }else if( pPager->sectorSize > (u32)pPager->pageSize ){ -// assert( pPager->tempFile==0 ); -// return pagerWriteLargeSector(pPg); -// }else{ -// return pager_write(pPg); -// } -// } - -// /* -// ** Return TRUE if the page given in the argument was previously passed -// ** to sqlite3PagerWrite(). In other words, return TRUE if it is ok -// ** to change the content of the page. -// */ -// #ifndef NDEBUG -// int sqlite3PagerIswriteable(DbPage *pPg){ -// return pPg->flags & PGHDR_WRITEABLE; -// } -// #endif - -// /* -// ** A call to this routine tells the pager that it is not necessary to -// ** write the information on page pPg back to the disk, even though -// ** that page might be marked as dirty. This happens, for example, when -// ** the page has been added as a leaf of the freelist and so its -// ** content no longer matters. -// ** -// ** The overlying software layer calls this routine when all of the data -// ** on the given page is unused. The pager marks the page as clean so -// ** that it does not get written to disk. 
-// ** -// ** Tests show that this optimization can quadruple the speed of large -// ** DELETE operations. -// ** -// ** This optimization cannot be used with a temp-file, as the page may -// ** have been dirty at the start of the transaction. In that case, if -// ** memory pressure forces page pPg out of the cache, the data does need -// ** to be written out to disk so that it may be read back in if the -// ** current transaction is rolled back. -// */ -// void sqlite3PagerDontWrite(PgHdr *pPg){ -// Pager *pPager = pPg->pPager; -// if( !pPager->tempFile && (pPg->flags&PGHDR_DIRTY) && pPager->nSavepoint==0 ){ -// PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager))); -// IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno)) -// pPg->flags |= PGHDR_DONT_WRITE; -// pPg->flags &= ~PGHDR_WRITEABLE; -// testcase( pPg->flags & PGHDR_NEED_SYNC ); -// pager_set_pagehash(pPg); -// } -// } - -// /* -// ** This routine is called to increment the value of the database file -// ** change-counter, stored as a 4-byte big-endian integer starting at -// ** byte offset 24 of the pager file. The secondary change counter at -// ** 92 is also updated, as is the SQLite version number at offset 96. -// ** -// ** But this only happens if the pPager->changeCountDone flag is false. -// ** To avoid excess churning of page 1, the update only happens once. -// ** See also the pager_write_changecounter() routine that does an -// ** unconditional update of the change counters. -// ** -// ** If the isDirectMode flag is zero, then this is done by calling -// ** sqlite3PagerWrite() on page 1, then modifying the contents of the -// ** page data. In this case the file will be updated when the current -// ** transaction is committed. -// ** -// ** The isDirectMode flag may only be non-zero if the library was compiled -// ** with the SQLITE_ENABLE_ATOMIC_WRITE macro defined. 
In this case, -// ** if isDirect is non-zero, then the database file is updated directly -// ** by writing an updated version of page 1 using a call to the -// ** sqlite3OsWrite() function. -// */ -// static int pager_incr_changecounter(Pager *pPager, int isDirectMode){ -// int rc = SQLITE_OK; - -// assert( pPager->eState==PAGER_WRITER_CACHEMOD -// || pPager->eState==PAGER_WRITER_DBMOD -// ); -// assert( assert_pager_state(pPager) ); - -// /* Declare and initialize constant integer 'isDirect'. If the -// ** atomic-write optimization is enabled in this build, then isDirect -// ** is initialized to the value passed as the isDirectMode parameter -// ** to this function. Otherwise, it is always set to zero. -// ** -// ** The idea is that if the atomic-write optimization is not -// ** enabled at compile time, the compiler can omit the tests of -// ** 'isDirect' below, as well as the block enclosed in the -// ** "if( isDirect )" condition. -// */ -// #ifndef SQLITE_ENABLE_ATOMIC_WRITE -// # define DIRECT_MODE 0 -// assert( isDirectMode==0 ); -// UNUSED_PARAMETER(isDirectMode); -// #else -// # define DIRECT_MODE isDirectMode -// #endif - -// if( !pPager->changeCountDone && ALWAYS(pPager->dbSize>0) ){ -// PgHdr *pPgHdr; /* Reference to page 1 */ - -// assert( !pPager->tempFile && isOpen(pPager->fd) ); - -// /* Open page 1 of the file for writing. */ -// rc = sqlite3PagerGet(pPager, 1, &pPgHdr, 0); -// assert( pPgHdr==0 || rc==SQLITE_OK ); - -// /* If page one was fetched successfully, and this function is not -// ** operating in direct-mode, make page 1 writable. When not in -// ** direct mode, page 1 is always held in cache and hence the PagerGet() -// ** above is always successful - hence the ALWAYS on rc==SQLITE_OK. 
-// */ -// if( !DIRECT_MODE && ALWAYS(rc==SQLITE_OK) ){ -// rc = sqlite3PagerWrite(pPgHdr); -// } - -// if( rc==SQLITE_OK ){ -// /* Actually do the update of the change counter */ -// pager_write_changecounter(pPgHdr); - -// /* If running in direct mode, write the contents of page 1 to the file. */ -// if( DIRECT_MODE ){ -// const void *zBuf; -// assert( pPager->dbFileSize>0 ); -// zBuf = pPgHdr->pData; -// if( rc==SQLITE_OK ){ -// rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0); -// pPager->aStat[PAGER_STAT_WRITE]++; -// } -// if( rc==SQLITE_OK ){ -// /* Update the pager's copy of the change-counter. Otherwise, the -// ** next time a read transaction is opened the cache will be -// ** flushed (as the change-counter values will not match). */ -// const void *pCopy = (const void *)&((const char *)zBuf)[24]; -// memcpy(&pPager->dbFileVers, pCopy, sizeof(pPager->dbFileVers)); -// pPager->changeCountDone = 1; -// } -// }else{ -// pPager->changeCountDone = 1; -// } -// } - -// /* Release the page reference. */ -// sqlite3PagerUnref(pPgHdr); -// } -// return rc; -// } - -// /* -// ** Sync the database file to disk. This is a no-op for in-memory databases -// ** or pages with the Pager.noSync flag set. -// ** -// ** If successful, or if called on a pager for which it is a no-op, this -// ** function returns SQLITE_OK. Otherwise, an IO error code is returned. -// */ -// int sqlite3PagerSync(Pager *pPager, const char *zSuper){ -// int rc = SQLITE_OK; -// void *pArg = (void*)zSuper; -// rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_SYNC, pArg); -// if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; -// if( rc==SQLITE_OK && !pPager->noSync ){ -// assert( !MEMDB ); -// rc = sqlite3OsSync(pPager->fd, pPager->syncFlags); -// } -// return rc; -// } - -// /* -// ** This function may only be called while a write-transaction is active in -// ** rollback. If the connection is in WAL mode, this call is a no-op. 
-// ** Otherwise, if the connection does not already have an EXCLUSIVE lock on -// ** the database file, an attempt is made to obtain one. -// ** -// ** If the EXCLUSIVE lock is already held or the attempt to obtain it is -// ** successful, or the connection is in WAL mode, SQLITE_OK is returned. -// ** Otherwise, either SQLITE_BUSY or an SQLITE_IOERR_XXX error code is -// ** returned. -// */ -// int sqlite3PagerExclusiveLock(Pager *pPager){ -// int rc = pPager->errCode; -// assert( assert_pager_state(pPager) ); -// if( rc==SQLITE_OK ){ -// assert( pPager->eState==PAGER_WRITER_CACHEMOD -// || pPager->eState==PAGER_WRITER_DBMOD -// || pPager->eState==PAGER_WRITER_LOCKED -// ); -// assert( assert_pager_state(pPager) ); -// if( 0==pagerUseWal(pPager) ){ -// rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); -// } -// } -// return rc; -// } - -// /* -// ** Sync the database file for the pager pPager. zSuper points to the name -// ** of a super-journal file that should be written into the individual -// ** journal file. zSuper may be NULL, which is interpreted as no -// ** super-journal (a single database transaction). -// ** -// ** This routine ensures that: -// ** -// ** * The database file change-counter is updated, -// ** * the journal is synced (unless the atomic-write optimization is used), -// ** * all dirty pages are written to the database file, -// ** * the database file is truncated (if required), and -// ** * the database file synced. -// ** -// ** The only thing that remains to commit the transaction is to finalize -// ** (delete, truncate or zero the first part of) the journal file (or -// ** delete the super-journal file if specified). -// ** -// ** Note that if zSuper==NULL, this does not overwrite a previous value -// ** passed to an sqlite3PagerCommitPhaseOne() call. -// ** -// ** If the final parameter - noSync - is true, then the database file itself -// ** is not synced. 
The caller must call sqlite3PagerSync() directly to -// ** sync the database file before calling CommitPhaseTwo() to delete the -// ** journal file in this case. -// */ -// int sqlite3PagerCommitPhaseOne( -// Pager *pPager, /* Pager object */ -// const char *zSuper, /* If not NULL, the super-journal name */ -// int noSync /* True to omit the xSync on the db file */ -// ){ -// int rc = SQLITE_OK; /* Return code */ - -// assert( pPager->eState==PAGER_WRITER_LOCKED -// || pPager->eState==PAGER_WRITER_CACHEMOD -// || pPager->eState==PAGER_WRITER_DBMOD -// || pPager->eState==PAGER_ERROR -// ); -// assert( assert_pager_state(pPager) ); - -// /* If a prior error occurred, report that error again. */ -// if( NEVER(pPager->errCode) ) return pPager->errCode; - -// /* Provide the ability to easily simulate an I/O error during testing */ -// if( sqlite3FaultSim(400) ) return SQLITE_IOERR; - -// PAGERTRACE(("DATABASE SYNC: File=%s zSuper=%s nSize=%d\n", -// pPager->zFilename, zSuper, pPager->dbSize)); - -// /* If no database changes have been made, return early. */ -// if( pPager->eStatetempFile ); -// assert( isOpen(pPager->fd) || pPager->tempFile ); -// if( 0==pagerFlushOnCommit(pPager, 1) ){ -// /* If this is an in-memory db, or no pages have been written to, or this -// ** function has already been called, it is mostly a no-op. However, any -// ** backup in progress needs to be restarted. */ -// sqlite3BackupRestart(pPager->pBackup); -// }else{ -// PgHdr *pList; -// if( pagerUseWal(pPager) ){ -// PgHdr *pPageOne = 0; -// pList = sqlite3PcacheDirtyList(pPager->pPCache); -// if( pList==0 ){ -// /* Must have at least one page for the WAL commit flag. 
-// ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */ -// rc = sqlite3PagerGet(pPager, 1, &pPageOne, 0); -// pList = pPageOne; -// pList->pDirty = 0; -// } -// assert( rc==SQLITE_OK ); -// if( ALWAYS(pList) ){ -// rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1); -// } -// sqlite3PagerUnref(pPageOne); -// if( rc==SQLITE_OK ){ -// sqlite3PcacheCleanAll(pPager->pPCache); -// } -// }else{ -// /* The bBatch boolean is true if the batch-atomic-write commit method -// ** should be used. No rollback journal is created if batch-atomic-write -// ** is enabled. -// */ -// #ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE -// sqlite3_file *fd = pPager->fd; -// int bBatch = zSuper==0 /* An SQLITE_IOCAP_BATCH_ATOMIC commit */ -// && (sqlite3OsDeviceCharacteristics(fd) & SQLITE_IOCAP_BATCH_ATOMIC) -// && !pPager->noSync -// && sqlite3JournalIsInMemory(pPager->jfd); -// #else -// # define bBatch 0 -// #endif - -// #ifdef SQLITE_ENABLE_ATOMIC_WRITE -// /* The following block updates the change-counter. Exactly how it -// ** does this depends on whether or not the atomic-update optimization -// ** was enabled at compile time, and if this transaction meets the -// ** runtime criteria to use the operation: -// ** -// ** * The file-system supports the atomic-write property for -// ** blocks of size page-size, and -// ** * This commit is not part of a multi-file transaction, and -// ** * Exactly one page has been modified and store in the journal file. -// ** -// ** If the optimization was not enabled at compile time, then the -// ** pager_incr_changecounter() function is called to update the change -// ** counter in 'indirect-mode'. If the optimization is compiled in but -// ** is not applicable to this transaction, call sqlite3JournalCreate() -// ** to make sure the journal file has actually been created, then call -// ** pager_incr_changecounter() to update the change-counter in indirect -// ** mode. 
-// ** -// ** Otherwise, if the optimization is both enabled and applicable, -// ** then call pager_incr_changecounter() to update the change-counter -// ** in 'direct' mode. In this case the journal file will never be -// ** created for this transaction. -// */ -// if( bBatch==0 ){ -// PgHdr *pPg; -// assert( isOpen(pPager->jfd) -// || pPager->journalMode==PAGER_JOURNALMODE_OFF -// || pPager->journalMode==PAGER_JOURNALMODE_WAL -// ); -// if( !zSuper && isOpen(pPager->jfd) -// && pPager->journalOff==jrnlBufferSize(pPager) -// && pPager->dbSize>=pPager->dbOrigSize -// && (!(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) -// ){ -// /* Update the db file change counter via the direct-write method. The -// ** following call will modify the in-memory representation of page 1 -// ** to include the updated change counter and then write page 1 -// ** directly to the database file. Because of the atomic-write -// ** property of the host file-system, this is safe. -// */ -// rc = pager_incr_changecounter(pPager, 1); -// }else{ -// rc = sqlite3JournalCreate(pPager->jfd); -// if( rc==SQLITE_OK ){ -// rc = pager_incr_changecounter(pPager, 0); -// } -// } -// } -// #else /* SQLITE_ENABLE_ATOMIC_WRITE */ -// #ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE -// if( zSuper ){ -// rc = sqlite3JournalCreate(pPager->jfd); -// if( rc!=SQLITE_OK ) goto commit_phase_one_exit; -// assert( bBatch==0 ); -// } -// #endif -// rc = pager_incr_changecounter(pPager, 0); -// #endif /* !SQLITE_ENABLE_ATOMIC_WRITE */ -// if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - -// /* Write the super-journal name into the journal file. If a -// ** super-journal file name has already been written to the journal file, -// ** or if zSuper is NULL (no super-journal), then this call is a no-op. -// */ -// rc = writeSuperJournal(pPager, zSuper); -// if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - -// /* Sync the journal file and write all dirty pages to the database. 
-// ** If the atomic-update optimization is being used, this sync will not -// ** create the journal file or perform any real IO. -// ** -// ** Because the change-counter page was just modified, unless the -// ** atomic-update optimization is used it is almost certain that the -// ** journal requires a sync here. However, in locking_mode=exclusive -// ** on a system under memory pressure it is just possible that this is -// ** not the case. In this case it is likely enough that the redundant -// ** xSync() call will be changed to a no-op by the OS anyhow. -// */ -// rc = syncJournal(pPager, 0); -// if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - -// pList = sqlite3PcacheDirtyList(pPager->pPCache); -// #ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE -// if( bBatch ){ -// rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, 0); -// if( rc==SQLITE_OK ){ -// rc = pager_write_pagelist(pPager, pList); -// if( rc==SQLITE_OK ){ -// rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, 0); -// } -// if( rc!=SQLITE_OK ){ -// sqlite3OsFileControlHint(fd, SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE, 0); -// } -// } - -// if( (rc&0xFF)==SQLITE_IOERR && rc!=SQLITE_IOERR_NOMEM ){ -// rc = sqlite3JournalCreate(pPager->jfd); -// if( rc!=SQLITE_OK ){ -// sqlite3OsClose(pPager->jfd); -// goto commit_phase_one_exit; -// } -// bBatch = 0; -// }else{ -// sqlite3OsClose(pPager->jfd); -// } -// } -// #endif /* SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ - -// if( bBatch==0 ){ -// rc = pager_write_pagelist(pPager, pList); -// } -// if( rc!=SQLITE_OK ){ -// assert( rc!=SQLITE_IOERR_BLOCKED ); -// goto commit_phase_one_exit; -// } -// sqlite3PcacheCleanAll(pPager->pPCache); - -// /* If the file on disk is smaller than the database image, use -// ** pager_truncate to grow the file here. This can happen if the database -// ** image was extended as part of the current transaction and then the -// ** last page in the db image moved to the free-list. 
In this case the -// ** last page is never written out to disk, leaving the database file -// ** undersized. Fix this now if it is the case. */ -// if( pPager->dbSize>pPager->dbFileSize ){ -// Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager)); -// assert( pPager->eState==PAGER_WRITER_DBMOD ); -// rc = pager_truncate(pPager, nNew); -// if( rc!=SQLITE_OK ) goto commit_phase_one_exit; -// } - -// /* Finally, sync the database file. */ -// if( !noSync ){ -// rc = sqlite3PagerSync(pPager, zSuper); -// } -// IOTRACE(("DBSYNC %p\n", pPager)) -// } -// } - -// commit_phase_one_exit: -// if( rc==SQLITE_OK && !pagerUseWal(pPager) ){ -// pPager->eState = PAGER_WRITER_FINISHED; -// } -// return rc; -// } - -// /* -// ** When this function is called, the database file has been completely -// ** updated to reflect the changes made by the current transaction and -// ** synced to disk. The journal file still exists in the file-system -// ** though, and if a failure occurs at this point it will eventually -// ** be used as a hot-journal and the current transaction rolled back. -// ** -// ** This function finalizes the journal file, either by deleting, -// ** truncating or partially zeroing it, so that it cannot be used -// ** for hot-journal rollback. Once this is done the transaction is -// ** irrevocably committed. -// ** -// ** If an error occurs, an IO error code is returned and the pager -// ** moves into the error state. Otherwise, SQLITE_OK is returned. -// */ -// int sqlite3PagerCommitPhaseTwo(Pager *pPager){ -// int rc = SQLITE_OK; /* Return code */ - -// /* This routine should not be called if a prior error has occurred. -// ** But if (due to a coding error elsewhere in the system) it does get -// ** called, just return the same error code without doing anything. 
*/ -// if( NEVER(pPager->errCode) ) return pPager->errCode; -// pPager->iDataVersion++; - -// assert( pPager->eState==PAGER_WRITER_LOCKED -// || pPager->eState==PAGER_WRITER_FINISHED -// || (pagerUseWal(pPager) && pPager->eState==PAGER_WRITER_CACHEMOD) -// ); -// assert( assert_pager_state(pPager) ); - -// /* An optimization. If the database was not actually modified during -// ** this transaction, the pager is running in exclusive-mode and is -// ** using persistent journals, then this function is a no-op. -// ** -// ** The start of the journal file currently contains a single journal -// ** header with the nRec field set to 0. If such a journal is used as -// ** a hot-journal during hot-journal rollback, 0 changes will be made -// ** to the database file. So there is no need to zero the journal -// ** header. Since the pager is in exclusive mode, there is no need -// ** to drop any locks either. -// */ -// if( pPager->eState==PAGER_WRITER_LOCKED -// && pPager->exclusiveMode -// && pPager->journalMode==PAGER_JOURNALMODE_PERSIST -// ){ -// assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) || !pPager->journalOff ); -// pPager->eState = PAGER_READER; -// return SQLITE_OK; -// } - -// PAGERTRACE(("COMMIT %d\n", PAGERID(pPager))); -// rc = pager_end_transaction(pPager, pPager->setSuper, 1); -// return pager_error(pPager, rc); -// } - -// /* -// ** If a write transaction is open, then all changes made within the -// ** transaction are reverted and the current write-transaction is closed. -// ** The pager falls back to PAGER_READER state if successful, or PAGER_ERROR -// ** state if an error occurs. -// ** -// ** If the pager is already in PAGER_ERROR state when this function is called, -// ** it returns Pager.errCode immediately. No work is performed in this case. 
-// ** -// ** Otherwise, in rollback mode, this function performs two functions: -// ** -// ** 1) It rolls back the journal file, restoring all database file and -// ** in-memory cache pages to the state they were in when the transaction -// ** was opened, and -// ** -// ** 2) It finalizes the journal file, so that it is not used for hot -// ** rollback at any point in the future. -// ** -// ** Finalization of the journal file (task 2) is only performed if the -// ** rollback is successful. -// ** -// ** In WAL mode, all cache-entries containing data modified within the -// ** current transaction are either expelled from the cache or reverted to -// ** their pre-transaction state by re-reading data from the database or -// ** WAL files. The WAL transaction is then closed. -// */ -// int sqlite3PagerRollback(Pager *pPager){ -// int rc = SQLITE_OK; /* Return code */ -// PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager))); - -// /* PagerRollback() is a no-op if called in READER or OPEN state. If -// ** the pager is already in the ERROR state, the rollback is not -// ** attempted here. Instead, the error code is returned to the caller. -// */ -// assert( assert_pager_state(pPager) ); -// if( pPager->eState==PAGER_ERROR ) return pPager->errCode; -// if( pPager->eState<=PAGER_READER ) return SQLITE_OK; - -// if( pagerUseWal(pPager) ){ -// int rc2; -// rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1); -// rc2 = pager_end_transaction(pPager, pPager->setSuper, 0); -// if( rc==SQLITE_OK ) rc = rc2; -// }else if( !isOpen(pPager->jfd) || pPager->eState==PAGER_WRITER_LOCKED ){ -// int eState = pPager->eState; -// rc = pager_end_transaction(pPager, 0, 0); -// if( !MEMDB && eState>PAGER_WRITER_LOCKED ){ -// /* This can happen using journal_mode=off. Move the pager to the error -// ** state to indicate that the contents of the cache may not be trusted. -// ** Any active readers will get SQLITE_ABORT. 
-// */ -// pPager->errCode = SQLITE_ABORT; -// pPager->eState = PAGER_ERROR; -// setGetterMethod(pPager); -// return rc; -// } -// }else{ -// rc = pager_playback(pPager, 0); -// } - -// assert( pPager->eState==PAGER_READER || rc!=SQLITE_OK ); -// assert( rc==SQLITE_OK || rc==SQLITE_FULL || rc==SQLITE_CORRUPT -// || rc==SQLITE_NOMEM || (rc&0xFF)==SQLITE_IOERR -// || rc==SQLITE_CANTOPEN -// ); - -// /* If an error occurs during a ROLLBACK, we can no longer trust the pager -// ** cache. So call pager_error() on the way out to make any error persistent. -// */ -// return pager_error(pPager, rc); -// } - -// /* -// ** Return TRUE if the database file is opened read-only. Return FALSE -// ** if the database is (in theory) writable. -// */ -// u8 sqlite3PagerIsreadonly(Pager *pPager){ -// return pPager->readOnly; -// } - -// #ifdef SQLITE_DEBUG -// /* -// ** Return the sum of the reference counts for all pages held by pPager. -// */ -// int sqlite3PagerRefcount(Pager *pPager){ -// return sqlite3PcacheRefCount(pPager->pPCache); -// } -// #endif - -// /* -// ** Return the approximate number of bytes of memory currently -// ** used by the pager and its associated cache. -// */ -// int sqlite3PagerMemUsed(Pager *pPager){ -// int perPageSize = pPager->pageSize + pPager->nExtra -// + (int)(sizeof(PgHdr) + 5*sizeof(void*)); -// return perPageSize*sqlite3PcachePagecount(pPager->pPCache) -// + sqlite3MallocSize(pPager) -// + pPager->pageSize; -// } - -// /* -// ** Return the number of references to the specified page. -// */ -// int sqlite3PagerPageRefcount(DbPage *pPage){ -// return sqlite3PcachePageRefcount(pPage); -// } - -// #ifdef SQLITE_TEST -// /* -// ** This routine is used for testing and analysis only. -// */ -// int *sqlite3PagerStats(Pager *pPager){ -// static int a[11]; -// a[0] = sqlite3PcacheRefCount(pPager->pPCache); -// a[1] = sqlite3PcachePagecount(pPager->pPCache); -// a[2] = sqlite3PcacheGetCachesize(pPager->pPCache); -// a[3] = pPager->eState==PAGER_OPEN ? 
-1 : (int) pPager->dbSize; -// a[4] = pPager->eState; -// a[5] = pPager->errCode; -// a[6] = pPager->aStat[PAGER_STAT_HIT]; -// a[7] = pPager->aStat[PAGER_STAT_MISS]; -// a[8] = 0; /* Used to be pPager->nOvfl */ -// a[9] = pPager->nRead; -// a[10] = pPager->aStat[PAGER_STAT_WRITE]; -// return a; -// } -// #endif - -// /* -// ** Parameter eStat must be one of SQLITE_DBSTATUS_CACHE_HIT, _MISS, _WRITE, -// ** or _WRITE+1. The SQLITE_DBSTATUS_CACHE_WRITE+1 case is a translation -// ** of SQLITE_DBSTATUS_CACHE_SPILL. The _SPILL case is not contiguous because -// ** it was added later. -// ** -// ** Before returning, *pnVal is incremented by the -// ** current cache hit or miss count, according to the value of eStat. If the -// ** reset parameter is non-zero, the cache hit or miss count is zeroed before -// ** returning. -// */ -// void sqlite3PagerCacheStat(Pager *pPager, int eStat, int reset, int *pnVal){ - -// assert( eStat==SQLITE_DBSTATUS_CACHE_HIT -// || eStat==SQLITE_DBSTATUS_CACHE_MISS -// || eStat==SQLITE_DBSTATUS_CACHE_WRITE -// || eStat==SQLITE_DBSTATUS_CACHE_WRITE+1 -// ); - -// assert( SQLITE_DBSTATUS_CACHE_HIT+1==SQLITE_DBSTATUS_CACHE_MISS ); -// assert( SQLITE_DBSTATUS_CACHE_HIT+2==SQLITE_DBSTATUS_CACHE_WRITE ); -// assert( PAGER_STAT_HIT==0 && PAGER_STAT_MISS==1 -// && PAGER_STAT_WRITE==2 && PAGER_STAT_SPILL==3 ); - -// eStat -= SQLITE_DBSTATUS_CACHE_HIT; -// *pnVal += pPager->aStat[eStat]; -// if( reset ){ -// pPager->aStat[eStat] = 0; -// } -// } - -// /* -// ** Return true if this is an in-memory or temp-file backed pager. -// */ -// int sqlite3PagerIsMemdb(Pager *pPager){ -// return pPager->tempFile || pPager->memVfs; -// } - -// /* -// ** Check that there are at least nSavepoint savepoints open. If there are -// ** currently less than nSavepoints open, then open one or more savepoints -// ** to make up the difference. If the number of savepoints is already -// ** equal to nSavepoint, then this function is a no-op. 
-// ** -// ** If a memory allocation fails, SQLITE_NOMEM is returned. If an error -// ** occurs while opening the sub-journal file, then an IO error code is -// ** returned. Otherwise, SQLITE_OK. -// */ -// static SQLITE_NOINLINE int pagerOpenSavepoint(Pager *pPager, int nSavepoint){ -// int rc = SQLITE_OK; /* Return code */ -// int nCurrent = pPager->nSavepoint; /* Current number of savepoints */ -// int ii; /* Iterator variable */ -// PagerSavepoint *aNew; /* New Pager.aSavepoint array */ - -// assert( pPager->eState>=PAGER_WRITER_LOCKED ); -// assert( assert_pager_state(pPager) ); -// assert( nSavepoint>nCurrent && pPager->useJournal ); - -// /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM -// ** if the allocation fails. Otherwise, zero the new portion in case a -// ** malloc failure occurs while populating it in the for(...) loop below. -// */ -// aNew = (PagerSavepoint *)sqlite3Realloc( -// pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint -// ); -// if( !aNew ){ -// return SQLITE_NOMEM; -// } -// memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint)); -// pPager->aSavepoint = aNew; - -// /* Populate the PagerSavepoint structures just allocated. 
*/ -// for(ii=nCurrent; iidbSize; -// if( isOpen(pPager->jfd) && pPager->journalOff>0 ){ -// aNew[ii].iOffset = pPager->journalOff; -// }else{ -// aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager); -// } -// aNew[ii].iSubRec = pPager->nSubRec; -// aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize); -// aNew[ii].bTruncateOnRelease = 1; -// if( !aNew[ii].pInSavepoint ){ -// return SQLITE_NOMEM; -// } -// if( pagerUseWal(pPager) ){ -// sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData); -// } -// pPager->nSavepoint = ii+1; -// } -// assert( pPager->nSavepoint==nSavepoint ); -// assertTruncateConstraint(pPager); -// return rc; -// } -// int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){ -// assert( pPager->eState>=PAGER_WRITER_LOCKED ); -// assert( assert_pager_state(pPager) ); - -// if( nSavepoint>pPager->nSavepoint && pPager->useJournal ){ -// return pagerOpenSavepoint(pPager, nSavepoint); -// }else{ -// return SQLITE_OK; -// } -// } - -// /* -// ** This function is called to rollback or release (commit) a savepoint. -// ** The savepoint to release or rollback need not be the most recently -// ** created savepoint. -// ** -// ** Parameter op is always either SAVEPOINT_ROLLBACK or SAVEPOINT_RELEASE. -// ** If it is SAVEPOINT_RELEASE, then release and destroy the savepoint with -// ** index iSavepoint. If it is SAVEPOINT_ROLLBACK, then rollback all changes -// ** that have occurred since the specified savepoint was created. -// ** -// ** The savepoint to rollback or release is identified by parameter -// ** iSavepoint. A value of 0 means to operate on the outermost savepoint -// ** (the first created). A value of (Pager.nSavepoint-1) means operate -// ** on the most recently created savepoint. If iSavepoint is greater than -// ** (Pager.nSavepoint-1), then this function is a no-op. -// ** -// ** If a negative value is passed to this function, then the current -// ** transaction is rolled back. 
This is different to calling -// ** sqlite3PagerRollback() because this function does not terminate -// ** the transaction or unlock the database, it just restores the -// ** contents of the database to its original state. -// ** -// ** In any case, all savepoints with an index greater than iSavepoint -// ** are destroyed. If this is a release operation (op==SAVEPOINT_RELEASE), -// ** then savepoint iSavepoint is also destroyed. -// ** -// ** This function may return SQLITE_NOMEM if a memory allocation fails, -// ** or an IO error code if an IO error occurs while rolling back a -// ** savepoint. If no errors occur, SQLITE_OK is returned. -// */ -// int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){ -// int rc = pPager->errCode; - -// #ifdef SQLITE_ENABLE_ZIPVFS -// if( op==SAVEPOINT_RELEASE ) rc = SQLITE_OK; -// #endif - -// assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK ); -// assert( iSavepoint>=0 || op==SAVEPOINT_ROLLBACK ); - -// if( rc==SQLITE_OK && iSavepointnSavepoint ){ -// int ii; /* Iterator variable */ -// int nNew; /* Number of remaining savepoints after this op. */ - -// /* Figure out how many savepoints will still be active after this -// ** operation. Store this value in nNew. Then free resources associated -// ** with any savepoints that are destroyed by this operation. -// */ -// nNew = iSavepoint + (( op==SAVEPOINT_RELEASE ) ? 0 : 1); -// for(ii=nNew; iinSavepoint; ii++){ -// sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint); -// } -// pPager->nSavepoint = nNew; - -// /* Truncate the sub-journal so that it only includes the parts -// ** that are still in use. */ -// if( op==SAVEPOINT_RELEASE ){ -// PagerSavepoint *pRel = &pPager->aSavepoint[nNew]; -// if( pRel->bTruncateOnRelease && isOpen(pPager->sjfd) ){ -// /* Only truncate if it is an in-memory sub-journal. 
*/ -// if( sqlite3JournalIsInMemory(pPager->sjfd) ){ -// i64 sz = (pPager->pageSize+4)*(i64)pRel->iSubRec; -// rc = sqlite3OsTruncate(pPager->sjfd, sz); -// assert( rc==SQLITE_OK ); -// } -// pPager->nSubRec = pRel->iSubRec; -// } -// } -// /* Else this is a rollback operation, playback the specified savepoint. -// ** If this is a temp-file, it is possible that the journal file has -// ** not yet been opened. In this case there have been no changes to -// ** the database file, so the playback operation can be skipped. -// */ -// else if( pagerUseWal(pPager) || isOpen(pPager->jfd) ){ -// PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1]; -// rc = pagerPlaybackSavepoint(pPager, pSavepoint); -// assert(rc!=SQLITE_DONE); -// } - -// #ifdef SQLITE_ENABLE_ZIPVFS -// /* If the cache has been modified but the savepoint cannot be rolled -// ** back journal_mode=off, put the pager in the error state. This way, -// ** if the VFS used by this pager includes ZipVFS, the entire transaction -// ** can be rolled back at the ZipVFS level. */ -// else if( -// pPager->journalMode==PAGER_JOURNALMODE_OFF -// && pPager->eState>=PAGER_WRITER_CACHEMOD -// ){ -// pPager->errCode = SQLITE_ABORT; -// pPager->eState = PAGER_ERROR; -// setGetterMethod(pPager); -// } -// #endif -// } - -// return rc; -// } - -// /* -// ** Return the full pathname of the database file. -// ** -// ** Except, if the pager is in-memory only, then return an empty string if -// ** nullIfMemDb is true. This routine is called with nullIfMemDb==1 when -// ** used to report the filename to the user, for compatibility with legacy -// ** behavior. But when the Btree needs to know the filename for matching to -// ** shared cache, it uses nullIfMemDb==0 so that in-memory databases can -// ** participate in shared-cache. -// ** -// ** The return value to this routine is always safe to use with -// ** sqlite3_uri_parameter() and sqlite3_filename_database() and friends. 
-// */ -// const char *sqlite3PagerFilename(const Pager *pPager, int nullIfMemDb){ -// static const char zFake[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; -// return (nullIfMemDb && pPager->memDb) ? &zFake[4] : pPager->zFilename; -// } - -// /* -// ** Return the VFS structure for the pager. -// */ -// sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){ -// return pPager->pVfs; -// } - -// /* -// ** Return the file handle for the database file associated -// ** with the pager. This might return NULL if the file has -// ** not yet been opened. -// */ -// sqlite3_file *sqlite3PagerFile(Pager *pPager){ -// return pPager->fd; -// } - -// /* -// ** Return the file handle for the journal file (if it exists). -// ** This will be either the rollback journal or the WAL file. -// */ -// sqlite3_file *sqlite3PagerJrnlFile(Pager *pPager){ -// #if SQLITE_OMIT_WAL -// return pPager->jfd; -// #else -// return pPager->pWal ? sqlite3WalFile(pPager->pWal) : pPager->jfd; -// #endif -// } - -// /* -// ** Return the full pathname of the journal file. -// */ -// const char *sqlite3PagerJournalname(Pager *pPager){ -// return pPager->zJournal; -// } - -// #ifndef SQLITE_OMIT_AUTOVACUUM -// /* -// ** Move the page pPg to location pgno in the file. -// ** -// ** There must be no references to the page previously located at -// ** pgno (which we call pPgOld) though that page is allowed to be -// ** in cache. If the page previously located at pgno is not already -// ** in the rollback journal, it is not put there by by this routine. -// ** -// ** References to the page pPg remain valid. Updating any -// ** meta-data associated with pPg (i.e. data stored in the nExtra bytes -// ** allocated along with the page) is the responsibility of the caller. -// ** -// ** A transaction must be active when this routine is called. 
It used to be -// ** required that a statement transaction was not active, but this restriction -// ** has been removed (CREATE INDEX needs to move a page when a statement -// ** transaction is active). -// ** -// ** If the fourth argument, isCommit, is non-zero, then this page is being -// ** moved as part of a database reorganization just before the transaction -// ** is being committed. In this case, it is guaranteed that the database page -// ** pPg refers to will not be written to again within this transaction. -// ** -// ** This function may return SQLITE_NOMEM or an IO error code if an error -// ** occurs. Otherwise, it returns SQLITE_OK. -// */ -// int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){ -// PgHdr *pPgOld; /* The page being overwritten. */ -// Pgno needSyncPgno = 0; /* Old value of pPg->pgno, if sync is required */ -// int rc; /* Return code */ -// Pgno origPgno; /* The original page number */ - -// assert( pPg->nRef>0 ); -// assert( pPager->eState==PAGER_WRITER_CACHEMOD -// || pPager->eState==PAGER_WRITER_DBMOD -// ); -// assert( assert_pager_state(pPager) ); - -// /* In order to be able to rollback, an in-memory database must journal -// ** the page we are moving from. -// */ -// assert( pPager->tempFile || !MEMDB ); -// if( pPager->tempFile ){ -// rc = sqlite3PagerWrite(pPg); -// if( rc ) return rc; -// } - -// /* If the page being moved is dirty and has not been saved by the latest -// ** savepoint, then save the current contents of the page into the -// ** sub-journal now. This is required to handle the following scenario: -// ** -// ** BEGIN; -// ** -// ** SAVEPOINT one; -// ** -// ** ROLLBACK TO one; -// ** -// ** If page X were not written to the sub-journal here, it would not -// ** be possible to restore its contents when the "ROLLBACK TO one" -// ** statement were is processed. -// ** -// ** subjournalPage() may need to allocate space to store pPg->pgno into -// ** one or more savepoint bitvecs. 
This is the reason this function -// ** may return SQLITE_NOMEM. -// */ -// if( (pPg->flags & PGHDR_DIRTY)!=0 -// && SQLITE_OK!=(rc = subjournalPageIfRequired(pPg)) -// ){ -// return rc; -// } - -// PAGERTRACE(("MOVE %d page %d (needSync=%d) moves to %d\n", -// PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno)); -// IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno)) - -// /* If the journal needs to be sync()ed before page pPg->pgno can -// ** be written to, store pPg->pgno in local variable needSyncPgno. -// ** -// ** If the isCommit flag is set, there is no need to remember that -// ** the journal needs to be sync()ed before database page pPg->pgno -// ** can be written to. The caller has already promised not to write to it. -// */ -// if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){ -// needSyncPgno = pPg->pgno; -// assert( pPager->journalMode==PAGER_JOURNALMODE_OFF || -// pageInJournal(pPager, pPg) || pPg->pgno>pPager->dbOrigSize ); -// assert( pPg->flags&PGHDR_DIRTY ); -// } - -// /* If the cache contains a page with page-number pgno, remove it -// ** from its hash chain. Also, if the PGHDR_NEED_SYNC flag was set for -// ** page pgno before the 'move' operation, it needs to be retained -// ** for the page moved there. -// */ -// pPg->flags &= ~PGHDR_NEED_SYNC; -// pPgOld = sqlite3PagerLookup(pPager, pgno); -// assert( !pPgOld || pPgOld->nRef==1 || CORRUPT_DB ); -// if( pPgOld ){ -// if( NEVER(pPgOld->nRef>1) ){ -// sqlite3PagerUnrefNotNull(pPgOld); -// return SQLITE_CORRUPT_BKPT; -// } -// pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC); -// if( pPager->tempFile ){ -// /* Do not discard pages from an in-memory database since we might -// ** need to rollback later. Just move the page out of the way. 
*/ -// sqlite3PcacheMove(pPgOld, pPager->dbSize+1); -// }else{ -// sqlite3PcacheDrop(pPgOld); -// } -// } - -// origPgno = pPg->pgno; -// sqlite3PcacheMove(pPg, pgno); -// sqlite3PcacheMakeDirty(pPg); - -// /* For an in-memory database, make sure the original page continues -// ** to exist, in case the transaction needs to roll back. Use pPgOld -// ** as the original page since it has already been allocated. -// */ -// if( pPager->tempFile && pPgOld ){ -// sqlite3PcacheMove(pPgOld, origPgno); -// sqlite3PagerUnrefNotNull(pPgOld); -// } - -// if( needSyncPgno ){ -// /* If needSyncPgno is non-zero, then the journal file needs to be -// ** sync()ed before any data is written to database file page needSyncPgno. -// ** Currently, no such page exists in the page-cache and the -// ** "is journaled" bitvec flag has been set. This needs to be remedied by -// ** loading the page into the pager-cache and setting the PGHDR_NEED_SYNC -// ** flag. -// ** -// ** If the attempt to load the page into the page-cache fails, (due -// ** to a malloc() or IO failure), clear the bit in the pInJournal[] -// ** array. Otherwise, if the page is loaded and written again in -// ** this transaction, it may be written to the database file before -// ** it is synced into the journal file. This way, it may end up in -// ** the journal file twice, but that is not a problem. -// */ -// PgHdr *pPgHdr; -// rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr, 0); -// if( rc!=SQLITE_OK ){ -// if( needSyncPgno<=pPager->dbOrigSize ){ -// assert( pPager->pTmpSpace!=0 ); -// sqlite3BitvecClear(pPager->pInJournal, needSyncPgno, pPager->pTmpSpace); -// } -// return rc; -// } -// pPgHdr->flags |= PGHDR_NEED_SYNC; -// sqlite3PcacheMakeDirty(pPgHdr); -// sqlite3PagerUnrefNotNull(pPgHdr); -// } - -// return SQLITE_OK; -// } -// #endif - -// /* -// ** The page handle passed as the first argument refers to a dirty page -// ** with a page number other than iNew. 
This function changes the page's -// ** page number to iNew and sets the value of the PgHdr.flags field to -// ** the value passed as the third parameter. -// */ -// void sqlite3PagerRekey(DbPage *pPg, Pgno iNew, u16 flags){ -// assert( pPg->pgno!=iNew ); -// pPg->flags = flags; -// sqlite3PcacheMove(pPg, iNew); -// } - -// /* -// ** Return a pointer to the data for the specified page. -// */ -// void *sqlite3PagerGetData(DbPage *pPg){ -// assert( pPg->nRef>0 || pPg->pPager->memDb ); -// return pPg->pData; -// } - -// /* -// ** Return a pointer to the Pager.nExtra bytes of "extra" space -// ** allocated along with the specified page. -// */ -// void *sqlite3PagerGetExtra(DbPage *pPg){ -// return pPg->pExtra; -// } - -// /* -// ** Get/set the locking-mode for this pager. Parameter eMode must be one -// ** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or -// ** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then -// ** the locking-mode is set to the value specified. -// ** -// ** The returned value is either PAGER_LOCKINGMODE_NORMAL or -// ** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated) -// ** locking-mode. -// */ -// int sqlite3PagerLockingMode(Pager *pPager, int eMode){ -// assert( eMode==PAGER_LOCKINGMODE_QUERY -// || eMode==PAGER_LOCKINGMODE_NORMAL -// || eMode==PAGER_LOCKINGMODE_EXCLUSIVE ); -// assert( PAGER_LOCKINGMODE_QUERY<0 ); -// assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 ); -// assert( pPager->exclusiveMode || 0==sqlite3WalHeapMemory(pPager->pWal) ); -// if( eMode>=0 && !pPager->tempFile && !sqlite3WalHeapMemory(pPager->pWal) ){ -// pPager->exclusiveMode = (u8)eMode; -// } -// return (int)pPager->exclusiveMode; -// } - -// /* -// ** Set the journal-mode for this pager. 
Parameter eMode must be one of: -// ** -// ** PAGER_JOURNALMODE_DELETE -// ** PAGER_JOURNALMODE_TRUNCATE -// ** PAGER_JOURNALMODE_PERSIST -// ** PAGER_JOURNALMODE_OFF -// ** PAGER_JOURNALMODE_MEMORY -// ** PAGER_JOURNALMODE_WAL -// ** -// ** The journalmode is set to the value specified if the change is allowed. -// ** The change may be disallowed for the following reasons: -// ** -// ** * An in-memory database can only have its journal_mode set to _OFF -// ** or _MEMORY. -// ** -// ** * Temporary databases cannot have _WAL journalmode. -// ** -// ** The returned indicate the current (possibly updated) journal-mode. -// */ -// int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){ -// u8 eOld = pPager->journalMode; /* Prior journalmode */ - -// /* The eMode parameter is always valid */ -// assert( eMode==PAGER_JOURNALMODE_DELETE -// || eMode==PAGER_JOURNALMODE_TRUNCATE -// || eMode==PAGER_JOURNALMODE_PERSIST -// || eMode==PAGER_JOURNALMODE_OFF -// || eMode==PAGER_JOURNALMODE_WAL -// || eMode==PAGER_JOURNALMODE_MEMORY ); - -// /* This routine is only called from the OP_JournalMode opcode, and -// ** the logic there will never allow a temporary file to be changed -// ** to WAL mode. -// */ -// assert( pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL ); - -// /* Do allow the journalmode of an in-memory database to be set to -// ** anything other than MEMORY or OFF -// */ -// if( MEMDB ){ -// assert( eOld==PAGER_JOURNALMODE_MEMORY || eOld==PAGER_JOURNALMODE_OFF ); -// if( eMode!=PAGER_JOURNALMODE_MEMORY && eMode!=PAGER_JOURNALMODE_OFF ){ -// eMode = eOld; -// } -// } - -// if( eMode!=eOld ){ - -// /* Change the journal mode. */ -// assert( pPager->eState!=PAGER_ERROR ); -// pPager->journalMode = (u8)eMode; - -// /* When transistioning from TRUNCATE or PERSIST to any other journal -// ** mode except WAL, unless the pager is in locking_mode=exclusive mode, -// ** delete the journal file. 
-// */ -// assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 ); -// assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 ); -// assert( (PAGER_JOURNALMODE_DELETE & 5)==0 ); -// assert( (PAGER_JOURNALMODE_MEMORY & 5)==4 ); -// assert( (PAGER_JOURNALMODE_OFF & 5)==0 ); -// assert( (PAGER_JOURNALMODE_WAL & 5)==5 ); - -// assert( isOpen(pPager->fd) || pPager->exclusiveMode ); -// if( !pPager->exclusiveMode && (eOld & 5)==1 && (eMode & 1)==0 ){ - -// /* In this case we would like to delete the journal file. If it is -// ** not possible, then that is not a problem. Deleting the journal file -// ** here is an optimization only. -// ** -// ** Before deleting the journal file, obtain a RESERVED lock on the -// ** database file. This ensures that the journal file is not deleted -// ** while it is in use by some other client. -// */ -// sqlite3OsClose(pPager->jfd); -// if( pPager->eLock>=RESERVED_LOCK ){ -// sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); -// }else{ -// int rc = SQLITE_OK; -// int state = pPager->eState; -// assert( state==PAGER_OPEN || state==PAGER_READER ); -// if( state==PAGER_OPEN ){ -// rc = sqlite3PagerSharedLock(pPager); -// } -// if( pPager->eState==PAGER_READER ){ -// assert( rc==SQLITE_OK ); -// rc = pagerLockDb(pPager, RESERVED_LOCK); -// } -// if( rc==SQLITE_OK ){ -// sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); -// } -// if( rc==SQLITE_OK && state==PAGER_READER ){ -// pagerUnlockDb(pPager, SHARED_LOCK); -// }else if( state==PAGER_OPEN ){ -// pager_unlock(pPager); -// } -// assert( state==pPager->eState ); -// } -// }else if( eMode==PAGER_JOURNALMODE_OFF ){ -// sqlite3OsClose(pPager->jfd); -// } -// } - -// /* Return the new journal mode */ -// return (int)pPager->journalMode; -// } - -// /* -// ** Return the current journal mode. -// */ -// int sqlite3PagerGetJournalMode(Pager *pPager){ -// return (int)pPager->journalMode; -// } - -// /* -// ** Return TRUE if the pager is in a state where it is OK to change the -// ** journalmode. 
Journalmode changes can only happen when the database -// ** is unmodified. -// */ -// int sqlite3PagerOkToChangeJournalMode(Pager *pPager){ -// assert( assert_pager_state(pPager) ); -// if( pPager->eState>=PAGER_WRITER_CACHEMOD ) return 0; -// if( NEVER(isOpen(pPager->jfd) && pPager->journalOff>0) ) return 0; -// return 1; -// } - -// /* -// ** Get/set the size-limit used for persistent journal files. -// ** -// ** Setting the size limit to -1 means no limit is enforced. -// ** An attempt to set a limit smaller than -1 is a no-op. -// */ -// i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){ -// if( iLimit>=-1 ){ -// pPager->journalSizeLimit = iLimit; -// sqlite3WalLimit(pPager->pWal, iLimit); -// } -// return pPager->journalSizeLimit; -// } - -// /* -// ** Return a pointer to the pPager->pBackup variable. The backup module -// ** in backup.c maintains the content of this variable. This module -// ** uses it opaquely as an argument to sqlite3BackupRestart() and -// ** sqlite3BackupUpdate() only. -// */ -// sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){ -// return &pPager->pBackup; -// } - -// #ifndef SQLITE_OMIT_VACUUM -// /* -// ** Unless this is an in-memory or temporary database, clear the pager cache. -// */ -// void sqlite3PagerClearCache(Pager *pPager){ -// assert( MEMDB==0 || pPager->tempFile ); -// if( pPager->tempFile==0 ) pager_reset(pPager); -// } -// #endif - -// #ifndef SQLITE_OMIT_WAL -// /* -// ** This function is called when the user invokes "PRAGMA wal_checkpoint", -// ** "PRAGMA wal_blocking_checkpoint" or calls the sqlite3_wal_checkpoint() -// ** or wal_blocking_checkpoint() API functions. -// ** -// ** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART. 
-// */ -// int sqlite3PagerCheckpoint( -// Pager *pPager, /* Checkpoint on this pager */ -// sqlite3 *db, /* Db handle used to check for interrupts */ -// int eMode, /* Type of checkpoint */ -// int *pnLog, /* OUT: Final number of frames in log */ -// int *pnCkpt /* OUT: Final number of checkpointed frames */ -// ){ -// int rc = SQLITE_OK; -// if( pPager->pWal ){ -// rc = sqlite3WalCheckpoint(pPager->pWal, db, eMode, -// (eMode==SQLITE_CHECKPOINT_PASSIVE ? 0 : pPager->xBusyHandler), -// pPager->pBusyHandlerArg, -// pPager->walSyncFlags, pPager->pageSize, (u8 *)pPager->pTmpSpace, -// pnLog, pnCkpt -// ); -// } -// return rc; -// } - -// int sqlite3PagerWalCallback(Pager *pPager){ -// return sqlite3WalCallback(pPager->pWal); -// } - -// /* -// ** Return true if the underlying VFS for the given pager supports the -// ** primitives necessary for write-ahead logging. -// */ -// int sqlite3PagerWalSupported(Pager *pPager){ -// const sqlite3_io_methods *pMethods = pPager->fd->pMethods; -// if( pPager->noLock ) return 0; -// return pPager->exclusiveMode || (pMethods->iVersion>=2 && pMethods->xShmMap); -// } - -// /* -// ** Attempt to take an exclusive lock on the database file. If a PENDING lock -// ** is obtained instead, immediately release it. -// */ -// static int pagerExclusiveLock(Pager *pPager){ -// int rc; /* Return code */ - -// assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK ); -// rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); -// if( rc!=SQLITE_OK ){ -// /* If the attempt to grab the exclusive lock failed, release the -// ** pending lock that may have been obtained instead. */ -// pagerUnlockDb(pPager, SHARED_LOCK); -// } - -// return rc; -// } - -// /* -// ** Call sqlite3WalOpen() to open the WAL handle. If the pager is in -// ** exclusive-locking mode when this function is called, take an EXCLUSIVE -// ** lock on the database file and use heap-memory to store the wal-index -// ** in. Otherwise, use the normal shared-memory. 
-// */ -// static int pagerOpenWal(Pager *pPager){ -// int rc = SQLITE_OK; - -// assert( pPager->pWal==0 && pPager->tempFile==0 ); -// assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK ); - -// /* If the pager is already in exclusive-mode, the WAL module will use -// ** heap-memory for the wal-index instead of the VFS shared-memory -// ** implementation. Take the exclusive lock now, before opening the WAL -// ** file, to make sure this is safe. -// */ -// if( pPager->exclusiveMode ){ -// rc = pagerExclusiveLock(pPager); -// } - -// /* Open the connection to the log file. If this operation fails, -// ** (e.g. due to malloc() failure), return an error code. -// */ -// if( rc==SQLITE_OK ){ -// rc = sqlite3WalOpen(pPager->pVfs, -// pPager->fd, pPager->zWal, pPager->exclusiveMode, -// pPager->journalSizeLimit, &pPager->pWal -// ); -// } -// pagerFixMaplimit(pPager); - -// return rc; -// } - -// /* -// ** The caller must be holding a SHARED lock on the database file to call -// ** this function. -// ** -// ** If the pager passed as the first argument is open on a real database -// ** file (not a temp file or an in-memory database), and the WAL file -// ** is not already open, make an attempt to open it now. If successful, -// ** return SQLITE_OK. If an error occurs or the VFS used by the pager does -// ** not support the xShmXXX() methods, return an error code. *pbOpen is -// ** not modified in either case. -// ** -// ** If the pager is open on a temp-file (or in-memory database), or if -// ** the WAL file is already open, set *pbOpen to 1 and return SQLITE_OK -// ** without doing anything. 
-// */ -// int sqlite3PagerOpenWal( -// Pager *pPager, /* Pager object */ -// int *pbOpen /* OUT: Set to true if call is a no-op */ -// ){ -// int rc = SQLITE_OK; /* Return code */ - -// assert( assert_pager_state(pPager) ); -// assert( pPager->eState==PAGER_OPEN || pbOpen ); -// assert( pPager->eState==PAGER_READER || !pbOpen ); -// assert( pbOpen==0 || *pbOpen==0 ); -// assert( pbOpen!=0 || (!pPager->tempFile && !pPager->pWal) ); - -// if( !pPager->tempFile && !pPager->pWal ){ -// if( !sqlite3PagerWalSupported(pPager) ) return SQLITE_CANTOPEN; - -// /* Close any rollback journal previously open */ -// sqlite3OsClose(pPager->jfd); - -// rc = pagerOpenWal(pPager); -// if( rc==SQLITE_OK ){ -// pPager->journalMode = PAGER_JOURNALMODE_WAL; -// pPager->eState = PAGER_OPEN; -// } -// }else{ -// *pbOpen = 1; -// } - -// return rc; -// } - -// /* -// ** This function is called to close the connection to the log file prior -// ** to switching from WAL to rollback mode. -// ** -// ** Before closing the log file, this function attempts to take an -// ** EXCLUSIVE lock on the database file. If this cannot be obtained, an -// ** error (SQLITE_BUSY) is returned and the log connection is not closed. -// ** If successful, the EXCLUSIVE lock is not released before returning. -// */ -// int sqlite3PagerCloseWal(Pager *pPager, sqlite3 *db){ -// int rc = SQLITE_OK; - -// assert( pPager->journalMode==PAGER_JOURNALMODE_WAL ); - -// /* If the log file is not already open, but does exist in the file-system, -// ** it may need to be checkpointed before the connection can switch to -// ** rollback mode. Open it now so this can happen. -// */ -// if( !pPager->pWal ){ -// int logexists = 0; -// rc = pagerLockDb(pPager, SHARED_LOCK); -// if( rc==SQLITE_OK ){ -// rc = sqlite3OsAccess( -// pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &logexists -// ); -// } -// if( rc==SQLITE_OK && logexists ){ -// rc = pagerOpenWal(pPager); -// } -// } - -// /* Checkpoint and close the log. 
Because an EXCLUSIVE lock is held on -// ** the database file, the log and log-summary files will be deleted. -// */ -// if( rc==SQLITE_OK && pPager->pWal ){ -// rc = pagerExclusiveLock(pPager); -// if( rc==SQLITE_OK ){ -// rc = sqlite3WalClose(pPager->pWal, db, pPager->walSyncFlags, -// pPager->pageSize, (u8*)pPager->pTmpSpace); -// pPager->pWal = 0; -// pagerFixMaplimit(pPager); -// if( rc && !pPager->exclusiveMode ) pagerUnlockDb(pPager, SHARED_LOCK); -// } -// } -// return rc; -// } - -// #ifdef SQLITE_ENABLE_SETLK_TIMEOUT -// /* -// ** If pager pPager is a wal-mode database not in exclusive locking mode, -// ** invoke the sqlite3WalWriteLock() function on the associated Wal object -// ** with the same db and bLock parameters as were passed to this function. -// ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. -// */ -// int sqlite3PagerWalWriteLock(Pager *pPager, int bLock){ -// int rc = SQLITE_OK; -// if( pagerUseWal(pPager) && pPager->exclusiveMode==0 ){ -// rc = sqlite3WalWriteLock(pPager->pWal, bLock); -// } -// return rc; -// } - -// /* -// ** Set the database handle used by the wal layer to determine if -// ** blocking locks are required. -// */ -// void sqlite3PagerWalDb(Pager *pPager, sqlite3 *db){ -// if( pagerUseWal(pPager) ){ -// sqlite3WalDb(pPager->pWal, db); -// } -// } -// #endif - -// #ifdef SQLITE_ENABLE_SNAPSHOT -// /* -// ** If this is a WAL database, obtain a snapshot handle for the snapshot -// ** currently open. Otherwise, return an error. -// */ -// int sqlite3PagerSnapshotGet(Pager *pPager, sqlite3_snapshot **ppSnapshot){ -// int rc = SQLITE_ERROR; -// if( pPager->pWal ){ -// rc = sqlite3WalSnapshotGet(pPager->pWal, ppSnapshot); -// } -// return rc; -// } - -// /* -// ** If this is a WAL database, store a pointer to pSnapshot. Next time a -// ** read transaction is opened, attempt to read from the snapshot it -// ** identifies. If this is not a WAL database, return an error. 
-// */ -// int sqlite3PagerSnapshotOpen( -// Pager *pPager, -// sqlite3_snapshot *pSnapshot -// ){ -// int rc = SQLITE_OK; -// if( pPager->pWal ){ -// sqlite3WalSnapshotOpen(pPager->pWal, pSnapshot); -// }else{ -// rc = SQLITE_ERROR; -// } -// return rc; -// } - -// /* -// ** If this is a WAL database, call sqlite3WalSnapshotRecover(). If this -// ** is not a WAL database, return an error. -// */ -// int sqlite3PagerSnapshotRecover(Pager *pPager){ -// int rc; -// if( pPager->pWal ){ -// rc = sqlite3WalSnapshotRecover(pPager->pWal); -// }else{ -// rc = SQLITE_ERROR; -// } -// return rc; -// } - -// /* -// ** The caller currently has a read transaction open on the database. -// ** If this is not a WAL database, SQLITE_ERROR is returned. Otherwise, -// ** this function takes a SHARED lock on the CHECKPOINTER slot and then -// ** checks if the snapshot passed as the second argument is still -// ** available. If so, SQLITE_OK is returned. -// ** -// ** If the snapshot is not available, SQLITE_ERROR is returned. Or, if -// ** the CHECKPOINTER lock cannot be obtained, SQLITE_BUSY. If any error -// ** occurs (any value other than SQLITE_OK is returned), the CHECKPOINTER -// ** lock is released before returning. -// */ -// int sqlite3PagerSnapshotCheck(Pager *pPager, sqlite3_snapshot *pSnapshot){ -// int rc; -// if( pPager->pWal ){ -// rc = sqlite3WalSnapshotCheck(pPager->pWal, pSnapshot); -// }else{ -// rc = SQLITE_ERROR; -// } -// return rc; -// } - -// /* -// ** Release a lock obtained by an earlier successful call to -// ** sqlite3PagerSnapshotCheck(). 
-// */ -// void sqlite3PagerSnapshotUnlock(Pager *pPager){ -// assert( pPager->pWal ); -// sqlite3WalSnapshotUnlock(pPager->pWal); -// } - -// #endif /* SQLITE_ENABLE_SNAPSHOT */ -// #endif /* !SQLITE_OMIT_WAL */ diff --git a/source/libs/tdb/src/sqlite/pcache.c b/source/libs/tdb/src/sqlite/pcache.c deleted file mode 100644 index 1a23064401..0000000000 --- a/source/libs/tdb/src/sqlite/pcache.c +++ /dev/null @@ -1,851 +0,0 @@ -/* -** 2008 August 05 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This file implements that page cache. -*/ -#include "sqliteInt.h" - -/* -** A complete page cache is an instance of this structure. Every -** entry in the cache holds a single page of the database file. The -** btree layer only operates on the cached copy of the database pages. -** -** A page cache entry is "clean" if it exactly matches what is currently -** on disk. A page is "dirty" if it has been modified and needs to be -** persisted to disk. -** -** pDirty, pDirtyTail, pSynced: -** All dirty pages are linked into the doubly linked list using -** PgHdr.pDirtyNext and pDirtyPrev. The list is maintained in LRU order -** such that p was added to the list more recently than p->pDirtyNext. -** PCache.pDirty points to the first (newest) element in the list and -** pDirtyTail to the last (oldest). -** -** The PCache.pSynced variable is used to optimize searching for a dirty -** page to eject from the cache mid-transaction. It is better to eject -** a page that does not require a journal sync than one that does. 
-** Therefore, pSynced is maintained so that it *almost* always points -** to either the oldest page in the pDirty/pDirtyTail list that has a -** clear PGHDR_NEED_SYNC flag or to a page that is older than this one -** (so that the right page to eject can be found by following pDirtyPrev -** pointers). -*/ -struct PCache { - PgHdr *pDirty, *pDirtyTail; /* List of dirty pages in LRU order */ - PgHdr *pSynced; /* Last synced page in dirty page list */ - int nRefSum; /* Sum of ref counts over all pages */ - int szCache; /* Configured cache size */ - int szSpill; /* Size before spilling occurs */ - int szPage; /* Size of every page in this cache */ - int szExtra; /* Size of extra space for each page */ - u8 bPurgeable; /* True if pages are on backing store */ - u8 eCreate; /* eCreate value for for xFetch() */ - int (*xStress)(void *, PgHdr *); /* Call to try make a page clean */ - void * pStress; /* Argument to xStress */ - sqlite3_pcache *pCache; /* Pluggable cache module */ -}; - -/********************************** Test and Debug Logic **********************/ -/* -** Debug tracing macros. Enable by by changing the "0" to "1" and -** recompiling. -** -** When sqlite3PcacheTrace is 1, single line trace messages are issued. -** When sqlite3PcacheTrace is 2, a dump of the pcache showing all cache entries -** is displayed for many operations, resulting in a lot of output. 
-*/ -#if defined(SQLITE_DEBUG) && 0 -int sqlite3PcacheTrace = 2; /* 0: off 1: simple 2: cache dumps */ -int sqlite3PcacheMxDump = 9999; /* Max cache entries for pcacheDump() */ -#define pcacheTrace(X) \ - if (sqlite3PcacheTrace) { \ - sqlite3DebugPrintf X; \ - } -void pcacheDump(PCache *pCache) { - int N; - int i, j; - sqlite3_pcache_page *pLower; - PgHdr * pPg; - unsigned char * a; - - if (sqlite3PcacheTrace < 2) return; - if (pCache->pCache == 0) return; - N = sqlite3PcachePagecount(pCache); - if (N > sqlite3PcacheMxDump) N = sqlite3PcacheMxDump; - for (i = 1; i <= N; i++) { - pLower = pcache2.xFetch(pCache->pCache, i, 0); - if (pLower == 0) continue; - pPg = (PgHdr *)pLower->pExtra; - printf("%3d: nRef %2d flgs %02x data ", i, pPg->nRef, pPg->flags); - a = (unsigned char *)pLower->pBuf; - for (j = 0; j < 12; j++) printf("%02x", a[j]); - printf("\n"); - if (pPg->pPage == 0) { - pcache2.xUnpin(pCache->pCache, pLower, 0); - } - } -} -#else -#define pcacheTrace(X) -#define pcacheDump(X) -#endif - -// /* -// ** Check invariants on a PgHdr entry. Return true if everything is OK. -// ** Return false if any invariant is violated. -// ** -// ** This routine is for use inside of assert() statements only. 
For -// ** example: -// ** -// ** assert( sqlite3PcachePageSanity(pPg) ); -// */ -// #ifdef SQLITE_DEBUG -// int sqlite3PcachePageSanity(PgHdr *pPg) { -// PCache *pCache; -// assert(pPg != 0); -// assert(pPg->pgno > 0 || pPg->pPager == 0); /* Page number is 1 or more */ -// pCache = pPg->pCache; -// assert(pCache != 0); /* Every page has an associated PCache */ -// if (pPg->flags & PGHDR_CLEAN) { -// assert((pPg->flags & PGHDR_DIRTY) == 0); /* Cannot be both CLEAN and DIRTY */ -// assert(pCache->pDirty != pPg); /* CLEAN pages not on dirty list */ -// assert(pCache->pDirtyTail != pPg); -// } -// /* WRITEABLE pages must also be DIRTY */ -// if (pPg->flags & PGHDR_WRITEABLE) { -// assert(pPg->flags & PGHDR_DIRTY); /* WRITEABLE implies DIRTY */ -// } -// /* NEED_SYNC can be set independently of WRITEABLE. This can happen, -// ** for example, when using the sqlite3PagerDontWrite() optimization: -// ** (1) Page X is journalled, and gets WRITEABLE and NEED_SEEK. -// ** (2) Page X moved to freelist, WRITEABLE is cleared -// ** (3) Page X reused, WRITEABLE is set again -// ** If NEED_SYNC had been cleared in step 2, then it would not be reset -// ** in step 3, and page might be written into the database without first -// ** syncing the rollback journal, which might cause corruption on a power -// ** loss. -// ** -// ** Another example is when the database page size is smaller than the -// ** disk sector size. When any page of a sector is journalled, all pages -// ** in that sector are marked NEED_SYNC even if they are still CLEAN, just -// ** in case they are later modified, since all pages in the same sector -// ** must be journalled and synced before any of those pages can be safely -// ** written. 
-// */ -// return 1; -// } -// #endif /* SQLITE_DEBUG */ - -/********************************** Linked List Management ********************/ - -/* Allowed values for second argument to pcacheManageDirtyList() */ -#define PCACHE_DIRTYLIST_REMOVE 1 /* Remove pPage from dirty list */ -#define PCACHE_DIRTYLIST_ADD 2 /* Add pPage to the dirty list */ -#define PCACHE_DIRTYLIST_FRONT 3 /* Move pPage to the front of the list */ - -/* -** Manage pPage's participation on the dirty list. Bits of the addRemove -** argument determines what operation to do. The 0x01 bit means first -** remove pPage from the dirty list. The 0x02 means add pPage back to -** the dirty list. Doing both moves pPage to the front of the dirty list. -*/ -static void pcacheManageDirtyList(PgHdr *pPage, u8 addRemove) { - PCache *p = pPage->pCache; - - pcacheTrace(("%p.DIRTYLIST.%s %d\n", p, addRemove == 1 ? "REMOVE" : addRemove == 2 ? "ADD" : "FRONT", pPage->pgno)); - if (addRemove & PCACHE_DIRTYLIST_REMOVE) { - assert(pPage->pDirtyNext || pPage == p->pDirtyTail); - assert(pPage->pDirtyPrev || pPage == p->pDirty); - - /* Update the PCache1.pSynced variable if necessary. */ - if (p->pSynced == pPage) { - p->pSynced = pPage->pDirtyPrev; - } - - if (pPage->pDirtyNext) { - pPage->pDirtyNext->pDirtyPrev = pPage->pDirtyPrev; - } else { - assert(pPage == p->pDirtyTail); - p->pDirtyTail = pPage->pDirtyPrev; - } - if (pPage->pDirtyPrev) { - pPage->pDirtyPrev->pDirtyNext = pPage->pDirtyNext; - } else { - /* If there are now no dirty pages in the cache, set eCreate to 2. - ** This is an optimization that allows sqlite3PcacheFetch() to skip - ** searching for a dirty page to eject from the cache when it might - ** otherwise have to. 
*/ - assert(pPage == p->pDirty); - p->pDirty = pPage->pDirtyNext; - assert(p->bPurgeable || p->eCreate == 2); - if (p->pDirty == 0) { /*OPTIMIZATION-IF-TRUE*/ - assert(p->bPurgeable == 0 || p->eCreate == 1); - p->eCreate = 2; - } - } - } - if (addRemove & PCACHE_DIRTYLIST_ADD) { - pPage->pDirtyPrev = 0; - pPage->pDirtyNext = p->pDirty; - if (pPage->pDirtyNext) { - assert(pPage->pDirtyNext->pDirtyPrev == 0); - pPage->pDirtyNext->pDirtyPrev = pPage; - } else { - p->pDirtyTail = pPage; - if (p->bPurgeable) { - assert(p->eCreate == 2); - p->eCreate = 1; - } - } - p->pDirty = pPage; - - /* If pSynced is NULL and this page has a clear NEED_SYNC flag, set - ** pSynced to point to it. Checking the NEED_SYNC flag is an - ** optimization, as if pSynced points to a page with the NEED_SYNC - ** flag set sqlite3PcacheFetchStress() searches through all newer - ** entries of the dirty-list for a page with NEED_SYNC clear anyway. */ - if (!p->pSynced && 0 == (pPage->flags & PGHDR_NEED_SYNC) /*OPTIMIZATION-IF-FALSE*/ - ) { - p->pSynced = pPage; - } - } - pcacheDump(p); -} - -/* -** Wrapper around the pluggable caches xUnpin method. If the cache is -** being used for an in-memory database, this function is a no-op. -*/ -static void pcacheUnpin(PgHdr *p) { - if (p->pCache->bPurgeable) { - pcacheTrace(("%p.UNPIN %d\n", p->pCache, p->pgno)); - pcache2.xUnpin(p->pCache->pCache, p->pPage, 0); - pcacheDump(p->pCache); - } -} - -/* -** Compute the number of pages of cache requested. p->szCache is the -** cache size requested by the "PRAGMA cache_size" statement. -*/ -static int numberOfCachePages(PCache *p) { - if (p->szCache >= 0) { - /* IMPLEMENTATION-OF: R-42059-47211 If the argument N is positive then the - ** suggested cache size is set to N. 
*/ - return p->szCache; - } else { - i64 n; - /* IMPLEMANTATION-OF: R-59858-46238 If the argument N is negative, then the - ** number of cache pages is adjusted to be a number of pages that would - ** use approximately abs(N*1024) bytes of memory based on the current - ** page size. */ - n = ((-1024 * (i64)p->szCache) / (p->szPage + p->szExtra)); - if (n > 1000000000) n = 1000000000; - return (int)n; - } -} - -/*************************************************** General Interfaces ****** -** -** Initialize and shutdown the page cache subsystem. Neither of these -** functions are threadsafe. -*/ -int sqlite3PcacheInitialize(void) { return pcache2.xInit(pcache2.pArg); } -void sqlite3PcacheShutdown(void) { - if (pcache2.xShutdown) { - /* IMPLEMENTATION-OF: R-26000-56589 The xShutdown() method may be NULL. */ - pcache2.xShutdown(pcache2.pArg); - } -} - -/* -** Return the size in bytes of a PCache object. -*/ -int sqlite3PcacheSize(void) { return sizeof(PCache); } - -/* -** Create a new PCache object. Storage space to hold the object -** has already been allocated and is passed in as the p pointer. -** The caller discovers how much space needs to be allocated by -** calling sqlite3PcacheSize(). -** -** szExtra is some extra space allocated for each page. The first -** 8 bytes of the extra space will be zeroed as the page is allocated, -** but remaining content will be uninitialized. Though it is opaque -** to this module, the extra space really ends up being the MemPage -** structure in the pager. 
-*/ -int sqlite3PcacheOpen(int szPage, /* Size of every page */ - int szExtra, /* Extra space associated with each page */ - int bPurgeable, /* True if pages are on backing store */ - int (*xStress)(void *, PgHdr *), /* Call to try to make pages clean */ - void * pStress, /* Argument to xStress */ - PCache *p /* Preallocated space for the PCache */ -) { - memset(p, 0, sizeof(PCache)); - p->szPage = 1; - p->szExtra = szExtra; - assert(szExtra >= 8); /* First 8 bytes will be zeroed */ - p->bPurgeable = bPurgeable; - p->eCreate = 2; - p->xStress = xStress; - p->pStress = pStress; - p->szCache = 100; - p->szSpill = 1; - pcacheTrace(("%p.OPEN szPage %d bPurgeable %d\n", p, szPage, bPurgeable)); - return sqlite3PcacheSetPageSize(p, szPage); -} - -/* -** Change the page size for PCache object. The caller must ensure that there -** are no outstanding page references when this function is called. -*/ -int sqlite3PcacheSetPageSize(PCache *pCache, int szPage) { - assert(pCache->nRefSum == 0 && pCache->pDirty == 0); - if (pCache->szPage) { - sqlite3_pcache *pNew; - pNew = pcache2.xCreate(szPage, pCache->szExtra + ROUND8(sizeof(PgHdr)), pCache->bPurgeable); - if (pNew == 0) return SQLITE_NOMEM; - pcache2.xCachesize(pNew, numberOfCachePages(pCache)); - if (pCache->pCache) { - pcache2.xDestroy(pCache->pCache); - } - pCache->pCache = pNew; - pCache->szPage = szPage; - pcacheTrace(("%p.PAGESIZE %d\n", pCache, szPage)); - } - return 0; -} - -/* -** Try to obtain a page from the cache. -** -** This routine returns a pointer to an sqlite3_pcache_page object if -** such an object is already in cache, or if a new one is created. -** This routine returns a NULL pointer if the object was not in cache -** and could not be created. -** -** The createFlags should be 0 to check for existing pages and should -** be 3 (not 1, but 3) to try to create a new page. -** -** If the createFlag is 0, then NULL is always returned if the page -** is not already in the cache. 
If createFlag is 1, then a new page -** is created only if that can be done without spilling dirty pages -** and without exceeding the cache size limit. -** -** The caller needs to invoke sqlite3PcacheFetchFinish() to properly -** initialize the sqlite3_pcache_page object and convert it into a -** PgHdr object. The sqlite3PcacheFetch() and sqlite3PcacheFetchFinish() -** routines are split this way for performance reasons. When separated -** they can both (usually) operate without having to push values to -** the stack on entry and pop them back off on exit, which saves a -** lot of pushing and popping. -*/ -sqlite3_pcache_page *sqlite3PcacheFetch(PCache *pCache, /* Obtain the page from this cache */ - Pgno pgno, /* Page number to obtain */ - int createFlag /* If true, create page if it does not exist already */ -) { - int eCreate; - sqlite3_pcache_page *pRes; - - assert(pCache != 0); - assert(pCache->pCache != 0); - assert(createFlag == 3 || createFlag == 0); - assert(pCache->eCreate == ((pCache->bPurgeable && pCache->pDirty) ? 1 : 2)); - - /* eCreate defines what to do if the page does not exist. - ** 0 Do not allocate a new page. (createFlag==0) - ** 1 Allocate a new page if doing so is inexpensive. - ** (createFlag==1 AND bPurgeable AND pDirty) - ** 2 Allocate a new page even it doing so is difficult. - ** (createFlag==1 AND !(bPurgeable AND pDirty) - */ - eCreate = createFlag & pCache->eCreate; - assert(eCreate == 0 || eCreate == 1 || eCreate == 2); - assert(createFlag == 0 || pCache->eCreate == eCreate); - assert(createFlag == 0 || eCreate == 1 + (!pCache->bPurgeable || !pCache->pDirty)); - pRes = pcache2.xFetch(pCache->pCache, pgno, eCreate); - pcacheTrace(("%p.FETCH %d%s (result: %p)\n", pCache, pgno, createFlag ? 
" create" : "", pRes)); - return pRes; -} - -/* -** If the sqlite3PcacheFetch() routine is unable to allocate a new -** page because no clean pages are available for reuse and the cache -** size limit has been reached, then this routine can be invoked to -** try harder to allocate a page. This routine might invoke the stress -** callback to spill dirty pages to the journal. It will then try to -** allocate the new page and will only fail to allocate a new page on -** an OOM error. -** -** This routine should be invoked only after sqlite3PcacheFetch() fails. -*/ -int sqlite3PcacheFetchStress(PCache * pCache, /* Obtain the page from this cache */ - Pgno pgno, /* Page number to obtain */ - sqlite3_pcache_page **ppPage /* Write result here */ -) { - PgHdr *pPg; - if (pCache->eCreate == 2) return 0; - - if (sqlite3PcachePagecount(pCache) > pCache->szSpill) { - /* Find a dirty page to write-out and recycle. First try to find a - ** page that does not require a journal-sync (one with PGHDR_NEED_SYNC - ** cleared), but if that is not possible settle for any other - ** unreferenced dirty page. - ** - ** If the LRU page in the dirty list that has a clear PGHDR_NEED_SYNC - ** flag is currently referenced, then the following may leave pSynced - ** set incorrectly (pointing to other than the LRU page with NEED_SYNC - ** cleared). This is Ok, as pSynced is just an optimization. 
*/ - for (pPg = pCache->pSynced; pPg && (pPg->nRef || (pPg->flags & PGHDR_NEED_SYNC)); pPg = pPg->pDirtyPrev) - ; - pCache->pSynced = pPg; - if (!pPg) { - for (pPg = pCache->pDirtyTail; pPg && pPg->nRef; pPg = pPg->pDirtyPrev) - ; - } - if (pPg) { - int rc; -#ifdef SQLITE_LOG_CACHE_SPILL - sqlite3_log(SQLITE_FULL, "spill page %d making room for %d - cache used: %d/%d", pPg->pgno, pgno, - pcache2.xPagecount(pCache->pCache), numberOfCachePages(pCache)); -#endif - pcacheTrace(("%p.SPILL %d\n", pCache, pPg->pgno)); - rc = pCache->xStress(pCache->pStress, pPg); - pcacheDump(pCache); - if (rc != 0 && rc != SQLITE_BUSY) { - return rc; - } - } - } - *ppPage = pcache2.xFetch(pCache->pCache, pgno, 2); - return *ppPage == 0 ? SQLITE_NOMEM : 0; -} - -/* -** This is a helper routine for sqlite3PcacheFetchFinish() -** -** In the uncommon case where the page being fetched has not been -** initialized, this routine is invoked to do the initialization. -** This routine is broken out into a separate function since it -** requires extra stack manipulation that can be avoided in the common -** case. -*/ -static PgHdr *pcacheFetchFinishWithInit(PCache * pCache, /* Obtain the page from this cache */ - Pgno pgno, /* Page number obtained */ - sqlite3_pcache_page *pPage /* Page obtained by prior PcacheFetch() call */ -) { - PgHdr *pPgHdr; - assert(pPage != 0); - pPgHdr = (PgHdr *)pPage->pExtra; - assert(pPgHdr->pPage == 0); - memset(&pPgHdr->pDirty, 0, sizeof(PgHdr) - offsetof(PgHdr, pDirty)); - pPgHdr->pPage = pPage; - pPgHdr->pData = pPage->pBuf; - pPgHdr->pExtra = (void *)&pPgHdr[1]; - memset(pPgHdr->pExtra, 0, 8); - pPgHdr->pCache = pCache; - pPgHdr->pgno = pgno; - pPgHdr->flags = PGHDR_CLEAN; - return sqlite3PcacheFetchFinish(pCache, pgno, pPage); -} - -/* -** This routine converts the sqlite3_pcache_page object returned by -** sqlite3PcacheFetch() into an initialized PgHdr object. This routine -** must be called after sqlite3PcacheFetch() in order to get a usable -** result. 
-*/ -PgHdr *sqlite3PcacheFetchFinish(PCache * pCache, /* Obtain the page from this cache */ - Pgno pgno, /* Page number obtained */ - sqlite3_pcache_page *pPage /* Page obtained by prior PcacheFetch() call */ -) { - PgHdr *pPgHdr; - - assert(pPage != 0); - pPgHdr = (PgHdr *)pPage->pExtra; - - if (!pPgHdr->pPage) { - return pcacheFetchFinishWithInit(pCache, pgno, pPage); - } - pCache->nRefSum++; - pPgHdr->nRef++; - // assert(sqlite3PcachePageSanity(pPgHdr)); - return pPgHdr; -} - -/* -** Decrement the reference count on a page. If the page is clean and the -** reference count drops to 0, then it is made eligible for recycling. -*/ -void sqlite3PcacheRelease(PgHdr *p) { - assert(p->nRef > 0); - p->pCache->nRefSum--; - if ((--p->nRef) == 0) { - if (p->flags & PGHDR_CLEAN) { - pcacheUnpin(p); - } else { - pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT); - } - } -} - -/* -** Increase the reference count of a supplied page by 1. -*/ -void sqlite3PcacheRef(PgHdr *p) { - assert(p->nRef > 0); - // assert(sqlite3PcachePageSanity(p)); - p->nRef++; - p->pCache->nRefSum++; -} - -/* -** Drop a page from the cache. There must be exactly one reference to the -** page. This function deletes that reference, so after it returns the -** page pointed to by p is invalid. -*/ -void sqlite3PcacheDrop(PgHdr *p) { - assert(p->nRef == 1); - // assert(sqlite3PcachePageSanity(p)); - if (p->flags & PGHDR_DIRTY) { - pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); - } - p->pCache->nRefSum--; - pcache2.xUnpin(p->pCache->pCache, p->pPage, 1); -} - -/* -** Make sure the page is marked as dirty. If it isn't dirty already, -** make it so. 
-*/ -void sqlite3PcacheMakeDirty(PgHdr *p) { - assert(p->nRef > 0); - // assert(sqlite3PcachePageSanity(p)); - if (p->flags & (PGHDR_CLEAN | PGHDR_DONT_WRITE)) { /*OPTIMIZATION-IF-FALSE*/ - p->flags &= ~PGHDR_DONT_WRITE; - if (p->flags & PGHDR_CLEAN) { - p->flags ^= (PGHDR_DIRTY | PGHDR_CLEAN); - pcacheTrace(("%p.DIRTY %d\n", p->pCache, p->pgno)); - assert((p->flags & (PGHDR_DIRTY | PGHDR_CLEAN)) == PGHDR_DIRTY); - pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD); - } - // assert(sqlite3PcachePageSanity(p)); - } -} - -/* -** Make sure the page is marked as clean. If it isn't clean already, -** make it so. -*/ -void sqlite3PcacheMakeClean(PgHdr *p) { - // assert(sqlite3PcachePageSanity(p)); - assert((p->flags & PGHDR_DIRTY) != 0); - assert((p->flags & PGHDR_CLEAN) == 0); - pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); - p->flags &= ~(PGHDR_DIRTY | PGHDR_NEED_SYNC | PGHDR_WRITEABLE); - p->flags |= PGHDR_CLEAN; - pcacheTrace(("%p.CLEAN %d\n", p->pCache, p->pgno)); - // assert(sqlite3PcachePageSanity(p)); - if (p->nRef == 0) { - pcacheUnpin(p); - } -} - -/* -** Make every page in the cache clean. -*/ -void sqlite3PcacheCleanAll(PCache *pCache) { - PgHdr *p; - pcacheTrace(("%p.CLEAN-ALL\n", pCache)); - while ((p = pCache->pDirty) != 0) { - sqlite3PcacheMakeClean(p); - } -} - -/* -** Clear the PGHDR_NEED_SYNC and PGHDR_WRITEABLE flag from all dirty pages. -*/ -void sqlite3PcacheClearWritable(PCache *pCache) { - PgHdr *p; - pcacheTrace(("%p.CLEAR-WRITEABLE\n", pCache)); - for (p = pCache->pDirty; p; p = p->pDirtyNext) { - p->flags &= ~(PGHDR_NEED_SYNC | PGHDR_WRITEABLE); - } - pCache->pSynced = pCache->pDirtyTail; -} - -/* -** Clear the PGHDR_NEED_SYNC flag from all dirty pages. -*/ -void sqlite3PcacheClearSyncFlags(PCache *pCache) { - PgHdr *p; - for (p = pCache->pDirty; p; p = p->pDirtyNext) { - p->flags &= ~PGHDR_NEED_SYNC; - } - pCache->pSynced = pCache->pDirtyTail; -} - -/* -** Change the page number of page p to newPgno. 
-*/ -void sqlite3PcacheMove(PgHdr *p, Pgno newPgno) { - PCache *pCache = p->pCache; - assert(p->nRef > 0); - assert(newPgno > 0); - // assert(sqlite3PcachePageSanity(p)); - pcacheTrace(("%p.MOVE %d -> %d\n", pCache, p->pgno, newPgno)); - pcache2.xRekey(pCache->pCache, p->pPage, p->pgno, newPgno); - p->pgno = newPgno; - if ((p->flags & PGHDR_DIRTY) && (p->flags & PGHDR_NEED_SYNC)) { - pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT); - } -} - -/* -** Drop every cache entry whose page number is greater than "pgno". The -** caller must ensure that there are no outstanding references to any pages -** other than page 1 with a page number greater than pgno. -** -** If there is a reference to page 1 and the pgno parameter passed to this -** function is 0, then the data area associated with page 1 is zeroed, but -** the page object is not dropped. -*/ -void sqlite3PcacheTruncate(PCache *pCache, Pgno pgno) { - if (pCache->pCache) { - PgHdr *p; - PgHdr *pNext; - pcacheTrace(("%p.TRUNCATE %d\n", pCache, pgno)); - for (p = pCache->pDirty; p; p = pNext) { - pNext = p->pDirtyNext; - /* This routine never gets call with a positive pgno except right - ** after sqlite3PcacheCleanAll(). So if there are dirty pages, - ** it must be that pgno==0. - */ - assert(p->pgno > 0); - if (p->pgno > pgno) { - assert(p->flags & PGHDR_DIRTY); - sqlite3PcacheMakeClean(p); - } - } - if (pgno == 0 && pCache->nRefSum) { - sqlite3_pcache_page *pPage1; - pPage1 = pcache2.xFetch(pCache->pCache, 1, 0); - if (pPage1) { /* Page 1 is always available in cache, because - ** pCache->nRefSum>0 */ - memset(pPage1->pBuf, 0, pCache->szPage); - pgno = 1; - } - } - pcache2.xTruncate(pCache->pCache, pgno + 1); - } -} - -/* -** Close a cache. -*/ -void sqlite3PcacheClose(PCache *pCache) { - assert(pCache->pCache != 0); - pcacheTrace(("%p.CLOSE\n", pCache)); - pcache2.xDestroy(pCache->pCache); -} - -/* -** Discard the contents of the cache. 
-*/ -void sqlite3PcacheClear(PCache *pCache) { sqlite3PcacheTruncate(pCache, 0); } - -/* -** Merge two lists of pages connected by pDirty and in pgno order. -** Do not bother fixing the pDirtyPrev pointers. -*/ -static PgHdr *pcacheMergeDirtyList(PgHdr *pA, PgHdr *pB) { - PgHdr result, *pTail; - pTail = &result; - assert(pA != 0 && pB != 0); - for (;;) { - if (pA->pgno < pB->pgno) { - pTail->pDirty = pA; - pTail = pA; - pA = pA->pDirty; - if (pA == 0) { - pTail->pDirty = pB; - break; - } - } else { - pTail->pDirty = pB; - pTail = pB; - pB = pB->pDirty; - if (pB == 0) { - pTail->pDirty = pA; - break; - } - } - } - return result.pDirty; -} - -/* -** Sort the list of pages in accending order by pgno. Pages are -** connected by pDirty pointers. The pDirtyPrev pointers are -** corrupted by this sort. -** -** Since there cannot be more than 2^31 distinct pages in a database, -** there cannot be more than 31 buckets required by the merge sorter. -** One extra bucket is added to catch overflow in case something -** ever changes to make the previous sentence incorrect. -*/ -#define N_SORT_BUCKET 32 -static PgHdr *pcacheSortDirtyList(PgHdr *pIn) { - PgHdr *a[N_SORT_BUCKET], *p; - int i; - memset(a, 0, sizeof(a)); - while (pIn) { - p = pIn; - pIn = p->pDirty; - p->pDirty = 0; - for (i = 0; i < N_SORT_BUCKET - 1; i++) { - if (a[i] == 0) { - a[i] = p; - break; - } else { - p = pcacheMergeDirtyList(a[i], p); - a[i] = 0; - } - } - if (i == N_SORT_BUCKET - 1) { - /* To get here, there need to be 2^(N_SORT_BUCKET) elements in - ** the input list. But that is impossible. - */ - a[i] = pcacheMergeDirtyList(a[i], p); - } - } - p = a[0]; - for (i = 1; i < N_SORT_BUCKET; i++) { - if (a[i] == 0) continue; - p = p ? pcacheMergeDirtyList(p, a[i]) : a[i]; - } - return p; -} - -/* -** Return a list of all dirty pages in the cache, sorted by page number. 
-*/ -PgHdr *sqlite3PcacheDirtyList(PCache *pCache) { - PgHdr *p; - for (p = pCache->pDirty; p; p = p->pDirtyNext) { - p->pDirty = p->pDirtyNext; - } - return pcacheSortDirtyList(pCache->pDirty); -} - -/* -** Return the total number of references to all pages held by the cache. -** -** This is not the total number of pages referenced, but the sum of the -** reference count for all pages. -*/ -int sqlite3PcacheRefCount(PCache *pCache) { return pCache->nRefSum; } - -/* -** Return the number of references to the page supplied as an argument. -*/ -int sqlite3PcachePageRefcount(PgHdr *p) { return p->nRef; } - -/* -** Return the total number of pages in the cache. -*/ -int sqlite3PcachePagecount(PCache *pCache) { - assert(pCache->pCache != 0); - return pcache2.xPagecount(pCache->pCache); -} - -#ifdef SQLITE_TEST -/* -** Get the suggested cache-size value. -*/ -int sqlite3PcacheGetCachesize(PCache *pCache) { return numberOfCachePages(pCache); } -#endif - -/* -** Set the suggested cache-size value. -*/ -void sqlite3PcacheSetCachesize(PCache *pCache, int mxPage) { - assert(pCache->pCache != 0); - pCache->szCache = mxPage; - pcache2.xCachesize(pCache->pCache, numberOfCachePages(pCache)); -} - -/* -** Set the suggested cache-spill value. Make no changes if if the -** argument is zero. Return the effective cache-spill size, which will -** be the larger of the szSpill and szCache. -*/ -int sqlite3PcacheSetSpillsize(PCache *p, int mxPage) { - int res; - assert(p->pCache != 0); - if (mxPage) { - if (mxPage < 0) { - mxPage = (int)((-1024 * (i64)mxPage) / (p->szPage + p->szExtra)); - } - p->szSpill = mxPage; - } - res = numberOfCachePages(p); - if (res < p->szSpill) res = p->szSpill; - return res; -} - -/* -** Free up as much memory as possible from the page cache. -*/ -void sqlite3PcacheShrink(PCache *pCache) { - assert(pCache->pCache != 0); - pcache2.xShrink(pCache->pCache); -} - -/* -** Return the size of the header added by this middleware layer -** in the page-cache hierarchy. 
-*/ -int sqlite3HeaderSizePcache(void) { return ROUND8(sizeof(PgHdr)); } - -/* -** Return the number of dirty pages currently in the cache, as a percentage -** of the configured cache size. -*/ -int sqlite3PCachePercentDirty(PCache *pCache) { - PgHdr *pDirty; - int nDirty = 0; - int nCache = numberOfCachePages(pCache); - for (pDirty = pCache->pDirty; pDirty; pDirty = pDirty->pDirtyNext) nDirty++; - return nCache ? (int)(((i64)nDirty * 100) / nCache) : 0; -} - -#ifdef SQLITE_DIRECT_OVERFLOW_READ -/* -** Return true if there are one or more dirty pages in the cache. Else false. -*/ -int sqlite3PCacheIsDirty(PCache *pCache) { return (pCache->pDirty != 0); } -#endif - -#if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG) -/* -** For all dirty pages currently in the cache, invoke the specified -** callback. This is only used if the SQLITE_CHECK_PAGES macro is -** defined. -*/ -void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHdr *)) { - PgHdr *pDirty; - for (pDirty = pCache->pDirty; pDirty; pDirty = pDirty->pDirtyNext) { - xIter(pDirty); - } -} -#endif diff --git a/source/libs/tdb/src/sqlite/pcache1.c b/source/libs/tdb/src/sqlite/pcache1.c deleted file mode 100644 index 4079937bee..0000000000 --- a/source/libs/tdb/src/sqlite/pcache1.c +++ /dev/null @@ -1,1211 +0,0 @@ -/* -** 2008 November 05 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** -** This file implements the default page cache implementation (the -** sqlite3_pcache interface). It also contains part of the implementation -** of the SQLITE_CONFIG_PAGECACHE and sqlite3_release_memory() features. 
-** If the default page cache implementation is overridden, then neither of -** these two features are available. -** -** A Page cache line looks like this: -** -** ------------------------------------------------------------- -** | database page content | PgHdr1 | MemPage | PgHdr | -** ------------------------------------------------------------- -** -** The database page content is up front (so that buffer overreads tend to -** flow harmlessly into the PgHdr1, MemPage, and PgHdr extensions). MemPage -** is the extension added by the btree.c module containing information such -** as the database page number and how that database page is used. PgHdr -** is added by the pcache.c layer and contains information used to keep track -** of which pages are "dirty". PgHdr1 is an extension added by this -** module (pcache1.c). The PgHdr1 header is a subclass of sqlite3_pcache_page. -** PgHdr1 contains information needed to look up a page by its page number. -** The superclass sqlite3_pcache_page.pBuf points to the start of the -** database page content and sqlite3_pcache_page.pExtra points to PgHdr. -** -** The size of the extension (MemPage+PgHdr+PgHdr1) can be determined at -** runtime using sqlite3_config(SQLITE_CONFIG_PCACHE_HDRSZ, &size). The -** sizes of the extensions sum to 272 bytes on x64 for 3.8.10, but this -** size can vary according to architecture, compile-time options, and -** SQLite library version number. -** -** If SQLITE_PCACHE_SEPARATE_HEADER is defined, then the extension is obtained -** using a separate memory allocation from the database page content. This -** seeks to overcome the "clownshoe" problem (also called "internal -** fragmentation" in academic literature) of allocating a few bytes more -** than a power of two with the memory allocator rounding up to the next -** power of two, and leaving the rounded-up space unused. -** -** This module tracks pointers to PgHdr1 objects. Only pcache.c communicates -** with this module. 
Information is passed back and forth as PgHdr1 pointers. -** -** The pcache.c and pager.c modules deal pointers to PgHdr objects. -** The btree.c module deals with pointers to MemPage objects. -** -** SOURCE OF PAGE CACHE MEMORY: -** -** Memory for a page might come from any of three sources: -** -** (1) The general-purpose memory allocator - sqlite3Malloc() -** (2) Global page-cache memory provided using sqlite3_config() with -** SQLITE_CONFIG_PAGECACHE. -** (3) PCache-local bulk allocation. -** -** The third case is a chunk of heap memory (defaulting to 100 pages worth) -** that is allocated when the page cache is created. The size of the local -** bulk allocation can be adjusted using -** -** sqlite3_config(SQLITE_CONFIG_PAGECACHE, (void*)0, 0, N). -** -** If N is positive, then N pages worth of memory are allocated using a single -** sqlite3Malloc() call and that memory is used for the first N pages allocated. -** Or if N is negative, then -1024*N bytes of memory are allocated and used -** for as many pages as can be accomodated. -** -** Only one of (2) or (3) can be used. Once the memory available to (2) or -** (3) is exhausted, subsequent allocations fail over to the general-purpose -** memory allocator (1). -** -** Earlier versions of SQLite used only methods (1) and (2). But experiments -** show that method (3) with N==100 provides about a 5% performance boost for -** common workloads. -*/ -#include "sqliteInt.h" - -typedef struct PCache1 PCache1; -typedef struct PgHdr1 PgHdr1; -typedef struct PgFreeslot PgFreeslot; -typedef struct PGroup PGroup; - -/* -** Each cache entry is represented by an instance of the following -** structure. Unless SQLITE_PCACHE_SEPARATE_HEADER is defined, a buffer of -** PgHdr1.pCache->szPage bytes is allocated directly before this structure -** in memory. -** -** Note: Variables isBulkLocal and isAnchor were once type "u8". 
That works, -** but causes a 2-byte gap in the structure for most architectures (since -** pointers must be either 4 or 8-byte aligned). As this structure is located -** in memory directly after the associated page data, if the database is -** corrupt, code at the b-tree layer may overread the page buffer and -** read part of this structure before the corruption is detected. This -** can cause a valgrind error if the unitialized gap is accessed. Using u16 -** ensures there is no such gap, and therefore no bytes of unitialized memory -** in the structure. -*/ -struct PgHdr1 { - sqlite3_pcache_page page; /* Base class. Must be first. pBuf & pExtra */ - unsigned int iKey; /* Key value (page number) */ - u16 isBulkLocal; /* This page from bulk local storage */ - u16 isAnchor; /* This is the PGroup.lru element */ - PgHdr1 * pNext; /* Next in hash table chain */ - PCache1 * pCache; /* Cache that currently owns this page */ - PgHdr1 * pLruNext; /* Next in LRU list of unpinned pages */ - PgHdr1 * pLruPrev; /* Previous in LRU list of unpinned pages */ - /* NB: pLruPrev is only valid if pLruNext!=0 */ -}; - -/* -** A page is pinned if it is not on the LRU list. To be "pinned" means -** that the page is in active use and must not be deallocated. -*/ -#define PAGE_IS_PINNED(p) ((p)->pLruNext == 0) -#define PAGE_IS_UNPINNED(p) ((p)->pLruNext != 0) - -/* Each page cache (or PCache) belongs to a PGroup. A PGroup is a set -** of one or more PCaches that are able to recycle each other's unpinned -** pages when they are under memory pressure. A PGroup is an instance of -** the following object. -** -** This page cache implementation works in one of two modes: -** -** (1) Every PCache is the sole member of its own PGroup. There is -** one PGroup per PCache. -** -** (2) There is a single global PGroup that all PCaches are a member -** of. 
-** -** Mode 1 uses more memory (since PCache instances are not able to rob -** unused pages from other PCaches) but it also operates without a mutex, -** and is therefore often faster. Mode 2 requires a mutex in order to be -** threadsafe, but recycles pages more efficiently. -** -** For mode (1), PGroup.mutex is NULL. For mode (2) there is only a single -** PGroup which is the pcache1.grp global variable and its mutex is -** SQLITE_MUTEX_STATIC_LRU. -*/ -struct PGroup { - pthread_mutex_t mutex; /* MUTEX_STATIC_LRU or NULL */ - unsigned int nMaxPage; /* Sum of nMax for purgeable caches */ - unsigned int nMinPage; /* Sum of nMin for purgeable caches */ - unsigned int mxPinned; /* nMaxpage + 10 - nMinPage */ - unsigned int nPurgeable; /* Number of purgeable pages allocated */ - PgHdr1 lru; /* The beginning and end of the LRU list */ -}; - -/* Each page cache is an instance of the following object. Every -** open database file (including each in-memory database and each -** temporary or transient database) has a single page cache which -** is an instance of this object. -** -** Pointers to structures of this type are cast and returned as -** opaque sqlite3_pcache* handles. -*/ -struct PCache1 { - /* Cache configuration parameters. Page size (szPage) and the purgeable - ** flag (bPurgeable) and the pnPurgeable pointer are all set when the - ** cache is created and are never changed thereafter. nMax may be - ** modified at any time by a call to the pcache1Cachesize() method. - ** The PGroup mutex must be held when accessing nMax. 
- */ - PGroup * pGroup; /* PGroup this cache belongs to */ - unsigned int *pnPurgeable; /* Pointer to pGroup->nPurgeable */ - int szPage; /* Size of database content section */ - int szExtra; /* sizeof(MemPage)+sizeof(PgHdr) */ - int szAlloc; /* Total size of one pcache line */ - int bPurgeable; /* True if cache is purgeable */ - unsigned int nMin; /* Minimum number of pages reserved */ - unsigned int nMax; /* Configured "cache_size" value */ - unsigned int n90pct; /* nMax*9/10 */ - unsigned int iMaxKey; /* Largest key seen since xTruncate() */ - unsigned int nPurgeableDummy; /* pnPurgeable points here when not used*/ - - /* Hash table of all pages. The following variables may only be accessed - ** when the accessor is holding the PGroup mutex. - */ - unsigned int nRecyclable; /* Number of pages in the LRU list */ - unsigned int nPage; /* Total number of pages in apHash */ - unsigned int nHash; /* Number of slots in apHash[] */ - PgHdr1 ** apHash; /* Hash table for fast lookup by key */ - PgHdr1 * pFree; /* List of unused pcache-local pages */ - void * pBulk; /* Bulk memory used by pcache-local */ -}; - -/* -** Free slots in the allocator used to divide up the global page cache -** buffer provided using the SQLITE_CONFIG_PAGECACHE mechanism. -*/ -struct PgFreeslot { - PgFreeslot *pNext; /* Next free slot */ -}; - -/* -** Global data used by this cache. -*/ -static struct PCacheGlobal { - PGroup grp; /* The global PGroup for mode (2) */ - - /* Variables related to SQLITE_CONFIG_PAGECACHE settings. The - ** szSlot, nSlot, pStart, pEnd, nReserve, and isInit values are all - ** fixed at sqlite3_initialize() time and do not require mutex protection. - ** The nFreeSlot and pFree values do require mutex protection. 
- */ - int isInit; /* True if initialized */ - int separateCache; /* Use a new PGroup for each PCache */ - int nInitPage; /* Initial bulk allocation size */ - int szSlot; /* Size of each free slot */ - int nSlot; /* The number of pcache slots */ - int nReserve; /* Try to keep nFreeSlot above this */ - void *pStart, *pEnd; /* Bounds of global page cache memory */ - /* Above requires no mutex. Use mutex below for variable that follow. */ - pthread_mutex_t mutex; /* Mutex for accessing the following: */ - PgFreeslot * pFree; /* Free page blocks */ - int nFreeSlot; /* Number of unused pcache slots */ - /* The following value requires a mutex to change. We skip the mutex on - ** reading because (1) most platforms read a 32-bit integer atomically and - ** (2) even if an incorrect value is read, no great harm is done since this - ** is really just an optimization. */ - int bUnderPressure; /* True if low on PAGECACHE memory */ -} pcache1; - -#define pcache1EnterMutex(X) pthread_mutex_lock(&((X)->mutex)) -#define pcache1LeaveMutex(X) pthread_mutex_unlock(&((X)->mutex)) -#define PCACHE1_MIGHT_USE_GROUP_MUTEX 1 - -/******************************************************************************/ -/******** Page Allocation/SQLITE_CONFIG_PCACHE Related Functions **************/ - -/* -** This function is called during initialization if a static buffer is -** supplied to use for the page-cache by passing the SQLITE_CONFIG_PAGECACHE -** verb to sqlite3_config(). Parameter pBuf points to an allocation large -** enough to contain 'n' buffers of 'sz' bytes each. -** -** This routine is called from sqlite3_initialize() and so it is guaranteed -** to be serialized already. There is no need for further mutexing. -*/ -void sqlite3PCacheBufferSetup(void *pBuf, int sz, int n) { - if (pcache1.isInit) { - PgFreeslot *p; - if (pBuf == 0) sz = n = 0; - if (n == 0) sz = 0; - sz = ROUNDDOWN8(sz); - pcache1.szSlot = sz; - pcache1.nSlot = pcache1.nFreeSlot = n; - pcache1.nReserve = n > 90 ? 
10 : (n / 10 + 1); - pcache1.pStart = pBuf; - pcache1.pFree = 0; - pcache1.bUnderPressure = 0; - while (n--) { - p = (PgFreeslot *)pBuf; - p->pNext = pcache1.pFree; - pcache1.pFree = p; - pBuf = (void *)&((char *)pBuf)[sz]; - } - pcache1.pEnd = pBuf; - } -} - -/* -** Try to initialize the pCache->pFree and pCache->pBulk fields. Return -** true if pCache->pFree ends up containing one or more free pages. -*/ -static int pcache1InitBulk(PCache1 *pCache) { - i64 szBulk; - char *zBulk; - if (pcache1.nInitPage == 0) return 0; - /* Do not bother with a bulk allocation if the cache size very small */ - if (pCache->nMax < 3) return 0; - // sqlite3BeginBenignMalloc(); - if (pcache1.nInitPage > 0) { - szBulk = pCache->szAlloc * (i64)pcache1.nInitPage; - } else { - szBulk = -1024 * (i64)pcache1.nInitPage; - } - if (szBulk > pCache->szAlloc * (i64)pCache->nMax) { - szBulk = pCache->szAlloc * (i64)pCache->nMax; - } - zBulk = pCache->pBulk = malloc(szBulk); - // sqlite3EndBenignMalloc(); - if (zBulk) { - int nBulk = szBulk / pCache->szAlloc; - do { - PgHdr1 *pX = (PgHdr1 *)(&zBulk[pCache->szPage]); - pX->page.pBuf = zBulk; - pX->page.pExtra = &pX[1]; - pX->isBulkLocal = 1; - pX->isAnchor = 0; - pX->pNext = pCache->pFree; - pX->pLruPrev = 0; /* Initializing this saves a valgrind error */ - pCache->pFree = pX; - zBulk += pCache->szAlloc; - } while (--nBulk); - } - return pCache->pFree != 0; -} - -/* -** Malloc function used within this file to allocate space from the buffer -** configured using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no -** such buffer exists or there is no space left in it, this function falls -** back to sqlite3Malloc(). -** -** Multiple threads can run this routine at the same time. Global variables -** in pcache1 need to be protected via mutex. 
-*/ -static void *pcache1Alloc(int nByte) { - void *p = 0; - // assert(sqlite3_mutex_notheld(pcache1.grp.mutex)); - if (nByte <= pcache1.szSlot) { - pthread_mutex_lock(&(pcache1.mutex)); - p = (PgHdr1 *)pcache1.pFree; - if (p) { - pcache1.pFree = pcache1.pFree->pNext; - pcache1.nFreeSlot--; - pcache1.bUnderPressure = pcache1.nFreeSlot < pcache1.nReserve; - assert(pcache1.nFreeSlot >= 0); - // sqlite3StatusHighwater(SQLITE_STATUS_PAGECACHE_SIZE, nByte); - // sqlite3StatusUp(SQLITE_STATUS_PAGECACHE_USED, 1); - } - pthread_mutex_unlock(&pcache1.mutex); - } - if (p == 0) { - /* Memory is not available in the SQLITE_CONFIG_PAGECACHE pool. Get - ** it from sqlite3Malloc instead. - */ - p = malloc(nByte); -#ifndef SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS - if (p) { - int sz = nByte; - pthread_mutex_lock(&pcache1.mutex); - // sqlite3StatusHighwater(SQLITE_STATUS_PAGECACHE_SIZE, nByte); - // sqlite3StatusUp(SQLITE_STATUS_PAGECACHE_OVERFLOW, sz); - pthread_mutex_unlock(&pcache1.mutex); - } -#endif - // sqlite3MemdebugSetType(p, MEMTYPE_PCACHE); - } - return p; -} - -/* -** Free an allocated buffer obtained from pcache1Alloc(). 
-*/ -static void pcache1Free(void *p) { - if (p == 0) return; - // if (SQLITE_WITHIN(p, pcache1.pStart, pcache1.pEnd)) { - if (p >= pcache1.pStart && p < pcache1.pEnd) { - PgFreeslot *pSlot; - pthread_mutex_lock(&pcache1.mutex); - // sqlite3StatusDown(SQLITE_STATUS_PAGECACHE_USED, 1); - pSlot = (PgFreeslot *)p; - pSlot->pNext = pcache1.pFree; - pcache1.pFree = pSlot; - pcache1.nFreeSlot++; - pcache1.bUnderPressure = pcache1.nFreeSlot < pcache1.nReserve; - assert(pcache1.nFreeSlot <= pcache1.nSlot); - pthread_mutex_unlock(&pcache1.mutex); - } else { - // assert(sqlite3MemdebugHasType(p, MEMTYPE_PCACHE)); - // sqlite3MemdebugSetType(p, MEMTYPE_HEAP); -#ifndef SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS - { - int nFreed = 0; - // nFreed = sqlite3MallocSize(p); - pthread_mutex_lock(&pcache1.mutex); - // sqlite3StatusDown(SQLITE_STATUS_PAGECACHE_OVERFLOW, nFreed); - pthread_mutex_unlock(&pcache1.mutex); - } -#endif - free(p); - } -} - -#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT -/* -** Return the size of a pcache allocation -*/ -static int pcache1MemSize(void *p) { - if (p >= pcache1.pStart && p < pcache1.pEnd) { - return pcache1.szSlot; - } else { - int iSize; - assert(sqlite3MemdebugHasType(p, MEMTYPE_PCACHE)); - sqlite3MemdebugSetType(p, MEMTYPE_HEAP); - iSize = sqlite3MallocSize(p); - sqlite3MemdebugSetType(p, MEMTYPE_PCACHE); - return iSize; - } -} -#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */ - -/* -** Allocate a new page object initially associated with cache pCache. -*/ -static PgHdr1 *pcache1AllocPage(PCache1 *pCache, int benignMalloc) { - PgHdr1 *p = 0; - void * pPg; - - // assert(sqlite3_mutex_held(pCache->pGroup->mutex)); - if (pCache->pFree || (pCache->nPage == 0 && pcache1InitBulk(pCache))) { - assert(pCache->pFree != 0); - p = pCache->pFree; - pCache->pFree = p->pNext; - p->pNext = 0; - } else { -#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT - /* The group mutex must be released before pcache1Alloc() is called. 
This - ** is because it might call sqlite3_release_memory(), which assumes that - ** this mutex is not held. */ - assert(pcache1.separateCache == 0); - assert(pCache->pGroup == &pcache1.grp); - pcache1LeaveMutex(pCache->pGroup); -#endif - if (benignMalloc) { - // sqlite3BeginBenignMalloc(); - } -#ifdef SQLITE_PCACHE_SEPARATE_HEADER - pPg = pcache1Alloc(pCache->szPage); - p = sqlite3Malloc(sizeof(PgHdr1) + pCache->szExtra); - if (!pPg || !p) { - pcache1Free(pPg); - sqlite3_free(p); - pPg = 0; - } -#else - pPg = pcache1Alloc(pCache->szAlloc); -#endif - if (benignMalloc) { - // sqlite3EndBenignMalloc(); - } -#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT - pcache1EnterMutex(pCache->pGroup); -#endif - if (pPg == 0) return 0; -#ifndef SQLITE_PCACHE_SEPARATE_HEADER - p = (PgHdr1 *)&((u8 *)pPg)[pCache->szPage]; -#endif - p->page.pBuf = pPg; - p->page.pExtra = &p[1]; - p->isBulkLocal = 0; - p->isAnchor = 0; - p->pLruPrev = 0; /* Initializing this saves a valgrind error */ - } - (*pCache->pnPurgeable)++; - return p; -} - -/* -** Free a page object allocated by pcache1AllocPage(). -*/ -static void pcache1FreePage(PgHdr1 *p) { - PCache1 *pCache; - assert(p != 0); - pCache = p->pCache; - // assert(sqlite3_mutex_held(p->pCache->pGroup->mutex)); - if (p->isBulkLocal) { - p->pNext = pCache->pFree; - pCache->pFree = p; - } else { - pcache1Free(p->page.pBuf); -#ifdef SQLITE_PCACHE_SEPARATE_HEADER - sqlite3_free(p); -#endif - } - (*pCache->pnPurgeable)--; -} - -/* -** Malloc function used by SQLite to obtain space from the buffer configured -** using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no such buffer -** exists, this function falls back to sqlite3Malloc(). -*/ -void *sqlite3PageMalloc(int sz) { - assert(sz <= 65536 + 8); /* These allocations are never very large */ - return pcache1Alloc(sz); -} - -/* -** Free an allocated buffer obtained from sqlite3PageMalloc(). 
-*/ -void sqlite3PageFree(void *p) { pcache1Free(p); } - -/* -** Return true if it desirable to avoid allocating a new page cache -** entry. -** -** If memory was allocated specifically to the page cache using -** SQLITE_CONFIG_PAGECACHE but that memory has all been used, then -** it is desirable to avoid allocating a new page cache entry because -** presumably SQLITE_CONFIG_PAGECACHE was suppose to be sufficient -** for all page cache needs and we should not need to spill the -** allocation onto the heap. -** -** Or, the heap is used for all page cache memory but the heap is -** under memory pressure, then again it is desirable to avoid -** allocating a new page cache entry in order to avoid stressing -** the heap even further. -*/ -static int pcache1UnderMemoryPressure(PCache1 *pCache) { - // if (pcache1.nSlot && (pCache->szPage + pCache->szExtra) <= pcache1.szSlot) { - return pcache1.bUnderPressure; - // } else { - // return sqlite3HeapNearlyFull(); - // } -} - -/******************************************************************************/ -/******** General Implementation Functions ************************************/ - -/* -** This function is used to resize the hash table used by the cache passed -** as the first argument. -** -** The PCache mutex must be held when this function is called. 
-*/ -static void pcache1ResizeHash(PCache1 *p) { - PgHdr1 ** apNew; - unsigned int nNew; - unsigned int i; - - // assert(sqlite3_mutex_held(p->pGroup->mutex)); - - nNew = p->nHash * 2; - if (nNew < 256) { - nNew = 256; - } - - pcache1LeaveMutex(p->pGroup); - if (p->nHash) { - // sqlite3BeginBenignMalloc(); - } - apNew = (PgHdr1 **)calloc(nNew, sizeof(PgHdr1 *)); - if (p->nHash) { - // sqlite3EndBenignMalloc(); - } - pcache1EnterMutex(p->pGroup); - if (apNew) { - for (i = 0; i < p->nHash; i++) { - PgHdr1 *pPage; - PgHdr1 *pNext = p->apHash[i]; - while ((pPage = pNext) != 0) { - unsigned int h = pPage->iKey % nNew; - pNext = pPage->pNext; - pPage->pNext = apNew[h]; - apNew[h] = pPage; - } - } - free(p->apHash); - p->apHash = apNew; - p->nHash = nNew; - } -} - -/* -** This function is used internally to remove the page pPage from the -** PGroup LRU list, if is part of it. If pPage is not part of the PGroup -** LRU list, then this function is a no-op. -** -** The PGroup mutex must be held when this function is called. -*/ -static PgHdr1 *pcache1PinPage(PgHdr1 *pPage) { - assert(pPage != 0); - assert(PAGE_IS_UNPINNED(pPage)); - assert(pPage->pLruNext); - assert(pPage->pLruPrev); - // assert(sqlite3_mutex_held(pPage->pCache->pGroup->mutex)); - pPage->pLruPrev->pLruNext = pPage->pLruNext; - pPage->pLruNext->pLruPrev = pPage->pLruPrev; - pPage->pLruNext = 0; - /* pPage->pLruPrev = 0; - ** No need to clear pLruPrev as it is never accessed if pLruNext is 0 */ - assert(pPage->isAnchor == 0); - assert(pPage->pCache->pGroup->lru.isAnchor == 1); - pPage->pCache->nRecyclable--; - return pPage; -} - -/* -** Remove the page supplied as an argument from the hash table -** (PCache1.apHash structure) that it is currently stored in. -** Also free the page if freePage is true. -** -** The PGroup mutex must be held when this function is called. 
-*/ -static void pcache1RemoveFromHash(PgHdr1 *pPage, int freeFlag) { - unsigned int h; - PCache1 * pCache = pPage->pCache; - PgHdr1 ** pp; - - // assert(sqlite3_mutex_held(pCache->pGroup->mutex)); - h = pPage->iKey % pCache->nHash; - for (pp = &pCache->apHash[h]; (*pp) != pPage; pp = &(*pp)->pNext) - ; - *pp = (*pp)->pNext; - - pCache->nPage--; - if (freeFlag) pcache1FreePage(pPage); -} - -/* -** If there are currently more than nMaxPage pages allocated, try -** to recycle pages to reduce the number allocated to nMaxPage. -*/ -static void pcache1EnforceMaxPage(PCache1 *pCache) { - PGroup *pGroup = pCache->pGroup; - PgHdr1 *p; - // assert(sqlite3_mutex_held(pGroup->mutex)); - while (pGroup->nPurgeable > pGroup->nMaxPage && (p = pGroup->lru.pLruPrev)->isAnchor == 0) { - assert(p->pCache->pGroup == pGroup); - assert(PAGE_IS_UNPINNED(p)); - pcache1PinPage(p); - pcache1RemoveFromHash(p, 1); - } - if (pCache->nPage == 0 && pCache->pBulk) { - free(pCache->pBulk); - pCache->pBulk = pCache->pFree = 0; - } -} - -/* -** Discard all pages from cache pCache with a page number (key value) -** greater than or equal to iLimit. Any pinned pages that meet this -** criteria are unpinned before they are discarded. -** -** The PCache mutex must be held when this function is called. -*/ -static void pcache1TruncateUnsafe(PCache1 * pCache, /* The cache to truncate */ - unsigned int iLimit /* Drop pages with this pgno or larger */ -) { - int nPage = 0; /* To assert pCache->nPage is correct */ - unsigned int h, iStop; - // assert(sqlite3_mutex_held(pCache->pGroup->mutex)); - assert(pCache->iMaxKey >= iLimit); - assert(pCache->nHash > 0); - if (pCache->iMaxKey - iLimit < pCache->nHash) { - /* If we are just shaving the last few pages off the end of the - ** cache, then there is no point in scanning the entire hash table. - ** Only scan those hash slots that might contain pages that need to - ** be removed. 
*/ - h = iLimit % pCache->nHash; - iStop = pCache->iMaxKey % pCache->nHash; - nPage = -10; /* Disable the pCache->nPage validity check */ - } else { - /* This is the general case where many pages are being removed. - ** It is necessary to scan the entire hash table */ - h = pCache->nHash / 2; - iStop = h - 1; - } - for (;;) { - PgHdr1 **pp; - PgHdr1 * pPage; - assert(h < pCache->nHash); - pp = &pCache->apHash[h]; - while ((pPage = *pp) != 0) { - if (pPage->iKey >= iLimit) { - pCache->nPage--; - *pp = pPage->pNext; - if (PAGE_IS_UNPINNED(pPage)) pcache1PinPage(pPage); - pcache1FreePage(pPage); - } else { - pp = &pPage->pNext; - if (nPage >= 0) nPage++; - } - } - if (h == iStop) break; - h = (h + 1) % pCache->nHash; - } - assert(nPage < 0 || pCache->nPage == (unsigned)nPage); -} - -/******************************************************************************/ -/******** sqlite3_pcache Methods **********************************************/ - -/* -** Implementation of the sqlite3_pcache.xInit method. -*/ -static int pcache1Init(void *NotUsed) { - assert(pcache1.isInit == 0); - memset(&pcache1, 0, sizeof(pcache1)); - - // /* - // ** The pcache1.separateCache variable is true if each PCache has its own - // ** private PGroup (mode-1). pcache1.separateCache is false if the single - // ** PGroup in pcache1.grp is used for all page caches (mode-2). - // ** - // ** * Always use a unified cache (mode-2) if ENABLE_MEMORY_MANAGEMENT - // ** - // ** * Use a unified cache in single-threaded applications that have - // ** configured a start-time buffer for use as page-cache memory using - // ** sqlite3_config(SQLITE_CONFIG_PAGECACHE, pBuf, sz, N) with non-NULL - // ** pBuf argument. 
- // ** - // ** * Otherwise use separate caches (mode-1) - // */ - // #if defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) - // pcache1.separateCache = 0; - // #elif SQLITE_THREADSAFE - // pcache1.separateCache = sqlite3GlobalConfig.pPage==0 - // || sqlite3GlobalConfig.bCoreMutex>0; - // #else - // pcache1.separateCache = sqlite3GlobalConfig.pPage==0; - // #endif - pcache1.separateCache = 1; - - pthread_mutex_init(&pcache1.grp.mutex, NULL); - pthread_mutex_init(&pcache1.mutex, NULL); - - // if (pcache1.separateCache && sqlite3GlobalConfig.nPage != 0 && sqlite3GlobalConfig.pPage == 0) { - // pcache1.nInitPage = sqlite3GlobalConfig.nPage; - // } else { - pcache1.nInitPage = 0; - // } - pcache1.grp.mxPinned = 10; - pcache1.isInit = 1; - return 0; -} - -/* -** Implementation of the sqlite3_pcache.xShutdown method. -** Note that the static mutex allocated in xInit does -** not need to be freed. -*/ -static void pcache1Shutdown(void *NotUsed) { - assert(pcache1.isInit != 0); - memset(&pcache1, 0, sizeof(pcache1)); -} - -/* forward declaration */ -static void pcache1Destroy(sqlite3_pcache *p); - -/* -** Implementation of the sqlite3_pcache.xCreate method. -** -** Allocate a new cache. 
-*/ -static sqlite3_pcache *pcache1Create(int szPage, int szExtra, int bPurgeable) { - PCache1 *pCache; /* The newly created page cache */ - PGroup * pGroup; /* The group the new page cache will belong to */ - int sz; /* Bytes of memory required to allocate the new cache */ - - assert((szPage & (szPage - 1)) == 0 && szPage >= 512 && szPage <= 65536); - assert(szExtra < 300); - - sz = sizeof(PCache1) + sizeof(PGroup) * pcache1.separateCache; - pCache = (PCache1 *)calloc(1, sz); - if (pCache) { - if (pcache1.separateCache) { - pGroup = (PGroup *)&pCache[1]; - pGroup->mxPinned = 10; - } else { - pGroup = &pcache1.grp; - } - pcache1EnterMutex(pGroup); - if (pGroup->lru.isAnchor == 0) { - pGroup->lru.isAnchor = 1; - pGroup->lru.pLruPrev = pGroup->lru.pLruNext = &pGroup->lru; - } - pCache->pGroup = pGroup; - pCache->szPage = szPage; - pCache->szExtra = szExtra; - pCache->szAlloc = szPage + szExtra + ROUND8(sizeof(PgHdr1)); - pCache->bPurgeable = (bPurgeable ? 1 : 0); - pcache1ResizeHash(pCache); - if (bPurgeable) { - pCache->nMin = 10; - pGroup->nMinPage += pCache->nMin; - pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; - pCache->pnPurgeable = &pGroup->nPurgeable; - } else { - pCache->pnPurgeable = &pCache->nPurgeableDummy; - } - pcache1LeaveMutex(pGroup); - if (pCache->nHash == 0) { - pcache1Destroy((sqlite3_pcache *)pCache); - pCache = 0; - } - } - return (sqlite3_pcache *)pCache; -} - -/* -** Implementation of the sqlite3_pcache.xCachesize method. -** -** Configure the cache_size limit for a cache. 
-*/ -static void pcache1Cachesize(sqlite3_pcache *p, int nMax) { - PCache1 *pCache = (PCache1 *)p; - u32 n; - assert(nMax >= 0); - if (pCache->bPurgeable) { - PGroup *pGroup = pCache->pGroup; - pcache1EnterMutex(pGroup); - n = (u32)nMax; - if (n > 0x7fff0000 - pGroup->nMaxPage + pCache->nMax) { - n = 0x7fff0000 - pGroup->nMaxPage + pCache->nMax; - } - pGroup->nMaxPage += (n - pCache->nMax); - pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; - pCache->nMax = n; - pCache->n90pct = pCache->nMax * 9 / 10; - pcache1EnforceMaxPage(pCache); - pcache1LeaveMutex(pGroup); - } -} - -/* -** Implementation of the sqlite3_pcache.xShrink method. -** -** Free up as much memory as possible. -*/ -static void pcache1Shrink(sqlite3_pcache *p) { - PCache1 *pCache = (PCache1 *)p; - if (pCache->bPurgeable) { - PGroup * pGroup = pCache->pGroup; - unsigned int savedMaxPage; - pcache1EnterMutex(pGroup); - savedMaxPage = pGroup->nMaxPage; - pGroup->nMaxPage = 0; - pcache1EnforceMaxPage(pCache); - pGroup->nMaxPage = savedMaxPage; - pcache1LeaveMutex(pGroup); - } -} - -/* -** Implementation of the sqlite3_pcache.xPagecount method. -*/ -static int pcache1Pagecount(sqlite3_pcache *p) { - int n; - PCache1 *pCache = (PCache1 *)p; - pcache1EnterMutex(pCache->pGroup); - n = pCache->nPage; - pcache1LeaveMutex(pCache->pGroup); - return n; -} - -/* -** Implement steps 3, 4, and 5 of the pcache1Fetch() algorithm described -** in the header of the pcache1Fetch() procedure. -** -** This steps are broken out into a separate procedure because they are -** usually not needed, and by avoiding the stack initialization required -** for these steps, the main pcache1Fetch() procedure can run faster. 
-*/ -static PgHdr1 *pcache1FetchStage2(PCache1 *pCache, unsigned int iKey, int createFlag) { - unsigned int nPinned; - PGroup * pGroup = pCache->pGroup; - PgHdr1 * pPage = 0; - - /* Step 3: Abort if createFlag is 1 but the cache is nearly full */ - assert(pCache->nPage >= pCache->nRecyclable); - nPinned = pCache->nPage - pCache->nRecyclable; - assert(pGroup->mxPinned == pGroup->nMaxPage + 10 - pGroup->nMinPage); - assert(pCache->n90pct == pCache->nMax * 9 / 10); - if (createFlag == 1 && (nPinned >= pGroup->mxPinned || nPinned >= pCache->n90pct || - (pcache1UnderMemoryPressure(pCache) && pCache->nRecyclable < nPinned))) { - return 0; - } - - if (pCache->nPage >= pCache->nHash) pcache1ResizeHash(pCache); - assert(pCache->nHash > 0 && pCache->apHash); - - /* Step 4. Try to recycle a page. */ - if (pCache->bPurgeable && !pGroup->lru.pLruPrev->isAnchor && - ((pCache->nPage + 1 >= pCache->nMax) || pcache1UnderMemoryPressure(pCache))) { - PCache1 *pOther; - pPage = pGroup->lru.pLruPrev; - assert(PAGE_IS_UNPINNED(pPage)); - pcache1RemoveFromHash(pPage, 0); - pcache1PinPage(pPage); - pOther = pPage->pCache; - if (pOther->szAlloc != pCache->szAlloc) { - pcache1FreePage(pPage); - pPage = 0; - } else { - pGroup->nPurgeable -= (pOther->bPurgeable - pCache->bPurgeable); - } - } - - /* Step 5. If a usable page buffer has still not been found, - ** attempt to allocate a new one. - */ - if (!pPage) { - pPage = pcache1AllocPage(pCache, createFlag == 1); - } - - if (pPage) { - unsigned int h = iKey % pCache->nHash; - pCache->nPage++; - pPage->iKey = iKey; - pPage->pNext = pCache->apHash[h]; - pPage->pCache = pCache; - pPage->pLruNext = 0; - /* pPage->pLruPrev = 0; - ** No need to clear pLruPrev since it is not accessed when pLruNext==0 */ - *(void **)pPage->page.pExtra = 0; - pCache->apHash[h] = pPage; - if (iKey > pCache->iMaxKey) { - pCache->iMaxKey = iKey; - } - } - return pPage; -} - -/* -** Implementation of the sqlite3_pcache.xFetch method. -** -** Fetch a page by key value. 
-** -** Whether or not a new page may be allocated by this function depends on -** the value of the createFlag argument. 0 means do not allocate a new -** page. 1 means allocate a new page if space is easily available. 2 -** means to try really hard to allocate a new page. -** -** For a non-purgeable cache (a cache used as the storage for an in-memory -** database) there is really no difference between createFlag 1 and 2. So -** the calling function (pcache.c) will never have a createFlag of 1 on -** a non-purgeable cache. -** -** There are three different approaches to obtaining space for a page, -** depending on the value of parameter createFlag (which may be 0, 1 or 2). -** -** 1. Regardless of the value of createFlag, the cache is searched for a -** copy of the requested page. If one is found, it is returned. -** -** 2. If createFlag==0 and the page is not already in the cache, NULL is -** returned. -** -** 3. If createFlag is 1, and the page is not already in the cache, then -** return NULL (do not allocate a new page) if any of the following -** conditions are true: -** -** (a) the number of pages pinned by the cache is greater than -** PCache1.nMax, or -** -** (b) the number of pages pinned by the cache is greater than -** the sum of nMax for all purgeable caches, less the sum of -** nMin for all other purgeable caches, or -** -** 4. If none of the first three conditions apply and the cache is marked -** as purgeable, and if one of the following is true: -** -** (a) The number of pages allocated for the cache is already -** PCache1.nMax, or -** -** (b) The number of pages allocated for all purgeable caches is -** already equal to or greater than the sum of nMax for all -** purgeable caches, -** -** (c) The system is under memory pressure and wants to avoid -** unnecessary pages cache entry allocations -** -** then attempt to recycle a page from the LRU list. If it is the right -** size, return the recycled buffer. 
Otherwise, free the buffer and -** proceed to step 5. -** -** 5. Otherwise, allocate and return a new page buffer. -** -** There are two versions of this routine. pcache1FetchWithMutex() is -** the general case. pcache1FetchNoMutex() is a faster implementation for -** the common case where pGroup->mutex is NULL. The pcache1Fetch() wrapper -** invokes the appropriate routine. -*/ -static PgHdr1 *pcache1FetchNoMutex(sqlite3_pcache *p, unsigned int iKey, int createFlag) { - PCache1 *pCache = (PCache1 *)p; - PgHdr1 * pPage = 0; - - /* Step 1: Search the hash table for an existing entry. */ - pPage = pCache->apHash[iKey % pCache->nHash]; - while (pPage && pPage->iKey != iKey) { - pPage = pPage->pNext; - } - - /* Step 2: If the page was found in the hash table, then return it. - ** If the page was not in the hash table and createFlag is 0, abort. - ** Otherwise (page not in hash and createFlag!=0) continue with - ** subsequent steps to try to create the page. */ - if (pPage) { - if (PAGE_IS_UNPINNED(pPage)) { - return pcache1PinPage(pPage); - } else { - return pPage; - } - } else if (createFlag) { - /* Steps 3, 4, and 5 implemented by this subroutine */ - return pcache1FetchStage2(pCache, iKey, createFlag); - } else { - return 0; - } -} -#if PCACHE1_MIGHT_USE_GROUP_MUTEX -static PgHdr1 *pcache1FetchWithMutex(sqlite3_pcache *p, unsigned int iKey, int createFlag) { - PCache1 *pCache = (PCache1 *)p; - PgHdr1 * pPage; - - pcache1EnterMutex(pCache->pGroup); - pPage = pcache1FetchNoMutex(p, iKey, createFlag); - assert(pPage == 0 || pCache->iMaxKey >= iKey); - pcache1LeaveMutex(pCache->pGroup); - return pPage; -} -#endif -static sqlite3_pcache_page *pcache1Fetch(sqlite3_pcache *p, unsigned int iKey, int createFlag) { -#if PCACHE1_MIGHT_USE_GROUP_MUTEX || defined(SQLITE_DEBUG) - PCache1 *pCache = (PCache1 *)p; -#endif - - assert(offsetof(PgHdr1, page) == 0); - assert(pCache->bPurgeable || createFlag != 1); - assert(pCache->bPurgeable || pCache->nMin == 0); - 
assert(pCache->bPurgeable == 0 || pCache->nMin == 10); - assert(pCache->nMin == 0 || pCache->bPurgeable); - assert(pCache->nHash > 0); - return (sqlite3_pcache_page *)pcache1FetchWithMutex(p, iKey, createFlag); -} - -/* -** Implementation of the sqlite3_pcache.xUnpin method. -** -** Mark a page as unpinned (eligible for asynchronous recycling). -*/ -static void pcache1Unpin(sqlite3_pcache *p, sqlite3_pcache_page *pPg, int reuseUnlikely) { - PCache1 *pCache = (PCache1 *)p; - PgHdr1 * pPage = (PgHdr1 *)pPg; - PGroup * pGroup = pCache->pGroup; - - assert(pPage->pCache == pCache); - pcache1EnterMutex(pGroup); - - /* It is an error to call this function if the page is already - ** part of the PGroup LRU list. - */ - assert(pPage->pLruNext == 0); - assert(PAGE_IS_PINNED(pPage)); - - if (reuseUnlikely || pGroup->nPurgeable > pGroup->nMaxPage) { - pcache1RemoveFromHash(pPage, 1); - } else { - /* Add the page to the PGroup LRU list. */ - PgHdr1 **ppFirst = &pGroup->lru.pLruNext; - pPage->pLruPrev = &pGroup->lru; - (pPage->pLruNext = *ppFirst)->pLruPrev = pPage; - *ppFirst = pPage; - pCache->nRecyclable++; - } - - pcache1LeaveMutex(pCache->pGroup); -} - -/* -** Implementation of the sqlite3_pcache.xRekey method. -*/ -static void pcache1Rekey(sqlite3_pcache *p, sqlite3_pcache_page *pPg, unsigned int iOld, unsigned int iNew) { - PCache1 * pCache = (PCache1 *)p; - PgHdr1 * pPage = (PgHdr1 *)pPg; - PgHdr1 ** pp; - unsigned int h; - assert(pPage->iKey == iOld); - assert(pPage->pCache == pCache); - - pcache1EnterMutex(pCache->pGroup); - - h = iOld % pCache->nHash; - pp = &pCache->apHash[h]; - while ((*pp) != pPage) { - pp = &(*pp)->pNext; - } - *pp = pPage->pNext; - - h = iNew % pCache->nHash; - pPage->iKey = iNew; - pPage->pNext = pCache->apHash[h]; - pCache->apHash[h] = pPage; - if (iNew > pCache->iMaxKey) { - pCache->iMaxKey = iNew; - } - - pcache1LeaveMutex(pCache->pGroup); -} - -/* -** Implementation of the sqlite3_pcache.xTruncate method. 
-** -** Discard all unpinned pages in the cache with a page number equal to -** or greater than parameter iLimit. Any pinned pages with a page number -** equal to or greater than iLimit are implicitly unpinned. -*/ -static void pcache1Truncate(sqlite3_pcache *p, unsigned int iLimit) { - PCache1 *pCache = (PCache1 *)p; - pcache1EnterMutex(pCache->pGroup); - if (iLimit <= pCache->iMaxKey) { - pcache1TruncateUnsafe(pCache, iLimit); - pCache->iMaxKey = iLimit - 1; - } - pcache1LeaveMutex(pCache->pGroup); -} - -/* -** Implementation of the sqlite3_pcache.xDestroy method. -** -** Destroy a cache allocated using pcache1Create(). -*/ -static void pcache1Destroy(sqlite3_pcache *p) { - PCache1 *pCache = (PCache1 *)p; - PGroup * pGroup = pCache->pGroup; - assert(pCache->bPurgeable || (pCache->nMax == 0 && pCache->nMin == 0)); - pcache1EnterMutex(pGroup); - if (pCache->nPage) pcache1TruncateUnsafe(pCache, 0); - assert(pGroup->nMaxPage >= pCache->nMax); - pGroup->nMaxPage -= pCache->nMax; - assert(pGroup->nMinPage >= pCache->nMin); - pGroup->nMinPage -= pCache->nMin; - pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; - pcache1EnforceMaxPage(pCache); - pcache1LeaveMutex(pGroup); - free(pCache->pBulk); - free(pCache->apHash); - free(pCache); -} - -/* -** Return the size of the header on each page of this PCACHE implementation. -*/ -int sqlite3HeaderSizePcache1(void) { return ROUND8(sizeof(PgHdr1)); } - -// /* -// ** Return the global mutex used by this PCACHE implementation. The -// ** sqlite3_status() routine needs access to this mutex. -// */ -// sqlite3_mutex *sqlite3Pcache1Mutex(void) { return pcache1.mutex; } - -#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT -/* -** This function is called to free superfluous dynamically allocated memory -** held by the pager system. Memory in use by any SQLite pager allocated -** by the current thread may be sqlite3_free()ed. -** -** nReq is the number of bytes of memory required. 
Once this much has -** been released, the function returns. The return value is the total number -** of bytes of memory released. -*/ -int sqlite3PcacheReleaseMemory(int nReq) { - int nFree = 0; - // assert(sqlite3_mutex_notheld(pcache1.grp.mutex)); - // assert(sqlite3_mutex_notheld(pcache1.mutex)); - if (sqlite3GlobalConfig.pPage == 0) { - PgHdr1 *p; - pcache1EnterMutex(&pcache1.grp); - while ((nReq < 0 || nFree < nReq) && (p = pcache1.grp.lru.pLruPrev) != 0 && p->isAnchor == 0) { - nFree += pcache1MemSize(p->page.pBuf); -#ifdef SQLITE_PCACHE_SEPARATE_HEADER - nFree += sqlite3MemSize(p); -#endif - assert(PAGE_IS_UNPINNED(p)); - pcache1PinPage(p); - pcache1RemoveFromHash(p, 1); - } - pcache1LeaveMutex(&pcache1.grp); - } - return nFree; -} -#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */ - -#ifdef SQLITE_TEST -/* -** This function is used by test procedures to inspect the internal state -** of the global cache. -*/ -void sqlite3PcacheStats(int *pnCurrent, /* OUT: Total number of pages cached */ - int *pnMax, /* OUT: Global maximum cache size */ - int *pnMin, /* OUT: Sum of PCache1.nMin for purgeable caches */ - int *pnRecyclable /* OUT: Total number of pages available for recycling */ -) { - PgHdr1 *p; - int nRecyclable = 0; - for (p = pcache1.grp.lru.pLruNext; p && !p->isAnchor; p = p->pLruNext) { - assert(PAGE_IS_UNPINNED(p)); - nRecyclable++; - } - *pnCurrent = pcache1.grp.nPurgeable; - *pnMax = (int)pcache1.grp.nMaxPage; - *pnMin = (int)pcache1.grp.nMinPage; - *pnRecyclable = nRecyclable; -} -#endif - -sqlite3_pcache_methods2 pcache2 = { - 1, /* iVersion */ - 0, /* pArg */ - pcache1Init, /* xInit */ - pcache1Shutdown, /* xShutdown */ - pcache1Create, /* xCreate */ - pcache1Cachesize, /* xCachesize */ - pcache1Pagecount, /* xPagecount */ - pcache1Fetch, /* xFetch */ - pcache1Unpin, /* xUnpin */ - pcache1Rekey, /* xRekey */ - pcache1Truncate, /* xTruncate */ - pcache1Destroy, /* xDestroy */ - pcache1Shrink /* xShrink */ -}; diff --git 
a/source/libs/tdb/src/sqlite/wal.c b/source/libs/tdb/src/sqlite/wal.c deleted file mode 100644 index 225c708ab0..0000000000 --- a/source/libs/tdb/src/sqlite/wal.c +++ /dev/null @@ -1,4153 +0,0 @@ -/* -** 2010 February 1 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** -** This file contains the implementation of a write-ahead log (WAL) used in -** "journal_mode=WAL" mode. -** -** WRITE-AHEAD LOG (WAL) FILE FORMAT -** -** A WAL file consists of a header followed by zero or more "frames". -** Each frame records the revised content of a single page from the -** database file. All changes to the database are recorded by writing -** frames into the WAL. Transactions commit when a frame is written that -** contains a commit marker. A single WAL can and usually does record -** multiple transactions. Periodically, the content of the WAL is -** transferred back into the database file in an operation called a -** "checkpoint". -** -** A single WAL file can be used multiple times. In other words, the -** WAL can fill up with frames and then be checkpointed and then new -** frames can overwrite the old ones. A WAL always grows from beginning -** toward the end. Checksums and counters attached to each frame are -** used to determine which frames within the WAL are valid and which -** are leftovers from prior checkpoints. -** -** The WAL header is 32 bytes in size and consists of the following eight -** big-endian 32-bit unsigned integer values: -** -** 0: Magic number. 0x377f0682 or 0x377f0683 -** 4: File format version. Currently 3007000 -** 8: Database page size. 
Example: 1024 -** 12: Checkpoint sequence number -** 16: Salt-1, random integer incremented with each checkpoint -** 20: Salt-2, a different random integer changing with each ckpt -** 24: Checksum-1 (first part of checksum for first 24 bytes of header). -** 28: Checksum-2 (second part of checksum for first 24 bytes of header). -** -** Immediately following the wal-header are zero or more frames. Each -** frame consists of a 24-byte frame-header followed by a bytes -** of page data. The frame-header is six big-endian 32-bit unsigned -** integer values, as follows: -** -** 0: Page number. -** 4: For commit records, the size of the database image in pages -** after the commit. For all other records, zero. -** 8: Salt-1 (copied from the header) -** 12: Salt-2 (copied from the header) -** 16: Checksum-1. -** 20: Checksum-2. -** -** A frame is considered valid if and only if the following conditions are -** true: -** -** (1) The salt-1 and salt-2 values in the frame-header match -** salt values in the wal-header -** -** (2) The checksum values in the final 8 bytes of the frame-header -** exactly match the checksum computed consecutively on the -** WAL header and the first 8 bytes and the content of all frames -** up to and including the current frame. -** -** The checksum is computed using 32-bit big-endian integers if the -** magic number in the first 4 bytes of the WAL is 0x377f0683 and it -** is computed using little-endian if the magic number is 0x377f0682. -** The checksum values are always stored in the frame header in a -** big-endian format regardless of which byte order is used to compute -** the checksum. The checksum is computed by interpreting the input as -** an even number of unsigned 32-bit integers: x[0] through x[N]. 
The -** algorithm used for the checksum is as follows: -** -** for i from 0 to n-1 step 2: -** s0 += x[i] + s1; -** s1 += x[i+1] + s0; -** endfor -** -** Note that s0 and s1 are both weighted checksums using fibonacci weights -** in reverse order (the largest fibonacci weight occurs on the first element -** of the sequence being summed.) The s1 value spans all 32-bit -** terms of the sequence whereas s0 omits the final term. -** -** On a checkpoint, the WAL is first VFS.xSync-ed, then valid content of the -** WAL is transferred into the database, then the database is VFS.xSync-ed. -** The VFS.xSync operations serve as write barriers - all writes launched -** before the xSync must complete before any write that launches after the -** xSync begins. -** -** After each checkpoint, the salt-1 value is incremented and the salt-2 -** value is randomized. This prevents old and new frames in the WAL from -** being considered valid at the same time and being checkpointing together -** following a crash. -** -** READER ALGORITHM -** -** To read a page from the database (call it page number P), a reader -** first checks the WAL to see if it contains page P. If so, then the -** last valid instance of page P that is a followed by a commit frame -** or is a commit frame itself becomes the value read. If the WAL -** contains no copies of page P that are valid and which are a commit -** frame or are followed by a commit frame, then page P is read from -** the database file. -** -** To start a read transaction, the reader records the index of the last -** valid frame in the WAL. The reader uses this recorded "mxFrame" value -** for all subsequent read operations. New transactions can be appended -** to the WAL, but as long as the reader uses its original mxFrame value -** and ignores the newly appended content, it will see a consistent snapshot -** of the database from a single point in time. 
This technique allows -** multiple concurrent readers to view different versions of the database -** content simultaneously. -** -** The reader algorithm in the previous paragraphs works correctly, but -** because frames for page P can appear anywhere within the WAL, the -** reader has to scan the entire WAL looking for page P frames. If the -** WAL is large (multiple megabytes is typical) that scan can be slow, -** and read performance suffers. To overcome this problem, a separate -** data structure called the wal-index is maintained to expedite the -** search for frames of a particular page. -** -** WAL-INDEX FORMAT -** -** Conceptually, the wal-index is shared memory, though VFS implementations -** might choose to implement the wal-index using a mmapped file. Because -** the wal-index is shared memory, SQLite does not support journal_mode=WAL -** on a network filesystem. All users of the database must be able to -** share memory. -** -** In the default unix and windows implementation, the wal-index is a mmapped -** file whose name is the database name with a "-shm" suffix added. For that -** reason, the wal-index is sometimes called the "shm" file. -** -** The wal-index is transient. After a crash, the wal-index can (and should -** be) reconstructed from the original WAL file. In fact, the VFS is required -** to either truncate or zero the header of the wal-index when the last -** connection to it closes. Because the wal-index is transient, it can -** use an architecture-specific format; it does not have to be cross-platform. -** Hence, unlike the database and WAL file formats which store all values -** as big endian, the wal-index can store multi-byte values in the native -** byte order of the host computer. 
-** -** The purpose of the wal-index is to answer this question quickly: Given -** a page number P and a maximum frame index M, return the index of the -** last frame in the wal before frame M for page P in the WAL, or return -** NULL if there are no frames for page P in the WAL prior to M. -** -** The wal-index consists of a header region, followed by an one or -** more index blocks. -** -** The wal-index header contains the total number of frames within the WAL -** in the mxFrame field. -** -** Each index block except for the first contains information on -** HASHTABLE_NPAGE frames. The first index block contains information on -** HASHTABLE_NPAGE_ONE frames. The values of HASHTABLE_NPAGE_ONE and -** HASHTABLE_NPAGE are selected so that together the wal-index header and -** first index block are the same size as all other index blocks in the -** wal-index. The values are: -** -** HASHTABLE_NPAGE 4096 -** HASHTABLE_NPAGE_ONE 4062 -** -** Each index block contains two sections, a page-mapping that contains the -** database page number associated with each wal frame, and a hash-table -** that allows readers to query an index block for a specific page number. -** The page-mapping is an array of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE -** for the first index block) 32-bit page numbers. The first entry in the -** first index-block contains the database page number corresponding to the -** first frame in the WAL file. The first entry in the second index block -** in the WAL file corresponds to the (HASHTABLE_NPAGE_ONE+1)th frame in -** the log, and so on. -** -** The last index block in a wal-index usually contains less than the full -** complement of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE) page-numbers, -** depending on the contents of the WAL file. This does not change the -** allocated size of the page-mapping array - the page-mapping array merely -** contains unused entries. 
-** -** Even without using the hash table, the last frame for page P -** can be found by scanning the page-mapping sections of each index block -** starting with the last index block and moving toward the first, and -** within each index block, starting at the end and moving toward the -** beginning. The first entry that equals P corresponds to the frame -** holding the content for that page. -** -** The hash table consists of HASHTABLE_NSLOT 16-bit unsigned integers. -** HASHTABLE_NSLOT = 2*HASHTABLE_NPAGE, and there is one entry in the -** hash table for each page number in the mapping section, so the hash -** table is never more than half full. The expected number of collisions -** prior to finding a match is 1. Each entry of the hash table is an -** 1-based index of an entry in the mapping section of the same -** index block. Let K be the 1-based index of the largest entry in -** the mapping section. (For index blocks other than the last, K will -** always be exactly HASHTABLE_NPAGE (4096) and for the last index block -** K will be (mxFrame%HASHTABLE_NPAGE).) Unused slots of the hash table -** contain a value of 0. -** -** To look for page P in the hash table, first compute a hash iKey on -** P as follows: -** -** iKey = (P * 383) % HASHTABLE_NSLOT -** -** Then start scanning entries of the hash table, starting with iKey -** (wrapping around to the beginning when the end of the hash table is -** reached) until an unused hash slot is found. Let the first unused slot -** be at index iUnused. (iUnused might be less than iKey if there was -** wrap-around.) Because the hash table is never more than half full, -** the search is guaranteed to eventually hit an unused entry. Let -** iMax be the value between iKey and iUnused, closest to iUnused, -** where aHash[iMax]==P. If there is no iMax entry (if there exists -** no hash slot such that aHash[i]==p) then page P is not in the -** current index block. 
Otherwise the iMax-th mapping entry of the -** current index block corresponds to the last entry that references -** page P. -** -** A hash search begins with the last index block and moves toward the -** first index block, looking for entries corresponding to page P. On -** average, only two or three slots in each index block need to be -** examined in order to either find the last entry for page P, or to -** establish that no such entry exists in the block. Each index block -** holds over 4000 entries. So two or three index blocks are sufficient -** to cover a typical 10 megabyte WAL file, assuming 1K pages. 8 or 10 -** comparisons (on average) suffice to either locate a frame in the -** WAL or to establish that the frame does not exist in the WAL. This -** is much faster than scanning the entire 10MB WAL. -** -** Note that entries are added in order of increasing K. Hence, one -** reader might be using some value K0 and a second reader that started -** at a later time (after additional transactions were added to the WAL -** and to the wal-index) might be using a different value K1, where K1>K0. -** Both readers can use the same hash table and mapping section to get -** the correct result. There may be entries in the hash table with -** K>K0 but to the first reader, those entries will appear to be unused -** slots in the hash table and so the first reader will get an answer as -** if no values greater than K0 had ever been inserted into the hash table -** in the first place - which is what reader one wants. Meanwhile, the -** second reader using K1 will see additional values that were inserted -** later, which is exactly what reader two wants. -** -** When a rollback occurs, the value of K is decreased. Hash table entries -** that correspond to frames greater than the new K value are removed -** from the hash table at this point. 
-*/ -#ifndef SQLITE_OMIT_WAL - -#include "wal.h" - -/* -** Trace output macros -*/ -#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) -int sqlite3WalTrace = 0; -# define WALTRACE(X) if(sqlite3WalTrace) sqlite3DebugPrintf X -#else -# define WALTRACE(X) -#endif - -/* -** The maximum (and only) versions of the wal and wal-index formats -** that may be interpreted by this version of SQLite. -** -** If a client begins recovering a WAL file and finds that (a) the checksum -** values in the wal-header are correct and (b) the version field is not -** WAL_MAX_VERSION, recovery fails and SQLite returns SQLITE_CANTOPEN. -** -** Similarly, if a client successfully reads a wal-index header (i.e. the -** checksum test is successful) and finds that the version field is not -** WALINDEX_MAX_VERSION, then no read-transaction is opened and SQLite -** returns SQLITE_CANTOPEN. -*/ -#define WAL_MAX_VERSION 3007000 -#define WALINDEX_MAX_VERSION 3007000 - -/* -** Index numbers for various locking bytes. WAL_NREADER is the number -** of available reader locks and should be at least 3. The default -** is SQLITE_SHM_NLOCK==8 and WAL_NREADER==5. -** -** Technically, the various VFSes are free to implement these locks however -** they see fit. However, compatibility is encouraged so that VFSes can -** interoperate. The standard implemention used on both unix and windows -** is for the index number to indicate a byte offset into the -** WalCkptInfo.aLock[] array in the wal-index header. In other words, all -** locks are on the shm file. The WALINDEX_LOCK_OFFSET constant (which -** should be 120) is the location in the shm file for the first locking -** byte. 
-*/ -#define WAL_WRITE_LOCK 0 -#define WAL_ALL_BUT_WRITE 1 -#define WAL_CKPT_LOCK 1 -#define WAL_RECOVER_LOCK 2 -#define WAL_READ_LOCK(I) (3+(I)) -#define WAL_NREADER (SQLITE_SHM_NLOCK-3) - - -/* Object declarations */ -typedef struct WalIndexHdr WalIndexHdr; -typedef struct WalIterator WalIterator; -typedef struct WalCkptInfo WalCkptInfo; - - -/* -** The following object holds a copy of the wal-index header content. -** -** The actual header in the wal-index consists of two copies of this -** object followed by one instance of the WalCkptInfo object. -** For all versions of SQLite through 3.10.0 and probably beyond, -** the locking bytes (WalCkptInfo.aLock) start at offset 120 and -** the total header size is 136 bytes. -** -** The szPage value can be any power of 2 between 512 and 32768, inclusive. -** Or it can be 1 to represent a 65536-byte page. The latter case was -** added in 3.7.1 when support for 64K pages was added. -*/ -struct WalIndexHdr { - u32 iVersion; /* Wal-index version */ - u32 unused; /* Unused (padding) field */ - u32 iChange; /* Counter incremented each transaction */ - u8 isInit; /* 1 when initialized */ - u8 bigEndCksum; /* True if checksums in WAL are big-endian */ - u16 szPage; /* Database page size in bytes. 1==64K */ - u32 mxFrame; /* Index of last valid frame in the WAL */ - u32 nPage; /* Size of database in pages */ - u32 aFrameCksum[2]; /* Checksum of last frame in log */ - u32 aSalt[2]; /* Two salt values copied from WAL header */ - u32 aCksum[2]; /* Checksum over all prior fields */ -}; - -/* -** A copy of the following object occurs in the wal-index immediately -** following the second copy of the WalIndexHdr. This object stores -** information used by checkpoint. -** -** nBackfill is the number of frames in the WAL that have been written -** back into the database. (We call the act of moving content from WAL to -** database "backfilling".) The nBackfill number is never greater than -** WalIndexHdr.mxFrame. 
nBackfill can only be increased by threads -** holding the WAL_CKPT_LOCK lock (which includes a recovery thread). -** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from -** mxFrame back to zero when the WAL is reset. -** -** nBackfillAttempted is the largest value of nBackfill that a checkpoint -** has attempted to achieve. Normally nBackfill==nBackfillAtempted, however -** the nBackfillAttempted is set before any backfilling is done and the -** nBackfill is only set after all backfilling completes. So if a checkpoint -** crashes, nBackfillAttempted might be larger than nBackfill. The -** WalIndexHdr.mxFrame must never be less than nBackfillAttempted. -** -** The aLock[] field is a set of bytes used for locking. These bytes should -** never be read or written. -** -** There is one entry in aReadMark[] for each reader lock. If a reader -** holds read-lock K, then the value in aReadMark[K] is no greater than -** the mxFrame for that reader. The value READMARK_NOT_USED (0xffffffff) -** for any aReadMark[] means that entry is unused. aReadMark[0] is -** a special case; its value is never used and it exists as a place-holder -** to avoid having to offset aReadMark[] indexs by one. Readers holding -** WAL_READ_LOCK(0) always ignore the entire WAL and read all content -** directly from the database. -** -** The value of aReadMark[K] may only be changed by a thread that -** is holding an exclusive lock on WAL_READ_LOCK(K). Thus, the value of -** aReadMark[K] cannot changed while there is a reader is using that mark -** since the reader will be holding a shared lock on WAL_READ_LOCK(K). -** -** The checkpointer may only transfer frames from WAL to database where -** the frame numbers are less than or equal to every aReadMark[] that is -** in use (that is, every aReadMark[j] for which there is a corresponding -** WAL_READ_LOCK(j)). 
New readers (usually) pick the aReadMark[] with the -** largest value and will increase an unused aReadMark[] to mxFrame if there -** is not already an aReadMark[] equal to mxFrame. The exception to the -** previous sentence is when nBackfill equals mxFrame (meaning that everything -** in the WAL has been backfilled into the database) then new readers -** will choose aReadMark[0] which has value 0 and hence such reader will -** get all their all content directly from the database file and ignore -** the WAL. -** -** Writers normally append new frames to the end of the WAL. However, -** if nBackfill equals mxFrame (meaning that all WAL content has been -** written back into the database) and if no readers are using the WAL -** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then -** the writer will first "reset" the WAL back to the beginning and start -** writing new content beginning at frame 1. -** -** We assume that 32-bit loads are atomic and so no locks are needed in -** order to read from any aReadMark[] entries. 
-*/ -struct WalCkptInfo { - u32 nBackfill; /* Number of WAL frames backfilled into DB */ - u32 aReadMark[WAL_NREADER]; /* Reader marks */ - u8 aLock[SQLITE_SHM_NLOCK]; /* Reserved space for locks */ - u32 nBackfillAttempted; /* WAL frames perhaps written, or maybe not */ - u32 notUsed0; /* Available for future enhancements */ -}; -#define READMARK_NOT_USED 0xffffffff - -/* -** This is a schematic view of the complete 136-byte header of the -** wal-index file (also known as the -shm file): -** -** +-----------------------------+ -** 0: | iVersion | \ -** +-----------------------------+ | -** 4: | (unused padding) | | -** +-----------------------------+ | -** 8: | iChange | | -** +-------+-------+-------------+ | -** 12: | bInit | bBig | szPage | | -** +-------+-------+-------------+ | -** 16: | mxFrame | | First copy of the -** +-----------------------------+ | WalIndexHdr object -** 20: | nPage | | -** +-----------------------------+ | -** 24: | aFrameCksum | | -** | | | -** +-----------------------------+ | -** 32: | aSalt | | -** | | | -** +-----------------------------+ | -** 40: | aCksum | | -** | | / -** +-----------------------------+ -** 48: | iVersion | \ -** +-----------------------------+ | -** 52: | (unused padding) | | -** +-----------------------------+ | -** 56: | iChange | | -** +-------+-------+-------------+ | -** 60: | bInit | bBig | szPage | | -** +-------+-------+-------------+ | Second copy of the -** 64: | mxFrame | | WalIndexHdr -** +-----------------------------+ | -** 68: | nPage | | -** +-----------------------------+ | -** 72: | aFrameCksum | | -** | | | -** +-----------------------------+ | -** 80: | aSalt | | -** | | | -** +-----------------------------+ | -** 88: | aCksum | | -** | | / -** +-----------------------------+ -** 96: | nBackfill | -** +-----------------------------+ -** 100: | 5 read marks | -** | | -** | | -** | | -** | | -** +-------+-------+------+------+ -** 120: | Write | Ckpt | Rcvr | Rd0 | \ -** 
+-------+-------+------+------+ ) 8 lock bytes -** | Read1 | Read2 | Rd3 | Rd4 | / -** +-------+-------+------+------+ -** 128: | nBackfillAttempted | -** +-----------------------------+ -** 132: | (unused padding) | -** +-----------------------------+ -*/ - -/* A block of WALINDEX_LOCK_RESERVED bytes beginning at -** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems -** only support mandatory file-locks, we do not read or write data -** from the region of the file on which locks are applied. -*/ -#define WALINDEX_LOCK_OFFSET (sizeof(WalIndexHdr)*2+offsetof(WalCkptInfo,aLock)) -#define WALINDEX_HDR_SIZE (sizeof(WalIndexHdr)*2+sizeof(WalCkptInfo)) - -/* Size of header before each frame in wal */ -#define WAL_FRAME_HDRSIZE 24 - -/* Size of write ahead log header, including checksum. */ -#define WAL_HDRSIZE 32 - -/* WAL magic value. Either this value, or the same value with the least -** significant bit also set (WAL_MAGIC | 0x00000001) is stored in 32-bit -** big-endian format in the first 4 bytes of a WAL file. -** -** If the LSB is set, then the checksums for each frame within the WAL -** file are calculated by treating all data as an array of 32-bit -** big-endian words. Otherwise, they are calculated by interpreting -** all data as 32-bit little-endian words. -*/ -#define WAL_MAGIC 0x377f0682 - -/* -** Return the offset of frame iFrame in the write-ahead log file, -** assuming a database page size of szPage bytes. The offset returned -** is to the start of the write-ahead log frame-header. -*/ -#define walFrameOffset(iFrame, szPage) ( \ - WAL_HDRSIZE + ((iFrame)-1)*(i64)((szPage)+WAL_FRAME_HDRSIZE) \ -) - -/* -** An open write-ahead log file is represented by an instance of the -** following object. 
-*/ -struct Wal { - sqlite3_vfs *pVfs; /* The VFS used to create pDbFd */ - sqlite3_file *pDbFd; /* File handle for the database file */ - sqlite3_file *pWalFd; /* File handle for WAL file */ - u32 iCallback; /* Value to pass to log callback (or 0) */ - i64 mxWalSize; /* Truncate WAL to this size upon reset */ - int nWiData; /* Size of array apWiData */ - int szFirstBlock; /* Size of first block written to WAL file */ - volatile u32 **apWiData; /* Pointer to wal-index content in memory */ - u32 szPage; /* Database page size */ - i16 readLock; /* Which read lock is being held. -1 for none */ - u8 syncFlags; /* Flags to use to sync header writes */ - u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ - u8 writeLock; /* True if in a write transaction */ - u8 ckptLock; /* True if holding a checkpoint lock */ - u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ - u8 truncateOnCommit; /* True to truncate WAL file on commit */ - u8 syncHeader; /* Fsync the WAL header if true */ - u8 padToSectorBoundary; /* Pad transactions out to the next sector */ - u8 bShmUnreliable; /* SHM content is read-only and unreliable */ - WalIndexHdr hdr; /* Wal-index header for current transaction */ - u32 minFrame; /* Ignore wal frames before this one */ - u32 iReCksum; /* On commit, recalculate checksums from here */ - const char *zWalName; /* Name of WAL file */ - u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ -#ifdef SQLITE_DEBUG - u8 lockError; /* True if a locking error has occurred */ -#endif -#ifdef SQLITE_ENABLE_SNAPSHOT - WalIndexHdr *pSnapshot; /* Start transaction here if not NULL */ -#endif -#ifdef SQLITE_ENABLE_SETLK_TIMEOUT - sqlite3 *db; -#endif -}; - -/* -** Candidate values for Wal.exclusiveMode. 
-*/ -#define WAL_NORMAL_MODE 0 -#define WAL_EXCLUSIVE_MODE 1 -#define WAL_HEAPMEMORY_MODE 2 - -/* -** Possible values for WAL.readOnly -*/ -#define WAL_RDWR 0 /* Normal read/write connection */ -#define WAL_RDONLY 1 /* The WAL file is readonly */ -#define WAL_SHM_RDONLY 2 /* The SHM file is readonly */ - -/* -** Each page of the wal-index mapping contains a hash-table made up of -** an array of HASHTABLE_NSLOT elements of the following type. -*/ -typedef u16 ht_slot; - -/* -** This structure is used to implement an iterator that loops through -** all frames in the WAL in database page order. Where two or more frames -** correspond to the same database page, the iterator visits only the -** frame most recently written to the WAL (in other words, the frame with -** the largest index). -** -** The internals of this structure are only accessed by: -** -** walIteratorInit() - Create a new iterator, -** walIteratorNext() - Step an iterator, -** walIteratorFree() - Free an iterator. -** -** This functionality is used by the checkpoint code (see walCheckpoint()). -*/ -struct WalIterator { - u32 iPrior; /* Last result returned from the iterator */ - int nSegment; /* Number of entries in aSegment[] */ - struct WalSegment { - int iNext; /* Next slot in aIndex[] not yet returned */ - ht_slot *aIndex; /* i0, i1, i2... such that aPgno[iN] ascend */ - u32 *aPgno; /* Array of page numbers. */ - int nEntry; /* Nr. of entries in aPgno[] and aIndex[] */ - int iZero; /* Frame number associated with aPgno[0] */ - } aSegment[1]; /* One for every 32KB page in the wal-index */ -}; - -/* -** Define the parameters of the hash tables in the wal-index file. There -** is a hash-table following every HASHTABLE_NPAGE page numbers in the -** wal-index. -** -** Changing any of these constants will alter the wal-index format and -** create incompatibilities. 
-*/ -#define HASHTABLE_NPAGE 4096 /* Must be power of 2 */ -#define HASHTABLE_HASH_1 383 /* Should be prime */ -#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ - -/* -** The block of page numbers associated with the first hash-table in a -** wal-index is smaller than usual. This is so that there is a complete -** hash-table on each aligned 32KB page of the wal-index. -*/ -#define HASHTABLE_NPAGE_ONE (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32))) - -/* The wal-index is divided into pages of WALINDEX_PGSZ bytes each. */ -#define WALINDEX_PGSZ ( \ - sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \ -) - -/* -** Obtain a pointer to the iPage'th page of the wal-index. The wal-index -** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are -** numbered from zero. -** -** If the wal-index is currently smaller the iPage pages then the size -** of the wal-index might be increased, but only if it is safe to do -** so. It is safe to enlarge the wal-index if pWal->writeLock is true -** or pWal->exclusiveMode==WAL_HEAPMEMORY_MODE. 
-** -** Three possible result scenarios: -** -** (1) rc==SQLITE_OK and *ppPage==Requested-Wal-Index-Page -** (2) rc>=SQLITE_ERROR and *ppPage==NULL -** (3) rc==SQLITE_OK and *ppPage==NULL // only if iPage==0 -** -** Scenario (3) can only occur when pWal->writeLock is false and iPage==0 -*/ -static SQLITE_NOINLINE int walIndexPageRealloc( - Wal *pWal, /* The WAL context */ - int iPage, /* The page we seek */ - volatile u32 **ppPage /* Write the page pointer here */ -){ - int rc = SQLITE_OK; - - /* Enlarge the pWal->apWiData[] array if required */ - if( pWal->nWiData<=iPage ){ - sqlite3_int64 nByte = sizeof(u32*)*(iPage+1); - volatile u32 **apNew; - apNew = (volatile u32 **)sqlite3Realloc((void *)pWal->apWiData, nByte); - if( !apNew ){ - *ppPage = 0; - return SQLITE_NOMEM; - } - memset((void*)&apNew[pWal->nWiData], 0, - sizeof(u32*)*(iPage+1-pWal->nWiData)); - pWal->apWiData = apNew; - pWal->nWiData = iPage+1; - } - - /* Request a pointer to the required page from the VFS */ - assert( pWal->apWiData[iPage]==0 ); - if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){ - pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ); - if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM; - }else{ - rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, - pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] - ); - assert( pWal->apWiData[iPage]!=0 - || rc!=SQLITE_OK - || (pWal->writeLock==0 && iPage==0) ); - testcase( pWal->apWiData[iPage]==0 && rc==SQLITE_OK ); - if( rc==SQLITE_OK ){ - if( iPage>0 && sqlite3FaultSim(600) ) rc = SQLITE_NOMEM; - }else if( (rc&0xff)==SQLITE_READONLY ){ - pWal->readOnly |= WAL_SHM_RDONLY; - if( rc==SQLITE_READONLY ){ - rc = SQLITE_OK; - } - } - } - - *ppPage = pWal->apWiData[iPage]; - assert( iPage==0 || *ppPage || rc!=SQLITE_OK ); - return rc; -} -static int walIndexPage( - Wal *pWal, /* The WAL context */ - int iPage, /* The page we seek */ - volatile u32 **ppPage /* Write the page pointer here */ -){ - if( pWal->nWiData<=iPage || 
(*ppPage = pWal->apWiData[iPage])==0 ){ - return walIndexPageRealloc(pWal, iPage, ppPage); - } - return SQLITE_OK; -} - -/* -** Return a pointer to the WalCkptInfo structure in the wal-index. -*/ -static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ - assert( pWal->nWiData>0 && pWal->apWiData[0] ); - return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]); -} - -/* -** Return a pointer to the WalIndexHdr structure in the wal-index. -*/ -static volatile WalIndexHdr *walIndexHdr(Wal *pWal){ - assert( pWal->nWiData>0 && pWal->apWiData[0] ); - return (volatile WalIndexHdr*)pWal->apWiData[0]; -} - -/* -** The argument to this macro must be of type u32. On a little-endian -** architecture, it returns the u32 value that results from interpreting -** the 4 bytes as a big-endian value. On a big-endian architecture, it -** returns the value that would be produced by interpreting the 4 bytes -** of the input value as a little-endian integer. -*/ -#define BYTESWAP32(x) ( \ - (((x)&0x000000FF)<<24) + (((x)&0x0000FF00)<<8) \ - + (((x)&0x00FF0000)>>8) + (((x)&0xFF000000)>>24) \ -) - -/* -** Generate or extend an 8 byte checksum based on the data in -** array aByte[] and the initial values of aIn[0] and aIn[1] (or -** initial values of 0 and 0 if aIn==NULL). -** -** The checksum is written back into aOut[] before returning. -** -** nByte must be a positive multiple of 8. -*/ -static void walChecksumBytes( - int nativeCksum, /* True for native byte-order, false for non-native */ - u8 *a, /* Content to be checksummed */ - int nByte, /* Bytes of content in a[]. Must be a multiple of 8. 
*/ - const u32 *aIn, /* Initial checksum value input */ - u32 *aOut /* OUT: Final checksum value output */ -){ - u32 s1, s2; - u32 *aData = (u32 *)a; - u32 *aEnd = (u32 *)&a[nByte]; - - if( aIn ){ - s1 = aIn[0]; - s2 = aIn[1]; - }else{ - s1 = s2 = 0; - } - - assert( nByte>=8 ); - assert( (nByte&0x00000007)==0 ); - assert( nByte<=65536 ); - - if( nativeCksum ){ - do { - s1 += *aData++ + s2; - s2 += *aData++ + s1; - }while( aDataexclusiveMode!=WAL_HEAPMEMORY_MODE ){ - sqlite3OsShmBarrier(pWal->pDbFd); - } -} - -/* -** Add the SQLITE_NO_TSAN as part of the return-type of a function -** definition as a hint that the function contains constructs that -** might give false-positive TSAN warnings. -** -** See tag-20200519-1. -*/ -#if defined(__clang__) && !defined(SQLITE_NO_TSAN) -# define SQLITE_NO_TSAN __attribute__((no_sanitize_thread)) -#else -# define SQLITE_NO_TSAN -#endif - -/* -** Write the header information in pWal->hdr into the wal-index. -** -** The checksum on pWal->hdr is updated before it is written. -*/ -static SQLITE_NO_TSAN void walIndexWriteHdr(Wal *pWal){ - volatile WalIndexHdr *aHdr = walIndexHdr(pWal); - const int nCksum = offsetof(WalIndexHdr, aCksum); - - assert( pWal->writeLock ); - pWal->hdr.isInit = 1; - pWal->hdr.iVersion = WALINDEX_MAX_VERSION; - walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum); - /* Possible TSAN false-positive. See tag-20200519-1 */ - memcpy((void*)&aHdr[1], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); - walShmBarrier(pWal); - memcpy((void*)&aHdr[0], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); -} - -/* -** This function encodes a single frame header and writes it to a buffer -** supplied by the caller. A frame-header is made up of a series of -** 4-byte big-endian integers, as follows: -** -** 0: Page number. -** 4: For commit records, the size of the database image in pages -** after the commit. For all other records, zero. 
-** 8: Salt-1 (copied from the wal-header) -** 12: Salt-2 (copied from the wal-header) -** 16: Checksum-1. -** 20: Checksum-2. -*/ -static void walEncodeFrame( - Wal *pWal, /* The write-ahead log */ - u32 iPage, /* Database page number for frame */ - u32 nTruncate, /* New db size (or 0 for non-commit frames) */ - u8 *aData, /* Pointer to page data */ - u8 *aFrame /* OUT: Write encoded frame here */ -){ - int nativeCksum; /* True for native byte-order checksums */ - u32 *aCksum = pWal->hdr.aFrameCksum; - assert( WAL_FRAME_HDRSIZE==24 ); - sqlite3Put4byte(&aFrame[0], iPage); - sqlite3Put4byte(&aFrame[4], nTruncate); - if( pWal->iReCksum==0 ){ - memcpy(&aFrame[8], pWal->hdr.aSalt, 8); - - nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); - walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); - walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); - - sqlite3Put4byte(&aFrame[16], aCksum[0]); - sqlite3Put4byte(&aFrame[20], aCksum[1]); - }else{ - memset(&aFrame[8], 0, 16); - } -} - -/* -** Check to see if the frame with header in aFrame[] and content -** in aData[] is valid. If it is a valid frame, fill *piPage and -** *pnTruncate and return true. Return if the frame is not valid. -*/ -static int walDecodeFrame( - Wal *pWal, /* The write-ahead log */ - u32 *piPage, /* OUT: Database page number for frame */ - u32 *pnTruncate, /* OUT: New db size (or 0 if not commit) */ - u8 *aData, /* Pointer to page data (for checksum) */ - u8 *aFrame /* Frame data */ -){ - int nativeCksum; /* True for native byte-order checksums */ - u32 *aCksum = pWal->hdr.aFrameCksum; - u32 pgno; /* Page number of the frame */ - assert( WAL_FRAME_HDRSIZE==24 ); - - /* A frame is only valid if the salt values in the frame-header - ** match the salt values in the wal-header. - */ - if( memcmp(&pWal->hdr.aSalt, &aFrame[8], 8)!=0 ){ - return 0; - } - - /* A frame is only valid if the page number is creater than zero. 
- */ - pgno = sqlite3Get4byte(&aFrame[0]); - if( pgno==0 ){ - return 0; - } - - /* A frame is only valid if a checksum of the WAL header, - ** all prior frams, the first 16 bytes of this frame-header, - ** and the frame-data matches the checksum in the last 8 - ** bytes of this frame-header. - */ - nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); - walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); - walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); - if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) - || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) - ){ - /* Checksum failed. */ - return 0; - } - - /* If we reach this point, the frame is valid. Return the page number - ** and the new database size. - */ - *piPage = pgno; - *pnTruncate = sqlite3Get4byte(&aFrame[4]); - return 1; -} - - -#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) -/* -** Names of locks. This routine is used to provide debugging output and is not -** a part of an ordinary build. -*/ -static const char *walLockName(int lockIdx){ - if( lockIdx==WAL_WRITE_LOCK ){ - return "WRITE-LOCK"; - }else if( lockIdx==WAL_CKPT_LOCK ){ - return "CKPT-LOCK"; - }else if( lockIdx==WAL_RECOVER_LOCK ){ - return "RECOVER-LOCK"; - }else{ - static char zName[15]; - sqlite3_snprintf(sizeof(zName), zName, "READ-LOCK[%d]", - lockIdx-WAL_READ_LOCK(0)); - return zName; - } -} -#endif /*defined(SQLITE_TEST) || defined(SQLITE_DEBUG) */ - - -/* -** Set or release locks on the WAL. Locks are either shared or exclusive. -** A lock cannot be moved directly between shared and exclusive - it must go -** through the unlocked state first. -** -** In locking_mode=EXCLUSIVE, all of these routines become no-ops. -*/ -static int walLockShared(Wal *pWal, int lockIdx){ - int rc; - if( pWal->exclusiveMode ) return SQLITE_OK; - rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, - SQLITE_SHM_LOCK | SQLITE_SHM_SHARED); - WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal, - walLockName(lockIdx), rc ? 
"failed" : "ok")); - VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && (rc&0xFF)!=SQLITE_BUSY); ) - return rc; -} -static void walUnlockShared(Wal *pWal, int lockIdx){ - if( pWal->exclusiveMode ) return; - (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, - SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED); - WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx))); -} -static int walLockExclusive(Wal *pWal, int lockIdx, int n){ - int rc; - if( pWal->exclusiveMode ) return SQLITE_OK; - rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, - SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE); - WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal, - walLockName(lockIdx), n, rc ? "failed" : "ok")); - VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && (rc&0xFF)!=SQLITE_BUSY); ) - return rc; -} -static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){ - if( pWal->exclusiveMode ) return; - (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, - SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE); - WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal, - walLockName(lockIdx), n)); -} - -/* -** Compute a hash on a page number. The resulting hash value must land -** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances -** the hash to the next value in the event of a collision. -*/ -static int walHash(u32 iPage){ - assert( iPage>0 ); - assert( (HASHTABLE_NSLOT & (HASHTABLE_NSLOT-1))==0 ); - return (iPage*HASHTABLE_HASH_1) & (HASHTABLE_NSLOT-1); -} -static int walNextHash(int iPriorHash){ - return (iPriorHash+1)&(HASHTABLE_NSLOT-1); -} - -/* -** An instance of the WalHashLoc object is used to describe the location -** of a page hash table in the wal-index. This becomes the return value -** from walHashGet(). 
-*/ -typedef struct WalHashLoc WalHashLoc; -struct WalHashLoc { - volatile ht_slot *aHash; /* Start of the wal-index hash table */ - volatile u32 *aPgno; /* aPgno[1] is the page of first frame indexed */ - u32 iZero; /* One less than the frame number of first indexed*/ -}; - -/* -** Return pointers to the hash table and page number array stored on -** page iHash of the wal-index. The wal-index is broken into 32KB pages -** numbered starting from 0. -** -** Set output variable pLoc->aHash to point to the start of the hash table -** in the wal-index file. Set pLoc->iZero to one less than the frame -** number of the first frame indexed by this hash table. If a -** slot in the hash table is set to N, it refers to frame number -** (pLoc->iZero+N) in the log. -** -** Finally, set pLoc->aPgno so that pLoc->aPgno[0] is the page number of the -** first frame indexed by the hash table, frame (pLoc->iZero). -*/ -static int walHashGet( - Wal *pWal, /* WAL handle */ - int iHash, /* Find the iHash'th table */ - WalHashLoc *pLoc /* OUT: Hash table location */ -){ - int rc; /* Return code */ - - rc = walIndexPage(pWal, iHash, &pLoc->aPgno); - assert( rc==SQLITE_OK || iHash>0 ); - - if( pLoc->aPgno ){ - pLoc->aHash = (volatile ht_slot *)&pLoc->aPgno[HASHTABLE_NPAGE]; - if( iHash==0 ){ - pLoc->aPgno = &pLoc->aPgno[WALINDEX_HDR_SIZE/sizeof(u32)]; - pLoc->iZero = 0; - }else{ - pLoc->iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE; - } - }else if( NEVER(rc==SQLITE_OK) ){ - rc = SQLITE_ERROR; - } - return rc; -} - -/* -** Return the number of the wal-index page that contains the hash-table -** and page-number array that contain entries corresponding to WAL frame -** iFrame. The wal-index is broken up into 32KB pages. Wal-index pages -** are numbered starting from 0. 
-*/ -static int walFramePage(u32 iFrame){ - int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE; - assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE) - && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE) - && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)) - && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE) - && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE)) - ); - assert( iHash>=0 ); - return iHash; -} - -/* -** Return the page number associated with frame iFrame in this WAL. -*/ -static u32 walFramePgno(Wal *pWal, u32 iFrame){ - int iHash = walFramePage(iFrame); - if( iHash==0 ){ - return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1]; - } - return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE]; -} - -/* -** Remove entries from the hash table that point to WAL slots greater -** than pWal->hdr.mxFrame. -** -** This function is called whenever pWal->hdr.mxFrame is decreased due -** to a rollback or savepoint. -** -** At most only the hash table containing pWal->hdr.mxFrame needs to be -** updated. Any later hash tables will be automatically cleared when -** pWal->hdr.mxFrame advances to the point where those hash tables are -** actually needed. -*/ -static void walCleanupHash(Wal *pWal){ - WalHashLoc sLoc; /* Hash table location */ - int iLimit = 0; /* Zero values greater than this */ - int nByte; /* Number of bytes to zero in aPgno[] */ - int i; /* Used to iterate through aHash[] */ - - assert( pWal->writeLock ); - testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE-1 ); - testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE ); - testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE+1 ); - - if( pWal->hdr.mxFrame==0 ) return; - - /* Obtain pointers to the hash-table and page-number array containing - ** the entry that corresponds to frame pWal->hdr.mxFrame. 
It is guaranteed - ** that the page said hash-table and array reside on is already mapped.(1) - */ - assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) ); - assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] ); - i = walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &sLoc); - if( NEVER(i) ) return; /* Defense-in-depth, in case (1) above is wrong */ - - /* Zero all hash-table entries that correspond to frame numbers greater - ** than pWal->hdr.mxFrame. - */ - iLimit = pWal->hdr.mxFrame - sLoc.iZero; - assert( iLimit>0 ); - for(i=0; iiLimit ){ - sLoc.aHash[i] = 0; - } - } - - /* Zero the entries in the aPgno array that correspond to frames with - ** frame numbers greater than pWal->hdr.mxFrame. - */ - nByte = (int)((char *)sLoc.aHash - (char *)&sLoc.aPgno[iLimit]); - assert( nByte>=0 ); - memset((void *)&sLoc.aPgno[iLimit], 0, nByte); - -#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT - /* Verify that the every entry in the mapping region is still reachable - ** via the hash table even after the cleanup. - */ - if( iLimit ){ - int j; /* Loop counter */ - int iKey; /* Hash key */ - for(j=0; j=0 ); - memset((void*)sLoc.aPgno, 0, nByte); - } - - /* If the entry in aPgno[] is already set, then the previous writer - ** must have exited unexpectedly in the middle of a transaction (after - ** writing one or more dirty pages to the WAL to free up memory). - ** Remove the remnants of that writers uncommitted transaction from - ** the hash-table before writing any new entries. - */ - if( sLoc.aPgno[idx-1] ){ - walCleanupHash(pWal); - assert( !sLoc.aPgno[idx-1] ); - } - - /* Write the aPgno[] array entry and the hash-table slot. 
*/ - nCollide = idx; - for(iKey=walHash(iPage); sLoc.aHash[iKey]; iKey=walNextHash(iKey)){ - if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT; - } - sLoc.aPgno[idx-1] = iPage; - AtomicStore(&sLoc.aHash[iKey], (ht_slot)idx); - -#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT - /* Verify that the number of entries in the hash table exactly equals - ** the number of entries in the mapping region. - */ - { - int i; /* Loop counter */ - int nEntry = 0; /* Number of entries in the hash table */ - for(i=0; ickptLock==1 || pWal->ckptLock==0 ); - assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 ); - assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE ); - assert( pWal->writeLock ); - iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock; - rc = walLockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock); - if( rc ){ - return rc; - } - - WALTRACE(("WAL%p: recovery begin...\n", pWal)); - - memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); - - rc = sqlite3OsFileSize(pWal->pWalFd, &nSize); - if( rc!=SQLITE_OK ){ - goto recovery_error; - } - - if( nSize>WAL_HDRSIZE ){ - u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ - u32 *aPrivate = 0; /* Heap copy of *-shm hash being populated */ - u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ - int szFrame; /* Number of bytes in buffer aFrame[] */ - u8 *aData; /* Pointer to data part of aFrame buffer */ - int szPage; /* Page size according to the log */ - u32 magic; /* Magic value read from WAL header */ - u32 version; /* Magic value read from WAL header */ - int isValid; /* True if this frame is valid */ - u32 iPg; /* Current 32KB wal-index page */ - u32 iLastFrame; /* Last frame in wal, based on nSize alone */ - - /* Read in the WAL header. */ - rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); - if( rc!=SQLITE_OK ){ - goto recovery_error; - } - - /* If the database page size is not a power of two, or is greater than - ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid - ** data. 
Similarly, if the 'magic' value is invalid, ignore the whole - ** WAL file. - */ - magic = sqlite3Get4byte(&aBuf[0]); - szPage = sqlite3Get4byte(&aBuf[8]); - if( (magic&0xFFFFFFFE)!=WAL_MAGIC - || szPage&(szPage-1) - || szPage>SQLITE_MAX_PAGE_SIZE - || szPage<512 - ){ - goto finished; - } - pWal->hdr.bigEndCksum = (u8)(magic&0x00000001); - pWal->szPage = szPage; - pWal->nCkpt = sqlite3Get4byte(&aBuf[12]); - memcpy(&pWal->hdr.aSalt, &aBuf[16], 8); - - /* Verify that the WAL header checksum is correct */ - walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN, - aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum - ); - if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24]) - || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28]) - ){ - goto finished; - } - - /* Verify that the version number on the WAL format is one that - ** are able to understand */ - version = sqlite3Get4byte(&aBuf[4]); - if( version!=WAL_MAX_VERSION ){ - rc = SQLITE_CANTOPEN_BKPT; - goto finished; - } - - /* Malloc a buffer to read frames into. */ - szFrame = szPage + WAL_FRAME_HDRSIZE; - aFrame = (u8 *)sqlite3_malloc64(szFrame + WALINDEX_PGSZ); - if( !aFrame ){ - rc = SQLITE_NOMEM; - goto recovery_error; - } - aData = &aFrame[WAL_FRAME_HDRSIZE]; - aPrivate = (u32*)&aData[szPage]; - - /* Read all frames from the log file. 
*/ - iLastFrame = (nSize - WAL_HDRSIZE) / szFrame; - for(iPg=0; iPg<=(u32)walFramePage(iLastFrame); iPg++){ - u32 *aShare; - u32 iFrame; /* Index of last frame read */ - u32 iLast = MIN(iLastFrame, HASHTABLE_NPAGE_ONE+iPg*HASHTABLE_NPAGE); - u32 iFirst = 1 + (iPg==0?0:HASHTABLE_NPAGE_ONE+(iPg-1)*HASHTABLE_NPAGE); - u32 nHdr, nHdr32; - rc = walIndexPage(pWal, iPg, (volatile u32**)&aShare); - assert( aShare!=0 || rc!=SQLITE_OK ); - if( aShare==0 ) break; - pWal->apWiData[iPg] = aPrivate; - - for(iFrame=iFirst; iFrame<=iLast; iFrame++){ - i64 iOffset = walFrameOffset(iFrame, szPage); - u32 pgno; /* Database page number for frame */ - u32 nTruncate; /* dbsize field from frame header */ - - /* Read and decode the next log frame. */ - rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); - if( rc!=SQLITE_OK ) break; - isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); - if( !isValid ) break; - rc = walIndexAppend(pWal, iFrame, pgno); - if( NEVER(rc!=SQLITE_OK) ) break; - - /* If nTruncate is non-zero, this is a commit record. */ - if( nTruncate ){ - pWal->hdr.mxFrame = iFrame; - pWal->hdr.nPage = nTruncate; - pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); - testcase( szPage<=32768 ); - testcase( szPage>=65536 ); - aFrameCksum[0] = pWal->hdr.aFrameCksum[0]; - aFrameCksum[1] = pWal->hdr.aFrameCksum[1]; - } - } - pWal->apWiData[iPg] = aShare; - nHdr = (iPg==0 ? WALINDEX_HDR_SIZE : 0); - nHdr32 = nHdr / sizeof(u32); -#ifndef SQLITE_SAFER_WALINDEX_RECOVERY - /* Memcpy() should work fine here, on all reasonable implementations. - ** Technically, memcpy() might change the destination to some - ** intermediate value before setting to the final value, and that might - ** cause a concurrent reader to malfunction. Memcpy() is allowed to - ** do that, according to the spec, but no memcpy() implementation that - ** we know of actually does that, which is why we say that memcpy() - ** is safe for this. Memcpy() is certainly a lot faster. 
- */ - memcpy(&aShare[nHdr32], &aPrivate[nHdr32], WALINDEX_PGSZ-nHdr); -#else - /* In the event that some platform is found for which memcpy() - ** changes the destination to some intermediate value before - ** setting the final value, this alternative copy routine is - ** provided. - */ - { - int i; - for(i=nHdr32; ihdr.aFrameCksum[0] = aFrameCksum[0]; - pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; - walIndexWriteHdr(pWal); - - /* Reset the checkpoint-header. This is safe because this thread is - ** currently holding locks that exclude all other writers and - ** checkpointers. Then set the values of read-mark slots 1 through N. - */ - pInfo = walCkptInfo(pWal); - pInfo->nBackfill = 0; - pInfo->nBackfillAttempted = pWal->hdr.mxFrame; - pInfo->aReadMark[0] = 0; - for(i=1; ihdr.mxFrame ){ - pInfo->aReadMark[i] = pWal->hdr.mxFrame; - }else{ - pInfo->aReadMark[i] = READMARK_NOT_USED; - } - walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); - }else if( rc!=SQLITE_BUSY ){ - goto recovery_error; - } - } - - /* If more than one frame was recovered from the log file, report an - ** event via sqlite3_log(). This is to help with identifying performance - ** problems caused by applications routinely shutting down without - ** checkpointing the log file. - */ - if( pWal->hdr.nPage ){ - sqlite3_log(SQLITE_NOTICE_RECOVER_WAL, - "recovered %d frames from WAL file %s", - pWal->hdr.mxFrame, pWal->zWalName - ); - } - } - -recovery_error: - WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok")); - walUnlockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock); - return rc; -} - -/* -** Close an open wal-index. 
-*/ -static void walIndexClose(Wal *pWal, int isDelete){ - if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE || pWal->bShmUnreliable ){ - int i; - for(i=0; inWiData; i++){ - sqlite3_free((void *)pWal->apWiData[i]); - pWal->apWiData[i] = 0; - } - } - if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){ - sqlite3OsShmUnmap(pWal->pDbFd, isDelete); - } -} - -/* -** Open a connection to the WAL file zWalName. The database file must -** already be opened on connection pDbFd. The buffer that zWalName points -** to must remain valid for the lifetime of the returned Wal* handle. -** -** A SHARED lock should be held on the database file when this function -** is called. The purpose of this SHARED lock is to prevent any other -** client from unlinking the WAL or wal-index file. If another process -** were to do this just after this client opened one of these files, the -** system would be badly broken. -** -** If the log file is successfully opened, SQLITE_OK is returned and -** *ppWal is set to point to a new WAL handle. If an error occurs, -** an SQLite error code is returned and *ppWal is left unmodified. -*/ -int sqlite3WalOpen( - sqlite3_vfs *pVfs, /* vfs module to open wal and wal-index */ - sqlite3_file *pDbFd, /* The open database file */ - const char *zWalName, /* Name of the WAL file */ - int bNoShm, /* True to run in heap-memory mode */ - i64 mxWalSize, /* Truncate WAL to this size on reset */ - Wal **ppWal /* OUT: Allocated Wal handle */ -){ - int rc; /* Return Code */ - Wal *pRet; /* Object to allocate and return */ - int flags; /* Flags passed to OsOpen() */ - - assert( zWalName && zWalName[0] ); - assert( pDbFd ); - - /* Verify the values of various constants. Any changes to the values - ** of these constants would result in an incompatible on-disk format - ** for the -shm file. Any change that causes one of these asserts to - ** fail is a backward compatibility problem, even if the change otherwise - ** works. 
- ** - ** This table also serves as a helpful cross-reference when trying to - ** interpret hex dumps of the -shm file. - */ - assert( 48 == sizeof(WalIndexHdr) ); - assert( 40 == sizeof(WalCkptInfo) ); - assert( 120 == WALINDEX_LOCK_OFFSET ); - assert( 136 == WALINDEX_HDR_SIZE ); - assert( 4096 == HASHTABLE_NPAGE ); - assert( 4062 == HASHTABLE_NPAGE_ONE ); - assert( 8192 == HASHTABLE_NSLOT ); - assert( 383 == HASHTABLE_HASH_1 ); - assert( 32768 == WALINDEX_PGSZ ); - assert( 8 == SQLITE_SHM_NLOCK ); - assert( 5 == WAL_NREADER ); - assert( 24 == WAL_FRAME_HDRSIZE ); - assert( 32 == WAL_HDRSIZE ); - assert( 120 == WALINDEX_LOCK_OFFSET + WAL_WRITE_LOCK ); - assert( 121 == WALINDEX_LOCK_OFFSET + WAL_CKPT_LOCK ); - assert( 122 == WALINDEX_LOCK_OFFSET + WAL_RECOVER_LOCK ); - assert( 123 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(0) ); - assert( 124 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(1) ); - assert( 125 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(2) ); - assert( 126 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(3) ); - assert( 127 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(4) ); - - /* In the amalgamation, the os_unix.c and os_win.c source files come before - ** this source file. Verify that the #defines of the locking byte offsets - ** in os_unix.c and os_win.c agree with the WALINDEX_LOCK_OFFSET value. - ** For that matter, if the lock offset ever changes from its initial design - ** value of 120, we need to know that so there is an assert() to check it. - */ -#ifdef WIN_SHM_BASE - assert( WIN_SHM_BASE==WALINDEX_LOCK_OFFSET ); -#endif -#ifdef UNIX_SHM_BASE - assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET ); -#endif - - - /* Allocate an instance of struct Wal to return. 
*/ - *ppWal = 0; - pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile); - if( !pRet ){ - return SQLITE_NOMEM; - } - - pRet->pVfs = pVfs; - pRet->pWalFd = (sqlite3_file *)&pRet[1]; - pRet->pDbFd = pDbFd; - pRet->readLock = -1; - pRet->mxWalSize = mxWalSize; - pRet->zWalName = zWalName; - pRet->syncHeader = 1; - pRet->padToSectorBoundary = 1; - pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); - - /* Open file handle on the write-ahead log file. */ - flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); - rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); - if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){ - pRet->readOnly = WAL_RDONLY; - } - - if( rc!=SQLITE_OK ){ - walIndexClose(pRet, 0); - sqlite3OsClose(pRet->pWalFd); - sqlite3_free(pRet); - }else{ - int iDC = sqlite3OsDeviceCharacteristics(pDbFd); - if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->syncHeader = 0; } - if( iDC & SQLITE_IOCAP_POWERSAFE_OVERWRITE ){ - pRet->padToSectorBoundary = 0; - } - *ppWal = pRet; - WALTRACE(("WAL%d: opened\n", pRet)); - } - return rc; -} - -/* -** Change the size to which the WAL file is trucated on each reset. -*/ -void sqlite3WalLimit(Wal *pWal, i64 iLimit){ - if( pWal ) pWal->mxWalSize = iLimit; -} - -/* -** Find the smallest page number out of all pages held in the WAL that -** has not been returned by any prior invocation of this method on the -** same WalIterator object. Write into *piFrame the frame index where -** that page was last written into the WAL. Write into *piPage the page -** number. -** -** Return 0 on success. If there are no pages in the WAL with a page -** number larger than *piPage, then return 1. 
-*/ -static int walIteratorNext( - WalIterator *p, /* Iterator */ - u32 *piPage, /* OUT: The page number of the next page */ - u32 *piFrame /* OUT: Wal frame index of next page */ -){ - u32 iMin; /* Result pgno must be greater than iMin */ - u32 iRet = 0xFFFFFFFF; /* 0xffffffff is never a valid page number */ - int i; /* For looping through segments */ - - iMin = p->iPrior; - assert( iMin<0xffffffff ); - for(i=p->nSegment-1; i>=0; i--){ - struct WalSegment *pSegment = &p->aSegment[i]; - while( pSegment->iNextnEntry ){ - u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]]; - if( iPg>iMin ){ - if( iPgiZero + pSegment->aIndex[pSegment->iNext]; - } - break; - } - pSegment->iNext++; - } - } - - *piPage = p->iPrior = iRet; - return (iRet==0xFFFFFFFF); -} - -/* -** This function merges two sorted lists into a single sorted list. -** -** aLeft[] and aRight[] are arrays of indices. The sort key is -** aContent[aLeft[]] and aContent[aRight[]]. Upon entry, the following -** is guaranteed for all J0 && nRight>0 ); - while( iRight=nRight || aContent[aLeft[iLeft]]=nLeft || aContent[aLeft[iLeft]]>dbpage ); - assert( iRight>=nRight || aContent[aRight[iRight]]>dbpage ); - } - - *paRight = aLeft; - *pnRight = iOut; - memcpy(aLeft, aTmp, sizeof(aTmp[0])*iOut); -} - -/* -** Sort the elements in list aList using aContent[] as the sort key. -** Remove elements with duplicate keys, preferring to keep the -** larger aList[] values. -** -** The aList[] entries are indices into aContent[]. 
The values in -** aList[] are to be sorted so that for all J0 ); - assert( HASHTABLE_NPAGE==(1<<(ArraySize(aSub)-1)) ); - - for(iList=0; iListaList && p->nList<=(1<aList==&aList[iList&~((2<aList, p->nList, &aMerge, &nMerge, aBuffer); - } - aSub[iSub].aList = aMerge; - aSub[iSub].nList = nMerge; - } - - for(iSub++; iSubnList<=(1<aList==&aList[nList&~((2<aList, p->nList, &aMerge, &nMerge, aBuffer); - } - } - assert( aMerge==aList ); - *pnList = nMerge; - -#ifdef SQLITE_DEBUG - { - int i; - for(i=1; i<*pnList; i++){ - assert( aContent[aList[i]] > aContent[aList[i-1]] ); - } - } -#endif -} - -/* -** Free an iterator allocated by walIteratorInit(). -*/ -static void walIteratorFree(WalIterator *p){ - sqlite3_free(p); -} - -/* -** Construct a WalInterator object that can be used to loop over all -** pages in the WAL following frame nBackfill in ascending order. Frames -** nBackfill or earlier may be included - excluding them is an optimization -** only. The caller must hold the checkpoint lock. -** -** On success, make *pp point to the newly allocated WalInterator object -** return SQLITE_OK. Otherwise, return an error code. If this routine -** returns an error, the value of *pp is undefined. -** -** The calling routine should invoke walIteratorFree() to destroy the -** WalIterator object when it has finished with it. -*/ -static int walIteratorInit(Wal *pWal, u32 nBackfill, WalIterator **pp){ - WalIterator *p; /* Return value */ - int nSegment; /* Number of segments to merge */ - u32 iLast; /* Last frame in log */ - sqlite3_int64 nByte; /* Number of bytes to allocate */ - int i; /* Iterator variable */ - ht_slot *aTmp; /* Temp space used by merge-sort */ - int rc = SQLITE_OK; /* Return Code */ - - /* This routine only runs while holding the checkpoint lock. And - ** it only runs if there is actually content in the log (mxFrame>0). - */ - assert( pWal->ckptLock && pWal->hdr.mxFrame>0 ); - iLast = pWal->hdr.mxFrame; - - /* Allocate space for the WalIterator object. 
*/ - nSegment = walFramePage(iLast) + 1; - nByte = sizeof(WalIterator) - + (nSegment-1)*sizeof(struct WalSegment) - + iLast*sizeof(ht_slot); - p = (WalIterator *)sqlite3_malloc64(nByte); - if( !p ){ - return SQLITE_NOMEM; - } - memset(p, 0, nByte); - p->nSegment = nSegment; - - /* Allocate temporary space used by the merge-sort routine. This block - ** of memory will be freed before this function returns. - */ - aTmp = (ht_slot *)sqlite3_malloc64( - sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) - ); - if( !aTmp ){ - rc = SQLITE_NOMEM; - } - - for(i=walFramePage(nBackfill+1); rc==SQLITE_OK && iaSegment[p->nSegment])[sLoc.iZero]; - sLoc.iZero++; - - for(j=0; jaSegment[i].iZero = sLoc.iZero; - p->aSegment[i].nEntry = nEntry; - p->aSegment[i].aIndex = aIndex; - p->aSegment[i].aPgno = (u32 *)sLoc.aPgno; - } - } - sqlite3_free(aTmp); - - if( rc!=SQLITE_OK ){ - walIteratorFree(p); - p = 0; - } - *pp = p; - return rc; -} - -#ifdef SQLITE_ENABLE_SETLK_TIMEOUT -/* -** Attempt to enable blocking locks. Blocking locks are enabled only if (a) -** they are supported by the VFS, and (b) the database handle is configured -** with a busy-timeout. Return 1 if blocking locks are successfully enabled, -** or 0 otherwise. -*/ -static int walEnableBlocking(Wal *pWal){ - int res = 0; - if( pWal->db ){ - int tmout = pWal->db->busyTimeout; - if( tmout ){ - int rc; - rc = sqlite3OsFileControl( - pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&tmout - ); - res = (rc==SQLITE_OK); - } - } - return res; -} - -/* -** Disable blocking locks. -*/ -static void walDisableBlocking(Wal *pWal){ - int tmout = 0; - sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&tmout); -} - -/* -** If parameter bLock is true, attempt to enable blocking locks, take -** the WRITER lock, and then disable blocking locks. If blocking locks -** cannot be enabled, no attempt to obtain the WRITER lock is made. Return -** an SQLite error code if an error occurs, or SQLITE_OK otherwise. 
It is not -** an error if blocking locks can not be enabled. -** -** If the bLock parameter is false and the WRITER lock is held, release it. -*/ -int sqlite3WalWriteLock(Wal *pWal, int bLock){ - int rc = SQLITE_OK; - assert( pWal->readLock<0 || bLock==0 ); - if( bLock ){ - assert( pWal->db ); - if( walEnableBlocking(pWal) ){ - rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); - if( rc==SQLITE_OK ){ - pWal->writeLock = 1; - } - walDisableBlocking(pWal); - } - }else if( pWal->writeLock ){ - walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); - pWal->writeLock = 0; - } - return rc; -} - -/* -** Set the database handle used to determine if blocking locks are required. -*/ -void sqlite3WalDb(Wal *pWal, sqlite3 *db){ - pWal->db = db; -} - -/* -** Take an exclusive WRITE lock. Blocking if so configured. -*/ -static int walLockWriter(Wal *pWal){ - int rc; - walEnableBlocking(pWal); - rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); - walDisableBlocking(pWal); - return rc; -} -#else -# define walEnableBlocking(x) 0 -# define walDisableBlocking(x) -# define walLockWriter(pWal) walLockExclusive((pWal), WAL_WRITE_LOCK, 1) -# define sqlite3WalDb(pWal, db) -#endif /* ifdef SQLITE_ENABLE_SETLK_TIMEOUT */ - - -/* -** Attempt to obtain the exclusive WAL lock defined by parameters lockIdx and -** n. If the attempt fails and parameter xBusy is not NULL, then it is a -** busy-handler function. Invoke it and retry the lock until either the -** lock is successfully obtained or the busy-handler returns 0. 
-*/ -static int walBusyLock( - Wal *pWal, /* WAL connection */ - int (*xBusy)(void*), /* Function to call when busy */ - void *pBusyArg, /* Context argument for xBusyHandler */ - int lockIdx, /* Offset of first byte to lock */ - int n /* Number of bytes to lock */ -){ - int rc; - do { - rc = walLockExclusive(pWal, lockIdx, n); - }while( xBusy && rc==SQLITE_BUSY && xBusy(pBusyArg) ); -#ifdef SQLITE_ENABLE_SETLK_TIMEOUT - if( rc==SQLITE_BUSY_TIMEOUT ){ - walDisableBlocking(pWal); - rc = SQLITE_BUSY; - } -#endif - return rc; -} - -/* -** The cache of the wal-index header must be valid to call this function. -** Return the page-size in bytes used by the database. -*/ -static int walPagesize(Wal *pWal){ - return (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); -} - -/* -** The following is guaranteed when this function is called: -** -** a) the WRITER lock is held, -** b) the entire log file has been checkpointed, and -** c) any existing readers are reading exclusively from the database -** file - there are no readers that may attempt to read a frame from -** the log file. -** -** This function updates the shared-memory structures so that the next -** client to write to the database (which may be this one) does so by -** writing frames into the start of the log file. -** -** The value of parameter salt1 is used as the aSalt[1] value in the -** new wal-index header. It should be passed a pseudo-random value (i.e. -** one obtained from sqlite3_randomness()). 
-*/ -static void walRestartHdr(Wal *pWal, u32 salt1){ - volatile WalCkptInfo *pInfo = walCkptInfo(pWal); - int i; /* Loop counter */ - u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ - pWal->nCkpt++; - pWal->hdr.mxFrame = 0; - sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); - memcpy(&pWal->hdr.aSalt[1], &salt1, 4); - walIndexWriteHdr(pWal); - AtomicStore(&pInfo->nBackfill, 0); - pInfo->nBackfillAttempted = 0; - pInfo->aReadMark[1] = 0; - for(i=2; iaReadMark[i] = READMARK_NOT_USED; - assert( pInfo->aReadMark[0]==0 ); -} - -/* -** Copy as much content as we can from the WAL back into the database file -** in response to an sqlite3_wal_checkpoint() request or the equivalent. -** -** The amount of information copies from WAL to database might be limited -** by active readers. This routine will never overwrite a database page -** that a concurrent reader might be using. -** -** All I/O barrier operations (a.k.a fsyncs) occur in this routine when -** SQLite is in WAL-mode in synchronous=NORMAL. That means that if -** checkpoints are always run by a background thread or background -** process, foreground threads will never block on a lengthy fsync call. -** -** Fsync is called on the WAL before writing content out of the WAL and -** into the database. This ensures that if the new content is persistent -** in the WAL and can be recovered following a power-loss or hard reset. -** -** Fsync is also called on the database file if (and only if) the entire -** WAL content is copied into the database file. This second fsync makes -** it safe to delete the WAL since the new content will persist in the -** database file. -** -** This routine uses and updates the nBackfill field of the wal-index header. -** This is the only routine that will increase the value of nBackfill. -** (A WAL reset or recovery will revert nBackfill to zero, but not increase -** its value.) 
-** -** The caller must be holding sufficient locks to ensure that no other -** checkpoint is running (in any other thread or process) at the same -** time. -*/ -static int walCheckpoint( - Wal *pWal, /* Wal connection */ - sqlite3 *db, /* Check for interrupts on this handle */ - int eMode, /* One of PASSIVE, FULL or RESTART */ - int (*xBusy)(void*), /* Function to call when busy */ - void *pBusyArg, /* Context argument for xBusyHandler */ - int sync_flags, /* Flags for OsSync() (or 0) */ - u8 *zBuf /* Temporary buffer to use */ -){ - int rc = SQLITE_OK; /* Return code */ - int szPage; /* Database page-size */ - WalIterator *pIter = 0; /* Wal iterator context */ - u32 iDbpage = 0; /* Next database page to write */ - u32 iFrame = 0; /* Wal frame containing data for iDbpage */ - u32 mxSafeFrame; /* Max frame that can be backfilled */ - u32 mxPage; /* Max database page to write */ - int i; /* Loop counter */ - volatile WalCkptInfo *pInfo; /* The checkpoint status information */ - - szPage = walPagesize(pWal); - testcase( szPage<=32768 ); - testcase( szPage>=65536 ); - pInfo = walCkptInfo(pWal); - if( pInfo->nBackfillhdr.mxFrame ){ - - /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked - ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ - assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); - - /* Compute in mxSafeFrame the index of the last frame of the WAL that is - ** safe to write into the database. Frames beyond mxSafeFrame might - ** overwrite database pages that are in use by active readers and thus - ** cannot be backfilled from the WAL. - */ - mxSafeFrame = pWal->hdr.mxFrame; - mxPage = pWal->hdr.nPage; - for(i=1; iaReadMark+i); - if( mxSafeFrame>y ){ - assert( y<=pWal->hdr.mxFrame ); - rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); - if( rc==SQLITE_OK ){ - u32 iMark = (i==1 ? 
mxSafeFrame : READMARK_NOT_USED); - AtomicStore(pInfo->aReadMark+i, iMark); - walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); - }else if( rc==SQLITE_BUSY ){ - mxSafeFrame = y; - xBusy = 0; - }else{ - goto walcheckpoint_out; - } - } - } - - /* Allocate the iterator */ - if( pInfo->nBackfillnBackfill, &pIter); - assert( rc==SQLITE_OK || pIter==0 ); - } - - if( pIter - && (rc = walBusyLock(pWal,xBusy,pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK - ){ - u32 nBackfill = pInfo->nBackfill; - - pInfo->nBackfillAttempted = mxSafeFrame; - - /* Sync the WAL to disk */ - rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags)); - - /* If the database may grow as a result of this checkpoint, hint - ** about the eventual size of the db file to the VFS layer. - */ - if( rc==SQLITE_OK ){ - i64 nReq = ((i64)mxPage * szPage); - i64 nSize; /* Current size of database file */ - sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_CKPT_START, 0); - rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); - if( rc==SQLITE_OK && nSizehdr.mxFrame*szPage)pDbFd, SQLITE_FCNTL_SIZE_HINT,&nReq); - } - } - - } - - /* Iterate through the contents of the WAL, copying data to the db file */ - while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ - i64 iOffset; - assert( walFramePgno(pWal, iFrame)==iDbpage ); - if( AtomicLoad(&db->u1.isInterrupted) ){ - rc = db->mallocFailed ? SQLITE_NOMEM : SQLITE_INTERRUPT; - break; - } - if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ){ - continue; - } - iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; - /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ - rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); - if( rc!=SQLITE_OK ) break; - iOffset = (iDbpage-1)*(i64)szPage; - testcase( IS_BIG_INT(iOffset) ); - rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset); - if( rc!=SQLITE_OK ) break; - } - sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_CKPT_DONE, 0); - - /* If work was actually accomplished... 
*/ - if( rc==SQLITE_OK ){ - if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ - i64 szDb = pWal->hdr.nPage*(i64)szPage; - testcase( IS_BIG_INT(szDb) ); - rc = sqlite3OsTruncate(pWal->pDbFd, szDb); - if( rc==SQLITE_OK ){ - rc = sqlite3OsSync(pWal->pDbFd, CKPT_SYNC_FLAGS(sync_flags)); - } - } - if( rc==SQLITE_OK ){ - AtomicStore(&pInfo->nBackfill, mxSafeFrame); - } - } - - /* Release the reader lock held while backfilling */ - walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); - } - - if( rc==SQLITE_BUSY ){ - /* Reset the return code so as not to report a checkpoint failure - ** just because there are active readers. */ - rc = SQLITE_OK; - } - } - - /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the - ** entire wal file has been copied into the database file, then block - ** until all readers have finished using the wal file. This ensures that - ** the next process to write to the database restarts the wal file. - */ - if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ - assert( pWal->writeLock ); - if( pInfo->nBackfillhdr.mxFrame ){ - rc = SQLITE_BUSY; - }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){ - u32 salt1; - sqlite3_randomness(4, &salt1); - assert( pInfo->nBackfill==pWal->hdr.mxFrame ); - rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1); - if( rc==SQLITE_OK ){ - if( eMode==SQLITE_CHECKPOINT_TRUNCATE ){ - /* IMPLEMENTATION-OF: R-44699-57140 This mode works the same way as - ** SQLITE_CHECKPOINT_RESTART with the addition that it also - ** truncates the log file to zero bytes just prior to a - ** successful return. - ** - ** In theory, it might be safe to do this without updating the - ** wal-index header in shared memory, as all subsequent reader or - ** writer clients should see that the entire log file has been - ** checkpointed and behave accordingly. 
This seems unsafe though, - ** as it would leave the system in a state where the contents of - ** the wal-index header do not match the contents of the - ** file-system. To avoid this, update the wal-index header to - ** indicate that the log file contains zero valid frames. */ - walRestartHdr(pWal, salt1); - rc = sqlite3OsTruncate(pWal->pWalFd, 0); - } - walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); - } - } - } - - walcheckpoint_out: - walIteratorFree(pIter); - return rc; -} - -/* -** If the WAL file is currently larger than nMax bytes in size, truncate -** it to exactly nMax bytes. If an error occurs while doing so, ignore it. -*/ -static void walLimitSize(Wal *pWal, i64 nMax){ - i64 sz; - int rx; - sqlite3BeginBenignMalloc(); - rx = sqlite3OsFileSize(pWal->pWalFd, &sz); - if( rx==SQLITE_OK && (sz > nMax ) ){ - rx = sqlite3OsTruncate(pWal->pWalFd, nMax); - } - sqlite3EndBenignMalloc(); - if( rx ){ - sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName); - } -} - -/* -** Close a connection to a log file. -*/ -int sqlite3WalClose( - Wal *pWal, /* Wal to close */ - sqlite3 *db, /* For interrupt flag */ - int sync_flags, /* Flags to pass to OsSync() (or 0) */ - int nBuf, - u8 *zBuf /* Buffer of at least nBuf bytes */ -){ - int rc = SQLITE_OK; - if( pWal ){ - int isDelete = 0; /* True to unlink wal and wal-index files */ - - /* If an EXCLUSIVE lock can be obtained on the database file (using the - ** ordinary, rollback-mode locking methods, this guarantees that the - ** connection associated with this log file is the only connection to - ** the database. In this case checkpoint the database and unlink both - ** the wal and wal-index files. - ** - ** The EXCLUSIVE lock is not released before returning. 
- */ - if( zBuf!=0 - && SQLITE_OK==(rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE)) - ){ - if( pWal->exclusiveMode==WAL_NORMAL_MODE ){ - pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; - } - rc = sqlite3WalCheckpoint(pWal, db, - SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0 - ); - if( rc==SQLITE_OK ){ - int bPersist = -1; - sqlite3OsFileControlHint( - pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist - ); - if( bPersist!=1 ){ - /* Try to delete the WAL file if the checkpoint completed and - ** fsyned (rc==SQLITE_OK) and if we are not in persistent-wal - ** mode (!bPersist) */ - isDelete = 1; - }else if( pWal->mxWalSize>=0 ){ - /* Try to truncate the WAL file to zero bytes if the checkpoint - ** completed and fsynced (rc==SQLITE_OK) and we are in persistent - ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a - ** non-negative value (pWal->mxWalSize>=0). Note that we truncate - ** to zero bytes as truncating to the journal_size_limit might - ** leave a corrupt WAL file on disk. */ - walLimitSize(pWal, 0); - } - } - } - - walIndexClose(pWal, isDelete); - sqlite3OsClose(pWal->pWalFd); - if( isDelete ){ - sqlite3BeginBenignMalloc(); - sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); - sqlite3EndBenignMalloc(); - } - WALTRACE(("WAL%p: closed\n", pWal)); - sqlite3_free((void *)pWal->apWiData); - sqlite3_free(pWal); - } - return rc; -} - -/* -** Try to read the wal-index header. Return 0 on success and 1 if -** there is a problem. -** -** The wal-index is in shared memory. Another thread or process might -** be writing the header at the same time this procedure is trying to -** read it, which might result in inconsistency. A dirty read is detected -** by verifying that both copies of the header are the same and also by -** a checksum on the header. -** -** If and only if the read is consistent and the header is different from -** pWal->hdr, then pWal->hdr is updated to the content of the new header -** and *pChanged is set to 1. 
-** -** If the checksum cannot be verified return non-zero. If the header -** is read successfully and the checksum verified, return zero. -*/ -static SQLITE_NO_TSAN int walIndexTryHdr(Wal *pWal, int *pChanged){ - u32 aCksum[2]; /* Checksum on the header content */ - WalIndexHdr h1, h2; /* Two copies of the header content */ - WalIndexHdr volatile *aHdr; /* Header in shared memory */ - - /* The first page of the wal-index must be mapped at this point. */ - assert( pWal->nWiData>0 && pWal->apWiData[0] ); - - /* Read the header. This might happen concurrently with a write to the - ** same area of shared memory on a different CPU in a SMP, - ** meaning it is possible that an inconsistent snapshot is read - ** from the file. If this happens, return non-zero. - ** - ** tag-20200519-1: - ** There are two copies of the header at the beginning of the wal-index. - ** When reading, read [0] first then [1]. Writes are in the reverse order. - ** Memory barriers are used to prevent the compiler or the hardware from - ** reordering the reads and writes. TSAN and similar tools can sometimes - ** give false-positive warnings about these accesses because the tools do not - ** account for the double-read and the memory barrier. The use of mutexes - ** here would be problematic as the memory being accessed is potentially - ** shared among multiple processes and not all mutex implementions work - ** reliably in that environment. 
- */ - aHdr = walIndexHdr(pWal); - memcpy(&h1, (void *)&aHdr[0], sizeof(h1)); /* Possible TSAN false-positive */ - walShmBarrier(pWal); - memcpy(&h2, (void *)&aHdr[1], sizeof(h2)); - - if( memcmp(&h1, &h2, sizeof(h1))!=0 ){ - return 1; /* Dirty read */ - } - if( h1.isInit==0 ){ - return 1; /* Malformed header - probably all zeros */ - } - walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum); - if( aCksum[0]!=h1.aCksum[0] || aCksum[1]!=h1.aCksum[1] ){ - return 1; /* Checksum does not match */ - } - - if( memcmp(&pWal->hdr, &h1, sizeof(WalIndexHdr)) ){ - *pChanged = 1; - memcpy(&pWal->hdr, &h1, sizeof(WalIndexHdr)); - pWal->szPage = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); - testcase( pWal->szPage<=32768 ); - testcase( pWal->szPage>=65536 ); - } - - /* The header was successfully read. Return zero. */ - return 0; -} - -/* -** This is the value that walTryBeginRead returns when it needs to -** be retried. -*/ -#define WAL_RETRY (-1) - -/* -** Read the wal-index header from the wal-index and into pWal->hdr. -** If the wal-header appears to be corrupt, try to reconstruct the -** wal-index from the WAL before returning. -** -** Set *pChanged to 1 if the wal-index header value in pWal->hdr is -** changed by this operation. If pWal->hdr is unchanged, set *pChanged -** to 0. -** -** If the wal-index header is successfully read, return SQLITE_OK. -** Otherwise an SQLite error code. -*/ -static int walIndexReadHdr(Wal *pWal, int *pChanged){ - int rc; /* Return code */ - int badHdr; /* True if a header read failed */ - volatile u32 *page0; /* Chunk of wal-index containing header */ - - /* Ensure that page 0 of the wal-index (the page that contains the - ** wal-index header) is mapped. Return early if an error occurs here. 
- */ - assert( pChanged ); - rc = walIndexPage(pWal, 0, &page0); - if( rc!=SQLITE_OK ){ - assert( rc!=SQLITE_READONLY ); /* READONLY changed to OK in walIndexPage */ - if( rc==SQLITE_READONLY_CANTINIT ){ - /* The SQLITE_READONLY_CANTINIT return means that the shared-memory - ** was openable but is not writable, and this thread is unable to - ** confirm that another write-capable connection has the shared-memory - ** open, and hence the content of the shared-memory is unreliable, - ** since the shared-memory might be inconsistent with the WAL file - ** and there is no writer on hand to fix it. */ - assert( page0==0 ); - assert( pWal->writeLock==0 ); - assert( pWal->readOnly & WAL_SHM_RDONLY ); - pWal->bShmUnreliable = 1; - pWal->exclusiveMode = WAL_HEAPMEMORY_MODE; - *pChanged = 1; - }else{ - return rc; /* Any other non-OK return is just an error */ - } - }else{ - /* page0 can be NULL if the SHM is zero bytes in size and pWal->writeLock - ** is zero, which prevents the SHM from growing */ - testcase( page0!=0 ); - } - assert( page0!=0 || pWal->writeLock==0 ); - - /* If the first page of the wal-index has been mapped, try to read the - ** wal-index header immediately, without holding any lock. This usually - ** works, but may fail if the wal-index header is corrupt or currently - ** being modified by another thread or process. - */ - badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1); - - /* If the first attempt failed, it might have been due to a race - ** with a writer. So get a WRITE lock and try again. 
- */ - if( badHdr ){ - if( pWal->bShmUnreliable==0 && (pWal->readOnly & WAL_SHM_RDONLY) ){ - if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ - walUnlockShared(pWal, WAL_WRITE_LOCK); - rc = SQLITE_READONLY_RECOVERY; - } - }else{ - int bWriteLock = pWal->writeLock; - if( bWriteLock || SQLITE_OK==(rc = walLockWriter(pWal)) ){ - pWal->writeLock = 1; - if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ - badHdr = walIndexTryHdr(pWal, pChanged); - if( badHdr ){ - /* If the wal-index header is still malformed even while holding - ** a WRITE lock, it can only mean that the header is corrupted and - ** needs to be reconstructed. So run recovery to do exactly that. - */ - rc = walIndexRecover(pWal); - *pChanged = 1; - } - } - if( bWriteLock==0 ){ - pWal->writeLock = 0; - walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); - } - } - } - } - - /* If the header is read successfully, check the version number to make - ** sure the wal-index was not constructed with some future format that - ** this version of SQLite cannot understand. - */ - if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){ - rc = SQLITE_CANTOPEN_BKPT; - } - if( pWal->bShmUnreliable ){ - if( rc!=SQLITE_OK ){ - walIndexClose(pWal, 0); - pWal->bShmUnreliable = 0; - assert( pWal->nWiData>0 && pWal->apWiData[0]==0 ); - /* walIndexRecover() might have returned SHORT_READ if a concurrent - ** writer truncated the WAL out from under it. If that happens, it - ** indicates that a writer has fixed the SHM file for us, so retry */ - if( rc==SQLITE_IOERR_SHORT_READ ) rc = WAL_RETRY; - } - pWal->exclusiveMode = WAL_NORMAL_MODE; - } - - return rc; -} - -/* -** Open a transaction in a connection where the shared-memory is read-only -** and where we cannot verify that there is a separate write-capable connection -** on hand to keep the shared-memory up-to-date with the WAL file. 
-** -** This can happen, for example, when the shared-memory is implemented by -** memory-mapping a *-shm file, where a prior writer has shut down and -** left the *-shm file on disk, and now the present connection is trying -** to use that database but lacks write permission on the *-shm file. -** Other scenarios are also possible, depending on the VFS implementation. -** -** Precondition: -** -** The *-wal file has been read and an appropriate wal-index has been -** constructed in pWal->apWiData[] using heap memory instead of shared -** memory. -** -** If this function returns SQLITE_OK, then the read transaction has -** been successfully opened. In this case output variable (*pChanged) -** is set to true before returning if the caller should discard the -** contents of the page cache before proceeding. Or, if it returns -** WAL_RETRY, then the heap memory wal-index has been discarded and -** the caller should retry opening the read transaction from the -** beginning (including attempting to map the *-shm file). -** -** If an error occurs, an SQLite error code is returned. -*/ -static int walBeginShmUnreliable(Wal *pWal, int *pChanged){ - i64 szWal; /* Size of wal file on disk in bytes */ - i64 iOffset; /* Current offset when reading wal file */ - u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ - u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ - int szFrame; /* Number of bytes in buffer aFrame[] */ - u8 *aData; /* Pointer to data part of aFrame buffer */ - volatile void *pDummy; /* Dummy argument for xShmMap */ - int rc; /* Return code */ - u32 aSaveCksum[2]; /* Saved copy of pWal->hdr.aFrameCksum */ - - assert( pWal->bShmUnreliable ); - assert( pWal->readOnly & WAL_SHM_RDONLY ); - assert( pWal->nWiData>0 && pWal->apWiData[0] ); - - /* Take WAL_READ_LOCK(0). This has the effect of preventing any - ** writers from running a checkpoint, but does not stop them - ** from running recovery. 
*/ - rc = walLockShared(pWal, WAL_READ_LOCK(0)); - if( rc!=SQLITE_OK ){ - if( rc==SQLITE_BUSY ) rc = WAL_RETRY; - goto begin_unreliable_shm_out; - } - pWal->readLock = 0; - - /* Check to see if a separate writer has attached to the shared-memory area, - ** thus making the shared-memory "reliable" again. Do this by invoking - ** the xShmMap() routine of the VFS and looking to see if the return - ** is SQLITE_READONLY instead of SQLITE_READONLY_CANTINIT. - ** - ** If the shared-memory is now "reliable" return WAL_RETRY, which will - ** cause the heap-memory WAL-index to be discarded and the actual - ** shared memory to be used in its place. - ** - ** This step is important because, even though this connection is holding - ** the WAL_READ_LOCK(0) which prevents a checkpoint, a writer might - ** have already checkpointed the WAL file and, while the current - ** is active, wrap the WAL and start overwriting frames that this - ** process wants to use. - ** - ** Once sqlite3OsShmMap() has been called for an sqlite3_file and has - ** returned any SQLITE_READONLY value, it must return only SQLITE_READONLY - ** or SQLITE_READONLY_CANTINIT or some error for all subsequent invocations, - ** even if some external agent does a "chmod" to make the shared-memory - ** writable by us, until sqlite3OsShmUnmap() has been called. - ** This is a requirement on the VFS implementation. - */ - rc = sqlite3OsShmMap(pWal->pDbFd, 0, WALINDEX_PGSZ, 0, &pDummy); - assert( rc!=SQLITE_OK ); /* SQLITE_OK not possible for read-only connection */ - if( rc!=SQLITE_READONLY_CANTINIT ){ - rc = (rc==SQLITE_READONLY ? WAL_RETRY : rc); - goto begin_unreliable_shm_out; - } - - /* We reach this point only if the real shared-memory is still unreliable. - ** Assume the in-memory WAL-index substitute is correct and load it - ** into pWal->hdr. 
- */ - memcpy(&pWal->hdr, (void*)walIndexHdr(pWal), sizeof(WalIndexHdr)); - - /* Make sure some writer hasn't come in and changed the WAL file out - ** from under us, then disconnected, while we were not looking. - */ - rc = sqlite3OsFileSize(pWal->pWalFd, &szWal); - if( rc!=SQLITE_OK ){ - goto begin_unreliable_shm_out; - } - if( szWalhdr.mxFrame==0 ? SQLITE_OK : WAL_RETRY); - goto begin_unreliable_shm_out; - } - - /* Check the salt keys at the start of the wal file still match. */ - rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); - if( rc!=SQLITE_OK ){ - goto begin_unreliable_shm_out; - } - if( memcmp(&pWal->hdr.aSalt, &aBuf[16], 8) ){ - /* Some writer has wrapped the WAL file while we were not looking. - ** Return WAL_RETRY which will cause the in-memory WAL-index to be - ** rebuilt. */ - rc = WAL_RETRY; - goto begin_unreliable_shm_out; - } - - /* Allocate a buffer to read frames into */ - szFrame = pWal->hdr.szPage + WAL_FRAME_HDRSIZE; - aFrame = (u8 *)sqlite3_malloc64(szFrame); - if( aFrame==0 ){ - rc = SQLITE_NOMEM; - goto begin_unreliable_shm_out; - } - aData = &aFrame[WAL_FRAME_HDRSIZE]; - - /* Check to see if a complete transaction has been appended to the - ** wal file since the heap-memory wal-index was created. If so, the - ** heap-memory wal-index is discarded and WAL_RETRY returned to - ** the caller. */ - aSaveCksum[0] = pWal->hdr.aFrameCksum[0]; - aSaveCksum[1] = pWal->hdr.aFrameCksum[1]; - for(iOffset=walFrameOffset(pWal->hdr.mxFrame+1, pWal->hdr.szPage); - iOffset+szFrame<=szWal; - iOffset+=szFrame - ){ - u32 pgno; /* Database page number for frame */ - u32 nTruncate; /* dbsize field from frame header */ - - /* Read and decode the next log frame. */ - rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); - if( rc!=SQLITE_OK ) break; - if( !walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame) ) break; - - /* If nTruncate is non-zero, then a complete transaction has been - ** appended to this wal file. 
Set rc to WAL_RETRY and break out of - ** the loop. */ - if( nTruncate ){ - rc = WAL_RETRY; - break; - } - } - pWal->hdr.aFrameCksum[0] = aSaveCksum[0]; - pWal->hdr.aFrameCksum[1] = aSaveCksum[1]; - - begin_unreliable_shm_out: - sqlite3_free(aFrame); - if( rc!=SQLITE_OK ){ - int i; - for(i=0; inWiData; i++){ - sqlite3_free((void*)pWal->apWiData[i]); - pWal->apWiData[i] = 0; - } - pWal->bShmUnreliable = 0; - sqlite3WalEndReadTransaction(pWal); - *pChanged = 1; - } - return rc; -} - -/* -** Attempt to start a read transaction. This might fail due to a race or -** other transient condition. When that happens, it returns WAL_RETRY to -** indicate to the caller that it is safe to retry immediately. -** -** On success return SQLITE_OK. On a permanent failure (such an -** I/O error or an SQLITE_BUSY because another process is running -** recovery) return a positive error code. -** -** The useWal parameter is true to force the use of the WAL and disable -** the case where the WAL is bypassed because it has been completely -** checkpointed. If useWal==0 then this routine calls walIndexReadHdr() -** to make a copy of the wal-index header into pWal->hdr. If the -** wal-index header has changed, *pChanged is set to 1 (as an indication -** to the caller that the local page cache is obsolete and needs to be -** flushed.) When useWal==1, the wal-index header is assumed to already -** be loaded and the pChanged parameter is unused. -** -** The caller must set the cnt parameter to the number of prior calls to -** this routine during the current read attempt that returned WAL_RETRY. -** This routine will start taking more aggressive measures to clear the -** race conditions after multiple WAL_RETRY returns, and after an excessive -** number of errors will ultimately return SQLITE_PROTOCOL. The -** SQLITE_PROTOCOL return indicates that some other process has gone rogue -** and is not honoring the locking protocol. 
There is a vanishingly small -** chance that SQLITE_PROTOCOL could be returned because of a run of really -** bad luck when there is lots of contention for the wal-index, but that -** possibility is so small that it can be safely neglected, we believe. -** -** On success, this routine obtains a read lock on -** WAL_READ_LOCK(pWal->readLock). The pWal->readLock integer is -** in the range 0 <= pWal->readLock < WAL_NREADER. If pWal->readLock==(-1) -** that means the Wal does not hold any read lock. The reader must not -** access any database page that is modified by a WAL frame up to and -** including frame number aReadMark[pWal->readLock]. The reader will -** use WAL frames up to and including pWal->hdr.mxFrame if pWal->readLock>0 -** Or if pWal->readLock==0, then the reader will ignore the WAL -** completely and get all content directly from the database file. -** If the useWal parameter is 1 then the WAL will never be ignored and -** this routine will always set pWal->readLock>0 on success. -** When the read transaction is completed, the caller must release the -** lock on WAL_READ_LOCK(pWal->readLock) and set pWal->readLock to -1. -** -** This routine uses the nBackfill and aReadMark[] fields of the header -** to select a particular WAL_READ_LOCK() that strives to let the -** checkpoint process do as much work as possible. This routine might -** update values of the aReadMark[] array in the header, but if it does -** so it takes care to hold an exclusive lock on the corresponding -** WAL_READ_LOCK() while changing values. 
-*/ -static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ - volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ - u32 mxReadMark; /* Largest aReadMark[] value */ - int mxI; /* Index of largest aReadMark[] value */ - int i; /* Loop counter */ - int rc = SQLITE_OK; /* Return code */ - u32 mxFrame; /* Wal frame to lock to */ - - assert( pWal->readLock<0 ); /* Not currently locked */ - - /* useWal may only be set for read/write connections */ - assert( (pWal->readOnly & WAL_SHM_RDONLY)==0 || useWal==0 ); - - /* Take steps to avoid spinning forever if there is a protocol error. - ** - ** Circumstances that cause a RETRY should only last for the briefest - ** instances of time. No I/O or other system calls are done while the - ** locks are held, so the locks should not be held for very long. But - ** if we are unlucky, another process that is holding a lock might get - ** paged out or take a page-fault that is time-consuming to resolve, - ** during the few nanoseconds that it is holding the lock. In that case, - ** it might take longer than normal for the lock to free. - ** - ** After 5 RETRYs, we begin calling sqlite3OsSleep(). The first few - ** calls to sqlite3OsSleep() have a delay of 1 microsecond. Really this - ** is more of a scheduler yield than an actual delay. But on the 10th - ** an subsequent retries, the delays start becoming longer and longer, - ** so that on the 100th (and last) RETRY we delay for 323 milliseconds. - ** The total delay time before giving up is less than 10 seconds. 
- */ - if( cnt>5 ){ - int nDelay = 1; /* Pause time in microseconds */ - if( cnt>100 ){ - VVA_ONLY( pWal->lockError = 1; ) - return SQLITE_PROTOCOL; - } - if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39; - sqlite3OsSleep(pWal->pVfs, nDelay); - } - - if( !useWal ){ - assert( rc==SQLITE_OK ); - if( pWal->bShmUnreliable==0 ){ - rc = walIndexReadHdr(pWal, pChanged); - } - if( rc==SQLITE_BUSY ){ - /* If there is not a recovery running in another thread or process - ** then convert BUSY errors to WAL_RETRY. If recovery is known to - ** be running, convert BUSY to BUSY_RECOVERY. There is a race here - ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY - ** would be technically correct. But the race is benign since with - ** WAL_RETRY this routine will be called again and will probably be - ** right on the second iteration. - */ - if( pWal->apWiData[0]==0 ){ - /* This branch is taken when the xShmMap() method returns SQLITE_BUSY. - ** We assume this is a transient condition, so return WAL_RETRY. The - ** xShmMap() implementation used by the default unix and win32 VFS - ** modules may return SQLITE_BUSY due to a race condition in the - ** code that determines whether or not the shared-memory region - ** must be zeroed before the requested page is returned. - */ - rc = WAL_RETRY; - }else if( SQLITE_OK==(rc = walLockShared(pWal, WAL_RECOVER_LOCK)) ){ - walUnlockShared(pWal, WAL_RECOVER_LOCK); - rc = WAL_RETRY; - }else if( rc==SQLITE_BUSY ){ - rc = SQLITE_BUSY_RECOVERY; - } - } - if( rc!=SQLITE_OK ){ - return rc; - } - else if( pWal->bShmUnreliable ){ - return walBeginShmUnreliable(pWal, pChanged); - } - } - - assert( pWal->nWiData>0 ); - assert( pWal->apWiData[0]!=0 ); - pInfo = walCkptInfo(pWal); - if( !useWal && AtomicLoad(&pInfo->nBackfill)==pWal->hdr.mxFrame -#ifdef SQLITE_ENABLE_SNAPSHOT - && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0) -#endif - ){ - /* The WAL has been completely backfilled (or it is empty). - ** and can be safely ignored. 
- */ - rc = walLockShared(pWal, WAL_READ_LOCK(0)); - walShmBarrier(pWal); - if( rc==SQLITE_OK ){ - if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ - /* It is not safe to allow the reader to continue here if frames - ** may have been appended to the log before READ_LOCK(0) was obtained. - ** When holding READ_LOCK(0), the reader ignores the entire log file, - ** which implies that the database file contains a trustworthy - ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from - ** happening, this is usually correct. - ** - ** However, if frames have been appended to the log (or if the log - ** is wrapped and written for that matter) before the READ_LOCK(0) - ** is obtained, that is not necessarily true. A checkpointer may - ** have started to backfill the appended frames but crashed before - ** it finished. Leaving a corrupt image in the database file. - */ - walUnlockShared(pWal, WAL_READ_LOCK(0)); - return WAL_RETRY; - } - pWal->readLock = 0; - return SQLITE_OK; - }else if( rc!=SQLITE_BUSY ){ - return rc; - } - } - - /* If we get this far, it means that the reader will want to use - ** the WAL to get at content from recent commits. The job now is - ** to select one of the aReadMark[] entries that is closest to - ** but not exceeding pWal->hdr.mxFrame and lock that entry. 
- */ - mxReadMark = 0; - mxI = 0; - mxFrame = pWal->hdr.mxFrame; -#ifdef SQLITE_ENABLE_SNAPSHOT - if( pWal->pSnapshot && pWal->pSnapshot->mxFramepSnapshot->mxFrame; - } -#endif - for(i=1; iaReadMark+i); - if( mxReadMark<=thisMark && thisMark<=mxFrame ){ - assert( thisMark!=READMARK_NOT_USED ); - mxReadMark = thisMark; - mxI = i; - } - } - if( (pWal->readOnly & WAL_SHM_RDONLY)==0 - && (mxReadMarkaReadMark+i,mxFrame); - mxReadMark = mxFrame; - mxI = i; - walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); - break; - }else if( rc!=SQLITE_BUSY ){ - return rc; - } - } - } - if( mxI==0 ){ - assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 ); - return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTINIT; - } - - rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); - if( rc ){ - return rc==SQLITE_BUSY ? WAL_RETRY : rc; - } - /* Now that the read-lock has been obtained, check that neither the - ** value in the aReadMark[] array or the contents of the wal-index - ** header have changed. - ** - ** It is necessary to check that the wal-index header did not change - ** between the time it was read and when the shared-lock was obtained - ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility - ** that the log file may have been wrapped by a writer, or that frames - ** that occur later in the log than pWal->hdr.mxFrame may have been - ** copied into the database by a checkpointer. If either of these things - ** happened, then reading the database with the current value of - ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry - ** instead. - ** - ** Before checking that the live wal-index header has not changed - ** since it was read, set Wal.minFrame to the first frame in the wal - ** file that has not yet been checkpointed. This client will not need - ** to read any frames earlier than minFrame from the wal file - they - ** can be safely read directly from the database file. 
- ** - ** Because a ShmBarrier() call is made between taking the copy of - ** nBackfill and checking that the wal-header in shared-memory still - ** matches the one cached in pWal->hdr, it is guaranteed that the - ** checkpointer that set nBackfill was not working with a wal-index - ** header newer than that cached in pWal->hdr. If it were, that could - ** cause a problem. The checkpointer could omit to checkpoint - ** a version of page X that lies before pWal->minFrame (call that version - ** A) on the basis that there is a newer version (version B) of the same - ** page later in the wal file. But if version B happens to like past - ** frame pWal->hdr.mxFrame - then the client would incorrectly assume - ** that it can read version A from the database file. However, since - ** we can guarantee that the checkpointer that set nBackfill could not - ** see any pages past pWal->hdr.mxFrame, this problem does not come up. - */ - pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1; - walShmBarrier(pWal); - if( AtomicLoad(pInfo->aReadMark+mxI)!=mxReadMark - || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) - ){ - walUnlockShared(pWal, WAL_READ_LOCK(mxI)); - return WAL_RETRY; - }else{ - assert( mxReadMark<=pWal->hdr.mxFrame ); - pWal->readLock = (i16)mxI; - } - return rc; -} - -#ifdef SQLITE_ENABLE_SNAPSHOT -/* -** Attempt to reduce the value of the WalCkptInfo.nBackfillAttempted -** variable so that older snapshots can be accessed. To do this, loop -** through all wal frames from nBackfillAttempted to (nBackfill+1), -** comparing their content to the corresponding page with the database -** file, if any. Set nBackfillAttempted to the frame number of the -** first frame for which the wal file content matches the db file. -** -** This is only really safe if the file-system is such that any page -** writes made by earlier checkpointers were atomic operations, which -** is not always true. 
It is also possible that nBackfillAttempted -** may be left set to a value larger than expected, if a wal frame -** contains content that duplicate of an earlier version of the same -** page. -** -** SQLITE_OK is returned if successful, or an SQLite error code if an -** error occurs. It is not an error if nBackfillAttempted cannot be -** decreased at all. -*/ -int sqlite3WalSnapshotRecover(Wal *pWal){ - int rc; - - assert( pWal->readLock>=0 ); - rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); - if( rc==SQLITE_OK ){ - volatile WalCkptInfo *pInfo = walCkptInfo(pWal); - int szPage = (int)pWal->szPage; - i64 szDb; /* Size of db file in bytes */ - - rc = sqlite3OsFileSize(pWal->pDbFd, &szDb); - if( rc==SQLITE_OK ){ - void *pBuf1 = sqlite3_malloc(szPage); - void *pBuf2 = sqlite3_malloc(szPage); - if( pBuf1==0 || pBuf2==0 ){ - rc = SQLITE_NOMEM; - }else{ - u32 i = pInfo->nBackfillAttempted; - for(i=pInfo->nBackfillAttempted; i>AtomicLoad(&pInfo->nBackfill); i--){ - WalHashLoc sLoc; /* Hash table location */ - u32 pgno; /* Page number in db file */ - i64 iDbOff; /* Offset of db file entry */ - i64 iWalOff; /* Offset of wal file entry */ - - rc = walHashGet(pWal, walFramePage(i), &sLoc); - if( rc!=SQLITE_OK ) break; - assert( i - sLoc.iZero - 1 >=0 ); - pgno = sLoc.aPgno[i-sLoc.iZero-1]; - iDbOff = (i64)(pgno-1) * szPage; - - if( iDbOff+szPage<=szDb ){ - iWalOff = walFrameOffset(i, szPage) + WAL_FRAME_HDRSIZE; - rc = sqlite3OsRead(pWal->pWalFd, pBuf1, szPage, iWalOff); - - if( rc==SQLITE_OK ){ - rc = sqlite3OsRead(pWal->pDbFd, pBuf2, szPage, iDbOff); - } - - if( rc!=SQLITE_OK || 0==memcmp(pBuf1, pBuf2, szPage) ){ - break; - } - } - - pInfo->nBackfillAttempted = i-1; - } - } - - sqlite3_free(pBuf1); - sqlite3_free(pBuf2); - } - walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); - } - - return rc; -} -#endif /* SQLITE_ENABLE_SNAPSHOT */ - -/* -** Begin a read transaction on the database. 
-** -** This routine used to be called sqlite3OpenSnapshot() and with good reason: -** it takes a snapshot of the state of the WAL and wal-index for the current -** instant in time. The current thread will continue to use this snapshot. -** Other threads might append new content to the WAL and wal-index but -** that extra content is ignored by the current thread. -** -** If the database contents have changes since the previous read -** transaction, then *pChanged is set to 1 before returning. The -** Pager layer will use this to know that its cache is stale and -** needs to be flushed. -*/ -int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ - int rc; /* Return code */ - int cnt = 0; /* Number of TryBeginRead attempts */ -#ifdef SQLITE_ENABLE_SNAPSHOT - int bChanged = 0; - WalIndexHdr *pSnapshot = pWal->pSnapshot; -#endif - - assert( pWal->ckptLock==0 ); - -#ifdef SQLITE_ENABLE_SNAPSHOT - if( pSnapshot ){ - if( memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){ - bChanged = 1; - } - - /* It is possible that there is a checkpointer thread running - ** concurrent with this code. If this is the case, it may be that the - ** checkpointer has already determined that it will checkpoint - ** snapshot X, where X is later in the wal file than pSnapshot, but - ** has not yet set the pInfo->nBackfillAttempted variable to indicate - ** its intent. To avoid the race condition this leads to, ensure that - ** there is no checkpointer process by taking a shared CKPT lock - ** before checking pInfo->nBackfillAttempted. 
*/ - (void)walEnableBlocking(pWal); - rc = walLockShared(pWal, WAL_CKPT_LOCK); - walDisableBlocking(pWal); - - if( rc!=SQLITE_OK ){ - return rc; - } - pWal->ckptLock = 1; - } -#endif - - do{ - rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); - }while( rc==WAL_RETRY ); - testcase( (rc&0xff)==SQLITE_BUSY ); - testcase( (rc&0xff)==SQLITE_IOERR ); - testcase( rc==SQLITE_PROTOCOL ); - testcase( rc==SQLITE_OK ); - -#ifdef SQLITE_ENABLE_SNAPSHOT - if( rc==SQLITE_OK ){ - if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){ - /* At this point the client has a lock on an aReadMark[] slot holding - ** a value equal to or smaller than pSnapshot->mxFrame, but pWal->hdr - ** is populated with the wal-index header corresponding to the head - ** of the wal file. Verify that pSnapshot is still valid before - ** continuing. Reasons why pSnapshot might no longer be valid: - ** - ** (1) The WAL file has been reset since the snapshot was taken. - ** In this case, the salt will have changed. - ** - ** (2) A checkpoint as been attempted that wrote frames past - ** pSnapshot->mxFrame into the database file. Note that the - ** checkpoint need not have completed for this to cause problems. - */ - volatile WalCkptInfo *pInfo = walCkptInfo(pWal); - - assert( pWal->readLock>0 || pWal->hdr.mxFrame==0 ); - assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame ); - - /* Check that the wal file has not been wrapped. Assuming that it has - ** not, also check that no checkpointer has attempted to checkpoint any - ** frames beyond pSnapshot->mxFrame. If either of these conditions are - ** true, return SQLITE_ERROR_SNAPSHOT. Otherwise, overwrite pWal->hdr - ** with *pSnapshot and set *pChanged as appropriate for opening the - ** snapshot. 
*/ - if( !memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt)) - && pSnapshot->mxFrame>=pInfo->nBackfillAttempted - ){ - assert( pWal->readLock>0 ); - memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr)); - *pChanged = bChanged; - }else{ - rc = SQLITE_ERROR_SNAPSHOT; - } - - /* A client using a non-current snapshot may not ignore any frames - ** from the start of the wal file. This is because, for a system - ** where (minFrame < iSnapshot < maxFrame), a checkpointer may - ** have omitted to checkpoint a frame earlier than minFrame in - ** the file because there exists a frame after iSnapshot that - ** is the same database page. */ - pWal->minFrame = 1; - - if( rc!=SQLITE_OK ){ - sqlite3WalEndReadTransaction(pWal); - } - } - } - - /* Release the shared CKPT lock obtained above. */ - if( pWal->ckptLock ){ - assert( pSnapshot ); - walUnlockShared(pWal, WAL_CKPT_LOCK); - pWal->ckptLock = 0; - } -#endif - return rc; -} - -/* -** Finish with a read transaction. All this does is release the -** read-lock. -*/ -void sqlite3WalEndReadTransaction(Wal *pWal){ - sqlite3WalEndWriteTransaction(pWal); - if( pWal->readLock>=0 ){ - walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); - pWal->readLock = -1; - } -} - -/* -** Search the wal file for page pgno. If found, set *piRead to the frame that -** contains the page. Otherwise, if pgno is not in the wal file, set *piRead -** to zero. -** -** Return SQLITE_OK if successful, or an error code if an error occurs. If an -** error does occur, the final value of *piRead is undefined. -*/ -int sqlite3WalFindFrame( - Wal *pWal, /* WAL handle */ - Pgno pgno, /* Database page number to read data for */ - u32 *piRead /* OUT: Frame number (or zero) */ -){ - u32 iRead = 0; /* If !=0, WAL frame to return data from */ - u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ - int iHash; /* Used to loop through N hash tables */ - int iMinHash; - - /* This routine is only be called from within a read transaction. 
*/ - assert( pWal->readLock>=0 || pWal->lockError ); - - /* If the "last page" field of the wal-index header snapshot is 0, then - ** no data will be read from the wal under any circumstances. Return early - ** in this case as an optimization. Likewise, if pWal->readLock==0, - ** then the WAL is ignored by the reader so return early, as if the - ** WAL were empty. - */ - if( iLast==0 || (pWal->readLock==0 && pWal->bShmUnreliable==0) ){ - *piRead = 0; - return SQLITE_OK; - } - - /* Search the hash table or tables for an entry matching page number - ** pgno. Each iteration of the following for() loop searches one - ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). - ** - ** This code might run concurrently to the code in walIndexAppend() - ** that adds entries to the wal-index (and possibly to this hash - ** table). This means the value just read from the hash - ** slot (aHash[iKey]) may have been added before or after the - ** current read transaction was opened. Values added after the - ** read transaction was opened may have been written incorrectly - - ** i.e. these slots may contain garbage data. However, we assume - ** that any slots written before the current read transaction was - ** opened remain unmodified. - ** - ** For the reasons above, the if(...) condition featured in the inner - ** loop of the following block is more stringent that would be required - ** if we had exclusive access to the hash-table: - ** - ** (aPgno[iFrame]==pgno): - ** This condition filters out normal hash-table collisions. - ** - ** (iFrame<=iLast): - ** This condition filters out entries that were added to the hash - ** table after the current read-transaction had started. 
- */ - iMinHash = walFramePage(pWal->minFrame); - for(iHash=walFramePage(iLast); iHash>=iMinHash; iHash--){ - WalHashLoc sLoc; /* Hash table location */ - int iKey; /* Hash slot index */ - int nCollide; /* Number of hash collisions remaining */ - int rc; /* Error code */ - u32 iH; - - rc = walHashGet(pWal, iHash, &sLoc); - if( rc!=SQLITE_OK ){ - return rc; - } - nCollide = HASHTABLE_NSLOT; - iKey = walHash(pgno); - while( (iH = AtomicLoad(&sLoc.aHash[iKey]))!=0 ){ - u32 iFrame = iH + sLoc.iZero; - if( iFrame<=iLast && iFrame>=pWal->minFrame && sLoc.aPgno[iH-1]==pgno ){ - assert( iFrame>iRead || CORRUPT_DB ); - iRead = iFrame; - } - if( (nCollide--)==0 ){ - return SQLITE_CORRUPT_BKPT; - } - iKey = walNextHash(iKey); - } - if( iRead ) break; - } - -#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT - /* If expensive assert() statements are available, do a linear search - ** of the wal-index file content. Make sure the results agree with the - ** result obtained using the hash indexes above. */ - { - u32 iRead2 = 0; - u32 iTest; - assert( pWal->bShmUnreliable || pWal->minFrame>0 ); - for(iTest=iLast; iTest>=pWal->minFrame && iTest>0; iTest--){ - if( walFramePgno(pWal, iTest)==pgno ){ - iRead2 = iTest; - break; - } - } - assert( iRead==iRead2 ); - } -#endif - - *piRead = iRead; - return SQLITE_OK; -} - -/* -** Read the contents of frame iRead from the wal file into buffer pOut -** (which is nOut bytes in size). Return SQLITE_OK if successful, or an -** error code otherwise. -*/ -int sqlite3WalReadFrame( - Wal *pWal, /* WAL handle */ - u32 iRead, /* Frame to read */ - int nOut, /* Size of buffer pOut in bytes */ - u8 *pOut /* Buffer to write page data to */ -){ - int sz; - i64 iOffset; - sz = pWal->hdr.szPage; - sz = (sz&0xfe00) + ((sz&0x0001)<<16); - testcase( sz<=32768 ); - testcase( sz>=65536 ); - iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; - /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ - return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? 
sz : nOut), iOffset); -} - -/* -** Return the size of the database in pages (or zero, if unknown). -*/ -Pgno sqlite3WalDbsize(Wal *pWal){ - if( pWal && ALWAYS(pWal->readLock>=0) ){ - return pWal->hdr.nPage; - } - return 0; -} - - -/* -** This function starts a write transaction on the WAL. -** -** A read transaction must have already been started by a prior call -** to sqlite3WalBeginReadTransaction(). -** -** If another thread or process has written into the database since -** the read transaction was started, then it is not possible for this -** thread to write as doing so would cause a fork. So this routine -** returns SQLITE_BUSY in that case and no write transaction is started. -** -** There can only be a single writer active at a time. -*/ -int sqlite3WalBeginWriteTransaction(Wal *pWal){ - int rc; - -#ifdef SQLITE_ENABLE_SETLK_TIMEOUT - /* If the write-lock is already held, then it was obtained before the - ** read-transaction was even opened, making this call a no-op. - ** Return early. */ - if( pWal->writeLock ){ - assert( !memcmp(&pWal->hdr,(void *)walIndexHdr(pWal),sizeof(WalIndexHdr)) ); - return SQLITE_OK; - } -#endif - - /* Cannot start a write transaction without first holding a read - ** transaction. */ - assert( pWal->readLock>=0 ); - assert( pWal->writeLock==0 && pWal->iReCksum==0 ); - - if( pWal->readOnly ){ - return SQLITE_READONLY; - } - - /* Only one writer allowed at a time. Get the write lock. Return - ** SQLITE_BUSY if unable. - */ - rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); - if( rc ){ - return rc; - } - pWal->writeLock = 1; - - /* If another connection has written to the database file since the - ** time the read transaction on this connection was started, then - ** the write is disallowed. - */ - if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ - walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); - pWal->writeLock = 0; - rc = SQLITE_BUSY_SNAPSHOT; - } - - return rc; -} - -/* -** End a write transaction. 
The commit has already been done. This -** routine merely releases the lock. -*/ -int sqlite3WalEndWriteTransaction(Wal *pWal){ - if( pWal->writeLock ){ - walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); - pWal->writeLock = 0; - pWal->iReCksum = 0; - pWal->truncateOnCommit = 0; - } - return SQLITE_OK; -} - -/* -** If any data has been written (but not committed) to the log file, this -** function moves the write-pointer back to the start of the transaction. -** -** Additionally, the callback function is invoked for each frame written -** to the WAL since the start of the transaction. If the callback returns -** other than SQLITE_OK, it is not invoked again and the error code is -** returned to the caller. -** -** Otherwise, if the callback function does not return an error, this -** function returns SQLITE_OK. -*/ -int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ - int rc = SQLITE_OK; - if( ALWAYS(pWal->writeLock) ){ - Pgno iMax = pWal->hdr.mxFrame; - Pgno iFrame; - - /* Restore the clients cache of the wal-index header to the state it - ** was in before the client began writing to the database. - */ - memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr)); - - for(iFrame=pWal->hdr.mxFrame+1; - ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; - iFrame++ - ){ - /* This call cannot fail. Unless the page for which the page number - ** is passed as the second argument is (a) in the cache and - ** (b) has an outstanding reference, then xUndo is either a no-op - ** (if (a) is false) or simply expels the page from the cache (if (b) - ** is false). - ** - ** If the upper layer is doing a rollback, it is guaranteed that there - ** are no outstanding references to any page other than page 1. And - ** page 1 is never written to the log until the transaction is - ** committed. As a result, the call to xUndo may not fail. 
- */ - assert( walFramePgno(pWal, iFrame)!=1 ); - rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); - } - if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal); - } - return rc; -} - -/* -** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 -** values. This function populates the array with values required to -** "rollback" the write position of the WAL handle back to the current -** point in the event of a savepoint rollback (via WalSavepointUndo()). -*/ -void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ - assert( pWal->writeLock ); - aWalData[0] = pWal->hdr.mxFrame; - aWalData[1] = pWal->hdr.aFrameCksum[0]; - aWalData[2] = pWal->hdr.aFrameCksum[1]; - aWalData[3] = pWal->nCkpt; -} - -/* -** Move the write position of the WAL back to the point identified by -** the values in the aWalData[] array. aWalData must point to an array -** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated -** by a call to WalSavepoint(). -*/ -int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ - int rc = SQLITE_OK; - - assert( pWal->writeLock ); - assert( aWalData[3]!=pWal->nCkpt || aWalData[0]<=pWal->hdr.mxFrame ); - - if( aWalData[3]!=pWal->nCkpt ){ - /* This savepoint was opened immediately after the write-transaction - ** was started. Right after that, the writer decided to wrap around - ** to the start of the log. Update the savepoint values to match. - */ - aWalData[0] = 0; - aWalData[3] = pWal->nCkpt; - } - - if( aWalData[0]hdr.mxFrame ){ - pWal->hdr.mxFrame = aWalData[0]; - pWal->hdr.aFrameCksum[0] = aWalData[1]; - pWal->hdr.aFrameCksum[1] = aWalData[2]; - walCleanupHash(pWal); - } - - return rc; -} - -/* -** This function is called just before writing a set of frames to the log -** file (see sqlite3WalFrames()). It checks to see if, instead of appending -** to the current log file, it is possible to overwrite the start of the -** existing log file with the new frames (i.e. "reset" the log). If so, -** it sets pWal->hdr.mxFrame to 0. 
Otherwise, pWal->hdr.mxFrame is left -** unchanged. -** -** SQLITE_OK is returned if no error is encountered (regardless of whether -** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned -** if an error occurs. -*/ -static int walRestartLog(Wal *pWal){ - int rc = SQLITE_OK; - int cnt; - - if( pWal->readLock==0 ){ - volatile WalCkptInfo *pInfo = walCkptInfo(pWal); - assert( pInfo->nBackfill==pWal->hdr.mxFrame ); - if( pInfo->nBackfill>0 ){ - u32 salt1; - sqlite3_randomness(4, &salt1); - rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); - if( rc==SQLITE_OK ){ - /* If all readers are using WAL_READ_LOCK(0) (in other words if no - ** readers are currently using the WAL), then the transactions - ** frames will overwrite the start of the existing log. Update the - ** wal-index header to reflect this. - ** - ** In theory it would be Ok to update the cache of the header only - ** at this point. But updating the actual wal-index header is also - ** safe and means there is no special case for sqlite3WalUndo() - ** to handle if this transaction is rolled back. */ - walRestartHdr(pWal, salt1); - walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); - }else if( rc!=SQLITE_BUSY ){ - return rc; - } - } - walUnlockShared(pWal, WAL_READ_LOCK(0)); - pWal->readLock = -1; - cnt = 0; - do{ - int notUsed; - rc = walTryBeginRead(pWal, ¬Used, 1, ++cnt); - }while( rc==WAL_RETRY ); - assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */ - testcase( (rc&0xff)==SQLITE_IOERR ); - testcase( rc==SQLITE_PROTOCOL ); - testcase( rc==SQLITE_OK ); - } - return rc; -} - -/* -** Information about the current state of the WAL file and where -** the next fsync should occur - passed from sqlite3WalFrames() into -** walWriteToLog(). 
-*/ -typedef struct WalWriter { - Wal *pWal; /* The complete WAL information */ - sqlite3_file *pFd; /* The WAL file to which we write */ - sqlite3_int64 iSyncPoint; /* Fsync at this offset */ - int syncFlags; /* Flags for the fsync */ - int szPage; /* Size of one page */ -} WalWriter; - -/* -** Write iAmt bytes of content into the WAL file beginning at iOffset. -** Do a sync when crossing the p->iSyncPoint boundary. -** -** In other words, if iSyncPoint is in between iOffset and iOffset+iAmt, -** first write the part before iSyncPoint, then sync, then write the -** rest. -*/ -static int walWriteToLog( - WalWriter *p, /* WAL to write to */ - void *pContent, /* Content to be written */ - int iAmt, /* Number of bytes to write */ - sqlite3_int64 iOffset /* Start writing at this offset */ -){ - int rc; - if( iOffsetiSyncPoint && iOffset+iAmt>=p->iSyncPoint ){ - int iFirstAmt = (int)(p->iSyncPoint - iOffset); - rc = sqlite3OsWrite(p->pFd, pContent, iFirstAmt, iOffset); - if( rc ) return rc; - iOffset += iFirstAmt; - iAmt -= iFirstAmt; - pContent = (void*)(iFirstAmt + (char*)pContent); - assert( WAL_SYNC_FLAGS(p->syncFlags)!=0 ); - rc = sqlite3OsSync(p->pFd, WAL_SYNC_FLAGS(p->syncFlags)); - if( iAmt==0 || rc ) return rc; - } - rc = sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset); - return rc; -} - -/* -** Write out a single frame of the WAL -*/ -static int walWriteOneFrame( - WalWriter *p, /* Where to write the frame */ - PgHdr *pPage, /* The page of the frame to be written */ - int nTruncate, /* The commit flag. Usually 0. 
>0 for commit */ - sqlite3_int64 iOffset /* Byte offset at which to write */ -){ - int rc; /* Result code from subfunctions */ - void *pData; /* Data actually written */ - u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ - pData = pPage->pData; - walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); - rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); - if( rc ) return rc; - /* Write the page data */ - rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); - return rc; -} - -/* -** This function is called as part of committing a transaction within which -** one or more frames have been overwritten. It updates the checksums for -** all frames written to the wal file by the current transaction starting -** with the earliest to have been overwritten. -** -** SQLITE_OK is returned if successful, or an SQLite error code otherwise. -*/ -static int walRewriteChecksums(Wal *pWal, u32 iLast){ - const int szPage = pWal->szPage;/* Database page size */ - int rc = SQLITE_OK; /* Return code */ - u8 *aBuf; /* Buffer to load data from wal file into */ - u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-headers in */ - u32 iRead; /* Next frame to read from wal file */ - i64 iCksumOff; - - aBuf = sqlite3_malloc(szPage + WAL_FRAME_HDRSIZE); - if( aBuf==0 ) return SQLITE_NOMEM; - - /* Find the checksum values to use as input for the recalculating the - ** first checksum. If the first frame is frame 1 (implying that the current - ** transaction restarted the wal file), these values must be read from the - ** wal-file header. Otherwise, read them from the frame header of the - ** previous frame. 
*/ - assert( pWal->iReCksum>0 ); - if( pWal->iReCksum==1 ){ - iCksumOff = 24; - }else{ - iCksumOff = walFrameOffset(pWal->iReCksum-1, szPage) + 16; - } - rc = sqlite3OsRead(pWal->pWalFd, aBuf, sizeof(u32)*2, iCksumOff); - pWal->hdr.aFrameCksum[0] = sqlite3Get4byte(aBuf); - pWal->hdr.aFrameCksum[1] = sqlite3Get4byte(&aBuf[sizeof(u32)]); - - iRead = pWal->iReCksum; - pWal->iReCksum = 0; - for(; rc==SQLITE_OK && iRead<=iLast; iRead++){ - i64 iOff = walFrameOffset(iRead, szPage); - rc = sqlite3OsRead(pWal->pWalFd, aBuf, szPage+WAL_FRAME_HDRSIZE, iOff); - if( rc==SQLITE_OK ){ - u32 iPgno, nDbSize; - iPgno = sqlite3Get4byte(aBuf); - nDbSize = sqlite3Get4byte(&aBuf[4]); - - walEncodeFrame(pWal, iPgno, nDbSize, &aBuf[WAL_FRAME_HDRSIZE], aFrame); - rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOff); - } - } - - sqlite3_free(aBuf); - return rc; -} - -/* -** Write a set of frames to the log. The caller must hold the write-lock -** on the log file (obtained using sqlite3WalBeginWriteTransaction()). -*/ -int sqlite3WalFrames( - Wal *pWal, /* Wal handle to write to */ - int szPage, /* Database page-size in bytes */ - PgHdr *pList, /* List of dirty pages to write */ - Pgno nTruncate, /* Database size after this commit */ - int isCommit, /* True if this is a commit */ - int sync_flags /* Flags to pass to OsSync() (or 0) */ -){ - int rc; /* Used to catch return codes */ - u32 iFrame; /* Next frame address */ - PgHdr *p; /* Iterator to run through pList with. */ - PgHdr *pLast = 0; /* Last frame in list */ - int nExtra = 0; /* Number of extra copies of last page */ - int szFrame; /* The size of a single frame */ - i64 iOffset; /* Next byte to write in WAL file */ - WalWriter w; /* The writer */ - u32 iFirst = 0; /* First frame that may be overwritten */ - WalIndexHdr *pLive; /* Pointer to shared header */ - - assert( pList ); - assert( pWal->writeLock ); - - /* If this frame set completes a transaction, then nTruncate>0. 
If - ** nTruncate==0 then this frame set does not complete the transaction. */ - assert( (isCommit!=0)==(nTruncate!=0) ); - -#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) - { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} - WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", - pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill")); - } -#endif - - pLive = (WalIndexHdr*)walIndexHdr(pWal); - if( memcmp(&pWal->hdr, (void *)pLive, sizeof(WalIndexHdr))!=0 ){ - iFirst = pLive->mxFrame+1; - } - - /* See if it is possible to write these frames into the start of the - ** log file, instead of appending to it at pWal->hdr.mxFrame. - */ - if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ - return rc; - } - - /* If this is the first frame written into the log, write the WAL - ** header to the start of the WAL file. See comments at the top of - ** this source file for a description of the WAL header format. - */ - iFrame = pWal->hdr.mxFrame; - if( iFrame==0 ){ - u8 aWalHdr[WAL_HDRSIZE]; /* Buffer to assemble wal-header in */ - u32 aCksum[2]; /* Checksum for wal-header */ - - sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN)); - sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION); - sqlite3Put4byte(&aWalHdr[8], szPage); - sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt); - if( pWal->nCkpt==0 ) sqlite3_randomness(8, pWal->hdr.aSalt); - memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8); - walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum); - sqlite3Put4byte(&aWalHdr[24], aCksum[0]); - sqlite3Put4byte(&aWalHdr[28], aCksum[1]); - - pWal->szPage = szPage; - pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN; - pWal->hdr.aFrameCksum[0] = aCksum[0]; - pWal->hdr.aFrameCksum[1] = aCksum[1]; - pWal->truncateOnCommit = 1; - - rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0); - WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? 
"failed" : "ok")); - if( rc!=SQLITE_OK ){ - return rc; - } - - /* Sync the header (unless SQLITE_IOCAP_SEQUENTIAL is true or unless - ** all syncing is turned off by PRAGMA synchronous=OFF). Otherwise - ** an out-of-order write following a WAL restart could result in - ** database corruption. See the ticket: - ** - ** https://sqlite.org/src/info/ff5be73dee - */ - if( pWal->syncHeader ){ - rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags)); - if( rc ) return rc; - } - } - assert( (int)pWal->szPage==szPage ); - - /* Setup information needed to write frames into the WAL */ - w.pWal = pWal; - w.pFd = pWal->pWalFd; - w.iSyncPoint = 0; - w.syncFlags = sync_flags; - w.szPage = szPage; - iOffset = walFrameOffset(iFrame+1, szPage); - szFrame = szPage + WAL_FRAME_HDRSIZE; - - /* Write all frames into the log file exactly once */ - for(p=pList; p; p=p->pDirty){ - int nDbSize; /* 0 normally. Positive == commit flag */ - - /* Check if this page has already been written into the wal file by - ** the current transaction. If so, overwrite the existing frame and - ** set Wal.writeLock to WAL_WRITELOCK_RECKSUM - indicating that - ** checksums must be recomputed when the transaction is committed. */ - if( iFirst && (p->pDirty || isCommit==0) ){ - u32 iWrite = 0; - VVA_ONLY(rc =) sqlite3WalFindFrame(pWal, p->pgno, &iWrite); - assert( rc==SQLITE_OK || iWrite==0 ); - if( iWrite>=iFirst ){ - i64 iOff = walFrameOffset(iWrite, szPage) + WAL_FRAME_HDRSIZE; - void *pData; - if( pWal->iReCksum==0 || iWriteiReCksum ){ - pWal->iReCksum = iWrite; - } - pData = p->pData; - rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOff); - if( rc ) return rc; - p->flags &= ~PGHDR_WAL_APPEND; - continue; - } - } - - iFrame++; - assert( iOffset==walFrameOffset(iFrame, szPage) ); - nDbSize = (isCommit && p->pDirty==0) ? 
nTruncate : 0; - rc = walWriteOneFrame(&w, p, nDbSize, iOffset); - if( rc ) return rc; - pLast = p; - iOffset += szFrame; - p->flags |= PGHDR_WAL_APPEND; - } - - /* Recalculate checksums within the wal file if required. */ - if( isCommit && pWal->iReCksum ){ - rc = walRewriteChecksums(pWal, iFrame); - if( rc ) return rc; - } - - /* If this is the end of a transaction, then we might need to pad - ** the transaction and/or sync the WAL file. - ** - ** Padding and syncing only occur if this set of frames complete a - ** transaction and if PRAGMA synchronous=FULL. If synchronous==NORMAL - ** or synchronous==OFF, then no padding or syncing are needed. - ** - ** If SQLITE_IOCAP_POWERSAFE_OVERWRITE is defined, then padding is not - ** needed and only the sync is done. If padding is needed, then the - ** final frame is repeated (with its commit mark) until the next sector - ** boundary is crossed. Only the part of the WAL prior to the last - ** sector boundary is synced; the part of the last frame that extends - ** past the sector boundary is written after the sync. - */ - if( isCommit && WAL_SYNC_FLAGS(sync_flags)!=0 ){ - int bSync = 1; - if( pWal->padToSectorBoundary ){ - int sectorSize = sqlite3SectorSize(pWal->pWalFd); - w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize; - bSync = (w.iSyncPoint==iOffset); - testcase( bSync ); - while( iOffsettruncateOnCommit && pWal->mxWalSize>=0 ){ - i64 sz = pWal->mxWalSize; - if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){ - sz = walFrameOffset(iFrame+nExtra+1, szPage); - } - walLimitSize(pWal, sz); - pWal->truncateOnCommit = 0; - } - - /* Append data to the wal-index. It is not necessary to lock the - ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index - ** guarantees that there are no other writers, and no data that may - ** be in use by existing readers is being overwritten. 
- */ - iFrame = pWal->hdr.mxFrame; - for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ - if( (p->flags & PGHDR_WAL_APPEND)==0 ) continue; - iFrame++; - rc = walIndexAppend(pWal, iFrame, p->pgno); - } - assert( pLast!=0 || nExtra==0 ); - while( rc==SQLITE_OK && nExtra>0 ){ - iFrame++; - nExtra--; - rc = walIndexAppend(pWal, iFrame, pLast->pgno); - } - - if( rc==SQLITE_OK ){ - /* Update the private copy of the header. */ - pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); - testcase( szPage<=32768 ); - testcase( szPage>=65536 ); - pWal->hdr.mxFrame = iFrame; - if( isCommit ){ - pWal->hdr.iChange++; - pWal->hdr.nPage = nTruncate; - } - /* If this is a commit, update the wal-index header too. */ - if( isCommit ){ - walIndexWriteHdr(pWal); - pWal->iCallback = iFrame; - } - } - - WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok")); - return rc; -} - -/* -** This routine is called to implement sqlite3_wal_checkpoint() and -** related interfaces. -** -** Obtain a CHECKPOINT lock and then backfill as much information as -** we can from WAL into the database. -** -** If parameter xBusy is not NULL, it is a pointer to a busy-handler -** callback. In this case this function runs a blocking checkpoint. 
-*/ -int sqlite3WalCheckpoint( - Wal *pWal, /* Wal connection */ - sqlite3 *db, /* Check this handle's interrupt flag */ - int eMode, /* PASSIVE, FULL, RESTART, or TRUNCATE */ - int (*xBusy)(void*), /* Function to call when busy */ - void *pBusyArg, /* Context argument for xBusyHandler */ - int sync_flags, /* Flags to sync db file with (or 0) */ - int nBuf, /* Size of temporary buffer */ - u8 *zBuf, /* Temporary buffer to use */ - int *pnLog, /* OUT: Number of frames in WAL */ - int *pnCkpt /* OUT: Number of backfilled frames in WAL */ -){ - int rc; /* Return code */ - int isChanged = 0; /* True if a new wal-index header is loaded */ - int eMode2 = eMode; /* Mode to pass to walCheckpoint() */ - int (*xBusy2)(void*) = xBusy; /* Busy handler for eMode2 */ - - assert( pWal->ckptLock==0 ); - assert( pWal->writeLock==0 ); - - /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked - ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ - assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); - - if( pWal->readOnly ) return SQLITE_READONLY; - WALTRACE(("WAL%p: checkpoint begins\n", pWal)); - - /* Enable blocking locks, if possible. If blocking locks are successfully - ** enabled, set xBusy2=0 so that the busy-handler is never invoked. */ - sqlite3WalDb(pWal, db); - (void)walEnableBlocking(pWal); - - /* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive - ** "checkpoint" lock on the database file. - ** EVIDENCE-OF: R-10421-19736 If any other process is running a - ** checkpoint operation at the same time, the lock cannot be obtained and - ** SQLITE_BUSY is returned. - ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured, - ** it will not be invoked in this case. 
- */ - rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); - testcase( rc==SQLITE_BUSY ); - testcase( rc!=SQLITE_OK && xBusy2!=0 ); - if( rc==SQLITE_OK ){ - pWal->ckptLock = 1; - - /* IMPLEMENTATION-OF: R-59782-36818 The SQLITE_CHECKPOINT_FULL, RESTART and - ** TRUNCATE modes also obtain the exclusive "writer" lock on the database - ** file. - ** - ** EVIDENCE-OF: R-60642-04082 If the writer lock cannot be obtained - ** immediately, and a busy-handler is configured, it is invoked and the - ** writer lock retried until either the busy-handler returns 0 or the - ** lock is successfully obtained. - */ - if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ - rc = walBusyLock(pWal, xBusy2, pBusyArg, WAL_WRITE_LOCK, 1); - if( rc==SQLITE_OK ){ - pWal->writeLock = 1; - }else if( rc==SQLITE_BUSY ){ - eMode2 = SQLITE_CHECKPOINT_PASSIVE; - xBusy2 = 0; - rc = SQLITE_OK; - } - } - } - - - /* Read the wal-index header. */ - if( rc==SQLITE_OK ){ - walDisableBlocking(pWal); - rc = walIndexReadHdr(pWal, &isChanged); - (void)walEnableBlocking(pWal); - if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ - sqlite3OsUnfetch(pWal->pDbFd, 0, 0); - } - } - - /* Copy data from the log to the database file. */ - if( rc==SQLITE_OK ){ - - if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ - rc = SQLITE_CORRUPT_BKPT; - }else{ - rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); - } - - /* If no error occurred, set the output variables. */ - if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ - if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; - if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill); - } - } - - if( isChanged ){ - /* If a new wal-index header was loaded before the checkpoint was - ** performed, then the pager-cache associated with pWal is now - ** out of date. So zero the cached wal-index header to ensure that - ** next time the pager opens a snapshot on this database it knows that - ** the cache needs to be reset. 
- */ - memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); - } - - walDisableBlocking(pWal); - sqlite3WalDb(pWal, 0); - - /* Release the locks. */ - sqlite3WalEndWriteTransaction(pWal); - if( pWal->ckptLock ){ - walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); - pWal->ckptLock = 0; - } - WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok")); -#ifdef SQLITE_ENABLE_SETLK_TIMEOUT - if( rc==SQLITE_BUSY_TIMEOUT ) rc = SQLITE_BUSY; -#endif - return (rc==SQLITE_OK && eMode!=eMode2 ? SQLITE_BUSY : rc); -} - -/* Return the value to pass to a sqlite3_wal_hook callback, the -** number of frames in the WAL at the point of the last commit since -** sqlite3WalCallback() was called. If no commits have occurred since -** the last call, then return 0. -*/ -int sqlite3WalCallback(Wal *pWal){ - u32 ret = 0; - if( pWal ){ - ret = pWal->iCallback; - pWal->iCallback = 0; - } - return (int)ret; -} - -/* -** This function is called to change the WAL subsystem into or out -** of locking_mode=EXCLUSIVE. -** -** If op is zero, then attempt to change from locking_mode=EXCLUSIVE -** into locking_mode=NORMAL. This means that we must acquire a lock -** on the pWal->readLock byte. If the WAL is already in locking_mode=NORMAL -** or if the acquisition of the lock fails, then return 0. If the -** transition out of exclusive-mode is successful, return 1. This -** operation must occur while the pager is still holding the exclusive -** lock on the main database file. -** -** If op is one, then change from locking_mode=NORMAL into -** locking_mode=EXCLUSIVE. This means that the pWal->readLock must -** be released. Return 1 if the transition is made and 0 if the -** WAL is already in exclusive-locking mode - meaning that this -** routine is a no-op. The pager must already hold the exclusive lock -** on the main database file before invoking this operation. -** -** If op is negative, then do a dry-run of the op==1 case but do -** not actually change anything. 
The pager uses this to see if it -** should acquire the database exclusive lock prior to invoking -** the op==1 case. -*/ -int sqlite3WalExclusiveMode(Wal *pWal, int op){ - int rc; - assert( pWal->writeLock==0 ); - assert( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE || op==-1 ); - - /* pWal->readLock is usually set, but might be -1 if there was a - ** prior error while attempting to acquire are read-lock. This cannot - ** happen if the connection is actually in exclusive mode (as no xShmLock - ** locks are taken in this case). Nor should the pager attempt to - ** upgrade to exclusive-mode following such an error. - */ - assert( pWal->readLock>=0 || pWal->lockError ); - assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) ); - - if( op==0 ){ - if( pWal->exclusiveMode!=WAL_NORMAL_MODE ){ - pWal->exclusiveMode = WAL_NORMAL_MODE; - if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){ - pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; - } - rc = pWal->exclusiveMode==WAL_NORMAL_MODE; - }else{ - /* Already in locking_mode=NORMAL */ - rc = 0; - } - }else if( op>0 ){ - assert( pWal->exclusiveMode==WAL_NORMAL_MODE ); - assert( pWal->readLock>=0 ); - walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); - pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; - rc = 1; - }else{ - rc = pWal->exclusiveMode==WAL_NORMAL_MODE; - } - return rc; -} - -/* -** Return true if the argument is non-NULL and the WAL module is using -** heap-memory for the wal-index. Otherwise, if the argument is NULL or the -** WAL module is using shared-memory, return false. -*/ -int sqlite3WalHeapMemory(Wal *pWal){ - return (pWal && pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ); -} - -#ifdef SQLITE_ENABLE_SNAPSHOT -/* Create a snapshot object. The content of a snapshot is opaque to -** every other subsystem, so the WAL module can put whatever it needs -** in the object. 
-*/ -int sqlite3WalSnapshotGet(Wal *pWal, sqlite3_snapshot **ppSnapshot){ - int rc = SQLITE_OK; - WalIndexHdr *pRet; - static const u32 aZero[4] = { 0, 0, 0, 0 }; - - assert( pWal->readLock>=0 && pWal->writeLock==0 ); - - if( memcmp(&pWal->hdr.aFrameCksum[0],aZero,16)==0 ){ - *ppSnapshot = 0; - return SQLITE_ERROR; - } - pRet = (WalIndexHdr*)sqlite3_malloc(sizeof(WalIndexHdr)); - if( pRet==0 ){ - rc = SQLITE_NOMEM; - }else{ - memcpy(pRet, &pWal->hdr, sizeof(WalIndexHdr)); - *ppSnapshot = (sqlite3_snapshot*)pRet; - } - - return rc; -} - -/* Try to open on pSnapshot when the next read-transaction starts -*/ -void sqlite3WalSnapshotOpen( - Wal *pWal, - sqlite3_snapshot *pSnapshot -){ - pWal->pSnapshot = (WalIndexHdr*)pSnapshot; -} - -/* -** Return a +ve value if snapshot p1 is newer than p2. A -ve value if -** p1 is older than p2 and zero if p1 and p2 are the same snapshot. -*/ -int sqlite3_snapshot_cmp(sqlite3_snapshot *p1, sqlite3_snapshot *p2){ - WalIndexHdr *pHdr1 = (WalIndexHdr*)p1; - WalIndexHdr *pHdr2 = (WalIndexHdr*)p2; - - /* aSalt[0] is a copy of the value stored in the wal file header. It - ** is incremented each time the wal file is restarted. */ - if( pHdr1->aSalt[0]<pHdr2->aSalt[0] ) return -1; - if( pHdr1->aSalt[0]>pHdr2->aSalt[0] ) return +1; - if( pHdr1->mxFrame<pHdr2->mxFrame ) return -1; - if( pHdr1->mxFrame>pHdr2->mxFrame ) return +1; - return 0; -} - -/* -** The caller currently has a read transaction open on the database. -** This function takes a SHARED lock on the CHECKPOINTER slot and then -** checks if the snapshot passed as the second argument is still -** available. If so, SQLITE_OK is returned. -** -** If the snapshot is not available, SQLITE_ERROR is returned. Or, if -** the CHECKPOINTER lock cannot be obtained, SQLITE_BUSY. If any error -** occurs (any value other than SQLITE_OK is returned), the CHECKPOINTER -** lock is released before returning.
-*/ -int sqlite3WalSnapshotCheck(Wal *pWal, sqlite3_snapshot *pSnapshot){ - int rc; - rc = walLockShared(pWal, WAL_CKPT_LOCK); - if( rc==SQLITE_OK ){ - WalIndexHdr *pNew = (WalIndexHdr*)pSnapshot; - if( memcmp(pNew->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt)) - || pNew->mxFrame<walCkptInfo(pWal)->nBackfillAttempted - ){ - rc = SQLITE_ERROR_SNAPSHOT; - walUnlockShared(pWal, WAL_CKPT_LOCK); - } - } - return rc; -} - -/* -** Release a lock obtained by an earlier successful call to -** sqlite3WalSnapshotCheck(). -*/ -void sqlite3WalSnapshotUnlock(Wal *pWal){ - assert( pWal ); - walUnlockShared(pWal, WAL_CKPT_LOCK); -} - - -#endif /* SQLITE_ENABLE_SNAPSHOT */ - -#ifdef SQLITE_ENABLE_ZIPVFS -/* -** If the argument is not NULL, it points to a Wal object that holds a -** read-lock. This function returns the database page-size if it is known, -** or zero if it is not (or if pWal is NULL). -*/ -int sqlite3WalFramesize(Wal *pWal){ - assert( pWal==0 || pWal->readLock>=0 ); - return (pWal ? pWal->szPage : 0); -} -#endif - -/* Return the sqlite3_file object for the WAL file -*/ -sqlite3_file *sqlite3WalFile(Wal *pWal){ - return pWal->pWalFd; -} - -#endif /* #ifndef SQLITE_OMIT_WAL */ diff --git a/source/libs/tdb/src/sqliteinc/btree.h b/source/libs/tdb/src/sqliteinc/btree.h deleted file mode 100644 index f80ba4a97b..0000000000 --- a/source/libs/tdb/src/sqliteinc/btree.h +++ /dev/null @@ -1,412 +0,0 @@ -/* -** 2001 September 15 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This header file defines the interface that the sqlite B-Tree file -** subsystem. See comments in the source code for a detailed description -** of what each interface routine does.
-*/ -#ifndef SQLITE_BTREE_H -#define SQLITE_BTREE_H - -/* TODO: This definition is just included so other modules compile. It -** needs to be revisited. -*/ -#define SQLITE_N_BTREE_META 16 - -/* -** If defined as non-zero, auto-vacuum is enabled by default. Otherwise -** it must be turned on for each database using "PRAGMA auto_vacuum = 1". -*/ -#ifndef SQLITE_DEFAULT_AUTOVACUUM - #define SQLITE_DEFAULT_AUTOVACUUM 0 -#endif - -#define BTREE_AUTOVACUUM_NONE 0 /* Do not do auto-vacuum */ -#define BTREE_AUTOVACUUM_FULL 1 /* Do full auto-vacuum */ -#define BTREE_AUTOVACUUM_INCR 2 /* Incremental vacuum */ - -/* -** Forward declarations of structure -*/ -typedef struct Btree Btree; -typedef struct BtCursor BtCursor; -typedef struct BtShared BtShared; -typedef struct BtreePayload BtreePayload; - - -int sqlite3BtreeOpen( - sqlite3_vfs *pVfs, /* VFS to use with this b-tree */ - const char *zFilename, /* Name of database file to open */ - sqlite3 *db, /* Associated database connection */ - Btree **ppBtree, /* Return open Btree* here */ - int flags, /* Flags */ - int vfsFlags /* Flags passed through to VFS open */ -); - -/* The flags parameter to sqlite3BtreeOpen can be the bitwise or of the -** following values. -** -** NOTE: These values must match the corresponding PAGER_ values in -** pager.h. 
-*/ -#define BTREE_OMIT_JOURNAL 1 /* Do not create or use a rollback journal */ -#define BTREE_MEMORY 2 /* This is an in-memory DB */ -#define BTREE_SINGLE 4 /* The file contains at most 1 b-tree */ -#define BTREE_UNORDERED 8 /* Use of a hash implementation is OK */ - -int sqlite3BtreeClose(Btree*); -int sqlite3BtreeSetCacheSize(Btree*,int); -int sqlite3BtreeSetSpillSize(Btree*,int); -#if SQLITE_MAX_MMAP_SIZE>0 - int sqlite3BtreeSetMmapLimit(Btree*,sqlite3_int64); -#endif -int sqlite3BtreeSetPagerFlags(Btree*,unsigned); -int sqlite3BtreeSetPageSize(Btree *p, int nPagesize, int nReserve, int eFix); -int sqlite3BtreeGetPageSize(Btree*); -Pgno sqlite3BtreeMaxPageCount(Btree*,Pgno); -Pgno sqlite3BtreeLastPage(Btree*); -int sqlite3BtreeSecureDelete(Btree*,int); -int sqlite3BtreeGetRequestedReserve(Btree*); -int sqlite3BtreeGetReserveNoMutex(Btree *p); -int sqlite3BtreeSetAutoVacuum(Btree *, int); -int sqlite3BtreeGetAutoVacuum(Btree *); -int sqlite3BtreeBeginTrans(Btree*,int,int*); -int sqlite3BtreeCommitPhaseOne(Btree*, const char*); -int sqlite3BtreeCommitPhaseTwo(Btree*, int); -int sqlite3BtreeCommit(Btree*); -int sqlite3BtreeRollback(Btree*,int,int); -int sqlite3BtreeBeginStmt(Btree*,int); -int sqlite3BtreeCreateTable(Btree*, Pgno*, int flags); -int sqlite3BtreeTxnState(Btree*); -int sqlite3BtreeIsInBackup(Btree*); - -void *sqlite3BtreeSchema(Btree *, int, void(*)(void *)); -int sqlite3BtreeSchemaLocked(Btree *pBtree); -#ifndef SQLITE_OMIT_SHARED_CACHE -int sqlite3BtreeLockTable(Btree *pBtree, int iTab, u8 isWriteLock); -#endif - -/* Savepoints are named, nestable SQL transactions mostly implemented */ -/* in vdbe.c and pager.c See https://sqlite.org/lang_savepoint.html */ -int sqlite3BtreeSavepoint(Btree *, int, int); - -/* "Checkpoint" only refers to WAL. 
See https://sqlite.org/wal.html#ckpt */ -#ifndef SQLITE_OMIT_WAL - int sqlite3BtreeCheckpoint(Btree*, int, int *, int *); -#endif - -const char *sqlite3BtreeGetFilename(Btree *); -const char *sqlite3BtreeGetJournalname(Btree *); -int sqlite3BtreeCopyFile(Btree *, Btree *); - -int sqlite3BtreeIncrVacuum(Btree *); - -/* The flags parameter to sqlite3BtreeCreateTable can be the bitwise OR -** of the flags shown below. -** -** Every SQLite table must have either BTREE_INTKEY or BTREE_BLOBKEY set. -** With BTREE_INTKEY, the table key is a 64-bit integer and arbitrary data -** is stored in the leaves. (BTREE_INTKEY is used for SQL tables.) With -** BTREE_BLOBKEY, the key is an arbitrary BLOB and no content is stored -** anywhere - the key is the content. (BTREE_BLOBKEY is used for SQL -** indices.) -*/ -#define BTREE_INTKEY 1 /* Table has only 64-bit signed integer keys */ -#define BTREE_BLOBKEY 2 /* Table has keys only - no data */ - -int sqlite3BtreeDropTable(Btree*, int, int*); -int sqlite3BtreeClearTable(Btree*, int, i64*); -int sqlite3BtreeClearTableOfCursor(BtCursor*); -int sqlite3BtreeTripAllCursors(Btree*, int, int); - -void sqlite3BtreeGetMeta(Btree *pBtree, int idx, u32 *pValue); -int sqlite3BtreeUpdateMeta(Btree*, int idx, u32 value); - -int sqlite3BtreeNewDb(Btree *p); - -/* -** The second parameter to sqlite3BtreeGetMeta or sqlite3BtreeUpdateMeta -** should be one of the following values. The integer values are assigned -** to constants so that the offset of the corresponding field in an -** SQLite database header may be found using the following formula: -** -** offset = 36 + (idx * 4) -** -** For example, the free-page-count field is located at byte offset 36 of -** the database file header. The incr-vacuum-flag field is located at -** byte offset 64 (== 36+4*7). -** -** The BTREE_DATA_VERSION value is not really a value stored in the header. -** It is a read-only number computed by the pager. 
But we merge it with -** the header value access routines since its access pattern is the same. -** Call it a "virtual meta value". -*/ -#define BTREE_FREE_PAGE_COUNT 0 -#define BTREE_SCHEMA_VERSION 1 -#define BTREE_FILE_FORMAT 2 -#define BTREE_DEFAULT_CACHE_SIZE 3 -#define BTREE_LARGEST_ROOT_PAGE 4 -#define BTREE_TEXT_ENCODING 5 -#define BTREE_USER_VERSION 6 -#define BTREE_INCR_VACUUM 7 -#define BTREE_APPLICATION_ID 8 -#define BTREE_DATA_VERSION 15 /* A virtual meta-value */ - -/* -** Kinds of hints that can be passed into the sqlite3BtreeCursorHint() -** interface. -** -** BTREE_HINT_RANGE (arguments: Expr*, Mem*) -** -** The first argument is an Expr* (which is guaranteed to be constant for -** the lifetime of the cursor) that defines constraints on which rows -** might be fetched with this cursor. The Expr* tree may contain -** TK_REGISTER nodes that refer to values stored in the array of registers -** passed as the second parameter. In other words, if Expr.op==TK_REGISTER -** then the value of the node is the value in Mem[pExpr.iTable]. Any -** TK_COLUMN node in the expression tree refers to the Expr.iColumn-th -** column of the b-tree of the cursor. The Expr tree will not contain -** any function calls nor subqueries nor references to b-trees other than -** the cursor being hinted. -** -** The design of the _RANGE hint is aid b-tree implementations that try -** to prefetch content from remote machines - to provide those -** implementations with limits on what needs to be prefetched and thereby -** reduce network bandwidth. -** -** Note that BTREE_HINT_FLAGS with BTREE_BULKLOAD is the only hint used by -** standard SQLite. The other hints are provided for extentions that use -** the SQLite parser and code generator but substitute their own storage -** engine. 
-*/ -#define BTREE_HINT_RANGE 0 /* Range constraints on queries */ - -/* -** Values that may be OR'd together to form the argument to the -** BTREE_HINT_FLAGS hint for sqlite3BtreeCursorHint(): -** -** The BTREE_BULKLOAD flag is set on index cursors when the index is going -** to be filled with content that is already in sorted order. -** -** The BTREE_SEEK_EQ flag is set on cursors that will get OP_SeekGE or -** OP_SeekLE opcodes for a range search, but where the range of entries -** selected will all have the same key. In other words, the cursor will -** be used only for equality key searches. -** -*/ -#define BTREE_BULKLOAD 0x00000001 /* Used to full index in sorted order */ -#define BTREE_SEEK_EQ 0x00000002 /* EQ seeks only - no range seeks */ - -/* -** Flags passed as the third argument to sqlite3BtreeCursor(). -** -** For read-only cursors the wrFlag argument is always zero. For read-write -** cursors it may be set to either (BTREE_WRCSR|BTREE_FORDELETE) or just -** (BTREE_WRCSR). If the BTREE_FORDELETE bit is set, then the cursor will -** only be used by SQLite for the following: -** -** * to seek to and then delete specific entries, and/or -** -** * to read values that will be used to create keys that other -** BTREE_FORDELETE cursors will seek to and delete. -** -** The BTREE_FORDELETE flag is an optimization hint. It is not used by -** by this, the native b-tree engine of SQLite, but it is available to -** alternative storage engines that might be substituted in place of this -** b-tree system. For alternative storage engines in which a delete of -** the main table row automatically deletes corresponding index rows, -** the FORDELETE flag hint allows those alternative storage engines to -** skip a lot of work. Namely: FORDELETE cursors may treat all SEEK -** and DELETE operations as no-ops, and any READ operation against a -** FORDELETE cursor may return a null row: 0x01 0x00. 
-*/ -#define BTREE_WRCSR 0x00000004 /* read-write cursor */ -#define BTREE_FORDELETE 0x00000008 /* Cursor is for seek/delete only */ - -int sqlite3BtreeCursor( - Btree*, /* BTree containing table to open */ - Pgno iTable, /* Index of root page */ - int wrFlag, /* 1 for writing. 0 for read-only */ - struct KeyInfo*, /* First argument to compare function */ - BtCursor *pCursor /* Space to write cursor structure */ -); -BtCursor *sqlite3BtreeFakeValidCursor(void); -int sqlite3BtreeCursorSize(void); -void sqlite3BtreeCursorZero(BtCursor*); -void sqlite3BtreeCursorHintFlags(BtCursor*, unsigned); -#ifdef SQLITE_ENABLE_CURSOR_HINTS -void sqlite3BtreeCursorHint(BtCursor*, int, ...); -#endif - -int sqlite3BtreeCloseCursor(BtCursor*); -int sqlite3BtreeTableMoveto( - BtCursor*, - i64 intKey, - int bias, - int *pRes -); -int sqlite3BtreeIndexMoveto( - BtCursor*, - UnpackedRecord *pUnKey, - int *pRes -); -int sqlite3BtreeCursorHasMoved(BtCursor*); -int sqlite3BtreeCursorRestore(BtCursor*, int*); -int sqlite3BtreeDelete(BtCursor*, u8 flags); - -/* Allowed flags for sqlite3BtreeDelete() and sqlite3BtreeInsert() */ -#define BTREE_SAVEPOSITION 0x02 /* Leave cursor pointing at NEXT or PREV */ -#define BTREE_AUXDELETE 0x04 /* not the primary delete operation */ -#define BTREE_APPEND 0x08 /* Insert is likely an append */ -#define BTREE_PREFORMAT 0x80 /* Inserted data is a preformated cell */ - -/* An instance of the BtreePayload object describes the content of a single -** entry in either an index or table btree. -** -** Index btrees (used for indexes and also WITHOUT ROWID tables) contain -** an arbitrary key and no data. These btrees have pKey,nKey set to the -** key and the pData,nData,nZero fields are uninitialized. The aMem,nMem -** fields give an array of Mem objects that are a decomposition of the key. -** The nMem field might be zero, indicating that no decomposition is available. 
-** -** Table btrees (used for rowid tables) contain an integer rowid used as -** the key and passed in the nKey field. The pKey field is zero. -** pData,nData hold the content of the new entry. nZero extra zero bytes -** are appended to the end of the content when constructing the entry. -** The aMem,nMem fields are uninitialized for table btrees. -** -** Field usage summary: -** -** Table BTrees Index Btrees -** -** pKey always NULL encoded key -** nKey the ROWID length of pKey -** pData data not used -** aMem not used decomposed key value -** nMem not used entries in aMem -** nData length of pData not used -** nZero extra zeros after pData not used -** -** This object is used to pass information into sqlite3BtreeInsert(). The -** same information used to be passed as five separate parameters. But placing -** the information into this object helps to keep the interface more -** organized and understandable, and it also helps the resulting code to -** run a little faster by using fewer registers for parameter passing. -*/ -struct BtreePayload { - const void *pKey; /* Key content for indexes. NULL for tables */ - sqlite3_int64 nKey; /* Size of pKey for indexes. PRIMARY KEY for tabs */ - const void *pData; /* Data for tables. */ - sqlite3_value *aMem; /* First of nMem value in the unpacked pKey */ - u16 nMem; /* Number of aMem[] value. Might be zero */ - int nData; /* Size of pData. 0 if none. 
*/ - int nZero; /* Extra zero data appended after pData,nData */ -}; - -int sqlite3BtreeInsert(BtCursor*, const BtreePayload *pPayload, - int flags, int seekResult); -int sqlite3BtreeFirst(BtCursor*, int *pRes); -int sqlite3BtreeLast(BtCursor*, int *pRes); -int sqlite3BtreeNext(BtCursor*, int flags); -int sqlite3BtreeEof(BtCursor*); -int sqlite3BtreePrevious(BtCursor*, int flags); -i64 sqlite3BtreeIntegerKey(BtCursor*); -void sqlite3BtreeCursorPin(BtCursor*); -void sqlite3BtreeCursorUnpin(BtCursor*); -#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC -i64 sqlite3BtreeOffset(BtCursor*); -#endif -int sqlite3BtreePayload(BtCursor*, u32 offset, u32 amt, void*); -const void *sqlite3BtreePayloadFetch(BtCursor*, u32 *pAmt); -u32 sqlite3BtreePayloadSize(BtCursor*); -sqlite3_int64 sqlite3BtreeMaxRecordSize(BtCursor*); - -char *sqlite3BtreeIntegrityCheck(sqlite3*,Btree*,Pgno*aRoot,int nRoot,int,int*); -struct Pager *sqlite3BtreePager(Btree*); -i64 sqlite3BtreeRowCountEst(BtCursor*); - -#ifndef SQLITE_OMIT_INCRBLOB -int sqlite3BtreePayloadChecked(BtCursor*, u32 offset, u32 amt, void*); -int sqlite3BtreePutData(BtCursor*, u32 offset, u32 amt, void*); -void sqlite3BtreeIncrblobCursor(BtCursor *); -#endif -void sqlite3BtreeClearCursor(BtCursor *); -int sqlite3BtreeSetVersion(Btree *pBt, int iVersion); -int sqlite3BtreeCursorHasHint(BtCursor*, unsigned int mask); -int sqlite3BtreeIsReadonly(Btree *pBt); -int sqlite3HeaderSizeBtree(void); - -#ifdef SQLITE_DEBUG -sqlite3_uint64 sqlite3BtreeSeekCount(Btree*); -#else -# define sqlite3BtreeSeekCount(X) 0 -#endif - -#ifndef NDEBUG -int sqlite3BtreeCursorIsValid(BtCursor*); -#endif -int sqlite3BtreeCursorIsValidNN(BtCursor*); - -int sqlite3BtreeCount(sqlite3*, BtCursor*, i64*); - -#ifdef SQLITE_TEST -int sqlite3BtreeCursorInfo(BtCursor*, int*, int); -void sqlite3BtreeCursorList(Btree*); -#endif - -#ifndef SQLITE_OMIT_WAL - int sqlite3BtreeCheckpoint(Btree*, int, int *, int *); -#endif - -int sqlite3BtreeTransferRow(BtCursor*, BtCursor*, i64); - -/* 
-** If we are not using shared cache, then there is no need to -** use mutexes to access the BtShared structures. So make the -** Enter and Leave procedures no-ops. -*/ -#ifndef SQLITE_OMIT_SHARED_CACHE - void sqlite3BtreeEnter(Btree*); - void sqlite3BtreeEnterAll(sqlite3*); - int sqlite3BtreeSharable(Btree*); - void sqlite3BtreeEnterCursor(BtCursor*); - int sqlite3BtreeConnectionCount(Btree*); -#else -# define sqlite3BtreeEnter(X) -# define sqlite3BtreeEnterAll(X) -# define sqlite3BtreeSharable(X) 0 -# define sqlite3BtreeEnterCursor(X) -# define sqlite3BtreeConnectionCount(X) 1 -#endif - -#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE - void sqlite3BtreeLeave(Btree*); - void sqlite3BtreeLeaveCursor(BtCursor*); - void sqlite3BtreeLeaveAll(sqlite3*); -#ifndef NDEBUG - /* These routines are used inside assert() statements only. */ - int sqlite3BtreeHoldsMutex(Btree*); - int sqlite3BtreeHoldsAllMutexes(sqlite3*); - int sqlite3SchemaMutexHeld(sqlite3*,int,Schema*); -#endif -#else - -# define sqlite3BtreeLeave(X) -# define sqlite3BtreeLeaveCursor(X) -# define sqlite3BtreeLeaveAll(X) - -# define sqlite3BtreeHoldsMutex(X) 1 -# define sqlite3BtreeHoldsAllMutexes(X) 1 -# define sqlite3SchemaMutexHeld(X,Y,Z) 1 -#endif - - -#endif /* SQLITE_BTREE_H */ diff --git a/source/libs/tdb/src/sqliteinc/btreeInt.h b/source/libs/tdb/src/sqliteinc/btreeInt.h deleted file mode 100644 index 1076fd8f2c..0000000000 --- a/source/libs/tdb/src/sqliteinc/btreeInt.h +++ /dev/null @@ -1,729 +0,0 @@ -/* -** 2004 April 6 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This file implements an external (disk-based) database using BTrees. 
-** For a detailed discussion of BTrees, refer to -** -** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3: -** "Sorting And Searching", pages 473-480. Addison-Wesley -** Publishing Company, Reading, Massachusetts. -** -** The basic idea is that each page of the file contains N database -** entries and N+1 pointers to subpages. -** -** ---------------------------------------------------------------- -** | Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N-1) | Ptr(N) | -** ---------------------------------------------------------------- -** -** All of the keys on the page that Ptr(0) points to have values less -** than Key(0). All of the keys on page Ptr(1) and its subpages have -** values greater than Key(0) and less than Key(1). All of the keys -** on Ptr(N) and its subpages have values greater than Key(N-1). And -** so forth. -** -** Finding a particular key requires reading O(log(M)) pages from the -** disk where M is the number of entries in the tree. -** -** In this implementation, a single file can hold one or more separate -** BTrees. Each BTree is identified by the index of its root page. The -** key and data for any entry are combined to form the "payload". A -** fixed amount of payload can be carried directly on the database -** page. If the payload is larger than the preset amount then surplus -** bytes are stored on overflow pages. The payload for an entry -** and the preceding pointer are combined to form a "Cell". Each -** page has a small header which contains the Ptr(N) pointer and other -** information such as the size of key and data. -** -** FORMAT DETAILS -** -** The file is divided into pages. The first page is called page 1, -** the second is page 2, and so forth. A page number of zero indicates -** "no such page". The page size can be any power of 2 between 512 and 65536. -** Each page can be either a btree page, a freelist page, an overflow -** page, or a pointer-map page. -** -** The first page is always a btree page. 
The first 100 bytes of the first -** page contain a special header (the "file header") that describes the file. -** The format of the file header is as follows: -** -** OFFSET SIZE DESCRIPTION -** 0 16 Header string: "SQLite format 3\000" -** 16 2 Page size in bytes. (1 means 65536) -** 18 1 File format write version -** 19 1 File format read version -** 20 1 Bytes of unused space at the end of each page -** 21 1 Max embedded payload fraction (must be 64) -** 22 1 Min embedded payload fraction (must be 32) -** 23 1 Min leaf payload fraction (must be 32) -** 24 4 File change counter -** 28 4 Reserved for future use -** 32 4 First freelist page -** 36 4 Number of freelist pages in the file -** 40 60 15 4-byte meta values passed to higher layers -** -** 40 4 Schema cookie -** 44 4 File format of schema layer -** 48 4 Size of page cache -** 52 4 Largest root-page (auto/incr_vacuum) -** 56 4 1=UTF-8 2=UTF16le 3=UTF16be -** 60 4 User version -** 64 4 Incremental vacuum mode -** 68 4 Application-ID -** 72 20 unused -** 92 4 The version-valid-for number -** 96 4 SQLITE_VERSION_NUMBER -** -** All of the integer values are big-endian (most significant byte first). -** -** The file change counter is incremented when the database is changed -** This counter allows other processes to know when the file has changed -** and thus when they need to flush their cache. -** -** The max embedded payload fraction is the amount of the total usable -** space in a page that can be consumed by a single cell for standard -** B-tree (non-LEAFDATA) tables. A value of 255 means 100%. The default -** is to limit the maximum cell size so that at least 4 cells will fit -** on one page. Thus the default max embedded payload fraction is 64. -** -** If the payload for a cell is larger than the max payload, then extra -** payload is spilled to overflow pages. 
Once an overflow page is allocated, -** as many bytes as possible are moved into the overflow pages without letting -** the cell size drop below the min embedded payload fraction. -** -** The min leaf payload fraction is like the min embedded payload fraction -** except that it applies to leaf nodes in a LEAFDATA tree. The maximum -** payload fraction for a LEAFDATA tree is always 100% (or 255) and it -** not specified in the header. -** -** Each btree pages is divided into three sections: The header, the -** cell pointer array, and the cell content area. Page 1 also has a 100-byte -** file header that occurs before the page header. -** -** |----------------| -** | file header | 100 bytes. Page 1 only. -** |----------------| -** | page header | 8 bytes for leaves. 12 bytes for interior nodes -** |----------------| -** | cell pointer | | 2 bytes per cell. Sorted order. -** | array | | Grows downward -** | | v -** |----------------| -** | unallocated | -** | space | -** |----------------| ^ Grows upwards -** | cell content | | Arbitrary order interspersed with freeblocks. -** | area | | and free space fragments. -** |----------------| -** -** The page headers looks like this: -** -** OFFSET SIZE DESCRIPTION -** 0 1 Flags. 1: intkey, 2: zerodata, 4: leafdata, 8: leaf -** 1 2 byte offset to the first freeblock -** 3 2 number of cells on this page -** 5 2 first byte of the cell content area -** 7 1 number of fragmented free bytes -** 8 4 Right child (the Ptr(N) value). Omitted on leaves. -** -** The flags define the format of this btree page. The leaf flag means that -** this page has no children. The zerodata flag means that this page carries -** only keys and no data. The intkey flag means that the key is an integer -** which is stored in the key size entry of the cell header rather than in -** the payload area. -** -** The cell pointer array begins on the first byte after the page header. 
-** The cell pointer array contains zero or more 2-byte numbers which are -** offsets from the beginning of the page to the cell content in the cell -** content area. The cell pointers occur in sorted order. The system strives -** to keep free space after the last cell pointer so that new cells can -** be easily added without having to defragment the page. -** -** Cell content is stored at the very end of the page and grows toward the -** beginning of the page. -** -** Unused space within the cell content area is collected into a linked list of -** freeblocks. Each freeblock is at least 4 bytes in size. The byte offset -** to the first freeblock is given in the header. Freeblocks occur in -** increasing order. Because a freeblock must be at least 4 bytes in size, -** any group of 3 or fewer unused bytes in the cell content area cannot -** exist on the freeblock chain. A group of 3 or fewer free bytes is called -** a fragment. The total number of bytes in all fragments is recorded. -** in the page header at offset 7. -** -** SIZE DESCRIPTION -** 2 Byte offset of the next freeblock -** 2 Bytes in this freeblock -** -** Cells are of variable length. Cells are stored in the cell content area at -** the end of the page. Pointers to the cells are in the cell pointer array -** that immediately follows the page header. Cells is not necessarily -** contiguous or in order, but cell pointers are contiguous and in order. -** -** Cell content makes use of variable length integers. A variable -** length integer is 1 to 9 bytes where the lower 7 bits of each -** byte are used. The integer consists of all bytes that have bit 8 set and -** the first byte with bit 8 clear. The most significant byte of the integer -** appears first. A variable-length integer may not be more than 9 bytes long. -** As a special case, all 8 bytes of the 9th byte are used as data. This -** allows a 64-bit integer to be encoded in 9 bytes. 
-** -** 0x00 becomes 0x00000000 -** 0x7f becomes 0x0000007f -** 0x81 0x00 becomes 0x00000080 -** 0x82 0x00 becomes 0x00000100 -** 0x80 0x7f becomes 0x0000007f -** 0x8a 0x91 0xd1 0xac 0x78 becomes 0x12345678 -** 0x81 0x81 0x81 0x81 0x01 becomes 0x10204081 -** -** Variable length integers are used for rowids and to hold the number of -** bytes of key and data in a btree cell. -** -** The content of a cell looks like this: -** -** SIZE DESCRIPTION -** 4 Page number of the left child. Omitted if leaf flag is set. -** var Number of bytes of data. Omitted if the zerodata flag is set. -** var Number of bytes of key. Or the key itself if intkey flag is set. -** * Payload -** 4 First page of the overflow chain. Omitted if no overflow -** -** Overflow pages form a linked list. Each page except the last is completely -** filled with data (pagesize - 4 bytes). The last page can have as little -** as 1 byte of data. -** -** SIZE DESCRIPTION -** 4 Page number of next overflow page -** * Data -** -** Freelist pages come in two subtypes: trunk pages and leaf pages. The -** file header points to the first in a linked list of trunk page. Each trunk -** page points to multiple leaf pages. The content of a leaf page is -** unspecified. A trunk page looks like this: -** -** SIZE DESCRIPTION -** 4 Page number of next trunk page -** 4 Number of leaf pointers on this page -** * zero or more pages numbers of leaves -*/ -#include "sqliteInt.h" - - -/* The following value is the maximum cell size assuming a maximum page -** size give above. -*/ -#define MX_CELL_SIZE(pBt) ((int)(pBt->pageSize-8)) - -/* The maximum number of cells on a single page of the database. This -** assumes a minimum cell size of 6 bytes (4 bytes for the cell itself -** plus 2 bytes for the index to the cell in the page header). Such -** small cells will be rare, but they are possible. 
-*/ -#define MX_CELL(pBt) ((pBt->pageSize-8)/6) - -/* Forward declarations */ -typedef struct MemPage MemPage; -typedef struct BtLock BtLock; -typedef struct CellInfo CellInfo; - -/* -** This is a magic string that appears at the beginning of every -** SQLite database in order to identify the file as a real database. -** -** You can change this value at compile-time by specifying a -** -DSQLITE_FILE_HEADER="..." on the compiler command-line. The -** header must be exactly 16 bytes including the zero-terminator so -** the string itself should be 15 characters long. If you change -** the header, then your custom library will not be able to read -** databases generated by the standard tools and the standard tools -** will not be able to read databases created by your custom library. -*/ -#ifndef SQLITE_FILE_HEADER /* 123456789 123456 */ -# define SQLITE_FILE_HEADER "SQLite format 3" -#endif - -/* -** Page type flags. An ORed combination of these flags appear as the -** first byte of on-disk image of every BTree page. -*/ -#define PTF_INTKEY 0x01 -#define PTF_ZERODATA 0x02 -#define PTF_LEAFDATA 0x04 -#define PTF_LEAF 0x08 - -/* -** An instance of this object stores information about each a single database -** page that has been loaded into memory. The information in this object -** is derived from the raw on-disk page content. -** -** As each database page is loaded into memory, the pager allocats an -** instance of this object and zeros the first 8 bytes. (This is the -** "extra" information associated with each page of the pager.) -** -** Access to all fields of this structure is controlled by the mutex -** stored in MemPage.pBt->mutex. -*/ -struct MemPage { - u8 isInit; /* True if previously initialized. MUST BE FIRST! */ - u8 intKey; /* True if table b-trees. 
False for index b-trees */ - u8 intKeyLeaf; /* True if the leaf of an intKey table */ - Pgno pgno; /* Page number for this page */ - /* Only the first 8 bytes (above) are zeroed by pager.c when a new page - ** is allocated. All fields that follow must be initialized before use */ - u8 leaf; /* True if a leaf page */ - u8 hdrOffset; /* 100 for page 1. 0 otherwise */ - u8 childPtrSize; /* 0 if leaf==1. 4 if leaf==0 */ - u8 max1bytePayload; /* min(maxLocal,127) */ - u8 nOverflow; /* Number of overflow cell bodies in aCell[] */ - u16 maxLocal; /* Copy of BtShared.maxLocal or BtShared.maxLeaf */ - u16 minLocal; /* Copy of BtShared.minLocal or BtShared.minLeaf */ - u16 cellOffset; /* Index in aData of first cell pointer */ - int nFree; /* Number of free bytes on the page. -1 for unknown */ - u16 nCell; /* Number of cells on this page, local and ovfl */ - u16 maskPage; /* Mask for page offset */ - u16 aiOvfl[4]; /* Insert the i-th overflow cell before the aiOvfl-th - ** non-overflow cell */ - u8 *apOvfl[4]; /* Pointers to the body of overflow cells */ - BtShared *pBt; /* Pointer to BtShared that this page is part of */ - u8 *aData; /* Pointer to disk image of the page data */ - u8 *aDataEnd; /* One byte past the end of usable data */ - u8 *aCellIdx; /* The cell index area */ - u8 *aDataOfst; /* Same as aData for leaves. aData+4 for interior */ - DbPage *pDbPage; /* Pager page handle */ - u16 (*xCellSize)(MemPage*,u8*); /* cellSizePtr method */ - void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */ -}; - -/* -** A linked list of the following structures is stored at BtShared.pLock. -** Locks are added (or upgraded from READ_LOCK to WRITE_LOCK) when a cursor -** is opened on the table with root page BtShared.iTable. Locks are removed -** from this list when a transaction is committed or rolled back, or when -** a btree handle is closed. 
-*/ -struct BtLock { - Btree *pBtree; /* Btree handle holding this lock */ - Pgno iTable; /* Root page of table */ - u8 eLock; /* READ_LOCK or WRITE_LOCK */ - BtLock *pNext; /* Next in BtShared.pLock list */ -}; - -/* Candidate values for BtLock.eLock */ -#define READ_LOCK 1 -#define WRITE_LOCK 2 - -/* A Btree handle -** -** A database connection contains a pointer to an instance of -** this object for every database file that it has open. This structure -** is opaque to the database connection. The database connection cannot -** see the internals of this structure and only deals with pointers to -** this structure. -** -** For some database files, the same underlying database cache might be -** shared between multiple connections. In that case, each connection -** has it own instance of this object. But each instance of this object -** points to the same BtShared object. The database cache and the -** schema associated with the database file are all contained within -** the BtShared object. -** -** All fields in this structure are accessed under sqlite3.mutex. -** The pBt pointer itself may not be changed while there exists cursors -** in the referenced BtShared that point back to this Btree since those -** cursors have to go through this Btree to find their BtShared and -** they often do so without holding sqlite3.mutex. 
-*/ -struct Btree { - sqlite3 *db; /* The database connection holding this btree */ - BtShared *pBt; /* Sharable content of this btree */ - u8 inTrans; /* TRANS_NONE, TRANS_READ or TRANS_WRITE */ - u8 sharable; /* True if we can share pBt with another db */ - u8 locked; /* True if db currently has pBt locked */ - u8 hasIncrblobCur; /* True if there are one or more Incrblob cursors */ - int wantToLock; /* Number of nested calls to sqlite3BtreeEnter() */ - int nBackup; /* Number of backup operations reading this btree */ - u32 iBDataVersion; /* Combines with pBt->pPager->iDataVersion */ - Btree *pNext; /* List of other sharable Btrees from the same db */ - Btree *pPrev; /* Back pointer of the same list */ -#ifdef SQLITE_DEBUG - u64 nSeek; /* Calls to sqlite3BtreeMovetoUnpacked() */ -#endif -#ifndef SQLITE_OMIT_SHARED_CACHE - BtLock lock; /* Object used to lock page 1 */ -#endif -}; - -/* -** Btree.inTrans may take one of the following values. -** -** If the shared-data extension is enabled, there may be multiple users -** of the Btree structure. At most one of these may open a write transaction, -** but any number may have active read transactions. -** -** These values must match SQLITE_TXN_NONE, SQLITE_TXN_READ, and -** SQLITE_TXN_WRITE -*/ -#define TRANS_NONE 0 -#define TRANS_READ 1 -#define TRANS_WRITE 2 - -#if TRANS_NONE!=SQLITE_TXN_NONE -# error wrong numeric code for no-transaction -#endif -#if TRANS_READ!=SQLITE_TXN_READ -# error wrong numeric code for read-transaction -#endif -#if TRANS_WRITE!=SQLITE_TXN_WRITE -# error wrong numeric code for write-transaction -#endif - - -/* -** An instance of this object represents a single database file. -** -** A single database file can be in use at the same time by two -** or more database connections. When two or more connections are -** sharing the same database file, each connection has it own -** private Btree object for the file and each of those Btrees points -** to this one BtShared object. 
BtShared.nRef is the number of -** connections currently sharing this database file. -** -** Fields in this structure are accessed under the BtShared.mutex -** mutex, except for nRef and pNext which are accessed under the -** global SQLITE_MUTEX_STATIC_MAIN mutex. The pPager field -** may not be modified once it is initially set as long as nRef>0. -** The pSchema field may be set once under BtShared.mutex and -** thereafter is unchanged as long as nRef>0. -** -** isPending: -** -** If a BtShared client fails to obtain a write-lock on a database -** table (because there exists one or more read-locks on the table), -** the shared-cache enters 'pending-lock' state and isPending is -** set to true. -** -** The shared-cache leaves the 'pending lock' state when either of -** the following occur: -** -** 1) The current writer (BtShared.pWriter) concludes its transaction, OR -** 2) The number of locks held by other connections drops to zero. -** -** while in the 'pending-lock' state, no connection may start a new -** transaction. -** -** This feature is included to help prevent writer-starvation. -*/ -struct BtShared { - Pager *pPager; /* The page cache */ - sqlite3 *db; /* Database connection currently using this Btree */ - BtCursor *pCursor; /* A list of all open cursors */ - MemPage *pPage1; /* First page of the database */ - u8 openFlags; /* Flags to sqlite3BtreeOpen() */ -#ifndef SQLITE_OMIT_AUTOVACUUM - u8 autoVacuum; /* True if auto-vacuum is enabled */ - u8 incrVacuum; /* True if incr-vacuum is enabled */ - u8 bDoTruncate; /* True to truncate db on commit */ -#endif - u8 inTransaction; /* Transaction state */ - u8 max1bytePayload; /* Maximum first byte of cell for a 1-byte payload */ - u8 nReserveWanted; /* Desired number of extra bytes per page */ - u16 btsFlags; /* Boolean parameters. 
See BTS_* macros below */ - u16 maxLocal; /* Maximum local payload in non-LEAFDATA tables */ - u16 minLocal; /* Minimum local payload in non-LEAFDATA tables */ - u16 maxLeaf; /* Maximum local payload in a LEAFDATA table */ - u16 minLeaf; /* Minimum local payload in a LEAFDATA table */ - u32 pageSize; /* Total number of bytes on a page */ - u32 usableSize; /* Number of usable bytes on each page */ - int nTransaction; /* Number of open transactions (read + write) */ - u32 nPage; /* Number of pages in the database */ - void *pSchema; /* Pointer to space allocated by sqlite3BtreeSchema() */ - void (*xFreeSchema)(void*); /* Destructor for BtShared.pSchema */ - sqlite3_mutex *mutex; /* Non-recursive mutex required to access this object */ - Bitvec *pHasContent; /* Set of pages moved to free-list this transaction */ -#ifndef SQLITE_OMIT_SHARED_CACHE - int nRef; /* Number of references to this structure */ - BtShared *pNext; /* Next on a list of sharable BtShared structs */ - BtLock *pLock; /* List of locks held on this shared-btree struct */ - Btree *pWriter; /* Btree with currently open write transaction */ -#endif - u8 *pTmpSpace; /* Temp space sufficient to hold a single cell */ - int nPreformatSize; /* Size of last cell written by TransferRow() */ -}; - -/* -** Allowed values for BtShared.btsFlags -*/ -#define BTS_READ_ONLY 0x0001 /* Underlying file is readonly */ -#define BTS_PAGESIZE_FIXED 0x0002 /* Page size can no longer be changed */ -#define BTS_SECURE_DELETE 0x0004 /* PRAGMA secure_delete is enabled */ -#define BTS_OVERWRITE 0x0008 /* Overwrite deleted content with zeros */ -#define BTS_FAST_SECURE 0x000c /* Combination of the previous two */ -#define BTS_INITIALLY_EMPTY 0x0010 /* Database was empty at trans start */ -#define BTS_NO_WAL 0x0020 /* Do not open write-ahead-log files */ -#define BTS_EXCLUSIVE 0x0040 /* pWriter has an exclusive lock */ -#define BTS_PENDING 0x0080 /* Waiting for read-locks to clear */ - -/* -** An instance of the following structure 
is used to hold information -** about a cell. The parseCellPtr() function fills in this structure -** based on information extract from the raw disk page. -*/ -struct CellInfo { - i64 nKey; /* The key for INTKEY tables, or nPayload otherwise */ - u8 *pPayload; /* Pointer to the start of payload */ - u32 nPayload; /* Bytes of payload */ - u16 nLocal; /* Amount of payload held locally, not on overflow */ - u16 nSize; /* Size of the cell content on the main b-tree page */ -}; - -/* -** Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than -** this will be declared corrupt. This value is calculated based on a -** maximum database size of 2^31 pages a minimum fanout of 2 for a -** root-node and 3 for all other internal nodes. -** -** If a tree that appears to be taller than this is encountered, it is -** assumed that the database is corrupt. -*/ -#define BTCURSOR_MAX_DEPTH 20 - -/* -** A cursor is a pointer to a particular entry within a particular -** b-tree within a database file. -** -** The entry is identified by its MemPage and the index in -** MemPage.aCell[] of the entry. -** -** A single database file can be shared by two more database connections, -** but cursors cannot be shared. Each cursor is associated with a -** particular database connection identified BtCursor.pBtree.db. -** -** Fields in this structure are accessed under the BtShared.mutex -** found at self->pBt->mutex. -** -** skipNext meaning: -** The meaning of skipNext depends on the value of eState: -** -** eState Meaning of skipNext -** VALID skipNext is meaningless and is ignored -** INVALID skipNext is meaningless and is ignored -** SKIPNEXT sqlite3BtreeNext() is a no-op if skipNext>0 and -** sqlite3BtreePrevious() is no-op if skipNext<0. -** REQUIRESEEK restoreCursorPosition() restores the cursor to -** eState=SKIPNEXT if skipNext!=0 -** FAULT skipNext holds the cursor fault error code. 
-*/ -struct BtCursor { - u8 eState; /* One of the CURSOR_XXX constants (see below) */ - u8 curFlags; /* zero or more BTCF_* flags defined below */ - u8 curPagerFlags; /* Flags to send to sqlite3PagerGet() */ - u8 hints; /* As configured by CursorSetHints() */ - int skipNext; /* Prev() is noop if negative. Next() is noop if positive. - ** Error code if eState==CURSOR_FAULT */ - Btree *pBtree; /* The Btree to which this cursor belongs */ - Pgno *aOverflow; /* Cache of overflow page locations */ - void *pKey; /* Saved key that was cursor last known position */ - /* All fields above are zeroed when the cursor is allocated. See - ** sqlite3BtreeCursorZero(). Fields that follow must be manually - ** initialized. */ -#define BTCURSOR_FIRST_UNINIT pBt /* Name of first uninitialized field */ - BtShared *pBt; /* The BtShared this cursor points to */ - BtCursor *pNext; /* Forms a linked list of all cursors */ - CellInfo info; /* A parse of the cell we are pointing at */ - i64 nKey; /* Size of pKey, or last integer key */ - Pgno pgnoRoot; /* The root page of this tree */ - i8 iPage; /* Index of current page in apPage */ - u8 curIntKey; /* Value of apPage[0]->intKey */ - u16 ix; /* Current index for apPage[iPage] */ - u16 aiIdx[BTCURSOR_MAX_DEPTH-1]; /* Current index in apPage[i] */ - struct KeyInfo *pKeyInfo; /* Arg passed to comparison function */ - MemPage *pPage; /* Current page */ - MemPage *apPage[BTCURSOR_MAX_DEPTH-1]; /* Stack of parents of current page */ -}; - -/* -** Legal values for BtCursor.curFlags -*/ -#define BTCF_WriteFlag 0x01 /* True if a write cursor */ -#define BTCF_ValidNKey 0x02 /* True if info.nKey is valid */ -#define BTCF_ValidOvfl 0x04 /* True if aOverflow is valid */ -#define BTCF_AtLast 0x08 /* Cursor is pointing ot the last entry */ -#define BTCF_Incrblob 0x10 /* True if an incremental I/O handle */ -#define BTCF_Multiple 0x20 /* Maybe another cursor on the same btree */ -#define BTCF_Pinned 0x40 /* Cursor is busy and cannot be moved */ - -/* -** 
Potential values for BtCursor.eState. -** -** CURSOR_INVALID: -** Cursor does not point to a valid entry. This can happen (for example) -** because the table is empty or because BtreeCursorFirst() has not been -** called. -** -** CURSOR_VALID: -** Cursor points to a valid entry. getPayload() etc. may be called. -** -** CURSOR_SKIPNEXT: -** Cursor is valid except that the Cursor.skipNext field is non-zero -** indicating that the next sqlite3BtreeNext() or sqlite3BtreePrevious() -** operation should be a no-op. -** -** CURSOR_REQUIRESEEK: -** The table that this cursor was opened on still exists, but has been -** modified since the cursor was last used. The cursor position is saved -** in variables BtCursor.pKey and BtCursor.nKey. When a cursor is in -** this state, restoreCursorPosition() can be called to attempt to -** seek the cursor to the saved position. -** -** CURSOR_FAULT: -** An unrecoverable error (an I/O error or a malloc failure) has occurred -** on a different connection that shares the BtShared cache with this -** cursor. The error has left the cache in an inconsistent state. -** Do nothing else with this cursor. Any attempt to use the cursor -** should return the error code stored in BtCursor.skipNext -*/ -#define CURSOR_VALID 0 -#define CURSOR_INVALID 1 -#define CURSOR_SKIPNEXT 2 -#define CURSOR_REQUIRESEEK 3 -#define CURSOR_FAULT 4 - -/* -** The database page the PENDING_BYTE occupies. This page is never used. -*/ -# define PENDING_BYTE_PAGE(pBt) PAGER_MJ_PGNO(pBt) - -/* -** These macros define the location of the pointer-map entry for a -** database page. The first argument to each is the number of usable -** bytes on each page of the database (often 1024). The second is the -** page number to look up in the pointer map. -** -** PTRMAP_PAGENO returns the database page number of the pointer-map -** page that stores the required pointer. PTRMAP_PTROFFSET returns -** the offset of the requested map entry. 
-** -** If the pgno argument passed to PTRMAP_PAGENO is a pointer-map page, -** then pgno is returned. So (pgno==PTRMAP_PAGENO(pgsz, pgno)) can be -** used to test if pgno is a pointer-map page. PTRMAP_ISPAGE implements -** this test. -*/ -#define PTRMAP_PAGENO(pBt, pgno) ptrmapPageno(pBt, pgno) -#define PTRMAP_PTROFFSET(pgptrmap, pgno) (5*(pgno-pgptrmap-1)) -#define PTRMAP_ISPAGE(pBt, pgno) (PTRMAP_PAGENO((pBt),(pgno))==(pgno)) - -/* -** The pointer map is a lookup table that identifies the parent page for -** each child page in the database file. The parent page is the page that -** contains a pointer to the child. Every page in the database contains -** 0 or 1 parent pages. (In this context 'database page' refers -** to any page that is not part of the pointer map itself.) Each pointer map -** entry consists of a single byte 'type' and a 4 byte parent page number. -** The PTRMAP_XXX identifiers below are the valid types. -** -** The purpose of the pointer map is to facility moving pages from one -** position in the file to another as part of autovacuum. When a page -** is moved, the pointer in its parent must be updated to point to the -** new location. The pointer map is used to locate the parent page quickly. -** -** PTRMAP_ROOTPAGE: The database page is a root-page. The page-number is not -** used in this case. -** -** PTRMAP_FREEPAGE: The database page is an unused (free) page. The page-number -** is not used in this case. -** -** PTRMAP_OVERFLOW1: The database page is the first page in a list of -** overflow pages. The page number identifies the page that -** contains the cell with a pointer to this overflow page. -** -** PTRMAP_OVERFLOW2: The database page is the second or later page in a list of -** overflow pages. The page-number identifies the previous -** page in the overflow page list. -** -** PTRMAP_BTREE: The database page is a non-root btree page. The page number -** identifies the parent page in the btree. 
-*/ -#define PTRMAP_ROOTPAGE 1 -#define PTRMAP_FREEPAGE 2 -#define PTRMAP_OVERFLOW1 3 -#define PTRMAP_OVERFLOW2 4 -#define PTRMAP_BTREE 5 - -/* A bunch of assert() statements to check the transaction state variables -** of handle p (type Btree*) are internally consistent. -*/ -#define btreeIntegrity(p) \ - assert( p->pBt->inTransaction!=TRANS_NONE || p->pBt->nTransaction==0 ); \ - assert( p->pBt->inTransaction>=p->inTrans ); - - -/* -** The ISAUTOVACUUM macro is used within balance_nonroot() to determine -** if the database supports auto-vacuum or not. Because it is used -** within an expression that is an argument to another macro -** (sqliteMallocRaw), it is not possible to use conditional compilation. -** So, this macro is defined instead. -*/ -#ifndef SQLITE_OMIT_AUTOVACUUM -#define ISAUTOVACUUM (pBt->autoVacuum) -#else -#define ISAUTOVACUUM 0 -#endif - - -/* -** This structure is passed around through all the sanity checking routines -** in order to keep track of some global state information. -** -** The aRef[] array is allocated so that there is 1 bit for each page in -** the database. As the integrity-check proceeds, for each page used in -** the database the corresponding bit is set. This allows integrity-check to -** detect pages that are used twice and orphaned pages (both of which -** indicate corruption). -*/ -typedef struct IntegrityCk IntegrityCk; -struct IntegrityCk { - BtShared *pBt; /* The tree being checked out */ - Pager *pPager; /* The associated pager. 
Also accessible by pBt->pPager */ - u8 *aPgRef; /* 1 bit per page in the db (see above) */ - Pgno nPage; /* Number of pages in the database */ - int mxErr; /* Stop accumulating errors when this reaches zero */ - int nErr; /* Number of messages written to zErrMsg so far */ - int bOomFault; /* A memory allocation error has occurred */ - const char *zPfx; /* Error message prefix */ - Pgno v1; /* Value for first %u substitution in zPfx */ - int v2; /* Value for second %d substitution in zPfx */ - StrAccum errMsg; /* Accumulate the error message text here */ - u32 *heap; /* Min-heap used for analyzing cell coverage */ - sqlite3 *db; /* Database connection running the check */ -}; - -/* -** Routines to read or write a two- and four-byte big-endian integer values. -*/ -#define get2byte(x) ((x)[0]<<8 | (x)[1]) -#define put2byte(p,v) ((p)[0] = (u8)((v)>>8), (p)[1] = (u8)(v)) -#define get4byte sqlite3Get4byte -#define put4byte sqlite3Put4byte - -/* -** get2byteAligned(), unlike get2byte(), requires that its argument point to a -** two-byte aligned address. get2bytea() is only used for accessing the -** cell addresses in a btree header. -*/ -#if SQLITE_BYTEORDER==4321 -# define get2byteAligned(x) (*(u16*)(x)) -#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4008000 -# define get2byteAligned(x) __builtin_bswap16(*(u16*)(x)) -#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 -# define get2byteAligned(x) _byteswap_ushort(*(u16*)(x)) -#else -# define get2byteAligned(x) ((x)[0]<<8 | (x)[1]) -#endif diff --git a/source/libs/tdb/src/sqliteinc/pager.h b/source/libs/tdb/src/sqliteinc/pager.h deleted file mode 100644 index 93c870f882..0000000000 --- a/source/libs/tdb/src/sqliteinc/pager.h +++ /dev/null @@ -1,241 +0,0 @@ -/* -** 2001 September 15 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. 
-** May you share freely, never taking more than you give. -** -************************************************************************* -** This header file defines the interface that the sqlite page cache -** subsystem. The page cache subsystem reads and writes a file a page -** at a time and provides a journal for rollback. -*/ - -#ifndef SQLITE_PAGER_H -#define SQLITE_PAGER_H - -/* -** Default maximum size for persistent journal files. A negative -** value means no limit. This value may be overridden using the -** sqlite3PagerJournalSizeLimit() API. See also "PRAGMA journal_size_limit". -*/ -#ifndef SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT - #define SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT -1 -#endif - -/* -** The type used to represent a page number. The first page in a file -** is called page 1. 0 is used to represent "not a page". -*/ -typedef u32 Pgno; - -/* -** Each open file is managed by a separate instance of the "Pager" structure. -*/ -typedef struct Pager Pager; - -/* -** Handle type for pages. -*/ -typedef struct PgHdr DbPage; - -// /* -// ** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is -// ** reserved for working around a windows/posix incompatibility). It is -// ** used in the journal to signify that the remainder of the journal file -// ** is devoted to storing a super-journal name - there are no more pages to -// ** roll back. See comments for function writeSuperJournal() in pager.c -// ** for details. -// */ -// #define PAGER_MJ_PGNO(x) ((Pgno)((PENDING_BYTE/((x)->pageSize))+1)) - -/* -** Allowed values for the flags parameter to sqlite3PagerOpen(). -** -** NOTE: These values must match the corresponding BTREE_ values in btree.h. -*/ -#define PAGER_OMIT_JOURNAL 0x0001 /* Do not use a rollback journal */ -#define PAGER_MEMORY 0x0002 /* In-memory database */ - -/* -** Valid values for the second argument to sqlite3PagerLockingMode(). 
-*/ -#define PAGER_LOCKINGMODE_QUERY -1 -#define PAGER_LOCKINGMODE_NORMAL 0 -#define PAGER_LOCKINGMODE_EXCLUSIVE 1 - -/* -** Numeric constants that encode the journalmode. -** -** The numeric values encoded here (other than PAGER_JOURNALMODE_QUERY) -** are exposed in the API via the "PRAGMA journal_mode" command and -** therefore cannot be changed without a compatibility break. -*/ -#define PAGER_JOURNALMODE_QUERY (-1) /* Query the value of journalmode */ -#define PAGER_JOURNALMODE_DELETE 0 /* Commit by deleting journal file */ -#define PAGER_JOURNALMODE_PERSIST 1 /* Commit by zeroing journal header */ -#define PAGER_JOURNALMODE_OFF 2 /* Journal omitted. */ -#define PAGER_JOURNALMODE_TRUNCATE 3 /* Commit by truncating journal */ -#define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ -#define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ - -/* -** Flags that make up the mask passed to sqlite3PagerGet(). -*/ -#define PAGER_GET_NOCONTENT 0x01 /* Do not load data from disk */ -#define PAGER_GET_READONLY 0x02 /* Read-only page is acceptable */ - -/* -** Flags for sqlite3PagerSetFlags() -** -** Value constraints (enforced via assert()): -** PAGER_FULLFSYNC == SQLITE_FullFSync -** PAGER_CKPT_FULLFSYNC == SQLITE_CkptFullFSync -** PAGER_CACHE_SPILL == SQLITE_CacheSpill -*/ -#define PAGER_SYNCHRONOUS_OFF 0x01 /* PRAGMA synchronous=OFF */ -#define PAGER_SYNCHRONOUS_NORMAL 0x02 /* PRAGMA synchronous=NORMAL */ -#define PAGER_SYNCHRONOUS_FULL 0x03 /* PRAGMA synchronous=FULL */ -#define PAGER_SYNCHRONOUS_EXTRA 0x04 /* PRAGMA synchronous=EXTRA */ -#define PAGER_SYNCHRONOUS_MASK 0x07 /* Mask for four values above */ -#define PAGER_FULLFSYNC 0x08 /* PRAGMA fullfsync=ON */ -#define PAGER_CKPT_FULLFSYNC 0x10 /* PRAGMA checkpoint_fullfsync=ON */ -#define PAGER_CACHESPILL 0x20 /* PRAGMA cache_spill=ON */ -#define PAGER_FLAGS_MASK 0x38 /* All above except SYNCHRONOUS */ - -/* -** The remainder of this file contains the declarations of the functions -** that make up the 
Pager sub-system API. See source code comments for -** a detailed description of each routine. -*/ - -/* Open and close a Pager connection. */ -int sqlite3PagerOpen( - Pager **ppPager, - const char*, - int, - int, - int, - void(*)(DbPage*) -); -int sqlite3PagerClose(Pager *pPager, sqlite3*); -// int sqlite3PagerReadFileheader(Pager*, int, unsigned char*); - -// /* Functions used to configure a Pager object. */ -// void sqlite3PagerSetBusyHandler(Pager*, int(*)(void *), void *); -// int sqlite3PagerSetPagesize(Pager*, u32*, int); -// Pgno sqlite3PagerMaxPageCount(Pager*, Pgno); -// void sqlite3PagerSetCachesize(Pager*, int); -// int sqlite3PagerSetSpillsize(Pager*, int); -// void sqlite3PagerSetMmapLimit(Pager *, sqlite3_int64); -// void sqlite3PagerShrink(Pager*); -// void sqlite3PagerSetFlags(Pager*,unsigned); -// int sqlite3PagerLockingMode(Pager *, int); -// int sqlite3PagerSetJournalMode(Pager *, int); -// int sqlite3PagerGetJournalMode(Pager*); -// int sqlite3PagerOkToChangeJournalMode(Pager*); -// i64 sqlite3PagerJournalSizeLimit(Pager *, i64); -// sqlite3_backup **sqlite3PagerBackupPtr(Pager*); -// int sqlite3PagerFlush(Pager*); - -// /* Functions used to obtain and release page references. */ -// int sqlite3PagerGet(Pager *pPager, Pgno pgno, DbPage **ppPage, int clrFlag); -// DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno); -// void sqlite3PagerRef(DbPage*); -// void sqlite3PagerUnref(DbPage*); -// void sqlite3PagerUnrefNotNull(DbPage*); -// void sqlite3PagerUnrefPageOne(DbPage*); - -// /* Operations on page references. */ -// int sqlite3PagerWrite(DbPage*); -// void sqlite3PagerDontWrite(DbPage*); -// int sqlite3PagerMovepage(Pager*,DbPage*,Pgno,int); -// int sqlite3PagerPageRefcount(DbPage*); -// void *sqlite3PagerGetData(DbPage *); -// void *sqlite3PagerGetExtra(DbPage *); - -// /* Functions used to manage pager transactions and savepoints. 
*/ -// void sqlite3PagerPagecount(Pager*, int*); -// int sqlite3PagerBegin(Pager*, int exFlag, int); -// int sqlite3PagerCommitPhaseOne(Pager*,const char *zSuper, int); -// int sqlite3PagerExclusiveLock(Pager*); -// int sqlite3PagerSync(Pager *pPager, const char *zSuper); -// int sqlite3PagerCommitPhaseTwo(Pager*); -// int sqlite3PagerRollback(Pager*); -// int sqlite3PagerOpenSavepoint(Pager *pPager, int n); -// int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint); -// int sqlite3PagerSharedLock(Pager *pPager); - -// #ifndef SQLITE_OMIT_WAL -// int sqlite3PagerCheckpoint(Pager *pPager, sqlite3*, int, int*, int*); -// int sqlite3PagerWalSupported(Pager *pPager); -// int sqlite3PagerWalCallback(Pager *pPager); -// int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen); -// int sqlite3PagerCloseWal(Pager *pPager, sqlite3*); -// # ifdef SQLITE_ENABLE_SNAPSHOT -// int sqlite3PagerSnapshotGet(Pager*, sqlite3_snapshot **ppSnapshot); -// int sqlite3PagerSnapshotOpen(Pager*, sqlite3_snapshot *pSnapshot); -// int sqlite3PagerSnapshotRecover(Pager *pPager); -// int sqlite3PagerSnapshotCheck(Pager *pPager, sqlite3_snapshot *pSnapshot); -// void sqlite3PagerSnapshotUnlock(Pager *pPager); -// # endif -// #endif - -// #if !defined(SQLITE_OMIT_WAL) && defined(SQLITE_ENABLE_SETLK_TIMEOUT) -// int sqlite3PagerWalWriteLock(Pager*, int); -// void sqlite3PagerWalDb(Pager*, sqlite3*); -// #else -// # define sqlite3PagerWalWriteLock(y,z) SQLITE_OK -// # define sqlite3PagerWalDb(x,y) -// #endif - -// #ifdef SQLITE_DIRECT_OVERFLOW_READ -// int sqlite3PagerDirectReadOk(Pager *pPager, Pgno pgno); -// #endif - -// #ifdef SQLITE_ENABLE_ZIPVFS -// int sqlite3PagerWalFramesize(Pager *pPager); -// #endif - -// /* Functions used to query pager state and configuration. 
*/ -// u8 sqlite3PagerIsreadonly(Pager*); -// u32 sqlite3PagerDataVersion(Pager*); -// #ifdef SQLITE_DEBUG -// int sqlite3PagerRefcount(Pager*); -// #endif -// int sqlite3PagerMemUsed(Pager*); -// const char *sqlite3PagerFilename(const Pager*, int); -// sqlite3_vfs *sqlite3PagerVfs(Pager*); -// sqlite3_file *sqlite3PagerFile(Pager*); -// sqlite3_file *sqlite3PagerJrnlFile(Pager*); -// const char *sqlite3PagerJournalname(Pager*); -// void *sqlite3PagerTempSpace(Pager*); -// int sqlite3PagerIsMemdb(Pager*); -// void sqlite3PagerCacheStat(Pager *, int, int, int *); -// void sqlite3PagerClearCache(Pager*); -// int sqlite3SectorSize(sqlite3_file *); - -// /* Functions used to truncate the database file. */ -// void sqlite3PagerTruncateImage(Pager*,Pgno); - -// void sqlite3PagerRekey(DbPage*, Pgno, u16); - -// /* Functions to support testing and debugging. */ -// #if !defined(NDEBUG) || defined(SQLITE_TEST) -// Pgno sqlite3PagerPagenumber(DbPage*); -// int sqlite3PagerIswriteable(DbPage*); -// #endif -// #ifdef SQLITE_TEST -// int *sqlite3PagerStats(Pager*); -// void sqlite3PagerRefdump(Pager*); -// void disable_simulated_io_errors(void); -// void enable_simulated_io_errors(void); -// #else -// # define disable_simulated_io_errors() -// # define enable_simulated_io_errors() -// #endif - -#endif /* SQLITE_PAGER_H */ diff --git a/source/libs/tdb/src/sqliteinc/pcache.h b/source/libs/tdb/src/sqliteinc/pcache.h deleted file mode 100644 index 62e36991f0..0000000000 --- a/source/libs/tdb/src/sqliteinc/pcache.h +++ /dev/null @@ -1,210 +0,0 @@ -/* -** 2008 August 05 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. 
-** -************************************************************************* -** This header file defines the interface that the sqlite page cache -** subsystem. -*/ - -#ifndef _PCACHE_H_ - -typedef struct PgHdr PgHdr; -typedef struct PCache PCache; - -/* -** Every page in the cache is controlled by an instance of the following -** structure. -*/ -struct PgHdr { - sqlite3_pcache_page *pPage; /* Pcache object page handle */ - void * pData; /* Page data */ - void * pExtra; /* Extra content */ - PCache * pCache; /* PRIVATE: Cache that owns this page */ - PgHdr * pDirty; /* Transient list of dirty sorted by pgno */ - Pager * pPager; /* The pager this page is part of */ - Pgno pgno; /* Page number for this page */ -#ifdef SQLITE_CHECK_PAGES - u32 pageHash; /* Hash of page content */ -#endif - u16 flags; /* PGHDR flags defined below */ - - /********************************************************************** - ** Elements above, except pCache, are public. All that follow are - ** private to pcache.c and should not be accessed by other modules. - ** pCache is grouped with the public elements for efficiency. 
- */ - i16 nRef; /* Number of users of this page */ - PgHdr *pDirtyNext; /* Next element in list of dirty pages */ - PgHdr *pDirtyPrev; /* Previous element in list of dirty pages */ - /* NB: pDirtyNext and pDirtyPrev are undefined if the - ** PgHdr object is not dirty */ -}; - -/* Bit values for PgHdr.flags */ -#define PGHDR_CLEAN 0x001 /* Page not on the PCache.pDirty list */ -#define PGHDR_DIRTY 0x002 /* Page is on the PCache.pDirty list */ -#define PGHDR_WRITEABLE 0x004 /* Journaled and ready to modify */ -#define PGHDR_NEED_SYNC \ - 0x008 /* Fsync the rollback journal before \ - ** writing this page to the database */ -#define PGHDR_DONT_WRITE 0x010 /* Do not write content to disk */ -#define PGHDR_MMAP 0x020 /* This is an mmap page object */ - -#define PGHDR_WAL_APPEND 0x040 /* Appended to wal file */ - -/* Initialize and shutdown the page cache subsystem */ -int sqlite3PcacheInitialize(void); -void sqlite3PcacheShutdown(void); - -/* Page cache buffer management: -** These routines implement SQLITE_CONFIG_PAGECACHE. -*/ -void sqlite3PCacheBufferSetup(void *, int sz, int n); - -/* Create a new pager cache. -** Under memory stress, invoke xStress to try to make pages clean. -** Only clean and unpinned pages can be reclaimed. -*/ -int sqlite3PcacheOpen(int szPage, /* Size of every page */ - int szExtra, /* Extra space associated with each page */ - int bPurgeable, /* True if pages are on backing store */ - int (*xStress)(void *, PgHdr *), /* Call to try to make pages clean */ - void * pStress, /* Argument to xStress */ - PCache *pToInit /* Preallocated space for the PCache */ -); - -/* Modify the page-size after the cache has been created. */ -int sqlite3PcacheSetPageSize(PCache *, int); - -/* Return the size in bytes of a PCache object. Used to preallocate -** storage space. -*/ -int sqlite3PcacheSize(void); - -/* One release per successful fetch. Page is pinned until released. -** Reference counted. 
-*/ -sqlite3_pcache_page *sqlite3PcacheFetch(PCache *, Pgno, int createFlag); -int sqlite3PcacheFetchStress(PCache *, Pgno, sqlite3_pcache_page **); -PgHdr * sqlite3PcacheFetchFinish(PCache *, Pgno, sqlite3_pcache_page *pPage); -void sqlite3PcacheRelease(PgHdr *); - -void sqlite3PcacheDrop(PgHdr *); /* Remove page from cache */ -void sqlite3PcacheMakeDirty(PgHdr *); /* Make sure page is marked dirty */ -void sqlite3PcacheMakeClean(PgHdr *); /* Mark a single page as clean */ -void sqlite3PcacheCleanAll(PCache *); /* Mark all dirty list pages as clean */ -void sqlite3PcacheClearWritable(PCache *); - -/* Change a page number. Used by incr-vacuum. */ -void sqlite3PcacheMove(PgHdr *, Pgno); - -/* Remove all pages with pgno>x. Reset the cache if x==0 */ -void sqlite3PcacheTruncate(PCache *, Pgno x); - -/* Get a list of all dirty pages in the cache, sorted by page number */ -PgHdr *sqlite3PcacheDirtyList(PCache *); - -/* Reset and close the cache object */ -void sqlite3PcacheClose(PCache *); - -/* Clear flags from pages of the page cache */ -void sqlite3PcacheClearSyncFlags(PCache *); - -/* Discard the contents of the cache */ -void sqlite3PcacheClear(PCache *); - -/* Return the total number of outstanding page references */ -int sqlite3PcacheRefCount(PCache *); - -/* Increment the reference count of an existing page */ -void sqlite3PcacheRef(PgHdr *); - -int sqlite3PcachePageRefcount(PgHdr *); - -/* Return the total number of pages stored in the cache */ -int sqlite3PcachePagecount(PCache *); - -#if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG) -/* Iterate through all dirty pages currently stored in the cache. This -** interface is only available if SQLITE_CHECK_PAGES is defined when the -** library is built. 
-*/ -void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHdr *)); -#endif - -#if defined(SQLITE_DEBUG) -/* Check invariants on a PgHdr object */ -int sqlite3PcachePageSanity(PgHdr *); -#endif - -/* Set and get the suggested cache-size for the specified pager-cache. -** -** If no global maximum is configured, then the system attempts to limit -** the total number of pages cached by purgeable pager-caches to the sum -** of the suggested cache-sizes. -*/ -void sqlite3PcacheSetCachesize(PCache *, int); -#ifdef SQLITE_TEST -int sqlite3PcacheGetCachesize(PCache *); -#endif - -/* Set or get the suggested spill-size for the specified pager-cache. -** -** The spill-size is the minimum number of pages in cache before the cache -** will attempt to spill dirty pages by calling xStress. -*/ -int sqlite3PcacheSetSpillsize(PCache *, int); - -/* Free up as much memory as possible from the page cache */ -void sqlite3PcacheShrink(PCache *); - -#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT -/* Try to return memory used by the pcache module to the main memory heap */ -int sqlite3PcacheReleaseMemory(int); -#endif - -#ifdef SQLITE_TEST -void sqlite3PcacheStats(int *, int *, int *, int *); -#endif - -void sqlite3PCacheSetDefault(void); - -/* Return the header size */ -int sqlite3HeaderSizePcache(void); -int sqlite3HeaderSizePcache1(void); - -/* Number of dirty pages as a percentage of the configured cache size */ -int sqlite3PCachePercentDirty(PCache *); - -#ifdef SQLITE_DIRECT_OVERFLOW_READ -int sqlite3PCacheIsDirty(PCache *pCache); -#endif - -// For real implementation of sqlite3_pcache ======================================== -typedef struct sqlite3_pcache sqlite3_pcache; -typedef struct sqlite3_pcache_methods2 { - int iVersion; - void *pArg; - int (*xInit)(void *); - void (*xShutdown)(void *); - sqlite3_pcache *(*xCreate)(int szPage, int szExtra, int bPurgeable); - void (*xCachesize)(sqlite3_pcache *, int nCachesize); - int (*xPagecount)(sqlite3_pcache *); - sqlite3_pcache_page 
*(*xFetch)(sqlite3_pcache *, unsigned key, int createFlag); - void (*xUnpin)(sqlite3_pcache *, sqlite3_pcache_page *, int discard); - void (*xRekey)(sqlite3_pcache *, sqlite3_pcache_page *, unsigned oldKey, unsigned newKey); - void (*xTruncate)(sqlite3_pcache *, unsigned iLimit); - void (*xDestroy)(sqlite3_pcache *); - void (*xShrink)(sqlite3_pcache *); -} sqlite3_pcache_methods2; - -extern sqlite3_pcache_methods2 pcache2; - -#endif /* _PCACHE_H_ */ diff --git a/source/libs/tdb/src/sqliteinc/sqlite3.h b/source/libs/tdb/src/sqliteinc/sqlite3.h deleted file mode 100644 index a6fa416b31..0000000000 --- a/source/libs/tdb/src/sqliteinc/sqlite3.h +++ /dev/null @@ -1,95 +0,0 @@ -/* -** 2001-09-15 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This header file defines the interface that the SQLite library -** presents to client programs. If a C-function, structure, datatype, -** or constant definition does not appear in this file, then it is -** not a published API of SQLite, is subject to change without -** notice, and should not be referenced by programs that use SQLite. -** -** Some of the definitions that are in this file are marked as -** "experimental". Experimental interfaces are normally new -** features recently added to SQLite. We do not anticipate changes -** to experimental interfaces but reserve the right to make minor changes -** if experience from use "in the wild" suggest such changes are prudent. -** -** The official C-language API documentation for SQLite is derived -** from comments in this file. This file is the authoritative source -** on how SQLite interfaces are supposed to operate. 
-** -** The name of this file under configuration management is "sqlite.h.in". -** The makefile makes some minor changes to this file (such as inserting -** the version number) and changes its name to "sqlite3.h" as -** part of the build process. -*/ -#ifndef SQLITE3_H -#define SQLITE3_H -#include /* Needed for the definition of va_list */ - -/* -** Make sure we can call this stuff from C++. -*/ -#ifdef __cplusplus -extern "C" { -#endif - -/* -** CAPI3REF: Result Codes -** KEYWORDS: {result code definitions} -** -** Many SQLite functions return an integer result code from the set shown -** here in order to indicate success or failure. -** -** New error codes may be added in future versions of SQLite. -** -** See also: [extended result code definitions] -*/ -#define SQLITE_OK 0 /* Successful result */ -/* beginning-of-error-codes */ -#define SQLITE_ERROR 1 /* Generic error */ -#define SQLITE_INTERNAL 2 /* Internal logic error in SQLite */ -#define SQLITE_PERM 3 /* Access permission denied */ -#define SQLITE_ABORT 4 /* Callback routine requested an abort */ -#define SQLITE_BUSY 5 /* The database file is locked */ -#define SQLITE_LOCKED 6 /* A table in the database is locked */ -#define SQLITE_NOMEM 7 /* A malloc() failed */ -#define SQLITE_READONLY 8 /* Attempt to write a readonly database */ -#define SQLITE_INTERRUPT 9 /* Operation terminated by sqlite3_interrupt()*/ -#define SQLITE_IOERR 10 /* Some kind of disk I/O error occurred */ -#define SQLITE_CORRUPT 11 /* The database disk image is malformed */ -#define SQLITE_NOTFOUND 12 /* Unknown opcode in sqlite3_file_control() */ -#define SQLITE_FULL 13 /* Insertion failed because database is full */ -#define SQLITE_CANTOPEN 14 /* Unable to open the database file */ -#define SQLITE_PROTOCOL 15 /* Database lock protocol error */ -#define SQLITE_EMPTY 16 /* Internal use only */ -#define SQLITE_SCHEMA 17 /* The database schema changed */ -#define SQLITE_TOOBIG 18 /* String or BLOB exceeds size limit */ -#define 
SQLITE_CONSTRAINT 19 /* Abort due to constraint violation */ -#define SQLITE_MISMATCH 20 /* Data type mismatch */ -#define SQLITE_MISUSE 21 /* Library used incorrectly */ -#define SQLITE_NOLFS 22 /* Uses OS features not supported on host */ -#define SQLITE_AUTH 23 /* Authorization denied */ -#define SQLITE_FORMAT 24 /* Not used */ -#define SQLITE_RANGE 25 /* 2nd parameter to sqlite3_bind out of range */ -#define SQLITE_NOTADB 26 /* File opened that is not a database file */ -#define SQLITE_NOTICE 27 /* Notifications from sqlite3_log() */ -#define SQLITE_WARNING 28 /* Warnings from sqlite3_log() */ -#define SQLITE_ROW 100 /* sqlite3_step() has another row ready */ -#define SQLITE_DONE 101 /* sqlite3_step() has finished executing */ -/* end-of-error-codes */ - -#ifdef __cplusplus -} /* end of the 'extern "C"' block */ -#endif - -#endif /* _FTS5_H */ - -/******** End of fts5.h *********/ diff --git a/source/libs/tdb/src/sqliteinc/sqliteInt.h b/source/libs/tdb/src/sqliteinc/sqliteInt.h deleted file mode 100644 index b6c44aa975..0000000000 --- a/source/libs/tdb/src/sqliteinc/sqliteInt.h +++ /dev/null @@ -1,58 +0,0 @@ -/* -** 2001 September 15 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** Internal interface definitions for SQLite. 
-** -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef SQLITEINT_H -#define SQLITEINT_H - -#include "sqlite3.h" - -typedef int8_t i8; -typedef int16_t i16; -typedef int32_t i32; -typedef int64_t i64; -typedef uint8_t u8; -typedef uint16_t u16; -typedef uint32_t u32; -typedef uint64_t u64; - -typedef struct sqlite3_pcache_page { - void *pBuf; /* The content of the page */ - void *pExtra; /* Extra information associated with the page */ -} sqlite3_pcache_page; - -#define ROUNDDOWN8(x) ((x) & ~7) - -#define ROUND8(x) (((x) + 7) & ~7) - -typedef struct sqlite3_vfs sqlite3_vfs; -typedef struct sqlite3 sqlite3; - -#define SQLITE_DEFAULT_PAGE_SIZE 4096 - -#include "pager.h" - -#include "pcache.h" - -#endif /* SQLITEINT_H */ diff --git a/source/libs/tdb/src/sqliteinc/wal.h b/source/libs/tdb/src/sqliteinc/wal.h deleted file mode 100644 index 02e2bab360..0000000000 --- a/source/libs/tdb/src/sqliteinc/wal.h +++ /dev/null @@ -1,155 +0,0 @@ -/* -** 2010 February 1 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This header file defines the interface to the write-ahead logging -** system. Refer to the comments below and the header comment attached to -** the implementation of each function in log.c for further details. 
-*/ - -#ifndef SQLITE_WAL_H -#define SQLITE_WAL_H - -#include "sqliteInt.h" - -/* Macros for extracting appropriate sync flags for either transaction -** commits (WAL_SYNC_FLAGS(X)) or for checkpoint ops (CKPT_SYNC_FLAGS(X)): -*/ -#define WAL_SYNC_FLAGS(X) ((X)&0x03) -#define CKPT_SYNC_FLAGS(X) (((X)>>2)&0x03) - -#ifdef SQLITE_OMIT_WAL -# define sqlite3WalOpen(x,y,z) 0 -# define sqlite3WalLimit(x,y) -# define sqlite3WalClose(v,w,x,y,z) 0 -# define sqlite3WalBeginReadTransaction(y,z) 0 -# define sqlite3WalEndReadTransaction(z) -# define sqlite3WalDbsize(y) 0 -# define sqlite3WalBeginWriteTransaction(y) 0 -# define sqlite3WalEndWriteTransaction(x) 0 -# define sqlite3WalUndo(x,y,z) 0 -# define sqlite3WalSavepoint(y,z) -# define sqlite3WalSavepointUndo(y,z) 0 -# define sqlite3WalFrames(u,v,w,x,y,z) 0 -# define sqlite3WalCheckpoint(q,r,s,t,u,v,w,x,y,z) 0 -# define sqlite3WalCallback(z) 0 -# define sqlite3WalExclusiveMode(y,z) 0 -# define sqlite3WalHeapMemory(z) 0 -# define sqlite3WalFramesize(z) 0 -# define sqlite3WalFindFrame(x,y,z) 0 -# define sqlite3WalFile(x) 0 -#else - -#define WAL_SAVEPOINT_NDATA 4 - -/* Connection to a write-ahead log (WAL) file. -** There is one object of this type for each pager. -*/ -typedef struct Wal Wal; - -/* Open and close a connection to a write-ahead log. */ -int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *, int, i64, Wal**); -int sqlite3WalClose(Wal *pWal, sqlite3*, int sync_flags, int, u8 *); - -/* Set the limiting size of a WAL file. */ -void sqlite3WalLimit(Wal*, i64); - -/* Used by readers to open (lock) and close (unlock) a snapshot. A -** snapshot is like a read-transaction. It is the state of the database -** at an instant in time. sqlite3WalOpenSnapshot gets a read lock and -** preserves the current state even if the other threads or processes -** write to or checkpoint the WAL. sqlite3WalCloseSnapshot() closes the -** transaction and releases the lock. 
-*/ -int sqlite3WalBeginReadTransaction(Wal *pWal, int *); -void sqlite3WalEndReadTransaction(Wal *pWal); - -/* Read a page from the write-ahead log, if it is present. */ -int sqlite3WalFindFrame(Wal *, Pgno, u32 *); -int sqlite3WalReadFrame(Wal *, u32, int, u8 *); - -/* If the WAL is not empty, return the size of the database. */ -Pgno sqlite3WalDbsize(Wal *pWal); - -/* Obtain or release the WRITER lock. */ -int sqlite3WalBeginWriteTransaction(Wal *pWal); -int sqlite3WalEndWriteTransaction(Wal *pWal); - -/* Undo any frames written (but not committed) to the log */ -int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx); - -/* Return an integer that records the current (uncommitted) write -** position in the WAL */ -void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData); - -/* Move the write position of the WAL back to iFrame. Called in -** response to a ROLLBACK TO command. */ -int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData); - -/* Write a frame or frames to the log. */ -int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int); - -/* Copy pages from the log to the database file */ -int sqlite3WalCheckpoint( - Wal *pWal, /* Write-ahead log connection */ - sqlite3 *db, /* Check this handle's interrupt flag */ - int eMode, /* One of PASSIVE, FULL and RESTART */ - int (*xBusy)(void*), /* Function to call when busy */ - void *pBusyArg, /* Context argument for xBusyHandler */ - int sync_flags, /* Flags to sync db file with (or 0) */ - int nBuf, /* Size of buffer nBuf */ - u8 *zBuf, /* Temporary buffer to use */ - int *pnLog, /* OUT: Number of frames in WAL */ - int *pnCkpt /* OUT: Number of backfilled frames in WAL */ -); - -/* Return the value to pass to a sqlite3_wal_hook callback, the -** number of frames in the WAL at the point of the last commit since -** sqlite3WalCallback() was called. If no commits have occurred since -** the last call, then return 0. 
-*/ -int sqlite3WalCallback(Wal *pWal); - -/* Tell the wal layer that an EXCLUSIVE lock has been obtained (or released) -** by the pager layer on the database file. -*/ -int sqlite3WalExclusiveMode(Wal *pWal, int op); - -/* Return true if the argument is non-NULL and the WAL module is using -** heap-memory for the wal-index. Otherwise, if the argument is NULL or the -** WAL module is using shared-memory, return false. -*/ -int sqlite3WalHeapMemory(Wal *pWal); - -#ifdef SQLITE_ENABLE_SNAPSHOT -int sqlite3WalSnapshotGet(Wal *pWal, sqlite3_snapshot **ppSnapshot); -void sqlite3WalSnapshotOpen(Wal *pWal, sqlite3_snapshot *pSnapshot); -int sqlite3WalSnapshotRecover(Wal *pWal); -int sqlite3WalSnapshotCheck(Wal *pWal, sqlite3_snapshot *pSnapshot); -void sqlite3WalSnapshotUnlock(Wal *pWal); -#endif - -#ifdef SQLITE_ENABLE_ZIPVFS -/* If the WAL file is not empty, return the number of bytes of content -** stored in each frame (i.e. the db page-size when the WAL was created). -*/ -int sqlite3WalFramesize(Wal *pWal); -#endif - -/* Return the sqlite3_file object for the WAL file */ -sqlite3_file *sqlite3WalFile(Wal *pWal); - -#ifdef SQLITE_ENABLE_SETLK_TIMEOUT -int sqlite3WalWriteLock(Wal *pWal, int bLock); -void sqlite3WalDb(Wal *pWal, sqlite3 *db); -#endif - -#endif /* ifndef SQLITE_OMIT_WAL */ -#endif /* SQLITE_WAL_H */