btree 相关注释
parent
c08c2c1620
commit
d97ef01e50
|
@ -1960,11 +1960,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
|
|||
}
|
||||
|
||||
/*
|
||||
** Decode the flags byte (the first byte of the header) for a page
|
||||
** and initialize fields of the MemPage structure accordingly.
|
||||
**
|
||||
** Only the following combinations are supported. Anything different
|
||||
** indicates a corrupt database file:
|
||||
** 解析页类型:仅支持如下参数组合
|
||||
**
|
||||
** PTF_ZERODATA (0x02, 2)
|
||||
** PTF_LEAFDATA | PTF_INTKEY (0x05, 5)
|
||||
|
@ -2244,8 +2240,7 @@ static void zeroPage(MemPage *pPage, int flags){
|
|||
|
||||
|
||||
/*
|
||||
** Convert a DbPage obtained from the pager into a MemPage used by
|
||||
** the btree layer.
|
||||
** 转化 DbPage 到 Btree 使用的 MemPage
|
||||
*/
|
||||
static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){
|
||||
MemPage *pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage);
|
||||
|
|
112
src/btreeInt.h
112
src/btreeInt.h
|
@ -9,8 +9,7 @@
|
|||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This file implements an external (disk-based) database using BTrees.
|
||||
** For a detailed discussion of BTrees, refer to
|
||||
** BTrees 实现参考
|
||||
**
|
||||
** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
|
||||
** "Sorting And Searching", pages 473-480. Addison-Wesley
|
||||
|
@ -32,27 +31,17 @@
|
|||
** Finding a particular key requires reading O(log(M)) pages from the
|
||||
** disk where M is the number of entries in the tree.
|
||||
**
|
||||
** In this implementation, a single file can hold one or more separate
|
||||
** BTrees. Each BTree is identified by the index of its root page. The
|
||||
** key and data for any entry are combined to form the "payload". A
|
||||
** fixed amount of payload can be carried directly on the database
|
||||
** page. If the payload is larger than the preset amount then surplus
|
||||
** bytes are stored on overflow pages. The payload for an entry
|
||||
** and the preceding pointer are combined to form a "Cell". Each
|
||||
** page has a small header which contains the Ptr(N) pointer and other
|
||||
** information such as the size of key and data.
|
||||
** 一个文件可能含有一个或多个不同的 BTree,每一个 BTree 由其根页面的页号标识
|
||||
** key 和 data 构成 payload
|
||||
** 固定大小的 payload 能够直接存放在数据库页中
|
||||
** payload 太大会存放在溢出页中
|
||||
** payload 和前一个指针组成一个 cell
|
||||
**
|
||||
** FORMAT DETAILS
|
||||
** 注意:页面编号从 1 开始,而非 0 ,0页代表页面不存在
|
||||
** 页面大小为 512 - 65536 ,且为 2 的幂次方
|
||||
** 页类型: btree page / freelist page / overflow page / pointer-map page
|
||||
**
|
||||
** The file is divided into pages. The first page is called page 1,
|
||||
** the second is page 2, and so forth. A page number of zero indicates
|
||||
** "no such page". The page size can be any power of 2 between 512 and 65536.
|
||||
** Each page can be either a btree page, a freelist page, an overflow
|
||||
** page, or a pointer-map page.
|
||||
**
|
||||
** The first page is always a btree page. The first 100 bytes of the first
|
||||
** page contain a special header (the "file header") that describes the file.
|
||||
** The format of the file header is as follows:
|
||||
** 第 1 页为 btree page 含有特殊文件头,共 100 字节,格式如下(大端存放):
|
||||
**
|
||||
** OFFSET SIZE DESCRIPTION
|
||||
** 0 16 Header string: "SQLite format 3\000"
|
||||
|
@ -81,31 +70,21 @@
|
|||
** 92 4 The version-valid-for number
|
||||
** 96 4 SQLITE_VERSION_NUMBER
|
||||
**
|
||||
** All of the integer values are big-endian (most significant byte first).
|
||||
** file change counter : 数据库被修改时递增,其他进程据此得知文件已变更、需要刷新缓存
|
||||
**
|
||||
** The file change counter is incremented when the database is changed
|
||||
** This counter allows other processes to know when the file has changed
|
||||
** and thus when they need to flush their cache.
|
||||
**
|
||||
** The max embedded payload fraction is the amount of the total usable
|
||||
** space in a page that can be consumed by a single cell for standard
|
||||
** B-tree (non-LEAFDATA) tables. A value of 255 means 100%. The default
|
||||
** is to limit the maximum cell size so that at least 4 cells will fit
|
||||
** on one page. Thus the default max embedded payload fraction is 64.
|
||||
**
|
||||
** If the payload for a cell is larger than the max payload, then extra
|
||||
** payload is spilled to overflow pages. Once an overflow page is allocated,
|
||||
** as many bytes as possible are moved into the overflow pages without letting
|
||||
** the cell size drop below the min embedded payload fraction.
|
||||
** max embedding payload : 一个 cell 最大使用空间占一页的百分比
|
||||
** 255 表示 100%;默认值为 64(约 25%),即一个 cell 最多占用 1/4 页
|
||||
** 如果超过 cell 限定大小,额外的 payload 存放在 overflow 页中
|
||||
**
|
||||
** The min leaf payload fraction is like the min embedded payload fraction
|
||||
** except that it applies to leaf nodes in a LEAFDATA tree. The maximum
|
||||
** payload fraction for a LEAFDATA tree is always 100% (or 255) and it is
|
||||
** not specified in the header.
|
||||
**
|
||||
** Each btree page is divided into three sections: The header, the
|
||||
** cell pointer array, and the cell content area. Page 1 also has a 100-byte
|
||||
** file header that occurs before the page header.
|
||||
** BTree 页面格式:
|
||||
** The header
|
||||
** The cell pointer array
|
||||
** The cell content area
|
||||
**
|
||||
** |----------------|
|
||||
** | file header | 100 bytes. Page 1 only.
|
||||
|
@ -133,12 +112,11 @@
|
|||
** 7 1 number of fragmented free bytes
|
||||
** 8 4 Right child (the Ptr(N) value). Omitted on leaves.
|
||||
**
|
||||
** The flags define the format of this btree page. The leaf flag means that
|
||||
** this page has no children. The zerodata flag means that this page carries
|
||||
** only keys and no data. The intkey flag means that the key is an integer
|
||||
** which is stored in the key size entry of the cell header rather than in
|
||||
** the payload area.
|
||||
** intkey key 是整数
|
||||
** zerodata 只包含 key 不包含 data
|
||||
** leaf 没有子页节点
|
||||
**
|
||||
** cell 指针有序存放
|
||||
** The cell pointer array begins on the first byte after the page header.
|
||||
** The cell pointer array contains zero or more 2-byte numbers which are
|
||||
** offsets from the beginning of the page to the cell content in the cell
|
||||
|
@ -149,23 +127,13 @@
|
|||
** Cell content is stored at the very end of the page and grows toward the
|
||||
** beginning of the page.
|
||||
**
|
||||
** Unused space within the cell content area is collected into a linked list of
|
||||
** freeblocks. Each freeblock is at least 4 bytes in size. The byte offset
|
||||
** to the first freeblock is given in the header. Freeblocks occur in
|
||||
** increasing order. Because a freeblock must be at least 4 bytes in size,
|
||||
** any group of 3 or fewer unused bytes in the cell content area cannot
|
||||
** exist on the freeblock chain. A group of 3 or fewer free bytes is called
|
||||
** a fragment. The total number of bytes in all fragments is recorded
|
||||
** in the page header at offset 7.
|
||||
**
|
||||
** freeblock 大小至少为 4 字节,小于 4 字节的 block 为碎片内存
|
||||
** freeblock 格式如下:
|
||||
** SIZE DESCRIPTION
|
||||
** 2 Byte offset of the next freeblock
|
||||
** 2 Bytes in this freeblock
|
||||
**
|
||||
** Cells are of variable length. Cells are stored in the cell content area at
|
||||
** the end of the page. Pointers to the cells are in the cell pointer array
|
||||
** that immediately follows the page header. Cells are not necessarily
|
||||
** contiguous or in order, but cell pointers are contiguous and in order.
|
||||
** cell 本身不一定连续或有序,但 cell 指针必须连续且有序
|
||||
**
|
||||
** Cell content makes use of variable length integers. A variable
|
||||
** length integer is 1 to 9 bytes where the lower 7 bits of each
|
||||
|
@ -272,35 +240,35 @@ typedef struct CellInfo CellInfo;
|
|||
*/
|
||||
struct MemPage {
|
||||
u8 isInit; /* True if previously initialized. MUST BE FIRST! */
|
||||
u8 intKey; /* True if table b-trees. False for index b-trees */
|
||||
u8 intKey; /* table - 1 / index - 0 */
|
||||
u8 intKeyLeaf; /* True if the leaf of an intKey table */
|
||||
Pgno pgno; /* Page number for this page */
|
||||
Pgno pgno; /* 当前页的页号 */
|
||||
/* Only the first 8 bytes (above) are zeroed by pager.c when a new page
|
||||
** is allocated. All fields that follow must be initialized before use */
|
||||
u8 leaf; /* True if a leaf page */
|
||||
u8 hdrOffset; /* 100 for page 1. 0 otherwise */
|
||||
u8 childPtrSize; /* 0 if leaf==1. 4 if leaf==0 */
|
||||
u8 leaf; /* 是否为叶子节点页 */
|
||||
u8 hdrOffset; /* 第 1 页为 100(文件头长度),其余页为 0 */
|
||||
u8 childPtrSize; /* 叶子节点无子节点 == 0,否则 == 4 */
|
||||
u8 max1bytePayload; /* min(maxLocal,127) */
|
||||
u8 nOverflow; /* Number of overflow cell bodies in aCell[] */
|
||||
u16 maxLocal; /* Copy of BtShared.maxLocal or BtShared.maxLeaf */
|
||||
u16 minLocal; /* Copy of BtShared.minLocal or BtShared.minLeaf */
|
||||
u16 cellOffset; /* Index in aData of first cell pointer */
|
||||
u16 cellOffset; /* 第一个 cell 指针的位置 */
|
||||
int nFree; /* Number of free bytes on the page. -1 for unknown */
|
||||
u16 nCell; /* Number of cells on this page, local and ovfl */
|
||||
u16 nCell; /* 当前页面 cell 数量 */
|
||||
u16 maskPage; /* Mask for page offset */
|
||||
u16 aiOvfl[4]; /* Insert the i-th overflow cell before the aiOvfl-th
|
||||
** non-overflow cell */
|
||||
u8 *apOvfl[4]; /* Pointers to the body of overflow cells */
|
||||
BtShared *pBt; /* Pointer to BtShared that this page is part of */
|
||||
u8 *aData; /* Pointer to disk image of the page data */
|
||||
u8 *aData; /* 指向页面数据的磁盘镜像 */
|
||||
u8 *aDataEnd; /* One byte past the end of the entire page - not just
|
||||
** the usable space, the entire page. Used to prevent
|
||||
** corruption-induced buffer overflow. */
|
||||
u8 *aCellIdx; /* The cell index area */
|
||||
u8 *aCellIdx; /* cell 指针的位置 */
|
||||
u8 *aDataOfst; /* Same as aData for leaves. aData+4 for interior */
|
||||
DbPage *pDbPage; /* Pager page handle */
|
||||
u16 (*xCellSize)(MemPage*,u8*); /* cellSizePtr method */
|
||||
void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */
|
||||
void (*xParseCell)(MemPage*,u8*,CellInfo*); /* cell解析函数 */
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -441,7 +409,7 @@ struct BtShared {
|
|||
u16 minLocal; /* Minimum local payload in non-LEAFDATA tables */
|
||||
u16 maxLeaf; /* Maximum local payload in a LEAFDATA table */
|
||||
u16 minLeaf; /* Minimum local payload in a LEAFDATA table */
|
||||
u32 pageSize; /* Total number of bytes on a page */
|
||||
u32 pageSize; /* 页面大小 */
|
||||
u32 usableSize; /* Number of usable bytes on each page */
|
||||
int nTransaction; /* Number of open transactions (read + write) */
|
||||
u32 nPage; /* Number of pages in the database */
|
||||
|
@ -479,10 +447,10 @@ struct BtShared {
|
|||
*/
|
||||
struct CellInfo {
|
||||
i64 nKey; /* The key for INTKEY tables, or nPayload otherwise */
|
||||
u8 *pPayload; /* Pointer to the start of payload */
|
||||
u32 nPayload; /* Bytes of payload */
|
||||
u8 *pPayload; /* payload 的指针位置 */
|
||||
u32 nPayload; /* payload 的字节数 */
|
||||
u16 nLocal; /* Amount of payload held locally, not on overflow */
|
||||
u16 nSize; /* Size of the cell content on the main b-tree page */
|
||||
u16 nSize; /* cell 大小 */
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -546,7 +514,7 @@ struct BtCursor {
|
|||
u16 ix; /* Current index for apPage[iPage] */
|
||||
u16 aiIdx[BTCURSOR_MAX_DEPTH-1]; /* Current index in apPage[i] */
|
||||
struct KeyInfo *pKeyInfo; /* Arg passed to comparison function */
|
||||
MemPage *pPage; /* Current page */
|
||||
MemPage *pPage; /* 游标所在内存页面 */
|
||||
MemPage *apPage[BTCURSOR_MAX_DEPTH-1]; /* Stack of parents of current page */
|
||||
};
|
||||
|
||||
|
|
15
src/pager.c
15
src/pager.c
|
@ -171,6 +171,8 @@ int sqlite3PagerTrace=1; /* True to enable tracing */
|
|||
**
|
||||
** OPEN:
|
||||
**
|
||||
** 初始化状态,无读写事务
|
||||
** 所有数据都不一定可靠
|
||||
** The pager starts up in this state. Nothing is guaranteed in this
|
||||
** state - the file may or may not be locked and the database size is
|
||||
** unknown. The database may not be read or written.
|
||||
|
@ -619,7 +621,7 @@ struct PagerSavepoint {
|
|||
struct Pager {
|
||||
sqlite3_vfs *pVfs; /* OS functions to use for IO */
|
||||
u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */
|
||||
u8 journalMode; /* One of the PAGER_JOURNALMODE_* values */
|
||||
u8 journalMode; /* 日志模式 */
|
||||
u8 useJournal; /* Use a rollback journal on this file */
|
||||
u8 noSync; /* Do not sync the journal if true */
|
||||
u8 fullSync; /* Do extra syncs of the journal for robustness */
|
||||
|
@ -648,8 +650,8 @@ struct Pager {
|
|||
u8 doNotSpill; /* Do not spill the cache when non-zero */
|
||||
u8 subjInMemory; /* True to use in-memory sub-journals */
|
||||
u8 bUseFetch; /* True to use xFetch() */
|
||||
u8 hasHeldSharedLock; /* True if a shared lock has ever been held */
|
||||
Pgno dbSize; /* Number of pages in the database */
|
||||
u8 hasHeldSharedLock; /* 是否曾经持有过 shared lock */
|
||||
Pgno dbSize; /* 数据库中页面总数 */
|
||||
Pgno dbOrigSize; /* dbSize before the current transaction */
|
||||
Pgno dbFileSize; /* Number of pages in the database file */
|
||||
Pgno dbHintSize; /* Value passed to FCNTL_SIZE_HINT call */
|
||||
|
@ -3185,8 +3187,7 @@ static int pagerWalFrames(
|
|||
}
|
||||
|
||||
/*
|
||||
** Begin a read transaction on the WAL.
|
||||
**
|
||||
** WAL 模式下进入读事务
|
||||
** This routine used to be called "pagerOpenSnapshot()" because it essentially
|
||||
** makes a snapshot of the database at the current point in time and preserves
|
||||
** that snapshot for use by the reader in spite of concurrent changes by
|
||||
|
@ -5192,6 +5193,9 @@ int sqlite3PagerSharedLock(Pager *pPager){
|
|||
** outstanding pages. This implies that the pager state should either
|
||||
** be OPEN or READER. READER is only possible if the pager is or was in
|
||||
** exclusive access mode. */
|
||||
/*
|
||||
** 仅 OPEN 和 READER 状态能够进入
|
||||
*/
|
||||
assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
|
||||
assert( assert_pager_state(pPager) );
|
||||
assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER );
|
||||
|
@ -5551,6 +5555,7 @@ static int getPageNormal(
|
|||
}else{
|
||||
assert( pPg->pPager==pPager );
|
||||
pPager->aStat[PAGER_STAT_MISS]++;
|
||||
// 读取页面数据
|
||||
rc = readDbPage(pPg);
|
||||
if( rc!=SQLITE_OK ){
|
||||
goto pager_acquire_err;
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
struct PCache {
|
||||
PgHdr *pDirty, *pDirtyTail; /* LRU 顺序排列的脏页 */
|
||||
PgHdr *pSynced; /* Last synced page in dirty page list */
|
||||
i64 nRefSum; /* Sum of ref counts over all pages */
|
||||
i64 nRefSum; /* 总页面引用计数 */
|
||||
int szCache; /* Configured cache size */
|
||||
int szSpill; /* Size before spilling occurs */
|
||||
int szPage; /* Size of every page in this cache */
|
||||
|
@ -368,6 +368,7 @@ int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){
|
|||
|
||||
/*
|
||||
** Try to obtain a page from the cache.
|
||||
** 尝试从 cache 中获取页面
|
||||
**
|
||||
** This routine returns a pointer to an sqlite3_pcache_page object if
|
||||
** such an object is already in cache, or if a new one is created.
|
||||
|
|
|
@ -24,12 +24,12 @@ typedef struct PCache PCache;
|
|||
*/
|
||||
struct PgHdr {
|
||||
sqlite3_pcache_page *pPage; /* Pcache object page handle */
|
||||
void *pData; /* Page data */
|
||||
void *pData; /* 页面数据 */
|
||||
void *pExtra; /* Extra content */
|
||||
PCache *pCache; /* PRIVATE: Cache that owns this page */
|
||||
PgHdr *pDirty; /* Transient list of dirty sorted by pgno */
|
||||
Pager *pPager; /* The pager this page is part of */
|
||||
Pgno pgno; /* Page number for this page */
|
||||
Pgno pgno; /* 页号 */
|
||||
#ifdef SQLITE_CHECK_PAGES
|
||||
u32 pageHash; /* Hash of page content */
|
||||
#endif
|
||||
|
@ -40,7 +40,7 @@ struct PgHdr {
|
|||
** private to pcache.c and should not be accessed by other modules.
|
||||
** pCache is grouped with the public elements for efficiency.
|
||||
*/
|
||||
i64 nRef; /* Number of users of this page */
|
||||
i64 nRef; /* 页面引用计数 */
|
||||
PgHdr *pDirtyNext; /* Next element in list of dirty pages */
|
||||
PgHdr *pDirtyPrev; /* Previous element in list of dirty pages */
|
||||
/* NB: pDirtyNext and pDirtyPrev are undefined if the
|
||||
|
|
Loading…
Reference in New Issue