1*0Sstevel@tonic-gate /*- 2*0Sstevel@tonic-gate * See the file LICENSE for redistribution information. 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * Copyright (c) 1996, 1997, 1998 5*0Sstevel@tonic-gate * Sleepycat Software. All rights reserved. 6*0Sstevel@tonic-gate * 7*0Sstevel@tonic-gate * @(#)db_page.h 10.18 (Sleepycat) 12/2/98 8*0Sstevel@tonic-gate */ 9*0Sstevel@tonic-gate 10*0Sstevel@tonic-gate #ifndef _DB_PAGE_H_ 11*0Sstevel@tonic-gate #define _DB_PAGE_H_ 12*0Sstevel@tonic-gate 13*0Sstevel@tonic-gate /* 14*0Sstevel@tonic-gate * DB page formats. 15*0Sstevel@tonic-gate * 16*0Sstevel@tonic-gate * This implementation requires that values within the following structures 17*0Sstevel@tonic-gate * NOT be padded -- note, ANSI C permits random padding within structures. 18*0Sstevel@tonic-gate * If your compiler pads randomly you can just forget ever making DB run on 19*0Sstevel@tonic-gate * your system. In addition, no data type can require larger alignment than 20*0Sstevel@tonic-gate * its own size, e.g., a 4-byte data element may not require 8-byte alignment. 21*0Sstevel@tonic-gate * 22*0Sstevel@tonic-gate * Note that key/data lengths are often stored in db_indx_t's -- this is 23*0Sstevel@tonic-gate * not accidental, nor does it limit the key/data size. If the key/data 24*0Sstevel@tonic-gate * item fits on a page, it's guaranteed to be small enough to fit into a 25*0Sstevel@tonic-gate * db_indx_t, and storing it in one saves space. 26*0Sstevel@tonic-gate */ 27*0Sstevel@tonic-gate 28*0Sstevel@tonic-gate #define PGNO_METADATA 0 /* Metadata page number. */ 29*0Sstevel@tonic-gate #define PGNO_INVALID 0 /* Metadata page number, therefore illegal. */ 30*0Sstevel@tonic-gate #define PGNO_ROOT 1 /* Root is page #1. */ 31*0Sstevel@tonic-gate 32*0Sstevel@tonic-gate /* 33*0Sstevel@tonic-gate * When we create pages in mpool, we ask mpool to clear some number of bytes 34*0Sstevel@tonic-gate * in the header. This number must be at least as big as the regular page 35*0Sstevel@tonic-gate * headers and cover enough of the btree and hash meta-data pages to obliterate 36*0Sstevel@tonic-gate * the magic and version numbers. 37*0Sstevel@tonic-gate */ 38*0Sstevel@tonic-gate #define DB_PAGE_CLEAR_LEN 32 39*0Sstevel@tonic-gate 40*0Sstevel@tonic-gate /************************************************************************ 41*0Sstevel@tonic-gate BTREE METADATA PAGE LAYOUT 42*0Sstevel@tonic-gate ************************************************************************/ 43*0Sstevel@tonic-gate 44*0Sstevel@tonic-gate /* 45*0Sstevel@tonic-gate * Btree metadata page layout: 46*0Sstevel@tonic-gate */ 47*0Sstevel@tonic-gate typedef struct _btmeta { 48*0Sstevel@tonic-gate DB_LSN lsn; /* 00-07: LSN. */ 49*0Sstevel@tonic-gate db_pgno_t pgno; /* 08-11: Current page number. */ 50*0Sstevel@tonic-gate u_int32_t magic; /* 12-15: Magic number. */ 51*0Sstevel@tonic-gate u_int32_t version; /* 16-19: Version. */ 52*0Sstevel@tonic-gate u_int32_t pagesize; /* 20-23: Pagesize. */ 53*0Sstevel@tonic-gate u_int32_t maxkey; /* 24-27: Btree: Maxkey. */ 54*0Sstevel@tonic-gate u_int32_t minkey; /* 28-31: Btree: Minkey. */ 55*0Sstevel@tonic-gate u_int32_t free; /* 32-35: Free list page number. */ 56*0Sstevel@tonic-gate #define BTM_DUP 0x001 /* Duplicates. */ 57*0Sstevel@tonic-gate #define BTM_RECNO 0x002 /* Recno tree. */ 58*0Sstevel@tonic-gate #define BTM_RECNUM 0x004 /* Btree: maintain record count. */ 59*0Sstevel@tonic-gate #define BTM_FIXEDLEN 0x008 /* Recno: fixed length records. */ 60*0Sstevel@tonic-gate #define BTM_RENUMBER 0x010 /* Recno: renumber on insert/delete. */ 61*0Sstevel@tonic-gate #define BTM_MASK 0x01f 62*0Sstevel@tonic-gate u_int32_t flags; /* 36-39: Flags. */ 63*0Sstevel@tonic-gate u_int32_t re_len; /* 40-43: Recno: fixed-length record length. */ 64*0Sstevel@tonic-gate u_int32_t re_pad; /* 44-47: Recno: fixed-length record pad. */ 65*0Sstevel@tonic-gate /* 48-67: Unique file ID. */ 66*0Sstevel@tonic-gate u_int8_t uid[DB_FILE_ID_LEN]; 67*0Sstevel@tonic-gate } BTMETA; 68*0Sstevel@tonic-gate 69*0Sstevel@tonic-gate /************************************************************************ 70*0Sstevel@tonic-gate HASH METADATA PAGE LAYOUT 71*0Sstevel@tonic-gate ************************************************************************/ 72*0Sstevel@tonic-gate 73*0Sstevel@tonic-gate /* 74*0Sstevel@tonic-gate * Hash metadata page layout: 75*0Sstevel@tonic-gate */ 76*0Sstevel@tonic-gate /* Hash Table Information */ 77*0Sstevel@tonic-gate typedef struct hashhdr { /* Disk resident portion */ 78*0Sstevel@tonic-gate DB_LSN lsn; /* 00-07: LSN of the header page */ 79*0Sstevel@tonic-gate db_pgno_t pgno; /* 08-11: Page number (btree compatibility). */ 80*0Sstevel@tonic-gate u_int32_t magic; /* 12-15: Magic NO for hash tables */ 81*0Sstevel@tonic-gate u_int32_t version; /* 16-19: Version ID */ 82*0Sstevel@tonic-gate u_int32_t pagesize; /* 20-23: Bucket/Page Size */ 83*0Sstevel@tonic-gate u_int32_t ovfl_point; /* 24-27: Overflow page allocation location */ 84*0Sstevel@tonic-gate u_int32_t last_freed; /* 28-31: Last freed overflow page pgno */ 85*0Sstevel@tonic-gate u_int32_t max_bucket; /* 32-35: ID of Maximum bucket in use */ 86*0Sstevel@tonic-gate u_int32_t high_mask; /* 36-39: Modulo mask into table */ 87*0Sstevel@tonic-gate u_int32_t low_mask; /* 40-43: Modulo mask into table lower half */ 88*0Sstevel@tonic-gate u_int32_t ffactor; /* 44-47: Fill factor */ 89*0Sstevel@tonic-gate u_int32_t nelem; /* 48-51: Number of keys in hash table */ 90*0Sstevel@tonic-gate u_int32_t h_charkey; /* 52-55: Value of hash(CHARKEY) */ 91*0Sstevel@tonic-gate #define DB_HASH_DUP 0x01 92*0Sstevel@tonic-gate u_int32_t flags; /* 56-59: Allow duplicates. */ 93*0Sstevel@tonic-gate #define NCACHED 32 /* number of spare points */ 94*0Sstevel@tonic-gate /* 60-187: Spare pages for overflow */ 95*0Sstevel@tonic-gate u_int32_t spares[NCACHED]; 96*0Sstevel@tonic-gate /* 188-207: Unique file ID. */ 97*0Sstevel@tonic-gate u_int8_t uid[DB_FILE_ID_LEN]; 98*0Sstevel@tonic-gate 99*0Sstevel@tonic-gate /* 100*0Sstevel@tonic-gate * Minimum page size is 256. 101*0Sstevel@tonic-gate */ 102*0Sstevel@tonic-gate } HASHHDR; 103*0Sstevel@tonic-gate 104*0Sstevel@tonic-gate /************************************************************************ 105*0Sstevel@tonic-gate MAIN PAGE LAYOUT 106*0Sstevel@tonic-gate ************************************************************************/ 107*0Sstevel@tonic-gate 108*0Sstevel@tonic-gate /* 109*0Sstevel@tonic-gate * +-----------------------------------+ 110*0Sstevel@tonic-gate * | lsn | pgno | prev pgno | 111*0Sstevel@tonic-gate * +-----------------------------------+ 112*0Sstevel@tonic-gate * | next pgno | entries | hf offset | 113*0Sstevel@tonic-gate * +-----------------------------------+ 114*0Sstevel@tonic-gate * | level | type | index | 115*0Sstevel@tonic-gate * +-----------------------------------+ 116*0Sstevel@tonic-gate * | index | free --> | 117*0Sstevel@tonic-gate * +-----------+-----------------------+ 118*0Sstevel@tonic-gate * | F R E E A R E A | 119*0Sstevel@tonic-gate * +-----------------------------------+ 120*0Sstevel@tonic-gate * | <-- free | item | 121*0Sstevel@tonic-gate * +-----------------------------------+ 122*0Sstevel@tonic-gate * | item | item | item | 123*0Sstevel@tonic-gate * +-----------------------------------+ 124*0Sstevel@tonic-gate * 125*0Sstevel@tonic-gate * sizeof(PAGE) == 26 bytes, and the following indices are guaranteed to be 126*0Sstevel@tonic-gate * two-byte aligned. 127*0Sstevel@tonic-gate * 128*0Sstevel@tonic-gate * For hash and btree leaf pages, index items are paired, e.g., inp[0] is the 129*0Sstevel@tonic-gate * key for inp[1]'s data. All other types of pages only contain single items. 130*0Sstevel@tonic-gate */ 131*0Sstevel@tonic-gate typedef struct _db_page { 132*0Sstevel@tonic-gate DB_LSN lsn; /* 00-07: Log sequence number. */ 133*0Sstevel@tonic-gate db_pgno_t pgno; /* 08-11: Current page number. */ 134*0Sstevel@tonic-gate db_pgno_t prev_pgno; /* 12-15: Previous page number. */ 135*0Sstevel@tonic-gate db_pgno_t next_pgno; /* 16-19: Next page number. */ 136*0Sstevel@tonic-gate db_indx_t entries; /* 20-21: Number of item pairs on the page. */ 137*0Sstevel@tonic-gate db_indx_t hf_offset; /* 22-23: High free byte page offset. */ 138*0Sstevel@tonic-gate 139*0Sstevel@tonic-gate /* 140*0Sstevel@tonic-gate * The btree levels are numbered from the leaf to the root, starting 141*0Sstevel@tonic-gate * with 1, so the leaf is level 1, its parent is level 2, and so on. 142*0Sstevel@tonic-gate * We maintain this level on all btree pages, but the only place that 143*0Sstevel@tonic-gate * we actually need it is on the root page. It would not be difficult 144*0Sstevel@tonic-gate * to hide the byte on the root page once it becomes an internal page, 145*0Sstevel@tonic-gate * so we could get this byte back if we needed it for something else. 146*0Sstevel@tonic-gate */ 147*0Sstevel@tonic-gate #define LEAFLEVEL 1 148*0Sstevel@tonic-gate #define MAXBTREELEVEL 255 149*0Sstevel@tonic-gate u_int8_t level; /* 24: Btree tree level. */ 150*0Sstevel@tonic-gate 151*0Sstevel@tonic-gate #define P_INVALID 0 /* Invalid page type. */ 152*0Sstevel@tonic-gate #define P_DUPLICATE 1 /* Duplicate. */ 153*0Sstevel@tonic-gate #define P_HASH 2 /* Hash. */ 154*0Sstevel@tonic-gate #define P_IBTREE 3 /* Btree internal. */ 155*0Sstevel@tonic-gate #define P_IRECNO 4 /* Recno internal. */ 156*0Sstevel@tonic-gate #define P_LBTREE 5 /* Btree leaf. */ 157*0Sstevel@tonic-gate #define P_LRECNO 6 /* Recno leaf. */ 158*0Sstevel@tonic-gate #define P_OVERFLOW 7 /* Overflow. */ 159*0Sstevel@tonic-gate u_int8_t type; /* 25: Page type. */ 160*0Sstevel@tonic-gate db_indx_t inp[1]; /* Variable length index of items. */ 161*0Sstevel@tonic-gate } PAGE; 162*0Sstevel@tonic-gate 163*0Sstevel@tonic-gate /* Element macros. */ 164*0Sstevel@tonic-gate #define LSN(p) (((PAGE *)p)->lsn) 165*0Sstevel@tonic-gate #define PGNO(p) (((PAGE *)p)->pgno) 166*0Sstevel@tonic-gate #define PREV_PGNO(p) (((PAGE *)p)->prev_pgno) 167*0Sstevel@tonic-gate #define NEXT_PGNO(p) (((PAGE *)p)->next_pgno) 168*0Sstevel@tonic-gate #define NUM_ENT(p) (((PAGE *)p)->entries) 169*0Sstevel@tonic-gate #define HOFFSET(p) (((PAGE *)p)->hf_offset) 170*0Sstevel@tonic-gate #define LEVEL(p) (((PAGE *)p)->level) 171*0Sstevel@tonic-gate #define TYPE(p) (((PAGE *)p)->type) 172*0Sstevel@tonic-gate 173*0Sstevel@tonic-gate /* 174*0Sstevel@tonic-gate * !!! 175*0Sstevel@tonic-gate * The next_pgno and prev_pgno fields are not maintained for btree and recno 176*0Sstevel@tonic-gate * internal pages. It's a minor performance improvement, and more, it's 177*0Sstevel@tonic-gate * hard to do when deleting internal pages, and it decreases the chance of 178*0Sstevel@tonic-gate * deadlock during deletes and splits. 179*0Sstevel@tonic-gate * 180*0Sstevel@tonic-gate * !!! 181*0Sstevel@tonic-gate * The btree/recno access method needs db_recno_t bytes of space on the root 182*0Sstevel@tonic-gate * page to specify how many records are stored in the tree. (The alternative 183*0Sstevel@tonic-gate * is to store the number of records in the meta-data page, which will create 184*0Sstevel@tonic-gate * a second hot spot in trees being actively modified, or recalculate it from 185*0Sstevel@tonic-gate * the BINTERNAL fields on each access.) Overload the prev_pgno field. 186*0Sstevel@tonic-gate */ 187*0Sstevel@tonic-gate #define RE_NREC(p) \ 188*0Sstevel@tonic-gate (TYPE(p) == P_LBTREE ? NUM_ENT(p) / 2 : \ 189*0Sstevel@tonic-gate TYPE(p) == P_LRECNO ? NUM_ENT(p) : PREV_PGNO(p)) 190*0Sstevel@tonic-gate #define RE_NREC_ADJ(p, adj) \ 191*0Sstevel@tonic-gate PREV_PGNO(p) += adj; 192*0Sstevel@tonic-gate #define RE_NREC_SET(p, num) \ 193*0Sstevel@tonic-gate PREV_PGNO(p) = num; 194*0Sstevel@tonic-gate 195*0Sstevel@tonic-gate /* 196*0Sstevel@tonic-gate * Initialize a page. 197*0Sstevel@tonic-gate * 198*0Sstevel@tonic-gate * !!! 199*0Sstevel@tonic-gate * Don't modify the page's LSN, code depends on it being unchanged after a 200*0Sstevel@tonic-gate * P_INIT call. 201*0Sstevel@tonic-gate */ 202*0Sstevel@tonic-gate #define P_INIT(pg, pg_size, n, pg_prev, pg_next, btl, pg_type) do { \ 203*0Sstevel@tonic-gate PGNO(pg) = n; \ 204*0Sstevel@tonic-gate PREV_PGNO(pg) = pg_prev; \ 205*0Sstevel@tonic-gate NEXT_PGNO(pg) = pg_next; \ 206*0Sstevel@tonic-gate NUM_ENT(pg) = 0; \ 207*0Sstevel@tonic-gate HOFFSET(pg) = pg_size; \ 208*0Sstevel@tonic-gate LEVEL(pg) = btl; \ 209*0Sstevel@tonic-gate TYPE(pg) = pg_type; \ 210*0Sstevel@tonic-gate } while (0) 211*0Sstevel@tonic-gate 212*0Sstevel@tonic-gate /* Page header length (offset to first index). */ 213*0Sstevel@tonic-gate #define P_OVERHEAD (SSZA(PAGE, inp)) 214*0Sstevel@tonic-gate 215*0Sstevel@tonic-gate /* First free byte. */ 216*0Sstevel@tonic-gate #define LOFFSET(pg) (P_OVERHEAD + NUM_ENT(pg) * sizeof(db_indx_t)) 217*0Sstevel@tonic-gate 218*0Sstevel@tonic-gate /* Free space on the page. */ 219*0Sstevel@tonic-gate #define P_FREESPACE(pg) (HOFFSET(pg) - LOFFSET(pg)) 220*0Sstevel@tonic-gate 221*0Sstevel@tonic-gate /* Get a pointer to the bytes at a specific index. */ 222*0Sstevel@tonic-gate #define P_ENTRY(pg, indx) ((u_int8_t *)pg + ((PAGE *)pg)->inp[indx]) 223*0Sstevel@tonic-gate 224*0Sstevel@tonic-gate /************************************************************************ 225*0Sstevel@tonic-gate OVERFLOW PAGE LAYOUT 226*0Sstevel@tonic-gate ************************************************************************/ 227*0Sstevel@tonic-gate 228*0Sstevel@tonic-gate /* 229*0Sstevel@tonic-gate * Overflow items are referenced by HOFFPAGE and BOVERFLOW structures, which 230*0Sstevel@tonic-gate * store a page number (the first page of the overflow item) and a length 231*0Sstevel@tonic-gate * (the total length of the overflow item). The overflow item consists of 232*0Sstevel@tonic-gate * some number of overflow pages, linked by the next_pgno field of the page. 233*0Sstevel@tonic-gate * A next_pgno field of PGNO_INVALID flags the end of the overflow item. 234*0Sstevel@tonic-gate * 235*0Sstevel@tonic-gate * Overflow page overloads: 236*0Sstevel@tonic-gate * The amount of overflow data stored on each page is stored in the 237*0Sstevel@tonic-gate * hf_offset field. 238*0Sstevel@tonic-gate * 239*0Sstevel@tonic-gate * The implementation reference counts overflow items as it's possible 240*0Sstevel@tonic-gate * for them to be promoted onto btree internal pages. The reference 241*0Sstevel@tonic-gate * count is stored in the entries field. 242*0Sstevel@tonic-gate */ 243*0Sstevel@tonic-gate #define OV_LEN(p) (((PAGE *)p)->hf_offset) 244*0Sstevel@tonic-gate #define OV_REF(p) (((PAGE *)p)->entries) 245*0Sstevel@tonic-gate 246*0Sstevel@tonic-gate /* Maximum number of bytes that you can put on an overflow page. */ 247*0Sstevel@tonic-gate #define P_MAXSPACE(psize) ((psize) - P_OVERHEAD) 248*0Sstevel@tonic-gate 249*0Sstevel@tonic-gate /************************************************************************ 250*0Sstevel@tonic-gate HASH PAGE LAYOUT 251*0Sstevel@tonic-gate ************************************************************************/ 252*0Sstevel@tonic-gate 253*0Sstevel@tonic-gate /* Each index references a group of bytes on the page. */ 254*0Sstevel@tonic-gate #define H_KEYDATA 1 /* Key/data item. */ 255*0Sstevel@tonic-gate #define H_DUPLICATE 2 /* Duplicate key/data item. */ 256*0Sstevel@tonic-gate #define H_OFFPAGE 3 /* Overflow key/data item. */ 257*0Sstevel@tonic-gate #define H_OFFDUP 4 /* Overflow page of duplicates. */ 258*0Sstevel@tonic-gate 259*0Sstevel@tonic-gate /* 260*0Sstevel@tonic-gate * !!! 261*0Sstevel@tonic-gate * Items on hash pages are (potentially) unaligned, so we can never cast the 262*0Sstevel@tonic-gate * (page + offset) pointer to an HKEYDATA, HOFFPAGE or HOFFDUP structure, as 263*0Sstevel@tonic-gate * we do with B+tree on-page structures. Because we frequently want the type 264*0Sstevel@tonic-gate * field, it requires no alignment, and it's in the same location in all three 265*0Sstevel@tonic-gate * structures, there's a pair of macros. 266*0Sstevel@tonic-gate */ 267*0Sstevel@tonic-gate #define HPAGE_PTYPE(p) (*(u_int8_t *)p) 268*0Sstevel@tonic-gate #define HPAGE_TYPE(pg, indx) (*P_ENTRY(pg, indx)) 269*0Sstevel@tonic-gate 270*0Sstevel@tonic-gate /* 271*0Sstevel@tonic-gate * The first and second types are H_KEYDATA and H_DUPLICATE, represented 272*0Sstevel@tonic-gate * by the HKEYDATA structure: 273*0Sstevel@tonic-gate * 274*0Sstevel@tonic-gate * +-----------------------------------+ 275*0Sstevel@tonic-gate * | type | key/data ... | 276*0Sstevel@tonic-gate * +-----------------------------------+ 277*0Sstevel@tonic-gate * 278*0Sstevel@tonic-gate * For duplicates, the data field encodes duplicate elements in the data 279*0Sstevel@tonic-gate * field: 280*0Sstevel@tonic-gate * 281*0Sstevel@tonic-gate * +---------------------------------------------------------------+ 282*0Sstevel@tonic-gate * | type | len1 | element1 | len1 | len2 | element2 | len2 | 283*0Sstevel@tonic-gate * +---------------------------------------------------------------+ 284*0Sstevel@tonic-gate * 285*0Sstevel@tonic-gate * Thus, by keeping track of the offset in the element, we can do both 286*0Sstevel@tonic-gate * backward and forward traversal. 287*0Sstevel@tonic-gate */ 288*0Sstevel@tonic-gate typedef struct _hkeydata { 289*0Sstevel@tonic-gate u_int8_t type; /* 00: Page type. */ 290*0Sstevel@tonic-gate u_int8_t data[1]; /* Variable length key/data item. */ 291*0Sstevel@tonic-gate } HKEYDATA; 292*0Sstevel@tonic-gate #define HKEYDATA_DATA(p) (((u_int8_t *)p) + SSZA(HKEYDATA, data)) 293*0Sstevel@tonic-gate 294*0Sstevel@tonic-gate /* 295*0Sstevel@tonic-gate * The length of any HKEYDATA item. Note that indx is an element index, 296*0Sstevel@tonic-gate * not a PAIR index. 297*0Sstevel@tonic-gate */ 298*0Sstevel@tonic-gate #define LEN_HITEM(pg, pgsize, indx) \ 299*0Sstevel@tonic-gate (((indx) == 0 ? pgsize : pg->inp[indx - 1]) - pg->inp[indx]) 300*0Sstevel@tonic-gate 301*0Sstevel@tonic-gate #define LEN_HKEYDATA(pg, psize, indx) \ 302*0Sstevel@tonic-gate (((indx) == 0 ? psize : pg->inp[indx - 1]) - \ 303*0Sstevel@tonic-gate pg->inp[indx] - HKEYDATA_SIZE(0)) 304*0Sstevel@tonic-gate 305*0Sstevel@tonic-gate /* 306*0Sstevel@tonic-gate * Page space required to add a new HKEYDATA item to the page, with and 307*0Sstevel@tonic-gate * without the index value. 308*0Sstevel@tonic-gate */ 309*0Sstevel@tonic-gate #define HKEYDATA_SIZE(len) \ 310*0Sstevel@tonic-gate ((len) + SSZA(HKEYDATA, data)) 311*0Sstevel@tonic-gate #define HKEYDATA_PSIZE(len) \ 312*0Sstevel@tonic-gate (HKEYDATA_SIZE(len) + sizeof(db_indx_t)) 313*0Sstevel@tonic-gate 314*0Sstevel@tonic-gate /* Put a HKEYDATA item at the location referenced by a page entry. */ 315*0Sstevel@tonic-gate #define PUT_HKEYDATA(pe, kd, len, type) { \ 316*0Sstevel@tonic-gate ((HKEYDATA *)pe)->type = type; \ 317*0Sstevel@tonic-gate memcpy((u_int8_t *)pe + sizeof(u_int8_t), kd, len); \ 318*0Sstevel@tonic-gate } 319*0Sstevel@tonic-gate 320*0Sstevel@tonic-gate /* 321*0Sstevel@tonic-gate * Macros the describe the page layout in terms of key-data pairs. 322*0Sstevel@tonic-gate * The use of "pindex" indicates that the argument is the index 323*0Sstevel@tonic-gate * expressed in pairs instead of individual elements. 324*0Sstevel@tonic-gate */ 325*0Sstevel@tonic-gate #define H_NUMPAIRS(pg) (NUM_ENT(pg) / 2) 326*0Sstevel@tonic-gate #define H_KEYINDEX(pindx) (2 * (pindx)) 327*0Sstevel@tonic-gate #define H_DATAINDEX(pindx) ((2 * (pindx)) + 1) 328*0Sstevel@tonic-gate #define H_PAIRKEY(pg, pindx) P_ENTRY(pg, H_KEYINDEX(pindx)) 329*0Sstevel@tonic-gate #define H_PAIRDATA(pg, pindx) P_ENTRY(pg, H_DATAINDEX(pindx)) 330*0Sstevel@tonic-gate #define H_PAIRSIZE(pg, psize, pindx) \ 331*0Sstevel@tonic-gate (LEN_HITEM(pg, psize, H_KEYINDEX(pindx)) + \ 332*0Sstevel@tonic-gate LEN_HITEM(pg, psize, H_DATAINDEX(pindx))) 333*0Sstevel@tonic-gate #define LEN_HDATA(p, psize, pindx) LEN_HKEYDATA(p, psize, H_DATAINDEX(pindx)) 334*0Sstevel@tonic-gate #define LEN_HKEY(p, psize, pindx) LEN_HKEYDATA(p, psize, H_KEYINDEX(pindx)) 335*0Sstevel@tonic-gate 336*0Sstevel@tonic-gate /* 337*0Sstevel@tonic-gate * The third type is the H_OFFPAGE, represented by the HOFFPAGE structure: 338*0Sstevel@tonic-gate */ 339*0Sstevel@tonic-gate typedef struct _hoffpage { 340*0Sstevel@tonic-gate u_int8_t type; /* 00: Page type and delete flag. */ 341*0Sstevel@tonic-gate u_int8_t unused[3]; /* 01-03: Padding, unused. */ 342*0Sstevel@tonic-gate db_pgno_t pgno; /* 04-07: Offpage page number. */ 343*0Sstevel@tonic-gate u_int32_t tlen; /* 08-11: Total length of item. */ 344*0Sstevel@tonic-gate } HOFFPAGE; 345*0Sstevel@tonic-gate 346*0Sstevel@tonic-gate #define HOFFPAGE_PGNO(p) (((u_int8_t *)p) + SSZ(HOFFPAGE, pgno)) 347*0Sstevel@tonic-gate #define HOFFPAGE_TLEN(p) (((u_int8_t *)p) + SSZ(HOFFPAGE, tlen)) 348*0Sstevel@tonic-gate 349*0Sstevel@tonic-gate /* 350*0Sstevel@tonic-gate * Page space required to add a new HOFFPAGE item to the page, with and 351*0Sstevel@tonic-gate * without the index value. 352*0Sstevel@tonic-gate */ 353*0Sstevel@tonic-gate #define HOFFPAGE_SIZE (sizeof(HOFFPAGE)) 354*0Sstevel@tonic-gate #define HOFFPAGE_PSIZE (HOFFPAGE_SIZE + sizeof(db_indx_t)) 355*0Sstevel@tonic-gate 356*0Sstevel@tonic-gate /* 357*0Sstevel@tonic-gate * The fourth type is H_OFFDUP represented by the HOFFDUP structure: 358*0Sstevel@tonic-gate */ 359*0Sstevel@tonic-gate typedef struct _hoffdup { 360*0Sstevel@tonic-gate u_int8_t type; /* 00: Page type and delete flag. */ 361*0Sstevel@tonic-gate u_int8_t unused[3]; /* 01-03: Padding, unused. */ 362*0Sstevel@tonic-gate db_pgno_t pgno; /* 04-07: Offpage page number. */ 363*0Sstevel@tonic-gate } HOFFDUP; 364*0Sstevel@tonic-gate #define HOFFDUP_PGNO(p) (((u_int8_t *)p) + SSZ(HOFFDUP, pgno)) 365*0Sstevel@tonic-gate 366*0Sstevel@tonic-gate /* 367*0Sstevel@tonic-gate * Page space required to add a new HOFFDUP item to the page, with and 368*0Sstevel@tonic-gate * without the index value. 369*0Sstevel@tonic-gate */ 370*0Sstevel@tonic-gate #define HOFFDUP_SIZE (sizeof(HOFFDUP)) 371*0Sstevel@tonic-gate #define HOFFDUP_PSIZE (HOFFDUP_SIZE + sizeof(db_indx_t)) 372*0Sstevel@tonic-gate 373*0Sstevel@tonic-gate /************************************************************************ 374*0Sstevel@tonic-gate BTREE PAGE LAYOUT 375*0Sstevel@tonic-gate ************************************************************************/ 376*0Sstevel@tonic-gate 377*0Sstevel@tonic-gate /* Each index references a group of bytes on the page. */ 378*0Sstevel@tonic-gate #define B_KEYDATA 1 /* Key/data item. */ 379*0Sstevel@tonic-gate #define B_DUPLICATE 2 /* Duplicate key/data item. */ 380*0Sstevel@tonic-gate #define B_OVERFLOW 3 /* Overflow key/data item. */ 381*0Sstevel@tonic-gate 382*0Sstevel@tonic-gate /* 383*0Sstevel@tonic-gate * We have to store a deleted entry flag in the page. The reason is complex, 384*0Sstevel@tonic-gate * but the simple version is that we can't delete on-page items referenced by 385*0Sstevel@tonic-gate * a cursor -- the return order of subsequent insertions might be wrong. The 386*0Sstevel@tonic-gate * delete flag is an overload of the top bit of the type byte. 387*0Sstevel@tonic-gate */ 388*0Sstevel@tonic-gate #define B_DELETE (0x80) 389*0Sstevel@tonic-gate #define B_DCLR(t) (t) &= ~B_DELETE 390*0Sstevel@tonic-gate #define B_DSET(t) (t) |= B_DELETE 391*0Sstevel@tonic-gate #define B_DISSET(t) ((t) & B_DELETE) 392*0Sstevel@tonic-gate 393*0Sstevel@tonic-gate #define B_TYPE(t) ((t) & ~B_DELETE) 394*0Sstevel@tonic-gate #define B_TSET(t, type, deleted) { \ 395*0Sstevel@tonic-gate (t) = (type); \ 396*0Sstevel@tonic-gate if (deleted) \ 397*0Sstevel@tonic-gate B_DSET(t); \ 398*0Sstevel@tonic-gate } 399*0Sstevel@tonic-gate 400*0Sstevel@tonic-gate /* 401*0Sstevel@tonic-gate * The first type is B_KEYDATA, represented by the BKEYDATA structure: 402*0Sstevel@tonic-gate */ 403*0Sstevel@tonic-gate typedef struct _bkeydata { 404*0Sstevel@tonic-gate db_indx_t len; /* 00-01: Key/data item length. */ 405*0Sstevel@tonic-gate u_int8_t type; /* 02: Page type AND DELETE FLAG. */ 406*0Sstevel@tonic-gate u_int8_t data[1]; /* Variable length key/data item. */ 407*0Sstevel@tonic-gate } BKEYDATA; 408*0Sstevel@tonic-gate 409*0Sstevel@tonic-gate /* Get a BKEYDATA item for a specific index. */ 410*0Sstevel@tonic-gate #define GET_BKEYDATA(pg, indx) \ 411*0Sstevel@tonic-gate ((BKEYDATA *)P_ENTRY(pg, indx)) 412*0Sstevel@tonic-gate 413*0Sstevel@tonic-gate /* 414*0Sstevel@tonic-gate * Page space required to add a new BKEYDATA item to the page, with and 415*0Sstevel@tonic-gate * without the index value. 416*0Sstevel@tonic-gate */ 417*0Sstevel@tonic-gate #define BKEYDATA_SIZE(len) \ 418*0Sstevel@tonic-gate ALIGN((len) + SSZA(BKEYDATA, data), 4) 419*0Sstevel@tonic-gate #define BKEYDATA_PSIZE(len) \ 420*0Sstevel@tonic-gate (BKEYDATA_SIZE(len) + sizeof(db_indx_t)) 421*0Sstevel@tonic-gate 422*0Sstevel@tonic-gate /* 423*0Sstevel@tonic-gate * The second and third types are B_DUPLICATE and B_OVERFLOW, represented 424*0Sstevel@tonic-gate * by the BOVERFLOW structure. 425*0Sstevel@tonic-gate */ 426*0Sstevel@tonic-gate typedef struct _boverflow { 427*0Sstevel@tonic-gate db_indx_t unused1; /* 00-01: Padding, unused. */ 428*0Sstevel@tonic-gate u_int8_t type; /* 02: Page type AND DELETE FLAG. */ 429*0Sstevel@tonic-gate u_int8_t unused2; /* 03: Padding, unused. */ 430*0Sstevel@tonic-gate db_pgno_t pgno; /* 04-07: Next page number. */ 431*0Sstevel@tonic-gate u_int32_t tlen; /* 08-11: Total length of item. */ 432*0Sstevel@tonic-gate } BOVERFLOW; 433*0Sstevel@tonic-gate 434*0Sstevel@tonic-gate /* Get a BOVERFLOW item for a specific index. */ 435*0Sstevel@tonic-gate #define GET_BOVERFLOW(pg, indx) \ 436*0Sstevel@tonic-gate ((BOVERFLOW *)P_ENTRY(pg, indx)) 437*0Sstevel@tonic-gate 438*0Sstevel@tonic-gate /* 439*0Sstevel@tonic-gate * Page space required to add a new BOVERFLOW item to the page, with and 440*0Sstevel@tonic-gate * without the index value. 441*0Sstevel@tonic-gate */ 442*0Sstevel@tonic-gate #define BOVERFLOW_SIZE \ 443*0Sstevel@tonic-gate ALIGN(sizeof(BOVERFLOW), 4) 444*0Sstevel@tonic-gate #define BOVERFLOW_PSIZE \ 445*0Sstevel@tonic-gate (BOVERFLOW_SIZE + sizeof(db_indx_t)) 446*0Sstevel@tonic-gate 447*0Sstevel@tonic-gate /* 448*0Sstevel@tonic-gate * Btree leaf and hash page layouts group indices in sets of two, one 449*0Sstevel@tonic-gate * for the key and one for the data. Everything else does it in sets 450*0Sstevel@tonic-gate * of one to save space. I use the following macros so that it's real 451*0Sstevel@tonic-gate * obvious what's going on... 452*0Sstevel@tonic-gate */ 453*0Sstevel@tonic-gate #define O_INDX 1 454*0Sstevel@tonic-gate #define P_INDX 2 455*0Sstevel@tonic-gate 456*0Sstevel@tonic-gate /************************************************************************ 457*0Sstevel@tonic-gate BTREE INTERNAL PAGE LAYOUT 458*0Sstevel@tonic-gate ************************************************************************/ 459*0Sstevel@tonic-gate 460*0Sstevel@tonic-gate /* 461*0Sstevel@tonic-gate * Btree internal entry. 462*0Sstevel@tonic-gate */ 463*0Sstevel@tonic-gate typedef struct _binternal { 464*0Sstevel@tonic-gate db_indx_t len; /* 00-01: Key/data item length. */ 465*0Sstevel@tonic-gate u_int8_t type; /* 02: Page type AND DELETE FLAG. */ 466*0Sstevel@tonic-gate u_int8_t unused; /* 03: Padding, unused. */ 467*0Sstevel@tonic-gate db_pgno_t pgno; /* 04-07: Page number of referenced page. */ 468*0Sstevel@tonic-gate db_recno_t nrecs; /* 08-11: Subtree record count. */ 469*0Sstevel@tonic-gate u_int8_t data[1]; /* Variable length key item. */ 470*0Sstevel@tonic-gate } BINTERNAL; 471*0Sstevel@tonic-gate 472*0Sstevel@tonic-gate /* Get a BINTERNAL item for a specific index. */ 473*0Sstevel@tonic-gate #define GET_BINTERNAL(pg, indx) \ 474*0Sstevel@tonic-gate ((BINTERNAL *)P_ENTRY(pg, indx)) 475*0Sstevel@tonic-gate 476*0Sstevel@tonic-gate /* 477*0Sstevel@tonic-gate * Page space required to add a new BINTERNAL item to the page, with and 478*0Sstevel@tonic-gate * without the index value. 479*0Sstevel@tonic-gate */ 480*0Sstevel@tonic-gate #define BINTERNAL_SIZE(len) \ 481*0Sstevel@tonic-gate ALIGN((len) + SSZA(BINTERNAL, data), 4) 482*0Sstevel@tonic-gate #define BINTERNAL_PSIZE(len) \ 483*0Sstevel@tonic-gate (BINTERNAL_SIZE(len) + sizeof(db_indx_t)) 484*0Sstevel@tonic-gate 485*0Sstevel@tonic-gate /************************************************************************ 486*0Sstevel@tonic-gate RECNO INTERNAL PAGE LAYOUT 487*0Sstevel@tonic-gate ************************************************************************/ 488*0Sstevel@tonic-gate 489*0Sstevel@tonic-gate /* 490*0Sstevel@tonic-gate * The recno internal entry. 491*0Sstevel@tonic-gate * 492*0Sstevel@tonic-gate * XXX 493*0Sstevel@tonic-gate * Why not fold this into the db_indx_t structure, it's fixed length? 494*0Sstevel@tonic-gate */ 495*0Sstevel@tonic-gate typedef struct _rinternal { 496*0Sstevel@tonic-gate db_pgno_t pgno; /* 00-03: Page number of referenced page. */ 497*0Sstevel@tonic-gate db_recno_t nrecs; /* 04-07: Subtree record count. */ 498*0Sstevel@tonic-gate } RINTERNAL; 499*0Sstevel@tonic-gate 500*0Sstevel@tonic-gate /* Get a RINTERNAL item for a specific index. */ 501*0Sstevel@tonic-gate #define GET_RINTERNAL(pg, indx) \ 502*0Sstevel@tonic-gate ((RINTERNAL *)P_ENTRY(pg, indx)) 503*0Sstevel@tonic-gate 504*0Sstevel@tonic-gate /* 505*0Sstevel@tonic-gate * Page space required to add a new RINTERNAL item to the page, with and 506*0Sstevel@tonic-gate * without the index value. 507*0Sstevel@tonic-gate */ 508*0Sstevel@tonic-gate #define RINTERNAL_SIZE \ 509*0Sstevel@tonic-gate ALIGN(sizeof(RINTERNAL), 4) 510*0Sstevel@tonic-gate #define RINTERNAL_PSIZE \ 511*0Sstevel@tonic-gate (RINTERNAL_SIZE + sizeof(db_indx_t)) 512*0Sstevel@tonic-gate #endif /* _DB_PAGE_H_ */ 513