1*46129Smao /*- 2*46129Smao * Copyright (c) 1990 The Regents of the University of California. 3*46129Smao * All rights reserved. 4*46129Smao * 5*46129Smao * This code is derived from software contributed to Berkeley by 6*46129Smao * Mike Olson. 7*46129Smao * 8*46129Smao * %sccs.include.redist.c% 9*46129Smao */ 10*46129Smao 11*46129Smao #if defined(LIBC_SCCS) && !defined(lint) 12*46129Smao static char sccsid[] = "@(#)bt_overflow.c 5.1 (Berkeley) 01/23/91"; 13*46129Smao #endif /* LIBC_SCCS and not lint */ 14*46129Smao 15*46129Smao #include <sys/types.h> 16*46129Smao #include <db.h> 17*46129Smao #include "btree.h" 18*46129Smao 19*46129Smao /* 20*46129Smao * _BT_GETBIG -- Get big data from indirect pages. 21*46129Smao * 22*46129Smao * This routine chases indirect blocks for the big object at the 23*46129Smao * specified page to a buffer, and return the address of the buffer. 24*46129Smao * 25*46129Smao * Parameters: 26*46129Smao * t -- btree with the indirect blocks 27*46129Smao * pgno -- page number that starts the chain 28*46129Smao * p -- (char **) to get the address of the buffer containing 29*46129Smao * the key or datum. 30*46129Smao * sz -- pointer to an int to get the size of the instantiated 31*46129Smao * object. 32*46129Smao * 33*46129Smao * Returns: 34*46129Smao * RET_SUCCESS, RET_ERROR. 35*46129Smao * 36*46129Smao * Side Effects: 37*46129Smao * None. 38*46129Smao */ 39*46129Smao 40*46129Smao int 41*46129Smao _bt_getbig(t, pgno, p, sz) 42*46129Smao BTREE_P t; 43*46129Smao pgno_t pgno; 44*46129Smao char **p; 45*46129Smao int *sz; 46*46129Smao { 47*46129Smao pgno_t save; 48*46129Smao size_t nbytes; 49*46129Smao size_t nhere; 50*46129Smao BTHEADER *h; 51*46129Smao char *top, *from, *where; 52*46129Smao 53*46129Smao save = t->bt_curpage->h_pgno; 54*46129Smao if (_bt_getpage(t, pgno) == RET_ERROR) 55*46129Smao return (RET_ERROR); 56*46129Smao 57*46129Smao h = t->bt_curpage; 58*46129Smao 59*46129Smao bcopy((char *) &(h->h_linp[0]), 60*46129Smao (char *) &nbytes, 61*46129Smao (size_t) sizeof(nbytes)); 62*46129Smao 63*46129Smao if ((*p = (char *) malloc(nbytes)) == (char *) NULL) 64*46129Smao return (RET_ERROR); 65*46129Smao 66*46129Smao *sz = nbytes; 67*46129Smao from = ((char *) (&h->h_linp[0])) + sizeof(nbytes); 68*46129Smao top = ((char *) h) + t->bt_psize; 69*46129Smao 70*46129Smao /* need more space for data? */ 71*46129Smao 72*46129Smao where = *p; 73*46129Smao 74*46129Smao while (nbytes > 0) { 75*46129Smao nhere = (int) (top - from); 76*46129Smao if (nhere > nbytes) { 77*46129Smao (void) bcopy(from, where, (size_t) nbytes); 78*46129Smao nbytes = 0; 79*46129Smao } else { 80*46129Smao (void) bcopy(from, where, nhere); 81*46129Smao where += nhere; 82*46129Smao nbytes -= nhere; 83*46129Smao if (_bt_getpage(t, h->h_nextpg) == RET_ERROR) 84*46129Smao return (RET_ERROR); 85*46129Smao h = t->bt_curpage; 86*46129Smao top = ((char *) h) + t->bt_psize; 87*46129Smao from = (char *) &(h->h_linp[0]); 88*46129Smao } 89*46129Smao } 90*46129Smao 91*46129Smao if (_bt_getpage(t, save) == RET_ERROR) 92*46129Smao return (RET_ERROR); 93*46129Smao 94*46129Smao return (RET_SUCCESS); 95*46129Smao } 96*46129Smao 97*46129Smao /* 98*46129Smao * _BT_DELINDIR -- Delete a chain of indirect blocks from the btree. 99*46129Smao * 100*46129Smao * When a large item is deleted from the tree, this routine puts the 101*46129Smao * space that it occupied onto the free list for later reuse. The 102*46129Smao * bt_free entry in the btree structure points at the head of this list. 103*46129Smao * This value is also stored on disk in the btree's metadata. 104*46129Smao * 105*46129Smao * Parameters: 106*46129Smao * t -- btree from which to delete pages 107*46129Smao * chain -- page number that starts the chain. 108*46129Smao * 109*46129Smao * Returns: 110*46129Smao * RET_SUCCESS, RET_ERROR. 111*46129Smao * 112*46129Smao * Side Effects: 113*46129Smao * Invalidates the current on-disk version of the btree's 114*46129Smao * metadata (if any), and updates the free list appropriately. 115*46129Smao */ 116*46129Smao 117*46129Smao int 118*46129Smao _bt_delindir(t, chain) 119*46129Smao BTREE_P t; 120*46129Smao pgno_t chain; 121*46129Smao { 122*46129Smao BTHEADER *h; 123*46129Smao pgno_t save; 124*46129Smao pgno_t oldfree; 125*46129Smao 126*46129Smao h = t->bt_curpage; 127*46129Smao save = h->h_pgno; 128*46129Smao if (_bt_getpage(t, chain) == RET_ERROR) 129*46129Smao return (RET_ERROR); 130*46129Smao 131*46129Smao /* 132*46129Smao * If some internal node is pointing at this chain, don't 133*46129Smao * delete it. 134*46129Smao */ 135*46129Smao 136*46129Smao if (t->bt_curpage->h_flags & F_PRESERVE) { 137*46129Smao if (_bt_getpage(t, save) == RET_ERROR) 138*46129Smao return (RET_ERROR); 139*46129Smao return (RET_SUCCESS); 140*46129Smao } 141*46129Smao 142*46129Smao /* if there's nothing on the free list, this is easy... */ 143*46129Smao if (t->bt_free == P_NONE) { 144*46129Smao t->bt_free = chain; 145*46129Smao } else { 146*46129Smao oldfree = t->bt_free; 147*46129Smao 148*46129Smao /* find the end of the data chain for the deleted datum */ 149*46129Smao t->bt_free = chain; 150*46129Smao do { 151*46129Smao if (_bt_getpage(t, chain) == RET_ERROR) 152*46129Smao return (RET_ERROR); 153*46129Smao h = t->bt_curpage; 154*46129Smao if (h->h_nextpg != P_NONE) 155*46129Smao chain = h->h_nextpg; 156*46129Smao } while (h->h_nextpg != P_NONE); 157*46129Smao 158*46129Smao /* link freed pages into free list */ 159*46129Smao h->h_nextpg = oldfree; 160*46129Smao if (_bt_write(t, h, RELEASE) == RET_ERROR) 161*46129Smao return (RET_ERROR); 162*46129Smao if (_bt_getpage(t, oldfree) == RET_ERROR) 163*46129Smao return (RET_ERROR); 164*46129Smao h = t->bt_curpage; 165*46129Smao h->h_prevpg = chain; 166*46129Smao if (_bt_write(t, h, RELEASE) == RET_ERROR) 167*46129Smao return (RET_ERROR); 168*46129Smao } 169*46129Smao 170*46129Smao /* restore the tree's current page pointer */ 171*46129Smao if (_bt_getpage(t, save) == RET_ERROR) 172*46129Smao return (RET_ERROR); 173*46129Smao 174*46129Smao /* we have trashed the tree metadata; rewrite it later */ 175*46129Smao t->bt_flags &= ~BTF_METAOK; 176*46129Smao 177*46129Smao return (RET_SUCCESS); 178*46129Smao } 179*46129Smao 180*46129Smao /* 181*46129Smao * _BT_INDIRECT -- Write a series of indirect pages for big objects. 182*46129Smao * 183*46129Smao * A chain of indirect pages looks like 184*46129Smao * 185*46129Smao * +-------------------+ +---------------------+ 186*46129Smao * |hdr|size| | |hdr| | 187*46129Smao * +---+----+ | +---+ | 188*46129Smao * | ... data ... | | ... data ... | ... 189*46129Smao * | | | | 190*46129Smao * +-------------------+ +---------------------+ 191*46129Smao * 192*46129Smao * where hdr is a standard btree page header (with the indirect bit 193*46129Smao * set), size on the first page is the real size of the datum, and 194*46129Smao * data are bytes of the datum, split across as many pages as necessary. 195*46129Smao * Indirect pages are chained together with the h_prevpg and h_nextpg 196*46129Smao * entries of the page header struct. 197*46129Smao * 198*46129Smao * A single DBT is written per chain, so space on the last page is 199*46129Smao * wasted. 200*46129Smao * 201*46129Smao * We return the page number of the start of the chain. 202*46129Smao * 203*46129Smao * When a big object is deleted from a tree, the space that it occupied 204*46129Smao * is placed on a free list for later reuse. This routine checks that 205*46129Smao * free list before allocating new pages to the big datum being inserted. 206*46129Smao * 207*46129Smao * Parameters: 208*46129Smao * t -- btree in which to store indirect blocks 209*46129Smao * data -- DBT with the big datum in it 210*46129Smao * pgno -- place to put the starting page number of the chain 211*46129Smao * 212*46129Smao * Returns: 213*46129Smao * RET_SUCCESS, RET_ERROR. 214*46129Smao * 215*46129Smao * Side Effects: 216*46129Smao * Current page is changed on return. 217*46129Smao */ 218*46129Smao 219*46129Smao int 220*46129Smao _bt_indirect(t, data, pgno) 221*46129Smao BTREE_P t; 222*46129Smao DBT *data; 223*46129Smao pgno_t *pgno; 224*46129Smao { 225*46129Smao pgno_t prev; 226*46129Smao char *top; 227*46129Smao char *where; 228*46129Smao char *from; 229*46129Smao size_t dsize; 230*46129Smao pgno_t nextchn; 231*46129Smao int ischain; 232*46129Smao BTHEADER *cur; 233*46129Smao 234*46129Smao /* get set for first page in chain */ 235*46129Smao prev = P_NONE; 236*46129Smao dsize = data->size; 237*46129Smao from = (char *) data->data; 238*46129Smao 239*46129Smao /* if there are blocks on the free list, use them first */ 240*46129Smao if ((*pgno = t->bt_free) == P_NONE) { 241*46129Smao if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL) 242*46129Smao return (RET_ERROR); 243*46129Smao 244*46129Smao ischain = 0; 245*46129Smao *pgno = cur->h_pgno = ++(t->bt_npages); 246*46129Smao } else { 247*46129Smao if (_bt_getpage(t, *pgno) == RET_ERROR) 248*46129Smao return (RET_ERROR); 249*46129Smao ischain = 1; 250*46129Smao cur = t->bt_curpage; 251*46129Smao } 252*46129Smao 253*46129Smao cur->h_flags = F_CONT|F_LEAF; 254*46129Smao (void) bcopy((char *) &dsize, (char *) &cur->h_linp[0], sizeof(size_t)); 255*46129Smao where = ((char *) (&cur->h_linp[0])) + sizeof(size_t); 256*46129Smao 257*46129Smao /* fill and write pages in the chain */ 258*46129Smao for (;;) { 259*46129Smao int nhere; 260*46129Smao 261*46129Smao top = ((char *) cur) + t->bt_psize; 262*46129Smao cur->h_prevpg = prev; 263*46129Smao nextchn = cur->h_nextpg; 264*46129Smao nhere = (int) (top - where); 265*46129Smao 266*46129Smao if (nhere >= dsize) { 267*46129Smao (void) bcopy(from, where, (int) dsize); 268*46129Smao cur->h_nextpg = P_NONE; 269*46129Smao dsize = 0; 270*46129Smao } else { 271*46129Smao (void) bcopy(from, where, (int) nhere); 272*46129Smao dsize -= nhere; 273*46129Smao from += nhere; 274*46129Smao if (nextchn == P_NONE) 275*46129Smao cur->h_nextpg = t->bt_npages + 1; 276*46129Smao prev = cur->h_pgno; 277*46129Smao } 278*46129Smao 279*46129Smao /* current page is ready to go; write it out */ 280*46129Smao if (_bt_write(t, cur, RELEASE) == RET_ERROR) 281*46129Smao return (RET_ERROR); 282*46129Smao 283*46129Smao /* free the current page, if appropriate */ 284*46129Smao if (ISDISK(t) && !ISCACHE(t) && !ischain) { 285*46129Smao (void) free ((char *) cur); 286*46129Smao } 287*46129Smao 288*46129Smao /* done? */ 289*46129Smao if (dsize == 0) 290*46129Smao break; 291*46129Smao 292*46129Smao /* allocate another page */ 293*46129Smao if (nextchn == P_NONE) { 294*46129Smao if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL) 295*46129Smao return (RET_ERROR); 296*46129Smao ischain = 0; 297*46129Smao cur->h_pgno = ++(t->bt_npages); 298*46129Smao } else { 299*46129Smao if (_bt_getpage(t, nextchn) == RET_ERROR) 300*46129Smao return (RET_ERROR); 301*46129Smao ischain = 1; 302*46129Smao cur = t->bt_curpage; 303*46129Smao } 304*46129Smao cur->h_flags = F_LEAF | F_CONT; 305*46129Smao 306*46129Smao where = (char *) (&cur->h_linp[0]); 307*46129Smao } 308*46129Smao 309*46129Smao /* if we used pages from the free list, record changes to it */ 310*46129Smao if (*pgno == t->bt_free) { 311*46129Smao t->bt_free = nextchn; 312*46129Smao t->bt_flags &= ~BTF_METAOK; 313*46129Smao } 314*46129Smao 315*46129Smao return (RET_SUCCESS); 316*46129Smao } 317*46129Smao 318*46129Smao /* 319*46129Smao * _BT_MARKCHAIN -- Mark a chain of pages as used by an internal node. 320*46129Smao * 321*46129Smao * Chains of indirect blocks pointed to by leaf nodes get reclaimed 322*46129Smao * when the item that points to them gets deleted. Chains pointed 323*46129Smao * to by internal nodes never get deleted. This routine marks a 324*46129Smao * chain as pointed to by an internal node. 325*46129Smao * 326*46129Smao * Parameters: 327*46129Smao * t -- tree in which to mark 328*46129Smao * chain -- number of first page in chain 329*46129Smao * 330*46129Smao * Returns: 331*46129Smao * RET_SUCCESS, RET_ERROR. 332*46129Smao * 333*46129Smao * Side Effects: 334*46129Smao * None. 335*46129Smao */ 336*46129Smao 337*46129Smao int 338*46129Smao _bt_markchain(t, chain) 339*46129Smao BTREE_P t; 340*46129Smao pgno_t chain; 341*46129Smao { 342*46129Smao pgno_t save; 343*46129Smao 344*46129Smao save = t->bt_curpage->h_pgno; 345*46129Smao 346*46129Smao if (_bt_getpage(t, chain) == RET_ERROR) 347*46129Smao return (RET_ERROR); 348*46129Smao 349*46129Smao t->bt_curpage->h_flags |= (F_DIRTY|F_PRESERVE); 350*46129Smao 351*46129Smao if (_bt_getpage(t, save) == RET_ERROR) 352*46129Smao return (RET_ERROR); 353*46129Smao 354*46129Smao return (RET_SUCCESS); 355*46129Smao } 356