1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)bt_overflow.c 5.1 (Berkeley) 01/23/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 #include <db.h> 17 #include "btree.h" 18 19 /* 20 * _BT_GETBIG -- Get big data from indirect pages. 21 * 22 * This routine chases indirect blocks for the big object at the 23 * specified page to a buffer, and return the address of the buffer. 24 * 25 * Parameters: 26 * t -- btree with the indirect blocks 27 * pgno -- page number that starts the chain 28 * p -- (char **) to get the address of the buffer containing 29 * the key or datum. 30 * sz -- pointer to an int to get the size of the instantiated 31 * object. 32 * 33 * Returns: 34 * RET_SUCCESS, RET_ERROR. 35 * 36 * Side Effects: 37 * None. 38 */ 39 40 int 41 _bt_getbig(t, pgno, p, sz) 42 BTREE_P t; 43 pgno_t pgno; 44 char **p; 45 int *sz; 46 { 47 pgno_t save; 48 size_t nbytes; 49 size_t nhere; 50 BTHEADER *h; 51 char *top, *from, *where; 52 53 save = t->bt_curpage->h_pgno; 54 if (_bt_getpage(t, pgno) == RET_ERROR) 55 return (RET_ERROR); 56 57 h = t->bt_curpage; 58 59 bcopy((char *) &(h->h_linp[0]), 60 (char *) &nbytes, 61 (size_t) sizeof(nbytes)); 62 63 if ((*p = (char *) malloc(nbytes)) == (char *) NULL) 64 return (RET_ERROR); 65 66 *sz = nbytes; 67 from = ((char *) (&h->h_linp[0])) + sizeof(nbytes); 68 top = ((char *) h) + t->bt_psize; 69 70 /* need more space for data? */ 71 72 where = *p; 73 74 while (nbytes > 0) { 75 nhere = (int) (top - from); 76 if (nhere > nbytes) { 77 (void) bcopy(from, where, (size_t) nbytes); 78 nbytes = 0; 79 } else { 80 (void) bcopy(from, where, nhere); 81 where += nhere; 82 nbytes -= nhere; 83 if (_bt_getpage(t, h->h_nextpg) == RET_ERROR) 84 return (RET_ERROR); 85 h = t->bt_curpage; 86 top = ((char *) h) + t->bt_psize; 87 from = (char *) &(h->h_linp[0]); 88 } 89 } 90 91 if (_bt_getpage(t, save) == RET_ERROR) 92 return (RET_ERROR); 93 94 return (RET_SUCCESS); 95 } 96 97 /* 98 * _BT_DELINDIR -- Delete a chain of indirect blocks from the btree. 99 * 100 * When a large item is deleted from the tree, this routine puts the 101 * space that it occupied onto the free list for later reuse. The 102 * bt_free entry in the btree structure points at the head of this list. 103 * This value is also stored on disk in the btree's metadata. 104 * 105 * Parameters: 106 * t -- btree from which to delete pages 107 * chain -- page number that starts the chain. 108 * 109 * Returns: 110 * RET_SUCCESS, RET_ERROR. 111 * 112 * Side Effects: 113 * Invalidates the current on-disk version of the btree's 114 * metadata (if any), and updates the free list appropriately. 115 */ 116 117 int 118 _bt_delindir(t, chain) 119 BTREE_P t; 120 pgno_t chain; 121 { 122 BTHEADER *h; 123 pgno_t save; 124 pgno_t oldfree; 125 126 h = t->bt_curpage; 127 save = h->h_pgno; 128 if (_bt_getpage(t, chain) == RET_ERROR) 129 return (RET_ERROR); 130 131 /* 132 * If some internal node is pointing at this chain, don't 133 * delete it. 134 */ 135 136 if (t->bt_curpage->h_flags & F_PRESERVE) { 137 if (_bt_getpage(t, save) == RET_ERROR) 138 return (RET_ERROR); 139 return (RET_SUCCESS); 140 } 141 142 /* if there's nothing on the free list, this is easy... */ 143 if (t->bt_free == P_NONE) { 144 t->bt_free = chain; 145 } else { 146 oldfree = t->bt_free; 147 148 /* find the end of the data chain for the deleted datum */ 149 t->bt_free = chain; 150 do { 151 if (_bt_getpage(t, chain) == RET_ERROR) 152 return (RET_ERROR); 153 h = t->bt_curpage; 154 if (h->h_nextpg != P_NONE) 155 chain = h->h_nextpg; 156 } while (h->h_nextpg != P_NONE); 157 158 /* link freed pages into free list */ 159 h->h_nextpg = oldfree; 160 if (_bt_write(t, h, RELEASE) == RET_ERROR) 161 return (RET_ERROR); 162 if (_bt_getpage(t, oldfree) == RET_ERROR) 163 return (RET_ERROR); 164 h = t->bt_curpage; 165 h->h_prevpg = chain; 166 if (_bt_write(t, h, RELEASE) == RET_ERROR) 167 return (RET_ERROR); 168 } 169 170 /* restore the tree's current page pointer */ 171 if (_bt_getpage(t, save) == RET_ERROR) 172 return (RET_ERROR); 173 174 /* we have trashed the tree metadata; rewrite it later */ 175 t->bt_flags &= ~BTF_METAOK; 176 177 return (RET_SUCCESS); 178 } 179 180 /* 181 * _BT_INDIRECT -- Write a series of indirect pages for big objects. 182 * 183 * A chain of indirect pages looks like 184 * 185 * +-------------------+ +---------------------+ 186 * |hdr|size| | |hdr| | 187 * +---+----+ | +---+ | 188 * | ... data ... | | ... data ... | ... 189 * | | | | 190 * +-------------------+ +---------------------+ 191 * 192 * where hdr is a standard btree page header (with the indirect bit 193 * set), size on the first page is the real size of the datum, and 194 * data are bytes of the datum, split across as many pages as necessary. 195 * Indirect pages are chained together with the h_prevpg and h_nextpg 196 * entries of the page header struct. 197 * 198 * A single DBT is written per chain, so space on the last page is 199 * wasted. 200 * 201 * We return the page number of the start of the chain. 202 * 203 * When a big object is deleted from a tree, the space that it occupied 204 * is placed on a free list for later reuse. This routine checks that 205 * free list before allocating new pages to the big datum being inserted. 206 * 207 * Parameters: 208 * t -- btree in which to store indirect blocks 209 * data -- DBT with the big datum in it 210 * pgno -- place to put the starting page number of the chain 211 * 212 * Returns: 213 * RET_SUCCESS, RET_ERROR. 214 * 215 * Side Effects: 216 * Current page is changed on return. 217 */ 218 219 int 220 _bt_indirect(t, data, pgno) 221 BTREE_P t; 222 DBT *data; 223 pgno_t *pgno; 224 { 225 pgno_t prev; 226 char *top; 227 char *where; 228 char *from; 229 size_t dsize; 230 pgno_t nextchn; 231 int ischain; 232 BTHEADER *cur; 233 234 /* get set for first page in chain */ 235 prev = P_NONE; 236 dsize = data->size; 237 from = (char *) data->data; 238 239 /* if there are blocks on the free list, use them first */ 240 if ((*pgno = t->bt_free) == P_NONE) { 241 if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL) 242 return (RET_ERROR); 243 244 ischain = 0; 245 *pgno = cur->h_pgno = ++(t->bt_npages); 246 } else { 247 if (_bt_getpage(t, *pgno) == RET_ERROR) 248 return (RET_ERROR); 249 ischain = 1; 250 cur = t->bt_curpage; 251 } 252 253 cur->h_flags = F_CONT|F_LEAF; 254 (void) bcopy((char *) &dsize, (char *) &cur->h_linp[0], sizeof(size_t)); 255 where = ((char *) (&cur->h_linp[0])) + sizeof(size_t); 256 257 /* fill and write pages in the chain */ 258 for (;;) { 259 int nhere; 260 261 top = ((char *) cur) + t->bt_psize; 262 cur->h_prevpg = prev; 263 nextchn = cur->h_nextpg; 264 nhere = (int) (top - where); 265 266 if (nhere >= dsize) { 267 (void) bcopy(from, where, (int) dsize); 268 cur->h_nextpg = P_NONE; 269 dsize = 0; 270 } else { 271 (void) bcopy(from, where, (int) nhere); 272 dsize -= nhere; 273 from += nhere; 274 if (nextchn == P_NONE) 275 cur->h_nextpg = t->bt_npages + 1; 276 prev = cur->h_pgno; 277 } 278 279 /* current page is ready to go; write it out */ 280 if (_bt_write(t, cur, RELEASE) == RET_ERROR) 281 return (RET_ERROR); 282 283 /* free the current page, if appropriate */ 284 if (ISDISK(t) && !ISCACHE(t) && !ischain) { 285 (void) free ((char *) cur); 286 } 287 288 /* done? */ 289 if (dsize == 0) 290 break; 291 292 /* allocate another page */ 293 if (nextchn == P_NONE) { 294 if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL) 295 return (RET_ERROR); 296 ischain = 0; 297 cur->h_pgno = ++(t->bt_npages); 298 } else { 299 if (_bt_getpage(t, nextchn) == RET_ERROR) 300 return (RET_ERROR); 301 ischain = 1; 302 cur = t->bt_curpage; 303 } 304 cur->h_flags = F_LEAF | F_CONT; 305 306 where = (char *) (&cur->h_linp[0]); 307 } 308 309 /* if we used pages from the free list, record changes to it */ 310 if (*pgno == t->bt_free) { 311 t->bt_free = nextchn; 312 t->bt_flags &= ~BTF_METAOK; 313 } 314 315 return (RET_SUCCESS); 316 } 317 318 /* 319 * _BT_MARKCHAIN -- Mark a chain of pages as used by an internal node. 320 * 321 * Chains of indirect blocks pointed to by leaf nodes get reclaimed 322 * when the item that points to them gets deleted. Chains pointed 323 * to by internal nodes never get deleted. This routine marks a 324 * chain as pointed to by an internal node. 325 * 326 * Parameters: 327 * t -- tree in which to mark 328 * chain -- number of first page in chain 329 * 330 * Returns: 331 * RET_SUCCESS, RET_ERROR. 332 * 333 * Side Effects: 334 * None. 335 */ 336 337 int 338 _bt_markchain(t, chain) 339 BTREE_P t; 340 pgno_t chain; 341 { 342 pgno_t save; 343 344 save = t->bt_curpage->h_pgno; 345 346 if (_bt_getpage(t, chain) == RET_ERROR) 347 return (RET_ERROR); 348 349 t->bt_curpage->h_flags |= (F_DIRTY|F_PRESERVE); 350 351 if (_bt_getpage(t, save) == RET_ERROR) 352 return (RET_ERROR); 353 354 return (RET_SUCCESS); 355 } 356