146129Smao /*- 246129Smao * Copyright (c) 1990 The Regents of the University of California. 346129Smao * All rights reserved. 446129Smao * 546129Smao * This code is derived from software contributed to Berkeley by 646129Smao * Mike Olson. 746129Smao * 846129Smao * %sccs.include.redist.c% 946129Smao */ 1046129Smao 1146129Smao #if defined(LIBC_SCCS) && !defined(lint) 12*46561Sbostic static char sccsid[] = "@(#)bt_overflow.c 5.2 (Berkeley) 02/22/91"; 1346129Smao #endif /* LIBC_SCCS and not lint */ 1446129Smao 1546129Smao #include <sys/types.h> 1646129Smao #include <db.h> 17*46561Sbostic #include <stdlib.h> 18*46561Sbostic #include <string.h> 1946129Smao #include "btree.h" 2046129Smao 2146129Smao /* 2246129Smao * _BT_GETBIG -- Get big data from indirect pages. 2346129Smao * 2446129Smao * This routine chases indirect blocks for the big object at the 2546129Smao * specified page to a buffer, and return the address of the buffer. 2646129Smao * 2746129Smao * Parameters: 2846129Smao * t -- btree with the indirect blocks 2946129Smao * pgno -- page number that starts the chain 3046129Smao * p -- (char **) to get the address of the buffer containing 3146129Smao * the key or datum. 3246129Smao * sz -- pointer to an int to get the size of the instantiated 3346129Smao * object. 3446129Smao * 3546129Smao * Returns: 3646129Smao * RET_SUCCESS, RET_ERROR. 3746129Smao * 3846129Smao * Side Effects: 3946129Smao * None. 4046129Smao */ 4146129Smao 4246129Smao int 4346129Smao _bt_getbig(t, pgno, p, sz) 4446129Smao BTREE_P t; 4546129Smao pgno_t pgno; 4646129Smao char **p; 4746129Smao int *sz; 4846129Smao { 4946129Smao pgno_t save; 5046129Smao size_t nbytes; 5146129Smao size_t nhere; 5246129Smao BTHEADER *h; 5346129Smao char *top, *from, *where; 5446129Smao 5546129Smao save = t->bt_curpage->h_pgno; 5646129Smao if (_bt_getpage(t, pgno) == RET_ERROR) 5746129Smao return (RET_ERROR); 5846129Smao 5946129Smao h = t->bt_curpage; 6046129Smao 6146129Smao bcopy((char *) &(h->h_linp[0]), 6246129Smao (char *) &nbytes, 6346129Smao (size_t) sizeof(nbytes)); 6446129Smao 6546129Smao if ((*p = (char *) malloc(nbytes)) == (char *) NULL) 6646129Smao return (RET_ERROR); 6746129Smao 6846129Smao *sz = nbytes; 6946129Smao from = ((char *) (&h->h_linp[0])) + sizeof(nbytes); 7046129Smao top = ((char *) h) + t->bt_psize; 7146129Smao 7246129Smao /* need more space for data? */ 7346129Smao 7446129Smao where = *p; 7546129Smao 7646129Smao while (nbytes > 0) { 7746129Smao nhere = (int) (top - from); 7846129Smao if (nhere > nbytes) { 7946129Smao (void) bcopy(from, where, (size_t) nbytes); 8046129Smao nbytes = 0; 8146129Smao } else { 8246129Smao (void) bcopy(from, where, nhere); 8346129Smao where += nhere; 8446129Smao nbytes -= nhere; 8546129Smao if (_bt_getpage(t, h->h_nextpg) == RET_ERROR) 8646129Smao return (RET_ERROR); 8746129Smao h = t->bt_curpage; 8846129Smao top = ((char *) h) + t->bt_psize; 8946129Smao from = (char *) &(h->h_linp[0]); 9046129Smao } 9146129Smao } 9246129Smao 9346129Smao if (_bt_getpage(t, save) == RET_ERROR) 9446129Smao return (RET_ERROR); 9546129Smao 9646129Smao return (RET_SUCCESS); 9746129Smao } 9846129Smao 9946129Smao /* 10046129Smao * _BT_DELINDIR -- Delete a chain of indirect blocks from the btree. 10146129Smao * 10246129Smao * When a large item is deleted from the tree, this routine puts the 10346129Smao * space that it occupied onto the free list for later reuse. The 10446129Smao * bt_free entry in the btree structure points at the head of this list. 10546129Smao * This value is also stored on disk in the btree's metadata. 10646129Smao * 10746129Smao * Parameters: 10846129Smao * t -- btree from which to delete pages 10946129Smao * chain -- page number that starts the chain. 11046129Smao * 11146129Smao * Returns: 11246129Smao * RET_SUCCESS, RET_ERROR. 11346129Smao * 11446129Smao * Side Effects: 11546129Smao * Invalidates the current on-disk version of the btree's 11646129Smao * metadata (if any), and updates the free list appropriately. 11746129Smao */ 11846129Smao 11946129Smao int 12046129Smao _bt_delindir(t, chain) 12146129Smao BTREE_P t; 12246129Smao pgno_t chain; 12346129Smao { 12446129Smao BTHEADER *h; 12546129Smao pgno_t save; 12646129Smao pgno_t oldfree; 12746129Smao 12846129Smao h = t->bt_curpage; 12946129Smao save = h->h_pgno; 13046129Smao if (_bt_getpage(t, chain) == RET_ERROR) 13146129Smao return (RET_ERROR); 13246129Smao 13346129Smao /* 13446129Smao * If some internal node is pointing at this chain, don't 13546129Smao * delete it. 13646129Smao */ 13746129Smao 13846129Smao if (t->bt_curpage->h_flags & F_PRESERVE) { 13946129Smao if (_bt_getpage(t, save) == RET_ERROR) 14046129Smao return (RET_ERROR); 14146129Smao return (RET_SUCCESS); 14246129Smao } 14346129Smao 14446129Smao /* if there's nothing on the free list, this is easy... */ 14546129Smao if (t->bt_free == P_NONE) { 14646129Smao t->bt_free = chain; 14746129Smao } else { 14846129Smao oldfree = t->bt_free; 14946129Smao 15046129Smao /* find the end of the data chain for the deleted datum */ 15146129Smao t->bt_free = chain; 15246129Smao do { 15346129Smao if (_bt_getpage(t, chain) == RET_ERROR) 15446129Smao return (RET_ERROR); 15546129Smao h = t->bt_curpage; 15646129Smao if (h->h_nextpg != P_NONE) 15746129Smao chain = h->h_nextpg; 15846129Smao } while (h->h_nextpg != P_NONE); 15946129Smao 16046129Smao /* link freed pages into free list */ 16146129Smao h->h_nextpg = oldfree; 16246129Smao if (_bt_write(t, h, RELEASE) == RET_ERROR) 16346129Smao return (RET_ERROR); 16446129Smao if (_bt_getpage(t, oldfree) == RET_ERROR) 16546129Smao return (RET_ERROR); 16646129Smao h = t->bt_curpage; 16746129Smao h->h_prevpg = chain; 16846129Smao if (_bt_write(t, h, RELEASE) == RET_ERROR) 16946129Smao return (RET_ERROR); 17046129Smao } 17146129Smao 17246129Smao /* restore the tree's current page pointer */ 17346129Smao if (_bt_getpage(t, save) == RET_ERROR) 17446129Smao return (RET_ERROR); 17546129Smao 17646129Smao /* we have trashed the tree metadata; rewrite it later */ 17746129Smao t->bt_flags &= ~BTF_METAOK; 17846129Smao 17946129Smao return (RET_SUCCESS); 18046129Smao } 18146129Smao 18246129Smao /* 18346129Smao * _BT_INDIRECT -- Write a series of indirect pages for big objects. 18446129Smao * 18546129Smao * A chain of indirect pages looks like 18646129Smao * 18746129Smao * +-------------------+ +---------------------+ 18846129Smao * |hdr|size| | |hdr| | 18946129Smao * +---+----+ | +---+ | 19046129Smao * | ... data ... | | ... data ... | ... 19146129Smao * | | | | 19246129Smao * +-------------------+ +---------------------+ 19346129Smao * 19446129Smao * where hdr is a standard btree page header (with the indirect bit 19546129Smao * set), size on the first page is the real size of the datum, and 19646129Smao * data are bytes of the datum, split across as many pages as necessary. 19746129Smao * Indirect pages are chained together with the h_prevpg and h_nextpg 19846129Smao * entries of the page header struct. 19946129Smao * 20046129Smao * A single DBT is written per chain, so space on the last page is 20146129Smao * wasted. 20246129Smao * 20346129Smao * We return the page number of the start of the chain. 20446129Smao * 20546129Smao * When a big object is deleted from a tree, the space that it occupied 20646129Smao * is placed on a free list for later reuse. This routine checks that 20746129Smao * free list before allocating new pages to the big datum being inserted. 20846129Smao * 20946129Smao * Parameters: 21046129Smao * t -- btree in which to store indirect blocks 21146129Smao * data -- DBT with the big datum in it 21246129Smao * pgno -- place to put the starting page number of the chain 21346129Smao * 21446129Smao * Returns: 21546129Smao * RET_SUCCESS, RET_ERROR. 21646129Smao * 21746129Smao * Side Effects: 21846129Smao * Current page is changed on return. 21946129Smao */ 22046129Smao 22146129Smao int 22246129Smao _bt_indirect(t, data, pgno) 22346129Smao BTREE_P t; 22446129Smao DBT *data; 22546129Smao pgno_t *pgno; 22646129Smao { 22746129Smao pgno_t prev; 22846129Smao char *top; 22946129Smao char *where; 23046129Smao char *from; 23146129Smao size_t dsize; 23246129Smao pgno_t nextchn; 23346129Smao int ischain; 23446129Smao BTHEADER *cur; 23546129Smao 23646129Smao /* get set for first page in chain */ 23746129Smao prev = P_NONE; 23846129Smao dsize = data->size; 23946129Smao from = (char *) data->data; 24046129Smao 24146129Smao /* if there are blocks on the free list, use them first */ 24246129Smao if ((*pgno = t->bt_free) == P_NONE) { 24346129Smao if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL) 24446129Smao return (RET_ERROR); 24546129Smao 24646129Smao ischain = 0; 24746129Smao *pgno = cur->h_pgno = ++(t->bt_npages); 24846129Smao } else { 24946129Smao if (_bt_getpage(t, *pgno) == RET_ERROR) 25046129Smao return (RET_ERROR); 25146129Smao ischain = 1; 25246129Smao cur = t->bt_curpage; 25346129Smao } 25446129Smao 25546129Smao cur->h_flags = F_CONT|F_LEAF; 25646129Smao (void) bcopy((char *) &dsize, (char *) &cur->h_linp[0], sizeof(size_t)); 25746129Smao where = ((char *) (&cur->h_linp[0])) + sizeof(size_t); 25846129Smao 25946129Smao /* fill and write pages in the chain */ 26046129Smao for (;;) { 26146129Smao int nhere; 26246129Smao 26346129Smao top = ((char *) cur) + t->bt_psize; 26446129Smao cur->h_prevpg = prev; 26546129Smao nextchn = cur->h_nextpg; 26646129Smao nhere = (int) (top - where); 26746129Smao 26846129Smao if (nhere >= dsize) { 26946129Smao (void) bcopy(from, where, (int) dsize); 27046129Smao cur->h_nextpg = P_NONE; 27146129Smao dsize = 0; 27246129Smao } else { 27346129Smao (void) bcopy(from, where, (int) nhere); 27446129Smao dsize -= nhere; 27546129Smao from += nhere; 27646129Smao if (nextchn == P_NONE) 27746129Smao cur->h_nextpg = t->bt_npages + 1; 27846129Smao prev = cur->h_pgno; 27946129Smao } 28046129Smao 28146129Smao /* current page is ready to go; write it out */ 28246129Smao if (_bt_write(t, cur, RELEASE) == RET_ERROR) 28346129Smao return (RET_ERROR); 28446129Smao 28546129Smao /* free the current page, if appropriate */ 28646129Smao if (ISDISK(t) && !ISCACHE(t) && !ischain) { 28746129Smao (void) free ((char *) cur); 28846129Smao } 28946129Smao 29046129Smao /* done? */ 29146129Smao if (dsize == 0) 29246129Smao break; 29346129Smao 29446129Smao /* allocate another page */ 29546129Smao if (nextchn == P_NONE) { 29646129Smao if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL) 29746129Smao return (RET_ERROR); 29846129Smao ischain = 0; 29946129Smao cur->h_pgno = ++(t->bt_npages); 30046129Smao } else { 30146129Smao if (_bt_getpage(t, nextchn) == RET_ERROR) 30246129Smao return (RET_ERROR); 30346129Smao ischain = 1; 30446129Smao cur = t->bt_curpage; 30546129Smao } 30646129Smao cur->h_flags = F_LEAF | F_CONT; 30746129Smao 30846129Smao where = (char *) (&cur->h_linp[0]); 30946129Smao } 31046129Smao 31146129Smao /* if we used pages from the free list, record changes to it */ 31246129Smao if (*pgno == t->bt_free) { 31346129Smao t->bt_free = nextchn; 31446129Smao t->bt_flags &= ~BTF_METAOK; 31546129Smao } 31646129Smao 31746129Smao return (RET_SUCCESS); 31846129Smao } 31946129Smao 32046129Smao /* 32146129Smao * _BT_MARKCHAIN -- Mark a chain of pages as used by an internal node. 32246129Smao * 32346129Smao * Chains of indirect blocks pointed to by leaf nodes get reclaimed 32446129Smao * when the item that points to them gets deleted. Chains pointed 32546129Smao * to by internal nodes never get deleted. This routine marks a 32646129Smao * chain as pointed to by an internal node. 32746129Smao * 32846129Smao * Parameters: 32946129Smao * t -- tree in which to mark 33046129Smao * chain -- number of first page in chain 33146129Smao * 33246129Smao * Returns: 33346129Smao * RET_SUCCESS, RET_ERROR. 33446129Smao * 33546129Smao * Side Effects: 33646129Smao * None. 33746129Smao */ 33846129Smao 33946129Smao int 34046129Smao _bt_markchain(t, chain) 34146129Smao BTREE_P t; 34246129Smao pgno_t chain; 34346129Smao { 34446129Smao pgno_t save; 34546129Smao 34646129Smao save = t->bt_curpage->h_pgno; 34746129Smao 34846129Smao if (_bt_getpage(t, chain) == RET_ERROR) 34946129Smao return (RET_ERROR); 35046129Smao 35146129Smao t->bt_curpage->h_flags |= (F_DIRTY|F_PRESERVE); 35246129Smao 35346129Smao if (_bt_getpage(t, save) == RET_ERROR) 35446129Smao return (RET_ERROR); 35546129Smao 35646129Smao return (RET_SUCCESS); 35746129Smao } 358