xref: /csrg-svn/lib/libc/db/btree/bt_overflow.c (revision 46129)
1*46129Smao /*-
2*46129Smao  * Copyright (c) 1990 The Regents of the University of California.
3*46129Smao  * All rights reserved.
4*46129Smao  *
5*46129Smao  * This code is derived from software contributed to Berkeley by
6*46129Smao  * Mike Olson.
7*46129Smao  *
8*46129Smao  * %sccs.include.redist.c%
9*46129Smao  */
10*46129Smao 
11*46129Smao #if defined(LIBC_SCCS) && !defined(lint)
12*46129Smao static char sccsid[] = "@(#)bt_overflow.c	5.1 (Berkeley) 01/23/91";
13*46129Smao #endif /* LIBC_SCCS and not lint */
14*46129Smao 
15*46129Smao #include <sys/types.h>
16*46129Smao #include <db.h>
17*46129Smao #include "btree.h"
18*46129Smao 
19*46129Smao /*
20*46129Smao  *  _BT_GETBIG -- Get big data from indirect pages.
21*46129Smao  *
22*46129Smao  *	This routine chases indirect blocks for the big object at the
23*46129Smao  *	specified page to a buffer, and return the address of the buffer.
24*46129Smao  *
25*46129Smao  *	Parameters:
26*46129Smao  *		t -- btree with the indirect blocks
27*46129Smao  *		pgno -- page number that starts the chain
28*46129Smao  *		p -- (char **) to get the address of the buffer containing
29*46129Smao  *		     the key or datum.
30*46129Smao  *		sz -- pointer to an int to get the size of the instantiated
31*46129Smao  *		      object.
32*46129Smao  *
33*46129Smao  *	Returns:
34*46129Smao  *		RET_SUCCESS, RET_ERROR.
35*46129Smao  *
36*46129Smao  *	Side Effects:
37*46129Smao  *		None.
38*46129Smao  */
39*46129Smao 
40*46129Smao int
41*46129Smao _bt_getbig(t, pgno, p, sz)
42*46129Smao 	BTREE_P t;
43*46129Smao 	pgno_t pgno;
44*46129Smao 	char **p;
45*46129Smao 	int *sz;
46*46129Smao {
47*46129Smao 	pgno_t save;
48*46129Smao 	size_t nbytes;
49*46129Smao 	size_t nhere;
50*46129Smao 	BTHEADER *h;
51*46129Smao 	char *top, *from, *where;
52*46129Smao 
53*46129Smao 	save = t->bt_curpage->h_pgno;
54*46129Smao 	if (_bt_getpage(t, pgno) == RET_ERROR)
55*46129Smao 		return (RET_ERROR);
56*46129Smao 
57*46129Smao 	h = t->bt_curpage;
58*46129Smao 
59*46129Smao 	bcopy((char *) &(h->h_linp[0]),
60*46129Smao 	      (char *) &nbytes,
61*46129Smao 	      (size_t) sizeof(nbytes));
62*46129Smao 
63*46129Smao 	if ((*p = (char *) malloc(nbytes)) == (char *) NULL)
64*46129Smao 		return (RET_ERROR);
65*46129Smao 
66*46129Smao 	*sz = nbytes;
67*46129Smao 	from = ((char *) (&h->h_linp[0])) + sizeof(nbytes);
68*46129Smao 	top = ((char *) h) + t->bt_psize;
69*46129Smao 
70*46129Smao 	/* need more space for data? */
71*46129Smao 
72*46129Smao 	where = *p;
73*46129Smao 
74*46129Smao 	while (nbytes > 0) {
75*46129Smao 		nhere = (int) (top - from);
76*46129Smao 		if (nhere > nbytes) {
77*46129Smao 			(void) bcopy(from, where, (size_t) nbytes);
78*46129Smao 			nbytes = 0;
79*46129Smao 		} else {
80*46129Smao 			(void) bcopy(from, where, nhere);
81*46129Smao 			where += nhere;
82*46129Smao 			nbytes -= nhere;
83*46129Smao 			if (_bt_getpage(t, h->h_nextpg) == RET_ERROR)
84*46129Smao 				return (RET_ERROR);
85*46129Smao 			h = t->bt_curpage;
86*46129Smao 			top = ((char *) h) + t->bt_psize;
87*46129Smao 			from = (char *) &(h->h_linp[0]);
88*46129Smao 		}
89*46129Smao 	}
90*46129Smao 
91*46129Smao 	if (_bt_getpage(t, save) == RET_ERROR)
92*46129Smao 		return (RET_ERROR);
93*46129Smao 
94*46129Smao 	return (RET_SUCCESS);
95*46129Smao }
96*46129Smao 
97*46129Smao /*
98*46129Smao  *  _BT_DELINDIR -- Delete a chain of indirect blocks from the btree.
99*46129Smao  *
100*46129Smao  *	When a large item is deleted from the tree, this routine puts the
101*46129Smao  *	space that it occupied onto the free list for later reuse.  The
102*46129Smao  *	bt_free entry in the btree structure points at the head of this list.
103*46129Smao  *	This value is also stored on disk in the btree's metadata.
104*46129Smao  *
105*46129Smao  *	Parameters:
106*46129Smao  *		t -- btree from which to delete pages
107*46129Smao  *		chain -- page number that starts the chain.
108*46129Smao  *
109*46129Smao  *	Returns:
110*46129Smao  *		RET_SUCCESS, RET_ERROR.
111*46129Smao  *
112*46129Smao  *	Side Effects:
113*46129Smao  *		Invalidates the current on-disk version of the btree's
114*46129Smao  *		metadata (if any), and updates the free list appropriately.
115*46129Smao  */
116*46129Smao 
117*46129Smao int
118*46129Smao _bt_delindir(t, chain)
119*46129Smao 	BTREE_P t;
120*46129Smao 	pgno_t chain;
121*46129Smao {
122*46129Smao 	BTHEADER *h;
123*46129Smao 	pgno_t save;
124*46129Smao 	pgno_t oldfree;
125*46129Smao 
126*46129Smao 	h = t->bt_curpage;
127*46129Smao 	save = h->h_pgno;
128*46129Smao 	if (_bt_getpage(t, chain) == RET_ERROR)
129*46129Smao 		return (RET_ERROR);
130*46129Smao 
131*46129Smao 	/*
132*46129Smao 	 *  If some internal node is pointing at this chain, don't
133*46129Smao 	 *  delete it.
134*46129Smao 	 */
135*46129Smao 
136*46129Smao 	if (t->bt_curpage->h_flags & F_PRESERVE) {
137*46129Smao 		if (_bt_getpage(t, save) == RET_ERROR)
138*46129Smao 			return (RET_ERROR);
139*46129Smao 		return (RET_SUCCESS);
140*46129Smao 	}
141*46129Smao 
142*46129Smao 	/* if there's nothing on the free list, this is easy... */
143*46129Smao 	if (t->bt_free == P_NONE) {
144*46129Smao 		t->bt_free = chain;
145*46129Smao 	} else {
146*46129Smao 		oldfree = t->bt_free;
147*46129Smao 
148*46129Smao 		/* find the end of the data chain for the deleted datum */
149*46129Smao 		t->bt_free = chain;
150*46129Smao 		do {
151*46129Smao 			if (_bt_getpage(t, chain) == RET_ERROR)
152*46129Smao 				return (RET_ERROR);
153*46129Smao 			h = t->bt_curpage;
154*46129Smao 			if (h->h_nextpg != P_NONE)
155*46129Smao 				chain = h->h_nextpg;
156*46129Smao 		} while (h->h_nextpg != P_NONE);
157*46129Smao 
158*46129Smao 		/* link freed pages into free list */
159*46129Smao 		h->h_nextpg = oldfree;
160*46129Smao 		if (_bt_write(t, h, RELEASE) == RET_ERROR)
161*46129Smao 			return (RET_ERROR);
162*46129Smao 		if (_bt_getpage(t, oldfree) == RET_ERROR)
163*46129Smao 			return (RET_ERROR);
164*46129Smao 		h = t->bt_curpage;
165*46129Smao 		h->h_prevpg = chain;
166*46129Smao 		if (_bt_write(t, h, RELEASE) == RET_ERROR)
167*46129Smao 			return (RET_ERROR);
168*46129Smao 	}
169*46129Smao 
170*46129Smao 	/* restore the tree's current page pointer */
171*46129Smao 	if (_bt_getpage(t, save) == RET_ERROR)
172*46129Smao 		return (RET_ERROR);
173*46129Smao 
174*46129Smao 	/* we have trashed the tree metadata; rewrite it later */
175*46129Smao 	t->bt_flags &= ~BTF_METAOK;
176*46129Smao 
177*46129Smao 	return (RET_SUCCESS);
178*46129Smao }
179*46129Smao 
180*46129Smao /*
181*46129Smao  *  _BT_INDIRECT -- Write a series of indirect pages for big objects.
182*46129Smao  *
183*46129Smao  *	A chain of indirect pages looks like
184*46129Smao  *
185*46129Smao  *	   +-------------------+   +---------------------+
186*46129Smao  *	   |hdr|size|	       |   |hdr|		 |
187*46129Smao  *	   +---+----+	       |   +---+		 |
188*46129Smao  *	   |   ... data ...    |   |   ... data ...	 |    ...
189*46129Smao  *	   |		       |   |			 |
190*46129Smao  *	   +-------------------+   +---------------------+
191*46129Smao  *
192*46129Smao  *	where hdr is a standard btree page header (with the indirect bit
193*46129Smao  *	set), size on the first page is the real size of the datum, and
194*46129Smao  *	data are bytes of the datum, split across as many pages as necessary.
195*46129Smao  *	Indirect pages are chained together with the h_prevpg and h_nextpg
196*46129Smao  *	entries of the page header struct.
197*46129Smao  *
198*46129Smao  *	A single DBT is written per chain, so space on the last page is
199*46129Smao  *	wasted.
200*46129Smao  *
201*46129Smao  *	We return the page number of the start of the chain.
202*46129Smao  *
203*46129Smao  *	When a big object is deleted from a tree, the space that it occupied
204*46129Smao  *	is placed on a free list for later reuse.  This routine checks that
205*46129Smao  *	free list before allocating new pages to the big datum being inserted.
206*46129Smao  *
207*46129Smao  *	Parameters:
208*46129Smao  *		t -- btree in which to store indirect blocks
209*46129Smao  *		data -- DBT with the big datum in it
210*46129Smao  *		pgno -- place to put the starting page number of the chain
211*46129Smao  *
212*46129Smao  *	Returns:
213*46129Smao  *		RET_SUCCESS, RET_ERROR.
214*46129Smao  *
215*46129Smao  *	Side Effects:
216*46129Smao  *		Current page is changed on return.
217*46129Smao  */
218*46129Smao 
219*46129Smao int
220*46129Smao _bt_indirect(t, data, pgno)
221*46129Smao 	BTREE_P t;
222*46129Smao 	DBT *data;
223*46129Smao 	pgno_t *pgno;
224*46129Smao {
225*46129Smao 	pgno_t prev;
226*46129Smao 	char *top;
227*46129Smao 	char *where;
228*46129Smao 	char *from;
229*46129Smao 	size_t dsize;
230*46129Smao 	pgno_t nextchn;
231*46129Smao 	int ischain;
232*46129Smao 	BTHEADER *cur;
233*46129Smao 
234*46129Smao 	/* get set for first page in chain */
235*46129Smao 	prev = P_NONE;
236*46129Smao 	dsize = data->size;
237*46129Smao 	from = (char *) data->data;
238*46129Smao 
239*46129Smao 	/* if there are blocks on the free list, use them first */
240*46129Smao 	if ((*pgno = t->bt_free) == P_NONE) {
241*46129Smao 		if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL)
242*46129Smao 			return (RET_ERROR);
243*46129Smao 
244*46129Smao 		ischain = 0;
245*46129Smao 		*pgno = cur->h_pgno = ++(t->bt_npages);
246*46129Smao 	} else {
247*46129Smao 		if (_bt_getpage(t, *pgno) == RET_ERROR)
248*46129Smao 			return (RET_ERROR);
249*46129Smao 		ischain = 1;
250*46129Smao 		cur = t->bt_curpage;
251*46129Smao 	}
252*46129Smao 
253*46129Smao 	cur->h_flags = F_CONT|F_LEAF;
254*46129Smao 	(void) bcopy((char *) &dsize, (char *) &cur->h_linp[0], sizeof(size_t));
255*46129Smao 	where = ((char *) (&cur->h_linp[0])) + sizeof(size_t);
256*46129Smao 
257*46129Smao 	/* fill and write pages in the chain */
258*46129Smao 	for (;;) {
259*46129Smao 		int nhere;
260*46129Smao 
261*46129Smao 		top = ((char *) cur) + t->bt_psize;
262*46129Smao 		cur->h_prevpg = prev;
263*46129Smao 		nextchn = cur->h_nextpg;
264*46129Smao 		nhere = (int) (top - where);
265*46129Smao 
266*46129Smao 		if (nhere >= dsize) {
267*46129Smao 			(void) bcopy(from, where, (int) dsize);
268*46129Smao 			cur->h_nextpg = P_NONE;
269*46129Smao 			dsize = 0;
270*46129Smao 		} else {
271*46129Smao 			(void) bcopy(from, where, (int) nhere);
272*46129Smao 			dsize -= nhere;
273*46129Smao 			from += nhere;
274*46129Smao 			if (nextchn == P_NONE)
275*46129Smao 				cur->h_nextpg = t->bt_npages + 1;
276*46129Smao 			prev = cur->h_pgno;
277*46129Smao 		}
278*46129Smao 
279*46129Smao 		/* current page is ready to go; write it out */
280*46129Smao 		if (_bt_write(t, cur, RELEASE) == RET_ERROR)
281*46129Smao 			return (RET_ERROR);
282*46129Smao 
283*46129Smao 		/* free the current page, if appropriate */
284*46129Smao 		if (ISDISK(t) && !ISCACHE(t) && !ischain) {
285*46129Smao 			(void) free ((char *) cur);
286*46129Smao 		}
287*46129Smao 
288*46129Smao 		/* done? */
289*46129Smao 		if (dsize == 0)
290*46129Smao 			break;
291*46129Smao 
292*46129Smao 		/* allocate another page */
293*46129Smao 		if (nextchn == P_NONE) {
294*46129Smao 			if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL)
295*46129Smao 				return (RET_ERROR);
296*46129Smao 			ischain = 0;
297*46129Smao 			cur->h_pgno = ++(t->bt_npages);
298*46129Smao 		} else {
299*46129Smao 			if (_bt_getpage(t, nextchn) == RET_ERROR)
300*46129Smao 				return (RET_ERROR);
301*46129Smao 			ischain = 1;
302*46129Smao 			cur = t->bt_curpage;
303*46129Smao 		}
304*46129Smao 		cur->h_flags = F_LEAF | F_CONT;
305*46129Smao 
306*46129Smao 		where = (char *) (&cur->h_linp[0]);
307*46129Smao 	}
308*46129Smao 
309*46129Smao 	/* if we used pages from the free list, record changes to it */
310*46129Smao 	if (*pgno == t->bt_free) {
311*46129Smao 		t->bt_free = nextchn;
312*46129Smao 		t->bt_flags &= ~BTF_METAOK;
313*46129Smao 	}
314*46129Smao 
315*46129Smao 	return (RET_SUCCESS);
316*46129Smao }
317*46129Smao 
318*46129Smao /*
319*46129Smao  *  _BT_MARKCHAIN -- Mark a chain of pages as used by an internal node.
320*46129Smao  *
321*46129Smao  *	Chains of indirect blocks pointed to by leaf nodes get reclaimed
322*46129Smao  *	when the item that points to them gets deleted.  Chains pointed
323*46129Smao  *	to by internal nodes never get deleted.  This routine marks a
324*46129Smao  *	chain as pointed to by an internal node.
325*46129Smao  *
326*46129Smao  *	Parameters:
327*46129Smao  *		t -- tree in which to mark
328*46129Smao  *		chain -- number of first page in chain
329*46129Smao  *
330*46129Smao  *	Returns:
331*46129Smao  *		RET_SUCCESS, RET_ERROR.
332*46129Smao  *
333*46129Smao  *	Side Effects:
334*46129Smao  *		None.
335*46129Smao  */
336*46129Smao 
337*46129Smao int
338*46129Smao _bt_markchain(t, chain)
339*46129Smao 	BTREE_P t;
340*46129Smao 	pgno_t chain;
341*46129Smao {
342*46129Smao 	pgno_t save;
343*46129Smao 
344*46129Smao 	save = t->bt_curpage->h_pgno;
345*46129Smao 
346*46129Smao 	if (_bt_getpage(t, chain) == RET_ERROR)
347*46129Smao 		return (RET_ERROR);
348*46129Smao 
349*46129Smao 	t->bt_curpage->h_flags |= (F_DIRTY|F_PRESERVE);
350*46129Smao 
351*46129Smao 	if (_bt_getpage(t, save) == RET_ERROR)
352*46129Smao 		return (RET_ERROR);
353*46129Smao 
354*46129Smao 	return (RET_SUCCESS);
355*46129Smao }
356