xref: /csrg-svn/lib/libc/db/btree/bt_seq.c (revision 50989)
146133Smao /*-
246133Smao  * Copyright (c) 1990 The Regents of the University of California.
346133Smao  * All rights reserved.
446133Smao  *
546133Smao  * This code is derived from software contributed to Berkeley by
646133Smao  * Mike Olson.
746133Smao  *
846133Smao  * %sccs.include.redist.c%
946133Smao  */
1046133Smao 
1146133Smao #if defined(LIBC_SCCS) && !defined(lint)
12*50989Sbostic static char sccsid[] = "@(#)bt_seq.c	5.5 (Berkeley) 09/04/91";
1346133Smao #endif /* LIBC_SCCS and not lint */
1446133Smao 
1546133Smao #include <sys/types.h>
1646561Sbostic #include <errno.h>
1746133Smao #include <db.h>
18*50989Sbostic #include <stdio.h>
1946561Sbostic #include <stdlib.h>
20*50989Sbostic #include <stddef.h>
2146133Smao #include "btree.h"
2246133Smao 
23*50989Sbostic static int	 bt_seqadv __P((BTREE *, EPG *, int));
24*50989Sbostic static int	 bt_seqset __P((BTREE *, EPG *, DBT *, int));
25*50989Sbostic 
2646133Smao /*
27*50989Sbostic  * Sequential scan support.
2846133Smao  *
29*50989Sbostic  * The tree can be scanned sequentially, starting from either end of the tree
30*50989Sbostic  * or from any specific key.  A scan request before any scanning is done is
31*50989Sbostic  * initialized as starting from the least node.
3246133Smao  *
33*50989Sbostic  * Each tree has an EPGNO which has the current position of the cursor.  The
34*50989Sbostic  * cursor has to survive deletions/insertions in the tree without losing its
35*50989Sbostic  * position.  This is done by noting deletions without doing them, and then
36*50989Sbostic  * doing them when the cursor moves (or the tree is closed).
37*50989Sbostic  */
38*50989Sbostic 
39*50989Sbostic /*
40*50989Sbostic  * __BT_SEQ -- Btree sequential scan interface.
4146133Smao  *
42*50989Sbostic  * Parameters:
43*50989Sbostic  *	dbp:	pointer to access method
44*50989Sbostic  *	key:	key for positioning and return value
45*50989Sbostic  *	data:	data return value
46*50989Sbostic  *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
4746133Smao  *
48*50989Sbostic  * Returns:
49*50989Sbostic  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
5046133Smao  */
5146133Smao int
52*50989Sbostic __bt_seq(dbp, key, data, flags)
53*50989Sbostic 	const DB *dbp;
54*50989Sbostic 	DBT *key, *data;
55*50989Sbostic 	u_int flags;
5646133Smao {
57*50989Sbostic 	BTREE *t;
58*50989Sbostic 	EPG e;
59*50989Sbostic 	int status;
6046133Smao 
6146133Smao 	/*
62*50989Sbostic 	 * If scan unitialized as yet, or starting at a specific record, set
63*50989Sbostic 	 * the scan to a specific key.  Both bt_seqset and bt_seqadv pin the
64*50989Sbostic 	 * page the cursor references if they're successful.
6546133Smao 	 */
66*50989Sbostic 	t = dbp->internal;
67*50989Sbostic 	switch(flags) {
68*50989Sbostic 	case R_NEXT:
69*50989Sbostic 		if (ISSET(t, BTF_SEQINIT)) {
70*50989Sbostic 			status = bt_seqadv(t, &e, flags);
71*50989Sbostic 			break;
72*50989Sbostic 		}
73*50989Sbostic 		/* FALLTHROUGH */
74*50989Sbostic 	case R_CURSOR:
75*50989Sbostic 	case R_FIRST:
76*50989Sbostic 		status = bt_seqset(t, &e, key, flags);
77*50989Sbostic 		SET(t, BTF_SEQINIT);
78*50989Sbostic 		break;
79*50989Sbostic 	case R_PREV:
80*50989Sbostic 		if (ISSET(t, BTF_SEQINIT)) {
81*50989Sbostic 			status = bt_seqadv(t, &e, flags);
82*50989Sbostic 			break;
83*50989Sbostic 		}
84*50989Sbostic 		/* FALLTHROUGH */
85*50989Sbostic 	case R_LAST:
86*50989Sbostic 		status = bt_seqset(t, &e, key, flags);
87*50989Sbostic 		SET(t, BTF_SEQINIT);
88*50989Sbostic 		break;
89*50989Sbostic 	default:
90*50989Sbostic 		errno = EINVAL;
91*50989Sbostic 		return (RET_ERROR);
92*50989Sbostic 	}
9346133Smao 
94*50989Sbostic 	if (status == RET_SUCCESS) {
95*50989Sbostic 		status = __bt_ret(t, &e, key, data);
9646133Smao 
97*50989Sbostic 		/* Update the actual cursor. */
98*50989Sbostic 		if (status == RET_SUCCESS) {
99*50989Sbostic 			t->bt_bcursor.pgno = e.page->pgno;
100*50989Sbostic 			t->bt_bcursor.index = e.index;
10146133Smao 		}
102*50989Sbostic 		mpool_put(t->bt_mp, e.page, 0);
103*50989Sbostic 	}
104*50989Sbostic 	return (status);
105*50989Sbostic }
10646133Smao 
107*50989Sbostic /*
108*50989Sbostic  * BT_SEQSET -- Set the sequential scan to a specific key.
109*50989Sbostic  *
110*50989Sbostic  * Parameters:
111*50989Sbostic  *	t:	tree
112*50989Sbostic  *	ep:	storage for returned key
113*50989Sbostic  *	key:	key for initial scan position
114*50989Sbostic  *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
115*50989Sbostic  *
116*50989Sbostic  * Side effects:
117*50989Sbostic  *	Pins the page the cursor references.
118*50989Sbostic  *
119*50989Sbostic  * Returns:
120*50989Sbostic  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
121*50989Sbostic  */
122*50989Sbostic static int
123*50989Sbostic bt_seqset(t, ep, key, flags)
124*50989Sbostic 	BTREE *t;
125*50989Sbostic 	EPG *ep;
126*50989Sbostic 	DBT *key;
127*50989Sbostic 	int flags;
128*50989Sbostic {
129*50989Sbostic 	EPG *e;
130*50989Sbostic 	PAGE *h;
131*50989Sbostic 	pgno_t pg;
132*50989Sbostic 	int exact;
13346133Smao 
134*50989Sbostic 	/*
135*50989Sbostic 	 * Delete any already deleted record that we've been saving because
136*50989Sbostic 	 * the cursor pointed to it.  Since going to a specific key, should
137*50989Sbostic 	 * delete any logically deleted records so they aren't found.
138*50989Sbostic 	 */
139*50989Sbostic 	if (ISSET(t, BTF_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor))
140*50989Sbostic 		return (RET_ERROR);
14146133Smao 
142*50989Sbostic 	/*
143*50989Sbostic 	 * If R_CURSOR set, find the first instance of the key in the tree and
144*50989Sbostic 	 * point the cursor at it.  Otherwise, find the first or the last record
145*50989Sbostic 	 * in the tree and point the cursor at it.  The cursor may not be moved
146*50989Sbostic 	 * until a new key has been found.
147*50989Sbostic 	 */
148*50989Sbostic 	switch(flags) {
149*50989Sbostic 	case R_CURSOR:				/* Keyed scan. */
150*50989Sbostic 		if (key->data == NULL || key->size == 0) {
151*50989Sbostic 			errno = EINVAL;
15246133Smao 			return (RET_ERROR);
153*50989Sbostic 		}
154*50989Sbostic 		e = __bt_first(t, key, &exact);	/* Returns pinned page. */
155*50989Sbostic 		if (e == NULL)
156*50989Sbostic 			return (RET_ERROR);
157*50989Sbostic 		if (!exact) {
158*50989Sbostic 			mpool_put(t->bt_mp, e->page, 0);
159*50989Sbostic 			return (RET_SPECIAL);
160*50989Sbostic 		}
161*50989Sbostic 		*ep = *e;
162*50989Sbostic 		break;
163*50989Sbostic 	case R_FIRST:				/* First record. */
164*50989Sbostic 	case R_NEXT:
165*50989Sbostic 		/* Walk down the left-hand side of the tree. */
166*50989Sbostic 		for (pg = P_ROOT;;) {
167*50989Sbostic 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
168*50989Sbostic 				return (RET_ERROR);
169*50989Sbostic 			if (h->flags & (P_BLEAF|P_RLEAF))
170*50989Sbostic 				break;
171*50989Sbostic 			pg = GETBINTERNAL(h, 0)->pgno;
172*50989Sbostic 			mpool_put(t->bt_mp, h, 0);
173*50989Sbostic 		}
17446133Smao 
175*50989Sbostic 		/* Skip any empty pages. */
176*50989Sbostic 		while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) {
177*50989Sbostic 			pg = h->nextpg;
178*50989Sbostic 			mpool_put(t->bt_mp, h, 0);
179*50989Sbostic 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
180*50989Sbostic 				return (RET_ERROR);
181*50989Sbostic 		}
18246133Smao 
183*50989Sbostic 		if (NEXTINDEX(h) == 0) {
184*50989Sbostic 			mpool_put(t->bt_mp, h, 0);
185*50989Sbostic 			return (RET_SPECIAL);
186*50989Sbostic 		}
18746133Smao 
188*50989Sbostic 		ep->page = h;
189*50989Sbostic 		ep->index = 0;
190*50989Sbostic 		break;
191*50989Sbostic 	case R_LAST:				/* Last record. */
192*50989Sbostic 	case R_PREV:
193*50989Sbostic 		/* Walk down the right-hand side of the tree. */
194*50989Sbostic 		for (pg = P_ROOT;;) {
195*50989Sbostic 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
196*50989Sbostic 				return (RET_ERROR);
197*50989Sbostic 			if (h->flags & (P_BLEAF|P_RLEAF))
198*50989Sbostic 				break;
199*50989Sbostic 			pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
200*50989Sbostic 			mpool_put(t->bt_mp, h, 0);
201*50989Sbostic 		}
20246133Smao 
203*50989Sbostic 		/* Skip any empty pages. */
204*50989Sbostic 		while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) {
205*50989Sbostic 			pg = h->prevpg;
206*50989Sbostic 			mpool_put(t->bt_mp, h, 0);
207*50989Sbostic 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
208*50989Sbostic 				return (RET_ERROR);
209*50989Sbostic 		}
21046133Smao 
211*50989Sbostic 		if (NEXTINDEX(h) == 0) {
212*50989Sbostic 			mpool_put(t->bt_mp, h, 0);
213*50989Sbostic 			return (RET_SPECIAL);
21446133Smao 		}
215*50989Sbostic 
216*50989Sbostic 		ep->page = h;
217*50989Sbostic 		ep->index = NEXTINDEX(h) - 1;
218*50989Sbostic 		break;
21946133Smao 	}
22046133Smao 	return (RET_SUCCESS);
22146133Smao }
22246133Smao 
22346133Smao /*
224*50989Sbostic  * BT_SEQADVANCE -- Advance the sequential scan.
22546133Smao  *
226*50989Sbostic  * Parameters:
227*50989Sbostic  *	t:	tree
228*50989Sbostic  *	flags:	R_NEXT, R_PREV
22946133Smao  *
230*50989Sbostic  * Side effects:
231*50989Sbostic  *	Pins the page the new key/data record is on.
23246133Smao  *
233*50989Sbostic  * Returns:
234*50989Sbostic  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
23546133Smao  */
236*50989Sbostic static int
237*50989Sbostic bt_seqadv(t, e, flags)
238*50989Sbostic 	BTREE *t;
239*50989Sbostic 	EPG *e;
24046133Smao 	int flags;
24146133Smao {
242*50989Sbostic 	EPGNO *c, delc;
243*50989Sbostic 	PAGE *h;
24446133Smao 	index_t index;
245*50989Sbostic 	pgno_t pg;
24646133Smao 
247*50989Sbostic 	/* Save the current cursor if going to delete it. */
248*50989Sbostic 	c = &t->bt_bcursor;
249*50989Sbostic 	if (ISSET(t, BTF_DELCRSR))
250*50989Sbostic 		delc = *c;
25146133Smao 
252*50989Sbostic 	if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
25346133Smao 		return (RET_ERROR);
25446133Smao 
255*50989Sbostic 	/*
256*50989Sbostic  	 * Find the next/previous record in the tree and point the cursor at it.
257*50989Sbostic 	 * The cursor may not be moved until a new key has been found.
258*50989Sbostic 	 */
259*50989Sbostic 	index = c->index;
260*50989Sbostic 	switch(flags) {
261*50989Sbostic 	case R_NEXT:			/* Next record. */
262*50989Sbostic 		if (++index == NEXTINDEX(h)) {
263*50989Sbostic 			do {
264*50989Sbostic 				pg = h->nextpg;
265*50989Sbostic 				mpool_put(t->bt_mp, h, 0);
266*50989Sbostic 				if (pg == P_INVALID)
26746133Smao 					return (RET_SPECIAL);
268*50989Sbostic 				if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
26946133Smao 					return (RET_ERROR);
270*50989Sbostic 			} while (NEXTINDEX(h) == 0);
27146133Smao 			index = 0;
27246133Smao 		}
273*50989Sbostic 		break;
274*50989Sbostic 	case R_PREV:			/* Previous record. */
275*50989Sbostic 		if (index-- == 0) {
27646133Smao 			do {
277*50989Sbostic 				pg = h->prevpg;
278*50989Sbostic 				mpool_put(t->bt_mp, h, 0);
279*50989Sbostic 				if (pg == P_INVALID)
280*50989Sbostic 					return (RET_SPECIAL);
281*50989Sbostic 				if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
28246133Smao 					return (RET_ERROR);
283*50989Sbostic 			} while (NEXTINDEX(h) == 0);
284*50989Sbostic 			index = NEXTINDEX(h) - 1;
285*50989Sbostic 		}
286*50989Sbostic 		break;
287*50989Sbostic 	}
28846133Smao 
289*50989Sbostic 	e->page = h;
290*50989Sbostic 	e->index = index;
29146133Smao 
292*50989Sbostic 	/*
293*50989Sbostic 	 * Delete any already deleted record that we've been saving because the
294*50989Sbostic 	 * cursor pointed to it.  This could cause the new index to be shifted
295*50989Sbostic 	 * down by one if the record we're deleting is on the same page and has
296*50989Sbostic 	 * a larger index.
297*50989Sbostic 	 */
298*50989Sbostic 	if (ISSET(t, BTF_DELCRSR)) {
299*50989Sbostic 		UNSET(t, BTF_DELCRSR);			/* Don't try twice. */
300*50989Sbostic 		if (c->pgno == delc.pgno && c->index > delc.index)
301*50989Sbostic 			--c->index;
302*50989Sbostic 		if (__bt_crsrdel(t, &delc))
303*50989Sbostic 			return (RET_ERROR);
30446133Smao 	}
30546133Smao 	return (RET_SUCCESS);
30646133Smao }
307*50989Sbostic 
308*50989Sbostic /*
309*50989Sbostic  * __BT_CRSRDEL -- Delete the record referenced by the cursor.
310*50989Sbostic  *
311*50989Sbostic  * Parameters:
312*50989Sbostic  *	t:	tree
313*50989Sbostic  *
314*50989Sbostic  * Returns:
315*50989Sbostic  *	RET_ERROR, RET_SUCCESS
316*50989Sbostic  */
317*50989Sbostic int
318*50989Sbostic __bt_crsrdel(t, c)
319*50989Sbostic 	BTREE *t;
320*50989Sbostic 	EPGNO *c;
321*50989Sbostic {
322*50989Sbostic 	PAGE *h;
323*50989Sbostic 	int status;
324*50989Sbostic 
325*50989Sbostic 	UNSET(t, BTF_DELCRSR);			/* Don't try twice. */
326*50989Sbostic 	if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
327*50989Sbostic 		return (RET_ERROR);
328*50989Sbostic 	status = __bt_dleaf(t, h, c->index);
329*50989Sbostic 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
330*50989Sbostic 	return (status);
331*50989Sbostic }
332