xref: /csrg-svn/lib/libc/db/btree/bt_seq.c (revision 64457)
146133Smao /*-
261196Sbostic  * Copyright (c) 1990, 1993
361196Sbostic  *	The Regents of the University of California.  All rights reserved.
446133Smao  *
546133Smao  * This code is derived from software contributed to Berkeley by
646133Smao  * Mike Olson.
746133Smao  *
846133Smao  * %sccs.include.redist.c%
946133Smao  */
1046133Smao 
1146133Smao #if defined(LIBC_SCCS) && !defined(lint)
12*64457Sbostic static char sccsid[] = "@(#)bt_seq.c	8.2 (Berkeley) 09/07/93";
1346133Smao #endif /* LIBC_SCCS and not lint */
1446133Smao 
1546133Smao #include <sys/types.h>
1656743Sbostic 
1746561Sbostic #include <errno.h>
1856743Sbostic #include <stddef.h>
1950989Sbostic #include <stdio.h>
2046561Sbostic #include <stdlib.h>
2156743Sbostic 
2257932Sbostic #include <db.h>
2346133Smao #include "btree.h"
2446133Smao 
2550989Sbostic static int	 bt_seqadv __P((BTREE *, EPG *, int));
2650989Sbostic static int	 bt_seqset __P((BTREE *, EPG *, DBT *, int));
2750989Sbostic 
2846133Smao /*
2950989Sbostic  * Sequential scan support.
3046133Smao  *
3150989Sbostic  * The tree can be scanned sequentially, starting from either end of the tree
3250989Sbostic  * or from any specific key.  A scan request before any scanning is done is
3350989Sbostic  * initialized as starting from the least node.
3446133Smao  *
3550989Sbostic  * Each tree has an EPGNO which has the current position of the cursor.  The
3650989Sbostic  * cursor has to survive deletions/insertions in the tree without losing its
3750989Sbostic  * position.  This is done by noting deletions without doing them, and then
3850989Sbostic  * doing them when the cursor moves (or the tree is closed).
3950989Sbostic  */
4050989Sbostic 
4150989Sbostic /*
4250989Sbostic  * __BT_SEQ -- Btree sequential scan interface.
4346133Smao  *
4450989Sbostic  * Parameters:
4550989Sbostic  *	dbp:	pointer to access method
4650989Sbostic  *	key:	key for positioning and return value
4750989Sbostic  *	data:	data return value
4850989Sbostic  *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
4946133Smao  *
5050989Sbostic  * Returns:
5150989Sbostic  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
5246133Smao  */
5346133Smao int
__bt_seq(dbp,key,data,flags)5450989Sbostic __bt_seq(dbp, key, data, flags)
5550989Sbostic 	const DB *dbp;
5650989Sbostic 	DBT *key, *data;
5750989Sbostic 	u_int flags;
5846133Smao {
5950989Sbostic 	BTREE *t;
6050989Sbostic 	EPG e;
6150989Sbostic 	int status;
6246133Smao 
63*64457Sbostic 	t = dbp->internal;
64*64457Sbostic 
65*64457Sbostic 	/* Toss any page pinned across calls. */
66*64457Sbostic 	if (t->bt_pinned != NULL) {
67*64457Sbostic 		mpool_put(t->bt_mp, t->bt_pinned, 0);
68*64457Sbostic 		t->bt_pinned = NULL;
69*64457Sbostic 	}
70*64457Sbostic 
7146133Smao 	/*
7250989Sbostic 	 * If scan unitialized as yet, or starting at a specific record, set
7350989Sbostic 	 * the scan to a specific key.  Both bt_seqset and bt_seqadv pin the
7450989Sbostic 	 * page the cursor references if they're successful.
7546133Smao 	 */
7650989Sbostic 	switch(flags) {
7750989Sbostic 	case R_NEXT:
7851100Sbostic 	case R_PREV:
7960042Sbostic 		if (ISSET(t, B_SEQINIT)) {
8050989Sbostic 			status = bt_seqadv(t, &e, flags);
8150989Sbostic 			break;
8250989Sbostic 		}
8350989Sbostic 		/* FALLTHROUGH */
8450989Sbostic 	case R_CURSOR:
8550989Sbostic 	case R_FIRST:
8650989Sbostic 	case R_LAST:
8750989Sbostic 		status = bt_seqset(t, &e, key, flags);
8850989Sbostic 		break;
8950989Sbostic 	default:
9050989Sbostic 		errno = EINVAL;
9150989Sbostic 		return (RET_ERROR);
9250989Sbostic 	}
9346133Smao 
9450989Sbostic 	if (status == RET_SUCCESS) {
9550989Sbostic 		status = __bt_ret(t, &e, key, data);
9646133Smao 
9750989Sbostic 		/* Update the actual cursor. */
9851100Sbostic 		t->bt_bcursor.pgno = e.page->pgno;
9951100Sbostic 		t->bt_bcursor.index = e.index;
100*64457Sbostic 
101*64457Sbostic 		/*
102*64457Sbostic 		 * If the user is doing concurrent access, we copied the
103*64457Sbostic 		 * key/data, toss the page.
104*64457Sbostic 		 */
105*64457Sbostic 		if (ISSET(t, B_DB_LOCK))
106*64457Sbostic 			mpool_put(t->bt_mp, e.page, 0);
107*64457Sbostic 		else
108*64457Sbostic 			t->bt_pinned = e.page;
10960042Sbostic 		SET(t, B_SEQINIT);
11050989Sbostic 	}
11150989Sbostic 	return (status);
11250989Sbostic }
11346133Smao 
11450989Sbostic /*
11550989Sbostic  * BT_SEQSET -- Set the sequential scan to a specific key.
11650989Sbostic  *
11750989Sbostic  * Parameters:
11850989Sbostic  *	t:	tree
11950989Sbostic  *	ep:	storage for returned key
12050989Sbostic  *	key:	key for initial scan position
12150989Sbostic  *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
12250989Sbostic  *
12350989Sbostic  * Side effects:
12450989Sbostic  *	Pins the page the cursor references.
12550989Sbostic  *
12650989Sbostic  * Returns:
12750989Sbostic  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
12850989Sbostic  */
12950989Sbostic static int
bt_seqset(t,ep,key,flags)13050989Sbostic bt_seqset(t, ep, key, flags)
13150989Sbostic 	BTREE *t;
13250989Sbostic 	EPG *ep;
13350989Sbostic 	DBT *key;
13450989Sbostic 	int flags;
13550989Sbostic {
13650989Sbostic 	EPG *e;
13750989Sbostic 	PAGE *h;
13850989Sbostic 	pgno_t pg;
13950989Sbostic 	int exact;
14046133Smao 
14150989Sbostic 	/*
14250989Sbostic 	 * Delete any already deleted record that we've been saving because
14350989Sbostic 	 * the cursor pointed to it.  Since going to a specific key, should
14450989Sbostic 	 * delete any logically deleted records so they aren't found.
14550989Sbostic 	 */
14660042Sbostic 	if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor))
14750989Sbostic 		return (RET_ERROR);
14846133Smao 
14950989Sbostic 	/*
15051100Sbostic 	 * Find the first, last or specific key in the tree and point the cursor
15151100Sbostic 	 * at it.  The cursor may not be moved until a new key has been found.
15250989Sbostic 	 */
15350989Sbostic 	switch(flags) {
15450989Sbostic 	case R_CURSOR:				/* Keyed scan. */
15551100Sbostic 		/*
15651100Sbostic 		 * Find the first instance of the key or the smallest key which
15751100Sbostic 		 * is greater than or equal to the specified key.  If run out
15851100Sbostic 		 * of keys, return RET_SPECIAL.
15951100Sbostic 		 */
16050989Sbostic 		if (key->data == NULL || key->size == 0) {
16150989Sbostic 			errno = EINVAL;
16246133Smao 			return (RET_ERROR);
16350989Sbostic 		}
16450989Sbostic 		e = __bt_first(t, key, &exact);	/* Returns pinned page. */
16550989Sbostic 		if (e == NULL)
16650989Sbostic 			return (RET_ERROR);
16751100Sbostic 		/*
16851100Sbostic 		 * If at the end of a page, skip any empty pages and find the
16951100Sbostic 		 * next entry.
17051100Sbostic 		 */
17151100Sbostic 		if (e->index == NEXTINDEX(e->page)) {
17251100Sbostic 			h = e->page;
17351100Sbostic 			do {
17451100Sbostic 				pg = h->nextpg;
17551100Sbostic 				mpool_put(t->bt_mp, h, 0);
17651100Sbostic 				if (pg == P_INVALID)
17751100Sbostic 					return (RET_SPECIAL);
17851100Sbostic 				if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
17951100Sbostic 					return (RET_ERROR);
18051100Sbostic 			} while (NEXTINDEX(h) == 0);
18151100Sbostic 			e->index = 0;
18251100Sbostic 			e->page = h;
18350989Sbostic 		}
18450989Sbostic 		*ep = *e;
18550989Sbostic 		break;
18650989Sbostic 	case R_FIRST:				/* First record. */
18750989Sbostic 	case R_NEXT:
18850989Sbostic 		/* Walk down the left-hand side of the tree. */
18950989Sbostic 		for (pg = P_ROOT;;) {
19050989Sbostic 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
19150989Sbostic 				return (RET_ERROR);
19256743Sbostic 			if (h->flags & (P_BLEAF | P_RLEAF))
19350989Sbostic 				break;
19450989Sbostic 			pg = GETBINTERNAL(h, 0)->pgno;
19550989Sbostic 			mpool_put(t->bt_mp, h, 0);
19650989Sbostic 		}
19746133Smao 
19850989Sbostic 		/* Skip any empty pages. */
19950989Sbostic 		while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) {
20050989Sbostic 			pg = h->nextpg;
20150989Sbostic 			mpool_put(t->bt_mp, h, 0);
20250989Sbostic 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
20350989Sbostic 				return (RET_ERROR);
20450989Sbostic 		}
20546133Smao 
20650989Sbostic 		if (NEXTINDEX(h) == 0) {
20750989Sbostic 			mpool_put(t->bt_mp, h, 0);
20850989Sbostic 			return (RET_SPECIAL);
20950989Sbostic 		}
21046133Smao 
21150989Sbostic 		ep->page = h;
21250989Sbostic 		ep->index = 0;
21350989Sbostic 		break;
21450989Sbostic 	case R_LAST:				/* Last record. */
21550989Sbostic 	case R_PREV:
21650989Sbostic 		/* Walk down the right-hand side of the tree. */
21750989Sbostic 		for (pg = P_ROOT;;) {
21850989Sbostic 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
21950989Sbostic 				return (RET_ERROR);
22056743Sbostic 			if (h->flags & (P_BLEAF | P_RLEAF))
22150989Sbostic 				break;
22250989Sbostic 			pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
22350989Sbostic 			mpool_put(t->bt_mp, h, 0);
22450989Sbostic 		}
22546133Smao 
22650989Sbostic 		/* Skip any empty pages. */
22750989Sbostic 		while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) {
22850989Sbostic 			pg = h->prevpg;
22950989Sbostic 			mpool_put(t->bt_mp, h, 0);
23050989Sbostic 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
23150989Sbostic 				return (RET_ERROR);
23250989Sbostic 		}
23346133Smao 
23450989Sbostic 		if (NEXTINDEX(h) == 0) {
23550989Sbostic 			mpool_put(t->bt_mp, h, 0);
23650989Sbostic 			return (RET_SPECIAL);
23746133Smao 		}
23850989Sbostic 
23950989Sbostic 		ep->page = h;
24050989Sbostic 		ep->index = NEXTINDEX(h) - 1;
24150989Sbostic 		break;
24246133Smao 	}
24346133Smao 	return (RET_SUCCESS);
24446133Smao }
24546133Smao 
24646133Smao /*
24750989Sbostic  * BT_SEQADVANCE -- Advance the sequential scan.
24846133Smao  *
24950989Sbostic  * Parameters:
25050989Sbostic  *	t:	tree
25150989Sbostic  *	flags:	R_NEXT, R_PREV
25246133Smao  *
25350989Sbostic  * Side effects:
25450989Sbostic  *	Pins the page the new key/data record is on.
25546133Smao  *
25650989Sbostic  * Returns:
25750989Sbostic  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
25846133Smao  */
25950989Sbostic static int
bt_seqadv(t,e,flags)26050989Sbostic bt_seqadv(t, e, flags)
26150989Sbostic 	BTREE *t;
26250989Sbostic 	EPG *e;
26346133Smao 	int flags;
26446133Smao {
26550989Sbostic 	EPGNO *c, delc;
26650989Sbostic 	PAGE *h;
26757989Sbostic 	indx_t index;
26850989Sbostic 	pgno_t pg;
26946133Smao 
27050989Sbostic 	/* Save the current cursor if going to delete it. */
27150989Sbostic 	c = &t->bt_bcursor;
27260042Sbostic 	if (ISSET(t, B_DELCRSR))
27350989Sbostic 		delc = *c;
27446133Smao 
27550989Sbostic 	if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
27646133Smao 		return (RET_ERROR);
27746133Smao 
27850989Sbostic 	/*
27950989Sbostic  	 * Find the next/previous record in the tree and point the cursor at it.
28050989Sbostic 	 * The cursor may not be moved until a new key has been found.
28150989Sbostic 	 */
28250989Sbostic 	index = c->index;
28350989Sbostic 	switch(flags) {
28450989Sbostic 	case R_NEXT:			/* Next record. */
28550989Sbostic 		if (++index == NEXTINDEX(h)) {
28650989Sbostic 			do {
28750989Sbostic 				pg = h->nextpg;
28850989Sbostic 				mpool_put(t->bt_mp, h, 0);
28950989Sbostic 				if (pg == P_INVALID)
29046133Smao 					return (RET_SPECIAL);
29150989Sbostic 				if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
29246133Smao 					return (RET_ERROR);
29350989Sbostic 			} while (NEXTINDEX(h) == 0);
29446133Smao 			index = 0;
29546133Smao 		}
29650989Sbostic 		break;
29750989Sbostic 	case R_PREV:			/* Previous record. */
29850989Sbostic 		if (index-- == 0) {
29946133Smao 			do {
30050989Sbostic 				pg = h->prevpg;
30150989Sbostic 				mpool_put(t->bt_mp, h, 0);
30250989Sbostic 				if (pg == P_INVALID)
30350989Sbostic 					return (RET_SPECIAL);
30450989Sbostic 				if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
30546133Smao 					return (RET_ERROR);
30650989Sbostic 			} while (NEXTINDEX(h) == 0);
30750989Sbostic 			index = NEXTINDEX(h) - 1;
30850989Sbostic 		}
30950989Sbostic 		break;
31050989Sbostic 	}
31146133Smao 
31250989Sbostic 	e->page = h;
31350989Sbostic 	e->index = index;
31446133Smao 
31550989Sbostic 	/*
31650989Sbostic 	 * Delete any already deleted record that we've been saving because the
31750989Sbostic 	 * cursor pointed to it.  This could cause the new index to be shifted
31850989Sbostic 	 * down by one if the record we're deleting is on the same page and has
31950989Sbostic 	 * a larger index.
32050989Sbostic 	 */
32160042Sbostic 	if (ISSET(t, B_DELCRSR)) {
32260042Sbostic 		CLR(t, B_DELCRSR);			/* Don't try twice. */
32350989Sbostic 		if (c->pgno == delc.pgno && c->index > delc.index)
32450989Sbostic 			--c->index;
32550989Sbostic 		if (__bt_crsrdel(t, &delc))
32650989Sbostic 			return (RET_ERROR);
32746133Smao 	}
32846133Smao 	return (RET_SUCCESS);
32946133Smao }
33050989Sbostic 
33150989Sbostic /*
33250989Sbostic  * __BT_CRSRDEL -- Delete the record referenced by the cursor.
33350989Sbostic  *
33450989Sbostic  * Parameters:
33550989Sbostic  *	t:	tree
33650989Sbostic  *
33750989Sbostic  * Returns:
33850989Sbostic  *	RET_ERROR, RET_SUCCESS
33950989Sbostic  */
34050989Sbostic int
__bt_crsrdel(t,c)34150989Sbostic __bt_crsrdel(t, c)
34250989Sbostic 	BTREE *t;
34350989Sbostic 	EPGNO *c;
34450989Sbostic {
34550989Sbostic 	PAGE *h;
34650989Sbostic 	int status;
34750989Sbostic 
34860042Sbostic 	CLR(t, B_DELCRSR);			/* Don't try twice. */
34950989Sbostic 	if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
35050989Sbostic 		return (RET_ERROR);
35150989Sbostic 	status = __bt_dleaf(t, h, c->index);
35250989Sbostic 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
35350989Sbostic 	return (status);
35450989Sbostic }
355