xref: /minix3/lib/libc/db/btree/bt_seq.c (revision 84d9c625bfea59e274550651111ae9edfdc40fbd)
1*84d9c625SLionel Sambuc /*	$NetBSD: bt_seq.c,v 1.18 2013/09/04 13:03:22 ryoon Exp $	*/
22639ae9bSBen Gras 
32639ae9bSBen Gras /*-
42639ae9bSBen Gras  * Copyright (c) 1990, 1993, 1994
52639ae9bSBen Gras  *	The Regents of the University of California.  All rights reserved.
62639ae9bSBen Gras  *
72639ae9bSBen Gras  * This code is derived from software contributed to Berkeley by
82639ae9bSBen Gras  * Mike Olson.
92639ae9bSBen Gras  *
102639ae9bSBen Gras  * Redistribution and use in source and binary forms, with or without
112639ae9bSBen Gras  * modification, are permitted provided that the following conditions
122639ae9bSBen Gras  * are met:
132639ae9bSBen Gras  * 1. Redistributions of source code must retain the above copyright
142639ae9bSBen Gras  *    notice, this list of conditions and the following disclaimer.
152639ae9bSBen Gras  * 2. Redistributions in binary form must reproduce the above copyright
162639ae9bSBen Gras  *    notice, this list of conditions and the following disclaimer in the
172639ae9bSBen Gras  *    documentation and/or other materials provided with the distribution.
182639ae9bSBen Gras  * 3. Neither the name of the University nor the names of its contributors
192639ae9bSBen Gras  *    may be used to endorse or promote products derived from this software
202639ae9bSBen Gras  *    without specific prior written permission.
212639ae9bSBen Gras  *
222639ae9bSBen Gras  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
232639ae9bSBen Gras  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
242639ae9bSBen Gras  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
252639ae9bSBen Gras  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
262639ae9bSBen Gras  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
272639ae9bSBen Gras  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
282639ae9bSBen Gras  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
292639ae9bSBen Gras  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
302639ae9bSBen Gras  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
312639ae9bSBen Gras  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
322639ae9bSBen Gras  * SUCH DAMAGE.
332639ae9bSBen Gras  */
342639ae9bSBen Gras 
352639ae9bSBen Gras #if HAVE_NBTOOL_CONFIG_H
362639ae9bSBen Gras #include "nbtool_config.h"
372639ae9bSBen Gras #endif
382639ae9bSBen Gras 
392639ae9bSBen Gras #include <sys/cdefs.h>
40*84d9c625SLionel Sambuc __RCSID("$NetBSD: bt_seq.c,v 1.18 2013/09/04 13:03:22 ryoon Exp $");
412639ae9bSBen Gras 
422639ae9bSBen Gras #include "namespace.h"
432639ae9bSBen Gras #include <sys/types.h>
442639ae9bSBen Gras 
452639ae9bSBen Gras #include <assert.h>
462639ae9bSBen Gras #include <errno.h>
472639ae9bSBen Gras #include <stddef.h>
482639ae9bSBen Gras #include <stdio.h>
492639ae9bSBen Gras #include <stdlib.h>
502639ae9bSBen Gras 
512639ae9bSBen Gras #include <db.h>
522639ae9bSBen Gras #include "btree.h"
532639ae9bSBen Gras 
542639ae9bSBen Gras static int __bt_first(BTREE *, const DBT *, EPG *, int *);
552639ae9bSBen Gras static int __bt_seqadv(BTREE *, EPG *, int);
562639ae9bSBen Gras static int __bt_seqset(BTREE *, EPG *, DBT *, int);
572639ae9bSBen Gras 
582639ae9bSBen Gras /*
592639ae9bSBen Gras  * Sequential scan support.
602639ae9bSBen Gras  *
612639ae9bSBen Gras  * The tree can be scanned sequentially, starting from either end of the
622639ae9bSBen Gras  * tree or from any specific key.  A scan request before any scanning is
632639ae9bSBen Gras  * done is initialized as starting from the least node.
642639ae9bSBen Gras  */
652639ae9bSBen Gras 
662639ae9bSBen Gras /*
672639ae9bSBen Gras  * __bt_seq --
682639ae9bSBen Gras  *	Btree sequential scan interface.
692639ae9bSBen Gras  *
702639ae9bSBen Gras  * Parameters:
712639ae9bSBen Gras  *	dbp:	pointer to access method
722639ae9bSBen Gras  *	key:	key for positioning and return value
732639ae9bSBen Gras  *	data:	data return value
742639ae9bSBen Gras  *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
752639ae9bSBen Gras  *
762639ae9bSBen Gras  * Returns:
772639ae9bSBen Gras  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
782639ae9bSBen Gras  */
792639ae9bSBen Gras int
__bt_seq(const DB * dbp,DBT * key,DBT * data,u_int flags)802639ae9bSBen Gras __bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags)
812639ae9bSBen Gras {
822639ae9bSBen Gras 	BTREE *t;
832639ae9bSBen Gras 	EPG e;
842639ae9bSBen Gras 	int status;
852639ae9bSBen Gras 
862639ae9bSBen Gras 	t = dbp->internal;
872639ae9bSBen Gras 
882639ae9bSBen Gras 	/* Toss any page pinned across calls. */
892639ae9bSBen Gras 	if (t->bt_pinned != NULL) {
902639ae9bSBen Gras 		mpool_put(t->bt_mp, t->bt_pinned, 0);
912639ae9bSBen Gras 		t->bt_pinned = NULL;
922639ae9bSBen Gras 	}
932639ae9bSBen Gras 
942639ae9bSBen Gras 	/*
95*84d9c625SLionel Sambuc 	 * If scan uninitialized as yet, or starting at a specific record, set
962639ae9bSBen Gras 	 * the scan to a specific key.  Both __bt_seqset and __bt_seqadv pin
972639ae9bSBen Gras 	 * the page the cursor references if they're successful.
982639ae9bSBen Gras 	 */
992639ae9bSBen Gras 	switch (flags) {
1002639ae9bSBen Gras 	case R_NEXT:
1012639ae9bSBen Gras 	case R_PREV:
1022639ae9bSBen Gras 		if (F_ISSET(&t->bt_cursor, CURS_INIT)) {
1032639ae9bSBen Gras 			status = __bt_seqadv(t, &e, (int)flags);
1042639ae9bSBen Gras 			break;
1052639ae9bSBen Gras 		}
1062639ae9bSBen Gras 		/* FALLTHROUGH */
1072639ae9bSBen Gras 	case R_FIRST:
1082639ae9bSBen Gras 	case R_LAST:
1092639ae9bSBen Gras 	case R_CURSOR:
1102639ae9bSBen Gras 		status = __bt_seqset(t, &e, key, (int)flags);
1112639ae9bSBen Gras 		break;
1122639ae9bSBen Gras 	default:
1132639ae9bSBen Gras 		errno = EINVAL;
1142639ae9bSBen Gras 		return (RET_ERROR);
1152639ae9bSBen Gras 	}
1162639ae9bSBen Gras 
1172639ae9bSBen Gras 	if (status == RET_SUCCESS) {
1182639ae9bSBen Gras 		__bt_setcur(t, e.page->pgno, (u_int)e.index);
1192639ae9bSBen Gras 
1202639ae9bSBen Gras 		status =
1212639ae9bSBen Gras 		    __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0);
1222639ae9bSBen Gras 
1232639ae9bSBen Gras 		/*
1242639ae9bSBen Gras 		 * If the user is doing concurrent access, we copied the
1252639ae9bSBen Gras 		 * key/data, toss the page.
1262639ae9bSBen Gras 		 */
1272639ae9bSBen Gras 		if (F_ISSET(t, B_DB_LOCK))
1282639ae9bSBen Gras 			mpool_put(t->bt_mp, e.page, 0);
1292639ae9bSBen Gras 		else
1302639ae9bSBen Gras 			t->bt_pinned = e.page;
1312639ae9bSBen Gras 	}
1322639ae9bSBen Gras 	return (status);
1332639ae9bSBen Gras }
1342639ae9bSBen Gras 
1352639ae9bSBen Gras /*
1362639ae9bSBen Gras  * __bt_seqset --
1372639ae9bSBen Gras  *	Set the sequential scan to a specific key.
1382639ae9bSBen Gras  *
1392639ae9bSBen Gras  * Parameters:
1402639ae9bSBen Gras  *	t:	tree
1412639ae9bSBen Gras  *	ep:	storage for returned key
1422639ae9bSBen Gras  *	key:	key for initial scan position
1432639ae9bSBen Gras  *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
1442639ae9bSBen Gras  *
1452639ae9bSBen Gras  * Side effects:
1462639ae9bSBen Gras  *	Pins the page the cursor references.
1472639ae9bSBen Gras  *
1482639ae9bSBen Gras  * Returns:
1492639ae9bSBen Gras  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
1502639ae9bSBen Gras  */
1512639ae9bSBen Gras static int
__bt_seqset(BTREE * t,EPG * ep,DBT * key,int flags)1522639ae9bSBen Gras __bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags)
1532639ae9bSBen Gras {
1542639ae9bSBen Gras 	PAGE *h;
1552639ae9bSBen Gras 	pgno_t pg;
1562639ae9bSBen Gras 	int exact;
1572639ae9bSBen Gras 
1582639ae9bSBen Gras 	/*
1592639ae9bSBen Gras 	 * Find the first, last or specific key in the tree and point the
1602639ae9bSBen Gras 	 * cursor at it.  The cursor may not be moved until a new key has
1612639ae9bSBen Gras 	 * been found.
1622639ae9bSBen Gras 	 */
1632639ae9bSBen Gras 	switch (flags) {
1642639ae9bSBen Gras 	case R_CURSOR:				/* Keyed scan. */
1652639ae9bSBen Gras 		/*
1662639ae9bSBen Gras 		 * Find the first instance of the key or the smallest key
1672639ae9bSBen Gras 		 * which is greater than or equal to the specified key.
1682639ae9bSBen Gras 		 */
1692639ae9bSBen Gras 		if (key->data == NULL || key->size == 0) {
1702639ae9bSBen Gras 			errno = EINVAL;
1712639ae9bSBen Gras 			return (RET_ERROR);
1722639ae9bSBen Gras 		}
1732639ae9bSBen Gras 		return (__bt_first(t, key, ep, &exact));
1742639ae9bSBen Gras 	case R_FIRST:				/* First record. */
1752639ae9bSBen Gras 	case R_NEXT:
1762639ae9bSBen Gras 		/* Walk down the left-hand side of the tree. */
1772639ae9bSBen Gras 		for (pg = P_ROOT;;) {
1782639ae9bSBen Gras 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
1792639ae9bSBen Gras 				return (RET_ERROR);
1802639ae9bSBen Gras 
1812639ae9bSBen Gras 			/* Check for an empty tree. */
1822639ae9bSBen Gras 			if (NEXTINDEX(h) == 0) {
1832639ae9bSBen Gras 				mpool_put(t->bt_mp, h, 0);
1842639ae9bSBen Gras 				return (RET_SPECIAL);
1852639ae9bSBen Gras 			}
1862639ae9bSBen Gras 
1872639ae9bSBen Gras 			if (h->flags & (P_BLEAF | P_RLEAF))
1882639ae9bSBen Gras 				break;
1892639ae9bSBen Gras 			pg = GETBINTERNAL(h, 0)->pgno;
1902639ae9bSBen Gras 			mpool_put(t->bt_mp, h, 0);
1912639ae9bSBen Gras 		}
1922639ae9bSBen Gras 		ep->page = h;
1932639ae9bSBen Gras 		ep->index = 0;
1942639ae9bSBen Gras 		break;
1952639ae9bSBen Gras 	case R_LAST:				/* Last record. */
1962639ae9bSBen Gras 	case R_PREV:
1972639ae9bSBen Gras 		/* Walk down the right-hand side of the tree. */
1982639ae9bSBen Gras 		for (pg = P_ROOT;;) {
1992639ae9bSBen Gras 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
2002639ae9bSBen Gras 				return (RET_ERROR);
2012639ae9bSBen Gras 
2022639ae9bSBen Gras 			/* Check for an empty tree. */
2032639ae9bSBen Gras 			if (NEXTINDEX(h) == 0) {
2042639ae9bSBen Gras 				mpool_put(t->bt_mp, h, 0);
2052639ae9bSBen Gras 				return (RET_SPECIAL);
2062639ae9bSBen Gras 			}
2072639ae9bSBen Gras 
2082639ae9bSBen Gras 			if (h->flags & (P_BLEAF | P_RLEAF))
2092639ae9bSBen Gras 				break;
2102639ae9bSBen Gras 			pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
2112639ae9bSBen Gras 			mpool_put(t->bt_mp, h, 0);
2122639ae9bSBen Gras 		}
2132639ae9bSBen Gras 
2142639ae9bSBen Gras 		ep->page = h;
2152639ae9bSBen Gras 		ep->index = NEXTINDEX(h) - 1;
2162639ae9bSBen Gras 		break;
2172639ae9bSBen Gras 	}
2182639ae9bSBen Gras 	return (RET_SUCCESS);
2192639ae9bSBen Gras }
2202639ae9bSBen Gras 
2212639ae9bSBen Gras /*
2222639ae9bSBen Gras  * __bt_seqadvance --
2232639ae9bSBen Gras  *	Advance the sequential scan.
2242639ae9bSBen Gras  *
2252639ae9bSBen Gras  * Parameters:
2262639ae9bSBen Gras  *	t:	tree
2272639ae9bSBen Gras  *	flags:	R_NEXT, R_PREV
2282639ae9bSBen Gras  *
2292639ae9bSBen Gras  * Side effects:
2302639ae9bSBen Gras  *	Pins the page the new key/data record is on.
2312639ae9bSBen Gras  *
2322639ae9bSBen Gras  * Returns:
2332639ae9bSBen Gras  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
2342639ae9bSBen Gras  */
2352639ae9bSBen Gras static int
__bt_seqadv(BTREE * t,EPG * ep,int flags)2362639ae9bSBen Gras __bt_seqadv(BTREE *t, EPG *ep, int flags)
2372639ae9bSBen Gras {
2382639ae9bSBen Gras 	CURSOR *c;
2392639ae9bSBen Gras 	PAGE *h;
2402639ae9bSBen Gras 	indx_t idx = 0;	/* pacify gcc */
2412639ae9bSBen Gras 	pgno_t pg;
2422639ae9bSBen Gras 	int exact;
2432639ae9bSBen Gras 
2442639ae9bSBen Gras 	/*
2452639ae9bSBen Gras 	 * There are a couple of states that we can be in.  The cursor has
2462639ae9bSBen Gras 	 * been initialized by the time we get here, but that's all we know.
2472639ae9bSBen Gras 	 */
2482639ae9bSBen Gras 	c = &t->bt_cursor;
2492639ae9bSBen Gras 
2502639ae9bSBen Gras 	/*
2512639ae9bSBen Gras 	 * The cursor was deleted where there weren't any duplicate records,
2522639ae9bSBen Gras 	 * so the key was saved.  Find out where that key would go in the
2532639ae9bSBen Gras 	 * current tree.  It doesn't matter if the returned key is an exact
2542639ae9bSBen Gras 	 * match or not -- if it's an exact match, the record was added after
2552639ae9bSBen Gras 	 * the delete so we can just return it.  If not, as long as there's
2562639ae9bSBen Gras 	 * a record there, return it.
2572639ae9bSBen Gras 	 */
2582639ae9bSBen Gras 	if (F_ISSET(c, CURS_ACQUIRE))
2592639ae9bSBen Gras 		return (__bt_first(t, &c->key, ep, &exact));
2602639ae9bSBen Gras 
2612639ae9bSBen Gras 	/* Get the page referenced by the cursor. */
2622639ae9bSBen Gras 	if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
2632639ae9bSBen Gras 		return (RET_ERROR);
2642639ae9bSBen Gras 
2652639ae9bSBen Gras 	/*
2662639ae9bSBen Gras  	 * Find the next/previous record in the tree and point the cursor at
2672639ae9bSBen Gras 	 * it.  The cursor may not be moved until a new key has been found.
2682639ae9bSBen Gras 	 */
2692639ae9bSBen Gras 	switch (flags) {
2702639ae9bSBen Gras 	case R_NEXT:			/* Next record. */
2712639ae9bSBen Gras 		/*
2722639ae9bSBen Gras 		 * The cursor was deleted in duplicate records, and moved
2732639ae9bSBen Gras 		 * forward to a record that has yet to be returned.  Clear
2742639ae9bSBen Gras 		 * that flag, and return the record.
2752639ae9bSBen Gras 		 */
2762639ae9bSBen Gras 		if (F_ISSET(c, CURS_AFTER))
2772639ae9bSBen Gras 			goto usecurrent;
2782639ae9bSBen Gras 		idx = c->pg.index;
2792639ae9bSBen Gras 		if (++idx == NEXTINDEX(h)) {
2802639ae9bSBen Gras 			pg = h->nextpg;
2812639ae9bSBen Gras 			mpool_put(t->bt_mp, h, 0);
2822639ae9bSBen Gras 			if (pg == P_INVALID)
2832639ae9bSBen Gras 				return (RET_SPECIAL);
2842639ae9bSBen Gras 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
2852639ae9bSBen Gras 				return (RET_ERROR);
2862639ae9bSBen Gras 			idx = 0;
2872639ae9bSBen Gras 		}
2882639ae9bSBen Gras 		break;
2892639ae9bSBen Gras 	case R_PREV:			/* Previous record. */
2902639ae9bSBen Gras 		/*
2912639ae9bSBen Gras 		 * The cursor was deleted in duplicate records, and moved
2922639ae9bSBen Gras 		 * backward to a record that has yet to be returned.  Clear
2932639ae9bSBen Gras 		 * that flag, and return the record.
2942639ae9bSBen Gras 		 */
2952639ae9bSBen Gras 		if (F_ISSET(c, CURS_BEFORE)) {
2962639ae9bSBen Gras usecurrent:		F_CLR(c, CURS_AFTER | CURS_BEFORE);
2972639ae9bSBen Gras 			ep->page = h;
2982639ae9bSBen Gras 			ep->index = c->pg.index;
2992639ae9bSBen Gras 			return (RET_SUCCESS);
3002639ae9bSBen Gras 		}
3012639ae9bSBen Gras 		idx = c->pg.index;
3022639ae9bSBen Gras 		if (idx == 0) {
3032639ae9bSBen Gras 			pg = h->prevpg;
3042639ae9bSBen Gras 			mpool_put(t->bt_mp, h, 0);
3052639ae9bSBen Gras 			if (pg == P_INVALID)
3062639ae9bSBen Gras 				return (RET_SPECIAL);
3072639ae9bSBen Gras 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
3082639ae9bSBen Gras 				return (RET_ERROR);
3092639ae9bSBen Gras 			idx = NEXTINDEX(h) - 1;
3102639ae9bSBen Gras 		} else
3112639ae9bSBen Gras 			--idx;
3122639ae9bSBen Gras 		break;
3132639ae9bSBen Gras 	}
3142639ae9bSBen Gras 
3152639ae9bSBen Gras 	ep->page = h;
3162639ae9bSBen Gras 	ep->index = idx;
3172639ae9bSBen Gras 	return (RET_SUCCESS);
3182639ae9bSBen Gras }
3192639ae9bSBen Gras 
3202639ae9bSBen Gras /*
3212639ae9bSBen Gras  * __bt_first --
3222639ae9bSBen Gras  *	Find the first entry.
3232639ae9bSBen Gras  *
3242639ae9bSBen Gras  * Parameters:
3252639ae9bSBen Gras  *	t:	the tree
3262639ae9bSBen Gras  *    key:	the key
3272639ae9bSBen Gras  *  erval:	return EPG
3282639ae9bSBen Gras  * exactp:	pointer to exact match flag
3292639ae9bSBen Gras  *
3302639ae9bSBen Gras  * Returns:
3312639ae9bSBen Gras  *	The first entry in the tree greater than or equal to key,
3322639ae9bSBen Gras  *	or RET_SPECIAL if no such key exists.
3332639ae9bSBen Gras  */
3342639ae9bSBen Gras static int
__bt_first(BTREE * t,const DBT * key,EPG * erval,int * exactp)3352639ae9bSBen Gras __bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp)
3362639ae9bSBen Gras {
3372639ae9bSBen Gras 	PAGE *h;
3382639ae9bSBen Gras 	EPG *ep, save;
3392639ae9bSBen Gras 	pgno_t pg;
3402639ae9bSBen Gras 
3412639ae9bSBen Gras 	/*
3422639ae9bSBen Gras 	 * Find any matching record; __bt_search pins the page.
3432639ae9bSBen Gras 	 *
3442639ae9bSBen Gras 	 * If it's an exact match and duplicates are possible, walk backwards
3452639ae9bSBen Gras 	 * in the tree until we find the first one.  Otherwise, make sure it's
3462639ae9bSBen Gras 	 * a valid key (__bt_search may return an index just past the end of a
3472639ae9bSBen Gras 	 * page) and return it.
3482639ae9bSBen Gras 	 */
3492639ae9bSBen Gras 	if ((ep = __bt_search(t, key, exactp)) == NULL)
3502639ae9bSBen Gras 		return (0);
3512639ae9bSBen Gras 	if (*exactp) {
3522639ae9bSBen Gras 		if (F_ISSET(t, B_NODUPS)) {
3532639ae9bSBen Gras 			*erval = *ep;
3542639ae9bSBen Gras 			return (RET_SUCCESS);
3552639ae9bSBen Gras 		}
3562639ae9bSBen Gras 
3572639ae9bSBen Gras 		/*
3582639ae9bSBen Gras 		 * Walk backwards, as long as the entry matches and there are
3592639ae9bSBen Gras 		 * keys left in the tree.  Save a copy of each match in case
3602639ae9bSBen Gras 		 * we go too far.
3612639ae9bSBen Gras 		 */
3622639ae9bSBen Gras 		save = *ep;
3632639ae9bSBen Gras 		h = ep->page;
3642639ae9bSBen Gras 		do {
3652639ae9bSBen Gras 			if (save.page->pgno != ep->page->pgno) {
3662639ae9bSBen Gras 				mpool_put(t->bt_mp, save.page, 0);
3672639ae9bSBen Gras 				save = *ep;
3682639ae9bSBen Gras 			} else
3692639ae9bSBen Gras 				save.index = ep->index;
3702639ae9bSBen Gras 
3712639ae9bSBen Gras 			/*
3722639ae9bSBen Gras 			 * Don't unpin the page the last (or original) match
3732639ae9bSBen Gras 			 * was on, but make sure it's unpinned if an error
3742639ae9bSBen Gras 			 * occurs.
3752639ae9bSBen Gras 			 */
3762639ae9bSBen Gras 			if (ep->index == 0) {
3772639ae9bSBen Gras 				if (h->prevpg == P_INVALID)
3782639ae9bSBen Gras 					break;
3792639ae9bSBen Gras 				if (h->pgno != save.page->pgno)
3802639ae9bSBen Gras 					mpool_put(t->bt_mp, h, 0);
3812639ae9bSBen Gras 				if ((h = mpool_get(t->bt_mp,
3822639ae9bSBen Gras 				    h->prevpg, 0)) == NULL)
3832639ae9bSBen Gras 					return (RET_ERROR);
3842639ae9bSBen Gras 				ep->page = h;
3852639ae9bSBen Gras 				ep->index = NEXTINDEX(h);
3862639ae9bSBen Gras 			}
3872639ae9bSBen Gras 			--ep->index;
3882639ae9bSBen Gras 		} while (__bt_cmp(t, key, ep) == 0);
3892639ae9bSBen Gras 
3902639ae9bSBen Gras 		/*
3912639ae9bSBen Gras 		 * Reach here with the last page that was looked at pinned,
3922639ae9bSBen Gras 		 * which may or may not be the same as the last (or original)
3932639ae9bSBen Gras 		 * match page.  If it's not useful, release it.
3942639ae9bSBen Gras 		 */
3952639ae9bSBen Gras 		if (h->pgno != save.page->pgno)
3962639ae9bSBen Gras 			mpool_put(t->bt_mp, h, 0);
3972639ae9bSBen Gras 
3982639ae9bSBen Gras 		*erval = save;
3992639ae9bSBen Gras 		return (RET_SUCCESS);
4002639ae9bSBen Gras 	}
4012639ae9bSBen Gras 
4022639ae9bSBen Gras 	/* If at the end of a page, find the next entry. */
4032639ae9bSBen Gras 	if (ep->index == NEXTINDEX(ep->page)) {
4042639ae9bSBen Gras 		h = ep->page;
4052639ae9bSBen Gras 		pg = h->nextpg;
4062639ae9bSBen Gras 		mpool_put(t->bt_mp, h, 0);
4072639ae9bSBen Gras 		if (pg == P_INVALID)
4082639ae9bSBen Gras 			return (RET_SPECIAL);
4092639ae9bSBen Gras 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
4102639ae9bSBen Gras 			return (RET_ERROR);
4112639ae9bSBen Gras 		ep->index = 0;
4122639ae9bSBen Gras 		ep->page = h;
4132639ae9bSBen Gras 	}
4142639ae9bSBen Gras 	*erval = *ep;
4152639ae9bSBen Gras 	return (RET_SUCCESS);
4162639ae9bSBen Gras }
4172639ae9bSBen Gras 
4182639ae9bSBen Gras /*
4192639ae9bSBen Gras  * __bt_setcur --
4202639ae9bSBen Gras  *	Set the cursor to an entry in the tree.
4212639ae9bSBen Gras  *
4222639ae9bSBen Gras  * Parameters:
4232639ae9bSBen Gras  *	t:	the tree
4242639ae9bSBen Gras  *   pgno:	page number
4252639ae9bSBen Gras  *    idx:	page index
4262639ae9bSBen Gras  */
4272639ae9bSBen Gras void
__bt_setcur(BTREE * t,pgno_t pgno,u_int idx)4282639ae9bSBen Gras __bt_setcur(BTREE *t, pgno_t pgno, u_int idx)
4292639ae9bSBen Gras {
4302639ae9bSBen Gras 	/* Lose any already deleted key. */
4312639ae9bSBen Gras 	if (t->bt_cursor.key.data != NULL) {
4322639ae9bSBen Gras 		free(t->bt_cursor.key.data);
4332639ae9bSBen Gras 		t->bt_cursor.key.size = 0;
4342639ae9bSBen Gras 		t->bt_cursor.key.data = NULL;
4352639ae9bSBen Gras 	}
4362639ae9bSBen Gras 	F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE);
4372639ae9bSBen Gras 
4382639ae9bSBen Gras 	/* Update the cursor. */
4392639ae9bSBen Gras 	t->bt_cursor.pg.pgno = pgno;
4402639ae9bSBen Gras 	t->bt_cursor.pg.index = idx;
4412639ae9bSBen Gras 	F_SET(&t->bt_cursor, CURS_INIT);
4422639ae9bSBen Gras }
443