xref: /openbsd-src/lib/libc/db/btree/bt_split.c (revision 53b37aa960c5e4c03a5a2e490de17e3dee91b98f)
1*53b37aa9Sespie /*	$OpenBSD: bt_split.c,v 1.13 2005/08/05 13:03:00 espie Exp $	*/
21b727fc6Smillert 
3df930be7Sderaadt /*-
4df930be7Sderaadt  * Copyright (c) 1990, 1993, 1994
5df930be7Sderaadt  *	The Regents of the University of California.  All rights reserved.
6df930be7Sderaadt  *
7df930be7Sderaadt  * This code is derived from software contributed to Berkeley by
8df930be7Sderaadt  * Mike Olson.
9df930be7Sderaadt  *
10df930be7Sderaadt  * Redistribution and use in source and binary forms, with or without
11df930be7Sderaadt  * modification, are permitted provided that the following conditions
12df930be7Sderaadt  * are met:
13df930be7Sderaadt  * 1. Redistributions of source code must retain the above copyright
14df930be7Sderaadt  *    notice, this list of conditions and the following disclaimer.
15df930be7Sderaadt  * 2. Redistributions in binary form must reproduce the above copyright
16df930be7Sderaadt  *    notice, this list of conditions and the following disclaimer in the
17df930be7Sderaadt  *    documentation and/or other materials provided with the distribution.
186580fee3Smillert  * 3. Neither the name of the University nor the names of its contributors
19df930be7Sderaadt  *    may be used to endorse or promote products derived from this software
20df930be7Sderaadt  *    without specific prior written permission.
21df930be7Sderaadt  *
22df930be7Sderaadt  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23df930be7Sderaadt  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24df930be7Sderaadt  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25df930be7Sderaadt  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26df930be7Sderaadt  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27df930be7Sderaadt  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28df930be7Sderaadt  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29df930be7Sderaadt  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30df930be7Sderaadt  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31df930be7Sderaadt  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32df930be7Sderaadt  * SUCH DAMAGE.
33df930be7Sderaadt  */
34df930be7Sderaadt 
35df930be7Sderaadt #include <sys/types.h>
36df930be7Sderaadt 
37df930be7Sderaadt #include <limits.h>
38df930be7Sderaadt #include <stdio.h>
39df930be7Sderaadt #include <stdlib.h>
40df930be7Sderaadt #include <string.h>
41df930be7Sderaadt 
42df930be7Sderaadt #include <db.h>
43df930be7Sderaadt #include "btree.h"
44df930be7Sderaadt 
45c72b5b24Smillert static int	 bt_broot(BTREE *, PAGE *, PAGE *, PAGE *);
46df95a199Smillert static PAGE	*bt_page(BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t);
47c72b5b24Smillert static int	 bt_preserve(BTREE *, pgno_t);
48df95a199Smillert static PAGE	*bt_psplit(BTREE *, PAGE *, PAGE *, PAGE *, indx_t *, size_t);
49df95a199Smillert static PAGE	*bt_root(BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t);
50c72b5b24Smillert static int	 bt_rroot(BTREE *, PAGE *, PAGE *, PAGE *);
51c72b5b24Smillert static recno_t	 rec_total(PAGE *);
52df930be7Sderaadt 
53df930be7Sderaadt #ifdef STATISTICS
54df930be7Sderaadt u_long	bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved;
55df930be7Sderaadt #endif
56df930be7Sderaadt 
57df930be7Sderaadt /*
58df930be7Sderaadt  * __BT_SPLIT -- Split the tree.
59df930be7Sderaadt  *
60df930be7Sderaadt  * Parameters:
61df930be7Sderaadt  *	t:	tree
62df930be7Sderaadt  *	sp:	page to split
63df930be7Sderaadt  *	key:	key to insert
64df930be7Sderaadt  *	data:	data to insert
65df930be7Sderaadt  *	flags:	BIGKEY/BIGDATA flags
66df930be7Sderaadt  *	ilen:	insert length
67df930be7Sderaadt  *	skip:	index to leave open
68df930be7Sderaadt  *
69df930be7Sderaadt  * Returns:
70df930be7Sderaadt  *	RET_ERROR, RET_SUCCESS
71df930be7Sderaadt  */
72df930be7Sderaadt int
__bt_split(BTREE * t,PAGE * sp,const DBT * key,const DBT * data,int flags,size_t ilen,u_int32_t argskip)73e20a56a5Sotto __bt_split(BTREE *t, PAGE *sp, const DBT *key, const DBT *data, int flags,
74e20a56a5Sotto     size_t ilen, u_int32_t argskip)
75df930be7Sderaadt {
76df930be7Sderaadt 	BINTERNAL *bi;
77df930be7Sderaadt 	BLEAF *bl, *tbl;
78df930be7Sderaadt 	DBT a, b;
79df930be7Sderaadt 	EPGNO *parent;
80df930be7Sderaadt 	PAGE *h, *l, *r, *lchild, *rchild;
81df930be7Sderaadt 	indx_t nxtindex;
82df930be7Sderaadt 	u_int16_t skip;
83df930be7Sderaadt 	u_int32_t n, nbytes, nksize;
84df930be7Sderaadt 	int parentsplit;
85df930be7Sderaadt 	char *dest;
86df930be7Sderaadt 
87df930be7Sderaadt 	/*
88df930be7Sderaadt 	 * Split the page into two pages, l and r.  The split routines return
89df930be7Sderaadt 	 * a pointer to the page into which the key should be inserted and with
90df930be7Sderaadt 	 * skip set to the offset which should be used.  Additionally, l and r
91df930be7Sderaadt 	 * are pinned.
92df930be7Sderaadt 	 */
93df930be7Sderaadt 	skip = argskip;
94df930be7Sderaadt 	h = sp->pgno == P_ROOT ?
95df930be7Sderaadt 	    bt_root(t, sp, &l, &r, &skip, ilen) :
96df930be7Sderaadt 	    bt_page(t, sp, &l, &r, &skip, ilen);
97df930be7Sderaadt 	if (h == NULL)
98df930be7Sderaadt 		return (RET_ERROR);
99df930be7Sderaadt 
100df930be7Sderaadt 	/*
101df930be7Sderaadt 	 * Insert the new key/data pair into the leaf page.  (Key inserts
102df930be7Sderaadt 	 * always cause a leaf page to split first.)
103df930be7Sderaadt 	 */
104df930be7Sderaadt 	h->linp[skip] = h->upper -= ilen;
105df930be7Sderaadt 	dest = (char *)h + h->upper;
106bec2d00aSderaadt 	if (F_ISSET(t, R_RECNO))
107df930be7Sderaadt 		WR_RLEAF(dest, data, flags)
108df930be7Sderaadt 	else
109df930be7Sderaadt 		WR_BLEAF(dest, key, data, flags)
110df930be7Sderaadt 
111df930be7Sderaadt 	/* If the root page was split, make it look right. */
112df930be7Sderaadt 	if (sp->pgno == P_ROOT &&
113bec2d00aSderaadt 	    (F_ISSET(t, R_RECNO) ?
114df930be7Sderaadt 	    bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
115df930be7Sderaadt 		goto err2;
116df930be7Sderaadt 
117df930be7Sderaadt 	/*
118df930be7Sderaadt 	 * Now we walk the parent page stack -- a LIFO stack of the pages that
119df930be7Sderaadt 	 * were traversed when we searched for the page that split.  Each stack
120df930be7Sderaadt 	 * entry is a page number and a page index offset.  The offset is for
121df930be7Sderaadt 	 * the page traversed on the search.  We've just split a page, so we
122df930be7Sderaadt 	 * have to insert a new key into the parent page.
123df930be7Sderaadt 	 *
124df930be7Sderaadt 	 * If the insert into the parent page causes it to split, may have to
125df930be7Sderaadt 	 * continue splitting all the way up the tree.  We stop if the root
126df930be7Sderaadt 	 * splits or the page inserted into didn't have to split to hold the
127df930be7Sderaadt 	 * new key.  Some algorithms replace the key for the old page as well
128df930be7Sderaadt 	 * as the new page.  We don't, as there's no reason to believe that the
129df930be7Sderaadt 	 * first key on the old page is any better than the key we have, and,
130df930be7Sderaadt 	 * in the case of a key being placed at index 0 causing the split, the
131df930be7Sderaadt 	 * key is unavailable.
132df930be7Sderaadt 	 *
133df930be7Sderaadt 	 * There are a maximum of 5 pages pinned at any time.  We keep the left
134df930be7Sderaadt 	 * and right pages pinned while working on the parent.   The 5 are the
135df930be7Sderaadt 	 * two children, left parent and right parent (when the parent splits)
136df930be7Sderaadt 	 * and the root page or the overflow key page when calling bt_preserve.
137df930be7Sderaadt 	 * This code must make sure that all pins are released other than the
138df930be7Sderaadt 	 * root page or overflow page which is unlocked elsewhere.
139df930be7Sderaadt 	 */
140df930be7Sderaadt 	while ((parent = BT_POP(t)) != NULL) {
141df930be7Sderaadt 		lchild = l;
142df930be7Sderaadt 		rchild = r;
143df930be7Sderaadt 
144df930be7Sderaadt 		/* Get the parent page. */
145df930be7Sderaadt 		if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
146df930be7Sderaadt 			goto err2;
147df930be7Sderaadt 
148df930be7Sderaadt 	 	/*
149df930be7Sderaadt 		 * The new key goes ONE AFTER the index, because the split
150df930be7Sderaadt 		 * was to the right.
151df930be7Sderaadt 		 */
152df930be7Sderaadt 		skip = parent->index + 1;
153df930be7Sderaadt 
154df930be7Sderaadt 		/*
155df930be7Sderaadt 		 * Calculate the space needed on the parent page.
156df930be7Sderaadt 		 *
157df930be7Sderaadt 		 * Prefix trees: space hack when inserting into BINTERNAL
158df930be7Sderaadt 		 * pages.  Retain only what's needed to distinguish between
159df930be7Sderaadt 		 * the new entry and the LAST entry on the page to its left.
160df930be7Sderaadt 		 * If the keys compare equal, retain the entire key.  Note,
161df930be7Sderaadt 		 * we don't touch overflow keys, and the entire key must be
162df930be7Sderaadt 		 * retained for the next-to-left most key on the leftmost
163df930be7Sderaadt 		 * page of each level, or the search will fail.  Applicable
164df930be7Sderaadt 		 * ONLY to internal pages that have leaf pages as children.
165df930be7Sderaadt 		 * Further reduction of the key between pairs of internal
166df930be7Sderaadt 		 * pages loses too much information.
167df930be7Sderaadt 		 */
168df930be7Sderaadt 		switch (rchild->flags & P_TYPE) {
169df930be7Sderaadt 		case P_BINTERNAL:
170df930be7Sderaadt 			bi = GETBINTERNAL(rchild, 0);
171df930be7Sderaadt 			nbytes = NBINTERNAL(bi->ksize);
172df930be7Sderaadt 			break;
173df930be7Sderaadt 		case P_BLEAF:
174df930be7Sderaadt 			bl = GETBLEAF(rchild, 0);
175df930be7Sderaadt 			nbytes = NBINTERNAL(bl->ksize);
176df930be7Sderaadt 			if (t->bt_pfx && !(bl->flags & P_BIGKEY) &&
177df930be7Sderaadt 			    (h->prevpg != P_INVALID || skip > 1)) {
178df930be7Sderaadt 				tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1);
179df930be7Sderaadt 				a.size = tbl->ksize;
180df930be7Sderaadt 				a.data = tbl->bytes;
181df930be7Sderaadt 				b.size = bl->ksize;
182df930be7Sderaadt 				b.data = bl->bytes;
183df930be7Sderaadt 				nksize = t->bt_pfx(&a, &b);
184df930be7Sderaadt 				n = NBINTERNAL(nksize);
185df930be7Sderaadt 				if (n < nbytes) {
186df930be7Sderaadt #ifdef STATISTICS
187df930be7Sderaadt 					bt_pfxsaved += nbytes - n;
188df930be7Sderaadt #endif
189df930be7Sderaadt 					nbytes = n;
190df930be7Sderaadt 				} else
191df930be7Sderaadt 					nksize = 0;
192df930be7Sderaadt 			} else
193df930be7Sderaadt 				nksize = 0;
194df930be7Sderaadt 			break;
195df930be7Sderaadt 		case P_RINTERNAL:
196df930be7Sderaadt 		case P_RLEAF:
197df930be7Sderaadt 			nbytes = NRINTERNAL;
198df930be7Sderaadt 			break;
199df930be7Sderaadt 		default:
200df930be7Sderaadt 			abort();
201df930be7Sderaadt 		}
202df930be7Sderaadt 
203df930be7Sderaadt 		/* Split the parent page if necessary or shift the indices. */
204df930be7Sderaadt 		if (h->upper - h->lower < nbytes + sizeof(indx_t)) {
205df930be7Sderaadt 			sp = h;
206df930be7Sderaadt 			h = h->pgno == P_ROOT ?
207df930be7Sderaadt 			    bt_root(t, h, &l, &r, &skip, nbytes) :
208df930be7Sderaadt 			    bt_page(t, h, &l, &r, &skip, nbytes);
209df930be7Sderaadt 			if (h == NULL)
210df930be7Sderaadt 				goto err1;
211df930be7Sderaadt 			parentsplit = 1;
212df930be7Sderaadt 		} else {
213df930be7Sderaadt 			if (skip < (nxtindex = NEXTINDEX(h)))
214df930be7Sderaadt 				memmove(h->linp + skip + 1, h->linp + skip,
215df930be7Sderaadt 				    (nxtindex - skip) * sizeof(indx_t));
216df930be7Sderaadt 			h->lower += sizeof(indx_t);
217df930be7Sderaadt 			parentsplit = 0;
218df930be7Sderaadt 		}
219df930be7Sderaadt 
220df930be7Sderaadt 		/* Insert the key into the parent page. */
221df930be7Sderaadt 		switch (rchild->flags & P_TYPE) {
222df930be7Sderaadt 		case P_BINTERNAL:
223df930be7Sderaadt 			h->linp[skip] = h->upper -= nbytes;
224df930be7Sderaadt 			dest = (char *)h + h->linp[skip];
225df930be7Sderaadt 			memmove(dest, bi, nbytes);
226df930be7Sderaadt 			((BINTERNAL *)dest)->pgno = rchild->pgno;
227df930be7Sderaadt 			break;
228df930be7Sderaadt 		case P_BLEAF:
229df930be7Sderaadt 			h->linp[skip] = h->upper -= nbytes;
230df930be7Sderaadt 			dest = (char *)h + h->linp[skip];
231df930be7Sderaadt 			WR_BINTERNAL(dest, nksize ? nksize : bl->ksize,
232df930be7Sderaadt 			    rchild->pgno, bl->flags & P_BIGKEY);
233df930be7Sderaadt 			memmove(dest, bl->bytes, nksize ? nksize : bl->ksize);
234df930be7Sderaadt 			if (bl->flags & P_BIGKEY &&
235df930be7Sderaadt 			    bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR)
236df930be7Sderaadt 				goto err1;
237df930be7Sderaadt 			break;
238df930be7Sderaadt 		case P_RINTERNAL:
239df930be7Sderaadt 			/*
240df930be7Sderaadt 			 * Update the left page count.  If split
241df930be7Sderaadt 			 * added at index 0, fix the correct page.
242df930be7Sderaadt 			 */
243df930be7Sderaadt 			if (skip > 0)
244df930be7Sderaadt 				dest = (char *)h + h->linp[skip - 1];
245df930be7Sderaadt 			else
246df930be7Sderaadt 				dest = (char *)l + l->linp[NEXTINDEX(l) - 1];
247df930be7Sderaadt 			((RINTERNAL *)dest)->nrecs = rec_total(lchild);
248df930be7Sderaadt 			((RINTERNAL *)dest)->pgno = lchild->pgno;
249df930be7Sderaadt 
250df930be7Sderaadt 			/* Update the right page count. */
251df930be7Sderaadt 			h->linp[skip] = h->upper -= nbytes;
252df930be7Sderaadt 			dest = (char *)h + h->linp[skip];
253df930be7Sderaadt 			((RINTERNAL *)dest)->nrecs = rec_total(rchild);
254df930be7Sderaadt 			((RINTERNAL *)dest)->pgno = rchild->pgno;
255df930be7Sderaadt 			break;
256df930be7Sderaadt 		case P_RLEAF:
257df930be7Sderaadt 			/*
258df930be7Sderaadt 			 * Update the left page count.  If split
259df930be7Sderaadt 			 * added at index 0, fix the correct page.
260df930be7Sderaadt 			 */
261df930be7Sderaadt 			if (skip > 0)
262df930be7Sderaadt 				dest = (char *)h + h->linp[skip - 1];
263df930be7Sderaadt 			else
264df930be7Sderaadt 				dest = (char *)l + l->linp[NEXTINDEX(l) - 1];
265df930be7Sderaadt 			((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild);
266df930be7Sderaadt 			((RINTERNAL *)dest)->pgno = lchild->pgno;
267df930be7Sderaadt 
268df930be7Sderaadt 			/* Update the right page count. */
269df930be7Sderaadt 			h->linp[skip] = h->upper -= nbytes;
270df930be7Sderaadt 			dest = (char *)h + h->linp[skip];
271df930be7Sderaadt 			((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild);
272df930be7Sderaadt 			((RINTERNAL *)dest)->pgno = rchild->pgno;
273df930be7Sderaadt 			break;
274df930be7Sderaadt 		default:
275df930be7Sderaadt 			abort();
276df930be7Sderaadt 		}
277df930be7Sderaadt 
278df930be7Sderaadt 		/* Unpin the held pages. */
279df930be7Sderaadt 		if (!parentsplit) {
280df930be7Sderaadt 			mpool_put(t->bt_mp, h, MPOOL_DIRTY);
281df930be7Sderaadt 			break;
282df930be7Sderaadt 		}
283df930be7Sderaadt 
284df930be7Sderaadt 		/* If the root page was split, make it look right. */
285df930be7Sderaadt 		if (sp->pgno == P_ROOT &&
286bec2d00aSderaadt 		    (F_ISSET(t, R_RECNO) ?
287df930be7Sderaadt 		    bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
288df930be7Sderaadt 			goto err1;
289df930be7Sderaadt 
290df930be7Sderaadt 		mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
291df930be7Sderaadt 		mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
292df930be7Sderaadt 	}
293df930be7Sderaadt 
294df930be7Sderaadt 	/* Unpin the held pages. */
295df930be7Sderaadt 	mpool_put(t->bt_mp, l, MPOOL_DIRTY);
296df930be7Sderaadt 	mpool_put(t->bt_mp, r, MPOOL_DIRTY);
297df930be7Sderaadt 
298df930be7Sderaadt 	/* Clear any pages left on the stack. */
299df930be7Sderaadt 	return (RET_SUCCESS);
300df930be7Sderaadt 
301df930be7Sderaadt 	/*
302df930be7Sderaadt 	 * If something fails in the above loop we were already walking back
303df930be7Sderaadt 	 * up the tree and the tree is now inconsistent.  Nothing much we can
304df930be7Sderaadt 	 * do about it but release any memory we're holding.
305df930be7Sderaadt 	 */
306df930be7Sderaadt err1:	mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
307df930be7Sderaadt 	mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
308df930be7Sderaadt 
309df930be7Sderaadt err2:	mpool_put(t->bt_mp, l, 0);
310df930be7Sderaadt 	mpool_put(t->bt_mp, r, 0);
311df930be7Sderaadt 	__dbpanic(t->bt_dbp);
312df930be7Sderaadt 	return (RET_ERROR);
313df930be7Sderaadt }
314df930be7Sderaadt 
315df930be7Sderaadt /*
316df930be7Sderaadt  * BT_PAGE -- Split a non-root page of a btree.
317df930be7Sderaadt  *
318df930be7Sderaadt  * Parameters:
319df930be7Sderaadt  *	t:	tree
320df930be7Sderaadt  *	h:	root page
321df930be7Sderaadt  *	lp:	pointer to left page pointer
322df930be7Sderaadt  *	rp:	pointer to right page pointer
323df930be7Sderaadt  *	skip:	pointer to index to leave open
324df930be7Sderaadt  *	ilen:	insert length
325df930be7Sderaadt  *
326df930be7Sderaadt  * Returns:
327df930be7Sderaadt  *	Pointer to page in which to insert or NULL on error.
328df930be7Sderaadt  */
329df930be7Sderaadt static PAGE *
bt_page(BTREE * t,PAGE * h,PAGE ** lp,PAGE ** rp,indx_t * skip,size_t ilen)330e20a56a5Sotto bt_page(BTREE *t, PAGE *h, PAGE **lp, PAGE **rp, indx_t *skip, size_t ilen)
331df930be7Sderaadt {
332df930be7Sderaadt 	PAGE *l, *r, *tp;
333df930be7Sderaadt 	pgno_t npg;
334df930be7Sderaadt 
335df930be7Sderaadt #ifdef STATISTICS
336df930be7Sderaadt 	++bt_split;
337df930be7Sderaadt #endif
338df930be7Sderaadt 	/* Put the new right page for the split into place. */
339df930be7Sderaadt 	if ((r = __bt_new(t, &npg)) == NULL)
340df930be7Sderaadt 		return (NULL);
341df930be7Sderaadt 	r->pgno = npg;
342df930be7Sderaadt 	r->lower = BTDATAOFF;
343df930be7Sderaadt 	r->upper = t->bt_psize;
344df930be7Sderaadt 	r->nextpg = h->nextpg;
345df930be7Sderaadt 	r->prevpg = h->pgno;
346df930be7Sderaadt 	r->flags = h->flags & P_TYPE;
347df930be7Sderaadt 
348df930be7Sderaadt 	/*
349df930be7Sderaadt 	 * If we're splitting the last page on a level because we're appending
350df930be7Sderaadt 	 * a key to it (skip is NEXTINDEX()), it's likely that the data is
351df930be7Sderaadt 	 * sorted.  Adding an empty page on the side of the level is less work
352df930be7Sderaadt 	 * and can push the fill factor much higher than normal.  If we're
353df930be7Sderaadt 	 * wrong it's no big deal, we'll just do the split the right way next
354df930be7Sderaadt 	 * time.  It may look like it's equally easy to do a similar hack for
355df930be7Sderaadt 	 * reverse sorted data, that is, split the tree left, but it's not.
356df930be7Sderaadt 	 * Don't even try.
357df930be7Sderaadt 	 */
358df930be7Sderaadt 	if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) {
359df930be7Sderaadt #ifdef STATISTICS
360df930be7Sderaadt 		++bt_sortsplit;
361df930be7Sderaadt #endif
362df930be7Sderaadt 		h->nextpg = r->pgno;
363df930be7Sderaadt 		r->lower = BTDATAOFF + sizeof(indx_t);
364df930be7Sderaadt 		*skip = 0;
365df930be7Sderaadt 		*lp = h;
366df930be7Sderaadt 		*rp = r;
367df930be7Sderaadt 		return (r);
368df930be7Sderaadt 	}
369df930be7Sderaadt 
370df930be7Sderaadt 	/* Put the new left page for the split into place. */
371df930be7Sderaadt 	if ((l = (PAGE *)malloc(t->bt_psize)) == NULL) {
372df930be7Sderaadt 		mpool_put(t->bt_mp, r, 0);
373df930be7Sderaadt 		return (NULL);
374df930be7Sderaadt 	}
375bec2d00aSderaadt 	memset(l, 0xff, t->bt_psize);
376df930be7Sderaadt 	l->pgno = h->pgno;
377df930be7Sderaadt 	l->nextpg = r->pgno;
378df930be7Sderaadt 	l->prevpg = h->prevpg;
379df930be7Sderaadt 	l->lower = BTDATAOFF;
380df930be7Sderaadt 	l->upper = t->bt_psize;
381df930be7Sderaadt 	l->flags = h->flags & P_TYPE;
382df930be7Sderaadt 
383df930be7Sderaadt 	/* Fix up the previous pointer of the page after the split page. */
384df930be7Sderaadt 	if (h->nextpg != P_INVALID) {
385df930be7Sderaadt 		if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) {
386df930be7Sderaadt 			free(l);
387df930be7Sderaadt 			/* XXX mpool_free(t->bt_mp, r->pgno); */
388df930be7Sderaadt 			return (NULL);
389df930be7Sderaadt 		}
390df930be7Sderaadt 		tp->prevpg = r->pgno;
391bec2d00aSderaadt 		mpool_put(t->bt_mp, tp, MPOOL_DIRTY);
392df930be7Sderaadt 	}
393df930be7Sderaadt 
394df930be7Sderaadt 	/*
395df930be7Sderaadt 	 * Split right.  The key/data pairs aren't sorted in the btree page so
396df930be7Sderaadt 	 * it's simpler to copy the data from the split page onto two new pages
397df930be7Sderaadt 	 * instead of copying half the data to the right page and compacting
398df930be7Sderaadt 	 * the left page in place.  Since the left page can't change, we have
399df930be7Sderaadt 	 * to swap the original and the allocated left page after the split.
400df930be7Sderaadt 	 */
401df930be7Sderaadt 	tp = bt_psplit(t, h, l, r, skip, ilen);
402df930be7Sderaadt 
403df930be7Sderaadt 	/* Move the new left page onto the old left page. */
404df930be7Sderaadt 	memmove(h, l, t->bt_psize);
405df930be7Sderaadt 	if (tp == l)
406df930be7Sderaadt 		tp = h;
407df930be7Sderaadt 	free(l);
408df930be7Sderaadt 
409df930be7Sderaadt 	*lp = h;
410df930be7Sderaadt 	*rp = r;
411df930be7Sderaadt 	return (tp);
412df930be7Sderaadt }
413df930be7Sderaadt 
414df930be7Sderaadt /*
415df930be7Sderaadt  * BT_ROOT -- Split the root page of a btree.
416df930be7Sderaadt  *
417df930be7Sderaadt  * Parameters:
418df930be7Sderaadt  *	t:	tree
419df930be7Sderaadt  *	h:	root page
420df930be7Sderaadt  *	lp:	pointer to left page pointer
421df930be7Sderaadt  *	rp:	pointer to right page pointer
422df930be7Sderaadt  *	skip:	pointer to index to leave open
423df930be7Sderaadt  *	ilen:	insert length
424df930be7Sderaadt  *
425df930be7Sderaadt  * Returns:
426df930be7Sderaadt  *	Pointer to page in which to insert or NULL on error.
427df930be7Sderaadt  */
428df930be7Sderaadt static PAGE *
bt_root(BTREE * t,PAGE * h,PAGE ** lp,PAGE ** rp,indx_t * skip,size_t ilen)429e20a56a5Sotto bt_root(BTREE *t, PAGE *h, PAGE **lp, PAGE **rp, indx_t *skip, size_t ilen)
430df930be7Sderaadt {
431df930be7Sderaadt 	PAGE *l, *r, *tp;
432df930be7Sderaadt 	pgno_t lnpg, rnpg;
433df930be7Sderaadt 
434df930be7Sderaadt #ifdef STATISTICS
435df930be7Sderaadt 	++bt_split;
436df930be7Sderaadt 	++bt_rootsplit;
437df930be7Sderaadt #endif
438df930be7Sderaadt 	/* Put the new left and right pages for the split into place. */
439df930be7Sderaadt 	if ((l = __bt_new(t, &lnpg)) == NULL ||
440df930be7Sderaadt 	    (r = __bt_new(t, &rnpg)) == NULL)
441df930be7Sderaadt 		return (NULL);
442df930be7Sderaadt 	l->pgno = lnpg;
443df930be7Sderaadt 	r->pgno = rnpg;
444df930be7Sderaadt 	l->nextpg = r->pgno;
445df930be7Sderaadt 	r->prevpg = l->pgno;
446df930be7Sderaadt 	l->prevpg = r->nextpg = P_INVALID;
447df930be7Sderaadt 	l->lower = r->lower = BTDATAOFF;
448df930be7Sderaadt 	l->upper = r->upper = t->bt_psize;
449df930be7Sderaadt 	l->flags = r->flags = h->flags & P_TYPE;
450df930be7Sderaadt 
451df930be7Sderaadt 	/* Split the root page. */
452df930be7Sderaadt 	tp = bt_psplit(t, h, l, r, skip, ilen);
453df930be7Sderaadt 
454df930be7Sderaadt 	*lp = l;
455df930be7Sderaadt 	*rp = r;
456df930be7Sderaadt 	return (tp);
457df930be7Sderaadt }
458df930be7Sderaadt 
459df930be7Sderaadt /*
460df930be7Sderaadt  * BT_RROOT -- Fix up the recno root page after it has been split.
461df930be7Sderaadt  *
462df930be7Sderaadt  * Parameters:
463df930be7Sderaadt  *	t:	tree
464df930be7Sderaadt  *	h:	root page
465df930be7Sderaadt  *	l:	left page
466df930be7Sderaadt  *	r:	right page
467df930be7Sderaadt  *
468df930be7Sderaadt  * Returns:
469df930be7Sderaadt  *	RET_ERROR, RET_SUCCESS
470df930be7Sderaadt  */
471df930be7Sderaadt static int
bt_rroot(BTREE * t,PAGE * h,PAGE * l,PAGE * r)472e20a56a5Sotto bt_rroot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
473df930be7Sderaadt {
474df930be7Sderaadt 	char *dest;
475df930be7Sderaadt 
476df930be7Sderaadt 	/* Insert the left and right keys, set the header information. */
477df930be7Sderaadt 	h->linp[0] = h->upper = t->bt_psize - NRINTERNAL;
478df930be7Sderaadt 	dest = (char *)h + h->upper;
479df930be7Sderaadt 	WR_RINTERNAL(dest,
480df930be7Sderaadt 	    l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno);
481df930be7Sderaadt 
482df930be7Sderaadt 	h->linp[1] = h->upper -= NRINTERNAL;
483df930be7Sderaadt 	dest = (char *)h + h->upper;
484df930be7Sderaadt 	WR_RINTERNAL(dest,
485df930be7Sderaadt 	    r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno);
486df930be7Sderaadt 
487df930be7Sderaadt 	h->lower = BTDATAOFF + 2 * sizeof(indx_t);
488df930be7Sderaadt 
489df930be7Sderaadt 	/* Unpin the root page, set to recno internal page. */
490df930be7Sderaadt 	h->flags &= ~P_TYPE;
491df930be7Sderaadt 	h->flags |= P_RINTERNAL;
492df930be7Sderaadt 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
493df930be7Sderaadt 
494df930be7Sderaadt 	return (RET_SUCCESS);
495df930be7Sderaadt }
496df930be7Sderaadt 
497df930be7Sderaadt /*
498df930be7Sderaadt  * BT_BROOT -- Fix up the btree root page after it has been split.
499df930be7Sderaadt  *
500df930be7Sderaadt  * Parameters:
501df930be7Sderaadt  *	t:	tree
502df930be7Sderaadt  *	h:	root page
503df930be7Sderaadt  *	l:	left page
504df930be7Sderaadt  *	r:	right page
505df930be7Sderaadt  *
506df930be7Sderaadt  * Returns:
507df930be7Sderaadt  *	RET_ERROR, RET_SUCCESS
508df930be7Sderaadt  */
509df930be7Sderaadt static int
bt_broot(BTREE * t,PAGE * h,PAGE * l,PAGE * r)510e20a56a5Sotto bt_broot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
511df930be7Sderaadt {
512df930be7Sderaadt 	BINTERNAL *bi;
513df930be7Sderaadt 	BLEAF *bl;
514df930be7Sderaadt 	u_int32_t nbytes;
515df930be7Sderaadt 	char *dest;
516df930be7Sderaadt 
517df930be7Sderaadt 	/*
518df930be7Sderaadt 	 * If the root page was a leaf page, change it into an internal page.
519df930be7Sderaadt 	 * We copy the key we split on (but not the key's data, in the case of
520df930be7Sderaadt 	 * a leaf page) to the new root page.
521df930be7Sderaadt 	 *
522df930be7Sderaadt 	 * The btree comparison code guarantees that the left-most key on any
523df930be7Sderaadt 	 * level of the tree is never used, so it doesn't need to be filled in.
524df930be7Sderaadt 	 */
525df930be7Sderaadt 	nbytes = NBINTERNAL(0);
526df930be7Sderaadt 	h->linp[0] = h->upper = t->bt_psize - nbytes;
527df930be7Sderaadt 	dest = (char *)h + h->upper;
528df930be7Sderaadt 	WR_BINTERNAL(dest, 0, l->pgno, 0);
529df930be7Sderaadt 
530df930be7Sderaadt 	switch (h->flags & P_TYPE) {
531df930be7Sderaadt 	case P_BLEAF:
532df930be7Sderaadt 		bl = GETBLEAF(r, 0);
533df930be7Sderaadt 		nbytes = NBINTERNAL(bl->ksize);
534df930be7Sderaadt 		h->linp[1] = h->upper -= nbytes;
535df930be7Sderaadt 		dest = (char *)h + h->upper;
536df930be7Sderaadt 		WR_BINTERNAL(dest, bl->ksize, r->pgno, 0);
537df930be7Sderaadt 		memmove(dest, bl->bytes, bl->ksize);
538df930be7Sderaadt 
539df930be7Sderaadt 		/*
540df930be7Sderaadt 		 * If the key is on an overflow page, mark the overflow chain
541df930be7Sderaadt 		 * so it isn't deleted when the leaf copy of the key is deleted.
542df930be7Sderaadt 		 */
543df930be7Sderaadt 		if (bl->flags & P_BIGKEY &&
544df930be7Sderaadt 		    bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR)
545df930be7Sderaadt 			return (RET_ERROR);
546df930be7Sderaadt 		break;
547df930be7Sderaadt 	case P_BINTERNAL:
548df930be7Sderaadt 		bi = GETBINTERNAL(r, 0);
549df930be7Sderaadt 		nbytes = NBINTERNAL(bi->ksize);
550df930be7Sderaadt 		h->linp[1] = h->upper -= nbytes;
551df930be7Sderaadt 		dest = (char *)h + h->upper;
552df930be7Sderaadt 		memmove(dest, bi, nbytes);
553df930be7Sderaadt 		((BINTERNAL *)dest)->pgno = r->pgno;
554df930be7Sderaadt 		break;
555df930be7Sderaadt 	default:
556df930be7Sderaadt 		abort();
557df930be7Sderaadt 	}
558df930be7Sderaadt 
559df930be7Sderaadt 	/* There are two keys on the page. */
560df930be7Sderaadt 	h->lower = BTDATAOFF + 2 * sizeof(indx_t);
561df930be7Sderaadt 
562df930be7Sderaadt 	/* Unpin the root page, set to btree internal page. */
563df930be7Sderaadt 	h->flags &= ~P_TYPE;
564df930be7Sderaadt 	h->flags |= P_BINTERNAL;
565df930be7Sderaadt 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
566df930be7Sderaadt 
567df930be7Sderaadt 	return (RET_SUCCESS);
568df930be7Sderaadt }
569df930be7Sderaadt 
570df930be7Sderaadt /*
571df930be7Sderaadt  * BT_PSPLIT -- Do the real work of splitting the page.
572df930be7Sderaadt  *
573df930be7Sderaadt  * Parameters:
574df930be7Sderaadt  *	t:	tree
575df930be7Sderaadt  *	h:	page to be split
576df930be7Sderaadt  *	l:	page to put lower half of data
577df930be7Sderaadt  *	r:	page to put upper half of data
578df930be7Sderaadt  *	pskip:	pointer to index to leave open
579df930be7Sderaadt  *	ilen:	insert length
580df930be7Sderaadt  *
581df930be7Sderaadt  * Returns:
582df930be7Sderaadt  *	Pointer to page in which to insert.
583df930be7Sderaadt  */
584df930be7Sderaadt static PAGE *
bt_psplit(BTREE * t,PAGE * h,PAGE * l,PAGE * r,indx_t * pskip,size_t ilen)585e20a56a5Sotto bt_psplit(BTREE *t, PAGE *h, PAGE *l, PAGE *r, indx_t *pskip, size_t ilen)
586df930be7Sderaadt {
587df930be7Sderaadt 	BINTERNAL *bi;
588df930be7Sderaadt 	BLEAF *bl;
589bec2d00aSderaadt 	CURSOR *c;
590df930be7Sderaadt 	RLEAF *rl;
591df930be7Sderaadt 	PAGE *rval;
592df930be7Sderaadt 	void *src;
593df930be7Sderaadt 	indx_t full, half, nxt, off, skip, top, used;
594df930be7Sderaadt 	u_int32_t nbytes;
595df930be7Sderaadt 	int bigkeycnt, isbigkey;
596df930be7Sderaadt 
597df930be7Sderaadt 	/*
598df930be7Sderaadt 	 * Split the data to the left and right pages.  Leave the skip index
599df930be7Sderaadt 	 * open.  Additionally, make some effort not to split on an overflow
600df930be7Sderaadt 	 * key.  This makes internal page processing faster and can save
601df930be7Sderaadt 	 * space as overflow keys used by internal pages are never deleted.
602df930be7Sderaadt 	 */
603df930be7Sderaadt 	bigkeycnt = 0;
604df930be7Sderaadt 	skip = *pskip;
605df930be7Sderaadt 	full = t->bt_psize - BTDATAOFF;
606df930be7Sderaadt 	half = full / 2;
607df930be7Sderaadt 	used = 0;
608df930be7Sderaadt 	for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) {
609df930be7Sderaadt 		if (skip == off) {
610df930be7Sderaadt 			nbytes = ilen;
611df930be7Sderaadt 			isbigkey = 0;		/* XXX: not really known. */
612df930be7Sderaadt 		} else
613df930be7Sderaadt 			switch (h->flags & P_TYPE) {
614df930be7Sderaadt 			case P_BINTERNAL:
615df930be7Sderaadt 				src = bi = GETBINTERNAL(h, nxt);
616df930be7Sderaadt 				nbytes = NBINTERNAL(bi->ksize);
617df930be7Sderaadt 				isbigkey = bi->flags & P_BIGKEY;
618df930be7Sderaadt 				break;
619df930be7Sderaadt 			case P_BLEAF:
620df930be7Sderaadt 				src = bl = GETBLEAF(h, nxt);
621df930be7Sderaadt 				nbytes = NBLEAF(bl);
622df930be7Sderaadt 				isbigkey = bl->flags & P_BIGKEY;
623df930be7Sderaadt 				break;
624df930be7Sderaadt 			case P_RINTERNAL:
625df930be7Sderaadt 				src = GETRINTERNAL(h, nxt);
626df930be7Sderaadt 				nbytes = NRINTERNAL;
627df930be7Sderaadt 				isbigkey = 0;
628df930be7Sderaadt 				break;
629df930be7Sderaadt 			case P_RLEAF:
630df930be7Sderaadt 				src = rl = GETRLEAF(h, nxt);
631df930be7Sderaadt 				nbytes = NRLEAF(rl);
632df930be7Sderaadt 				isbigkey = 0;
633df930be7Sderaadt 				break;
634df930be7Sderaadt 			default:
635df930be7Sderaadt 				abort();
636df930be7Sderaadt 			}
637df930be7Sderaadt 
638df930be7Sderaadt 		/*
639df930be7Sderaadt 		 * If the key/data pairs are substantial fractions of the max
640df930be7Sderaadt 		 * possible size for the page, it's possible to get situations
641df930be7Sderaadt 		 * where we decide to try and copy too much onto the left page.
642df930be7Sderaadt 		 * Make sure that doesn't happen.
643df930be7Sderaadt 		 */
644eab84c55Sderaadt 		if ((skip <= off && used + nbytes + sizeof(indx_t) >= full) ||
645eab84c55Sderaadt 		    nxt == top - 1) {
646df930be7Sderaadt 			--off;
647df930be7Sderaadt 			break;
648df930be7Sderaadt 		}
649df930be7Sderaadt 
650df930be7Sderaadt 		/* Copy the key/data pair, if not the skipped index. */
651df930be7Sderaadt 		if (skip != off) {
652df930be7Sderaadt 			++nxt;
653df930be7Sderaadt 
654df930be7Sderaadt 			l->linp[off] = l->upper -= nbytes;
655df930be7Sderaadt 			memmove((char *)l + l->upper, src, nbytes);
656df930be7Sderaadt 		}
657df930be7Sderaadt 
658eab84c55Sderaadt 		used += nbytes + sizeof(indx_t);
659df930be7Sderaadt 		if (used >= half) {
660df930be7Sderaadt 			if (!isbigkey || bigkeycnt == 3)
661df930be7Sderaadt 				break;
662df930be7Sderaadt 			else
663df930be7Sderaadt 				++bigkeycnt;
664df930be7Sderaadt 		}
665df930be7Sderaadt 	}
666df930be7Sderaadt 
667df930be7Sderaadt 	/*
668df930be7Sderaadt 	 * Off is the last offset that's valid for the left page.
669df930be7Sderaadt 	 * Nxt is the first offset to be placed on the right page.
670df930be7Sderaadt 	 */
671df930be7Sderaadt 	l->lower += (off + 1) * sizeof(indx_t);
672df930be7Sderaadt 
673df930be7Sderaadt 	/*
674df930be7Sderaadt 	 * If splitting the page that the cursor was on, the cursor has to be
675df930be7Sderaadt 	 * adjusted to point to the same record as before the split.  If the
676df930be7Sderaadt 	 * cursor is at or past the skipped slot, the cursor is incremented by
677df930be7Sderaadt 	 * one.  If the cursor is on the right page, it is decremented by the
678df930be7Sderaadt 	 * number of records split to the left page.
679df930be7Sderaadt 	 */
680bec2d00aSderaadt 	c = &t->bt_cursor;
681bec2d00aSderaadt 	if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) {
682bec2d00aSderaadt 		if (c->pg.index >= skip)
683bec2d00aSderaadt 			++c->pg.index;
684bec2d00aSderaadt 		if (c->pg.index < nxt)			/* Left page. */
685bec2d00aSderaadt 			c->pg.pgno = l->pgno;
686df930be7Sderaadt 		else {					/* Right page. */
687bec2d00aSderaadt 			c->pg.pgno = r->pgno;
688bec2d00aSderaadt 			c->pg.index -= nxt;
689df930be7Sderaadt 		}
690df930be7Sderaadt 	}
691df930be7Sderaadt 
692df930be7Sderaadt 	/*
693df930be7Sderaadt 	 * If the skipped index was on the left page, just return that page.
694df930be7Sderaadt 	 * Otherwise, adjust the skip index to reflect the new position on
695df930be7Sderaadt 	 * the right page.
696df930be7Sderaadt 	 */
697df930be7Sderaadt 	if (skip <= off) {
6984da6d40eSmillert 		skip = MAX_PAGE_OFFSET;
699df930be7Sderaadt 		rval = l;
700df930be7Sderaadt 	} else {
701df930be7Sderaadt 		rval = r;
702df930be7Sderaadt 		*pskip -= nxt;
703df930be7Sderaadt 	}
704df930be7Sderaadt 
705df930be7Sderaadt 	for (off = 0; nxt < top; ++off) {
706df930be7Sderaadt 		if (skip == nxt) {
707df930be7Sderaadt 			++off;
7084da6d40eSmillert 			skip = MAX_PAGE_OFFSET;
709df930be7Sderaadt 		}
710df930be7Sderaadt 		switch (h->flags & P_TYPE) {
711df930be7Sderaadt 		case P_BINTERNAL:
712df930be7Sderaadt 			src = bi = GETBINTERNAL(h, nxt);
713df930be7Sderaadt 			nbytes = NBINTERNAL(bi->ksize);
714df930be7Sderaadt 			break;
715df930be7Sderaadt 		case P_BLEAF:
716df930be7Sderaadt 			src = bl = GETBLEAF(h, nxt);
717df930be7Sderaadt 			nbytes = NBLEAF(bl);
718df930be7Sderaadt 			break;
719df930be7Sderaadt 		case P_RINTERNAL:
720df930be7Sderaadt 			src = GETRINTERNAL(h, nxt);
721df930be7Sderaadt 			nbytes = NRINTERNAL;
722df930be7Sderaadt 			break;
723df930be7Sderaadt 		case P_RLEAF:
724df930be7Sderaadt 			src = rl = GETRLEAF(h, nxt);
725df930be7Sderaadt 			nbytes = NRLEAF(rl);
726df930be7Sderaadt 			break;
727df930be7Sderaadt 		default:
728df930be7Sderaadt 			abort();
729df930be7Sderaadt 		}
730df930be7Sderaadt 		++nxt;
731df930be7Sderaadt 		r->linp[off] = r->upper -= nbytes;
732df930be7Sderaadt 		memmove((char *)r + r->upper, src, nbytes);
733df930be7Sderaadt 	}
734df930be7Sderaadt 	r->lower += off * sizeof(indx_t);
735df930be7Sderaadt 
736df930be7Sderaadt 	/* If the key is being appended to the page, adjust the index. */
737df930be7Sderaadt 	if (skip == top)
738df930be7Sderaadt 		r->lower += sizeof(indx_t);
739df930be7Sderaadt 
740df930be7Sderaadt 	return (rval);
741df930be7Sderaadt }
742df930be7Sderaadt 
743df930be7Sderaadt /*
744df930be7Sderaadt  * BT_PRESERVE -- Mark a chain of pages as used by an internal node.
745df930be7Sderaadt  *
746df930be7Sderaadt  * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the
747df930be7Sderaadt  * record that references them gets deleted.  Chains pointed to by internal
748df930be7Sderaadt  * pages never get deleted.  This routine marks a chain as pointed to by an
749df930be7Sderaadt  * internal page.
750df930be7Sderaadt  *
751df930be7Sderaadt  * Parameters:
752df930be7Sderaadt  *	t:	tree
753df930be7Sderaadt  *	pg:	page number of first page in the chain.
754df930be7Sderaadt  *
755df930be7Sderaadt  * Returns:
756df930be7Sderaadt  *	RET_SUCCESS, RET_ERROR.
757df930be7Sderaadt  */
758df930be7Sderaadt static int
bt_preserve(BTREE * t,pgno_t pg)759e20a56a5Sotto bt_preserve(BTREE *t, pgno_t pg)
760df930be7Sderaadt {
761df930be7Sderaadt 	PAGE *h;
762df930be7Sderaadt 
763df930be7Sderaadt 	if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
764df930be7Sderaadt 		return (RET_ERROR);
765df930be7Sderaadt 	h->flags |= P_PRESERVE;
766df930be7Sderaadt 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
767df930be7Sderaadt 	return (RET_SUCCESS);
768df930be7Sderaadt }
769df930be7Sderaadt 
770df930be7Sderaadt /*
771df930be7Sderaadt  * REC_TOTAL -- Return the number of recno entries below a page.
772df930be7Sderaadt  *
773df930be7Sderaadt  * Parameters:
774df930be7Sderaadt  *	h:	page
775df930be7Sderaadt  *
776df930be7Sderaadt  * Returns:
777df930be7Sderaadt  *	The number of recno entries below a page.
778df930be7Sderaadt  *
779df930be7Sderaadt  * XXX
780df930be7Sderaadt  * These values could be set by the bt_psplit routine.  The problem is that the
781df930be7Sderaadt  * entry has to be popped off of the stack etc. or the values have to be passed
782df930be7Sderaadt  * all the way back to bt_split/bt_rroot and it's not very clean.
783df930be7Sderaadt  */
784df930be7Sderaadt static recno_t
rec_total(PAGE * h)785e20a56a5Sotto rec_total(PAGE *h)
786df930be7Sderaadt {
787df930be7Sderaadt 	recno_t recs;
788df930be7Sderaadt 	indx_t nxt, top;
789df930be7Sderaadt 
790df930be7Sderaadt 	for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt)
791df930be7Sderaadt 		recs += GETRINTERNAL(h, nxt)->nrecs;
792df930be7Sderaadt 	return (recs);
793df930be7Sderaadt }
794