xref: /csrg-svn/lib/libc/db/btree/bt_split.c (revision 46561)
146134Smao /*-
246134Smao  * Copyright (c) 1990 The Regents of the University of California.
346134Smao  * All rights reserved.
446134Smao  *
546134Smao  * This code is derived from software contributed to Berkeley by
646134Smao  * Mike Olson.
746134Smao  *
846134Smao  * %sccs.include.redist.c%
946134Smao  */
1046134Smao 
1146134Smao #if defined(LIBC_SCCS) && !defined(lint)
12*46561Sbostic static char sccsid[] = "@(#)bt_split.c	5.2 (Berkeley) 02/22/91";
1346134Smao #endif /* LIBC_SCCS and not lint */
1446134Smao 
1546134Smao #include <sys/types.h>
1646134Smao #include <db.h>
17*46561Sbostic #include <stdlib.h>
18*46561Sbostic #include <string.h>
1946134Smao #include "btree.h"
2046134Smao 
2146134Smao /*
2246134Smao  *  _BT_SPLIT -- Split a page into two pages.
2346134Smao  *
2446134Smao  *	Splits are caused by insertions, and propogate up the tree in
2546134Smao  *	the usual way.  The root page is always page 1 in the file on
2646134Smao  *	disk, so root splits are handled specially.  On entry to this
2746134Smao  *	routine, t->bt_curpage is the page to be split.
2846134Smao  *
2946134Smao  *	Parameters:
3046134Smao  *		t -- btree in which to do split.
3146134Smao  *
3246134Smao  *	Returns:
3346134Smao  *		RET_SUCCESS, RET_ERROR.
3446134Smao  *
3546134Smao  *	Side Effects:
3646134Smao  *		Changes the notion of the current page.
3746134Smao  */
3846134Smao 
3946134Smao int
4046134Smao _bt_split(t)
4146134Smao 	BTREE_P t;
4246134Smao {
4346134Smao 	BTHEADER *h;
4446134Smao 	BTHEADER *left, *right;
4546134Smao 	pgno_t nextpgno, parent;
4646134Smao 	int nbytes, len;
4746134Smao 	IDATUM *id;
4846134Smao 	DATUM *d;
4946134Smao 	char *src;
5046134Smao 	IDATUM *new;
5146134Smao 	pgno_t oldchain;
5246134Smao 	u_char flags;
5346134Smao 
5446134Smao 	h = (BTHEADER *) t->bt_curpage;
5546134Smao 
5646134Smao 	/* split root page specially, since it must remain page 1 */
5746134Smao 	if (h->h_pgno == P_ROOT) {
5846134Smao 		return (_bt_splitroot(t));
5946134Smao 	}
6046134Smao 
6146134Smao 	/*
6246134Smao 	 *  This is a little complicated.  We go to some trouble to
6346134Smao 	 *  figure out which of the three possible cases -- in-memory tree,
6446134Smao 	 *  disk tree (no cache), and disk tree (cache) -- we have, in order
6546134Smao 	 *  to avoid unnecessary copying.  If we have a disk cache, then we
6646134Smao 	 *  have to do some extra copying, though, since the cache code
6746134Smao 	 *  manages buffers externally to this code.
6846134Smao 	 */
6946134Smao 
7046134Smao 	if (ISDISK(t) && ISCACHE(t)) {
7146134Smao 		if ((left = (BTHEADER *) malloc((unsigned) t->bt_psize))
7246134Smao 		    == (BTHEADER *) NULL)
7346134Smao 			return (RET_ERROR);
7446134Smao 		left->h_pgno = left->h_prevpg = left->h_nextpg = P_NONE;
7546134Smao 		left->h_flags = t->bt_curpage->h_flags;
7646134Smao 		left->h_lower = (index_t)
7746134Smao 			  (((char *) &(left->h_linp[0])) - ((char *) left));
7846134Smao 		left->h_upper = t->bt_psize;
7946134Smao 
8046134Smao 	} else {
8146134Smao 		if ((left = _bt_allocpg(t)) == (BTHEADER *) NULL)
8246134Smao 			return (RET_ERROR);
8346134Smao 	}
8446134Smao 	left->h_pgno = h->h_pgno;
8546134Smao 
8646134Smao 	if ((right = _bt_allocpg(t)) == (BTHEADER *) NULL)
8746134Smao 		return (RET_ERROR);
8846134Smao 	right->h_pgno = ++(t->bt_npages);
8946134Smao 
9046134Smao 	/* now do the split */
9146134Smao 	if (_bt_dopsplit(t, left, right) == RET_ERROR)
9246134Smao 		return (RET_ERROR);
9346134Smao 
9446134Smao 	right->h_prevpg = left->h_pgno;
9546134Smao 	nextpgno = right->h_nextpg = h->h_nextpg;
9646134Smao 	left->h_nextpg = right->h_pgno;
9746134Smao 	left->h_prevpg = h->h_prevpg;
9846134Smao 
9946134Smao 	/* okay, now use the left half of the page as the new page */
10046134Smao 	if (ISDISK(t) && ISCACHE(t)) {
10146134Smao 		(void) bcopy((char *) left, (char *) t->bt_curpage,
10246134Smao 			     (int) t->bt_psize);
10346134Smao 		(void) free ((char *) left);
10446134Smao 		left = t->bt_curpage;
10546134Smao 	} else {
10646134Smao 		(void) free((char *) t->bt_curpage);
10746134Smao 		t->bt_curpage = left;
10846134Smao 	}
10946134Smao 
11046134Smao 	/*
11146134Smao 	 *  Write the new pages out.  We need them to stay where they are
11246134Smao 	 *  until we're done updating the parent pages.
11346134Smao 	 */
11446134Smao 
11546134Smao 	if (_bt_write(t, left, NORELEASE) == RET_ERROR)
11646134Smao 		return (RET_ERROR);
11746134Smao 	if (_bt_write(t, right, NORELEASE) == RET_ERROR)
11846134Smao 		return (RET_ERROR);
11946134Smao 
12046134Smao 	/* update 'prev' pointer of old neighbor of left */
12146134Smao 	if (nextpgno != P_NONE) {
12246134Smao 		if (_bt_getpage(t, nextpgno) == RET_ERROR)
12346134Smao 			return (RET_ERROR);
12446134Smao 		h = t->bt_curpage;
12546134Smao 		h->h_prevpg = right->h_pgno;
12646134Smao 		h->h_flags |= F_DIRTY;
12746134Smao 	}
12846134Smao 
12946134Smao 	if ((parent = _bt_pop(t)) != P_NONE) {
13046134Smao 		if (right->h_flags & F_LEAF) {
13146134Smao 			d = (DATUM *) GETDATUM(right, 0);
13246134Smao 			len = d->d_ksize;
13346134Smao 			if (d->d_flags & D_BIGKEY) {
13446134Smao 				bcopy(&(d->d_bytes[0]),
13546134Smao 				      (char *) &oldchain,
13646134Smao 				      sizeof(oldchain));
13746134Smao 				if (_bt_markchain(t, oldchain) == RET_ERROR)
13846134Smao 					return (RET_ERROR);
13946134Smao 				src = (char *) &oldchain;
14046134Smao 				flags = D_BIGKEY;
14146134Smao 			} else {
14246134Smao 				src = (char *) &(d->d_bytes[0]);
14346134Smao 				flags = 0;
14446134Smao 			}
14546134Smao 		} else {
14646134Smao 			id = (IDATUM *) GETDATUM(right, 0);
14746134Smao 			len = id->i_size;
14846134Smao 			flags = id->i_flags;
14946134Smao 			src = (char *) &(id->i_bytes[0]);
15046134Smao 		}
15146134Smao 		nbytes = len + (sizeof(IDATUM) - sizeof(char));
15246134Smao 		new = (IDATUM *) malloc((unsigned) nbytes);
15346134Smao 		if (new == (IDATUM *) NULL)
15446134Smao 			return (RET_ERROR);
15546134Smao 		new->i_size = len;
15646134Smao 		new->i_pgno = right->h_pgno;
15746134Smao 		new->i_flags = flags;
15846134Smao 		if (len > 0)
15946134Smao 			(void) bcopy(src, (char *) &(new->i_bytes[0]), len);
16046134Smao 
16146134Smao 		nbytes = LONGALIGN(nbytes) + sizeof(index_t);
16246134Smao 		if (_bt_getpage(t, parent) == RET_ERROR)
16346134Smao 			return (RET_ERROR);
16446134Smao 
16546134Smao 		h = t->bt_curpage;
16646134Smao 
16746134Smao 		/*
16846134Smao 		 *  Split the parent if we need to, then reposition the
16946134Smao 		 *  tree's current page pointer for the new datum.
17046134Smao 		 */
17146134Smao 		if ((h->h_upper - h->h_lower) < nbytes) {
17246134Smao 			if (_bt_split(t) == RET_ERROR)
17346134Smao 				return (RET_ERROR);
17446134Smao 			if (_bt_reposition(t, new, parent, right->h_prevpg)
17546134Smao 			      == RET_ERROR)
17646134Smao 				return (RET_ERROR);
17746134Smao 		}
17846134Smao 
17946134Smao 		/* okay, now insert the new idatum */
18046134Smao 		if (_bt_inserti(t, new, right->h_prevpg) == RET_ERROR)
18146134Smao 			return (RET_ERROR);
18246134Smao 	}
18346134Smao 
18446134Smao 	/*
18546134Smao 	 *  Okay, split is done; don't need right page stapled down anymore.
18646134Smao 	 *  The page we call 'left' above is the new version of the old
18746134Smao 	 *  (split) page, so we can't release it.
18846134Smao 	 */
18946134Smao 
19046134Smao 	if (_bt_release(t, right) == RET_ERROR)
19146134Smao 		return (RET_ERROR);
19246134Smao 	if (ISDISK(t) && !ISCACHE(t))
19346134Smao 		(void) free((char *) right);
19446134Smao 
19546134Smao 	return (RET_SUCCESS);
19646134Smao }
19746134Smao 
19846134Smao /*
19946134Smao  *  _BT_REPOSITION -- Reposition the current page pointer of a btree.
20046134Smao  *
20146134Smao  *	After splitting a node in the tree in order to make room for
20246134Smao  *	an insertion, we need to figure out which page after the split
20346134Smao  *	should get the item we want to insert.  This routine positions
20446134Smao  *	the tree's current page pointer appropriately.
20546134Smao  *
20646134Smao  *	Parameters:
20746134Smao  *		t -- tree to position
20846134Smao  *		new -- the item we want to insert
20946134Smao  *		parent -- parent of the node that we just split
21046134Smao  *		prev -- page number of item directly to the left of
21146134Smao  *			new's position in the tree.
21246134Smao  *
21346134Smao  *	Returns:
21446134Smao  *		RET_SUCCESS, RET_ERROR.
21546134Smao  *
21646134Smao  *	Side Effects:
21746134Smao  *		None.
21846134Smao  */
21946134Smao 
22046134Smao int
22146134Smao _bt_reposition(t, new, parent, prev)
22246134Smao 	BTREE_P t;
22346134Smao 	IDATUM *new;
22446134Smao 	pgno_t parent;
22546134Smao 	pgno_t prev;
22646134Smao {
22746134Smao 	index_t i, next;
22846134Smao 	IDATUM *idx;
22946134Smao 
23046134Smao 	if (parent == P_ROOT) {
23146134Smao 
23246134Smao 		/*
23346134Smao 		 *  If we just split the root page, then there are guaranteed
23446134Smao 		 *  to be exactly two IDATUMs on it.  Look at both of them
23546134Smao 		 *  to see if they point to the page that we want.
23646134Smao 		 */
23746134Smao 
23846134Smao 		if (_bt_getpage(t, parent) == RET_ERROR)
23946134Smao 			return (RET_ERROR);
24046134Smao 
24146134Smao 		next = NEXTINDEX(t->bt_curpage);
24246134Smao 		for (i = 0; i < next; i++) {
24346134Smao 			idx = (IDATUM *) GETDATUM(t->bt_curpage, i);
24446134Smao 			if (_bt_getpage(t, idx->i_pgno) == RET_ERROR)
24546134Smao 				return (RET_ERROR);
24646134Smao 			if (_bt_isonpage(t, new, prev) == RET_SUCCESS)
24746134Smao 				return (RET_SUCCESS);
24846134Smao 			if (_bt_getpage(t, parent) == RET_ERROR)
24946134Smao 				return (RET_ERROR);
25046134Smao 		}
25146134Smao 	} else {
25246134Smao 
25346134Smao 		/*
25446134Smao 		 *  Get the parent page -- which is where the new item would
25546134Smao 		 *  have gone -- and figure out whether the new item now goes
25646134Smao 		 *  on the parent, or the page immediately to the right of
25746134Smao 		 *  the parent.
25846134Smao 		 */
25946134Smao 
26046134Smao 		if (_bt_getpage(t, parent) == RET_ERROR)
26146134Smao 			return (RET_ERROR);
26246134Smao 		if (_bt_isonpage(t, new, prev) == RET_SUCCESS)
26346134Smao 			return (RET_SUCCESS);
26446134Smao 		if (_bt_getpage(t, t->bt_curpage->h_nextpg) == RET_ERROR)
26546134Smao 			return (RET_ERROR);
26646134Smao 		if (_bt_isonpage(t, new, prev) == RET_SUCCESS)
26746134Smao 			return (RET_SUCCESS);
26846134Smao 	}
26946134Smao 	return (RET_ERROR);
27046134Smao }
27146134Smao 
27246134Smao /*
27346134Smao  *  _BT_ISONPAGE -- Is the IDATUM for a given page number on the current page?
27446134Smao  *
27546134Smao  *	This routine is used by _bt_reposition to decide whether the current
27646134Smao  *	page is the correct one on which to insert a new item.
27746134Smao  *
27846134Smao  *	Parameters:
27946134Smao  *		t -- tree to check
28046134Smao  *		new -- the item that will be inserted (used for binary search)
28146134Smao  *		prev -- page number of page whose IDATUM is immediately to
28246134Smao  *			the left of new's position in the tree.
28346134Smao  *
28446134Smao  *	Returns:
28546134Smao  *		RET_SUCCESS, RET_ERROR (corresponding to TRUE, FALSE).
28646134Smao  */
28746134Smao 
28846134Smao int
28946134Smao _bt_isonpage(t, new, prev)
29046134Smao 	BTREE_P t;
29146134Smao 	IDATUM *new;
29246134Smao 	pgno_t prev;
29346134Smao {
29446134Smao 	BTHEADER *h = (BTHEADER *) t->bt_curpage;
29546134Smao 	index_t i, next;
29646134Smao 	IDATUM *idx;
29746134Smao 
29846134Smao 	i = _bt_binsrch(t, &(new->i_bytes[0]));
29946134Smao 	while (i != 0 && _bt_cmp(t, &(new->i_bytes[0]), i) == 0)
30046134Smao 		--i;
30146134Smao 	next = NEXTINDEX(h);
30246134Smao 	idx = (IDATUM *) GETDATUM(h, i);
30346134Smao 	while (i < next && idx->i_pgno != prev) {
30446134Smao 		i++;
30546134Smao 		idx = (IDATUM *) GETDATUM(h, i);
30646134Smao 	}
30746134Smao 	if (idx->i_pgno == prev)
30846134Smao 		return (RET_SUCCESS);
30946134Smao 	else
31046134Smao 		return (RET_ERROR);
31146134Smao }
31246134Smao 
31346134Smao /*
31446134Smao  *  _BT_SPLITROOT -- Split the root of a btree.
31546134Smao  *
31646134Smao  *	The root page for a btree is always page one.  This means that in
31746134Smao  *	order to split the root, we need to do extra work.
31846134Smao  *
31946134Smao  *	Parameters:
32046134Smao  *		t -- tree to split
32146134Smao  *
32246134Smao  *	Returns:
32346134Smao  *		RET_SUCCESS, RET_ERROR.
32446134Smao  *
32546134Smao  *	Side Effects:
32646134Smao  *		Splits root upward in the usual way, adding two new pages
32746134Smao  *		to the tree (rather than just one, as in usual splits).
32846134Smao  */
32946134Smao 
33046134Smao int
33146134Smao _bt_splitroot(t)
33246134Smao 	BTREE_P t;
33346134Smao {
33446134Smao 	BTHEADER *h = t->bt_curpage;
33546134Smao 	BTHEADER *left, *right;
33646134Smao 	IDATUM *id;
33746134Smao 	BTHEADER *where_h;
33846134Smao 	char *src, *dest;
33946134Smao 	int len, nbytes;
34046134Smao 	u_long was_leaf;
34146134Smao 	pgno_t oldchain;
34246134Smao 	u_char flags;
34346134Smao 
34446134Smao 	/* get two new pages for the split */
34546134Smao 	if ((left = _bt_allocpg(t)) == (BTHEADER *) NULL)
34646134Smao 		return (RET_ERROR);
34746134Smao 	left->h_pgno = ++(t->bt_npages);
34846134Smao 	if ((right = _bt_allocpg(t)) == (BTHEADER *) NULL)
34946134Smao 		return (RET_ERROR);
35046134Smao 	right->h_pgno = ++(t->bt_npages);
35146134Smao 
35246134Smao 	/* do the split */
35346134Smao 	if (_bt_dopsplit(t, left, right) == RET_ERROR)
35446134Smao 		return (RET_ERROR);
35546134Smao 
35646134Smao 	/* connect the new pages correctly */
35746134Smao 	right->h_prevpg = left->h_pgno;
35846134Smao 	left->h_nextpg = right->h_pgno;
35946134Smao 
36046134Smao 	/*
36146134Smao 	 *  Write the child pages out now.  We need them to remain
36246134Smao 	 *  where they are until we finish updating parent pages,
36346134Smao 	 *  however.
36446134Smao 	 */
36546134Smao 
36646134Smao 	if (_bt_write(t, left, NORELEASE) == RET_ERROR)
36746134Smao 		return (RET_ERROR);
36846134Smao 	if (_bt_write(t, right, NORELEASE) == RET_ERROR)
36946134Smao 		return (RET_ERROR);
37046134Smao 
37146134Smao 	/* now change the root page into an internal page */
37246134Smao 	was_leaf = (h->h_flags & F_LEAF);
37346134Smao 	h->h_flags &= ~F_LEAF;
37446134Smao 	h->h_lower = (index_t) (((char *) (&(h->h_linp[0]))) - ((char *) h));
37546134Smao 	h->h_upper = (index_t) t->bt_psize;
37646134Smao 	(void) bzero((char *) &(h->h_linp[0]), (int) (h->h_upper - h->h_lower));
37746134Smao 
37846134Smao 	/* put two new keys on root page */
37946134Smao 	where_h = left;
38046134Smao 	while (where_h) {
38146134Smao 		DATUM *data;
38246134Smao 		IDATUM *idata;
38346134Smao 
38446134Smao 		if (was_leaf) {
38546134Smao 			data = (DATUM *) GETDATUM(where_h, 0);
38646134Smao 
38746134Smao 			if (where_h == left) {
38846134Smao 				len = 0;	/* first key in tree is null */
38946134Smao 			} else {
39046134Smao 				if (data->d_flags & D_BIGKEY) {
39146134Smao 					bcopy(&(data->d_bytes[0]),
39246134Smao 					      (char *) &oldchain,
39346134Smao 					      sizeof(oldchain));
39446134Smao 					if (_bt_markchain(t, oldchain) == RET_ERROR)
39546134Smao 						return (RET_ERROR);
39646134Smao 					src = (char *) &oldchain;
39746134Smao 					flags = D_BIGKEY;
39846134Smao 				} else {
39946134Smao 					src = (char *) &(data->d_bytes[0]);
40046134Smao 					flags = 0;
40146134Smao 				}
40246134Smao 				len = data->d_ksize;
40346134Smao 			}
40446134Smao 		} else {
40546134Smao 			idata = (IDATUM *) GETDATUM(where_h, 0);
40646134Smao 			len = idata->i_size;
40746134Smao 			flags = idata->i_flags;
40846134Smao 			src = &(idata->i_bytes[0]);
40946134Smao 		}
41046134Smao 		dest = ((char *) h) + h->h_upper;
41146134Smao 		nbytes = len + (sizeof (IDATUM) - sizeof(char));
41246134Smao 		dest -= LONGALIGN(nbytes);
41346134Smao 		id = (IDATUM *) dest;
41446134Smao 		id->i_size = len;
41546134Smao 		id->i_pgno = where_h->h_pgno;
41646134Smao 		id->i_flags = flags;
41746134Smao 		if (len > 0)
41846134Smao 			(void) bcopy((char *) src, (char *) &(id->i_bytes[0]), len);
41946134Smao 		dest -= ((int) h);
42046134Smao 		h->h_linp[NEXTINDEX(h)] = (index_t) dest;
42146134Smao 		h->h_upper = (index_t) dest;
42246134Smao 		h->h_lower += sizeof(index_t);
42346134Smao 
42446134Smao 		/* next page */
42546134Smao 		if (where_h == left)
42646134Smao 			where_h = right;
42746134Smao 		else
42846134Smao 			where_h = (BTHEADER *) NULL;
42946134Smao 	}
43046134Smao 
43146134Smao 	if (_bt_release(t, left) == RET_ERROR)
43246134Smao 		return (RET_ERROR);
43346134Smao 	if (_bt_release(t, right) == RET_ERROR)
43446134Smao 		return (RET_ERROR);
43546134Smao 
43646134Smao 	/*
43746134Smao 	 *  That's it, split is done.  If we're doing a non-cached disk
43846134Smao 	 *  btree, we can free up the pages we allocated, as they're on
43946134Smao 	 *  disk, now.
44046134Smao 	 */
44146134Smao 
44246134Smao 	if (ISDISK(t) && !ISCACHE(t)) {
44346134Smao 		(void) free ((char *) left);
44446134Smao 		(void) free ((char *) right);
44546134Smao 	}
44646134Smao 
44746134Smao 	h->h_flags |= F_DIRTY;
44846134Smao 
44946134Smao 	return (RET_SUCCESS);
45046134Smao }
45146134Smao 
45246134Smao /*
45346134Smao  *  _BT_DOPSPLIT -- Do the work of splitting a page
45446134Smao  *
45546134Smao  *	This routine takes two page pointers and splits the data on the
45646134Smao  *	current page of the btree between them.
45746134Smao  *
45846134Smao  *	We do a lot of work here to handle duplicate keys on a page; we
45946134Smao  *	have to place these keys carefully to guarantee that later searches
46046134Smao  *	will find them correctly.  See comments in the code below for details.
46146134Smao  *
46246134Smao  *	Parameters:
46346134Smao  *		t -- tree to split
46446134Smao  *		left -- pointer to page to get left half of the data
46546134Smao  *		right -- pointer to page to get right half of the data
46646134Smao  *
46746134Smao  *	Returns:
46846134Smao  *		None.
46946134Smao  */
47046134Smao 
47146134Smao int
47246134Smao _bt_dopsplit(t, left, right)
47346134Smao 	BTREE_P t;
47446134Smao 	BTHEADER *left;
47546134Smao 	BTHEADER *right;
47646134Smao {
47746134Smao 	BTHEADER *h = t->bt_curpage;
47846134Smao 	size_t psize;
47946134Smao 	char *where;
48046134Smao 	BTHEADER *where_h;
48146134Smao 	index_t where_i;
48246134Smao 	int nbytes, dsize, fixedsize, freespc;
48346134Smao 	index_t i;
48446134Smao 	index_t save_lower, save_upper, save_i;
48546134Smao 	index_t switch_i;
48646134Smao 	char *save_key;
48746134Smao 	DATUM *d;
48846134Smao 	CURSOR *c;
48946134Smao 	index_t top;
49046134Smao 	int free_save;
49146134Smao 	pgno_t chain;
49246134Smao 	int ignore;
49346134Smao 
49446134Smao 	/*
49546134Smao 	 *  Our strategy is to put half the bytes on each page.  We figure
49646134Smao 	 *  out how many bytes we have total, and then move items until
49746134Smao 	 *  the last item moved put at least 50% of the data on the left
49846134Smao 	 *  page.
49946134Smao 	 */
50046134Smao 	save_key = (char *) NULL;
50146134Smao 	psize = (int) t->bt_psize;
50246134Smao 	where = ((char *) left) + psize;
50346134Smao 	where_h = left;
50446134Smao 	where_i = 0;
50546134Smao 	nbytes = psize - (int) ((char *) &(h->h_linp[0]) - ((char *) h));
50646134Smao 	freespc = nbytes;
50746134Smao 
50846134Smao 	top = NEXTINDEX(h);
50946134Smao 	if (h->h_flags & F_LEAF)
51046134Smao 		fixedsize = (sizeof(DATUM) - sizeof(char));
51146134Smao 	else
51246134Smao 		fixedsize = (sizeof(IDATUM) - sizeof(char));
51346134Smao 
51446134Smao 	save_key = (char *) NULL;
51546134Smao 
51646134Smao 	/* move data */
51746134Smao 	for (i = 0; i < top; i++) {
51846134Smao 
51946134Smao 		/*
52046134Smao 		 *  Internal and leaf pages have different layouts for
52146134Smao 		 *  data items, but in both cases the first entry in the
52246134Smao 		 *  data item is a size_t.
52346134Smao 		 */
52446134Smao 		d = (DATUM *) GETDATUM(h,i);
52546134Smao 		if (h->h_flags & F_LEAF) {
52646134Smao 			dsize = d->d_ksize + d->d_dsize + fixedsize;
52746134Smao 		} else {
52846134Smao 			dsize = d->d_ksize + fixedsize;
52946134Smao 		}
53046134Smao 
53146134Smao 		/*
53246134Smao 		 *  If a page contains duplicate keys, we have to be
53346134Smao 		 *  careful about splits.  A sequence of duplicate keys
53446134Smao 		 *  may not begin in the middle of one page and end in
53546134Smao 		 *  the middle of another; it must begin on a page boundary,
53646134Smao 		 *  in order for searches on the internal nodes to work
53746134Smao 		 *  correctly.
53846134Smao 		 */
53946134Smao 		if (where_h == left) {
54046134Smao 			if (save_key == (char *) NULL) {
54146134Smao 				if (h->h_flags & F_LEAF) {
54246134Smao 					if (d->d_flags & D_BIGKEY) {
54346134Smao 						free_save = TRUE;
54446134Smao 						bcopy(&(d->d_bytes[0]),
54546134Smao 						     (char *) &chain,
54646134Smao 						     sizeof(chain));
54746134Smao 						if (_bt_getbig(t, chain,
54846134Smao 							       &save_key,
54946134Smao 							       &ignore)
55046134Smao 						    == RET_ERROR)
55146134Smao 							return (RET_ERROR);
55246134Smao 					} else {
55346134Smao 						free_save = FALSE;
55446134Smao 						save_key = (char *) &(d->d_bytes[0]);
55546134Smao 					}
55646134Smao 				} else {
55746134Smao 					IDATUM *id = (IDATUM *) d;
55846134Smao 
55946134Smao 					if (id->i_flags & D_BIGKEY) {
56046134Smao 						free_save = TRUE;
56146134Smao 						bcopy(&(id->i_bytes[0]),
56246134Smao 						     (char *) &chain,
56346134Smao 						     sizeof(chain));
56446134Smao 						if (_bt_getbig(t, chain,
56546134Smao 							       &save_key,
56646134Smao 							       &ignore)
56746134Smao 						    == RET_ERROR)
56846134Smao 							return (RET_ERROR);
56946134Smao 					} else {
57046134Smao 						free_save = FALSE;
57146134Smao 						save_key = (char *)
57246134Smao 							&(id->i_bytes[0]);
57346134Smao 					}
57446134Smao 				}
57546134Smao 				save_i = 0;
57646134Smao 				save_lower = where_h->h_lower;
57746134Smao 				save_upper = where_h->h_upper;
57846134Smao 			} else {
57946134Smao 				if (_bt_cmp(t, save_key, i) != 0) {
58046134Smao 					save_lower = where_h->h_lower;
58146134Smao 					save_upper = where_h->h_upper;
58246134Smao 					save_i = i;
58346134Smao 				}
58446134Smao 				if (h->h_flags & F_LEAF) {
58546134Smao 					if (free_save)
58646134Smao 						(void) free(save_key);
58746134Smao 					if (d->d_flags & D_BIGKEY) {
58846134Smao 						free_save = TRUE;
58946134Smao 						bcopy(&(d->d_bytes[0]),
59046134Smao 						     (char *) &chain,
59146134Smao 						     sizeof(chain));
59246134Smao 						if (_bt_getbig(t, chain,
59346134Smao 							       &save_key,
59446134Smao 							       &ignore)
59546134Smao 						    == RET_ERROR)
59646134Smao 							return (RET_ERROR);
59746134Smao 					} else {
59846134Smao 						free_save = FALSE;
59946134Smao 						save_key = (char *) &(d->d_bytes[0]);
60046134Smao 					}
60146134Smao 				} else {
60246134Smao 					IDATUM *id = (IDATUM *) d;
60346134Smao 
60446134Smao 					if (id->i_flags & D_BIGKEY) {
60546134Smao 						free_save = TRUE;
60646134Smao 						bcopy(&(id->i_bytes[0]),
60746134Smao 						     (char *) &chain,
60846134Smao 						     sizeof(chain));
60946134Smao 						if (_bt_getbig(t, chain,
61046134Smao 							       &save_key,
61146134Smao 							       &ignore)
61246134Smao 						    == RET_ERROR)
61346134Smao 							return (RET_ERROR);
61446134Smao 					} else {
61546134Smao 						free_save = FALSE;
61646134Smao 						save_key = (char *)
61746134Smao 							&(id->i_bytes[0]);
61846134Smao 					}
61946134Smao 				}
62046134Smao 			}
62146134Smao 		}
62246134Smao 
62346134Smao 		/* copy data and update page state */
62446134Smao 		where -= LONGALIGN(dsize);
62546134Smao 		(void) bcopy((char *) d, (char *) where, dsize);
62646134Smao 		where_h->h_upper = where_h->h_linp[where_i] =
62746134Smao 			(index_t) (where - (int) where_h);
62846134Smao 		where_h->h_lower += sizeof(index_t);
62946134Smao 		where_i++;
63046134Smao 
63146134Smao 		/* if we've moved half, switch to the right-hand page */
63246134Smao 		nbytes -= LONGALIGN(dsize) + sizeof(index_t);
63346134Smao 		if ((freespc - nbytes) > nbytes) {
63446134Smao 			nbytes = 2 * freespc;
63546134Smao 
63646134Smao 			/* identical keys go on the same page */
63746134Smao 			if (save_i > 0) {
63846134Smao 				/* i gets incremented at loop bottom... */
63946134Smao 				i = save_i - 1;
64046134Smao 				where_h->h_lower = save_lower;
64146134Smao 				where_h->h_upper = save_upper;
64246134Smao 			}
64346134Smao 			where = ((char *) right) + psize;
64446134Smao 			where_h = right;
64546134Smao 			switch_i = where_i;
64646134Smao 			where_i = 0;
64746134Smao 		}
64846134Smao 	}
64946134Smao 
65046134Smao 	/*
65146134Smao 	 *  If there was an active scan on the database, and we just
65246134Smao 	 *  split the page that the cursor was on, we may need to
65346134Smao 	 *  adjust the cursor to point to the same entry as before the
65446134Smao 	 *  split.
65546134Smao 	 */
65646134Smao 
65746134Smao 	c = &(t->bt_cursor);
65846134Smao 	if ((t->bt_flags & BTF_SEQINIT)
65946134Smao 	    && (c->c_pgno == h->h_pgno)
66046134Smao 	    && (c->c_index >= switch_i)) {
66146134Smao 		c->c_pgno = where_h->h_pgno;
66246134Smao 		c->c_index -= where_i;
66346134Smao 	}
66446134Smao 	return (RET_SUCCESS);
66546134Smao }
666