xref: /onnv-gate/usr/src/cmd/sendmail/db/btree/bt_put.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*-
2*0Sstevel@tonic-gate  * See the file LICENSE for redistribution information.
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * Copyright (c) 1996, 1997, 1998
5*0Sstevel@tonic-gate  *	Sleepycat Software.  All rights reserved.
6*0Sstevel@tonic-gate  */
7*0Sstevel@tonic-gate /*
8*0Sstevel@tonic-gate  * Copyright (c) 1990, 1993, 1994, 1995, 1996
9*0Sstevel@tonic-gate  *	Keith Bostic.  All rights reserved.
10*0Sstevel@tonic-gate  */
11*0Sstevel@tonic-gate /*
12*0Sstevel@tonic-gate  * Copyright (c) 1990, 1993, 1994, 1995
13*0Sstevel@tonic-gate  *	The Regents of the University of California.  All rights reserved.
14*0Sstevel@tonic-gate  *
15*0Sstevel@tonic-gate  * This code is derived from software contributed to Berkeley by
16*0Sstevel@tonic-gate  * Mike Olson.
17*0Sstevel@tonic-gate  *
18*0Sstevel@tonic-gate  * Redistribution and use in source and binary forms, with or without
19*0Sstevel@tonic-gate  * modification, are permitted provided that the following conditions
20*0Sstevel@tonic-gate  * are met:
21*0Sstevel@tonic-gate  * 1. Redistributions of source code must retain the above copyright
22*0Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer.
23*0Sstevel@tonic-gate  * 2. Redistributions in binary form must reproduce the above copyright
24*0Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer in the
25*0Sstevel@tonic-gate  *    documentation and/or other materials provided with the distribution.
26*0Sstevel@tonic-gate  * 3. All advertising materials mentioning features or use of this software
27*0Sstevel@tonic-gate  *    must display the following acknowledgement:
28*0Sstevel@tonic-gate  *	This product includes software developed by the University of
29*0Sstevel@tonic-gate  *	California, Berkeley and its contributors.
30*0Sstevel@tonic-gate  * 4. Neither the name of the University nor the names of its contributors
31*0Sstevel@tonic-gate  *    may be used to endorse or promote products derived from this software
32*0Sstevel@tonic-gate  *    without specific prior written permission.
33*0Sstevel@tonic-gate  *
34*0Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35*0Sstevel@tonic-gate  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36*0Sstevel@tonic-gate  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37*0Sstevel@tonic-gate  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38*0Sstevel@tonic-gate  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39*0Sstevel@tonic-gate  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40*0Sstevel@tonic-gate  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41*0Sstevel@tonic-gate  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42*0Sstevel@tonic-gate  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43*0Sstevel@tonic-gate  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44*0Sstevel@tonic-gate  * SUCH DAMAGE.
45*0Sstevel@tonic-gate  */
46*0Sstevel@tonic-gate 
47*0Sstevel@tonic-gate #include "config.h"
48*0Sstevel@tonic-gate 
49*0Sstevel@tonic-gate #ifndef lint
50*0Sstevel@tonic-gate static const char sccsid[] = "@(#)bt_put.c	10.54 (Sleepycat) 12/6/98";
51*0Sstevel@tonic-gate #endif /* not lint */
52*0Sstevel@tonic-gate 
53*0Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
54*0Sstevel@tonic-gate #include <sys/types.h>
55*0Sstevel@tonic-gate 
56*0Sstevel@tonic-gate #include <errno.h>
57*0Sstevel@tonic-gate #include <string.h>
58*0Sstevel@tonic-gate #endif
59*0Sstevel@tonic-gate 
60*0Sstevel@tonic-gate #include "db_int.h"
61*0Sstevel@tonic-gate #include "db_page.h"
62*0Sstevel@tonic-gate #include "btree.h"
63*0Sstevel@tonic-gate 
64*0Sstevel@tonic-gate static int __bam_fixed __P((DBC *, DBT *));
65*0Sstevel@tonic-gate static int __bam_ndup __P((DBC *, PAGE *, u_int32_t));
66*0Sstevel@tonic-gate static int __bam_ovput __P((DBC *, PAGE *, u_int32_t, DBT *));
67*0Sstevel@tonic-gate static int __bam_partial __P((DBC *,
68*0Sstevel@tonic-gate     DBT *, PAGE *, u_int32_t, u_int32_t, u_int32_t));
69*0Sstevel@tonic-gate static u_int32_t __bam_partsize __P((DBT *, PAGE *, u_int32_t));
70*0Sstevel@tonic-gate 
71*0Sstevel@tonic-gate /*
72*0Sstevel@tonic-gate  * __bam_iitem --
73*0Sstevel@tonic-gate  *	Insert an item into the tree.
74*0Sstevel@tonic-gate  *
75*0Sstevel@tonic-gate  * PUBLIC: int __bam_iitem __P((DBC *,
76*0Sstevel@tonic-gate  * PUBLIC:    PAGE **, db_indx_t *, DBT *, DBT *, u_int32_t, u_int32_t));
77*0Sstevel@tonic-gate  */
78*0Sstevel@tonic-gate int
__bam_iitem(dbc,hp,indxp,key,data,op,flags)79*0Sstevel@tonic-gate __bam_iitem(dbc, hp, indxp, key, data, op, flags)
80*0Sstevel@tonic-gate 	DBC *dbc;
81*0Sstevel@tonic-gate 	PAGE **hp;
82*0Sstevel@tonic-gate 	db_indx_t *indxp;
83*0Sstevel@tonic-gate 	DBT *key, *data;
84*0Sstevel@tonic-gate 	u_int32_t op, flags;
85*0Sstevel@tonic-gate {
86*0Sstevel@tonic-gate 	BTREE *t;
87*0Sstevel@tonic-gate 	BKEYDATA *bk;
88*0Sstevel@tonic-gate 	DB *dbp;
89*0Sstevel@tonic-gate 	DBT tdbt;
90*0Sstevel@tonic-gate 	PAGE *h;
91*0Sstevel@tonic-gate 	db_indx_t indx, nbytes;
92*0Sstevel@tonic-gate 	u_int32_t data_size, have_bytes, need_bytes, needed;
93*0Sstevel@tonic-gate 	int bigkey, bigdata, dupadjust, replace, ret;
94*0Sstevel@tonic-gate 
95*0Sstevel@tonic-gate 	COMPQUIET(bk, NULL);
96*0Sstevel@tonic-gate 
97*0Sstevel@tonic-gate 	dbp = dbc->dbp;
98*0Sstevel@tonic-gate 	t = dbp->internal;
99*0Sstevel@tonic-gate 	h = *hp;
100*0Sstevel@tonic-gate 	indx = *indxp;
101*0Sstevel@tonic-gate 	dupadjust = replace = 0;
102*0Sstevel@tonic-gate 
103*0Sstevel@tonic-gate 	/*
104*0Sstevel@tonic-gate 	 * If it's a page of duplicates, call the common code to do the work.
105*0Sstevel@tonic-gate 	 *
106*0Sstevel@tonic-gate 	 * !!!
107*0Sstevel@tonic-gate 	 * Here's where the hp and indxp are important.  The duplicate code
108*0Sstevel@tonic-gate 	 * may decide to rework/rearrange the pages and indices we're using,
109*0Sstevel@tonic-gate 	 * so the caller must understand that the page stack may change.
110*0Sstevel@tonic-gate 	 */
111*0Sstevel@tonic-gate 	if (TYPE(h) == P_DUPLICATE) {
112*0Sstevel@tonic-gate 		/* Adjust the index for the new item if it's a DB_AFTER op. */
113*0Sstevel@tonic-gate 		if (op == DB_AFTER)
114*0Sstevel@tonic-gate 			++*indxp;
115*0Sstevel@tonic-gate 
116*0Sstevel@tonic-gate 		/* Remove the current item if it's a DB_CURRENT op. */
117*0Sstevel@tonic-gate 		if (op == DB_CURRENT) {
118*0Sstevel@tonic-gate 			bk = GET_BKEYDATA(*hp, *indxp);
119*0Sstevel@tonic-gate 			switch (B_TYPE(bk->type)) {
120*0Sstevel@tonic-gate 			case B_KEYDATA:
121*0Sstevel@tonic-gate 				nbytes = BKEYDATA_SIZE(bk->len);
122*0Sstevel@tonic-gate 				break;
123*0Sstevel@tonic-gate 			case B_OVERFLOW:
124*0Sstevel@tonic-gate 				nbytes = BOVERFLOW_SIZE;
125*0Sstevel@tonic-gate 				break;
126*0Sstevel@tonic-gate 			default:
127*0Sstevel@tonic-gate 				return (__db_pgfmt(dbp, h->pgno));
128*0Sstevel@tonic-gate 			}
129*0Sstevel@tonic-gate 			if ((ret = __db_ditem(dbc, *hp, *indxp, nbytes)) != 0)
130*0Sstevel@tonic-gate 				return (ret);
131*0Sstevel@tonic-gate 		}
132*0Sstevel@tonic-gate 
133*0Sstevel@tonic-gate 		/* Put the new/replacement item onto the page. */
134*0Sstevel@tonic-gate 		if ((ret = __db_dput(dbc, data, hp, indxp, __bam_new)) != 0)
135*0Sstevel@tonic-gate 			return (ret);
136*0Sstevel@tonic-gate 
137*0Sstevel@tonic-gate 		goto done;
138*0Sstevel@tonic-gate 	}
139*0Sstevel@tonic-gate 
140*0Sstevel@tonic-gate 	/* Handle fixed-length records: build the real record. */
141*0Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_RE_FIXEDLEN) && data->size != t->recno->re_len) {
142*0Sstevel@tonic-gate 		tdbt = *data;
143*0Sstevel@tonic-gate 		if ((ret = __bam_fixed(dbc, &tdbt)) != 0)
144*0Sstevel@tonic-gate 			return (ret);
145*0Sstevel@tonic-gate 		data = &tdbt;
146*0Sstevel@tonic-gate 	}
147*0Sstevel@tonic-gate 
148*0Sstevel@tonic-gate 	/*
149*0Sstevel@tonic-gate 	 * Figure out how much space the data will take, including if it's a
150*0Sstevel@tonic-gate 	 * partial record.  If either of the key or data items won't fit on
151*0Sstevel@tonic-gate 	 * a page, we'll have to store them on overflow pages.
152*0Sstevel@tonic-gate 	 */
153*0Sstevel@tonic-gate 	bigkey = LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize;
154*0Sstevel@tonic-gate 	data_size = F_ISSET(data, DB_DBT_PARTIAL) ?
155*0Sstevel@tonic-gate 	    __bam_partsize(data, h, indx) : data->size;
156*0Sstevel@tonic-gate 	bigdata = data_size > t->bt_ovflsize;
157*0Sstevel@tonic-gate 
158*0Sstevel@tonic-gate 	needed = 0;
159*0Sstevel@tonic-gate 	if (LF_ISSET(BI_NEWKEY)) {
160*0Sstevel@tonic-gate 		/* If BI_NEWKEY is set we're adding a new key and data pair. */
161*0Sstevel@tonic-gate 		if (bigkey)
162*0Sstevel@tonic-gate 			needed += BOVERFLOW_PSIZE;
163*0Sstevel@tonic-gate 		else
164*0Sstevel@tonic-gate 			needed += BKEYDATA_PSIZE(key->size);
165*0Sstevel@tonic-gate 		if (bigdata)
166*0Sstevel@tonic-gate 			needed += BOVERFLOW_PSIZE;
167*0Sstevel@tonic-gate 		else
168*0Sstevel@tonic-gate 			needed += BKEYDATA_PSIZE(data_size);
169*0Sstevel@tonic-gate 	} else {
170*0Sstevel@tonic-gate 		/*
171*0Sstevel@tonic-gate 		 * We're either overwriting the data item of a key/data pair
172*0Sstevel@tonic-gate 		 * or we're adding the data item only, i.e. a new duplicate.
173*0Sstevel@tonic-gate 		 */
174*0Sstevel@tonic-gate 		if (op == DB_CURRENT) {
175*0Sstevel@tonic-gate 			bk = GET_BKEYDATA(h,
176*0Sstevel@tonic-gate 			    indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
177*0Sstevel@tonic-gate 			if (B_TYPE(bk->type) == B_KEYDATA)
178*0Sstevel@tonic-gate 				have_bytes = BKEYDATA_PSIZE(bk->len);
179*0Sstevel@tonic-gate 			else
180*0Sstevel@tonic-gate 				have_bytes = BOVERFLOW_PSIZE;
181*0Sstevel@tonic-gate 			need_bytes = 0;
182*0Sstevel@tonic-gate 		} else {
183*0Sstevel@tonic-gate 			have_bytes = 0;
184*0Sstevel@tonic-gate 			need_bytes = sizeof(db_indx_t);
185*0Sstevel@tonic-gate 		}
186*0Sstevel@tonic-gate 		if (bigdata)
187*0Sstevel@tonic-gate 			need_bytes += BOVERFLOW_PSIZE;
188*0Sstevel@tonic-gate 		else
189*0Sstevel@tonic-gate 			need_bytes += BKEYDATA_PSIZE(data_size);
190*0Sstevel@tonic-gate 
191*0Sstevel@tonic-gate 		if (have_bytes < need_bytes)
192*0Sstevel@tonic-gate 			needed += need_bytes - have_bytes;
193*0Sstevel@tonic-gate 	}
194*0Sstevel@tonic-gate 
195*0Sstevel@tonic-gate 	/*
196*0Sstevel@tonic-gate 	 * If there's not enough room, or the user has put a ceiling on the
197*0Sstevel@tonic-gate 	 * number of keys permitted in the page, split the page.
198*0Sstevel@tonic-gate 	 *
199*0Sstevel@tonic-gate 	 * XXX
200*0Sstevel@tonic-gate 	 * The t->bt_maxkey test here may be insufficient -- do we have to
201*0Sstevel@tonic-gate 	 * check in the btree split code, so we don't undo it there!?!?
202*0Sstevel@tonic-gate 	 */
203*0Sstevel@tonic-gate 	if (P_FREESPACE(h) < needed ||
204*0Sstevel@tonic-gate 	    (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey))
205*0Sstevel@tonic-gate 		return (DB_NEEDSPLIT);
206*0Sstevel@tonic-gate 
207*0Sstevel@tonic-gate 	/* Handle partial puts: build the real record. */
208*0Sstevel@tonic-gate 	if (F_ISSET(data, DB_DBT_PARTIAL)) {
209*0Sstevel@tonic-gate 		tdbt = *data;
210*0Sstevel@tonic-gate 		if ((ret = __bam_partial(dbc,
211*0Sstevel@tonic-gate 		    &tdbt, h, indx, data_size, flags)) != 0)
212*0Sstevel@tonic-gate 			return (ret);
213*0Sstevel@tonic-gate 		data = &tdbt;
214*0Sstevel@tonic-gate 	}
215*0Sstevel@tonic-gate 
216*0Sstevel@tonic-gate 	/*
217*0Sstevel@tonic-gate 	 * The code breaks it up into six cases:
218*0Sstevel@tonic-gate 	 *
219*0Sstevel@tonic-gate 	 * 1. Append a new key/data pair.
220*0Sstevel@tonic-gate 	 * 2. Insert a new key/data pair.
221*0Sstevel@tonic-gate 	 * 3. Append a new data item (a new duplicate).
222*0Sstevel@tonic-gate 	 * 4. Insert a new data item (a new duplicate).
223*0Sstevel@tonic-gate 	 * 5. Overflow item: delete and re-add the data item.
224*0Sstevel@tonic-gate 	 * 6. Replace the data item.
225*0Sstevel@tonic-gate 	 */
226*0Sstevel@tonic-gate 	if (LF_ISSET(BI_NEWKEY)) {
227*0Sstevel@tonic-gate 		switch (op) {
228*0Sstevel@tonic-gate 		case DB_AFTER:		/* 1. Append a new key/data pair. */
229*0Sstevel@tonic-gate 			indx += 2;
230*0Sstevel@tonic-gate 			*indxp += 2;
231*0Sstevel@tonic-gate 			break;
232*0Sstevel@tonic-gate 		case DB_BEFORE:		/* 2. Insert a new key/data pair. */
233*0Sstevel@tonic-gate 			break;
234*0Sstevel@tonic-gate 		default:
235*0Sstevel@tonic-gate 			return (EINVAL);
236*0Sstevel@tonic-gate 		}
237*0Sstevel@tonic-gate 
238*0Sstevel@tonic-gate 		/* Add the key. */
239*0Sstevel@tonic-gate 		if (bigkey) {
240*0Sstevel@tonic-gate 			if ((ret = __bam_ovput(dbc, h, indx, key)) != 0)
241*0Sstevel@tonic-gate 				return (ret);
242*0Sstevel@tonic-gate 		} else
243*0Sstevel@tonic-gate 			if ((ret = __db_pitem(dbc, h, indx,
244*0Sstevel@tonic-gate 			    BKEYDATA_SIZE(key->size), NULL, key)) != 0)
245*0Sstevel@tonic-gate 				return (ret);
246*0Sstevel@tonic-gate 		++indx;
247*0Sstevel@tonic-gate 	} else {
248*0Sstevel@tonic-gate 		switch (op) {
249*0Sstevel@tonic-gate 		case DB_AFTER:		/* 3. Append a new data item. */
250*0Sstevel@tonic-gate 			if (TYPE(h) == P_LBTREE) {
251*0Sstevel@tonic-gate 				/*
252*0Sstevel@tonic-gate 				 * Adjust the cursor and copy in the key for
253*0Sstevel@tonic-gate 				 * the duplicate.
254*0Sstevel@tonic-gate 				 */
255*0Sstevel@tonic-gate 				if ((ret = __bam_adjindx(dbc,
256*0Sstevel@tonic-gate 				    h, indx + P_INDX, indx, 1)) != 0)
257*0Sstevel@tonic-gate 					return (ret);
258*0Sstevel@tonic-gate 
259*0Sstevel@tonic-gate 				indx += 3;
260*0Sstevel@tonic-gate 				dupadjust = 1;
261*0Sstevel@tonic-gate 
262*0Sstevel@tonic-gate 				*indxp += 2;
263*0Sstevel@tonic-gate 			} else {
264*0Sstevel@tonic-gate 				++indx;
265*0Sstevel@tonic-gate 				__bam_ca_di(dbp, h->pgno, indx, 1);
266*0Sstevel@tonic-gate 
267*0Sstevel@tonic-gate 				*indxp += 1;
268*0Sstevel@tonic-gate 			}
269*0Sstevel@tonic-gate 			break;
270*0Sstevel@tonic-gate 		case DB_BEFORE:		/* 4. Insert a new data item. */
271*0Sstevel@tonic-gate 			if (TYPE(h) == P_LBTREE) {
272*0Sstevel@tonic-gate 				/*
273*0Sstevel@tonic-gate 				 * Adjust the cursor and copy in the key for
274*0Sstevel@tonic-gate 				 * the duplicate.
275*0Sstevel@tonic-gate 				 */
276*0Sstevel@tonic-gate 				if ((ret =
277*0Sstevel@tonic-gate 				    __bam_adjindx(dbc, h, indx, indx, 1)) != 0)
278*0Sstevel@tonic-gate 					return (ret);
279*0Sstevel@tonic-gate 
280*0Sstevel@tonic-gate 				++indx;
281*0Sstevel@tonic-gate 				dupadjust = 1;
282*0Sstevel@tonic-gate 			} else
283*0Sstevel@tonic-gate 				__bam_ca_di(dbp, h->pgno, indx, 1);
284*0Sstevel@tonic-gate 			break;
285*0Sstevel@tonic-gate 		case DB_CURRENT:
286*0Sstevel@tonic-gate 			if (TYPE(h) == P_LBTREE)
287*0Sstevel@tonic-gate 				++indx;
288*0Sstevel@tonic-gate 
289*0Sstevel@tonic-gate 			/*
290*0Sstevel@tonic-gate 			 * 5. Delete/re-add the data item.
291*0Sstevel@tonic-gate 			 *
292*0Sstevel@tonic-gate 			 * If we're dealing with offpage items, we have to
293*0Sstevel@tonic-gate 			 * delete and then re-add the item.
294*0Sstevel@tonic-gate 			 */
295*0Sstevel@tonic-gate 			if (bigdata || B_TYPE(bk->type) != B_KEYDATA) {
296*0Sstevel@tonic-gate 				if ((ret = __bam_ditem(dbc, h, indx)) != 0)
297*0Sstevel@tonic-gate 					return (ret);
298*0Sstevel@tonic-gate 				break;
299*0Sstevel@tonic-gate 			}
300*0Sstevel@tonic-gate 
301*0Sstevel@tonic-gate 			/* 6. Replace the data item. */
302*0Sstevel@tonic-gate 			replace = 1;
303*0Sstevel@tonic-gate 			break;
304*0Sstevel@tonic-gate 		default:
305*0Sstevel@tonic-gate 			return (EINVAL);
306*0Sstevel@tonic-gate 		}
307*0Sstevel@tonic-gate 	}
308*0Sstevel@tonic-gate 
309*0Sstevel@tonic-gate 	/* Add the data. */
310*0Sstevel@tonic-gate 	if (bigdata) {
311*0Sstevel@tonic-gate 		if ((ret = __bam_ovput(dbc, h, indx, data)) != 0)
312*0Sstevel@tonic-gate 			return (ret);
313*0Sstevel@tonic-gate 	} else {
314*0Sstevel@tonic-gate 		BKEYDATA __bk;
315*0Sstevel@tonic-gate 		DBT __hdr;
316*0Sstevel@tonic-gate 
317*0Sstevel@tonic-gate 		if (LF_ISSET(BI_DELETED)) {
318*0Sstevel@tonic-gate 			B_TSET(__bk.type, B_KEYDATA, 1);
319*0Sstevel@tonic-gate 			__bk.len = data->size;
320*0Sstevel@tonic-gate 			__hdr.data = &__bk;
321*0Sstevel@tonic-gate 			__hdr.size = SSZA(BKEYDATA, data);
322*0Sstevel@tonic-gate 			ret = __db_pitem(dbc, h, indx,
323*0Sstevel@tonic-gate 			    BKEYDATA_SIZE(data->size), &__hdr, data);
324*0Sstevel@tonic-gate 		} else if (replace)
325*0Sstevel@tonic-gate 			ret = __bam_ritem(dbc, h, indx, data);
326*0Sstevel@tonic-gate 		else
327*0Sstevel@tonic-gate 			ret = __db_pitem(dbc, h, indx,
328*0Sstevel@tonic-gate 			    BKEYDATA_SIZE(data->size), NULL, data);
329*0Sstevel@tonic-gate 		if (ret != 0)
330*0Sstevel@tonic-gate 			return (ret);
331*0Sstevel@tonic-gate 	}
332*0Sstevel@tonic-gate 
333*0Sstevel@tonic-gate 	if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
334*0Sstevel@tonic-gate 		return (ret);
335*0Sstevel@tonic-gate 
336*0Sstevel@tonic-gate 	/*
337*0Sstevel@tonic-gate 	 * If the page is at least 50% full, and we added a duplicate, see if
338*0Sstevel@tonic-gate 	 * that set of duplicates takes up at least 25% of the space.  If it
339*0Sstevel@tonic-gate 	 * does, move it off onto its own page.
340*0Sstevel@tonic-gate 	 */
341*0Sstevel@tonic-gate 	if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) {
342*0Sstevel@tonic-gate 		--indx;
343*0Sstevel@tonic-gate 		if ((ret = __bam_ndup(dbc, h, indx)) != 0)
344*0Sstevel@tonic-gate 			return (ret);
345*0Sstevel@tonic-gate 	}
346*0Sstevel@tonic-gate 
347*0Sstevel@tonic-gate 	/*
348*0Sstevel@tonic-gate 	 * If we've changed the record count, update the tree.  Record counts
349*0Sstevel@tonic-gate 	 * need to be updated in recno databases and in btree databases where
350*0Sstevel@tonic-gate 	 * we are supporting records.  In both cases, adjust the count if the
351*0Sstevel@tonic-gate 	 * operation wasn't performed on the current record or when the caller
352*0Sstevel@tonic-gate 	 * overrides and wants the adjustment made regardless.
353*0Sstevel@tonic-gate 	 */
354*0Sstevel@tonic-gate done:	if (LF_ISSET(BI_DOINCR) ||
355*0Sstevel@tonic-gate 	    (op != DB_CURRENT &&
356*0Sstevel@tonic-gate 	    (F_ISSET(dbp, DB_BT_RECNUM) || dbp->type == DB_RECNO)))
357*0Sstevel@tonic-gate 		if ((ret = __bam_adjust(dbc, 1)) != 0)
358*0Sstevel@tonic-gate 			return (ret);
359*0Sstevel@tonic-gate 
360*0Sstevel@tonic-gate 	/* If we've modified a recno file, set the flag */
361*0Sstevel@tonic-gate 	if (t->recno != NULL)
362*0Sstevel@tonic-gate 		F_SET(t->recno, RECNO_MODIFIED);
363*0Sstevel@tonic-gate 
364*0Sstevel@tonic-gate 	return (ret);
365*0Sstevel@tonic-gate }
366*0Sstevel@tonic-gate 
367*0Sstevel@tonic-gate /*
368*0Sstevel@tonic-gate  * __bam_partsize --
369*0Sstevel@tonic-gate  *	Figure out how much space a partial data item is in total.
370*0Sstevel@tonic-gate  */
371*0Sstevel@tonic-gate static u_int32_t
__bam_partsize(data,h,indx)372*0Sstevel@tonic-gate __bam_partsize(data, h, indx)
373*0Sstevel@tonic-gate 	DBT *data;
374*0Sstevel@tonic-gate 	PAGE *h;
375*0Sstevel@tonic-gate 	u_int32_t indx;
376*0Sstevel@tonic-gate {
377*0Sstevel@tonic-gate 	BKEYDATA *bk;
378*0Sstevel@tonic-gate 	u_int32_t nbytes;
379*0Sstevel@tonic-gate 
380*0Sstevel@tonic-gate 	/*
381*0Sstevel@tonic-gate 	 * Figure out how much total space we'll need.  If the record doesn't
382*0Sstevel@tonic-gate 	 * already exist, it's simply the data we're provided.
383*0Sstevel@tonic-gate 	 */
384*0Sstevel@tonic-gate 	if (indx >= NUM_ENT(h))
385*0Sstevel@tonic-gate 		return (data->doff + data->size);
386*0Sstevel@tonic-gate 
387*0Sstevel@tonic-gate 	/*
388*0Sstevel@tonic-gate 	 * Otherwise, it's the data provided plus any already existing data
389*0Sstevel@tonic-gate 	 * that we're not replacing.
390*0Sstevel@tonic-gate 	 */
391*0Sstevel@tonic-gate 	bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
392*0Sstevel@tonic-gate 	nbytes =
393*0Sstevel@tonic-gate 	    B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len;
394*0Sstevel@tonic-gate 
395*0Sstevel@tonic-gate 	/*
396*0Sstevel@tonic-gate 	 * There are really two cases here:
397*0Sstevel@tonic-gate 	 *
398*0Sstevel@tonic-gate 	 * Case 1: We are replacing some bytes that do not exist (i.e., they
399*0Sstevel@tonic-gate 	 * are past the end of the record).  In this case the number of bytes
400*0Sstevel@tonic-gate 	 * we are replacing is irrelevant and all we care about is how many
401*0Sstevel@tonic-gate 	 * bytes we are going to add from offset.  So, the new record length
402*0Sstevel@tonic-gate 	 * is going to be the size of the new bytes (size) plus wherever those
403*0Sstevel@tonic-gate 	 * new bytes begin (doff).
404*0Sstevel@tonic-gate 	 *
405*0Sstevel@tonic-gate 	 * Case 2: All the bytes we are replacing exist.  Therefore, the new
406*0Sstevel@tonic-gate 	 * size is the oldsize (nbytes) minus the bytes we are replacing (dlen)
407*0Sstevel@tonic-gate 	 * plus the bytes we are adding (size).
408*0Sstevel@tonic-gate 	 */
409*0Sstevel@tonic-gate 	if (nbytes < data->doff + data->dlen)		/* Case 1 */
410*0Sstevel@tonic-gate 		return (data->doff + data->size);
411*0Sstevel@tonic-gate 
412*0Sstevel@tonic-gate 	return (nbytes + data->size - data->dlen);	/* Case 2 */
413*0Sstevel@tonic-gate }
414*0Sstevel@tonic-gate 
/*
 * OVPUT --
 *	Copy an overflow item onto a page.
 *
 *	h	page to put the item on
 *	indx	index on h to put it at
 *	bo	the item itself (an lvalue; its address is taken)
 *
 * The expansion relies on two names from the calling function: dbc, which
 * is passed through to __db_pitem(), and ret, which receives the result.
 * If __db_pitem() fails the macro RETURNS ret from the calling function,
 * so it must not be used where cleanup is still pending.
 */
#undef	OVPUT
#define	OVPUT(h, indx, bo) do {						\
	DBT ovput_hdr_;							\
									\
	memset(&ovput_hdr_, 0, sizeof(ovput_hdr_));			\
	ovput_hdr_.data = &(bo);					\
	ovput_hdr_.size = BOVERFLOW_SIZE;				\
	ret = __db_pitem(dbc,						\
	    (h), (indx), BOVERFLOW_SIZE, &ovput_hdr_, NULL);		\
	if (ret != 0)							\
		return (ret);						\
} while (0)
429*0Sstevel@tonic-gate 
430*0Sstevel@tonic-gate /*
431*0Sstevel@tonic-gate  * __bam_ovput --
432*0Sstevel@tonic-gate  *	Build an overflow item and put it on the page.
433*0Sstevel@tonic-gate  */
434*0Sstevel@tonic-gate static int
__bam_ovput(dbc,h,indx,item)435*0Sstevel@tonic-gate __bam_ovput(dbc, h, indx, item)
436*0Sstevel@tonic-gate 	DBC *dbc;
437*0Sstevel@tonic-gate 	PAGE *h;
438*0Sstevel@tonic-gate 	u_int32_t indx;
439*0Sstevel@tonic-gate 	DBT *item;
440*0Sstevel@tonic-gate {
441*0Sstevel@tonic-gate 	BOVERFLOW bo;
442*0Sstevel@tonic-gate 	int ret;
443*0Sstevel@tonic-gate 
444*0Sstevel@tonic-gate 	UMRW(bo.unused1);
445*0Sstevel@tonic-gate 	B_TSET(bo.type, B_OVERFLOW, 0);
446*0Sstevel@tonic-gate 	UMRW(bo.unused2);
447*0Sstevel@tonic-gate 	if ((ret = __db_poff(dbc, item, &bo.pgno, __bam_new)) != 0)
448*0Sstevel@tonic-gate 		return (ret);
449*0Sstevel@tonic-gate 	bo.tlen = item->size;
450*0Sstevel@tonic-gate 
451*0Sstevel@tonic-gate 	OVPUT(h, indx, bo);
452*0Sstevel@tonic-gate 
453*0Sstevel@tonic-gate 	return (0);
454*0Sstevel@tonic-gate }
455*0Sstevel@tonic-gate 
456*0Sstevel@tonic-gate /*
457*0Sstevel@tonic-gate  * __bam_ritem --
458*0Sstevel@tonic-gate  *	Replace an item on a page.
459*0Sstevel@tonic-gate  *
460*0Sstevel@tonic-gate  * PUBLIC: int __bam_ritem __P((DBC *, PAGE *, u_int32_t, DBT *));
461*0Sstevel@tonic-gate  */
462*0Sstevel@tonic-gate int
__bam_ritem(dbc,h,indx,data)463*0Sstevel@tonic-gate __bam_ritem(dbc, h, indx, data)
464*0Sstevel@tonic-gate 	DBC *dbc;
465*0Sstevel@tonic-gate 	PAGE *h;
466*0Sstevel@tonic-gate 	u_int32_t indx;
467*0Sstevel@tonic-gate 	DBT *data;
468*0Sstevel@tonic-gate {
469*0Sstevel@tonic-gate 	BKEYDATA *bk;
470*0Sstevel@tonic-gate 	DB *dbp;
471*0Sstevel@tonic-gate 	DBT orig, repl;
472*0Sstevel@tonic-gate 	db_indx_t cnt, lo, ln, min, off, prefix, suffix;
473*0Sstevel@tonic-gate 	int32_t nbytes;
474*0Sstevel@tonic-gate 	int ret;
475*0Sstevel@tonic-gate 	u_int8_t *p, *t;
476*0Sstevel@tonic-gate 
477*0Sstevel@tonic-gate 	dbp = dbc->dbp;
478*0Sstevel@tonic-gate 
479*0Sstevel@tonic-gate 	/*
480*0Sstevel@tonic-gate 	 * Replace a single item onto a page.  The logic figuring out where
481*0Sstevel@tonic-gate 	 * to insert and whether it fits is handled in the caller.  All we do
482*0Sstevel@tonic-gate 	 * here is manage the page shuffling.
483*0Sstevel@tonic-gate 	 */
484*0Sstevel@tonic-gate 	bk = GET_BKEYDATA(h, indx);
485*0Sstevel@tonic-gate 
486*0Sstevel@tonic-gate 	/* Log the change. */
487*0Sstevel@tonic-gate 	if (DB_LOGGING(dbc)) {
488*0Sstevel@tonic-gate 		/*
489*0Sstevel@tonic-gate 		 * We might as well check to see if the two data items share
490*0Sstevel@tonic-gate 		 * a common prefix and suffix -- it can save us a lot of log
491*0Sstevel@tonic-gate 		 * message if they're large.
492*0Sstevel@tonic-gate 		 */
493*0Sstevel@tonic-gate 		min = data->size < bk->len ? data->size : bk->len;
494*0Sstevel@tonic-gate 		for (prefix = 0,
495*0Sstevel@tonic-gate 		    p = bk->data, t = data->data;
496*0Sstevel@tonic-gate 		    prefix < min && *p == *t; ++prefix, ++p, ++t)
497*0Sstevel@tonic-gate 			;
498*0Sstevel@tonic-gate 
499*0Sstevel@tonic-gate 		min -= prefix;
500*0Sstevel@tonic-gate 		for (suffix = 0,
501*0Sstevel@tonic-gate 		    p = (u_int8_t *)bk->data + bk->len - 1,
502*0Sstevel@tonic-gate 		    t = (u_int8_t *)data->data + data->size - 1;
503*0Sstevel@tonic-gate 		    suffix < min && *p == *t; ++suffix, --p, --t)
504*0Sstevel@tonic-gate 			;
505*0Sstevel@tonic-gate 
506*0Sstevel@tonic-gate 		/* We only log the parts of the keys that have changed. */
507*0Sstevel@tonic-gate 		orig.data = (u_int8_t *)bk->data + prefix;
508*0Sstevel@tonic-gate 		orig.size = bk->len - (prefix + suffix);
509*0Sstevel@tonic-gate 		repl.data = (u_int8_t *)data->data + prefix;
510*0Sstevel@tonic-gate 		repl.size = data->size - (prefix + suffix);
511*0Sstevel@tonic-gate 		if ((ret = __bam_repl_log(dbp->dbenv->lg_info, dbc->txn,
512*0Sstevel@tonic-gate 		    &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h),
513*0Sstevel@tonic-gate 		    (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type),
514*0Sstevel@tonic-gate 		    &orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0)
515*0Sstevel@tonic-gate 			return (ret);
516*0Sstevel@tonic-gate 	}
517*0Sstevel@tonic-gate 
518*0Sstevel@tonic-gate 	/*
519*0Sstevel@tonic-gate 	 * Set references to the first in-use byte on the page and the
520*0Sstevel@tonic-gate 	 * first byte of the item being replaced.
521*0Sstevel@tonic-gate 	 */
522*0Sstevel@tonic-gate 	p = (u_int8_t *)h + HOFFSET(h);
523*0Sstevel@tonic-gate 	t = (u_int8_t *)bk;
524*0Sstevel@tonic-gate 
525*0Sstevel@tonic-gate 	/*
526*0Sstevel@tonic-gate 	 * If the entry is growing in size, shift the beginning of the data
527*0Sstevel@tonic-gate 	 * part of the page down.  If the entry is shrinking in size, shift
528*0Sstevel@tonic-gate 	 * the beginning of the data part of the page up.  Use memmove(3),
529*0Sstevel@tonic-gate 	 * the regions overlap.
530*0Sstevel@tonic-gate 	 */
531*0Sstevel@tonic-gate 	lo = BKEYDATA_SIZE(bk->len);
532*0Sstevel@tonic-gate 	ln = BKEYDATA_SIZE(data->size);
533*0Sstevel@tonic-gate 	if (lo != ln) {
534*0Sstevel@tonic-gate 		nbytes = lo - ln;		/* Signed difference. */
535*0Sstevel@tonic-gate 		if (p == t)			/* First index is fast. */
536*0Sstevel@tonic-gate 			h->inp[indx] += nbytes;
537*0Sstevel@tonic-gate 		else {				/* Else, shift the page. */
538*0Sstevel@tonic-gate 			memmove(p + nbytes, p, t - p);
539*0Sstevel@tonic-gate 
540*0Sstevel@tonic-gate 			/* Adjust the indices' offsets. */
541*0Sstevel@tonic-gate 			off = h->inp[indx];
542*0Sstevel@tonic-gate 			for (cnt = 0; cnt < NUM_ENT(h); ++cnt)
543*0Sstevel@tonic-gate 				if (h->inp[cnt] <= off)
544*0Sstevel@tonic-gate 					h->inp[cnt] += nbytes;
545*0Sstevel@tonic-gate 		}
546*0Sstevel@tonic-gate 
547*0Sstevel@tonic-gate 		/* Clean up the page and adjust the item's reference. */
548*0Sstevel@tonic-gate 		HOFFSET(h) += nbytes;
549*0Sstevel@tonic-gate 		t += nbytes;
550*0Sstevel@tonic-gate 	}
551*0Sstevel@tonic-gate 
552*0Sstevel@tonic-gate 	/* Copy the new item onto the page. */
553*0Sstevel@tonic-gate 	bk = (BKEYDATA *)t;
554*0Sstevel@tonic-gate 	B_TSET(bk->type, B_KEYDATA, 0);
555*0Sstevel@tonic-gate 	bk->len = data->size;
556*0Sstevel@tonic-gate 	memcpy(bk->data, data->data, data->size);
557*0Sstevel@tonic-gate 
558*0Sstevel@tonic-gate 	return (0);
559*0Sstevel@tonic-gate }
560*0Sstevel@tonic-gate 
561*0Sstevel@tonic-gate /*
562*0Sstevel@tonic-gate  * __bam_ndup --
563*0Sstevel@tonic-gate  *	Check to see if the duplicate set at indx should have its own page.
564*0Sstevel@tonic-gate  *	If it should, create it.
565*0Sstevel@tonic-gate  */
566*0Sstevel@tonic-gate static int
__bam_ndup(dbc,h,indx)567*0Sstevel@tonic-gate __bam_ndup(dbc, h, indx)
568*0Sstevel@tonic-gate 	DBC *dbc;
569*0Sstevel@tonic-gate 	PAGE *h;
570*0Sstevel@tonic-gate 	u_int32_t indx;
571*0Sstevel@tonic-gate {
572*0Sstevel@tonic-gate 	BKEYDATA *bk;
573*0Sstevel@tonic-gate 	BOVERFLOW bo;
574*0Sstevel@tonic-gate 	DB *dbp;
575*0Sstevel@tonic-gate 	DBT hdr;
576*0Sstevel@tonic-gate 	PAGE *cp;
577*0Sstevel@tonic-gate 	db_indx_t cnt, cpindx, first, sz;
578*0Sstevel@tonic-gate 	int ret;
579*0Sstevel@tonic-gate 
580*0Sstevel@tonic-gate 	dbp = dbc->dbp;
581*0Sstevel@tonic-gate 
582*0Sstevel@tonic-gate 	while (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
583*0Sstevel@tonic-gate 		indx -= P_INDX;
584*0Sstevel@tonic-gate 	for (cnt = 0, sz = 0, first = indx;; ++cnt, indx += P_INDX) {
585*0Sstevel@tonic-gate 		if (indx >= NUM_ENT(h) || h->inp[first] != h->inp[indx])
586*0Sstevel@tonic-gate 			break;
587*0Sstevel@tonic-gate 		bk = GET_BKEYDATA(h, indx);
588*0Sstevel@tonic-gate 		sz += B_TYPE(bk->type) == B_KEYDATA ?
589*0Sstevel@tonic-gate 		    BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
590*0Sstevel@tonic-gate 		bk = GET_BKEYDATA(h, indx + O_INDX);
591*0Sstevel@tonic-gate 		sz += B_TYPE(bk->type) == B_KEYDATA ?
592*0Sstevel@tonic-gate 		    BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
593*0Sstevel@tonic-gate 	}
594*0Sstevel@tonic-gate 
595*0Sstevel@tonic-gate 	/*
596*0Sstevel@tonic-gate 	 * If this set of duplicates is using more than 25% of the page, move
597*0Sstevel@tonic-gate 	 * them off.  The choice of 25% is a WAG, but it has to be small enough
598*0Sstevel@tonic-gate 	 * that we can always split regardless of the presence of duplicates.
599*0Sstevel@tonic-gate 	 */
600*0Sstevel@tonic-gate 	if (sz < dbp->pgsize / 4)
601*0Sstevel@tonic-gate 		return (0);
602*0Sstevel@tonic-gate 
603*0Sstevel@tonic-gate 	/* Get a new page. */
604*0Sstevel@tonic-gate 	if ((ret = __bam_new(dbc, P_DUPLICATE, &cp)) != 0)
605*0Sstevel@tonic-gate 		return (ret);
606*0Sstevel@tonic-gate 
607*0Sstevel@tonic-gate 	/*
608*0Sstevel@tonic-gate 	 * Move this set of duplicates off the page.  First points to the first
609*0Sstevel@tonic-gate 	 * key of the first duplicate key/data pair, cnt is the number of pairs
610*0Sstevel@tonic-gate 	 * we're dealing with.
611*0Sstevel@tonic-gate 	 */
612*0Sstevel@tonic-gate 	memset(&hdr, 0, sizeof(hdr));
613*0Sstevel@tonic-gate 	for (indx = first + O_INDX, cpindx = 0;; ++cpindx) {
614*0Sstevel@tonic-gate 		/* Copy the entry to the new page. */
615*0Sstevel@tonic-gate 		bk = GET_BKEYDATA(h, indx);
616*0Sstevel@tonic-gate 		hdr.data = bk;
617*0Sstevel@tonic-gate 		hdr.size = B_TYPE(bk->type) == B_KEYDATA ?
618*0Sstevel@tonic-gate 		    BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE;
619*0Sstevel@tonic-gate 		if ((ret =
620*0Sstevel@tonic-gate 		    __db_pitem(dbc, cp, cpindx, hdr.size, &hdr, NULL)) != 0)
621*0Sstevel@tonic-gate 			goto err;
622*0Sstevel@tonic-gate 
623*0Sstevel@tonic-gate 		/*
624*0Sstevel@tonic-gate 		 * Move cursors referencing the old entry to the new entry.
625*0Sstevel@tonic-gate 		 * Done after the page put because __db_pitem() adjusts
626*0Sstevel@tonic-gate 		 * cursors on the new page, and before the delete because
627*0Sstevel@tonic-gate 		 * __db_ditem adjusts cursors on the old page.
628*0Sstevel@tonic-gate 		 */
629*0Sstevel@tonic-gate 		__bam_ca_dup(dbp,
630*0Sstevel@tonic-gate 		    PGNO(h), first, indx - O_INDX, PGNO(cp), cpindx);
631*0Sstevel@tonic-gate 
632*0Sstevel@tonic-gate 		/* Delete the data item. */
633*0Sstevel@tonic-gate 		if ((ret = __db_ditem(dbc, h, indx, hdr.size)) != 0)
634*0Sstevel@tonic-gate 			goto err;
635*0Sstevel@tonic-gate 
636*0Sstevel@tonic-gate 		/* Delete all but the first reference to the key. */
637*0Sstevel@tonic-gate 		if (--cnt == 0)
638*0Sstevel@tonic-gate 			break;
639*0Sstevel@tonic-gate 		if ((ret = __bam_adjindx(dbc, h, indx, first, 0)) != 0)
640*0Sstevel@tonic-gate 			goto err;
641*0Sstevel@tonic-gate 	}
642*0Sstevel@tonic-gate 
643*0Sstevel@tonic-gate 	/* Put in a new data item that points to the duplicates page. */
644*0Sstevel@tonic-gate 	UMRW(bo.unused1);
645*0Sstevel@tonic-gate 	B_TSET(bo.type, B_DUPLICATE, 0);
646*0Sstevel@tonic-gate 	UMRW(bo.unused2);
647*0Sstevel@tonic-gate 	bo.pgno = cp->pgno;
648*0Sstevel@tonic-gate 	bo.tlen = 0;
649*0Sstevel@tonic-gate 
650*0Sstevel@tonic-gate 	OVPUT(h, indx, bo);
651*0Sstevel@tonic-gate 
652*0Sstevel@tonic-gate 	return (memp_fput(dbp->mpf, cp, DB_MPOOL_DIRTY));
653*0Sstevel@tonic-gate 
654*0Sstevel@tonic-gate err:	(void)__bam_free(dbc, cp);
655*0Sstevel@tonic-gate 	return (ret);
656*0Sstevel@tonic-gate }
657*0Sstevel@tonic-gate 
658*0Sstevel@tonic-gate /*
659*0Sstevel@tonic-gate  * __bam_fixed --
660*0Sstevel@tonic-gate  *	Build the real record for a fixed length put.
661*0Sstevel@tonic-gate  */
662*0Sstevel@tonic-gate static int
__bam_fixed(dbc,dbt)663*0Sstevel@tonic-gate __bam_fixed(dbc, dbt)
664*0Sstevel@tonic-gate 	DBC *dbc;
665*0Sstevel@tonic-gate 	DBT *dbt;
666*0Sstevel@tonic-gate {
667*0Sstevel@tonic-gate 	DB *dbp;
668*0Sstevel@tonic-gate 	RECNO *rp;
669*0Sstevel@tonic-gate 	int ret;
670*0Sstevel@tonic-gate 
671*0Sstevel@tonic-gate 	dbp = dbc->dbp;
672*0Sstevel@tonic-gate 	rp = ((BTREE *)dbp->internal)->recno;
673*0Sstevel@tonic-gate 
674*0Sstevel@tonic-gate 	/*
675*0Sstevel@tonic-gate 	 * If database contains fixed-length records, and the record is long,
676*0Sstevel@tonic-gate 	 * return EINVAL.
677*0Sstevel@tonic-gate 	 */
678*0Sstevel@tonic-gate 	if (dbt->size > rp->re_len)
679*0Sstevel@tonic-gate 		return (EINVAL);
680*0Sstevel@tonic-gate 
681*0Sstevel@tonic-gate 	/*
682*0Sstevel@tonic-gate 	 * The caller checked to see if it was just right, so we know it's
683*0Sstevel@tonic-gate 	 * short.  Pad it out.  We use the record data return memory, it's
684*0Sstevel@tonic-gate 	 * only a short-term use.
685*0Sstevel@tonic-gate 	 */
686*0Sstevel@tonic-gate 	if (dbc->rdata.ulen < rp->re_len) {
687*0Sstevel@tonic-gate 		 if ((ret = __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) {
688*0Sstevel@tonic-gate 			dbc->rdata.ulen = 0;
689*0Sstevel@tonic-gate 			dbc->rdata.data = NULL;
690*0Sstevel@tonic-gate 			return (ret);
691*0Sstevel@tonic-gate 		}
692*0Sstevel@tonic-gate 		dbc->rdata.ulen = rp->re_len;
693*0Sstevel@tonic-gate 	}
694*0Sstevel@tonic-gate 	memcpy(dbc->rdata.data, dbt->data, dbt->size);
695*0Sstevel@tonic-gate 	memset((u_int8_t *)dbc->rdata.data + dbt->size,
696*0Sstevel@tonic-gate 	    rp->re_pad, rp->re_len - dbt->size);
697*0Sstevel@tonic-gate 
698*0Sstevel@tonic-gate 	/*
699*0Sstevel@tonic-gate 	 * Clean up our flags and other information just in case, and
700*0Sstevel@tonic-gate 	 * change the caller's DBT to reference our created record.
701*0Sstevel@tonic-gate 	 */
702*0Sstevel@tonic-gate 	dbc->rdata.size = rp->re_len;
703*0Sstevel@tonic-gate 	dbc->rdata.dlen = 0;
704*0Sstevel@tonic-gate 	dbc->rdata.doff = 0;
705*0Sstevel@tonic-gate 	dbc->rdata.flags = 0;
706*0Sstevel@tonic-gate 	*dbt = dbc->rdata;
707*0Sstevel@tonic-gate 
708*0Sstevel@tonic-gate 	return (0);
709*0Sstevel@tonic-gate }
710*0Sstevel@tonic-gate 
711*0Sstevel@tonic-gate /*
712*0Sstevel@tonic-gate  * __bam_partial --
713*0Sstevel@tonic-gate  *	Build the real record for a partial put.
714*0Sstevel@tonic-gate  */
715*0Sstevel@tonic-gate static int
__bam_partial(dbc,dbt,h,indx,nbytes,flags)716*0Sstevel@tonic-gate __bam_partial(dbc, dbt, h, indx, nbytes, flags)
717*0Sstevel@tonic-gate 	DBC *dbc;
718*0Sstevel@tonic-gate 	DBT *dbt;
719*0Sstevel@tonic-gate 	PAGE *h;
720*0Sstevel@tonic-gate 	u_int32_t indx, nbytes, flags;
721*0Sstevel@tonic-gate {
722*0Sstevel@tonic-gate 	BKEYDATA *bk, tbk;
723*0Sstevel@tonic-gate 	BOVERFLOW *bo;
724*0Sstevel@tonic-gate 	DB *dbp;
725*0Sstevel@tonic-gate 	DBT copy;
726*0Sstevel@tonic-gate 	u_int32_t len, tlen;
727*0Sstevel@tonic-gate 	u_int8_t *p;
728*0Sstevel@tonic-gate 	int ret;
729*0Sstevel@tonic-gate 
730*0Sstevel@tonic-gate 	COMPQUIET(bo, NULL);
731*0Sstevel@tonic-gate 
732*0Sstevel@tonic-gate 	dbp = dbc->dbp;
733*0Sstevel@tonic-gate 
734*0Sstevel@tonic-gate 	/* We use the record data return memory, it's only a short-term use. */
735*0Sstevel@tonic-gate 	if (dbc->rdata.ulen < nbytes) {
736*0Sstevel@tonic-gate 		 if ((ret = __os_realloc(&dbc->rdata.data, nbytes)) != 0) {
737*0Sstevel@tonic-gate 			dbc->rdata.ulen = 0;
738*0Sstevel@tonic-gate 			dbc->rdata.data = NULL;
739*0Sstevel@tonic-gate 			return (ret);
740*0Sstevel@tonic-gate 		}
741*0Sstevel@tonic-gate 		dbc->rdata.ulen = nbytes;
742*0Sstevel@tonic-gate 	}
743*0Sstevel@tonic-gate 
744*0Sstevel@tonic-gate 	/*
745*0Sstevel@tonic-gate 	 * We use nul bytes for any part of the record that isn't specified;
746*0Sstevel@tonic-gate 	 * get it over with.
747*0Sstevel@tonic-gate 	 */
748*0Sstevel@tonic-gate 	memset(dbc->rdata.data, 0, nbytes);
749*0Sstevel@tonic-gate 
750*0Sstevel@tonic-gate 	/*
751*0Sstevel@tonic-gate 	 * In the next clauses, we need to do three things: a) set p to point
752*0Sstevel@tonic-gate 	 * to the place at which to copy the user's data, b) set tlen to the
753*0Sstevel@tonic-gate 	 * total length of the record, not including the bytes contributed by
754*0Sstevel@tonic-gate 	 * the user, and c) copy any valid data from an existing record.
755*0Sstevel@tonic-gate 	 */
756*0Sstevel@tonic-gate 	if (LF_ISSET(BI_NEWKEY)) {
757*0Sstevel@tonic-gate 		tlen = dbt->doff;
758*0Sstevel@tonic-gate 		p = (u_int8_t *)dbc->rdata.data + dbt->doff;
759*0Sstevel@tonic-gate 		goto ucopy;
760*0Sstevel@tonic-gate 	}
761*0Sstevel@tonic-gate 
762*0Sstevel@tonic-gate 	/* Find the current record. */
763*0Sstevel@tonic-gate 	if (indx < NUM_ENT(h)) {
764*0Sstevel@tonic-gate 		bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
765*0Sstevel@tonic-gate 		bo = (BOVERFLOW *)bk;
766*0Sstevel@tonic-gate 	} else {
767*0Sstevel@tonic-gate 		bk = &tbk;
768*0Sstevel@tonic-gate 		B_TSET(bk->type, B_KEYDATA, 0);
769*0Sstevel@tonic-gate 		bk->len = 0;
770*0Sstevel@tonic-gate 	}
771*0Sstevel@tonic-gate 	if (B_TYPE(bk->type) == B_OVERFLOW) {
772*0Sstevel@tonic-gate 		/*
773*0Sstevel@tonic-gate 		 * In the case of an overflow record, we shift things around
774*0Sstevel@tonic-gate 		 * in the current record rather than allocate a separate copy.
775*0Sstevel@tonic-gate 		 */
776*0Sstevel@tonic-gate 		memset(&copy, 0, sizeof(copy));
777*0Sstevel@tonic-gate 		if ((ret = __db_goff(dbp, &copy, bo->tlen,
778*0Sstevel@tonic-gate 		    bo->pgno, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
779*0Sstevel@tonic-gate 			return (ret);
780*0Sstevel@tonic-gate 
781*0Sstevel@tonic-gate 		/* Skip any leading data from the original record. */
782*0Sstevel@tonic-gate 		tlen = dbt->doff;
783*0Sstevel@tonic-gate 		p = (u_int8_t *)dbc->rdata.data + dbt->doff;
784*0Sstevel@tonic-gate 
785*0Sstevel@tonic-gate 		/*
786*0Sstevel@tonic-gate 		 * Copy in any trailing data from the original record.
787*0Sstevel@tonic-gate 		 *
788*0Sstevel@tonic-gate 		 * If the original record was larger than the original offset
789*0Sstevel@tonic-gate 		 * plus the bytes being deleted, there is trailing data in the
790*0Sstevel@tonic-gate 		 * original record we need to preserve.  If we aren't deleting
791*0Sstevel@tonic-gate 		 * the same number of bytes as we're inserting, copy it up or
792*0Sstevel@tonic-gate 		 * down, into place.
793*0Sstevel@tonic-gate 		 *
794*0Sstevel@tonic-gate 		 * Use memmove(), the regions may overlap.
795*0Sstevel@tonic-gate 		 */
796*0Sstevel@tonic-gate 		if (bo->tlen > dbt->doff + dbt->dlen) {
797*0Sstevel@tonic-gate 			len = bo->tlen - (dbt->doff + dbt->dlen);
798*0Sstevel@tonic-gate 			if (dbt->dlen != dbt->size)
799*0Sstevel@tonic-gate 				memmove(p + dbt->size, p + dbt->dlen, len);
800*0Sstevel@tonic-gate 			tlen += len;
801*0Sstevel@tonic-gate 		}
802*0Sstevel@tonic-gate 	} else {
803*0Sstevel@tonic-gate 		/* Copy in any leading data from the original record. */
804*0Sstevel@tonic-gate 		memcpy(dbc->rdata.data,
805*0Sstevel@tonic-gate 		    bk->data, dbt->doff > bk->len ? bk->len : dbt->doff);
806*0Sstevel@tonic-gate 		tlen = dbt->doff;
807*0Sstevel@tonic-gate 		p = (u_int8_t *)dbc->rdata.data + dbt->doff;
808*0Sstevel@tonic-gate 
809*0Sstevel@tonic-gate 		/* Copy in any trailing data from the original record. */
810*0Sstevel@tonic-gate 		len = dbt->doff + dbt->dlen;
811*0Sstevel@tonic-gate 		if (bk->len > len) {
812*0Sstevel@tonic-gate 			memcpy(p + dbt->size, bk->data + len, bk->len - len);
813*0Sstevel@tonic-gate 			tlen += bk->len - len;
814*0Sstevel@tonic-gate 		}
815*0Sstevel@tonic-gate 	}
816*0Sstevel@tonic-gate 
817*0Sstevel@tonic-gate ucopy:	/*
818*0Sstevel@tonic-gate 	 * Copy in the application provided data -- p and tlen must have been
819*0Sstevel@tonic-gate 	 * initialized above.
820*0Sstevel@tonic-gate 	 */
821*0Sstevel@tonic-gate 	memcpy(p, dbt->data, dbt->size);
822*0Sstevel@tonic-gate 	tlen += dbt->size;
823*0Sstevel@tonic-gate 
824*0Sstevel@tonic-gate 	/* Set the DBT to reference our new record. */
825*0Sstevel@tonic-gate 	dbc->rdata.size = tlen;
826*0Sstevel@tonic-gate 	dbc->rdata.dlen = 0;
827*0Sstevel@tonic-gate 	dbc->rdata.doff = 0;
828*0Sstevel@tonic-gate 	dbc->rdata.flags = 0;
829*0Sstevel@tonic-gate 	*dbt = dbc->rdata;
830*0Sstevel@tonic-gate 	return (0);
831*0Sstevel@tonic-gate }
832