xref: /netbsd-src/lib/libc/db/btree/bt_overflow.c (revision aae80e6be7bf8e6ad43f8b63d296d2d3923c4ee5)
1*aae80e6bSchristos /*	$NetBSD: bt_overflow.c,v 1.22 2016/09/24 21:31:25 christos Exp $	*/
2a954b078Scgd 
39f0aa214Scgd /*-
4a6d14e36Scgd  * Copyright (c) 1990, 1993, 1994
59f0aa214Scgd  *	The Regents of the University of California.  All rights reserved.
69f0aa214Scgd  *
79f0aa214Scgd  * This code is derived from software contributed to Berkeley by
89f0aa214Scgd  * Mike Olson.
99f0aa214Scgd  *
109f0aa214Scgd  * Redistribution and use in source and binary forms, with or without
119f0aa214Scgd  * modification, are permitted provided that the following conditions
129f0aa214Scgd  * are met:
139f0aa214Scgd  * 1. Redistributions of source code must retain the above copyright
149f0aa214Scgd  *    notice, this list of conditions and the following disclaimer.
159f0aa214Scgd  * 2. Redistributions in binary form must reproduce the above copyright
169f0aa214Scgd  *    notice, this list of conditions and the following disclaimer in the
179f0aa214Scgd  *    documentation and/or other materials provided with the distribution.
18eb7c1594Sagc  * 3. Neither the name of the University nor the names of its contributors
199f0aa214Scgd  *    may be used to endorse or promote products derived from this software
209f0aa214Scgd  *    without specific prior written permission.
219f0aa214Scgd  *
229f0aa214Scgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
239f0aa214Scgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
249f0aa214Scgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
259f0aa214Scgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
269f0aa214Scgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
279f0aa214Scgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
289f0aa214Scgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
299f0aa214Scgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
309f0aa214Scgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
319f0aa214Scgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
329f0aa214Scgd  * SUCH DAMAGE.
339f0aa214Scgd  */
349f0aa214Scgd 
35d3595ddfSjoerg #if HAVE_NBTOOL_CONFIG_H
36d3595ddfSjoerg #include "nbtool_config.h"
37d3595ddfSjoerg #endif
38d3595ddfSjoerg 
3900ae392dSchristos #include <sys/cdefs.h>
40*aae80e6bSchristos __RCSID("$NetBSD: bt_overflow.c,v 1.22 2016/09/24 21:31:25 christos Exp $");
419f0aa214Scgd 
4243fa6fe3Sjtc #include "namespace.h"
439f0aa214Scgd #include <sys/param.h>
449f0aa214Scgd 
45cb9daf8fSchristos #include <assert.h>
469f0aa214Scgd #include <stdio.h>
479f0aa214Scgd #include <stdlib.h>
489f0aa214Scgd #include <string.h>
499f0aa214Scgd 
509f0aa214Scgd #include <db.h>
519f0aa214Scgd #include "btree.h"
529f0aa214Scgd 
539f0aa214Scgd /*
549f0aa214Scgd  * Big key/data code.
559f0aa214Scgd  *
569f0aa214Scgd  * Big key and data entries are stored on linked lists of pages.  The initial
579f0aa214Scgd  * reference is byte string stored with the key or data and is the page number
589f0aa214Scgd  * and size.  The actual record is stored in a chain of pages linked by the
599f0aa214Scgd  * nextpg field of the PAGE header.
609f0aa214Scgd  *
619f0aa214Scgd  * The first page of the chain has a special property.  If the record is used
629f0aa214Scgd  * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set
639f0aa214Scgd  * in the header.
649f0aa214Scgd  *
659f0aa214Scgd  * XXX
669f0aa214Scgd  * A single DBT is written to each chain, so a lot of space on the last page
679f0aa214Scgd  * is wasted.  This is a fairly major bug for some data sets.
689f0aa214Scgd  */
699f0aa214Scgd 
709f0aa214Scgd /*
719f0aa214Scgd  * __OVFL_GET -- Get an overflow key/data item.
729f0aa214Scgd  *
739f0aa214Scgd  * Parameters:
749f0aa214Scgd  *	t:	tree
7540b37a3bSjoerg  *	p:	pointer to { pgno_t, uint32_t }
769f0aa214Scgd  *	buf:	storage address
779f0aa214Scgd  *	bufsz:	storage size
789f0aa214Scgd  *
799f0aa214Scgd  * Returns:
809f0aa214Scgd  *	RET_ERROR, RET_SUCCESS
819f0aa214Scgd  */
829f0aa214Scgd int
__ovfl_get(BTREE * t,void * p,size_t * ssz,void ** buf,size_t * bufsz)83cb9daf8fSchristos __ovfl_get(BTREE *t, void *p, size_t *ssz, void **buf, size_t *bufsz)
849f0aa214Scgd {
859f0aa214Scgd 	PAGE *h;
869f0aa214Scgd 	pgno_t pg;
8740b37a3bSjoerg 	uint32_t sz, nb, plen;
88cb9daf8fSchristos 	size_t temp;
899f0aa214Scgd 
90dcc9c821Schristos 	memmove(&pg, p, sizeof(pg));
9140b37a3bSjoerg 	memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(uint32_t));
929f0aa214Scgd 	*ssz = sz;
939f0aa214Scgd 
949f0aa214Scgd #ifdef DEBUG
959f0aa214Scgd 	if (pg == P_INVALID || sz == 0)
969f0aa214Scgd 		abort();
979f0aa214Scgd #endif
989f0aa214Scgd 	/* Make the buffer bigger as necessary. */
999f0aa214Scgd 	if (*bufsz < sz) {
100e73d0381Schristos 		void *nbuf = realloc(*buf, sz);
101e73d0381Schristos 		if (nbuf == NULL)
1029f0aa214Scgd 			return (RET_ERROR);
103e73d0381Schristos 		*buf = nbuf;
1049f0aa214Scgd 		*bufsz = sz;
1059f0aa214Scgd 	}
1069f0aa214Scgd 
1079f0aa214Scgd 	/*
1089f0aa214Scgd 	 * Step through the linked list of pages, copying the data on each one
1099f0aa214Scgd 	 * into the buffer.  Never copy more than the data's length.
1109f0aa214Scgd 	 */
111cb9daf8fSchristos 	temp = t->bt_psize - BTDATAOFF;
11240b37a3bSjoerg 	_DBFIT(temp, uint32_t);
11340b37a3bSjoerg 	plen = (uint32_t)temp;
1149f0aa214Scgd 	for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) {
115*aae80e6bSchristos 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
1169f0aa214Scgd 			return (RET_ERROR);
1179f0aa214Scgd 
1189f0aa214Scgd 		nb = MIN(sz, plen);
11961238e71Schristos 		memmove(p, (char *)(void *)h + BTDATAOFF, nb);
1209f0aa214Scgd 		mpool_put(t->bt_mp, h, 0);
1219f0aa214Scgd 
1229f0aa214Scgd 		if ((sz -= nb) == 0)
1239f0aa214Scgd 			break;
1249f0aa214Scgd 	}
1259f0aa214Scgd 	return (RET_SUCCESS);
1269f0aa214Scgd }
1279f0aa214Scgd 
1289f0aa214Scgd /*
1299f0aa214Scgd  * __OVFL_PUT -- Store an overflow key/data item.
1309f0aa214Scgd  *
1319f0aa214Scgd  * Parameters:
1329f0aa214Scgd  *	t:	tree
1339f0aa214Scgd  *	data:	DBT to store
1349f0aa214Scgd  *	pgno:	storage page number
1359f0aa214Scgd  *
1369f0aa214Scgd  * Returns:
1379f0aa214Scgd  *	RET_ERROR, RET_SUCCESS
1389f0aa214Scgd  */
1399f0aa214Scgd int
__ovfl_put(BTREE * t,const DBT * dbt,pgno_t * pg)140cb9daf8fSchristos __ovfl_put(BTREE *t, const DBT *dbt, pgno_t *pg)
1419f0aa214Scgd {
1429f0aa214Scgd 	PAGE *h, *last;
1439f0aa214Scgd 	void *p;
1449f0aa214Scgd 	pgno_t npg;
14540b37a3bSjoerg 	uint32_t sz, nb, plen;
146cb9daf8fSchristos 	size_t temp;
1479f0aa214Scgd 
1489f0aa214Scgd 	/*
1499f0aa214Scgd 	 * Allocate pages and copy the key/data record into them.  Store the
1509f0aa214Scgd 	 * number of the first page in the chain.
1519f0aa214Scgd 	 */
152cb9daf8fSchristos 	temp = t->bt_psize - BTDATAOFF;
15340b37a3bSjoerg 	_DBFIT(temp, uint32_t);
15440b37a3bSjoerg 	plen = (uint32_t)temp;
155cb9daf8fSchristos 	last = NULL;
156cb9daf8fSchristos 	p = dbt->data;
157cb9daf8fSchristos 	temp = dbt->size;
15840b37a3bSjoerg 	_DBFIT(temp, uint32_t);
159c5e820caSchristos 	sz = (uint32_t)temp;
160cb9daf8fSchristos 	for (;; p = (char *)p + plen, last = h) {
1619f0aa214Scgd 		if ((h = __bt_new(t, &npg)) == NULL)
1629f0aa214Scgd 			return (RET_ERROR);
1639f0aa214Scgd 
1649f0aa214Scgd 		h->pgno = npg;
1659f0aa214Scgd 		h->nextpg = h->prevpg = P_INVALID;
1669f0aa214Scgd 		h->flags = P_OVERFLOW;
1679f0aa214Scgd 		h->lower = h->upper = 0;
1689f0aa214Scgd 
1699f0aa214Scgd 		nb = MIN(sz, plen);
170cb9daf8fSchristos 		(void)memmove((char *)(void *)h + BTDATAOFF, p, (size_t)nb);
1719f0aa214Scgd 
1729f0aa214Scgd 		if (last) {
1739f0aa214Scgd 			last->nextpg = h->pgno;
1749f0aa214Scgd 			mpool_put(t->bt_mp, last, MPOOL_DIRTY);
1759f0aa214Scgd 		} else
1769f0aa214Scgd 			*pg = h->pgno;
1779f0aa214Scgd 
1789f0aa214Scgd 		if ((sz -= nb) == 0) {
1799f0aa214Scgd 			mpool_put(t->bt_mp, h, MPOOL_DIRTY);
1809f0aa214Scgd 			break;
1819f0aa214Scgd 		}
1829f0aa214Scgd 	}
1839f0aa214Scgd 	return (RET_SUCCESS);
1849f0aa214Scgd }
1859f0aa214Scgd 
1869f0aa214Scgd /*
1879f0aa214Scgd  * __OVFL_DELETE -- Delete an overflow chain.
1889f0aa214Scgd  *
1899f0aa214Scgd  * Parameters:
1909f0aa214Scgd  *	t:	tree
19140b37a3bSjoerg  *	p:	pointer to { pgno_t, uint32_t }
1929f0aa214Scgd  *
1939f0aa214Scgd  * Returns:
1949f0aa214Scgd  *	RET_ERROR, RET_SUCCESS
1959f0aa214Scgd  */
1969f0aa214Scgd int
__ovfl_delete(BTREE * t,void * p)197cb9daf8fSchristos __ovfl_delete(BTREE *t, void *p)
1989f0aa214Scgd {
1999f0aa214Scgd 	PAGE *h;
2009f0aa214Scgd 	pgno_t pg;
20140b37a3bSjoerg 	uint32_t sz, plen;
202cb9daf8fSchristos 	size_t temp;
2039f0aa214Scgd 
204dcc9c821Schristos 	(void)memmove(&pg, p, sizeof(pg));
20540b37a3bSjoerg 	(void)memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(uint32_t));
2069f0aa214Scgd 
2079f0aa214Scgd #ifdef DEBUG
2089f0aa214Scgd 	if (pg == P_INVALID || sz == 0)
2099f0aa214Scgd 		abort();
2109f0aa214Scgd #endif
211*aae80e6bSchristos 	if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
2129f0aa214Scgd 		return (RET_ERROR);
2139f0aa214Scgd 
2149f0aa214Scgd 	/* Don't delete chains used by internal pages. */
2159f0aa214Scgd 	if (h->flags & P_PRESERVE) {
2169f0aa214Scgd 		mpool_put(t->bt_mp, h, 0);
2179f0aa214Scgd 		return (RET_SUCCESS);
2189f0aa214Scgd 	}
2199f0aa214Scgd 
2209f0aa214Scgd 	/* Step through the chain, calling the free routine for each page. */
221cb9daf8fSchristos 	temp = t->bt_psize - BTDATAOFF;
22240b37a3bSjoerg 	_DBFIT(temp, uint32_t);
22340b37a3bSjoerg 	plen = (uint32_t)temp;
224cb9daf8fSchristos 	for (;; sz -= plen) {
2259f0aa214Scgd 		pg = h->nextpg;
2269f0aa214Scgd 		__bt_free(t, h);
2279f0aa214Scgd 		if (sz <= plen)
2289f0aa214Scgd 			break;
229*aae80e6bSchristos 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
2309f0aa214Scgd 			return (RET_ERROR);
2319f0aa214Scgd 	}
2329f0aa214Scgd 	return (RET_SUCCESS);
2339f0aa214Scgd }
234