xref: /csrg-svn/lib/libc/db/btree/bt_overflow.c (revision 56558)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Mike Olson.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)bt_overflow.c	5.5 (Berkeley) 10/13/92";
13 #endif /* LIBC_SCCS and not lint */
14 
15 #include <sys/param.h>
16 #include <db.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include "btree.h"
21 
22 /*
23  * Big key/data code.
24  *
25  * Big key and data entries are stored on linked lists of pages.  The initial
26  * reference is byte string stored with the key or data and is the page number
27  * and size.  The actual record is stored in a chain of pages linked by the
28  * nextpg field of the PAGE header.
29  *
30  * The first page of the chain has a special property.  If the record is used
31  * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set
32  * in the header.
33  *
34  * XXX
35  * A single DBT is written to each chain, so a lot of space on the last page
36  * is wasted.  This is a fairly major bug for some data sets.
37  */
38 
39 /*
40  * __OVFL_GET -- Get an overflow key/data item.
41  *
42  * Parameters:
43  *	t:	tree
44  *	p:	pointer to { pgno_t, size_t }
45  *	buf:	storage address
46  *	bufsz:	storage size
47  *
48  * Returns:
49  *	RET_ERROR, RET_SUCCESS
50  */
51 int
52 __ovfl_get(t, p, ssz, buf, bufsz)
53 	BTREE *t;
54 	void *p;
55 	size_t *ssz;
56 	char **buf;
57 	size_t *bufsz;
58 {
59 	PAGE *h;
60 	pgno_t pg;
61 	size_t nb, plen, sz;
62 
63 	bcopy(p, &pg, sizeof(pgno_t));
64 	bcopy((char *)p + sizeof(pgno_t), &sz, sizeof(size_t));
65 	*ssz = sz;
66 
67 #ifdef DEBUG
68 	if (pg == P_INVALID || sz == 0)
69 		abort();
70 #endif
71 	/* Make the buffer bigger as necessary. */
72 	if (*bufsz < sz) {
73 		if ((*buf = realloc(*buf, sz)) == NULL)
74 			return (RET_ERROR);
75 		*bufsz = sz;
76 	}
77 
78 	/*
79 	 * Step through the linked list of pages, copying the data on each one
80 	 * into the buffer.  Never copy more than the data's length.
81 	 */
82 	plen = t->bt_psize - BTDATAOFF;
83 	for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) {
84 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
85 			return (RET_ERROR);
86 
87 		nb = MIN(sz, plen);
88 		bcopy((char *)h + BTDATAOFF, p, nb);
89 		mpool_put(t->bt_mp, h, 0);
90 
91 		if ((sz -= nb) == 0)
92 			break;
93 	}
94 	return (RET_SUCCESS);
95 }
96 
97 /*
98  * __OVFL_PUT -- Store an overflow key/data item.
99  *
100  * Parameters:
101  *	t:	tree
102  *	data:	DBT to store
103  *	pgno:	storage page number
104  *
105  * Returns:
106  *	RET_ERROR, RET_SUCCESS
107  */
108 int
109 __ovfl_put(t, dbt, pg)
110 	BTREE *t;
111 	const DBT *dbt;
112 	pgno_t *pg;
113 {
114 	PAGE *h, *last;
115 	void *p;
116 	pgno_t npg;
117 	size_t nb, plen, sz;
118 
119 	/*
120 	 * Allocate pages and copy the key/data record into them.  Store the
121 	 * number of the first page in the chain.
122 	 */
123 	plen = t->bt_psize - BTDATAOFF;
124 	for (last = NULL, p = dbt->data, sz = dbt->size;;
125 	    p = (char *)p + plen, last = h) {
126 		if ((h = __bt_new(t, &npg)) == NULL)
127 			return (RET_ERROR);
128 
129 		h->pgno = npg;
130 		h->nextpg = h->prevpg = P_INVALID;
131 		h->flags = P_OVERFLOW;
132 		h->lower = h->upper = 0;
133 
134 		nb = MIN(sz, plen);
135 		bcopy(p, (char *)h + BTDATAOFF, nb);
136 
137 		if (last) {
138 			last->nextpg = h->pgno;
139 			mpool_put(t->bt_mp, last, MPOOL_DIRTY);
140 		} else
141 			*pg = h->pgno;
142 
143 		if ((sz -= nb) == 0) {
144 			mpool_put(t->bt_mp, h, MPOOL_DIRTY);
145 			break;
146 		}
147 	}
148 	return (RET_SUCCESS);
149 }
150 
151 /*
152  * __OVFL_DELETE -- Delete an overflow chain.
153  *
154  * Parameters:
155  *	t:	tree
156  *	p:	pointer to { pgno_t, size_t }
157  *
158  * Returns:
159  *	RET_ERROR, RET_SUCCESS
160  */
161 int
162 __ovfl_delete(t, p)
163 	BTREE *t;
164 	void *p;
165 {
166 	PAGE *h;
167 	pgno_t pg;
168 	size_t plen, sz;
169 
170 	bcopy(p, &pg, sizeof(pgno_t));
171 	bcopy((char *)p + sizeof(pgno_t), &sz, sizeof(size_t));
172 
173 #ifdef DEBUG
174 	if (pg == P_INVALID || sz == 0)
175 		abort();
176 #endif
177 	if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
178 		return (RET_ERROR);
179 
180 	/* Don't delete chains used by internal pages. */
181 	if (h->flags & P_PRESERVE) {
182 		mpool_put(t->bt_mp, h, 0);
183 		return (RET_SUCCESS);
184 	}
185 
186 	/* Step through the chain, calling the free routine for each page. */
187 	for (plen = t->bt_psize - BTDATAOFF;; sz -= plen) {
188 		pg = h->nextpg;
189 		__bt_free(t, h);
190 		if (sz <= plen)
191 			break;
192 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
193 			return (RET_ERROR);
194 	}
195 	return (RET_SUCCESS);
196 }
197