1*2639ae9bSBen Gras /* $NetBSD: bt_overflow.c,v 1.16 2008/09/11 12:58:00 joerg Exp $ */ 2*2639ae9bSBen Gras 3*2639ae9bSBen Gras /*- 4*2639ae9bSBen Gras * Copyright (c) 1990, 1993, 1994 5*2639ae9bSBen Gras * The Regents of the University of California. All rights reserved. 6*2639ae9bSBen Gras * 7*2639ae9bSBen Gras * This code is derived from software contributed to Berkeley by 8*2639ae9bSBen Gras * Mike Olson. 9*2639ae9bSBen Gras * 10*2639ae9bSBen Gras * Redistribution and use in source and binary forms, with or without 11*2639ae9bSBen Gras * modification, are permitted provided that the following conditions 12*2639ae9bSBen Gras * are met: 13*2639ae9bSBen Gras * 1. Redistributions of source code must retain the above copyright 14*2639ae9bSBen Gras * notice, this list of conditions and the following disclaimer. 15*2639ae9bSBen Gras * 2. Redistributions in binary form must reproduce the above copyright 16*2639ae9bSBen Gras * notice, this list of conditions and the following disclaimer in the 17*2639ae9bSBen Gras * documentation and/or other materials provided with the distribution. 18*2639ae9bSBen Gras * 3. Neither the name of the University nor the names of its contributors 19*2639ae9bSBen Gras * may be used to endorse or promote products derived from this software 20*2639ae9bSBen Gras * without specific prior written permission. 21*2639ae9bSBen Gras * 22*2639ae9bSBen Gras * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23*2639ae9bSBen Gras * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24*2639ae9bSBen Gras * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25*2639ae9bSBen Gras * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26*2639ae9bSBen Gras * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27*2639ae9bSBen Gras * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28*2639ae9bSBen Gras * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29*2639ae9bSBen Gras * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30*2639ae9bSBen Gras * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31*2639ae9bSBen Gras * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32*2639ae9bSBen Gras * SUCH DAMAGE. 33*2639ae9bSBen Gras */ 34*2639ae9bSBen Gras 35*2639ae9bSBen Gras #if HAVE_NBTOOL_CONFIG_H 36*2639ae9bSBen Gras #include "nbtool_config.h" 37*2639ae9bSBen Gras #endif 38*2639ae9bSBen Gras 39*2639ae9bSBen Gras #include <sys/cdefs.h> 40*2639ae9bSBen Gras #ifndef __minix 41*2639ae9bSBen Gras __RCSID("$NetBSD: bt_overflow.c,v 1.16 2008/09/11 12:58:00 joerg Exp $"); 42*2639ae9bSBen Gras #endif 43*2639ae9bSBen Gras 44*2639ae9bSBen Gras #ifndef __minix 45*2639ae9bSBen Gras #include "namespace.h" 46*2639ae9bSBen Gras #endif 47*2639ae9bSBen Gras #include <sys/param.h> 48*2639ae9bSBen Gras 49*2639ae9bSBen Gras #include <assert.h> 50*2639ae9bSBen Gras #include <stdio.h> 51*2639ae9bSBen Gras #include <stdlib.h> 52*2639ae9bSBen Gras #include <string.h> 53*2639ae9bSBen Gras 54*2639ae9bSBen Gras #include <db.h> 55*2639ae9bSBen Gras #include "btree.h" 56*2639ae9bSBen Gras 57*2639ae9bSBen Gras #define MAX(a, b) ((a) > (b) ? (a) : (b)) 58*2639ae9bSBen Gras #define MIN(a, b) ((a) < (b) ? (a) : (b)) 59*2639ae9bSBen Gras 60*2639ae9bSBen Gras /* 61*2639ae9bSBen Gras * Big key/data code. 62*2639ae9bSBen Gras * 63*2639ae9bSBen Gras * Big key and data entries are stored on linked lists of pages. The initial 64*2639ae9bSBen Gras * reference is byte string stored with the key or data and is the page number 65*2639ae9bSBen Gras * and size. The actual record is stored in a chain of pages linked by the 66*2639ae9bSBen Gras * nextpg field of the PAGE header. 67*2639ae9bSBen Gras * 68*2639ae9bSBen Gras * The first page of the chain has a special property. If the record is used 69*2639ae9bSBen Gras * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set 70*2639ae9bSBen Gras * in the header. 71*2639ae9bSBen Gras * 72*2639ae9bSBen Gras * XXX 73*2639ae9bSBen Gras * A single DBT is written to each chain, so a lot of space on the last page 74*2639ae9bSBen Gras * is wasted. This is a fairly major bug for some data sets. 75*2639ae9bSBen Gras */ 76*2639ae9bSBen Gras 77*2639ae9bSBen Gras /* 78*2639ae9bSBen Gras * __OVFL_GET -- Get an overflow key/data item. 79*2639ae9bSBen Gras * 80*2639ae9bSBen Gras * Parameters: 81*2639ae9bSBen Gras * t: tree 82*2639ae9bSBen Gras * p: pointer to { pgno_t, uint32_t } 83*2639ae9bSBen Gras * buf: storage address 84*2639ae9bSBen Gras * bufsz: storage size 85*2639ae9bSBen Gras * 86*2639ae9bSBen Gras * Returns: 87*2639ae9bSBen Gras * RET_ERROR, RET_SUCCESS 88*2639ae9bSBen Gras */ 89*2639ae9bSBen Gras int 90*2639ae9bSBen Gras __ovfl_get(BTREE *t, void *p, size_t *ssz, void **buf, size_t *bufsz) 91*2639ae9bSBen Gras { 92*2639ae9bSBen Gras PAGE *h; 93*2639ae9bSBen Gras pgno_t pg; 94*2639ae9bSBen Gras uint32_t sz, nb, plen; 95*2639ae9bSBen Gras size_t temp; 96*2639ae9bSBen Gras 97*2639ae9bSBen Gras memmove(&pg, p, sizeof(pgno_t)); 98*2639ae9bSBen Gras memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(uint32_t)); 99*2639ae9bSBen Gras *ssz = sz; 100*2639ae9bSBen Gras 101*2639ae9bSBen Gras #ifdef DEBUG 102*2639ae9bSBen Gras if (pg == P_INVALID || sz == 0) 103*2639ae9bSBen Gras abort(); 104*2639ae9bSBen Gras #endif 105*2639ae9bSBen Gras /* Make the buffer bigger as necessary. */ 106*2639ae9bSBen Gras if (*bufsz < sz) { 107*2639ae9bSBen Gras *buf = (char *)(*buf == NULL ? malloc(sz) : realloc(*buf, sz)); 108*2639ae9bSBen Gras if (*buf == NULL) 109*2639ae9bSBen Gras return (RET_ERROR); 110*2639ae9bSBen Gras *bufsz = sz; 111*2639ae9bSBen Gras } 112*2639ae9bSBen Gras 113*2639ae9bSBen Gras /* 114*2639ae9bSBen Gras * Step through the linked list of pages, copying the data on each one 115*2639ae9bSBen Gras * into the buffer. Never copy more than the data's length. 116*2639ae9bSBen Gras */ 117*2639ae9bSBen Gras temp = t->bt_psize - BTDATAOFF; 118*2639ae9bSBen Gras _DBFIT(temp, uint32_t); 119*2639ae9bSBen Gras plen = (uint32_t)temp; 120*2639ae9bSBen Gras for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) { 121*2639ae9bSBen Gras if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 122*2639ae9bSBen Gras return (RET_ERROR); 123*2639ae9bSBen Gras 124*2639ae9bSBen Gras nb = MIN(sz, plen); 125*2639ae9bSBen Gras memmove(p, (char *)(void *)h + BTDATAOFF, nb); 126*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 127*2639ae9bSBen Gras 128*2639ae9bSBen Gras if ((sz -= nb) == 0) 129*2639ae9bSBen Gras break; 130*2639ae9bSBen Gras } 131*2639ae9bSBen Gras return (RET_SUCCESS); 132*2639ae9bSBen Gras } 133*2639ae9bSBen Gras 134*2639ae9bSBen Gras /* 135*2639ae9bSBen Gras * __OVFL_PUT -- Store an overflow key/data item. 136*2639ae9bSBen Gras * 137*2639ae9bSBen Gras * Parameters: 138*2639ae9bSBen Gras * t: tree 139*2639ae9bSBen Gras * data: DBT to store 140*2639ae9bSBen Gras * pgno: storage page number 141*2639ae9bSBen Gras * 142*2639ae9bSBen Gras * Returns: 143*2639ae9bSBen Gras * RET_ERROR, RET_SUCCESS 144*2639ae9bSBen Gras */ 145*2639ae9bSBen Gras int 146*2639ae9bSBen Gras __ovfl_put(BTREE *t, const DBT *dbt, pgno_t *pg) 147*2639ae9bSBen Gras { 148*2639ae9bSBen Gras PAGE *h, *last; 149*2639ae9bSBen Gras void *p; 150*2639ae9bSBen Gras pgno_t npg; 151*2639ae9bSBen Gras uint32_t sz, nb, plen; 152*2639ae9bSBen Gras size_t temp; 153*2639ae9bSBen Gras 154*2639ae9bSBen Gras /* 155*2639ae9bSBen Gras * Allocate pages and copy the key/data record into them. Store the 156*2639ae9bSBen Gras * number of the first page in the chain. 157*2639ae9bSBen Gras */ 158*2639ae9bSBen Gras temp = t->bt_psize - BTDATAOFF; 159*2639ae9bSBen Gras _DBFIT(temp, uint32_t); 160*2639ae9bSBen Gras plen = (uint32_t)temp; 161*2639ae9bSBen Gras last = NULL; 162*2639ae9bSBen Gras p = dbt->data; 163*2639ae9bSBen Gras temp = dbt->size; 164*2639ae9bSBen Gras _DBFIT(temp, uint32_t); 165*2639ae9bSBen Gras sz = temp; 166*2639ae9bSBen Gras for (;; p = (char *)p + plen, last = h) { 167*2639ae9bSBen Gras if ((h = __bt_new(t, &npg)) == NULL) 168*2639ae9bSBen Gras return (RET_ERROR); 169*2639ae9bSBen Gras 170*2639ae9bSBen Gras h->pgno = npg; 171*2639ae9bSBen Gras h->nextpg = h->prevpg = P_INVALID; 172*2639ae9bSBen Gras h->flags = P_OVERFLOW; 173*2639ae9bSBen Gras h->lower = h->upper = 0; 174*2639ae9bSBen Gras 175*2639ae9bSBen Gras nb = MIN(sz, plen); 176*2639ae9bSBen Gras (void)memmove((char *)(void *)h + BTDATAOFF, p, (size_t)nb); 177*2639ae9bSBen Gras 178*2639ae9bSBen Gras if (last) { 179*2639ae9bSBen Gras last->nextpg = h->pgno; 180*2639ae9bSBen Gras mpool_put(t->bt_mp, last, MPOOL_DIRTY); 181*2639ae9bSBen Gras } else 182*2639ae9bSBen Gras *pg = h->pgno; 183*2639ae9bSBen Gras 184*2639ae9bSBen Gras if ((sz -= nb) == 0) { 185*2639ae9bSBen Gras mpool_put(t->bt_mp, h, MPOOL_DIRTY); 186*2639ae9bSBen Gras break; 187*2639ae9bSBen Gras } 188*2639ae9bSBen Gras } 189*2639ae9bSBen Gras return (RET_SUCCESS); 190*2639ae9bSBen Gras } 191*2639ae9bSBen Gras 192*2639ae9bSBen Gras /* 193*2639ae9bSBen Gras * __OVFL_DELETE -- Delete an overflow chain. 194*2639ae9bSBen Gras * 195*2639ae9bSBen Gras * Parameters: 196*2639ae9bSBen Gras * t: tree 197*2639ae9bSBen Gras * p: pointer to { pgno_t, uint32_t } 198*2639ae9bSBen Gras * 199*2639ae9bSBen Gras * Returns: 200*2639ae9bSBen Gras * RET_ERROR, RET_SUCCESS 201*2639ae9bSBen Gras */ 202*2639ae9bSBen Gras int 203*2639ae9bSBen Gras __ovfl_delete(BTREE *t, void *p) 204*2639ae9bSBen Gras { 205*2639ae9bSBen Gras PAGE *h; 206*2639ae9bSBen Gras pgno_t pg; 207*2639ae9bSBen Gras uint32_t sz, plen; 208*2639ae9bSBen Gras size_t temp; 209*2639ae9bSBen Gras 210*2639ae9bSBen Gras (void)memmove(&pg, p, sizeof(pgno_t)); 211*2639ae9bSBen Gras (void)memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(uint32_t)); 212*2639ae9bSBen Gras 213*2639ae9bSBen Gras #ifdef DEBUG 214*2639ae9bSBen Gras if (pg == P_INVALID || sz == 0) 215*2639ae9bSBen Gras abort(); 216*2639ae9bSBen Gras #endif 217*2639ae9bSBen Gras if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 218*2639ae9bSBen Gras return (RET_ERROR); 219*2639ae9bSBen Gras 220*2639ae9bSBen Gras /* Don't delete chains used by internal pages. */ 221*2639ae9bSBen Gras if (h->flags & P_PRESERVE) { 222*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 223*2639ae9bSBen Gras return (RET_SUCCESS); 224*2639ae9bSBen Gras } 225*2639ae9bSBen Gras 226*2639ae9bSBen Gras /* Step through the chain, calling the free routine for each page. */ 227*2639ae9bSBen Gras temp = t->bt_psize - BTDATAOFF; 228*2639ae9bSBen Gras _DBFIT(temp, uint32_t); 229*2639ae9bSBen Gras plen = (uint32_t)temp; 230*2639ae9bSBen Gras for (;; sz -= plen) { 231*2639ae9bSBen Gras pg = h->nextpg; 232*2639ae9bSBen Gras __bt_free(t, h); 233*2639ae9bSBen Gras if (sz <= plen) 234*2639ae9bSBen Gras break; 235*2639ae9bSBen Gras if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 236*2639ae9bSBen Gras return (RET_ERROR); 237*2639ae9bSBen Gras } 238*2639ae9bSBen Gras return (RET_SUCCESS); 239*2639ae9bSBen Gras } 240