1*0Sstevel@tonic-gate /*- 2*0Sstevel@tonic-gate * See the file LICENSE for redistribution information. 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * Copyright (c) 1996, 1997, 1998 5*0Sstevel@tonic-gate * Sleepycat Software. All rights reserved. 6*0Sstevel@tonic-gate */ 7*0Sstevel@tonic-gate /* 8*0Sstevel@tonic-gate * Copyright (c) 1990, 1993, 1994, 1995, 1996 9*0Sstevel@tonic-gate * Keith Bostic. All rights reserved. 10*0Sstevel@tonic-gate */ 11*0Sstevel@tonic-gate /* 12*0Sstevel@tonic-gate * Copyright (c) 1990, 1993, 1994, 1995 13*0Sstevel@tonic-gate * The Regents of the University of California. All rights reserved. 14*0Sstevel@tonic-gate * 15*0Sstevel@tonic-gate * This code is derived from software contributed to Berkeley by 16*0Sstevel@tonic-gate * Mike Olson. 17*0Sstevel@tonic-gate * 18*0Sstevel@tonic-gate * Redistribution and use in source and binary forms, with or without 19*0Sstevel@tonic-gate * modification, are permitted provided that the following conditions 20*0Sstevel@tonic-gate * are met: 21*0Sstevel@tonic-gate * 1. Redistributions of source code must retain the above copyright 22*0Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer. 23*0Sstevel@tonic-gate * 2. Redistributions in binary form must reproduce the above copyright 24*0Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer in the 25*0Sstevel@tonic-gate * documentation and/or other materials provided with the distribution. 26*0Sstevel@tonic-gate * 3. All advertising materials mentioning features or use of this software 27*0Sstevel@tonic-gate * must display the following acknowledgement: 28*0Sstevel@tonic-gate * This product includes software developed by the University of 29*0Sstevel@tonic-gate * California, Berkeley and its contributors. 30*0Sstevel@tonic-gate * 4. Neither the name of the University nor the names of its contributors 31*0Sstevel@tonic-gate * may be used to endorse or promote products derived from this software 32*0Sstevel@tonic-gate * without specific prior written permission. 33*0Sstevel@tonic-gate * 34*0Sstevel@tonic-gate * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 35*0Sstevel@tonic-gate * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 36*0Sstevel@tonic-gate * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 37*0Sstevel@tonic-gate * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 38*0Sstevel@tonic-gate * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 39*0Sstevel@tonic-gate * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 40*0Sstevel@tonic-gate * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 41*0Sstevel@tonic-gate * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 42*0Sstevel@tonic-gate * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 43*0Sstevel@tonic-gate * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 44*0Sstevel@tonic-gate * SUCH DAMAGE. 45*0Sstevel@tonic-gate */ 46*0Sstevel@tonic-gate 47*0Sstevel@tonic-gate #include "config.h" 48*0Sstevel@tonic-gate 49*0Sstevel@tonic-gate #ifndef lint 50*0Sstevel@tonic-gate static const char sccsid[] = "@(#)bt_open.c 10.39 (Sleepycat) 11/21/98"; 51*0Sstevel@tonic-gate #endif /* not lint */ 52*0Sstevel@tonic-gate 53*0Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES 54*0Sstevel@tonic-gate #include <sys/types.h> 55*0Sstevel@tonic-gate 56*0Sstevel@tonic-gate #include <errno.h> 57*0Sstevel@tonic-gate #include <limits.h> 58*0Sstevel@tonic-gate #include <string.h> 59*0Sstevel@tonic-gate #endif 60*0Sstevel@tonic-gate 61*0Sstevel@tonic-gate #include "db_int.h" 62*0Sstevel@tonic-gate #include "db_page.h" 63*0Sstevel@tonic-gate #include "btree.h" 64*0Sstevel@tonic-gate 65*0Sstevel@tonic-gate /* 66*0Sstevel@tonic-gate * __bam_open -- 67*0Sstevel@tonic-gate * Open a btree. 68*0Sstevel@tonic-gate * 69*0Sstevel@tonic-gate * PUBLIC: int __bam_open __P((DB *, DB_INFO *)); 70*0Sstevel@tonic-gate */ 71*0Sstevel@tonic-gate int 72*0Sstevel@tonic-gate __bam_open(dbp, dbinfo) 73*0Sstevel@tonic-gate DB *dbp; 74*0Sstevel@tonic-gate DB_INFO *dbinfo; 75*0Sstevel@tonic-gate { 76*0Sstevel@tonic-gate BTREE *t; 77*0Sstevel@tonic-gate int ret; 78*0Sstevel@tonic-gate 79*0Sstevel@tonic-gate /* Allocate and initialize the private btree structure. */ 80*0Sstevel@tonic-gate if ((ret = __os_calloc(1, sizeof(BTREE), &t)) != 0) 81*0Sstevel@tonic-gate return (ret); 82*0Sstevel@tonic-gate dbp->internal = t; 83*0Sstevel@tonic-gate 84*0Sstevel@tonic-gate /* 85*0Sstevel@tonic-gate * Intention is to make sure all of the user's selections are okay 86*0Sstevel@tonic-gate * here and then use them without checking. 87*0Sstevel@tonic-gate */ 88*0Sstevel@tonic-gate if (dbinfo == NULL) { 89*0Sstevel@tonic-gate t->bt_minkey = DEFMINKEYPAGE; 90*0Sstevel@tonic-gate t->bt_compare = __bam_defcmp; 91*0Sstevel@tonic-gate t->bt_prefix = __bam_defpfx; 92*0Sstevel@tonic-gate } else { 93*0Sstevel@tonic-gate /* Minimum number of keys per page. */ 94*0Sstevel@tonic-gate if (dbinfo->bt_minkey == 0) 95*0Sstevel@tonic-gate t->bt_minkey = DEFMINKEYPAGE; 96*0Sstevel@tonic-gate else { 97*0Sstevel@tonic-gate if (dbinfo->bt_minkey < 2) 98*0Sstevel@tonic-gate goto einval; 99*0Sstevel@tonic-gate t->bt_minkey = dbinfo->bt_minkey; 100*0Sstevel@tonic-gate } 101*0Sstevel@tonic-gate 102*0Sstevel@tonic-gate /* Maximum number of keys per page. */ 103*0Sstevel@tonic-gate if (dbinfo->bt_maxkey == 0) 104*0Sstevel@tonic-gate t->bt_maxkey = 0; 105*0Sstevel@tonic-gate else { 106*0Sstevel@tonic-gate if (dbinfo->bt_maxkey < 1) 107*0Sstevel@tonic-gate goto einval; 108*0Sstevel@tonic-gate t->bt_maxkey = dbinfo->bt_maxkey; 109*0Sstevel@tonic-gate } 110*0Sstevel@tonic-gate 111*0Sstevel@tonic-gate /* 112*0Sstevel@tonic-gate * If no comparison, use default comparison. If no comparison 113*0Sstevel@tonic-gate * and no prefix, use default prefix. (We can't default the 114*0Sstevel@tonic-gate * prefix if the user supplies a comparison routine; shortening 115*0Sstevel@tonic-gate * the keys may break their comparison algorithm. We don't 116*0Sstevel@tonic-gate * permit the user to specify a prefix routine if they didn't 117*0Sstevel@tonic-gate * also specify a comparison routine, they can't know enough 118*0Sstevel@tonic-gate * about our comparison routine to get it right.) 119*0Sstevel@tonic-gate */ 120*0Sstevel@tonic-gate if ((t->bt_compare = dbinfo->bt_compare) == NULL) { 121*0Sstevel@tonic-gate if (dbinfo->bt_prefix != NULL) 122*0Sstevel@tonic-gate goto einval; 123*0Sstevel@tonic-gate t->bt_compare = __bam_defcmp; 124*0Sstevel@tonic-gate t->bt_prefix = __bam_defpfx; 125*0Sstevel@tonic-gate } else 126*0Sstevel@tonic-gate t->bt_prefix = dbinfo->bt_prefix; 127*0Sstevel@tonic-gate } 128*0Sstevel@tonic-gate 129*0Sstevel@tonic-gate /* Initialize the remaining fields/methods of the DB. */ 130*0Sstevel@tonic-gate dbp->am_close = __bam_close; 131*0Sstevel@tonic-gate dbp->del = __bam_delete; 132*0Sstevel@tonic-gate dbp->stat = __bam_stat; 133*0Sstevel@tonic-gate 134*0Sstevel@tonic-gate /* Start up the tree. */ 135*0Sstevel@tonic-gate if ((ret = __bam_read_root(dbp)) != 0) 136*0Sstevel@tonic-gate goto err; 137*0Sstevel@tonic-gate 138*0Sstevel@tonic-gate /* Set the overflow page size. */ 139*0Sstevel@tonic-gate __bam_setovflsize(dbp); 140*0Sstevel@tonic-gate 141*0Sstevel@tonic-gate return (0); 142*0Sstevel@tonic-gate 143*0Sstevel@tonic-gate einval: ret = EINVAL; 144*0Sstevel@tonic-gate 145*0Sstevel@tonic-gate err: __os_free(t, sizeof(BTREE)); 146*0Sstevel@tonic-gate return (ret); 147*0Sstevel@tonic-gate } 148*0Sstevel@tonic-gate 149*0Sstevel@tonic-gate /* 150*0Sstevel@tonic-gate * __bam_close -- 151*0Sstevel@tonic-gate * Close a btree. 152*0Sstevel@tonic-gate * 153*0Sstevel@tonic-gate * PUBLIC: int __bam_close __P((DB *)); 154*0Sstevel@tonic-gate */ 155*0Sstevel@tonic-gate int 156*0Sstevel@tonic-gate __bam_close(dbp) 157*0Sstevel@tonic-gate DB *dbp; 158*0Sstevel@tonic-gate { 159*0Sstevel@tonic-gate __os_free(dbp->internal, sizeof(BTREE)); 160*0Sstevel@tonic-gate dbp->internal = NULL; 161*0Sstevel@tonic-gate 162*0Sstevel@tonic-gate return (0); 163*0Sstevel@tonic-gate } 164*0Sstevel@tonic-gate 165*0Sstevel@tonic-gate /* 166*0Sstevel@tonic-gate * __bam_setovflsize -- 167*0Sstevel@tonic-gate * 168*0Sstevel@tonic-gate * PUBLIC: void __bam_setovflsize __P((DB *)); 169*0Sstevel@tonic-gate */ 170*0Sstevel@tonic-gate void 171*0Sstevel@tonic-gate __bam_setovflsize(dbp) 172*0Sstevel@tonic-gate DB *dbp; 173*0Sstevel@tonic-gate { 174*0Sstevel@tonic-gate BTREE *t; 175*0Sstevel@tonic-gate 176*0Sstevel@tonic-gate t = dbp->internal; 177*0Sstevel@tonic-gate 178*0Sstevel@tonic-gate /* 179*0Sstevel@tonic-gate * !!! 180*0Sstevel@tonic-gate * Correction for recno, which doesn't know anything about minimum 181*0Sstevel@tonic-gate * keys per page. 182*0Sstevel@tonic-gate */ 183*0Sstevel@tonic-gate if (t->bt_minkey == 0) 184*0Sstevel@tonic-gate t->bt_minkey = DEFMINKEYPAGE; 185*0Sstevel@tonic-gate 186*0Sstevel@tonic-gate /* 187*0Sstevel@tonic-gate * The btree data structure requires that at least two key/data pairs 188*0Sstevel@tonic-gate * can fit on a page, but other than that there's no fixed requirement. 189*0Sstevel@tonic-gate * Translate the minimum number of items into the bytes a key/data pair 190*0Sstevel@tonic-gate * can use before being placed on an overflow page. We calculate for 191*0Sstevel@tonic-gate * the worst possible alignment by assuming every item requires the 192*0Sstevel@tonic-gate * maximum alignment for padding. 193*0Sstevel@tonic-gate * 194*0Sstevel@tonic-gate * Recno uses the btree bt_ovflsize value -- it's close enough. 195*0Sstevel@tonic-gate */ 196*0Sstevel@tonic-gate t->bt_ovflsize = (dbp->pgsize - P_OVERHEAD) / (t->bt_minkey * P_INDX) 197*0Sstevel@tonic-gate - (BKEYDATA_PSIZE(0) + ALIGN(1, 4)); 198*0Sstevel@tonic-gate } 199*0Sstevel@tonic-gate 200*0Sstevel@tonic-gate /* 201*0Sstevel@tonic-gate * __bam_read_root -- 202*0Sstevel@tonic-gate * Check (and optionally create) a tree. 203*0Sstevel@tonic-gate * 204*0Sstevel@tonic-gate * PUBLIC: int __bam_read_root __P((DB *)); 205*0Sstevel@tonic-gate */ 206*0Sstevel@tonic-gate int 207*0Sstevel@tonic-gate __bam_read_root(dbp) 208*0Sstevel@tonic-gate DB *dbp; 209*0Sstevel@tonic-gate { 210*0Sstevel@tonic-gate BTMETA *meta; 211*0Sstevel@tonic-gate BTREE *t; 212*0Sstevel@tonic-gate DBC *dbc; 213*0Sstevel@tonic-gate DB_LOCK metalock, rootlock; 214*0Sstevel@tonic-gate PAGE *root; 215*0Sstevel@tonic-gate db_pgno_t pgno; 216*0Sstevel@tonic-gate int ret, t_ret; 217*0Sstevel@tonic-gate 218*0Sstevel@tonic-gate ret = 0; 219*0Sstevel@tonic-gate t = dbp->internal; 220*0Sstevel@tonic-gate 221*0Sstevel@tonic-gate /* Get a cursor. */ 222*0Sstevel@tonic-gate if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) 223*0Sstevel@tonic-gate return (ret); 224*0Sstevel@tonic-gate 225*0Sstevel@tonic-gate /* Get, and optionally create the metadata page. */ 226*0Sstevel@tonic-gate pgno = PGNO_METADATA; 227*0Sstevel@tonic-gate if ((ret = 228*0Sstevel@tonic-gate __bam_lget(dbc, 0, PGNO_METADATA, DB_LOCK_WRITE, &metalock)) != 0) 229*0Sstevel@tonic-gate goto err; 230*0Sstevel@tonic-gate if ((ret = 231*0Sstevel@tonic-gate memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, (PAGE **)&meta)) != 0) { 232*0Sstevel@tonic-gate (void)__BT_LPUT(dbc, metalock); 233*0Sstevel@tonic-gate goto err; 234*0Sstevel@tonic-gate } 235*0Sstevel@tonic-gate 236*0Sstevel@tonic-gate /* 237*0Sstevel@tonic-gate * If the magic number is correct, we're not creating the tree. 238*0Sstevel@tonic-gate * Correct any fields that may not be right. Note, all of the 239*0Sstevel@tonic-gate * local flags were set by db_open(3). 240*0Sstevel@tonic-gate */ 241*0Sstevel@tonic-gate if (meta->magic != 0) { 242*0Sstevel@tonic-gate t->bt_maxkey = meta->maxkey; 243*0Sstevel@tonic-gate t->bt_minkey = meta->minkey; 244*0Sstevel@tonic-gate 245*0Sstevel@tonic-gate (void)memp_fput(dbp->mpf, (PAGE *)meta, 0); 246*0Sstevel@tonic-gate (void)__BT_LPUT(dbc, metalock); 247*0Sstevel@tonic-gate goto done; 248*0Sstevel@tonic-gate } 249*0Sstevel@tonic-gate 250*0Sstevel@tonic-gate /* Initialize the tree structure metadata information. */ 251*0Sstevel@tonic-gate memset(meta, 0, sizeof(BTMETA)); 252*0Sstevel@tonic-gate ZERO_LSN(meta->lsn); 253*0Sstevel@tonic-gate meta->pgno = PGNO_METADATA; 254*0Sstevel@tonic-gate meta->magic = DB_BTREEMAGIC; 255*0Sstevel@tonic-gate meta->version = DB_BTREEVERSION; 256*0Sstevel@tonic-gate meta->pagesize = dbp->pgsize; 257*0Sstevel@tonic-gate meta->maxkey = t->bt_maxkey; 258*0Sstevel@tonic-gate meta->minkey = t->bt_minkey; 259*0Sstevel@tonic-gate meta->free = PGNO_INVALID; 260*0Sstevel@tonic-gate if (dbp->type == DB_RECNO) 261*0Sstevel@tonic-gate F_SET(meta, BTM_RECNO); 262*0Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_DUP)) 263*0Sstevel@tonic-gate F_SET(meta, BTM_DUP); 264*0Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_FIXEDLEN)) 265*0Sstevel@tonic-gate F_SET(meta, BTM_FIXEDLEN); 266*0Sstevel@tonic-gate if (F_ISSET(dbp, DB_BT_RECNUM)) 267*0Sstevel@tonic-gate F_SET(meta, BTM_RECNUM); 268*0Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_RENUMBER)) 269*0Sstevel@tonic-gate F_SET(meta, BTM_RENUMBER); 270*0Sstevel@tonic-gate memcpy(meta->uid, dbp->fileid, DB_FILE_ID_LEN); 271*0Sstevel@tonic-gate 272*0Sstevel@tonic-gate /* Create and initialize a root page. */ 273*0Sstevel@tonic-gate pgno = PGNO_ROOT; 274*0Sstevel@tonic-gate if ((ret = 275*0Sstevel@tonic-gate __bam_lget(dbc, 0, PGNO_ROOT, DB_LOCK_WRITE, &rootlock)) != 0) 276*0Sstevel@tonic-gate goto err; 277*0Sstevel@tonic-gate if ((ret = memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &root)) != 0) { 278*0Sstevel@tonic-gate (void)__BT_LPUT(dbc, rootlock); 279*0Sstevel@tonic-gate goto err; 280*0Sstevel@tonic-gate } 281*0Sstevel@tonic-gate P_INIT(root, dbp->pgsize, PGNO_ROOT, PGNO_INVALID, 282*0Sstevel@tonic-gate PGNO_INVALID, 1, dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE); 283*0Sstevel@tonic-gate ZERO_LSN(root->lsn); 284*0Sstevel@tonic-gate 285*0Sstevel@tonic-gate /* Release the metadata and root pages. */ 286*0Sstevel@tonic-gate if ((ret = memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY)) != 0) 287*0Sstevel@tonic-gate goto err; 288*0Sstevel@tonic-gate if ((ret = memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY)) != 0) 289*0Sstevel@tonic-gate goto err; 290*0Sstevel@tonic-gate 291*0Sstevel@tonic-gate /* 292*0Sstevel@tonic-gate * Flush the metadata and root pages to disk -- since the user can't 293*0Sstevel@tonic-gate * transaction protect open, the pages have to exist during recovery. 294*0Sstevel@tonic-gate * 295*0Sstevel@tonic-gate * XXX 296*0Sstevel@tonic-gate * It's not useful to return not-yet-flushed here -- convert it to 297*0Sstevel@tonic-gate * an error. 298*0Sstevel@tonic-gate */ 299*0Sstevel@tonic-gate if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE) 300*0Sstevel@tonic-gate ret = EINVAL; 301*0Sstevel@tonic-gate 302*0Sstevel@tonic-gate /* Release the locks. */ 303*0Sstevel@tonic-gate (void)__BT_LPUT(dbc, metalock); 304*0Sstevel@tonic-gate (void)__BT_LPUT(dbc, rootlock); 305*0Sstevel@tonic-gate 306*0Sstevel@tonic-gate err: 307*0Sstevel@tonic-gate done: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) 308*0Sstevel@tonic-gate ret = t_ret; 309*0Sstevel@tonic-gate return (ret); 310*0Sstevel@tonic-gate } 311