1*2639ae9bSBen Gras /* $NetBSD: bt_delete.c,v 1.17 2009/01/29 02:02:36 lukem Exp $ */ 2*2639ae9bSBen Gras 3*2639ae9bSBen Gras /*- 4*2639ae9bSBen Gras * Copyright (c) 1990, 1993, 1994 5*2639ae9bSBen Gras * The Regents of the University of California. All rights reserved. 6*2639ae9bSBen Gras * 7*2639ae9bSBen Gras * This code is derived from software contributed to Berkeley by 8*2639ae9bSBen Gras * Mike Olson. 9*2639ae9bSBen Gras * 10*2639ae9bSBen Gras * Redistribution and use in source and binary forms, with or without 11*2639ae9bSBen Gras * modification, are permitted provided that the following conditions 12*2639ae9bSBen Gras * are met: 13*2639ae9bSBen Gras * 1. Redistributions of source code must retain the above copyright 14*2639ae9bSBen Gras * notice, this list of conditions and the following disclaimer. 15*2639ae9bSBen Gras * 2. Redistributions in binary form must reproduce the above copyright 16*2639ae9bSBen Gras * notice, this list of conditions and the following disclaimer in the 17*2639ae9bSBen Gras * documentation and/or other materials provided with the distribution. 18*2639ae9bSBen Gras * 3. Neither the name of the University nor the names of its contributors 19*2639ae9bSBen Gras * may be used to endorse or promote products derived from this software 20*2639ae9bSBen Gras * without specific prior written permission. 21*2639ae9bSBen Gras * 22*2639ae9bSBen Gras * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23*2639ae9bSBen Gras * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24*2639ae9bSBen Gras * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25*2639ae9bSBen Gras * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26*2639ae9bSBen Gras * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27*2639ae9bSBen Gras * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28*2639ae9bSBen Gras * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29*2639ae9bSBen Gras * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30*2639ae9bSBen Gras * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31*2639ae9bSBen Gras * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32*2639ae9bSBen Gras * SUCH DAMAGE. 33*2639ae9bSBen Gras */ 34*2639ae9bSBen Gras 35*2639ae9bSBen Gras #if HAVE_NBTOOL_CONFIG_H 36*2639ae9bSBen Gras #include "nbtool_config.h" 37*2639ae9bSBen Gras #endif 38*2639ae9bSBen Gras 39*2639ae9bSBen Gras #include <sys/cdefs.h> 40*2639ae9bSBen Gras #ifndef __minix 41*2639ae9bSBen Gras __RCSID("$NetBSD: bt_delete.c,v 1.17 2009/01/29 02:02:36 lukem Exp $"); 42*2639ae9bSBen Gras #endif 43*2639ae9bSBen Gras 44*2639ae9bSBen Gras #ifndef __minix 45*2639ae9bSBen Gras #include "namespace.h" 46*2639ae9bSBen Gras #endif 47*2639ae9bSBen Gras #include <sys/types.h> 48*2639ae9bSBen Gras 49*2639ae9bSBen Gras #include <assert.h> 50*2639ae9bSBen Gras #include <errno.h> 51*2639ae9bSBen Gras #include <stdio.h> 52*2639ae9bSBen Gras #include <string.h> 53*2639ae9bSBen Gras 54*2639ae9bSBen Gras #include <db.h> 55*2639ae9bSBen Gras #include "btree.h" 56*2639ae9bSBen Gras 57*2639ae9bSBen Gras static int __bt_bdelete(BTREE *, const DBT *); 58*2639ae9bSBen Gras static int __bt_curdel(BTREE *, const DBT *, PAGE *, u_int); 59*2639ae9bSBen Gras static int __bt_pdelete(BTREE *, PAGE *); 60*2639ae9bSBen Gras static int __bt_relink(BTREE *, PAGE *); 61*2639ae9bSBen Gras static int __bt_stkacq(BTREE *, PAGE **, CURSOR *); 62*2639ae9bSBen Gras 63*2639ae9bSBen Gras /* 64*2639ae9bSBen Gras * __bt_delete 65*2639ae9bSBen Gras * Delete the item(s) referenced by a key. 66*2639ae9bSBen Gras * 67*2639ae9bSBen Gras * Return RET_SPECIAL if the key is not found. 68*2639ae9bSBen Gras */ 69*2639ae9bSBen Gras int 70*2639ae9bSBen Gras __bt_delete(const DB *dbp, const DBT *key, u_int flags) 71*2639ae9bSBen Gras { 72*2639ae9bSBen Gras BTREE *t; 73*2639ae9bSBen Gras CURSOR *c; 74*2639ae9bSBen Gras PAGE *h; 75*2639ae9bSBen Gras int status; 76*2639ae9bSBen Gras 77*2639ae9bSBen Gras t = dbp->internal; 78*2639ae9bSBen Gras 79*2639ae9bSBen Gras /* Toss any page pinned across calls. */ 80*2639ae9bSBen Gras if (t->bt_pinned != NULL) { 81*2639ae9bSBen Gras mpool_put(t->bt_mp, t->bt_pinned, 0); 82*2639ae9bSBen Gras t->bt_pinned = NULL; 83*2639ae9bSBen Gras } 84*2639ae9bSBen Gras 85*2639ae9bSBen Gras /* Check for change to a read-only tree. */ 86*2639ae9bSBen Gras if (F_ISSET(t, B_RDONLY)) { 87*2639ae9bSBen Gras errno = EPERM; 88*2639ae9bSBen Gras return (RET_ERROR); 89*2639ae9bSBen Gras } 90*2639ae9bSBen Gras 91*2639ae9bSBen Gras switch (flags) { 92*2639ae9bSBen Gras case 0: 93*2639ae9bSBen Gras status = __bt_bdelete(t, key); 94*2639ae9bSBen Gras break; 95*2639ae9bSBen Gras case R_CURSOR: 96*2639ae9bSBen Gras /* 97*2639ae9bSBen Gras * If flags is R_CURSOR, delete the cursor. Must already 98*2639ae9bSBen Gras * have started a scan and not have already deleted it. 99*2639ae9bSBen Gras */ 100*2639ae9bSBen Gras c = &t->bt_cursor; 101*2639ae9bSBen Gras if (F_ISSET(c, CURS_INIT)) { 102*2639ae9bSBen Gras if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) 103*2639ae9bSBen Gras return (RET_SPECIAL); 104*2639ae9bSBen Gras if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 105*2639ae9bSBen Gras return (RET_ERROR); 106*2639ae9bSBen Gras 107*2639ae9bSBen Gras /* 108*2639ae9bSBen Gras * If the page is about to be emptied, we'll need to 109*2639ae9bSBen Gras * delete it, which means we have to acquire a stack. 110*2639ae9bSBen Gras */ 111*2639ae9bSBen Gras if (NEXTINDEX(h) == 1) 112*2639ae9bSBen Gras if (__bt_stkacq(t, &h, &t->bt_cursor)) 113*2639ae9bSBen Gras return (RET_ERROR); 114*2639ae9bSBen Gras 115*2639ae9bSBen Gras status = __bt_dleaf(t, NULL, h, (u_int)c->pg.index); 116*2639ae9bSBen Gras 117*2639ae9bSBen Gras if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) { 118*2639ae9bSBen Gras if (__bt_pdelete(t, h)) 119*2639ae9bSBen Gras return (RET_ERROR); 120*2639ae9bSBen Gras } else 121*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 122*2639ae9bSBen Gras (u_int)(status == RET_SUCCESS ? 123*2639ae9bSBen Gras MPOOL_DIRTY : 0)); 124*2639ae9bSBen Gras break; 125*2639ae9bSBen Gras } 126*2639ae9bSBen Gras /* FALLTHROUGH */ 127*2639ae9bSBen Gras default: 128*2639ae9bSBen Gras errno = EINVAL; 129*2639ae9bSBen Gras return (RET_ERROR); 130*2639ae9bSBen Gras } 131*2639ae9bSBen Gras if (status == RET_SUCCESS) 132*2639ae9bSBen Gras F_SET(t, B_MODIFIED); 133*2639ae9bSBen Gras return (status); 134*2639ae9bSBen Gras } 135*2639ae9bSBen Gras 136*2639ae9bSBen Gras /* 137*2639ae9bSBen Gras * __bt_stkacq -- 138*2639ae9bSBen Gras * Acquire a stack so we can delete a cursor entry. 139*2639ae9bSBen Gras * 140*2639ae9bSBen Gras * Parameters: 141*2639ae9bSBen Gras * t: tree 142*2639ae9bSBen Gras * hp: pointer to current, pinned PAGE pointer 143*2639ae9bSBen Gras * c: pointer to the cursor 144*2639ae9bSBen Gras * 145*2639ae9bSBen Gras * Returns: 146*2639ae9bSBen Gras * 0 on success, 1 on failure 147*2639ae9bSBen Gras */ 148*2639ae9bSBen Gras static int 149*2639ae9bSBen Gras __bt_stkacq(BTREE *t, PAGE **hp, CURSOR *c) 150*2639ae9bSBen Gras { 151*2639ae9bSBen Gras BINTERNAL *bi; 152*2639ae9bSBen Gras EPG *e; 153*2639ae9bSBen Gras EPGNO *parent; 154*2639ae9bSBen Gras PAGE *h; 155*2639ae9bSBen Gras indx_t idx = 0; /* Pacify gcc */ 156*2639ae9bSBen Gras pgno_t pgno; 157*2639ae9bSBen Gras recno_t nextpg, prevpg; 158*2639ae9bSBen Gras int exact, level; 159*2639ae9bSBen Gras 160*2639ae9bSBen Gras /* 161*2639ae9bSBen Gras * Find the first occurrence of the key in the tree. Toss the 162*2639ae9bSBen Gras * currently locked page so we don't hit an already-locked page. 163*2639ae9bSBen Gras */ 164*2639ae9bSBen Gras h = *hp; 165*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 166*2639ae9bSBen Gras if ((e = __bt_search(t, &c->key, &exact)) == NULL) 167*2639ae9bSBen Gras return (1); 168*2639ae9bSBen Gras h = e->page; 169*2639ae9bSBen Gras 170*2639ae9bSBen Gras /* See if we got it in one shot. */ 171*2639ae9bSBen Gras if (h->pgno == c->pg.pgno) 172*2639ae9bSBen Gras goto ret; 173*2639ae9bSBen Gras 174*2639ae9bSBen Gras /* 175*2639ae9bSBen Gras * Move right, looking for the page. At each move we have to move 176*2639ae9bSBen Gras * up the stack until we don't have to move to the next page. If 177*2639ae9bSBen Gras * we have to change pages at an internal level, we have to fix the 178*2639ae9bSBen Gras * stack back up. 179*2639ae9bSBen Gras */ 180*2639ae9bSBen Gras while (h->pgno != c->pg.pgno) { 181*2639ae9bSBen Gras if ((nextpg = h->nextpg) == P_INVALID) 182*2639ae9bSBen Gras break; 183*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 184*2639ae9bSBen Gras 185*2639ae9bSBen Gras /* Move up the stack. */ 186*2639ae9bSBen Gras for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { 187*2639ae9bSBen Gras /* Get the parent page. */ 188*2639ae9bSBen Gras if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 189*2639ae9bSBen Gras return (1); 190*2639ae9bSBen Gras 191*2639ae9bSBen Gras /* Move to the next index. */ 192*2639ae9bSBen Gras if (parent->index != NEXTINDEX(h) - 1) { 193*2639ae9bSBen Gras idx = parent->index + 1; 194*2639ae9bSBen Gras BT_PUSH(t, h->pgno, idx); 195*2639ae9bSBen Gras break; 196*2639ae9bSBen Gras } 197*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 198*2639ae9bSBen Gras } 199*2639ae9bSBen Gras 200*2639ae9bSBen Gras /* Restore the stack. */ 201*2639ae9bSBen Gras while (level--) { 202*2639ae9bSBen Gras /* Push the next level down onto the stack. */ 203*2639ae9bSBen Gras bi = GETBINTERNAL(h, idx); 204*2639ae9bSBen Gras pgno = bi->pgno; 205*2639ae9bSBen Gras BT_PUSH(t, pgno, 0); 206*2639ae9bSBen Gras 207*2639ae9bSBen Gras /* Lose the currently pinned page. */ 208*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 209*2639ae9bSBen Gras 210*2639ae9bSBen Gras /* Get the next level down. */ 211*2639ae9bSBen Gras if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) 212*2639ae9bSBen Gras return (1); 213*2639ae9bSBen Gras idx = 0; 214*2639ae9bSBen Gras } 215*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 216*2639ae9bSBen Gras if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL) 217*2639ae9bSBen Gras return (1); 218*2639ae9bSBen Gras } 219*2639ae9bSBen Gras 220*2639ae9bSBen Gras if (h->pgno == c->pg.pgno) 221*2639ae9bSBen Gras goto ret; 222*2639ae9bSBen Gras 223*2639ae9bSBen Gras /* Reacquire the original stack. */ 224*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 225*2639ae9bSBen Gras if ((e = __bt_search(t, &c->key, &exact)) == NULL) 226*2639ae9bSBen Gras return (1); 227*2639ae9bSBen Gras h = e->page; 228*2639ae9bSBen Gras 229*2639ae9bSBen Gras /* 230*2639ae9bSBen Gras * Move left, looking for the page. At each move we have to move 231*2639ae9bSBen Gras * up the stack until we don't have to change pages to move to the 232*2639ae9bSBen Gras * next page. If we have to change pages at an internal level, we 233*2639ae9bSBen Gras * have to fix the stack back up. 234*2639ae9bSBen Gras */ 235*2639ae9bSBen Gras while (h->pgno != c->pg.pgno) { 236*2639ae9bSBen Gras if ((prevpg = h->prevpg) == P_INVALID) 237*2639ae9bSBen Gras break; 238*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 239*2639ae9bSBen Gras 240*2639ae9bSBen Gras /* Move up the stack. */ 241*2639ae9bSBen Gras for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { 242*2639ae9bSBen Gras /* Get the parent page. */ 243*2639ae9bSBen Gras if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 244*2639ae9bSBen Gras return (1); 245*2639ae9bSBen Gras 246*2639ae9bSBen Gras /* Move to the next index. */ 247*2639ae9bSBen Gras if (parent->index != 0) { 248*2639ae9bSBen Gras idx = parent->index - 1; 249*2639ae9bSBen Gras BT_PUSH(t, h->pgno, idx); 250*2639ae9bSBen Gras break; 251*2639ae9bSBen Gras } 252*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 253*2639ae9bSBen Gras } 254*2639ae9bSBen Gras 255*2639ae9bSBen Gras /* Restore the stack. */ 256*2639ae9bSBen Gras while (level--) { 257*2639ae9bSBen Gras /* Push the next level down onto the stack. */ 258*2639ae9bSBen Gras bi = GETBINTERNAL(h, idx); 259*2639ae9bSBen Gras pgno = bi->pgno; 260*2639ae9bSBen Gras 261*2639ae9bSBen Gras /* Lose the currently pinned page. */ 262*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 263*2639ae9bSBen Gras 264*2639ae9bSBen Gras /* Get the next level down. */ 265*2639ae9bSBen Gras if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) 266*2639ae9bSBen Gras return (1); 267*2639ae9bSBen Gras 268*2639ae9bSBen Gras idx = NEXTINDEX(h) - 1; 269*2639ae9bSBen Gras BT_PUSH(t, pgno, idx); 270*2639ae9bSBen Gras } 271*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 272*2639ae9bSBen Gras if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL) 273*2639ae9bSBen Gras return (1); 274*2639ae9bSBen Gras } 275*2639ae9bSBen Gras 276*2639ae9bSBen Gras 277*2639ae9bSBen Gras ret: mpool_put(t->bt_mp, h, 0); 278*2639ae9bSBen Gras return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL); 279*2639ae9bSBen Gras } 280*2639ae9bSBen Gras 281*2639ae9bSBen Gras /* 282*2639ae9bSBen Gras * __bt_bdelete -- 283*2639ae9bSBen Gras * Delete all key/data pairs matching the specified key. 284*2639ae9bSBen Gras * 285*2639ae9bSBen Gras * Parameters: 286*2639ae9bSBen Gras * t: tree 287*2639ae9bSBen Gras * key: key to delete 288*2639ae9bSBen Gras * 289*2639ae9bSBen Gras * Returns: 290*2639ae9bSBen Gras * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 291*2639ae9bSBen Gras */ 292*2639ae9bSBen Gras static int 293*2639ae9bSBen Gras __bt_bdelete(BTREE *t, const DBT *key) 294*2639ae9bSBen Gras { 295*2639ae9bSBen Gras EPG *e; 296*2639ae9bSBen Gras PAGE *h; 297*2639ae9bSBen Gras int deleted, exact, redo; 298*2639ae9bSBen Gras 299*2639ae9bSBen Gras deleted = 0; 300*2639ae9bSBen Gras 301*2639ae9bSBen Gras /* Find any matching record; __bt_search pins the page. */ 302*2639ae9bSBen Gras loop: if ((e = __bt_search(t, key, &exact)) == NULL) 303*2639ae9bSBen Gras return (deleted ? RET_SUCCESS : RET_ERROR); 304*2639ae9bSBen Gras if (!exact) { 305*2639ae9bSBen Gras mpool_put(t->bt_mp, e->page, 0); 306*2639ae9bSBen Gras return (deleted ? RET_SUCCESS : RET_SPECIAL); 307*2639ae9bSBen Gras } 308*2639ae9bSBen Gras 309*2639ae9bSBen Gras /* 310*2639ae9bSBen Gras * Delete forward, then delete backward, from the found key. If 311*2639ae9bSBen Gras * there are duplicates and we reach either side of the page, do 312*2639ae9bSBen Gras * the key search again, so that we get them all. 313*2639ae9bSBen Gras */ 314*2639ae9bSBen Gras redo = 0; 315*2639ae9bSBen Gras h = e->page; 316*2639ae9bSBen Gras do { 317*2639ae9bSBen Gras if (__bt_dleaf(t, key, h, (u_int)e->index)) { 318*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 319*2639ae9bSBen Gras return (RET_ERROR); 320*2639ae9bSBen Gras } 321*2639ae9bSBen Gras if (F_ISSET(t, B_NODUPS)) { 322*2639ae9bSBen Gras if (NEXTINDEX(h) == 0) { 323*2639ae9bSBen Gras if (__bt_pdelete(t, h)) 324*2639ae9bSBen Gras return (RET_ERROR); 325*2639ae9bSBen Gras } else 326*2639ae9bSBen Gras mpool_put(t->bt_mp, h, MPOOL_DIRTY); 327*2639ae9bSBen Gras return (RET_SUCCESS); 328*2639ae9bSBen Gras } 329*2639ae9bSBen Gras deleted = 1; 330*2639ae9bSBen Gras } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 331*2639ae9bSBen Gras 332*2639ae9bSBen Gras /* Check for right-hand edge of the page. */ 333*2639ae9bSBen Gras if (e->index == NEXTINDEX(h)) 334*2639ae9bSBen Gras redo = 1; 335*2639ae9bSBen Gras 336*2639ae9bSBen Gras /* Delete from the key to the beginning of the page. */ 337*2639ae9bSBen Gras while (e->index-- > 0) { 338*2639ae9bSBen Gras if (__bt_cmp(t, key, e) != 0) 339*2639ae9bSBen Gras break; 340*2639ae9bSBen Gras if (__bt_dleaf(t, key, h, (u_int)e->index) == RET_ERROR) { 341*2639ae9bSBen Gras mpool_put(t->bt_mp, h, 0); 342*2639ae9bSBen Gras return (RET_ERROR); 343*2639ae9bSBen Gras } 344*2639ae9bSBen Gras if (e->index == 0) 345*2639ae9bSBen Gras redo = 1; 346*2639ae9bSBen Gras } 347*2639ae9bSBen Gras 348*2639ae9bSBen Gras /* Check for an empty page. */ 349*2639ae9bSBen Gras if (NEXTINDEX(h) == 0) { 350*2639ae9bSBen Gras if (__bt_pdelete(t, h)) 351*2639ae9bSBen Gras return (RET_ERROR); 352*2639ae9bSBen Gras goto loop; 353*2639ae9bSBen Gras } 354*2639ae9bSBen Gras 355*2639ae9bSBen Gras /* Put the page. */ 356*2639ae9bSBen Gras mpool_put(t->bt_mp, h, MPOOL_DIRTY); 357*2639ae9bSBen Gras 358*2639ae9bSBen Gras if (redo) 359*2639ae9bSBen Gras goto loop; 360*2639ae9bSBen Gras return (RET_SUCCESS); 361*2639ae9bSBen Gras } 362*2639ae9bSBen Gras 363*2639ae9bSBen Gras /* 364*2639ae9bSBen Gras * __bt_pdelete -- 365*2639ae9bSBen Gras * Delete a single page from the tree. 366*2639ae9bSBen Gras * 367*2639ae9bSBen Gras * Parameters: 368*2639ae9bSBen Gras * t: tree 369*2639ae9bSBen Gras * h: leaf page 370*2639ae9bSBen Gras * 371*2639ae9bSBen Gras * Returns: 372*2639ae9bSBen Gras * RET_SUCCESS, RET_ERROR. 373*2639ae9bSBen Gras * 374*2639ae9bSBen Gras * Side-effects: 375*2639ae9bSBen Gras * mpool_put's the page 376*2639ae9bSBen Gras */ 377*2639ae9bSBen Gras static int 378*2639ae9bSBen Gras __bt_pdelete(BTREE *t, PAGE *h) 379*2639ae9bSBen Gras { 380*2639ae9bSBen Gras BINTERNAL *bi; 381*2639ae9bSBen Gras PAGE *pg; 382*2639ae9bSBen Gras EPGNO *parent; 383*2639ae9bSBen Gras indx_t cnt, idx, *ip, offset; 384*2639ae9bSBen Gras uint32_t nksize; 385*2639ae9bSBen Gras char *from; 386*2639ae9bSBen Gras 387*2639ae9bSBen Gras /* 388*2639ae9bSBen Gras * Walk the parent page stack -- a LIFO stack of the pages that were 389*2639ae9bSBen Gras * traversed when we searched for the page where the delete occurred. 390*2639ae9bSBen Gras * Each stack entry is a page number and a page index offset. The 391*2639ae9bSBen Gras * offset is for the page traversed on the search. We've just deleted 392*2639ae9bSBen Gras * a page, so we have to delete the key from the parent page. 393*2639ae9bSBen Gras * 394*2639ae9bSBen Gras * If the delete from the parent page makes it empty, this process may 395*2639ae9bSBen Gras * continue all the way up the tree. We stop if we reach the root page 396*2639ae9bSBen Gras * (which is never deleted, it's just not worth the effort) or if the 397*2639ae9bSBen Gras * delete does not empty the page. 398*2639ae9bSBen Gras */ 399*2639ae9bSBen Gras while ((parent = BT_POP(t)) != NULL) { 400*2639ae9bSBen Gras /* Get the parent page. */ 401*2639ae9bSBen Gras if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) 402*2639ae9bSBen Gras return (RET_ERROR); 403*2639ae9bSBen Gras 404*2639ae9bSBen Gras idx = parent->index; 405*2639ae9bSBen Gras bi = GETBINTERNAL(pg, idx); 406*2639ae9bSBen Gras 407*2639ae9bSBen Gras /* Free any overflow pages. */ 408*2639ae9bSBen Gras if (bi->flags & P_BIGKEY && 409*2639ae9bSBen Gras __ovfl_delete(t, bi->bytes) == RET_ERROR) { 410*2639ae9bSBen Gras mpool_put(t->bt_mp, pg, 0); 411*2639ae9bSBen Gras return (RET_ERROR); 412*2639ae9bSBen Gras } 413*2639ae9bSBen Gras 414*2639ae9bSBen Gras /* 415*2639ae9bSBen Gras * Free the parent if it has only the one key and it's not the 416*2639ae9bSBen Gras * root page. If it's the rootpage, turn it back into an empty 417*2639ae9bSBen Gras * leaf page. 418*2639ae9bSBen Gras */ 419*2639ae9bSBen Gras if (NEXTINDEX(pg) == 1) { 420*2639ae9bSBen Gras if (pg->pgno == P_ROOT) { 421*2639ae9bSBen Gras pg->lower = BTDATAOFF; 422*2639ae9bSBen Gras pg->upper = t->bt_psize; 423*2639ae9bSBen Gras pg->flags = P_BLEAF; 424*2639ae9bSBen Gras } else { 425*2639ae9bSBen Gras if (__bt_relink(t, pg) || __bt_free(t, pg)) 426*2639ae9bSBen Gras return (RET_ERROR); 427*2639ae9bSBen Gras continue; 428*2639ae9bSBen Gras } 429*2639ae9bSBen Gras } else { 430*2639ae9bSBen Gras /* Pack remaining key items at the end of the page. */ 431*2639ae9bSBen Gras nksize = NBINTERNAL(bi->ksize); 432*2639ae9bSBen Gras from = (char *)(void *)pg + pg->upper; 433*2639ae9bSBen Gras memmove(from + nksize, from, 434*2639ae9bSBen Gras (size_t)((char *)(void *)bi - from)); 435*2639ae9bSBen Gras pg->upper += nksize; 436*2639ae9bSBen Gras 437*2639ae9bSBen Gras /* Adjust indices' offsets, shift the indices down. */ 438*2639ae9bSBen Gras offset = pg->linp[idx]; 439*2639ae9bSBen Gras for (cnt = idx, ip = &pg->linp[0]; cnt--; ++ip) 440*2639ae9bSBen Gras if (ip[0] < offset) 441*2639ae9bSBen Gras ip[0] += nksize; 442*2639ae9bSBen Gras for (cnt = NEXTINDEX(pg) - idx; --cnt; ++ip) 443*2639ae9bSBen Gras ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1]; 444*2639ae9bSBen Gras pg->lower -= sizeof(indx_t); 445*2639ae9bSBen Gras } 446*2639ae9bSBen Gras 447*2639ae9bSBen Gras mpool_put(t->bt_mp, pg, MPOOL_DIRTY); 448*2639ae9bSBen Gras break; 449*2639ae9bSBen Gras } 450*2639ae9bSBen Gras 451*2639ae9bSBen Gras /* Free the leaf page, as long as it wasn't the root. */ 452*2639ae9bSBen Gras if (h->pgno == P_ROOT) { 453*2639ae9bSBen Gras mpool_put(t->bt_mp, h, MPOOL_DIRTY); 454*2639ae9bSBen Gras return (RET_SUCCESS); 455*2639ae9bSBen Gras } 456*2639ae9bSBen Gras return (__bt_relink(t, h) || __bt_free(t, h)); 457*2639ae9bSBen Gras } 458*2639ae9bSBen Gras 459*2639ae9bSBen Gras /* 460*2639ae9bSBen Gras * __bt_dleaf -- 461*2639ae9bSBen Gras * Delete a single record from a leaf page. 462*2639ae9bSBen Gras * 463*2639ae9bSBen Gras * Parameters: 464*2639ae9bSBen Gras * t: tree 465*2639ae9bSBen Gras * key: referenced key 466*2639ae9bSBen Gras * h: page 467*2639ae9bSBen Gras * idx: index on page to delete 468*2639ae9bSBen Gras * 469*2639ae9bSBen Gras * Returns: 470*2639ae9bSBen Gras * RET_SUCCESS, RET_ERROR. 471*2639ae9bSBen Gras */ 472*2639ae9bSBen Gras int 473*2639ae9bSBen Gras __bt_dleaf(BTREE *t, const DBT *key, PAGE *h, u_int idx) 474*2639ae9bSBen Gras { 475*2639ae9bSBen Gras BLEAF *bl; 476*2639ae9bSBen Gras indx_t cnt, *ip, offset; 477*2639ae9bSBen Gras uint32_t nbytes; 478*2639ae9bSBen Gras void *to; 479*2639ae9bSBen Gras char *from; 480*2639ae9bSBen Gras 481*2639ae9bSBen Gras /* If this record is referenced by the cursor, delete the cursor. */ 482*2639ae9bSBen Gras if (F_ISSET(&t->bt_cursor, CURS_INIT) && 483*2639ae9bSBen Gras !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && 484*2639ae9bSBen Gras t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == idx && 485*2639ae9bSBen Gras __bt_curdel(t, key, h, idx)) 486*2639ae9bSBen Gras return (RET_ERROR); 487*2639ae9bSBen Gras 488*2639ae9bSBen Gras /* If the entry uses overflow pages, make them available for reuse. */ 489*2639ae9bSBen Gras to = bl = GETBLEAF(h, idx); 490*2639ae9bSBen Gras if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 491*2639ae9bSBen Gras return (RET_ERROR); 492*2639ae9bSBen Gras if (bl->flags & P_BIGDATA && 493*2639ae9bSBen Gras __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 494*2639ae9bSBen Gras return (RET_ERROR); 495*2639ae9bSBen Gras 496*2639ae9bSBen Gras /* Pack the remaining key/data items at the end of the page. */ 497*2639ae9bSBen Gras nbytes = NBLEAF(bl); 498*2639ae9bSBen Gras from = (char *)(void *)h + h->upper; 499*2639ae9bSBen Gras memmove(from + nbytes, from, (size_t)((char *)(void *)to - from)); 500*2639ae9bSBen Gras h->upper += nbytes; 501*2639ae9bSBen Gras 502*2639ae9bSBen Gras /* Adjust the indices' offsets, shift the indices down. */ 503*2639ae9bSBen Gras offset = h->linp[idx]; 504*2639ae9bSBen Gras for (cnt = idx, ip = &h->linp[0]; cnt--; ++ip) 505*2639ae9bSBen Gras if (ip[0] < offset) 506*2639ae9bSBen Gras ip[0] += nbytes; 507*2639ae9bSBen Gras for (cnt = NEXTINDEX(h) - idx; --cnt; ++ip) 508*2639ae9bSBen Gras ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 509*2639ae9bSBen Gras h->lower -= sizeof(indx_t); 510*2639ae9bSBen Gras 511*2639ae9bSBen Gras /* If the cursor is on this page, adjust it as necessary. */ 512*2639ae9bSBen Gras if (F_ISSET(&t->bt_cursor, CURS_INIT) && 513*2639ae9bSBen Gras !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && 514*2639ae9bSBen Gras t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > idx) 515*2639ae9bSBen Gras --t->bt_cursor.pg.index; 516*2639ae9bSBen Gras 517*2639ae9bSBen Gras return (RET_SUCCESS); 518*2639ae9bSBen Gras } 519*2639ae9bSBen Gras 520*2639ae9bSBen Gras /* 521*2639ae9bSBen Gras * __bt_curdel -- 522*2639ae9bSBen Gras * Delete the cursor. 523*2639ae9bSBen Gras * 524*2639ae9bSBen Gras * Parameters: 525*2639ae9bSBen Gras * t: tree 526*2639ae9bSBen Gras * key: referenced key (or NULL) 527*2639ae9bSBen Gras * h: page 528*2639ae9bSBen Gras * idx: index on page to delete 529*2639ae9bSBen Gras * 530*2639ae9bSBen Gras * Returns: 531*2639ae9bSBen Gras * RET_SUCCESS, RET_ERROR. 532*2639ae9bSBen Gras */ 533*2639ae9bSBen Gras static int 534*2639ae9bSBen Gras __bt_curdel(BTREE *t, const DBT *key, PAGE *h, u_int idx) 535*2639ae9bSBen Gras { 536*2639ae9bSBen Gras CURSOR *c; 537*2639ae9bSBen Gras EPG e; 538*2639ae9bSBen Gras PAGE *pg; 539*2639ae9bSBen Gras int curcopy, status; 540*2639ae9bSBen Gras 541*2639ae9bSBen Gras /* 542*2639ae9bSBen Gras * If there are duplicates, move forward or backward to one. 543*2639ae9bSBen Gras * Otherwise, copy the key into the cursor area. 544*2639ae9bSBen Gras */ 545*2639ae9bSBen Gras c = &t->bt_cursor; 546*2639ae9bSBen Gras F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE); 547*2639ae9bSBen Gras 548*2639ae9bSBen Gras curcopy = 0; 549*2639ae9bSBen Gras if (!F_ISSET(t, B_NODUPS)) { 550*2639ae9bSBen Gras /* 551*2639ae9bSBen Gras * We're going to have to do comparisons. If we weren't 552*2639ae9bSBen Gras * provided a copy of the key, i.e. the user is deleting 553*2639ae9bSBen Gras * the current cursor position, get one. 554*2639ae9bSBen Gras */ 555*2639ae9bSBen Gras if (key == NULL) { 556*2639ae9bSBen Gras e.page = h; 557*2639ae9bSBen Gras e.index = idx; 558*2639ae9bSBen Gras if ((status = __bt_ret(t, &e, 559*2639ae9bSBen Gras &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS) 560*2639ae9bSBen Gras return (status); 561*2639ae9bSBen Gras curcopy = 1; 562*2639ae9bSBen Gras key = &c->key; 563*2639ae9bSBen Gras } 564*2639ae9bSBen Gras /* Check previous key, if not at the beginning of the page. */ 565*2639ae9bSBen Gras if (idx > 0) { 566*2639ae9bSBen Gras e.page = h; 567*2639ae9bSBen Gras e.index = idx - 1; 568*2639ae9bSBen Gras if (__bt_cmp(t, key, &e) == 0) { 569*2639ae9bSBen Gras F_SET(c, CURS_BEFORE); 570*2639ae9bSBen Gras goto dup2; 571*2639ae9bSBen Gras } 572*2639ae9bSBen Gras } 573*2639ae9bSBen Gras /* Check next key, if not at the end of the page. */ 574*2639ae9bSBen Gras if (idx < (unsigned)(NEXTINDEX(h) - 1)) { 575*2639ae9bSBen Gras e.page = h; 576*2639ae9bSBen Gras e.index = idx + 1; 577*2639ae9bSBen Gras if (__bt_cmp(t, key, &e) == 0) { 578*2639ae9bSBen Gras F_SET(c, CURS_AFTER); 579*2639ae9bSBen Gras goto dup2; 580*2639ae9bSBen Gras } 581*2639ae9bSBen Gras } 582*2639ae9bSBen Gras /* Check previous key if at the beginning of the page. */ 583*2639ae9bSBen Gras if (idx == 0 && h->prevpg != P_INVALID) { 584*2639ae9bSBen Gras if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) 585*2639ae9bSBen Gras return (RET_ERROR); 586*2639ae9bSBen Gras e.page = pg; 587*2639ae9bSBen Gras e.index = NEXTINDEX(pg) - 1; 588*2639ae9bSBen Gras if (__bt_cmp(t, key, &e) == 0) { 589*2639ae9bSBen Gras F_SET(c, CURS_BEFORE); 590*2639ae9bSBen Gras goto dup1; 591*2639ae9bSBen Gras } 592*2639ae9bSBen Gras mpool_put(t->bt_mp, pg, 0); 593*2639ae9bSBen Gras } 594*2639ae9bSBen Gras /* Check next key if at the end of the page. */ 595*2639ae9bSBen Gras if (idx == (unsigned)(NEXTINDEX(h) - 1) && h->nextpg != P_INVALID) { 596*2639ae9bSBen Gras if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) 597*2639ae9bSBen Gras return (RET_ERROR); 598*2639ae9bSBen Gras e.page = pg; 599*2639ae9bSBen Gras e.index = 0; 600*2639ae9bSBen Gras if (__bt_cmp(t, key, &e) == 0) { 601*2639ae9bSBen Gras F_SET(c, CURS_AFTER); 602*2639ae9bSBen Gras dup1: mpool_put(t->bt_mp, pg, 0); 603*2639ae9bSBen Gras dup2: c->pg.pgno = e.page->pgno; 604*2639ae9bSBen Gras c->pg.index = e.index; 605*2639ae9bSBen Gras return (RET_SUCCESS); 606*2639ae9bSBen Gras } 607*2639ae9bSBen Gras mpool_put(t->bt_mp, pg, 0); 608*2639ae9bSBen Gras } 609*2639ae9bSBen Gras } 610*2639ae9bSBen Gras e.page = h; 611*2639ae9bSBen Gras e.index = idx; 612*2639ae9bSBen Gras if (curcopy || (status = 613*2639ae9bSBen Gras __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) { 614*2639ae9bSBen Gras F_SET(c, CURS_ACQUIRE); 615*2639ae9bSBen Gras return (RET_SUCCESS); 616*2639ae9bSBen Gras } 617*2639ae9bSBen Gras return (status); 618*2639ae9bSBen Gras } 619*2639ae9bSBen Gras 620*2639ae9bSBen Gras /* 621*2639ae9bSBen Gras * __bt_relink -- 622*2639ae9bSBen Gras * Link around a deleted page. 623*2639ae9bSBen Gras * 624*2639ae9bSBen Gras * Parameters: 625*2639ae9bSBen Gras * t: tree 626*2639ae9bSBen Gras * h: page to be deleted 627*2639ae9bSBen Gras */ 628*2639ae9bSBen Gras static int 629*2639ae9bSBen Gras __bt_relink(BTREE *t, PAGE *h) 630*2639ae9bSBen Gras { 631*2639ae9bSBen Gras PAGE *pg; 632*2639ae9bSBen Gras 633*2639ae9bSBen Gras if (h->nextpg != P_INVALID) { 634*2639ae9bSBen Gras if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) 635*2639ae9bSBen Gras return (RET_ERROR); 636*2639ae9bSBen Gras pg->prevpg = h->prevpg; 637*2639ae9bSBen Gras mpool_put(t->bt_mp, pg, MPOOL_DIRTY); 638*2639ae9bSBen Gras } 639*2639ae9bSBen Gras if (h->prevpg != P_INVALID) { 640*2639ae9bSBen Gras if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) 641*2639ae9bSBen Gras return (RET_ERROR); 642*2639ae9bSBen Gras pg->nextpg = h->nextpg; 643*2639ae9bSBen Gras mpool_put(t->bt_mp, pg, MPOOL_DIRTY); 644*2639ae9bSBen Gras } 645*2639ae9bSBen Gras return (0); 646*2639ae9bSBen Gras } 647