146130Smao /*- 246130Smao * Copyright (c) 1990 The Regents of the University of California. 346130Smao * All rights reserved. 446130Smao * 546130Smao * This code is derived from software contributed to Berkeley by 646130Smao * Mike Olson. 746130Smao * 846130Smao * %sccs.include.redist.c% 946130Smao */ 1046130Smao 1146130Smao #if defined(LIBC_SCCS) && !defined(lint) 12*50989Sbostic static char sccsid[] = "@(#)bt_delete.c 5.3 (Berkeley) 09/04/91"; 1346130Smao #endif /* LIBC_SCCS and not lint */ 1446130Smao 1546130Smao #include <sys/types.h> 16*50989Sbostic #include <errno.h> 1746130Smao #include <db.h> 18*50989Sbostic #include <stdio.h> 1946561Sbostic #include <string.h> 2046130Smao #include "btree.h" 2146130Smao 22*50989Sbostic static int bt_bdelete __P((BTREE *, const DBT *)); 23*50989Sbostic 2446130Smao /* 25*50989Sbostic * __BT_DELETE -- Delete the item(s) referenced by a key. 2646130Smao * 27*50989Sbostic * Parameters: 28*50989Sbostic * dbp: pointer to access method 29*50989Sbostic * key: key to delete 30*50989Sbostic * flags: R_CURSOR if deleting what the cursor references 3146130Smao * 32*50989Sbostic * Returns: 33*50989Sbostic * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 3446130Smao */ 3546130Smao int 36*50989Sbostic __bt_delete(dbp, key, flags) 37*50989Sbostic const DB *dbp; 38*50989Sbostic const DBT *key; 39*50989Sbostic u_int flags; 4046130Smao { 41*50989Sbostic BTREE *t; 42*50989Sbostic int status; 4346130Smao 44*50989Sbostic t = dbp->internal; 45*50989Sbostic if (ISSET(t, BTF_RDONLY)) { 46*50989Sbostic errno = EPERM; 4746130Smao return (RET_ERROR); 4846130Smao } 49*50989Sbostic switch(flags) { 50*50989Sbostic case 0: 51*50989Sbostic status = bt_bdelete(t, key); 52*50989Sbostic break; 53*50989Sbostic case R_CURSOR: 54*50989Sbostic /* 55*50989Sbostic * If flags is R_CURSOR, delete the cursor; must already have 56*50989Sbostic * started a scan and not have already deleted the record. For 57*50989Sbostic * the delete cursor bit to have been set requires that the 58*50989Sbostic * scan be initialized, so no reason to check. 59*50989Sbostic */ 60*50989Sbostic status = ISSET(t, BTF_DELCRSR) ? 61*50989Sbostic RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); 62*50989Sbostic break; 63*50989Sbostic default: 6446130Smao errno = EINVAL; 6546130Smao return (RET_ERROR); 6646130Smao } 67*50989Sbostic if (status == RET_SUCCESS) 68*50989Sbostic SET(t, BTF_MODIFIED); 69*50989Sbostic return (status); 7046130Smao } 7146130Smao 7246130Smao /* 73*50989Sbostic * BT_BDELETE -- Delete all key/data pairs matching the specified key. 7446130Smao * 75*50989Sbostic * Parameters: 76*50989Sbostic * tree: tree 77*50989Sbostic * key: key to delete 7846130Smao * 79*50989Sbostic * Returns: 80*50989Sbostic * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 8146130Smao */ 82*50989Sbostic static int 83*50989Sbostic bt_bdelete(t, key) 84*50989Sbostic BTREE *t; 85*50989Sbostic const DBT *key; 8646130Smao { 87*50989Sbostic EPG *e, save; 88*50989Sbostic PAGE *h; 89*50989Sbostic pgno_t cpgno, pg; 90*50989Sbostic index_t cindex; 91*50989Sbostic int deleted, exact; 9246130Smao 93*50989Sbostic /* Find any matching record; __bt_search pins the page. */ 94*50989Sbostic if ((e = __bt_search(t, key, &exact)) == NULL) 95*50989Sbostic return (RET_ERROR); 96*50989Sbostic if (!exact) { 97*50989Sbostic mpool_put(t->bt_mp, e->page, 0); 98*50989Sbostic return (RET_SPECIAL); 99*50989Sbostic } 10046130Smao 101*50989Sbostic /* 102*50989Sbostic * Delete forward, then delete backward, from the found key. The 103*50989Sbostic * ordering is so that the deletions don't mess up the page refs. 104*50989Sbostic * The first loop deletes the found key, the second unpins the found 105*50989Sbostic * page. 106*50989Sbostic * 107*50989Sbostic * If find the key referenced by the cursor, don't delete it, just 108*50989Sbostic * flag it for future deletion. The cursor page number is P_INVALID 109*50989Sbostic * unless the sequential scan is initialized, so no reason to check. 110*50989Sbostic * A special case is when the already deleted cursor record was the 111*50989Sbostic * only record found. If so, then the delete opertion fails as no 112*50989Sbostic * records were deleted. 113*50989Sbostic * 114*50989Sbostic * Cycle in place in the current page until the current record doesn't 115*50989Sbostic * match the key or the page is empty. If the latter, walk forward, 116*50989Sbostic * skipping empty pages and repeating until an record doesn't match 117*50989Sbostic * the key or the end of the tree is reached. 118*50989Sbostic */ 119*50989Sbostic cpgno = t->bt_bcursor.pgno; 120*50989Sbostic cindex = t->bt_bcursor.index; 121*50989Sbostic save = *e; 122*50989Sbostic for (h = e->page, deleted = 0;;) { 123*50989Sbostic do { 124*50989Sbostic if (h->pgno == cpgno && e->index == cindex) { 125*50989Sbostic if (NOTSET(t, BTF_DELCRSR)) { 126*50989Sbostic SET(t, BTF_DELCRSR); 127*50989Sbostic deleted = 1; 128*50989Sbostic } 129*50989Sbostic ++e->index; 130*50989Sbostic } else { 131*50989Sbostic if (__bt_dleaf(t, h, e->index)) 132*50989Sbostic goto err; 133*50989Sbostic mpool_put(t->bt_mp, h, MPOOL_DIRTY); 134*50989Sbostic deleted = 1; 135*50989Sbostic } 136*50989Sbostic } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 13746130Smao 138*50989Sbostic /* 139*50989Sbostic * Quit if didn't find a match, no next page, or first key on 140*50989Sbostic * the next page doesn't match. Make a special effort not to 141*50989Sbostic * unpin the page the original match was on, but also make sure 142*50989Sbostic * it's unpinned if an error occurs. 143*50989Sbostic */ 144*50989Sbostic if (e->index < NEXTINDEX(h)) 145*50989Sbostic break; 146*50989Sbostic for (;;) { 147*50989Sbostic if ((pg = h->nextpg) == P_INVALID) 148*50989Sbostic goto done1; 149*50989Sbostic if (h->pgno != save.page->pgno) 150*50989Sbostic mpool_put(t->bt_mp, h, 0); 151*50989Sbostic if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { 152*50989Sbostic if (h->pgno == save.page->pgno) 153*50989Sbostic mpool_put(t->bt_mp, save.page, 0); 154*50989Sbostic return (RET_ERROR); 155*50989Sbostic } 156*50989Sbostic if (NEXTINDEX(h) != 0) { 157*50989Sbostic e->page = h; 158*50989Sbostic e->index = 0; 159*50989Sbostic break; 160*50989Sbostic } 161*50989Sbostic } 162*50989Sbostic 163*50989Sbostic if (__bt_cmp(t, key, e) != 0) 164*50989Sbostic break; 16546130Smao } 166*50989Sbostic 167*50989Sbostic /* 168*50989Sbostic * Reach here with the last page that was looked at pinned, and it may 169*50989Sbostic * or may not be the same as the page with the original match. If it's 170*50989Sbostic * not, release it. 171*50989Sbostic */ 172*50989Sbostic done1: if (h->pgno != save.page->pgno) 173*50989Sbostic mpool_put(t->bt_mp, h, 0); 174*50989Sbostic 175*50989Sbostic /* 176*50989Sbostic * Walk backwards from the record previous to the record returned by 177*50989Sbostic * __bt_search, skipping empty pages, until a current record doesn't 178*50989Sbostic * match the key or reach the beginning of the tree. 179*50989Sbostic */ 180*50989Sbostic *e = save; 181*50989Sbostic for (;;) { 182*50989Sbostic if (e->index) 183*50989Sbostic --e->index; 184*50989Sbostic for (h = e->page; e->index; --e->index) { 185*50989Sbostic if (__bt_cmp(t, key, e) != 0) 186*50989Sbostic goto done2; 187*50989Sbostic if (h->pgno == cpgno && e->index == cindex) { 188*50989Sbostic if (NOTSET(t, BTF_DELCRSR)) { 189*50989Sbostic SET(t, BTF_DELCRSR); 190*50989Sbostic deleted = 1; 191*50989Sbostic } 192*50989Sbostic } else { 193*50989Sbostic if (__bt_dleaf(t, h, e->index) == RET_ERROR) 194*50989Sbostic goto err; 195*50989Sbostic mpool_put(t->bt_mp, h, MPOOL_DIRTY); 196*50989Sbostic deleted = 1; 197*50989Sbostic } 198*50989Sbostic } 199*50989Sbostic 200*50989Sbostic if ((pg = h->prevpg) == P_INVALID) 201*50989Sbostic goto done2; 202*50989Sbostic mpool_put(t->bt_mp, h, 0); 203*50989Sbostic if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) 20446130Smao return (RET_ERROR); 205*50989Sbostic e->index = NEXTINDEX(h); 20646130Smao } 20746130Smao 208*50989Sbostic /* 209*50989Sbostic * Reach here with the last page that was looked at pinned. Release 210*50989Sbostic * it. 211*50989Sbostic */ 212*50989Sbostic done2: mpool_put(t->bt_mp, h, 0); 213*50989Sbostic return (deleted ? RET_SUCCESS : RET_SPECIAL); 21446130Smao 215*50989Sbostic err: mpool_put(t->bt_mp, h, 0); 216*50989Sbostic return (RET_ERROR); 217*50989Sbostic } 21846130Smao 219*50989Sbostic /* 220*50989Sbostic * __BT_DLEAF -- Delete a single record from a leaf page. 221*50989Sbostic * 222*50989Sbostic * Parameters: 223*50989Sbostic * t: tree 224*50989Sbostic * index: index on current page to delete 225*50989Sbostic * 226*50989Sbostic * Returns: 227*50989Sbostic * RET_SUCCESS, RET_ERROR. 228*50989Sbostic */ 229*50989Sbostic int 230*50989Sbostic __bt_dleaf(t, h, index) 231*50989Sbostic BTREE *t; 232*50989Sbostic PAGE *h; 233*50989Sbostic int index; 234*50989Sbostic { 235*50989Sbostic register BLEAF *bl; 236*50989Sbostic register index_t *ip, offset; 237*50989Sbostic register size_t nbytes; 238*50989Sbostic register int cnt; 239*50989Sbostic char *from; 240*50989Sbostic void *to; 24146130Smao 242*50989Sbostic /* 243*50989Sbostic * Delete a record from a btree leaf page. Internal records are never 244*50989Sbostic * deleted from internal pages, regardless of the records that caused 245*50989Sbostic * them to be added being deleted. Pages made empty by deletion are 246*50989Sbostic * not reclaimed. They are, however, made available for reuse. 247*50989Sbostic * 248*50989Sbostic * Pack the remaining entries at the end of the page, shift the indices 249*50989Sbostic * down, overwriting the deleted record and its index. If the record 250*50989Sbostic * uses overflow pages, make them available for reuse. 251*50989Sbostic */ 252*50989Sbostic to = bl = GETBLEAF(h, index); 253*50989Sbostic if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 254*50989Sbostic return (RET_ERROR); 255*50989Sbostic if (bl->flags & P_BIGDATA && 256*50989Sbostic __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 257*50989Sbostic return (RET_ERROR); 258*50989Sbostic nbytes = NBLEAF(bl); 25946130Smao 260*50989Sbostic /* 261*50989Sbostic * Compress the key/data pairs. Compress and adjust the [BR]LEAF 262*50989Sbostic * offsets. Reset the headers. 263*50989Sbostic */ 264*50989Sbostic from = (char *)h + h->upper; 265*50989Sbostic bcopy(from, from + nbytes, (char *)to - from); 266*50989Sbostic h->upper += nbytes; 26746130Smao 268*50989Sbostic offset = h->linp[index]; 269*50989Sbostic for (cnt = &h->linp[index] - (ip = &h->linp[0]); cnt--; ++ip) 270*50989Sbostic if (ip[0] < offset) 271*50989Sbostic ip[0] += nbytes; 272*50989Sbostic for (cnt = &h->linp[NEXTINDEX(h)] - ip; --cnt; ++ip) 273*50989Sbostic ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 274*50989Sbostic h->lower -= sizeof(index_t); 27546130Smao return (RET_SUCCESS); 27646130Smao } 277