146130Smao /*- 246130Smao * Copyright (c) 1990 The Regents of the University of California. 346130Smao * All rights reserved. 446130Smao * 546130Smao * This code is derived from software contributed to Berkeley by 646130Smao * Mike Olson. 746130Smao * 846130Smao * %sccs.include.redist.c% 946130Smao */ 1046130Smao 1146130Smao #if defined(LIBC_SCCS) && !defined(lint) 12*56404Sbostic static char sccsid[] = "@(#)bt_delete.c 5.4 (Berkeley) 10/03/92"; 1346130Smao #endif /* LIBC_SCCS and not lint */ 1446130Smao 1546130Smao #include <sys/types.h> 1650989Sbostic #include <errno.h> 1746130Smao #include <db.h> 1850989Sbostic #include <stdio.h> 1946561Sbostic #include <string.h> 2046130Smao #include "btree.h" 2146130Smao 2250989Sbostic static int bt_bdelete __P((BTREE *, const DBT *)); 2350989Sbostic 2446130Smao /* 2550989Sbostic * __BT_DELETE -- Delete the item(s) referenced by a key. 2646130Smao * 2750989Sbostic * Parameters: 2850989Sbostic * dbp: pointer to access method 2950989Sbostic * key: key to delete 3050989Sbostic * flags: R_CURSOR if deleting what the cursor references 3146130Smao * 3250989Sbostic * Returns: 3350989Sbostic * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 3446130Smao */ 3546130Smao int 3650989Sbostic __bt_delete(dbp, key, flags) 3750989Sbostic const DB *dbp; 3850989Sbostic const DBT *key; 3950989Sbostic u_int flags; 4046130Smao { 4150989Sbostic BTREE *t; 4250989Sbostic int status; 4346130Smao 4450989Sbostic t = dbp->internal; 4550989Sbostic if (ISSET(t, BTF_RDONLY)) { 4650989Sbostic errno = EPERM; 4746130Smao return (RET_ERROR); 4846130Smao } 4950989Sbostic switch(flags) { 5050989Sbostic case 0: 5150989Sbostic status = bt_bdelete(t, key); 5250989Sbostic break; 5350989Sbostic case R_CURSOR: 5450989Sbostic /* 5550989Sbostic * If flags is R_CURSOR, delete the cursor; must already have 5650989Sbostic * started a scan and not have already deleted the record. For 5750989Sbostic * the delete cursor bit to have been set requires that the 5850989Sbostic * scan be initialized, so no reason to check. 5950989Sbostic */ 6050989Sbostic status = ISSET(t, BTF_DELCRSR) ? 6150989Sbostic RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); 6250989Sbostic break; 6350989Sbostic default: 6446130Smao errno = EINVAL; 6546130Smao return (RET_ERROR); 6646130Smao } 6750989Sbostic if (status == RET_SUCCESS) 6850989Sbostic SET(t, BTF_MODIFIED); 6950989Sbostic return (status); 7046130Smao } 7146130Smao 7246130Smao /* 7350989Sbostic * BT_BDELETE -- Delete all key/data pairs matching the specified key. 7446130Smao * 7550989Sbostic * Parameters: 7650989Sbostic * tree: tree 7750989Sbostic * key: key to delete 7846130Smao * 7950989Sbostic * Returns: 8050989Sbostic * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 8146130Smao */ 8250989Sbostic static int 8350989Sbostic bt_bdelete(t, key) 8450989Sbostic BTREE *t; 8550989Sbostic const DBT *key; 8646130Smao { 8750989Sbostic EPG *e, save; 8850989Sbostic PAGE *h; 8950989Sbostic pgno_t cpgno, pg; 9050989Sbostic index_t cindex; 91*56404Sbostic int deleted, dirty1, dirty2, exact; 9246130Smao 9350989Sbostic /* Find any matching record; __bt_search pins the page. */ 9450989Sbostic if ((e = __bt_search(t, key, &exact)) == NULL) 9550989Sbostic return (RET_ERROR); 9650989Sbostic if (!exact) { 9750989Sbostic mpool_put(t->bt_mp, e->page, 0); 9850989Sbostic return (RET_SPECIAL); 9950989Sbostic } 10046130Smao 10150989Sbostic /* 10250989Sbostic * Delete forward, then delete backward, from the found key. The 10350989Sbostic * ordering is so that the deletions don't mess up the page refs. 104*56404Sbostic * The first loop deletes the key from the original page, the second 105*56404Sbostic * unpins the original page. In the first loop, dirty1 is set if 106*56404Sbostic * the original page is modified, and dirty2 is set if any subsequent 107*56404Sbostic * pages are modified. In the second loop, dirty1 starts off set if 108*56404Sbostic * the original page has been modified, and is set if any subsequent 109*56404Sbostic * pages are modified. 11050989Sbostic * 11150989Sbostic * If find the key referenced by the cursor, don't delete it, just 11250989Sbostic * flag it for future deletion. The cursor page number is P_INVALID 11350989Sbostic * unless the sequential scan is initialized, so no reason to check. 11450989Sbostic * A special case is when the already deleted cursor record was the 11550989Sbostic * only record found. If so, then the delete opertion fails as no 11650989Sbostic * records were deleted. 11750989Sbostic * 11850989Sbostic * Cycle in place in the current page until the current record doesn't 11950989Sbostic * match the key or the page is empty. If the latter, walk forward, 120*56404Sbostic * skipping empty pages and repeating until a record doesn't match 12150989Sbostic * the key or the end of the tree is reached. 12250989Sbostic */ 12350989Sbostic cpgno = t->bt_bcursor.pgno; 12450989Sbostic cindex = t->bt_bcursor.index; 12550989Sbostic save = *e; 126*56404Sbostic dirty1 = 0; 12750989Sbostic for (h = e->page, deleted = 0;;) { 128*56404Sbostic dirty2 = 0; 12950989Sbostic do { 13050989Sbostic if (h->pgno == cpgno && e->index == cindex) { 13150989Sbostic if (NOTSET(t, BTF_DELCRSR)) { 13250989Sbostic SET(t, BTF_DELCRSR); 13350989Sbostic deleted = 1; 13450989Sbostic } 13550989Sbostic ++e->index; 13650989Sbostic } else { 137*56404Sbostic if (__bt_dleaf(t, h, e->index)) { 138*56404Sbostic if (h->pgno != save.page->pgno) 139*56404Sbostic mpool_put(t->bt_mp, h, dirty2); 140*56404Sbostic mpool_put(t->bt_mp, save.page, dirty1); 141*56404Sbostic return (RET_ERROR); 142*56404Sbostic } 143*56404Sbostic if (h->pgno == save.page->pgno) 144*56404Sbostic dirty1 = MPOOL_DIRTY; 145*56404Sbostic else 146*56404Sbostic dirty2 = MPOOL_DIRTY; 14750989Sbostic deleted = 1; 14850989Sbostic } 14950989Sbostic } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 15046130Smao 15150989Sbostic /* 15250989Sbostic * Quit if didn't find a match, no next page, or first key on 153*56404Sbostic * the next page doesn't match. Don't unpin the original page 154*56404Sbostic * unless an error occurs. 15550989Sbostic */ 15650989Sbostic if (e->index < NEXTINDEX(h)) 15750989Sbostic break; 15850989Sbostic for (;;) { 15950989Sbostic if ((pg = h->nextpg) == P_INVALID) 16050989Sbostic goto done1; 16150989Sbostic if (h->pgno != save.page->pgno) 162*56404Sbostic mpool_put(t->bt_mp, h, dirty2); 16350989Sbostic if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { 164*56404Sbostic mpool_put(t->bt_mp, save.page, dirty1); 16550989Sbostic return (RET_ERROR); 16650989Sbostic } 16750989Sbostic if (NEXTINDEX(h) != 0) { 16850989Sbostic e->page = h; 16950989Sbostic e->index = 0; 17050989Sbostic break; 17150989Sbostic } 17250989Sbostic } 17350989Sbostic 17450989Sbostic if (__bt_cmp(t, key, e) != 0) 17550989Sbostic break; 17646130Smao } 17750989Sbostic 17850989Sbostic /* 179*56404Sbostic * Reach here with the original page and the last page referenced 180*56404Sbostic * pinned (they may be the same). Release it if not the original. 18150989Sbostic */ 18250989Sbostic done1: if (h->pgno != save.page->pgno) 183*56404Sbostic mpool_put(t->bt_mp, h, dirty2); 18450989Sbostic 18550989Sbostic /* 18650989Sbostic * Walk backwards from the record previous to the record returned by 187*56404Sbostic * __bt_search, skipping empty pages, until a record doesn't match 188*56404Sbostic * the key or reach the beginning of the tree. 18950989Sbostic */ 19050989Sbostic *e = save; 19150989Sbostic for (;;) { 19250989Sbostic if (e->index) 19350989Sbostic --e->index; 19450989Sbostic for (h = e->page; e->index; --e->index) { 19550989Sbostic if (__bt_cmp(t, key, e) != 0) 19650989Sbostic goto done2; 19750989Sbostic if (h->pgno == cpgno && e->index == cindex) { 19850989Sbostic if (NOTSET(t, BTF_DELCRSR)) { 19950989Sbostic SET(t, BTF_DELCRSR); 20050989Sbostic deleted = 1; 20150989Sbostic } 20250989Sbostic } else { 203*56404Sbostic if (__bt_dleaf(t, h, e->index) == RET_ERROR) { 204*56404Sbostic mpool_put(t->bt_mp, h, dirty1); 205*56404Sbostic return (RET_ERROR); 206*56404Sbostic } 207*56404Sbostic if (h->pgno == save.page->pgno) 208*56404Sbostic dirty1 = MPOOL_DIRTY; 20950989Sbostic deleted = 1; 21050989Sbostic } 21150989Sbostic } 21250989Sbostic 21350989Sbostic if ((pg = h->prevpg) == P_INVALID) 21450989Sbostic goto done2; 215*56404Sbostic mpool_put(t->bt_mp, h, dirty1); 216*56404Sbostic dirty1 = 0; 21750989Sbostic if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) 21846130Smao return (RET_ERROR); 21950989Sbostic e->index = NEXTINDEX(h); 22046130Smao } 22146130Smao 22250989Sbostic /* 22350989Sbostic * Reach here with the last page that was looked at pinned. Release 22450989Sbostic * it. 22550989Sbostic */ 226*56404Sbostic done2: mpool_put(t->bt_mp, h, dirty1); 22750989Sbostic return (deleted ? RET_SUCCESS : RET_SPECIAL); 22850989Sbostic } 22946130Smao 23050989Sbostic /* 23150989Sbostic * __BT_DLEAF -- Delete a single record from a leaf page. 23250989Sbostic * 23350989Sbostic * Parameters: 23450989Sbostic * t: tree 23550989Sbostic * index: index on current page to delete 23650989Sbostic * 23750989Sbostic * Returns: 23850989Sbostic * RET_SUCCESS, RET_ERROR. 23950989Sbostic */ 24050989Sbostic int 24150989Sbostic __bt_dleaf(t, h, index) 24250989Sbostic BTREE *t; 24350989Sbostic PAGE *h; 24450989Sbostic int index; 24550989Sbostic { 24650989Sbostic register BLEAF *bl; 24750989Sbostic register index_t *ip, offset; 24850989Sbostic register size_t nbytes; 24950989Sbostic register int cnt; 25050989Sbostic char *from; 25150989Sbostic void *to; 25246130Smao 25350989Sbostic /* 25450989Sbostic * Delete a record from a btree leaf page. Internal records are never 25550989Sbostic * deleted from internal pages, regardless of the records that caused 25650989Sbostic * them to be added being deleted. Pages made empty by deletion are 25750989Sbostic * not reclaimed. They are, however, made available for reuse. 25850989Sbostic * 25950989Sbostic * Pack the remaining entries at the end of the page, shift the indices 26050989Sbostic * down, overwriting the deleted record and its index. If the record 26150989Sbostic * uses overflow pages, make them available for reuse. 26250989Sbostic */ 26350989Sbostic to = bl = GETBLEAF(h, index); 26450989Sbostic if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 26550989Sbostic return (RET_ERROR); 26650989Sbostic if (bl->flags & P_BIGDATA && 26750989Sbostic __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 26850989Sbostic return (RET_ERROR); 26950989Sbostic nbytes = NBLEAF(bl); 27046130Smao 27150989Sbostic /* 27250989Sbostic * Compress the key/data pairs. Compress and adjust the [BR]LEAF 27350989Sbostic * offsets. Reset the headers. 27450989Sbostic */ 27550989Sbostic from = (char *)h + h->upper; 27650989Sbostic bcopy(from, from + nbytes, (char *)to - from); 27750989Sbostic h->upper += nbytes; 27846130Smao 27950989Sbostic offset = h->linp[index]; 280*56404Sbostic for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) 28150989Sbostic if (ip[0] < offset) 28250989Sbostic ip[0] += nbytes; 283*56404Sbostic for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) 28450989Sbostic ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 28550989Sbostic h->lower -= sizeof(index_t); 28646130Smao return (RET_SUCCESS); 28746130Smao } 288