146130Smao /*- 246130Smao * Copyright (c) 1990 The Regents of the University of California. 346130Smao * All rights reserved. 446130Smao * 546130Smao * This code is derived from software contributed to Berkeley by 646130Smao * Mike Olson. 746130Smao * 846130Smao * %sccs.include.redist.c% 946130Smao */ 1046130Smao 1146130Smao #if defined(LIBC_SCCS) && !defined(lint) 12*56738Sbostic static char sccsid[] = "@(#)bt_delete.c 5.5 (Berkeley) 11/13/92"; 1346130Smao #endif /* LIBC_SCCS and not lint */ 1446130Smao 1546130Smao #include <sys/types.h> 16*56738Sbostic 17*56738Sbostic #include <db.h> 1850989Sbostic #include <errno.h> 1950989Sbostic #include <stdio.h> 2046561Sbostic #include <string.h> 21*56738Sbostic 2246130Smao #include "btree.h" 2346130Smao 2450989Sbostic static int bt_bdelete __P((BTREE *, const DBT *)); 2550989Sbostic 2646130Smao /* 2750989Sbostic * __BT_DELETE -- Delete the item(s) referenced by a key. 2846130Smao * 2950989Sbostic * Parameters: 3050989Sbostic * dbp: pointer to access method 3150989Sbostic * key: key to delete 3250989Sbostic * flags: R_CURSOR if deleting what the cursor references 3346130Smao * 3450989Sbostic * Returns: 3550989Sbostic * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 3646130Smao */ 3746130Smao int 3850989Sbostic __bt_delete(dbp, key, flags) 3950989Sbostic const DB *dbp; 4050989Sbostic const DBT *key; 4150989Sbostic u_int flags; 4246130Smao { 4350989Sbostic BTREE *t; 4450989Sbostic int status; 4546130Smao 4650989Sbostic t = dbp->internal; 4750989Sbostic if (ISSET(t, BTF_RDONLY)) { 4850989Sbostic errno = EPERM; 4946130Smao return (RET_ERROR); 5046130Smao } 5150989Sbostic switch(flags) { 5250989Sbostic case 0: 5350989Sbostic status = bt_bdelete(t, key); 5450989Sbostic break; 5550989Sbostic case R_CURSOR: 5650989Sbostic /* 5750989Sbostic * If flags is R_CURSOR, delete the cursor; must already have 5850989Sbostic * started a scan and not have already deleted the record. For 5950989Sbostic * the delete cursor bit to have been set requires that the 6050989Sbostic * scan be initialized, so no reason to check. 6150989Sbostic */ 6250989Sbostic status = ISSET(t, BTF_DELCRSR) ? 6350989Sbostic RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); 6450989Sbostic break; 6550989Sbostic default: 6646130Smao errno = EINVAL; 6746130Smao return (RET_ERROR); 6846130Smao } 6950989Sbostic if (status == RET_SUCCESS) 7050989Sbostic SET(t, BTF_MODIFIED); 7150989Sbostic return (status); 7246130Smao } 7346130Smao 7446130Smao /* 7550989Sbostic * BT_BDELETE -- Delete all key/data pairs matching the specified key. 7646130Smao * 7750989Sbostic * Parameters: 7850989Sbostic * tree: tree 7950989Sbostic * key: key to delete 8046130Smao * 8150989Sbostic * Returns: 8250989Sbostic * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 8346130Smao */ 8450989Sbostic static int 8550989Sbostic bt_bdelete(t, key) 8650989Sbostic BTREE *t; 8750989Sbostic const DBT *key; 8846130Smao { 8950989Sbostic EPG *e, save; 9050989Sbostic PAGE *h; 9150989Sbostic pgno_t cpgno, pg; 9250989Sbostic index_t cindex; 9356404Sbostic int deleted, dirty1, dirty2, exact; 9446130Smao 9550989Sbostic /* Find any matching record; __bt_search pins the page. */ 9650989Sbostic if ((e = __bt_search(t, key, &exact)) == NULL) 9750989Sbostic return (RET_ERROR); 9850989Sbostic if (!exact) { 9950989Sbostic mpool_put(t->bt_mp, e->page, 0); 10050989Sbostic return (RET_SPECIAL); 10150989Sbostic } 10246130Smao 10350989Sbostic /* 10450989Sbostic * Delete forward, then delete backward, from the found key. The 10550989Sbostic * ordering is so that the deletions don't mess up the page refs. 10656404Sbostic * The first loop deletes the key from the original page, the second 10756404Sbostic * unpins the original page. In the first loop, dirty1 is set if 10856404Sbostic * the original page is modified, and dirty2 is set if any subsequent 10956404Sbostic * pages are modified. In the second loop, dirty1 starts off set if 11056404Sbostic * the original page has been modified, and is set if any subsequent 11156404Sbostic * pages are modified. 11250989Sbostic * 11350989Sbostic * If find the key referenced by the cursor, don't delete it, just 11450989Sbostic * flag it for future deletion. The cursor page number is P_INVALID 11550989Sbostic * unless the sequential scan is initialized, so no reason to check. 11650989Sbostic * A special case is when the already deleted cursor record was the 11750989Sbostic * only record found. If so, then the delete opertion fails as no 11850989Sbostic * records were deleted. 11950989Sbostic * 12050989Sbostic * Cycle in place in the current page until the current record doesn't 12150989Sbostic * match the key or the page is empty. If the latter, walk forward, 12256404Sbostic * skipping empty pages and repeating until a record doesn't match 12350989Sbostic * the key or the end of the tree is reached. 12450989Sbostic */ 12550989Sbostic cpgno = t->bt_bcursor.pgno; 12650989Sbostic cindex = t->bt_bcursor.index; 12750989Sbostic save = *e; 12856404Sbostic dirty1 = 0; 12950989Sbostic for (h = e->page, deleted = 0;;) { 13056404Sbostic dirty2 = 0; 13150989Sbostic do { 13250989Sbostic if (h->pgno == cpgno && e->index == cindex) { 133*56738Sbostic if (!ISSET(t, BTF_DELCRSR)) { 13450989Sbostic SET(t, BTF_DELCRSR); 13550989Sbostic deleted = 1; 13650989Sbostic } 13750989Sbostic ++e->index; 13850989Sbostic } else { 13956404Sbostic if (__bt_dleaf(t, h, e->index)) { 14056404Sbostic if (h->pgno != save.page->pgno) 14156404Sbostic mpool_put(t->bt_mp, h, dirty2); 14256404Sbostic mpool_put(t->bt_mp, save.page, dirty1); 14356404Sbostic return (RET_ERROR); 14456404Sbostic } 14556404Sbostic if (h->pgno == save.page->pgno) 14656404Sbostic dirty1 = MPOOL_DIRTY; 14756404Sbostic else 14856404Sbostic dirty2 = MPOOL_DIRTY; 14950989Sbostic deleted = 1; 15050989Sbostic } 15150989Sbostic } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 15246130Smao 15350989Sbostic /* 15450989Sbostic * Quit if didn't find a match, no next page, or first key on 15556404Sbostic * the next page doesn't match. Don't unpin the original page 15656404Sbostic * unless an error occurs. 15750989Sbostic */ 15850989Sbostic if (e->index < NEXTINDEX(h)) 15950989Sbostic break; 16050989Sbostic for (;;) { 16150989Sbostic if ((pg = h->nextpg) == P_INVALID) 16250989Sbostic goto done1; 16350989Sbostic if (h->pgno != save.page->pgno) 16456404Sbostic mpool_put(t->bt_mp, h, dirty2); 16550989Sbostic if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { 16656404Sbostic mpool_put(t->bt_mp, save.page, dirty1); 16750989Sbostic return (RET_ERROR); 16850989Sbostic } 16950989Sbostic if (NEXTINDEX(h) != 0) { 17050989Sbostic e->page = h; 17150989Sbostic e->index = 0; 17250989Sbostic break; 17350989Sbostic } 17450989Sbostic } 17550989Sbostic 17650989Sbostic if (__bt_cmp(t, key, e) != 0) 17750989Sbostic break; 17846130Smao } 17950989Sbostic 18050989Sbostic /* 18156404Sbostic * Reach here with the original page and the last page referenced 18256404Sbostic * pinned (they may be the same). Release it if not the original. 18350989Sbostic */ 18450989Sbostic done1: if (h->pgno != save.page->pgno) 18556404Sbostic mpool_put(t->bt_mp, h, dirty2); 18650989Sbostic 18750989Sbostic /* 18850989Sbostic * Walk backwards from the record previous to the record returned by 18956404Sbostic * __bt_search, skipping empty pages, until a record doesn't match 19056404Sbostic * the key or reach the beginning of the tree. 19150989Sbostic */ 19250989Sbostic *e = save; 19350989Sbostic for (;;) { 19450989Sbostic if (e->index) 19550989Sbostic --e->index; 19650989Sbostic for (h = e->page; e->index; --e->index) { 19750989Sbostic if (__bt_cmp(t, key, e) != 0) 19850989Sbostic goto done2; 19950989Sbostic if (h->pgno == cpgno && e->index == cindex) { 200*56738Sbostic if (!ISSET(t, BTF_DELCRSR)) { 20150989Sbostic SET(t, BTF_DELCRSR); 20250989Sbostic deleted = 1; 20350989Sbostic } 20450989Sbostic } else { 20556404Sbostic if (__bt_dleaf(t, h, e->index) == RET_ERROR) { 20656404Sbostic mpool_put(t->bt_mp, h, dirty1); 20756404Sbostic return (RET_ERROR); 20856404Sbostic } 20956404Sbostic if (h->pgno == save.page->pgno) 21056404Sbostic dirty1 = MPOOL_DIRTY; 21150989Sbostic deleted = 1; 21250989Sbostic } 21350989Sbostic } 21450989Sbostic 21550989Sbostic if ((pg = h->prevpg) == P_INVALID) 21650989Sbostic goto done2; 21756404Sbostic mpool_put(t->bt_mp, h, dirty1); 21856404Sbostic dirty1 = 0; 21950989Sbostic if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) 22046130Smao return (RET_ERROR); 22150989Sbostic e->index = NEXTINDEX(h); 22246130Smao } 22346130Smao 22450989Sbostic /* 22550989Sbostic * Reach here with the last page that was looked at pinned. Release 22650989Sbostic * it. 22750989Sbostic */ 22856404Sbostic done2: mpool_put(t->bt_mp, h, dirty1); 22950989Sbostic return (deleted ? RET_SUCCESS : RET_SPECIAL); 23050989Sbostic } 23146130Smao 23250989Sbostic /* 23350989Sbostic * __BT_DLEAF -- Delete a single record from a leaf page. 23450989Sbostic * 23550989Sbostic * Parameters: 23650989Sbostic * t: tree 23750989Sbostic * index: index on current page to delete 23850989Sbostic * 23950989Sbostic * Returns: 24050989Sbostic * RET_SUCCESS, RET_ERROR. 24150989Sbostic */ 24250989Sbostic int 24350989Sbostic __bt_dleaf(t, h, index) 24450989Sbostic BTREE *t; 24550989Sbostic PAGE *h; 24650989Sbostic int index; 24750989Sbostic { 24850989Sbostic register BLEAF *bl; 24950989Sbostic register index_t *ip, offset; 25050989Sbostic register size_t nbytes; 25150989Sbostic register int cnt; 25250989Sbostic char *from; 25350989Sbostic void *to; 25446130Smao 25550989Sbostic /* 25650989Sbostic * Delete a record from a btree leaf page. Internal records are never 25750989Sbostic * deleted from internal pages, regardless of the records that caused 25850989Sbostic * them to be added being deleted. Pages made empty by deletion are 25950989Sbostic * not reclaimed. They are, however, made available for reuse. 26050989Sbostic * 26150989Sbostic * Pack the remaining entries at the end of the page, shift the indices 26250989Sbostic * down, overwriting the deleted record and its index. If the record 26350989Sbostic * uses overflow pages, make them available for reuse. 26450989Sbostic */ 26550989Sbostic to = bl = GETBLEAF(h, index); 26650989Sbostic if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 26750989Sbostic return (RET_ERROR); 26850989Sbostic if (bl->flags & P_BIGDATA && 26950989Sbostic __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 27050989Sbostic return (RET_ERROR); 27150989Sbostic nbytes = NBLEAF(bl); 27246130Smao 27350989Sbostic /* 27450989Sbostic * Compress the key/data pairs. Compress and adjust the [BR]LEAF 27550989Sbostic * offsets. Reset the headers. 27650989Sbostic */ 27750989Sbostic from = (char *)h + h->upper; 27850989Sbostic bcopy(from, from + nbytes, (char *)to - from); 27950989Sbostic h->upper += nbytes; 28046130Smao 28150989Sbostic offset = h->linp[index]; 28256404Sbostic for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) 28350989Sbostic if (ip[0] < offset) 28450989Sbostic ip[0] += nbytes; 28556404Sbostic for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) 28650989Sbostic ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 28750989Sbostic h->lower -= sizeof(index_t); 28846130Smao return (RET_SUCCESS); 28946130Smao } 290