146130Smao /*- 261194Sbostic * Copyright (c) 1990, 1993 361194Sbostic * The Regents of the University of California. All rights reserved. 446130Smao * 546130Smao * This code is derived from software contributed to Berkeley by 646130Smao * Mike Olson. 746130Smao * 846130Smao * %sccs.include.redist.c% 946130Smao */ 1046130Smao 1146130Smao #if defined(LIBC_SCCS) && !defined(lint) 12*64460Sbostic static char sccsid[] = "@(#)bt_delete.c 8.2 (Berkeley) 09/07/93"; 1346130Smao #endif /* LIBC_SCCS and not lint */ 1446130Smao 1546130Smao #include <sys/types.h> 1656738Sbostic 1750989Sbostic #include <errno.h> 1850989Sbostic #include <stdio.h> 1946561Sbostic #include <string.h> 2056738Sbostic 2157932Sbostic #include <db.h> 2246130Smao #include "btree.h" 2346130Smao 2450989Sbostic static int bt_bdelete __P((BTREE *, const DBT *)); 2550989Sbostic 2646130Smao /* 2750989Sbostic * __BT_DELETE -- Delete the item(s) referenced by a key. 2846130Smao * 2950989Sbostic * Parameters: 3050989Sbostic * dbp: pointer to access method 3150989Sbostic * key: key to delete 3250989Sbostic * flags: R_CURSOR if deleting what the cursor references 3346130Smao * 3450989Sbostic * Returns: 3550989Sbostic * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 3646130Smao */ 3746130Smao int 3850989Sbostic __bt_delete(dbp, key, flags) 3950989Sbostic const DB *dbp; 4050989Sbostic const DBT *key; 4150989Sbostic u_int flags; 4246130Smao { 4350989Sbostic BTREE *t; 4450989Sbostic int status; 4546130Smao 4650989Sbostic t = dbp->internal; 47*64460Sbostic 48*64460Sbostic /* Toss any page pinned across calls. */ 49*64460Sbostic if (t->bt_pinned != NULL) { 50*64460Sbostic mpool_put(t->bt_mp, t->bt_pinned, 0); 51*64460Sbostic t->bt_pinned = NULL; 52*64460Sbostic } 53*64460Sbostic 5460046Sbostic if (ISSET(t, B_RDONLY)) { 5550989Sbostic errno = EPERM; 5646130Smao return (RET_ERROR); 5746130Smao } 58*64460Sbostic 5950989Sbostic switch(flags) { 6050989Sbostic case 0: 6150989Sbostic status = bt_bdelete(t, key); 6250989Sbostic break; 6350989Sbostic case R_CURSOR: 6450989Sbostic /* 6550989Sbostic * If flags is R_CURSOR, delete the cursor; must already have 6650989Sbostic * started a scan and not have already deleted the record. For 6750989Sbostic * the delete cursor bit to have been set requires that the 6850989Sbostic * scan be initialized, so no reason to check. 6950989Sbostic */ 7060046Sbostic if (!ISSET(t, B_SEQINIT)) 7156755Sbostic goto einval; 7260046Sbostic status = ISSET(t, B_DELCRSR) ? 7350989Sbostic RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); 7450989Sbostic break; 7550989Sbostic default: 7656755Sbostic einval: errno = EINVAL; 7746130Smao return (RET_ERROR); 7846130Smao } 7950989Sbostic if (status == RET_SUCCESS) 8060046Sbostic SET(t, B_MODIFIED); 8150989Sbostic return (status); 8246130Smao } 8346130Smao 8446130Smao /* 8550989Sbostic * BT_BDELETE -- Delete all key/data pairs matching the specified key. 8646130Smao * 8750989Sbostic * Parameters: 8850989Sbostic * tree: tree 8950989Sbostic * key: key to delete 9046130Smao * 9150989Sbostic * Returns: 9250989Sbostic * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 9346130Smao */ 9450989Sbostic static int 9550989Sbostic bt_bdelete(t, key) 9650989Sbostic BTREE *t; 9750989Sbostic const DBT *key; 9846130Smao { 9950989Sbostic EPG *e, save; 10050989Sbostic PAGE *h; 10150989Sbostic pgno_t cpgno, pg; 10257989Sbostic indx_t cindex; 10356404Sbostic int deleted, dirty1, dirty2, exact; 10446130Smao 10550989Sbostic /* Find any matching record; __bt_search pins the page. */ 10650989Sbostic if ((e = __bt_search(t, key, &exact)) == NULL) 10750989Sbostic return (RET_ERROR); 10850989Sbostic if (!exact) { 10950989Sbostic mpool_put(t->bt_mp, e->page, 0); 11050989Sbostic return (RET_SPECIAL); 11150989Sbostic } 11246130Smao 11350989Sbostic /* 11450989Sbostic * Delete forward, then delete backward, from the found key. The 11550989Sbostic * ordering is so that the deletions don't mess up the page refs. 11656404Sbostic * The first loop deletes the key from the original page, the second 11756404Sbostic * unpins the original page. In the first loop, dirty1 is set if 11856404Sbostic * the original page is modified, and dirty2 is set if any subsequent 11956404Sbostic * pages are modified. In the second loop, dirty1 starts off set if 12056404Sbostic * the original page has been modified, and is set if any subsequent 12156404Sbostic * pages are modified. 12250989Sbostic * 12350989Sbostic * If find the key referenced by the cursor, don't delete it, just 12450989Sbostic * flag it for future deletion. The cursor page number is P_INVALID 12550989Sbostic * unless the sequential scan is initialized, so no reason to check. 12650989Sbostic * A special case is when the already deleted cursor record was the 12750989Sbostic * only record found. If so, then the delete opertion fails as no 12850989Sbostic * records were deleted. 12950989Sbostic * 13050989Sbostic * Cycle in place in the current page until the current record doesn't 13150989Sbostic * match the key or the page is empty. If the latter, walk forward, 13256404Sbostic * skipping empty pages and repeating until a record doesn't match 13350989Sbostic * the key or the end of the tree is reached. 13450989Sbostic */ 13550989Sbostic cpgno = t->bt_bcursor.pgno; 13650989Sbostic cindex = t->bt_bcursor.index; 13750989Sbostic save = *e; 13856404Sbostic dirty1 = 0; 13950989Sbostic for (h = e->page, deleted = 0;;) { 14056404Sbostic dirty2 = 0; 14150989Sbostic do { 14250989Sbostic if (h->pgno == cpgno && e->index == cindex) { 14360046Sbostic if (!ISSET(t, B_DELCRSR)) { 14460046Sbostic SET(t, B_DELCRSR); 14550989Sbostic deleted = 1; 14650989Sbostic } 14750989Sbostic ++e->index; 14850989Sbostic } else { 14956404Sbostic if (__bt_dleaf(t, h, e->index)) { 15056404Sbostic if (h->pgno != save.page->pgno) 15156404Sbostic mpool_put(t->bt_mp, h, dirty2); 15256404Sbostic mpool_put(t->bt_mp, save.page, dirty1); 15356404Sbostic return (RET_ERROR); 15456404Sbostic } 15556404Sbostic if (h->pgno == save.page->pgno) 15656404Sbostic dirty1 = MPOOL_DIRTY; 15756404Sbostic else 15856404Sbostic dirty2 = MPOOL_DIRTY; 15950989Sbostic deleted = 1; 16050989Sbostic } 16150989Sbostic } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 16246130Smao 16350989Sbostic /* 16450989Sbostic * Quit if didn't find a match, no next page, or first key on 16556404Sbostic * the next page doesn't match. Don't unpin the original page 16656404Sbostic * unless an error occurs. 16750989Sbostic */ 16850989Sbostic if (e->index < NEXTINDEX(h)) 16950989Sbostic break; 17050989Sbostic for (;;) { 17150989Sbostic if ((pg = h->nextpg) == P_INVALID) 17250989Sbostic goto done1; 17350989Sbostic if (h->pgno != save.page->pgno) 17456404Sbostic mpool_put(t->bt_mp, h, dirty2); 17550989Sbostic if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { 17656404Sbostic mpool_put(t->bt_mp, save.page, dirty1); 17750989Sbostic return (RET_ERROR); 17850989Sbostic } 17950989Sbostic if (NEXTINDEX(h) != 0) { 18050989Sbostic e->page = h; 18150989Sbostic e->index = 0; 18250989Sbostic break; 18350989Sbostic } 18450989Sbostic } 18550989Sbostic 18650989Sbostic if (__bt_cmp(t, key, e) != 0) 18750989Sbostic break; 18846130Smao } 18950989Sbostic 19050989Sbostic /* 19156404Sbostic * Reach here with the original page and the last page referenced 19256404Sbostic * pinned (they may be the same). Release it if not the original. 19350989Sbostic */ 19450989Sbostic done1: if (h->pgno != save.page->pgno) 19556404Sbostic mpool_put(t->bt_mp, h, dirty2); 19650989Sbostic 19750989Sbostic /* 19850989Sbostic * Walk backwards from the record previous to the record returned by 19956404Sbostic * __bt_search, skipping empty pages, until a record doesn't match 20056404Sbostic * the key or reach the beginning of the tree. 20150989Sbostic */ 20250989Sbostic *e = save; 20350989Sbostic for (;;) { 20450989Sbostic if (e->index) 20550989Sbostic --e->index; 20650989Sbostic for (h = e->page; e->index; --e->index) { 20750989Sbostic if (__bt_cmp(t, key, e) != 0) 20850989Sbostic goto done2; 20950989Sbostic if (h->pgno == cpgno && e->index == cindex) { 21060046Sbostic if (!ISSET(t, B_DELCRSR)) { 21160046Sbostic SET(t, B_DELCRSR); 21250989Sbostic deleted = 1; 21350989Sbostic } 21450989Sbostic } else { 21556404Sbostic if (__bt_dleaf(t, h, e->index) == RET_ERROR) { 21656404Sbostic mpool_put(t->bt_mp, h, dirty1); 21756404Sbostic return (RET_ERROR); 21856404Sbostic } 21956404Sbostic if (h->pgno == save.page->pgno) 22056404Sbostic dirty1 = MPOOL_DIRTY; 22150989Sbostic deleted = 1; 22250989Sbostic } 22350989Sbostic } 22450989Sbostic 22550989Sbostic if ((pg = h->prevpg) == P_INVALID) 22650989Sbostic goto done2; 22756404Sbostic mpool_put(t->bt_mp, h, dirty1); 22856404Sbostic dirty1 = 0; 22950989Sbostic if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) 23046130Smao return (RET_ERROR); 23157934Sbostic e->index = NEXTINDEX(e->page); 23246130Smao } 23346130Smao 23450989Sbostic /* 23550989Sbostic * Reach here with the last page that was looked at pinned. Release 23650989Sbostic * it. 23750989Sbostic */ 23856404Sbostic done2: mpool_put(t->bt_mp, h, dirty1); 23950989Sbostic return (deleted ? RET_SUCCESS : RET_SPECIAL); 24050989Sbostic } 24146130Smao 24250989Sbostic /* 24350989Sbostic * __BT_DLEAF -- Delete a single record from a leaf page. 24450989Sbostic * 24550989Sbostic * Parameters: 24650989Sbostic * t: tree 24750989Sbostic * index: index on current page to delete 24850989Sbostic * 24950989Sbostic * Returns: 25050989Sbostic * RET_SUCCESS, RET_ERROR. 25150989Sbostic */ 25250989Sbostic int 25350989Sbostic __bt_dleaf(t, h, index) 25450989Sbostic BTREE *t; 25550989Sbostic PAGE *h; 25650989Sbostic int index; 25750989Sbostic { 25850989Sbostic register BLEAF *bl; 25957989Sbostic register indx_t *ip, offset; 26050989Sbostic register size_t nbytes; 26150989Sbostic register int cnt; 26250989Sbostic char *from; 26350989Sbostic void *to; 26446130Smao 26550989Sbostic /* 26650989Sbostic * Delete a record from a btree leaf page. Internal records are never 26750989Sbostic * deleted from internal pages, regardless of the records that caused 26850989Sbostic * them to be added being deleted. Pages made empty by deletion are 26950989Sbostic * not reclaimed. They are, however, made available for reuse. 27050989Sbostic * 27150989Sbostic * Pack the remaining entries at the end of the page, shift the indices 27250989Sbostic * down, overwriting the deleted record and its index. If the record 27350989Sbostic * uses overflow pages, make them available for reuse. 27450989Sbostic */ 27550989Sbostic to = bl = GETBLEAF(h, index); 27650989Sbostic if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 27750989Sbostic return (RET_ERROR); 27850989Sbostic if (bl->flags & P_BIGDATA && 27950989Sbostic __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 28050989Sbostic return (RET_ERROR); 28150989Sbostic nbytes = NBLEAF(bl); 28246130Smao 28350989Sbostic /* 28450989Sbostic * Compress the key/data pairs. Compress and adjust the [BR]LEAF 28550989Sbostic * offsets. Reset the headers. 28650989Sbostic */ 28750989Sbostic from = (char *)h + h->upper; 28858017Sbostic memmove(from + nbytes, from, (char *)to - from); 28950989Sbostic h->upper += nbytes; 29046130Smao 29150989Sbostic offset = h->linp[index]; 29256404Sbostic for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) 29350989Sbostic if (ip[0] < offset) 29450989Sbostic ip[0] += nbytes; 29556404Sbostic for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) 29650989Sbostic ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 29757989Sbostic h->lower -= sizeof(indx_t); 29846130Smao return (RET_SUCCESS); 29946130Smao } 300