146130Smao /*- 246130Smao * Copyright (c) 1990 The Regents of the University of California. 346130Smao * All rights reserved. 446130Smao * 546130Smao * This code is derived from software contributed to Berkeley by 646130Smao * Mike Olson. 746130Smao * 846130Smao * %sccs.include.redist.c% 946130Smao */ 1046130Smao 1146130Smao #if defined(LIBC_SCCS) && !defined(lint) 12*58017Sbostic static char sccsid[] = "@(#)bt_delete.c 5.10 (Berkeley) 02/16/93"; 1346130Smao #endif /* LIBC_SCCS and not lint */ 1446130Smao 1546130Smao #include <sys/types.h> 1656738Sbostic 1750989Sbostic #include <errno.h> 1850989Sbostic #include <stdio.h> 1946561Sbostic #include <string.h> 2056738Sbostic 2157932Sbostic #include <db.h> 2246130Smao #include "btree.h" 2346130Smao 2450989Sbostic static int bt_bdelete __P((BTREE *, const DBT *)); 2550989Sbostic 2646130Smao /* 2750989Sbostic * __BT_DELETE -- Delete the item(s) referenced by a key. 2846130Smao * 2950989Sbostic * Parameters: 3050989Sbostic * dbp: pointer to access method 3150989Sbostic * key: key to delete 3250989Sbostic * flags: R_CURSOR if deleting what the cursor references 3346130Smao * 3450989Sbostic * Returns: 3550989Sbostic * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 3646130Smao */ 3746130Smao int 3850989Sbostic __bt_delete(dbp, key, flags) 3950989Sbostic const DB *dbp; 4050989Sbostic const DBT *key; 4150989Sbostic u_int flags; 4246130Smao { 4350989Sbostic BTREE *t; 4450989Sbostic int status; 4546130Smao 4650989Sbostic t = dbp->internal; 4750989Sbostic if (ISSET(t, BTF_RDONLY)) { 4850989Sbostic errno = EPERM; 4946130Smao return (RET_ERROR); 5046130Smao } 5150989Sbostic switch(flags) { 5250989Sbostic case 0: 5350989Sbostic status = bt_bdelete(t, key); 5450989Sbostic break; 5550989Sbostic case R_CURSOR: 5650989Sbostic /* 5750989Sbostic * If flags is R_CURSOR, delete the cursor; must already have 5850989Sbostic * started a scan and not have already deleted the record. For 5950989Sbostic * the delete cursor bit to have been set requires that the 6050989Sbostic * scan be initialized, so no reason to check. 6150989Sbostic */ 6256755Sbostic if (!ISSET(t, BTF_SEQINIT)) 6356755Sbostic goto einval; 6450989Sbostic status = ISSET(t, BTF_DELCRSR) ? 6550989Sbostic RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); 6650989Sbostic break; 6750989Sbostic default: 6856755Sbostic einval: errno = EINVAL; 6946130Smao return (RET_ERROR); 7046130Smao } 7150989Sbostic if (status == RET_SUCCESS) 7250989Sbostic SET(t, BTF_MODIFIED); 7350989Sbostic return (status); 7446130Smao } 7546130Smao 7646130Smao /* 7750989Sbostic * BT_BDELETE -- Delete all key/data pairs matching the specified key. 7846130Smao * 7950989Sbostic * Parameters: 8050989Sbostic * tree: tree 8150989Sbostic * key: key to delete 8246130Smao * 8350989Sbostic * Returns: 8450989Sbostic * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 8546130Smao */ 8650989Sbostic static int 8750989Sbostic bt_bdelete(t, key) 8850989Sbostic BTREE *t; 8950989Sbostic const DBT *key; 9046130Smao { 9150989Sbostic EPG *e, save; 9250989Sbostic PAGE *h; 9350989Sbostic pgno_t cpgno, pg; 9457989Sbostic indx_t cindex; 9556404Sbostic int deleted, dirty1, dirty2, exact; 9646130Smao 9750989Sbostic /* Find any matching record; __bt_search pins the page. */ 9850989Sbostic if ((e = __bt_search(t, key, &exact)) == NULL) 9950989Sbostic return (RET_ERROR); 10050989Sbostic if (!exact) { 10150989Sbostic mpool_put(t->bt_mp, e->page, 0); 10250989Sbostic return (RET_SPECIAL); 10350989Sbostic } 10446130Smao 10550989Sbostic /* 10650989Sbostic * Delete forward, then delete backward, from the found key. The 10750989Sbostic * ordering is so that the deletions don't mess up the page refs. 10856404Sbostic * The first loop deletes the key from the original page, the second 10956404Sbostic * unpins the original page. In the first loop, dirty1 is set if 11056404Sbostic * the original page is modified, and dirty2 is set if any subsequent 11156404Sbostic * pages are modified. In the second loop, dirty1 starts off set if 11256404Sbostic * the original page has been modified, and is set if any subsequent 11356404Sbostic * pages are modified. 11450989Sbostic * 11550989Sbostic * If find the key referenced by the cursor, don't delete it, just 11650989Sbostic * flag it for future deletion. The cursor page number is P_INVALID 11750989Sbostic * unless the sequential scan is initialized, so no reason to check. 11850989Sbostic * A special case is when the already deleted cursor record was the 11950989Sbostic * only record found. If so, then the delete opertion fails as no 12050989Sbostic * records were deleted. 12150989Sbostic * 12250989Sbostic * Cycle in place in the current page until the current record doesn't 12350989Sbostic * match the key or the page is empty. If the latter, walk forward, 12456404Sbostic * skipping empty pages and repeating until a record doesn't match 12550989Sbostic * the key or the end of the tree is reached. 12650989Sbostic */ 12750989Sbostic cpgno = t->bt_bcursor.pgno; 12850989Sbostic cindex = t->bt_bcursor.index; 12950989Sbostic save = *e; 13056404Sbostic dirty1 = 0; 13150989Sbostic for (h = e->page, deleted = 0;;) { 13256404Sbostic dirty2 = 0; 13350989Sbostic do { 13450989Sbostic if (h->pgno == cpgno && e->index == cindex) { 13556738Sbostic if (!ISSET(t, BTF_DELCRSR)) { 13650989Sbostic SET(t, BTF_DELCRSR); 13750989Sbostic deleted = 1; 13850989Sbostic } 13950989Sbostic ++e->index; 14050989Sbostic } else { 14156404Sbostic if (__bt_dleaf(t, h, e->index)) { 14256404Sbostic if (h->pgno != save.page->pgno) 14356404Sbostic mpool_put(t->bt_mp, h, dirty2); 14456404Sbostic mpool_put(t->bt_mp, save.page, dirty1); 14556404Sbostic return (RET_ERROR); 14656404Sbostic } 14756404Sbostic if (h->pgno == save.page->pgno) 14856404Sbostic dirty1 = MPOOL_DIRTY; 14956404Sbostic else 15056404Sbostic dirty2 = MPOOL_DIRTY; 15150989Sbostic deleted = 1; 15250989Sbostic } 15350989Sbostic } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 15446130Smao 15550989Sbostic /* 15650989Sbostic * Quit if didn't find a match, no next page, or first key on 15756404Sbostic * the next page doesn't match. Don't unpin the original page 15856404Sbostic * unless an error occurs. 15950989Sbostic */ 16050989Sbostic if (e->index < NEXTINDEX(h)) 16150989Sbostic break; 16250989Sbostic for (;;) { 16350989Sbostic if ((pg = h->nextpg) == P_INVALID) 16450989Sbostic goto done1; 16550989Sbostic if (h->pgno != save.page->pgno) 16656404Sbostic mpool_put(t->bt_mp, h, dirty2); 16750989Sbostic if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { 16856404Sbostic mpool_put(t->bt_mp, save.page, dirty1); 16950989Sbostic return (RET_ERROR); 17050989Sbostic } 17150989Sbostic if (NEXTINDEX(h) != 0) { 17250989Sbostic e->page = h; 17350989Sbostic e->index = 0; 17450989Sbostic break; 17550989Sbostic } 17650989Sbostic } 17750989Sbostic 17850989Sbostic if (__bt_cmp(t, key, e) != 0) 17950989Sbostic break; 18046130Smao } 18150989Sbostic 18250989Sbostic /* 18356404Sbostic * Reach here with the original page and the last page referenced 18456404Sbostic * pinned (they may be the same). Release it if not the original. 18550989Sbostic */ 18650989Sbostic done1: if (h->pgno != save.page->pgno) 18756404Sbostic mpool_put(t->bt_mp, h, dirty2); 18850989Sbostic 18950989Sbostic /* 19050989Sbostic * Walk backwards from the record previous to the record returned by 19156404Sbostic * __bt_search, skipping empty pages, until a record doesn't match 19256404Sbostic * the key or reach the beginning of the tree. 19350989Sbostic */ 19450989Sbostic *e = save; 19550989Sbostic for (;;) { 19650989Sbostic if (e->index) 19750989Sbostic --e->index; 19850989Sbostic for (h = e->page; e->index; --e->index) { 19950989Sbostic if (__bt_cmp(t, key, e) != 0) 20050989Sbostic goto done2; 20150989Sbostic if (h->pgno == cpgno && e->index == cindex) { 20256738Sbostic if (!ISSET(t, BTF_DELCRSR)) { 20350989Sbostic SET(t, BTF_DELCRSR); 20450989Sbostic deleted = 1; 20550989Sbostic } 20650989Sbostic } else { 20756404Sbostic if (__bt_dleaf(t, h, e->index) == RET_ERROR) { 20856404Sbostic mpool_put(t->bt_mp, h, dirty1); 20956404Sbostic return (RET_ERROR); 21056404Sbostic } 21156404Sbostic if (h->pgno == save.page->pgno) 21256404Sbostic dirty1 = MPOOL_DIRTY; 21350989Sbostic deleted = 1; 21450989Sbostic } 21550989Sbostic } 21650989Sbostic 21750989Sbostic if ((pg = h->prevpg) == P_INVALID) 21850989Sbostic goto done2; 21956404Sbostic mpool_put(t->bt_mp, h, dirty1); 22056404Sbostic dirty1 = 0; 22150989Sbostic if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) 22246130Smao return (RET_ERROR); 22357934Sbostic e->index = NEXTINDEX(e->page); 22446130Smao } 22546130Smao 22650989Sbostic /* 22750989Sbostic * Reach here with the last page that was looked at pinned. Release 22850989Sbostic * it. 22950989Sbostic */ 23056404Sbostic done2: mpool_put(t->bt_mp, h, dirty1); 23150989Sbostic return (deleted ? RET_SUCCESS : RET_SPECIAL); 23250989Sbostic } 23346130Smao 23450989Sbostic /* 23550989Sbostic * __BT_DLEAF -- Delete a single record from a leaf page. 23650989Sbostic * 23750989Sbostic * Parameters: 23850989Sbostic * t: tree 23950989Sbostic * index: index on current page to delete 24050989Sbostic * 24150989Sbostic * Returns: 24250989Sbostic * RET_SUCCESS, RET_ERROR. 24350989Sbostic */ 24450989Sbostic int 24550989Sbostic __bt_dleaf(t, h, index) 24650989Sbostic BTREE *t; 24750989Sbostic PAGE *h; 24850989Sbostic int index; 24950989Sbostic { 25050989Sbostic register BLEAF *bl; 25157989Sbostic register indx_t *ip, offset; 25250989Sbostic register size_t nbytes; 25350989Sbostic register int cnt; 25450989Sbostic char *from; 25550989Sbostic void *to; 25646130Smao 25750989Sbostic /* 25850989Sbostic * Delete a record from a btree leaf page. Internal records are never 25950989Sbostic * deleted from internal pages, regardless of the records that caused 26050989Sbostic * them to be added being deleted. Pages made empty by deletion are 26150989Sbostic * not reclaimed. They are, however, made available for reuse. 26250989Sbostic * 26350989Sbostic * Pack the remaining entries at the end of the page, shift the indices 26450989Sbostic * down, overwriting the deleted record and its index. If the record 26550989Sbostic * uses overflow pages, make them available for reuse. 26650989Sbostic */ 26750989Sbostic to = bl = GETBLEAF(h, index); 26850989Sbostic if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 26950989Sbostic return (RET_ERROR); 27050989Sbostic if (bl->flags & P_BIGDATA && 27150989Sbostic __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 27250989Sbostic return (RET_ERROR); 27350989Sbostic nbytes = NBLEAF(bl); 27446130Smao 27550989Sbostic /* 27650989Sbostic * Compress the key/data pairs. Compress and adjust the [BR]LEAF 27750989Sbostic * offsets. Reset the headers. 27850989Sbostic */ 27950989Sbostic from = (char *)h + h->upper; 280*58017Sbostic memmove(from + nbytes, from, (char *)to - from); 28150989Sbostic h->upper += nbytes; 28246130Smao 28350989Sbostic offset = h->linp[index]; 28456404Sbostic for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) 28550989Sbostic if (ip[0] < offset) 28650989Sbostic ip[0] += nbytes; 28756404Sbostic for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) 28850989Sbostic ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 28957989Sbostic h->lower -= sizeof(indx_t); 29046130Smao return (RET_SUCCESS); 29146130Smao } 292