146130Smao /*-
261194Sbostic * Copyright (c) 1990, 1993
361194Sbostic * The Regents of the University of California. All rights reserved.
446130Smao *
546130Smao * This code is derived from software contributed to Berkeley by
646130Smao * Mike Olson.
746130Smao *
846130Smao * %sccs.include.redist.c%
946130Smao */
1046130Smao
1146130Smao #if defined(LIBC_SCCS) && !defined(lint)
12*66192Sbostic static char sccsid[] = "@(#)bt_delete.c 8.3 (Berkeley) 02/21/94";
1346130Smao #endif /* LIBC_SCCS and not lint */
1446130Smao
1546130Smao #include <sys/types.h>
1656738Sbostic
1750989Sbostic #include <errno.h>
1850989Sbostic #include <stdio.h>
1946561Sbostic #include <string.h>
2056738Sbostic
2157932Sbostic #include <db.h>
2246130Smao #include "btree.h"
2346130Smao
2450989Sbostic static int bt_bdelete __P((BTREE *, const DBT *));
2550989Sbostic
2646130Smao /*
2750989Sbostic * __BT_DELETE -- Delete the item(s) referenced by a key.
2846130Smao *
2950989Sbostic * Parameters:
3050989Sbostic * dbp: pointer to access method
3150989Sbostic * key: key to delete
3250989Sbostic * flags: R_CURSOR if deleting what the cursor references
3346130Smao *
3450989Sbostic * Returns:
3550989Sbostic * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
3646130Smao */
3746130Smao int
__bt_delete(dbp,key,flags)3850989Sbostic __bt_delete(dbp, key, flags)
3950989Sbostic const DB *dbp;
4050989Sbostic const DBT *key;
4150989Sbostic u_int flags;
4246130Smao {
4350989Sbostic BTREE *t;
4450989Sbostic int status;
4546130Smao
4650989Sbostic t = dbp->internal;
4764460Sbostic
4864460Sbostic /* Toss any page pinned across calls. */
4964460Sbostic if (t->bt_pinned != NULL) {
5064460Sbostic mpool_put(t->bt_mp, t->bt_pinned, 0);
5164460Sbostic t->bt_pinned = NULL;
5264460Sbostic }
5364460Sbostic
5460046Sbostic if (ISSET(t, B_RDONLY)) {
5550989Sbostic errno = EPERM;
5646130Smao return (RET_ERROR);
5746130Smao }
5864460Sbostic
5950989Sbostic switch(flags) {
6050989Sbostic case 0:
6150989Sbostic status = bt_bdelete(t, key);
6250989Sbostic break;
6350989Sbostic case R_CURSOR:
6450989Sbostic /*
6550989Sbostic * If flags is R_CURSOR, delete the cursor; must already have
6650989Sbostic * started a scan and not have already deleted the record. For
6750989Sbostic * the delete cursor bit to have been set requires that the
6850989Sbostic * scan be initialized, so no reason to check.
6950989Sbostic */
7060046Sbostic if (!ISSET(t, B_SEQINIT))
7156755Sbostic goto einval;
7260046Sbostic status = ISSET(t, B_DELCRSR) ?
7350989Sbostic RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor);
7450989Sbostic break;
7550989Sbostic default:
7656755Sbostic einval: errno = EINVAL;
7746130Smao return (RET_ERROR);
7846130Smao }
7950989Sbostic if (status == RET_SUCCESS)
8060046Sbostic SET(t, B_MODIFIED);
8150989Sbostic return (status);
8246130Smao }
8346130Smao
8446130Smao /*
8550989Sbostic * BT_BDELETE -- Delete all key/data pairs matching the specified key.
8646130Smao *
8750989Sbostic * Parameters:
8850989Sbostic * tree: tree
8950989Sbostic * key: key to delete
9046130Smao *
9150989Sbostic * Returns:
9250989Sbostic * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
9346130Smao */
9450989Sbostic static int
bt_bdelete(t,key)9550989Sbostic bt_bdelete(t, key)
9650989Sbostic BTREE *t;
9750989Sbostic const DBT *key;
9846130Smao {
9950989Sbostic EPG *e, save;
10050989Sbostic PAGE *h;
10150989Sbostic pgno_t cpgno, pg;
10257989Sbostic indx_t cindex;
10356404Sbostic int deleted, dirty1, dirty2, exact;
10446130Smao
10550989Sbostic /* Find any matching record; __bt_search pins the page. */
10650989Sbostic if ((e = __bt_search(t, key, &exact)) == NULL)
10750989Sbostic return (RET_ERROR);
10850989Sbostic if (!exact) {
10950989Sbostic mpool_put(t->bt_mp, e->page, 0);
11050989Sbostic return (RET_SPECIAL);
11150989Sbostic }
11246130Smao
11350989Sbostic /*
11450989Sbostic * Delete forward, then delete backward, from the found key. The
11550989Sbostic * ordering is so that the deletions don't mess up the page refs.
11656404Sbostic * The first loop deletes the key from the original page, the second
11756404Sbostic * unpins the original page. In the first loop, dirty1 is set if
11856404Sbostic * the original page is modified, and dirty2 is set if any subsequent
11956404Sbostic * pages are modified. In the second loop, dirty1 starts off set if
12056404Sbostic * the original page has been modified, and is set if any subsequent
12156404Sbostic * pages are modified.
12250989Sbostic *
12350989Sbostic * If find the key referenced by the cursor, don't delete it, just
12450989Sbostic * flag it for future deletion. The cursor page number is P_INVALID
12550989Sbostic * unless the sequential scan is initialized, so no reason to check.
12650989Sbostic * A special case is when the already deleted cursor record was the
12750989Sbostic * only record found. If so, then the delete opertion fails as no
12850989Sbostic * records were deleted.
12950989Sbostic *
13050989Sbostic * Cycle in place in the current page until the current record doesn't
13150989Sbostic * match the key or the page is empty. If the latter, walk forward,
13256404Sbostic * skipping empty pages and repeating until a record doesn't match
13350989Sbostic * the key or the end of the tree is reached.
13450989Sbostic */
13550989Sbostic cpgno = t->bt_bcursor.pgno;
13650989Sbostic cindex = t->bt_bcursor.index;
13750989Sbostic save = *e;
13856404Sbostic dirty1 = 0;
13950989Sbostic for (h = e->page, deleted = 0;;) {
14056404Sbostic dirty2 = 0;
14150989Sbostic do {
14250989Sbostic if (h->pgno == cpgno && e->index == cindex) {
14360046Sbostic if (!ISSET(t, B_DELCRSR)) {
14460046Sbostic SET(t, B_DELCRSR);
14550989Sbostic deleted = 1;
14650989Sbostic }
14750989Sbostic ++e->index;
14850989Sbostic } else {
14956404Sbostic if (__bt_dleaf(t, h, e->index)) {
15056404Sbostic if (h->pgno != save.page->pgno)
15156404Sbostic mpool_put(t->bt_mp, h, dirty2);
15256404Sbostic mpool_put(t->bt_mp, save.page, dirty1);
15356404Sbostic return (RET_ERROR);
15456404Sbostic }
15556404Sbostic if (h->pgno == save.page->pgno)
15656404Sbostic dirty1 = MPOOL_DIRTY;
15756404Sbostic else
15856404Sbostic dirty2 = MPOOL_DIRTY;
15950989Sbostic deleted = 1;
16050989Sbostic }
16150989Sbostic } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
16246130Smao
16350989Sbostic /*
16450989Sbostic * Quit if didn't find a match, no next page, or first key on
16556404Sbostic * the next page doesn't match. Don't unpin the original page
16656404Sbostic * unless an error occurs.
16750989Sbostic */
16850989Sbostic if (e->index < NEXTINDEX(h))
16950989Sbostic break;
17050989Sbostic for (;;) {
17150989Sbostic if ((pg = h->nextpg) == P_INVALID)
17250989Sbostic goto done1;
17350989Sbostic if (h->pgno != save.page->pgno)
17456404Sbostic mpool_put(t->bt_mp, h, dirty2);
17550989Sbostic if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) {
17656404Sbostic mpool_put(t->bt_mp, save.page, dirty1);
17750989Sbostic return (RET_ERROR);
17850989Sbostic }
17950989Sbostic if (NEXTINDEX(h) != 0) {
18050989Sbostic e->page = h;
18150989Sbostic e->index = 0;
18250989Sbostic break;
18350989Sbostic }
18450989Sbostic }
18550989Sbostic
18650989Sbostic if (__bt_cmp(t, key, e) != 0)
18750989Sbostic break;
18846130Smao }
18950989Sbostic
19050989Sbostic /*
19156404Sbostic * Reach here with the original page and the last page referenced
19256404Sbostic * pinned (they may be the same). Release it if not the original.
19350989Sbostic */
19450989Sbostic done1: if (h->pgno != save.page->pgno)
19556404Sbostic mpool_put(t->bt_mp, h, dirty2);
19650989Sbostic
19750989Sbostic /*
19850989Sbostic * Walk backwards from the record previous to the record returned by
19956404Sbostic * __bt_search, skipping empty pages, until a record doesn't match
20056404Sbostic * the key or reach the beginning of the tree.
20150989Sbostic */
20250989Sbostic *e = save;
20350989Sbostic for (;;) {
20450989Sbostic if (e->index)
20550989Sbostic --e->index;
20650989Sbostic for (h = e->page; e->index; --e->index) {
20750989Sbostic if (__bt_cmp(t, key, e) != 0)
20850989Sbostic goto done2;
20950989Sbostic if (h->pgno == cpgno && e->index == cindex) {
21060046Sbostic if (!ISSET(t, B_DELCRSR)) {
21160046Sbostic SET(t, B_DELCRSR);
21250989Sbostic deleted = 1;
21350989Sbostic }
21450989Sbostic } else {
21556404Sbostic if (__bt_dleaf(t, h, e->index) == RET_ERROR) {
21656404Sbostic mpool_put(t->bt_mp, h, dirty1);
21756404Sbostic return (RET_ERROR);
21856404Sbostic }
21956404Sbostic if (h->pgno == save.page->pgno)
22056404Sbostic dirty1 = MPOOL_DIRTY;
22150989Sbostic deleted = 1;
22250989Sbostic }
22350989Sbostic }
22450989Sbostic
22550989Sbostic if ((pg = h->prevpg) == P_INVALID)
22650989Sbostic goto done2;
22756404Sbostic mpool_put(t->bt_mp, h, dirty1);
22856404Sbostic dirty1 = 0;
22950989Sbostic if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL)
23046130Smao return (RET_ERROR);
23157934Sbostic e->index = NEXTINDEX(e->page);
23246130Smao }
23346130Smao
23450989Sbostic /*
23550989Sbostic * Reach here with the last page that was looked at pinned. Release
23650989Sbostic * it.
23750989Sbostic */
23856404Sbostic done2: mpool_put(t->bt_mp, h, dirty1);
23950989Sbostic return (deleted ? RET_SUCCESS : RET_SPECIAL);
24050989Sbostic }
24146130Smao
24250989Sbostic /*
24350989Sbostic * __BT_DLEAF -- Delete a single record from a leaf page.
24450989Sbostic *
24550989Sbostic * Parameters:
24650989Sbostic * t: tree
24750989Sbostic * index: index on current page to delete
24850989Sbostic *
24950989Sbostic * Returns:
25050989Sbostic * RET_SUCCESS, RET_ERROR.
25150989Sbostic */
25250989Sbostic int
__bt_dleaf(t,h,index)25350989Sbostic __bt_dleaf(t, h, index)
25450989Sbostic BTREE *t;
25550989Sbostic PAGE *h;
256*66192Sbostic indx_t index;
25750989Sbostic {
25850989Sbostic register BLEAF *bl;
259*66192Sbostic register indx_t cnt, *ip, offset;
26050989Sbostic register size_t nbytes;
26150989Sbostic char *from;
26250989Sbostic void *to;
26346130Smao
26450989Sbostic /*
26550989Sbostic * Delete a record from a btree leaf page. Internal records are never
26650989Sbostic * deleted from internal pages, regardless of the records that caused
26750989Sbostic * them to be added being deleted. Pages made empty by deletion are
26850989Sbostic * not reclaimed. They are, however, made available for reuse.
26950989Sbostic *
27050989Sbostic * Pack the remaining entries at the end of the page, shift the indices
27150989Sbostic * down, overwriting the deleted record and its index. If the record
27250989Sbostic * uses overflow pages, make them available for reuse.
27350989Sbostic */
27450989Sbostic to = bl = GETBLEAF(h, index);
27550989Sbostic if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
27650989Sbostic return (RET_ERROR);
27750989Sbostic if (bl->flags & P_BIGDATA &&
27850989Sbostic __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
27950989Sbostic return (RET_ERROR);
28050989Sbostic nbytes = NBLEAF(bl);
28146130Smao
28250989Sbostic /*
28350989Sbostic * Compress the key/data pairs. Compress and adjust the [BR]LEAF
28450989Sbostic * offsets. Reset the headers.
28550989Sbostic */
28650989Sbostic from = (char *)h + h->upper;
28758017Sbostic memmove(from + nbytes, from, (char *)to - from);
28850989Sbostic h->upper += nbytes;
28946130Smao
29050989Sbostic offset = h->linp[index];
29156404Sbostic for (cnt = index, ip = &h->linp[0]; cnt--; ++ip)
29250989Sbostic if (ip[0] < offset)
29350989Sbostic ip[0] += nbytes;
29456404Sbostic for (cnt = NEXTINDEX(h) - index; --cnt; ++ip)
29550989Sbostic ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
29657989Sbostic h->lower -= sizeof(indx_t);
29746130Smao return (RET_SUCCESS);
29846130Smao }
299