1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)bt_delete.c 5.5 (Berkeley) 11/13/92"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 17 #include <db.h> 18 #include <errno.h> 19 #include <stdio.h> 20 #include <string.h> 21 22 #include "btree.h" 23 24 static int bt_bdelete __P((BTREE *, const DBT *)); 25 26 /* 27 * __BT_DELETE -- Delete the item(s) referenced by a key. 28 * 29 * Parameters: 30 * dbp: pointer to access method 31 * key: key to delete 32 * flags: R_CURSOR if deleting what the cursor references 33 * 34 * Returns: 35 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 36 */ 37 int 38 __bt_delete(dbp, key, flags) 39 const DB *dbp; 40 const DBT *key; 41 u_int flags; 42 { 43 BTREE *t; 44 int status; 45 46 t = dbp->internal; 47 if (ISSET(t, BTF_RDONLY)) { 48 errno = EPERM; 49 return (RET_ERROR); 50 } 51 switch(flags) { 52 case 0: 53 status = bt_bdelete(t, key); 54 break; 55 case R_CURSOR: 56 /* 57 * If flags is R_CURSOR, delete the cursor; must already have 58 * started a scan and not have already deleted the record. For 59 * the delete cursor bit to have been set requires that the 60 * scan be initialized, so no reason to check. 61 */ 62 status = ISSET(t, BTF_DELCRSR) ? 63 RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); 64 break; 65 default: 66 errno = EINVAL; 67 return (RET_ERROR); 68 } 69 if (status == RET_SUCCESS) 70 SET(t, BTF_MODIFIED); 71 return (status); 72 } 73 74 /* 75 * BT_BDELETE -- Delete all key/data pairs matching the specified key. 76 * 77 * Parameters: 78 * tree: tree 79 * key: key to delete 80 * 81 * Returns: 82 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 83 */ 84 static int 85 bt_bdelete(t, key) 86 BTREE *t; 87 const DBT *key; 88 { 89 EPG *e, save; 90 PAGE *h; 91 pgno_t cpgno, pg; 92 index_t cindex; 93 int deleted, dirty1, dirty2, exact; 94 95 /* Find any matching record; __bt_search pins the page. */ 96 if ((e = __bt_search(t, key, &exact)) == NULL) 97 return (RET_ERROR); 98 if (!exact) { 99 mpool_put(t->bt_mp, e->page, 0); 100 return (RET_SPECIAL); 101 } 102 103 /* 104 * Delete forward, then delete backward, from the found key. The 105 * ordering is so that the deletions don't mess up the page refs. 106 * The first loop deletes the key from the original page, the second 107 * unpins the original page. In the first loop, dirty1 is set if 108 * the original page is modified, and dirty2 is set if any subsequent 109 * pages are modified. In the second loop, dirty1 starts off set if 110 * the original page has been modified, and is set if any subsequent 111 * pages are modified. 112 * 113 * If find the key referenced by the cursor, don't delete it, just 114 * flag it for future deletion. The cursor page number is P_INVALID 115 * unless the sequential scan is initialized, so no reason to check. 116 * A special case is when the already deleted cursor record was the 117 * only record found. If so, then the delete opertion fails as no 118 * records were deleted. 119 * 120 * Cycle in place in the current page until the current record doesn't 121 * match the key or the page is empty. If the latter, walk forward, 122 * skipping empty pages and repeating until a record doesn't match 123 * the key or the end of the tree is reached. 124 */ 125 cpgno = t->bt_bcursor.pgno; 126 cindex = t->bt_bcursor.index; 127 save = *e; 128 dirty1 = 0; 129 for (h = e->page, deleted = 0;;) { 130 dirty2 = 0; 131 do { 132 if (h->pgno == cpgno && e->index == cindex) { 133 if (!ISSET(t, BTF_DELCRSR)) { 134 SET(t, BTF_DELCRSR); 135 deleted = 1; 136 } 137 ++e->index; 138 } else { 139 if (__bt_dleaf(t, h, e->index)) { 140 if (h->pgno != save.page->pgno) 141 mpool_put(t->bt_mp, h, dirty2); 142 mpool_put(t->bt_mp, save.page, dirty1); 143 return (RET_ERROR); 144 } 145 if (h->pgno == save.page->pgno) 146 dirty1 = MPOOL_DIRTY; 147 else 148 dirty2 = MPOOL_DIRTY; 149 deleted = 1; 150 } 151 } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 152 153 /* 154 * Quit if didn't find a match, no next page, or first key on 155 * the next page doesn't match. Don't unpin the original page 156 * unless an error occurs. 157 */ 158 if (e->index < NEXTINDEX(h)) 159 break; 160 for (;;) { 161 if ((pg = h->nextpg) == P_INVALID) 162 goto done1; 163 if (h->pgno != save.page->pgno) 164 mpool_put(t->bt_mp, h, dirty2); 165 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { 166 mpool_put(t->bt_mp, save.page, dirty1); 167 return (RET_ERROR); 168 } 169 if (NEXTINDEX(h) != 0) { 170 e->page = h; 171 e->index = 0; 172 break; 173 } 174 } 175 176 if (__bt_cmp(t, key, e) != 0) 177 break; 178 } 179 180 /* 181 * Reach here with the original page and the last page referenced 182 * pinned (they may be the same). Release it if not the original. 183 */ 184 done1: if (h->pgno != save.page->pgno) 185 mpool_put(t->bt_mp, h, dirty2); 186 187 /* 188 * Walk backwards from the record previous to the record returned by 189 * __bt_search, skipping empty pages, until a record doesn't match 190 * the key or reach the beginning of the tree. 191 */ 192 *e = save; 193 for (;;) { 194 if (e->index) 195 --e->index; 196 for (h = e->page; e->index; --e->index) { 197 if (__bt_cmp(t, key, e) != 0) 198 goto done2; 199 if (h->pgno == cpgno && e->index == cindex) { 200 if (!ISSET(t, BTF_DELCRSR)) { 201 SET(t, BTF_DELCRSR); 202 deleted = 1; 203 } 204 } else { 205 if (__bt_dleaf(t, h, e->index) == RET_ERROR) { 206 mpool_put(t->bt_mp, h, dirty1); 207 return (RET_ERROR); 208 } 209 if (h->pgno == save.page->pgno) 210 dirty1 = MPOOL_DIRTY; 211 deleted = 1; 212 } 213 } 214 215 if ((pg = h->prevpg) == P_INVALID) 216 goto done2; 217 mpool_put(t->bt_mp, h, dirty1); 218 dirty1 = 0; 219 if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) 220 return (RET_ERROR); 221 e->index = NEXTINDEX(h); 222 } 223 224 /* 225 * Reach here with the last page that was looked at pinned. Release 226 * it. 227 */ 228 done2: mpool_put(t->bt_mp, h, dirty1); 229 return (deleted ? RET_SUCCESS : RET_SPECIAL); 230 } 231 232 /* 233 * __BT_DLEAF -- Delete a single record from a leaf page. 234 * 235 * Parameters: 236 * t: tree 237 * index: index on current page to delete 238 * 239 * Returns: 240 * RET_SUCCESS, RET_ERROR. 241 */ 242 int 243 __bt_dleaf(t, h, index) 244 BTREE *t; 245 PAGE *h; 246 int index; 247 { 248 register BLEAF *bl; 249 register index_t *ip, offset; 250 register size_t nbytes; 251 register int cnt; 252 char *from; 253 void *to; 254 255 /* 256 * Delete a record from a btree leaf page. Internal records are never 257 * deleted from internal pages, regardless of the records that caused 258 * them to be added being deleted. Pages made empty by deletion are 259 * not reclaimed. They are, however, made available for reuse. 260 * 261 * Pack the remaining entries at the end of the page, shift the indices 262 * down, overwriting the deleted record and its index. If the record 263 * uses overflow pages, make them available for reuse. 264 */ 265 to = bl = GETBLEAF(h, index); 266 if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 267 return (RET_ERROR); 268 if (bl->flags & P_BIGDATA && 269 __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 270 return (RET_ERROR); 271 nbytes = NBLEAF(bl); 272 273 /* 274 * Compress the key/data pairs. Compress and adjust the [BR]LEAF 275 * offsets. Reset the headers. 276 */ 277 from = (char *)h + h->upper; 278 bcopy(from, from + nbytes, (char *)to - from); 279 h->upper += nbytes; 280 281 offset = h->linp[index]; 282 for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) 283 if (ip[0] < offset) 284 ip[0] += nbytes; 285 for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) 286 ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 287 h->lower -= sizeof(index_t); 288 return (RET_SUCCESS); 289 } 290