1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #if defined(LIBC_SCCS) && !defined(lint) 38 static char sccsid[] = "@(#)bt_seq.c 8.1 (Berkeley) 6/4/93"; 39 #endif /* LIBC_SCCS and not lint */ 40 41 #include <sys/types.h> 42 43 #include <errno.h> 44 #include <stddef.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 48 #include <db.h> 49 #include "btree.h" 50 51 static int bt_seqadv __P((BTREE *, EPG *, int)); 52 static int bt_seqset __P((BTREE *, EPG *, DBT *, int)); 53 54 /* 55 * Sequential scan support. 56 * 57 * The tree can be scanned sequentially, starting from either end of the tree 58 * or from any specific key. A scan request before any scanning is done is 59 * initialized as starting from the least node. 60 * 61 * Each tree has an EPGNO which has the current position of the cursor. The 62 * cursor has to survive deletions/insertions in the tree without losing its 63 * position. This is done by noting deletions without doing them, and then 64 * doing them when the cursor moves (or the tree is closed). 65 */ 66 67 /* 68 * __BT_SEQ -- Btree sequential scan interface. 69 * 70 * Parameters: 71 * dbp: pointer to access method 72 * key: key for positioning and return value 73 * data: data return value 74 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 75 * 76 * Returns: 77 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 78 */ 79 int 80 __bt_seq(dbp, key, data, flags) 81 const DB *dbp; 82 DBT *key, *data; 83 u_int flags; 84 { 85 BTREE *t; 86 EPG e; 87 int status; 88 89 /* 90 * If scan unitialized as yet, or starting at a specific record, set 91 * the scan to a specific key. Both bt_seqset and bt_seqadv pin the 92 * page the cursor references if they're successful. 93 */ 94 t = dbp->internal; 95 switch(flags) { 96 case R_NEXT: 97 case R_PREV: 98 if (ISSET(t, B_SEQINIT)) { 99 status = bt_seqadv(t, &e, flags); 100 break; 101 } 102 /* FALLTHROUGH */ 103 case R_CURSOR: 104 case R_FIRST: 105 case R_LAST: 106 status = bt_seqset(t, &e, key, flags); 107 break; 108 default: 109 errno = EINVAL; 110 return (RET_ERROR); 111 } 112 113 if (status == RET_SUCCESS) { 114 status = __bt_ret(t, &e, key, data); 115 116 /* Update the actual cursor. */ 117 t->bt_bcursor.pgno = e.page->pgno; 118 t->bt_bcursor.index = e.index; 119 mpool_put(t->bt_mp, e.page, 0); 120 SET(t, B_SEQINIT); 121 } 122 return (status); 123 } 124 125 /* 126 * BT_SEQSET -- Set the sequential scan to a specific key. 127 * 128 * Parameters: 129 * t: tree 130 * ep: storage for returned key 131 * key: key for initial scan position 132 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 133 * 134 * Side effects: 135 * Pins the page the cursor references. 136 * 137 * Returns: 138 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 139 */ 140 static int 141 bt_seqset(t, ep, key, flags) 142 BTREE *t; 143 EPG *ep; 144 DBT *key; 145 int flags; 146 { 147 EPG *e; 148 PAGE *h; 149 pgno_t pg; 150 int exact; 151 152 /* 153 * Delete any already deleted record that we've been saving because 154 * the cursor pointed to it. Since going to a specific key, should 155 * delete any logically deleted records so they aren't found. 156 */ 157 if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor)) 158 return (RET_ERROR); 159 160 /* 161 * Find the first, last or specific key in the tree and point the cursor 162 * at it. The cursor may not be moved until a new key has been found. 163 */ 164 switch(flags) { 165 case R_CURSOR: /* Keyed scan. */ 166 /* 167 * Find the first instance of the key or the smallest key which 168 * is greater than or equal to the specified key. If run out 169 * of keys, return RET_SPECIAL. 170 */ 171 if (key->data == NULL || key->size == 0) { 172 errno = EINVAL; 173 return (RET_ERROR); 174 } 175 e = __bt_first(t, key, &exact); /* Returns pinned page. */ 176 if (e == NULL) 177 return (RET_ERROR); 178 /* 179 * If at the end of a page, skip any empty pages and find the 180 * next entry. 181 */ 182 if (e->index == NEXTINDEX(e->page)) { 183 h = e->page; 184 do { 185 pg = h->nextpg; 186 mpool_put(t->bt_mp, h, 0); 187 if (pg == P_INVALID) 188 return (RET_SPECIAL); 189 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 190 return (RET_ERROR); 191 } while (NEXTINDEX(h) == 0); 192 e->index = 0; 193 e->page = h; 194 } 195 *ep = *e; 196 break; 197 case R_FIRST: /* First record. */ 198 case R_NEXT: 199 /* Walk down the left-hand side of the tree. */ 200 for (pg = P_ROOT;;) { 201 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 202 return (RET_ERROR); 203 if (h->flags & (P_BLEAF | P_RLEAF)) 204 break; 205 pg = GETBINTERNAL(h, 0)->pgno; 206 mpool_put(t->bt_mp, h, 0); 207 } 208 209 /* Skip any empty pages. */ 210 while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) { 211 pg = h->nextpg; 212 mpool_put(t->bt_mp, h, 0); 213 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 214 return (RET_ERROR); 215 } 216 217 if (NEXTINDEX(h) == 0) { 218 mpool_put(t->bt_mp, h, 0); 219 return (RET_SPECIAL); 220 } 221 222 ep->page = h; 223 ep->index = 0; 224 break; 225 case R_LAST: /* Last record. */ 226 case R_PREV: 227 /* Walk down the right-hand side of the tree. */ 228 for (pg = P_ROOT;;) { 229 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 230 return (RET_ERROR); 231 if (h->flags & (P_BLEAF | P_RLEAF)) 232 break; 233 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 234 mpool_put(t->bt_mp, h, 0); 235 } 236 237 /* Skip any empty pages. */ 238 while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) { 239 pg = h->prevpg; 240 mpool_put(t->bt_mp, h, 0); 241 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 242 return (RET_ERROR); 243 } 244 245 if (NEXTINDEX(h) == 0) { 246 mpool_put(t->bt_mp, h, 0); 247 return (RET_SPECIAL); 248 } 249 250 ep->page = h; 251 ep->index = NEXTINDEX(h) - 1; 252 break; 253 } 254 return (RET_SUCCESS); 255 } 256 257 /* 258 * BT_SEQADVANCE -- Advance the sequential scan. 259 * 260 * Parameters: 261 * t: tree 262 * flags: R_NEXT, R_PREV 263 * 264 * Side effects: 265 * Pins the page the new key/data record is on. 266 * 267 * Returns: 268 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 269 */ 270 static int 271 bt_seqadv(t, e, flags) 272 BTREE *t; 273 EPG *e; 274 int flags; 275 { 276 EPGNO *c, delc; 277 PAGE *h; 278 indx_t index; 279 pgno_t pg; 280 281 /* Save the current cursor if going to delete it. */ 282 c = &t->bt_bcursor; 283 if (ISSET(t, B_DELCRSR)) 284 delc = *c; 285 286 if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL) 287 return (RET_ERROR); 288 289 /* 290 * Find the next/previous record in the tree and point the cursor at it. 291 * The cursor may not be moved until a new key has been found. 292 */ 293 index = c->index; 294 switch(flags) { 295 case R_NEXT: /* Next record. */ 296 if (++index == NEXTINDEX(h)) { 297 do { 298 pg = h->nextpg; 299 mpool_put(t->bt_mp, h, 0); 300 if (pg == P_INVALID) 301 return (RET_SPECIAL); 302 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 303 return (RET_ERROR); 304 } while (NEXTINDEX(h) == 0); 305 index = 0; 306 } 307 break; 308 case R_PREV: /* Previous record. */ 309 if (index-- == 0) { 310 do { 311 pg = h->prevpg; 312 mpool_put(t->bt_mp, h, 0); 313 if (pg == P_INVALID) 314 return (RET_SPECIAL); 315 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 316 return (RET_ERROR); 317 } while (NEXTINDEX(h) == 0); 318 index = NEXTINDEX(h) - 1; 319 } 320 break; 321 } 322 323 e->page = h; 324 e->index = index; 325 326 /* 327 * Delete any already deleted record that we've been saving because the 328 * cursor pointed to it. This could cause the new index to be shifted 329 * down by one if the record we're deleting is on the same page and has 330 * a larger index. 331 */ 332 if (ISSET(t, B_DELCRSR)) { 333 CLR(t, B_DELCRSR); /* Don't try twice. */ 334 if (c->pgno == delc.pgno && c->index > delc.index) 335 --c->index; 336 if (__bt_crsrdel(t, &delc)) 337 return (RET_ERROR); 338 } 339 return (RET_SUCCESS); 340 } 341 342 /* 343 * __BT_CRSRDEL -- Delete the record referenced by the cursor. 344 * 345 * Parameters: 346 * t: tree 347 * 348 * Returns: 349 * RET_ERROR, RET_SUCCESS 350 */ 351 int 352 __bt_crsrdel(t, c) 353 BTREE *t; 354 EPGNO *c; 355 { 356 PAGE *h; 357 int status; 358 359 CLR(t, B_DELCRSR); /* Don't try twice. */ 360 if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL) 361 return (RET_ERROR); 362 status = __bt_dleaf(t, h, c->index); 363 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 364 return (status); 365 } 366