1 /* $NetBSD: bt_seq.c,v 1.10 1997/07/21 14:06:37 jtc Exp $ */ 2 3 /*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Mike Olson. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 #if defined(LIBC_SCCS) && !defined(lint) 41 #if 0 42 static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; 43 #else 44 __RCSID("$NetBSD: bt_seq.c,v 1.10 1997/07/21 14:06:37 jtc Exp $"); 45 #endif 46 #endif /* LIBC_SCCS and not lint */ 47 48 #include "namespace.h" 49 #include <sys/types.h> 50 51 #include <errno.h> 52 #include <stddef.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 56 #include <db.h> 57 #include "btree.h" 58 59 static int __bt_first __P((BTREE *, const DBT *, EPG *, int *)); 60 static int __bt_seqadv __P((BTREE *, EPG *, int)); 61 static int __bt_seqset __P((BTREE *, EPG *, DBT *, int)); 62 63 /* 64 * Sequential scan support. 65 * 66 * The tree can be scanned sequentially, starting from either end of the 67 * tree or from any specific key. A scan request before any scanning is 68 * done is initialized as starting from the least node. 69 */ 70 71 /* 72 * __bt_seq -- 73 * Btree sequential scan interface. 74 * 75 * Parameters: 76 * dbp: pointer to access method 77 * key: key for positioning and return value 78 * data: data return value 79 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 80 * 81 * Returns: 82 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 83 */ 84 int 85 __bt_seq(dbp, key, data, flags) 86 const DB *dbp; 87 DBT *key, *data; 88 u_int flags; 89 { 90 BTREE *t; 91 EPG e; 92 int status; 93 94 t = dbp->internal; 95 96 /* Toss any page pinned across calls. */ 97 if (t->bt_pinned != NULL) { 98 mpool_put(t->bt_mp, t->bt_pinned, 0); 99 t->bt_pinned = NULL; 100 } 101 102 /* 103 * If scan unitialized as yet, or starting at a specific record, set 104 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 105 * the page the cursor references if they're successful. 106 */ 107 switch (flags) { 108 case R_NEXT: 109 case R_PREV: 110 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 111 status = __bt_seqadv(t, &e, flags); 112 break; 113 } 114 /* FALLTHROUGH */ 115 case R_FIRST: 116 case R_LAST: 117 case R_CURSOR: 118 status = __bt_seqset(t, &e, key, flags); 119 break; 120 default: 121 errno = EINVAL; 122 return (RET_ERROR); 123 } 124 125 if (status == RET_SUCCESS) { 126 __bt_setcur(t, e.page->pgno, e.index); 127 128 status = 129 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 130 131 /* 132 * If the user is doing concurrent access, we copied the 133 * key/data, toss the page. 134 */ 135 if (F_ISSET(t, B_DB_LOCK)) 136 mpool_put(t->bt_mp, e.page, 0); 137 else 138 t->bt_pinned = e.page; 139 } 140 return (status); 141 } 142 143 /* 144 * __bt_seqset -- 145 * Set the sequential scan to a specific key. 146 * 147 * Parameters: 148 * t: tree 149 * ep: storage for returned key 150 * key: key for initial scan position 151 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 152 * 153 * Side effects: 154 * Pins the page the cursor references. 155 * 156 * Returns: 157 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 158 */ 159 static int 160 __bt_seqset(t, ep, key, flags) 161 BTREE *t; 162 EPG *ep; 163 DBT *key; 164 int flags; 165 { 166 PAGE *h; 167 pgno_t pg; 168 int exact; 169 170 /* 171 * Find the first, last or specific key in the tree and point the 172 * cursor at it. The cursor may not be moved until a new key has 173 * been found. 174 */ 175 switch (flags) { 176 case R_CURSOR: /* Keyed scan. */ 177 /* 178 * Find the first instance of the key or the smallest key 179 * which is greater than or equal to the specified key. 180 */ 181 if (key->data == NULL || key->size == 0) { 182 errno = EINVAL; 183 return (RET_ERROR); 184 } 185 return (__bt_first(t, key, ep, &exact)); 186 case R_FIRST: /* First record. */ 187 case R_NEXT: 188 /* Walk down the left-hand side of the tree. */ 189 for (pg = P_ROOT;;) { 190 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 191 return (RET_ERROR); 192 193 /* Check for an empty tree. */ 194 if (NEXTINDEX(h) == 0) { 195 mpool_put(t->bt_mp, h, 0); 196 return (RET_SPECIAL); 197 } 198 199 if (h->flags & (P_BLEAF | P_RLEAF)) 200 break; 201 pg = GETBINTERNAL(h, 0)->pgno; 202 mpool_put(t->bt_mp, h, 0); 203 } 204 ep->page = h; 205 ep->index = 0; 206 break; 207 case R_LAST: /* Last record. */ 208 case R_PREV: 209 /* Walk down the right-hand side of the tree. */ 210 for (pg = P_ROOT;;) { 211 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 212 return (RET_ERROR); 213 214 /* Check for an empty tree. */ 215 if (NEXTINDEX(h) == 0) { 216 mpool_put(t->bt_mp, h, 0); 217 return (RET_SPECIAL); 218 } 219 220 if (h->flags & (P_BLEAF | P_RLEAF)) 221 break; 222 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 223 mpool_put(t->bt_mp, h, 0); 224 } 225 226 ep->page = h; 227 ep->index = NEXTINDEX(h) - 1; 228 break; 229 } 230 return (RET_SUCCESS); 231 } 232 233 /* 234 * __bt_seqadvance -- 235 * Advance the sequential scan. 236 * 237 * Parameters: 238 * t: tree 239 * flags: R_NEXT, R_PREV 240 * 241 * Side effects: 242 * Pins the page the new key/data record is on. 243 * 244 * Returns: 245 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 246 */ 247 static int 248 __bt_seqadv(t, ep, flags) 249 BTREE *t; 250 EPG *ep; 251 int flags; 252 { 253 CURSOR *c; 254 PAGE *h; 255 indx_t index = 0; /* pacify gcc */ 256 pgno_t pg; 257 int exact; 258 259 /* 260 * There are a couple of states that we can be in. The cursor has 261 * been initialized by the time we get here, but that's all we know. 262 */ 263 c = &t->bt_cursor; 264 265 /* 266 * The cursor was deleted where there weren't any duplicate records, 267 * so the key was saved. Find out where that key would go in the 268 * current tree. It doesn't matter if the returned key is an exact 269 * match or not -- if it's an exact match, the record was added after 270 * the delete so we can just return it. If not, as long as there's 271 * a record there, return it. 272 */ 273 if (F_ISSET(c, CURS_ACQUIRE)) 274 return (__bt_first(t, &c->key, ep, &exact)); 275 276 /* Get the page referenced by the cursor. */ 277 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 278 return (RET_ERROR); 279 280 /* 281 * Find the next/previous record in the tree and point the cursor at 282 * it. The cursor may not be moved until a new key has been found. 283 */ 284 switch (flags) { 285 case R_NEXT: /* Next record. */ 286 /* 287 * The cursor was deleted in duplicate records, and moved 288 * forward to a record that has yet to be returned. Clear 289 * that flag, and return the record. 290 */ 291 if (F_ISSET(c, CURS_AFTER)) 292 goto usecurrent; 293 index = c->pg.index; 294 if (++index == NEXTINDEX(h)) { 295 pg = h->nextpg; 296 mpool_put(t->bt_mp, h, 0); 297 if (pg == P_INVALID) 298 return (RET_SPECIAL); 299 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 300 return (RET_ERROR); 301 index = 0; 302 } 303 break; 304 case R_PREV: /* Previous record. */ 305 /* 306 * The cursor was deleted in duplicate records, and moved 307 * backward to a record that has yet to be returned. Clear 308 * that flag, and return the record. 309 */ 310 if (F_ISSET(c, CURS_BEFORE)) { 311 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 312 ep->page = h; 313 ep->index = c->pg.index; 314 return (RET_SUCCESS); 315 } 316 index = c->pg.index; 317 if (index == 0) { 318 pg = h->prevpg; 319 mpool_put(t->bt_mp, h, 0); 320 if (pg == P_INVALID) 321 return (RET_SPECIAL); 322 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 323 return (RET_ERROR); 324 index = NEXTINDEX(h) - 1; 325 } else 326 --index; 327 break; 328 } 329 330 ep->page = h; 331 ep->index = index; 332 return (RET_SUCCESS); 333 } 334 335 /* 336 * __bt_first -- 337 * Find the first entry. 338 * 339 * Parameters: 340 * t: the tree 341 * key: the key 342 * erval: return EPG 343 * exactp: pointer to exact match flag 344 * 345 * Returns: 346 * The first entry in the tree greater than or equal to key, 347 * or RET_SPECIAL if no such key exists. 348 */ 349 static int 350 __bt_first(t, key, erval, exactp) 351 BTREE *t; 352 const DBT *key; 353 EPG *erval; 354 int *exactp; 355 { 356 PAGE *h; 357 EPG *ep, save; 358 pgno_t pg; 359 360 /* 361 * Find any matching record; __bt_search pins the page. 362 * 363 * If it's an exact match and duplicates are possible, walk backwards 364 * in the tree until we find the first one. Otherwise, make sure it's 365 * a valid key (__bt_search may return an index just past the end of a 366 * page) and return it. 367 */ 368 if ((ep = __bt_search(t, key, exactp)) == NULL) 369 return (0); 370 if (*exactp) { 371 if (F_ISSET(t, B_NODUPS)) { 372 *erval = *ep; 373 return (RET_SUCCESS); 374 } 375 376 /* 377 * Walk backwards, as long as the entry matches and there are 378 * keys left in the tree. Save a copy of each match in case 379 * we go too far. 380 */ 381 save = *ep; 382 h = ep->page; 383 do { 384 if (save.page->pgno != ep->page->pgno) { 385 mpool_put(t->bt_mp, save.page, 0); 386 save = *ep; 387 } else 388 save.index = ep->index; 389 390 /* 391 * Don't unpin the page the last (or original) match 392 * was on, but make sure it's unpinned if an error 393 * occurs. 394 */ 395 if (ep->index == 0) { 396 if (h->prevpg == P_INVALID) 397 break; 398 if (h->pgno != save.page->pgno) 399 mpool_put(t->bt_mp, h, 0); 400 if ((h = mpool_get(t->bt_mp, 401 h->prevpg, 0)) == NULL) { 402 if (h->pgno == save.page->pgno) 403 mpool_put(t->bt_mp, 404 save.page, 0); 405 return (RET_ERROR); 406 } 407 ep->page = h; 408 ep->index = NEXTINDEX(h); 409 } 410 --ep->index; 411 } while (__bt_cmp(t, key, ep) == 0); 412 413 /* 414 * Reach here with the last page that was looked at pinned, 415 * which may or may not be the same as the last (or original) 416 * match page. If it's not useful, release it. 417 */ 418 if (h->pgno != save.page->pgno) 419 mpool_put(t->bt_mp, h, 0); 420 421 *erval = save; 422 return (RET_SUCCESS); 423 } 424 425 /* If at the end of a page, find the next entry. */ 426 if (ep->index == NEXTINDEX(ep->page)) { 427 h = ep->page; 428 pg = h->nextpg; 429 mpool_put(t->bt_mp, h, 0); 430 if (pg == P_INVALID) 431 return (RET_SPECIAL); 432 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 433 return (RET_ERROR); 434 ep->index = 0; 435 ep->page = h; 436 } 437 *erval = *ep; 438 return (RET_SUCCESS); 439 } 440 441 /* 442 * __bt_setcur -- 443 * Set the cursor to an entry in the tree. 444 * 445 * Parameters: 446 * t: the tree 447 * pgno: page number 448 * index: page index 449 */ 450 void 451 __bt_setcur(t, pgno, index) 452 BTREE *t; 453 pgno_t pgno; 454 u_int index; 455 { 456 /* Lose any already deleted key. */ 457 if (t->bt_cursor.key.data != NULL) { 458 free(t->bt_cursor.key.data); 459 t->bt_cursor.key.size = 0; 460 t->bt_cursor.key.data = NULL; 461 } 462 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 463 464 /* Update the cursor. */ 465 t->bt_cursor.pg.pgno = pgno; 466 t->bt_cursor.pg.index = index; 467 F_SET(&t->bt_cursor, CURS_INIT); 468 } 469