1 /* $NetBSD: bt_seq.c,v 1.13 2003/08/07 16:42:41 agc Exp $ */ 2 3 /*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Mike Olson. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #if defined(LIBC_SCCS) && !defined(lint) 37 #if 0 38 static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; 39 #else 40 __RCSID("$NetBSD: bt_seq.c,v 1.13 2003/08/07 16:42:41 agc Exp $"); 41 #endif 42 #endif /* LIBC_SCCS and not lint */ 43 44 #include "namespace.h" 45 #include <sys/types.h> 46 47 #include <errno.h> 48 #include <stddef.h> 49 #include <stdio.h> 50 #include <stdlib.h> 51 52 #include <db.h> 53 #include "btree.h" 54 55 static int __bt_first __P((BTREE *, const DBT *, EPG *, int *)); 56 static int __bt_seqadv __P((BTREE *, EPG *, int)); 57 static int __bt_seqset __P((BTREE *, EPG *, DBT *, int)); 58 59 /* 60 * Sequential scan support. 61 * 62 * The tree can be scanned sequentially, starting from either end of the 63 * tree or from any specific key. A scan request before any scanning is 64 * done is initialized as starting from the least node. 65 */ 66 67 /* 68 * __bt_seq -- 69 * Btree sequential scan interface. 70 * 71 * Parameters: 72 * dbp: pointer to access method 73 * key: key for positioning and return value 74 * data: data return value 75 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 76 * 77 * Returns: 78 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 79 */ 80 int 81 __bt_seq(dbp, key, data, flags) 82 const DB *dbp; 83 DBT *key, *data; 84 u_int flags; 85 { 86 BTREE *t; 87 EPG e; 88 int status; 89 90 t = dbp->internal; 91 92 /* Toss any page pinned across calls. */ 93 if (t->bt_pinned != NULL) { 94 mpool_put(t->bt_mp, t->bt_pinned, 0); 95 t->bt_pinned = NULL; 96 } 97 98 /* 99 * If scan unitialized as yet, or starting at a specific record, set 100 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 101 * the page the cursor references if they're successful. 102 */ 103 switch (flags) { 104 case R_NEXT: 105 case R_PREV: 106 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 107 status = __bt_seqadv(t, &e, (int)flags); 108 break; 109 } 110 /* FALLTHROUGH */ 111 case R_FIRST: 112 case R_LAST: 113 case R_CURSOR: 114 status = __bt_seqset(t, &e, key, (int)flags); 115 break; 116 default: 117 errno = EINVAL; 118 return (RET_ERROR); 119 } 120 121 if (status == RET_SUCCESS) { 122 __bt_setcur(t, e.page->pgno, (u_int)e.index); 123 124 status = 125 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 126 127 /* 128 * If the user is doing concurrent access, we copied the 129 * key/data, toss the page. 130 */ 131 if (F_ISSET(t, B_DB_LOCK)) 132 mpool_put(t->bt_mp, e.page, 0); 133 else 134 t->bt_pinned = e.page; 135 } 136 return (status); 137 } 138 139 /* 140 * __bt_seqset -- 141 * Set the sequential scan to a specific key. 142 * 143 * Parameters: 144 * t: tree 145 * ep: storage for returned key 146 * key: key for initial scan position 147 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 148 * 149 * Side effects: 150 * Pins the page the cursor references. 151 * 152 * Returns: 153 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 154 */ 155 static int 156 __bt_seqset(t, ep, key, flags) 157 BTREE *t; 158 EPG *ep; 159 DBT *key; 160 int flags; 161 { 162 PAGE *h; 163 pgno_t pg; 164 int exact; 165 166 /* 167 * Find the first, last or specific key in the tree and point the 168 * cursor at it. The cursor may not be moved until a new key has 169 * been found. 170 */ 171 switch (flags) { 172 case R_CURSOR: /* Keyed scan. */ 173 /* 174 * Find the first instance of the key or the smallest key 175 * which is greater than or equal to the specified key. 176 */ 177 if (key->data == NULL || key->size == 0) { 178 errno = EINVAL; 179 return (RET_ERROR); 180 } 181 return (__bt_first(t, key, ep, &exact)); 182 case R_FIRST: /* First record. */ 183 case R_NEXT: 184 /* Walk down the left-hand side of the tree. */ 185 for (pg = P_ROOT;;) { 186 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 187 return (RET_ERROR); 188 189 /* Check for an empty tree. */ 190 if (NEXTINDEX(h) == 0) { 191 mpool_put(t->bt_mp, h, 0); 192 return (RET_SPECIAL); 193 } 194 195 if (h->flags & (P_BLEAF | P_RLEAF)) 196 break; 197 pg = GETBINTERNAL(h, 0)->pgno; 198 mpool_put(t->bt_mp, h, 0); 199 } 200 ep->page = h; 201 ep->index = 0; 202 break; 203 case R_LAST: /* Last record. */ 204 case R_PREV: 205 /* Walk down the right-hand side of the tree. */ 206 for (pg = P_ROOT;;) { 207 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 208 return (RET_ERROR); 209 210 /* Check for an empty tree. */ 211 if (NEXTINDEX(h) == 0) { 212 mpool_put(t->bt_mp, h, 0); 213 return (RET_SPECIAL); 214 } 215 216 if (h->flags & (P_BLEAF | P_RLEAF)) 217 break; 218 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 219 mpool_put(t->bt_mp, h, 0); 220 } 221 222 ep->page = h; 223 ep->index = NEXTINDEX(h) - 1; 224 break; 225 } 226 return (RET_SUCCESS); 227 } 228 229 /* 230 * __bt_seqadvance -- 231 * Advance the sequential scan. 232 * 233 * Parameters: 234 * t: tree 235 * flags: R_NEXT, R_PREV 236 * 237 * Side effects: 238 * Pins the page the new key/data record is on. 239 * 240 * Returns: 241 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 242 */ 243 static int 244 __bt_seqadv(t, ep, flags) 245 BTREE *t; 246 EPG *ep; 247 int flags; 248 { 249 CURSOR *c; 250 PAGE *h; 251 indx_t idx = 0; /* pacify gcc */ 252 pgno_t pg; 253 int exact; 254 255 /* 256 * There are a couple of states that we can be in. The cursor has 257 * been initialized by the time we get here, but that's all we know. 258 */ 259 c = &t->bt_cursor; 260 261 /* 262 * The cursor was deleted where there weren't any duplicate records, 263 * so the key was saved. Find out where that key would go in the 264 * current tree. It doesn't matter if the returned key is an exact 265 * match or not -- if it's an exact match, the record was added after 266 * the delete so we can just return it. If not, as long as there's 267 * a record there, return it. 268 */ 269 if (F_ISSET(c, CURS_ACQUIRE)) 270 return (__bt_first(t, &c->key, ep, &exact)); 271 272 /* Get the page referenced by the cursor. */ 273 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 274 return (RET_ERROR); 275 276 /* 277 * Find the next/previous record in the tree and point the cursor at 278 * it. The cursor may not be moved until a new key has been found. 279 */ 280 switch (flags) { 281 case R_NEXT: /* Next record. */ 282 /* 283 * The cursor was deleted in duplicate records, and moved 284 * forward to a record that has yet to be returned. Clear 285 * that flag, and return the record. 286 */ 287 if (F_ISSET(c, CURS_AFTER)) 288 goto usecurrent; 289 idx = c->pg.index; 290 if (++idx == NEXTINDEX(h)) { 291 pg = h->nextpg; 292 mpool_put(t->bt_mp, h, 0); 293 if (pg == P_INVALID) 294 return (RET_SPECIAL); 295 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 296 return (RET_ERROR); 297 idx = 0; 298 } 299 break; 300 case R_PREV: /* Previous record. */ 301 /* 302 * The cursor was deleted in duplicate records, and moved 303 * backward to a record that has yet to be returned. Clear 304 * that flag, and return the record. 305 */ 306 if (F_ISSET(c, CURS_BEFORE)) { 307 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 308 ep->page = h; 309 ep->index = c->pg.index; 310 return (RET_SUCCESS); 311 } 312 idx = c->pg.index; 313 if (idx == 0) { 314 pg = h->prevpg; 315 mpool_put(t->bt_mp, h, 0); 316 if (pg == P_INVALID) 317 return (RET_SPECIAL); 318 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 319 return (RET_ERROR); 320 idx = NEXTINDEX(h) - 1; 321 } else 322 --idx; 323 break; 324 } 325 326 ep->page = h; 327 ep->index = idx; 328 return (RET_SUCCESS); 329 } 330 331 /* 332 * __bt_first -- 333 * Find the first entry. 334 * 335 * Parameters: 336 * t: the tree 337 * key: the key 338 * erval: return EPG 339 * exactp: pointer to exact match flag 340 * 341 * Returns: 342 * The first entry in the tree greater than or equal to key, 343 * or RET_SPECIAL if no such key exists. 344 */ 345 static int 346 __bt_first(t, key, erval, exactp) 347 BTREE *t; 348 const DBT *key; 349 EPG *erval; 350 int *exactp; 351 { 352 PAGE *h; 353 EPG *ep, save; 354 pgno_t pg; 355 356 /* 357 * Find any matching record; __bt_search pins the page. 358 * 359 * If it's an exact match and duplicates are possible, walk backwards 360 * in the tree until we find the first one. Otherwise, make sure it's 361 * a valid key (__bt_search may return an index just past the end of a 362 * page) and return it. 363 */ 364 if ((ep = __bt_search(t, key, exactp)) == NULL) 365 return (0); 366 if (*exactp) { 367 if (F_ISSET(t, B_NODUPS)) { 368 *erval = *ep; 369 return (RET_SUCCESS); 370 } 371 372 /* 373 * Walk backwards, as long as the entry matches and there are 374 * keys left in the tree. Save a copy of each match in case 375 * we go too far. 376 */ 377 save = *ep; 378 h = ep->page; 379 do { 380 if (save.page->pgno != ep->page->pgno) { 381 mpool_put(t->bt_mp, save.page, 0); 382 save = *ep; 383 } else 384 save.index = ep->index; 385 386 /* 387 * Don't unpin the page the last (or original) match 388 * was on, but make sure it's unpinned if an error 389 * occurs. 390 */ 391 if (ep->index == 0) { 392 if (h->prevpg == P_INVALID) 393 break; 394 if (h->pgno != save.page->pgno) 395 mpool_put(t->bt_mp, h, 0); 396 if ((h = mpool_get(t->bt_mp, 397 h->prevpg, 0)) == NULL) { 398 if (h->pgno == save.page->pgno) 399 mpool_put(t->bt_mp, 400 save.page, 0); 401 return (RET_ERROR); 402 } 403 ep->page = h; 404 ep->index = NEXTINDEX(h); 405 } 406 --ep->index; 407 } while (__bt_cmp(t, key, ep) == 0); 408 409 /* 410 * Reach here with the last page that was looked at pinned, 411 * which may or may not be the same as the last (or original) 412 * match page. If it's not useful, release it. 413 */ 414 if (h->pgno != save.page->pgno) 415 mpool_put(t->bt_mp, h, 0); 416 417 *erval = save; 418 return (RET_SUCCESS); 419 } 420 421 /* If at the end of a page, find the next entry. */ 422 if (ep->index == NEXTINDEX(ep->page)) { 423 h = ep->page; 424 pg = h->nextpg; 425 mpool_put(t->bt_mp, h, 0); 426 if (pg == P_INVALID) 427 return (RET_SPECIAL); 428 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 429 return (RET_ERROR); 430 ep->index = 0; 431 ep->page = h; 432 } 433 *erval = *ep; 434 return (RET_SUCCESS); 435 } 436 437 /* 438 * __bt_setcur -- 439 * Set the cursor to an entry in the tree. 440 * 441 * Parameters: 442 * t: the tree 443 * pgno: page number 444 * idx: page index 445 */ 446 void 447 __bt_setcur(t, pgno, idx) 448 BTREE *t; 449 pgno_t pgno; 450 u_int idx; 451 { 452 /* Lose any already deleted key. */ 453 if (t->bt_cursor.key.data != NULL) { 454 free(t->bt_cursor.key.data); 455 t->bt_cursor.key.size = 0; 456 t->bt_cursor.key.data = NULL; 457 } 458 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 459 460 /* Update the cursor. */ 461 t->bt_cursor.pg.pgno = pgno; 462 t->bt_cursor.pg.index = idx; 463 F_SET(&t->bt_cursor, CURS_INIT); 464 } 465