1 /* $NetBSD: bt_seq.c,v 1.15 2007/02/03 23:46:09 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Mike Olson. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #if defined(LIBC_SCCS) && !defined(lint) 37 #if 0 38 static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; 39 #else 40 __RCSID("$NetBSD: bt_seq.c,v 1.15 2007/02/03 23:46:09 christos Exp $"); 41 #endif 42 #endif /* LIBC_SCCS and not lint */ 43 44 #include "namespace.h" 45 #include <sys/types.h> 46 47 #include <assert.h> 48 #include <errno.h> 49 #include <stddef.h> 50 #include <stdio.h> 51 #include <stdlib.h> 52 53 #include <db.h> 54 #include "btree.h" 55 56 static int __bt_first(BTREE *, const DBT *, EPG *, int *); 57 static int __bt_seqadv(BTREE *, EPG *, int); 58 static int __bt_seqset(BTREE *, EPG *, DBT *, int); 59 60 /* 61 * Sequential scan support. 62 * 63 * The tree can be scanned sequentially, starting from either end of the 64 * tree or from any specific key. A scan request before any scanning is 65 * done is initialized as starting from the least node. 66 */ 67 68 /* 69 * __bt_seq -- 70 * Btree sequential scan interface. 71 * 72 * Parameters: 73 * dbp: pointer to access method 74 * key: key for positioning and return value 75 * data: data return value 76 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 77 * 78 * Returns: 79 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 80 */ 81 int 82 __bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags) 83 { 84 BTREE *t; 85 EPG e; 86 int status; 87 88 t = dbp->internal; 89 90 /* Toss any page pinned across calls. */ 91 if (t->bt_pinned != NULL) { 92 mpool_put(t->bt_mp, t->bt_pinned, 0); 93 t->bt_pinned = NULL; 94 } 95 96 /* 97 * If scan unitialized as yet, or starting at a specific record, set 98 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 99 * the page the cursor references if they're successful. 100 */ 101 switch (flags) { 102 case R_NEXT: 103 case R_PREV: 104 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 105 status = __bt_seqadv(t, &e, (int)flags); 106 break; 107 } 108 /* FALLTHROUGH */ 109 case R_FIRST: 110 case R_LAST: 111 case R_CURSOR: 112 status = __bt_seqset(t, &e, key, (int)flags); 113 break; 114 default: 115 errno = EINVAL; 116 return (RET_ERROR); 117 } 118 119 if (status == RET_SUCCESS) { 120 __bt_setcur(t, e.page->pgno, (u_int)e.index); 121 122 status = 123 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 124 125 /* 126 * If the user is doing concurrent access, we copied the 127 * key/data, toss the page. 128 */ 129 if (F_ISSET(t, B_DB_LOCK)) 130 mpool_put(t->bt_mp, e.page, 0); 131 else 132 t->bt_pinned = e.page; 133 } 134 return (status); 135 } 136 137 /* 138 * __bt_seqset -- 139 * Set the sequential scan to a specific key. 140 * 141 * Parameters: 142 * t: tree 143 * ep: storage for returned key 144 * key: key for initial scan position 145 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 146 * 147 * Side effects: 148 * Pins the page the cursor references. 149 * 150 * Returns: 151 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 152 */ 153 static int 154 __bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags) 155 { 156 PAGE *h; 157 pgno_t pg; 158 int exact; 159 160 /* 161 * Find the first, last or specific key in the tree and point the 162 * cursor at it. The cursor may not be moved until a new key has 163 * been found. 164 */ 165 switch (flags) { 166 case R_CURSOR: /* Keyed scan. */ 167 /* 168 * Find the first instance of the key or the smallest key 169 * which is greater than or equal to the specified key. 170 */ 171 if (key->data == NULL || key->size == 0) { 172 errno = EINVAL; 173 return (RET_ERROR); 174 } 175 return (__bt_first(t, key, ep, &exact)); 176 case R_FIRST: /* First record. */ 177 case R_NEXT: 178 /* Walk down the left-hand side of the tree. */ 179 for (pg = P_ROOT;;) { 180 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 181 return (RET_ERROR); 182 183 /* Check for an empty tree. */ 184 if (NEXTINDEX(h) == 0) { 185 mpool_put(t->bt_mp, h, 0); 186 return (RET_SPECIAL); 187 } 188 189 if (h->flags & (P_BLEAF | P_RLEAF)) 190 break; 191 pg = GETBINTERNAL(h, 0)->pgno; 192 mpool_put(t->bt_mp, h, 0); 193 } 194 ep->page = h; 195 ep->index = 0; 196 break; 197 case R_LAST: /* Last record. */ 198 case R_PREV: 199 /* Walk down the right-hand side of the tree. */ 200 for (pg = P_ROOT;;) { 201 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 202 return (RET_ERROR); 203 204 /* Check for an empty tree. */ 205 if (NEXTINDEX(h) == 0) { 206 mpool_put(t->bt_mp, h, 0); 207 return (RET_SPECIAL); 208 } 209 210 if (h->flags & (P_BLEAF | P_RLEAF)) 211 break; 212 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 213 mpool_put(t->bt_mp, h, 0); 214 } 215 216 ep->page = h; 217 ep->index = NEXTINDEX(h) - 1; 218 break; 219 } 220 return (RET_SUCCESS); 221 } 222 223 /* 224 * __bt_seqadvance -- 225 * Advance the sequential scan. 226 * 227 * Parameters: 228 * t: tree 229 * flags: R_NEXT, R_PREV 230 * 231 * Side effects: 232 * Pins the page the new key/data record is on. 233 * 234 * Returns: 235 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 236 */ 237 static int 238 __bt_seqadv(BTREE *t, EPG *ep, int flags) 239 { 240 CURSOR *c; 241 PAGE *h; 242 indx_t idx = 0; /* pacify gcc */ 243 pgno_t pg; 244 int exact; 245 246 /* 247 * There are a couple of states that we can be in. The cursor has 248 * been initialized by the time we get here, but that's all we know. 249 */ 250 c = &t->bt_cursor; 251 252 /* 253 * The cursor was deleted where there weren't any duplicate records, 254 * so the key was saved. Find out where that key would go in the 255 * current tree. It doesn't matter if the returned key is an exact 256 * match or not -- if it's an exact match, the record was added after 257 * the delete so we can just return it. If not, as long as there's 258 * a record there, return it. 259 */ 260 if (F_ISSET(c, CURS_ACQUIRE)) 261 return (__bt_first(t, &c->key, ep, &exact)); 262 263 /* Get the page referenced by the cursor. */ 264 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 265 return (RET_ERROR); 266 267 /* 268 * Find the next/previous record in the tree and point the cursor at 269 * it. The cursor may not be moved until a new key has been found. 270 */ 271 switch (flags) { 272 case R_NEXT: /* Next record. */ 273 /* 274 * The cursor was deleted in duplicate records, and moved 275 * forward to a record that has yet to be returned. Clear 276 * that flag, and return the record. 277 */ 278 if (F_ISSET(c, CURS_AFTER)) 279 goto usecurrent; 280 idx = c->pg.index; 281 if (++idx == NEXTINDEX(h)) { 282 pg = h->nextpg; 283 mpool_put(t->bt_mp, h, 0); 284 if (pg == P_INVALID) 285 return (RET_SPECIAL); 286 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 287 return (RET_ERROR); 288 idx = 0; 289 } 290 break; 291 case R_PREV: /* Previous record. */ 292 /* 293 * The cursor was deleted in duplicate records, and moved 294 * backward to a record that has yet to be returned. Clear 295 * that flag, and return the record. 296 */ 297 if (F_ISSET(c, CURS_BEFORE)) { 298 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 299 ep->page = h; 300 ep->index = c->pg.index; 301 return (RET_SUCCESS); 302 } 303 idx = c->pg.index; 304 if (idx == 0) { 305 pg = h->prevpg; 306 mpool_put(t->bt_mp, h, 0); 307 if (pg == P_INVALID) 308 return (RET_SPECIAL); 309 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 310 return (RET_ERROR); 311 idx = NEXTINDEX(h) - 1; 312 } else 313 --idx; 314 break; 315 } 316 317 ep->page = h; 318 ep->index = idx; 319 return (RET_SUCCESS); 320 } 321 322 /* 323 * __bt_first -- 324 * Find the first entry. 325 * 326 * Parameters: 327 * t: the tree 328 * key: the key 329 * erval: return EPG 330 * exactp: pointer to exact match flag 331 * 332 * Returns: 333 * The first entry in the tree greater than or equal to key, 334 * or RET_SPECIAL if no such key exists. 335 */ 336 static int 337 __bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp) 338 { 339 PAGE *h; 340 EPG *ep, save; 341 pgno_t pg; 342 343 /* 344 * Find any matching record; __bt_search pins the page. 345 * 346 * If it's an exact match and duplicates are possible, walk backwards 347 * in the tree until we find the first one. Otherwise, make sure it's 348 * a valid key (__bt_search may return an index just past the end of a 349 * page) and return it. 350 */ 351 if ((ep = __bt_search(t, key, exactp)) == NULL) 352 return (0); 353 if (*exactp) { 354 if (F_ISSET(t, B_NODUPS)) { 355 *erval = *ep; 356 return (RET_SUCCESS); 357 } 358 359 /* 360 * Walk backwards, as long as the entry matches and there are 361 * keys left in the tree. Save a copy of each match in case 362 * we go too far. 363 */ 364 save = *ep; 365 h = ep->page; 366 do { 367 if (save.page->pgno != ep->page->pgno) { 368 mpool_put(t->bt_mp, save.page, 0); 369 save = *ep; 370 } else 371 save.index = ep->index; 372 373 /* 374 * Don't unpin the page the last (or original) match 375 * was on, but make sure it's unpinned if an error 376 * occurs. 377 */ 378 if (ep->index == 0) { 379 if (h->prevpg == P_INVALID) 380 break; 381 if (h->pgno != save.page->pgno) 382 mpool_put(t->bt_mp, h, 0); 383 if ((h = mpool_get(t->bt_mp, 384 h->prevpg, 0)) == NULL) 385 return (RET_ERROR); 386 ep->page = h; 387 ep->index = NEXTINDEX(h); 388 } 389 --ep->index; 390 } while (__bt_cmp(t, key, ep) == 0); 391 392 /* 393 * Reach here with the last page that was looked at pinned, 394 * which may or may not be the same as the last (or original) 395 * match page. If it's not useful, release it. 396 */ 397 if (h->pgno != save.page->pgno) 398 mpool_put(t->bt_mp, h, 0); 399 400 *erval = save; 401 return (RET_SUCCESS); 402 } 403 404 /* If at the end of a page, find the next entry. */ 405 if (ep->index == NEXTINDEX(ep->page)) { 406 h = ep->page; 407 pg = h->nextpg; 408 mpool_put(t->bt_mp, h, 0); 409 if (pg == P_INVALID) 410 return (RET_SPECIAL); 411 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 412 return (RET_ERROR); 413 ep->index = 0; 414 ep->page = h; 415 } 416 *erval = *ep; 417 return (RET_SUCCESS); 418 } 419 420 /* 421 * __bt_setcur -- 422 * Set the cursor to an entry in the tree. 423 * 424 * Parameters: 425 * t: the tree 426 * pgno: page number 427 * idx: page index 428 */ 429 void 430 __bt_setcur(BTREE *t, pgno_t pgno, u_int idx) 431 { 432 /* Lose any already deleted key. */ 433 if (t->bt_cursor.key.data != NULL) { 434 free(t->bt_cursor.key.data); 435 t->bt_cursor.key.size = 0; 436 t->bt_cursor.key.data = NULL; 437 } 438 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 439 440 /* Update the cursor. */ 441 t->bt_cursor.pg.pgno = pgno; 442 t->bt_cursor.pg.index = idx; 443 F_SET(&t->bt_cursor, CURS_INIT); 444 } 445