1 /* $NetBSD: bsearch.c,v 1.3 2023/06/19 21:41:42 christos Exp $ */ 2 3 /* 4 * Copyright (c) 2011, Secure Endpoints Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * - Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * - Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 24 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 30 * OF THE POSSIBILITY OF SUCH DAMAGE. 31 * 32 */ 33 34 #include "baselocl.h" 35 36 #include <sys/types.h> 37 #include <sys/stat.h> 38 #ifdef HAVE_IO_H 39 #include <io.h> 40 #endif 41 #ifdef HAVE_UNISTD_H 42 #include <unistd.h> 43 #endif 44 #include <fcntl.h> 45 #include <ctype.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #ifdef HAVE_STRINGS_H 50 #include <strings.h> 51 #endif 52 #include <errno.h> 53 #include <assert.h> 54 55 /* 56 * This file contains functions for binary searching flat text in memory 57 * and in text files where each line is a [variable length] record. 58 * Each record has a key and an optional value separated from the key by 59 * unquoted whitespace. Whitespace in the key, and leading whitespace 60 * for the value, can be quoted with backslashes (but CR and LF must be 61 * quoted in such a way that they don't appear in the quoted result). 62 * 63 * Binary searching a tree are normally a dead simple algorithm. It 64 * turns out that binary searching flat text with *variable* length 65 * records is... tricky. There's no indexes to record beginning bytes, 66 * thus any index selected during the search is likely to fall in the 67 * middle of a record. When deciding to search a left sub-tree one 68 * might fail to find the last record in that sub-tree on account of the 69 * right boundary falling in the middle of it -- the chosen solution to 70 * this makes left sub-tree searches slightly less efficient than right 71 * sub-tree searches. 72 * 73 * If binary searching flat text in memory is tricky, using block-wise 74 * I/O instead is trickier! But it's necessary in order to support 75 * large files (which we either can't or wouldn't want to read or map 76 * into memory). Each block we read has to be large enough that the 77 * largest record can fit in it. And each block might start and/or end 78 * in the middle of a record. Here it is the right sub-tree searches 79 * that are less efficient than left sub-tree searches. 80 * 81 * bsearch_common() contains the common text block binary search code. 82 * 83 * _bsearch_text() is the interface for searching in-core text. 84 * _bsearch_file() is the interface for block-wise searching files. 85 */ 86 87 struct bsearch_file_handle { 88 int fd; /* file descriptor */ 89 char *cache; /* cache bytes */ 90 char *page; /* one double-size page worth of bytes */ 91 size_t file_sz; /* file size */ 92 size_t cache_sz; /* cache size */ 93 size_t page_sz; /* page size */ 94 }; 95 96 /* Find a new-line */ 97 static const char * 98 find_line(const char *buf, size_t i, size_t right) 99 { 100 if (i == 0) 101 return &buf[i]; 102 for (; i < right; i++) { 103 if (buf[i] == '\n') { 104 if ((i + 1) < right) 105 return &buf[i + 1]; 106 return NULL; 107 } 108 } 109 return NULL; 110 } 111 112 /* 113 * Common routine for binary searching text in core. 114 * 115 * Perform a binary search of a char array containing a block from a 116 * text file where each line is a record (LF and CRLF supported). Each 117 * record consists of a key followed by an optional value separated from 118 * the key by whitespace. Whitespace can be quoted with backslashes. 119 * It's the caller's responsibility to encode/decode keys/values if 120 * quoting is desired; newlines should be encoded such that a newline 121 * does not appear in the result. 122 * 123 * All output arguments are optional. 124 * 125 * Returns 0 if key is found, -1 if not found, or an error code such as 126 * ENOMEM in case of error. 127 * 128 * Inputs: 129 * 130 * @buf String to search 131 * @sz Size of string to search 132 * @key Key string to search for 133 * @buf_is_start True if the buffer starts with a record, false if it 134 * starts in the middle of a record or if the caller 135 * doesn't know. 136 * 137 * Outputs: 138 * 139 * @value Location to store a copy of the value (caller must free) 140 * @location Record location if found else the location where the 141 * record should be inserted (index into @buf) 142 * @cmp Set to less than or greater than 0 to indicate that a 143 * key not found would have fit in an earlier or later 144 * part of a file. Callers should use this to decide 145 * whether to read a block to the left or to the right and 146 * search that. 147 * @loops Location to store a count of bisections required for 148 * search (useful for confirming logarithmic performance) 149 */ 150 static int 151 bsearch_common(const char *buf, size_t sz, const char *key, 152 int buf_is_start, char **value, size_t *location, 153 int *cmp, size_t *loops) 154 { 155 const char *linep; 156 size_t key_start, key_len; /* key string in buf */ 157 size_t val_start, val_len; /* value string in buf */ 158 int key_cmp = -1; 159 size_t k; 160 size_t l; /* left side of buffer for binary search */ 161 size_t r; /* right side of buffer for binary search */ 162 size_t rmax; /* right side of buffer for binary search */ 163 size_t i; /* index into buffer, typically in the middle of l and r */ 164 size_t loop_count = 0; 165 int ret = -1; 166 167 if (value) 168 *value = NULL; 169 if (cmp) 170 *cmp = 0; 171 if (loops) 172 *loops = 0; 173 174 /* Binary search; file should be sorted */ 175 for (l = 0, r = rmax = sz, i = sz >> 1; i >= l && i < rmax; loop_count++) { 176 heim_assert(i < sz, "invalid aname2lname db index"); 177 178 /* buf[i] is likely in the middle of a line; find the next line */ 179 linep = find_line(buf, i, rmax); 180 k = linep ? linep - buf : i; 181 if (linep == NULL || k >= rmax) { 182 /* 183 * No new line found to the right; search to the left then 184 * but don't change rmax (this isn't optimal, but it's 185 * simple). 186 */ 187 if (i == l) 188 break; 189 r = i; 190 i = l + ((r - l) >> 1); 191 continue; 192 } 193 i = k; 194 heim_assert(i >= l && i < rmax, "invalid aname2lname db index"); 195 196 /* Got a line; check it */ 197 198 /* Search for and split on unquoted whitespace */ 199 val_start = 0; 200 for (key_start = i, key_len = 0, val_len = 0, k = i; k < rmax; k++) { 201 if (buf[k] == '\\') { 202 k++; 203 continue; 204 } 205 if (buf[k] == '\r' || buf[k] == '\n') { 206 /* We now know where the key ends, and there's no value */ 207 key_len = k - i; 208 break; 209 } 210 if (!isspace((unsigned char)buf[k])) 211 continue; 212 213 while (k < rmax && isspace((unsigned char)buf[k])) { 214 key_len = k - i; 215 k++; 216 } 217 if (k < rmax) 218 val_start = k; 219 /* Find end of value */ 220 for (; k < rmax && buf[k] != '\0'; k++) { 221 if (buf[k] == '\r' || buf[k] == '\n') { 222 val_len = k - val_start; 223 break; 224 } 225 } 226 break; 227 } 228 229 /* 230 * The following logic is for dealing with partial buffers, 231 * which we use for block-wise binary searches of large files 232 */ 233 if (key_start == 0 && !buf_is_start) { 234 /* 235 * We're at the beginning of a block that might have started 236 * in the middle of a record whose "key" might well compare 237 * as greater than the key we're looking for, so we don't 238 * bother comparing -- we know key_cmp must be -1 here. 239 */ 240 key_cmp = -1; 241 break; 242 } 243 if ((val_len && buf[val_start + val_len] != '\n') || 244 (!val_len && buf[key_start + key_len] != '\n')) { 245 /* 246 * We're at the end of a block that ends in the middle of a 247 * record whose "key" might well compare as less than the 248 * key we're looking for, so we don't bother comparing -- we 249 * know key_cmp must be >= 0 but we can't tell. Our caller 250 * will end up reading a double-size block to handle this. 251 */ 252 key_cmp = 1; 253 break; 254 } 255 256 key_cmp = strncmp(key, &buf[key_start], key_len); 257 if (key_cmp == 0 && strlen(key) != key_len) 258 key_cmp = 1; 259 if (key_cmp < 0) { 260 /* search left */ 261 r = rmax = (linep - buf); 262 i = l + ((r - l) >> 1); 263 if (location) 264 *location = key_start; 265 } else if (key_cmp > 0) { 266 /* search right */ 267 if (l == i) 268 break; /* not found */ 269 l = i; 270 i = l + ((r - l) >> 1); 271 if (location) 272 *location = val_start + val_len; 273 } else { 274 /* match! */ 275 if (location) 276 *location = key_start; 277 ret = 0; 278 if (val_len && value) { 279 /* Avoid strndup() so we don't need libroken here yet */ 280 if ((*value = malloc(val_len + 1))) { 281 (void) memcpy(*value, &buf[val_start], val_len); 282 (*value)[val_len] = '\0'; 283 } else { 284 ret = errno; 285 } 286 } 287 break; 288 } 289 } 290 291 if (cmp) 292 *cmp = key_cmp; 293 if (loops) 294 *loops = loop_count; 295 296 return ret; 297 } 298 299 /* 300 * Binary search a char array containing sorted text records separated 301 * by new-lines (or CRLF). Each record consists of a key and an 302 * optional value following the key, separated from the key by unquoted 303 * whitespace. 304 * 305 * All output arguments are optional. 306 * 307 * Returns 0 if key is found, -1 if not found, or an error code such as 308 * ENOMEM in case of error. 309 * 310 * Inputs: 311 * 312 * @buf Char array pointer 313 * @buf_sz Size of buf 314 * @key Key to search for 315 * 316 * Outputs: 317 * 318 * @value Location where to put the value, if any (caller must free) 319 * @location Record location if found else the location where the record 320 * should be inserted (index into @buf) 321 * @loops Location where to put a number of loops (or comparisons) 322 * needed for the search (useful for benchmarking) 323 */ 324 int 325 _bsearch_text(const char *buf, size_t buf_sz, const char *key, 326 char **value, size_t *location, size_t *loops) 327 { 328 return bsearch_common(buf, buf_sz, key, 1, value, location, NULL, loops); 329 } 330 331 #define MAX_BLOCK_SIZE (1024 * 1024) 332 #define DEFAULT_MAX_FILE_SIZE (1024 * 1024) 333 /* 334 * Open a file for binary searching. The file will be read in entirely 335 * if it is smaller than @max_sz, else a cache of @max_sz bytes will be 336 * allocated. 337 * 338 * Returns 0 on success, else an error number or -1 if the file is empty. 339 * 340 * Inputs: 341 * 342 * @fname Name of file to open 343 * @max_sz Maximum size of cache to allocate, in bytes (if zero, default) 344 * @page_sz Page size (must be a power of two, larger than 256, smaller 345 * than 1MB; if zero use default) 346 * 347 * Outputs: 348 * 349 * @bfh Handle for use with _bsearch_file() and _bsearch_file_close() 350 * @reads Number of reads performed 351 */ 352 int 353 _bsearch_file_open(const char *fname, size_t max_sz, size_t page_sz, 354 bsearch_file_handle *bfh, size_t *reads) 355 { 356 bsearch_file_handle new_bfh = NULL; 357 struct stat st; 358 size_t i; 359 int fd; 360 int ret; 361 362 *bfh = NULL; 363 364 if (reads) 365 *reads = 0; 366 367 fd = open(fname, O_RDONLY); 368 if (fd == -1) 369 return errno; 370 371 if (fstat(fd, &st) == -1) { 372 ret = errno; 373 goto err; 374 } 375 376 if (st.st_size == 0) { 377 ret = -1; /* no data -> no binary search */ 378 goto err; 379 } 380 381 /* Validate / default arguments */ 382 if (max_sz == 0) 383 max_sz = DEFAULT_MAX_FILE_SIZE; 384 for (i = page_sz; i; i >>= 1) { 385 /* Make sure page_sz is a power of two */ 386 if ((i % 2) && (i >> 1)) { 387 page_sz = 0; 388 break; 389 } 390 } 391 if (page_sz == 0) 392 #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE 393 page_sz = st.st_blksize; 394 #else 395 page_sz = 4096; 396 #endif 397 for (i = page_sz; i; i >>= 1) { 398 /* Make sure page_sz is a power of two */ 399 if ((i % 2) && (i >> 1)) { 400 /* Can't happen! Filesystems always use powers of two! */ 401 page_sz = 4096; 402 break; 403 } 404 } 405 if (page_sz > MAX_BLOCK_SIZE) 406 page_sz = MAX_BLOCK_SIZE; 407 408 new_bfh = calloc(1, sizeof (*new_bfh)); 409 if (new_bfh == NULL) { 410 ret = ENOMEM; 411 goto err; 412 } 413 414 new_bfh->fd = fd; 415 new_bfh->page_sz = page_sz; 416 new_bfh->file_sz = st.st_size; 417 418 if (max_sz >= st.st_size) { 419 /* Whole-file method */ 420 new_bfh->cache = malloc(st.st_size + 1); 421 if (new_bfh->cache) { 422 new_bfh->cache[st.st_size] = '\0'; 423 new_bfh->cache_sz = st.st_size; 424 ret = read(fd, new_bfh->cache, st.st_size); 425 if (ret < 0) { 426 ret = errno; 427 goto err; 428 } 429 if (ret != st.st_size) { 430 ret = EIO; /* XXX ??? */ 431 goto err; 432 } 433 if (reads) 434 *reads = 1; 435 (void) close(fd); 436 new_bfh->fd = -1; 437 *bfh = new_bfh; 438 return 0; 439 } 440 } 441 442 /* Block-size method, or above malloc() failed */ 443 new_bfh->page = malloc(new_bfh->page_sz << 1); 444 if (new_bfh->page == NULL) { 445 /* Can't even allocate a single double-size page! */ 446 ret = ENOMEM; 447 goto err; 448 } 449 450 new_bfh->cache_sz = max_sz < st.st_size ? max_sz : st.st_size; 451 new_bfh->cache = malloc(new_bfh->cache_sz); 452 *bfh = new_bfh; 453 454 /* 455 * malloc() may have failed because we were asking for a lot of 456 * memory, but we may still be able to operate without a cache, 457 * so let's not fail. 458 */ 459 if (new_bfh->cache == NULL) { 460 new_bfh->cache_sz = 0; 461 return 0; 462 } 463 464 /* Initialize cache */ 465 for (i = 0; i < new_bfh->cache_sz; i += new_bfh->page_sz) 466 new_bfh->cache[i] = '\0'; 467 return 0; 468 469 err: 470 (void) close(fd); 471 if (new_bfh) { 472 free(new_bfh->page); 473 free(new_bfh->cache); 474 free(new_bfh); 475 } 476 return ret; 477 } 478 479 /* 480 * Indicate whether the given binary search file handle will be searched 481 * with block-wise method. 482 */ 483 void 484 _bsearch_file_info(bsearch_file_handle bfh, 485 size_t *page_sz, size_t *max_sz, int *blockwise) 486 { 487 if (page_sz) 488 *page_sz = bfh->page_sz; 489 if (max_sz) 490 *max_sz = bfh->cache_sz; 491 if (blockwise) 492 *blockwise = (bfh->file_sz != bfh->cache_sz); 493 } 494 495 /* 496 * Close the given binary file search handle. 497 * 498 * Inputs: 499 * 500 * @bfh Pointer to variable containing handle to close. 501 */ 502 void 503 _bsearch_file_close(bsearch_file_handle *bfh) 504 { 505 if (!*bfh) 506 return; 507 if ((*bfh)->fd >= 0) 508 (void) close((*bfh)->fd); 509 if ((*bfh)->page) 510 free((*bfh)->page); 511 if ((*bfh)->cache) 512 free((*bfh)->cache); 513 free(*bfh); 514 *bfh = NULL; 515 } 516 517 /* 518 * Private function to get a page from a cache. The cache is a char 519 * array of 2^n - 1 double-size page worth of bytes, where n is the 520 * number of tree levels that the cache stores. The cache can be 521 * smaller than n implies. 522 * 523 * The page may or may not be valid. If the first byte of it is NUL 524 * then it's not valid, else it is. 525 * 526 * Returns 1 if page is in cache and valid, 0 if the cache is too small 527 * or the page is invalid. The page address is output in @buf if the 528 * cache is large enough to contain it regardless of whether the page is 529 * valid. 530 * 531 * Inputs: 532 * 533 * @bfh Binary search file handle 534 * @level Level in the tree that we want a page for 535 * @page_idx Page number in the given level (0..2^level - 1) 536 * 537 * Outputs: 538 * 539 * @buf Set to address of page if the cache is large enough 540 */ 541 static int 542 get_page_from_cache(bsearch_file_handle bfh, size_t level, size_t page_idx, 543 char **buf) 544 { 545 size_t idx = 0; 546 size_t page_sz; 547 548 page_sz = bfh->page_sz << 1; /* we use double-size pages in the cache */ 549 550 *buf = NULL; 551 552 /* 553 * Compute index into cache. The cache is basically an array of 554 * double-size pages. The first (zeroth) double-size page in the 555 * cache will be the middle page of the file -- the root of the 556 * tree. The next two double-size pages will be the left and right 557 * pages of the second level in the tree. The next four double-size 558 * pages will be the four pages at the next level. And so on for as 559 * many pages as fit in the cache. 560 * 561 * The page index is the number of the page at the given level. We 562 * then compute (2^level - 1 + page index) * 2page size, check that 563 * we have that in the cache, check that the page has been read (it 564 * doesn't start with NUL). 565 */ 566 if (level) 567 idx = (1 << level) - 1 + page_idx; 568 if (((idx + 1) * page_sz * 2) > bfh->cache_sz) 569 return 0; 570 571 *buf = &bfh->cache[idx * page_sz * 2]; 572 if (bfh->cache[idx * page_sz * 2] == '\0') 573 return 0; /* cache[idx] == NUL -> page not loaded in cache */ 574 return 1; 575 } 576 577 /* 578 * Private function to read a page of @page_sz from @fd at offset @off 579 * into @buf, outputing the number of bytes read, which will be the same 580 * as @page_sz unless the page being read is the last page, in which 581 * case the number of remaining bytes in the file will be output. 582 * 583 * Returns 0 on success or an errno value otherwise (EIO if reads are 584 * short). 585 * 586 * Inputs: 587 * 588 * @bfh Binary search file handle 589 * @level Level in the binary search tree that we're at 590 * @page_idx Page "index" at the @level of the tree that we want 591 * @page Actual page number that we want 592 * want_double Whether we need a page or double page read 593 * 594 * Outputs: 595 * 596 * @buf Page read or cached 597 * @bytes Bytes read (may be less than page or double page size in 598 * the case of the last page, of course) 599 */ 600 static int 601 read_page(bsearch_file_handle bfh, size_t level, size_t page_idx, size_t page, 602 int want_double, const char **buf, size_t *bytes) 603 { 604 int ret; 605 off_t off; 606 size_t expected; 607 size_t wanted; 608 char *page_buf; 609 610 /* Figure out where we're reading and how much */ 611 off = page * bfh->page_sz; 612 if (off < 0) 613 return EOVERFLOW; 614 615 wanted = bfh->page_sz << want_double; 616 expected = ((bfh->file_sz - off) > wanted) ? wanted : bfh->file_sz - off; 617 618 if (get_page_from_cache(bfh, level, page_idx, &page_buf)) { 619 *buf = page_buf; 620 *bytes = expected; 621 return 0; /* found in cache */ 622 } 623 624 625 *bytes = 0; 626 *buf = NULL; 627 628 /* OK, we have to read a page or double-size page */ 629 630 if (page_buf) 631 want_double = 1; /* we'll be caching; we cache double-size pages */ 632 else 633 page_buf = bfh->page; /* we won't cache this page */ 634 635 wanted = bfh->page_sz << want_double; 636 expected = ((bfh->file_sz - off) > wanted) ? wanted : bfh->file_sz - off; 637 638 #ifdef HAVE_PREAD 639 ret = pread(bfh->fd, page_buf, expected, off); 640 #else 641 if (lseek(bfh->fd, off, SEEK_SET) == (off_t)-1) 642 return errno; 643 ret = read(bfh->fd, page_buf, expected); 644 #endif 645 if (ret < 0) 646 return errno; 647 648 if (ret != expected) 649 return EIO; /* XXX ??? */ 650 651 *buf = page_buf; 652 *bytes = expected; 653 return 0; 654 } 655 656 /* 657 * Perform a binary search of a file where each line is a record (LF and 658 * CRLF supported). Each record consists of a key followed by an 659 * optional value separated from the key by whitespace. Whitespace can 660 * be quoted with backslashes. It's the caller's responsibility to 661 * encode/decode keys/values if quoting is desired; newlines should be 662 * encoded such that a newline does not appear in the result. 663 * 664 * The search is done with block-wise I/O (i.e., the whole file is not 665 * read into memory). 666 * 667 * All output arguments are optional. 668 * 669 * Returns 0 if key is found, -1 if not found, or an error code such as 670 * ENOMEM in case of error. 671 * 672 * NOTE: We could improve this by not freeing the buffer, instead 673 * requiring that the caller provide it. Further, we could cache 674 * the top N levels of [double-size] pages (2^N - 1 pages), which 675 * should speed up most searches by reducing the number of reads 676 * by N. 677 * 678 * Inputs: 679 * 680 * @fd File descriptor (file to search) 681 * @page_sz Page size (if zero then the file's st_blksize will be used) 682 * @key Key string to search for 683 * 684 * Outputs: 685 * 686 * @value Location to store a copy of the value (caller must free) 687 * @location Record location if found else the location where the 688 * record should be inserted (index into @buf) 689 * @loops Location to store a count of bisections required for 690 * search (useful for confirming logarithmic performance) 691 * @reads Location to store a count of pages read during search 692 * (useful for confirming logarithmic performance) 693 */ 694 int 695 _bsearch_file(bsearch_file_handle bfh, const char *key, 696 char **value, size_t *location, size_t *loops, size_t *reads) 697 { 698 int ret; 699 const char *buf; 700 size_t buf_sz; 701 size_t page, l, r; 702 size_t my_reads = 0; 703 size_t my_loops_total = 0; 704 size_t my_loops; 705 size_t level; /* level in the tree */ 706 size_t page_idx = 0; /* page number in the tree level */ 707 size_t buf_location; 708 int cmp; 709 int buf_ends_in_eol = 0; 710 int buf_is_start = 0; 711 712 if (reads) 713 *reads = 0; 714 if (value) 715 *value = NULL; 716 if (loops) 717 *loops = 0; 718 719 /* If whole file is in memory then search that and we're done */ 720 if (bfh->file_sz == bfh->cache_sz) 721 return _bsearch_text(bfh->cache, bfh->cache_sz, key, value, location, loops); 722 723 /* Else block-wise binary search */ 724 725 l = 0; 726 r = (bfh->file_sz / bfh->page_sz) + 1; 727 for (level = 0, page = r >> 1; page >= l && page < r ; level++) { 728 ret = read_page(bfh, level, page_idx, page, 0, &buf, &buf_sz); 729 if (ret != 0) 730 return ret; 731 my_reads++; 732 if (buf[buf_sz - 1] == '\r' || buf[buf_sz - 1] == '\n') 733 buf_ends_in_eol = 1; 734 else 735 buf_ends_in_eol = 0; 736 737 buf_is_start = page == 0 ? 1 : 0; 738 ret = bsearch_common(buf, (size_t)buf_sz, key, buf_is_start, 739 value, &buf_location, &cmp, &my_loops); 740 if (ret > 0) 741 return ret; 742 /* Found or no we update stats */ 743 my_loops_total += my_loops; 744 if (loops) 745 *loops = my_loops_total; 746 if (reads) 747 *reads = my_reads; 748 if (location) 749 *location = page * bfh->page_sz + buf_location; 750 if (ret == 0) 751 return 0; /* found! */ 752 /* Not found */ 753 if (cmp < 0) { 754 /* Search left */ 755 page_idx <<= 1; 756 r = page; 757 page = l + ((r - l) >> 1); 758 continue; 759 } else { 760 /* 761 * Search right, but first search the current and next 762 * blocks in case that the record we're looking for either 763 * straddles the boundary between this and the next record, 764 * or in case the record starts exactly at the next page. 765 */ 766 heim_assert(cmp > 0, "cmp > 0"); 767 768 if (!buf_ends_in_eol || page == l || page == (r - 1)) { 769 ret = read_page(bfh, level, page_idx, page, 1, &buf, &buf_sz); 770 if (ret != 0) 771 return ret; 772 my_reads++; 773 774 buf_is_start = page == l ? 1 : 0; 775 776 ret = bsearch_common(buf, (size_t)buf_sz, key, buf_is_start, 777 value, &buf_location, &cmp, &my_loops); 778 if (ret > 0) 779 return ret; 780 my_loops_total += my_loops; 781 if (loops) 782 *loops = my_loops_total; 783 if (reads) 784 *reads = my_reads; 785 if (location) 786 *location = page * bfh->page_sz + buf_location; 787 if (ret == 0) 788 return 0; 789 } 790 791 /* Oh well, search right */ 792 if (l == page && r == (l + 1)) 793 break; 794 page_idx = (page_idx << 1) + 1; 795 l = page; 796 page = l + ((r - l) >> 1); 797 continue; 798 } 799 } 800 return -1; 801 } 802 803 804 static int 805 stdb_open(void *plug, const char *dbtype, const char *dbname, 806 heim_dict_t options, void **db, heim_error_t *error) 807 { 808 bsearch_file_handle bfh; 809 char *p; 810 int ret; 811 812 if (error) 813 *error = NULL; 814 if (dbname == NULL || *dbname == '\0') { 815 if (error) 816 *error = heim_error_create(EINVAL, 817 N_("DB name required for sorted-text DB " 818 "plugin", "")); 819 return EINVAL; 820 } 821 p = strrchr(dbname, '.'); 822 if (p == NULL || strcmp(p, ".txt") != 0) { 823 if (error) 824 *error = heim_error_create(ENOTSUP, 825 N_("Text file (name ending in .txt) " 826 "required for sorted-text DB plugin", 827 "")); 828 return ENOTSUP; 829 } 830 831 ret = _bsearch_file_open(dbname, 0, 0, &bfh, NULL); 832 if (ret) 833 return ret; 834 835 *db = bfh; 836 return 0; 837 } 838 839 static int 840 stdb_close(void *db, heim_error_t *error) 841 { 842 bsearch_file_handle bfh = db; 843 844 if (error) 845 *error = NULL; 846 _bsearch_file_close(&bfh); 847 return 0; 848 } 849 850 static heim_data_t 851 stdb_copy_value(void *db, heim_string_t table, heim_data_t key, 852 heim_error_t *error) 853 { 854 bsearch_file_handle bfh = db; 855 const char *k; 856 char *v = NULL; 857 heim_data_t value; 858 int ret; 859 860 if (error) 861 *error = NULL; 862 863 if (table == NULL) 864 table = HSTR(""); 865 866 if (table != HSTR("")) 867 return NULL; 868 869 if (heim_get_tid(key) == HEIM_TID_STRING) 870 k = heim_string_get_utf8((heim_string_t)key); 871 else 872 k = (const char *)heim_data_get_ptr(key); 873 ret = _bsearch_file(bfh, k, &v, NULL, NULL, NULL); 874 if (ret == 0 && v == NULL) 875 ret = -1; /* Quiet lint */ 876 if (ret != 0) { 877 if (ret > 0 && error) 878 *error = heim_error_create(ret, "%s", strerror(ret)); 879 return NULL; 880 } 881 value = heim_data_create(v, strlen(v)); 882 free(v); 883 /* XXX Handle ENOMEM */ 884 return value; 885 } 886 887 struct heim_db_type heim_sorted_text_file_dbtype = { 888 1, stdb_open, NULL, stdb_close, NULL, NULL, NULL, NULL, NULL, NULL, 889 stdb_copy_value, NULL, NULL, NULL 890 }; 891