1 /* $Id: mansearch.c,v 1.15 2014/03/21 22:52:21 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <assert.h> 19 #include <fcntl.h> 20 #include <getopt.h> 21 #include <limits.h> 22 #include <regex.h> 23 #include <stdio.h> 24 #include <stdint.h> 25 #include <stddef.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <unistd.h> 29 30 #include <ohash.h> 31 #include <sqlite3.h> 32 33 #include "mandoc.h" 34 #include "mandoc_aux.h" 35 #include "manpath.h" 36 #include "mansearch.h" 37 38 extern int mansearch_keymax; 39 extern const char *const mansearch_keynames[]; 40 41 #define SQL_BIND_TEXT(_db, _s, _i, _v) \ 42 do { if (SQLITE_OK != sqlite3_bind_text \ 43 ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \ 44 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \ 45 } while (0) 46 #define SQL_BIND_INT64(_db, _s, _i, _v) \ 47 do { if (SQLITE_OK != sqlite3_bind_int64 \ 48 ((_s), (_i)++, (_v))) \ 49 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \ 50 } while (0) 51 #define SQL_BIND_BLOB(_db, _s, _i, _v) \ 52 do { if (SQLITE_OK != sqlite3_bind_blob \ 53 ((_s), (_i)++, (&_v), sizeof(_v), SQLITE_STATIC)) \ 54 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \ 55 } while (0) 56 57 struct expr { 58 uint64_t bits; /* type-mask */ 59 const char *substr; /* to search for, if applicable */ 60 regex_t regexp; /* compiled regexp, if applicable */ 61 int open; /* opening parentheses before */ 62 int and; /* logical AND before */ 63 int close; /* closing parentheses after */ 64 struct expr *next; /* next in sequence */ 65 }; 66 67 struct match { 68 uint64_t id; /* identifier in database */ 69 int form; /* 0 == catpage */ 70 }; 71 72 static void buildnames(struct manpage *, sqlite3 *, 73 sqlite3_stmt *, uint64_t, 74 const char *, int form); 75 static char *buildoutput(sqlite3 *, sqlite3_stmt *, 76 uint64_t, uint64_t); 77 static void *hash_alloc(size_t, void *); 78 static void hash_free(void *, size_t, void *); 79 static void *hash_halloc(size_t, void *); 80 static struct expr *exprcomp(const struct mansearch *, 81 int, char *[]); 82 static void exprfree(struct expr *); 83 static struct expr *exprspec(struct expr *, uint64_t, 84 const char *, const char *); 85 static struct expr *exprterm(const struct mansearch *, char *, int); 86 static void sql_append(char **sql, size_t *sz, 87 const char *newstr, int count); 88 static void sql_match(sqlite3_context *context, 89 int argc, sqlite3_value **argv); 90 static void sql_regexp(sqlite3_context *context, 91 int argc, sqlite3_value **argv); 92 static char *sql_statement(const struct expr *); 93 94 int 95 mansearch(const struct mansearch *search, 96 const struct manpaths *paths, 97 int argc, char *argv[], 98 const char *outkey, 99 struct manpage **res, size_t *sz) 100 { 101 int fd, rc, c, indexbit; 102 int64_t id; 103 uint64_t outbit, iterbit; 104 char buf[PATH_MAX]; 105 char *sql; 106 struct manpage *mpage; 107 struct expr *e, *ep; 108 sqlite3 *db; 109 sqlite3_stmt *s, *s2; 110 struct match *mp; 111 struct ohash_info info; 112 struct ohash htab; 113 unsigned int idx; 114 size_t i, j, cur, maxres; 115 116 memset(&info, 0, sizeof(struct ohash_info)); 117 118 info.halloc = hash_halloc; 119 info.alloc = hash_alloc; 120 info.hfree = hash_free; 121 info.key_offset = offsetof(struct match, id); 122 123 *sz = cur = maxres = 0; 124 sql = NULL; 125 *res = NULL; 126 fd = -1; 127 e = NULL; 128 rc = 0; 129 130 if (0 == argc) 131 goto out; 132 if (NULL == (e = exprcomp(search, argc, argv))) 133 goto out; 134 135 outbit = 0; 136 if (NULL != outkey) { 137 for (indexbit = 0, iterbit = 1; 138 indexbit < mansearch_keymax; 139 indexbit++, iterbit <<= 1) { 140 if (0 == strcasecmp(outkey, 141 mansearch_keynames[indexbit])) { 142 outbit = iterbit; 143 break; 144 } 145 } 146 } 147 148 /* 149 * Save a descriptor to the current working directory. 150 * Since pathnames in the "paths" variable might be relative, 151 * and we'll be chdir()ing into them, we need to keep a handle 152 * on our current directory from which to start the chdir(). 153 */ 154 155 if (NULL == getcwd(buf, PATH_MAX)) { 156 perror(NULL); 157 goto out; 158 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) { 159 perror(buf); 160 goto out; 161 } 162 163 sql = sql_statement(e); 164 165 /* 166 * Loop over the directories (containing databases) for us to 167 * search. 168 * Don't let missing/bad databases/directories phase us. 169 * In each, try to open the resident database and, if it opens, 170 * scan it for our match expression. 171 */ 172 173 for (i = 0; i < paths->sz; i++) { 174 if (-1 == fchdir(fd)) { 175 perror(buf); 176 free(*res); 177 break; 178 } else if (-1 == chdir(paths->paths[i])) { 179 perror(paths->paths[i]); 180 continue; 181 } 182 183 c = sqlite3_open_v2 184 (MANDOC_DB, &db, 185 SQLITE_OPEN_READONLY, NULL); 186 187 if (SQLITE_OK != c) { 188 perror(MANDOC_DB); 189 sqlite3_close(db); 190 continue; 191 } 192 193 /* 194 * Define the SQL functions for substring 195 * and regular expression matching. 196 */ 197 198 c = sqlite3_create_function(db, "match", 2, 199 SQLITE_ANY, NULL, sql_match, NULL, NULL); 200 assert(SQLITE_OK == c); 201 c = sqlite3_create_function(db, "regexp", 2, 202 SQLITE_ANY, NULL, sql_regexp, NULL, NULL); 203 assert(SQLITE_OK == c); 204 205 j = 1; 206 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL); 207 if (SQLITE_OK != c) 208 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 209 210 for (ep = e; NULL != ep; ep = ep->next) { 211 if (NULL == ep->substr) { 212 SQL_BIND_BLOB(db, s, j, ep->regexp); 213 } else 214 SQL_BIND_TEXT(db, s, j, ep->substr); 215 SQL_BIND_INT64(db, s, j, ep->bits); 216 } 217 218 memset(&htab, 0, sizeof(struct ohash)); 219 ohash_init(&htab, 4, &info); 220 221 /* 222 * Hash each entry on its [unique] document identifier. 223 * This is a uint64_t. 224 * Instead of using a hash function, simply convert the 225 * uint64_t to a uint32_t, the hash value's type. 226 * This gives good performance and preserves the 227 * distribution of buckets in the table. 228 */ 229 while (SQLITE_ROW == (c = sqlite3_step(s))) { 230 id = sqlite3_column_int64(s, 1); 231 idx = ohash_lookup_memory 232 (&htab, (char *)&id, 233 sizeof(uint64_t), (uint32_t)id); 234 235 if (NULL != ohash_find(&htab, idx)) 236 continue; 237 238 mp = mandoc_calloc(1, sizeof(struct match)); 239 mp->id = id; 240 mp->form = sqlite3_column_int(s, 0); 241 ohash_insert(&htab, idx, mp); 242 } 243 244 if (SQLITE_DONE != c) 245 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 246 247 sqlite3_finalize(s); 248 249 c = sqlite3_prepare_v2(db, 250 "SELECT * FROM mlinks WHERE pageid=?" 251 " ORDER BY sec, arch, name", 252 -1, &s, NULL); 253 if (SQLITE_OK != c) 254 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 255 256 c = sqlite3_prepare_v2(db, 257 "SELECT * FROM keys WHERE pageid=? AND bits & ?", 258 -1, &s2, NULL); 259 if (SQLITE_OK != c) 260 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 261 262 for (mp = ohash_first(&htab, &idx); 263 NULL != mp; 264 mp = ohash_next(&htab, &idx)) { 265 if (cur + 1 > maxres) { 266 maxres += 1024; 267 *res = mandoc_realloc 268 (*res, maxres * sizeof(struct manpage)); 269 } 270 mpage = *res + cur; 271 mpage->form = mp->form; 272 buildnames(mpage, db, s, mp->id, 273 paths->paths[i], mp->form); 274 mpage->output = outbit ? 275 buildoutput(db, s2, mp->id, outbit) : NULL; 276 277 free(mp); 278 cur++; 279 } 280 281 sqlite3_finalize(s); 282 sqlite3_finalize(s2); 283 sqlite3_close(db); 284 ohash_delete(&htab); 285 } 286 rc = 1; 287 out: 288 exprfree(e); 289 if (-1 != fd) 290 close(fd); 291 free(sql); 292 *sz = cur; 293 return(rc); 294 } 295 296 static void 297 buildnames(struct manpage *mpage, sqlite3 *db, sqlite3_stmt *s, 298 uint64_t id, const char *path, int form) 299 { 300 char *newnames, *prevsec, *prevarch; 301 const char *oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec; 302 size_t i; 303 int c; 304 305 mpage->names = NULL; 306 prevsec = prevarch = NULL; 307 i = 1; 308 SQL_BIND_INT64(db, s, i, id); 309 while (SQLITE_ROW == (c = sqlite3_step(s))) { 310 311 /* Decide whether we already have some names. */ 312 313 if (NULL == mpage->names) { 314 oldnames = ""; 315 sep1 = ""; 316 } else { 317 oldnames = mpage->names; 318 sep1 = ", "; 319 } 320 321 /* Fetch the next name. */ 322 323 sec = sqlite3_column_text(s, 0); 324 arch = sqlite3_column_text(s, 1); 325 name = sqlite3_column_text(s, 2); 326 327 /* If the section changed, append the old one. */ 328 329 if (NULL != prevsec && 330 (strcmp(sec, prevsec) || 331 strcmp(arch, prevarch))) { 332 sep2 = '\0' == *prevarch ? "" : "/"; 333 mandoc_asprintf(&newnames, "%s(%s%s%s)", 334 oldnames, prevsec, sep2, prevarch); 335 free(mpage->names); 336 oldnames = mpage->names = newnames; 337 free(prevsec); 338 free(prevarch); 339 prevsec = prevarch = NULL; 340 } 341 342 /* Save the new section, to append it later. */ 343 344 if (NULL == prevsec) { 345 prevsec = mandoc_strdup(sec); 346 prevarch = mandoc_strdup(arch); 347 } 348 349 /* Append the new name. */ 350 351 mandoc_asprintf(&newnames, "%s%s%s", 352 oldnames, sep1, name); 353 free(mpage->names); 354 mpage->names = newnames; 355 356 /* Also save the first file name encountered. */ 357 358 if (NULL != mpage->file) 359 continue; 360 361 if (form) { 362 sep1 = "man"; 363 fsec = sec; 364 } else { 365 sep1 = "cat"; 366 fsec = "0"; 367 } 368 sep2 = '\0' == *arch ? "" : "/"; 369 mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s", 370 path, sep1, sec, sep2, arch, name, fsec); 371 } 372 if (SQLITE_DONE != c) 373 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 374 sqlite3_reset(s); 375 376 /* Append one final section to the names. */ 377 378 if (NULL != prevsec) { 379 sep2 = '\0' == *prevarch ? "" : "/"; 380 mandoc_asprintf(&newnames, "%s(%s%s%s)", 381 mpage->names, prevsec, sep2, prevarch); 382 free(mpage->names); 383 mpage->names = newnames; 384 free(prevsec); 385 free(prevarch); 386 } 387 } 388 389 static char * 390 buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit) 391 { 392 char *output, *newoutput; 393 const char *oldoutput, *sep1, *data; 394 size_t i; 395 int c; 396 397 output = NULL; 398 i = 1; 399 SQL_BIND_INT64(db, s, i, id); 400 SQL_BIND_INT64(db, s, i, outbit); 401 while (SQLITE_ROW == (c = sqlite3_step(s))) { 402 if (NULL == output) { 403 oldoutput = ""; 404 sep1 = ""; 405 } else { 406 oldoutput = output; 407 sep1 = " # "; 408 } 409 data = sqlite3_column_text(s, 1); 410 mandoc_asprintf(&newoutput, "%s%s%s", 411 oldoutput, sep1, data); 412 free(output); 413 output = newoutput; 414 } 415 if (SQLITE_DONE != c) 416 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 417 sqlite3_reset(s); 418 return(output); 419 } 420 421 /* 422 * Implement substring match as an application-defined SQL function. 423 * Using the SQL LIKE or GLOB operators instead would be a bad idea 424 * because that would require escaping metacharacters in the string 425 * being searched for. 426 */ 427 static void 428 sql_match(sqlite3_context *context, int argc, sqlite3_value **argv) 429 { 430 431 assert(2 == argc); 432 sqlite3_result_int(context, NULL != strcasestr( 433 (const char *)sqlite3_value_text(argv[1]), 434 (const char *)sqlite3_value_text(argv[0]))); 435 } 436 437 /* 438 * Implement regular expression match 439 * as an application-defined SQL function. 440 */ 441 static void 442 sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv) 443 { 444 445 assert(2 == argc); 446 sqlite3_result_int(context, !regexec( 447 (regex_t *)sqlite3_value_blob(argv[0]), 448 (const char *)sqlite3_value_text(argv[1]), 449 0, NULL, 0)); 450 } 451 452 static void 453 sql_append(char **sql, size_t *sz, const char *newstr, int count) 454 { 455 size_t newsz; 456 457 newsz = 1 < count ? (size_t)count : strlen(newstr); 458 *sql = mandoc_realloc(*sql, *sz + newsz + 1); 459 if (1 < count) 460 memset(*sql + *sz, *newstr, (size_t)count); 461 else 462 memcpy(*sql + *sz, newstr, newsz); 463 *sz += newsz; 464 (*sql)[*sz] = '\0'; 465 } 466 467 /* 468 * Prepare the search SQL statement. 469 */ 470 static char * 471 sql_statement(const struct expr *e) 472 { 473 char *sql; 474 size_t sz; 475 int needop; 476 477 sql = mandoc_strdup("SELECT * FROM mpages WHERE "); 478 sz = strlen(sql); 479 480 for (needop = 0; NULL != e; e = e->next) { 481 if (e->and) 482 sql_append(&sql, &sz, " AND ", 1); 483 else if (needop) 484 sql_append(&sql, &sz, " OR ", 1); 485 if (e->open) 486 sql_append(&sql, &sz, "(", e->open); 487 sql_append(&sql, &sz, NULL == e->substr ? 488 "id IN (SELECT pageid FROM keys " 489 "WHERE key REGEXP ? AND bits & ?)" : 490 "id IN (SELECT pageid FROM keys " 491 "WHERE key MATCH ? AND bits & ?)", 1); 492 if (e->close) 493 sql_append(&sql, &sz, ")", e->close); 494 needop = 1; 495 } 496 497 return(sql); 498 } 499 500 /* 501 * Compile a set of string tokens into an expression. 502 * Tokens in "argv" are assumed to be individual expression atoms (e.g., 503 * "(", "foo=bar", etc.). 504 */ 505 static struct expr * 506 exprcomp(const struct mansearch *search, int argc, char *argv[]) 507 { 508 int i, toopen, logic, igncase, toclose; 509 struct expr *first, *next, *cur; 510 511 first = cur = NULL; 512 logic = igncase = toclose = 0; 513 toopen = 1; 514 515 for (i = 0; i < argc; i++) { 516 if (0 == strcmp("(", argv[i])) { 517 if (igncase) 518 goto fail; 519 toopen++; 520 toclose++; 521 continue; 522 } else if (0 == strcmp(")", argv[i])) { 523 if (toopen || logic || igncase || NULL == cur) 524 goto fail; 525 cur->close++; 526 if (0 > --toclose) 527 goto fail; 528 continue; 529 } else if (0 == strcmp("-a", argv[i])) { 530 if (toopen || logic || igncase || NULL == cur) 531 goto fail; 532 logic = 1; 533 continue; 534 } else if (0 == strcmp("-o", argv[i])) { 535 if (toopen || logic || igncase || NULL == cur) 536 goto fail; 537 logic = 2; 538 continue; 539 } else if (0 == strcmp("-i", argv[i])) { 540 if (igncase) 541 goto fail; 542 igncase = 1; 543 continue; 544 } 545 next = exprterm(search, argv[i], !igncase); 546 if (NULL == next) 547 goto fail; 548 next->open = toopen; 549 next->and = (1 == logic); 550 if (NULL != first) { 551 cur->next = next; 552 cur = next; 553 } else 554 cur = first = next; 555 toopen = logic = igncase = 0; 556 } 557 if (toopen || logic || igncase || toclose) 558 goto fail; 559 560 cur->close++; 561 cur = exprspec(cur, TYPE_arch, search->arch, "^(%s|any)$"); 562 exprspec(cur, TYPE_sec, search->sec, "^%s$"); 563 564 return(first); 565 566 fail: 567 if (NULL != first) 568 exprfree(first); 569 return(NULL); 570 } 571 572 static struct expr * 573 exprspec(struct expr *cur, uint64_t key, const char *value, 574 const char *format) 575 { 576 char errbuf[BUFSIZ]; 577 char *cp; 578 int irc; 579 580 if (NULL == value) 581 return(cur); 582 583 mandoc_asprintf(&cp, format, value); 584 cur->next = mandoc_calloc(1, sizeof(struct expr)); 585 cur = cur->next; 586 cur->and = 1; 587 cur->bits = key; 588 if (0 != (irc = regcomp(&cur->regexp, cp, 589 REG_EXTENDED | REG_NOSUB | REG_ICASE))) { 590 regerror(irc, &cur->regexp, errbuf, sizeof(errbuf)); 591 fprintf(stderr, "regcomp: %s\n", errbuf); 592 cur->substr = value; 593 } 594 free(cp); 595 return(cur); 596 } 597 598 static struct expr * 599 exprterm(const struct mansearch *search, char *buf, int cs) 600 { 601 char errbuf[BUFSIZ]; 602 struct expr *e; 603 char *key, *v; 604 uint64_t iterbit; 605 int i, irc; 606 607 if ('\0' == *buf) 608 return(NULL); 609 610 e = mandoc_calloc(1, sizeof(struct expr)); 611 612 /*"whatis" mode uses an opaque string and default fields. */ 613 614 if (MANSEARCH_WHATIS & search->flags) { 615 e->substr = buf; 616 e->bits = search->deftype; 617 return(e); 618 } 619 620 /* 621 * If no =~ is specified, search with equality over names and 622 * descriptions. 623 * If =~ begins the phrase, use name and description fields. 624 */ 625 626 if (NULL == (v = strpbrk(buf, "=~"))) { 627 e->substr = buf; 628 e->bits = search->deftype; 629 return(e); 630 } else if (v == buf) 631 e->bits = search->deftype; 632 633 if ('~' == *v++) { 634 if (NULL != strstr(buf, "arch")) 635 cs = 0; 636 if (0 != (irc = regcomp(&e->regexp, v, 637 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)))) { 638 regerror(irc, &e->regexp, errbuf, sizeof(errbuf)); 639 fprintf(stderr, "regcomp: %s\n", errbuf); 640 free(e); 641 return(NULL); 642 } 643 } else 644 e->substr = v; 645 v[-1] = '\0'; 646 647 /* 648 * Parse out all possible fields. 649 * If the field doesn't resolve, bail. 650 */ 651 652 while (NULL != (key = strsep(&buf, ","))) { 653 if ('\0' == *key) 654 continue; 655 for (i = 0, iterbit = 1; 656 i < mansearch_keymax; 657 i++, iterbit <<= 1) { 658 if (0 == strcasecmp(key, 659 mansearch_keynames[i])) { 660 e->bits |= iterbit; 661 break; 662 } 663 } 664 if (i == mansearch_keymax) { 665 if (strcasecmp(key, "any")) { 666 free(e); 667 return(NULL); 668 } 669 e->bits |= ~0ULL; 670 } 671 } 672 673 return(e); 674 } 675 676 static void 677 exprfree(struct expr *p) 678 { 679 struct expr *pp; 680 681 while (NULL != p) { 682 pp = p->next; 683 free(p); 684 p = pp; 685 } 686 } 687 688 static void * 689 hash_halloc(size_t sz, void *arg) 690 { 691 692 return(mandoc_calloc(sz, 1)); 693 } 694 695 static void * 696 hash_alloc(size_t sz, void *arg) 697 { 698 699 return(mandoc_malloc(sz)); 700 } 701 702 static void 703 hash_free(void *p, size_t sz, void *arg) 704 { 705 706 free(p); 707 } 708