1 /* Id: mansearch.c,v 1.17 2014/01/05 04:13:52 schwarze Exp */ 2 /* 3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <assert.h> 23 #include <fcntl.h> 24 #include <getopt.h> 25 #include <limits.h> 26 #include <regex.h> 27 #include <stdio.h> 28 #include <stdint.h> 29 #include <stddef.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <unistd.h> 33 34 #ifdef HAVE_OHASH 35 #include <ohash.h> 36 #else 37 #include "compat_ohash.h" 38 #endif 39 #include <sqlite3.h> 40 41 #include "mandoc.h" 42 #include "manpath.h" 43 #include "mansearch.h" 44 45 #define SQL_BIND_TEXT(_db, _s, _i, _v) \ 46 do { if (SQLITE_OK != sqlite3_bind_text \ 47 ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \ 48 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \ 49 } while (0) 50 #define SQL_BIND_INT64(_db, _s, _i, _v) \ 51 do { if (SQLITE_OK != sqlite3_bind_int64 \ 52 ((_s), (_i)++, (_v))) \ 53 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \ 54 } while (0) 55 #define SQL_BIND_BLOB(_db, _s, _i, _v) \ 56 do { if (SQLITE_OK != sqlite3_bind_blob \ 57 ((_s), (_i)++, (&_v), sizeof(_v), SQLITE_STATIC)) \ 58 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \ 59 } while (0) 60 61 struct expr { 62 uint64_t bits; /* type-mask */ 63 const char *substr; /* to search for, if applicable */ 64 regex_t regexp; /* compiled regexp, if applicable */ 65 int open; /* opening parentheses before */ 66 int and; /* logical AND before */ 67 int close; /* closing parentheses after */ 68 struct expr *next; /* next in sequence */ 69 }; 70 71 struct match { 72 uint64_t id; /* identifier in database */ 73 char *desc; /* description of manpage */ 74 int form; /* 0 == catpage */ 75 }; 76 77 struct type { 78 uint64_t bits; 79 const char *name; 80 }; 81 82 static const struct type types[] = { 83 { TYPE_An, "An" }, 84 { TYPE_Ar, "Ar" }, 85 { TYPE_At, "At" }, 86 { TYPE_Bsx, "Bsx" }, 87 { TYPE_Bx, "Bx" }, 88 { TYPE_Cd, "Cd" }, 89 { TYPE_Cm, "Cm" }, 90 { TYPE_Dv, "Dv" }, 91 { TYPE_Dx, "Dx" }, 92 { TYPE_Em, "Em" }, 93 { TYPE_Er, "Er" }, 94 { TYPE_Ev, "Ev" }, 95 { TYPE_Fa, "Fa" }, 96 { TYPE_Fl, "Fl" }, 97 { TYPE_Fn, "Fn" }, 98 { TYPE_Fn, "Fo" }, 99 { TYPE_Ft, "Ft" }, 100 { TYPE_Fx, "Fx" }, 101 { TYPE_Ic, "Ic" }, 102 { TYPE_In, "In" }, 103 { TYPE_Lb, "Lb" }, 104 { TYPE_Li, "Li" }, 105 { TYPE_Lk, "Lk" }, 106 { TYPE_Ms, "Ms" }, 107 { TYPE_Mt, "Mt" }, 108 { TYPE_Nd, "Nd" }, 109 { TYPE_Nm, "Nm" }, 110 { TYPE_Nx, "Nx" }, 111 { TYPE_Ox, "Ox" }, 112 { TYPE_Pa, "Pa" }, 113 { TYPE_Rs, "Rs" }, 114 { TYPE_Sh, "Sh" }, 115 { TYPE_Ss, "Ss" }, 116 { TYPE_St, "St" }, 117 { TYPE_Sy, "Sy" }, 118 { TYPE_Tn, "Tn" }, 119 { TYPE_Va, "Va" }, 120 { TYPE_Va, "Vt" }, 121 { TYPE_Xr, "Xr" }, 122 { TYPE_sec, "sec" }, 123 { TYPE_arch,"arch" }, 124 { ~0ULL, "any" }, 125 { 0ULL, NULL } 126 }; 127 128 static void buildnames(struct manpage *, sqlite3 *, 129 sqlite3_stmt *, uint64_t, const char *); 130 static char *buildoutput(sqlite3 *, sqlite3_stmt *, 131 uint64_t, uint64_t); 132 static void *hash_alloc(size_t, void *); 133 static void hash_free(void *, size_t, void *); 134 static void *hash_halloc(size_t, void *); 135 static struct expr *exprcomp(const struct mansearch *, 136 int, char *[]); 137 static void exprfree(struct expr *); 138 static struct expr *exprspec(struct expr *, uint64_t, 139 const char *, const char *); 140 static struct expr *exprterm(const struct mansearch *, char *, int); 141 static void sql_append(char **sql, size_t *sz, 142 const char *newstr, int count); 143 static void sql_match(sqlite3_context *context, 144 int argc, sqlite3_value **argv); 145 static void sql_regexp(sqlite3_context *context, 146 int argc, sqlite3_value **argv); 147 static char *sql_statement(const struct expr *); 148 149 int 150 mansearch(const struct mansearch *search, 151 const struct manpaths *paths, 152 int argc, char *argv[], 153 const char *outkey, 154 struct manpage **res, size_t *sz) 155 { 156 int fd, rc, c, ibit; 157 int64_t id; 158 uint64_t outbit; 159 char buf[PATH_MAX]; 160 char *sql; 161 struct manpage *mpage; 162 struct expr *e, *ep; 163 sqlite3 *db; 164 sqlite3_stmt *s, *s2; 165 struct match *mp; 166 struct ohash_info info; 167 struct ohash htab; 168 unsigned int idx; 169 size_t i, j, cur, maxres; 170 171 memset(&info, 0, sizeof(struct ohash_info)); 172 173 info.halloc = hash_halloc; 174 info.alloc = hash_alloc; 175 info.hfree = hash_free; 176 info.key_offset = offsetof(struct match, id); 177 178 *sz = cur = maxres = 0; 179 sql = NULL; 180 *res = NULL; 181 fd = -1; 182 e = NULL; 183 rc = 0; 184 185 if (0 == argc) 186 goto out; 187 if (NULL == (e = exprcomp(search, argc, argv))) 188 goto out; 189 190 outbit = 0; 191 if (NULL != outkey) { 192 for (ibit = 0; types[ibit].bits; ibit++) { 193 if (0 == strcasecmp(types[ibit].name, outkey)) { 194 outbit = types[ibit].bits; 195 break; 196 } 197 } 198 } 199 200 /* 201 * Save a descriptor to the current working directory. 202 * Since pathnames in the "paths" variable might be relative, 203 * and we'll be chdir()ing into them, we need to keep a handle 204 * on our current directory from which to start the chdir(). 205 */ 206 207 if (NULL == getcwd(buf, PATH_MAX)) { 208 perror(NULL); 209 goto out; 210 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) { 211 perror(buf); 212 goto out; 213 } 214 215 sql = sql_statement(e); 216 217 /* 218 * Loop over the directories (containing databases) for us to 219 * search. 220 * Don't let missing/bad databases/directories phase us. 221 * In each, try to open the resident database and, if it opens, 222 * scan it for our match expression. 223 */ 224 225 for (i = 0; i < paths->sz; i++) { 226 if (-1 == fchdir(fd)) { 227 perror(buf); 228 free(*res); 229 break; 230 } else if (-1 == chdir(paths->paths[i])) { 231 perror(paths->paths[i]); 232 continue; 233 } 234 235 c = sqlite3_open_v2 236 (MANDOC_DB, &db, 237 SQLITE_OPEN_READONLY, NULL); 238 239 if (SQLITE_OK != c) { 240 perror(MANDOC_DB); 241 sqlite3_close(db); 242 continue; 243 } 244 245 /* 246 * Define the SQL functions for substring 247 * and regular expression matching. 248 */ 249 250 c = sqlite3_create_function(db, "match", 2, 251 SQLITE_ANY, NULL, sql_match, NULL, NULL); 252 assert(SQLITE_OK == c); 253 c = sqlite3_create_function(db, "regexp", 2, 254 SQLITE_ANY, NULL, sql_regexp, NULL, NULL); 255 assert(SQLITE_OK == c); 256 257 j = 1; 258 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL); 259 if (SQLITE_OK != c) 260 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 261 262 for (ep = e; NULL != ep; ep = ep->next) { 263 if (NULL == ep->substr) { 264 SQL_BIND_BLOB(db, s, j, ep->regexp); 265 } else 266 SQL_BIND_TEXT(db, s, j, ep->substr); 267 SQL_BIND_INT64(db, s, j, ep->bits); 268 } 269 270 memset(&htab, 0, sizeof(struct ohash)); 271 ohash_init(&htab, 4, &info); 272 273 /* 274 * Hash each entry on its [unique] document identifier. 275 * This is a uint64_t. 276 * Instead of using a hash function, simply convert the 277 * uint64_t to a uint32_t, the hash value's type. 278 * This gives good performance and preserves the 279 * distribution of buckets in the table. 280 */ 281 while (SQLITE_ROW == (c = sqlite3_step(s))) { 282 id = sqlite3_column_int64(s, 2); 283 idx = ohash_lookup_memory 284 (&htab, (char *)&id, 285 sizeof(uint64_t), (uint32_t)id); 286 287 if (NULL != ohash_find(&htab, idx)) 288 continue; 289 290 mp = mandoc_calloc(1, sizeof(struct match)); 291 mp->id = id; 292 mp->desc = mandoc_strdup 293 ((char *)sqlite3_column_text(s, 0)); 294 mp->form = sqlite3_column_int(s, 1); 295 ohash_insert(&htab, idx, mp); 296 } 297 298 if (SQLITE_DONE != c) 299 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 300 301 sqlite3_finalize(s); 302 303 c = sqlite3_prepare_v2(db, 304 "SELECT * FROM mlinks WHERE pageid=?", 305 -1, &s, NULL); 306 if (SQLITE_OK != c) 307 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 308 309 c = sqlite3_prepare_v2(db, 310 "SELECT * FROM keys WHERE pageid=? AND bits & ?", 311 -1, &s2, NULL); 312 if (SQLITE_OK != c) 313 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 314 315 for (mp = ohash_first(&htab, &idx); 316 NULL != mp; 317 mp = ohash_next(&htab, &idx)) { 318 if (cur + 1 > maxres) { 319 maxres += 1024; 320 *res = mandoc_realloc 321 (*res, maxres * sizeof(struct manpage)); 322 } 323 mpage = *res + cur; 324 mpage->desc = mp->desc; 325 mpage->form = mp->form; 326 buildnames(mpage, db, s, mp->id, paths->paths[i]); 327 mpage->output = outbit ? 328 buildoutput(db, s2, mp->id, outbit) : NULL; 329 330 free(mp); 331 cur++; 332 } 333 334 sqlite3_finalize(s); 335 sqlite3_finalize(s2); 336 sqlite3_close(db); 337 ohash_delete(&htab); 338 } 339 rc = 1; 340 out: 341 exprfree(e); 342 if (-1 != fd) 343 close(fd); 344 free(sql); 345 *sz = cur; 346 return(rc); 347 } 348 349 static void 350 buildnames(struct manpage *mpage, sqlite3 *db, sqlite3_stmt *s, 351 uint64_t id, const char *path) 352 { 353 char *newnames; 354 const char *oldnames, *sep1, *name, *sec, *sep2, *arch; 355 size_t i; 356 int c; 357 358 mpage->names = NULL; 359 i = 1; 360 SQL_BIND_INT64(db, s, i, id); 361 while (SQLITE_ROW == (c = sqlite3_step(s))) { 362 363 /* Assemble the list of names. */ 364 365 if (NULL == mpage->names) { 366 oldnames = ""; 367 sep1 = ""; 368 } else { 369 oldnames = mpage->names; 370 sep1 = ", "; 371 } 372 sec = sqlite3_column_text(s, 1); 373 arch = sqlite3_column_text(s, 2); 374 name = sqlite3_column_text(s, 3); 375 sep2 = '\0' == *arch ? "" : "/"; 376 if (-1 == asprintf(&newnames, "%s%s%s(%s%s%s)", 377 oldnames, sep1, name, sec, sep2, arch)) { 378 perror(0); 379 exit((int)MANDOCLEVEL_SYSERR); 380 } 381 free(mpage->names); 382 mpage->names = newnames; 383 384 /* Also save the first file name encountered. */ 385 386 if (NULL != mpage->file) 387 continue; 388 389 name = sqlite3_column_text(s, 0); 390 if (-1 == asprintf(&mpage->file, "%s/%s", path, name)) { 391 perror(0); 392 exit((int)MANDOCLEVEL_SYSERR); 393 } 394 } 395 if (SQLITE_DONE != c) 396 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 397 sqlite3_reset(s); 398 } 399 400 static char * 401 buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit) 402 { 403 char *output, *newoutput; 404 const char *oldoutput, *sep1, *data; 405 size_t i; 406 int c; 407 408 output = NULL; 409 i = 1; 410 SQL_BIND_INT64(db, s, i, id); 411 SQL_BIND_INT64(db, s, i, outbit); 412 while (SQLITE_ROW == (c = sqlite3_step(s))) { 413 if (NULL == output) { 414 oldoutput = ""; 415 sep1 = ""; 416 } else { 417 oldoutput = output; 418 sep1 = " # "; 419 } 420 data = sqlite3_column_text(s, 1); 421 if (-1 == asprintf(&newoutput, "%s%s%s", 422 oldoutput, sep1, data)) { 423 perror(0); 424 exit((int)MANDOCLEVEL_SYSERR); 425 } 426 free(output); 427 output = newoutput; 428 } 429 if (SQLITE_DONE != c) 430 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 431 sqlite3_reset(s); 432 return(output); 433 } 434 435 /* 436 * Implement substring match as an application-defined SQL function. 437 * Using the SQL LIKE or GLOB operators instead would be a bad idea 438 * because that would require escaping metacharacters in the string 439 * being searched for. 440 */ 441 static void 442 sql_match(sqlite3_context *context, int argc, sqlite3_value **argv) 443 { 444 445 assert(2 == argc); 446 sqlite3_result_int(context, NULL != strcasestr( 447 (const char *)sqlite3_value_text(argv[1]), 448 (const char *)sqlite3_value_text(argv[0]))); 449 } 450 451 /* 452 * Implement regular expression match 453 * as an application-defined SQL function. 454 */ 455 static void 456 sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv) 457 { 458 459 assert(2 == argc); 460 sqlite3_result_int(context, !regexec( 461 (regex_t *)sqlite3_value_blob(argv[0]), 462 (const char *)sqlite3_value_text(argv[1]), 463 0, NULL, 0)); 464 } 465 466 static void 467 sql_append(char **sql, size_t *sz, const char *newstr, int count) 468 { 469 size_t newsz; 470 471 newsz = 1 < count ? (size_t)count : strlen(newstr); 472 *sql = mandoc_realloc(*sql, *sz + newsz + 1); 473 if (1 < count) 474 memset(*sql + *sz, *newstr, (size_t)count); 475 else 476 memcpy(*sql + *sz, newstr, newsz); 477 *sz += newsz; 478 (*sql)[*sz] = '\0'; 479 } 480 481 /* 482 * Prepare the search SQL statement. 483 */ 484 static char * 485 sql_statement(const struct expr *e) 486 { 487 char *sql; 488 size_t sz; 489 int needop; 490 491 sql = mandoc_strdup("SELECT * FROM mpages WHERE "); 492 sz = strlen(sql); 493 494 for (needop = 0; NULL != e; e = e->next) { 495 if (e->and) 496 sql_append(&sql, &sz, " AND ", 1); 497 else if (needop) 498 sql_append(&sql, &sz, " OR ", 1); 499 if (e->open) 500 sql_append(&sql, &sz, "(", e->open); 501 sql_append(&sql, &sz, NULL == e->substr ? 502 "id IN (SELECT pageid FROM keys " 503 "WHERE key REGEXP ? AND bits & ?)" : 504 "id IN (SELECT pageid FROM keys " 505 "WHERE key MATCH ? AND bits & ?)", 1); 506 if (e->close) 507 sql_append(&sql, &sz, ")", e->close); 508 needop = 1; 509 } 510 511 return(sql); 512 } 513 514 /* 515 * Compile a set of string tokens into an expression. 516 * Tokens in "argv" are assumed to be individual expression atoms (e.g., 517 * "(", "foo=bar", etc.). 518 */ 519 static struct expr * 520 exprcomp(const struct mansearch *search, int argc, char *argv[]) 521 { 522 int i, toopen, logic, igncase, toclose; 523 struct expr *first, *next, *cur; 524 525 first = cur = NULL; 526 logic = igncase = toclose = 0; 527 toopen = 1; 528 529 for (i = 0; i < argc; i++) { 530 if (0 == strcmp("(", argv[i])) { 531 if (igncase) 532 goto fail; 533 toopen++; 534 toclose++; 535 continue; 536 } else if (0 == strcmp(")", argv[i])) { 537 if (toopen || logic || igncase || NULL == cur) 538 goto fail; 539 cur->close++; 540 if (0 > --toclose) 541 goto fail; 542 continue; 543 } else if (0 == strcmp("-a", argv[i])) { 544 if (toopen || logic || igncase || NULL == cur) 545 goto fail; 546 logic = 1; 547 continue; 548 } else if (0 == strcmp("-o", argv[i])) { 549 if (toopen || logic || igncase || NULL == cur) 550 goto fail; 551 logic = 2; 552 continue; 553 } else if (0 == strcmp("-i", argv[i])) { 554 if (igncase) 555 goto fail; 556 igncase = 1; 557 continue; 558 } 559 next = exprterm(search, argv[i], !igncase); 560 if (NULL == next) 561 goto fail; 562 next->open = toopen; 563 next->and = (1 == logic); 564 if (NULL != first) { 565 cur->next = next; 566 cur = next; 567 } else 568 cur = first = next; 569 toopen = logic = igncase = 0; 570 } 571 if (toopen || logic || igncase || toclose) 572 goto fail; 573 574 cur->close++; 575 cur = exprspec(cur, TYPE_arch, search->arch, "^(%s|any)$"); 576 exprspec(cur, TYPE_sec, search->sec, "^%s$"); 577 578 return(first); 579 580 fail: 581 if (NULL != first) 582 exprfree(first); 583 return(NULL); 584 } 585 586 static struct expr * 587 exprspec(struct expr *cur, uint64_t key, const char *value, 588 const char *format) 589 { 590 char errbuf[BUFSIZ]; 591 char *cp; 592 int irc; 593 594 if (NULL == value) 595 return(cur); 596 597 if (-1 == asprintf(&cp, format, value)) { 598 perror(0); 599 exit((int)MANDOCLEVEL_SYSERR); 600 } 601 cur->next = mandoc_calloc(1, sizeof(struct expr)); 602 cur = cur->next; 603 cur->and = 1; 604 cur->bits = key; 605 if (0 != (irc = regcomp(&cur->regexp, cp, 606 REG_EXTENDED | REG_NOSUB | REG_ICASE))) { 607 regerror(irc, &cur->regexp, errbuf, sizeof(errbuf)); 608 fprintf(stderr, "regcomp: %s\n", errbuf); 609 cur->substr = value; 610 } 611 free(cp); 612 return(cur); 613 } 614 615 static struct expr * 616 exprterm(const struct mansearch *search, char *buf, int cs) 617 { 618 char errbuf[BUFSIZ]; 619 struct expr *e; 620 char *key, *v; 621 size_t i; 622 int irc; 623 624 if ('\0' == *buf) 625 return(NULL); 626 627 e = mandoc_calloc(1, sizeof(struct expr)); 628 629 /*"whatis" mode uses an opaque string and default fields. */ 630 631 if (MANSEARCH_WHATIS & search->flags) { 632 e->substr = buf; 633 e->bits = search->deftype; 634 return(e); 635 } 636 637 /* 638 * If no =~ is specified, search with equality over names and 639 * descriptions. 640 * If =~ begins the phrase, use name and description fields. 641 */ 642 643 if (NULL == (v = strpbrk(buf, "=~"))) { 644 e->substr = buf; 645 e->bits = search->deftype; 646 return(e); 647 } else if (v == buf) 648 e->bits = search->deftype; 649 650 if ('~' == *v++) { 651 if (0 != (irc = regcomp(&e->regexp, v, 652 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)))) { 653 regerror(irc, &e->regexp, errbuf, sizeof(errbuf)); 654 fprintf(stderr, "regcomp: %s\n", errbuf); 655 free(e); 656 return(NULL); 657 } 658 } else 659 e->substr = v; 660 v[-1] = '\0'; 661 662 /* 663 * Parse out all possible fields. 664 * If the field doesn't resolve, bail. 665 */ 666 667 while (NULL != (key = strsep(&buf, ","))) { 668 if ('\0' == *key) 669 continue; 670 i = 0; 671 while (types[i].bits && 672 strcasecmp(types[i].name, key)) 673 i++; 674 if (0 == types[i].bits) { 675 free(e); 676 return(NULL); 677 } 678 e->bits |= types[i].bits; 679 } 680 681 return(e); 682 } 683 684 static void 685 exprfree(struct expr *p) 686 { 687 struct expr *pp; 688 689 while (NULL != p) { 690 pp = p->next; 691 free(p); 692 p = pp; 693 } 694 } 695 696 static void * 697 hash_halloc(size_t sz, void *arg) 698 { 699 700 return(mandoc_calloc(sz, 1)); 701 } 702 703 static void * 704 hash_alloc(size_t sz, void *arg) 705 { 706 707 return(mandoc_malloc(sz)); 708 } 709 710 static void 711 hash_free(void *p, size_t sz, void *arg) 712 { 713 714 free(p); 715 } 716