1 /* $OpenBSD: mansearch.c,v 1.57 2017/07/01 09:47:23 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2013-2017 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/mman.h> 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <err.h> 24 #include <errno.h> 25 #include <fcntl.h> 26 #include <glob.h> 27 #include <limits.h> 28 #include <regex.h> 29 #include <stdio.h> 30 #include <stdint.h> 31 #include <stddef.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <unistd.h> 35 36 #include "mandoc.h" 37 #include "mandoc_aux.h" 38 #include "mandoc_ohash.h" 39 #include "manconf.h" 40 #include "mansearch.h" 41 #include "dbm.h" 42 43 struct expr { 44 /* Used for terms: */ 45 struct dbm_match match; /* Match type and expression. */ 46 uint64_t bits; /* Type mask. */ 47 /* Used for OR and AND groups: */ 48 struct expr *next; /* Next child in the parent group. */ 49 struct expr *child; /* First child in this group. */ 50 enum { EXPR_TERM, EXPR_OR, EXPR_AND } type; 51 }; 52 53 const char *const mansearch_keynames[KEY_MAX] = { 54 "arch", "sec", "Xr", "Ar", "Fa", "Fl", "Dv", "Fn", 55 "Ic", "Pa", "Cm", "Li", "Em", "Cd", "Va", "Ft", 56 "Tn", "Er", "Ev", "Sy", "Sh", "In", "Ss", "Ox", 57 "An", "Mt", "St", "Bx", "At", "Nx", "Fx", "Lk", 58 "Ms", "Bsx", "Dx", "Rs", "Vt", "Lb", "Nm", "Nd" 59 }; 60 61 62 static struct ohash *manmerge(struct expr *, struct ohash *); 63 static struct ohash *manmerge_term(struct expr *, struct ohash *); 64 static struct ohash *manmerge_or(struct expr *, struct ohash *); 65 static struct ohash *manmerge_and(struct expr *, struct ohash *); 66 static char *buildnames(const struct dbm_page *); 67 static char *buildoutput(size_t, struct dbm_page *); 68 static size_t lstlen(const char *, size_t); 69 static void lstcat(char *, size_t *, const char *, const char *); 70 static int lstmatch(const char *, const char *); 71 static struct expr *exprcomp(const struct mansearch *, 72 int, char *[], int *); 73 static struct expr *expr_and(const struct mansearch *, 74 int, char *[], int *); 75 static struct expr *exprterm(const struct mansearch *, 76 int, char *[], int *); 77 static void exprfree(struct expr *); 78 static int manpage_compare(const void *, const void *); 79 80 81 int 82 mansearch(const struct mansearch *search, 83 const struct manpaths *paths, 84 int argc, char *argv[], 85 struct manpage **res, size_t *sz) 86 { 87 char buf[PATH_MAX]; 88 struct dbm_res *rp; 89 struct expr *e; 90 struct dbm_page *page; 91 struct manpage *mpage; 92 struct ohash *htab; 93 size_t cur, i, maxres, outkey; 94 unsigned int slot; 95 int argi, chdir_status, getcwd_status, im; 96 97 argi = 0; 98 if ((e = exprcomp(search, argc, argv, &argi)) == NULL) { 99 *sz = 0; 100 return 0; 101 } 102 103 cur = maxres = 0; 104 if (res != NULL) 105 *res = NULL; 106 107 outkey = KEY_Nd; 108 if (search->outkey != NULL) 109 for (im = 0; im < KEY_MAX; im++) 110 if (0 == strcasecmp(search->outkey, 111 mansearch_keynames[im])) { 112 outkey = im; 113 break; 114 } 115 116 /* 117 * Remember the original working directory, if possible. 118 * This will be needed if the second or a later directory 119 * is given as a relative path. 120 * Do not error out if the current directory is not 121 * searchable: Maybe it won't be needed after all. 122 */ 123 124 if (getcwd(buf, PATH_MAX) == NULL) { 125 getcwd_status = 0; 126 (void)strlcpy(buf, strerror(errno), sizeof(buf)); 127 } else 128 getcwd_status = 1; 129 130 /* 131 * Loop over the directories (containing databases) for us to 132 * search. 133 * Don't let missing/bad databases/directories phase us. 134 * In each, try to open the resident database and, if it opens, 135 * scan it for our match expression. 136 */ 137 138 chdir_status = 0; 139 for (i = 0; i < paths->sz; i++) { 140 if (chdir_status && paths->paths[i][0] != '/') { 141 if ( ! getcwd_status) { 142 warnx("%s: getcwd: %s", paths->paths[i], buf); 143 continue; 144 } else if (chdir(buf) == -1) { 145 warn("%s", buf); 146 continue; 147 } 148 } 149 if (chdir(paths->paths[i]) == -1) { 150 warn("%s", paths->paths[i]); 151 continue; 152 } 153 chdir_status = 1; 154 155 if (dbm_open(MANDOC_DB) == -1) { 156 if (errno != ENOENT) 157 warn("%s/%s", paths->paths[i], MANDOC_DB); 158 continue; 159 } 160 161 if ((htab = manmerge(e, NULL)) == NULL) { 162 dbm_close(); 163 continue; 164 } 165 166 for (rp = ohash_first(htab, &slot); rp != NULL; 167 rp = ohash_next(htab, &slot)) { 168 page = dbm_page_get(rp->page); 169 170 if (lstmatch(search->sec, page->sect) == 0 || 171 lstmatch(search->arch, page->arch) == 0) 172 continue; 173 174 if (res == NULL) { 175 cur = 1; 176 break; 177 } 178 if (cur + 1 > maxres) { 179 maxres += 1024; 180 *res = mandoc_reallocarray(*res, 181 maxres, sizeof(**res)); 182 } 183 mpage = *res + cur; 184 mandoc_asprintf(&mpage->file, "%s/%s", 185 paths->paths[i], page->file + 1); 186 mpage->names = buildnames(page); 187 mpage->output = buildoutput(outkey, page); 188 mpage->ipath = i; 189 mpage->bits = rp->bits; 190 mpage->sec = *page->sect - '0'; 191 if (mpage->sec < 0 || mpage->sec > 9) 192 mpage->sec = 10; 193 mpage->form = *page->file; 194 free(rp); 195 cur++; 196 } 197 ohash_delete(htab); 198 free(htab); 199 dbm_close(); 200 201 /* 202 * In man(1) mode, prefer matches in earlier trees 203 * over matches in later trees. 204 */ 205 206 if (cur && search->firstmatch) 207 break; 208 } 209 if (res != NULL) 210 qsort(*res, cur, sizeof(struct manpage), manpage_compare); 211 if (chdir_status && getcwd_status && chdir(buf) == -1) 212 warn("%s", buf); 213 exprfree(e); 214 *sz = cur; 215 return res != NULL || cur; 216 } 217 218 /* 219 * Merge the results for the expression tree rooted at e 220 * into the the result list htab. 221 */ 222 static struct ohash * 223 manmerge(struct expr *e, struct ohash *htab) 224 { 225 switch (e->type) { 226 case EXPR_TERM: 227 return manmerge_term(e, htab); 228 case EXPR_OR: 229 return manmerge_or(e->child, htab); 230 case EXPR_AND: 231 return manmerge_and(e->child, htab); 232 default: 233 abort(); 234 } 235 } 236 237 static struct ohash * 238 manmerge_term(struct expr *e, struct ohash *htab) 239 { 240 struct dbm_res res, *rp; 241 uint64_t ib; 242 unsigned int slot; 243 int im; 244 245 if (htab == NULL) { 246 htab = mandoc_malloc(sizeof(*htab)); 247 mandoc_ohash_init(htab, 4, offsetof(struct dbm_res, page)); 248 } 249 250 for (im = 0, ib = 1; im < KEY_MAX; im++, ib <<= 1) { 251 if ((e->bits & ib) == 0) 252 continue; 253 254 switch (ib) { 255 case TYPE_arch: 256 dbm_page_byarch(&e->match); 257 break; 258 case TYPE_sec: 259 dbm_page_bysect(&e->match); 260 break; 261 case TYPE_Nm: 262 dbm_page_byname(&e->match); 263 break; 264 case TYPE_Nd: 265 dbm_page_bydesc(&e->match); 266 break; 267 default: 268 dbm_page_bymacro(im - 2, &e->match); 269 break; 270 } 271 272 /* 273 * When hashing for deduplication, use the unique 274 * page ID itself instead of a hash function; 275 * that is quite efficient. 276 */ 277 278 for (;;) { 279 res = dbm_page_next(); 280 if (res.page == -1) 281 break; 282 slot = ohash_lookup_memory(htab, 283 (char *)&res, sizeof(res.page), res.page); 284 if ((rp = ohash_find(htab, slot)) != NULL) { 285 rp->bits |= res.bits; 286 continue; 287 } 288 rp = mandoc_malloc(sizeof(*rp)); 289 *rp = res; 290 ohash_insert(htab, slot, rp); 291 } 292 } 293 return htab; 294 } 295 296 static struct ohash * 297 manmerge_or(struct expr *e, struct ohash *htab) 298 { 299 while (e != NULL) { 300 htab = manmerge(e, htab); 301 e = e->next; 302 } 303 return htab; 304 } 305 306 static struct ohash * 307 manmerge_and(struct expr *e, struct ohash *htab) 308 { 309 struct ohash *hand, *h1, *h2; 310 struct dbm_res *res; 311 unsigned int slot1, slot2; 312 313 /* Evaluate the first term of the AND clause. */ 314 315 hand = manmerge(e, NULL); 316 317 while ((e = e->next) != NULL) { 318 319 /* Evaluate the next term and prepare for ANDing. */ 320 321 h2 = manmerge(e, NULL); 322 if (ohash_entries(h2) < ohash_entries(hand)) { 323 h1 = h2; 324 h2 = hand; 325 } else 326 h1 = hand; 327 hand = mandoc_malloc(sizeof(*hand)); 328 mandoc_ohash_init(hand, 4, offsetof(struct dbm_res, page)); 329 330 /* Keep all pages that are in both result sets. */ 331 332 for (res = ohash_first(h1, &slot1); res != NULL; 333 res = ohash_next(h1, &slot1)) { 334 if (ohash_find(h2, ohash_lookup_memory(h2, 335 (char *)res, sizeof(res->page), 336 res->page)) == NULL) 337 free(res); 338 else 339 ohash_insert(hand, ohash_lookup_memory(hand, 340 (char *)res, sizeof(res->page), 341 res->page), res); 342 } 343 344 /* Discard the merged results. */ 345 346 for (res = ohash_first(h2, &slot2); res != NULL; 347 res = ohash_next(h2, &slot2)) 348 free(res); 349 ohash_delete(h2); 350 free(h2); 351 ohash_delete(h1); 352 free(h1); 353 } 354 355 /* Merge the result of the AND into htab. */ 356 357 if (htab == NULL) 358 return hand; 359 360 for (res = ohash_first(hand, &slot1); res != NULL; 361 res = ohash_next(hand, &slot1)) { 362 slot2 = ohash_lookup_memory(htab, 363 (char *)res, sizeof(res->page), res->page); 364 if (ohash_find(htab, slot2) == NULL) 365 ohash_insert(htab, slot2, res); 366 else 367 free(res); 368 } 369 370 /* Discard the merged result. */ 371 372 ohash_delete(hand); 373 free(hand); 374 return htab; 375 } 376 377 void 378 mansearch_free(struct manpage *res, size_t sz) 379 { 380 size_t i; 381 382 for (i = 0; i < sz; i++) { 383 free(res[i].file); 384 free(res[i].names); 385 free(res[i].output); 386 } 387 free(res); 388 } 389 390 static int 391 manpage_compare(const void *vp1, const void *vp2) 392 { 393 const struct manpage *mp1, *mp2; 394 int diff; 395 396 mp1 = vp1; 397 mp2 = vp2; 398 return (diff = mp2->bits - mp1->bits) ? diff : 399 (diff = mp1->sec - mp2->sec) ? diff : 400 strcasecmp(mp1->names, mp2->names); 401 } 402 403 static char * 404 buildnames(const struct dbm_page *page) 405 { 406 char *buf; 407 size_t i, sz; 408 409 sz = lstlen(page->name, 2) + 1 + lstlen(page->sect, 2) + 410 (page->arch == NULL ? 0 : 1 + lstlen(page->arch, 2)) + 2; 411 buf = mandoc_malloc(sz); 412 i = 0; 413 lstcat(buf, &i, page->name, ", "); 414 buf[i++] = '('; 415 lstcat(buf, &i, page->sect, ", "); 416 if (page->arch != NULL) { 417 buf[i++] = '/'; 418 lstcat(buf, &i, page->arch, ", "); 419 } 420 buf[i++] = ')'; 421 buf[i++] = '\0'; 422 assert(i == sz); 423 return buf; 424 } 425 426 /* 427 * Count the buffer space needed to print the NUL-terminated 428 * list of NUL-terminated strings, when printing sep separator 429 * characters between strings. 430 */ 431 static size_t 432 lstlen(const char *cp, size_t sep) 433 { 434 size_t sz; 435 436 for (sz = 0;; sz++) { 437 if (cp[0] == '\0') { 438 if (cp[1] == '\0') 439 break; 440 sz += sep - 1; 441 } else if (cp[0] < ' ') 442 sz--; 443 cp++; 444 } 445 return sz; 446 } 447 448 /* 449 * Print the NUL-terminated list of NUL-terminated strings 450 * into the buffer, seperating strings with sep. 451 */ 452 static void 453 lstcat(char *buf, size_t *i, const char *cp, const char *sep) 454 { 455 const char *s; 456 457 for (;;) { 458 if (cp[0] == '\0') { 459 if (cp[1] == '\0') 460 break; 461 s = sep; 462 while (*s != '\0') 463 buf[(*i)++] = *s++; 464 } else if (cp[0] >= ' ') 465 buf[(*i)++] = cp[0]; 466 cp++; 467 } 468 } 469 470 /* 471 * Return 1 if the string *want occurs in any of the strings 472 * in the NUL-terminated string list *have, or 0 otherwise. 473 * If either argument is NULL or empty, assume no filtering 474 * is desired and return 1. 475 */ 476 static int 477 lstmatch(const char *want, const char *have) 478 { 479 if (want == NULL || have == NULL || *have == '\0') 480 return 1; 481 while (*have != '\0') { 482 if (strcasestr(have, want) != NULL) 483 return 1; 484 have = strchr(have, '\0') + 1; 485 } 486 return 0; 487 } 488 489 /* 490 * Build a list of values taken by the macro im in the manual page. 491 */ 492 static char * 493 buildoutput(size_t im, struct dbm_page *page) 494 { 495 const char *oldoutput, *sep, *input; 496 char *output, *newoutput, *value; 497 size_t sz, i; 498 499 switch (im) { 500 case KEY_Nd: 501 return mandoc_strdup(page->desc); 502 case KEY_Nm: 503 input = page->name; 504 break; 505 case KEY_sec: 506 input = page->sect; 507 break; 508 case KEY_arch: 509 input = page->arch; 510 if (input == NULL) 511 input = "all\0"; 512 break; 513 default: 514 input = NULL; 515 break; 516 } 517 518 if (input != NULL) { 519 sz = lstlen(input, 3) + 1; 520 output = mandoc_malloc(sz); 521 i = 0; 522 lstcat(output, &i, input, " # "); 523 output[i++] = '\0'; 524 assert(i == sz); 525 return output; 526 } 527 528 output = NULL; 529 dbm_macro_bypage(im - 2, page->addr); 530 while ((value = dbm_macro_next()) != NULL) { 531 if (output == NULL) { 532 oldoutput = ""; 533 sep = ""; 534 } else { 535 oldoutput = output; 536 sep = " # "; 537 } 538 mandoc_asprintf(&newoutput, "%s%s%s", oldoutput, sep, value); 539 free(output); 540 output = newoutput; 541 } 542 return output; 543 } 544 545 /* 546 * Compile a set of string tokens into an expression. 547 * Tokens in "argv" are assumed to be individual expression atoms (e.g., 548 * "(", "foo=bar", etc.). 549 */ 550 static struct expr * 551 exprcomp(const struct mansearch *search, int argc, char *argv[], int *argi) 552 { 553 struct expr *parent, *child; 554 int needterm, nested; 555 556 if ((nested = *argi) == argc) 557 return NULL; 558 needterm = 1; 559 parent = child = NULL; 560 while (*argi < argc) { 561 if (strcmp(")", argv[*argi]) == 0) { 562 if (needterm) 563 warnx("missing term " 564 "before closing parenthesis"); 565 needterm = 0; 566 if (nested) 567 break; 568 warnx("ignoring unmatched right parenthesis"); 569 ++*argi; 570 continue; 571 } 572 if (strcmp("-o", argv[*argi]) == 0) { 573 if (needterm) { 574 if (*argi > 0) 575 warnx("ignoring -o after %s", 576 argv[*argi - 1]); 577 else 578 warnx("ignoring initial -o"); 579 } 580 needterm = 1; 581 ++*argi; 582 continue; 583 } 584 needterm = 0; 585 if (child == NULL) { 586 child = expr_and(search, argc, argv, argi); 587 continue; 588 } 589 if (parent == NULL) { 590 parent = mandoc_calloc(1, sizeof(*parent)); 591 parent->type = EXPR_OR; 592 parent->next = NULL; 593 parent->child = child; 594 } 595 child->next = expr_and(search, argc, argv, argi); 596 child = child->next; 597 } 598 if (needterm && *argi) 599 warnx("ignoring trailing %s", argv[*argi - 1]); 600 return parent == NULL ? child : parent; 601 } 602 603 static struct expr * 604 expr_and(const struct mansearch *search, int argc, char *argv[], int *argi) 605 { 606 struct expr *parent, *child; 607 int needterm; 608 609 needterm = 1; 610 parent = child = NULL; 611 while (*argi < argc) { 612 if (strcmp(")", argv[*argi]) == 0) { 613 if (needterm) 614 warnx("missing term " 615 "before closing parenthesis"); 616 needterm = 0; 617 break; 618 } 619 if (strcmp("-o", argv[*argi]) == 0) 620 break; 621 if (strcmp("-a", argv[*argi]) == 0) { 622 if (needterm) { 623 if (*argi > 0) 624 warnx("ignoring -a after %s", 625 argv[*argi - 1]); 626 else 627 warnx("ignoring initial -a"); 628 } 629 needterm = 1; 630 ++*argi; 631 continue; 632 } 633 if (needterm == 0) 634 break; 635 if (child == NULL) { 636 child = exprterm(search, argc, argv, argi); 637 if (child != NULL) 638 needterm = 0; 639 continue; 640 } 641 needterm = 0; 642 if (parent == NULL) { 643 parent = mandoc_calloc(1, sizeof(*parent)); 644 parent->type = EXPR_AND; 645 parent->next = NULL; 646 parent->child = child; 647 } 648 child->next = exprterm(search, argc, argv, argi); 649 if (child->next != NULL) { 650 child = child->next; 651 needterm = 0; 652 } 653 } 654 if (needterm && *argi) 655 warnx("ignoring trailing %s", argv[*argi - 1]); 656 return parent == NULL ? child : parent; 657 } 658 659 static struct expr * 660 exprterm(const struct mansearch *search, int argc, char *argv[], int *argi) 661 { 662 char errbuf[BUFSIZ]; 663 struct expr *e; 664 char *key, *val; 665 uint64_t iterbit; 666 int cs, i, irc; 667 668 if (strcmp("(", argv[*argi]) == 0) { 669 ++*argi; 670 e = exprcomp(search, argc, argv, argi); 671 if (*argi < argc) { 672 assert(strcmp(")", argv[*argi]) == 0); 673 ++*argi; 674 } else 675 warnx("unclosed parenthesis"); 676 return e; 677 } 678 679 if (strcmp("-i", argv[*argi]) == 0 && *argi + 1 < argc) { 680 cs = 0; 681 ++*argi; 682 } else 683 cs = 1; 684 685 e = mandoc_calloc(1, sizeof(*e)); 686 e->type = EXPR_TERM; 687 e->bits = 0; 688 e->next = NULL; 689 e->child = NULL; 690 691 if (search->argmode == ARG_NAME) { 692 e->bits = TYPE_Nm; 693 e->match.type = DBM_EXACT; 694 e->match.str = argv[(*argi)++]; 695 return e; 696 } 697 698 /* 699 * Separate macro keys from search string. 700 * If needed, request regular expression handling. 701 */ 702 703 if (search->argmode == ARG_WORD) { 704 e->bits = TYPE_Nm; 705 e->match.type = DBM_REGEX; 706 mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", argv[*argi]); 707 cs = 0; 708 } else if ((val = strpbrk(argv[*argi], "=~")) == NULL) { 709 e->bits = TYPE_Nm | TYPE_Nd; 710 e->match.type = DBM_SUB; 711 e->match.str = argv[*argi]; 712 } else { 713 if (val == argv[*argi]) 714 e->bits = TYPE_Nm | TYPE_Nd; 715 if (*val == '=') { 716 e->match.type = DBM_SUB; 717 e->match.str = val + 1; 718 } else 719 e->match.type = DBM_REGEX; 720 *val++ = '\0'; 721 if (strstr(argv[*argi], "arch") != NULL) 722 cs = 0; 723 } 724 725 /* Compile regular expressions. */ 726 727 if (e->match.type == DBM_REGEX) { 728 e->match.re = mandoc_malloc(sizeof(*e->match.re)); 729 irc = regcomp(e->match.re, val, 730 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)); 731 if (irc) { 732 regerror(irc, e->match.re, errbuf, sizeof(errbuf)); 733 warnx("regcomp /%s/: %s", val, errbuf); 734 } 735 if (search->argmode == ARG_WORD) 736 free(val); 737 if (irc) { 738 free(e->match.re); 739 free(e); 740 ++*argi; 741 return NULL; 742 } 743 } 744 745 if (e->bits) { 746 ++*argi; 747 return e; 748 } 749 750 /* 751 * Parse out all possible fields. 752 * If the field doesn't resolve, bail. 753 */ 754 755 while (NULL != (key = strsep(&argv[*argi], ","))) { 756 if ('\0' == *key) 757 continue; 758 for (i = 0, iterbit = 1; i < KEY_MAX; i++, iterbit <<= 1) { 759 if (0 == strcasecmp(key, mansearch_keynames[i])) { 760 e->bits |= iterbit; 761 break; 762 } 763 } 764 if (i == KEY_MAX) { 765 if (strcasecmp(key, "any")) 766 warnx("treating unknown key " 767 "\"%s\" as \"any\"", key); 768 e->bits |= ~0ULL; 769 } 770 } 771 772 ++*argi; 773 return e; 774 } 775 776 static void 777 exprfree(struct expr *e) 778 { 779 if (e->next != NULL) 780 exprfree(e->next); 781 if (e->child != NULL) 782 exprfree(e->child); 783 free(e); 784 } 785