1 /* $Vendor-Id: mandocdb.c,v 1.6 2011/09/17 13:54:27 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <sys/param.h> 22 23 #include <assert.h> 24 #include <dirent.h> 25 #include <fcntl.h> 26 #include <getopt.h> 27 #include <stdio.h> 28 #include <stdint.h> 29 #include <stdlib.h> 30 #include <string.h> 31 32 #ifdef __linux__ 33 # include <db_185.h> 34 #else 35 # include <db.h> 36 #endif 37 38 #include "man.h" 39 #include "mdoc.h" 40 #include "mandoc.h" 41 42 #define MANDOC_DB "mandoc.db" 43 #define MANDOC_IDX "mandoc.index" 44 #define MANDOC_BUFSZ BUFSIZ 45 #define MANDOC_SLOP 1024 46 47 /* Bit-fields. See mandocdb.8. */ 48 49 #define TYPE_NAME 0x01 50 #define TYPE_FUNCTION 0x02 51 #define TYPE_UTILITY 0x04 52 #define TYPE_INCLUDES 0x08 53 #define TYPE_VARIABLE 0x10 54 #define TYPE_STANDARD 0x20 55 #define TYPE_AUTHOR 0x40 56 #define TYPE_CONFIG 0x80 57 #define TYPE_DESC 0x100 58 #define TYPE_XREF 0x200 59 #define TYPE_PATH 0x400 60 #define TYPE_ENV 0x800 61 #define TYPE_ERR 0x1000 62 63 /* Tiny list for files. No need to bring in QUEUE. */ 64 65 struct of { 66 char *fname; /* heap-allocated */ 67 struct of *next; /* NULL for last one */ 68 struct of *first; /* first in list */ 69 }; 70 71 /* Buffer for storing growable data. */ 72 73 struct buf { 74 char *cp; 75 size_t len; /* current length */ 76 size_t size; /* total buffer size */ 77 }; 78 79 /* Operation we're going to perform. */ 80 81 enum op { 82 OP_NEW = 0, /* new database */ 83 OP_UPDATE, /* delete/add entries in existing database */ 84 OP_DELETE /* delete entries from existing database */ 85 }; 86 87 #define MAN_ARGS DB *hash, \ 88 struct buf *buf, \ 89 struct buf *dbuf, \ 90 const struct man_node *n 91 #define MDOC_ARGS DB *hash, \ 92 struct buf *buf, \ 93 struct buf *dbuf, \ 94 const struct mdoc_node *n, \ 95 const struct mdoc_meta *m 96 97 static void buf_appendmdoc(struct buf *, 98 const struct mdoc_node *, int); 99 static void buf_append(struct buf *, const char *); 100 static void buf_appendb(struct buf *, 101 const void *, size_t); 102 static void dbt_put(DB *, const char *, DBT *, DBT *); 103 static void hash_put(DB *, const struct buf *, int); 104 static void hash_reset(DB **); 105 static void index_merge(const struct of *, struct mparse *, 106 struct buf *, struct buf *, 107 DB *, DB *, const char *, 108 DB *, const char *, int, 109 recno_t, const recno_t *, size_t); 110 static void index_prune(const struct of *, DB *, 111 const char *, DB *, const char *, 112 int, recno_t *, recno_t **, size_t *); 113 static void ofile_argbuild(char *[], int, int, struct of **); 114 static int ofile_dirbuild(const char *, int, struct of **); 115 static void ofile_free(struct of *); 116 static int pman_node(MAN_ARGS); 117 static void pmdoc_node(MDOC_ARGS); 118 static void pmdoc_An(MDOC_ARGS); 119 static void pmdoc_Cd(MDOC_ARGS); 120 static void pmdoc_Er(MDOC_ARGS); 121 static void pmdoc_Ev(MDOC_ARGS); 122 static void pmdoc_Fd(MDOC_ARGS); 123 static void pmdoc_In(MDOC_ARGS); 124 static void pmdoc_Fn(MDOC_ARGS); 125 static void pmdoc_Fo(MDOC_ARGS); 126 static void pmdoc_Nd(MDOC_ARGS); 127 static void pmdoc_Nm(MDOC_ARGS); 128 static void pmdoc_Pa(MDOC_ARGS); 129 static void pmdoc_St(MDOC_ARGS); 130 static void pmdoc_Vt(MDOC_ARGS); 131 static void pmdoc_Xr(MDOC_ARGS); 132 static void usage(void); 133 134 typedef void (*pmdoc_nf)(MDOC_ARGS); 135 136 static const pmdoc_nf mdocs[MDOC_MAX] = { 137 NULL, /* Ap */ 138 NULL, /* Dd */ 139 NULL, /* Dt */ 140 NULL, /* Os */ 141 NULL, /* Sh */ 142 NULL, /* Ss */ 143 NULL, /* Pp */ 144 NULL, /* D1 */ 145 NULL, /* Dl */ 146 NULL, /* Bd */ 147 NULL, /* Ed */ 148 NULL, /* Bl */ 149 NULL, /* El */ 150 NULL, /* It */ 151 NULL, /* Ad */ 152 pmdoc_An, /* An */ 153 NULL, /* Ar */ 154 pmdoc_Cd, /* Cd */ 155 NULL, /* Cm */ 156 NULL, /* Dv */ 157 pmdoc_Er, /* Er */ 158 pmdoc_Ev, /* Ev */ 159 NULL, /* Ex */ 160 NULL, /* Fa */ 161 pmdoc_Fd, /* Fd */ 162 NULL, /* Fl */ 163 pmdoc_Fn, /* Fn */ 164 NULL, /* Ft */ 165 NULL, /* Ic */ 166 pmdoc_In, /* In */ 167 NULL, /* Li */ 168 pmdoc_Nd, /* Nd */ 169 pmdoc_Nm, /* Nm */ 170 NULL, /* Op */ 171 NULL, /* Ot */ 172 pmdoc_Pa, /* Pa */ 173 NULL, /* Rv */ 174 pmdoc_St, /* St */ 175 pmdoc_Vt, /* Va */ 176 pmdoc_Vt, /* Vt */ 177 pmdoc_Xr, /* Xr */ 178 NULL, /* %A */ 179 NULL, /* %B */ 180 NULL, /* %D */ 181 NULL, /* %I */ 182 NULL, /* %J */ 183 NULL, /* %N */ 184 NULL, /* %O */ 185 NULL, /* %P */ 186 NULL, /* %R */ 187 NULL, /* %T */ 188 NULL, /* %V */ 189 NULL, /* Ac */ 190 NULL, /* Ao */ 191 NULL, /* Aq */ 192 NULL, /* At */ 193 NULL, /* Bc */ 194 NULL, /* Bf */ 195 NULL, /* Bo */ 196 NULL, /* Bq */ 197 NULL, /* Bsx */ 198 NULL, /* Bx */ 199 NULL, /* Db */ 200 NULL, /* Dc */ 201 NULL, /* Do */ 202 NULL, /* Dq */ 203 NULL, /* Ec */ 204 NULL, /* Ef */ 205 NULL, /* Em */ 206 NULL, /* Eo */ 207 NULL, /* Fx */ 208 NULL, /* Ms */ 209 NULL, /* No */ 210 NULL, /* Ns */ 211 NULL, /* Nx */ 212 NULL, /* Ox */ 213 NULL, /* Pc */ 214 NULL, /* Pf */ 215 NULL, /* Po */ 216 NULL, /* Pq */ 217 NULL, /* Qc */ 218 NULL, /* Ql */ 219 NULL, /* Qo */ 220 NULL, /* Qq */ 221 NULL, /* Re */ 222 NULL, /* Rs */ 223 NULL, /* Sc */ 224 NULL, /* So */ 225 NULL, /* Sq */ 226 NULL, /* Sm */ 227 NULL, /* Sx */ 228 NULL, /* Sy */ 229 NULL, /* Tn */ 230 NULL, /* Ux */ 231 NULL, /* Xc */ 232 NULL, /* Xo */ 233 pmdoc_Fo, /* Fo */ 234 NULL, /* Fc */ 235 NULL, /* Oo */ 236 NULL, /* Oc */ 237 NULL, /* Bk */ 238 NULL, /* Ek */ 239 NULL, /* Bt */ 240 NULL, /* Hf */ 241 NULL, /* Fr */ 242 NULL, /* Ud */ 243 NULL, /* Lb */ 244 NULL, /* Lp */ 245 NULL, /* Lk */ 246 NULL, /* Mt */ 247 NULL, /* Brq */ 248 NULL, /* Bro */ 249 NULL, /* Brc */ 250 NULL, /* %C */ 251 NULL, /* Es */ 252 NULL, /* En */ 253 NULL, /* Dx */ 254 NULL, /* %Q */ 255 NULL, /* br */ 256 NULL, /* sp */ 257 NULL, /* %U */ 258 NULL, /* Ta */ 259 }; 260 261 static const char *progname; 262 263 int 264 main(int argc, char *argv[]) 265 { 266 struct mparse *mp; /* parse sequence */ 267 enum op op; /* current operation */ 268 const char *dir; 269 char ibuf[MAXPATHLEN], /* index fname */ 270 fbuf[MAXPATHLEN]; /* btree fname */ 271 int verb, /* output verbosity */ 272 ch, i, flags; 273 DB *idx, /* index database */ 274 *db, /* keyword database */ 275 *hash; /* temporary keyword hashtable */ 276 BTREEINFO info; /* btree configuration */ 277 recno_t maxrec; /* supremum of all records */ 278 recno_t *recs; /* buffer of empty records */ 279 size_t sz1, sz2, 280 recsz, /* buffer size of recs */ 281 reccur; /* valid number of recs */ 282 struct buf buf, /* keyword buffer */ 283 dbuf; /* description buffer */ 284 struct of *of; /* list of files for processing */ 285 extern int optind; 286 extern char *optarg; 287 288 progname = strrchr(argv[0], '/'); 289 if (progname == NULL) 290 progname = argv[0]; 291 else 292 ++progname; 293 294 verb = 0; 295 of = NULL; 296 db = idx = NULL; 297 mp = NULL; 298 hash = NULL; 299 recs = NULL; 300 recsz = reccur = 0; 301 maxrec = 0; 302 op = OP_NEW; 303 dir = NULL; 304 305 while (-1 != (ch = getopt(argc, argv, "d:u:v"))) 306 switch (ch) { 307 case ('d'): 308 dir = optarg; 309 op = OP_UPDATE; 310 break; 311 case ('u'): 312 dir = optarg; 313 op = OP_DELETE; 314 break; 315 case ('v'): 316 verb++; 317 break; 318 default: 319 usage(); 320 return((int)MANDOCLEVEL_BADARG); 321 } 322 323 argc -= optind; 324 argv += optind; 325 326 memset(&info, 0, sizeof(BTREEINFO)); 327 info.flags = R_DUP; 328 329 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); 330 331 memset(&buf, 0, sizeof(struct buf)); 332 memset(&dbuf, 0, sizeof(struct buf)); 333 334 buf.size = dbuf.size = MANDOC_BUFSZ; 335 336 buf.cp = mandoc_malloc(buf.size); 337 dbuf.cp = mandoc_malloc(dbuf.size); 338 339 flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR; 340 341 if (OP_UPDATE == op || OP_DELETE == op) { 342 ibuf[0] = fbuf[0] = '\0'; 343 344 strlcat(fbuf, dir, MAXPATHLEN); 345 strlcat(fbuf, "/", MAXPATHLEN); 346 sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN); 347 348 strlcat(ibuf, dir, MAXPATHLEN); 349 strlcat(ibuf, "/", MAXPATHLEN); 350 sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); 351 352 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { 353 fprintf(stderr, "%s: Path too long\n", dir); 354 exit((int)MANDOCLEVEL_BADARG); 355 } 356 357 db = dbopen(fbuf, flags, 0644, DB_BTREE, &info); 358 idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL); 359 360 if (NULL == db) { 361 perror(fbuf); 362 exit((int)MANDOCLEVEL_SYSERR); 363 } else if (NULL == db) { 364 perror(ibuf); 365 exit((int)MANDOCLEVEL_SYSERR); 366 } 367 368 if (verb > 2) { 369 printf("%s: Opened\n", fbuf); 370 printf("%s: Opened\n", ibuf); 371 } 372 373 ofile_argbuild(argv, argc, verb, &of); 374 if (NULL == of) 375 goto out; 376 377 of = of->first; 378 379 index_prune(of, db, fbuf, idx, ibuf, verb, 380 &maxrec, &recs, &recsz); 381 382 if (OP_UPDATE == op) 383 index_merge(of, mp, &dbuf, &buf, hash, 384 db, fbuf, idx, ibuf, verb, 385 maxrec, recs, reccur); 386 387 goto out; 388 } 389 390 for (i = 0; i < argc; i++) { 391 ibuf[0] = fbuf[0] = '\0'; 392 393 strlcat(fbuf, argv[i], MAXPATHLEN); 394 strlcat(fbuf, "/", MAXPATHLEN); 395 sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN); 396 397 strlcat(ibuf, argv[i], MAXPATHLEN); 398 strlcat(ibuf, "/", MAXPATHLEN); 399 sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); 400 401 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { 402 fprintf(stderr, "%s: Path too long\n", argv[i]); 403 exit((int)MANDOCLEVEL_BADARG); 404 } 405 406 db = dbopen(fbuf, flags, 0644, DB_BTREE, &info); 407 idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL); 408 409 if (NULL == db) { 410 perror(fbuf); 411 exit((int)MANDOCLEVEL_SYSERR); 412 } else if (NULL == db) { 413 perror(ibuf); 414 exit((int)MANDOCLEVEL_SYSERR); 415 } 416 417 if (verb > 2) { 418 printf("%s: Truncated\n", fbuf); 419 printf("%s: Truncated\n", ibuf); 420 } 421 422 ofile_free(of); 423 of = NULL; 424 425 if ( ! ofile_dirbuild(argv[i], verb, &of)) 426 exit((int)MANDOCLEVEL_SYSERR); 427 428 if (NULL == of) 429 continue; 430 431 of = of->first; 432 433 index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, 434 idx, ibuf, verb, maxrec, recs, reccur); 435 } 436 437 out: 438 if (db) 439 (*db->close)(db); 440 if (idx) 441 (*idx->close)(idx); 442 if (hash) 443 (*hash->close)(hash); 444 if (mp) 445 mparse_free(mp); 446 447 ofile_free(of); 448 free(buf.cp); 449 free(dbuf.cp); 450 free(recs); 451 452 return(MANDOCLEVEL_OK); 453 } 454 455 void 456 index_merge(const struct of *of, struct mparse *mp, 457 struct buf *dbuf, struct buf *buf, 458 DB *hash, DB *db, const char *dbf, 459 DB *idx, const char *idxf, int verb, 460 recno_t maxrec, const recno_t *recs, size_t reccur) 461 { 462 recno_t rec; 463 int ch; 464 DBT key, val; 465 struct mdoc *mdoc; 466 struct man *man; 467 const char *fn, *msec, *mtitle, *arch; 468 size_t sv; 469 unsigned seq; 470 char vbuf[8]; 471 472 for (rec = 0; of; of = of->next) { 473 fn = of->fname; 474 if (reccur > 0) { 475 --reccur; 476 rec = recs[(int)reccur]; 477 } else if (maxrec > 0) { 478 rec = maxrec; 479 maxrec = 0; 480 } else 481 rec++; 482 483 mparse_reset(mp); 484 hash_reset(&hash); 485 486 if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) { 487 fprintf(stderr, "%s: Parse failure\n", fn); 488 continue; 489 } 490 491 mparse_result(mp, &mdoc, &man); 492 if (NULL == mdoc && NULL == man) 493 continue; 494 495 msec = NULL != mdoc ? 496 mdoc_meta(mdoc)->msec : man_meta(man)->msec; 497 mtitle = NULL != mdoc ? 498 mdoc_meta(mdoc)->title : man_meta(man)->title; 499 arch = NULL != mdoc ? 500 mdoc_meta(mdoc)->arch : NULL; 501 502 if (NULL == arch) 503 arch = ""; 504 505 /* 506 * The index record value consists of a nil-terminated 507 * filename, a nil-terminated manual section, and a 508 * nil-terminated description. Since the description 509 * may not be set, we set a sentinel to see if we're 510 * going to write a nil byte in its place. 511 */ 512 513 dbuf->len = 0; 514 buf_appendb(dbuf, fn, strlen(fn) + 1); 515 buf_appendb(dbuf, msec, strlen(msec) + 1); 516 buf_appendb(dbuf, mtitle, strlen(mtitle) + 1); 517 buf_appendb(dbuf, arch, strlen(arch) + 1); 518 519 sv = dbuf->len; 520 521 /* Fix the record number in the btree value. */ 522 523 if (mdoc) 524 pmdoc_node(hash, buf, dbuf, 525 mdoc_node(mdoc), mdoc_meta(mdoc)); 526 else 527 pman_node(hash, buf, dbuf, man_node(man)); 528 529 /* 530 * Copy from the in-memory hashtable of pending keywords 531 * into the database. 532 */ 533 534 memset(vbuf, 0, sizeof(uint32_t)); 535 memcpy(vbuf + 4, &rec, sizeof(uint32_t)); 536 537 seq = R_FIRST; 538 while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { 539 seq = R_NEXT; 540 541 memcpy(vbuf, val.data, sizeof(uint32_t)); 542 val.size = sizeof(vbuf); 543 val.data = vbuf; 544 545 if (verb > 1) 546 printf("%s: Added keyword: %s\n", 547 fn, (char *)key.data); 548 dbt_put(db, dbf, &key, &val); 549 } 550 if (ch < 0) { 551 perror("hash"); 552 exit((int)MANDOCLEVEL_SYSERR); 553 } 554 555 /* 556 * Apply to the index. If we haven't had a description 557 * set, put an empty one in now. 558 */ 559 560 if (dbuf->len == sv) 561 buf_appendb(dbuf, "", 1); 562 563 key.data = &rec; 564 key.size = sizeof(recno_t); 565 566 val.data = dbuf->cp; 567 val.size = dbuf->len; 568 569 if (verb) 570 printf("%s: Added index\n", fn); 571 dbt_put(idx, idxf, &key, &val); 572 } 573 } 574 575 /* 576 * Scan through all entries in the index file `idx' and prune those 577 * entries in `ofile'. 578 * Pruning consists of removing from `db', then invalidating the entry 579 * in `idx' (zeroing its value size). 580 */ 581 static void 582 index_prune(const struct of *ofile, DB *db, const char *dbf, 583 DB *idx, const char *idxf, int verb, 584 recno_t *maxrec, recno_t **recs, size_t *recsz) 585 { 586 const struct of *of; 587 const char *fn; 588 unsigned seq, sseq; 589 DBT key, val; 590 size_t reccur; 591 int ch; 592 593 reccur = 0; 594 seq = R_FIRST; 595 while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) { 596 seq = R_NEXT; 597 *maxrec = *(recno_t *)key.data; 598 if (0 == val.size) { 599 if (reccur >= *recsz) { 600 *recsz += MANDOC_SLOP; 601 *recs = mandoc_realloc(*recs, 602 *recsz * sizeof(recno_t)); 603 } 604 (*recs)[(int)reccur] = *maxrec; 605 reccur++; 606 continue; 607 } 608 609 fn = (char *)val.data; 610 for (of = ofile; of; of = of->next) 611 if (0 == strcmp(fn, of->fname)) 612 break; 613 614 if (NULL == of) 615 continue; 616 617 sseq = R_FIRST; 618 while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) { 619 sseq = R_NEXT; 620 assert(8 == val.size); 621 if (*maxrec != *(recno_t *)(val.data + 4)) 622 continue; 623 if (verb) 624 printf("%s: Deleted keyword: %s\n", 625 fn, (char *)key.data); 626 ch = (*db->del)(db, &key, R_CURSOR); 627 if (ch < 0) 628 break; 629 } 630 if (ch < 0) { 631 perror(dbf); 632 exit((int)MANDOCLEVEL_SYSERR); 633 } 634 635 if (verb) 636 printf("%s: Deleted index\n", fn); 637 638 val.size = 0; 639 ch = (*idx->put)(idx, &key, &val, R_CURSOR); 640 if (ch < 0) { 641 perror(idxf); 642 exit((int)MANDOCLEVEL_SYSERR); 643 } 644 645 if (reccur >= *recsz) { 646 *recsz += MANDOC_SLOP; 647 *recs = mandoc_realloc 648 (*recs, *recsz * sizeof(recno_t)); 649 } 650 651 (*recs)[(int)reccur] = *maxrec; 652 reccur++; 653 } 654 (*maxrec)++; 655 } 656 657 /* 658 * Grow the buffer (if necessary) and copy in a binary string. 659 */ 660 static void 661 buf_appendb(struct buf *buf, const void *cp, size_t sz) 662 { 663 664 /* Overshoot by MANDOC_BUFSZ. */ 665 666 while (buf->len + sz >= buf->size) { 667 buf->size = buf->len + sz + MANDOC_BUFSZ; 668 buf->cp = mandoc_realloc(buf->cp, buf->size); 669 } 670 671 memcpy(buf->cp + (int)buf->len, cp, sz); 672 buf->len += sz; 673 } 674 675 /* 676 * Append a nil-terminated string to the buffer. 677 * This can be invoked multiple times. 678 * The buffer string will be nil-terminated. 679 * If invoked multiple times, a space is put between strings. 680 */ 681 static void 682 buf_append(struct buf *buf, const char *cp) 683 { 684 size_t sz; 685 686 if (0 == (sz = strlen(cp))) 687 return; 688 689 if (buf->len) 690 buf->cp[(int)buf->len - 1] = ' '; 691 692 buf_appendb(buf, cp, sz + 1); 693 } 694 695 /* 696 * Recursively add all text from a given node. 697 * This is optimised for general mdoc nodes in this context, which do 698 * not consist of subexpressions and having a recursive call for n->next 699 * would be wasteful. 700 * The "f" variable should be 0 unless called from pmdoc_Nd for the 701 * description buffer, which does not start at the beginning of the 702 * buffer. 703 */ 704 static void 705 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f) 706 { 707 708 for ( ; n; n = n->next) { 709 if (n->child) 710 buf_appendmdoc(buf, n->child, f); 711 712 if (MDOC_TEXT == n->type && f) { 713 f = 0; 714 buf_appendb(buf, n->string, 715 strlen(n->string) + 1); 716 } else if (MDOC_TEXT == n->type) 717 buf_append(buf, n->string); 718 719 } 720 } 721 722 /* ARGSUSED */ 723 static void 724 pmdoc_An(MDOC_ARGS) 725 { 726 727 if (SEC_AUTHORS != n->sec) 728 return; 729 730 buf_appendmdoc(buf, n->child, 0); 731 hash_put(hash, buf, TYPE_AUTHOR); 732 } 733 734 static void 735 hash_reset(DB **db) 736 { 737 DB *hash; 738 739 if (NULL != (hash = *db)) 740 (*hash->close)(hash); 741 742 *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL); 743 if (NULL == *db) { 744 perror("hash"); 745 exit((int)MANDOCLEVEL_SYSERR); 746 } 747 } 748 749 /* ARGSUSED */ 750 static void 751 pmdoc_Fd(MDOC_ARGS) 752 { 753 const char *start, *end; 754 size_t sz; 755 756 if (SEC_SYNOPSIS != n->sec) 757 return; 758 if (NULL == (n = n->child) || MDOC_TEXT != n->type) 759 return; 760 761 /* 762 * Only consider those `Fd' macro fields that begin with an 763 * "inclusion" token (versus, e.g., #define). 764 */ 765 if (strcmp("#include", n->string)) 766 return; 767 768 if (NULL == (n = n->next) || MDOC_TEXT != n->type) 769 return; 770 771 /* 772 * Strip away the enclosing angle brackets and make sure we're 773 * not zero-length. 774 */ 775 776 start = n->string; 777 if ('<' == *start || '"' == *start) 778 start++; 779 780 if (0 == (sz = strlen(start))) 781 return; 782 783 end = &start[(int)sz - 1]; 784 if ('>' == *end || '"' == *end) 785 end--; 786 787 assert(end >= start); 788 789 buf_appendb(buf, start, (size_t)(end - start + 1)); 790 buf_appendb(buf, "", 1); 791 792 hash_put(hash, buf, TYPE_INCLUDES); 793 } 794 795 /* ARGSUSED */ 796 static void 797 pmdoc_Cd(MDOC_ARGS) 798 { 799 800 if (SEC_SYNOPSIS != n->sec) 801 return; 802 803 buf_appendmdoc(buf, n->child, 0); 804 hash_put(hash, buf, TYPE_CONFIG); 805 } 806 807 /* ARGSUSED */ 808 static void 809 pmdoc_In(MDOC_ARGS) 810 { 811 812 if (SEC_SYNOPSIS != n->sec) 813 return; 814 if (NULL == n->child || MDOC_TEXT != n->child->type) 815 return; 816 817 buf_append(buf, n->child->string); 818 hash_put(hash, buf, TYPE_INCLUDES); 819 } 820 821 /* ARGSUSED */ 822 static void 823 pmdoc_Fn(MDOC_ARGS) 824 { 825 const char *cp; 826 827 if (SEC_SYNOPSIS != n->sec) 828 return; 829 if (NULL == n->child || MDOC_TEXT != n->child->type) 830 return; 831 832 /* .Fn "struct type *arg" "foo" */ 833 834 cp = strrchr(n->child->string, ' '); 835 if (NULL == cp) 836 cp = n->child->string; 837 838 /* Strip away pointer symbol. */ 839 840 while ('*' == *cp) 841 cp++; 842 843 buf_append(buf, cp); 844 hash_put(hash, buf, TYPE_FUNCTION); 845 } 846 847 /* ARGSUSED */ 848 static void 849 pmdoc_St(MDOC_ARGS) 850 { 851 852 if (SEC_STANDARDS != n->sec) 853 return; 854 if (NULL == n->child || MDOC_TEXT != n->child->type) 855 return; 856 857 buf_append(buf, n->child->string); 858 hash_put(hash, buf, TYPE_STANDARD); 859 } 860 861 /* ARGSUSED */ 862 static void 863 pmdoc_Xr(MDOC_ARGS) 864 { 865 866 if (NULL == (n = n->child)) 867 return; 868 869 buf_appendb(buf, n->string, strlen(n->string)); 870 871 if (NULL != (n = n->next)) { 872 buf_appendb(buf, ".", 1); 873 buf_appendb(buf, n->string, strlen(n->string) + 1); 874 } else 875 buf_appendb(buf, ".", 2); 876 877 hash_put(hash, buf, TYPE_XREF); 878 } 879 880 /* ARGSUSED */ 881 static void 882 pmdoc_Vt(MDOC_ARGS) 883 { 884 const char *start; 885 size_t sz; 886 887 if (SEC_SYNOPSIS != n->sec) 888 return; 889 if (MDOC_Vt == n->tok && MDOC_BODY != n->type) 890 return; 891 if (NULL == n->last || MDOC_TEXT != n->last->type) 892 return; 893 894 /* 895 * Strip away leading pointer symbol '*' and trailing ';'. 896 */ 897 898 start = n->last->string; 899 900 while ('*' == *start) 901 start++; 902 903 if (0 == (sz = strlen(start))) 904 return; 905 906 if (';' == start[(int)sz - 1]) 907 sz--; 908 909 if (0 == sz) 910 return; 911 912 buf_appendb(buf, start, sz); 913 buf_appendb(buf, "", 1); 914 hash_put(hash, buf, TYPE_VARIABLE); 915 } 916 917 /* ARGSUSED */ 918 static void 919 pmdoc_Fo(MDOC_ARGS) 920 { 921 922 if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) 923 return; 924 if (NULL == n->child || MDOC_TEXT != n->child->type) 925 return; 926 927 buf_append(buf, n->child->string); 928 hash_put(hash, buf, TYPE_FUNCTION); 929 } 930 931 932 /* ARGSUSED */ 933 static void 934 pmdoc_Nd(MDOC_ARGS) 935 { 936 937 if (MDOC_BODY != n->type) 938 return; 939 940 buf_appendmdoc(dbuf, n->child, 1); 941 buf_appendmdoc(buf, n->child, 0); 942 943 hash_put(hash, buf, TYPE_DESC); 944 } 945 946 /* ARGSUSED */ 947 static void 948 pmdoc_Er(MDOC_ARGS) 949 { 950 951 if (SEC_ERRORS != n->sec) 952 return; 953 954 buf_appendmdoc(buf, n->child, 0); 955 hash_put(hash, buf, TYPE_ERR); 956 } 957 958 /* ARGSUSED */ 959 static void 960 pmdoc_Ev(MDOC_ARGS) 961 { 962 963 if (SEC_ENVIRONMENT != n->sec) 964 return; 965 966 buf_appendmdoc(buf, n->child, 0); 967 hash_put(hash, buf, TYPE_ENV); 968 } 969 970 /* ARGSUSED */ 971 static void 972 pmdoc_Pa(MDOC_ARGS) 973 { 974 975 if (SEC_FILES != n->sec) 976 return; 977 978 buf_appendmdoc(buf, n->child, 0); 979 hash_put(hash, buf, TYPE_PATH); 980 } 981 982 /* ARGSUSED */ 983 static void 984 pmdoc_Nm(MDOC_ARGS) 985 { 986 987 if (SEC_NAME == n->sec) { 988 buf_appendmdoc(buf, n->child, 0); 989 hash_put(hash, buf, TYPE_NAME); 990 return; 991 } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) 992 return; 993 994 if (NULL == n->child) 995 buf_append(buf, m->name); 996 997 buf_appendmdoc(buf, n->child, 0); 998 hash_put(hash, buf, TYPE_UTILITY); 999 } 1000 1001 static void 1002 hash_put(DB *db, const struct buf *buf, int mask) 1003 { 1004 DBT key, val; 1005 int rc; 1006 1007 if (buf->len < 2) 1008 return; 1009 1010 key.data = buf->cp; 1011 key.size = buf->len; 1012 1013 if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { 1014 perror("hash"); 1015 exit((int)MANDOCLEVEL_SYSERR); 1016 } else if (0 == rc) 1017 mask |= *(int *)val.data; 1018 1019 val.data = &mask; 1020 val.size = sizeof(int); 1021 1022 if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { 1023 perror("hash"); 1024 exit((int)MANDOCLEVEL_SYSERR); 1025 } 1026 } 1027 1028 static void 1029 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) 1030 { 1031 1032 assert(key->size); 1033 assert(val->size); 1034 1035 if (0 == (*db->put)(db, key, val, 0)) 1036 return; 1037 1038 perror(dbn); 1039 exit((int)MANDOCLEVEL_SYSERR); 1040 /* NOTREACHED */ 1041 } 1042 1043 /* 1044 * Call out to per-macro handlers after clearing the persistent database 1045 * key. If the macro sets the database key, flush it to the database. 1046 */ 1047 static void 1048 pmdoc_node(MDOC_ARGS) 1049 { 1050 1051 if (NULL == n) 1052 return; 1053 1054 switch (n->type) { 1055 case (MDOC_HEAD): 1056 /* FALLTHROUGH */ 1057 case (MDOC_BODY): 1058 /* FALLTHROUGH */ 1059 case (MDOC_TAIL): 1060 /* FALLTHROUGH */ 1061 case (MDOC_BLOCK): 1062 /* FALLTHROUGH */ 1063 case (MDOC_ELEM): 1064 if (NULL == mdocs[n->tok]) 1065 break; 1066 1067 buf->len = 0; 1068 (*mdocs[n->tok])(hash, buf, dbuf, n, m); 1069 break; 1070 default: 1071 break; 1072 } 1073 1074 pmdoc_node(hash, buf, dbuf, n->child, m); 1075 pmdoc_node(hash, buf, dbuf, n->next, m); 1076 } 1077 1078 static int 1079 pman_node(MAN_ARGS) 1080 { 1081 const struct man_node *head, *body; 1082 const char *start, *sv; 1083 size_t sz; 1084 1085 if (NULL == n) 1086 return(0); 1087 1088 /* 1089 * We're only searching for one thing: the first text child in 1090 * the BODY of a NAME section. Since we don't keep track of 1091 * sections in -man, run some hoops to find out whether we're in 1092 * the correct section or not. 1093 */ 1094 1095 if (MAN_BODY == n->type && MAN_SH == n->tok) { 1096 body = n; 1097 assert(body->parent); 1098 if (NULL != (head = body->parent->head) && 1099 1 == head->nchild && 1100 NULL != (head = (head->child)) && 1101 MAN_TEXT == head->type && 1102 0 == strcmp(head->string, "NAME") && 1103 NULL != (body = body->child) && 1104 MAN_TEXT == body->type) { 1105 1106 assert(body->string); 1107 start = sv = body->string; 1108 1109 /* 1110 * Go through a special heuristic dance here. 1111 * This is why -man manuals are great! 1112 * (I'm being sarcastic: my eyes are bleeding.) 1113 * Conventionally, one or more manual names are 1114 * comma-specified prior to a whitespace, then a 1115 * dash, then a description. Try to puzzle out 1116 * the name parts here. 1117 */ 1118 1119 for ( ;; ) { 1120 sz = strcspn(start, " ,"); 1121 if ('\0' == start[(int)sz]) 1122 break; 1123 1124 buf->len = 0; 1125 buf_appendb(buf, start, sz); 1126 buf_appendb(buf, "", 1); 1127 1128 hash_put(hash, buf, TYPE_NAME); 1129 1130 if (' ' == start[(int)sz]) { 1131 start += (int)sz + 1; 1132 break; 1133 } 1134 1135 assert(',' == start[(int)sz]); 1136 start += (int)sz + 1; 1137 while (' ' == *start) 1138 start++; 1139 } 1140 1141 buf->len = 0; 1142 1143 if (sv == start) { 1144 buf_append(buf, start); 1145 return(1); 1146 } 1147 1148 while (' ' == *start) 1149 start++; 1150 1151 if (0 == strncmp(start, "-", 1)) 1152 start += 1; 1153 else if (0 == strncmp(start, "\\-", 2)) 1154 start += 2; 1155 else if (0 == strncmp(start, "\\(en", 4)) 1156 start += 4; 1157 else if (0 == strncmp(start, "\\(em", 4)) 1158 start += 4; 1159 1160 while (' ' == *start) 1161 start++; 1162 1163 sz = strlen(start) + 1; 1164 buf_appendb(dbuf, start, sz); 1165 buf_appendb(buf, start, sz); 1166 1167 hash_put(hash, buf, TYPE_DESC); 1168 } 1169 } 1170 1171 if (pman_node(hash, buf, dbuf, n->child)) 1172 return(1); 1173 if (pman_node(hash, buf, dbuf, n->next)) 1174 return(1); 1175 1176 return(0); 1177 } 1178 1179 static void 1180 ofile_argbuild(char *argv[], int argc, int verb, struct of **of) 1181 { 1182 int i; 1183 struct of *nof; 1184 1185 for (i = 0; i < argc; i++) { 1186 nof = mandoc_calloc(1, sizeof(struct of)); 1187 nof->fname = strdup(argv[i]); 1188 if (verb > 2) 1189 printf("%s: Scheduling\n", argv[i]); 1190 if (NULL == *of) { 1191 *of = nof; 1192 (*of)->first = nof; 1193 } else { 1194 nof->first = (*of)->first; 1195 (*of)->next = nof; 1196 *of = nof; 1197 } 1198 } 1199 } 1200 1201 /* 1202 * Recursively build up a list of files to parse. 1203 * We use this instead of ftw() and so on because I don't want global 1204 * variables hanging around. 1205 * This ignores the mandoc.db and mandoc.index files, but assumes that 1206 * everything else is a manual. 1207 * Pass in a pointer to a NULL structure for the first invocation. 1208 */ 1209 static int 1210 ofile_dirbuild(const char *dir, int verb, struct of **of) 1211 { 1212 char buf[MAXPATHLEN]; 1213 size_t sz; 1214 DIR *d; 1215 const char *fn; 1216 struct of *nof; 1217 struct dirent *dp; 1218 1219 if (NULL == (d = opendir(dir))) { 1220 perror(dir); 1221 return(0); 1222 } 1223 1224 while (NULL != (dp = readdir(d))) { 1225 fn = dp->d_name; 1226 if (DT_DIR == dp->d_type) { 1227 if (0 == strcmp(".", fn)) 1228 continue; 1229 if (0 == strcmp("..", fn)) 1230 continue; 1231 1232 buf[0] = '\0'; 1233 strlcat(buf, dir, MAXPATHLEN); 1234 strlcat(buf, "/", MAXPATHLEN); 1235 sz = strlcat(buf, fn, MAXPATHLEN); 1236 1237 if (sz < MAXPATHLEN) { 1238 if ( ! ofile_dirbuild(buf, verb, of)) 1239 return(0); 1240 continue; 1241 } else if (sz < MAXPATHLEN) 1242 continue; 1243 1244 fprintf(stderr, "%s: Path too long\n", dir); 1245 return(0); 1246 } 1247 if (DT_REG != dp->d_type) 1248 continue; 1249 1250 if (0 == strcmp(MANDOC_DB, fn) || 1251 0 == strcmp(MANDOC_IDX, fn)) 1252 continue; 1253 1254 buf[0] = '\0'; 1255 strlcat(buf, dir, MAXPATHLEN); 1256 strlcat(buf, "/", MAXPATHLEN); 1257 sz = strlcat(buf, fn, MAXPATHLEN); 1258 if (sz >= MAXPATHLEN) { 1259 fprintf(stderr, "%s: Path too long\n", dir); 1260 return(0); 1261 } 1262 1263 nof = mandoc_calloc(1, sizeof(struct of)); 1264 nof->fname = mandoc_strdup(buf); 1265 1266 if (verb > 2) 1267 printf("%s: Scheduling\n", buf); 1268 1269 if (NULL == *of) { 1270 *of = nof; 1271 (*of)->first = nof; 1272 } else { 1273 nof->first = (*of)->first; 1274 (*of)->next = nof; 1275 *of = nof; 1276 } 1277 } 1278 1279 return(1); 1280 } 1281 1282 static void 1283 ofile_free(struct of *of) 1284 { 1285 struct of *nof; 1286 1287 while (of) { 1288 nof = of->next; 1289 free(of->fname); 1290 free(of); 1291 of = nof; 1292 } 1293 } 1294 1295 static void 1296 usage(void) 1297 { 1298 1299 fprintf(stderr, "usage: %s [-v] " 1300 "[-d dir [files...] |" 1301 " -u dir [files...] |" 1302 " dir...]\n", progname); 1303 } 1304