1 /* $Id: mandocdb.c,v 1.2 2011/09/17 13:45:28 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #include <sys/param.h> 18 19 #include <assert.h> 20 #include <dirent.h> 21 #include <fcntl.h> 22 #include <getopt.h> 23 #include <stdio.h> 24 #include <stdint.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <db.h> 28 29 #include "man.h" 30 #include "mdoc.h" 31 #include "mandoc.h" 32 33 #define MANDOC_DB "mandoc.db" 34 #define MANDOC_IDX "mandoc.index" 35 #define MANDOC_BUFSZ BUFSIZ 36 #define MANDOC_SLOP 1024 37 38 /* Bit-fields. See mandocdb.8. */ 39 40 #define TYPE_NAME 0x01 41 #define TYPE_FUNCTION 0x02 42 #define TYPE_UTILITY 0x04 43 #define TYPE_INCLUDES 0x08 44 #define TYPE_VARIABLE 0x10 45 #define TYPE_STANDARD 0x20 46 #define TYPE_AUTHOR 0x40 47 #define TYPE_CONFIG 0x80 48 #define TYPE_DESC 0x100 49 #define TYPE_XREF 0x200 50 #define TYPE_PATH 0x400 51 #define TYPE_ENV 0x800 52 #define TYPE_ERR 0x1000 53 54 /* Tiny list for files. No need to bring in QUEUE. */ 55 56 struct of { 57 char *fname; /* heap-allocated */ 58 struct of *next; /* NULL for last one */ 59 struct of *first; /* first in list */ 60 }; 61 62 /* Buffer for storing growable data. */ 63 64 struct buf { 65 char *cp; 66 size_t len; /* current length */ 67 size_t size; /* total buffer size */ 68 }; 69 70 /* Operation we're going to perform. */ 71 72 enum op { 73 OP_NEW = 0, /* new database */ 74 OP_UPDATE, /* delete/add entries in existing database */ 75 OP_DELETE /* delete entries from existing database */ 76 }; 77 78 #define MAN_ARGS DB *hash, \ 79 struct buf *buf, \ 80 struct buf *dbuf, \ 81 const struct man_node *n 82 #define MDOC_ARGS DB *hash, \ 83 struct buf *buf, \ 84 struct buf *dbuf, \ 85 const struct mdoc_node *n, \ 86 const struct mdoc_meta *m 87 88 static void buf_appendmdoc(struct buf *, 89 const struct mdoc_node *, int); 90 static void buf_append(struct buf *, const char *); 91 static void buf_appendb(struct buf *, 92 const void *, size_t); 93 static void dbt_put(DB *, const char *, DBT *, DBT *); 94 static void hash_put(DB *, const struct buf *, int); 95 static void hash_reset(DB **); 96 static void index_merge(const struct of *, struct mparse *, 97 struct buf *, struct buf *, 98 DB *, DB *, const char *, 99 DB *, const char *, int, 100 recno_t, const recno_t *, size_t); 101 static void index_prune(const struct of *, DB *, 102 const char *, DB *, const char *, 103 int, recno_t *, recno_t **, size_t *); 104 static void ofile_argbuild(char *[], int, int, struct of **); 105 static int ofile_dirbuild(const char *, int, struct of **); 106 static void ofile_free(struct of *); 107 static int pman_node(MAN_ARGS); 108 static void pmdoc_node(MDOC_ARGS); 109 static void pmdoc_An(MDOC_ARGS); 110 static void pmdoc_Cd(MDOC_ARGS); 111 static void pmdoc_Er(MDOC_ARGS); 112 static void pmdoc_Ev(MDOC_ARGS); 113 static void pmdoc_Fd(MDOC_ARGS); 114 static void pmdoc_In(MDOC_ARGS); 115 static void pmdoc_Fn(MDOC_ARGS); 116 static void pmdoc_Fo(MDOC_ARGS); 117 static void pmdoc_Nd(MDOC_ARGS); 118 static void pmdoc_Nm(MDOC_ARGS); 119 static void pmdoc_Pa(MDOC_ARGS); 120 static void pmdoc_St(MDOC_ARGS); 121 static void pmdoc_Vt(MDOC_ARGS); 122 static void pmdoc_Xr(MDOC_ARGS); 123 static void usage(void); 124 125 typedef void (*pmdoc_nf)(MDOC_ARGS); 126 127 static const pmdoc_nf mdocs[MDOC_MAX] = { 128 NULL, /* Ap */ 129 NULL, /* Dd */ 130 NULL, /* Dt */ 131 NULL, /* Os */ 132 NULL, /* Sh */ 133 NULL, /* Ss */ 134 NULL, /* Pp */ 135 NULL, /* D1 */ 136 NULL, /* Dl */ 137 NULL, /* Bd */ 138 NULL, /* Ed */ 139 NULL, /* Bl */ 140 NULL, /* El */ 141 NULL, /* It */ 142 NULL, /* Ad */ 143 pmdoc_An, /* An */ 144 NULL, /* Ar */ 145 pmdoc_Cd, /* Cd */ 146 NULL, /* Cm */ 147 NULL, /* Dv */ 148 pmdoc_Er, /* Er */ 149 pmdoc_Ev, /* Ev */ 150 NULL, /* Ex */ 151 NULL, /* Fa */ 152 pmdoc_Fd, /* Fd */ 153 NULL, /* Fl */ 154 pmdoc_Fn, /* Fn */ 155 NULL, /* Ft */ 156 NULL, /* Ic */ 157 pmdoc_In, /* In */ 158 NULL, /* Li */ 159 pmdoc_Nd, /* Nd */ 160 pmdoc_Nm, /* Nm */ 161 NULL, /* Op */ 162 NULL, /* Ot */ 163 pmdoc_Pa, /* Pa */ 164 NULL, /* Rv */ 165 pmdoc_St, /* St */ 166 pmdoc_Vt, /* Va */ 167 pmdoc_Vt, /* Vt */ 168 pmdoc_Xr, /* Xr */ 169 NULL, /* %A */ 170 NULL, /* %B */ 171 NULL, /* %D */ 172 NULL, /* %I */ 173 NULL, /* %J */ 174 NULL, /* %N */ 175 NULL, /* %O */ 176 NULL, /* %P */ 177 NULL, /* %R */ 178 NULL, /* %T */ 179 NULL, /* %V */ 180 NULL, /* Ac */ 181 NULL, /* Ao */ 182 NULL, /* Aq */ 183 NULL, /* At */ 184 NULL, /* Bc */ 185 NULL, /* Bf */ 186 NULL, /* Bo */ 187 NULL, /* Bq */ 188 NULL, /* Bsx */ 189 NULL, /* Bx */ 190 NULL, /* Db */ 191 NULL, /* Dc */ 192 NULL, /* Do */ 193 NULL, /* Dq */ 194 NULL, /* Ec */ 195 NULL, /* Ef */ 196 NULL, /* Em */ 197 NULL, /* Eo */ 198 NULL, /* Fx */ 199 NULL, /* Ms */ 200 NULL, /* No */ 201 NULL, /* Ns */ 202 NULL, /* Nx */ 203 NULL, /* Ox */ 204 NULL, /* Pc */ 205 NULL, /* Pf */ 206 NULL, /* Po */ 207 NULL, /* Pq */ 208 NULL, /* Qc */ 209 NULL, /* Ql */ 210 NULL, /* Qo */ 211 NULL, /* Qq */ 212 NULL, /* Re */ 213 NULL, /* Rs */ 214 NULL, /* Sc */ 215 NULL, /* So */ 216 NULL, /* Sq */ 217 NULL, /* Sm */ 218 NULL, /* Sx */ 219 NULL, /* Sy */ 220 NULL, /* Tn */ 221 NULL, /* Ux */ 222 NULL, /* Xc */ 223 NULL, /* Xo */ 224 pmdoc_Fo, /* Fo */ 225 NULL, /* Fc */ 226 NULL, /* Oo */ 227 NULL, /* Oc */ 228 NULL, /* Bk */ 229 NULL, /* Ek */ 230 NULL, /* Bt */ 231 NULL, /* Hf */ 232 NULL, /* Fr */ 233 NULL, /* Ud */ 234 NULL, /* Lb */ 235 NULL, /* Lp */ 236 NULL, /* Lk */ 237 NULL, /* Mt */ 238 NULL, /* Brq */ 239 NULL, /* Bro */ 240 NULL, /* Brc */ 241 NULL, /* %C */ 242 NULL, /* Es */ 243 NULL, /* En */ 244 NULL, /* Dx */ 245 NULL, /* %Q */ 246 NULL, /* br */ 247 NULL, /* sp */ 248 NULL, /* %U */ 249 NULL, /* Ta */ 250 }; 251 252 static const char *progname; 253 254 int 255 main(int argc, char *argv[]) 256 { 257 struct mparse *mp; /* parse sequence */ 258 enum op op; /* current operation */ 259 const char *dir; 260 char ibuf[MAXPATHLEN], /* index fname */ 261 fbuf[MAXPATHLEN]; /* btree fname */ 262 int verb, /* output verbosity */ 263 ch, i, flags; 264 DB *idx, /* index database */ 265 *db, /* keyword database */ 266 *hash; /* temporary keyword hashtable */ 267 BTREEINFO info; /* btree configuration */ 268 recno_t maxrec; /* supremum of all records */ 269 recno_t *recs; /* buffer of empty records */ 270 size_t sz1, sz2, 271 recsz, /* buffer size of recs */ 272 reccur; /* valid number of recs */ 273 struct buf buf, /* keyword buffer */ 274 dbuf; /* description buffer */ 275 struct of *of; /* list of files for processing */ 276 extern int optind; 277 extern char *optarg; 278 279 progname = strrchr(argv[0], '/'); 280 if (progname == NULL) 281 progname = argv[0]; 282 else 283 ++progname; 284 285 verb = 0; 286 of = NULL; 287 db = idx = NULL; 288 mp = NULL; 289 hash = NULL; 290 recs = NULL; 291 recsz = reccur = 0; 292 maxrec = 0; 293 op = OP_NEW; 294 dir = NULL; 295 296 while (-1 != (ch = getopt(argc, argv, "d:u:v"))) 297 switch (ch) { 298 case ('d'): 299 dir = optarg; 300 op = OP_UPDATE; 301 break; 302 case ('u'): 303 dir = optarg; 304 op = OP_DELETE; 305 break; 306 case ('v'): 307 verb++; 308 break; 309 default: 310 usage(); 311 return((int)MANDOCLEVEL_BADARG); 312 } 313 314 argc -= optind; 315 argv += optind; 316 317 memset(&info, 0, sizeof(BTREEINFO)); 318 info.flags = R_DUP; 319 320 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); 321 322 memset(&buf, 0, sizeof(struct buf)); 323 memset(&dbuf, 0, sizeof(struct buf)); 324 325 buf.size = dbuf.size = MANDOC_BUFSZ; 326 327 buf.cp = mandoc_malloc(buf.size); 328 dbuf.cp = mandoc_malloc(dbuf.size); 329 330 flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR; 331 332 if (OP_UPDATE == op || OP_DELETE == op) { 333 ibuf[0] = fbuf[0] = '\0'; 334 335 strlcat(fbuf, dir, MAXPATHLEN); 336 strlcat(fbuf, "/", MAXPATHLEN); 337 sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN); 338 339 strlcat(ibuf, dir, MAXPATHLEN); 340 strlcat(ibuf, "/", MAXPATHLEN); 341 sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); 342 343 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { 344 fprintf(stderr, "%s: Path too long\n", dir); 345 exit((int)MANDOCLEVEL_BADARG); 346 } 347 348 db = dbopen(fbuf, flags, 0644, DB_BTREE, &info); 349 idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL); 350 351 if (NULL == db) { 352 perror(fbuf); 353 exit((int)MANDOCLEVEL_SYSERR); 354 } else if (NULL == db) { 355 perror(ibuf); 356 exit((int)MANDOCLEVEL_SYSERR); 357 } 358 359 if (verb > 2) { 360 printf("%s: Opened\n", fbuf); 361 printf("%s: Opened\n", ibuf); 362 } 363 364 ofile_argbuild(argv, argc, verb, &of); 365 if (NULL == of) 366 goto out; 367 368 of = of->first; 369 370 index_prune(of, db, fbuf, idx, ibuf, verb, 371 &maxrec, &recs, &recsz); 372 373 if (OP_UPDATE == op) 374 index_merge(of, mp, &dbuf, &buf, hash, 375 db, fbuf, idx, ibuf, verb, 376 maxrec, recs, reccur); 377 378 goto out; 379 } 380 381 for (i = 0; i < argc; i++) { 382 ibuf[0] = fbuf[0] = '\0'; 383 384 strlcat(fbuf, argv[i], MAXPATHLEN); 385 strlcat(fbuf, "/", MAXPATHLEN); 386 sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN); 387 388 strlcat(ibuf, argv[i], MAXPATHLEN); 389 strlcat(ibuf, "/", MAXPATHLEN); 390 sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); 391 392 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { 393 fprintf(stderr, "%s: Path too long\n", argv[i]); 394 exit((int)MANDOCLEVEL_BADARG); 395 } 396 397 db = dbopen(fbuf, flags, 0644, DB_BTREE, &info); 398 idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL); 399 400 if (NULL == db) { 401 perror(fbuf); 402 exit((int)MANDOCLEVEL_SYSERR); 403 } else if (NULL == db) { 404 perror(ibuf); 405 exit((int)MANDOCLEVEL_SYSERR); 406 } 407 408 if (verb > 2) { 409 printf("%s: Truncated\n", fbuf); 410 printf("%s: Truncated\n", ibuf); 411 } 412 413 ofile_free(of); 414 of = NULL; 415 416 if ( ! ofile_dirbuild(argv[i], verb, &of)) 417 exit((int)MANDOCLEVEL_SYSERR); 418 419 if (NULL == of) 420 continue; 421 422 of = of->first; 423 424 index_merge(of, mp, &dbuf, &buf, hash, db, fbuf, 425 idx, ibuf, verb, maxrec, recs, reccur); 426 } 427 428 out: 429 if (db) 430 (*db->close)(db); 431 if (idx) 432 (*idx->close)(idx); 433 if (hash) 434 (*hash->close)(hash); 435 if (mp) 436 mparse_free(mp); 437 438 ofile_free(of); 439 free(buf.cp); 440 free(dbuf.cp); 441 free(recs); 442 443 return(MANDOCLEVEL_OK); 444 } 445 446 void 447 index_merge(const struct of *of, struct mparse *mp, 448 struct buf *dbuf, struct buf *buf, 449 DB *hash, DB *db, const char *dbf, 450 DB *idx, const char *idxf, int verb, 451 recno_t maxrec, const recno_t *recs, size_t reccur) 452 { 453 recno_t rec; 454 int ch; 455 DBT key, val; 456 struct mdoc *mdoc; 457 struct man *man; 458 const char *fn, *msec, *mtitle, *arch; 459 size_t sv; 460 unsigned seq; 461 char vbuf[8]; 462 463 for (rec = 0; of; of = of->next) { 464 fn = of->fname; 465 if (reccur > 0) { 466 --reccur; 467 rec = recs[(int)reccur]; 468 } else if (maxrec > 0) { 469 rec = maxrec; 470 maxrec = 0; 471 } else 472 rec++; 473 474 mparse_reset(mp); 475 hash_reset(&hash); 476 477 if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) { 478 fprintf(stderr, "%s: Parse failure\n", fn); 479 continue; 480 } 481 482 mparse_result(mp, &mdoc, &man); 483 if (NULL == mdoc && NULL == man) 484 continue; 485 486 msec = NULL != mdoc ? 487 mdoc_meta(mdoc)->msec : man_meta(man)->msec; 488 mtitle = NULL != mdoc ? 489 mdoc_meta(mdoc)->title : man_meta(man)->title; 490 arch = NULL != mdoc ? 491 mdoc_meta(mdoc)->arch : NULL; 492 493 if (NULL == arch) 494 arch = ""; 495 496 /* 497 * The index record value consists of a nil-terminated 498 * filename, a nil-terminated manual section, and a 499 * nil-terminated description. Since the description 500 * may not be set, we set a sentinel to see if we're 501 * going to write a nil byte in its place. 502 */ 503 504 dbuf->len = 0; 505 buf_appendb(dbuf, fn, strlen(fn) + 1); 506 buf_appendb(dbuf, msec, strlen(msec) + 1); 507 buf_appendb(dbuf, mtitle, strlen(mtitle) + 1); 508 buf_appendb(dbuf, arch, strlen(arch) + 1); 509 510 sv = dbuf->len; 511 512 /* Fix the record number in the btree value. */ 513 514 if (mdoc) 515 pmdoc_node(hash, buf, dbuf, 516 mdoc_node(mdoc), mdoc_meta(mdoc)); 517 else 518 pman_node(hash, buf, dbuf, man_node(man)); 519 520 /* 521 * Copy from the in-memory hashtable of pending keywords 522 * into the database. 523 */ 524 525 memset(vbuf, 0, sizeof(uint32_t)); 526 memcpy(vbuf + 4, &rec, sizeof(uint32_t)); 527 528 seq = R_FIRST; 529 while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { 530 seq = R_NEXT; 531 532 memcpy(vbuf, val.data, sizeof(uint32_t)); 533 val.size = sizeof(vbuf); 534 val.data = vbuf; 535 536 if (verb > 1) 537 printf("%s: Added keyword: %s\n", 538 fn, (char *)key.data); 539 dbt_put(db, dbf, &key, &val); 540 } 541 if (ch < 0) { 542 perror("hash"); 543 exit((int)MANDOCLEVEL_SYSERR); 544 } 545 546 /* 547 * Apply to the index. If we haven't had a description 548 * set, put an empty one in now. 549 */ 550 551 if (dbuf->len == sv) 552 buf_appendb(dbuf, "", 1); 553 554 key.data = &rec; 555 key.size = sizeof(recno_t); 556 557 val.data = dbuf->cp; 558 val.size = dbuf->len; 559 560 if (verb) 561 printf("%s: Added index\n", fn); 562 dbt_put(idx, idxf, &key, &val); 563 } 564 } 565 566 /* 567 * Scan through all entries in the index file `idx' and prune those 568 * entries in `ofile'. 569 * Pruning consists of removing from `db', then invalidating the entry 570 * in `idx' (zeroing its value size). 571 */ 572 static void 573 index_prune(const struct of *ofile, DB *db, const char *dbf, 574 DB *idx, const char *idxf, int verb, 575 recno_t *maxrec, recno_t **recs, size_t *recsz) 576 { 577 const struct of *of; 578 const char *fn; 579 unsigned seq, sseq; 580 DBT key, val; 581 size_t reccur; 582 int ch; 583 584 reccur = 0; 585 seq = R_FIRST; 586 while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) { 587 seq = R_NEXT; 588 *maxrec = *(recno_t *)key.data; 589 if (0 == val.size) { 590 if (reccur >= *recsz) { 591 *recsz += MANDOC_SLOP; 592 *recs = mandoc_realloc(*recs, 593 *recsz * sizeof(recno_t)); 594 } 595 (*recs)[(int)reccur] = *maxrec; 596 reccur++; 597 continue; 598 } 599 600 fn = (char *)val.data; 601 for (of = ofile; of; of = of->next) 602 if (0 == strcmp(fn, of->fname)) 603 break; 604 605 if (NULL == of) 606 continue; 607 608 sseq = R_FIRST; 609 while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) { 610 sseq = R_NEXT; 611 assert(8 == val.size); 612 if (*maxrec != *(recno_t *)(val.data + 4)) 613 continue; 614 if (verb) 615 printf("%s: Deleted keyword: %s\n", 616 fn, (char *)key.data); 617 ch = (*db->del)(db, &key, R_CURSOR); 618 if (ch < 0) 619 break; 620 } 621 if (ch < 0) { 622 perror(dbf); 623 exit((int)MANDOCLEVEL_SYSERR); 624 } 625 626 if (verb) 627 printf("%s: Deleted index\n", fn); 628 629 val.size = 0; 630 ch = (*idx->put)(idx, &key, &val, R_CURSOR); 631 if (ch < 0) { 632 perror(idxf); 633 exit((int)MANDOCLEVEL_SYSERR); 634 } 635 636 if (reccur >= *recsz) { 637 *recsz += MANDOC_SLOP; 638 *recs = mandoc_realloc 639 (*recs, *recsz * sizeof(recno_t)); 640 } 641 642 (*recs)[(int)reccur] = *maxrec; 643 reccur++; 644 } 645 (*maxrec)++; 646 } 647 648 /* 649 * Grow the buffer (if necessary) and copy in a binary string. 650 */ 651 static void 652 buf_appendb(struct buf *buf, const void *cp, size_t sz) 653 { 654 655 /* Overshoot by MANDOC_BUFSZ. */ 656 657 while (buf->len + sz >= buf->size) { 658 buf->size = buf->len + sz + MANDOC_BUFSZ; 659 buf->cp = mandoc_realloc(buf->cp, buf->size); 660 } 661 662 memcpy(buf->cp + (int)buf->len, cp, sz); 663 buf->len += sz; 664 } 665 666 /* 667 * Append a nil-terminated string to the buffer. 668 * This can be invoked multiple times. 669 * The buffer string will be nil-terminated. 670 * If invoked multiple times, a space is put between strings. 671 */ 672 static void 673 buf_append(struct buf *buf, const char *cp) 674 { 675 size_t sz; 676 677 if (0 == (sz = strlen(cp))) 678 return; 679 680 if (buf->len) 681 buf->cp[(int)buf->len - 1] = ' '; 682 683 buf_appendb(buf, cp, sz + 1); 684 } 685 686 /* 687 * Recursively add all text from a given node. 688 * This is optimised for general mdoc nodes in this context, which do 689 * not consist of subexpressions and having a recursive call for n->next 690 * would be wasteful. 691 * The "f" variable should be 0 unless called from pmdoc_Nd for the 692 * description buffer, which does not start at the beginning of the 693 * buffer. 694 */ 695 static void 696 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f) 697 { 698 699 for ( ; n; n = n->next) { 700 if (n->child) 701 buf_appendmdoc(buf, n->child, f); 702 703 if (MDOC_TEXT == n->type && f) { 704 f = 0; 705 buf_appendb(buf, n->string, 706 strlen(n->string) + 1); 707 } else if (MDOC_TEXT == n->type) 708 buf_append(buf, n->string); 709 710 } 711 } 712 713 /* ARGSUSED */ 714 static void 715 pmdoc_An(MDOC_ARGS) 716 { 717 718 if (SEC_AUTHORS != n->sec) 719 return; 720 721 buf_appendmdoc(buf, n->child, 0); 722 hash_put(hash, buf, TYPE_AUTHOR); 723 } 724 725 static void 726 hash_reset(DB **db) 727 { 728 DB *hash; 729 730 if (NULL != (hash = *db)) 731 (*hash->close)(hash); 732 733 *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL); 734 if (NULL == *db) { 735 perror("hash"); 736 exit((int)MANDOCLEVEL_SYSERR); 737 } 738 } 739 740 /* ARGSUSED */ 741 static void 742 pmdoc_Fd(MDOC_ARGS) 743 { 744 const char *start, *end; 745 size_t sz; 746 747 if (SEC_SYNOPSIS != n->sec) 748 return; 749 if (NULL == (n = n->child) || MDOC_TEXT != n->type) 750 return; 751 752 /* 753 * Only consider those `Fd' macro fields that begin with an 754 * "inclusion" token (versus, e.g., #define). 755 */ 756 if (strcmp("#include", n->string)) 757 return; 758 759 if (NULL == (n = n->next) || MDOC_TEXT != n->type) 760 return; 761 762 /* 763 * Strip away the enclosing angle brackets and make sure we're 764 * not zero-length. 765 */ 766 767 start = n->string; 768 if ('<' == *start || '"' == *start) 769 start++; 770 771 if (0 == (sz = strlen(start))) 772 return; 773 774 end = &start[(int)sz - 1]; 775 if ('>' == *end || '"' == *end) 776 end--; 777 778 assert(end >= start); 779 780 buf_appendb(buf, start, (size_t)(end - start + 1)); 781 buf_appendb(buf, "", 1); 782 783 hash_put(hash, buf, TYPE_INCLUDES); 784 } 785 786 /* ARGSUSED */ 787 static void 788 pmdoc_Cd(MDOC_ARGS) 789 { 790 791 if (SEC_SYNOPSIS != n->sec) 792 return; 793 794 buf_appendmdoc(buf, n->child, 0); 795 hash_put(hash, buf, TYPE_CONFIG); 796 } 797 798 /* ARGSUSED */ 799 static void 800 pmdoc_In(MDOC_ARGS) 801 { 802 803 if (SEC_SYNOPSIS != n->sec) 804 return; 805 if (NULL == n->child || MDOC_TEXT != n->child->type) 806 return; 807 808 buf_append(buf, n->child->string); 809 hash_put(hash, buf, TYPE_INCLUDES); 810 } 811 812 /* ARGSUSED */ 813 static void 814 pmdoc_Fn(MDOC_ARGS) 815 { 816 const char *cp; 817 818 if (SEC_SYNOPSIS != n->sec) 819 return; 820 if (NULL == n->child || MDOC_TEXT != n->child->type) 821 return; 822 823 /* .Fn "struct type *arg" "foo" */ 824 825 cp = strrchr(n->child->string, ' '); 826 if (NULL == cp) 827 cp = n->child->string; 828 829 /* Strip away pointer symbol. */ 830 831 while ('*' == *cp) 832 cp++; 833 834 buf_append(buf, cp); 835 hash_put(hash, buf, TYPE_FUNCTION); 836 } 837 838 /* ARGSUSED */ 839 static void 840 pmdoc_St(MDOC_ARGS) 841 { 842 843 if (SEC_STANDARDS != n->sec) 844 return; 845 if (NULL == n->child || MDOC_TEXT != n->child->type) 846 return; 847 848 buf_append(buf, n->child->string); 849 hash_put(hash, buf, TYPE_STANDARD); 850 } 851 852 /* ARGSUSED */ 853 static void 854 pmdoc_Xr(MDOC_ARGS) 855 { 856 857 if (NULL == (n = n->child)) 858 return; 859 860 buf_appendb(buf, n->string, strlen(n->string)); 861 862 if (NULL != (n = n->next)) { 863 buf_appendb(buf, ".", 1); 864 buf_appendb(buf, n->string, strlen(n->string) + 1); 865 } else 866 buf_appendb(buf, ".", 2); 867 868 hash_put(hash, buf, TYPE_XREF); 869 } 870 871 /* ARGSUSED */ 872 static void 873 pmdoc_Vt(MDOC_ARGS) 874 { 875 const char *start; 876 size_t sz; 877 878 if (SEC_SYNOPSIS != n->sec) 879 return; 880 if (MDOC_Vt == n->tok && MDOC_BODY != n->type) 881 return; 882 if (NULL == n->last || MDOC_TEXT != n->last->type) 883 return; 884 885 /* 886 * Strip away leading pointer symbol '*' and trailing ';'. 887 */ 888 889 start = n->last->string; 890 891 while ('*' == *start) 892 start++; 893 894 if (0 == (sz = strlen(start))) 895 return; 896 897 if (';' == start[(int)sz - 1]) 898 sz--; 899 900 if (0 == sz) 901 return; 902 903 buf_appendb(buf, start, sz); 904 buf_appendb(buf, "", 1); 905 hash_put(hash, buf, TYPE_VARIABLE); 906 } 907 908 /* ARGSUSED */ 909 static void 910 pmdoc_Fo(MDOC_ARGS) 911 { 912 913 if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) 914 return; 915 if (NULL == n->child || MDOC_TEXT != n->child->type) 916 return; 917 918 buf_append(buf, n->child->string); 919 hash_put(hash, buf, TYPE_FUNCTION); 920 } 921 922 923 /* ARGSUSED */ 924 static void 925 pmdoc_Nd(MDOC_ARGS) 926 { 927 928 if (MDOC_BODY != n->type) 929 return; 930 931 buf_appendmdoc(dbuf, n->child, 1); 932 buf_appendmdoc(buf, n->child, 0); 933 934 hash_put(hash, buf, TYPE_DESC); 935 } 936 937 /* ARGSUSED */ 938 static void 939 pmdoc_Er(MDOC_ARGS) 940 { 941 942 if (SEC_ERRORS != n->sec) 943 return; 944 945 buf_appendmdoc(buf, n->child, 0); 946 hash_put(hash, buf, TYPE_ERR); 947 } 948 949 /* ARGSUSED */ 950 static void 951 pmdoc_Ev(MDOC_ARGS) 952 { 953 954 if (SEC_ENVIRONMENT != n->sec) 955 return; 956 957 buf_appendmdoc(buf, n->child, 0); 958 hash_put(hash, buf, TYPE_ENV); 959 } 960 961 /* ARGSUSED */ 962 static void 963 pmdoc_Pa(MDOC_ARGS) 964 { 965 966 if (SEC_FILES != n->sec) 967 return; 968 969 buf_appendmdoc(buf, n->child, 0); 970 hash_put(hash, buf, TYPE_PATH); 971 } 972 973 /* ARGSUSED */ 974 static void 975 pmdoc_Nm(MDOC_ARGS) 976 { 977 978 if (SEC_NAME == n->sec) { 979 buf_appendmdoc(buf, n->child, 0); 980 hash_put(hash, buf, TYPE_NAME); 981 return; 982 } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) 983 return; 984 985 if (NULL == n->child) 986 buf_append(buf, m->name); 987 988 buf_appendmdoc(buf, n->child, 0); 989 hash_put(hash, buf, TYPE_UTILITY); 990 } 991 992 static void 993 hash_put(DB *db, const struct buf *buf, int mask) 994 { 995 DBT key, val; 996 int rc; 997 998 if (buf->len < 2) 999 return; 1000 1001 key.data = buf->cp; 1002 key.size = buf->len; 1003 1004 if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { 1005 perror("hash"); 1006 exit((int)MANDOCLEVEL_SYSERR); 1007 } else if (0 == rc) 1008 mask |= *(int *)val.data; 1009 1010 val.data = &mask; 1011 val.size = sizeof(int); 1012 1013 if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { 1014 perror("hash"); 1015 exit((int)MANDOCLEVEL_SYSERR); 1016 } 1017 } 1018 1019 static void 1020 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) 1021 { 1022 1023 assert(key->size); 1024 assert(val->size); 1025 1026 if (0 == (*db->put)(db, key, val, 0)) 1027 return; 1028 1029 perror(dbn); 1030 exit((int)MANDOCLEVEL_SYSERR); 1031 /* NOTREACHED */ 1032 } 1033 1034 /* 1035 * Call out to per-macro handlers after clearing the persistent database 1036 * key. If the macro sets the database key, flush it to the database. 1037 */ 1038 static void 1039 pmdoc_node(MDOC_ARGS) 1040 { 1041 1042 if (NULL == n) 1043 return; 1044 1045 switch (n->type) { 1046 case (MDOC_HEAD): 1047 /* FALLTHROUGH */ 1048 case (MDOC_BODY): 1049 /* FALLTHROUGH */ 1050 case (MDOC_TAIL): 1051 /* FALLTHROUGH */ 1052 case (MDOC_BLOCK): 1053 /* FALLTHROUGH */ 1054 case (MDOC_ELEM): 1055 if (NULL == mdocs[n->tok]) 1056 break; 1057 1058 buf->len = 0; 1059 (*mdocs[n->tok])(hash, buf, dbuf, n, m); 1060 break; 1061 default: 1062 break; 1063 } 1064 1065 pmdoc_node(hash, buf, dbuf, n->child, m); 1066 pmdoc_node(hash, buf, dbuf, n->next, m); 1067 } 1068 1069 static int 1070 pman_node(MAN_ARGS) 1071 { 1072 const struct man_node *head, *body; 1073 const char *start, *sv; 1074 size_t sz; 1075 1076 if (NULL == n) 1077 return(0); 1078 1079 /* 1080 * We're only searching for one thing: the first text child in 1081 * the BODY of a NAME section. Since we don't keep track of 1082 * sections in -man, run some hoops to find out whether we're in 1083 * the correct section or not. 1084 */ 1085 1086 if (MAN_BODY == n->type && MAN_SH == n->tok) { 1087 body = n; 1088 assert(body->parent); 1089 if (NULL != (head = body->parent->head) && 1090 1 == head->nchild && 1091 NULL != (head = (head->child)) && 1092 MAN_TEXT == head->type && 1093 0 == strcmp(head->string, "NAME") && 1094 NULL != (body = body->child) && 1095 MAN_TEXT == body->type) { 1096 1097 assert(body->string); 1098 start = sv = body->string; 1099 1100 /* 1101 * Go through a special heuristic dance here. 1102 * This is why -man manuals are great! 1103 * (I'm being sarcastic: my eyes are bleeding.) 1104 * Conventionally, one or more manual names are 1105 * comma-specified prior to a whitespace, then a 1106 * dash, then a description. Try to puzzle out 1107 * the name parts here. 1108 */ 1109 1110 for ( ;; ) { 1111 sz = strcspn(start, " ,"); 1112 if ('\0' == start[(int)sz]) 1113 break; 1114 1115 buf->len = 0; 1116 buf_appendb(buf, start, sz); 1117 buf_appendb(buf, "", 1); 1118 1119 hash_put(hash, buf, TYPE_NAME); 1120 1121 if (' ' == start[(int)sz]) { 1122 start += (int)sz + 1; 1123 break; 1124 } 1125 1126 assert(',' == start[(int)sz]); 1127 start += (int)sz + 1; 1128 while (' ' == *start) 1129 start++; 1130 } 1131 1132 buf->len = 0; 1133 1134 if (sv == start) { 1135 buf_append(buf, start); 1136 return(1); 1137 } 1138 1139 while (' ' == *start) 1140 start++; 1141 1142 if (0 == strncmp(start, "-", 1)) 1143 start += 1; 1144 else if (0 == strncmp(start, "\\-", 2)) 1145 start += 2; 1146 else if (0 == strncmp(start, "\\(en", 4)) 1147 start += 4; 1148 else if (0 == strncmp(start, "\\(em", 4)) 1149 start += 4; 1150 1151 while (' ' == *start) 1152 start++; 1153 1154 sz = strlen(start) + 1; 1155 buf_appendb(dbuf, start, sz); 1156 buf_appendb(buf, start, sz); 1157 1158 hash_put(hash, buf, TYPE_DESC); 1159 } 1160 } 1161 1162 if (pman_node(hash, buf, dbuf, n->child)) 1163 return(1); 1164 if (pman_node(hash, buf, dbuf, n->next)) 1165 return(1); 1166 1167 return(0); 1168 } 1169 1170 static void 1171 ofile_argbuild(char *argv[], int argc, int verb, struct of **of) 1172 { 1173 int i; 1174 struct of *nof; 1175 1176 for (i = 0; i < argc; i++) { 1177 nof = mandoc_calloc(1, sizeof(struct of)); 1178 nof->fname = strdup(argv[i]); 1179 if (verb > 2) 1180 printf("%s: Scheduling\n", argv[i]); 1181 if (NULL == *of) { 1182 *of = nof; 1183 (*of)->first = nof; 1184 } else { 1185 nof->first = (*of)->first; 1186 (*of)->next = nof; 1187 *of = nof; 1188 } 1189 } 1190 } 1191 1192 /* 1193 * Recursively build up a list of files to parse. 1194 * We use this instead of ftw() and so on because I don't want global 1195 * variables hanging around. 1196 * This ignores the mandoc.db and mandoc.index files, but assumes that 1197 * everything else is a manual. 1198 * Pass in a pointer to a NULL structure for the first invocation. 1199 */ 1200 static int 1201 ofile_dirbuild(const char *dir, int verb, struct of **of) 1202 { 1203 char buf[MAXPATHLEN]; 1204 size_t sz; 1205 DIR *d; 1206 const char *fn; 1207 struct of *nof; 1208 struct dirent *dp; 1209 1210 if (NULL == (d = opendir(dir))) { 1211 perror(dir); 1212 return(0); 1213 } 1214 1215 while (NULL != (dp = readdir(d))) { 1216 fn = dp->d_name; 1217 if (DT_DIR == dp->d_type) { 1218 if (0 == strcmp(".", fn)) 1219 continue; 1220 if (0 == strcmp("..", fn)) 1221 continue; 1222 1223 buf[0] = '\0'; 1224 strlcat(buf, dir, MAXPATHLEN); 1225 strlcat(buf, "/", MAXPATHLEN); 1226 sz = strlcat(buf, fn, MAXPATHLEN); 1227 1228 if (sz < MAXPATHLEN) { 1229 if ( ! ofile_dirbuild(buf, verb, of)) 1230 return(0); 1231 continue; 1232 } else if (sz < MAXPATHLEN) 1233 continue; 1234 1235 fprintf(stderr, "%s: Path too long\n", dir); 1236 return(0); 1237 } 1238 if (DT_REG != dp->d_type) 1239 continue; 1240 1241 if (0 == strcmp(MANDOC_DB, fn) || 1242 0 == strcmp(MANDOC_IDX, fn)) 1243 continue; 1244 1245 buf[0] = '\0'; 1246 strlcat(buf, dir, MAXPATHLEN); 1247 strlcat(buf, "/", MAXPATHLEN); 1248 sz = strlcat(buf, fn, MAXPATHLEN); 1249 if (sz >= MAXPATHLEN) { 1250 fprintf(stderr, "%s: Path too long\n", dir); 1251 return(0); 1252 } 1253 1254 nof = mandoc_calloc(1, sizeof(struct of)); 1255 nof->fname = mandoc_strdup(buf); 1256 1257 if (verb > 2) 1258 printf("%s: Scheduling\n", buf); 1259 1260 if (NULL == *of) { 1261 *of = nof; 1262 (*of)->first = nof; 1263 } else { 1264 nof->first = (*of)->first; 1265 (*of)->next = nof; 1266 *of = nof; 1267 } 1268 } 1269 1270 return(1); 1271 } 1272 1273 static void 1274 ofile_free(struct of *of) 1275 { 1276 struct of *nof; 1277 1278 while (of) { 1279 nof = of->next; 1280 free(of->fname); 1281 free(of); 1282 of = nof; 1283 } 1284 } 1285 1286 static void 1287 usage(void) 1288 { 1289 1290 fprintf(stderr, "usage: %s [-v] " 1291 "[-d dir [files...] |" 1292 " -u dir [files...] |" 1293 " dir...]\n", progname); 1294 } 1295