1 /* $Id: mandocdb.c,v 1.42 2012/05/24 23:33:23 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/param.h> 19 #include <sys/types.h> 20 21 #include <assert.h> 22 #include <ctype.h> 23 #include <dirent.h> 24 #include <errno.h> 25 #include <fcntl.h> 26 #include <getopt.h> 27 #include <stdio.h> 28 #include <stdint.h> 29 #include <stdlib.h> 30 #include <string.h> 31 #include <unistd.h> 32 #include <db.h> 33 34 #include "man.h" 35 #include "mdoc.h" 36 #include "mandoc.h" 37 #include "mandocdb.h" 38 #include "manpath.h" 39 40 #define MANDOC_BUFSZ BUFSIZ 41 #define MANDOC_SLOP 1024 42 43 #define MANDOC_SRC 0x1 44 #define MANDOC_FORM 0x2 45 46 /* Access to the mandoc database on disk. */ 47 48 struct mdb { 49 char idxn[MAXPATHLEN]; /* index db filename */ 50 char dbn[MAXPATHLEN]; /* keyword db filename */ 51 DB *idx; /* index recno database */ 52 DB *db; /* keyword btree database */ 53 }; 54 55 /* Stack of temporarily unused index records. 
*/ 56 57 struct recs { 58 recno_t *stack; /* pointer to a malloc'ed array */ 59 size_t size; /* number of allocated slots */ 60 size_t cur; /* current number of empty records */ 61 recno_t last; /* last record number in the index */ 62 }; 63 64 /* Tiny list for files. No need to bring in QUEUE. */ 65 66 struct of { 67 char *fname; /* heap-allocated */ 68 char *sec; 69 char *arch; 70 char *title; 71 int src_form; 72 struct of *next; /* NULL for last one */ 73 struct of *first; /* first in list */ 74 }; 75 76 /* Buffer for storing growable data. */ 77 78 struct buf { 79 char *cp; 80 size_t len; /* current length */ 81 size_t size; /* total buffer size */ 82 }; 83 84 /* Operation we're going to perform. */ 85 86 enum op { 87 OP_DEFAULT = 0, /* new dbs from dir list or default config */ 88 OP_CONFFILE, /* new databases from custom config file */ 89 OP_UPDATE, /* delete/add entries in existing database */ 90 OP_DELETE, /* delete entries from existing database */ 91 OP_TEST /* change no databases, report potential problems */ 92 }; 93 94 #define MAN_ARGS DB *hash, \ 95 struct buf *buf, \ 96 struct buf *dbuf, \ 97 const struct man_node *n 98 #define MDOC_ARGS DB *hash, \ 99 struct buf *buf, \ 100 struct buf *dbuf, \ 101 const struct mdoc_node *n, \ 102 const struct mdoc_meta *m 103 104 static void buf_appendmdoc(struct buf *, 105 const struct mdoc_node *, int); 106 static void buf_append(struct buf *, const char *); 107 static void buf_appendb(struct buf *, 108 const void *, size_t); 109 static void dbt_put(DB *, const char *, DBT *, DBT *); 110 static void hash_put(DB *, const struct buf *, uint64_t); 111 static void hash_reset(DB **); 112 static void index_merge(const struct of *, struct mparse *, 113 struct buf *, struct buf *, DB *, 114 struct mdb *, struct recs *); 115 static void index_prune(const struct of *, struct mdb *, 116 struct recs *); 117 static void ofile_argbuild(int, char *[], struct of **, 118 const char *); 119 static void ofile_dirbuild(const char *, 
const char *, 120 const char *, int, struct of **); 121 static void ofile_free(struct of *); 122 static void pformatted(DB *, struct buf *, 123 struct buf *, const struct of *); 124 static int pman_node(MAN_ARGS); 125 static void pmdoc_node(MDOC_ARGS); 126 static int pmdoc_head(MDOC_ARGS); 127 static int pmdoc_body(MDOC_ARGS); 128 static int pmdoc_Fd(MDOC_ARGS); 129 static int pmdoc_In(MDOC_ARGS); 130 static int pmdoc_Fn(MDOC_ARGS); 131 static int pmdoc_Nd(MDOC_ARGS); 132 static int pmdoc_Nm(MDOC_ARGS); 133 static int pmdoc_Sh(MDOC_ARGS); 134 static int pmdoc_St(MDOC_ARGS); 135 static int pmdoc_Xr(MDOC_ARGS); 136 137 #define MDOCF_CHILD 0x01 /* Automatically index child nodes. */ 138 139 struct mdoc_handler { 140 int (*fp)(MDOC_ARGS); /* Optional handler. */ 141 uint64_t mask; /* Set unless handler returns 0. */ 142 int flags; /* For use by pmdoc_node. */ 143 }; 144 145 static const struct mdoc_handler mdocs[MDOC_MAX] = { 146 { NULL, 0, 0 }, /* Ap */ 147 { NULL, 0, 0 }, /* Dd */ 148 { NULL, 0, 0 }, /* Dt */ 149 { NULL, 0, 0 }, /* Os */ 150 { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */ 151 { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */ 152 { NULL, 0, 0 }, /* Pp */ 153 { NULL, 0, 0 }, /* D1 */ 154 { NULL, 0, 0 }, /* Dl */ 155 { NULL, 0, 0 }, /* Bd */ 156 { NULL, 0, 0 }, /* Ed */ 157 { NULL, 0, 0 }, /* Bl */ 158 { NULL, 0, 0 }, /* El */ 159 { NULL, 0, 0 }, /* It */ 160 { NULL, 0, 0 }, /* Ad */ 161 { NULL, TYPE_An, MDOCF_CHILD }, /* An */ 162 { NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */ 163 { NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */ 164 { NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */ 165 { NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */ 166 { NULL, TYPE_Er, MDOCF_CHILD }, /* Er */ 167 { NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */ 168 { NULL, 0, 0 }, /* Ex */ 169 { NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */ 170 { pmdoc_Fd, TYPE_In, 0 }, /* Fd */ 171 { NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */ 172 { pmdoc_Fn, 0, 0 }, /* Fn */ 173 { NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */ 174 { NULL, TYPE_Ic, MDOCF_CHILD }, 
/* Ic */ 175 { pmdoc_In, TYPE_In, 0 }, /* In */ 176 { NULL, TYPE_Li, MDOCF_CHILD }, /* Li */ 177 { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */ 178 { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */ 179 { NULL, 0, 0 }, /* Op */ 180 { NULL, 0, 0 }, /* Ot */ 181 { NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */ 182 { NULL, 0, 0 }, /* Rv */ 183 { pmdoc_St, TYPE_St, 0 }, /* St */ 184 { NULL, TYPE_Va, MDOCF_CHILD }, /* Va */ 185 { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */ 186 { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */ 187 { NULL, 0, 0 }, /* %A */ 188 { NULL, 0, 0 }, /* %B */ 189 { NULL, 0, 0 }, /* %D */ 190 { NULL, 0, 0 }, /* %I */ 191 { NULL, 0, 0 }, /* %J */ 192 { NULL, 0, 0 }, /* %N */ 193 { NULL, 0, 0 }, /* %O */ 194 { NULL, 0, 0 }, /* %P */ 195 { NULL, 0, 0 }, /* %R */ 196 { NULL, 0, 0 }, /* %T */ 197 { NULL, 0, 0 }, /* %V */ 198 { NULL, 0, 0 }, /* Ac */ 199 { NULL, 0, 0 }, /* Ao */ 200 { NULL, 0, 0 }, /* Aq */ 201 { NULL, TYPE_At, MDOCF_CHILD }, /* At */ 202 { NULL, 0, 0 }, /* Bc */ 203 { NULL, 0, 0 }, /* Bf */ 204 { NULL, 0, 0 }, /* Bo */ 205 { NULL, 0, 0 }, /* Bq */ 206 { NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */ 207 { NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */ 208 { NULL, 0, 0 }, /* Db */ 209 { NULL, 0, 0 }, /* Dc */ 210 { NULL, 0, 0 }, /* Do */ 211 { NULL, 0, 0 }, /* Dq */ 212 { NULL, 0, 0 }, /* Ec */ 213 { NULL, 0, 0 }, /* Ef */ 214 { NULL, TYPE_Em, MDOCF_CHILD }, /* Em */ 215 { NULL, 0, 0 }, /* Eo */ 216 { NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */ 217 { NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */ 218 { NULL, 0, 0 }, /* No */ 219 { NULL, 0, 0 }, /* Ns */ 220 { NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */ 221 { NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */ 222 { NULL, 0, 0 }, /* Pc */ 223 { NULL, 0, 0 }, /* Pf */ 224 { NULL, 0, 0 }, /* Po */ 225 { NULL, 0, 0 }, /* Pq */ 226 { NULL, 0, 0 }, /* Qc */ 227 { NULL, 0, 0 }, /* Ql */ 228 { NULL, 0, 0 }, /* Qo */ 229 { NULL, 0, 0 }, /* Qq */ 230 { NULL, 0, 0 }, /* Re */ 231 { NULL, 0, 0 }, /* Rs */ 232 { NULL, 0, 0 }, /* Sc */ 233 { NULL, 0, 0 }, /* So */ 234 { 
NULL, 0, 0 }, /* Sq */ 235 { NULL, 0, 0 }, /* Sm */ 236 { NULL, 0, 0 }, /* Sx */ 237 { NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */ 238 { NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */ 239 { NULL, 0, 0 }, /* Ux */ 240 { NULL, 0, 0 }, /* Xc */ 241 { NULL, 0, 0 }, /* Xo */ 242 { pmdoc_head, TYPE_Fn, 0 }, /* Fo */ 243 { NULL, 0, 0 }, /* Fc */ 244 { NULL, 0, 0 }, /* Oo */ 245 { NULL, 0, 0 }, /* Oc */ 246 { NULL, 0, 0 }, /* Bk */ 247 { NULL, 0, 0 }, /* Ek */ 248 { NULL, 0, 0 }, /* Bt */ 249 { NULL, 0, 0 }, /* Hf */ 250 { NULL, 0, 0 }, /* Fr */ 251 { NULL, 0, 0 }, /* Ud */ 252 { NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */ 253 { NULL, 0, 0 }, /* Lp */ 254 { NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */ 255 { NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */ 256 { NULL, 0, 0 }, /* Brq */ 257 { NULL, 0, 0 }, /* Bro */ 258 { NULL, 0, 0 }, /* Brc */ 259 { NULL, 0, 0 }, /* %C */ 260 { NULL, 0, 0 }, /* Es */ 261 { NULL, 0, 0 }, /* En */ 262 { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */ 263 { NULL, 0, 0 }, /* %Q */ 264 { NULL, 0, 0 }, /* br */ 265 { NULL, 0, 0 }, /* sp */ 266 { NULL, 0, 0 }, /* %U */ 267 { NULL, 0, 0 }, /* Ta */ 268 }; 269 270 static const char *progname; 271 static int use_all; /* Use all directories and files. */ 272 static int verb; /* Output verbosity level. */ 273 static int warnings; /* Potential problems in manuals. 
*/ 274 275 int 276 mandocdb(int argc, char *argv[]) 277 { 278 struct mparse *mp; /* parse sequence */ 279 struct manpaths dirs; 280 struct mdb mdb; 281 struct recs recs; 282 enum op op; /* current operation */ 283 const char *dir; 284 char *cp; 285 char pbuf[PATH_MAX]; 286 int ch, i, flags; 287 DB *hash; /* temporary keyword hashtable */ 288 BTREEINFO info; /* btree configuration */ 289 size_t sz1, sz2; 290 struct buf buf, /* keyword buffer */ 291 dbuf; /* description buffer */ 292 struct of *of; /* list of files for processing */ 293 extern int optind; 294 extern char *optarg; 295 296 progname = strrchr(argv[0], '/'); 297 if (progname == NULL) 298 progname = argv[0]; 299 else 300 ++progname; 301 302 memset(&dirs, 0, sizeof(struct manpaths)); 303 memset(&mdb, 0, sizeof(struct mdb)); 304 memset(&recs, 0, sizeof(struct recs)); 305 306 of = NULL; 307 mp = NULL; 308 hash = NULL; 309 op = OP_DEFAULT; 310 dir = NULL; 311 312 while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW"))) 313 switch (ch) { 314 case ('a'): 315 use_all = 1; 316 break; 317 case ('C'): 318 if (op) { 319 fprintf(stderr, 320 "-C: conflicting options\n"); 321 goto usage; 322 } 323 dir = optarg; 324 op = OP_CONFFILE; 325 break; 326 case ('d'): 327 if (op) { 328 fprintf(stderr, 329 "-d: conflicting options\n"); 330 goto usage; 331 } 332 dir = optarg; 333 op = OP_UPDATE; 334 break; 335 case ('t'): 336 dup2(STDOUT_FILENO, STDERR_FILENO); 337 if (op) { 338 fprintf(stderr, 339 "-t: conflicting options\n"); 340 goto usage; 341 } 342 op = OP_TEST; 343 use_all = 1; 344 warnings = 1; 345 break; 346 case ('u'): 347 if (op) { 348 fprintf(stderr, 349 "-u: conflicting options\n"); 350 goto usage; 351 } 352 dir = optarg; 353 op = OP_DELETE; 354 break; 355 case ('v'): 356 verb++; 357 break; 358 case ('W'): 359 warnings = 1; 360 break; 361 default: 362 goto usage; 363 } 364 365 argc -= optind; 366 argv += optind; 367 368 if (OP_CONFFILE == op && argc > 0) { 369 fprintf(stderr, "-C: too many arguments\n"); 370 goto usage; 
371 } 372 373 memset(&info, 0, sizeof(BTREEINFO)); 374 info.lorder = 4321; 375 info.flags = R_DUP; 376 377 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL); 378 379 memset(&buf, 0, sizeof(struct buf)); 380 memset(&dbuf, 0, sizeof(struct buf)); 381 382 buf.size = dbuf.size = MANDOC_BUFSZ; 383 384 buf.cp = mandoc_malloc(buf.size); 385 dbuf.cp = mandoc_malloc(dbuf.size); 386 387 if (OP_TEST == op) { 388 ofile_argbuild(argc, argv, &of, NULL); 389 if (NULL == of) 390 goto out; 391 index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs); 392 goto out; 393 } 394 395 if (OP_UPDATE == op || OP_DELETE == op) { 396 if (NULL == realpath(dir, pbuf)) { 397 perror(dir); 398 exit((int)MANDOCLEVEL_BADARG); 399 } 400 if (strlcat(pbuf, "/", PATH_MAX) >= PATH_MAX) { 401 fprintf(stderr, "%s: path too long\n", pbuf); 402 exit((int)MANDOCLEVEL_BADARG); 403 } 404 405 strlcat(mdb.dbn, pbuf, MAXPATHLEN); 406 sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN); 407 408 strlcat(mdb.idxn, pbuf, MAXPATHLEN); 409 sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN); 410 411 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { 412 fprintf(stderr, "%s: path too long\n", mdb.idxn); 413 exit((int)MANDOCLEVEL_BADARG); 414 } 415 416 flags = O_CREAT | O_RDWR; 417 mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); 418 mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); 419 420 if (NULL == mdb.db) { 421 perror(mdb.dbn); 422 exit((int)MANDOCLEVEL_SYSERR); 423 } else if (NULL == mdb.idx) { 424 perror(mdb.idxn); 425 exit((int)MANDOCLEVEL_SYSERR); 426 } 427 428 ofile_argbuild(argc, argv, &of, pbuf); 429 430 if (NULL == of) 431 goto out; 432 433 index_prune(of, &mdb, &recs); 434 435 /* 436 * Go to the root of the respective manual tree. 437 * This must work or no manuals may be found (they're 438 * indexed relative to the root). 
439 */ 440 441 if (OP_UPDATE == op) { 442 if (-1 == chdir(dir)) { 443 perror(dir); 444 exit((int)MANDOCLEVEL_SYSERR); 445 } 446 index_merge(of, mp, &dbuf, &buf, hash, 447 &mdb, &recs); 448 } 449 450 goto out; 451 } 452 453 /* 454 * Configure the directories we're going to scan. 455 * If we have command-line arguments, use them. 456 * If not, we use man(1)'s method (see mandocdb.8). 457 */ 458 459 if (argc > 0) { 460 dirs.paths = mandoc_calloc(argc, sizeof(char *)); 461 dirs.sz = argc; 462 for (i = 0; i < argc; i++) { 463 if (NULL == (cp = realpath(argv[i], pbuf))) { 464 perror(argv[i]); 465 goto out; 466 } 467 dirs.paths[i] = mandoc_strdup(cp); 468 } 469 } else 470 manpath_parse(&dirs, dir, NULL, NULL); 471 472 for (i = 0; i < dirs.sz; i++) { 473 474 /* 475 * Go to the root of the respective manual tree. 476 * This must work or no manuals may be found: 477 * They are indexed relative to the root. 478 */ 479 480 if (-1 == chdir(dirs.paths[i])) { 481 perror(dirs.paths[i]); 482 exit((int)MANDOCLEVEL_SYSERR); 483 } 484 485 /* Create a new database in two temporary files. 
*/ 486 487 flags = O_CREAT | O_EXCL | O_RDWR; 488 while (NULL == mdb.db) { 489 strlcpy(mdb.dbn, MANDOC_DB, MAXPATHLEN); 490 strlcat(mdb.dbn, ".XXXXXXXXXX", MAXPATHLEN); 491 if (NULL == mktemp(mdb.dbn)) { 492 perror(mdb.dbn); 493 exit((int)MANDOCLEVEL_SYSERR); 494 } 495 mdb.db = dbopen(mdb.dbn, flags, 0644, 496 DB_BTREE, &info); 497 if (NULL == mdb.db && EEXIST != errno) { 498 perror(mdb.dbn); 499 exit((int)MANDOCLEVEL_SYSERR); 500 } 501 } 502 while (NULL == mdb.idx) { 503 strlcpy(mdb.idxn, MANDOC_IDX, MAXPATHLEN); 504 strlcat(mdb.idxn, ".XXXXXXXXXX", MAXPATHLEN); 505 if (NULL == mktemp(mdb.idxn)) { 506 perror(mdb.idxn); 507 unlink(mdb.dbn); 508 exit((int)MANDOCLEVEL_SYSERR); 509 } 510 mdb.idx = dbopen(mdb.idxn, flags, 0644, 511 DB_RECNO, NULL); 512 if (NULL == mdb.idx && EEXIST != errno) { 513 perror(mdb.idxn); 514 unlink(mdb.dbn); 515 exit((int)MANDOCLEVEL_SYSERR); 516 } 517 } 518 519 /* 520 * Search for manuals and fill the new database. 521 */ 522 523 ofile_dirbuild(".", "", "", 0, &of); 524 525 if (NULL != of) { 526 index_merge(of, mp, &dbuf, &buf, hash, 527 &mdb, &recs); 528 ofile_free(of); 529 of = NULL; 530 } 531 532 (*mdb.db->close)(mdb.db); 533 (*mdb.idx->close)(mdb.idx); 534 mdb.db = NULL; 535 mdb.idx = NULL; 536 537 /* 538 * Replace the old database with the new one. 539 * This is not perfectly atomic, 540 * but i cannot think of a better way. 
541 */ 542 543 if (-1 == rename(mdb.dbn, MANDOC_DB)) { 544 perror(MANDOC_DB); 545 unlink(mdb.dbn); 546 unlink(mdb.idxn); 547 exit((int)MANDOCLEVEL_SYSERR); 548 } 549 if (-1 == rename(mdb.idxn, MANDOC_IDX)) { 550 perror(MANDOC_IDX); 551 unlink(MANDOC_DB); 552 unlink(MANDOC_IDX); 553 unlink(mdb.idxn); 554 exit((int)MANDOCLEVEL_SYSERR); 555 } 556 } 557 558 out: 559 if (mdb.db) 560 (*mdb.db->close)(mdb.db); 561 if (mdb.idx) 562 (*mdb.idx->close)(mdb.idx); 563 if (hash) 564 (*hash->close)(hash); 565 if (mp) 566 mparse_free(mp); 567 568 manpath_free(&dirs); 569 ofile_free(of); 570 free(buf.cp); 571 free(dbuf.cp); 572 free(recs.stack); 573 574 return(MANDOCLEVEL_OK); 575 576 usage: 577 fprintf(stderr, 578 "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n" 579 " -d dir [file ...] | " 580 "-u dir [file ...]\n", 581 progname); 582 583 return((int)MANDOCLEVEL_BADARG); 584 } 585 586 void 587 index_merge(const struct of *of, struct mparse *mp, 588 struct buf *dbuf, struct buf *buf, DB *hash, 589 struct mdb *mdb, struct recs *recs) 590 { 591 recno_t rec; 592 int ch, skip; 593 DBT key, val; 594 DB *files; /* temporary file name table */ 595 struct mdoc *mdoc; 596 struct man *man; 597 const char *fn, *msec, *march, *mtitle; 598 char *p; 599 uint64_t mask; 600 size_t sv; 601 unsigned seq; 602 uint64_t vbuf[2]; 603 char type; 604 605 if (warnings) { 606 files = NULL; 607 hash_reset(&files); 608 } 609 610 rec = 0; 611 for (of = of->first; of; of = of->next) { 612 fn = of->fname; 613 614 /* 615 * Try interpreting the file as mdoc(7) or man(7) 616 * source code, unless it is already known to be 617 * formatted. Fall back to formatted mode. 618 */ 619 620 mparse_reset(mp); 621 mdoc = NULL; 622 man = NULL; 623 624 if ((MANDOC_SRC & of->src_form || 625 ! 
(MANDOC_FORM & of->src_form)) && 626 MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) 627 mparse_result(mp, &mdoc, &man); 628 629 if (NULL != mdoc) { 630 msec = mdoc_meta(mdoc)->msec; 631 march = mdoc_meta(mdoc)->arch; 632 if (NULL == march) 633 march = ""; 634 mtitle = mdoc_meta(mdoc)->title; 635 } else if (NULL != man) { 636 msec = man_meta(man)->msec; 637 march = ""; 638 mtitle = man_meta(man)->title; 639 } else { 640 msec = of->sec; 641 march = of->arch; 642 mtitle = of->title; 643 } 644 645 /* 646 * Check whether the manual section given in a file 647 * agrees with the directory where the file is located. 648 * Some manuals have suffixes like (3p) on their 649 * section number either inside the file or in the 650 * directory name, some are linked into more than one 651 * section, like encrypt(1) = makekey(8). Do not skip 652 * manuals for such reasons. 653 */ 654 655 skip = 0; 656 assert(of->sec); 657 assert(msec); 658 if (warnings) 659 if (strcasecmp(msec, of->sec)) 660 fprintf(stderr, "%s: " 661 "section \"%s\" manual " 662 "in \"%s\" directory\n", 663 fn, msec, of->sec); 664 665 /* 666 * Manual page directories exist for each kernel 667 * architecture as returned by machine(1). 668 * However, many manuals only depend on the 669 * application architecture as returned by arch(1). 670 * For example, some (2/ARM) manuals are shared 671 * across the "armish" and "zaurus" kernel 672 * architectures. 673 * A few manuals are even shared across completely 674 * different architectures, for example fdformat(1) 675 * on amd64, i386, sparc, and sparc64. 676 * Thus, warn about architecture mismatches, 677 * but don't skip manuals for this reason. 678 */ 679 680 assert(of->arch); 681 assert(march); 682 if (warnings) 683 if (strcasecmp(march, of->arch)) 684 fprintf(stderr, "%s: " 685 "architecture \"%s\" manual " 686 "in \"%s\" directory\n", 687 fn, march, of->arch); 688 689 /* 690 * By default, skip a file if the title given 691 * in the file disagrees with the file name. 
692 * Do not warn, this happens for all MLINKs. 693 */ 694 695 assert(of->title); 696 assert(mtitle); 697 if (strcasecmp(mtitle, of->title)) 698 skip = 1; 699 700 /* 701 * Build a title string for the file. If it matches 702 * the location of the file, remember the title as 703 * found; else, remember it as missing. 704 */ 705 706 if (warnings) { 707 buf->len = 0; 708 buf_appendb(buf, mtitle, strlen(mtitle)); 709 buf_appendb(buf, "(", 1); 710 buf_appendb(buf, msec, strlen(msec)); 711 if ('\0' != *march) { 712 buf_appendb(buf, "/", 1); 713 buf_appendb(buf, march, strlen(march)); 714 } 715 buf_appendb(buf, ")", 2); 716 for (p = buf->cp; '\0' != *p; p++) 717 *p = tolower(*p); 718 key.data = buf->cp; 719 key.size = buf->len; 720 val.data = NULL; 721 val.size = 0; 722 if (0 == skip) 723 val.data = ""; 724 else { 725 ch = (*files->get)(files, &key, &val, 0); 726 if (ch < 0) { 727 perror("hash"); 728 exit((int)MANDOCLEVEL_SYSERR); 729 } else if (ch > 0) { 730 val.data = (void *)fn; 731 val.size = strlen(fn) + 1; 732 } else 733 val.data = NULL; 734 } 735 if (NULL != val.data && 736 (*files->put)(files, &key, &val, 0) < 0) { 737 perror("hash"); 738 exit((int)MANDOCLEVEL_SYSERR); 739 } 740 } 741 742 if (skip && !use_all) 743 continue; 744 745 /* 746 * The index record value consists of a nil-terminated 747 * filename, a nil-terminated manual section, and a 748 * nil-terminated description. Use the actual 749 * location of the file, such that the user can find 750 * it with man(1). Since the description may not be 751 * set, we set a sentinel to see if we're going to 752 * write a nil byte in its place. 753 */ 754 755 dbuf->len = 0; 756 type = mdoc ? 'd' : (man ? 
'a' : 'c'); 757 buf_appendb(dbuf, &type, 1); 758 buf_appendb(dbuf, fn, strlen(fn) + 1); 759 buf_appendb(dbuf, of->sec, strlen(of->sec) + 1); 760 buf_appendb(dbuf, of->title, strlen(of->title) + 1); 761 buf_appendb(dbuf, of->arch, strlen(of->arch) + 1); 762 763 sv = dbuf->len; 764 765 /* 766 * Collect keyword/mask pairs. 767 * Each pair will become a new btree node. 768 */ 769 770 hash_reset(&hash); 771 if (mdoc) 772 pmdoc_node(hash, buf, dbuf, 773 mdoc_node(mdoc), mdoc_meta(mdoc)); 774 else if (man) 775 pman_node(hash, buf, dbuf, man_node(man)); 776 else 777 pformatted(hash, buf, dbuf, of); 778 779 /* Test mode, do not access any database. */ 780 781 if (NULL == mdb->db || NULL == mdb->idx) 782 continue; 783 784 /* 785 * Make sure the file name is always registered 786 * as an .Nm search key. 787 */ 788 buf->len = 0; 789 buf_append(buf, of->title); 790 hash_put(hash, buf, TYPE_Nm); 791 792 /* 793 * Reclaim an empty index record, if available. 794 * Use its record number for all new btree nodes. 795 */ 796 797 if (recs->cur > 0) { 798 recs->cur--; 799 rec = recs->stack[(int)recs->cur]; 800 } else if (recs->last > 0) { 801 rec = recs->last; 802 recs->last = 0; 803 } else 804 rec++; 805 vbuf[1] = htobe64(rec); 806 807 /* 808 * Copy from the in-memory hashtable of pending 809 * keyword/mask pairs into the database. 810 */ 811 812 seq = R_FIRST; 813 while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { 814 seq = R_NEXT; 815 assert(sizeof(uint64_t) == val.size); 816 memcpy(&mask, val.data, val.size); 817 vbuf[0] = htobe64(mask); 818 val.size = sizeof(vbuf); 819 val.data = &vbuf; 820 dbt_put(mdb->db, mdb->dbn, &key, &val); 821 } 822 if (ch < 0) { 823 perror("hash"); 824 unlink(mdb->dbn); 825 unlink(mdb->idxn); 826 exit((int)MANDOCLEVEL_SYSERR); 827 } 828 829 /* 830 * Apply to the index. If we haven't had a description 831 * set, put an empty one in now. 
832 */ 833 834 if (dbuf->len == sv) 835 buf_appendb(dbuf, "", 1); 836 837 key.data = &rec; 838 key.size = sizeof(recno_t); 839 840 val.data = dbuf->cp; 841 val.size = dbuf->len; 842 843 if (verb) 844 printf("%s: adding to index\n", fn); 845 846 dbt_put(mdb->idx, mdb->idxn, &key, &val); 847 } 848 849 /* 850 * Iterate the remembered file titles and check that 851 * all files can be found by their main title. 852 */ 853 854 if (warnings) { 855 seq = R_FIRST; 856 while (0 == (*files->seq)(files, &key, &val, seq)) { 857 seq = R_NEXT; 858 if (val.size) 859 fprintf(stderr, "%s: probably " 860 "unreachable, title is %s\n", 861 (char *)val.data, (char *)key.data); 862 } 863 (*files->close)(files); 864 } 865 } 866 867 /* 868 * Scan through all entries in the index file `idx' and prune those 869 * entries in `ofile'. 870 * Pruning consists of removing from `db', then invalidating the entry 871 * in `idx' (zeroing its value size). 872 */ 873 static void 874 index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs) 875 { 876 const struct of *of; 877 const char *fn; 878 uint64_t vbuf[2]; 879 unsigned seq, sseq; 880 DBT key, val; 881 int ch; 882 883 recs->cur = 0; 884 seq = R_FIRST; 885 while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) { 886 seq = R_NEXT; 887 assert(sizeof(recno_t) == key.size); 888 memcpy(&recs->last, key.data, key.size); 889 890 /* Deleted records are zero-sized. Skip them. */ 891 892 if (0 == val.size) 893 goto cont; 894 895 /* 896 * Make sure we're sane. 897 * Read past our mdoc/man/cat type to the next string, 898 * then make sure it's bounded by a NUL. 899 * Failing any of these, we go into our error handler. 900 */ 901 902 fn = (char *)val.data + 1; 903 if (NULL == memchr(fn, '\0', val.size - 1)) 904 break; 905 906 /* 907 * Search for the file in those we care about. 908 * XXX: build this into a tree. Too slow. 
909 */ 910 911 for (of = ofile->first; of; of = of->next) 912 if (0 == strcmp(fn, of->fname)) 913 break; 914 915 if (NULL == of) 916 continue; 917 918 /* 919 * Search through the keyword database, throwing out all 920 * references to our file. 921 */ 922 923 sseq = R_FIRST; 924 while (0 == (ch = (*mdb->db->seq)(mdb->db, 925 &key, &val, sseq))) { 926 sseq = R_NEXT; 927 if (sizeof(vbuf) != val.size) 928 break; 929 930 memcpy(vbuf, val.data, val.size); 931 if (recs->last != betoh64(vbuf[1])) 932 continue; 933 934 if ((ch = (*mdb->db->del)(mdb->db, 935 &key, R_CURSOR)) < 0) 936 break; 937 } 938 939 if (ch < 0) { 940 perror(mdb->dbn); 941 exit((int)MANDOCLEVEL_SYSERR); 942 } else if (1 != ch) { 943 fprintf(stderr, "%s: corrupt database\n", 944 mdb->dbn); 945 exit((int)MANDOCLEVEL_SYSERR); 946 } 947 948 if (verb) 949 printf("%s: deleting from index\n", fn); 950 951 val.size = 0; 952 ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR); 953 954 if (ch < 0) 955 break; 956 cont: 957 if (recs->cur >= recs->size) { 958 recs->size += MANDOC_SLOP; 959 recs->stack = mandoc_realloc(recs->stack, 960 recs->size * sizeof(recno_t)); 961 } 962 963 recs->stack[(int)recs->cur] = recs->last; 964 recs->cur++; 965 } 966 967 if (ch < 0) { 968 perror(mdb->idxn); 969 exit((int)MANDOCLEVEL_SYSERR); 970 } else if (1 != ch) { 971 fprintf(stderr, "%s: corrupt index\n", mdb->idxn); 972 exit((int)MANDOCLEVEL_SYSERR); 973 } 974 975 recs->last++; 976 } 977 978 /* 979 * Grow the buffer (if necessary) and copy in a binary string. 980 */ 981 static void 982 buf_appendb(struct buf *buf, const void *cp, size_t sz) 983 { 984 985 /* Overshoot by MANDOC_BUFSZ. */ 986 987 while (buf->len + sz >= buf->size) { 988 buf->size = buf->len + sz + MANDOC_BUFSZ; 989 buf->cp = mandoc_realloc(buf->cp, buf->size); 990 } 991 992 memcpy(buf->cp + (int)buf->len, cp, sz); 993 buf->len += sz; 994 } 995 996 /* 997 * Append a nil-terminated string to the buffer. 998 * This can be invoked multiple times. 
999 * The buffer string will be nil-terminated. 1000 * If invoked multiple times, a space is put between strings. 1001 */ 1002 static void 1003 buf_append(struct buf *buf, const char *cp) 1004 { 1005 size_t sz; 1006 1007 if (0 == (sz = strlen(cp))) 1008 return; 1009 1010 if (buf->len) 1011 buf->cp[(int)buf->len - 1] = ' '; 1012 1013 buf_appendb(buf, cp, sz + 1); 1014 } 1015 1016 /* 1017 * Recursively add all text from a given node. 1018 * This is optimised for general mdoc nodes in this context, which do 1019 * not consist of subexpressions and having a recursive call for n->next 1020 * would be wasteful. 1021 * The "f" variable should be 0 unless called from pmdoc_Nd for the 1022 * description buffer, which does not start at the beginning of the 1023 * buffer. 1024 */ 1025 static void 1026 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f) 1027 { 1028 1029 for ( ; n; n = n->next) { 1030 if (n->child) 1031 buf_appendmdoc(buf, n->child, f); 1032 1033 if (MDOC_TEXT == n->type && f) { 1034 f = 0; 1035 buf_appendb(buf, n->string, 1036 strlen(n->string) + 1); 1037 } else if (MDOC_TEXT == n->type) 1038 buf_append(buf, n->string); 1039 1040 } 1041 } 1042 1043 static void 1044 hash_reset(DB **db) 1045 { 1046 DB *hash; 1047 1048 if (NULL != (hash = *db)) 1049 (*hash->close)(hash); 1050 1051 *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL); 1052 if (NULL == *db) { 1053 perror("hash"); 1054 exit((int)MANDOCLEVEL_SYSERR); 1055 } 1056 } 1057 1058 /* ARGSUSED */ 1059 static int 1060 pmdoc_head(MDOC_ARGS) 1061 { 1062 1063 return(MDOC_HEAD == n->type); 1064 } 1065 1066 /* ARGSUSED */ 1067 static int 1068 pmdoc_body(MDOC_ARGS) 1069 { 1070 1071 return(MDOC_BODY == n->type); 1072 } 1073 1074 /* ARGSUSED */ 1075 static int 1076 pmdoc_Fd(MDOC_ARGS) 1077 { 1078 const char *start, *end; 1079 size_t sz; 1080 1081 if (SEC_SYNOPSIS != n->sec) 1082 return(0); 1083 if (NULL == (n = n->child) || MDOC_TEXT != n->type) 1084 return(0); 1085 1086 /* 1087 * Only consider those 
`Fd' macro fields that begin with an 1088 * "inclusion" token (versus, e.g., #define). 1089 */ 1090 if (strcmp("#include", n->string)) 1091 return(0); 1092 1093 if (NULL == (n = n->next) || MDOC_TEXT != n->type) 1094 return(0); 1095 1096 /* 1097 * Strip away the enclosing angle brackets and make sure we're 1098 * not zero-length. 1099 */ 1100 1101 start = n->string; 1102 if ('<' == *start || '"' == *start) 1103 start++; 1104 1105 if (0 == (sz = strlen(start))) 1106 return(0); 1107 1108 end = &start[(int)sz - 1]; 1109 if ('>' == *end || '"' == *end) 1110 end--; 1111 1112 assert(end >= start); 1113 1114 buf_appendb(buf, start, (size_t)(end - start + 1)); 1115 buf_appendb(buf, "", 1); 1116 return(1); 1117 } 1118 1119 /* ARGSUSED */ 1120 static int 1121 pmdoc_In(MDOC_ARGS) 1122 { 1123 1124 if (NULL == n->child || MDOC_TEXT != n->child->type) 1125 return(0); 1126 1127 buf_append(buf, n->child->string); 1128 return(1); 1129 } 1130 1131 /* ARGSUSED */ 1132 static int 1133 pmdoc_Fn(MDOC_ARGS) 1134 { 1135 struct mdoc_node *nn; 1136 const char *cp; 1137 1138 nn = n->child; 1139 1140 if (NULL == nn || MDOC_TEXT != nn->type) 1141 return(0); 1142 1143 /* .Fn "struct type *name" "char *arg" */ 1144 1145 cp = strrchr(nn->string, ' '); 1146 if (NULL == cp) 1147 cp = nn->string; 1148 1149 /* Strip away pointer symbol. */ 1150 1151 while ('*' == *cp) 1152 cp++; 1153 1154 /* Store the function name. */ 1155 1156 buf_append(buf, cp); 1157 hash_put(hash, buf, TYPE_Fn); 1158 1159 /* Store the function type. */ 1160 1161 if (nn->string < cp) { 1162 buf->len = 0; 1163 buf_appendb(buf, nn->string, cp - nn->string); 1164 buf_appendb(buf, "", 1); 1165 hash_put(hash, buf, TYPE_Ft); 1166 } 1167 1168 /* Store the arguments. 
*/ 1169 1170 for (nn = nn->next; nn; nn = nn->next) { 1171 if (MDOC_TEXT != nn->type) 1172 continue; 1173 buf->len = 0; 1174 buf_append(buf, nn->string); 1175 hash_put(hash, buf, TYPE_Fa); 1176 } 1177 1178 return(0); 1179 } 1180 1181 /* ARGSUSED */ 1182 static int 1183 pmdoc_St(MDOC_ARGS) 1184 { 1185 1186 if (NULL == n->child || MDOC_TEXT != n->child->type) 1187 return(0); 1188 1189 buf_append(buf, n->child->string); 1190 return(1); 1191 } 1192 1193 /* ARGSUSED */ 1194 static int 1195 pmdoc_Xr(MDOC_ARGS) 1196 { 1197 1198 if (NULL == (n = n->child)) 1199 return(0); 1200 1201 buf_appendb(buf, n->string, strlen(n->string)); 1202 1203 if (NULL != (n = n->next)) { 1204 buf_appendb(buf, ".", 1); 1205 buf_appendb(buf, n->string, strlen(n->string) + 1); 1206 } else 1207 buf_appendb(buf, ".", 2); 1208 1209 return(1); 1210 } 1211 1212 /* ARGSUSED */ 1213 static int 1214 pmdoc_Nd(MDOC_ARGS) 1215 { 1216 1217 if (MDOC_BODY != n->type) 1218 return(0); 1219 1220 buf_appendmdoc(dbuf, n->child, 1); 1221 return(1); 1222 } 1223 1224 /* ARGSUSED */ 1225 static int 1226 pmdoc_Nm(MDOC_ARGS) 1227 { 1228 1229 if (SEC_NAME == n->sec) 1230 return(1); 1231 else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) 1232 return(0); 1233 1234 if (NULL == n->child) 1235 buf_append(buf, m->name); 1236 1237 return(1); 1238 } 1239 1240 /* ARGSUSED */ 1241 static int 1242 pmdoc_Sh(MDOC_ARGS) 1243 { 1244 1245 return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); 1246 } 1247 1248 static void 1249 hash_put(DB *db, const struct buf *buf, uint64_t mask) 1250 { 1251 uint64_t oldmask; 1252 DBT key, val; 1253 int rc; 1254 1255 if (buf->len < 2) 1256 return; 1257 1258 key.data = buf->cp; 1259 key.size = buf->len; 1260 1261 if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { 1262 perror("hash"); 1263 exit((int)MANDOCLEVEL_SYSERR); 1264 } else if (0 == rc) { 1265 assert(sizeof(uint64_t) == val.size); 1266 memcpy(&oldmask, val.data, val.size); 1267 mask |= oldmask; 1268 } 1269 1270 val.data = &mask; 1271 val.size = 
sizeof(uint64_t); 1272 1273 if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { 1274 perror("hash"); 1275 exit((int)MANDOCLEVEL_SYSERR); 1276 } 1277 } 1278 1279 static void 1280 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) 1281 { 1282 1283 assert(key->size); 1284 assert(val->size); 1285 1286 if (0 == (*db->put)(db, key, val, 0)) 1287 return; 1288 1289 perror(dbn); 1290 exit((int)MANDOCLEVEL_SYSERR); 1291 /* NOTREACHED */ 1292 } 1293 1294 /* 1295 * Call out to per-macro handlers after clearing the persistent database 1296 * key. If the macro sets the database key, flush it to the database. 1297 */ 1298 static void 1299 pmdoc_node(MDOC_ARGS) 1300 { 1301 1302 if (NULL == n) 1303 return; 1304 1305 switch (n->type) { 1306 case (MDOC_HEAD): 1307 /* FALLTHROUGH */ 1308 case (MDOC_BODY): 1309 /* FALLTHROUGH */ 1310 case (MDOC_TAIL): 1311 /* FALLTHROUGH */ 1312 case (MDOC_BLOCK): 1313 /* FALLTHROUGH */ 1314 case (MDOC_ELEM): 1315 buf->len = 0; 1316 1317 /* 1318 * Both NULL handlers and handlers returning true 1319 * request using the data. Only skip the element 1320 * when the handler returns false. 1321 */ 1322 1323 if (NULL != mdocs[n->tok].fp && 1324 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m)) 1325 break; 1326 1327 /* 1328 * For many macros, use the text from all children. 1329 * Set zero flags for macros not needing this. 1330 * In that case, the handler must fill the buffer. 1331 */ 1332 1333 if (MDOCF_CHILD & mdocs[n->tok].flags) 1334 buf_appendmdoc(buf, n->child, 0); 1335 1336 /* 1337 * Cover the most common case: 1338 * Automatically stage one string per element. 1339 * Set a zero mask for macros not needing this. 1340 * Additional staging can be done in the handler. 
1341 */ 1342 1343 if (mdocs[n->tok].mask) 1344 hash_put(hash, buf, mdocs[n->tok].mask); 1345 break; 1346 default: 1347 break; 1348 } 1349 1350 pmdoc_node(hash, buf, dbuf, n->child, m); 1351 pmdoc_node(hash, buf, dbuf, n->next, m); 1352 } 1353 1354 static int 1355 pman_node(MAN_ARGS) 1356 { 1357 const struct man_node *head, *body; 1358 char *start, *sv, *title; 1359 size_t sz, titlesz; 1360 1361 if (NULL == n) 1362 return(0); 1363 1364 /* 1365 * We're only searching for one thing: the first text child in 1366 * the BODY of a NAME section. Since we don't keep track of 1367 * sections in -man, run some hoops to find out whether we're in 1368 * the correct section or not. 1369 */ 1370 1371 if (MAN_BODY == n->type && MAN_SH == n->tok) { 1372 body = n; 1373 assert(body->parent); 1374 if (NULL != (head = body->parent->head) && 1375 1 == head->nchild && 1376 NULL != (head = (head->child)) && 1377 MAN_TEXT == head->type && 1378 0 == strcmp(head->string, "NAME") && 1379 NULL != (body = body->child) && 1380 MAN_TEXT == body->type) { 1381 1382 title = NULL; 1383 titlesz = 0; 1384 /* 1385 * Suck the entire NAME section into memory. 1386 * Yes, we might run away. 1387 * But too many manuals have big, spread-out 1388 * NAME sections over many lines. 1389 */ 1390 for ( ; NULL != body; body = body->next) { 1391 if (MAN_TEXT != body->type) 1392 break; 1393 if (0 == (sz = strlen(body->string))) 1394 continue; 1395 title = mandoc_realloc 1396 (title, titlesz + sz + 1); 1397 memcpy(title + titlesz, body->string, sz); 1398 titlesz += sz + 1; 1399 title[(int)titlesz - 1] = ' '; 1400 } 1401 if (NULL == title) 1402 return(0); 1403 1404 title = mandoc_realloc(title, titlesz + 1); 1405 title[(int)titlesz] = '\0'; 1406 1407 /* Skip leading space. */ 1408 1409 sv = title; 1410 while (isspace((unsigned char)*sv)) 1411 sv++; 1412 1413 if (0 == (sz = strlen(sv))) { 1414 free(title); 1415 return(0); 1416 } 1417 1418 /* Erase trailing space. 
*/ 1419 1420 start = &sv[sz - 1]; 1421 while (start > sv && isspace((unsigned char)*start)) 1422 *start-- = '\0'; 1423 1424 if (start == sv) { 1425 free(title); 1426 return(0); 1427 } 1428 1429 start = sv; 1430 1431 /* 1432 * Go through a special heuristic dance here. 1433 * This is why -man manuals are great! 1434 * (I'm being sarcastic: my eyes are bleeding.) 1435 * Conventionally, one or more manual names are 1436 * comma-specified prior to a whitespace, then a 1437 * dash, then a description. Try to puzzle out 1438 * the name parts here. 1439 */ 1440 1441 for ( ;; ) { 1442 sz = strcspn(start, " ,"); 1443 if ('\0' == start[(int)sz]) 1444 break; 1445 1446 buf->len = 0; 1447 buf_appendb(buf, start, sz); 1448 buf_appendb(buf, "", 1); 1449 1450 hash_put(hash, buf, TYPE_Nm); 1451 1452 if (' ' == start[(int)sz]) { 1453 start += (int)sz + 1; 1454 break; 1455 } 1456 1457 assert(',' == start[(int)sz]); 1458 start += (int)sz + 1; 1459 while (' ' == *start) 1460 start++; 1461 } 1462 1463 buf->len = 0; 1464 1465 if (sv == start) { 1466 buf_append(buf, start); 1467 free(title); 1468 return(1); 1469 } 1470 1471 while (isspace((unsigned char)*start)) 1472 start++; 1473 1474 if (0 == strncmp(start, "-", 1)) 1475 start += 1; 1476 else if (0 == strncmp(start, "\\-\\-", 4)) 1477 start += 4; 1478 else if (0 == strncmp(start, "\\-", 2)) 1479 start += 2; 1480 else if (0 == strncmp(start, "\\(en", 4)) 1481 start += 4; 1482 else if (0 == strncmp(start, "\\(em", 4)) 1483 start += 4; 1484 1485 while (' ' == *start) 1486 start++; 1487 1488 sz = strlen(start) + 1; 1489 buf_appendb(dbuf, start, sz); 1490 buf_appendb(buf, start, sz); 1491 1492 hash_put(hash, buf, TYPE_Nd); 1493 free(title); 1494 } 1495 } 1496 1497 for (n = n->child; n; n = n->next) 1498 if (pman_node(hash, buf, dbuf, n)) 1499 return(1); 1500 1501 return(0); 1502 } 1503 1504 /* 1505 * Parse a formatted manual page. 1506 * By necessity, this involves rather crude guesswork. 
1507 */ 1508 static void 1509 pformatted(DB *hash, struct buf *buf, 1510 struct buf *dbuf, const struct of *of) 1511 { 1512 FILE *stream; 1513 char *line, *p, *title; 1514 size_t len, plen, titlesz; 1515 1516 if (NULL == (stream = fopen(of->fname, "r"))) { 1517 if (warnings) 1518 perror(of->fname); 1519 return; 1520 } 1521 1522 /* 1523 * Always use the title derived from the filename up front, 1524 * do not even try to find it in the file. This also makes 1525 * sure we don't end up with an orphan index record, even if 1526 * the file content turns out to be completely unintelligible. 1527 */ 1528 1529 buf->len = 0; 1530 buf_append(buf, of->title); 1531 hash_put(hash, buf, TYPE_Nm); 1532 1533 /* Skip to first blank line. */ 1534 1535 while (NULL != (line = fgetln(stream, &len))) 1536 if ('\n' == *line) 1537 break; 1538 1539 /* 1540 * Assume the first line that is not indented 1541 * is the first section header. Skip to it. 1542 */ 1543 1544 while (NULL != (line = fgetln(stream, &len))) 1545 if ('\n' != *line && ' ' != *line) 1546 break; 1547 1548 /* 1549 * Read up until the next section into a buffer. 1550 * Strip the leading and trailing newline from each read line, 1551 * appending a trailing space. 1552 * Ignore empty (whitespace-only) lines. 1553 */ 1554 1555 titlesz = 0; 1556 title = NULL; 1557 1558 while (NULL != (line = fgetln(stream, &len))) { 1559 if (' ' != *line || '\n' != line[(int)len - 1]) 1560 break; 1561 while (len > 0 && isspace((unsigned char)*line)) { 1562 line++; 1563 len--; 1564 } 1565 if (1 == len) 1566 continue; 1567 title = mandoc_realloc(title, titlesz + len); 1568 memcpy(title + titlesz, line, len); 1569 titlesz += len; 1570 title[(int)titlesz - 1] = ' '; 1571 } 1572 1573 1574 /* 1575 * If no page content can be found, or the input line 1576 * is already the next section header, or there is no 1577 * trailing newline, reuse the page title as the page 1578 * description. 
1579 */ 1580 1581 if (NULL == title || '\0' == *title) { 1582 if (warnings) 1583 fprintf(stderr, "%s: cannot find NAME section\n", 1584 of->fname); 1585 buf_appendb(dbuf, buf->cp, buf->size); 1586 hash_put(hash, buf, TYPE_Nd); 1587 fclose(stream); 1588 free(title); 1589 return; 1590 } 1591 1592 title = mandoc_realloc(title, titlesz + 1); 1593 title[(int)titlesz] = '\0'; 1594 1595 /* 1596 * Skip to the first dash. 1597 * Use the remaining line as the description (no more than 70 1598 * bytes). 1599 */ 1600 1601 if (NULL != (p = strstr(title, "- "))) { 1602 for (p += 2; ' ' == *p || '\b' == *p; p++) 1603 /* Skip to next word. */ ; 1604 } else { 1605 if (warnings) 1606 fprintf(stderr, "%s: no dash in title line\n", 1607 of->fname); 1608 p = title; 1609 } 1610 1611 plen = strlen(p); 1612 1613 /* Strip backspace-encoding from line. */ 1614 1615 while (NULL != (line = memchr(p, '\b', plen))) { 1616 len = line - p; 1617 if (0 == len) { 1618 memmove(line, line + 1, plen--); 1619 continue; 1620 } 1621 memmove(line - 1, line + 1, plen - len); 1622 plen -= 2; 1623 } 1624 1625 buf_appendb(dbuf, p, plen + 1); 1626 buf->len = 0; 1627 buf_appendb(buf, p, plen + 1); 1628 hash_put(hash, buf, TYPE_Nd); 1629 fclose(stream); 1630 free(title); 1631 } 1632 1633 static void 1634 ofile_argbuild(int argc, char *argv[], struct of **of, 1635 const char *basedir) 1636 { 1637 char buf[MAXPATHLEN]; 1638 char pbuf[PATH_MAX]; 1639 const char *sec, *arch, *title; 1640 char *relpath, *p; 1641 int i, src_form; 1642 struct of *nof; 1643 1644 for (i = 0; i < argc; i++) { 1645 if (NULL == (relpath = realpath(argv[i], pbuf))) { 1646 perror(argv[i]); 1647 continue; 1648 } 1649 if (NULL != basedir) { 1650 if (strstr(pbuf, basedir) != pbuf) { 1651 fprintf(stderr, "%s: file outside " 1652 "base directory %s\n", 1653 pbuf, basedir); 1654 continue; 1655 } 1656 relpath = pbuf + strlen(basedir); 1657 } 1658 1659 /* 1660 * Try to infer the manual section, architecture and 1661 * page title from the path, 
assuming it looks like 1662 * man*[/<arch>]/<title>.<section> or 1663 * cat<section>[/<arch>]/<title>.0 1664 */ 1665 1666 if (strlcpy(buf, relpath, sizeof(buf)) >= sizeof(buf)) { 1667 fprintf(stderr, "%s: path too long\n", relpath); 1668 continue; 1669 } 1670 sec = arch = title = ""; 1671 src_form = 0; 1672 p = strrchr(buf, '\0'); 1673 while (p-- > buf) { 1674 if ('\0' == *sec && '.' == *p) { 1675 sec = p + 1; 1676 *p = '\0'; 1677 if ('0' == *sec) 1678 src_form |= MANDOC_FORM; 1679 else if ('1' <= *sec && '9' >= *sec) 1680 src_form |= MANDOC_SRC; 1681 continue; 1682 } 1683 if ('/' != *p) 1684 continue; 1685 if ('\0' == *title) { 1686 title = p + 1; 1687 *p = '\0'; 1688 continue; 1689 } 1690 if (0 == strncmp("man", p + 1, 3)) 1691 src_form |= MANDOC_SRC; 1692 else if (0 == strncmp("cat", p + 1, 3)) 1693 src_form |= MANDOC_FORM; 1694 else 1695 arch = p + 1; 1696 break; 1697 } 1698 if ('\0' == *title) { 1699 if (warnings) 1700 fprintf(stderr, 1701 "%s: cannot deduce title " 1702 "from filename\n", 1703 relpath); 1704 title = buf; 1705 } 1706 1707 /* 1708 * Build the file structure. 1709 */ 1710 1711 nof = mandoc_calloc(1, sizeof(struct of)); 1712 nof->fname = mandoc_strdup(relpath); 1713 nof->sec = mandoc_strdup(sec); 1714 nof->arch = mandoc_strdup(arch); 1715 nof->title = mandoc_strdup(title); 1716 nof->src_form = src_form; 1717 1718 /* 1719 * Add the structure to the list. 1720 */ 1721 1722 if (verb > 1) 1723 printf("%s: scheduling\n", relpath); 1724 if (NULL == *of) { 1725 *of = nof; 1726 (*of)->first = nof; 1727 } else { 1728 nof->first = (*of)->first; 1729 (*of)->next = nof; 1730 *of = nof; 1731 } 1732 } 1733 } 1734 1735 /* 1736 * Recursively build up a list of files to parse. 1737 * We use this instead of ftw() and so on because I don't want global 1738 * variables hanging around. 1739 * This ignores the mandoc.db and mandoc.index files, but assumes that 1740 * everything else is a manual. 1741 * Pass in a pointer to a NULL structure for the first invocation. 
*/
/*
 * dir is the directory to scan; it must start with a dot (asserted
 * below).  psec and parch are the section and architecture names
 * inherited from the parent directories, empty strings if not yet
 * known.  p_src_form carries the MANDOC_SRC / MANDOC_FORM bits
 * inherited from the parent.  New entries are appended to *of, which
 * is left pointing to the last entry.
 */
static void
ofile_dirbuild(const char *dir, const char* psec, const char *parch,
		int p_src_form, struct of **of)
{
	char		 buf[MAXPATHLEN];
	size_t		 sz;
	DIR		*d;
	const char	*fn, *sec, *arch;
	char		*p, *q, *suffix;
	struct of	*nof;
	struct dirent	*dp;
	int		 src_form;

	if (NULL == (d = opendir(dir))) {
		if (warnings)
			perror(dir);
		return;
	}

	while (NULL != (dp = readdir(d))) {
		fn = dp->d_name;

		/* Skip ".", ".." and all other dot files. */

		if ('.' == *fn)
			continue;

		src_form = p_src_form;

		if (DT_DIR == dp->d_type) {
			sec = psec;
			arch = parch;

			/*
			 * By default, only use directories called:
			 *   man<section>/[<arch>/]   or
			 *   cat<section>/[<arch>/]
			 */

			if ('\0' == *sec) {
				/* First level: a section directory. */
				if(0 == strncmp("man", fn, 3)) {
					src_form |= MANDOC_SRC;
					sec = fn + 3;
				} else if (0 == strncmp("cat", fn, 3)) {
					src_form |= MANDOC_FORM;
					sec = fn + 3;
				} else {
					if (warnings) fprintf(stderr,
					    "%s/%s: bad section\n",
					    dir, fn);
					if (use_all)
						sec = fn;
					else
						continue;
				}
			} else if ('\0' == *arch) {
				/* Second level: an architecture directory. */
				if (NULL != strchr(fn, '.')) {
					if (warnings) fprintf(stderr,
					    "%s/%s: bad architecture\n",
					    dir, fn);
					if (0 == use_all)
						continue;
				}
				arch = fn;
			} else {
				/* Anything deeper is unexpected. */
				if (warnings) fprintf(stderr, "%s/%s: "
				    "excessive subdirectory\n", dir, fn);
				if (0 == use_all)
					continue;
			}

			/* Assemble the subdirectory path and descend. */

			buf[0] = '\0';
			strlcat(buf, dir, MAXPATHLEN);
			strlcat(buf, "/", MAXPATHLEN);
			sz = strlcat(buf, fn, MAXPATHLEN);

			if (MAXPATHLEN <= sz) {
				if (warnings) fprintf(stderr, "%s/%s: "
				    "path too long\n", dir, fn);
				continue;
			}

			if (verb > 1)
				printf("%s: scanning\n", buf);

			ofile_dirbuild(buf, sec, arch, src_form, of);
			continue;
		}

		if (DT_REG != dp->d_type) {
			if (warnings)
				fprintf(stderr,
				    "%s/%s: not a regular file\n",
				    dir, fn);
			continue;
		}

		/* Never index the databases themselves. */

		if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
			continue;
		if ('\0' == *psec) {
			if (warnings)
				fprintf(stderr,
				    "%s/%s: file outside section\n",
				    dir, fn);
			if (0 == use_all)
				continue;
		}

		/*
		 * By default, skip files where the file name suffix
		 * does not agree with the section directory
		 * they are located in.
		 */

		suffix = strrchr(fn, '.');
		if (NULL == suffix) {
			if (warnings)
				fprintf(stderr,
				    "%s/%s: no filename suffix\n",
				    dir, fn);
			if (0 == use_all)
				continue;
		} else if ((MANDOC_SRC & src_form &&
				strcmp(suffix + 1, psec)) ||
			    (MANDOC_FORM & src_form &&
				strcmp(suffix + 1, "0"))) {
			if (warnings)
				fprintf(stderr,
				    "%s/%s: wrong filename suffix\n",
				    dir, fn);
			if (0 == use_all)
				continue;
			/* Let the suffix add to the directory's hint. */
			if ('0' == suffix[1])
				src_form |= MANDOC_FORM;
			else if ('1' <= suffix[1] && '9' >= suffix[1])
				src_form |= MANDOC_SRC;
		}

		/*
		 * Skip formatted manuals if a source version is
		 * available.  Ignore the age: it is very unlikely
		 * that people install newer formatted base manuals
		 * when they used to have source manuals before,
		 * and in ports, old manuals get removed on update.
		 */
		if (0 == use_all && MANDOC_FORM & src_form &&
		    '\0' != *psec) {
			buf[0] = '\0';
			strlcat(buf, dir, MAXPATHLEN);

			/*
			 * Locate the section component of the path:
			 * the last one, or the one before it when
			 * inside an architecture directory.
			 */

			p = strrchr(buf, '/');
			if ('\0' != *parch && NULL != p)
				for (p--; p > buf; p--)
					if ('/' == *p)
						break;
			if (NULL == p)
				p = buf;
			else
				p++;

			/* Turn cat<section> into man<section>. */

			if (0 == strncmp("cat", p, 3))
				memcpy(p, "man", 3);
			strlcat(buf, "/", MAXPATHLEN);
			sz = strlcat(buf, fn, MAXPATHLEN);
			if (sz >= MAXPATHLEN) {
				if (warnings) fprintf(stderr,
				    "%s/%s: path too long\n",
				    dir, fn);
				continue;
			}

			/*
			 * Replace the file name suffix by the section
			 * name and check whether that file is readable.
			 */

			q = strrchr(buf, '.');
			if (NULL != q && p < q++) {
				*q = '\0';
				sz = strlcat(buf, psec, MAXPATHLEN);
				if (sz >= MAXPATHLEN) {
					if (warnings) fprintf(stderr,
					    "%s/%s: path too long\n",
					    dir, fn);
					continue;
				}
				if (0 == access(buf, R_OK))
					continue;
			}
		}

		/*
		 * Build the file name to store, dropping the
		 * leading "./" of the directory if there is one.
		 */

		buf[0] = '\0';
		assert('.' == dir[0]);
		if ('/' == dir[1]) {
			strlcat(buf, dir + 2, MAXPATHLEN);
			strlcat(buf, "/", MAXPATHLEN);
		}
		sz = strlcat(buf, fn, MAXPATHLEN);
		if (sz >= MAXPATHLEN) {
			if (warnings) fprintf(stderr,
			    "%s/%s: path too long\n", dir, fn);
			continue;
		}

		nof = mandoc_calloc(1, sizeof(struct of));
		nof->fname = mandoc_strdup(buf);
		nof->sec = mandoc_strdup(psec);
		nof->arch = mandoc_strdup(parch);
		nof->src_form = src_form;

		/*
		 * Remember the file name without the extension,
		 * to be used as the page title in the database.
		 * Note that this truncates the directory entry's
		 * name in place, after the full name was copied above.
		 */

		if (NULL != suffix)
			*suffix = '\0';
		nof->title = mandoc_strdup(fn);

		/*
		 * Add the structure to the list.
		 */

		if (verb > 1)
			printf("%s: scheduling\n", buf);

		if (NULL == *of) {
			*of = nof;
			(*of)->first = nof;
		} else {
			nof->first = (*of)->first;
			(*of)->next = nof;
			*of = nof;
		}
	}

	closedir(d);
}

/*
 * Free a complete file list, including all strings it owns.
 * Any entry of the list may be passed in: the walk always starts
 * from the entry's "first" pointer.  A NULL list is a no-op.
 */
static void
ofile_free(struct of *of)
{
	struct of	*nof;

	if (NULL != of)
		of = of->first;

	while (NULL != of) {
		/* Save the successor before releasing the node. */
		nof = of->next;
		free(of->fname);
		free(of->sec);
		free(of->arch);
		free(of->title);
		free(of);
		of = nof;
	}
}