1 /* $Id: main.c,v 1.23 2010/04/13 06:52:12 jmc Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #include <sys/stat.h> 18 19 #include <assert.h> 20 #include <fcntl.h> 21 #include <stdio.h> 22 #include <stdint.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <unistd.h> 26 27 #include "mdoc.h" 28 #include "man.h" 29 #include "main.h" 30 31 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a)) 32 33 typedef void (*out_mdoc)(void *, const struct mdoc *); 34 typedef void (*out_man)(void *, const struct man *); 35 typedef void (*out_free)(void *); 36 37 struct buf { 38 char *buf; 39 size_t sz; 40 }; 41 42 enum intt { 43 INTT_AUTO, 44 INTT_MDOC, 45 INTT_MAN 46 }; 47 48 enum outt { 49 OUTT_ASCII = 0, 50 OUTT_TREE, 51 OUTT_HTML, 52 OUTT_XHTML, 53 OUTT_LINT 54 }; 55 56 struct curparse { 57 const char *file; /* Current parse. */ 58 int fd; /* Current parse. */ 59 int wflags; 60 #define WARN_WALL (1 << 0) /* All-warnings mask. */ 61 #define WARN_WERR (1 << 2) /* Warnings->errors. */ 62 int fflags; 63 #define FL_IGN_SCOPE (1 << 0) /* Ignore scope errors. */ 64 #define FL_NIGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */ 65 #define FL_NIGN_MACRO (1 << 2) /* Don't ignore bad macros. */ 66 #define FL_NIGN_CHARS (1 << 3) /* Don't ignore bad chars. */ 67 #define FL_IGN_ERRORS (1 << 4) /* Ignore failed parse. */ 68 enum intt inttype; /* Input parsers... */ 69 struct man *man; 70 struct man *lastman; 71 struct mdoc *mdoc; 72 struct mdoc *lastmdoc; 73 enum outt outtype; /* Output devices... */ 74 out_mdoc outmdoc; 75 out_man outman; 76 out_free outfree; 77 void *outdata; 78 char outopts[BUFSIZ]; 79 }; 80 81 #define FL_STRICT FL_NIGN_ESCAPE | \ 82 FL_NIGN_MACRO | \ 83 FL_NIGN_CHARS 84 85 static int foptions(int *, char *); 86 static int toptions(struct curparse *, char *); 87 static int moptions(enum intt *, char *); 88 static int woptions(int *, char *); 89 static int merr(void *, int, int, const char *); 90 static int mwarn(void *, int, int, const char *); 91 static int ffile(struct buf *, struct buf *, 92 const char *, struct curparse *); 93 static int fdesc(struct buf *, struct buf *, 94 struct curparse *); 95 static int pset(const char *, int, struct curparse *, 96 struct man **, struct mdoc **); 97 static struct man *man_init(struct curparse *); 98 static struct mdoc *mdoc_init(struct curparse *); 99 static void version(void) __attribute__((noreturn)); 100 static void usage(void) __attribute__((noreturn)); 101 102 static const char *progname; 103 104 105 int 106 main(int argc, char *argv[]) 107 { 108 int c, rc; 109 struct buf ln, blk; 110 struct curparse curp; 111 112 progname = strrchr(argv[0], '/'); 113 if (progname == NULL) 114 progname = argv[0]; 115 else 116 ++progname; 117 118 memset(&curp, 0, sizeof(struct curparse)); 119 120 curp.inttype = INTT_AUTO; 121 curp.outtype = OUTT_ASCII; 122 123 /* LINTED */ 124 while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:"))) 125 switch (c) { 126 case ('f'): 127 if ( ! foptions(&curp.fflags, optarg)) 128 return(EXIT_FAILURE); 129 break; 130 case ('m'): 131 if ( ! moptions(&curp.inttype, optarg)) 132 return(EXIT_FAILURE); 133 break; 134 case ('O'): 135 (void)strlcat(curp.outopts, optarg, BUFSIZ); 136 (void)strlcat(curp.outopts, ",", BUFSIZ); 137 break; 138 case ('T'): 139 if ( ! toptions(&curp, optarg)) 140 return(EXIT_FAILURE); 141 break; 142 case ('W'): 143 if ( ! woptions(&curp.wflags, optarg)) 144 return(EXIT_FAILURE); 145 break; 146 case ('V'): 147 version(); 148 /* NOTREACHED */ 149 default: 150 usage(); 151 /* NOTREACHED */ 152 } 153 154 argc -= optind; 155 argv += optind; 156 157 memset(&ln, 0, sizeof(struct buf)); 158 memset(&blk, 0, sizeof(struct buf)); 159 160 rc = 1; 161 162 if (NULL == *argv) { 163 curp.file = "<stdin>"; 164 curp.fd = STDIN_FILENO; 165 166 c = fdesc(&blk, &ln, &curp); 167 if ( ! (FL_IGN_ERRORS & curp.fflags)) 168 rc = 1 == c ? 1 : 0; 169 else 170 rc = -1 == c ? 0 : 1; 171 } 172 173 while (rc && *argv) { 174 c = ffile(&blk, &ln, *argv, &curp); 175 if ( ! (FL_IGN_ERRORS & curp.fflags)) 176 rc = 1 == c ? 1 : 0; 177 else 178 rc = -1 == c ? 0 : 1; 179 180 argv++; 181 if (*argv && rc) { 182 if (curp.lastman) 183 man_reset(curp.lastman); 184 if (curp.lastmdoc) 185 mdoc_reset(curp.lastmdoc); 186 curp.lastman = NULL; 187 curp.lastmdoc = NULL; 188 } 189 } 190 191 if (blk.buf) 192 free(blk.buf); 193 if (ln.buf) 194 free(ln.buf); 195 if (curp.outfree) 196 (*curp.outfree)(curp.outdata); 197 if (curp.mdoc) 198 mdoc_free(curp.mdoc); 199 if (curp.man) 200 man_free(curp.man); 201 202 return(rc ? EXIT_SUCCESS : EXIT_FAILURE); 203 } 204 205 206 static void 207 version(void) 208 { 209 210 (void)printf("%s %s\n", progname, VERSION); 211 exit(EXIT_SUCCESS); 212 } 213 214 215 static void 216 usage(void) 217 { 218 219 (void)fprintf(stderr, "usage: %s [-V] [-foption] " 220 "[-mformat] [-Ooption] [-Toutput] " 221 "[-Werr] [file...]\n", progname); 222 exit(EXIT_FAILURE); 223 } 224 225 226 static struct man * 227 man_init(struct curparse *curp) 228 { 229 int pflags; 230 struct man_cb mancb; 231 232 mancb.man_err = merr; 233 mancb.man_warn = mwarn; 234 235 /* Defaults from mandoc.1. */ 236 237 pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE | MAN_IGN_CHARS; 238 239 if (curp->fflags & FL_NIGN_MACRO) 240 pflags &= ~MAN_IGN_MACRO; 241 if (curp->fflags & FL_NIGN_CHARS) 242 pflags &= ~MAN_IGN_CHARS; 243 if (curp->fflags & FL_NIGN_ESCAPE) 244 pflags &= ~MAN_IGN_ESCAPE; 245 246 return(man_alloc(curp, pflags, &mancb)); 247 } 248 249 250 static struct mdoc * 251 mdoc_init(struct curparse *curp) 252 { 253 int pflags; 254 struct mdoc_cb mdoccb; 255 256 mdoccb.mdoc_err = merr; 257 mdoccb.mdoc_warn = mwarn; 258 259 /* Defaults from mandoc.1. */ 260 261 pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE | MDOC_IGN_CHARS; 262 263 if (curp->fflags & FL_IGN_SCOPE) 264 pflags |= MDOC_IGN_SCOPE; 265 if (curp->fflags & FL_NIGN_ESCAPE) 266 pflags &= ~MDOC_IGN_ESCAPE; 267 if (curp->fflags & FL_NIGN_MACRO) 268 pflags &= ~MDOC_IGN_MACRO; 269 if (curp->fflags & FL_NIGN_CHARS) 270 pflags &= ~MDOC_IGN_CHARS; 271 272 return(mdoc_alloc(curp, pflags, &mdoccb)); 273 } 274 275 276 static int 277 ffile(struct buf *blk, struct buf *ln, 278 const char *file, struct curparse *curp) 279 { 280 int c; 281 282 curp->file = file; 283 if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) { 284 perror(curp->file); 285 return(-1); 286 } 287 288 c = fdesc(blk, ln, curp); 289 290 if (-1 == close(curp->fd)) 291 perror(curp->file); 292 293 return(c); 294 } 295 296 297 static int 298 fdesc(struct buf *blk, struct buf *ln, struct curparse *curp) 299 { 300 size_t sz; 301 ssize_t ssz; 302 struct stat st; 303 int j, i, pos, lnn, comment; 304 struct man *man; 305 struct mdoc *mdoc; 306 307 sz = BUFSIZ; 308 man = NULL; 309 mdoc = NULL; 310 311 /* 312 * Two buffers: ln and buf. buf is the input buffer optimised 313 * here for each file's block size. ln is a line buffer. Both 314 * growable, hence passed in by ptr-ptr. 315 */ 316 317 if (-1 == fstat(curp->fd, &st)) 318 perror(curp->file); 319 else if ((size_t)st.st_blksize > sz) 320 sz = st.st_blksize; 321 322 if (sz > blk->sz) { 323 blk->buf = realloc(blk->buf, sz); 324 if (NULL == blk->buf) { 325 perror(NULL); 326 exit(EXIT_FAILURE); 327 } 328 blk->sz = sz; 329 } 330 331 /* Fill buf with file blocksize. */ 332 333 for (lnn = pos = comment = 0; ; ) { 334 if (-1 == (ssz = read(curp->fd, blk->buf, sz))) { 335 perror(curp->file); 336 return(-1); 337 } else if (0 == ssz) 338 break; 339 340 /* Parse the read block into partial or full lines. */ 341 342 for (i = 0; i < (int)ssz; i++) { 343 if (pos >= (int)ln->sz) { 344 ln->sz += 256; /* Step-size. */ 345 ln->buf = realloc(ln->buf, ln->sz); 346 if (NULL == ln->buf) { 347 perror(NULL); 348 return(EXIT_FAILURE); 349 } 350 } 351 352 if ('\n' != blk->buf[i]) { 353 if (comment) 354 continue; 355 ln->buf[pos++] = blk->buf[i]; 356 357 /* Handle in-line `\"' comments. */ 358 359 if (1 == pos || '\"' != ln->buf[pos - 1]) 360 continue; 361 362 for (j = pos - 2; j >= 0; j--) 363 if ('\\' != ln->buf[j]) 364 break; 365 366 if ( ! ((pos - 2 - j) % 2)) 367 continue; 368 369 comment = 1; 370 pos -= 2; 371 continue; 372 } 373 374 /* Handle escaped `\\n' newlines. */ 375 376 if (pos > 0 && 0 == comment && 377 '\\' == ln->buf[pos - 1]) { 378 for (j = pos - 1; j >= 0; j--) 379 if ('\\' != ln->buf[j]) 380 break; 381 if ( ! ((pos - j) % 2)) { 382 pos--; 383 lnn++; 384 continue; 385 } 386 } 387 388 ln->buf[pos] = 0; 389 lnn++; 390 391 /* If unset, assign parser in pset(). */ 392 393 if ( ! (man || mdoc) && ! pset(ln->buf, 394 pos, curp, &man, &mdoc)) 395 return(-1); 396 397 pos = comment = 0; 398 399 /* Pass down into parsers. */ 400 401 if (man && ! man_parseln(man, lnn, ln->buf)) 402 return(0); 403 if (mdoc && ! mdoc_parseln(mdoc, lnn, ln->buf)) 404 return(0); 405 } 406 } 407 408 /* NOTE a parser may not have been assigned, yet. */ 409 410 if ( ! (man || mdoc)) { 411 fprintf(stderr, "%s: Not a manual\n", curp->file); 412 return(0); 413 } 414 415 if (mdoc && ! mdoc_endparse(mdoc)) 416 return(0); 417 if (man && ! man_endparse(man)) 418 return(0); 419 420 /* If unset, allocate output dev now (if applicable). */ 421 422 if ( ! (curp->outman && curp->outmdoc)) { 423 switch (curp->outtype) { 424 case (OUTT_XHTML): 425 curp->outdata = xhtml_alloc(curp->outopts); 426 curp->outman = html_man; 427 curp->outmdoc = html_mdoc; 428 curp->outfree = html_free; 429 break; 430 case (OUTT_HTML): 431 curp->outdata = html_alloc(curp->outopts); 432 curp->outman = html_man; 433 curp->outmdoc = html_mdoc; 434 curp->outfree = html_free; 435 break; 436 case (OUTT_TREE): 437 curp->outman = tree_man; 438 curp->outmdoc = tree_mdoc; 439 break; 440 case (OUTT_LINT): 441 break; 442 default: 443 curp->outdata = ascii_alloc(); 444 curp->outman = terminal_man; 445 curp->outmdoc = terminal_mdoc; 446 curp->outfree = terminal_free; 447 break; 448 } 449 } 450 451 /* Execute the out device, if it exists. */ 452 453 if (man && curp->outman) 454 (*curp->outman)(curp->outdata, man); 455 if (mdoc && curp->outmdoc) 456 (*curp->outmdoc)(curp->outdata, mdoc); 457 458 return(1); 459 } 460 461 462 static int 463 pset(const char *buf, int pos, struct curparse *curp, 464 struct man **man, struct mdoc **mdoc) 465 { 466 int i; 467 468 /* 469 * Try to intuit which kind of manual parser should be used. If 470 * passed in by command-line (-man, -mdoc), then use that 471 * explicitly. If passed as -mandoc, then try to guess from the 472 * line: either skip dot-lines, use -mdoc when finding `.Dt', or 473 * default to -man, which is more lenient. 474 */ 475 476 if (buf[0] == '.') { 477 for (i = 1; buf[i]; i++) 478 if (' ' != buf[i] && '\t' != buf[i]) 479 break; 480 if (0 == buf[i]) 481 return(1); 482 } 483 484 switch (curp->inttype) { 485 case (INTT_MDOC): 486 if (NULL == curp->mdoc) 487 curp->mdoc = mdoc_init(curp); 488 if (NULL == (*mdoc = curp->mdoc)) 489 return(0); 490 curp->lastmdoc = *mdoc; 491 return(1); 492 case (INTT_MAN): 493 if (NULL == curp->man) 494 curp->man = man_init(curp); 495 if (NULL == (*man = curp->man)) 496 return(0); 497 curp->lastman = *man; 498 return(1); 499 default: 500 break; 501 } 502 503 if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) { 504 if (NULL == curp->mdoc) 505 curp->mdoc = mdoc_init(curp); 506 if (NULL == (*mdoc = curp->mdoc)) 507 return(0); 508 curp->lastmdoc = *mdoc; 509 return(1); 510 } 511 512 if (NULL == curp->man) 513 curp->man = man_init(curp); 514 if (NULL == (*man = curp->man)) 515 return(0); 516 curp->lastman = *man; 517 return(1); 518 } 519 520 521 static int 522 moptions(enum intt *tflags, char *arg) 523 { 524 525 if (0 == strcmp(arg, "doc")) 526 *tflags = INTT_MDOC; 527 else if (0 == strcmp(arg, "andoc")) 528 *tflags = INTT_AUTO; 529 else if (0 == strcmp(arg, "an")) 530 *tflags = INTT_MAN; 531 else { 532 fprintf(stderr, "%s: Bad argument\n", arg); 533 return(0); 534 } 535 536 return(1); 537 } 538 539 540 static int 541 toptions(struct curparse *curp, char *arg) 542 { 543 544 if (0 == strcmp(arg, "ascii")) 545 curp->outtype = OUTT_ASCII; 546 else if (0 == strcmp(arg, "lint")) { 547 curp->outtype = OUTT_LINT; 548 curp->wflags |= WARN_WALL; 549 curp->fflags |= FL_STRICT; 550 } 551 else if (0 == strcmp(arg, "tree")) 552 curp->outtype = OUTT_TREE; 553 else if (0 == strcmp(arg, "html")) 554 curp->outtype = OUTT_HTML; 555 else if (0 == strcmp(arg, "xhtml")) 556 curp->outtype = OUTT_XHTML; 557 else { 558 fprintf(stderr, "%s: Bad argument\n", arg); 559 return(0); 560 } 561 562 return(1); 563 } 564 565 566 static int 567 foptions(int *fflags, char *arg) 568 { 569 char *v, *o; 570 const char *toks[8]; 571 572 toks[0] = "ign-scope"; 573 toks[1] = "no-ign-escape"; 574 toks[2] = "no-ign-macro"; 575 toks[3] = "no-ign-chars"; 576 toks[4] = "ign-errors"; 577 toks[5] = "strict"; 578 toks[6] = "ign-escape"; 579 toks[7] = NULL; 580 581 while (*arg) { 582 o = arg; 583 switch (getsubopt(&arg, UNCONST(toks), &v)) { 584 case (0): 585 *fflags |= FL_IGN_SCOPE; 586 break; 587 case (1): 588 *fflags |= FL_NIGN_ESCAPE; 589 break; 590 case (2): 591 *fflags |= FL_NIGN_MACRO; 592 break; 593 case (3): 594 *fflags |= FL_NIGN_CHARS; 595 break; 596 case (4): 597 *fflags |= FL_IGN_ERRORS; 598 break; 599 case (5): 600 *fflags |= FL_STRICT; 601 break; 602 case (6): 603 *fflags &= ~FL_NIGN_ESCAPE; 604 break; 605 default: 606 fprintf(stderr, "%s: Bad argument\n", o); 607 return(0); 608 } 609 } 610 611 return(1); 612 } 613 614 615 static int 616 woptions(int *wflags, char *arg) 617 { 618 char *v, *o; 619 const char *toks[3]; 620 621 toks[0] = "all"; 622 toks[1] = "error"; 623 toks[2] = NULL; 624 625 while (*arg) { 626 o = arg; 627 switch (getsubopt(&arg, UNCONST(toks), &v)) { 628 case (0): 629 *wflags |= WARN_WALL; 630 break; 631 case (1): 632 *wflags |= WARN_WERR; 633 break; 634 default: 635 fprintf(stderr, "%s: Bad argument\n", o); 636 return(0); 637 } 638 } 639 640 return(1); 641 } 642 643 644 /* ARGSUSED */ 645 static int 646 merr(void *arg, int line, int col, const char *msg) 647 { 648 struct curparse *curp; 649 650 curp = (struct curparse *)arg; 651 652 (void)fprintf(stderr, "%s:%d:%d: error: %s\n", 653 curp->file, line, col + 1, msg); 654 655 return(0); 656 } 657 658 659 static int 660 mwarn(void *arg, int line, int col, const char *msg) 661 { 662 struct curparse *curp; 663 664 curp = (struct curparse *)arg; 665 666 if ( ! (curp->wflags & WARN_WALL)) 667 return(1); 668 669 (void)fprintf(stderr, "%s:%d:%d: warning: %s\n", 670 curp->file, line, col + 1, msg); 671 672 if ( ! (curp->wflags & WARN_WERR)) 673 return(1); 674 675 return(0); 676 } 677 678