1 /* $OpenBSD: read.c,v 1.130 2017/01/09 01:36:22 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #include <sys/types.h> 20 #include <sys/mman.h> 21 #include <sys/stat.h> 22 23 #include <assert.h> 24 #include <ctype.h> 25 #include <err.h> 26 #include <errno.h> 27 #include <fcntl.h> 28 #include <stdarg.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <unistd.h> 33 #include <zlib.h> 34 35 #include "mandoc_aux.h" 36 #include "mandoc.h" 37 #include "roff.h" 38 #include "mdoc.h" 39 #include "man.h" 40 #include "libmandoc.h" 41 #include "roff_int.h" 42 43 #define REPARSE_LIMIT 1000 44 45 struct mparse { 46 struct roff_man *man; /* man parser */ 47 struct roff *roff; /* roff parser (!NULL) */ 48 char *sodest; /* filename pointed to by .so */ 49 const char *file; /* filename of current input file */ 50 struct buf *primary; /* buffer currently being parsed */ 51 struct buf *secondary; /* preprocessed copy of input */ 52 const char *defos; /* default operating system */ 53 mandocmsg mmsg; /* warning/error message handler */ 54 enum mandoclevel file_status; /* status of current parse */ 55 enum mandoclevel wlevel; /* ignore messages below this */ 56 int options; /* parser options */ 57 int gzip; /* current input file is gzipped */ 58 int filenc; /* encoding of the current file */ 59 int reparse_count; /* finite interp. stack */ 60 int line; /* line number in the file */ 61 }; 62 63 static void choose_parser(struct mparse *); 64 static void resize_buf(struct buf *, size_t); 65 static void mparse_buf_r(struct mparse *, struct buf, size_t, int); 66 static int read_whole_file(struct mparse *, const char *, int, 67 struct buf *, int *); 68 static void mparse_end(struct mparse *); 69 static void mparse_parse_buffer(struct mparse *, struct buf, 70 const char *); 71 72 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { 73 MANDOCERR_OK, 74 MANDOCERR_WARNING, 75 MANDOCERR_WARNING, 76 MANDOCERR_ERROR, 77 MANDOCERR_UNSUPP, 78 MANDOCERR_MAX, 79 MANDOCERR_MAX 80 }; 81 82 static const char * const mandocerrs[MANDOCERR_MAX] = { 83 "ok", 84 85 "generic warning", 86 87 /* related to the prologue */ 88 "missing manual title, using UNTITLED", 89 "missing manual title, using \"\"", 90 "lower case character in document title", 91 "missing manual section, using \"\"", 92 "unknown manual section", 93 "missing date, using today's date", 94 "cannot parse date, using it verbatim", 95 "missing Os macro, using \"\"", 96 "duplicate prologue macro", 97 "late prologue macro", 98 "skipping late title macro", 99 "prologue macros out of order", 100 101 /* related to document structure */ 102 ".so is fragile, better use ln(1)", 103 "no document body", 104 "content before first section header", 105 "first section is not \"NAME\"", 106 "NAME section without Nm before Nd", 107 "NAME section without description", 108 "description not at the end of NAME", 109 "bad NAME section content", 110 "missing comma before name", 111 "missing description line, using \"\"", 112 "sections out of conventional order", 113 "duplicate section title", 114 "unexpected section", 115 "unusual Xr order", 116 "unusual Xr punctuation", 117 "AUTHORS section without An macro", 118 119 /* related to macros and nesting */ 120 "obsolete macro", 121 "macro neither callable nor escaped", 122 "skipping paragraph macro", 123 "moving paragraph macro out of list", 124 "skipping no-space macro", 125 "blocks badly nested", 126 "nested displays are not portable", 127 "moving content out of list", 128 "fill mode already enabled, skipping", 129 "fill mode already disabled, skipping", 130 "line scope broken", 131 132 /* related to missing macro arguments */ 133 "skipping empty request", 134 "conditional request controls empty scope", 135 "skipping empty macro", 136 "empty block", 137 "empty argument, using 0n", 138 "missing display type, using -ragged", 139 "list type is not the first argument", 140 "missing -width in -tag list, using 6n", 141 "missing utility name, using \"\"", 142 "missing function name, using \"\"", 143 "empty head in list item", 144 "empty list item", 145 "missing font type, using \\fR", 146 "unknown font type, using \\fR", 147 "nothing follows prefix", 148 "empty reference block", 149 "missing section argument", 150 "missing -std argument, adding it", 151 "missing option string, using \"\"", 152 "missing resource identifier, using \"\"", 153 "missing eqn box, using \"\"", 154 155 /* related to bad macro arguments */ 156 "unterminated quoted argument", 157 "duplicate argument", 158 "skipping duplicate argument", 159 "skipping duplicate display type", 160 "skipping duplicate list type", 161 "skipping -width argument", 162 "wrong number of cells", 163 "unknown AT&T UNIX version", 164 "comma in function argument", 165 "parenthesis in function name", 166 "invalid content in Rs block", 167 "invalid Boolean argument", 168 "unknown font, skipping request", 169 "odd number of characters in request", 170 171 /* related to plain text */ 172 "blank line in fill mode, using .sp", 173 "tab in filled text", 174 "whitespace at end of input line", 175 "bad comment style", 176 "invalid escape sequence", 177 "undefined string, using \"\"", 178 179 /* related to tables */ 180 "tbl line starts with span", 181 "tbl column starts with span", 182 "skipping vertical bar in tbl layout", 183 184 "generic error", 185 186 /* related to tables */ 187 "non-alphabetic character in tbl options", 188 "skipping unknown tbl option", 189 "missing tbl option argument", 190 "wrong tbl option argument size", 191 "empty tbl layout", 192 "invalid character in tbl layout", 193 "unmatched parenthesis in tbl layout", 194 "tbl without any data cells", 195 "ignoring data in spanned tbl cell", 196 "ignoring extra tbl data cells", 197 "data block open at end of tbl", 198 199 /* related to document structure and macros */ 200 NULL, 201 "input stack limit exceeded, infinite loop?", 202 "skipping bad character", 203 "skipping unknown macro", 204 "skipping insecure request", 205 "skipping item outside list", 206 "skipping column outside column list", 207 "skipping end of block that is not open", 208 "fewer RS blocks open, skipping", 209 "inserting missing end of block", 210 "appending missing end of block", 211 212 /* related to request and macro arguments */ 213 "escaped character not allowed in a name", 214 "NOT IMPLEMENTED: Bd -file", 215 "skipping display without arguments", 216 "missing list type, using -item", 217 "missing manual name, using \"\"", 218 "uname(3) system call failed, using UNKNOWN", 219 "unknown standard specifier", 220 "skipping request without numeric argument", 221 "NOT IMPLEMENTED: .so with absolute path or \"..\"", 222 ".so request failed", 223 "skipping all arguments", 224 "skipping excess arguments", 225 "divide by zero", 226 227 "unsupported feature", 228 "input too large", 229 "unsupported control character", 230 "unsupported roff request", 231 "eqn delim option in tbl", 232 "unsupported tbl layout modifier", 233 "ignoring macro in table", 234 }; 235 236 static const char * const mandoclevels[MANDOCLEVEL_MAX] = { 237 "SUCCESS", 238 "RESERVED", 239 "WARNING", 240 "ERROR", 241 "UNSUPP", 242 "BADARG", 243 "SYSERR" 244 }; 245 246 247 static void 248 resize_buf(struct buf *buf, size_t initial) 249 { 250 251 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial; 252 buf->buf = mandoc_realloc(buf->buf, buf->sz); 253 } 254 255 static void 256 choose_parser(struct mparse *curp) 257 { 258 char *cp, *ep; 259 int format; 260 261 /* 262 * If neither command line arguments -mdoc or -man select 263 * a parser nor the roff parser found a .Dd or .TH macro 264 * yet, look ahead in the main input buffer. 265 */ 266 267 if ((format = roff_getformat(curp->roff)) == 0) { 268 cp = curp->primary->buf; 269 ep = cp + curp->primary->sz; 270 while (cp < ep) { 271 if (*cp == '.' || *cp == '\'') { 272 cp++; 273 if (cp[0] == 'D' && cp[1] == 'd') { 274 format = MPARSE_MDOC; 275 break; 276 } 277 if (cp[0] == 'T' && cp[1] == 'H') { 278 format = MPARSE_MAN; 279 break; 280 } 281 } 282 cp = memchr(cp, '\n', ep - cp); 283 if (cp == NULL) 284 break; 285 cp++; 286 } 287 } 288 289 if (format == MPARSE_MDOC) { 290 mdoc_hash_init(); 291 curp->man->macroset = MACROSET_MDOC; 292 curp->man->first->tok = TOKEN_NONE; 293 } else { 294 man_hash_init(); 295 curp->man->macroset = MACROSET_MAN; 296 curp->man->first->tok = TOKEN_NONE; 297 } 298 } 299 300 /* 301 * Main parse routine for a buffer. 302 * It assumes encoding and line numbering are already set up. 303 * It can recurse directly (for invocations of user-defined 304 * macros, inline equations, and input line traps) 305 * and indirectly (for .so file inclusion). 306 */ 307 static void 308 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) 309 { 310 const struct tbl_span *span; 311 struct buf ln; 312 const char *save_file; 313 char *cp; 314 size_t pos; /* byte number in the ln buffer */ 315 size_t j; /* auxiliary byte number in the blk buffer */ 316 enum rofferr rr; 317 int of; 318 int lnn; /* line number in the real file */ 319 int fd; 320 unsigned char c; 321 322 memset(&ln, 0, sizeof(ln)); 323 324 lnn = curp->line; 325 pos = 0; 326 327 while (i < blk.sz) { 328 if (0 == pos && '\0' == blk.buf[i]) 329 break; 330 331 if (start) { 332 curp->line = lnn; 333 curp->reparse_count = 0; 334 335 if (lnn < 3 && 336 curp->filenc & MPARSE_UTF8 && 337 curp->filenc & MPARSE_LATIN1) 338 curp->filenc = preconv_cue(&blk, i); 339 } 340 341 while (i < blk.sz && (start || blk.buf[i] != '\0')) { 342 343 /* 344 * When finding an unescaped newline character, 345 * leave the character loop to process the line. 346 * Skip a preceding carriage return, if any. 347 */ 348 349 if ('\r' == blk.buf[i] && i + 1 < blk.sz && 350 '\n' == blk.buf[i + 1]) 351 ++i; 352 if ('\n' == blk.buf[i]) { 353 ++i; 354 ++lnn; 355 break; 356 } 357 358 /* 359 * Make sure we have space for the worst 360 * case of 11 bytes: "\\[u10ffff]\0" 361 */ 362 363 if (pos + 11 > ln.sz) 364 resize_buf(&ln, 256); 365 366 /* 367 * Encode 8-bit input. 368 */ 369 370 c = blk.buf[i]; 371 if (c & 0x80) { 372 if ( ! (curp->filenc && preconv_encode( 373 &blk, &i, &ln, &pos, &curp->filenc))) { 374 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, 375 curp->line, pos, "0x%x", c); 376 ln.buf[pos++] = '?'; 377 i++; 378 } 379 continue; 380 } 381 382 /* 383 * Exclude control characters. 384 */ 385 386 if (c == 0x7f || (c < 0x20 && c != 0x09)) { 387 mandoc_vmsg(c == 0x00 || c == 0x04 || 388 c > 0x0a ? MANDOCERR_CHAR_BAD : 389 MANDOCERR_CHAR_UNSUPP, 390 curp, curp->line, pos, "0x%x", c); 391 i++; 392 if (c != '\r') 393 ln.buf[pos++] = '?'; 394 continue; 395 } 396 397 /* Trailing backslash = a plain char. */ 398 399 if (blk.buf[i] != '\\' || i + 1 == blk.sz) { 400 ln.buf[pos++] = blk.buf[i++]; 401 continue; 402 } 403 404 /* 405 * Found escape and at least one other character. 406 * When it's a newline character, skip it. 407 * When there is a carriage return in between, 408 * skip that one as well. 409 */ 410 411 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz && 412 '\n' == blk.buf[i + 2]) 413 ++i; 414 if ('\n' == blk.buf[i + 1]) { 415 i += 2; 416 ++lnn; 417 continue; 418 } 419 420 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { 421 j = i; 422 i += 2; 423 /* Comment, skip to end of line */ 424 for (; i < blk.sz; ++i) { 425 if (blk.buf[i] != '\n') 426 continue; 427 if (blk.buf[i - 1] == ' ' || 428 blk.buf[i - 1] == '\t') 429 mandoc_msg( 430 MANDOCERR_SPACE_EOL, 431 curp, curp->line, 432 pos + i-1 - j, NULL); 433 ++i; 434 ++lnn; 435 break; 436 } 437 438 /* Backout trailing whitespaces */ 439 for (; pos > 0; --pos) { 440 if (ln.buf[pos - 1] != ' ') 441 break; 442 if (pos > 2 && ln.buf[pos - 2] == '\\') 443 break; 444 } 445 break; 446 } 447 448 /* Catch escaped bogus characters. */ 449 450 c = (unsigned char) blk.buf[i+1]; 451 452 if ( ! (isascii(c) && 453 (isgraph(c) || isblank(c)))) { 454 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, 455 curp->line, pos, "0x%x", c); 456 i += 2; 457 ln.buf[pos++] = '?'; 458 continue; 459 } 460 461 /* Some other escape sequence, copy & cont. */ 462 463 ln.buf[pos++] = blk.buf[i++]; 464 ln.buf[pos++] = blk.buf[i++]; 465 } 466 467 if (pos >= ln.sz) 468 resize_buf(&ln, 256); 469 470 ln.buf[pos] = '\0'; 471 472 /* 473 * A significant amount of complexity is contained by 474 * the roff preprocessor. It's line-oriented but can be 475 * expressed on one line, so we need at times to 476 * readjust our starting point and re-run it. The roff 477 * preprocessor can also readjust the buffers with new 478 * data, so we pass them in wholesale. 479 */ 480 481 of = 0; 482 483 /* 484 * Maintain a lookaside buffer of all parsed lines. We 485 * only do this if mparse_keep() has been invoked (the 486 * buffer may be accessed with mparse_getkeep()). 487 */ 488 489 if (curp->secondary) { 490 curp->secondary->buf = mandoc_realloc( 491 curp->secondary->buf, 492 curp->secondary->sz + pos + 2); 493 memcpy(curp->secondary->buf + 494 curp->secondary->sz, 495 ln.buf, pos); 496 curp->secondary->sz += pos; 497 curp->secondary->buf 498 [curp->secondary->sz] = '\n'; 499 curp->secondary->sz++; 500 curp->secondary->buf 501 [curp->secondary->sz] = '\0'; 502 } 503 rerun: 504 rr = roff_parseln(curp->roff, curp->line, &ln, &of); 505 506 switch (rr) { 507 case ROFF_REPARSE: 508 if (REPARSE_LIMIT >= ++curp->reparse_count) 509 mparse_buf_r(curp, ln, of, 0); 510 else 511 mandoc_msg(MANDOCERR_ROFFLOOP, curp, 512 curp->line, pos, NULL); 513 pos = 0; 514 continue; 515 case ROFF_APPEND: 516 pos = strlen(ln.buf); 517 continue; 518 case ROFF_RERUN: 519 goto rerun; 520 case ROFF_IGN: 521 pos = 0; 522 continue; 523 case ROFF_SO: 524 if ( ! (curp->options & MPARSE_SO) && 525 (i >= blk.sz || blk.buf[i] == '\0')) { 526 curp->sodest = mandoc_strdup(ln.buf + of); 527 free(ln.buf); 528 return; 529 } 530 /* 531 * We remove `so' clauses from our lookaside 532 * buffer because we're going to descend into 533 * the file recursively. 534 */ 535 if (curp->secondary) 536 curp->secondary->sz -= pos + 1; 537 save_file = curp->file; 538 if ((fd = mparse_open(curp, ln.buf + of)) != -1) { 539 mparse_readfd(curp, fd, ln.buf + of); 540 close(fd); 541 curp->file = save_file; 542 } else { 543 curp->file = save_file; 544 mandoc_vmsg(MANDOCERR_SO_FAIL, 545 curp, curp->line, pos, 546 ".so %s", ln.buf + of); 547 ln.sz = mandoc_asprintf(&cp, 548 ".sp\nSee the file %s.\n.sp", 549 ln.buf + of); 550 free(ln.buf); 551 ln.buf = cp; 552 of = 0; 553 mparse_buf_r(curp, ln, of, 0); 554 } 555 pos = 0; 556 continue; 557 default: 558 break; 559 } 560 561 if (curp->man->macroset == MACROSET_NONE) 562 choose_parser(curp); 563 564 /* 565 * Lastly, push down into the parsers themselves. 566 * If libroff returns ROFF_TBL, then add it to the 567 * currently open parse. Since we only get here if 568 * there does exist data (see tbl_data.c), we're 569 * guaranteed that something's been allocated. 570 * Do the same for ROFF_EQN. 571 */ 572 573 if (rr == ROFF_TBL) 574 while ((span = roff_span(curp->roff)) != NULL) 575 roff_addtbl(curp->man, span); 576 else if (rr == ROFF_EQN) 577 roff_addeqn(curp->man, roff_eqn(curp->roff)); 578 else if ((curp->man->macroset == MACROSET_MDOC ? 579 mdoc_parseln(curp->man, curp->line, ln.buf, of) : 580 man_parseln(curp->man, curp->line, ln.buf, of)) == 2) 581 break; 582 583 /* Temporary buffers typically are not full. */ 584 585 if (0 == start && '\0' == blk.buf[i]) 586 break; 587 588 /* Start the next input line. */ 589 590 pos = 0; 591 } 592 593 free(ln.buf); 594 } 595 596 static int 597 read_whole_file(struct mparse *curp, const char *file, int fd, 598 struct buf *fb, int *with_mmap) 599 { 600 struct stat st; 601 gzFile gz; 602 size_t off; 603 ssize_t ssz; 604 605 if (fstat(fd, &st) == -1) 606 err((int)MANDOCLEVEL_SYSERR, "%s", file); 607 608 /* 609 * If we're a regular file, try just reading in the whole entry 610 * via mmap(). This is faster than reading it into blocks, and 611 * since each file is only a few bytes to begin with, I'm not 612 * concerned that this is going to tank any machines. 613 */ 614 615 if (curp->gzip == 0 && S_ISREG(st.st_mode)) { 616 if (st.st_size > 0x7fffffff) { 617 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); 618 return 0; 619 } 620 *with_mmap = 1; 621 fb->sz = (size_t)st.st_size; 622 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); 623 if (fb->buf != MAP_FAILED) 624 return 1; 625 } 626 627 if (curp->gzip) { 628 if ((gz = gzdopen(fd, "rb")) == NULL) 629 err((int)MANDOCLEVEL_SYSERR, "%s", file); 630 } else 631 gz = NULL; 632 633 /* 634 * If this isn't a regular file (like, say, stdin), then we must 635 * go the old way and just read things in bit by bit. 636 */ 637 638 *with_mmap = 0; 639 off = 0; 640 fb->sz = 0; 641 fb->buf = NULL; 642 for (;;) { 643 if (off == fb->sz) { 644 if (fb->sz == (1U << 31)) { 645 mandoc_msg(MANDOCERR_TOOLARGE, curp, 646 0, 0, NULL); 647 break; 648 } 649 resize_buf(fb, 65536); 650 } 651 ssz = curp->gzip ? 652 gzread(gz, fb->buf + (int)off, fb->sz - off) : 653 read(fd, fb->buf + (int)off, fb->sz - off); 654 if (ssz == 0) { 655 fb->sz = off; 656 return 1; 657 } 658 if (ssz == -1) 659 err((int)MANDOCLEVEL_SYSERR, "%s", file); 660 off += (size_t)ssz; 661 } 662 663 free(fb->buf); 664 fb->buf = NULL; 665 return 0; 666 } 667 668 static void 669 mparse_end(struct mparse *curp) 670 { 671 if (curp->man->macroset == MACROSET_NONE) 672 curp->man->macroset = MACROSET_MAN; 673 if (curp->man->macroset == MACROSET_MDOC) 674 mdoc_endparse(curp->man); 675 else 676 man_endparse(curp->man); 677 roff_endparse(curp->roff); 678 } 679 680 static void 681 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) 682 { 683 struct buf *svprimary; 684 const char *svfile; 685 size_t offset; 686 static int recursion_depth; 687 688 if (64 < recursion_depth) { 689 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL); 690 return; 691 } 692 693 /* Line number is per-file. */ 694 svfile = curp->file; 695 curp->file = file; 696 svprimary = curp->primary; 697 curp->primary = &blk; 698 curp->line = 1; 699 recursion_depth++; 700 701 /* Skip an UTF-8 byte order mark. */ 702 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && 703 (unsigned char)blk.buf[0] == 0xef && 704 (unsigned char)blk.buf[1] == 0xbb && 705 (unsigned char)blk.buf[2] == 0xbf) { 706 offset = 3; 707 curp->filenc &= ~MPARSE_LATIN1; 708 } else 709 offset = 0; 710 711 mparse_buf_r(curp, blk, offset, 1); 712 713 if (--recursion_depth == 0) 714 mparse_end(curp); 715 716 curp->primary = svprimary; 717 curp->file = svfile; 718 } 719 720 /* 721 * Read the whole file into memory and call the parsers. 722 * Called recursively when an .so request is encountered. 723 */ 724 enum mandoclevel 725 mparse_readfd(struct mparse *curp, int fd, const char *file) 726 { 727 struct buf blk; 728 int with_mmap; 729 int save_filenc; 730 731 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { 732 save_filenc = curp->filenc; 733 curp->filenc = curp->options & 734 (MPARSE_UTF8 | MPARSE_LATIN1); 735 mparse_parse_buffer(curp, blk, file); 736 curp->filenc = save_filenc; 737 if (with_mmap) 738 munmap(blk.buf, blk.sz); 739 else 740 free(blk.buf); 741 } 742 return curp->file_status; 743 } 744 745 int 746 mparse_open(struct mparse *curp, const char *file) 747 { 748 char *cp; 749 int fd; 750 751 curp->file = file; 752 cp = strrchr(file, '.'); 753 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); 754 755 /* First try to use the filename as it is. */ 756 757 if ((fd = open(file, O_RDONLY)) != -1) 758 return fd; 759 760 /* 761 * If that doesn't work and the filename doesn't 762 * already end in .gz, try appending .gz. 763 */ 764 765 if ( ! curp->gzip) { 766 mandoc_asprintf(&cp, "%s.gz", file); 767 fd = open(cp, O_RDONLY); 768 free(cp); 769 if (fd != -1) { 770 curp->gzip = 1; 771 return fd; 772 } 773 } 774 775 /* Neither worked, give up. */ 776 777 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); 778 return -1; 779 } 780 781 struct mparse * 782 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, 783 const char *defos) 784 { 785 struct mparse *curp; 786 787 curp = mandoc_calloc(1, sizeof(struct mparse)); 788 789 curp->options = options; 790 curp->wlevel = wlevel; 791 curp->mmsg = mmsg; 792 curp->defos = defos; 793 794 curp->roff = roff_alloc(curp, options); 795 curp->man = roff_man_alloc( curp->roff, curp, curp->defos, 796 curp->options & MPARSE_QUICK ? 1 : 0); 797 if (curp->options & MPARSE_MDOC) { 798 mdoc_hash_init(); 799 curp->man->macroset = MACROSET_MDOC; 800 } else if (curp->options & MPARSE_MAN) { 801 man_hash_init(); 802 curp->man->macroset = MACROSET_MAN; 803 } 804 curp->man->first->tok = TOKEN_NONE; 805 return curp; 806 } 807 808 void 809 mparse_reset(struct mparse *curp) 810 { 811 roff_reset(curp->roff); 812 roff_man_reset(curp->man); 813 if (curp->secondary) 814 curp->secondary->sz = 0; 815 816 curp->file_status = MANDOCLEVEL_OK; 817 818 free(curp->sodest); 819 curp->sodest = NULL; 820 } 821 822 void 823 mparse_free(struct mparse *curp) 824 { 825 826 roff_man_free(curp->man); 827 if (curp->roff) 828 roff_free(curp->roff); 829 if (curp->secondary) 830 free(curp->secondary->buf); 831 832 free(curp->secondary); 833 free(curp->sodest); 834 free(curp); 835 } 836 837 void 838 mparse_result(struct mparse *curp, struct roff_man **man, 839 char **sodest) 840 { 841 842 if (sodest && NULL != (*sodest = curp->sodest)) { 843 *man = NULL; 844 return; 845 } 846 if (man) 847 *man = curp->man; 848 } 849 850 void 851 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc) 852 { 853 if (curp->file_status > *rc) 854 *rc = curp->file_status; 855 } 856 857 void 858 mandoc_vmsg(enum mandocerr t, struct mparse *m, 859 int ln, int pos, const char *fmt, ...) 860 { 861 char buf[256]; 862 va_list ap; 863 864 va_start(ap, fmt); 865 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 866 va_end(ap); 867 868 mandoc_msg(t, m, ln, pos, buf); 869 } 870 871 void 872 mandoc_msg(enum mandocerr er, struct mparse *m, 873 int ln, int col, const char *msg) 874 { 875 enum mandoclevel level; 876 877 level = MANDOCLEVEL_UNSUPP; 878 while (er < mandoclimits[level]) 879 level--; 880 881 if (level < m->wlevel && er != MANDOCERR_FILE) 882 return; 883 884 if (m->mmsg) 885 (*m->mmsg)(er, level, m->file, ln, col, msg); 886 887 if (m->file_status < level) 888 m->file_status = level; 889 } 890 891 const char * 892 mparse_strerror(enum mandocerr er) 893 { 894 895 return mandocerrs[er]; 896 } 897 898 const char * 899 mparse_strlevel(enum mandoclevel lvl) 900 { 901 return mandoclevels[lvl]; 902 } 903 904 void 905 mparse_keep(struct mparse *p) 906 { 907 908 assert(NULL == p->secondary); 909 p->secondary = mandoc_calloc(1, sizeof(struct buf)); 910 } 911 912 const char * 913 mparse_getkeep(const struct mparse *p) 914 { 915 916 assert(p->secondary); 917 return p->secondary->sz ? p->secondary->buf : NULL; 918 } 919