1 /* $OpenBSD: read.c,v 1.167 2018/03/16 15:05:33 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #include <sys/types.h> 20 #include <sys/mman.h> 21 #include <sys/stat.h> 22 23 #include <assert.h> 24 #include <ctype.h> 25 #include <errno.h> 26 #include <fcntl.h> 27 #include <stdarg.h> 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <string.h> 31 #include <unistd.h> 32 #include <zlib.h> 33 34 #include "mandoc_aux.h" 35 #include "mandoc.h" 36 #include "roff.h" 37 #include "mdoc.h" 38 #include "man.h" 39 #include "libmandoc.h" 40 41 #define REPARSE_LIMIT 1000 42 43 struct mparse { 44 struct roff *roff; /* roff parser (!NULL) */ 45 struct roff_man *man; /* man parser */ 46 char *sodest; /* filename pointed to by .so */ 47 const char *file; /* filename of current input file */ 48 struct buf *primary; /* buffer currently being parsed */ 49 struct buf *secondary; /* preprocessed copy of input */ 50 const char *os_s; /* default operating system */ 51 mandocmsg mmsg; /* warning/error message handler */ 52 enum mandoclevel file_status; /* status of current parse */ 53 enum mandocerr mmin; /* ignore messages below this */ 54 int options; /* parser options */ 55 int gzip; /* current input file is gzipped */ 56 int filenc; /* encoding of the current file */ 57 int reparse_count; /* finite interp. stack */ 58 int line; /* line number in the file */ 59 }; 60 61 static void choose_parser(struct mparse *); 62 static void resize_buf(struct buf *, size_t); 63 static int mparse_buf_r(struct mparse *, struct buf, size_t, int); 64 static int read_whole_file(struct mparse *, const char *, int, 65 struct buf *, int *); 66 static void mparse_end(struct mparse *); 67 static void mparse_parse_buffer(struct mparse *, struct buf, 68 const char *); 69 70 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { 71 MANDOCERR_OK, 72 MANDOCERR_OK, 73 MANDOCERR_WARNING, 74 MANDOCERR_ERROR, 75 MANDOCERR_UNSUPP, 76 MANDOCERR_MAX, 77 MANDOCERR_MAX 78 }; 79 80 static const char * const mandocerrs[MANDOCERR_MAX] = { 81 "ok", 82 83 "base system convention", 84 85 "Mdocdate found", 86 "Mdocdate missing", 87 "unknown architecture", 88 "operating system explicitly specified", 89 "RCS id missing", 90 "referenced manual not found", 91 92 "generic style suggestion", 93 94 "legacy man(7) date format", 95 "lower case character in document title", 96 "duplicate RCS id", 97 "possible typo in section name", 98 "unterminated quoted argument", 99 "useless macro", 100 "consider using OS macro", 101 "errnos out of order", 102 "duplicate errno", 103 "trailing delimiter", 104 "no blank before trailing delimiter", 105 "fill mode already enabled, skipping", 106 "fill mode already disabled, skipping", 107 "verbatim \"--\", maybe consider using \\(em", 108 "function name without markup", 109 "whitespace at end of input line", 110 "bad comment style", 111 112 "generic warning", 113 114 /* related to the prologue */ 115 "missing manual title, using UNTITLED", 116 "missing manual title, using \"\"", 117 "missing manual section, using \"\"", 118 "unknown manual section", 119 "missing date, using today's date", 120 "cannot parse date, using it verbatim", 121 "date in the future, using it anyway", 122 "missing Os macro, using \"\"", 123 "late prologue macro", 124 "prologue macros out of order", 125 126 /* related to document structure */ 127 ".so is fragile, better use ln(1)", 128 "no document body", 129 "content before first section header", 130 "first section is not \"NAME\"", 131 "NAME section without Nm before Nd", 132 "NAME section without description", 133 "description not at the end of NAME", 134 "bad NAME section content", 135 "missing comma before name", 136 "missing description line, using \"\"", 137 "description line outside NAME section", 138 "sections out of conventional order", 139 "duplicate section title", 140 "unexpected section", 141 "cross reference to self", 142 "unusual Xr order", 143 "unusual Xr punctuation", 144 "AUTHORS section without An macro", 145 146 /* related to macros and nesting */ 147 "obsolete macro", 148 "macro neither callable nor escaped", 149 "skipping paragraph macro", 150 "moving paragraph macro out of list", 151 "skipping no-space macro", 152 "blocks badly nested", 153 "nested displays are not portable", 154 "moving content out of list", 155 "first macro on line", 156 "line scope broken", 157 "skipping blank line in line scope", 158 159 /* related to missing macro arguments */ 160 "skipping empty request", 161 "conditional request controls empty scope", 162 "skipping empty macro", 163 "empty block", 164 "empty argument, using 0n", 165 "missing display type, using -ragged", 166 "list type is not the first argument", 167 "missing -width in -tag list, using 6n", 168 "missing utility name, using \"\"", 169 "missing function name, using \"\"", 170 "empty head in list item", 171 "empty list item", 172 "missing argument, using next line", 173 "missing font type, using \\fR", 174 "unknown font type, using \\fR", 175 "nothing follows prefix", 176 "empty reference block", 177 "missing section argument", 178 "missing -std argument, adding it", 179 "missing option string, using \"\"", 180 "missing resource identifier, using \"\"", 181 "missing eqn box, using \"\"", 182 183 /* related to bad macro arguments */ 184 "duplicate argument", 185 "skipping duplicate argument", 186 "skipping duplicate display type", 187 "skipping duplicate list type", 188 "skipping -width argument", 189 "wrong number of cells", 190 "unknown AT&T UNIX version", 191 "comma in function argument", 192 "parenthesis in function name", 193 "unknown library name", 194 "invalid content in Rs block", 195 "invalid Boolean argument", 196 "unknown font, skipping request", 197 "odd number of characters in request", 198 199 /* related to plain text */ 200 "blank line in fill mode, using .sp", 201 "tab in filled text", 202 "new sentence, new line", 203 "invalid escape sequence", 204 "undefined string, using \"\"", 205 206 /* related to tables */ 207 "tbl line starts with span", 208 "tbl column starts with span", 209 "skipping vertical bar in tbl layout", 210 211 "generic error", 212 213 /* related to tables */ 214 "non-alphabetic character in tbl options", 215 "skipping unknown tbl option", 216 "missing tbl option argument", 217 "wrong tbl option argument size", 218 "empty tbl layout", 219 "invalid character in tbl layout", 220 "unmatched parenthesis in tbl layout", 221 "tbl without any data cells", 222 "ignoring data in spanned tbl cell", 223 "ignoring extra tbl data cells", 224 "data block open at end of tbl", 225 226 /* related to document structure and macros */ 227 NULL, 228 "duplicate prologue macro", 229 "skipping late title macro", 230 "input stack limit exceeded, infinite loop?", 231 "skipping bad character", 232 "skipping unknown macro", 233 "skipping insecure request", 234 "skipping item outside list", 235 "skipping column outside column list", 236 "skipping end of block that is not open", 237 "fewer RS blocks open, skipping", 238 "inserting missing end of block", 239 "appending missing end of block", 240 241 /* related to request and macro arguments */ 242 "escaped character not allowed in a name", 243 "NOT IMPLEMENTED: Bd -file", 244 "skipping display without arguments", 245 "missing list type, using -item", 246 "argument is not numeric, using 1", 247 "missing manual name, using \"\"", 248 "uname(3) system call failed, using UNKNOWN", 249 "unknown standard specifier", 250 "skipping request without numeric argument", 251 "NOT IMPLEMENTED: .so with absolute path or \"..\"", 252 ".so request failed", 253 "skipping all arguments", 254 "skipping excess arguments", 255 "divide by zero", 256 257 "unsupported feature", 258 "input too large", 259 "unsupported control character", 260 "unsupported roff request", 261 "eqn delim option in tbl", 262 "unsupported tbl layout modifier", 263 "ignoring macro in table", 264 }; 265 266 static const char * const mandoclevels[MANDOCLEVEL_MAX] = { 267 "SUCCESS", 268 "STYLE", 269 "WARNING", 270 "ERROR", 271 "UNSUPP", 272 "BADARG", 273 "SYSERR" 274 }; 275 276 277 static void 278 resize_buf(struct buf *buf, size_t initial) 279 { 280 281 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial; 282 buf->buf = mandoc_realloc(buf->buf, buf->sz); 283 } 284 285 static void 286 choose_parser(struct mparse *curp) 287 { 288 char *cp, *ep; 289 int format; 290 291 /* 292 * If neither command line arguments -mdoc or -man select 293 * a parser nor the roff parser found a .Dd or .TH macro 294 * yet, look ahead in the main input buffer. 295 */ 296 297 if ((format = roff_getformat(curp->roff)) == 0) { 298 cp = curp->primary->buf; 299 ep = cp + curp->primary->sz; 300 while (cp < ep) { 301 if (*cp == '.' || *cp == '\'') { 302 cp++; 303 if (cp[0] == 'D' && cp[1] == 'd') { 304 format = MPARSE_MDOC; 305 break; 306 } 307 if (cp[0] == 'T' && cp[1] == 'H') { 308 format = MPARSE_MAN; 309 break; 310 } 311 } 312 cp = memchr(cp, '\n', ep - cp); 313 if (cp == NULL) 314 break; 315 cp++; 316 } 317 } 318 319 if (format == MPARSE_MDOC) { 320 curp->man->macroset = MACROSET_MDOC; 321 if (curp->man->mdocmac == NULL) 322 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); 323 } else { 324 curp->man->macroset = MACROSET_MAN; 325 if (curp->man->manmac == NULL) 326 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); 327 } 328 curp->man->first->tok = TOKEN_NONE; 329 } 330 331 /* 332 * Main parse routine for a buffer. 333 * It assumes encoding and line numbering are already set up. 334 * It can recurse directly (for invocations of user-defined 335 * macros, inline equations, and input line traps) 336 * and indirectly (for .so file inclusion). 337 */ 338 static int 339 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) 340 { 341 struct buf ln; 342 const char *save_file; 343 char *cp; 344 size_t pos; /* byte number in the ln buffer */ 345 enum rofferr rr; 346 int of; 347 int lnn; /* line number in the real file */ 348 int fd; 349 unsigned char c; 350 351 memset(&ln, 0, sizeof(ln)); 352 353 lnn = curp->line; 354 pos = 0; 355 356 while (i < blk.sz) { 357 if (0 == pos && '\0' == blk.buf[i]) 358 break; 359 360 if (start) { 361 curp->line = lnn; 362 curp->reparse_count = 0; 363 364 if (lnn < 3 && 365 curp->filenc & MPARSE_UTF8 && 366 curp->filenc & MPARSE_LATIN1) 367 curp->filenc = preconv_cue(&blk, i); 368 } 369 370 while (i < blk.sz && (start || blk.buf[i] != '\0')) { 371 372 /* 373 * When finding an unescaped newline character, 374 * leave the character loop to process the line. 375 * Skip a preceding carriage return, if any. 376 */ 377 378 if ('\r' == blk.buf[i] && i + 1 < blk.sz && 379 '\n' == blk.buf[i + 1]) 380 ++i; 381 if ('\n' == blk.buf[i]) { 382 ++i; 383 ++lnn; 384 break; 385 } 386 387 /* 388 * Make sure we have space for the worst 389 * case of 11 bytes: "\\[u10ffff]\0" 390 */ 391 392 if (pos + 11 > ln.sz) 393 resize_buf(&ln, 256); 394 395 /* 396 * Encode 8-bit input. 397 */ 398 399 c = blk.buf[i]; 400 if (c & 0x80) { 401 if ( ! (curp->filenc && preconv_encode( 402 &blk, &i, &ln, &pos, &curp->filenc))) { 403 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, 404 curp->line, pos, "0x%x", c); 405 ln.buf[pos++] = '?'; 406 i++; 407 } 408 continue; 409 } 410 411 /* 412 * Exclude control characters. 413 */ 414 415 if (c == 0x7f || (c < 0x20 && c != 0x09)) { 416 mandoc_vmsg(c == 0x00 || c == 0x04 || 417 c > 0x0a ? MANDOCERR_CHAR_BAD : 418 MANDOCERR_CHAR_UNSUPP, 419 curp, curp->line, pos, "0x%x", c); 420 i++; 421 if (c != '\r') 422 ln.buf[pos++] = '?'; 423 continue; 424 } 425 426 ln.buf[pos++] = blk.buf[i++]; 427 } 428 429 if (pos + 1 >= ln.sz) 430 resize_buf(&ln, 256); 431 432 if (i == blk.sz || blk.buf[i] == '\0') 433 ln.buf[pos++] = '\n'; 434 ln.buf[pos] = '\0'; 435 436 /* 437 * A significant amount of complexity is contained by 438 * the roff preprocessor. It's line-oriented but can be 439 * expressed on one line, so we need at times to 440 * readjust our starting point and re-run it. The roff 441 * preprocessor can also readjust the buffers with new 442 * data, so we pass them in wholesale. 443 */ 444 445 of = 0; 446 447 /* 448 * Maintain a lookaside buffer of all parsed lines. We 449 * only do this if mparse_keep() has been invoked (the 450 * buffer may be accessed with mparse_getkeep()). 451 */ 452 453 if (curp->secondary) { 454 curp->secondary->buf = mandoc_realloc( 455 curp->secondary->buf, 456 curp->secondary->sz + pos + 2); 457 memcpy(curp->secondary->buf + 458 curp->secondary->sz, 459 ln.buf, pos); 460 curp->secondary->sz += pos; 461 curp->secondary->buf 462 [curp->secondary->sz] = '\n'; 463 curp->secondary->sz++; 464 curp->secondary->buf 465 [curp->secondary->sz] = '\0'; 466 } 467 rerun: 468 rr = roff_parseln(curp->roff, curp->line, &ln, &of); 469 470 switch (rr) { 471 case ROFF_REPARSE: 472 if (++curp->reparse_count > REPARSE_LIMIT) 473 mandoc_msg(MANDOCERR_ROFFLOOP, curp, 474 curp->line, pos, NULL); 475 else if (mparse_buf_r(curp, ln, of, 0) == 1 || 476 start == 1) { 477 pos = 0; 478 continue; 479 } 480 free(ln.buf); 481 return 0; 482 case ROFF_APPEND: 483 pos = strlen(ln.buf); 484 continue; 485 case ROFF_RERUN: 486 goto rerun; 487 case ROFF_IGN: 488 pos = 0; 489 continue; 490 case ROFF_SO: 491 if ( ! (curp->options & MPARSE_SO) && 492 (i >= blk.sz || blk.buf[i] == '\0')) { 493 curp->sodest = mandoc_strdup(ln.buf + of); 494 free(ln.buf); 495 return 1; 496 } 497 /* 498 * We remove `so' clauses from our lookaside 499 * buffer because we're going to descend into 500 * the file recursively. 501 */ 502 if (curp->secondary) 503 curp->secondary->sz -= pos + 1; 504 save_file = curp->file; 505 if ((fd = mparse_open(curp, ln.buf + of)) != -1) { 506 mparse_readfd(curp, fd, ln.buf + of); 507 close(fd); 508 curp->file = save_file; 509 } else { 510 curp->file = save_file; 511 mandoc_vmsg(MANDOCERR_SO_FAIL, 512 curp, curp->line, pos, 513 ".so %s", ln.buf + of); 514 ln.sz = mandoc_asprintf(&cp, 515 ".sp\nSee the file %s.\n.sp", 516 ln.buf + of); 517 free(ln.buf); 518 ln.buf = cp; 519 of = 0; 520 mparse_buf_r(curp, ln, of, 0); 521 } 522 pos = 0; 523 continue; 524 default: 525 break; 526 } 527 528 if (curp->man->macroset == MACROSET_NONE) 529 choose_parser(curp); 530 531 if ((curp->man->macroset == MACROSET_MDOC ? 532 mdoc_parseln(curp->man, curp->line, ln.buf, of) : 533 man_parseln(curp->man, curp->line, ln.buf, of)) == 2) 534 break; 535 536 /* Temporary buffers typically are not full. */ 537 538 if (0 == start && '\0' == blk.buf[i]) 539 break; 540 541 /* Start the next input line. */ 542 543 pos = 0; 544 } 545 546 free(ln.buf); 547 return 1; 548 } 549 550 static int 551 read_whole_file(struct mparse *curp, const char *file, int fd, 552 struct buf *fb, int *with_mmap) 553 { 554 struct stat st; 555 gzFile gz; 556 size_t off; 557 ssize_t ssz; 558 int gzerrnum, retval; 559 560 if (fstat(fd, &st) == -1) { 561 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, 562 "fstat: %s", strerror(errno)); 563 return 0; 564 } 565 566 /* 567 * If we're a regular file, try just reading in the whole entry 568 * via mmap(). This is faster than reading it into blocks, and 569 * since each file is only a few bytes to begin with, I'm not 570 * concerned that this is going to tank any machines. 571 */ 572 573 if (curp->gzip == 0 && S_ISREG(st.st_mode)) { 574 if (st.st_size > 0x7fffffff) { 575 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); 576 return 0; 577 } 578 *with_mmap = 1; 579 fb->sz = (size_t)st.st_size; 580 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); 581 if (fb->buf != MAP_FAILED) 582 return 1; 583 } 584 585 if (curp->gzip) { 586 /* 587 * Duplicating the file descriptor is required 588 * because we will have to call gzclose(3) 589 * to free memory used internally by zlib, 590 * but that will also close the file descriptor, 591 * which this function must not do. 592 */ 593 if ((fd = dup(fd)) == -1) { 594 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, 595 "dup: %s", strerror(errno)); 596 return 0; 597 } 598 if ((gz = gzdopen(fd, "rb")) == NULL) { 599 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, 600 "gzdopen: %s", strerror(errno)); 601 close(fd); 602 return 0; 603 } 604 } else 605 gz = NULL; 606 607 /* 608 * If this isn't a regular file (like, say, stdin), then we must 609 * go the old way and just read things in bit by bit. 610 */ 611 612 *with_mmap = 0; 613 off = 0; 614 retval = 0; 615 fb->sz = 0; 616 fb->buf = NULL; 617 for (;;) { 618 if (off == fb->sz) { 619 if (fb->sz == (1U << 31)) { 620 mandoc_msg(MANDOCERR_TOOLARGE, curp, 621 0, 0, NULL); 622 break; 623 } 624 resize_buf(fb, 65536); 625 } 626 ssz = curp->gzip ? 627 gzread(gz, fb->buf + (int)off, fb->sz - off) : 628 read(fd, fb->buf + (int)off, fb->sz - off); 629 if (ssz == 0) { 630 fb->sz = off; 631 retval = 1; 632 break; 633 } 634 if (ssz == -1) { 635 if (curp->gzip) 636 (void)gzerror(gz, &gzerrnum); 637 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s", 638 curp->gzip && gzerrnum != Z_ERRNO ? 639 zError(gzerrnum) : strerror(errno)); 640 break; 641 } 642 off += (size_t)ssz; 643 } 644 645 if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK) 646 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s", 647 gzerrnum == Z_ERRNO ? strerror(errno) : 648 zError(gzerrnum)); 649 if (retval == 0) { 650 free(fb->buf); 651 fb->buf = NULL; 652 } 653 return retval; 654 } 655 656 static void 657 mparse_end(struct mparse *curp) 658 { 659 if (curp->man->macroset == MACROSET_NONE) 660 curp->man->macroset = MACROSET_MAN; 661 if (curp->man->macroset == MACROSET_MDOC) 662 mdoc_endparse(curp->man); 663 else 664 man_endparse(curp->man); 665 roff_endparse(curp->roff); 666 } 667 668 static void 669 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) 670 { 671 struct buf *svprimary; 672 const char *svfile; 673 size_t offset; 674 static int recursion_depth; 675 676 if (64 < recursion_depth) { 677 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL); 678 return; 679 } 680 681 /* Line number is per-file. */ 682 svfile = curp->file; 683 curp->file = file; 684 svprimary = curp->primary; 685 curp->primary = &blk; 686 curp->line = 1; 687 recursion_depth++; 688 689 /* Skip an UTF-8 byte order mark. */ 690 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && 691 (unsigned char)blk.buf[0] == 0xef && 692 (unsigned char)blk.buf[1] == 0xbb && 693 (unsigned char)blk.buf[2] == 0xbf) { 694 offset = 3; 695 curp->filenc &= ~MPARSE_LATIN1; 696 } else 697 offset = 0; 698 699 mparse_buf_r(curp, blk, offset, 1); 700 701 if (--recursion_depth == 0) 702 mparse_end(curp); 703 704 curp->primary = svprimary; 705 curp->file = svfile; 706 } 707 708 /* 709 * Read the whole file into memory and call the parsers. 710 * Called recursively when an .so request is encountered. 711 */ 712 enum mandoclevel 713 mparse_readfd(struct mparse *curp, int fd, const char *file) 714 { 715 struct buf blk; 716 int with_mmap; 717 int save_filenc; 718 719 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { 720 save_filenc = curp->filenc; 721 curp->filenc = curp->options & 722 (MPARSE_UTF8 | MPARSE_LATIN1); 723 mparse_parse_buffer(curp, blk, file); 724 curp->filenc = save_filenc; 725 if (with_mmap) 726 munmap(blk.buf, blk.sz); 727 else 728 free(blk.buf); 729 } 730 return curp->file_status; 731 } 732 733 int 734 mparse_open(struct mparse *curp, const char *file) 735 { 736 char *cp; 737 int fd; 738 739 curp->file = file; 740 cp = strrchr(file, '.'); 741 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); 742 743 /* First try to use the filename as it is. */ 744 745 if ((fd = open(file, O_RDONLY)) != -1) 746 return fd; 747 748 /* 749 * If that doesn't work and the filename doesn't 750 * already end in .gz, try appending .gz. 751 */ 752 753 if ( ! curp->gzip) { 754 mandoc_asprintf(&cp, "%s.gz", file); 755 fd = open(cp, O_RDONLY); 756 free(cp); 757 if (fd != -1) { 758 curp->gzip = 1; 759 return fd; 760 } 761 } 762 763 /* Neither worked, give up. */ 764 765 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); 766 return -1; 767 } 768 769 struct mparse * 770 mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg, 771 enum mandoc_os os_e, const char *os_s) 772 { 773 struct mparse *curp; 774 775 curp = mandoc_calloc(1, sizeof(struct mparse)); 776 777 curp->options = options; 778 curp->mmin = mmin; 779 curp->mmsg = mmsg; 780 curp->os_s = os_s; 781 782 curp->roff = roff_alloc(curp, options); 783 curp->man = roff_man_alloc(curp->roff, curp, curp->os_s, 784 curp->options & MPARSE_QUICK ? 1 : 0); 785 if (curp->options & MPARSE_MDOC) { 786 curp->man->macroset = MACROSET_MDOC; 787 if (curp->man->mdocmac == NULL) 788 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); 789 } else if (curp->options & MPARSE_MAN) { 790 curp->man->macroset = MACROSET_MAN; 791 if (curp->man->manmac == NULL) 792 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); 793 } 794 curp->man->first->tok = TOKEN_NONE; 795 curp->man->meta.os_e = os_e; 796 return curp; 797 } 798 799 void 800 mparse_reset(struct mparse *curp) 801 { 802 roff_reset(curp->roff); 803 roff_man_reset(curp->man); 804 805 free(curp->sodest); 806 curp->sodest = NULL; 807 808 if (curp->secondary) 809 curp->secondary->sz = 0; 810 811 curp->file_status = MANDOCLEVEL_OK; 812 curp->gzip = 0; 813 } 814 815 void 816 mparse_free(struct mparse *curp) 817 { 818 819 roffhash_free(curp->man->mdocmac); 820 roffhash_free(curp->man->manmac); 821 roff_man_free(curp->man); 822 roff_free(curp->roff); 823 if (curp->secondary) 824 free(curp->secondary->buf); 825 826 free(curp->secondary); 827 free(curp->sodest); 828 free(curp); 829 } 830 831 void 832 mparse_result(struct mparse *curp, struct roff_man **man, 833 char **sodest) 834 { 835 836 if (sodest && NULL != (*sodest = curp->sodest)) { 837 *man = NULL; 838 return; 839 } 840 if (man) 841 *man = curp->man; 842 } 843 844 void 845 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc) 846 { 847 if (curp->file_status > *rc) 848 *rc = curp->file_status; 849 } 850 851 void 852 mandoc_vmsg(enum mandocerr t, struct mparse *m, 853 int ln, int pos, const char *fmt, ...) 854 { 855 char buf[256]; 856 va_list ap; 857 858 va_start(ap, fmt); 859 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 860 va_end(ap); 861 862 mandoc_msg(t, m, ln, pos, buf); 863 } 864 865 void 866 mandoc_msg(enum mandocerr er, struct mparse *m, 867 int ln, int col, const char *msg) 868 { 869 enum mandoclevel level; 870 871 if (er < m->mmin && er != MANDOCERR_FILE) 872 return; 873 874 level = MANDOCLEVEL_UNSUPP; 875 while (er < mandoclimits[level]) 876 level--; 877 878 if (m->mmsg) 879 (*m->mmsg)(er, level, m->file, ln, col, msg); 880 881 if (m->file_status < level) 882 m->file_status = level; 883 } 884 885 const char * 886 mparse_strerror(enum mandocerr er) 887 { 888 889 return mandocerrs[er]; 890 } 891 892 const char * 893 mparse_strlevel(enum mandoclevel lvl) 894 { 895 return mandoclevels[lvl]; 896 } 897 898 void 899 mparse_keep(struct mparse *p) 900 { 901 902 assert(NULL == p->secondary); 903 p->secondary = mandoc_calloc(1, sizeof(struct buf)); 904 } 905 906 const char * 907 mparse_getkeep(const struct mparse *p) 908 { 909 910 assert(p->secondary); 911 return p->secondary->sz ? p->secondary->buf : NULL; 912 } 913