1 /* $OpenBSD: read.c,v 1.166 2018/02/23 21:34:37 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #include <sys/types.h> 20 #include <sys/mman.h> 21 #include <sys/stat.h> 22 23 #include <assert.h> 24 #include <ctype.h> 25 #include <errno.h> 26 #include <fcntl.h> 27 #include <stdarg.h> 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <string.h> 31 #include <unistd.h> 32 #include <zlib.h> 33 34 #include "mandoc_aux.h" 35 #include "mandoc.h" 36 #include "roff.h" 37 #include "mdoc.h" 38 #include "man.h" 39 #include "libmandoc.h" 40 41 #define REPARSE_LIMIT 1000 42 43 struct mparse { 44 struct roff *roff; /* roff parser (!NULL) */ 45 struct roff_man *man; /* man parser */ 46 char *sodest; /* filename pointed to by .so */ 47 const char *file; /* filename of current input file */ 48 struct buf *primary; /* buffer currently being parsed */ 49 struct buf *secondary; /* preprocessed copy of input */ 50 const char *os_s; /* default operating system */ 51 mandocmsg mmsg; /* warning/error message handler */ 52 enum mandoclevel file_status; /* status of current parse */ 53 enum mandocerr mmin; /* ignore messages below this */ 54 int options; /* parser options */ 55 int gzip; /* current input file is gzipped */ 56 int filenc; /* encoding of the current file */ 57 int reparse_count; /* finite interp. stack */ 58 int line; /* line number in the file */ 59 }; 60 61 static void choose_parser(struct mparse *); 62 static void resize_buf(struct buf *, size_t); 63 static int mparse_buf_r(struct mparse *, struct buf, size_t, int); 64 static int read_whole_file(struct mparse *, const char *, int, 65 struct buf *, int *); 66 static void mparse_end(struct mparse *); 67 static void mparse_parse_buffer(struct mparse *, struct buf, 68 const char *); 69 70 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { 71 MANDOCERR_OK, 72 MANDOCERR_OK, 73 MANDOCERR_WARNING, 74 MANDOCERR_ERROR, 75 MANDOCERR_UNSUPP, 76 MANDOCERR_MAX, 77 MANDOCERR_MAX 78 }; 79 80 static const char * const mandocerrs[MANDOCERR_MAX] = { 81 "ok", 82 83 "base system convention", 84 85 "Mdocdate found", 86 "Mdocdate missing", 87 "unknown architecture", 88 "operating system explicitly specified", 89 "RCS id missing", 90 "referenced manual not found", 91 92 "generic style suggestion", 93 94 "legacy man(7) date format", 95 "lower case character in document title", 96 "duplicate RCS id", 97 "possible typo in section name", 98 "unterminated quoted argument", 99 "useless macro", 100 "consider using OS macro", 101 "errnos out of order", 102 "duplicate errno", 103 "trailing delimiter", 104 "no blank before trailing delimiter", 105 "fill mode already enabled, skipping", 106 "fill mode already disabled, skipping", 107 "function name without markup", 108 "whitespace at end of input line", 109 "bad comment style", 110 111 "generic warning", 112 113 /* related to the prologue */ 114 "missing manual title, using UNTITLED", 115 "missing manual title, using \"\"", 116 "missing manual section, using \"\"", 117 "unknown manual section", 118 "missing date, using today's date", 119 "cannot parse date, using it verbatim", 120 "date in the future, using it anyway", 121 "missing Os macro, using \"\"", 122 "late prologue macro", 123 "prologue macros out of order", 124 125 /* related to document structure */ 126 ".so is fragile, better use ln(1)", 127 "no document body", 128 "content before first section header", 129 "first section is not \"NAME\"", 130 "NAME section without Nm before Nd", 131 "NAME section without description", 132 "description not at the end of NAME", 133 "bad NAME section content", 134 "missing comma before name", 135 "missing description line, using \"\"", 136 "description line outside NAME section", 137 "sections out of conventional order", 138 "duplicate section title", 139 "unexpected section", 140 "cross reference to self", 141 "unusual Xr order", 142 "unusual Xr punctuation", 143 "AUTHORS section without An macro", 144 145 /* related to macros and nesting */ 146 "obsolete macro", 147 "macro neither callable nor escaped", 148 "skipping paragraph macro", 149 "moving paragraph macro out of list", 150 "skipping no-space macro", 151 "blocks badly nested", 152 "nested displays are not portable", 153 "moving content out of list", 154 "first macro on line", 155 "line scope broken", 156 "skipping blank line in line scope", 157 158 /* related to missing macro arguments */ 159 "skipping empty request", 160 "conditional request controls empty scope", 161 "skipping empty macro", 162 "empty block", 163 "empty argument, using 0n", 164 "missing display type, using -ragged", 165 "list type is not the first argument", 166 "missing -width in -tag list, using 6n", 167 "missing utility name, using \"\"", 168 "missing function name, using \"\"", 169 "empty head in list item", 170 "empty list item", 171 "missing argument, using next line", 172 "missing font type, using \\fR", 173 "unknown font type, using \\fR", 174 "nothing follows prefix", 175 "empty reference block", 176 "missing section argument", 177 "missing -std argument, adding it", 178 "missing option string, using \"\"", 179 "missing resource identifier, using \"\"", 180 "missing eqn box, using \"\"", 181 182 /* related to bad macro arguments */ 183 "duplicate argument", 184 "skipping duplicate argument", 185 "skipping duplicate display type", 186 "skipping duplicate list type", 187 "skipping -width argument", 188 "wrong number of cells", 189 "unknown AT&T UNIX version", 190 "comma in function argument", 191 "parenthesis in function name", 192 "unknown library name", 193 "invalid content in Rs block", 194 "invalid Boolean argument", 195 "unknown font, skipping request", 196 "odd number of characters in request", 197 198 /* related to plain text */ 199 "blank line in fill mode, using .sp", 200 "tab in filled text", 201 "new sentence, new line", 202 "invalid escape sequence", 203 "undefined string, using \"\"", 204 205 /* related to tables */ 206 "tbl line starts with span", 207 "tbl column starts with span", 208 "skipping vertical bar in tbl layout", 209 210 "generic error", 211 212 /* related to tables */ 213 "non-alphabetic character in tbl options", 214 "skipping unknown tbl option", 215 "missing tbl option argument", 216 "wrong tbl option argument size", 217 "empty tbl layout", 218 "invalid character in tbl layout", 219 "unmatched parenthesis in tbl layout", 220 "tbl without any data cells", 221 "ignoring data in spanned tbl cell", 222 "ignoring extra tbl data cells", 223 "data block open at end of tbl", 224 225 /* related to document structure and macros */ 226 NULL, 227 "duplicate prologue macro", 228 "skipping late title macro", 229 "input stack limit exceeded, infinite loop?", 230 "skipping bad character", 231 "skipping unknown macro", 232 "skipping insecure request", 233 "skipping item outside list", 234 "skipping column outside column list", 235 "skipping end of block that is not open", 236 "fewer RS blocks open, skipping", 237 "inserting missing end of block", 238 "appending missing end of block", 239 240 /* related to request and macro arguments */ 241 "escaped character not allowed in a name", 242 "NOT IMPLEMENTED: Bd -file", 243 "skipping display without arguments", 244 "missing list type, using -item", 245 "argument is not numeric, using 1", 246 "missing manual name, using \"\"", 247 "uname(3) system call failed, using UNKNOWN", 248 "unknown standard specifier", 249 "skipping request without numeric argument", 250 "NOT IMPLEMENTED: .so with absolute path or \"..\"", 251 ".so request failed", 252 "skipping all arguments", 253 "skipping excess arguments", 254 "divide by zero", 255 256 "unsupported feature", 257 "input too large", 258 "unsupported control character", 259 "unsupported roff request", 260 "eqn delim option in tbl", 261 "unsupported tbl layout modifier", 262 "ignoring macro in table", 263 }; 264 265 static const char * const mandoclevels[MANDOCLEVEL_MAX] = { 266 "SUCCESS", 267 "STYLE", 268 "WARNING", 269 "ERROR", 270 "UNSUPP", 271 "BADARG", 272 "SYSERR" 273 }; 274 275 276 static void 277 resize_buf(struct buf *buf, size_t initial) 278 { 279 280 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial; 281 buf->buf = mandoc_realloc(buf->buf, buf->sz); 282 } 283 284 static void 285 choose_parser(struct mparse *curp) 286 { 287 char *cp, *ep; 288 int format; 289 290 /* 291 * If neither command line arguments -mdoc or -man select 292 * a parser nor the roff parser found a .Dd or .TH macro 293 * yet, look ahead in the main input buffer. 294 */ 295 296 if ((format = roff_getformat(curp->roff)) == 0) { 297 cp = curp->primary->buf; 298 ep = cp + curp->primary->sz; 299 while (cp < ep) { 300 if (*cp == '.' || *cp == '\'') { 301 cp++; 302 if (cp[0] == 'D' && cp[1] == 'd') { 303 format = MPARSE_MDOC; 304 break; 305 } 306 if (cp[0] == 'T' && cp[1] == 'H') { 307 format = MPARSE_MAN; 308 break; 309 } 310 } 311 cp = memchr(cp, '\n', ep - cp); 312 if (cp == NULL) 313 break; 314 cp++; 315 } 316 } 317 318 if (format == MPARSE_MDOC) { 319 curp->man->macroset = MACROSET_MDOC; 320 if (curp->man->mdocmac == NULL) 321 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); 322 } else { 323 curp->man->macroset = MACROSET_MAN; 324 if (curp->man->manmac == NULL) 325 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); 326 } 327 curp->man->first->tok = TOKEN_NONE; 328 } 329 330 /* 331 * Main parse routine for a buffer. 332 * It assumes encoding and line numbering are already set up. 333 * It can recurse directly (for invocations of user-defined 334 * macros, inline equations, and input line traps) 335 * and indirectly (for .so file inclusion). 336 */ 337 static int 338 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) 339 { 340 struct buf ln; 341 const char *save_file; 342 char *cp; 343 size_t pos; /* byte number in the ln buffer */ 344 enum rofferr rr; 345 int of; 346 int lnn; /* line number in the real file */ 347 int fd; 348 unsigned char c; 349 350 memset(&ln, 0, sizeof(ln)); 351 352 lnn = curp->line; 353 pos = 0; 354 355 while (i < blk.sz) { 356 if (0 == pos && '\0' == blk.buf[i]) 357 break; 358 359 if (start) { 360 curp->line = lnn; 361 curp->reparse_count = 0; 362 363 if (lnn < 3 && 364 curp->filenc & MPARSE_UTF8 && 365 curp->filenc & MPARSE_LATIN1) 366 curp->filenc = preconv_cue(&blk, i); 367 } 368 369 while (i < blk.sz && (start || blk.buf[i] != '\0')) { 370 371 /* 372 * When finding an unescaped newline character, 373 * leave the character loop to process the line. 374 * Skip a preceding carriage return, if any. 375 */ 376 377 if ('\r' == blk.buf[i] && i + 1 < blk.sz && 378 '\n' == blk.buf[i + 1]) 379 ++i; 380 if ('\n' == blk.buf[i]) { 381 ++i; 382 ++lnn; 383 break; 384 } 385 386 /* 387 * Make sure we have space for the worst 388 * case of 11 bytes: "\\[u10ffff]\0" 389 */ 390 391 if (pos + 11 > ln.sz) 392 resize_buf(&ln, 256); 393 394 /* 395 * Encode 8-bit input. 396 */ 397 398 c = blk.buf[i]; 399 if (c & 0x80) { 400 if ( ! (curp->filenc && preconv_encode( 401 &blk, &i, &ln, &pos, &curp->filenc))) { 402 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, 403 curp->line, pos, "0x%x", c); 404 ln.buf[pos++] = '?'; 405 i++; 406 } 407 continue; 408 } 409 410 /* 411 * Exclude control characters. 412 */ 413 414 if (c == 0x7f || (c < 0x20 && c != 0x09)) { 415 mandoc_vmsg(c == 0x00 || c == 0x04 || 416 c > 0x0a ? MANDOCERR_CHAR_BAD : 417 MANDOCERR_CHAR_UNSUPP, 418 curp, curp->line, pos, "0x%x", c); 419 i++; 420 if (c != '\r') 421 ln.buf[pos++] = '?'; 422 continue; 423 } 424 425 ln.buf[pos++] = blk.buf[i++]; 426 } 427 428 if (pos + 1 >= ln.sz) 429 resize_buf(&ln, 256); 430 431 if (i == blk.sz || blk.buf[i] == '\0') 432 ln.buf[pos++] = '\n'; 433 ln.buf[pos] = '\0'; 434 435 /* 436 * A significant amount of complexity is contained by 437 * the roff preprocessor. It's line-oriented but can be 438 * expressed on one line, so we need at times to 439 * readjust our starting point and re-run it. The roff 440 * preprocessor can also readjust the buffers with new 441 * data, so we pass them in wholesale. 442 */ 443 444 of = 0; 445 446 /* 447 * Maintain a lookaside buffer of all parsed lines. We 448 * only do this if mparse_keep() has been invoked (the 449 * buffer may be accessed with mparse_getkeep()). 450 */ 451 452 if (curp->secondary) { 453 curp->secondary->buf = mandoc_realloc( 454 curp->secondary->buf, 455 curp->secondary->sz + pos + 2); 456 memcpy(curp->secondary->buf + 457 curp->secondary->sz, 458 ln.buf, pos); 459 curp->secondary->sz += pos; 460 curp->secondary->buf 461 [curp->secondary->sz] = '\n'; 462 curp->secondary->sz++; 463 curp->secondary->buf 464 [curp->secondary->sz] = '\0'; 465 } 466 rerun: 467 rr = roff_parseln(curp->roff, curp->line, &ln, &of); 468 469 switch (rr) { 470 case ROFF_REPARSE: 471 if (++curp->reparse_count > REPARSE_LIMIT) 472 mandoc_msg(MANDOCERR_ROFFLOOP, curp, 473 curp->line, pos, NULL); 474 else if (mparse_buf_r(curp, ln, of, 0) == 1 || 475 start == 1) { 476 pos = 0; 477 continue; 478 } 479 free(ln.buf); 480 return 0; 481 case ROFF_APPEND: 482 pos = strlen(ln.buf); 483 continue; 484 case ROFF_RERUN: 485 goto rerun; 486 case ROFF_IGN: 487 pos = 0; 488 continue; 489 case ROFF_SO: 490 if ( ! (curp->options & MPARSE_SO) && 491 (i >= blk.sz || blk.buf[i] == '\0')) { 492 curp->sodest = mandoc_strdup(ln.buf + of); 493 free(ln.buf); 494 return 1; 495 } 496 /* 497 * We remove `so' clauses from our lookaside 498 * buffer because we're going to descend into 499 * the file recursively. 500 */ 501 if (curp->secondary) 502 curp->secondary->sz -= pos + 1; 503 save_file = curp->file; 504 if ((fd = mparse_open(curp, ln.buf + of)) != -1) { 505 mparse_readfd(curp, fd, ln.buf + of); 506 close(fd); 507 curp->file = save_file; 508 } else { 509 curp->file = save_file; 510 mandoc_vmsg(MANDOCERR_SO_FAIL, 511 curp, curp->line, pos, 512 ".so %s", ln.buf + of); 513 ln.sz = mandoc_asprintf(&cp, 514 ".sp\nSee the file %s.\n.sp", 515 ln.buf + of); 516 free(ln.buf); 517 ln.buf = cp; 518 of = 0; 519 mparse_buf_r(curp, ln, of, 0); 520 } 521 pos = 0; 522 continue; 523 default: 524 break; 525 } 526 527 if (curp->man->macroset == MACROSET_NONE) 528 choose_parser(curp); 529 530 if ((curp->man->macroset == MACROSET_MDOC ? 531 mdoc_parseln(curp->man, curp->line, ln.buf, of) : 532 man_parseln(curp->man, curp->line, ln.buf, of)) == 2) 533 break; 534 535 /* Temporary buffers typically are not full. */ 536 537 if (0 == start && '\0' == blk.buf[i]) 538 break; 539 540 /* Start the next input line. */ 541 542 pos = 0; 543 } 544 545 free(ln.buf); 546 return 1; 547 } 548 549 static int 550 read_whole_file(struct mparse *curp, const char *file, int fd, 551 struct buf *fb, int *with_mmap) 552 { 553 struct stat st; 554 gzFile gz; 555 size_t off; 556 ssize_t ssz; 557 int gzerrnum, retval; 558 559 if (fstat(fd, &st) == -1) { 560 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, 561 "fstat: %s", strerror(errno)); 562 return 0; 563 } 564 565 /* 566 * If we're a regular file, try just reading in the whole entry 567 * via mmap(). This is faster than reading it into blocks, and 568 * since each file is only a few bytes to begin with, I'm not 569 * concerned that this is going to tank any machines. 570 */ 571 572 if (curp->gzip == 0 && S_ISREG(st.st_mode)) { 573 if (st.st_size > 0x7fffffff) { 574 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); 575 return 0; 576 } 577 *with_mmap = 1; 578 fb->sz = (size_t)st.st_size; 579 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); 580 if (fb->buf != MAP_FAILED) 581 return 1; 582 } 583 584 if (curp->gzip) { 585 /* 586 * Duplicating the file descriptor is required 587 * because we will have to call gzclose(3) 588 * to free memory used internally by zlib, 589 * but that will also close the file descriptor, 590 * which this function must not do. 591 */ 592 if ((fd = dup(fd)) == -1) { 593 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, 594 "dup: %s", strerror(errno)); 595 return 0; 596 } 597 if ((gz = gzdopen(fd, "rb")) == NULL) { 598 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, 599 "gzdopen: %s", strerror(errno)); 600 close(fd); 601 return 0; 602 } 603 } else 604 gz = NULL; 605 606 /* 607 * If this isn't a regular file (like, say, stdin), then we must 608 * go the old way and just read things in bit by bit. 609 */ 610 611 *with_mmap = 0; 612 off = 0; 613 retval = 0; 614 fb->sz = 0; 615 fb->buf = NULL; 616 for (;;) { 617 if (off == fb->sz) { 618 if (fb->sz == (1U << 31)) { 619 mandoc_msg(MANDOCERR_TOOLARGE, curp, 620 0, 0, NULL); 621 break; 622 } 623 resize_buf(fb, 65536); 624 } 625 ssz = curp->gzip ? 626 gzread(gz, fb->buf + (int)off, fb->sz - off) : 627 read(fd, fb->buf + (int)off, fb->sz - off); 628 if (ssz == 0) { 629 fb->sz = off; 630 retval = 1; 631 break; 632 } 633 if (ssz == -1) { 634 if (curp->gzip) 635 (void)gzerror(gz, &gzerrnum); 636 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s", 637 curp->gzip && gzerrnum != Z_ERRNO ? 638 zError(gzerrnum) : strerror(errno)); 639 break; 640 } 641 off += (size_t)ssz; 642 } 643 644 if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK) 645 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s", 646 gzerrnum == Z_ERRNO ? strerror(errno) : 647 zError(gzerrnum)); 648 if (retval == 0) { 649 free(fb->buf); 650 fb->buf = NULL; 651 } 652 return retval; 653 } 654 655 static void 656 mparse_end(struct mparse *curp) 657 { 658 if (curp->man->macroset == MACROSET_NONE) 659 curp->man->macroset = MACROSET_MAN; 660 if (curp->man->macroset == MACROSET_MDOC) 661 mdoc_endparse(curp->man); 662 else 663 man_endparse(curp->man); 664 roff_endparse(curp->roff); 665 } 666 667 static void 668 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) 669 { 670 struct buf *svprimary; 671 const char *svfile; 672 size_t offset; 673 static int recursion_depth; 674 675 if (64 < recursion_depth) { 676 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL); 677 return; 678 } 679 680 /* Line number is per-file. */ 681 svfile = curp->file; 682 curp->file = file; 683 svprimary = curp->primary; 684 curp->primary = &blk; 685 curp->line = 1; 686 recursion_depth++; 687 688 /* Skip an UTF-8 byte order mark. */ 689 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && 690 (unsigned char)blk.buf[0] == 0xef && 691 (unsigned char)blk.buf[1] == 0xbb && 692 (unsigned char)blk.buf[2] == 0xbf) { 693 offset = 3; 694 curp->filenc &= ~MPARSE_LATIN1; 695 } else 696 offset = 0; 697 698 mparse_buf_r(curp, blk, offset, 1); 699 700 if (--recursion_depth == 0) 701 mparse_end(curp); 702 703 curp->primary = svprimary; 704 curp->file = svfile; 705 } 706 707 /* 708 * Read the whole file into memory and call the parsers. 709 * Called recursively when an .so request is encountered. 710 */ 711 enum mandoclevel 712 mparse_readfd(struct mparse *curp, int fd, const char *file) 713 { 714 struct buf blk; 715 int with_mmap; 716 int save_filenc; 717 718 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { 719 save_filenc = curp->filenc; 720 curp->filenc = curp->options & 721 (MPARSE_UTF8 | MPARSE_LATIN1); 722 mparse_parse_buffer(curp, blk, file); 723 curp->filenc = save_filenc; 724 if (with_mmap) 725 munmap(blk.buf, blk.sz); 726 else 727 free(blk.buf); 728 } 729 return curp->file_status; 730 } 731 732 int 733 mparse_open(struct mparse *curp, const char *file) 734 { 735 char *cp; 736 int fd; 737 738 curp->file = file; 739 cp = strrchr(file, '.'); 740 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); 741 742 /* First try to use the filename as it is. */ 743 744 if ((fd = open(file, O_RDONLY)) != -1) 745 return fd; 746 747 /* 748 * If that doesn't work and the filename doesn't 749 * already end in .gz, try appending .gz. 750 */ 751 752 if ( ! curp->gzip) { 753 mandoc_asprintf(&cp, "%s.gz", file); 754 fd = open(cp, O_RDONLY); 755 free(cp); 756 if (fd != -1) { 757 curp->gzip = 1; 758 return fd; 759 } 760 } 761 762 /* Neither worked, give up. */ 763 764 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); 765 return -1; 766 } 767 768 struct mparse * 769 mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg, 770 enum mandoc_os os_e, const char *os_s) 771 { 772 struct mparse *curp; 773 774 curp = mandoc_calloc(1, sizeof(struct mparse)); 775 776 curp->options = options; 777 curp->mmin = mmin; 778 curp->mmsg = mmsg; 779 curp->os_s = os_s; 780 781 curp->roff = roff_alloc(curp, options); 782 curp->man = roff_man_alloc(curp->roff, curp, curp->os_s, 783 curp->options & MPARSE_QUICK ? 1 : 0); 784 if (curp->options & MPARSE_MDOC) { 785 curp->man->macroset = MACROSET_MDOC; 786 if (curp->man->mdocmac == NULL) 787 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); 788 } else if (curp->options & MPARSE_MAN) { 789 curp->man->macroset = MACROSET_MAN; 790 if (curp->man->manmac == NULL) 791 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); 792 } 793 curp->man->first->tok = TOKEN_NONE; 794 curp->man->meta.os_e = os_e; 795 return curp; 796 } 797 798 void 799 mparse_reset(struct mparse *curp) 800 { 801 roff_reset(curp->roff); 802 roff_man_reset(curp->man); 803 804 free(curp->sodest); 805 curp->sodest = NULL; 806 807 if (curp->secondary) 808 curp->secondary->sz = 0; 809 810 curp->file_status = MANDOCLEVEL_OK; 811 curp->gzip = 0; 812 } 813 814 void 815 mparse_free(struct mparse *curp) 816 { 817 818 roffhash_free(curp->man->mdocmac); 819 roffhash_free(curp->man->manmac); 820 roff_man_free(curp->man); 821 roff_free(curp->roff); 822 if (curp->secondary) 823 free(curp->secondary->buf); 824 825 free(curp->secondary); 826 free(curp->sodest); 827 free(curp); 828 } 829 830 void 831 mparse_result(struct mparse *curp, struct roff_man **man, 832 char **sodest) 833 { 834 835 if (sodest && NULL != (*sodest = curp->sodest)) { 836 *man = NULL; 837 return; 838 } 839 if (man) 840 *man = curp->man; 841 } 842 843 void 844 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc) 845 { 846 if (curp->file_status > *rc) 847 *rc = curp->file_status; 848 } 849 850 void 851 mandoc_vmsg(enum mandocerr t, struct mparse *m, 852 int ln, int pos, const char *fmt, ...) 853 { 854 char buf[256]; 855 va_list ap; 856 857 va_start(ap, fmt); 858 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 859 va_end(ap); 860 861 mandoc_msg(t, m, ln, pos, buf); 862 } 863 864 void 865 mandoc_msg(enum mandocerr er, struct mparse *m, 866 int ln, int col, const char *msg) 867 { 868 enum mandoclevel level; 869 870 if (er < m->mmin && er != MANDOCERR_FILE) 871 return; 872 873 level = MANDOCLEVEL_UNSUPP; 874 while (er < mandoclimits[level]) 875 level--; 876 877 if (m->mmsg) 878 (*m->mmsg)(er, level, m->file, ln, col, msg); 879 880 if (m->file_status < level) 881 m->file_status = level; 882 } 883 884 const char * 885 mparse_strerror(enum mandocerr er) 886 { 887 888 return mandocerrs[er]; 889 } 890 891 const char * 892 mparse_strlevel(enum mandoclevel lvl) 893 { 894 return mandoclevels[lvl]; 895 } 896 897 void 898 mparse_keep(struct mparse *p) 899 { 900 901 assert(NULL == p->secondary); 902 p->secondary = mandoc_calloc(1, sizeof(struct buf)); 903 } 904 905 const char * 906 mparse_getkeep(const struct mparse *p) 907 { 908 909 assert(p->secondary); 910 return p->secondary->sz ? p->secondary->buf : NULL; 911 } 912