1 /* $OpenBSD: read.c,v 1.124 2016/07/19 16:22:34 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #include <sys/types.h> 20 #include <sys/mman.h> 21 #include <sys/stat.h> 22 23 #include <assert.h> 24 #include <ctype.h> 25 #include <err.h> 26 #include <errno.h> 27 #include <fcntl.h> 28 #include <stdarg.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <unistd.h> 33 #include <zlib.h> 34 35 #include "mandoc_aux.h" 36 #include "mandoc.h" 37 #include "roff.h" 38 #include "mdoc.h" 39 #include "man.h" 40 #include "libmandoc.h" 41 #include "roff_int.h" 42 43 #define REPARSE_LIMIT 1000 44 45 struct mparse { 46 struct roff_man *man; /* man parser */ 47 struct roff *roff; /* roff parser (!NULL) */ 48 char *sodest; /* filename pointed to by .so */ 49 const char *file; /* filename of current input file */ 50 struct buf *primary; /* buffer currently being parsed */ 51 struct buf *secondary; /* preprocessed copy of input */ 52 const char *defos; /* default operating system */ 53 mandocmsg mmsg; /* warning/error message handler */ 54 enum mandoclevel file_status; /* status of current parse */ 55 enum mandoclevel wlevel; /* ignore messages below this */ 56 int options; /* parser options */ 57 int gzip; /* current input file is gzipped */ 58 int filenc; /* encoding of the current file */ 59 int reparse_count; /* finite interp. stack */ 60 int line; /* line number in the file */ 61 }; 62 63 static void choose_parser(struct mparse *); 64 static void resize_buf(struct buf *, size_t); 65 static void mparse_buf_r(struct mparse *, struct buf, size_t, int); 66 static int read_whole_file(struct mparse *, const char *, int, 67 struct buf *, int *); 68 static void mparse_end(struct mparse *); 69 static void mparse_parse_buffer(struct mparse *, struct buf, 70 const char *); 71 72 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { 73 MANDOCERR_OK, 74 MANDOCERR_WARNING, 75 MANDOCERR_WARNING, 76 MANDOCERR_ERROR, 77 MANDOCERR_UNSUPP, 78 MANDOCERR_MAX, 79 MANDOCERR_MAX 80 }; 81 82 static const char * const mandocerrs[MANDOCERR_MAX] = { 83 "ok", 84 85 "generic warning", 86 87 /* related to the prologue */ 88 "missing manual title, using UNTITLED", 89 "missing manual title, using \"\"", 90 "lower case character in document title", 91 "missing manual section, using \"\"", 92 "unknown manual section", 93 "missing date, using today's date", 94 "cannot parse date, using it verbatim", 95 "missing Os macro, using \"\"", 96 "duplicate prologue macro", 97 "late prologue macro", 98 "skipping late title macro", 99 "prologue macros out of order", 100 101 /* related to document structure */ 102 ".so is fragile, better use ln(1)", 103 "no document body", 104 "content before first section header", 105 "first section is not \"NAME\"", 106 "NAME section without name", 107 "NAME section without description", 108 "description not at the end of NAME", 109 "bad NAME section content", 110 "missing description line, using \"\"", 111 "sections out of conventional order", 112 "duplicate section title", 113 "unexpected section", 114 "unusual Xr order", 115 "unusual Xr punctuation", 116 "AUTHORS section without An macro", 117 118 /* related to macros and nesting */ 119 "obsolete macro", 120 "macro neither callable nor escaped", 121 "skipping paragraph macro", 122 "moving paragraph macro out of list", 123 "skipping no-space macro", 124 "blocks badly nested", 125 "nested displays are not portable", 126 "moving content out of list", 127 "fill mode already enabled, skipping", 128 "fill mode already disabled, skipping", 129 "line scope broken", 130 131 /* related to missing macro arguments */ 132 "skipping empty request", 133 "conditional request controls empty scope", 134 "skipping empty macro", 135 "empty block", 136 "empty argument, using 0n", 137 "missing display type, using -ragged", 138 "list type is not the first argument", 139 "missing -width in -tag list, using 8n", 140 "missing utility name, using \"\"", 141 "missing function name, using \"\"", 142 "empty head in list item", 143 "empty list item", 144 "missing font type, using \\fR", 145 "unknown font type, using \\fR", 146 "nothing follows prefix", 147 "empty reference block", 148 "missing -std argument, adding it", 149 "missing option string, using \"\"", 150 "missing resource identifier, using \"\"", 151 "missing eqn box, using \"\"", 152 153 /* related to bad macro arguments */ 154 "unterminated quoted argument", 155 "duplicate argument", 156 "skipping duplicate argument", 157 "skipping duplicate display type", 158 "skipping duplicate list type", 159 "skipping -width argument", 160 "wrong number of cells", 161 "unknown AT&T UNIX version", 162 "comma in function argument", 163 "parenthesis in function name", 164 "invalid content in Rs block", 165 "invalid Boolean argument", 166 "unknown font, skipping request", 167 "odd number of characters in request", 168 169 /* related to plain text */ 170 "blank line in fill mode, using .sp", 171 "tab in filled text", 172 "whitespace at end of input line", 173 "bad comment style", 174 "invalid escape sequence", 175 "undefined string, using \"\"", 176 177 /* related to tables */ 178 "tbl line starts with span", 179 "tbl column starts with span", 180 "skipping vertical bar in tbl layout", 181 182 "generic error", 183 184 /* related to tables */ 185 "non-alphabetic character in tbl options", 186 "skipping unknown tbl option", 187 "missing tbl option argument", 188 "wrong tbl option argument size", 189 "empty tbl layout", 190 "invalid character in tbl layout", 191 "unmatched parenthesis in tbl layout", 192 "tbl without any data cells", 193 "ignoring data in spanned tbl cell", 194 "ignoring extra tbl data cells", 195 "data block open at end of tbl", 196 197 /* related to document structure and macros */ 198 NULL, 199 "input stack limit exceeded, infinite loop?", 200 "skipping bad character", 201 "skipping unknown macro", 202 "skipping insecure request", 203 "skipping item outside list", 204 "skipping column outside column list", 205 "skipping end of block that is not open", 206 "fewer RS blocks open, skipping", 207 "inserting missing end of block", 208 "appending missing end of block", 209 210 /* related to request and macro arguments */ 211 "escaped character not allowed in a name", 212 "NOT IMPLEMENTED: Bd -file", 213 "skipping display without arguments", 214 "missing list type, using -item", 215 "missing manual name, using \"\"", 216 "uname(3) system call failed, using UNKNOWN", 217 "unknown standard specifier", 218 "skipping request without numeric argument", 219 "NOT IMPLEMENTED: .so with absolute path or \"..\"", 220 ".so request failed", 221 "skipping all arguments", 222 "skipping excess arguments", 223 "divide by zero", 224 225 "unsupported feature", 226 "input too large", 227 "unsupported control character", 228 "unsupported roff request", 229 "eqn delim option in tbl", 230 "unsupported tbl layout modifier", 231 "ignoring macro in table", 232 }; 233 234 static const char * const mandoclevels[MANDOCLEVEL_MAX] = { 235 "SUCCESS", 236 "RESERVED", 237 "WARNING", 238 "ERROR", 239 "UNSUPP", 240 "BADARG", 241 "SYSERR" 242 }; 243 244 245 static void 246 resize_buf(struct buf *buf, size_t initial) 247 { 248 249 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial; 250 buf->buf = mandoc_realloc(buf->buf, buf->sz); 251 } 252 253 static void 254 choose_parser(struct mparse *curp) 255 { 256 char *cp, *ep; 257 int format; 258 259 /* 260 * If neither command line arguments -mdoc or -man select 261 * a parser nor the roff parser found a .Dd or .TH macro 262 * yet, look ahead in the main input buffer. 263 */ 264 265 if ((format = roff_getformat(curp->roff)) == 0) { 266 cp = curp->primary->buf; 267 ep = cp + curp->primary->sz; 268 while (cp < ep) { 269 if (*cp == '.' || *cp == '\'') { 270 cp++; 271 if (cp[0] == 'D' && cp[1] == 'd') { 272 format = MPARSE_MDOC; 273 break; 274 } 275 if (cp[0] == 'T' && cp[1] == 'H') { 276 format = MPARSE_MAN; 277 break; 278 } 279 } 280 cp = memchr(cp, '\n', ep - cp); 281 if (cp == NULL) 282 break; 283 cp++; 284 } 285 } 286 287 if (format == MPARSE_MDOC) { 288 mdoc_hash_init(); 289 curp->man->macroset = MACROSET_MDOC; 290 curp->man->first->tok = TOKEN_NONE; 291 } else { 292 man_hash_init(); 293 curp->man->macroset = MACROSET_MAN; 294 curp->man->first->tok = TOKEN_NONE; 295 } 296 } 297 298 /* 299 * Main parse routine for a buffer. 300 * It assumes encoding and line numbering are already set up. 301 * It can recurse directly (for invocations of user-defined 302 * macros, inline equations, and input line traps) 303 * and indirectly (for .so file inclusion). 304 */ 305 static void 306 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) 307 { 308 const struct tbl_span *span; 309 struct buf ln; 310 const char *save_file; 311 char *cp; 312 size_t pos; /* byte number in the ln buffer */ 313 enum rofferr rr; 314 int of; 315 int lnn; /* line number in the real file */ 316 int fd; 317 unsigned char c; 318 319 memset(&ln, 0, sizeof(ln)); 320 321 lnn = curp->line; 322 pos = 0; 323 324 while (i < blk.sz) { 325 if (0 == pos && '\0' == blk.buf[i]) 326 break; 327 328 if (start) { 329 curp->line = lnn; 330 curp->reparse_count = 0; 331 332 if (lnn < 3 && 333 curp->filenc & MPARSE_UTF8 && 334 curp->filenc & MPARSE_LATIN1) 335 curp->filenc = preconv_cue(&blk, i); 336 } 337 338 while (i < blk.sz && (start || blk.buf[i] != '\0')) { 339 340 /* 341 * When finding an unescaped newline character, 342 * leave the character loop to process the line. 343 * Skip a preceding carriage return, if any. 344 */ 345 346 if ('\r' == blk.buf[i] && i + 1 < blk.sz && 347 '\n' == blk.buf[i + 1]) 348 ++i; 349 if ('\n' == blk.buf[i]) { 350 ++i; 351 ++lnn; 352 break; 353 } 354 355 /* 356 * Make sure we have space for the worst 357 * case of 11 bytes: "\\[u10ffff]\0" 358 */ 359 360 if (pos + 11 > ln.sz) 361 resize_buf(&ln, 256); 362 363 /* 364 * Encode 8-bit input. 365 */ 366 367 c = blk.buf[i]; 368 if (c & 0x80) { 369 if ( ! (curp->filenc && preconv_encode( 370 &blk, &i, &ln, &pos, &curp->filenc))) { 371 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, 372 curp->line, pos, "0x%x", c); 373 ln.buf[pos++] = '?'; 374 i++; 375 } 376 continue; 377 } 378 379 /* 380 * Exclude control characters. 381 */ 382 383 if (c == 0x7f || (c < 0x20 && c != 0x09)) { 384 mandoc_vmsg(c == 0x00 || c == 0x04 || 385 c > 0x0a ? MANDOCERR_CHAR_BAD : 386 MANDOCERR_CHAR_UNSUPP, 387 curp, curp->line, pos, "0x%x", c); 388 i++; 389 if (c != '\r') 390 ln.buf[pos++] = '?'; 391 continue; 392 } 393 394 /* Trailing backslash = a plain char. */ 395 396 if (blk.buf[i] != '\\' || i + 1 == blk.sz) { 397 ln.buf[pos++] = blk.buf[i++]; 398 continue; 399 } 400 401 /* 402 * Found escape and at least one other character. 403 * When it's a newline character, skip it. 404 * When there is a carriage return in between, 405 * skip that one as well. 406 */ 407 408 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz && 409 '\n' == blk.buf[i + 2]) 410 ++i; 411 if ('\n' == blk.buf[i + 1]) { 412 i += 2; 413 ++lnn; 414 continue; 415 } 416 417 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { 418 i += 2; 419 /* Comment, skip to end of line */ 420 for (; i < blk.sz; ++i) { 421 if ('\n' == blk.buf[i]) { 422 ++i; 423 ++lnn; 424 break; 425 } 426 } 427 428 /* Backout trailing whitespaces */ 429 for (; pos > 0; --pos) { 430 if (ln.buf[pos - 1] != ' ') 431 break; 432 if (pos > 2 && ln.buf[pos - 2] == '\\') 433 break; 434 } 435 break; 436 } 437 438 /* Catch escaped bogus characters. */ 439 440 c = (unsigned char) blk.buf[i+1]; 441 442 if ( ! (isascii(c) && 443 (isgraph(c) || isblank(c)))) { 444 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, 445 curp->line, pos, "0x%x", c); 446 i += 2; 447 ln.buf[pos++] = '?'; 448 continue; 449 } 450 451 /* Some other escape sequence, copy & cont. */ 452 453 ln.buf[pos++] = blk.buf[i++]; 454 ln.buf[pos++] = blk.buf[i++]; 455 } 456 457 if (pos >= ln.sz) 458 resize_buf(&ln, 256); 459 460 ln.buf[pos] = '\0'; 461 462 /* 463 * A significant amount of complexity is contained by 464 * the roff preprocessor. It's line-oriented but can be 465 * expressed on one line, so we need at times to 466 * readjust our starting point and re-run it. The roff 467 * preprocessor can also readjust the buffers with new 468 * data, so we pass them in wholesale. 469 */ 470 471 of = 0; 472 473 /* 474 * Maintain a lookaside buffer of all parsed lines. We 475 * only do this if mparse_keep() has been invoked (the 476 * buffer may be accessed with mparse_getkeep()). 477 */ 478 479 if (curp->secondary) { 480 curp->secondary->buf = mandoc_realloc( 481 curp->secondary->buf, 482 curp->secondary->sz + pos + 2); 483 memcpy(curp->secondary->buf + 484 curp->secondary->sz, 485 ln.buf, pos); 486 curp->secondary->sz += pos; 487 curp->secondary->buf 488 [curp->secondary->sz] = '\n'; 489 curp->secondary->sz++; 490 curp->secondary->buf 491 [curp->secondary->sz] = '\0'; 492 } 493 rerun: 494 rr = roff_parseln(curp->roff, curp->line, &ln, &of); 495 496 switch (rr) { 497 case ROFF_REPARSE: 498 if (REPARSE_LIMIT >= ++curp->reparse_count) 499 mparse_buf_r(curp, ln, of, 0); 500 else 501 mandoc_msg(MANDOCERR_ROFFLOOP, curp, 502 curp->line, pos, NULL); 503 pos = 0; 504 continue; 505 case ROFF_APPEND: 506 pos = strlen(ln.buf); 507 continue; 508 case ROFF_RERUN: 509 goto rerun; 510 case ROFF_IGN: 511 pos = 0; 512 continue; 513 case ROFF_SO: 514 if ( ! (curp->options & MPARSE_SO) && 515 (i >= blk.sz || blk.buf[i] == '\0')) { 516 curp->sodest = mandoc_strdup(ln.buf + of); 517 free(ln.buf); 518 return; 519 } 520 /* 521 * We remove `so' clauses from our lookaside 522 * buffer because we're going to descend into 523 * the file recursively. 524 */ 525 if (curp->secondary) 526 curp->secondary->sz -= pos + 1; 527 save_file = curp->file; 528 if ((fd = mparse_open(curp, ln.buf + of)) != -1) { 529 mparse_readfd(curp, fd, ln.buf + of); 530 close(fd); 531 curp->file = save_file; 532 } else { 533 curp->file = save_file; 534 mandoc_vmsg(MANDOCERR_SO_FAIL, 535 curp, curp->line, pos, 536 ".so %s", ln.buf + of); 537 ln.sz = mandoc_asprintf(&cp, 538 ".sp\nSee the file %s.\n.sp", 539 ln.buf + of); 540 free(ln.buf); 541 ln.buf = cp; 542 of = 0; 543 mparse_buf_r(curp, ln, of, 0); 544 } 545 pos = 0; 546 continue; 547 default: 548 break; 549 } 550 551 if (curp->man->macroset == MACROSET_NONE) 552 choose_parser(curp); 553 554 /* 555 * Lastly, push down into the parsers themselves. 556 * If libroff returns ROFF_TBL, then add it to the 557 * currently open parse. Since we only get here if 558 * there does exist data (see tbl_data.c), we're 559 * guaranteed that something's been allocated. 560 * Do the same for ROFF_EQN. 561 */ 562 563 if (rr == ROFF_TBL) 564 while ((span = roff_span(curp->roff)) != NULL) 565 roff_addtbl(curp->man, span); 566 else if (rr == ROFF_EQN) 567 roff_addeqn(curp->man, roff_eqn(curp->roff)); 568 else if ((curp->man->macroset == MACROSET_MDOC ? 569 mdoc_parseln(curp->man, curp->line, ln.buf, of) : 570 man_parseln(curp->man, curp->line, ln.buf, of)) == 2) 571 break; 572 573 /* Temporary buffers typically are not full. */ 574 575 if (0 == start && '\0' == blk.buf[i]) 576 break; 577 578 /* Start the next input line. */ 579 580 pos = 0; 581 } 582 583 free(ln.buf); 584 } 585 586 static int 587 read_whole_file(struct mparse *curp, const char *file, int fd, 588 struct buf *fb, int *with_mmap) 589 { 590 struct stat st; 591 gzFile gz; 592 size_t off; 593 ssize_t ssz; 594 595 if (fstat(fd, &st) == -1) 596 err((int)MANDOCLEVEL_SYSERR, "%s", file); 597 598 /* 599 * If we're a regular file, try just reading in the whole entry 600 * via mmap(). This is faster than reading it into blocks, and 601 * since each file is only a few bytes to begin with, I'm not 602 * concerned that this is going to tank any machines. 603 */ 604 605 if (curp->gzip == 0 && S_ISREG(st.st_mode)) { 606 if (st.st_size > 0x7fffffff) { 607 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); 608 return 0; 609 } 610 *with_mmap = 1; 611 fb->sz = (size_t)st.st_size; 612 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); 613 if (fb->buf != MAP_FAILED) 614 return 1; 615 } 616 617 if (curp->gzip) { 618 if ((gz = gzdopen(fd, "rb")) == NULL) 619 err((int)MANDOCLEVEL_SYSERR, "%s", file); 620 } else 621 gz = NULL; 622 623 /* 624 * If this isn't a regular file (like, say, stdin), then we must 625 * go the old way and just read things in bit by bit. 626 */ 627 628 *with_mmap = 0; 629 off = 0; 630 fb->sz = 0; 631 fb->buf = NULL; 632 for (;;) { 633 if (off == fb->sz) { 634 if (fb->sz == (1U << 31)) { 635 mandoc_msg(MANDOCERR_TOOLARGE, curp, 636 0, 0, NULL); 637 break; 638 } 639 resize_buf(fb, 65536); 640 } 641 ssz = curp->gzip ? 642 gzread(gz, fb->buf + (int)off, fb->sz - off) : 643 read(fd, fb->buf + (int)off, fb->sz - off); 644 if (ssz == 0) { 645 fb->sz = off; 646 return 1; 647 } 648 if (ssz == -1) 649 err((int)MANDOCLEVEL_SYSERR, "%s", file); 650 off += (size_t)ssz; 651 } 652 653 free(fb->buf); 654 fb->buf = NULL; 655 return 0; 656 } 657 658 static void 659 mparse_end(struct mparse *curp) 660 { 661 if (curp->man->macroset == MACROSET_NONE) 662 curp->man->macroset = MACROSET_MAN; 663 if (curp->man->macroset == MACROSET_MDOC) 664 mdoc_endparse(curp->man); 665 else 666 man_endparse(curp->man); 667 roff_endparse(curp->roff); 668 } 669 670 static void 671 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) 672 { 673 struct buf *svprimary; 674 const char *svfile; 675 size_t offset; 676 static int recursion_depth; 677 678 if (64 < recursion_depth) { 679 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL); 680 return; 681 } 682 683 /* Line number is per-file. */ 684 svfile = curp->file; 685 curp->file = file; 686 svprimary = curp->primary; 687 curp->primary = &blk; 688 curp->line = 1; 689 recursion_depth++; 690 691 /* Skip an UTF-8 byte order mark. */ 692 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && 693 (unsigned char)blk.buf[0] == 0xef && 694 (unsigned char)blk.buf[1] == 0xbb && 695 (unsigned char)blk.buf[2] == 0xbf) { 696 offset = 3; 697 curp->filenc &= ~MPARSE_LATIN1; 698 } else 699 offset = 0; 700 701 mparse_buf_r(curp, blk, offset, 1); 702 703 if (--recursion_depth == 0) 704 mparse_end(curp); 705 706 curp->primary = svprimary; 707 curp->file = svfile; 708 } 709 710 /* 711 * Read the whole file into memory and call the parsers. 712 * Called recursively when an .so request is encountered. 713 */ 714 enum mandoclevel 715 mparse_readfd(struct mparse *curp, int fd, const char *file) 716 { 717 struct buf blk; 718 int with_mmap; 719 int save_filenc; 720 721 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { 722 save_filenc = curp->filenc; 723 curp->filenc = curp->options & 724 (MPARSE_UTF8 | MPARSE_LATIN1); 725 mparse_parse_buffer(curp, blk, file); 726 curp->filenc = save_filenc; 727 if (with_mmap) 728 munmap(blk.buf, blk.sz); 729 else 730 free(blk.buf); 731 } 732 return curp->file_status; 733 } 734 735 int 736 mparse_open(struct mparse *curp, const char *file) 737 { 738 char *cp; 739 int fd; 740 741 curp->file = file; 742 cp = strrchr(file, '.'); 743 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); 744 745 /* First try to use the filename as it is. */ 746 747 if ((fd = open(file, O_RDONLY)) != -1) 748 return fd; 749 750 /* 751 * If that doesn't work and the filename doesn't 752 * already end in .gz, try appending .gz. 753 */ 754 755 if ( ! curp->gzip) { 756 mandoc_asprintf(&cp, "%s.gz", file); 757 fd = open(cp, O_RDONLY); 758 free(cp); 759 if (fd != -1) { 760 curp->gzip = 1; 761 return fd; 762 } 763 } 764 765 /* Neither worked, give up. */ 766 767 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); 768 return -1; 769 } 770 771 struct mparse * 772 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, 773 const char *defos) 774 { 775 struct mparse *curp; 776 777 curp = mandoc_calloc(1, sizeof(struct mparse)); 778 779 curp->options = options; 780 curp->wlevel = wlevel; 781 curp->mmsg = mmsg; 782 curp->defos = defos; 783 784 curp->roff = roff_alloc(curp, options); 785 curp->man = roff_man_alloc( curp->roff, curp, curp->defos, 786 curp->options & MPARSE_QUICK ? 1 : 0); 787 if (curp->options & MPARSE_MDOC) { 788 mdoc_hash_init(); 789 curp->man->macroset = MACROSET_MDOC; 790 } else if (curp->options & MPARSE_MAN) { 791 man_hash_init(); 792 curp->man->macroset = MACROSET_MAN; 793 } 794 curp->man->first->tok = TOKEN_NONE; 795 return curp; 796 } 797 798 void 799 mparse_reset(struct mparse *curp) 800 { 801 roff_reset(curp->roff); 802 roff_man_reset(curp->man); 803 if (curp->secondary) 804 curp->secondary->sz = 0; 805 806 curp->file_status = MANDOCLEVEL_OK; 807 808 free(curp->sodest); 809 curp->sodest = NULL; 810 } 811 812 void 813 mparse_free(struct mparse *curp) 814 { 815 816 roff_man_free(curp->man); 817 if (curp->roff) 818 roff_free(curp->roff); 819 if (curp->secondary) 820 free(curp->secondary->buf); 821 822 free(curp->secondary); 823 free(curp->sodest); 824 free(curp); 825 } 826 827 void 828 mparse_result(struct mparse *curp, struct roff_man **man, 829 char **sodest) 830 { 831 832 if (sodest && NULL != (*sodest = curp->sodest)) { 833 *man = NULL; 834 return; 835 } 836 if (man) 837 *man = curp->man; 838 } 839 840 void 841 mandoc_vmsg(enum mandocerr t, struct mparse *m, 842 int ln, int pos, const char *fmt, ...) 843 { 844 char buf[256]; 845 va_list ap; 846 847 va_start(ap, fmt); 848 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 849 va_end(ap); 850 851 mandoc_msg(t, m, ln, pos, buf); 852 } 853 854 void 855 mandoc_msg(enum mandocerr er, struct mparse *m, 856 int ln, int col, const char *msg) 857 { 858 enum mandoclevel level; 859 860 level = MANDOCLEVEL_UNSUPP; 861 while (er < mandoclimits[level]) 862 level--; 863 864 if (level < m->wlevel && er != MANDOCERR_FILE) 865 return; 866 867 if (m->mmsg) 868 (*m->mmsg)(er, level, m->file, ln, col, msg); 869 870 if (m->file_status < level) 871 m->file_status = level; 872 } 873 874 const char * 875 mparse_strerror(enum mandocerr er) 876 { 877 878 return mandocerrs[er]; 879 } 880 881 const char * 882 mparse_strlevel(enum mandoclevel lvl) 883 { 884 return mandoclevels[lvl]; 885 } 886 887 void 888 mparse_keep(struct mparse *p) 889 { 890 891 assert(NULL == p->secondary); 892 p->secondary = mandoc_calloc(1, sizeof(struct buf)); 893 } 894 895 const char * 896 mparse_getkeep(const struct mparse *p) 897 { 898 899 assert(p->secondary); 900 return p->secondary->sz ? p->secondary->buf : NULL; 901 } 902