1 /* $Vendor-Id: mdoc.c,v 1.116 2010/01/07 10:24:43 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <sys/types.h> 22 23 #include <assert.h> 24 #include <ctype.h> 25 #include <stdarg.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 30 #include "libmdoc.h" 31 #include "libmandoc.h" 32 33 const char *const __mdoc_merrnames[MERRMAX] = { 34 "trailing whitespace", /* ETAILWS */ 35 "unexpected quoted parameter", /* EQUOTPARM */ 36 "unterminated quoted parameter", /* EQUOTTERM */ 37 "argument parameter suggested", /* EARGVAL */ 38 "macro disallowed in prologue", /* EBODYPROL */ 39 "macro disallowed in body", /* EPROLBODY */ 40 "text disallowed in prologue", /* ETEXTPROL */ 41 "blank line disallowed", /* ENOBLANK */ 42 "text parameter too long", /* ETOOLONG */ 43 "invalid escape sequence", /* EESCAPE */ 44 "invalid character", /* EPRINT */ 45 "document has no body", /* ENODAT */ 46 "document has no prologue", /* ENOPROLOGUE */ 47 "expected line arguments", /* ELINE */ 48 "invalid AT&T argument", /* EATT */ 49 "default name not yet set", /* ENAME */ 50 "missing list type", /* ELISTTYPE */ 51 "missing display type", /* EDISPTYPE */ 52 "too many display types", /* EMULTIDISP */ 53 "too many list types", /* EMULTILIST */ 54 "NAME section must be first", /* ESECNAME */ 55 "badly-formed NAME section", /* ENAMESECINC */ 56 "argument repeated", /* EARGREP */ 57 "expected boolean parameter", /* EBOOL */ 58 "inconsistent column syntax", /* ECOLMIS */ 59 "nested display invalid", /* ENESTDISP */ 60 "width argument missing", /* EMISSWIDTH */ 61 "invalid section for this manual section", /* EWRONGMSEC */ 62 "section out of conventional order", /* ESECOOO */ 63 "section repeated", /* ESECREP */ 64 "invalid standard argument", /* EBADSTAND */ 65 "multi-line arguments discouraged", /* ENOMULTILINE */ 66 "multi-line arguments suggested", /* EMULTILINE */ 67 "line arguments discouraged", /* ENOLINE */ 68 "prologue macro out of conventional order", /* EPROLOOO */ 69 "prologue macro repeated", /* EPROLREP */ 70 "invalid manual section", /* EBADMSEC */ 71 "invalid section", /* EBADSEC */ 72 "invalid font mode", /* EFONT */ 73 "invalid date syntax", /* EBADDATE */ 74 "invalid number format", /* ENUMFMT */ 75 "superfluous width argument", /* ENOWIDTH */ 76 "system: utsname error", /* EUTSNAME */ 77 "obsolete macro", /* EOBS */ 78 "end-of-line scope violation", /* EIMPBRK */ 79 "empty macro ignored", /* EIGNE */ 80 "unclosed explicit scope", /* EOPEN */ 81 "unterminated quoted phrase", /* EQUOTPHR */ 82 "closure macro without prior context", /* ENOCTX */ 83 "no description found for library", /* ELIB */ 84 "bad child for parent context", /* EBADCHILD */ 85 "list arguments preceding type", /* ENOTYPE */ 86 }; 87 88 const char *const __mdoc_macronames[MDOC_MAX] = { 89 "Ap", "Dd", "Dt", "Os", 90 "Sh", "Ss", "Pp", "D1", 91 "Dl", "Bd", "Ed", "Bl", 92 "El", "It", "Ad", "An", 93 "Ar", "Cd", "Cm", "Dv", 94 "Er", "Ev", "Ex", "Fa", 95 "Fd", "Fl", "Fn", "Ft", 96 "Ic", "In", "Li", "Nd", 97 "Nm", "Op", "Ot", "Pa", 98 "Rv", "St", "Va", "Vt", 99 /* LINTED */ 100 "Xr", "%A", "%B", "%D", 101 /* LINTED */ 102 "%I", "%J", "%N", "%O", 103 /* LINTED */ 104 "%P", "%R", "%T", "%V", 105 "Ac", "Ao", "Aq", "At", 106 "Bc", "Bf", "Bo", "Bq", 107 "Bsx", "Bx", "Db", "Dc", 108 "Do", "Dq", "Ec", "Ef", 109 "Em", "Eo", "Fx", "Ms", 110 "No", "Ns", "Nx", "Ox", 111 "Pc", "Pf", "Po", "Pq", 112 "Qc", "Ql", "Qo", "Qq", 113 "Re", "Rs", "Sc", "So", 114 "Sq", "Sm", "Sx", "Sy", 115 "Tn", "Ux", "Xc", "Xo", 116 "Fo", "Fc", "Oo", "Oc", 117 "Bk", "Ek", "Bt", "Hf", 118 "Fr", "Ud", "Lb", "Lp", 119 "Lk", "Mt", "Brq", "Bro", 120 /* LINTED */ 121 "Brc", "%C", "Es", "En", 122 /* LINTED */ 123 "Dx", "%Q", "br", "sp", 124 /* LINTED */ 125 "%U" 126 }; 127 128 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 129 "split", "nosplit", "ragged", 130 "unfilled", "literal", "file", 131 "offset", "bullet", "dash", 132 "hyphen", "item", "enum", 133 "tag", "diag", "hang", 134 "ohang", "inset", "column", 135 "width", "compact", "std", 136 "filled", "words", "emphasis", 137 "symbolic", "nested", "centered" 138 }; 139 140 const char * const *mdoc_macronames = __mdoc_macronames; 141 const char * const *mdoc_argnames = __mdoc_argnames; 142 143 static void mdoc_free1(struct mdoc *); 144 static void mdoc_alloc1(struct mdoc *); 145 static struct mdoc_node *node_alloc(struct mdoc *, int, int, 146 int, enum mdoc_type); 147 static int node_append(struct mdoc *, 148 struct mdoc_node *); 149 static int parsetext(struct mdoc *, int, char *); 150 static int parsemacro(struct mdoc *, int, char *); 151 static int macrowarn(struct mdoc *, int, const char *); 152 static int pstring(struct mdoc *, int, int, 153 const char *, size_t); 154 155 const struct mdoc_node * 156 mdoc_node(const struct mdoc *m) 157 { 158 159 return(MDOC_HALT & m->flags ? NULL : m->first); 160 } 161 162 163 const struct mdoc_meta * 164 mdoc_meta(const struct mdoc *m) 165 { 166 167 return(MDOC_HALT & m->flags ? NULL : &m->meta); 168 } 169 170 171 /* 172 * Frees volatile resources (parse tree, meta-data, fields). 173 */ 174 static void 175 mdoc_free1(struct mdoc *mdoc) 176 { 177 178 if (mdoc->first) 179 mdoc_node_freelist(mdoc->first); 180 if (mdoc->meta.title) 181 free(mdoc->meta.title); 182 if (mdoc->meta.os) 183 free(mdoc->meta.os); 184 if (mdoc->meta.name) 185 free(mdoc->meta.name); 186 if (mdoc->meta.arch) 187 free(mdoc->meta.arch); 188 if (mdoc->meta.vol) 189 free(mdoc->meta.vol); 190 } 191 192 193 /* 194 * Allocate all volatile resources (parse tree, meta-data, fields). 195 */ 196 static void 197 mdoc_alloc1(struct mdoc *mdoc) 198 { 199 200 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 201 mdoc->flags = 0; 202 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 203 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 204 mdoc->first = mdoc->last; 205 mdoc->last->type = MDOC_ROOT; 206 mdoc->next = MDOC_NEXT_CHILD; 207 } 208 209 210 /* 211 * Free up volatile resources (see mdoc_free1()) then re-initialises the 212 * data with mdoc_alloc1(). After invocation, parse data has been reset 213 * and the parser is ready for re-invocation on a new tree; however, 214 * cross-parse non-volatile data is kept intact. 215 */ 216 void 217 mdoc_reset(struct mdoc *mdoc) 218 { 219 220 mdoc_free1(mdoc); 221 mdoc_alloc1(mdoc); 222 } 223 224 225 /* 226 * Completely free up all volatile and non-volatile parse resources. 227 * After invocation, the pointer is no longer usable. 228 */ 229 void 230 mdoc_free(struct mdoc *mdoc) 231 { 232 233 mdoc_free1(mdoc); 234 free(mdoc); 235 } 236 237 238 /* 239 * Allocate volatile and non-volatile parse resources. 240 */ 241 struct mdoc * 242 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb) 243 { 244 struct mdoc *p; 245 246 p = mandoc_calloc(1, sizeof(struct mdoc)); 247 248 if (cb) 249 memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); 250 251 p->data = data; 252 p->pflags = pflags; 253 254 mdoc_hash_init(); 255 mdoc_alloc1(p); 256 return(p); 257 } 258 259 260 /* 261 * Climb back up the parse tree, validating open scopes. Mostly calls 262 * through to macro_end() in macro.c. 263 */ 264 int 265 mdoc_endparse(struct mdoc *m) 266 { 267 268 if (MDOC_HALT & m->flags) 269 return(0); 270 else if (mdoc_macroend(m)) 271 return(1); 272 m->flags |= MDOC_HALT; 273 return(0); 274 } 275 276 277 /* 278 * Main parse routine. Parses a single line -- really just hands off to 279 * the macro (parsemacro()) or text parser (parsetext()). 280 */ 281 int 282 mdoc_parseln(struct mdoc *m, int ln, char *buf) 283 { 284 285 if (MDOC_HALT & m->flags) 286 return(0); 287 288 return('.' == *buf ? parsemacro(m, ln, buf) : 289 parsetext(m, ln, buf)); 290 } 291 292 293 int 294 mdoc_verr(struct mdoc *mdoc, int ln, int pos, 295 const char *fmt, ...) 296 { 297 char buf[256]; 298 va_list ap; 299 300 if (NULL == mdoc->cb.mdoc_err) 301 return(0); 302 303 va_start(ap, fmt); 304 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 305 va_end(ap); 306 307 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf)); 308 } 309 310 311 int 312 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 313 { 314 char buf[256]; 315 va_list ap; 316 317 if (NULL == mdoc->cb.mdoc_warn) 318 return(0); 319 320 va_start(ap, fmt); 321 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 322 va_end(ap); 323 324 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf)); 325 } 326 327 328 int 329 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type) 330 { 331 const char *p; 332 333 p = __mdoc_merrnames[(int)type]; 334 assert(p); 335 336 if (iserr) 337 return(mdoc_verr(m, line, pos, p)); 338 339 return(mdoc_vwarn(m, line, pos, p)); 340 } 341 342 343 int 344 mdoc_macro(struct mdoc *m, int tok, 345 int ln, int pp, int *pos, char *buf) 346 { 347 /* 348 * If we're in the prologue, deny "body" macros. Similarly, if 349 * we're in the body, deny prologue calls. 350 */ 351 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 352 MDOC_PBODY & m->flags) 353 return(mdoc_perr(m, ln, pp, EPROLBODY)); 354 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 355 ! (MDOC_PBODY & m->flags)) 356 return(mdoc_perr(m, ln, pp, EBODYPROL)); 357 358 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); 359 } 360 361 362 static int 363 node_append(struct mdoc *mdoc, struct mdoc_node *p) 364 { 365 366 assert(mdoc->last); 367 assert(mdoc->first); 368 assert(MDOC_ROOT != p->type); 369 370 switch (mdoc->next) { 371 case (MDOC_NEXT_SIBLING): 372 mdoc->last->next = p; 373 p->prev = mdoc->last; 374 p->parent = mdoc->last->parent; 375 break; 376 case (MDOC_NEXT_CHILD): 377 mdoc->last->child = p; 378 p->parent = mdoc->last; 379 break; 380 default: 381 abort(); 382 /* NOTREACHED */ 383 } 384 385 p->parent->nchild++; 386 387 if ( ! mdoc_valid_pre(mdoc, p)) 388 return(0); 389 if ( ! mdoc_action_pre(mdoc, p)) 390 return(0); 391 392 switch (p->type) { 393 case (MDOC_HEAD): 394 assert(MDOC_BLOCK == p->parent->type); 395 p->parent->head = p; 396 break; 397 case (MDOC_TAIL): 398 assert(MDOC_BLOCK == p->parent->type); 399 p->parent->tail = p; 400 break; 401 case (MDOC_BODY): 402 assert(MDOC_BLOCK == p->parent->type); 403 p->parent->body = p; 404 break; 405 default: 406 break; 407 } 408 409 mdoc->last = p; 410 411 switch (p->type) { 412 case (MDOC_TEXT): 413 if ( ! mdoc_valid_post(mdoc)) 414 return(0); 415 if ( ! mdoc_action_post(mdoc)) 416 return(0); 417 break; 418 default: 419 break; 420 } 421 422 return(1); 423 } 424 425 426 static struct mdoc_node * 427 node_alloc(struct mdoc *m, int line, 428 int pos, int tok, enum mdoc_type type) 429 { 430 struct mdoc_node *p; 431 432 p = mandoc_calloc(1, sizeof(struct mdoc_node)); 433 p->sec = m->lastsec; 434 p->line = line; 435 p->pos = pos; 436 p->tok = tok; 437 if (MDOC_TEXT != (p->type = type)) 438 assert(p->tok >= 0); 439 440 return(p); 441 } 442 443 444 int 445 mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok) 446 { 447 struct mdoc_node *p; 448 449 p = node_alloc(m, line, pos, tok, MDOC_TAIL); 450 if ( ! node_append(m, p)) 451 return(0); 452 m->next = MDOC_NEXT_CHILD; 453 return(1); 454 } 455 456 457 int 458 mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok) 459 { 460 struct mdoc_node *p; 461 462 assert(m->first); 463 assert(m->last); 464 465 p = node_alloc(m, line, pos, tok, MDOC_HEAD); 466 if ( ! node_append(m, p)) 467 return(0); 468 m->next = MDOC_NEXT_CHILD; 469 return(1); 470 } 471 472 473 int 474 mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok) 475 { 476 struct mdoc_node *p; 477 478 p = node_alloc(m, line, pos, tok, MDOC_BODY); 479 if ( ! node_append(m, p)) 480 return(0); 481 m->next = MDOC_NEXT_CHILD; 482 return(1); 483 } 484 485 486 int 487 mdoc_block_alloc(struct mdoc *m, int line, int pos, 488 int tok, struct mdoc_arg *args) 489 { 490 struct mdoc_node *p; 491 492 p = node_alloc(m, line, pos, tok, MDOC_BLOCK); 493 p->args = args; 494 if (p->args) 495 (args->refcnt)++; 496 if ( ! node_append(m, p)) 497 return(0); 498 m->next = MDOC_NEXT_CHILD; 499 return(1); 500 } 501 502 503 int 504 mdoc_elem_alloc(struct mdoc *m, int line, int pos, 505 int tok, struct mdoc_arg *args) 506 { 507 struct mdoc_node *p; 508 509 p = node_alloc(m, line, pos, tok, MDOC_ELEM); 510 p->args = args; 511 if (p->args) 512 (args->refcnt)++; 513 if ( ! node_append(m, p)) 514 return(0); 515 m->next = MDOC_NEXT_CHILD; 516 return(1); 517 } 518 519 520 static int 521 pstring(struct mdoc *m, int line, int pos, const char *p, size_t len) 522 { 523 struct mdoc_node *n; 524 size_t sv; 525 526 n = node_alloc(m, line, pos, -1, MDOC_TEXT); 527 n->string = mandoc_malloc(len + 1); 528 sv = strlcpy(n->string, p, len + 1); 529 530 /* Prohibit truncation. */ 531 assert(sv < len + 1); 532 533 if ( ! node_append(m, n)) 534 return(0); 535 m->next = MDOC_NEXT_SIBLING; 536 return(1); 537 } 538 539 540 int 541 mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) 542 { 543 544 return(pstring(m, line, pos, p, strlen(p))); 545 } 546 547 548 void 549 mdoc_node_free(struct mdoc_node *p) 550 { 551 552 if (p->parent) 553 p->parent->nchild--; 554 if (p->string) 555 free(p->string); 556 if (p->args) 557 mdoc_argv_free(p->args); 558 free(p); 559 } 560 561 562 void 563 mdoc_node_freelist(struct mdoc_node *p) 564 { 565 566 if (p->child) 567 mdoc_node_freelist(p->child); 568 if (p->next) 569 mdoc_node_freelist(p->next); 570 571 assert(0 == p->nchild); 572 mdoc_node_free(p); 573 } 574 575 576 /* 577 * Parse free-form text, that is, a line that does not begin with the 578 * control character. 579 */ 580 static int 581 parsetext(struct mdoc *m, int line, char *buf) 582 { 583 int i, j; 584 char sv; 585 586 if (SEC_NONE == m->lastnamed) 587 return(mdoc_perr(m, line, 0, ETEXTPROL)); 588 589 /* 590 * If in literal mode, then pass the buffer directly to the 591 * back-end, as it should be preserved as a single term. 592 */ 593 594 if (MDOC_LITERAL & m->flags) 595 return(mdoc_word_alloc(m, line, 0, buf)); 596 597 /* Disallow blank/white-space lines in non-literal mode. */ 598 599 for (i = 0; ' ' == buf[i]; i++) 600 /* Skip leading whitespace. */ ; 601 602 if ('\0' == buf[i]) 603 return(mdoc_perr(m, line, 0, ENOBLANK)); 604 605 /* 606 * Break apart a free-form line into tokens. Spaces are 607 * stripped out of the input. 608 */ 609 610 for (j = i; buf[i]; i++) { 611 if (' ' != buf[i]) 612 continue; 613 614 /* Escaped whitespace. */ 615 if (i && ' ' == buf[i] && '\\' == buf[i - 1]) 616 continue; 617 618 sv = buf[i]; 619 buf[i++] = '\0'; 620 621 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 622 return(0); 623 624 /* Trailing whitespace? Check at overwritten byte. */ 625 626 if (' ' == sv && '\0' == buf[i]) 627 if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS)) 628 return(0); 629 630 for ( ; ' ' == buf[i]; i++) 631 /* Skip trailing whitespace. */ ; 632 633 j = i; 634 635 /* Trailing whitespace? */ 636 637 if (' ' == buf[i - 1] && '\0' == buf[i]) 638 if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS)) 639 return(0); 640 641 if ('\0' == buf[i]) 642 break; 643 } 644 645 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 646 return(0); 647 648 m->next = MDOC_NEXT_SIBLING; 649 return(1); 650 } 651 652 653 654 static int 655 macrowarn(struct mdoc *m, int ln, const char *buf) 656 { 657 if ( ! (MDOC_IGN_MACRO & m->pflags)) 658 return(mdoc_verr(m, ln, 0, 659 "unknown macro: %s%s", 660 buf, strlen(buf) > 3 ? "..." : "")); 661 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s", 662 buf, strlen(buf) > 3 ? "..." : "")); 663 } 664 665 666 /* 667 * Parse a macro line, that is, a line beginning with the control 668 * character. 669 */ 670 int 671 parsemacro(struct mdoc *m, int ln, char *buf) 672 { 673 int i, j, c; 674 char mac[5]; 675 676 /* Empty lines are ignored. */ 677 678 if ('\0' == buf[1]) 679 return(1); 680 681 i = 1; 682 683 /* Accept whitespace after the initial control char. */ 684 685 if (' ' == buf[i]) { 686 i++; 687 while (buf[i] && ' ' == buf[i]) 688 i++; 689 if ('\0' == buf[i]) 690 return(1); 691 } 692 693 /* Copy the first word into a nil-terminated buffer. */ 694 695 for (j = 0; j < 4; j++, i++) { 696 if ('\0' == (mac[j] = buf[i])) 697 break; 698 else if (' ' == buf[i]) 699 break; 700 701 /* Check for invalid characters. */ 702 703 if (isgraph((u_char)buf[i])) 704 continue; 705 return(mdoc_perr(m, ln, i, EPRINT)); 706 } 707 708 mac[j] = 0; 709 710 if (j == 4 || j < 2) { 711 if ( ! macrowarn(m, ln, mac)) 712 goto err; 713 return(1); 714 } 715 716 if (MDOC_MAX == (c = mdoc_hash_find(mac))) { 717 if ( ! macrowarn(m, ln, mac)) 718 goto err; 719 return(1); 720 } 721 722 /* The macro is sane. Jump to the next word. */ 723 724 while (buf[i] && ' ' == buf[i]) 725 i++; 726 727 /* Trailing whitespace? */ 728 729 if ('\0' == buf[i] && ' ' == buf[i - 1]) 730 if ( ! mdoc_pwarn(m, ln, i - 1, ETAILWS)) 731 goto err; 732 733 /* 734 * Begin recursive parse sequence. Since we're at the start of 735 * the line, we don't need to do callable/parseable checks. 736 */ 737 if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) 738 goto err; 739 740 return(1); 741 742 err: /* Error out. */ 743 744 m->flags |= MDOC_HALT; 745 return(0); 746 } 747 748 749