1 /* $Id: mdoc.c,v 1.31 2009/10/27 21:40:07 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #include <sys/types.h> 18 19 #include <assert.h> 20 #include <ctype.h> 21 #include <stdarg.h> 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <string.h> 25 26 #include "libmdoc.h" 27 28 const char *const __mdoc_merrnames[MERRMAX] = { 29 "trailing whitespace", /* ETAILWS */ 30 "unexpected quoted parameter", /* EQUOTPARM */ 31 "unterminated quoted parameter", /* EQUOTTERM */ 32 "system: malloc error", /* EMALLOC */ 33 "argument parameter suggested", /* EARGVAL */ 34 "macro disallowed in prologue", /* EBODYPROL */ 35 "macro disallowed in body", /* EPROLBODY */ 36 "text disallowed in prologue", /* ETEXTPROL */ 37 "blank line disallowed", /* ENOBLANK */ 38 "text parameter too long", /* ETOOLONG */ 39 "invalid escape sequence", /* EESCAPE */ 40 "invalid character", /* EPRINT */ 41 "document has no body", /* ENODAT */ 42 "document has no prologue", /* ENOPROLOGUE */ 43 "expected line arguments", /* ELINE */ 44 "invalid AT&T argument", /* EATT */ 45 "default name not yet set", /* ENAME */ 46 "missing list type", /* ELISTTYPE */ 47 "missing display type", /* EDISPTYPE */ 48 "too many display types", /* EMULTIDISP */ 49 "too many list types", /* EMULTILIST */ 50 "NAME section must be first", /* ESECNAME */ 51 "badly-formed NAME section", /* ENAMESECINC */ 52 "argument repeated", /* EARGREP */ 53 "expected boolean parameter", /* EBOOL */ 54 "inconsistent column syntax", /* ECOLMIS */ 55 "nested display invalid", /* ENESTDISP */ 56 "width argument missing", /* EMISSWIDTH */ 57 "invalid section for this manual section", /* EWRONGMSEC */ 58 "section out of conventional order", /* ESECOOO */ 59 "section repeated", /* ESECREP */ 60 "invalid standard argument", /* EBADSTAND */ 61 "multi-line arguments discouraged", /* ENOMULTILINE */ 62 "multi-line arguments suggested", /* EMULTILINE */ 63 "line arguments discouraged", /* ENOLINE */ 64 "prologue macro out of conventional order", /* EPROLOOO */ 65 "prologue macro repeated", /* EPROLREP */ 66 "invalid manual section", /* EBADMSEC */ 67 "invalid section", /* EBADSEC */ 68 "invalid font mode", /* EFONT */ 69 "invalid date syntax", /* EBADDATE */ 70 "invalid number format", /* ENUMFMT */ 71 "superfluous width argument", /* ENOWIDTH */ 72 "system: utsname error", /* EUTSNAME */ 73 "obsolete macro", /* EOBS */ 74 "end-of-line scope violation", /* EIMPBRK */ 75 "empty macro ignored", /* EIGNE */ 76 "unclosed explicit scope", /* EOPEN */ 77 "unterminated quoted phrase", /* EQUOTPHR */ 78 "closure macro without prior context", /* ENOCTX */ 79 "no description found for library", /* ELIB */ 80 "bad child for parent context", /* EBADCHILD */ 81 "list arguments preceding type", /* ENOTYPE */ 82 }; 83 84 const char *const __mdoc_macronames[MDOC_MAX] = { 85 "Ap", "Dd", "Dt", "Os", 86 "Sh", "Ss", "Pp", "D1", 87 "Dl", "Bd", "Ed", "Bl", 88 "El", "It", "Ad", "An", 89 "Ar", "Cd", "Cm", "Dv", 90 "Er", "Ev", "Ex", "Fa", 91 "Fd", "Fl", "Fn", "Ft", 92 "Ic", "In", "Li", "Nd", 93 "Nm", "Op", "Ot", "Pa", 94 "Rv", "St", "Va", "Vt", 95 /* LINTED */ 96 "Xr", "\%A", "\%B", "\%D", 97 /* LINTED */ 98 "\%I", "\%J", "\%N", "\%O", 99 /* LINTED */ 100 "\%P", "\%R", "\%T", "\%V", 101 "Ac", "Ao", "Aq", "At", 102 "Bc", "Bf", "Bo", "Bq", 103 "Bsx", "Bx", "Db", "Dc", 104 "Do", "Dq", "Ec", "Ef", 105 "Em", "Eo", "Fx", "Ms", 106 "No", "Ns", "Nx", "Ox", 107 "Pc", "Pf", "Po", "Pq", 108 "Qc", "Ql", "Qo", "Qq", 109 "Re", "Rs", "Sc", "So", 110 "Sq", "Sm", "Sx", "Sy", 111 "Tn", "Ux", "Xc", "Xo", 112 "Fo", "Fc", "Oo", "Oc", 113 "Bk", "Ek", "Bt", "Hf", 114 "Fr", "Ud", "Lb", "Lp", 115 "Lk", "Mt", "Brq", "Bro", 116 /* LINTED */ 117 "Brc", "\%C", "Es", "En", 118 /* LINTED */ 119 "Dx", "\%Q", "br", "sp", 120 /* LINTED */ 121 "\%U" 122 }; 123 124 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 125 "split", "nosplit", "ragged", 126 "unfilled", "literal", "file", 127 "offset", "bullet", "dash", 128 "hyphen", "item", "enum", 129 "tag", "diag", "hang", 130 "ohang", "inset", "column", 131 "width", "compact", "std", 132 "filled", "words", "emphasis", 133 "symbolic", "nested", "centered" 134 }; 135 136 const char * const *mdoc_macronames = __mdoc_macronames; 137 const char * const *mdoc_argnames = __mdoc_argnames; 138 139 static void mdoc_free1(struct mdoc *); 140 static int mdoc_alloc1(struct mdoc *); 141 static struct mdoc_node *node_alloc(struct mdoc *, int, int, 142 int, enum mdoc_type); 143 static int node_append(struct mdoc *, 144 struct mdoc_node *); 145 static int parsetext(struct mdoc *, int, char *); 146 static int parsemacro(struct mdoc *, int, char *); 147 static int macrowarn(struct mdoc *, int, const char *); 148 static int pstring(struct mdoc *, int, int, 149 const char *, size_t); 150 151 152 const struct mdoc_node * 153 mdoc_node(const struct mdoc *m) 154 { 155 156 return(MDOC_HALT & m->flags ? NULL : m->first); 157 } 158 159 160 const struct mdoc_meta * 161 mdoc_meta(const struct mdoc *m) 162 { 163 164 return(MDOC_HALT & m->flags ? NULL : &m->meta); 165 } 166 167 168 /* 169 * Frees volatile resources (parse tree, meta-data, fields). 170 */ 171 static void 172 mdoc_free1(struct mdoc *mdoc) 173 { 174 175 if (mdoc->first) 176 mdoc_node_freelist(mdoc->first); 177 if (mdoc->meta.title) 178 free(mdoc->meta.title); 179 if (mdoc->meta.os) 180 free(mdoc->meta.os); 181 if (mdoc->meta.name) 182 free(mdoc->meta.name); 183 if (mdoc->meta.arch) 184 free(mdoc->meta.arch); 185 if (mdoc->meta.vol) 186 free(mdoc->meta.vol); 187 } 188 189 190 /* 191 * Allocate all volatile resources (parse tree, meta-data, fields). 192 */ 193 static int 194 mdoc_alloc1(struct mdoc *mdoc) 195 { 196 197 bzero(&mdoc->meta, sizeof(struct mdoc_meta)); 198 mdoc->flags = 0; 199 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 200 mdoc->last = calloc(1, sizeof(struct mdoc_node)); 201 if (NULL == mdoc->last) 202 return(0); 203 204 mdoc->first = mdoc->last; 205 mdoc->last->type = MDOC_ROOT; 206 mdoc->next = MDOC_NEXT_CHILD; 207 return(1); 208 } 209 210 211 /* 212 * Free up volatile resources (see mdoc_free1()) then re-initialises the 213 * data with mdoc_alloc1(). After invocation, parse data has been reset 214 * and the parser is ready for re-invocation on a new tree; however, 215 * cross-parse non-volatile data is kept intact. 216 */ 217 int 218 mdoc_reset(struct mdoc *mdoc) 219 { 220 221 mdoc_free1(mdoc); 222 return(mdoc_alloc1(mdoc)); 223 } 224 225 226 /* 227 * Completely free up all volatile and non-volatile parse resources. 228 * After invocation, the pointer is no longer usable. 229 */ 230 void 231 mdoc_free(struct mdoc *mdoc) 232 { 233 234 mdoc_free1(mdoc); 235 free(mdoc); 236 } 237 238 239 /* 240 * Allocate volatile and non-volatile parse resources. 241 */ 242 struct mdoc * 243 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb) 244 { 245 struct mdoc *p; 246 247 if (NULL == (p = calloc(1, sizeof(struct mdoc)))) 248 return(NULL); 249 if (cb) 250 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); 251 252 mdoc_hash_init(); 253 254 p->data = data; 255 p->pflags = pflags; 256 257 if (mdoc_alloc1(p)) 258 return(p); 259 260 free(p); 261 return(NULL); 262 } 263 264 265 /* 266 * Climb back up the parse tree, validating open scopes. Mostly calls 267 * through to macro_end() in macro.c. 268 */ 269 int 270 mdoc_endparse(struct mdoc *m) 271 { 272 273 if (MDOC_HALT & m->flags) 274 return(0); 275 else if (mdoc_macroend(m)) 276 return(1); 277 m->flags |= MDOC_HALT; 278 return(0); 279 } 280 281 282 /* 283 * Main parse routine. Parses a single line -- really just hands off to 284 * the macro (parsemacro()) or text parser (parsetext()). 285 */ 286 int 287 mdoc_parseln(struct mdoc *m, int ln, char *buf) 288 { 289 290 if (MDOC_HALT & m->flags) 291 return(0); 292 293 return('.' == *buf ? parsemacro(m, ln, buf) : 294 parsetext(m, ln, buf)); 295 } 296 297 298 int 299 mdoc_verr(struct mdoc *mdoc, int ln, int pos, 300 const char *fmt, ...) 301 { 302 char buf[256]; 303 va_list ap; 304 305 if (NULL == mdoc->cb.mdoc_err) 306 return(0); 307 308 va_start(ap, fmt); 309 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 310 va_end(ap); 311 312 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf)); 313 } 314 315 316 int 317 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 318 { 319 char buf[256]; 320 va_list ap; 321 322 if (NULL == mdoc->cb.mdoc_warn) 323 return(0); 324 325 va_start(ap, fmt); 326 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 327 va_end(ap); 328 329 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf)); 330 } 331 332 333 int 334 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type) 335 { 336 const char *p; 337 338 p = __mdoc_merrnames[(int)type]; 339 assert(p); 340 341 if (iserr) 342 return(mdoc_verr(m, line, pos, p)); 343 344 return(mdoc_vwarn(m, line, pos, p)); 345 } 346 347 348 int 349 mdoc_macro(struct mdoc *m, int tok, 350 int ln, int pp, int *pos, char *buf) 351 { 352 /* 353 * If we're in the prologue, deny "body" macros. Similarly, if 354 * we're in the body, deny prologue calls. 355 */ 356 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 357 MDOC_PBODY & m->flags) 358 return(mdoc_perr(m, ln, pp, EPROLBODY)); 359 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 360 ! (MDOC_PBODY & m->flags)) 361 return(mdoc_perr(m, ln, pp, EBODYPROL)); 362 363 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); 364 } 365 366 367 static int 368 node_append(struct mdoc *mdoc, struct mdoc_node *p) 369 { 370 371 assert(mdoc->last); 372 assert(mdoc->first); 373 assert(MDOC_ROOT != p->type); 374 375 switch (mdoc->next) { 376 case (MDOC_NEXT_SIBLING): 377 mdoc->last->next = p; 378 p->prev = mdoc->last; 379 p->parent = mdoc->last->parent; 380 break; 381 case (MDOC_NEXT_CHILD): 382 mdoc->last->child = p; 383 p->parent = mdoc->last; 384 break; 385 default: 386 abort(); 387 /* NOTREACHED */ 388 } 389 390 p->parent->nchild++; 391 392 if ( ! mdoc_valid_pre(mdoc, p)) 393 return(0); 394 if ( ! mdoc_action_pre(mdoc, p)) 395 return(0); 396 397 switch (p->type) { 398 case (MDOC_HEAD): 399 assert(MDOC_BLOCK == p->parent->type); 400 p->parent->head = p; 401 break; 402 case (MDOC_TAIL): 403 assert(MDOC_BLOCK == p->parent->type); 404 p->parent->tail = p; 405 break; 406 case (MDOC_BODY): 407 assert(MDOC_BLOCK == p->parent->type); 408 p->parent->body = p; 409 break; 410 default: 411 break; 412 } 413 414 mdoc->last = p; 415 416 switch (p->type) { 417 case (MDOC_TEXT): 418 if ( ! mdoc_valid_post(mdoc)) 419 return(0); 420 if ( ! mdoc_action_post(mdoc)) 421 return(0); 422 break; 423 default: 424 break; 425 } 426 427 return(1); 428 } 429 430 431 static struct mdoc_node * 432 node_alloc(struct mdoc *m, int line, 433 int pos, int tok, enum mdoc_type type) 434 { 435 struct mdoc_node *p; 436 437 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) { 438 (void)mdoc_nerr(m, m->last, EMALLOC); 439 return(NULL); 440 } 441 442 p->sec = m->lastsec; 443 p->line = line; 444 p->pos = pos; 445 p->tok = tok; 446 if (MDOC_TEXT != (p->type = type)) 447 assert(p->tok >= 0); 448 449 return(p); 450 } 451 452 453 int 454 mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok) 455 { 456 struct mdoc_node *p; 457 458 p = node_alloc(m, line, pos, tok, MDOC_TAIL); 459 if (NULL == p) 460 return(0); 461 if ( ! node_append(m, p)) 462 return(0); 463 m->next = MDOC_NEXT_CHILD; 464 return(1); 465 } 466 467 468 int 469 mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok) 470 { 471 struct mdoc_node *p; 472 473 assert(m->first); 474 assert(m->last); 475 476 p = node_alloc(m, line, pos, tok, MDOC_HEAD); 477 if (NULL == p) 478 return(0); 479 if ( ! node_append(m, p)) 480 return(0); 481 m->next = MDOC_NEXT_CHILD; 482 return(1); 483 } 484 485 486 int 487 mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok) 488 { 489 struct mdoc_node *p; 490 491 p = node_alloc(m, line, pos, tok, MDOC_BODY); 492 if (NULL == p) 493 return(0); 494 if ( ! node_append(m, p)) 495 return(0); 496 m->next = MDOC_NEXT_CHILD; 497 return(1); 498 } 499 500 501 int 502 mdoc_block_alloc(struct mdoc *m, int line, int pos, 503 int tok, struct mdoc_arg *args) 504 { 505 struct mdoc_node *p; 506 507 p = node_alloc(m, line, pos, tok, MDOC_BLOCK); 508 if (NULL == p) 509 return(0); 510 p->args = args; 511 if (p->args) 512 (args->refcnt)++; 513 if ( ! node_append(m, p)) 514 return(0); 515 m->next = MDOC_NEXT_CHILD; 516 return(1); 517 } 518 519 520 int 521 mdoc_elem_alloc(struct mdoc *m, int line, int pos, 522 int tok, struct mdoc_arg *args) 523 { 524 struct mdoc_node *p; 525 526 p = node_alloc(m, line, pos, tok, MDOC_ELEM); 527 if (NULL == p) 528 return(0); 529 p->args = args; 530 if (p->args) 531 (args->refcnt)++; 532 if ( ! node_append(m, p)) 533 return(0); 534 m->next = MDOC_NEXT_CHILD; 535 return(1); 536 } 537 538 539 static int 540 pstring(struct mdoc *m, int line, int pos, const char *p, size_t len) 541 { 542 struct mdoc_node *n; 543 size_t sv; 544 545 n = node_alloc(m, line, pos, -1, MDOC_TEXT); 546 if (NULL == n) 547 return(mdoc_nerr(m, m->last, EMALLOC)); 548 549 n->string = malloc(len + 1); 550 if (NULL == n->string) { 551 free(n); 552 return(mdoc_nerr(m, m->last, EMALLOC)); 553 } 554 555 sv = strlcpy(n->string, p, len + 1); 556 557 /* Prohibit truncation. */ 558 assert(sv < len + 1); 559 560 if ( ! node_append(m, n)) 561 return(0); 562 m->next = MDOC_NEXT_SIBLING; 563 return(1); 564 } 565 566 567 int 568 mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) 569 { 570 571 return(pstring(m, line, pos, p, strlen(p))); 572 } 573 574 575 void 576 mdoc_node_free(struct mdoc_node *p) 577 { 578 579 if (p->parent) 580 p->parent->nchild--; 581 if (p->string) 582 free(p->string); 583 if (p->args) 584 mdoc_argv_free(p->args); 585 free(p); 586 } 587 588 589 void 590 mdoc_node_freelist(struct mdoc_node *p) 591 { 592 593 if (p->child) 594 mdoc_node_freelist(p->child); 595 if (p->next) 596 mdoc_node_freelist(p->next); 597 598 assert(0 == p->nchild); 599 mdoc_node_free(p); 600 } 601 602 603 /* 604 * Parse free-form text, that is, a line that does not begin with the 605 * control character. 606 */ 607 static int 608 parsetext(struct mdoc *m, int line, char *buf) 609 { 610 int i, j; 611 612 if (SEC_NONE == m->lastnamed) 613 return(mdoc_perr(m, line, 0, ETEXTPROL)); 614 615 /* 616 * If in literal mode, then pass the buffer directly to the 617 * back-end, as it should be preserved as a single term. 618 */ 619 620 if (MDOC_LITERAL & m->flags) 621 return(mdoc_word_alloc(m, line, 0, buf)); 622 623 /* Disallow blank/white-space lines in non-literal mode. */ 624 625 for (i = 0; ' ' == buf[i]; i++) 626 /* Skip leading whitespace. */ ; 627 if (0 == buf[i]) 628 return(mdoc_perr(m, line, 0, ENOBLANK)); 629 630 /* 631 * Break apart a free-form line into tokens. Spaces are 632 * stripped out of the input. 633 */ 634 635 for (j = i; buf[i]; i++) { 636 if (' ' != buf[i]) 637 continue; 638 639 /* Escaped whitespace. */ 640 if (i && ' ' == buf[i] && '\\' == buf[i - 1]) 641 continue; 642 643 buf[i++] = 0; 644 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 645 return(0); 646 647 for ( ; ' ' == buf[i]; i++) 648 /* Skip trailing whitespace. */ ; 649 650 j = i; 651 if (0 == buf[i]) 652 break; 653 } 654 655 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 656 return(0); 657 658 m->next = MDOC_NEXT_SIBLING; 659 return(1); 660 } 661 662 663 664 static int 665 macrowarn(struct mdoc *m, int ln, const char *buf) 666 { 667 if ( ! (MDOC_IGN_MACRO & m->pflags)) 668 return(mdoc_verr(m, ln, 0, 669 "unknown macro: %s%s", 670 buf, strlen(buf) > 3 ? "..." : "")); 671 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s", 672 buf, strlen(buf) > 3 ? "..." : "")); 673 } 674 675 676 /* 677 * Parse a macro line, that is, a line beginning with the control 678 * character. 679 */ 680 int 681 parsemacro(struct mdoc *m, int ln, char *buf) 682 { 683 int i, j, c; 684 char mac[5]; 685 686 /* Empty lines are ignored. */ 687 688 if (0 == buf[1]) 689 return(1); 690 691 i = 1; 692 693 /* Accept whitespace after the initial control char. */ 694 695 if (' ' == buf[i]) { 696 i++; 697 while (buf[i] && ' ' == buf[i]) 698 i++; 699 if (0 == buf[i]) 700 return(1); 701 } 702 703 /* Copy the first word into a nil-terminated buffer. */ 704 705 for (j = 0; j < 4; j++, i++) { 706 if (0 == (mac[j] = buf[i])) 707 break; 708 else if (' ' == buf[i]) 709 break; 710 711 /* Check for invalid characters. */ 712 713 if (isgraph((u_char)buf[i])) 714 continue; 715 return(mdoc_perr(m, ln, i, EPRINT)); 716 } 717 718 mac[j] = 0; 719 720 if (j == 4 || j < 2) { 721 if ( ! macrowarn(m, ln, mac)) 722 goto err; 723 return(1); 724 } 725 726 if (MDOC_MAX == (c = mdoc_hash_find(mac))) { 727 if ( ! macrowarn(m, ln, mac)) 728 goto err; 729 return(1); 730 } 731 732 /* The macro is sane. Jump to the next word. */ 733 734 while (buf[i] && ' ' == buf[i]) 735 i++; 736 737 /* 738 * Begin recursive parse sequence. Since we're at the start of 739 * the line, we don't need to do callable/parseable checks. 740 */ 741 if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) 742 goto err; 743 744 return(1); 745 746 err: /* Error out. */ 747 748 m->flags |= MDOC_HALT; 749 return(0); 750 } 751 752 753