1 /* $Id: mdoc.c,v 1.101 2014/03/23 12:44:18 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <stdarg.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <time.h> 27 28 #include "mdoc.h" 29 #include "mandoc.h" 30 #include "mandoc_aux.h" 31 #include "libmdoc.h" 32 #include "libmandoc.h" 33 34 const char *const __mdoc_macronames[MDOC_MAX] = { 35 "Ap", "Dd", "Dt", "Os", 36 "Sh", "Ss", "Pp", "D1", 37 "Dl", "Bd", "Ed", "Bl", 38 "El", "It", "Ad", "An", 39 "Ar", "Cd", "Cm", "Dv", 40 "Er", "Ev", "Ex", "Fa", 41 "Fd", "Fl", "Fn", "Ft", 42 "Ic", "In", "Li", "Nd", 43 "Nm", "Op", "Ot", "Pa", 44 "Rv", "St", "Va", "Vt", 45 /* LINTED */ 46 "Xr", "%A", "%B", "%D", 47 /* LINTED */ 48 "%I", "%J", "%N", "%O", 49 /* LINTED */ 50 "%P", "%R", "%T", "%V", 51 "Ac", "Ao", "Aq", "At", 52 "Bc", "Bf", "Bo", "Bq", 53 "Bsx", "Bx", "Db", "Dc", 54 "Do", "Dq", "Ec", "Ef", 55 "Em", "Eo", "Fx", "Ms", 56 "No", "Ns", "Nx", "Ox", 57 "Pc", "Pf", "Po", "Pq", 58 "Qc", "Ql", "Qo", "Qq", 59 "Re", "Rs", "Sc", "So", 60 "Sq", "Sm", "Sx", "Sy", 61 "Tn", "Ux", "Xc", "Xo", 62 "Fo", "Fc", "Oo", "Oc", 63 "Bk", "Ek", "Bt", "Hf", 64 "Fr", "Ud", "Lb", "Lp", 65 "Lk", "Mt", "Brq", "Bro", 66 /* LINTED */ 67 "Brc", "%C", "Es", "En", 68 /* LINTED */ 69 "Dx", "%Q", "br", "sp", 70 /* LINTED */ 71 "%U", "Ta" 72 }; 73 74 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 75 "split", "nosplit", "ragged", 76 "unfilled", "literal", "file", 77 "offset", "bullet", "dash", 78 "hyphen", "item", "enum", 79 "tag", "diag", "hang", 80 "ohang", "inset", "column", 81 "width", "compact", "std", 82 "filled", "words", "emphasis", 83 "symbolic", "nested", "centered" 84 }; 85 86 const char * const *mdoc_macronames = __mdoc_macronames; 87 const char * const *mdoc_argnames = __mdoc_argnames; 88 89 static void mdoc_node_free(struct mdoc_node *); 90 static void mdoc_node_unlink(struct mdoc *, 91 struct mdoc_node *); 92 static void mdoc_free1(struct mdoc *); 93 static void mdoc_alloc1(struct mdoc *); 94 static struct mdoc_node *node_alloc(struct mdoc *, int, int, 95 enum mdoct, enum mdoc_type); 96 static int node_append(struct mdoc *, 97 struct mdoc_node *); 98 #if 0 99 static int mdoc_preptext(struct mdoc *, int, char *, int); 100 #endif 101 static int mdoc_ptext(struct mdoc *, int, char *, int); 102 static int mdoc_pmacro(struct mdoc *, int, char *, int); 103 104 const struct mdoc_node * 105 mdoc_node(const struct mdoc *mdoc) 106 { 107 108 assert( ! (MDOC_HALT & mdoc->flags)); 109 return(mdoc->first); 110 } 111 112 113 const struct mdoc_meta * 114 mdoc_meta(const struct mdoc *mdoc) 115 { 116 117 assert( ! (MDOC_HALT & mdoc->flags)); 118 return(&mdoc->meta); 119 } 120 121 122 /* 123 * Frees volatile resources (parse tree, meta-data, fields). 124 */ 125 static void 126 mdoc_free1(struct mdoc *mdoc) 127 { 128 129 if (mdoc->first) 130 mdoc_node_delete(mdoc, mdoc->first); 131 if (mdoc->meta.title) 132 free(mdoc->meta.title); 133 if (mdoc->meta.os) 134 free(mdoc->meta.os); 135 if (mdoc->meta.name) 136 free(mdoc->meta.name); 137 if (mdoc->meta.arch) 138 free(mdoc->meta.arch); 139 if (mdoc->meta.vol) 140 free(mdoc->meta.vol); 141 if (mdoc->meta.msec) 142 free(mdoc->meta.msec); 143 if (mdoc->meta.date) 144 free(mdoc->meta.date); 145 } 146 147 148 /* 149 * Allocate all volatile resources (parse tree, meta-data, fields). 150 */ 151 static void 152 mdoc_alloc1(struct mdoc *mdoc) 153 { 154 155 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 156 mdoc->flags = 0; 157 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 158 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 159 mdoc->first = mdoc->last; 160 mdoc->last->type = MDOC_ROOT; 161 mdoc->last->tok = MDOC_MAX; 162 mdoc->next = MDOC_NEXT_CHILD; 163 } 164 165 166 /* 167 * Free up volatile resources (see mdoc_free1()) then re-initialises the 168 * data with mdoc_alloc1(). After invocation, parse data has been reset 169 * and the parser is ready for re-invocation on a new tree; however, 170 * cross-parse non-volatile data is kept intact. 171 */ 172 void 173 mdoc_reset(struct mdoc *mdoc) 174 { 175 176 mdoc_free1(mdoc); 177 mdoc_alloc1(mdoc); 178 } 179 180 181 /* 182 * Completely free up all volatile and non-volatile parse resources. 183 * After invocation, the pointer is no longer usable. 184 */ 185 void 186 mdoc_free(struct mdoc *mdoc) 187 { 188 189 mdoc_free1(mdoc); 190 free(mdoc); 191 } 192 193 194 /* 195 * Allocate volatile and non-volatile parse resources. 196 */ 197 struct mdoc * 198 mdoc_alloc(struct roff *roff, struct mparse *parse, 199 char *defos, int quick) 200 { 201 struct mdoc *p; 202 203 p = mandoc_calloc(1, sizeof(struct mdoc)); 204 205 p->parse = parse; 206 p->defos = defos; 207 p->quick = quick; 208 p->roff = roff; 209 210 mdoc_hash_init(); 211 mdoc_alloc1(p); 212 return(p); 213 } 214 215 216 /* 217 * Climb back up the parse tree, validating open scopes. Mostly calls 218 * through to macro_end() in macro.c. 219 */ 220 int 221 mdoc_endparse(struct mdoc *mdoc) 222 { 223 224 assert( ! (MDOC_HALT & mdoc->flags)); 225 if (mdoc_macroend(mdoc)) 226 return(1); 227 mdoc->flags |= MDOC_HALT; 228 return(0); 229 } 230 231 int 232 mdoc_addeqn(struct mdoc *mdoc, const struct eqn *ep) 233 { 234 struct mdoc_node *n; 235 236 assert( ! (MDOC_HALT & mdoc->flags)); 237 238 /* No text before an initial macro. */ 239 240 if (SEC_NONE == mdoc->lastnamed) { 241 mdoc_pmsg(mdoc, ep->ln, ep->pos, MANDOCERR_NOTEXT); 242 return(1); 243 } 244 245 n = node_alloc(mdoc, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); 246 n->eqn = ep; 247 248 if ( ! node_append(mdoc, n)) 249 return(0); 250 251 mdoc->next = MDOC_NEXT_SIBLING; 252 return(1); 253 } 254 255 int 256 mdoc_addspan(struct mdoc *mdoc, const struct tbl_span *sp) 257 { 258 struct mdoc_node *n; 259 260 assert( ! (MDOC_HALT & mdoc->flags)); 261 262 /* No text before an initial macro. */ 263 264 if (SEC_NONE == mdoc->lastnamed) { 265 mdoc_pmsg(mdoc, sp->line, 0, MANDOCERR_NOTEXT); 266 return(1); 267 } 268 269 n = node_alloc(mdoc, sp->line, 0, MDOC_MAX, MDOC_TBL); 270 n->span = sp; 271 272 if ( ! node_append(mdoc, n)) 273 return(0); 274 275 mdoc->next = MDOC_NEXT_SIBLING; 276 return(1); 277 } 278 279 280 /* 281 * Main parse routine. Parses a single line -- really just hands off to 282 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 283 */ 284 int 285 mdoc_parseln(struct mdoc *mdoc, int ln, char *buf, int offs) 286 { 287 288 assert( ! (MDOC_HALT & mdoc->flags)); 289 290 mdoc->flags |= MDOC_NEWLINE; 291 292 /* 293 * Let the roff nS register switch SYNOPSIS mode early, 294 * such that the parser knows at all times 295 * whether this mode is on or off. 296 * Note that this mode is also switched by the Sh macro. 297 */ 298 if (roff_getreg(mdoc->roff, "nS")) 299 mdoc->flags |= MDOC_SYNOPSIS; 300 else 301 mdoc->flags &= ~MDOC_SYNOPSIS; 302 303 return(roff_getcontrol(mdoc->roff, buf, &offs) ? 304 mdoc_pmacro(mdoc, ln, buf, offs) : 305 mdoc_ptext(mdoc, ln, buf, offs)); 306 } 307 308 int 309 mdoc_macro(MACRO_PROT_ARGS) 310 { 311 assert(tok < MDOC_MAX); 312 313 /* If we're in the body, deny prologue calls. */ 314 315 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 316 MDOC_PBODY & mdoc->flags) { 317 mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADBODY); 318 return(1); 319 } 320 321 /* If we're in the prologue, deny "body" macros. */ 322 323 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 324 ! (MDOC_PBODY & mdoc->flags)) { 325 mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADPROLOG); 326 if (NULL == mdoc->meta.msec) 327 mdoc->meta.msec = mandoc_strdup("1"); 328 if (NULL == mdoc->meta.title) 329 mdoc->meta.title = mandoc_strdup("UNKNOWN"); 330 if (NULL == mdoc->meta.vol) 331 mdoc->meta.vol = mandoc_strdup("LOCAL"); 332 if (NULL == mdoc->meta.os) 333 mdoc->meta.os = mandoc_strdup("LOCAL"); 334 if (NULL == mdoc->meta.date) 335 mdoc->meta.date = mandoc_normdate 336 (mdoc->parse, NULL, line, ppos); 337 mdoc->flags |= MDOC_PBODY; 338 } 339 340 return((*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf)); 341 } 342 343 344 static int 345 node_append(struct mdoc *mdoc, struct mdoc_node *p) 346 { 347 348 assert(mdoc->last); 349 assert(mdoc->first); 350 assert(MDOC_ROOT != p->type); 351 352 switch (mdoc->next) { 353 case (MDOC_NEXT_SIBLING): 354 mdoc->last->next = p; 355 p->prev = mdoc->last; 356 p->parent = mdoc->last->parent; 357 break; 358 case (MDOC_NEXT_CHILD): 359 mdoc->last->child = p; 360 p->parent = mdoc->last; 361 break; 362 default: 363 abort(); 364 /* NOTREACHED */ 365 } 366 367 p->parent->nchild++; 368 369 /* 370 * Copy over the normalised-data pointer of our parent. Not 371 * everybody has one, but copying a null pointer is fine. 372 */ 373 374 switch (p->type) { 375 case (MDOC_BODY): 376 if (ENDBODY_NOT != p->end) 377 break; 378 /* FALLTHROUGH */ 379 case (MDOC_TAIL): 380 /* FALLTHROUGH */ 381 case (MDOC_HEAD): 382 p->norm = p->parent->norm; 383 break; 384 default: 385 break; 386 } 387 388 if ( ! mdoc_valid_pre(mdoc, p)) 389 return(0); 390 391 switch (p->type) { 392 case (MDOC_HEAD): 393 assert(MDOC_BLOCK == p->parent->type); 394 p->parent->head = p; 395 break; 396 case (MDOC_TAIL): 397 assert(MDOC_BLOCK == p->parent->type); 398 p->parent->tail = p; 399 break; 400 case (MDOC_BODY): 401 if (p->end) 402 break; 403 assert(MDOC_BLOCK == p->parent->type); 404 p->parent->body = p; 405 break; 406 default: 407 break; 408 } 409 410 mdoc->last = p; 411 412 switch (p->type) { 413 case (MDOC_TBL): 414 /* FALLTHROUGH */ 415 case (MDOC_TEXT): 416 if ( ! mdoc_valid_post(mdoc)) 417 return(0); 418 break; 419 default: 420 break; 421 } 422 423 return(1); 424 } 425 426 427 static struct mdoc_node * 428 node_alloc(struct mdoc *mdoc, int line, int pos, 429 enum mdoct tok, enum mdoc_type type) 430 { 431 struct mdoc_node *p; 432 433 p = mandoc_calloc(1, sizeof(struct mdoc_node)); 434 p->sec = mdoc->lastsec; 435 p->line = line; 436 p->pos = pos; 437 p->lastline = line; 438 p->tok = tok; 439 p->type = type; 440 441 /* Flag analysis. */ 442 443 if (MDOC_SYNOPSIS & mdoc->flags) 444 p->flags |= MDOC_SYNPRETTY; 445 else 446 p->flags &= ~MDOC_SYNPRETTY; 447 if (MDOC_NEWLINE & mdoc->flags) 448 p->flags |= MDOC_LINE; 449 mdoc->flags &= ~MDOC_NEWLINE; 450 451 return(p); 452 } 453 454 455 int 456 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 457 { 458 struct mdoc_node *p; 459 460 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); 461 if ( ! node_append(mdoc, p)) 462 return(0); 463 mdoc->next = MDOC_NEXT_CHILD; 464 return(1); 465 } 466 467 468 int 469 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 470 { 471 struct mdoc_node *p; 472 473 assert(mdoc->first); 474 assert(mdoc->last); 475 476 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); 477 if ( ! node_append(mdoc, p)) 478 return(0); 479 mdoc->next = MDOC_NEXT_CHILD; 480 return(1); 481 } 482 483 484 int 485 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 486 { 487 struct mdoc_node *p; 488 489 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 490 if ( ! node_append(mdoc, p)) 491 return(0); 492 mdoc->next = MDOC_NEXT_CHILD; 493 return(1); 494 } 495 496 497 int 498 mdoc_endbody_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok, 499 struct mdoc_node *body, enum mdoc_endbody end) 500 { 501 struct mdoc_node *p; 502 503 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 504 p->pending = body; 505 p->norm = body->norm; 506 p->end = end; 507 if ( ! node_append(mdoc, p)) 508 return(0); 509 mdoc->next = MDOC_NEXT_SIBLING; 510 return(1); 511 } 512 513 514 int 515 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, 516 enum mdoct tok, struct mdoc_arg *args) 517 { 518 struct mdoc_node *p; 519 520 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); 521 p->args = args; 522 if (p->args) 523 (args->refcnt)++; 524 525 switch (tok) { 526 case (MDOC_Bd): 527 /* FALLTHROUGH */ 528 case (MDOC_Bf): 529 /* FALLTHROUGH */ 530 case (MDOC_Bl): 531 /* FALLTHROUGH */ 532 case (MDOC_Rs): 533 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 534 break; 535 default: 536 break; 537 } 538 539 if ( ! node_append(mdoc, p)) 540 return(0); 541 mdoc->next = MDOC_NEXT_CHILD; 542 return(1); 543 } 544 545 546 int 547 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, 548 enum mdoct tok, struct mdoc_arg *args) 549 { 550 struct mdoc_node *p; 551 552 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); 553 p->args = args; 554 if (p->args) 555 (args->refcnt)++; 556 557 switch (tok) { 558 case (MDOC_An): 559 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 560 break; 561 default: 562 break; 563 } 564 565 if ( ! node_append(mdoc, p)) 566 return(0); 567 mdoc->next = MDOC_NEXT_CHILD; 568 return(1); 569 } 570 571 int 572 mdoc_word_alloc(struct mdoc *mdoc, int line, int pos, const char *p) 573 { 574 struct mdoc_node *n; 575 576 n = node_alloc(mdoc, line, pos, MDOC_MAX, MDOC_TEXT); 577 n->string = roff_strdup(mdoc->roff, p); 578 579 if ( ! node_append(mdoc, n)) 580 return(0); 581 582 mdoc->next = MDOC_NEXT_SIBLING; 583 return(1); 584 } 585 586 void 587 mdoc_word_append(struct mdoc *mdoc, const char *p) 588 { 589 struct mdoc_node *n; 590 char *addstr, *newstr; 591 592 n = mdoc->last; 593 addstr = roff_strdup(mdoc->roff, p); 594 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 595 free(addstr); 596 free(n->string); 597 n->string = newstr; 598 mdoc->next = MDOC_NEXT_SIBLING; 599 } 600 601 static void 602 mdoc_node_free(struct mdoc_node *p) 603 { 604 605 if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) 606 free(p->norm); 607 if (p->string) 608 free(p->string); 609 if (p->args) 610 mdoc_argv_free(p->args); 611 free(p); 612 } 613 614 615 static void 616 mdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n) 617 { 618 619 /* Adjust siblings. */ 620 621 if (n->prev) 622 n->prev->next = n->next; 623 if (n->next) 624 n->next->prev = n->prev; 625 626 /* Adjust parent. */ 627 628 if (n->parent) { 629 n->parent->nchild--; 630 if (n->parent->child == n) 631 n->parent->child = n->prev ? n->prev : n->next; 632 if (n->parent->last == n) 633 n->parent->last = n->prev ? n->prev : NULL; 634 } 635 636 /* Adjust parse point, if applicable. */ 637 638 if (mdoc && mdoc->last == n) { 639 if (n->prev) { 640 mdoc->last = n->prev; 641 mdoc->next = MDOC_NEXT_SIBLING; 642 } else { 643 mdoc->last = n->parent; 644 mdoc->next = MDOC_NEXT_CHILD; 645 } 646 } 647 648 if (mdoc && mdoc->first == n) 649 mdoc->first = NULL; 650 } 651 652 653 void 654 mdoc_node_delete(struct mdoc *mdoc, struct mdoc_node *p) 655 { 656 657 while (p->child) { 658 assert(p->nchild); 659 mdoc_node_delete(mdoc, p->child); 660 } 661 assert(0 == p->nchild); 662 663 mdoc_node_unlink(mdoc, p); 664 mdoc_node_free(p); 665 } 666 667 int 668 mdoc_node_relink(struct mdoc *mdoc, struct mdoc_node *p) 669 { 670 671 mdoc_node_unlink(mdoc, p); 672 return(node_append(mdoc, p)); 673 } 674 675 #if 0 676 /* 677 * Pre-treat a text line. 678 * Text lines can consist of equations, which must be handled apart from 679 * the regular text. 680 * Thus, use this function to step through a line checking if it has any 681 * equations embedded in it. 682 * This must handle multiple equations AND equations that do not end at 683 * the end-of-line, i.e., will re-enter in the next roff parse. 684 */ 685 static int 686 mdoc_preptext(struct mdoc *mdoc, int line, char *buf, int offs) 687 { 688 char *start, *end; 689 char delim; 690 691 while ('\0' != buf[offs]) { 692 /* Mark starting position if eqn is set. */ 693 start = NULL; 694 if ('\0' != (delim = roff_eqndelim(mdoc->roff))) 695 if (NULL != (start = strchr(buf + offs, delim))) 696 *start++ = '\0'; 697 698 /* Parse text as normal. */ 699 if ( ! mdoc_ptext(mdoc, line, buf, offs)) 700 return(0); 701 702 /* Continue only if an equation exists. */ 703 if (NULL == start) 704 break; 705 706 /* Read past the end of the equation. */ 707 offs += start - (buf + offs); 708 assert(start == &buf[offs]); 709 if (NULL != (end = strchr(buf + offs, delim))) { 710 *end++ = '\0'; 711 while (' ' == *end) 712 end++; 713 } 714 715 /* Parse the equation itself. */ 716 roff_openeqn(mdoc->roff, NULL, line, offs, buf); 717 718 /* Process a finished equation? */ 719 if (roff_closeeqn(mdoc->roff)) 720 if ( ! mdoc_addeqn(mdoc, roff_eqn(mdoc->roff))) 721 return(0); 722 offs += (end - (buf + offs)); 723 } 724 725 return(1); 726 } 727 #endif 728 729 /* 730 * Parse free-form text, that is, a line that does not begin with the 731 * control character. 732 */ 733 static int 734 mdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs) 735 { 736 char *c, *ws, *end; 737 struct mdoc_node *n; 738 739 /* No text before an initial macro. */ 740 741 if (SEC_NONE == mdoc->lastnamed) { 742 mdoc_pmsg(mdoc, line, offs, MANDOCERR_NOTEXT); 743 return(1); 744 } 745 746 assert(mdoc->last); 747 n = mdoc->last; 748 749 /* 750 * Divert directly to list processing if we're encountering a 751 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry 752 * (a MDOC_BODY means it's already open, in which case we should 753 * process within its context in the normal way). 754 */ 755 756 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 757 LIST_column == n->norm->Bl.type) { 758 /* `Bl' is open without any children. */ 759 mdoc->flags |= MDOC_FREECOL; 760 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 761 } 762 763 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 764 NULL != n->parent && 765 MDOC_Bl == n->parent->tok && 766 LIST_column == n->parent->norm->Bl.type) { 767 /* `Bl' has block-level `It' children. */ 768 mdoc->flags |= MDOC_FREECOL; 769 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 770 } 771 772 /* 773 * Search for the beginning of unescaped trailing whitespace (ws) 774 * and for the first character not to be output (end). 775 */ 776 777 /* FIXME: replace with strcspn(). */ 778 ws = NULL; 779 for (c = end = buf + offs; *c; c++) { 780 switch (*c) { 781 case ' ': 782 if (NULL == ws) 783 ws = c; 784 continue; 785 case '\t': 786 /* 787 * Always warn about trailing tabs, 788 * even outside literal context, 789 * where they should be put on the next line. 790 */ 791 if (NULL == ws) 792 ws = c; 793 /* 794 * Strip trailing tabs in literal context only; 795 * outside, they affect the next line. 796 */ 797 if (MDOC_LITERAL & mdoc->flags) 798 continue; 799 break; 800 case '\\': 801 /* Skip the escaped character, too, if any. */ 802 if (c[1]) 803 c++; 804 /* FALLTHROUGH */ 805 default: 806 ws = NULL; 807 break; 808 } 809 end = c + 1; 810 } 811 *end = '\0'; 812 813 if (ws) 814 mdoc_pmsg(mdoc, line, (int)(ws-buf), MANDOCERR_EOLNSPACE); 815 816 if ('\0' == buf[offs] && ! (MDOC_LITERAL & mdoc->flags)) { 817 mdoc_pmsg(mdoc, line, (int)(c-buf), MANDOCERR_NOBLANKLN); 818 819 /* 820 * Insert a `sp' in the case of a blank line. Technically, 821 * blank lines aren't allowed, but enough manuals assume this 822 * behaviour that we want to work around it. 823 */ 824 if ( ! mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL)) 825 return(0); 826 827 mdoc->next = MDOC_NEXT_SIBLING; 828 829 return(mdoc_valid_post(mdoc)); 830 } 831 832 if ( ! mdoc_word_alloc(mdoc, line, offs, buf+offs)) 833 return(0); 834 835 if (MDOC_LITERAL & mdoc->flags) 836 return(1); 837 838 /* 839 * End-of-sentence check. If the last character is an unescaped 840 * EOS character, then flag the node as being the end of a 841 * sentence. The front-end will know how to interpret this. 842 */ 843 844 assert(buf < end); 845 846 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 847 mdoc->last->flags |= MDOC_EOS; 848 849 return(1); 850 } 851 852 853 /* 854 * Parse a macro line, that is, a line beginning with the control 855 * character. 856 */ 857 static int 858 mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs) 859 { 860 enum mdoct tok; 861 int i, sv; 862 char mac[5]; 863 struct mdoc_node *n; 864 865 /* Empty post-control lines are ignored. */ 866 867 if ('"' == buf[offs]) { 868 mdoc_pmsg(mdoc, ln, offs, MANDOCERR_BADCOMMENT); 869 return(1); 870 } else if ('\0' == buf[offs]) 871 return(1); 872 873 sv = offs; 874 875 /* 876 * Copy the first word into a nil-terminated buffer. 877 * Stop copying when a tab, space, or eoln is encountered. 878 */ 879 880 i = 0; 881 while (i < 4 && '\0' != buf[offs] && 882 ' ' != buf[offs] && '\t' != buf[offs]) 883 mac[i++] = buf[offs++]; 884 885 mac[i] = '\0'; 886 887 tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; 888 889 if (MDOC_MAX == tok) { 890 mandoc_vmsg(MANDOCERR_MACRO, mdoc->parse, 891 ln, sv, "%s", buf + sv - 1); 892 return(1); 893 } 894 895 /* Disregard the first trailing tab, if applicable. */ 896 897 if ('\t' == buf[offs]) 898 offs++; 899 900 /* Jump to the next non-whitespace word. */ 901 902 while (buf[offs] && ' ' == buf[offs]) 903 offs++; 904 905 /* 906 * Trailing whitespace. Note that tabs are allowed to be passed 907 * into the parser as "text", so we only warn about spaces here. 908 */ 909 910 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 911 mdoc_pmsg(mdoc, ln, offs - 1, MANDOCERR_EOLNSPACE); 912 913 /* 914 * If an initial macro or a list invocation, divert directly 915 * into macro processing. 916 */ 917 918 if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) { 919 if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) 920 goto err; 921 return(1); 922 } 923 924 n = mdoc->last; 925 assert(mdoc->last); 926 927 /* 928 * If the first macro of a `Bl -column', open an `It' block 929 * context around the parsed macro. 930 */ 931 932 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 933 LIST_column == n->norm->Bl.type) { 934 mdoc->flags |= MDOC_FREECOL; 935 if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)) 936 goto err; 937 return(1); 938 } 939 940 /* 941 * If we're following a block-level `It' within a `Bl -column' 942 * context (perhaps opened in the above block or in ptext()), 943 * then open an `It' block context around the parsed macro. 944 */ 945 946 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 947 NULL != n->parent && 948 MDOC_Bl == n->parent->tok && 949 LIST_column == n->parent->norm->Bl.type) { 950 mdoc->flags |= MDOC_FREECOL; 951 if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)) 952 goto err; 953 return(1); 954 } 955 956 /* Normal processing of a macro. */ 957 958 if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) 959 goto err; 960 961 /* In quick mode (for mandocdb), abort after the NAME section. */ 962 963 if (mdoc->quick && MDOC_Sh == tok && 964 SEC_NAME != mdoc->last->sec) 965 return(2); 966 967 return(1); 968 969 err: /* Error out. */ 970 971 mdoc->flags |= MDOC_HALT; 972 return(0); 973 } 974 975 enum mdelim 976 mdoc_isdelim(const char *p) 977 { 978 979 if ('\0' == p[0]) 980 return(DELIM_NONE); 981 982 if ('\0' == p[1]) 983 switch (p[0]) { 984 case('('): 985 /* FALLTHROUGH */ 986 case('['): 987 return(DELIM_OPEN); 988 case('|'): 989 return(DELIM_MIDDLE); 990 case('.'): 991 /* FALLTHROUGH */ 992 case(','): 993 /* FALLTHROUGH */ 994 case(';'): 995 /* FALLTHROUGH */ 996 case(':'): 997 /* FALLTHROUGH */ 998 case('?'): 999 /* FALLTHROUGH */ 1000 case('!'): 1001 /* FALLTHROUGH */ 1002 case(')'): 1003 /* FALLTHROUGH */ 1004 case(']'): 1005 return(DELIM_CLOSE); 1006 default: 1007 return(DELIM_NONE); 1008 } 1009 1010 if ('\\' != p[0]) 1011 return(DELIM_NONE); 1012 1013 if (0 == strcmp(p + 1, ".")) 1014 return(DELIM_CLOSE); 1015 if (0 == strcmp(p + 1, "fR|\\fP")) 1016 return(DELIM_MIDDLE); 1017 1018 return(DELIM_NONE); 1019 } 1020 1021 void 1022 mdoc_deroff(char **dest, const struct mdoc_node *n) 1023 { 1024 char *cp; 1025 size_t sz; 1026 1027 if (MDOC_TEXT != n->type) { 1028 for (n = n->child; n; n = n->next) 1029 mdoc_deroff(dest, n); 1030 return; 1031 } 1032 1033 /* Skip leading whitespace. */ 1034 1035 for (cp = n->string; '\0' != *cp; cp++) 1036 if (0 == isspace((unsigned char)*cp)) 1037 break; 1038 1039 /* Skip trailing whitespace. */ 1040 1041 for (sz = strlen(cp); sz; sz--) 1042 if (0 == isspace((unsigned char)cp[sz-1])) 1043 break; 1044 1045 /* Skip empty strings. */ 1046 1047 if (0 == sz) 1048 return; 1049 1050 if (NULL == *dest) { 1051 *dest = mandoc_strndup(cp, sz); 1052 return; 1053 } 1054 1055 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1056 free(*dest); 1057 *dest = cp; 1058 } 1059