1 /* $Id: mdoc.c,v 1.93 2012/11/17 00:25:20 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <stdarg.h> 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <time.h> 26 27 #include "mdoc.h" 28 #include "mandoc.h" 29 #include "libmdoc.h" 30 #include "libmandoc.h" 31 32 const char *const __mdoc_macronames[MDOC_MAX] = { 33 "Ap", "Dd", "Dt", "Os", 34 "Sh", "Ss", "Pp", "D1", 35 "Dl", "Bd", "Ed", "Bl", 36 "El", "It", "Ad", "An", 37 "Ar", "Cd", "Cm", "Dv", 38 "Er", "Ev", "Ex", "Fa", 39 "Fd", "Fl", "Fn", "Ft", 40 "Ic", "In", "Li", "Nd", 41 "Nm", "Op", "Ot", "Pa", 42 "Rv", "St", "Va", "Vt", 43 /* LINTED */ 44 "Xr", "%A", "%B", "%D", 45 /* LINTED */ 46 "%I", "%J", "%N", "%O", 47 /* LINTED */ 48 "%P", "%R", "%T", "%V", 49 "Ac", "Ao", "Aq", "At", 50 "Bc", "Bf", "Bo", "Bq", 51 "Bsx", "Bx", "Db", "Dc", 52 "Do", "Dq", "Ec", "Ef", 53 "Em", "Eo", "Fx", "Ms", 54 "No", "Ns", "Nx", "Ox", 55 "Pc", "Pf", "Po", "Pq", 56 "Qc", "Ql", "Qo", "Qq", 57 "Re", "Rs", "Sc", "So", 58 "Sq", "Sm", "Sx", "Sy", 59 "Tn", "Ux", "Xc", "Xo", 60 "Fo", "Fc", "Oo", "Oc", 61 "Bk", "Ek", "Bt", "Hf", 62 "Fr", "Ud", "Lb", "Lp", 63 "Lk", "Mt", "Brq", "Bro", 64 /* LINTED */ 65 "Brc", "%C", "Es", "En", 66 /* LINTED */ 67 "Dx", "%Q", "br", "sp", 68 /* LINTED */ 69 "%U", "Ta" 70 }; 71 72 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 73 "split", "nosplit", "ragged", 74 "unfilled", "literal", "file", 75 "offset", "bullet", "dash", 76 "hyphen", "item", "enum", 77 "tag", "diag", "hang", 78 "ohang", "inset", "column", 79 "width", "compact", "std", 80 "filled", "words", "emphasis", 81 "symbolic", "nested", "centered" 82 }; 83 84 const char * const *mdoc_macronames = __mdoc_macronames; 85 const char * const *mdoc_argnames = __mdoc_argnames; 86 87 static void mdoc_node_free(struct mdoc_node *); 88 static void mdoc_node_unlink(struct mdoc *, 89 struct mdoc_node *); 90 static void mdoc_free1(struct mdoc *); 91 static void mdoc_alloc1(struct mdoc *); 92 static struct mdoc_node *node_alloc(struct mdoc *, int, int, 93 enum mdoct, enum mdoc_type); 94 static int node_append(struct mdoc *, 95 struct mdoc_node *); 96 #if 0 97 static int mdoc_preptext(struct mdoc *, int, char *, int); 98 #endif 99 static int mdoc_ptext(struct mdoc *, int, char *, int); 100 static int mdoc_pmacro(struct mdoc *, int, char *, int); 101 102 const struct mdoc_node * 103 mdoc_node(const struct mdoc *mdoc) 104 { 105 106 assert( ! (MDOC_HALT & mdoc->flags)); 107 return(mdoc->first); 108 } 109 110 111 const struct mdoc_meta * 112 mdoc_meta(const struct mdoc *mdoc) 113 { 114 115 assert( ! (MDOC_HALT & mdoc->flags)); 116 return(&mdoc->meta); 117 } 118 119 120 /* 121 * Frees volatile resources (parse tree, meta-data, fields). 122 */ 123 static void 124 mdoc_free1(struct mdoc *mdoc) 125 { 126 127 if (mdoc->first) 128 mdoc_node_delete(mdoc, mdoc->first); 129 if (mdoc->meta.title) 130 free(mdoc->meta.title); 131 if (mdoc->meta.os) 132 free(mdoc->meta.os); 133 if (mdoc->meta.name) 134 free(mdoc->meta.name); 135 if (mdoc->meta.arch) 136 free(mdoc->meta.arch); 137 if (mdoc->meta.vol) 138 free(mdoc->meta.vol); 139 if (mdoc->meta.msec) 140 free(mdoc->meta.msec); 141 if (mdoc->meta.date) 142 free(mdoc->meta.date); 143 } 144 145 146 /* 147 * Allocate all volatile resources (parse tree, meta-data, fields). 148 */ 149 static void 150 mdoc_alloc1(struct mdoc *mdoc) 151 { 152 153 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 154 mdoc->flags = 0; 155 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 156 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 157 mdoc->first = mdoc->last; 158 mdoc->last->type = MDOC_ROOT; 159 mdoc->last->tok = MDOC_MAX; 160 mdoc->next = MDOC_NEXT_CHILD; 161 } 162 163 164 /* 165 * Free up volatile resources (see mdoc_free1()) then re-initialises the 166 * data with mdoc_alloc1(). After invocation, parse data has been reset 167 * and the parser is ready for re-invocation on a new tree; however, 168 * cross-parse non-volatile data is kept intact. 169 */ 170 void 171 mdoc_reset(struct mdoc *mdoc) 172 { 173 174 mdoc_free1(mdoc); 175 mdoc_alloc1(mdoc); 176 } 177 178 179 /* 180 * Completely free up all volatile and non-volatile parse resources. 181 * After invocation, the pointer is no longer usable. 182 */ 183 void 184 mdoc_free(struct mdoc *mdoc) 185 { 186 187 mdoc_free1(mdoc); 188 free(mdoc); 189 } 190 191 192 /* 193 * Allocate volatile and non-volatile parse resources. 194 */ 195 struct mdoc * 196 mdoc_alloc(struct roff *roff, struct mparse *parse, char *defos) 197 { 198 struct mdoc *p; 199 200 p = mandoc_calloc(1, sizeof(struct mdoc)); 201 202 p->parse = parse; 203 p->defos = defos; 204 p->roff = roff; 205 206 mdoc_hash_init(); 207 mdoc_alloc1(p); 208 return(p); 209 } 210 211 212 /* 213 * Climb back up the parse tree, validating open scopes. Mostly calls 214 * through to macro_end() in macro.c. 215 */ 216 int 217 mdoc_endparse(struct mdoc *mdoc) 218 { 219 220 assert( ! (MDOC_HALT & mdoc->flags)); 221 if (mdoc_macroend(mdoc)) 222 return(1); 223 mdoc->flags |= MDOC_HALT; 224 return(0); 225 } 226 227 int 228 mdoc_addeqn(struct mdoc *mdoc, const struct eqn *ep) 229 { 230 struct mdoc_node *n; 231 232 assert( ! (MDOC_HALT & mdoc->flags)); 233 234 /* No text before an initial macro. */ 235 236 if (SEC_NONE == mdoc->lastnamed) { 237 mdoc_pmsg(mdoc, ep->ln, ep->pos, MANDOCERR_NOTEXT); 238 return(1); 239 } 240 241 n = node_alloc(mdoc, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); 242 n->eqn = ep; 243 244 if ( ! node_append(mdoc, n)) 245 return(0); 246 247 mdoc->next = MDOC_NEXT_SIBLING; 248 return(1); 249 } 250 251 int 252 mdoc_addspan(struct mdoc *mdoc, const struct tbl_span *sp) 253 { 254 struct mdoc_node *n; 255 256 assert( ! (MDOC_HALT & mdoc->flags)); 257 258 /* No text before an initial macro. */ 259 260 if (SEC_NONE == mdoc->lastnamed) { 261 mdoc_pmsg(mdoc, sp->line, 0, MANDOCERR_NOTEXT); 262 return(1); 263 } 264 265 n = node_alloc(mdoc, sp->line, 0, MDOC_MAX, MDOC_TBL); 266 n->span = sp; 267 268 if ( ! node_append(mdoc, n)) 269 return(0); 270 271 mdoc->next = MDOC_NEXT_SIBLING; 272 return(1); 273 } 274 275 276 /* 277 * Main parse routine. Parses a single line -- really just hands off to 278 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 279 */ 280 int 281 mdoc_parseln(struct mdoc *mdoc, int ln, char *buf, int offs) 282 { 283 284 assert( ! (MDOC_HALT & mdoc->flags)); 285 286 mdoc->flags |= MDOC_NEWLINE; 287 288 /* 289 * Let the roff nS register switch SYNOPSIS mode early, 290 * such that the parser knows at all times 291 * whether this mode is on or off. 292 * Note that this mode is also switched by the Sh macro. 293 */ 294 if (roff_regisset(mdoc->roff, REG_nS)) { 295 if (roff_regget(mdoc->roff, REG_nS)) 296 mdoc->flags |= MDOC_SYNOPSIS; 297 else 298 mdoc->flags &= ~MDOC_SYNOPSIS; 299 } 300 301 return(roff_getcontrol(mdoc->roff, buf, &offs) ? 302 mdoc_pmacro(mdoc, ln, buf, offs) : 303 mdoc_ptext(mdoc, ln, buf, offs)); 304 } 305 306 int 307 mdoc_macro(MACRO_PROT_ARGS) 308 { 309 assert(tok < MDOC_MAX); 310 311 /* If we're in the body, deny prologue calls. */ 312 313 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 314 MDOC_PBODY & mdoc->flags) { 315 mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADBODY); 316 return(1); 317 } 318 319 /* If we're in the prologue, deny "body" macros. */ 320 321 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 322 ! (MDOC_PBODY & mdoc->flags)) { 323 mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADPROLOG); 324 if (NULL == mdoc->meta.msec) 325 mdoc->meta.msec = mandoc_strdup("1"); 326 if (NULL == mdoc->meta.title) 327 mdoc->meta.title = mandoc_strdup("UNKNOWN"); 328 if (NULL == mdoc->meta.vol) 329 mdoc->meta.vol = mandoc_strdup("LOCAL"); 330 if (NULL == mdoc->meta.os) 331 mdoc->meta.os = mandoc_strdup("LOCAL"); 332 if (NULL == mdoc->meta.date) 333 mdoc->meta.date = mandoc_normdate 334 (mdoc->parse, NULL, line, ppos); 335 mdoc->flags |= MDOC_PBODY; 336 } 337 338 return((*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf)); 339 } 340 341 342 static int 343 node_append(struct mdoc *mdoc, struct mdoc_node *p) 344 { 345 346 assert(mdoc->last); 347 assert(mdoc->first); 348 assert(MDOC_ROOT != p->type); 349 350 switch (mdoc->next) { 351 case (MDOC_NEXT_SIBLING): 352 mdoc->last->next = p; 353 p->prev = mdoc->last; 354 p->parent = mdoc->last->parent; 355 break; 356 case (MDOC_NEXT_CHILD): 357 mdoc->last->child = p; 358 p->parent = mdoc->last; 359 break; 360 default: 361 abort(); 362 /* NOTREACHED */ 363 } 364 365 p->parent->nchild++; 366 367 /* 368 * Copy over the normalised-data pointer of our parent. Not 369 * everybody has one, but copying a null pointer is fine. 370 */ 371 372 switch (p->type) { 373 case (MDOC_BODY): 374 if (ENDBODY_NOT != p->end) 375 break; 376 /* FALLTHROUGH */ 377 case (MDOC_TAIL): 378 /* FALLTHROUGH */ 379 case (MDOC_HEAD): 380 p->norm = p->parent->norm; 381 break; 382 default: 383 break; 384 } 385 386 if ( ! mdoc_valid_pre(mdoc, p)) 387 return(0); 388 389 switch (p->type) { 390 case (MDOC_HEAD): 391 assert(MDOC_BLOCK == p->parent->type); 392 p->parent->head = p; 393 break; 394 case (MDOC_TAIL): 395 assert(MDOC_BLOCK == p->parent->type); 396 p->parent->tail = p; 397 break; 398 case (MDOC_BODY): 399 if (p->end) 400 break; 401 assert(MDOC_BLOCK == p->parent->type); 402 p->parent->body = p; 403 break; 404 default: 405 break; 406 } 407 408 mdoc->last = p; 409 410 switch (p->type) { 411 case (MDOC_TBL): 412 /* FALLTHROUGH */ 413 case (MDOC_TEXT): 414 if ( ! mdoc_valid_post(mdoc)) 415 return(0); 416 break; 417 default: 418 break; 419 } 420 421 return(1); 422 } 423 424 425 static struct mdoc_node * 426 node_alloc(struct mdoc *mdoc, int line, int pos, 427 enum mdoct tok, enum mdoc_type type) 428 { 429 struct mdoc_node *p; 430 431 p = mandoc_calloc(1, sizeof(struct mdoc_node)); 432 p->sec = mdoc->lastsec; 433 p->line = line; 434 p->pos = pos; 435 p->tok = tok; 436 p->type = type; 437 438 /* Flag analysis. */ 439 440 if (MDOC_SYNOPSIS & mdoc->flags) 441 p->flags |= MDOC_SYNPRETTY; 442 else 443 p->flags &= ~MDOC_SYNPRETTY; 444 if (MDOC_NEWLINE & mdoc->flags) 445 p->flags |= MDOC_LINE; 446 mdoc->flags &= ~MDOC_NEWLINE; 447 448 return(p); 449 } 450 451 452 int 453 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 454 { 455 struct mdoc_node *p; 456 457 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); 458 if ( ! node_append(mdoc, p)) 459 return(0); 460 mdoc->next = MDOC_NEXT_CHILD; 461 return(1); 462 } 463 464 465 int 466 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 467 { 468 struct mdoc_node *p; 469 470 assert(mdoc->first); 471 assert(mdoc->last); 472 473 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); 474 if ( ! node_append(mdoc, p)) 475 return(0); 476 mdoc->next = MDOC_NEXT_CHILD; 477 return(1); 478 } 479 480 481 int 482 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 483 { 484 struct mdoc_node *p; 485 486 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 487 if ( ! node_append(mdoc, p)) 488 return(0); 489 mdoc->next = MDOC_NEXT_CHILD; 490 return(1); 491 } 492 493 494 int 495 mdoc_endbody_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok, 496 struct mdoc_node *body, enum mdoc_endbody end) 497 { 498 struct mdoc_node *p; 499 500 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 501 p->pending = body; 502 p->norm = body->norm; 503 p->end = end; 504 if ( ! node_append(mdoc, p)) 505 return(0); 506 mdoc->next = MDOC_NEXT_SIBLING; 507 return(1); 508 } 509 510 511 int 512 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, 513 enum mdoct tok, struct mdoc_arg *args) 514 { 515 struct mdoc_node *p; 516 517 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); 518 p->args = args; 519 if (p->args) 520 (args->refcnt)++; 521 522 switch (tok) { 523 case (MDOC_Bd): 524 /* FALLTHROUGH */ 525 case (MDOC_Bf): 526 /* FALLTHROUGH */ 527 case (MDOC_Bl): 528 /* FALLTHROUGH */ 529 case (MDOC_Rs): 530 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 531 break; 532 default: 533 break; 534 } 535 536 if ( ! node_append(mdoc, p)) 537 return(0); 538 mdoc->next = MDOC_NEXT_CHILD; 539 return(1); 540 } 541 542 543 int 544 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, 545 enum mdoct tok, struct mdoc_arg *args) 546 { 547 struct mdoc_node *p; 548 549 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); 550 p->args = args; 551 if (p->args) 552 (args->refcnt)++; 553 554 switch (tok) { 555 case (MDOC_An): 556 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 557 break; 558 default: 559 break; 560 } 561 562 if ( ! node_append(mdoc, p)) 563 return(0); 564 mdoc->next = MDOC_NEXT_CHILD; 565 return(1); 566 } 567 568 int 569 mdoc_word_alloc(struct mdoc *mdoc, int line, int pos, const char *p) 570 { 571 struct mdoc_node *n; 572 573 n = node_alloc(mdoc, line, pos, MDOC_MAX, MDOC_TEXT); 574 n->string = roff_strdup(mdoc->roff, p); 575 576 if ( ! node_append(mdoc, n)) 577 return(0); 578 579 mdoc->next = MDOC_NEXT_SIBLING; 580 return(1); 581 } 582 583 584 static void 585 mdoc_node_free(struct mdoc_node *p) 586 { 587 588 if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) 589 free(p->norm); 590 if (p->string) 591 free(p->string); 592 if (p->args) 593 mdoc_argv_free(p->args); 594 free(p); 595 } 596 597 598 static void 599 mdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n) 600 { 601 602 /* Adjust siblings. */ 603 604 if (n->prev) 605 n->prev->next = n->next; 606 if (n->next) 607 n->next->prev = n->prev; 608 609 /* Adjust parent. */ 610 611 if (n->parent) { 612 n->parent->nchild--; 613 if (n->parent->child == n) 614 n->parent->child = n->prev ? n->prev : n->next; 615 if (n->parent->last == n) 616 n->parent->last = n->prev ? n->prev : NULL; 617 } 618 619 /* Adjust parse point, if applicable. */ 620 621 if (mdoc && mdoc->last == n) { 622 if (n->prev) { 623 mdoc->last = n->prev; 624 mdoc->next = MDOC_NEXT_SIBLING; 625 } else { 626 mdoc->last = n->parent; 627 mdoc->next = MDOC_NEXT_CHILD; 628 } 629 } 630 631 if (mdoc && mdoc->first == n) 632 mdoc->first = NULL; 633 } 634 635 636 void 637 mdoc_node_delete(struct mdoc *mdoc, struct mdoc_node *p) 638 { 639 640 while (p->child) { 641 assert(p->nchild); 642 mdoc_node_delete(mdoc, p->child); 643 } 644 assert(0 == p->nchild); 645 646 mdoc_node_unlink(mdoc, p); 647 mdoc_node_free(p); 648 } 649 650 int 651 mdoc_node_relink(struct mdoc *mdoc, struct mdoc_node *p) 652 { 653 654 mdoc_node_unlink(mdoc, p); 655 return(node_append(mdoc, p)); 656 } 657 658 #if 0 659 /* 660 * Pre-treat a text line. 661 * Text lines can consist of equations, which must be handled apart from 662 * the regular text. 663 * Thus, use this function to step through a line checking if it has any 664 * equations embedded in it. 665 * This must handle multiple equations AND equations that do not end at 666 * the end-of-line, i.e., will re-enter in the next roff parse. 667 */ 668 static int 669 mdoc_preptext(struct mdoc *mdoc, int line, char *buf, int offs) 670 { 671 char *start, *end; 672 char delim; 673 674 while ('\0' != buf[offs]) { 675 /* Mark starting position if eqn is set. */ 676 start = NULL; 677 if ('\0' != (delim = roff_eqndelim(mdoc->roff))) 678 if (NULL != (start = strchr(buf + offs, delim))) 679 *start++ = '\0'; 680 681 /* Parse text as normal. */ 682 if ( ! mdoc_ptext(mdoc, line, buf, offs)) 683 return(0); 684 685 /* Continue only if an equation exists. */ 686 if (NULL == start) 687 break; 688 689 /* Read past the end of the equation. */ 690 offs += start - (buf + offs); 691 assert(start == &buf[offs]); 692 if (NULL != (end = strchr(buf + offs, delim))) { 693 *end++ = '\0'; 694 while (' ' == *end) 695 end++; 696 } 697 698 /* Parse the equation itself. */ 699 roff_openeqn(mdoc->roff, NULL, line, offs, buf); 700 701 /* Process a finished equation? */ 702 if (roff_closeeqn(mdoc->roff)) 703 if ( ! mdoc_addeqn(mdoc, roff_eqn(mdoc->roff))) 704 return(0); 705 offs += (end - (buf + offs)); 706 } 707 708 return(1); 709 } 710 #endif 711 712 /* 713 * Parse free-form text, that is, a line that does not begin with the 714 * control character. 715 */ 716 static int 717 mdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs) 718 { 719 char *c, *ws, *end; 720 struct mdoc_node *n; 721 722 /* No text before an initial macro. */ 723 724 if (SEC_NONE == mdoc->lastnamed) { 725 mdoc_pmsg(mdoc, line, offs, MANDOCERR_NOTEXT); 726 return(1); 727 } 728 729 assert(mdoc->last); 730 n = mdoc->last; 731 732 /* 733 * Divert directly to list processing if we're encountering a 734 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry 735 * (a MDOC_BODY means it's already open, in which case we should 736 * process within its context in the normal way). 737 */ 738 739 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 740 LIST_column == n->norm->Bl.type) { 741 /* `Bl' is open without any children. */ 742 mdoc->flags |= MDOC_FREECOL; 743 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 744 } 745 746 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 747 NULL != n->parent && 748 MDOC_Bl == n->parent->tok && 749 LIST_column == n->parent->norm->Bl.type) { 750 /* `Bl' has block-level `It' children. */ 751 mdoc->flags |= MDOC_FREECOL; 752 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 753 } 754 755 /* 756 * Search for the beginning of unescaped trailing whitespace (ws) 757 * and for the first character not to be output (end). 758 */ 759 760 /* FIXME: replace with strcspn(). */ 761 ws = NULL; 762 for (c = end = buf + offs; *c; c++) { 763 switch (*c) { 764 case ' ': 765 if (NULL == ws) 766 ws = c; 767 continue; 768 case '\t': 769 /* 770 * Always warn about trailing tabs, 771 * even outside literal context, 772 * where they should be put on the next line. 773 */ 774 if (NULL == ws) 775 ws = c; 776 /* 777 * Strip trailing tabs in literal context only; 778 * outside, they affect the next line. 779 */ 780 if (MDOC_LITERAL & mdoc->flags) 781 continue; 782 break; 783 case '\\': 784 /* Skip the escaped character, too, if any. */ 785 if (c[1]) 786 c++; 787 /* FALLTHROUGH */ 788 default: 789 ws = NULL; 790 break; 791 } 792 end = c + 1; 793 } 794 *end = '\0'; 795 796 if (ws) 797 mdoc_pmsg(mdoc, line, (int)(ws-buf), MANDOCERR_EOLNSPACE); 798 799 if ('\0' == buf[offs] && ! (MDOC_LITERAL & mdoc->flags)) { 800 mdoc_pmsg(mdoc, line, (int)(c-buf), MANDOCERR_NOBLANKLN); 801 802 /* 803 * Insert a `sp' in the case of a blank line. Technically, 804 * blank lines aren't allowed, but enough manuals assume this 805 * behaviour that we want to work around it. 806 */ 807 if ( ! mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL)) 808 return(0); 809 810 mdoc->next = MDOC_NEXT_SIBLING; 811 812 return(mdoc_valid_post(mdoc)); 813 } 814 815 if ( ! mdoc_word_alloc(mdoc, line, offs, buf+offs)) 816 return(0); 817 818 if (MDOC_LITERAL & mdoc->flags) 819 return(1); 820 821 /* 822 * End-of-sentence check. If the last character is an unescaped 823 * EOS character, then flag the node as being the end of a 824 * sentence. The front-end will know how to interpret this. 825 */ 826 827 assert(buf < end); 828 829 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0)) 830 mdoc->last->flags |= MDOC_EOS; 831 832 return(1); 833 } 834 835 836 /* 837 * Parse a macro line, that is, a line beginning with the control 838 * character. 839 */ 840 static int 841 mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs) 842 { 843 enum mdoct tok; 844 int i, sv; 845 char mac[5]; 846 struct mdoc_node *n; 847 848 /* Empty post-control lines are ignored. */ 849 850 if ('"' == buf[offs]) { 851 mdoc_pmsg(mdoc, ln, offs, MANDOCERR_BADCOMMENT); 852 return(1); 853 } else if ('\0' == buf[offs]) 854 return(1); 855 856 sv = offs; 857 858 /* 859 * Copy the first word into a nil-terminated buffer. 860 * Stop copying when a tab, space, or eoln is encountered. 861 */ 862 863 i = 0; 864 while (i < 4 && '\0' != buf[offs] && 865 ' ' != buf[offs] && '\t' != buf[offs]) 866 mac[i++] = buf[offs++]; 867 868 mac[i] = '\0'; 869 870 tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; 871 872 if (MDOC_MAX == tok) { 873 mandoc_vmsg(MANDOCERR_MACRO, mdoc->parse, 874 ln, sv, "%s", buf + sv - 1); 875 return(1); 876 } 877 878 /* Disregard the first trailing tab, if applicable. */ 879 880 if ('\t' == buf[offs]) 881 offs++; 882 883 /* Jump to the next non-whitespace word. */ 884 885 while (buf[offs] && ' ' == buf[offs]) 886 offs++; 887 888 /* 889 * Trailing whitespace. Note that tabs are allowed to be passed 890 * into the parser as "text", so we only warn about spaces here. 891 */ 892 893 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 894 mdoc_pmsg(mdoc, ln, offs - 1, MANDOCERR_EOLNSPACE); 895 896 /* 897 * If an initial macro or a list invocation, divert directly 898 * into macro processing. 899 */ 900 901 if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) { 902 if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) 903 goto err; 904 return(1); 905 } 906 907 n = mdoc->last; 908 assert(mdoc->last); 909 910 /* 911 * If the first macro of a `Bl -column', open an `It' block 912 * context around the parsed macro. 913 */ 914 915 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 916 LIST_column == n->norm->Bl.type) { 917 mdoc->flags |= MDOC_FREECOL; 918 if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)) 919 goto err; 920 return(1); 921 } 922 923 /* 924 * If we're following a block-level `It' within a `Bl -column' 925 * context (perhaps opened in the above block or in ptext()), 926 * then open an `It' block context around the parsed macro. 927 */ 928 929 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 930 NULL != n->parent && 931 MDOC_Bl == n->parent->tok && 932 LIST_column == n->parent->norm->Bl.type) { 933 mdoc->flags |= MDOC_FREECOL; 934 if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)) 935 goto err; 936 return(1); 937 } 938 939 /* Normal processing of a macro. */ 940 941 if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) 942 goto err; 943 944 return(1); 945 946 err: /* Error out. */ 947 948 mdoc->flags |= MDOC_HALT; 949 return(0); 950 } 951 952 enum mdelim 953 mdoc_isdelim(const char *p) 954 { 955 956 if ('\0' == p[0]) 957 return(DELIM_NONE); 958 959 if ('\0' == p[1]) 960 switch (p[0]) { 961 case('('): 962 /* FALLTHROUGH */ 963 case('['): 964 return(DELIM_OPEN); 965 case('|'): 966 return(DELIM_MIDDLE); 967 case('.'): 968 /* FALLTHROUGH */ 969 case(','): 970 /* FALLTHROUGH */ 971 case(';'): 972 /* FALLTHROUGH */ 973 case(':'): 974 /* FALLTHROUGH */ 975 case('?'): 976 /* FALLTHROUGH */ 977 case('!'): 978 /* FALLTHROUGH */ 979 case(')'): 980 /* FALLTHROUGH */ 981 case(']'): 982 return(DELIM_CLOSE); 983 default: 984 return(DELIM_NONE); 985 } 986 987 if ('\\' != p[0]) 988 return(DELIM_NONE); 989 990 if (0 == strcmp(p + 1, ".")) 991 return(DELIM_CLOSE); 992 if (0 == strcmp(p + 1, "fR|\\fP")) 993 return(DELIM_MIDDLE); 994 995 return(DELIM_NONE); 996 } 997