1 /* $Id: mdoc.c,v 1.85 2011/09/18 15:54:48 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <stdarg.h> 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <time.h> 26 27 #include "mdoc.h" 28 #include "mandoc.h" 29 #include "libmdoc.h" 30 #include "libmandoc.h" 31 32 const char *const __mdoc_macronames[MDOC_MAX] = { 33 "Ap", "Dd", "Dt", "Os", 34 "Sh", "Ss", "Pp", "D1", 35 "Dl", "Bd", "Ed", "Bl", 36 "El", "It", "Ad", "An", 37 "Ar", "Cd", "Cm", "Dv", 38 "Er", "Ev", "Ex", "Fa", 39 "Fd", "Fl", "Fn", "Ft", 40 "Ic", "In", "Li", "Nd", 41 "Nm", "Op", "Ot", "Pa", 42 "Rv", "St", "Va", "Vt", 43 /* LINTED */ 44 "Xr", "%A", "%B", "%D", 45 /* LINTED */ 46 "%I", "%J", "%N", "%O", 47 /* LINTED */ 48 "%P", "%R", "%T", "%V", 49 "Ac", "Ao", "Aq", "At", 50 "Bc", "Bf", "Bo", "Bq", 51 "Bsx", "Bx", "Db", "Dc", 52 "Do", "Dq", "Ec", "Ef", 53 "Em", "Eo", "Fx", "Ms", 54 "No", "Ns", "Nx", "Ox", 55 "Pc", "Pf", "Po", "Pq", 56 "Qc", "Ql", "Qo", "Qq", 57 "Re", "Rs", "Sc", "So", 58 "Sq", "Sm", "Sx", "Sy", 59 "Tn", "Ux", "Xc", "Xo", 60 "Fo", "Fc", "Oo", "Oc", 61 "Bk", "Ek", "Bt", "Hf", 62 "Fr", "Ud", "Lb", "Lp", 63 "Lk", "Mt", "Brq", "Bro", 64 /* LINTED */ 65 "Brc", "%C", "Es", "En", 66 /* LINTED */ 67 "Dx", "%Q", "br", "sp", 68 /* LINTED */ 69 "%U", "Ta" 70 }; 71 72 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 73 "split", "nosplit", "ragged", 74 "unfilled", "literal", "file", 75 "offset", "bullet", "dash", 76 "hyphen", "item", "enum", 77 "tag", "diag", "hang", 78 "ohang", "inset", "column", 79 "width", "compact", "std", 80 "filled", "words", "emphasis", 81 "symbolic", "nested", "centered" 82 }; 83 84 const char * const *mdoc_macronames = __mdoc_macronames; 85 const char * const *mdoc_argnames = __mdoc_argnames; 86 87 static void mdoc_node_free(struct mdoc_node *); 88 static void mdoc_node_unlink(struct mdoc *, 89 struct mdoc_node *); 90 static void mdoc_free1(struct mdoc *); 91 static void mdoc_alloc1(struct mdoc *); 92 static struct mdoc_node *node_alloc(struct mdoc *, int, int, 93 enum mdoct, enum mdoc_type); 94 static int node_append(struct mdoc *, 95 struct mdoc_node *); 96 #if 0 97 static int mdoc_preptext(struct mdoc *, int, char *, int); 98 #endif 99 static int mdoc_ptext(struct mdoc *, int, char *, int); 100 static int mdoc_pmacro(struct mdoc *, int, char *, int); 101 102 const struct mdoc_node * 103 mdoc_node(const struct mdoc *m) 104 { 105 106 assert( ! (MDOC_HALT & m->flags)); 107 return(m->first); 108 } 109 110 111 const struct mdoc_meta * 112 mdoc_meta(const struct mdoc *m) 113 { 114 115 assert( ! (MDOC_HALT & m->flags)); 116 return(&m->meta); 117 } 118 119 120 /* 121 * Frees volatile resources (parse tree, meta-data, fields). 122 */ 123 static void 124 mdoc_free1(struct mdoc *mdoc) 125 { 126 127 if (mdoc->first) 128 mdoc_node_delete(mdoc, mdoc->first); 129 if (mdoc->meta.title) 130 free(mdoc->meta.title); 131 if (mdoc->meta.os) 132 free(mdoc->meta.os); 133 if (mdoc->meta.name) 134 free(mdoc->meta.name); 135 if (mdoc->meta.arch) 136 free(mdoc->meta.arch); 137 if (mdoc->meta.vol) 138 free(mdoc->meta.vol); 139 if (mdoc->meta.msec) 140 free(mdoc->meta.msec); 141 if (mdoc->meta.date) 142 free(mdoc->meta.date); 143 } 144 145 146 /* 147 * Allocate all volatile resources (parse tree, meta-data, fields). 148 */ 149 static void 150 mdoc_alloc1(struct mdoc *mdoc) 151 { 152 153 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 154 mdoc->flags = 0; 155 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 156 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 157 mdoc->first = mdoc->last; 158 mdoc->last->type = MDOC_ROOT; 159 mdoc->next = MDOC_NEXT_CHILD; 160 } 161 162 163 /* 164 * Free up volatile resources (see mdoc_free1()) then re-initialises the 165 * data with mdoc_alloc1(). After invocation, parse data has been reset 166 * and the parser is ready for re-invocation on a new tree; however, 167 * cross-parse non-volatile data is kept intact. 168 */ 169 void 170 mdoc_reset(struct mdoc *mdoc) 171 { 172 173 mdoc_free1(mdoc); 174 mdoc_alloc1(mdoc); 175 } 176 177 178 /* 179 * Completely free up all volatile and non-volatile parse resources. 180 * After invocation, the pointer is no longer usable. 181 */ 182 void 183 mdoc_free(struct mdoc *mdoc) 184 { 185 186 mdoc_free1(mdoc); 187 free(mdoc); 188 } 189 190 191 /* 192 * Allocate volatile and non-volatile parse resources. 193 */ 194 struct mdoc * 195 mdoc_alloc(struct roff *roff, struct mparse *parse) 196 { 197 struct mdoc *p; 198 199 p = mandoc_calloc(1, sizeof(struct mdoc)); 200 201 p->parse = parse; 202 p->roff = roff; 203 204 mdoc_hash_init(); 205 mdoc_alloc1(p); 206 return(p); 207 } 208 209 210 /* 211 * Climb back up the parse tree, validating open scopes. Mostly calls 212 * through to macro_end() in macro.c. 213 */ 214 int 215 mdoc_endparse(struct mdoc *m) 216 { 217 218 assert( ! (MDOC_HALT & m->flags)); 219 if (mdoc_macroend(m)) 220 return(1); 221 m->flags |= MDOC_HALT; 222 return(0); 223 } 224 225 int 226 mdoc_addeqn(struct mdoc *m, const struct eqn *ep) 227 { 228 struct mdoc_node *n; 229 230 assert( ! (MDOC_HALT & m->flags)); 231 232 /* No text before an initial macro. */ 233 234 if (SEC_NONE == m->lastnamed) { 235 mdoc_pmsg(m, ep->ln, ep->pos, MANDOCERR_NOTEXT); 236 return(1); 237 } 238 239 n = node_alloc(m, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); 240 n->eqn = ep; 241 242 if ( ! node_append(m, n)) 243 return(0); 244 245 m->next = MDOC_NEXT_SIBLING; 246 return(1); 247 } 248 249 int 250 mdoc_addspan(struct mdoc *m, const struct tbl_span *sp) 251 { 252 struct mdoc_node *n; 253 254 assert( ! (MDOC_HALT & m->flags)); 255 256 /* No text before an initial macro. */ 257 258 if (SEC_NONE == m->lastnamed) { 259 mdoc_pmsg(m, sp->line, 0, MANDOCERR_NOTEXT); 260 return(1); 261 } 262 263 n = node_alloc(m, sp->line, 0, MDOC_MAX, MDOC_TBL); 264 n->span = sp; 265 266 if ( ! node_append(m, n)) 267 return(0); 268 269 m->next = MDOC_NEXT_SIBLING; 270 return(1); 271 } 272 273 274 /* 275 * Main parse routine. Parses a single line -- really just hands off to 276 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 277 */ 278 int 279 mdoc_parseln(struct mdoc *m, int ln, char *buf, int offs) 280 { 281 282 assert( ! (MDOC_HALT & m->flags)); 283 284 m->flags |= MDOC_NEWLINE; 285 286 /* 287 * Let the roff nS register switch SYNOPSIS mode early, 288 * such that the parser knows at all times 289 * whether this mode is on or off. 290 * Note that this mode is also switched by the Sh macro. 291 */ 292 if (roff_regisset(m->roff, REG_nS)) { 293 if (roff_regget(m->roff, REG_nS)) 294 m->flags |= MDOC_SYNOPSIS; 295 else 296 m->flags &= ~MDOC_SYNOPSIS; 297 } 298 299 return(mandoc_getcontrol(buf, &offs) ? 300 mdoc_pmacro(m, ln, buf, offs) : 301 mdoc_ptext(m, ln, buf, offs)); 302 } 303 304 int 305 mdoc_macro(MACRO_PROT_ARGS) 306 { 307 assert(tok < MDOC_MAX); 308 309 /* If we're in the body, deny prologue calls. */ 310 311 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 312 MDOC_PBODY & m->flags) { 313 mdoc_pmsg(m, line, ppos, MANDOCERR_BADBODY); 314 return(1); 315 } 316 317 /* If we're in the prologue, deny "body" macros. */ 318 319 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 320 ! (MDOC_PBODY & m->flags)) { 321 mdoc_pmsg(m, line, ppos, MANDOCERR_BADPROLOG); 322 if (NULL == m->meta.msec) 323 m->meta.msec = mandoc_strdup("1"); 324 if (NULL == m->meta.title) 325 m->meta.title = mandoc_strdup("UNKNOWN"); 326 if (NULL == m->meta.vol) 327 m->meta.vol = mandoc_strdup("LOCAL"); 328 if (NULL == m->meta.os) 329 m->meta.os = mandoc_strdup("LOCAL"); 330 if (NULL == m->meta.date) 331 m->meta.date = mandoc_normdate 332 (m->parse, NULL, line, ppos); 333 m->flags |= MDOC_PBODY; 334 } 335 336 return((*mdoc_macros[tok].fp)(m, tok, line, ppos, pos, buf)); 337 } 338 339 340 static int 341 node_append(struct mdoc *mdoc, struct mdoc_node *p) 342 { 343 344 assert(mdoc->last); 345 assert(mdoc->first); 346 assert(MDOC_ROOT != p->type); 347 348 switch (mdoc->next) { 349 case (MDOC_NEXT_SIBLING): 350 mdoc->last->next = p; 351 p->prev = mdoc->last; 352 p->parent = mdoc->last->parent; 353 break; 354 case (MDOC_NEXT_CHILD): 355 mdoc->last->child = p; 356 p->parent = mdoc->last; 357 break; 358 default: 359 abort(); 360 /* NOTREACHED */ 361 } 362 363 p->parent->nchild++; 364 365 /* 366 * Copy over the normalised-data pointer of our parent. Not 367 * everybody has one, but copying a null pointer is fine. 368 */ 369 370 switch (p->type) { 371 case (MDOC_BODY): 372 /* FALLTHROUGH */ 373 case (MDOC_TAIL): 374 /* FALLTHROUGH */ 375 case (MDOC_HEAD): 376 p->norm = p->parent->norm; 377 break; 378 default: 379 break; 380 } 381 382 if ( ! mdoc_valid_pre(mdoc, p)) 383 return(0); 384 385 switch (p->type) { 386 case (MDOC_HEAD): 387 assert(MDOC_BLOCK == p->parent->type); 388 p->parent->head = p; 389 break; 390 case (MDOC_TAIL): 391 assert(MDOC_BLOCK == p->parent->type); 392 p->parent->tail = p; 393 break; 394 case (MDOC_BODY): 395 if (p->end) 396 break; 397 assert(MDOC_BLOCK == p->parent->type); 398 p->parent->body = p; 399 break; 400 default: 401 break; 402 } 403 404 mdoc->last = p; 405 406 switch (p->type) { 407 case (MDOC_TBL): 408 /* FALLTHROUGH */ 409 case (MDOC_TEXT): 410 if ( ! mdoc_valid_post(mdoc)) 411 return(0); 412 break; 413 default: 414 break; 415 } 416 417 return(1); 418 } 419 420 421 static struct mdoc_node * 422 node_alloc(struct mdoc *m, int line, int pos, 423 enum mdoct tok, enum mdoc_type type) 424 { 425 struct mdoc_node *p; 426 427 p = mandoc_calloc(1, sizeof(struct mdoc_node)); 428 p->sec = m->lastsec; 429 p->line = line; 430 p->pos = pos; 431 p->tok = tok; 432 p->type = type; 433 434 /* Flag analysis. */ 435 436 if (MDOC_SYNOPSIS & m->flags) 437 p->flags |= MDOC_SYNPRETTY; 438 else 439 p->flags &= ~MDOC_SYNPRETTY; 440 if (MDOC_NEWLINE & m->flags) 441 p->flags |= MDOC_LINE; 442 m->flags &= ~MDOC_NEWLINE; 443 444 return(p); 445 } 446 447 448 int 449 mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 450 { 451 struct mdoc_node *p; 452 453 p = node_alloc(m, line, pos, tok, MDOC_TAIL); 454 if ( ! node_append(m, p)) 455 return(0); 456 m->next = MDOC_NEXT_CHILD; 457 return(1); 458 } 459 460 461 int 462 mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 463 { 464 struct mdoc_node *p; 465 466 assert(m->first); 467 assert(m->last); 468 469 p = node_alloc(m, line, pos, tok, MDOC_HEAD); 470 if ( ! node_append(m, p)) 471 return(0); 472 m->next = MDOC_NEXT_CHILD; 473 return(1); 474 } 475 476 477 int 478 mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 479 { 480 struct mdoc_node *p; 481 482 p = node_alloc(m, line, pos, tok, MDOC_BODY); 483 if ( ! node_append(m, p)) 484 return(0); 485 m->next = MDOC_NEXT_CHILD; 486 return(1); 487 } 488 489 490 int 491 mdoc_endbody_alloc(struct mdoc *m, int line, int pos, enum mdoct tok, 492 struct mdoc_node *body, enum mdoc_endbody end) 493 { 494 struct mdoc_node *p; 495 496 p = node_alloc(m, line, pos, tok, MDOC_BODY); 497 p->pending = body; 498 p->end = end; 499 if ( ! node_append(m, p)) 500 return(0); 501 m->next = MDOC_NEXT_SIBLING; 502 return(1); 503 } 504 505 506 int 507 mdoc_block_alloc(struct mdoc *m, int line, int pos, 508 enum mdoct tok, struct mdoc_arg *args) 509 { 510 struct mdoc_node *p; 511 512 p = node_alloc(m, line, pos, tok, MDOC_BLOCK); 513 p->args = args; 514 if (p->args) 515 (args->refcnt)++; 516 517 switch (tok) { 518 case (MDOC_Bd): 519 /* FALLTHROUGH */ 520 case (MDOC_Bf): 521 /* FALLTHROUGH */ 522 case (MDOC_Bl): 523 /* FALLTHROUGH */ 524 case (MDOC_Rs): 525 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 526 break; 527 default: 528 break; 529 } 530 531 if ( ! node_append(m, p)) 532 return(0); 533 m->next = MDOC_NEXT_CHILD; 534 return(1); 535 } 536 537 538 int 539 mdoc_elem_alloc(struct mdoc *m, int line, int pos, 540 enum mdoct tok, struct mdoc_arg *args) 541 { 542 struct mdoc_node *p; 543 544 p = node_alloc(m, line, pos, tok, MDOC_ELEM); 545 p->args = args; 546 if (p->args) 547 (args->refcnt)++; 548 549 switch (tok) { 550 case (MDOC_An): 551 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 552 break; 553 default: 554 break; 555 } 556 557 if ( ! node_append(m, p)) 558 return(0); 559 m->next = MDOC_NEXT_CHILD; 560 return(1); 561 } 562 563 int 564 mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) 565 { 566 struct mdoc_node *n; 567 568 n = node_alloc(m, line, pos, MDOC_MAX, MDOC_TEXT); 569 n->string = roff_strdup(m->roff, p); 570 571 if ( ! node_append(m, n)) 572 return(0); 573 574 m->next = MDOC_NEXT_SIBLING; 575 return(1); 576 } 577 578 579 static void 580 mdoc_node_free(struct mdoc_node *p) 581 { 582 583 if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) 584 free(p->norm); 585 if (p->string) 586 free(p->string); 587 if (p->args) 588 mdoc_argv_free(p->args); 589 free(p); 590 } 591 592 593 static void 594 mdoc_node_unlink(struct mdoc *m, struct mdoc_node *n) 595 { 596 597 /* Adjust siblings. */ 598 599 if (n->prev) 600 n->prev->next = n->next; 601 if (n->next) 602 n->next->prev = n->prev; 603 604 /* Adjust parent. */ 605 606 if (n->parent) { 607 n->parent->nchild--; 608 if (n->parent->child == n) 609 n->parent->child = n->prev ? n->prev : n->next; 610 if (n->parent->last == n) 611 n->parent->last = n->prev ? n->prev : NULL; 612 } 613 614 /* Adjust parse point, if applicable. */ 615 616 if (m && m->last == n) { 617 if (n->prev) { 618 m->last = n->prev; 619 m->next = MDOC_NEXT_SIBLING; 620 } else { 621 m->last = n->parent; 622 m->next = MDOC_NEXT_CHILD; 623 } 624 } 625 626 if (m && m->first == n) 627 m->first = NULL; 628 } 629 630 631 void 632 mdoc_node_delete(struct mdoc *m, struct mdoc_node *p) 633 { 634 635 while (p->child) { 636 assert(p->nchild); 637 mdoc_node_delete(m, p->child); 638 } 639 assert(0 == p->nchild); 640 641 mdoc_node_unlink(m, p); 642 mdoc_node_free(p); 643 } 644 645 #if 0 646 /* 647 * Pre-treat a text line. 648 * Text lines can consist of equations, which must be handled apart from 649 * the regular text. 650 * Thus, use this function to step through a line checking if it has any 651 * equations embedded in it. 652 * This must handle multiple equations AND equations that do not end at 653 * the end-of-line, i.e., will re-enter in the next roff parse. 654 */ 655 static int 656 mdoc_preptext(struct mdoc *m, int line, char *buf, int offs) 657 { 658 char *start, *end; 659 char delim; 660 661 while ('\0' != buf[offs]) { 662 /* Mark starting position if eqn is set. */ 663 start = NULL; 664 if ('\0' != (delim = roff_eqndelim(m->roff))) 665 if (NULL != (start = strchr(buf + offs, delim))) 666 *start++ = '\0'; 667 668 /* Parse text as normal. */ 669 if ( ! mdoc_ptext(m, line, buf, offs)) 670 return(0); 671 672 /* Continue only if an equation exists. */ 673 if (NULL == start) 674 break; 675 676 /* Read past the end of the equation. */ 677 offs += start - (buf + offs); 678 assert(start == &buf[offs]); 679 if (NULL != (end = strchr(buf + offs, delim))) { 680 *end++ = '\0'; 681 while (' ' == *end) 682 end++; 683 } 684 685 /* Parse the equation itself. */ 686 roff_openeqn(m->roff, NULL, line, offs, buf); 687 688 /* Process a finished equation? */ 689 if (roff_closeeqn(m->roff)) 690 if ( ! mdoc_addeqn(m, roff_eqn(m->roff))) 691 return(0); 692 offs += (end - (buf + offs)); 693 } 694 695 return(1); 696 } 697 #endif 698 699 /* 700 * Parse free-form text, that is, a line that does not begin with the 701 * control character. 702 */ 703 static int 704 mdoc_ptext(struct mdoc *m, int line, char *buf, int offs) 705 { 706 char *c, *ws, *end; 707 struct mdoc_node *n; 708 709 /* No text before an initial macro. */ 710 711 if (SEC_NONE == m->lastnamed) { 712 mdoc_pmsg(m, line, offs, MANDOCERR_NOTEXT); 713 return(1); 714 } 715 716 assert(m->last); 717 n = m->last; 718 719 /* 720 * Divert directly to list processing if we're encountering a 721 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry 722 * (a MDOC_BODY means it's already open, in which case we should 723 * process within its context in the normal way). 724 */ 725 726 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 727 LIST_column == n->norm->Bl.type) { 728 /* `Bl' is open without any children. */ 729 m->flags |= MDOC_FREECOL; 730 return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); 731 } 732 733 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 734 NULL != n->parent && 735 MDOC_Bl == n->parent->tok && 736 LIST_column == n->parent->norm->Bl.type) { 737 /* `Bl' has block-level `It' children. */ 738 m->flags |= MDOC_FREECOL; 739 return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); 740 } 741 742 /* 743 * Search for the beginning of unescaped trailing whitespace (ws) 744 * and for the first character not to be output (end). 745 */ 746 747 /* FIXME: replace with strcspn(). */ 748 ws = NULL; 749 for (c = end = buf + offs; *c; c++) { 750 switch (*c) { 751 case ' ': 752 if (NULL == ws) 753 ws = c; 754 continue; 755 case '\t': 756 /* 757 * Always warn about trailing tabs, 758 * even outside literal context, 759 * where they should be put on the next line. 760 */ 761 if (NULL == ws) 762 ws = c; 763 /* 764 * Strip trailing tabs in literal context only; 765 * outside, they affect the next line. 766 */ 767 if (MDOC_LITERAL & m->flags) 768 continue; 769 break; 770 case '\\': 771 /* Skip the escaped character, too, if any. */ 772 if (c[1]) 773 c++; 774 /* FALLTHROUGH */ 775 default: 776 ws = NULL; 777 break; 778 } 779 end = c + 1; 780 } 781 *end = '\0'; 782 783 if (ws) 784 mdoc_pmsg(m, line, (int)(ws-buf), MANDOCERR_EOLNSPACE); 785 786 if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) { 787 mdoc_pmsg(m, line, (int)(c-buf), MANDOCERR_NOBLANKLN); 788 789 /* 790 * Insert a `sp' in the case of a blank line. Technically, 791 * blank lines aren't allowed, but enough manuals assume this 792 * behaviour that we want to work around it. 793 */ 794 if ( ! mdoc_elem_alloc(m, line, offs, MDOC_sp, NULL)) 795 return(0); 796 797 m->next = MDOC_NEXT_SIBLING; 798 return(1); 799 } 800 801 if ( ! mdoc_word_alloc(m, line, offs, buf+offs)) 802 return(0); 803 804 if (MDOC_LITERAL & m->flags) 805 return(1); 806 807 /* 808 * End-of-sentence check. If the last character is an unescaped 809 * EOS character, then flag the node as being the end of a 810 * sentence. The front-end will know how to interpret this. 811 */ 812 813 assert(buf < end); 814 815 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0)) 816 m->last->flags |= MDOC_EOS; 817 818 return(1); 819 } 820 821 822 /* 823 * Parse a macro line, that is, a line beginning with the control 824 * character. 825 */ 826 static int 827 mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs) 828 { 829 enum mdoct tok; 830 int i, sv; 831 char mac[5]; 832 struct mdoc_node *n; 833 834 /* Empty post-control lines are ignored. */ 835 836 if ('"' == buf[offs]) { 837 mdoc_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT); 838 return(1); 839 } else if ('\0' == buf[offs]) 840 return(1); 841 842 sv = offs; 843 844 /* 845 * Copy the first word into a nil-terminated buffer. 846 * Stop copying when a tab, space, or eoln is encountered. 847 */ 848 849 i = 0; 850 while (i < 4 && '\0' != buf[offs] && 851 ' ' != buf[offs] && '\t' != buf[offs]) 852 mac[i++] = buf[offs++]; 853 854 mac[i] = '\0'; 855 856 tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; 857 858 if (MDOC_MAX == tok) { 859 mandoc_vmsg(MANDOCERR_MACRO, m->parse, 860 ln, sv, "%s", buf + sv - 1); 861 return(1); 862 } 863 864 /* Disregard the first trailing tab, if applicable. */ 865 866 if ('\t' == buf[offs]) 867 offs++; 868 869 /* Jump to the next non-whitespace word. */ 870 871 while (buf[offs] && ' ' == buf[offs]) 872 offs++; 873 874 /* 875 * Trailing whitespace. Note that tabs are allowed to be passed 876 * into the parser as "text", so we only warn about spaces here. 877 */ 878 879 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 880 mdoc_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE); 881 882 /* 883 * If an initial macro or a list invocation, divert directly 884 * into macro processing. 885 */ 886 887 if (NULL == m->last || MDOC_It == tok || MDOC_El == tok) { 888 if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf)) 889 goto err; 890 return(1); 891 } 892 893 n = m->last; 894 assert(m->last); 895 896 /* 897 * If the first macro of a `Bl -column', open an `It' block 898 * context around the parsed macro. 899 */ 900 901 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 902 LIST_column == n->norm->Bl.type) { 903 m->flags |= MDOC_FREECOL; 904 if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) 905 goto err; 906 return(1); 907 } 908 909 /* 910 * If we're following a block-level `It' within a `Bl -column' 911 * context (perhaps opened in the above block or in ptext()), 912 * then open an `It' block context around the parsed macro. 913 */ 914 915 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 916 NULL != n->parent && 917 MDOC_Bl == n->parent->tok && 918 LIST_column == n->parent->norm->Bl.type) { 919 m->flags |= MDOC_FREECOL; 920 if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) 921 goto err; 922 return(1); 923 } 924 925 /* Normal processing of a macro. */ 926 927 if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf)) 928 goto err; 929 930 return(1); 931 932 err: /* Error out. */ 933 934 m->flags |= MDOC_HALT; 935 return(0); 936 } 937 938 enum mdelim 939 mdoc_isdelim(const char *p) 940 { 941 942 if ('\0' == p[0]) 943 return(DELIM_NONE); 944 945 if ('\0' == p[1]) 946 switch (p[0]) { 947 case('('): 948 /* FALLTHROUGH */ 949 case('['): 950 return(DELIM_OPEN); 951 case('|'): 952 return(DELIM_MIDDLE); 953 case('.'): 954 /* FALLTHROUGH */ 955 case(','): 956 /* FALLTHROUGH */ 957 case(';'): 958 /* FALLTHROUGH */ 959 case(':'): 960 /* FALLTHROUGH */ 961 case('?'): 962 /* FALLTHROUGH */ 963 case('!'): 964 /* FALLTHROUGH */ 965 case(')'): 966 /* FALLTHROUGH */ 967 case(']'): 968 return(DELIM_CLOSE); 969 default: 970 return(DELIM_NONE); 971 } 972 973 if ('\\' != p[0]) 974 return(DELIM_NONE); 975 976 if (0 == strcmp(p + 1, ".")) 977 return(DELIM_CLOSE); 978 if (0 == strcmp(p + 1, "*(Ba")) 979 return(DELIM_MIDDLE); 980 981 return(DELIM_NONE); 982 } 983