1 /* $Id: man.c,v 1.77 2014/03/23 20:57:23 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #include <sys/types.h> 20 21 #include <assert.h> 22 #include <ctype.h> 23 #include <stdarg.h> 24 #include <stdlib.h> 25 #include <stdio.h> 26 #include <string.h> 27 28 #include "man.h" 29 #include "mandoc.h" 30 #include "mandoc_aux.h" 31 #include "libman.h" 32 #include "libmandoc.h" 33 34 const char *const __man_macronames[MAN_MAX] = { 35 "br", "TH", "SH", "SS", 36 "TP", "LP", "PP", "P", 37 "IP", "HP", "SM", "SB", 38 "BI", "IB", "BR", "RB", 39 "R", "B", "I", "IR", 40 "RI", "na", "sp", "nf", 41 "fi", "RE", "RS", "DT", 42 "UC", "PD", "AT", "in", 43 "ft", "OP", "EX", "EE", 44 "UR", "UE" 45 }; 46 47 const char * const *man_macronames = __man_macronames; 48 49 static struct man_node *man_node_alloc(struct man *, int, int, 50 enum man_type, enum mant); 51 static int man_node_append(struct man *, 52 struct man_node *); 53 static void man_node_free(struct man_node *); 54 static void man_node_unlink(struct man *, 55 struct man_node *); 56 static int man_ptext(struct man *, int, char *, int); 57 static int man_pmacro(struct man *, int, char *, int); 58 static void man_free1(struct man *); 59 static void man_alloc1(struct man *); 60 static int man_descope(struct man *, int, int); 61 62 63 const struct man_node * 64 man_node(const struct man *man) 65 { 66 67 assert( ! (MAN_HALT & man->flags)); 68 return(man->first); 69 } 70 71 72 const struct man_meta * 73 man_meta(const struct man *man) 74 { 75 76 assert( ! (MAN_HALT & man->flags)); 77 return(&man->meta); 78 } 79 80 81 void 82 man_reset(struct man *man) 83 { 84 85 man_free1(man); 86 man_alloc1(man); 87 } 88 89 90 void 91 man_free(struct man *man) 92 { 93 94 man_free1(man); 95 free(man); 96 } 97 98 99 struct man * 100 man_alloc(struct roff *roff, struct mparse *parse, int quick) 101 { 102 struct man *p; 103 104 p = mandoc_calloc(1, sizeof(struct man)); 105 106 man_hash_init(); 107 p->parse = parse; 108 p->quick = quick; 109 p->roff = roff; 110 111 man_alloc1(p); 112 return(p); 113 } 114 115 116 int 117 man_endparse(struct man *man) 118 { 119 120 assert( ! (MAN_HALT & man->flags)); 121 if (man_macroend(man)) 122 return(1); 123 man->flags |= MAN_HALT; 124 return(0); 125 } 126 127 128 int 129 man_parseln(struct man *man, int ln, char *buf, int offs) 130 { 131 132 man->flags |= MAN_NEWLINE; 133 134 assert( ! (MAN_HALT & man->flags)); 135 136 return (roff_getcontrol(man->roff, buf, &offs) ? 137 man_pmacro(man, ln, buf, offs) : 138 man_ptext(man, ln, buf, offs)); 139 } 140 141 142 static void 143 man_free1(struct man *man) 144 { 145 146 if (man->first) 147 man_node_delete(man, man->first); 148 if (man->meta.title) 149 free(man->meta.title); 150 if (man->meta.source) 151 free(man->meta.source); 152 if (man->meta.date) 153 free(man->meta.date); 154 if (man->meta.vol) 155 free(man->meta.vol); 156 if (man->meta.msec) 157 free(man->meta.msec); 158 } 159 160 161 static void 162 man_alloc1(struct man *man) 163 { 164 165 memset(&man->meta, 0, sizeof(struct man_meta)); 166 man->flags = 0; 167 man->last = mandoc_calloc(1, sizeof(struct man_node)); 168 man->first = man->last; 169 man->last->type = MAN_ROOT; 170 man->last->tok = MAN_MAX; 171 man->next = MAN_NEXT_CHILD; 172 } 173 174 175 static int 176 man_node_append(struct man *man, struct man_node *p) 177 { 178 179 assert(man->last); 180 assert(man->first); 181 assert(MAN_ROOT != p->type); 182 183 switch (man->next) { 184 case (MAN_NEXT_SIBLING): 185 man->last->next = p; 186 p->prev = man->last; 187 p->parent = man->last->parent; 188 break; 189 case (MAN_NEXT_CHILD): 190 man->last->child = p; 191 p->parent = man->last; 192 break; 193 default: 194 abort(); 195 /* NOTREACHED */ 196 } 197 198 assert(p->parent); 199 p->parent->nchild++; 200 201 if ( ! man_valid_pre(man, p)) 202 return(0); 203 204 switch (p->type) { 205 case (MAN_HEAD): 206 assert(MAN_BLOCK == p->parent->type); 207 p->parent->head = p; 208 break; 209 case (MAN_TAIL): 210 assert(MAN_BLOCK == p->parent->type); 211 p->parent->tail = p; 212 break; 213 case (MAN_BODY): 214 assert(MAN_BLOCK == p->parent->type); 215 p->parent->body = p; 216 break; 217 default: 218 break; 219 } 220 221 man->last = p; 222 223 switch (p->type) { 224 case (MAN_TBL): 225 /* FALLTHROUGH */ 226 case (MAN_TEXT): 227 if ( ! man_valid_post(man)) 228 return(0); 229 break; 230 default: 231 break; 232 } 233 234 return(1); 235 } 236 237 238 static struct man_node * 239 man_node_alloc(struct man *man, int line, int pos, 240 enum man_type type, enum mant tok) 241 { 242 struct man_node *p; 243 244 p = mandoc_calloc(1, sizeof(struct man_node)); 245 p->line = line; 246 p->pos = pos; 247 p->type = type; 248 p->tok = tok; 249 250 if (MAN_NEWLINE & man->flags) 251 p->flags |= MAN_LINE; 252 man->flags &= ~MAN_NEWLINE; 253 return(p); 254 } 255 256 257 int 258 man_elem_alloc(struct man *man, int line, int pos, enum mant tok) 259 { 260 struct man_node *p; 261 262 p = man_node_alloc(man, line, pos, MAN_ELEM, tok); 263 if ( ! man_node_append(man, p)) 264 return(0); 265 man->next = MAN_NEXT_CHILD; 266 return(1); 267 } 268 269 270 int 271 man_tail_alloc(struct man *man, int line, int pos, enum mant tok) 272 { 273 struct man_node *p; 274 275 p = man_node_alloc(man, line, pos, MAN_TAIL, tok); 276 if ( ! man_node_append(man, p)) 277 return(0); 278 man->next = MAN_NEXT_CHILD; 279 return(1); 280 } 281 282 283 int 284 man_head_alloc(struct man *man, int line, int pos, enum mant tok) 285 { 286 struct man_node *p; 287 288 p = man_node_alloc(man, line, pos, MAN_HEAD, tok); 289 if ( ! man_node_append(man, p)) 290 return(0); 291 man->next = MAN_NEXT_CHILD; 292 return(1); 293 } 294 295 296 int 297 man_body_alloc(struct man *man, int line, int pos, enum mant tok) 298 { 299 struct man_node *p; 300 301 p = man_node_alloc(man, line, pos, MAN_BODY, tok); 302 if ( ! man_node_append(man, p)) 303 return(0); 304 man->next = MAN_NEXT_CHILD; 305 return(1); 306 } 307 308 309 int 310 man_block_alloc(struct man *man, int line, int pos, enum mant tok) 311 { 312 struct man_node *p; 313 314 p = man_node_alloc(man, line, pos, MAN_BLOCK, tok); 315 if ( ! man_node_append(man, p)) 316 return(0); 317 man->next = MAN_NEXT_CHILD; 318 return(1); 319 } 320 321 int 322 man_word_alloc(struct man *man, int line, int pos, const char *word) 323 { 324 struct man_node *n; 325 326 n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX); 327 n->string = roff_strdup(man->roff, word); 328 329 if ( ! man_node_append(man, n)) 330 return(0); 331 332 man->next = MAN_NEXT_SIBLING; 333 return(1); 334 } 335 336 337 /* 338 * Free all of the resources held by a node. This does NOT unlink a 339 * node from its context; for that, see man_node_unlink(). 340 */ 341 static void 342 man_node_free(struct man_node *p) 343 { 344 345 if (p->string) 346 free(p->string); 347 free(p); 348 } 349 350 351 void 352 man_node_delete(struct man *man, struct man_node *p) 353 { 354 355 while (p->child) 356 man_node_delete(man, p->child); 357 358 man_node_unlink(man, p); 359 man_node_free(p); 360 } 361 362 int 363 man_addeqn(struct man *man, const struct eqn *ep) 364 { 365 struct man_node *n; 366 367 assert( ! (MAN_HALT & man->flags)); 368 369 n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX); 370 n->eqn = ep; 371 372 if ( ! man_node_append(man, n)) 373 return(0); 374 375 man->next = MAN_NEXT_SIBLING; 376 return(man_descope(man, ep->ln, ep->pos)); 377 } 378 379 int 380 man_addspan(struct man *man, const struct tbl_span *sp) 381 { 382 struct man_node *n; 383 384 assert( ! (MAN_HALT & man->flags)); 385 386 n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX); 387 n->span = sp; 388 389 if ( ! man_node_append(man, n)) 390 return(0); 391 392 man->next = MAN_NEXT_SIBLING; 393 return(man_descope(man, sp->line, 0)); 394 } 395 396 static int 397 man_descope(struct man *man, int line, int offs) 398 { 399 /* 400 * Co-ordinate what happens with having a next-line scope open: 401 * first close out the element scope (if applicable), then close 402 * out the block scope (also if applicable). 403 */ 404 405 if (MAN_ELINE & man->flags) { 406 man->flags &= ~MAN_ELINE; 407 if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX)) 408 return(0); 409 } 410 411 if ( ! (MAN_BLINE & man->flags)) 412 return(1); 413 man->flags &= ~MAN_BLINE; 414 415 if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX)) 416 return(0); 417 return(man_body_alloc(man, line, offs, man->last->tok)); 418 } 419 420 static int 421 man_ptext(struct man *man, int line, char *buf, int offs) 422 { 423 int i; 424 425 /* Literal free-form text whitespace is preserved. */ 426 427 if (MAN_LITERAL & man->flags) { 428 if ( ! man_word_alloc(man, line, offs, buf + offs)) 429 return(0); 430 return(man_descope(man, line, offs)); 431 } 432 433 for (i = offs; ' ' == buf[i]; i++) 434 /* Skip leading whitespace. */ ; 435 436 /* 437 * Blank lines are ignored right after headings 438 * but add a single vertical space elsewhere. 439 */ 440 441 if ('\0' == buf[i]) { 442 /* Allocate a blank entry. */ 443 if (MAN_SH != man->last->tok && 444 MAN_SS != man->last->tok) { 445 if ( ! man_elem_alloc(man, line, offs, MAN_sp)) 446 return(0); 447 man->next = MAN_NEXT_SIBLING; 448 } 449 return(1); 450 } 451 452 /* 453 * Warn if the last un-escaped character is whitespace. Then 454 * strip away the remaining spaces (tabs stay!). 455 */ 456 457 i = (int)strlen(buf); 458 assert(i); 459 460 if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { 461 if (i > 1 && '\\' != buf[i - 2]) 462 man_pmsg(man, line, i - 1, MANDOCERR_EOLNSPACE); 463 464 for (--i; i && ' ' == buf[i]; i--) 465 /* Spin back to non-space. */ ; 466 467 /* Jump ahead of escaped whitespace. */ 468 i += '\\' == buf[i] ? 2 : 1; 469 470 buf[i] = '\0'; 471 } 472 473 if ( ! man_word_alloc(man, line, offs, buf + offs)) 474 return(0); 475 476 /* 477 * End-of-sentence check. If the last character is an unescaped 478 * EOS character, then flag the node as being the end of a 479 * sentence. The front-end will know how to interpret this. 480 */ 481 482 assert(i); 483 if (mandoc_eos(buf, (size_t)i)) 484 man->last->flags |= MAN_EOS; 485 486 return(man_descope(man, line, offs)); 487 } 488 489 static int 490 man_pmacro(struct man *man, int ln, char *buf, int offs) 491 { 492 int i, ppos; 493 enum mant tok; 494 char mac[5]; 495 struct man_node *n; 496 497 if ('"' == buf[offs]) { 498 man_pmsg(man, ln, offs, MANDOCERR_BADCOMMENT); 499 return(1); 500 } else if ('\0' == buf[offs]) 501 return(1); 502 503 ppos = offs; 504 505 /* 506 * Copy the first word into a nil-terminated buffer. 507 * Stop copying when a tab, space, or eoln is encountered. 508 */ 509 510 i = 0; 511 while (i < 4 && '\0' != buf[offs] && 512 ' ' != buf[offs] && '\t' != buf[offs]) 513 mac[i++] = buf[offs++]; 514 515 mac[i] = '\0'; 516 517 tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX; 518 519 if (MAN_MAX == tok) { 520 mandoc_vmsg(MANDOCERR_MACRO, man->parse, ln, 521 ppos, "%s", buf + ppos - 1); 522 return(1); 523 } 524 525 /* The macro is sane. Jump to the next word. */ 526 527 while (buf[offs] && ' ' == buf[offs]) 528 offs++; 529 530 /* 531 * Trailing whitespace. Note that tabs are allowed to be passed 532 * into the parser as "text", so we only warn about spaces here. 533 */ 534 535 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 536 man_pmsg(man, ln, offs - 1, MANDOCERR_EOLNSPACE); 537 538 /* 539 * Remove prior ELINE macro, as it's being clobbered by a new 540 * macro. Note that NSCOPED macros do not close out ELINE 541 * macros---they don't print text---so we let those slip by. 542 */ 543 544 if ( ! (MAN_NSCOPED & man_macros[tok].flags) && 545 man->flags & MAN_ELINE) { 546 n = man->last; 547 assert(MAN_TEXT != n->type); 548 549 /* Remove repeated NSCOPED macros causing ELINE. */ 550 551 if (MAN_NSCOPED & man_macros[n->tok].flags) 552 n = n->parent; 553 554 mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line, 555 n->pos, "%s breaks %s", man_macronames[tok], 556 man_macronames[n->tok]); 557 558 man_node_delete(man, n); 559 man->flags &= ~MAN_ELINE; 560 } 561 562 /* 563 * Remove prior BLINE macro that is being clobbered. 564 */ 565 if ((man->flags & MAN_BLINE) && 566 (MAN_BSCOPE & man_macros[tok].flags)) { 567 n = man->last; 568 569 /* Might be a text node like 8 in 570 * .TP 8 571 * .SH foo 572 */ 573 if (MAN_TEXT == n->type) 574 n = n->parent; 575 576 /* Remove element that didn't end BLINE, if any. */ 577 if ( ! (MAN_BSCOPE & man_macros[n->tok].flags)) 578 n = n->parent; 579 580 assert(MAN_HEAD == n->type); 581 n = n->parent; 582 assert(MAN_BLOCK == n->type); 583 assert(MAN_SCOPED & man_macros[n->tok].flags); 584 585 mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line, 586 n->pos, "%s breaks %s", man_macronames[tok], 587 man_macronames[n->tok]); 588 589 man_node_delete(man, n); 590 man->flags &= ~MAN_BLINE; 591 } 592 593 /* 594 * Save the fact that we're in the next-line for a block. In 595 * this way, embedded roff instructions can "remember" state 596 * when they exit. 597 */ 598 599 if (MAN_BLINE & man->flags) 600 man->flags |= MAN_BPLINE; 601 602 /* Call to handler... */ 603 604 assert(man_macros[tok].fp); 605 if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf)) 606 goto err; 607 608 /* In quick mode (for mandocdb), abort after the NAME section. */ 609 610 if (man->quick && MAN_SH == tok && 611 strcmp(man->last->prev->child->string, "NAME")) 612 return(2); 613 614 /* 615 * We weren't in a block-line scope when entering the 616 * above-parsed macro, so return. 617 */ 618 619 if ( ! (MAN_BPLINE & man->flags)) { 620 man->flags &= ~MAN_ILINE; 621 return(1); 622 } 623 man->flags &= ~MAN_BPLINE; 624 625 /* 626 * If we're in a block scope, then allow this macro to slip by 627 * without closing scope around it. 628 */ 629 630 if (MAN_ILINE & man->flags) { 631 man->flags &= ~MAN_ILINE; 632 return(1); 633 } 634 635 /* 636 * If we've opened a new next-line element scope, then return 637 * now, as the next line will close out the block scope. 638 */ 639 640 if (MAN_ELINE & man->flags) 641 return(1); 642 643 /* Close out the block scope opened in the prior line. */ 644 645 assert(MAN_BLINE & man->flags); 646 man->flags &= ~MAN_BLINE; 647 648 if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX)) 649 return(0); 650 return(man_body_alloc(man, ln, ppos, man->last->tok)); 651 652 err: /* Error out. */ 653 654 man->flags |= MAN_HALT; 655 return(0); 656 } 657 658 /* 659 * Unlink a node from its context. If "man" is provided, the last parse 660 * point will also be adjusted accordingly. 661 */ 662 static void 663 man_node_unlink(struct man *man, struct man_node *n) 664 { 665 666 /* Adjust siblings. */ 667 668 if (n->prev) 669 n->prev->next = n->next; 670 if (n->next) 671 n->next->prev = n->prev; 672 673 /* Adjust parent. */ 674 675 if (n->parent) { 676 n->parent->nchild--; 677 if (n->parent->child == n) 678 n->parent->child = n->prev ? n->prev : n->next; 679 } 680 681 /* Adjust parse point, if applicable. */ 682 683 if (man && man->last == n) { 684 /*XXX: this can occur when bailing from validation. */ 685 /*assert(NULL == n->next);*/ 686 if (n->prev) { 687 man->last = n->prev; 688 man->next = MAN_NEXT_SIBLING; 689 } else { 690 man->last = n->parent; 691 man->next = MAN_NEXT_CHILD; 692 } 693 } 694 695 if (man && man->first == n) 696 man->first = NULL; 697 } 698 699 const struct mparse * 700 man_mparse(const struct man *man) 701 { 702 703 assert(man && man->parse); 704 return(man->parse); 705 } 706 707 void 708 man_deroff(char **dest, const struct man_node *n) 709 { 710 char *cp; 711 size_t sz; 712 713 if (MAN_TEXT != n->type) { 714 for (n = n->child; n; n = n->next) 715 man_deroff(dest, n); 716 return; 717 } 718 719 /* Skip leading whitespace and escape sequences. */ 720 721 cp = n->string; 722 while ('\0' != *cp) { 723 if ('\\' == *cp) { 724 cp++; 725 mandoc_escape((const char **)&cp, NULL, NULL); 726 } else if (isspace((unsigned char)*cp)) 727 cp++; 728 else 729 break; 730 } 731 732 /* Skip trailing whitespace. */ 733 734 for (sz = strlen(cp); sz; sz--) 735 if (0 == isspace((unsigned char)cp[sz-1])) 736 break; 737 738 /* Skip empty strings. */ 739 740 if (0 == sz) 741 return; 742 743 if (NULL == *dest) { 744 *dest = mandoc_strndup(cp, sz); 745 return; 746 } 747 748 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 749 free(*dest); 750 *dest = cp; 751 } 752