1 /* $Id: man.c,v 1.80 2014/05/07 14:11:24 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #include <sys/types.h> 20 21 #include <assert.h> 22 #include <ctype.h> 23 #include <stdarg.h> 24 #include <stdlib.h> 25 #include <stdio.h> 26 #include <string.h> 27 28 #include "man.h" 29 #include "mandoc.h" 30 #include "mandoc_aux.h" 31 #include "libman.h" 32 #include "libmandoc.h" 33 34 const char *const __man_macronames[MAN_MAX] = { 35 "br", "TH", "SH", "SS", 36 "TP", "LP", "PP", "P", 37 "IP", "HP", "SM", "SB", 38 "BI", "IB", "BR", "RB", 39 "R", "B", "I", "IR", 40 "RI", "na", "sp", "nf", 41 "fi", "RE", "RS", "DT", 42 "UC", "PD", "AT", "in", 43 "ft", "OP", "EX", "EE", 44 "UR", "UE", "ll" 45 }; 46 47 const char * const *man_macronames = __man_macronames; 48 49 static struct man_node *man_node_alloc(struct man *, int, int, 50 enum man_type, enum mant); 51 static int man_node_append(struct man *, 52 struct man_node *); 53 static void man_node_free(struct man_node *); 54 static void man_node_unlink(struct man *, 55 struct man_node *); 56 static int man_ptext(struct man *, int, char *, int); 57 static int man_pmacro(struct man *, int, char *, int); 58 static void man_free1(struct man *); 59 static void man_alloc1(struct man *); 60 static int man_descope(struct man *, int, int); 61 62 63 const struct man_node * 64 man_node(const struct man *man) 65 { 66 67 assert( ! (MAN_HALT & man->flags)); 68 return(man->first); 69 } 70 71 const struct man_meta * 72 man_meta(const struct man *man) 73 { 74 75 assert( ! (MAN_HALT & man->flags)); 76 return(&man->meta); 77 } 78 79 void 80 man_reset(struct man *man) 81 { 82 83 man_free1(man); 84 man_alloc1(man); 85 } 86 87 void 88 man_free(struct man *man) 89 { 90 91 man_free1(man); 92 free(man); 93 } 94 95 struct man * 96 man_alloc(struct roff *roff, struct mparse *parse, int quick) 97 { 98 struct man *p; 99 100 p = mandoc_calloc(1, sizeof(struct man)); 101 102 man_hash_init(); 103 p->parse = parse; 104 p->quick = quick; 105 p->roff = roff; 106 107 man_alloc1(p); 108 return(p); 109 } 110 111 int 112 man_endparse(struct man *man) 113 { 114 115 assert( ! (MAN_HALT & man->flags)); 116 if (man_macroend(man)) 117 return(1); 118 man->flags |= MAN_HALT; 119 return(0); 120 } 121 122 int 123 man_parseln(struct man *man, int ln, char *buf, int offs) 124 { 125 126 man->flags |= MAN_NEWLINE; 127 128 assert( ! (MAN_HALT & man->flags)); 129 130 return (roff_getcontrol(man->roff, buf, &offs) ? 131 man_pmacro(man, ln, buf, offs) : 132 man_ptext(man, ln, buf, offs)); 133 } 134 135 static void 136 man_free1(struct man *man) 137 { 138 139 if (man->first) 140 man_node_delete(man, man->first); 141 if (man->meta.title) 142 free(man->meta.title); 143 if (man->meta.source) 144 free(man->meta.source); 145 if (man->meta.date) 146 free(man->meta.date); 147 if (man->meta.vol) 148 free(man->meta.vol); 149 if (man->meta.msec) 150 free(man->meta.msec); 151 } 152 153 static void 154 man_alloc1(struct man *man) 155 { 156 157 memset(&man->meta, 0, sizeof(struct man_meta)); 158 man->flags = 0; 159 man->last = mandoc_calloc(1, sizeof(struct man_node)); 160 man->first = man->last; 161 man->last->type = MAN_ROOT; 162 man->last->tok = MAN_MAX; 163 man->next = MAN_NEXT_CHILD; 164 } 165 166 167 static int 168 man_node_append(struct man *man, struct man_node *p) 169 { 170 171 assert(man->last); 172 assert(man->first); 173 assert(MAN_ROOT != p->type); 174 175 switch (man->next) { 176 case MAN_NEXT_SIBLING: 177 man->last->next = p; 178 p->prev = man->last; 179 p->parent = man->last->parent; 180 break; 181 case MAN_NEXT_CHILD: 182 man->last->child = p; 183 p->parent = man->last; 184 break; 185 default: 186 abort(); 187 /* NOTREACHED */ 188 } 189 190 assert(p->parent); 191 p->parent->nchild++; 192 193 if ( ! man_valid_pre(man, p)) 194 return(0); 195 196 switch (p->type) { 197 case MAN_HEAD: 198 assert(MAN_BLOCK == p->parent->type); 199 p->parent->head = p; 200 break; 201 case MAN_TAIL: 202 assert(MAN_BLOCK == p->parent->type); 203 p->parent->tail = p; 204 break; 205 case MAN_BODY: 206 assert(MAN_BLOCK == p->parent->type); 207 p->parent->body = p; 208 break; 209 default: 210 break; 211 } 212 213 man->last = p; 214 215 switch (p->type) { 216 case MAN_TBL: 217 /* FALLTHROUGH */ 218 case MAN_TEXT: 219 if ( ! man_valid_post(man)) 220 return(0); 221 break; 222 default: 223 break; 224 } 225 226 return(1); 227 } 228 229 static struct man_node * 230 man_node_alloc(struct man *man, int line, int pos, 231 enum man_type type, enum mant tok) 232 { 233 struct man_node *p; 234 235 p = mandoc_calloc(1, sizeof(struct man_node)); 236 p->line = line; 237 p->pos = pos; 238 p->type = type; 239 p->tok = tok; 240 241 if (MAN_NEWLINE & man->flags) 242 p->flags |= MAN_LINE; 243 man->flags &= ~MAN_NEWLINE; 244 return(p); 245 } 246 247 int 248 man_elem_alloc(struct man *man, int line, int pos, enum mant tok) 249 { 250 struct man_node *p; 251 252 p = man_node_alloc(man, line, pos, MAN_ELEM, tok); 253 if ( ! man_node_append(man, p)) 254 return(0); 255 man->next = MAN_NEXT_CHILD; 256 return(1); 257 } 258 259 int 260 man_tail_alloc(struct man *man, int line, int pos, enum mant tok) 261 { 262 struct man_node *p; 263 264 p = man_node_alloc(man, line, pos, MAN_TAIL, tok); 265 if ( ! man_node_append(man, p)) 266 return(0); 267 man->next = MAN_NEXT_CHILD; 268 return(1); 269 } 270 271 int 272 man_head_alloc(struct man *man, int line, int pos, enum mant tok) 273 { 274 struct man_node *p; 275 276 p = man_node_alloc(man, line, pos, MAN_HEAD, tok); 277 if ( ! man_node_append(man, p)) 278 return(0); 279 man->next = MAN_NEXT_CHILD; 280 return(1); 281 } 282 283 int 284 man_body_alloc(struct man *man, int line, int pos, enum mant tok) 285 { 286 struct man_node *p; 287 288 p = man_node_alloc(man, line, pos, MAN_BODY, tok); 289 if ( ! man_node_append(man, p)) 290 return(0); 291 man->next = MAN_NEXT_CHILD; 292 return(1); 293 } 294 295 int 296 man_block_alloc(struct man *man, int line, int pos, enum mant tok) 297 { 298 struct man_node *p; 299 300 p = man_node_alloc(man, line, pos, MAN_BLOCK, tok); 301 if ( ! man_node_append(man, p)) 302 return(0); 303 man->next = MAN_NEXT_CHILD; 304 return(1); 305 } 306 307 int 308 man_word_alloc(struct man *man, int line, int pos, const char *word) 309 { 310 struct man_node *n; 311 312 n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX); 313 n->string = roff_strdup(man->roff, word); 314 315 if ( ! man_node_append(man, n)) 316 return(0); 317 318 man->next = MAN_NEXT_SIBLING; 319 return(1); 320 } 321 322 /* 323 * Free all of the resources held by a node. This does NOT unlink a 324 * node from its context; for that, see man_node_unlink(). 325 */ 326 static void 327 man_node_free(struct man_node *p) 328 { 329 330 if (p->string) 331 free(p->string); 332 free(p); 333 } 334 335 void 336 man_node_delete(struct man *man, struct man_node *p) 337 { 338 339 while (p->child) 340 man_node_delete(man, p->child); 341 342 man_node_unlink(man, p); 343 man_node_free(p); 344 } 345 346 int 347 man_addeqn(struct man *man, const struct eqn *ep) 348 { 349 struct man_node *n; 350 351 assert( ! (MAN_HALT & man->flags)); 352 353 n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX); 354 n->eqn = ep; 355 356 if ( ! man_node_append(man, n)) 357 return(0); 358 359 man->next = MAN_NEXT_SIBLING; 360 return(man_descope(man, ep->ln, ep->pos)); 361 } 362 363 int 364 man_addspan(struct man *man, const struct tbl_span *sp) 365 { 366 struct man_node *n; 367 368 assert( ! (MAN_HALT & man->flags)); 369 370 n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX); 371 n->span = sp; 372 373 if ( ! man_node_append(man, n)) 374 return(0); 375 376 man->next = MAN_NEXT_SIBLING; 377 return(man_descope(man, sp->line, 0)); 378 } 379 380 static int 381 man_descope(struct man *man, int line, int offs) 382 { 383 /* 384 * Co-ordinate what happens with having a next-line scope open: 385 * first close out the element scope (if applicable), then close 386 * out the block scope (also if applicable). 387 */ 388 389 if (MAN_ELINE & man->flags) { 390 man->flags &= ~MAN_ELINE; 391 if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX)) 392 return(0); 393 } 394 395 if ( ! (MAN_BLINE & man->flags)) 396 return(1); 397 man->flags &= ~MAN_BLINE; 398 399 if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX)) 400 return(0); 401 return(man_body_alloc(man, line, offs, man->last->tok)); 402 } 403 404 static int 405 man_ptext(struct man *man, int line, char *buf, int offs) 406 { 407 int i; 408 409 /* Literal free-form text whitespace is preserved. */ 410 411 if (MAN_LITERAL & man->flags) { 412 if ( ! man_word_alloc(man, line, offs, buf + offs)) 413 return(0); 414 return(man_descope(man, line, offs)); 415 } 416 417 for (i = offs; ' ' == buf[i]; i++) 418 /* Skip leading whitespace. */ ; 419 420 /* 421 * Blank lines are ignored right after headings 422 * but add a single vertical space elsewhere. 423 */ 424 425 if ('\0' == buf[i]) { 426 /* Allocate a blank entry. */ 427 if (MAN_SH != man->last->tok && 428 MAN_SS != man->last->tok) { 429 if ( ! man_elem_alloc(man, line, offs, MAN_sp)) 430 return(0); 431 man->next = MAN_NEXT_SIBLING; 432 } 433 return(1); 434 } 435 436 /* 437 * Warn if the last un-escaped character is whitespace. Then 438 * strip away the remaining spaces (tabs stay!). 439 */ 440 441 i = (int)strlen(buf); 442 assert(i); 443 444 if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { 445 if (i > 1 && '\\' != buf[i - 2]) 446 man_pmsg(man, line, i - 1, MANDOCERR_EOLNSPACE); 447 448 for (--i; i && ' ' == buf[i]; i--) 449 /* Spin back to non-space. */ ; 450 451 /* Jump ahead of escaped whitespace. */ 452 i += '\\' == buf[i] ? 2 : 1; 453 454 buf[i] = '\0'; 455 } 456 457 if ( ! man_word_alloc(man, line, offs, buf + offs)) 458 return(0); 459 460 /* 461 * End-of-sentence check. If the last character is an unescaped 462 * EOS character, then flag the node as being the end of a 463 * sentence. The front-end will know how to interpret this. 464 */ 465 466 assert(i); 467 if (mandoc_eos(buf, (size_t)i)) 468 man->last->flags |= MAN_EOS; 469 470 return(man_descope(man, line, offs)); 471 } 472 473 static int 474 man_pmacro(struct man *man, int ln, char *buf, int offs) 475 { 476 int i, ppos; 477 enum mant tok; 478 char mac[5]; 479 struct man_node *n; 480 481 if ('"' == buf[offs]) { 482 man_pmsg(man, ln, offs, MANDOCERR_BADCOMMENT); 483 return(1); 484 } else if ('\0' == buf[offs]) 485 return(1); 486 487 ppos = offs; 488 489 /* 490 * Copy the first word into a nil-terminated buffer. 491 * Stop copying when a tab, space, or eoln is encountered. 492 */ 493 494 i = 0; 495 while (i < 4 && '\0' != buf[offs] && ' ' != buf[offs] && 496 '\t' != buf[offs]) 497 mac[i++] = buf[offs++]; 498 499 mac[i] = '\0'; 500 501 tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX; 502 503 if (MAN_MAX == tok) { 504 mandoc_vmsg(MANDOCERR_MACRO, man->parse, ln, ppos, 505 "%s", buf + ppos - 1); 506 return(1); 507 } 508 509 /* The macro is sane. Jump to the next word. */ 510 511 while (buf[offs] && ' ' == buf[offs]) 512 offs++; 513 514 /* 515 * Trailing whitespace. Note that tabs are allowed to be passed 516 * into the parser as "text", so we only warn about spaces here. 517 */ 518 519 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 520 man_pmsg(man, ln, offs - 1, MANDOCERR_EOLNSPACE); 521 522 /* 523 * Remove prior ELINE macro, as it's being clobbered by a new 524 * macro. Note that NSCOPED macros do not close out ELINE 525 * macros---they don't print text---so we let those slip by. 526 */ 527 528 if ( ! (MAN_NSCOPED & man_macros[tok].flags) && 529 man->flags & MAN_ELINE) { 530 n = man->last; 531 assert(MAN_TEXT != n->type); 532 533 /* Remove repeated NSCOPED macros causing ELINE. */ 534 535 if (MAN_NSCOPED & man_macros[n->tok].flags) 536 n = n->parent; 537 538 mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line, 539 n->pos, "%s breaks %s", man_macronames[tok], 540 man_macronames[n->tok]); 541 542 man_node_delete(man, n); 543 man->flags &= ~MAN_ELINE; 544 } 545 546 /* 547 * Remove prior BLINE macro that is being clobbered. 548 */ 549 if ((man->flags & MAN_BLINE) && 550 (MAN_BSCOPE & man_macros[tok].flags)) { 551 n = man->last; 552 553 /* Might be a text node like 8 in 554 * .TP 8 555 * .SH foo 556 */ 557 if (MAN_TEXT == n->type) 558 n = n->parent; 559 560 /* Remove element that didn't end BLINE, if any. */ 561 if ( ! (MAN_BSCOPE & man_macros[n->tok].flags)) 562 n = n->parent; 563 564 assert(MAN_HEAD == n->type); 565 n = n->parent; 566 assert(MAN_BLOCK == n->type); 567 assert(MAN_SCOPED & man_macros[n->tok].flags); 568 569 mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line, 570 n->pos, "%s breaks %s", man_macronames[tok], 571 man_macronames[n->tok]); 572 573 man_node_delete(man, n); 574 man->flags &= ~MAN_BLINE; 575 } 576 577 /* 578 * Save the fact that we're in the next-line for a block. In 579 * this way, embedded roff instructions can "remember" state 580 * when they exit. 581 */ 582 583 if (MAN_BLINE & man->flags) 584 man->flags |= MAN_BPLINE; 585 586 /* Call to handler... */ 587 588 assert(man_macros[tok].fp); 589 if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf)) 590 goto err; 591 592 /* In quick mode (for mandocdb), abort after the NAME section. */ 593 594 if (man->quick && MAN_SH == tok) { 595 n = man->last; 596 if (MAN_BODY == n->type && 597 strcmp(n->prev->child->string, "NAME")) 598 return(2); 599 } 600 601 /* 602 * We weren't in a block-line scope when entering the 603 * above-parsed macro, so return. 604 */ 605 606 if ( ! (MAN_BPLINE & man->flags)) { 607 man->flags &= ~MAN_ILINE; 608 return(1); 609 } 610 man->flags &= ~MAN_BPLINE; 611 612 /* 613 * If we're in a block scope, then allow this macro to slip by 614 * without closing scope around it. 615 */ 616 617 if (MAN_ILINE & man->flags) { 618 man->flags &= ~MAN_ILINE; 619 return(1); 620 } 621 622 /* 623 * If we've opened a new next-line element scope, then return 624 * now, as the next line will close out the block scope. 625 */ 626 627 if (MAN_ELINE & man->flags) 628 return(1); 629 630 /* Close out the block scope opened in the prior line. */ 631 632 assert(MAN_BLINE & man->flags); 633 man->flags &= ~MAN_BLINE; 634 635 if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX)) 636 return(0); 637 return(man_body_alloc(man, ln, ppos, man->last->tok)); 638 639 err: /* Error out. */ 640 641 man->flags |= MAN_HALT; 642 return(0); 643 } 644 645 /* 646 * Unlink a node from its context. If "man" is provided, the last parse 647 * point will also be adjusted accordingly. 648 */ 649 static void 650 man_node_unlink(struct man *man, struct man_node *n) 651 { 652 653 /* Adjust siblings. */ 654 655 if (n->prev) 656 n->prev->next = n->next; 657 if (n->next) 658 n->next->prev = n->prev; 659 660 /* Adjust parent. */ 661 662 if (n->parent) { 663 n->parent->nchild--; 664 if (n->parent->child == n) 665 n->parent->child = n->prev ? n->prev : n->next; 666 } 667 668 /* Adjust parse point, if applicable. */ 669 670 if (man && man->last == n) { 671 /*XXX: this can occur when bailing from validation. */ 672 /*assert(NULL == n->next);*/ 673 if (n->prev) { 674 man->last = n->prev; 675 man->next = MAN_NEXT_SIBLING; 676 } else { 677 man->last = n->parent; 678 man->next = MAN_NEXT_CHILD; 679 } 680 } 681 682 if (man && man->first == n) 683 man->first = NULL; 684 } 685 686 const struct mparse * 687 man_mparse(const struct man *man) 688 { 689 690 assert(man && man->parse); 691 return(man->parse); 692 } 693 694 void 695 man_deroff(char **dest, const struct man_node *n) 696 { 697 char *cp; 698 size_t sz; 699 700 if (MAN_TEXT != n->type) { 701 for (n = n->child; n; n = n->next) 702 man_deroff(dest, n); 703 return; 704 } 705 706 /* Skip leading whitespace and escape sequences. */ 707 708 cp = n->string; 709 while ('\0' != *cp) { 710 if ('\\' == *cp) { 711 cp++; 712 mandoc_escape((const char **)&cp, NULL, NULL); 713 } else if (isspace((unsigned char)*cp)) 714 cp++; 715 else 716 break; 717 } 718 719 /* Skip trailing whitespace. */ 720 721 for (sz = strlen(cp); sz; sz--) 722 if (0 == isspace((unsigned char)cp[sz-1])) 723 break; 724 725 /* Skip empty strings. */ 726 727 if (0 == sz) 728 return; 729 730 if (NULL == *dest) { 731 *dest = mandoc_strndup(cp, sz); 732 return; 733 } 734 735 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 736 free(*dest); 737 *dest = cp; 738 } 739