1 /* $Id: man.c,v 1.26 2010/05/08 01:52:07 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #include <sys/types.h> 18 19 #include <assert.h> 20 #include <ctype.h> 21 #include <stdarg.h> 22 #include <stdlib.h> 23 #include <stdio.h> 24 #include <string.h> 25 26 #include "libman.h" 27 #include "libmandoc.h" 28 29 const char *const __man_merrnames[WERRMAX] = { 30 "invalid character", /* WNPRINT */ 31 "invalid manual section", /* WMSEC */ 32 "invalid date format", /* WDATE */ 33 "scope of prior line violated", /* WLNSCOPE */ 34 "over-zealous prior line scope violation", /* WLNSCOPE2 */ 35 "trailing whitespace", /* WTSPACE */ 36 "unterminated quoted parameter", /* WTQUOTE */ 37 "document has no body", /* WNODATA */ 38 "document has no title/section", /* WNOTITLE */ 39 "invalid escape sequence", /* WESCAPE */ 40 "invalid number format", /* WNUMFMT */ 41 "expected block head arguments", /* WHEADARGS */ 42 "expected block body arguments", /* WBODYARGS */ 43 "expected empty block head", /* WNHEADARGS */ 44 "ill-formed macro", /* WMACROFORM */ 45 "scope open on exit", /* WEXITSCOPE */ 46 "no scope context", /* WNOSCOPE */ 47 "literal context already open", /* WOLITERAL */ 48 "no literal context open", /* WNLITERAL */ 49 "invalid nesting of roff declarations", /* WROFFNEST */ 50 "scope in roff instructions broken", /* WROFFSCOPE */ 51 "document title should be uppercase", /* WTITLECASE */ 52 "deprecated comment style", /* WBADCOMMENT */ 53 }; 54 55 const char *const __man_macronames[MAN_MAX] = { 56 "br", "TH", "SH", "SS", 57 "TP", "LP", "PP", "P", 58 "IP", "HP", "SM", "SB", 59 "BI", "IB", "BR", "RB", 60 "R", "B", "I", "IR", 61 "RI", "na", "i", "sp", 62 "nf", "fi", "r", "RE", 63 "RS", "DT", "UC", "PD", 64 "Sp", "Vb", "Ve", "de", 65 "dei", "am", "ami", "ig", 66 ".", "if", "ie", "el", 67 }; 68 69 const char * const *man_macronames = __man_macronames; 70 71 static struct man_node *man_node_alloc(int, int, 72 enum man_type, enum mant); 73 static int man_node_append(struct man *, 74 struct man_node *); 75 static void man_node_free(struct man_node *); 76 static void man_node_unlink(struct man *, 77 struct man_node *); 78 static int man_ptext(struct man *, int, char *); 79 static int man_pmacro(struct man *, int, char *); 80 static void man_free1(struct man *); 81 static void man_alloc1(struct man *); 82 static int pstring(struct man *, int, int, 83 const char *, size_t); 84 static int macrowarn(struct man *, int, const char *); 85 86 87 const struct man_node * 88 man_node(const struct man *m) 89 { 90 91 return(MAN_HALT & m->flags ? NULL : m->first); 92 } 93 94 95 const struct man_meta * 96 man_meta(const struct man *m) 97 { 98 99 return(MAN_HALT & m->flags ? NULL : &m->meta); 100 } 101 102 103 void 104 man_reset(struct man *man) 105 { 106 107 man_free1(man); 108 man_alloc1(man); 109 } 110 111 112 void 113 man_free(struct man *man) 114 { 115 116 man_free1(man); 117 free(man); 118 } 119 120 121 struct man * 122 man_alloc(void *data, int pflags, const struct man_cb *cb) 123 { 124 struct man *p; 125 126 p = mandoc_calloc(1, sizeof(struct man)); 127 128 if (cb) 129 memcpy(&p->cb, cb, sizeof(struct man_cb)); 130 131 man_hash_init(); 132 p->data = data; 133 p->pflags = pflags; 134 135 man_alloc1(p); 136 return(p); 137 } 138 139 140 int 141 man_endparse(struct man *m) 142 { 143 144 if (MAN_HALT & m->flags) 145 return(0); 146 else if (man_macroend(m)) 147 return(1); 148 m->flags |= MAN_HALT; 149 return(0); 150 } 151 152 153 int 154 man_parseln(struct man *m, int ln, char *buf) 155 { 156 char *p; 157 size_t len; 158 int brace_close = 0; 159 160 if ((len = strlen(buf)) > 1) { 161 p = buf + (len - 2); 162 if (p[0] == '\\' && p[1] == '}') { 163 brace_close = 1; 164 *p = '\0'; 165 } 166 } 167 168 if ('.' == *buf || '\'' == *buf) { 169 if ( ! man_pmacro(m, ln, buf)) 170 return(0); 171 } else { 172 if ( ! man_ptext(m, ln, buf)) 173 return(0); 174 } 175 176 return(brace_close ? man_brace_close(m, ln, len-2) : 1); 177 } 178 179 180 static void 181 man_free1(struct man *man) 182 { 183 184 if (man->first) 185 man_node_delete(man, man->first); 186 if (man->meta.title) 187 free(man->meta.title); 188 if (man->meta.source) 189 free(man->meta.source); 190 if (man->meta.vol) 191 free(man->meta.vol); 192 } 193 194 195 static void 196 man_alloc1(struct man *m) 197 { 198 199 memset(&m->meta, 0, sizeof(struct man_meta)); 200 m->flags = 0; 201 m->last = mandoc_calloc(1, sizeof(struct man_node)); 202 m->first = m->last; 203 m->last->type = MAN_ROOT; 204 m->last->tok = MAN_MAX; 205 m->next = MAN_NEXT_CHILD; 206 } 207 208 209 static int 210 man_node_append(struct man *man, struct man_node *p) 211 { 212 213 assert(man->last); 214 assert(man->first); 215 assert(MAN_ROOT != p->type); 216 217 switch (man->next) { 218 case (MAN_NEXT_SIBLING): 219 man->last->next = p; 220 p->prev = man->last; 221 p->parent = man->last->parent; 222 break; 223 case (MAN_NEXT_CHILD): 224 man->last->child = p; 225 p->parent = man->last; 226 break; 227 default: 228 abort(); 229 /* NOTREACHED */ 230 } 231 232 assert(p->parent); 233 p->parent->nchild++; 234 235 if ( ! man_valid_pre(man, p)) 236 return(0); 237 238 switch (p->type) { 239 case (MAN_HEAD): 240 assert(MAN_BLOCK == p->parent->type); 241 p->parent->head = p; 242 break; 243 case (MAN_BODY): 244 assert(MAN_BLOCK == p->parent->type); 245 p->parent->body = p; 246 break; 247 default: 248 break; 249 } 250 251 man->last = p; 252 253 switch (p->type) { 254 case (MAN_TEXT): 255 if ( ! man_valid_post(man)) 256 return(0); 257 if ( ! man_action_post(man)) 258 return(0); 259 break; 260 default: 261 break; 262 } 263 264 return(1); 265 } 266 267 268 static struct man_node * 269 man_node_alloc(int line, int pos, enum man_type type, enum mant tok) 270 { 271 struct man_node *p; 272 273 p = mandoc_calloc(1, sizeof(struct man_node)); 274 p->line = line; 275 p->pos = pos; 276 p->type = type; 277 p->tok = tok; 278 return(p); 279 } 280 281 282 int 283 man_elem_alloc(struct man *m, int line, int pos, enum mant tok) 284 { 285 struct man_node *p; 286 287 p = man_node_alloc(line, pos, MAN_ELEM, tok); 288 if ( ! man_node_append(m, p)) 289 return(0); 290 m->next = MAN_NEXT_CHILD; 291 return(1); 292 } 293 294 295 int 296 man_head_alloc(struct man *m, int line, int pos, enum mant tok) 297 { 298 struct man_node *p; 299 300 p = man_node_alloc(line, pos, MAN_HEAD, tok); 301 if ( ! man_node_append(m, p)) 302 return(0); 303 m->next = MAN_NEXT_CHILD; 304 return(1); 305 } 306 307 308 int 309 man_body_alloc(struct man *m, int line, int pos, enum mant tok) 310 { 311 struct man_node *p; 312 313 p = man_node_alloc(line, pos, MAN_BODY, tok); 314 if ( ! man_node_append(m, p)) 315 return(0); 316 m->next = MAN_NEXT_CHILD; 317 return(1); 318 } 319 320 321 int 322 man_block_alloc(struct man *m, int line, int pos, enum mant tok) 323 { 324 struct man_node *p; 325 326 p = man_node_alloc(line, pos, MAN_BLOCK, tok); 327 if ( ! man_node_append(m, p)) 328 return(0); 329 m->next = MAN_NEXT_CHILD; 330 return(1); 331 } 332 333 334 static int 335 pstring(struct man *m, int line, int pos, 336 const char *p, size_t len) 337 { 338 struct man_node *n; 339 size_t sv; 340 341 n = man_node_alloc(line, pos, MAN_TEXT, MAN_MAX); 342 n->string = mandoc_malloc(len + 1); 343 sv = strlcpy(n->string, p, len + 1); 344 345 /* Prohibit truncation. */ 346 assert(sv < len + 1); 347 348 if ( ! man_node_append(m, n)) 349 return(0); 350 m->next = MAN_NEXT_SIBLING; 351 return(1); 352 } 353 354 355 int 356 man_word_alloc(struct man *m, int line, int pos, const char *word) 357 { 358 359 return(pstring(m, line, pos, word, strlen(word))); 360 } 361 362 363 /* 364 * Free all of the resources held by a node. This does NOT unlink a 365 * node from its context; for that, see man_node_unlink(). 366 */ 367 static void 368 man_node_free(struct man_node *p) 369 { 370 371 if (p->string) 372 free(p->string); 373 free(p); 374 } 375 376 377 void 378 man_node_delete(struct man *m, struct man_node *p) 379 { 380 381 while (p->child) 382 man_node_delete(m, p->child); 383 384 man_node_unlink(m, p); 385 man_node_free(p); 386 } 387 388 389 static int 390 man_ptext(struct man *m, int line, char *buf) 391 { 392 int i, j; 393 char sv; 394 395 /* Ignore bogus comments. */ 396 397 if ('\\' == buf[0] && '.' == buf[1] && '\"' == buf[2]) 398 return(man_pwarn(m, line, 0, WBADCOMMENT)); 399 400 /* Literal free-form text whitespace is preserved. */ 401 402 if (MAN_LITERAL & m->flags) { 403 if ( ! man_word_alloc(m, line, 0, buf)) 404 return(0); 405 goto descope; 406 } 407 408 /* First de-chunk and allocate words. */ 409 410 for (i = 0; ' ' == buf[i]; i++) 411 /* Skip leading whitespace. */ ; 412 413 if ('\0' == buf[i]) { 414 /* Trailing whitespace? */ 415 if (i && ' ' == buf[i - 1]) 416 if ( ! man_pwarn(m, line, i - 1, WTSPACE)) 417 return(0); 418 if ( ! pstring(m, line, 0, &buf[i], 0)) 419 return(0); 420 goto descope; 421 } 422 423 for (j = i; buf[i]; i++) { 424 if (' ' != buf[i]) 425 continue; 426 427 /* Escaped whitespace. */ 428 if (i && ' ' == buf[i] && '\\' == buf[i - 1]) 429 continue; 430 431 sv = buf[i]; 432 buf[i++] = '\0'; 433 434 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 435 return(0); 436 437 /* Trailing whitespace? Check at overwritten byte. */ 438 439 if (' ' == sv && '\0' == buf[i]) 440 if ( ! man_pwarn(m, line, i - 1, WTSPACE)) 441 return(0); 442 443 for ( ; ' ' == buf[i]; i++) 444 /* Skip trailing whitespace. */ ; 445 446 j = i; 447 448 /* Trailing whitespace? */ 449 450 if (' ' == buf[i - 1] && '\0' == buf[i]) 451 if ( ! man_pwarn(m, line, i - 1, WTSPACE)) 452 return(0); 453 454 if ('\0' == buf[i]) 455 break; 456 } 457 458 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 459 return(0); 460 461 descope: 462 463 /* 464 * Co-ordinate what happens with having a next-line scope open: 465 * first close out the element scope (if applicable), then close 466 * out the block scope (also if applicable). 467 */ 468 469 if (MAN_ELINE & m->flags) { 470 m->flags &= ~MAN_ELINE; 471 if ( ! man_unscope(m, m->last->parent, WERRMAX)) 472 return(0); 473 } 474 475 if ( ! (MAN_BLINE & m->flags)) 476 return(1); 477 m->flags &= ~MAN_BLINE; 478 479 if ( ! man_unscope(m, m->last->parent, WERRMAX)) 480 return(0); 481 return(man_body_alloc(m, line, 0, m->last->tok)); 482 } 483 484 485 static int 486 macrowarn(struct man *m, int ln, const char *buf) 487 { 488 if ( ! (MAN_IGN_MACRO & m->pflags)) 489 return(man_verr(m, ln, 0, 490 "unknown macro: %s%s", 491 buf, strlen(buf) > 3 ? "..." : "")); 492 return(man_vwarn(m, ln, 0, "unknown macro: %s%s", 493 buf, strlen(buf) > 3 ? "..." : "")); 494 } 495 496 497 int 498 man_pmacro(struct man *m, int ln, char *buf) 499 { 500 int i, j, ppos; 501 enum mant tok; 502 char mac[5]; 503 struct man_node *n; 504 505 /* Comments and empties are quickly ignored. */ 506 507 if ('\0' == buf[1]) 508 return(1); 509 510 i = 1; 511 512 /* 513 * Skip whitespace between the control character and initial 514 * text. "Whitespace" is both spaces and tabs. 515 */ 516 if (' ' == buf[i] || '\t' == buf[i]) { 517 i++; 518 while (buf[i] && (' ' == buf[i] || '\t' == buf[i])) 519 i++; 520 if ('\0' == buf[i]) 521 goto out; 522 } 523 524 ppos = i; 525 526 /* Copy the first word into a nil-terminated buffer. */ 527 528 for (j = 0; j < 4; j++, i++) { 529 if ('\0' == (mac[j] = buf[i])) 530 break; 531 else if (' ' == buf[i]) 532 break; 533 534 /* Check for invalid characters. */ 535 536 if (isgraph((u_char)buf[i])) 537 continue; 538 return(man_perr(m, ln, i, WNPRINT)); 539 } 540 541 mac[j] = '\0'; 542 543 if (j == 4 || j < 1) { 544 if ( ! (MAN_IGN_MACRO & m->pflags)) { 545 (void)man_perr(m, ln, ppos, WMACROFORM); 546 goto err; 547 } 548 if ( ! man_pwarn(m, ln, ppos, WMACROFORM)) 549 goto err; 550 return(1); 551 } 552 553 if (MAN_MAX == (tok = man_hash_find(mac))) { 554 if ( ! macrowarn(m, ln, mac)) 555 goto err; 556 return(1); 557 } 558 559 /* The macro is sane. Jump to the next word. */ 560 561 while (buf[i] && ' ' == buf[i]) 562 i++; 563 564 /* Trailing whitespace? */ 565 566 if ('\0' == buf[i] && ' ' == buf[i - 1]) 567 if ( ! man_pwarn(m, ln, i - 1, WTSPACE)) 568 goto err; 569 570 /* 571 * Remove prior ELINE macro, as it's being clobbering by a new 572 * macro. Note that NSCOPED macros do not close out ELINE 573 * macros---they don't print text---so we let those slip by. 574 * NOTE: we don't allow roff blocks (NOCLOSE) to be embedded 575 * here because that would stipulate blocks as children of 576 * elements! 577 */ 578 579 if ( ! (MAN_NSCOPED & man_macros[tok].flags) && 580 m->flags & MAN_ELINE) { 581 assert(MAN_TEXT != m->last->type); 582 583 /* 584 * This occurs in the following construction: 585 * .B 586 * .br 587 * .B 588 * .br 589 * I hate man macros. 590 * Flat-out disallow this madness. 591 */ 592 if (MAN_NSCOPED & man_macros[m->last->tok].flags) 593 return(man_perr(m, ln, ppos, WLNSCOPE)); 594 595 n = m->last; 596 597 assert(n); 598 assert(NULL == n->child); 599 assert(0 == n->nchild); 600 601 if ( ! man_nwarn(m, n, WLNSCOPE)) 602 return(0); 603 604 man_node_delete(m, n); 605 m->flags &= ~MAN_ELINE; 606 } 607 608 /* 609 * Save the fact that we're in the next-line for a block. In 610 * this way, embedded roff instructions can "remember" state 611 * when they exit. 612 */ 613 614 if (MAN_BLINE & m->flags) 615 m->flags |= MAN_BPLINE; 616 617 /* Call to handler... */ 618 619 assert(man_macros[tok].fp); 620 if ( ! (*man_macros[tok].fp)(m, tok, ln, ppos, &i, buf)) 621 goto err; 622 623 out: 624 /* 625 * We weren't in a block-line scope when entering the 626 * above-parsed macro, so return. 627 */ 628 629 if ( ! (MAN_BPLINE & m->flags)) { 630 m->flags &= ~MAN_ILINE; 631 return(1); 632 } 633 m->flags &= ~MAN_BPLINE; 634 635 /* 636 * If we're in a block scope, then allow this macro to slip by 637 * without closing scope around it. 638 */ 639 640 if (MAN_ILINE & m->flags) { 641 m->flags &= ~MAN_ILINE; 642 return(1); 643 } 644 645 /* 646 * If we've opened a new next-line element scope, then return 647 * now, as the next line will close out the block scope. 648 */ 649 650 if (MAN_ELINE & m->flags) 651 return(1); 652 653 /* Close out the block scope opened in the prior line. */ 654 655 assert(MAN_BLINE & m->flags); 656 m->flags &= ~MAN_BLINE; 657 658 if ( ! man_unscope(m, m->last->parent, WERRMAX)) 659 return(0); 660 return(man_body_alloc(m, ln, 0, m->last->tok)); 661 662 err: /* Error out. */ 663 664 m->flags |= MAN_HALT; 665 return(0); 666 } 667 668 669 int 670 man_verr(struct man *man, int ln, int pos, const char *fmt, ...) 671 { 672 char buf[256]; 673 va_list ap; 674 675 if (NULL == man->cb.man_err) 676 return(0); 677 678 va_start(ap, fmt); 679 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 680 va_end(ap); 681 return((*man->cb.man_err)(man->data, ln, pos, buf)); 682 } 683 684 685 int 686 man_vwarn(struct man *man, int ln, int pos, const char *fmt, ...) 687 { 688 char buf[256]; 689 va_list ap; 690 691 if (NULL == man->cb.man_warn) 692 return(0); 693 694 va_start(ap, fmt); 695 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 696 va_end(ap); 697 return((*man->cb.man_warn)(man->data, ln, pos, buf)); 698 } 699 700 701 int 702 man_err(struct man *m, int line, int pos, int iserr, enum merr type) 703 { 704 const char *p; 705 706 p = __man_merrnames[(int)type]; 707 assert(p); 708 709 if (iserr) 710 return(man_verr(m, line, pos, p)); 711 712 return(man_vwarn(m, line, pos, p)); 713 } 714 715 716 /* 717 * Unlink a node from its context. If "m" is provided, the last parse 718 * point will also be adjusted accordingly. 719 */ 720 static void 721 man_node_unlink(struct man *m, struct man_node *n) 722 { 723 724 /* Adjust siblings. */ 725 726 if (n->prev) 727 n->prev->next = n->next; 728 if (n->next) 729 n->next->prev = n->prev; 730 731 /* Adjust parent. */ 732 733 if (n->parent) { 734 n->parent->nchild--; 735 if (n->parent->child == n) 736 n->parent->child = n->prev ? n->prev : n->next; 737 } 738 739 /* Adjust parse point, if applicable. */ 740 741 if (m && m->last == n) { 742 /*XXX: this can occur when bailing from validation. */ 743 /*assert(NULL == n->next);*/ 744 if (n->prev) { 745 m->last = n->prev; 746 m->next = MAN_NEXT_SIBLING; 747 } else { 748 m->last = n->parent; 749 m->next = MAN_NEXT_CHILD; 750 } 751 } 752 753 if (m && m->first == n) 754 m->first = NULL; 755 } 756