1 /* $Vendor-Id: man.c,v 1.59 2010/03/29 10:10:35 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <sys/types.h> 22 23 #include <assert.h> 24 #include <ctype.h> 25 #include <stdarg.h> 26 #include <stdlib.h> 27 #include <stdio.h> 28 #include <string.h> 29 30 #include "libman.h" 31 #include "libmandoc.h" 32 33 const char *const __man_merrnames[WERRMAX] = { 34 "invalid character", /* WNPRINT */ 35 "invalid manual section", /* WMSEC */ 36 "invalid date format", /* WDATE */ 37 "scope of prior line violated", /* WLNSCOPE */ 38 "over-zealous prior line scope violation", /* WLNSCOPE2 */ 39 "trailing whitespace", /* WTSPACE */ 40 "unterminated quoted parameter", /* WTQUOTE */ 41 "document has no body", /* WNODATA */ 42 "document has no title/section", /* WNOTITLE */ 43 "invalid escape sequence", /* WESCAPE */ 44 "invalid number format", /* WNUMFMT */ 45 "expected block head arguments", /* WHEADARGS */ 46 "expected block body arguments", /* WBODYARGS */ 47 "expected empty block head", /* WNHEADARGS */ 48 "ill-formed macro", /* WMACROFORM */ 49 "scope open on exit", /* WEXITSCOPE */ 50 "no scope context", /* WNOSCOPE */ 51 "literal context already open", /* WOLITERAL */ 52 "no literal context open", /* WNLITERAL */ 53 "invalid nesting of roff declarations", /* WROFFNEST */ 54 "scope in roff instructions broken", /* WROFFSCOPE */ 55 "document title should be uppercase", /* WTITLECASE */ 56 }; 57 58 const char *const __man_macronames[MAN_MAX] = { 59 "br", "TH", "SH", "SS", 60 "TP", "LP", "PP", "P", 61 "IP", "HP", "SM", "SB", 62 "BI", "IB", "BR", "RB", 63 "R", "B", "I", "IR", 64 "RI", "na", "i", "sp", 65 "nf", "fi", "r", "RE", 66 "RS", "DT", "UC", "PD", 67 "Sp", "Vb", "Ve", "de", 68 "dei", "am", "ami", "ig", 69 ".", 70 }; 71 72 const char * const *man_macronames = __man_macronames; 73 74 static struct man_node *man_node_alloc(int, int, 75 enum man_type, enum mant); 76 static int man_node_append(struct man *, 77 struct man_node *); 78 static void man_node_free(struct man_node *); 79 static void man_node_unlink(struct man *, 80 struct man_node *); 81 static int man_ptext(struct man *, int, char *); 82 static int man_pmacro(struct man *, int, char *); 83 static void man_free1(struct man *); 84 static void man_alloc1(struct man *); 85 static int pstring(struct man *, int, int, 86 const char *, size_t); 87 static int macrowarn(struct man *, int, const char *); 88 89 90 const struct man_node * 91 man_node(const struct man *m) 92 { 93 94 return(MAN_HALT & m->flags ? NULL : m->first); 95 } 96 97 98 const struct man_meta * 99 man_meta(const struct man *m) 100 { 101 102 return(MAN_HALT & m->flags ? NULL : &m->meta); 103 } 104 105 106 void 107 man_reset(struct man *man) 108 { 109 110 man_free1(man); 111 man_alloc1(man); 112 } 113 114 115 void 116 man_free(struct man *man) 117 { 118 119 man_free1(man); 120 free(man); 121 } 122 123 124 struct man * 125 man_alloc(void *data, int pflags, const struct man_cb *cb) 126 { 127 struct man *p; 128 129 p = mandoc_calloc(1, sizeof(struct man)); 130 131 if (cb) 132 memcpy(&p->cb, cb, sizeof(struct man_cb)); 133 134 man_hash_init(); 135 p->data = data; 136 p->pflags = pflags; 137 138 man_alloc1(p); 139 return(p); 140 } 141 142 143 int 144 man_endparse(struct man *m) 145 { 146 147 if (MAN_HALT & m->flags) 148 return(0); 149 else if (man_macroend(m)) 150 return(1); 151 m->flags |= MAN_HALT; 152 return(0); 153 } 154 155 156 int 157 man_parseln(struct man *m, int ln, char *buf) 158 { 159 160 return('.' == *buf || '\'' == *buf ? 161 man_pmacro(m, ln, buf) : 162 man_ptext(m, ln, buf)); 163 } 164 165 166 static void 167 man_free1(struct man *man) 168 { 169 170 if (man->first) 171 man_node_delete(man, man->first); 172 if (man->meta.title) 173 free(man->meta.title); 174 if (man->meta.source) 175 free(man->meta.source); 176 if (man->meta.vol) 177 free(man->meta.vol); 178 } 179 180 181 static void 182 man_alloc1(struct man *m) 183 { 184 185 memset(&m->meta, 0, sizeof(struct man_meta)); 186 m->flags = 0; 187 m->last = mandoc_calloc(1, sizeof(struct man_node)); 188 m->first = m->last; 189 m->last->type = MAN_ROOT; 190 m->last->tok = MAN_MAX; 191 m->next = MAN_NEXT_CHILD; 192 } 193 194 195 static int 196 man_node_append(struct man *man, struct man_node *p) 197 { 198 199 assert(man->last); 200 assert(man->first); 201 assert(MAN_ROOT != p->type); 202 203 switch (man->next) { 204 case (MAN_NEXT_SIBLING): 205 man->last->next = p; 206 p->prev = man->last; 207 p->parent = man->last->parent; 208 break; 209 case (MAN_NEXT_CHILD): 210 man->last->child = p; 211 p->parent = man->last; 212 break; 213 default: 214 abort(); 215 /* NOTREACHED */ 216 } 217 218 assert(p->parent); 219 p->parent->nchild++; 220 221 if ( ! man_valid_pre(man, p)) 222 return(0); 223 224 switch (p->type) { 225 case (MAN_HEAD): 226 assert(MAN_BLOCK == p->parent->type); 227 p->parent->head = p; 228 break; 229 case (MAN_BODY): 230 assert(MAN_BLOCK == p->parent->type); 231 p->parent->body = p; 232 break; 233 default: 234 break; 235 } 236 237 man->last = p; 238 239 switch (p->type) { 240 case (MAN_TEXT): 241 if ( ! man_valid_post(man)) 242 return(0); 243 if ( ! man_action_post(man)) 244 return(0); 245 break; 246 default: 247 break; 248 } 249 250 return(1); 251 } 252 253 254 static struct man_node * 255 man_node_alloc(int line, int pos, enum man_type type, enum mant tok) 256 { 257 struct man_node *p; 258 259 p = mandoc_calloc(1, sizeof(struct man_node)); 260 p->line = line; 261 p->pos = pos; 262 p->type = type; 263 p->tok = tok; 264 return(p); 265 } 266 267 268 int 269 man_elem_alloc(struct man *m, int line, int pos, enum mant tok) 270 { 271 struct man_node *p; 272 273 p = man_node_alloc(line, pos, MAN_ELEM, tok); 274 if ( ! man_node_append(m, p)) 275 return(0); 276 m->next = MAN_NEXT_CHILD; 277 return(1); 278 } 279 280 281 int 282 man_head_alloc(struct man *m, int line, int pos, enum mant tok) 283 { 284 struct man_node *p; 285 286 p = man_node_alloc(line, pos, MAN_HEAD, tok); 287 if ( ! man_node_append(m, p)) 288 return(0); 289 m->next = MAN_NEXT_CHILD; 290 return(1); 291 } 292 293 294 int 295 man_body_alloc(struct man *m, int line, int pos, enum mant tok) 296 { 297 struct man_node *p; 298 299 p = man_node_alloc(line, pos, MAN_BODY, tok); 300 if ( ! man_node_append(m, p)) 301 return(0); 302 m->next = MAN_NEXT_CHILD; 303 return(1); 304 } 305 306 307 int 308 man_block_alloc(struct man *m, int line, int pos, enum mant tok) 309 { 310 struct man_node *p; 311 312 p = man_node_alloc(line, pos, MAN_BLOCK, tok); 313 if ( ! man_node_append(m, p)) 314 return(0); 315 m->next = MAN_NEXT_CHILD; 316 return(1); 317 } 318 319 320 static int 321 pstring(struct man *m, int line, int pos, 322 const char *p, size_t len) 323 { 324 struct man_node *n; 325 size_t sv; 326 327 n = man_node_alloc(line, pos, MAN_TEXT, MAN_MAX); 328 n->string = mandoc_malloc(len + 1); 329 sv = strlcpy(n->string, p, len + 1); 330 331 /* Prohibit truncation. */ 332 assert(sv < len + 1); 333 334 if ( ! man_node_append(m, n)) 335 return(0); 336 m->next = MAN_NEXT_SIBLING; 337 return(1); 338 } 339 340 341 int 342 man_word_alloc(struct man *m, int line, int pos, const char *word) 343 { 344 345 return(pstring(m, line, pos, word, strlen(word))); 346 } 347 348 349 /* 350 * Free all of the resources held by a node. This does NOT unlink a 351 * node from its context; for that, see man_node_unlink(). 352 */ 353 static void 354 man_node_free(struct man_node *p) 355 { 356 357 if (p->string) 358 free(p->string); 359 free(p); 360 } 361 362 363 void 364 man_node_delete(struct man *m, struct man_node *p) 365 { 366 367 while (p->child) 368 man_node_delete(m, p->child); 369 370 man_node_unlink(m, p); 371 man_node_free(p); 372 } 373 374 375 static int 376 man_ptext(struct man *m, int line, char *buf) 377 { 378 int i, j; 379 char sv; 380 381 /* Literal free-form text whitespace is preserved. */ 382 383 if (MAN_LITERAL & m->flags) { 384 if ( ! man_word_alloc(m, line, 0, buf)) 385 return(0); 386 goto descope; 387 } 388 389 /* First de-chunk and allocate words. */ 390 391 for (i = 0; ' ' == buf[i]; i++) 392 /* Skip leading whitespace. */ ; 393 394 if ('\0' == buf[i]) { 395 /* Trailing whitespace? */ 396 if (i && ' ' == buf[i - 1]) 397 if ( ! man_pwarn(m, line, i - 1, WTSPACE)) 398 return(0); 399 if ( ! pstring(m, line, 0, &buf[i], 0)) 400 return(0); 401 goto descope; 402 } 403 404 for (j = i; buf[i]; i++) { 405 if (' ' != buf[i]) 406 continue; 407 408 /* Escaped whitespace. */ 409 if (i && ' ' == buf[i] && '\\' == buf[i - 1]) 410 continue; 411 412 sv = buf[i]; 413 buf[i++] = '\0'; 414 415 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 416 return(0); 417 418 /* Trailing whitespace? Check at overwritten byte. */ 419 420 if (' ' == sv && '\0' == buf[i]) 421 if ( ! man_pwarn(m, line, i - 1, WTSPACE)) 422 return(0); 423 424 for ( ; ' ' == buf[i]; i++) 425 /* Skip trailing whitespace. */ ; 426 427 j = i; 428 429 /* Trailing whitespace? */ 430 431 if (' ' == buf[i - 1] && '\0' == buf[i]) 432 if ( ! man_pwarn(m, line, i - 1, WTSPACE)) 433 return(0); 434 435 if ('\0' == buf[i]) 436 break; 437 } 438 439 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 440 return(0); 441 442 descope: 443 444 /* 445 * Co-ordinate what happens with having a next-line scope open: 446 * first close out the element scope (if applicable), then close 447 * out the block scope (also if applicable). 448 */ 449 450 if (MAN_ELINE & m->flags) { 451 m->flags &= ~MAN_ELINE; 452 if ( ! man_unscope(m, m->last->parent, WERRMAX)) 453 return(0); 454 } 455 456 if ( ! (MAN_BLINE & m->flags)) 457 return(1); 458 m->flags &= ~MAN_BLINE; 459 460 if ( ! man_unscope(m, m->last->parent, WERRMAX)) 461 return(0); 462 return(man_body_alloc(m, line, 0, m->last->tok)); 463 } 464 465 466 static int 467 macrowarn(struct man *m, int ln, const char *buf) 468 { 469 if ( ! (MAN_IGN_MACRO & m->pflags)) 470 return(man_verr(m, ln, 0, 471 "unknown macro: %s%s", 472 buf, strlen(buf) > 3 ? "..." : "")); 473 return(man_vwarn(m, ln, 0, "unknown macro: %s%s", 474 buf, strlen(buf) > 3 ? "..." : "")); 475 } 476 477 478 int 479 man_pmacro(struct man *m, int ln, char *buf) 480 { 481 int i, j, ppos; 482 enum mant tok; 483 char mac[5]; 484 struct man_node *n; 485 486 /* Comments and empties are quickly ignored. */ 487 488 if ('\0' == buf[1]) 489 return(1); 490 491 i = 1; 492 493 /* 494 * Skip whitespace between the control character and initial 495 * text. "Whitespace" is both spaces and tabs. 496 */ 497 if (' ' == buf[i] || '\t' == buf[i]) { 498 i++; 499 while (buf[i] && (' ' == buf[i] || '\t' == buf[i])) 500 i++; 501 if ('\0' == buf[i]) 502 goto out; 503 } 504 505 ppos = i; 506 507 /* Copy the first word into a nil-terminated buffer. */ 508 509 for (j = 0; j < 4; j++, i++) { 510 if ('\0' == (mac[j] = buf[i])) 511 break; 512 else if (' ' == buf[i]) 513 break; 514 515 /* Check for invalid characters. */ 516 517 if (isgraph((u_char)buf[i])) 518 continue; 519 return(man_perr(m, ln, i, WNPRINT)); 520 } 521 522 mac[j] = '\0'; 523 524 if (j == 4 || j < 1) { 525 if ( ! (MAN_IGN_MACRO & m->pflags)) { 526 (void)man_perr(m, ln, ppos, WMACROFORM); 527 goto err; 528 } 529 if ( ! man_pwarn(m, ln, ppos, WMACROFORM)) 530 goto err; 531 return(1); 532 } 533 534 if (MAN_MAX == (tok = man_hash_find(mac))) { 535 if ( ! macrowarn(m, ln, mac)) 536 goto err; 537 return(1); 538 } 539 540 /* The macro is sane. Jump to the next word. */ 541 542 while (buf[i] && ' ' == buf[i]) 543 i++; 544 545 /* Trailing whitespace? */ 546 547 if ('\0' == buf[i] && ' ' == buf[i - 1]) 548 if ( ! man_pwarn(m, ln, i - 1, WTSPACE)) 549 goto err; 550 551 /* 552 * Remove prior ELINE macro, as it's being clobbering by a new 553 * macro. Note that NSCOPED macros do not close out ELINE 554 * macros---they don't print text---so we let those slip by. 555 * NOTE: we don't allow roff blocks (NOCLOSE) to be embedded 556 * here because that would stipulate blocks as children of 557 * elements! 558 */ 559 560 if ( ! (MAN_NSCOPED & man_macros[tok].flags) && 561 m->flags & MAN_ELINE) { 562 assert(MAN_TEXT != m->last->type); 563 564 /* 565 * This occurs in the following construction: 566 * .B 567 * .br 568 * .B 569 * .br 570 * I hate man macros. 571 * Flat-out disallow this madness. 572 */ 573 if (MAN_NSCOPED & man_macros[m->last->tok].flags) 574 return(man_perr(m, ln, ppos, WLNSCOPE)); 575 576 n = m->last; 577 578 assert(n); 579 assert(NULL == n->child); 580 assert(0 == n->nchild); 581 582 if ( ! man_nwarn(m, n, WLNSCOPE)) 583 return(0); 584 585 man_node_delete(m, n); 586 m->flags &= ~MAN_ELINE; 587 } 588 589 /* 590 * Save the fact that we're in the next-line for a block. In 591 * this way, embedded roff instructions can "remember" state 592 * when they exit. 593 */ 594 595 if (MAN_BLINE & m->flags) 596 m->flags |= MAN_BPLINE; 597 598 /* Call to handler... */ 599 600 assert(man_macros[tok].fp); 601 if ( ! (*man_macros[tok].fp)(m, tok, ln, ppos, &i, buf)) 602 goto err; 603 604 out: 605 /* 606 * We weren't in a block-line scope when entering the 607 * above-parsed macro, so return. 608 */ 609 610 if ( ! (MAN_BPLINE & m->flags)) { 611 m->flags &= ~MAN_ILINE; 612 return(1); 613 } 614 m->flags &= ~MAN_BPLINE; 615 616 /* 617 * If we're in a block scope, then allow this macro to slip by 618 * without closing scope around it. 619 */ 620 621 if (MAN_ILINE & m->flags) { 622 m->flags &= ~MAN_ILINE; 623 return(1); 624 } 625 626 /* 627 * If we've opened a new next-line element scope, then return 628 * now, as the next line will close out the block scope. 629 */ 630 631 if (MAN_ELINE & m->flags) 632 return(1); 633 634 /* Close out the block scope opened in the prior line. */ 635 636 assert(MAN_BLINE & m->flags); 637 m->flags &= ~MAN_BLINE; 638 639 if ( ! man_unscope(m, m->last->parent, WERRMAX)) 640 return(0); 641 return(man_body_alloc(m, ln, 0, m->last->tok)); 642 643 err: /* Error out. */ 644 645 m->flags |= MAN_HALT; 646 return(0); 647 } 648 649 650 int 651 man_verr(struct man *man, int ln, int pos, const char *fmt, ...) 652 { 653 char buf[256]; 654 va_list ap; 655 656 if (NULL == man->cb.man_err) 657 return(0); 658 659 va_start(ap, fmt); 660 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 661 va_end(ap); 662 return((*man->cb.man_err)(man->data, ln, pos, buf)); 663 } 664 665 666 int 667 man_vwarn(struct man *man, int ln, int pos, const char *fmt, ...) 668 { 669 char buf[256]; 670 va_list ap; 671 672 if (NULL == man->cb.man_warn) 673 return(0); 674 675 va_start(ap, fmt); 676 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 677 va_end(ap); 678 return((*man->cb.man_warn)(man->data, ln, pos, buf)); 679 } 680 681 682 int 683 man_err(struct man *m, int line, int pos, int iserr, enum merr type) 684 { 685 const char *p; 686 687 p = __man_merrnames[(int)type]; 688 assert(p); 689 690 if (iserr) 691 return(man_verr(m, line, pos, p)); 692 693 return(man_vwarn(m, line, pos, p)); 694 } 695 696 697 /* 698 * Unlink a node from its context. If "m" is provided, the last parse 699 * point will also be adjusted accordingly. 700 */ 701 static void 702 man_node_unlink(struct man *m, struct man_node *n) 703 { 704 705 /* Adjust siblings. */ 706 707 if (n->prev) 708 n->prev->next = n->next; 709 if (n->next) 710 n->next->prev = n->prev; 711 712 /* Adjust parent. */ 713 714 if (n->parent) { 715 n->parent->nchild--; 716 if (n->parent->child == n) 717 n->parent->child = n->prev ? n->prev : n->next; 718 } 719 720 /* Adjust parse point, if applicable. */ 721 722 if (m && m->last == n) { 723 /*XXX: this can occur when bailing from validation. */ 724 /*assert(NULL == n->next);*/ 725 if (n->prev) { 726 m->last = n->prev; 727 m->next = MAN_NEXT_SIBLING; 728 } else { 729 m->last = n->parent; 730 m->next = MAN_NEXT_CHILD; 731 } 732 } 733 734 if (m && m->first == n) 735 m->first = NULL; 736 } 737