1 /* $Vendor-Id: man.c,v 1.84 2010/07/22 23:03:15 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <sys/types.h> 22 23 #include <assert.h> 24 #include <ctype.h> 25 #include <stdarg.h> 26 #include <stdlib.h> 27 #include <stdio.h> 28 #include <string.h> 29 30 #include "mandoc.h" 31 #include "libman.h" 32 #include "libmandoc.h" 33 34 const char *const __man_macronames[MAN_MAX] = { 35 "br", "TH", "SH", "SS", 36 "TP", "LP", "PP", "P", 37 "IP", "HP", "SM", "SB", 38 "BI", "IB", "BR", "RB", 39 "R", "B", "I", "IR", 40 "RI", "na", "i", "sp", 41 "nf", "fi", "r", "RE", 42 "RS", "DT", "UC", "PD", 43 "Sp", "Vb", "Ve", "AT", 44 "in" 45 }; 46 47 const char * const *man_macronames = __man_macronames; 48 49 static struct man_node *man_node_alloc(int, int, 50 enum man_type, enum mant); 51 static int man_node_append(struct man *, 52 struct man_node *); 53 static void man_node_free(struct man_node *); 54 static void man_node_unlink(struct man *, 55 struct man_node *); 56 static int man_ptext(struct man *, int, char *, int); 57 static int man_pmacro(struct man *, int, char *, int); 58 static void man_free1(struct man *); 59 static void man_alloc1(struct man *); 60 static int macrowarn(struct man *, int, const char *, int); 61 62 63 const struct man_node * 64 man_node(const struct man *m) 65 { 66 67 return(MAN_HALT & m->flags ? NULL : m->first); 68 } 69 70 71 const struct man_meta * 72 man_meta(const struct man *m) 73 { 74 75 return(MAN_HALT & m->flags ? NULL : &m->meta); 76 } 77 78 79 void 80 man_reset(struct man *man) 81 { 82 83 man_free1(man); 84 man_alloc1(man); 85 } 86 87 88 void 89 man_free(struct man *man) 90 { 91 92 man_free1(man); 93 free(man); 94 } 95 96 97 struct man * 98 man_alloc(struct regset *regs, void *data, 99 int pflags, mandocmsg msg) 100 { 101 struct man *p; 102 103 p = mandoc_calloc(1, sizeof(struct man)); 104 105 man_hash_init(); 106 p->data = data; 107 p->pflags = pflags; 108 p->msg = msg; 109 p->regs = regs; 110 111 man_alloc1(p); 112 return(p); 113 } 114 115 116 int 117 man_endparse(struct man *m) 118 { 119 120 if (MAN_HALT & m->flags) 121 return(0); 122 else if (man_macroend(m)) 123 return(1); 124 m->flags |= MAN_HALT; 125 return(0); 126 } 127 128 129 int 130 man_parseln(struct man *m, int ln, char *buf, int offs) 131 { 132 133 if (MAN_HALT & m->flags) 134 return(0); 135 136 return(('.' == buf[offs] || '\'' == buf[offs]) ? 137 man_pmacro(m, ln, buf, offs) : 138 man_ptext(m, ln, buf, offs)); 139 } 140 141 142 static void 143 man_free1(struct man *man) 144 { 145 146 if (man->first) 147 man_node_delete(man, man->first); 148 if (man->meta.title) 149 free(man->meta.title); 150 if (man->meta.source) 151 free(man->meta.source); 152 if (man->meta.rawdate) 153 free(man->meta.rawdate); 154 if (man->meta.vol) 155 free(man->meta.vol); 156 if (man->meta.msec) 157 free(man->meta.msec); 158 } 159 160 161 static void 162 man_alloc1(struct man *m) 163 { 164 165 memset(&m->meta, 0, sizeof(struct man_meta)); 166 m->flags = 0; 167 m->last = mandoc_calloc(1, sizeof(struct man_node)); 168 m->first = m->last; 169 m->last->type = MAN_ROOT; 170 m->last->tok = MAN_MAX; 171 m->next = MAN_NEXT_CHILD; 172 } 173 174 175 static int 176 man_node_append(struct man *man, struct man_node *p) 177 { 178 179 assert(man->last); 180 assert(man->first); 181 assert(MAN_ROOT != p->type); 182 183 switch (man->next) { 184 case (MAN_NEXT_SIBLING): 185 man->last->next = p; 186 p->prev = man->last; 187 p->parent = man->last->parent; 188 break; 189 case (MAN_NEXT_CHILD): 190 man->last->child = p; 191 p->parent = man->last; 192 break; 193 default: 194 abort(); 195 /* NOTREACHED */ 196 } 197 198 assert(p->parent); 199 p->parent->nchild++; 200 201 if ( ! man_valid_pre(man, p)) 202 return(0); 203 204 switch (p->type) { 205 case (MAN_HEAD): 206 assert(MAN_BLOCK == p->parent->type); 207 p->parent->head = p; 208 break; 209 case (MAN_BODY): 210 assert(MAN_BLOCK == p->parent->type); 211 p->parent->body = p; 212 break; 213 default: 214 break; 215 } 216 217 man->last = p; 218 219 switch (p->type) { 220 case (MAN_TEXT): 221 if ( ! man_valid_post(man)) 222 return(0); 223 if ( ! man_action_post(man)) 224 return(0); 225 break; 226 default: 227 break; 228 } 229 230 return(1); 231 } 232 233 234 static struct man_node * 235 man_node_alloc(int line, int pos, enum man_type type, enum mant tok) 236 { 237 struct man_node *p; 238 239 p = mandoc_calloc(1, sizeof(struct man_node)); 240 p->line = line; 241 p->pos = pos; 242 p->type = type; 243 p->tok = tok; 244 return(p); 245 } 246 247 248 int 249 man_elem_alloc(struct man *m, int line, int pos, enum mant tok) 250 { 251 struct man_node *p; 252 253 p = man_node_alloc(line, pos, MAN_ELEM, tok); 254 if ( ! man_node_append(m, p)) 255 return(0); 256 m->next = MAN_NEXT_CHILD; 257 return(1); 258 } 259 260 261 int 262 man_head_alloc(struct man *m, int line, int pos, enum mant tok) 263 { 264 struct man_node *p; 265 266 p = man_node_alloc(line, pos, MAN_HEAD, tok); 267 if ( ! man_node_append(m, p)) 268 return(0); 269 m->next = MAN_NEXT_CHILD; 270 return(1); 271 } 272 273 274 int 275 man_body_alloc(struct man *m, int line, int pos, enum mant tok) 276 { 277 struct man_node *p; 278 279 p = man_node_alloc(line, pos, MAN_BODY, tok); 280 if ( ! man_node_append(m, p)) 281 return(0); 282 m->next = MAN_NEXT_CHILD; 283 return(1); 284 } 285 286 287 int 288 man_block_alloc(struct man *m, int line, int pos, enum mant tok) 289 { 290 struct man_node *p; 291 292 p = man_node_alloc(line, pos, MAN_BLOCK, tok); 293 if ( ! man_node_append(m, p)) 294 return(0); 295 m->next = MAN_NEXT_CHILD; 296 return(1); 297 } 298 299 300 int 301 man_word_alloc(struct man *m, int line, int pos, const char *word) 302 { 303 struct man_node *n; 304 size_t sv, len; 305 306 len = strlen(word); 307 308 n = man_node_alloc(line, pos, MAN_TEXT, MAN_MAX); 309 n->string = mandoc_malloc(len + 1); 310 sv = strlcpy(n->string, word, len + 1); 311 312 /* Prohibit truncation. */ 313 assert(sv < len + 1); 314 315 if ( ! man_node_append(m, n)) 316 return(0); 317 318 m->next = MAN_NEXT_SIBLING; 319 return(1); 320 } 321 322 323 /* 324 * Free all of the resources held by a node. This does NOT unlink a 325 * node from its context; for that, see man_node_unlink(). 326 */ 327 static void 328 man_node_free(struct man_node *p) 329 { 330 331 if (p->string) 332 free(p->string); 333 free(p); 334 } 335 336 337 void 338 man_node_delete(struct man *m, struct man_node *p) 339 { 340 341 while (p->child) 342 man_node_delete(m, p->child); 343 344 man_node_unlink(m, p); 345 man_node_free(p); 346 } 347 348 349 static int 350 man_ptext(struct man *m, int line, char *buf, int offs) 351 { 352 int i; 353 354 /* Ignore bogus comments. */ 355 356 if ('\\' == buf[offs] && 357 '.' == buf[offs + 1] && 358 '"' == buf[offs + 2]) 359 return(man_pmsg(m, line, offs, MANDOCERR_BADCOMMENT)); 360 361 /* Literal free-form text whitespace is preserved. */ 362 363 if (MAN_LITERAL & m->flags) { 364 if ( ! man_word_alloc(m, line, offs, buf + offs)) 365 return(0); 366 goto descope; 367 } 368 369 /* Pump blank lines directly into the backend. */ 370 371 for (i = offs; ' ' == buf[i]; i++) 372 /* Skip leading whitespace. */ ; 373 374 if ('\0' == buf[i]) { 375 /* Allocate a blank entry. */ 376 if ( ! man_word_alloc(m, line, offs, "")) 377 return(0); 378 goto descope; 379 } 380 381 /* 382 * Warn if the last un-escaped character is whitespace. Then 383 * strip away the remaining spaces (tabs stay!). 384 */ 385 386 i = (int)strlen(buf); 387 assert(i); 388 389 if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { 390 if (i > 1 && '\\' != buf[i - 2]) 391 if ( ! man_pmsg(m, line, i - 1, MANDOCERR_EOLNSPACE)) 392 return(0); 393 394 for (--i; i && ' ' == buf[i]; i--) 395 /* Spin back to non-space. */ ; 396 397 /* Jump ahead of escaped whitespace. */ 398 i += '\\' == buf[i] ? 2 : 1; 399 400 buf[i] = '\0'; 401 } 402 403 if ( ! man_word_alloc(m, line, offs, buf + offs)) 404 return(0); 405 406 /* 407 * End-of-sentence check. If the last character is an unescaped 408 * EOS character, then flag the node as being the end of a 409 * sentence. The front-end will know how to interpret this. 410 */ 411 412 assert(i); 413 if (mandoc_eos(buf, (size_t)i, 0)) 414 m->last->flags |= MAN_EOS; 415 416 descope: 417 /* 418 * Co-ordinate what happens with having a next-line scope open: 419 * first close out the element scope (if applicable), then close 420 * out the block scope (also if applicable). 421 */ 422 423 if (MAN_ELINE & m->flags) { 424 m->flags &= ~MAN_ELINE; 425 if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) 426 return(0); 427 } 428 429 if ( ! (MAN_BLINE & m->flags)) 430 return(1); 431 m->flags &= ~MAN_BLINE; 432 433 if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) 434 return(0); 435 return(man_body_alloc(m, line, offs, m->last->tok)); 436 } 437 438 439 static int 440 macrowarn(struct man *m, int ln, const char *buf, int offs) 441 { 442 int rc; 443 444 rc = man_vmsg(m, MANDOCERR_MACRO, ln, offs, 445 "unknown macro: %s%s", 446 buf, strlen(buf) > 3 ? "..." : ""); 447 448 return(MAN_IGN_MACRO & m->pflags ? rc : 0); 449 } 450 451 452 int 453 man_pmacro(struct man *m, int ln, char *buf, int offs) 454 { 455 int i, j, ppos; 456 enum mant tok; 457 char mac[5]; 458 struct man_node *n; 459 460 /* Comments and empties are quickly ignored. */ 461 462 offs++; 463 464 if ('\0' == buf[offs]) 465 return(1); 466 467 i = offs; 468 469 /* 470 * Skip whitespace between the control character and initial 471 * text. "Whitespace" is both spaces and tabs. 472 */ 473 474 if (' ' == buf[i] || '\t' == buf[i]) { 475 i++; 476 while (buf[i] && (' ' == buf[i] || '\t' == buf[i])) 477 i++; 478 if ('\0' == buf[i]) 479 goto out; 480 } 481 482 ppos = i; 483 484 /* Copy the first word into a nil-terminated buffer. */ 485 486 for (j = 0; j < 4; j++, i++) { 487 if ('\0' == (mac[j] = buf[i])) 488 break; 489 else if (' ' == buf[i]) 490 break; 491 492 /* Check for invalid characters. */ 493 494 if (isgraph((u_char)buf[i])) 495 continue; 496 if ( ! man_pmsg(m, ln, i, MANDOCERR_BADCHAR)) 497 return(0); 498 i--; 499 } 500 501 mac[j] = '\0'; 502 503 if (j == 4 || j < 1) { 504 if ( ! macrowarn(m, ln, mac, ppos)) 505 goto err; 506 return(1); 507 } 508 509 if (MAN_MAX == (tok = man_hash_find(mac))) { 510 if ( ! macrowarn(m, ln, mac, ppos)) 511 goto err; 512 return(1); 513 } 514 515 /* The macro is sane. Jump to the next word. */ 516 517 while (buf[i] && ' ' == buf[i]) 518 i++; 519 520 /* 521 * Trailing whitespace. Note that tabs are allowed to be passed 522 * into the parser as "text", so we only warn about spaces here. 523 */ 524 525 if ('\0' == buf[i] && ' ' == buf[i - 1]) 526 if ( ! man_pmsg(m, ln, i - 1, MANDOCERR_EOLNSPACE)) 527 goto err; 528 529 /* 530 * Remove prior ELINE macro, as it's being clobbering by a new 531 * macro. Note that NSCOPED macros do not close out ELINE 532 * macros---they don't print text---so we let those slip by. 533 */ 534 535 if ( ! (MAN_NSCOPED & man_macros[tok].flags) && 536 m->flags & MAN_ELINE) { 537 assert(MAN_TEXT != m->last->type); 538 539 /* 540 * This occurs in the following construction: 541 * .B 542 * .br 543 * .B 544 * .br 545 * I hate man macros. 546 * Flat-out disallow this madness. 547 */ 548 if (MAN_NSCOPED & man_macros[m->last->tok].flags) { 549 man_pmsg(m, ln, ppos, MANDOCERR_SYNTLINESCOPE); 550 return(0); 551 } 552 553 n = m->last; 554 555 assert(n); 556 assert(NULL == n->child); 557 assert(0 == n->nchild); 558 559 if ( ! man_nmsg(m, n, MANDOCERR_LINESCOPE)) 560 return(0); 561 562 man_node_delete(m, n); 563 m->flags &= ~MAN_ELINE; 564 } 565 566 /* 567 * Save the fact that we're in the next-line for a block. In 568 * this way, embedded roff instructions can "remember" state 569 * when they exit. 570 */ 571 572 if (MAN_BLINE & m->flags) 573 m->flags |= MAN_BPLINE; 574 575 /* Call to handler... */ 576 577 assert(man_macros[tok].fp); 578 if ( ! (*man_macros[tok].fp)(m, tok, ln, ppos, &i, buf)) 579 goto err; 580 581 out: 582 /* 583 * We weren't in a block-line scope when entering the 584 * above-parsed macro, so return. 585 */ 586 587 if ( ! (MAN_BPLINE & m->flags)) { 588 m->flags &= ~MAN_ILINE; 589 return(1); 590 } 591 m->flags &= ~MAN_BPLINE; 592 593 /* 594 * If we're in a block scope, then allow this macro to slip by 595 * without closing scope around it. 596 */ 597 598 if (MAN_ILINE & m->flags) { 599 m->flags &= ~MAN_ILINE; 600 return(1); 601 } 602 603 /* 604 * If we've opened a new next-line element scope, then return 605 * now, as the next line will close out the block scope. 606 */ 607 608 if (MAN_ELINE & m->flags) 609 return(1); 610 611 /* Close out the block scope opened in the prior line. */ 612 613 assert(MAN_BLINE & m->flags); 614 m->flags &= ~MAN_BLINE; 615 616 if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) 617 return(0); 618 return(man_body_alloc(m, ln, offs, m->last->tok)); 619 620 err: /* Error out. */ 621 622 m->flags |= MAN_HALT; 623 return(0); 624 } 625 626 627 int 628 man_vmsg(struct man *man, enum mandocerr t, 629 int ln, int pos, const char *fmt, ...) 630 { 631 char buf[256]; 632 va_list ap; 633 634 va_start(ap, fmt); 635 vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 636 va_end(ap); 637 return((*man->msg)(t, man->data, ln, pos, buf)); 638 } 639 640 641 /* 642 * Unlink a node from its context. If "m" is provided, the last parse 643 * point will also be adjusted accordingly. 644 */ 645 static void 646 man_node_unlink(struct man *m, struct man_node *n) 647 { 648 649 /* Adjust siblings. */ 650 651 if (n->prev) 652 n->prev->next = n->next; 653 if (n->next) 654 n->next->prev = n->prev; 655 656 /* Adjust parent. */ 657 658 if (n->parent) { 659 n->parent->nchild--; 660 if (n->parent->child == n) 661 n->parent->child = n->prev ? n->prev : n->next; 662 } 663 664 /* Adjust parse point, if applicable. */ 665 666 if (m && m->last == n) { 667 /*XXX: this can occur when bailing from validation. */ 668 /*assert(NULL == n->next);*/ 669 if (n->prev) { 670 m->last = n->prev; 671 m->next = MAN_NEXT_SIBLING; 672 } else { 673 m->last = n->parent; 674 m->next = MAN_NEXT_CHILD; 675 } 676 } 677 678 if (m && m->first == n) 679 m->first = NULL; 680 } 681