1 /* $Id: man.c,v 1.96 2011/01/03 11:31:26 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <sys/types.h> 22 23 #include <assert.h> 24 #include <stdarg.h> 25 #include <stdlib.h> 26 #include <stdio.h> 27 #include <string.h> 28 29 #include "mandoc.h" 30 #include "libman.h" 31 #include "libmandoc.h" 32 33 const char *const __man_macronames[MAN_MAX] = { 34 "br", "TH", "SH", "SS", 35 "TP", "LP", "PP", "P", 36 "IP", "HP", "SM", "SB", 37 "BI", "IB", "BR", "RB", 38 "R", "B", "I", "IR", 39 "RI", "na", "sp", "nf", 40 "fi", "RE", "RS", "DT", 41 "UC", "PD", "AT", "in", 42 "ft" 43 }; 44 45 const char * const *man_macronames = __man_macronames; 46 47 static struct man_node *man_node_alloc(int, int, 48 enum man_type, enum mant); 49 static int man_node_append(struct man *, 50 struct man_node *); 51 static int man_span_alloc(struct man *, 52 const struct tbl_span *); 53 static void man_node_free(struct man_node *); 54 static void man_node_unlink(struct man *, 55 struct man_node *); 56 static int man_ptext(struct man *, int, char *, int); 57 static int man_pmacro(struct man *, int, char *, int); 58 static void man_free1(struct man *); 59 static void man_alloc1(struct man *); 60 static int man_descope(struct man *, int, int); 61 62 63 const struct man_node * 64 man_node(const struct man *m) 65 { 66 67 assert( ! (MAN_HALT & m->flags)); 68 return(m->first); 69 } 70 71 72 const struct man_meta * 73 man_meta(const struct man *m) 74 { 75 76 assert( ! (MAN_HALT & m->flags)); 77 return(&m->meta); 78 } 79 80 81 void 82 man_reset(struct man *man) 83 { 84 85 man_free1(man); 86 man_alloc1(man); 87 } 88 89 90 void 91 man_free(struct man *man) 92 { 93 94 man_free1(man); 95 free(man); 96 } 97 98 99 struct man * 100 man_alloc(struct regset *regs, void *data, mandocmsg msg) 101 { 102 struct man *p; 103 104 p = mandoc_calloc(1, sizeof(struct man)); 105 106 man_hash_init(); 107 p->data = data; 108 p->msg = msg; 109 p->regs = regs; 110 111 man_alloc1(p); 112 return(p); 113 } 114 115 116 int 117 man_endparse(struct man *m) 118 { 119 120 assert( ! (MAN_HALT & m->flags)); 121 if (man_macroend(m)) 122 return(1); 123 m->flags |= MAN_HALT; 124 return(0); 125 } 126 127 128 int 129 man_parseln(struct man *m, int ln, char *buf, int offs) 130 { 131 132 assert( ! (MAN_HALT & m->flags)); 133 return(('.' == buf[offs] || '\'' == buf[offs]) ? 134 man_pmacro(m, ln, buf, offs) : 135 man_ptext(m, ln, buf, offs)); 136 } 137 138 139 static void 140 man_free1(struct man *man) 141 { 142 143 if (man->first) 144 man_node_delete(man, man->first); 145 if (man->meta.title) 146 free(man->meta.title); 147 if (man->meta.source) 148 free(man->meta.source); 149 if (man->meta.rawdate) 150 free(man->meta.rawdate); 151 if (man->meta.vol) 152 free(man->meta.vol); 153 if (man->meta.msec) 154 free(man->meta.msec); 155 } 156 157 158 static void 159 man_alloc1(struct man *m) 160 { 161 162 memset(&m->meta, 0, sizeof(struct man_meta)); 163 m->flags = 0; 164 m->last = mandoc_calloc(1, sizeof(struct man_node)); 165 m->first = m->last; 166 m->last->type = MAN_ROOT; 167 m->last->tok = MAN_MAX; 168 m->next = MAN_NEXT_CHILD; 169 } 170 171 172 static int 173 man_node_append(struct man *man, struct man_node *p) 174 { 175 176 assert(man->last); 177 assert(man->first); 178 assert(MAN_ROOT != p->type); 179 180 switch (man->next) { 181 case (MAN_NEXT_SIBLING): 182 man->last->next = p; 183 p->prev = man->last; 184 p->parent = man->last->parent; 185 break; 186 case (MAN_NEXT_CHILD): 187 man->last->child = p; 188 p->parent = man->last; 189 break; 190 default: 191 abort(); 192 /* NOTREACHED */ 193 } 194 195 assert(p->parent); 196 p->parent->nchild++; 197 198 if ( ! man_valid_pre(man, p)) 199 return(0); 200 201 switch (p->type) { 202 case (MAN_HEAD): 203 assert(MAN_BLOCK == p->parent->type); 204 p->parent->head = p; 205 break; 206 case (MAN_BODY): 207 assert(MAN_BLOCK == p->parent->type); 208 p->parent->body = p; 209 break; 210 default: 211 break; 212 } 213 214 man->last = p; 215 216 switch (p->type) { 217 case (MAN_TBL): 218 /* FALLTHROUGH */ 219 case (MAN_TEXT): 220 if ( ! man_valid_post(man)) 221 return(0); 222 break; 223 default: 224 break; 225 } 226 227 return(1); 228 } 229 230 231 static struct man_node * 232 man_node_alloc(int line, int pos, enum man_type type, enum mant tok) 233 { 234 struct man_node *p; 235 236 p = mandoc_calloc(1, sizeof(struct man_node)); 237 p->line = line; 238 p->pos = pos; 239 p->type = type; 240 p->tok = tok; 241 return(p); 242 } 243 244 245 int 246 man_elem_alloc(struct man *m, int line, int pos, enum mant tok) 247 { 248 struct man_node *p; 249 250 p = man_node_alloc(line, pos, MAN_ELEM, tok); 251 if ( ! man_node_append(m, p)) 252 return(0); 253 m->next = MAN_NEXT_CHILD; 254 return(1); 255 } 256 257 258 int 259 man_head_alloc(struct man *m, int line, int pos, enum mant tok) 260 { 261 struct man_node *p; 262 263 p = man_node_alloc(line, pos, MAN_HEAD, tok); 264 if ( ! man_node_append(m, p)) 265 return(0); 266 m->next = MAN_NEXT_CHILD; 267 return(1); 268 } 269 270 271 int 272 man_body_alloc(struct man *m, int line, int pos, enum mant tok) 273 { 274 struct man_node *p; 275 276 p = man_node_alloc(line, pos, MAN_BODY, tok); 277 if ( ! man_node_append(m, p)) 278 return(0); 279 m->next = MAN_NEXT_CHILD; 280 return(1); 281 } 282 283 284 int 285 man_block_alloc(struct man *m, int line, int pos, enum mant tok) 286 { 287 struct man_node *p; 288 289 p = man_node_alloc(line, pos, MAN_BLOCK, tok); 290 if ( ! man_node_append(m, p)) 291 return(0); 292 m->next = MAN_NEXT_CHILD; 293 return(1); 294 } 295 296 static int 297 man_span_alloc(struct man *m, const struct tbl_span *span) 298 { 299 struct man_node *n; 300 301 /* FIXME: grab from span */ 302 n = man_node_alloc(0, 0, MAN_TBL, MAN_MAX); 303 n->span = span; 304 305 if ( ! man_node_append(m, n)) 306 return(0); 307 308 m->next = MAN_NEXT_SIBLING; 309 return(1); 310 } 311 312 int 313 man_word_alloc(struct man *m, int line, int pos, const char *word) 314 { 315 struct man_node *n; 316 size_t sv, len; 317 318 len = strlen(word); 319 320 n = man_node_alloc(line, pos, MAN_TEXT, MAN_MAX); 321 n->string = mandoc_malloc(len + 1); 322 sv = strlcpy(n->string, word, len + 1); 323 324 /* Prohibit truncation. */ 325 assert(sv < len + 1); 326 327 if ( ! man_node_append(m, n)) 328 return(0); 329 330 m->next = MAN_NEXT_SIBLING; 331 return(1); 332 } 333 334 335 /* 336 * Free all of the resources held by a node. This does NOT unlink a 337 * node from its context; for that, see man_node_unlink(). 338 */ 339 static void 340 man_node_free(struct man_node *p) 341 { 342 343 if (p->string) 344 free(p->string); 345 free(p); 346 } 347 348 349 void 350 man_node_delete(struct man *m, struct man_node *p) 351 { 352 353 while (p->child) 354 man_node_delete(m, p->child); 355 356 man_node_unlink(m, p); 357 man_node_free(p); 358 } 359 360 361 int 362 man_addspan(struct man *m, const struct tbl_span *sp) 363 { 364 365 assert( ! (MAN_HALT & m->flags)); 366 if ( ! man_span_alloc(m, sp)) 367 return(0); 368 return(man_descope(m, 0, 0)); 369 } 370 371 static int 372 man_descope(struct man *m, int line, int offs) 373 { 374 /* 375 * Co-ordinate what happens with having a next-line scope open: 376 * first close out the element scope (if applicable), then close 377 * out the block scope (also if applicable). 378 */ 379 380 if (MAN_ELINE & m->flags) { 381 m->flags &= ~MAN_ELINE; 382 if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) 383 return(0); 384 } 385 386 if ( ! (MAN_BLINE & m->flags)) 387 return(1); 388 m->flags &= ~MAN_BLINE; 389 390 if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) 391 return(0); 392 return(man_body_alloc(m, line, offs, m->last->tok)); 393 } 394 395 396 static int 397 man_ptext(struct man *m, int line, char *buf, int offs) 398 { 399 int i; 400 401 /* Ignore bogus comments. */ 402 403 if ('\\' == buf[offs] && 404 '.' == buf[offs + 1] && 405 '"' == buf[offs + 2]) { 406 man_pmsg(m, line, offs, MANDOCERR_BADCOMMENT); 407 return(1); 408 } 409 410 /* Literal free-form text whitespace is preserved. */ 411 412 if (MAN_LITERAL & m->flags) { 413 if ( ! man_word_alloc(m, line, offs, buf + offs)) 414 return(0); 415 return(man_descope(m, line, offs)); 416 } 417 418 /* Pump blank lines directly into the backend. */ 419 420 for (i = offs; ' ' == buf[i]; i++) 421 /* Skip leading whitespace. */ ; 422 423 if ('\0' == buf[i]) { 424 /* Allocate a blank entry. */ 425 if ( ! man_word_alloc(m, line, offs, "")) 426 return(0); 427 return(man_descope(m, line, offs)); 428 } 429 430 /* 431 * Warn if the last un-escaped character is whitespace. Then 432 * strip away the remaining spaces (tabs stay!). 433 */ 434 435 i = (int)strlen(buf); 436 assert(i); 437 438 if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { 439 if (i > 1 && '\\' != buf[i - 2]) 440 man_pmsg(m, line, i - 1, MANDOCERR_EOLNSPACE); 441 442 for (--i; i && ' ' == buf[i]; i--) 443 /* Spin back to non-space. */ ; 444 445 /* Jump ahead of escaped whitespace. */ 446 i += '\\' == buf[i] ? 2 : 1; 447 448 buf[i] = '\0'; 449 } 450 451 if ( ! man_word_alloc(m, line, offs, buf + offs)) 452 return(0); 453 454 /* 455 * End-of-sentence check. If the last character is an unescaped 456 * EOS character, then flag the node as being the end of a 457 * sentence. The front-end will know how to interpret this. 458 */ 459 460 assert(i); 461 if (mandoc_eos(buf, (size_t)i, 0)) 462 m->last->flags |= MAN_EOS; 463 464 return(man_descope(m, line, offs)); 465 } 466 467 468 static int 469 man_pmacro(struct man *m, int ln, char *buf, int offs) 470 { 471 int i, j, ppos; 472 enum mant tok; 473 char mac[5]; 474 struct man_node *n; 475 476 /* Comments and empties are quickly ignored. */ 477 478 offs++; 479 480 if ('\0' == buf[offs]) 481 return(1); 482 483 i = offs; 484 485 /* 486 * Skip whitespace between the control character and initial 487 * text. "Whitespace" is both spaces and tabs. 488 */ 489 490 if (' ' == buf[i] || '\t' == buf[i]) { 491 i++; 492 while (buf[i] && (' ' == buf[i] || '\t' == buf[i])) 493 i++; 494 if ('\0' == buf[i]) 495 goto out; 496 } 497 498 ppos = i; 499 500 /* 501 * Copy the first word into a nil-terminated buffer. 502 * Stop copying when a tab, space, or eoln is encountered. 503 */ 504 505 j = 0; 506 while (j < 4 && '\0' != buf[i] && ' ' != buf[i] && '\t' != buf[i]) 507 mac[j++] = buf[i++]; 508 mac[j] = '\0'; 509 510 tok = (j > 0 && j < 4) ? man_hash_find(mac) : MAN_MAX; 511 if (MAN_MAX == tok) { 512 man_vmsg(m, MANDOCERR_MACRO, ln, ppos, "%s", buf + ppos - 1); 513 return(1); 514 } 515 516 /* The macro is sane. Jump to the next word. */ 517 518 while (buf[i] && ' ' == buf[i]) 519 i++; 520 521 /* 522 * Trailing whitespace. Note that tabs are allowed to be passed 523 * into the parser as "text", so we only warn about spaces here. 524 */ 525 526 if ('\0' == buf[i] && ' ' == buf[i - 1]) 527 man_pmsg(m, ln, i - 1, MANDOCERR_EOLNSPACE); 528 529 /* 530 * Remove prior ELINE macro, as it's being clobbered by a new 531 * macro. Note that NSCOPED macros do not close out ELINE 532 * macros---they don't print text---so we let those slip by. 533 */ 534 535 if ( ! (MAN_NSCOPED & man_macros[tok].flags) && 536 m->flags & MAN_ELINE) { 537 n = m->last; 538 assert(MAN_TEXT != n->type); 539 540 /* Remove repeated NSCOPED macros causing ELINE. */ 541 542 if (MAN_NSCOPED & man_macros[n->tok].flags) 543 n = n->parent; 544 545 man_vmsg(m, MANDOCERR_LINESCOPE, n->line, n->pos, 546 "%s", man_macronames[n->tok]); 547 548 man_node_delete(m, n); 549 m->flags &= ~MAN_ELINE; 550 } 551 552 /* 553 * Save the fact that we're in the next-line for a block. In 554 * this way, embedded roff instructions can "remember" state 555 * when they exit. 556 */ 557 558 if (MAN_BLINE & m->flags) 559 m->flags |= MAN_BPLINE; 560 561 /* Call to handler... */ 562 563 assert(man_macros[tok].fp); 564 if ( ! (*man_macros[tok].fp)(m, tok, ln, ppos, &i, buf)) 565 goto err; 566 567 out: 568 /* 569 * We weren't in a block-line scope when entering the 570 * above-parsed macro, so return. 571 */ 572 573 if ( ! (MAN_BPLINE & m->flags)) { 574 m->flags &= ~MAN_ILINE; 575 return(1); 576 } 577 m->flags &= ~MAN_BPLINE; 578 579 /* 580 * If we're in a block scope, then allow this macro to slip by 581 * without closing scope around it. 582 */ 583 584 if (MAN_ILINE & m->flags) { 585 m->flags &= ~MAN_ILINE; 586 return(1); 587 } 588 589 /* 590 * If we've opened a new next-line element scope, then return 591 * now, as the next line will close out the block scope. 592 */ 593 594 if (MAN_ELINE & m->flags) 595 return(1); 596 597 /* Close out the block scope opened in the prior line. */ 598 599 assert(MAN_BLINE & m->flags); 600 m->flags &= ~MAN_BLINE; 601 602 if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) 603 return(0); 604 return(man_body_alloc(m, ln, offs, m->last->tok)); 605 606 err: /* Error out. */ 607 608 m->flags |= MAN_HALT; 609 return(0); 610 } 611 612 613 int 614 man_vmsg(struct man *man, enum mandocerr t, 615 int ln, int pos, const char *fmt, ...) 616 { 617 char buf[256]; 618 va_list ap; 619 620 va_start(ap, fmt); 621 vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 622 va_end(ap); 623 return((*man->msg)(t, man->data, ln, pos, buf)); 624 } 625 626 627 /* 628 * Unlink a node from its context. If "m" is provided, the last parse 629 * point will also be adjusted accordingly. 630 */ 631 static void 632 man_node_unlink(struct man *m, struct man_node *n) 633 { 634 635 /* Adjust siblings. */ 636 637 if (n->prev) 638 n->prev->next = n->next; 639 if (n->next) 640 n->next->prev = n->prev; 641 642 /* Adjust parent. */ 643 644 if (n->parent) { 645 n->parent->nchild--; 646 if (n->parent->child == n) 647 n->parent->child = n->prev ? n->prev : n->next; 648 } 649 650 /* Adjust parse point, if applicable. */ 651 652 if (m && m->last == n) { 653 /*XXX: this can occur when bailing from validation. */ 654 /*assert(NULL == n->next);*/ 655 if (n->prev) { 656 m->last = n->prev; 657 m->next = MAN_NEXT_SIBLING; 658 } else { 659 m->last = n->parent; 660 m->next = MAN_NEXT_CHILD; 661 } 662 } 663 664 if (m && m->first == n) 665 m->first = NULL; 666 } 667