1 /* $Vendor-Id: man.c,v 1.49 2010/01/07 10:24:43 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <sys/types.h> 22 23 #include <assert.h> 24 #include <ctype.h> 25 #include <stdarg.h> 26 #include <stdlib.h> 27 #include <stdio.h> 28 #include <string.h> 29 30 #include "libman.h" 31 #include "libmandoc.h" 32 33 const char *const __man_merrnames[WERRMAX] = { 34 "invalid character", /* WNPRINT */ 35 "invalid manual section", /* WMSEC */ 36 "invalid date format", /* WDATE */ 37 "scope of prior line violated", /* WLNSCOPE */ 38 "trailing whitespace", /* WTSPACE */ 39 "unterminated quoted parameter", /* WTQUOTE */ 40 "document has no body", /* WNODATA */ 41 "document has no title/section", /* WNOTITLE */ 42 "invalid escape sequence", /* WESCAPE */ 43 "invalid number format", /* WNUMFMT */ 44 "expected block head arguments", /* WHEADARGS */ 45 "expected block body arguments", /* WBODYARGS */ 46 "expected empty block head", /* WNHEADARGS */ 47 "ill-formed macro", /* WMACROFORM */ 48 "scope open on exit", /* WEXITSCOPE */ 49 "no scope context", /* WNOSCOPE */ 50 "literal context already open", /* WOLITERAL */ 51 "no literal context open" /* WNLITERAL */ 52 }; 53 54 const char *const __man_macronames[MAN_MAX] = { 55 "br", "TH", "SH", "SS", 56 "TP", "LP", "PP", "P", 57 "IP", "HP", "SM", "SB", 58 "BI", "IB", "BR", "RB", 59 "R", "B", "I", "IR", 60 "RI", "na", "i", "sp", 61 "nf", "fi", "r", "RE", 62 "RS", "DT", "UC", "PD" 63 }; 64 65 const char * const *man_macronames = __man_macronames; 66 67 static struct man_node *man_node_alloc(int, int, 68 enum man_type, int); 69 static int man_node_append(struct man *, 70 struct man_node *); 71 static int man_ptext(struct man *, int, char *); 72 static int man_pmacro(struct man *, int, char *); 73 static void man_free1(struct man *); 74 static void man_alloc1(struct man *); 75 static int pstring(struct man *, int, int, 76 const char *, size_t); 77 static int macrowarn(struct man *, int, const char *); 78 79 80 const struct man_node * 81 man_node(const struct man *m) 82 { 83 84 return(MAN_HALT & m->flags ? NULL : m->first); 85 } 86 87 88 const struct man_meta * 89 man_meta(const struct man *m) 90 { 91 92 return(MAN_HALT & m->flags ? NULL : &m->meta); 93 } 94 95 96 void 97 man_reset(struct man *man) 98 { 99 100 man_free1(man); 101 man_alloc1(man); 102 } 103 104 105 void 106 man_free(struct man *man) 107 { 108 109 man_free1(man); 110 free(man); 111 } 112 113 114 struct man * 115 man_alloc(void *data, int pflags, const struct man_cb *cb) 116 { 117 struct man *p; 118 119 p = mandoc_calloc(1, sizeof(struct man)); 120 121 if (cb) 122 memcpy(&p->cb, cb, sizeof(struct man_cb)); 123 124 man_hash_init(); 125 p->data = data; 126 p->pflags = pflags; 127 128 man_alloc1(p); 129 return(p); 130 } 131 132 133 int 134 man_endparse(struct man *m) 135 { 136 137 if (MAN_HALT & m->flags) 138 return(0); 139 else if (man_macroend(m)) 140 return(1); 141 m->flags |= MAN_HALT; 142 return(0); 143 } 144 145 146 int 147 man_parseln(struct man *m, int ln, char *buf) 148 { 149 150 return('.' == *buf ? 151 man_pmacro(m, ln, buf) : 152 man_ptext(m, ln, buf)); 153 } 154 155 156 static void 157 man_free1(struct man *man) 158 { 159 160 if (man->first) 161 man_node_freelist(man->first); 162 if (man->meta.title) 163 free(man->meta.title); 164 if (man->meta.source) 165 free(man->meta.source); 166 if (man->meta.vol) 167 free(man->meta.vol); 168 } 169 170 171 static void 172 man_alloc1(struct man *m) 173 { 174 175 memset(&m->meta, 0, sizeof(struct man_meta)); 176 m->flags = 0; 177 m->last = mandoc_calloc(1, sizeof(struct man_node)); 178 m->first = m->last; 179 m->last->type = MAN_ROOT; 180 m->next = MAN_NEXT_CHILD; 181 } 182 183 184 static int 185 man_node_append(struct man *man, struct man_node *p) 186 { 187 188 assert(man->last); 189 assert(man->first); 190 assert(MAN_ROOT != p->type); 191 192 switch (man->next) { 193 case (MAN_NEXT_SIBLING): 194 man->last->next = p; 195 p->prev = man->last; 196 p->parent = man->last->parent; 197 break; 198 case (MAN_NEXT_CHILD): 199 man->last->child = p; 200 p->parent = man->last; 201 break; 202 default: 203 abort(); 204 /* NOTREACHED */ 205 } 206 207 p->parent->nchild++; 208 209 if ( ! man_valid_pre(man, p)) 210 return(0); 211 212 switch (p->type) { 213 case (MAN_HEAD): 214 assert(MAN_BLOCK == p->parent->type); 215 p->parent->head = p; 216 break; 217 case (MAN_BODY): 218 assert(MAN_BLOCK == p->parent->type); 219 p->parent->body = p; 220 break; 221 default: 222 break; 223 } 224 225 man->last = p; 226 227 switch (p->type) { 228 case (MAN_TEXT): 229 if ( ! man_valid_post(man)) 230 return(0); 231 if ( ! man_action_post(man)) 232 return(0); 233 break; 234 default: 235 break; 236 } 237 238 return(1); 239 } 240 241 242 static struct man_node * 243 man_node_alloc(int line, int pos, enum man_type type, int tok) 244 { 245 struct man_node *p; 246 247 p = mandoc_calloc(1, sizeof(struct man_node)); 248 p->line = line; 249 p->pos = pos; 250 p->type = type; 251 p->tok = tok; 252 return(p); 253 } 254 255 256 int 257 man_elem_alloc(struct man *m, int line, int pos, int tok) 258 { 259 struct man_node *p; 260 261 p = man_node_alloc(line, pos, MAN_ELEM, tok); 262 if ( ! man_node_append(m, p)) 263 return(0); 264 m->next = MAN_NEXT_CHILD; 265 return(1); 266 } 267 268 269 int 270 man_head_alloc(struct man *m, int line, int pos, int tok) 271 { 272 struct man_node *p; 273 274 p = man_node_alloc(line, pos, MAN_HEAD, tok); 275 if ( ! man_node_append(m, p)) 276 return(0); 277 m->next = MAN_NEXT_CHILD; 278 return(1); 279 } 280 281 282 int 283 man_body_alloc(struct man *m, int line, int pos, int tok) 284 { 285 struct man_node *p; 286 287 p = man_node_alloc(line, pos, MAN_BODY, tok); 288 if ( ! man_node_append(m, p)) 289 return(0); 290 m->next = MAN_NEXT_CHILD; 291 return(1); 292 } 293 294 295 int 296 man_block_alloc(struct man *m, int line, int pos, int tok) 297 { 298 struct man_node *p; 299 300 p = man_node_alloc(line, pos, MAN_BLOCK, tok); 301 if ( ! man_node_append(m, p)) 302 return(0); 303 m->next = MAN_NEXT_CHILD; 304 return(1); 305 } 306 307 308 static int 309 pstring(struct man *m, int line, int pos, 310 const char *p, size_t len) 311 { 312 struct man_node *n; 313 size_t sv; 314 315 n = man_node_alloc(line, pos, MAN_TEXT, -1); 316 n->string = mandoc_malloc(len + 1); 317 sv = strlcpy(n->string, p, len + 1); 318 319 /* Prohibit truncation. */ 320 assert(sv < len + 1); 321 322 if ( ! man_node_append(m, n)) 323 return(0); 324 m->next = MAN_NEXT_SIBLING; 325 return(1); 326 } 327 328 329 int 330 man_word_alloc(struct man *m, int line, int pos, const char *word) 331 { 332 333 return(pstring(m, line, pos, word, strlen(word))); 334 } 335 336 337 void 338 man_node_free(struct man_node *p) 339 { 340 341 if (p->string) 342 free(p->string); 343 if (p->parent) 344 p->parent->nchild--; 345 free(p); 346 } 347 348 349 void 350 man_node_freelist(struct man_node *p) 351 { 352 struct man_node *n; 353 354 if (p->child) 355 man_node_freelist(p->child); 356 assert(0 == p->nchild); 357 n = p->next; 358 man_node_free(p); 359 if (n) 360 man_node_freelist(n); 361 } 362 363 364 static int 365 man_ptext(struct man *m, int line, char *buf) 366 { 367 int i, j; 368 char sv; 369 370 /* Literal free-form text whitespace is preserved. */ 371 372 if (MAN_LITERAL & m->flags) { 373 if ( ! man_word_alloc(m, line, 0, buf)) 374 return(0); 375 goto descope; 376 } 377 378 /* First de-chunk and allocate words. */ 379 380 for (i = 0; ' ' == buf[i]; i++) 381 /* Skip leading whitespace. */ ; 382 383 if ('\0' == buf[i]) { 384 /* Trailing whitespace? */ 385 if (i && ' ' == buf[i - 1]) 386 if ( ! man_pwarn(m, line, i - 1, WTSPACE)) 387 return(0); 388 if ( ! pstring(m, line, 0, &buf[i], 0)) 389 return(0); 390 goto descope; 391 } 392 393 for (j = i; buf[i]; i++) { 394 if (' ' != buf[i]) 395 continue; 396 397 /* Escaped whitespace. */ 398 if (i && ' ' == buf[i] && '\\' == buf[i - 1]) 399 continue; 400 401 sv = buf[i]; 402 buf[i++] = '\0'; 403 404 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 405 return(0); 406 407 /* Trailing whitespace? Check at overwritten byte. */ 408 409 if (' ' == sv && '\0' == buf[i]) 410 if ( ! man_pwarn(m, line, i - 1, WTSPACE)) 411 return(0); 412 413 for ( ; ' ' == buf[i]; i++) 414 /* Skip trailing whitespace. */ ; 415 416 j = i; 417 418 /* Trailing whitespace? */ 419 420 if (' ' == buf[i - 1] && '\0' == buf[i]) 421 if ( ! man_pwarn(m, line, i - 1, WTSPACE)) 422 return(0); 423 424 if ('\0' == buf[i]) 425 break; 426 } 427 428 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 429 return(0); 430 431 descope: 432 433 /* 434 * Co-ordinate what happens with having a next-line scope open: 435 * first close out the element scope (if applicable), then close 436 * out the block scope (also if applicable). 437 */ 438 439 if (MAN_ELINE & m->flags) { 440 m->flags &= ~MAN_ELINE; 441 if ( ! man_unscope(m, m->last->parent)) 442 return(0); 443 } 444 445 if ( ! (MAN_BLINE & m->flags)) 446 return(1); 447 m->flags &= ~MAN_BLINE; 448 449 if ( ! man_unscope(m, m->last->parent)) 450 return(0); 451 return(man_body_alloc(m, line, 0, m->last->tok)); 452 } 453 454 455 static int 456 macrowarn(struct man *m, int ln, const char *buf) 457 { 458 if ( ! (MAN_IGN_MACRO & m->pflags)) 459 return(man_verr(m, ln, 0, 460 "unknown macro: %s%s", 461 buf, strlen(buf) > 3 ? "..." : "")); 462 return(man_vwarn(m, ln, 0, "unknown macro: %s%s", 463 buf, strlen(buf) > 3 ? "..." : "")); 464 } 465 466 467 int 468 man_pmacro(struct man *m, int ln, char *buf) 469 { 470 int i, j, c, ppos, fl; 471 char mac[5]; 472 struct man_node *n; 473 474 /* Comments and empties are quickly ignored. */ 475 476 fl = m->flags; 477 478 if ('\0' == buf[1]) 479 return(1); 480 481 i = 1; 482 483 if (' ' == buf[i]) { 484 i++; 485 while (buf[i] && ' ' == buf[i]) 486 i++; 487 if ('\0' == buf[i]) 488 goto out; 489 } 490 491 ppos = i; 492 493 /* Copy the first word into a nil-terminated buffer. */ 494 495 for (j = 0; j < 4; j++, i++) { 496 if ('\0' == (mac[j] = buf[i])) 497 break; 498 else if (' ' == buf[i]) 499 break; 500 501 /* Check for invalid characters. */ 502 503 if (isgraph((u_char)buf[i])) 504 continue; 505 return(man_perr(m, ln, i, WNPRINT)); 506 } 507 508 mac[j] = '\0'; 509 510 if (j == 4 || j < 1) { 511 if ( ! (MAN_IGN_MACRO & m->pflags)) { 512 (void)man_perr(m, ln, ppos, WMACROFORM); 513 goto err; 514 } 515 if ( ! man_pwarn(m, ln, ppos, WMACROFORM)) 516 goto err; 517 return(1); 518 } 519 520 if (MAN_MAX == (c = man_hash_find(mac))) { 521 if ( ! macrowarn(m, ln, mac)) 522 goto err; 523 return(1); 524 } 525 526 /* The macro is sane. Jump to the next word. */ 527 528 while (buf[i] && ' ' == buf[i]) 529 i++; 530 531 /* Trailing whitespace? */ 532 533 if ('\0' == buf[i] && ' ' == buf[i - 1]) 534 if ( ! man_pwarn(m, ln, i - 1, WTSPACE)) 535 goto err; 536 537 /* Remove prior ELINE macro, if applicable. */ 538 539 if (m->flags & MAN_ELINE) { 540 n = m->last; 541 assert(NULL == n->child); 542 assert(0 == n->nchild); 543 if ( ! man_nwarn(m, n, WLNSCOPE)) 544 return(0); 545 546 if (n->prev) { 547 assert(n != n->parent->child); 548 assert(n == n->prev->next); 549 n->prev->next = NULL; 550 m->last = n->prev; 551 m->next = MAN_NEXT_SIBLING; 552 } else { 553 assert(n == n->parent->child); 554 n->parent->child = NULL; 555 m->last = n->parent; 556 m->next = MAN_NEXT_CHILD; 557 } 558 559 man_node_free(n); 560 m->flags &= ~MAN_ELINE; 561 } 562 563 /* Begin recursive parse sequence. */ 564 565 assert(man_macros[c].fp); 566 567 if ( ! (*man_macros[c].fp)(m, c, ln, ppos, &i, buf)) 568 goto err; 569 570 out: 571 if ( ! (MAN_BLINE & fl)) 572 return(1); 573 574 /* 575 * If we've opened a new next-line element scope, then return 576 * now, as the next line will close out the block scope. 577 */ 578 579 if (MAN_ELINE & m->flags) 580 return(1); 581 582 /* Close out the block scope opened in the prior line. */ 583 584 assert(MAN_BLINE & m->flags); 585 m->flags &= ~MAN_BLINE; 586 587 if ( ! man_unscope(m, m->last->parent)) 588 return(0); 589 return(man_body_alloc(m, ln, 0, m->last->tok)); 590 591 err: /* Error out. */ 592 593 m->flags |= MAN_HALT; 594 return(0); 595 } 596 597 598 int 599 man_verr(struct man *man, int ln, int pos, const char *fmt, ...) 600 { 601 char buf[256]; 602 va_list ap; 603 604 if (NULL == man->cb.man_err) 605 return(0); 606 607 va_start(ap, fmt); 608 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 609 va_end(ap); 610 return((*man->cb.man_err)(man->data, ln, pos, buf)); 611 } 612 613 614 int 615 man_vwarn(struct man *man, int ln, int pos, const char *fmt, ...) 616 { 617 char buf[256]; 618 va_list ap; 619 620 if (NULL == man->cb.man_warn) 621 return(0); 622 623 va_start(ap, fmt); 624 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 625 va_end(ap); 626 return((*man->cb.man_warn)(man->data, ln, pos, buf)); 627 } 628 629 630 int 631 man_err(struct man *m, int line, int pos, int iserr, enum merr type) 632 { 633 const char *p; 634 635 p = __man_merrnames[(int)type]; 636 assert(p); 637 638 if (iserr) 639 return(man_verr(m, line, pos, p)); 640 641 return(man_vwarn(m, line, pos, p)); 642 } 643