1 /* $Vendor-Id: man.c,v 1.46 2009/11/02 08:40:31 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #include <sys/types.h> 18 19 #include <assert.h> 20 #include <ctype.h> 21 #include <stdarg.h> 22 #include <stdlib.h> 23 #include <stdio.h> 24 #include <string.h> 25 26 #include "libman.h" 27 #include "libmandoc.h" 28 29 const char *const __man_merrnames[WERRMAX] = { 30 "invalid character", /* WNPRINT */ 31 "invalid manual section", /* WMSEC */ 32 "invalid date format", /* WDATE */ 33 "scope of prior line violated", /* WLNSCOPE */ 34 "trailing whitespace", /* WTSPACE */ 35 "unterminated quoted parameter", /* WTQUOTE */ 36 "document has no body", /* WNODATA */ 37 "document has no title/section", /* WNOTITLE */ 38 "invalid escape sequence", /* WESCAPE */ 39 "invalid number format", /* WNUMFMT */ 40 "expected block head arguments", /* WHEADARGS */ 41 "expected block body arguments", /* WBODYARGS */ 42 "expected empty block head", /* WNHEADARGS */ 43 "ill-formed macro", /* WMACROFORM */ 44 "scope open on exit", /* WEXITSCOPE */ 45 "no scope context", /* WNOSCOPE */ 46 "literal context already open", /* WOLITERAL */ 47 "no literal context open" /* WNLITERAL */ 48 }; 49 50 const char *const __man_macronames[MAN_MAX] = { 51 "br", "TH", "SH", "SS", 52 "TP", "LP", "PP", "P", 53 "IP", "HP", "SM", "SB", 54 "BI", "IB", "BR", "RB", 55 "R", "B", "I", "IR", 56 "RI", "na", "i", "sp", 57 "nf", "fi", "r", "RE", 58 "RS", "DT", "UC", "PD" 59 }; 60 61 const char * const *man_macronames = __man_macronames; 62 63 static struct man_node *man_node_alloc(int, int, 64 enum man_type, int); 65 static int man_node_append(struct man *, 66 struct man_node *); 67 static int man_ptext(struct man *, int, char *); 68 static int man_pmacro(struct man *, int, char *); 69 static void man_free1(struct man *); 70 static void man_alloc1(struct man *); 71 static int pstring(struct man *, int, int, 72 const char *, size_t); 73 static int macrowarn(struct man *, int, const char *); 74 75 #ifdef __linux__ 76 extern size_t strlcpy(char *, const char *, size_t); 77 #endif 78 79 80 const struct man_node * 81 man_node(const struct man *m) 82 { 83 84 return(MAN_HALT & m->flags ? NULL : m->first); 85 } 86 87 88 const struct man_meta * 89 man_meta(const struct man *m) 90 { 91 92 return(MAN_HALT & m->flags ? NULL : &m->meta); 93 } 94 95 96 void 97 man_reset(struct man *man) 98 { 99 100 man_free1(man); 101 man_alloc1(man); 102 } 103 104 105 void 106 man_free(struct man *man) 107 { 108 109 man_free1(man); 110 free(man); 111 } 112 113 114 struct man * 115 man_alloc(void *data, int pflags, const struct man_cb *cb) 116 { 117 struct man *p; 118 119 p = mandoc_calloc(1, sizeof(struct man)); 120 121 if (cb) 122 memcpy(&p->cb, cb, sizeof(struct man_cb)); 123 124 man_hash_init(); 125 p->data = data; 126 p->pflags = pflags; 127 128 man_alloc1(p); 129 return(p); 130 } 131 132 133 int 134 man_endparse(struct man *m) 135 { 136 137 if (MAN_HALT & m->flags) 138 return(0); 139 else if (man_macroend(m)) 140 return(1); 141 m->flags |= MAN_HALT; 142 return(0); 143 } 144 145 146 int 147 man_parseln(struct man *m, int ln, char *buf) 148 { 149 150 return('.' == *buf ? 151 man_pmacro(m, ln, buf) : 152 man_ptext(m, ln, buf)); 153 } 154 155 156 static void 157 man_free1(struct man *man) 158 { 159 160 if (man->first) 161 man_node_freelist(man->first); 162 if (man->meta.title) 163 free(man->meta.title); 164 if (man->meta.source) 165 free(man->meta.source); 166 if (man->meta.vol) 167 free(man->meta.vol); 168 } 169 170 171 static void 172 man_alloc1(struct man *m) 173 { 174 175 memset(&m->meta, 0, sizeof(struct man_meta)); 176 m->flags = 0; 177 m->last = mandoc_calloc(1, sizeof(struct man_node)); 178 m->first = m->last; 179 m->last->type = MAN_ROOT; 180 m->next = MAN_NEXT_CHILD; 181 } 182 183 184 static int 185 man_node_append(struct man *man, struct man_node *p) 186 { 187 188 assert(man->last); 189 assert(man->first); 190 assert(MAN_ROOT != p->type); 191 192 switch (man->next) { 193 case (MAN_NEXT_SIBLING): 194 man->last->next = p; 195 p->prev = man->last; 196 p->parent = man->last->parent; 197 break; 198 case (MAN_NEXT_CHILD): 199 man->last->child = p; 200 p->parent = man->last; 201 break; 202 default: 203 abort(); 204 /* NOTREACHED */ 205 } 206 207 p->parent->nchild++; 208 209 if ( ! man_valid_pre(man, p)) 210 return(0); 211 212 switch (p->type) { 213 case (MAN_HEAD): 214 assert(MAN_BLOCK == p->parent->type); 215 p->parent->head = p; 216 break; 217 case (MAN_BODY): 218 assert(MAN_BLOCK == p->parent->type); 219 p->parent->body = p; 220 break; 221 default: 222 break; 223 } 224 225 man->last = p; 226 227 switch (p->type) { 228 case (MAN_TEXT): 229 if ( ! man_valid_post(man)) 230 return(0); 231 if ( ! man_action_post(man)) 232 return(0); 233 break; 234 default: 235 break; 236 } 237 238 return(1); 239 } 240 241 242 static struct man_node * 243 man_node_alloc(int line, int pos, enum man_type type, int tok) 244 { 245 struct man_node *p; 246 247 p = mandoc_calloc(1, sizeof(struct man_node)); 248 p->line = line; 249 p->pos = pos; 250 p->type = type; 251 p->tok = tok; 252 return(p); 253 } 254 255 256 int 257 man_elem_alloc(struct man *m, int line, int pos, int tok) 258 { 259 struct man_node *p; 260 261 p = man_node_alloc(line, pos, MAN_ELEM, tok); 262 if ( ! man_node_append(m, p)) 263 return(0); 264 m->next = MAN_NEXT_CHILD; 265 return(1); 266 } 267 268 269 int 270 man_head_alloc(struct man *m, int line, int pos, int tok) 271 { 272 struct man_node *p; 273 274 p = man_node_alloc(line, pos, MAN_HEAD, tok); 275 if ( ! man_node_append(m, p)) 276 return(0); 277 m->next = MAN_NEXT_CHILD; 278 return(1); 279 } 280 281 282 int 283 man_body_alloc(struct man *m, int line, int pos, int tok) 284 { 285 struct man_node *p; 286 287 p = man_node_alloc(line, pos, MAN_BODY, tok); 288 if ( ! man_node_append(m, p)) 289 return(0); 290 m->next = MAN_NEXT_CHILD; 291 return(1); 292 } 293 294 295 int 296 man_block_alloc(struct man *m, int line, int pos, int tok) 297 { 298 struct man_node *p; 299 300 p = man_node_alloc(line, pos, MAN_BLOCK, tok); 301 if ( ! man_node_append(m, p)) 302 return(0); 303 m->next = MAN_NEXT_CHILD; 304 return(1); 305 } 306 307 308 static int 309 pstring(struct man *m, int line, int pos, 310 const char *p, size_t len) 311 { 312 struct man_node *n; 313 size_t sv; 314 315 n = man_node_alloc(line, pos, MAN_TEXT, -1); 316 n->string = mandoc_malloc(len + 1); 317 sv = strlcpy(n->string, p, len + 1); 318 319 /* Prohibit truncation. */ 320 assert(sv < len + 1); 321 322 if ( ! man_node_append(m, n)) 323 return(0); 324 m->next = MAN_NEXT_SIBLING; 325 return(1); 326 } 327 328 329 int 330 man_word_alloc(struct man *m, int line, int pos, const char *word) 331 { 332 333 return(pstring(m, line, pos, word, strlen(word))); 334 } 335 336 337 void 338 man_node_free(struct man_node *p) 339 { 340 341 if (p->string) 342 free(p->string); 343 if (p->parent) 344 p->parent->nchild--; 345 free(p); 346 } 347 348 349 void 350 man_node_freelist(struct man_node *p) 351 { 352 struct man_node *n; 353 354 if (p->child) 355 man_node_freelist(p->child); 356 assert(0 == p->nchild); 357 n = p->next; 358 man_node_free(p); 359 if (n) 360 man_node_freelist(n); 361 } 362 363 364 static int 365 man_ptext(struct man *m, int line, char *buf) 366 { 367 int i, j; 368 369 /* Literal free-form text whitespace is preserved. */ 370 371 if (MAN_LITERAL & m->flags) { 372 if ( ! man_word_alloc(m, line, 0, buf)) 373 return(0); 374 goto descope; 375 } 376 377 /* First de-chunk and allocate words. */ 378 379 for (i = 0; ' ' == buf[i]; i++) 380 /* Skip leading whitespace. */ ; 381 if (0 == buf[i]) { 382 if ( ! pstring(m, line, 0, &buf[i], 0)) 383 return(0); 384 goto descope; 385 } 386 387 for (j = i; buf[i]; i++) { 388 if (' ' != buf[i]) 389 continue; 390 391 /* Escaped whitespace. */ 392 if (i && ' ' == buf[i] && '\\' == buf[i - 1]) 393 continue; 394 395 buf[i++] = 0; 396 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 397 return(0); 398 399 for ( ; ' ' == buf[i]; i++) 400 /* Skip trailing whitespace. */ ; 401 402 j = i; 403 if (0 == buf[i]) 404 break; 405 } 406 407 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 408 return(0); 409 410 descope: 411 412 /* 413 * Co-ordinate what happens with having a next-line scope open: 414 * first close out the element scope (if applicable), then close 415 * out the block scope (also if applicable). 416 */ 417 418 if (MAN_ELINE & m->flags) { 419 m->flags &= ~MAN_ELINE; 420 if ( ! man_unscope(m, m->last->parent)) 421 return(0); 422 } 423 424 if ( ! (MAN_BLINE & m->flags)) 425 return(1); 426 m->flags &= ~MAN_BLINE; 427 428 if ( ! man_unscope(m, m->last->parent)) 429 return(0); 430 return(man_body_alloc(m, line, 0, m->last->tok)); 431 } 432 433 434 static int 435 macrowarn(struct man *m, int ln, const char *buf) 436 { 437 if ( ! (MAN_IGN_MACRO & m->pflags)) 438 return(man_verr(m, ln, 0, 439 "unknown macro: %s%s", 440 buf, strlen(buf) > 3 ? "..." : "")); 441 return(man_vwarn(m, ln, 0, "unknown macro: %s%s", 442 buf, strlen(buf) > 3 ? "..." : "")); 443 } 444 445 446 int 447 man_pmacro(struct man *m, int ln, char *buf) 448 { 449 int i, j, c, ppos, fl; 450 char mac[5]; 451 struct man_node *n; 452 453 /* Comments and empties are quickly ignored. */ 454 455 fl = m->flags; 456 457 if ('\0' == buf[1]) 458 return(1); 459 460 i = 1; 461 462 if (' ' == buf[i]) { 463 i++; 464 while (buf[i] && ' ' == buf[i]) 465 i++; 466 if (0 == buf[i]) 467 goto out; 468 } 469 470 ppos = i; 471 472 /* Copy the first word into a nil-terminated buffer. */ 473 474 for (j = 0; j < 4; j++, i++) { 475 if (0 == (mac[j] = buf[i])) 476 break; 477 else if (' ' == buf[i]) 478 break; 479 480 /* Check for invalid characters. */ 481 482 if (isgraph((u_char)buf[i])) 483 continue; 484 return(man_perr(m, ln, i, WNPRINT)); 485 } 486 487 mac[j] = '\0'; 488 489 if (j == 4 || j < 1) { 490 if ( ! (MAN_IGN_MACRO & m->pflags)) { 491 (void)man_perr(m, ln, ppos, WMACROFORM); 492 goto err; 493 } 494 if ( ! man_pwarn(m, ln, ppos, WMACROFORM)) 495 goto err; 496 return(1); 497 } 498 499 if (MAN_MAX == (c = man_hash_find(mac))) { 500 if ( ! macrowarn(m, ln, mac)) 501 goto err; 502 return(1); 503 } 504 505 /* The macro is sane. Jump to the next word. */ 506 507 while (buf[i] && ' ' == buf[i]) 508 i++; 509 510 /* Remove prior ELINE macro, if applicable. */ 511 512 if (m->flags & MAN_ELINE) { 513 n = m->last; 514 assert(NULL == n->child); 515 assert(0 == n->nchild); 516 if ( ! man_nwarn(m, n, WLNSCOPE)) 517 return(0); 518 519 if (n->prev) { 520 assert(n != n->parent->child); 521 assert(n == n->prev->next); 522 n->prev->next = NULL; 523 m->last = n->prev; 524 m->next = MAN_NEXT_SIBLING; 525 } else { 526 assert(n == n->parent->child); 527 n->parent->child = NULL; 528 m->last = n->parent; 529 m->next = MAN_NEXT_CHILD; 530 } 531 532 man_node_free(n); 533 m->flags &= ~MAN_ELINE; 534 } 535 536 /* Begin recursive parse sequence. */ 537 538 assert(man_macros[c].fp); 539 540 if ( ! (*man_macros[c].fp)(m, c, ln, ppos, &i, buf)) 541 goto err; 542 543 out: 544 if ( ! (MAN_BLINE & fl)) 545 return(1); 546 547 /* 548 * If we've opened a new next-line element scope, then return 549 * now, as the next line will close out the block scope. 550 */ 551 552 if (MAN_ELINE & m->flags) 553 return(1); 554 555 /* Close out the block scope opened in the prior line. */ 556 557 assert(MAN_BLINE & m->flags); 558 m->flags &= ~MAN_BLINE; 559 560 if ( ! man_unscope(m, m->last->parent)) 561 return(0); 562 return(man_body_alloc(m, ln, 0, m->last->tok)); 563 564 err: /* Error out. */ 565 566 m->flags |= MAN_HALT; 567 return(0); 568 } 569 570 571 int 572 man_verr(struct man *man, int ln, int pos, const char *fmt, ...) 573 { 574 char buf[256]; 575 va_list ap; 576 577 if (NULL == man->cb.man_err) 578 return(0); 579 580 va_start(ap, fmt); 581 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 582 va_end(ap); 583 return((*man->cb.man_err)(man->data, ln, pos, buf)); 584 } 585 586 587 int 588 man_vwarn(struct man *man, int ln, int pos, const char *fmt, ...) 589 { 590 char buf[256]; 591 va_list ap; 592 593 if (NULL == man->cb.man_warn) 594 return(0); 595 596 va_start(ap, fmt); 597 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 598 va_end(ap); 599 return((*man->cb.man_warn)(man->data, ln, pos, buf)); 600 } 601 602 603 int 604 man_err(struct man *m, int line, int pos, int iserr, enum merr type) 605 { 606 const char *p; 607 608 p = __man_merrnames[(int)type]; 609 assert(p); 610 611 if (iserr) 612 return(man_verr(m, line, pos, p)); 613 614 return(man_vwarn(m, line, pos, p)); 615 } 616