1 /* $Id: man.c,v 1.15 2009/10/27 21:40:07 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #include <sys/types.h> 18 19 #include <assert.h> 20 #include <ctype.h> 21 #include <stdarg.h> 22 #include <stdlib.h> 23 #include <stdio.h> 24 #include <string.h> 25 26 #include "libman.h" 27 28 const char *const __man_merrnames[WERRMAX] = { 29 "invalid character", /* WNPRINT */ 30 "system: malloc error", /* WNMEM */ 31 "invalid manual section", /* WMSEC */ 32 "invalid date format", /* WDATE */ 33 "scope of prior line violated", /* WLNSCOPE */ 34 "trailing whitespace", /* WTSPACE */ 35 "unterminated quoted parameter", /* WTQUOTE */ 36 "document has no body", /* WNODATA */ 37 "document has no title/section", /* WNOTITLE */ 38 "invalid escape sequence", /* WESCAPE */ 39 "invalid number format", /* WNUMFMT */ 40 "expected block head arguments", /* WHEADARGS */ 41 "expected block body arguments", /* WBODYARGS */ 42 "expected empty block head", /* WNHEADARGS */ 43 "ill-formed macro", /* WMACROFORM */ 44 "scope open on exit", /* WEXITSCOPE */ 45 "no scope context", /* WNOSCOPE */ 46 "literal context already open", /* WOLITERAL */ 47 "no literal context open" /* WNLITERAL */ 48 }; 49 50 const char *const __man_macronames[MAN_MAX] = { 51 "br", "TH", "SH", "SS", 52 "TP", "LP", "PP", "P", 53 "IP", "HP", "SM", "SB", 54 "BI", "IB", "BR", "RB", 55 "R", "B", "I", "IR", 56 "RI", "na", "i", "sp", 57 "nf", "fi", "r", "RE", 58 "RS", "DT", "UC", "PD" 59 }; 60 61 const char * const *man_macronames = __man_macronames; 62 63 static struct man_node *man_node_alloc(int, int, 64 enum man_type, int); 65 static int man_node_append(struct man *, 66 struct man_node *); 67 static int man_ptext(struct man *, int, char *); 68 static int man_pmacro(struct man *, int, char *); 69 static void man_free1(struct man *); 70 static int man_alloc1(struct man *); 71 static int pstring(struct man *, int, int, 72 const char *, size_t); 73 static int macrowarn(struct man *, int, const char *); 74 75 76 const struct man_node * 77 man_node(const struct man *m) 78 { 79 80 return(MAN_HALT & m->flags ? NULL : m->first); 81 } 82 83 84 const struct man_meta * 85 man_meta(const struct man *m) 86 { 87 88 return(MAN_HALT & m->flags ? NULL : &m->meta); 89 } 90 91 92 int 93 man_reset(struct man *man) 94 { 95 96 man_free1(man); 97 return(man_alloc1(man)); 98 } 99 100 101 void 102 man_free(struct man *man) 103 { 104 105 man_free1(man); 106 free(man); 107 } 108 109 110 struct man * 111 man_alloc(void *data, int pflags, const struct man_cb *cb) 112 { 113 struct man *p; 114 115 if (NULL == (p = calloc(1, sizeof(struct man)))) 116 return(NULL); 117 118 if ( ! man_alloc1(p)) { 119 free(p); 120 return(NULL); 121 } 122 123 man_hash_init(); 124 125 p->data = data; 126 p->pflags = pflags; 127 (void)memcpy(&p->cb, cb, sizeof(struct man_cb)); 128 return(p); 129 } 130 131 132 int 133 man_endparse(struct man *m) 134 { 135 136 if (MAN_HALT & m->flags) 137 return(0); 138 else if (man_macroend(m)) 139 return(1); 140 m->flags |= MAN_HALT; 141 return(0); 142 } 143 144 145 int 146 man_parseln(struct man *m, int ln, char *buf) 147 { 148 149 return('.' == *buf ? 150 man_pmacro(m, ln, buf) : 151 man_ptext(m, ln, buf)); 152 } 153 154 155 static void 156 man_free1(struct man *man) 157 { 158 159 if (man->first) 160 man_node_freelist(man->first); 161 if (man->meta.title) 162 free(man->meta.title); 163 if (man->meta.source) 164 free(man->meta.source); 165 if (man->meta.vol) 166 free(man->meta.vol); 167 } 168 169 170 static int 171 man_alloc1(struct man *m) 172 { 173 174 bzero(&m->meta, sizeof(struct man_meta)); 175 m->flags = 0; 176 m->last = calloc(1, sizeof(struct man_node)); 177 if (NULL == m->last) 178 return(0); 179 m->first = m->last; 180 m->last->type = MAN_ROOT; 181 m->next = MAN_NEXT_CHILD; 182 return(1); 183 } 184 185 186 static int 187 man_node_append(struct man *man, struct man_node *p) 188 { 189 190 assert(man->last); 191 assert(man->first); 192 assert(MAN_ROOT != p->type); 193 194 switch (man->next) { 195 case (MAN_NEXT_SIBLING): 196 man->last->next = p; 197 p->prev = man->last; 198 p->parent = man->last->parent; 199 break; 200 case (MAN_NEXT_CHILD): 201 man->last->child = p; 202 p->parent = man->last; 203 break; 204 default: 205 abort(); 206 /* NOTREACHED */ 207 } 208 209 p->parent->nchild++; 210 211 if ( ! man_valid_pre(man, p)) 212 return(0); 213 214 switch (p->type) { 215 case (MAN_HEAD): 216 assert(MAN_BLOCK == p->parent->type); 217 p->parent->head = p; 218 break; 219 case (MAN_BODY): 220 assert(MAN_BLOCK == p->parent->type); 221 p->parent->body = p; 222 break; 223 default: 224 break; 225 } 226 227 man->last = p; 228 229 switch (p->type) { 230 case (MAN_TEXT): 231 if ( ! man_valid_post(man)) 232 return(0); 233 if ( ! man_action_post(man)) 234 return(0); 235 break; 236 default: 237 break; 238 } 239 240 return(1); 241 } 242 243 244 static struct man_node * 245 man_node_alloc(int line, int pos, enum man_type type, int tok) 246 { 247 struct man_node *p; 248 249 p = calloc(1, sizeof(struct man_node)); 250 if (NULL == p) 251 return(NULL); 252 253 p->line = line; 254 p->pos = pos; 255 p->type = type; 256 p->tok = tok; 257 return(p); 258 } 259 260 261 int 262 man_elem_alloc(struct man *m, int line, int pos, int tok) 263 { 264 struct man_node *p; 265 266 p = man_node_alloc(line, pos, MAN_ELEM, tok); 267 if (NULL == p) 268 return(0); 269 if ( ! man_node_append(m, p)) 270 return(0); 271 m->next = MAN_NEXT_CHILD; 272 return(1); 273 } 274 275 276 int 277 man_head_alloc(struct man *m, int line, int pos, int tok) 278 { 279 struct man_node *p; 280 281 p = man_node_alloc(line, pos, MAN_HEAD, tok); 282 if (NULL == p) 283 return(0); 284 if ( ! man_node_append(m, p)) 285 return(0); 286 m->next = MAN_NEXT_CHILD; 287 return(1); 288 } 289 290 291 int 292 man_body_alloc(struct man *m, int line, int pos, int tok) 293 { 294 struct man_node *p; 295 296 p = man_node_alloc(line, pos, MAN_BODY, tok); 297 if (NULL == p) 298 return(0); 299 if ( ! man_node_append(m, p)) 300 return(0); 301 m->next = MAN_NEXT_CHILD; 302 return(1); 303 } 304 305 306 int 307 man_block_alloc(struct man *m, int line, int pos, int tok) 308 { 309 struct man_node *p; 310 311 p = man_node_alloc(line, pos, MAN_BLOCK, tok); 312 if (NULL == p) 313 return(0); 314 if ( ! man_node_append(m, p)) 315 return(0); 316 m->next = MAN_NEXT_CHILD; 317 return(1); 318 } 319 320 321 static int 322 pstring(struct man *m, int line, int pos, 323 const char *p, size_t len) 324 { 325 struct man_node *n; 326 size_t sv; 327 328 n = man_node_alloc(line, pos, MAN_TEXT, -1); 329 if (NULL == n) 330 return(0); 331 332 n->string = malloc(len + 1); 333 if (NULL == n->string) { 334 free(n); 335 return(0); 336 } 337 338 sv = strlcpy(n->string, p, len + 1); 339 340 /* Prohibit truncation. */ 341 assert(sv < len + 1); 342 343 if ( ! man_node_append(m, n)) 344 return(0); 345 m->next = MAN_NEXT_SIBLING; 346 return(1); 347 } 348 349 350 int 351 man_word_alloc(struct man *m, int line, int pos, const char *word) 352 { 353 354 return(pstring(m, line, pos, word, strlen(word))); 355 } 356 357 358 void 359 man_node_free(struct man_node *p) 360 { 361 362 if (p->string) 363 free(p->string); 364 if (p->parent) 365 p->parent->nchild--; 366 free(p); 367 } 368 369 370 void 371 man_node_freelist(struct man_node *p) 372 { 373 struct man_node *n; 374 375 if (p->child) 376 man_node_freelist(p->child); 377 assert(0 == p->nchild); 378 n = p->next; 379 man_node_free(p); 380 if (n) 381 man_node_freelist(n); 382 } 383 384 385 static int 386 man_ptext(struct man *m, int line, char *buf) 387 { 388 int i, j; 389 390 /* Literal free-form text whitespace is preserved. */ 391 392 if (MAN_LITERAL & m->flags) { 393 if ( ! man_word_alloc(m, line, 0, buf)) 394 return(0); 395 goto descope; 396 } 397 398 /* First de-chunk and allocate words. */ 399 400 for (i = 0; ' ' == buf[i]; i++) 401 /* Skip leading whitespace. */ ; 402 if (0 == buf[i]) { 403 if ( ! pstring(m, line, 0, &buf[i], 0)) 404 return(0); 405 goto descope; 406 } 407 408 for (j = i; buf[i]; i++) { 409 if (' ' != buf[i]) 410 continue; 411 412 /* Escaped whitespace. */ 413 if (i && ' ' == buf[i] && '\\' == buf[i - 1]) 414 continue; 415 416 buf[i++] = 0; 417 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 418 return(0); 419 420 for ( ; ' ' == buf[i]; i++) 421 /* Skip trailing whitespace. */ ; 422 423 j = i; 424 if (0 == buf[i]) 425 break; 426 } 427 428 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 429 return(0); 430 431 descope: 432 433 /* 434 * Co-ordinate what happens with having a next-line scope open: 435 * first close out the element scope (if applicable), then close 436 * out the block scope (also if applicable). 437 */ 438 439 if (MAN_ELINE & m->flags) { 440 m->flags &= ~MAN_ELINE; 441 if ( ! man_unscope(m, m->last->parent)) 442 return(0); 443 } 444 445 if ( ! (MAN_BLINE & m->flags)) 446 return(1); 447 m->flags &= ~MAN_BLINE; 448 449 if ( ! man_unscope(m, m->last->parent)) 450 return(0); 451 return(man_body_alloc(m, line, 0, m->last->tok)); 452 } 453 454 455 static int 456 macrowarn(struct man *m, int ln, const char *buf) 457 { 458 if ( ! (MAN_IGN_MACRO & m->pflags)) 459 return(man_verr(m, ln, 0, 460 "unknown macro: %s%s", 461 buf, strlen(buf) > 3 ? "..." : "")); 462 return(man_vwarn(m, ln, 0, "unknown macro: %s%s", 463 buf, strlen(buf) > 3 ? "..." : "")); 464 } 465 466 467 int 468 man_pmacro(struct man *m, int ln, char *buf) 469 { 470 int i, j, c, ppos, fl; 471 char mac[5]; 472 struct man_node *n; 473 474 /* Comments and empties are quickly ignored. */ 475 476 fl = m->flags; 477 478 if (0 == buf[1]) 479 goto out; 480 481 i = 1; 482 483 if (' ' == buf[i]) { 484 i++; 485 while (buf[i] && ' ' == buf[i]) 486 i++; 487 if (0 == buf[i]) 488 goto out; 489 } 490 491 ppos = i; 492 493 /* Copy the first word into a nil-terminated buffer. */ 494 495 for (j = 0; j < 4; j++, i++) { 496 if (0 == (mac[j] = buf[i])) 497 break; 498 else if (' ' == buf[i]) 499 break; 500 501 /* Check for invalid characters. */ 502 503 if (isgraph((u_char)buf[i])) 504 continue; 505 return(man_perr(m, ln, i, WNPRINT)); 506 } 507 508 mac[j] = 0; 509 510 if (j == 4 || j < 1) { 511 if ( ! (MAN_IGN_MACRO & m->pflags)) { 512 (void)man_perr(m, ln, ppos, WMACROFORM); 513 goto err; 514 } 515 if ( ! man_pwarn(m, ln, ppos, WMACROFORM)) 516 goto err; 517 return(1); 518 } 519 520 if (MAN_MAX == (c = man_hash_find(mac))) { 521 if ( ! macrowarn(m, ln, mac)) 522 goto err; 523 return(1); 524 } 525 526 /* The macro is sane. Jump to the next word. */ 527 528 while (buf[i] && ' ' == buf[i]) 529 i++; 530 531 /* Remove prior ELINE macro, if applicable. */ 532 533 if (m->flags & MAN_ELINE) { 534 n = m->last; 535 assert(NULL == n->child); 536 assert(0 == n->nchild); 537 if ( ! man_nwarn(m, n, WLNSCOPE)) 538 return(0); 539 540 if (n->prev) { 541 assert(n != n->parent->child); 542 assert(n == n->prev->next); 543 n->prev->next = NULL; 544 m->last = n->prev; 545 m->next = MAN_NEXT_SIBLING; 546 } else { 547 assert(n == n->parent->child); 548 n->parent->child = NULL; 549 m->last = n->parent; 550 m->next = MAN_NEXT_CHILD; 551 } 552 553 man_node_free(n); 554 m->flags &= ~MAN_ELINE; 555 } 556 557 /* Begin recursive parse sequence. */ 558 559 assert(man_macros[c].fp); 560 561 if ( ! (*man_macros[c].fp)(m, c, ln, ppos, &i, buf)) 562 goto err; 563 564 out: 565 if ( ! (MAN_BLINE & fl)) 566 return(1); 567 568 /* 569 * If we've opened a new next-line element scope, then return 570 * now, as the next line will close out the block scope. 571 */ 572 573 if (MAN_ELINE & m->flags) 574 return(1); 575 576 /* Close out the block scope opened in the prior line. */ 577 578 assert(MAN_BLINE & m->flags); 579 m->flags &= ~MAN_BLINE; 580 581 if ( ! man_unscope(m, m->last->parent)) 582 return(0); 583 return(man_body_alloc(m, ln, 0, m->last->tok)); 584 585 err: /* Error out. */ 586 587 m->flags |= MAN_HALT; 588 return(0); 589 } 590 591 592 int 593 man_verr(struct man *man, int ln, int pos, const char *fmt, ...) 594 { 595 char buf[256]; 596 va_list ap; 597 598 if (NULL == man->cb.man_err) 599 return(0); 600 601 va_start(ap, fmt); 602 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 603 va_end(ap); 604 return((*man->cb.man_err)(man->data, ln, pos, buf)); 605 } 606 607 608 int 609 man_vwarn(struct man *man, int ln, int pos, const char *fmt, ...) 610 { 611 char buf[256]; 612 va_list ap; 613 614 if (NULL == man->cb.man_warn) 615 return(0); 616 617 va_start(ap, fmt); 618 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 619 va_end(ap); 620 return((*man->cb.man_warn)(man->data, ln, pos, buf)); 621 } 622 623 624 int 625 man_err(struct man *m, int line, int pos, int iserr, enum merr type) 626 { 627 const char *p; 628 629 p = __man_merrnames[(int)type]; 630 assert(p); 631 632 if (iserr) 633 return(man_verr(m, line, pos, p)); 634 635 return(man_vwarn(m, line, pos, p)); 636 } 637