1 /* $Id: mdoc.c,v 1.1 2009/04/06 20:30:40 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@openbsd.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the 7 * above copyright notice and this permission notice appear in all 8 * copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL 11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED 12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE 13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL 14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR 15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 17 * PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #include <assert.h> 20 #include <ctype.h> 21 #include <stdarg.h> 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <string.h> 25 26 #include "libmdoc.h" 27 28 enum merr { 29 ENOCALL, 30 EBODYPROL, 31 EPROLBODY, 32 ESPACE, 33 ETEXTPROL, 34 ENOBLANK, 35 EMALLOC 36 }; 37 38 const char *const __mdoc_macronames[MDOC_MAX] = { 39 "\\\"", "Dd", "Dt", "Os", 40 "Sh", "Ss", "Pp", "D1", 41 "Dl", "Bd", "Ed", "Bl", 42 "El", "It", "Ad", "An", 43 "Ar", "Cd", "Cm", "Dv", 44 "Er", "Ev", "Ex", "Fa", 45 "Fd", "Fl", "Fn", "Ft", 46 "Ic", "In", "Li", "Nd", 47 "Nm", "Op", "Ot", "Pa", 48 "Rv", "St", "Va", "Vt", 49 /* LINTED */ 50 "Xr", "\%A", "\%B", "\%D", 51 /* LINTED */ 52 "\%I", "\%J", "\%N", "\%O", 53 /* LINTED */ 54 "\%P", "\%R", "\%T", "\%V", 55 "Ac", "Ao", "Aq", "At", 56 "Bc", "Bf", "Bo", "Bq", 57 "Bsx", "Bx", "Db", "Dc", 58 "Do", "Dq", "Ec", "Ef", 59 "Em", "Eo", "Fx", "Ms", 60 "No", "Ns", "Nx", "Ox", 61 "Pc", "Pf", "Po", "Pq", 62 "Qc", "Ql", "Qo", "Qq", 63 "Re", "Rs", "Sc", "So", 64 "Sq", "Sm", "Sx", "Sy", 65 "Tn", "Ux", "Xc", "Xo", 66 "Fo", "Fc", "Oo", "Oc", 67 "Bk", "Ek", "Bt", "Hf", 68 "Fr", "Ud", "Lb", "Ap", 69 "Lp", "Lk", "Mt", "Brq", 70 /* LINTED */ 71 "Bro", "Brc", "\%C", "Es", 72 /* LINTED */ 73 "En", "Dx", "\%Q" 74 }; 75 76 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 77 "split", "nosplit", "ragged", 78 "unfilled", "literal", "file", 79 "offset", "bullet", "dash", 80 "hyphen", "item", "enum", 81 "tag", "diag", "hang", 82 "ohang", "inset", "column", 83 "width", "compact", "std", 84 "filled", "words", "emphasis", 85 "symbolic", "nested" 86 }; 87 88 const char * const *mdoc_macronames = __mdoc_macronames; 89 const char * const *mdoc_argnames = __mdoc_argnames; 90 91 static void mdoc_free1(struct mdoc *); 92 static int mdoc_alloc1(struct mdoc *); 93 static struct mdoc_node *node_alloc(struct mdoc *, int, int, 94 int, enum mdoc_type); 95 static int node_append(struct mdoc *, 96 struct mdoc_node *); 97 static int parsetext(struct mdoc *, int, char *); 98 static int parsemacro(struct mdoc *, int, char *); 99 static int macrowarn(struct mdoc *, int, const char *); 100 static int perr(struct mdoc *, int, int, enum merr); 101 102 #define verr(m, t) perr((m), (m)->last->line, (m)->last->pos, (t)) 103 104 /* 105 * Get the first (root) node of the parse tree. 106 */ 107 const struct mdoc_node * 108 mdoc_node(const struct mdoc *m) 109 { 110 111 return(MDOC_HALT & m->flags ? NULL : m->first); 112 } 113 114 115 const struct mdoc_meta * 116 mdoc_meta(const struct mdoc *m) 117 { 118 119 return(MDOC_HALT & m->flags ? NULL : &m->meta); 120 } 121 122 123 static void 124 mdoc_free1(struct mdoc *mdoc) 125 { 126 127 if (mdoc->first) 128 mdoc_node_freelist(mdoc->first); 129 if (mdoc->meta.title) 130 free(mdoc->meta.title); 131 if (mdoc->meta.os) 132 free(mdoc->meta.os); 133 if (mdoc->meta.name) 134 free(mdoc->meta.name); 135 if (mdoc->meta.arch) 136 free(mdoc->meta.arch); 137 if (mdoc->meta.vol) 138 free(mdoc->meta.vol); 139 } 140 141 142 static int 143 mdoc_alloc1(struct mdoc *mdoc) 144 { 145 146 bzero(&mdoc->meta, sizeof(struct mdoc_meta)); 147 mdoc->flags = 0; 148 mdoc->lastnamed = mdoc->lastsec = 0; 149 mdoc->last = calloc(1, sizeof(struct mdoc_node)); 150 if (NULL == mdoc->last) 151 return(0); 152 153 mdoc->first = mdoc->last; 154 mdoc->last->type = MDOC_ROOT; 155 mdoc->next = MDOC_NEXT_CHILD; 156 return(1); 157 } 158 159 160 /* 161 * Free up all resources contributed by a parse: the node tree, 162 * meta-data and so on. Then reallocate the root node for another 163 * parse. 164 */ 165 int 166 mdoc_reset(struct mdoc *mdoc) 167 { 168 169 mdoc_free1(mdoc); 170 return(mdoc_alloc1(mdoc)); 171 } 172 173 174 /* 175 * Completely free up all resources. 176 */ 177 void 178 mdoc_free(struct mdoc *mdoc) 179 { 180 181 mdoc_free1(mdoc); 182 if (mdoc->htab) 183 mdoc_hash_free(mdoc->htab); 184 free(mdoc); 185 } 186 187 188 struct mdoc * 189 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb) 190 { 191 struct mdoc *p; 192 193 if (NULL == (p = calloc(1, sizeof(struct mdoc)))) 194 return(NULL); 195 if (cb) 196 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); 197 198 p->data = data; 199 p->pflags = pflags; 200 201 if (NULL == (p->htab = mdoc_hash_alloc())) { 202 free(p); 203 return(NULL); 204 } else if (mdoc_alloc1(p)) 205 return(p); 206 207 free(p); 208 return(NULL); 209 } 210 211 212 /* 213 * Climb back up the parse tree, validating open scopes. Mostly calls 214 * through to macro_end in macro.c. 215 */ 216 int 217 mdoc_endparse(struct mdoc *m) 218 { 219 220 if (MDOC_HALT & m->flags) 221 return(0); 222 else if (mdoc_macroend(m)) 223 return(1); 224 m->flags |= MDOC_HALT; 225 return(0); 226 } 227 228 229 /* 230 * Main parse routine. Parses a single line -- really just hands off to 231 * the macro or text parser. 232 */ 233 int 234 mdoc_parseln(struct mdoc *m, int ln, char *buf) 235 { 236 237 /* If in error-mode, then we parse no more. */ 238 239 if (MDOC_HALT & m->flags) 240 return(0); 241 242 return('.' == *buf ? parsemacro(m, ln, buf) : 243 parsetext(m, ln, buf)); 244 } 245 246 247 void 248 mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 249 { 250 char buf[256]; 251 va_list ap; 252 253 if (NULL == mdoc->cb.mdoc_msg) 254 return; 255 256 va_start(ap, fmt); 257 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 258 va_end(ap); 259 (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf); 260 } 261 262 263 int 264 mdoc_verr(struct mdoc *mdoc, int ln, int pos, 265 const char *fmt, ...) 266 { 267 char buf[256]; 268 va_list ap; 269 270 if (NULL == mdoc->cb.mdoc_err) 271 return(0); 272 273 va_start(ap, fmt); 274 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 275 va_end(ap); 276 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf)); 277 } 278 279 280 int 281 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, 282 enum mdoc_warn type, const char *fmt, ...) 283 { 284 char buf[256]; 285 va_list ap; 286 287 if (NULL == mdoc->cb.mdoc_warn) 288 return(0); 289 290 va_start(ap, fmt); 291 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 292 va_end(ap); 293 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf)); 294 } 295 296 297 int 298 mdoc_macro(struct mdoc *m, int tok, 299 int ln, int pp, int *pos, char *buf) 300 { 301 302 /* FIXME - these should happen during validation. */ 303 304 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 305 SEC_PROLOGUE != m->lastnamed) 306 return(perr(m, ln, pp, EPROLBODY)); 307 308 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 309 SEC_PROLOGUE == m->lastnamed) 310 return(perr(m, ln, pp, EBODYPROL)); 311 312 if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) 313 return(perr(m, ln, pp, ENOCALL)); 314 315 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); 316 } 317 318 319 static int 320 perr(struct mdoc *m, int line, int pos, enum merr type) 321 { 322 char *p; 323 324 p = NULL; 325 switch (type) { 326 case (ENOCALL): 327 p = "not callable"; 328 break; 329 case (EPROLBODY): 330 p = "macro disallowed in document body"; 331 break; 332 case (EBODYPROL): 333 p = "macro disallowed in document prologue"; 334 break; 335 case (EMALLOC): 336 p = "memory exhausted"; 337 break; 338 case (ETEXTPROL): 339 p = "text disallowed in document prologue"; 340 break; 341 case (ENOBLANK): 342 p = "blank lines disallowed in non-literal contexts"; 343 break; 344 case (ESPACE): 345 p = "whitespace disallowed after delimiter"; 346 break; 347 } 348 assert(p); 349 return(mdoc_perr(m, line, pos, p)); 350 } 351 352 353 static int 354 node_append(struct mdoc *mdoc, struct mdoc_node *p) 355 { 356 357 assert(mdoc->last); 358 assert(mdoc->first); 359 assert(MDOC_ROOT != p->type); 360 361 switch (mdoc->next) { 362 case (MDOC_NEXT_SIBLING): 363 mdoc->last->next = p; 364 p->prev = mdoc->last; 365 p->parent = mdoc->last->parent; 366 break; 367 case (MDOC_NEXT_CHILD): 368 mdoc->last->child = p; 369 p->parent = mdoc->last; 370 break; 371 default: 372 abort(); 373 /* NOTREACHED */ 374 } 375 376 if ( ! mdoc_valid_pre(mdoc, p)) 377 return(0); 378 if ( ! mdoc_action_pre(mdoc, p)) 379 return(0); 380 381 switch (p->type) { 382 case (MDOC_HEAD): 383 assert(MDOC_BLOCK == p->parent->type); 384 p->parent->head = p; 385 break; 386 case (MDOC_TAIL): 387 assert(MDOC_BLOCK == p->parent->type); 388 p->parent->tail = p; 389 break; 390 case (MDOC_BODY): 391 assert(MDOC_BLOCK == p->parent->type); 392 p->parent->body = p; 393 break; 394 default: 395 break; 396 } 397 398 mdoc->last = p; 399 400 switch (p->type) { 401 case (MDOC_TEXT): 402 if ( ! mdoc_valid_post(mdoc)) 403 return(0); 404 if ( ! mdoc_action_post(mdoc)) 405 return(0); 406 break; 407 default: 408 break; 409 } 410 411 return(1); 412 } 413 414 415 static struct mdoc_node * 416 node_alloc(struct mdoc *mdoc, int line, 417 int pos, int tok, enum mdoc_type type) 418 { 419 struct mdoc_node *p; 420 421 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) { 422 (void)verr(mdoc, EMALLOC); 423 return(NULL); 424 } 425 426 p->sec = mdoc->lastsec; 427 p->line = line; 428 p->pos = pos; 429 p->tok = tok; 430 if (MDOC_TEXT != (p->type = type)) 431 assert(p->tok >= 0); 432 433 return(p); 434 } 435 436 437 int 438 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok) 439 { 440 struct mdoc_node *p; 441 442 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); 443 if (NULL == p) 444 return(0); 445 return(node_append(mdoc, p)); 446 } 447 448 449 int 450 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok) 451 { 452 struct mdoc_node *p; 453 454 assert(mdoc->first); 455 assert(mdoc->last); 456 457 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); 458 if (NULL == p) 459 return(0); 460 return(node_append(mdoc, p)); 461 } 462 463 464 int 465 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok) 466 { 467 struct mdoc_node *p; 468 469 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 470 if (NULL == p) 471 return(0); 472 return(node_append(mdoc, p)); 473 } 474 475 476 int 477 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, 478 int tok, struct mdoc_arg *args) 479 { 480 struct mdoc_node *p; 481 482 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); 483 if (NULL == p) 484 return(0); 485 if ((p->args = args)) 486 (args->refcnt)++; 487 return(node_append(mdoc, p)); 488 } 489 490 491 int 492 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, 493 int tok, struct mdoc_arg *args) 494 { 495 struct mdoc_node *p; 496 497 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); 498 if (NULL == p) 499 return(0); 500 if ((p->args = args)) 501 (args->refcnt)++; 502 return(node_append(mdoc, p)); 503 } 504 505 506 int 507 mdoc_word_alloc(struct mdoc *mdoc, 508 int line, int pos, const char *word) 509 { 510 struct mdoc_node *p; 511 512 p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT); 513 if (NULL == p) 514 return(0); 515 if (NULL == (p->string = strdup(word))) { 516 (void)verr(mdoc, EMALLOC); 517 return(0); 518 } 519 return(node_append(mdoc, p)); 520 } 521 522 523 void 524 mdoc_node_free(struct mdoc_node *p) 525 { 526 527 if (p->string) 528 free(p->string); 529 if (p->args) 530 mdoc_argv_free(p->args); 531 free(p); 532 } 533 534 535 void 536 mdoc_node_freelist(struct mdoc_node *p) 537 { 538 539 if (p->child) 540 mdoc_node_freelist(p->child); 541 if (p->next) 542 mdoc_node_freelist(p->next); 543 544 mdoc_node_free(p); 545 } 546 547 548 /* 549 * Parse free-form text, that is, a line that does not begin with the 550 * control character. 551 */ 552 static int 553 parsetext(struct mdoc *m, int line, char *buf) 554 { 555 556 if (SEC_PROLOGUE == m->lastnamed) 557 return(perr(m, line, 0, ETEXTPROL)); 558 559 if (0 == buf[0] && ! (MDOC_LITERAL & m->flags)) 560 return(perr(m, line, 0, ENOBLANK)); 561 562 if ( ! mdoc_word_alloc(m, line, 0, buf)) 563 return(0); 564 565 m->next = MDOC_NEXT_SIBLING; 566 return(1); 567 } 568 569 570 static int 571 macrowarn(struct mdoc *m, int ln, const char *buf) 572 { 573 if ( ! (MDOC_IGN_MACRO & m->pflags)) 574 return(mdoc_perr(m, ln, 1, 575 "unknown macro: %s%s", 576 buf, strlen(buf) > 3 ? "..." : "")); 577 return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX, 578 "unknown macro: %s%s", 579 buf, strlen(buf) > 3 ? "..." : "")); 580 } 581 582 583 584 /* 585 * Parse a macro line, that is, a line beginning with the control 586 * character. 587 */ 588 int 589 parsemacro(struct mdoc *m, int ln, char *buf) 590 { 591 int i, c; 592 char mac[5]; 593 594 /* Comments and empties are quickly ignored. */ 595 596 if (0 == buf[1]) 597 return(1); 598 599 if (' ' == buf[1]) { 600 i = 2; 601 while (buf[i] && ' ' == buf[i]) 602 i++; 603 if (0 == buf[i]) 604 return(1); 605 return(perr(m, ln, 1, ESPACE)); 606 } 607 608 if (buf[1] && '\\' == buf[1]) 609 if (buf[2] && '\"' == buf[2]) 610 return(1); 611 612 /* Copy the first word into a nil-terminated buffer. */ 613 614 for (i = 1; i < 5; i++) { 615 if (0 == (mac[i - 1] = buf[i])) 616 break; 617 else if (' ' == buf[i]) 618 break; 619 } 620 621 mac[i - 1] = 0; 622 623 if (i == 5 || i <= 2) { 624 if ( ! macrowarn(m, ln, mac)) 625 goto err; 626 return(1); 627 } 628 629 if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) { 630 if ( ! macrowarn(m, ln, mac)) 631 goto err; 632 return(1); 633 } 634 635 /* The macro is sane. Jump to the next word. */ 636 637 while (buf[i] && ' ' == buf[i]) 638 i++; 639 640 /* Begin recursive parse sequence. */ 641 642 if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) 643 goto err; 644 645 return(1); 646 647 err: /* Error out. */ 648 649 m->flags |= MDOC_HALT; 650 return(0); 651 } 652