1 /* $Id: main.c,v 1.1 2016/03/30 21:30:20 christos Exp $ */ 2 /* 3 * Copyright (c) 2016 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #ifdef __linux__ 18 #define _GNU_SOURCE 19 #endif 20 #include <sys/queue.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <err.h> 25 #include <getopt.h> 26 #include <search.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #ifdef __linux__ 31 #include <bsd/stdio.h> 32 #include <bsd/stdlib.h> 33 #include <bsd/string.h> 34 #endif 35 36 /* 37 * Phase of parsing input file. 38 */ 39 enum phase { 40 PHASE_INIT = 0, /* waiting to encounter definition */ 41 PHASE_KEYS, /* have definition, now keywords */ 42 PHASE_DESC, /* have keywords, now description */ 43 PHASE_SEEALSO, 44 PHASE_DECL /* have description, now declarations */ 45 }; 46 47 /* 48 * What kind of declaration (preliminary analysis). 49 */ 50 enum decltype { 51 DECLTYPE_CPP, /* pre-processor */ 52 DECLTYPE_C, /* semicolon-closed non-preprocessor */ 53 DECLTYPE_NEITHER /* non-preprocessor, no semicolon */ 54 }; 55 56 /* 57 * In variables and function declarations, we toss these. 58 */ 59 enum preproc { 60 PREPROC_SQLITE_API, 61 PREPROC_SQLITE_DEPRECATED, 62 PREPROC_SQLITE_EXPERIMENTAL, 63 PREPROC_SQLITE_EXTERN, 64 PREPROC__MAX 65 }; 66 67 /* 68 * HTML tags that we recognise. 69 */ 70 enum tag { 71 TAG_B_CLOSE, 72 TAG_B_OPEN, 73 TAG_BLOCK_CLOSE, 74 TAG_BLOCK_OPEN, 75 TAG_DD_CLOSE, 76 TAG_DD_OPEN, 77 TAG_DL_CLOSE, 78 TAG_DL_OPEN, 79 TAG_DT_CLOSE, 80 TAG_DT_OPEN, 81 TAG_H3_CLOSE, 82 TAG_H3_OPEN, 83 TAG_LI_CLOSE, 84 TAG_LI_OPEN, 85 TAG_OL_CLOSE, 86 TAG_OL_OPEN, 87 TAG_PRE_CLOSE, 88 TAG_PRE_OPEN, 89 TAG_UL_CLOSE, 90 TAG_UL_OPEN, 91 TAG__MAX 92 }; 93 94 TAILQ_HEAD(defnq, defn); 95 TAILQ_HEAD(declq, decl); 96 97 /* 98 * A declaration of type DECLTYPE_CPP or DECLTYPE_C. 99 * These need not be unique (if ifdef'd). 100 */ 101 struct decl { 102 enum decltype type; /* type of declaration */ 103 char *text; /* text */ 104 size_t textsz; /* strlen(text) */ 105 TAILQ_ENTRY(decl) entries; 106 }; 107 108 /* 109 * A definition is basically the manpage contents. 110 */ 111 struct defn { 112 char *name; /* really Nd */ 113 TAILQ_ENTRY(defn) entries; 114 char *desc; /* long description */ 115 size_t descsz; /* strlen(desc) */ 116 struct declq dcqhead; /* declarations */ 117 int multiline; /* used when parsing */ 118 int instruct; /* used when parsing */ 119 const char *fn; /* parsed from file */ 120 size_t ln; /* parsed at line */ 121 int postprocessed; /* good for emission? */ 122 char *dt; /* manpage title */ 123 char **nms; /* manpage names */ 124 size_t nmsz; /* number of names */ 125 char *fname; /* manpage filename */ 126 char *keybuf; /* raw keywords */ 127 size_t keybufsz; /* length of "keysbuf" */ 128 char *seealso; /* see also tags */ 129 size_t seealsosz; /* length of seealso */ 130 char **xrs; /* parsed "see also" references */ 131 size_t xrsz; /* number of references */ 132 char **keys; /* parsed keywords */ 133 size_t keysz; /* number of keywords */ 134 }; 135 136 /* 137 * Entire parse routine. 138 */ 139 struct parse { 140 enum phase phase; /* phase of parse */ 141 size_t ln; /* line number */ 142 const char *fn; /* open file */ 143 struct defnq dqhead; /* definitions */ 144 }; 145 146 /* 147 * How to handle HTML tags we find in the text. 148 */ 149 struct taginfo { 150 const char *html; /* HTML to key on */ 151 const char *mdoc; /* generate mdoc(7) */ 152 unsigned int flags; 153 #define TAGINFO_NOBR 0x01 /* follow w/space, not newline */ 154 #define TAGINFO_NOOP 0x02 /* just strip out */ 155 #define TAGINFO_NOSP 0x04 /* follow w/o space or newline */ 156 #define TAGINFO_INLINE 0x08 /* inline block (notused) */ 157 }; 158 159 static const struct taginfo tags[TAG__MAX] = { 160 { "</b>", "\\fP", TAGINFO_INLINE }, /* TAG_B_CLOSE */ 161 { "<b>", "\\fB", TAGINFO_INLINE }, /* TAG_B_OPEN */ 162 { "</blockquote>", ".Ed\n.Pp", 0 }, /* TAG_BLOCK_CLOSE */ 163 { "<blockquote>", ".Bd -ragged", 0 }, /* TAG_BLOCK_OPEN */ 164 { "</dd>", "", TAGINFO_NOOP }, /* TAG_DD_CLOSE */ 165 { "<dd>", "", TAGINFO_NOOP }, /* TAG_DD_OPEN */ 166 { "</dl>", ".El\n.Pp", 0 }, /* TAG_DL_CLOSE */ 167 { "<dl>", ".Bl -tag -width Ds", 0 }, /* TAG_DL_OPEN */ 168 { "</dt>", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_DT_CLOSE */ 169 { "<dt>", ".It", TAGINFO_NOBR }, /* TAG_DT_OPEN */ 170 { "</h3>", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_H3_CLOSE */ 171 { "<h3>", ".Ss", TAGINFO_NOBR }, /* TAG_H3_OPEN */ 172 { "</li>", "", TAGINFO_NOOP }, /* TAG_LI_CLOSE */ 173 { "<li>", ".It", 0 }, /* TAG_LI_OPEN */ 174 { "</ol>", ".El\n.Pp", 0 }, /* TAG_OL_CLOSE */ 175 { "<ol>", ".Bl -enum", 0 }, /* TAG_OL_OPEN */ 176 { "</pre>", ".Ed\n.Pp", 0 }, /* TAG_PRE_CLOSE */ 177 { "<pre>", ".Bd -literal", 0 }, /* TAG_PRE_OPEN */ 178 { "</ul>", ".El\n.Pp", 0 }, /* TAG_UL_CLOSE */ 179 { "<ul>", ".Bl -bullet", 0 }, /* TAG_UL_OPEN */ 180 }; 181 182 static const char *const preprocs[TAG__MAX] = { 183 "SQLITE_API", /* PREPROC_SQLITE_API */ 184 "SQLITE_DEPRECATED", /* PREPROC_SQLITE_DEPRECATED */ 185 "SQLITE_EXPERIMENTAL", /* PREPROC_SQLITE_EXPERIMENTAL */ 186 "SQLITE_EXTERN", /* PREPROC_SQLITE_EXTERN */ 187 }; 188 189 /* Verbose reporting. */ 190 static int verbose; 191 /* Don't output any files: use stdout. */ 192 static int nofile; 193 194 static void 195 decl_function_add(struct parse *p, char **etext, 196 size_t *etextsz, const char *cp, size_t len) 197 { 198 199 if (' ' != (*etext)[*etextsz - 1]) { 200 *etext = realloc(*etext, *etextsz + 2); 201 if (NULL == *etext) 202 err(EXIT_FAILURE, "%s:%zu: " 203 "realloc", p->fn, p->ln); 204 (*etextsz)++; 205 strlcat(*etext, " ", *etextsz + 1); 206 } 207 *etext = realloc(*etext, *etextsz + len + 1); 208 if (NULL == *etext) 209 err(EXIT_FAILURE, "%s:%zu: realloc", p->fn, p->ln); 210 memcpy(*etext + *etextsz, cp, len); 211 *etextsz += len; 212 (*etext)[*etextsz] = '\0'; 213 } 214 215 static void 216 decl_function_copy(struct parse *p, char **etext, 217 size_t *etextsz, const char *cp, size_t len) 218 { 219 220 *etext = malloc(len + 1); 221 if (NULL == *etext) 222 err(EXIT_FAILURE, "%s:%zu: strdup", p->fn, p->ln); 223 memcpy(*etext, cp, len); 224 *etextsz = len; 225 (*etext)[*etextsz] = '\0'; 226 } 227 228 /* 229 * A C function (or variable, or whatever). 230 * This is more specifically any non-preprocessor text. 231 */ 232 static int 233 decl_function(struct parse *p, char *cp, size_t len) 234 { 235 char *ep, *ncp, *lcp, *rcp; 236 size_t nlen; 237 struct defn *d; 238 struct decl *e; 239 240 /* Fetch current interface definition. */ 241 d = TAILQ_LAST(&p->dqhead, defnq); 242 assert(NULL != d); 243 244 /* 245 * Since C tokens are semicolon-separated, we may be invoked any 246 * number of times per a single line. 247 */ 248 again: 249 while (isspace((int)*cp)) { 250 cp++; 251 len--; 252 } 253 if ('\0' == *cp) 254 return(1); 255 256 /* Whether we're a continuation clause. */ 257 if (d->multiline) { 258 /* This might be NULL if we're not a continuation. */ 259 e = TAILQ_LAST(&d->dcqhead, declq); 260 assert(DECLTYPE_C == e->type); 261 assert(NULL != e); 262 assert(NULL != e->text); 263 assert(e->textsz); 264 } else { 265 assert(0 == d->instruct); 266 e = calloc(1, sizeof(struct decl)); 267 e->type = DECLTYPE_C; 268 if (NULL == e) 269 err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln); 270 TAILQ_INSERT_TAIL(&d->dcqhead, e, entries); 271 } 272 273 /* 274 * We begin by seeing if there's a semicolon on this line. 275 * If there is, we'll need to do some special handling. 276 */ 277 ep = strchr(cp, ';'); 278 lcp = strchr(cp, '{'); 279 rcp = strchr(cp, '}'); 280 281 /* We're only a partial statement (i.e., no closure). */ 282 if (NULL == ep && d->multiline) { 283 assert(NULL != e->text); 284 assert(e->textsz > 0); 285 /* Is a struct starting or ending here? */ 286 if (d->instruct && NULL != rcp) 287 d->instruct--; 288 else if (NULL != lcp) 289 d->instruct++; 290 decl_function_add(p, &e->text, &e->textsz, cp, len); 291 return(1); 292 } else if (NULL == ep && ! d->multiline) { 293 d->multiline = 1; 294 /* Is a structure starting in this line? */ 295 if (NULL != lcp && 296 (NULL == rcp || rcp < lcp)) 297 d->instruct++; 298 decl_function_copy(p, &e->text, &e->textsz, cp, len); 299 return(1); 300 } 301 302 /* Position ourselves after the semicolon. */ 303 assert(NULL != ep); 304 ncp = cp; 305 nlen = (ep - cp) + 1; 306 cp = ep + 1; 307 len -= nlen; 308 309 if (d->multiline) { 310 assert(NULL != e->text); 311 /* Don't stop the multi-line if we're in a struct. */ 312 if (0 == d->instruct) { 313 if (NULL == lcp || lcp > cp) 314 d->multiline = 0; 315 } else if (NULL != rcp && rcp < cp) 316 if (0 == --d->instruct) 317 d->multiline = 0; 318 decl_function_add(p, &e->text, &e->textsz, ncp, nlen); 319 } else { 320 assert(NULL == e->text); 321 if (NULL != lcp && lcp < cp) { 322 d->multiline = 1; 323 d->instruct++; 324 } 325 decl_function_copy(p, &e->text, &e->textsz, ncp, nlen); 326 } 327 328 goto again; 329 } 330 331 /* 332 * A definition is just #define followed by space followed by the name, 333 * then the value of that name. 334 * We ignore the latter. 335 * FIXME: this does not understand multi-line CPP, but I don't think 336 * there are any instances of that in sqlite.h. 337 */ 338 static int 339 decl_define(struct parse *p, char *cp, size_t len) 340 { 341 struct defn *d; 342 struct decl *e; 343 size_t sz; 344 345 while (isspace((int)*cp)) { 346 cp++; 347 len--; 348 } 349 if (0 == len) { 350 warnx("%s:%zu: empty pre-processor " 351 "constant", p->fn, p->ln); 352 return(1); 353 } 354 355 d = TAILQ_LAST(&p->dqhead, defnq); 356 assert(NULL != d); 357 358 /* 359 * We're parsing a preprocessor definition, but we're still 360 * waiting on a semicolon from a function definition. 361 * It might be a comment or an error. 362 */ 363 if (d->multiline) { 364 warnx("%s:%zu: multiline declaration " 365 "still open (harmless?)", p->fn, p->ln); 366 e = TAILQ_LAST(&d->dcqhead, declq); 367 assert(NULL != e); 368 e->type = DECLTYPE_NEITHER; 369 d->multiline = d->instruct = 0; 370 } 371 372 sz = 0; 373 while ('\0' != cp[sz] && ! isspace((int)cp[sz])) 374 sz++; 375 376 e = calloc(1, sizeof(struct decl)); 377 if (NULL == e) 378 err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln); 379 e->type = DECLTYPE_CPP; 380 e->text = calloc(1, sz + 1); 381 if (NULL == e->text) 382 err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln); 383 strlcpy(e->text, cp, sz + 1); 384 e->textsz = sz; 385 TAILQ_INSERT_TAIL(&d->dcqhead, e, entries); 386 return(1); 387 } 388 389 /* 390 * A declaration is a function, variable, preprocessor definition, or 391 * really anything else until we reach a blank line. 392 */ 393 static void 394 decl(struct parse *p, char *cp, size_t len) 395 { 396 struct defn *d; 397 struct decl *e; 398 399 while (isspace((int)*cp)) { 400 cp++; 401 len--; 402 } 403 404 /* Check closure. */ 405 if ('\0' == *cp) { 406 p->phase = PHASE_INIT; 407 /* Check multiline status. */ 408 d = TAILQ_LAST(&p->dqhead, defnq); 409 assert(NULL != d); 410 if (d->multiline) { 411 warnx("%s:%zu: multiline declaration " 412 "still open (harmless?)", p->fn, p->ln); 413 e = TAILQ_LAST(&d->dcqhead, declq); 414 assert(NULL != e); 415 e->type = DECLTYPE_NEITHER; 416 d->multiline = d->instruct = 0; 417 } 418 return; 419 } 420 421 /* 422 * Catch preprocessor defines, but discard all other types of 423 * preprocessor statements. 424 */ 425 if ('#' == *cp) { 426 len--; 427 cp++; 428 while (isspace((int)*cp)) { 429 len--; 430 cp++; 431 } 432 if (0 == strncmp(cp, "define", 6)) 433 decl_define(p, cp + 6, len - 6); 434 return; 435 } 436 437 decl_function(p, cp, len); 438 } 439 440 /* 441 * Parse "SEE ALSO" phrases, which can come at any point in the 442 * interface description (unlike what they claim). 443 */ 444 static void 445 seealso(struct parse *p, char *cp, size_t len) 446 { 447 struct defn *d; 448 449 if ('\0' == *cp) { 450 warnx("%s:%zu: warn: unexpected end of " 451 "interface description", p->fn, p->ln); 452 p->phase = PHASE_INIT; 453 return; 454 } else if (0 == strcmp(cp, "*/")) { 455 p->phase = PHASE_DECL; 456 return; 457 } else if ('*' != cp[0] || '*' != cp[1]) { 458 warnx("%s:%zu: warn: unexpected end of " 459 "interface description", p->fn, p->ln); 460 p->phase = PHASE_INIT; 461 return; 462 } 463 464 cp += 2; 465 len -= 2; 466 while (isspace((int)*cp)) { 467 cp++; 468 len--; 469 } 470 471 /* Blank line: back to description part. */ 472 if (0 == len) { 473 p->phase = PHASE_DESC; 474 return; 475 } 476 477 /* Fetch current interface definition. */ 478 d = TAILQ_LAST(&p->dqhead, defnq); 479 assert(NULL != d); 480 481 d->seealso = realloc(d->seealso, 482 d->seealsosz + len + 1); 483 memcpy(d->seealso + d->seealsosz, cp, len); 484 d->seealsosz += len; 485 d->seealso[d->seealsosz] = '\0'; 486 } 487 488 /* 489 * A definition description is a block of text that we'll later format 490 * in mdoc(7). 491 * It extends from the name of the definition down to the declarations 492 * themselves. 493 */ 494 static void 495 desc(struct parse *p, char *cp, size_t len) 496 { 497 struct defn *d; 498 size_t nsz; 499 500 if ('\0' == *cp) { 501 warnx("%s:%zu: warn: unexpected end of " 502 "interface description", p->fn, p->ln); 503 p->phase = PHASE_INIT; 504 return; 505 } else if (0 == strcmp(cp, "*/")) { 506 /* End of comment area, start of declarations. */ 507 p->phase = PHASE_DECL; 508 return; 509 } else if ('*' != cp[0] || '*' != cp[1]) { 510 warnx("%s:%zu: warn: unexpected end of " 511 "interface description", p->fn, p->ln); 512 p->phase = PHASE_INIT; 513 return; 514 } 515 516 cp += 2; 517 len -= 2; 518 519 while (isspace((int)*cp)) { 520 cp++; 521 len--; 522 } 523 524 /* Fetch current interface definition. */ 525 d = TAILQ_LAST(&p->dqhead, defnq); 526 assert(NULL != d); 527 528 /* Ignore leading blank lines. */ 529 if (0 == len && NULL == d->desc) 530 return; 531 532 /* Collect SEE ALSO clauses. */ 533 if (0 == strncasecmp(cp, "see also:", 9)) { 534 cp += 9; 535 len -= 9; 536 while (isspace((int)*cp)) { 537 cp++; 538 len--; 539 } 540 p->phase = PHASE_SEEALSO; 541 d->seealso = realloc(d->seealso, 542 d->seealsosz + len + 1); 543 memcpy(d->seealso + d->seealsosz, cp, len); 544 d->seealsosz += len; 545 d->seealso[d->seealsosz] = '\0'; 546 return; 547 } 548 549 /* White-space padding between lines. */ 550 if (NULL != d->desc && 551 ' ' != d->desc[d->descsz - 1] && 552 '\n' != d->desc[d->descsz - 1]) { 553 d->desc = realloc(d->desc, d->descsz + 2); 554 if (NULL == d->desc) 555 err(EXIT_FAILURE, "%s:%zu: realloc", 556 p->fn, p->ln); 557 d->descsz++; 558 strlcat(d->desc, " ", d->descsz + 1); 559 } 560 561 /* Either append the line of a newline, if blank. */ 562 nsz = 0 == len ? 1 : len; 563 if (NULL == d->desc) { 564 d->desc = calloc(1, nsz + 1); 565 if (NULL == d->desc) 566 err(EXIT_FAILURE, "%s:%zu: calloc", 567 p->fn, p->ln); 568 } else { 569 d->desc = realloc(d->desc, d->descsz + nsz + 1); 570 if (NULL == d->desc) 571 err(EXIT_FAILURE, "%s:%zu: realloc", 572 p->fn, p->ln); 573 } 574 d->descsz += nsz; 575 strlcat(d->desc, 0 == len ? "\n" : cp, d->descsz + 1); 576 } 577 578 /* 579 * Copy all KEYWORDS into a buffer. 580 */ 581 static void 582 keys(struct parse *p, char *cp, size_t len) 583 { 584 struct defn *d; 585 586 if ('\0' == *cp) { 587 warnx("%s:%zu: warn: unexpected end of " 588 "interface keywords", p->fn, p->ln); 589 p->phase = PHASE_INIT; 590 return; 591 } else if (0 == strcmp(cp, "*/")) { 592 /* End of comment area, start of declarations. */ 593 p->phase = PHASE_DECL; 594 return; 595 } else if ('*' != cp[0] || '*' != cp[1]) { 596 if ('\0' != cp[1]) { 597 warnx("%s:%zu: warn: unexpected end of " 598 "interface keywords", p->fn, p->ln); 599 p->phase = PHASE_INIT; 600 return; 601 } else 602 warnx("%s:%zu: warn: workaround in effect " 603 "for unexpected end of " 604 "interface keywords", p->fn, p->ln); 605 } 606 607 cp += 2; 608 len -= 2; 609 while (isspace((int)*cp)) { 610 cp++; 611 len--; 612 } 613 614 if (0 == len) { 615 p->phase = PHASE_DESC; 616 return; 617 } else if (strncmp(cp, "KEYWORDS:", 9)) 618 return; 619 620 cp += 9; 621 len -= 9; 622 623 d = TAILQ_LAST(&p->dqhead, defnq); 624 assert(NULL != d); 625 d->keybuf = realloc(d->keybuf, d->keybufsz + len + 1); 626 if (NULL == d->keybuf) 627 err(EXIT_FAILURE, "%s:%zu: realloc", p->fn, p->ln); 628 memcpy(d->keybuf + d->keybufsz, cp, len); 629 d->keybufsz += len; 630 d->keybuf[d->keybufsz] = '\0'; 631 } 632 633 /* 634 * Initial state is where we're scanning forward to find commented 635 * instances of CAPI3REF. 636 */ 637 static void 638 init(struct parse *p, char *cp) 639 { 640 struct defn *d; 641 642 /* Look for comment hook. */ 643 if ('*' != cp[0] || '*' != cp[1]) 644 return; 645 cp += 2; 646 while (isspace((int)*cp)) 647 cp++; 648 649 /* Look for beginning of definition. */ 650 if (strncmp(cp, "CAPI3REF:", 9)) 651 return; 652 cp += 9; 653 while (isspace((int)*cp)) 654 cp++; 655 if ('\0' == *cp) { 656 warnx("%s:%zu: warn: unexpected end of " 657 "interface definition", p->fn, p->ln); 658 return; 659 } 660 661 /* Add definition to list of existing ones. */ 662 d = calloc(1, sizeof(struct defn)); 663 if (NULL == d) 664 err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln); 665 d->name = strdup(cp); 666 if (NULL == d->name) 667 err(EXIT_FAILURE, "%s:%zu: strdup", p->fn, p->ln); 668 d->fn = p->fn; 669 d->ln = p->ln; 670 p->phase = PHASE_KEYS; 671 TAILQ_INIT(&d->dcqhead); 672 TAILQ_INSERT_TAIL(&p->dqhead, d, entries); 673 } 674 675 #define BPOINT(_cp) \ 676 (';' == (_cp)[0] || \ 677 '[' == (_cp)[0] || \ 678 ('(' == (_cp)[0] && '*' != (_cp)[1]) || \ 679 ')' == (_cp)[0] || \ 680 '{' == (_cp)[0]) 681 682 /* 683 * Given a declaration (be it preprocessor or C), try to parse out a 684 * reasonable "name" for the affair. 685 * For a struct, for example, it'd be the struct name. 686 * For a typedef, it'd be the type name. 687 * For a function, it'd be the function name. 688 */ 689 static void 690 grok_name(const struct decl *e, 691 const char **start, size_t *sz) 692 { 693 const char *cp; 694 695 *start = NULL; 696 *sz = 0; 697 698 if (DECLTYPE_CPP != e->type) { 699 assert(';' == e->text[e->textsz - 1]); 700 cp = e->text; 701 do { 702 while (isspace((int)*cp)) 703 cp++; 704 if (BPOINT(cp)) 705 break; 706 /* Function pointers... */ 707 if ('(' == *cp) 708 cp++; 709 /* Pass over pointers. */ 710 while ('*' == *cp) 711 cp++; 712 *start = cp; 713 *sz = 0; 714 while ( ! isspace((int)*cp)) { 715 if (BPOINT(cp)) 716 break; 717 cp++; 718 (*sz)++; 719 } 720 } while ( ! BPOINT(cp)); 721 } else { 722 *sz = e->textsz; 723 *start = e->text; 724 } 725 } 726 727 static int 728 xrcmp(const void *p1, const void *p2) 729 { 730 const char *s1 = *(const char **)p1, 731 *s2 = *(const char **)p2; 732 733 return(strcasecmp(s1, s2)); 734 } 735 736 /* 737 * Extract information from the interface definition. 738 * Mark it as "postprocessed" on success. 739 */ 740 static void 741 postprocess(const char *prefix, struct defn *d) 742 { 743 struct decl *first; 744 const char *start; 745 size_t offs, sz, i; 746 ENTRY ent; 747 748 if (TAILQ_EMPTY(&d->dcqhead)) 749 return; 750 751 /* Find the first #define or declaration. */ 752 TAILQ_FOREACH(first, &d->dcqhead, entries) 753 if (DECLTYPE_CPP == first->type || 754 DECLTYPE_C == first->type) 755 break; 756 757 if (NULL == first) { 758 warnx("%s:%zu: no entry to document", d->fn, d->ln); 759 return; 760 } 761 762 /* 763 * Now compute the document name (`Dt'). 764 * We'll also use this for the filename. 765 */ 766 grok_name(first, &start, &sz); 767 if (NULL == start) { 768 warnx("%s:%zu: couldn't deduce " 769 "entry name", d->fn, d->ln); 770 return; 771 } 772 773 /* Document name needs all-caps. */ 774 d->dt = malloc(sz + 1); 775 if (NULL == d->dt) 776 err(EXIT_FAILURE, "malloc"); 777 memcpy(d->dt, start, sz); 778 d->dt[sz] = '\0'; 779 for (i = 0; i < sz; i++) 780 d->dt[i] = toupper((int)d->dt[i]); 781 782 /* Filename needs no special chars. */ 783 asprintf(&d->fname, "%s/%.*s.3", 784 prefix, (int)sz, start); 785 if (NULL == d->fname) 786 err(EXIT_FAILURE, "asprintf"); 787 788 offs = strlen(prefix) + 1; 789 for (i = 0; i < sz; i++) { 790 if (isalnum((int)d->fname[offs + i]) || 791 '_' == d->fname[offs + i] || 792 '-' == d->fname[offs + i]) 793 continue; 794 d->fname[offs + i] = '_'; 795 } 796 797 /* 798 * First, extract all keywords. 799 */ 800 for (i = 0; i < d->keybufsz; ) { 801 while (isspace((int)d->keybuf[i])) 802 i++; 803 if (i == d->keybufsz) 804 break; 805 sz = 0; 806 start = &d->keybuf[i]; 807 if ('{' == d->keybuf[i]) { 808 start = &d->keybuf[++i]; 809 for ( ; i < d->keybufsz; i++, sz++) 810 if ('}' == d->keybuf[i]) 811 break; 812 if ('}' == d->keybuf[i]) 813 i++; 814 } else 815 for ( ; i < d->keybufsz; i++, sz++) 816 if (isspace((int)d->keybuf[i])) 817 break; 818 if (0 == sz) 819 continue; 820 d->keys = realloc(d->keys, 821 (d->keysz + 1) * sizeof(char *)); 822 if (NULL == d->keys) 823 err(EXIT_FAILURE, "realloc"); 824 d->keys[d->keysz] = malloc(sz + 1); 825 if (NULL == d->keys[d->keysz]) 826 err(EXIT_FAILURE, "malloc"); 827 memcpy(d->keys[d->keysz], start, sz); 828 d->keys[d->keysz][sz] = '\0'; 829 d->keysz++; 830 831 /* Hash the keyword. */ 832 ent.key = d->keys[d->keysz - 1]; 833 ent.data = d; 834 (void)hsearch(ent, ENTER); 835 } 836 837 /* 838 * Now extract all `Nm' values for this document. 839 * We only use CPP and C references, and hope for the best when 840 * doing so. 841 * Enter each one of these as a searchable keyword. 842 */ 843 TAILQ_FOREACH(first, &d->dcqhead, entries) { 844 if (DECLTYPE_CPP != first->type && 845 DECLTYPE_C != first->type) 846 continue; 847 grok_name(first, &start, &sz); 848 if (NULL == start) 849 continue; 850 d->nms = realloc(d->nms, 851 (d->nmsz + 1) * sizeof(char *)); 852 if (NULL == d->nms) 853 err(EXIT_FAILURE, "realloc"); 854 d->nms[d->nmsz] = malloc(sz + 1); 855 if (NULL == d->nms[d->nmsz]) 856 err(EXIT_FAILURE, "malloc"); 857 memcpy(d->nms[d->nmsz], start, sz); 858 d->nms[d->nmsz][sz] = '\0'; 859 d->nmsz++; 860 861 /* Hash the name. */ 862 ent.key = d->nms[d->nmsz - 1]; 863 ent.data = d; 864 (void)hsearch(ent, ENTER); 865 } 866 867 if (0 == d->nmsz) { 868 warnx("%s:%zu: couldn't deduce " 869 "any names", d->fn, d->ln); 870 return; 871 } 872 873 /* 874 * Next, scan for all `Xr' values. 875 * We'll add more to this list later. 876 */ 877 for (i = 0; i < d->seealsosz; i++) { 878 /* 879 * Find next value starting with `['. 880 * There's other stuff in there (whitespace or 881 * free text leading up to these) that we're ok 882 * to ignore. 883 */ 884 while (i < d->seealsosz && '[' != d->seealso[i]) 885 i++; 886 if (i == d->seealsosz) 887 break; 888 889 /* 890 * Now scan for the matching `]'. 891 * We can also have a vertical bar if we're separating a 892 * keyword and its shown name. 893 */ 894 start = &d->seealso[++i]; 895 sz = 0; 896 while (i < d->seealsosz && 897 ']' != d->seealso[i] && 898 '|' != d->seealso[i]) { 899 i++; 900 sz++; 901 } 902 if (i == d->seealsosz) 903 break; 904 if (0 == sz) 905 continue; 906 907 /* 908 * Continue on to the end-of-reference, if we weren't 909 * there to begin with. 910 */ 911 if (']' != d->seealso[i]) 912 while (i < d->seealsosz && 913 ']' != d->seealso[i]) 914 i++; 915 916 /* Strip trailing whitespace. */ 917 while (sz > 1 && ' ' == start[sz - 1]) 918 sz--; 919 920 /* Strip trailing parenthesis. */ 921 if (sz > 2 && 922 '(' == start[sz - 2] && 923 ')' == start[sz - 1]) 924 sz -= 2; 925 926 d->xrs = realloc(d->xrs, 927 (d->xrsz + 1) * sizeof(char *)); 928 if (NULL == d->xrs) 929 err(EXIT_FAILURE, "realloc"); 930 d->xrs[d->xrsz] = malloc(sz + 1); 931 if (NULL == d->xrs[d->xrsz]) 932 err(EXIT_FAILURE, "malloc"); 933 memcpy(d->xrs[d->xrsz], start, sz); 934 d->xrs[d->xrsz][sz] = '\0'; 935 d->xrsz++; 936 } 937 938 /* 939 * Next, extract all references. 940 * We'll accumulate these into a list of SEE ALSO tags, after. 941 * See how these are parsed above for a description: this is 942 * basically the same thing. 943 */ 944 for (i = 0; i < d->descsz; i++) { 945 if ('[' != d->desc[i]) 946 continue; 947 i++; 948 if ('[' == d->desc[i]) 949 continue; 950 951 start = &d->desc[i]; 952 for (sz = 0; i < d->descsz; i++, sz++) 953 if (']' == d->desc[i] || 954 '|' == d->desc[i]) 955 break; 956 957 if (i == d->descsz) 958 break; 959 else if (sz == 0) 960 continue; 961 962 if (']' != d->desc[i]) 963 while (i < d->descsz && 964 ']' != d->desc[i]) 965 i++; 966 967 while (sz > 1 && ' ' == start[sz - 1]) 968 sz--; 969 970 if (sz > 2 && 971 '(' == start[sz - 2] && 972 ')' == start[sz - 1]) 973 sz -= 2; 974 975 d->xrs = realloc(d->xrs, 976 (d->xrsz + 1) * sizeof(char *)); 977 if (NULL == d->xrs) 978 err(EXIT_FAILURE, "realloc"); 979 d->xrs[d->xrsz] = malloc(sz + 1); 980 if (NULL == d->xrs[d->xrsz]) 981 err(EXIT_FAILURE, "malloc"); 982 memcpy(d->xrs[d->xrsz], start, sz); 983 d->xrs[d->xrsz][sz] = '\0'; 984 d->xrsz++; 985 } 986 987 qsort(d->xrs, d->xrsz, sizeof(char *), xrcmp); 988 d->postprocessed = 1; 989 } 990 991 /* 992 * Convenience function to look up a keyword. 993 * Returns the keyword's file if found or NULL. 994 */ 995 static const char * 996 lookup(char *key) 997 { 998 ENTRY ent; 999 ENTRY *res; 1000 struct defn *d; 1001 1002 ent.key = key; 1003 res = hsearch(ent, FIND); 1004 if (NULL == res) 1005 return(NULL); 1006 d = (struct defn *)res->data; 1007 if (0 == d->nmsz) 1008 return(NULL); 1009 assert(NULL != d->nms[0]); 1010 return(d->nms[0]); 1011 } 1012 1013 /* 1014 * Emit a valid mdoc(7) document within the given prefix. 1015 */ 1016 static void 1017 emit(const struct defn *d) 1018 { 1019 struct decl *first; 1020 size_t sz, i, col, last, ns; 1021 FILE *f; 1022 char *cp; 1023 const char *res, *lastres, *args, *str, *end; 1024 enum tag tag; 1025 enum preproc pre; 1026 1027 if ( ! d->postprocessed) { 1028 warnx("%s:%zu: interface has errors, not " 1029 "producing manpage", d->fn, d->ln); 1030 return; 1031 } 1032 1033 if (0 == nofile) { 1034 if (NULL == (f = fopen(d->fname, "w"))) { 1035 warn("%s: fopen", d->fname); 1036 return; 1037 } 1038 } else 1039 f = stdout; 1040 1041 /* Begin by outputting the mdoc(7) header. */ 1042 fputs(".Dd $" "Mdocdate$\n", f); 1043 fprintf(f, ".Dt %s 3\n", d->dt); 1044 fputs(".Os\n", f); 1045 fputs(".Sh NAME\n", f); 1046 1047 /* Now print the name bits of each declaration. */ 1048 for (i = 0; i < d->nmsz; i++) 1049 fprintf(f, ".Nm %s%s\n", d->nms[i], 1050 i < d->nmsz - 1 ? " ," : ""); 1051 1052 fprintf(f, ".Nd %s\n", d->name); 1053 fputs(".Sh SYNOPSIS\n", f); 1054 1055 TAILQ_FOREACH(first, &d->dcqhead, entries) { 1056 if (DECLTYPE_CPP != first->type && 1057 DECLTYPE_C != first->type) 1058 continue; 1059 1060 /* Easy: just print the CPP name. */ 1061 if (DECLTYPE_CPP == first->type) { 1062 fprintf(f, ".Fd #define %s\n", 1063 first->text); 1064 continue; 1065 } 1066 1067 /* First, strip out the sqlite CPPs. */ 1068 for (i = 0; i < first->textsz; ) { 1069 for (pre = 0; pre < PREPROC__MAX; pre++) { 1070 sz = strlen(preprocs[pre]); 1071 if (strncmp(preprocs[pre], 1072 &first->text[i], sz)) 1073 continue; 1074 i += sz; 1075 while (isspace((int)first->text[i])) 1076 i++; 1077 break; 1078 } 1079 if (pre == PREPROC__MAX) 1080 break; 1081 } 1082 1083 /* If we're a typedef, immediately print Vt. */ 1084 if (0 == strncmp(&first->text[i], "typedef", 7)) { 1085 fprintf(f, ".Vt %s\n", &first->text[i]); 1086 continue; 1087 } 1088 1089 /* Are we a struct? */ 1090 if (first->textsz > 2 && 1091 '}' == first->text[first->textsz - 2] && 1092 NULL != (cp = strchr(&first->text[i], '{'))) { 1093 *cp = '\0'; 1094 fprintf(f, ".Vt %s;\n", &first->text[i]); 1095 /* Restore brace for later usage. */ 1096 *cp = '{'; 1097 continue; 1098 } 1099 1100 /* Catch remaining non-functions. */ 1101 if (first->textsz > 2 && 1102 ')' != first->text[first->textsz - 2]) { 1103 fprintf(f, ".Vt %s\n", &first->text[i]); 1104 continue; 1105 } 1106 1107 str = &first->text[i]; 1108 if (NULL == (args = strchr(str, '('))) { 1109 /* What is this? */ 1110 fputs(".Bd -literal\n", f); 1111 fputs(&first->text[i], f); 1112 fputs("\n.Ed\n", f); 1113 continue; 1114 } 1115 1116 /* Scroll back to end of function name. */ 1117 end = args - 1; 1118 while (end > str && isspace((int)*end)) 1119 end--; 1120 1121 /* Scroll back to what comes before. */ 1122 for ( ; end > str; end--) 1123 if (isspace((int)*end) || '*' == *end) 1124 break; 1125 1126 /* 1127 * If we can't find what came before, then the function 1128 * has no type, which is odd... let's just call it void. 1129 */ 1130 if (end > str) { 1131 fprintf(f, ".Ft %.*s\n", 1132 (int)(end - str + 1), str); 1133 fprintf(f, ".Fo %.*s\n", 1134 (int)(args - end - 1), end + 1); 1135 } else { 1136 fputs(".Ft void\n", f); 1137 fprintf(f, ".Fo %.*s\n", (int)(args - end), end); 1138 } 1139 1140 /* 1141 * Convert function arguments into `Fa' clauses. 1142 * This also handles nested function pointers, which 1143 * would otherwise throw off the delimeters. 1144 */ 1145 for (;;) { 1146 str = ++args; 1147 while (isspace((int)*str)) 1148 str++; 1149 fputs(".Fa \"", f); 1150 ns = 0; 1151 while ('\0' != *str && 1152 (ns || ',' != *str) && 1153 (ns || ')' != *str)) { 1154 if ('/' == str[0] && '*' == str[1]) { 1155 str += 2; 1156 for ( ; '\0' != str[0]; str++) 1157 if ('*' == str[0] && '/' == str[1]) 1158 break; 1159 if ('\0' == *str) 1160 break; 1161 str += 2; 1162 while (isspace((int)*str)) 1163 str++; 1164 if ('\0' == *str || 1165 (0 == ns && ',' == *str) || 1166 (0 == ns && ')' == *str)) 1167 break; 1168 } 1169 if ('(' == *str) 1170 ns++; 1171 else if (')' == *str) 1172 ns--; 1173 fputc(*str, f); 1174 str++; 1175 } 1176 fputs("\"\n", f); 1177 if ('\0' == *str || ')' == *str) 1178 break; 1179 args = str; 1180 } 1181 1182 fputs(".Fc\n", f); 1183 } 1184 1185 fputs(".Sh DESCRIPTION\n", f); 1186 1187 /* 1188 * Strip the crap out of the description. 1189 * "Crap" consists of things I don't understand that mess up 1190 * parsing of the HTML, for instance, 1191 * <dl>[[foo bar]]<dt>foo bar</dt>...</dl> 1192 * These are not well-formed HTML. 1193 */ 1194 for (i = 0; i < d->descsz; i++) { 1195 if ('^' == d->desc[i] && 1196 '(' == d->desc[i + 1]) { 1197 d->desc[i] = d->desc[i + 1] = ' '; 1198 i++; 1199 continue; 1200 } else if (')' == d->desc[i] && 1201 '^' == d->desc[i + 1]) { 1202 d->desc[i] = d->desc[i + 1] = ' '; 1203 i++; 1204 continue; 1205 } else if ('^' == d->desc[i]) { 1206 d->desc[i] = ' '; 1207 continue; 1208 } else if ('[' != d->desc[i] || 1209 '[' != d->desc[i + 1]) 1210 continue; 1211 d->desc[i] = d->desc[i + 1] = ' '; 1212 for (i += 2; i < d->descsz; i++) { 1213 if (']' == d->desc[i] && 1214 ']' == d->desc[i + 1]) 1215 break; 1216 d->desc[i] = ' '; 1217 } 1218 if (i == d->descsz) 1219 continue; 1220 d->desc[i] = d->desc[i + 1] = ' '; 1221 i++; 1222 } 1223 1224 /* 1225 * Here we go! 1226 * Print out the description as best we can. 1227 * Do on-the-fly processing of any HTML we encounter into 1228 * mdoc(7) and try to break lines up. 1229 */ 1230 col = 0; 1231 for (i = 0; i < d->descsz; ) { 1232 /* 1233 * Newlines are paragraph breaks. 1234 * If we have multiple newlines, then keep to a single 1235 * `Pp' to keep it clean. 1236 * Only do this if we're not before a block-level HTML, 1237 * as this would mean, for instance, a `Pp'-`Bd' pair. 1238 */ 1239 if ('\n' == d->desc[i]) { 1240 while (isspace((int)d->desc[i])) 1241 i++; 1242 for (tag = 0; tag < TAG__MAX; tag++) { 1243 sz = strlen(tags[tag].html); 1244 if (0 == strncmp(&d->desc[i], tags[tag].html, sz)) 1245 break; 1246 } 1247 if (TAG__MAX == tag || 1248 TAGINFO_INLINE & tags[tag].flags) { 1249 if (col > 0) 1250 fputs("\n", f); 1251 fputs(".Pp\n", f); 1252 /* We're on a new line. */ 1253 col = 0; 1254 } 1255 continue; 1256 } 1257 1258 /* 1259 * New sentence, new line. 1260 * We guess whether this is the case by using the 1261 * dumbest possible heuristic. 1262 */ 1263 if (' ' == d->desc[i] && i && 1264 '.' == d->desc[i - 1]) { 1265 while (' ' == d->desc[i]) 1266 i++; 1267 fputs("\n", f); 1268 col = 0; 1269 continue; 1270 } 1271 /* 1272 * After 65 characters, force a break when we encounter 1273 * white-space to keep our lines more or less tidy. 1274 */ 1275 if (col > 65 && ' ' == d->desc[i]) { 1276 while (' ' == d->desc[i]) 1277 i++; 1278 fputs("\n", f); 1279 col = 0; 1280 continue; 1281 } 1282 1283 /* 1284 * Parsing HTML tags. 1285 * Why, sqlite guys, couldn't you have used something 1286 * like markdown or something? 1287 * Sheesh. 1288 */ 1289 if ('<' == d->desc[i]) { 1290 for (tag = 0; tag < TAG__MAX; tag++) { 1291 sz = strlen(tags[tag].html); 1292 if (strncmp(&d->desc[i], 1293 tags[tag].html, sz)) 1294 continue; 1295 /* 1296 * NOOP tags don't do anything, such as 1297 * the case of `</dd>', which only 1298 * serves to end an `It' block that will 1299 * be closed out by a subsequent `It' or 1300 * end of clause `El' anyway. 1301 * Skip the trailing space. 1302 */ 1303 if (TAGINFO_NOOP & tags[tag].flags) { 1304 i += sz; 1305 while (isspace((int)d->desc[i])) 1306 i++; 1307 break; 1308 } else if (TAGINFO_INLINE & tags[tag].flags) { 1309 fputs(tags[tag].mdoc, f); 1310 i += sz; 1311 break; 1312 } 1313 1314 /* 1315 * A breaking mdoc(7) statement. 1316 * Break the current line, output the 1317 * macro, and conditionally break 1318 * following that (or we might do 1319 * nothing at all). 1320 */ 1321 if (col > 0) { 1322 fputs("\n", f); 1323 col = 0; 1324 } 1325 fputs(tags[tag].mdoc, f); 1326 if ( ! (TAGINFO_NOBR & tags[tag].flags)) { 1327 fputs("\n", f); 1328 col = 0; 1329 } else if ( ! (TAGINFO_NOSP & tags[tag].flags)) { 1330 fputs(" ", f); 1331 col++; 1332 } 1333 i += sz; 1334 while (isspace((int)d->desc[i])) 1335 i++; 1336 break; 1337 } 1338 if (tag < TAG__MAX) 1339 continue; 1340 } else if ('[' == d->desc[i] && 1341 ']' != d->desc[i + 1]) { 1342 /* Do we start at the bracket or bar? */ 1343 for (sz = i + 1; sz < d->descsz; sz++) 1344 if ('|' == d->desc[sz] || 1345 ']' == d->desc[sz]) 1346 break; 1347 1348 if (sz == d->descsz) 1349 continue; 1350 else if ('|' == d->desc[sz]) 1351 i = sz + 1; 1352 else 1353 i = i + 1; 1354 1355 /* 1356 * Now handle in-page references. 1357 * Print them out as-is: we've already 1358 * accumulated them into our "SEE ALSO" values, 1359 * which we'll use below. 1360 */ 1361 for ( ; i < d->descsz; i++, col++) { 1362 if (']' == d->desc[i]) { 1363 i++; 1364 break; 1365 } 1366 fputc(d->desc[i], f); 1367 col++; 1368 } 1369 continue; 1370 } 1371 1372 if (' ' == d->desc[i] && 0 == col) { 1373 while (' ' == d->desc[i]) 1374 i++; 1375 continue; 1376 } 1377 1378 assert('\n' != d->desc[i]); 1379 1380 /* 1381 * Handle some oddities. 1382 * The following HTML escapes exist in the output that I 1383 * could find. 1384 * There might be others... 1385 */ 1386 if (0 == strncmp(&d->desc[i], " ", 6)) { 1387 i += 6; 1388 fputc(' ', f); 1389 } else if (0 == strncmp(&d->desc[i], "<", 4)) { 1390 i += 4; 1391 fputc('<', f); 1392 } else if (0 == strncmp(&d->desc[i], ">", 4)) { 1393 i += 4; 1394 fputc('>', f); 1395 } else if (0 == strncmp(&d->desc[i], "[", 5)) { 1396 i += 5; 1397 fputc('[', f); 1398 } else { 1399 /* Make sure we don't trigger a macro. */ 1400 if (0 == col && '.' == d->desc[i]) 1401 fputs("\\&", f); 1402 fputc(d->desc[i], f); 1403 i++; 1404 } 1405 1406 col++; 1407 } 1408 1409 if (col > 0) 1410 fputs("\n", f); 1411 1412 if (d->xrsz > 0) { 1413 /* 1414 * Look up all of our keywords (which are in the xrs 1415 * field) in the table of all known keywords. 1416 * Don't print duplicates. 1417 */ 1418 lastres = NULL; 1419 for (last = 0, i = 0; i < d->xrsz; i++) { 1420 res = lookup(d->xrs[i]); 1421 /* Ignore self-reference. */ 1422 if (res == d->nms[0] && verbose) 1423 warnx("%s:%zu: self-reference: %s", 1424 d->fn, d->ln, d->xrs[i]); 1425 if (res == d->nms[0] && verbose) 1426 continue; 1427 if (NULL == res && verbose) 1428 warnx("%s:%zu: ref not found: %s", 1429 d->fn, d->ln, d->xrs[i]); 1430 if (NULL == res) 1431 continue; 1432 1433 /* Ignore duplicates. */ 1434 if (NULL != lastres && lastres == res) 1435 continue; 1436 if (last) 1437 fputs(" ,\n", f); 1438 else 1439 fputs(".Sh SEE ALSO\n", f); 1440 fprintf(f, ".Xr %s 3", res); 1441 last = 1; 1442 lastres = res; 1443 } 1444 if (last) 1445 fputs("\n", f); 1446 } 1447 1448 if (0 == nofile) 1449 fclose(f); 1450 } 1451 1452 int 1453 main(int argc, char *argv[]) 1454 { 1455 size_t i, len; 1456 FILE *f; 1457 char *cp; 1458 const char *prefix; 1459 struct parse p; 1460 int rc, ch; 1461 struct defn *d; 1462 struct decl *e; 1463 1464 rc = 0; 1465 prefix = "."; 1466 f = stdin; 1467 memset(&p, 0, sizeof(struct parse)); 1468 p.fn = "<stdin>"; 1469 p.ln = 0; 1470 p.phase = PHASE_INIT; 1471 TAILQ_INIT(&p.dqhead); 1472 1473 while (-1 != (ch = getopt(argc, argv, "np:v"))) 1474 switch (ch) { 1475 case ('n'): 1476 nofile = 1; 1477 break; 1478 case ('p'): 1479 prefix = optarg; 1480 break; 1481 case ('v'): 1482 verbose = 1; 1483 break; 1484 default: 1485 goto usage; 1486 } 1487 1488 /* 1489 * Read in line-by-line and process in the phase dictated by our 1490 * finite state automaton. 1491 */ 1492 while (NULL != (cp = fgetln(f, &len))) { 1493 assert(len > 0); 1494 p.ln++; 1495 if ('\n' != cp[len - 1]) { 1496 warnx("%s:%zu: unterminated line", p.fn, p.ln); 1497 break; 1498 } 1499 cp[--len] = '\0'; 1500 /* Lines are always nil-terminated. */ 1501 switch (p.phase) { 1502 case (PHASE_INIT): 1503 init(&p, cp); 1504 break; 1505 case (PHASE_KEYS): 1506 keys(&p, cp, len); 1507 break; 1508 case (PHASE_DESC): 1509 desc(&p, cp, len); 1510 break; 1511 case (PHASE_SEEALSO): 1512 seealso(&p, cp, len); 1513 break; 1514 case (PHASE_DECL): 1515 decl(&p, cp, len); 1516 break; 1517 } 1518 } 1519 1520 /* 1521 * If we hit the last line, then try to process. 1522 * Otherwise, we failed along the way. 1523 */ 1524 if (NULL == cp) { 1525 /* 1526 * Allow us to be at the declarations or scanning for 1527 * the next clause. 1528 */ 1529 if (PHASE_INIT == p.phase || 1530 PHASE_DECL == p.phase) { 1531 if (0 == hcreate(5000)) 1532 err(EXIT_FAILURE, "hcreate"); 1533 TAILQ_FOREACH(d, &p.dqhead, entries) 1534 postprocess(prefix, d); 1535 TAILQ_FOREACH(d, &p.dqhead, entries) 1536 emit(d); 1537 rc = 1; 1538 } else if (PHASE_DECL != p.phase) 1539 warnx("%s:%zu: exit when not in " 1540 "initial state", p.fn, p.ln); 1541 } 1542 1543 while ( ! TAILQ_EMPTY(&p.dqhead)) { 1544 d = TAILQ_FIRST(&p.dqhead); 1545 TAILQ_REMOVE(&p.dqhead, d, entries); 1546 while ( ! TAILQ_EMPTY(&d->dcqhead)) { 1547 e = TAILQ_FIRST(&d->dcqhead); 1548 TAILQ_REMOVE(&d->dcqhead, e, entries); 1549 free(e->text); 1550 free(e); 1551 } 1552 free(d->name); 1553 free(d->desc); 1554 free(d->dt); 1555 for (i = 0; i < d->nmsz; i++) 1556 free(d->nms[i]); 1557 for (i = 0; i < d->xrsz; i++) 1558 free(d->xrs[i]); 1559 for (i = 0; i < d->keysz; i++) 1560 free(d->keys[i]); 1561 free(d->keys); 1562 free(d->nms); 1563 free(d->xrs); 1564 free(d->fname); 1565 free(d->seealso); 1566 free(d->keybuf); 1567 free(d); 1568 } 1569 1570 return(rc ? EXIT_SUCCESS : EXIT_FAILURE); 1571 usage: 1572 fprintf(stderr, "usage: %s [-nv] [-p prefix]\n", getprogname()); 1573 return(EXIT_FAILURE); 1574 } 1575