1 /* 2 * Copyright (c) 2016, 2018, 2023 Kristaps Dzonsons <kristaps@bsd.lv> 3 * 4 * Permission to use, copy, modify, and distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 */ 16 #include "config.h" 17 18 #if HAVE_SYS_QUEUE 19 # include <sys/queue.h> 20 #endif 21 22 #include <assert.h> 23 #include <ctype.h> 24 #if HAVE_ERR 25 # include <err.h> 26 #endif 27 #include <getopt.h> 28 #if HAVE_SANDBOX_INIT 29 # include <sandbox.h> 30 #endif 31 #include <search.h> 32 #include <stdint.h> /* uintptr_t */ 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <unistd.h> 37 38 /* 39 * Phase of parsing input file. 40 */ 41 enum phase { 42 PHASE_INIT = 0, /* waiting to encounter definition */ 43 PHASE_KEYS, /* have definition, now keywords */ 44 PHASE_DESC, /* have keywords, now description */ 45 PHASE_SEEALSO, 46 PHASE_DECL /* have description, now declarations */ 47 }; 48 49 /* 50 * What kind of declaration (preliminary analysis). 51 */ 52 enum decltype { 53 DECLTYPE_CPP, /* pre-processor */ 54 DECLTYPE_C, /* semicolon-closed non-preprocessor */ 55 DECLTYPE_NEITHER /* non-preprocessor, no semicolon */ 56 }; 57 58 /* 59 * In variables and function declarations, we toss these. 60 */ 61 enum preproc { 62 PREPROC_SQLITE_API, 63 PREPROC_SQLITE_DEPRECATED, 64 PREPROC_SQLITE_EXPERIMENTAL, 65 PREPROC_SQLITE_EXTERN, 66 PREPROC_SQLITE_STDCALL, 67 PREPROC__MAX 68 }; 69 70 /* 71 * HTML tags that we recognise. 72 */ 73 enum tag { 74 TAG_A_CLOSE, 75 TAG_A_OPEN_ATTRS, 76 TAG_B_CLOSE, 77 TAG_B_OPEN, 78 TAG_BLOCK_CLOSE, 79 TAG_BLOCK_OPEN, 80 TAG_BR_OPEN, 81 TAG_DD_CLOSE, 82 TAG_DD_OPEN, 83 TAG_DL_CLOSE, 84 TAG_DL_OPEN, 85 TAG_DT_CLOSE, 86 TAG_DT_OPEN, 87 TAG_EM_CLOSE, 88 TAG_EM_OPEN, 89 TAG_H3_CLOSE, 90 TAG_H3_OPEN, 91 TAG_I_CLOSE, 92 TAG_I_OPEN, 93 TAG_LI_CLOSE, 94 TAG_LI_OPEN, 95 TAG_LI_OPEN_ATTRS, 96 TAG_OL_CLOSE, 97 TAG_OL_OPEN, 98 TAG_P_OPEN, 99 TAG_PRE_CLOSE, 100 TAG_PRE_OPEN, 101 TAG_SPAN_CLOSE, 102 TAG_SPAN_OPEN_ATTRS, 103 TAG_TABLE_CLOSE, 104 TAG_TABLE_OPEN, 105 TAG_TABLE_OPEN_ATTRS, 106 TAG_TD_CLOSE, 107 TAG_TD_OPEN, 108 TAG_TD_OPEN_ATTRS, 109 TAG_TH_CLOSE, 110 TAG_TH_OPEN, 111 TAG_TH_OPEN_ATTRS, 112 TAG_TR_CLOSE, 113 TAG_TR_OPEN, 114 TAG_U_CLOSE, 115 TAG_U_OPEN, 116 TAG_UL_CLOSE, 117 TAG_UL_OPEN, 118 TAG__MAX 119 }; 120 121 TAILQ_HEAD(defnq, defn); 122 TAILQ_HEAD(declq, decl); 123 124 /* 125 * A declaration of type DECLTYPE_CPP or DECLTYPE_C. 126 * These need not be unique (if ifdef'd). 127 */ 128 struct decl { 129 enum decltype type; /* type of declaration */ 130 char *text; /* text */ 131 size_t textsz; /* strlen(text) */ 132 TAILQ_ENTRY(decl) entries; 133 }; 134 135 /* 136 * A definition is basically the manpage contents. 137 */ 138 struct defn { 139 char *name; /* really Nd */ 140 TAILQ_ENTRY(defn) entries; 141 char *desc; /* long description */ 142 size_t descsz; /* strlen(desc) */ 143 char *fulldesc; /* description w/newlns */ 144 size_t fulldescsz; /* strlen(fulldesc) */ 145 struct declq dcqhead; /* declarations */ 146 int multiline; /* used when parsing */ 147 int instruct; /* used when parsing */ 148 const char *fn; /* parsed from file */ 149 size_t ln; /* parsed at line */ 150 int postprocessed; /* good for emission? */ 151 char *dt; /* manpage title */ 152 char **nms; /* manpage names */ 153 size_t nmsz; /* number of names */ 154 char *fname; /* manpage filename */ 155 char *keybuf; /* raw keywords */ 156 size_t keybufsz; /* length of "keysbuf" */ 157 char *seealso; /* see also tags */ 158 size_t seealsosz; /* length of seealso */ 159 char **xrs; /* parsed "see also" references */ 160 size_t xrsz; /* number of references */ 161 char **keys; /* parsed keywords */ 162 size_t keysz; /* number of keywords */ 163 }; 164 165 /* 166 * Entire parse routine. 167 */ 168 struct parse { 169 enum phase phase; /* phase of parse */ 170 size_t ln; /* line number */ 171 const char *fn; /* open file */ 172 struct defnq dqhead; /* definitions */ 173 }; 174 175 /* 176 * How to handle HTML tags we find in the text. 177 */ 178 struct taginfo { 179 const char *html; /* HTML to key on */ 180 const char *mdoc; /* generate mdoc(7) */ 181 unsigned int flags; 182 #define TAGINFO_NOBR 0x01 /* follow w/space, not newline */ 183 #define TAGINFO_NOOP 0x02 /* just strip out */ 184 #define TAGINFO_NOSP 0x04 /* follow w/o space or newline */ 185 #define TAGINFO_INLINE 0x08 /* inline block (notused) */ 186 #define TAGINFO_ATTRS 0x10 /* ignore attributes */ 187 }; 188 189 static const struct taginfo tags[TAG__MAX] = { 190 { "</a>", "", TAGINFO_INLINE }, /* TAG_A_CLOSE */ 191 { "<a ", "", TAGINFO_INLINE | TAGINFO_ATTRS }, /* TAG_A_OPEN_ATTRS */ 192 { "</b>", "\\fP", TAGINFO_INLINE }, /* TAG_B_CLOSE */ 193 { "<b>", "\\fB", TAGINFO_INLINE }, /* TAG_B_OPEN */ 194 { "<br>", " ", TAGINFO_INLINE }, /* TAG_BR_OPEN */ 195 { "</blockquote>", ".Ed\n.Pp", 0 }, /* TAG_BLOCK_CLOSE */ 196 { "<blockquote>", ".Bd -ragged", 0 }, /* TAG_BLOCK_OPEN */ 197 { "</dd>", "", TAGINFO_NOOP }, /* TAG_DD_CLOSE */ 198 { "<dd>", "", TAGINFO_NOBR | TAGINFO_NOSP }, /* TAG_DD_OPEN */ 199 { "</dl>", ".El\n.Pp", 0 }, /* TAG_DL_CLOSE */ 200 { "<dl>", ".Bl -tag -width Ds", 0 }, /* TAG_DL_OPEN */ 201 { "</dt>", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_DT_CLOSE */ 202 { "<dt>", ".It", TAGINFO_NOBR }, /* TAG_DT_OPEN */ 203 { "</em>", "\\fP", TAGINFO_INLINE }, /* TAG_EM_CLOSE */ 204 { "<em>", "\\fB", TAGINFO_INLINE }, /* TAG_EM_OPEN */ 205 { "</h3>", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_H3_CLOSE */ 206 { "<h3>", ".Ss", TAGINFO_NOBR }, /* TAG_H3_OPEN */ 207 { "</i>", "\\fP", TAGINFO_INLINE }, /* TAG_I_CLOSE */ 208 { "<i>", "\\fI", TAGINFO_INLINE }, /* TAG_I_OPEN */ 209 { "</li>", "", TAGINFO_NOOP }, /* TAG_LI_CLOSE */ 210 { "<li>", ".It", 0 }, /* TAG_LI_OPEN */ 211 { "<li ", ".It", TAGINFO_ATTRS }, /* TAG_LI_OPEN_ATTRS */ 212 { "</ol>", ".El\n.Pp", 0 }, /* TAG_OL_CLOSE */ 213 { "<ol>", ".Bl -enum", 0 }, /* TAG_OL_OPEN */ 214 { "<p>", ".Pp", 0 }, /* TAG_P_OPEN */ 215 { "</pre>", ".Ed\n.Pp", 0 }, /* TAG_PRE_CLOSE */ 216 { "<pre>", ".Bd -literal", 0 }, /* TAG_PRE_OPEN */ 217 { "</span>", "", TAGINFO_INLINE }, /* TAG_SPAN_CLOSE */ 218 { "<span ", "", TAGINFO_INLINE | TAGINFO_ATTRS }, /* TAG_SPAN_OPEN_ATTRS */ 219 { "</table>", ".Pp", 0 }, /* TAG_TABLE_CLOSE */ 220 { "<table>", ".Pp", 0 }, /* TAG_TABLE_OPEN */ 221 { "<table ", ".Pp", TAGINFO_ATTRS }, /* TAG_TABLE_OPEN_ATTRS */ 222 { "</td>", "", TAGINFO_NOOP }, /* TAG_TD_CLOSE */ 223 { "<td>", " ", TAGINFO_INLINE }, /* TAG_TD_OPEN */ 224 { "<td ", " ", TAGINFO_INLINE | TAGINFO_ATTRS}, /* TAG_TD_OPEN_ATTRS */ 225 { "</th>", "", TAGINFO_NOOP }, /* TAG_TH_CLOSE */ 226 { "<th>", " ", TAGINFO_INLINE }, /* TAG_TH_OPEN */ 227 { "<th ", " ", TAGINFO_INLINE | TAGINFO_ATTRS}, /* TAG_TH_OPEN_ATTRS */ 228 { "</tr>", "", TAGINFO_NOOP}, /* TAG_TR_CLOSE */ 229 { "<tr>", "", TAGINFO_NOBR }, /* TAG_TR_OPEN */ 230 { "</u>", "\\fP", TAGINFO_INLINE }, /* TAG_U_CLOSE */ 231 { "<u>", "\\fI", TAGINFO_INLINE }, /* TAG_U_OPEN */ 232 { "</ul>", ".El\n.Pp", 0 }, /* TAG_UL_CLOSE */ 233 { "<ul>", ".Bl -bullet", 0 }, /* TAG_UL_OPEN */ 234 }; 235 236 static const char *const preprocs[TAG__MAX] = { 237 "SQLITE_API", /* PREPROC_SQLITE_API */ 238 "SQLITE_DEPRECATED", /* PREPROC_SQLITE_DEPRECATED */ 239 "SQLITE_EXPERIMENTAL", /* PREPROC_SQLITE_EXPERIMENTAL */ 240 "SQLITE_EXTERN", /* PREPROC_SQLITE_EXTERN */ 241 "SQLITE_STDCALL", /* PREPROC_SQLITE_STDCALL */ 242 }; 243 244 /* Verbose reporting. */ 245 static int verbose; 246 247 /* Don't output any files: use stdout. */ 248 static int nofile; 249 250 /* Print out only filename. */ 251 static int filename; 252 253 static void 254 decl_function_add(struct parse *p __unused, char **etext, 255 size_t *etextsz, const char *cp, size_t len) 256 { 257 258 if ((*etext)[*etextsz - 1] != ' ') { 259 *etext = realloc(*etext, *etextsz + 2); 260 if (*etext == NULL) 261 err(1, NULL); 262 (*etextsz)++; 263 strlcat(*etext, " ", *etextsz + 1); 264 } 265 *etext = realloc(*etext, *etextsz + len + 1); 266 if (*etext == NULL) 267 err(1, NULL); 268 memcpy(*etext + *etextsz, cp, len); 269 *etextsz += len; 270 (*etext)[*etextsz] = '\0'; 271 } 272 273 static void 274 decl_function_copy(struct parse *p __unused, char **etext, 275 size_t *etextsz, const char *cp, size_t len) 276 { 277 278 *etext = malloc(len + 1); 279 if (*etext == NULL) 280 err(1, NULL); 281 memcpy(*etext, cp, len); 282 *etextsz = len; 283 (*etext)[*etextsz] = '\0'; 284 } 285 286 /* 287 * A C function (or variable, or whatever). 288 * This is more specifically any non-preprocessor text. 289 */ 290 static int 291 decl_function(struct parse *p, const char *cp, size_t len) 292 { 293 char *ep, *lcp, *rcp; 294 const char *ncp; 295 size_t nlen; 296 struct defn *d; 297 struct decl *e; 298 299 /* Fetch current interface definition. */ 300 d = TAILQ_LAST(&p->dqhead, defnq); 301 assert(NULL != d); 302 303 /* 304 * Since C tokens are semicolon-separated, we may be invoked any 305 * number of times per a single line. 306 */ 307 again: 308 while (isspace((unsigned char)*cp)) { 309 cp++; 310 len--; 311 } 312 if (*cp == '\0') 313 return(1); 314 315 /* Whether we're a continuation clause. */ 316 if (d->multiline) { 317 /* This might be NULL if we're not a continuation. */ 318 e = TAILQ_LAST(&d->dcqhead, declq); 319 assert(DECLTYPE_C == e->type); 320 assert(NULL != e); 321 assert(NULL != e->text); 322 assert(e->textsz); 323 } else { 324 assert(d->instruct == 0); 325 e = calloc(1, sizeof(struct decl)); 326 if (e == NULL) 327 err(1, NULL); 328 e->type = DECLTYPE_C; 329 TAILQ_INSERT_TAIL(&d->dcqhead, e, entries); 330 } 331 332 /* 333 * We begin by seeing if there's a semicolon on this line. 334 * If there is, we'll need to do some special handling. 335 */ 336 ep = strchr(cp, ';'); 337 lcp = strchr(cp, '{'); 338 rcp = strchr(cp, '}'); 339 340 /* We're only a partial statement (i.e., no closure). */ 341 if (ep == NULL && d->multiline) { 342 assert(e->text != NULL); 343 assert(e->textsz > 0); 344 /* Is a struct starting or ending here? */ 345 if (d->instruct && NULL != rcp) 346 d->instruct--; 347 else if (NULL != lcp) 348 d->instruct++; 349 decl_function_add(p, &e->text, &e->textsz, cp, len); 350 return(1); 351 } else if (ep == NULL && !d->multiline) { 352 d->multiline = 1; 353 /* Is a structure starting in this line? */ 354 if (NULL != lcp && 355 (rcp == NULL || rcp < lcp)) 356 d->instruct++; 357 decl_function_copy(p, &e->text, &e->textsz, cp, len); 358 return(1); 359 } 360 361 /* Position ourselves after the semicolon. */ 362 assert(NULL != ep); 363 ncp = cp; 364 nlen = (ep - cp) + 1; 365 cp = ep + 1; 366 len -= nlen; 367 368 if (d->multiline) { 369 assert(NULL != e->text); 370 /* Don't stop the multi-line if we're in a struct. */ 371 if (d->instruct == 0) { 372 if (lcp == NULL || lcp > cp) 373 d->multiline = 0; 374 } else if (NULL != rcp && rcp < cp) 375 if (--d->instruct == 0) 376 d->multiline = 0; 377 decl_function_add(p, &e->text, &e->textsz, ncp, nlen); 378 } else { 379 assert(e->text == NULL); 380 if (NULL != lcp && lcp < cp) { 381 d->multiline = 1; 382 d->instruct++; 383 } 384 decl_function_copy(p, &e->text, &e->textsz, ncp, nlen); 385 } 386 387 goto again; 388 } 389 390 /* 391 * A definition is just #define followed by space followed by the name, 392 * then the value of that name. 393 * We ignore the latter. 394 * FIXME: this does not understand multi-line CPP, but I don't think 395 * there are any instances of that in sqlite3.h. 396 */ 397 static int 398 decl_define(struct parse *p, const char *cp, size_t len) 399 { 400 struct defn *d; 401 struct decl *e; 402 size_t sz; 403 404 while (isspace((unsigned char)*cp)) { 405 cp++; 406 len--; 407 } 408 if (len == 0) { 409 warnx("%s:%zu: empty pre-processor " 410 "constant", p->fn, p->ln); 411 return(1); 412 } 413 414 d = TAILQ_LAST(&p->dqhead, defnq); 415 assert(NULL != d); 416 417 /* 418 * We're parsing a preprocessor definition, but we're still 419 * waiting on a semicolon from a function definition. 420 * It might be a comment or an error. 421 */ 422 if (d->multiline) { 423 if (verbose) 424 warnx("%s:%zu: multiline declaration " 425 "still open", p->fn, p->ln); 426 e = TAILQ_LAST(&d->dcqhead, declq); 427 assert(NULL != e); 428 e->type = DECLTYPE_NEITHER; 429 d->multiline = d->instruct = 0; 430 } 431 432 sz = 0; 433 while (cp[sz] != '\0' && !isspace((unsigned char)cp[sz])) 434 sz++; 435 436 e = calloc(1, sizeof(struct decl)); 437 if (e == NULL) 438 err(1, NULL); 439 e->type = DECLTYPE_CPP; 440 e->text = calloc(1, sz + 1); 441 if (e->text == NULL) 442 err(1, NULL); 443 strlcpy(e->text, cp, sz + 1); 444 e->textsz = sz; 445 TAILQ_INSERT_TAIL(&d->dcqhead, e, entries); 446 return(1); 447 } 448 449 /* 450 * A declaration is a function, variable, preprocessor definition, or 451 * really anything else until we reach a blank line. 452 */ 453 static void 454 decl(struct parse *p, const char *cp, size_t len) 455 { 456 struct defn *d; 457 struct decl *e; 458 const char *oldcp; 459 size_t oldlen; 460 461 oldcp = cp; 462 oldlen = len; 463 464 while (isspace((unsigned char)*cp)) { 465 cp++; 466 len--; 467 } 468 469 d = TAILQ_LAST(&p->dqhead, defnq); 470 assert(NULL != d); 471 472 /* Check closure. */ 473 if (*cp == '\0') { 474 p->phase = PHASE_INIT; 475 /* Check multiline status. */ 476 if (d->multiline) { 477 if (verbose) 478 warnx("%s:%zu: multiline declaration " 479 "still open", p->fn, p->ln); 480 e = TAILQ_LAST(&d->dcqhead, declq); 481 assert(NULL != e); 482 e->type = DECLTYPE_NEITHER; 483 d->multiline = d->instruct = 0; 484 } 485 return; 486 } 487 488 d->fulldesc = realloc(d->fulldesc, 489 d->fulldescsz + oldlen + 2); 490 if (d->fulldesc == NULL) 491 err(1, NULL); 492 if (d->fulldescsz == 0) 493 d->fulldesc[0] = '\0'; 494 d->fulldescsz += oldlen + 2; 495 strlcat(d->fulldesc, oldcp, d->fulldescsz); 496 strlcat(d->fulldesc, "\n", d->fulldescsz); 497 498 /* 499 * Catch preprocessor defines, but discard all other types of 500 * preprocessor statements. 501 * We might already be in the middle of a declaration (a 502 * function declaration), but that's ok. 503 */ 504 505 if (*cp == '#') { 506 len--; 507 cp++; 508 while (isspace((unsigned char)*cp)) { 509 len--; 510 cp++; 511 } 512 if (strncmp(cp, "define", 6) == 0) 513 decl_define(p, cp + 6, len - 6); 514 return; 515 } 516 517 /* Skip one-liner comments. */ 518 519 if (len > 4 && 520 cp[0] == '/' && cp[1] == '*' && 521 cp[len - 2] == '*' && cp[len - 1] == '/') 522 return; 523 524 decl_function(p, cp, len); 525 } 526 527 /* 528 * Whether to end an interface description phase with an asterisk-slash. 529 * This is run within a phase already opened with slash-asterisk. It 530 * adjusts the parse state on ending a phase or syntax errors. It has 531 * various hacks around lacks syntax (e.g., starting single-asterisk 532 * instead of double-asterisk) found in the wild. 533 * 534 * Returns zero if not ending the phase, non-zero if ending. 535 */ 536 static int 537 endphase(struct parse *p, const char *cp) 538 { 539 540 if (*cp == '\0') { 541 /* 542 * Error: empty line. 543 */ 544 warnx("%s:%zu: warn: unexpected empty line in " 545 "interface description", p->fn, p->ln); 546 p->phase = PHASE_INIT; 547 return 1; 548 } else if (strcmp(cp, "*/") == 0) { 549 /* 550 * End of the interface description. 551 */ 552 p->phase = PHASE_DECL; 553 return 1; 554 } else if (!(cp[0] == '*' && cp[1] == '*')) { 555 /* 556 * Error: bad syntax, not end or continuation. 557 */ 558 if (cp[0] == '*' && cp[1] == '\0') { 559 if (verbose) 560 warnx("%s:%zu: warn: ignoring " 561 "standalone asterisk " 562 "in interface description", 563 p->fn, p->ln); 564 return 0; 565 } else if (cp[0] == '*' && cp[1] == ' ') { 566 if (verbose) 567 warnx("%s:%zu: warn: ignoring " 568 "leading single asterisk " 569 "in interface description", 570 p->fn, p->ln); 571 return 0; 572 } 573 warnx("%s:%zu: warn: ambiguous leading characters in " 574 "interface description", p->fn, p->ln); 575 p->phase = PHASE_INIT; 576 return 1; 577 } 578 579 /* If here, at a continuation ('**'). */ 580 581 return 0; 582 } 583 584 /* 585 * Parse a "SEE ALSO" phase, which can come at any point in the 586 * interface description (unlike what they claim). 587 */ 588 static void 589 seealso(struct parse *p, const char *cp, size_t len) 590 { 591 struct defn *d; 592 593 if (endphase(p, cp) || len < 2) 594 return; 595 596 cp += 2; 597 len -= 2; 598 599 while (isspace((unsigned char)*cp)) { 600 cp++; 601 len--; 602 } 603 604 /* Blank line: back to description part. */ 605 if (len == 0) { 606 p->phase = PHASE_DESC; 607 return; 608 } 609 610 /* Fetch current interface definition. */ 611 d = TAILQ_LAST(&p->dqhead, defnq); 612 assert(NULL != d); 613 614 d->seealso = realloc(d->seealso, 615 d->seealsosz + len + 1); 616 memcpy(d->seealso + d->seealsosz, cp, len); 617 d->seealsosz += len; 618 d->seealso[d->seealsosz] = '\0'; 619 } 620 621 /* 622 * A definition description is a block of text that we'll later format 623 * in mdoc(7). 624 * It extends from the name of the definition down to the declarations 625 * themselves. 626 */ 627 static void 628 desc(struct parse *p, const char *cp, size_t len) 629 { 630 struct defn *d; 631 size_t nsz; 632 633 if (endphase(p, cp) || len < 2) 634 return; 635 636 cp += 2; 637 len -= 2; 638 639 while (isspace((unsigned char)*cp)) { 640 cp++; 641 len--; 642 } 643 644 /* Fetch current interface definition. */ 645 646 d = TAILQ_LAST(&p->dqhead, defnq); 647 assert(NULL != d); 648 649 /* Ignore leading blank lines. */ 650 651 if (len == 0 && d->desc == NULL) 652 return; 653 654 /* Collect SEE ALSO clauses. */ 655 656 if (strncasecmp(cp, "see also:", 9) == 0) { 657 cp += 9; 658 len -= 9; 659 while (isspace((unsigned char)*cp)) { 660 cp++; 661 len--; 662 } 663 p->phase = PHASE_SEEALSO; 664 d->seealso = realloc(d->seealso, 665 d->seealsosz + len + 1); 666 memcpy(d->seealso + d->seealsosz, cp, len); 667 d->seealsosz += len; 668 d->seealso[d->seealsosz] = '\0'; 669 return; 670 } 671 672 /* White-space padding between lines. */ 673 674 if (d->desc != NULL && 675 d->descsz > 0 && 676 d->desc[d->descsz - 1] != ' ' && 677 d->desc[d->descsz - 1] != '\n') { 678 d->desc = realloc(d->desc, d->descsz + 2); 679 if (d->desc == NULL) 680 err(1, NULL); 681 d->descsz++; 682 strlcat(d->desc, " ", d->descsz + 1); 683 } 684 685 /* Either append the line of a newline, if blank. */ 686 687 nsz = len == 0 ? 1 : len; 688 if (d->desc == NULL) { 689 assert(d->descsz == 0); 690 d->desc = calloc(1, nsz + 1); 691 if (d->desc == NULL) 692 err(1, NULL); 693 } else { 694 d->desc = realloc(d->desc, d->descsz + nsz + 1); 695 if (d->desc == NULL) 696 err(1, NULL); 697 } 698 699 d->descsz += nsz; 700 strlcat(d->desc, len == 0 ? "\n" : cp, d->descsz + 1); 701 } 702 703 /* 704 * Copy all KEYWORDS into a buffer. 705 */ 706 static void 707 keys(struct parse *p, const char *cp, size_t len) 708 { 709 struct defn *d; 710 711 if (endphase(p, cp) || len < 2) 712 return; 713 714 cp += 2; 715 len -= 2; 716 while (isspace((unsigned char)*cp)) { 717 cp++; 718 len--; 719 } 720 721 if (len == 0) { 722 p->phase = PHASE_DESC; 723 return; 724 } else if (strncmp(cp, "KEYWORDS:", 9)) 725 return; 726 727 cp += 9; 728 len -= 9; 729 730 d = TAILQ_LAST(&p->dqhead, defnq); 731 assert(NULL != d); 732 d->keybuf = realloc(d->keybuf, d->keybufsz + len + 1); 733 if (d->keybuf == NULL) 734 err(1, NULL); 735 memcpy(d->keybuf + d->keybufsz, cp, len); 736 d->keybufsz += len; 737 d->keybuf[d->keybufsz] = '\0'; 738 } 739 740 /* 741 * Initial state is where we're scanning forward to find commented 742 * instances of CAPI3REF. 743 */ 744 static void 745 init(struct parse *p, const char *cp) 746 { 747 struct defn *d; 748 size_t i, sz; 749 750 /* Look for comment hook. */ 751 752 if (cp[0] != '*' || cp[1] != '*') 753 return; 754 cp += 2; 755 while (isspace((unsigned char)*cp)) 756 cp++; 757 758 /* Look for beginning of definition. */ 759 760 if (strncmp(cp, "CAPI3REF:", 9)) 761 return; 762 cp += 9; 763 while (isspace((unsigned char)*cp)) 764 cp++; 765 if (*cp == '\0') { 766 warnx("%s:%zu: warn: unexpected end of " 767 "interface definition", p->fn, p->ln); 768 return; 769 } 770 771 /* Add definition to list of existing ones. */ 772 773 if ((d = calloc(1, sizeof(struct defn))) == NULL) 774 err(1, NULL); 775 if ((d->name = strdup(cp)) == NULL) 776 err(1, NULL); 777 778 /* Strip trailing spaces and periods. */ 779 780 for (sz = strlen(d->name); sz > 0; sz--) 781 if (d->name[sz - 1] == '.' || 782 d->name[sz - 1] == ' ') 783 d->name[sz - 1] = '\0'; 784 else 785 break; 786 787 /* 788 * Un-title case. Use a simple heuristic where all words 789 * starting with an upper case letter followed by a not 790 * uppercase letter are lowercased. 791 */ 792 793 for (i = 0; sz > 0 && i < sz - 1; i++) 794 if ((i == 0 || d->name[i - 1] == ' ') && 795 isupper((unsigned char)d->name[i]) && 796 !isupper((unsigned char)d->name[i + 1]) && 797 !ispunct((unsigned char)d->name[i + 1])) 798 d->name[i] = tolower((unsigned char)d->name[i]); 799 800 d->fn = p->fn; 801 d->ln = p->ln; 802 p->phase = PHASE_KEYS; 803 TAILQ_INIT(&d->dcqhead); 804 TAILQ_INSERT_TAIL(&p->dqhead, d, entries); 805 } 806 807 #define BPOINT(_cp) \ 808 (';' == (_cp)[0] || \ 809 '[' == (_cp)[0] || \ 810 ('(' == (_cp)[0] && '*' != (_cp)[1]) || \ 811 ')' == (_cp)[0] || \ 812 '{' == (_cp)[0]) 813 814 /* 815 * Given a declaration (be it preprocessor or C), try to parse out a 816 * reasonable "name" for the affair. 817 * For a struct, for example, it'd be the struct name. 818 * For a typedef, it'd be the type name. 819 * For a function, it'd be the function name. 820 */ 821 static void 822 grok_name(const struct decl *e, 823 const char **start, size_t *sz) 824 { 825 const char *cp; 826 827 *start = NULL; 828 *sz = 0; 829 830 if (DECLTYPE_CPP != e->type) { 831 if (e->text[e->textsz - 1] != ';') 832 return; 833 cp = e->text; 834 do { 835 while (isspace((unsigned char)*cp)) 836 cp++; 837 if (BPOINT(cp)) 838 break; 839 /* Function pointers... */ 840 if (*cp == '(') 841 cp++; 842 /* Pass over pointers. */ 843 while (*cp == '*') 844 cp++; 845 *start = cp; 846 *sz = 0; 847 while (!isspace((unsigned char)*cp)) { 848 if (BPOINT(cp)) 849 break; 850 cp++; 851 (*sz)++; 852 } 853 } while (!BPOINT(cp)); 854 } else { 855 *sz = e->textsz; 856 *start = e->text; 857 } 858 } 859 860 /* 861 * Extract information from the interface definition. 862 * Mark it as "postprocessed" on success. 863 */ 864 static void 865 postprocess(const char *prefix, struct defn *d) 866 { 867 struct decl *first; 868 const char *start; 869 size_t offs, sz, i; 870 ENTRY ent; 871 872 if (TAILQ_EMPTY(&d->dcqhead)) 873 return; 874 875 /* Find the first #define or declaration. */ 876 877 TAILQ_FOREACH(first, &d->dcqhead, entries) 878 if (DECLTYPE_CPP == first->type || 879 DECLTYPE_C == first->type) 880 break; 881 882 if (first == NULL) { 883 warnx("%s:%zu: no entry to document", d->fn, d->ln); 884 return; 885 } 886 887 /* 888 * Now compute the document name (`Dt'). 889 * We'll also use this for the filename. 890 */ 891 892 grok_name(first, &start, &sz); 893 if (start == NULL) { 894 warnx("%s:%zu: couldn't deduce " 895 "entry name", d->fn, d->ln); 896 return; 897 } 898 899 /* Document name needs all-caps. */ 900 901 if ((d->dt = strndup(start, sz)) == NULL) 902 err(1, NULL); 903 sz = strlen(d->dt); 904 for (i = 0; i < sz; i++) 905 d->dt[i] = toupper((unsigned char)d->dt[i]); 906 907 /* Filename needs no special chars. */ 908 909 if (filename) { 910 asprintf(&d->fname, "%.*s.3", (int)sz, start); 911 offs = 0; 912 } else { 913 asprintf(&d->fname, "%s/%.*s.3", 914 prefix, (int)sz, start); 915 offs = strlen(prefix) + 1; 916 } 917 918 if (d->fname == NULL) 919 err(1, NULL); 920 921 for (i = 0; i < sz; i++) { 922 if (isalnum((unsigned char)d->fname[offs + i]) || 923 d->fname[offs + i] == '_' || 924 d->fname[offs + i] == '-') 925 continue; 926 d->fname[offs + i] = '_'; 927 } 928 929 /* 930 * First, extract all keywords. 931 */ 932 for (i = 0; i < d->keybufsz; ) { 933 while (isspace((unsigned char)d->keybuf[i])) 934 i++; 935 if (i == d->keybufsz) 936 break; 937 sz = 0; 938 start = &d->keybuf[i]; 939 if (d->keybuf[i] == '{') { 940 start = &d->keybuf[++i]; 941 for ( ; i < d->keybufsz; i++, sz++) 942 if (d->keybuf[i] == '}') 943 break; 944 if (d->keybuf[i] == '}') 945 i++; 946 } else 947 for ( ; i < d->keybufsz; i++, sz++) 948 if (isspace((unsigned char)d->keybuf[i])) 949 break; 950 if (sz == 0) 951 continue; 952 d->keys = reallocarray(d->keys, 953 d->keysz + 1, sizeof(char *)); 954 if (d->keys == NULL) 955 err(1, NULL); 956 d->keys[d->keysz] = malloc(sz + 1); 957 if (d->keys[d->keysz] == NULL) 958 err(1, NULL); 959 memcpy(d->keys[d->keysz], start, sz); 960 d->keys[d->keysz][sz] = '\0'; 961 d->keysz++; 962 963 /* Hash the keyword. */ 964 ent.key = d->keys[d->keysz - 1]; 965 ent.data = d; 966 (void)hsearch(ent, ENTER); 967 } 968 969 /* 970 * Now extract all `Nm' values for this document. 971 * We only use CPP and C references, and hope for the best when 972 * doing so. 973 * Enter each one of these as a searchable keyword. 974 */ 975 TAILQ_FOREACH(first, &d->dcqhead, entries) { 976 if (DECLTYPE_CPP != first->type && 977 DECLTYPE_C != first->type) 978 continue; 979 grok_name(first, &start, &sz); 980 if (start == NULL) 981 continue; 982 d->nms = reallocarray(d->nms, 983 d->nmsz + 1, sizeof(char *)); 984 if (d->nms == NULL) 985 err(1, NULL); 986 d->nms[d->nmsz] = malloc(sz + 1); 987 if (d->nms[d->nmsz] == NULL) 988 err(1, NULL); 989 memcpy(d->nms[d->nmsz], start, sz); 990 d->nms[d->nmsz][sz] = '\0'; 991 d->nmsz++; 992 993 /* Hash the name. */ 994 ent.key = d->nms[d->nmsz - 1]; 995 ent.data = d; 996 (void)hsearch(ent, ENTER); 997 } 998 999 if (d->nmsz == 0) { 1000 warnx("%s:%zu: couldn't deduce " 1001 "any names", d->fn, d->ln); 1002 return; 1003 } 1004 1005 /* 1006 * Next, scan for all `Xr' values. 1007 * We'll add more to this list later. 1008 */ 1009 for (i = 0; i < d->seealsosz; i++) { 1010 /* 1011 * Find next value starting with `['. 1012 * There's other stuff in there (whitespace or 1013 * free text leading up to these) that we're ok 1014 * to ignore. 1015 */ 1016 while (i < d->seealsosz && d->seealso[i] != '[') 1017 i++; 1018 if (i == d->seealsosz) 1019 break; 1020 1021 /* 1022 * Now scan for the matching `]'. 1023 * We can also have a vertical bar if we're separating a 1024 * keyword and its shown name. 1025 */ 1026 start = &d->seealso[++i]; 1027 sz = 0; 1028 while (i < d->seealsosz && 1029 d->seealso[i] != ']' && 1030 d->seealso[i] != '|') { 1031 i++; 1032 sz++; 1033 } 1034 if (i == d->seealsosz) 1035 break; 1036 if (sz == 0) 1037 continue; 1038 1039 /* 1040 * Continue on to the end-of-reference, if we weren't 1041 * there to begin with. 1042 */ 1043 if (d->seealso[i] != ']') 1044 while (i < d->seealsosz && 1045 d->seealso[i] != ']') 1046 i++; 1047 1048 /* Strip trailing whitespace. */ 1049 while (sz > 1 && start[sz - 1] == ' ') 1050 sz--; 1051 1052 /* Strip trailing parenthesis. */ 1053 if (sz > 2 && 1054 start[sz - 2] == '(' && 1055 start[sz - 1] == ')') 1056 sz -= 2; 1057 1058 d->xrs = reallocarray(d->xrs, 1059 d->xrsz + 1, sizeof(char *)); 1060 if (d->xrs == NULL) 1061 err(1, NULL); 1062 d->xrs[d->xrsz] = malloc(sz + 1); 1063 if (d->xrs[d->xrsz] == NULL) 1064 err(1, NULL); 1065 memcpy(d->xrs[d->xrsz], start, sz); 1066 d->xrs[d->xrsz][sz] = '\0'; 1067 d->xrsz++; 1068 } 1069 1070 /* 1071 * Next, extract all references. 1072 * We'll accumulate these into a list of SEE ALSO tags, after. 1073 * See how these are parsed above for a description: this is 1074 * basically the same thing. 1075 */ 1076 for (i = 0; i < d->descsz; i++) { 1077 if (d->desc[i] != '[') 1078 continue; 1079 i++; 1080 if (d->desc[i] == '[') 1081 continue; 1082 1083 start = &d->desc[i]; 1084 for (sz = 0; i < d->descsz; i++, sz++) 1085 if (d->desc[i] == ']' || 1086 d->desc[i] == '|') 1087 break; 1088 1089 if (i == d->descsz) 1090 break; 1091 else if (sz == 0) 1092 continue; 1093 1094 if (d->desc[i] != ']') 1095 while (i < d->descsz && d->desc[i] != ']') 1096 i++; 1097 1098 while (sz > 1 && start[sz - 1] == ' ') 1099 sz--; 1100 1101 if (sz > 2 && 1102 start[sz - 2] == '(' && 1103 start[sz - 1] == ')') 1104 sz -= 2; 1105 1106 d->xrs = reallocarray(d->xrs, 1107 d->xrsz + 1, sizeof(char *)); 1108 if (d->xrs == NULL) 1109 err(1, NULL); 1110 d->xrs[d->xrsz] = malloc(sz + 1); 1111 if (d->xrs[d->xrsz] == NULL) 1112 err(1, NULL); 1113 memcpy(d->xrs[d->xrsz], start, sz); 1114 d->xrs[d->xrsz][sz] = '\0'; 1115 d->xrsz++; 1116 } 1117 1118 d->postprocessed = 1; 1119 } 1120 1121 /* 1122 * Convenience function to look up which manpage "hosts" a certain 1123 * keyword. For example, SQLITE_OK(3) also handles SQLITE_TOOBIG and so 1124 * on, so a reference to SQLITE_TOOBIG should actually point to 1125 * SQLITE_OK. 1126 * Returns the keyword's file if found or NULL. 1127 */ 1128 static const char * 1129 lookup(const char *key) 1130 { 1131 ENTRY ent; 1132 ENTRY *res; 1133 const struct defn *d; 1134 1135 ent.key = (char *)(uintptr_t)key; 1136 ent.data = NULL; 1137 1138 if ((res = hsearch(ent, FIND)) == NULL) 1139 return NULL; 1140 1141 d = (const struct defn *)res->data; 1142 if (d->nmsz == 0) 1143 return NULL; 1144 1145 assert(d->nms[0] != NULL); 1146 return d->nms[0]; 1147 } 1148 1149 static int 1150 xrcmp(const void *p1, const void *p2) 1151 { 1152 /* Silence bogus warnings about un-consting. */ 1153 1154 const char *s1 = lookup(*(const char **)(uintptr_t)p1), 1155 *s2 = lookup(*(const char **)(uintptr_t)p2); 1156 1157 if (s1 == NULL) 1158 s1 = ""; 1159 if (s2 == NULL) 1160 s2 = ""; 1161 1162 return strcasecmp(s1, s2); 1163 } 1164 1165 /* 1166 * Return non-zero if "new sentence, new line" is in effect, zero 1167 * otherwise. 1168 * Accepts the start and finish offset of a buffer. 1169 */ 1170 static int 1171 newsentence(size_t start, size_t finish, const char *buf) 1172 { 1173 size_t span = finish - start; 1174 1175 assert(finish >= start); 1176 1177 /* Ignore "i.e." and "e.g.". */ 1178 1179 if ((span >= 4 && 1180 strncasecmp(&buf[finish - 4], "i.e.", 4) == 0) || 1181 (span >= 4 && 1182 strncasecmp(&buf[finish - 4], "e.g.", 4) == 0)) 1183 return 0; 1184 1185 return 1; 1186 } 1187 1188 /* 1189 * Emit a valid mdoc(7) document within the given prefix. 1190 */ 1191 static void 1192 emit(struct defn *d) 1193 { 1194 struct decl *first; 1195 size_t sz, i, j, col, last, ns, fnsz, stripspace; 1196 FILE *f; 1197 char *cp; 1198 const char *res, *lastres, *args, *str, *end, *fn; 1199 enum tag tag; 1200 enum preproc pre; 1201 1202 if (!d->postprocessed) { 1203 warnx("%s:%zu: interface has errors, not " 1204 "producing manpage", d->fn, d->ln); 1205 return; 1206 } 1207 1208 if (nofile == 0) { 1209 if ((f = fopen(d->fname, "w")) == NULL) { 1210 warn("%s: fopen", d->fname); 1211 return; 1212 } 1213 } else if (filename) { 1214 printf("%s\n", d->fname); 1215 return; 1216 } else 1217 f = stdout; 1218 1219 /* Begin by outputting the mdoc(7) header. */ 1220 1221 fputs(".Dd $" "Mdocdate$\n", f); 1222 fprintf(f, ".Dt %s 3\n", d->dt); 1223 fputs(".Os\n", f); 1224 fputs(".Sh NAME\n", f); 1225 1226 /* Now print the name bits of each declaration. */ 1227 1228 for (i = 0; i < d->nmsz; i++) 1229 fprintf(f, ".Nm %s%s\n", d->nms[i], 1230 i < d->nmsz - 1 ? " ," : ""); 1231 1232 fprintf(f, ".Nd %s\n", d->name); 1233 fputs(".Sh SYNOPSIS\n", f); 1234 fputs(".In sqlite3.h\n", f); 1235 1236 TAILQ_FOREACH(first, &d->dcqhead, entries) { 1237 if (first->type != DECLTYPE_CPP && 1238 first->type != DECLTYPE_C) 1239 continue; 1240 1241 /* Easy: just print the CPP name. */ 1242 1243 if (first->type == DECLTYPE_CPP) { 1244 fprintf(f, ".Fd #define %s\n", 1245 first->text); 1246 continue; 1247 } 1248 1249 /* First, strip out the sqlite CPPs. */ 1250 1251 for (i = 0; i < first->textsz; ) { 1252 for (pre = 0; pre < PREPROC__MAX; pre++) { 1253 sz = strlen(preprocs[pre]); 1254 if (strncmp(preprocs[pre], 1255 &first->text[i], sz)) 1256 continue; 1257 i += sz; 1258 while (isspace((unsigned char)first->text[i])) 1259 i++; 1260 break; 1261 } 1262 if (pre == PREPROC__MAX) 1263 break; 1264 } 1265 1266 /* If we're a typedef, immediately print Vt. */ 1267 1268 if (strncmp(&first->text[i], "typedef", 7) == 0) { 1269 fprintf(f, ".Vt %s\n", &first->text[i]); 1270 continue; 1271 } 1272 1273 /* Are we a struct? */ 1274 1275 if (first->textsz > 2 && 1276 first->text[first->textsz - 2] == '}' && 1277 (cp = strchr(&first->text[i], '{')) != NULL) { 1278 *cp = '\0'; 1279 fprintf(f, ".Vt %s;\n", &first->text[i]); 1280 /* Restore brace for later usage. */ 1281 *cp = '{'; 1282 continue; 1283 } 1284 1285 /* Catch remaining non-functions. */ 1286 1287 if (first->textsz > 2 && 1288 first->text[first->textsz - 2] != ')') { 1289 fprintf(f, ".Vt %s\n", &first->text[i]); 1290 continue; 1291 } 1292 1293 str = &first->text[i]; 1294 if ((args = strchr(str, '(')) == NULL || args == str) { 1295 /* What is this? */ 1296 fputs(".Bd -literal\n", f); 1297 fputs(&first->text[i], f); 1298 fputs("\n.Ed\n", f); 1299 continue; 1300 } 1301 1302 /* 1303 * Current state: 1304 * type_t *function (args...) 1305 * ^str ^args 1306 * Scroll back to end of function name. 1307 */ 1308 1309 end = args - 1; 1310 while (end > str && isspace((unsigned char)*end)) 1311 end--; 1312 1313 /* 1314 * Current state: 1315 * type_t *function (args...) 1316 * ^str ^end ^args 1317 * Scroll back to what comes before. 1318 */ 1319 1320 for (fnsz = 0; end > str; end--, fnsz++) 1321 if (isspace((unsigned char)*end) || *end == '*') 1322 break; 1323 1324 if (fnsz == 0) 1325 warnx("%s:%zu: zero-length " 1326 "function name", d->fn, d->ln); 1327 fn = end + 1; 1328 1329 /* 1330 * Current state: 1331 * type_t *function (args...) 1332 * ^str ^end ^args 1333 * type_t function (args...) 1334 * ^str ^end ^args 1335 * Strip away whitespace. 1336 */ 1337 1338 while (end > str && isspace((unsigned char)*end)) 1339 end--; 1340 1341 /* 1342 * type_t *function (args...) 1343 * ^str ^end ^args 1344 * type_t function (args...) 1345 * ^str ^end ^args 1346 */ 1347 1348 /* 1349 * If we can't find what came before, then the function 1350 * has no type, which is odd... let's just call it void. 1351 */ 1352 1353 if (end > str) { 1354 fprintf(f, ".Ft %.*s\n", 1355 (int)(end - str + 1), str); 1356 fprintf(f, ".Fo %.*s\n", (int)fnsz, fn); 1357 } else { 1358 fputs(".Ft void\n", f); 1359 fprintf(f, ".Fo %.*s\n", (int)fnsz, fn); 1360 } 1361 1362 /* 1363 * Convert function arguments into `Fa' clauses. 1364 * This also handles nested function pointers, which 1365 * would otherwise throw off the delimeters. 1366 */ 1367 1368 for (;;) { 1369 str = ++args; 1370 while (isspace((unsigned char)*str)) 1371 str++; 1372 fputs(".Fa \"", f); 1373 ns = 0; 1374 while (*str != '\0' && 1375 (ns || *str != ',') && 1376 (ns || *str != ')')) { 1377 /* 1378 * Handle comments in the declarations. 1379 */ 1380 if (str[0] == '/' && str[1] == '*') { 1381 str += 2; 1382 for ( ; str[0] != '\0'; str++) 1383 if (str[0] == '*' && str[1] == '/') 1384 break; 1385 if (*str == '\0') 1386 break; 1387 str += 2; 1388 while (isspace((unsigned char)*str)) 1389 str++; 1390 if (*str == '\0' || 1391 (ns == 0 && *str == ',') || 1392 (ns == 0 && *str == ')')) 1393 break; 1394 } 1395 if (*str == '(') 1396 ns++; 1397 else if (*str == ')') 1398 ns--; 1399 1400 /* 1401 * Handle some instances of whitespace 1402 * by compressing it down. 1403 * However, if the whitespace ends at 1404 * the end-of-definition, then don't 1405 * print it at all. 1406 */ 1407 1408 if (isspace((unsigned char)*str)) { 1409 while (isspace((unsigned char)*str)) 1410 str++; 1411 /* Are we at a comment? */ 1412 if (str[0] == '/' && str[1] == '*') 1413 continue; 1414 if (*str == '\0' || 1415 (ns == 0 && *str == ',') || 1416 (ns == 0 && *str == ')')) 1417 break; 1418 fputc(' ', f); 1419 } else { 1420 fputc(*str, f); 1421 str++; 1422 } 1423 } 1424 fputs("\"\n", f); 1425 if (*str == '\0' || *str == ')') 1426 break; 1427 args = str; 1428 } 1429 1430 fputs(".Fc\n", f); 1431 } 1432 1433 fputs(".Sh DESCRIPTION\n", f); 1434 1435 /* 1436 * Strip the crap out of the description. 1437 * "Crap" consists of things I don't understand that mess up 1438 * parsing of the HTML, for instance, 1439 * <dl>[[foo bar]]<dt>foo bar</dt>...</dl> 1440 * These are not well-formed HTML. 1441 * Note that d->desc[d->descz] is the NUL terminator, so we 1442 * don't need to check d->descsz - 1. 1443 */ 1444 1445 for (i = 0; i < d->descsz; ) { 1446 if (d->desc[i] == '^' && 1447 d->desc[i + 1] == '(') { 1448 memmove(&d->desc[i], 1449 &d->desc[i + 2], 1450 d->descsz - i - 1); 1451 d->descsz -= 2; 1452 continue; 1453 } else if (d->desc[i] == ')' && 1454 d->desc[i + 1] == '^') { 1455 memmove(&d->desc[i], 1456 &d->desc[i + 2], 1457 d->descsz - i - 1); 1458 d->descsz -= 2; 1459 continue; 1460 } else if (d->desc[i] == '^') { 1461 memmove(&d->desc[i], 1462 &d->desc[i + 1], 1463 d->descsz - i); 1464 d->descsz -= 1; 1465 continue; 1466 } else if (d->desc[i] != '[' || 1467 d->desc[i + 1] != '[') { 1468 i++; 1469 continue; 1470 } 1471 1472 for (j = i; j < d->descsz; j++) 1473 if (d->desc[j] == ']' && 1474 d->desc[j + 1] == ']') 1475 break; 1476 1477 /* Ignore if we don't have a terminator. */ 1478 1479 assert(j > i); 1480 j += 2; 1481 if (j > d->descsz) { 1482 i++; 1483 continue; 1484 } 1485 1486 memmove(&d->desc[i], &d->desc[j], d->descsz - j + 1); 1487 d->descsz -= (j - i); 1488 } 1489 1490 /* 1491 * Here we go! 1492 * Print out the description as best we can. 1493 * Do on-the-fly processing of any HTML we encounter into 1494 * mdoc(7) and try to break lines up. 1495 */ 1496 1497 col = stripspace = 0; 1498 1499 for (i = 0; i < d->descsz; ) { 1500 /* 1501 * The "stripspace" variable is set to >=2 if we've 1502 * stripped white-space off before an anticipated macro. 1503 * Without it, if the macro ends up *not* being a macro, 1504 * we wouldn't flush the line and thus end up losing a 1505 * space. This lets the code that flushes the line know 1506 * that we've stripped spaces and adds them back in. 1507 */ 1508 1509 if (stripspace > 0) 1510 stripspace--; 1511 1512 /* Ignore NUL byte, just in case. */ 1513 1514 if (d->desc[i] == '\0') { 1515 i++; 1516 continue; 1517 } 1518 1519 /* 1520 * Newlines are paragraph breaks. 1521 * If we have multiple newlines, then keep to a single 1522 * `Pp' to keep it clean. 1523 * Only do this if we're not before a block-level HTML, 1524 * as this would mean, for instance, a `Pp'-`Bd' pair. 1525 */ 1526 1527 if (d->desc[i] == '\n') { 1528 while (isspace((unsigned char)d->desc[i])) 1529 i++; 1530 for (tag = 0; tag < TAG__MAX; tag++) { 1531 sz = strlen(tags[tag].html); 1532 if (strncasecmp(&d->desc[i], 1533 tags[tag].html, sz) == 0) 1534 break; 1535 } 1536 if (tag == TAG__MAX || 1537 (tags[tag].flags & TAGINFO_INLINE)) { 1538 if (col > 0) 1539 fputs("\n", f); 1540 fputs(".Pp\n", f); 1541 /* We're on a new line. */ 1542 col = 0; 1543 } 1544 continue; 1545 } 1546 1547 /* 1548 * New sentence, new line. 1549 * We guess whether this is the case by using the 1550 * dumbest possible heuristic. 1551 */ 1552 1553 if (d->desc[i] == ' ' && 1554 i > 0 && d->desc[i - 1] == '.') { 1555 for (j = i - 1; j > 0; j--) 1556 if (isspace((unsigned char)d->desc[j])) { 1557 j++; 1558 break; 1559 } 1560 if (newsentence(j, i, d->desc)) { 1561 while (d->desc[i] == ' ') 1562 i++; 1563 fputc('\n', f); 1564 col = 0; 1565 continue; 1566 } 1567 } 1568 1569 /* 1570 * After 65 characters, force a break when we encounter 1571 * white-space to keep our lines more or less tidy. 1572 */ 1573 1574 if (col > 65 && d->desc[i] == ' ') { 1575 while (d->desc[i] == ' ' ) 1576 i++; 1577 fputc('\n', f); 1578 col = 0; 1579 continue; 1580 } 1581 1582 /* Parse HTML tags and links. */ 1583 1584 if (d->desc[i] == '<') { 1585 for (tag = 0; tag < TAG__MAX; tag++) { 1586 sz = strlen(tags[tag].html); 1587 assert(sz > 0); 1588 if (strncmp(&d->desc[i], 1589 tags[tag].html, sz)) 1590 continue; 1591 1592 i += sz; 1593 1594 /* Blindly ignore attributes. */ 1595 1596 if (tags[tag].flags & TAGINFO_ATTRS) { 1597 while (d->desc[i] != '\0' && 1598 d->desc[i] != '>') 1599 i++; 1600 if (d->desc[i] == '\0') 1601 break; 1602 i++; 1603 } 1604 1605 /* 1606 * NOOP tags don't do anything, such as 1607 * the case of `</dd>', which only 1608 * serves to end an `It' block that will 1609 * be closed out by a subsequent `It' or 1610 * end of clause `El' anyway. 1611 * Skip the trailing space. 1612 */ 1613 1614 if (tags[tag].flags & TAGINFO_NOOP) { 1615 while (isspace((unsigned char)d->desc[i])) 1616 i++; 1617 break; 1618 } else if (tags[tag].flags & TAGINFO_INLINE) { 1619 while (stripspace > 0) { 1620 fputc(' ', f); 1621 col++; 1622 stripspace--; 1623 } 1624 fputs(tags[tag].mdoc, f); 1625 /*col += strlen(tags[tag].mdoc);*/ 1626 break; 1627 } 1628 1629 /* 1630 * A breaking mdoc(7) statement. 1631 * Break the current line, output the 1632 * macro, and conditionally break 1633 * following that (or we might do 1634 * nothing at all). 1635 */ 1636 1637 if (col > 0) { 1638 fputs("\n", f); 1639 col = 0; 1640 } 1641 1642 fputs(tags[tag].mdoc, f); 1643 if (!(tags[tag].flags & TAGINFO_NOBR)) { 1644 fputs("\n", f); 1645 col = 0; 1646 } else if (!(tags[tag].flags & TAGINFO_NOSP)) { 1647 fputs(" ", f); 1648 col++; 1649 } 1650 while (isspace((unsigned char)d->desc[i])) 1651 i++; 1652 break; 1653 } 1654 if (tag < TAG__MAX) { 1655 stripspace = 0; 1656 continue; 1657 } 1658 while (stripspace > 0) { 1659 fputc(' ', f); 1660 col++; 1661 stripspace--; 1662 } 1663 } else if (d->desc[i] == '[' && d->desc[i + 1] != ']') { 1664 /* Do we start at the bracket or bar? */ 1665 1666 for (sz = i + 1; sz < d->descsz; sz++) 1667 if (d->desc[sz] == '|' || 1668 d->desc[sz] == ']') 1669 break; 1670 1671 /* This is a degenerate case. */ 1672 1673 if (sz == d->descsz) { 1674 i++; 1675 stripspace = 0; 1676 continue; 1677 } 1678 1679 /* 1680 * Look for a trailing "()", using "j" as a 1681 * sentinel in case it was found. This lets us 1682 * print out a "Fn xxxx" instead of having the 1683 * function be ugly. If we don't have a Fn and 1684 * we'd stripped space before this, remember to 1685 * add the space back in. 1686 */ 1687 1688 j = 0; 1689 if (d->desc[sz] != '|') { 1690 i = i + 1; 1691 if (sz > 2 && 1692 d->desc[sz - 1] == ')' && 1693 d->desc[sz - 2] == '(') { 1694 if (col > 0) 1695 fputc('\n', f); 1696 fputs(".Fn ", f); 1697 j = sz - 2; 1698 assert(j > 0); 1699 } else if (stripspace) { 1700 fputc(' ', f); 1701 col++; 1702 } 1703 } else { 1704 if (stripspace) { 1705 fputc(' ', f); 1706 col++; 1707 } 1708 i = sz + 1; 1709 } 1710 1711 while (isspace((unsigned char)d->desc[i])) 1712 i++; 1713 1714 /* 1715 * Now handle in-page references. If we're a 1716 * function reference (e.g., function()), then 1717 * omit the trailing parentheses and put in a Fn 1718 * block. Otherwise print them out as-is: we've 1719 * already accumulated them into our "SEE ALSO" 1720 * values, which we'll use below. 1721 */ 1722 1723 for ( ; i < d->descsz; i++, col++) { 1724 if (j > 0 && i == j) { 1725 i += 3; 1726 for ( ; i < d->descsz; i++) 1727 if (d->desc[i] == '.') 1728 fputs(" .", f); 1729 else if (d->desc[i] == ',') 1730 fputs(" ,", f); 1731 else if (d->desc[i] == ')') 1732 fputs(" )", f); 1733 else 1734 break; 1735 1736 /* Trim trailing space. */ 1737 1738 while (i < d->descsz && 1739 isspace((unsigned char)d->desc[i])) 1740 i++; 1741 1742 fputc('\n', f); 1743 col = 0; 1744 break; 1745 } else if (d->desc[i] == ']') { 1746 i++; 1747 break; 1748 } 1749 fputc(d->desc[i], f); 1750 col++; 1751 } 1752 1753 stripspace = 0; 1754 continue; 1755 } 1756 1757 /* Strip leading spaces from output. */ 1758 1759 if (d->desc[i] == ' ' && col == 0) { 1760 while (d->desc[i] == ' ') 1761 i++; 1762 continue; 1763 } 1764 1765 /* 1766 * Strip trailing spaces from output. 1767 * Set "stripspace" to be the number of white-space 1768 * characters that we've skipped, plus one. 1769 * This means that the next loop iteration while get the 1770 * actual amount we've skipped (for '<' or '[') and we 1771 * can act upon it there. 1772 */ 1773 1774 if (d->desc[i] == ' ') { 1775 j = i; 1776 while (j < d->descsz && d->desc[j] == ' ') 1777 j++; 1778 if (j < d->descsz && 1779 (d->desc[j] == '\n' || 1780 d->desc[j] == '<' || 1781 d->desc[j] == '[')) { 1782 stripspace = d->desc[j] != '\n' ? 1783 (j - i + 1) : 0; 1784 i = j; 1785 continue; 1786 } 1787 } 1788 1789 assert(d->desc[i] != '\n'); 1790 1791 /* 1792 * Handle some oddities. 1793 * The following HTML escapes exist in the output that I 1794 * could find. 1795 * There might be others... 1796 */ 1797 1798 if (strncmp(&d->desc[i], "→", 6) == 0) { 1799 i += 6; 1800 fputs("\\(->", f); 1801 } else if (strncmp(&d->desc[i], "←", 6) == 0) { 1802 i += 6; 1803 fputs("\\(<-", f); 1804 } else if (strncmp(&d->desc[i], " ", 6) == 0) { 1805 i += 6; 1806 fputc(' ', f); 1807 } else if (strncmp(&d->desc[i], "<", 4) == 0) { 1808 i += 4; 1809 fputc('<', f); 1810 } else if (strncmp(&d->desc[i], ">", 4) == 0) { 1811 i += 4; 1812 fputc('>', f); 1813 } else if (strncmp(&d->desc[i], "[", 5) == 0) { 1814 i += 5; 1815 fputc('[', f); 1816 } else { 1817 /* Make sure we don't trigger a macro. */ 1818 if (col == 0 && 1819 (d->desc[i] == '.' || d->desc[i] == '\'')) 1820 fputs("\\&", f); 1821 fputc(d->desc[i], f); 1822 i++; 1823 } 1824 1825 col++; 1826 } 1827 1828 if (col > 0) 1829 fputs("\n", f); 1830 1831 fputs(".Sh IMPLEMENTATION NOTES\n", f); 1832 fprintf(f, "These declarations were extracted from the\n" 1833 "interface documentation at line %zu.\n", d->ln); 1834 fputs(".Bd -literal\n", f); 1835 fputs(d->fulldesc, f); 1836 fputs(".Ed\n", f); 1837 1838 /* 1839 * Look up all of our keywords (which are in the xrs field) in 1840 * the table of all known keywords. 1841 * Don't print duplicates. 1842 */ 1843 1844 if (d->xrsz > 0) { 1845 qsort(d->xrs, d->xrsz, sizeof(char *), xrcmp); 1846 lastres = NULL; 1847 for (last = 0, i = 0; i < d->xrsz; i++) { 1848 res = lookup(d->xrs[i]); 1849 1850 /* Ignore self-reference. */ 1851 1852 if (res == d->nms[0] && verbose) 1853 warnx("%s:%zu: self-reference: %s", 1854 d->fn, d->ln, d->xrs[i]); 1855 if (res == d->nms[0]) 1856 continue; 1857 if (res == NULL && verbose) 1858 warnx("%s:%zu: ref not found: %s", 1859 d->fn, d->ln, d->xrs[i]); 1860 if (res == NULL) 1861 continue; 1862 1863 /* Ignore duplicates. */ 1864 1865 if (lastres == res) 1866 continue; 1867 1868 if (last) 1869 fputs(" ,\n", f); 1870 else 1871 fputs(".Sh SEE ALSO\n", f); 1872 1873 fprintf(f, ".Xr %s 3", res); 1874 last = 1; 1875 lastres = res; 1876 } 1877 if (last) 1878 fputs("\n", f); 1879 } 1880 1881 if (nofile == 0) 1882 fclose(f); 1883 } 1884 1885 #if HAVE_PLEDGE 1886 /* 1887 * We pledge(2) stdio if we're receiving from stdin and writing to 1888 * stdout, otherwise we need file-creation and writing. 1889 */ 1890 static void 1891 sandbox_pledge(void) 1892 { 1893 1894 if (nofile) { 1895 if (pledge("stdio", NULL) == -1) 1896 err(1, NULL); 1897 } else { 1898 if (pledge("stdio wpath cpath", NULL) == -1) 1899 err(1, NULL); 1900 } 1901 } 1902 #endif 1903 1904 #if HAVE_SANDBOX_INIT 1905 /* 1906 * Darwin's "seatbelt". 1907 * If we're writing to stdout, then use pure computation. 1908 * Otherwise we need file writing. 1909 */ 1910 static void 1911 sandbox_apple(void) 1912 { 1913 char *ep; 1914 int rc; 1915 1916 rc = sandbox_init 1917 (nofile ? kSBXProfilePureComputation : 1918 kSBXProfileNoNetwork, SANDBOX_NAMED, &ep); 1919 if (rc == 0) 1920 return; 1921 perror(ep); 1922 sandbox_free_error(ep); 1923 exit(1); 1924 } 1925 #endif 1926 1927 /* 1928 * Check to see whether there are any filename duplicates. 1929 * This is just a warning, but will really screw things up, since the 1930 * last filename will overwrite the first. 1931 */ 1932 static void 1933 check_dupes(struct parse *p) 1934 { 1935 const struct defn *d, *dd; 1936 1937 TAILQ_FOREACH(d, &p->dqhead, entries) 1938 TAILQ_FOREACH_REVERSE(dd, &p->dqhead, defnq, entries) { 1939 if (dd == d) 1940 break; 1941 if (d->fname == NULL || 1942 dd->fname == NULL || 1943 strcmp(d->fname, dd->fname)) 1944 continue; 1945 warnx("%s:%zu: duplicate filename: " 1946 "%s (from %s, line %zu)", d->fn, 1947 d->ln, d->fname, dd->nms[0], dd->ln); 1948 } 1949 } 1950 1951 int 1952 main(int argc, char *argv[]) 1953 { 1954 size_t i, bufsz; 1955 ssize_t len; 1956 FILE *f = stdin; 1957 char *cp = NULL; 1958 const char *prefix = "."; 1959 struct parse p; 1960 int rc = 0, ch; 1961 struct defn *d; 1962 struct decl *e; 1963 1964 memset(&p, 0, sizeof(struct parse)); 1965 1966 p.fn = "<stdin>"; 1967 p.ln = 0; 1968 p.phase = PHASE_INIT; 1969 1970 TAILQ_INIT(&p.dqhead); 1971 1972 while ((ch = getopt(argc, argv, "nNp:v")) != -1) 1973 switch (ch) { 1974 case 'n': 1975 nofile = 1; 1976 break; 1977 case 'N': 1978 nofile = 1; 1979 filename = 1; 1980 break; 1981 case 'p': 1982 prefix = optarg; 1983 break; 1984 case 'v': 1985 verbose = 1; 1986 break; 1987 default: 1988 goto usage; 1989 } 1990 1991 argc -= optind; 1992 argv += optind; 1993 1994 if (argc > 1) 1995 goto usage; 1996 1997 if (argc > 0) { 1998 if ((f = fopen(argv[0], "r")) == NULL) 1999 err(1, "%s", argv[0]); 2000 p.fn = argv[0]; 2001 } 2002 2003 #if HAVE_SANDBOX_INIT 2004 sandbox_apple(); 2005 #elif HAVE_PLEDGE 2006 sandbox_pledge(); 2007 #endif 2008 /* 2009 * Read in line-by-line and process in the phase dictated by our 2010 * finite state automaton. 2011 */ 2012 2013 while ((len = getline(&cp, &bufsz, f)) != -1) { 2014 assert(len > 0); 2015 p.ln++; 2016 if (cp[len - 1] != '\n') { 2017 warnx("%s:%zu: unterminated line", p.fn, p.ln); 2018 break; 2019 } 2020 2021 /* 2022 * Lines are now always NUL-terminated, and don't allow 2023 * NUL characters in the line. 2024 */ 2025 2026 cp[--len] = '\0'; 2027 len = strlen(cp); 2028 2029 switch (p.phase) { 2030 case PHASE_INIT: 2031 init(&p, cp); 2032 break; 2033 case PHASE_KEYS: 2034 keys(&p, cp, (size_t)len); 2035 break; 2036 case PHASE_DESC: 2037 desc(&p, cp, (size_t)len); 2038 break; 2039 case PHASE_SEEALSO: 2040 seealso(&p, cp, (size_t)len); 2041 break; 2042 case PHASE_DECL: 2043 decl(&p, cp, (size_t)len); 2044 break; 2045 } 2046 } 2047 2048 /* 2049 * If we hit the last line, then try to process. 2050 * Otherwise, we failed along the way. 2051 */ 2052 2053 if (feof(f)) { 2054 /* 2055 * Allow us to be at the declarations or scanning for 2056 * the next clause. 2057 */ 2058 if (p.phase == PHASE_INIT || 2059 p.phase == PHASE_DECL) { 2060 if (hcreate(5000) == 0) 2061 err(1, NULL); 2062 TAILQ_FOREACH(d, &p.dqhead, entries) 2063 postprocess(prefix, d); 2064 check_dupes(&p); 2065 TAILQ_FOREACH(d, &p.dqhead, entries) 2066 emit(d); 2067 rc = 1; 2068 } else if (p.phase != PHASE_DECL) 2069 warnx("%s:%zu: exit when not in " 2070 "initial state", p.fn, p.ln); 2071 } 2072 2073 while ((d = TAILQ_FIRST(&p.dqhead)) != NULL) { 2074 TAILQ_REMOVE(&p.dqhead, d, entries); 2075 while ((e = TAILQ_FIRST(&d->dcqhead)) != NULL) { 2076 TAILQ_REMOVE(&d->dcqhead, e, entries); 2077 free(e->text); 2078 free(e); 2079 } 2080 free(d->name); 2081 free(d->desc); 2082 free(d->fulldesc); 2083 free(d->dt); 2084 for (i = 0; i < d->nmsz; i++) 2085 free(d->nms[i]); 2086 for (i = 0; i < d->xrsz; i++) 2087 free(d->xrs[i]); 2088 for (i = 0; i < d->keysz; i++) 2089 free(d->keys[i]); 2090 free(d->keys); 2091 free(d->nms); 2092 free(d->xrs); 2093 free(d->fname); 2094 free(d->seealso); 2095 free(d->keybuf); 2096 free(d); 2097 } 2098 2099 return !rc; 2100 usage: 2101 fprintf(stderr, "usage: %s [-Nnv] [-p prefix] [file]\n", 2102 getprogname()); 2103 return 1; 2104 } 2105