xref: /netbsd-src/external/public-domain/sqlite/sqlite2mdoc/main.c (revision b9988867a8ad969c45a52aa7628bc932ec98d46b)
1 /*
2  * Copyright (c) 2016, 2018, 2023 Kristaps Dzonsons <kristaps@bsd.lv>
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 #include "config.h"
17 
18 #if HAVE_SYS_QUEUE
19 # include <sys/queue.h>
20 #endif
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #if HAVE_ERR
25 # include <err.h>
26 #endif
27 #include <getopt.h>
28 #if HAVE_SANDBOX_INIT
29 # include <sandbox.h>
30 #endif
31 #include <search.h>
32 #include <stdint.h> /* uintptr_t */
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 
/*
 * Parser state: which part of an interface comment (or of the
 * declarations that follow it) the current input line belongs to.
 */
enum	phase {
	/* Scanning for the start of an interface definition. */
	PHASE_INIT = 0,
	/* Definition found; collecting keyword lines. */
	PHASE_KEYS,
	/* Keywords finished; collecting the description. */
	PHASE_DESC,
	/* Inside a "see also" clause of the description. */
	PHASE_SEEALSO,
	/* Comment closed; collecting declarations. */
	PHASE_DECL
};
48 
/*
 * Coarse classification of a collected declaration.
 */
enum	decltype {
	/* Pre-processor definition. */
	DECLTYPE_CPP,
	/* Non-preprocessor text closed by a semicolon. */
	DECLTYPE_C,
	/* Non-preprocessor text that never saw its semicolon. */
	DECLTYPE_NEITHER
};
57 
/*
 * Pre-processor decorations that are tossed from variable and
 * function declarations.  PREPROC__MAX counts the entries.
 */
enum	preproc {
	PREPROC_SQLITE_API,		/* SQLITE_API */
	PREPROC_SQLITE_DEPRECATED,	/* SQLITE_DEPRECATED */
	PREPROC_SQLITE_EXPERIMENTAL,	/* SQLITE_EXPERIMENTAL */
	PREPROC_SQLITE_EXTERN,		/* SQLITE_EXTERN */
	PREPROC_SQLITE_STDCALL,		/* SQLITE_STDCALL */
	PREPROC__MAX
};
69 
/*
 * HTML tags that we recognise, one line per element.
 * "_OPEN_ATTRS" variants are opening tags that carry attributes.
 * TAG__MAX counts the entries; the order fixes the numeric values,
 * so new tags must be added with care.
 */
enum	tag {
	TAG_A_CLOSE, TAG_A_OPEN_ATTRS,
	TAG_B_CLOSE, TAG_B_OPEN,
	TAG_BLOCK_CLOSE, TAG_BLOCK_OPEN,
	TAG_BR_OPEN,
	TAG_DD_CLOSE, TAG_DD_OPEN,
	TAG_DL_CLOSE, TAG_DL_OPEN,
	TAG_DT_CLOSE, TAG_DT_OPEN,
	TAG_EM_CLOSE, TAG_EM_OPEN,
	TAG_H3_CLOSE, TAG_H3_OPEN,
	TAG_I_CLOSE, TAG_I_OPEN,
	TAG_LI_CLOSE, TAG_LI_OPEN, TAG_LI_OPEN_ATTRS,
	TAG_OL_CLOSE, TAG_OL_OPEN,
	TAG_P_OPEN,
	TAG_PRE_CLOSE, TAG_PRE_OPEN,
	TAG_SPAN_CLOSE, TAG_SPAN_OPEN_ATTRS,
	TAG_TABLE_CLOSE, TAG_TABLE_OPEN, TAG_TABLE_OPEN_ATTRS,
	TAG_TD_CLOSE, TAG_TD_OPEN, TAG_TD_OPEN_ATTRS,
	TAG_TH_CLOSE, TAG_TH_OPEN, TAG_TH_OPEN_ATTRS,
	TAG_TR_CLOSE, TAG_TR_OPEN,
	TAG_U_CLOSE, TAG_U_OPEN,
	TAG_UL_CLOSE, TAG_UL_OPEN,
	TAG__MAX
};
120 
/* Tail-queue head types: "declq" of struct decl, "defnq" of struct defn. */
TAILQ_HEAD(declq, decl);
TAILQ_HEAD(defnq, defn);
123 
124 /*
125  * A declaration of type DECLTYPE_CPP or DECLTYPE_C.
126  * These need not be unique (if ifdef'd).
127  */
128 struct	decl {
129 	enum decltype	 type; /* type of declaration */
130 	char		*text; /* text */
131 	size_t		 textsz; /* strlen(text) */
132 	TAILQ_ENTRY(decl) entries;
133 };
134 
135 /*
136  * A definition is basically the manpage contents.
137  */
138 struct	defn {
139 	char		 *name; /* really Nd */
140 	TAILQ_ENTRY(defn) entries;
141 	char		 *desc; /* long description */
142 	size_t		  descsz; /* strlen(desc) */
143 	char		 *fulldesc; /* description w/newlns */
144 	size_t		  fulldescsz; /* strlen(fulldesc) */
145 	struct declq	  dcqhead; /* declarations */
146 	int		  multiline; /* used when parsing */
147 	int		  instruct; /* used when parsing */
148 	const char	 *fn; /* parsed from file */
149 	size_t		  ln; /* parsed at line */
150 	int		  postprocessed; /* good for emission? */
151 	char		 *dt; /* manpage title */
152 	char		**nms; /* manpage names */
153 	size_t		  nmsz; /* number of names */
154 	char		 *fname; /* manpage filename */
155 	char		 *keybuf; /* raw keywords */
156 	size_t		  keybufsz; /* length of "keysbuf" */
157 	char		 *seealso; /* see also tags */
158 	size_t		  seealsosz; /* length of seealso */
159 	char		**xrs; /* parsed "see also" references */
160 	size_t		  xrsz; /* number of references */
161 	char		**keys; /* parsed keywords */
162 	size_t		  keysz; /* number of keywords */
163 };
164 
165 /*
166  * Entire parse routine.
167  */
168 struct	parse {
169 	enum phase	 phase; /* phase of parse */
170 	size_t		 ln; /* line number */
171 	const char	*fn; /* open file */
172 	struct defnq	 dqhead; /* definitions */
173 };
174 
/*
 * Describes how one HTML tag found in the text is handled.
 */
struct	taginfo {
	/* HTML text to key on. */
	const char	*html;
	/* mdoc(7) to generate in its place. */
	const char	*mdoc;
	/* Bitwise OR of the TAGINFO_* values below. */
	unsigned int	 flags;
};

/* Values for the "flags" member of struct taginfo. */
#define	TAGINFO_NOBR	 0x01 /* follow w/space, not newline */
#define	TAGINFO_NOOP	 0x02 /* just strip out */
#define	TAGINFO_NOSP	 0x04 /* follow w/o space or newline */
#define	TAGINFO_INLINE	 0x08 /* inline block (notused) */
#define	TAGINFO_ATTRS	 0x10 /* ignore attributes */
188 
189 static	const struct taginfo tags[TAG__MAX] = {
190 	{ "</a>", "", TAGINFO_INLINE }, /* TAG_A_CLOSE */
191 	{ "<a ", "", TAGINFO_INLINE | TAGINFO_ATTRS }, /* TAG_A_OPEN_ATTRS */
192 	{ "</b>", "\\fP", TAGINFO_INLINE }, /* TAG_B_CLOSE */
193 	{ "<b>", "\\fB", TAGINFO_INLINE }, /* TAG_B_OPEN */
194 	{ "<br>", " ", TAGINFO_INLINE }, /* TAG_BR_OPEN */
195 	{ "</blockquote>", ".Ed\n.Pp", 0 }, /* TAG_BLOCK_CLOSE */
196 	{ "<blockquote>", ".Bd -ragged", 0 }, /* TAG_BLOCK_OPEN */
197 	{ "</dd>", "", TAGINFO_NOOP }, /* TAG_DD_CLOSE */
198 	{ "<dd>", "", TAGINFO_NOBR | TAGINFO_NOSP }, /* TAG_DD_OPEN */
199 	{ "</dl>", ".El\n.Pp", 0 }, /* TAG_DL_CLOSE */
200 	{ "<dl>", ".Bl -tag -width Ds", 0 }, /* TAG_DL_OPEN */
201 	{ "</dt>", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_DT_CLOSE */
202 	{ "<dt>", ".It", TAGINFO_NOBR }, /* TAG_DT_OPEN */
203 	{ "</em>", "\\fP", TAGINFO_INLINE }, /* TAG_EM_CLOSE */
204 	{ "<em>", "\\fB", TAGINFO_INLINE }, /* TAG_EM_OPEN */
205 	{ "</h3>", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_H3_CLOSE */
206 	{ "<h3>", ".Ss", TAGINFO_NOBR }, /* TAG_H3_OPEN */
207 	{ "</i>", "\\fP", TAGINFO_INLINE }, /* TAG_I_CLOSE */
208 	{ "<i>", "\\fI", TAGINFO_INLINE }, /* TAG_I_OPEN */
209 	{ "</li>", "", TAGINFO_NOOP }, /* TAG_LI_CLOSE */
210 	{ "<li>", ".It", 0 }, /* TAG_LI_OPEN */
211 	{ "<li ", ".It", TAGINFO_ATTRS }, /* TAG_LI_OPEN_ATTRS */
212 	{ "</ol>", ".El\n.Pp", 0 }, /* TAG_OL_CLOSE */
213 	{ "<ol>", ".Bl -enum", 0 }, /* TAG_OL_OPEN */
214 	{ "<p>", ".Pp", 0 }, /* TAG_P_OPEN */
215 	{ "</pre>", ".Ed\n.Pp", 0 }, /* TAG_PRE_CLOSE */
216 	{ "<pre>", ".Bd -literal", 0 }, /* TAG_PRE_OPEN */
217 	{ "</span>", "", TAGINFO_INLINE }, /* TAG_SPAN_CLOSE */
218 	{ "<span ", "", TAGINFO_INLINE | TAGINFO_ATTRS }, /* TAG_SPAN_OPEN_ATTRS */
219 	{ "</table>", ".Pp", 0 }, /* TAG_TABLE_CLOSE */
220 	{ "<table>", ".Pp", 0 }, /* TAG_TABLE_OPEN */
221 	{ "<table ", ".Pp", TAGINFO_ATTRS }, /* TAG_TABLE_OPEN_ATTRS */
222 	{ "</td>", "", TAGINFO_NOOP }, /* TAG_TD_CLOSE */
223 	{ "<td>", " ", TAGINFO_INLINE }, /* TAG_TD_OPEN */
224 	{ "<td ", " ", TAGINFO_INLINE | TAGINFO_ATTRS}, /* TAG_TD_OPEN_ATTRS */
225 	{ "</th>", "", TAGINFO_NOOP }, /* TAG_TH_CLOSE */
226 	{ "<th>", " ", TAGINFO_INLINE }, /* TAG_TH_OPEN */
227 	{ "<th ", " ", TAGINFO_INLINE | TAGINFO_ATTRS}, /* TAG_TH_OPEN_ATTRS */
228 	{ "</tr>", "", TAGINFO_NOOP}, /* TAG_TR_CLOSE */
229 	{ "<tr>", "", TAGINFO_NOBR }, /* TAG_TR_OPEN */
230 	{ "</u>", "\\fP", TAGINFO_INLINE }, /* TAG_U_CLOSE */
231 	{ "<u>", "\\fI", TAGINFO_INLINE }, /* TAG_U_OPEN */
232 	{ "</ul>", ".El\n.Pp", 0 }, /* TAG_UL_CLOSE */
233 	{ "<ul>", ".Bl -bullet", 0 }, /* TAG_UL_OPEN */
234 };
235 
236 static	const char *const preprocs[TAG__MAX] = {
237 	"SQLITE_API", /* PREPROC_SQLITE_API */
238 	"SQLITE_DEPRECATED", /* PREPROC_SQLITE_DEPRECATED */
239 	"SQLITE_EXPERIMENTAL", /* PREPROC_SQLITE_EXPERIMENTAL */
240 	"SQLITE_EXTERN", /* PREPROC_SQLITE_EXTERN */
241 	"SQLITE_STDCALL", /* PREPROC_SQLITE_STDCALL */
242 };
243 
/* Non-zero: report the optional (verbose-only) warnings as well. */
static	int verbose = 0;

/* Non-zero: don't output any files, use stdout. */
static	int nofile = 0;

/* Non-zero: print out only the filename (no directory prefix). */
static	int filename = 0;
252 
253 static void
decl_function_add(struct parse * p __unused,char ** etext,size_t * etextsz,const char * cp,size_t len)254 decl_function_add(struct parse *p __unused, char **etext,
255 	size_t *etextsz, const char *cp, size_t len)
256 {
257 
258 	if ((*etext)[*etextsz - 1] != ' ') {
259 		*etext = realloc(*etext, *etextsz + 2);
260 		if (*etext == NULL)
261 			err(1, NULL);
262 		(*etextsz)++;
263 		strlcat(*etext, " ", *etextsz + 1);
264 	}
265 	*etext = realloc(*etext, *etextsz + len + 1);
266 	if (*etext == NULL)
267 		err(1, NULL);
268 	memcpy(*etext + *etextsz, cp, len);
269 	*etextsz += len;
270 	(*etext)[*etextsz] = '\0';
271 }
272 
273 static void
decl_function_copy(struct parse * p __unused,char ** etext,size_t * etextsz,const char * cp,size_t len)274 decl_function_copy(struct parse *p __unused, char **etext,
275 	size_t *etextsz, const char *cp, size_t len)
276 {
277 
278 	*etext = malloc(len + 1);
279 	if (*etext == NULL)
280 		err(1, NULL);
281 	memcpy(*etext, cp, len);
282 	*etextsz = len;
283 	(*etext)[*etextsz] = '\0';
284 }
285 
286 /*
287  * A C function (or variable, or whatever).
288  * This is more specifically any non-preprocessor text.
289  */
290 static int
decl_function(struct parse * p,const char * cp,size_t len)291 decl_function(struct parse *p, const char *cp, size_t len)
292 {
293 	char		*ep, *lcp, *rcp;
294 	const char	*ncp;
295 	size_t		 nlen;
296 	struct defn	*d;
297 	struct decl	*e;
298 
299 	/* Fetch current interface definition. */
300 	d = TAILQ_LAST(&p->dqhead, defnq);
301 	assert(NULL != d);
302 
303 	/*
304 	 * Since C tokens are semicolon-separated, we may be invoked any
305 	 * number of times per a single line.
306 	 */
307 again:
308 	while (isspace((unsigned char)*cp)) {
309 		cp++;
310 		len--;
311 	}
312 	if (*cp == '\0')
313 		return(1);
314 
315 	/* Whether we're a continuation clause. */
316 	if (d->multiline) {
317 		/* This might be NULL if we're not a continuation. */
318 		e = TAILQ_LAST(&d->dcqhead, declq);
319 		assert(DECLTYPE_C == e->type);
320 		assert(NULL != e);
321 		assert(NULL != e->text);
322 		assert(e->textsz);
323 	} else {
324 		assert(d->instruct == 0);
325 		e = calloc(1, sizeof(struct decl));
326 		if (e == NULL)
327 			err(1, NULL);
328 		e->type = DECLTYPE_C;
329 		TAILQ_INSERT_TAIL(&d->dcqhead, e, entries);
330 	}
331 
332 	/*
333 	 * We begin by seeing if there's a semicolon on this line.
334 	 * If there is, we'll need to do some special handling.
335 	 */
336 	ep = strchr(cp, ';');
337 	lcp = strchr(cp, '{');
338 	rcp = strchr(cp, '}');
339 
340 	/* We're only a partial statement (i.e., no closure). */
341 	if (ep == NULL && d->multiline) {
342 		assert(e->text != NULL);
343 		assert(e->textsz > 0);
344 		/* Is a struct starting or ending here? */
345 		if (d->instruct && NULL != rcp)
346 			d->instruct--;
347 		else if (NULL != lcp)
348 			d->instruct++;
349 		decl_function_add(p, &e->text, &e->textsz, cp, len);
350 		return(1);
351 	} else if (ep == NULL && !d->multiline) {
352 		d->multiline = 1;
353 		/* Is a structure starting in this line? */
354 		if (NULL != lcp &&
355 		    (rcp == NULL || rcp < lcp))
356 			d->instruct++;
357 		decl_function_copy(p, &e->text, &e->textsz, cp, len);
358 		return(1);
359 	}
360 
361 	/* Position ourselves after the semicolon. */
362 	assert(NULL != ep);
363 	ncp = cp;
364 	nlen = (ep - cp) + 1;
365 	cp = ep + 1;
366 	len -= nlen;
367 
368 	if (d->multiline) {
369 		assert(NULL != e->text);
370 		/* Don't stop the multi-line if we're in a struct. */
371 		if (d->instruct == 0) {
372 			if (lcp == NULL || lcp > cp)
373 				d->multiline = 0;
374 		} else if (NULL != rcp && rcp < cp)
375 			if (--d->instruct == 0)
376 				d->multiline = 0;
377 		decl_function_add(p, &e->text, &e->textsz, ncp, nlen);
378 	} else {
379 		assert(e->text == NULL);
380 		if (NULL != lcp && lcp < cp) {
381 			d->multiline = 1;
382 			d->instruct++;
383 		}
384 		decl_function_copy(p, &e->text, &e->textsz, ncp, nlen);
385 	}
386 
387 	goto again;
388 }
389 
390 /*
391  * A definition is just #define followed by space followed by the name,
392  * then the value of that name.
393  * We ignore the latter.
394  * FIXME: this does not understand multi-line CPP, but I don't think
395  * there are any instances of that in sqlite3.h.
396  */
397 static int
decl_define(struct parse * p,const char * cp,size_t len)398 decl_define(struct parse *p, const char *cp, size_t len)
399 {
400 	struct defn	*d;
401 	struct decl	*e;
402 	size_t		 sz;
403 
404 	while (isspace((unsigned char)*cp)) {
405 		cp++;
406 		len--;
407 	}
408 	if (len == 0) {
409 		warnx("%s:%zu: empty pre-processor "
410 			"constant", p->fn, p->ln);
411 		return(1);
412 	}
413 
414 	d = TAILQ_LAST(&p->dqhead, defnq);
415 	assert(NULL != d);
416 
417 	/*
418 	 * We're parsing a preprocessor definition, but we're still
419 	 * waiting on a semicolon from a function definition.
420 	 * It might be a comment or an error.
421 	 */
422 	if (d->multiline) {
423 		if (verbose)
424 			warnx("%s:%zu: multiline declaration "
425 				"still open", p->fn, p->ln);
426 		e = TAILQ_LAST(&d->dcqhead, declq);
427 		assert(NULL != e);
428 		e->type = DECLTYPE_NEITHER;
429 		d->multiline = d->instruct = 0;
430 	}
431 
432 	sz = 0;
433 	while (cp[sz] != '\0' && !isspace((unsigned char)cp[sz]))
434 		sz++;
435 
436 	e = calloc(1, sizeof(struct decl));
437 	if (e == NULL)
438 		err(1, NULL);
439 	e->type = DECLTYPE_CPP;
440 	e->text = calloc(1, sz + 1);
441 	if (e->text == NULL)
442 		err(1, NULL);
443 	strlcpy(e->text, cp, sz + 1);
444 	e->textsz = sz;
445 	TAILQ_INSERT_TAIL(&d->dcqhead, e, entries);
446 	return(1);
447 }
448 
449 /*
450  * A declaration is a function, variable, preprocessor definition, or
451  * really anything else until we reach a blank line.
452  */
453 static void
decl(struct parse * p,const char * cp,size_t len)454 decl(struct parse *p, const char *cp, size_t len)
455 {
456 	struct defn	*d;
457 	struct decl	*e;
458 	const char	*oldcp;
459 	size_t		 oldlen;
460 
461 	oldcp = cp;
462 	oldlen = len;
463 
464 	while (isspace((unsigned char)*cp)) {
465 		cp++;
466 		len--;
467 	}
468 
469 	d = TAILQ_LAST(&p->dqhead, defnq);
470 	assert(NULL != d);
471 
472 	/* Check closure. */
473 	if (*cp == '\0') {
474 		p->phase = PHASE_INIT;
475 		/* Check multiline status. */
476 		if (d->multiline) {
477 			if (verbose)
478 				warnx("%s:%zu: multiline declaration "
479 					"still open", p->fn, p->ln);
480 			e = TAILQ_LAST(&d->dcqhead, declq);
481 			assert(NULL != e);
482 			e->type = DECLTYPE_NEITHER;
483 			d->multiline = d->instruct = 0;
484 		}
485 		return;
486 	}
487 
488 	d->fulldesc = realloc(d->fulldesc,
489 		d->fulldescsz + oldlen + 2);
490 	if (d->fulldesc == NULL)
491 		err(1, NULL);
492 	if (d->fulldescsz == 0)
493 		d->fulldesc[0] = '\0';
494 	d->fulldescsz += oldlen + 2;
495 	strlcat(d->fulldesc, oldcp, d->fulldescsz);
496 	strlcat(d->fulldesc, "\n", d->fulldescsz);
497 
498 	/*
499 	 * Catch preprocessor defines, but discard all other types of
500 	 * preprocessor statements.
501 	 * We might already be in the middle of a declaration (a
502 	 * function declaration), but that's ok.
503 	 */
504 
505 	if (*cp == '#') {
506 		len--;
507 		cp++;
508 		while (isspace((unsigned char)*cp)) {
509 			len--;
510 			cp++;
511 		}
512 		if (strncmp(cp, "define", 6) == 0)
513 			decl_define(p, cp + 6, len - 6);
514 		return;
515 	}
516 
517 	/* Skip one-liner comments. */
518 
519 	if (len > 4 &&
520 	    cp[0] == '/' && cp[1] == '*' &&
521 	    cp[len - 2] == '*' && cp[len - 1] == '/')
522 		return;
523 
524 	decl_function(p, cp, len);
525 }
526 
527 /*
528  * Whether to end an interface description phase with an asterisk-slash.
529  * This is run within a phase already opened with slash-asterisk.  It
530  * adjusts the parse state on ending a phase or syntax errors.  It has
531  * various hacks around lacks syntax (e.g., starting single-asterisk
532  * instead of double-asterisk) found in the wild.
533  *
534  * Returns zero if not ending the phase, non-zero if ending.
535  */
536 static int
endphase(struct parse * p,const char * cp)537 endphase(struct parse *p, const char *cp)
538 {
539 
540 	if (*cp == '\0') {
541 		/*
542 		 * Error: empty line.
543 		 */
544 		warnx("%s:%zu: warn: unexpected empty line in "
545 			"interface description", p->fn, p->ln);
546 		p->phase = PHASE_INIT;
547 		return 1;
548 	} else if (strcmp(cp, "*/") == 0) {
549 		/*
550 		 * End of the interface description.
551 		 */
552 		p->phase = PHASE_DECL;
553 		return 1;
554 	} else if (!(cp[0] == '*' && cp[1] == '*')) {
555 		/*
556 		 * Error: bad syntax, not end or continuation.
557 		 */
558 		if (cp[0] == '*' && cp[1] == '\0') {
559 			if (verbose)
560 				warnx("%s:%zu: warn: ignoring "
561 					"standalone asterisk "
562 					"in interface description",
563 					p->fn, p->ln);
564 			return 0;
565 		} else if (cp[0] == '*' && cp[1] == ' ') {
566 			if (verbose)
567 				warnx("%s:%zu: warn: ignoring "
568 					"leading single asterisk "
569 					"in interface description",
570 					p->fn, p->ln);
571 			return 0;
572 		}
573 		warnx("%s:%zu: warn: ambiguous leading characters in "
574 			"interface description", p->fn, p->ln);
575 		p->phase = PHASE_INIT;
576 		return 1;
577 	}
578 
579 	/* If here, at a continuation ('**'). */
580 
581 	return 0;
582 }
583 
584 /*
585  * Parse a "SEE ALSO" phase, which can come at any point in the
586  * interface description (unlike what they claim).
587  */
588 static void
seealso(struct parse * p,const char * cp,size_t len)589 seealso(struct parse *p, const char *cp, size_t len)
590 {
591 	struct defn	*d;
592 
593 	if (endphase(p, cp) || len < 2)
594 		return;
595 
596 	cp += 2;
597 	len -= 2;
598 
599 	while (isspace((unsigned char)*cp)) {
600 		cp++;
601 		len--;
602 	}
603 
604 	/* Blank line: back to description part. */
605 	if (len == 0) {
606 		p->phase = PHASE_DESC;
607 		return;
608 	}
609 
610 	/* Fetch current interface definition. */
611 	d = TAILQ_LAST(&p->dqhead, defnq);
612 	assert(NULL != d);
613 
614 	d->seealso = realloc(d->seealso,
615 		d->seealsosz + len + 1);
616 	memcpy(d->seealso + d->seealsosz, cp, len);
617 	d->seealsosz += len;
618 	d->seealso[d->seealsosz] = '\0';
619 }
620 
621 /*
622  * A definition description is a block of text that we'll later format
623  * in mdoc(7).
624  * It extends from the name of the definition down to the declarations
625  * themselves.
626  */
627 static void
desc(struct parse * p,const char * cp,size_t len)628 desc(struct parse *p, const char *cp, size_t len)
629 {
630 	struct defn	*d;
631 	size_t		 nsz;
632 
633 	if (endphase(p, cp) || len < 2)
634 		return;
635 
636 	cp += 2;
637 	len -= 2;
638 
639 	while (isspace((unsigned char)*cp)) {
640 		cp++;
641 		len--;
642 	}
643 
644 	/* Fetch current interface definition. */
645 
646 	d = TAILQ_LAST(&p->dqhead, defnq);
647 	assert(NULL != d);
648 
649 	/* Ignore leading blank lines. */
650 
651 	if (len == 0 && d->desc == NULL)
652 		return;
653 
654 	/* Collect SEE ALSO clauses. */
655 
656 	if (strncasecmp(cp, "see also:", 9) == 0) {
657 		cp += 9;
658 		len -= 9;
659 		while (isspace((unsigned char)*cp)) {
660 			cp++;
661 			len--;
662 		}
663 		p->phase = PHASE_SEEALSO;
664 		d->seealso = realloc(d->seealso,
665 			d->seealsosz + len + 1);
666 		memcpy(d->seealso + d->seealsosz, cp, len);
667 		d->seealsosz += len;
668 		d->seealso[d->seealsosz] = '\0';
669 		return;
670 	}
671 
672 	/* White-space padding between lines. */
673 
674 	if (d->desc != NULL &&
675 	    d->descsz > 0 &&
676 	    d->desc[d->descsz - 1] != ' ' &&
677 	    d->desc[d->descsz - 1] != '\n') {
678 		d->desc = realloc(d->desc, d->descsz + 2);
679 		if (d->desc == NULL)
680 			err(1, NULL);
681 		d->descsz++;
682 		strlcat(d->desc, " ", d->descsz + 1);
683 	}
684 
685 	/* Either append the line of a newline, if blank. */
686 
687 	nsz = len == 0 ? 1 : len;
688 	if (d->desc == NULL) {
689 		assert(d->descsz == 0);
690 		d->desc = calloc(1, nsz + 1);
691 		if (d->desc == NULL)
692 			err(1, NULL);
693 	} else {
694 		d->desc = realloc(d->desc, d->descsz + nsz + 1);
695 		if (d->desc == NULL)
696 			err(1, NULL);
697 	}
698 
699 	d->descsz += nsz;
700 	strlcat(d->desc, len == 0 ? "\n" : cp, d->descsz + 1);
701 }
702 
703 /*
704  * Copy all KEYWORDS into a buffer.
705  */
706 static void
keys(struct parse * p,const char * cp,size_t len)707 keys(struct parse *p, const char *cp, size_t len)
708 {
709 	struct defn	*d;
710 
711 	if (endphase(p, cp) || len < 2)
712 		return;
713 
714 	cp += 2;
715 	len -= 2;
716 	while (isspace((unsigned char)*cp)) {
717 		cp++;
718 		len--;
719 	}
720 
721 	if (len == 0) {
722 		p->phase = PHASE_DESC;
723 		return;
724 	} else if (strncmp(cp, "KEYWORDS:", 9))
725 		return;
726 
727 	cp += 9;
728 	len -= 9;
729 
730 	d = TAILQ_LAST(&p->dqhead, defnq);
731 	assert(NULL != d);
732 	d->keybuf = realloc(d->keybuf, d->keybufsz + len + 1);
733 	if (d->keybuf == NULL)
734 		err(1, NULL);
735 	memcpy(d->keybuf + d->keybufsz, cp, len);
736 	d->keybufsz += len;
737 	d->keybuf[d->keybufsz] = '\0';
738 }
739 
740 /*
741  * Initial state is where we're scanning forward to find commented
742  * instances of CAPI3REF.
743  */
744 static void
init(struct parse * p,const char * cp)745 init(struct parse *p, const char *cp)
746 {
747 	struct defn	*d;
748 	size_t		 i, sz;
749 
750 	/* Look for comment hook. */
751 
752 	if (cp[0] != '*' || cp[1] != '*')
753 		return;
754 	cp += 2;
755 	while (isspace((unsigned char)*cp))
756 		cp++;
757 
758 	/* Look for beginning of definition. */
759 
760 	if (strncmp(cp, "CAPI3REF:", 9))
761 		return;
762 	cp += 9;
763 	while (isspace((unsigned char)*cp))
764 		cp++;
765 	if (*cp == '\0') {
766 		warnx("%s:%zu: warn: unexpected end of "
767 			"interface definition", p->fn, p->ln);
768 		return;
769 	}
770 
771 	/* Add definition to list of existing ones. */
772 
773 	if ((d = calloc(1, sizeof(struct defn))) == NULL)
774 		err(1, NULL);
775 	if ((d->name = strdup(cp)) == NULL)
776 		err(1, NULL);
777 
778 	/* Strip trailing spaces and periods. */
779 
780 	for (sz = strlen(d->name); sz > 0; sz--)
781 		if (d->name[sz - 1] == '.' ||
782 		    d->name[sz - 1] == ' ')
783 			d->name[sz - 1] = '\0';
784 		else
785 			break;
786 
787 	/*
788 	 * Un-title case.  Use a simple heuristic where all words
789 	 * starting with an upper case letter followed by a not
790 	 * uppercase letter are lowercased.
791 	 */
792 
793 	for (i = 0; sz > 0 && i < sz - 1; i++)
794 		if ((i == 0 || d->name[i - 1] == ' ') &&
795 		    isupper((unsigned char)d->name[i]) &&
796 		    !isupper((unsigned char)d->name[i + 1]) &&
797 		    !ispunct((unsigned char)d->name[i + 1]))
798 			d->name[i] = tolower((unsigned char)d->name[i]);
799 
800 	d->fn = p->fn;
801 	d->ln = p->ln;
802 	p->phase = PHASE_KEYS;
803 	TAILQ_INIT(&d->dcqhead);
804 	TAILQ_INSERT_TAIL(&p->dqhead, d, entries);
805 }
806 
/*
 * Non-zero if "_cp" points at a character that ends the search for a
 * declaration's name: ';', '[', ')', '{', or a '(' that is not
 * followed by '*' (the latter being a function pointer).
 */
#define	BPOINT(_cp) \
	((_cp)[0] == ';' || \
	 (_cp)[0] == '[' || \
	 ((_cp)[0] == '(' && (_cp)[1] != '*') || \
	 (_cp)[0] == ')' || \
	 (_cp)[0] == '{')
813 
814 /*
815  * Given a declaration (be it preprocessor or C), try to parse out a
816  * reasonable "name" for the affair.
817  * For a struct, for example, it'd be the struct name.
818  * For a typedef, it'd be the type name.
819  * For a function, it'd be the function name.
820  */
821 static void
grok_name(const struct decl * e,const char ** start,size_t * sz)822 grok_name(const struct decl *e,
823 	const char **start, size_t *sz)
824 {
825 	const char	*cp;
826 
827 	*start = NULL;
828 	*sz = 0;
829 
830 	if (DECLTYPE_CPP != e->type) {
831 		if (e->text[e->textsz - 1] != ';')
832 			return;
833 		cp = e->text;
834 		do {
835 			while (isspace((unsigned char)*cp))
836 				cp++;
837 			if (BPOINT(cp))
838 				break;
839 			/* Function pointers... */
840 			if (*cp == '(')
841 				cp++;
842 			/* Pass over pointers. */
843 			while (*cp == '*')
844 				cp++;
845 			*start = cp;
846 			*sz = 0;
847 			while (!isspace((unsigned char)*cp)) {
848 				if (BPOINT(cp))
849 					break;
850 				cp++;
851 				(*sz)++;
852 			}
853 		} while (!BPOINT(cp));
854 	} else {
855 		*sz = e->textsz;
856 		*start = e->text;
857 	}
858 }
859 
860 /*
861  * Extract information from the interface definition.
862  * Mark it as "postprocessed" on success.
863  */
864 static void
postprocess(const char * prefix,struct defn * d)865 postprocess(const char *prefix, struct defn *d)
866 {
867 	struct decl	*first;
868 	const char	*start;
869 	size_t		 offs, sz, i;
870 	ENTRY		 ent;
871 
872 	if (TAILQ_EMPTY(&d->dcqhead))
873 		return;
874 
875 	/* Find the first #define or declaration. */
876 
877 	TAILQ_FOREACH(first, &d->dcqhead, entries)
878 		if (DECLTYPE_CPP == first->type ||
879 		    DECLTYPE_C == first->type)
880 			break;
881 
882 	if (first == NULL) {
883 		warnx("%s:%zu: no entry to document", d->fn, d->ln);
884 		return;
885 	}
886 
887 	/*
888 	 * Now compute the document name (`Dt').
889 	 * We'll also use this for the filename.
890 	 */
891 
892 	grok_name(first, &start, &sz);
893 	if (start == NULL) {
894 		warnx("%s:%zu: couldn't deduce "
895 			"entry name", d->fn, d->ln);
896 		return;
897 	}
898 
899 	/* Document name needs all-caps. */
900 
901 	if ((d->dt = strndup(start, sz)) == NULL)
902 		err(1, NULL);
903 	sz = strlen(d->dt);
904 	for (i = 0; i < sz; i++)
905 		d->dt[i] = toupper((unsigned char)d->dt[i]);
906 
907 	/* Filename needs no special chars. */
908 
909 	if (filename) {
910 		asprintf(&d->fname, "%.*s.3", (int)sz, start);
911 		offs = 0;
912 	} else {
913 		asprintf(&d->fname, "%s/%.*s.3",
914 			prefix, (int)sz, start);
915 		offs = strlen(prefix) + 1;
916 	}
917 
918 	if (d->fname == NULL)
919 		err(1, NULL);
920 
921 	for (i = 0; i < sz; i++) {
922 		if (isalnum((unsigned char)d->fname[offs + i]) ||
923 		    d->fname[offs + i] == '_' ||
924 		    d->fname[offs + i] == '-')
925 			continue;
926 		d->fname[offs + i] = '_';
927 	}
928 
929 	/*
930 	 * First, extract all keywords.
931 	 */
932 	for (i = 0; i < d->keybufsz; ) {
933 		while (isspace((unsigned char)d->keybuf[i]))
934 			i++;
935 		if (i == d->keybufsz)
936 			break;
937 		sz = 0;
938 		start = &d->keybuf[i];
939 		if (d->keybuf[i] == '{') {
940 			start = &d->keybuf[++i];
941 			for ( ; i < d->keybufsz; i++, sz++)
942 				if (d->keybuf[i] == '}')
943 					break;
944 			if (d->keybuf[i] == '}')
945 				i++;
946 		} else
947 			for ( ; i < d->keybufsz; i++, sz++)
948 				if (isspace((unsigned char)d->keybuf[i]))
949 					break;
950 		if (sz == 0)
951 			continue;
952 		d->keys = reallocarray(d->keys,
953 			d->keysz + 1, sizeof(char *));
954 		if (d->keys == NULL)
955 			err(1, NULL);
956 		d->keys[d->keysz] = malloc(sz + 1);
957 		if (d->keys[d->keysz] == NULL)
958 			err(1, NULL);
959 		memcpy(d->keys[d->keysz], start, sz);
960 		d->keys[d->keysz][sz] = '\0';
961 		d->keysz++;
962 
963 		/* Hash the keyword. */
964 		ent.key = d->keys[d->keysz - 1];
965 		ent.data = d;
966 		(void)hsearch(ent, ENTER);
967 	}
968 
969 	/*
970 	 * Now extract all `Nm' values for this document.
971 	 * We only use CPP and C references, and hope for the best when
972 	 * doing so.
973 	 * Enter each one of these as a searchable keyword.
974 	 */
975 	TAILQ_FOREACH(first, &d->dcqhead, entries) {
976 		if (DECLTYPE_CPP != first->type &&
977 		    DECLTYPE_C != first->type)
978 			continue;
979 		grok_name(first, &start, &sz);
980 		if (start == NULL)
981 			continue;
982 		d->nms = reallocarray(d->nms,
983 			d->nmsz + 1, sizeof(char *));
984 		if (d->nms == NULL)
985 			err(1, NULL);
986 		d->nms[d->nmsz] = malloc(sz + 1);
987 		if (d->nms[d->nmsz] == NULL)
988 			err(1, NULL);
989 		memcpy(d->nms[d->nmsz], start, sz);
990 		d->nms[d->nmsz][sz] = '\0';
991 		d->nmsz++;
992 
993 		/* Hash the name. */
994 		ent.key = d->nms[d->nmsz - 1];
995 		ent.data = d;
996 		(void)hsearch(ent, ENTER);
997 	}
998 
999 	if (d->nmsz == 0) {
1000 		warnx("%s:%zu: couldn't deduce "
1001 			"any names", d->fn, d->ln);
1002 		return;
1003 	}
1004 
1005 	/*
1006 	 * Next, scan for all `Xr' values.
1007 	 * We'll add more to this list later.
1008 	 */
1009 	for (i = 0; i < d->seealsosz; i++) {
1010 		/*
1011 		 * Find next value starting with `['.
1012 		 * There's other stuff in there (whitespace or
1013 		 * free text leading up to these) that we're ok
1014 		 * to ignore.
1015 		 */
1016 		while (i < d->seealsosz && d->seealso[i] != '[')
1017 			i++;
1018 		if (i == d->seealsosz)
1019 			break;
1020 
1021 		/*
1022 		 * Now scan for the matching `]'.
1023 		 * We can also have a vertical bar if we're separating a
1024 		 * keyword and its shown name.
1025 		 */
1026 		start = &d->seealso[++i];
1027 		sz = 0;
1028 		while (i < d->seealsosz &&
1029 		      d->seealso[i] != ']' &&
1030 		      d->seealso[i] != '|') {
1031 			i++;
1032 			sz++;
1033 		}
1034 		if (i == d->seealsosz)
1035 			break;
1036 		if (sz == 0)
1037 			continue;
1038 
1039 		/*
1040 		 * Continue on to the end-of-reference, if we weren't
1041 		 * there to begin with.
1042 		 */
1043 		if (d->seealso[i] != ']')
1044 			while (i < d->seealsosz &&
1045 			      d->seealso[i] != ']')
1046 				i++;
1047 
1048 		/* Strip trailing whitespace. */
1049 		while (sz > 1 && start[sz - 1] == ' ')
1050 			sz--;
1051 
1052 		/* Strip trailing parenthesis. */
1053 		if (sz > 2 &&
1054 		    start[sz - 2] == '(' &&
1055 	 	    start[sz - 1] == ')')
1056 			sz -= 2;
1057 
1058 		d->xrs = reallocarray(d->xrs,
1059 			d->xrsz + 1, sizeof(char *));
1060 		if (d->xrs == NULL)
1061 			err(1, NULL);
1062 		d->xrs[d->xrsz] = malloc(sz + 1);
1063 		if (d->xrs[d->xrsz] == NULL)
1064 			err(1, NULL);
1065 		memcpy(d->xrs[d->xrsz], start, sz);
1066 		d->xrs[d->xrsz][sz] = '\0';
1067 		d->xrsz++;
1068 	}
1069 
1070 	/*
1071 	 * Next, extract all references.
1072 	 * We'll accumulate these into a list of SEE ALSO tags, after.
1073 	 * See how these are parsed above for a description: this is
1074 	 * basically the same thing.
1075 	 */
1076 	for (i = 0; i < d->descsz; i++) {
1077 		if (d->desc[i] != '[')
1078 			continue;
1079 		i++;
1080 		if (d->desc[i] == '[')
1081 			continue;
1082 
1083 		start = &d->desc[i];
1084 		for (sz = 0; i < d->descsz; i++, sz++)
1085 			if (d->desc[i] == ']' ||
1086 			    d->desc[i] == '|')
1087 				break;
1088 
1089 		if (i == d->descsz)
1090 			break;
1091 		else if (sz == 0)
1092 			continue;
1093 
1094 		if (d->desc[i] != ']')
1095 			while (i < d->descsz && d->desc[i] != ']')
1096 				i++;
1097 
1098 		while (sz > 1 && start[sz - 1] == ' ')
1099 			sz--;
1100 
1101 		if (sz > 2 &&
1102 		    start[sz - 2] == '(' &&
1103 		    start[sz - 1] == ')')
1104 			sz -= 2;
1105 
1106 		d->xrs = reallocarray(d->xrs,
1107 			d->xrsz + 1, sizeof(char *));
1108 		if (d->xrs == NULL)
1109 			err(1, NULL);
1110 		d->xrs[d->xrsz] = malloc(sz + 1);
1111 		if (d->xrs[d->xrsz] == NULL)
1112 			err(1, NULL);
1113 		memcpy(d->xrs[d->xrsz], start, sz);
1114 		d->xrs[d->xrsz][sz] = '\0';
1115 		d->xrsz++;
1116 	}
1117 
1118 	d->postprocessed = 1;
1119 }
1120 
1121 /*
1122  * Convenience function to look up which manpage "hosts" a certain
1123  * keyword.  For example, SQLITE_OK(3) also handles SQLITE_TOOBIG and so
1124  * on, so a reference to SQLITE_TOOBIG should actually point to
1125  * SQLITE_OK.
1126  * Returns the keyword's file if found or NULL.
1127  */
1128 static const char *
lookup(const char * key)1129 lookup(const char *key)
1130 {
1131 	ENTRY			 ent;
1132 	ENTRY			*res;
1133 	const struct defn	*d;
1134 
1135 	ent.key = (char *)(uintptr_t)key;
1136 	ent.data = NULL;
1137 
1138 	if ((res = hsearch(ent, FIND)) == NULL)
1139 		return NULL;
1140 
1141 	d = (const struct defn *)res->data;
1142 	if (d->nmsz == 0)
1143 		return NULL;
1144 
1145 	assert(d->nms[0] != NULL);
1146 	return d->nms[0];
1147 }
1148 
/*
 * Comparison callback for sorting an array of keyword strings.
 * Both keywords are first resolved through lookup() and the resulting
 * page names are then compared without regard to case.
 * A keyword that does not resolve is compared as the empty string.
 */
static int
xrcmp(const void *p1, const void *p2)
{
	const char	*lhs, *rhs;

	/* The uintptr_t detour silences bogus un-consting warnings. */

	lhs = lookup(*(const char **)(uintptr_t)p1);
	rhs = lookup(*(const char **)(uintptr_t)p2);

	return strcasecmp(lhs != NULL ? lhs : "",
		rhs != NULL ? rhs : "");
}
1164 
/*
 * Decide whether "new sentence, new line" applies to the word that
 * spans buf[start] up to (not including) buf[finish].
 * The word does not start a new sentence if it is at least four bytes
 * long and its last four bytes are "i.e." or "e.g." (in any case).
 * Returns zero for those abbreviations and non-zero otherwise.
 */
static int
newsentence(size_t start, size_t finish, const char *buf)
{
	static const char *const	 abbrevs[] = { "i.e.", "e.g." };
	const char			*tail;
	size_t				 k;

	assert(finish >= start);

	/* Too short to hold either abbreviation. */

	if (finish - start < 4)
		return 1;

	tail = &buf[finish - 4];
	for (k = 0; k < sizeof(abbrevs) / sizeof(abbrevs[0]); k++)
		if (strncasecmp(tail, abbrevs[k], 4) == 0)
			return 0;

	return 1;
}
1187 
1188 /*
1189  * Emit a valid mdoc(7) document within the given prefix.
1190  */
1191 static void
emit(struct defn * d)1192 emit(struct defn *d)
1193 {
1194 	struct decl	*first;
1195 	size_t		 sz, i, j, col, last, ns, fnsz, stripspace;
1196 	FILE		*f;
1197 	char		*cp;
1198 	const char	*res, *lastres, *args, *str, *end, *fn;
1199 	enum tag	 tag;
1200 	enum preproc	 pre;
1201 
1202 	if (!d->postprocessed) {
1203 		warnx("%s:%zu: interface has errors, not "
1204 			"producing manpage", d->fn, d->ln);
1205 		return;
1206 	}
1207 
1208 	if (nofile == 0) {
1209 		if ((f = fopen(d->fname, "w")) == NULL) {
1210 			warn("%s: fopen", d->fname);
1211 			return;
1212 		}
1213 	} else if (filename) {
1214 		printf("%s\n", d->fname);
1215 		return;
1216 	} else
1217 		f = stdout;
1218 
1219 	/* Begin by outputting the mdoc(7) header. */
1220 
1221 	fputs(".Dd $" "Mdocdate$\n", f);
1222 	fprintf(f, ".Dt %s 3\n", d->dt);
1223 	fputs(".Os\n", f);
1224 	fputs(".Sh NAME\n", f);
1225 
1226 	/* Now print the name bits of each declaration. */
1227 
1228 	for (i = 0; i < d->nmsz; i++)
1229 		fprintf(f, ".Nm %s%s\n", d->nms[i],
1230 			i < d->nmsz - 1 ? " ," : "");
1231 
1232 	fprintf(f, ".Nd %s\n", d->name);
1233 	fputs(".Sh SYNOPSIS\n", f);
1234 	fputs(".In sqlite3.h\n", f);
1235 
1236 	TAILQ_FOREACH(first, &d->dcqhead, entries) {
1237 		if (first->type != DECLTYPE_CPP &&
1238 		    first->type != DECLTYPE_C)
1239 			continue;
1240 
1241 		/* Easy: just print the CPP name. */
1242 
1243 		if (first->type == DECLTYPE_CPP) {
1244 			fprintf(f, ".Fd #define %s\n",
1245 				first->text);
1246 			continue;
1247 		}
1248 
1249 		/* First, strip out the sqlite CPPs. */
1250 
1251 		for (i = 0; i < first->textsz; ) {
1252 			for (pre = 0; pre < PREPROC__MAX; pre++) {
1253 				sz = strlen(preprocs[pre]);
1254 				if (strncmp(preprocs[pre],
1255 				    &first->text[i], sz))
1256 					continue;
1257 				i += sz;
1258 				while (isspace((unsigned char)first->text[i]))
1259 					i++;
1260 				break;
1261 			}
1262 			if (pre == PREPROC__MAX)
1263 				break;
1264 		}
1265 
1266 		/* If we're a typedef, immediately print Vt. */
1267 
1268 		if (strncmp(&first->text[i], "typedef", 7) == 0) {
1269 			fprintf(f, ".Vt %s\n", &first->text[i]);
1270 			continue;
1271 		}
1272 
1273 		/* Are we a struct? */
1274 
1275 		if (first->textsz > 2 &&
1276 		    first->text[first->textsz - 2] == '}' &&
1277 		    (cp = strchr(&first->text[i], '{')) != NULL) {
1278 			*cp = '\0';
1279 			fprintf(f, ".Vt %s;\n", &first->text[i]);
1280 			/* Restore brace for later usage. */
1281 			*cp = '{';
1282 			continue;
1283 		}
1284 
1285 		/* Catch remaining non-functions. */
1286 
1287 		if (first->textsz > 2 &&
1288 		    first->text[first->textsz - 2] != ')') {
1289 			fprintf(f, ".Vt %s\n", &first->text[i]);
1290 			continue;
1291 		}
1292 
1293 		str = &first->text[i];
1294 		if ((args = strchr(str, '(')) == NULL || args == str) {
1295 			/* What is this? */
1296 			fputs(".Bd -literal\n", f);
1297 			fputs(&first->text[i], f);
1298 			fputs("\n.Ed\n", f);
1299 			continue;
1300 		}
1301 
1302 		/*
1303 		 * Current state:
1304 		 *  type_t *function      (args...)
1305 		 *  ^str                  ^args
1306 		 * Scroll back to end of function name.
1307 		 */
1308 
1309 		end = args - 1;
1310 		while (end > str && isspace((unsigned char)*end))
1311 			end--;
1312 
1313 		/*
1314 		 * Current state:
1315 		 *  type_t *function      (args...)
1316 		 *  ^str           ^end   ^args
1317 		 * Scroll back to what comes before.
1318 		 */
1319 
1320 		for (fnsz = 0; end > str; end--, fnsz++)
1321 			if (isspace((unsigned char)*end) || *end == '*')
1322 				break;
1323 
1324 		if (fnsz == 0)
1325 			warnx("%s:%zu: zero-length "
1326 				"function name", d->fn, d->ln);
1327 		fn = end + 1;
1328 
1329 		/*
1330 		 * Current state:
1331 		 *  type_t *function      (args...)
1332 		 *  ^str   ^end           ^args
1333 		 *  type_t  function      (args...)
1334 		 *  ^str   ^end           ^args
1335 		 * Strip away whitespace.
1336 		 */
1337 
1338 		while (end > str && isspace((unsigned char)*end))
1339 			end--;
1340 
1341 		/*
1342 		 * type_t *function      (args...)
1343 		 * ^str   ^end           ^args
1344 		 * type_t  function      (args...)
1345 		 * ^str ^end             ^args
1346 		 */
1347 
1348 		/*
1349 		 * If we can't find what came before, then the function
1350 		 * has no type, which is odd... let's just call it void.
1351 		 */
1352 
1353 		if (end > str) {
1354 			fprintf(f, ".Ft %.*s\n",
1355 				(int)(end - str + 1), str);
1356 			fprintf(f, ".Fo %.*s\n", (int)fnsz, fn);
1357 		} else {
1358 			fputs(".Ft void\n", f);
1359 			fprintf(f, ".Fo %.*s\n", (int)fnsz, fn);
1360 		}
1361 
1362 		/*
1363 		 * Convert function arguments into `Fa' clauses.
1364 		 * This also handles nested function pointers, which
1365 		 * would otherwise throw off the delimeters.
1366 		 */
1367 
1368 		for (;;) {
1369 			str = ++args;
1370 			while (isspace((unsigned char)*str))
1371 				str++;
1372 			fputs(".Fa \"", f);
1373 			ns = 0;
1374 			while (*str != '\0' &&
1375 			       (ns || *str != ',') &&
1376 			       (ns || *str != ')')) {
1377 				/*
1378 				 * Handle comments in the declarations.
1379 				 */
1380 				if (str[0] == '/' && str[1] == '*') {
1381 					str += 2;
1382 					for ( ; str[0] != '\0'; str++)
1383 						if (str[0] == '*' && str[1] == '/')
1384 							break;
1385 					if (*str == '\0')
1386 						break;
1387 					str += 2;
1388 					while (isspace((unsigned char)*str))
1389 						str++;
1390 					if (*str == '\0' ||
1391 					    (ns == 0 && *str == ',') ||
1392 					    (ns == 0 && *str == ')'))
1393 						break;
1394 				}
1395 				if (*str == '(')
1396 					ns++;
1397 				else if (*str == ')')
1398 					ns--;
1399 
1400 				/*
1401 				 * Handle some instances of whitespace
1402 				 * by compressing it down.
1403 				 * However, if the whitespace ends at
1404 				 * the end-of-definition, then don't
1405 				 * print it at all.
1406 				 */
1407 
1408 				if (isspace((unsigned char)*str)) {
1409 					while (isspace((unsigned char)*str))
1410 						str++;
1411 					/* Are we at a comment? */
1412 					if (str[0] == '/' && str[1] == '*')
1413 						continue;
1414 					if (*str == '\0' ||
1415 					    (ns == 0 && *str == ',') ||
1416 					    (ns == 0 && *str == ')'))
1417 						break;
1418 					fputc(' ', f);
1419 				} else {
1420 					fputc(*str, f);
1421 					str++;
1422 				}
1423 			}
1424 			fputs("\"\n", f);
1425 			if (*str == '\0' || *str == ')')
1426 				break;
1427 			args = str;
1428 		}
1429 
1430 		fputs(".Fc\n", f);
1431 	}
1432 
1433 	fputs(".Sh DESCRIPTION\n", f);
1434 
1435 	/*
1436 	 * Strip the crap out of the description.
1437 	 * "Crap" consists of things I don't understand that mess up
1438 	 * parsing of the HTML, for instance,
1439 	 *   <dl>[[foo bar]]<dt>foo bar</dt>...</dl>
1440 	 * These are not well-formed HTML.
1441 	 * Note that d->desc[d->descz] is the NUL terminator, so we
1442 	 * don't need to check d->descsz - 1.
1443 	 */
1444 
1445 	for (i = 0; i < d->descsz; ) {
1446 		if (d->desc[i] == '^' &&
1447 		    d->desc[i + 1] == '(') {
1448 			memmove(&d->desc[i],
1449 				&d->desc[i + 2],
1450 				d->descsz - i - 1);
1451 			d->descsz -= 2;
1452 			continue;
1453 		} else if (d->desc[i] == ')' &&
1454 			   d->desc[i + 1] == '^') {
1455 			memmove(&d->desc[i],
1456 				&d->desc[i + 2],
1457 				d->descsz - i - 1);
1458 			d->descsz -= 2;
1459 			continue;
1460 		} else if (d->desc[i] == '^') {
1461 			memmove(&d->desc[i],
1462 				&d->desc[i + 1],
1463 				d->descsz - i);
1464 			d->descsz -= 1;
1465 			continue;
1466 		} else if (d->desc[i] != '[' ||
1467 			   d->desc[i + 1] != '[') {
1468 			i++;
1469 			continue;
1470 		}
1471 
1472 		for (j = i; j < d->descsz; j++)
1473 			if (d->desc[j] == ']' &&
1474 			    d->desc[j + 1] == ']')
1475 				break;
1476 
1477 		/* Ignore if we don't have a terminator. */
1478 
1479 		assert(j > i);
1480 		j += 2;
1481 		if (j > d->descsz) {
1482 			i++;
1483 			continue;
1484 		}
1485 
1486 		memmove(&d->desc[i], &d->desc[j], d->descsz - j + 1);
1487 		d->descsz -= (j - i);
1488 	}
1489 
1490 	/*
1491 	 * Here we go!
1492 	 * Print out the description as best we can.
1493 	 * Do on-the-fly processing of any HTML we encounter into
1494 	 * mdoc(7) and try to break lines up.
1495 	 */
1496 
1497 	col = stripspace = 0;
1498 
1499 	for (i = 0; i < d->descsz; ) {
1500 		/*
1501 		 * The "stripspace" variable is set to >=2 if we've
1502 		 * stripped white-space off before an anticipated macro.
1503 		 * Without it, if the macro ends up *not* being a macro,
1504 		 * we wouldn't flush the line and thus end up losing a
1505 		 * space.  This lets the code that flushes the line know
1506 		 * that we've stripped spaces and adds them back in.
1507 		 */
1508 
1509 		if (stripspace > 0)
1510 			stripspace--;
1511 
1512 		/* Ignore NUL byte, just in case. */
1513 
1514 		if (d->desc[i] == '\0') {
1515 			i++;
1516 			continue;
1517 		}
1518 
1519 		/*
1520 		 * Newlines are paragraph breaks.
1521 		 * If we have multiple newlines, then keep to a single
1522 		 * `Pp' to keep it clean.
1523 		 * Only do this if we're not before a block-level HTML,
1524 		 * as this would mean, for instance, a `Pp'-`Bd' pair.
1525 		 */
1526 
1527 		if (d->desc[i] == '\n') {
1528 			while (isspace((unsigned char)d->desc[i]))
1529 				i++;
1530 			for (tag = 0; tag < TAG__MAX; tag++) {
1531 				sz = strlen(tags[tag].html);
1532 				if (strncasecmp(&d->desc[i],
1533 				    tags[tag].html, sz) == 0)
1534 					break;
1535 			}
1536 			if (tag == TAG__MAX ||
1537 			    (tags[tag].flags & TAGINFO_INLINE)) {
1538 				if (col > 0)
1539 					fputs("\n", f);
1540 				fputs(".Pp\n", f);
1541 				/* We're on a new line. */
1542 				col = 0;
1543 			}
1544 			continue;
1545 		}
1546 
1547 		/*
1548 		 * New sentence, new line.
1549 		 * We guess whether this is the case by using the
1550 		 * dumbest possible heuristic.
1551 		 */
1552 
1553 		if (d->desc[i] == ' ' &&
1554 		    i > 0 && d->desc[i - 1] == '.') {
1555 			for (j = i - 1; j > 0; j--)
1556 				if (isspace((unsigned char)d->desc[j])) {
1557 					j++;
1558 					break;
1559 				}
1560 			if (newsentence(j, i, d->desc)) {
1561 				while (d->desc[i] == ' ')
1562 					i++;
1563 				fputc('\n', f);
1564 				col = 0;
1565 				continue;
1566 			}
1567 		}
1568 
1569 		/*
1570 		 * After 65 characters, force a break when we encounter
1571 		 * white-space to keep our lines more or less tidy.
1572 		 */
1573 
1574 		if (col > 65 && d->desc[i] == ' ') {
1575 			while (d->desc[i] == ' ' )
1576 				i++;
1577 			fputc('\n', f);
1578 			col = 0;
1579 			continue;
1580 		}
1581 
1582 		/* Parse HTML tags and links. */
1583 
1584 		if (d->desc[i] == '<') {
1585 			for (tag = 0; tag < TAG__MAX; tag++) {
1586 				sz = strlen(tags[tag].html);
1587 				assert(sz > 0);
1588 				if (strncmp(&d->desc[i],
1589 				    tags[tag].html, sz))
1590 					continue;
1591 
1592 				i += sz;
1593 
1594 				/* Blindly ignore attributes. */
1595 
1596 				if (tags[tag].flags & TAGINFO_ATTRS) {
1597 					while (d->desc[i] != '\0' &&
1598 					       d->desc[i] != '>')
1599 						i++;
1600 					if (d->desc[i] == '\0')
1601 						break;
1602 					i++;
1603 				}
1604 
1605 				/*
1606 				 * NOOP tags don't do anything, such as
1607 				 * the case of `</dd>', which only
1608 				 * serves to end an `It' block that will
1609 				 * be closed out by a subsequent `It' or
1610 				 * end of clause `El' anyway.
1611 				 * Skip the trailing space.
1612 				 */
1613 
1614 				if (tags[tag].flags & TAGINFO_NOOP) {
1615 					while (isspace((unsigned char)d->desc[i]))
1616 						i++;
1617 					break;
1618 				} else if (tags[tag].flags & TAGINFO_INLINE) {
1619 					while (stripspace > 0) {
1620 						fputc(' ', f);
1621 						col++;
1622 						stripspace--;
1623 					}
1624 					fputs(tags[tag].mdoc, f);
1625 					/*col += strlen(tags[tag].mdoc);*/
1626 					break;
1627 				}
1628 
1629 				/*
1630 				 * A breaking mdoc(7) statement.
1631 				 * Break the current line, output the
1632 				 * macro, and conditionally break
1633 				 * following that (or we might do
1634 				 * nothing at all).
1635 				 */
1636 
1637 				if (col > 0) {
1638 					fputs("\n", f);
1639 					col = 0;
1640 				}
1641 
1642 				fputs(tags[tag].mdoc, f);
1643 				if (!(tags[tag].flags & TAGINFO_NOBR)) {
1644 					fputs("\n", f);
1645 					col = 0;
1646 				} else if (!(tags[tag].flags & TAGINFO_NOSP)) {
1647 					fputs(" ", f);
1648 					col++;
1649 				}
1650 				while (isspace((unsigned char)d->desc[i]))
1651 					i++;
1652 				break;
1653 			}
1654 			if (tag < TAG__MAX) {
1655 				stripspace = 0;
1656 				continue;
1657 			}
1658 			while (stripspace > 0) {
1659 				fputc(' ', f);
1660 				col++;
1661 				stripspace--;
1662 			}
1663 		} else if (d->desc[i] == '[' && d->desc[i + 1] != ']') {
1664 			/* Do we start at the bracket or bar? */
1665 
1666 			for (sz = i + 1; sz < d->descsz; sz++)
1667 				if (d->desc[sz] == '|' ||
1668 				    d->desc[sz] == ']')
1669 					break;
1670 
1671 			/* This is a degenerate case. */
1672 
1673 			if (sz == d->descsz) {
1674 				i++;
1675 				stripspace = 0;
1676 				continue;
1677 			}
1678 
1679 			/*
1680 			 * Look for a trailing "()", using "j" as a
1681 			 * sentinel in case it was found.  This lets us
1682 			 * print out a "Fn xxxx" instead of having the
1683 			 * function be ugly.  If we don't have a Fn and
1684 			 * we'd stripped space before this, remember to
1685 			 * add the space back in.
1686 			 */
1687 
1688 			j = 0;
1689 			if (d->desc[sz] != '|') {
1690 				i = i + 1;
1691 				if (sz > 2 &&
1692 				    d->desc[sz - 1] == ')' &&
1693 				    d->desc[sz - 2] == '(') {
1694 					if (col > 0)
1695 						fputc('\n', f);
1696 					fputs(".Fn ", f);
1697 					j = sz - 2;
1698 					assert(j > 0);
1699 				} else if (stripspace) {
1700 					fputc(' ', f);
1701 					col++;
1702 				}
1703 			} else {
1704 				if (stripspace) {
1705 					fputc(' ', f);
1706 					col++;
1707 				}
1708 				i = sz + 1;
1709 			}
1710 
1711 			while (isspace((unsigned char)d->desc[i]))
1712 				i++;
1713 
1714 			/*
1715 			 * Now handle in-page references.  If we're a
1716 			 * function reference (e.g., function()), then
1717 			 * omit the trailing parentheses and put in a Fn
1718 			 * block.  Otherwise print them out as-is: we've
1719 			 * already accumulated them into our "SEE ALSO"
1720 			 * values, which we'll use below.
1721 			 */
1722 
1723 			for ( ; i < d->descsz; i++, col++) {
1724 				if (j > 0 && i == j) {
1725 					i += 3;
1726 					for ( ; i < d->descsz; i++)
1727 						if (d->desc[i] == '.')
1728 							fputs(" .", f);
1729 						else if (d->desc[i] == ',')
1730 							fputs(" ,", f);
1731 						else if (d->desc[i] == ')')
1732 							fputs(" )", f);
1733 						else
1734 							break;
1735 
1736 					/* Trim trailing space. */
1737 
1738 					while (i < d->descsz &&
1739 					       isspace((unsigned char)d->desc[i]))
1740 						i++;
1741 
1742 					fputc('\n', f);
1743 					col = 0;
1744 					break;
1745 				} else if (d->desc[i] == ']') {
1746 					i++;
1747 					break;
1748 				}
1749 				fputc(d->desc[i], f);
1750 				col++;
1751 			}
1752 
1753 			stripspace = 0;
1754 			continue;
1755 		}
1756 
1757 		/* Strip leading spaces from output. */
1758 
1759 		if (d->desc[i] == ' ' && col == 0) {
1760 			while (d->desc[i] == ' ')
1761 				i++;
1762 			continue;
1763 		}
1764 
1765 		/*
1766 		 * Strip trailing spaces from output.
1767 		 * Set "stripspace" to be the number of white-space
1768 		 * characters that we've skipped, plus one.
1769 		 * This means that the next loop iteration while get the
1770 		 * actual amount we've skipped (for '<' or '[') and we
1771 		 * can act upon it there.
1772 		 */
1773 
1774 		if (d->desc[i] == ' ') {
1775 			j = i;
1776 			while (j < d->descsz && d->desc[j] == ' ')
1777 				j++;
1778 			if (j < d->descsz &&
1779 			    (d->desc[j] == '\n' ||
1780 			     d->desc[j] == '<' ||
1781 			     d->desc[j] == '[')) {
1782 				stripspace = d->desc[j] != '\n' ?
1783 					(j - i + 1) : 0;
1784 				i = j;
1785 				continue;
1786 			}
1787 		}
1788 
1789 		assert(d->desc[i] != '\n');
1790 
1791 		/*
1792 		 * Handle some oddities.
1793 		 * The following HTML escapes exist in the output that I
1794 		 * could find.
1795 		 * There might be others...
1796 		 */
1797 
1798 		if (strncmp(&d->desc[i], "&rarr;", 6) == 0) {
1799 			i += 6;
1800 			fputs("\\(->", f);
1801 		} else if (strncmp(&d->desc[i], "&larr;", 6) == 0) {
1802 			i += 6;
1803 			fputs("\\(<-", f);
1804 		} else if (strncmp(&d->desc[i], "&nbsp;", 6) == 0) {
1805 			i += 6;
1806 			fputc(' ', f);
1807 		} else if (strncmp(&d->desc[i], "&lt;", 4) == 0) {
1808 			i += 4;
1809 			fputc('<', f);
1810 		} else if (strncmp(&d->desc[i], "&gt;", 4) == 0) {
1811 			i += 4;
1812 			fputc('>', f);
1813 		} else if (strncmp(&d->desc[i], "&#91;", 5) == 0) {
1814 			i += 5;
1815 			fputc('[', f);
1816 		} else {
1817 			/* Make sure we don't trigger a macro. */
1818 			if (col == 0 &&
1819 			    (d->desc[i] == '.' || d->desc[i] == '\''))
1820 				fputs("\\&", f);
1821 			fputc(d->desc[i], f);
1822 			i++;
1823 		}
1824 
1825 		col++;
1826 	}
1827 
1828 	if (col > 0)
1829 		fputs("\n", f);
1830 
1831 	fputs(".Sh IMPLEMENTATION NOTES\n", f);
1832 	fprintf(f, "These declarations were extracted from the\n"
1833 	      "interface documentation at line %zu.\n", d->ln);
1834 	fputs(".Bd -literal\n", f);
1835 	fputs(d->fulldesc, f);
1836 	fputs(".Ed\n", f);
1837 
1838 	/*
1839 	 * Look up all of our keywords (which are in the xrs field) in
1840 	 * the table of all known keywords.
1841 	 * Don't print duplicates.
1842 	 */
1843 
1844 	if (d->xrsz > 0) {
1845 		qsort(d->xrs, d->xrsz, sizeof(char *), xrcmp);
1846 		lastres = NULL;
1847 		for (last = 0, i = 0; i < d->xrsz; i++) {
1848 			res = lookup(d->xrs[i]);
1849 
1850 			/* Ignore self-reference. */
1851 
1852 			if (res == d->nms[0] && verbose)
1853 				warnx("%s:%zu: self-reference: %s",
1854 					d->fn, d->ln, d->xrs[i]);
1855 			if (res == d->nms[0])
1856 				continue;
1857 			if (res == NULL && verbose)
1858 				warnx("%s:%zu: ref not found: %s",
1859 					d->fn, d->ln, d->xrs[i]);
1860 			if (res == NULL)
1861 				continue;
1862 
1863 			/* Ignore duplicates. */
1864 
1865 			if (lastres == res)
1866 				continue;
1867 
1868 			if (last)
1869 				fputs(" ,\n", f);
1870 			else
1871 				fputs(".Sh SEE ALSO\n", f);
1872 
1873 			fprintf(f, ".Xr %s 3", res);
1874 			last = 1;
1875 			lastres = res;
1876 		}
1877 		if (last)
1878 			fputs("\n", f);
1879 	}
1880 
1881 	if (nofile == 0)
1882 		fclose(f);
1883 }
1884 
#if HAVE_PLEDGE
/*
 * Restrict the process with pledge(2).
 * If "nofile" is set, then "stdio" alone is requested; otherwise the
 * promises for writing and creating files are requested as well.
 * Exits with an error if the pledge cannot be made.
 */
static void
sandbox_pledge(void)
{
	const char	*promises;

	promises = nofile ? "stdio" : "stdio wpath cpath";

	if (pledge(promises, NULL) == -1)
		err(1, NULL);
}
#endif
1903 
#if HAVE_SANDBOX_INIT
/*
 * Enter Darwin's "seatbelt" sandbox.
 * If "nofile" is set, then the pure-computation profile is requested;
 * otherwise the no-network profile, as we still need file writing.
 * On failure, the sandbox's error message is passed to perror(3) and
 * the process exits with status 1.
 */
static void
sandbox_apple(void)
{
	const char	*profile;
	char		*errmsg;

	profile = nofile ?
		kSBXProfilePureComputation : kSBXProfileNoNetwork;

	if (sandbox_init(profile, SANDBOX_NAMED, &errmsg) != 0) {
		perror(errmsg);
		sandbox_free_error(errmsg);
		exit(1);
	}
}
#endif
1926 
1927 /*
1928  * Check to see whether there are any filename duplicates.
1929  * This is just a warning, but will really screw things up, since the
1930  * last filename will overwrite the first.
1931  */
1932 static void
check_dupes(struct parse * p)1933 check_dupes(struct parse *p)
1934 {
1935 	const struct defn	*d, *dd;
1936 
1937 	TAILQ_FOREACH(d, &p->dqhead, entries)
1938 		TAILQ_FOREACH_REVERSE(dd, &p->dqhead, defnq, entries) {
1939 			if (dd == d)
1940 				break;
1941 			if (d->fname == NULL ||
1942 			    dd->fname == NULL ||
1943 			    strcmp(d->fname, dd->fname))
1944 				continue;
1945 			warnx("%s:%zu: duplicate filename: "
1946 				"%s (from %s, line %zu)", d->fn,
1947 				d->ln, d->fname, dd->nms[0], dd->ln);
1948 		}
1949 }
1950 
1951 int
main(int argc,char * argv[])1952 main(int argc, char *argv[])
1953 {
1954 	size_t		 i, bufsz;
1955 	ssize_t		 len;
1956 	FILE		*f = stdin;
1957 	char		*cp = NULL;
1958 	const char	*prefix = ".";
1959 	struct parse	 p;
1960 	int		 rc = 0, ch;
1961 	struct defn	*d;
1962 	struct decl	*e;
1963 
1964 	memset(&p, 0, sizeof(struct parse));
1965 
1966 	p.fn = "<stdin>";
1967 	p.ln = 0;
1968 	p.phase = PHASE_INIT;
1969 
1970 	TAILQ_INIT(&p.dqhead);
1971 
1972 	while ((ch = getopt(argc, argv, "nNp:v")) != -1)
1973 		switch (ch) {
1974 		case 'n':
1975 			nofile = 1;
1976 			break;
1977 		case 'N':
1978 			nofile = 1;
1979 			filename = 1;
1980 			break;
1981 		case 'p':
1982 			prefix = optarg;
1983 			break;
1984 		case 'v':
1985 			verbose = 1;
1986 			break;
1987 		default:
1988 			goto usage;
1989 		}
1990 
1991 	argc -= optind;
1992 	argv += optind;
1993 
1994 	if (argc > 1)
1995 		goto usage;
1996 
1997 	if (argc > 0) {
1998 		if ((f = fopen(argv[0], "r")) == NULL)
1999 			err(1, "%s", argv[0]);
2000 		p.fn = argv[0];
2001 	}
2002 
2003 #if HAVE_SANDBOX_INIT
2004 	sandbox_apple();
2005 #elif HAVE_PLEDGE
2006 	sandbox_pledge();
2007 #endif
2008 	/*
2009 	 * Read in line-by-line and process in the phase dictated by our
2010 	 * finite state automaton.
2011 	 */
2012 
2013 	while ((len = getline(&cp, &bufsz, f)) != -1) {
2014 		assert(len > 0);
2015 		p.ln++;
2016 		if (cp[len - 1] != '\n') {
2017 			warnx("%s:%zu: unterminated line", p.fn, p.ln);
2018 			break;
2019 		}
2020 
2021 		/*
2022 		 * Lines are now always NUL-terminated, and don't allow
2023 		 * NUL characters in the line.
2024 		 */
2025 
2026 		cp[--len] = '\0';
2027 		len = strlen(cp);
2028 
2029 		switch (p.phase) {
2030 		case PHASE_INIT:
2031 			init(&p, cp);
2032 			break;
2033 		case PHASE_KEYS:
2034 			keys(&p, cp, (size_t)len);
2035 			break;
2036 		case PHASE_DESC:
2037 			desc(&p, cp, (size_t)len);
2038 			break;
2039 		case PHASE_SEEALSO:
2040 			seealso(&p, cp, (size_t)len);
2041 			break;
2042 		case PHASE_DECL:
2043 			decl(&p, cp, (size_t)len);
2044 			break;
2045 		}
2046 	}
2047 
2048 	/*
2049 	 * If we hit the last line, then try to process.
2050 	 * Otherwise, we failed along the way.
2051 	 */
2052 
2053 	if (feof(f)) {
2054 		/*
2055 		 * Allow us to be at the declarations or scanning for
2056 		 * the next clause.
2057 		 */
2058 		if (p.phase == PHASE_INIT ||
2059 		    p.phase == PHASE_DECL) {
2060 			if (hcreate(5000) == 0)
2061 				err(1, NULL);
2062 			TAILQ_FOREACH(d, &p.dqhead, entries)
2063 				postprocess(prefix, d);
2064 			check_dupes(&p);
2065 			TAILQ_FOREACH(d, &p.dqhead, entries)
2066 				emit(d);
2067 			rc = 1;
2068 		} else if (p.phase != PHASE_DECL)
2069 			warnx("%s:%zu: exit when not in "
2070 				"initial state", p.fn, p.ln);
2071 	}
2072 
2073 	while ((d = TAILQ_FIRST(&p.dqhead)) != NULL) {
2074 		TAILQ_REMOVE(&p.dqhead, d, entries);
2075 		while ((e = TAILQ_FIRST(&d->dcqhead)) != NULL) {
2076 			TAILQ_REMOVE(&d->dcqhead, e, entries);
2077 			free(e->text);
2078 			free(e);
2079 		}
2080 		free(d->name);
2081 		free(d->desc);
2082 		free(d->fulldesc);
2083 		free(d->dt);
2084 		for (i = 0; i < d->nmsz; i++)
2085 			free(d->nms[i]);
2086 		for (i = 0; i < d->xrsz; i++)
2087 			free(d->xrs[i]);
2088 		for (i = 0; i < d->keysz; i++)
2089 			free(d->keys[i]);
2090 		free(d->keys);
2091 		free(d->nms);
2092 		free(d->xrs);
2093 		free(d->fname);
2094 		free(d->seealso);
2095 		free(d->keybuf);
2096 		free(d);
2097 	}
2098 
2099 	return !rc;
2100 usage:
2101 	fprintf(stderr, "usage: %s [-Nnv] [-p prefix] [file]\n",
2102 		getprogname());
2103 	return 1;
2104 }
2105