xref: /netbsd-src/external/public-domain/sqlite/sqlite2mdoc/main.c (revision d909946ca08dceb44d7d0f22ec9488679695d976)
1 /*	$Id: main.c,v 1.1 2016/03/30 21:30:20 christos Exp $ */
2 /*
3  * Copyright (c) 2016 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #ifdef __linux__
18 #define _GNU_SOURCE
19 #endif
20 #include <sys/queue.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <err.h>
25 #include <getopt.h>
26 #include <search.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #ifdef __linux__
31 #include <bsd/stdio.h>
32 #include <bsd/stdlib.h>
33 #include <bsd/string.h>
34 #endif
35 
/*
 * Parser state: which part of an interface block the next input line
 * is expected to belong to.
 */
enum	phase {
	PHASE_INIT = 0,		/* scanning for the start of a definition */
	PHASE_KEYS = 1,		/* definition seen; collecting keywords */
	PHASE_DESC = 2,		/* keywords done; collecting description */
	PHASE_SEEALSO = 3,	/* inside a "see also" clause */
	PHASE_DECL = 4		/* comment closed; collecting declarations */
};
46 
/*
 * Coarse classification of a collected declaration.
 */
enum	decltype {
	DECLTYPE_CPP = 0,	/* pre-processor definition */
	DECLTYPE_C = 1,		/* non-preprocessor text closed by a semicolon */
	DECLTYPE_NEITHER = 2	/* non-preprocessor text with no semicolon */
};
55 
/*
 * Pre-processor decorations that are dropped from variable and
 * function declarations.
 */
enum	preproc {
	PREPROC_SQLITE_API = 0,
	PREPROC_SQLITE_DEPRECATED = 1,
	PREPROC_SQLITE_EXPERIMENTAL = 2,
	PREPROC_SQLITE_EXTERN = 3,
	PREPROC__MAX = 4	/* number of entries above */
};
66 
/*
 * The HTML tags this program recognises.
 * The values index the tag description table, so the order matters.
 */
enum	tag {
	TAG_B_CLOSE = 0,
	TAG_B_OPEN = 1,
	TAG_BLOCK_CLOSE = 2,
	TAG_BLOCK_OPEN = 3,
	TAG_DD_CLOSE = 4,
	TAG_DD_OPEN = 5,
	TAG_DL_CLOSE = 6,
	TAG_DL_OPEN = 7,
	TAG_DT_CLOSE = 8,
	TAG_DT_OPEN = 9,
	TAG_H3_CLOSE = 10,
	TAG_H3_OPEN = 11,
	TAG_LI_CLOSE = 12,
	TAG_LI_OPEN = 13,
	TAG_OL_CLOSE = 14,
	TAG_OL_OPEN = 15,
	TAG_PRE_CLOSE = 16,
	TAG_PRE_OPEN = 17,
	TAG_UL_CLOSE = 18,
	TAG_UL_OPEN = 19,
	TAG__MAX = 20		/* number of tags above */
};
93 
/* Tail-queue head types for lists of declarations and of definitions. */
TAILQ_HEAD(declq, decl);
TAILQ_HEAD(defnq, defn);
96 
97 /*
98  * A declaration of type DECLTYPE_CPP or DECLTYPE_C.
99  * These need not be unique (if ifdef'd).
100  */
101 struct	decl {
102 	enum decltype	 type; /* type of declaration */
103 	char		*text; /* text */
104 	size_t		 textsz; /* strlen(text) */
105 	TAILQ_ENTRY(decl) entries;
106 };
107 
108 /*
109  * A definition is basically the manpage contents.
110  */
111 struct	defn {
112 	char		 *name; /* really Nd */
113 	TAILQ_ENTRY(defn) entries;
114 	char		 *desc; /* long description */
115 	size_t		  descsz; /* strlen(desc) */
116 	struct declq	  dcqhead; /* declarations */
117 	int		  multiline; /* used when parsing */
118 	int		  instruct; /* used when parsing */
119 	const char	 *fn; /* parsed from file */
120 	size_t		  ln; /* parsed at line */
121 	int		  postprocessed; /* good for emission? */
122 	char		 *dt; /* manpage title */
123 	char		**nms; /* manpage names */
124 	size_t		  nmsz; /* number of names */
125 	char		 *fname; /* manpage filename */
126 	char		 *keybuf; /* raw keywords */
127 	size_t		  keybufsz; /* length of "keysbuf" */
128 	char		 *seealso; /* see also tags */
129 	size_t		  seealsosz; /* length of seealso */
130 	char		**xrs; /* parsed "see also" references */
131 	size_t		  xrsz; /* number of references */
132 	char		**keys; /* parsed keywords */
133 	size_t		  keysz; /* number of keywords */
134 };
135 
136 /*
137  * Entire parse routine.
138  */
139 struct	parse {
140 	enum phase	 phase; /* phase of parse */
141 	size_t		 ln; /* line number */
142 	const char	*fn; /* open file */
143 	struct defnq	 dqhead; /* definitions */
144 };
145 
/*
 * Flag bits for the "flags" member of struct taginfo.
 */
#define	TAGINFO_NOBR	 0x01 /* follow w/space, not newline */
#define	TAGINFO_NOOP	 0x02 /* just strip out */
#define	TAGINFO_NOSP	 0x04 /* follow w/o space or newline */
#define	TAGINFO_INLINE	 0x08 /* inline block (notused) */

/*
 * Describes what to do with one HTML tag found in the text.
 */
struct	taginfo {
	/* HTML text to key on. */
	const char	*html;
	/* mdoc(7) text to generate in its place. */
	const char	*mdoc;
	/* Bit-wise OR of TAGINFO_* values. */
	unsigned int	 flags;
};
158 
159 static	const struct taginfo tags[TAG__MAX] = {
160 	{ "</b>", "\\fP", TAGINFO_INLINE }, /* TAG_B_CLOSE */
161 	{ "<b>", "\\fB", TAGINFO_INLINE }, /* TAG_B_OPEN */
162 	{ "</blockquote>", ".Ed\n.Pp", 0 }, /* TAG_BLOCK_CLOSE */
163 	{ "<blockquote>", ".Bd -ragged", 0 }, /* TAG_BLOCK_OPEN */
164 	{ "</dd>", "", TAGINFO_NOOP }, /* TAG_DD_CLOSE */
165 	{ "<dd>", "", TAGINFO_NOOP }, /* TAG_DD_OPEN */
166 	{ "</dl>", ".El\n.Pp", 0 }, /* TAG_DL_CLOSE */
167 	{ "<dl>", ".Bl -tag -width Ds", 0 }, /* TAG_DL_OPEN */
168 	{ "</dt>", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_DT_CLOSE */
169 	{ "<dt>", ".It", TAGINFO_NOBR }, /* TAG_DT_OPEN */
170 	{ "</h3>", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_H3_CLOSE */
171 	{ "<h3>", ".Ss", TAGINFO_NOBR }, /* TAG_H3_OPEN */
172 	{ "</li>", "", TAGINFO_NOOP }, /* TAG_LI_CLOSE */
173 	{ "<li>", ".It", 0 }, /* TAG_LI_OPEN */
174 	{ "</ol>", ".El\n.Pp", 0 }, /* TAG_OL_CLOSE */
175 	{ "<ol>", ".Bl -enum", 0 }, /* TAG_OL_OPEN */
176 	{ "</pre>", ".Ed\n.Pp", 0 }, /* TAG_PRE_CLOSE */
177 	{ "<pre>", ".Bd -literal", 0 }, /* TAG_PRE_OPEN */
178 	{ "</ul>", ".El\n.Pp", 0 }, /* TAG_UL_CLOSE */
179 	{ "<ul>", ".Bl -bullet", 0 }, /* TAG_UL_OPEN */
180 };
181 
182 static	const char *const preprocs[TAG__MAX] = {
183 	"SQLITE_API", /* PREPROC_SQLITE_API */
184 	"SQLITE_DEPRECATED", /* PREPROC_SQLITE_DEPRECATED */
185 	"SQLITE_EXPERIMENTAL", /* PREPROC_SQLITE_EXPERIMENTAL */
186 	"SQLITE_EXTERN", /* PREPROC_SQLITE_EXTERN */
187 };
188 
/* Non-zero enables verbose reporting. */
static	int verbose = 0;
/* Non-zero: don't create any files, write to stdout instead. */
static	int nofile = 0;
193 
194 static void
195 decl_function_add(struct parse *p, char **etext,
196 	size_t *etextsz, const char *cp, size_t len)
197 {
198 
199 	if (' ' != (*etext)[*etextsz - 1]) {
200 		*etext = realloc(*etext, *etextsz + 2);
201 		if (NULL == *etext)
202 			err(EXIT_FAILURE, "%s:%zu: "
203 				"realloc", p->fn, p->ln);
204 		(*etextsz)++;
205 		strlcat(*etext, " ", *etextsz + 1);
206 	}
207 	*etext = realloc(*etext, *etextsz + len + 1);
208 	if (NULL == *etext)
209 		err(EXIT_FAILURE, "%s:%zu: realloc", p->fn, p->ln);
210 	memcpy(*etext + *etextsz, cp, len);
211 	*etextsz += len;
212 	(*etext)[*etextsz] = '\0';
213 }
214 
215 static void
216 decl_function_copy(struct parse *p, char **etext,
217 	size_t *etextsz, const char *cp, size_t len)
218 {
219 
220 	*etext = malloc(len + 1);
221 	if (NULL == *etext)
222 		err(EXIT_FAILURE, "%s:%zu: strdup", p->fn, p->ln);
223 	memcpy(*etext, cp, len);
224 	*etextsz = len;
225 	(*etext)[*etextsz] = '\0';
226 }
227 
228 /*
229  * A C function (or variable, or whatever).
230  * This is more specifically any non-preprocessor text.
231  */
232 static int
233 decl_function(struct parse *p, char *cp, size_t len)
234 {
235 	char		*ep, *ncp, *lcp, *rcp;
236 	size_t		 nlen;
237 	struct defn	*d;
238 	struct decl	*e;
239 
240 	/* Fetch current interface definition. */
241 	d = TAILQ_LAST(&p->dqhead, defnq);
242 	assert(NULL != d);
243 
244 	/*
245 	 * Since C tokens are semicolon-separated, we may be invoked any
246 	 * number of times per a single line.
247 	 */
248 again:
249 	while (isspace((int)*cp)) {
250 		cp++;
251 		len--;
252 	}
253 	if ('\0' == *cp)
254 		return(1);
255 
256 	/* Whether we're a continuation clause. */
257 	if (d->multiline) {
258 		/* This might be NULL if we're not a continuation. */
259 		e = TAILQ_LAST(&d->dcqhead, declq);
260 		assert(DECLTYPE_C == e->type);
261 		assert(NULL != e);
262 		assert(NULL != e->text);
263 		assert(e->textsz);
264 	} else {
265 		assert(0 == d->instruct);
266 		e = calloc(1, sizeof(struct decl));
267 		e->type = DECLTYPE_C;
268 		if (NULL == e)
269 			err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln);
270 		TAILQ_INSERT_TAIL(&d->dcqhead, e, entries);
271 	}
272 
273 	/*
274 	 * We begin by seeing if there's a semicolon on this line.
275 	 * If there is, we'll need to do some special handling.
276 	 */
277 	ep = strchr(cp, ';');
278 	lcp = strchr(cp, '{');
279 	rcp = strchr(cp, '}');
280 
281 	/* We're only a partial statement (i.e., no closure). */
282 	if (NULL == ep && d->multiline) {
283 		assert(NULL != e->text);
284 		assert(e->textsz > 0);
285 		/* Is a struct starting or ending here? */
286 		if (d->instruct && NULL != rcp)
287 			d->instruct--;
288 		else if (NULL != lcp)
289 			d->instruct++;
290 		decl_function_add(p, &e->text, &e->textsz, cp, len);
291 		return(1);
292 	} else if (NULL == ep && ! d->multiline) {
293 		d->multiline = 1;
294 		/* Is a structure starting in this line? */
295 		if (NULL != lcp &&
296 		    (NULL == rcp || rcp < lcp))
297 			d->instruct++;
298 		decl_function_copy(p, &e->text, &e->textsz, cp, len);
299 		return(1);
300 	}
301 
302 	/* Position ourselves after the semicolon. */
303 	assert(NULL != ep);
304 	ncp = cp;
305 	nlen = (ep - cp) + 1;
306 	cp = ep + 1;
307 	len -= nlen;
308 
309 	if (d->multiline) {
310 		assert(NULL != e->text);
311 		/* Don't stop the multi-line if we're in a struct. */
312 		if (0 == d->instruct) {
313 			if (NULL == lcp || lcp > cp)
314 				d->multiline = 0;
315 		} else if (NULL != rcp && rcp < cp)
316 			if (0 == --d->instruct)
317 				d->multiline = 0;
318 		decl_function_add(p, &e->text, &e->textsz, ncp, nlen);
319 	} else {
320 		assert(NULL == e->text);
321 		if (NULL != lcp && lcp < cp) {
322 			d->multiline = 1;
323 			d->instruct++;
324 		}
325 		decl_function_copy(p, &e->text, &e->textsz, ncp, nlen);
326 	}
327 
328 	goto again;
329 }
330 
331 /*
332  * A definition is just #define followed by space followed by the name,
333  * then the value of that name.
334  * We ignore the latter.
335  * FIXME: this does not understand multi-line CPP, but I don't think
336  * there are any instances of that in sqlite.h.
337  */
338 static int
339 decl_define(struct parse *p, char *cp, size_t len)
340 {
341 	struct defn	*d;
342 	struct decl	*e;
343 	size_t		 sz;
344 
345 	while (isspace((int)*cp)) {
346 		cp++;
347 		len--;
348 	}
349 	if (0 == len) {
350 		warnx("%s:%zu: empty pre-processor "
351 			"constant", p->fn, p->ln);
352 		return(1);
353 	}
354 
355 	d = TAILQ_LAST(&p->dqhead, defnq);
356 	assert(NULL != d);
357 
358 	/*
359 	 * We're parsing a preprocessor definition, but we're still
360 	 * waiting on a semicolon from a function definition.
361 	 * It might be a comment or an error.
362 	 */
363 	if (d->multiline) {
364 		warnx("%s:%zu: multiline declaration "
365 			"still open (harmless?)", p->fn, p->ln);
366 		e = TAILQ_LAST(&d->dcqhead, declq);
367 		assert(NULL != e);
368 		e->type = DECLTYPE_NEITHER;
369 		d->multiline = d->instruct = 0;
370 	}
371 
372 	sz = 0;
373 	while ('\0' != cp[sz] && ! isspace((int)cp[sz]))
374 		sz++;
375 
376 	e = calloc(1, sizeof(struct decl));
377 	if (NULL == e)
378 		err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln);
379 	e->type = DECLTYPE_CPP;
380 	e->text = calloc(1, sz + 1);
381 	if (NULL == e->text)
382 		err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln);
383 	strlcpy(e->text, cp, sz + 1);
384 	e->textsz = sz;
385 	TAILQ_INSERT_TAIL(&d->dcqhead, e, entries);
386 	return(1);
387 }
388 
389 /*
390  * A declaration is a function, variable, preprocessor definition, or
391  * really anything else until we reach a blank line.
392  */
393 static void
394 decl(struct parse *p, char *cp, size_t len)
395 {
396 	struct defn	*d;
397 	struct decl	*e;
398 
399 	while (isspace((int)*cp)) {
400 		cp++;
401 		len--;
402 	}
403 
404 	/* Check closure. */
405 	if ('\0' == *cp) {
406 		p->phase = PHASE_INIT;
407 		/* Check multiline status. */
408 		d = TAILQ_LAST(&p->dqhead, defnq);
409 		assert(NULL != d);
410 		if (d->multiline) {
411 			warnx("%s:%zu: multiline declaration "
412 				"still open (harmless?)", p->fn, p->ln);
413 			e = TAILQ_LAST(&d->dcqhead, declq);
414 			assert(NULL != e);
415 			e->type = DECLTYPE_NEITHER;
416 			d->multiline = d->instruct = 0;
417 		}
418 		return;
419 	}
420 
421 	/*
422 	 * Catch preprocessor defines, but discard all other types of
423 	 * preprocessor statements.
424 	 */
425 	if ('#' == *cp) {
426 		len--;
427 		cp++;
428 		while (isspace((int)*cp)) {
429 			len--;
430 			cp++;
431 		}
432 		if (0 == strncmp(cp, "define", 6))
433 			decl_define(p, cp + 6, len - 6);
434 		return;
435 	}
436 
437 	decl_function(p, cp, len);
438 }
439 
440 /*
441  * Parse "SEE ALSO" phrases, which can come at any point in the
442  * interface description (unlike what they claim).
443  */
444 static void
445 seealso(struct parse *p, char *cp, size_t len)
446 {
447 	struct defn	*d;
448 
449 	if ('\0' == *cp) {
450 		warnx("%s:%zu: warn: unexpected end of "
451 			"interface description", p->fn, p->ln);
452 		p->phase = PHASE_INIT;
453 		return;
454 	} else if (0 == strcmp(cp, "*/")) {
455 		p->phase = PHASE_DECL;
456 		return;
457 	} else if ('*' != cp[0] || '*' != cp[1]) {
458 		warnx("%s:%zu: warn: unexpected end of "
459 			"interface description", p->fn, p->ln);
460 		p->phase = PHASE_INIT;
461 		return;
462 	}
463 
464 	cp += 2;
465 	len -= 2;
466 	while (isspace((int)*cp)) {
467 		cp++;
468 		len--;
469 	}
470 
471 	/* Blank line: back to description part. */
472 	if (0 == len) {
473 		p->phase = PHASE_DESC;
474 		return;
475 	}
476 
477 	/* Fetch current interface definition. */
478 	d = TAILQ_LAST(&p->dqhead, defnq);
479 	assert(NULL != d);
480 
481 	d->seealso = realloc(d->seealso,
482 		d->seealsosz + len + 1);
483 	memcpy(d->seealso + d->seealsosz, cp, len);
484 	d->seealsosz += len;
485 	d->seealso[d->seealsosz] = '\0';
486 }
487 
488 /*
489  * A definition description is a block of text that we'll later format
490  * in mdoc(7).
491  * It extends from the name of the definition down to the declarations
492  * themselves.
493  */
494 static void
495 desc(struct parse *p, char *cp, size_t len)
496 {
497 	struct defn	*d;
498 	size_t		 nsz;
499 
500 	if ('\0' == *cp) {
501 		warnx("%s:%zu: warn: unexpected end of "
502 			"interface description", p->fn, p->ln);
503 		p->phase = PHASE_INIT;
504 		return;
505 	} else if (0 == strcmp(cp, "*/")) {
506 		/* End of comment area, start of declarations. */
507 		p->phase = PHASE_DECL;
508 		return;
509 	} else if ('*' != cp[0] || '*' != cp[1]) {
510 		warnx("%s:%zu: warn: unexpected end of "
511 			"interface description", p->fn, p->ln);
512 		p->phase = PHASE_INIT;
513 		return;
514 	}
515 
516 	cp += 2;
517 	len -= 2;
518 
519 	while (isspace((int)*cp)) {
520 		cp++;
521 		len--;
522 	}
523 
524 	/* Fetch current interface definition. */
525 	d = TAILQ_LAST(&p->dqhead, defnq);
526 	assert(NULL != d);
527 
528 	/* Ignore leading blank lines. */
529 	if (0 == len && NULL == d->desc)
530 		return;
531 
532 	/* Collect SEE ALSO clauses. */
533 	if (0 == strncasecmp(cp, "see also:", 9)) {
534 		cp += 9;
535 		len -= 9;
536 		while (isspace((int)*cp)) {
537 			cp++;
538 			len--;
539 		}
540 		p->phase = PHASE_SEEALSO;
541 		d->seealso = realloc(d->seealso,
542 			d->seealsosz + len + 1);
543 		memcpy(d->seealso + d->seealsosz, cp, len);
544 		d->seealsosz += len;
545 		d->seealso[d->seealsosz] = '\0';
546 		return;
547 	}
548 
549 	/* White-space padding between lines. */
550 	if (NULL != d->desc &&
551 	    ' ' != d->desc[d->descsz - 1] &&
552 	    '\n' != d->desc[d->descsz - 1]) {
553 		d->desc = realloc(d->desc, d->descsz + 2);
554 		if (NULL == d->desc)
555 			err(EXIT_FAILURE, "%s:%zu: realloc",
556 				p->fn, p->ln);
557 		d->descsz++;
558 		strlcat(d->desc, " ", d->descsz + 1);
559 	}
560 
561 	/* Either append the line of a newline, if blank. */
562 	nsz = 0 == len ? 1 : len;
563 	if (NULL == d->desc) {
564 		d->desc = calloc(1, nsz + 1);
565 		if (NULL == d->desc)
566 			err(EXIT_FAILURE, "%s:%zu: calloc",
567 				p->fn, p->ln);
568 	} else {
569 		d->desc = realloc(d->desc, d->descsz + nsz + 1);
570 		if (NULL == d->desc)
571 			err(EXIT_FAILURE, "%s:%zu: realloc",
572 				p->fn, p->ln);
573 	}
574 	d->descsz += nsz;
575 	strlcat(d->desc, 0 == len ? "\n" : cp, d->descsz + 1);
576 }
577 
578 /*
579  * Copy all KEYWORDS into a buffer.
580  */
581 static void
582 keys(struct parse *p, char *cp, size_t len)
583 {
584 	struct defn	*d;
585 
586 	if ('\0' == *cp) {
587 		warnx("%s:%zu: warn: unexpected end of "
588 			"interface keywords", p->fn, p->ln);
589 		p->phase = PHASE_INIT;
590 		return;
591 	} else if (0 == strcmp(cp, "*/")) {
592 		/* End of comment area, start of declarations. */
593 		p->phase = PHASE_DECL;
594 		return;
595 	} else if ('*' != cp[0] || '*' != cp[1]) {
596 		if ('\0' != cp[1]) {
597 			warnx("%s:%zu: warn: unexpected end of "
598 				"interface keywords", p->fn, p->ln);
599 			p->phase = PHASE_INIT;
600 			return;
601 		} else
602 			warnx("%s:%zu: warn: workaround in effect "
603 				"for unexpected end of "
604 				"interface keywords", p->fn, p->ln);
605 	}
606 
607 	cp += 2;
608 	len -= 2;
609 	while (isspace((int)*cp)) {
610 		cp++;
611 		len--;
612 	}
613 
614 	if (0 == len) {
615 		p->phase = PHASE_DESC;
616 		return;
617 	} else if (strncmp(cp, "KEYWORDS:", 9))
618 		return;
619 
620 	cp += 9;
621 	len -= 9;
622 
623 	d = TAILQ_LAST(&p->dqhead, defnq);
624 	assert(NULL != d);
625 	d->keybuf = realloc(d->keybuf, d->keybufsz + len + 1);
626 	if (NULL == d->keybuf)
627 		err(EXIT_FAILURE, "%s:%zu: realloc", p->fn, p->ln);
628 	memcpy(d->keybuf + d->keybufsz, cp, len);
629 	d->keybufsz += len;
630 	d->keybuf[d->keybufsz] = '\0';
631 }
632 
633 /*
634  * Initial state is where we're scanning forward to find commented
635  * instances of CAPI3REF.
636  */
637 static void
638 init(struct parse *p, char *cp)
639 {
640 	struct defn	*d;
641 
642 	/* Look for comment hook. */
643 	if ('*' != cp[0] || '*' != cp[1])
644 		return;
645 	cp += 2;
646 	while (isspace((int)*cp))
647 		cp++;
648 
649 	/* Look for beginning of definition. */
650 	if (strncmp(cp, "CAPI3REF:", 9))
651 		return;
652 	cp += 9;
653 	while (isspace((int)*cp))
654 		cp++;
655 	if ('\0' == *cp) {
656 		warnx("%s:%zu: warn: unexpected end of "
657 			"interface definition", p->fn, p->ln);
658 		return;
659 	}
660 
661 	/* Add definition to list of existing ones. */
662 	d = calloc(1, sizeof(struct defn));
663 	if (NULL == d)
664 		err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln);
665 	d->name = strdup(cp);
666 	if (NULL == d->name)
667 		err(EXIT_FAILURE, "%s:%zu: strdup", p->fn, p->ln);
668 	d->fn = p->fn;
669 	d->ln = p->ln;
670 	p->phase = PHASE_KEYS;
671 	TAILQ_INIT(&d->dcqhead);
672 	TAILQ_INSERT_TAIL(&p->dqhead, d, entries);
673 }
674 
/*
 * Non-zero if "_cp" points at a character that ends the scan for a
 * declared name: one of "{", ";", "[" or ")", or a "(" that is not
 * immediately followed by "*".
 */
#define	BPOINT(_cp) \
	('{' == (_cp)[0] || \
	 ';' == (_cp)[0] || \
	 '[' == (_cp)[0] || \
	 ')' == (_cp)[0] || \
	 ('(' == (_cp)[0] && '*' != (_cp)[1]))
681 
682 /*
683  * Given a declaration (be it preprocessor or C), try to parse out a
684  * reasonable "name" for the affair.
685  * For a struct, for example, it'd be the struct name.
686  * For a typedef, it'd be the type name.
687  * For a function, it'd be the function name.
688  */
689 static void
690 grok_name(const struct decl *e,
691 	const char **start, size_t *sz)
692 {
693 	const char	*cp;
694 
695 	*start = NULL;
696 	*sz = 0;
697 
698 	if (DECLTYPE_CPP != e->type) {
699 		assert(';' == e->text[e->textsz - 1]);
700 		cp = e->text;
701 		do {
702 			while (isspace((int)*cp))
703 				cp++;
704 			if (BPOINT(cp))
705 				break;
706 			/* Function pointers... */
707 			if ('(' == *cp)
708 				cp++;
709 			/* Pass over pointers. */
710 			while ('*' == *cp)
711 				cp++;
712 			*start = cp;
713 			*sz = 0;
714 			while ( ! isspace((int)*cp)) {
715 				if (BPOINT(cp))
716 					break;
717 				cp++;
718 				(*sz)++;
719 			}
720 		} while ( ! BPOINT(cp));
721 	} else {
722 		*sz = e->textsz;
723 		*start = e->text;
724 	}
725 }
726 
/*
 * qsort(3) comparator for an array of C strings: orders the strings
 * without regard to case.
 */
static int
xrcmp(const void *p1, const void *p2)
{
	const char *const	*lhs = p1;
	const char *const	*rhs = p2;

	return(strcasecmp(*lhs, *rhs));
}
735 
736 /*
737  * Extract information from the interface definition.
738  * Mark it as "postprocessed" on success.
739  */
740 static void
741 postprocess(const char *prefix, struct defn *d)
742 {
743 	struct decl	*first;
744 	const char	*start;
745 	size_t		 offs, sz, i;
746 	ENTRY		 ent;
747 
748 	if (TAILQ_EMPTY(&d->dcqhead))
749 		return;
750 
751 	/* Find the first #define or declaration. */
752 	TAILQ_FOREACH(first, &d->dcqhead, entries)
753 		if (DECLTYPE_CPP == first->type ||
754 		    DECLTYPE_C == first->type)
755 			break;
756 
757 	if (NULL == first) {
758 		warnx("%s:%zu: no entry to document", d->fn, d->ln);
759 		return;
760 	}
761 
762 	/*
763 	 * Now compute the document name (`Dt').
764 	 * We'll also use this for the filename.
765 	 */
766 	grok_name(first, &start, &sz);
767 	if (NULL == start) {
768 		warnx("%s:%zu: couldn't deduce "
769 			"entry name", d->fn, d->ln);
770 		return;
771 	}
772 
773 	/* Document name needs all-caps. */
774 	d->dt = malloc(sz + 1);
775 	if (NULL == d->dt)
776 		err(EXIT_FAILURE, "malloc");
777 	memcpy(d->dt, start, sz);
778 	d->dt[sz] = '\0';
779 	for (i = 0; i < sz; i++)
780 		d->dt[i] = toupper((int)d->dt[i]);
781 
782 	/* Filename needs no special chars. */
783 	asprintf(&d->fname, "%s/%.*s.3",
784 		prefix, (int)sz, start);
785 	if (NULL == d->fname)
786 		err(EXIT_FAILURE, "asprintf");
787 
788 	offs = strlen(prefix) + 1;
789 	for (i = 0; i < sz; i++) {
790 		if (isalnum((int)d->fname[offs + i]) ||
791 		    '_' == d->fname[offs + i] ||
792 		    '-' == d->fname[offs + i])
793 			continue;
794 		d->fname[offs + i] = '_';
795 	}
796 
797 	/*
798 	 * First, extract all keywords.
799 	 */
800 	for (i = 0; i < d->keybufsz; ) {
801 		while (isspace((int)d->keybuf[i]))
802 			i++;
803 		if (i == d->keybufsz)
804 			break;
805 		sz = 0;
806 		start = &d->keybuf[i];
807 		if ('{' == d->keybuf[i]) {
808 			start = &d->keybuf[++i];
809 			for ( ; i < d->keybufsz; i++, sz++)
810 				if ('}' == d->keybuf[i])
811 					break;
812 			if ('}' == d->keybuf[i])
813 				i++;
814 		} else
815 			for ( ; i < d->keybufsz; i++, sz++)
816 				if (isspace((int)d->keybuf[i]))
817 					break;
818 		if (0 == sz)
819 			continue;
820 		d->keys = realloc(d->keys,
821 			(d->keysz + 1) * sizeof(char *));
822 		if (NULL == d->keys)
823 			err(EXIT_FAILURE, "realloc");
824 		d->keys[d->keysz] = malloc(sz + 1);
825 		if (NULL == d->keys[d->keysz])
826 			err(EXIT_FAILURE, "malloc");
827 		memcpy(d->keys[d->keysz], start, sz);
828 		d->keys[d->keysz][sz] = '\0';
829 		d->keysz++;
830 
831 		/* Hash the keyword. */
832 		ent.key = d->keys[d->keysz - 1];
833 		ent.data = d;
834 		(void)hsearch(ent, ENTER);
835 	}
836 
837 	/*
838 	 * Now extract all `Nm' values for this document.
839 	 * We only use CPP and C references, and hope for the best when
840 	 * doing so.
841 	 * Enter each one of these as a searchable keyword.
842 	 */
843 	TAILQ_FOREACH(first, &d->dcqhead, entries) {
844 		if (DECLTYPE_CPP != first->type &&
845 		    DECLTYPE_C != first->type)
846 			continue;
847 		grok_name(first, &start, &sz);
848 		if (NULL == start)
849 			continue;
850 		d->nms = realloc(d->nms,
851 			(d->nmsz + 1) * sizeof(char *));
852 		if (NULL == d->nms)
853 			err(EXIT_FAILURE, "realloc");
854 		d->nms[d->nmsz] = malloc(sz + 1);
855 		if (NULL == d->nms[d->nmsz])
856 			err(EXIT_FAILURE, "malloc");
857 		memcpy(d->nms[d->nmsz], start, sz);
858 		d->nms[d->nmsz][sz] = '\0';
859 		d->nmsz++;
860 
861 		/* Hash the name. */
862 		ent.key = d->nms[d->nmsz - 1];
863 		ent.data = d;
864 		(void)hsearch(ent, ENTER);
865 	}
866 
867 	if (0 == d->nmsz) {
868 		warnx("%s:%zu: couldn't deduce "
869 			"any names", d->fn, d->ln);
870 		return;
871 	}
872 
873 	/*
874 	 * Next, scan for all `Xr' values.
875 	 * We'll add more to this list later.
876 	 */
877 	for (i = 0; i < d->seealsosz; i++) {
878 		/*
879 		 * Find next value starting with `['.
880 		 * There's other stuff in there (whitespace or
881 		 * free text leading up to these) that we're ok
882 		 * to ignore.
883 		 */
884 		while (i < d->seealsosz && '[' != d->seealso[i])
885 			i++;
886 		if (i == d->seealsosz)
887 			break;
888 
889 		/*
890 		 * Now scan for the matching `]'.
891 		 * We can also have a vertical bar if we're separating a
892 		 * keyword and its shown name.
893 		 */
894 		start = &d->seealso[++i];
895 		sz = 0;
896 		while (i < d->seealsosz &&
897 		      ']' != d->seealso[i] &&
898 		      '|' != d->seealso[i]) {
899 			i++;
900 			sz++;
901 		}
902 		if (i == d->seealsosz)
903 			break;
904 		if (0 == sz)
905 			continue;
906 
907 		/*
908 		 * Continue on to the end-of-reference, if we weren't
909 		 * there to begin with.
910 		 */
911 		if (']' != d->seealso[i])
912 			while (i < d->seealsosz &&
913 			      ']' != d->seealso[i])
914 				i++;
915 
916 		/* Strip trailing whitespace. */
917 		while (sz > 1 && ' ' == start[sz - 1])
918 			sz--;
919 
920 		/* Strip trailing parenthesis. */
921 		if (sz > 2 &&
922 		    '(' == start[sz - 2] &&
923 	 	    ')' == start[sz - 1])
924 			sz -= 2;
925 
926 		d->xrs = realloc(d->xrs,
927 			(d->xrsz + 1) * sizeof(char *));
928 		if (NULL == d->xrs)
929 			err(EXIT_FAILURE, "realloc");
930 		d->xrs[d->xrsz] = malloc(sz + 1);
931 		if (NULL == d->xrs[d->xrsz])
932 			err(EXIT_FAILURE, "malloc");
933 		memcpy(d->xrs[d->xrsz], start, sz);
934 		d->xrs[d->xrsz][sz] = '\0';
935 		d->xrsz++;
936 	}
937 
938 	/*
939 	 * Next, extract all references.
940 	 * We'll accumulate these into a list of SEE ALSO tags, after.
941 	 * See how these are parsed above for a description: this is
942 	 * basically the same thing.
943 	 */
944 	for (i = 0; i < d->descsz; i++) {
945 		if ('[' != d->desc[i])
946 			continue;
947 		i++;
948 		if ('[' == d->desc[i])
949 			continue;
950 
951 		start = &d->desc[i];
952 		for (sz = 0; i < d->descsz; i++, sz++)
953 			if (']' == d->desc[i] ||
954 			    '|' == d->desc[i])
955 				break;
956 
957 		if (i == d->descsz)
958 			break;
959 		else if (sz == 0)
960 			continue;
961 
962 		if (']' != d->desc[i])
963 			while (i < d->descsz &&
964 			      ']' != d->desc[i])
965 				i++;
966 
967 		while (sz > 1 && ' ' == start[sz - 1])
968 			sz--;
969 
970 		if (sz > 2 &&
971 		    '(' == start[sz - 2] &&
972 		    ')' == start[sz - 1])
973 			sz -= 2;
974 
975 		d->xrs = realloc(d->xrs,
976 			(d->xrsz + 1) * sizeof(char *));
977 		if (NULL == d->xrs)
978 			err(EXIT_FAILURE, "realloc");
979 		d->xrs[d->xrsz] = malloc(sz + 1);
980 		if (NULL == d->xrs[d->xrsz])
981 			err(EXIT_FAILURE, "malloc");
982 		memcpy(d->xrs[d->xrsz], start, sz);
983 		d->xrs[d->xrsz][sz] = '\0';
984 		d->xrsz++;
985 	}
986 
987 	qsort(d->xrs, d->xrsz, sizeof(char *), xrcmp);
988 	d->postprocessed = 1;
989 }
990 
991 /*
992  * Convenience function to look up a keyword.
993  * Returns the keyword's file if found or NULL.
994  */
995 static const char *
996 lookup(char *key)
997 {
998 	ENTRY		 ent;
999 	ENTRY		*res;
1000 	struct defn	*d;
1001 
1002 	ent.key = key;
1003 	res = hsearch(ent, FIND);
1004 	if (NULL == res)
1005 		return(NULL);
1006 	d = (struct defn *)res->data;
1007 	if (0 == d->nmsz)
1008 		return(NULL);
1009 	assert(NULL != d->nms[0]);
1010 	return(d->nms[0]);
1011 }
1012 
1013 /*
1014  * Emit a valid mdoc(7) document within the given prefix.
1015  */
1016 static void
1017 emit(const struct defn *d)
1018 {
1019 	struct decl	*first;
1020 	size_t		 sz, i, col, last, ns;
1021 	FILE		*f;
1022 	char		*cp;
1023 	const char	*res, *lastres, *args, *str, *end;
1024 	enum tag	 tag;
1025 	enum preproc	 pre;
1026 
1027 	if ( ! d->postprocessed) {
1028 		warnx("%s:%zu: interface has errors, not "
1029 			"producing manpage", d->fn, d->ln);
1030 		return;
1031 	}
1032 
1033 	if (0 == nofile) {
1034 		if (NULL == (f = fopen(d->fname, "w"))) {
1035 			warn("%s: fopen", d->fname);
1036 			return;
1037 		}
1038 	} else
1039 		f = stdout;
1040 
1041 	/* Begin by outputting the mdoc(7) header. */
1042 	fputs(".Dd $" "Mdocdate$\n", f);
1043 	fprintf(f, ".Dt %s 3\n", d->dt);
1044 	fputs(".Os\n", f);
1045 	fputs(".Sh NAME\n", f);
1046 
1047 	/* Now print the name bits of each declaration. */
1048 	for (i = 0; i < d->nmsz; i++)
1049 		fprintf(f, ".Nm %s%s\n", d->nms[i],
1050 			i < d->nmsz - 1 ? " ," : "");
1051 
1052 	fprintf(f, ".Nd %s\n", d->name);
1053 	fputs(".Sh SYNOPSIS\n", f);
1054 
1055 	TAILQ_FOREACH(first, &d->dcqhead, entries) {
1056 		if (DECLTYPE_CPP != first->type &&
1057 		    DECLTYPE_C != first->type)
1058 			continue;
1059 
1060 		/* Easy: just print the CPP name. */
1061 		if (DECLTYPE_CPP == first->type) {
1062 			fprintf(f, ".Fd #define %s\n",
1063 				first->text);
1064 			continue;
1065 		}
1066 
1067 		/* First, strip out the sqlite CPPs. */
1068 		for (i = 0; i < first->textsz; ) {
1069 			for (pre = 0; pre < PREPROC__MAX; pre++) {
1070 				sz = strlen(preprocs[pre]);
1071 				if (strncmp(preprocs[pre],
1072 				    &first->text[i], sz))
1073 					continue;
1074 				i += sz;
1075 				while (isspace((int)first->text[i]))
1076 					i++;
1077 				break;
1078 			}
1079 			if (pre == PREPROC__MAX)
1080 				break;
1081 		}
1082 
1083 		/* If we're a typedef, immediately print Vt. */
1084 		if (0 == strncmp(&first->text[i], "typedef", 7)) {
1085 			fprintf(f, ".Vt %s\n", &first->text[i]);
1086 			continue;
1087 		}
1088 
1089 		/* Are we a struct? */
1090 		if (first->textsz > 2 &&
1091 		    '}' == first->text[first->textsz - 2] &&
1092 		    NULL != (cp = strchr(&first->text[i], '{'))) {
1093 			*cp = '\0';
1094 			fprintf(f, ".Vt %s;\n", &first->text[i]);
1095 			/* Restore brace for later usage. */
1096 			*cp = '{';
1097 			continue;
1098 		}
1099 
1100 		/* Catch remaining non-functions. */
1101 		if (first->textsz > 2 &&
1102 		    ')' != first->text[first->textsz - 2]) {
1103 			fprintf(f, ".Vt %s\n", &first->text[i]);
1104 			continue;
1105 		}
1106 
1107 		str = &first->text[i];
1108 		if (NULL == (args = strchr(str, '('))) {
1109 			/* What is this? */
1110 			fputs(".Bd -literal\n", f);
1111 			fputs(&first->text[i], f);
1112 			fputs("\n.Ed\n", f);
1113 			continue;
1114 		}
1115 
1116 		/* Scroll back to end of function name. */
1117 		end = args - 1;
1118 		while (end > str && isspace((int)*end))
1119 			end--;
1120 
1121 		/* Scroll back to what comes before. */
1122 		for ( ; end > str; end--)
1123 			if (isspace((int)*end) || '*' == *end)
1124 				break;
1125 
1126 		/*
1127 		 * If we can't find what came before, then the function
1128 		 * has no type, which is odd... let's just call it void.
1129 		 */
1130 		if (end > str) {
1131 			fprintf(f, ".Ft %.*s\n",
1132 				(int)(end - str + 1), str);
1133 			fprintf(f, ".Fo %.*s\n",
1134 				(int)(args - end - 1), end + 1);
1135 		} else {
1136 			fputs(".Ft void\n", f);
1137 			fprintf(f, ".Fo %.*s\n", (int)(args - end), end);
1138 		}
1139 
1140 		/*
1141 		 * Convert function arguments into `Fa' clauses.
1142 		 * This also handles nested function pointers, which
1143 		 * would otherwise throw off the delimeters.
1144 		 */
1145 		for (;;) {
1146 			str = ++args;
1147 			while (isspace((int)*str))
1148 				str++;
1149 			fputs(".Fa \"", f);
1150 			ns = 0;
1151 			while ('\0' != *str &&
1152 			       (ns || ',' != *str) &&
1153 			       (ns || ')' != *str)) {
1154 				if ('/' == str[0] && '*' == str[1]) {
1155 					str += 2;
1156 					for ( ; '\0' != str[0]; str++)
1157 						if ('*' == str[0] && '/' == str[1])
1158 							break;
1159 					if ('\0' == *str)
1160 						break;
1161 					str += 2;
1162 					while (isspace((int)*str))
1163 						str++;
1164 					if ('\0' == *str ||
1165 					    (0 == ns && ',' == *str) ||
1166 					    (0 == ns && ')' == *str))
1167 						break;
1168 				}
1169 				if ('(' == *str)
1170 					ns++;
1171 				else if (')' == *str)
1172 					ns--;
1173 				fputc(*str, f);
1174 				str++;
1175 			}
1176 			fputs("\"\n", f);
1177 			if ('\0' == *str || ')' == *str)
1178 				break;
1179 			args = str;
1180 		}
1181 
1182 		fputs(".Fc\n", f);
1183 	}
1184 
1185 	fputs(".Sh DESCRIPTION\n", f);
1186 
1187 	/*
1188 	 * Strip the crap out of the description.
1189 	 * "Crap" consists of things I don't understand that mess up
1190 	 * parsing of the HTML, for instance,
1191 	 *   <dl>[[foo bar]]<dt>foo bar</dt>...</dl>
1192 	 * These are not well-formed HTML.
1193 	 */
1194 	for (i = 0; i < d->descsz; i++) {
1195 		if ('^' == d->desc[i] &&
1196 		    '(' == d->desc[i + 1]) {
1197 			d->desc[i] = d->desc[i + 1] = ' ';
1198 			i++;
1199 			continue;
1200 		} else if (')' == d->desc[i] &&
1201 			   '^' == d->desc[i + 1]) {
1202 			d->desc[i] = d->desc[i + 1] = ' ';
1203 			i++;
1204 			continue;
1205 		} else if ('^' == d->desc[i]) {
1206 			d->desc[i] = ' ';
1207 			continue;
1208 		} else if ('[' != d->desc[i] ||
1209 			   '[' != d->desc[i + 1])
1210 			continue;
1211 		d->desc[i] = d->desc[i + 1] = ' ';
1212 		for (i += 2; i < d->descsz; i++) {
1213 			if (']' == d->desc[i] &&
1214 			    ']' == d->desc[i + 1])
1215 				break;
1216 			d->desc[i] = ' ';
1217 		}
1218 		if (i == d->descsz)
1219 			continue;
1220 		d->desc[i] = d->desc[i + 1] = ' ';
1221 		i++;
1222 	}
1223 
1224 	/*
1225 	 * Here we go!
1226 	 * Print out the description as best we can.
1227 	 * Do on-the-fly processing of any HTML we encounter into
1228 	 * mdoc(7) and try to break lines up.
1229 	 */
1230 	col = 0;
1231 	for (i = 0; i < d->descsz; ) {
1232 		/*
1233 		 * Newlines are paragraph breaks.
1234 		 * If we have multiple newlines, then keep to a single
1235 		 * `Pp' to keep it clean.
1236 		 * Only do this if we're not before a block-level HTML,
1237 		 * as this would mean, for instance, a `Pp'-`Bd' pair.
1238 		 */
1239 		if ('\n' == d->desc[i]) {
1240 			while (isspace((int)d->desc[i]))
1241 				i++;
1242 			for (tag = 0; tag < TAG__MAX; tag++) {
1243 				sz = strlen(tags[tag].html);
1244 				if (0 == strncmp(&d->desc[i], tags[tag].html, sz))
1245 					break;
1246 			}
1247 			if (TAG__MAX == tag ||
1248 			    TAGINFO_INLINE & tags[tag].flags) {
1249 				if (col > 0)
1250 					fputs("\n", f);
1251 				fputs(".Pp\n", f);
1252 				/* We're on a new line. */
1253 				col = 0;
1254 			}
1255 			continue;
1256 		}
1257 
1258 		/*
1259 		 * New sentence, new line.
1260 		 * We guess whether this is the case by using the
1261 		 * dumbest possible heuristic.
1262 		 */
1263 		if (' ' == d->desc[i] && i &&
1264 		    '.' == d->desc[i - 1]) {
1265 			while (' ' == d->desc[i])
1266 				i++;
1267 			fputs("\n", f);
1268 			col = 0;
1269 			continue;
1270 		}
1271 		/*
1272 		 * After 65 characters, force a break when we encounter
1273 		 * white-space to keep our lines more or less tidy.
1274 		 */
1275 		if (col > 65 && ' ' == d->desc[i]) {
1276 			while (' ' == d->desc[i])
1277 				i++;
1278 			fputs("\n", f);
1279 			col = 0;
1280 			continue;
1281 		}
1282 
1283 		/*
1284 		 * Parsing HTML tags.
1285 		 * Why, sqlite guys, couldn't you have used something
1286 		 * like markdown or something?
1287 		 * Sheesh.
1288 		 */
1289 		if ('<' == d->desc[i]) {
1290 			for (tag = 0; tag < TAG__MAX; tag++) {
1291 				sz = strlen(tags[tag].html);
1292 				if (strncmp(&d->desc[i],
1293 				    tags[tag].html, sz))
1294 					continue;
1295 				/*
1296 				 * NOOP tags don't do anything, such as
1297 				 * the case of `</dd>', which only
1298 				 * serves to end an `It' block that will
1299 				 * be closed out by a subsequent `It' or
1300 				 * end of clause `El' anyway.
1301 				 * Skip the trailing space.
1302 				 */
1303 				if (TAGINFO_NOOP & tags[tag].flags) {
1304 					i += sz;
1305 					while (isspace((int)d->desc[i]))
1306 						i++;
1307 					break;
1308 				} else if (TAGINFO_INLINE & tags[tag].flags) {
1309 					fputs(tags[tag].mdoc, f);
1310 					i += sz;
1311 					break;
1312 				}
1313 
1314 				/*
1315 				 * A breaking mdoc(7) statement.
1316 				 * Break the current line, output the
1317 				 * macro, and conditionally break
1318 				 * following that (or we might do
1319 				 * nothing at all).
1320 				 */
1321 				if (col > 0) {
1322 					fputs("\n", f);
1323 					col = 0;
1324 				}
1325 				fputs(tags[tag].mdoc, f);
1326 				if ( ! (TAGINFO_NOBR & tags[tag].flags)) {
1327 					fputs("\n", f);
1328 					col = 0;
1329 				} else if ( ! (TAGINFO_NOSP & tags[tag].flags)) {
1330 					fputs(" ", f);
1331 					col++;
1332 				}
1333 				i += sz;
1334 				while (isspace((int)d->desc[i]))
1335 					i++;
1336 				break;
1337 			}
1338 			if (tag < TAG__MAX)
1339 				continue;
1340 		} else if ('[' == d->desc[i] &&
1341 			   ']' != d->desc[i + 1]) {
1342 			/* Do we start at the bracket or bar? */
1343 			for (sz = i + 1; sz < d->descsz; sz++)
1344 				if ('|' == d->desc[sz] ||
1345 				    ']' == d->desc[sz])
1346 					break;
1347 
1348 			if (sz == d->descsz)
1349 				continue;
1350 			else if ('|' == d->desc[sz])
1351 				i = sz + 1;
1352 			else
1353 				i = i + 1;
1354 
1355 			/*
1356 			 * Now handle in-page references.
1357 			 * Print them out as-is: we've already
1358 			 * accumulated them into our "SEE ALSO" values,
1359 			 * which we'll use below.
1360 			 */
1361 			for ( ; i < d->descsz; i++, col++) {
1362 				if (']' == d->desc[i]) {
1363 					i++;
1364 					break;
1365 				}
1366 				fputc(d->desc[i], f);
1367 				col++;
1368 			}
1369 			continue;
1370 		}
1371 
1372 		if (' ' == d->desc[i] && 0 == col) {
1373 			while (' ' == d->desc[i])
1374 				i++;
1375 			continue;
1376 		}
1377 
1378 		assert('\n' != d->desc[i]);
1379 
1380 		/*
1381 		 * Handle some oddities.
1382 		 * The following HTML escapes exist in the output that I
1383 		 * could find.
1384 		 * There might be others...
1385 		 */
1386 		if (0 == strncmp(&d->desc[i], "&nbsp;", 6)) {
1387 			i += 6;
1388 			fputc(' ', f);
1389 		} else if (0 == strncmp(&d->desc[i], "&lt;", 4)) {
1390 			i += 4;
1391 			fputc('<', f);
1392 		} else if (0 == strncmp(&d->desc[i], "&gt;", 4)) {
1393 			i += 4;
1394 			fputc('>', f);
1395 		} else if (0 == strncmp(&d->desc[i], "&#91;", 5)) {
1396 			i += 5;
1397 			fputc('[', f);
1398 		} else {
1399 			/* Make sure we don't trigger a macro. */
1400 			if (0 == col && '.' == d->desc[i])
1401 				fputs("\\&", f);
1402 			fputc(d->desc[i], f);
1403 			i++;
1404 		}
1405 
1406 		col++;
1407 	}
1408 
1409 	if (col > 0)
1410 		fputs("\n", f);
1411 
1412 	if (d->xrsz > 0) {
1413 		/*
1414 		 * Look up all of our keywords (which are in the xrs
1415 		 * field) in the table of all known keywords.
1416 		 * Don't print duplicates.
1417 		 */
1418 		lastres = NULL;
1419 		for (last = 0, i = 0; i < d->xrsz; i++) {
1420 			res = lookup(d->xrs[i]);
1421 			/* Ignore self-reference. */
1422 			if (res == d->nms[0] && verbose)
1423 				warnx("%s:%zu: self-reference: %s",
1424 					d->fn, d->ln, d->xrs[i]);
1425 			if (res == d->nms[0] && verbose)
1426 				continue;
1427 			if (NULL == res && verbose)
1428 				warnx("%s:%zu: ref not found: %s",
1429 					d->fn, d->ln, d->xrs[i]);
1430 			if (NULL == res)
1431 				continue;
1432 
1433 			/* Ignore duplicates. */
1434 			if (NULL != lastres && lastres == res)
1435 				continue;
1436 			if (last)
1437 				fputs(" ,\n", f);
1438 			else
1439 				fputs(".Sh SEE ALSO\n", f);
1440 			fprintf(f, ".Xr %s 3", res);
1441 			last = 1;
1442 			lastres = res;
1443 		}
1444 		if (last)
1445 			fputs("\n", f);
1446 	}
1447 
1448 	if (0 == nofile)
1449 		fclose(f);
1450 }
1451 
1452 int
1453 main(int argc, char *argv[])
1454 {
1455 	size_t		 i, len;
1456 	FILE		*f;
1457 	char		*cp;
1458 	const char	*prefix;
1459 	struct parse	 p;
1460 	int		 rc, ch;
1461 	struct defn	*d;
1462 	struct decl	*e;
1463 
1464 	rc = 0;
1465 	prefix = ".";
1466 	f = stdin;
1467 	memset(&p, 0, sizeof(struct parse));
1468 	p.fn = "<stdin>";
1469 	p.ln = 0;
1470 	p.phase = PHASE_INIT;
1471 	TAILQ_INIT(&p.dqhead);
1472 
1473 	while (-1 != (ch = getopt(argc, argv, "np:v")))
1474 		switch (ch) {
1475 		case ('n'):
1476 			nofile = 1;
1477 			break;
1478 		case ('p'):
1479 			prefix = optarg;
1480 			break;
1481 		case ('v'):
1482 			verbose = 1;
1483 			break;
1484 		default:
1485 			goto usage;
1486 		}
1487 
1488 	/*
1489 	 * Read in line-by-line and process in the phase dictated by our
1490 	 * finite state automaton.
1491 	 */
1492 	while (NULL != (cp = fgetln(f, &len))) {
1493 		assert(len > 0);
1494 		p.ln++;
1495 		if ('\n' != cp[len - 1]) {
1496 			warnx("%s:%zu: unterminated line", p.fn, p.ln);
1497 			break;
1498 		}
1499 		cp[--len] = '\0';
1500 		/* Lines are always nil-terminated. */
1501 		switch (p.phase) {
1502 		case (PHASE_INIT):
1503 			init(&p, cp);
1504 			break;
1505 		case (PHASE_KEYS):
1506 			keys(&p, cp, len);
1507 			break;
1508 		case (PHASE_DESC):
1509 			desc(&p, cp, len);
1510 			break;
1511 		case (PHASE_SEEALSO):
1512 			seealso(&p, cp, len);
1513 			break;
1514 		case (PHASE_DECL):
1515 			decl(&p, cp, len);
1516 			break;
1517 		}
1518 	}
1519 
1520 	/*
1521 	 * If we hit the last line, then try to process.
1522 	 * Otherwise, we failed along the way.
1523 	 */
1524 	if (NULL == cp) {
1525 		/*
1526 		 * Allow us to be at the declarations or scanning for
1527 		 * the next clause.
1528 		 */
1529 		if (PHASE_INIT == p.phase ||
1530 		    PHASE_DECL == p.phase) {
1531 			if (0 == hcreate(5000))
1532 				err(EXIT_FAILURE, "hcreate");
1533 			TAILQ_FOREACH(d, &p.dqhead, entries)
1534 				postprocess(prefix, d);
1535 			TAILQ_FOREACH(d, &p.dqhead, entries)
1536 				emit(d);
1537 			rc = 1;
1538 		} else if (PHASE_DECL != p.phase)
1539 			warnx("%s:%zu: exit when not in "
1540 				"initial state", p.fn, p.ln);
1541 	}
1542 
1543 	while ( ! TAILQ_EMPTY(&p.dqhead)) {
1544 		d = TAILQ_FIRST(&p.dqhead);
1545 		TAILQ_REMOVE(&p.dqhead, d, entries);
1546 		while ( ! TAILQ_EMPTY(&d->dcqhead)) {
1547 			e = TAILQ_FIRST(&d->dcqhead);
1548 			TAILQ_REMOVE(&d->dcqhead, e, entries);
1549 			free(e->text);
1550 			free(e);
1551 		}
1552 		free(d->name);
1553 		free(d->desc);
1554 		free(d->dt);
1555 		for (i = 0; i < d->nmsz; i++)
1556 			free(d->nms[i]);
1557 		for (i = 0; i < d->xrsz; i++)
1558 			free(d->xrs[i]);
1559 		for (i = 0; i < d->keysz; i++)
1560 			free(d->keys[i]);
1561 		free(d->keys);
1562 		free(d->nms);
1563 		free(d->xrs);
1564 		free(d->fname);
1565 		free(d->seealso);
1566 		free(d->keybuf);
1567 		free(d);
1568 	}
1569 
1570 	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1571 usage:
1572 	fprintf(stderr, "usage: %s [-nv] [-p prefix]\n", getprogname());
1573 	return(EXIT_FAILURE);
1574 }
1575