xref: /netbsd-src/external/public-domain/sqlite/sqlite2mdoc/main.c (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 /*	$Id: main.c,v 1.2 2016/12/18 16:56:32 christos Exp $ */
2 /*
3  * Copyright (c) 2016 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #ifdef __linux__
18 #define _GNU_SOURCE
19 #endif
20 #include <sys/queue.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <err.h>
25 #include <time.h>
26 #include <getopt.h>
27 #include <search.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #ifdef __linux__
32 #include <bsd/stdio.h>
33 #include <bsd/stdlib.h>
34 #include <bsd/string.h>
35 #endif
36 
/*
 * Parser state: which part of the input the current line belongs to.
 * Each line handler moves the parse to the next state as it
 * recognises the end of its own section.
 */
enum	phase {
	/* Scanning for the start of an interface definition. */
	PHASE_INIT = 0,
	/* Definition seen; now collecting keywords. */
	PHASE_KEYS,
	/* Keywords done; now collecting the description. */
	PHASE_DESC,
	/* Inside a "see also:" clause found within the description. */
	PHASE_SEEALSO,
	/* Comment closed; now collecting declarations. */
	PHASE_DECL
};
47 
/*
 * Preliminary classification of a collected declaration.
 */
enum	decltype {
	/* Pre-processor definition. */
	DECLTYPE_CPP,
	/* Non-preprocessor text closed by a semicolon. */
	DECLTYPE_C,
	/* Non-preprocessor text that never saw its semicolon. */
	DECLTYPE_NEITHER
};
56 
/*
 * Decorative pre-processor tokens that get discarded when they appear
 * in variable and function declarations.
 * PREPROC__MAX is the number of tokens, not a token itself.
 */
enum	preproc {
	PREPROC_SQLITE_API,
	PREPROC_SQLITE_DEPRECATED,
	PREPROC_SQLITE_EXPERIMENTAL,
	PREPROC_SQLITE_EXTERN,
	PREPROC__MAX
};
67 
/*
 * The HTML tags we know how to handle.
 * Every element has a CLOSE value immediately followed by its OPEN
 * value; TAG__MAX is the number of tags, not a tag itself.
 */
enum	tag {
	TAG_B_CLOSE,		/* </b> */
	TAG_B_OPEN,		/* <b> */
	TAG_BLOCK_CLOSE,	/* </blockquote> */
	TAG_BLOCK_OPEN,		/* <blockquote> */
	TAG_DD_CLOSE,		/* </dd> */
	TAG_DD_OPEN,		/* <dd> */
	TAG_DL_CLOSE,		/* </dl> */
	TAG_DL_OPEN,		/* <dl> */
	TAG_DT_CLOSE,		/* </dt> */
	TAG_DT_OPEN,		/* <dt> */
	TAG_H3_CLOSE,		/* </h3> */
	TAG_H3_OPEN,		/* <h3> */
	TAG_LI_CLOSE,		/* </li> */
	TAG_LI_OPEN,		/* <li> */
	TAG_OL_CLOSE,		/* </ol> */
	TAG_OL_OPEN,		/* <ol> */
	TAG_PRE_CLOSE,		/* </pre> */
	TAG_PRE_OPEN,		/* <pre> */
	TAG_UL_CLOSE,		/* </ul> */
	TAG_UL_OPEN,		/* <ul> */
	TAG__MAX
};
94 
/* Tail queue of interface definitions (struct defn). */
TAILQ_HEAD(defnq, defn);
/* Tail queue of declarations (struct decl). */
TAILQ_HEAD(declq, decl);
97 
98 /*
99  * A declaration of type DECLTYPE_CPP or DECLTYPE_C.
100  * These need not be unique (if ifdef'd).
101  */
102 struct	decl {
103 	enum decltype	 type; /* type of declaration */
104 	char		*text; /* text */
105 	size_t		 textsz; /* strlen(text) */
106 	TAILQ_ENTRY(decl) entries;
107 };
108 
109 /*
110  * A definition is basically the manpage contents.
111  */
112 struct	defn {
113 	char		 *name; /* really Nd */
114 	TAILQ_ENTRY(defn) entries;
115 	char		 *desc; /* long description */
116 	size_t		  descsz; /* strlen(desc) */
117 	struct declq	  dcqhead; /* declarations */
118 	int		  multiline; /* used when parsing */
119 	int		  instruct; /* used when parsing */
120 	const char	 *fn; /* parsed from file */
121 	size_t		  ln; /* parsed at line */
122 	int		  postprocessed; /* good for emission? */
123 	char		 *dt; /* manpage title */
124 	char		**nms; /* manpage names */
125 	size_t		  nmsz; /* number of names */
126 	char		 *fname; /* manpage filename */
127 	char		 *keybuf; /* raw keywords */
128 	size_t		  keybufsz; /* length of "keysbuf" */
129 	char		 *seealso; /* see also tags */
130 	size_t		  seealsosz; /* length of seealso */
131 	char		**xrs; /* parsed "see also" references */
132 	size_t		  xrsz; /* number of references */
133 	char		**keys; /* parsed keywords */
134 	size_t		  keysz; /* number of keywords */
135 };
136 
137 /*
138  * Entire parse routine.
139  */
140 struct	parse {
141 	enum phase	 phase; /* phase of parse */
142 	size_t		 ln; /* line number */
143 	const char	*fn; /* open file */
144 	struct defnq	 dqhead; /* definitions */
145 };
146 
/*
 * Flag bits for "struct taginfo".
 */
#define	TAGINFO_NOBR	 0x01 /* follow w/space, not newline */
#define	TAGINFO_NOOP	 0x02 /* just strip out */
#define	TAGINFO_NOSP	 0x04 /* follow w/o space or newline */
#define	TAGINFO_INLINE	 0x08 /* inline block (notused) */

/*
 * Describes what to do with one HTML tag found in the text.
 */
struct	taginfo {
	/* HTML text to key on. */
	const char	*html;
	/* mdoc(7) text to generate in its place. */
	const char	*mdoc;
	/* Bitwise OR of the TAGINFO_xxx values. */
	unsigned int	 flags;
};
159 
160 static	const struct taginfo tags[TAG__MAX] = {
161 	{ "</b>", "\\fP", TAGINFO_INLINE }, /* TAG_B_CLOSE */
162 	{ "<b>", "\\fB", TAGINFO_INLINE }, /* TAG_B_OPEN */
163 	{ "</blockquote>", ".Ed\n.Pp", 0 }, /* TAG_BLOCK_CLOSE */
164 	{ "<blockquote>", ".Bd -ragged", 0 }, /* TAG_BLOCK_OPEN */
165 	{ "</dd>", "", TAGINFO_NOOP }, /* TAG_DD_CLOSE */
166 	{ "<dd>", "", TAGINFO_NOOP }, /* TAG_DD_OPEN */
167 	{ "</dl>", ".El\n.Pp", 0 }, /* TAG_DL_CLOSE */
168 	{ "<dl>", ".Bl -tag -width Ds", 0 }, /* TAG_DL_OPEN */
169 	{ "</dt>", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_DT_CLOSE */
170 	{ "<dt>", ".It", TAGINFO_NOBR }, /* TAG_DT_OPEN */
171 	{ "</h3>", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_H3_CLOSE */
172 	{ "<h3>", ".Ss", TAGINFO_NOBR }, /* TAG_H3_OPEN */
173 	{ "</li>", "", TAGINFO_NOOP }, /* TAG_LI_CLOSE */
174 	{ "<li>", ".It", 0 }, /* TAG_LI_OPEN */
175 	{ "</ol>", ".El\n.Pp", 0 }, /* TAG_OL_CLOSE */
176 	{ "<ol>", ".Bl -enum", 0 }, /* TAG_OL_OPEN */
177 	{ "</pre>", ".Ed\n.Pp", 0 }, /* TAG_PRE_CLOSE */
178 	{ "<pre>", ".Bd -literal", 0 }, /* TAG_PRE_OPEN */
179 	{ "</ul>", ".El\n.Pp", 0 }, /* TAG_UL_CLOSE */
180 	{ "<ul>", ".Bl -bullet", 0 }, /* TAG_UL_OPEN */
181 };
182 
183 static	const char *const preprocs[TAG__MAX] = {
184 	"SQLITE_API", /* PREPROC_SQLITE_API */
185 	"SQLITE_DEPRECATED", /* PREPROC_SQLITE_DEPRECATED */
186 	"SQLITE_EXPERIMENTAL", /* PREPROC_SQLITE_EXPERIMENTAL */
187 	"SQLITE_EXTERN", /* PREPROC_SQLITE_EXTERN */
188 };
189 
/* Non-zero to enable verbose reporting. */
static	int verbose;

/* Non-zero to write to stdout instead of creating any files. */
static	int nofile;
194 
195 static void
196 decl_function_add(struct parse *p, char **etext,
197 	size_t *etextsz, const char *cp, size_t len)
198 {
199 
200 	if (' ' != (*etext)[*etextsz - 1]) {
201 		*etext = realloc(*etext, *etextsz + 2);
202 		if (NULL == *etext)
203 			err(EXIT_FAILURE, "%s:%zu: "
204 				"realloc", p->fn, p->ln);
205 		(*etextsz)++;
206 		strlcat(*etext, " ", *etextsz + 1);
207 	}
208 	*etext = realloc(*etext, *etextsz + len + 1);
209 	if (NULL == *etext)
210 		err(EXIT_FAILURE, "%s:%zu: realloc", p->fn, p->ln);
211 	memcpy(*etext + *etextsz, cp, len);
212 	*etextsz += len;
213 	(*etext)[*etextsz] = '\0';
214 }
215 
216 static void
217 decl_function_copy(struct parse *p, char **etext,
218 	size_t *etextsz, const char *cp, size_t len)
219 {
220 
221 	*etext = malloc(len + 1);
222 	if (NULL == *etext)
223 		err(EXIT_FAILURE, "%s:%zu: strdup", p->fn, p->ln);
224 	memcpy(*etext, cp, len);
225 	*etextsz = len;
226 	(*etext)[*etextsz] = '\0';
227 }
228 
229 /*
230  * A C function (or variable, or whatever).
231  * This is more specifically any non-preprocessor text.
232  */
233 static int
234 decl_function(struct parse *p, char *cp, size_t len)
235 {
236 	char		*ep, *ncp, *lcp, *rcp;
237 	size_t		 nlen;
238 	struct defn	*d;
239 	struct decl	*e;
240 
241 	/* Fetch current interface definition. */
242 	d = TAILQ_LAST(&p->dqhead, defnq);
243 	assert(NULL != d);
244 
245 	/*
246 	 * Since C tokens are semicolon-separated, we may be invoked any
247 	 * number of times per a single line.
248 	 */
249 again:
250 	while (isspace((int)*cp)) {
251 		cp++;
252 		len--;
253 	}
254 	if ('\0' == *cp)
255 		return(1);
256 
257 	/* Whether we're a continuation clause. */
258 	if (d->multiline) {
259 		/* This might be NULL if we're not a continuation. */
260 		e = TAILQ_LAST(&d->dcqhead, declq);
261 		assert(DECLTYPE_C == e->type);
262 		assert(NULL != e);
263 		assert(NULL != e->text);
264 		assert(e->textsz);
265 	} else {
266 		assert(0 == d->instruct);
267 		e = calloc(1, sizeof(struct decl));
268 		e->type = DECLTYPE_C;
269 		if (NULL == e)
270 			err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln);
271 		TAILQ_INSERT_TAIL(&d->dcqhead, e, entries);
272 	}
273 
274 	/*
275 	 * We begin by seeing if there's a semicolon on this line.
276 	 * If there is, we'll need to do some special handling.
277 	 */
278 	ep = strchr(cp, ';');
279 	lcp = strchr(cp, '{');
280 	rcp = strchr(cp, '}');
281 
282 	/* We're only a partial statement (i.e., no closure). */
283 	if (NULL == ep && d->multiline) {
284 		assert(NULL != e->text);
285 		assert(e->textsz > 0);
286 		/* Is a struct starting or ending here? */
287 		if (d->instruct && NULL != rcp)
288 			d->instruct--;
289 		else if (NULL != lcp)
290 			d->instruct++;
291 		decl_function_add(p, &e->text, &e->textsz, cp, len);
292 		return(1);
293 	} else if (NULL == ep && ! d->multiline) {
294 		d->multiline = 1;
295 		/* Is a structure starting in this line? */
296 		if (NULL != lcp &&
297 		    (NULL == rcp || rcp < lcp))
298 			d->instruct++;
299 		decl_function_copy(p, &e->text, &e->textsz, cp, len);
300 		return(1);
301 	}
302 
303 	/* Position ourselves after the semicolon. */
304 	assert(NULL != ep);
305 	ncp = cp;
306 	nlen = (ep - cp) + 1;
307 	cp = ep + 1;
308 	len -= nlen;
309 
310 	if (d->multiline) {
311 		assert(NULL != e->text);
312 		/* Don't stop the multi-line if we're in a struct. */
313 		if (0 == d->instruct) {
314 			if (NULL == lcp || lcp > cp)
315 				d->multiline = 0;
316 		} else if (NULL != rcp && rcp < cp)
317 			if (0 == --d->instruct)
318 				d->multiline = 0;
319 		decl_function_add(p, &e->text, &e->textsz, ncp, nlen);
320 	} else {
321 		assert(NULL == e->text);
322 		if (NULL != lcp && lcp < cp) {
323 			d->multiline = 1;
324 			d->instruct++;
325 		}
326 		decl_function_copy(p, &e->text, &e->textsz, ncp, nlen);
327 	}
328 
329 	goto again;
330 }
331 
332 /*
333  * A definition is just #define followed by space followed by the name,
334  * then the value of that name.
335  * We ignore the latter.
336  * FIXME: this does not understand multi-line CPP, but I don't think
337  * there are any instances of that in sqlite.h.
338  */
339 static int
340 decl_define(struct parse *p, char *cp, size_t len)
341 {
342 	struct defn	*d;
343 	struct decl	*e;
344 	size_t		 sz;
345 
346 	while (isspace((int)*cp)) {
347 		cp++;
348 		len--;
349 	}
350 	if (0 == len) {
351 		warnx("%s:%zu: empty pre-processor "
352 			"constant", p->fn, p->ln);
353 		return(1);
354 	}
355 
356 	d = TAILQ_LAST(&p->dqhead, defnq);
357 	assert(NULL != d);
358 
359 	/*
360 	 * We're parsing a preprocessor definition, but we're still
361 	 * waiting on a semicolon from a function definition.
362 	 * It might be a comment or an error.
363 	 */
364 	if (d->multiline) {
365 		warnx("%s:%zu: multiline declaration "
366 			"still open (harmless?)", p->fn, p->ln);
367 		e = TAILQ_LAST(&d->dcqhead, declq);
368 		assert(NULL != e);
369 		e->type = DECLTYPE_NEITHER;
370 		d->multiline = d->instruct = 0;
371 	}
372 
373 	sz = 0;
374 	while ('\0' != cp[sz] && ! isspace((int)cp[sz]))
375 		sz++;
376 
377 	e = calloc(1, sizeof(struct decl));
378 	if (NULL == e)
379 		err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln);
380 	e->type = DECLTYPE_CPP;
381 	e->text = calloc(1, sz + 1);
382 	if (NULL == e->text)
383 		err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln);
384 	strlcpy(e->text, cp, sz + 1);
385 	e->textsz = sz;
386 	TAILQ_INSERT_TAIL(&d->dcqhead, e, entries);
387 	return(1);
388 }
389 
390 /*
391  * A declaration is a function, variable, preprocessor definition, or
392  * really anything else until we reach a blank line.
393  */
394 static void
395 decl(struct parse *p, char *cp, size_t len)
396 {
397 	struct defn	*d;
398 	struct decl	*e;
399 
400 	while (isspace((int)*cp)) {
401 		cp++;
402 		len--;
403 	}
404 
405 	/* Check closure. */
406 	if ('\0' == *cp) {
407 		p->phase = PHASE_INIT;
408 		/* Check multiline status. */
409 		d = TAILQ_LAST(&p->dqhead, defnq);
410 		assert(NULL != d);
411 		if (d->multiline) {
412 			warnx("%s:%zu: multiline declaration "
413 				"still open (harmless?)", p->fn, p->ln);
414 			e = TAILQ_LAST(&d->dcqhead, declq);
415 			assert(NULL != e);
416 			e->type = DECLTYPE_NEITHER;
417 			d->multiline = d->instruct = 0;
418 		}
419 		return;
420 	}
421 
422 	/*
423 	 * Catch preprocessor defines, but discard all other types of
424 	 * preprocessor statements.
425 	 */
426 	if ('#' == *cp) {
427 		len--;
428 		cp++;
429 		while (isspace((int)*cp)) {
430 			len--;
431 			cp++;
432 		}
433 		if (0 == strncmp(cp, "define", 6))
434 			decl_define(p, cp + 6, len - 6);
435 		return;
436 	}
437 
438 	decl_function(p, cp, len);
439 }
440 
441 /*
442  * Parse "SEE ALSO" phrases, which can come at any point in the
443  * interface description (unlike what they claim).
444  */
445 static void
446 seealso(struct parse *p, char *cp, size_t len)
447 {
448 	struct defn	*d;
449 
450 	if ('\0' == *cp) {
451 		warnx("%s:%zu: warn: unexpected end of "
452 			"interface description", p->fn, p->ln);
453 		p->phase = PHASE_INIT;
454 		return;
455 	} else if (0 == strcmp(cp, "*/")) {
456 		p->phase = PHASE_DECL;
457 		return;
458 	} else if ('*' != cp[0] || '*' != cp[1]) {
459 		warnx("%s:%zu: warn: unexpected end of "
460 			"interface description", p->fn, p->ln);
461 		p->phase = PHASE_INIT;
462 		return;
463 	}
464 
465 	cp += 2;
466 	len -= 2;
467 	while (isspace((int)*cp)) {
468 		cp++;
469 		len--;
470 	}
471 
472 	/* Blank line: back to description part. */
473 	if (0 == len) {
474 		p->phase = PHASE_DESC;
475 		return;
476 	}
477 
478 	/* Fetch current interface definition. */
479 	d = TAILQ_LAST(&p->dqhead, defnq);
480 	assert(NULL != d);
481 
482 	d->seealso = realloc(d->seealso,
483 		d->seealsosz + len + 1);
484 	memcpy(d->seealso + d->seealsosz, cp, len);
485 	d->seealsosz += len;
486 	d->seealso[d->seealsosz] = '\0';
487 }
488 
489 /*
490  * A definition description is a block of text that we'll later format
491  * in mdoc(7).
492  * It extends from the name of the definition down to the declarations
493  * themselves.
494  */
495 static void
496 desc(struct parse *p, char *cp, size_t len)
497 {
498 	struct defn	*d;
499 	size_t		 nsz;
500 
501 	if ('\0' == *cp) {
502 		warnx("%s:%zu: warn: unexpected end of "
503 			"interface description", p->fn, p->ln);
504 		p->phase = PHASE_INIT;
505 		return;
506 	} else if (0 == strcmp(cp, "*/")) {
507 		/* End of comment area, start of declarations. */
508 		p->phase = PHASE_DECL;
509 		return;
510 	} else if ('*' != cp[0] || '*' != cp[1]) {
511 		warnx("%s:%zu: warn: unexpected end of "
512 			"interface description", p->fn, p->ln);
513 		p->phase = PHASE_INIT;
514 		return;
515 	}
516 
517 	cp += 2;
518 	len -= 2;
519 
520 	while (isspace((int)*cp)) {
521 		cp++;
522 		len--;
523 	}
524 
525 	/* Fetch current interface definition. */
526 	d = TAILQ_LAST(&p->dqhead, defnq);
527 	assert(NULL != d);
528 
529 	/* Ignore leading blank lines. */
530 	if (0 == len && NULL == d->desc)
531 		return;
532 
533 	/* Collect SEE ALSO clauses. */
534 	if (0 == strncasecmp(cp, "see also:", 9)) {
535 		cp += 9;
536 		len -= 9;
537 		while (isspace((int)*cp)) {
538 			cp++;
539 			len--;
540 		}
541 		p->phase = PHASE_SEEALSO;
542 		d->seealso = realloc(d->seealso,
543 			d->seealsosz + len + 1);
544 		memcpy(d->seealso + d->seealsosz, cp, len);
545 		d->seealsosz += len;
546 		d->seealso[d->seealsosz] = '\0';
547 		return;
548 	}
549 
550 	/* White-space padding between lines. */
551 	if (NULL != d->desc &&
552 	    ' ' != d->desc[d->descsz - 1] &&
553 	    '\n' != d->desc[d->descsz - 1]) {
554 		d->desc = realloc(d->desc, d->descsz + 2);
555 		if (NULL == d->desc)
556 			err(EXIT_FAILURE, "%s:%zu: realloc",
557 				p->fn, p->ln);
558 		d->descsz++;
559 		strlcat(d->desc, " ", d->descsz + 1);
560 	}
561 
562 	/* Either append the line of a newline, if blank. */
563 	nsz = 0 == len ? 1 : len;
564 	if (NULL == d->desc) {
565 		d->desc = calloc(1, nsz + 1);
566 		if (NULL == d->desc)
567 			err(EXIT_FAILURE, "%s:%zu: calloc",
568 				p->fn, p->ln);
569 	} else {
570 		d->desc = realloc(d->desc, d->descsz + nsz + 1);
571 		if (NULL == d->desc)
572 			err(EXIT_FAILURE, "%s:%zu: realloc",
573 				p->fn, p->ln);
574 	}
575 	d->descsz += nsz;
576 	strlcat(d->desc, 0 == len ? "\n" : cp, d->descsz + 1);
577 }
578 
579 /*
580  * Copy all KEYWORDS into a buffer.
581  */
582 static void
583 keys(struct parse *p, char *cp, size_t len)
584 {
585 	struct defn	*d;
586 
587 	if ('\0' == *cp) {
588 		warnx("%s:%zu: warn: unexpected end of "
589 			"interface keywords", p->fn, p->ln);
590 		p->phase = PHASE_INIT;
591 		return;
592 	} else if (0 == strcmp(cp, "*/")) {
593 		/* End of comment area, start of declarations. */
594 		p->phase = PHASE_DECL;
595 		return;
596 	} else if ('*' != cp[0] || '*' != cp[1]) {
597 		if ('\0' != cp[1]) {
598 			warnx("%s:%zu: warn: unexpected end of "
599 				"interface keywords", p->fn, p->ln);
600 			p->phase = PHASE_INIT;
601 			return;
602 		} else
603 			warnx("%s:%zu: warn: workaround in effect "
604 				"for unexpected end of "
605 				"interface keywords", p->fn, p->ln);
606 	}
607 
608 	cp += 2;
609 	len -= 2;
610 	while (isspace((int)*cp)) {
611 		cp++;
612 		len--;
613 	}
614 
615 	if (0 == len) {
616 		p->phase = PHASE_DESC;
617 		return;
618 	} else if (strncmp(cp, "KEYWORDS:", 9))
619 		return;
620 
621 	cp += 9;
622 	len -= 9;
623 
624 	d = TAILQ_LAST(&p->dqhead, defnq);
625 	assert(NULL != d);
626 	d->keybuf = realloc(d->keybuf, d->keybufsz + len + 1);
627 	if (NULL == d->keybuf)
628 		err(EXIT_FAILURE, "%s:%zu: realloc", p->fn, p->ln);
629 	memcpy(d->keybuf + d->keybufsz, cp, len);
630 	d->keybufsz += len;
631 	d->keybuf[d->keybufsz] = '\0';
632 }
633 
634 /*
635  * Initial state is where we're scanning forward to find commented
636  * instances of CAPI3REF.
637  */
638 static void
639 init(struct parse *p, char *cp)
640 {
641 	struct defn	*d;
642 
643 	/* Look for comment hook. */
644 	if ('*' != cp[0] || '*' != cp[1])
645 		return;
646 	cp += 2;
647 	while (isspace((int)*cp))
648 		cp++;
649 
650 	/* Look for beginning of definition. */
651 	if (strncmp(cp, "CAPI3REF:", 9))
652 		return;
653 	cp += 9;
654 	while (isspace((int)*cp))
655 		cp++;
656 	if ('\0' == *cp) {
657 		warnx("%s:%zu: warn: unexpected end of "
658 			"interface definition", p->fn, p->ln);
659 		return;
660 	}
661 
662 	/* Add definition to list of existing ones. */
663 	d = calloc(1, sizeof(struct defn));
664 	if (NULL == d)
665 		err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln);
666 	d->name = strdup(cp);
667 	if (NULL == d->name)
668 		err(EXIT_FAILURE, "%s:%zu: strdup", p->fn, p->ln);
669 	d->fn = p->fn;
670 	d->ln = p->ln;
671 	p->phase = PHASE_KEYS;
672 	TAILQ_INIT(&d->dcqhead);
673 	TAILQ_INSERT_TAIL(&p->dqhead, d, entries);
674 }
675 
/*
 * Non-zero if "_cp" points at a break point, i.e. one of ';', '[',
 * ')' or '{', or a '(' that is not directly followed by '*'.
 * The second character is examined only when the first is '('.
 * Note that the argument is evaluated more than once.
 */
#define	BPOINT(_cp) \
	(('(' == *(_cp) && '*' != *((_cp) + 1)) || \
	 ';' == *(_cp) || \
	 '[' == *(_cp) || \
	 ')' == *(_cp) || \
	 '{' == *(_cp))
682 
683 /*
684  * Given a declaration (be it preprocessor or C), try to parse out a
685  * reasonable "name" for the affair.
686  * For a struct, for example, it'd be the struct name.
687  * For a typedef, it'd be the type name.
688  * For a function, it'd be the function name.
689  */
690 static void
691 grok_name(const struct decl *e,
692 	const char **start, size_t *sz)
693 {
694 	const char	*cp;
695 
696 	*start = NULL;
697 	*sz = 0;
698 
699 	if (DECLTYPE_CPP != e->type) {
700 		assert(';' == e->text[e->textsz - 1]);
701 		cp = e->text;
702 		do {
703 			while (isspace((int)*cp))
704 				cp++;
705 			if (BPOINT(cp))
706 				break;
707 			/* Function pointers... */
708 			if ('(' == *cp)
709 				cp++;
710 			/* Pass over pointers. */
711 			while ('*' == *cp)
712 				cp++;
713 			*start = cp;
714 			*sz = 0;
715 			while ( ! isspace((int)*cp)) {
716 				if (BPOINT(cp))
717 					break;
718 				cp++;
719 				(*sz)++;
720 			}
721 		} while ( ! BPOINT(cp));
722 	} else {
723 		*sz = e->textsz;
724 		*start = e->text;
725 	}
726 }
727 
/*
 * qsort(3) comparison callback for arrays of strings: "p1" and "p2"
 * each point at a "char *" element, and the strings are ordered
 * without regard to case.
 */
static int
xrcmp(const void *p1, const void *p2)
{
	const char *const *lhs = p1;
	const char *const *rhs = p2;

	return(strcasecmp(*lhs, *rhs));
}
736 
737 /*
738  * Extract information from the interface definition.
739  * Mark it as "postprocessed" on success.
740  */
741 static void
742 postprocess(const char *prefix, struct defn *d)
743 {
744 	struct decl	*first;
745 	const char	*start;
746 	size_t		 offs, sz, i;
747 	ENTRY		 ent;
748 
749 	if (TAILQ_EMPTY(&d->dcqhead))
750 		return;
751 
752 	/* Find the first #define or declaration. */
753 	TAILQ_FOREACH(first, &d->dcqhead, entries)
754 		if (DECLTYPE_CPP == first->type ||
755 		    DECLTYPE_C == first->type)
756 			break;
757 
758 	if (NULL == first) {
759 		warnx("%s:%zu: no entry to document", d->fn, d->ln);
760 		return;
761 	}
762 
763 	/*
764 	 * Now compute the document name (`Dt').
765 	 * We'll also use this for the filename.
766 	 */
767 	grok_name(first, &start, &sz);
768 	if (NULL == start) {
769 		warnx("%s:%zu: couldn't deduce "
770 			"entry name", d->fn, d->ln);
771 		return;
772 	}
773 
774 	/* Document name needs all-caps. */
775 	d->dt = malloc(sz + 1);
776 	if (NULL == d->dt)
777 		err(EXIT_FAILURE, "malloc");
778 	memcpy(d->dt, start, sz);
779 	d->dt[sz] = '\0';
780 	for (i = 0; i < sz; i++)
781 		d->dt[i] = toupper((int)d->dt[i]);
782 
783 	/* Filename needs no special chars. */
784 	asprintf(&d->fname, "%s/%.*s.3",
785 		prefix, (int)sz, start);
786 	if (NULL == d->fname)
787 		err(EXIT_FAILURE, "asprintf");
788 
789 	offs = strlen(prefix) + 1;
790 	for (i = 0; i < sz; i++) {
791 		if (isalnum((int)d->fname[offs + i]) ||
792 		    '_' == d->fname[offs + i] ||
793 		    '-' == d->fname[offs + i])
794 			continue;
795 		d->fname[offs + i] = '_';
796 	}
797 
798 	/*
799 	 * First, extract all keywords.
800 	 */
801 	for (i = 0; i < d->keybufsz; ) {
802 		while (isspace((int)d->keybuf[i]))
803 			i++;
804 		if (i == d->keybufsz)
805 			break;
806 		sz = 0;
807 		start = &d->keybuf[i];
808 		if ('{' == d->keybuf[i]) {
809 			start = &d->keybuf[++i];
810 			for ( ; i < d->keybufsz; i++, sz++)
811 				if ('}' == d->keybuf[i])
812 					break;
813 			if ('}' == d->keybuf[i])
814 				i++;
815 		} else
816 			for ( ; i < d->keybufsz; i++, sz++)
817 				if (isspace((int)d->keybuf[i]))
818 					break;
819 		if (0 == sz)
820 			continue;
821 		d->keys = realloc(d->keys,
822 			(d->keysz + 1) * sizeof(char *));
823 		if (NULL == d->keys)
824 			err(EXIT_FAILURE, "realloc");
825 		d->keys[d->keysz] = malloc(sz + 1);
826 		if (NULL == d->keys[d->keysz])
827 			err(EXIT_FAILURE, "malloc");
828 		memcpy(d->keys[d->keysz], start, sz);
829 		d->keys[d->keysz][sz] = '\0';
830 		d->keysz++;
831 
832 		/* Hash the keyword. */
833 		ent.key = d->keys[d->keysz - 1];
834 		ent.data = d;
835 		(void)hsearch(ent, ENTER);
836 	}
837 
838 	/*
839 	 * Now extract all `Nm' values for this document.
840 	 * We only use CPP and C references, and hope for the best when
841 	 * doing so.
842 	 * Enter each one of these as a searchable keyword.
843 	 */
844 	TAILQ_FOREACH(first, &d->dcqhead, entries) {
845 		if (DECLTYPE_CPP != first->type &&
846 		    DECLTYPE_C != first->type)
847 			continue;
848 		grok_name(first, &start, &sz);
849 		if (NULL == start)
850 			continue;
851 		d->nms = realloc(d->nms,
852 			(d->nmsz + 1) * sizeof(char *));
853 		if (NULL == d->nms)
854 			err(EXIT_FAILURE, "realloc");
855 		d->nms[d->nmsz] = malloc(sz + 1);
856 		if (NULL == d->nms[d->nmsz])
857 			err(EXIT_FAILURE, "malloc");
858 		memcpy(d->nms[d->nmsz], start, sz);
859 		d->nms[d->nmsz][sz] = '\0';
860 		d->nmsz++;
861 
862 		/* Hash the name. */
863 		ent.key = d->nms[d->nmsz - 1];
864 		ent.data = d;
865 		(void)hsearch(ent, ENTER);
866 	}
867 
868 	if (0 == d->nmsz) {
869 		warnx("%s:%zu: couldn't deduce "
870 			"any names", d->fn, d->ln);
871 		return;
872 	}
873 
874 	/*
875 	 * Next, scan for all `Xr' values.
876 	 * We'll add more to this list later.
877 	 */
878 	for (i = 0; i < d->seealsosz; i++) {
879 		/*
880 		 * Find next value starting with `['.
881 		 * There's other stuff in there (whitespace or
882 		 * free text leading up to these) that we're ok
883 		 * to ignore.
884 		 */
885 		while (i < d->seealsosz && '[' != d->seealso[i])
886 			i++;
887 		if (i == d->seealsosz)
888 			break;
889 
890 		/*
891 		 * Now scan for the matching `]'.
892 		 * We can also have a vertical bar if we're separating a
893 		 * keyword and its shown name.
894 		 */
895 		start = &d->seealso[++i];
896 		sz = 0;
897 		while (i < d->seealsosz &&
898 		      ']' != d->seealso[i] &&
899 		      '|' != d->seealso[i]) {
900 			i++;
901 			sz++;
902 		}
903 		if (i == d->seealsosz)
904 			break;
905 		if (0 == sz)
906 			continue;
907 
908 		/*
909 		 * Continue on to the end-of-reference, if we weren't
910 		 * there to begin with.
911 		 */
912 		if (']' != d->seealso[i])
913 			while (i < d->seealsosz &&
914 			      ']' != d->seealso[i])
915 				i++;
916 
917 		/* Strip trailing whitespace. */
918 		while (sz > 1 && ' ' == start[sz - 1])
919 			sz--;
920 
921 		/* Strip trailing parenthesis. */
922 		if (sz > 2 &&
923 		    '(' == start[sz - 2] &&
924 	 	    ')' == start[sz - 1])
925 			sz -= 2;
926 
927 		d->xrs = realloc(d->xrs,
928 			(d->xrsz + 1) * sizeof(char *));
929 		if (NULL == d->xrs)
930 			err(EXIT_FAILURE, "realloc");
931 		d->xrs[d->xrsz] = malloc(sz + 1);
932 		if (NULL == d->xrs[d->xrsz])
933 			err(EXIT_FAILURE, "malloc");
934 		memcpy(d->xrs[d->xrsz], start, sz);
935 		d->xrs[d->xrsz][sz] = '\0';
936 		d->xrsz++;
937 	}
938 
939 	/*
940 	 * Next, extract all references.
941 	 * We'll accumulate these into a list of SEE ALSO tags, after.
942 	 * See how these are parsed above for a description: this is
943 	 * basically the same thing.
944 	 */
945 	for (i = 0; i < d->descsz; i++) {
946 		if ('[' != d->desc[i])
947 			continue;
948 		i++;
949 		if ('[' == d->desc[i])
950 			continue;
951 
952 		start = &d->desc[i];
953 		for (sz = 0; i < d->descsz; i++, sz++)
954 			if (']' == d->desc[i] ||
955 			    '|' == d->desc[i])
956 				break;
957 
958 		if (i == d->descsz)
959 			break;
960 		else if (sz == 0)
961 			continue;
962 
963 		if (']' != d->desc[i])
964 			while (i < d->descsz &&
965 			      ']' != d->desc[i])
966 				i++;
967 
968 		while (sz > 1 && ' ' == start[sz - 1])
969 			sz--;
970 
971 		if (sz > 2 &&
972 		    '(' == start[sz - 2] &&
973 		    ')' == start[sz - 1])
974 			sz -= 2;
975 
976 		d->xrs = realloc(d->xrs,
977 			(d->xrsz + 1) * sizeof(char *));
978 		if (NULL == d->xrs)
979 			err(EXIT_FAILURE, "realloc");
980 		d->xrs[d->xrsz] = malloc(sz + 1);
981 		if (NULL == d->xrs[d->xrsz])
982 			err(EXIT_FAILURE, "malloc");
983 		memcpy(d->xrs[d->xrsz], start, sz);
984 		d->xrs[d->xrsz][sz] = '\0';
985 		d->xrsz++;
986 	}
987 
988 	qsort(d->xrs, d->xrsz, sizeof(char *), xrcmp);
989 	d->postprocessed = 1;
990 }
991 
992 /*
993  * Convenience function to look up a keyword.
994  * Returns the keyword's file if found or NULL.
995  */
996 static const char *
997 lookup(char *key)
998 {
999 	ENTRY		 ent;
1000 	ENTRY		*res;
1001 	struct defn	*d;
1002 
1003 	ent.key = key;
1004 	res = hsearch(ent, FIND);
1005 	if (NULL == res)
1006 		return(NULL);
1007 	d = (struct defn *)res->data;
1008 	if (0 == d->nmsz)
1009 		return(NULL);
1010 	assert(NULL != d->nms[0]);
1011 	return(d->nms[0]);
1012 }
1013 
1014 /*
1015  * Emit a valid mdoc(7) document within the given prefix.
1016  */
1017 static void
1018 emit(const struct defn *d, const char *mdocdate)
1019 {
1020 	struct decl	*first;
1021 	size_t		 sz, i, col, last, ns;
1022 	FILE		*f;
1023 	char		*cp;
1024 	const char	*res, *lastres, *args, *str, *end;
1025 	enum tag	 tag;
1026 	enum preproc	 pre;
1027 
1028 	if ( ! d->postprocessed) {
1029 		warnx("%s:%zu: interface has errors, not "
1030 			"producing manpage", d->fn, d->ln);
1031 		return;
1032 	}
1033 
1034 	if (0 == nofile) {
1035 		if (NULL == (f = fopen(d->fname, "w"))) {
1036 			warn("%s: fopen", d->fname);
1037 			return;
1038 		}
1039 	} else
1040 		f = stdout;
1041 
1042 	/* Begin by outputting the mdoc(7) header. */
1043 #if 0
1044 	fputs(".Dd $" "Mdocdate$\n", f);
1045 #else
1046 	fprintf(f, ".Dd %s\n", mdocdate);
1047 #endif
1048 	fprintf(f, ".Dt %s 3\n", d->dt);
1049 	fputs(".Os\n", f);
1050 	fputs(".Sh NAME\n", f);
1051 
1052 	/* Now print the name bits of each declaration. */
1053 	for (i = 0; i < d->nmsz; i++)
1054 		fprintf(f, ".Nm %s%s\n", d->nms[i],
1055 			i < d->nmsz - 1 ? " ," : "");
1056 
1057 	fprintf(f, ".Nd %s\n", d->name);
1058 	fputs(".Sh SYNOPSIS\n", f);
1059 
1060 	TAILQ_FOREACH(first, &d->dcqhead, entries) {
1061 		if (DECLTYPE_CPP != first->type &&
1062 		    DECLTYPE_C != first->type)
1063 			continue;
1064 
1065 		/* Easy: just print the CPP name. */
1066 		if (DECLTYPE_CPP == first->type) {
1067 			fprintf(f, ".Fd #define %s\n",
1068 				first->text);
1069 			continue;
1070 		}
1071 
1072 		/* First, strip out the sqlite CPPs. */
1073 		for (i = 0; i < first->textsz; ) {
1074 			for (pre = 0; pre < PREPROC__MAX; pre++) {
1075 				sz = strlen(preprocs[pre]);
1076 				if (strncmp(preprocs[pre],
1077 				    &first->text[i], sz))
1078 					continue;
1079 				i += sz;
1080 				while (isspace((int)first->text[i]))
1081 					i++;
1082 				break;
1083 			}
1084 			if (pre == PREPROC__MAX)
1085 				break;
1086 		}
1087 
1088 		/* If we're a typedef, immediately print Vt. */
1089 		if (0 == strncmp(&first->text[i], "typedef", 7)) {
1090 			fprintf(f, ".Vt %s\n", &first->text[i]);
1091 			continue;
1092 		}
1093 
1094 		/* Are we a struct? */
1095 		if (first->textsz > 2 &&
1096 		    '}' == first->text[first->textsz - 2] &&
1097 		    NULL != (cp = strchr(&first->text[i], '{'))) {
1098 			*cp = '\0';
1099 			fprintf(f, ".Vt %s;\n", &first->text[i]);
1100 			/* Restore brace for later usage. */
1101 			*cp = '{';
1102 			continue;
1103 		}
1104 
1105 		/* Catch remaining non-functions. */
1106 		if (first->textsz > 2 &&
1107 		    ')' != first->text[first->textsz - 2]) {
1108 			fprintf(f, ".Vt %s\n", &first->text[i]);
1109 			continue;
1110 		}
1111 
1112 		str = &first->text[i];
1113 		if (NULL == (args = strchr(str, '('))) {
1114 			/* What is this? */
1115 			fputs(".Bd -literal\n", f);
1116 			fputs(&first->text[i], f);
1117 			fputs("\n.Ed\n", f);
1118 			continue;
1119 		}
1120 
1121 		/* Scroll back to end of function name. */
1122 		end = args - 1;
1123 		while (end > str && isspace((int)*end))
1124 			end--;
1125 
1126 		/* Scroll back to what comes before. */
1127 		for ( ; end > str; end--)
1128 			if (isspace((int)*end) || '*' == *end)
1129 				break;
1130 
1131 		/*
1132 		 * If we can't find what came before, then the function
1133 		 * has no type, which is odd... let's just call it void.
1134 		 */
1135 		if (end > str) {
1136 			fprintf(f, ".Ft %.*s\n",
1137 				(int)(end - str + 1), str);
1138 			fprintf(f, ".Fo %.*s\n",
1139 				(int)(args - end - 1), end + 1);
1140 		} else {
1141 			fputs(".Ft void\n", f);
1142 			fprintf(f, ".Fo %.*s\n", (int)(args - end), end);
1143 		}
1144 
1145 		/*
1146 		 * Convert function arguments into `Fa' clauses.
1147 		 * This also handles nested function pointers, which
1148 		 * would otherwise throw off the delimeters.
1149 		 */
1150 		for (;;) {
1151 			str = ++args;
1152 			while (isspace((int)*str))
1153 				str++;
1154 			fputs(".Fa \"", f);
1155 			ns = 0;
1156 			while ('\0' != *str &&
1157 			       (ns || ',' != *str) &&
1158 			       (ns || ')' != *str)) {
1159 				if ('/' == str[0] && '*' == str[1]) {
1160 					str += 2;
1161 					for ( ; '\0' != str[0]; str++)
1162 						if ('*' == str[0] && '/' == str[1])
1163 							break;
1164 					if ('\0' == *str)
1165 						break;
1166 					str += 2;
1167 					while (isspace((int)*str))
1168 						str++;
1169 					if ('\0' == *str ||
1170 					    (0 == ns && ',' == *str) ||
1171 					    (0 == ns && ')' == *str))
1172 						break;
1173 				}
1174 				if ('(' == *str)
1175 					ns++;
1176 				else if (')' == *str)
1177 					ns--;
1178 				fputc(*str, f);
1179 				str++;
1180 			}
1181 			fputs("\"\n", f);
1182 			if ('\0' == *str || ')' == *str)
1183 				break;
1184 			args = str;
1185 		}
1186 
1187 		fputs(".Fc\n", f);
1188 	}
1189 
1190 	fputs(".Sh DESCRIPTION\n", f);
1191 
1192 	/*
1193 	 * Strip the crap out of the description.
1194 	 * "Crap" consists of things I don't understand that mess up
1195 	 * parsing of the HTML, for instance,
1196 	 *   <dl>[[foo bar]]<dt>foo bar</dt>...</dl>
1197 	 * These are not well-formed HTML.
1198 	 */
1199 	for (i = 0; i < d->descsz; i++) {
1200 		if ('^' == d->desc[i] &&
1201 		    '(' == d->desc[i + 1]) {
1202 			d->desc[i] = d->desc[i + 1] = ' ';
1203 			i++;
1204 			continue;
1205 		} else if (')' == d->desc[i] &&
1206 			   '^' == d->desc[i + 1]) {
1207 			d->desc[i] = d->desc[i + 1] = ' ';
1208 			i++;
1209 			continue;
1210 		} else if ('^' == d->desc[i]) {
1211 			d->desc[i] = ' ';
1212 			continue;
1213 		} else if ('[' != d->desc[i] ||
1214 			   '[' != d->desc[i + 1])
1215 			continue;
1216 		d->desc[i] = d->desc[i + 1] = ' ';
1217 		for (i += 2; i < d->descsz; i++) {
1218 			if (']' == d->desc[i] &&
1219 			    ']' == d->desc[i + 1])
1220 				break;
1221 			d->desc[i] = ' ';
1222 		}
1223 		if (i == d->descsz)
1224 			continue;
1225 		d->desc[i] = d->desc[i + 1] = ' ';
1226 		i++;
1227 	}
1228 
1229 	/*
1230 	 * Here we go!
1231 	 * Print out the description as best we can.
1232 	 * Do on-the-fly processing of any HTML we encounter into
1233 	 * mdoc(7) and try to break lines up.
1234 	 */
1235 	col = 0;
1236 	for (i = 0; i < d->descsz; ) {
1237 		/*
1238 		 * Newlines are paragraph breaks.
1239 		 * If we have multiple newlines, then keep to a single
1240 		 * `Pp' to keep it clean.
1241 		 * Only do this if we're not before a block-level HTML,
1242 		 * as this would mean, for instance, a `Pp'-`Bd' pair.
1243 		 */
1244 		if ('\n' == d->desc[i]) {
1245 			while (isspace((int)d->desc[i]))
1246 				i++;
1247 			for (tag = 0; tag < TAG__MAX; tag++) {
1248 				sz = strlen(tags[tag].html);
1249 				if (0 == strncmp(&d->desc[i], tags[tag].html, sz))
1250 					break;
1251 			}
1252 			if (TAG__MAX == tag ||
1253 			    TAGINFO_INLINE & tags[tag].flags) {
1254 				if (col > 0)
1255 					fputs("\n", f);
1256 				fputs(".Pp\n", f);
1257 				/* We're on a new line. */
1258 				col = 0;
1259 			}
1260 			continue;
1261 		}
1262 
1263 		/*
1264 		 * New sentence, new line.
1265 		 * We guess whether this is the case by using the
1266 		 * dumbest possible heuristic.
1267 		 */
1268 		if (' ' == d->desc[i] && i &&
1269 		    '.' == d->desc[i - 1]) {
1270 			while (' ' == d->desc[i])
1271 				i++;
1272 			fputs("\n", f);
1273 			col = 0;
1274 			continue;
1275 		}
1276 		/*
1277 		 * After 65 characters, force a break when we encounter
1278 		 * white-space to keep our lines more or less tidy.
1279 		 */
1280 		if (col > 65 && ' ' == d->desc[i]) {
1281 			while (' ' == d->desc[i])
1282 				i++;
1283 			fputs("\n", f);
1284 			col = 0;
1285 			continue;
1286 		}
1287 
1288 		/*
1289 		 * Parsing HTML tags.
1290 		 * Why, sqlite guys, couldn't you have used something
1291 		 * like markdown or something?
1292 		 * Sheesh.
1293 		 */
1294 		if ('<' == d->desc[i]) {
1295 			for (tag = 0; tag < TAG__MAX; tag++) {
1296 				sz = strlen(tags[tag].html);
1297 				if (strncmp(&d->desc[i],
1298 				    tags[tag].html, sz))
1299 					continue;
1300 				/*
1301 				 * NOOP tags don't do anything, such as
1302 				 * the case of `</dd>', which only
1303 				 * serves to end an `It' block that will
1304 				 * be closed out by a subsequent `It' or
1305 				 * end of clause `El' anyway.
1306 				 * Skip the trailing space.
1307 				 */
1308 				if (TAGINFO_NOOP & tags[tag].flags) {
1309 					i += sz;
1310 					while (isspace((int)d->desc[i]))
1311 						i++;
1312 					break;
1313 				} else if (TAGINFO_INLINE & tags[tag].flags) {
1314 					fputs(tags[tag].mdoc, f);
1315 					i += sz;
1316 					break;
1317 				}
1318 
1319 				/*
1320 				 * A breaking mdoc(7) statement.
1321 				 * Break the current line, output the
1322 				 * macro, and conditionally break
1323 				 * following that (or we might do
1324 				 * nothing at all).
1325 				 */
1326 				if (col > 0) {
1327 					fputs("\n", f);
1328 					col = 0;
1329 				}
1330 				fputs(tags[tag].mdoc, f);
1331 				if ( ! (TAGINFO_NOBR & tags[tag].flags)) {
1332 					fputs("\n", f);
1333 					col = 0;
1334 				} else if ( ! (TAGINFO_NOSP & tags[tag].flags)) {
1335 					fputs(" ", f);
1336 					col++;
1337 				}
1338 				i += sz;
1339 				while (isspace((int)d->desc[i]))
1340 					i++;
1341 				break;
1342 			}
1343 			if (tag < TAG__MAX)
1344 				continue;
1345 		} else if ('[' == d->desc[i] &&
1346 			   ']' != d->desc[i + 1]) {
1347 			/* Do we start at the bracket or bar? */
1348 			for (sz = i + 1; sz < d->descsz; sz++)
1349 				if ('|' == d->desc[sz] ||
1350 				    ']' == d->desc[sz])
1351 					break;
1352 
1353 			if (sz == d->descsz)
1354 				continue;
1355 			else if ('|' == d->desc[sz])
1356 				i = sz + 1;
1357 			else
1358 				i = i + 1;
1359 
1360 			/*
1361 			 * Now handle in-page references.
1362 			 * Print them out as-is: we've already
1363 			 * accumulated them into our "SEE ALSO" values,
1364 			 * which we'll use below.
1365 			 */
1366 			for ( ; i < d->descsz; i++, col++) {
1367 				if (']' == d->desc[i]) {
1368 					i++;
1369 					break;
1370 				}
1371 				fputc(d->desc[i], f);
1372 				col++;
1373 			}
1374 			continue;
1375 		}
1376 
1377 		if (' ' == d->desc[i] && 0 == col) {
1378 			while (' ' == d->desc[i])
1379 				i++;
1380 			continue;
1381 		}
1382 
1383 		assert('\n' != d->desc[i]);
1384 
1385 		/*
1386 		 * Handle some oddities.
1387 		 * The following HTML escapes exist in the output that I
1388 		 * could find.
1389 		 * There might be others...
1390 		 */
1391 		if (0 == strncmp(&d->desc[i], "&nbsp;", 6)) {
1392 			i += 6;
1393 			fputc(' ', f);
1394 		} else if (0 == strncmp(&d->desc[i], "&lt;", 4)) {
1395 			i += 4;
1396 			fputc('<', f);
1397 		} else if (0 == strncmp(&d->desc[i], "&gt;", 4)) {
1398 			i += 4;
1399 			fputc('>', f);
1400 		} else if (0 == strncmp(&d->desc[i], "&#91;", 5)) {
1401 			i += 5;
1402 			fputc('[', f);
1403 		} else {
1404 			/* Make sure we don't trigger a macro. */
1405 			if (0 == col && '.' == d->desc[i])
1406 				fputs("\\&", f);
1407 			fputc(d->desc[i], f);
1408 			i++;
1409 		}
1410 
1411 		col++;
1412 	}
1413 
1414 	if (col > 0)
1415 		fputs("\n", f);
1416 
1417 	if (d->xrsz > 0) {
1418 		/*
1419 		 * Look up all of our keywords (which are in the xrs
1420 		 * field) in the table of all known keywords.
1421 		 * Don't print duplicates.
1422 		 */
1423 		lastres = NULL;
1424 		for (last = 0, i = 0; i < d->xrsz; i++) {
1425 			res = lookup(d->xrs[i]);
1426 			/* Ignore self-reference. */
1427 			if (res == d->nms[0] && verbose)
1428 				warnx("%s:%zu: self-reference: %s",
1429 					d->fn, d->ln, d->xrs[i]);
1430 			if (res == d->nms[0] && verbose)
1431 				continue;
1432 			if (NULL == res && verbose)
1433 				warnx("%s:%zu: ref not found: %s",
1434 					d->fn, d->ln, d->xrs[i]);
1435 			if (NULL == res)
1436 				continue;
1437 
1438 			/* Ignore duplicates. */
1439 			if (NULL != lastres && lastres == res)
1440 				continue;
1441 			if (last)
1442 				fputs(" ,\n", f);
1443 			else
1444 				fputs(".Sh SEE ALSO\n", f);
1445 			fprintf(f, ".Xr %s 3", res);
1446 			last = 1;
1447 			lastres = res;
1448 		}
1449 		if (last)
1450 			fputs("\n", f);
1451 	}
1452 
1453 	if (0 == nofile)
1454 		fclose(f);
1455 }
1456 
1457 int
1458 main(int argc, char *argv[])
1459 {
1460 	size_t		 i, len;
1461 	FILE		*f;
1462 	char		*cp;
1463 	const char	*prefix;
1464 	struct parse	 p;
1465 	int		 rc, ch;
1466 	struct defn	*d;
1467 	struct decl	*e;
1468 
1469 	rc = 0;
1470 	prefix = ".";
1471 	f = stdin;
1472 	memset(&p, 0, sizeof(struct parse));
1473 	p.fn = "<stdin>";
1474 	p.ln = 0;
1475 	p.phase = PHASE_INIT;
1476 	TAILQ_INIT(&p.dqhead);
1477 
1478 	while (-1 != (ch = getopt(argc, argv, "np:v")))
1479 		switch (ch) {
1480 		case ('n'):
1481 			nofile = 1;
1482 			break;
1483 		case ('p'):
1484 			prefix = optarg;
1485 			break;
1486 		case ('v'):
1487 			verbose = 1;
1488 			break;
1489 		default:
1490 			goto usage;
1491 		}
1492 
1493 	time_t now = time(NULL);
1494 	struct tm tm;
1495 	char mdocdate[256];
1496 	if (gmtime_r(&now, &tm) == NULL)
1497 		err(EXIT_FAILURE, "gmtime");
1498 	strftime(mdocdate, sizeof(mdocdate), "%B %d, %Y", &tm);
1499 	/*
1500 	 * Read in line-by-line and process in the phase dictated by our
1501 	 * finite state automaton.
1502 	 */
1503 	while (NULL != (cp = fgetln(f, &len))) {
1504 		assert(len > 0);
1505 		p.ln++;
1506 		if ('\n' != cp[len - 1]) {
1507 			warnx("%s:%zu: unterminated line", p.fn, p.ln);
1508 			break;
1509 		}
1510 		cp[--len] = '\0';
1511 		/* Lines are always nil-terminated. */
1512 		switch (p.phase) {
1513 		case (PHASE_INIT):
1514 			init(&p, cp);
1515 			break;
1516 		case (PHASE_KEYS):
1517 			keys(&p, cp, len);
1518 			break;
1519 		case (PHASE_DESC):
1520 			desc(&p, cp, len);
1521 			break;
1522 		case (PHASE_SEEALSO):
1523 			seealso(&p, cp, len);
1524 			break;
1525 		case (PHASE_DECL):
1526 			decl(&p, cp, len);
1527 			break;
1528 		}
1529 	}
1530 
1531 	/*
1532 	 * If we hit the last line, then try to process.
1533 	 * Otherwise, we failed along the way.
1534 	 */
1535 	if (NULL == cp) {
1536 		/*
1537 		 * Allow us to be at the declarations or scanning for
1538 		 * the next clause.
1539 		 */
1540 		if (PHASE_INIT == p.phase ||
1541 		    PHASE_DECL == p.phase) {
1542 			if (0 == hcreate(5000))
1543 				err(EXIT_FAILURE, "hcreate");
1544 			TAILQ_FOREACH(d, &p.dqhead, entries)
1545 				postprocess(prefix, d);
1546 			TAILQ_FOREACH(d, &p.dqhead, entries)
1547 				emit(d, mdocdate);
1548 			rc = 1;
1549 		} else if (PHASE_DECL != p.phase)
1550 			warnx("%s:%zu: exit when not in "
1551 				"initial state", p.fn, p.ln);
1552 	}
1553 
1554 	while ( ! TAILQ_EMPTY(&p.dqhead)) {
1555 		d = TAILQ_FIRST(&p.dqhead);
1556 		TAILQ_REMOVE(&p.dqhead, d, entries);
1557 		while ( ! TAILQ_EMPTY(&d->dcqhead)) {
1558 			e = TAILQ_FIRST(&d->dcqhead);
1559 			TAILQ_REMOVE(&d->dcqhead, e, entries);
1560 			free(e->text);
1561 			free(e);
1562 		}
1563 		free(d->name);
1564 		free(d->desc);
1565 		free(d->dt);
1566 		for (i = 0; i < d->nmsz; i++)
1567 			free(d->nms[i]);
1568 		for (i = 0; i < d->xrsz; i++)
1569 			free(d->xrs[i]);
1570 		for (i = 0; i < d->keysz; i++)
1571 			free(d->keys[i]);
1572 		free(d->keys);
1573 		free(d->nms);
1574 		free(d->xrs);
1575 		free(d->fname);
1576 		free(d->seealso);
1577 		free(d->keybuf);
1578 		free(d);
1579 	}
1580 
1581 	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1582 usage:
1583 	fprintf(stderr, "usage: %s [-nv] [-p prefix]\n", getprogname());
1584 	return(EXIT_FAILURE);
1585 }
1586