xref: /netbsd-src/external/bsd/mdocml/dist/mandocdb.c (revision a5847cc334d9a7029f6352b847e9e8d71a0f9e0c)
1 /*	$Vendor-Id: mandocdb.c,v 1.6 2011/09/17 13:54:27 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include <sys/param.h>
22 
23 #include <assert.h>
24 #include <dirent.h>
25 #include <fcntl.h>
26 #include <getopt.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 #ifdef __linux__
33 # include <db_185.h>
34 #else
35 # include <db.h>
36 #endif
37 
38 #include "man.h"
39 #include "mdoc.h"
40 #include "mandoc.h"
41 
/* File name of the keyword database (opened as a btree) in each directory. */
#define	MANDOC_DB	 "mandoc.db"
/* File name of the index database (opened as a recno file) in each directory. */
#define	MANDOC_IDX	 "mandoc.index"
/* Initial size of a struct buf, and the slack added whenever one grows. */
#define	MANDOC_BUFSZ	  BUFSIZ
/* Number of entries by which the array of free record numbers grows. */
#define	MANDOC_SLOP	  1024
46 
/*
 * Bit-fields.  See mandocdb.8.
 * Each keyword stored in the database carries a mask of these bits
 * recording where in a manual the keyword was found.
 */

#define TYPE_NAME	  0x01	/* Nm in NAME (mdoc); names in NAME (man) */
#define TYPE_FUNCTION	  0x02	/* Fn or Fo head in SYNOPSIS */
#define TYPE_UTILITY	  0x04	/* Nm head in SYNOPSIS */
#define TYPE_INCLUDES	  0x08	/* In, or Fd "#include", in SYNOPSIS */
#define TYPE_VARIABLE	  0x10	/* Va or Vt in SYNOPSIS */
#define TYPE_STANDARD	  0x20	/* St in STANDARDS */
#define TYPE_AUTHOR	  0x40	/* An in AUTHORS */
#define TYPE_CONFIG	  0x80	/* Cd in SYNOPSIS */
#define TYPE_DESC	  0x100	/* Nd body (mdoc); NAME description (man) */
#define TYPE_XREF	  0x200	/* Xr anywhere */
#define TYPE_PATH	  0x400	/* Pa in FILES */
#define TYPE_ENV	  0x800	/* Ev in ENVIRONMENT */
#define TYPE_ERR	  0x1000	/* Er in ERRORS */
62 
/*
 * Tiny list for files.  No need to bring in QUEUE.
 * The list builders hand back a pointer to the most recently appended
 * node; callers rewind through `first' before walking `next'.
 */

struct	of {
	char		 *fname; /* heap-allocated */
	struct of	 *next; /* NULL for last one */
	struct of	 *first; /* first in list */
};
70 
/*
 * Buffer for storing growable data.
 * Resetting `len' to zero reuses the allocation; buf_appendb() grows
 * `cp' as needed.
 */

struct	buf {
	char		 *cp; /* heap-allocated storage */
	size_t		  len; /* current length */
	size_t		  size; /* total buffer size */
};
78 
/*
 * Operation we're going to perform.
 * main() selects OP_UPDATE for the -d option and OP_DELETE for the -u
 * option; OP_NEW is the default when neither is given.
 */

enum	op {
	OP_NEW = 0, /* new database */
	OP_UPDATE, /* delete/add entries in existing database */
	OP_DELETE /* delete entries from existing database */
};
86 
/*
 * Common parameter lists for the tree walkers and per-macro handlers:
 * `hash' is the per-file keyword table, `buf' the scratch keyword
 * buffer, `dbuf' the index record being assembled, `n' the current
 * node and (mdoc only) `m' the document meta-data.
 */
#define	MAN_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n
#define	MDOC_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m
96 
97 static	void		  buf_appendmdoc(struct buf *,
98 				const struct mdoc_node *, int);
99 static	void		  buf_append(struct buf *, const char *);
100 static	void		  buf_appendb(struct buf *,
101 				const void *, size_t);
102 static	void		  dbt_put(DB *, const char *, DBT *, DBT *);
103 static	void		  hash_put(DB *, const struct buf *, int);
104 static	void		  hash_reset(DB **);
105 static	void		  index_merge(const struct of *, struct mparse *,
106 				struct buf *, struct buf *,
107 				DB *, DB *, const char *,
108 				DB *, const char *, int,
109 				recno_t, const recno_t *, size_t);
110 static	void		  index_prune(const struct of *, DB *,
111 				const char *, DB *, const char *,
112 				int, recno_t *, recno_t **, size_t *);
113 static	void		  ofile_argbuild(char *[], int, int, struct of **);
114 static	int		  ofile_dirbuild(const char *, int, struct of **);
115 static	void		  ofile_free(struct of *);
116 static	int		  pman_node(MAN_ARGS);
117 static	void		  pmdoc_node(MDOC_ARGS);
118 static	void		  pmdoc_An(MDOC_ARGS);
119 static	void		  pmdoc_Cd(MDOC_ARGS);
120 static	void		  pmdoc_Er(MDOC_ARGS);
121 static	void		  pmdoc_Ev(MDOC_ARGS);
122 static	void		  pmdoc_Fd(MDOC_ARGS);
123 static	void		  pmdoc_In(MDOC_ARGS);
124 static	void		  pmdoc_Fn(MDOC_ARGS);
125 static	void		  pmdoc_Fo(MDOC_ARGS);
126 static	void		  pmdoc_Nd(MDOC_ARGS);
127 static	void		  pmdoc_Nm(MDOC_ARGS);
128 static	void		  pmdoc_Pa(MDOC_ARGS);
129 static	void		  pmdoc_St(MDOC_ARGS);
130 static	void		  pmdoc_Vt(MDOC_ARGS);
131 static	void		  pmdoc_Xr(MDOC_ARGS);
132 static	void		  usage(void);
133 
134 typedef	void		(*pmdoc_nf)(MDOC_ARGS);
135 
136 static	const pmdoc_nf	  mdocs[MDOC_MAX] = {
137 	NULL, /* Ap */
138 	NULL, /* Dd */
139 	NULL, /* Dt */
140 	NULL, /* Os */
141 	NULL, /* Sh */
142 	NULL, /* Ss */
143 	NULL, /* Pp */
144 	NULL, /* D1 */
145 	NULL, /* Dl */
146 	NULL, /* Bd */
147 	NULL, /* Ed */
148 	NULL, /* Bl */
149 	NULL, /* El */
150 	NULL, /* It */
151 	NULL, /* Ad */
152 	pmdoc_An, /* An */
153 	NULL, /* Ar */
154 	pmdoc_Cd, /* Cd */
155 	NULL, /* Cm */
156 	NULL, /* Dv */
157 	pmdoc_Er, /* Er */
158 	pmdoc_Ev, /* Ev */
159 	NULL, /* Ex */
160 	NULL, /* Fa */
161 	pmdoc_Fd, /* Fd */
162 	NULL, /* Fl */
163 	pmdoc_Fn, /* Fn */
164 	NULL, /* Ft */
165 	NULL, /* Ic */
166 	pmdoc_In, /* In */
167 	NULL, /* Li */
168 	pmdoc_Nd, /* Nd */
169 	pmdoc_Nm, /* Nm */
170 	NULL, /* Op */
171 	NULL, /* Ot */
172 	pmdoc_Pa, /* Pa */
173 	NULL, /* Rv */
174 	pmdoc_St, /* St */
175 	pmdoc_Vt, /* Va */
176 	pmdoc_Vt, /* Vt */
177 	pmdoc_Xr, /* Xr */
178 	NULL, /* %A */
179 	NULL, /* %B */
180 	NULL, /* %D */
181 	NULL, /* %I */
182 	NULL, /* %J */
183 	NULL, /* %N */
184 	NULL, /* %O */
185 	NULL, /* %P */
186 	NULL, /* %R */
187 	NULL, /* %T */
188 	NULL, /* %V */
189 	NULL, /* Ac */
190 	NULL, /* Ao */
191 	NULL, /* Aq */
192 	NULL, /* At */
193 	NULL, /* Bc */
194 	NULL, /* Bf */
195 	NULL, /* Bo */
196 	NULL, /* Bq */
197 	NULL, /* Bsx */
198 	NULL, /* Bx */
199 	NULL, /* Db */
200 	NULL, /* Dc */
201 	NULL, /* Do */
202 	NULL, /* Dq */
203 	NULL, /* Ec */
204 	NULL, /* Ef */
205 	NULL, /* Em */
206 	NULL, /* Eo */
207 	NULL, /* Fx */
208 	NULL, /* Ms */
209 	NULL, /* No */
210 	NULL, /* Ns */
211 	NULL, /* Nx */
212 	NULL, /* Ox */
213 	NULL, /* Pc */
214 	NULL, /* Pf */
215 	NULL, /* Po */
216 	NULL, /* Pq */
217 	NULL, /* Qc */
218 	NULL, /* Ql */
219 	NULL, /* Qo */
220 	NULL, /* Qq */
221 	NULL, /* Re */
222 	NULL, /* Rs */
223 	NULL, /* Sc */
224 	NULL, /* So */
225 	NULL, /* Sq */
226 	NULL, /* Sm */
227 	NULL, /* Sx */
228 	NULL, /* Sy */
229 	NULL, /* Tn */
230 	NULL, /* Ux */
231 	NULL, /* Xc */
232 	NULL, /* Xo */
233 	pmdoc_Fo, /* Fo */
234 	NULL, /* Fc */
235 	NULL, /* Oo */
236 	NULL, /* Oc */
237 	NULL, /* Bk */
238 	NULL, /* Ek */
239 	NULL, /* Bt */
240 	NULL, /* Hf */
241 	NULL, /* Fr */
242 	NULL, /* Ud */
243 	NULL, /* Lb */
244 	NULL, /* Lp */
245 	NULL, /* Lk */
246 	NULL, /* Mt */
247 	NULL, /* Brq */
248 	NULL, /* Bro */
249 	NULL, /* Brc */
250 	NULL, /* %C */
251 	NULL, /* Es */
252 	NULL, /* En */
253 	NULL, /* Dx */
254 	NULL, /* %Q */
255 	NULL, /* br */
256 	NULL, /* sp */
257 	NULL, /* %U */
258 	NULL, /* Ta */
259 };
260 
261 static	const char	 *progname;
262 
263 int
264 main(int argc, char *argv[])
265 {
266 	struct mparse	*mp; /* parse sequence */
267 	enum op		 op; /* current operation */
268 	const char	*dir;
269 	char		 ibuf[MAXPATHLEN], /* index fname */
270 			 fbuf[MAXPATHLEN];  /* btree fname */
271 	int		 verb, /* output verbosity */
272 			 ch, i, flags;
273 	DB		*idx, /* index database */
274 			*db, /* keyword database */
275 			*hash; /* temporary keyword hashtable */
276 	BTREEINFO	 info; /* btree configuration */
277 	recno_t		 maxrec; /* supremum of all records */
278 	recno_t		*recs; /* buffer of empty records */
279 	size_t		 sz1, sz2,
280 			 recsz, /* buffer size of recs */
281 			 reccur; /* valid number of recs */
282 	struct buf	 buf, /* keyword buffer */
283 			 dbuf; /* description buffer */
284 	struct of	*of; /* list of files for processing */
285 	extern int	 optind;
286 	extern char	*optarg;
287 
288 	progname = strrchr(argv[0], '/');
289 	if (progname == NULL)
290 		progname = argv[0];
291 	else
292 		++progname;
293 
294 	verb = 0;
295 	of = NULL;
296 	db = idx = NULL;
297 	mp = NULL;
298 	hash = NULL;
299 	recs = NULL;
300 	recsz = reccur = 0;
301 	maxrec = 0;
302 	op = OP_NEW;
303 	dir = NULL;
304 
305 	while (-1 != (ch = getopt(argc, argv, "d:u:v")))
306 		switch (ch) {
307 		case ('d'):
308 			dir = optarg;
309 			op = OP_UPDATE;
310 			break;
311 		case ('u'):
312 			dir = optarg;
313 			op = OP_DELETE;
314 			break;
315 		case ('v'):
316 			verb++;
317 			break;
318 		default:
319 			usage();
320 			return((int)MANDOCLEVEL_BADARG);
321 		}
322 
323 	argc -= optind;
324 	argv += optind;
325 
326 	memset(&info, 0, sizeof(BTREEINFO));
327 	info.flags = R_DUP;
328 
329 	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
330 
331 	memset(&buf, 0, sizeof(struct buf));
332 	memset(&dbuf, 0, sizeof(struct buf));
333 
334 	buf.size = dbuf.size = MANDOC_BUFSZ;
335 
336 	buf.cp = mandoc_malloc(buf.size);
337 	dbuf.cp = mandoc_malloc(dbuf.size);
338 
339 	flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR;
340 
341 	if (OP_UPDATE == op || OP_DELETE == op) {
342 		ibuf[0] = fbuf[0] = '\0';
343 
344 		strlcat(fbuf, dir, MAXPATHLEN);
345 		strlcat(fbuf, "/", MAXPATHLEN);
346 		sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
347 
348 		strlcat(ibuf, dir, MAXPATHLEN);
349 		strlcat(ibuf, "/", MAXPATHLEN);
350 		sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
351 
352 		if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
353 			fprintf(stderr, "%s: Path too long\n", dir);
354 			exit((int)MANDOCLEVEL_BADARG);
355 		}
356 
357 		db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
358 		idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
359 
360 		if (NULL == db) {
361 			perror(fbuf);
362 			exit((int)MANDOCLEVEL_SYSERR);
363 		} else if (NULL == db) {
364 			perror(ibuf);
365 			exit((int)MANDOCLEVEL_SYSERR);
366 		}
367 
368 		if (verb > 2) {
369 			printf("%s: Opened\n", fbuf);
370 			printf("%s: Opened\n", ibuf);
371 		}
372 
373 		ofile_argbuild(argv, argc, verb, &of);
374 		if (NULL == of)
375 			goto out;
376 
377 		of = of->first;
378 
379 		index_prune(of, db, fbuf, idx, ibuf, verb,
380 				&maxrec, &recs, &recsz);
381 
382 		if (OP_UPDATE == op)
383 			index_merge(of, mp, &dbuf, &buf, hash,
384 					db, fbuf, idx, ibuf, verb,
385 					maxrec, recs, reccur);
386 
387 		goto out;
388 	}
389 
390 	for (i = 0; i < argc; i++) {
391 		ibuf[0] = fbuf[0] = '\0';
392 
393 		strlcat(fbuf, argv[i], MAXPATHLEN);
394 		strlcat(fbuf, "/", MAXPATHLEN);
395 		sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
396 
397 		strlcat(ibuf, argv[i], MAXPATHLEN);
398 		strlcat(ibuf, "/", MAXPATHLEN);
399 		sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
400 
401 		if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
402 			fprintf(stderr, "%s: Path too long\n", argv[i]);
403 			exit((int)MANDOCLEVEL_BADARG);
404 		}
405 
406 		db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
407 		idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
408 
409 		if (NULL == db) {
410 			perror(fbuf);
411 			exit((int)MANDOCLEVEL_SYSERR);
412 		} else if (NULL == db) {
413 			perror(ibuf);
414 			exit((int)MANDOCLEVEL_SYSERR);
415 		}
416 
417 		if (verb > 2) {
418 			printf("%s: Truncated\n", fbuf);
419 			printf("%s: Truncated\n", ibuf);
420 		}
421 
422 		ofile_free(of);
423 		of = NULL;
424 
425 		if ( ! ofile_dirbuild(argv[i], verb, &of))
426 			exit((int)MANDOCLEVEL_SYSERR);
427 
428 		if (NULL == of)
429 			continue;
430 
431 		of = of->first;
432 
433 		index_merge(of, mp, &dbuf, &buf, hash, db, fbuf,
434 				idx, ibuf, verb, maxrec, recs, reccur);
435 	}
436 
437 out:
438 	if (db)
439 		(*db->close)(db);
440 	if (idx)
441 		(*idx->close)(idx);
442 	if (hash)
443 		(*hash->close)(hash);
444 	if (mp)
445 		mparse_free(mp);
446 
447 	ofile_free(of);
448 	free(buf.cp);
449 	free(dbuf.cp);
450 	free(recs);
451 
452 	return(MANDOCLEVEL_OK);
453 }
454 
455 void
456 index_merge(const struct of *of, struct mparse *mp,
457 		struct buf *dbuf, struct buf *buf,
458 		DB *hash, DB *db, const char *dbf,
459 		DB *idx, const char *idxf, int verb,
460 		recno_t maxrec, const recno_t *recs, size_t reccur)
461 {
462 	recno_t		 rec;
463 	int		 ch;
464 	DBT		 key, val;
465 	struct mdoc	*mdoc;
466 	struct man	*man;
467 	const char	*fn, *msec, *mtitle, *arch;
468 	size_t		 sv;
469 	unsigned	 seq;
470 	char		 vbuf[8];
471 
472 	for (rec = 0; of; of = of->next) {
473 		fn = of->fname;
474 		if (reccur > 0) {
475 			--reccur;
476 			rec = recs[(int)reccur];
477 		} else if (maxrec > 0) {
478 			rec = maxrec;
479 			maxrec = 0;
480 		} else
481 			rec++;
482 
483 		mparse_reset(mp);
484 		hash_reset(&hash);
485 
486 		if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
487 			fprintf(stderr, "%s: Parse failure\n", fn);
488 			continue;
489 		}
490 
491 		mparse_result(mp, &mdoc, &man);
492 		if (NULL == mdoc && NULL == man)
493 			continue;
494 
495 		msec = NULL != mdoc ?
496 			mdoc_meta(mdoc)->msec : man_meta(man)->msec;
497 		mtitle = NULL != mdoc ?
498 			mdoc_meta(mdoc)->title : man_meta(man)->title;
499 		arch = NULL != mdoc ?
500 			mdoc_meta(mdoc)->arch : NULL;
501 
502 		if (NULL == arch)
503 			arch = "";
504 
505 		/*
506 		 * The index record value consists of a nil-terminated
507 		 * filename, a nil-terminated manual section, and a
508 		 * nil-terminated description.  Since the description
509 		 * may not be set, we set a sentinel to see if we're
510 		 * going to write a nil byte in its place.
511 		 */
512 
513 		dbuf->len = 0;
514 		buf_appendb(dbuf, fn, strlen(fn) + 1);
515 		buf_appendb(dbuf, msec, strlen(msec) + 1);
516 		buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
517 		buf_appendb(dbuf, arch, strlen(arch) + 1);
518 
519 		sv = dbuf->len;
520 
521 		/* Fix the record number in the btree value. */
522 
523 		if (mdoc)
524 			pmdoc_node(hash, buf, dbuf,
525 				mdoc_node(mdoc), mdoc_meta(mdoc));
526 		else
527 			pman_node(hash, buf, dbuf, man_node(man));
528 
529 		/*
530 		 * Copy from the in-memory hashtable of pending keywords
531 		 * into the database.
532 		 */
533 
534 		memset(vbuf, 0, sizeof(uint32_t));
535 		memcpy(vbuf + 4, &rec, sizeof(uint32_t));
536 
537 		seq = R_FIRST;
538 		while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
539 			seq = R_NEXT;
540 
541 			memcpy(vbuf, val.data, sizeof(uint32_t));
542 			val.size = sizeof(vbuf);
543 			val.data = vbuf;
544 
545 			if (verb > 1)
546 				printf("%s: Added keyword: %s\n",
547 						fn, (char *)key.data);
548 			dbt_put(db, dbf, &key, &val);
549 		}
550 		if (ch < 0) {
551 			perror("hash");
552 			exit((int)MANDOCLEVEL_SYSERR);
553 		}
554 
555 		/*
556 		 * Apply to the index.  If we haven't had a description
557 		 * set, put an empty one in now.
558 		 */
559 
560 		if (dbuf->len == sv)
561 			buf_appendb(dbuf, "", 1);
562 
563 		key.data = &rec;
564 		key.size = sizeof(recno_t);
565 
566 		val.data = dbuf->cp;
567 		val.size = dbuf->len;
568 
569 		if (verb)
570 			printf("%s: Added index\n", fn);
571 		dbt_put(idx, idxf, &key, &val);
572 	}
573 }
574 
575 /*
576  * Scan through all entries in the index file `idx' and prune those
577  * entries in `ofile'.
578  * Pruning consists of removing from `db', then invalidating the entry
579  * in `idx' (zeroing its value size).
580  */
581 static void
582 index_prune(const struct of *ofile, DB *db, const char *dbf,
583 		DB *idx, const char *idxf, int verb,
584 		recno_t *maxrec, recno_t **recs, size_t *recsz)
585 {
586 	const struct of	*of;
587 	const char	*fn;
588 	unsigned	 seq, sseq;
589 	DBT		 key, val;
590 	size_t		 reccur;
591 	int		 ch;
592 
593 	reccur = 0;
594 	seq = R_FIRST;
595 	while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) {
596 		seq = R_NEXT;
597 		*maxrec = *(recno_t *)key.data;
598 		if (0 == val.size) {
599 			if (reccur >= *recsz) {
600 				*recsz += MANDOC_SLOP;
601 				*recs = mandoc_realloc(*recs,
602 					*recsz * sizeof(recno_t));
603 			}
604 			(*recs)[(int)reccur] = *maxrec;
605 			reccur++;
606 			continue;
607 		}
608 
609 		fn = (char *)val.data;
610 		for (of = ofile; of; of = of->next)
611 			if (0 == strcmp(fn, of->fname))
612 				break;
613 
614 		if (NULL == of)
615 			continue;
616 
617 		sseq = R_FIRST;
618 		while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
619 			sseq = R_NEXT;
620 			assert(8 == val.size);
621 			if (*maxrec != *(recno_t *)(val.data + 4))
622 				continue;
623 			if (verb)
624 				printf("%s: Deleted keyword: %s\n",
625 						fn, (char *)key.data);
626 			ch = (*db->del)(db, &key, R_CURSOR);
627 			if (ch < 0)
628 				break;
629 		}
630 		if (ch < 0) {
631 			perror(dbf);
632 			exit((int)MANDOCLEVEL_SYSERR);
633 		}
634 
635 		if (verb)
636 			printf("%s: Deleted index\n", fn);
637 
638 		val.size = 0;
639 		ch = (*idx->put)(idx, &key, &val, R_CURSOR);
640 		if (ch < 0) {
641 			perror(idxf);
642 			exit((int)MANDOCLEVEL_SYSERR);
643 		}
644 
645 		if (reccur >= *recsz) {
646 			*recsz += MANDOC_SLOP;
647 			*recs = mandoc_realloc
648 				(*recs, *recsz * sizeof(recno_t));
649 		}
650 
651 		(*recs)[(int)reccur] = *maxrec;
652 		reccur++;
653 	}
654 	(*maxrec)++;
655 }
656 
657 /*
658  * Grow the buffer (if necessary) and copy in a binary string.
659  */
660 static void
661 buf_appendb(struct buf *buf, const void *cp, size_t sz)
662 {
663 
664 	/* Overshoot by MANDOC_BUFSZ. */
665 
666 	while (buf->len + sz >= buf->size) {
667 		buf->size = buf->len + sz + MANDOC_BUFSZ;
668 		buf->cp = mandoc_realloc(buf->cp, buf->size);
669 	}
670 
671 	memcpy(buf->cp + (int)buf->len, cp, sz);
672 	buf->len += sz;
673 }
674 
675 /*
676  * Append a nil-terminated string to the buffer.
677  * This can be invoked multiple times.
678  * The buffer string will be nil-terminated.
679  * If invoked multiple times, a space is put between strings.
680  */
681 static void
682 buf_append(struct buf *buf, const char *cp)
683 {
684 	size_t		 sz;
685 
686 	if (0 == (sz = strlen(cp)))
687 		return;
688 
689 	if (buf->len)
690 		buf->cp[(int)buf->len - 1] = ' ';
691 
692 	buf_appendb(buf, cp, sz + 1);
693 }
694 
695 /*
696  * Recursively add all text from a given node.
697  * This is optimised for general mdoc nodes in this context, which do
698  * not consist of subexpressions and having a recursive call for n->next
699  * would be wasteful.
700  * The "f" variable should be 0 unless called from pmdoc_Nd for the
701  * description buffer, which does not start at the beginning of the
702  * buffer.
703  */
704 static void
705 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
706 {
707 
708 	for ( ; n; n = n->next) {
709 		if (n->child)
710 			buf_appendmdoc(buf, n->child, f);
711 
712 		if (MDOC_TEXT == n->type && f) {
713 			f = 0;
714 			buf_appendb(buf, n->string,
715 					strlen(n->string) + 1);
716 		} else if (MDOC_TEXT == n->type)
717 			buf_append(buf, n->string);
718 
719 	}
720 }
721 
722 /* ARGSUSED */
723 static void
724 pmdoc_An(MDOC_ARGS)
725 {
726 
727 	if (SEC_AUTHORS != n->sec)
728 		return;
729 
730 	buf_appendmdoc(buf, n->child, 0);
731 	hash_put(hash, buf, TYPE_AUTHOR);
732 }
733 
734 static void
735 hash_reset(DB **db)
736 {
737 	DB		*hash;
738 
739 	if (NULL != (hash = *db))
740 		(*hash->close)(hash);
741 
742 	*db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
743 	if (NULL == *db) {
744 		perror("hash");
745 		exit((int)MANDOCLEVEL_SYSERR);
746 	}
747 }
748 
749 /* ARGSUSED */
750 static void
751 pmdoc_Fd(MDOC_ARGS)
752 {
753 	const char	*start, *end;
754 	size_t		 sz;
755 
756 	if (SEC_SYNOPSIS != n->sec)
757 		return;
758 	if (NULL == (n = n->child) || MDOC_TEXT != n->type)
759 		return;
760 
761 	/*
762 	 * Only consider those `Fd' macro fields that begin with an
763 	 * "inclusion" token (versus, e.g., #define).
764 	 */
765 	if (strcmp("#include", n->string))
766 		return;
767 
768 	if (NULL == (n = n->next) || MDOC_TEXT != n->type)
769 		return;
770 
771 	/*
772 	 * Strip away the enclosing angle brackets and make sure we're
773 	 * not zero-length.
774 	 */
775 
776 	start = n->string;
777 	if ('<' == *start || '"' == *start)
778 		start++;
779 
780 	if (0 == (sz = strlen(start)))
781 		return;
782 
783 	end = &start[(int)sz - 1];
784 	if ('>' == *end || '"' == *end)
785 		end--;
786 
787 	assert(end >= start);
788 
789 	buf_appendb(buf, start, (size_t)(end - start + 1));
790 	buf_appendb(buf, "", 1);
791 
792 	hash_put(hash, buf, TYPE_INCLUDES);
793 }
794 
795 /* ARGSUSED */
796 static void
797 pmdoc_Cd(MDOC_ARGS)
798 {
799 
800 	if (SEC_SYNOPSIS != n->sec)
801 		return;
802 
803 	buf_appendmdoc(buf, n->child, 0);
804 	hash_put(hash, buf, TYPE_CONFIG);
805 }
806 
807 /* ARGSUSED */
808 static void
809 pmdoc_In(MDOC_ARGS)
810 {
811 
812 	if (SEC_SYNOPSIS != n->sec)
813 		return;
814 	if (NULL == n->child || MDOC_TEXT != n->child->type)
815 		return;
816 
817 	buf_append(buf, n->child->string);
818 	hash_put(hash, buf, TYPE_INCLUDES);
819 }
820 
821 /* ARGSUSED */
822 static void
823 pmdoc_Fn(MDOC_ARGS)
824 {
825 	const char	*cp;
826 
827 	if (SEC_SYNOPSIS != n->sec)
828 		return;
829 	if (NULL == n->child || MDOC_TEXT != n->child->type)
830 		return;
831 
832 	/* .Fn "struct type *arg" "foo" */
833 
834 	cp = strrchr(n->child->string, ' ');
835 	if (NULL == cp)
836 		cp = n->child->string;
837 
838 	/* Strip away pointer symbol. */
839 
840 	while ('*' == *cp)
841 		cp++;
842 
843 	buf_append(buf, cp);
844 	hash_put(hash, buf, TYPE_FUNCTION);
845 }
846 
847 /* ARGSUSED */
848 static void
849 pmdoc_St(MDOC_ARGS)
850 {
851 
852 	if (SEC_STANDARDS != n->sec)
853 		return;
854 	if (NULL == n->child || MDOC_TEXT != n->child->type)
855 		return;
856 
857 	buf_append(buf, n->child->string);
858 	hash_put(hash, buf, TYPE_STANDARD);
859 }
860 
861 /* ARGSUSED */
862 static void
863 pmdoc_Xr(MDOC_ARGS)
864 {
865 
866 	if (NULL == (n = n->child))
867 		return;
868 
869 	buf_appendb(buf, n->string, strlen(n->string));
870 
871 	if (NULL != (n = n->next)) {
872 		buf_appendb(buf, ".", 1);
873 		buf_appendb(buf, n->string, strlen(n->string) + 1);
874 	} else
875 		buf_appendb(buf, ".", 2);
876 
877 	hash_put(hash, buf, TYPE_XREF);
878 }
879 
880 /* ARGSUSED */
881 static void
882 pmdoc_Vt(MDOC_ARGS)
883 {
884 	const char	*start;
885 	size_t		 sz;
886 
887 	if (SEC_SYNOPSIS != n->sec)
888 		return;
889 	if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
890 		return;
891 	if (NULL == n->last || MDOC_TEXT != n->last->type)
892 		return;
893 
894 	/*
895 	 * Strip away leading pointer symbol '*' and trailing ';'.
896 	 */
897 
898 	start = n->last->string;
899 
900 	while ('*' == *start)
901 		start++;
902 
903 	if (0 == (sz = strlen(start)))
904 		return;
905 
906 	if (';' == start[(int)sz - 1])
907 		sz--;
908 
909 	if (0 == sz)
910 		return;
911 
912 	buf_appendb(buf, start, sz);
913 	buf_appendb(buf, "", 1);
914 	hash_put(hash, buf, TYPE_VARIABLE);
915 }
916 
917 /* ARGSUSED */
918 static void
919 pmdoc_Fo(MDOC_ARGS)
920 {
921 
922 	if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
923 		return;
924 	if (NULL == n->child || MDOC_TEXT != n->child->type)
925 		return;
926 
927 	buf_append(buf, n->child->string);
928 	hash_put(hash, buf, TYPE_FUNCTION);
929 }
930 
931 
932 /* ARGSUSED */
933 static void
934 pmdoc_Nd(MDOC_ARGS)
935 {
936 
937 	if (MDOC_BODY != n->type)
938 		return;
939 
940 	buf_appendmdoc(dbuf, n->child, 1);
941 	buf_appendmdoc(buf, n->child, 0);
942 
943 	hash_put(hash, buf, TYPE_DESC);
944 }
945 
946 /* ARGSUSED */
947 static void
948 pmdoc_Er(MDOC_ARGS)
949 {
950 
951 	if (SEC_ERRORS != n->sec)
952 		return;
953 
954 	buf_appendmdoc(buf, n->child, 0);
955 	hash_put(hash, buf, TYPE_ERR);
956 }
957 
958 /* ARGSUSED */
959 static void
960 pmdoc_Ev(MDOC_ARGS)
961 {
962 
963 	if (SEC_ENVIRONMENT != n->sec)
964 		return;
965 
966 	buf_appendmdoc(buf, n->child, 0);
967 	hash_put(hash, buf, TYPE_ENV);
968 }
969 
970 /* ARGSUSED */
971 static void
972 pmdoc_Pa(MDOC_ARGS)
973 {
974 
975 	if (SEC_FILES != n->sec)
976 		return;
977 
978 	buf_appendmdoc(buf, n->child, 0);
979 	hash_put(hash, buf, TYPE_PATH);
980 }
981 
982 /* ARGSUSED */
983 static void
984 pmdoc_Nm(MDOC_ARGS)
985 {
986 
987 	if (SEC_NAME == n->sec) {
988 		buf_appendmdoc(buf, n->child, 0);
989 		hash_put(hash, buf, TYPE_NAME);
990 		return;
991 	} else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
992 		return;
993 
994 	if (NULL == n->child)
995 		buf_append(buf, m->name);
996 
997 	buf_appendmdoc(buf, n->child, 0);
998 	hash_put(hash, buf, TYPE_UTILITY);
999 }
1000 
1001 static void
1002 hash_put(DB *db, const struct buf *buf, int mask)
1003 {
1004 	DBT		 key, val;
1005 	int		 rc;
1006 
1007 	if (buf->len < 2)
1008 		return;
1009 
1010 	key.data = buf->cp;
1011 	key.size = buf->len;
1012 
1013 	if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
1014 		perror("hash");
1015 		exit((int)MANDOCLEVEL_SYSERR);
1016 	} else if (0 == rc)
1017 		mask |= *(int *)val.data;
1018 
1019 	val.data = &mask;
1020 	val.size = sizeof(int);
1021 
1022 	if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1023 		perror("hash");
1024 		exit((int)MANDOCLEVEL_SYSERR);
1025 	}
1026 }
1027 
1028 static void
1029 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1030 {
1031 
1032 	assert(key->size);
1033 	assert(val->size);
1034 
1035 	if (0 == (*db->put)(db, key, val, 0))
1036 		return;
1037 
1038 	perror(dbn);
1039 	exit((int)MANDOCLEVEL_SYSERR);
1040 	/* NOTREACHED */
1041 }
1042 
1043 /*
1044  * Call out to per-macro handlers after clearing the persistent database
1045  * key.  If the macro sets the database key, flush it to the database.
1046  */
1047 static void
1048 pmdoc_node(MDOC_ARGS)
1049 {
1050 
1051 	if (NULL == n)
1052 		return;
1053 
1054 	switch (n->type) {
1055 	case (MDOC_HEAD):
1056 		/* FALLTHROUGH */
1057 	case (MDOC_BODY):
1058 		/* FALLTHROUGH */
1059 	case (MDOC_TAIL):
1060 		/* FALLTHROUGH */
1061 	case (MDOC_BLOCK):
1062 		/* FALLTHROUGH */
1063 	case (MDOC_ELEM):
1064 		if (NULL == mdocs[n->tok])
1065 			break;
1066 
1067 		buf->len = 0;
1068 		(*mdocs[n->tok])(hash, buf, dbuf, n, m);
1069 		break;
1070 	default:
1071 		break;
1072 	}
1073 
1074 	pmdoc_node(hash, buf, dbuf, n->child, m);
1075 	pmdoc_node(hash, buf, dbuf, n->next, m);
1076 }
1077 
1078 static int
1079 pman_node(MAN_ARGS)
1080 {
1081 	const struct man_node *head, *body;
1082 	const char	*start, *sv;
1083 	size_t		 sz;
1084 
1085 	if (NULL == n)
1086 		return(0);
1087 
1088 	/*
1089 	 * We're only searching for one thing: the first text child in
1090 	 * the BODY of a NAME section.  Since we don't keep track of
1091 	 * sections in -man, run some hoops to find out whether we're in
1092 	 * the correct section or not.
1093 	 */
1094 
1095 	if (MAN_BODY == n->type && MAN_SH == n->tok) {
1096 		body = n;
1097 		assert(body->parent);
1098 		if (NULL != (head = body->parent->head) &&
1099 				1 == head->nchild &&
1100 				NULL != (head = (head->child)) &&
1101 				MAN_TEXT == head->type &&
1102 				0 == strcmp(head->string, "NAME") &&
1103 				NULL != (body = body->child) &&
1104 				MAN_TEXT == body->type) {
1105 
1106 			assert(body->string);
1107 			start = sv = body->string;
1108 
1109 			/*
1110 			 * Go through a special heuristic dance here.
1111 			 * This is why -man manuals are great!
1112 			 * (I'm being sarcastic: my eyes are bleeding.)
1113 			 * Conventionally, one or more manual names are
1114 			 * comma-specified prior to a whitespace, then a
1115 			 * dash, then a description.  Try to puzzle out
1116 			 * the name parts here.
1117 			 */
1118 
1119 			for ( ;; ) {
1120 				sz = strcspn(start, " ,");
1121 				if ('\0' == start[(int)sz])
1122 					break;
1123 
1124 				buf->len = 0;
1125 				buf_appendb(buf, start, sz);
1126 				buf_appendb(buf, "", 1);
1127 
1128 				hash_put(hash, buf, TYPE_NAME);
1129 
1130 				if (' ' == start[(int)sz]) {
1131 					start += (int)sz + 1;
1132 					break;
1133 				}
1134 
1135 				assert(',' == start[(int)sz]);
1136 				start += (int)sz + 1;
1137 				while (' ' == *start)
1138 					start++;
1139 			}
1140 
1141 			buf->len = 0;
1142 
1143 			if (sv == start) {
1144 				buf_append(buf, start);
1145 				return(1);
1146 			}
1147 
1148 			while (' ' == *start)
1149 				start++;
1150 
1151 			if (0 == strncmp(start, "-", 1))
1152 				start += 1;
1153 			else if (0 == strncmp(start, "\\-", 2))
1154 				start += 2;
1155 			else if (0 == strncmp(start, "\\(en", 4))
1156 				start += 4;
1157 			else if (0 == strncmp(start, "\\(em", 4))
1158 				start += 4;
1159 
1160 			while (' ' == *start)
1161 				start++;
1162 
1163 			sz = strlen(start) + 1;
1164 			buf_appendb(dbuf, start, sz);
1165 			buf_appendb(buf, start, sz);
1166 
1167 			hash_put(hash, buf, TYPE_DESC);
1168 		}
1169 	}
1170 
1171 	if (pman_node(hash, buf, dbuf, n->child))
1172 		return(1);
1173 	if (pman_node(hash, buf, dbuf, n->next))
1174 		return(1);
1175 
1176 	return(0);
1177 }
1178 
1179 static void
1180 ofile_argbuild(char *argv[], int argc, int verb, struct of **of)
1181 {
1182 	int		 i;
1183 	struct of	*nof;
1184 
1185 	for (i = 0; i < argc; i++) {
1186 		nof = mandoc_calloc(1, sizeof(struct of));
1187 		nof->fname = strdup(argv[i]);
1188 		if (verb > 2)
1189 			printf("%s: Scheduling\n", argv[i]);
1190 		if (NULL == *of) {
1191 			*of = nof;
1192 			(*of)->first = nof;
1193 		} else {
1194 			nof->first = (*of)->first;
1195 			(*of)->next = nof;
1196 			*of = nof;
1197 		}
1198 	}
1199 }
1200 
1201 /*
1202  * Recursively build up a list of files to parse.
1203  * We use this instead of ftw() and so on because I don't want global
1204  * variables hanging around.
1205  * This ignores the mandoc.db and mandoc.index files, but assumes that
1206  * everything else is a manual.
1207  * Pass in a pointer to a NULL structure for the first invocation.
1208  */
1209 static int
1210 ofile_dirbuild(const char *dir, int verb, struct of **of)
1211 {
1212 	char		 buf[MAXPATHLEN];
1213 	size_t		 sz;
1214 	DIR		*d;
1215 	const char	*fn;
1216 	struct of	*nof;
1217 	struct dirent	*dp;
1218 
1219 	if (NULL == (d = opendir(dir))) {
1220 		perror(dir);
1221 		return(0);
1222 	}
1223 
1224 	while (NULL != (dp = readdir(d))) {
1225 		fn = dp->d_name;
1226 		if (DT_DIR == dp->d_type) {
1227 			if (0 == strcmp(".", fn))
1228 				continue;
1229 			if (0 == strcmp("..", fn))
1230 				continue;
1231 
1232 			buf[0] = '\0';
1233 			strlcat(buf, dir, MAXPATHLEN);
1234 			strlcat(buf, "/", MAXPATHLEN);
1235 			sz = strlcat(buf, fn, MAXPATHLEN);
1236 
1237 			if (sz < MAXPATHLEN) {
1238 				if ( ! ofile_dirbuild(buf, verb, of))
1239 					return(0);
1240 				continue;
1241 			} else if (sz < MAXPATHLEN)
1242 				continue;
1243 
1244 			fprintf(stderr, "%s: Path too long\n", dir);
1245 			return(0);
1246 		}
1247 		if (DT_REG != dp->d_type)
1248 			continue;
1249 
1250 		if (0 == strcmp(MANDOC_DB, fn) ||
1251 				0 == strcmp(MANDOC_IDX, fn))
1252 			continue;
1253 
1254 		buf[0] = '\0';
1255 		strlcat(buf, dir, MAXPATHLEN);
1256 		strlcat(buf, "/", MAXPATHLEN);
1257 		sz = strlcat(buf, fn, MAXPATHLEN);
1258 		if (sz >= MAXPATHLEN) {
1259 			fprintf(stderr, "%s: Path too long\n", dir);
1260 			return(0);
1261 		}
1262 
1263 		nof = mandoc_calloc(1, sizeof(struct of));
1264 		nof->fname = mandoc_strdup(buf);
1265 
1266 		if (verb > 2)
1267 			printf("%s: Scheduling\n", buf);
1268 
1269 		if (NULL == *of) {
1270 			*of = nof;
1271 			(*of)->first = nof;
1272 		} else {
1273 			nof->first = (*of)->first;
1274 			(*of)->next = nof;
1275 			*of = nof;
1276 		}
1277 	}
1278 
1279 	return(1);
1280 }
1281 
1282 static void
1283 ofile_free(struct of *of)
1284 {
1285 	struct of	*nof;
1286 
1287 	while (of) {
1288 		nof = of->next;
1289 		free(of->fname);
1290 		free(of);
1291 		of = nof;
1292 	}
1293 }
1294 
1295 static void
1296 usage(void)
1297 {
1298 
1299 	fprintf(stderr, "usage: %s [-v] "
1300 			"[-d dir [files...] |"
1301 			" -u dir [files...] |"
1302 			" dir...]\n", progname);
1303 }
1304