xref: /openbsd-src/usr.bin/mandoc/mandocdb.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*	$Id: mandocdb.c,v 1.42 2012/05/24 23:33:23 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/param.h>
19 #include <sys/types.h>
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <getopt.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <db.h>
33 
34 #include "man.h"
35 #include "mdoc.h"
36 #include "mandoc.h"
37 #include "mandocdb.h"
38 #include "manpath.h"
39 
/*
 * MANDOC_BUFSZ is both the initial size of the keyword and description
 * buffers and the amount by which buf_appendb() overshoots when growing.
 * MANDOC_SLOP is the number of slots added whenever the stack of
 * reusable index records has to grow.
 */
#define	MANDOC_BUFSZ	  BUFSIZ
#define	MANDOC_SLOP	  1024

/*
 * Bits tested in the src_form member of struct of.
 * A file is handed to the parser if MANDOC_SRC is set or if
 * MANDOC_FORM is not set; otherwise it is treated as preformatted.
 */
#define	MANDOC_SRC	  0x1
#define	MANDOC_FORM	  0x2
45 
/*
 * Access to the mandoc database on disk.
 * Both handles stay NULL in test mode, in which case index_merge()
 * parses the files but writes nothing.
 */

struct	mdb {
	char		  idxn[MAXPATHLEN]; /* index db filename */
	char		  dbn[MAXPATHLEN]; /* keyword db filename */
	DB		 *idx; /* index recno database */
	DB		 *db; /* keyword btree database */
};
54 
/*
 * Stack of temporarily unused index records.
 * Filled by index_prune() and drained by index_merge(), which reuses
 * the record numbers for newly added files.
 */

struct	recs {
	recno_t		 *stack; /* pointer to a malloc'ed array */
	size_t		  size; /* number of allocated slots */
	size_t		  cur; /* current number of empty records */
	recno_t		  last; /* while pruning: record being visited;
				   afterwards: one past the last record,
				   or 0 once index_merge() has used it */
};
63 
/*
 * Tiny list for files.  No need to bring in QUEUE.
 * Every element points back to the head through `first', so a
 * traversal can be started from any element.
 */

struct	of {
	char		 *fname; /* heap-allocated */
	char		 *sec; /* section expected from the file's location */
	char		 *arch; /* architecture expected from the location */
	char		 *title; /* title expected from the file name */
	int		  src_form; /* MANDOC_SRC and/or MANDOC_FORM bits */
	struct of	 *next; /* NULL for last one */
	struct of	 *first; /* first in list */
};
75 
/*
 * Buffer for storing growable data.
 * Grown on demand by buf_appendb(); reused by setting len back to 0.
 */

struct	buf {
	char		 *cp; /* start of the allocation */
	size_t		  len; /* current length */
	size_t		  size; /* total buffer size */
};
83 
/*
 * Operation we're going to perform.
 * OP_DEFAULT must remain 0: the option parser uses "if (op)" to detect
 * that a mode has already been chosen.
 * On the command line, -C selects OP_CONFFILE, -d selects OP_UPDATE,
 * -u selects OP_DELETE, and -t selects OP_TEST.
 */

enum	op {
	OP_DEFAULT = 0, /* new dbs from dir list or default config */
	OP_CONFFILE, /* new databases from custom config file */
	OP_UPDATE, /* delete/add entries in existing database */
	OP_DELETE, /* delete entries from existing database */
	OP_TEST /* change no databases, report potential problems */
};
93 
/*
 * Common parameter lists of the man(7) and mdoc(7) node handlers:
 * the pending keyword table, the keyword buffer, the description
 * buffer, and the node to inspect.  The mdoc(7) variant additionally
 * receives the document meta data.
 */
#define	MAN_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n
#define	MDOC_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m
103 
/*
 * Forward declarations of the file-local helpers: buffer handling,
 * database access, file list construction, and the per-macro
 * mdoc(7) handlers referenced from the mdocs[] table.
 */
static	void		  buf_appendmdoc(struct buf *,
				const struct mdoc_node *, int);
static	void		  buf_append(struct buf *, const char *);
static	void		  buf_appendb(struct buf *,
				const void *, size_t);
static	void		  dbt_put(DB *, const char *, DBT *, DBT *);
static	void		  hash_put(DB *, const struct buf *, uint64_t);
static	void		  hash_reset(DB **);
static	void		  index_merge(const struct of *, struct mparse *,
				struct buf *, struct buf *, DB *,
				struct mdb *, struct recs *);
static	void		  index_prune(const struct of *, struct mdb *,
				struct recs *);
static	void		  ofile_argbuild(int, char *[], struct of **,
				const char *);
static	void		  ofile_dirbuild(const char *, const char *,
				const char *, int, struct of **);
static	void		  ofile_free(struct of *);
static	void		  pformatted(DB *, struct buf *,
				struct buf *, const struct of *);
static	int		  pman_node(MAN_ARGS);
static	void		  pmdoc_node(MDOC_ARGS);
static	int		  pmdoc_head(MDOC_ARGS);
static	int		  pmdoc_body(MDOC_ARGS);
static	int		  pmdoc_Fd(MDOC_ARGS);
static	int		  pmdoc_In(MDOC_ARGS);
static	int		  pmdoc_Fn(MDOC_ARGS);
static	int		  pmdoc_Nd(MDOC_ARGS);
static	int		  pmdoc_Nm(MDOC_ARGS);
static	int		  pmdoc_Sh(MDOC_ARGS);
static	int		  pmdoc_St(MDOC_ARGS);
static	int		  pmdoc_Xr(MDOC_ARGS);
136 
#define	MDOCF_CHILD	  0x01  /* Automatically index child nodes. */

/*
 * One entry of the mdocs[] dispatch table, describing how a single
 * mdoc(7) macro contributes keywords to the database.
 */
struct	mdoc_handler {
	int		(*fp)(MDOC_ARGS);  /* Optional handler. */
	uint64_t	  mask;  /* Set unless handler returns 0. */
	int		  flags;  /* For use by pmdoc_node. */
};
144 
/*
 * Per-macro indexing rules, one row per mdoc(7) macro; the trailing
 * comment on each row names the macro it belongs to.
 * NOTE(review): presumably indexed by the macro token value, so the
 * row order has to match the token enumeration in mdoc.h -- confirm
 * before inserting or reordering rows.
 * Rows of the form { NULL, 0, 0 } contribute nothing.
 */
static	const struct mdoc_handler mdocs[MDOC_MAX] = {
	{ NULL, 0, 0 },  /* Ap */
	{ NULL, 0, 0 },  /* Dd */
	{ NULL, 0, 0 },  /* Dt */
	{ NULL, 0, 0 },  /* Os */
	{ pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
	{ pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
	{ NULL, 0, 0 },  /* Pp */
	{ NULL, 0, 0 },  /* D1 */
	{ NULL, 0, 0 },  /* Dl */
	{ NULL, 0, 0 },  /* Bd */
	{ NULL, 0, 0 },  /* Ed */
	{ NULL, 0, 0 },  /* Bl */
	{ NULL, 0, 0 },  /* El */
	{ NULL, 0, 0 },  /* It */
	{ NULL, 0, 0 },  /* Ad */
	{ NULL, TYPE_An, MDOCF_CHILD },  /* An */
	{ NULL, TYPE_Ar, MDOCF_CHILD },  /* Ar */
	{ NULL, TYPE_Cd, MDOCF_CHILD },  /* Cd */
	{ NULL, TYPE_Cm, MDOCF_CHILD },  /* Cm */
	{ NULL, TYPE_Dv, MDOCF_CHILD },  /* Dv */
	{ NULL, TYPE_Er, MDOCF_CHILD },  /* Er */
	{ NULL, TYPE_Ev, MDOCF_CHILD },  /* Ev */
	{ NULL, 0, 0 },  /* Ex */
	{ NULL, TYPE_Fa, MDOCF_CHILD },  /* Fa */
	{ pmdoc_Fd, TYPE_In, 0 },  /* Fd */
	{ NULL, TYPE_Fl, MDOCF_CHILD },  /* Fl */
	{ pmdoc_Fn, 0, 0 },  /* Fn */
	{ NULL, TYPE_Ft, MDOCF_CHILD },  /* Ft */
	{ NULL, TYPE_Ic, MDOCF_CHILD },  /* Ic */
	{ pmdoc_In, TYPE_In, 0 },  /* In */
	{ NULL, TYPE_Li, MDOCF_CHILD },  /* Li */
	{ pmdoc_Nd, TYPE_Nd, MDOCF_CHILD },  /* Nd */
	{ pmdoc_Nm, TYPE_Nm, MDOCF_CHILD },  /* Nm */
	{ NULL, 0, 0 },  /* Op */
	{ NULL, 0, 0 },  /* Ot */
	{ NULL, TYPE_Pa, MDOCF_CHILD },  /* Pa */
	{ NULL, 0, 0 },  /* Rv */
	{ pmdoc_St, TYPE_St, 0 },  /* St */
	{ NULL, TYPE_Va, MDOCF_CHILD },  /* Va */
	{ pmdoc_body, TYPE_Va, MDOCF_CHILD },  /* Vt */
	{ pmdoc_Xr, TYPE_Xr, 0 },  /* Xr */
	{ NULL, 0, 0 },  /* %A */
	{ NULL, 0, 0 },  /* %B */
	{ NULL, 0, 0 },  /* %D */
	{ NULL, 0, 0 },  /* %I */
	{ NULL, 0, 0 },  /* %J */
	{ NULL, 0, 0 },  /* %N */
	{ NULL, 0, 0 },  /* %O */
	{ NULL, 0, 0 },  /* %P */
	{ NULL, 0, 0 },  /* %R */
	{ NULL, 0, 0 },  /* %T */
	{ NULL, 0, 0 },  /* %V */
	{ NULL, 0, 0 },  /* Ac */
	{ NULL, 0, 0 },  /* Ao */
	{ NULL, 0, 0 },  /* Aq */
	{ NULL, TYPE_At, MDOCF_CHILD },  /* At */
	{ NULL, 0, 0 },  /* Bc */
	{ NULL, 0, 0 },  /* Bf */
	{ NULL, 0, 0 },  /* Bo */
	{ NULL, 0, 0 },  /* Bq */
	{ NULL, TYPE_Bsx, MDOCF_CHILD },  /* Bsx */
	{ NULL, TYPE_Bx, MDOCF_CHILD },  /* Bx */
	{ NULL, 0, 0 },  /* Db */
	{ NULL, 0, 0 },  /* Dc */
	{ NULL, 0, 0 },  /* Do */
	{ NULL, 0, 0 },  /* Dq */
	{ NULL, 0, 0 },  /* Ec */
	{ NULL, 0, 0 },  /* Ef */
	{ NULL, TYPE_Em, MDOCF_CHILD },  /* Em */
	{ NULL, 0, 0 },  /* Eo */
	{ NULL, TYPE_Fx, MDOCF_CHILD },  /* Fx */
	{ NULL, TYPE_Ms, MDOCF_CHILD },  /* Ms */
	{ NULL, 0, 0 },  /* No */
	{ NULL, 0, 0 },  /* Ns */
	{ NULL, TYPE_Nx, MDOCF_CHILD },  /* Nx */
	{ NULL, TYPE_Ox, MDOCF_CHILD },  /* Ox */
	{ NULL, 0, 0 },  /* Pc */
	{ NULL, 0, 0 },  /* Pf */
	{ NULL, 0, 0 },  /* Po */
	{ NULL, 0, 0 },  /* Pq */
	{ NULL, 0, 0 },  /* Qc */
	{ NULL, 0, 0 },  /* Ql */
	{ NULL, 0, 0 },  /* Qo */
	{ NULL, 0, 0 },  /* Qq */
	{ NULL, 0, 0 },  /* Re */
	{ NULL, 0, 0 },  /* Rs */
	{ NULL, 0, 0 },  /* Sc */
	{ NULL, 0, 0 },  /* So */
	{ NULL, 0, 0 },  /* Sq */
	{ NULL, 0, 0 },  /* Sm */
	{ NULL, 0, 0 },  /* Sx */
	{ NULL, TYPE_Sy, MDOCF_CHILD },  /* Sy */
	{ NULL, TYPE_Tn, MDOCF_CHILD },  /* Tn */
	{ NULL, 0, 0 },  /* Ux */
	{ NULL, 0, 0 },  /* Xc */
	{ NULL, 0, 0 },  /* Xo */
	{ pmdoc_head, TYPE_Fn, 0 },  /* Fo */
	{ NULL, 0, 0 },  /* Fc */
	{ NULL, 0, 0 },  /* Oo */
	{ NULL, 0, 0 },  /* Oc */
	{ NULL, 0, 0 },  /* Bk */
	{ NULL, 0, 0 },  /* Ek */
	{ NULL, 0, 0 },  /* Bt */
	{ NULL, 0, 0 },  /* Hf */
	{ NULL, 0, 0 },  /* Fr */
	{ NULL, 0, 0 },  /* Ud */
	{ NULL, TYPE_Lb, MDOCF_CHILD },  /* Lb */
	{ NULL, 0, 0 },  /* Lp */
	{ NULL, TYPE_Lk, MDOCF_CHILD },  /* Lk */
	{ NULL, TYPE_Mt, MDOCF_CHILD },  /* Mt */
	{ NULL, 0, 0 },  /* Brq */
	{ NULL, 0, 0 },  /* Bro */
	{ NULL, 0, 0 },  /* Brc */
	{ NULL, 0, 0 },  /* %C */
	{ NULL, 0, 0 },  /* Es */
	{ NULL, 0, 0 },  /* En */
	{ NULL, TYPE_Dx, MDOCF_CHILD },  /* Dx */
	{ NULL, 0, 0 },  /* %Q */
	{ NULL, 0, 0 },  /* br */
	{ NULL, 0, 0 },  /* sp */
	{ NULL, 0, 0 },  /* %U */
	{ NULL, 0, 0 },  /* Ta */
};
269 
/*
 * Process-wide settings, written by the option parser in mandocdb()
 * and read by the indexing routines.
 */
static	const char	 *progname;  /* Basename of argv[0], for usage. */
static	int		  use_all;  /* Use all directories and files. */
static	int		  verb;  /* Output verbosity level. */
static	int		  warnings;  /* Potential problems in manuals. */
274 
/*
 * Program entry point: parse the command line and then create,
 * update, prune, or merely test mandoc databases.
 *
 * At most one of the mode options may be given:
 *   (none)   rebuild the databases of the directories named on the
 *            command line, or of the default manual path;
 *   -C file  the same, with `file' passed on to manpath_parse();
 *   -d dir   OP_UPDATE: prune the named files from the databases in
 *            `dir', then add them again;
 *   -u dir   OP_DELETE: prune the named files only;
 *   -t       OP_TEST: parse the named files with warnings enabled,
 *            opening no database.
 * -a, -v and -W set use_all, verb and warnings, respectively.
 *
 * Returns MANDOCLEVEL_BADARG after printing the usage message and
 * MANDOCLEVEL_OK whenever the `out' label is reached; many failures
 * terminate the process directly through exit().
 */
int
mandocdb(int argc, char *argv[])
{
	struct mparse	*mp; /* parse sequence */
	struct manpaths	 dirs;
	struct mdb	 mdb;
	struct recs	 recs;
	enum op		 op; /* current operation */
	const char	*dir;
	char		*cp;
	char		 pbuf[PATH_MAX];
	int		 ch, i, flags;
	DB		*hash; /* temporary keyword hashtable */
	BTREEINFO	 info; /* btree configuration */
	size_t		 sz1, sz2;
	struct buf	 buf, /* keyword buffer */
			 dbuf; /* description buffer */
	struct of	*of; /* list of files for processing */
	extern int	 optind;
	extern char	*optarg;

	/* Use the basename of argv[0] in the usage message. */

	progname = strrchr(argv[0], '/');
	if (progname == NULL)
		progname = argv[0];
	else
		++progname;

	memset(&dirs, 0, sizeof(struct manpaths));
	memset(&mdb, 0, sizeof(struct mdb));
	memset(&recs, 0, sizeof(struct recs));

	of = NULL;
	mp = NULL;
	hash = NULL;
	op = OP_DEFAULT;
	dir = NULL;

	/*
	 * Each mode option refuses to run if a mode was already
	 * selected; this relies on OP_DEFAULT being 0.
	 */

	while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
		switch (ch) {
		case ('a'):
			use_all = 1;
			break;
		case ('C'):
			if (op) {
				fprintf(stderr,
				    "-C: conflicting options\n");
				goto usage;
			}
			dir = optarg;
			op = OP_CONFFILE;
			break;
		case ('d'):
			if (op) {
				fprintf(stderr,
				    "-d: conflicting options\n");
				goto usage;
			}
			dir = optarg;
			op = OP_UPDATE;
			break;
		case ('t'):
			/* From here on, diagnostics go to standard output. */
			dup2(STDOUT_FILENO, STDERR_FILENO);
			if (op) {
				fprintf(stderr,
				    "-t: conflicting options\n");
				goto usage;
			}
			op = OP_TEST;
			use_all = 1;
			warnings = 1;
			break;
		case ('u'):
			if (op) {
				fprintf(stderr,
				    "-u: conflicting options\n");
				goto usage;
			}
			dir = optarg;
			op = OP_DELETE;
			break;
		case ('v'):
			verb++;
			break;
		case ('W'):
			warnings = 1;
			break;
		default:
			goto usage;
		}

	argc -= optind;
	argv += optind;

	if (OP_CONFFILE == op && argc > 0) {
		fprintf(stderr, "-C: too many arguments\n");
		goto usage;
	}

	/* The keyword btree is big-endian and allows duplicate keys. */

	memset(&info, 0, sizeof(BTREEINFO));
	info.lorder = 4321;
	info.flags = R_DUP;

	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);

	memset(&buf, 0, sizeof(struct buf));
	memset(&dbuf, 0, sizeof(struct buf));

	buf.size = dbuf.size = MANDOC_BUFSZ;

	buf.cp = mandoc_malloc(buf.size);
	dbuf.cp = mandoc_malloc(dbuf.size);

	/*
	 * Test mode: mdb was zeroed above, so index_merge() sees NULL
	 * database handles and writes nothing.
	 * NOTE(review): `hash' is passed to index_merge() by value, so
	 * it is still NULL when `out' is reached and the close there
	 * never runs for a table opened inside index_merge().
	 */

	if (OP_TEST == op) {
		ofile_argbuild(argc, argv, &of, NULL);
		if (NULL == of)
			goto out;
		index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
		goto out;
	}

	/* Update or delete entries in the existing databases of `dir'. */

	if (OP_UPDATE == op || OP_DELETE == op) {
		if (NULL == realpath(dir, pbuf)) {
			perror(dir);
			exit((int)MANDOCLEVEL_BADARG);
		}
		if (strlcat(pbuf, "/", PATH_MAX) >= PATH_MAX) {
			fprintf(stderr, "%s: path too long\n", pbuf);
			exit((int)MANDOCLEVEL_BADARG);
		}

		/* mdb was zeroed, so these concatenations act as copies. */

		strlcat(mdb.dbn, pbuf, MAXPATHLEN);
		sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN);

		strlcat(mdb.idxn, pbuf, MAXPATHLEN);
		sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN);

		if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
			fprintf(stderr, "%s: path too long\n", mdb.idxn);
			exit((int)MANDOCLEVEL_BADARG);
		}

		flags = O_CREAT | O_RDWR;
		mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
		mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);

		if (NULL == mdb.db) {
			perror(mdb.dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		} else if (NULL == mdb.idx) {
			perror(mdb.idxn);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		ofile_argbuild(argc, argv, &of, pbuf);

		if (NULL == of)
			goto out;

		/* Remove stale entries; this also collects free records. */

		index_prune(of, &mdb, &recs);

		/*
		 * Go to the root of the respective manual tree.
		 * This must work or no manuals may be found (they're
		 * indexed relative to the root).
		 */

		if (OP_UPDATE == op) {
			if (-1 == chdir(dir)) {
				perror(dir);
				exit((int)MANDOCLEVEL_SYSERR);
			}
			index_merge(of, mp, &dbuf, &buf, hash,
					&mdb, &recs);
		}

		goto out;
	}

	/*
	 * Configure the directories we're going to scan.
	 * If we have command-line arguments, use them.
	 * If not, we use man(1)'s method (see mandocdb.8).
	 */

	if (argc > 0) {
		dirs.paths = mandoc_calloc(argc, sizeof(char *));
		dirs.sz = argc;
		for (i = 0; i < argc; i++) {
			/*
			 * NOTE(review): this failure path leaves through
			 * `out' and therefore returns MANDOCLEVEL_OK.
			 */
			if (NULL == (cp = realpath(argv[i], pbuf))) {
				perror(argv[i]);
				goto out;
			}
			dirs.paths[i] = mandoc_strdup(cp);
		}
	} else
		manpath_parse(&dirs, dir, NULL, NULL);

	for (i = 0; i < dirs.sz; i++) {

		/*
		 * Go to the root of the respective manual tree.
		 * This must work or no manuals may be found:
		 * They are indexed relative to the root.
		 */

		if (-1 == chdir(dirs.paths[i])) {
			perror(dirs.paths[i]);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		/* Create a new database in two temporary files. */

		/*
		 * O_EXCL makes dbopen() fail with EEXIST if the name
		 * chosen by mktemp() was taken in the meantime; in that
		 * case, the loops below simply try another name.
		 */

		flags = O_CREAT | O_EXCL | O_RDWR;
		while (NULL == mdb.db) {
			strlcpy(mdb.dbn, MANDOC_DB, MAXPATHLEN);
			strlcat(mdb.dbn, ".XXXXXXXXXX", MAXPATHLEN);
			if (NULL == mktemp(mdb.dbn)) {
				perror(mdb.dbn);
				exit((int)MANDOCLEVEL_SYSERR);
			}
			mdb.db = dbopen(mdb.dbn, flags, 0644,
					DB_BTREE, &info);
			if (NULL == mdb.db && EEXIST != errno) {
				perror(mdb.dbn);
				exit((int)MANDOCLEVEL_SYSERR);
			}
		}
		while (NULL == mdb.idx) {
			strlcpy(mdb.idxn, MANDOC_IDX, MAXPATHLEN);
			strlcat(mdb.idxn, ".XXXXXXXXXX", MAXPATHLEN);
			if (NULL == mktemp(mdb.idxn)) {
				perror(mdb.idxn);
				unlink(mdb.dbn);
				exit((int)MANDOCLEVEL_SYSERR);
			}
			mdb.idx = dbopen(mdb.idxn, flags, 0644,
					DB_RECNO, NULL);
			if (NULL == mdb.idx && EEXIST != errno) {
				perror(mdb.idxn);
				unlink(mdb.dbn);
				exit((int)MANDOCLEVEL_SYSERR);
			}
		}

		/*
		 * Search for manuals and fill the new database.
		 */

	       	ofile_dirbuild(".", "", "", 0, &of);

		if (NULL != of) {
			index_merge(of, mp, &dbuf, &buf, hash,
			     &mdb, &recs);
			ofile_free(of);
			of = NULL;
		}

		/* Close and forget both handles before the next tree. */

		(*mdb.db->close)(mdb.db);
		(*mdb.idx->close)(mdb.idx);
		mdb.db = NULL;
		mdb.idx = NULL;

		/*
		 * Replace the old database with the new one.
		 * This is not perfectly atomic,
		 * but i cannot think of a better way.
		 */

		if (-1 == rename(mdb.dbn, MANDOC_DB)) {
			perror(MANDOC_DB);
			unlink(mdb.dbn);
			unlink(mdb.idxn);
			exit((int)MANDOCLEVEL_SYSERR);
		}
		if (-1 == rename(mdb.idxn, MANDOC_IDX)) {
			/* Never leave a keyword db without its index. */
			perror(MANDOC_IDX);
			unlink(MANDOC_DB);
			unlink(MANDOC_IDX);
			unlink(mdb.idxn);
			exit((int)MANDOCLEVEL_SYSERR);
		}
	}

out:
	/* Common cleanup; every pointer below may still be NULL. */

	if (mdb.db)
		(*mdb.db->close)(mdb.db);
	if (mdb.idx)
		(*mdb.idx->close)(mdb.idx);
	if (hash)
		(*hash->close)(hash);
	if (mp)
		mparse_free(mp);

	manpath_free(&dirs);
	ofile_free(of);
	free(buf.cp);
	free(dbuf.cp);
	free(recs.stack);

	return(MANDOCLEVEL_OK);

usage:
	/* Reached before anything was allocated; nothing to free. */

	fprintf(stderr,
		"usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
		"                        -d dir [file ...] | "
		"-u dir [file ...]\n",
		progname);

	return((int)MANDOCLEVEL_BADARG);
}
585 
586 void
587 index_merge(const struct of *of, struct mparse *mp,
588 		struct buf *dbuf, struct buf *buf, DB *hash,
589 		struct mdb *mdb, struct recs *recs)
590 {
591 	recno_t		 rec;
592 	int		 ch, skip;
593 	DBT		 key, val;
594 	DB		*files;  /* temporary file name table */
595 	struct mdoc	*mdoc;
596 	struct man	*man;
597 	const char	*fn, *msec, *march, *mtitle;
598 	char		*p;
599 	uint64_t	 mask;
600 	size_t		 sv;
601 	unsigned	 seq;
602 	uint64_t	 vbuf[2];
603 	char		 type;
604 
605 	if (warnings) {
606 		files = NULL;
607 		hash_reset(&files);
608 	}
609 
610 	rec = 0;
611 	for (of = of->first; of; of = of->next) {
612 		fn = of->fname;
613 
614 		/*
615 		 * Try interpreting the file as mdoc(7) or man(7)
616 		 * source code, unless it is already known to be
617 		 * formatted.  Fall back to formatted mode.
618 		 */
619 
620 		mparse_reset(mp);
621 		mdoc = NULL;
622 		man = NULL;
623 
624 		if ((MANDOC_SRC & of->src_form ||
625 		    ! (MANDOC_FORM & of->src_form)) &&
626 		    MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
627 			mparse_result(mp, &mdoc, &man);
628 
629 		if (NULL != mdoc) {
630 			msec = mdoc_meta(mdoc)->msec;
631 			march = mdoc_meta(mdoc)->arch;
632 			if (NULL == march)
633 				march = "";
634 			mtitle = mdoc_meta(mdoc)->title;
635 		} else if (NULL != man) {
636 			msec = man_meta(man)->msec;
637 			march = "";
638 			mtitle = man_meta(man)->title;
639 		} else {
640 			msec = of->sec;
641 			march = of->arch;
642 			mtitle = of->title;
643 		}
644 
645 		/*
646 		 * Check whether the manual section given in a file
647 		 * agrees with the directory where the file is located.
648 		 * Some manuals have suffixes like (3p) on their
649 		 * section number either inside the file or in the
650 		 * directory name, some are linked into more than one
651 		 * section, like encrypt(1) = makekey(8).  Do not skip
652 		 * manuals for such reasons.
653 		 */
654 
655 		skip = 0;
656 		assert(of->sec);
657 		assert(msec);
658 		if (warnings)
659 			if (strcasecmp(msec, of->sec))
660 				fprintf(stderr, "%s: "
661 					"section \"%s\" manual "
662 					"in \"%s\" directory\n",
663 					fn, msec, of->sec);
664 
665 		/*
666 		 * Manual page directories exist for each kernel
667 		 * architecture as returned by machine(1).
668 		 * However, many manuals only depend on the
669 		 * application architecture as returned by arch(1).
670 		 * For example, some (2/ARM) manuals are shared
671 		 * across the "armish" and "zaurus" kernel
672 		 * architectures.
673 		 * A few manuals are even shared across completely
674 		 * different architectures, for example fdformat(1)
675 		 * on amd64, i386, sparc, and sparc64.
676 		 * Thus, warn about architecture mismatches,
677 		 * but don't skip manuals for this reason.
678 		 */
679 
680 		assert(of->arch);
681 		assert(march);
682 		if (warnings)
683 			if (strcasecmp(march, of->arch))
684 				fprintf(stderr, "%s: "
685 					"architecture \"%s\" manual "
686 					"in \"%s\" directory\n",
687 					fn, march, of->arch);
688 
689 		/*
690 		 * By default, skip a file if the title given
691 		 * in the file disagrees with the file name.
692 		 * Do not warn, this happens for all MLINKs.
693 		 */
694 
695 		assert(of->title);
696 		assert(mtitle);
697 		if (strcasecmp(mtitle, of->title))
698 			skip = 1;
699 
700 		/*
701 		 * Build a title string for the file.  If it matches
702 		 * the location of the file, remember the title as
703 		 * found; else, remember it as missing.
704 		 */
705 
706 		if (warnings) {
707 			buf->len = 0;
708 			buf_appendb(buf, mtitle, strlen(mtitle));
709 			buf_appendb(buf, "(", 1);
710 			buf_appendb(buf, msec, strlen(msec));
711 			if ('\0' != *march) {
712 				buf_appendb(buf, "/", 1);
713 				buf_appendb(buf, march, strlen(march));
714 			}
715 			buf_appendb(buf, ")", 2);
716 			for (p = buf->cp; '\0' != *p; p++)
717 				*p = tolower(*p);
718 			key.data = buf->cp;
719 			key.size = buf->len;
720 			val.data = NULL;
721 			val.size = 0;
722 			if (0 == skip)
723 				val.data = "";
724 			else {
725 				ch = (*files->get)(files, &key, &val, 0);
726 				if (ch < 0) {
727 					perror("hash");
728 					exit((int)MANDOCLEVEL_SYSERR);
729 				} else if (ch > 0) {
730 					val.data = (void *)fn;
731 					val.size = strlen(fn) + 1;
732 				} else
733 					val.data = NULL;
734 			}
735 			if (NULL != val.data &&
736 			    (*files->put)(files, &key, &val, 0) < 0) {
737 				perror("hash");
738 				exit((int)MANDOCLEVEL_SYSERR);
739 			}
740 		}
741 
742 		if (skip && !use_all)
743 			continue;
744 
745 		/*
746 		 * The index record value consists of a nil-terminated
747 		 * filename, a nil-terminated manual section, and a
748 		 * nil-terminated description.  Use the actual
749 		 * location of the file, such that the user can find
750 		 * it with man(1).  Since the description may not be
751 		 * set, we set a sentinel to see if we're going to
752 		 * write a nil byte in its place.
753 		 */
754 
755 		dbuf->len = 0;
756 		type = mdoc ? 'd' : (man ? 'a' : 'c');
757 		buf_appendb(dbuf, &type, 1);
758 		buf_appendb(dbuf, fn, strlen(fn) + 1);
759 		buf_appendb(dbuf, of->sec, strlen(of->sec) + 1);
760 		buf_appendb(dbuf, of->title, strlen(of->title) + 1);
761 		buf_appendb(dbuf, of->arch, strlen(of->arch) + 1);
762 
763 		sv = dbuf->len;
764 
765 		/*
766 		 * Collect keyword/mask pairs.
767 		 * Each pair will become a new btree node.
768 		 */
769 
770 		hash_reset(&hash);
771 		if (mdoc)
772 			pmdoc_node(hash, buf, dbuf,
773 				mdoc_node(mdoc), mdoc_meta(mdoc));
774 		else if (man)
775 			pman_node(hash, buf, dbuf, man_node(man));
776 		else
777 			pformatted(hash, buf, dbuf, of);
778 
779 		/* Test mode, do not access any database. */
780 
781 		if (NULL == mdb->db || NULL == mdb->idx)
782 			continue;
783 
784 		/*
785 		 * Make sure the file name is always registered
786 		 * as an .Nm search key.
787 		 */
788 		buf->len = 0;
789 		buf_append(buf, of->title);
790 		hash_put(hash, buf, TYPE_Nm);
791 
792 		/*
793 		 * Reclaim an empty index record, if available.
794 		 * Use its record number for all new btree nodes.
795 		 */
796 
797 		if (recs->cur > 0) {
798 			recs->cur--;
799 			rec = recs->stack[(int)recs->cur];
800 		} else if (recs->last > 0) {
801 			rec = recs->last;
802 			recs->last = 0;
803 		} else
804 			rec++;
805 		vbuf[1] = htobe64(rec);
806 
807 		/*
808 		 * Copy from the in-memory hashtable of pending
809 		 * keyword/mask pairs into the database.
810 		 */
811 
812 		seq = R_FIRST;
813 		while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
814 			seq = R_NEXT;
815 			assert(sizeof(uint64_t) == val.size);
816 			memcpy(&mask, val.data, val.size);
817 			vbuf[0] = htobe64(mask);
818 			val.size = sizeof(vbuf);
819 			val.data = &vbuf;
820 			dbt_put(mdb->db, mdb->dbn, &key, &val);
821 		}
822 		if (ch < 0) {
823 			perror("hash");
824 			unlink(mdb->dbn);
825 			unlink(mdb->idxn);
826 			exit((int)MANDOCLEVEL_SYSERR);
827 		}
828 
829 		/*
830 		 * Apply to the index.  If we haven't had a description
831 		 * set, put an empty one in now.
832 		 */
833 
834 		if (dbuf->len == sv)
835 			buf_appendb(dbuf, "", 1);
836 
837 		key.data = &rec;
838 		key.size = sizeof(recno_t);
839 
840 		val.data = dbuf->cp;
841 		val.size = dbuf->len;
842 
843 		if (verb)
844 			printf("%s: adding to index\n", fn);
845 
846 		dbt_put(mdb->idx, mdb->idxn, &key, &val);
847 	}
848 
849 	/*
850 	 * Iterate the remembered file titles and check that
851 	 * all files can be found by their main title.
852 	 */
853 
854 	if (warnings) {
855 		seq = R_FIRST;
856 		while (0 == (*files->seq)(files, &key, &val, seq)) {
857 			seq = R_NEXT;
858 			if (val.size)
859 				fprintf(stderr, "%s: probably "
860 				    "unreachable, title is %s\n",
861 				    (char *)val.data, (char *)key.data);
862 		}
863 		(*files->close)(files);
864 	}
865 }
866 
/*
 * Scan through all entries in the index file `idx' and prune those
 * whose file name matches an entry of the `ofile' list.
 * Pruning consists of removing from `db', then invalidating the entry
 * in `idx' (zeroing its value size).
 *
 * As a side effect, the record numbers of all empty index records,
 * both those found empty and those emptied here, are pushed onto
 * recs->stack, and recs->last is left one past the last record
 * number seen, such that index_merge() can reuse them.
 *
 * Database errors and malformed records terminate the process.
 */
static void
index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs)
{
	const struct of	*of;
	const char	*fn;
	uint64_t	 vbuf[2];
	unsigned	 seq, sseq;
	DBT		 key, val;
	int		 ch;

	recs->cur = 0;
	seq = R_FIRST;
	while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
		seq = R_NEXT;
		assert(sizeof(recno_t) == key.size);
		memcpy(&recs->last, key.data, key.size);

		/* Deleted records are zero-sized.  Skip them. */

		if (0 == val.size)
			goto cont;

		/*
		 * Make sure we're sane.
		 * Read past our mdoc/man/cat type to the next string,
		 * then make sure it's bounded by a NUL.
		 * Failing any of these, we go into our error handler.
		 */

		/* Leaving the loop with ch == 0 reports "corrupt index". */

		fn = (char *)val.data + 1;
		if (NULL == memchr(fn, '\0', val.size - 1))
			break;

		/*
		 * Search for the file in those we care about.
		 * XXX: build this into a tree.  Too slow.
		 */

		for (of = ofile->first; of; of = of->next)
			if (0 == strcmp(fn, of->fname))
				break;

		/* Not on the list: the record stays in use, do not stack it. */

		if (NULL == of)
			continue;

		/*
		 * Search through the keyword database, throwing out all
		 * references to our file.
		 */

		/*
		 * Each keyword value is a pair of 64-bit big-endian
		 * numbers; the second one is the index record number.
		 * Any other size leaves the loop with ch == 0, which
		 * is reported as a corrupt database below.
		 */

		sseq = R_FIRST;
		while (0 == (ch = (*mdb->db->seq)(mdb->db,
					&key, &val, sseq))) {
			sseq = R_NEXT;
			if (sizeof(vbuf) != val.size)
				break;

			memcpy(vbuf, val.data, val.size);
			if (recs->last != betoh64(vbuf[1]))
				continue;

			if ((ch = (*mdb->db->del)(mdb->db,
					&key, R_CURSOR)) < 0)
				break;
		}

		/* Only running off the end of the btree (ch == 1) is fine. */

		if (ch < 0) {
			perror(mdb->dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		} else if (1 != ch) {
			fprintf(stderr, "%s: corrupt database\n",
					mdb->dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		if (verb)
			printf("%s: deleting from index\n", fn);

		/*
		 * NOTE(review): `key' and `val' were overwritten by the
		 * keyword scan above.  This put relies on R_CURSOR
		 * addressing the record at the index cursor rather
		 * than the one named by `key' -- confirm against
		 * recno(3) before touching this.
		 */

		val.size = 0;
		ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);

		if (ch < 0)
			break;
cont:
		/* Remember the empty record, growing the stack as needed. */

		if (recs->cur >= recs->size) {
			recs->size += MANDOC_SLOP;
			recs->stack = mandoc_realloc(recs->stack,
					recs->size * sizeof(recno_t));
		}

		recs->stack[(int)recs->cur] = recs->last;
		recs->cur++;
	}

	if (ch < 0) {
		perror(mdb->idxn);
		exit((int)MANDOCLEVEL_SYSERR);
	} else if (1 != ch) {
		fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
		exit((int)MANDOCLEVEL_SYSERR);
	}

	/* One past the last record seen; 1 if the index was empty. */

	recs->last++;
}
977 
978 /*
979  * Grow the buffer (if necessary) and copy in a binary string.
980  */
981 static void
982 buf_appendb(struct buf *buf, const void *cp, size_t sz)
983 {
984 
985 	/* Overshoot by MANDOC_BUFSZ. */
986 
987 	while (buf->len + sz >= buf->size) {
988 		buf->size = buf->len + sz + MANDOC_BUFSZ;
989 		buf->cp = mandoc_realloc(buf->cp, buf->size);
990 	}
991 
992 	memcpy(buf->cp + (int)buf->len, cp, sz);
993 	buf->len += sz;
994 }
995 
996 /*
997  * Append a nil-terminated string to the buffer.
998  * This can be invoked multiple times.
999  * The buffer string will be nil-terminated.
1000  * If invoked multiple times, a space is put between strings.
1001  */
1002 static void
1003 buf_append(struct buf *buf, const char *cp)
1004 {
1005 	size_t		 sz;
1006 
1007 	if (0 == (sz = strlen(cp)))
1008 		return;
1009 
1010 	if (buf->len)
1011 		buf->cp[(int)buf->len - 1] = ' ';
1012 
1013 	buf_appendb(buf, cp, sz + 1);
1014 }
1015 
1016 /*
1017  * Recursively add all text from a given node.
1018  * This is optimised for general mdoc nodes in this context, which do
1019  * not consist of subexpressions and having a recursive call for n->next
1020  * would be wasteful.
1021  * The "f" variable should be 0 unless called from pmdoc_Nd for the
1022  * description buffer, which does not start at the beginning of the
1023  * buffer.
1024  */
1025 static void
1026 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
1027 {
1028 
1029 	for ( ; n; n = n->next) {
1030 		if (n->child)
1031 			buf_appendmdoc(buf, n->child, f);
1032 
1033 		if (MDOC_TEXT == n->type && f) {
1034 			f = 0;
1035 			buf_appendb(buf, n->string,
1036 					strlen(n->string) + 1);
1037 		} else if (MDOC_TEXT == n->type)
1038 			buf_append(buf, n->string);
1039 
1040 	}
1041 }
1042 
1043 static void
1044 hash_reset(DB **db)
1045 {
1046 	DB		*hash;
1047 
1048 	if (NULL != (hash = *db))
1049 		(*hash->close)(hash);
1050 
1051 	*db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
1052 	if (NULL == *db) {
1053 		perror("hash");
1054 		exit((int)MANDOCLEVEL_SYSERR);
1055 	}
1056 }
1057 
1058 /* ARGSUSED */
1059 static int
1060 pmdoc_head(MDOC_ARGS)
1061 {
1062 
1063 	return(MDOC_HEAD == n->type);
1064 }
1065 
1066 /* ARGSUSED */
1067 static int
1068 pmdoc_body(MDOC_ARGS)
1069 {
1070 
1071 	return(MDOC_BODY == n->type);
1072 }
1073 
1074 /* ARGSUSED */
1075 static int
1076 pmdoc_Fd(MDOC_ARGS)
1077 {
1078 	const char	*start, *end;
1079 	size_t		 sz;
1080 
1081 	if (SEC_SYNOPSIS != n->sec)
1082 		return(0);
1083 	if (NULL == (n = n->child) || MDOC_TEXT != n->type)
1084 		return(0);
1085 
1086 	/*
1087 	 * Only consider those `Fd' macro fields that begin with an
1088 	 * "inclusion" token (versus, e.g., #define).
1089 	 */
1090 	if (strcmp("#include", n->string))
1091 		return(0);
1092 
1093 	if (NULL == (n = n->next) || MDOC_TEXT != n->type)
1094 		return(0);
1095 
1096 	/*
1097 	 * Strip away the enclosing angle brackets and make sure we're
1098 	 * not zero-length.
1099 	 */
1100 
1101 	start = n->string;
1102 	if ('<' == *start || '"' == *start)
1103 		start++;
1104 
1105 	if (0 == (sz = strlen(start)))
1106 		return(0);
1107 
1108 	end = &start[(int)sz - 1];
1109 	if ('>' == *end || '"' == *end)
1110 		end--;
1111 
1112 	assert(end >= start);
1113 
1114 	buf_appendb(buf, start, (size_t)(end - start + 1));
1115 	buf_appendb(buf, "", 1);
1116 	return(1);
1117 }
1118 
1119 /* ARGSUSED */
1120 static int
1121 pmdoc_In(MDOC_ARGS)
1122 {
1123 
1124 	if (NULL == n->child || MDOC_TEXT != n->child->type)
1125 		return(0);
1126 
1127 	buf_append(buf, n->child->string);
1128 	return(1);
1129 }
1130 
1131 /* ARGSUSED */
1132 static int
1133 pmdoc_Fn(MDOC_ARGS)
1134 {
1135 	struct mdoc_node *nn;
1136 	const char	*cp;
1137 
1138 	nn = n->child;
1139 
1140 	if (NULL == nn || MDOC_TEXT != nn->type)
1141 		return(0);
1142 
1143 	/* .Fn "struct type *name" "char *arg" */
1144 
1145 	cp = strrchr(nn->string, ' ');
1146 	if (NULL == cp)
1147 		cp = nn->string;
1148 
1149 	/* Strip away pointer symbol. */
1150 
1151 	while ('*' == *cp)
1152 		cp++;
1153 
1154 	/* Store the function name. */
1155 
1156 	buf_append(buf, cp);
1157 	hash_put(hash, buf, TYPE_Fn);
1158 
1159 	/* Store the function type. */
1160 
1161 	if (nn->string < cp) {
1162 		buf->len = 0;
1163 		buf_appendb(buf, nn->string, cp - nn->string);
1164 		buf_appendb(buf, "", 1);
1165 		hash_put(hash, buf, TYPE_Ft);
1166 	}
1167 
1168 	/* Store the arguments. */
1169 
1170 	for (nn = nn->next; nn; nn = nn->next) {
1171 		if (MDOC_TEXT != nn->type)
1172 			continue;
1173 		buf->len = 0;
1174 		buf_append(buf, nn->string);
1175 		hash_put(hash, buf, TYPE_Fa);
1176 	}
1177 
1178 	return(0);
1179 }
1180 
1181 /* ARGSUSED */
1182 static int
1183 pmdoc_St(MDOC_ARGS)
1184 {
1185 
1186 	if (NULL == n->child || MDOC_TEXT != n->child->type)
1187 		return(0);
1188 
1189 	buf_append(buf, n->child->string);
1190 	return(1);
1191 }
1192 
1193 /* ARGSUSED */
1194 static int
1195 pmdoc_Xr(MDOC_ARGS)
1196 {
1197 
1198 	if (NULL == (n = n->child))
1199 		return(0);
1200 
1201 	buf_appendb(buf, n->string, strlen(n->string));
1202 
1203 	if (NULL != (n = n->next)) {
1204 		buf_appendb(buf, ".", 1);
1205 		buf_appendb(buf, n->string, strlen(n->string) + 1);
1206 	} else
1207 		buf_appendb(buf, ".", 2);
1208 
1209 	return(1);
1210 }
1211 
1212 /* ARGSUSED */
1213 static int
1214 pmdoc_Nd(MDOC_ARGS)
1215 {
1216 
1217 	if (MDOC_BODY != n->type)
1218 		return(0);
1219 
1220 	buf_appendmdoc(dbuf, n->child, 1);
1221 	return(1);
1222 }
1223 
1224 /* ARGSUSED */
1225 static int
1226 pmdoc_Nm(MDOC_ARGS)
1227 {
1228 
1229 	if (SEC_NAME == n->sec)
1230 		return(1);
1231 	else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
1232 		return(0);
1233 
1234 	if (NULL == n->child)
1235 		buf_append(buf, m->name);
1236 
1237 	return(1);
1238 }
1239 
1240 /* ARGSUSED */
1241 static int
1242 pmdoc_Sh(MDOC_ARGS)
1243 {
1244 
1245 	return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
1246 }
1247 
1248 static void
1249 hash_put(DB *db, const struct buf *buf, uint64_t mask)
1250 {
1251 	uint64_t	 oldmask;
1252 	DBT		 key, val;
1253 	int		 rc;
1254 
1255 	if (buf->len < 2)
1256 		return;
1257 
1258 	key.data = buf->cp;
1259 	key.size = buf->len;
1260 
1261 	if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
1262 		perror("hash");
1263 		exit((int)MANDOCLEVEL_SYSERR);
1264 	} else if (0 == rc) {
1265 		assert(sizeof(uint64_t) == val.size);
1266 		memcpy(&oldmask, val.data, val.size);
1267 		mask |= oldmask;
1268 	}
1269 
1270 	val.data = &mask;
1271 	val.size = sizeof(uint64_t);
1272 
1273 	if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1274 		perror("hash");
1275 		exit((int)MANDOCLEVEL_SYSERR);
1276 	}
1277 }
1278 
1279 static void
1280 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1281 {
1282 
1283 	assert(key->size);
1284 	assert(val->size);
1285 
1286 	if (0 == (*db->put)(db, key, val, 0))
1287 		return;
1288 
1289 	perror(dbn);
1290 	exit((int)MANDOCLEVEL_SYSERR);
1291 	/* NOTREACHED */
1292 }
1293 
1294 /*
1295  * Call out to per-macro handlers after clearing the persistent database
1296  * key.  If the macro sets the database key, flush it to the database.
1297  */
1298 static void
1299 pmdoc_node(MDOC_ARGS)
1300 {
1301 
1302 	if (NULL == n)
1303 		return;
1304 
1305 	switch (n->type) {
1306 	case (MDOC_HEAD):
1307 		/* FALLTHROUGH */
1308 	case (MDOC_BODY):
1309 		/* FALLTHROUGH */
1310 	case (MDOC_TAIL):
1311 		/* FALLTHROUGH */
1312 	case (MDOC_BLOCK):
1313 		/* FALLTHROUGH */
1314 	case (MDOC_ELEM):
1315 		buf->len = 0;
1316 
1317 		/*
1318 		 * Both NULL handlers and handlers returning true
1319 		 * request using the data.  Only skip the element
1320 		 * when the handler returns false.
1321 		 */
1322 
1323 		if (NULL != mdocs[n->tok].fp &&
1324 		    0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m))
1325 			break;
1326 
1327 		/*
1328 		 * For many macros, use the text from all children.
1329 		 * Set zero flags for macros not needing this.
1330 		 * In that case, the handler must fill the buffer.
1331 		 */
1332 
1333 		if (MDOCF_CHILD & mdocs[n->tok].flags)
1334 			buf_appendmdoc(buf, n->child, 0);
1335 
1336 		/*
1337 		 * Cover the most common case:
1338 		 * Automatically stage one string per element.
1339 		 * Set a zero mask for macros not needing this.
1340 		 * Additional staging can be done in the handler.
1341 		 */
1342 
1343 		if (mdocs[n->tok].mask)
1344 			hash_put(hash, buf, mdocs[n->tok].mask);
1345 		break;
1346 	default:
1347 		break;
1348 	}
1349 
1350 	pmdoc_node(hash, buf, dbuf, n->child, m);
1351 	pmdoc_node(hash, buf, dbuf, n->next, m);
1352 }
1353 
1354 static int
1355 pman_node(MAN_ARGS)
1356 {
1357 	const struct man_node *head, *body;
1358 	char		*start, *sv, *title;
1359 	size_t		 sz, titlesz;
1360 
1361 	if (NULL == n)
1362 		return(0);
1363 
1364 	/*
1365 	 * We're only searching for one thing: the first text child in
1366 	 * the BODY of a NAME section.  Since we don't keep track of
1367 	 * sections in -man, run some hoops to find out whether we're in
1368 	 * the correct section or not.
1369 	 */
1370 
1371 	if (MAN_BODY == n->type && MAN_SH == n->tok) {
1372 		body = n;
1373 		assert(body->parent);
1374 		if (NULL != (head = body->parent->head) &&
1375 				1 == head->nchild &&
1376 				NULL != (head = (head->child)) &&
1377 				MAN_TEXT == head->type &&
1378 				0 == strcmp(head->string, "NAME") &&
1379 				NULL != (body = body->child) &&
1380 				MAN_TEXT == body->type) {
1381 
1382 			title = NULL;
1383 			titlesz = 0;
1384 			/*
1385 			 * Suck the entire NAME section into memory.
1386 			 * Yes, we might run away.
1387 			 * But too many manuals have big, spread-out
1388 			 * NAME sections over many lines.
1389 			 */
1390 			for ( ; NULL != body; body = body->next) {
1391 				if (MAN_TEXT != body->type)
1392 					break;
1393 				if (0 == (sz = strlen(body->string)))
1394 					continue;
1395 				title = mandoc_realloc
1396 					(title, titlesz + sz + 1);
1397 				memcpy(title + titlesz, body->string, sz);
1398 				titlesz += sz + 1;
1399 				title[(int)titlesz - 1] = ' ';
1400 			}
1401 			if (NULL == title)
1402 				return(0);
1403 
1404 			title = mandoc_realloc(title, titlesz + 1);
1405 			title[(int)titlesz] = '\0';
1406 
1407 			/* Skip leading space.  */
1408 
1409 			sv = title;
1410 			while (isspace((unsigned char)*sv))
1411 				sv++;
1412 
1413 			if (0 == (sz = strlen(sv))) {
1414 				free(title);
1415 				return(0);
1416 			}
1417 
1418 			/* Erase trailing space. */
1419 
1420 			start = &sv[sz - 1];
1421 			while (start > sv && isspace((unsigned char)*start))
1422 				*start-- = '\0';
1423 
1424 			if (start == sv) {
1425 				free(title);
1426 				return(0);
1427 			}
1428 
1429 			start = sv;
1430 
1431 			/*
1432 			 * Go through a special heuristic dance here.
1433 			 * This is why -man manuals are great!
1434 			 * (I'm being sarcastic: my eyes are bleeding.)
1435 			 * Conventionally, one or more manual names are
1436 			 * comma-specified prior to a whitespace, then a
1437 			 * dash, then a description.  Try to puzzle out
1438 			 * the name parts here.
1439 			 */
1440 
1441 			for ( ;; ) {
1442 				sz = strcspn(start, " ,");
1443 				if ('\0' == start[(int)sz])
1444 					break;
1445 
1446 				buf->len = 0;
1447 				buf_appendb(buf, start, sz);
1448 				buf_appendb(buf, "", 1);
1449 
1450 				hash_put(hash, buf, TYPE_Nm);
1451 
1452 				if (' ' == start[(int)sz]) {
1453 					start += (int)sz + 1;
1454 					break;
1455 				}
1456 
1457 				assert(',' == start[(int)sz]);
1458 				start += (int)sz + 1;
1459 				while (' ' == *start)
1460 					start++;
1461 			}
1462 
1463 			buf->len = 0;
1464 
1465 			if (sv == start) {
1466 				buf_append(buf, start);
1467 				free(title);
1468 				return(1);
1469 			}
1470 
1471 			while (isspace((unsigned char)*start))
1472 				start++;
1473 
1474 			if (0 == strncmp(start, "-", 1))
1475 				start += 1;
1476 			else if (0 == strncmp(start, "\\-\\-", 4))
1477 				start += 4;
1478 			else if (0 == strncmp(start, "\\-", 2))
1479 				start += 2;
1480 			else if (0 == strncmp(start, "\\(en", 4))
1481 				start += 4;
1482 			else if (0 == strncmp(start, "\\(em", 4))
1483 				start += 4;
1484 
1485 			while (' ' == *start)
1486 				start++;
1487 
1488 			sz = strlen(start) + 1;
1489 			buf_appendb(dbuf, start, sz);
1490 			buf_appendb(buf, start, sz);
1491 
1492 			hash_put(hash, buf, TYPE_Nd);
1493 			free(title);
1494 		}
1495 	}
1496 
1497 	for (n = n->child; n; n = n->next)
1498 		if (pman_node(hash, buf, dbuf, n))
1499 			return(1);
1500 
1501 	return(0);
1502 }
1503 
1504 /*
1505  * Parse a formatted manual page.
1506  * By necessity, this involves rather crude guesswork.
1507  */
1508 static void
1509 pformatted(DB *hash, struct buf *buf,
1510 		struct buf *dbuf, const struct of *of)
1511 {
1512 	FILE		*stream;
1513 	char		*line, *p, *title;
1514 	size_t		 len, plen, titlesz;
1515 
1516 	if (NULL == (stream = fopen(of->fname, "r"))) {
1517 		if (warnings)
1518 			perror(of->fname);
1519 		return;
1520 	}
1521 
1522 	/*
1523 	 * Always use the title derived from the filename up front,
1524 	 * do not even try to find it in the file.  This also makes
1525 	 * sure we don't end up with an orphan index record, even if
1526 	 * the file content turns out to be completely unintelligible.
1527 	 */
1528 
1529 	buf->len = 0;
1530 	buf_append(buf, of->title);
1531 	hash_put(hash, buf, TYPE_Nm);
1532 
1533 	/* Skip to first blank line. */
1534 
1535 	while (NULL != (line = fgetln(stream, &len)))
1536 		if ('\n' == *line)
1537 			break;
1538 
1539 	/*
1540 	 * Assume the first line that is not indented
1541 	 * is the first section header.  Skip to it.
1542 	 */
1543 
1544 	while (NULL != (line = fgetln(stream, &len)))
1545 		if ('\n' != *line && ' ' != *line)
1546 			break;
1547 
1548 	/*
1549 	 * Read up until the next section into a buffer.
1550 	 * Strip the leading and trailing newline from each read line,
1551 	 * appending a trailing space.
1552 	 * Ignore empty (whitespace-only) lines.
1553 	 */
1554 
1555 	titlesz = 0;
1556 	title = NULL;
1557 
1558 	while (NULL != (line = fgetln(stream, &len))) {
1559 		if (' ' != *line || '\n' != line[(int)len - 1])
1560 			break;
1561 		while (len > 0 && isspace((unsigned char)*line)) {
1562 			line++;
1563 			len--;
1564 		}
1565 		if (1 == len)
1566 			continue;
1567 		title = mandoc_realloc(title, titlesz + len);
1568 		memcpy(title + titlesz, line, len);
1569 		titlesz += len;
1570 		title[(int)titlesz - 1] = ' ';
1571 	}
1572 
1573 
1574 	/*
1575 	 * If no page content can be found, or the input line
1576 	 * is already the next section header, or there is no
1577 	 * trailing newline, reuse the page title as the page
1578 	 * description.
1579 	 */
1580 
1581 	if (NULL == title || '\0' == *title) {
1582 		if (warnings)
1583 			fprintf(stderr, "%s: cannot find NAME section\n",
1584 					of->fname);
1585 		buf_appendb(dbuf, buf->cp, buf->size);
1586 		hash_put(hash, buf, TYPE_Nd);
1587 		fclose(stream);
1588 		free(title);
1589 		return;
1590 	}
1591 
1592 	title = mandoc_realloc(title, titlesz + 1);
1593 	title[(int)titlesz] = '\0';
1594 
1595 	/*
1596 	 * Skip to the first dash.
1597 	 * Use the remaining line as the description (no more than 70
1598 	 * bytes).
1599 	 */
1600 
1601 	if (NULL != (p = strstr(title, "- "))) {
1602 		for (p += 2; ' ' == *p || '\b' == *p; p++)
1603 			/* Skip to next word. */ ;
1604 	} else {
1605 		if (warnings)
1606 			fprintf(stderr, "%s: no dash in title line\n",
1607 					of->fname);
1608 		p = title;
1609 	}
1610 
1611 	plen = strlen(p);
1612 
1613 	/* Strip backspace-encoding from line. */
1614 
1615 	while (NULL != (line = memchr(p, '\b', plen))) {
1616 		len = line - p;
1617 		if (0 == len) {
1618 			memmove(line, line + 1, plen--);
1619 			continue;
1620 		}
1621 		memmove(line - 1, line + 1, plen - len);
1622 		plen -= 2;
1623 	}
1624 
1625 	buf_appendb(dbuf, p, plen + 1);
1626 	buf->len = 0;
1627 	buf_appendb(buf, p, plen + 1);
1628 	hash_put(hash, buf, TYPE_Nd);
1629 	fclose(stream);
1630 	free(title);
1631 }
1632 
1633 static void
1634 ofile_argbuild(int argc, char *argv[], struct of **of,
1635 		const char *basedir)
1636 {
1637 	char		 buf[MAXPATHLEN];
1638 	char		 pbuf[PATH_MAX];
1639 	const char	*sec, *arch, *title;
1640 	char		*relpath, *p;
1641 	int		 i, src_form;
1642 	struct of	*nof;
1643 
1644 	for (i = 0; i < argc; i++) {
1645 		if (NULL == (relpath = realpath(argv[i], pbuf))) {
1646 			perror(argv[i]);
1647 			continue;
1648 		}
1649 		if (NULL != basedir) {
1650 			if (strstr(pbuf, basedir) != pbuf) {
1651 				fprintf(stderr, "%s: file outside "
1652 				    "base directory %s\n",
1653 				    pbuf, basedir);
1654 				continue;
1655 			}
1656 			relpath = pbuf + strlen(basedir);
1657 		}
1658 
1659 		/*
1660 		 * Try to infer the manual section, architecture and
1661 		 * page title from the path, assuming it looks like
1662 		 *   man*[/<arch>]/<title>.<section>   or
1663 		 *   cat<section>[/<arch>]/<title>.0
1664 		 */
1665 
1666 		if (strlcpy(buf, relpath, sizeof(buf)) >= sizeof(buf)) {
1667 			fprintf(stderr, "%s: path too long\n", relpath);
1668 			continue;
1669 		}
1670 		sec = arch = title = "";
1671 		src_form = 0;
1672 		p = strrchr(buf, '\0');
1673 		while (p-- > buf) {
1674 			if ('\0' == *sec && '.' == *p) {
1675 				sec = p + 1;
1676 				*p = '\0';
1677 				if ('0' == *sec)
1678 					src_form |= MANDOC_FORM;
1679 				else if ('1' <= *sec && '9' >= *sec)
1680 					src_form |= MANDOC_SRC;
1681 				continue;
1682 			}
1683 			if ('/' != *p)
1684 				continue;
1685 			if ('\0' == *title) {
1686 				title = p + 1;
1687 				*p = '\0';
1688 				continue;
1689 			}
1690 			if (0 == strncmp("man", p + 1, 3))
1691 				src_form |= MANDOC_SRC;
1692 			else if (0 == strncmp("cat", p + 1, 3))
1693 				src_form |= MANDOC_FORM;
1694 			else
1695 				arch = p + 1;
1696 			break;
1697 		}
1698 		if ('\0' == *title) {
1699 			if (warnings)
1700 				fprintf(stderr,
1701 				    "%s: cannot deduce title "
1702 				    "from filename\n",
1703 				    relpath);
1704 			title = buf;
1705 		}
1706 
1707 		/*
1708 		 * Build the file structure.
1709 		 */
1710 
1711 		nof = mandoc_calloc(1, sizeof(struct of));
1712 		nof->fname = mandoc_strdup(relpath);
1713 		nof->sec = mandoc_strdup(sec);
1714 		nof->arch = mandoc_strdup(arch);
1715 		nof->title = mandoc_strdup(title);
1716 		nof->src_form = src_form;
1717 
1718 		/*
1719 		 * Add the structure to the list.
1720 		 */
1721 
1722 		if (verb > 1)
1723 			printf("%s: scheduling\n", relpath);
1724 		if (NULL == *of) {
1725 			*of = nof;
1726 			(*of)->first = nof;
1727 		} else {
1728 			nof->first = (*of)->first;
1729 			(*of)->next = nof;
1730 			*of = nof;
1731 		}
1732 	}
1733 }
1734 
1735 /*
1736  * Recursively build up a list of files to parse.
1737  * We use this instead of ftw() and so on because I don't want global
1738  * variables hanging around.
1739  * This ignores the mandoc.db and mandoc.index files, but assumes that
1740  * everything else is a manual.
1741  * Pass in a pointer to a NULL structure for the first invocation.
1742  */
1743 static void
1744 ofile_dirbuild(const char *dir, const char* psec, const char *parch,
1745 		int p_src_form, struct of **of)
1746 {
1747 	char		 buf[MAXPATHLEN];
1748 	size_t		 sz;
1749 	DIR		*d;
1750 	const char	*fn, *sec, *arch;
1751 	char		*p, *q, *suffix;
1752 	struct of	*nof;
1753 	struct dirent	*dp;
1754 	int		 src_form;
1755 
1756 	if (NULL == (d = opendir(dir))) {
1757 		if (warnings)
1758 			perror(dir);
1759 		return;
1760 	}
1761 
1762 	while (NULL != (dp = readdir(d))) {
1763 		fn = dp->d_name;
1764 
1765 		if ('.' == *fn)
1766 			continue;
1767 
1768 		src_form = p_src_form;
1769 
1770 		if (DT_DIR == dp->d_type) {
1771 			sec = psec;
1772 			arch = parch;
1773 
1774 			/*
1775 			 * By default, only use directories called:
1776 			 *   man<section>/[<arch>/]   or
1777 			 *   cat<section>/[<arch>/]
1778 			 */
1779 
1780 			if ('\0' == *sec) {
1781 				if(0 == strncmp("man", fn, 3)) {
1782 					src_form |= MANDOC_SRC;
1783 					sec = fn + 3;
1784 				} else if (0 == strncmp("cat", fn, 3)) {
1785 					src_form |= MANDOC_FORM;
1786 					sec = fn + 3;
1787 				} else {
1788 					if (warnings) fprintf(stderr,
1789 					    "%s/%s: bad section\n",
1790 					    dir, fn);
1791 					if (use_all)
1792 						sec = fn;
1793 					else
1794 						continue;
1795 				}
1796 			} else if ('\0' == *arch) {
1797 				if (NULL != strchr(fn, '.')) {
1798 					if (warnings) fprintf(stderr,
1799 					    "%s/%s: bad architecture\n",
1800 					    dir, fn);
1801 					if (0 == use_all)
1802 						continue;
1803 				}
1804 				arch = fn;
1805 			} else {
1806 				if (warnings) fprintf(stderr, "%s/%s: "
1807 				    "excessive subdirectory\n", dir, fn);
1808 				if (0 == use_all)
1809 					continue;
1810 			}
1811 
1812 			buf[0] = '\0';
1813 			strlcat(buf, dir, MAXPATHLEN);
1814 			strlcat(buf, "/", MAXPATHLEN);
1815 			sz = strlcat(buf, fn, MAXPATHLEN);
1816 
1817 			if (MAXPATHLEN <= sz) {
1818 				if (warnings) fprintf(stderr, "%s/%s: "
1819 				    "path too long\n", dir, fn);
1820 				continue;
1821 			}
1822 
1823 			if (verb > 1)
1824 				printf("%s: scanning\n", buf);
1825 
1826 			ofile_dirbuild(buf, sec, arch, src_form, of);
1827 			continue;
1828 		}
1829 
1830 		if (DT_REG != dp->d_type) {
1831 			if (warnings)
1832 				fprintf(stderr,
1833 				    "%s/%s: not a regular file\n",
1834 				    dir, fn);
1835 			continue;
1836 		}
1837 		if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
1838 			continue;
1839 		if ('\0' == *psec) {
1840 			if (warnings)
1841 				fprintf(stderr,
1842 				    "%s/%s: file outside section\n",
1843 				    dir, fn);
1844 			if (0 == use_all)
1845 				continue;
1846 		}
1847 
1848 		/*
1849 		 * By default, skip files where the file name suffix
1850 		 * does not agree with the section directory
1851 		 * they are located in.
1852 		 */
1853 
1854 		suffix = strrchr(fn, '.');
1855 		if (NULL == suffix) {
1856 			if (warnings)
1857 				fprintf(stderr,
1858 				    "%s/%s: no filename suffix\n",
1859 				    dir, fn);
1860 			if (0 == use_all)
1861 				continue;
1862 		} else if ((MANDOC_SRC & src_form &&
1863 				strcmp(suffix + 1, psec)) ||
1864 			    (MANDOC_FORM & src_form &&
1865 				strcmp(suffix + 1, "0"))) {
1866 			if (warnings)
1867 				fprintf(stderr,
1868 				    "%s/%s: wrong filename suffix\n",
1869 				    dir, fn);
1870 			if (0 == use_all)
1871 				continue;
1872 			if ('0' == suffix[1])
1873 				src_form |= MANDOC_FORM;
1874 			else if ('1' <= suffix[1] && '9' >= suffix[1])
1875 				src_form |= MANDOC_SRC;
1876 		}
1877 
1878 		/*
1879 		 * Skip formatted manuals if a source version is
1880 		 * available.  Ignore the age: it is very unlikely
1881 		 * that people install newer formatted base manuals
1882 		 * when they used to have source manuals before,
1883 		 * and in ports, old manuals get removed on update.
1884 		 */
1885 		if (0 == use_all && MANDOC_FORM & src_form &&
1886 				'\0' != *psec) {
1887 			buf[0] = '\0';
1888 			strlcat(buf, dir, MAXPATHLEN);
1889 			p = strrchr(buf, '/');
1890 			if ('\0' != *parch && NULL != p)
1891 				for (p--; p > buf; p--)
1892 					if ('/' == *p)
1893 						break;
1894 			if (NULL == p)
1895 				p = buf;
1896 			else
1897 				p++;
1898 			if (0 == strncmp("cat", p, 3))
1899 				memcpy(p, "man", 3);
1900 			strlcat(buf, "/", MAXPATHLEN);
1901 			sz = strlcat(buf, fn, MAXPATHLEN);
1902 			if (sz >= MAXPATHLEN) {
1903 				if (warnings) fprintf(stderr,
1904 				    "%s/%s: path too long\n",
1905 				    dir, fn);
1906 				continue;
1907 			}
1908 			q = strrchr(buf, '.');
1909 			if (NULL != q && p < q++) {
1910 				*q = '\0';
1911 				sz = strlcat(buf, psec, MAXPATHLEN);
1912 				if (sz >= MAXPATHLEN) {
1913 					if (warnings) fprintf(stderr,
1914 					    "%s/%s: path too long\n",
1915 					    dir, fn);
1916 					continue;
1917 				}
1918 				if (0 == access(buf, R_OK))
1919 					continue;
1920 			}
1921 		}
1922 
1923 		buf[0] = '\0';
1924 		assert('.' == dir[0]);
1925 		if ('/' == dir[1]) {
1926 			strlcat(buf, dir + 2, MAXPATHLEN);
1927 			strlcat(buf, "/", MAXPATHLEN);
1928 		}
1929 		sz = strlcat(buf, fn, MAXPATHLEN);
1930 		if (sz >= MAXPATHLEN) {
1931 			if (warnings) fprintf(stderr,
1932 			    "%s/%s: path too long\n", dir, fn);
1933 			continue;
1934 		}
1935 
1936 		nof = mandoc_calloc(1, sizeof(struct of));
1937 		nof->fname = mandoc_strdup(buf);
1938 		nof->sec = mandoc_strdup(psec);
1939 		nof->arch = mandoc_strdup(parch);
1940 		nof->src_form = src_form;
1941 
1942 		/*
1943 		 * Remember the file name without the extension,
1944 		 * to be used as the page title in the database.
1945 		 */
1946 
1947 		if (NULL != suffix)
1948 			*suffix = '\0';
1949 		nof->title = mandoc_strdup(fn);
1950 
1951 		/*
1952 		 * Add the structure to the list.
1953 		 */
1954 
1955 		if (verb > 1)
1956 			printf("%s: scheduling\n", buf);
1957 
1958 		if (NULL == *of) {
1959 			*of = nof;
1960 			(*of)->first = nof;
1961 		} else {
1962 			nof->first = (*of)->first;
1963 			(*of)->next = nof;
1964 			*of = nof;
1965 		}
1966 	}
1967 
1968 	closedir(d);
1969 }
1970 
1971 static void
1972 ofile_free(struct of *of)
1973 {
1974 	struct of	*nof;
1975 
1976 	if (NULL != of)
1977 		of = of->first;
1978 
1979 	while (NULL != of) {
1980 		nof = of->next;
1981 		free(of->fname);
1982 		free(of->sec);
1983 		free(of->arch);
1984 		free(of->title);
1985 		free(of);
1986 		of = nof;
1987 	}
1988 }
1989