xref: /openbsd-src/usr.bin/mandoc/mandocdb.c (revision be38755c412cc72cb8d40f51ea70c9893196afff)
1 /*	$Id: mandocdb.c,v 1.2 2011/09/17 13:45:28 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/param.h>
18 
19 #include <assert.h>
20 #include <dirent.h>
21 #include <fcntl.h>
22 #include <getopt.h>
23 #include <stdio.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <db.h>
28 
29 #include "man.h"
30 #include "mdoc.h"
31 #include "mandoc.h"
32 
/* Database file names, created inside each manual directory. */
#define	MANDOC_DB	 "mandoc.db"	/* keyword btree */
#define	MANDOC_IDX	 "mandoc.index"	/* record-number index */

/* Initial size and growth step of a struct buf. */
#define	MANDOC_BUFSZ	  BUFSIZ

/* Growth step (in entries) of the array of reusable record numbers. */
#define	MANDOC_SLOP	  1024
37 
/*
 * Keyword type bit-fields.  See mandocdb.8.
 * A keyword's mask is the OR of every context it was found in.
 */

#define TYPE_NAME	  (1 << 0)	/* Nm in NAME; -man NAME names */
#define TYPE_FUNCTION	  (1 << 1)	/* Fn, Fo in SYNOPSIS */
#define TYPE_UTILITY	  (1 << 2)	/* Nm in SYNOPSIS */
#define TYPE_INCLUDES	  (1 << 3)	/* In, Fd #include in SYNOPSIS */
#define TYPE_VARIABLE	  (1 << 4)	/* Va, Vt in SYNOPSIS */
#define TYPE_STANDARD	  (1 << 5)	/* St in STANDARDS */
#define TYPE_AUTHOR	  (1 << 6)	/* An in AUTHORS */
#define TYPE_CONFIG	  (1 << 7)	/* Cd in SYNOPSIS */
#define TYPE_DESC	  (1 << 8)	/* Nd; -man NAME description */
#define TYPE_XREF	  (1 << 9)	/* Xr */
#define TYPE_PATH	  (1 << 10)	/* Pa in FILES */
#define TYPE_ENV	  (1 << 11)	/* Ev in ENVIRONMENT */
#define TYPE_ERR	  (1 << 12)	/* Er in ERRORS */
53 
/*
 * Singly-linked list of files scheduled for processing.
 * Kept deliberately tiny so that QUEUE need not be pulled in.
 * Every element also points back at the head of its list.
 */

struct	of {
	char		 *fname;	/* file name; heap-allocated */
	struct of	 *next;		/* following entry; NULL at the tail */
	struct of	 *first;	/* head of the list */
};
61 
/* Growable byte buffer. */

struct	buf {
	char		 *cp;	/* storage */
	size_t		  len;	/* bytes currently in use */
	size_t		  size;	/* bytes allocated */
};
69 
/* The operation selected on the command line. */

enum	op {
	OP_NEW = 0,	/* truncate and rebuild the databases */
	OP_UPDATE,	/* prune, then re-add, entries in existing databases */
	OP_DELETE	/* only prune entries from existing databases */
};
77 
/*
 * Common parameter lists of the node handlers:
 * the pending-keyword hash table, the keyword buffer, the description
 * buffer and the node being visited (plus the meta data for -mdoc).
 */
#define	MAN_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n

#define	MDOC_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m
87 
88 static	void		  buf_appendmdoc(struct buf *,
89 				const struct mdoc_node *, int);
90 static	void		  buf_append(struct buf *, const char *);
91 static	void		  buf_appendb(struct buf *,
92 				const void *, size_t);
93 static	void		  dbt_put(DB *, const char *, DBT *, DBT *);
94 static	void		  hash_put(DB *, const struct buf *, int);
95 static	void		  hash_reset(DB **);
96 static	void		  index_merge(const struct of *, struct mparse *,
97 				struct buf *, struct buf *,
98 				DB *, DB *, const char *,
99 				DB *, const char *, int,
100 				recno_t, const recno_t *, size_t);
101 static	void		  index_prune(const struct of *, DB *,
102 				const char *, DB *, const char *,
103 				int, recno_t *, recno_t **, size_t *);
104 static	void		  ofile_argbuild(char *[], int, int, struct of **);
105 static	int		  ofile_dirbuild(const char *, int, struct of **);
106 static	void		  ofile_free(struct of *);
107 static	int		  pman_node(MAN_ARGS);
108 static	void		  pmdoc_node(MDOC_ARGS);
109 static	void		  pmdoc_An(MDOC_ARGS);
110 static	void		  pmdoc_Cd(MDOC_ARGS);
111 static	void		  pmdoc_Er(MDOC_ARGS);
112 static	void		  pmdoc_Ev(MDOC_ARGS);
113 static	void		  pmdoc_Fd(MDOC_ARGS);
114 static	void		  pmdoc_In(MDOC_ARGS);
115 static	void		  pmdoc_Fn(MDOC_ARGS);
116 static	void		  pmdoc_Fo(MDOC_ARGS);
117 static	void		  pmdoc_Nd(MDOC_ARGS);
118 static	void		  pmdoc_Nm(MDOC_ARGS);
119 static	void		  pmdoc_Pa(MDOC_ARGS);
120 static	void		  pmdoc_St(MDOC_ARGS);
121 static	void		  pmdoc_Vt(MDOC_ARGS);
122 static	void		  pmdoc_Xr(MDOC_ARGS);
123 static	void		  usage(void);
124 
125 typedef	void		(*pmdoc_nf)(MDOC_ARGS);
126 
127 static	const pmdoc_nf	  mdocs[MDOC_MAX] = {
128 	NULL, /* Ap */
129 	NULL, /* Dd */
130 	NULL, /* Dt */
131 	NULL, /* Os */
132 	NULL, /* Sh */
133 	NULL, /* Ss */
134 	NULL, /* Pp */
135 	NULL, /* D1 */
136 	NULL, /* Dl */
137 	NULL, /* Bd */
138 	NULL, /* Ed */
139 	NULL, /* Bl */
140 	NULL, /* El */
141 	NULL, /* It */
142 	NULL, /* Ad */
143 	pmdoc_An, /* An */
144 	NULL, /* Ar */
145 	pmdoc_Cd, /* Cd */
146 	NULL, /* Cm */
147 	NULL, /* Dv */
148 	pmdoc_Er, /* Er */
149 	pmdoc_Ev, /* Ev */
150 	NULL, /* Ex */
151 	NULL, /* Fa */
152 	pmdoc_Fd, /* Fd */
153 	NULL, /* Fl */
154 	pmdoc_Fn, /* Fn */
155 	NULL, /* Ft */
156 	NULL, /* Ic */
157 	pmdoc_In, /* In */
158 	NULL, /* Li */
159 	pmdoc_Nd, /* Nd */
160 	pmdoc_Nm, /* Nm */
161 	NULL, /* Op */
162 	NULL, /* Ot */
163 	pmdoc_Pa, /* Pa */
164 	NULL, /* Rv */
165 	pmdoc_St, /* St */
166 	pmdoc_Vt, /* Va */
167 	pmdoc_Vt, /* Vt */
168 	pmdoc_Xr, /* Xr */
169 	NULL, /* %A */
170 	NULL, /* %B */
171 	NULL, /* %D */
172 	NULL, /* %I */
173 	NULL, /* %J */
174 	NULL, /* %N */
175 	NULL, /* %O */
176 	NULL, /* %P */
177 	NULL, /* %R */
178 	NULL, /* %T */
179 	NULL, /* %V */
180 	NULL, /* Ac */
181 	NULL, /* Ao */
182 	NULL, /* Aq */
183 	NULL, /* At */
184 	NULL, /* Bc */
185 	NULL, /* Bf */
186 	NULL, /* Bo */
187 	NULL, /* Bq */
188 	NULL, /* Bsx */
189 	NULL, /* Bx */
190 	NULL, /* Db */
191 	NULL, /* Dc */
192 	NULL, /* Do */
193 	NULL, /* Dq */
194 	NULL, /* Ec */
195 	NULL, /* Ef */
196 	NULL, /* Em */
197 	NULL, /* Eo */
198 	NULL, /* Fx */
199 	NULL, /* Ms */
200 	NULL, /* No */
201 	NULL, /* Ns */
202 	NULL, /* Nx */
203 	NULL, /* Ox */
204 	NULL, /* Pc */
205 	NULL, /* Pf */
206 	NULL, /* Po */
207 	NULL, /* Pq */
208 	NULL, /* Qc */
209 	NULL, /* Ql */
210 	NULL, /* Qo */
211 	NULL, /* Qq */
212 	NULL, /* Re */
213 	NULL, /* Rs */
214 	NULL, /* Sc */
215 	NULL, /* So */
216 	NULL, /* Sq */
217 	NULL, /* Sm */
218 	NULL, /* Sx */
219 	NULL, /* Sy */
220 	NULL, /* Tn */
221 	NULL, /* Ux */
222 	NULL, /* Xc */
223 	NULL, /* Xo */
224 	pmdoc_Fo, /* Fo */
225 	NULL, /* Fc */
226 	NULL, /* Oo */
227 	NULL, /* Oc */
228 	NULL, /* Bk */
229 	NULL, /* Ek */
230 	NULL, /* Bt */
231 	NULL, /* Hf */
232 	NULL, /* Fr */
233 	NULL, /* Ud */
234 	NULL, /* Lb */
235 	NULL, /* Lp */
236 	NULL, /* Lk */
237 	NULL, /* Mt */
238 	NULL, /* Brq */
239 	NULL, /* Bro */
240 	NULL, /* Brc */
241 	NULL, /* %C */
242 	NULL, /* Es */
243 	NULL, /* En */
244 	NULL, /* Dx */
245 	NULL, /* %Q */
246 	NULL, /* br */
247 	NULL, /* sp */
248 	NULL, /* %U */
249 	NULL, /* Ta */
250 };
251 
252 static	const char	 *progname;
253 
254 int
255 main(int argc, char *argv[])
256 {
257 	struct mparse	*mp; /* parse sequence */
258 	enum op		 op; /* current operation */
259 	const char	*dir;
260 	char		 ibuf[MAXPATHLEN], /* index fname */
261 			 fbuf[MAXPATHLEN];  /* btree fname */
262 	int		 verb, /* output verbosity */
263 			 ch, i, flags;
264 	DB		*idx, /* index database */
265 			*db, /* keyword database */
266 			*hash; /* temporary keyword hashtable */
267 	BTREEINFO	 info; /* btree configuration */
268 	recno_t		 maxrec; /* supremum of all records */
269 	recno_t		*recs; /* buffer of empty records */
270 	size_t		 sz1, sz2,
271 			 recsz, /* buffer size of recs */
272 			 reccur; /* valid number of recs */
273 	struct buf	 buf, /* keyword buffer */
274 			 dbuf; /* description buffer */
275 	struct of	*of; /* list of files for processing */
276 	extern int	 optind;
277 	extern char	*optarg;
278 
279 	progname = strrchr(argv[0], '/');
280 	if (progname == NULL)
281 		progname = argv[0];
282 	else
283 		++progname;
284 
285 	verb = 0;
286 	of = NULL;
287 	db = idx = NULL;
288 	mp = NULL;
289 	hash = NULL;
290 	recs = NULL;
291 	recsz = reccur = 0;
292 	maxrec = 0;
293 	op = OP_NEW;
294 	dir = NULL;
295 
296 	while (-1 != (ch = getopt(argc, argv, "d:u:v")))
297 		switch (ch) {
298 		case ('d'):
299 			dir = optarg;
300 			op = OP_UPDATE;
301 			break;
302 		case ('u'):
303 			dir = optarg;
304 			op = OP_DELETE;
305 			break;
306 		case ('v'):
307 			verb++;
308 			break;
309 		default:
310 			usage();
311 			return((int)MANDOCLEVEL_BADARG);
312 		}
313 
314 	argc -= optind;
315 	argv += optind;
316 
317 	memset(&info, 0, sizeof(BTREEINFO));
318 	info.flags = R_DUP;
319 
320 	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
321 
322 	memset(&buf, 0, sizeof(struct buf));
323 	memset(&dbuf, 0, sizeof(struct buf));
324 
325 	buf.size = dbuf.size = MANDOC_BUFSZ;
326 
327 	buf.cp = mandoc_malloc(buf.size);
328 	dbuf.cp = mandoc_malloc(dbuf.size);
329 
330 	flags = OP_NEW == op ? O_CREAT|O_TRUNC|O_RDWR : O_CREAT|O_RDWR;
331 
332 	if (OP_UPDATE == op || OP_DELETE == op) {
333 		ibuf[0] = fbuf[0] = '\0';
334 
335 		strlcat(fbuf, dir, MAXPATHLEN);
336 		strlcat(fbuf, "/", MAXPATHLEN);
337 		sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
338 
339 		strlcat(ibuf, dir, MAXPATHLEN);
340 		strlcat(ibuf, "/", MAXPATHLEN);
341 		sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
342 
343 		if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
344 			fprintf(stderr, "%s: Path too long\n", dir);
345 			exit((int)MANDOCLEVEL_BADARG);
346 		}
347 
348 		db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
349 		idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
350 
351 		if (NULL == db) {
352 			perror(fbuf);
353 			exit((int)MANDOCLEVEL_SYSERR);
354 		} else if (NULL == db) {
355 			perror(ibuf);
356 			exit((int)MANDOCLEVEL_SYSERR);
357 		}
358 
359 		if (verb > 2) {
360 			printf("%s: Opened\n", fbuf);
361 			printf("%s: Opened\n", ibuf);
362 		}
363 
364 		ofile_argbuild(argv, argc, verb, &of);
365 		if (NULL == of)
366 			goto out;
367 
368 		of = of->first;
369 
370 		index_prune(of, db, fbuf, idx, ibuf, verb,
371 				&maxrec, &recs, &recsz);
372 
373 		if (OP_UPDATE == op)
374 			index_merge(of, mp, &dbuf, &buf, hash,
375 					db, fbuf, idx, ibuf, verb,
376 					maxrec, recs, reccur);
377 
378 		goto out;
379 	}
380 
381 	for (i = 0; i < argc; i++) {
382 		ibuf[0] = fbuf[0] = '\0';
383 
384 		strlcat(fbuf, argv[i], MAXPATHLEN);
385 		strlcat(fbuf, "/", MAXPATHLEN);
386 		sz1 = strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
387 
388 		strlcat(ibuf, argv[i], MAXPATHLEN);
389 		strlcat(ibuf, "/", MAXPATHLEN);
390 		sz2 = strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
391 
392 		if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
393 			fprintf(stderr, "%s: Path too long\n", argv[i]);
394 			exit((int)MANDOCLEVEL_BADARG);
395 		}
396 
397 		db = dbopen(fbuf, flags, 0644, DB_BTREE, &info);
398 		idx = dbopen(ibuf, flags, 0644, DB_RECNO, NULL);
399 
400 		if (NULL == db) {
401 			perror(fbuf);
402 			exit((int)MANDOCLEVEL_SYSERR);
403 		} else if (NULL == db) {
404 			perror(ibuf);
405 			exit((int)MANDOCLEVEL_SYSERR);
406 		}
407 
408 		if (verb > 2) {
409 			printf("%s: Truncated\n", fbuf);
410 			printf("%s: Truncated\n", ibuf);
411 		}
412 
413 		ofile_free(of);
414 		of = NULL;
415 
416 		if ( ! ofile_dirbuild(argv[i], verb, &of))
417 			exit((int)MANDOCLEVEL_SYSERR);
418 
419 		if (NULL == of)
420 			continue;
421 
422 		of = of->first;
423 
424 		index_merge(of, mp, &dbuf, &buf, hash, db, fbuf,
425 				idx, ibuf, verb, maxrec, recs, reccur);
426 	}
427 
428 out:
429 	if (db)
430 		(*db->close)(db);
431 	if (idx)
432 		(*idx->close)(idx);
433 	if (hash)
434 		(*hash->close)(hash);
435 	if (mp)
436 		mparse_free(mp);
437 
438 	ofile_free(of);
439 	free(buf.cp);
440 	free(dbuf.cp);
441 	free(recs);
442 
443 	return(MANDOCLEVEL_OK);
444 }
445 
446 void
447 index_merge(const struct of *of, struct mparse *mp,
448 		struct buf *dbuf, struct buf *buf,
449 		DB *hash, DB *db, const char *dbf,
450 		DB *idx, const char *idxf, int verb,
451 		recno_t maxrec, const recno_t *recs, size_t reccur)
452 {
453 	recno_t		 rec;
454 	int		 ch;
455 	DBT		 key, val;
456 	struct mdoc	*mdoc;
457 	struct man	*man;
458 	const char	*fn, *msec, *mtitle, *arch;
459 	size_t		 sv;
460 	unsigned	 seq;
461 	char		 vbuf[8];
462 
463 	for (rec = 0; of; of = of->next) {
464 		fn = of->fname;
465 		if (reccur > 0) {
466 			--reccur;
467 			rec = recs[(int)reccur];
468 		} else if (maxrec > 0) {
469 			rec = maxrec;
470 			maxrec = 0;
471 		} else
472 			rec++;
473 
474 		mparse_reset(mp);
475 		hash_reset(&hash);
476 
477 		if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
478 			fprintf(stderr, "%s: Parse failure\n", fn);
479 			continue;
480 		}
481 
482 		mparse_result(mp, &mdoc, &man);
483 		if (NULL == mdoc && NULL == man)
484 			continue;
485 
486 		msec = NULL != mdoc ?
487 			mdoc_meta(mdoc)->msec : man_meta(man)->msec;
488 		mtitle = NULL != mdoc ?
489 			mdoc_meta(mdoc)->title : man_meta(man)->title;
490 		arch = NULL != mdoc ?
491 			mdoc_meta(mdoc)->arch : NULL;
492 
493 		if (NULL == arch)
494 			arch = "";
495 
496 		/*
497 		 * The index record value consists of a nil-terminated
498 		 * filename, a nil-terminated manual section, and a
499 		 * nil-terminated description.  Since the description
500 		 * may not be set, we set a sentinel to see if we're
501 		 * going to write a nil byte in its place.
502 		 */
503 
504 		dbuf->len = 0;
505 		buf_appendb(dbuf, fn, strlen(fn) + 1);
506 		buf_appendb(dbuf, msec, strlen(msec) + 1);
507 		buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
508 		buf_appendb(dbuf, arch, strlen(arch) + 1);
509 
510 		sv = dbuf->len;
511 
512 		/* Fix the record number in the btree value. */
513 
514 		if (mdoc)
515 			pmdoc_node(hash, buf, dbuf,
516 				mdoc_node(mdoc), mdoc_meta(mdoc));
517 		else
518 			pman_node(hash, buf, dbuf, man_node(man));
519 
520 		/*
521 		 * Copy from the in-memory hashtable of pending keywords
522 		 * into the database.
523 		 */
524 
525 		memset(vbuf, 0, sizeof(uint32_t));
526 		memcpy(vbuf + 4, &rec, sizeof(uint32_t));
527 
528 		seq = R_FIRST;
529 		while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
530 			seq = R_NEXT;
531 
532 			memcpy(vbuf, val.data, sizeof(uint32_t));
533 			val.size = sizeof(vbuf);
534 			val.data = vbuf;
535 
536 			if (verb > 1)
537 				printf("%s: Added keyword: %s\n",
538 						fn, (char *)key.data);
539 			dbt_put(db, dbf, &key, &val);
540 		}
541 		if (ch < 0) {
542 			perror("hash");
543 			exit((int)MANDOCLEVEL_SYSERR);
544 		}
545 
546 		/*
547 		 * Apply to the index.  If we haven't had a description
548 		 * set, put an empty one in now.
549 		 */
550 
551 		if (dbuf->len == sv)
552 			buf_appendb(dbuf, "", 1);
553 
554 		key.data = &rec;
555 		key.size = sizeof(recno_t);
556 
557 		val.data = dbuf->cp;
558 		val.size = dbuf->len;
559 
560 		if (verb)
561 			printf("%s: Added index\n", fn);
562 		dbt_put(idx, idxf, &key, &val);
563 	}
564 }
565 
566 /*
567  * Scan through all entries in the index file `idx' and prune those
568  * entries in `ofile'.
569  * Pruning consists of removing from `db', then invalidating the entry
570  * in `idx' (zeroing its value size).
571  */
572 static void
573 index_prune(const struct of *ofile, DB *db, const char *dbf,
574 		DB *idx, const char *idxf, int verb,
575 		recno_t *maxrec, recno_t **recs, size_t *recsz)
576 {
577 	const struct of	*of;
578 	const char	*fn;
579 	unsigned	 seq, sseq;
580 	DBT		 key, val;
581 	size_t		 reccur;
582 	int		 ch;
583 
584 	reccur = 0;
585 	seq = R_FIRST;
586 	while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) {
587 		seq = R_NEXT;
588 		*maxrec = *(recno_t *)key.data;
589 		if (0 == val.size) {
590 			if (reccur >= *recsz) {
591 				*recsz += MANDOC_SLOP;
592 				*recs = mandoc_realloc(*recs,
593 					*recsz * sizeof(recno_t));
594 			}
595 			(*recs)[(int)reccur] = *maxrec;
596 			reccur++;
597 			continue;
598 		}
599 
600 		fn = (char *)val.data;
601 		for (of = ofile; of; of = of->next)
602 			if (0 == strcmp(fn, of->fname))
603 				break;
604 
605 		if (NULL == of)
606 			continue;
607 
608 		sseq = R_FIRST;
609 		while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
610 			sseq = R_NEXT;
611 			assert(8 == val.size);
612 			if (*maxrec != *(recno_t *)(val.data + 4))
613 				continue;
614 			if (verb)
615 				printf("%s: Deleted keyword: %s\n",
616 						fn, (char *)key.data);
617 			ch = (*db->del)(db, &key, R_CURSOR);
618 			if (ch < 0)
619 				break;
620 		}
621 		if (ch < 0) {
622 			perror(dbf);
623 			exit((int)MANDOCLEVEL_SYSERR);
624 		}
625 
626 		if (verb)
627 			printf("%s: Deleted index\n", fn);
628 
629 		val.size = 0;
630 		ch = (*idx->put)(idx, &key, &val, R_CURSOR);
631 		if (ch < 0) {
632 			perror(idxf);
633 			exit((int)MANDOCLEVEL_SYSERR);
634 		}
635 
636 		if (reccur >= *recsz) {
637 			*recsz += MANDOC_SLOP;
638 			*recs = mandoc_realloc
639 				(*recs, *recsz * sizeof(recno_t));
640 		}
641 
642 		(*recs)[(int)reccur] = *maxrec;
643 		reccur++;
644 	}
645 	(*maxrec)++;
646 }
647 
648 /*
649  * Grow the buffer (if necessary) and copy in a binary string.
650  */
651 static void
652 buf_appendb(struct buf *buf, const void *cp, size_t sz)
653 {
654 
655 	/* Overshoot by MANDOC_BUFSZ. */
656 
657 	while (buf->len + sz >= buf->size) {
658 		buf->size = buf->len + sz + MANDOC_BUFSZ;
659 		buf->cp = mandoc_realloc(buf->cp, buf->size);
660 	}
661 
662 	memcpy(buf->cp + (int)buf->len, cp, sz);
663 	buf->len += sz;
664 }
665 
666 /*
667  * Append a nil-terminated string to the buffer.
668  * This can be invoked multiple times.
669  * The buffer string will be nil-terminated.
670  * If invoked multiple times, a space is put between strings.
671  */
672 static void
673 buf_append(struct buf *buf, const char *cp)
674 {
675 	size_t		 sz;
676 
677 	if (0 == (sz = strlen(cp)))
678 		return;
679 
680 	if (buf->len)
681 		buf->cp[(int)buf->len - 1] = ' ';
682 
683 	buf_appendb(buf, cp, sz + 1);
684 }
685 
686 /*
687  * Recursively add all text from a given node.
688  * This is optimised for general mdoc nodes in this context, which do
689  * not consist of subexpressions and having a recursive call for n->next
690  * would be wasteful.
691  * The "f" variable should be 0 unless called from pmdoc_Nd for the
692  * description buffer, which does not start at the beginning of the
693  * buffer.
694  */
695 static void
696 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
697 {
698 
699 	for ( ; n; n = n->next) {
700 		if (n->child)
701 			buf_appendmdoc(buf, n->child, f);
702 
703 		if (MDOC_TEXT == n->type && f) {
704 			f = 0;
705 			buf_appendb(buf, n->string,
706 					strlen(n->string) + 1);
707 		} else if (MDOC_TEXT == n->type)
708 			buf_append(buf, n->string);
709 
710 	}
711 }
712 
713 /* ARGSUSED */
714 static void
715 pmdoc_An(MDOC_ARGS)
716 {
717 
718 	if (SEC_AUTHORS != n->sec)
719 		return;
720 
721 	buf_appendmdoc(buf, n->child, 0);
722 	hash_put(hash, buf, TYPE_AUTHOR);
723 }
724 
725 static void
726 hash_reset(DB **db)
727 {
728 	DB		*hash;
729 
730 	if (NULL != (hash = *db))
731 		(*hash->close)(hash);
732 
733 	*db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
734 	if (NULL == *db) {
735 		perror("hash");
736 		exit((int)MANDOCLEVEL_SYSERR);
737 	}
738 }
739 
740 /* ARGSUSED */
741 static void
742 pmdoc_Fd(MDOC_ARGS)
743 {
744 	const char	*start, *end;
745 	size_t		 sz;
746 
747 	if (SEC_SYNOPSIS != n->sec)
748 		return;
749 	if (NULL == (n = n->child) || MDOC_TEXT != n->type)
750 		return;
751 
752 	/*
753 	 * Only consider those `Fd' macro fields that begin with an
754 	 * "inclusion" token (versus, e.g., #define).
755 	 */
756 	if (strcmp("#include", n->string))
757 		return;
758 
759 	if (NULL == (n = n->next) || MDOC_TEXT != n->type)
760 		return;
761 
762 	/*
763 	 * Strip away the enclosing angle brackets and make sure we're
764 	 * not zero-length.
765 	 */
766 
767 	start = n->string;
768 	if ('<' == *start || '"' == *start)
769 		start++;
770 
771 	if (0 == (sz = strlen(start)))
772 		return;
773 
774 	end = &start[(int)sz - 1];
775 	if ('>' == *end || '"' == *end)
776 		end--;
777 
778 	assert(end >= start);
779 
780 	buf_appendb(buf, start, (size_t)(end - start + 1));
781 	buf_appendb(buf, "", 1);
782 
783 	hash_put(hash, buf, TYPE_INCLUDES);
784 }
785 
786 /* ARGSUSED */
787 static void
788 pmdoc_Cd(MDOC_ARGS)
789 {
790 
791 	if (SEC_SYNOPSIS != n->sec)
792 		return;
793 
794 	buf_appendmdoc(buf, n->child, 0);
795 	hash_put(hash, buf, TYPE_CONFIG);
796 }
797 
798 /* ARGSUSED */
799 static void
800 pmdoc_In(MDOC_ARGS)
801 {
802 
803 	if (SEC_SYNOPSIS != n->sec)
804 		return;
805 	if (NULL == n->child || MDOC_TEXT != n->child->type)
806 		return;
807 
808 	buf_append(buf, n->child->string);
809 	hash_put(hash, buf, TYPE_INCLUDES);
810 }
811 
812 /* ARGSUSED */
813 static void
814 pmdoc_Fn(MDOC_ARGS)
815 {
816 	const char	*cp;
817 
818 	if (SEC_SYNOPSIS != n->sec)
819 		return;
820 	if (NULL == n->child || MDOC_TEXT != n->child->type)
821 		return;
822 
823 	/* .Fn "struct type *arg" "foo" */
824 
825 	cp = strrchr(n->child->string, ' ');
826 	if (NULL == cp)
827 		cp = n->child->string;
828 
829 	/* Strip away pointer symbol. */
830 
831 	while ('*' == *cp)
832 		cp++;
833 
834 	buf_append(buf, cp);
835 	hash_put(hash, buf, TYPE_FUNCTION);
836 }
837 
838 /* ARGSUSED */
839 static void
840 pmdoc_St(MDOC_ARGS)
841 {
842 
843 	if (SEC_STANDARDS != n->sec)
844 		return;
845 	if (NULL == n->child || MDOC_TEXT != n->child->type)
846 		return;
847 
848 	buf_append(buf, n->child->string);
849 	hash_put(hash, buf, TYPE_STANDARD);
850 }
851 
852 /* ARGSUSED */
853 static void
854 pmdoc_Xr(MDOC_ARGS)
855 {
856 
857 	if (NULL == (n = n->child))
858 		return;
859 
860 	buf_appendb(buf, n->string, strlen(n->string));
861 
862 	if (NULL != (n = n->next)) {
863 		buf_appendb(buf, ".", 1);
864 		buf_appendb(buf, n->string, strlen(n->string) + 1);
865 	} else
866 		buf_appendb(buf, ".", 2);
867 
868 	hash_put(hash, buf, TYPE_XREF);
869 }
870 
871 /* ARGSUSED */
872 static void
873 pmdoc_Vt(MDOC_ARGS)
874 {
875 	const char	*start;
876 	size_t		 sz;
877 
878 	if (SEC_SYNOPSIS != n->sec)
879 		return;
880 	if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
881 		return;
882 	if (NULL == n->last || MDOC_TEXT != n->last->type)
883 		return;
884 
885 	/*
886 	 * Strip away leading pointer symbol '*' and trailing ';'.
887 	 */
888 
889 	start = n->last->string;
890 
891 	while ('*' == *start)
892 		start++;
893 
894 	if (0 == (sz = strlen(start)))
895 		return;
896 
897 	if (';' == start[(int)sz - 1])
898 		sz--;
899 
900 	if (0 == sz)
901 		return;
902 
903 	buf_appendb(buf, start, sz);
904 	buf_appendb(buf, "", 1);
905 	hash_put(hash, buf, TYPE_VARIABLE);
906 }
907 
908 /* ARGSUSED */
909 static void
910 pmdoc_Fo(MDOC_ARGS)
911 {
912 
913 	if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
914 		return;
915 	if (NULL == n->child || MDOC_TEXT != n->child->type)
916 		return;
917 
918 	buf_append(buf, n->child->string);
919 	hash_put(hash, buf, TYPE_FUNCTION);
920 }
921 
922 
923 /* ARGSUSED */
924 static void
925 pmdoc_Nd(MDOC_ARGS)
926 {
927 
928 	if (MDOC_BODY != n->type)
929 		return;
930 
931 	buf_appendmdoc(dbuf, n->child, 1);
932 	buf_appendmdoc(buf, n->child, 0);
933 
934 	hash_put(hash, buf, TYPE_DESC);
935 }
936 
937 /* ARGSUSED */
938 static void
939 pmdoc_Er(MDOC_ARGS)
940 {
941 
942 	if (SEC_ERRORS != n->sec)
943 		return;
944 
945 	buf_appendmdoc(buf, n->child, 0);
946 	hash_put(hash, buf, TYPE_ERR);
947 }
948 
949 /* ARGSUSED */
950 static void
951 pmdoc_Ev(MDOC_ARGS)
952 {
953 
954 	if (SEC_ENVIRONMENT != n->sec)
955 		return;
956 
957 	buf_appendmdoc(buf, n->child, 0);
958 	hash_put(hash, buf, TYPE_ENV);
959 }
960 
961 /* ARGSUSED */
962 static void
963 pmdoc_Pa(MDOC_ARGS)
964 {
965 
966 	if (SEC_FILES != n->sec)
967 		return;
968 
969 	buf_appendmdoc(buf, n->child, 0);
970 	hash_put(hash, buf, TYPE_PATH);
971 }
972 
973 /* ARGSUSED */
974 static void
975 pmdoc_Nm(MDOC_ARGS)
976 {
977 
978 	if (SEC_NAME == n->sec) {
979 		buf_appendmdoc(buf, n->child, 0);
980 		hash_put(hash, buf, TYPE_NAME);
981 		return;
982 	} else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
983 		return;
984 
985 	if (NULL == n->child)
986 		buf_append(buf, m->name);
987 
988 	buf_appendmdoc(buf, n->child, 0);
989 	hash_put(hash, buf, TYPE_UTILITY);
990 }
991 
992 static void
993 hash_put(DB *db, const struct buf *buf, int mask)
994 {
995 	DBT		 key, val;
996 	int		 rc;
997 
998 	if (buf->len < 2)
999 		return;
1000 
1001 	key.data = buf->cp;
1002 	key.size = buf->len;
1003 
1004 	if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
1005 		perror("hash");
1006 		exit((int)MANDOCLEVEL_SYSERR);
1007 	} else if (0 == rc)
1008 		mask |= *(int *)val.data;
1009 
1010 	val.data = &mask;
1011 	val.size = sizeof(int);
1012 
1013 	if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1014 		perror("hash");
1015 		exit((int)MANDOCLEVEL_SYSERR);
1016 	}
1017 }
1018 
1019 static void
1020 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1021 {
1022 
1023 	assert(key->size);
1024 	assert(val->size);
1025 
1026 	if (0 == (*db->put)(db, key, val, 0))
1027 		return;
1028 
1029 	perror(dbn);
1030 	exit((int)MANDOCLEVEL_SYSERR);
1031 	/* NOTREACHED */
1032 }
1033 
1034 /*
1035  * Call out to per-macro handlers after clearing the persistent database
1036  * key.  If the macro sets the database key, flush it to the database.
1037  */
1038 static void
1039 pmdoc_node(MDOC_ARGS)
1040 {
1041 
1042 	if (NULL == n)
1043 		return;
1044 
1045 	switch (n->type) {
1046 	case (MDOC_HEAD):
1047 		/* FALLTHROUGH */
1048 	case (MDOC_BODY):
1049 		/* FALLTHROUGH */
1050 	case (MDOC_TAIL):
1051 		/* FALLTHROUGH */
1052 	case (MDOC_BLOCK):
1053 		/* FALLTHROUGH */
1054 	case (MDOC_ELEM):
1055 		if (NULL == mdocs[n->tok])
1056 			break;
1057 
1058 		buf->len = 0;
1059 		(*mdocs[n->tok])(hash, buf, dbuf, n, m);
1060 		break;
1061 	default:
1062 		break;
1063 	}
1064 
1065 	pmdoc_node(hash, buf, dbuf, n->child, m);
1066 	pmdoc_node(hash, buf, dbuf, n->next, m);
1067 }
1068 
1069 static int
1070 pman_node(MAN_ARGS)
1071 {
1072 	const struct man_node *head, *body;
1073 	const char	*start, *sv;
1074 	size_t		 sz;
1075 
1076 	if (NULL == n)
1077 		return(0);
1078 
1079 	/*
1080 	 * We're only searching for one thing: the first text child in
1081 	 * the BODY of a NAME section.  Since we don't keep track of
1082 	 * sections in -man, run some hoops to find out whether we're in
1083 	 * the correct section or not.
1084 	 */
1085 
1086 	if (MAN_BODY == n->type && MAN_SH == n->tok) {
1087 		body = n;
1088 		assert(body->parent);
1089 		if (NULL != (head = body->parent->head) &&
1090 				1 == head->nchild &&
1091 				NULL != (head = (head->child)) &&
1092 				MAN_TEXT == head->type &&
1093 				0 == strcmp(head->string, "NAME") &&
1094 				NULL != (body = body->child) &&
1095 				MAN_TEXT == body->type) {
1096 
1097 			assert(body->string);
1098 			start = sv = body->string;
1099 
1100 			/*
1101 			 * Go through a special heuristic dance here.
1102 			 * This is why -man manuals are great!
1103 			 * (I'm being sarcastic: my eyes are bleeding.)
1104 			 * Conventionally, one or more manual names are
1105 			 * comma-specified prior to a whitespace, then a
1106 			 * dash, then a description.  Try to puzzle out
1107 			 * the name parts here.
1108 			 */
1109 
1110 			for ( ;; ) {
1111 				sz = strcspn(start, " ,");
1112 				if ('\0' == start[(int)sz])
1113 					break;
1114 
1115 				buf->len = 0;
1116 				buf_appendb(buf, start, sz);
1117 				buf_appendb(buf, "", 1);
1118 
1119 				hash_put(hash, buf, TYPE_NAME);
1120 
1121 				if (' ' == start[(int)sz]) {
1122 					start += (int)sz + 1;
1123 					break;
1124 				}
1125 
1126 				assert(',' == start[(int)sz]);
1127 				start += (int)sz + 1;
1128 				while (' ' == *start)
1129 					start++;
1130 			}
1131 
1132 			buf->len = 0;
1133 
1134 			if (sv == start) {
1135 				buf_append(buf, start);
1136 				return(1);
1137 			}
1138 
1139 			while (' ' == *start)
1140 				start++;
1141 
1142 			if (0 == strncmp(start, "-", 1))
1143 				start += 1;
1144 			else if (0 == strncmp(start, "\\-", 2))
1145 				start += 2;
1146 			else if (0 == strncmp(start, "\\(en", 4))
1147 				start += 4;
1148 			else if (0 == strncmp(start, "\\(em", 4))
1149 				start += 4;
1150 
1151 			while (' ' == *start)
1152 				start++;
1153 
1154 			sz = strlen(start) + 1;
1155 			buf_appendb(dbuf, start, sz);
1156 			buf_appendb(buf, start, sz);
1157 
1158 			hash_put(hash, buf, TYPE_DESC);
1159 		}
1160 	}
1161 
1162 	if (pman_node(hash, buf, dbuf, n->child))
1163 		return(1);
1164 	if (pman_node(hash, buf, dbuf, n->next))
1165 		return(1);
1166 
1167 	return(0);
1168 }
1169 
1170 static void
1171 ofile_argbuild(char *argv[], int argc, int verb, struct of **of)
1172 {
1173 	int		 i;
1174 	struct of	*nof;
1175 
1176 	for (i = 0; i < argc; i++) {
1177 		nof = mandoc_calloc(1, sizeof(struct of));
1178 		nof->fname = strdup(argv[i]);
1179 		if (verb > 2)
1180 			printf("%s: Scheduling\n", argv[i]);
1181 		if (NULL == *of) {
1182 			*of = nof;
1183 			(*of)->first = nof;
1184 		} else {
1185 			nof->first = (*of)->first;
1186 			(*of)->next = nof;
1187 			*of = nof;
1188 		}
1189 	}
1190 }
1191 
1192 /*
1193  * Recursively build up a list of files to parse.
1194  * We use this instead of ftw() and so on because I don't want global
1195  * variables hanging around.
1196  * This ignores the mandoc.db and mandoc.index files, but assumes that
1197  * everything else is a manual.
1198  * Pass in a pointer to a NULL structure for the first invocation.
1199  */
1200 static int
1201 ofile_dirbuild(const char *dir, int verb, struct of **of)
1202 {
1203 	char		 buf[MAXPATHLEN];
1204 	size_t		 sz;
1205 	DIR		*d;
1206 	const char	*fn;
1207 	struct of	*nof;
1208 	struct dirent	*dp;
1209 
1210 	if (NULL == (d = opendir(dir))) {
1211 		perror(dir);
1212 		return(0);
1213 	}
1214 
1215 	while (NULL != (dp = readdir(d))) {
1216 		fn = dp->d_name;
1217 		if (DT_DIR == dp->d_type) {
1218 			if (0 == strcmp(".", fn))
1219 				continue;
1220 			if (0 == strcmp("..", fn))
1221 				continue;
1222 
1223 			buf[0] = '\0';
1224 			strlcat(buf, dir, MAXPATHLEN);
1225 			strlcat(buf, "/", MAXPATHLEN);
1226 			sz = strlcat(buf, fn, MAXPATHLEN);
1227 
1228 			if (sz < MAXPATHLEN) {
1229 				if ( ! ofile_dirbuild(buf, verb, of))
1230 					return(0);
1231 				continue;
1232 			} else if (sz < MAXPATHLEN)
1233 				continue;
1234 
1235 			fprintf(stderr, "%s: Path too long\n", dir);
1236 			return(0);
1237 		}
1238 		if (DT_REG != dp->d_type)
1239 			continue;
1240 
1241 		if (0 == strcmp(MANDOC_DB, fn) ||
1242 				0 == strcmp(MANDOC_IDX, fn))
1243 			continue;
1244 
1245 		buf[0] = '\0';
1246 		strlcat(buf, dir, MAXPATHLEN);
1247 		strlcat(buf, "/", MAXPATHLEN);
1248 		sz = strlcat(buf, fn, MAXPATHLEN);
1249 		if (sz >= MAXPATHLEN) {
1250 			fprintf(stderr, "%s: Path too long\n", dir);
1251 			return(0);
1252 		}
1253 
1254 		nof = mandoc_calloc(1, sizeof(struct of));
1255 		nof->fname = mandoc_strdup(buf);
1256 
1257 		if (verb > 2)
1258 			printf("%s: Scheduling\n", buf);
1259 
1260 		if (NULL == *of) {
1261 			*of = nof;
1262 			(*of)->first = nof;
1263 		} else {
1264 			nof->first = (*of)->first;
1265 			(*of)->next = nof;
1266 			*of = nof;
1267 		}
1268 	}
1269 
1270 	return(1);
1271 }
1272 
1273 static void
1274 ofile_free(struct of *of)
1275 {
1276 	struct of	*nof;
1277 
1278 	while (of) {
1279 		nof = of->next;
1280 		free(of->fname);
1281 		free(of);
1282 		of = nof;
1283 	}
1284 }
1285 
1286 static void
1287 usage(void)
1288 {
1289 
1290 	fprintf(stderr, "usage: %s [-v] "
1291 			"[-d dir [files...] |"
1292 			" -u dir [files...] |"
1293 			" dir...]\n", progname);
1294 }
1295