xref: /netbsd-src/libexec/makewhatis/makewhatis.c (revision e771911261da1aa61e25dab7c468d1b5502278c3)
1 /*	$NetBSD: makewhatis.c,v 1.51 2017/10/02 22:14:32 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Matthias Scheler.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
35 
36 #include <sys/cdefs.h>
37 #if !defined(lint)
38 __COPYRIGHT("@(#) Copyright (c) 1999\
39  The NetBSD Foundation, Inc.  All rights reserved.");
40 __RCSID("$NetBSD: makewhatis.c,v 1.51 2017/10/02 22:14:32 christos Exp $");
41 #endif /* not lint */
42 
43 #include <sys/types.h>
44 #include <sys/param.h>
45 #include <sys/queue.h>
46 #include <sys/stat.h>
47 #include <sys/wait.h>
48 
49 #include <ctype.h>
50 #include <err.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <fts.h>
54 #include <glob.h>
55 #include <locale.h>
56 #include <paths.h>
57 #include <signal.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <time.h>
62 #include <unistd.h>
63 #include <zlib.h>
64 #include <util.h>
65 
66 #include <man/manconf.h>
67 #include <man/pathnames.h>
68 
69 #ifndef NROFF
70 #define NROFF "nroff"
71 #endif
72 
/*
 * Node of the binary search tree of manual page files.  The tree is
 * ordered by inode number, so a file that is reached a second time is
 * recorded only once.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* subtree holding smaller inode numbers */
	manpage	*mp_right;	/* subtree holding larger inode numbers */
	ino_t	 mp_inode;	/* inode number of the file (tree key) */
	size_t	 mp_sdoff;	/* offset of the subdirectory prefix in mp_name */
	size_t	 mp_sdlen;	/* size of that prefix, 0 when there is none */
	char	 mp_name[1];	/* path name; the node is over-allocated to fit */
};
81 
/*
 * Node of the binary search tree of whatis lines.  Entries are ordered
 * by the line text first and by the prefix second.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* subtree of entries sorting before this one */
	whatis	*wi_right;	/* subtree of entries sorting after this one */
	char	*wi_data;	/* text of the whatis line */
	char	 wi_prefix[1];	/* prefix string; the node is over-allocated */
};
88 
89 int		main(int, char * const *);
90 static char	*findwhitespace(char *);
91 static char	*strmove(char *, char *);
92 static char	*GetS(gzFile, char *, size_t);
93 static int	pathnamesection(const char *, const char *);
94 static int	manpagesection(char *);
95 static char	*createsectionstring(char *);
96 static void	addmanpage(manpage **, ino_t, char *, size_t, size_t);
97 static void	addwhatis(whatis **, char *, char *);
98 static char	*makesection(int);
99 static char	*makewhatisline(const char *, const char *, const char *);
100 static void	catpreprocess(char *);
101 static char	*parsecatpage(const char *, gzFile);
102 static int	manpreprocess(char *);
103 static char	*nroff(const char *, gzFile);
104 static char	*parsemanpage(const char *, gzFile, int);
105 static char	*getwhatisdata(char *);
106 static void	processmanpages(manpage **, whatis **);
107 static void	dumpwhatis(FILE *, whatis *);
108 static int	makewhatis(char * const *manpath);
109 
110 static char * const default_manpath[] = {
111 	"/usr/share/man",
112 	NULL
113 };
114 
115 static const char	*sectionext = "0123456789ln";
116 static const char	*whatisdb   = _PATH_WHATIS;
117 static const char	*whatisdb_new = _PATH_WHATIS ".new";
118 static int		dowarn      = 0;
119 
120 #define	ISALPHA(c)	isalpha((unsigned char)(c))
121 #define	ISDIGIT(c)	isdigit((unsigned char)(c))
122 #define	ISSPACE(c)	isspace((unsigned char)(c))
123 
124 int
main(int argc,char * const * argv)125 main(int argc, char *const *argv)
126 {
127 	char * const	*manpath;
128 	int		c, dofork;
129 	const char	*conffile;
130 	ENTRY		*ep;
131 	TAG		*tp;
132 	int		rv, jobs, status;
133 	glob_t		pg;
134 	char		*paths[2], **p, *sl;
135 	int		retval;
136 
137 	dofork = 1;
138 	conffile = NULL;
139 	jobs = 0;
140 	retval = EXIT_SUCCESS;
141 
142 	(void)setlocale(LC_ALL, "");
143 
144 	while ((c = getopt(argc, argv, "C:fw")) != -1) {
145 		switch (c) {
146 		case 'C':
147 			conffile = optarg;
148 			break;
149 		case 'f':
150 			/* run all processing on foreground */
151 			dofork = 0;
152 			break;
153 		case 'w':
154 			dowarn++;
155 			break;
156 		default:
157 			fprintf(stderr, "Usage: %s [-fw] [-C file] [manpath ...]\n",
158 				getprogname());
159 			exit(EXIT_FAILURE);
160 		}
161 	}
162 	argc -= optind;
163 	argv += optind;
164 
165 	if (argc >= 1) {
166 		manpath = &argv[0];
167 
168 	    mkwhatis:
169 		return makewhatis(manpath);
170 	}
171 
172 	/*
173 	 * Try read config file, fallback to default_manpath[]
174 	 * if man.conf not available.
175 	 */
176 	config(conffile);
177 	if ((tp = gettag("_whatdb", 0)) == NULL) {
178 		manpath = default_manpath;
179 		goto mkwhatis;
180 	}
181 
182 	/* Build individual databases */
183 	paths[1] = NULL;
184 	TAILQ_FOREACH(ep, &tp->entrylist, q) {
185 		if ((rv = glob(ep->s,
186 		    GLOB_BRACE | GLOB_NOSORT | GLOB_ERR | GLOB_NOCHECK,
187 		    NULL, &pg)) != 0)
188 			err(EXIT_FAILURE, "glob('%s')", ep->s);
189 
190 		/* We always have something to work with here */
191 		for (p = pg.gl_pathv; *p; p++) {
192 			sl = strrchr(*p, '/');
193 			if (sl == NULL) {
194 				err(EXIT_FAILURE, "glob: _whatdb entry '%s' "
195 				    "doesn't contain slash", ep->s);
196 			}
197 
198 			/*
199 			 * Cut the last component of path, leaving just
200 			 * the directory. We will use the result as root
201 			 * for manpage search.
202 			 * glob malloc()s space for the paths, so it's
203 			 * okay to change it in-place.
204 			 */
205 			*sl = '\0';
206 			paths[0] = *p;
207 
208 			if (!dofork) {
209 				/* Do not fork child */
210 				makewhatis(paths);
211 				continue;
212 			}
213 
214 			switch (fork()) {
215 			case 0:
216 				exit(makewhatis(paths));
217 				break;
218 			case -1:
219 				warn("fork");
220 				makewhatis(paths);
221 				break;
222 			default:
223 				jobs++;
224 				break;
225 			}
226 
227 		}
228 
229 		globfree(&pg);
230 	}
231 
232 	/* Wait for the childern to finish */
233 	while (jobs > 0) {
234 		(void)wait(&status);
235 		if (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS)
236 			retval = EXIT_FAILURE;
237 		jobs--;
238 	}
239 
240 	return retval;
241 }
242 
243 static int
makewhatis(char * const * manpath)244 makewhatis(char * const * manpath)
245 {
246 	FTS	*fts;
247 	FTSENT	*fe;
248 	manpage *source;
249 	whatis	*dest;
250 	FILE	*out;
251 	size_t	sdoff, sdlen;
252 	int	outfd;
253 	struct stat st_before, st_after;
254 
255 	if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL)
256 		err(EXIT_FAILURE, "Cannot open `%s'", *manpath);
257 
258 	source = NULL;
259 	while ((fe = fts_read(fts)) != NULL) {
260 		switch (fe->fts_info) {
261 		case FTS_F:
262 			if (manpagesection(fe->fts_path) >= 0) {
263 				/*
264 				 * Get manpage subdirectory prefix. Most
265 				 * commonly, this is arch-specific subdirectory.
266 				 */
267 				if (fe->fts_level >= 3) {
268 					int		sl;
269 					const char	*s, *lsl;
270 
271 					lsl = NULL;
272 					s = &fe->fts_path[fe->fts_pathlen - 1];
273 					for(sl = fe->fts_level - 1; sl > 0;
274 					    sl--) {
275 						s--;
276 						while (s[0] != '/')
277 							s--;
278 						if (lsl == NULL)
279 							lsl = s;
280 					}
281 
282 					/*
283 					 * Include trailing '/', so we get
284 					 * 'arch/'.
285 					 */
286 					sdoff = s + 1 - fe->fts_path;
287 					sdlen = lsl - s + 1;
288 				} else {
289 					sdoff = 0;
290 					sdlen = 0;
291 				}
292 
293 				addmanpage(&source, fe->fts_statp->st_ino,
294 				    fe->fts_path, sdoff, sdlen);
295 			}
296 			/*FALLTHROUGH*/
297 		case FTS_D:
298 		case FTS_DC:
299 		case FTS_DEFAULT:
300 		case FTS_DP:
301 		case FTS_SL:
302 		case FTS_DOT:
303 		case FTS_W:
304 		case FTS_NSOK:
305 		case FTS_INIT:
306 			break;
307 		case FTS_SLNONE:
308 			warnx("Symbolic link with no target: `%s'",
309 			    fe->fts_path);
310 			break;
311 		case FTS_DNR:
312 			warnx("Unreadable directory: `%s'", fe->fts_path);
313 			break;
314 		case FTS_NS:
315 			errno = fe->fts_errno;
316 			warn("Cannot stat `%s'", fe->fts_path);
317 			break;
318 		case FTS_ERR:
319 			errno = fe->fts_errno;
320 			warn("Error reading `%s'", fe->fts_path);
321 			break;
322 		default:
323 			errx(EXIT_FAILURE, "Unknown info %d returned from fts "
324 			    " for path: `%s'", fe->fts_info, fe->fts_path);
325 		}
326 	}
327 
328 	(void)fts_close(fts);
329 
330 	dest = NULL;
331 	processmanpages(&source, &dest);
332 
333 	if (chdir(manpath[0]) == -1)
334 		err(EXIT_FAILURE, "Cannot change dir to `%s'", manpath[0]);
335 
336 	/*
337 	 * makewhatis runs unattended, so it needs to be able to
338 	 * recover if the last run crashed out. Therefore, if
339 	 * whatisdb_new exists and is more than (arbitrarily) sixteen
340 	 * hours old, nuke it. If it exists but is not so old, refuse
341 	 * to run until it's cleaned up, in case another makewhatis is
342 	 * already running. Also, open the output with O_EXCL to make
343 	 * sure we get our own, in case two copies start exactly at
344 	 * once. (Unlikely? Maybe, maybe not, if two copies of cron
345 	 * end up running.)
346 	 *
347 	 * Similarly, before renaming the file after we finish writing
348 	 * to it, make sure it's still the same file we opened. This
349 	 * can't be completely race-free, but getting caught by it
350 	 * would require an unexplained sixteen-hour-or-more lag
351 	 * between the last mtime update when we wrote to it and when
352 	 * we get to the stat call *and* another makewhatis starting
353 	 * out to write at exactly the wrong moment. Not impossible,
354 	 * but not likely enough to worry about.
355 	 *
356 	 * This is maybe unnecessarily elaborate, but generating
357 	 * corrupted output isn't so good either.
358 	 */
359 
360 	if (stat(whatisdb_new, &st_before) == 0) {
361 		if (st_before.st_mtime - time(NULL) > 16*60*60) {
362 			/* Don't complain if someone else just removed it. */
363 			if (unlink(whatisdb_new) == -1 && errno != ENOENT) {
364 				err(EXIT_FAILURE, "Could not remove `%s'",
365 				    whatisdb_new);
366 			} else {
367 				warnx("Removed stale `%s'", whatisdb_new);
368 			}
369 		} else {
370 			errx(EXIT_FAILURE, "The file `%s' already exists "
371 			    "-- am I already running?", whatisdb_new);
372 		}
373 	} else if (errno != ENOENT) {
374 		/* Something unexpected happened. */
375 		err(EXIT_FAILURE, "Cannot stat `%s'", whatisdb_new);
376 	}
377 
378 	outfd = open(whatisdb_new, O_WRONLY|O_CREAT|O_EXCL,
379 	    S_IRUSR|S_IRGRP|S_IROTH);
380 	if (outfd < 0)
381 		err(EXIT_FAILURE, "Cannot open `%s'", whatisdb_new);
382 
383 	if (fstat(outfd, &st_before) == -1)
384 		err(EXIT_FAILURE, "Cannot fstat `%s'", whatisdb_new);
385 
386 	if ((out = fdopen(outfd, "w")) == NULL)
387 		err(EXIT_FAILURE, "Cannot fdopen `%s'", whatisdb_new);
388 
389 	dumpwhatis(out, dest);
390 	if (fchmod(fileno(out), S_IRUSR|S_IRGRP|S_IROTH) == -1)
391 		err(EXIT_FAILURE, "Cannot chmod `%s'", whatisdb_new);
392 	if (fclose(out) != 0)
393 		err(EXIT_FAILURE, "Cannot close `%s'", whatisdb_new);
394 
395 	if (stat(whatisdb_new, &st_after) == -1)
396 		err(EXIT_FAILURE, "Cannot stat `%s' (after writing)",
397 		    whatisdb_new);
398 
399 	if (st_before.st_dev != st_after.st_dev ||
400 	    st_before.st_ino != st_after.st_ino) {
401 		errx(EXIT_FAILURE, "The file `%s' changed under me; giving up",
402 		    whatisdb_new);
403 	}
404 
405 	if (rename(whatisdb_new, whatisdb) == -1)
406 		err(EXIT_FAILURE, "Could not rename `%s' to `%s'",
407 		    whatisdb_new, whatisdb);
408 
409 	return EXIT_SUCCESS;
410 }
411 
/*
 * Return a pointer to the first white space character in `str', or NULL
 * when the string ends before any white space is found.
 */
static char *
findwhitespace(char *str)
{
	for (;; str++) {
		if (ISSPACE(*str))
			return str;
		if (*str == '\0')
			return NULL;
	}
}
423 
/*
 * Copy the NUL-terminated string `src' to `dest', terminator included.
 * The two areas may overlap.  Returns `dest'.
 */
static char *
strmove(char *dest, char *src)
{
	size_t bytes = strlen(src) + 1;

	(void)memmove(dest, src, bytes);
	return dest;
}
429 
430 static char *
GetS(gzFile in,char * buffer,size_t length)431 GetS(gzFile in, char *buffer, size_t length)
432 {
433 	char	*ptr;
434 
435 	if (((ptr = gzgets(in, buffer, (int)length)) != NULL) && (*ptr == '\0'))
436 		ptr = NULL;
437 
438 	return ptr;
439 }
440 
441 static char *
makesection(int s)442 makesection(int s)
443 {
444 	char sectionbuffer[24];
445 	if (s == -1)
446 		return NULL;
447 	(void)snprintf(sectionbuffer, sizeof(sectionbuffer),
448 		" (%c) - ", sectionext[s]);
449 	return estrdup(sectionbuffer);
450 }
451 
452 static int
pathnamesection(const char * pat,const char * name)453 pathnamesection(const char *pat, const char *name)
454 {
455 	char *ptr, *ext;
456 	size_t len = strlen(pat);
457 
458 
459 	while ((ptr = strstr(name, pat)) != NULL) {
460 		if ((ext = strchr(sectionext, ptr[len])) != NULL) {
461 			return ext - sectionext;
462 		}
463 		name = ptr + 1;
464 	}
465 	return -1;
466 }
467 
468 
469 static int
manpagesection(char * name)470 manpagesection(char *name)
471 {
472 	char	*ptr;
473 
474 	if ((ptr = strrchr(name, '/')) != NULL)
475 		ptr++;
476 	else
477 		ptr = name;
478 
479 	while ((ptr = strchr(ptr, '.')) != NULL) {
480 		int section;
481 
482 		ptr++;
483 		section = 0;
484 		while (sectionext[section] != '\0')
485 			if (sectionext[section] == *ptr)
486 				return section;
487 			else
488 				section++;
489 	}
490 	return -1;
491 }
492 
/*
 * Return a newly allocated string of the form " (section_id) - ".
 * Terminates the program when the allocation fails.
 */
static char *
createsectionstring(char *section_id)
{
	char *text;
	int rv;

	rv = asprintf(&text, " (%s) - ", section_id);
	if (rv < 0)
		err(EXIT_FAILURE, "malloc failed");

	return text;
}
502 
503 static void
addmanpage(manpage ** tree,ino_t inode,char * name,size_t sdoff,size_t sdlen)504 addmanpage(manpage **tree, ino_t inode, char *name, size_t sdoff, size_t sdlen)
505 {
506 	manpage *mp;
507 
508 	while ((mp = *tree) != NULL) {
509 		if (mp->mp_inode == inode)
510 			return;
511 		tree = inode < mp->mp_inode ? &mp->mp_left : &mp->mp_right;
512 	}
513 
514 	mp = emalloc(sizeof(manpage) + strlen(name));
515 	mp->mp_left = NULL;
516 	mp->mp_right = NULL;
517 	mp->mp_inode = inode;
518 	mp->mp_sdoff = sdoff;
519 	mp->mp_sdlen = sdlen;
520 	(void)strcpy(mp->mp_name, name);
521 	*tree = mp;
522 }
523 
524 static void
addwhatis(whatis ** tree,char * data,char * prefix)525 addwhatis(whatis **tree, char *data, char *prefix)
526 {
527 	whatis *wi;
528 	int result;
529 
530 	while (ISSPACE(*data))
531 		data++;
532 
533 	if (*data == '/') {
534 		char *ptr;
535 
536 		ptr = ++data;
537 		while ((*ptr != '\0') && !ISSPACE(*ptr))
538 			if (*ptr++ == '/')
539 				data = ptr;
540 	}
541 
542 	while ((wi = *tree) != NULL) {
543 		result = strcmp(data, wi->wi_data);
544 		if (result == 0) result = strcmp(prefix, wi->wi_prefix);
545 		if (result == 0) return;
546 		tree = result < 0 ? &wi->wi_left : &wi->wi_right;
547 	}
548 
549 	wi = emalloc(sizeof(whatis) + strlen(prefix));
550 
551 	wi->wi_left = NULL;
552 	wi->wi_right = NULL;
553 	wi->wi_data = data;
554 	if (prefix[0] != '\0')
555 		(void) strcpy(wi->wi_prefix, prefix);
556 	else
557 		wi->wi_prefix[0] = '\0';
558 	*tree = wi;
559 }
560 
/*
 * Clean up one line of a formatted (cat) page in place.
 *
 * Leading and trailing white space is removed, every inner run of white
 * space becomes a single blank, and each character that is followed by
 * a backspace is dropped together with that backspace.
 */
static void
catpreprocess(char *from)
{
	char	*out = from;

	while (ISSPACE(*from))
		from++;

	while (*from != '\0') {
		if (ISSPACE(*from)) {
			do {
				from++;
			} while (ISSPACE(*from));
			/* No blank for white space at the end of the line. */
			if (*from != '\0')
				*out++ = ' ';
			continue;
		}
		if (from[1] == '\b') {
			/* Skip the overstruck character and the backspace. */
			from += 2;
			continue;
		}
		*out++ = *from++;
	}

	*out = '\0';
}
582 
583 static char *
makewhatisline(const char * file,const char * line,const char * section)584 makewhatisline(const char *file, const char *line, const char *section)
585 {
586 	static const char *del[] = {
587 		" - ",
588 		" -- ",
589 		"- ",
590 		" -",
591 		NULL
592 	};
593 	size_t i, pos;
594 	size_t llen, slen, dlen;
595 	char *result, *ptr;
596 
597 	ptr = NULL;
598 	if (section == NULL) {
599 		if (dowarn)
600 			warnx("%s: No section provided for `%s'", file, line);
601 		return estrdup(line);
602 	}
603 
604 	for (i = 0; del[i]; i++)
605 		if ((ptr = strstr(line, del[i])) != NULL)
606 			break;
607 
608 	if (del[i] == NULL) {
609 		if (dowarn)
610 			warnx("%s: Bad format line `%s'", file, line);
611 		return estrdup(line);
612 	}
613 
614 	slen = strlen(section);
615 	llen = strlen(line);
616 	dlen = strlen(del[i]);
617 
618 	result = emalloc(llen - dlen + slen + 1);
619 	pos = ptr - line;
620 
621 	(void)memcpy(result, line, pos);
622 	(void)memcpy(&result[pos], section, slen);
623 	(void)strcpy(&result[pos + slen], &line[pos + dlen]);
624 	return result;
625 }
626 
627 static char *
parsecatpage(const char * name,gzFile in)628 parsecatpage(const char *name, gzFile in)
629 {
630 	char	 buffer[8192];
631 	char	*section, *ptr, *last;
632 	size_t	 size;
633 
634 	do {
635 		if (GetS(in, buffer, sizeof(buffer)) == NULL)
636 			return NULL;
637 	}
638 	while (buffer[0] == '\n');
639 
640 	section = NULL;
641 	if ((ptr = strchr(buffer, '(')) != NULL) {
642 		if ((last = strchr(ptr + 1, ')')) !=NULL) {
643 			size_t	length;
644 
645 			length = last - ptr + 1;
646 			section = emalloc(length + 5);
647 			*section = ' ';
648 			(void) memcpy(section + 1, ptr, length);
649 			(void) strcpy(section + 1 + length, " - ");
650 		}
651 	}
652 
653 	for (;;) {
654 		if (GetS(in, buffer, sizeof(buffer)) == NULL) {
655 			free(section);
656 			return NULL;
657 		}
658 		catpreprocess(buffer);
659 		if (strncmp(buffer, "NAME", 4) == 0)
660 			break;
661 	}
662 	if (section == NULL)
663 		section = makesection(pathnamesection("/cat", name));
664 
665 	ptr = last = buffer;
666 	size = sizeof(buffer) - 1;
667 	while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
668 		int	 length;
669 
670 		catpreprocess(ptr);
671 
672 		length = strlen(ptr);
673 		if (length == 0) {
674 			*last = '\0';
675 
676 			ptr = makewhatisline(name, buffer, section);
677 			free(section);
678 			return ptr;
679 		}
680 		if ((length > 1) && (ptr[length - 1] == '-') &&
681 		    ISALPHA(ptr[length - 2]))
682 			last = &ptr[--length];
683 		else {
684 			last = &ptr[length++];
685 			*last = ' ';
686 		}
687 
688 		ptr += length;
689 		size -= length;
690 	}
691 
692 	free(section);
693 
694 	return NULL;
695 }
696 
/*
 * Clean up one line of manual page source in place.
 *
 * Returns 1, leaving the line untouched, when it is a comment (starts,
 * after optional white space, with `.\"').  Otherwise returns 0 after
 *  - removing leading and trailing white space and collapsing inner
 *    runs of white space to one blank (none is kept before a ','),
 *  - removing double quotes,
 *  - removing escapes: `\-' becomes `-', `\f' and `\s' are removed
 *    together with their argument, any other `\x' is removed entirely,
 *  - rewriting a `.Xr name section [rest]' line to `name(section)rest'.
 */
static int
manpreprocess(char *line)
{
	char	*from, *to;

	to = from = line;
	while (ISSPACE(*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (ISSPACE(*from)) {
			while (ISSPACE(*++from))
				continue;
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* `\-' keeps the dash, copied next round. */
				break;
			case 'f':
				/*
				 * Font change.  The font may be a single
				 * character (\fB), two characters (\f(BI),
				 * a bracketed name (\f[BI]) or a number.
				 * Skipping only numbers would leave the
				 * font name in the text.
				 */
				from++;
				if (*from == '(') {
					from++;
					if (*from != '\0')
						from++;
					if (*from != '\0')
						from++;
				} else if (*from == '[') {
					while ((*from != '\0') &&
					    (*from != ']'))
						from++;
					if (*from == ']')
						from++;
				} else if (ISALPHA(*from)) {
					from++;
				} else {
					if ((*from == '+') || (*from == '-'))
						from++;
					while (ISDIGIT(*from))
						from++;
				}
				break;
			case 's':
				/* Size change: optional sign and digits. */
				from++;
				if ((*from == '+') || (*from == '-'))
					from++;
				while (ISDIGIT(*from))
					from++;
				break;
			default:
				from++;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char	*sect;

		from = line + 3;
		if (ISSPACE(*from))
			from++;

		if ((sect = findwhitespace(from)) != NULL) {
			size_t	length;
			char	*trail;

			/* Split into name, section and optional rest. */
			*sect++ = '\0';
			if ((trail = findwhitespace(sect)) != NULL)
				*trail++ = '\0';

			/* Reassemble as name(section)rest at line start. */
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			if (trail == NULL) {
				(void) strcpy(&to[length], ")");
			} else {
				to += length;
				*to++ = ')';
				length = strlen(trail);
				(void) memmove(to, trail, length + 1);
			}
		}
	}

	return 0;
}
771 
772 static char *
nroff(const char * inname,gzFile in)773 nroff(const char *inname, gzFile in)
774 {
775 	char tempname[MAXPATHLEN], buffer[65536], *data;
776 	int tempfd, bytes, pipefd[2], status;
777 	static int devnull = -1;
778 	pid_t child;
779 
780 	if (gzrewind(in) < 0)
781 		err(EXIT_FAILURE, "Cannot rewind pipe");
782 
783 	if ((devnull < 0) &&
784 	    ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0))
785 		err(EXIT_FAILURE, "Cannot open `/dev/null'");
786 
787 	(void)strlcpy(tempname, _PATH_TMP "makewhatis.XXXXXX",
788 	    sizeof(tempname));
789 	if ((tempfd = mkstemp(tempname)) == -1)
790 		err(EXIT_FAILURE, "Cannot create temp file");
791 
792 	while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0)
793 		if (write(tempfd, buffer, (size_t)bytes) != bytes) {
794 			bytes = -1;
795 			break;
796 		}
797 
798 	if (bytes < 0) {
799 		(void)close(tempfd);
800 		(void)unlink(tempname);
801 		err(EXIT_FAILURE, "Read from pipe failed");
802 	}
803 	if (lseek(tempfd, (off_t)0, SEEK_SET) == (off_t)-1) {
804 		(void)close(tempfd);
805 		(void)unlink(tempname);
806 		err(EXIT_FAILURE, "Cannot rewind temp file");
807 	}
808 	if (pipe(pipefd) == -1) {
809 		(void)close(tempfd);
810 		(void)unlink(tempname);
811 		err(EXIT_FAILURE, "Cannot create pipe");
812 	}
813 
814 	switch (child = vfork()) {
815 	case -1:
816 		(void)close(pipefd[1]);
817 		(void)close(pipefd[0]);
818 		(void)close(tempfd);
819 		(void)unlink(tempname);
820 		err(EXIT_FAILURE, "Fork failed");
821 		/* NOTREACHED */
822 	case 0:
823 		(void)close(pipefd[0]);
824 		if (tempfd != STDIN_FILENO) {
825 			(void)dup2(tempfd, STDIN_FILENO);
826 			(void)close(tempfd);
827 		}
828 		if (pipefd[1] != STDOUT_FILENO) {
829 			(void)dup2(pipefd[1], STDOUT_FILENO);
830 			(void)close(pipefd[1]);
831 		}
832 		if (devnull != STDERR_FILENO) {
833 			(void)dup2(devnull, STDERR_FILENO);
834 			(void)close(devnull);
835 		}
836 		(void)execlp(NROFF, NROFF, "-S", "-man", NULL);
837 		_exit(EXIT_FAILURE);
838 		/*NOTREACHED*/
839 	default:
840 		(void)close(pipefd[1]);
841 		(void)close(tempfd);
842 		break;
843 	}
844 
845 	if ((in = gzdopen(pipefd[0], "r")) == NULL) {
846 		if (errno == 0)
847 			errno = ENOMEM;
848 		(void)close(pipefd[0]);
849 		(void)kill(child, SIGTERM);
850 		while (waitpid(child, NULL, 0) != child);
851 		(void)unlink(tempname);
852 		err(EXIT_FAILURE, "Cannot read from pipe");
853 	}
854 
855 	data = parsecatpage(inname, in);
856 	while (gzread(in, buffer, sizeof(buffer)) > 0);
857 	(void)gzclose(in);
858 
859 	while (waitpid(child, &status, 0) != child);
860 	if ((data != NULL) &&
861 	    !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) {
862 		free(data);
863 		errx(EXIT_FAILURE, NROFF " on `%s' exited with %d status",
864 		    inname, WEXITSTATUS(status));
865 	}
866 
867 	(void)unlink(tempname);
868 	return data;
869 }
870 
871 static char *
parsemanpage(const char * name,gzFile in,int defaultsection)872 parsemanpage(const char *name, gzFile in, int defaultsection)
873 {
874 	char	*section, buffer[8192], *ptr;
875 	static const char POD[] = ".\\\" Automatically generated by Pod";
876 	static const char IX[] = ".IX TITLE";
877 
878 	section = NULL;
879 	do {
880 		if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
881 			free(section);
882 			return NULL;
883 		}
884 
885 		/*
886 		 * Skip over lines in man pages that have been generated
887 		 * by Pod, until we find the TITLE.
888 		 */
889 		if (strncasecmp(buffer, POD, sizeof(POD) - 1) == 0) {
890 			do {
891 				if (GetS(in, buffer, sizeof(buffer) - 1)
892 				    == NULL) {
893 					free(section);
894 					return NULL;
895 				}
896 			} while (strncasecmp(buffer, IX, sizeof(IX) - 1) != 0);
897 		}
898 
899 		if (manpreprocess(buffer))
900 			continue;
901 		if (strncasecmp(buffer, ".Dt", 3) == 0) {
902 			char	*end;
903 
904 			ptr = &buffer[3];
905 			if (ISSPACE(*ptr))
906 				ptr++;
907 			if ((ptr = findwhitespace(ptr)) == NULL)
908 				continue;
909 
910 			if ((end = findwhitespace(++ptr)) != NULL)
911 				*end = '\0';
912 
913 			free(section);
914 			section = createsectionstring(ptr);
915 		}
916 		else if (strncasecmp(buffer, ".TH", 3) == 0) {
917 			ptr = &buffer[3];
918 			while (ISSPACE(*ptr))
919 				ptr++;
920 			if ((ptr = findwhitespace(ptr)) != NULL) {
921 				char *next;
922 
923 				while (ISSPACE(*ptr))
924 					ptr++;
925 				if ((next = findwhitespace(ptr)) != NULL)
926 					*next = '\0';
927 				free(section);
928 				section = createsectionstring(ptr);
929 			}
930 		}
931 		else if (strncasecmp(buffer, ".Ds", 3) == 0) {
932 			free(section);
933 			return NULL;
934 		}
935 	} while (strncasecmp(buffer, ".Sh NAME", 8) != 0);
936 
937 	do {
938 		if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
939 			free(section);
940 			return NULL;
941 		}
942 	} while (manpreprocess(buffer));
943 
944 	if (strncasecmp(buffer, ".Nm", 3) == 0) {
945 		size_t	length, offset;
946 
947 		ptr = &buffer[3];
948 		while (ISSPACE(*ptr))
949 			ptr++;
950 
951 		length = strlen(ptr);
952 		if ((length > 1) && (ptr[length - 1] == ',') &&
953 		    ISSPACE(ptr[length - 2])) {
954 			ptr[--length] = '\0';
955 			ptr[length - 1] = ',';
956 		}
957 		(void) memmove(buffer, ptr, length + 1);
958 
959 		offset = length + 3;
960 		ptr = &buffer[offset];
961 		for (;;) {
962 			size_t	 more;
963 
964 			if ((sizeof(buffer) == offset) ||
965 			    (GetS(in, ptr, sizeof(buffer) - offset)
966 			       == NULL)) {
967 				free(section);
968 				return NULL;
969 			}
970 			if (manpreprocess(ptr))
971 				continue;
972 
973 			if (strncasecmp(ptr, ".Nm", 3) != 0) break;
974 
975 			ptr += 3;
976 			if (ISSPACE(*ptr))
977 				ptr++;
978 
979 			buffer[length++] = ' ';
980 			more = strlen(ptr);
981 			if ((more > 1) && (ptr[more - 1] == ',') &&
982 			    ISSPACE(ptr[more - 2])) {
983 				ptr[--more] = '\0';
984 				ptr[more - 1] = ',';
985 			}
986 
987 			(void) memmove(&buffer[length], ptr, more + 1);
988 			length += more;
989 			offset = length + 3;
990 
991 			ptr = &buffer[offset];
992 		}
993 
994 		if (strncasecmp(ptr, ".Nd", 3) == 0) {
995 			(void) strlcpy(&buffer[length], " -",
996 			    sizeof(buffer) - length);
997 
998 			while (strncasecmp(ptr, ".Sh", 3) != 0) {
999 				int	 more;
1000 
1001 				if (*ptr == '.') {
1002 					char	*space;
1003 
1004 					if (strncasecmp(ptr, ".Nd", 3) != 0 ||
1005 					    strchr(ptr, '[') != NULL) {
1006 						free(section);
1007 						return NULL;
1008 					}
1009 					space = findwhitespace(ptr);
1010 					if (space == NULL) {
1011 						ptr = "";
1012 					} else {
1013 						space++;
1014 						(void) strmove(ptr, space);
1015 					}
1016 				}
1017 
1018 				if (*ptr != '\0') {
1019 					buffer[offset - 1] = ' ';
1020 					more = strlen(ptr) + 1;
1021 					offset += more;
1022 				}
1023 				ptr = &buffer[offset];
1024 				if ((sizeof(buffer) == offset) ||
1025 				    (GetS(in, ptr, sizeof(buffer) - offset)
1026 					== NULL)) {
1027 					free(section);
1028 					return NULL;
1029 				}
1030 				if (manpreprocess(ptr))
1031 					*ptr = '\0';
1032 			}
1033 		}
1034 	}
1035 	else {
1036 		int	 offset;
1037 
1038 		if (*buffer == '.') {
1039 			char	*space;
1040 
1041 			if ((space = findwhitespace(&buffer[1])) == NULL) {
1042 				free(section);
1043 				return NULL;
1044 			}
1045 			space++;
1046 			(void) strmove(buffer, space);
1047 		}
1048 
1049 		offset = strlen(buffer) + 1;
1050 		for (;;) {
1051 			int	 more;
1052 
1053 			ptr = &buffer[offset];
1054 			if ((sizeof(buffer) == offset) ||
1055 			    (GetS(in, ptr, sizeof(buffer) - offset)
1056 				== NULL)) {
1057 				free(section);
1058 				return NULL;
1059 			}
1060 			if (manpreprocess(ptr) || (*ptr == '\0'))
1061 				continue;
1062 
1063 			if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
1064 			    (strncasecmp(ptr, ".Ss", 3) == 0))
1065 				break;
1066 
1067 			if (*ptr == '.') {
1068 				char	*space;
1069 
1070 				if ((space = findwhitespace(ptr)) == NULL) {
1071 					continue;
1072 				}
1073 
1074 				space++;
1075 				(void) memmove(ptr, space, strlen(space) + 1);
1076 			}
1077 
1078 			buffer[offset - 1] = ' ';
1079 			more = strlen(ptr);
1080 			if ((more > 1) && (ptr[more - 1] == ',') &&
1081 			    ISSPACE(ptr[more - 2])) {
1082 				ptr[more - 1] = '\0';
1083 				ptr[more - 2] = ',';
1084 			}
1085 			else more++;
1086 			offset += more;
1087 		}
1088 	}
1089 
1090 	if (section == NULL)
1091 		section = makesection(defaultsection);
1092 
1093 	ptr = makewhatisline(name, buffer, section);
1094 	free(section);
1095 	return ptr;
1096 }
1097 
1098 static char *
getwhatisdata(char * name)1099 getwhatisdata(char *name)
1100 {
1101 	gzFile	in;
1102 	char	*data;
1103 	int	 section;
1104 
1105 	if ((in = gzopen(name, "r")) == NULL) {
1106 		if (errno == 0)
1107 			errno = ENOMEM;
1108 		err(EXIT_FAILURE, "Cannot open `%s'", name);
1109 		/* NOTREACHED */
1110 	}
1111 
1112 	section = manpagesection(name);
1113 	if (section == 0) {
1114 		data = parsecatpage(name, in);
1115 	} else {
1116 		data = parsemanpage(name, in, section);
1117 		if (data == NULL)
1118 			data = nroff(name, in);
1119 	}
1120 
1121 	(void) gzclose(in);
1122 	return data;
1123 }
1124 
1125 static void
processmanpages(manpage ** source,whatis ** dest)1126 processmanpages(manpage **source, whatis **dest)
1127 {
1128 	manpage *mp;
1129 	char sd[128];
1130 
1131 	mp = *source;
1132 	*source = NULL;
1133 
1134 	while (mp != NULL) {
1135 		manpage *obsolete;
1136 		char *data;
1137 
1138 		if (mp->mp_left != NULL)
1139 			processmanpages(&mp->mp_left, dest);
1140 
1141 		if ((data = getwhatisdata(mp->mp_name)) != NULL) {
1142 			/* Pass eventual directory prefix to addwhatis() */
1143 			if (mp->mp_sdlen > 0 && mp->mp_sdlen < sizeof(sd)-1)
1144 				strlcpy(sd, &mp->mp_name[mp->mp_sdoff],
1145 					mp->mp_sdlen);
1146 			else
1147 				sd[0] = '\0';
1148 
1149 			addwhatis(dest, data, sd);
1150 		}
1151 
1152 		obsolete = mp;
1153 		mp = mp->mp_right;
1154 		free(obsolete);
1155 	}
1156 }
1157 
1158 static void
dumpwhatis(FILE * out,whatis * tree)1159 dumpwhatis(FILE *out, whatis *tree)
1160 {
1161 	while (tree != NULL) {
1162 		if (tree->wi_left)
1163 			dumpwhatis(out, tree->wi_left);
1164 
1165 		if ((tree->wi_data[0] && fputs(tree->wi_prefix, out) == EOF) ||
1166 		    (fputs(tree->wi_data, out) == EOF) ||
1167 		    (fputc('\n', out) == EOF))
1168 			err(EXIT_FAILURE, "Write failed");
1169 
1170 		tree = tree->wi_right;
1171 	}
1172 }
1173