xref: /netbsd-src/libexec/makewhatis/makewhatis.c (revision 89c5a767f8fc7a4633b2d409966e2becbb98ff92)
1 /*	$NetBSD: makewhatis.c,v 1.7 2000/01/24 23:03:54 tron Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Matthias Scheler.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 	All rights reserved.\n");
43 #endif /* not lint */
44 
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.7 2000/01/24 23:03:54 tron Exp $");
47 #endif /* not lint */
48 
49 #include <sys/types.h>
50 #include <sys/stat.h>
51 
52 #include <ctype.h>
53 #include <err.h>
54 #include <errno.h>
55 #include <fts.h>
56 #include <locale.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61 #include <zlib.h>
62 
/*
 * Node of the binary search tree of manual page files collected by main().
 * The tree is ordered by inode number; addmanpage() does not insert a file
 * whose inode number is already present.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* subtree holding smaller inode numbers */
	manpage	*mp_right;	/* subtree holding larger inode numbers */
	ino_t	 mp_inode;	/* inode number of the file (tree key) */
	char	 mp_name[1];	/* path name; node is allocated oversized */
};
69 
/*
 * Node of the binary search tree of whatis lines.  addwhatis() orders the
 * tree with strcmp() on wi_data and dumpwhatis() writes it out in order.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* subtree with lines that compare lower */
	whatis	*wi_right;	/* subtree with lines that compare higher */
	char	*wi_data;	/* the whatis line itself (tree key) */
};
75 
76 int              main (int, char **);
77 char		*findwhitespace(char *);
78 char		*GetS(gzFile, char *, int);
79 int		 manpagesection (char *);
80 int		 addmanpage (manpage **, ino_t, char *);
81 int		 addwhatis (whatis **, char *);
82 char		*replacestring (char *, char *, char *);
83 void		 catpreprocess (char *);
84 char		*parsecatpage (gzFile *);
85 int		 manpreprocess (char *);
86 char		*parsemanpage (gzFile *, int);
87 char		*getwhatisdata (char *);
88 void		 processmanpages (manpage **,whatis **);
89 int		 dumpwhatis (FILE *, whatis *);
90 
/* Directory tree scanned by main() when no argument is given. */
char *default_manpath[] = { "/usr/share/man", NULL };

/*
 * Characters that introduce a section suffix after a '.' in a file name.
 * manpagesection() returns the index into this string; getwhatisdata()
 * treats index 0 as a formatted (cat) page.
 */
char sectionext[] = "0123456789ln";

/* Name of the database file written by main(). */
char whatisdb[] = "whatis.db";

/* Program name; main() passes it to perror(). */
extern char *__progname;
100 
101 int
102 main(int argc,char **argv)
103 {
104 	char	**manpath;
105 	FTS	*fts;
106 	FTSENT	*fe;
107 	manpage	*source;
108 	whatis	*dest;
109 	FILE	*out;
110 
111 	(void)setlocale(LC_ALL, "");
112 
113 	manpath = (argc < 2) ? default_manpath : &argv[1];
114 
115 	if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
116 		perror(__progname);
117 		return EXIT_FAILURE;
118 	}
119 
120 	source = NULL;
121 	while ((fe = fts_read(fts)) != NULL) {
122 		switch (fe->fts_info) {
123 		case FTS_F:
124 			if (manpagesection(fe->fts_path) >= 0)
125 				if (!addmanpage(&source,
126 					fe->fts_statp->st_ino,
127 					fe->fts_path))
128 					err(EXIT_FAILURE, NULL);
129 		case FTS_D:
130 		case FTS_DC:
131 		case FTS_DEFAULT:
132 		case FTS_DP:
133 		case FTS_SLNONE:
134 			break;
135 		default:
136 			errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
137 			    strerror(fe->fts_errno));
138 			/* NOTREACHED */
139 		}
140 	}
141 
142 	(void)fts_close(fts);
143 
144 	dest = NULL;
145 	processmanpages(&source, &dest);
146 
147 	if (chdir(manpath[0]) < 0)
148 		errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
149 
150 	if ((out = fopen(whatisdb, "w")) == NULL)
151 		errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
152 
153 	if (!(dumpwhatis(out, dest) ||
154 	    (fclose(out) < 0)) ||
155 	    (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
156 		errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
157 
158 	return EXIT_SUCCESS;
159 }
160 
/*
 * Return a pointer to the first whitespace character in str, or NULL if
 * the string contains none.
 *
 * The character is converted to unsigned char before it is handed to
 * isspace(); passing a negative plain char is undefined behaviour.
 */
char *
findwhitespace(char *str)
{
	for (; *str != '\0'; str++)
		if (isspace((unsigned char)*str))
			return str;

	return NULL;
}
173 
174 char
175 *GetS(gzFile in, char *buffer, int length)
176 
177 {
178 	char	*ptr;
179 
180 	if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0'))
181 		ptr = NULL;
182 
183 	return ptr;
184 }
185 
186 int
187 manpagesection(char *name)
188 {
189 	char	*ptr;
190 
191 	if ((ptr = strrchr(name, '/')) != NULL)
192 		ptr++;
193 	else
194 		ptr = name;
195 
196 	while ((ptr = strchr(ptr, '.')) != NULL) {
197 		int section;
198 
199 		ptr++;
200 		section=0;
201 		while (sectionext[section] != '\0')
202 			if (sectionext[section] == *ptr)
203 				return section;
204 			else
205 				section++;
206 	}
207 
208 	return -1;
209 }
210 
211 int
212 addmanpage(manpage **tree,ino_t inode,char *name)
213 {
214 	manpage	*mp;
215 
216 	while ((mp = *tree) != NULL) {
217 		if (mp->mp_inode == inode)
218 			return 1;
219 		tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
220 	}
221 
222 	if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
223 		return 0;
224 
225 	mp->mp_left = NULL;
226 	mp->mp_right = NULL;
227 	mp->mp_inode = inode;
228 	(void) strcpy(mp->mp_name, name);
229 	*tree = mp;
230 
231 	return 1;
232 }
233 
234 int
235 addwhatis(whatis **tree, char *data)
236 {
237 	whatis *wi;
238 	int result;
239 
240 	while (isspace(*data))
241 		data++;
242 
243 	if (*data == '/') {
244 		char *ptr;
245 
246 		ptr = ++data;
247 		while ((*ptr != '\0') && !isspace(*ptr))
248 			if (*ptr++ == '/')
249 				data = ptr;
250 	}
251 
252 	while ((wi = *tree) != NULL) {
253 		result=strcmp(data, wi->wi_data);
254 		if (result == 0) return 1;
255 		tree = &((result < 0) ? wi->wi_left : wi->wi_right);
256 	}
257 
258 	if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
259 		return 0;
260 
261 	wi->wi_left = NULL;
262 	wi->wi_right = NULL;
263 	wi->wi_data = data;
264 	*tree = wi;
265 
266 	return 1;
267 }
268 
/*
 * Clean up one line of a formatted manual page in place.
 *
 * Leading and trailing whitespace is removed, every other run of
 * whitespace becomes a single blank, and each "X<backspace>" pair is
 * dropped so that only the final character of an overstrike sequence
 * remains.
 *
 * Characters are converted to unsigned char before they are handed to
 * isspace(); passing a negative plain char is undefined behaviour.
 */
void
catpreprocess(char *from)
{
	char	*to;

	to = from;
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			do {
				from++;
			} while (isspace((unsigned char)*from));
			/* Whitespace at the end of the line is dropped. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (from[1] == '\10') {
			/* Skip the character and the backspace behind it. */
			from += 2;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';
}
290 
/*
 * Return a newly allocated copy of string in which the first occurrence of
 * old has been replaced by new.
 *
 * If new is NULL or old does not occur in string, the result is a plain
 * copy of string.  The caller owns the result and must free() it.
 *
 * Returns NULL if no memory could be allocated.
 */
char *
replacestring(char *string, char *old, char *new)
{
	char	*ptr, *result;
	size_t	 slength, olength, nlength, pos;

	/* Lengths are kept in size_t so that they cannot be truncated. */
	slength = strlen(string);

	ptr = (new != NULL) ? strstr(string, old) : NULL;
	if (ptr == NULL) {
		/* Nothing to substitute: duplicate the input. */
		if ((result = malloc(slength + 1)) != NULL)
			(void) memcpy(result, string, slength + 1);
		return result;
	}

	olength = strlen(old);
	nlength = strlen(new);
	if ((result = malloc(slength - olength + nlength + 1)) == NULL)
		return NULL;

	pos = (size_t)(ptr - string);
	(void) memcpy(result, string, pos);
	(void) memcpy(&result[pos], new, nlength);
	/* Copy the tail including its terminating NUL. */
	(void) memcpy(&result[pos + nlength], &string[pos + olength],
	    slength - pos - olength + 1);

	return result;
}
318 
319 char *
320 parsecatpage(gzFile *in)
321 {
322 	char 	 buffer[8192];
323 	char	*section, *ptr, *last;
324 	int	 size;
325 
326 	do {
327 		if (GetS(in, buffer, sizeof(buffer)) == NULL)
328 			return NULL;
329 	}
330 	while (buffer[0] == '\n');
331 
332 	section = NULL;
333 	if ((ptr = strchr(buffer, '(')) != NULL) {
334 		if ((last = strchr(ptr + 1, ')')) !=NULL) {
335 			int 	length;
336 
337 			length = last - ptr + 1;
338 			if ((section = malloc(length + 5)) == NULL)
339 				return NULL;
340 
341 			*section = ' ';
342 			(void) memcpy(section + 1, ptr, length);
343 			(void) strcpy(section + 1 + length, " - ");
344 		}
345 	}
346 
347 	for (;;) {
348 		if (GetS(in, buffer, sizeof(buffer)) == NULL) {
349 			free(section);
350 			return NULL;
351 		}
352 		if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
353 			break;
354 	}
355 
356 	ptr = last = buffer;
357 	size = sizeof(buffer) - 1;
358 	while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
359 		int	 length;
360 
361 		catpreprocess(ptr);
362 
363 		length = strlen(ptr);
364 		if (length == 0) {
365 			*last = '\0';
366 
367 			ptr = replacestring(buffer, " - ", section);
368 			free(section);
369 			return ptr;
370 		}
371 		if ((length > 1) && (ptr[length - 1] == '-') &&
372 		    isalpha(ptr[length - 2]))
373 			last = &ptr[--length];
374 		else {
375 			last = &ptr[length++];
376 			*last = ' ';
377 		}
378 
379 		ptr += length;
380 		size -= length;
381 	}
382 
383 	free(section);
384 
385 	return NULL;
386 }
387 
/*
 * Clean up one line of manual page source in place.
 *
 * Returns 1, leaving the line untouched, if the line is a roff comment
 * (first non-blank text is .\").  Otherwise returns 0 after:
 *  - removing leading and trailing whitespace and squeezing every other
 *    run of whitespace into one blank (none at all in front of a ','),
 *  - removing double quotes,
 *  - turning "\-" into "-",
 *  - removing size changes (\s, optional sign, digits) and font changes
 *    (\fX, \f(XX, \f[name]),
 *  - removing any other backslash together with the character after it,
 *  - rewriting a ".Xr name section" line into "name(section)".
 *
 * Characters are converted to unsigned char before they are handed to the
 * ctype functions; passing a negative plain char is undefined behaviour.
 */
int
manpreprocess(char *line)
{
	char	*from, *to;

	to = from = line;
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			do {
				from++;
			} while (isspace((unsigned char)*from));
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* Leave '-' (or the NUL) for the next round. */
				break;
			case 'f': {
				int	 count;

				from++;		/* step over the 'f' */
				if (*from == '(') {
					/* \f(XX: two character font name */
					from++;
					for (count = 0;
					    (count < 2) && (*from != '\0');
					    count++)
						from++;
				} else if (*from == '[') {
					/* \f[name]: skip up to the ']' */
					while ((*from != '\0') &&
					    (*from != ']'))
						from++;
					if (*from == ']')
						from++;
				} else if (*from != '\0')
					from++;
				break;
			}
			case 's':
				/*
				 * Step over the 's' first; otherwise neither
				 * the sign nor the digits are ever seen and
				 * the whole escape is copied to the output.
				 */
				from++;
				if ((*from == '+') || (*from == '-'))
					from++;
				while (isdigit((unsigned char)*from))
					from++;
				break;
			default:
				from++;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char	*sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		/* Find the whitespace between the name and the section. */
		sect = from;
		while ((*sect != '\0') && !isspace((unsigned char)*sect))
			sect++;

		if (*sect != '\0') {
			size_t	 length;

			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
449 
450 char *
451 parsemanpage(gzFile *in, int defaultsection)
452 {
453 	char	*section, buffer[8192], *ptr;
454 
455 	section = NULL;
456 	do {
457 		if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
458 			free(section);
459 			return NULL;
460 		}
461 		if (manpreprocess(buffer))
462 			continue;
463 		if (strncasecmp(buffer, ".Dt", 3) == 0) {
464 			char	*end;
465 
466 			ptr = &buffer[3];
467 			if (isspace(*ptr))
468 				ptr++;
469 			if ((ptr = findwhitespace(ptr)) == NULL)
470 				continue;
471 
472 			if ((end = findwhitespace(++ptr)) != NULL)
473 				*end = '\0';
474 
475 			free(section);
476 			if ((section = malloc(strlen(ptr) + 7)) != NULL) {
477 				section[0] = ' ';
478 				section[1] = '(';
479 				(void) strcpy(&section[2], ptr);
480 				(void) strcat(&section[2], ") - ");
481 			}
482 		}
483 	} while ((strncasecmp(buffer, ".Sh NAME", 8) != 0));
484 
485 	do {
486 		if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
487 			free(section);
488 			return NULL;
489 		}
490 	} while (manpreprocess(buffer));
491 
492 	if (strncasecmp(buffer, ".Nm", 3) == 0) {
493 		int	length, offset;
494 
495 		ptr = &buffer[3];
496 		while (isspace(*ptr))
497 			ptr++;
498 
499 		length = strlen(ptr);
500 		if ((length > 1) && (ptr[length - 1] == ',') &&
501 		    isspace(ptr[length - 2])) {
502 			ptr[--length] = '\0';
503 			ptr[length - 1] = ',';
504 		}
505 		(void) memmove(buffer, ptr, length + 1);
506 
507 		offset = length + 3;
508 		ptr = &buffer[offset];
509 		for (;;) {
510 			int	 more;
511 
512 			if ((sizeof(buffer) == offset) ||
513 		            (GetS(in, ptr, sizeof(buffer) - offset)
514 			       == NULL)) {
515 				free(section);
516 				return NULL;
517 			}
518 			if (manpreprocess(ptr))
519 				continue;
520 
521 			if (strncasecmp(ptr, ".Nm", 3) != 0) break;
522 
523 			ptr += 3;
524 			if (isspace(*ptr))
525 				ptr++;
526 
527 			buffer[length++] = ' ';
528 			more = strlen(ptr);
529 			if ((more > 1) && (ptr[more - 1] == ',') &&
530 			    isspace(ptr[more - 2])) {
531 				ptr[--more] = '\0';
532 				ptr[more - 1] = ',';
533 			}
534 
535 			(void) memmove(&buffer[length], ptr, more + 1);
536 			length += more;
537 			offset = length + 3;
538 
539 			ptr = &buffer[offset];
540 		}
541 
542 		if (strncasecmp(ptr, ".Nd", 3) == 0) {
543 			(void) strcpy(&buffer[length], " -");
544 
545 			while (strncasecmp(ptr, ".Sh", 3) != 0) {
546 				int	 more;
547 
548 				if (*ptr == '.') {
549 					char	*space;
550 
551 					if ((space = findwhitespace(ptr)) == NULL)
552 						ptr = "";
553 					else {
554 						space++;
555 						(void) memmove(ptr, space,
556 							   strlen(space) + 1);
557 					}
558 				}
559 
560 				if (*ptr != '\0') {
561 					buffer[offset - 1] = ' ';
562 					more = strlen(ptr) + 1;
563 					offset += more;
564 				}
565 				ptr = &buffer[offset];
566 				if ((sizeof(buffer) == offset) ||
567 			            (GetS(in, ptr, sizeof(buffer) - offset)
568 					== NULL)) {
569 					free(section);
570 					return NULL;
571 				}
572 				if (manpreprocess(ptr))
573 					*ptr = '\0';
574 			}
575 		}
576 	}
577 	else {
578 		int	 offset;
579 
580 		if (*buffer == '.') {
581 			char	*space;
582 
583 			if ((space = findwhitespace(buffer)) == NULL) {
584 				free(section);
585 				return NULL;
586 			}
587 			space++;
588 			(void) memmove(buffer, space, strlen(space) + 1);
589 		}
590 
591 		offset = strlen(buffer) + 1;
592 		for (;;) {
593 			int	 more;
594 
595 			ptr = &buffer[offset];
596 			if ((sizeof(buffer) == offset) ||
597 		            (GetS(in, ptr, sizeof(buffer) - offset)
598 				== NULL)) {
599 				free(section);
600 				return NULL;
601 			}
602 			if (manpreprocess(ptr) || (*ptr == '\0'))
603 				continue;
604 
605 			if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
606 			    (strncasecmp(ptr, ".Ss", 3) == 0))
607 				break;
608 
609 			if (*ptr == '.') {
610 				char	*space;
611 
612 				if ((space = findwhitespace(ptr)) == NULL) {
613 					continue;
614 				}
615 
616 				space++;
617 				(void) memmove(ptr, space, strlen(space) + 1);
618 			}
619 
620 			buffer[offset - 1] = ' ';
621 			more = strlen(ptr);
622 			if ((more > 1) && (ptr[more - 1] == ',') &&
623 			    isspace(ptr[more - 2])) {
624 				ptr[more - 1] = '\0';
625 				ptr[more - 2] = ',';
626 			}
627 			else more++;
628 			offset += more;
629 		}
630 	}
631 
632 	if (section == NULL) {
633 		char sectionbuffer[24];
634 
635 		(void) sprintf(sectionbuffer, " (%c) - ",
636 			sectionext[defaultsection]);
637 		ptr = replacestring(buffer, " - ", sectionbuffer);
638 	}
639 	else {
640 		ptr = replacestring(buffer, " - ", section);
641 		free(section);
642 	}
643 	return ptr;
644 }
645 
646 char *
647 getwhatisdata(char *name)
648 {
649 	gzFile	*in;
650 	char	*data;
651 	int	 section;
652 
653 	if ((in = gzopen(name, "r")) == NULL) {
654 		errx(EXIT_FAILURE, "%s: %s",
655 		    name,
656 		    strerror((errno == 0) ? ENOMEM : errno));
657 		/* NOTREACHED */
658 	}
659 
660 	section = manpagesection(name);
661 	data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section);
662 
663 	(void) gzclose(in);
664 	return data;
665 }
666 
667 void
668 processmanpages(manpage **source, whatis **dest)
669 {
670 	manpage	*mp;
671 
672 	mp = *source;
673 	*source = NULL;
674 
675 	while (mp != NULL) {
676 		manpage *obsolete;
677 		char *data;
678 
679 		if (mp->mp_left != NULL)
680 			processmanpages(&mp->mp_left,dest);
681 
682 		if ((data = getwhatisdata(mp->mp_name)) != NULL) {
683 			if (!addwhatis(dest,data))
684 				err(EXIT_FAILURE, NULL);
685 		}
686 
687 		obsolete = mp;
688 		mp = mp->mp_right;
689 		free(obsolete);
690 	}
691 }
692 
693 int
694 dumpwhatis (FILE *out, whatis *tree)
695 {
696 	while (tree != NULL) {
697 		if (tree->wi_left)
698 			if (!dumpwhatis(out, tree->wi_left)) return 0;
699 
700 		if ((fputs(tree->wi_data, out) == EOF) ||
701 		    (fputc('\n', out) == EOF))
702 			return 0;
703 
704 		tree = tree->wi_right;
705 	}
706 
707 	return 1;
708 }
709