1 /* $NetBSD: makewhatis.c,v 1.16 2001/04/10 21:00:00 tron Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Matthias Scheler. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 #ifndef lint 41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\ 42 All rights reserved.\n"); 43 #endif /* not lint */ 44 45 #ifndef lint 46 __RCSID("$NetBSD: makewhatis.c,v 1.16 2001/04/10 21:00:00 tron Exp $"); 47 #endif /* not lint */ 48 49 #include <sys/types.h> 50 #include <sys/param.h> 51 #include <sys/stat.h> 52 #include <sys/wait.h> 53 54 #include <ctype.h> 55 #include <err.h> 56 #include <errno.h> 57 #include <fcntl.h> 58 #include <fts.h> 59 #include <locale.h> 60 #include <paths.h> 61 #include <signal.h> 62 #include <stdio.h> 63 #include <stdlib.h> 64 #include <string.h> 65 #include <unistd.h> 66 #include <zlib.h> 67 68 typedef struct manpagestruct manpage; 69 struct manpagestruct { 70 manpage *mp_left,*mp_right; 71 ino_t mp_inode; 72 char mp_name[1]; 73 }; 74 75 typedef struct whatisstruct whatis; 76 struct whatisstruct { 77 whatis *wi_left,*wi_right; 78 char *wi_data; 79 }; 80 81 int main (int, char **); 82 char *findwhitespace (char *); 83 char *strmove (char *,char *); 84 char *GetS (gzFile, char *, int); 85 int manpagesection (char *); 86 char *createsectionstring(char *); 87 int addmanpage (manpage **, ino_t, char *); 88 int addwhatis (whatis **, char *); 89 char *replacestring (char *, char *, char *); 90 void catpreprocess (char *); 91 char *parsecatpage (gzFile *); 92 int manpreprocess (char *); 93 char *nroff (gzFile *); 94 char *parsemanpage (gzFile *, int); 95 char *getwhatisdata (char *); 96 void processmanpages (manpage **,whatis **); 97 int dumpwhatis (FILE *, whatis *); 98 99 char *default_manpath[] = { 100 "/usr/share/man", 101 NULL 102 }; 103 104 char sectionext[] = "0123456789ln"; 105 char whatisdb[] = "whatis.db"; 106 107 int 108 main(int argc,char **argv) 109 { 110 char **manpath; 111 FTS *fts; 112 FTSENT *fe; 113 manpage *source; 114 whatis *dest; 115 FILE *out; 116 117 (void)setlocale(LC_ALL, ""); 118 119 manpath = (argc < 2) ? default_manpath : &argv[1]; 120 121 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) { 122 perror(getprogname()); 123 return EXIT_FAILURE; 124 } 125 126 source = NULL; 127 while ((fe = fts_read(fts)) != NULL) { 128 switch (fe->fts_info) { 129 case FTS_F: 130 if (manpagesection(fe->fts_path) >= 0) 131 if (!addmanpage(&source, 132 fe->fts_statp->st_ino, 133 fe->fts_path)) 134 err(EXIT_FAILURE, NULL); 135 case FTS_D: 136 case FTS_DC: 137 case FTS_DEFAULT: 138 case FTS_DP: 139 case FTS_SLNONE: 140 break; 141 default: 142 errx(EXIT_FAILURE, "%s: %s", fe->fts_path, 143 strerror(fe->fts_errno)); 144 145 } 146 } 147 148 (void)fts_close(fts); 149 150 dest = NULL; 151 processmanpages(&source, &dest); 152 153 if (chdir(manpath[0]) < 0) 154 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno)); 155 156 if ((out = fopen(whatisdb, "w")) == NULL) 157 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno)); 158 159 if (!(dumpwhatis(out, dest) || 160 (fclose(out) < 0)) || 161 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0)) 162 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno)); 163 164 return EXIT_SUCCESS; 165 } 166 167 char 168 *findwhitespace(char *str) 169 170 { 171 while (!isspace(*str)) 172 if (*str++ == '\0') { 173 str = NULL; 174 break; 175 } 176 177 return str; 178 } 179 180 char 181 *strmove(char *dest,char *src) 182 183 { 184 return memmove(dest, src, strlen(src) + 1); 185 } 186 187 char 188 *GetS(gzFile in, char *buffer, int length) 189 190 { 191 char *ptr; 192 193 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0')) 194 ptr = NULL; 195 196 return ptr; 197 } 198 199 int 200 manpagesection(char *name) 201 { 202 char *ptr; 203 204 if ((ptr = strrchr(name, '/')) != NULL) 205 ptr++; 206 else 207 ptr = name; 208 209 while ((ptr = strchr(ptr, '.')) != NULL) { 210 int section; 211 212 ptr++; 213 section=0; 214 while (sectionext[section] != '\0') 215 if (sectionext[section] == *ptr) 216 return section; 217 else 218 section++; 219 } 220 221 return -1; 222 } 223 224 char 225 *createsectionstring(char *section_id) 226 { 227 char *section; 228 229 if ((section = malloc(strlen(section_id) + 7)) != NULL) { 230 section[0] = ' '; 231 section[1] = '('; 232 (void) strcat(strcpy(§ion[2], section_id), ") - "); 233 } 234 return section; 235 } 236 237 int 238 addmanpage(manpage **tree,ino_t inode,char *name) 239 { 240 manpage *mp; 241 242 while ((mp = *tree) != NULL) { 243 if (mp->mp_inode == inode) 244 return 1; 245 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right); 246 } 247 248 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL) 249 return 0; 250 251 mp->mp_left = NULL; 252 mp->mp_right = NULL; 253 mp->mp_inode = inode; 254 (void) strcpy(mp->mp_name, name); 255 *tree = mp; 256 257 return 1; 258 } 259 260 int 261 addwhatis(whatis **tree, char *data) 262 { 263 whatis *wi; 264 int result; 265 266 while (isspace(*data)) 267 data++; 268 269 if (*data == '/') { 270 char *ptr; 271 272 ptr = ++data; 273 while ((*ptr != '\0') && !isspace(*ptr)) 274 if (*ptr++ == '/') 275 data = ptr; 276 } 277 278 while ((wi = *tree) != NULL) { 279 result=strcmp(data, wi->wi_data); 280 if (result == 0) return 1; 281 tree = &((result < 0) ? wi->wi_left : wi->wi_right); 282 } 283 284 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL) 285 return 0; 286 287 wi->wi_left = NULL; 288 wi->wi_right = NULL; 289 wi->wi_data = data; 290 *tree = wi; 291 292 return 1; 293 } 294 295 void 296 catpreprocess(char *from) 297 { 298 char *to; 299 300 to = from; 301 while (isspace(*from)) from++; 302 303 while (*from != '\0') 304 if (isspace(*from)) { 305 while (isspace(*++from)); 306 if (*from != '\0') 307 *to++ = ' '; 308 } 309 else if (*(from + 1) == '\10') 310 from += 2; 311 else 312 *to++ = *from++; 313 314 *to = '\0'; 315 } 316 317 char * 318 replacestring(char *string, char *old, char *new) 319 320 { 321 char *ptr, *result; 322 int slength, olength, nlength, pos; 323 324 if (new == NULL) 325 return strdup(string); 326 327 ptr = strstr(string, old); 328 if (ptr == NULL) 329 return strdup(string); 330 331 slength = strlen(string); 332 olength = strlen(old); 333 nlength = strlen(new); 334 if ((result = malloc(slength - olength + nlength + 1)) == NULL) 335 return NULL; 336 337 pos = ptr - string; 338 (void) memcpy(result, string, pos); 339 (void) memcpy(&result[pos], new, nlength); 340 (void) strcpy(&result[pos + nlength], &string[pos + olength]); 341 342 return result; 343 } 344 345 char * 346 parsecatpage(gzFile *in) 347 { 348 char buffer[8192]; 349 char *section, *ptr, *last; 350 int size; 351 352 do { 353 if (GetS(in, buffer, sizeof(buffer)) == NULL) 354 return NULL; 355 } 356 while (buffer[0] == '\n'); 357 358 section = NULL; 359 if ((ptr = strchr(buffer, '(')) != NULL) { 360 if ((last = strchr(ptr + 1, ')')) !=NULL) { 361 int length; 362 363 length = last - ptr + 1; 364 if ((section = malloc(length + 5)) == NULL) 365 return NULL; 366 367 *section = ' '; 368 (void) memcpy(section + 1, ptr, length); 369 (void) strcpy(section + 1 + length, " - "); 370 } 371 } 372 373 for (;;) { 374 if (GetS(in, buffer, sizeof(buffer)) == NULL) { 375 free(section); 376 return NULL; 377 } 378 catpreprocess(buffer); 379 if (strncmp(buffer, "NAME", 4) == 0) 380 break; 381 } 382 383 ptr = last = buffer; 384 size = sizeof(buffer) - 1; 385 while ((size > 0) && (GetS(in, ptr, size) != NULL)) { 386 int length; 387 388 catpreprocess(ptr); 389 390 length = strlen(ptr); 391 if (length == 0) { 392 *last = '\0'; 393 394 ptr = replacestring(buffer, " - ", section); 395 free(section); 396 return ptr; 397 } 398 if ((length > 1) && (ptr[length - 1] == '-') && 399 isalpha(ptr[length - 2])) 400 last = &ptr[--length]; 401 else { 402 last = &ptr[length++]; 403 *last = ' '; 404 } 405 406 ptr += length; 407 size -= length; 408 } 409 410 free(section); 411 412 return NULL; 413 } 414 415 int 416 manpreprocess(char *line) 417 { 418 char *from, *to; 419 420 to = from = line; 421 while (isspace(*from)) from++; 422 if (strncmp(from, ".\\\"", 3) == 0) 423 return 1; 424 425 while (*from != '\0') 426 if (isspace(*from)) { 427 while (isspace(*++from)); 428 if ((*from != '\0') && (*from != ',')) 429 *to++ = ' '; 430 } 431 else if (*from == '\\') 432 switch (*++from) { 433 case '\0': 434 case '-': 435 break; 436 case 'f': 437 case 's': 438 from++; 439 if ((*from=='+') || (*from=='-')) 440 from++; 441 while (isdigit(*from)) 442 from++; 443 break; 444 default: 445 from++; 446 } 447 else 448 if (*from == '"') 449 from++; 450 else 451 *to++ = *from++; 452 453 *to = '\0'; 454 455 if (strncasecmp(line, ".Xr", 3) == 0) { 456 char *sect; 457 458 from = line + 3; 459 if (isspace(*from)) 460 from++; 461 462 if ((sect = findwhitespace(from)) != NULL) { 463 int length; 464 465 *sect++ = '\0'; 466 length = strlen(from); 467 (void) memmove(line, from, length); 468 line[length++] = '('; 469 to = &line[length]; 470 length = strlen(sect); 471 (void) memmove(to, sect, length); 472 (void) strcpy(&to[length], ")"); 473 } 474 } 475 476 return 0; 477 } 478 479 char * 480 nroff(gzFile *in) 481 { 482 char tempname[MAXPATHLEN], buffer[65536], *data; 483 int tempfd, bytes, pipefd[2], status; 484 static int devnull = -1; 485 pid_t child; 486 487 if (gzrewind(in) < 0) { 488 perror(getprogname()); 489 return NULL; 490 } 491 492 if ((devnull < 0) && 493 ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0)) { 494 perror(getprogname()); 495 return NULL; 496 } 497 498 (void)strcpy(tempname, _PATH_TMP "makewhatis.XXXXXX"); 499 if ((tempfd = mkstemp(tempname)) < 0) { 500 perror(getprogname()); 501 return NULL; 502 } 503 504 while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0) 505 if (write(tempfd, buffer, bytes) != bytes) { 506 bytes = -1; 507 break; 508 } 509 510 if ((bytes < 0) || 511 (lseek(tempfd, 0, SEEK_SET) < 0) || 512 (pipe(pipefd) < 0)) { 513 perror(getprogname()); 514 (void)close(tempfd); 515 (void)unlink(tempname); 516 return NULL; 517 } 518 519 switch (child = vfork()) { 520 case -1: 521 perror(getprogname()); 522 (void)close(pipefd[1]); 523 (void)close(pipefd[0]); 524 (void)close(tempfd); 525 (void)unlink(tempname); 526 return NULL; 527 /* NOTREACHED */ 528 case 0: 529 (void)close(pipefd[0]); 530 if (tempfd != STDIN_FILENO) { 531 (void)dup2(tempfd, STDIN_FILENO); 532 (void)close(tempfd); 533 } 534 if (pipefd[1] != STDOUT_FILENO) { 535 (void)dup2(pipefd[1], STDOUT_FILENO); 536 (void)close(pipefd[1]); 537 } 538 if (devnull != STDERR_FILENO) { 539 (void)dup2(devnull, STDERR_FILENO); 540 (void)close(devnull); 541 } 542 (void)execlp("nroff", "nroff", "-S", "-man", NULL); 543 _exit(EXIT_FAILURE); 544 default: 545 (void)close(pipefd[1]); 546 (void)close(tempfd); 547 /* NOTREACHED */ 548 } 549 550 if ((in = gzdopen(pipefd[0], "r")) == NULL) { 551 if (errno == 0) 552 errno = ENOMEM; 553 perror(getprogname()); 554 (void)close(pipefd[0]); 555 (void)kill(child, SIGTERM); 556 while (waitpid(child, NULL, 0) != child); 557 (void)unlink(tempname); 558 return NULL; 559 } 560 561 data = parsecatpage(in); 562 while (gzread(in, buffer, sizeof(buffer)) > 0); 563 (void)gzclose(in); 564 565 while (waitpid(child, &status, 0) != child); 566 if ((data != NULL) && 567 !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) { 568 free(data); 569 data = NULL; 570 } 571 572 (void)unlink(tempname); 573 574 return data; 575 } 576 577 char * 578 parsemanpage(gzFile *in, int defaultsection) 579 { 580 char *section, buffer[8192], *ptr; 581 582 section = NULL; 583 do { 584 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) { 585 free(section); 586 return NULL; 587 } 588 if (manpreprocess(buffer)) 589 continue; 590 if (strncasecmp(buffer, ".Dt", 3) == 0) { 591 char *end; 592 593 ptr = &buffer[3]; 594 if (isspace(*ptr)) 595 ptr++; 596 if ((ptr = findwhitespace(ptr)) == NULL) 597 continue; 598 599 if ((end = findwhitespace(++ptr)) != NULL) 600 *end = '\0'; 601 602 free(section); 603 section = createsectionstring(ptr); 604 } 605 else if (strncasecmp(buffer, ".TH", 3) == 0) { 606 ptr = &buffer[3]; 607 while (isspace(*ptr)) 608 ptr++; 609 if ((ptr = findwhitespace(ptr)) != NULL) { 610 char *next; 611 612 while (isspace(*ptr)) 613 ptr++; 614 if ((next = findwhitespace(ptr)) != NULL) 615 *next = '\0'; 616 free(section); 617 section = createsectionstring(ptr); 618 } 619 } 620 else if (strncasecmp(buffer, ".Ds", 3) == 0) { 621 free(section); 622 return NULL; 623 } 624 } while (strncasecmp(buffer, ".Sh NAME", 8) != 0); 625 626 do { 627 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) { 628 free(section); 629 return NULL; 630 } 631 } while (manpreprocess(buffer)); 632 633 if (strncasecmp(buffer, ".Nm", 3) == 0) { 634 int length, offset; 635 636 ptr = &buffer[3]; 637 while (isspace(*ptr)) 638 ptr++; 639 640 length = strlen(ptr); 641 if ((length > 1) && (ptr[length - 1] == ',') && 642 isspace(ptr[length - 2])) { 643 ptr[--length] = '\0'; 644 ptr[length - 1] = ','; 645 } 646 (void) memmove(buffer, ptr, length + 1); 647 648 offset = length + 3; 649 ptr = &buffer[offset]; 650 for (;;) { 651 int more; 652 653 if ((sizeof(buffer) == offset) || 654 (GetS(in, ptr, sizeof(buffer) - offset) 655 == NULL)) { 656 free(section); 657 return NULL; 658 } 659 if (manpreprocess(ptr)) 660 continue; 661 662 if (strncasecmp(ptr, ".Nm", 3) != 0) break; 663 664 ptr += 3; 665 if (isspace(*ptr)) 666 ptr++; 667 668 buffer[length++] = ' '; 669 more = strlen(ptr); 670 if ((more > 1) && (ptr[more - 1] == ',') && 671 isspace(ptr[more - 2])) { 672 ptr[--more] = '\0'; 673 ptr[more - 1] = ','; 674 } 675 676 (void) memmove(&buffer[length], ptr, more + 1); 677 length += more; 678 offset = length + 3; 679 680 ptr = &buffer[offset]; 681 } 682 683 if (strncasecmp(ptr, ".Nd", 3) == 0) { 684 (void) strcpy(&buffer[length], " -"); 685 686 while (strncasecmp(ptr, ".Sh", 3) != 0) { 687 int more; 688 689 if (*ptr == '.') { 690 char *space; 691 692 if (strncasecmp(ptr, ".Nd", 3) != 0) { 693 free(section); 694 return NULL; 695 } 696 space = findwhitespace(ptr); 697 if (space == NULL) 698 ptr = ""; 699 else { 700 space++; 701 (void) strmove(ptr, space); 702 } 703 } 704 705 if (*ptr != '\0') { 706 buffer[offset - 1] = ' '; 707 more = strlen(ptr) + 1; 708 offset += more; 709 } 710 ptr = &buffer[offset]; 711 if ((sizeof(buffer) == offset) || 712 (GetS(in, ptr, sizeof(buffer) - offset) 713 == NULL)) { 714 free(section); 715 return NULL; 716 } 717 if (manpreprocess(ptr)) 718 *ptr = '\0'; 719 } 720 } 721 } 722 else { 723 int offset; 724 725 if (*buffer == '.') { 726 char *space; 727 728 if ((space = findwhitespace(&buffer[1])) == NULL) { 729 free(section); 730 return NULL; 731 } 732 space++; 733 (void) strmove(buffer, space); 734 } 735 736 offset = strlen(buffer) + 1; 737 for (;;) { 738 int more; 739 740 ptr = &buffer[offset]; 741 if ((sizeof(buffer) == offset) || 742 (GetS(in, ptr, sizeof(buffer) - offset) 743 == NULL)) { 744 free(section); 745 return NULL; 746 } 747 if (manpreprocess(ptr) || (*ptr == '\0')) 748 continue; 749 750 if ((strncasecmp(ptr, ".Sh", 3) == 0) || 751 (strncasecmp(ptr, ".Ss", 3) == 0)) 752 break; 753 754 if (*ptr == '.') { 755 char *space; 756 757 if ((space = findwhitespace(ptr)) == NULL) { 758 continue; 759 } 760 761 space++; 762 (void) memmove(ptr, space, strlen(space) + 1); 763 } 764 765 buffer[offset - 1] = ' '; 766 more = strlen(ptr); 767 if ((more > 1) && (ptr[more - 1] == ',') && 768 isspace(ptr[more - 2])) { 769 ptr[more - 1] = '\0'; 770 ptr[more - 2] = ','; 771 } 772 else more++; 773 offset += more; 774 } 775 } 776 777 if (section == NULL) { 778 char sectionbuffer[24]; 779 780 (void) sprintf(sectionbuffer, " (%c) - ", 781 sectionext[defaultsection]); 782 ptr = replacestring(buffer, " - ", sectionbuffer); 783 } 784 else { 785 ptr = replacestring(buffer, " - ", section); 786 free(section); 787 } 788 return ptr; 789 } 790 791 char * 792 getwhatisdata(char *name) 793 { 794 gzFile *in; 795 char *data; 796 int section; 797 798 if ((in = gzopen(name, "r")) == NULL) { 799 errx(EXIT_FAILURE, "%s: %s", 800 name, 801 strerror((errno == 0) ? ENOMEM : errno)); 802 /* NOTREACHED */ 803 } 804 805 section = manpagesection(name); 806 if (section == 0) 807 data = parsecatpage(in); 808 else { 809 data = parsemanpage(in, section); 810 if (data == NULL) 811 data = nroff(in); 812 } 813 814 (void) gzclose(in); 815 return data; 816 } 817 818 void 819 processmanpages(manpage **source, whatis **dest) 820 { 821 manpage *mp; 822 823 mp = *source; 824 *source = NULL; 825 826 while (mp != NULL) { 827 manpage *obsolete; 828 char *data; 829 830 if (mp->mp_left != NULL) 831 processmanpages(&mp->mp_left,dest); 832 833 if ((data = getwhatisdata(mp->mp_name)) != NULL) { 834 if (!addwhatis(dest,data)) 835 err(EXIT_FAILURE, NULL); 836 } 837 838 obsolete = mp; 839 mp = mp->mp_right; 840 free(obsolete); 841 } 842 } 843 844 int 845 dumpwhatis (FILE *out, whatis *tree) 846 { 847 while (tree != NULL) { 848 if (tree->wi_left) 849 if (!dumpwhatis(out, tree->wi_left)) return 0; 850 851 if ((fputs(tree->wi_data, out) == EOF) || 852 (fputc('\n', out) == EOF)) 853 return 0; 854 855 tree = tree->wi_right; 856 } 857 858 return 1; 859 } 860